(untested) ray tracing

2024-12-29 01:20:59 -06:00 · 2024-12-29 01:20:59 -06:00 · 632d3f9005
commit 632d3f9005
parent 67fc659fd9
14 changed files with 127 additions and 117 deletions
--- a/src/.gdb_history
+++ b/src/.gdb_history
@ -0,0 +1,3 @@
+r
+quit
+quit
--- a/src/camera.cu
+++ b/src/camera.cu
@ -1,22 +1,35 @@
 #include <limits>
 #include "scene.cuh"
 #include "camera.cuh"
+#include "common.cuh"

-__device__ void Camera::render(Scene scene, uint8_t *image) {
+__device__ vect3 Camera::render() {
+  const vect3 thread_id = make_vect3((blockIdx * blockDim) + threadIdx);
+  const vect3 safeGridDim = make_vect3(blockDim.x * gridDim.x, blockDim.y * gridDim.y, 1);
+  const vect3 uv = (viewport_size * thread_id) / safeGridDim + (viewport_size / 2);
+  Ray ray{this->pos, uv};
+  Render_object *obj = active_scene->raycast(ray);
+  if(obj) return make_vect3(1,1,1);
+  else return make_vect3(0,0,0);
+
+
+  /**
+  const uint3 uv = ((blockDim * blockIdx) + threadIdx)
  const uint3 unnormalized_uv = ((blockDim * blockIdx) + threadIdx);
  const unsigned int img_index = (unnormalized_uv.x + (unnormalized_uv.y * blockDim.x * gridDim.x)) * 4;
  const vect3 uv = ((2 * make_vect3(unnormalized_uv)) / make_vect3(gridDim * blockDim)) - 1;
  Ray ray;
  ray.direction = normalize(make_vect3(uv.x, uv.y, 1));
-  Render_object **objects = scene.get_objects();
+  Render_object **objects = active_scene->get_objects();
  Render_object *closest_obj;
  T total_dist = 0;
  Color pcolor;
+  int max_steps = 0; //will remove
  for(int steps = 0; steps < max_steps; steps++) { //this could be heavily optimized.
-    scene.raycast(ray);
+    active_scene->raycast(ray);
    T min_dist = T_MAX;
    T dist;
-    ray.start = this->pos_ + (total_dist * ray.direction);
+    ray.start = pos_ + (total_dist * ray.direction);
    for(size_t obj_i = 0; objects[obj_i] != nullptr; obj_i++) {

      dist = objects[obj_i]->distance_estimator(ray.start);
@ -43,4 +56,5 @@ __device__ void Camera::render(Scene scene, uint8_t *image) {
  image[img_index] = pcolor.x * 255;
  image[img_index+1] = pcolor.y * 255;
  image[img_index+2] = pcolor.z * 255;
+  **/
 }
--- a/src/camera.cuh
+++ b/src/camera.cuh
@ -12,65 +12,16 @@

 class Camera : public Entity {
  public:
-    Camera(const T fov = 1, const vect3 pos = make_vect3(0), const vect3 rot = make_vect3(0)) 
-      : fov(fov), Entity(pos, rot, make_vect3(0)) {};
-    __device__ void render(Scene scene, uint8_t *image);
+    Camera(Scene *active_scene, const T fov = 1, const vect3 pos = make_vect3(0), const vect3 rot = make_vect3(0)) 
+      : Entity(pos, rot, make_vect3(0)), active_scene(active_scene), fov(fov) {};
+    __device__ vect3 render();
  private:
-    const T clip_min = .1;
-    const T clip_max = 100;
-    const int max_steps = 100;
+    T clip_min = .1; //will allow changing later
+    T clip_max = 100;
    T fov;
-    float2 size;
+    float3 viewport_size = make_float3(1,1,0); //TODO float3
+    Scene *active_scene;
+    
 };

-
-/**
-//later we'll make scenes objects, rn im lazy (TODO)
-template <class T> __device__ void camera::render() {
-  //TODO *really* need to clean this up once you get further
-  //extra dimentions is extra math
-  //either generisize float3 or stop using this fucking template nonsense
-  const uint3 unnormalized_uv = ((blockDim * blockIdx) + threadIdx);
-  const unsigned int img_index = (unnormalized_uv.x + (unnormalized_uv.y * blockDim.x * gridDim.x)) * 4;
-  const vect3 uv = ((2 * make_vect3(unnormalized_uv)) / make_vect3(gridDim * blockDim)) - 1;
-  const vect3 ray_direction = normalize(make_vect3(uv.x, uv.y, 1));
-  T dist;
-  T total_dist = 0;
-  vect3 ray;
-  int i;
-
-
-  //if(img_index == 640) { printf("%f, %f, %f\n", uv.x, uv.y, uv.z); }
-
-  T min_dist = clip_max;
-
-  render_object **objs = pscene->get_objs();
-  for(i = 0; i < steps; i++) {
-    ray = this->pos_ + (total_dist * ray_direction);
-    //gyagh memory lookups
-    for(unsigned int oi = 0; objs[oi] != NULL; oi++) {
-      dist = object.distance_estimator(ray);
-    }
-    if((dist < clip_min)) {
-      //image[img_index] = 0xff;
-      break;
-    }
-    if((dist > clip_max)) {
-      //image[img_index+2] = 0xff;
-      break;
-    }
-    total_dist += dist;
-  }
-
-
-
-  
-  //image[img_index] = 0x00;
-  //image[img_index+1] = 0x00;
-  //image[img_index+2] = p;
-  //image[img_index+3] = 0xff;
-  
-}
-**/
-
 #endif
--- a/src/common.cuh
+++ b/src/common.cuh
@ -45,11 +45,12 @@ typedef float3 Color;
 #define make_color(...) (make_float3(__VA_ARGS__))

 //TODO move to ray.cuh
+const int max_bounces = 10;
 struct Ray {
-  Color color;
  vect3 start;
  vect3 direction; //MUST BE A UNIT VECTOR
-  int bounces;
+  int bounces = max_bounces;
+  Color color = make_vect3(0,0,0);
 };

 #endif 
--- a/src/entity.cuh
+++ b/src/entity.cuh
@ -8,23 +8,15 @@
 //out of the question
 class Entity {
  public:
-    Entity() : pos_(make_vect3(0)), rot_(make_vect3(0)), scale_(make_vect3(0)) {};
-    Entity(const vect3 pos, const vect3 rot, const vect3 scale) : pos_(pos), rot_(rot), scale_(scale) {};
-    Entity(const float3 pos) : pos_(pos), rot_(make_vect3(0)), scale_(make_vect3(0)) {};
+    Entity() : pos(make_vect3(0)), rot(make_vect3(0)), scale(make_vect3(0)) {};
+    Entity(const vect3 pos, const vect3 rot, const vect3 scale) : pos(pos), rot(rot), scale(scale) {};
+    Entity(const float3 pos) : pos(pos), rot(make_vect3(0)), scale(make_vect3(0)) {};


-    vect3 get_pos() const { return pos_; }
-    vect3 get_rot() const { return rot_; }
-    vect3 get_scale() const { return scale_; }
-
-    __device__ void set_pos(const vect3 pos) { pos_ = pos; }
-    __device__ void set_rot(const vect3 rot) { rot_ = rot; }
-    __device__ void set_scale(const vect3 scale) { scale_ = scale; }
-
  protected:
-    vect3 pos_;
-    vect3 rot_;
-    vect3 scale_;
+    vect3 pos;
+    vect3 rot;
+    vect3 scale;

 };
 #endif
--- a/src/kernel.cu
+++ b/src/kernel.cu
@ -4,9 +4,14 @@
 #include "scene.cuh"
 #include "camera.cuh"

-__global__ void render(uint8_t *image) {
-  //Scene scene;
-  //Camera cam(1, make_vect3(0,0,0));
-  //cam.render(scene, image);
-  //scene.render(image);
+__global__ void render(uint8_t *final_img, Camera *cam) {
+  //camera returns raw r/g/b(/z?) value, postprocessing (including gamma) happens per pixel in kernel here
+  vect3 bruh = cam->render();
+  const uint3 unnormalized_uv = ((blockDim * blockIdx) + threadIdx);
+  const unsigned int img_index = (unnormalized_uv.x + (unnormalized_uv.y * blockDim.x * gridDim.x)) * 4;
+  final_img[img_index] = bruh.x * 0xff;
+  final_img[img_index+1] = bruh.y * 0xff;
+  final_img[img_index+2] = bruh.z * 0xff;
+  final_img[img_index+3] = 0xff;
+
 }
--- a/src/kernel.cuh
+++ b/src/kernel.cuh
@ -1,5 +1,11 @@
 #ifndef KERNEL_H
 #define KERNEL_H
 #include <stdint.h>
-__global__ void render(uint8_t *image);
+#include "camera.cuh"
+/**
+  Render from the view of the passed camera into img. 
+  Img is in whatever colorspace the output is desited to have, after postprocessesing.
+  Cam is expected to be in global memory.
+**/
+__global__ void render(uint8_t *final_img, Camera *cam);
 #endif
--- a/src/main.cu
+++ b/src/main.cu
@ -10,8 +10,8 @@ namespace raylib {


 #include "kernel.cuh"
-#include "scene.cuh"
 #include "sphere.cuh"
+#include "scene.cuh"
 #include "camera.cuh"


@ -24,9 +24,9 @@ int main() {
  const dim3 blockCount(20, 20);
  const dim3 threadCount(32, 32);

-  uint8_t *image_d;
-  Color texture_data[res_x * res_y];
-  raylib::SetTargetFPS(60);
+  uint8_t *raylib_image;
+  //Color texture_data[res_x * res_y];
+  raylib::SetTargetFPS(1);


  //see if GPU is connected (my egpu is finicky)
@ -46,26 +46,45 @@ int main() {
  raylib::Image image = raylib::GenImageColor(res_x, res_y, raylib::BLUE);
  raylib::Texture tex = raylib::LoadTextureFromImage(image);

-  //initilize scene
-  Scene scene;
-  Sphere sp1 = Sphere(make_vect3(0, -2, 5));
-  Sphere sp2 = Sphere(make_vect3(2, 0, 5));
-  Camera cam = Camera(1, make_float3(0,0,0));
+
+  Sphere *sp1;
+  cudaMallocManaged(&sp1, sizeof(Sphere));
+  *sp1 = Sphere(make_vect3(0, 0, 1));
+
+  Sphere *sp2;
+  cudaMallocManaged(&sp2, sizeof(Sphere));
+  *sp2 = Sphere(make_vect3(0, 1, 2));
+
+  Scene *scene;
+  cudaMallocManaged(&scene, sizeof(Scene));
+  *scene = Scene(); //probably don't need to do?
+
+  scene->add_render_object(sp1);
+  scene->add_render_object(sp2);
+  
+  Camera *camera;
+  cudaMallocManaged(&camera, sizeof(Camera));
+  *camera = Camera(scene);
+
+
+  //maybe we shouldn't be infering how to use the raylib::Color type without using
+  //the header itself within the kernel (which I honestly don't wanna do)
+  cudaMallocManaged(&raylib_image, res_x * res_y * sizeof(raylib::Color));
+

  while(!raylib::WindowShouldClose()) {
    cudaError_t err;
    //cuda stuff
-    cudaMalloc((void **)&image_d, res_x * res_y * sizeof(Color));
-    render<<<blockCount, threadCount>>>(image_d); 
+    render<<<blockCount, threadCount>>>(raylib_image, camera); 
    if((err = cudaGetLastError()) != cudaSuccess) {
      printf("kernel did not launch! Error: %s\n", cudaGetErrorString(err));
    }
    cudaDeviceSynchronize();
-    cudaMemcpy(texture_data, (void **)image_d, res_x * res_y * sizeof(Color), cudaMemcpyDeviceToHost);
+    //cudaMemcpy(texture_data, (void **)raylib_image, res_x * res_y * sizeof(Color), cudaMemcpyDeviceToHost);

    raylib::BeginDrawing();
    raylib::ClearBackground(raylib::BLACK);
-    raylib::UpdateTexture(tex, texture_data);
+    raylib::UpdateTexture(tex, raylib_image);
    raylib::DrawTexture(tex, 0, 0, raylib::WHITE);
    raylib::DrawFPS(0, 0);
    raylib::EndDrawing();
--- a/src/makefile
+++ b/src/makefile
@ -1,5 +1,7 @@
-LIBS = -lraylib -lGL -lm -lpthread -ldl -lrt -lX11
-$CC = gcc
+LIBS = -lraylib -lGL -lm -lpthread -ldl -lrt -lX11 --gpu-architecture=compute_61 --gpu-code=sm_61
+CC = gcc
+#CUDA_CC = /usr/local/cuda-12/bin/nvcc
+CUDA_CC = nvcc
 INC = -I /opt/cuda/include
 COMPILED_BIN = build/indigo_worlds

@ -7,10 +9,10 @@ CU_SRCFILES := $(wildcard *.cu)
 CU_OBJFILES := $(patsubst %.cu, %.o, $(CU_SRCFILES))

 all: $(CU_OBJFILES)
-	nvcc $(LIBS) -g -G -o build/indigo_worlds build/*.o
+	$(CUDA_CC) $(LIBS) -g -G -o build/indigo_worlds build/*.o

 %.o: %.cu
-	nvcc -g -G -dc -c $< -o build/$@
+	$(CUDA_CC) -g -G -dc -c $< -o build/$@

 run: all
 	build/indigo_worlds
--- a/src/render_object.cuh
+++ b/src/render_object.cuh
@ -8,7 +8,6 @@ class Render_object : public Entity {
  public:
    virtual __device__ T distance_estimator(vect3 point) const = 0;
    virtual __device__ Color get_color(struct Ray ray) const = 0;
-
 };

 #endif
--- a/src/scene.cu
+++ b/src/scene.cu
@ -1,9 +1,33 @@
 #include <stdio.h>
+#include "common.cuh"
 #include "scene.cuh"

 __device__ Render_object *Scene::raycast(struct Ray ray) {
-  if(ray.bounces++ > max_bounces);
-  const size_t obj_count = sizeof(objs) / sizeof(objs[0]);
-  for(size_t obj_i = 0; obj_i < obj_count; obj_i++); 
+  const int MAX_STEPS = 20; //LTERM TODO optimize
+  const T INTERSECT_DIST = 0.01;
+  const T MISS_DIST = 20;
+  T closest_dist; 
+  int dist;
+  if(ray.bounces-- < 0) return nullptr; //TODO return skybox
+  for(int steps = 0; steps < MAX_STEPS; steps++) {
+    closest_dist = T_MAX;
+    for(size_t obj_i = 0; render_objs[obj_i] != nullptr; obj_i++) {
+      //LTERM TODO: maybe move this statement once you benchmark a solid testcase
+      if(dist < INTERSECT_DIST) return render_objs[obj_i];
+      dist = fabs(render_objs[obj_i]->distance_estimator(ray.start));
+      if(dist < closest_dist) closest_dist = dist;
+    }
+    //LTERM TODO: maybe move these statements once you benchmark a solid testcase
+    if(closest_dist > MISS_DIST) return nullptr; //TODO return skybox
+    ray.start = ray.start + (ray.direction * closest_dist);
+  }
+
+}
+
+//does not check for bounds; we'll remove this function later
+__host__ void Scene::add_render_object(Render_object *object) { 
+  size_t obj_i;
+  for(obj_i = 0; render_objs[obj_i] != nullptr; obj_i++);
+  render_objs[obj_i] = object;
 }

--- a/src/scene.cuh
+++ b/src/scene.cuh
@ -8,23 +8,17 @@
 //#include "camera.cuh"
 #include <stdint.h>

-class Camera;
-
 //when we get animations with multiple scenes, we'll make this a virtual function
 //with array of DE objects and cam
 class Scene {
-  const int max_bounces = 10;
  public:
+    Scene(){};
+    Scene(const Scene&){};
    __device__ Render_object *raycast(struct Ray ray);
-    __device__ Render_object **get_objects() { return objs; }
-    __device__ Camera *get_camera() { return active_cam; }
-  private:
-    Sphere sp1 = Sphere(make_vect3(0, -2, 5)); //TODO move to it's own file or whatever
-    Sphere sp2 = Sphere(make_vect3(2, 0, 5)); //do some like "add_object" function or somethin
+    __device__ Render_object **get_objects() { return render_objs; }
+    __host__ void add_render_object(Render_object *object);
  protected:
-    //idk why I need to specify the size... why can't the compiler figure that out?
-    Render_object *objs[3] = {&sp1, &sp2, nullptr};
-    Camera *active_cam;
+    Render_object *render_objs[16] = {nullptr}; //TODO we'll make this dynamic later
    uint8_t *image;
 };

--- a/src/sphere.cu
+++ b/src/sphere.cu
@ -1,7 +1,7 @@
 #include "sphere.cuh"

 __device__ T Sphere::distance_estimator(vect3 point) const {
-  return length(this->pos_ - point) - r_;
+  return length(this->pos - point) - r;
 }

 __device__ Color Sphere::get_color(struct Ray ray) const {
--- a/src/sphere.cuh
+++ b/src/sphere.cuh
@ -10,7 +10,7 @@ class Sphere : public Render_object {
   __device__ T distance_estimator(vect3 point) const;
   __device__ Color get_color(struct Ray ray) const;
  private:
-    T r_ = 1;
+    T r = 1;
 };