diff --git a/src/.gdb_history b/src/.gdb_history new file mode 100644 index 0000000..5f753e0 --- /dev/null +++ b/src/.gdb_history @@ -0,0 +1,3 @@ +r +quit +quit diff --git a/src/camera.cu b/src/camera.cu index 706adef..aeb1669 100644 --- a/src/camera.cu +++ b/src/camera.cu @@ -1,22 +1,35 @@ #include #include "scene.cuh" #include "camera.cuh" +#include "common.cuh" -__device__ void Camera::render(Scene scene, uint8_t *image) { +__device__ vect3 Camera::render() { + const vect3 thread_id = make_vect3((blockIdx * blockDim) + threadIdx); + const vect3 safeGridDim = make_vect3(blockDim.x * gridDim.x, blockDim.y * gridDim.y, 1); + const vect3 uv = (viewport_size * thread_id) / safeGridDim + (viewport_size / 2); + Ray ray{this->pos, uv}; + Render_object *obj = active_scene->raycast(ray); + if(obj) return make_vect3(1,1,1); + else return make_vect3(0,0,0); + + + /** + const uint3 uv = ((blockDim * blockIdx) + threadIdx) const uint3 unnormalized_uv = ((blockDim * blockIdx) + threadIdx); const unsigned int img_index = (unnormalized_uv.x + (unnormalized_uv.y * blockDim.x * gridDim.x)) * 4; const vect3 uv = ((2 * make_vect3(unnormalized_uv)) / make_vect3(gridDim * blockDim)) - 1; Ray ray; ray.direction = normalize(make_vect3(uv.x, uv.y, 1)); - Render_object **objects = scene.get_objects(); + Render_object **objects = active_scene->get_objects(); Render_object *closest_obj; T total_dist = 0; Color pcolor; + int max_steps = 0; //will remove for(int steps = 0; steps < max_steps; steps++) { //this could be heavily optimized. - scene.raycast(ray); + active_scene->raycast(ray); T min_dist = T_MAX; T dist; - ray.start = this->pos_ + (total_dist * ray.direction); + ray.start = pos_ + (total_dist * ray.direction); for(size_t obj_i = 0; objects[obj_i] != nullptr; obj_i++) { dist = objects[obj_i]->distance_estimator(ray.start); @@ -43,4 +56,5 @@ __device__ void Camera::render(Scene scene, uint8_t *image) { image[img_index] = pcolor.x * 255; image[img_index+1] = pcolor.y * 255; image[img_index+2] = pcolor.z * 255; + **/ } diff --git a/src/camera.cuh b/src/camera.cuh index 0ad089b..99193be 100644 --- a/src/camera.cuh +++ b/src/camera.cuh @@ -12,65 +12,16 @@ class Camera : public Entity { public: - Camera(const T fov = 1, const vect3 pos = make_vect3(0), const vect3 rot = make_vect3(0)) - : fov(fov), Entity(pos, rot, make_vect3(0)) {}; - __device__ void render(Scene scene, uint8_t *image); + Camera(Scene *active_scene, const T fov = 1, const vect3 pos = make_vect3(0), const vect3 rot = make_vect3(0)) + : Entity(pos, rot, make_vect3(0)), active_scene(active_scene), fov(fov) {}; + __device__ vect3 render(); private: - const T clip_min = .1; - const T clip_max = 100; - const int max_steps = 100; + T clip_min = .1; //will allow changing later + T clip_max = 100; T fov; - float2 size; + float3 viewport_size = make_float3(1,1,0); //TODO float3 + Scene *active_scene; + }; - -/** -//later we'll make scenes objects, rn im lazy (TODO) -template __device__ void camera::render() { - //TODO *really* need to clean this up once you get further - //extra dimentions is extra math - //either generisize float3 or stop using this fucking template nonsense - const uint3 unnormalized_uv = ((blockDim * blockIdx) + threadIdx); - const unsigned int img_index = (unnormalized_uv.x + (unnormalized_uv.y * blockDim.x * gridDim.x)) * 4; - const vect3 uv = ((2 * make_vect3(unnormalized_uv)) / make_vect3(gridDim * blockDim)) - 1; - const vect3 ray_direction = normalize(make_vect3(uv.x, uv.y, 1)); - T dist; - T total_dist = 0; - vect3 ray; - int i; - - - //if(img_index == 640) { printf("%f, %f, %f\n", uv.x, uv.y, uv.z); } - - T min_dist = clip_max; - - render_object **objs = pscene->get_objs(); - for(i = 0; i < steps; i++) { - ray = this->pos_ + (total_dist * ray_direction); - //gyagh memory lookups - for(unsigned int oi = 0; objs[oi] != NULL; oi++) { - dist = object.distance_estimator(ray); - } - if((dist < clip_min)) { - //image[img_index] = 0xff; - break; - } - if((dist > clip_max)) { - //image[img_index+2] = 0xff; - break; - } - total_dist += dist; - } - - - - - //image[img_index] = 0x00; - //image[img_index+1] = 0x00; - //image[img_index+2] = p; - //image[img_index+3] = 0xff; - -} -**/ - #endif diff --git a/src/common.cuh b/src/common.cuh index 2897213..a940dc5 100644 --- a/src/common.cuh +++ b/src/common.cuh @@ -45,11 +45,12 @@ typedef float3 Color; #define make_color(...) (make_float3(__VA_ARGS__)) //TODO move to ray.cuh +const int max_bounces = 10; struct Ray { - Color color; vect3 start; vect3 direction; //MUST BE A UNIT VECTOR - int bounces; + int bounces = max_bounces; + Color color = make_vect3(0,0,0); }; #endif diff --git a/src/entity.cuh b/src/entity.cuh index 214e5f8..a8deac7 100644 --- a/src/entity.cuh +++ b/src/entity.cuh @@ -8,23 +8,15 @@ //out of the question class Entity { public: - Entity() : pos_(make_vect3(0)), rot_(make_vect3(0)), scale_(make_vect3(0)) {}; - Entity(const vect3 pos, const vect3 rot, const vect3 scale) : pos_(pos), rot_(rot), scale_(scale) {}; - Entity(const float3 pos) : pos_(pos), rot_(make_vect3(0)), scale_(make_vect3(0)) {}; + Entity() : pos(make_vect3(0)), rot(make_vect3(0)), scale(make_vect3(0)) {}; + Entity(const vect3 pos, const vect3 rot, const vect3 scale) : pos(pos), rot(rot), scale(scale) {}; + Entity(const float3 pos) : pos(pos), rot(make_vect3(0)), scale(make_vect3(0)) {}; - vect3 get_pos() const { return pos_; } - vect3 get_rot() const { return rot_; } - vect3 get_scale() const { return scale_; } - - __device__ void set_pos(const vect3 pos) { pos_ = pos; } - __device__ void set_rot(const vect3 rot) { rot_ = rot; } - __device__ void set_scale(const vect3 scale) { scale_ = scale; } - protected: - vect3 pos_; - vect3 rot_; - vect3 scale_; + vect3 pos; + vect3 rot; + vect3 scale; }; #endif diff --git a/src/kernel.cu b/src/kernel.cu index 8c137c5..eab991d 100644 --- a/src/kernel.cu +++ b/src/kernel.cu @@ -4,9 +4,14 @@ #include "scene.cuh" #include "camera.cuh" -__global__ void render(uint8_t *image) { - //Scene scene; - //Camera cam(1, make_vect3(0,0,0)); - //cam.render(scene, image); - //scene.render(image); +__global__ void render(uint8_t *final_img, Camera *cam) { + //camera returns raw r/g/b(/z?) value, postprocessing (including gamma) happens per pixel in kernel here + vect3 bruh = cam->render(); + const uint3 unnormalized_uv = ((blockDim * blockIdx) + threadIdx); + const unsigned int img_index = (unnormalized_uv.x + (unnormalized_uv.y * blockDim.x * gridDim.x)) * 4; + final_img[img_index] = bruh.x * 0xff; + final_img[img_index+1] = bruh.y * 0xff; + final_img[img_index+2] = bruh.z * 0xff; + final_img[img_index+3] = 0xff; + } diff --git a/src/kernel.cuh b/src/kernel.cuh index 2c5256e..09aa210 100644 --- a/src/kernel.cuh +++ b/src/kernel.cuh @@ -1,5 +1,11 @@ #ifndef KERNEL_H #define KERNEL_H #include -__global__ void render(uint8_t *image); +#include "camera.cuh" +/** + Render from the view of the passed camera into img. + Img is in whatever colorspace the output is desited to have, after postprocessesing. + Cam is expected to be in global memory. +**/ +__global__ void render(uint8_t *final_img, Camera *cam); #endif diff --git a/src/main.cu b/src/main.cu index 9d1389e..fd09725 100644 --- a/src/main.cu +++ b/src/main.cu @@ -10,8 +10,8 @@ namespace raylib { #include "kernel.cuh" -#include "scene.cuh" #include "sphere.cuh" +#include "scene.cuh" #include "camera.cuh" @@ -24,9 +24,9 @@ int main() { const dim3 blockCount(20, 20); const dim3 threadCount(32, 32); - uint8_t *image_d; - Color texture_data[res_x * res_y]; - raylib::SetTargetFPS(60); + uint8_t *raylib_image; + //Color texture_data[res_x * res_y]; + raylib::SetTargetFPS(1); //see if GPU is connected (my egpu is finicky) @@ -46,26 +46,45 @@ int main() { raylib::Image image = raylib::GenImageColor(res_x, res_y, raylib::BLUE); raylib::Texture tex = raylib::LoadTextureFromImage(image); - //initilize scene - Scene scene; - Sphere sp1 = Sphere(make_vect3(0, -2, 5)); - Sphere sp2 = Sphere(make_vect3(2, 0, 5)); - Camera cam = Camera(1, make_float3(0,0,0)); + + Sphere *sp1; + cudaMallocManaged(&sp1, sizeof(Sphere)); + *sp1 = Sphere(make_vect3(0, 0, 1)); + + Sphere *sp2; + cudaMallocManaged(&sp2, sizeof(Sphere)); + *sp2 = Sphere(make_vect3(0, 1, 2)); + + Scene *scene; + cudaMallocManaged(&scene, sizeof(Scene)); + *scene = Scene(); //probably don't need to do? + + scene->add_render_object(sp1); + scene->add_render_object(sp2); + + Camera *camera; + cudaMallocManaged(&camera, sizeof(Camera)); + *camera = Camera(scene); + + + //maybe we shouldn't be infering how to use the raylib::Color type without using + //the header itself within the kernel (which I honestly don't wanna do) + cudaMallocManaged(&raylib_image, res_x * res_y * sizeof(raylib::Color)); + while(!raylib::WindowShouldClose()) { cudaError_t err; //cuda stuff - cudaMalloc((void **)&image_d, res_x * res_y * sizeof(Color)); - render<<>>(image_d); + render<<>>(raylib_image, camera); if((err = cudaGetLastError()) != cudaSuccess) { printf("kernel did not launch! Error: %s\n", cudaGetErrorString(err)); } cudaDeviceSynchronize(); - cudaMemcpy(texture_data, (void **)image_d, res_x * res_y * sizeof(Color), cudaMemcpyDeviceToHost); + //cudaMemcpy(texture_data, (void **)raylib_image, res_x * res_y * sizeof(Color), cudaMemcpyDeviceToHost); raylib::BeginDrawing(); raylib::ClearBackground(raylib::BLACK); - raylib::UpdateTexture(tex, texture_data); + raylib::UpdateTexture(tex, raylib_image); raylib::DrawTexture(tex, 0, 0, raylib::WHITE); raylib::DrawFPS(0, 0); raylib::EndDrawing(); diff --git a/src/makefile b/src/makefile index eb8e91b..c191367 100644 --- a/src/makefile +++ b/src/makefile @@ -1,5 +1,7 @@ -LIBS = -lraylib -lGL -lm -lpthread -ldl -lrt -lX11 -$CC = gcc +LIBS = -lraylib -lGL -lm -lpthread -ldl -lrt -lX11 --gpu-architecture=compute_61 --gpu-code=sm_61 +CC = gcc +#CUDA_CC = /usr/local/cuda-12/bin/nvcc +CUDA_CC = nvcc INC = -I /opt/cuda/include COMPILED_BIN = build/indigo_worlds @@ -7,10 +9,10 @@ CU_SRCFILES := $(wildcard *.cu) CU_OBJFILES := $(patsubst %.cu, %.o, $(CU_SRCFILES)) all: $(CU_OBJFILES) - nvcc $(LIBS) -g -G -o build/indigo_worlds build/*.o + $(CUDA_CC) $(LIBS) -g -G -o build/indigo_worlds build/*.o %.o: %.cu - nvcc -g -G -dc -c $< -o build/$@ + $(CUDA_CC) -g -G -dc -c $< -o build/$@ run: all build/indigo_worlds diff --git a/src/render_object.cuh b/src/render_object.cuh index f25f835..16e8b81 100644 --- a/src/render_object.cuh +++ b/src/render_object.cuh @@ -8,7 +8,6 @@ class Render_object : public Entity { public: virtual __device__ T distance_estimator(vect3 point) const = 0; virtual __device__ Color get_color(struct Ray ray) const = 0; - }; #endif diff --git a/src/scene.cu b/src/scene.cu index 5efe0d8..8b65194 100644 --- a/src/scene.cu +++ b/src/scene.cu @@ -1,9 +1,33 @@ #include +#include "common.cuh" #include "scene.cuh" __device__ Render_object *Scene::raycast(struct Ray ray) { - if(ray.bounces++ > max_bounces); - const size_t obj_count = sizeof(objs) / sizeof(objs[0]); - for(size_t obj_i = 0; obj_i < obj_count; obj_i++); + const int MAX_STEPS = 20; //LTERM TODO optimize + const T INTERSECT_DIST = 0.01; + const T MISS_DIST = 20; + T closest_dist; + int dist; + if(ray.bounces-- < 0) return nullptr; //TODO return skybox + for(int steps = 0; steps < MAX_STEPS; steps++) { + closest_dist = T_MAX; + for(size_t obj_i = 0; render_objs[obj_i] != nullptr; obj_i++) { + //LTERM TODO: maybe move this statement once you benchmark a solid testcase + if(dist < INTERSECT_DIST) return render_objs[obj_i]; + dist = fabs(render_objs[obj_i]->distance_estimator(ray.start)); + if(dist < closest_dist) closest_dist = dist; + } + //LTERM TODO: maybe move these statements once you benchmark a solid testcase + if(closest_dist > MISS_DIST) return nullptr; //TODO return skybox + ray.start = ray.start + (ray.direction * closest_dist); + } + +} + +//does not check for bounds; we'll remove this function later +__host__ void Scene::add_render_object(Render_object *object) { + size_t obj_i; + for(obj_i = 0; render_objs[obj_i] != nullptr; obj_i++); + render_objs[obj_i] = object; } diff --git a/src/scene.cuh b/src/scene.cuh index 7904810..85d6e0d 100644 --- a/src/scene.cuh +++ b/src/scene.cuh @@ -8,23 +8,17 @@ //#include "camera.cuh" #include -class Camera; - //when we get animations with multiple scenes, we'll make this a virtual function //with array of DE objects and cam class Scene { - const int max_bounces = 10; public: + Scene(){}; + Scene(const Scene&){}; __device__ Render_object *raycast(struct Ray ray); - __device__ Render_object **get_objects() { return objs; } - __device__ Camera *get_camera() { return active_cam; } - private: - Sphere sp1 = Sphere(make_vect3(0, -2, 5)); //TODO move to it's own file or whatever - Sphere sp2 = Sphere(make_vect3(2, 0, 5)); //do some like "add_object" function or somethin + __device__ Render_object **get_objects() { return render_objs; } + __host__ void add_render_object(Render_object *object); protected: - //idk why I need to specify the size... why can't the compiler figure that out? - Render_object *objs[3] = {&sp1, &sp2, nullptr}; - Camera *active_cam; + Render_object *render_objs[16] = {nullptr}; //TODO we'll make this dynamic later uint8_t *image; }; diff --git a/src/sphere.cu b/src/sphere.cu index d1ae9e9..bc8f9f8 100644 --- a/src/sphere.cu +++ b/src/sphere.cu @@ -1,7 +1,7 @@ #include "sphere.cuh" __device__ T Sphere::distance_estimator(vect3 point) const { - return length(this->pos_ - point) - r_; + return length(this->pos - point) - r; } __device__ Color Sphere::get_color(struct Ray ray) const { diff --git a/src/sphere.cuh b/src/sphere.cuh index 0d228ff..05acf0a 100644 --- a/src/sphere.cuh +++ b/src/sphere.cuh @@ -10,7 +10,7 @@ class Sphere : public Render_object { __device__ T distance_estimator(vect3 point) const; __device__ Color get_color(struct Ray ray) const; private: - T r_ = 1; + T r = 1; };