boutta switch away from templates

author: Brett Weiland <brettsweiland@gmail.com> 2024-05-27 20:56:59 -0500
committer: Brett Weiland <brettsweiland@gmail.com> 2024-05-27 20:56:59 -0500
commit: 093200a449ea38952de52012e324036c106e294b (patch)
tree: 6030076eb894ca100d6aa0d6550ab56955e7fb2f
parent: 7e9e2150619c05f9e8a74432e596b11f373518b9 (diff)
16 files changed, 261 insertions, 26 deletions
diff --git a/.gdb_history b/.gdb_history
index 9cf0bf1..8e2a810 100644
--- a/.gdb_history
+++ b/.gdb_history
@@ -54,3 +54,5 @@ next
 print image
 hexdump 0x55555561a140
 quit
+exit()
+exit
diff --git a/.vscode/launch.json b/.vscode/launch.json
new file mode 100644
index 0000000..f4ee283
--- /dev/null
+++ b/.vscode/launch.json
@@ -0,0 +1,15 @@
+{
+    // Use IntelliSense to learn about possible attributes.
+    // Hover to view descriptions of existing attributes.
+    // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
+    "version": "0.2.0",
+    "configurations": [
+        {
+            "name": "CUDA C++: Launch",
+            "type": "cuda-gdb",
+            "request": "launch",
+            "program": "${workspaceFolder}/build/indigo_worlds"
+        }
+    ]
+}
+
diff --git a/build/indigo_worlds b/build/indigo_worlds
deleted file mode 100755
index d99fd92..0000000
--- a/build/indigo_worlds
+++ /dev/null
diff --git a/build/kernel.o b/build/kernel.o
deleted file mode 100644
index 549a102..0000000
--- a/build/kernel.o
+++ /dev/null
diff --git a/build/main.o b/build/main.o
index 1102ea1..737de98 100644
--- a/build/main.o
+++ b/build/main.o
diff --git a/camera.cuh b/camera.cuh
new file mode 100644
index 0000000..61944fe
--- /dev/null
+++ b/camera.cuh
@@ -0,0 +1,81 @@
+#ifndef CAMERA_H
+#define CAMERA_H
+
+#include "entity.cuh"
+#include "common.cuh"
+#include <limits>
+
+
+template <class T> class scene; //forward declaration so this header does not depend on include order
+
+//camera entity: stores fov, clip distances, step count and a pointer to a scene
+template<class T> 
+class camera : public entity<T> {
+  using T3 = typename vect_t3<T>::vect_t;
+  using T2 = typename vect_t2<T>::vect_t;
+  public:
+    __device__ void render(); //declared only; the definition further down is still commented out
+    //pscene is stored as-is; fov/pos/rot default to 1/0/0; the entity scale is always set to 0
+    __device__ camera(scene<T> *pscene, const T fov = 1, const T3 pos = vect_create<T3>(0), const T3 rot = vect_create<T3>(0)) 
+      : entity<T>(pos, rot, vect_create<T3>(0)), fov(fov), pscene(pscene) {}; //base first, then members in declaration order
+    //__device__ ~camera();
+  private:
+    T fov;
+    T2 size; //NOTE: never initialized by the constructor
+    int steps = 100;
+    T clip_min = .1;
+    T clip_max = 100;
+    scene<T> *pscene;
+};
+
+/**
+//later we'll make scenes objects, rn im lazy (TODO)
+template <class T> __device__ void camera<T>::render() {
+  //TODO *really* need to clean this up once you get further
+  //extra dimensions mean extra math
+  //either generalize float3 or stop using this template nonsense
+  const uint3 unnormalized_uv = ((blockDim * blockIdx) + threadIdx);
+  const unsigned int img_index = (unnormalized_uv.x + (unnormalized_uv.y * blockDim.x * gridDim.x)) * 4;
+  const T3 uv = ((2 * vect_create<T3>(unnormalized_uv)) / vect_create<T3>(gridDim * blockDim)) - 1;
+  const T3 ray_direction = normalize(vect_create<T3>(uv.x, uv.y, 1));
+  T dist;
+  T total_dist = 0;
+  T3 ray;
+  int i;
+
+
+  //if(img_index == 640) { printf("%f, %f, %f\n", uv.x, uv.y, uv.z); }
+
+  T min_dist = clip_max;
+
+  render_object<T> **objs = pscene->get_objs();
+  for(i = 0; i < steps; i++) {
+    ray = this->pos_ + (total_dist * ray_direction);
+    //gyagh memory lookups
+    for(unsigned int oi = 0; objs[oi] != NULL; oi++) {
+      dist = object.distance_estimator(ray);
+    }
+    if((dist < clip_min)) {
+      //image[img_index] = 0xff;
+      break;
+    }
+    if((dist > clip_max)) {
+      //image[img_index+2] = 0xff;
+      break;
+    }
+    total_dist += dist;
+  }
+
+
+
+  
+  //image[img_index] = 0x00;
+  //image[img_index+1] = 0x00;
+  //image[img_index+2] = p;
+  //image[img_index+3] = 0xff;
+  
+}
+**/
+
+
+#endif
diff --git a/common.cuh b/common.cuh
new file mode 100644
index 0000000..d5a9cdf
--- /dev/null
+++ b/common.cuh
@@ -0,0 +1,43 @@
+#ifndef COMMON_H
+#define COMMON_H
+
+#include "include/helper_math.h"
+
+/**
+
+template <class T> class vect_t2;
+template <class T> class vect_t3;
+template <class T> class vect_t4;
+
+//this feels so hacky... idk why people are so scared of metaprogramming
+template <> class vect_t2<double> { public: using vect_t = double2; };
+template <> class vect_t3<double> { public: using vect_t = double3; };
+template <> class vect_t4<double> { public: using vect_t = double4; };
+
+template <> class vect_t2<float> { public: using vect_t = float2; };
+template <> class vect_t3<float> { public: using vect_t = float3; };
+template <> class vect_t4<float> { public: using vect_t = float4; };
+
+
+template <class T, class X> __device__  T vect_create(X x);
+template <class T, class X, class Y, class Z> __device__  T vect_create(X x, Y y, Z z);
+
+//NOTE: unverified and likely wrong: function templates cannot be partially specialized, so these probably need to be overloads
+template <class float3, class X> __device__ inline float3 vect_create<float3>(X x) { return make_float3(x); }
+
+template <class float3, class X, class Y, class Z> __device__ inline float3 vect_create<float3>(X x, Y y, Z z) { return make_float3(x, y, z); }
+**/
+
+/** I'm not sure whether float or double precision is necessary. I was using
+templates, but that changes the structure of my entire project in unwanted
+ways, so I'm switching over to typedefs. **/
+
+typedef float2 vect_t2;
+typedef float3 vect_t3;
+typedef float4 vect_t4;
+typedef float T; //scalar type; change it together with the vector typedefs above
+
+#define vect1to3(x) (make_float3(x))
+#define make_vect(x, y, z) (make_float3(x, y, z))
+
+#endif 
diff --git a/entity.cuh b/entity.cuh
new file mode 100644
index 0000000..737ef8b
--- /dev/null
+++ b/entity.cuh
@@ -0,0 +1,31 @@
+#ifndef ENTITY_H
+#define ENTITY_H
+#include "common.cuh"
+
+//entity: base class holding a position, rotation and scale (camera and
+//render_object derive from it). templated on the scalar type so double
+//precision stays possible; it was needed for zooming far into the
+//mandelbrot, so it's not out of the question. start with float.
+template<class T> class entity {
+  using T3 = typename vect_t3<T>::vect_t;
+  public:
+    __device__ entity() : pos_(vect_create<T3>(0)), rot_(vect_create<T3>(0)), scale_(vect_create<T3>(0)) {};
+    __device__ entity(const T3 pos, const T3 rot, const T3 scale) : pos_(pos), rot_(rot), scale_(scale) {};
+    __device__ entity(const T3 pos) : pos_(pos), rot_(vect_create<T3>(0)), scale_(vect_create<T3>(0)) {}; //T3, not float3, so T = double works too
+
+    //getters are __host__ __device__ so device code can call them as well as host code
+    __host__ __device__ T3 get_pos() const { return pos_; }
+    __host__ __device__ T3 get_rot() const { return rot_; }
+    __host__ __device__ T3 get_scale() const { return scale_; }
+
+    __device__ void set_pos(const T3 pos) { pos_ = pos; }
+    __device__ void set_rot(const T3 rot) { rot_ = rot; }
+    __device__ void set_scale(const T3 scale) { scale_ = scale; }
+
+  protected:
+    T3 pos_;
+    T3 rot_;
+    T3 scale_; //NOTE: every constructor that does not take a scale sets this to 0
+
+};
+#endif
diff --git a/kernel.cu b/kernel.cu
index 6bed13c..c6895dc 100644
--- a/kernel.cu
+++ b/kernel.cu
@@ -1,18 +1,9 @@
 #include <curand.h>
 #include <stdint.h>
 #include <stdio.h>
-#include "include/helper_math.h"
+#include "scene.cuh"
 
-__global__ void test_image(uint8_t *image) {
-  int2 unnormalized_coordinates = make_int2(blockDim.x, blockDim.y) * make_int2(blockIdx.x, blockIdx.y) + make_int2(threadIdx.x, threadIdx.y);
-  int2 img_res = make_int2(blockDim.x, blockDim.y) * make_int2(gridDim.x, gridDim.y); //can move if needed
-  size_t img_index = ((unnormalized_coordinates.y * img_res.y) + unnormalized_coordinates.x) * 4;
-
-  //vec3 col = 0.5 + 0.5*cos(iTime+uv.xyx+vec3(0,2,4));
-
-
-  image[img_index] = 0x00;
-  image[img_index+1] = 0x00;
-  image[img_index+2] = 0xff;
-  image[img_index+3] = 0xff;
+__global__ void render(uint8_t *image) {
+  scene<float> scene;
+  //scene.render(image); //TODO: disabled, so this kernel currently never writes to image
 }
diff --git a/kernel.cuh b/kernel.cuh
new file mode 100644
index 0000000..2c5256e
--- /dev/null
+++ b/kernel.cuh
@@ -0,0 +1,5 @@
+#ifndef KERNEL_H
+#define KERNEL_H
+#include <stdint.h>
+__global__ void render(uint8_t *image);
+#endif
diff --git a/main.cu b/main.cu
index 4225b09..4aa6cbb 100644
--- a/main.cu
+++ b/main.cu
@@ -3,10 +3,9 @@
 #include <cuda_runtime.h>
 #include <string.h>
 
+#include "kernel.cuh"
 #include "raylib.h"
 
-__global__ void test_image(uint8_t *image);
-
 int main() {
   //bluuuuugh i'll figure out occupancy later, this res are easy
   //calculated manually for gtx1060 with 20 SM, 1024 threads/SM
@@ -17,7 +16,8 @@ int main() {
 
   uint8_t *image_d;
   Color texture_data[res_x * res_y];
-  
+  SetTargetFPS(10);
+
 
   //see if GPU is connected (my egpu is finicky)
   {
@@ -36,16 +36,10 @@ int main() {
   Image image = GenImageColor(res_x, res_y, BLUE);
   Texture tex = LoadTextureFromImage(image);
 
-
-
-
-  
-  //if(!IsWindowFullscreen()) ToggleFullscreen();
-
   while(!WindowShouldClose()) {
     //cuda stuff
     cudaMalloc((void **)&image_d, res_x * res_y * sizeof(Color));
-    test_image<<<blockCount, threadCount>>>(image_d); 
+    render<<<blockCount, threadCount>>>(image_d); 
     cudaDeviceSynchronize();
     cudaMemcpy(texture_data, (void **)image_d, res_x * res_y * sizeof(Color), cudaMemcpyDeviceToHost);
 
diff --git a/makefile b/makefile
index b733c07..7f6bddb 100644
--- a/makefile
+++ b/makefile
@@ -2,14 +2,14 @@ LIBS = -lraylib -lGL -lm -lpthread -ldl -lrt -lX11
 $CC = gcc
 INC = -I /opt/cuda/include
 make:
-	nvcc $(LIBS) $(INC) --debug -c main.cu -o build/main.o
+	nvcc $(LIBS) $(INC) -O0 --debug -c main.cu -o build/main.o
 	nvcc  --device-debug --compile kernel.cu -o build/kernel.o
-	nvcc $(LIBS) -o build/indigo_worlds build/main.o build/kernel.o
+	nvcc $(LIBS) -O0 -o build/indigo_worlds build/main.o build/kernel.o
 
 run:
 	build/indigo_worlds
 
 clean:
-	rm -r build/*
+	rm -rf build/*
 
 
diff --git a/render_object.cuh b/render_object.cuh
new file mode 100644
index 0000000..0063d94
--- /dev/null
+++ b/render_object.cuh
@@ -0,0 +1,12 @@
+#ifndef RENDER_OBJECT_H
+#define RENDER_OBJECT_H
+#include "entity.cuh"
+
+template<class T> class render_object : public entity<T> { //abstract base for entities that provide a distance estimator
+  using T3 = typename vect_t3<T>::vect_t;
+  using entity<T>::entity; //inherit entity's constructors
+  public:
+    virtual __device__ T distance_estimator(T3 point) const = 0; //pure virtual; implemented by subclasses such as sphere
+};
+
+#endif
diff --git a/scene.cuh b/scene.cuh
new file mode 100644
index 0000000..31bb99f
--- /dev/null
+++ b/scene.cuh
@@ -0,0 +1,32 @@
+#ifndef SCENE_H
+#define SCENE_H
+
+#include "common.cuh"
+#include "sphere.cuh"
+#include "render_object.cuh"
+#include "include/helper_math.h"
+
+template <class T> class camera;
+
+//when we get animations with multiple scenes, we'll make this a virtual function
+//with array of DE objects and cam
+template <class T>
+class scene {
+  using T3 = typename vect_t3<T>::vect_t;
+  public:
+    //__device__ void render(uint8_t *image) { cam.render(); };
+    __device__ render_object<T> **get_objs() { return objs; } //NULL-terminated array of object pointers
+    __device__ uint8_t *get_image() { return image; } //return type matches the image member
+  private:
+    camera<T> cam = camera<T>(this); //camera has no default constructor; it takes a scene pointer
+    sphere<T> sp1 = sphere<T>(vect_create<T3>(0,0.4,-5));
+    sphere<T> sp2 = sphere<T>(vect_create<T3>(0,-0.4,-5));
+  protected:
+    //the bound must be spelled out: a non-static array member cannot deduce its size from its initializer
+    render_object<T> *objs[3] = {&sp1, &sp2, NULL};
+    uint8_t *image = NULL; //nothing in this class assigns it yet, so start from a known value
+};
+
+#include "camera.cuh"
+
+#endif
diff --git a/scene.h b/scene.h
new file mode 100644
index 0000000..05ce2af
--- /dev/null
+++ b/scene.h
@@ -0,0 +1,12 @@
+#ifndef SCENE_H
+#define SCENE_H
+#include "camera.cuh"
+#include "render_object.cuh"
+
+//for now we only need one scene, later we'll expand this to just be a virtual template
+template <class T> class scene {
+    //NOTE: scene.cuh uses the same SCENE_H guard and also defines scene<T>
+
+};
+
+#endif
+\ No newline at end of file
diff --git a/sphere.cuh b/sphere.cuh
new file mode 100644
index 0000000..33c77f2
--- /dev/null
+++ b/sphere.cuh
@@ -0,0 +1,17 @@
+#ifndef SPHERE_H
+#define SPHERE_H
+#include "render_object.cuh"
+template<class T> class sphere : public render_object<T> { //render_object with a radius r_
+  using render_object<T>::render_object; //inherit the entity-style constructors
+  using T3 = typename vect_t3<T>::vect_t;
+  public:
+    __device__ T distance_estimator(T3 point) const override; //override makes the compiler check the signature against the base
+  private:
+    T r_ = 1;
+};
+
+template <class T> __device__ T sphere<T>::distance_estimator(T3 point) const { //distance from point to the sphere surface, negative inside
+  return length(point - this->pos_) - r_; //measure from the sphere's own position rather than the origin; rot_ and scale_ are not applied
+}
+
+#endif
author	Brett Weiland <brettsweiland@gmail.com>	2024-05-27 20:56:59 -0500
committer	Brett Weiland <brettsweiland@gmail.com>	2024-05-27 20:56:59 -0500
commit	093200a449ea38952de52012e324036c106e294b (patch)
tree	6030076eb894ca100d6aa0d6550ab56955e7fb2f
parent	7e9e2150619c05f9e8a74432e596b11f373518b9 (diff)