boutta switch away from templates

This commit is contained in:
Brett Weiland 2024-05-27 20:56:59 -05:00
parent 7e9e215061
commit 093200a449
16 changed files with 261 additions and 26 deletions

View File

@ -54,3 +54,5 @@ next
print image
hexdump 0x55555561a140
quit
exit()
exit

15
.vscode/launch.json vendored Normal file
View File

@ -0,0 +1,15 @@
{
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
"name": "CUDA C++: Launch",
"type": "cuda-gdb",
"request": "launch",
"program": "${workspaceFolder}/build/indigo_worlds"
}
]
}

Binary file not shown.

Binary file not shown.

Binary file not shown.

81
camera.cuh Normal file
View File

@ -0,0 +1,81 @@
#ifndef CAMERA_H
#define CAMERA_H
#include "entity.cuh"
#include "common.cuh"
#include <limits>
// Forward declaration: camera only stores a pointer to its owning scene, so
// this header must compile even when it is included before scene.cuh.
template <class T> class scene;

/**
 * Camera entity: an entity<T> plus the parameters used while rendering.
 *
 * T is the scalar type; T2/T3 are the matching 2- and 3-component vector
 * types looked up through vect_t2<T>/vect_t3<T>.
 */
template<class T>
class camera : public entity<T> {
  using T3 = typename vect_t3<T>::vect_t;
  using T2 = typename vect_t2<T>::vect_t;
public:
  // NOTE(review): only declared here. The one definition visible in this
  // header is commented out, so calling render() will not link until it is
  // restored.
  __device__ void render();

  /**
   * @param pscene scene this camera belongs to (stored, not owned).
   * @param fov    field-of-view value (stored; not read by any code visible here).
   * @param pos    position forwarded to entity<T>.
   * @param rot    rotation forwarded to entity<T>.
   *
   * The entity scale is always passed as vect_create<T3>(0).
   * The initializer list is written in the order C++ actually runs it
   * (base class first, then members in declaration order).
   */
  __device__ camera(scene<T> *pscene, const T fov = 1, const T3 pos = vect_create<T3>(0), const T3 rot = vect_create<T3>(0))
    : entity<T>(pos, rot, vect_create<T3>(0)), fov(fov), pscene(pscene) {}
  //__device__ ~camera();
private:
  T fov;
  // Value-initialised so it never holds an indeterminate value.
  // NOTE(review): presumably the output size; nothing visible here sets or reads it.
  T2 size{};
  // The three values below are consumed by the (currently commented-out)
  // render loop: iteration cap, and the distances at which marching stops.
  int steps = 100;
  T clip_min = .1;
  T clip_max = 100;
  scene<T> *pscene;
};
/**
//later we'll make scenes objects, rn im lazy (TODO)
template <class T> __device__ void camera<T>::render() {
//TODO *really* need to clean this up once you get further
//extra dimensions is extra math
//either generalize float3 or stop using this fucking template nonsense
const uint3 unnormalized_uv = ((blockDim * blockIdx) + threadIdx);
const unsigned int img_index = (unnormalized_uv.x + (unnormalized_uv.y * blockDim.x * gridDim.x)) * 4;
const T3 uv = ((2 * vect_create<T3>(unnormalized_uv)) / vect_create<T3>(gridDim * blockDim)) - 1;
const T3 ray_direction = normalize(vect_create<T3>(uv.x, uv.y, 1));
T dist;
T total_dist = 0;
T3 ray;
int i;
//if(img_index == 640) { printf("%f, %f, %f\n", uv.x, uv.y, uv.z); }
T min_dist = clip_max;
render_object<T> **objs = pscene->get_objs();
for(i = 0; i < steps; i++) {
ray = this->pos_ + (total_dist * ray_direction);
//gyagh memory lookups
for(unsigned int oi = 0; objs[oi] != NULL; oi++) {
dist = object.distance_estimator(ray);
}
if((dist < clip_min)) {
//image[img_index] = 0xff;
break;
}
if((dist > clip_max)) {
//image[img_index+2] = 0xff;
break;
}
total_dist += dist;
}
//image[img_index] = 0x00;
//image[img_index+1] = 0x00;
//image[img_index+2] = p;
//image[img_index+3] = 0xff;
}
**/
#endif

43
common.cuh Normal file
View File

@ -0,0 +1,43 @@
#ifndef COMMON_H
#define COMMON_H
#include "include/helper_math.h"
/**
template <class T> class vect_t2;
template <class T> class vect_t3;
template <class T> class vect_t4;
//this feels so hacky... idk why people are so scared of metaprogramming
template <> class vect_t2<double> { public: using vect_t = double2; };
template <> class vect_t3<double> { public: using vect_t = double3; };
template <> class vect_t4<double> { public: using vect_t = double4; };
template <> class vect_t2<float> { public: using vect_t = float2; };
template <> class vect_t3<float> { public: using vect_t = float3; };
template <> class vect_t4<float> { public: using vect_t = float4; };
template <class T, class X> __device__ T vect_create(X x);
template <class T, class X, class Y, class Z> __device__ T vect_create(X x, Y y, Z z);
//I have no fucking clue if this is right, check me later ig
template <class float3, class X> __device__ inline float3 vect_create<float3>(X x) { return make_float3(x); }
template <class float3, class X, class Y, class Z> __device__ inline float3 vect_create<float3>(X x, Y y, Z z) { return make_float3(x, y, z); }
**/
/** I'm not sure whether float or double precision is necessary. I was using
templates, but this changes the structure of my entire project in unwanted
ways, so I'm switching over to typedefs. **/
// Project-wide scalar type. Change this alias (and the vector aliases and
// make_* macros below, which must stay in step with it) to switch precision.
using T = float;

// Vector types matching T, by component count.
using vect_t2 = float2;
using vect_t3 = float3;
using vect_t4 = float4;

// Constructors for vect_t3. Both forward straight to make_float3, so every
// overload reachable through "include/helper_math.h" remains usable.
// vect1to3: build a 3-vector from one argument.
#define vect1to3(x) (make_float3(x))
// make_vect: build a 3-vector from three components.
#define make_vect(x, y, z) (make_float3(x, y, z))
#endif

31
entity.cuh Normal file
View File

@ -0,0 +1,31 @@
#ifndef ENTITY_H
#define ENTITY_H
#include "common.cuh"
//we could make a template to allow double precision, but start with float
//idk how necessary it is yet so I'll go ahead.
//I know I needed it for zooming far into the mandelbrot ig, so it's not
//out of the question
/**
 * Base class for anything with a position, rotation and scale.
 *
 * T is the scalar type; T3 is the matching 3-component vector type looked up
 * through vect_t3<T>. Constructors and setters are device-only; the getters
 * are callable from both host and device code.
 */
template<class T> class entity {
  using T3 = typename vect_t3<T>::vect_t;
public:
  // Default: position, rotation and scale all set to vect_create<T3>(0).
  // NOTE(review): a scale of 0 looks unintended (1 would be the identity) —
  // confirm before anything starts reading scale_.
  __device__ entity() : pos_(vect_create<T3>(0)), rot_(vect_create<T3>(0)), scale_(vect_create<T3>(0)) {}

  // Fully specified transform.
  __device__ entity(const T3 pos, const T3 rot, const T3 scale) : pos_(pos), rot_(rot), scale_(scale) {}

  // Position only; rotation and scale are set to vect_create<T3>(0).
  // Takes T3 (previously a hard-coded float3) so it follows the template's
  // scalar type; for T = float the two are the same type.
  __device__ entity(const T3 pos) : pos_(pos), rot_(vect_create<T3>(0)), scale_(vect_create<T3>(0)) {}

  // Accessors. Marked __host__ __device__: without a qualifier they were
  // host-only and therefore unusable from the device code that constructs
  // and owns these objects.
  __host__ __device__ T3 get_pos() const { return pos_; }
  __host__ __device__ T3 get_rot() const { return rot_; }
  __host__ __device__ T3 get_scale() const { return scale_; }

  __device__ void set_pos(const T3 pos) { pos_ = pos; }
  __device__ void set_rot(const T3 rot) { rot_ = rot; }
  __device__ void set_scale(const T3 scale) { scale_ = scale; }
protected:
  T3 pos_;
  T3 rot_;
  T3 scale_;
};
#endif

View File

@ -1,18 +1,9 @@
#include <curand.h>
#include <stdint.h>
#include <stdio.h>
#include "include/helper_math.h"
#include "scene.cuh"
// NOTE(review): this span is a diff hunk. The test_image kernel appears to be
// the code removed by this commit and the render kernel the code added; the
// two share the single closing brace at the end of the hunk.
//
// test_image: each thread writes one 4-byte pixel into image
// (bytes 0 and 1 = 0x00, bytes 2 and 3 = 0xff).
__global__ void test_image(uint8_t *image) {
// Pixel coordinate of this thread: blockDim * blockIdx + threadIdx, per axis.
int2 unnormalized_coordinates = make_int2(blockDim.x, blockDim.y) * make_int2(blockIdx.x, blockIdx.y) + make_int2(threadIdx.x, threadIdx.y);
// Total launch extent per axis: blockDim * gridDim.
int2 img_res = make_int2(blockDim.x, blockDim.y) * make_int2(gridDim.x, gridDim.y); //can move if needed
// Byte offset of this thread's pixel (4 bytes per pixel).
// NOTE(review): the row term multiplies y by img_res.y (the y extent); for a
// non-square launch the row stride should presumably be img_res.x — confirm.
size_t img_index = ((unnormalized_coordinates.y * img_res.y) + unnormalized_coordinates.x) * 4;
//vec3 col = 0.5 + 0.5*cos(iTime+uv.xyx+vec3(0,2,4));
image[img_index] = 0x00;
image[img_index+1] = 0x00;
image[img_index+2] = 0xff;
image[img_index+3] = 0xff;
// render: constructs a scene<float> local to each thread. The call that would
// draw into image is still commented out, so image is not written here.
__global__ void render(uint8_t *image) {
// The local variable shares its name with the scene class template.
scene<float> scene;
//scene.render(image);
}

5
kernel.cuh Normal file
View File

@ -0,0 +1,5 @@
#ifndef KERNEL_H
#define KERNEL_H
#include <stdint.h>
// Render kernel entry point; image is the output byte buffer.
// NOTE(review): main.cu passes a cudaMalloc'ed buffer of
// res_x * res_y * sizeof(Color) bytes — the pixel layout is not stated here, confirm.
__global__ void render(uint8_t *image);
#endif

14
main.cu
View File

@ -3,10 +3,9 @@
#include <cuda_runtime.h>
#include <string.h>
#include "kernel.cuh"
#include "raylib.h"
__global__ void test_image(uint8_t *image);
int main() {
//bluuuuugh i'll figure out occupancy later, this res are easy
//calculated manually for gtx1060 with 20 SM, 1024 threads/SM
@ -17,7 +16,8 @@ int main() {
uint8_t *image_d;
Color texture_data[res_x * res_y];
SetTargetFPS(10);
//see if GPU is connected (my egpu is finicky)
{
@ -36,16 +36,10 @@ int main() {
Image image = GenImageColor(res_x, res_y, BLUE);
Texture tex = LoadTextureFromImage(image);
//if(!IsWindowFullscreen()) ToggleFullscreen();
while(!WindowShouldClose()) {
//cuda stuff
cudaMalloc((void **)&image_d, res_x * res_y * sizeof(Color));
test_image<<<blockCount, threadCount>>>(image_d);
render<<<blockCount, threadCount>>>(image_d);
cudaDeviceSynchronize();
cudaMemcpy(texture_data, (void **)image_d, res_x * res_y * sizeof(Color), cudaMemcpyDeviceToHost);

View File

@ -2,14 +2,14 @@ LIBS = -lraylib -lGL -lm -lpthread -ldl -lrt -lX11
# Host C compiler. The previous "$CC = gcc" expanded "$C" (empty) followed by
# a literal "C", so it assigned to a variable named C and never set CC.
CC = gcc
INC = -I /opt/cuda/include
make:
nvcc $(LIBS) $(INC) --debug -c main.cu -o build/main.o
nvcc $(LIBS) $(INC) -O0 --debug -c main.cu -o build/main.o
nvcc --device-debug --compile kernel.cu -o build/kernel.o
nvcc $(LIBS) -o build/indigo_worlds build/main.o build/kernel.o
nvcc $(LIBS) -O0 -o build/indigo_worlds build/main.o build/kernel.o
run:
build/indigo_worlds
clean:
rm -r build/*
rm -rf build/*

12
render_object.cuh Normal file
View File

@ -0,0 +1,12 @@
#ifndef RENDER_OBJECT_H
#define RENDER_OBJECT_H
#include "entity.cuh"
/**
 * Abstract entity that can be queried through a distance estimator.
 *
 * Concrete shapes (e.g. sphere in sphere.cuh) derive from this class and
 * implement distance_estimator(). All entity<T> constructors are inherited
 * unchanged.
 */
template <class T>
class render_object : public entity<T> {
  // Re-export every entity<T> constructor.
  using entity<T>::entity;
  // 3-component vector type matching the scalar type T.
  typedef typename vect_t3<T>::vect_t T3;

public:
  // Returns the shape's distance estimate for the given point.
  // Pure virtual: every concrete shape must provide it.
  __device__ virtual T distance_estimator(T3 point) const = 0;
};
#endif

32
scene.cuh Normal file
View File

@ -0,0 +1,32 @@
#ifndef SCENE_H
#define SCENE_H
#include "common.cuh"
#include "sphere.cuh"
#include "render_object.cuh"
#include "include/helper_math.h"
template <class T> class camera;
//when we get animations with multiple scenes, we'll make this a virtual function
//with array of DE objects and cam
template <class T>
class scene {
using T3 = typename vect_t3<T>::vect_t;
public:
//__device__ void render(uint8_t *image) { cam.render(); };
__device__ render_object<T> **get_objs() { return objs; }
__device__ render_object<T> **get_image() { return image; }
private:
camera<T> cam = camera<T>();
sphere<T> sp1 = sphere<T>(vect_create<T3>(0,0.4,-5));
sphere<T> sp2 = sphere<T>(vect_create<T3>(0,-0.4,-5));
protected:
//idk why I need to specify the size... why can't the compiler figure that out?
render_object<T> *objs[3] = {&sp1, &sp2, NULL};
uint8_t *image;
};
#include "camera.cuh"
#endif

12
scene.h Normal file
View File

@ -0,0 +1,12 @@
#ifndef SCENE_H
#define SCENE_H
// camera.cuh refers to scene<T>, so the template must be declared before it
// is included.
template <class T> class scene;
#include "camera.cuh"
#include "render_object.cuh"
// NOTE(review): scene.cuh defines a template with the same name under the
// same SCENE_H guard, so whichever header is included first wins — this file
// looks like a leftover; confirm whether it should be removed.
//for now we only need one scene, later we'll expand this to just be a virtual template
template <class T> class scene {
};
#endif

17
sphere.cuh Normal file
View File

@ -0,0 +1,17 @@
#ifndef SPHERE_H
#define SPHERE_H
#include "render_object.cuh"
/**
 * Sphere render object with radius r_ (fixed at 1), centred on the entity
 * position pos_. All render_object<T> constructors are inherited unchanged.
 */
template<class T> class sphere : public render_object<T> {
  using render_object<T>::render_object;
  using T3 = typename vect_t3<T>::vect_t;
public:
  // Signed distance from point to the sphere surface (negative inside).
  __device__ T distance_estimator(T3 point) const override;
private:
  T r_ = 1;
};

// Distance to the surface = distance to the centre minus the radius.
// The centre is the entity position; previously it was ignored, which put
// every sphere at the origin no matter where the scene placed it.
// pos_ lives in a dependent base class, hence the explicit this->.
template <class T> __device__ T sphere<T>::distance_estimator(T3 point) const {
  return length(point - this->pos_) - r_;
}
#endif