summary refs log tree commit diff
diff options
context:
space:
mode:
-rwxr-xr-x build/indigo_worlds bin 0 -> 1030064 bytes
-rw-r--r-- build/kernel.o bin 0 -> 17216 bytes
-rw-r--r-- build/main.o bin 32680 -> 32792 bytes
-rw-r--r-- kernel.cu 18
-rw-r--r-- main.cu 51
5 files changed, 45 insertions, 24 deletions
diff --git a/build/indigo_worlds b/build/indigo_worlds
new file mode 100755
index 0000000..d99fd92
--- /dev/null
+++ b/build/indigo_worlds
Binary files differ
diff --git a/build/kernel.o b/build/kernel.o
new file mode 100644
index 0000000..549a102
--- /dev/null
+++ b/build/kernel.o
Binary files differ
diff --git a/build/main.o b/build/main.o
index 7add5ec..1102ea1 100644
--- a/build/main.o
+++ b/build/main.o
Binary files differ
diff --git a/kernel.cu b/kernel.cu
index c00056a..6bed13c 100644
--- a/kernel.cu
+++ b/kernel.cu
@@ -5,14 +5,14 @@
__global__ void test_image(uint8_t *image) {
int2 unnormalized_coordinates = make_int2(blockDim.x, blockDim.y) * make_int2(blockIdx.x, blockIdx.y) + make_int2(threadIdx.x, threadIdx.y);
- int2 img_res = make_int2(blockDim.x, blockDim.y) * make_int2(gridDim.x, gridDim.y);
- size_t img_index = (unnormalized_coordinates.y * img_res.y) + unnormalized_coordinates.x;
- //uint8_t pixel_val = (img_index / (img_res.x * img_res.y)) * 255;
- uint8_t pixel_val = 0xff;
- //if(img_index == 1) printf("%i\n", img_index);
- print(img_index)
- image[img_index] = 0xff;
- image[img_index+1] = pixel_val;
- image[img_index+2] = pixel_val;
+ int2 img_res = make_int2(blockDim.x, blockDim.y) * make_int2(gridDim.x, gridDim.y); //can move if needed
+ size_t img_index = ((unnormalized_coordinates.y * img_res.y) + unnormalized_coordinates.x) * 4;
+
+ //vec3 col = 0.5 + 0.5*cos(iTime+uv.xyx+vec3(0,2,4));
+
+
+ image[img_index] = 0x00;
+ image[img_index+1] = 0x00;
+ image[img_index+2] = 0xff;
image[img_index+3] = 0xff;
}
diff --git a/main.cu b/main.cu
index 2b1b180..4225b09 100644
--- a/main.cu
+++ b/main.cu
@@ -8,32 +8,53 @@
__global__ void test_image(uint8_t *image);
int main() {
- const int size_x = 100;
- const int size_y = 100;
+ //bluuuuugh i'll figure out occupancy later, this res are easy
+ //calculated manually for gtx1060 with 20 SM, 1024 threads/SM
+ const int res_x = 32 * 20;
+ const int res_y = 32 * 20;
+ const dim3 blockCount(20, 20);
+ const dim3 threadCount(32, 32);
+
uint8_t *image_d;
- Image image = GenImageColor(size_x, size_y, BLUE);
+ Color texture_data[res_x * res_y];
+
+
+ //see if GPU is connected (my egpu is finicky)
+ {
+ int temp_device;
+ cudaError_t err;
+ if((err = cudaGetDevice(&temp_device)) != cudaSuccess) {
+ printf("failed to get device!\nError: %s\n", cudaGetErrorString(err));
+ return(1);
+ }
+ }
+
SetTraceLogLevel(LOG_ERROR);
+ InitWindow(res_x, res_y, "cuda teseteroni");
+
+ //TODO could probably cut out
+ Image image = GenImageColor(res_x, res_y, BLUE);
+ Texture tex = LoadTextureFromImage(image);
+
+
- InitWindow(size_x, size_y, "cuda teseteroni");
- cudaMalloc((void **)&image_d, 100 * 100 * 4);
- test_image<<<1, dim3(3,3)>>>(image_d);
- cudaDeviceSynchronize();
- //for(;;);
//if(!IsWindowFullscreen()) ToggleFullscreen();
while(!WindowShouldClose()) {
- cudaMemcpy(image.data, (void **)image_d, 100 * 100 * 4, cudaMemcpyDeviceToHost);
- //memset(image.data, (int32_t)0x0000ff, 100 * 100 * 4);
+ //cuda stuff
+ cudaMalloc((void **)&image_d, res_x * res_y * sizeof(Color));
+ test_image<<<blockCount, threadCount>>>(image_d);
+ cudaDeviceSynchronize();
+ cudaMemcpy(texture_data, (void **)image_d, res_x * res_y * sizeof(Color), cudaMemcpyDeviceToHost);
+
BeginDrawing();
- DrawTexture(LoadTextureFromImage(image), 0, 0, WHITE);
+ UpdateTexture(tex, texture_data);
+ DrawTexture(tex, 0, 0, WHITE);
+ DrawFPS(0, 0);
EndDrawing();
}
-
-
-
-
return 0;
}