55 lines
1.4 KiB
Plaintext
55 lines
1.4 KiB
Plaintext
#include <stdio.h>
|
|
#include <stdint.h>
|
|
#include <cuda_runtime.h>
|
|
#include <string.h>
|
|
|
|
#include "kernel.cuh"
|
|
#include "raylib.h"
|
|
|
|
// Reports a failed CUDA runtime call on stdout.
// `what` is a short description printed ahead of the CUDA error string.
// Returns true when `err` is cudaSuccess, false otherwise.
static bool cuda_ok(cudaError_t err, const char *what) {
    if (err == cudaSuccess) {
        return true;
    }
    printf("%s\nError: %s\n", what, cudaGetErrorString(err));
    return false;
}

// Opens a raylib window and, every frame, launches the `render` kernel,
// copies the resulting pixels back to the host and draws them as a texture.
// Returns 0 on a normal window close, 1 if the CUDA device is unavailable,
// the window cannot be opened, or any CUDA call fails.
int main() {
    // Launch geometry was hand-picked by the original author for a GTX 1060
    // (noted as "20 SM, 1024 threads/SM"); occupancy is not tuned.
    // One thread per pixel: 20x20 blocks of 32x32 threads == res_x * res_y.
    const int res_x = 32 * 20;
    const int res_y = 32 * 20;
    const dim3 blockCount(20, 20);
    const dim3 threadCount(32, 32);
    const size_t image_bytes = (size_t)res_x * res_y * sizeof(Color);

    // Host staging buffer for the rendered frame. Kept in static storage:
    // with raylib's 4-byte Color this is roughly 1.6 MB, too large to place
    // safely on the stack on every platform.
    static Color texture_data[res_x * res_y];

    // See if a GPU is reachable before doing anything else (eGPU setups can
    // disappear between runs).
    {
        int temp_device;
        if (!cuda_ok(cudaGetDevice(&temp_device), "failed to get device!")) {
            return 1;
        }
    }

    // Allocate the device frame buffer once and reuse it every frame.
    // NOTE(review): assumes `render` writes res_x * res_y Color-sized pixels
    // into this buffer - confirm against kernel.cuh.
    uint8_t *image_d = NULL;
    if (!cuda_ok(cudaMalloc((void **)&image_d, image_bytes),
                 "failed to allocate device image!")) {
        return 1;
    }

    SetTraceLogLevel(LOG_ERROR);
    InitWindow(res_x, res_y, "cuda teseteroni");
    if (!IsWindowReady()) {
        printf("failed to open window!\n");
        cudaFree(image_d);
        return 1;
    }
    // Set after the window exists, matching the order used in raylib's examples.
    SetTargetFPS(10);

    // The image only seeds a texture of the right size and pixel format; the
    // CPU-side copy can be released as soon as the texture has been uploaded.
    Image image = GenImageColor(res_x, res_y, BLUE);
    Texture tex = LoadTextureFromImage(image);
    UnloadImage(image);

    int exit_code = 0;
    while (!WindowShouldClose()) {
        // Render on the GPU, then bring the frame back to the host.
        render<<<blockCount, threadCount>>>(image_d);

        // cudaGetLastError catches launch-configuration errors; the
        // synchronize surfaces faults raised while the kernel was running.
        if (!cuda_ok(cudaGetLastError(), "failed to launch render kernel!") ||
            !cuda_ok(cudaDeviceSynchronize(), "render kernel failed!") ||
            !cuda_ok(cudaMemcpy(texture_data, image_d, image_bytes,
                                cudaMemcpyDeviceToHost),
                     "failed to copy image to host!")) {
            exit_code = 1;
            break;
        }

        BeginDrawing();
        UpdateTexture(tex, texture_data);
        DrawTexture(tex, 0, 0, WHITE);
        DrawFPS(0, 0);
        EndDrawing();
    }

    // Release GPU/window resources on both the normal and the error path.
    UnloadTexture(tex);
    CloseWindow();
    cudaFree(image_d);

    return exit_code;
}
|