1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
|
#include <stdint.h>
#include <stdio.h>
#include <stdint.h>
#include <cuda_runtime.h>
#include <string.h>
#include <raylib.h>
#include "kernel.cuh"
/*
 * Entry point: opens a raylib window and, every frame, launches the `render`
 * kernel (declared in kernel.cuh), copies the device buffer back to the host
 * and displays it as a texture.
 *
 * Returns 0 on a normal window close and 1 if any CUDA call fails.
 */
int main() {
    // bluuuuugh i'll figure out occupancy later, these resolutions are easy:
    // calculated manually for a GTX 1060 with 20 SMs, 1024 threads/SM.
    // The window resolution equals blocks * threads along each axis
    // (presumably one thread per pixel -- confirm against the kernel).
    const int res_x = 32 * 20;
    const int res_y = 32 * 20;
    const dim3 blockCount(20, 20);
    const dim3 threadCount(32, 32);
    const size_t image_bytes = (size_t)res_x * res_y * sizeof(Color);

    uint8_t *image_d = NULL;
    // ~1.6 MB: static storage instead of the stack, so the frame buffer cannot
    // overflow a small default stack.
    static Color texture_data[res_x * res_y];

    cudaError_t err;
    int exit_code = 0;

    // see if GPU is connected (my egpu is finicky)
    {
        int temp_device;
        if ((err = cudaGetDevice(&temp_device)) != cudaSuccess) {
            printf("failed to get device!\nError: %s\n", cudaGetErrorString(err));
            return 1;
        }
    }

    // Allocate the device frame buffer ONCE. Allocating inside the frame loop
    // without a matching cudaFree leaks a full frame of device memory per frame.
    if ((err = cudaMalloc((void **)&image_d, image_bytes)) != cudaSuccess) {
        printf("failed to allocate device image!\nError: %s\n", cudaGetErrorString(err));
        return 1;
    }

    SetTraceLogLevel(LOG_ERROR);
    InitWindow(res_x, res_y, "cuda teseteroni");
    // Frame-rate cap is configured once the window exists.
    SetTargetFPS(10);

    // TODO could probably cut out
    // The image only seeds the texture's size/format; release the CPU copy
    // as soon as the texture has been created from it.
    Image image = GenImageColor(res_x, res_y, BLUE);
    Texture tex = LoadTextureFromImage(image);
    UnloadImage(image);

    while (!WindowShouldClose()) {
        // cuda stuff
        render<<<blockCount, threadCount>>>(image_d);
        if ((err = cudaGetLastError()) != cudaSuccess) {
            printf("kernel did not launch! Error: %s\n", cudaGetErrorString(err));
            exit_code = 1;
            break;
        }

        // Surfaces faults raised while the kernel was executing.
        if ((err = cudaDeviceSynchronize()) != cudaSuccess) {
            printf("kernel execution failed! Error: %s\n", cudaGetErrorString(err));
            exit_code = 1;
            break;
        }

        if ((err = cudaMemcpy(texture_data, image_d, image_bytes,
                              cudaMemcpyDeviceToHost)) != cudaSuccess) {
            printf("failed to copy image to host! Error: %s\n", cudaGetErrorString(err));
            exit_code = 1;
            break;
        }

        BeginDrawing();
        UpdateTexture(tex, texture_data);
        DrawTexture(tex, 0, 0, WHITE);
        DrawFPS(0, 0);
        EndDrawing();
    }

    // Release GPU and window resources on every exit path out of the loop.
    UnloadTexture(tex);
    CloseWindow();
    cudaFree(image_d);

    return exit_code;
}
|