// main.cu

#include <stdio.h>
#include <stdint.h>
#include <cuda_runtime.h>
#include <string.h>

#include "raylib.h"

// Kernel taking a pointer to the device-side image buffer. Only the declaration
// is visible here; the definition lives elsewhere. main() launches it on a
// 20x20 grid of 32x32-thread blocks and then copies the buffer back to the host
// as raylib Color data.
// NOTE(review): presumably writes 4 bytes (RGBA) per pixel into `image` --
// confirm against the kernel definition.
__global__ void test_image(uint8_t *image);

// Opens a raylib window and, every frame, runs the test_image kernel, copies
// the resulting device buffer to the host, and draws it as a texture.
//
// Returns 0 on a normal window close, 1 if any CUDA call fails.
int main() {
  // Occupancy is not tuned yet; these resolutions are simply easy to work with.
  // Author's note: calculated manually for a GTX 1060 (20 SM, 1024 threads/SM).
  // The launch supplies exactly res_x * res_y threads:
  // 20x20 blocks of 32x32 threads.
  const int res_x = 32 * 20;
  const int res_y = 32 * 20;
  const dim3 blockCount(20, 20);
  const dim3 threadCount(32, 32);
  const size_t image_bytes = (size_t)res_x * res_y * sizeof(Color);

  uint8_t *image_d = NULL;
  // static storage: the host staging buffer is far too large (well over 1 MB)
  // to place safely on the stack.
  static Color texture_data[res_x * res_y];

  cudaError_t err;

  // See if the GPU is connected (the eGPU is finicky).
  {
    int temp_device;
    if((err = cudaGetDevice(&temp_device)) != cudaSuccess) {
      printf("failed to get device!\nError: %s\n", cudaGetErrorString(err));
      return(1);
    }
  }

  // Allocate the device image once, up front. Allocating per frame (as before)
  // leaked the previous buffer on every iteration.
  if((err = cudaMalloc((void **)&image_d, image_bytes)) != cudaSuccess) {
    printf("failed to allocate device image!\nError: %s\n", cudaGetErrorString(err));
    return(1);
  }

  SetTraceLogLevel(LOG_ERROR);
  InitWindow(res_x, res_y, "cuda teseteroni");

  // TODO could probably cut out
  // The CPU-side image is only needed to create a texture of the right size
  // and format, so release it as soon as the texture exists.
  Image image = GenImageColor(res_x, res_y, BLUE);
  Texture tex = LoadTextureFromImage(image);
  UnloadImage(image);

  //if(!IsWindowFullscreen()) ToggleFullscreen();

  int exit_code = 0;
  while(!WindowShouldClose()) {
    // Render one frame on the GPU.
    test_image<<<blockCount, threadCount>>>(image_d);

    // Launch-configuration errors are only reported via cudaGetLastError();
    // faults during execution surface at the synchronize / copy.
    err = cudaGetLastError();
    if(err == cudaSuccess) err = cudaDeviceSynchronize();
    if(err == cudaSuccess) {
      err = cudaMemcpy(texture_data, image_d, image_bytes, cudaMemcpyDeviceToHost);
    }
    if(err != cudaSuccess) {
      printf("CUDA frame failed!\nError: %s\n", cudaGetErrorString(err));
      exit_code = 1;
      break;
    }

    BeginDrawing();
    UpdateTexture(tex, texture_data);
    DrawTexture(tex, 0, 0, WHITE);
    DrawFPS(0, 0);
    EndDrawing();
  }

  // Release GPU and window resources on every exit path from the loop.
  UnloadTexture(tex);
  CloseWindow();
  cudaFree(image_d);

  return exit_code;
}