From b0dd97ee6bf8d5daa587da40ad941efac68152df Mon Sep 17 00:00:00 2001 From: brett weiland Date: Sat, 1 Jun 2024 01:36:18 -0500 Subject: got raycasting laid out --- .gitignore | 2 + 3d_fractals_cuda.prf | 1 + build/main.o | Bin 32912 -> 0 bytes camera.cuh | 81 --- common.cuh | 43 -- entity.cuh | 31 - include/helper_math.h | 1469 --------------------------------------------- kernel.cu | 9 - kernel.cuh | 5 - main.cu | 54 -- makefile | 15 - render_object.cuh | 12 - scene.cuh | 32 - scene.h | 12 - sphere.cuh | 17 - src/.vscode/launch.json | 19 + src/camera.cuh | 75 +++ src/common.cuh | 53 ++ src/entity.cuh | 30 + src/include/helper_math.h | 1469 +++++++++++++++++++++++++++++++++++++++++++++ src/kernel.cu | 10 + src/kernel.cuh | 5 + src/main.cu | 58 ++ src/makefile | 25 + src/render_object.cuh | 14 + src/scene.cu | 14 + src/scene.cuh | 31 + src/sphere.cu | 9 + src/sphere.cuh | 17 + 29 files changed, 1832 insertions(+), 1780 deletions(-) create mode 100644 .gitignore create mode 120000 3d_fractals_cuda.prf delete mode 100644 build/main.o delete mode 100644 camera.cuh delete mode 100644 common.cuh delete mode 100644 entity.cuh delete mode 100644 include/helper_math.h delete mode 100644 kernel.cu delete mode 100644 kernel.cuh delete mode 100644 main.cu delete mode 100644 makefile delete mode 100644 render_object.cuh delete mode 100644 scene.cuh delete mode 100644 scene.h delete mode 100644 sphere.cuh create mode 100644 src/.vscode/launch.json create mode 100644 src/camera.cuh create mode 100644 src/common.cuh create mode 100644 src/entity.cuh create mode 100644 src/include/helper_math.h create mode 100644 src/kernel.cu create mode 100644 src/kernel.cuh create mode 100644 src/main.cu create mode 100644 src/makefile create mode 100644 src/render_object.cuh create mode 100644 src/scene.cu create mode 100644 src/scene.cuh create mode 100644 src/sphere.cu create mode 100644 src/sphere.cuh diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..8765681 --- 
/dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +old_cuda/* +src/build/* diff --git a/3d_fractals_cuda.prf b/3d_fractals_cuda.prf new file mode 120000 index 0000000..8e05f78 --- /dev/null +++ b/3d_fractals_cuda.prf @@ -0,0 +1 @@ +/home/indigo/.unison/3d_fractals_cuda.prf \ No newline at end of file diff --git a/build/main.o b/build/main.o deleted file mode 100644 index 737de98..0000000 Binary files a/build/main.o and /dev/null differ diff --git a/camera.cuh b/camera.cuh deleted file mode 100644 index 61944fe..0000000 --- a/camera.cuh +++ /dev/null @@ -1,81 +0,0 @@ -#ifndef CAMERA_H -#define CAMERA_H - -#include "entity.cuh" -#include "common.cuh" -#include - - -//template class scene; - -//I am soooo high lol -template -class camera : public entity { - using T3 = typename vect_t3::vect_t; - using T2 = typename vect_t2::vect_t; - public: - __device__ void render(); - __device__ camera(scene *pscene, const T fov = 1, const T3 pos = vect_create(0), const T3 rot = vect_create(0)) - : pscene(pscene), fov(fov), entity(pos, rot, vect_create(0)) {}; - - //__device__ ~camera(); - private: - T fov; - T2 size; - int steps = 100; - T clip_min = .1; - T clip_max = 100; - scene *pscene; -}; - -/** -//later we'll make scenes objects, rn im lazy (TODO) -template __device__ void camera::render() { - //TODO *really* need to clean this up once you get further - //extra dimentions is extra math - //either generisize float3 or stop using this fucking template nonsense - const uint3 unnormalized_uv = ((blockDim * blockIdx) + threadIdx); - const unsigned int img_index = (unnormalized_uv.x + (unnormalized_uv.y * blockDim.x * gridDim.x)) * 4; - const T3 uv = ((2 * vect_create(unnormalized_uv)) / vect_create(gridDim * blockDim)) - 1; - const T3 ray_direction = normalize(vect_create(uv.x, uv.y, 1)); - T dist; - T total_dist = 0; - T3 ray; - int i; - - - //if(img_index == 640) { printf("%f, %f, %f\n", uv.x, uv.y, uv.z); } - - T min_dist = clip_max; - - render_object **objs = pscene->get_objs(); - 
for(i = 0; i < steps; i++) { - ray = this->pos_ + (total_dist * ray_direction); - //gyagh memory lookups - for(unsigned int oi = 0; objs[oi] != NULL; oi++) { - dist = object.distance_estimator(ray); - } - if((dist < clip_min)) { - //image[img_index] = 0xff; - break; - } - if((dist > clip_max)) { - //image[img_index+2] = 0xff; - break; - } - total_dist += dist; - } - - - - - //image[img_index] = 0x00; - //image[img_index+1] = 0x00; - //image[img_index+2] = p; - //image[img_index+3] = 0xff; - -} -**/ - - -#endif diff --git a/common.cuh b/common.cuh deleted file mode 100644 index d5a9cdf..0000000 --- a/common.cuh +++ /dev/null @@ -1,43 +0,0 @@ -#ifndef COMMON_H -#define COMMON_H - -#include "include/helper_math.h" - -/** - -template class vect_t2; -template class vect_t3; -template class vect_t4; - -//this feels so hacky... idk why people are so scared of metaprogramming -template <> class vect_t2 { public: using vect_t = double2; }; -template <> class vect_t3 { public: using vect_t = double3; }; -template <> class vect_t4 { public: using vect_t = double4; }; - -template <> class vect_t2 { public: using vect_t = float2; }; -template <> class vect_t3 { public: using vect_t = float3; }; -template <> class vect_t4 { public: using vect_t = float4; }; - - -template __device__ T vect_create(X x); -template __device__ T vect_create(X x, Y y, Z z); - -//I have no fucking clue if this is right, check me later ig -template __device__ inline float3 vect_create(X x) { return make_float3(x); } - -template __device__ inline float3 vect_create(X x, Y y, Z z) { return make_float3(x, y, z); } -**/ - -/** I'm not sure weather float or double percision is nessesary. I was using -templates, but this changes the structure of my entire project in unwanted -ways, so I'm switching over to typedefs. 
**/ - -typedef float2 vect_t2; -typedef float3 vect_t3; -typedef float4 vect_t4; -typedef float T; - -#define vect1to3(x) (make_float3(x)) -#define make_vect(x, y, z) (make_float3(x, y, z)) - -#endif diff --git a/entity.cuh b/entity.cuh deleted file mode 100644 index 737ef8b..0000000 --- a/entity.cuh +++ /dev/null @@ -1,31 +0,0 @@ -#ifndef ENTITY_H -#define ENTITY_H -#include "common.cuh" - -//we could make a template to allow double percision, but start with float -//idk how nessesary it is yet so I'll go ahead. -//I know I needed it for zoomin far into the mandelbrot ig, so it's not -//out of the question -template class entity { - using T3 = typename vect_t3::vect_t; - public: - __device__ entity() : pos_(vect_create(0)), rot_(vect_create(0)), scale_(vect_create(0)) {}; - __device__ entity(const T3 pos, const T3 rot, const T3 scale) : pos_(pos), rot_(rot), scale_(scale) {}; - __device__ entity(const float3 pos) : pos_(pos), rot_(vect_create(0)), scale_(vect_create(0)) {}; - - - T3 get_pos() const { return pos_; } - T3 get_rot() const { return rot_; } - T3 get_scale() const { return scale_; } - - __device__ void set_pos(const T3 pos) { pos_ = pos; } - __device__ void set_rot(const T3 rot) { rot_ = rot; } - __device__ void set_scale(const T3 scale) { scale_ = scale; } - - protected: - T3 pos_; - T3 rot_; - T3 scale_; - -}; -#endif diff --git a/include/helper_math.h b/include/helper_math.h deleted file mode 100644 index 6d2f63a..0000000 --- a/include/helper_math.h +++ /dev/null @@ -1,1469 +0,0 @@ -/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. 
- * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of NVIDIA CORPORATION nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -/* - * This file implements common mathematical operations on vector types - * (float3, float4 etc.) since these are not provided as standard by CUDA. - * - * The syntax is modeled on the Cg standard library. - * - * This is part of the Helper library includes - * - * Thanks to Linh Hah for additions and fixes. 
- */ - -#ifndef HELPER_MATH_H -#define HELPER_MATH_H - -#include "cuda_runtime.h" - -typedef unsigned int uint; -typedef unsigned short ushort; - -#ifndef EXIT_WAIVED -#define EXIT_WAIVED 2 -#endif - -#ifndef __CUDACC__ -#include - -//////////////////////////////////////////////////////////////////////////////// -// host implementations of CUDA functions -//////////////////////////////////////////////////////////////////////////////// - -inline float fminf(float a, float b) -{ - return a < b ? a : b; -} - -inline float fmaxf(float a, float b) -{ - return a > b ? a : b; -} - -inline int max(int a, int b) -{ - return a > b ? a : b; -} - -inline int min(int a, int b) -{ - return a < b ? a : b; -} - -inline float rsqrtf(float x) -{ - return 1.0f / sqrtf(x); -} -#endif - -//////////////////////////////////////////////////////////////////////////////// -// constructors -//////////////////////////////////////////////////////////////////////////////// - -inline __host__ __device__ float2 make_float2(float s) -{ - return make_float2(s, s); -} -inline __host__ __device__ float2 make_float2(float3 a) -{ - return make_float2(a.x, a.y); -} -inline __host__ __device__ float2 make_float2(int2 a) -{ - return make_float2(float(a.x), float(a.y)); -} -inline __host__ __device__ float2 make_float2(uint2 a) -{ - return make_float2(float(a.x), float(a.y)); -} - -inline __host__ __device__ int2 make_int2(int s) -{ - return make_int2(s, s); -} -inline __host__ __device__ int2 make_int2(int3 a) -{ - return make_int2(a.x, a.y); -} -inline __host__ __device__ int2 make_int2(uint2 a) -{ - return make_int2(int(a.x), int(a.y)); -} -inline __host__ __device__ int2 make_int2(float2 a) -{ - return make_int2(int(a.x), int(a.y)); -} - -inline __host__ __device__ uint2 make_uint2(uint s) -{ - return make_uint2(s, s); -} -inline __host__ __device__ uint2 make_uint2(uint3 a) -{ - return make_uint2(a.x, a.y); -} -inline __host__ __device__ uint2 make_uint2(int2 a) -{ - return make_uint2(uint(a.x), 
uint(a.y)); -} - -inline __host__ __device__ float3 make_float3(float s) -{ - return make_float3(s, s, s); -} -inline __host__ __device__ float3 make_float3(float2 a) -{ - return make_float3(a.x, a.y, 0.0f); -} -inline __host__ __device__ float3 make_float3(float2 a, float s) -{ - return make_float3(a.x, a.y, s); -} -inline __host__ __device__ float3 make_float3(float4 a) -{ - return make_float3(a.x, a.y, a.z); -} -inline __host__ __device__ float3 make_float3(int3 a) -{ - return make_float3(float(a.x), float(a.y), float(a.z)); -} -inline __host__ __device__ float3 make_float3(uint3 a) -{ - return make_float3(float(a.x), float(a.y), float(a.z)); -} - -inline __host__ __device__ int3 make_int3(int s) -{ - return make_int3(s, s, s); -} -inline __host__ __device__ int3 make_int3(int2 a) -{ - return make_int3(a.x, a.y, 0); -} -inline __host__ __device__ int3 make_int3(int2 a, int s) -{ - return make_int3(a.x, a.y, s); -} -inline __host__ __device__ int3 make_int3(uint3 a) -{ - return make_int3(int(a.x), int(a.y), int(a.z)); -} -inline __host__ __device__ int3 make_int3(float3 a) -{ - return make_int3(int(a.x), int(a.y), int(a.z)); -} - -inline __host__ __device__ uint3 make_uint3(uint s) -{ - return make_uint3(s, s, s); -} -inline __host__ __device__ uint3 make_uint3(uint2 a) -{ - return make_uint3(a.x, a.y, 0); -} -inline __host__ __device__ uint3 make_uint3(uint2 a, uint s) -{ - return make_uint3(a.x, a.y, s); -} -inline __host__ __device__ uint3 make_uint3(uint4 a) -{ - return make_uint3(a.x, a.y, a.z); -} -inline __host__ __device__ uint3 make_uint3(int3 a) -{ - return make_uint3(uint(a.x), uint(a.y), uint(a.z)); -} - -inline __host__ __device__ float4 make_float4(float s) -{ - return make_float4(s, s, s, s); -} -inline __host__ __device__ float4 make_float4(float3 a) -{ - return make_float4(a.x, a.y, a.z, 0.0f); -} -inline __host__ __device__ float4 make_float4(float3 a, float w) -{ - return make_float4(a.x, a.y, a.z, w); -} -inline __host__ __device__ float4 
make_float4(int4 a) -{ - return make_float4(float(a.x), float(a.y), float(a.z), float(a.w)); -} -inline __host__ __device__ float4 make_float4(uint4 a) -{ - return make_float4(float(a.x), float(a.y), float(a.z), float(a.w)); -} - -inline __host__ __device__ int4 make_int4(int s) -{ - return make_int4(s, s, s, s); -} -inline __host__ __device__ int4 make_int4(int3 a) -{ - return make_int4(a.x, a.y, a.z, 0); -} -inline __host__ __device__ int4 make_int4(int3 a, int w) -{ - return make_int4(a.x, a.y, a.z, w); -} -inline __host__ __device__ int4 make_int4(uint4 a) -{ - return make_int4(int(a.x), int(a.y), int(a.z), int(a.w)); -} -inline __host__ __device__ int4 make_int4(float4 a) -{ - return make_int4(int(a.x), int(a.y), int(a.z), int(a.w)); -} - - -inline __host__ __device__ uint4 make_uint4(uint s) -{ - return make_uint4(s, s, s, s); -} -inline __host__ __device__ uint4 make_uint4(uint3 a) -{ - return make_uint4(a.x, a.y, a.z, 0); -} -inline __host__ __device__ uint4 make_uint4(uint3 a, uint w) -{ - return make_uint4(a.x, a.y, a.z, w); -} -inline __host__ __device__ uint4 make_uint4(int4 a) -{ - return make_uint4(uint(a.x), uint(a.y), uint(a.z), uint(a.w)); -} - -//////////////////////////////////////////////////////////////////////////////// -// negate -//////////////////////////////////////////////////////////////////////////////// - -inline __host__ __device__ float2 operator-(float2 &a) -{ - return make_float2(-a.x, -a.y); -} -inline __host__ __device__ int2 operator-(int2 &a) -{ - return make_int2(-a.x, -a.y); -} -inline __host__ __device__ float3 operator-(float3 &a) -{ - return make_float3(-a.x, -a.y, -a.z); -} -inline __host__ __device__ int3 operator-(int3 &a) -{ - return make_int3(-a.x, -a.y, -a.z); -} -inline __host__ __device__ float4 operator-(float4 &a) -{ - return make_float4(-a.x, -a.y, -a.z, -a.w); -} -inline __host__ __device__ int4 operator-(int4 &a) -{ - return make_int4(-a.x, -a.y, -a.z, -a.w); -} - 
-//////////////////////////////////////////////////////////////////////////////// -// addition -//////////////////////////////////////////////////////////////////////////////// - -inline __host__ __device__ float2 operator+(float2 a, float2 b) -{ - return make_float2(a.x + b.x, a.y + b.y); -} -inline __host__ __device__ void operator+=(float2 &a, float2 b) -{ - a.x += b.x; - a.y += b.y; -} -inline __host__ __device__ float2 operator+(float2 a, float b) -{ - return make_float2(a.x + b, a.y + b); -} -inline __host__ __device__ float2 operator+(float b, float2 a) -{ - return make_float2(a.x + b, a.y + b); -} -inline __host__ __device__ void operator+=(float2 &a, float b) -{ - a.x += b; - a.y += b; -} - -inline __host__ __device__ int2 operator+(int2 a, int2 b) -{ - return make_int2(a.x + b.x, a.y + b.y); -} -inline __host__ __device__ void operator+=(int2 &a, int2 b) -{ - a.x += b.x; - a.y += b.y; -} -inline __host__ __device__ int2 operator+(int2 a, int b) -{ - return make_int2(a.x + b, a.y + b); -} -inline __host__ __device__ int2 operator+(int b, int2 a) -{ - return make_int2(a.x + b, a.y + b); -} -inline __host__ __device__ void operator+=(int2 &a, int b) -{ - a.x += b; - a.y += b; -} - -inline __host__ __device__ uint2 operator+(uint2 a, uint2 b) -{ - return make_uint2(a.x + b.x, a.y + b.y); -} -inline __host__ __device__ void operator+=(uint2 &a, uint2 b) -{ - a.x += b.x; - a.y += b.y; -} -inline __host__ __device__ uint2 operator+(uint2 a, uint b) -{ - return make_uint2(a.x + b, a.y + b); -} -inline __host__ __device__ uint2 operator+(uint b, uint2 a) -{ - return make_uint2(a.x + b, a.y + b); -} -inline __host__ __device__ void operator+=(uint2 &a, uint b) -{ - a.x += b; - a.y += b; -} - - -inline __host__ __device__ float3 operator+(float3 a, float3 b) -{ - return make_float3(a.x + b.x, a.y + b.y, a.z + b.z); -} -inline __host__ __device__ void operator+=(float3 &a, float3 b) -{ - a.x += b.x; - a.y += b.y; - a.z += b.z; -} -inline __host__ __device__ float3 
operator+(float3 a, float b) -{ - return make_float3(a.x + b, a.y + b, a.z + b); -} -inline __host__ __device__ void operator+=(float3 &a, float b) -{ - a.x += b; - a.y += b; - a.z += b; -} - -inline __host__ __device__ int3 operator+(int3 a, int3 b) -{ - return make_int3(a.x + b.x, a.y + b.y, a.z + b.z); -} -inline __host__ __device__ void operator+=(int3 &a, int3 b) -{ - a.x += b.x; - a.y += b.y; - a.z += b.z; -} -inline __host__ __device__ int3 operator+(int3 a, int b) -{ - return make_int3(a.x + b, a.y + b, a.z + b); -} -inline __host__ __device__ void operator+=(int3 &a, int b) -{ - a.x += b; - a.y += b; - a.z += b; -} - -inline __host__ __device__ uint3 operator+(uint3 a, uint3 b) -{ - return make_uint3(a.x + b.x, a.y + b.y, a.z + b.z); -} -inline __host__ __device__ void operator+=(uint3 &a, uint3 b) -{ - a.x += b.x; - a.y += b.y; - a.z += b.z; -} -inline __host__ __device__ uint3 operator+(uint3 a, uint b) -{ - return make_uint3(a.x + b, a.y + b, a.z + b); -} -inline __host__ __device__ void operator+=(uint3 &a, uint b) -{ - a.x += b; - a.y += b; - a.z += b; -} - -inline __host__ __device__ int3 operator+(int b, int3 a) -{ - return make_int3(a.x + b, a.y + b, a.z + b); -} -inline __host__ __device__ uint3 operator+(uint b, uint3 a) -{ - return make_uint3(a.x + b, a.y + b, a.z + b); -} -inline __host__ __device__ float3 operator+(float b, float3 a) -{ - return make_float3(a.x + b, a.y + b, a.z + b); -} - -inline __host__ __device__ float4 operator+(float4 a, float4 b) -{ - return make_float4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w); -} -inline __host__ __device__ void operator+=(float4 &a, float4 b) -{ - a.x += b.x; - a.y += b.y; - a.z += b.z; - a.w += b.w; -} -inline __host__ __device__ float4 operator+(float4 a, float b) -{ - return make_float4(a.x + b, a.y + b, a.z + b, a.w + b); -} -inline __host__ __device__ float4 operator+(float b, float4 a) -{ - return make_float4(a.x + b, a.y + b, a.z + b, a.w + b); -} -inline __host__ __device__ void 
operator+=(float4 &a, float b) -{ - a.x += b; - a.y += b; - a.z += b; - a.w += b; -} - -inline __host__ __device__ int4 operator+(int4 a, int4 b) -{ - return make_int4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w); -} -inline __host__ __device__ void operator+=(int4 &a, int4 b) -{ - a.x += b.x; - a.y += b.y; - a.z += b.z; - a.w += b.w; -} -inline __host__ __device__ int4 operator+(int4 a, int b) -{ - return make_int4(a.x + b, a.y + b, a.z + b, a.w + b); -} -inline __host__ __device__ int4 operator+(int b, int4 a) -{ - return make_int4(a.x + b, a.y + b, a.z + b, a.w + b); -} -inline __host__ __device__ void operator+=(int4 &a, int b) -{ - a.x += b; - a.y += b; - a.z += b; - a.w += b; -} - -inline __host__ __device__ uint4 operator+(uint4 a, uint4 b) -{ - return make_uint4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w); -} -inline __host__ __device__ void operator+=(uint4 &a, uint4 b) -{ - a.x += b.x; - a.y += b.y; - a.z += b.z; - a.w += b.w; -} -inline __host__ __device__ uint4 operator+(uint4 a, uint b) -{ - return make_uint4(a.x + b, a.y + b, a.z + b, a.w + b); -} -inline __host__ __device__ uint4 operator+(uint b, uint4 a) -{ - return make_uint4(a.x + b, a.y + b, a.z + b, a.w + b); -} -inline __host__ __device__ void operator+=(uint4 &a, uint b) -{ - a.x += b; - a.y += b; - a.z += b; - a.w += b; -} - -//////////////////////////////////////////////////////////////////////////////// -// subtract -//////////////////////////////////////////////////////////////////////////////// - -inline __host__ __device__ float2 operator-(float2 a, float2 b) -{ - return make_float2(a.x - b.x, a.y - b.y); -} -inline __host__ __device__ void operator-=(float2 &a, float2 b) -{ - a.x -= b.x; - a.y -= b.y; -} -inline __host__ __device__ float2 operator-(float2 a, float b) -{ - return make_float2(a.x - b, a.y - b); -} -inline __host__ __device__ float2 operator-(float b, float2 a) -{ - return make_float2(b - a.x, b - a.y); -} -inline __host__ __device__ void operator-=(float2 &a, float b) -{ 
- a.x -= b; - a.y -= b; -} - -inline __host__ __device__ int2 operator-(int2 a, int2 b) -{ - return make_int2(a.x - b.x, a.y - b.y); -} -inline __host__ __device__ void operator-=(int2 &a, int2 b) -{ - a.x -= b.x; - a.y -= b.y; -} -inline __host__ __device__ int2 operator-(int2 a, int b) -{ - return make_int2(a.x - b, a.y - b); -} -inline __host__ __device__ int2 operator-(int b, int2 a) -{ - return make_int2(b - a.x, b - a.y); -} -inline __host__ __device__ void operator-=(int2 &a, int b) -{ - a.x -= b; - a.y -= b; -} - -inline __host__ __device__ uint2 operator-(uint2 a, uint2 b) -{ - return make_uint2(a.x - b.x, a.y - b.y); -} -inline __host__ __device__ void operator-=(uint2 &a, uint2 b) -{ - a.x -= b.x; - a.y -= b.y; -} -inline __host__ __device__ uint2 operator-(uint2 a, uint b) -{ - return make_uint2(a.x - b, a.y - b); -} -inline __host__ __device__ uint2 operator-(uint b, uint2 a) -{ - return make_uint2(b - a.x, b - a.y); -} -inline __host__ __device__ void operator-=(uint2 &a, uint b) -{ - a.x -= b; - a.y -= b; -} - -inline __host__ __device__ float3 operator-(float3 a, float3 b) -{ - return make_float3(a.x - b.x, a.y - b.y, a.z - b.z); -} -inline __host__ __device__ void operator-=(float3 &a, float3 b) -{ - a.x -= b.x; - a.y -= b.y; - a.z -= b.z; -} -inline __host__ __device__ float3 operator-(float3 a, float b) -{ - return make_float3(a.x - b, a.y - b, a.z - b); -} -inline __host__ __device__ float3 operator-(float b, float3 a) -{ - return make_float3(b - a.x, b - a.y, b - a.z); -} -inline __host__ __device__ void operator-=(float3 &a, float b) -{ - a.x -= b; - a.y -= b; - a.z -= b; -} - -inline __host__ __device__ int3 operator-(int3 a, int3 b) -{ - return make_int3(a.x - b.x, a.y - b.y, a.z - b.z); -} -inline __host__ __device__ void operator-=(int3 &a, int3 b) -{ - a.x -= b.x; - a.y -= b.y; - a.z -= b.z; -} -inline __host__ __device__ int3 operator-(int3 a, int b) -{ - return make_int3(a.x - b, a.y - b, a.z - b); -} -inline __host__ __device__ int3 
operator-(int b, int3 a) -{ - return make_int3(b - a.x, b - a.y, b - a.z); -} -inline __host__ __device__ void operator-=(int3 &a, int b) -{ - a.x -= b; - a.y -= b; - a.z -= b; -} - -inline __host__ __device__ uint3 operator-(uint3 a, uint3 b) -{ - return make_uint3(a.x - b.x, a.y - b.y, a.z - b.z); -} -inline __host__ __device__ void operator-=(uint3 &a, uint3 b) -{ - a.x -= b.x; - a.y -= b.y; - a.z -= b.z; -} -inline __host__ __device__ uint3 operator-(uint3 a, uint b) -{ - return make_uint3(a.x - b, a.y - b, a.z - b); -} -inline __host__ __device__ uint3 operator-(uint b, uint3 a) -{ - return make_uint3(b - a.x, b - a.y, b - a.z); -} -inline __host__ __device__ void operator-=(uint3 &a, uint b) -{ - a.x -= b; - a.y -= b; - a.z -= b; -} - -inline __host__ __device__ float4 operator-(float4 a, float4 b) -{ - return make_float4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w); -} -inline __host__ __device__ void operator-=(float4 &a, float4 b) -{ - a.x -= b.x; - a.y -= b.y; - a.z -= b.z; - a.w -= b.w; -} -inline __host__ __device__ float4 operator-(float4 a, float b) -{ - return make_float4(a.x - b, a.y - b, a.z - b, a.w - b); -} -inline __host__ __device__ void operator-=(float4 &a, float b) -{ - a.x -= b; - a.y -= b; - a.z -= b; - a.w -= b; -} - -inline __host__ __device__ int4 operator-(int4 a, int4 b) -{ - return make_int4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w); -} -inline __host__ __device__ void operator-=(int4 &a, int4 b) -{ - a.x -= b.x; - a.y -= b.y; - a.z -= b.z; - a.w -= b.w; -} -inline __host__ __device__ int4 operator-(int4 a, int b) -{ - return make_int4(a.x - b, a.y - b, a.z - b, a.w - b); -} -inline __host__ __device__ int4 operator-(int b, int4 a) -{ - return make_int4(b - a.x, b - a.y, b - a.z, b - a.w); -} -inline __host__ __device__ void operator-=(int4 &a, int b) -{ - a.x -= b; - a.y -= b; - a.z -= b; - a.w -= b; -} - -inline __host__ __device__ uint4 operator-(uint4 a, uint4 b) -{ - return make_uint4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - 
b.w); -} -inline __host__ __device__ void operator-=(uint4 &a, uint4 b) -{ - a.x -= b.x; - a.y -= b.y; - a.z -= b.z; - a.w -= b.w; -} -inline __host__ __device__ uint4 operator-(uint4 a, uint b) -{ - return make_uint4(a.x - b, a.y - b, a.z - b, a.w - b); -} -inline __host__ __device__ uint4 operator-(uint b, uint4 a) -{ - return make_uint4(b - a.x, b - a.y, b - a.z, b - a.w); -} -inline __host__ __device__ void operator-=(uint4 &a, uint b) -{ - a.x -= b; - a.y -= b; - a.z -= b; - a.w -= b; -} - -//////////////////////////////////////////////////////////////////////////////// -// multiply -//////////////////////////////////////////////////////////////////////////////// - -inline __host__ __device__ float2 operator*(float2 a, float2 b) -{ - return make_float2(a.x * b.x, a.y * b.y); -} -inline __host__ __device__ void operator*=(float2 &a, float2 b) -{ - a.x *= b.x; - a.y *= b.y; -} -inline __host__ __device__ float2 operator*(float2 a, float b) -{ - return make_float2(a.x * b, a.y * b); -} -inline __host__ __device__ float2 operator*(float b, float2 a) -{ - return make_float2(b * a.x, b * a.y); -} -inline __host__ __device__ void operator*=(float2 &a, float b) -{ - a.x *= b; - a.y *= b; -} - -inline __host__ __device__ int2 operator*(int2 a, int2 b) -{ - return make_int2(a.x * b.x, a.y * b.y); -} -inline __host__ __device__ void operator*=(int2 &a, int2 b) -{ - a.x *= b.x; - a.y *= b.y; -} -inline __host__ __device__ int2 operator*(int2 a, int b) -{ - return make_int2(a.x * b, a.y * b); -} -inline __host__ __device__ int2 operator*(int b, int2 a) -{ - return make_int2(b * a.x, b * a.y); -} -inline __host__ __device__ void operator*=(int2 &a, int b) -{ - a.x *= b; - a.y *= b; -} - -inline __host__ __device__ uint2 operator*(uint2 a, uint2 b) -{ - return make_uint2(a.x * b.x, a.y * b.y); -} -inline __host__ __device__ void operator*=(uint2 &a, uint2 b) -{ - a.x *= b.x; - a.y *= b.y; -} -inline __host__ __device__ uint2 operator*(uint2 a, uint b) -{ - return 
make_uint2(a.x * b, a.y * b); -} -inline __host__ __device__ uint2 operator*(uint b, uint2 a) -{ - return make_uint2(b * a.x, b * a.y); -} -inline __host__ __device__ void operator*=(uint2 &a, uint b) -{ - a.x *= b; - a.y *= b; -} - -inline __host__ __device__ float3 operator*(float3 a, float3 b) -{ - return make_float3(a.x * b.x, a.y * b.y, a.z * b.z); -} -inline __host__ __device__ void operator*=(float3 &a, float3 b) -{ - a.x *= b.x; - a.y *= b.y; - a.z *= b.z; -} -inline __host__ __device__ float3 operator*(float3 a, float b) -{ - return make_float3(a.x * b, a.y * b, a.z * b); -} -inline __host__ __device__ float3 operator*(float b, float3 a) -{ - return make_float3(b * a.x, b * a.y, b * a.z); -} -inline __host__ __device__ void operator*=(float3 &a, float b) -{ - a.x *= b; - a.y *= b; - a.z *= b; -} - -inline __host__ __device__ int3 operator*(int3 a, int3 b) -{ - return make_int3(a.x * b.x, a.y * b.y, a.z * b.z); -} -inline __host__ __device__ void operator*=(int3 &a, int3 b) -{ - a.x *= b.x; - a.y *= b.y; - a.z *= b.z; -} -inline __host__ __device__ int3 operator*(int3 a, int b) -{ - return make_int3(a.x * b, a.y * b, a.z * b); -} -inline __host__ __device__ int3 operator*(int b, int3 a) -{ - return make_int3(b * a.x, b * a.y, b * a.z); -} -inline __host__ __device__ void operator*=(int3 &a, int b) -{ - a.x *= b; - a.y *= b; - a.z *= b; -} - -inline __host__ __device__ uint3 operator*(uint3 a, uint3 b) -{ - return make_uint3(a.x * b.x, a.y * b.y, a.z * b.z); -} -inline __host__ __device__ void operator*=(uint3 &a, uint3 b) -{ - a.x *= b.x; - a.y *= b.y; - a.z *= b.z; -} -inline __host__ __device__ uint3 operator*(uint3 a, uint b) -{ - return make_uint3(a.x * b, a.y * b, a.z * b); -} -inline __host__ __device__ uint3 operator*(uint b, uint3 a) -{ - return make_uint3(b * a.x, b * a.y, b * a.z); -} -inline __host__ __device__ void operator*=(uint3 &a, uint b) -{ - a.x *= b; - a.y *= b; - a.z *= b; -} - -inline __host__ __device__ float4 operator*(float4 a, 
float4 b) -{ - return make_float4(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w); -} -inline __host__ __device__ void operator*=(float4 &a, float4 b) -{ - a.x *= b.x; - a.y *= b.y; - a.z *= b.z; - a.w *= b.w; -} -inline __host__ __device__ float4 operator*(float4 a, float b) -{ - return make_float4(a.x * b, a.y * b, a.z * b, a.w * b); -} -inline __host__ __device__ float4 operator*(float b, float4 a) -{ - return make_float4(b * a.x, b * a.y, b * a.z, b * a.w); -} -inline __host__ __device__ void operator*=(float4 &a, float b) -{ - a.x *= b; - a.y *= b; - a.z *= b; - a.w *= b; -} - -inline __host__ __device__ int4 operator*(int4 a, int4 b) -{ - return make_int4(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w); -} -inline __host__ __device__ void operator*=(int4 &a, int4 b) -{ - a.x *= b.x; - a.y *= b.y; - a.z *= b.z; - a.w *= b.w; -} -inline __host__ __device__ int4 operator*(int4 a, int b) -{ - return make_int4(a.x * b, a.y * b, a.z * b, a.w * b); -} -inline __host__ __device__ int4 operator*(int b, int4 a) -{ - return make_int4(b * a.x, b * a.y, b * a.z, b * a.w); -} -inline __host__ __device__ void operator*=(int4 &a, int b) -{ - a.x *= b; - a.y *= b; - a.z *= b; - a.w *= b; -} - -inline __host__ __device__ uint4 operator*(uint4 a, uint4 b) -{ - return make_uint4(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w); -} -inline __host__ __device__ void operator*=(uint4 &a, uint4 b) -{ - a.x *= b.x; - a.y *= b.y; - a.z *= b.z; - a.w *= b.w; -} -inline __host__ __device__ uint4 operator*(uint4 a, uint b) -{ - return make_uint4(a.x * b, a.y * b, a.z * b, a.w * b); -} -inline __host__ __device__ uint4 operator*(uint b, uint4 a) -{ - return make_uint4(b * a.x, b * a.y, b * a.z, b * a.w); -} -inline __host__ __device__ void operator*=(uint4 &a, uint b) -{ - a.x *= b; - a.y *= b; - a.z *= b; - a.w *= b; -} - -//////////////////////////////////////////////////////////////////////////////// -// divide -//////////////////////////////////////////////////////////////////////////////// - 
-inline __host__ __device__ float2 operator/(float2 a, float2 b) -{ - return make_float2(a.x / b.x, a.y / b.y); -} -inline __host__ __device__ void operator/=(float2 &a, float2 b) -{ - a.x /= b.x; - a.y /= b.y; -} -inline __host__ __device__ float2 operator/(float2 a, float b) -{ - return make_float2(a.x / b, a.y / b); -} -inline __host__ __device__ void operator/=(float2 &a, float b) -{ - a.x /= b; - a.y /= b; -} -inline __host__ __device__ float2 operator/(float b, float2 a) -{ - return make_float2(b / a.x, b / a.y); -} - -inline __host__ __device__ float3 operator/(float3 a, float3 b) -{ - return make_float3(a.x / b.x, a.y / b.y, a.z / b.z); -} -inline __host__ __device__ void operator/=(float3 &a, float3 b) -{ - a.x /= b.x; - a.y /= b.y; - a.z /= b.z; -} -inline __host__ __device__ float3 operator/(float3 a, float b) -{ - return make_float3(a.x / b, a.y / b, a.z / b); -} -inline __host__ __device__ void operator/=(float3 &a, float b) -{ - a.x /= b; - a.y /= b; - a.z /= b; -} -inline __host__ __device__ float3 operator/(float b, float3 a) -{ - return make_float3(b / a.x, b / a.y, b / a.z); -} - -inline __host__ __device__ float4 operator/(float4 a, float4 b) -{ - return make_float4(a.x / b.x, a.y / b.y, a.z / b.z, a.w / b.w); -} -inline __host__ __device__ void operator/=(float4 &a, float4 b) -{ - a.x /= b.x; - a.y /= b.y; - a.z /= b.z; - a.w /= b.w; -} -inline __host__ __device__ float4 operator/(float4 a, float b) -{ - return make_float4(a.x / b, a.y / b, a.z / b, a.w / b); -} -inline __host__ __device__ void operator/=(float4 &a, float b) -{ - a.x /= b; - a.y /= b; - a.z /= b; - a.w /= b; -} -inline __host__ __device__ float4 operator/(float b, float4 a) -{ - return make_float4(b / a.x, b / a.y, b / a.z, b / a.w); -} - -//////////////////////////////////////////////////////////////////////////////// -// min -//////////////////////////////////////////////////////////////////////////////// - -inline __host__ __device__ float2 fminf(float2 a, float2 b) -{ - 
return make_float2(fminf(a.x,b.x), fminf(a.y,b.y)); -} -inline __host__ __device__ float3 fminf(float3 a, float3 b) -{ - return make_float3(fminf(a.x,b.x), fminf(a.y,b.y), fminf(a.z,b.z)); -} -inline __host__ __device__ float4 fminf(float4 a, float4 b) -{ - return make_float4(fminf(a.x,b.x), fminf(a.y,b.y), fminf(a.z,b.z), fminf(a.w,b.w)); -} - -inline __host__ __device__ int2 min(int2 a, int2 b) -{ - return make_int2(min(a.x,b.x), min(a.y,b.y)); -} -inline __host__ __device__ int3 min(int3 a, int3 b) -{ - return make_int3(min(a.x,b.x), min(a.y,b.y), min(a.z,b.z)); -} -inline __host__ __device__ int4 min(int4 a, int4 b) -{ - return make_int4(min(a.x,b.x), min(a.y,b.y), min(a.z,b.z), min(a.w,b.w)); -} - -inline __host__ __device__ uint2 min(uint2 a, uint2 b) -{ - return make_uint2(min(a.x,b.x), min(a.y,b.y)); -} -inline __host__ __device__ uint3 min(uint3 a, uint3 b) -{ - return make_uint3(min(a.x,b.x), min(a.y,b.y), min(a.z,b.z)); -} -inline __host__ __device__ uint4 min(uint4 a, uint4 b) -{ - return make_uint4(min(a.x,b.x), min(a.y,b.y), min(a.z,b.z), min(a.w,b.w)); -} - -//////////////////////////////////////////////////////////////////////////////// -// max -//////////////////////////////////////////////////////////////////////////////// - -inline __host__ __device__ float2 fmaxf(float2 a, float2 b) -{ - return make_float2(fmaxf(a.x,b.x), fmaxf(a.y,b.y)); -} -inline __host__ __device__ float3 fmaxf(float3 a, float3 b) -{ - return make_float3(fmaxf(a.x,b.x), fmaxf(a.y,b.y), fmaxf(a.z,b.z)); -} -inline __host__ __device__ float4 fmaxf(float4 a, float4 b) -{ - return make_float4(fmaxf(a.x,b.x), fmaxf(a.y,b.y), fmaxf(a.z,b.z), fmaxf(a.w,b.w)); -} - -inline __host__ __device__ int2 max(int2 a, int2 b) -{ - return make_int2(max(a.x,b.x), max(a.y,b.y)); -} -inline __host__ __device__ int3 max(int3 a, int3 b) -{ - return make_int3(max(a.x,b.x), max(a.y,b.y), max(a.z,b.z)); -} -inline __host__ __device__ int4 max(int4 a, int4 b) -{ - return make_int4(max(a.x,b.x), 
max(a.y,b.y), max(a.z,b.z), max(a.w,b.w)); -} - -inline __host__ __device__ uint2 max(uint2 a, uint2 b) -{ - return make_uint2(max(a.x,b.x), max(a.y,b.y)); -} -inline __host__ __device__ uint3 max(uint3 a, uint3 b) -{ - return make_uint3(max(a.x,b.x), max(a.y,b.y), max(a.z,b.z)); -} -inline __host__ __device__ uint4 max(uint4 a, uint4 b) -{ - return make_uint4(max(a.x,b.x), max(a.y,b.y), max(a.z,b.z), max(a.w,b.w)); -} - -//////////////////////////////////////////////////////////////////////////////// -// lerp -// - linear interpolation between a and b, based on value t in [0, 1] range -//////////////////////////////////////////////////////////////////////////////// - -inline __device__ __host__ float lerp(float a, float b, float t) -{ - return a + t*(b-a); -} -inline __device__ __host__ float2 lerp(float2 a, float2 b, float t) -{ - return a + t*(b-a); -} -inline __device__ __host__ float3 lerp(float3 a, float3 b, float t) -{ - return a + t*(b-a); -} -inline __device__ __host__ float4 lerp(float4 a, float4 b, float t) -{ - return a + t*(b-a); -} - -//////////////////////////////////////////////////////////////////////////////// -// clamp -// - clamp the value v to be in the range [a, b] -//////////////////////////////////////////////////////////////////////////////// - -inline __device__ __host__ float clamp(float f, float a, float b) -{ - return fmaxf(a, fminf(f, b)); -} -inline __device__ __host__ int clamp(int f, int a, int b) -{ - return max(a, min(f, b)); -} -inline __device__ __host__ uint clamp(uint f, uint a, uint b) -{ - return max(a, min(f, b)); -} - -inline __device__ __host__ float2 clamp(float2 v, float a, float b) -{ - return make_float2(clamp(v.x, a, b), clamp(v.y, a, b)); -} -inline __device__ __host__ float2 clamp(float2 v, float2 a, float2 b) -{ - return make_float2(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y)); -} -inline __device__ __host__ float3 clamp(float3 v, float a, float b) -{ - return make_float3(clamp(v.x, a, b), clamp(v.y, a, b), 
clamp(v.z, a, b)); -} -inline __device__ __host__ float3 clamp(float3 v, float3 a, float3 b) -{ - return make_float3(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y), clamp(v.z, a.z, b.z)); -} -inline __device__ __host__ float4 clamp(float4 v, float a, float b) -{ - return make_float4(clamp(v.x, a, b), clamp(v.y, a, b), clamp(v.z, a, b), clamp(v.w, a, b)); -} -inline __device__ __host__ float4 clamp(float4 v, float4 a, float4 b) -{ - return make_float4(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y), clamp(v.z, a.z, b.z), clamp(v.w, a.w, b.w)); -} - -inline __device__ __host__ int2 clamp(int2 v, int a, int b) -{ - return make_int2(clamp(v.x, a, b), clamp(v.y, a, b)); -} -inline __device__ __host__ int2 clamp(int2 v, int2 a, int2 b) -{ - return make_int2(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y)); -} -inline __device__ __host__ int3 clamp(int3 v, int a, int b) -{ - return make_int3(clamp(v.x, a, b), clamp(v.y, a, b), clamp(v.z, a, b)); -} -inline __device__ __host__ int3 clamp(int3 v, int3 a, int3 b) -{ - return make_int3(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y), clamp(v.z, a.z, b.z)); -} -inline __device__ __host__ int4 clamp(int4 v, int a, int b) -{ - return make_int4(clamp(v.x, a, b), clamp(v.y, a, b), clamp(v.z, a, b), clamp(v.w, a, b)); -} -inline __device__ __host__ int4 clamp(int4 v, int4 a, int4 b) -{ - return make_int4(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y), clamp(v.z, a.z, b.z), clamp(v.w, a.w, b.w)); -} - -inline __device__ __host__ uint2 clamp(uint2 v, uint a, uint b) -{ - return make_uint2(clamp(v.x, a, b), clamp(v.y, a, b)); -} -inline __device__ __host__ uint2 clamp(uint2 v, uint2 a, uint2 b) -{ - return make_uint2(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y)); -} -inline __device__ __host__ uint3 clamp(uint3 v, uint a, uint b) -{ - return make_uint3(clamp(v.x, a, b), clamp(v.y, a, b), clamp(v.z, a, b)); -} -inline __device__ __host__ uint3 clamp(uint3 v, uint3 a, uint3 b) -{ - return make_uint3(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y), clamp(v.z, a.z, 
b.z)); -} -inline __device__ __host__ uint4 clamp(uint4 v, uint a, uint b) -{ - return make_uint4(clamp(v.x, a, b), clamp(v.y, a, b), clamp(v.z, a, b), clamp(v.w, a, b)); -} -inline __device__ __host__ uint4 clamp(uint4 v, uint4 a, uint4 b) -{ - return make_uint4(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y), clamp(v.z, a.z, b.z), clamp(v.w, a.w, b.w)); -} - -//////////////////////////////////////////////////////////////////////////////// -// dot product -//////////////////////////////////////////////////////////////////////////////// - -inline __host__ __device__ float dot(float2 a, float2 b) -{ - return a.x * b.x + a.y * b.y; -} -inline __host__ __device__ float dot(float3 a, float3 b) -{ - return a.x * b.x + a.y * b.y + a.z * b.z; -} -inline __host__ __device__ float dot(float4 a, float4 b) -{ - return a.x * b.x + a.y * b.y + a.z * b.z + a.w * b.w; -} - -inline __host__ __device__ int dot(int2 a, int2 b) -{ - return a.x * b.x + a.y * b.y; -} -inline __host__ __device__ int dot(int3 a, int3 b) -{ - return a.x * b.x + a.y * b.y + a.z * b.z; -} -inline __host__ __device__ int dot(int4 a, int4 b) -{ - return a.x * b.x + a.y * b.y + a.z * b.z + a.w * b.w; -} - -inline __host__ __device__ uint dot(uint2 a, uint2 b) -{ - return a.x * b.x + a.y * b.y; -} -inline __host__ __device__ uint dot(uint3 a, uint3 b) -{ - return a.x * b.x + a.y * b.y + a.z * b.z; -} -inline __host__ __device__ uint dot(uint4 a, uint4 b) -{ - return a.x * b.x + a.y * b.y + a.z * b.z + a.w * b.w; -} - -//////////////////////////////////////////////////////////////////////////////// -// length -//////////////////////////////////////////////////////////////////////////////// - -inline __host__ __device__ float length(float2 v) -{ - return sqrtf(dot(v, v)); -} -inline __host__ __device__ float length(float3 v) -{ - return sqrtf(dot(v, v)); -} -inline __host__ __device__ float length(float4 v) -{ - return sqrtf(dot(v, v)); -} - 
-//////////////////////////////////////////////////////////////////////////////// -// normalize -//////////////////////////////////////////////////////////////////////////////// - -inline __host__ __device__ float2 normalize(float2 v) -{ - float invLen = rsqrtf(dot(v, v)); - return v * invLen; -} -inline __host__ __device__ float3 normalize(float3 v) -{ - float invLen = rsqrtf(dot(v, v)); - return v * invLen; -} -inline __host__ __device__ float4 normalize(float4 v) -{ - float invLen = rsqrtf(dot(v, v)); - return v * invLen; -} - -//////////////////////////////////////////////////////////////////////////////// -// floor -//////////////////////////////////////////////////////////////////////////////// - -inline __host__ __device__ float2 floorf(float2 v) -{ - return make_float2(floorf(v.x), floorf(v.y)); -} -inline __host__ __device__ float3 floorf(float3 v) -{ - return make_float3(floorf(v.x), floorf(v.y), floorf(v.z)); -} -inline __host__ __device__ float4 floorf(float4 v) -{ - return make_float4(floorf(v.x), floorf(v.y), floorf(v.z), floorf(v.w)); -} - -//////////////////////////////////////////////////////////////////////////////// -// frac - returns the fractional portion of a scalar or each vector component -//////////////////////////////////////////////////////////////////////////////// - -inline __host__ __device__ float fracf(float v) -{ - return v - floorf(v); -} -inline __host__ __device__ float2 fracf(float2 v) -{ - return make_float2(fracf(v.x), fracf(v.y)); -} -inline __host__ __device__ float3 fracf(float3 v) -{ - return make_float3(fracf(v.x), fracf(v.y), fracf(v.z)); -} -inline __host__ __device__ float4 fracf(float4 v) -{ - return make_float4(fracf(v.x), fracf(v.y), fracf(v.z), fracf(v.w)); -} - -//////////////////////////////////////////////////////////////////////////////// -// fmod -//////////////////////////////////////////////////////////////////////////////// - -inline __host__ __device__ float2 fmodf(float2 a, float2 b) -{ - return 
make_float2(fmodf(a.x, b.x), fmodf(a.y, b.y)); -} -inline __host__ __device__ float3 fmodf(float3 a, float3 b) -{ - return make_float3(fmodf(a.x, b.x), fmodf(a.y, b.y), fmodf(a.z, b.z)); -} -inline __host__ __device__ float4 fmodf(float4 a, float4 b) -{ - return make_float4(fmodf(a.x, b.x), fmodf(a.y, b.y), fmodf(a.z, b.z), fmodf(a.w, b.w)); -} - -//////////////////////////////////////////////////////////////////////////////// -// absolute value -//////////////////////////////////////////////////////////////////////////////// - -inline __host__ __device__ float2 fabs(float2 v) -{ - return make_float2(fabs(v.x), fabs(v.y)); -} -inline __host__ __device__ float3 fabs(float3 v) -{ - return make_float3(fabs(v.x), fabs(v.y), fabs(v.z)); -} -inline __host__ __device__ float4 fabs(float4 v) -{ - return make_float4(fabs(v.x), fabs(v.y), fabs(v.z), fabs(v.w)); -} - -inline __host__ __device__ int2 abs(int2 v) -{ - return make_int2(abs(v.x), abs(v.y)); -} -inline __host__ __device__ int3 abs(int3 v) -{ - return make_int3(abs(v.x), abs(v.y), abs(v.z)); -} -inline __host__ __device__ int4 abs(int4 v) -{ - return make_int4(abs(v.x), abs(v.y), abs(v.z), abs(v.w)); -} - -//////////////////////////////////////////////////////////////////////////////// -// reflect -// - returns reflection of incident ray I around surface normal N -// - N should be normalized, reflected vector's length is equal to length of I -//////////////////////////////////////////////////////////////////////////////// - -inline __host__ __device__ float3 reflect(float3 i, float3 n) -{ - return i - 2.0f * n * dot(n,i); -} - -//////////////////////////////////////////////////////////////////////////////// -// cross product -//////////////////////////////////////////////////////////////////////////////// - -inline __host__ __device__ float3 cross(float3 a, float3 b) -{ - return make_float3(a.y*b.z - a.z*b.y, a.z*b.x - a.x*b.z, a.x*b.y - a.y*b.x); -} - 
-//////////////////////////////////////////////////////////////////////////////// -// smoothstep -// - returns 0 if x < a -// - returns 1 if x > b -// - otherwise returns smooth interpolation between 0 and 1 based on x -//////////////////////////////////////////////////////////////////////////////// - -inline __device__ __host__ float smoothstep(float a, float b, float x) -{ - float y = clamp((x - a) / (b - a), 0.0f, 1.0f); - return (y*y*(3.0f - (2.0f*y))); -} -inline __device__ __host__ float2 smoothstep(float2 a, float2 b, float2 x) -{ - float2 y = clamp((x - a) / (b - a), 0.0f, 1.0f); - return (y*y*(make_float2(3.0f) - (make_float2(2.0f)*y))); -} -inline __device__ __host__ float3 smoothstep(float3 a, float3 b, float3 x) -{ - float3 y = clamp((x - a) / (b - a), 0.0f, 1.0f); - return (y*y*(make_float3(3.0f) - (make_float3(2.0f)*y))); -} -inline __device__ __host__ float4 smoothstep(float4 a, float4 b, float4 x) -{ - float4 y = clamp((x - a) / (b - a), 0.0f, 1.0f); - return (y*y*(make_float4(3.0f) - (make_float4(2.0f)*y))); -} - -#endif diff --git a/kernel.cu b/kernel.cu deleted file mode 100644 index c6895dc..0000000 --- a/kernel.cu +++ /dev/null @@ -1,9 +0,0 @@ -#include -#include -#include -#include "scene.cuh" - -__global__ void render(uint8_t *image) { - scene scene; - //scene.render(image); -} diff --git a/kernel.cuh b/kernel.cuh deleted file mode 100644 index 2c5256e..0000000 --- a/kernel.cuh +++ /dev/null @@ -1,5 +0,0 @@ -#ifndef KERNEL_H -#define KERNEL_H -#include -__global__ void render(uint8_t *image); -#endif diff --git a/main.cu b/main.cu deleted file mode 100644 index 4aa6cbb..0000000 --- a/main.cu +++ /dev/null @@ -1,54 +0,0 @@ -#include -#include -#include -#include - -#include "kernel.cuh" -#include "raylib.h" - -int main() { - //bluuuuugh i'll figure out occupancy later, this res are easy - //calculated manually for gtx1060 with 20 SM, 1024 threads/SM - const int res_x = 32 * 20; - const int res_y = 32 * 20; - const dim3 blockCount(20, 20); - 
const dim3 threadCount(32, 32); - - uint8_t *image_d; - Color texture_data[res_x * res_y]; - SetTargetFPS(10); - - - //see if GPU is connected (my egpu is finicky) - { - int temp_device; - cudaError_t err; - if((err = cudaGetDevice(&temp_device)) != cudaSuccess) { - printf("failed to get device!\nError: %s\n", cudaGetErrorString(err)); - return(1); - } - } - - SetTraceLogLevel(LOG_ERROR); - InitWindow(res_x, res_y, "cuda teseteroni"); - - //TODO could probably cut out - Image image = GenImageColor(res_x, res_y, BLUE); - Texture tex = LoadTextureFromImage(image); - - while(!WindowShouldClose()) { - //cuda stuff - cudaMalloc((void **)&image_d, res_x * res_y * sizeof(Color)); - render<<>>(image_d); - cudaDeviceSynchronize(); - cudaMemcpy(texture_data, (void **)image_d, res_x * res_y * sizeof(Color), cudaMemcpyDeviceToHost); - - BeginDrawing(); - UpdateTexture(tex, texture_data); - DrawTexture(tex, 0, 0, WHITE); - DrawFPS(0, 0); - EndDrawing(); - } - - return 0; -} diff --git a/makefile b/makefile deleted file mode 100644 index 7f6bddb..0000000 --- a/makefile +++ /dev/null @@ -1,15 +0,0 @@ -LIBS = -lraylib -lGL -lm -lpthread -ldl -lrt -lX11 -$CC = gcc -INC = -I /opt/cuda/include -make: - nvcc $(LIBS) $(INC) -O0 --debug -c main.cu -o build/main.o - nvcc --device-debug --compile kernel.cu -o build/kernel.o - nvcc $(LIBS) -O0 -o build/indigo_worlds build/main.o build/kernel.o - -run: - build/indigo_worlds - -clean: - rm -rf build/* - - diff --git a/render_object.cuh b/render_object.cuh deleted file mode 100644 index 0063d94..0000000 --- a/render_object.cuh +++ /dev/null @@ -1,12 +0,0 @@ -#ifndef RENDER_OBJECT_H -#define RENDER_OBJECT_H -#include "entity.cuh" - -template class render_object : public entity { - using T3 = typename vect_t3::vect_t; - using entity::entity; - public: - virtual __device__ T distance_estimator(T3 point) const = 0; -}; - -#endif diff --git a/scene.cuh b/scene.cuh deleted file mode 100644 index 31bb99f..0000000 --- a/scene.cuh +++ /dev/null @@ 
-1,32 +0,0 @@ -#ifndef SCENE_H -#define SCENE_H - -#include "common.cuh" -#include "sphere.cuh" -#include "render_object.cuh" -#include "include/helper_math.h" - -template class camera; - -//when we get animations with multiple scenes, we'll make this a virtual function -//with array of DE objects and cam -template -class scene { - using T3 = typename vect_t3::vect_t; - public: - //__device__ void render(uint8_t *image) { cam.render(); }; - __device__ render_object **get_objs() { return objs; } - __device__ render_object **get_image() { return image; } - private: - camera cam = camera(); - sphere sp1 = sphere(vect_create(0,0.4,-5)); - sphere sp2 = sphere(vect_create(0,-0.4,-5)); - protected: - //idk why I need to specify the size... why can't the compiler figure that out? - render_object *objs[3] = {&sp1, &sp2, NULL}; - uint8_t *image; -}; - -#include "camera.cuh" - -#endif diff --git a/scene.h b/scene.h deleted file mode 100644 index 05ce2af..0000000 --- a/scene.h +++ /dev/null @@ -1,12 +0,0 @@ -#ifndef SCENE_H -#include SCENE_H -#include "camera.cuh" -#include "render_object.cuh" - -//for now we only neeed once scene, later we'll expand this to just be a virtual template -template class scene { - - -} - -#endif \ No newline at end of file diff --git a/sphere.cuh b/sphere.cuh deleted file mode 100644 index 33c77f2..0000000 --- a/sphere.cuh +++ /dev/null @@ -1,17 +0,0 @@ -#ifndef SPHERE_H -#define SPHERE_H -#include "render_object.cuh" -template class sphere : public render_object { - using render_object::render_object; - using T3 = typename vect_t3::vect_t; - public: - __device__ T distance_estimator(T3 point) const; - private: - T r_ = 1; -}; - -template __device__ T sphere::distance_estimator(T3 point) const { - return length(point) - r_; -} - -#endif diff --git a/src/.vscode/launch.json b/src/.vscode/launch.json new file mode 100644 index 0000000..33752cd --- /dev/null +++ b/src/.vscode/launch.json @@ -0,0 +1,19 @@ +{ + // Use IntelliSense to learn about 
possible attributes. + // Hover to view descriptions of existing attributes. + // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 + "version": "0.2.0", + "configurations": [ + { + "name": "CUDA C++: Launch", + "type": "cuda-gdb", + "request": "launch", + "program": "${workspaceFolder}/build/indigo_worlds" + }, + { + "name": "CUDA C++: Attach", + "type": "cuda-gdb", + "request": "attach" + } + ] +} \ No newline at end of file diff --git a/src/camera.cuh b/src/camera.cuh new file mode 100644 index 0000000..4d04fcd --- /dev/null +++ b/src/camera.cuh @@ -0,0 +1,75 @@ +#ifndef CAMERA_H +#define CAMERA_H + +#include "entity.cuh" +#include "common.cuh" +#include + + +//template class scene; + +//I am soooo high lol +class camera : public entity { + public: + __device__ camera(scene *pscene, const T fov = 1, const vect3 pos = make_vect3(0), const vect3 rot = make_vect3(0)) + : pscene(pscene), fov(fov), entity(pos, rot, make_vect3(0)) {}; + private: + T fov; + float2 size; + int steps = 100; + T clip_min = .1; + T clip_max = 100; + //scene *pscene; +}; + +/** +//later we'll make scenes objects, rn im lazy (TODO) +template __device__ void camera::render() { + //TODO *really* need to clean this up once you get further + //extra dimentions is extra math + //either generisize float3 or stop using this fucking template nonsense + const uint3 unnormalized_uv = ((blockDim * blockIdx) + threadIdx); + const unsigned int img_index = (unnormalized_uv.x + (unnormalized_uv.y * blockDim.x * gridDim.x)) * 4; + const vect3 uv = ((2 * make_vect3(unnormalized_uv)) / make_vect3(gridDim * blockDim)) - 1; + const vect3 ray_direction = normalize(make_vect3(uv.x, uv.y, 1)); + T dist; + T total_dist = 0; + vect3 ray; + int i; + + + //if(img_index == 640) { printf("%f, %f, %f\n", uv.x, uv.y, uv.z); } + + T min_dist = clip_max; + + render_object **objs = pscene->get_objs(); + for(i = 0; i < steps; i++) { + ray = this->pos_ + (total_dist * ray_direction); + //gyagh memory 
lookups + for(unsigned int oi = 0; objs[oi] != NULL; oi++) { + dist = object.distance_estimator(ray); + } + if((dist < clip_min)) { + //image[img_index] = 0xff; + break; + } + if((dist > clip_max)) { + //image[img_index+2] = 0xff; + break; + } + total_dist += dist; + } + + + + + //image[img_index] = 0x00; + //image[img_index+1] = 0x00; + //image[img_index+2] = p; + //image[img_index+3] = 0xff; + +} +**/ + + +#endif diff --git a/src/common.cuh b/src/common.cuh new file mode 100644 index 0000000..9e026f4 --- /dev/null +++ b/src/common.cuh @@ -0,0 +1,53 @@ +#ifndef COMMON_H +#define COMMON_H + +#include "include/helper_math.h" + +/** + +template class vect_t2; +template class vect3; +template class vect_t4; + +//this feels so hacky... idk why people are so scared of metaprogramming +template <> class vect_t2 { public: using vect_t = double2; }; +template <> class vect3 { public: using vect_t = double3; }; +template <> class vect_t4 { public: using vect_t = double4; }; + +template <> class vect_t2 { public: using vect_t = float2; }; +template <> class vect3 { public: using vect_t = float3; }; +template <> class vect_t4 { public: using vect_t = float4; }; + + +template __device__ T vect_create(X x); +template __device__ T vect_create(X x, Y y, Z z); + +//I have no fucking clue if this is right, check me later ig +template __device__ inline float3 vect_create(X x) { return make_float3(x); } + +template __device__ inline float3 vect_create(X x, Y y, Z z) { return make_float3(x, y, z); } +**/ + +/** I'm not sure weather float or double percision is nessesary. I was using +templates, but this changes the structure of my entire project in unwanted +ways, so I'm switching over to typedefs. **/ + +typedef float2 vect2; +typedef float3 vect3; +typedef float T; + +#define make_vect3(...) (make_float3(__VA_ARGS__)) + +//TODO move to color.cuh +typedef float3 Color; +#define make_color(...) 
(make_float3(__VA_ARGS__)) + +//TODO move to ray.cuh +struct Ray { + Color color; + vect3 start; + vect3 direction; //MUST BE A UNIT VECTOR + int bounces; +}; + +#endif diff --git a/src/entity.cuh b/src/entity.cuh new file mode 100644 index 0000000..b086644 --- /dev/null +++ b/src/entity.cuh @@ -0,0 +1,30 @@ +#ifndef ENTITY_H +#define ENTITY_H +#include "common.cuh" + +//we could make a template to allow double percision, but start with float +//idk how nessesary it is yet so I'll go ahead. +//I know I needed it for zoomin far into the mandelbrot ig, so it's not +//out of the question +class Entity { + public: + __device__ Entity() : pos_(make_vect3(0)), rot_(make_vect3(0)), scale_(make_vect3(0)) {}; + __device__ Entity(const vect3 pos, const vect3 rot, const vect3 scale) : pos_(pos), rot_(rot), scale_(scale) {}; + __device__ Entity(const float3 pos) : pos_(pos), rot_(make_vect3(0)), scale_(make_vect3(0)) {}; + + + vect3 get_pos() const { return pos_; } + vect3 get_rot() const { return rot_; } + vect3 get_scale() const { return scale_; } + + __device__ void set_pos(const vect3 pos) { pos_ = pos; } + __device__ void set_rot(const vect3 rot) { rot_ = rot; } + __device__ void set_scale(const vect3 scale) { scale_ = scale; } + + protected: + vect3 pos_; + vect3 rot_; + vect3 scale_; + +}; +#endif diff --git a/src/include/helper_math.h b/src/include/helper_math.h new file mode 100644 index 0000000..6d2f63a --- /dev/null +++ b/src/include/helper_math.h @@ -0,0 +1,1469 @@ +/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of NVIDIA CORPORATION nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * This file implements common mathematical operations on vector types + * (float3, float4 etc.) since these are not provided as standard by CUDA. + * + * The syntax is modeled on the Cg standard library. + * + * This is part of the Helper library includes + * + * Thanks to Linh Hah for additions and fixes. 
+ */ + +#ifndef HELPER_MATH_H +#define HELPER_MATH_H + +#include "cuda_runtime.h" + +typedef unsigned int uint; +typedef unsigned short ushort; + +#ifndef EXIT_WAIVED +#define EXIT_WAIVED 2 +#endif + +#ifndef __CUDACC__ +#include + +//////////////////////////////////////////////////////////////////////////////// +// host implementations of CUDA functions +//////////////////////////////////////////////////////////////////////////////// + +inline float fminf(float a, float b) +{ + return a < b ? a : b; +} + +inline float fmaxf(float a, float b) +{ + return a > b ? a : b; +} + +inline int max(int a, int b) +{ + return a > b ? a : b; +} + +inline int min(int a, int b) +{ + return a < b ? a : b; +} + +inline float rsqrtf(float x) +{ + return 1.0f / sqrtf(x); +} +#endif + +//////////////////////////////////////////////////////////////////////////////// +// constructors +//////////////////////////////////////////////////////////////////////////////// + +inline __host__ __device__ float2 make_float2(float s) +{ + return make_float2(s, s); +} +inline __host__ __device__ float2 make_float2(float3 a) +{ + return make_float2(a.x, a.y); +} +inline __host__ __device__ float2 make_float2(int2 a) +{ + return make_float2(float(a.x), float(a.y)); +} +inline __host__ __device__ float2 make_float2(uint2 a) +{ + return make_float2(float(a.x), float(a.y)); +} + +inline __host__ __device__ int2 make_int2(int s) +{ + return make_int2(s, s); +} +inline __host__ __device__ int2 make_int2(int3 a) +{ + return make_int2(a.x, a.y); +} +inline __host__ __device__ int2 make_int2(uint2 a) +{ + return make_int2(int(a.x), int(a.y)); +} +inline __host__ __device__ int2 make_int2(float2 a) +{ + return make_int2(int(a.x), int(a.y)); +} + +inline __host__ __device__ uint2 make_uint2(uint s) +{ + return make_uint2(s, s); +} +inline __host__ __device__ uint2 make_uint2(uint3 a) +{ + return make_uint2(a.x, a.y); +} +inline __host__ __device__ uint2 make_uint2(int2 a) +{ + return make_uint2(uint(a.x), 
uint(a.y)); +} + +inline __host__ __device__ float3 make_float3(float s) +{ + return make_float3(s, s, s); +} +inline __host__ __device__ float3 make_float3(float2 a) +{ + return make_float3(a.x, a.y, 0.0f); +} +inline __host__ __device__ float3 make_float3(float2 a, float s) +{ + return make_float3(a.x, a.y, s); +} +inline __host__ __device__ float3 make_float3(float4 a) +{ + return make_float3(a.x, a.y, a.z); +} +inline __host__ __device__ float3 make_float3(int3 a) +{ + return make_float3(float(a.x), float(a.y), float(a.z)); +} +inline __host__ __device__ float3 make_float3(uint3 a) +{ + return make_float3(float(a.x), float(a.y), float(a.z)); +} + +inline __host__ __device__ int3 make_int3(int s) +{ + return make_int3(s, s, s); +} +inline __host__ __device__ int3 make_int3(int2 a) +{ + return make_int3(a.x, a.y, 0); +} +inline __host__ __device__ int3 make_int3(int2 a, int s) +{ + return make_int3(a.x, a.y, s); +} +inline __host__ __device__ int3 make_int3(uint3 a) +{ + return make_int3(int(a.x), int(a.y), int(a.z)); +} +inline __host__ __device__ int3 make_int3(float3 a) +{ + return make_int3(int(a.x), int(a.y), int(a.z)); +} + +inline __host__ __device__ uint3 make_uint3(uint s) +{ + return make_uint3(s, s, s); +} +inline __host__ __device__ uint3 make_uint3(uint2 a) +{ + return make_uint3(a.x, a.y, 0); +} +inline __host__ __device__ uint3 make_uint3(uint2 a, uint s) +{ + return make_uint3(a.x, a.y, s); +} +inline __host__ __device__ uint3 make_uint3(uint4 a) +{ + return make_uint3(a.x, a.y, a.z); +} +inline __host__ __device__ uint3 make_uint3(int3 a) +{ + return make_uint3(uint(a.x), uint(a.y), uint(a.z)); +} + +inline __host__ __device__ float4 make_float4(float s) +{ + return make_float4(s, s, s, s); +} +inline __host__ __device__ float4 make_float4(float3 a) +{ + return make_float4(a.x, a.y, a.z, 0.0f); +} +inline __host__ __device__ float4 make_float4(float3 a, float w) +{ + return make_float4(a.x, a.y, a.z, w); +} +inline __host__ __device__ float4 
make_float4(int4 a) +{ + return make_float4(float(a.x), float(a.y), float(a.z), float(a.w)); +} +inline __host__ __device__ float4 make_float4(uint4 a) +{ + return make_float4(float(a.x), float(a.y), float(a.z), float(a.w)); +} + +inline __host__ __device__ int4 make_int4(int s) +{ + return make_int4(s, s, s, s); +} +inline __host__ __device__ int4 make_int4(int3 a) +{ + return make_int4(a.x, a.y, a.z, 0); +} +inline __host__ __device__ int4 make_int4(int3 a, int w) +{ + return make_int4(a.x, a.y, a.z, w); +} +inline __host__ __device__ int4 make_int4(uint4 a) +{ + return make_int4(int(a.x), int(a.y), int(a.z), int(a.w)); +} +inline __host__ __device__ int4 make_int4(float4 a) +{ + return make_int4(int(a.x), int(a.y), int(a.z), int(a.w)); +} + + +inline __host__ __device__ uint4 make_uint4(uint s) +{ + return make_uint4(s, s, s, s); +} +inline __host__ __device__ uint4 make_uint4(uint3 a) +{ + return make_uint4(a.x, a.y, a.z, 0); +} +inline __host__ __device__ uint4 make_uint4(uint3 a, uint w) +{ + return make_uint4(a.x, a.y, a.z, w); +} +inline __host__ __device__ uint4 make_uint4(int4 a) +{ + return make_uint4(uint(a.x), uint(a.y), uint(a.z), uint(a.w)); +} + +//////////////////////////////////////////////////////////////////////////////// +// negate +//////////////////////////////////////////////////////////////////////////////// + +inline __host__ __device__ float2 operator-(float2 &a) +{ + return make_float2(-a.x, -a.y); +} +inline __host__ __device__ int2 operator-(int2 &a) +{ + return make_int2(-a.x, -a.y); +} +inline __host__ __device__ float3 operator-(float3 &a) +{ + return make_float3(-a.x, -a.y, -a.z); +} +inline __host__ __device__ int3 operator-(int3 &a) +{ + return make_int3(-a.x, -a.y, -a.z); +} +inline __host__ __device__ float4 operator-(float4 &a) +{ + return make_float4(-a.x, -a.y, -a.z, -a.w); +} +inline __host__ __device__ int4 operator-(int4 &a) +{ + return make_int4(-a.x, -a.y, -a.z, -a.w); +} + 
+////////////////////////////////////////////////////////////////////////////////
+// addition
+// - component-wise; a scalar operand is added to every component
+////////////////////////////////////////////////////////////////////////////////
+
+inline __host__ __device__ float2 operator+(float2 a, float2 b) {
+    return make_float2(a.x + b.x, a.y + b.y);
+}
+inline __host__ __device__ void operator+=(float2 &a, float2 b) {
+    a.x += b.x;
+    a.y += b.y;
+}
+inline __host__ __device__ float2 operator+(float2 a, float b) {
+    return make_float2(a.x + b, a.y + b);
+}
+inline __host__ __device__ float2 operator+(float b, float2 a) {
+    return make_float2(a.x + b, a.y + b);
+}
+inline __host__ __device__ void operator+=(float2 &a, float b) {
+    a.x += b;
+    a.y += b;
+}
+
+inline __host__ __device__ int2 operator+(int2 a, int2 b) {
+    return make_int2(a.x + b.x, a.y + b.y);
+}
+inline __host__ __device__ void operator+=(int2 &a, int2 b) {
+    a.x += b.x;
+    a.y += b.y;
+}
+inline __host__ __device__ int2 operator+(int2 a, int b) {
+    return make_int2(a.x + b, a.y + b);
+}
+inline __host__ __device__ int2 operator+(int b, int2 a) {
+    return make_int2(a.x + b, a.y + b);
+}
+inline __host__ __device__ void operator+=(int2 &a, int b) {
+    a.x += b;
+    a.y += b;
+}
+
+inline __host__ __device__ uint2 operator+(uint2 a, uint2 b) {
+    return make_uint2(a.x + b.x, a.y + b.y);
+}
+inline __host__ __device__ void operator+=(uint2 &a, uint2 b) {
+    a.x += b.x;
+    a.y += b.y;
+}
+inline __host__ __device__ uint2 operator+(uint2 a, uint b) {
+    return make_uint2(a.x + b, a.y + b);
+}
+inline __host__ __device__ uint2 operator+(uint b, uint2 a) {
+    return make_uint2(a.x + b, a.y + b);
+}
+inline __host__ __device__ void operator+=(uint2 &a, uint b) {
+    a.x += b;
+    a.y += b;
+}
+
+
+inline __host__ __device__ float3 operator+(float3 a, float3 b) {
+    return make_float3(a.x + b.x, a.y + b.y, a.z + b.z);
+}
+inline __host__ __device__ void operator+=(float3 &a, float3 b) {
+    a.x += b.x;
+    a.y += b.y;
+    a.z += b.z;
+}
+inline __host__ __device__ float3 operator+(float3 a, float b) {
+    return make_float3(a.x + b, a.y + b, a.z + b);
+}
+inline __host__ __device__ void operator+=(float3 &a, float b) {
+    a.x += b;
+    a.y += b;
+    a.z += b;
+}
+
+inline __host__ __device__ int3 operator+(int3 a, int3 b) {
+    return make_int3(a.x + b.x, a.y + b.y, a.z + b.z);
+}
+inline __host__ __device__ void operator+=(int3 &a, int3 b) {
+    a.x += b.x;
+    a.y += b.y;
+    a.z += b.z;
+}
+inline __host__ __device__ int3 operator+(int3 a, int b) {
+    return make_int3(a.x + b, a.y + b, a.z + b);
+}
+inline __host__ __device__ void operator+=(int3 &a, int b) {
+    a.x += b;
+    a.y += b;
+    a.z += b;
+}
+
+inline __host__ __device__ uint3 operator+(uint3 a, uint3 b) {
+    return make_uint3(a.x + b.x, a.y + b.y, a.z + b.z);
+}
+inline __host__ __device__ void operator+=(uint3 &a, uint3 b) {
+    a.x += b.x;
+    a.y += b.y;
+    a.z += b.z;
+}
+inline __host__ __device__ uint3 operator+(uint3 a, uint b) {
+    return make_uint3(a.x + b, a.y + b, a.z + b);
+}
+inline __host__ __device__ void operator+=(uint3 &a, uint b) {
+    a.x += b;
+    a.y += b;
+    a.z += b;
+}
+
+// scalar-first forms for the three-component types
+inline __host__ __device__ int3 operator+(int b, int3 a) {
+    return make_int3(a.x + b, a.y + b, a.z + b);
+}
+inline __host__ __device__ uint3 operator+(uint b, uint3 a) {
+    return make_uint3(a.x + b, a.y + b, a.z + b);
+}
+inline __host__ __device__ float3 operator+(float b, float3 a) {
+    return make_float3(a.x + b, a.y + b, a.z + b);
+}
+
+inline __host__ __device__ float4 operator+(float4 a, float4 b) {
+    return make_float4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w);
+}
+inline __host__ __device__ void operator+=(float4 &a, float4 b) {
+    a.x += b.x;
+    a.y += b.y;
+    a.z += b.z;
+    a.w += b.w;
+}
+inline __host__ __device__ float4 operator+(float4 a, float b) {
+    return make_float4(a.x + b, a.y + b, a.z + b, a.w + b);
+}
+inline __host__ __device__ float4 operator+(float b, float4 a) {
+    return make_float4(a.x + b, a.y + b, a.z + b, a.w + b);
+}
+inline __host__ __device__ void operator+=(float4 &a, float b) {
+    a.x += b;
+    a.y += b;
+    a.z += b;
+    a.w += b;
+}
+
+inline __host__ __device__ int4 operator+(int4 a, int4 b) {
+    return make_int4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w);
+}
+inline __host__ __device__ void operator+=(int4 &a, int4 b) {
+    a.x += b.x;
+    a.y += b.y;
+    a.z += b.z;
+    a.w += b.w;
+}
+inline __host__ __device__ int4 operator+(int4 a, int b) {
+    return make_int4(a.x + b, a.y + b, a.z + b, a.w + b);
+}
+inline __host__ __device__ int4 operator+(int b, int4 a) {
+    return make_int4(a.x + b, a.y + b, a.z + b, a.w + b);
+}
+inline __host__ __device__ void operator+=(int4 &a, int b) {
+    a.x += b;
+    a.y += b;
+    a.z += b;
+    a.w += b;
+}
+
+inline __host__ __device__ uint4 operator+(uint4 a, uint4 b) {
+    return make_uint4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w);
+}
+inline __host__ __device__ void operator+=(uint4 &a, uint4 b) {
+    a.x += b.x;
+    a.y += b.y;
+    a.z += b.z;
+    a.w += b.w;
+}
+inline __host__ __device__ uint4 operator+(uint4 a, uint b) {
+    return make_uint4(a.x + b, a.y + b, a.z + b, a.w + b);
+}
+inline __host__ __device__ uint4 operator+(uint b, uint4 a) {
+    return make_uint4(a.x + b, a.y + b, a.z + b, a.w + b);
+}
+inline __host__ __device__ void operator+=(uint4 &a, uint b) {
+    a.x += b;
+    a.y += b;
+    a.z += b;
+    a.w += b;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// subtract
+// - component-wise; (vector - scalar) subtracts the scalar from every
+//   component, (scalar - vector) subtracts every component from the scalar
+////////////////////////////////////////////////////////////////////////////////
+
+inline __host__ __device__ float2 operator-(float2 a, float2 b) {
+    return make_float2(a.x - b.x, a.y - b.y);
+}
+inline __host__ __device__ void operator-=(float2 &a, float2 b) {
+    a.x -= b.x;
+    a.y -= b.y;
+}
+inline __host__ __device__ float2 operator-(float2 a, float b) {
+    return make_float2(a.x - b, a.y - b);
+}
+inline __host__ __device__ float2 operator-(float b, float2 a) {
+    return make_float2(b - a.x, b - a.y);
+}
+inline __host__ __device__ void operator-=(float2 &a, float b) {
+    a.x -= b;
+    a.y -= b;
+}
+
+inline __host__ __device__ int2 operator-(int2 a, int2 b) {
+    return make_int2(a.x - b.x, a.y - b.y);
+}
+inline __host__ __device__ void operator-=(int2 &a, int2 b) {
+    a.x -= b.x;
+    a.y -= b.y;
+}
+inline __host__ __device__ int2 operator-(int2 a, int b) {
+    return make_int2(a.x - b, a.y - b);
+}
+inline __host__ __device__ int2 operator-(int b, int2 a) {
+    return make_int2(b - a.x, b - a.y);
+}
+inline __host__ __device__ void operator-=(int2 &a, int b) {
+    a.x -= b;
+    a.y -= b;
+}
+
+inline __host__ __device__ uint2 operator-(uint2 a, uint2 b) {
+    return make_uint2(a.x - b.x, a.y - b.y);
+}
+inline __host__ __device__ void operator-=(uint2 &a, uint2 b) {
+    a.x -= b.x;
+    a.y -= b.y;
+}
+inline __host__ __device__ uint2 operator-(uint2 a, uint b) {
+    return make_uint2(a.x - b, a.y - b);
+}
+inline __host__ __device__ uint2 operator-(uint b, uint2 a) {
+    return make_uint2(b - a.x, b - a.y);
+}
+inline __host__ __device__ void operator-=(uint2 &a, uint b) {
+    a.x -= b;
+    a.y -= b;
+}
+
+inline __host__ __device__ float3 operator-(float3 a, float3 b) {
+    return make_float3(a.x - b.x, a.y - b.y, a.z - b.z);
+}
+inline __host__ __device__ void operator-=(float3 &a, float3 b) {
+    a.x -= b.x;
+    a.y -= b.y;
+    a.z -= b.z;
+}
+inline __host__ __device__ float3 operator-(float3 a, float b) {
+    return make_float3(a.x - b, a.y - b, a.z - b);
+}
+inline __host__ __device__ float3 operator-(float b, float3 a) {
+    return make_float3(b - a.x, b - a.y, b - a.z);
+}
+inline __host__ __device__ void operator-=(float3 &a, float b) {
+    a.x -= b;
+    a.y -= b;
+    a.z -= b;
+}
+
+inline __host__ __device__ int3 operator-(int3 a, int3 b) {
+    return make_int3(a.x - b.x, a.y - b.y, a.z - b.z);
+}
+inline __host__ __device__ void operator-=(int3 &a, int3 b) {
+    a.x -= b.x;
+    a.y -= b.y;
+    a.z -= b.z;
+}
+inline __host__ __device__ int3 operator-(int3 a, int b) {
+    return make_int3(a.x - b, a.y - b, a.z - b);
+}
+inline __host__ __device__ int3 operator-(int b, int3 a) {
+    return make_int3(b - a.x, b - a.y, b - a.z);
+}
+inline __host__ __device__ void operator-=(int3 &a, int b) {
+    a.x -= b;
+    a.y -= b;
+    a.z -= b;
+}
+
+inline __host__ __device__ uint3 operator-(uint3 a, uint3 b) {
+    return make_uint3(a.x - b.x, a.y - b.y, a.z - b.z);
+}
+inline __host__ __device__ void operator-=(uint3 &a, uint3 b) {
+    a.x -= b.x;
+    a.y -= b.y;
+    a.z -= b.z;
+}
+inline __host__ __device__ uint3 operator-(uint3 a, uint b) {
+    return make_uint3(a.x - b, a.y - b, a.z - b);
+}
+inline __host__ __device__ uint3 operator-(uint b, uint3 a) {
+    return make_uint3(b - a.x, b - a.y, b - a.z);
+}
+inline __host__ __device__ void operator-=(uint3 &a, uint b) {
+    a.x -= b;
+    a.y -= b;
+    a.z -= b;
+}
+
+inline __host__ __device__ float4 operator-(float4 a, float4 b) {
+    return make_float4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w);
+}
+inline __host__ __device__ void operator-=(float4 &a, float4 b) {
+    a.x -= b.x;
+    a.y -= b.y;
+    a.z -= b.z;
+    a.w -= b.w;
+}
+inline __host__ __device__ float4 operator-(float4 a, float b) {
+    return make_float4(a.x - b, a.y - b, a.z - b, a.w - b);
+}
+// scalar-first form, matching the one every other vector type here provides
+inline __host__ __device__ float4 operator-(float b, float4 a) {
+    return make_float4(b - a.x, b - a.y, b - a.z, b - a.w);
+}
+inline __host__ __device__ void operator-=(float4 &a, float b) {
+    a.x -= b;
+    a.y -= b;
+    a.z -= b;
+    a.w -= b;
+}
+
+inline __host__ __device__ int4 operator-(int4 a, int4 b) {
+    return make_int4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w);
+}
+inline __host__ __device__ void operator-=(int4 &a, int4 b) {
+    a.x -= b.x;
+    a.y -= b.y;
+    a.z -= b.z;
+    a.w -= b.w;
+}
+inline __host__ __device__ int4 operator-(int4 a, int b) {
+    return make_int4(a.x - b, a.y - b, a.z - b, a.w - b);
+}
+inline __host__ __device__ int4 operator-(int b, int4 a) {
+    return make_int4(b - a.x, b - a.y, b - a.z, b - a.w);
+}
+inline __host__ __device__ void operator-=(int4 &a, int b) {
+    a.x -= b;
+    a.y -= b;
+    a.z -= b;
+    a.w -= b;
+}
+
+inline __host__ __device__ uint4 operator-(uint4 a, uint4 b) {
+    return make_uint4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w);
+}
+inline __host__ __device__ void operator-=(uint4 &a, uint4 b) {
+    a.x -= b.x;
+    a.y -= b.y;
+    a.z -= b.z;
+    a.w -= b.w;
+}
+inline __host__ __device__ uint4 operator-(uint4 a, uint b) {
+    return make_uint4(a.x - b, a.y - b, a.z - b, a.w - b);
+}
+inline __host__ __device__ uint4 operator-(uint b, uint4 a) {
+    return make_uint4(b - a.x, b - a.y, b - a.z, b - a.w);
+}
+inline __host__ __device__ void operator-=(uint4 &a, uint b) {
+    a.x -= b;
+    a.y -= b;
+    a.z -= b;
+    a.w -= b;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// multiply
+// - component-wise (not a dot or cross product); a scalar operand scales
+//   every component
+////////////////////////////////////////////////////////////////////////////////
+
+inline __host__ __device__ float2 operator*(float2 a, float2 b) {
+    return make_float2(a.x * b.x, a.y * b.y);
+}
+inline __host__ __device__ void operator*=(float2 &a, float2 b) {
+    a.x *= b.x;
+    a.y *= b.y;
+}
+inline __host__ __device__ float2 operator*(float2 a, float b) {
+    return make_float2(a.x * b, a.y * b);
+}
+inline __host__ __device__ float2 operator*(float b, float2 a) {
+    return make_float2(b * a.x, b * a.y);
+}
+inline __host__ __device__ void operator*=(float2 &a, float b) {
+    a.x *= b;
+    a.y *= b;
+}
+
+inline __host__ __device__ int2 operator*(int2 a, int2 b) {
+    return make_int2(a.x * b.x, a.y * b.y);
+}
+inline __host__ __device__ void operator*=(int2 &a, int2 b) {
+    a.x *= b.x;
+    a.y *= b.y;
+}
+inline __host__ __device__ int2 operator*(int2 a, int b) {
+    return make_int2(a.x * b, a.y * b);
+}
+inline __host__ __device__ int2 operator*(int b, int2 a) {
+    return make_int2(b * a.x, b * a.y);
+}
+inline __host__ __device__ void operator*=(int2 &a, int b) {
+    a.x *= b;
+    a.y *= b;
+}
+
+inline __host__ __device__ uint2 operator*(uint2 a, uint2 b) {
+    return make_uint2(a.x * b.x, a.y * b.y);
+}
+inline __host__ __device__ void operator*=(uint2 &a, uint2 b) {
+    a.x *= b.x;
+    a.y *= b.y;
+}
+inline __host__ __device__ uint2 operator*(uint2 a, uint b) {
+    return make_uint2(a.x * b, a.y * b);
+}
+inline __host__ __device__ uint2 operator*(uint b, uint2 a) {
+    return make_uint2(b * a.x, b * a.y);
+}
+inline __host__ __device__ void operator*=(uint2 &a, uint b) {
+    a.x *= b;
+    a.y *= b;
+}
+
+inline __host__ __device__ float3 operator*(float3 a, float3 b) {
+    return make_float3(a.x * b.x, a.y * b.y, a.z * b.z);
+}
+inline __host__ __device__ void operator*=(float3 &a, float3 b) {
+    a.x *= b.x;
+    a.y *= b.y;
+    a.z *= b.z;
+}
+inline __host__ __device__ float3 operator*(float3 a, float b) {
+    return make_float3(a.x * b, a.y * b, a.z * b);
+}
+inline __host__ __device__ float3 operator*(float b, float3 a) {
+    return make_float3(b * a.x, b * a.y, b * a.z);
+}
+inline __host__ __device__ void operator*=(float3 &a, float b) {
+    a.x *= b;
+    a.y *= b;
+    a.z *= b;
+}
+
+inline __host__ __device__ int3 operator*(int3 a, int3 b) {
+    return make_int3(a.x * b.x, a.y * b.y, a.z * b.z);
+}
+inline __host__ __device__ void operator*=(int3 &a, int3 b) {
+    a.x *= b.x;
+    a.y *= b.y;
+    a.z *= b.z;
+}
+inline __host__ __device__ int3 operator*(int3 a, int b) {
+    return make_int3(a.x * b, a.y * b, a.z * b);
+}
+inline __host__ __device__ int3 operator*(int b, int3 a) {
+    return make_int3(b * a.x, b * a.y, b * a.z);
+}
+inline __host__ __device__ void operator*=(int3 &a, int b) {
+    a.x *= b;
+    a.y *= b;
+    a.z *= b;
+}
+
+inline __host__ __device__ uint3 operator*(uint3 a, uint3 b) {
+    return make_uint3(a.x * b.x, a.y * b.y, a.z * b.z);
+}
+inline __host__ __device__ void operator*=(uint3 &a, uint3 b) {
+    a.x *= b.x;
+    a.y *= b.y;
+    a.z *= b.z;
+}
+inline __host__ __device__ uint3 operator*(uint3 a, uint b) {
+    return make_uint3(a.x * b, a.y * b, a.z * b);
+}
+inline __host__ __device__ uint3 operator*(uint b, uint3 a) {
+    return make_uint3(b * a.x, b * a.y, b * a.z);
+}
+inline __host__ __device__ void operator*=(uint3 &a, uint b) {
+    a.x *= b;
+    a.y *= b;
+    a.z *= b;
+}
+
+inline __host__ __device__ float4 operator*(float4 a, float4 b) {
+    return make_float4(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w);
+}
+inline __host__ __device__ void operator*=(float4 &a, float4 b) {
+    a.x *= b.x;
+    a.y *= b.y;
+    a.z *= b.z;
+    a.w *= b.w;
+}
+inline __host__ __device__ float4 operator*(float4 a, float b) {
+    return make_float4(a.x * b, a.y * b, a.z * b, a.w * b);
+}
+inline __host__ __device__ float4 operator*(float b, float4 a) {
+    return make_float4(b * a.x, b * a.y, b * a.z, b * a.w);
+}
+inline __host__ __device__ void operator*=(float4 &a, float b) {
+    a.x *= b;
+    a.y *= b;
+    a.z *= b;
+    a.w *= b;
+}
+
+inline __host__ __device__ int4 operator*(int4 a, int4 b) {
+    return make_int4(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w);
+}
+inline __host__ __device__ void operator*=(int4 &a, int4 b) {
+    a.x *= b.x;
+    a.y *= b.y;
+    a.z *= b.z;
+    a.w *= b.w;
+}
+inline __host__ __device__ int4 operator*(int4 a, int b) {
+    return make_int4(a.x * b, a.y * b, a.z * b, a.w * b);
+}
+inline __host__ __device__ int4 operator*(int b, int4 a) {
+    return make_int4(b * a.x, b * a.y, b * a.z, b * a.w);
+}
+inline __host__ __device__ void operator*=(int4 &a, int b) {
+    a.x *= b;
+    a.y *= b;
+    a.z *= b;
+    a.w *= b;
+}
+
+inline __host__ __device__ uint4 operator*(uint4 a, uint4 b) {
+    return make_uint4(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w);
+}
+inline __host__ __device__ void operator*=(uint4 &a, uint4 b) {
+    a.x *= b.x;
+    a.y *= b.y;
+    a.z *= b.z;
+    a.w *= b.w;
+}
+inline __host__ __device__ uint4 operator*(uint4 a, uint b) {
+    return make_uint4(a.x * b, a.y * b, a.z * b, a.w * b);
+}
+inline __host__ __device__ uint4 operator*(uint b, uint4 a) {
+    return make_uint4(b * a.x, b * a.y, b * a.z, b * a.w);
+}
+inline __host__ __device__ void operator*=(uint4 &a, uint b) {
+    a.x *= b;
+    a.y *= b;
+    a.z *= b;
+    a.w *= b;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// divide
+////////////////////////////////////////////////////////////////////////////////
+
+// component-wise division; the overloads below cover the float vector types
+inline __host__ __device__ float2 operator/(float2 a, float2 b) {
+    return make_float2(a.x / b.x, a.y / b.y);
+}
+inline __host__ __device__ void operator/=(float2 &a, float2 b) {
+    a.x /= b.x;
+    a.y /= b.y;
+}
+inline __host__ __device__ float2 operator/(float2 a, float b) {
+    return make_float2(a.x / b, a.y / b);
+}
+inline __host__ __device__ void operator/=(float2 &a, float b) {
+    a.x /= b;
+    a.y /= b;
+}
+inline __host__ __device__ float2 operator/(float b, float2 a) {
+    return make_float2(b / a.x, b / a.y);
+}
+
+inline __host__ __device__ float3 operator/(float3 a, float3 b) {
+    return make_float3(a.x / b.x, a.y / b.y, a.z / b.z);
+}
+inline __host__ __device__ void operator/=(float3 &a, float3 b) {
+    a.x /= b.x;
+    a.y /= b.y;
+    a.z /= b.z;
+}
+inline __host__ __device__ float3 operator/(float3 a, float b) {
+    return make_float3(a.x / b, a.y / b, a.z / b);
+}
+inline __host__ __device__ void operator/=(float3 &a, float b) {
+    a.x /= b;
+    a.y /= b;
+    a.z /= b;
+}
+inline __host__ __device__ float3 operator/(float b, float3 a) {
+    return make_float3(b / a.x, b / a.y, b / a.z);
+}
+
+inline __host__ __device__ float4 operator/(float4 a, float4 b) {
+    return make_float4(a.x / b.x, a.y / b.y, a.z / b.z, a.w / b.w);
+}
+inline __host__ __device__ void operator/=(float4 &a, float4 b) {
+    a.x /= b.x;
+    a.y /= b.y;
+    a.z /= b.z;
+    a.w /= b.w;
+}
+inline __host__ __device__ float4 operator/(float4 a, float b) {
+    return make_float4(a.x / b, a.y / b, a.z / b, a.w / b);
+}
+inline __host__ __device__ void operator/=(float4 &a, float b) {
+    a.x /= b;
+    a.y /= b;
+    a.z /= b;
+    a.w /= b;
+}
+inline __host__ __device__ float4 operator/(float b, float4 a) {
+    return make_float4(b / a.x, b / a.y, b / a.z, b / a.w);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// min
+// - component-wise minimum of two vectors
+////////////////////////////////////////////////////////////////////////////////
+
+inline __host__ __device__ float2 fminf(float2 a, float2 b) {
+    return make_float2(fminf(a.x,b.x), fminf(a.y,b.y));
+}
+inline __host__ __device__ float3 fminf(float3 a, float3 b) {
+    return make_float3(fminf(a.x,b.x), fminf(a.y,b.y), fminf(a.z,b.z));
+}
+inline __host__ __device__ float4 fminf(float4 a, float4 b) {
+    return make_float4(fminf(a.x,b.x), fminf(a.y,b.y), fminf(a.z,b.z), fminf(a.w,b.w));
+}
+
+inline __host__ __device__ int2 min(int2 a, int2 b) {
+    return make_int2(min(a.x,b.x), min(a.y,b.y));
+}
+inline __host__ __device__ int3 min(int3 a, int3 b) {
+    return make_int3(min(a.x,b.x), min(a.y,b.y), min(a.z,b.z));
+}
+inline __host__ __device__ int4 min(int4 a, int4 b) {
+    return make_int4(min(a.x,b.x), min(a.y,b.y), min(a.z,b.z), min(a.w,b.w));
+}
+
+inline __host__ __device__ uint2 min(uint2 a, uint2 b) {
+    return make_uint2(min(a.x,b.x), min(a.y,b.y));
+}
+inline __host__ __device__ uint3 min(uint3 a, uint3 b) {
+    return make_uint3(min(a.x,b.x), min(a.y,b.y), min(a.z,b.z));
+}
+inline __host__ __device__ uint4 min(uint4 a, uint4 b) {
+    return make_uint4(min(a.x,b.x), min(a.y,b.y), min(a.z,b.z), min(a.w,b.w));
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// max
+// - component-wise maximum of two vectors
+////////////////////////////////////////////////////////////////////////////////
+
+inline __host__ __device__ float2 fmaxf(float2 a, float2 b) {
+    return make_float2(fmaxf(a.x,b.x), fmaxf(a.y,b.y));
+}
+inline __host__ __device__ float3 fmaxf(float3 a, float3 b) {
+    return make_float3(fmaxf(a.x,b.x), fmaxf(a.y,b.y), fmaxf(a.z,b.z));
+}
+inline __host__ __device__ float4 fmaxf(float4 a, float4 b) {
+    return make_float4(fmaxf(a.x,b.x), fmaxf(a.y,b.y), fmaxf(a.z,b.z), fmaxf(a.w,b.w));
+}
+
+inline __host__ __device__ int2 max(int2 a, int2 b) {
+    return make_int2(max(a.x,b.x), max(a.y,b.y));
+}
+inline __host__ __device__ int3 max(int3 a, int3 b) {
+    return make_int3(max(a.x,b.x), max(a.y,b.y), max(a.z,b.z));
+}
+inline __host__ __device__ int4 max(int4 a, int4 b) {
+    return make_int4(max(a.x,b.x), max(a.y,b.y), max(a.z,b.z), max(a.w,b.w));
+}
+
+inline __host__ __device__ uint2 max(uint2 a, uint2 b) {
+    return make_uint2(max(a.x,b.x), max(a.y,b.y));
+}
+inline __host__ __device__ uint3 max(uint3 a, uint3 b) {
+    return make_uint3(max(a.x,b.x), max(a.y,b.y), max(a.z,b.z));
+}
+inline __host__ __device__ uint4 max(uint4 a, uint4 b) {
+    return make_uint4(max(a.x,b.x), max(a.y,b.y), max(a.z,b.z), max(a.w,b.w));
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// lerp
+// - linear interpolation between a and b, based on value t in [0, 1] range
+// - t is not clamped here; it is used as given
+////////////////////////////////////////////////////////////////////////////////
+
+inline __device__ __host__ float lerp(float a, float b, float t) {
+    return a + t*(b-a);
+}
+inline __device__ __host__ float2 lerp(float2 a, float2 b, float t) {
+    return a + t*(b-a);
+}
+inline __device__ __host__ float3 lerp(float3 a, float3 b, float t) {
+    return a + t*(b-a);
+}
+inline __device__ __host__ float4 lerp(float4 a, float4 b, float t) {
+    return a + t*(b-a);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// clamp
+// - clamp the value v to be in the range [a, b]
+// - vector overloads clamp each component, against either scalar bounds or
+//   the matching components of vector bounds
+////////////////////////////////////////////////////////////////////////////////
+
+inline __device__ __host__ float clamp(float f, float a, float b) {
+    return fmaxf(a, fminf(f, b));
+}
+inline __device__ __host__ int clamp(int f, int a, int b) {
+    return max(a, min(f, b));
+}
+inline __device__ __host__ uint clamp(uint f, uint a, uint b) {
+    return max(a, min(f, b));
+}
+
+inline __device__ __host__ float2 clamp(float2 v, float a, float b) {
+    return make_float2(clamp(v.x, a, b), clamp(v.y, a, b));
+}
+inline __device__ __host__ float2 clamp(float2 v, float2 a, float2 b) {
+    return make_float2(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y));
+}
+inline __device__ __host__ float3 clamp(float3 v, float a, float b) {
+    return make_float3(clamp(v.x, a, b), clamp(v.y, a, b), clamp(v.z, a, b));
+}
+inline __device__ __host__ float3 clamp(float3 v, float3 a, float3 b) {
+    return make_float3(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y), clamp(v.z, a.z, b.z));
+}
+inline __device__ __host__ float4 clamp(float4 v, float a, float b) {
+    return make_float4(clamp(v.x, a, b), clamp(v.y, a, b), clamp(v.z, a, b), clamp(v.w, a, b));
+}
+inline __device__ __host__ float4 clamp(float4 v, float4 a, float4 b) {
+    return make_float4(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y), clamp(v.z, a.z, b.z), clamp(v.w, a.w, b.w));
+}
+
+inline __device__ __host__ int2 clamp(int2 v, int a, int b) {
+    return make_int2(clamp(v.x, a, b), clamp(v.y, a, b));
+}
+inline __device__ __host__ int2 clamp(int2 v, int2 a, int2 b) {
+    return make_int2(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y));
+}
+inline __device__ __host__ int3 clamp(int3 v, int a, int b) {
+    return make_int3(clamp(v.x, a, b), clamp(v.y, a, b), clamp(v.z, a, b));
+}
+inline __device__ __host__ int3 clamp(int3 v, int3 a, int3 b) {
+    return make_int3(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y), clamp(v.z, a.z, b.z));
+}
+inline __device__ __host__ int4 clamp(int4 v, int a, int b) {
+    return make_int4(clamp(v.x, a, b), clamp(v.y, a, b), clamp(v.z, a, b), clamp(v.w, a, b));
+}
+inline __device__ __host__ int4 clamp(int4 v, int4 a, int4 b) {
+    return make_int4(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y), clamp(v.z, a.z, b.z), clamp(v.w, a.w, b.w));
+}
+
+inline __device__ __host__ uint2 clamp(uint2 v, uint a, uint b) {
+    return make_uint2(clamp(v.x, a, b), clamp(v.y, a, b));
+}
+inline __device__ __host__ uint2 clamp(uint2 v, uint2 a, uint2 b) {
+    return make_uint2(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y));
+}
+inline __device__ __host__ uint3 clamp(uint3 v, uint a, uint b) {
+    return make_uint3(clamp(v.x, a, b), clamp(v.y, a, b), clamp(v.z, a, b));
+}
+inline __device__ __host__ uint3 clamp(uint3 v, uint3 a, uint3 b) {
+    return make_uint3(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y), clamp(v.z, a.z, b.z));
+}
+inline __device__ __host__ uint4 clamp(uint4 v, uint a, uint b) {
+    return make_uint4(clamp(v.x, a, b), clamp(v.y, a, b), clamp(v.z, a, b), clamp(v.w, a, b));
+}
+inline __device__ __host__ uint4 clamp(uint4 v, uint4 a, uint4 b) {
+    return make_uint4(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y), clamp(v.z, a.z, b.z), clamp(v.w, a.w, b.w));
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// dot product
+// - sum of the component-wise products; result has the component type
+////////////////////////////////////////////////////////////////////////////////
+
+inline __host__ __device__ float dot(float2 a, float2 b) {
+    return a.x * b.x + a.y * b.y;
+}
+inline __host__ __device__ float dot(float3 a, float3 b) {
+    return a.x * b.x + a.y * b.y + a.z * b.z;
+}
+inline __host__ __device__ float dot(float4 a, float4 b) {
+    return a.x * b.x + a.y * b.y + a.z * b.z + a.w * b.w;
+}
+
+inline __host__ __device__ int dot(int2 a, int2 b) {
+    return a.x * b.x + a.y * b.y;
+}
+inline __host__ __device__ int dot(int3 a, int3 b) {
+    return a.x * b.x + a.y * b.y + a.z * b.z;
+}
+inline __host__ __device__ int dot(int4 a, int4 b) {
+    return a.x * b.x + a.y * b.y + a.z * b.z + a.w * b.w;
+}
+
+inline __host__ __device__ uint dot(uint2 a, uint2 b) {
+    return a.x * b.x + a.y * b.y;
+}
+inline __host__ __device__ uint dot(uint3 a, uint3 b) {
+    return a.x * b.x + a.y * b.y + a.z * b.z;
+}
+inline __host__ __device__ uint dot(uint4 a, uint4 b) {
+    return a.x * b.x + a.y * b.y + a.z * b.z + a.w * b.w;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// length
+// - Euclidean norm: square root of the vector dotted with itself
+////////////////////////////////////////////////////////////////////////////////
+
+inline __host__ __device__ float length(float2 v) {
+    return sqrtf(dot(v, v));
+}
+inline __host__ __device__ float length(float3 v) {
+    return sqrtf(dot(v, v));
+}
+inline __host__ __device__ float length(float4 v) {
+    return sqrtf(dot(v, v));
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// normalize
+// - scales v by the reciprocal square root of dot(v, v)
+// - no guard for a zero-length input
+////////////////////////////////////////////////////////////////////////////////
+
+inline __host__ __device__ float2 normalize(float2 v) {
+    return v * rsqrtf(dot(v, v));
+}
+inline __host__ __device__ float3 normalize(float3 v) {
+    return v * rsqrtf(dot(v, v));
+}
+inline __host__ __device__ float4 normalize(float4 v) {
+    return v * rsqrtf(dot(v, v));
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// floor
+// - applies floorf to each component
+////////////////////////////////////////////////////////////////////////////////
+
+inline __host__ __device__ float2 floorf(float2 v) {
+    return make_float2(floorf(v.x), floorf(v.y));
+}
+inline __host__ __device__ float3 floorf(float3 v) {
+    return make_float3(floorf(v.x), floorf(v.y), floorf(v.z));
+}
+inline __host__ __device__ float4 floorf(float4 v) {
+    return make_float4(floorf(v.x), floorf(v.y), floorf(v.z), floorf(v.w));
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// frac - returns the fractional portion of a scalar or each vector component
+// - computed as v - floorf(v)
+////////////////////////////////////////////////////////////////////////////////
+
+inline __host__ __device__ float fracf(float v) {
+    return v - floorf(v);
+}
+inline __host__ __device__ float2 fracf(float2 v) {
+    return make_float2(fracf(v.x), fracf(v.y));
+}
+inline __host__ __device__ float3 fracf(float3 v) {
+    return make_float3(fracf(v.x), fracf(v.y), fracf(v.z));
+}
+inline __host__ __device__ float4 fracf(float4 v) {
+    return make_float4(fracf(v.x), fracf(v.y), fracf(v.z), fracf(v.w));
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// fmod
+// - applies fmodf to each pair of components
+////////////////////////////////////////////////////////////////////////////////
+
+inline __host__ __device__ float2 fmodf(float2 a, float2 b) {
+    return make_float2(fmodf(a.x, b.x), fmodf(a.y, b.y));
+}
+inline __host__ __device__ float3 fmodf(float3 a, float3 b) {
+    return make_float3(fmodf(a.x, b.x), fmodf(a.y, b.y), fmodf(a.z, b.z));
+}
+inline __host__ __device__ float4 fmodf(float4 a, float4 b) {
+    return make_float4(fmodf(a.x, b.x), fmodf(a.y, b.y), fmodf(a.z, b.z), fmodf(a.w, b.w));
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// absolute value
+// - component-wise; fabs for float vectors, abs for int vectors
+////////////////////////////////////////////////////////////////////////////////
+
+inline __host__ __device__ float2 fabs(float2 v) {
+    return make_float2(fabs(v.x), fabs(v.y));
+}
+inline __host__ __device__ float3 fabs(float3 v) {
+    return make_float3(fabs(v.x), fabs(v.y), fabs(v.z));
+}
+inline __host__ __device__ float4 fabs(float4 v) {
+    return make_float4(fabs(v.x), fabs(v.y), fabs(v.z), fabs(v.w));
+}
+
+inline __host__ __device__ int2 abs(int2 v) {
+    return make_int2(abs(v.x), abs(v.y));
+}
+inline __host__ __device__ int3 abs(int3 v) {
+    return make_int3(abs(v.x), abs(v.y), abs(v.z));
+}
+inline __host__ __device__ int4 abs(int4 v) {
+    return make_int4(abs(v.x), abs(v.y), abs(v.z), abs(v.w));
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// reflect
+// - returns reflection of incident ray I around surface normal N
+// - N should be normalized, reflected vector's length is equal to length of I
+////////////////////////////////////////////////////////////////////////////////
+
+inline __host__ __device__ float3 reflect(float3 i, float3 n) {
+    return i - 2.0f * n * dot(n,i);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// cross product
+// - three-component vectors only
+////////////////////////////////////////////////////////////////////////////////
+
+inline __host__ __device__ float3 cross(float3 a, float3 b) {
+    return make_float3(a.y*b.z - a.z*b.y, a.z*b.x - a.x*b.z, a.x*b.y - a.y*b.x);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// smoothstep
+// - returns 0 if x < a
+// - returns 1 if x > b
+// - otherwise returns smooth interpolation between 0 and 1 based on x
+////////////////////////////////////////////////////////////////////////////////
+
+inline __device__ __host__ float smoothstep(float a, float b, float x)
+{
+    float y = clamp((x - a) / (b - a), 0.0f, 1.0f);
+    return (y*y*(3.0f - (2.0f*y)));
+}
+inline __device__ __host__ float2 smoothstep(float2 a, float2 b, float2 x)
+{
+    float2 y = clamp((x - a) / (b - a), 0.0f, 1.0f);
+    return (y*y*(make_float2(3.0f) - (make_float2(2.0f)*y)));
+}
+inline __device__ __host__ float3 smoothstep(float3 a, float3 b, float3 x)
+{
+    float3 y = clamp((x - a) / (b - a), 0.0f, 1.0f);
+    return (y*y*(make_float3(3.0f) - (make_float3(2.0f)*y)));
+}
+inline __device__ __host__ float4 smoothstep(float4 a, float4 b, float4 x)
+{
+    float4 y = clamp((x - a) / (b - a), 0.0f, 1.0f);
+    return (y*y*(make_float4(3.0f) - (make_float4(2.0f)*y)));
+}
+
+#endif
diff --git a/src/kernel.cu b/src/kernel.cu
new file mode 100644
index 0000000..40944c0
--- /dev/null
+++ b/src/kernel.cu
@@ -0,0 +1,10 @@
+#include 
+#include 
+#include 
+#include "scene.cuh"
+
+__global__ void render(uint8_t *image) {
+  Scene scene;
+  scene.debug();
+  //scene.render(image);
+}
diff --git a/src/kernel.cuh b/src/kernel.cuh
new file mode 100644
index 0000000..2c5256e
--- /dev/null
+++ b/src/kernel.cuh
@@ -0,0 +1,5 @@
+#ifndef KERNEL_H
+#define KERNEL_H
+#include 
+__global__ void render(uint8_t *image);
+#endif
diff --git a/src/main.cu b/src/main.cu
new file mode 100644
index 0000000..aa74a6c
--- /dev/null
+++ b/src/main.cu
@@ -0,0 +1,74 @@
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "kernel.cuh"
+
+int main() {
+  //bluuuuugh i'll figure out occupancy later, these resolutions are easy
+  //calculated manually for gtx1060 with 20 SM, 1024 threads/SM
+  const int res_x = 32 * 20;
+  const int res_y = 32 * 20;
+  const dim3 blockCount(20, 20);
+  const dim3 threadCount(32, 32);
+  const size_t image_bytes = res_x * res_y * sizeof(Color);
+
+  uint8_t *image_d = NULL;
+  Color texture_data[res_x * res_y];
+  cudaError_t err;
+
+  //see if GPU is connected (my egpu is finicky)
+  {
+    int temp_device;
+    if((err = cudaGetDevice(&temp_device)) != cudaSuccess) {
+      printf("failed to get device!\nError: %s\n", cudaGetErrorString(err));
+      return(1);
+    }
+  }
+
+  //allocate the device framebuffer once; the old per-frame cudaMalloc was never freed
+  if((err = cudaMalloc((void **)&image_d, image_bytes)) != cudaSuccess) {
+    printf("failed to allocate device image!\nError: %s\n", cudaGetErrorString(err));
+    return(1);
+  }
+
+  SetTraceLogLevel(LOG_ERROR);
+  InitWindow(res_x, res_y, "cuda teseteroni");
+  //set after the window exists, as in the raylib examples
+  SetTargetFPS(10);
+
+  //TODO could probably cut out
+  Image image = GenImageColor(res_x, res_y, BLUE);
+  Texture tex = LoadTextureFromImage(image);
+
+  while(!WindowShouldClose()) {
+    //cuda stuff: blockCount * threadCount gives one thread per pixel
+    render<<<blockCount, threadCount>>>(image_d);
+    if((err = cudaGetLastError()) != cudaSuccess) {
+      printf("kernel did not launch! Error: %s\n", cudaGetErrorString(err));
+    }
+    //execution faults only surface at a synchronizing call
+    if((err = cudaDeviceSynchronize()) != cudaSuccess) {
+      printf("kernel failed! Error: %s\n", cudaGetErrorString(err));
+    }
+    if((err = cudaMemcpy(texture_data, image_d, image_bytes, cudaMemcpyDeviceToHost)) != cudaSuccess) {
+      printf("frame copy failed! Error: %s\n", cudaGetErrorString(err));
+    }
+
+    BeginDrawing();
+    UpdateTexture(tex, texture_data);
+    DrawTexture(tex, 0, 0, WHITE);
+    DrawFPS(0, 0);
+    EndDrawing();
+  }
+
+  //release GPU and window resources before exiting
+  UnloadTexture(tex);
+  UnloadImage(image);
+  CloseWindow();
+  cudaFree(image_d);
+  return 0;
+}
diff --git a/src/makefile b/src/makefile
new file mode 100644
index 0000000..4d86411
--- /dev/null
+++ b/src/makefile
@@ -0,0 +1,33 @@
+LIBS = -lraylib -lGL -lm -lpthread -ldl -lrt -lX11
+CC = gcc
+INC = -I /opt/cuda/include
+COMPILED_BIN = build/indigo_worlds
+
+CU_SRCFILES := $(wildcard *.cu)
+CU_OBJFILES := $(patsubst %.cu,build/%.o,$(CU_SRCFILES))
+# headers are not tracked per object, so any header change rebuilds every object
+CU_HEADERS := $(wildcard *.cuh) $(wildcard include/*.h)
+
+.PHONY: all run clean
+
+all: $(COMPILED_BIN)
+
+$(COMPILED_BIN): $(CU_OBJFILES)
+	nvcc -o $@ $^ $(LIBS)
+
+# objects are written to build/, so the target name has to carry that prefix
+build/%.o: %.cu $(CU_HEADERS)
+	@mkdir -p build
+	nvcc --device-debug -dc -c $< -o $@
+
+run: all
+	$(COMPILED_BIN)
+
+clean:
+	rm -rf build/*
+
+
+#make:
+#	nvcc $(LIBS) $(INC) -O0 --debug -c main.cu -o build/main.o
+#	nvcc --device-debug --compile kernel.cu -o build/kernel.o
+#	nvcc $(LIBS) -O0 -o build/indigo_worlds build/main.o build/kernel.o
diff --git a/src/render_object.cuh b/src/render_object.cuh
new file mode 100644
index 0000000..f25f835
--- /dev/null
+++ b/src/render_object.cuh
@@ -0,0 +1,14 @@
+#ifndef RENDER_OBJECT_H
+#define RENDER_OBJECT_H
+#include "entity.cuh"
+#include "common.cuh" //TODO color
+
+class Render_object : public Entity {
+  using Entity::Entity;
+  public:
+    virtual __device__ T distance_estimator(vect3 point) const = 0;
+    virtual __device__ Color get_color(struct Ray ray) const = 0;
+
+};
+
+#endif
diff --git a/src/scene.cu b/src/scene.cu
new file mode 100644
index 0000000..100af14
--- /dev/null
+++ b/src/scene.cu
@@ -0,0 +1,14 @@
+#include 
+#include "scene.cuh"
+
+__device__ void Scene::debug() {
+  //const uint3 unnormalized_uv = ((blockDim * blockIdx) + threadIdx);
+  //const unsigned int img_index = (unnormalized_uv.x + (unnormalized_uv.y * blockDim.x * gridDim.x)) * 4;
+}
+
+__device__ Color Scene::raycast(struct Ray ray) {
+  if(ray.bounces++ > max_bounces) return make_color(0);
+  const size_t obj_count = sizeof(objs) / sizeof(objs[0]);
+  for(size_t obj_i = 0; obj_i < obj_count; obj_i++);
+  return make_color(0);
+}
diff --git a/src/scene.cuh b/src/scene.cuh
new file mode 100644
index 0000000..e3d5896
--- /dev/null
+++ b/src/scene.cuh
@@ -0,0 +1,31 @@
+#ifndef SCENE_H
+#define SCENE_H
+
+#include "common.cuh"
+#include "sphere.cuh"
+#include "render_object.cuh"
+#include "include/helper_math.h"
+#include 
+
+//template class camera;
+
+//when we get animations with multiple scenes, we'll make this a virtual function
+//with array of DE objects and cam
+class Scene {
+  const int max_bounces = 10;
+  public:
+    //__device__ void render(uint8_t *image) { cam.render(); };
+    __device__ Render_object **get_objs() { return objs; }
+    __device__ void debug();
+    __device__ Color raycast(struct Ray ray);
+  private:
+    //camera cam = camera();
+    Sphere sp1 = Sphere(make_vect3(0, .4, -5));
+    Sphere sp2 = Sphere(make_vect3(0, -0.4,-5));
+  protected:
+    //the size must be spelled out: array bounds are not deduced from a default member initializer
+    Render_object *objs[3] = {&sp1, &sp2, NULL};
+    uint8_t *image;
+};
+
+#endif
diff --git a/src/sphere.cu b/src/sphere.cu
new file mode 100644
index 0000000..849a37d
--- /dev/null
+++ b/src/sphere.cu
@@ -0,0 +1,10 @@
+#include "sphere.cuh"
+
+__device__ T Sphere::distance_estimator(vect3 point) const {
+  //NOTE(review): measured from the origin; the sphere's own position is not subtracted - confirm intended
+  return length(point) - r_;
+}
+
+__device__ Color Sphere::get_color(struct Ray ray) const {
+  return make_color(0);
+}
diff --git a/src/sphere.cuh b/src/sphere.cuh
new file mode 100644
index 0000000..0d228ff
--- /dev/null
+++ b/src/sphere.cuh
@@ -0,0 +1,17 @@
+#ifndef SPHERE_H
+#define SPHERE_H
+
+#include "render_object.cuh"
+#include "common.cuh"
+
+class Sphere : public Render_object {
+  using Render_object::Render_object;
+  public:
+    __device__ T distance_estimator(vect3 point) const;
+    __device__ Color get_color(struct Ray ray) const;
+  private:
+    T r_ = 1;
+};
+
+
+#endif
-- 
cgit v1.2.3