diff options
author | brett weiland <brettsweiland@gmail.com> | 2024-06-01 01:36:18 -0500 |
---|---|---|
committer | brett weiland <brettsweiland@gmail.com> | 2024-06-01 01:36:18 -0500 |
commit | b0dd97ee6bf8d5daa587da40ad941efac68152df (patch) | |
tree | f162d32767e0b0f84bed284f6e8ab2c5309ff248 /src | |
parent | 093200a449ea38952de52012e324036c106e294b (diff) |
got raycasting laid out
Diffstat (limited to 'src')
-rw-r--r-- | src/.vscode/launch.json | 19 | ||||
-rw-r--r-- | src/camera.cuh | 75 | ||||
-rw-r--r-- | src/common.cuh | 53 | ||||
-rw-r--r-- | src/entity.cuh | 30 | ||||
-rw-r--r-- | src/include/helper_math.h | 1469 | ||||
-rw-r--r-- | src/kernel.cu | 10 | ||||
-rw-r--r-- | src/kernel.cuh | 5 | ||||
-rw-r--r-- | src/main.cu | 58 | ||||
-rw-r--r-- | src/makefile | 25 | ||||
-rw-r--r-- | src/render_object.cuh | 14 | ||||
-rw-r--r-- | src/scene.cu | 14 | ||||
-rw-r--r-- | src/scene.cuh | 31 | ||||
-rw-r--r-- | src/sphere.cu | 9 | ||||
-rw-r--r-- | src/sphere.cuh | 17 |
14 files changed, 1829 insertions, 0 deletions
diff --git a/src/.vscode/launch.json b/src/.vscode/launch.json new file mode 100644 index 0000000..33752cd --- /dev/null +++ b/src/.vscode/launch.json @@ -0,0 +1,19 @@ +{ + // Use IntelliSense to learn about possible attributes. + // Hover to view descriptions of existing attributes. + // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 + "version": "0.2.0", + "configurations": [ + { + "name": "CUDA C++: Launch", + "type": "cuda-gdb", + "request": "launch", + "program": "${workspaceFolder}/build/indigo_worlds" + }, + { + "name": "CUDA C++: Attach", + "type": "cuda-gdb", + "request": "attach" + } + ] +}
\ No newline at end of file diff --git a/src/camera.cuh b/src/camera.cuh new file mode 100644 index 0000000..4d04fcd --- /dev/null +++ b/src/camera.cuh @@ -0,0 +1,75 @@ +#ifndef CAMERA_H +#define CAMERA_H + +#include "entity.cuh" +#include "common.cuh" +#include <limits> + + +//template <class T> class scene; + +//I am soooo high lol +class camera : public entity { + public: + __device__ camera(scene *pscene, const T fov = 1, const vect3 pos = make_vect3(0), const vect3 rot = make_vect3(0)) + : pscene(pscene), fov(fov), entity(pos, rot, make_vect3(0)) {}; + private: + T fov; + float2 size; + int steps = 100; + T clip_min = .1; + T clip_max = 100; + //scene *pscene; +}; + +/** +//later we'll make scenes objects, rn im lazy (TODO) +template <class T> __device__ void camera::render() { + //TODO *really* need to clean this up once you get further + //extra dimentions is extra math + //either generisize float3 or stop using this fucking template nonsense + const uint3 unnormalized_uv = ((blockDim * blockIdx) + threadIdx); + const unsigned int img_index = (unnormalized_uv.x + (unnormalized_uv.y * blockDim.x * gridDim.x)) * 4; + const vect3 uv = ((2 * make_vect3(unnormalized_uv)) / make_vect3(gridDim * blockDim)) - 1; + const vect3 ray_direction = normalize(make_vect3(uv.x, uv.y, 1)); + T dist; + T total_dist = 0; + vect3 ray; + int i; + + + //if(img_index == 640) { printf("%f, %f, %f\n", uv.x, uv.y, uv.z); } + + T min_dist = clip_max; + + render_object **objs = pscene->get_objs(); + for(i = 0; i < steps; i++) { + ray = this->pos_ + (total_dist * ray_direction); + //gyagh memory lookups + for(unsigned int oi = 0; objs[oi] != NULL; oi++) { + dist = object.distance_estimator(ray); + } + if((dist < clip_min)) { + //image[img_index] = 0xff; + break; + } + if((dist > clip_max)) { + //image[img_index+2] = 0xff; + break; + } + total_dist += dist; + } + + + + + //image[img_index] = 0x00; + //image[img_index+1] = 0x00; + //image[img_index+2] = p; + //image[img_index+3] = 0xff; + +} +**/ + + +#endif diff --git a/src/common.cuh b/src/common.cuh new file mode 100644 index 0000000..9e026f4 --- /dev/null +++ b/src/common.cuh @@ -0,0 +1,53 @@ +#ifndef COMMON_H +#define COMMON_H + +#include "include/helper_math.h" + +/** + +template <class T> class vect_t2; +template <class T> class vect3; +template <class T> class vect_t4; + +//this feels so hacky... idk why people are so scared of metaprogramming +template <> class vect_t2<double> { public: using vect_t = double2; }; +template <> class vect3<double> { public: using vect_t = double3; }; +template <> class vect_t4<double> { public: using vect_t = double4; }; + +template <> class vect_t2<float> { public: using vect_t = float2; }; +template <> class vect3<float> { public: using vect_t = float3; }; +template <> class vect_t4<float> { public: using vect_t = float4; }; + + +template <class T, class X> __device__ T vect_create(X x); +template <class T, class X, class Y, class Z> __device__ T vect_create(X x, Y y, Z z); + +//I have no fucking clue if this is right, check me later ig +template <class float3, class X> __device__ inline float3 vect_create<float3>(X x) { return make_float3(x); } + +template <class float3, class X, class Y, class Z> __device__ inline float3 vect_create<float3>(X x, Y y, Z z) { return make_float3(x, y, z); } +**/ + +/** I'm not sure weather float or double percision is nessesary. I was using +templates, but this changes the structure of my entire project in unwanted +ways, so I'm switching over to typedefs. **/ + +typedef float2 vect2; +typedef float3 vect3; +typedef float T; + +#define make_vect3(...) (make_float3(__VA_ARGS__)) + +//TODO move to color.cuh +typedef float3 Color; +#define make_color(...) (make_float3(__VA_ARGS__)) + +//TODO move to ray.cuh +struct Ray { + Color color; + vect3 start; + vect3 direction; //MUST BE A UNIT VECTOR + int bounces; +}; + +#endif diff --git a/src/entity.cuh b/src/entity.cuh new file mode 100644 index 0000000..b086644 --- /dev/null +++ b/src/entity.cuh @@ -0,0 +1,30 @@ +#ifndef ENTITY_H +#define ENTITY_H +#include "common.cuh" + +//we could make a template to allow double percision, but start with float +//idk how nessesary it is yet so I'll go ahead. +//I know I needed it for zoomin far into the mandelbrot ig, so it's not +//out of the question +class Entity { + public: + __device__ Entity() : pos_(make_vect3(0)), rot_(make_vect3(0)), scale_(make_vect3(0)) {}; + __device__ Entity(const vect3 pos, const vect3 rot, const vect3 scale) : pos_(pos), rot_(rot), scale_(scale) {}; + __device__ Entity(const float3 pos) : pos_(pos), rot_(make_vect3(0)), scale_(make_vect3(0)) {}; + + + vect3 get_pos() const { return pos_; } + vect3 get_rot() const { return rot_; } + vect3 get_scale() const { return scale_; } + + __device__ void set_pos(const vect3 pos) { pos_ = pos; } + __device__ void set_rot(const vect3 rot) { rot_ = rot; } + __device__ void set_scale(const vect3 scale) { scale_ = scale; } + + protected: + vect3 pos_; + vect3 rot_; + vect3 scale_; + +}; +#endif diff --git a/src/include/helper_math.h b/src/include/helper_math.h new file mode 100644 index 0000000..6d2f63a --- /dev/null +++ b/src/include/helper_math.h @@ -0,0 +1,1469 @@ +/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of NVIDIA CORPORATION nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * This file implements common mathematical operations on vector types + * (float3, float4 etc.) since these are not provided as standard by CUDA. + * + * The syntax is modeled on the Cg standard library. + * + * This is part of the Helper library includes + * + * Thanks to Linh Hah for additions and fixes. + */ + +#ifndef HELPER_MATH_H +#define HELPER_MATH_H + +#include "cuda_runtime.h" + +typedef unsigned int uint; +typedef unsigned short ushort; + +#ifndef EXIT_WAIVED +#define EXIT_WAIVED 2 +#endif + +#ifndef __CUDACC__ +#include <math.h> + +//////////////////////////////////////////////////////////////////////////////// +// host implementations of CUDA functions +//////////////////////////////////////////////////////////////////////////////// + +inline float fminf(float a, float b) +{ + return a < b ? a : b; +} + +inline float fmaxf(float a, float b) +{ + return a > b ? a : b; +} + +inline int max(int a, int b) +{ + return a > b ? a : b; +} + +inline int min(int a, int b) +{ + return a < b ? a : b; +} + +inline float rsqrtf(float x) +{ + return 1.0f / sqrtf(x); +} +#endif + +//////////////////////////////////////////////////////////////////////////////// +// constructors +//////////////////////////////////////////////////////////////////////////////// + +inline __host__ __device__ float2 make_float2(float s) +{ + return make_float2(s, s); +} +inline __host__ __device__ float2 make_float2(float3 a) +{ + return make_float2(a.x, a.y); +} +inline __host__ __device__ float2 make_float2(int2 a) +{ + return make_float2(float(a.x), float(a.y)); +} +inline __host__ __device__ float2 make_float2(uint2 a) +{ + return make_float2(float(a.x), float(a.y)); +} + +inline __host__ __device__ int2 make_int2(int s) +{ + return make_int2(s, s); +} +inline __host__ __device__ int2 make_int2(int3 a) +{ + return make_int2(a.x, a.y); +} +inline __host__ __device__ int2 make_int2(uint2 a) +{ + return make_int2(int(a.x), int(a.y)); +} +inline __host__ __device__ int2 make_int2(float2 a) +{ + return make_int2(int(a.x), int(a.y)); +} + +inline __host__ __device__ uint2 make_uint2(uint s) +{ + return make_uint2(s, s); +} +inline __host__ __device__ uint2 make_uint2(uint3 a) +{ + return make_uint2(a.x, a.y); +} +inline __host__ __device__ uint2 make_uint2(int2 a) +{ + return make_uint2(uint(a.x), uint(a.y)); +} + +inline __host__ __device__ float3 make_float3(float s) +{ + return make_float3(s, s, s); +} +inline __host__ __device__ float3 make_float3(float2 a) +{ + return make_float3(a.x, a.y, 0.0f); +} +inline __host__ __device__ float3 make_float3(float2 a, float s) +{ + return make_float3(a.x, a.y, s); +} +inline __host__ __device__ float3 make_float3(float4 a) +{ + return make_float3(a.x, a.y, a.z); +} +inline __host__ __device__ float3 make_float3(int3 a) +{ + return make_float3(float(a.x), float(a.y), float(a.z)); +} +inline __host__ __device__ float3 make_float3(uint3 a) +{ + return make_float3(float(a.x), float(a.y), float(a.z)); +} + +inline __host__ __device__ int3 make_int3(int s) +{ + return make_int3(s, s, s); +} +inline __host__ __device__ int3 make_int3(int2 a) +{ + return make_int3(a.x, a.y, 0); +} +inline __host__ __device__ int3 make_int3(int2 a, int s) +{ + return make_int3(a.x, a.y, s); +} +inline __host__ __device__ int3 make_int3(uint3 a) +{ + return make_int3(int(a.x), int(a.y), int(a.z)); +} +inline __host__ __device__ int3 make_int3(float3 a) +{ + return make_int3(int(a.x), int(a.y), int(a.z)); +} + +inline __host__ __device__ uint3 make_uint3(uint s) +{ + return make_uint3(s, s, s); +} +inline __host__ __device__ uint3 make_uint3(uint2 a) +{ + return make_uint3(a.x, a.y, 0); +} +inline __host__ __device__ uint3 make_uint3(uint2 a, uint s) +{ + return make_uint3(a.x, a.y, s); +} +inline __host__ __device__ uint3 make_uint3(uint4 a) +{ + return make_uint3(a.x, a.y, a.z); +} +inline __host__ __device__ uint3 make_uint3(int3 a) +{ + return make_uint3(uint(a.x), uint(a.y), uint(a.z)); +} + +inline __host__ __device__ float4 make_float4(float s) +{ + return make_float4(s, s, s, s); +} +inline __host__ __device__ float4 make_float4(float3 a) +{ + return make_float4(a.x, a.y, a.z, 0.0f); +} +inline __host__ __device__ float4 make_float4(float3 a, float w) +{ + return make_float4(a.x, a.y, a.z, w); +} +inline __host__ __device__ float4 make_float4(int4 a) +{ + return make_float4(float(a.x), float(a.y), float(a.z), float(a.w)); +} +inline __host__ __device__ float4 make_float4(uint4 a) +{ + return make_float4(float(a.x), float(a.y), float(a.z), float(a.w)); +} + +inline __host__ __device__ int4 make_int4(int s) +{ + return make_int4(s, s, s, s); +} +inline __host__ __device__ int4 make_int4(int3 a) +{ + return make_int4(a.x, a.y, a.z, 0); +} +inline __host__ __device__ int4 make_int4(int3 a, int w) +{ + return make_int4(a.x, a.y, a.z, w); +} +inline __host__ __device__ int4 make_int4(uint4 a) +{ + return make_int4(int(a.x), int(a.y), int(a.z), int(a.w)); +} +inline __host__ __device__ int4 make_int4(float4 a) +{ + return make_int4(int(a.x), int(a.y), int(a.z), int(a.w)); +} + + +inline __host__ __device__ uint4 make_uint4(uint s) +{ + return make_uint4(s, s, s, s); +} +inline __host__ __device__ uint4 make_uint4(uint3 a) +{ + return make_uint4(a.x, a.y, a.z, 0); +} +inline __host__ __device__ uint4 make_uint4(uint3 a, uint w) +{ + return make_uint4(a.x, a.y, a.z, w); +} +inline __host__ __device__ uint4 make_uint4(int4 a) +{ + return make_uint4(uint(a.x), uint(a.y), uint(a.z), uint(a.w)); +} + +//////////////////////////////////////////////////////////////////////////////// +// negate +//////////////////////////////////////////////////////////////////////////////// + +inline __host__ __device__ float2 operator-(float2 &a) +{ + return make_float2(-a.x, -a.y); +} +inline __host__ __device__ int2 operator-(int2 &a) +{ + return make_int2(-a.x, -a.y); +} +inline __host__ __device__ float3 operator-(float3 &a) +{ + return make_float3(-a.x, -a.y, -a.z); +} +inline __host__ __device__ int3 operator-(int3 &a) +{ + return make_int3(-a.x, -a.y, -a.z); +} +inline __host__ __device__ float4 operator-(float4 &a) +{ + return make_float4(-a.x, -a.y, -a.z, -a.w); +} +inline __host__ __device__ int4 operator-(int4 &a) +{ + return make_int4(-a.x, -a.y, -a.z, -a.w); +} + +//////////////////////////////////////////////////////////////////////////////// +// addition +//////////////////////////////////////////////////////////////////////////////// + +inline __host__ __device__ float2 operator+(float2 a, float2 b) +{ + return make_float2(a.x + b.x, a.y + b.y); +} +inline __host__ __device__ void operator+=(float2 &a, float2 b) +{ + a.x += b.x; + a.y += b.y; +} +inline __host__ __device__ float2 operator+(float2 a, float b) +{ + return make_float2(a.x + b, a.y + b); +} +inline __host__ __device__ float2 operator+(float b, float2 a) +{ + return make_float2(a.x + b, a.y + b); +} +inline __host__ __device__ void operator+=(float2 &a, float b) +{ + a.x += b; + a.y += b; +} + +inline __host__ __device__ int2 operator+(int2 a, int2 b) +{ + return make_int2(a.x + b.x, a.y + b.y); +} +inline __host__ __device__ void operator+=(int2 &a, int2 b) +{ + a.x += b.x; + a.y += b.y; +} +inline __host__ __device__ int2 operator+(int2 a, int b) +{ + return make_int2(a.x + b, a.y + b); +} +inline __host__ __device__ int2 operator+(int b, int2 a) +{ + return make_int2(a.x + b, a.y + b); +} +inline __host__ __device__ void operator+=(int2 &a, int b) +{ + a.x += b; + a.y += b; +} + +inline __host__ __device__ uint2 operator+(uint2 a, uint2 b) +{ + return make_uint2(a.x + b.x, a.y + b.y); +} +inline __host__ __device__ void operator+=(uint2 &a, uint2 b) +{ + a.x += b.x; + a.y += b.y; +} +inline __host__ __device__ uint2 operator+(uint2 a, uint b) +{ + return make_uint2(a.x + b, a.y + b); +} +inline __host__ __device__ uint2 operator+(uint b, uint2 a) +{ + return make_uint2(a.x + b, a.y + b); +} +inline __host__ __device__ void operator+=(uint2 &a, uint b) +{ + a.x += b; + a.y += b; +} + + +inline __host__ __device__ float3 operator+(float3 a, float3 b) +{ + return make_float3(a.x + b.x, a.y + b.y, a.z + b.z); +} +inline __host__ __device__ void operator+=(float3 &a, float3 b) +{ + a.x += b.x; + a.y += b.y; + a.z += b.z; +} +inline __host__ __device__ float3 operator+(float3 a, float b) +{ + return make_float3(a.x + b, a.y + b, a.z + b); +} +inline __host__ __device__ void operator+=(float3 &a, float b) +{ + a.x += b; + a.y += b; + a.z += b; +} + +inline __host__ __device__ int3 operator+(int3 a, int3 b) +{ + return make_int3(a.x + b.x, a.y + b.y, a.z + b.z); +} +inline __host__ __device__ void operator+=(int3 &a, int3 b) +{ + a.x += b.x; + a.y += b.y; + a.z += b.z; +} +inline __host__ __device__ int3 operator+(int3 a, int b) +{ + return make_int3(a.x + b, a.y + b, a.z + b); +} +inline __host__ __device__ void operator+=(int3 &a, int b) +{ + a.x += b; + a.y += b; + a.z += b; +} + +inline __host__ __device__ uint3 operator+(uint3 a, uint3 b) +{ + return make_uint3(a.x + b.x, a.y + b.y, a.z + b.z); +} +inline __host__ __device__ void operator+=(uint3 &a, uint3 b) +{ + a.x += b.x; + a.y += b.y; + a.z += b.z; +} +inline __host__ __device__ uint3 operator+(uint3 a, uint b) +{ + return make_uint3(a.x + b, a.y + b, a.z + b); +} +inline __host__ __device__ void operator+=(uint3 &a, uint b) +{ + a.x += b; + a.y += b; + a.z += b; +} + +inline __host__ __device__ int3 operator+(int b, int3 a) +{ + return make_int3(a.x + b, a.y + b, a.z + b); +} +inline __host__ __device__ uint3 operator+(uint b, uint3 a) +{ + return make_uint3(a.x + b, a.y + b, a.z + b); +} +inline __host__ __device__ float3 operator+(float b, float3 a) +{ + return make_float3(a.x + b, a.y + b, a.z + b); +} + +inline __host__ __device__ float4 operator+(float4 a, float4 b) +{ + return make_float4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w); +} +inline __host__ __device__ void operator+=(float4 &a, float4 b) +{ + a.x += b.x; + a.y += b.y; + a.z += b.z; + a.w += b.w; +} +inline __host__ __device__ float4 operator+(float4 a, float b) +{ + return make_float4(a.x + b, a.y + b, a.z + b, a.w + b); +} +inline __host__ __device__ float4 operator+(float b, float4 a) +{ + return make_float4(a.x + b, a.y + b, a.z + b, a.w + b); +} +inline __host__ __device__ void operator+=(float4 &a, float b) +{ + a.x += b; + a.y += b; + a.z += b; + a.w += b; +} + +inline __host__ __device__ int4 operator+(int4 a, int4 b) +{ + return make_int4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w); +} +inline __host__ __device__ void operator+=(int4 &a, int4 b) +{ + a.x += b.x; + a.y += b.y; + a.z += b.z; + a.w += b.w; +} +inline __host__ __device__ int4 operator+(int4 a, int b) +{ + return make_int4(a.x + b, a.y + b, a.z + b, a.w + b); +} +inline __host__ __device__ int4 operator+(int b, int4 a) +{ + return make_int4(a.x + b, a.y + b, a.z + b, a.w + b); +} +inline __host__ __device__ void operator+=(int4 &a, int b) +{ + a.x += b; + a.y += b; + a.z += b; + a.w += b; +} + +inline __host__ __device__ uint4 operator+(uint4 a, uint4 b) +{ + return make_uint4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w); +} +inline __host__ __device__ void operator+=(uint4 &a, uint4 b) +{ + a.x += b.x; + a.y += b.y; + a.z += b.z; + a.w += b.w; +} +inline __host__ __device__ uint4 operator+(uint4 a, uint b) +{ + return make_uint4(a.x + b, a.y + b, a.z + b, a.w + b); +} +inline __host__ __device__ uint4 operator+(uint b, uint4 a) +{ + return make_uint4(a.x + b, a.y + b, a.z + b, a.w + b); +} +inline __host__ __device__ void operator+=(uint4 &a, uint b) +{ + a.x += b; + a.y += b; + a.z += b; + a.w += b; +} + +//////////////////////////////////////////////////////////////////////////////// +// subtract +//////////////////////////////////////////////////////////////////////////////// + +inline __host__ __device__ float2 operator-(float2 a, float2 b) +{ + return make_float2(a.x - b.x, a.y - b.y); +} +inline __host__ __device__ void operator-=(float2 &a, float2 b) +{ + a.x -= b.x; + a.y -= b.y; +} +inline __host__ __device__ float2 operator-(float2 a, float b) +{ + return make_float2(a.x - b, a.y - b); +} +inline __host__ __device__ float2 operator-(float b, float2 a) +{ + return make_float2(b - a.x, b - a.y); +} +inline __host__ __device__ void operator-=(float2 &a, float b) +{ + a.x -= b; + a.y -= b; +} + +inline __host__ __device__ int2 operator-(int2 a, int2 b) +{ + return make_int2(a.x - b.x, a.y - b.y); +} +inline __host__ __device__ void operator-=(int2 &a, int2 b) +{ + a.x -= b.x; + a.y -= b.y; +} +inline __host__ __device__ int2 operator-(int2 a, int b) +{ + return make_int2(a.x - b, a.y - b); +} +inline __host__ __device__ int2 operator-(int b, int2 a) +{ + return make_int2(b - a.x, b - a.y); +} +inline __host__ __device__ void operator-=(int2 &a, int b) +{ + a.x -= b; + a.y -= b; +} + +inline __host__ __device__ uint2 operator-(uint2 a, uint2 b) +{ + return make_uint2(a.x - b.x, a.y - b.y); +} +inline __host__ __device__ void operator-=(uint2 &a, uint2 b) +{ + a.x -= b.x; + a.y -= b.y; +} +inline __host__ __device__ uint2 operator-(uint2 a, uint b) +{ + return make_uint2(a.x - b, a.y - b); +} +inline __host__ __device__ uint2 operator-(uint b, uint2 a) +{ + return make_uint2(b - a.x, b - a.y); +} +inline __host__ __device__ void operator-=(uint2 &a, uint b) +{ + a.x -= b; + a.y -= b; +} + +inline __host__ __device__ float3 operator-(float3 a, float3 b) +{ + return make_float3(a.x - b.x, a.y - b.y, a.z - b.z); +} +inline __host__ __device__ void operator-=(float3 &a, float3 b) +{ + a.x -= b.x; + a.y -= b.y; + a.z -= b.z; +} +inline __host__ __device__ float3 operator-(float3 a, float b) +{ + return make_float3(a.x - b, a.y - b, a.z - b); +} +inline __host__ __device__ float3 operator-(float b, float3 a) +{ + return make_float3(b - a.x, b - a.y, b - a.z); +} +inline __host__ __device__ void operator-=(float3 &a, float b) +{ + a.x -= b; + a.y -= b; + a.z -= b; +} + +inline __host__ __device__ int3 operator-(int3 a, int3 b) +{ + return make_int3(a.x - b.x, a.y - b.y, a.z - b.z); +} +inline __host__ __device__ void operator-=(int3 &a, int3 b) +{ + a.x -= b.x; + a.y -= b.y; + a.z -= b.z; +} +inline __host__ __device__ int3 operator-(int3 a, int b) +{ + return make_int3(a.x - b, a.y - b, a.z - b); +} +inline __host__ __device__ int3 operator-(int b, int3 a) +{ + return make_int3(b - a.x, b - a.y, b - a.z); +} +inline __host__ __device__ void operator-=(int3 &a, int b) +{ + a.x -= b; + a.y -= b; + a.z -= b; +} + +inline __host__ __device__ uint3 operator-(uint3 a, uint3 b) +{ + return make_uint3(a.x - b.x, a.y - b.y, a.z - b.z); +} +inline __host__ __device__ void operator-=(uint3 &a, uint3 b) +{ + a.x -= b.x; + a.y -= b.y; + a.z -= b.z; +} +inline __host__ __device__ uint3 operator-(uint3 a, uint b) +{ + return make_uint3(a.x - b, a.y - b, a.z - b); +} +inline __host__ __device__ uint3 operator-(uint b, uint3 a) +{ + return make_uint3(b - a.x, b - a.y, b - a.z); +} +inline __host__ __device__ void operator-=(uint3 &a, uint b) +{ + a.x -= b; + a.y -= b; + a.z -= b; +} + +inline __host__ __device__ float4 operator-(float4 a, float4 b) +{ + return make_float4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w); +} +inline __host__ __device__ void operator-=(float4 &a, float4 b) +{ + a.x -= b.x; + a.y -= b.y; + a.z -= b.z; + a.w -= b.w; +} +inline __host__ __device__ float4 operator-(float4 a, float b) +{ + return make_float4(a.x - b, a.y - b, a.z - b, a.w - b); +} +inline __host__ __device__ void operator-=(float4 &a, float b) +{ + a.x -= b; + a.y -= b; + a.z -= b; + a.w -= b; +} + +inline __host__ __device__ int4 operator-(int4 a, int4 b) +{ + return make_int4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w); +} +inline __host__ __device__ void operator-=(int4 &a, int4 b) +{ + a.x -= b.x; + a.y -= b.y; + a.z -= b.z; + a.w -= b.w; +} +inline __host__ __device__ int4 operator-(int4 a, int b) +{ + return make_int4(a.x - b, a.y - b, a.z - b, a.w - b); +} +inline __host__ __device__ int4 operator-(int b, int4 a) +{ + return make_int4(b - a.x, b - a.y, b - a.z, b - a.w); +} +inline __host__ __device__ void operator-=(int4 &a, int b) +{ + a.x -= b; + a.y -= b; + a.z -= b; + a.w -= b; +} + +inline __host__ __device__ uint4 operator-(uint4 a, uint4 b) +{ + return make_uint4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w); +} +inline __host__ __device__ void operator-=(uint4 &a, uint4 b) +{ + a.x -= b.x; + a.y -= b.y; + a.z -= b.z; + a.w -= b.w; +} +inline __host__ __device__ uint4 operator-(uint4 a, uint b) +{ + return make_uint4(a.x - b, a.y - b, a.z - b, a.w - b); +} +inline __host__ __device__ uint4 operator-(uint b, uint4 a) +{ + return make_uint4(b - a.x, b - a.y, b - a.z, b - a.w); +} +inline __host__ __device__ void operator-=(uint4 &a, uint b) +{ + a.x -= b; + a.y -= b; + a.z -= b; + a.w -= b; +} + +//////////////////////////////////////////////////////////////////////////////// +// multiply +//////////////////////////////////////////////////////////////////////////////// + +inline __host__ __device__ float2 operator*(float2 a, float2 b) +{ + return make_float2(a.x * b.x, a.y * b.y); +} +inline __host__ __device__ void operator*=(float2 &a, float2 b) +{ + a.x *= b.x; + a.y *= b.y; +} +inline __host__ __device__ float2 operator*(float2 a, float b) +{ + return make_float2(a.x * b, a.y * b); +} +inline __host__ __device__ float2 operator*(float b, float2 a) +{ + return make_float2(b * a.x, b * a.y); +} +inline __host__ __device__ void operator*=(float2 &a, float b) +{ + a.x *= b; + a.y *= b; +} + +inline __host__ __device__ int2 operator*(int2 a, int2 b) +{ + return make_int2(a.x * b.x, a.y * b.y); +} +inline __host__ __device__ void operator*=(int2 &a, int2 b) +{ + a.x *= b.x; + a.y *= b.y; +} +inline __host__ __device__ int2 operator*(int2 a, int b) +{ + return make_int2(a.x * b, a.y * b); +} +inline __host__ __device__ int2 operator*(int b, int2 a) +{ + return make_int2(b * a.x, b * a.y); +} +inline __host__ __device__ void operator*=(int2 &a, int b) +{ + a.x *= b; + a.y *= b; +} + +inline __host__ __device__ uint2 operator*(uint2 a, uint2 b) +{ + return make_uint2(a.x * b.x, a.y * b.y); +} +inline __host__ __device__ void operator*=(uint2 &a, uint2 b) +{ + a.x *= b.x; + a.y *= b.y; +} +inline __host__ __device__ uint2 operator*(uint2 a, uint b) +{ + return make_uint2(a.x * b, a.y * b); +} +inline __host__ __device__ uint2 operator*(uint b, uint2 a) +{ + return make_uint2(b * a.x, b * a.y); +} +inline __host__ __device__ void operator*=(uint2 &a, uint b) +{ + a.x *= b; + a.y *= b; +} + +inline __host__ __device__ float3 operator*(float3 a, float3 b) +{ + return make_float3(a.x * b.x, a.y * b.y, a.z * b.z); +} +inline __host__ __device__ void operator*=(float3 &a, float3 b) +{ + a.x *= b.x; + a.y *= b.y; + a.z *= b.z; +} +inline __host__ __device__ float3 operator*(float3 a, float b) +{ + return make_float3(a.x * b, a.y * b, a.z * b); +} +inline __host__ __device__ float3 operator*(float b, float3 a) +{ + return make_float3(b * a.x, b * a.y, b * a.z); +} +inline __host__ __device__ void operator*=(float3 &a, float b) +{ + a.x *= b; + a.y *= b; + a.z *= b; +} + +inline __host__ __device__ int3 operator*(int3 a, int3 b) +{ + return make_int3(a.x * b.x, a.y * b.y, a.z * b.z); +} +inline __host__ __device__ void operator*=(int3 &a, int3 b) +{ + a.x *= b.x; + a.y *= b.y; + a.z *= b.z; +} +inline __host__ __device__ int3 operator*(int3 a, int b) +{ + return make_int3(a.x * b, a.y * b, a.z * b); +} +inline __host__ __device__ int3 operator*(int b, int3 a) +{ + return make_int3(b * a.x, b * a.y, b * a.z); +} +inline __host__ __device__ void operator*=(int3 &a, int b) +{ + a.x *= b; + a.y *= b; + a.z *= b; +} + +inline __host__ __device__ uint3 operator*(uint3 a, uint3 b) +{ + return make_uint3(a.x * b.x, a.y * b.y, a.z * b.z); +} +inline __host__ __device__ void operator*=(uint3 &a, uint3 b) +{ + a.x *= b.x; + a.y *= b.y; + a.z *= b.z; +} +inline __host__ __device__ uint3 operator*(uint3 a, uint b) +{ + return make_uint3(a.x * b, a.y * b, a.z * b); +} +inline __host__ __device__ uint3 operator*(uint b, uint3 a) +{ + return make_uint3(b * a.x, b * a.y, b * a.z); +} +inline __host__ __device__ void operator*=(uint3 &a, uint b) +{ + a.x *= b; + a.y *= b; + a.z *= b; +} + +inline __host__ __device__ float4 operator*(float4 a, float4 b) +{ + return make_float4(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w); +} +inline __host__ __device__ void operator*=(float4 &a, float4 b) +{ + a.x *= b.x; + a.y *= b.y; + a.z *= b.z; + a.w *= b.w; +} +inline __host__ __device__ float4 operator*(float4 a, float b) +{ + return make_float4(a.x * b, a.y * b, a.z * b, a.w * b); +} +inline __host__ __device__ float4 operator*(float b, float4 a) +{ + return make_float4(b * a.x, b * a.y, b * a.z, b * a.w); +} +inline __host__ __device__ void operator*=(float4 &a, float b) +{ + a.x *= b; + a.y *= b; + a.z *= b; + a.w *= b; +} + +inline __host__ __device__ int4 operator*(int4 a, int4 b) +{ + return make_int4(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w); +} +inline __host__ __device__ void operator*=(int4 &a, int4 b) +{ + a.x *= b.x; + a.y *= b.y; + a.z *= b.z; + a.w *= b.w; +} +inline __host__ __device__ int4 operator*(int4 a, int b) +{ + return make_int4(a.x * b, a.y * b, a.z * b, a.w * b); +} +inline __host__ __device__ int4 operator*(int b, int4 a) +{ + return make_int4(b * a.x, b * a.y, b * a.z, b * a.w); +} +inline __host__ __device__ void operator*=(int4 &a, int b) +{ + a.x *= b; + a.y *= b; + a.z *= b; + a.w *= b; +} + +inline __host__ __device__ uint4 operator*(uint4 a, uint4 b) +{ + return make_uint4(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w); +} +inline __host__ __device__ void operator*=(uint4 &a, uint4 b) +{ + a.x *= b.x; + a.y *= b.y; + a.z *= b.z; + a.w *= b.w; +} +inline __host__ __device__ uint4 operator*(uint4 a, uint b) +{ + return make_uint4(a.x * b, a.y * b, a.z * b, a.w * b); +} +inline __host__ __device__ uint4 operator*(uint b, uint4 a) +{ + return make_uint4(b * a.x, b * a.y, b * a.z, b * a.w); +} +inline __host__ __device__ void operator*=(uint4 &a, uint b) +{ + a.x *= b; + a.y *= b; + a.z *= b; + a.w *= b; +} + +//////////////////////////////////////////////////////////////////////////////// +// divide +//////////////////////////////////////////////////////////////////////////////// + +inline __host__ __device__ float2 operator/(float2 a, float2 b) +{ + return make_float2(a.x / b.x, a.y / b.y); +} +inline __host__ __device__ void operator/=(float2 &a, float2 b) +{ + a.x /= b.x; + a.y /= b.y; +} +inline __host__ __device__ float2 operator/(float2 a, float b) +{ + return make_float2(a.x / b, a.y / b); +} +inline __host__ __device__ void operator/=(float2 &a, float b) +{ + a.x /= b; + a.y /= b; +} +inline __host__ __device__ float2 operator/(float b, float2 a) +{ + return make_float2(b / a.x, b / a.y); +} + +inline __host__ __device__ float3 operator/(float3 a, float3 b) +{ + return make_float3(a.x / b.x, a.y / b.y, a.z / b.z); +} +inline __host__ __device__ void operator/=(float3 &a, float3 b) +{ + a.x /= b.x; + a.y /= b.y; + a.z /= b.z; +} +inline __host__ __device__ float3 operator/(float3 a, float b) +{ + return make_float3(a.x / b, a.y / b, a.z / b); +} +inline __host__ __device__ void operator/=(float3 &a, float b) +{ + a.x /= b; + a.y /= b; + a.z /= b; +} +inline __host__ __device__ float3 operator/(float b, float3 a) +{ + return make_float3(b / a.x, b / a.y, b / a.z); +} + +inline __host__ __device__ float4 operator/(float4 a, float4 b) +{ + return make_float4(a.x / b.x, a.y / b.y, a.z / b.z, a.w / b.w); +} +inline __host__ __device__ void operator/=(float4 &a, float4 b) +{ + a.x /= b.x; + a.y /= b.y; + a.z /= b.z; + a.w /= b.w; +} +inline __host__ __device__ float4 operator/(float4 a, float b) +{ + return make_float4(a.x / b, a.y / b, a.z / b, a.w / b); +} +inline __host__ __device__ void operator/=(float4 &a, float b) +{ + a.x /= b; + a.y /= b; + a.z /= b; + a.w /= b; +} +inline __host__ __device__ float4 operator/(float b, float4 a) +{ + return make_float4(b / a.x, b / a.y, b / a.z, b / a.w); +} + +//////////////////////////////////////////////////////////////////////////////// +// min +//////////////////////////////////////////////////////////////////////////////// + +inline __host__ __device__ float2 fminf(float2 a, float2 b) +{ + return make_float2(fminf(a.x,b.x), fminf(a.y,b.y)); +} +inline __host__ __device__ float3 fminf(float3 a, float3 b) +{ + return make_float3(fminf(a.x,b.x), fminf(a.y,b.y), fminf(a.z,b.z)); +} +inline __host__ __device__ float4 fminf(float4 a, float4 b) +{ + return make_float4(fminf(a.x,b.x), fminf(a.y,b.y), fminf(a.z,b.z), fminf(a.w,b.w)); +} + +inline __host__ __device__ int2 min(int2 a, int2 b) +{ + return make_int2(min(a.x,b.x), min(a.y,b.y)); +} +inline __host__ __device__ int3 min(int3 a, int3 b) +{ + return make_int3(min(a.x,b.x), min(a.y,b.y), min(a.z,b.z)); +} +inline __host__ __device__ int4 min(int4 a, int4 b) +{ + return make_int4(min(a.x,b.x), min(a.y,b.y), min(a.z,b.z), min(a.w,b.w)); +} + +inline __host__ __device__ uint2 min(uint2 a, uint2 b) +{ + return make_uint2(min(a.x,b.x), min(a.y,b.y)); +} +inline __host__ __device__ uint3 min(uint3 a, uint3 b) +{ + return make_uint3(min(a.x,b.x), min(a.y,b.y), min(a.z,b.z)); +} +inline __host__ __device__ uint4 min(uint4 a, uint4 b) +{ + return make_uint4(min(a.x,b.x), min(a.y,b.y), min(a.z,b.z), min(a.w,b.w)); +} + +//////////////////////////////////////////////////////////////////////////////// +// max +//////////////////////////////////////////////////////////////////////////////// + +inline __host__ __device__ float2 fmaxf(float2 a, float2 b) +{ + return make_float2(fmaxf(a.x,b.x), fmaxf(a.y,b.y)); +} +inline __host__ __device__ float3 fmaxf(float3 a, float3 b) +{ + return make_float3(fmaxf(a.x,b.x), fmaxf(a.y,b.y), fmaxf(a.z,b.z)); +} +inline __host__ __device__ float4 fmaxf(float4 a, float4 b) +{ + return make_float4(fmaxf(a.x,b.x), fmaxf(a.y,b.y), fmaxf(a.z,b.z), fmaxf(a.w,b.w)); +} + +inline __host__ __device__ int2 max(int2 a, int2 b) +{ + return make_int2(max(a.x,b.x), max(a.y,b.y)); +} +inline __host__ __device__ int3 max(int3 a, int3 b) +{ + return make_int3(max(a.x,b.x), max(a.y,b.y), max(a.z,b.z)); +} +inline __host__ __device__ int4 max(int4 a, int4 b) +{ + return make_int4(max(a.x,b.x), max(a.y,b.y), max(a.z,b.z), max(a.w,b.w)); +} + +inline __host__ __device__ uint2 max(uint2 a, uint2 b) +{ + return make_uint2(max(a.x,b.x), max(a.y,b.y)); +} +inline __host__ __device__ uint3 max(uint3 a, uint3 b) +{ + return make_uint3(max(a.x,b.x), max(a.y,b.y), max(a.z,b.z)); +} +inline __host__ __device__ uint4 max(uint4 a, uint4 b) +{ + return make_uint4(max(a.x,b.x), max(a.y,b.y), max(a.z,b.z), max(a.w,b.w)); +} + +//////////////////////////////////////////////////////////////////////////////// +// lerp +// - linear interpolation between a and b, based on value t in [0, 1] range +//////////////////////////////////////////////////////////////////////////////// + +inline __device__ __host__ float lerp(float a, float b, float t) +{ + return a + t*(b-a); +} +inline __device__ __host__ float2 lerp(float2 a, float2 b, float t) +{ + return a + t*(b-a); +} +inline __device__ __host__ float3 lerp(float3 a, float3 b, float t) +{ + return a + t*(b-a); +} +inline __device__ __host__ float4 lerp(float4 a, float4 b, float t) +{ + return a + t*(b-a); +} + +//////////////////////////////////////////////////////////////////////////////// +// clamp +// - clamp the value v to be in the range [a, b] +//////////////////////////////////////////////////////////////////////////////// + +inline __device__ __host__ float clamp(float f, float a, float b) +{ + return fmaxf(a, fminf(f, b)); +} +inline __device__ __host__ int clamp(int f, int a, int b) +{ + return max(a, min(f, b)); +} +inline __device__ __host__ uint clamp(uint f, uint a, uint b) +{ + return max(a, min(f, b)); +} + +inline __device__ __host__ float2 clamp(float2 v, float a, float b) +{ + return make_float2(clamp(v.x, a, b), clamp(v.y, a, b)); +} +inline __device__ __host__ float2 clamp(float2 v, float2 a, float2 b) +{ + return make_float2(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y)); +} +inline __device__ __host__ float3 clamp(float3 v, float a, float b) +{ + return make_float3(clamp(v.x, a, b), clamp(v.y, a, b), clamp(v.z, a, b)); +} +inline __device__ __host__ float3 clamp(float3 v, float3 a, float3 b) +{ + return make_float3(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y), clamp(v.z, a.z, b.z)); +} +inline __device__ __host__ float4 clamp(float4 v, float a, float b) +{ + return make_float4(clamp(v.x, a, b), clamp(v.y, a, b), clamp(v.z, a, b), clamp(v.w, a, b)); +} +inline __device__ __host__ float4 clamp(float4 v, float4 a, float4 b) +{ + return make_float4(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y), clamp(v.z, a.z, b.z), clamp(v.w, a.w, b.w)); +} + +inline __device__ __host__ int2 clamp(int2 v, int a, int b) +{ + return make_int2(clamp(v.x, a, b), clamp(v.y, a, b)); +} +inline __device__ __host__ int2 clamp(int2 v, int2 a, int2 b) +{ + return make_int2(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y)); +} +inline __device__ __host__ int3 clamp(int3 v, int a, int b) +{ + return make_int3(clamp(v.x, a, b), clamp(v.y, a, b), clamp(v.z, a, b)); +} +inline __device__ __host__ int3 clamp(int3 v, int3 a, int3 b) +{ + return make_int3(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y), clamp(v.z, a.z, b.z)); +} +inline __device__ __host__ int4 clamp(int4 v, int a, int b) +{ + return make_int4(clamp(v.x, a, b), clamp(v.y, a, b), clamp(v.z, a, b), clamp(v.w, a, b)); +} +inline __device__ __host__ int4 clamp(int4 v, int4 a, int4 b) +{ + return make_int4(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y), clamp(v.z, a.z, b.z), clamp(v.w, a.w, b.w)); +} + +inline __device__ __host__ uint2 clamp(uint2 v, uint a, uint b) +{ + return make_uint2(clamp(v.x, a, b), clamp(v.y, a, b)); +} +inline __device__ __host__ uint2 clamp(uint2 v, uint2 a, uint2 b) +{ + return make_uint2(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y)); +} +inline __device__ __host__ uint3 clamp(uint3 v, uint a, uint b) +{ + return make_uint3(clamp(v.x, a, b), clamp(v.y, a, b), clamp(v.z, a, b)); +} +inline __device__ __host__ uint3 clamp(uint3 v, uint3 a, uint3 b) +{ + return make_uint3(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y), clamp(v.z, a.z, b.z)); +} +inline __device__ __host__ uint4 clamp(uint4 v, uint a, uint b) +{ + return make_uint4(clamp(v.x, a, b), clamp(v.y, a, b), clamp(v.z, a, b), clamp(v.w, a, b)); +} +inline __device__ __host__ uint4 clamp(uint4 v, uint4 a, uint4 b) +{ + return make_uint4(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y), clamp(v.z, a.z, b.z), clamp(v.w, a.w, b.w)); +} + +//////////////////////////////////////////////////////////////////////////////// +// dot product +//////////////////////////////////////////////////////////////////////////////// + +inline __host__ __device__ float dot(float2 a, float2 b) +{ + return a.x * b.x + a.y * b.y; +} +inline __host__ __device__ float dot(float3 a, float3 b) +{ + return a.x * b.x + a.y * b.y + a.z * b.z; +} +inline __host__ __device__ float dot(float4 a, float4 b) +{ + return a.x * b.x + a.y * b.y + a.z * b.z + a.w * b.w; +} + +inline __host__ __device__ int dot(int2 a, int2 b) +{ + return a.x * b.x + a.y * b.y; +} +inline __host__ __device__ int dot(int3 a, int3 b) +{ + return a.x * b.x + a.y * b.y + a.z * b.z; +} +inline __host__ __device__ int dot(int4 a, int4 b) +{ + return a.x * b.x + a.y * b.y + a.z * b.z + a.w * b.w; +} + +inline __host__ __device__ uint dot(uint2 a, uint2 b) +{ + return a.x * b.x + a.y * b.y; +} +inline __host__ __device__ uint dot(uint3 a, uint3 b) +{ + return a.x * b.x + a.y * b.y + a.z * b.z; +} +inline __host__ __device__ uint dot(uint4 a, uint4 b) +{ + return a.x * b.x + a.y * b.y + a.z * b.z + a.w * b.w; +} + +//////////////////////////////////////////////////////////////////////////////// +// length +//////////////////////////////////////////////////////////////////////////////// + +inline __host__ __device__ float length(float2 v) +{ + return sqrtf(dot(v, v)); +} +inline __host__ __device__ float length(float3 v) +{ + return sqrtf(dot(v, v)); +} +inline __host__ __device__ float length(float4 v) +{ + return sqrtf(dot(v, v)); +} + +//////////////////////////////////////////////////////////////////////////////// +// normalize +//////////////////////////////////////////////////////////////////////////////// + +inline __host__ __device__ float2 normalize(float2 v) +{ + float invLen = rsqrtf(dot(v, v)); + return v * invLen; +} +inline __host__ __device__ float3 normalize(float3 v) +{ + float invLen = rsqrtf(dot(v, v)); + return v * invLen; +} +inline __host__ __device__ float4 normalize(float4 v) +{ + float invLen = rsqrtf(dot(v, v)); + return v * invLen; +} + +//////////////////////////////////////////////////////////////////////////////// +// floor +//////////////////////////////////////////////////////////////////////////////// + +inline __host__ __device__ float2 floorf(float2 v) +{ + return make_float2(floorf(v.x), floorf(v.y)); +} +inline __host__ __device__ float3 floorf(float3 v) +{ + return make_float3(floorf(v.x), floorf(v.y), floorf(v.z)); +} +inline __host__ __device__ float4 floorf(float4 v) +{ + return make_float4(floorf(v.x), floorf(v.y), floorf(v.z), floorf(v.w)); +} + +//////////////////////////////////////////////////////////////////////////////// +// frac - returns the fractional portion of a scalar or each vector component +//////////////////////////////////////////////////////////////////////////////// + +inline __host__ __device__ float fracf(float v) +{ + return v - floorf(v); +} +inline __host__ __device__ float2 fracf(float2 v) +{ + return make_float2(fracf(v.x), fracf(v.y)); +} +inline __host__ __device__ float3 fracf(float3 v) +{ + return make_float3(fracf(v.x), fracf(v.y), fracf(v.z)); +} +inline __host__ __device__ float4 fracf(float4 v) +{ + return make_float4(fracf(v.x), fracf(v.y), fracf(v.z), fracf(v.w)); +} + +//////////////////////////////////////////////////////////////////////////////// +// fmod +//////////////////////////////////////////////////////////////////////////////// + +inline __host__ __device__ float2 fmodf(float2 a, float2 b) +{ + return make_float2(fmodf(a.x, b.x), fmodf(a.y, b.y)); +} +inline __host__ __device__ float3 fmodf(float3 a, float3 b) +{ + return make_float3(fmodf(a.x, b.x), fmodf(a.y, b.y), fmodf(a.z, b.z)); +} +inline __host__ __device__ float4 fmodf(float4 a, float4 b) +{ + return make_float4(fmodf(a.x, b.x), fmodf(a.y, b.y), fmodf(a.z, b.z), fmodf(a.w, b.w)); +} + +//////////////////////////////////////////////////////////////////////////////// +// absolute value +//////////////////////////////////////////////////////////////////////////////// + +inline __host__ __device__ float2 fabs(float2 v) +{ + return make_float2(fabs(v.x), fabs(v.y)); +} +inline __host__ __device__ float3 fabs(float3 v) +{ + return make_float3(fabs(v.x), fabs(v.y), fabs(v.z)); +} +inline __host__ __device__ float4 fabs(float4 v) +{ + return make_float4(fabs(v.x), fabs(v.y), fabs(v.z), fabs(v.w)); +} + +inline __host__ __device__ int2 abs(int2 v) +{ + return make_int2(abs(v.x), abs(v.y)); +} +inline __host__ __device__ int3 abs(int3 v) +{ + return make_int3(abs(v.x), abs(v.y), abs(v.z)); +} +inline __host__ __device__ int4 abs(int4 v) +{ + return make_int4(abs(v.x), abs(v.y), abs(v.z), abs(v.w)); +} + +//////////////////////////////////////////////////////////////////////////////// +// reflect +// - returns reflection of incident ray I around surface normal N +// - N should be normalized, reflected vector's length is equal to length of I +//////////////////////////////////////////////////////////////////////////////// + +inline __host__ __device__ float3 reflect(float3 i, float3 n) +{ + return i - 2.0f * n * dot(n,i); +} + +//////////////////////////////////////////////////////////////////////////////// +// cross product +//////////////////////////////////////////////////////////////////////////////// + +inline __host__ __device__ float3 cross(float3 a, float3 b) +{ + return make_float3(a.y*b.z - a.z*b.y, a.z*b.x - a.x*b.z, a.x*b.y - a.y*b.x); +} + +//////////////////////////////////////////////////////////////////////////////// +// smoothstep +// - returns 0 if x < a +// - returns 1 if x > b +// - otherwise returns smooth interpolation between 0 and 1 based on x +//////////////////////////////////////////////////////////////////////////////// + +inline __device__ __host__ float smoothstep(float a, float b, float x) +{ + float y = clamp((x - a) / (b - a), 0.0f, 1.0f); + return (y*y*(3.0f - (2.0f*y))); +} +inline __device__ __host__ float2 smoothstep(float2 a, float2 b, float2 x) +{ + float2 y = clamp((x - a) / (b - a), 0.0f, 1.0f); + return (y*y*(make_float2(3.0f) - (make_float2(2.0f)*y))); +} +inline __device__ __host__ float3 smoothstep(float3 a, float3 b, float3 x) +{ + float3 y = clamp((x - a) / (b - a), 0.0f, 1.0f); + return (y*y*(make_float3(3.0f) - (make_float3(2.0f)*y))); +} +inline __device__ __host__ float4 smoothstep(float4 a, float4 b, float4 x) +{ + float4 y = clamp((x - a) / (b - a), 0.0f, 1.0f); + return (y*y*(make_float4(3.0f) - (make_float4(2.0f)*y))); +} + +#endif diff --git a/src/kernel.cu b/src/kernel.cu new file mode 100644 index 0000000..40944c0 --- /dev/null +++ b/src/kernel.cu @@ -0,0 +1,10 @@ +#include <curand.h> +#include <stdint.h> +#include <stdio.h> +#include "scene.cuh" + +__global__ void render(uint8_t *image) { + Scene scene; + scene.debug(); + //scene.render(image); +} diff --git a/src/kernel.cuh b/src/kernel.cuh new file mode 100644 index 0000000..2c5256e --- /dev/null +++ b/src/kernel.cuh @@ -0,0 +1,5 @@ +#ifndef KERNEL_H +#define KERNEL_H +#include <stdint.h> +__global__ void render(uint8_t *image); +#endif diff --git a/src/main.cu b/src/main.cu new file mode 100644 index 0000000..aa74a6c --- /dev/null +++ b/src/main.cu @@ -0,0 +1,58 @@ +#include <stdint.h> +#include <stdio.h> +#include <stdint.h> +#include <cuda_runtime.h> +#include <string.h> +#include <raylib.h> + +#include "kernel.cuh" + +int main() { + //bluuuuugh i'll figure out occupancy later, this res are easy + //calculated manually for gtx1060 with 20 SM, 1024 threads/SM + const int res_x = 32 * 20; + const int res_y = 32 * 20; + const dim3 blockCount(20, 20); + const dim3 threadCount(32, 32); + + uint8_t *image_d; + Color texture_data[res_x * res_y]; + SetTargetFPS(10); + + + //see if GPU is connected (my egpu is finicky) + { + int temp_device; + cudaError_t err; + if((err = cudaGetDevice(&temp_device)) != cudaSuccess) { + printf("failed to get device!\nError: %s\n", cudaGetErrorString(err)); + return(1); + } + } + + SetTraceLogLevel(LOG_ERROR); + InitWindow(res_x, res_y, "cuda teseteroni"); + + //TODO could probably cut out + Image image = GenImageColor(res_x, res_y, BLUE); + Texture tex = LoadTextureFromImage(image); + + while(!WindowShouldClose()) { + cudaError_t err; + //cuda stuff + cudaMalloc((void **)&image_d, res_x * res_y * sizeof(Color)); + render<<<blockCount, threadCount>>>(image_d); + if((err = cudaGetLastError()) != cudaSuccess) { + printf("kernel did not launch! Error: %s\n", cudaGetErrorString(err)); + } + cudaDeviceSynchronize(); + cudaMemcpy(texture_data, (void **)image_d, res_x * res_y * sizeof(Color), cudaMemcpyDeviceToHost); + + BeginDrawing(); + UpdateTexture(tex, texture_data); + DrawTexture(tex, 0, 0, WHITE); + DrawFPS(0, 0); + EndDrawing(); + } + return 0; +} diff --git a/src/makefile b/src/makefile new file mode 100644 index 0000000..4d86411 --- /dev/null +++ b/src/makefile @@ -0,0 +1,25 @@ +LIBS = -lraylib -lGL -lm -lpthread -ldl -lrt -lX11 +$CC = gcc +INC = -I /opt/cuda/include +COMPILED_BIN = build/indigo_worlds + +CU_SRCFILES := $(wildcard *.cu) +CU_OBJFILES := $(patsubst %.cu, %.o, $(CU_SRCFILES)) + +all: $(CU_OBJFILES) + nvcc $(LIBS) -o build/indigo_worlds build/*.o + +%.o: %.cu + nvcc --device-debug -dc -c $< -o build/$@ + +run: all + build/indigo_worlds + +clean: + rm -rf build/* + + +#make: +# nvcc $(LIBS) $(INC) -O0 --debug -c main.cu -o build/main.o +# nvcc --device-debug --compile kernel.cu -o build/kernel.o +# nvcc $(LIBS) -O0 -o build/indigo_worlds build/main.o build/kernel.o diff --git a/src/render_object.cuh b/src/render_object.cuh new file mode 100644 index 0000000..f25f835 --- /dev/null +++ b/src/render_object.cuh @@ -0,0 +1,14 @@ +#ifndef RENDER_OBJECT_H +#define RENDER_OBJECT_H +#include "entity.cuh" +#include "common.cuh" //TODO color + +class Render_object : public Entity { + using Entity::Entity; + public: + virtual __device__ T distance_estimator(vect3 point) const = 0; + virtual __device__ Color get_color(struct Ray ray) const = 0; + +}; + +#endif diff --git a/src/scene.cu b/src/scene.cu new file mode 100644 index 0000000..100af14 --- /dev/null +++ b/src/scene.cu @@ -0,0 +1,14 @@ +#include <stdio.h> +#include "scene.cuh" + +__device__ void Scene::debug() { + //const uint3 unnormalized_uv = ((blockDim * blockIdx) + threadIdx); + //const unsigned int img_index = (unnormalized_uv.x + (unnormalized_uv.y * blockDim.x * gridDim.x)) * 4; +} + +__device__ Color Scene::raycast(struct Ray ray) { + if(ray.bounces++ > max_bounces) return make_color(0); + const size_t obj_count = sizeof(objs) / sizeof(objs[0]); + for(size_t obj_i = 0; obj_i < obj_count; obj_i++); + return make_color(0); +} diff --git a/src/scene.cuh b/src/scene.cuh new file mode 100644 index 0000000..e3d5896 --- /dev/null +++ b/src/scene.cuh @@ -0,0 +1,31 @@ +#ifndef SCENE_H +#define SCENE_H + +#include "common.cuh" +#include "sphere.cuh" +#include "render_object.cuh" +#include "include/helper_math.h" +#include <stdint.h> + +//template <class T> class camera; + +//when we get animations with multiple scenes, we'll make this a virtual function +//with array of DE objects and cam +class Scene { + const int max_bounces = 10; + public: + //__device__ void render(uint8_t *image) { cam.render(); }; + __device__ Render_object **get_objs() { return objs; } + __device__ void debug(); + __device__ Color raycast(struct Ray ray); + private: + //camera cam = camera(); + Sphere sp1 = Sphere(make_vect3(0, .4, -5)); + Sphere sp2 = Sphere(make_vect3(0, -0.4,-5)); + protected: + //idk why I need to specify the size... why can't the compiler figure that out? + Render_object *objs[3] = {&sp1, &sp2, NULL}; + uint8_t *image; +}; + +#endif diff --git a/src/sphere.cu b/src/sphere.cu new file mode 100644 index 0000000..849a37d --- /dev/null +++ b/src/sphere.cu @@ -0,0 +1,9 @@ +#include "sphere.cuh" + +__device__ T Sphere::distance_estimator(vect3 point) const { + return length(point) - r_; +} + +__device__ Color Sphere::get_color(struct Ray ray) const { + return make_color(0); +} diff --git a/src/sphere.cuh b/src/sphere.cuh new file mode 100644 index 0000000..0d228ff --- /dev/null +++ b/src/sphere.cuh @@ -0,0 +1,17 @@ +#ifndef SPHERE_H +#define SPHERE_H + +#include "render_object.cuh" +#include "common.cuh" + +class Sphere : public Render_object { + using Render_object::Render_object; + public: + __device__ T distance_estimator(vect3 point) const; + __device__ Color get_color(struct Ray ray) const; + private: + T r_ = 1; +}; + + +#endif |