#include <cstdio>
#include <cstdlib>
#include <iostream>

#include <cuda_runtime.h>

// Terminates the program with a diagnostic when a CUDA runtime call failed.
//   err  - status returned by the CUDA call being checked
//   func - label identifying the call site; printed verbatim in the message
void checkCudaError(cudaError_t err, const char* func) {
    if (err != cudaSuccess) {
        std::fprintf(stderr, "CUDA error in %s: %s\n", func, cudaGetErrorString(err));
        // Use a fixed failure status: the raw cudaError_t value may be truncated
        // by the OS when it is used as a process exit status.
        std::exit(EXIT_FAILURE);
    }
}

// Fixed-size 3-component vector usable from both host and device code.
template <typename Type>
class PairsVector3 {
public:
    PairsVector3() = default;

    // The components live in an array embedded in the object itself, so a
    // vector constructed in device code is stored wherever that object lives
    // on the device; no separate allocation is involved.
    __host__ __device__ PairsVector3(Type x, Type y, Type z) {
        v_[0] = x;
        v_[1] = y;
        v_[2] = z;
    }

    // Component access. No bounds checking is performed: index must be 0, 1 or 2.
    __host__ __device__ Type& operator[](int index) {
        return v_[index];
    }
    __host__ __device__ const Type& operator[](int index) const {
        return v_[index];
    }

private:
    Type v_[3] = {Type(), Type(), Type()};
};


// Bundle of raw pointers to the particle position array (3 doubles per
// particle, laid out as x, y, z), one pointer per memory space.
struct PairsObjects {
    double *position_h = nullptr;  // host copy
    double *position_d = nullptr;  // device copy
};


// Gives host and device code the same getter/setter interface to the data
// referenced by a PairsObjects.
//
// Copies of a PairsAccessor are shallow: they share the same pobj_h / pobj_d
// pointers. Because the accessor is handed to kernels by value, it deliberately
// has no destructor (a destructor would free pobj_d as soon as any temporary
// copy is destroyed). Call release() exactly once from the host when done.
class PairsAccessor {
private:
    PairsObjects *pobj_h;  // host-resident PairsObjects supplied by the caller (not owned)
    PairsObjects *pobj_d;  // device-resident copy of *pobj_h, allocated by the constructor

public:
    // PairsAccessor is only constructible from host, but its getters and
    // setters can be called from both host and device.
    //
    // pobj_ must already hold a valid position_d pointer: the struct is copied
    // to the device once, here, so later changes to the pointers stored in
    // *pobj_ are not visible through pobj_d.
    __host__ PairsAccessor(PairsObjects *pobj_) : pobj_h(pobj_), pobj_d(nullptr) {
        // NOTE: pobj_h is copied to device, but pobj_d is used ONLY to reach device pointers.
        // For example, device code accesses pobj_d->position_d, NOT pobj_d->position_h.
        // NOTE: What is copied to device here is the PairsObjects struct itself, i.e. a bunch
        // of pointers (including valid device pointers such as position_d).
        // TODO (maybe): Split PairsObjects into two structs, one holding host pointers,
        // the other holding device pointers.
        checkCudaError(cudaMalloc(&pobj_d, sizeof(PairsObjects)),
                       "PairsAccessor: cudaMalloc(pobj_d)");
        checkCudaError(cudaMemcpy(pobj_d, pobj_h, sizeof(PairsObjects), cudaMemcpyHostToDevice),
                       "PairsAccessor: cudaMemcpy(pobj_d)");
    }

    // Frees the device-side PairsObjects copy allocated by the constructor.
    // Host only. Other copies of this accessor still hold the old pointer and
    // must not be used (or released) afterwards.
    __host__ void release() {
        if (pobj_d != nullptr) {
            checkCudaError(cudaFree(pobj_d), "PairsAccessor::release: cudaFree(pobj_d)");
            pobj_d = nullptr;
        }
    }

    // Returns 12 when compiled for the device path and 34 on the host path;
    // lets a caller verify which side of the __CUDA_ARCH__ split is executing.
    __host__ __device__ int getTest() const {
    #ifdef __CUDA_ARCH__
        return 12;
    #else
        return 34;
    #endif
    }

    // Returns the position of particle i as a PairsVector3 built on the calling side:
    // from position_d when called on device, from position_h when called on host.
    __host__ __device__ PairsVector3<double> getPosition(const size_t i) const {
    #ifdef __CUDA_ARCH__
        // Assume position_d points to up-to-date data (we can't do copyPropertyToDevice from __device__)
        return PairsVector3<double>(pobj_d->position_d[i*3 + 0],
                                    pobj_d->position_d[i*3 + 1],
                                    pobj_d->position_d[i*3 + 2]);
    #else
        // Here we can do copyPropertyToHost (ReadOnly) if needed, to make sure position_h has up-to-date data
        return PairsVector3<double>(pobj_h->position_h[i*3 + 0],
                                    pobj_h->position_h[i*3 + 1],
                                    pobj_h->position_h[i*3 + 2]);
    #endif
    }

    // Writes vec into the position of particle i: into position_d when called
    // on device, into position_h when called on host. The other memory space
    // is NOT updated.
    __host__ __device__ void setPosition(const size_t i, PairsVector3<double> const &vec) {
    #ifdef __CUDA_ARCH__
        // Assume vec is on device
        pobj_d->position_d[i*3 + 0] = vec[0];
        pobj_d->position_d[i*3 + 1] = vec[1];
        pobj_d->position_d[i*3 + 2] = vec[2];
        // Assume we don't need position_h data back on host (we can't do copyPropertyToHost from __device__)
    #else
        // Assume vec is on host
        pobj_h->position_h[i*3 + 0] = vec[0];
        pobj_h->position_h[i*3 + 1] = vec[1];
        pobj_h->position_h[i*3 + 2] = vec[2];
        // Here we can do copyPropertyToDevice (WriteOnly) if needed (just so host and device data match)
    #endif
    }
};


// Demo kernel: exercises the accessor's device-side getters and setters.
// Every launched thread prints and writes particle 0, so it is meant to be
// launched as <<<1,1>>>. Uses no shared memory.
__global__ void mykernel(PairsAccessor ac) {
    printf("getTest from device = %d\n", ac.getTest());

    PairsVector3<double> pos(7, 8, 9);
    ac.setPosition(0, pos);

    // Read the position back once instead of once per printed component.
    const PairsVector3<double> current = ac.getPosition(0);
    printf("getPosition(0) from device = (%f, %f, %f) \n", current[0], current[1], current[2]);
}


int main(int argc, char **argv) {
    (void)argc;
    (void)argv;

    PairsObjects *pobj = new PairsObjects;

    // User doesn't bother with the stuff below, it is done when PairsSimulation is initialized
    //----------------------------------------------------------------------------------------------------
    const int numParticles = 1;
    const int numElements = numParticles * 3;
    const size_t numBytes = numElements * sizeof(double);
    // Value-initialize so the host-to-device copy below does not transfer indeterminate data.
    pobj->position_h = new double[numElements]();
    checkCudaError(cudaMalloc(&pobj->position_d, numBytes), "cudaMalloc(position_d)");
    checkCudaError(cudaMemcpy(pobj->position_d, pobj->position_h, numBytes, cudaMemcpyHostToDevice),
                   "cudaMemcpy(position_d)");
    //----------------------------------------------------------------------------------------------------

    PairsAccessor ac(pobj);
    printf("getTest from host = %d\n", ac.getTest());

    PairsVector3<double> pos(1.2, 3.4, 5.6);
    ac.setPosition(0, pos);
    const PairsVector3<double> hostPos = ac.getPosition(0);
    printf("getPosition(0) from host = (%f, %f, %f) \n", hostPos[0], hostPos[1], hostPos[2]);

    mykernel<<<1, 1>>>(ac);
    // A kernel launch returns no status itself: check for launch errors first,
    // then synchronize to check for errors raised while the kernel executed.
    checkCudaError(cudaGetLastError(), "mykernel launch");
    checkCudaError(cudaDeviceSynchronize(), "mykernel");

    // In mykernel, we modify the position of the particle ON DEVICE
    // TODO: To reflect this modification on the host, we need a 'sync' function along with getters and setters
    // (e.g. 'syncPosition') to make sure both host and device data are up to date and in sync with each other.
    // But unlike getters and setters, sync functions are only callable from host.
    // The 'sync' function copies the property to device if its host flag is set, or copies the property to host
    // if its device flag is set.
    // If a setter is called from host, set the host flag and unset the device flag for that property.
    // If a setter is called from device, set the device flag and unset the host flag for that property.

    // Release everything allocated above.
    ac.release();
    checkCudaError(cudaFree(pobj->position_d), "cudaFree(position_d)");
    delete[] pobj->position_h;
    delete pobj;

    return 0;
}