Build-system patch touching two files.
  .gitignore: additionally ignore the obj_cpu and obj_gpu entries.
  Makefile:   append --use_fast_math to NVCC_FLAGS, keep a commented-out
              "-ccbin mpicc" alternative, and pass $(NVCC_FLAGS) directly after
              $(NVCC) in both nvcc compile commands.
NOTE(review): this patch was recovered from a single collapsed line; blank
context lines and recipe tabs were re-inserted so every hunk matches its @@
line counts. Verify against the original commit before applying.

diff --git a/.gitignore b/.gitignore
index 18d522f0528f176d2a711ff3dae53a13e4801672..37f64d450b07bdc6632bb730810109dabaa3e374 100644
--- a/.gitignore
+++ b/.gitignore
@@ -7,6 +7,8 @@ dist
 pairs.egg-info
 output*
 obj
+obj_cpu
+obj_gpu
 todo*
 lj_cpu
 lj_gpu
diff --git a/Makefile b/Makefile
index ff14b93461d02f3c35bfcc1a5a20a7e9a04b05c6..4024f500a87b1a901a76683ea46b9194ee518ded 100644
--- a/Makefile
+++ b/Makefile
@@ -16,7 +16,8 @@ DEBUG_FLAGS=
 
 # CUDA settings
 NVCC=nvcc
-NVCC_FLAGS=-O3
+NVCC_FLAGS=-O3 --use_fast_math
+#NVCC_FLAGS=-ccbin mpicc $(CFLAGS)
 NVCC_PATH:="$(shell which ${NVCC})"
 CUDA_FLAGS=-DENABLE_CUDA_AWARE_MPI
 CUDA_BIN_PATH:="$(shell dirname ${NVCC_PATH})"
@@ -73,14 +74,14 @@ $(GPU_OBJ_PATH)/regular_6d_stencil.o: runtime/domain/regular_6d_stencil.cpp
 	$(CC) -c -o $@ $< $(DEBUG_FLAGS) $(MPI_FLAGS) $(CFLAGS) $(CUDA_FLAGS)
 
 $(GPU_OBJ_PATH)/cuda_runtime.o: runtime/devices/cuda.cu
-	$(NVCC) -c -o $@ $< $(DEBUG_FLAGS) $(MPI_FLAGS) $(NVCC_FLAGS) $(CUDA_FLAGS)
+	$(NVCC) $(NVCC_FLAGS) -c -o $@ $< $(DEBUG_FLAGS) $(MPI_FLAGS) $(CUDA_FLAGS)
 
 # Targets
 $(CPU_BIN): $(CPU_SRC) $(CPU_OBJ_PATH)/pairs.o $(CPU_OBJ_PATH)/regular_6d_stencil.o $(CPU_OBJ_PATH)/dummy.o
 	$(CC) $(CFLAGS) -o $(CPU_BIN) $(CPU_SRC) $(CPU_OBJ_PATH)/pairs.o $(CPU_OBJ_PATH)/regular_6d_stencil.o $(CPU_OBJ_PATH)/dummy.o $(DEBUG_FLAGS)
 
 $(GPU_BIN): $(GPU_SRC) $(GPU_OBJ_PATH)/pairs.o $(GPU_OBJ_PATH)/regular_6d_stencil.o $(GPU_OBJ_PATH)/cuda_runtime.o
-	$(NVCC) -c -o $(GPU_OBJ_PATH)/$(GPU_BIN).o $(GPU_SRC) $(DEBUG_FLAGS) $(MPI_FLAGS) $(NVCC_FLAGS) $(CUDA_FLAGS)
+	$(NVCC) $(NVCC_FLAGS) -c -o $(GPU_OBJ_PATH)/$(GPU_BIN).o $(GPU_SRC) $(DEBUG_FLAGS) $(MPI_FLAGS) $(CUDA_FLAGS)
 	$(CC) -o $(GPU_BIN) $(GPU_OBJ_PATH)/$(GPU_BIN).o $(GPU_OBJ_PATH)/cuda_runtime.o $(GPU_OBJ_PATH)/pairs.o $(GPU_OBJ_PATH)/regular_6d_stencil.o -lcudart -L$(CUDA_PATH)/lib64 $(CUDA_FLAGS) $(CFLAGS)
 
 clean: