Program Listing for File gpu_cuda.h
↰ Return to documentation for file (source/lib/include/gpu_cuda.h
)
// SPDX-License-Identifier: LGPL-3.0-or-later
#pragma once
#include <assert.h>
#include <cuda_runtime.h>
#include <stdio.h>
#include <string>
#include <vector>
#include "errors.h"
#define gpuGetLastError cudaGetLastError
#define gpuDeviceSynchronize cudaDeviceSynchronize
#define gpuMemcpy cudaMemcpy
#define gpuMemcpyDeviceToHost cudaMemcpyDeviceToHost
#define gpuMemcpyHostToDevice cudaMemcpyHostToDevice
#define gpuMemcpyDeviceToDevice cudaMemcpyDeviceToDevice
#define gpuMemset cudaMemset
#define GPU_MAX_NBOR_SIZE 4096
#define DPErrcheck(res) \
{ DPAssert((res), __FILE__, __LINE__); }
inline void DPAssert(cudaError_t code,
const char *file,
int line,
bool abort = true) {
if (code != cudaSuccess) {
std::string error_msg = "CUDA Runtime library throws an error: " +
std::string(cudaGetErrorString(code)) +
", in file " + std::string(file) + ": " +
std::to_string(line);
if (code == 2) {
// out of memory
error_msg +=
"\nYour memory is not enough, thus an error has been raised "
"above. You need to take the following actions:\n"
"1. Check if the network size of the model is too large.\n"
"2. Check if the batch size of training or testing is too large. "
"You can set the training batch size to `auto`.\n"
"3. Check if the number of atoms is too large.\n"
"4. Check if another program is using the same GPU by execuating "
"`nvidia-smi`. "
"The usage of GPUs is controlled by `CUDA_VISIBLE_DEVICES` "
"environment variable.";
if (abort) {
throw deepmd::deepmd_exception_oom(error_msg);
}
}
if (abort) {
throw deepmd::deepmd_exception(error_msg);
} else {
fprintf(stderr, "%s\n", error_msg.c_str());
}
}
}
#define nborErrcheck(res) \
{ nborAssert((res), __FILE__, __LINE__); }
inline void nborAssert(cudaError_t code,
const char *file,
int line,
bool abort = true) {
if (code != cudaSuccess) {
std::string error_msg = "DeePMD-kit: Illegal nbor list sorting: ";
try {
DPAssert(code, file, line, true);
} catch (deepmd::deepmd_exception_oom &e) {
error_msg += e.what();
if (abort) {
throw deepmd::deepmd_exception_oom(error_msg);
} else {
fprintf(stderr, "%s\n", error_msg.c_str());
}
} catch (deepmd::deepmd_exception &e) {
error_msg += e.what();
if (abort) {
throw deepmd::deepmd_exception(error_msg);
} else {
fprintf(stderr, "%s\n", error_msg.c_str());
}
}
}
}
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ < 600
static __inline__ __device__ double atomicAdd(double *address, double val) {
unsigned long long int *address_as_ull = (unsigned long long int *)address;
unsigned long long int old = *address_as_ull, assumed;
do {
assumed = old;
old = atomicCAS(address_as_ull, assumed,
__double_as_longlong(val + __longlong_as_double(assumed)));
// Note: uses integer comparison to avoid hang in case of NaN (since NaN !=
// NaN) } while (assumed != old);
} while (assumed != old);
return __longlong_as_double(old);
}
#endif
namespace deepmd {
inline void DPGetDeviceCount(int &gpu_num) { cudaGetDeviceCount(&gpu_num); }
inline cudaError_t DPSetDevice(int rank) { return cudaSetDevice(rank); }
template <typename FPTYPE>
void memcpy_host_to_device(FPTYPE *device, const std::vector<FPTYPE> &host) {
DPErrcheck(cudaMemcpy(device, &host[0], sizeof(FPTYPE) * host.size(),
cudaMemcpyHostToDevice));
}
template <typename FPTYPE>
void memcpy_host_to_device(FPTYPE *device, const FPTYPE *host, const int size) {
DPErrcheck(
cudaMemcpy(device, host, sizeof(FPTYPE) * size, cudaMemcpyHostToDevice));
}
template <typename FPTYPE>
void memcpy_device_to_host(const FPTYPE *device, std::vector<FPTYPE> &host) {
DPErrcheck(cudaMemcpy(&host[0], device, sizeof(FPTYPE) * host.size(),
cudaMemcpyDeviceToHost));
}
template <typename FPTYPE>
void memcpy_device_to_host(const FPTYPE *device, FPTYPE *host, const int size) {
DPErrcheck(
cudaMemcpy(host, device, sizeof(FPTYPE) * size, cudaMemcpyDeviceToHost));
}
template <typename FPTYPE>
void malloc_device_memory(FPTYPE *&device, const std::vector<FPTYPE> &host) {
DPErrcheck(cudaMalloc((void **)&device, sizeof(FPTYPE) * host.size()));
}
template <typename FPTYPE>
void malloc_device_memory(FPTYPE *&device, const int size) {
DPErrcheck(cudaMalloc((void **)&device, sizeof(FPTYPE) * size));
}
template <typename FPTYPE>
void malloc_device_memory_sync(FPTYPE *&device,
const std::vector<FPTYPE> &host) {
DPErrcheck(cudaMalloc((void **)&device, sizeof(FPTYPE) * host.size()));
memcpy_host_to_device(device, host);
}
template <typename FPTYPE>
void malloc_device_memory_sync(FPTYPE *&device,
const FPTYPE *host,
const int size) {
DPErrcheck(cudaMalloc((void **)&device, sizeof(FPTYPE) * size));
memcpy_host_to_device(device, host, size);
}
template <typename FPTYPE>
void delete_device_memory(FPTYPE *&device) {
if (device != NULL) {
DPErrcheck(cudaFree(device));
}
}
template <typename FPTYPE>
void memset_device_memory(FPTYPE *device, const int var, const int size) {
DPErrcheck(cudaMemset(device, var, sizeof(FPTYPE) * size));
}
} // end of namespace deepmd