Skip to content
Snippets Groups Projects
Commit 244bd973 authored by Anna Wellmann
Browse files

Log device number for cuda devices

parent e19c93fe
No related branches found
No related tags found
1 merge request: !244 "Fix the regression tests that run on the Phoenix cluster"
...@@ -45,8 +45,7 @@ collect: ...@@ -45,8 +45,7 @@ collect:
overwrite: true overwrite: true
clean: clean:
- multigpu_test/output/* - multigpu_test/*
- multigpu_test/src/*
sbatch: multigpu_test/slurm4GPU.job sbatch: multigpu_test/slurm4GPU.job
continue_if_job_fails: true continue_if_job_fails: true
...@@ -46,8 +46,7 @@ collect: ...@@ -46,8 +46,7 @@ collect:
overwrite: true overwrite: true
clean: clean:
- multigpu_test/output/* - multigpu_test/*
- multigpu_test/src/*
sbatch: multigpu_test/slurm8GPU.job sbatch: multigpu_test/slurm8GPU.job
continue_if_job_fails: true continue_if_job_fails: true
...@@ -15,7 +15,7 @@ void verifyNumberOfDevices(int deviceId) ...@@ -15,7 +15,7 @@ void verifyNumberOfDevices(int deviceId)
int device_count = 0; int device_count = 0;
cudaError_t errorId = cudaGetDeviceCount(&device_count); cudaError_t errorId = cudaGetDeviceCount(&device_count);
if(errorId != cudaSuccess) { if(errorId != cudaSuccess) {
VF_LOG_CRITICAL("Error while accessing the device count: {}", cudaGetErrorString(errorId)); VF_LOG_CRITICAL("Device {}: Error while accessing the device count: {}", deviceId, cudaGetErrorString(errorId));
} }
if (deviceId > device_count) { if (deviceId > device_count) {
throw std::runtime_error("chosen gpudevice >= device_count ... exiting\n"); throw std::runtime_error("chosen gpudevice >= device_count ... exiting\n");
...@@ -28,13 +28,13 @@ void verifyComputeCapability(int deviceId) ...@@ -28,13 +28,13 @@ void verifyComputeCapability(int deviceId)
cudaError_t errorId = cudaGetDeviceProperties(&deviceProp, deviceId); cudaError_t errorId = cudaGetDeviceProperties(&deviceProp, deviceId);
if(errorId != cudaSuccess){ if(errorId != cudaSuccess){
VF_LOG_CRITICAL("Error while accessing the device properties occurs: {}", cudaGetErrorString(errorId)); VF_LOG_CRITICAL("Device {}: Error while accessing the device properties occurs: {}", deviceId, cudaGetErrorString(errorId));
} }
VF_LOG_INFO("[compute capability] = [{}.{}]\n", deviceProp.major, deviceProp.minor); VF_LOG_INFO("[compute capability] = [{}.{}]\n", deviceProp.major, deviceProp.minor);
if (deviceProp.major > 999) { if (deviceProp.major > 999) {
throw std::runtime_error("warning, CUDA Device Emulation (CPU) detected, exiting\n"); throw std::runtime_error("Warning, CUDA Device Emulation (CPU) detected, exiting\n");
} }
} }
...@@ -43,13 +43,13 @@ void setCudaDevice(int deviceId) ...@@ -43,13 +43,13 @@ void setCudaDevice(int deviceId)
// choose a cuda device for kernel execution // choose a cuda device for kernel execution
cudaError_t errorId = cudaSetDevice(deviceId); cudaError_t errorId = cudaSetDevice(deviceId);
if (errorId != cudaSuccess) { if (errorId != cudaSuccess) {
VF_LOG_CRITICAL("Error while setting the device to {}: {}", deviceId, cudaGetErrorString(errorId)); VF_LOG_CRITICAL("Device {}: Error while setting the device to: {}", deviceId, cudaGetErrorString(errorId));
} else { } else {
int device; int device;
// double check that device was properly selected // double check that device was properly selected
errorId = cudaGetDevice(&device); errorId = cudaGetDevice(&device);
if(errorId != cudaSuccess) { if(errorId != cudaSuccess) {
VF_LOG_CRITICAL("Error while getting the device: {}", cudaGetErrorString(errorId)); VF_LOG_CRITICAL("Device {}: Error while getting the device: {}", deviceId, cudaGetErrorString(errorId));
} }
} }
} }
...@@ -70,7 +70,7 @@ void printCudaInformation(int deviceId) ...@@ -70,7 +70,7 @@ void printCudaInformation(int deviceId)
cudaError_t errorId = cudaGetDeviceProperties(&prop, deviceId); cudaError_t errorId = cudaGetDeviceProperties(&prop, deviceId);
if(errorId != cudaSuccess){ if(errorId != cudaSuccess){
VF_LOG_CRITICAL("Error while accessing the device properties occurs: {}", cudaGetErrorString(errorId)); VF_LOG_CRITICAL("Device {}: Error while accessing the device properties for occurs: {}", deviceId, cudaGetErrorString(errorId));
} }
printf(" --- General Information for device %d ---\n", deviceId); printf(" --- General Information for device %d ---\n", deviceId);
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment