about | summary | refs | log | tree | commit | diff | stats
path: root/nvtaskset.c
diff options
context:
space:
mode:
Diffstat (limited to 'nvtaskset.c')
-rw-r--r-- nvtaskset.c 29
1 files changed, 27 insertions, 2 deletions
diff --git a/nvtaskset.c b/nvtaskset.c
index 4901cbe..5cf3a85 100644
--- a/nvtaskset.c
+++ b/nvtaskset.c
@@ -68,6 +68,25 @@ void libsmctrl_get_gpc_info_ext_easy(uint32_t* num_gpcs, uint128_t** masks, int
68 int res; 68 int res;
69 CUcontext ctx; 69 CUcontext ctx;
70 char *old_order = NULL; 70 char *old_order = NULL;
71 int old_stderr, dev_null_fd;
72
73 // Attempt to read the configuration, assuming the GPU is on, and fall
74 // back to creating a context if this fails.
75 // (Creating a CUDA context is very expensive and best avoided)
76 // (Redirect stderr while doing this to mute libsmctrl error messages)
77 if ((dev_null_fd = open("/dev/null", O_WRONLY)) == -1)
78 error(1, errno, "Unable to open /dev/null");
79 if (old_stderr = dup(STDERR_FILENO) == -1)
80 error(1, errno, "Unable to duplicate stderr file descriptor");
81 if (dup2(dev_null_fd, STDERR_FILENO) == -1)
82 error(1, errno, "Unable to overwrite stderr file descriptor");
83 res = libsmctrl_get_gpc_info_ext(num_gpcs, masks, gpu_id);
84 if (dup2(old_stderr, STDERR_FILENO) == -1)
85 error(1, errno, "Unable to restore stderr file descriptor");
86 // End if we were successful, otherwise fallback
87 if (res == 0)
88 return;
89
71 // Tell CUDA to use PCI device id ordering (to match nvdebug) 90 // Tell CUDA to use PCI device id ordering (to match nvdebug)
72 putenv((char*)"CUDA_DEVICE_ORDER=PCI_BUS_ID"); 91 putenv((char*)"CUDA_DEVICE_ORDER=PCI_BUS_ID");
73 // Allow CUDA to see all devices (to better match nvdebug) 92 // Allow CUDA to see all devices (to better match nvdebug)
@@ -80,12 +99,12 @@ void libsmctrl_get_gpc_info_ext_easy(uint32_t* num_gpcs, uint128_t** masks, int
80 if ((res = cuInit(0))) { 99 if ((res = cuInit(0))) {
81 const char* name; 100 const char* name;
82 cuGetErrorName(res, &name); 101 cuGetErrorName(res, &name);
83 error(1, 0, "Unable to create a initialize CUDA, error %s\n", name); 102 error(1, 0, "Unable to create a initialize CUDA, error %s", name);
84 } 103 }
85 if ((res = cuCtxCreate(&ctx, 0, gpu_id))) { 104 if ((res = cuCtxCreate(&ctx, 0, gpu_id))) {
86 const char* name; 105 const char* name;
87 cuGetErrorName(res, &name); 106 cuGetErrorName(res, &name);
88 error(1, 0, "Unable to create a CUDA context, error %s\n", name); 107 error(1, 0, "Unable to create a CUDA context, error %s", name);
89 } 108 }
90 // Pull topology information from libsmctrl 109 // Pull topology information from libsmctrl
91 if ((res = libsmctrl_get_gpc_info_ext(num_gpcs, masks, gpu_id)) != 0) { 110 if ((res = libsmctrl_get_gpc_info_ext(num_gpcs, masks, gpu_id)) != 0) {
@@ -96,6 +115,12 @@ void libsmctrl_get_gpc_info_ext_easy(uint32_t* num_gpcs, uint128_t** masks, int
96 fprintf(stderr, "%s: Is the GPU powered on, i.e., is there an active context?\n", program_invocation_name); 115 fprintf(stderr, "%s: Is the GPU powered on, i.e., is there an active context?\n", program_invocation_name);
97 exit(1); 116 exit(1);
98 } 117 }
118 // Delete the CUDA context
119 if (res = cuCtxDestroy(ctx)) {
120 const char* name;
121 cuGetErrorName(res, &name);
122 error(1, 0, "Unable to destroy CUDA context, error %s", name);
123 }
99 // Restore the environment (in case we exec() later) 124 // Restore the environment (in case we exec() later)
100 unsetenv("CUDA_DEVICE_ORDER"); 125 unsetenv("CUDA_DEVICE_ORDER");
101 if (old_order) { 126 if (old_order) {