about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- libsmctrl.c 10
-rw-r--r-- nvtaskset.c 29
2 files changed, 32 insertions, 7 deletions
diff --git a/libsmctrl.c b/libsmctrl.c
index 79d2b33..807cd6d 100644
--- a/libsmctrl.c
+++ b/libsmctrl.c
@@ -582,11 +582,11 @@ int libsmctrl_get_gpc_info_ext(uint32_t* num_enabled_gpcs, uint128_t** tpcs_for_
582int libsmctrl_get_tpc_info(uint32_t* num_tpcs, int dev) { 582int libsmctrl_get_tpc_info(uint32_t* num_tpcs, int dev) {
583 uint32_t num_gpcs; 583 uint32_t num_gpcs;
584 uint128_t* tpcs_per_gpc; 584 uint128_t* tpcs_per_gpc;
585 int res; 585 int res, gpc;
586 if (res = libsmctrl_get_gpc_info_ext(&num_gpcs, &tpcs_per_gpc, dev)) 586 if (res = libsmctrl_get_gpc_info_ext(&num_gpcs, &tpcs_per_gpc, dev))
587 return res; 587 return res;
588 *num_tpcs = 0; 588 *num_tpcs = 0;
589 for (int gpc = 0; gpc < num_gpcs; gpc++) { 589 for (gpc = 0; gpc < num_gpcs; gpc++) {
590 *num_tpcs += __builtin_popcountl(tpcs_per_gpc[gpc]); 590 *num_tpcs += __builtin_popcountl(tpcs_per_gpc[gpc]);
591 *num_tpcs += __builtin_popcountl(tpcs_per_gpc[gpc] >> 64); 591 *num_tpcs += __builtin_popcountl(tpcs_per_gpc[gpc] >> 64);
592 } 592 }
@@ -596,7 +596,7 @@ int libsmctrl_get_tpc_info(uint32_t* num_tpcs, int dev) {
596// @param dev Device index as understood by CUDA **can differ from nvdebug idx** 596// @param dev Device index as understood by CUDA **can differ from nvdebug idx**
597// This implementation is fragile, and could be incorrect for odd GPUs 597// This implementation is fragile, and could be incorrect for odd GPUs
598int libsmctrl_get_tpc_info_cuda(uint32_t* num_tpcs, int cuda_dev) { 598int libsmctrl_get_tpc_info_cuda(uint32_t* num_tpcs, int cuda_dev) {
599 int num_sms, major, minor, res = 0; 599 int num_sms, sms_per_tpc, major, minor, res = 0;
600 const char* err_str; 600 const char* err_str;
601 if (res = cuInit(0)) 601 if (res = cuInit(0))
602 goto abort_cuda; 602 goto abort_cuda;
@@ -611,7 +611,6 @@ int libsmctrl_get_tpc_info_cuda(uint32_t* num_tpcs, int cuda_dev) {
611 return ENOTSUP; 611 return ENOTSUP;
612 // Everything newer than Pascal (as of Hopper) has 2 SMs per TPC, as well 612 // Everything newer than Pascal (as of Hopper) has 2 SMs per TPC, as well
613 // as the P100, which is uniquely sm_60 613 // as the P100, which is uniquely sm_60
614 int sms_per_tpc;
615 if (major > 6 || (major == 6 && minor == 0)) 614 if (major > 6 || (major == 6 && minor == 0))
616 sms_per_tpc = 2; 615 sms_per_tpc = 2;
617 else 616 else
@@ -708,6 +707,7 @@ __attribute__((constructor)) static void setup(void) {
708 // memory segment. 707 // memory segment.
709 static uint128_t mask; 708 static uint128_t mask;
710 bool invert = false; 709 bool invert = false;
710 int fd;
711 711
712 mask_str = getenv("LIBSMCTRL_MASK"); 712 mask_str = getenv("LIBSMCTRL_MASK");
713 713
@@ -742,7 +742,7 @@ __attribute__((constructor)) static void setup(void) {
742 742
743 // Create shared memory region for the supreme mask such that nvtaskset 743 // Create shared memory region for the supreme mask such that nvtaskset
744 // can read and modify it 744 // can read and modify it
745 int fd = memfd_create("libsmctrl", MFD_CLOEXEC); 745 fd = memfd_create("libsmctrl", MFD_CLOEXEC);
746 if (fd == -1) { 746 if (fd == -1) {
747 abort(0, errno, "Unable to create shared memory for dynamic partition changes. Dynamic changes disabled"); 747 abort(0, errno, "Unable to create shared memory for dynamic partition changes. Dynamic changes disabled");
748 g_supreme_sm_mask = &mask; 748 g_supreme_sm_mask = &mask;
diff --git a/nvtaskset.c b/nvtaskset.c
index 4901cbe..5cf3a85 100644
--- a/nvtaskset.c
+++ b/nvtaskset.c
@@ -68,6 +68,25 @@ void libsmctrl_get_gpc_info_ext_easy(uint32_t* num_gpcs, uint128_t** masks, int
68 int res; 68 int res;
69 CUcontext ctx; 69 CUcontext ctx;
70 char *old_order = NULL; 70 char *old_order = NULL;
71 int old_stderr, dev_null_fd;
72
73 // Attempt to read the configuration, assuming the GPU is on, and fall
74 // back to creating a context if this fails.
75 // (Creating a CUDA context is very expensive and best avoided)
76 // (Redirect stderr while doing this to mute libsmctrl error messages)
77 if ((dev_null_fd = open("/dev/null", O_WRONLY)) == -1)
78 error(1, errno, "Unable to open /dev/null");
79 if (old_stderr = dup(STDERR_FILENO) == -1)
80 error(1, errno, "Unable to duplicate stderr file descriptor");
81 if (dup2(dev_null_fd, STDERR_FILENO) == -1)
82 error(1, errno, "Unable to overwrite stderr file descriptor");
83 res = libsmctrl_get_gpc_info_ext(num_gpcs, masks, gpu_id);
84 if (dup2(old_stderr, STDERR_FILENO) == -1)
85 error(1, errno, "Unable to restore stderr file descriptor");
86 // End if we were successful, otherwise fallback
87 if (res == 0)
88 return;
89
71 // Tell CUDA to use PCI device id ordering (to match nvdebug) 90 // Tell CUDA to use PCI device id ordering (to match nvdebug)
72 putenv((char*)"CUDA_DEVICE_ORDER=PCI_BUS_ID"); 91 putenv((char*)"CUDA_DEVICE_ORDER=PCI_BUS_ID");
73 // Allow CUDA to see all devices (to better match nvdebug) 92 // Allow CUDA to see all devices (to better match nvdebug)
@@ -80,12 +99,12 @@ void libsmctrl_get_gpc_info_ext_easy(uint32_t* num_gpcs, uint128_t** masks, int
80 if ((res = cuInit(0))) { 99 if ((res = cuInit(0))) {
81 const char* name; 100 const char* name;
82 cuGetErrorName(res, &name); 101 cuGetErrorName(res, &name);
83 error(1, 0, "Unable to create a initialize CUDA, error %s\n", name); 102 error(1, 0, "Unable to create a initialize CUDA, error %s", name);
84 } 103 }
85 if ((res = cuCtxCreate(&ctx, 0, gpu_id))) { 104 if ((res = cuCtxCreate(&ctx, 0, gpu_id))) {
86 const char* name; 105 const char* name;
87 cuGetErrorName(res, &name); 106 cuGetErrorName(res, &name);
88 error(1, 0, "Unable to create a CUDA context, error %s\n", name); 107 error(1, 0, "Unable to create a CUDA context, error %s", name);
89 } 108 }
90 // Pull topology information from libsmctrl 109 // Pull topology information from libsmctrl
91 if ((res = libsmctrl_get_gpc_info_ext(num_gpcs, masks, gpu_id)) != 0) { 110 if ((res = libsmctrl_get_gpc_info_ext(num_gpcs, masks, gpu_id)) != 0) {
@@ -96,6 +115,12 @@ void libsmctrl_get_gpc_info_ext_easy(uint32_t* num_gpcs, uint128_t** masks, int
96 fprintf(stderr, "%s: Is the GPU powered on, i.e., is there an active context?\n", program_invocation_name); 115 fprintf(stderr, "%s: Is the GPU powered on, i.e., is there an active context?\n", program_invocation_name);
97 exit(1); 116 exit(1);
98 } 117 }
118 // Delete the CUDA context
119 if (res = cuCtxDestroy(ctx)) {
120 const char* name;
121 cuGetErrorName(res, &name);
122 error(1, 0, "Unable to destroy CUDA context, error %s", name);
123 }
99 // Restore the environment (in case we exec() later) 124 // Restore the environment (in case we exec() later)
100 unsetenv("CUDA_DEVICE_ORDER"); 125 unsetenv("CUDA_DEVICE_ORDER");
101 if (old_order) { 126 if (old_order) {