Speed up nvtaskset by skipping CUDA context creation if possible

The GPU needs to be on before the GPC-to-TPC mapping registers can be read. The easiest way to power on the GPU is to create a CUDA context, but this is fairly expensive. In nvtaskset, some GPU-using task will likely already be running, and so we can skip CUDA context creation in the common case.

Bug fixes:
- Delete the temporary CUDA context created by nvtaskset after it is done with it. Fixes bug where nvtaskset would leak this context into any program it launches.

Additional changes:
- Style fixes in libsmctrl.c
- Remove superfluous newlines from error() calls in nvtaskset.c
author: Joshua Bakita <jbakita@cs.unc.edu> 2025-06-25 20:01:19 -0400
committer: Joshua Bakita <jbakita@cs.unc.edu> 2025-06-25 20:43:38 -0400
commit: 9b4b4f71fd843c3ec97ca6f55935675e62ca31f5 (patch)
tree: e38f415c07415078e37ad8e1a0e08b6ff513114b /nvtaskset.c
parent: e0cb12762d048e388a81cffae3e96bfe2bd672cc (diff)
1 files changed, 27 insertions, 2 deletions
diff --git a/nvtaskset.c b/nvtaskset.c
index 4901cbe..5cf3a85 100644
--- a/nvtaskset.c
+++ b/nvtaskset.c
@@ -68,6 +68,25 @@ void libsmctrl_get_gpc_info_ext_easy(uint32_t* num_gpcs, uint128_t** masks, int
        int res;
        CUcontext ctx;
        char *old_order = NULL;
+        int old_stderr, dev_null_fd;
+        // Attempt to read the configuration, assuming the GPU is on, and fall
+        // back to creating a context if this fails.
+        // (Creating a CUDA context is very expensive and best avoided)
+        // (Redirect stderr while doing this to mute libsmctrl error messages)
+        if ((dev_null_fd = open("/dev/null", O_WRONLY)) == -1)
+                error(1, errno, "Unable to open /dev/null");
+        if (old_stderr = dup(STDERR_FILENO) == -1)
+                error(1, errno, "Unable to duplicate stderr file descriptor");
+        if (dup2(dev_null_fd, STDERR_FILENO) == -1)
+                error(1, errno, "Unable to overwrite stderr file descriptor");
+        res = libsmctrl_get_gpc_info_ext(num_gpcs, masks, gpu_id);
+        if (dup2(old_stderr, STDERR_FILENO) == -1)
+                error(1, errno, "Unable to restore stderr file descriptor");
+        // End if we were successful, otherwise fallback
+        if (res == 0)
+                return;
        // Tell CUDA to use PCI device id ordering (to match nvdebug)
        putenv((char*)"CUDA_DEVICE_ORDER=PCI_BUS_ID");
        // Allow CUDA to see all devices (to better match nvdebug)
@@ -80,12 +99,12 @@ void libsmctrl_get_gpc_info_ext_easy(uint32_t* num_gpcs, uint128_t** masks, int
        if ((res = cuInit(0))) {
                const char* name;
                cuGetErrorName(res, &name);
-                error(1, 0, "Unable to create a initialize CUDA, error %s\n", name);
+                error(1, 0, "Unable to create a initialize CUDA, error %s", name);
        }
        if ((res = cuCtxCreate(&ctx, 0, gpu_id))) {
                const char* name;
                cuGetErrorName(res, &name);
-                error(1, 0, "Unable to create a CUDA context, error %s\n", name);
+                error(1, 0, "Unable to create a CUDA context, error %s", name);
        }
        // Pull topology information from libsmctrl
        if ((res = libsmctrl_get_gpc_info_ext(num_gpcs, masks, gpu_id)) != 0) {
@@ -96,6 +115,12 @@ void libsmctrl_get_gpc_info_ext_easy(uint32_t* num_gpcs, uint128_t** masks, int
                        fprintf(stderr, "%s: Is the GPU powered on, i.e., is there an active context?\n", program_invocation_name);
                exit(1);
        }
+        // Delete the CUDA context
+        if (res = cuCtxDestroy(ctx)) {
+                const char* name;
+                cuGetErrorName(res, &name);
+                error(1, 0, "Unable to destroy CUDA context, error %s", name);
+        }
        // Restore the environment (in case we exec() later)
        unsetenv("CUDA_DEVICE_ORDER");
        if (old_order) {
author	Joshua Bakita <jbakita@cs.unc.edu>	2025-06-25 20:01:19 -0400
committer	Joshua Bakita <jbakita@cs.unc.edu>	2025-06-25 20:43:38 -0400
commit	9b4b4f71fd843c3ec97ca6f55935675e62ca31f5 (patch)
tree	e38f415c07415078e37ad8e1a0e08b6ff513114b /nvtaskset.c
parent	e0cb12762d048e388a81cffae3e96bfe2bd672cc (diff)

diff --git a/nvtaskset.c b/nvtaskset.c index 4901cbe..5cf3a85 100644 --- a/nvtaskset.c +++ b/nvtaskset.c
@@ -68,6 +68,25 @@ void libsmctrl_get_gpc_info_ext_easy(uint32_t* num_gpcs, uint128_t** masks, int
68	int res;	68	int res;
69	CUcontext ctx;	69	CUcontext ctx;
70	char *old_order = NULL;	70	char *old_order = NULL;
		71	int old_stderr, dev_null_fd;
		72
		73	// Attempt to read the configuration, assuming the GPU is on, and fall
		74	// back to creating a context if this fails.
		75	// (Creating a CUDA context is very expensive and best avoided)
		76	// (Redirect stderr while doing this to mute libsmctrl error messages)
		77	if ((dev_null_fd = open("/dev/null", O_WRONLY)) == -1)
		78	error(1, errno, "Unable to open /dev/null");
		79	if (old_stderr = dup(STDERR_FILENO) == -1)
		80	error(1, errno, "Unable to duplicate stderr file descriptor");
		81	if (dup2(dev_null_fd, STDERR_FILENO) == -1)
		82	error(1, errno, "Unable to overwrite stderr file descriptor");
		83	res = libsmctrl_get_gpc_info_ext(num_gpcs, masks, gpu_id);
		84	if (dup2(old_stderr, STDERR_FILENO) == -1)
		85	error(1, errno, "Unable to restore stderr file descriptor");
		86	// End if we were successful, otherwise fallback
		87	if (res == 0)
		88	return;
		89
71	// Tell CUDA to use PCI device id ordering (to match nvdebug)	90	// Tell CUDA to use PCI device id ordering (to match nvdebug)
72	putenv((char*)"CUDA_DEVICE_ORDER=PCI_BUS_ID");	91	putenv((char*)"CUDA_DEVICE_ORDER=PCI_BUS_ID");
73	// Allow CUDA to see all devices (to better match nvdebug)	92	// Allow CUDA to see all devices (to better match nvdebug)
@@ -80,12 +99,12 @@ void libsmctrl_get_gpc_info_ext_easy(uint32_t* num_gpcs, uint128_t** masks, int
80	if ((res = cuInit(0))) {	99	if ((res = cuInit(0))) {
81	const char* name;	100	const char* name;
82	cuGetErrorName(res, &name);	101	cuGetErrorName(res, &name);
83	error(1, 0, "Unable to create a initialize CUDA, error %s\n", name);	102	error(1, 0, "Unable to create a initialize CUDA, error %s", name);
84	}	103	}
85	if ((res = cuCtxCreate(&ctx, 0, gpu_id))) {	104	if ((res = cuCtxCreate(&ctx, 0, gpu_id))) {
86	const char* name;	105	const char* name;
87	cuGetErrorName(res, &name);	106	cuGetErrorName(res, &name);
88	error(1, 0, "Unable to create a CUDA context, error %s\n", name);	107	error(1, 0, "Unable to create a CUDA context, error %s", name);
89	}	108	}
90	// Pull topology information from libsmctrl	109	// Pull topology information from libsmctrl
91	if ((res = libsmctrl_get_gpc_info_ext(num_gpcs, masks, gpu_id)) != 0) {	110	if ((res = libsmctrl_get_gpc_info_ext(num_gpcs, masks, gpu_id)) != 0) {
@@ -96,6 +115,12 @@ void libsmctrl_get_gpc_info_ext_easy(uint32_t* num_gpcs, uint128_t** masks, int
96	fprintf(stderr, "%s: Is the GPU powered on, i.e., is there an active context?\n", program_invocation_name);	115	fprintf(stderr, "%s: Is the GPU powered on, i.e., is there an active context?\n", program_invocation_name);
97	exit(1);	116	exit(1);
98	}	117	}
		118	// Delete the CUDA context
		119	if (res = cuCtxDestroy(ctx)) {
		120	const char* name;
		121	cuGetErrorName(res, &name);
		122	error(1, 0, "Unable to destroy CUDA context, error %s", name);
		123	}
99	// Restore the environment (in case we exec() later)	124	// Restore the environment (in case we exec() later)
100	unsetenv("CUDA_DEVICE_ORDER");	125	unsetenv("CUDA_DEVICE_ORDER");
101	if (old_order) {	126	if (old_order) {