about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- Makefile 39
-rw-r--r-- nvtaskset.c 27
2 files changed, 49 insertions, 17 deletions
diff --git a/Makefile b/Makefile
index 48047bc..406193a 100644
--- a/Makefile
+++ b/Makefile
@@ -71,22 +71,35 @@ clean:
71 libsmctrl_test_next_mask libsmctrl_test_next_mask_override \ 71 libsmctrl_test_next_mask libsmctrl_test_next_mask_override \
72 nvtaskset libcuda.so.1 72 nvtaskset libcuda.so.1
73 73
74# On L4T (Linux4Tegra), the paths are different, and there may be multiple copies of libcuda.so.1
74install: libcuda.so.1 75install: libcuda.so.1
75 @# Check that CUDA is installed first 76 @set -e -x; \
76 test -f /usr/lib/$(ARCH)/libcuda.so.*.* 77 for DIR in /usr/lib/$(ARCH) /usr/local/cuda-*.*/compat /usr/lib/$(ARCH)/nvidia; do \
77 @# Change libcuda.so link to bypass libcuda.so.1 78 if [ ! -d $$DIR ]; then continue; fi; \
78 sudo ln -sf /usr/lib/$(ARCH)/libcuda.so.*.* /usr/lib/$(ARCH)/libcuda.so 79 # Check that CUDA is installed in this location \
79 @# Remove libcuda.so.1 symlink 80 if [ ! -f $$DIR/libcuda.so.*.* ]; then continue; fi; \
80 sudo rm /usr/lib/$(ARCH)/libcuda.so.1 81 # Change libcuda.so link to bypass libcuda.so.1 \
81 @# Install wrapper as libcuda.so.1 82 sudo ln -sf $$DIR/libcuda.so.*.* $$DIR/libcuda.so; \
82 sudo cp libcuda.so.1 /usr/lib/$(ARCH)/libcuda.so.1 83 # Remove libcuda.so.1 symlink \
84 sudo rm $$DIR/libcuda.so.1; \
85 # Install wrapper as libcuda.so.1 \
86 sudo cp libcuda.so.1 $$DIR/libcuda.so.1; \
87 done \
88 # Special handling for L4T \
89 if [ -d /usr/lib/$(ARCH)/nvidia ]; then sudo ln -sf nvidia/libcuda.so.1 /usr/lib/$(ARCH)/libcuda.so.1; fi
83 90
84remove: 91remove:
85 @# Test that our library is installed first 92 @set -e -x; \
86 test ! -L /usr/lib/$(ARCH)/libcuda.so.1 93 for DIR in /usr/lib/$(ARCH) /usr/local/cuda-*.*/compat /usr/lib/$(ARCH)/nvidia; do \
87 @# Overwrite install with original symlinks 94 if [ ! -d $$DIR ]; then continue; fi; \
88 sudo ln -sf libcuda.so.1 /usr/lib/$(ARCH)/libcuda.so 95 # Check that CUDA is installed in this location \
89 sudo ln -sf /usr/lib/$(ARCH)/libcuda.so.*.* /usr/lib/$(ARCH)/libcuda.so.1 96 if [ ! -f $$DIR/libcuda.so.*.* ]; then continue; fi; \
97 # Test that our library is installed here \
98 if [ -L $$DIR/libcuda.so.1 ]; then continue; fi; \
99 # Overwrite install with original symlinks \
100 sudo ln -sf libcuda.so.1 $$DIR/libcuda.so; \
101 sudo ln -sf $$DIR/libcuda.so.*.* $$DIR/libcuda.so.1; \
102 done
90 103
91run_tests: tests 104run_tests: tests
92 ./libsmctrl_test_global_mask 105 ./libsmctrl_test_global_mask
diff --git a/nvtaskset.c b/nvtaskset.c
index 5cf3a85..3a50221 100644
--- a/nvtaskset.c
+++ b/nvtaskset.c
@@ -359,16 +359,34 @@ static error_t arg_parser(int key, char* arg, struct argp_state *state){
359 else 359 else
360 printf("PID %d's current TPC affinity mask: 0x%.0lx%016lx\n", target_pid, (uint64_t)(enable_mask >> 64), (uint64_t)enable_mask); 360 printf("PID %d's current TPC affinity mask: 0x%.0lx%016lx\n", target_pid, (uint64_t)(enable_mask >> 64), (uint64_t)enable_mask);
361 } else if (is_cmd) { 361 } else if (is_cmd) {
362 if (!getenv("CUDA_MPS_PIPE_DIRECTORY")) {
363 // Pipe directory is not set by default on L4T aarch64
364 putenv("CUDA_MPS_PIPE_DIRECTORY=/tmp/nvidia-mps");
365 }
362 // start MPS (as needed) 366 // start MPS (as needed)
363 if (!libsmctrl_is_mps_running()) { 367 if (!libsmctrl_is_mps_running()) {
364 fprintf(stderr, "nvtaskset: MPS control deamon does not appear to be running. Automatically starting...\n"); 368 fprintf(stderr, "nvtaskset: MPS control deamon does not appear to be running. Automatically starting...\n");
369 // TODO: Mute the error message if this command isn't found?
365 int ret = system("nvidia-cuda-mps-control -d"); 370 int ret = system("nvidia-cuda-mps-control -d");
371 // TODO: Fall back to full x86_64 install location?
372 // Fall back to full L4T aarch64 install location
373 if (ret == 0x7f00) {
374 // nvidia-cuda-mps-control needs nvidia-cuda-mps-server to be on PATH
375 char *old_path = getenv("PATH");
376 char *new_path;
377 if (old_path)
378 asprintf(&new_path, "PATH=/usr/local/cuda/compat/:%s", old_path);
379 else
380 new_path = "PATH=/usr/local/cuda/compat/";
381 putenv(new_path);
382 ret = system("nvidia-cuda-mps-control -d");
383 // TODO: Put this warning after error checking
384 fprintf(stderr, "nvtaskset: Warning: Set the CUDA_MPS_PIPE_DIRECTORY environment variable to /tmp/nvidia-mps to ensure that subsequently launched tasks associate with MPS on L4T systems!\n");
385 }
366 if (ret == -1) 386 if (ret == -1)
367 error(1, errno, "Unable to run subshell to start MPS"); 387 error(1, errno, "Unable to run subshell to start MPS");
368 if (ret == 1) { 388 else if (ret)
369 fprintf(stderr, "nvtaskset: Error starting MPS control deamon. Terminating...\n"); 389 error(1, 0, "Error starting MPS control deamon. Terminating...");
370 return 1;
371 }
372 fprintf(stderr, "nvtaskset: Done. Use \"echo quit | nvidia-cuda-mps-control\" to terminate it later as desired.\n"); 390 fprintf(stderr, "nvtaskset: Done. Use \"echo quit | nvidia-cuda-mps-control\" to terminate it later as desired.\n");
373 } 391 }
374 // launch subprocess 392 // launch subprocess
@@ -377,6 +395,7 @@ static error_t arg_parser(int key, char* arg, struct argp_state *state){
377 snprintf(mask_str, 36, "~0x%.0lx%016lx", (uint64_t)(mask >> 64), (uint64_t)mask); 395 snprintf(mask_str, 36, "~0x%.0lx%016lx", (uint64_t)(mask >> 64), (uint64_t)mask);
378 setenv("LIBSMCTRL_MASK", mask_str, 1); 396 setenv("LIBSMCTRL_MASK", mask_str, 1);
379 // Start task 397 // Start task
398 // TODO: Check that the loader is configured to find the correct libcuda.so.1
380 execvp(sub_argv[0], sub_argv); 399 execvp(sub_argv[0], sub_argv);
381 error(1, errno, "Unable to launch task '%s'", sub_argv[0]); 400 error(1, errno, "Unable to launch task '%s'", sub_argv[0]);
382 } else { 401 } else {