From 85c0b374fefa5bc8a5c3036ab79ba3df78fa71d2 Mon Sep 17 00:00:00 2001 From: Joshua Bakita Date: Thu, 3 Jul 2025 13:41:36 -0400 Subject: Support installation on Linux4Tegra - Use alternate path if nvidia-cuda-mps-control is not found. - Work-around missing CUDA_MPS_PIPE_DIRECTORY default in L4T. - Install to L4T-specific paths if they exist. Note that CUDA_MPS_PIPE_DIRECTORY must be manually set by the user on L4T systems before they can call nvidia-cuda-mps-control, or to have non-nvtaskset-launched applications run with MPS. Tested on Jetson AGX Orin with L4T 36.3.0 and cuda-compat-12-6. --- Makefile | 39 ++++++++++++++++++++++++++------------- nvtaskset.c | 27 +++++++++++++++++++++++---- 2 files changed, 49 insertions(+), 17 deletions(-) diff --git a/Makefile b/Makefile index 48047bc..406193a 100644 --- a/Makefile +++ b/Makefile @@ -71,22 +71,35 @@ clean: libsmctrl_test_next_mask libsmctrl_test_next_mask_override \ nvtaskset libcuda.so.1 +# On L4T (Linux4Tegra), the paths are different, and there may be multiple copies of libcuda.so.1 install: libcuda.so.1 - @# Check that CUDA is installed first - test -f /usr/lib/$(ARCH)/libcuda.so.*.* - @# Change libcuda.so link to bypass libcuda.so.1 - sudo ln -sf /usr/lib/$(ARCH)/libcuda.so.*.* /usr/lib/$(ARCH)/libcuda.so - @# Remove libcuda.so.1 symlink - sudo rm /usr/lib/$(ARCH)/libcuda.so.1 - @# Install wrapper as libcuda.so.1 - sudo cp libcuda.so.1 /usr/lib/$(ARCH)/libcuda.so.1 + @set -e -x; \ + for DIR in /usr/lib/$(ARCH) /usr/local/cuda-*.*/compat /usr/lib/$(ARCH)/nvidia; do \ + if [ ! -d $$DIR ]; then continue; fi; \ + # Check that CUDA is installed in this location \ + if [ ! 
-f $$DIR/libcuda.so.*.* ]; then continue; fi; \ + # Change libcuda.so link to bypass libcuda.so.1 \ + sudo ln -sf $$DIR/libcuda.so.*.* $$DIR/libcuda.so; \ + # Remove libcuda.so.1 symlink \ + sudo rm $$DIR/libcuda.so.1; \ + # Install wrapper as libcuda.so.1 \ + sudo cp libcuda.so.1 $$DIR/libcuda.so.1; \ + done \ + # Special handling for L4T \ + if [ -d /usr/lib/$(ARCH)/nvidia ]; then sudo ln -sf nvidia/libcuda.so.1 /usr/lib/$(ARCH)/libcuda.so.1; fi remove: - @# Test that our library in installed first - test ! -L /usr/lib/$(ARCH)/libcuda.so.1 - @# Overwrite install with original symlinks - sudo ln -sf libcuda.so.1 /usr/lib/$(ARCH)/libcuda.so - sudo ln -sf /usr/lib/$(ARCH)/libcuda.so.*.* /usr/lib/$(ARCH)/libcuda.so.1 + @set -e -x; \ + for DIR in /usr/lib/$(ARCH) /usr/local/cuda-*.*/compat /usr/lib/$(ARCH)/nvidia; do \ + if [ ! -d $$DIR ]; then continue; fi; \ + # Check that CUDA is installed in this location \ + if [ ! -f $$DIR/libcuda.so.*.* ]; then continue; fi; \ + # Test that our library in installed here \ + if [ -L $$DIR/libcuda.so.1 ]; then continue; fi; \ + # Overwrite install with original symlinks \ + sudo ln -sf libcuda.so.1 $$DIR/libcuda.so; \ + sudo ln -sf $$DIR/libcuda.so.*.* $$DIR/libcuda.so.1; \ + done run_tests: tests ./libsmctrl_test_global_mask diff --git a/nvtaskset.c b/nvtaskset.c index 5cf3a85..3a50221 100644 --- a/nvtaskset.c +++ b/nvtaskset.c @@ -359,16 +359,34 @@ static error_t arg_parser(int key, char* arg, struct argp_state *state){ else printf("PID %d's current TPC affinity mask: 0x%.0lx%016lx\n", target_pid, (uint64_t)(enable_mask >> 64), (uint64_t)enable_mask); } else if (is_cmd) { + if (!getenv("CUDA_MPS_PIPE_DIRECTORY")) { + // Pipe directory is not set by default on L4T aarch64 + putenv("CUDA_MPS_PIPE_DIRECTORY=/tmp/nvidia-mps"); + } // start MPS (as needed) if (!libsmctrl_is_mps_running()) { fprintf(stderr, "nvtaskset: MPS control deamon does not appear to be running. 
Automatically starting...\n"); + // TODO: Mute the error message if this command isn't found? int ret = system("nvidia-cuda-mps-control -d"); + // TODO: Fall back to full x86_64 install location? + // Fall back to full L4T aarch64 install location + if (ret == 0x7f00) { + // nvidia-cuda-mps-control needs nvidia-cuda-mps-server to be on PATH + char *old_path = getenv("PATH"); + char *new_path; + if (old_path) + asprintf(&new_path, "PATH=/usr/local/cuda/compat/:%s", old_path); + else + new_path = "PATH=/usr/local/cuda/compat/"; + putenv(new_path); + ret = system("nvidia-cuda-mps-control -d"); + // TODO: Put this warning after error checking + fprintf(stderr, "nvtaskset: Warning: Set the CUDA_MPS_PIPE_DIRECTORY environment variable to /tmp/nvidia-mps to ensure that subsequently launched tasks associate with MPS on L4T systems!\n"); + } if (ret == -1) error(1, errno, "Unable to run subshell to start MPS"); - if (ret == 1) { - fprintf(stderr, "nvtaskset: Error starting MPS control deamon. Terminating...\n"); - return 1; - } + else if (ret) + error(1, 0, "Error starting MPS control deamon. Terminating..."); fprintf(stderr, "nvtaskset: Done. Use \"echo quit | nvidia-cuda-mps-control\" to terminate it later as desired.\n"); } // launch subprocess @@ -377,6 +395,7 @@ static error_t arg_parser(int key, char* arg, struct argp_state *state){ snprintf(mask_str, 36, "~0x%.0lx%016lx", (uint64_t)(mask >> 64), (uint64_t)mask); setenv("LIBSMCTRL_MASK", mask_str, 1); // Start task + // TODO: Check that the loader is configured to find the corrrect libcuda.so.1 execvp(sub_argv[0], sub_argv); error(1, errno, "Unable to launch task '%s'", sub_argv[0]); } else { -- cgit v1.2.2