about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Joshua Bakita <bakitajoshua@gmail.com> 2023-11-29 17:40:45 -0500
committer Joshua Bakita <bakitajoshua@gmail.com> 2023-11-29 17:40:45 -0500
commit 3ee974590403730f2fea911a2574d335cedc4fab (patch)
tree b2ff669a50e743a036e6d1187c50586886119e49
parent aba56610404c90143f7837aadfd19b769caf5460 (diff)
Build on CUDA 11.8+; Adds libdl dependency
By default, nvcc links against a stub version of libcuda.so that, starting around CUDA 11.8, is missing a required symbol. Use libdl to resolve the symbol at runtime instead.
-rw-r--r-- Makefile 2
-rw-r--r-- libsmctrl.c 26
2 files changed, 25 insertions, 3 deletions
diff --git a/Makefile b/Makefile
index cfbd971..6abc124 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@ CC = gcc
2NVCC ?= nvcc 2NVCC ?= nvcc
3# -fPIC is needed in all cases, as we may be linked into another shared library 3# -fPIC is needed in all cases, as we may be linked into another shared library
4CFLAGS = -fPIC 4CFLAGS = -fPIC
5LDFLAGS = -lcuda -I/usr/local/cuda/include 5LDFLAGS = -lcuda -I/usr/local/cuda/include -ldl
6 6
7.PHONY: clean tests 7.PHONY: clean tests
8 8
diff --git a/libsmctrl.c b/libsmctrl.c
index dfd71b8..f932b5f 100644
--- a/libsmctrl.c
+++ b/libsmctrl.c
@@ -2,6 +2,22 @@
2 * Copyright 2023 Joshua Bakita 2 * Copyright 2023 Joshua Bakita
3 * Library to control SM masks on CUDA launches. Co-opts preexisting debug 3 * Library to control SM masks on CUDA launches. Co-opts preexisting debug
4 * logic in the CUDA driver library, and thus requires a build with -lcuda. 4 * logic in the CUDA driver library, and thus requires a build with -lcuda.
5 *
6 * This file implements partitioning via three different mechanisms:
7 * - Modifying the QMD/TMD immediately prior to upload
8 * - Changing a field in CUDA's global struct that CUDA applies to the QMD/TMD
9 * - Changing a field in CUDA's stream struct that CUDA applies to the QMD/TMD
10 * This table shows the mechanism used with each CUDA version:
11 * +-----------+---------------+---------------+--------------+
12 * | Version | Global Mask | Stream Mask | Next Mask |
13 * +-----------+---------------+---------------+--------------+
14 * | 11.0-12.2 | TMD/QMD Hook | stream struct | TMD/QMD Hook |
15 * | 10.2 | global struct | stream struct | N/A |
16 * | 8.0-10.1 | N/A | stream struct | N/A |
17 * +-----------+---------------+---------------+--------------+
18 * "N/A" indicates that a mask type is unsupported on that CUDA version.
19 * Please contact the authors if support is needed for a particular feature on
20 * an older CUDA version. Support for those is unimplemented, not impossible.
5 */ 21 */
6#include <cuda.h> 22#include <cuda.h>
7 23
@@ -12,6 +28,8 @@
12#include <stdio.h> 28#include <stdio.h>
13#include <unistd.h> 29#include <unistd.h>
14 30
31#include <dlfcn.h>
32
15// In functions that do not return an error code, we favor terminating with an 33// In functions that do not return an error code, we favor terminating with an
16// error rather than merely printing a warning and continuing. 34// error rather than merely printing a warning and continuing.
17#define abort(ret, errno, ...) error_at_line(ret, errno, __FILE__, __LINE__, \ 35#define abort(ret, errno, ...) error_at_line(ret, errno, __FILE__, __LINE__, \
@@ -49,8 +67,12 @@ static void setup_g_sm_control_10() {
49 // `cudbgReportDriverApiErrorFlags` as our reference point. (This ends 67 // `cudbgReportDriverApiErrorFlags` as our reference point. (This ends
50 // up being the closest to an intermediate table we use as part of our 68 // up being the closest to an intermediate table we use as part of our
51 // lookup---process discussed below.) 69 // lookup---process discussed below.)
52 extern uint32_t cudbgReportDriverApiErrorFlags; 70 //
53 uint32_t* sym = &cudbgReportDriverApiErrorFlags; 71 // Unfortunately, the symbol we reference is errantly omitted from the
72 // libcuda.so stub used by nvcc starting around CUDA 11.8, so we have to
73 // use dlsym to avoid build-time issues.
74 void* hndl = dlopen(NULL, RTLD_LAZY);
75 uint32_t* sym = dlsym(hndl, "cudbgReportDriverApiErrorFlags");
54 76
55 // == Deriving Location: 77 // == Deriving Location:
56 // The number of CUDA devices available is co-located in the same CUDA 78 // The number of CUDA devices available is co-located in the same CUDA