about | summary | refs | log | tree | commit | diff | stats
path: root/libsmctrl.c
diff options
context:
space:
mode:
Diffstat (limited to 'libsmctrl.c')
-rw-r--r-- libsmctrl.c 26
1 files changed, 24 insertions, 2 deletions
diff --git a/libsmctrl.c b/libsmctrl.c
index dfd71b8..f932b5f 100644
--- a/libsmctrl.c
+++ b/libsmctrl.c
@@ -2,6 +2,22 @@
2 * Copyright 2023 Joshua Bakita 2 * Copyright 2023 Joshua Bakita
3 * Library to control SM masks on CUDA launches. Co-opts preexisting debug 3 * Library to control SM masks on CUDA launches. Co-opts preexisting debug
4 * logic in the CUDA driver library, and thus requires a build with -lcuda. 4 * logic in the CUDA driver library, and thus requires a build with -lcuda.
5 *
6 * This file implements partitioning via three different mechanisms:
7 * - Modifying the QMD/TMD immediately prior to upload
8 * - Changing a field in CUDA's global struct that CUDA applies to the QMD/TMD
9 * - Changing a field in CUDA's stream struct that CUDA applies to the QMD/TMD
10 * This table shows the mechanism used with each CUDA version:
11 * +-----------+---------------+---------------+--------------+
12 * | Version | Global Mask | Stream Mask | Next Mask |
13 * +-----------+---------------+---------------+--------------+
14 * | 11.0-12.2 | TMD/QMD Hook | stream struct | TMD/QMD Hook |
15 * | 10.2 | global struct | stream struct | N/A |
16 * | 8.0-10.1 | N/A | stream struct | N/A |
17 * +-----------+---------------+---------------+--------------+
18 * "N/A" indicates that a mask type is unsupported on that CUDA version.
19 * Please contact the authors if support is needed for a particular feature on
20 * an older CUDA version. Support for those is unimplemented, not impossible.
5 */ 21 */
6#include <cuda.h> 22#include <cuda.h>
7 23
@@ -12,6 +28,8 @@
12#include <stdio.h> 28#include <stdio.h>
13#include <unistd.h> 29#include <unistd.h>
14 30
31#include <dlfcn.h>
32
15// In functions that do not return an error code, we favor terminating with an 33// In functions that do not return an error code, we favor terminating with an
16// error rather than merely printing a warning and continuing. 34// error rather than merely printing a warning and continuing.
17#define abort(ret, errno, ...) error_at_line(ret, errno, __FILE__, __LINE__, \ 35#define abort(ret, errno, ...) error_at_line(ret, errno, __FILE__, __LINE__, \
@@ -49,8 +67,12 @@ static void setup_g_sm_control_10() {
49 // `cudbgReportDriverApiErrorFlags` as our reference point. (This ends 67 // `cudbgReportDriverApiErrorFlags` as our reference point. (This ends
50 // up being the closest to an intermediate table we use as part of our 68 // up being the closest to an intermediate table we use as part of our
51 // lookup process, discussed below.) 69 // lookup process, discussed below.)
52 extern uint32_t cudbgReportDriverApiErrorFlags; 70 //
53 uint32_t* sym = &cudbgReportDriverApiErrorFlags; 71 // Unfortunately, the symbol we reference is errantly omitted from the
72 // libcuda.so stub used by nvcc starting around CUDA 11.8, so we have to
73 // use dlsym to avoid build-time issues.
74 void* hndl = dlopen(NULL, RTLD_LAZY);
75 uint32_t* sym = dlsym(hndl, "cudbgReportDriverApiErrorFlags");
54 76
55 // == Deriving Location: 77 // == Deriving Location:
56 // The number of CUDA devices available is co-located in the same CUDA 78 // The number of CUDA devices available is co-located in the same CUDA