Fix build on CUDA 11.8+; add libdl dependency

By default, nvcc links against a stub version of libcuda.so. Starting around CUDA 11.8, that stub is missing a symbol this library requires (cudbgReportDriverApiErrorFlags), which breaks the build. Use libdl (dlopen/dlsym) to resolve the symbol at runtime instead.
author: Joshua Bakita <bakitajoshua@gmail.com> 2023-11-29 17:40:45 -0500
committer: Joshua Bakita <bakitajoshua@gmail.com> 2023-11-29 17:40:45 -0500
commit: 3ee974590403730f2fea911a2574d335cedc4fab (patch)
tree: b2ff669a50e743a036e6d1187c50586886119e49
parent: aba56610404c90143f7837aadfd19b769caf5460 (diff)
2 files changed, 25 insertions, 3 deletions
diff --git a/Makefile b/Makefile
index cfbd971..6abc124 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@ CC = gcc
 NVCC ?= nvcc
 # -fPIC is needed in all cases, as we may be linked into another shared library
 CFLAGS = -fPIC
-LDFLAGS = -lcuda -I/usr/local/cuda/include
+LDFLAGS = -lcuda -I/usr/local/cuda/include -ldl
 .PHONY: clean tests
diff --git a/libsmctrl.c b/libsmctrl.c
index dfd71b8..f932b5f 100644
--- a/libsmctrl.c
+++ b/libsmctrl.c
@@ -2,6 +2,22 @@
 * Copyright 2023 Joshua Bakita
 * Library to control SM masks on CUDA launches. Co-opts preexisting debug
 * logic in the CUDA driver library, and thus requires a build with -lcuda.
+ *
+ * This file implements partitioning via three different mechanisms:
+ * - Modifying the QMD/TMD immediately prior to upload
+ * - Changing a field in CUDA's global struct that CUDA applies to the QMD/TMD
+ * - Changing a field in CUDA's stream struct that CUDA applies to the QMD/TMD
+ * This table shows the mechanism used with each CUDA version:
+ *   +-----------+---------------+---------------+--------------+
+ *   |  Version  |  Global Mask  |  Stream Mask  |  Next Mask   |
+ *   +-----------+---------------+---------------+--------------+
+ *   | 11.0-12.2 | TMD/QMD Hook  | stream struct | TMD/QMD Hook |
+ *   | 10.2      | global struct | stream struct | N/A          |
+ *   | 8.0-10.1  | N/A           | stream struct | N/A          |
+ *   +-----------+---------------+---------------+--------------+
+ * "N/A" indicates that a mask type is unsupported on that CUDA version.
+ * Please contact the authors if support is needed for a particular feature on
+ * an older CUDA version. Support for those is unimplemented, not impossible.
 */
 #include <cuda.h>
@@ -12,6 +28,8 @@
 #include <stdio.h>
 #include <unistd.h>
+#include <dlfcn.h>
 // In functions that do not return an error code, we favor terminating with an
 // error rather than merely printing a warning and continuing.
 #define abort(ret, errno, ...) error_at_line(ret, errno, __FILE__, __LINE__, \
@@ -49,8 +67,12 @@ static void setup_g_sm_control_10() {
        // `cudbgReportDriverApiErrorFlags` as our reference point. (This ends
        // up being the closest to an intermediate table we use as part of our
        // lookup---process discussed below.)
-        extern uint32_t cudbgReportDriverApiErrorFlags;
+        //
-        uint32_t* sym = &cudbgReportDriverApiErrorFlags;
+        // Unfortunately, the symbol we reference is errantly omitted from the
+        // libcuda.so stub used by nvcc starting around CUDA 11.8, so we have to
+        // use dlsym to avoid build-time issues.
+        void* hndl = dlopen(NULL, RTLD_LAZY);
+        uint32_t* sym = dlsym(hndl, "cudbgReportDriverApiErrorFlags");
        // == Deriving Location:
        // The number of CUDA devices available is co-located in the same CUDA
author	Joshua Bakita <bakitajoshua@gmail.com>	2023-11-29 17:40:45 -0500
committer	Joshua Bakita <bakitajoshua@gmail.com>	2023-11-29 17:40:45 -0500
commit	3ee974590403730f2fea911a2574d335cedc4fab (patch)
tree	b2ff669a50e743a036e6d1187c50586886119e49
parent	aba56610404c90143f7837aadfd19b769caf5460 (diff)