aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJoshua Bakita <jbakita@cs.unc.edu>2024-02-14 15:36:25 -0500
committerJoshua Bakita <jbakita@cs.unc.edu>2024-02-14 15:36:25 -0500
commitb5281f5fc01fc925898c9323edab41b817df8661 (patch)
tree6a14c270c7a3bb061bd167d87430d3dd9f3696ec
parent973b919cfe6d05fdb3b82f538b1afbc3233a7008 (diff)
Add test that higher-granularity masks override lower-granularity ones
Stream-level masks should always override globally-set masks. Next-kernel masks should always override both stream-level masks and globally-set masks. Tests reveal an issue with the next-kernel mask not overriding the stream mask on CUDA 11.0+. CUDA appears to apply the per-stream mask to the QMD/TMD after `launchCallback()` is triggered, making it impossible to override as currently implemented.
-rw-r--r--.gitignore2
-rw-r--r--Makefile11
-rw-r--r--README.md7
-rw-r--r--libsmctrl_test_mask_shared.cu9
-rw-r--r--libsmctrl_test_mask_shared.h2
-rw-r--r--libsmctrl_test_next_mask.c4
-rw-r--r--libsmctrl_test_next_mask_override.c10
-rw-r--r--libsmctrl_test_stream_mask.c4
-rw-r--r--libsmctrl_test_stream_mask_override.c10
9 files changed, 48 insertions, 11 deletions
diff --git a/.gitignore b/.gitignore
index 553e0fc..7c52826 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,7 +4,9 @@ libsmctrl.so
4libsmctrl_test_gpc_info 4libsmctrl_test_gpc_info
5libsmctrl_test_global_mask 5libsmctrl_test_global_mask
6libsmctrl_test_stream_mask 6libsmctrl_test_stream_mask
7libsmctrl_test_stream_mask_override
7libsmctrl_test_next_mask 8libsmctrl_test_next_mask
9libsmctrl_test_next_mask_override
8*.pyc 10*.pyc
9*.o 11*.o
10.gdb_history 12.gdb_history
diff --git a/Makefile b/Makefile
index b91f6b9..dcf44e4 100644
--- a/Makefile
+++ b/Makefile
@@ -26,12 +26,19 @@ libsmctrl_test_global_mask: libsmctrl_test_global_mask.c libsmctrl.a libsmctrl_t
26libsmctrl_test_stream_mask: libsmctrl_test_stream_mask.c libsmctrl.a libsmctrl_test_mask_shared.o 26libsmctrl_test_stream_mask: libsmctrl_test_stream_mask.c libsmctrl.a libsmctrl_test_mask_shared.o
27 $(NVCC) $@.c -o $@ libsmctrl_test_mask_shared.o -g -L. -l:libsmctrl.a $(LDFLAGS) 27 $(NVCC) $@.c -o $@ libsmctrl_test_mask_shared.o -g -L. -l:libsmctrl.a $(LDFLAGS)
28 28
29libsmctrl_test_stream_mask_override: libsmctrl_test_stream_mask_override.c libsmctrl.a libsmctrl_test_mask_shared.o
30 $(NVCC) $@.c -o $@ libsmctrl_test_mask_shared.o -g -L. -l:libsmctrl.a $(LDFLAGS)
31
29libsmctrl_test_next_mask: libsmctrl_test_next_mask.c libsmctrl.a libsmctrl_test_mask_shared.o 32libsmctrl_test_next_mask: libsmctrl_test_next_mask.c libsmctrl.a libsmctrl_test_mask_shared.o
30 $(NVCC) $@.c -o $@ libsmctrl_test_mask_shared.o -g -L. -l:libsmctrl.a $(LDFLAGS) 33 $(NVCC) $@.c -o $@ libsmctrl_test_mask_shared.o -g -L. -l:libsmctrl.a $(LDFLAGS)
31 34
32tests: libsmctrl_test_gpc_info libsmctrl_test_global_mask libsmctrl_test_stream_mask libsmctrl_test_next_mask 35libsmctrl_test_next_mask_override: libsmctrl_test_next_mask_override.c libsmctrl.a libsmctrl_test_mask_shared.o
36 $(NVCC) $@.c -o $@ libsmctrl_test_mask_shared.o -g -L. -l:libsmctrl.a $(LDFLAGS)
37
38tests: libsmctrl_test_gpc_info libsmctrl_test_global_mask libsmctrl_test_stream_mask libsmctrl_test_stream_mask_override libsmctrl_test_next_mask libsmctrl_test_next_mask_override
33 39
34clean: 40clean:
35 rm -f libsmctrl.so libsmctrl.a libsmctrl_test_gpu_info \ 41 rm -f libsmctrl.so libsmctrl.a libsmctrl_test_gpu_info \
36 libsmctrl_test_mask_shared.o libmsctrl_test_global_mask \ 42 libsmctrl_test_mask_shared.o libmsctrl_test_global_mask \
37 libsmctrl_test_stream_mask libmsctrl_test_next_mask 43 libsmctrl_test_stream_mask libmsctrl_test_stream_mask_override \
44 libsmctrl_test_next_mask libmsctrl_test_next_mask_override
diff --git a/README.md b/README.md
index 705f2b6..6e7c75d 100644
--- a/README.md
+++ b/README.md
@@ -44,7 +44,8 @@ And if `nvdebug` has been installed:
44 44
45#### Known Issues 45#### Known Issues
46 46
47- `next_mask` will not override `stream_mask` on CUDA 12.0+ 47- `next_mask` will not override `stream_mask` on CUDA 11.0+
48 - _As of Feb 2024, a fix for this is coming soon..._
48- `global_mask` and `next_mask` cannot disable TPCs with IDs above 128 49- `global_mask` and `next_mask` cannot disable TPCs with IDs above 128
49 - Only relevant on GPUs with over 128 TPCs, such as the RTX 6000 Ada 50 - Only relevant on GPUs with over 128 TPCs, such as the RTX 6000 Ada
50- Untested on H100 (compute capability 9.0) 51- Untested on H100 (compute capability 9.0)
@@ -74,7 +75,3 @@ How this works:
743. If the test succeeded (returned zero) the loop aborts, otherwise it increments the offset to attempt and repeats. 753. If the test succeeded (returned zero) the loop aborts, otherwise it increments the offset to attempt and repeats.
75 76
76Once this loop aborts, take the found offset and add it into the switch statement for the appropriate CUDA version and CPU architecture. 77Once this loop aborts, take the found offset and add it into the switch statement for the appropriate CUDA version and CPU architecture.
77
78## TODO
79
80- Add a test to check that more-granularly-set compute masks override more-coarsely-set ones.
diff --git a/libsmctrl_test_mask_shared.cu b/libsmctrl_test_mask_shared.cu
index c3817fa..f3e8933 100644
--- a/libsmctrl_test_mask_shared.cu
+++ b/libsmctrl_test_mask_shared.cu
@@ -87,9 +87,18 @@ int test_constrained_size_and_location(enum partitioning_type part_type) {
87 case PARTITION_STREAM: 87 case PARTITION_STREAM:
88 libsmctrl_set_stream_mask_ext(stream, mask); 88 libsmctrl_set_stream_mask_ext(stream, mask);
89 break; 89 break;
90 case PARTITION_STREAM_OVERRIDE:
91 libsmctrl_set_global_mask(~mask);
92 libsmctrl_set_stream_mask_ext(stream, mask);
93 break;
90 case PARTITION_NEXT: 94 case PARTITION_NEXT:
91 libsmctrl_set_next_mask(mask); 95 libsmctrl_set_next_mask(mask);
92 break; 96 break;
97 case PARTITION_NEXT_OVERRIDE:
98 libsmctrl_set_global_mask(~mask);
99 libsmctrl_set_stream_mask_ext(stream, ~mask);
100 libsmctrl_set_next_mask(mask);
101 break;
93 default: 102 default:
94 error(1, 0, "Shared test core called with unrecognized partitioning type."); 103 error(1, 0, "Shared test core called with unrecognized partitioning type.");
95 } 104 }
diff --git a/libsmctrl_test_mask_shared.h b/libsmctrl_test_mask_shared.h
index f95757d..5a115b7 100644
--- a/libsmctrl_test_mask_shared.h
+++ b/libsmctrl_test_mask_shared.h
@@ -6,7 +6,9 @@ extern "C" {
6enum partitioning_type { 6enum partitioning_type {
7 PARTITION_GLOBAL, 7 PARTITION_GLOBAL,
8 PARTITION_STREAM, 8 PARTITION_STREAM,
9 PARTITION_STREAM_OVERRIDE,
9 PARTITION_NEXT, 10 PARTITION_NEXT,
11 PARTITION_NEXT_OVERRIDE,
10}; 12};
11 13
12extern int test_constrained_size_and_location(enum partitioning_type part_type); 14extern int test_constrained_size_and_location(enum partitioning_type part_type);
diff --git a/libsmctrl_test_next_mask.c b/libsmctrl_test_next_mask.c
index 7faa9e5..2e4a36a 100644
--- a/libsmctrl_test_next_mask.c
+++ b/libsmctrl_test_next_mask.c
@@ -1,5 +1,5 @@
1// Copyright 2023 Joshua Bakita 1// Copyright 2024 Joshua Bakita
2// Test libsmctrl_set_global_mask(). 2// Test libsmctrl_set_next_mask().
3// All types of partitioning use the same test, so this file is trivial. 3// All types of partitioning use the same test, so this file is trivial.
4 4
5#include "libsmctrl_test_mask_shared.h" 5#include "libsmctrl_test_mask_shared.h"
diff --git a/libsmctrl_test_next_mask_override.c b/libsmctrl_test_next_mask_override.c
new file mode 100644
index 0000000..6e2f9d7
--- /dev/null
+++ b/libsmctrl_test_next_mask_override.c
@@ -0,0 +1,10 @@
1// Copyright 2024 Joshua Bakita
2// Test that libsmctrl_set_next_mask() overrides the global and stream masks.
3// All types of partitioning use the same test, so this file is trivial.
4
5#include "libsmctrl_test_mask_shared.h"
6
7int main() {
8 return test_constrained_size_and_location(PARTITION_NEXT_OVERRIDE);
9}
10
diff --git a/libsmctrl_test_stream_mask.c b/libsmctrl_test_stream_mask.c
index 063f934..d8895df 100644
--- a/libsmctrl_test_stream_mask.c
+++ b/libsmctrl_test_stream_mask.c
@@ -1,5 +1,5 @@
1// Copyright 2023 Joshua Bakita 1// Copyright 2024 Joshua Bakita
2// Test libsmctrl_set_global_mask(). 2// Test libsmctrl_set_stream_mask_ext().
3// All types of partitioning use the same test, so this file is trivial. 3// All types of partitioning use the same test, so this file is trivial.
4 4
5#include "libsmctrl_test_mask_shared.h" 5#include "libsmctrl_test_mask_shared.h"
diff --git a/libsmctrl_test_stream_mask_override.c b/libsmctrl_test_stream_mask_override.c
new file mode 100644
index 0000000..5098d17
--- /dev/null
+++ b/libsmctrl_test_stream_mask_override.c
@@ -0,0 +1,10 @@
1// Copyright 2024 Joshua Bakita
2// Test that libsmctrl_set_stream_mask_ext() overrides the global mask.
3// All types of partitioning use the same test, so this file is trivial.
4
5#include "libsmctrl_test_mask_shared.h"
6
7int main() {
8 return test_constrained_size_and_location(PARTITION_STREAM_OVERRIDE);
9}
10