diff options
author | Joshua Bakita <bakitajoshua@gmail.com> | 2023-11-29 18:00:31 -0500 |
---|---|---|
committer | Joshua Bakita <bakitajoshua@gmail.com> | 2023-11-29 18:24:25 -0500 |
commit | 8062646a185baa6d3934d1e19743ac671e943fa8 (patch) | |
tree | e00b4a15b143262c6a5865393cd8a6d94116d50b | |
parent | 3c075c8f71a7c85d735018143fc13a6eb91813eb (diff) |
Add a README and tests for stream masking and next masking
Also rewrite the global masking test to be much more thorough.
-rw-r--r-- | .gitignore | 5 | ||||
-rw-r--r-- | Makefile | 20 | ||||
-rw-r--r-- | README.md | 80 | ||||
-rw-r--r-- | libsmctrl_test_global_mask.c | 10 | ||||
-rw-r--r-- | libsmctrl_test_global_mask.cu | 103 | ||||
-rw-r--r-- | libsmctrl_test_mask_shared.cu | 140 | ||||
-rw-r--r-- | libsmctrl_test_mask_shared.h | 16 | ||||
-rw-r--r-- | libsmctrl_test_next_mask.c | 10 | ||||
-rw-r--r-- | libsmctrl_test_stream_mask.c | 10 | ||||
-rw-r--r-- | testbench.h | 31 |
10 files changed, 317 insertions, 108 deletions
@@ -2,4 +2,9 @@ libsmctrl.a | |||
2 | libsmctrl.o | 2 | libsmctrl.o |
3 | libsmctrl.so | 3 | libsmctrl.so |
4 | libsmctrl_test_gpc_info | 4 | libsmctrl_test_gpc_info |
5 | libsmctrl_test_global_mask | ||
6 | libsmctrl_test_stream_mask | ||
7 | libsmctrl_test_next_mask | ||
5 | *.pyc | 8 | *.pyc |
9 | *.o | ||
10 | .gdb_history | ||
@@ -14,14 +14,24 @@ libsmctrl.a: libsmctrl.c libsmctrl.h | |||
14 | ar rcs $@ libsmctrl.o | 14 | ar rcs $@ libsmctrl.o |
15 | 15 | ||
16 | # Use static linking with tests to avoid LD_LIBRARY_PATH issues | 16 | # Use static linking with tests to avoid LD_LIBRARY_PATH issues |
17 | libsmctrl_test_gpc_info: libsmctrl_test_gpc_info.c libsmctrl.a | 17 | libsmctrl_test_gpc_info: libsmctrl_test_gpc_info.c libsmctrl.a testbench.h |
18 | $(CC) $< -o $@ -g -L. -l:libsmctrl.a $(LDFLAGS) | 18 | $(CC) $< -o $@ -g -L. -l:libsmctrl.a $(LDFLAGS) |
19 | 19 | ||
20 | libsmctrl_test_global_mask: libsmctrl_test_global_mask.cu libsmctrl.a | 20 | libsmctrl_test_mask_shared.o: libsmctrl_test_mask_shared.cu testbench.h |
21 | $(NVCC) $< -o $@ -g -L. -l:libsmctrl.a $(LDFLAGS) | 21 | $(NVCC) $< -c -g |
22 | 22 | ||
23 | tests: libsmctrl_test_gpc_info libsmctrl_test_global_mask | 23 | libsmctrl_test_global_mask: libsmctrl_test_global_mask.c libsmctrl.a libsmctrl_test_mask_shared.o |
24 | $(NVCC) $@.c -o $@ libsmctrl_test_mask_shared.o -g -L. -l:libsmctrl.a $(LDFLAGS) | ||
25 | |||
26 | libsmctrl_test_stream_mask: libsmctrl_test_stream_mask.c libsmctrl.a libsmctrl_test_mask_shared.o | ||
27 | $(NVCC) $@.c -o $@ libsmctrl_test_mask_shared.o -g -L. -l:libsmctrl.a $(LDFLAGS) | ||
28 | |||
29 | libsmctrl_test_next_mask: libsmctrl_test_next_mask.c libsmctrl.a libsmctrl_test_mask_shared.o | ||
30 | $(NVCC) $@.c -o $@ libsmctrl_test_mask_shared.o -g -L. -l:libsmctrl.a $(LDFLAGS) | ||
31 | |||
32 | tests: libsmctrl_test_gpc_info libsmctrl_test_global_mask libsmctrl_test_stream_mask libsmctrl_test_next_mask | ||
24 | 33 | ||
25 | clean: | 34 | clean: |
26 | rm -f libsmctrl.so libsmctrl.a libsmctrl_test_gpu_info \ | 35 | rm -f libsmctrl.so libsmctrl.a libsmctrl_test_gpu_info \ |
27 | libsmctrl_test_global_mask | 36 | libsmctrl_test_mask_shared.o libsmctrl_test_global_mask \ |
37 | libsmctrl_test_stream_mask libsmctrl_test_next_mask | ||
diff --git a/README.md b/README.md new file mode 100644 index 0000000..705f2b6 --- /dev/null +++ b/README.md | |||
@@ -0,0 +1,80 @@ | |||
1 | # libsmctrl: Quick & Easy Hardware Compute Partitioning on NVIDIA GPUs | ||
2 | |||
3 | This library was developed as part of the following paper: | ||
4 | |||
5 | _J. Bakita and J. H. Anderson, "Hardware Compute Partitioning on NVIDIA GPUs", Proceedings of the 29th IEEE Real-Time and Embedded Technology and Applications Symposium, pp. 54-66, May 2023._ | ||
6 | |||
7 | Please cite this paper in any work which leverages our library. Here's the BibTeX entry: | ||
8 | ``` | ||
9 | @inproceedings{bakita2023hardware, | ||
10 | title={Hardware Compute Partitioning on {NVIDIA} {GPUs}}, | ||
11 | author={Bakita, Joshua and Anderson, James H}, | ||
12 | booktitle={Proceedings of the 29th IEEE Real-Time and Embedded Technology and Applications Symposium}, | ||
13 | year={2023}, | ||
14 | month={May}, | ||
15 | pages={54--66}, | ||
16 | series={RTAS} | ||
17 | } | ||
18 | ``` | ||
19 | |||
20 | Please see [the paper](https://www.cs.unc.edu/~jbakita/rtas23.pdf) and libsmctrl.h for details and examples of how to use this library. | ||
21 | We strongly encourage consulting those resources first; the below comments serve merely as an addendum. | ||
22 | |||
23 | ## Run Tests | ||
24 | To test partitioning: | ||
25 | ``` | ||
26 | make tests | ||
27 | ./libsmctrl_test_global_mask | ||
28 | ./libsmctrl_test_stream_mask | ||
29 | ./libsmctrl_test_next_mask | ||
30 | ``` | ||
31 | |||
32 | And if `nvdebug` has been installed: | ||
33 | ``` | ||
34 | ./libsmctrl_test_gpu_info | ||
35 | ``` | ||
36 | |||
37 | ## Supported GPUs | ||
38 | |||
39 | #### Known Working | ||
40 | |||
41 | - NVIDIA GPUs from compute capability 3.5 through 8.9, including embedded "Jetson" GPUs | ||
42 | - CUDA 8.1 through 12.2 | ||
43 | - `x86_64` and Jetson `aarch64` platforms | ||
44 | |||
45 | #### Known Issues | ||
46 | |||
47 | - `next_mask` will not override `stream_mask` on CUDA 12.0+ | ||
48 | - `global_mask` and `next_mask` cannot disable TPCs with IDs above 128 | ||
49 | - Only relevant on GPUs with over 128 TPCs, such as the RTX 6000 Ada | ||
50 | - Untested on H100 (compute capability 9.0) | ||
51 | - Untested on non-Jetson `aarch64` platforms | ||
52 | |||
53 | ## Important Limitations | ||
54 | |||
55 | 1. Only supports partitioning _within_ a single GPU context. | ||
56 | At time of writing, it is challenging to impossible to share a GPU context across multiple CPU address spaces. | ||
57 | The implication is that your applications must first be combined together into a single CPU process. | ||
58 | 2. No aspect of this system prevents implicit synchronization on the GPU. | ||
59 | See prior work, particularly that of Amert et al. (perhaps the CUPiD^RT paper), for ways to avoid this. | ||
60 | |||
61 | ## Porting to New Architectures | ||
62 | |||
63 | Build the tests with `make tests`. And then run the following: | ||
64 | ``` | ||
65 | for (( i=0; $?!=0; i+=8 )); do MASK_OFF=$i ./libsmctrl_test_stream_mask; done | ||
66 | ``` | ||
67 | |||
68 | How this works: | ||
69 | |||
70 | 1. If `MASK_OFF` is set, `libsmctrl` applies this as a byte offset to a base address for the location | ||
71 | of the SM mask fields in CUDA's stream data structure. | ||
72 | - That base address is the one for CUDA 12.2 at time of writing | ||
73 | 2. The stream masking test is run. | ||
74 | 3. If the test succeeded (returned zero) the loop aborts, otherwise it increments the offset to attempt and repeats. | ||
75 | |||
76 | Once this loop aborts, take the found offset and add it into the switch statement for the appropriate CUDA version and CPU architecture. | ||
77 | |||
78 | ## TODO | ||
79 | |||
80 | - Add a test to check that more-granularly-set compute masks override more-coarsely-set ones. | ||
diff --git a/libsmctrl_test_global_mask.c b/libsmctrl_test_global_mask.c new file mode 100644 index 0000000..9e3189b --- /dev/null +++ b/libsmctrl_test_global_mask.c | |||
@@ -0,0 +1,10 @@ | |||
1 | // Copyright 2023 Joshua Bakita | ||
2 | // Test libsmctrl_set_global_mask(). | ||
3 | // All types of partitioning use the same test, so this file is trivial. | ||
4 | |||
5 | #include "libsmctrl_test_mask_shared.h" | ||
6 | |||
7 | int main() { | ||
8 | return test_constrained_size_and_location(PARTITION_GLOBAL); | ||
9 | } | ||
10 | |||
diff --git a/libsmctrl_test_global_mask.cu b/libsmctrl_test_global_mask.cu deleted file mode 100644 index f6a487f..0000000 --- a/libsmctrl_test_global_mask.cu +++ /dev/null | |||
@@ -1,103 +0,0 @@ | |||
1 | #include <error.h> | ||
2 | #include <errno.h> | ||
3 | #include <stdio.h> | ||
4 | #include <stdbool.h> | ||
5 | #include <cuda_runtime.h> | ||
6 | |||
7 | #include "libsmctrl.h" | ||
8 | #include "testbench.h" | ||
9 | |||
10 | __global__ void read_and_store_smid(uint8_t* smid_arr) { | ||
11 | if (threadIdx.x != 1) | ||
12 | return; | ||
13 | int smid; | ||
14 | asm("mov.u32 %0, %%smid;" : "=r"(smid)); | ||
15 | smid_arr[blockIdx.x] = smid; | ||
16 | } | ||
17 | |||
18 | // Assuming SMs continue to support a maximum of 2048 resident threads, six | ||
19 | // blocks of 1024 threads should span at least three SMs without partitioning | ||
20 | #define NUM_BLOCKS 6 | ||
21 | |||
22 | int sort_asc(const void* a, const void* b) { | ||
23 | return *(uint8_t*)a - *(uint8_t*)b; | ||
24 | } | ||
25 | |||
26 | // Warning: Mutates input array via qsort | ||
27 | int count_unique(uint8_t* arr, int len) { | ||
28 | qsort(arr, len, 1, sort_asc); | ||
29 | int num_uniq = 1; | ||
30 | for (int i = 0; i < len - 1; i++) | ||
31 | num_uniq += (arr[i] != arr[i + 1]); | ||
32 | return num_uniq; | ||
33 | } | ||
34 | |||
35 | int main() { | ||
36 | cudaError_t err; // Needed by SAFE() macro | ||
37 | int res; | ||
38 | uint8_t *smids_native_d, *smids_native_h; | ||
39 | uint8_t *smids_partitioned_d, *smids_partitioned_h; | ||
40 | int uniq_native, uniq_partitioned; | ||
41 | uint32_t num_tpcs; | ||
42 | int num_sms, sms_per_tpc; | ||
43 | |||
44 | // Determine number of SMs per TPC | ||
45 | SAFE(cudaDeviceGetAttribute(&num_sms, cudaDevAttrMultiProcessorCount, 0)); | ||
46 | if (res = libsmctrl_get_tpc_info_cuda(&num_tpcs, 0)) | ||
47 | error(1, res, "libsmctrl_test_global: Unable to get TPC configuration for test"); | ||
48 | sms_per_tpc = num_sms/num_tpcs; | ||
49 | |||
50 | // Test baseline (native) behavior without partitioning | ||
51 | SAFE(cudaMalloc(&smids_native_d, NUM_BLOCKS)); | ||
52 | if (!(smids_native_h = (uint8_t*)malloc(NUM_BLOCKS))) | ||
53 | error(1, errno, "libsmctrl_test_global: Unable to allocate memory for test"); | ||
54 | read_and_store_smid<<<NUM_BLOCKS, 1024>>>(smids_native_d); | ||
55 | SAFE(cudaMemcpy(smids_native_h, smids_native_d, NUM_BLOCKS, cudaMemcpyDeviceToHost)); | ||
56 | |||
57 | uniq_native = count_unique(smids_native_h, NUM_BLOCKS); | ||
58 | if (uniq_native < sms_per_tpc) { | ||
59 | printf("libsmctrl_test_global: ***Test failure.***\n" | ||
60 | "libsmctrl_test_global: Reason: In baseline test, %d blocks of 1024 " | ||
61 | "threads were launched on the GPU, but only %d SMs were utilized, " | ||
62 | "when it was expected that at least %d would be used.\n", NUM_BLOCKS, | ||
63 | uniq_native, sms_per_tpc); | ||
64 | return 1; | ||
65 | } | ||
66 | |||
67 | // Verify that partitioning changes the SMID distribution | ||
68 | libsmctrl_set_global_mask(~0x1); // Enable only one TPC | ||
69 | SAFE(cudaMalloc(&smids_partitioned_d, NUM_BLOCKS)); | ||
70 | if (!(smids_partitioned_h = (uint8_t*)malloc(NUM_BLOCKS))) | ||
71 | error(1, errno, "libsmctrl_test_global: Unable to allocate memory for test"); | ||
72 | read_and_store_smid<<<NUM_BLOCKS, 1024>>>(smids_partitioned_d); | ||
73 | SAFE(cudaMemcpy(smids_partitioned_h, smids_partitioned_d, NUM_BLOCKS, cudaMemcpyDeviceToHost)); | ||
74 | |||
75 | // Make sure it only ran on the number of TPCs provided | ||
76 | // May run on up to two SMs, as up to two per TPC | ||
77 | uniq_partitioned = count_unique(smids_partitioned_h, NUM_BLOCKS); | ||
78 | if (uniq_partitioned > sms_per_tpc) { | ||
79 | printf("libsmctrl_test_global: ***Test failure.***\n" | ||
80 | "libsmctrl_test_global: Reason: With global TPC mask set to " | ||
81 | "constrain all kernels to a single TPC, a kernel of %d blocks of " | ||
82 | "1024 threads was launched and found to run on %d SMs (at most %d---" | ||
83 | "one TPC---expected).\n", NUM_BLOCKS, uniq_partitioned, sms_per_tpc); | ||
84 | return 1; | ||
85 | } | ||
86 | |||
87 | // Make sure it ran on the right TPC | ||
88 | if (smids_partitioned_h[NUM_BLOCKS - 1] > sms_per_tpc - 1) { | ||
89 | printf("libsmctrl_test_global: ***Test failure.***\n" | ||
90 | "libsmctrl_test_global: Reason: With global TPC mask set to" | ||
91 | "constrain all kernels to the first TPC, a kernel was run and found " | ||
92 | "to run on an SM ID as high as %d (max of %d expected).\n", | ||
93 | smids_partitioned_h[NUM_BLOCKS - 1], sms_per_tpc - 1); | ||
94 | return 1; | ||
95 | } | ||
96 | |||
97 | printf("libsmctrl_test_global: Test passed!\n" | ||
98 | "libsmctrl_test_global: Reason: With a global partition enabled which " | ||
99 | "contained only TPC ID 0, the test kernel was found to use only %d " | ||
100 | "SMs (%d without), and all SMs in-use had IDs below %d (were contained" | ||
101 | " in the first TPC).\n", uniq_partitioned, uniq_native, sms_per_tpc); | ||
102 | return 0; | ||
103 | } | ||
diff --git a/libsmctrl_test_mask_shared.cu b/libsmctrl_test_mask_shared.cu new file mode 100644 index 0000000..c3817fa --- /dev/null +++ b/libsmctrl_test_mask_shared.cu | |||
@@ -0,0 +1,140 @@ | |||
1 | // Copyright 2023 Joshua Bakita | ||
2 | #include <error.h> | ||
3 | #include <errno.h> | ||
4 | #include <stdio.h> | ||
5 | #include <stdbool.h> | ||
6 | #include <cuda_runtime.h> | ||
7 | |||
8 | #include "libsmctrl.h" | ||
9 | #include "testbench.h" | ||
10 | #include "libsmctrl_test_mask_shared.h" | ||
11 | |||
12 | __global__ void read_and_store_smid(uint8_t* smid_arr) { | ||
13 | if (threadIdx.x != 1) | ||
14 | return; | ||
15 | int smid; | ||
16 | asm("mov.u32 %0, %%smid;" : "=r"(smid)); | ||
17 | smid_arr[blockIdx.x] = smid; | ||
18 | } | ||
19 | |||
20 | // Assuming SMs continue to support a maximum of 2048 resident threads, six | ||
21 | // blocks of 1024 threads should span at least three SMs without partitioning | ||
22 | #define NUM_BLOCKS 142 //6 | ||
23 | |||
24 | static int sort_asc(const void* a, const void* b) { | ||
25 | return *(uint8_t*)a - *(uint8_t*)b; | ||
26 | } | ||
27 | |||
28 | // Warning: Mutates input array via qsort | ||
29 | static int count_unique(uint8_t* arr, int len) { | ||
30 | qsort(arr, len, 1, sort_asc); | ||
31 | int num_uniq = 1; | ||
32 | for (int i = 0; i < len - 1; i++) | ||
33 | num_uniq += (arr[i] != arr[i + 1]); | ||
34 | return num_uniq; | ||
35 | } | ||
36 | |||
37 | // Test that adding an SM mask: | ||
38 | // 1. Constrains the number of SMs accessible | ||
39 | // 2. Constrains an application to the correct subset of SMs | ||
40 | int test_constrained_size_and_location(enum partitioning_type part_type) { | ||
41 | int res; | ||
42 | uint8_t *smids_native_d, *smids_native_h; | ||
43 | uint8_t *smids_partitioned_d, *smids_partitioned_h; | ||
44 | int uniq_native, uniq_partitioned; | ||
45 | uint32_t num_tpcs; | ||
46 | int num_sms, sms_per_tpc; | ||
47 | cudaStream_t stream; | ||
48 | |||
49 | SAFE(cudaStreamCreate(&stream)); | ||
50 | |||
51 | // Determine number of SMs per TPC | ||
52 | SAFE(cudaDeviceGetAttribute(&num_sms, cudaDevAttrMultiProcessorCount, 0)); | ||
53 | if (res = libsmctrl_get_tpc_info_cuda(&num_tpcs, 0)) | ||
54 | error(1, res, "Unable to get TPC configuration for test"); | ||
55 | sms_per_tpc = num_sms/num_tpcs; | ||
56 | |||
57 | // Test baseline (native) behavior without partitioning | ||
58 | SAFE(cudaMalloc(&smids_native_d, NUM_BLOCKS)); | ||
59 | if (!(smids_native_h = (uint8_t*)malloc(NUM_BLOCKS))) | ||
60 | error(1, errno, "Unable to allocate memory for test"); | ||
61 | read_and_store_smid<<<NUM_BLOCKS, 1024, 0, stream>>>(smids_native_d); | ||
62 | SAFE(cudaMemcpy(smids_native_h, smids_native_d, NUM_BLOCKS, cudaMemcpyDeviceToHost)); | ||
63 | |||
64 | uniq_native = count_unique(smids_native_h, NUM_BLOCKS); | ||
65 | if (uniq_native < sms_per_tpc) { | ||
66 | printf("%s: ***Test failure.***\n" | ||
67 | "%s: Reason: In baseline test, %d blocks of 1024 " | ||
68 | "threads were launched on the GPU, but only %d SMs were utilized, " | ||
69 | "when it was expected that at least %d would be used.\n", program_invocation_name, program_invocation_name, NUM_BLOCKS, | ||
70 | uniq_native, sms_per_tpc); | ||
71 | return 1; | ||
72 | } | ||
73 | |||
74 | // Test at 32-TPC boundaries to verify that the mask is applied in the | ||
75 | // correct order to each of the QMD/stream struct fields. | ||
76 | char* reason[4] = {0}; | ||
77 | for (int enabled_tpc = 0; enabled_tpc < num_tpcs && enabled_tpc < 128; enabled_tpc += 32) { | ||
78 | uint128_t mask = 1; | ||
79 | mask <<= enabled_tpc; | ||
80 | mask = ~mask; | ||
81 | |||
82 | // Apply partitioning to enable only the first TPC of each 32-bit block | ||
83 | switch (part_type) { | ||
84 | case PARTITION_GLOBAL: | ||
85 | libsmctrl_set_global_mask(mask); | ||
86 | break; | ||
87 | case PARTITION_STREAM: | ||
88 | libsmctrl_set_stream_mask_ext(stream, mask); | ||
89 | break; | ||
90 | case PARTITION_NEXT: | ||
91 | libsmctrl_set_next_mask(mask); | ||
92 | break; | ||
93 | default: | ||
94 | error(1, 0, "Shared test core called with unrecognized partitioning type."); | ||
95 | } | ||
96 | |||
97 | // Verify that partitioning changes the SMID distribution | ||
98 | SAFE(cudaMalloc(&smids_partitioned_d, NUM_BLOCKS)); | ||
99 | if (!(smids_partitioned_h = (uint8_t*)malloc(NUM_BLOCKS))) | ||
100 | error(1, errno, "Unable to allocate memory for test"); | ||
101 | read_and_store_smid<<<NUM_BLOCKS, 1024, 0, stream>>>(smids_partitioned_d); | ||
102 | SAFE(cudaMemcpy(smids_partitioned_h, smids_partitioned_d, NUM_BLOCKS, cudaMemcpyDeviceToHost)); | ||
103 | |||
104 | // Make sure it only ran on the number of TPCs provided | ||
105 | // May run on up to two SMs, as up to two per TPC | ||
106 | uniq_partitioned = count_unique(smids_partitioned_h, NUM_BLOCKS); // Sorts too | ||
107 | if (uniq_partitioned > sms_per_tpc) { | ||
108 | printf("%s: ***Test failure.***\n" | ||
109 | "%s: Reason: With TPC mask set to " | ||
110 | "constrain all kernels to a single TPC, a kernel of %d blocks of " | ||
111 | "1024 threads was launched and found to run on %d SMs (at most %d---" | ||
112 | "one TPC---expected).\n", program_invocation_name, program_invocation_name, NUM_BLOCKS, uniq_partitioned, sms_per_tpc); | ||
113 | return 1; | ||
114 | } | ||
115 | |||
116 | // Make sure it ran on the right TPC | ||
117 | if (smids_partitioned_h[NUM_BLOCKS - 1] > (enabled_tpc * sms_per_tpc) + sms_per_tpc - 1 || | ||
118 | smids_partitioned_h[NUM_BLOCKS - 1] < (enabled_tpc * sms_per_tpc)) { | ||
119 | printf("%s: ***Test failure.***\n" | ||
120 | "%s: Reason: With TPC mask set to" | ||
121 | "constrain all kernels to TPC %d, a kernel was run and found " | ||
122 | "to run on an SM IDs: as high as %d and as low as %d (range of %d to %d expected).\n", | ||
123 | program_invocation_name, program_invocation_name, enabled_tpc, smids_partitioned_h[NUM_BLOCKS - 1], smids_partitioned_h[0], enabled_tpc * sms_per_tpc + sms_per_tpc - 1, enabled_tpc * sms_per_tpc); | ||
124 | return 1; | ||
125 | } | ||
126 | |||
127 | // Div by 32 via a shift | ||
128 | asprintf(&reason[enabled_tpc >> 5], | ||
129 | "With a partition enabled which " | ||
130 | "contained only TPC ID %d, the test kernel was found to use only %d " | ||
131 | "SMs (%d without), and all SMs in-use had IDs between %d and %d (were contained" | ||
132 | " in TPC %d).", enabled_tpc, uniq_partitioned, uniq_native, smids_partitioned_h[0], smids_partitioned_h[NUM_BLOCKS - 1], enabled_tpc); | ||
133 | } | ||
134 | |||
135 | printf("%s: Test passed!\n", program_invocation_name); | ||
136 | for (int i = 0; i < 4 && reason[i]; i++) | ||
137 | printf("%s: Reason %d: %s\n", program_invocation_name, i + 1, reason[i]); | ||
138 | return 0; | ||
139 | } | ||
140 | |||
diff --git a/libsmctrl_test_mask_shared.h b/libsmctrl_test_mask_shared.h new file mode 100644 index 0000000..f95757d --- /dev/null +++ b/libsmctrl_test_mask_shared.h | |||
@@ -0,0 +1,16 @@ | |||
1 | // Copyright 2023 Joshua Bakita | ||
2 | #ifdef __cplusplus | ||
3 | extern "C" { | ||
4 | #endif | ||
5 | |||
6 | enum partitioning_type { | ||
7 | PARTITION_GLOBAL, | ||
8 | PARTITION_STREAM, | ||
9 | PARTITION_NEXT, | ||
10 | }; | ||
11 | |||
12 | extern int test_constrained_size_and_location(enum partitioning_type part_type); | ||
13 | |||
14 | #ifdef __cplusplus | ||
15 | } | ||
16 | #endif | ||
diff --git a/libsmctrl_test_next_mask.c b/libsmctrl_test_next_mask.c new file mode 100644 index 0000000..7faa9e5 --- /dev/null +++ b/libsmctrl_test_next_mask.c | |||
@@ -0,0 +1,10 @@ | |||
1 | // Copyright 2023 Joshua Bakita | ||
2 | // Test libsmctrl_set_next_mask(). | ||
3 | // All types of partitioning use the same test, so this file is trivial. | ||
4 | |||
5 | #include "libsmctrl_test_mask_shared.h" | ||
6 | |||
7 | int main() { | ||
8 | return test_constrained_size_and_location(PARTITION_NEXT); | ||
9 | } | ||
10 | |||
diff --git a/libsmctrl_test_stream_mask.c b/libsmctrl_test_stream_mask.c new file mode 100644 index 0000000..063f934 --- /dev/null +++ b/libsmctrl_test_stream_mask.c | |||
@@ -0,0 +1,10 @@ | |||
1 | // Copyright 2023 Joshua Bakita | ||
2 | // Test libsmctrl_set_stream_mask_ext(). | ||
3 | // All types of partitioning use the same test, so this file is trivial. | ||
4 | |||
5 | #include "libsmctrl_test_mask_shared.h" | ||
6 | |||
7 | int main() { | ||
8 | return test_constrained_size_and_location(PARTITION_STREAM); | ||
9 | } | ||
10 | |||
diff --git a/testbench.h b/testbench.h new file mode 100644 index 0000000..dff1211 --- /dev/null +++ b/testbench.h | |||
@@ -0,0 +1,31 @@ | |||
1 | /* Copyright 2021-2023 Joshua Bakita | ||
2 | * Header for miscellaneous experimental helper functions. | ||
3 | */ | ||
4 | |||
5 | // cudaError_t and CUResult can both safely be cast to an unsigned int | ||
6 | static __thread unsigned int __SAFE_err; | ||
7 | |||
8 | // The very strange cast in these macros is to satisfy two goals at tension: | ||
9 | // 1. This file should be able to be included in non-CUDA-using files, and thus | ||
10 | // should use no CUDA types outside of this macro. | ||
11 | // 2. We want to typecheck uses of these macros. The driver and runtime APIs | ||
12 | // do not have identical error numbers and/or meanings, so runtime library | ||
13 | // calls should use SAFE, and driver library calls should use SAFE_D. | ||
14 | // These macros allow typechecking, but keep a generic global error variable. | ||
15 | #define SAFE(x) \ | ||
16 | if ((*(cudaError_t*)(&__SAFE_err) = (x)) != 0) { \ | ||
17 | printf("(%s:%d) CUDA error %d: %s i.e. \"%s\" returned by %s. Aborting...\n", \ | ||
18 | __FILE__, __LINE__, __SAFE_err, cudaGetErrorName((cudaError_t)__SAFE_err), cudaGetErrorString((cudaError_t)__SAFE_err), #x); \ | ||
19 | exit(1); \ | ||
20 | } | ||
21 | |||
22 | #define SAFE_D(x) \ | ||
23 | if ((*(CUresult*)&(__SAFE_err) = (x)) != 0) { \ | ||
24 | const char* name; \ | ||
25 | const char* desc; \ | ||
26 | cuGetErrorName((CUresult)__SAFE_err, &name); \ | ||
27 | cuGetErrorString((CUresult)__SAFE_err, &desc); \ | ||
28 | printf("(%s:%d) CUDA error %d: %s i.e. \"%s\" returned by %s. Aborting...\n", \ | ||
29 | __FILE__, __LINE__, __SAFE_err, name, desc, #x); \ | ||
30 | exit(1); \ | ||
31 | } | ||