aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Joshua Bakita <jbakita@cs.unc.edu> 2024-12-18 15:37:27 -0500
committer Joshua Bakita <jbakita@cs.unc.edu> 2024-12-18 15:49:48 -0500
commit de331f42ab6efd7739c9d02594bc796421779a5f (patch)
tree e1a046b053c0083902e64f5d25d37231627bde7a
parent 2ad0e819a9a9652f6afc0b6da4d70a1232c124d7 (diff)
Fix a potential bug with stream masking on CUDA 12.6 on aarch64 Jetson
Commit 3f9bda39 made an error by using the pre-CUDA-12 mask structure layout on CUDA 12.6 on aarch64 Jetson. Switch to the CUDA 12+ layout (as used on x86_64). Tests work either way on the Jetson Orin, so this change is not strictly required, but it seems advisable in order to support potential large (PCIe-attached?) GPUs on Jetson/DRIVE platforms.
-rw-r--r--libsmctrl.c2
1 files changed, 1 insertions, 1 deletions
diff --git a/libsmctrl.c b/libsmctrl.c
index 7202572..7debe3a 100644
--- a/libsmctrl.c
+++ b/libsmctrl.c
@@ -421,7 +421,7 @@ void libsmctrl_set_stream_mask_ext(void* stream, uint128_t mask) {
421 hw_mask = (struct stream_sm_mask*)(stream_struct_base + CU_11_4_MASK_OFF_JETSON); 421 hw_mask = (struct stream_sm_mask*)(stream_struct_base + CU_11_4_MASK_OFF_JETSON);
422 break; 422 break;
423 case 12060: 423 case 12060:
424 hw_mask = (struct stream_sm_mask*)(stream_struct_base + CU_12_6_MASK_OFF_JETSON); 424 hw_mask_v2 = (void*)(stream_struct_base + CU_12_6_MASK_OFF_JETSON);
425 break; 425 break;
426#endif 426#endif
427 } 427 }