diff options
author | Joshua Bakita <jbakita@cs.unc.edu> | 2024-12-18 15:37:27 -0500 |
---|---|---|
committer | Joshua Bakita <jbakita@cs.unc.edu> | 2024-12-18 15:49:48 -0500 |
commit | de331f42ab6efd7739c9d02594bc796421779a5f (patch) | |
tree | e1a046b053c0083902e64f5d25d37231627bde7a | |
parent | 2ad0e819a9a9652f6afc0b6da4d70a1232c124d7 (diff) |
Fix a potential bug with stream masking on CUDA 12.6 on aarch64 Jetson
Commit 3f9bda39 made an error by using the pre-CUDA-12 mask
structure layout on CUDA 12.6 on aarch64 Jetson. Switch to the
CUDA 12+ layout (as used on x86_64).
Tests work either way on the Jetson Orin, so this change is not
strictly required, but seems advisable to support potential large
(PCIe-attached?) GPUs on Jetson/DRIVE platforms.
-rw-r--r-- | libsmctrl.c | 2 |
1 file changed, 1 insertion, 1 deletion
diff --git a/libsmctrl.c b/libsmctrl.c index 7202572..7debe3a 100644 --- a/libsmctrl.c +++ b/libsmctrl.c | |||
@@ -421,7 +421,7 @@ void libsmctrl_set_stream_mask_ext(void* stream, uint128_t mask) { | |||
421 | hw_mask = (struct stream_sm_mask*)(stream_struct_base + CU_11_4_MASK_OFF_JETSON); | 421 | hw_mask = (struct stream_sm_mask*)(stream_struct_base + CU_11_4_MASK_OFF_JETSON); |
422 | break; | 422 | break; |
423 | case 12060: | 423 | case 12060: |
424 | hw_mask = (struct stream_sm_mask*)(stream_struct_base + CU_12_6_MASK_OFF_JETSON); | 424 | hw_mask_v2 = (void*)(stream_struct_base + CU_12_6_MASK_OFF_JETSON); |
425 | break; | 425 | break; |
426 | #endif | 426 | #endif |
427 | } | 427 | } |