diff options
Diffstat (limited to 'nvdebug.h')
-rw-r--r-- | nvdebug.h | 18 |
1 files changed, 11 insertions, 7 deletions
@@ -1,4 +1,4 @@ | |||
1 | /* Copyright 2021 Joshua Bakita | 1 | /* Copyright 2024 Joshua Bakita |
2 | * SPDX-License-Identifier: MIT | 2 | * SPDX-License-Identifier: MIT |
3 | * | 3 | * |
4 | * File outline: | 4 | * File outline: |
@@ -688,17 +688,20 @@ typedef union { | |||
688 | 688 | ||
689 | SCAL_NUM_CES : Number of externally accessible copy engines | 689 | SCAL_NUM_CES : Number of externally accessible copy engines |
690 | 690 | ||
691 | Errata: Incorrectly reports "3" on Jetson TX1 and TX2. Should report "1" to be | ||
692 | consistent with PTOP data. | ||
693 | |||
691 | Support: Kepler through (at least) Blackwell | 694 | Support: Kepler through (at least) Blackwell |
692 | Also see dev_ce.ref.txt of NVIDIA's open-gpu-doc for info. | 695 | Also see dev_ce.ref.txt of NVIDIA's open-gpu-doc for info. |
693 | */ | 696 | */ |
694 | #define NV_PTOP_SCAL_NUM_CES 0x00022444 | 697 | #define NV_PTOP_SCAL_NUM_CES 0x00022444 |
695 | // Defined number of GRCEs for a GPU | 698 | // Defined max number of GRCEs for a GPU (TX2 has only one) |
696 | # define NV_GRCE_NUM 2 | 699 | # define NV_GRCE_MAX 2 |
697 | // Defined GRCE->CE mapping offsets from nvgpu | 700 | // Defined GRCE->CE mapping offsets from nvgpu |
698 | #define NV_GRCE_FOR_CE_GP100(i) (0x00104034+(i)*4) | 701 | #define NV_GRCE_FOR_CE_GP100(i) (0x00104034+(i)*4) |
699 | #define NV_GRCE_FOR_CE_GA100(i) (0x001041c0+(i)*4) | 702 | #define NV_GRCE_FOR_CE_GA100(i) (0x001041c0+(i)*4) |
700 | // Defined LCE->PCE mapping offset from nvgpu (same as ce_pce2lce_config_r(i) in nvgpu) | 703 | // Defined LCE->PCE mapping offset from nvgpu (same as ce_pce2lce_config_r(i) in nvgpu) |
701 | #define NV_LCE_FOR_PCE_GP100(i) (0x0010402c+(i)/2) | 704 | #define NV_LCE_FOR_PCE_GP100 0x0010402c |
702 | #define NV_LCE_FOR_PCE_GV100(i) (0x00104040+(i)*4) | 705 | #define NV_LCE_FOR_PCE_GV100(i) (0x00104040+(i)*4) |
703 | #define NV_LCE_FOR_PCE_GA100(i) (0x00104100+(i)*4) | 706 | #define NV_LCE_FOR_PCE_GA100(i) (0x00104100+(i)*4) |
704 | // Struct for use with nvdebug_reg_range_read() | 707 | // Struct for use with nvdebug_reg_range_read() |
@@ -717,13 +720,14 @@ union reg_range { | |||
717 | 720 | ||
718 | CE_PCE_MAP : A bitmask, where a set bit indicates that the PCE for that index | 721 | CE_PCE_MAP : A bitmask, where a set bit indicates that the PCE for that index |
719 | is enabled (not floorswept) on this GPU. Count the number of set | 722 | is enabled (not floorswept) on this GPU. Count the number of set |
720 | bits to get the number of PCEs. | 723 | bits to get the number of PCEs. Note that this may be bogus if |
724 | the GPU has not been used since reset. | ||
721 | 725 | ||
722 | Support: Kepler through (at least) Blackwell | 726 | Support: Pascal through (at least) Blackwell |
723 | Also see dev_ce.ref.txt of NVIDIA's open-gpu-doc for info. | 727 | Also see dev_ce.ref.txt of NVIDIA's open-gpu-doc for info. |
724 | */ | 728 | */ |
725 | #define NV_CE_PCE_MAP 0x00104028 | 729 | #define NV_CE_PCE_MAP 0x00104028 |
726 | #define MAP_SIZE 32 | 730 | #define NV_CE_PCE_MAP_SIZE 32 |
727 | 731 | ||
728 | 732 | ||
729 | /* Location of the 1Kb instance block with page tables for BAR1 and BAR2. | 733 | /* Location of the 1Kb instance block with page tables for BAR1 and BAR2. |