diff options
Diffstat (limited to 'nvdebug.h')
| -rw-r--r-- | nvdebug.h | 18 |
1 file changed, 11 insertions, 7 deletions
| @@ -1,4 +1,4 @@ | |||
| 1 | /* Copyright 2021 Joshua Bakita | 1 | /* Copyright 2024 Joshua Bakita |
| 2 | * SPDX-License-Identifier: MIT | 2 | * SPDX-License-Identifier: MIT |
| 3 | * | 3 | * |
| 4 | * File outline: | 4 | * File outline: |
| @@ -688,17 +688,20 @@ typedef union { | |||
| 688 | 688 | ||
| 689 | SCAL_NUM_CES : Number of externally accessible copy engines | 689 | SCAL_NUM_CES : Number of externally accessible copy engines |
| 690 | 690 | ||
| 691 | Errata: Incorrectly reports "3" on Jetson TX1 and TX2. Should report "1" to be | ||
| 692 | consistent with PTOP data. | ||
| 693 | |||
| 691 | Support: Kepler through (at least) Blackwell | 694 | Support: Kepler through (at least) Blackwell |
| 692 | Also see dev_ce.ref.txt of NVIDIA's open-gpu-doc for info. | 695 | Also see dev_ce.ref.txt of NVIDIA's open-gpu-doc for info. |
| 693 | */ | 696 | */ |
| 694 | #define NV_PTOP_SCAL_NUM_CES 0x00022444 | 697 | #define NV_PTOP_SCAL_NUM_CES 0x00022444 |
| 695 | // Defined number of GRCEs for a GPU | 698 | // Defined max number of GRCEs for a GPU (TX2 has only one) |
| 696 | # define NV_GRCE_NUM 2 | 699 | # define NV_GRCE_MAX 2 |
| 697 | // Defined GRCE->CE mapping offsets from nvgpu | 700 | // Defined GRCE->CE mapping offsets from nvgpu |
| 698 | #define NV_GRCE_FOR_CE_GP100(i) (0x00104034+(i)*4) | 701 | #define NV_GRCE_FOR_CE_GP100(i) (0x00104034+(i)*4) |
| 699 | #define NV_GRCE_FOR_CE_GA100(i) (0x001041c0+(i)*4) | 702 | #define NV_GRCE_FOR_CE_GA100(i) (0x001041c0+(i)*4) |
| 700 | // Defined LCE->PCE mapping offset from nvgpu (same as ce_pce2lce_config_r(i) in nvgpu) | 703 | // Defined LCE->PCE mapping offset from nvgpu (same as ce_pce2lce_config_r(i) in nvgpu) |
| 701 | #define NV_LCE_FOR_PCE_GP100(i) (0x0010402c+(i)/2) | 704 | #define NV_LCE_FOR_PCE_GP100 0x0010402c |
| 702 | #define NV_LCE_FOR_PCE_GV100(i) (0x00104040+(i)*4) | 705 | #define NV_LCE_FOR_PCE_GV100(i) (0x00104040+(i)*4) |
| 703 | #define NV_LCE_FOR_PCE_GA100(i) (0x00104100+(i)*4) | 706 | #define NV_LCE_FOR_PCE_GA100(i) (0x00104100+(i)*4) |
| 704 | // Struct for use with nvdebug_reg_range_read() | 707 | // Struct for use with nvdebug_reg_range_read() |
| @@ -717,13 +720,14 @@ union reg_range { | |||
| 717 | 720 | ||
| 718 | CE_PCE_MAP : A bitmask, where a set bit indicates that the PCE for that index | 721 | CE_PCE_MAP : A bitmask, where a set bit indicates that the PCE for that index |
| 719 | is enabled (not floorswept) on this GPU. Count the number of set | 722 | is enabled (not floorswept) on this GPU. Count the number of set |
| 720 | bits to get the number of PCEs. | 723 | bits to get the number of PCEs. Note that this may be bogus if |
| 724 | the GPU has not been used since reset. | ||
| 721 | 725 | ||
| 722 | Support: Kepler through (at least) Blackwell | 726 | Support: Pascal through (at least) Blackwell |
| 723 | Also see dev_ce.ref.txt of NVIDIA's open-gpu-doc for info. | 727 | Also see dev_ce.ref.txt of NVIDIA's open-gpu-doc for info. |
| 724 | */ | 728 | */ |
| 725 | #define NV_CE_PCE_MAP 0x00104028 | 729 | #define NV_CE_PCE_MAP 0x00104028 |
| 726 | #define MAP_SIZE 32 | 730 | #define NV_CE_PCE_MAP_SIZE 32 |
| 727 | 731 | ||
| 728 | 732 | ||
| 729 | /* Location of the 1Kb instance block with page tables for BAR1 and BAR2. | 733 | /* Location of the 1Kb instance block with page tables for BAR1 and BAR2. |
