aboutsummaryrefslogtreecommitdiffstats
path: root/nvdebug.h
diff options
context:
space:
mode:
authorJoshua Bakita <bakitajoshua@gmail.com>2024-04-08 13:33:28 -0400
committerJoshua Bakita <jbakita@cs.unc.edu>2024-04-08 14:02:07 -0400
commitac60151ea0a4a1f3882fde3c486af870029b7977 (patch)
tree4a32ead49a987030cb4222c687b23bc1f23d4d34 /nvdebug.h
parent3aab3c220f3f0bcc3d3d58d0daf6fd6acf1819e2 (diff)
Rework LCE<->PCE and GRCE->LCE configuration printing API (archive/saman63-wip)
Rather than up to dozens of individual files exposing part of each copy engine's configuration, have one file which exposes a unified view of the full topology. Example new output on RTX 2080 Ti: $ cat /proc/gpu0/copy_topology GRCE0 -> LCE04 GRCE1 -> LCE03 LCE02 -> PCE02 LCE03 -> PCE03 LCE04 -> PCE01 Old output: $ tail -n 1 /proc/gpu0/lce_for_pce* ==> /proc/gpu0/lce_for_pce0 <== 0xf ==> /proc/gpu0/lce_for_pce1 <== 0x4 ==> /proc/gpu0/lce_for_pce2 <== 0x2 ==> /proc/gpu0/lce_for_pce3 <== 0x3 $ tail -n 1 /proc/gpu1/shared_lce_for_grce* ==> /proc/gpu0/shared_lce_for_grce0 <== 0x4 ==> /proc/gpu0/shared_lce_for_grce1 <== 0x3 Specifically: - Add `copy_topology` API - Remove `shared_lce_for_grce#` and `lce_for_pce#` APIs - Move logic from `nvdebug_entry.c` to `copy_topology_procfs.c` - Do not print PCE or Shared LCE configuration if flagged absent - Refer to LCE0 and LCE1 as GRCE0 and GRCE1 - Print by LCE ID, which is more helpful when attempting to trace how a given copy runlist maps to a physical copy engine. - Document two errata with CE registers Tested working on Pascal Integrated, Pascal, Volta Integrated, Volta, Turing, and Ampere Integrated on Linux 4.9 through 5.10.
Diffstat (limited to 'nvdebug.h')
-rw-r--r--nvdebug.h18
1 files changed, 11 insertions, 7 deletions
diff --git a/nvdebug.h b/nvdebug.h
index a9366e0..ac254f0 100644
--- a/nvdebug.h
+++ b/nvdebug.h
@@ -1,4 +1,4 @@
1/* Copyright 2021 Joshua Bakita 1/* Copyright 2024 Joshua Bakita
2 * SPDX-License-Identifier: MIT 2 * SPDX-License-Identifier: MIT
3 * 3 *
4 * File outline: 4 * File outline:
@@ -688,17 +688,20 @@ typedef union {
688 688
689 SCAL_NUM_CES : Number of externally accessible copy engines 689 SCAL_NUM_CES : Number of externally accessible copy engines
690 690
691 Errata: Incorrectly reports "3" on Jetson TX1 and TX2. Should report "1" to be
692 consistent with PTOP data.
693
691 Support: Kepler through (at least) Blackwell 694 Support: Kepler through (at least) Blackwell
692 Also see dev_ce.ref.txt of NVIDIA's open-gpu-doc for info. 695 Also see dev_ce.ref.txt of NVIDIA's open-gpu-doc for info.
693*/ 696*/
694#define NV_PTOP_SCAL_NUM_CES 0x00022444 697#define NV_PTOP_SCAL_NUM_CES 0x00022444
695// Defined number of GRCEs for a GPU 698// Defined max number of GRCEs for a GPU (TX2 has only one)
696# define NV_GRCE_NUM 2 699# define NV_GRCE_MAX 2
697// Defined GRCE->CE mapping offsets from nvgpu 700// Defined GRCE->CE mapping offsets from nvgpu
698#define NV_GRCE_FOR_CE_GP100(i) (0x00104034+(i)*4) 701#define NV_GRCE_FOR_CE_GP100(i) (0x00104034+(i)*4)
699#define NV_GRCE_FOR_CE_GA100(i) (0x001041c0+(i)*4) 702#define NV_GRCE_FOR_CE_GA100(i) (0x001041c0+(i)*4)
700// Defined LCE->PCE mapping offset from nvgpu (same as ce_pce2lce_config_r(i) in nvgpu) 703// Defined LCE->PCE mapping offset from nvgpu (same as ce_pce2lce_config_r(i) in nvgpu)
701#define NV_LCE_FOR_PCE_GP100(i) (0x0010402c+(i)/2) 704#define NV_LCE_FOR_PCE_GP100 0x0010402c
702#define NV_LCE_FOR_PCE_GV100(i) (0x00104040+(i)*4) 705#define NV_LCE_FOR_PCE_GV100(i) (0x00104040+(i)*4)
703#define NV_LCE_FOR_PCE_GA100(i) (0x00104100+(i)*4) 706#define NV_LCE_FOR_PCE_GA100(i) (0x00104100+(i)*4)
704// Struct for use with nvdebug_reg_range_read() 707// Struct for use with nvdebug_reg_range_read()
@@ -717,13 +720,14 @@ union reg_range {
717 720
718 CE_PCE_MAP : A bitmask, where a set bit indicates that the PCE for that index 721 CE_PCE_MAP : A bitmask, where a set bit indicates that the PCE for that index
719 is enabled (not floorswept) on this GPU. Count the number of set 722 is enabled (not floorswept) on this GPU. Count the number of set
720 bits to get the number of PCEs. 723 bits to get the number of PCEs. Note that this may be bogus if
724 the GPU has not been used since reset.
721 725
722 Support: Kepler through (at least) Blackwell 726 Support: Pascal through (at least) Blackwell
723 Also see dev_ce.ref.txt of NVIDIA's open-gpu-doc for info. 727 Also see dev_ce.ref.txt of NVIDIA's open-gpu-doc for info.
724*/ 728*/
725#define NV_CE_PCE_MAP 0x00104028 729#define NV_CE_PCE_MAP 0x00104028
726#define MAP_SIZE 32 730#define NV_CE_PCE_MAP_SIZE 32
727 731
728 732
729/* Location of the 1Kb instance block with page tables for BAR1 and BAR2. 733/* Location of the 1Kb instance block with page tables for BAR1 and BAR2.