| author | Joshua Bakita <bakitajoshua@gmail.com> | 2023-06-22 12:52:59 -0400 |
|---|---|---|
| committer | Joshua Bakita <bakitajoshua@gmail.com> | 2023-06-22 12:52:59 -0400 |
| commit | 306a03d18b305e4e573be3b2931978fa10679eb9 (patch) | |
| tree | 349570dfbe5f531e903c949c3f663627ee1097a8 /nvdebug.h | |
| parent | f4b83713672acaf88a526b930b8e417453f6edc5 (diff) | |
Quick dump of current state for Ben to review.
Diffstat (limited to 'nvdebug.h')
| -rw-r--r-- | nvdebug.h | 719 |
1 files changed, 673 insertions, 46 deletions
| @@ -5,14 +5,18 @@ | |||
| 5 | // TODO(jbakita): Don't depend on these. | 5 | // TODO(jbakita): Don't depend on these. |
| 6 | #include <nvgpu/gk20a.h> // For struct gk20a | 6 | #include <nvgpu/gk20a.h> // For struct gk20a |
| 7 | #include <os/linux/os_linux.h> // For struct nvgpu_os_linux | 7 | #include <os/linux/os_linux.h> // For struct nvgpu_os_linux |
| 8 | #include <linux/proc_fs.h> // For PDE_DATA() macro | ||
| 8 | 9 | ||
| 9 | /* Runlist Channel | 10 | /* Runlist Channel |
| 10 | A timeslice group (TSG) is composed of channels. Each channel is a FIFO queue | 11 | A timeslice group (TSG) is composed of channels. Each channel is a FIFO queue |
| 11 | of GPU commands. These commands are typically queued from userspace. | 12 | of GPU commands. These commands are typically queued from userspace. |
| 12 | 13 | ||
| 13 | `INST_PTR` points to a GPU Instance Block which contains pointers to the GPU | 14 | Prior to Volta, channels could also exist independent of a TSG. These are |
| 14 | virtual address space for this context. All channels in a TSG point to the | 15 | called "bare channels" in the Jetson nvgpu driver. |
| 15 | same GPU Instance Block (?). | 16 | |
| 17 | `INST_PTR` points to a GPU Instance Block which contains FIFO states, virtual | ||
| 18 | address space configuration for this context, and a pointer to the page | ||
| 19 | tables. All channels in a TSG point to the same GPU Instance Block (?). | ||
| 16 | 20 | ||
| 17 | "RUNQUEUE_SELECTOR determines to which runqueue the channel belongs, and | 21 | "RUNQUEUE_SELECTOR determines to which runqueue the channel belongs, and |
| 18 | thereby which PBDMA will run the channel. Increasing values select | 22 | thereby which PBDMA will run the channel. Increasing values select |
| @@ -30,7 +34,13 @@ | |||
| 30 | ENTRY_TYPE (T) : type of this entry: ENTRY_TYPE_CHAN | 34 | ENTRY_TYPE (T) : type of this entry: ENTRY_TYPE_CHAN |
| 31 | CHID (ID) : identifier of the channel to run (overlays ENTRY_ID) | 35 | CHID (ID) : identifier of the channel to run (overlays ENTRY_ID) |
| 32 | RUNQUEUE_SELECTOR (Q) : selects which PBDMA should run this channel if | 36 | RUNQUEUE_SELECTOR (Q) : selects which PBDMA should run this channel if |
| 33 | more than one PBDMA is supported by the runlist | 37 | more than one PBDMA is supported by the runlist. |
| 38 | Additionally, "A value of 0 targets the first FE | ||
| 39 | pipe, which can process all FE driven engines: | ||
| 40 | Graphics, Compute, Inline2Memory, and TwoD. A value | ||
| 41 | of 1 targets the second FE pipe, which can only | ||
| 42 | process Compute work. Note that GRCE work is allowed | ||
| 43 | on either runqueue." | ||
| 34 | 44 | ||
| 35 | INST_PTR_LO : lower 20 bits of the 4k-aligned instance block pointer | 45 | INST_PTR_LO : lower 20 bits of the 4k-aligned instance block pointer |
| 36 | INST_PTR_HI : upper 32 bits of instance block pointer | 46 | INST_PTR_HI : upper 32 bits of instance block pointer |
| @@ -39,6 +49,9 @@ | |||
| 39 | USERD_PTR_LO : upper 24 bits of the low 32 bits, of the 512-byte-aligned USERD pointer | 49 | USERD_PTR_LO : upper 24 bits of the low 32 bits, of the 512-byte-aligned USERD pointer |
| 40 | USERD_PTR_HI : upper 32 bits of USERD pointer | 50 | USERD_PTR_HI : upper 32 bits of USERD pointer |
| 41 | USERD_TARGET (TGU) : aperture of the USERD data structure | 51 | USERD_TARGET (TGU) : aperture of the USERD data structure |
| 52 | |||
| 53 | Channels have been around since at least Fermi, but were rearranged with Volta to | ||
| 54 | add a USERD pointer, a longer INST pointer, and a runqueue selector flag. | ||
| 42 | */ | 55 | */ |
| 43 | enum ENTRY_TYPE {ENTRY_TYPE_CHAN = 0, ENTRY_TYPE_TSG = 1}; | 56 | enum ENTRY_TYPE {ENTRY_TYPE_CHAN = 0, ENTRY_TYPE_TSG = 1}; |
| 44 | enum INST_TARGET {TARGET_VID_MEM = 0, TARGET_SYS_MEM_COHERENT = 2, TARGET_SYS_MEM_NONCOHERENT = 3}; | 57 | enum INST_TARGET {TARGET_VID_MEM = 0, TARGET_SYS_MEM_COHERENT = 2, TARGET_SYS_MEM_NONCOHERENT = 3}; |
| @@ -52,11 +65,12 @@ static inline char* target_to_text(enum INST_TARGET t) { | |||
| 52 | return "SYS_MEM_NONCOHERENT"; | 65 | return "SYS_MEM_NONCOHERENT"; |
| 53 | default: | 66 | default: |
| 54 | printk(KERN_WARNING "[nvdebug] Invalid aperture!\n"); | 67 | printk(KERN_WARNING "[nvdebug] Invalid aperture!\n"); |
| 55 | return NULL; | 68 | return "INVALID"; |
| 56 | } | 69 | } |
| 57 | } | 70 | } |
| 58 | 71 | ||
| 59 | struct runlist_chan { | 72 | // Support: Volta, Ampere, Turing |
| 73 | struct gv100_runlist_chan { | ||
| 60 | // 0:63 | 74 | // 0:63 |
| 61 | enum ENTRY_TYPE entry_type:1; | 75 | enum ENTRY_TYPE entry_type:1; |
| 62 | uint32_t runqueue_selector:1; | 76 | uint32_t runqueue_selector:1; |
| @@ -71,6 +85,20 @@ struct runlist_chan { | |||
| 71 | uint32_t inst_ptr_hi:32; | 85 | uint32_t inst_ptr_hi:32; |
| 72 | } __attribute__((packed)); | 86 | } __attribute__((packed)); |
| 73 | 87 | ||
| 88 | // Support: Fermi, Kepler*, Maxwell, Pascal | ||
| 89 | // *In Kepler, inst fields may be unpopulated? | ||
| 90 | struct gm107_runlist_chan { | ||
| 91 | uint32_t chid:12; | ||
| 92 | uint32_t padding0:1; | ||
| 93 | enum ENTRY_TYPE entry_type:1; | ||
| 94 | uint32_t padding1:18; | ||
| 95 | uint32_t inst_ptr_lo:20; | ||
| 96 | enum INST_TARGET inst_target:2; // Totally guessing on this | ||
| 97 | uint32_t padding2:10; | ||
| 98 | } __attribute__((packed)); | ||
| 99 | |||
| 100 | #define gk110_runlist_chan gm107_runlist_chan | ||
| 101 | |||
| 74 | /* Runlist TSG (TimeSlice Group) | 102 | /* Runlist TSG (TimeSlice Group) |
| 75 | The runlist is composed of timeslice groups (TSG). Each TSG corresponds | 103 | The runlist is composed of timeslice groups (TSG). Each TSG corresponds |
| 76 | to a single virtual address space on the GPU and contains `TSG_LENGTH` | 104 | to a single virtual address space on the GPU and contains `TSG_LENGTH` |
| @@ -85,8 +113,15 @@ struct runlist_chan { | |||
| 85 | TIMESLICE_TIMEOUT : timeout amount for the TSG's timeslice | 113 | TIMESLICE_TIMEOUT : timeout amount for the TSG's timeslice |
| 86 | TSG_LENGTH : number of channels that are part of this timeslice group | 114 | TSG_LENGTH : number of channels that are part of this timeslice group |
| 87 | TSGID : identifier of the Timeslice group (overlays ENTRY_ID) | 115 | TSGID : identifier of the Timeslice group (overlays ENTRY_ID) |
| 116 | |||
| 117 | TSGs appear to have been introduced with Kepler and stayed the same until | ||
| 118 | Volta, when they were rearranged alongside channels to support longer GPU | ||
| 119 | instance addresses. | ||
| 88 | */ | 120 | */ |
| 89 | struct entry_tsg { | 121 | |
| 122 | // Support: Volta, Ampere*, Turing* | ||
| 123 | // *These treat the top 8 bits of TSGID as GFID (unused) | ||
| 124 | struct gv100_runlist_tsg { | ||
| 90 | // 0:63 | 125 | // 0:63 |
| 91 | enum ENTRY_TYPE entry_type:1; | 126 | enum ENTRY_TYPE entry_type:1; |
| 92 | uint64_t padding:15; | 127 | uint64_t padding:15; |
| @@ -101,14 +136,28 @@ struct entry_tsg { | |||
| 101 | } __attribute__((packed)); | 136 | } __attribute__((packed)); |
| 102 | #define MAX_TSGID (1 << 12) | 137 | #define MAX_TSGID (1 << 12) |
| 103 | 138 | ||
| 139 | // Support: Kepler (v2?), Maxwell, Pascal | ||
| 140 | // Same fields as Volta except tsg_length is 6 bits rather than 8 | ||
| 141 | // Last 32 bits appear to contain an undocumented inst ptr | ||
| 142 | struct gk110_runlist_tsg { | ||
| 143 | uint32_t tsgid:12; | ||
| 144 | uint32_t padding0:1; | ||
| 145 | enum ENTRY_TYPE entry_type:1; | ||
| 146 | uint32_t timeslice_scale:4; | ||
| 147 | uint32_t timeslice_timeout:8; | ||
| 148 | uint32_t tsg_length:6; | ||
| 149 | uint32_t padding1:32; | ||
| 150 | } __attribute__((packed)); | ||
| 151 | |||
| 152 | |||
| 104 | enum PREEMPT_TYPE {PREEMPT_TYPE_CHANNEL = 0, PREEMPT_TYPE_TSG = 1}; | 153 | enum PREEMPT_TYPE {PREEMPT_TYPE_CHANNEL = 0, PREEMPT_TYPE_TSG = 1}; |
| 105 | 154 | ||
| 106 | /* Preempt a TSG or Channel by ID | 155 | /* Preempt a TSG or Channel by ID |
| 107 | ID/CHID : Id of TSG or channel to preempt | 156 | ID/CHID : Id of TSG or channel to preempt |
| 108 | IS_PENDING : ???? | 157 | IS_PENDING : Is a context switch pending? |
| 109 | TYPE : PREEMPT_TYPE_CHANNEL or PREEMPT_TYPE_TSG | 158 | TYPE : PREEMPT_TYPE_CHANNEL or PREEMPT_TYPE_TSG |
| 110 | 159 | ||
| 111 | Support: Kepler, Maxwell, Pascal, Volta | 160 | Support: Kepler, Maxwell, Pascal, Volta, Turing |
| 112 | */ | 161 | */ |
| 113 | #define NV_PFIFO_PREEMPT 0x00002634 | 162 | #define NV_PFIFO_PREEMPT 0x00002634 |
| 114 | typedef union { | 163 | typedef union { |
| @@ -195,26 +244,36 @@ typedef union { | |||
| 195 | */ | 244 | */ |
| 196 | 245 | ||
| 197 | // Note: This is different with Turing | 246 | // Note: This is different with Turing |
| 198 | // Support: Kepler, Maxwell, Pascal, Volta | 247 | // Support: Fermi, Kepler, Maxwell, Pascal, Volta |
| 199 | #define NV_PFIFO_RUNLIST_BASE 0x00002270 | 248 | #define NV_PFIFO_RUNLIST_BASE 0x00002270 |
| 249 | #define NV_PFIFO_ENG_RUNLIST_BASE(i) (0x00002280+(i)*8) | ||
| 200 | typedef union { | 250 | typedef union { |
| 201 | struct { | 251 | struct { |
| 202 | uint32_t ptr:28; | 252 | uint32_t ptr:28; |
| 203 | uint32_t type:2; | 253 | enum INST_TARGET target:2; |
| 204 | uint32_t padding:2; | 254 | uint32_t padding:2; |
| 205 | } __attribute__((packed)); | 255 | } __attribute__((packed)); |
| 206 | uint32_t raw; | 256 | uint32_t raw; |
| 207 | } runlist_base_t; | 257 | } runlist_base_t; |
| 208 | 258 | ||
| 209 | // Support: Kepler, Maxwell, Pascal, Volta | 259 | // Support: Kepler, Maxwell, Pascal, Volta |
| 260 | // Works on Fermi, but id is one bit longer and is b11111 | ||
| 210 | #define NV_PFIFO_RUNLIST 0x00002274 | 261 | #define NV_PFIFO_RUNLIST 0x00002274 |
| 262 | #define NV_PFIFO_ENG_RUNLIST(i) (0x00002284+(i)*8) | ||
| 211 | typedef union { | 263 | typedef union { |
| 264 | // RUNLIST fields | ||
| 212 | struct { | 265 | struct { |
| 213 | uint32_t len:16; | 266 | uint32_t len:16; |
| 214 | uint32_t padding:4; | 267 | uint32_t padding:4; |
| 215 | uint32_t id:4; | 268 | uint32_t id:4; // Runlist ID (each engine may have a separate runlist) |
| 216 | uint32_t padding2:8; | 269 | uint32_t padding2:8; |
| 217 | } __attribute__((packed)); | 270 | } __attribute__((packed)); |
| 271 | // ENG_RUNLIST fields that differ | ||
| 272 | struct { | ||
| 273 | uint32_t padding3:20; | ||
| 274 | bool is_pending:1; // Is runlist not yet committed? | ||
| 275 | uint32_t padding4:11; | ||
| 276 | } __attribute__((packed)); | ||
| 218 | uint32_t raw; | 277 | uint32_t raw; |
| 219 | } runlist_info_t; | 278 | } runlist_info_t; |
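For illustration only (struct nvdebug_state and the nvdebug_readl() helper are defined further down in this header; the 4 KiB shift on PTR is an assumption based on the 28-bit field width), the two registers can be combined to locate and size a runlist:

```c
// Sketch: print runlist 0's length, base address, and aperture.
// Assumes runlist_base_t.ptr holds address bits 12:39.
static void print_runlist0(struct nvdebug_state *g)
{
	runlist_base_t base;
	runlist_info_t info;
	base.raw = nvdebug_readl(g, NV_PFIFO_ENG_RUNLIST_BASE(0));
	info.raw = nvdebug_readl(g, NV_PFIFO_ENG_RUNLIST(0));
	printk(KERN_INFO "[nvdebug] Runlist %d: %d entries at %#018llx (%s)\n",
	       info.id, info.len, (u64)base.ptr << 12,
	       target_to_text(base.target));
}
```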
| 220 | 279 | ||
| @@ -301,63 +360,631 @@ typedef union { | |||
| 301 | uint32_t raw; | 360 | uint32_t raw; |
| 302 | } runlist_disable_t; | 361 | } runlist_disable_t; |
| 303 | 362 | ||
| 363 | /* Read GPU descriptors from the Master Controller (MC) | ||
| 364 | |||
| 365 | MINOR_REVISION : Legacy (only used with Celvin in Nouveau) | ||
| 366 | MAJOR_REVISION : Legacy (only used with Celvin in Nouveau) | ||
| 367 | IMPLEMENTATION : Which implementation of the GPU architecture | ||
| 368 | ARCHITECTURE : Which GPU architecture | ||
| 369 | |||
| 370 | CHIP_ID = (ARCHITECTURE << 4) + IMPLEMENTATION | ||
| 371 | CHIP_ID : Unique ID of all chips since Kelvin | ||
| 372 | |||
| 373 | Support: Kelvin, Rankine, Curie, Tesla, Fermi, Kepler, Maxwell, Pascal, | ||
| 374 | Volta, Turing, Ampere | ||
| 375 | */ | ||
| 376 | #define NV_MC_BOOT_0 0x00000000 | ||
| 377 | #define NV_CHIP_ID_GP106 0x136 // Discrete GeForce GTX 1060 | ||
| 378 | #define NV_CHIP_ID_GV11B 0x15B // Jetson Xavier embedded GPU | ||
| 379 | #define NV_CHIP_ID_KEPLER 0x0E0 | ||
| 380 | #define NV_CHIP_ID_VOLTA 0x140 | ||
| 381 | |||
| 382 | inline static const char* ARCH2NAME(uint32_t arch) { | ||
| 383 | switch (arch) { | ||
| 384 | case 0x01: | ||
| 385 | return "Celsius"; | ||
| 386 | case 0x02: | ||
| 387 | return "Kelvin"; | ||
| 388 | case 0x03: | ||
| 389 | return "Rankline"; | ||
| 390 | case 0x04: | ||
| 391 | case 0x06: // 0x06 is (nForce 6XX integrated only) | ||
| 392 | return "Curie"; | ||
| 393 | // 0x07 is unused/skipped | ||
| 394 | case 0x05: // First Tesla card was released before the nForce 6XX | ||
| 395 | case 0x08: | ||
| 396 | case 0x09: | ||
| 397 | case 0x0A: | ||
| 398 | return "Tesla"; | ||
| 399 | // 0x0B is unused/skipped | ||
| 400 | case 0x0C: | ||
| 401 | case 0x0D: | ||
| 402 | return "Fermi"; | ||
| 403 | case 0x0E: | ||
| 404 | case 0x0F: | ||
| 405 | case 0x11: | ||
| 406 | return "Kepler"; | ||
| 407 | case 0x12: | ||
| 408 | return "Maxwell"; | ||
| 409 | case 0x13: | ||
| 410 | return "Pascal"; | ||
| 411 | case 0x14: | ||
| 412 | case 0x15: // Volta integrated | ||
| 413 | return "Volta"; | ||
| 414 | case 0x16: | ||
| 415 | return "Turing"; | ||
| 416 | case 0x17: | ||
| 417 | return "Ampere"; | ||
| 418 | case 0x18: | ||
| 419 | case 0x19: | ||
| 420 | return "Hopper (?) or Lovelace (?)"; | ||
| 421 | default: | ||
| 422 | if (arch < 0x19) | ||
| 423 | return "[unknown historical architecture]"; | ||
| 424 | else | ||
| 425 | return "[future]"; | ||
| 426 | } | ||
| 427 | } | ||
| 428 | |||
| 429 | typedef union { | ||
| 430 | // Fields as defined in the NVIDIA reference | ||
| 431 | struct { | ||
| 432 | uint32_t minor_revision:4; | ||
| 433 | uint32_t major_revision:4; | ||
| 434 | uint32_t reserved:4; | ||
| 435 | uint32_t padding0:8; | ||
| 436 | uint32_t implementation:4; | ||
| 437 | uint32_t architecture:5; | ||
| 438 | uint32_t padding1:3; | ||
| 439 | } __attribute__((packed)); | ||
| 440 | uint32_t raw; | ||
| 441 | // (Arch << 4) + impl is also often used | ||
| 442 | struct { | ||
| 443 | uint32_t padding2:20; | ||
| 444 | uint32_t chip_id:9; | ||
| 445 | uint32_t padding3:3; | ||
| 446 | } __attribute__((packed)); | ||
| 447 | } mc_boot_0_t; | ||
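A minimal example of decoding this register with the union above (it relies on the nvdebug_readl() helper defined near the end of this header; the wrapper function itself is illustrative):

```c
// Sketch: identify the GPU architecture and chip ID from MC_BOOT_0.
static void print_gpu_id(struct nvdebug_state *g)
{
	mc_boot_0_t ids;
	ids.raw = nvdebug_readl(g, NV_MC_BOOT_0);
	printk(KERN_INFO "[nvdebug] %s GPU, chip ID %#05x, revision %d.%d\n",
	       ARCH2NAME(ids.architecture), ids.chip_id,
	       ids.major_revision, ids.minor_revision);
}
```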
| 448 | |||
| 449 | enum DEVICE_INFO_TYPE {INFO_TYPE_NOT_VALID = 0, INFO_TYPE_DATA = 1, INFO_TYPE_ENUM = 2, INFO_TYPE_ENGINE_TYPE = 3}; | ||
| 450 | enum ENGINE_TYPES { | ||
| 451 | ENGINE_GRAPHICS = 0, // GRAPHICS [/compute] | ||
| 452 | ENGINE_COPY0 = 1, // [raw/physical] COPY #0 | ||
| 453 | ENGINE_COPY1 = 2, // [raw/physical] COPY #1 | ||
| 454 | ENGINE_COPY2 = 3, // [raw/physical] COPY #2 | ||
| 455 | |||
| 456 | ENGINE_MSPDEC = 8, // Picture DECoder | ||
| 457 | ENGINE_MSPPP = 9, // [Video] Post Processing | ||
| 458 | ENGINE_MSVLD = 10, // [Video] Variable Length Decoder | ||
| 459 | ENGINE_MSENC = 11, // [Video] ENCoding | ||
| 460 | ENGINE_VIC = 12, // Video Image Compositor | ||
| 461 | ENGINE_SEC = 13, // SEquenCer [?] | ||
| 462 | ENGINE_NVENC0 = 14, // Nvidia Video ENCoder #0 | ||
| 463 | ENGINE_NVENC1 = 15, // Nvidia Video ENCoder #1 | ||
| 464 | ENGINE_NVDEC = 16, // Nvidia Video DECoder | ||
| 465 | |||
| 466 | ENGINE_IOCTRL = 18, // I/O ConTRoLler [of NVLINK at least] | ||
| 467 | ENGINE_LCE = 19, // Logical Copy Engine | ||
| 468 | ENGINE_GSP = 20, // Gpu System Processor | ||
| 469 | ENGINE_NVJPG = 21, // NVidia JPeG [Decoder] (Ampere+) | ||
| 470 | }; | ||
| 471 | #define ENGINE_TYPES_LEN 22 | ||
| 472 | static const char* const ENGINE_TYPES_NAMES[ENGINE_TYPES_LEN] = { | ||
| 473 | "Graphics/Compute", | ||
| 474 | "COPY0", | ||
| 475 | "COPY1", | ||
| 476 | "COPY2", | ||
| 477 | "Unknown Engine ID#4", | ||
| 478 | "Unknown Engine ID#5", | ||
| 479 | "Unknown Engine ID#6", | ||
| 480 | "Unknown Engine ID#7", | ||
| 481 | "MSPDEC: Picture Decoder", | ||
| 482 | "MSPPP: Post Processing", | ||
| 483 | "MSVLD: Variable Length Decoder", | ||
| 484 | "MSENC: Encoder", | ||
| 485 | "VIC: Video Image Compositor", | ||
| 486 | "SEC: Sequencer", | ||
| 487 | "NVENC0: NVIDIA Video Encoder #0", | ||
| 488 | "NVENC1: NVIDIA Video Encoder #1", | ||
| 489 | "NVDEC: NVIDIA Video Decoder", | ||
| 490 | "Unknown Engine ID#17", | ||
| 491 | "IOCTRL: I/O Controller", | ||
| 492 | "LCE: Logical Copy Engine", | ||
| 493 | "GSP: GPU System Processor", | ||
| 494 | "NVJPG: NVIDIA JPEG Decoder", | ||
| 495 | }; | ||
| 496 | |||
| 497 | /* GPU engine information and control register offsets | ||
| 498 | Each engine is described by one or more entries (terminated by an entry with | ||
| 499 | the `has_next_entry` flag unset) in the fixed-size PTOP_DEVICE_INFO table. A | ||
| 500 | typical device, such as the graphics/compute engine or a copy engine, is | ||
| 501 | described by three entries, one of each type. | ||
| 502 | |||
| 503 | The PTOP_DEVICE_INFO table is sparsely populated (entries of type | ||
| 504 | INFO_TYPE_NOT_VALID may be intermingled with valid entries), so any traversal | ||
| 505 | code should check all NV_PTOP_DEVICE_INFO__SIZE_1 entries and not terminate | ||
| 506 | upon reaching the first entry of INFO_TYPE_NOT_VALID. | ||
| 507 | |||
| 508 | INFO_TYPE : Is this a DATA, ENUM, or ENGINE_TYPE table entry? | ||
| 509 | HAS_NEXT_ENTRY : Does the following entry refer to the same engine? | ||
| 510 | |||
| 511 | == INFO_TYPE_DATA fields == | ||
| 512 | PRI_BASE : BAR0 base = (PRI_BASE << 12) aka 4k aligned. | ||
| 513 | INST_ID : "Note that some instanced [engines] (such as logical copy | ||
| 514 | engines aka LCE) share a PRI_BASE across all [engines] of | ||
| 515 | the same engine type; such [engines] require an additional | ||
| 516 | offset: instanced base = BAR0 base + stride * INST_ID. | ||
| 517 | FAULT_ID_IS_VALID : Does this engine have its own bind point and fault ID | ||
| 518 | with the MMU? | ||
| 519 | FAULT_ID : "The MMU fault id used by this [engine]. These IDs | ||
| 520 | correspond to the NV_PFAULT_MMU_ENG_ID define list." | ||
| 521 | |||
| 522 | == INFO_TYPE_ENUM fields == | ||
| 523 | ENGINE_IS_VALID : Is this engine a host engine? | ||
| 524 | ENGINE_ENUM : "[T]he host engine ID for the current [engine] if it is | ||
| 525 | a host engine, meaning Host can send methods to the | ||
| 526 | engine. This id is used to index into any register array | ||
| 527 | whose __SIZE_1 is equal to NV_HOST_NUM_ENGINES. A given | ||
| 528 | ENGINE_ENUM can be present for at most one device in the | ||
| 529 | table. Devices corresponding to all ENGINE_ENUM ids 0 | ||
| 530 | through NV_HOST_NUM_ENGINES - 1 must be present in the | ||
| 531 | device info table." | ||
| 532 | RUNLIST_IS_VALID : Is this engine a host engine with a runlist? | ||
| 533 | RUNLIST_ENUM : "[T]he Host runlist ID on which methods for the current | ||
| 534 | [engine] should be submitted... The runlist id is used to | ||
| 535 | index into any register array whose __SIZE_1 is equal to | ||
| 536 | NV_HOST_NUM_RUNLISTS. [Engines] corresponding to all | ||
| 537 | RUNLIST_ENUM ids 0 through NV_HOST_NUM_RUNLISTS - 1 must | ||
| 538 | be present in the device info table." | ||
| 539 | INTR_IS_VALID : Does this device have an interrupt? | ||
| 540 | INTR_ENUM : Interrupt ID for use with "the NV_PMC_INTR_*_DEVICE | ||
| 541 | register bitfields." | ||
| 542 | RESET_IS_VALID : Does this engine have a reset ID? | ||
| 543 | RESET_ENUM : Reset ID for use indexing the "NV_PMC_ENABLE_DEVICE(i) | ||
| 544 | and NV_PMC_ELPG_ENABLE_DEVICE(i) register bitfields." | ||
| 545 | |||
| 546 | == INFO_TYPE_ENGINE_TYPE fields == | ||
| 547 | ENGINE_TYPE : What type of engine is this? (see ENGINE_TYPES_NAMES) | ||
| 548 | |||
| 549 | Support: Kepler, Maxwell, Pascal, Volta, Ampere | ||
| 550 | See dev_top.ref.txt of NVIDIA's open-gpu-doc for more info. | ||
| 551 | */ | ||
| 552 | #define NV_PTOP_DEVICE_INFO(i) (0x00022700+(i)*4) | ||
| 553 | #define NV_PTOP_DEVICE_INFO__SIZE_1 64 | ||
| 554 | typedef union { | ||
| 555 | // DATA type fields | ||
| 556 | struct { | ||
| 557 | enum DEVICE_INFO_TYPE info_type:2; | ||
| 558 | bool fault_id_is_valid:1; | ||
| 559 | uint32_t fault_id:7; | ||
| 560 | uint32_t padding0:2; | ||
| 561 | uint32_t pri_base:12; | ||
| 562 | uint32_t padding1:2; | ||
| 563 | uint32_t inst_id:4; | ||
| 564 | uint32_t is_not_enum2:1; | ||
| 565 | bool has_next_entry:1; | ||
| 566 | } __attribute__((packed)); | ||
| 567 | // ENUM type fields | ||
| 568 | struct { | ||
| 569 | uint32_t padding2:2; | ||
| 570 | bool reset_is_valid:1; | ||
| 571 | bool intr_is_valid:1; | ||
| 572 | bool runlist_is_valid:1; | ||
| 573 | bool engine_is_valid:1; | ||
| 574 | uint32_t padding3:3; | ||
| 575 | uint32_t reset_enum:5; | ||
| 576 | uint32_t padding4:1; | ||
| 577 | uint32_t intr_enum:5; | ||
| 578 | uint32_t padding5:1; | ||
| 579 | uint32_t runlist_enum:4; | ||
| 580 | uint32_t padding6:1; | ||
| 581 | uint32_t engine_enum:4; | ||
| 582 | uint32_t padding7:2; | ||
| 583 | } __attribute__((packed)); | ||
| 584 | // ENGINE_TYPE type fields | ||
| 585 | struct { | ||
| 586 | uint32_t padding8:2; | ||
| 587 | enum ENGINE_TYPES engine_type:29; | ||
| 588 | uint32_t padding9:1; | ||
| 589 | } __attribute__((packed)); | ||
| 590 | uint32_t raw; | ||
| 591 | } ptop_device_info_t; | ||
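To make the traversal rules above concrete, here is a rough sketch that scans every slot (per the comment, INFO_TYPE_NOT_VALID entries are skipped rather than treated as the end of the table); the function is illustrative, not part of the module:

```c
// Sketch: list every engine type reported by the PTOP_DEVICE_INFO table.
static void list_engines(struct nvdebug_state *g)
{
	int i;
	for (i = 0; i < NV_PTOP_DEVICE_INFO__SIZE_1; i++) {
		ptop_device_info_t info;
		info.raw = nvdebug_readl(g, NV_PTOP_DEVICE_INFO(i));
		// Skip invalid and non-ENGINE_TYPE entries; do not stop early
		if (info.info_type != INFO_TYPE_ENGINE_TYPE)
			continue;
		if (info.engine_type < ENGINE_TYPES_LEN)
			printk(KERN_INFO "[nvdebug] Entry %d: %s\n", i,
			       ENGINE_TYPES_NAMES[info.engine_type]);
		else
			printk(KERN_INFO "[nvdebug] Entry %d: unknown type %d\n",
			       i, info.engine_type);
	}
}
```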
| 592 | |||
| 593 | #define NV_PTOP_SCAL_NUM_GPCS 0x00022430 | ||
| 594 | #define NV_PTOP_SCAL_NUM_TPC_PER_GPC 0x00022434 | ||
| 595 | #define NV_PTOP_SCAL_NUM_CES 0x00022444 | ||
| 596 | // PCE_MAP is Volta+ only | ||
| 597 | #define NV_CE_PCE_MAP 0x00104028 | ||
| 598 | |||
| 599 | // GPC and TPC masks | ||
| 600 | // Support: Maxwell+ | ||
| 601 | #define NV_FUSE_GPC 0x00021c1c | ||
| 602 | #define NV_FUSE_TPC_FOR_GPC(i) (0x00021c38+(i)*4) | ||
| 603 | |||
| 604 | /* Location of the 1Kb instance block with page tables for BAR1 and BAR2. | ||
| 605 | Support: Fermi+ (?), Pascal | ||
| 606 | */ | ||
| 607 | #define NV_PBUS_BAR1_BLOCK 0x00001704 | ||
| 608 | #define NV_PBUS_BAR2_BLOCK 0x00001714 | ||
| 609 | typedef union { | ||
| 610 | struct { | ||
| 611 | uint32_t ptr:28; | ||
| 612 | enum INST_TARGET target:2; | ||
| 613 | uint32_t padding0:1; | ||
| 614 | bool is_virtual:1; | ||
| 615 | } __attribute__((packed)); | ||
| 616 | uint32_t raw; | ||
| 617 | struct { | ||
| 618 | uint32_t map:30; | ||
| 619 | uint32_t padding1:2; | ||
| 620 | } __attribute__((packed)); | ||
| 621 | } bar_config_block_t; | ||
| 622 | |||
| 623 | /* BAR0 PRAMIN (Private RAM Instance) window configuration | ||
| 624 | |||
| 625 | BASE : Base of window >> 16 in [TARGET] virtual address space | ||
| 626 | TARGET : Which address space BASE points into | ||
| 627 | |||
| 628 | Note: This seems to be set to 0x0bff00000 - 0x0c0000000 at least sometimes | ||
| 629 | |||
| 630 | Support: Tesla 2.0, Fermi, Kepler, Maxwell, Pascal, Turing, Ampere | ||
| 631 | */ | ||
| 632 | #define NV_PBUS_BAR0_WINDOW 0x00001700 | ||
| 633 | #define NV_PRAMIN 0x00700000 // Goes until 0x00800000 (1MB window) | ||
| 634 | #define NV_PRAMIN_LEN 0x00100000 | ||
| 635 | typedef union { | ||
| 636 | struct { | ||
| 637 | uint32_t base:24; | ||
| 638 | enum INST_TARGET target:2; | ||
| 639 | uint32_t padding0:6; | ||
| 640 | } __attribute__((packed)); | ||
| 641 | uint32_t raw; | ||
| 642 | } bar0_window_t; | ||
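A sketch of how the window might be moved to reach an arbitrary video-memory address through PRAMIN (presumably what vram2PRAMIN(), declared below, exists for; the helper here is an assumption-laden illustration, not the module's implementation):

```c
// Sketch: point the 1 MB PRAMIN window at a VID_MEM address and return
// the BAR0 offset at which that address becomes accessible. Assumes the
// window is free to be repositioned and that `base` is in 64 KiB units.
static uint32_t pramin_map_vram(struct nvdebug_state *g, uint64_t addr)
{
	bar0_window_t win;
	win.raw = 0;
	win.base = (uint32_t)(addr >> 16);
	win.target = TARGET_VID_MEM;
	nvdebug_writel(g, NV_PBUS_BAR0_WINDOW, win.raw);
	return NV_PRAMIN + (uint32_t)(addr & 0xffff);
}
```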
| 643 | |||
| 644 | // Support: Tesla 2.0, Fermi, Kepler, Maxwell, Pascal, Turing, Ampere | ||
| 645 | #define NV_PRAMIN_PDB_CONFIG_OFF 0x200 | ||
| 646 | typedef union { | ||
| 647 | struct { | ||
| 648 | uint32_t target:2; | ||
| 649 | uint32_t vol:1; | ||
| 650 | uint32_t padding0:1; | ||
| 651 | uint32_t fault_replay_tex:1; | ||
| 652 | uint32_t fault_replay_gcc:1; | ||
| 653 | uint32_t padding1:4; | ||
| 654 | bool is_ver2:1; | ||
| 655 | bool is_64k_big_page:1; // 128Kb otherwise | ||
| 656 | uint32_t page_dir_lo:20; | ||
| 657 | uint32_t page_dir_hi:32; | ||
| 658 | } __attribute__((packed)); | ||
| 659 | uint64_t raw; | ||
| 660 | } page_dir_config_t; | ||
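Once the PRAMIN window covers an instance block (for example the BAR1/BAR2 block above), the page-directory configuration could be pulled out roughly as follows; the 12-bit shift on page_dir_lo is an assumption consistent with the 20+32-bit field split:

```c
// Sketch: read the PDB config at `inst_off` bytes into the PRAMIN window
// and reconstruct the page directory base address.
static uint64_t read_pdb_base(struct nvdebug_state *g, uint32_t inst_off)
{
	page_dir_config_t pd;
	pd.raw = nvdebug_readq(g, NV_PRAMIN + inst_off + NV_PRAMIN_PDB_CONFIG_OFF);
	printk(KERN_INFO "[nvdebug] Page tables v%d, %s big pages\n",
	       pd.is_ver2 ? 2 : 1, pd.is_64k_big_page ? "64 KiB" : "128 KiB");
	return ((uint64_t)pd.page_dir_hi << 32) | ((uint64_t)pd.page_dir_lo << 12);
}
```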
| 661 | |||
| 662 | /* Page directory entry | ||
| 663 | |||
| 664 | Note: Format changed with Pascal (how?) | ||
| 665 | |||
| 666 | Support: Pascal, Volta, Turing, Ampere | ||
| 667 | */ | ||
| 668 | // FIXME: PDE/PTEs are actually 64 bits =S | ||
| 669 | // Important: Aperture keys are different with PDEs | ||
| 670 | enum PD_TARGET { | ||
| 671 | PD_AND_TARGET_INVALID = 0, // b000 | ||
| 672 | PD_AND_TARGET_VID_MEM = 2, // b010 | ||
| 673 | PD_AND_TARGET_SYS_MEM_COHERENT = 4, // b100 | ||
| 674 | PD_AND_TARGET_SYS_MEM_NONCOHERENT = 6, // b110 | ||
| 675 | PTE_AND_TARGET_VID_MEM = 1, // b001 | ||
| 676 | PTE_AND_TARGET_PEER = 3, // b011 | ||
| 677 | PTE_AND_TARGET_SYS_MEM_COHERENT = 5, // b101 | ||
| 678 | PTE_AND_TARGET_SYS_MEM_NONCOHERENT = 7, // b111 | ||
| 679 | }; | ||
| 680 | static inline char* pd_target_to_text(enum PD_TARGET t) { | ||
| 681 | switch (t) { | ||
| 682 | case PD_AND_TARGET_INVALID: | ||
| 683 | return "INVALID"; | ||
| 684 | case PD_AND_TARGET_VID_MEM: | ||
| 685 | case PTE_AND_TARGET_VID_MEM: | ||
| 686 | return "VID_MEM"; | ||
| 687 | case PTE_AND_TARGET_PEER: | ||
| 688 | return "PEER"; | ||
| 689 | case PD_AND_TARGET_SYS_MEM_COHERENT: | ||
| 690 | case PTE_AND_TARGET_SYS_MEM_COHERENT: | ||
| 691 | return "SYS_MEM_COHERENT"; | ||
| 692 | case PD_AND_TARGET_SYS_MEM_NONCOHERENT: | ||
| 693 | case PTE_AND_TARGET_SYS_MEM_NONCOHERENT: | ||
| 694 | return "SYS_MEM_NONCOHERENT"; | ||
| 695 | default: | ||
| 696 | printk(KERN_WARNING "[nvdebug] Invalid aperture!\n"); | ||
| 697 | return NULL; | ||
| 698 | } | ||
| 699 | } | ||
| 700 | |||
| 701 | // PDE/PTE V2 type | ||
| 702 | // Note: As the meaning of target (bits 2:1) changes depending on whether the entry | ||
| 703 | // is a PTE or not, this combines them into a single target field to | ||
| 704 | // simplify comparisons. | ||
| 705 | // Support: Pascal, Turing, Ampere | ||
| 706 | typedef union { | ||
| 707 | // Page Directory Entry (PDE) | ||
| 708 | struct { | ||
| 709 | bool is_pte:1; | ||
| 710 | uint32_t __target:2; | ||
| 711 | bool is_volatile:1; | ||
| 712 | uint32_t padding1:4; | ||
| 713 | uint32_t addr:24; | ||
| 714 | } __attribute__((packed)); | ||
| 715 | // Page Table Entry (PTE) | ||
| 716 | struct { | ||
| 717 | enum PD_TARGET target:3; | ||
| 718 | uint32_t __is_volatile:1; | ||
| 719 | bool is_encrypted:1; | ||
| 720 | bool is_privileged:1; | ||
| 721 | bool is_readonly:1; | ||
| 722 | bool atomics_disabled:1; | ||
| 723 | uint32_t __addr:24; | ||
| 724 | } __attribute__((packed)); | ||
| 725 | uint32_t raw; | ||
| 726 | } page_dir_entry_t; | ||
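A rough decode of a single V2 entry word using the combined target field; the 12-bit address shift is a guess, and (per the FIXME above) real entries are 64 bits wide, so this only looks at the low word:

```c
// Sketch: print one V2 PDE/PTE word. Address shift and field use are
// assumptions; only the low 32 bits of the real 64-bit entry are decoded.
static void print_pd_entry(page_dir_entry_t e)
{
	if (e.target == PD_AND_TARGET_INVALID) {
		printk(KERN_INFO "[nvdebug] (invalid entry)\n");
		return;
	}
	printk(KERN_INFO "[nvdebug] %s -> %#011llx in %s%s\n",
	       e.is_pte ? "PTE" : "PDE", (u64)e.addr << 12,
	       pd_target_to_text(e.target),
	       e.is_pte && e.is_readonly ? " (read-only)" : "");
}
```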
| 727 | |||
| 728 | // PDE/PTE V1 types | ||
| 729 | // Support: Fermi, Kepler, Maxwell | ||
| 730 | enum V1_PD_TARGET { | ||
| 731 | PD_TARGET_INVALID = 0, | ||
| 732 | PD_TARGET_VID_MEM = 1, | ||
| 733 | PD_TARGET_SYS_MEM_COHERENT = 2, | ||
| 734 | PD_TARGET_SYS_MEM_NONCOHERENT = 3, | ||
| 735 | }; | ||
| 736 | // Page Directory Entry (PDE) | ||
| 737 | typedef union { | ||
| 738 | // Large page fields | ||
| 739 | struct { | ||
| 740 | // 0:32 | ||
| 741 | enum V1_PD_TARGET target:2; | ||
| 742 | uint32_t padding0:2; | ||
| 743 | uint64_t addr:28; // May be wider? | ||
| 744 | // 32:63 | ||
| 745 | uint32_t padding2:3; | ||
| 746 | uint32_t is_volatile:1; // Might have counted wrong? | ||
| 747 | uint32_t padding3:28; | ||
| 748 | } __attribute__((packed)); | ||
| 749 | // Small page fields | ||
| 750 | struct { | ||
| 751 | // 0:32 | ||
| 752 | uint32_t padding00:32; | ||
| 753 | // 32:63 | ||
| 754 | enum V1_PD_TARGET alt_target:2; | ||
| 755 | uint32_t alt_is_volatile:1; // Might have counted wrong? | ||
| 756 | uint32_t padding03:1; | ||
| 757 | uint64_t alt_addr:28; | ||
| 758 | } __attribute__((packed)); | ||
| 759 | uint64_t raw; | ||
| 760 | } page_dir_entry_v1_t; | ||
| 761 | // Page Table Entry (PTE) | ||
| 762 | // Reconstructed from info in Jetson nvgpu driver | ||
| 763 | typedef union { | ||
| 764 | struct { | ||
| 765 | // 0:32 | ||
| 766 | bool is_present:1; | ||
| 767 | bool is_privileged:1; | ||
| 768 | bool is_readonly:1; | ||
| 769 | uint32_t padding0:1; | ||
| 770 | uint64_t addr:28; | ||
| 771 | // 32:63 | ||
| 772 | bool is_volatile:1; | ||
| 773 | enum INST_TARGET:2; | ||
| 774 | uint32_t padding1:1; | ||
| 775 | uint32_t kind:8; | ||
| 776 | uint32_t comptag:17; | ||
| 777 | uint32_t padding2:1; | ||
| 778 | bool is_read_disabled:1; | ||
| 779 | bool is_write_disabled:1; | ||
| 780 | } __attribute__((packed)); | ||
| 781 | uint64_t raw; | ||
| 782 | } page_tbl_entry_v1_t; | ||
| 783 | //enum V0_PDE_TYPE {NOT_PRESENT = 0, PAGE_64K = 1, PAGE_16K = 2, PAGE_4K = 3}; | ||
| 784 | //enum V0_PDE_SIZE {PDE_SZ_128K = 0, PDE_SZ_32K = 1, PDE_SZ_16K = 2, PDE_SZ_8K = 3}; | ||
| 785 | //static const int V0_PDE_SIZE2NUM[4] = {128*1024, 32*1024, 16*1024, 8*1024}; | ||
| 786 | /* PDE V0 (nv50/Tesla) | ||
| 787 | typedef union { | ||
| 788 | struct { | ||
| 789 | enum V1_PDE_TYPE type:2; | ||
| 790 | enum INST_TARGET target:2; | ||
| 791 | uint32_t padding0:1; | ||
| 792 | enum V1_PDE_SIZE sublevel_size:2; | ||
| 793 | uint32_t padding1:5; | ||
| 794 | uint32_t addr:28; | ||
| 795 | uint32_t padding2:24; | ||
| 796 | } __attribute__((packed)); | ||
| 797 | uint64_t raw; | ||
| 798 | } page_dir_entry_v1_t;*/ | ||
| 799 | /* PTE V0 (nv50) | ||
| 800 | typedef union { | ||
| 801 | struct { | ||
| 802 | bool is_present:1; | ||
| 803 | uint32_t padding3:2; | ||
| 804 | bool is_readonly:1; | ||
| 805 | enum INST_TARGET target:2; | ||
| 806 | bool is_privileged:1; | ||
| 807 | uint32_t contig_blk_sz:3; | ||
| 808 | uint32_t padding4:2; | ||
| 809 | uint32_t addr:28; | ||
| 810 | uint32_t storage_type:7; // ??? | ||
| 811 | uint32_t compression_mode:2; // ??? | ||
| 812 | uint32_t compression_tag:12; // ??? | ||
| 813 | bool is_long_partition_cycle:1; // ??? | ||
| 814 | bool is_encrypted:1; | ||
| 815 | uint32_t padding5:1; | ||
| 816 | } __attribute__((packed)); | ||
| 817 | uint64_t raw; | ||
| 818 | } page_tbl_entry_v1_t;*/ | ||
| 819 | |||
| 304 | // TODO(jbakita): Maybe put the above GPU types in a different file. | 820 | // TODO(jbakita): Maybe put the above GPU types in a different file. |
| 305 | 821 | ||
| 306 | #define for_chan_in_tsg(chan, tsg) \ | 822 | #define NV_PCI_VENDOR 0x10de |
| 307 | for (chan = (struct runlist_chan*)(tsg + 1); \ | 823 | struct nvdebug_state { |
| 308 | (void*)chan < (void*)(tsg + 1) + sizeof(struct runlist_chan) * tsg->tsg_length; \ | 824 | // Pointer to the mapped base address of the GPU control registers (obtained |
| 309 | chan++) | 825 | // via ioremap() originally). For embedded GPUs, we extract this from their |
| 826 | // struct nvgpu_os_linux. For discrete GPUs, we create our own mapping of | ||
| 827 | // BAR0 with pci_iomap(). Access via nvgpu_readl/writel functions. | ||
| 828 | void __iomem *regs; | ||
| 829 | // Depending on the architecture, BAR2 or BAR3 are used to access PRAMIN | ||
| 830 | union { | ||
| 831 | void __iomem *bar2; | ||
| 832 | void __iomem *bar3; | ||
| 833 | }; | ||
| 834 | int chip_id; | ||
| 835 | // Additional state from the built-in driver. Only set iff | ||
| 836 | // chip_id == NV_CHIP_ID_GV11B | ||
| 837 | struct gk20a *g; | ||
| 838 | // Pointer to PCI device needed for pci_iounmap | ||
| 839 | struct pci_dev *pcid; | ||
| 840 | }; | ||
| 841 | |||
| 842 | /*const struct runlist_funcs { | ||
| 843 | u8 size; | ||
| 844 | enum ENTRY_TYPE (*entry_type)(struct nvdebug_state *, void *); | ||
| 845 | uint32_t (*chid)(struct nvdebug_state *, void *); | ||
| 846 | uint32_t (*inst_ptr_lo)(struct nvdebug_state *, void *); | ||
| 847 | enum INST_TARGET (*inst_target)(struct nvdebug_state *, void *): | ||
| 848 | uint32_t (*tsgid)(struct nvdebug_state *, void *); | ||
| 849 | uint32_t (*timeslice_scale)(struct nvdebug_state *, void *); | ||
| 850 | uint32_t (*timeslice_timeout)(struct nvdebug_state *, void *); | ||
| 851 | uint32_t (*tsg_length)(struct nvdebug_state *, void *); | ||
| 852 | };*/ | ||
| 853 | |||
| 854 | // This disgusting macro is a crutch to work around the fact that runlists were | ||
| 855 | // different prior to Volta. | ||
| 856 | #define VERSIONED_RL_ACCESSOR(_ENTRY_TYPE, type, prop) \ | ||
| 857 | __attribute__((unused)) \ | ||
| 858 | static type (prop)(const struct nvdebug_state *g, const void *raw) { \ | ||
| 859 | if (g->chip_id >= NV_CHIP_ID_VOLTA) { \ | ||
| 860 | const struct gv100_runlist_ ## _ENTRY_TYPE *entry = (struct gv100_runlist_ ## _ENTRY_TYPE*)raw; \ | ||
| 861 | return entry->prop; \ | ||
| 862 | } else if (g->chip_id > NV_CHIP_ID_KEPLER) { \ | ||
| 863 | const struct gk110_runlist_ ## _ENTRY_TYPE *entry = (struct gk110_runlist_ ## _ENTRY_TYPE*)raw; \ | ||
| 864 | return entry->prop; \ | ||
| 865 | } else { \ | ||
| 866 | printk(KERN_WARNING "[nvdebug] " #prop " unavailable on GPU ID %x, which is older than Kepler.\n", g->chip_id); \ | ||
| 867 | return (type)0; \ | ||
| 868 | } \ | ||
| 869 | } | ||
| 870 | |||
| 871 | VERSIONED_RL_ACCESSOR(chan, uint32_t, chid); | ||
| 872 | VERSIONED_RL_ACCESSOR(chan, uint32_t, inst_ptr_lo); | ||
| 873 | VERSIONED_RL_ACCESSOR(chan, enum INST_TARGET, inst_target); | ||
| 874 | VERSIONED_RL_ACCESSOR(tsg, uint32_t, tsgid); | ||
| 875 | VERSIONED_RL_ACCESSOR(tsg, enum ENTRY_TYPE, entry_type); | ||
| 876 | VERSIONED_RL_ACCESSOR(tsg, uint32_t, timeslice_scale); | ||
| 877 | VERSIONED_RL_ACCESSOR(tsg, uint32_t, timeslice_timeout); | ||
| 878 | VERSIONED_RL_ACCESSOR(tsg, uint32_t, tsg_length); | ||
| 310 | 879 | ||
| 311 | #define next_tsg(tsg) \ | 880 | |
| 312 | (void*)(tsg + 1) + sizeof(struct runlist_chan) * tsg->tsg_length | 881 | #define NV_RL_ENTRY_SIZE(g) \ |
| 882 | ((g)->chip_id >= NV_CHIP_ID_VOLTA ? sizeof(struct gv100_runlist_tsg) : sizeof(struct gk110_runlist_tsg)) | ||
| 883 | |||
| 884 | #define for_chan_in_tsg(g, chan, tsg) \ | ||
| 885 | for (chan = (typeof(chan))(((u8*)tsg) + NV_RL_ENTRY_SIZE(g)); \ | ||
| 886 | (u8*)chan < ((u8*)tsg) + (1 + tsg_length(g, tsg)) * NV_RL_ENTRY_SIZE(g); \ | ||
| 887 | chan = (typeof(chan))(((u8*)chan) + NV_RL_ENTRY_SIZE(g))) | ||
| 888 | |||
| 889 | #define next_tsg(g, tsg) \ | ||
| 890 | (typeof(tsg))((u8*)(tsg) + NV_RL_ENTRY_SIZE(g) * (tsg_length(g, tsg) + 1)) | ||
| 313 | 891 | ||
| 314 | struct runlist_iter { | 892 | struct runlist_iter { |
| 315 | struct entry_tsg *curr_tsg; | 893 | // Pointer to either a TSG or channel entry (they're the same size) |
| 894 | void *curr_entry; | ||
| 895 | // This should be set to tsg_length when a TSG is reached, and | ||
| 896 | // decremented as each subsequent channel is printed. This allows us to | ||
| 897 | // track which channels are and are not part of the TSG. | ||
| 898 | int channels_left_in_tsg; | ||
| 899 | // Total runlist length, etc | ||
| 316 | runlist_info_t rl_info; | 900 | runlist_info_t rl_info; |
| 317 | }; | 901 | }; |
| 318 | 902 | ||
| 903 | #define NVDEBUG_MAX_DEVICES 8 | ||
| 904 | extern struct nvdebug_state g_nvdebug_state[NVDEBUG_MAX_DEVICES]; | ||
| 905 | |||
| 319 | // Defined in runlist.c | 906 | // Defined in runlist.c |
| 320 | struct gk20a* get_live_gk20a(void); | 907 | int get_runlist_iter(struct nvdebug_state *g, int rl_id, struct runlist_iter *rl_iter); |
| 321 | int get_runlist_iter(struct runlist_iter *rl_iter); | 908 | int preempt_tsg(struct nvdebug_state *g, uint32_t tsg_id); |
| 322 | int preempt_tsg(uint32_t tsg_id); | 909 | |
| 910 | // Defined in mmu.c | ||
| 911 | uint32_t vram2PRAMIN(struct nvdebug_state *g, uint64_t addr); | ||
| 912 | void __iomem *phy2PRAMIN(struct nvdebug_state* g, uint64_t phy); | ||
| 913 | uint64_t search_page_directory( | ||
| 914 | struct nvdebug_state *g, | ||
| 915 | void __iomem *pde_offset, | ||
| 916 | void __iomem *(*off2addr)(struct nvdebug_state*, uint64_t), | ||
| 917 | uint64_t addr_to_find); | ||
| 918 | uint64_t search_v1_page_directory( | ||
| 919 | struct nvdebug_state *g, | ||
| 920 | void __iomem *pde_offset, | ||
| 921 | void __iomem *(*off2addr)(struct nvdebug_state*, uint64_t), | ||
| 922 | uint64_t addr_to_find); | ||
| 923 | |||
| 323 | 924 | ||
| 324 | static inline struct gk20a *get_gk20a(struct device *dev) { | 925 | static inline struct gk20a *get_gk20a(struct device *dev) { |
| 325 | // XXX: Only works because gk20a* is the first member of gk20a_platform | 926 | // XXX: Only works because gk20a* is the first member of gk20a_platform |
| 326 | return *((struct gk20a**)dev_get_drvdata(dev)); | 927 | return *((struct gk20a**)dev_get_drvdata(dev)); |
| 327 | } | 928 | } |
| 328 | 929 | ||
| 329 | // Functionally identical to nvgpu_readl() | 930 | // We use the data field of the proc_dir_entry ("PDE" in this function) to store |
| 931 | // our index into the g_nvdebug_state array | ||
| 932 | static inline int seq2gpuidx(struct seq_file *s) { | ||
| 933 | const struct file *f = s->file; | ||
| 934 | return (uintptr_t)PDE_DATA(file_inode(f)); | ||
| 935 | } | ||
| 936 | static inline int file2gpuidx(const struct file *f) { | ||
| 937 | return (uintptr_t)PDE_DATA(file_inode(f)); | ||
| 938 | } | ||
| 939 | static inline int file2parentgpuidx(const struct file *f) { | ||
| 940 | // Should be safe to call on ProcFS entries, as our parent should (?) | ||
| 941 | // still exist if we're called. If not, there are worse races in this | ||
| 942 | // module. | ||
| 943 | return (uintptr_t)PDE_DATA(file_dentry(f)->d_parent->d_inode); | ||
| 944 | } | ||
| 945 | |||
| 946 | #define gk20a_regs(gk20a) (container_of(gk20a, struct nvgpu_os_linux, g)->regs) | ||
| 947 | |||
| 948 | // Similar to nvgpu_readl() | ||
| 330 | // (except we don't try to resolve situations where regs is NULL) | 949 | // (except we don't try to resolve situations where regs is NULL) |
| 331 | static inline u32 nvdebug_readl(struct gk20a* g, u32 r) { | 950 | static inline u32 nvdebug_readl(struct nvdebug_state *s, u32 r) { |
| 332 | struct nvgpu_os_linux* g_os = container_of(g, struct nvgpu_os_linux, g); | 951 | if (unlikely(!s->regs || (s->g && !gk20a_regs(s->g)))) { |
| 333 | if (unlikely(!g_os->regs)) { | 952 | printk(KERN_ERR "[nvdebug] Attempted nvgpu_readl on non-existent registers!\n"); |
| 334 | printk(KERN_ERR "[nvdebug] Attempted nvgpu_readl on non-existent registers!\n"); | 953 | return -1; |
| 335 | return -1; | 954 | } |
| 336 | } | 955 | return readl(s->regs + r); |
| 337 | return readl(g_os->regs + r); | ||
| 338 | } | 956 | } |
| 339 | 957 | ||
| 340 | // quadword version of nvdebug_readl() | 958 | // quadword version of nvdebug_readl() |
| 341 | static inline u64 nvdebug_readq(struct gk20a* g, u32 r) { | 959 | static inline u64 nvdebug_readq(struct nvdebug_state *s, u32 r) { |
| 342 | struct nvgpu_os_linux* g_os = container_of(g, struct nvgpu_os_linux, g); | 960 | u64 ret; |
| 343 | u64 ret; | 961 | if (unlikely(!s->regs || (s->g && !gk20a_regs(s->g)))) { |
| 344 | if (unlikely(!g_os->regs)) { | 962 | printk(KERN_ERR "[nvdebug] Attempted nvgpu_readl on non-existent registers!\n"); |
| 345 | printk(KERN_ERR "[nvdebug] Attempted nvgpu_readl on non-existent registers!\n"); | 963 | return -1; |
| 346 | return -1; | 964 | } |
| 347 | } | ||
| 348 | // readq seems to always return the uppermost 32 bits as 0, so workaround with readl | 965 | // readq seems to always return the uppermost 32 bits as 0, so workaround with readl |
| 349 | ret = readl(g_os->regs + r); | 966 | ret = readl(s->regs + r); |
| 350 | ret |= ((u64)readl(g_os->regs + r + 4)) << 32; | 967 | ret |= ((u64)readl(s->regs + r + 4)) << 32; |
| 351 | return ret; | 968 | return ret; |
| 352 | } | 969 | } |
| 353 | 970 | ||
| 354 | // Functionally identical to nvgpu_writel() | 971 | // Similar to nvgpu_writel() |
| 355 | static inline void nvdebug_writel(struct gk20a* g, u32 r, u32 v) { | 972 | static inline void nvdebug_writel(struct nvdebug_state *s, u32 r, u32 v) { |
| 356 | struct nvgpu_os_linux* g_os = container_of(g, struct nvgpu_os_linux, g); | 973 | if (unlikely(!s->regs || (s->g && !gk20a_regs(s->g)))) { |
| 357 | if (unlikely(!g_os->regs)) { | 974 | printk(KERN_ERR "[nvdebug] Attempted nvgpu_writel on non-existent registers!\n"); |
| 975 | return; | ||
| 976 | } | ||
| 977 | writel_relaxed(v, s->regs + r); | ||
| 978 | wmb(); | ||
| 979 | } | ||
| 980 | |||
| 981 | // quadword version of nvdebug_writel() | ||
| 982 | // XXX: Untested; this probably doesn't work. | ||
| 983 | static inline void nvdebug_writeq(struct nvdebug_state *s, u32 r, u64 v) { | ||
| 984 | if (unlikely(!s->regs || (s->g && !gk20a_regs(s->g)))) { | ||
| 358 | printk(KERN_ERR "[nvdebug] Attempted nvgpu_writel on non-existent registers!\n"); | 985 | printk(KERN_ERR "[nvdebug] Attempted nvgpu_writel on non-existent registers!\n"); |
| 359 | return; | 986 | return; |
| 360 | } | 987 | } |
| 361 | writel_relaxed(v, g_os->regs + r); | 988 | writeq_relaxed(v, s->regs + r); |
| 362 | wmb(); | 989 | wmb(); |
| 363 | } | 990 | } |
