diff options
Diffstat (limited to 'nvdebug.h')
-rw-r--r-- | nvdebug.h | 719 |
1 files changed, 673 insertions, 46 deletions
@@ -5,14 +5,18 @@ | |||
5 | // TODO(jbakita): Don't depend on these. | 5 | // TODO(jbakita): Don't depend on these. |
6 | #include <nvgpu/gk20a.h> // For struct gk20a | 6 | #include <nvgpu/gk20a.h> // For struct gk20a |
7 | #include <os/linux/os_linux.h> // For struct nvgpu_os_linux | 7 | #include <os/linux/os_linux.h> // For struct nvgpu_os_linux |
8 | #include <linux/proc_fs.h> // For PDE_DATA() macro | ||
8 | 9 | ||
9 | /* Runlist Channel | 10 | /* Runlist Channel |
10 | A timeslice group (TSG) is composed of channels. Each channel is a FIFO queue | 11 | A timeslice group (TSG) is composed of channels. Each channel is a FIFO queue |
11 | of GPU commands. These commands are typically queued from userspace. | 12 | of GPU commands. These commands are typically queued from userspace. |
12 | 13 | ||
13 | `INST_PTR` points to a GPU Instance Block which contains pointers to the GPU | 14 | Prior to Volta, channels could also exist independent of a TSG. These are |
14 | virtual address space for this context. All channels in a TSG point to the | 15 | called "bare channels" in the Jetson nvgpu driver. |
15 | same GPU Instance Block (?). | 16 | |
17 | `INST_PTR` points to a GPU Instance Block which contains FIFO states, virtual | ||
18 | address space configuration for this context, and a pointer to the page | ||
19 | tables. All channels in a TSG point to the same GPU Instance Block (?). | ||
16 | 20 | ||
17 | "RUNQUEUE_SELECTOR determines to which runqueue the channel belongs, and | 21 | "RUNQUEUE_SELECTOR determines to which runqueue the channel belongs, and |
18 | thereby which PBDMA will run the channel. Increasing values select | 22 | thereby which PBDMA will run the channel. Increasing values select |
@@ -30,7 +34,13 @@ | |||
30 | ENTRY_TYPE (T) : type of this entry: ENTRY_TYPE_CHAN | 34 | ENTRY_TYPE (T) : type of this entry: ENTRY_TYPE_CHAN |
31 | CHID (ID) : identifier of the channel to run (overlays ENTRY_ID) | 35 | CHID (ID) : identifier of the channel to run (overlays ENTRY_ID) |
32 | RUNQUEUE_SELECTOR (Q) : selects which PBDMA should run this channel if | 36 | RUNQUEUE_SELECTOR (Q) : selects which PBDMA should run this channel if |
33 | more than one PBDMA is supported by the runlist | 37 | more than one PBDMA is supported by the runlist, |
38 | additionally, "A value of 0 targets the first FE | ||
39 | pipe, which can process all FE driven engines: | ||
40 | Graphics, Compute, Inline2Memory, and TwoD. A value | ||
41 | of 1 targets the second FE pipe, which can only | ||
42 | process Compute work. Note that GRCE work is allowed | ||
43 | on either runqueue.)" | ||
34 | 44 | ||
35 | INST_PTR_LO : lower 20 bits of the 4k-aligned instance block pointer | 45 | INST_PTR_LO : lower 20 bits of the 4k-aligned instance block pointer |
36 | INST_PTR_HI : upper 32 bit of instance block pointer | 46 | INST_PTR_HI : upper 32 bit of instance block pointer |
@@ -39,6 +49,9 @@ | |||
39 | USERD_PTR_LO : upper 24 bits of the low 32 bits, of the 512-byte-aligned USERD pointer | 49 | USERD_PTR_LO : upper 24 bits of the low 32 bits, of the 512-byte-aligned USERD pointer |
40 | USERD_PTR_HI : upper 32 bits of USERD pointer | 50 | USERD_PTR_HI : upper 32 bits of USERD pointer |
41 | USERD_TARGET (TGU) : aperture of the USERD data structure | 51 | USERD_TARGET (TGU) : aperture of the USERD data structure |
52 | |||
53 | Channels were around since at least Fermi, but were rearranged with Volta to | ||
54 | add a USERD pointer, a longer INST pointer, and a runqueue selector flag. | ||
42 | */ | 55 | */ |
43 | enum ENTRY_TYPE {ENTRY_TYPE_CHAN = 0, ENTRY_TYPE_TSG = 1}; | 56 | enum ENTRY_TYPE {ENTRY_TYPE_CHAN = 0, ENTRY_TYPE_TSG = 1}; |
44 | enum INST_TARGET {TARGET_VID_MEM = 0, TARGET_SYS_MEM_COHERENT = 2, TARGET_SYS_MEM_NONCOHERENT = 3}; | 57 | enum INST_TARGET {TARGET_VID_MEM = 0, TARGET_SYS_MEM_COHERENT = 2, TARGET_SYS_MEM_NONCOHERENT = 3}; |
@@ -52,11 +65,12 @@ static inline char* target_to_text(enum INST_TARGET t) { | |||
52 | return "SYS_MEM_NONCOHERENT"; | 65 | return "SYS_MEM_NONCOHERENT"; |
53 | default: | 66 | default: |
54 | printk(KERN_WARNING "[nvdebug] Invalid aperture!\n"); | 67 | printk(KERN_WARNING "[nvdebug] Invalid aperture!\n"); |
55 | return NULL; | 68 | return "INVALID"; |
56 | } | 69 | } |
57 | } | 70 | } |
58 | 71 | ||
59 | struct runlist_chan { | 72 | // Support: Volta, Ampere, Turing |
73 | struct gv100_runlist_chan { | ||
60 | // 0:63 | 74 | // 0:63 |
61 | enum ENTRY_TYPE entry_type:1; | 75 | enum ENTRY_TYPE entry_type:1; |
62 | uint32_t runqueue_selector:1; | 76 | uint32_t runqueue_selector:1; |
@@ -71,6 +85,20 @@ struct runlist_chan { | |||
71 | uint32_t inst_ptr_hi:32; | 85 | uint32_t inst_ptr_hi:32; |
72 | } __attribute__((packed)); | 86 | } __attribute__((packed)); |
73 | 87 | ||
88 | // Support: Fermi, Kepler*, Maxwell, Pascal | ||
89 | // *In Kepler, inst fields may be unpopulated? | ||
90 | struct gm107_runlist_chan { | ||
91 | uint32_t chid:12; | ||
92 | uint32_t padding0:1; | ||
93 | enum ENTRY_TYPE entry_type:1; | ||
94 | uint32_t padding1:18; | ||
95 | uint32_t inst_ptr_lo:20; | ||
96 | enum INST_TARGET inst_target:2; // Totally guessing on this | ||
97 | uint32_t padding2:10; | ||
98 | } __attribute__((packed)); | ||
99 | |||
100 | #define gk110_runlist_chan gm107_runlist_chan | ||
101 | |||
74 | /* Runlist TSG (TimeSlice Group) | 102 | /* Runlist TSG (TimeSlice Group) |
75 | The runlist is composed of timeslice groups (TSG). Each TSG corresponds | 103 | The runlist is composed of timeslice groups (TSG). Each TSG corresponds |
76 | to a single virtual address space on the GPU and contains `TSG_LENGTH` | 104 | to a single virtual address space on the GPU and contains `TSG_LENGTH` |
@@ -85,8 +113,15 @@ struct runlist_chan { | |||
85 | TIMESLICE_TIMEOUT : timeout amount for the TSG's timeslice | 113 | TIMESLICE_TIMEOUT : timeout amount for the TSG's timeslice |
86 | TSG_LENGTH : number of channels that are part of this timeslice group | 114 | TSG_LENGTH : number of channels that are part of this timeslice group |
87 | TSGID : identifier of the Timeslice group (overlays ENTRY_ID) | 115 | TSGID : identifier of the Timeslice group (overlays ENTRY_ID) |
116 | |||
117 | TSGs appear to have been introduced with Kepler and stayed the same until | ||
118 | they were rearranged at the time of channel rearrangement to support longer | ||
119 | GPU instance addresses with Volta. | ||
88 | */ | 120 | */ |
89 | struct entry_tsg { | 121 | |
122 | // Support: Volta, Ampere*, Turing* | ||
123 | // *These treat the top 8 bits of TSGID as GFID (unused) | ||
124 | struct gv100_runlist_tsg { | ||
90 | // 0:63 | 125 | // 0:63 |
91 | enum ENTRY_TYPE entry_type:1; | 126 | enum ENTRY_TYPE entry_type:1; |
92 | uint64_t padding:15; | 127 | uint64_t padding:15; |
@@ -101,14 +136,28 @@ struct entry_tsg { | |||
101 | } __attribute__((packed)); | 136 | } __attribute__((packed)); |
102 | #define MAX_TSGID (1 << 12) | 137 | #define MAX_TSGID (1 << 12) |
103 | 138 | ||
139 | // Support: Kepler (v2?), Maxwell, Pascal | ||
140 | // Same fields as Volta except tsg_length is 6 bits rather than 8 | ||
141 | // Last 32 bits appear to contain an undocumented inst ptr | ||
142 | struct gk110_runlist_tsg { | ||
143 | uint32_t tsgid:12; | ||
144 | uint32_t padding0:1; | ||
145 | enum ENTRY_TYPE entry_type:1; | ||
146 | uint32_t timeslice_scale:4; | ||
147 | uint32_t timeslice_timeout:8; | ||
148 | uint32_t tsg_length:6; | ||
149 | uint32_t padding1:32; | ||
150 | } __attribute__((packed)); | ||
151 | |||
152 | |||
104 | enum PREEMPT_TYPE {PREEMPT_TYPE_CHANNEL = 0, PREEMPT_TYPE_TSG = 1}; | 153 | enum PREEMPT_TYPE {PREEMPT_TYPE_CHANNEL = 0, PREEMPT_TYPE_TSG = 1}; |
105 | 154 | ||
106 | /* Preempt a TSG or Channel by ID | 155 | /* Preempt a TSG or Channel by ID |
107 | ID/CHID : Id of TSG or channel to preempt | 156 | ID/CHID : Id of TSG or channel to preempt |
108 | IS_PENDING : ???? | 157 | IS_PENDING : Is a context switch pending? |
109 | TYPE : PREEMPT_TYPE_CHANNEL or PREEMPT_TYPE_TSG | 158 | TYPE : PREEMPT_TYPE_CHANNEL or PREEMPT_TYPE_TSG |
110 | 159 | ||
111 | Support: Kepler, Maxwell, Pascal, Volta | 160 | Support: Kepler, Maxwell, Pascal, Volta, Turing |
112 | */ | 161 | */ |
113 | #define NV_PFIFO_PREEMPT 0x00002634 | 162 | #define NV_PFIFO_PREEMPT 0x00002634 |
114 | typedef union { | 163 | typedef union { |
@@ -195,26 +244,36 @@ typedef union { | |||
195 | */ | 244 | */ |
196 | 245 | ||
197 | // Note: This is different with Turing | 246 | // Note: This is different with Turing |
198 | // Support: Kepler, Maxwell, Pascal, Volta | 247 | // Support: Fermi, Kepler, Maxwell, Pascal, Volta |
199 | #define NV_PFIFO_RUNLIST_BASE 0x00002270 | 248 | #define NV_PFIFO_RUNLIST_BASE 0x00002270 |
249 | #define NV_PFIFO_ENG_RUNLIST_BASE(i) (0x00002280+(i)*8) | ||
200 | typedef union { | 250 | typedef union { |
201 | struct { | 251 | struct { |
202 | uint32_t ptr:28; | 252 | uint32_t ptr:28; |
203 | uint32_t type:2; | 253 | enum INST_TARGET target:2; |
204 | uint32_t padding:2; | 254 | uint32_t padding:2; |
205 | } __attribute__((packed)); | 255 | } __attribute__((packed)); |
206 | uint32_t raw; | 256 | uint32_t raw; |
207 | } runlist_base_t; | 257 | } runlist_base_t; |
208 | 258 | ||
209 | // Support: Kepler, Maxwell, Pascal, Volta | 259 | // Support: Kepler, Maxwell, Pascal, Volta |
260 | // Works on Fermi, but id is one bit longer and is b11111 | ||
210 | #define NV_PFIFO_RUNLIST 0x00002274 | 261 | #define NV_PFIFO_RUNLIST 0x00002274 |
262 | #define NV_PFIFO_ENG_RUNLIST(i) (0x00002284+(i)*8) | ||
211 | typedef union { | 263 | typedef union { |
264 | // RUNLIST fields | ||
212 | struct { | 265 | struct { |
213 | uint32_t len:16; | 266 | uint32_t len:16; |
214 | uint32_t padding:4; | 267 | uint32_t padding:4; |
215 | uint32_t id:4; | 268 | uint32_t id:4; // Runlist ID (each engine may have a separate runlist) |
216 | uint32_t padding2:8; | 269 | uint32_t padding2:8; |
217 | } __attribute__((packed)); | 270 | } __attribute__((packed)); |
271 | // ENG_RUNLIST fields that differ | ||
272 | struct { | ||
273 | uint32_t padding3:20; | ||
274 | bool is_pending:1; // Is runlist not yet committed? | ||
275 | uint32_t padding4:11; | ||
276 | } __attribute__((packed)); | ||
218 | uint32_t raw; | 277 | uint32_t raw; |
219 | } runlist_info_t; | 278 | } runlist_info_t; |
220 | 279 | ||
@@ -301,63 +360,631 @@ typedef union { | |||
301 | uint32_t raw; | 360 | uint32_t raw; |
302 | } runlist_disable_t; | 361 | } runlist_disable_t; |
303 | 362 | ||
363 | /* Read GPU descriptors from the Master Controller (MC) | ||
364 | |||
365 | MINOR_REVISION : Legacy (only used with Celsius-era chips in Nouveau) | ||
366 | MAJOR_REVISION : Legacy (only used with Celsius-era chips in Nouveau) | ||
367 | IMPLEMENTATION : Which implementation of the GPU architecture | ||
368 | ARCHITECTURE : Which GPU architecture | ||
369 | |||
370 | CHIP_ID = IMPLEMENTATION + (ARCHITECTURE << 4) | ||
371 | CHIP_ID : Unique ID of all chips since Kelvin | ||
372 | |||
373 | Support: Kelvin, Rankine, Curie, Tesla, Fermi, Kepler, Maxwell, Pascal, | ||
374 | Volta, Turing, Ampere | ||
375 | */ | ||
376 | #define NV_MC_BOOT_0 0x00000000 | ||
377 | #define NV_CHIP_ID_GP106 0x136 // Discrete GeForce GTX 1060 | ||
378 | #define NV_CHIP_ID_GV11B 0x15B // Jetson Xavier embedded GPU | ||
379 | #define NV_CHIP_ID_KEPLER 0x0E0 | ||
380 | #define NV_CHIP_ID_VOLTA 0x140 | ||
381 | |||
382 | inline static const char* ARCH2NAME(uint32_t arch) { | ||
383 | switch (arch) { | ||
384 | case 0x01: | ||
385 | return "Celsius"; | ||
386 | case 0x02: | ||
387 | return "Kelvin"; | ||
388 | case 0x03: | ||
389 | return "Rankine"; | ||
390 | case 0x04: | ||
391 | case 0x06: // 0x06 is (nForce 6XX integrated only) | ||
392 | return "Curie"; | ||
393 | // 0x07 is unused/skipped | ||
394 | case 0x05: // First Tesla card was released before the nForce 6XX | ||
395 | case 0x08: | ||
396 | case 0x09: | ||
397 | case 0x0A: | ||
398 | return "Tesla"; | ||
399 | // 0x0B is unused/skipped | ||
400 | case 0x0C: | ||
401 | case 0x0D: | ||
402 | return "Fermi"; | ||
403 | case 0x0E: | ||
404 | case 0x0F: | ||
405 | case 0x11: | ||
406 | return "Kepler"; | ||
407 | case 0x12: | ||
408 | return "Maxwell"; | ||
409 | case 0x13: | ||
410 | return "Pascal"; | ||
411 | case 0x14: | ||
412 | case 0x15: // Volta integrated | ||
413 | return "Volta"; | ||
414 | case 0x16: | ||
415 | return "Turing"; | ||
416 | case 0x17: | ||
417 | return "Ampere"; | ||
418 | case 0x18: | ||
419 | case 0x19: | ||
420 | return "Hopper (?) or Lovelace (?)"; | ||
421 | default: | ||
422 | if (arch < 0x19) | ||
423 | return "[unknown historical architecture]"; | ||
424 | else | ||
425 | return "[future]"; | ||
426 | } | ||
427 | } | ||
428 | |||
429 | typedef union { | ||
430 | // Fields as defined in the NVIDIA reference | ||
431 | struct { | ||
432 | uint32_t minor_revision:4; | ||
433 | uint32_t major_revision:4; | ||
434 | uint32_t reserved:4; | ||
435 | uint32_t padding0:8; | ||
436 | uint32_t implementation:4; | ||
437 | uint32_t architecture:5; | ||
438 | uint32_t padding1:3; | ||
439 | } __attribute__((packed)); | ||
440 | uint32_t raw; | ||
441 | // Arch << 4 + impl is also often used | ||
442 | struct { | ||
443 | uint32_t padding2:20; | ||
444 | uint32_t chip_id:9; | ||
445 | uint32_t padding3:3; | ||
446 | } __attribute__((packed)); | ||
447 | } mc_boot_0_t; | ||
448 | |||
449 | enum DEVICE_INFO_TYPE {INFO_TYPE_NOT_VALID = 0, INFO_TYPE_DATA = 1, INFO_TYPE_ENUM = 2, INFO_TYPE_ENGINE_TYPE = 3}; | ||
450 | enum ENGINE_TYPES { | ||
451 | ENGINE_GRAPHICS = 0, // GRAPHICS [/compute] | ||
452 | ENGINE_COPY0 = 1, // [raw/physical] COPY #0 | ||
453 | ENGINE_COPY1 = 2, // [raw/physical] COPY #1 | ||
454 | ENGINE_COPY2 = 3, // [raw/physical] COPY #2 | ||
455 | |||
456 | ENGINE_MSPDEC = 8, // Picture DECoder | ||
457 | ENGINE_MSPPP = 9, // [Video] Post Processing | ||
458 | ENGINE_MSVLD = 10, // [Video] Variable Length Decoder | ||
459 | ENGINE_MSENC = 11, // [Video] ENCoding | ||
460 | ENGINE_VIC = 12, // Video Image Compositor | ||
461 | ENGINE_SEC = 13, // SEquenCer [?] | ||
462 | ENGINE_NVENC0 = 14, // Nvidia Video ENCoder #0 | ||
463 | ENGINE_NVENC1 = 15, // Nvidia Video ENCoder #1 | ||
464 | ENGINE_NVDEC = 16, // Nvidia Video DECoder | ||
465 | |||
466 | ENGINE_IOCTRL = 18, // I/O ConTRoLler [of NVLINK at least] | ||
467 | ENGINE_LCE = 19, // Logical Copy Engine | ||
468 | ENGINE_GSP = 20, // Gpu System Processor | ||
469 | ENGINE_NVJPG = 21, // NVidia JPeG [Decoder] (Ampere+) | ||
470 | }; | ||
471 | #define ENGINE_TYPES_LEN 22 | ||
472 | static const char* const ENGINE_TYPES_NAMES[ENGINE_TYPES_LEN] = { | ||
473 | "Graphics/Compute", | ||
474 | "COPY0", | ||
475 | "COPY1", | ||
476 | "COPY2", | ||
477 | "Unknown Engine ID#4", | ||
478 | "Unknown Engine ID#5", | ||
479 | "Unknown Engine ID#6", | ||
480 | "Unknown Engine ID#7", | ||
481 | "MSPDEC: Picture Decoder", | ||
482 | "MSPPP: Post Processing", | ||
483 | "MSVLD: Variable Length Decoder", | ||
484 | "MSENC: Encoder", | ||
485 | "VIC: Video Image Compositor", | ||
486 | "SEC: Sequencer", | ||
487 | "NVENC0: NVIDIA Video Encoder #0", | ||
488 | "NVENC1: NVIDIA Video Encoder #1", | ||
489 | "NVDEC: NVIDIA Video Decoder", | ||
490 | "Unknown Engine ID#17", | ||
491 | "IOCTRL: I/O Controller", | ||
492 | "LCE: Logical Copy Engine", | ||
493 | "GSP: GPU System Processor", | ||
494 | "NVJPG: NVIDIA JPEG Decoder", | ||
495 | }; | ||
496 | |||
497 | /* GPU engine information and control register offsets | ||
498 | Each engine is described by one or more entries (terminated by an entry with | ||
499 | the `has_next_entry` flag unset) in the fixed-size PTOP_DEVICE_INFO table. A | ||
500 | typical device, such as the graphics/compute engine and any copy engines, are | ||
501 | described by three entries, one of each type. | ||
502 | |||
503 | The PTOP_DEVICE_INFO table is sparsely populated (entries of type | ||
504 | INFO_TYPE_NOT_VALID may be intermingled with valid entries), so any traversal | ||
505 | code should check all NV_PTOP_DEVICE_INFO__SIZE_1 entries and not terminate | ||
506 | upon reaching the first entry of INFO_TYPE_NOT_VALID. | ||
507 | |||
508 | INFO_TYPE : Is this a DATA, ENUM, or ENGINE_TYPE table entry? | ||
509 | HAS_NEXT_ENTRY : Does the following entry refer to the same engine? | ||
510 | |||
511 | == INFO_TYPE_DATA fields == | ||
512 | PRI_BASE : BAR0 base = (PRI_BASE << 12) aka 4k aligned. | ||
513 | INST_ID : "Note that some instanced [engines] (such as logical copy | ||
514 | engines aka LCE) share a PRI_BASE across all [engines] of | ||
515 | the same engine type; such [engines] require an additional | ||
516 | offset: instanced base = BAR0 base + stride * INST_ID. | ||
517 | FAULT_ID_IS_VALID : Does this engine have its own bind point and fault ID | ||
518 | with the MMU? | ||
519 | FAULT_ID : "The MMU fault id used by this [engine]. These IDs | ||
520 | correspond to the NV_PFAULT_MMU_ENG_ID define list." | ||
521 | |||
522 | == INFO_TYPE_ENUM fields == | ||
523 | ENGINE_IS_VALID : Is this engine a host engine? | ||
524 | ENGINE_ENUM : "[T]he host engine ID for the current [engine] if it is | ||
525 | a host engine, meaning Host can send methods to the | ||
526 | engine. This id is used to index into any register array | ||
527 | whose __SIZE_1 is equal to NV_HOST_NUM_ENGINES. A given | ||
528 | ENGINE_ENUM can be present for at most one device in the | ||
529 | table. Devices corresponding to all ENGINE_ENUM ids 0 | ||
530 | through NV_HOST_NUM_ENGINES - 1 must be present in the | ||
531 | device info table." | ||
532 | RUNLIST_IS_VALID : Is this engine a host engine with a runlist? | ||
533 | RUNLIST_ENUM : "[T]he Host runlist ID on which methods for the current | ||
534 | [engine] should be submitted... The runlist id is used to | ||
535 | index into any register array whose __SIZE_1 is equal to | ||
536 | NV_HOST_NUM_RUNLISTS. [Engines] corresponding to all | ||
537 | RUNLIST_ENUM ids 0 through NV_HOST_NUM_RUNLISTS - 1 must | ||
538 | be present in the device info table." | ||
539 | INTR_IS_VALID : Does this device have an interrupt? | ||
540 | INTR_ENUM : Interrupt ID for use with "the NV_PMC_INTR_*_DEVICE | ||
541 | register bitfields." | ||
542 | RESET_IS_VALID : Does this engine have a reset ID? | ||
543 | RESET_ENUM : Reset ID for use indexing the "NV_PMC_ENABLE_DEVICE(i) | ||
544 | and NV_PMC_ELPG_ENABLE_DEVICE(i) register bitfields." | ||
545 | |||
546 | == INFO_TYPE_ENGINE_TYPE fields == | ||
547 | ENGINE_TYPE : What type of engine is this? (see ENGINE_TYPES_NAMES) | ||
548 | |||
549 | Support: Kepler, Maxwell, Pascal, Volta, Ampere | ||
550 | See dev_top.ref.txt of NVIDIA's open-gpu-doc for more info. | ||
551 | */ | ||
552 | #define NV_PTOP_DEVICE_INFO(i) (0x00022700+(i)*4) | ||
553 | #define NV_PTOP_DEVICE_INFO__SIZE_1 64 | ||
554 | typedef union { | ||
555 | // DATA type fields | ||
556 | struct { | ||
557 | enum DEVICE_INFO_TYPE info_type:2; | ||
558 | bool fault_id_is_valid:1; | ||
559 | uint32_t fault_id:7; | ||
560 | uint32_t padding0:2; | ||
561 | uint32_t pri_base:12; | ||
562 | uint32_t padding1:2; | ||
563 | uint32_t inst_id:4; | ||
564 | uint32_t is_not_enum2:1; | ||
565 | bool has_next_entry:1; | ||
566 | } __attribute__((packed)); | ||
567 | // ENUM type fields | ||
568 | struct { | ||
569 | uint32_t padding2:2; | ||
570 | bool reset_is_valid:1; | ||
571 | bool intr_is_valid:1; | ||
572 | bool runlist_is_valid:1; | ||
573 | bool engine_is_valid:1; | ||
574 | uint32_t padding3:3; | ||
575 | uint32_t reset_enum:5; | ||
576 | uint32_t padding4:1; | ||
577 | uint32_t intr_enum:5; | ||
578 | uint32_t padding5:1; | ||
579 | uint32_t runlist_enum:4; | ||
580 | uint32_t padding6:1; | ||
581 | uint32_t engine_enum:4; | ||
582 | uint32_t padding7:2; | ||
583 | } __attribute__((packed)); | ||
584 | // ENGINE_TYPE type fields | ||
585 | struct { | ||
586 | uint32_t padding8:2; | ||
587 | enum ENGINE_TYPES engine_type:29; | ||
588 | uint32_t padding9:1; | ||
589 | } __attribute__((packed)); | ||
590 | uint32_t raw; | ||
591 | } ptop_device_info_t; | ||
592 | |||
593 | #define NV_PTOP_SCAL_NUM_GPCS 0x00022430 | ||
594 | #define NV_PTOP_SCAL_NUM_TPC_PER_GPC 0x00022434 | ||
595 | #define NV_PTOP_SCAL_NUM_CES 0x00022444 | ||
596 | // PCE_MAP is Volta+ only | ||
597 | #define NV_CE_PCE_MAP 0x00104028 | ||
598 | |||
599 | // GPC and TPC masks | ||
600 | // Support: Maxwell+ | ||
601 | #define NV_FUSE_GPC 0x00021c1c | ||
602 | #define NV_FUSE_TPC_FOR_GPC(i) (0x00021c38+(i)*4) | ||
603 | |||
604 | /* Location of the 1Kb instance block with page tables for BAR1 and BAR2. | ||
605 | Support: Fermi+ (?), Pascal | ||
606 | */ | ||
607 | #define NV_PBUS_BAR1_BLOCK 0x00001704 | ||
608 | #define NV_PBUS_BAR2_BLOCK 0x00001714 | ||
609 | typedef union { | ||
610 | struct { | ||
611 | uint32_t ptr:28; | ||
612 | enum INST_TARGET target:2; | ||
613 | uint32_t padding0:1; | ||
614 | bool is_virtual:1; | ||
615 | } __attribute__((packed)); | ||
616 | uint32_t raw; | ||
617 | struct { | ||
618 | uint32_t map:30; | ||
619 | uint32_t padding1:2; | ||
620 | } __attribute__((packed)); | ||
621 | } bar_config_block_t; | ||
622 | |||
623 | /* BAR0 PRAMIN (Private RAM Instance) window configuration | ||
624 | |||
625 | BASE : Base of window >> 16 in [TARGET] virtual address space | ||
626 | TARGET : Which address space BASE points into | ||
627 | |||
628 | Note: This seems to be set to 0x0bff00000 - 0x0c0000000 at least sometimes | ||
629 | |||
630 | Support: Tesla 2.0, Fermi, Kepler, Maxwell, Pascal, Turing, Ampere | ||
631 | */ | ||
632 | #define NV_PBUS_BAR0_WINDOW 0x00001700 | ||
633 | #define NV_PRAMIN 0x00700000 // Goes until 0x00800000 (1MB window) | ||
634 | #define NV_PRAMIN_LEN 0x00100000 | ||
635 | typedef union { | ||
636 | struct { | ||
637 | uint32_t base:24; | ||
638 | enum INST_TARGET target:2; | ||
639 | uint32_t padding0:6; | ||
640 | } __attribute__((packed)); | ||
641 | uint32_t raw; | ||
642 | } bar0_window_t; | ||
643 | |||
644 | // Support: Tesla 2.0, Fermi, Kepler, Maxwell, Pascal, Turing, Ampere | ||
645 | #define NV_PRAMIN_PDB_CONFIG_OFF 0x200 | ||
646 | typedef union { | ||
647 | struct { | ||
648 | uint32_t target:2; | ||
649 | uint32_t vol:1; | ||
650 | uint32_t padding0:1; | ||
651 | uint32_t fault_replay_tex:1; | ||
652 | uint32_t fault_replay_gcc:1; | ||
653 | uint32_t padding1:4; | ||
654 | bool is_ver2:1; | ||
655 | bool is_64k_big_page:1; // 128Kb otherwise | ||
656 | uint32_t page_dir_lo:20; | ||
657 | uint32_t page_dir_hi:32; | ||
658 | } __attribute__((packed)); | ||
659 | uint64_t raw; | ||
660 | } page_dir_config_t; | ||
661 | |||
662 | /* Page directory entry | ||
663 | |||
664 | Note: Format changed with Pascal (how?) | ||
665 | |||
666 | Support: Pascal, Volta, Turing, Ampere | ||
667 | */ | ||
668 | // FIXME: PDE/PTEs are actually 64 bits =S | ||
669 | // Important: Aperture keys are different with PDEs | ||
670 | enum PD_TARGET { | ||
671 | PD_AND_TARGET_INVALID = 0, // b000 | ||
672 | PD_AND_TARGET_VID_MEM = 2, // b010 | ||
673 | PD_AND_TARGET_SYS_MEM_COHERENT = 4, // b100 | ||
674 | PD_AND_TARGET_SYS_MEM_NONCOHERENT = 6, // b110 | ||
675 | PTE_AND_TARGET_VID_MEM = 1, // b001 | ||
676 | PTE_AND_TARGET_PEER = 3, // b011 | ||
677 | PTE_AND_TARGET_SYS_MEM_COHERENT = 5, // b101 | ||
678 | PTE_AND_TARGET_SYS_MEM_NONCOHERENT = 7, // b111 | ||
679 | }; | ||
680 | static inline char* pd_target_to_text(enum PD_TARGET t) { | ||
681 | switch (t) { | ||
682 | case PD_AND_TARGET_INVALID: | ||
683 | return "INVALID"; | ||
684 | case PD_AND_TARGET_VID_MEM: | ||
685 | case PTE_AND_TARGET_VID_MEM: | ||
686 | return "VID_MEM"; | ||
687 | case PTE_AND_TARGET_PEER: | ||
688 | return "PEER"; | ||
689 | case PD_AND_TARGET_SYS_MEM_COHERENT: | ||
690 | case PTE_AND_TARGET_SYS_MEM_COHERENT: | ||
691 | return "SYS_MEM_COHERENT"; | ||
692 | case PD_AND_TARGET_SYS_MEM_NONCOHERENT: | ||
693 | case PTE_AND_TARGET_SYS_MEM_NONCOHERENT: | ||
694 | return "SYS_MEM_NONCOHERENT"; | ||
695 | default: | ||
696 | printk(KERN_WARNING "[nvdebug] Invalid aperture!\n"); | ||
697 | return NULL; | ||
698 | } | ||
699 | } | ||
700 | |||
701 | // PDE/PTE V2 type | ||
702 | // Note: As the meaning of target (bits 2:1) changes depending on if the entry | ||
703 | // is a PTE or not, this combines them into a single target field to | ||
704 | // simplify comparisons. | ||
705 | // Support: Pascal, Turing, Ampere | ||
706 | typedef union { | ||
707 | // Page Directory Entry (PDE) | ||
708 | struct { | ||
709 | bool is_pte:1; | ||
710 | uint32_t __target:2; | ||
711 | bool is_volatile:1; | ||
712 | uint32_t padding1:4; | ||
713 | uint32_t addr:24; | ||
714 | } __attribute__((packed)); | ||
715 | // Page Table Entry (PTE) | ||
716 | struct { | ||
717 | enum PD_TARGET target:3; | ||
718 | uint32_t __is_volatile:1; | ||
719 | bool is_encrypted:1; | ||
720 | bool is_privileged:1; | ||
721 | bool is_readonly:1; | ||
722 | bool atomics_disabled:1; | ||
723 | uint32_t __addr:24; | ||
724 | } __attribute__((packed)); | ||
725 | uint32_t raw; | ||
726 | } page_dir_entry_t; | ||
727 | |||
728 | // PDE/PTE V1 types | ||
729 | // Support: Fermi, Kepler, Maxwell | ||
730 | enum V1_PD_TARGET { | ||
731 | PD_TARGET_INVALID = 0, | ||
732 | PD_TARGET_VID_MEM = 1, | ||
733 | PD_TARGET_SYS_MEM_COHERENT = 2, | ||
734 | PD_TARGET_SYS_MEM_NONCOHERENT = 3, | ||
735 | }; | ||
736 | // Page Directory Entry (PDE) | ||
737 | typedef union { | ||
738 | // Large page fields | ||
739 | struct { | ||
740 | // 0:32 | ||
741 | enum V1_PD_TARGET target:2; | ||
742 | uint32_t padding0:2; | ||
743 | uint64_t addr:28; // May be wider? | ||
744 | // 32:63 | ||
745 | uint32_t padding2:3; | ||
746 | uint32_t is_volatile:1; // Might have counted wrong? | ||
747 | uint32_t padding3:28; | ||
748 | } __attribute__((packed)); | ||
749 | // Small page fields | ||
750 | struct { | ||
751 | // 0:32 | ||
752 | uint32_t padding00:32; | ||
753 | // 32:63 | ||
754 | enum V1_PD_TARGET alt_target:2; | ||
755 | uint32_t alt_is_volatile:1; // Might have counted wrong? | ||
756 | uint32_t padding03:1; | ||
757 | uint64_t alt_addr:28; | ||
758 | } __attribute__((packed)); | ||
759 | uint64_t raw; | ||
760 | } page_dir_entry_v1_t; | ||
761 | // Page Table Entry (PTE) | ||
762 | // Reconstructed from info in Jetson nvgpu driver | ||
763 | typedef union { | ||
764 | struct { | ||
765 | // 0:32 | ||
766 | bool is_present:1; | ||
767 | bool is_privileged:1; | ||
768 | bool is_readonly:1; | ||
769 | uint32_t padding0:1; | ||
770 | uint64_t addr:28; | ||
771 | // 32:63 | ||
772 | bool is_volatile:1; | ||
773 | enum INST_TARGET:2; | ||
774 | uint32_t padding1:1; | ||
775 | uint32_t kind:8; | ||
776 | uint32_t comptag:17; | ||
777 | uint32_t padding2:1; | ||
778 | bool is_read_disabled:1; | ||
779 | bool is_write_disabled:1; | ||
780 | } __attribute__((packed)); | ||
781 | uint64_t raw; | ||
782 | } page_tbl_entry_v1_t; | ||
783 | //enum V0_PDE_TYPE {NOT_PRESENT = 0, PAGE_64K = 1, PAGE_16K = 2, PAGE_4K = 3}; | ||
784 | //enum V0_PDE_SIZE {PDE_SZ_128K = 0, PDE_SZ_32K = 1, PDE_SZ_16K = 2, PDE_SZ_8K = 3}; | ||
785 | //static const int V0_PDE_SIZE2NUM[4] = {128*1024, 32*1024, 16*1024, 8*1024}; | ||
786 | /* PDE V0 (nv50/Tesla) | ||
787 | typedef union { | ||
788 | struct { | ||
789 | enum V1_PDE_TYPE type:2; | ||
790 | enum INST_TARGET target:2; | ||
791 | uint32_t padding0:1; | ||
792 | enum V1_PDE_SIZE sublevel_size:2; | ||
793 | uint32_t padding1:5; | ||
794 | uint32_t addr:28; | ||
795 | uint32_t padding2:24; | ||
796 | } __attribute__((packed)); | ||
797 | uint64_t raw; | ||
798 | } page_dir_entry_v1_t;*/ | ||
799 | /* PTE V0 (nv50) | ||
800 | typedef union { | ||
801 | struct { | ||
802 | bool is_present:1; | ||
803 | uint32_t padding3:2; | ||
804 | bool is_readonly:1; | ||
805 | enum INST_TARGET target:2; | ||
806 | bool is_privileged:1; | ||
807 | uint32_t contig_blk_sz:3; | ||
808 | uint32_t padding4:2; | ||
809 | uint32_t addr:28; | ||
810 | uint32_t storage_type:7; // ??? | ||
811 | uint32_t compression_mode:2; // ??? | ||
812 | uint32_t compression_tag:12; // ??? | ||
813 | bool is_long_partition_cycle:1; // ??? | ||
814 | bool is_encrypted:1; | ||
815 | uint32_t padding5:1; | ||
816 | } __attribute__((packed)); | ||
817 | uint64_t raw; | ||
818 | } page_tbl_entry_v1_t;*/ | ||
819 | |||
304 | // TODO(jbakita): Maybe put the above GPU types in a different file. | 820 | // TODO(jbakita): Maybe put the above GPU types in a different file. |
305 | 821 | ||
306 | #define for_chan_in_tsg(chan, tsg) \ | 822 | #define NV_PCI_VENDOR 0x10de |
307 | for (chan = (struct runlist_chan*)(tsg + 1); \ | 823 | struct nvdebug_state { |
308 | (void*)chan < (void*)(tsg + 1) + sizeof(struct runlist_chan) * tsg->tsg_length; \ | 824 | // Pointer to the mapped base address of the GPU control registers (obtained |
309 | chan++) | 825 | // via ioremap() originally). For embedded GPUs, we extract this from their |
826 | // struct nvgpu_os_linux. For discrete GPUs, we create our own mapping of | ||
827 | // BAR0 with pci_iomap(). Access via nvgpu_readl/writel functions. | ||
828 | void __iomem *regs; | ||
829 | // Depending on the architecture, BAR2 or BAR3 are used to access PRAMIN | ||
830 | union { | ||
831 | void __iomem *bar2; | ||
832 | void __iomem *bar3; | ||
833 | }; | ||
834 | int chip_id; | ||
835 | // Additional state from the built-in driver. Only set iff | ||
836 | // chip_id == NV_CHIP_ID_GV11B | ||
837 | struct gk20a *g; | ||
838 | // Pointer to PCI device needed for pci_iounmap | ||
839 | struct pci_dev *pcid; | ||
840 | }; | ||
841 | |||
842 | /*const struct runlist_funcs { | ||
843 | u8 size; | ||
844 | enum ENTRY_TYPE (*entry_type)(struct nvdebug_state *, void *); | ||
845 | uint32_t (*chid)(struct nvdebug_state *, void *); | ||
846 | uint32_t (*inst_ptr_lo)(struct nvdebug_state *, void *); | ||
847 | enum INST_TARGET (*inst_target)(struct nvdebug_state *, void *): | ||
848 | uint32_t (*tsgid)(struct nvdebug_state *, void *); | ||
849 | uint32_t (*timeslice_scale)(struct nvdebug_state *, void *); | ||
850 | uint32_t (*timeslice_timeout)(struct nvdebug_state *, void *); | ||
851 | uint32_t (*tsg_length)(struct nvdebug_state *, void *); | ||
852 | };*/ | ||
853 | |||
854 | // This disgusting macro is a crutch to work around the fact that runlists were | ||
855 | // different prior to Volta. | ||
856 | #define VERSIONED_RL_ACCESSOR(_ENTRY_TYPE, type, prop) \ | ||
857 | __attribute__((unused)) \ | ||
858 | static type (prop)(const struct nvdebug_state *g, const void *raw) { \ | ||
859 | if (g->chip_id > NV_CHIP_ID_VOLTA) { \ | ||
860 | const struct gv100_runlist_ ## _ENTRY_TYPE *entry = (struct gv100_runlist_ ## _ENTRY_TYPE*)raw; \ | ||
861 | return entry->prop; \ | ||
862 | } else if (g->chip_id > NV_CHIP_ID_KEPLER) { \ | ||
863 | const struct gk110_runlist_ ## _ENTRY_TYPE *entry = (struct gk110_runlist_ ## _ENTRY_TYPE*)raw; \ | ||
864 | return entry->prop; \ | ||
865 | } else { \ | ||
866 | printk(KERN_WARNING "[nvdebug] " #prop " unavailable on GPU ID %x, which is older than Kepler.\n", g->chip_id); \ | ||
867 | return (type)0; \ | ||
868 | } \ | ||
869 | } | ||
870 | |||
871 | VERSIONED_RL_ACCESSOR(chan, uint32_t, chid); | ||
872 | VERSIONED_RL_ACCESSOR(chan, uint32_t, inst_ptr_lo); | ||
873 | VERSIONED_RL_ACCESSOR(chan, enum INST_TARGET, inst_target); | ||
874 | VERSIONED_RL_ACCESSOR(tsg, uint32_t, tsgid); | ||
875 | VERSIONED_RL_ACCESSOR(tsg, enum ENTRY_TYPE, entry_type); | ||
876 | VERSIONED_RL_ACCESSOR(tsg, uint32_t, timeslice_scale); | ||
877 | VERSIONED_RL_ACCESSOR(tsg, uint32_t, timeslice_timeout); | ||
878 | VERSIONED_RL_ACCESSOR(tsg, uint32_t, tsg_length); | ||
310 | 879 | ||
311 | #define next_tsg(tsg) \ | 880 | |
312 | (void*)(tsg + 1) + sizeof(struct runlist_chan) * tsg->tsg_length | 881 | #define NV_RL_ENTRY_SIZE(g) \ |
882 | ((g)->chip_id >= NV_CHIP_ID_VOLTA ? sizeof(struct gv100_runlist_tsg) : sizeof(struct gk110_runlist_tsg)) | ||
883 | |||
884 | #define for_chan_in_tsg(g, chan, tsg) \ | ||
885 | for (chan = (typeof(chan))(((u8*)tsg) + NV_RL_ENTRY_SIZE(g)); \ | ||
886 | (u8*)chan < ((u8*)tsg) + (1 + tsg_length(g, tsg)) * NV_RL_ENTRY_SIZE(g); \ | ||
887 | chan = (typeof(chan))(((u8*)chan) + NV_RL_ENTRY_SIZE(g))) | ||
888 | |||
889 | #define next_tsg(g, tsg) \ | ||
890 | (typeof(tsg))((u8*)(tsg) + NV_RL_ENTRY_SIZE(g) * (tsg_length(g, tsg) + 1)) | ||
313 | 891 | ||
314 | struct runlist_iter { | 892 | struct runlist_iter { |
315 | struct entry_tsg *curr_tsg; | 893 | // Pointer to either a TSG or channel entry (they're the same size) |
894 | void *curr_entry; | ||
895 | // This should be set to tsg_length when a TSG is reached, and | ||
896 | // decremented as each subsequent channel is printed. This allows us to | ||
897 | // track which channel are and are not part of the TSG. | ||
898 | int channels_left_in_tsg; | ||
899 | // Total runlist length, etc | ||
316 | runlist_info_t rl_info; | 900 | runlist_info_t rl_info; |
317 | }; | 901 | }; |
318 | 902 | ||
903 | #define NVDEBUG_MAX_DEVICES 8 | ||
904 | extern struct nvdebug_state g_nvdebug_state[NVDEBUG_MAX_DEVICES]; | ||
905 | |||
319 | // Defined in runlist.c | 906 | // Defined in runlist.c |
320 | struct gk20a* get_live_gk20a(void); | 907 | int get_runlist_iter(struct nvdebug_state *g, int rl_id, struct runlist_iter *rl_iter); |
321 | int get_runlist_iter(struct runlist_iter *rl_iter); | 908 | int preempt_tsg(struct nvdebug_state *g, uint32_t tsg_id); |
322 | int preempt_tsg(uint32_t tsg_id); | 909 | |
910 | // Defined in mmu.c | ||
911 | uint32_t vram2PRAMIN(struct nvdebug_state *g, uint64_t addr); | ||
912 | void __iomem *phy2PRAMIN(struct nvdebug_state* g, uint64_t phy); | ||
913 | uint64_t search_page_directory( | ||
914 | struct nvdebug_state *g, | ||
915 | void __iomem *pde_offset, | ||
916 | void __iomem *(*off2addr)(struct nvdebug_state*, uint64_t), | ||
917 | uint64_t addr_to_find); | ||
918 | uint64_t search_v1_page_directory( | ||
919 | struct nvdebug_state *g, | ||
920 | void __iomem *pde_offset, | ||
921 | void __iomem *(*off2addr)(struct nvdebug_state*, uint64_t), | ||
922 | uint64_t addr_to_find); | ||
923 | |||
323 | 924 | ||
324 | static inline struct gk20a *get_gk20a(struct device *dev) { | 925 | static inline struct gk20a *get_gk20a(struct device *dev) { |
325 | // XXX: Only works because gk20a* is the first member of gk20a_platform | 926 | // XXX: Only works because gk20a* is the first member of gk20a_platform |
326 | return *((struct gk20a**)dev_get_drvdata(dev)); | 927 | return *((struct gk20a**)dev_get_drvdata(dev)); |
327 | } | 928 | } |
328 | 929 | ||
329 | // Functionally identical to nvgpu_readl() | 930 | // We us the data field of the proc_dir_entry ("PDE" in this function) to store |
931 | // our index into the g_nvdebug_state array | ||
932 | static inline int seq2gpuidx(struct seq_file *s) { | ||
933 | const struct file *f = s->file; | ||
934 | return (uintptr_t)PDE_DATA(file_inode(f)); | ||
935 | } | ||
936 | static inline int file2gpuidx(const struct file *f) { | ||
937 | return (uintptr_t)PDE_DATA(file_inode(f)); | ||
938 | } | ||
939 | static inline int file2parentgpuidx(const struct file *f) { | ||
940 | // Should be safe to call on ProcFS entries, as our parent should (?) | ||
941 | // still exist if we're called. If not, there are worse races in this | ||
942 | // module. | ||
943 | return (uintptr_t)PDE_DATA(file_dentry(f)->d_parent->d_inode); | ||
944 | } | ||
945 | |||
946 | #define gk20a_regs(gk20a) (container_of(gk20a, struct nvgpu_os_linux, g)->regs) | ||
947 | |||
948 | // Similar to nvgpu_readl() | ||
330 | // (except we don't try to resolve situations where regs is NULL) | 949 | // (except we don't try to resolve situations where regs is NULL) |
331 | static inline u32 nvdebug_readl(struct gk20a* g, u32 r) { | 950 | static inline u32 nvdebug_readl(struct nvdebug_state *s, u32 r) { |
332 | struct nvgpu_os_linux* g_os = container_of(g, struct nvgpu_os_linux, g); | 951 | if (unlikely(!s->regs || (s->g && !gk20a_regs(s->g)))) { |
333 | if (unlikely(!g_os->regs)) { | 952 | printk(KERN_ERR "[nvdebug] Attempted nvgpu_readl on non-existent registers!\n"); |
334 | printk(KERN_ERR "[nvdebug] Attempted nvgpu_readl on non-existent registers!\n"); | 953 | return -1; |
335 | return -1; | 954 | } |
336 | } | 955 | return readl(s->regs + r); |
337 | return readl(g_os->regs + r); | ||
338 | } | 956 | } |
339 | 957 | ||
340 | // quadword version of nvdebug_readl() | 958 | // quadword version of nvdebug_readl() |
341 | static inline u64 nvdebug_readq(struct gk20a* g, u32 r) { | 959 | static inline u64 nvdebug_readq(struct nvdebug_state *s, u32 r) { |
342 | struct nvgpu_os_linux* g_os = container_of(g, struct nvgpu_os_linux, g); | 960 | u64 ret; |
343 | u64 ret; | 961 | if (unlikely(!s->regs || (s->g && !gk20a_regs(s->g)))) { |
344 | if (unlikely(!g_os->regs)) { | 962 | printk(KERN_ERR "[nvdebug] Attempted nvgpu_readl on non-existent registers!\n"); |
345 | printk(KERN_ERR "[nvdebug] Attempted nvgpu_readl on non-existent registers!\n"); | 963 | return -1; |
346 | return -1; | 964 | } |
347 | } | ||
348 | // readq seems to always return the uppermost 32 bits as 0, so workaround with readl | 965 | // readq seems to always return the uppermost 32 bits as 0, so workaround with readl |
349 | ret = readl(g_os->regs + r); | 966 | ret = readl(s->regs + r); |
350 | ret |= ((u64)readl(g_os->regs + r + 4)) << 32; | 967 | ret |= ((u64)readl(s->regs + r + 4)) << 32; |
351 | return ret; | 968 | return ret; |
352 | } | 969 | } |
353 | 970 | ||
354 | // Functionally identical to nvgpu_writel() | 971 | // Similar to nvgpu_writel() |
355 | static inline void nvdebug_writel(struct gk20a* g, u32 r, u32 v) { | 972 | static inline void nvdebug_writel(struct nvdebug_state *s, u32 r, u32 v) { |
356 | struct nvgpu_os_linux* g_os = container_of(g, struct nvgpu_os_linux, g); | 973 | if (unlikely(!s->regs || (s->g && !gk20a_regs(s->g)))) { |
357 | if (unlikely(!g_os->regs)) { | 974 | printk(KERN_ERR "[nvdebug] Attempted nvgpu_writel on non-existent registers!\n"); |
975 | return; | ||
976 | } | ||
977 | writel_relaxed(v, s->regs + r); | ||
978 | wmb(); | ||
979 | } | ||
980 | |||
981 | // quadword version of nvdebug_writel() | ||
982 | // XXX: This probably doesn't work XXX: Untested | ||
983 | static inline void nvdebug_writeq(struct nvdebug_state *s, u32 r, u64 v) { | ||
984 | if (unlikely(!s->regs || (s->g && !gk20a_regs(s->g)))) { | ||
358 | printk(KERN_ERR "[nvdebug] Attempted nvgpu_writel on non-existent registers!\n"); | 985 | printk(KERN_ERR "[nvdebug] Attempted nvgpu_writel on non-existent registers!\n"); |
359 | return; | 986 | return; |
360 | } | 987 | } |
361 | writel_relaxed(v, g_os->regs + r); | 988 | writeq_relaxed(v, s->regs + r); |
362 | wmb(); | 989 | wmb(); |
363 | } | 990 | } |