aboutsummaryrefslogtreecommitdiffstats
path: root/nvdebug.h
diff options
context:
space:
mode:
Diffstat (limited to 'nvdebug.h')
-rw-r--r--nvdebug.h84
1 files changed, 82 insertions, 2 deletions
diff --git a/nvdebug.h b/nvdebug.h
index fd88b2e..26689d9 100644
--- a/nvdebug.h
+++ b/nvdebug.h
@@ -365,6 +365,37 @@ enum CHANNEL_STATUS {
365 CHANNEL_STATUS_ON_ENG_PENDING_ACQ_CTX_RELOAD = 14, 365 CHANNEL_STATUS_ON_ENG_PENDING_ACQ_CTX_RELOAD = 14,
366}; 366};
367 367
368/* RunList RAM (RLRAM)
369 Starting with Ampere, the PFIFO register region no longer exists, and each
370 engine has separate runlist RAM and channel RAM. The register (BAR0) offset for
371 Runlist RAM for each engine must be pulled from the runlist_pri_base field
372 (RUNLIST Private Register BASE address) provided by PTOP.
373
374 See get_runlist_ram() in runlist.c
375
376 Support: Ampere+
377*/
378#define NV_RUNLIST_BASE_GA100 0x080 // NOTE(review): presumably relative to the per-runlist runlist_pri_base, not BAR0 - confirm in runlist.c
379#define NV_RUNLIST_SUBMIT_GA100 0x088 // NOTE(review): presumably relative to runlist_pri_base as well - confirm
380#define NV_RUNLIST_CHANNEL_CONFIG_GA100 0x004 // NOTE(review): presumably where a runlist_channel_config_t is read from - confirm
381
382/* Channel RAM configuration, as contained in Runlist RAM
383
384 NUM_CHANNELS_LOG2 : 1 << NUM_CHANNELS_LOG2 is the number of channel_ctrl_ga100_t
385 entries in the described Channel RAM region.
386 BAR0_OFFSET : BAR0_OFFSET << 4 is the register offset (off BAR0) for the
387 Channel RAM region.
388
389 Support: Ampere+
390*/
391typedef union {
392 struct {
393 uint8_t num_channels_log2:4; // 1 << num_channels_log2 == number of Channel RAM entries
394 uint32_t bar0_offset:28; // bar0_offset << 4 == register offset (off BAR0) of Channel RAM
395 }__attribute__((packed));
396 uint32_t raw; // both fields (4 + 28 bits) viewed as a single 32-bit word
397} runlist_channel_config_t;
398
368/* Programmable Channel Control System RAM (PCCSR) 399/* Programmable Channel Control System RAM (PCCSR)
369 512-entry array of channel control and status data structures. 400 512-entry array of channel control and status data structures.
370 401
@@ -425,6 +456,50 @@ typedef union {
425 uint64_t raw; 456 uint64_t raw;
426} channel_ctrl_t; 457} channel_ctrl_t;
427 458
459/* CHannel RAM (CHRAM) (PCCSR replacement on Ampere+)
460 Starting with Ampere, channel IDs are no longer unique indexes into the
461 global channel RAM region (PCCSR), but are indexes into per-runlist channel
462 RAMs.
463
464 As Channel RAM entries are now subsidiary to a runlist, they do not contain
465 duplicate information, such as the instance pointer (to "result in smaller
466 hardware" per ga100/dev_ram.ref.txt in open-gpu-doc).
467
468 The new format retains and adds to the status information available about a
469 channel, but does so via bit flags rather than an enum. Some bit flags are
470 writable to trigger behavior previously dedicated to a bit (eg. writing to
471 `ctx_reload` triggers the same behavior as writing to `force_ctx_reload` did).
472
473 When the first bit (`is_write_one_clears_bits`) is set in this structure,
474 writing a 1 to any field will clear, rather than set, it. Writing a 0 to any
475 field is a no-op.
476
477 All fields are read/write, except for the following read-only ones: BUSY,
478 ON_PBDMA, ON_ENG, PBDMA_BUSY, ENG_BUSY.
479
480 Support: Ampere, Hopper, Ada (and newer likely)
481 See also: manuals/ampere/ga100/dev_runlist.ref.txt in NVIDIA's open-gpu-doc
482*/
483typedef union {
484 struct {
485 bool is_write_one_clears_bits:1; // new; when set, writing 1 to a field clears it instead of setting it
486 bool enable:1;
487 bool next:1;
488 bool busy:1; // read-only
489 bool pbdma_faulted:1; // write to force_pbdma_faulted
490 bool eng_faulted:1; // write to force_eng_faulted
491 bool on_pbdma:1; // breakout; read-only
492 bool on_eng:1; // breakout; read-only
493 bool pending:1; // breakout
494 bool ctx_reload:1; // breakout; write to force_ctx_reload
495 bool pbdma_busy:1; // breakout; read-only
496 bool eng_busy:1; // new; read-only
497 bool acquire_fail:1; // breakout
498 uint32_t :19; // unnamed padding: 13 one-bit flags + 19 bits == 32 bits
499 } __attribute__((packed));
500 uint32_t raw; // all flags viewed as a single 32-bit word
501} channel_ctrl_ga100_t;
502
428/* Control word for runlist enable/disable. 503/* Control word for runlist enable/disable.
429 504
430 RUNLIST_N : Is runlist n disabled? (1 == disabled, 0 == enabled) 505 RUNLIST_N : Is runlist n disabled? (1 == disabled, 0 == enabled)
@@ -1413,14 +1488,19 @@ struct runlist_iter {
1413 int entries_left_in_tsg; 1488 int entries_left_in_tsg;
1414 // Number of entries in runlist 1489 // Number of entries in runlist
1415 int len; 1490 int len;
1416 // Offset to start of Channel RAM (as this is per-runlist on Ampere+) 1491 // (Ampere+ only) Offset to the per-runlist "Runlist RAM" register region.
1417 uint32_t channel_ram; 1492 // This includes the offset for Channel RAM (per-runlist on Ampere+).
1493 uint32_t runlist_pri_base;
1418}; 1494};
1419 1495
1420#define NVDEBUG_MAX_DEVICES 8 1496#define NVDEBUG_MAX_DEVICES 8
1421extern struct nvdebug_state g_nvdebug_state[NVDEBUG_MAX_DEVICES]; 1497extern struct nvdebug_state g_nvdebug_state[NVDEBUG_MAX_DEVICES];
1422 1498
1423// Defined in runlist.c 1499// Defined in runlist.c
1500int get_runlist_ram( // NOTE(review): return-value convention is not visible here - confirm in runlist.c
1501 struct nvdebug_state *g,
1502 int rl_id, // NOTE(review): presumably the ID of the runlist to look up - confirm
1503 uint32_t *rl_ram_off /* out */); // NOTE(review): presumably receives the Runlist RAM register (BAR0) offset - confirm
1424int get_runlist_iter( 1504int get_runlist_iter(
1425 struct nvdebug_state *g, 1505 struct nvdebug_state *g,
1426 int rl_id, 1506 int rl_id,