diff options
Diffstat (limited to 'nvdebug.h')
-rw-r--r-- | nvdebug.h | 84 |
1 files changed, 82 insertions, 2 deletions
@@ -365,6 +365,37 @@ enum CHANNEL_STATUS { | |||
365 | CHANNEL_STATUS_ON_ENG_PENDING_ACQ_CTX_RELOAD = 14, | 365 | CHANNEL_STATUS_ON_ENG_PENDING_ACQ_CTX_RELOAD = 14, |
366 | }; | 366 | }; |
367 | 367 | ||
368 | /* RunList RAM (RLRAM) | ||
369 | Starting with Ampere, the PFIFO register region no longer exists, and each | ||
370 | engine has separate runlist RAM and channel RAM. The register (BAR0) offset for | ||
371 | Runlist RAM for each engine must be pulled from the runlist_pri_base field | ||
372 | (RUNLIST Private Register BASE address) provided by PTOP. | ||
373 | |||
374 | See get_runlist_ram() in runlist.c | ||
375 | |||
376 | Support: Ampere+ | ||
377 | */ | ||
378 | #define NV_RUNLIST_BASE_GA100 0x080 | ||
379 | #define NV_RUNLIST_SUBMIT_GA100 0x088 | ||
380 | #define NV_RUNLIST_CHANNEL_CONFIG_GA100 0x004 | ||
381 | |||
382 | /* Channel RAM configuration, as contained in Runlist RAM | ||
383 | |||
384 | NUM_CHANNELS_LOG2 : 1 << NUM_CHANNELS_LOG2 is the number of channel_ctrl_ga100_t | ||
385 | entries in the described Channel RAM region. | ||
386 | BAR0_OFFSET : BAR0_OFFSET << 4 is the register offset (off BAR0) for the | ||
387 | Channel RAM region. | ||
388 | |||
389 | Support: Ampere+ | ||
390 | */ | ||
391 | typedef union { /* Channel RAM configuration word, viewable as bitfields or as one raw 32-bit value */ | ||
392 | struct { | ||
393 | uint8_t num_channels_log2:4; /* 1 << this is the number of channel_ctrl_ga100_t entries in the Channel RAM region */ | ||
394 | uint32_t bar0_offset:28; /* this << 4 is the register offset (off BAR0) of the Channel RAM region */ | ||
395 | }__attribute__((packed)); | ||
396 | uint32_t raw; /* whole-word view; 4 + 28 bitfield bits are intended to overlay these 32 bits */ | ||
397 | } runlist_channel_config_t; | ||
398 | |||
368 | /* Programmable Channel Control System RAM (PCCSR) | 399 | /* Programmable Channel Control System RAM (PCCSR) |
369 | 512-entry array of channel control and status data structures. | 400 | 512-entry array of channel control and status data structures. |
370 | 401 | ||
@@ -425,6 +456,50 @@ typedef union { | |||
425 | uint64_t raw; | 456 | uint64_t raw; |
426 | } channel_ctrl_t; | 457 | } channel_ctrl_t; |
427 | 458 | ||
459 | /* CHannel RAM (CHRAM) (PCCSR replacement on Ampere+) | ||
460 | Starting with Ampere, channel IDs are no longer unique indexes into the | ||
461 | global channel RAM region (PCCSR), but are indexes into per-runlist channel | ||
462 | RAMs. | ||
463 | |||
464 | As Channel RAM entries are now subsidiary to a runlist, they do not contain | ||
465 | duplicate information, such as the instance pointer (to "result in smaller | ||
466 | hardware" per ga100/dev_ram.ref.txt in open-gpu-doc). | ||
467 | |||
468 | The new format retains and adds to the status information available about a | ||
469 | channel, but does so via bit flags rather than an enum. Some bit flags are | ||
470 | writable to trigger behavior previously dedicated to a bit (eg. writing to | ||
471 | `ctx_reload` triggers the same behavior as writing to `force_ctx_reload` did). | ||
472 | |||
473 | When the first bit (`is_write_one_clears_bits`) is set in this structure, | ||
474 | writing a 1 to any field will clear, rather than set, it. Writing a 0 to any | ||
475 | field is a no-op. | ||
476 | |||
477 | All fields are read/write except the following, which are read-only: BUSY, | ||
478 | ON_PBDMA, ON_ENG, PBDMA_BUSY, ENG_BUSY. | ||
479 | |||
480 | Support: Ampere, Hopper, Ada (and newer likely) | ||
481 | See also: manuals/ampere/ga100/dev_runlist.ref.txt in NVIDIA's open-gpu-doc | ||
482 | */ | ||
483 | typedef union { /* One entry of a per-runlist Channel RAM (Ampere+), as flag bits or one raw 32-bit word */ | ||
484 | struct { /* NOTE(review): the "new"/"breakout" tags presumably mean "added on Ampere" vs. "split out of the old status enum" -- confirm */ | ||
485 | bool is_write_one_clears_bits:1; // new; when set, writing a 1 to a field clears it rather than sets it (writing 0 is a no-op) | ||
486 | bool enable:1; | ||
487 | bool next:1; | ||
488 | bool busy:1; /* read-only */ | ||
489 | bool pbdma_faulted:1; // write to force_pbdma_faulted | ||
490 | bool eng_faulted:1; // write to force_eng_faulted | ||
491 | bool on_pbdma:1; // breakout; read-only | ||
492 | bool on_eng:1; // breakout; read-only | ||
493 | bool pending:1; // breakout | ||
494 | bool ctx_reload:1; // breakout; write to force_ctx_reload (writing here triggers what force_ctx_reload used to) | ||
495 | bool pbdma_busy:1; // breakout; read-only | ||
496 | bool eng_busy:1; // new; read-only | ||
497 | bool acquire_fail:1; // breakout | ||
498 | uint32_t :19; /* unnamed padding: 13 flag bits + 19 = 32 bits */ | ||
499 | } __attribute__((packed)); | ||
500 | uint32_t raw; /* whole-word view intended to overlay the 32 flag/padding bits above */ | ||
501 | } channel_ctrl_ga100_t; | ||
502 | |||
428 | /* Control word for runlist enable/disable. | 503 | /* Control word for runlist enable/disable. |
429 | 504 | ||
430 | RUNLIST_N : Is runlist n disabled? (1 == disabled, 0 == enabled) | 505 | RUNLIST_N : Is runlist n disabled? (1 == disabled, 0 == enabled) |
@@ -1413,14 +1488,19 @@ struct runlist_iter { | |||
1413 | int entries_left_in_tsg; | 1488 | int entries_left_in_tsg; |
1414 | // Number of entries in runlist | 1489 | // Number of entries in runlist |
1415 | int len; | 1490 | int len; |
1416 | // Offset to start of Channel RAM (as this is per-runlist on Ampere+) | 1491 | // (Ampere+ only) Offset to the per-runlist "Runlist RAM" register region. |
1417 | uint32_t channel_ram; | 1492 | // This includes the offset for Channel RAM (per-runlist on Ampere+). |
1493 | uint32_t runlist_pri_base; | ||
1418 | }; | 1494 | }; |
1419 | 1495 | ||
1420 | #define NVDEBUG_MAX_DEVICES 8 | 1496 | #define NVDEBUG_MAX_DEVICES 8 |
1421 | extern struct nvdebug_state g_nvdebug_state[NVDEBUG_MAX_DEVICES]; | 1497 | extern struct nvdebug_state g_nvdebug_state[NVDEBUG_MAX_DEVICES]; |
1422 | 1498 | ||
1423 | // Defined in runlist.c | 1499 | // Defined in runlist.c |
1500 | int get_runlist_ram( /* NOTE(review): return-value convention is not visible in this header -- see runlist.c */ | ||
1501 | struct nvdebug_state *g, /* per-device state; presumably selects which GPU to query -- confirm */ | ||
1502 | int rl_id, /* presumably the ID of the runlist to look up -- confirm */ | ||
1503 | uint32_t *rl_ram_off /* out; presumably receives the register (BAR0) offset of that runlist's Runlist RAM -- confirm in runlist.c */); | ||
1424 | int get_runlist_iter( | 1504 | int get_runlist_iter( |
1425 | struct nvdebug_state *g, | 1505 | struct nvdebug_state *g, |
1426 | int rl_id, | 1506 | int rl_id, |