aboutsummaryrefslogtreecommitdiffstats
path: root/bus.c
blob: 802b6df59863d8f995650c23450860fbc1f35ad8 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
/* Copyright 2024 Joshua Bakita
 * Helpers for dealing with the PCIe and platform bus
 *
 * = Design Notes =
 * We have to use PRAMIN to access the BAR2 page table. While it's typically also
 * mapped into BAR2, we have no way to know where without the table. If the table
 * changes, the new sections will have new mappings in BAR2, repeating the
 * problem, and making caching insufficient.
 *
 * = Terms =
 * VRAM/VID_MEM: Video RAM; Addresses to physical frames in the on-GPU RAM.
 * SYS_MEM: System Memory; "Bus addresses"; Addresses which can be presented to
 *          the PCIe Host for resolution. On x86_64 without an IOMMU, these are
 *          just physical addresses, but may be I/O Virtual Addresses (IOVAs)
 *          or translated via an I/O MMU on other platforms; DMA Addresses.
 * PEER: Addresses to RAM on another GPU.
 */
#include <linux/printk.h> // For printk()
#include <asm/errno.h> // For error defines
#include <asm/io.h> // For readl()

#include "nvdebug.h"

/* Obtain the PRAMIN offset at which `addr` can be accessed
  @param g      State of the GPU whose BAR0 window register is read/written
  @param addr   Address to find
  @param target Which address space to use (VRAM, SYS_MEM, PEER(?))
  @return non-negative offset into PRAMIN, or -EINVAL on invalid arguments

  Note: Will move the PRAMIN window to accommodate the request. Only guarantees
        that the surrounding 64KiB window will be accessible.
  Note: Moving the PRAMIN window will cause problems if it races with driver
        code that tries to do the same, or expects the window not to move.
  Bugs: Untested on PEER.
*/
int addr_to_pramin_mut(struct nvdebug_state *g,
                       uint64_t addr, enum INST_TARGET target) {
	bar0_window_t window;
	uint64_t pramin_base;
	// For us, accuracy and robustness are more important than speed
	// Check that the address is valid (49 bits are addressable on-GPU, but
	// PRAMIN only supports up to 40 bits).
	if (addr & ~0x000000ffffffffffull) {
		printk(KERN_ERR "[nvdebug] Invalid address %llx passed to %s!\n",
		       addr, __func__);
		return -EINVAL;
	}
	window.raw = nvdebug_readl(g, NV_PBUS_BAR0_WINDOW);
	// Compute the current base before any jump to `relocate`; it is logged
	// there, and must not be read uninitialized on a target mismatch.
	pramin_base = ((uint64_t)window.base) << 16;
	if (window.target != target)
		goto relocate;
	// The window spans [pramin_base, pramin_base + NV_PRAMIN_LEN); the end is
	// exclusive, so an address exactly at the end requires relocation.
	if (addr < pramin_base || addr >= pramin_base + NV_PRAMIN_LEN)
		goto relocate;
	// Always < NV_PRAMIN_LEN (expected to be 1MiB), so safe for int
	return addr - pramin_base;
relocate:
	printk(KERN_INFO "[nvdebug] Moving PRAMIN win from base %llx to %llx to accomodate %#018llx\n", pramin_base, (addr >> 16) << 16, addr);
	// Move PRAMIN window to a 64KiB-aligned address
	window.base = (u32)(addr >> 16); // Safe, due to above range check
	window.target = target;
	nvdebug_writel(g, NV_PBUS_BAR0_WINDOW, window.raw);
	// Offset of `addr` within the 64KiB-aligned chunk the window now starts at
	return (int)(addr & 0xffffull);
}


/* Get a persistent pointer to the page directory base
  @param pdb Dereferencable pointer to the zeroeth entry of top-level page
             directory (PD3) for the BAR2 register region.
  Note: The returned pointer will be into the PRAMIN space. If the PRAMIN
        window is moved to a region that does not cover the BAR2 page table,
        this ***will move the window***.
  Note: Even if the page table is located in SYS_MEM, we route reads/writes via
        PRAMIN. This ensures that we always see what the GPU sees, and that
        includes any passes through I/O MMUs or IOVA spaces.
*/
int get_bar2_pdb(struct nvdebug_state *g, void **pdb, bool *is_v2_pdb) {
	static void* cached_pdb = NULL;
	static bool cached_is_v2_pdb = false;
	static long pd_hash = 0;
	int ret;
	bar_config_block_t bar2_block;
	page_dir_config_t pd_config;
	uint64_t pdb_vram;

	// Use cached base as long as it's still pointing to the same thing
	if (cached_pdb && readl(cached_pdb) == pd_hash) {
		*pdb = cached_pdb;
		*is_v2_pdb = cached_is_v2_pdb;
		return 0;
	}

	if (!g->bar2)
		return -ENXIO;

	// BAR2 has its own instance block (typically in VRAM) which contains the
	// Page Directory Base (PDB), a pointer to a page directory/table
	// hierarchy used to translate BAR2 offsets to VRAM or SYS_MEM addresses.

	// Determine location of BAR2 instance block
	if ((bar2_block.raw = nvdebug_readl(g, NV_PBUS_BAR2_BLOCK)) == -1) {
		printk(KERN_ERR "[nvdebug] Unable to read BAR2/3 configuration! BAR2/3 inaccessible.\n");
		return -ENOTSUPP;
	}
	printk(KERN_INFO "[nvdebug] BAR2 inst block @ %llx in %s's %s address space.\n", ((u64)bar2_block.ptr) << 12, target_to_text(bar2_block.target), bar2_block.is_virtual ? "virtual" : "physical");
	// Setup PRAMIN to point at the BAR2 instance block
	if ((ret = addr_to_pramin_mut(g, (uint64_t)bar2_block.ptr << 12, bar2_block.target)) < 0) {
		printk(KERN_ERR "[nvdebug] Invalid BAR2/3 Instance Block configuration! BAR2/3 inaccessible.\n");
		return ret;
	}
	printk(KERN_INFO "[nvdebug] BAR2 inst block at off %x in PRAMIN\n", ret);
	// Pull the page directory base configuration from the instance block
	if ((pd_config.raw = nvdebug_readq(g, NV_PRAMIN + ret + NV_PRAMIN_PDB_CONFIG_OFF)) == -1) {
		printk(KERN_ERR "[nvdebug] Unable to read BAR2/3 PDB configuration! BAR2/3 inaccessible.\n");
		return -ENOTSUPP;
	}
	pdb_vram = pd_config.page_dir_hi;
	pdb_vram <<= 20;
	pdb_vram |= pd_config.page_dir_lo;
	pdb_vram <<= 12;
	printk(KERN_INFO "[nvdebug] BAR2 PDB @ %llx (config raw: %llx)\n", pdb_vram, pd_config.raw);
	// Setup PRAMIN to point at the page directory
	if ((ret = addr_to_pramin_mut(g, pdb_vram, pd_config.target)) < 0) {
		printk(KERN_ERR "[nvdebug] Invalid BAR2/3 PDB configuration! BAR2/3 inaccessible.\n");
		return ret;
	}

	*pdb = cached_pdb = g->regs + NV_PRAMIN + ret;
	pd_hash = readl(cached_pdb);
	*is_v2_pdb = cached_is_v2_pdb = pd_config.is_ver2;

	return 0;
}