runlist.c


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172

#include <linux/kernel.h>  // Kernel types

#include "nvdebug.h"

// When defined, get_runlist_iter() reacts to a failed BAR2/3-based lookup of a
// runlist in video memory by moving the NV_PBUS_BAR0_WINDOW (PRAMIN) window
// onto the runlist and reading it through NV_PRAMIN. When undefined, that
// failure instead makes get_runlist_iter() return -EOPNOTSUPP.
#define FALLBACK_TO_PRAMIN

/* Get runlist head and info (incl. length)
   @param g       GPU state used for every register read and write
   @param rl_id   Which runlist to obtain?
   @param rl_iter Location at which to store output. Always zeroed first; only
                  filled in when a non-empty runlist has been located.
   @return 0 on success (also for an empty runlist, in which case `rl_iter`
           stays zeroed), -EIO if a runlist register reads back as all ones,
           -ENODEV if the BAR2/3 mapping is missing, or -EOPNOTSUPP if a
           runlist in video memory could not be reached.
*/
int get_runlist_iter(struct nvdebug_state *g, int rl_id, struct runlist_iter *rl_iter) {
	runlist_base_t rl_base;
	runlist_info_t rl_info;
	u64 runlist_iova;
	*rl_iter = (struct runlist_iter){0};
	rl_base.raw = nvdebug_readl(g, NV_PFIFO_ENG_RUNLIST_BASE(rl_id));
	// Check that reads are working
	if (rl_base.raw == -1)
		return -EIO;
	rl_info.raw = nvdebug_readl(g, NV_PFIFO_ENG_RUNLIST(rl_id));
	if (rl_info.raw == -1)
		return -EIO;
	// The driver calls this value `runlist_iova`, which suggests an I/O virtual
	// address, but it IS JUST A PHYSICAL ADDRESS (stored as a 4 KiB page
	// number). Earlier attempts to dereference it failed only because the GPU
	// had gone to sleep and invalidated its register state, so the register
	// reads were returning garbage.
	runlist_iova = ((u64)rl_base.ptr) << 12;
	printk(KERN_INFO "[nvdebug] Runlist %d @ %llx in %s (config raw: %x)\n",
	       rl_id, runlist_iova, target_to_text(rl_base.target), rl_base.raw);
	printk(KERN_INFO "[nvdebug] Runlist length %d, ID %d\n", rl_info.len, rl_info.id);
	// Return early on an empty runlist
	if (!rl_info.len)
		return 0;
	// If the runlist is in VID_MEM, search the BAR2/3 page tables for a mapping
	if (rl_base.target == TARGET_VID_MEM) {
		printk(KERN_WARNING "[nvdebug] Runlist is located in video memory. Access to video memory is experimental.\n");
		bar_config_block_t bar1_block, bar2_block;
		// BAR1 is only reported for diagnostics; the lookup below uses BAR2.
		// TODO: Support BAR1?
		bar1_block.raw = nvdebug_readl(g, NV_PBUS_BAR1_BLOCK);
		printk(KERN_INFO "[nvdebug] BAR1 inst block @ %llx in %s's %s address space.\n",
		       ((u64)bar1_block.ptr) << 12, target_to_text(bar1_block.target),
		       bar1_block.is_virtual ? "virtual" : "physical");
		bar2_block.raw = nvdebug_readl(g, NV_PBUS_BAR2_BLOCK);
		printk(KERN_INFO "[nvdebug] BAR2 inst block @ %llx in %s's %s address space.\n",
		       ((u64)bar2_block.ptr) << 12, target_to_text(bar2_block.target),
		       bar2_block.is_virtual ? "virtual" : "physical");
		uint32_t bar_inst_pramin_offset = vram2PRAMIN(g, (uint64_t)bar2_block.ptr << 12);
		if (!bar_inst_pramin_offset) {
			printk(KERN_WARNING "[nvdebug] Unable to find instance block for BAR2/3 in the current NV_PRAMIN window. VRAM inaccessible.\n");
			goto attempt_pramin_access;
		}
		// Instance blocks (size == 1kb) contain many things, but we only care about
		// the section which describes the location of the page directory (page table)
		uint32_t bar_pdb_config_pramin_offset = bar_inst_pramin_offset + NV_PRAMIN_PDB_CONFIG_OFF;
		page_dir_config_t pd_config;
		pd_config.raw = nvdebug_readq(g, bar_pdb_config_pramin_offset + NV_PRAMIN);
		// Reassemble the page directory address: `page_dir_hi` sits above the
		// 20 bits of `page_dir_lo`, and the result is a 4 KiB page number.
		uint64_t bar_pdb_vram_addr = pd_config.page_dir_hi;
		bar_pdb_vram_addr <<= 20;
		bar_pdb_vram_addr |= pd_config.page_dir_lo;
		bar_pdb_vram_addr <<= 12;
		printk(KERN_INFO "[nvdebug] BAR2 PDB @ %llx in %s of version %s (config raw: %llx)\n",
		       bar_pdb_vram_addr, target_to_text(pd_config.target),
		       pd_config.is_ver2 ? "2" : "1", pd_config.raw);
		// TODO: SYSMEM support for page table location
		if (pd_config.target != TARGET_VID_MEM) {
			printk(KERN_WARNING "[nvdebug] BAR2 PDB is in an unsupported location.\n");
			goto attempt_pramin_access;
		}
		uint32_t bar_pdb_pramin_offset = vram2PRAMIN(g, bar_pdb_vram_addr);
		if (!bar_pdb_pramin_offset) {
			printk(KERN_WARNING "[nvdebug] Unable to find page directory BAR2/3 in the current NV_PRAMIN window. VRAM inaccessible.\n");
			goto attempt_pramin_access;
		}
		// Look up which BAR2 virtual address maps to the runlist. A result of
		// zero is treated as "no mapping found".
		uint64_t runlist_bar_vaddr;
		if (pd_config.is_ver2)
			runlist_bar_vaddr = search_page_directory(g, g->regs + NV_PRAMIN + bar_pdb_pramin_offset, phy2PRAMIN, runlist_iova);
		else
			runlist_bar_vaddr = search_v1_page_directory(g, g->regs + NV_PRAMIN + bar_pdb_pramin_offset, phy2PRAMIN, runlist_iova);
		if (!runlist_bar_vaddr) {
			printk(KERN_WARNING "[nvdebug] Unable to find runlist mapping in BAR2/3 page tables.\n");
			goto attempt_pramin_access;
		}
		printk(KERN_INFO "[nvdebug] Runlist @ %llx in BAR2 virtual address space.\n", runlist_bar_vaddr);
		// Validate the same mapping that is dereferenced just below
		if (!g->bar2) {
			printk(KERN_WARNING "[nvdebug] BAR2/3 not mapped.\n");
			return -ENODEV;
		}
		rl_iter->curr_entry = g->bar2 + runlist_bar_vaddr;
	} else {
		// Directly access the runlist if stored in SYS_MEM (physically addressed)
		rl_iter->curr_entry = phys_to_virt(runlist_iova);
	}
	rl_iter->rl_info = rl_info;
	return 0;
attempt_pramin_access:
#ifdef FALLBACK_TO_PRAMIN
	printk(KERN_INFO "[nvdebug] Attempting to move PRAMIN window to runlist as BAR2/3-based access failed [DANGEROUS SIDE EFFECTS]!\n");
	bar0_window_t win;
	// Start from an all-zero register image so that any bits not covered by
	// `base` and `target` are not written out as uninitialized stack contents
	win.raw = 0;
	// The window base is expressed in 64 KiB units
	win.base = (runlist_iova >> 16);
	win.target = TARGET_VID_MEM;
	// Shift PRAMIN window. This will cause problems if it races with driver code
	// that tries to do the same, or expects the window not to move.
	nvdebug_writel(g, NV_PBUS_BAR0_WINDOW, win.raw);
	uint32_t off = vram2PRAMIN(g, runlist_iova);
	// vram2PRAMIN() normally returns zero on error, but zero is also the
	// correct offset when the runlist starts exactly at the window base (i.e.
	// it is 64 KiB-aligned). Only treat zero as a failure when the runlist has
	// a non-zero offset within its 64 KiB window.
	if (!off && (runlist_iova & 0xffffull)) {
		printk(KERN_INFO "[nvdebug] Unable to shift PRAMIN to runlist. Aborting...\n");
		return -EOPNOTSUPP;
	}
	rl_iter->curr_entry = g->regs + NV_PRAMIN + off;
	rl_iter->rl_info = rl_info;
	return 0;
#else
	return -EOPNOTSUPP;
#endif // FALLBACK_TO_PRAMIN
}

/* Trigger a preemption of the TSG with ID `tsg_id`
   Scheduling of the runlist reported by NV_PFIFO_RUNLIST is disabled around
   the preemption request and re-enabled afterwards.
   @param g      GPU state used for every register read and write
   @param tsg_id ID of the TSG to preempt
   @return 0 once the preemption request has been written, or -EIO if `g` is
           NULL or the runlist register reads back as all ones.
*/
int preempt_tsg(struct nvdebug_state *g, uint32_t tsg_id) {
	runlist_info_t rl_info;
	pfifo_preempt_t pfifo_preempt;
	runlist_disable_t rl_disable;
	if (!g)
		return -EIO;
	rl_info.raw = nvdebug_readl(g, NV_PFIFO_RUNLIST);
	// Check that reads are working; otherwise `rl_info.id` is garbage and
	// would be used below to pick which runlist to disable
	if (rl_info.raw == -1)
		return -EIO;
	// Start from an all-zero register image so that any bits not covered by
	// the fields set below are not written out as uninitialized stack contents
	pfifo_preempt.raw = 0;
	pfifo_preempt.id = tsg_id;
	pfifo_preempt.is_pending = 0;
	pfifo_preempt.type = PREEMPT_TYPE_TSG;
	// There may be a bug (?) that requires us to disable scheduling before preempting
	rl_disable.raw = nvdebug_readl(g, NV_PFIFO_SCHED_DISABLE);
	rl_disable.raw |= BIT(rl_info.id);  // Disable runlist rl_info.id
	nvdebug_writel(g, NV_PFIFO_SCHED_DISABLE, rl_disable.raw);
	// Actually trigger the preemption
	nvdebug_writel(g, NV_PFIFO_PREEMPT, pfifo_preempt.raw);
	// Re-enable scheduling
	rl_disable.raw &= ~BIT(rl_info.id);  // Enable runlist rl_info.id
	nvdebug_writel(g, NV_PFIFO_SCHED_DISABLE, rl_disable.raw);

	// `tsg_id` is unsigned, so print it as such
	printk(KERN_INFO "[nvdebug] TSG %u preempted (runlist %d)\n", tsg_id, rl_info.id);
	return 0;
}