#include <linux/device.h> // For struct device, bus_find_device*()
#include <linux/errno.h>  // For error codes (-EIO, etc.)
//#include <linux/iommu.h>  // For struct iommu_domain, iommu_iova_to_phys()
#include <linux/kernel.h> // Kernel types
#include <asm/io.h>       // For phys_to_virt()
#include "nvdebug.h"
// Bus types are global symbols in the kernel
extern struct bus_type platform_bus_type;
struct gk20a* get_live_gk20a(void) {
struct device *dev = NULL;
struct device *temp_dev;
struct gk20a *g;
struct nvgpu_os_linux *l;
// Get the last device that matches our name
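// bus_find_device_by_name() starts searching after the device passed as
// its second argument, so looping until it returns NULL leaves `dev` at
// the last matching device on the platform bus.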
while ((temp_dev = bus_find_device_by_name(&platform_bus_type, dev, "17000000.gv11b"))) {
dev = temp_dev;
printk(KERN_INFO "[nvdebug] Found a matching device %s\n", dev_name(dev));
}
if (!dev)
return NULL;
g = get_gk20a(dev);
// The address pointed to by `regs` + NV_PFIFO_RUNLIST_BASE seemed not to be:
// - A GPU address (the type is sysmem_coherent)
// - A physical address (dereferencing after ioremap crashes)
// - A kernel virtual address (dereferencing segfaults)
// So maybe it's some sort of custom thing? This is an address that the GPU
// can use, so it would make the most sense for it to be a physical address.
//
// BUT, it can't possibly be a physical address, as it would refer to an
// address greater than the maximum one on our system (by a lot!).
// Maybe I'm reading the runlist base wrong?
// Aha, the driver calls it runlist_iova. Sounds like runlist I/O virtual
// address! So, what's this I/O address space? All I know is that it's what
// nvgpu_mem_get_addr() returns. That function returns the result of either:
// - gpu_phys_addr, which is __nvgpu_sgl_phys on our platform, and which (?)
//   converts an IPA to a PA
// - nvgpu_mem_iommu_translate
//
// The original memory is allocated with nvgpu_dma_alloc_flags_sys(), which
// returns SYSMEM.
//
// To convert a physical address to an IOMMU address, a bit is set in the
// address (see nvgpu_mem_iommu_translate()).
//
// BUT, it turns out that it IS JUST A PHYSICAL ADDRESS! It wasn't working
// before because the GPU had simply gone to sleep and invalidated its
// register state, so nvgpu_readl() was returning garbage.
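// l->regs holds the kernel mapping of the GPU register window; it is NULL
// if the driver hasn't mapped it (e.g. the GPU is powered off), so bail
// out here rather than risk reading garbage as described above.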
l = container_of(g, struct nvgpu_os_linux, g);
if (!l->regs)
return NULL;
return g;
}
/* Get the runlist head and info (including length)
   @param rl_iter Location at which to store output
   @return 0 on success, -EIO if the GPU is inaccessible
*/
int get_runlist_iter(struct runlist_iter *rl_iter) {
struct entry_tsg head;
runlist_base_t rl_base;
runlist_info_t rl_info;
u64 runlist_iova;
struct gk20a *g = get_live_gk20a();
if (!g)
return -EIO;
rl_base.raw = nvdebug_readl(g, NV_PFIFO_RUNLIST_BASE);
rl_info.raw = nvdebug_readl(g, NV_PFIFO_RUNLIST);
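// The ptr field holds the upper bits of a 4 KiB-aligned address, so
// shifting left by 12 yields the full I/O virtual address of the runlist.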
runlist_iova = ((u64)rl_base.ptr) << 12;
printk(KERN_INFO "[nvdebug] Runlist ptr: %x, type: %d, raw: %x, IOVA: %px\n",
rl_base.ptr, rl_base.type, rl_base.raw, (void*)runlist_iova);
// Segfaults
//u32 attempted_read = ioread32(runlist_iova);
//printk(KERN_INFO "[nvdebug] first word of runlist: %0x\n", attempted_read);
// Errors out
//u32* virt_rt_addr = ioremap(phys_rl_addr, sizeof(struct entry_tsg));
//printk(KERN_INFO "[nvdebug] Runlist virt_addr: %px\n", virt_rt_addr);
/* Overcomplicated? (Note: `dev` here would be the device found in
   get_live_gk20a(); it is not currently in scope in this function.)
struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
if (!domain) {
	printk(KERN_INFO "[nvdebug] No IOMMU domain!\n");
	return -EIO;
}
phys_addr_t phys_addr = iommu_iova_to_phys(domain, runlist_iova);
printk(KERN_INFO "[nvdebug] Runlist PA: %pa\n", &phys_addr);
*/
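// The IOVA turned out to be a plain physical address (see above), and the
// runlist lives in sysmem, so phys_to_virt() is enough to obtain a usable
// kernel virtual address via the linear mapping.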
printk(KERN_INFO "[nvdebug] Runlist phys_to_virt: %px\n", (void*)phys_to_virt(runlist_iova));
printk(KERN_INFO "[nvdebug] Runlist *phys_to_virt: %x\n", *(u32*)phys_to_virt(runlist_iova));
head = *(struct entry_tsg*)phys_to_virt(runlist_iova);
rl_iter->curr_tsg = (struct entry_tsg*)phys_to_virt(runlist_iova);
rl_iter->rl_info = rl_info;
return 0;
//printk(KERN_INFO "[nvdebug] entry_type: %d\n", head.entry_type);
//printk(KERN_INFO "[nvdebug] timeslice_scale: %d\n", head.timeslice_scale);
//printk(KERN_INFO "[nvdebug] timeslice_timeout: %d\n", head.timeslice_timeout);
//printk(KERN_INFO "[nvdebug] tsg_length: %d\n", head.tsg_length);
//printk(KERN_INFO "[nvdebug] tsgid: %d\n", head.tsgid);
//printk(KERN_INFO "[nvdebug] Mem base phys: %p\n", (void*)virt_to_phys((void*)0xffffffc000000000ULL));
//printk(KERN_INFO "[nvdebug] Mem end phys: %p\n", (void*)virt_to_phys((void*)0xffffffc400000000ULL));
//printk(KERN_INFO "[nvdebug] Runlist *virt_addr: %x\n", readl(virt_rt_addr)); // This crashes
//read_bytes(&head, virt_rt_addr, sizeof(struct entry_tsg));
/*printk(KERN_INFO "[nvdebug] entry_type: %d\n", head.entry_type);
printk(KERN_INFO "[nvdebug] timeslice_scale: %d\n", head.timeslice_scale);
printk(KERN_INFO "[nvdebug] timeslice_timeout: %d\n", head.timeslice_timeout);
printk(KERN_INFO "[nvdebug] tsg_length: %d\n", head.tsg_length);
printk(KERN_INFO "[nvdebug] tsgid: %d\n", head.tsgid); */
}
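/* Example of how a caller might walk the runlist returned by
 * get_runlist_iter(). A minimal sketch, not part of the module proper: it
 * assumes that runlist_info_t exposes the runlist length (in entries) as
 * `len`, and that the channel entries following a TSG header have the same
 * size as struct entry_tsg, so advancing by (1 + tsg_length) entries lands
 * on the next TSG header. Verify both assumptions against nvdebug.h.
 */
static void __attribute__((unused)) dump_runlist_sketch(void) {
	struct runlist_iter iter;
	struct entry_tsg *tsg;
	int i = 0;
	if (get_runlist_iter(&iter) != 0)
		return;
	tsg = iter.curr_tsg;
	while (i < iter.rl_info.len) {
		printk(KERN_INFO "[nvdebug] TSG %d has %d channels\n",
		       tsg->tsgid, tsg->tsg_length);
		// Skip this TSG header plus the channel entries that follow it
		i += 1 + tsg->tsg_length;
		tsg += 1 + tsg->tsg_length;
	}
}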
int preempt_tsg(uint32_t tsg_id) {
struct gk20a *g = get_live_gk20a();
runlist_info_t rl_info;
pfifo_preempt_t pfifo_preempt;
runlist_disable_t rl_disable;
if (!g)
return -EIO;
rl_info.raw = nvdebug_readl(g, NV_PFIFO_RUNLIST);
pfifo_preempt.raw = 0; // Zero first so reserved bits aren't stack garbage
pfifo_preempt.id = tsg_id;
pfifo_preempt.is_pending = 0;
pfifo_preempt.type = PREEMPT_TYPE_TSG;
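// The PENDING field (is_pending) appears to be set by hardware while a
// preempt is in flight; we request the preempt with it cleared and do not
// wait for completion here (see the sketch after this function).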
// There may be a bug (?) that requires us to disable scheduling before preempting
rl_disable.raw = nvdebug_readl(g, NV_PFIFO_SCHED_DISABLE);
rl_disable.raw |= BIT(rl_info.id); // Disable runlist rl_info.id
nvdebug_writel(g, NV_PFIFO_SCHED_DISABLE, rl_disable.raw);
// Actually trigger the preemption
nvdebug_writel(g, NV_PFIFO_PREEMPT, pfifo_preempt.raw);
// Re-enable scheduling
rl_disable.raw &= ~BIT(rl_info.id); // Enable runlist rl_info.id
nvdebug_writel(g, NV_PFIFO_SCHED_DISABLE, rl_disable.raw);
printk(KERN_INFO "[nvdebug] TSG %d preempted (runlist %d)\n", tsg_id, rl_info.id);
return 0;
}
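/* Sketch of a variant that also waits for the preempt to finish. Not part
 * of the module: it assumes the hardware holds the PENDING field of
 * NV_PFIFO_PREEMPT (is_pending above) at 1 while the preempt is still in
 * flight, and it bounds the wait with a crude retry count rather than a
 * proper timeout.
 */
int preempt_tsg_and_wait(uint32_t tsg_id) {
	struct gk20a *g = get_live_gk20a();
	pfifo_preempt_t pfifo_preempt;
	int retries = 100000;
	int err;
	if (!g)
		return -EIO;
	if ((err = preempt_tsg(tsg_id)))
		return err;
	// Spin until hardware clears the pending bit, or give up
	do {
		pfifo_preempt.raw = nvdebug_readl(g, NV_PFIFO_PREEMPT);
	} while (pfifo_preempt.is_pending && --retries);
	return retries ? 0 : -ETIMEDOUT;
}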