diff options
Diffstat (limited to 'drivers/gpu/nvgpu/common/mm/gmmu.c')
-rw-r--r-- | drivers/gpu/nvgpu/common/mm/gmmu.c | 138 |
1 files changed, 138 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/common/mm/gmmu.c b/drivers/gpu/nvgpu/common/mm/gmmu.c index a2ed3f3a..695347bc 100644 --- a/drivers/gpu/nvgpu/common/mm/gmmu.c +++ b/drivers/gpu/nvgpu/common/mm/gmmu.c | |||
@@ -15,12 +15,150 @@ | |||
15 | */ | 15 | */ |
16 | 16 | ||
17 | #include <nvgpu/log.h> | 17 | #include <nvgpu/log.h> |
18 | #include <nvgpu/dma.h> | ||
18 | #include <nvgpu/gmmu.h> | 19 | #include <nvgpu/gmmu.h> |
19 | #include <nvgpu/nvgpu_mem.h> | 20 | #include <nvgpu/nvgpu_mem.h> |
20 | 21 | ||
21 | #include "gk20a/gk20a.h" | 22 | #include "gk20a/gk20a.h" |
22 | #include "gk20a/mm_gk20a.h" | 23 | #include "gk20a/mm_gk20a.h" |
23 | 24 | ||
25 | static int alloc_gmmu_phys_pages(struct vm_gk20a *vm, u32 order, | ||
26 | struct gk20a_mm_entry *entry) | ||
27 | { | ||
28 | u32 num_pages = 1 << order; | ||
29 | u32 len = num_pages * PAGE_SIZE; | ||
30 | int err; | ||
31 | struct page *pages; | ||
32 | struct gk20a *g = vm->mm->g; | ||
33 | |||
34 | /* note: mem_desc slightly abused (wrt. alloc_gmmu_pages) */ | ||
35 | |||
36 | pages = alloc_pages(GFP_KERNEL, order); | ||
37 | if (!pages) { | ||
38 | nvgpu_log(g, gpu_dbg_pte, "alloc_pages failed"); | ||
39 | goto err_out; | ||
40 | } | ||
41 | entry->mem.priv.sgt = nvgpu_kzalloc(g, sizeof(*entry->mem.priv.sgt)); | ||
42 | if (!entry->mem.priv.sgt) { | ||
43 | nvgpu_log(g, gpu_dbg_pte, "cannot allocate sg table"); | ||
44 | goto err_alloced; | ||
45 | } | ||
46 | err = sg_alloc_table(entry->mem.priv.sgt, 1, GFP_KERNEL); | ||
47 | if (err) { | ||
48 | nvgpu_log(g, gpu_dbg_pte, "sg_alloc_table failed"); | ||
49 | goto err_sg_table; | ||
50 | } | ||
51 | sg_set_page(entry->mem.priv.sgt->sgl, pages, len, 0); | ||
52 | entry->mem.cpu_va = page_address(pages); | ||
53 | memset(entry->mem.cpu_va, 0, len); | ||
54 | entry->mem.size = len; | ||
55 | entry->mem.aperture = APERTURE_SYSMEM; | ||
56 | FLUSH_CPU_DCACHE(entry->mem.cpu_va, | ||
57 | sg_phys(entry->mem.priv.sgt->sgl), len); | ||
58 | |||
59 | return 0; | ||
60 | |||
61 | err_sg_table: | ||
62 | nvgpu_kfree(vm->mm->g, entry->mem.priv.sgt); | ||
63 | err_alloced: | ||
64 | __free_pages(pages, order); | ||
65 | err_out: | ||
66 | return -ENOMEM; | ||
67 | } | ||
68 | |||
69 | static int nvgpu_alloc_gmmu_pages(struct vm_gk20a *vm, u32 order, | ||
70 | struct gk20a_mm_entry *entry) | ||
71 | { | ||
72 | struct gk20a *g = gk20a_from_vm(vm); | ||
73 | u32 num_pages = 1 << order; | ||
74 | u32 len = num_pages * PAGE_SIZE; | ||
75 | int err; | ||
76 | |||
77 | if (g->is_fmodel) | ||
78 | return alloc_gmmu_phys_pages(vm, order, entry); | ||
79 | |||
80 | /* | ||
81 | * On arm32 we're limited by vmalloc space, so we do not map pages by | ||
82 | * default. | ||
83 | */ | ||
84 | if (IS_ENABLED(CONFIG_ARM64)) | ||
85 | err = nvgpu_dma_alloc(g, len, &entry->mem); | ||
86 | else | ||
87 | err = nvgpu_dma_alloc_flags(g, NVGPU_DMA_NO_KERNEL_MAPPING, | ||
88 | len, &entry->mem); | ||
89 | |||
90 | |||
91 | if (err) { | ||
92 | nvgpu_err(g, "memory allocation failed"); | ||
93 | return -ENOMEM; | ||
94 | } | ||
95 | |||
96 | return 0; | ||
97 | } | ||
98 | |||
99 | /* | ||
100 | * Allocate a phys contig region big enough for a full | ||
101 | * sized gmmu page table for the given gmmu_page_size. | ||
102 | * the whole range is zeroed so it's "invalid"/will fault. | ||
103 | * | ||
104 | * If a previous entry is supplied, its memory will be used for | ||
105 | * suballocation for this next entry too, if there is space. | ||
106 | */ | ||
107 | int nvgpu_zalloc_gmmu_page_table(struct vm_gk20a *vm, | ||
108 | enum gmmu_pgsz_gk20a pgsz_idx, | ||
109 | const struct gk20a_mmu_level *l, | ||
110 | struct gk20a_mm_entry *entry, | ||
111 | struct gk20a_mm_entry *prev_entry) | ||
112 | { | ||
113 | int err = -ENOMEM; | ||
114 | int order; | ||
115 | struct gk20a *g = gk20a_from_vm(vm); | ||
116 | u32 bytes; | ||
117 | |||
118 | /* allocate enough pages for the table */ | ||
119 | order = l->hi_bit[pgsz_idx] - l->lo_bit[pgsz_idx] + 1; | ||
120 | order += ilog2(l->entry_size); | ||
121 | bytes = 1 << order; | ||
122 | order -= PAGE_SHIFT; | ||
123 | if (order < 0 && prev_entry) { | ||
124 | /* try to suballocate from previous chunk */ | ||
125 | u32 capacity = prev_entry->mem.size / bytes; | ||
126 | u32 prev = prev_entry->woffset * sizeof(u32) / bytes; | ||
127 | u32 free = capacity - prev - 1; | ||
128 | |||
129 | nvgpu_log(g, gpu_dbg_pte, "cap %d prev %d free %d bytes %d", | ||
130 | capacity, prev, free, bytes); | ||
131 | |||
132 | if (free) { | ||
133 | memcpy(&entry->mem, &prev_entry->mem, | ||
134 | sizeof(entry->mem)); | ||
135 | entry->woffset = prev_entry->woffset | ||
136 | + bytes / sizeof(u32); | ||
137 | err = 0; | ||
138 | } | ||
139 | } | ||
140 | |||
141 | if (err) { | ||
142 | /* no suballoc space */ | ||
143 | order = max(0, order); | ||
144 | err = nvgpu_alloc_gmmu_pages(vm, order, entry); | ||
145 | entry->woffset = 0; | ||
146 | } | ||
147 | |||
148 | nvgpu_log(g, gpu_dbg_pte, "entry = 0x%p, addr=%08llx, size %d, woff %x", | ||
149 | entry, | ||
150 | (entry->mem.priv.sgt && | ||
151 | entry->mem.aperture == APERTURE_SYSMEM) ? | ||
152 | g->ops.mm.get_iova_addr(g, entry->mem.priv.sgt->sgl, 0) : 0, | ||
153 | order, entry->woffset); | ||
154 | if (err) | ||
155 | return err; | ||
156 | entry->pgsz = pgsz_idx; | ||
157 | entry->mem.skip_wmb = true; | ||
158 | |||
159 | return err; | ||
160 | } | ||
161 | |||
24 | /* | 162 | /* |
25 | * Core GMMU map function for the kernel to use. If @addr is 0 then the GPU | 163 | * Core GMMU map function for the kernel to use. If @addr is 0 then the GPU |
26 | * VA will be allocated for you. If addr is non-zero then the buffer will be | 164 | * VA will be allocated for you. If addr is non-zero then the buffer will be |