 drivers/gpu/nvgpu/gk20a/mm_gk20a.c | 139 ++++++++++++++++++----------------
 1 file changed, 72 insertions(+), 67 deletions(-)
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index 89390c30..179e6fc1 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -89,31 +89,11 @@ void gk20a_mem_end(struct gk20a *g, struct mem_desc *mem)
 	mem->cpu_va = NULL;
 }
 
-static u64 gk20a_mem_get_vidmem_addr(struct gk20a *g, struct mem_desc *mem)
-{
-	struct gk20a_page_alloc *alloc;
-	struct page_alloc_chunk *chunk;
-
-	if (mem && mem->aperture == APERTURE_VIDMEM) {
-		alloc = (struct gk20a_page_alloc *)
-			sg_dma_address(mem->sgt->sgl);
-
-		/* This API should not be used with > 1 chunks */
-		if (alloc->nr_chunks != 1)
-			return 0;
-
-		chunk = list_first_entry(&alloc->alloc_chunks,
-				struct page_alloc_chunk, list_entry);
-		return chunk->base;
-	}
-
-	return 0;
-}
-
 /* WARNING: returns pramin_window_lock taken, complement with pramin_exit() */
-static u32 gk20a_pramin_enter(struct gk20a *g, struct mem_desc *mem, u32 w)
+static u32 gk20a_pramin_enter(struct gk20a *g, struct mem_desc *mem,
+		struct page_alloc_chunk *chunk, u32 w)
 {
-	u64 bufbase = gk20a_mem_get_vidmem_addr(g, mem);
+	u64 bufbase = chunk->base;
 	u64 addr = bufbase + w * sizeof(u32);
 	u32 hi = (u32)((addr & ~(u64)0xfffff)
 		>> bus_bar0_window_target_bar0_window_base_shift_v());
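Note (illustration only, not part of the patch): the window arithmetic above splits a vidmem address into a 1 MiB-aligned BAR0 window base and an offset inside that window. The sketch below reproduces that split with plain constants; the shift value 16 (64 KiB window-base units) is an assumption standing in for bus_bar0_window_target_bar0_window_base_shift_v(), and the 1 MiB mask mirrors the ~0xfffff used in the code.

/* pramin_window_split.c - standalone sketch of the hi/lo window split */
#include <stdint.h>
#include <stdio.h>

#define WINDOW_BASE_SHIFT 16          /* assumed value of the hw shift */
#define WINDOW_MASK       0xfffffULL  /* 1 MiB window, as in the driver code */

static void split_window(uint64_t addr, uint32_t *hi, uint32_t *lo)
{
	/* hi selects which 1 MiB page of vidmem the window exposes */
	*hi = (uint32_t)((addr & ~WINDOW_MASK) >> WINDOW_BASE_SHIFT);
	/* lo is the byte offset of the access inside that window */
	*lo = (uint32_t)(addr & WINDOW_MASK);
}

int main(void)
{
	uint32_t hi, lo;

	/* two addresses in the same 1 MiB page share the window base (hi) */
	split_window(0x12345678ULL, &hi, &lo);
	printf("hi=0x%x lo=0x%x\n", hi, lo);   /* hi=0x1230 lo=0x45678 */
	split_window(0x123fffffULL, &hi, &lo);
	printf("hi=0x%x lo=0x%x\n", hi, lo);   /* hi=0x1230 lo=0xfffff */
	return 0;
}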
@@ -124,8 +104,9 @@ static u32 gk20a_pramin_enter(struct gk20a *g, struct mem_desc *mem, u32 w)
 		bus_bar0_window_base_f(hi);
 
 	gk20a_dbg(gpu_dbg_mem,
-			"0x%08x:%08x begin for %p at [%llx,%llx] (sz %zu)",
-			hi, lo, mem, bufbase, bufbase + mem->size, mem->size);
+			"0x%08x:%08x begin for %p,%p at [%llx,%llx] (sz %llx)",
+			hi, lo, mem, chunk, bufbase,
+			bufbase + chunk->length, chunk->length);
 
 	WARN_ON(!bufbase);
 
@@ -140,42 +121,14 @@ static u32 gk20a_pramin_enter(struct gk20a *g, struct mem_desc *mem, u32 w)
 	return lo;
 }
 
-static void gk20a_pramin_exit(struct gk20a *g, struct mem_desc *mem)
+static void gk20a_pramin_exit(struct gk20a *g, struct mem_desc *mem,
+		struct page_alloc_chunk *chunk)
 {
-	gk20a_dbg(gpu_dbg_mem, "end for %p", mem);
+	gk20a_dbg(gpu_dbg_mem, "end for %p,%p", mem, chunk);
 
 	spin_unlock(&g->mm.pramin_window_lock);
 }
 
-u32 gk20a_mem_rd32(struct gk20a *g, struct mem_desc *mem, u32 w)
-{
-	u32 data = 0;
-
-	if (mem->aperture == APERTURE_SYSMEM && !g->mm.force_pramin) {
-		u32 *ptr = mem->cpu_va;
-
-		WARN_ON(!ptr);
-		data = ptr[w];
-#ifdef CONFIG_TEGRA_SIMULATION_PLATFORM
-		gk20a_dbg(gpu_dbg_mem, " %p = 0x%x", ptr + w, data);
-#endif
-	} else if (mem->aperture == APERTURE_VIDMEM || g->mm.force_pramin) {
-		u32 addr = gk20a_pramin_enter(g, mem, w);
-		data = gk20a_readl(g, pram_data032_r(addr / sizeof(u32)));
-		gk20a_pramin_exit(g, mem);
-	} else {
-		WARN_ON("Accessing unallocated mem_desc");
-	}
-
-	return data;
-}
-
-u32 gk20a_mem_rd(struct gk20a *g, struct mem_desc *mem, u32 offset)
-{
-	WARN_ON(offset & 3);
-	return gk20a_mem_rd32(g, mem, offset / sizeof(u32));
-}
-
 /*
  * Batch innerloop for the function below once per each PRAMIN range (some
  * 4B..1MB at a time). "start" reg goes as-is to gk20a_{readl,writel}.
@@ -191,22 +144,40 @@ typedef void (*pramin_access_batch_fn)(struct gk20a *g, u32 start, u32 words,
 static inline void pramin_access_batched(struct gk20a *g, struct mem_desc *mem,
 		u32 offset, u32 size, pramin_access_batch_fn loop, u32 **arg)
 {
+	struct gk20a_page_alloc *alloc = NULL;
+	struct page_alloc_chunk *chunk = NULL;
+	u32 byteoff, start_reg, until_end, n;
+
+	alloc = (struct gk20a_page_alloc *)sg_dma_address(mem->sgt->sgl);
+	list_for_each_entry(chunk, &alloc->alloc_chunks, list_entry) {
+		if (offset >= chunk->length)
+			offset -= chunk->length;
+		else
+			break;
+	}
+
 	offset /= sizeof(u32);
 
 	while (size) {
-		u32 byteoff = gk20a_pramin_enter(g, mem, offset);
-		u32 start_reg = pram_data032_r(byteoff / sizeof(u32));
-		u32 until_end = SZ_1M - (byteoff & (SZ_1M - 1));
-		u32 n = min(size, until_end);
+		byteoff = gk20a_pramin_enter(g, mem, chunk, offset);
+		start_reg = pram_data032_r(byteoff / sizeof(u32));
+		until_end = SZ_1M - (byteoff & (SZ_1M - 1));
+
+		n = min3(size, until_end, (u32)(chunk->length - offset));
 
 		loop(g, start_reg, n / sizeof(u32), arg);
 
 		/* read back to synchronize accesses */
 		gk20a_readl(g, start_reg);
-		gk20a_pramin_exit(g, mem);
+		gk20a_pramin_exit(g, mem, chunk);
 
 		offset += n / sizeof(u32);
 		size -= n;
+
+		if (n == (chunk->length - offset)) {
+			chunk = list_next_entry(chunk, list_entry);
+			offset = 0;
+		}
 	}
 }
 
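Note (illustration only, not the driver code): the chunk walk added above reduces the caller's byte offset by whole chunk lengths until it lands inside one chunk, which then becomes the starting point for the batched window accesses. The sketch below models just that step with a plain array standing in for the alloc_chunks list; the struct and function names are made up for the example.

/* chunk_walk.c - simplified model of locating the starting chunk */
#include <stdint.h>
#include <stdio.h>

struct chunk {
	uint64_t base;    /* vidmem base address of this chunk */
	uint64_t length;  /* chunk size in bytes */
};

/* Return the index of the chunk containing *offset and make *offset
 * chunk-relative, mirroring the list_for_each_entry() walk in the patch. */
static size_t find_chunk(const struct chunk *chunks, size_t n, uint64_t *offset)
{
	size_t i;

	for (i = 0; i < n - 1; i++) {
		if (*offset < chunks[i].length)
			break;
		*offset -= chunks[i].length;
	}
	return i;
}

int main(void)
{
	const struct chunk chunks[] = {
		{ 0x100000, 0x1000 },  /* 4 KiB */
		{ 0x200000, 0x2000 },  /* 8 KiB */
		{ 0x300000, 0x1000 },  /* 4 KiB */
	};
	uint64_t offset = 0x1800;  /* 0x800 bytes into the second chunk */
	size_t i = find_chunk(chunks, 3, &offset);

	/* prints: chunk 1, relative offset 0x800 */
	printf("chunk %zu, relative offset 0x%llx\n",
	       i, (unsigned long long)offset);
	return 0;
}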
@@ -247,6 +218,40 @@ static inline void pramin_access_batch_set(struct gk20a *g, u32 start,
 	}
 }
 
+u32 gk20a_mem_rd32(struct gk20a *g, struct mem_desc *mem, u32 w)
+{
+	u32 data = 0;
+
+	if (mem->aperture == APERTURE_SYSMEM && !g->mm.force_pramin) {
+		u32 *ptr = mem->cpu_va;
+
+		WARN_ON(!ptr);
+		data = ptr[w];
+#ifdef CONFIG_TEGRA_SIMULATION_PLATFORM
+		gk20a_dbg(gpu_dbg_mem, " %p = 0x%x", ptr + w, data);
+#endif
+	} else if (mem->aperture == APERTURE_VIDMEM || g->mm.force_pramin) {
+		u32 value;
+		u32 *p = &value;
+
+		pramin_access_batched(g, mem, w * sizeof(u32), sizeof(u32),
+				pramin_access_batch_rd_n, &p);
+
+		data = value;
+
+	} else {
+		WARN_ON("Accessing unallocated mem_desc");
+	}
+
+	return data;
+}
+
+u32 gk20a_mem_rd(struct gk20a *g, struct mem_desc *mem, u32 offset)
+{
+	WARN_ON(offset & 3);
+	return gk20a_mem_rd32(g, mem, offset / sizeof(u32));
+}
+
 void gk20a_mem_rd_n(struct gk20a *g, struct mem_desc *mem,
 		u32 offset, void *dest, u32 size)
 {
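Note (illustration only, hypothetical stand-ins, no hardware access): the batched loop may invoke its callback once per PRAMIN range, as the comment above pramin_access_batched() says, so the callbacks take u32 ** and advance *arg past the words they have handled. That is why the vidmem branch of gk20a_mem_rd32() can pass &p with p pointing at a single local u32. The sketch below mirrors that calling convention with a fake register read.

/* batch_arg.c - why the batch callbacks take a u32 ** argument */
#include <stdint.h>
#include <stdio.h>

/* stand-in for the vidmem readl path: produce deterministic data */
static uint32_t fake_readl(uint32_t reg)
{
	return reg ^ 0xdeadbeef;
}

/* mirrors the shape of a read batch callback: copy 'words' and advance *arg */
static void batch_rd_n(uint32_t start, uint32_t words, uint32_t **arg)
{
	uint32_t *dest = *arg;

	while (words--)
		*dest++ = fake_readl(start++);

	*arg = dest;  /* the next invocation continues where this one stopped */
}

int main(void)
{
	uint32_t value;
	uint32_t *p = &value;
	uint32_t buf[4], *q = buf;

	/* single-word "read", as in the vidmem branch of gk20a_mem_rd32() */
	batch_rd_n(0x10, 1, &p);
	printf("0x%08x\n", value);

	/* a read split across two segments still fills buf in order */
	batch_rd_n(0x20, 2, &q);
	batch_rd_n(0x22, 2, &q);
	printf("0x%08x 0x%08x 0x%08x 0x%08x\n", buf[0], buf[1], buf[2], buf[3]);
	return 0;
}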
@@ -284,11 +289,11 @@ void gk20a_mem_wr32(struct gk20a *g, struct mem_desc *mem, u32 w, u32 data)
 #endif
 		ptr[w] = data;
 	} else if (mem->aperture == APERTURE_VIDMEM || g->mm.force_pramin) {
-		u32 addr = gk20a_pramin_enter(g, mem, w);
-		gk20a_writel(g, pram_data032_r(addr / sizeof(u32)), data);
-		/* read back to synchronize accesses */
-		gk20a_readl(g, pram_data032_r(addr / sizeof(u32)));
-		gk20a_pramin_exit(g, mem);
+		u32 value = data;
+		u32 *p = &value;
+
+		pramin_access_batched(g, mem, w * sizeof(u32), sizeof(u32),
+				pramin_access_batch_wr_n, &p);
 	} else {
 		WARN_ON("Accessing unallocated mem_desc");
 	}
@@ -3000,7 +3005,7 @@ static void gk20a_gmmu_free_attr_vid(struct gk20a *g, enum dma_attr attr,
 			schedule_work(&g->mm.vidmem_clear_mem_worker);
 		}
 	} else {
-		/* TODO: clear with PRAMIN here */
+		gk20a_memset(g, mem, 0, 0, mem->size);
 		gk20a_free(mem->allocator,
 			sg_dma_address(mem->sgt->sgl));
 		gk20a_free_sgtable(&mem->sgt);