diff options
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/mm_gk20a.c | 151 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/mm_gk20a.h | 4 |
2 files changed, 132 insertions, 23 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c index 60c1b7ea..14a3dbc6 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c | |||
@@ -150,19 +150,101 @@ u32 gk20a_mem_rd(struct gk20a *g, struct mem_desc *mem, u32 offset) | |||
150 | return gk20a_mem_rd32(g, mem, offset / sizeof(u32)); | 150 | return gk20a_mem_rd32(g, mem, offset / sizeof(u32)); |
151 | } | 151 | } |
152 | 152 | ||
153 | /* | ||
154 | * Batch innerloop for the function below once per each PRAMIN range (some | ||
155 | * 4B..1MB at a time). "start" reg goes as-is to gk20a_{readl,writel}. | ||
156 | */ | ||
157 | typedef void (*pramin_access_batch_fn)(struct gk20a *g, u32 start, u32 words, | ||
158 | u32 **arg); | ||
159 | |||
160 | /* | ||
161 | * The PRAMIN range is 1 MB, must change base addr if a buffer crosses that. | ||
162 | * This same loop is used for read/write/memset. Offset and size in bytes. | ||
163 | * One call to "loop" is done per range, with "arg" supplied. | ||
164 | */ | ||
165 | static inline void pramin_access_batched(struct gk20a *g, struct mem_desc *mem, | ||
166 | u32 offset, u32 size, pramin_access_batch_fn loop, u32 **arg) | ||
167 | { | ||
168 | offset /= sizeof(u32); | ||
169 | |||
170 | while (size) { | ||
171 | u32 byteoff = gk20a_pramin_enter(g, mem, offset); | ||
172 | u32 start_reg = pram_data032_r(byteoff / sizeof(u32)); | ||
173 | u32 until_end = SZ_1M - (byteoff & (SZ_1M - 1)); | ||
174 | u32 n = min(size, until_end); | ||
175 | |||
176 | loop(g, start_reg, n / sizeof(u32), arg); | ||
177 | |||
178 | /* read back to synchronize accesses */ | ||
179 | gk20a_readl(g, start_reg); | ||
180 | gk20a_pramin_exit(g, mem); | ||
181 | |||
182 | offset += n / sizeof(u32); | ||
183 | size -= n; | ||
184 | } | ||
185 | } | ||
186 | |||
187 | static inline void pramin_access_batch_rd_n(struct gk20a *g, u32 start, | ||
188 | u32 words, u32 **arg) | ||
189 | { | ||
190 | u32 r = start, *dest_u32 = *arg; | ||
191 | |||
192 | while (words--) { | ||
193 | *dest_u32++ = gk20a_readl(g, r); | ||
194 | r += sizeof(u32); | ||
195 | } | ||
196 | |||
197 | *arg = dest_u32; | ||
198 | } | ||
199 | |||
200 | static inline void pramin_access_batch_wr_n(struct gk20a *g, u32 start, | ||
201 | u32 words, u32 **arg) | ||
202 | { | ||
203 | u32 r = start, *src_u32 = *arg; | ||
204 | |||
205 | while (words--) { | ||
206 | gk20a_writel(g, r, *src_u32++); | ||
207 | r += sizeof(u32); | ||
208 | } | ||
209 | |||
210 | *arg = src_u32; | ||
211 | } | ||
212 | |||
213 | static inline void pramin_access_batch_set(struct gk20a *g, u32 start, | ||
214 | u32 words, u32 **arg) | ||
215 | { | ||
216 | u32 r = start, repeat = **arg; | ||
217 | |||
218 | while (words--) { | ||
219 | gk20a_writel(g, r, repeat); | ||
220 | r += sizeof(u32); | ||
221 | } | ||
222 | } | ||
223 | |||
153 | void gk20a_mem_rd_n(struct gk20a *g, struct mem_desc *mem, | 224 | void gk20a_mem_rd_n(struct gk20a *g, struct mem_desc *mem, |
154 | u32 offset, void *dest, u32 size) | 225 | u32 offset, void *dest, u32 size) |
155 | { | 226 | { |
156 | u32 i; | ||
157 | u32 *dest_u32 = dest; | ||
158 | |||
159 | WARN_ON(offset & 3); | 227 | WARN_ON(offset & 3); |
160 | WARN_ON(size & 3); | 228 | WARN_ON(size & 3); |
161 | offset /= sizeof(u32); | ||
162 | size /= sizeof(u32); | ||
163 | 229 | ||
164 | for (i = 0; i < size; i++) | 230 | if (mem->aperture == APERTURE_SYSMEM && !g->mm.force_pramin) { |
165 | dest_u32[i] = gk20a_mem_rd32(g, mem, offset + i); | 231 | u8 *src = (u8 *)mem->cpu_va + offset; |
232 | |||
233 | WARN_ON(!mem->cpu_va); | ||
234 | memcpy(dest, src, size); | ||
235 | #ifdef CONFIG_TEGRA_SIMULATION_PLATFORM | ||
236 | if (size) | ||
237 | gk20a_dbg(gpu_dbg_mem, " %p = 0x%x ... [%d bytes]", | ||
238 | src, *dest, size); | ||
239 | #endif | ||
240 | } else if (mem->aperture == APERTURE_VIDMEM || g->mm.force_pramin) { | ||
241 | u32 *dest_u32 = dest; | ||
242 | |||
243 | pramin_access_batched(g, mem, offset, size, | ||
244 | pramin_access_batch_rd_n, &dest_u32); | ||
245 | } else { | ||
246 | WARN_ON("Accessing unallocated mem_desc"); | ||
247 | } | ||
166 | } | 248 | } |
167 | 249 | ||
168 | void gk20a_mem_wr32(struct gk20a *g, struct mem_desc *mem, u32 w, u32 data) | 250 | void gk20a_mem_wr32(struct gk20a *g, struct mem_desc *mem, u32 w, u32 data) |
@@ -195,30 +277,57 @@ void gk20a_mem_wr(struct gk20a *g, struct mem_desc *mem, u32 offset, u32 data) | |||
195 | void gk20a_mem_wr_n(struct gk20a *g, struct mem_desc *mem, u32 offset, | 277 | void gk20a_mem_wr_n(struct gk20a *g, struct mem_desc *mem, u32 offset, |
196 | void *src, u32 size) | 278 | void *src, u32 size) |
197 | { | 279 | { |
198 | u32 i; | ||
199 | u32 *src_u32 = src; | ||
200 | |||
201 | WARN_ON(offset & 3); | 280 | WARN_ON(offset & 3); |
202 | WARN_ON(size & 3); | 281 | WARN_ON(size & 3); |
203 | offset /= sizeof(u32); | ||
204 | size /= sizeof(u32); | ||
205 | 282 | ||
206 | for (i = 0; i < size; i++) | 283 | if (mem->aperture == APERTURE_SYSMEM && !g->mm.force_pramin) { |
207 | gk20a_mem_wr32(g, mem, offset + i, src_u32[i]); | 284 | u8 *dest = (u8 *)mem->cpu_va + offset; |
285 | |||
286 | WARN_ON(!mem->cpu_va); | ||
287 | #ifdef CONFIG_TEGRA_SIMULATION_PLATFORM | ||
288 | if (size) | ||
289 | gk20a_dbg(gpu_dbg_mem, " %p = 0x%x ... [%d bytes]", | ||
290 | dest, *src, size); | ||
291 | #endif | ||
292 | memcpy(dest, src, size); | ||
293 | } else if (mem->aperture == APERTURE_VIDMEM || g->mm.force_pramin) { | ||
294 | u32 *src_u32 = src; | ||
295 | |||
296 | pramin_access_batched(g, mem, offset, size, | ||
297 | pramin_access_batch_wr_n, &src_u32); | ||
298 | } else { | ||
299 | WARN_ON("Accessing unallocated mem_desc"); | ||
300 | } | ||
208 | } | 301 | } |
209 | 302 | ||
210 | void gk20a_memset(struct gk20a *g, struct mem_desc *mem, u32 offset, | 303 | void gk20a_memset(struct gk20a *g, struct mem_desc *mem, u32 offset, |
211 | u32 value, u32 size) | 304 | u32 c, u32 size) |
212 | { | 305 | { |
213 | u32 i; | ||
214 | |||
215 | WARN_ON(offset & 3); | 306 | WARN_ON(offset & 3); |
216 | WARN_ON(size & 3); | 307 | WARN_ON(size & 3); |
217 | offset /= sizeof(u32); | 308 | WARN_ON(c & ~0xff); |
218 | size /= sizeof(u32); | 309 | |
310 | c &= 0xff; | ||
311 | |||
312 | if (mem->aperture == APERTURE_SYSMEM && !g->mm.force_pramin) { | ||
313 | u8 *dest = (u8 *)mem->cpu_va + offset; | ||
314 | |||
315 | WARN_ON(!mem->cpu_va); | ||
316 | #ifdef CONFIG_TEGRA_SIMULATION_PLATFORM | ||
317 | if (size) | ||
318 | gk20a_dbg(gpu_dbg_mem, " %p = 0x%x [times %d]", | ||
319 | dest, c, size); | ||
320 | #endif | ||
321 | memset(dest, c, size); | ||
322 | } else if (mem->aperture == APERTURE_VIDMEM || g->mm.force_pramin) { | ||
323 | u32 repeat_value = c | (c << 8) | (c << 16) | (c << 24); | ||
324 | u32 *p = &repeat_value; | ||
219 | 325 | ||
220 | for (i = 0; i < size; i++) | 326 | pramin_access_batched(g, mem, offset, size, |
221 | gk20a_mem_wr32(g, mem, offset + i, value); | 327 | pramin_access_batch_set, &p); |
328 | } else { | ||
329 | WARN_ON("Accessing unallocated mem_desc"); | ||
330 | } | ||
222 | } | 331 | } |
223 | 332 | ||
224 | /* | 333 | /* |
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h index d1628b07..23420fef 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h | |||
@@ -458,9 +458,9 @@ void gk20a_mem_wr(struct gk20a *g, struct mem_desc *mem, u32 offset, u32 data); | |||
458 | /* memcpy from cpu, offset and size in bytes (32b-aligned) */ | 458 | /* memcpy from cpu, offset and size in bytes (32b-aligned) */ |
459 | void gk20a_mem_wr_n(struct gk20a *g, struct mem_desc *mem, u32 offset, | 459 | void gk20a_mem_wr_n(struct gk20a *g, struct mem_desc *mem, u32 offset, |
460 | void *src, u32 size); | 460 | void *src, u32 size); |
461 | /* size and offset in bytes (32b-aligned), filled with u32s */ | 461 | /* size and offset in bytes (32b-aligned), filled with the constant byte c */ |
462 | void gk20a_memset(struct gk20a *g, struct mem_desc *mem, u32 offset, | 462 | void gk20a_memset(struct gk20a *g, struct mem_desc *mem, u32 offset, |
463 | u32 value, u32 size); | 463 | u32 c, u32 size); |
464 | 464 | ||
465 | #if 0 /*related to addr bits above, concern below TBD on which is accurate */ | 465 | #if 0 /*related to addr bits above, concern below TBD on which is accurate */ |
466 | #define bar1_instance_block_shift_gk20a() (max_physaddr_bits_gk20a() -\ | 466 | #define bar1_instance_block_shift_gk20a() (max_physaddr_bits_gk20a() -\ |