path: root/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
author	Konsta Holtta <kholtta@nvidia.com>	2016-09-12 05:37:30 -0400
committer	mobile promotions <svcmobile_promotions@nvidia.com>	2016-10-17 17:39:00 -0400
commit	a8e260bc8dcc0ce9efe622f01ff9a1b2db24f8df (patch)
tree	88c40719282c4d77b00d7514f167a6395cf7075c /drivers/gpu/nvgpu/gk20a/mm_gk20a.c
parent	718af968f056cb1bdf5b57e1ce9957720d6053a4 (diff)
gpu: nvgpu: allow skipping pramin barriers
A wmb() next to each gk20a_mem_wr32() via PRAMIN may be overly careful,
so support not inserting these barriers, for performance, in cases where
they are not necessary: namely, where the caller does one explicit
barrier after a batch of writes. Also, move the remaining optional
wmb()s in gk20a_mem_{wr_n,memset} from the per-batch subloops, which may
run multiple times, to the end of the whole internally batched write.

Jira DNVGPU-23

Change-Id: I61ee65418335863110bca6f036b2e883b048c5c2
Signed-off-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-on: http://git-master/r/1225149
(cherry picked from commit d2c40327d1995f76e8ab9cb4cd8c76407dabc6de)
Reviewed-on: http://git-master/r/1227474
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
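For reference, a minimal caller-side sketch of the pattern this change
enables. The helper name fill_via_pramin_once, its loop, and the
clearing of the flag afterwards are hypothetical; struct gk20a,
struct mem_desc, the new skip_wmb field, gk20a_mem_wr32() and wmb()
come from this patch and its surroundings.

/* Hypothetical caller: suppress the per-write barrier, batch the
 * writes, then order them all with a single explicit wmb(). */
static void fill_via_pramin_once(struct gk20a *g, struct mem_desc *mem,
				 u32 *vals, u32 n)
{
	u32 i;

	mem->skip_wmb = true;	/* skip the wmb() after each write */

	for (i = 0; i < n; i++)
		gk20a_mem_wr32(g, mem, i, vals[i]);

	wmb();			/* one barrier covers the whole batch */
	mem->skip_wmb = false;	/* restore the safe default */
}

The win is replacing n barriers with one when the relative order of the
writes within the batch does not matter, which is also why the per-batch
wmb()s below could be hoisted out of the subloops.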
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/mm_gk20a.c')
-rw-r--r--	drivers/gpu/nvgpu/gk20a/mm_gk20a.c | 18 ++++++------------
1 file changed, 6 insertions(+), 12 deletions(-)
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index 0c1c6d8f..4ae09e89 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -234,12 +234,6 @@ static inline void pramin_access_batch_wr_n(struct gk20a *g, u32 start,
 		r += sizeof(u32);
 	}
 
-	/*
-	 * Barrier moved here from gk20a_writel in the loop. The writes don't
-	 * have to be ordered.
-	 */
-	wmb();
-
 	*arg = src_u32;
 }
 
@@ -252,12 +246,6 @@ static inline void pramin_access_batch_set(struct gk20a *g, u32 start,
 		writel_relaxed(repeat, g->regs + r);
 		r += sizeof(u32);
 	}
-
-	/*
-	 * Barrier moved here from gk20a_writel in the loop. The writes don't
-	 * have to be ordered.
-	 */
-	wmb();
 }
 
 u32 gk20a_mem_rd32(struct gk20a *g, struct mem_desc *mem, u32 w)
@@ -336,6 +324,8 @@ void gk20a_mem_wr32(struct gk20a *g, struct mem_desc *mem, u32 w, u32 data)
 
 		pramin_access_batched(g, mem, w * sizeof(u32), sizeof(u32),
 				pramin_access_batch_wr_n, &p);
+		if (!mem->skip_wmb)
+			wmb();
 	} else {
 		WARN_ON("Accessing unallocated mem_desc");
 	}
@@ -368,6 +358,8 @@ void gk20a_mem_wr_n(struct gk20a *g, struct mem_desc *mem, u32 offset,
 
 		pramin_access_batched(g, mem, offset, size,
 				pramin_access_batch_wr_n, &src_u32);
+		if (!mem->skip_wmb)
+			wmb();
 	} else {
 		WARN_ON("Accessing unallocated mem_desc");
 	}
@@ -398,6 +390,8 @@ void gk20a_memset(struct gk20a *g, struct mem_desc *mem, u32 offset,
 
 		pramin_access_batched(g, mem, offset, size,
 				pramin_access_batch_set, &p);
+		if (!mem->skip_wmb)
+			wmb();
 	} else {
 		WARN_ON("Accessing unallocated mem_desc");
 	}