author	Terje Bergstrom <tbergstrom@nvidia.com>	2014-05-08 08:13:32 -0400
committer	Dan Willemsen <dwillemsen@nvidia.com>	2015-03-18 15:09:49 -0400
commit	48239f5f8cb5763cf4b6dc5db4668257da153cf9 (patch)
tree	b12e13c981efe9db4e22cfe696bbd3c62ab77089 /drivers/gpu
parent	3e5c123862c87e22311c21558178f287f85ecb5d (diff)
gpu: nvgpu: Prune redundant cache maintenance
Remove redundant cache maintenance operations. Instance blocks and
graphics context buffers are uncached, so they do not need any cache
maintenance.

Bug 1421824

Change-Id: Ie0be67bf0be493d9ec9e6f8226f2f9359cba9f54
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/406948
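The rationale above is that these buffers are uncached from the GPU's point of
view, so CPU-side updates need no GPU L2 flush/invalidate around them. As a
rough analogy only (not nvgpu code), the same principle shows up in the generic
Linux DMA API: a coherent allocation is mapped uncached and needs no explicit
maintenance, while a cacheable streaming mapping must be synced before the
device reads it. The device pointer, buffer size, and helper names in this
sketch are hypothetical.

#include <linux/dma-mapping.h>
#include <linux/slab.h>

#define BUF_SIZE 4096

/* Coherent (uncached) buffer: a CPU write is visible to the device
 * without any flush or invalidate. */
static void *fill_coherent(struct device *dev, dma_addr_t *iova)
{
	u32 *buf = dma_alloc_coherent(dev, BUF_SIZE, iova, GFP_KERNEL);

	if (buf)
		buf[0] = 0x1;	/* no cache maintenance needed */
	return buf;
}

/* Cacheable streaming buffer: CPU writes sit in the CPU cache until the
 * mapping call pushes them out toward the device. */
static void *fill_streaming(struct device *dev, dma_addr_t *iova)
{
	u32 *buf = kmalloc(BUF_SIZE, GFP_KERNEL);

	if (!buf)
		return NULL;
	buf[0] = 0x1;
	*iova = dma_map_single(dev, buf, BUF_SIZE, DMA_TO_DEVICE);
	if (dma_mapping_error(dev, *iova)) {
		kfree(buf);
		return NULL;
	}
	return buf;	/* dma_map_single() performed the cache maintenance */
}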
Diffstat (limited to 'drivers/gpu')
-rw-r--r--	drivers/gpu/nvgpu/gk20a/channel_gk20a.c	17
-rw-r--r--	drivers/gpu/nvgpu/gk20a/gr_gk20a.c	31
-rw-r--r--	drivers/gpu/nvgpu/gk20a/mm_gk20a.c	2
3 files changed, 0 insertions, 50 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
index 00f8ac94..61938f8e 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -128,8 +128,6 @@ int channel_gk20a_commit_va(struct channel_gk20a *c)
 	gk20a_mem_wr32(inst_ptr, ram_in_adr_limit_hi_w(),
 		ram_in_adr_limit_hi_f(u64_hi32(c->vm->va_limit)));
 
-	gk20a_mm_l2_invalidate(c->g);
-
 	return 0;
 }
 
@@ -159,8 +157,6 @@ static int channel_gk20a_commit_userd(struct channel_gk20a *c)
 		pbdma_userd_target_vid_mem_f() |
 		pbdma_userd_hi_addr_f(addr_hi));
 
-	gk20a_mm_l2_invalidate(c->g);
-
 	return 0;
 }
 
@@ -183,9 +179,6 @@ static int channel_gk20a_set_schedule_params(struct channel_gk20a *c,
 	/* preempt the channel */
 	WARN_ON(gk20a_fifo_preempt_channel(c->g, c->hw_chid));
 
-	/* flush GPU cache */
-	gk20a_mm_l2_flush(c->g, true);
-
 	/* value field is 8 bits long */
 	while (value >= 1 << 8) {
 		value >>= 1;
@@ -209,8 +202,6 @@ static int channel_gk20a_set_schedule_params(struct channel_gk20a *c,
 		gk20a_readl(c->g, ccsr_channel_r(c->hw_chid)) |
 		ccsr_channel_enable_set_true_f());
 
-	gk20a_mm_l2_invalidate(c->g);
-
 	return 0;
 }
 
@@ -277,8 +268,6 @@ static int channel_gk20a_setup_ramfc(struct channel_gk20a *c,
 
 	gk20a_mem_wr32(inst_ptr, ram_fc_chid_w(), ram_fc_chid_id_f(c->hw_chid));
 
-	gk20a_mm_l2_invalidate(c->g);
-
 	return 0;
 }
 
@@ -299,8 +288,6 @@ static int channel_gk20a_setup_userd(struct channel_gk20a *c)
 	gk20a_mem_wr32(c->userd_cpu_va, ram_userd_gp_get_w(), 0);
 	gk20a_mem_wr32(c->userd_cpu_va, ram_userd_gp_put_w(), 0);
 
-	gk20a_mm_l2_invalidate(c->g);
-
 	return 0;
 }
 
@@ -649,8 +636,6 @@ void gk20a_free_channel(struct channel_gk20a *ch, bool finish)
 	ch->gpfifo.cpu_va = NULL;
 	ch->gpfifo.iova = 0;
 
-	gk20a_mm_l2_invalidate(ch->g);
-
 	memset(&ch->gpfifo, 0, sizeof(struct gpfifo_desc));
 
 #if defined(CONFIG_GK20A_CYCLE_STATS)
@@ -1155,8 +1140,6 @@ static int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c,
 	channel_gk20a_setup_userd(c);
 	channel_gk20a_commit_userd(c);
 
-	gk20a_mm_l2_invalidate(c->g);
-
 	/* TBD: setup engine contexts */
 
 	err = channel_gk20a_alloc_priv_cmdbuf(c);
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index 3dbf1435..466f6eed 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -538,9 +538,7 @@ static int gr_gk20a_commit_inst(struct channel_gk20a *c, u64 gpu_va)
 
 	gk20a_dbg_fn("");
 
-	/* flush gpu_va before commit */
 	gk20a_mm_fb_flush(c->g);
-	gk20a_mm_l2_flush(c->g, true);
 
 	inst_ptr = c->inst_block.cpuva;
 	if (!inst_ptr)
@@ -556,8 +554,6 @@ static int gr_gk20a_commit_inst(struct channel_gk20a *c, u64 gpu_va)
 	gk20a_mem_wr32(inst_ptr, ram_in_gr_wfi_ptr_hi_w(),
 		ram_in_gr_wfi_ptr_hi_f(addr_hi));
 
-	gk20a_mm_l2_invalidate(c->g);
-
 	return 0;
 }
 
@@ -600,8 +596,6 @@ int gr_gk20a_ctx_patch_write_end(struct gk20a *g,
 
 	vunmap(ch_ctx->patch_ctx.cpu_va);
 	ch_ctx->patch_ctx.cpu_va = NULL;
-
-	gk20a_mm_l2_invalidate(g);
 	return 0;
 }
 
@@ -718,10 +712,7 @@ static int gr_gk20a_ctx_zcull_setup(struct gk20a *g, struct channel_gk20a *c,
 		}
 	}
 
-	/* Channel gr_ctx buffer is gpu cacheable.
-	   Flush and invalidate before cpu update. */
 	gk20a_mm_fb_flush(g);
-	gk20a_mm_l2_flush(g, true);
 
 	gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_zcull_o(), 0,
 		ch_ctx->zcull_ctx.ctx_sw_mode);
@@ -736,7 +727,6 @@ static int gr_gk20a_ctx_zcull_setup(struct gk20a *g, struct channel_gk20a *c,
 			goto clean_up;
 		}
 	}
-	gk20a_mm_l2_invalidate(g);
 
 clean_up:
 	vunmap(ctx_ptr);
@@ -1466,10 +1456,7 @@ static int gr_gk20a_init_golden_ctx_image(struct gk20a *g,
 	ctx_header_words = roundup(ctx_header_bytes, sizeof(u32));
 	ctx_header_words >>= 2;
 
-	/* Channel gr_ctx buffer is gpu cacheable.
-	   Flush before cpu read. */
 	gk20a_mm_fb_flush(g);
-	gk20a_mm_l2_flush(g, false);
 
 	for (i = 0; i < ctx_header_words; i++) {
 		data = gk20a_mem_rd32(ctx_ptr, i);
@@ -1504,8 +1491,6 @@ static int gr_gk20a_init_golden_ctx_image(struct gk20a *g,
 
 	gr->ctx_vars.golden_image_initialized = true;
 
-	gk20a_mm_l2_invalidate(g);
-
 	gk20a_writel(g, gr_fecs_current_ctx_r(),
 		gr_fecs_current_ctx_valid_false_f());
 
@@ -1537,7 +1522,6 @@ int gr_gk20a_update_smpc_ctxsw_mode(struct gk20a *g,
 	/* Channel gr_ctx buffer is gpu cacheable.
 	   Flush and invalidate before cpu update. */
 	gk20a_mm_fb_flush(g);
-	gk20a_mm_l2_flush(g, true);
 
 	ctx_ptr = vmap(ch_ctx->gr_ctx.pages,
 			PAGE_ALIGN(ch_ctx->gr_ctx.size) >> PAGE_SHIFT,
@@ -1555,8 +1539,6 @@ int gr_gk20a_update_smpc_ctxsw_mode(struct gk20a *g,
 
 	vunmap(ctx_ptr);
 
-	gk20a_mm_l2_invalidate(g);
-
 	return 0;
 }
 
@@ -1580,7 +1562,6 @@ static int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
 	/* Channel gr_ctx buffer is gpu cacheable.
 	   Flush and invalidate before cpu update. */
 	gk20a_mm_fb_flush(g);
-	gk20a_mm_l2_flush(g, true);
 
 	ctx_ptr = vmap(ch_ctx->gr_ctx.pages,
 			PAGE_ALIGN(ch_ctx->gr_ctx.size) >> PAGE_SHIFT,
@@ -1636,8 +1617,6 @@ static int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
 
 	vunmap(ctx_ptr);
 
-	gk20a_mm_l2_invalidate(g);
-
 	if (tegra_platform_is_linsim()) {
 		u32 inst_base_ptr =
 			u64_lo32(c->inst_block.cpu_pa
@@ -2716,7 +2695,6 @@ int gk20a_alloc_obj_ctx(struct channel_gk20a *c,
 		}
 		c->first_init = true;
 	}
-	gk20a_mm_l2_invalidate(g);
 
 	c->num_objects++;
 
@@ -4223,8 +4201,6 @@ restore_fe_go_idle:
 				sw_method_init->l[i].addr);
 	}
 
-	gk20a_mm_l2_invalidate(g);
-
 	err = gr_gk20a_wait_idle(g, end_jiffies, GR_IDLE_CHECK_DEFAULT);
 	if (err)
 		goto out;
@@ -5797,10 +5773,6 @@ int gr_gk20a_ctx_patch_smpc(struct gk20a *g,
 
 			/* we're not caching these on cpu side,
 			   but later watch for it */
-
-			/* the l2 invalidate in the patch_write
-			 * would be too early for this? */
-			gk20a_mm_l2_invalidate(g);
 			return 0;
 		}
 	}
@@ -6538,10 +6510,7 @@ int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
 			goto cleanup;
 		}
 
-	/* Channel gr_ctx buffer is gpu cacheable; so flush and invalidate.
-	 * There should be no on-going/in-flight references by the gpu now. */
 	gk20a_mm_fb_flush(g);
-	gk20a_mm_l2_flush(g, true);
 
 	/* write to appropriate place in context image,
 	 * first have to figure out where that really is */
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index 234b43c2..fc37bbd4 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -1937,8 +1937,6 @@ static int gk20a_vm_put_empty(struct vm_gk20a *vm, u64 vaddr,
 		vaddr += pgsz;
 	}
 
-	gk20a_mm_l2_flush(mm->g, true);
-
 	return 0;
 
 err_unmap: