diff options
author | Terje Bergstrom <tbergstrom@nvidia.com> | 2014-05-08 08:13:32 -0400 |
---|---|---|
committer | Dan Willemsen <dwillemsen@nvidia.com> | 2015-03-18 15:09:49 -0400 |
commit | 48239f5f8cb5763cf4b6dc5db4668257da153cf9 (patch) | |
tree | b12e13c981efe9db4e22cfe696bbd3c62ab77089 /drivers/gpu/nvgpu/gk20a/gr_gk20a.c | |
parent | 3e5c123862c87e22311c21558178f287f85ecb5d (diff) |
gpu: nvgpu: Prune redundant cache maintenance
Remove redundant cache maintenance operations. Instance blocks and
graphics context buffers are uncached, so they do not need any cache
maintenance.
Bug 1421824
Change-Id: Ie0be67bf0be493d9ec9e6f8226f2f9359cba9f54
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/406948
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/gr_gk20a.c')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 31 |
1 file changed, 0 insertions, 31 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index 3dbf1435..466f6eed 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -538,9 +538,7 @@ static int gr_gk20a_commit_inst(struct channel_gk20a *c, u64 gpu_va) | |||
538 | 538 | ||
539 | gk20a_dbg_fn(""); | 539 | gk20a_dbg_fn(""); |
540 | 540 | ||
541 | /* flush gpu_va before commit */ | ||
542 | gk20a_mm_fb_flush(c->g); | 541 | gk20a_mm_fb_flush(c->g); |
543 | gk20a_mm_l2_flush(c->g, true); | ||
544 | 542 | ||
545 | inst_ptr = c->inst_block.cpuva; | 543 | inst_ptr = c->inst_block.cpuva; |
546 | if (!inst_ptr) | 544 | if (!inst_ptr) |
@@ -556,8 +554,6 @@ static int gr_gk20a_commit_inst(struct channel_gk20a *c, u64 gpu_va) | |||
556 | gk20a_mem_wr32(inst_ptr, ram_in_gr_wfi_ptr_hi_w(), | 554 | gk20a_mem_wr32(inst_ptr, ram_in_gr_wfi_ptr_hi_w(), |
557 | ram_in_gr_wfi_ptr_hi_f(addr_hi)); | 555 | ram_in_gr_wfi_ptr_hi_f(addr_hi)); |
558 | 556 | ||
559 | gk20a_mm_l2_invalidate(c->g); | ||
560 | |||
561 | return 0; | 557 | return 0; |
562 | } | 558 | } |
563 | 559 | ||
@@ -600,8 +596,6 @@ int gr_gk20a_ctx_patch_write_end(struct gk20a *g, | |||
600 | 596 | ||
601 | vunmap(ch_ctx->patch_ctx.cpu_va); | 597 | vunmap(ch_ctx->patch_ctx.cpu_va); |
602 | ch_ctx->patch_ctx.cpu_va = NULL; | 598 | ch_ctx->patch_ctx.cpu_va = NULL; |
603 | |||
604 | gk20a_mm_l2_invalidate(g); | ||
605 | return 0; | 599 | return 0; |
606 | } | 600 | } |
607 | 601 | ||
@@ -718,10 +712,7 @@ static int gr_gk20a_ctx_zcull_setup(struct gk20a *g, struct channel_gk20a *c, | |||
718 | } | 712 | } |
719 | } | 713 | } |
720 | 714 | ||
721 | /* Channel gr_ctx buffer is gpu cacheable. | ||
722 | Flush and invalidate before cpu update. */ | ||
723 | gk20a_mm_fb_flush(g); | 715 | gk20a_mm_fb_flush(g); |
724 | gk20a_mm_l2_flush(g, true); | ||
725 | 716 | ||
726 | gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_zcull_o(), 0, | 717 | gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_zcull_o(), 0, |
727 | ch_ctx->zcull_ctx.ctx_sw_mode); | 718 | ch_ctx->zcull_ctx.ctx_sw_mode); |
@@ -736,7 +727,6 @@ static int gr_gk20a_ctx_zcull_setup(struct gk20a *g, struct channel_gk20a *c, | |||
736 | goto clean_up; | 727 | goto clean_up; |
737 | } | 728 | } |
738 | } | 729 | } |
739 | gk20a_mm_l2_invalidate(g); | ||
740 | 730 | ||
741 | clean_up: | 731 | clean_up: |
742 | vunmap(ctx_ptr); | 732 | vunmap(ctx_ptr); |
@@ -1466,10 +1456,7 @@ static int gr_gk20a_init_golden_ctx_image(struct gk20a *g, | |||
1466 | ctx_header_words = roundup(ctx_header_bytes, sizeof(u32)); | 1456 | ctx_header_words = roundup(ctx_header_bytes, sizeof(u32)); |
1467 | ctx_header_words >>= 2; | 1457 | ctx_header_words >>= 2; |
1468 | 1458 | ||
1469 | /* Channel gr_ctx buffer is gpu cacheable. | ||
1470 | Flush before cpu read. */ | ||
1471 | gk20a_mm_fb_flush(g); | 1459 | gk20a_mm_fb_flush(g); |
1472 | gk20a_mm_l2_flush(g, false); | ||
1473 | 1460 | ||
1474 | for (i = 0; i < ctx_header_words; i++) { | 1461 | for (i = 0; i < ctx_header_words; i++) { |
1475 | data = gk20a_mem_rd32(ctx_ptr, i); | 1462 | data = gk20a_mem_rd32(ctx_ptr, i); |
@@ -1504,8 +1491,6 @@ static int gr_gk20a_init_golden_ctx_image(struct gk20a *g, | |||
1504 | 1491 | ||
1505 | gr->ctx_vars.golden_image_initialized = true; | 1492 | gr->ctx_vars.golden_image_initialized = true; |
1506 | 1493 | ||
1507 | gk20a_mm_l2_invalidate(g); | ||
1508 | |||
1509 | gk20a_writel(g, gr_fecs_current_ctx_r(), | 1494 | gk20a_writel(g, gr_fecs_current_ctx_r(), |
1510 | gr_fecs_current_ctx_valid_false_f()); | 1495 | gr_fecs_current_ctx_valid_false_f()); |
1511 | 1496 | ||
@@ -1537,7 +1522,6 @@ int gr_gk20a_update_smpc_ctxsw_mode(struct gk20a *g, | |||
1537 | /* Channel gr_ctx buffer is gpu cacheable. | 1522 | /* Channel gr_ctx buffer is gpu cacheable. |
1538 | Flush and invalidate before cpu update. */ | 1523 | Flush and invalidate before cpu update. */ |
1539 | gk20a_mm_fb_flush(g); | 1524 | gk20a_mm_fb_flush(g); |
1540 | gk20a_mm_l2_flush(g, true); | ||
1541 | 1525 | ||
1542 | ctx_ptr = vmap(ch_ctx->gr_ctx.pages, | 1526 | ctx_ptr = vmap(ch_ctx->gr_ctx.pages, |
1543 | PAGE_ALIGN(ch_ctx->gr_ctx.size) >> PAGE_SHIFT, | 1527 | PAGE_ALIGN(ch_ctx->gr_ctx.size) >> PAGE_SHIFT, |
@@ -1555,8 +1539,6 @@ int gr_gk20a_update_smpc_ctxsw_mode(struct gk20a *g, | |||
1555 | 1539 | ||
1556 | vunmap(ctx_ptr); | 1540 | vunmap(ctx_ptr); |
1557 | 1541 | ||
1558 | gk20a_mm_l2_invalidate(g); | ||
1559 | |||
1560 | return 0; | 1542 | return 0; |
1561 | } | 1543 | } |
1562 | 1544 | ||
@@ -1580,7 +1562,6 @@ static int gr_gk20a_load_golden_ctx_image(struct gk20a *g, | |||
1580 | /* Channel gr_ctx buffer is gpu cacheable. | 1562 | /* Channel gr_ctx buffer is gpu cacheable. |
1581 | Flush and invalidate before cpu update. */ | 1563 | Flush and invalidate before cpu update. */ |
1582 | gk20a_mm_fb_flush(g); | 1564 | gk20a_mm_fb_flush(g); |
1583 | gk20a_mm_l2_flush(g, true); | ||
1584 | 1565 | ||
1585 | ctx_ptr = vmap(ch_ctx->gr_ctx.pages, | 1566 | ctx_ptr = vmap(ch_ctx->gr_ctx.pages, |
1586 | PAGE_ALIGN(ch_ctx->gr_ctx.size) >> PAGE_SHIFT, | 1567 | PAGE_ALIGN(ch_ctx->gr_ctx.size) >> PAGE_SHIFT, |
@@ -1636,8 +1617,6 @@ static int gr_gk20a_load_golden_ctx_image(struct gk20a *g, | |||
1636 | 1617 | ||
1637 | vunmap(ctx_ptr); | 1618 | vunmap(ctx_ptr); |
1638 | 1619 | ||
1639 | gk20a_mm_l2_invalidate(g); | ||
1640 | |||
1641 | if (tegra_platform_is_linsim()) { | 1620 | if (tegra_platform_is_linsim()) { |
1642 | u32 inst_base_ptr = | 1621 | u32 inst_base_ptr = |
1643 | u64_lo32(c->inst_block.cpu_pa | 1622 | u64_lo32(c->inst_block.cpu_pa |
@@ -2716,7 +2695,6 @@ int gk20a_alloc_obj_ctx(struct channel_gk20a *c, | |||
2716 | } | 2695 | } |
2717 | c->first_init = true; | 2696 | c->first_init = true; |
2718 | } | 2697 | } |
2719 | gk20a_mm_l2_invalidate(g); | ||
2720 | 2698 | ||
2721 | c->num_objects++; | 2699 | c->num_objects++; |
2722 | 2700 | ||
@@ -4223,8 +4201,6 @@ restore_fe_go_idle: | |||
4223 | sw_method_init->l[i].addr); | 4201 | sw_method_init->l[i].addr); |
4224 | } | 4202 | } |
4225 | 4203 | ||
4226 | gk20a_mm_l2_invalidate(g); | ||
4227 | |||
4228 | err = gr_gk20a_wait_idle(g, end_jiffies, GR_IDLE_CHECK_DEFAULT); | 4204 | err = gr_gk20a_wait_idle(g, end_jiffies, GR_IDLE_CHECK_DEFAULT); |
4229 | if (err) | 4205 | if (err) |
4230 | goto out; | 4206 | goto out; |
@@ -5797,10 +5773,6 @@ int gr_gk20a_ctx_patch_smpc(struct gk20a *g, | |||
5797 | 5773 | ||
5798 | /* we're not caching these on cpu side, | 5774 | /* we're not caching these on cpu side, |
5799 | but later watch for it */ | 5775 | but later watch for it */ |
5800 | |||
5801 | /* the l2 invalidate in the patch_write | ||
5802 | * would be too early for this? */ | ||
5803 | gk20a_mm_l2_invalidate(g); | ||
5804 | return 0; | 5776 | return 0; |
5805 | } | 5777 | } |
5806 | } | 5778 | } |
@@ -6538,10 +6510,7 @@ int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch, | |||
6538 | goto cleanup; | 6510 | goto cleanup; |
6539 | } | 6511 | } |
6540 | 6512 | ||
6541 | /* Channel gr_ctx buffer is gpu cacheable; so flush and invalidate. | ||
6542 | * There should be no on-going/in-flight references by the gpu now. */ | ||
6543 | gk20a_mm_fb_flush(g); | 6513 | gk20a_mm_fb_flush(g); |
6544 | gk20a_mm_l2_flush(g, true); | ||
6545 | 6514 | ||
6546 | /* write to appropriate place in context image, | 6515 | /* write to appropriate place in context image, |
6547 | * first have to figure out where that really is */ | 6516 | * first have to figure out where that really is */ |