author     seshendra Gadagottu <sgadagottu@nvidia.com>          2017-09-18 14:39:32 -0400
committer  mobile promotions <svcmobile_promotions@nvidia.com>  2017-09-19 20:45:29 -0400
commit     1132fd2a12a48271f94e995c0466c48b8228c185 (patch)
tree       eaa69417b39f228995dad657146776eec4884ee3 /drivers/gpu/nvgpu/gk20a
parent     c4370d7deff6e3545157e06f51df2fef605a18cc (diff)
gpu: nvgpu: changes related to handling ctx header
The ctx header holds only the gpu va for each address space. All other
information is held in the main context. The ctx header has a gpu va for
the following fields:
  ctxsw_prog_main_image_context_buffer_ptr
  ctxsw_prog_main_image_context_buffer_ptr_hi
  ctxsw_prog_main_image_zcull_ptr
  ctxsw_prog_main_image_zcull_ptr_hi
  ctxsw_prog_main_image_pm_ptr
  ctxsw_prog_main_image_pm_ptr_hi
  ctxsw_prog_main_image_full_preemption_ptr_hi
  ctxsw_prog_main_image_full_preemption_ptr
  ctxsw_prog_main_image_full_preemption_ptr_xxxx0
  ctxsw_prog_main_image_full_preemption_ptr_xxxx0_v
  ctxsw_prog_main_image_patch_adr_lo
  ctxsw_prog_main_image_patch_adr_hi

Changes done as part of this CL:
- Read ctx_id from the main context header.
- Golden context creation: use gold_mem for golden context creation and
  copy the golden context from the saved golden local memory to the main
  context. No need to restore the golden context to the context header.
- Write ctx_patch_count and smpc_ctxsw_mode in the main context header only.
- Update the preemption mode in the main context header and the preemption
  buffer va in the context header.
- Update the image patch buffer va in the context header.

Bug 1958308

Change-Id: Ic076aad8b1802f76f941d2d15cb9a8c07308e3e8
Signed-off-by: seshendra Gadagottu <sgadagottu@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1562680
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
Reviewed-by: svccoveritychecker <svccoveritychecker@nvidia.com>
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
GVS: Gerrit_Virtual_Submit
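For illustration, below is a minimal, hedged C sketch of the split this CL enforces for the patch buffer: bookkeeping such as the patch count stays in the main gr_ctx image, while only the patch buffer gpu va is mirrored into the ctx header when one is mapped. The helper name sketch_write_patch_buffer() is hypothetical; the accessors (nvgpu_mem_wr(), u64_lo32()/u64_hi32(), ctxsw_prog_main_image_patch_*_o()) and the ch_ctx layout are taken from the diff below, and the caller is assumed to have already mapped the buffers with nvgpu_mem_begin().

/*
 * Hedged sketch (not the driver's actual function): after this CL, context
 * bookkeeping lives in the main gr_ctx image and the ctx header carries only
 * gpu virtual addresses.  Assumes the caller already did nvgpu_mem_begin()
 * on the buffers it touches, mirroring gr_gk20a_ctx_patch_smpc() below.
 */
static void sketch_write_patch_buffer(struct gk20a *g,
				      struct channel_ctx_gk20a *ch_ctx)
{
	struct nvgpu_mem *gr_mem = &ch_ctx->gr_ctx->mem;
	struct nvgpu_mem *ctxheader = &ch_ctx->ctx_header.mem;
	u64 vaddr = ch_ctx->patch_ctx.mem.gpu_va;

	/* The patch count is state, so it goes to the main context image. */
	nvgpu_mem_wr(g, gr_mem, ctxsw_prog_main_image_patch_count_o(),
		     ch_ctx->patch_ctx.data_count);

	/* The patch buffer gpu va goes to the ctx header when one is
	 * mapped, otherwise it falls back to the main context image. */
	if (ctxheader->gpu_va) {
		nvgpu_mem_wr(g, ctxheader,
			     ctxsw_prog_main_image_patch_adr_lo_o(),
			     u64_lo32(vaddr));
		nvgpu_mem_wr(g, ctxheader,
			     ctxsw_prog_main_image_patch_adr_hi_o(),
			     u64_hi32(vaddr));
	} else {
		nvgpu_mem_wr(g, gr_mem,
			     ctxsw_prog_main_image_patch_adr_lo_o(),
			     u64_lo32(vaddr));
		nvgpu_mem_wr(g, gr_mem,
			     ctxsw_prog_main_image_patch_adr_hi_o(),
			     u64_hi32(vaddr));
	}
}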
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a')
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gr_gk20a.c  122
1 file changed, 38 insertions, 84 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index 82c9fa89..263d8bac 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -104,19 +104,12 @@ int gr_gk20a_get_ctx_id(struct gk20a *g,
 		u32 *ctx_id)
 {
 	struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
-	struct ctx_header_desc *ctx_header = &ch_ctx->ctx_header;
-	struct nvgpu_mem *ctx_header_mem = &ctx_header->mem;
-	struct nvgpu_mem *mem;
+	struct nvgpu_mem *mem = &ch_ctx->gr_ctx->mem;
 
 	/* Channel gr_ctx buffer is gpu cacheable.
 	   Flush and invalidate before cpu update. */
 	g->ops.mm.l2_flush(g, true);
 
-	if (ctx_header_mem->gpu_va)
-		mem = ctx_header_mem;
-	else
-		mem = &ch_ctx->gr_ctx->mem;
-
 	if (nvgpu_mem_begin(g, mem))
 		return -ENOMEM;
 
@@ -681,20 +674,10 @@ int gr_gk20a_ctx_patch_write_begin(struct gk20a *g,
 void gr_gk20a_ctx_patch_write_end(struct gk20a *g,
 		struct channel_ctx_gk20a *ch_ctx)
 {
-	struct ctx_header_desc *ctx = &ch_ctx->ctx_header;
-	struct nvgpu_mem *ctxheader = &ctx->mem;
-
 	nvgpu_mem_end(g, &ch_ctx->patch_ctx.mem);
 
 	/* Write context count to context image if it is mapped */
-	if (ctxheader->gpu_va) {
-
-		if (ctxheader->cpu_va)
-			nvgpu_mem_wr(g, ctxheader,
-				ctxsw_prog_main_image_patch_count_o(),
-				ch_ctx->patch_ctx.data_count);
-
-	} else if (ch_ctx->gr_ctx->mem.cpu_va) {
+	if (ch_ctx->gr_ctx->mem.cpu_va) {
 		nvgpu_mem_wr(g, &ch_ctx->gr_ctx->mem,
 			ctxsw_prog_main_image_patch_count_o(),
 			ch_ctx->patch_ctx.data_count);
@@ -1357,8 +1340,6 @@ static int gr_gk20a_init_golden_ctx_image(struct gk20a *g,
 	struct aiv_list_gk20a *sw_ctx_load = &g->gr.ctx_vars.sw_ctx_load;
 	struct av_list_gk20a *sw_method_init = &g->gr.ctx_vars.sw_method_init;
 	u32 last_method_data = 0;
-	struct ctx_header_desc *ctx = &c->ch_ctx.ctx_header;
-	struct nvgpu_mem *ctxheader = &ctx->mem;
 
 	gk20a_dbg_fn("");
 
@@ -1549,14 +1530,7 @@ restore_fe_go_idle:
 	nvgpu_mem_wr(g, gold_mem, ctxsw_prog_main_image_zcull_o(),
 		ctxsw_prog_main_image_zcull_mode_no_ctxsw_v());
 
-	if (nvgpu_mem_begin(g, ctxheader))
-		goto clean_up;
-
-	if (ctxheader->gpu_va)
-		g->ops.gr.write_zcull_ptr(g, ctxheader, 0);
-	else
-		g->ops.gr.write_zcull_ptr(g, gold_mem, 0);
-	nvgpu_mem_end(g, ctxheader);
+	g->ops.gr.write_zcull_ptr(g, gold_mem, 0);
 
 	err = g->ops.gr.commit_inst(c, ch_ctx->global_ctx_buffer_va[GOLDEN_CTX_VA]);
 	if (err)
@@ -1564,8 +1538,7 @@ restore_fe_go_idle:
 
 	gr_gk20a_fecs_ctx_image_save(c, gr_fecs_method_push_adr_wfi_golden_save_v());
 
-	if (nvgpu_mem_begin(g, ctxheader))
-		goto clean_up;
+
 
 	if (gr->ctx_vars.local_golden_image == NULL) {
 
@@ -1576,17 +1549,11 @@ restore_fe_go_idle:
 			err = -ENOMEM;
 			goto clean_up;
 		}
+		nvgpu_mem_rd_n(g, gold_mem, 0,
+			gr->ctx_vars.local_golden_image,
+			gr->ctx_vars.golden_image_size);
 
-		if (ctxheader->gpu_va)
-			nvgpu_mem_rd_n(g, ctxheader, 0,
-				gr->ctx_vars.local_golden_image,
-				gr->ctx_vars.golden_image_size);
-		else
-			nvgpu_mem_rd_n(g, gold_mem, 0,
-				gr->ctx_vars.local_golden_image,
-				gr->ctx_vars.golden_image_size);
 	}
-	nvgpu_mem_end(g, ctxheader);
 
 	err = g->ops.gr.commit_inst(c, gr_mem->gpu_va);
 	if (err)
@@ -1618,8 +1585,6 @@ int gr_gk20a_update_smpc_ctxsw_mode(struct gk20a *g,
 	struct nvgpu_mem *mem;
 	u32 data;
 	int ret;
-	struct ctx_header_desc *ctx = &c->ch_ctx.ctx_header;
-	struct nvgpu_mem *ctxheader = &ctx->mem;
 
 	gk20a_dbg_fn("");
 
@@ -1651,34 +1616,17 @@ int gr_gk20a_update_smpc_ctxsw_mode(struct gk20a *g,
 		goto out;
 	}
 
-	if (nvgpu_mem_begin(g, ctxheader)) {
-		ret = -ENOMEM;
-		goto clean_up_mem;
-	}
-	if (ctxheader->gpu_va)
-		data = nvgpu_mem_rd(g, ctxheader,
-				ctxsw_prog_main_image_pm_o());
-	else
-		data = nvgpu_mem_rd(g, mem,
-				ctxsw_prog_main_image_pm_o());
+	data = nvgpu_mem_rd(g, mem,
+			ctxsw_prog_main_image_pm_o());
 
 	data = data & ~ctxsw_prog_main_image_pm_smpc_mode_m();
 	data |= enable_smpc_ctxsw ?
 		ctxsw_prog_main_image_pm_smpc_mode_ctxsw_f() :
 		ctxsw_prog_main_image_pm_smpc_mode_no_ctxsw_f();
 
-	if (ctxheader->gpu_va)
-		nvgpu_mem_wr(g, ctxheader,
-				ctxsw_prog_main_image_pm_o(),
-				data);
-	else
-		nvgpu_mem_wr(g, mem,
-				ctxsw_prog_main_image_pm_o(),
-				data);
-
-	nvgpu_mem_end(g, ctxheader);
+	nvgpu_mem_wr(g, mem,
+			ctxsw_prog_main_image_pm_o(), data);
 
-clean_up_mem:
 	nvgpu_mem_end(g, mem);
 out:
 	gk20a_enable_channel_tsg(g, c);
@@ -1862,14 +1810,9 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
 		goto clean_up_mem;
 	}
 
-	if (ctxheader->gpu_va) {
-		if (g->ops.gr.restore_context_header)
-			g->ops.gr.restore_context_header(g, ctxheader);
-	} else {
-		nvgpu_mem_wr_n(g, mem, 0,
-			gr->ctx_vars.local_golden_image,
-			gr->ctx_vars.golden_image_size);
-	}
+	nvgpu_mem_wr_n(g, mem, 0,
+		gr->ctx_vars.local_golden_image,
+		gr->ctx_vars.golden_image_size);
 
 	if (g->ops.gr.init_ctxsw_hdr_data)
 		g->ops.gr.init_ctxsw_hdr_data(g, mem);
@@ -1923,19 +1866,19 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
 
 	nvgpu_mem_wr(g, mem, ctxsw_prog_main_image_patch_count_o(),
 		ch_ctx->patch_ctx.data_count);
-	nvgpu_mem_wr(g, mem, ctxsw_prog_main_image_patch_adr_lo_o(),
-		virt_addr_lo);
-	nvgpu_mem_wr(g, mem, ctxsw_prog_main_image_patch_adr_hi_o(),
-		virt_addr_hi);
 
 	if (ctxheader->gpu_va) {
 		nvgpu_mem_wr(g, ctxheader,
-			ctxsw_prog_main_image_patch_count_o(),
-			ch_ctx->patch_ctx.data_count);
+			ctxsw_prog_main_image_patch_adr_lo_o(),
+			virt_addr_lo);
 		nvgpu_mem_wr(g, ctxheader,
+			ctxsw_prog_main_image_patch_adr_hi_o(),
+			virt_addr_hi);
+	} else {
+		nvgpu_mem_wr(g, mem,
 			ctxsw_prog_main_image_patch_adr_lo_o(),
 			virt_addr_lo);
-		nvgpu_mem_wr(g, ctxheader,
+		nvgpu_mem_wr(g, mem,
 			ctxsw_prog_main_image_patch_adr_hi_o(),
 			virt_addr_hi);
 	}
@@ -6606,6 +6549,8 @@ static int gr_gk20a_ctx_patch_smpc(struct gk20a *g,
 	u32 *ovr_perf_regs = NULL;
 	u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
 	u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
+	struct ctx_header_desc *ctx = &ch_ctx->ctx_header;
+	struct nvgpu_mem *ctxheader = &ctx->mem;
 
 	g->ops.gr.init_ovr_sm_dsm_perf();
 	g->ops.gr.init_sm_dsm_reg_info();
@@ -6640,12 +6585,21 @@ static int gr_gk20a_ctx_patch_smpc(struct gk20a *g,
 		nvgpu_mem_wr(g, mem,
 			ctxsw_prog_main_image_patch_count_o(),
 			ch_ctx->patch_ctx.data_count);
-		nvgpu_mem_wr(g, mem,
-			ctxsw_prog_main_image_patch_adr_lo_o(),
-			vaddr_lo);
-		nvgpu_mem_wr(g, mem,
-			ctxsw_prog_main_image_patch_adr_hi_o(),
-			vaddr_hi);
+		if (ctxheader->gpu_va) {
+			nvgpu_mem_wr(g, ctxheader,
+				ctxsw_prog_main_image_patch_adr_lo_o(),
+				vaddr_lo);
+			nvgpu_mem_wr(g, ctxheader,
+				ctxsw_prog_main_image_patch_adr_hi_o(),
+				vaddr_hi);
+		} else {
+			nvgpu_mem_wr(g, mem,
+				ctxsw_prog_main_image_patch_adr_lo_o(),
+				vaddr_lo);
+			nvgpu_mem_wr(g, mem,
+				ctxsw_prog_main_image_patch_adr_hi_o(),
+				vaddr_hi);
+		}
 
 		/* we're not caching these on cpu side,
 		   but later watch for it */