diff options
author | seshendra Gadagottu <sgadagottu@nvidia.com> | 2017-09-18 14:39:32 -0400 |
---|---|---|
committer | mobile promotions <svcmobile_promotions@nvidia.com> | 2017-09-19 20:45:29 -0400 |
commit | 1132fd2a12a48271f94e995c0466c48b8228c185 (patch) | |
tree | eaa69417b39f228995dad657146776eec4884ee3 /drivers | |
parent | c4370d7deff6e3545157e06f51df2fef605a18cc (diff) |
gpu: nvgpu: changes related to handling ctx header
ctx header holds only gpu va for each address space.
All other information will be held in main
context. Ctx header will have gpu va for following
fields:
ctxsw_prog_main_image_context_buffer_ptr
ctxsw_prog_main_image_context_buffer_ptr_hi
ctxsw_prog_main_image_zcull_ptr
ctxsw_prog_main_image_zcull_ptr_hi
ctxsw_prog_main_image_pm_ptr
ctxsw_prog_main_image_pm_ptr_hi
ctxsw_prog_main_image_full_preemption_ptr_hi
ctxsw_prog_main_image_full_preemption_ptr
ctxsw_prog_main_image_full_preemption_ptr_xxxx0
ctxsw_prog_main_image_full_preemption_ptr_xxxx0_v
ctxsw_prog_main_image_patch_adr_lo
ctxsw_prog_main_image_patch_adr_hi
Changes done as part of this CL:
- Read ctx_id from main context header
- Golden context creation:
Use gold_mem for golden context creation
and copy golden context from saved golden local
memory to main context. No need to restore
golden context to context header.
- Write ctx_patch_count and smpc_ctxsw_mode in
main context header only.
- Update preemption mode in main context header and
preemption buffer va in context header.
- Updated image patch buffer va in context header.
Bug 1958308
Change-Id: Ic076aad8b1802f76f941d2d15cb9a8c07308e3e8
Signed-off-by: seshendra Gadagottu <sgadagottu@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1562680
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
Reviewed-by: svccoveritychecker <svccoveritychecker@nvidia.com>
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
GVS: Gerrit_Virtual_Submit
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 122 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gp10b/gr_gp10b.c | 20 |
2 files changed, 50 insertions, 92 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index 82c9fa89..263d8bac 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c | |||
@@ -104,19 +104,12 @@ int gr_gk20a_get_ctx_id(struct gk20a *g, | |||
104 | u32 *ctx_id) | 104 | u32 *ctx_id) |
105 | { | 105 | { |
106 | struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx; | 106 | struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx; |
107 | struct ctx_header_desc *ctx_header = &ch_ctx->ctx_header; | 107 | struct nvgpu_mem *mem = &ch_ctx->gr_ctx->mem; |
108 | struct nvgpu_mem *ctx_header_mem = &ctx_header->mem; | ||
109 | struct nvgpu_mem *mem; | ||
110 | 108 | ||
111 | /* Channel gr_ctx buffer is gpu cacheable. | 109 | /* Channel gr_ctx buffer is gpu cacheable. |
112 | Flush and invalidate before cpu update. */ | 110 | Flush and invalidate before cpu update. */ |
113 | g->ops.mm.l2_flush(g, true); | 111 | g->ops.mm.l2_flush(g, true); |
114 | 112 | ||
115 | if (ctx_header_mem->gpu_va) | ||
116 | mem = ctx_header_mem; | ||
117 | else | ||
118 | mem = &ch_ctx->gr_ctx->mem; | ||
119 | |||
120 | if (nvgpu_mem_begin(g, mem)) | 113 | if (nvgpu_mem_begin(g, mem)) |
121 | return -ENOMEM; | 114 | return -ENOMEM; |
122 | 115 | ||
@@ -681,20 +674,10 @@ int gr_gk20a_ctx_patch_write_begin(struct gk20a *g, | |||
681 | void gr_gk20a_ctx_patch_write_end(struct gk20a *g, | 674 | void gr_gk20a_ctx_patch_write_end(struct gk20a *g, |
682 | struct channel_ctx_gk20a *ch_ctx) | 675 | struct channel_ctx_gk20a *ch_ctx) |
683 | { | 676 | { |
684 | struct ctx_header_desc *ctx = &ch_ctx->ctx_header; | ||
685 | struct nvgpu_mem *ctxheader = &ctx->mem; | ||
686 | |||
687 | nvgpu_mem_end(g, &ch_ctx->patch_ctx.mem); | 677 | nvgpu_mem_end(g, &ch_ctx->patch_ctx.mem); |
688 | 678 | ||
689 | /* Write context count to context image if it is mapped */ | 679 | /* Write context count to context image if it is mapped */ |
690 | if (ctxheader->gpu_va) { | 680 | if (ch_ctx->gr_ctx->mem.cpu_va) { |
691 | |||
692 | if (ctxheader->cpu_va) | ||
693 | nvgpu_mem_wr(g, ctxheader, | ||
694 | ctxsw_prog_main_image_patch_count_o(), | ||
695 | ch_ctx->patch_ctx.data_count); | ||
696 | |||
697 | } else if (ch_ctx->gr_ctx->mem.cpu_va) { | ||
698 | nvgpu_mem_wr(g, &ch_ctx->gr_ctx->mem, | 681 | nvgpu_mem_wr(g, &ch_ctx->gr_ctx->mem, |
699 | ctxsw_prog_main_image_patch_count_o(), | 682 | ctxsw_prog_main_image_patch_count_o(), |
700 | ch_ctx->patch_ctx.data_count); | 683 | ch_ctx->patch_ctx.data_count); |
@@ -1357,8 +1340,6 @@ static int gr_gk20a_init_golden_ctx_image(struct gk20a *g, | |||
1357 | struct aiv_list_gk20a *sw_ctx_load = &g->gr.ctx_vars.sw_ctx_load; | 1340 | struct aiv_list_gk20a *sw_ctx_load = &g->gr.ctx_vars.sw_ctx_load; |
1358 | struct av_list_gk20a *sw_method_init = &g->gr.ctx_vars.sw_method_init; | 1341 | struct av_list_gk20a *sw_method_init = &g->gr.ctx_vars.sw_method_init; |
1359 | u32 last_method_data = 0; | 1342 | u32 last_method_data = 0; |
1360 | struct ctx_header_desc *ctx = &c->ch_ctx.ctx_header; | ||
1361 | struct nvgpu_mem *ctxheader = &ctx->mem; | ||
1362 | 1343 | ||
1363 | gk20a_dbg_fn(""); | 1344 | gk20a_dbg_fn(""); |
1364 | 1345 | ||
@@ -1549,14 +1530,7 @@ restore_fe_go_idle: | |||
1549 | nvgpu_mem_wr(g, gold_mem, ctxsw_prog_main_image_zcull_o(), | 1530 | nvgpu_mem_wr(g, gold_mem, ctxsw_prog_main_image_zcull_o(), |
1550 | ctxsw_prog_main_image_zcull_mode_no_ctxsw_v()); | 1531 | ctxsw_prog_main_image_zcull_mode_no_ctxsw_v()); |
1551 | 1532 | ||
1552 | if (nvgpu_mem_begin(g, ctxheader)) | 1533 | g->ops.gr.write_zcull_ptr(g, gold_mem, 0); |
1553 | goto clean_up; | ||
1554 | |||
1555 | if (ctxheader->gpu_va) | ||
1556 | g->ops.gr.write_zcull_ptr(g, ctxheader, 0); | ||
1557 | else | ||
1558 | g->ops.gr.write_zcull_ptr(g, gold_mem, 0); | ||
1559 | nvgpu_mem_end(g, ctxheader); | ||
1560 | 1534 | ||
1561 | err = g->ops.gr.commit_inst(c, ch_ctx->global_ctx_buffer_va[GOLDEN_CTX_VA]); | 1535 | err = g->ops.gr.commit_inst(c, ch_ctx->global_ctx_buffer_va[GOLDEN_CTX_VA]); |
1562 | if (err) | 1536 | if (err) |
@@ -1564,8 +1538,7 @@ restore_fe_go_idle: | |||
1564 | 1538 | ||
1565 | gr_gk20a_fecs_ctx_image_save(c, gr_fecs_method_push_adr_wfi_golden_save_v()); | 1539 | gr_gk20a_fecs_ctx_image_save(c, gr_fecs_method_push_adr_wfi_golden_save_v()); |
1566 | 1540 | ||
1567 | if (nvgpu_mem_begin(g, ctxheader)) | 1541 | |
1568 | goto clean_up; | ||
1569 | 1542 | ||
1570 | if (gr->ctx_vars.local_golden_image == NULL) { | 1543 | if (gr->ctx_vars.local_golden_image == NULL) { |
1571 | 1544 | ||
@@ -1576,17 +1549,11 @@ restore_fe_go_idle: | |||
1576 | err = -ENOMEM; | 1549 | err = -ENOMEM; |
1577 | goto clean_up; | 1550 | goto clean_up; |
1578 | } | 1551 | } |
1552 | nvgpu_mem_rd_n(g, gold_mem, 0, | ||
1553 | gr->ctx_vars.local_golden_image, | ||
1554 | gr->ctx_vars.golden_image_size); | ||
1579 | 1555 | ||
1580 | if (ctxheader->gpu_va) | ||
1581 | nvgpu_mem_rd_n(g, ctxheader, 0, | ||
1582 | gr->ctx_vars.local_golden_image, | ||
1583 | gr->ctx_vars.golden_image_size); | ||
1584 | else | ||
1585 | nvgpu_mem_rd_n(g, gold_mem, 0, | ||
1586 | gr->ctx_vars.local_golden_image, | ||
1587 | gr->ctx_vars.golden_image_size); | ||
1588 | } | 1556 | } |
1589 | nvgpu_mem_end(g, ctxheader); | ||
1590 | 1557 | ||
1591 | err = g->ops.gr.commit_inst(c, gr_mem->gpu_va); | 1558 | err = g->ops.gr.commit_inst(c, gr_mem->gpu_va); |
1592 | if (err) | 1559 | if (err) |
@@ -1618,8 +1585,6 @@ int gr_gk20a_update_smpc_ctxsw_mode(struct gk20a *g, | |||
1618 | struct nvgpu_mem *mem; | 1585 | struct nvgpu_mem *mem; |
1619 | u32 data; | 1586 | u32 data; |
1620 | int ret; | 1587 | int ret; |
1621 | struct ctx_header_desc *ctx = &c->ch_ctx.ctx_header; | ||
1622 | struct nvgpu_mem *ctxheader = &ctx->mem; | ||
1623 | 1588 | ||
1624 | gk20a_dbg_fn(""); | 1589 | gk20a_dbg_fn(""); |
1625 | 1590 | ||
@@ -1651,34 +1616,17 @@ int gr_gk20a_update_smpc_ctxsw_mode(struct gk20a *g, | |||
1651 | goto out; | 1616 | goto out; |
1652 | } | 1617 | } |
1653 | 1618 | ||
1654 | if (nvgpu_mem_begin(g, ctxheader)) { | 1619 | data = nvgpu_mem_rd(g, mem, |
1655 | ret = -ENOMEM; | 1620 | ctxsw_prog_main_image_pm_o()); |
1656 | goto clean_up_mem; | ||
1657 | } | ||
1658 | if (ctxheader->gpu_va) | ||
1659 | data = nvgpu_mem_rd(g, ctxheader, | ||
1660 | ctxsw_prog_main_image_pm_o()); | ||
1661 | else | ||
1662 | data = nvgpu_mem_rd(g, mem, | ||
1663 | ctxsw_prog_main_image_pm_o()); | ||
1664 | 1621 | ||
1665 | data = data & ~ctxsw_prog_main_image_pm_smpc_mode_m(); | 1622 | data = data & ~ctxsw_prog_main_image_pm_smpc_mode_m(); |
1666 | data |= enable_smpc_ctxsw ? | 1623 | data |= enable_smpc_ctxsw ? |
1667 | ctxsw_prog_main_image_pm_smpc_mode_ctxsw_f() : | 1624 | ctxsw_prog_main_image_pm_smpc_mode_ctxsw_f() : |
1668 | ctxsw_prog_main_image_pm_smpc_mode_no_ctxsw_f(); | 1625 | ctxsw_prog_main_image_pm_smpc_mode_no_ctxsw_f(); |
1669 | 1626 | ||
1670 | if (ctxheader->gpu_va) | 1627 | nvgpu_mem_wr(g, mem, |
1671 | nvgpu_mem_wr(g, ctxheader, | 1628 | ctxsw_prog_main_image_pm_o(), data); |
1672 | ctxsw_prog_main_image_pm_o(), | ||
1673 | data); | ||
1674 | else | ||
1675 | nvgpu_mem_wr(g, mem, | ||
1676 | ctxsw_prog_main_image_pm_o(), | ||
1677 | data); | ||
1678 | |||
1679 | nvgpu_mem_end(g, ctxheader); | ||
1680 | 1629 | ||
1681 | clean_up_mem: | ||
1682 | nvgpu_mem_end(g, mem); | 1630 | nvgpu_mem_end(g, mem); |
1683 | out: | 1631 | out: |
1684 | gk20a_enable_channel_tsg(g, c); | 1632 | gk20a_enable_channel_tsg(g, c); |
@@ -1862,14 +1810,9 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g, | |||
1862 | goto clean_up_mem; | 1810 | goto clean_up_mem; |
1863 | } | 1811 | } |
1864 | 1812 | ||
1865 | if (ctxheader->gpu_va) { | 1813 | nvgpu_mem_wr_n(g, mem, 0, |
1866 | if (g->ops.gr.restore_context_header) | 1814 | gr->ctx_vars.local_golden_image, |
1867 | g->ops.gr.restore_context_header(g, ctxheader); | 1815 | gr->ctx_vars.golden_image_size); |
1868 | } else { | ||
1869 | nvgpu_mem_wr_n(g, mem, 0, | ||
1870 | gr->ctx_vars.local_golden_image, | ||
1871 | gr->ctx_vars.golden_image_size); | ||
1872 | } | ||
1873 | 1816 | ||
1874 | if (g->ops.gr.init_ctxsw_hdr_data) | 1817 | if (g->ops.gr.init_ctxsw_hdr_data) |
1875 | g->ops.gr.init_ctxsw_hdr_data(g, mem); | 1818 | g->ops.gr.init_ctxsw_hdr_data(g, mem); |
@@ -1923,19 +1866,19 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g, | |||
1923 | 1866 | ||
1924 | nvgpu_mem_wr(g, mem, ctxsw_prog_main_image_patch_count_o(), | 1867 | nvgpu_mem_wr(g, mem, ctxsw_prog_main_image_patch_count_o(), |
1925 | ch_ctx->patch_ctx.data_count); | 1868 | ch_ctx->patch_ctx.data_count); |
1926 | nvgpu_mem_wr(g, mem, ctxsw_prog_main_image_patch_adr_lo_o(), | ||
1927 | virt_addr_lo); | ||
1928 | nvgpu_mem_wr(g, mem, ctxsw_prog_main_image_patch_adr_hi_o(), | ||
1929 | virt_addr_hi); | ||
1930 | 1869 | ||
1931 | if (ctxheader->gpu_va) { | 1870 | if (ctxheader->gpu_va) { |
1932 | nvgpu_mem_wr(g, ctxheader, | 1871 | nvgpu_mem_wr(g, ctxheader, |
1933 | ctxsw_prog_main_image_patch_count_o(), | 1872 | ctxsw_prog_main_image_patch_adr_lo_o(), |
1934 | ch_ctx->patch_ctx.data_count); | 1873 | virt_addr_lo); |
1935 | nvgpu_mem_wr(g, ctxheader, | 1874 | nvgpu_mem_wr(g, ctxheader, |
1875 | ctxsw_prog_main_image_patch_adr_hi_o(), | ||
1876 | virt_addr_hi); | ||
1877 | } else { | ||
1878 | nvgpu_mem_wr(g, mem, | ||
1936 | ctxsw_prog_main_image_patch_adr_lo_o(), | 1879 | ctxsw_prog_main_image_patch_adr_lo_o(), |
1937 | virt_addr_lo); | 1880 | virt_addr_lo); |
1938 | nvgpu_mem_wr(g, ctxheader, | 1881 | nvgpu_mem_wr(g, mem, |
1939 | ctxsw_prog_main_image_patch_adr_hi_o(), | 1882 | ctxsw_prog_main_image_patch_adr_hi_o(), |
1940 | virt_addr_hi); | 1883 | virt_addr_hi); |
1941 | } | 1884 | } |
@@ -6606,6 +6549,8 @@ static int gr_gk20a_ctx_patch_smpc(struct gk20a *g, | |||
6606 | u32 *ovr_perf_regs = NULL; | 6549 | u32 *ovr_perf_regs = NULL; |
6607 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); | 6550 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); |
6608 | u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); | 6551 | u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); |
6552 | struct ctx_header_desc *ctx = &ch_ctx->ctx_header; | ||
6553 | struct nvgpu_mem *ctxheader = &ctx->mem; | ||
6609 | 6554 | ||
6610 | g->ops.gr.init_ovr_sm_dsm_perf(); | 6555 | g->ops.gr.init_ovr_sm_dsm_perf(); |
6611 | g->ops.gr.init_sm_dsm_reg_info(); | 6556 | g->ops.gr.init_sm_dsm_reg_info(); |
@@ -6640,12 +6585,21 @@ static int gr_gk20a_ctx_patch_smpc(struct gk20a *g, | |||
6640 | nvgpu_mem_wr(g, mem, | 6585 | nvgpu_mem_wr(g, mem, |
6641 | ctxsw_prog_main_image_patch_count_o(), | 6586 | ctxsw_prog_main_image_patch_count_o(), |
6642 | ch_ctx->patch_ctx.data_count); | 6587 | ch_ctx->patch_ctx.data_count); |
6643 | nvgpu_mem_wr(g, mem, | 6588 | if (ctxheader->gpu_va) { |
6644 | ctxsw_prog_main_image_patch_adr_lo_o(), | 6589 | nvgpu_mem_wr(g, ctxheader, |
6645 | vaddr_lo); | 6590 | ctxsw_prog_main_image_patch_adr_lo_o(), |
6646 | nvgpu_mem_wr(g, mem, | 6591 | vaddr_lo); |
6647 | ctxsw_prog_main_image_patch_adr_hi_o(), | 6592 | nvgpu_mem_wr(g, ctxheader, |
6648 | vaddr_hi); | 6593 | ctxsw_prog_main_image_patch_adr_hi_o(), |
6594 | vaddr_hi); | ||
6595 | } else { | ||
6596 | nvgpu_mem_wr(g, mem, | ||
6597 | ctxsw_prog_main_image_patch_adr_lo_o(), | ||
6598 | vaddr_lo); | ||
6599 | nvgpu_mem_wr(g, mem, | ||
6600 | ctxsw_prog_main_image_patch_adr_hi_o(), | ||
6601 | vaddr_hi); | ||
6602 | } | ||
6649 | 6603 | ||
6650 | /* we're not caching these on cpu side, | 6604 | /* we're not caching these on cpu side, |
6651 | but later watch for it */ | 6605 | but later watch for it */ |
diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c index d3e45f55..fc27b120 100644 --- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c +++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c | |||
@@ -1168,6 +1168,9 @@ void gr_gp10b_update_ctxsw_preemption_mode(struct gk20a *g, | |||
1168 | struct nvgpu_mem *mem) | 1168 | struct nvgpu_mem *mem) |
1169 | { | 1169 | { |
1170 | struct gr_ctx_desc *gr_ctx = ch_ctx->gr_ctx; | 1170 | struct gr_ctx_desc *gr_ctx = ch_ctx->gr_ctx; |
1171 | struct ctx_header_desc *ctx = &ch_ctx->ctx_header; | ||
1172 | struct nvgpu_mem *ctxheader = &ctx->mem; | ||
1173 | |||
1171 | u32 gfxp_preempt_option = | 1174 | u32 gfxp_preempt_option = |
1172 | ctxsw_prog_main_image_graphics_preemption_options_control_gfxp_f(); | 1175 | ctxsw_prog_main_image_graphics_preemption_options_control_gfxp_f(); |
1173 | u32 cilp_preempt_option = | 1176 | u32 cilp_preempt_option = |
@@ -1204,9 +1207,14 @@ void gr_gp10b_update_ctxsw_preemption_mode(struct gk20a *g, | |||
1204 | u32 size; | 1207 | u32 size; |
1205 | u32 cbes_reserve; | 1208 | u32 cbes_reserve; |
1206 | 1209 | ||
1207 | if (g->ops.gr.set_preemption_buffer_va) | 1210 | if (g->ops.gr.set_preemption_buffer_va) { |
1208 | g->ops.gr.set_preemption_buffer_va(g, mem, | 1211 | if (ctxheader->gpu_va) |
1212 | g->ops.gr.set_preemption_buffer_va(g, ctxheader, | ||
1209 | gr_ctx->t18x.preempt_ctxsw_buffer.gpu_va); | 1213 | gr_ctx->t18x.preempt_ctxsw_buffer.gpu_va); |
1214 | else | ||
1215 | g->ops.gr.set_preemption_buffer_va(g, mem, | ||
1216 | gr_ctx->t18x.preempt_ctxsw_buffer.gpu_va); | ||
1217 | } | ||
1210 | 1218 | ||
1211 | err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx); | 1219 | err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx); |
1212 | if (err) { | 1220 | if (err) { |
@@ -2247,12 +2255,8 @@ int gr_gp10b_set_preemption_mode(struct channel_gk20a *ch, | |||
2247 | goto enable_ch; | 2255 | goto enable_ch; |
2248 | 2256 | ||
2249 | if (g->ops.gr.update_ctxsw_preemption_mode) { | 2257 | if (g->ops.gr.update_ctxsw_preemption_mode) { |
2250 | if (ctxheader->gpu_va) | 2258 | g->ops.gr.update_ctxsw_preemption_mode(ch->g, |
2251 | g->ops.gr.update_ctxsw_preemption_mode(ch->g, | 2259 | ch_ctx, mem); |
2252 | ch_ctx, ctxheader); | ||
2253 | else | ||
2254 | g->ops.gr.update_ctxsw_preemption_mode(ch->g, | ||
2255 | ch_ctx, mem); | ||
2256 | 2260 | ||
2257 | err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx); | 2261 | err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx); |
2258 | if (err) { | 2262 | if (err) { |