path: root/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
author		Konsta Holtta <kholtta@nvidia.com>	2018-06-25 08:47:15 -0400
committer	mobile promotions <svcmobile_promotions@nvidia.com>	2018-06-28 14:13:35 -0400
commit		dd146d42fc910128b0e2987d12b83430bf97bae0 (patch)
tree		23d3f808eb6ea65dd8ab90398a22ff52fce09736 /drivers/gpu/nvgpu/gk20a/gr_gk20a.c
parent		2dda362e6395a6d486ba3d1a75e707933690023e (diff)
gpu: nvgpu: don't mem_{begin,end}() for gr
Now that GR buffers always have a kernel mapping, remove the unnecessary
calls to nvgpu_mem_begin() and nvgpu_mem_end() on these buffers:

- global ctx buffer mem in gr
- gr ctx mem in a tsg
- patch ctx mem in a gr ctx
- pm ctx mem in a gr ctx
- ctx_header mem in a channel (subctx header)

Change-Id: Id2a8ad108aef8db8b16dce5bae8003bbcd3b23e4
Signed-off-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1760599
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Alex Waterman <alexw@nvidia.com>
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
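For context, the accessors involved are the ones visible in the hunks below:
nvgpu_mem_begin() maps a buffer for CPU access (and can fail), nvgpu_mem_rd()
and nvgpu_mem_wr() access the buffer at a given offset, and nvgpu_mem_end()
drops the mapping. The following is a minimal before/after sketch of the
pattern being removed, modeled on gr_gk20a_get_ctx_id() from the first hunk;
the helper names here are illustrative only and not part of the patch.

/*
 * Illustrative sketch only; helper names are hypothetical. The real
 * change is shown verbatim in the diff below.
 */

/* Before: bracket each CPU access with a temporary kernel mapping. */
static int read_ctx_id_old(struct gk20a *g, struct nvgpu_mem *mem, u32 *ctx_id)
{
	if (nvgpu_mem_begin(g, mem))	/* map for CPU access; may fail */
		return -ENOMEM;

	*ctx_id = nvgpu_mem_rd(g, mem,
			ctxsw_prog_main_image_context_id_o());

	nvgpu_mem_end(g, mem);		/* drop the temporary mapping */
	return 0;
}

/*
 * After: the GR buffer keeps a permanent kernel mapping, so it is
 * accessed directly and the -ENOMEM/goto error paths disappear.
 */
static int read_ctx_id_new(struct gk20a *g, struct nvgpu_mem *mem, u32 *ctx_id)
{
	*ctx_id = nvgpu_mem_rd(g, mem,
			ctxsw_prog_main_image_context_id_o());
	return 0;
}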
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/gr_gk20a.c')
-rw-r--r--	drivers/gpu/nvgpu/gk20a/gr_gk20a.c	119
1 file changed, 4 insertions, 115 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index 28ccb896..b218397a 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -111,15 +111,10 @@ int gr_gk20a_get_ctx_id(struct gk20a *g,
 	   Flush and invalidate before cpu update. */
 	g->ops.mm.l2_flush(g, true);
 
-	if (nvgpu_mem_begin(g, mem))
-		return -ENOMEM;
-
 	*ctx_id = nvgpu_mem_rd(g, mem,
 			ctxsw_prog_main_image_context_id_o());
 	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_intr, "ctx_id: 0x%x", *ctx_id);
 
-	nvgpu_mem_end(g, mem);
-
 	return 0;
 }
 
@@ -696,12 +691,6 @@ int gr_gk20a_ctx_patch_write_begin(struct gk20a *g,
 			struct nvgpu_gr_ctx *gr_ctx,
 			bool update_patch_count)
 {
-	int err = 0;
-
-	err = nvgpu_mem_begin(g, &gr_ctx->patch_ctx.mem);
-	if (err)
-		return err;
-
 	if (update_patch_count) {
 		/* reset patch count if ucode has already processed it */
 		gr_ctx->patch_ctx.data_count = nvgpu_mem_rd(g,
@@ -717,8 +706,6 @@ void gr_gk20a_ctx_patch_write_end(struct gk20a *g,
 			struct nvgpu_gr_ctx *gr_ctx,
 			bool update_patch_count)
 {
-	nvgpu_mem_end(g, &gr_ctx->patch_ctx.mem);
-
 	/* Write context count to context image if it is mapped */
 	if (update_patch_count) {
 		nvgpu_mem_wr(g, &gr_ctx->mem,
@@ -832,31 +819,22 @@ static int gr_gk20a_ctx_zcull_setup(struct gk20a *g, struct channel_gk20a *c)
 	gr_ctx = &tsg->gr_ctx;
 	mem = &gr_ctx->mem;
 
-	if (nvgpu_mem_begin(g, mem))
-		return -ENOMEM;
-
-	if (nvgpu_mem_begin(g, ctxheader)) {
-		ret = -ENOMEM;
-		goto clean_up_mem;
-	}
-
 	if (gr_ctx->zcull_ctx.gpu_va == 0 &&
 	    gr_ctx->zcull_ctx.ctx_sw_mode ==
 		ctxsw_prog_main_image_zcull_mode_separate_buffer_v()) {
-		ret = -EINVAL;
-		goto clean_up;
+		return -EINVAL;
 	}
 
 	ret = gk20a_disable_channel_tsg(g, c);
 	if (ret) {
 		nvgpu_err(g, "failed to disable channel/TSG");
-		goto clean_up;
+		return ret;
 	}
 	ret = gk20a_fifo_preempt(g, c);
 	if (ret) {
 		gk20a_enable_channel_tsg(g, c);
 		nvgpu_err(g, "failed to preempt channel/TSG");
-		goto clean_up;
+		return ret;
 	}
 
 	nvgpu_mem_wr(g, mem,
@@ -871,11 +849,6 @@ static int gr_gk20a_ctx_zcull_setup(struct gk20a *g, struct channel_gk20a *c)
 
 	gk20a_enable_channel_tsg(g, c);
 
-clean_up:
-	nvgpu_mem_end(g, ctxheader);
-clean_up_mem:
-	nvgpu_mem_end(g, mem);
-
 	return ret;
 }
 
@@ -1597,12 +1570,6 @@ restore_fe_go_idle:
 			goto restore_fe_go_idle;
 	}
 
-	if (nvgpu_mem_begin(g, gold_mem))
-		goto clean_up;
-
-	if (nvgpu_mem_begin(g, gr_mem))
-		goto clean_up;
-
 	ctx_header_words = roundup(ctx_header_bytes, sizeof(u32));
 	ctx_header_words >>= 2;
 
@@ -1655,9 +1622,6 @@ clean_up:
 	else
 		nvgpu_log_fn(g, "done");
 
-	nvgpu_mem_end(g, gold_mem);
-	nvgpu_mem_end(g, gr_mem);
-
 	nvgpu_mutex_release(&gr->ctx_mutex);
 	return err;
 }
@@ -1701,11 +1665,6 @@ int gr_gk20a_update_smpc_ctxsw_mode(struct gk20a *g,
 	   Flush and invalidate before cpu update. */
 	g->ops.mm.l2_flush(g, true);
 
-	if (nvgpu_mem_begin(g, mem)) {
-		ret = -ENOMEM;
-		goto out;
-	}
-
 	data = nvgpu_mem_rd(g, mem,
 			ctxsw_prog_main_image_pm_o());
 
@@ -1717,7 +1676,6 @@ int gr_gk20a_update_smpc_ctxsw_mode(struct gk20a *g,
 	nvgpu_mem_wr(g, mem,
 		ctxsw_prog_main_image_pm_o(), data);
 
-	nvgpu_mem_end(g, mem);
 out:
 	gk20a_enable_channel_tsg(g, c);
 	return ret;
@@ -1807,24 +1765,7 @@ int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g,
 		}
 
 		/* Now clear the buffer */
-		if (nvgpu_mem_begin(g, &pm_ctx->mem)) {
-			ret = -ENOMEM;
-			goto cleanup_pm_buf;
-		}
-
 		nvgpu_memset(g, &pm_ctx->mem, 0, 0, pm_ctx->mem.size);
-
-		nvgpu_mem_end(g, &pm_ctx->mem);
-	}
-
-	if (nvgpu_mem_begin(g, gr_mem)) {
-		ret = -ENOMEM;
-		goto cleanup_pm_buf;
-	}
-
-	if (nvgpu_mem_begin(g, ctxheader)) {
-		ret = -ENOMEM;
-		goto clean_up_mem;
 	}
 
 	data = nvgpu_mem_rd(g, gr_mem, ctxsw_prog_main_image_pm_o());
@@ -1848,22 +1789,10 @@ int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g,
 	else
 		g->ops.gr.write_pm_ptr(g, gr_mem, virt_addr);
 
-	nvgpu_mem_end(g, ctxheader);
-	nvgpu_mem_end(g, gr_mem);
-
 	/* enable channel */
 	gk20a_enable_channel_tsg(g, c);
 
 	return 0;
-clean_up_mem:
-	nvgpu_mem_end(g, gr_mem);
-cleanup_pm_buf:
-	nvgpu_gmmu_unmap(c->vm, &pm_ctx->mem, pm_ctx->mem.gpu_va);
-	nvgpu_dma_free(g, &pm_ctx->mem);
-	memset(&pm_ctx->mem, 0, sizeof(struct nvgpu_mem));
-
-	gk20a_enable_channel_tsg(g, c);
-	return ret;
 }
 
 void gk20a_gr_init_ctxsw_hdr_data(struct gk20a *g,
@@ -1904,9 +1833,6 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
 	   Flush and invalidate before cpu update. */
 	g->ops.mm.l2_flush(g, true);
 
-	if (nvgpu_mem_begin(g, mem))
-		return -ENOMEM;
-
 	nvgpu_mem_wr_n(g, mem, 0,
 		gr->ctx_vars.local_golden_image,
 		gr->ctx_vars.golden_image_size);
@@ -1973,7 +1899,6 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
 		if (gr_ctx->pm_ctx.mem.gpu_va == 0) {
 			nvgpu_err(g,
 				"context switched pm with no pm buffer!");
-			nvgpu_mem_end(g, mem);
 			return -EFAULT;
 		}
 
@@ -1989,8 +1914,6 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
 
 	g->ops.gr.write_pm_ptr(g, mem, virt_addr);
 
-	nvgpu_mem_end(g, mem);
-
 	return ret;
 }
 
@@ -4840,12 +4763,6 @@ static int gr_gk20a_init_access_map(struct gk20a *g)
 	u32 *whitelist = NULL;
 	int w, num_entries = 0;
 
-	if (nvgpu_mem_begin(g, mem)) {
-		nvgpu_err(g,
-			"failed to map priv access map memory");
-		return -ENOMEM;
-	}
-
 	nvgpu_memset(g, mem, 0, 0, PAGE_SIZE * nr_pages);
 
 	g->ops.gr.get_access_map(g, &whitelist, &num_entries);
@@ -4864,7 +4781,6 @@ static int gr_gk20a_init_access_map(struct gk20a *g)
 		nvgpu_mem_wr32(g, mem, map_byte / sizeof(u32), x);
 	}
 
-	nvgpu_mem_end(g, mem);
 	return 0;
 }
 
@@ -6758,22 +6674,12 @@ static int gr_gk20a_ctx_patch_smpc(struct gk20a *g,
 			ctxsw_prog_main_image_patch_count_o(),
 			gr_ctx->patch_ctx.data_count);
 		if (ctxheader->gpu_va) {
-			/*
-			 * Main context can be gr_ctx or pm_ctx.
-			 * CPU access for relevant ctx is taken
-			 * care of in the calling function
-			 * __gr_gk20a_exec_ctx_ops. Need to take
-			 * care of cpu access to ctxheader here.
-			 */
-			if (nvgpu_mem_begin(g, ctxheader))
-				return -ENOMEM;
 			nvgpu_mem_wr(g, ctxheader,
 				ctxsw_prog_main_image_patch_adr_lo_o(),
 				vaddr_lo);
 			nvgpu_mem_wr(g, ctxheader,
 				ctxsw_prog_main_image_patch_adr_hi_o(),
 				vaddr_hi);
-			nvgpu_mem_end(g, ctxheader);
 		} else {
 			nvgpu_mem_wr(g, mem,
 				ctxsw_prog_main_image_patch_adr_lo_o(),
@@ -8038,17 +7944,8 @@ int __gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
 				ctx_ops[i].type == REGOP(TYPE_GR_CTX_QUAD),
 				ctx_ops[i].quad);
 			if (!err) {
-				if (!gr_ctx_ready) {
-					/* would have been a variant of
-					 * gr_gk20a_apply_instmem_overrides,
-					 * recoded in-place instead.
-					 */
-					if (nvgpu_mem_begin(g, &gr_ctx->mem)) {
-						err = -ENOMEM;
-						goto cleanup;
-					}
+				if (!gr_ctx_ready)
 					gr_ctx_ready = true;
-				}
 				current_mem = &gr_ctx->mem;
 			} else {
 				err = gr_gk20a_get_pm_ctx_buffer_offsets(g,
@@ -8072,10 +7969,6 @@ int __gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
 					err = -EINVAL;
 					goto cleanup;
 				}
-				if (nvgpu_mem_begin(g, &gr_ctx->pm_ctx.mem)) {
-					err = -ENOMEM;
-					goto cleanup;
-				}
 				pm_ctx_ready = true;
 			}
 			current_mem = &gr_ctx->pm_ctx.mem;
@@ -8148,10 +8041,6 @@ int __gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
 
 	if (gr_ctx->patch_ctx.mem.cpu_va)
 		gr_gk20a_ctx_patch_write_end(g, gr_ctx, gr_ctx_ready);
-	if (gr_ctx_ready)
-		nvgpu_mem_end(g, &gr_ctx->mem);
-	if (pm_ctx_ready)
-		nvgpu_mem_end(g, &gr_ctx->pm_ctx.mem);
 
 	return err;
 }