author    Konsta Holtta <kholtta@nvidia.com>    2016-05-12 02:31:30 -0400
committer Deepak Nibade <dnibade@nvidia.com>    2016-12-27 04:56:15 -0500
commit    18a017865946617fd63256858a0d2300160643f4 (patch)
tree      310822095e53bb0bb377f4955df7e2cc0f62fc7d /drivers/gpu/nvgpu
parent    e746a16f7abbaacba89e390c692620941fc1b34c (diff)
gpu: nvgpu: refactor gk20a_mem_{wr,rd} for vidmem
To support vidmem, pass g and mem_desc to the buffer memory accessor
functions. This allows the functions to select the memory access method
based on the buffer aperture instead of using the cpu pointer directly
(as done until now). The selection and aperture support will be in another
patch; this patch only refactors these accessors, but keeps the underlying
functionality as-is.

JIRA DNVGPU-23

Change-Id: I21d4a54827b0e2741012dfde7952c0555a583435
Signed-off-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-on: http://git-master/r/1121914
GVS: Gerrit_Virtual_Submit
Reviewed-by: Ken Adams <kadams@nvidia.com>
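The essence of the change is the accessor signature: instead of a raw CPU pointer, the gk20a_mem_{rd,wr}32, gk20a_mem_{rd,wr} and gk20a_memset helpers now take the device (g) and the buffer's mem_desc. The sketch below is purely illustrative of that shape; the example struct, field names and helper names (other than the parameter shape visible in the hunks) are assumptions, not the in-tree definitions, and the sysmem-only body stands in for the aperture dispatch that a later patch adds.

	/* Illustrative sketch only: refactored accessor shape, still sysmem-only. */
	#include <linux/types.h>

	struct gk20a;

	struct mem_desc_example {
		void *cpu_va;	/* CPU mapping; valid in the sysmem case */
		/* the real mem_desc also carries aperture, sgt, size, ... */
	};

	static inline u32 example_mem_rd32(struct gk20a *g,
			struct mem_desc_example *mem, u32 w)
	{
		u32 *ptr = mem->cpu_va;	/* a later patch would choose the access
					 * method from the buffer aperture here */
		return ptr[w];
	}

	static inline void example_mem_wr32(struct gk20a *g,
			struct mem_desc_example *mem, u32 w, u32 data)
	{
		u32 *ptr = mem->cpu_va;

		ptr[w] = data;
	}

Call sites change accordingly, e.g. gk20a_mem_wr32(g, &c->inst_block, w, data) rather than gk20a_mem_wr32(inst_ptr, w, data), and explicit vmap()/vunmap() pairs become gk20a_mem_begin()/gk20a_mem_end(), as the hunks below show.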
Diffstat (limited to 'drivers/gpu/nvgpu')
-rw-r--r--  drivers/gpu/nvgpu/gp10b/fifo_gp10b.c  61
-rw-r--r--  drivers/gpu/nvgpu/gp10b/gr_gp10b.c    78
-rw-r--r--  drivers/gpu/nvgpu/gp10b/mm_gp10b.c    44
3 files changed, 91 insertions(+), 92 deletions(-)
diff --git a/drivers/gpu/nvgpu/gp10b/fifo_gp10b.c b/drivers/gpu/nvgpu/gp10b/fifo_gp10b.c
index 9cb26d3f..4766e0e4 100644
--- a/drivers/gpu/nvgpu/gp10b/fifo_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/fifo_gp10b.c
@@ -25,24 +25,24 @@
 #include "hw_ram_gp10b.h"
 
 static void gp10b_set_pdb_fault_replay_flags(struct gk20a *g,
-		void *inst_ptr)
+		struct mem_desc *mem)
 {
 	u32 val;
 
 	gk20a_dbg_fn("");
 
-	val = gk20a_mem_rd32(inst_ptr,
+	val = gk20a_mem_rd32(g, mem,
 		ram_in_page_dir_base_fault_replay_tex_w());
 	val &= ~ram_in_page_dir_base_fault_replay_tex_m();
 	val |= ram_in_page_dir_base_fault_replay_tex_true_f();
-	gk20a_mem_wr32(inst_ptr,
+	gk20a_mem_wr32(g, mem,
 		ram_in_page_dir_base_fault_replay_tex_w(), val);
 
-	val = gk20a_mem_rd32(inst_ptr,
+	val = gk20a_mem_rd32(g, mem,
 		ram_in_page_dir_base_fault_replay_gcc_w());
 	val &= ~ram_in_page_dir_base_fault_replay_gcc_m();
 	val |= ram_in_page_dir_base_fault_replay_gcc_true_f();
-	gk20a_mem_wr32(inst_ptr,
+	gk20a_mem_wr32(g, mem,
 		ram_in_page_dir_base_fault_replay_gcc_w(), val);
 
 	gk20a_dbg_fn("done");
@@ -52,28 +52,25 @@ static int channel_gp10b_commit_userd(struct channel_gk20a *c)
 {
 	u32 addr_lo;
 	u32 addr_hi;
-	void *inst_ptr;
 	struct gk20a *g = c->g;
 
 	gk20a_dbg_fn("");
 
-	inst_ptr = c->inst_block.cpu_va;
-	if (!inst_ptr)
-		return -ENOMEM;
-
 	addr_lo = u64_lo32(c->userd_iova >> ram_userd_base_shift_v());
 	addr_hi = u64_hi32(c->userd_iova);
 
 	gk20a_dbg_info("channel %d : set ramfc userd 0x%16llx",
 		c->hw_chid, (u64)c->userd_iova);
 
-	gk20a_mem_wr32(inst_ptr, ram_in_ramfc_w() + ram_fc_userd_w(),
+	gk20a_mem_wr32(g, &c->inst_block,
+		ram_in_ramfc_w() + ram_fc_userd_w(),
 		(g->mm.vidmem_is_vidmem ?
 		 pbdma_userd_target_sys_mem_ncoh_f() :
 		 pbdma_userd_target_vid_mem_f()) |
 		pbdma_userd_addr_f(addr_lo));
 
-	gk20a_mem_wr32(inst_ptr, ram_in_ramfc_w() + ram_fc_userd_hi_w(),
+	gk20a_mem_wr32(g, &c->inst_block,
+		ram_in_ramfc_w() + ram_fc_userd_hi_w(),
 		pbdma_userd_hi_addr_f(addr_hi));
 
 	return 0;
@@ -82,33 +79,30 @@ static int channel_gp10b_commit_userd(struct channel_gk20a *c)
 static int channel_gp10b_setup_ramfc(struct channel_gk20a *c,
 		u64 gpfifo_base, u32 gpfifo_entries, u32 flags)
 {
-	void *inst_ptr;
+	struct gk20a *g = c->g;
+	struct mem_desc *mem = &c->inst_block;
 
 	gk20a_dbg_fn("");
 
-	inst_ptr = c->inst_block.cpu_va;
-	if (!inst_ptr)
-		return -ENOMEM;
+	gk20a_memset(g, mem, 0, 0, ram_fc_size_val_v());
 
-	memset(inst_ptr, 0, ram_fc_size_val_v());
-
-	gk20a_mem_wr32(inst_ptr, ram_fc_gp_base_w(),
+	gk20a_mem_wr32(g, mem, ram_fc_gp_base_w(),
 		pbdma_gp_base_offset_f(
 		u64_lo32(gpfifo_base >> pbdma_gp_base_rsvd_s())));
 
-	gk20a_mem_wr32(inst_ptr, ram_fc_gp_base_hi_w(),
+	gk20a_mem_wr32(g, mem, ram_fc_gp_base_hi_w(),
 		pbdma_gp_base_hi_offset_f(u64_hi32(gpfifo_base)) |
 		pbdma_gp_base_hi_limit2_f(ilog2(gpfifo_entries)));
 
-	gk20a_mem_wr32(inst_ptr, ram_fc_signature_w(),
+	gk20a_mem_wr32(g, mem, ram_fc_signature_w(),
 		c->g->ops.fifo.get_pbdma_signature(c->g));
 
-	gk20a_mem_wr32(inst_ptr, ram_fc_formats_w(),
+	gk20a_mem_wr32(g, mem, ram_fc_formats_w(),
 		pbdma_formats_gp_fermi0_f() |
 		pbdma_formats_pb_fermi1_f() |
 		pbdma_formats_mp_fermi0_f());
 
-	gk20a_mem_wr32(inst_ptr, ram_fc_pb_header_w(),
+	gk20a_mem_wr32(g, mem, ram_fc_pb_header_w(),
 		pbdma_pb_header_priv_user_f() |
 		pbdma_pb_header_method_zero_f() |
 		pbdma_pb_header_subchannel_zero_f() |
@@ -116,26 +110,26 @@ static int channel_gp10b_setup_ramfc(struct channel_gk20a *c,
 		pbdma_pb_header_first_true_f() |
 		pbdma_pb_header_type_inc_f());
 
-	gk20a_mem_wr32(inst_ptr, ram_fc_subdevice_w(),
+	gk20a_mem_wr32(g, mem, ram_fc_subdevice_w(),
 		pbdma_subdevice_id_f(1) |
 		pbdma_subdevice_status_active_f() |
 		pbdma_subdevice_channel_dma_enable_f());
 
-	gk20a_mem_wr32(inst_ptr, ram_fc_target_w(), pbdma_target_engine_sw_f());
+	gk20a_mem_wr32(g, mem, ram_fc_target_w(), pbdma_target_engine_sw_f());
 
-	gk20a_mem_wr32(inst_ptr, ram_fc_acquire_w(),
+	gk20a_mem_wr32(g, mem, ram_fc_acquire_w(),
 		channel_gk20a_pbdma_acquire_val(c));
 
-	gk20a_mem_wr32(inst_ptr, ram_fc_runlist_timeslice_w(),
+	gk20a_mem_wr32(g, mem, ram_fc_runlist_timeslice_w(),
 		pbdma_runlist_timeslice_timeout_128_f() |
 		pbdma_runlist_timeslice_timescale_3_f() |
 		pbdma_runlist_timeslice_enable_true_f());
 
 	if ( flags & NVGPU_ALLOC_GPFIFO_FLAGS_REPLAYABLE_FAULTS_ENABLE)
-		gp10b_set_pdb_fault_replay_flags(c->g, inst_ptr);
+		gp10b_set_pdb_fault_replay_flags(c->g, mem);
 
 
-	gk20a_mem_wr32(inst_ptr, ram_fc_chid_w(), ram_fc_chid_id_f(c->hw_chid));
+	gk20a_mem_wr32(g, mem, ram_fc_chid_w(), ram_fc_chid_id_f(c->hw_chid));
 
 	return channel_gp10b_commit_userd(c);
 }
@@ -149,14 +143,12 @@ static u32 gp10b_fifo_get_pbdma_signature(struct gk20a *g)
 static int gp10b_fifo_resetup_ramfc(struct channel_gk20a *c)
 {
 	u32 new_syncpt = 0, old_syncpt;
-	void *inst_ptr;
 	u32 v;
 
 	gk20a_dbg_fn("");
 
-	inst_ptr = c->inst_block.cpu_va;
-
-	v = gk20a_mem_rd32(inst_ptr, ram_fc_allowed_syncpoints_w());
+	v = gk20a_mem_rd32(c->g, &c->inst_block,
+			ram_fc_allowed_syncpoints_w());
 	old_syncpt = pbdma_allowed_syncpoints_0_index_v(v);
 	if (c->sync)
 		new_syncpt = c->sync->syncpt_id(c->sync);
@@ -175,7 +167,8 @@ static int gp10b_fifo_resetup_ramfc(struct channel_gk20a *c)
 
 		v |= pbdma_allowed_syncpoints_0_index_f(new_syncpt);
 
-		gk20a_mem_wr32(inst_ptr, ram_fc_allowed_syncpoints_w(), v);
+		gk20a_mem_wr32(c->g, &c->inst_block,
+				ram_fc_allowed_syncpoints_w(), v);
 	}
 
 	/* enable channel */
diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index b36eff8f..07f1014f 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -958,52 +958,51 @@ fail_free_gk20a_ctx:
 }
 
 static void dump_ctx_switch_stats(struct gk20a *g, struct vm_gk20a *vm,
-		struct gr_ctx_desc *gr_ctx) {
-	void *ctx_ptr = vmap(gr_ctx->mem.pages,
-			PAGE_ALIGN(gr_ctx->mem.size) >> PAGE_SHIFT,
-			0, pgprot_writecombine(PAGE_KERNEL));
-	if (!ctx_ptr) {
+		struct gr_ctx_desc *gr_ctx)
+{
+	struct mem_desc *mem = &gr_ctx->mem;
+
+	if (gk20a_mem_begin(g, mem)) {
 		WARN_ON("Cannot map context");
 		return;
 	}
 	gk20a_err(dev_from_gk20a(g), "ctxsw_prog_main_image_magic_value_o : %x (expect %x)\n",
-		gk20a_mem_rd32(ctx_ptr +
-			ctxsw_prog_main_image_magic_value_o(), 0),
+		gk20a_mem_rd(g, mem,
+			ctxsw_prog_main_image_magic_value_o()),
 		ctxsw_prog_main_image_magic_value_v_value_v());
 
 	gk20a_err(dev_from_gk20a(g), "ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi : %x\n",
-		gk20a_mem_rd32(ctx_ptr +
-			ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_o(), 0));
+		gk20a_mem_rd(g, mem,
+			ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_o()));
 
 	gk20a_err(dev_from_gk20a(g), "ctxsw_prog_main_image_context_timestamp_buffer_ptr : %x\n",
-		gk20a_mem_rd32(ctx_ptr +
-			ctxsw_prog_main_image_context_timestamp_buffer_ptr_o(), 0));
+		gk20a_mem_rd(g, mem,
+			ctxsw_prog_main_image_context_timestamp_buffer_ptr_o()));
 
 	gk20a_err(dev_from_gk20a(g), "ctxsw_prog_main_image_context_timestamp_buffer_control : %x\n",
-		gk20a_mem_rd32(ctx_ptr +
-			ctxsw_prog_main_image_context_timestamp_buffer_control_o(), 0));
+		gk20a_mem_rd(g, mem,
+			ctxsw_prog_main_image_context_timestamp_buffer_control_o()));
 
 	gk20a_err(dev_from_gk20a(g), "NUM_SAVE_OPERATIONS : %d\n",
-		gk20a_mem_rd32(ctx_ptr +
-			ctxsw_prog_main_image_num_save_ops_o(), 0));
+		gk20a_mem_rd(g, mem,
+			ctxsw_prog_main_image_num_save_ops_o()));
 	gk20a_err(dev_from_gk20a(g), "WFI_SAVE_OPERATIONS : %d\n",
-		gk20a_mem_rd32(ctx_ptr +
-			ctxsw_prog_main_image_num_wfi_save_ops_o(), 0));
+		gk20a_mem_rd(g, mem,
+			ctxsw_prog_main_image_num_wfi_save_ops_o()));
 	gk20a_err(dev_from_gk20a(g), "CTA_SAVE_OPERATIONS : %d\n",
-		gk20a_mem_rd32(ctx_ptr +
-			ctxsw_prog_main_image_num_cta_save_ops_o(), 0));
+		gk20a_mem_rd(g, mem,
+			ctxsw_prog_main_image_num_cta_save_ops_o()));
 	gk20a_err(dev_from_gk20a(g), "GFXP_SAVE_OPERATIONS : %d\n",
-		gk20a_mem_rd32(ctx_ptr +
-			ctxsw_prog_main_image_num_gfxp_save_ops_o(), 0));
+		gk20a_mem_rd(g, mem,
+			ctxsw_prog_main_image_num_gfxp_save_ops_o()));
 	gk20a_err(dev_from_gk20a(g), "CILP_SAVE_OPERATIONS : %d\n",
-		gk20a_mem_rd32(ctx_ptr +
-			ctxsw_prog_main_image_num_cilp_save_ops_o(), 0));
+		gk20a_mem_rd(g, mem,
+			ctxsw_prog_main_image_num_cilp_save_ops_o()));
 	gk20a_err(dev_from_gk20a(g),
 		"image gfx preemption option (GFXP is 1) %x\n",
-		gk20a_mem_rd32(ctx_ptr +
-			ctxsw_prog_main_image_graphics_preemption_options_o(),
-			0));
-	vunmap(ctx_ptr);
+		gk20a_mem_rd(g, mem,
+			ctxsw_prog_main_image_graphics_preemption_options_o()));
+	gk20a_mem_end(g, mem);
 }
 
 static void gr_gp10b_free_gr_ctx(struct gk20a *g, struct vm_gk20a *vm,
@@ -1028,7 +1027,7 @@ static void gr_gp10b_free_gr_ctx(struct gk20a *g, struct vm_gk20a *vm,
 
 static void gr_gp10b_update_ctxsw_preemption_mode(struct gk20a *g,
 		struct channel_ctx_gk20a *ch_ctx,
-		void *ctx_ptr)
+		struct mem_desc *mem)
 {
 	struct gr_ctx_desc *gr_ctx = ch_ctx->gr_ctx;
 	u32 gfxp_preempt_option =
@@ -1043,19 +1042,22 @@ static void gr_gp10b_update_ctxsw_preemption_mode(struct gk20a *g,
 
 	if (gr_ctx->graphics_preempt_mode == NVGPU_GRAPHICS_PREEMPTION_MODE_GFXP) {
 		gk20a_dbg_info("GfxP: %x", gfxp_preempt_option);
-		gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_graphics_preemption_options_o(), 0,
+		gk20a_mem_wr(g, mem,
+			ctxsw_prog_main_image_graphics_preemption_options_o(),
 			gfxp_preempt_option);
 	}
 
 	if (gr_ctx->compute_preempt_mode == NVGPU_COMPUTE_PREEMPTION_MODE_CILP) {
 		gk20a_dbg_info("CILP: %x", cilp_preempt_option);
-		gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_compute_preemption_options_o(), 0,
+		gk20a_mem_wr(g, mem,
+			ctxsw_prog_main_image_compute_preemption_options_o(),
 			cilp_preempt_option);
 	}
 
 	if (gr_ctx->compute_preempt_mode == NVGPU_COMPUTE_PREEMPTION_MODE_CTA) {
 		gk20a_dbg_info("CTA: %x", cta_preempt_option);
-		gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_compute_preemption_options_o(), 0,
+		gk20a_mem_wr(g, mem,
+			ctxsw_prog_main_image_compute_preemption_options_o(),
 			cta_preempt_option);
 	}
 
@@ -1064,7 +1066,8 @@ static void gr_gp10b_update_ctxsw_preemption_mode(struct gk20a *g,
 		u32 size;
 		u32 cbes_reserve;
 
-		gk20a_mem_wr32(ctx_ptr + ctxsw_prog_main_image_full_preemption_ptr_o(), 0,
+		gk20a_mem_wr(g, mem,
+			ctxsw_prog_main_image_full_preemption_ptr_o(),
 			gr_ctx->t18x.preempt_ctxsw_buffer.gpu_va >> 8);
 
 		err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx);
@@ -1931,7 +1934,7 @@ static int gr_gp10b_set_preemption_mode(struct channel_gk20a *ch,
 	struct gk20a *g = ch->g;
 	struct tsg_gk20a *tsg;
 	struct vm_gk20a *vm;
-	void *ctx_ptr;
+	struct mem_desc *mem = &gr_ctx->mem;
 	u32 class;
 	int err = 0;
 
@@ -1955,10 +1958,7 @@ static int gr_gp10b_set_preemption_mode(struct channel_gk20a *ch,
 	if (err)
 		return err;
 
-	ctx_ptr = vmap(gr_ctx->mem.pages,
-			PAGE_ALIGN(ch_ctx->gr_ctx->mem.size) >> PAGE_SHIFT,
-			0, pgprot_writecombine(PAGE_KERNEL));
-	if (!ctx_ptr)
+	if (gk20a_mem_begin(g, mem))
 		return -ENOMEM;
 
 	g->ops.fifo.disable_channel(ch);
@@ -1967,14 +1967,14 @@ static int gr_gp10b_set_preemption_mode(struct channel_gk20a *ch,
 		goto unmap_ctx;
 
 	if (g->ops.gr.update_ctxsw_preemption_mode) {
-		g->ops.gr.update_ctxsw_preemption_mode(ch->g, ch_ctx, ctx_ptr);
+		g->ops.gr.update_ctxsw_preemption_mode(ch->g, ch_ctx, mem);
 		g->ops.gr.commit_global_cb_manager(g, ch, true);
 	}
 
 	g->ops.fifo.enable_channel(ch);
 
 unmap_ctx:
-	vunmap(ctx_ptr);
+	gk20a_mem_end(g, mem);
 
 	return err;
 }
diff --git a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
index 0e1f3c4b..0b693f7c 100644
--- a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
@@ -145,9 +145,14 @@ static u64 gp10b_mm_iova_addr(struct gk20a *g, struct scatterlist *sgl,
 	return gk20a_mm_smmu_vaddr_translate(g, sg_dma_address(sgl));
 }
 
-static u32 *pde3_from_index(struct gk20a_mm_entry *entry, u32 i)
+static u32 pde3_from_index(u32 i)
 {
-	return (u32 *) (((u8 *)entry->mem.cpu_va) + i*gmmu_new_pde__size_v());
+	return i * gmmu_new_pde__size_v() / sizeof(u32);
+}
+
+static u32 pte3_from_index(u32 i)
+{
+	return i * gmmu_new_pte__size_v() / sizeof(u32);
 }
 
 static u64 entry_addr(struct gk20a *g, struct gk20a_mm_entry *entry)
@@ -176,7 +181,7 @@ static int update_gmmu_pde3_locked(struct vm_gk20a *vm,
 	u64 pde_addr = 0;
 	struct gk20a_mm_entry *pte = parent->entries + i;
 	u32 pde_v[2] = {0, 0};
-	u32 *pde;
+	u32 pde;
 
 	gk20a_dbg_fn("");
 
@@ -189,10 +194,10 @@ static int update_gmmu_pde3_locked(struct vm_gk20a *vm,
 	pde_v[0] |= gmmu_new_pde_address_sys_f(u64_lo32(pte_addr));
 	pde_v[0] |= gmmu_new_pde_vol_true_f();
 	pde_v[1] |= pte_addr >> 24;
-	pde = pde3_from_index(parent, i);
+	pde = pde3_from_index(i);
 
-	gk20a_mem_wr32(pde, 0, pde_v[0]);
-	gk20a_mem_wr32(pde, 1, pde_v[1]);
+	gk20a_mem_wr32(g, &parent->mem, pde + 0, pde_v[0]);
+	gk20a_mem_wr32(g, &parent->mem, pde + 1, pde_v[1]);
 
 	gk20a_dbg(gpu_dbg_pte, "pde:%d,sz=%d = 0x%x,0x%08x",
 		i, gmmu_pgsz_idx, pde_v[1], pde_v[0]);
@@ -200,9 +205,9 @@ static int update_gmmu_pde3_locked(struct vm_gk20a *vm,
 	return 0;
 }
 
-static u32 *pde0_from_index(struct gk20a_mm_entry *entry, u32 i)
+static u32 pde0_from_index(u32 i)
 {
-	return (u32 *) (((u8 *)entry->mem.cpu_va) + i*gmmu_new_dual_pde__size_v());
+	return i * gmmu_new_dual_pde__size_v() / sizeof(u32);
 }
 
 static int update_gmmu_pde0_locked(struct vm_gk20a *vm,
@@ -220,7 +225,7 @@ static int update_gmmu_pde0_locked(struct vm_gk20a *vm,
 	u32 pte_addr_small = 0, pte_addr_big = 0;
 	struct gk20a_mm_entry *entry = pte->entries + i;
 	u32 pde_v[4] = {0, 0, 0, 0};
-	u32 *pde;
+	u32 pde;
 
 	gk20a_dbg_fn("");
 
@@ -254,12 +259,12 @@ static int update_gmmu_pde0_locked(struct vm_gk20a *vm,
 		pde_v[1] |= pte_addr_big >> 28;
 	}
 
-	pde = pde0_from_index(pte, i);
+	pde = pde0_from_index(i);
 
-	gk20a_mem_wr32(pde, 0, pde_v[0]);
-	gk20a_mem_wr32(pde, 1, pde_v[1]);
-	gk20a_mem_wr32(pde, 2, pde_v[2]);
-	gk20a_mem_wr32(pde, 3, pde_v[3]);
+	gk20a_mem_wr32(g, &pte->mem, pde + 0, pde_v[0]);
+	gk20a_mem_wr32(g, &pte->mem, pde + 1, pde_v[1]);
+	gk20a_mem_wr32(g, &pte->mem, pde + 2, pde_v[2]);
+	gk20a_mem_wr32(g, &pte->mem, pde + 3, pde_v[3]);
 
 	gk20a_dbg(gpu_dbg_pte, "pde:%d,sz=%d [0x%08x, 0x%08x, 0x%x, 0x%08x]",
 		i, gmmu_pgsz_idx, pde_v[3], pde_v[2], pde_v[1], pde_v[0]);
@@ -323,8 +328,8 @@ static int update_gmmu_pte_locked(struct vm_gk20a *vm,
 		gk20a_dbg(gpu_dbg_pte, "pte_cur=%d [0x0,0x0]", i);
 	}
 
-	gk20a_mem_wr32(pte->mem.cpu_va + i*8, 0, pte_w[0]);
-	gk20a_mem_wr32(pte->mem.cpu_va + i*8, 1, pte_w[1]);
+	gk20a_mem_wr32(g, &pte->mem, pte3_from_index(i) + 0, pte_w[0]);
+	gk20a_mem_wr32(g, &pte->mem, pte3_from_index(i) + 1, pte_w[1]);
 
 	if (*iova) {
 		*iova += page_size;
@@ -376,12 +381,13 @@ static const struct gk20a_mmu_level *gp10b_mm_get_mmu_levels(struct gk20a *g,
 	return gp10b_mm_levels;
 }
 
-static void gp10b_mm_init_pdb(struct gk20a *g, void *inst_ptr, u64 pdb_addr)
+static void gp10b_mm_init_pdb(struct gk20a *g, struct mem_desc *mem,
+		u64 pdb_addr)
 {
 	u32 pdb_addr_lo = u64_lo32(pdb_addr >> ram_in_base_shift_v());
 	u32 pdb_addr_hi = u64_hi32(pdb_addr);
 
-	gk20a_mem_wr32(inst_ptr, ram_in_page_dir_base_lo_w(),
+	gk20a_mem_wr32(g, mem, ram_in_page_dir_base_lo_w(),
 		(g->mm.vidmem_is_vidmem ?
 		 ram_in_page_dir_base_target_sys_mem_ncoh_f() :
 		 ram_in_page_dir_base_target_vid_mem_f()) |
@@ -389,7 +395,7 @@ static void gp10b_mm_init_pdb(struct gk20a *g, void *inst_ptr, u64 pdb_addr)
 		ram_in_page_dir_base_lo_f(pdb_addr_lo) |
 		1 << 10);
 
-	gk20a_mem_wr32(inst_ptr, ram_in_page_dir_base_hi_w(),
+	gk20a_mem_wr32(g, mem, ram_in_page_dir_base_hi_w(),
 		ram_in_page_dir_base_hi_f(pdb_addr_hi));
 }
 