diff options
33 files changed, 833 insertions, 739 deletions
diff --git a/drivers/gpu/nvgpu/common/linux/debug_fifo.c b/drivers/gpu/nvgpu/common/linux/debug_fifo.c index ad157ee7..aeab0c92 100644 --- a/drivers/gpu/nvgpu/common/linux/debug_fifo.c +++ b/drivers/gpu/nvgpu/common/linux/debug_fifo.c | |||
@@ -91,8 +91,8 @@ static int gk20a_fifo_sched_debugfs_seq_show( | |||
91 | tsg->timeslice_us, | 91 | tsg->timeslice_us, |
92 | ch->timeout_ms_max, | 92 | ch->timeout_ms_max, |
93 | tsg->interleave_level, | 93 | tsg->interleave_level, |
94 | ch->ch_ctx.gr_ctx ? ch->ch_ctx.gr_ctx->graphics_preempt_mode : U32_MAX, | 94 | tsg->gr_ctx.graphics_preempt_mode, |
95 | ch->ch_ctx.gr_ctx ? ch->ch_ctx.gr_ctx->compute_preempt_mode : U32_MAX); | 95 | tsg->gr_ctx.compute_preempt_mode); |
96 | gk20a_channel_put(ch); | 96 | gk20a_channel_put(ch); |
97 | } | 97 | } |
98 | return 0; | 98 | return 0; |
diff --git a/drivers/gpu/nvgpu/common/linux/ioctl_channel.c b/drivers/gpu/nvgpu/common/linux/ioctl_channel.c index 94501a89..e8f4c14b 100644 --- a/drivers/gpu/nvgpu/common/linux/ioctl_channel.c +++ b/drivers/gpu/nvgpu/common/linux/ioctl_channel.c | |||
@@ -85,10 +85,10 @@ static void gk20a_channel_trace_sched_param( | |||
85 | tsg_gk20a_from_ch(ch)->timeslice_us, | 85 | tsg_gk20a_from_ch(ch)->timeslice_us, |
86 | ch->timeout_ms_max, | 86 | ch->timeout_ms_max, |
87 | gk20a_fifo_interleave_level_name(tsg->interleave_level), | 87 | gk20a_fifo_interleave_level_name(tsg->interleave_level), |
88 | gr_gk20a_graphics_preempt_mode_name(ch->ch_ctx.gr_ctx ? | 88 | gr_gk20a_graphics_preempt_mode_name( |
89 | ch->ch_ctx.gr_ctx->graphics_preempt_mode : 0), | 89 | tsg->gr_ctx.graphics_preempt_mode), |
90 | gr_gk20a_compute_preempt_mode_name(ch->ch_ctx.gr_ctx ? | 90 | gr_gk20a_compute_preempt_mode_name( |
91 | ch->ch_ctx.gr_ctx->compute_preempt_mode : 0)); | 91 | tsg->gr_ctx.compute_preempt_mode)); |
92 | } | 92 | } |
93 | 93 | ||
94 | /* | 94 | /* |
diff --git a/drivers/gpu/nvgpu/common/linux/sched.c b/drivers/gpu/nvgpu/common/linux/sched.c index fc3f6ed8..e6211790 100644 --- a/drivers/gpu/nvgpu/common/linux/sched.c +++ b/drivers/gpu/nvgpu/common/linux/sched.c | |||
@@ -198,15 +198,10 @@ static int gk20a_sched_dev_ioctl_get_params(struct gk20a_sched_ctrl *sched, | |||
198 | arg->runlist_interleave = tsg->interleave_level; | 198 | arg->runlist_interleave = tsg->interleave_level; |
199 | arg->timeslice = tsg->timeslice_us; | 199 | arg->timeslice = tsg->timeslice_us; |
200 | 200 | ||
201 | if (tsg->tsg_gr_ctx) { | 201 | arg->graphics_preempt_mode = |
202 | arg->graphics_preempt_mode = | 202 | tsg->gr_ctx.graphics_preempt_mode; |
203 | tsg->tsg_gr_ctx->graphics_preempt_mode; | 203 | arg->compute_preempt_mode = |
204 | arg->compute_preempt_mode = | 204 | tsg->gr_ctx.compute_preempt_mode; |
205 | tsg->tsg_gr_ctx->compute_preempt_mode; | ||
206 | } else { | ||
207 | arg->graphics_preempt_mode = 0; | ||
208 | arg->compute_preempt_mode = 0; | ||
209 | } | ||
210 | 205 | ||
211 | nvgpu_ref_put(&tsg->refcount, nvgpu_ioctl_tsg_release); | 206 | nvgpu_ref_put(&tsg->refcount, nvgpu_ioctl_tsg_release); |
212 | 207 | ||
diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/gp10b/vgpu_gr_gp10b.c b/drivers/gpu/nvgpu/common/linux/vgpu/gp10b/vgpu_gr_gp10b.c index ed61f16b..9adf20d1 100644 --- a/drivers/gpu/nvgpu/common/linux/vgpu/gp10b/vgpu_gr_gp10b.c +++ b/drivers/gpu/nvgpu/common/linux/vgpu/gp10b/vgpu_gr_gp10b.c | |||
@@ -27,12 +27,11 @@ | |||
27 | #include <nvgpu/hw/gp10b/hw_gr_gp10b.h> | 27 | #include <nvgpu/hw/gp10b/hw_gr_gp10b.h> |
28 | 28 | ||
29 | int vgpu_gr_gp10b_alloc_gr_ctx(struct gk20a *g, | 29 | int vgpu_gr_gp10b_alloc_gr_ctx(struct gk20a *g, |
30 | struct gr_ctx_desc **__gr_ctx, | 30 | struct nvgpu_gr_ctx *gr_ctx, |
31 | struct vm_gk20a *vm, | 31 | struct vm_gk20a *vm, |
32 | u32 class, | 32 | u32 class, |
33 | u32 flags) | 33 | u32 flags) |
34 | { | 34 | { |
35 | struct gr_ctx_desc *gr_ctx; | ||
36 | u32 graphics_preempt_mode = 0; | 35 | u32 graphics_preempt_mode = 0; |
37 | u32 compute_preempt_mode = 0; | 36 | u32 compute_preempt_mode = 0; |
38 | struct vgpu_priv_data *priv = vgpu_get_priv_data(g); | 37 | struct vgpu_priv_data *priv = vgpu_get_priv_data(g); |
@@ -40,12 +39,10 @@ int vgpu_gr_gp10b_alloc_gr_ctx(struct gk20a *g, | |||
40 | 39 | ||
41 | gk20a_dbg_fn(""); | 40 | gk20a_dbg_fn(""); |
42 | 41 | ||
43 | err = vgpu_gr_alloc_gr_ctx(g, __gr_ctx, vm, class, flags); | 42 | err = vgpu_gr_alloc_gr_ctx(g, gr_ctx, vm, class, flags); |
44 | if (err) | 43 | if (err) |
45 | return err; | 44 | return err; |
46 | 45 | ||
47 | gr_ctx = *__gr_ctx; | ||
48 | |||
49 | if (flags & NVGPU_OBJ_CTX_FLAGS_SUPPORT_GFXP) | 46 | if (flags & NVGPU_OBJ_CTX_FLAGS_SUPPORT_GFXP) |
50 | graphics_preempt_mode = NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP; | 47 | graphics_preempt_mode = NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP; |
51 | if (flags & NVGPU_OBJ_CTX_FLAGS_SUPPORT_CILP) | 48 | if (flags & NVGPU_OBJ_CTX_FLAGS_SUPPORT_CILP) |
@@ -84,7 +81,7 @@ fail: | |||
84 | } | 81 | } |
85 | 82 | ||
86 | int vgpu_gr_gp10b_set_ctxsw_preemption_mode(struct gk20a *g, | 83 | int vgpu_gr_gp10b_set_ctxsw_preemption_mode(struct gk20a *g, |
87 | struct gr_ctx_desc *gr_ctx, | 84 | struct nvgpu_gr_ctx *gr_ctx, |
88 | struct vm_gk20a *vm, u32 class, | 85 | struct vm_gk20a *vm, u32 class, |
89 | u32 graphics_preempt_mode, | 86 | u32 graphics_preempt_mode, |
90 | u32 compute_preempt_mode) | 87 | u32 compute_preempt_mode) |
@@ -240,7 +237,7 @@ int vgpu_gr_gp10b_set_preemption_mode(struct channel_gk20a *ch, | |||
240 | u32 graphics_preempt_mode, | 237 | u32 graphics_preempt_mode, |
241 | u32 compute_preempt_mode) | 238 | u32 compute_preempt_mode) |
242 | { | 239 | { |
243 | struct gr_ctx_desc *gr_ctx = ch->ch_ctx.gr_ctx; | 240 | struct nvgpu_gr_ctx *gr_ctx; |
244 | struct gk20a *g = ch->g; | 241 | struct gk20a *g = ch->g; |
245 | struct tsg_gk20a *tsg; | 242 | struct tsg_gk20a *tsg; |
246 | struct vm_gk20a *vm; | 243 | struct vm_gk20a *vm; |
@@ -251,6 +248,13 @@ int vgpu_gr_gp10b_set_preemption_mode(struct channel_gk20a *ch, | |||
251 | if (!class) | 248 | if (!class) |
252 | return -EINVAL; | 249 | return -EINVAL; |
253 | 250 | ||
251 | tsg = tsg_gk20a_from_ch(ch); | ||
252 | if (!tsg) | ||
253 | return -EINVAL; | ||
254 | |||
255 | vm = tsg->vm; | ||
256 | gr_ctx = &tsg->gr_ctx; | ||
257 | |||
254 | /* skip setting anything if both modes are already set */ | 258 | /* skip setting anything if both modes are already set */ |
255 | if (graphics_preempt_mode && | 259 | if (graphics_preempt_mode && |
256 | (graphics_preempt_mode == gr_ctx->graphics_preempt_mode)) | 260 | (graphics_preempt_mode == gr_ctx->graphics_preempt_mode)) |
@@ -263,13 +267,6 @@ int vgpu_gr_gp10b_set_preemption_mode(struct channel_gk20a *ch, | |||
263 | if (graphics_preempt_mode == 0 && compute_preempt_mode == 0) | 267 | if (graphics_preempt_mode == 0 && compute_preempt_mode == 0) |
264 | return 0; | 268 | return 0; |
265 | 269 | ||
266 | if (gk20a_is_channel_marked_as_tsg(ch)) { | ||
267 | tsg = &g->fifo.tsg[ch->tsgid]; | ||
268 | vm = tsg->vm; | ||
269 | } else { | ||
270 | vm = ch->vm; | ||
271 | } | ||
272 | |||
273 | if (g->ops.gr.set_ctxsw_preemption_mode) { | 270 | if (g->ops.gr.set_ctxsw_preemption_mode) { |
274 | err = g->ops.gr.set_ctxsw_preemption_mode(g, gr_ctx, vm, class, | 271 | err = g->ops.gr.set_ctxsw_preemption_mode(g, gr_ctx, vm, class, |
275 | graphics_preempt_mode, | 272 | graphics_preempt_mode, |
diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/gp10b/vgpu_gr_gp10b.h b/drivers/gpu/nvgpu/common/linux/vgpu/gp10b/vgpu_gr_gp10b.h index 31b88d19..559bd227 100644 --- a/drivers/gpu/nvgpu/common/linux/vgpu/gp10b/vgpu_gr_gp10b.h +++ b/drivers/gpu/nvgpu/common/linux/vgpu/gp10b/vgpu_gr_gp10b.h | |||
@@ -20,12 +20,12 @@ | |||
20 | #include "gk20a/gk20a.h" | 20 | #include "gk20a/gk20a.h" |
21 | 21 | ||
22 | int vgpu_gr_gp10b_alloc_gr_ctx(struct gk20a *g, | 22 | int vgpu_gr_gp10b_alloc_gr_ctx(struct gk20a *g, |
23 | struct gr_ctx_desc **__gr_ctx, | 23 | struct nvgpu_gr_ctx *gr_ctx, |
24 | struct vm_gk20a *vm, | 24 | struct vm_gk20a *vm, |
25 | u32 class, | 25 | u32 class, |
26 | u32 flags); | 26 | u32 flags); |
27 | int vgpu_gr_gp10b_set_ctxsw_preemption_mode(struct gk20a *g, | 27 | int vgpu_gr_gp10b_set_ctxsw_preemption_mode(struct gk20a *g, |
28 | struct gr_ctx_desc *gr_ctx, | 28 | struct nvgpu_gr_ctx *gr_ctx, |
29 | struct vm_gk20a *vm, u32 class, | 29 | struct vm_gk20a *vm, u32 class, |
30 | u32 graphics_preempt_mode, | 30 | u32 graphics_preempt_mode, |
31 | u32 compute_preempt_mode); | 31 | u32 compute_preempt_mode); |
diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/gp10b/vgpu_hal_gp10b.c b/drivers/gpu/nvgpu/common/linux/vgpu/gp10b/vgpu_hal_gp10b.c index e8cb96b4..d5fd5102 100644 --- a/drivers/gpu/nvgpu/common/linux/vgpu/gp10b/vgpu_hal_gp10b.c +++ b/drivers/gpu/nvgpu/common/linux/vgpu/gp10b/vgpu_hal_gp10b.c | |||
@@ -112,7 +112,6 @@ static const struct gpu_ops vgpu_gp10b_ops = { | |||
112 | .load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode, | 112 | .load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode, |
113 | .set_gpc_tpc_mask = gr_gp10b_set_gpc_tpc_mask, | 113 | .set_gpc_tpc_mask = gr_gp10b_set_gpc_tpc_mask, |
114 | .get_gpc_tpc_mask = vgpu_gr_get_gpc_tpc_mask, | 114 | .get_gpc_tpc_mask = vgpu_gr_get_gpc_tpc_mask, |
115 | .free_channel_ctx = vgpu_gr_free_channel_ctx, | ||
116 | .alloc_obj_ctx = vgpu_gr_alloc_obj_ctx, | 115 | .alloc_obj_ctx = vgpu_gr_alloc_obj_ctx, |
117 | .bind_ctxsw_zcull = vgpu_gr_bind_ctxsw_zcull, | 116 | .bind_ctxsw_zcull = vgpu_gr_bind_ctxsw_zcull, |
118 | .get_zcull_info = vgpu_gr_get_zcull_info, | 117 | .get_zcull_info = vgpu_gr_get_zcull_info, |
diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/gr_vgpu.c b/drivers/gpu/nvgpu/common/linux/vgpu/gr_vgpu.c index e8790587..8f1c5d78 100644 --- a/drivers/gpu/nvgpu/common/linux/vgpu/gr_vgpu.c +++ b/drivers/gpu/nvgpu/common/linux/vgpu/gr_vgpu.c | |||
@@ -20,14 +20,18 @@ | |||
20 | 20 | ||
21 | #include <nvgpu/kmem.h> | 21 | #include <nvgpu/kmem.h> |
22 | #include <nvgpu/bug.h> | 22 | #include <nvgpu/bug.h> |
23 | #include <nvgpu/dma.h> | ||
23 | #include <nvgpu/error_notifier.h> | 24 | #include <nvgpu/error_notifier.h> |
24 | #include <nvgpu/dma.h> | 25 | #include <nvgpu/dma.h> |
25 | 26 | ||
26 | #include "vgpu.h" | 27 | #include "vgpu.h" |
27 | #include "gr_vgpu.h" | 28 | #include "gr_vgpu.h" |
28 | #include "gk20a/dbg_gpu_gk20a.h" | 29 | #include "gk20a/dbg_gpu_gk20a.h" |
30 | #include "gk20a/channel_gk20a.h" | ||
31 | #include "gk20a/tsg_gk20a.h" | ||
29 | 32 | ||
30 | #include <nvgpu/hw/gk20a/hw_gr_gk20a.h> | 33 | #include <nvgpu/hw/gk20a/hw_gr_gk20a.h> |
34 | #include <nvgpu/hw/gk20a/hw_ctxsw_prog_gk20a.h> | ||
31 | 35 | ||
32 | void vgpu_gr_detect_sm_arch(struct gk20a *g) | 36 | void vgpu_gr_detect_sm_arch(struct gk20a *g) |
33 | { | 37 | { |
@@ -152,8 +156,9 @@ static int vgpu_gr_map_global_ctx_buffers(struct gk20a *g, | |||
152 | struct tegra_vgpu_cmd_msg msg; | 156 | struct tegra_vgpu_cmd_msg msg; |
153 | struct tegra_vgpu_ch_ctx_params *p = &msg.params.ch_ctx; | 157 | struct tegra_vgpu_ch_ctx_params *p = &msg.params.ch_ctx; |
154 | struct vm_gk20a *ch_vm = c->vm; | 158 | struct vm_gk20a *ch_vm = c->vm; |
155 | u64 *g_bfr_va = c->ch_ctx.global_ctx_buffer_va; | 159 | struct tsg_gk20a *tsg; |
156 | u64 *g_bfr_size = c->ch_ctx.global_ctx_buffer_size; | 160 | u64 *g_bfr_va; |
161 | u64 *g_bfr_size; | ||
157 | struct gr_gk20a *gr = &g->gr; | 162 | struct gr_gk20a *gr = &g->gr; |
158 | u64 gpu_va; | 163 | u64 gpu_va; |
159 | u32 i; | 164 | u32 i; |
@@ -161,7 +166,12 @@ static int vgpu_gr_map_global_ctx_buffers(struct gk20a *g, | |||
161 | 166 | ||
162 | gk20a_dbg_fn(""); | 167 | gk20a_dbg_fn(""); |
163 | 168 | ||
164 | /* FIXME: add VPR support */ | 169 | tsg = tsg_gk20a_from_ch(c); |
170 | if (!tsg) | ||
171 | return -EINVAL; | ||
172 | |||
173 | g_bfr_va = tsg->gr_ctx.global_ctx_buffer_va; | ||
174 | g_bfr_size = tsg->gr_ctx.global_ctx_buffer_size; | ||
165 | 175 | ||
166 | /* Circular Buffer */ | 176 | /* Circular Buffer */ |
167 | gpu_va = __nvgpu_vm_alloc_va(ch_vm, | 177 | gpu_va = __nvgpu_vm_alloc_va(ch_vm, |
@@ -213,7 +223,7 @@ static int vgpu_gr_map_global_ctx_buffers(struct gk20a *g, | |||
213 | if (err || msg.ret) | 223 | if (err || msg.ret) |
214 | goto clean_up; | 224 | goto clean_up; |
215 | 225 | ||
216 | c->ch_ctx.global_ctx_buffer_mapped = true; | 226 | tsg->gr_ctx.global_ctx_buffer_mapped = true; |
217 | return 0; | 227 | return 0; |
218 | 228 | ||
219 | clean_up: | 229 | clean_up: |
@@ -227,40 +237,33 @@ static int vgpu_gr_map_global_ctx_buffers(struct gk20a *g, | |||
227 | return -ENOMEM; | 237 | return -ENOMEM; |
228 | } | 238 | } |
229 | 239 | ||
230 | static void vgpu_gr_unmap_global_ctx_buffers(struct channel_gk20a *c) | 240 | static void vgpu_gr_unmap_global_ctx_buffers(struct tsg_gk20a *tsg) |
231 | { | 241 | { |
232 | struct vm_gk20a *ch_vm = c->vm; | 242 | struct vm_gk20a *ch_vm = tsg->vm; |
233 | u64 *g_bfr_va = c->ch_ctx.global_ctx_buffer_va; | 243 | u64 *g_bfr_va = tsg->gr_ctx.global_ctx_buffer_va; |
234 | u64 *g_bfr_size = c->ch_ctx.global_ctx_buffer_size; | 244 | u64 *g_bfr_size = tsg->gr_ctx.global_ctx_buffer_size; |
235 | u32 i; | 245 | u32 i; |
236 | 246 | ||
237 | gk20a_dbg_fn(""); | 247 | gk20a_dbg_fn(""); |
238 | 248 | ||
239 | if (c->ch_ctx.global_ctx_buffer_mapped) { | 249 | if (tsg->gr_ctx.global_ctx_buffer_mapped) { |
240 | struct tegra_vgpu_cmd_msg msg; | 250 | /* server will unmap on channel close */ |
241 | struct tegra_vgpu_ch_ctx_params *p = &msg.params.ch_ctx; | ||
242 | int err; | ||
243 | 251 | ||
244 | msg.cmd = TEGRA_VGPU_CMD_CHANNEL_UNMAP_GR_GLOBAL_CTX; | 252 | for (i = 0; i < NR_GLOBAL_CTX_BUF_VA; i++) { |
245 | msg.handle = vgpu_get_handle(c->g); | 253 | if (g_bfr_va[i]) { |
246 | p->handle = c->virt_ctx; | 254 | __nvgpu_vm_free_va(ch_vm, g_bfr_va[i], |
247 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); | 255 | gmmu_page_size_kernel); |
248 | WARN_ON(err || msg.ret); | 256 | g_bfr_va[i] = 0; |
249 | } | 257 | g_bfr_size[i] = 0; |
250 | 258 | } | |
251 | for (i = 0; i < NR_GLOBAL_CTX_BUF_VA; i++) { | ||
252 | if (g_bfr_va[i]) { | ||
253 | __nvgpu_vm_free_va(ch_vm, g_bfr_va[i], | ||
254 | gmmu_page_size_kernel); | ||
255 | g_bfr_va[i] = 0; | ||
256 | g_bfr_size[i] = 0; | ||
257 | } | 259 | } |
260 | |||
261 | tsg->gr_ctx.global_ctx_buffer_mapped = false; | ||
258 | } | 262 | } |
259 | c->ch_ctx.global_ctx_buffer_mapped = false; | ||
260 | } | 263 | } |
261 | 264 | ||
262 | int vgpu_gr_alloc_gr_ctx(struct gk20a *g, | 265 | int vgpu_gr_alloc_gr_ctx(struct gk20a *g, |
263 | struct gr_ctx_desc **__gr_ctx, | 266 | struct nvgpu_gr_ctx *gr_ctx, |
264 | struct vm_gk20a *vm, | 267 | struct vm_gk20a *vm, |
265 | u32 class, | 268 | u32 class, |
266 | u32 flags) | 269 | u32 flags) |
@@ -268,7 +271,6 @@ int vgpu_gr_alloc_gr_ctx(struct gk20a *g, | |||
268 | struct tegra_vgpu_cmd_msg msg = {0}; | 271 | struct tegra_vgpu_cmd_msg msg = {0}; |
269 | struct tegra_vgpu_gr_ctx_params *p = &msg.params.gr_ctx; | 272 | struct tegra_vgpu_gr_ctx_params *p = &msg.params.gr_ctx; |
270 | struct gr_gk20a *gr = &g->gr; | 273 | struct gr_gk20a *gr = &g->gr; |
271 | struct gr_ctx_desc *gr_ctx; | ||
272 | int err; | 274 | int err; |
273 | 275 | ||
274 | gk20a_dbg_fn(""); | 276 | gk20a_dbg_fn(""); |
@@ -280,19 +282,14 @@ int vgpu_gr_alloc_gr_ctx(struct gk20a *g, | |||
280 | gr->ctx_vars.buffer_size = gr->ctx_vars.golden_image_size; | 282 | gr->ctx_vars.buffer_size = gr->ctx_vars.golden_image_size; |
281 | gr->ctx_vars.buffer_total_size = gr->ctx_vars.golden_image_size; | 283 | gr->ctx_vars.buffer_total_size = gr->ctx_vars.golden_image_size; |
282 | 284 | ||
283 | gr_ctx = nvgpu_kzalloc(g, sizeof(*gr_ctx)); | ||
284 | if (!gr_ctx) | ||
285 | return -ENOMEM; | ||
286 | |||
287 | gr_ctx->mem.size = gr->ctx_vars.buffer_total_size; | ||
288 | gr_ctx->mem.gpu_va = __nvgpu_vm_alloc_va(vm, | 285 | gr_ctx->mem.gpu_va = __nvgpu_vm_alloc_va(vm, |
289 | gr_ctx->mem.size, | 286 | gr->ctx_vars.buffer_total_size, |
290 | gmmu_page_size_kernel); | 287 | gmmu_page_size_kernel); |
291 | 288 | ||
292 | if (!gr_ctx->mem.gpu_va) { | 289 | if (!gr_ctx->mem.gpu_va) |
293 | nvgpu_kfree(g, gr_ctx); | ||
294 | return -ENOMEM; | 290 | return -ENOMEM; |
295 | } | 291 | gr_ctx->mem.size = gr->ctx_vars.buffer_total_size; |
292 | gr_ctx->mem.aperture = APERTURE_SYSMEM; | ||
296 | 293 | ||
297 | msg.cmd = TEGRA_VGPU_CMD_GR_CTX_ALLOC; | 294 | msg.cmd = TEGRA_VGPU_CMD_GR_CTX_ALLOC; |
298 | msg.handle = vgpu_get_handle(g); | 295 | msg.handle = vgpu_get_handle(g); |
@@ -306,57 +303,19 @@ int vgpu_gr_alloc_gr_ctx(struct gk20a *g, | |||
306 | nvgpu_err(g, "fail to alloc gr_ctx"); | 303 | nvgpu_err(g, "fail to alloc gr_ctx"); |
307 | __nvgpu_vm_free_va(vm, gr_ctx->mem.gpu_va, | 304 | __nvgpu_vm_free_va(vm, gr_ctx->mem.gpu_va, |
308 | gmmu_page_size_kernel); | 305 | gmmu_page_size_kernel); |
309 | nvgpu_kfree(g, gr_ctx); | 306 | gr_ctx->mem.aperture = APERTURE_INVALID; |
310 | } else { | 307 | } else { |
311 | gr_ctx->virt_ctx = p->gr_ctx_handle; | 308 | gr_ctx->virt_ctx = p->gr_ctx_handle; |
312 | *__gr_ctx = gr_ctx; | ||
313 | } | 309 | } |
314 | 310 | ||
315 | return err; | 311 | return err; |
316 | } | 312 | } |
317 | 313 | ||
318 | void vgpu_gr_free_gr_ctx(struct gk20a *g, struct vm_gk20a *vm, | ||
319 | struct gr_ctx_desc *gr_ctx) | ||
320 | { | ||
321 | struct tegra_vgpu_cmd_msg msg; | ||
322 | struct tegra_vgpu_gr_ctx_params *p = &msg.params.gr_ctx; | ||
323 | int err; | ||
324 | |||
325 | gk20a_dbg_fn(""); | ||
326 | |||
327 | if (!gr_ctx || !gr_ctx->mem.gpu_va) | ||
328 | return; | ||
329 | |||
330 | |||
331 | msg.cmd = TEGRA_VGPU_CMD_GR_CTX_FREE; | ||
332 | msg.handle = vgpu_get_handle(g); | ||
333 | p->gr_ctx_handle = gr_ctx->virt_ctx; | ||
334 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); | ||
335 | WARN_ON(err || msg.ret); | ||
336 | |||
337 | __nvgpu_vm_free_va(vm, gr_ctx->mem.gpu_va, | ||
338 | gmmu_page_size_kernel); | ||
339 | |||
340 | nvgpu_dma_unmap_free(vm, &gr_ctx->pagepool_ctxsw_buffer); | ||
341 | nvgpu_dma_unmap_free(vm, &gr_ctx->betacb_ctxsw_buffer); | ||
342 | nvgpu_dma_unmap_free(vm, &gr_ctx->spill_ctxsw_buffer); | ||
343 | nvgpu_dma_unmap_free(vm, &gr_ctx->preempt_ctxsw_buffer); | ||
344 | |||
345 | nvgpu_kfree(g, gr_ctx); | ||
346 | } | ||
347 | |||
348 | static void vgpu_gr_free_channel_gr_ctx(struct channel_gk20a *c) | ||
349 | { | ||
350 | gk20a_dbg_fn(""); | ||
351 | |||
352 | c->g->ops.gr.free_gr_ctx(c->g, c->vm, c->ch_ctx.gr_ctx); | ||
353 | c->ch_ctx.gr_ctx = NULL; | ||
354 | } | ||
355 | |||
356 | static int vgpu_gr_alloc_channel_patch_ctx(struct gk20a *g, | 314 | static int vgpu_gr_alloc_channel_patch_ctx(struct gk20a *g, |
357 | struct channel_gk20a *c) | 315 | struct channel_gk20a *c) |
358 | { | 316 | { |
359 | struct patch_desc *patch_ctx = &c->ch_ctx.patch_ctx; | 317 | struct tsg_gk20a *tsg; |
318 | struct patch_desc *patch_ctx; | ||
360 | struct vm_gk20a *ch_vm = c->vm; | 319 | struct vm_gk20a *ch_vm = c->vm; |
361 | struct tegra_vgpu_cmd_msg msg; | 320 | struct tegra_vgpu_cmd_msg msg; |
362 | struct tegra_vgpu_ch_ctx_params *p = &msg.params.ch_ctx; | 321 | struct tegra_vgpu_ch_ctx_params *p = &msg.params.ch_ctx; |
@@ -364,6 +323,11 @@ static int vgpu_gr_alloc_channel_patch_ctx(struct gk20a *g, | |||
364 | 323 | ||
365 | gk20a_dbg_fn(""); | 324 | gk20a_dbg_fn(""); |
366 | 325 | ||
326 | tsg = tsg_gk20a_from_ch(c); | ||
327 | if (!tsg) | ||
328 | return -EINVAL; | ||
329 | |||
330 | patch_ctx = &tsg->gr_ctx.patch_ctx; | ||
367 | patch_ctx->mem.size = 128 * sizeof(u32); | 331 | patch_ctx->mem.size = 128 * sizeof(u32); |
368 | patch_ctx->mem.gpu_va = __nvgpu_vm_alloc_va(ch_vm, | 332 | patch_ctx->mem.gpu_va = __nvgpu_vm_alloc_va(ch_vm, |
369 | patch_ctx->mem.size, | 333 | patch_ctx->mem.size, |
@@ -385,37 +349,25 @@ static int vgpu_gr_alloc_channel_patch_ctx(struct gk20a *g, | |||
385 | return err; | 349 | return err; |
386 | } | 350 | } |
387 | 351 | ||
388 | static void vgpu_gr_free_channel_patch_ctx(struct channel_gk20a *c) | 352 | static void vgpu_gr_free_channel_patch_ctx(struct tsg_gk20a *tsg) |
389 | { | 353 | { |
390 | struct patch_desc *patch_ctx = &c->ch_ctx.patch_ctx; | 354 | struct patch_desc *patch_ctx = &tsg->gr_ctx.patch_ctx; |
391 | struct vm_gk20a *ch_vm = c->vm; | ||
392 | 355 | ||
393 | gk20a_dbg_fn(""); | 356 | gk20a_dbg_fn(""); |
394 | 357 | ||
395 | if (patch_ctx->mem.gpu_va) { | 358 | if (patch_ctx->mem.gpu_va) { |
396 | struct tegra_vgpu_cmd_msg msg; | 359 | /* server will free on channel close */ |
397 | struct tegra_vgpu_ch_ctx_params *p = &msg.params.ch_ctx; | ||
398 | int err; | ||
399 | 360 | ||
400 | msg.cmd = TEGRA_VGPU_CMD_CHANNEL_FREE_GR_PATCH_CTX; | 361 | __nvgpu_vm_free_va(tsg->vm, patch_ctx->mem.gpu_va, |
401 | msg.handle = vgpu_get_handle(c->g); | ||
402 | p->handle = c->virt_ctx; | ||
403 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); | ||
404 | WARN_ON(err || msg.ret); | ||
405 | |||
406 | __nvgpu_vm_free_va(ch_vm, patch_ctx->mem.gpu_va, | ||
407 | gmmu_page_size_kernel); | 362 | gmmu_page_size_kernel); |
408 | patch_ctx->mem.gpu_va = 0; | 363 | patch_ctx->mem.gpu_va = 0; |
409 | } | 364 | } |
410 | } | 365 | } |
411 | 366 | ||
412 | static void vgpu_gr_free_channel_pm_ctx(struct channel_gk20a *c) | 367 | static void vgpu_gr_free_channel_pm_ctx(struct tsg_gk20a *tsg) |
413 | { | 368 | { |
414 | struct tegra_vgpu_cmd_msg msg; | 369 | struct nvgpu_gr_ctx *ch_ctx = &tsg->gr_ctx; |
415 | struct tegra_vgpu_channel_free_hwpm_ctx *p = &msg.params.free_hwpm_ctx; | ||
416 | struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx; | ||
417 | struct pm_ctx_desc *pm_ctx = &ch_ctx->pm_ctx; | 370 | struct pm_ctx_desc *pm_ctx = &ch_ctx->pm_ctx; |
418 | int err; | ||
419 | 371 | ||
420 | gk20a_dbg_fn(""); | 372 | gk20a_dbg_fn(""); |
421 | 373 | ||
@@ -423,44 +375,63 @@ static void vgpu_gr_free_channel_pm_ctx(struct channel_gk20a *c) | |||
423 | if (pm_ctx->mem.gpu_va == 0) | 375 | if (pm_ctx->mem.gpu_va == 0) |
424 | return; | 376 | return; |
425 | 377 | ||
426 | msg.cmd = TEGRA_VGPU_CMD_CHANNEL_FREE_HWPM_CTX; | 378 | /* server will free on channel close */ |
427 | msg.handle = vgpu_get_handle(c->g); | ||
428 | p->handle = c->virt_ctx; | ||
429 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); | ||
430 | WARN_ON(err || msg.ret); | ||
431 | 379 | ||
432 | __nvgpu_vm_free_va(c->vm, pm_ctx->mem.gpu_va, | 380 | __nvgpu_vm_free_va(tsg->vm, pm_ctx->mem.gpu_va, |
433 | gmmu_page_size_kernel); | 381 | gmmu_page_size_kernel); |
434 | pm_ctx->mem.gpu_va = 0; | 382 | pm_ctx->mem.gpu_va = 0; |
435 | } | 383 | } |
436 | 384 | ||
437 | void vgpu_gr_free_channel_ctx(struct channel_gk20a *c, bool is_tsg) | 385 | void vgpu_gr_free_gr_ctx(struct gk20a *g, |
386 | struct vm_gk20a *vm, struct nvgpu_gr_ctx *gr_ctx) | ||
438 | { | 387 | { |
388 | struct tsg_gk20a *tsg; | ||
389 | |||
439 | gk20a_dbg_fn(""); | 390 | gk20a_dbg_fn(""); |
440 | 391 | ||
441 | if (c->g->ops.fifo.free_channel_ctx_header) | 392 | if (gr_ctx->mem.gpu_va) { |
442 | c->g->ops.fifo.free_channel_ctx_header(c); | 393 | struct tegra_vgpu_cmd_msg msg; |
443 | vgpu_gr_unmap_global_ctx_buffers(c); | 394 | struct tegra_vgpu_gr_ctx_params *p = &msg.params.gr_ctx; |
444 | vgpu_gr_free_channel_patch_ctx(c); | 395 | int err; |
445 | vgpu_gr_free_channel_pm_ctx(c); | ||
446 | if (!is_tsg) | ||
447 | vgpu_gr_free_channel_gr_ctx(c); | ||
448 | 396 | ||
449 | /* zcull_ctx, pm_ctx */ | 397 | msg.cmd = TEGRA_VGPU_CMD_GR_CTX_FREE; |
398 | msg.handle = vgpu_get_handle(g); | ||
399 | p->gr_ctx_handle = gr_ctx->virt_ctx; | ||
400 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); | ||
401 | WARN_ON(err || msg.ret); | ||
450 | 402 | ||
451 | memset(&c->ch_ctx, 0, sizeof(struct channel_ctx_gk20a)); | 403 | __nvgpu_vm_free_va(vm, gr_ctx->mem.gpu_va, |
404 | gmmu_page_size_kernel); | ||
405 | |||
406 | tsg = &g->fifo.tsg[gr_ctx->tsgid]; | ||
407 | vgpu_gr_unmap_global_ctx_buffers(tsg); | ||
408 | vgpu_gr_free_channel_patch_ctx(tsg); | ||
409 | vgpu_gr_free_channel_pm_ctx(tsg); | ||
410 | |||
411 | nvgpu_dma_unmap_free(vm, &gr_ctx->pagepool_ctxsw_buffer); | ||
412 | nvgpu_dma_unmap_free(vm, &gr_ctx->betacb_ctxsw_buffer); | ||
413 | nvgpu_dma_unmap_free(vm, &gr_ctx->spill_ctxsw_buffer); | ||
414 | nvgpu_dma_unmap_free(vm, &gr_ctx->preempt_ctxsw_buffer); | ||
452 | 415 | ||
453 | c->first_init = false; | 416 | memset(gr_ctx, 0, sizeof(*gr_ctx)); |
417 | } | ||
454 | } | 418 | } |
455 | 419 | ||
456 | static int vgpu_gr_ch_bind_gr_ctx(struct channel_gk20a *c) | 420 | static int vgpu_gr_ch_bind_gr_ctx(struct channel_gk20a *c) |
457 | { | 421 | { |
458 | struct gr_ctx_desc *gr_ctx = c->ch_ctx.gr_ctx; | 422 | struct tsg_gk20a *tsg; |
423 | struct nvgpu_gr_ctx *gr_ctx; | ||
459 | struct tegra_vgpu_cmd_msg msg = {0}; | 424 | struct tegra_vgpu_cmd_msg msg = {0}; |
460 | struct tegra_vgpu_channel_bind_gr_ctx_params *p = | 425 | struct tegra_vgpu_channel_bind_gr_ctx_params *p = |
461 | &msg.params.ch_bind_gr_ctx; | 426 | &msg.params.ch_bind_gr_ctx; |
462 | int err; | 427 | int err; |
463 | 428 | ||
429 | tsg = tsg_gk20a_from_ch(c); | ||
430 | if (!tsg) | ||
431 | return -EINVAL; | ||
432 | |||
433 | gr_ctx = &tsg->gr_ctx; | ||
434 | |||
464 | msg.cmd = TEGRA_VGPU_CMD_CHANNEL_BIND_GR_CTX; | 435 | msg.cmd = TEGRA_VGPU_CMD_CHANNEL_BIND_GR_CTX; |
465 | msg.handle = vgpu_get_handle(c->g); | 436 | msg.handle = vgpu_get_handle(c->g); |
466 | p->ch_handle = c->virt_ctx; | 437 | p->ch_handle = c->virt_ctx; |
@@ -474,7 +445,7 @@ static int vgpu_gr_ch_bind_gr_ctx(struct channel_gk20a *c) | |||
474 | 445 | ||
475 | static int vgpu_gr_tsg_bind_gr_ctx(struct tsg_gk20a *tsg) | 446 | static int vgpu_gr_tsg_bind_gr_ctx(struct tsg_gk20a *tsg) |
476 | { | 447 | { |
477 | struct gr_ctx_desc *gr_ctx = tsg->tsg_gr_ctx; | 448 | struct nvgpu_gr_ctx *gr_ctx = &tsg->gr_ctx; |
478 | struct tegra_vgpu_cmd_msg msg = {0}; | 449 | struct tegra_vgpu_cmd_msg msg = {0}; |
479 | struct tegra_vgpu_tsg_bind_gr_ctx_params *p = | 450 | struct tegra_vgpu_tsg_bind_gr_ctx_params *p = |
480 | &msg.params.tsg_bind_gr_ctx; | 451 | &msg.params.tsg_bind_gr_ctx; |
@@ -495,7 +466,7 @@ int vgpu_gr_alloc_obj_ctx(struct channel_gk20a *c, u32 class_num, u32 flags) | |||
495 | { | 466 | { |
496 | struct gk20a *g = c->g; | 467 | struct gk20a *g = c->g; |
497 | struct fifo_gk20a *f = &g->fifo; | 468 | struct fifo_gk20a *f = &g->fifo; |
498 | struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx; | 469 | struct nvgpu_gr_ctx *gr_ctx = NULL; |
499 | struct tsg_gk20a *tsg = NULL; | 470 | struct tsg_gk20a *tsg = NULL; |
500 | int err = 0; | 471 | int err = 0; |
501 | 472 | ||
@@ -515,95 +486,87 @@ int vgpu_gr_alloc_obj_ctx(struct channel_gk20a *c, u32 class_num, u32 flags) | |||
515 | } | 486 | } |
516 | c->obj_class = class_num; | 487 | c->obj_class = class_num; |
517 | 488 | ||
518 | if (gk20a_is_channel_marked_as_tsg(c)) | 489 | if (!gk20a_is_channel_marked_as_tsg(c)) |
519 | tsg = &f->tsg[c->tsgid]; | 490 | return -EINVAL; |
520 | 491 | ||
521 | if (!tsg) { | 492 | tsg = &f->tsg[c->tsgid]; |
522 | /* allocate gr ctx buffer */ | 493 | gr_ctx = &tsg->gr_ctx; |
523 | if (!ch_ctx->gr_ctx) { | 494 | |
524 | err = g->ops.gr.alloc_gr_ctx(g, &c->ch_ctx.gr_ctx, | 495 | if (!nvgpu_mem_is_valid(&gr_ctx->mem)) { |
525 | c->vm, | 496 | tsg->vm = c->vm; |
526 | class_num, | 497 | nvgpu_vm_get(tsg->vm); |
527 | flags); | 498 | err = g->ops.gr.alloc_gr_ctx(g, gr_ctx, |
528 | if (!err) | 499 | c->vm, |
529 | err = vgpu_gr_ch_bind_gr_ctx(c); | 500 | class_num, |
530 | if (err) { | 501 | flags); |
531 | nvgpu_err(g, "fail to allocate gr ctx buffer"); | 502 | if (!err) |
532 | goto out; | 503 | err = vgpu_gr_tsg_bind_gr_ctx(tsg); |
533 | } | 504 | if (err) { |
534 | } else { | ||
535 | /*TBD: needs to be more subtle about which is | ||
536 | * being allocated as some are allowed to be | ||
537 | * allocated along same channel */ | ||
538 | nvgpu_err(g, | 505 | nvgpu_err(g, |
539 | "too many classes alloc'd on same channel"); | 506 | "fail to allocate TSG gr ctx buffer, err=%d", err); |
540 | err = -EINVAL; | 507 | nvgpu_vm_put(tsg->vm); |
508 | tsg->vm = NULL; | ||
541 | goto out; | 509 | goto out; |
542 | } | 510 | } |
543 | } else { | ||
544 | if (!tsg->tsg_gr_ctx) { | ||
545 | tsg->vm = c->vm; | ||
546 | nvgpu_vm_get(tsg->vm); | ||
547 | err = g->ops.gr.alloc_gr_ctx(g, &tsg->tsg_gr_ctx, | ||
548 | c->vm, | ||
549 | class_num, | ||
550 | flags); | ||
551 | if (!err) | ||
552 | err = vgpu_gr_tsg_bind_gr_ctx(tsg); | ||
553 | if (err) { | ||
554 | nvgpu_err(g, | ||
555 | "fail to allocate TSG gr ctx buffer, err=%d", err); | ||
556 | nvgpu_vm_put(tsg->vm); | ||
557 | tsg->vm = NULL; | ||
558 | goto out; | ||
559 | } | ||
560 | } | ||
561 | 511 | ||
562 | ch_ctx->gr_ctx = tsg->tsg_gr_ctx; | ||
563 | err = vgpu_gr_ch_bind_gr_ctx(c); | 512 | err = vgpu_gr_ch_bind_gr_ctx(c); |
564 | if (err) { | 513 | if (err) { |
565 | nvgpu_err(g, "fail to bind gr ctx buffer"); | 514 | nvgpu_err(g, "fail to bind gr ctx buffer"); |
566 | goto out; | 515 | goto out; |
567 | } | 516 | } |
568 | } | ||
569 | 517 | ||
570 | /* commit gr ctx buffer */ | 518 | /* commit gr ctx buffer */ |
571 | err = g->ops.gr.commit_inst(c, ch_ctx->gr_ctx->mem.gpu_va); | 519 | err = g->ops.gr.commit_inst(c, gr_ctx->mem.gpu_va); |
572 | if (err) { | 520 | if (err) { |
573 | nvgpu_err(g, "fail to commit gr ctx buffer"); | 521 | nvgpu_err(g, "fail to commit gr ctx buffer"); |
574 | goto out; | 522 | goto out; |
575 | } | 523 | } |
576 | 524 | ||
577 | /* allocate patch buffer */ | 525 | /* allocate patch buffer */ |
578 | if (ch_ctx->patch_ctx.mem.priv.pages == NULL) { | ||
579 | err = vgpu_gr_alloc_channel_patch_ctx(g, c); | 526 | err = vgpu_gr_alloc_channel_patch_ctx(g, c); |
580 | if (err) { | 527 | if (err) { |
581 | nvgpu_err(g, "fail to allocate patch buffer"); | 528 | nvgpu_err(g, "fail to allocate patch buffer"); |
582 | goto out; | 529 | goto out; |
583 | } | 530 | } |
584 | } | ||
585 | 531 | ||
586 | /* map global buffer to channel gpu_va and commit */ | 532 | /* map global buffer to channel gpu_va and commit */ |
587 | if (!ch_ctx->global_ctx_buffer_mapped) { | ||
588 | err = vgpu_gr_map_global_ctx_buffers(g, c); | 533 | err = vgpu_gr_map_global_ctx_buffers(g, c); |
589 | if (err) { | 534 | if (err) { |
590 | nvgpu_err(g, "fail to map global ctx buffer"); | 535 | nvgpu_err(g, "fail to map global ctx buffer"); |
591 | goto out; | 536 | goto out; |
592 | } | 537 | } |
593 | vgpu_gr_commit_global_ctx_buffers(g, c, true); | ||
594 | } | ||
595 | 538 | ||
596 | /* load golden image */ | 539 | err = vgpu_gr_commit_global_ctx_buffers(g, c, true); |
597 | if (!c->first_init) { | 540 | if (err) { |
541 | nvgpu_err(g, "fail to commit global ctx buffers"); | ||
542 | goto out; | ||
543 | } | ||
544 | |||
545 | /* load golden image */ | ||
598 | err = gr_gk20a_elpg_protected_call(g, | 546 | err = gr_gk20a_elpg_protected_call(g, |
599 | vgpu_gr_load_golden_ctx_image(g, c)); | 547 | vgpu_gr_load_golden_ctx_image(g, c)); |
600 | if (err) { | 548 | if (err) { |
601 | nvgpu_err(g, "fail to load golden ctx image"); | 549 | nvgpu_err(g, "fail to load golden ctx image"); |
602 | goto out; | 550 | goto out; |
603 | } | 551 | } |
604 | c->first_init = true; | 552 | } else { |
553 | err = vgpu_gr_ch_bind_gr_ctx(c); | ||
554 | if (err) { | ||
555 | nvgpu_err(g, "fail to bind gr ctx buffer"); | ||
556 | goto out; | ||
557 | } | ||
558 | |||
559 | /* commit gr ctx buffer */ | ||
560 | err = g->ops.gr.commit_inst(c, gr_ctx->mem.gpu_va); | ||
561 | if (err) { | ||
562 | nvgpu_err(g, "fail to commit gr ctx buffer"); | ||
563 | goto out; | ||
564 | } | ||
605 | } | 565 | } |
606 | 566 | ||
567 | /* PM ctxt switch is off by default */ | ||
568 | gr_ctx->pm_ctx.pm_mode = ctxsw_prog_main_image_pm_mode_no_ctxsw_f(); | ||
569 | |||
607 | gk20a_dbg_fn("done"); | 570 | gk20a_dbg_fn("done"); |
608 | return 0; | 571 | return 0; |
609 | out: | 572 | out: |
@@ -1055,15 +1018,30 @@ int vgpu_gr_update_smpc_ctxsw_mode(struct gk20a *g, | |||
1055 | int vgpu_gr_update_hwpm_ctxsw_mode(struct gk20a *g, | 1018 | int vgpu_gr_update_hwpm_ctxsw_mode(struct gk20a *g, |
1056 | struct channel_gk20a *ch, bool enable) | 1019 | struct channel_gk20a *ch, bool enable) |
1057 | { | 1020 | { |
1058 | struct channel_ctx_gk20a *ch_ctx = &ch->ch_ctx; | 1021 | struct tsg_gk20a *tsg; |
1059 | struct pm_ctx_desc *pm_ctx = &ch_ctx->pm_ctx; | 1022 | struct nvgpu_gr_ctx *ch_ctx; |
1023 | struct pm_ctx_desc *pm_ctx; | ||
1060 | struct tegra_vgpu_cmd_msg msg; | 1024 | struct tegra_vgpu_cmd_msg msg; |
1061 | struct tegra_vgpu_channel_set_ctxsw_mode *p = &msg.params.set_ctxsw_mode; | 1025 | struct tegra_vgpu_channel_set_ctxsw_mode *p = &msg.params.set_ctxsw_mode; |
1062 | int err; | 1026 | int err; |
1063 | 1027 | ||
1064 | gk20a_dbg_fn(""); | 1028 | gk20a_dbg_fn(""); |
1065 | 1029 | ||
1030 | tsg = tsg_gk20a_from_ch(ch); | ||
1031 | if (!tsg) | ||
1032 | return -EINVAL; | ||
1033 | |||
1034 | ch_ctx = &tsg->gr_ctx; | ||
1035 | pm_ctx = &ch_ctx->pm_ctx; | ||
1036 | |||
1066 | if (enable) { | 1037 | if (enable) { |
1038 | /* | ||
1039 | * send command to enable HWPM only once - otherwise server | ||
1040 | * will return an error due to using the same GPU VA twice. | ||
1041 | */ | ||
1042 | if (pm_ctx->pm_mode == ctxsw_prog_main_image_pm_mode_ctxsw_f()) | ||
1043 | return 0; | ||
1044 | |||
1067 | p->mode = TEGRA_VGPU_CTXSW_MODE_CTXSW; | 1045 | p->mode = TEGRA_VGPU_CTXSW_MODE_CTXSW; |
1068 | 1046 | ||
1069 | /* Allocate buffer if necessary */ | 1047 | /* Allocate buffer if necessary */ |
@@ -1076,8 +1054,12 @@ int vgpu_gr_update_hwpm_ctxsw_mode(struct gk20a *g, | |||
1076 | return -ENOMEM; | 1054 | return -ENOMEM; |
1077 | pm_ctx->mem.size = g->gr.ctx_vars.pm_ctxsw_image_size; | 1055 | pm_ctx->mem.size = g->gr.ctx_vars.pm_ctxsw_image_size; |
1078 | } | 1056 | } |
1079 | } else | 1057 | } else { |
1058 | if (pm_ctx->pm_mode == ctxsw_prog_main_image_pm_mode_no_ctxsw_f()) | ||
1059 | return 0; | ||
1060 | |||
1080 | p->mode = TEGRA_VGPU_CTXSW_MODE_NO_CTXSW; | 1061 | p->mode = TEGRA_VGPU_CTXSW_MODE_NO_CTXSW; |
1062 | } | ||
1081 | 1063 | ||
1082 | msg.cmd = TEGRA_VGPU_CMD_CHANNEL_SET_HWPM_CTXSW_MODE; | 1064 | msg.cmd = TEGRA_VGPU_CMD_CHANNEL_SET_HWPM_CTXSW_MODE; |
1083 | msg.handle = vgpu_get_handle(g); | 1065 | msg.handle = vgpu_get_handle(g); |
@@ -1086,8 +1068,13 @@ int vgpu_gr_update_hwpm_ctxsw_mode(struct gk20a *g, | |||
1086 | 1068 | ||
1087 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); | 1069 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); |
1088 | WARN_ON(err || msg.ret); | 1070 | WARN_ON(err || msg.ret); |
1071 | err = err ? err : msg.ret; | ||
1072 | if (!err) | ||
1073 | pm_ctx->pm_mode = enable ? | ||
1074 | ctxsw_prog_main_image_pm_mode_ctxsw_f() : | ||
1075 | ctxsw_prog_main_image_pm_mode_no_ctxsw_f(); | ||
1089 | 1076 | ||
1090 | return err ? err : msg.ret; | 1077 | return err; |
1091 | } | 1078 | } |
1092 | 1079 | ||
1093 | int vgpu_gr_clear_sm_error_state(struct gk20a *g, | 1080 | int vgpu_gr_clear_sm_error_state(struct gk20a *g, |
diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/gr_vgpu.h b/drivers/gpu/nvgpu/common/linux/vgpu/gr_vgpu.h index 16aa92a9..4b81da91 100644 --- a/drivers/gpu/nvgpu/common/linux/vgpu/gr_vgpu.h +++ b/drivers/gpu/nvgpu/common/linux/vgpu/gr_vgpu.h | |||
@@ -29,6 +29,7 @@ struct dbg_session_gk20a; | |||
29 | 29 | ||
30 | void vgpu_gr_detect_sm_arch(struct gk20a *g); | 30 | void vgpu_gr_detect_sm_arch(struct gk20a *g); |
31 | void vgpu_gr_free_channel_ctx(struct channel_gk20a *c, bool is_tsg); | 31 | void vgpu_gr_free_channel_ctx(struct channel_gk20a *c, bool is_tsg); |
32 | void vgpu_gr_free_tsg_ctx(struct tsg_gk20a *tsg); | ||
32 | int vgpu_gr_alloc_obj_ctx(struct channel_gk20a *c, u32 class_num, u32 flags); | 33 | int vgpu_gr_alloc_obj_ctx(struct channel_gk20a *c, u32 class_num, u32 flags); |
33 | int vgpu_gr_bind_ctxsw_zcull(struct gk20a *g, struct gr_gk20a *gr, | 34 | int vgpu_gr_bind_ctxsw_zcull(struct gk20a *g, struct gr_gk20a *gr, |
34 | struct channel_gk20a *c, u64 zcull_va, | 35 | struct channel_gk20a *c, u64 zcull_va, |
diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/gv11b/vgpu_hal_gv11b.c b/drivers/gpu/nvgpu/common/linux/vgpu/gv11b/vgpu_hal_gv11b.c index 968eae10..132ce6e5 100644 --- a/drivers/gpu/nvgpu/common/linux/vgpu/gv11b/vgpu_hal_gv11b.c +++ b/drivers/gpu/nvgpu/common/linux/vgpu/gv11b/vgpu_hal_gv11b.c | |||
@@ -131,7 +131,6 @@ static const struct gpu_ops vgpu_gv11b_ops = { | |||
131 | .load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode, | 131 | .load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode, |
132 | .set_gpc_tpc_mask = gr_gv11b_set_gpc_tpc_mask, | 132 | .set_gpc_tpc_mask = gr_gv11b_set_gpc_tpc_mask, |
133 | .get_gpc_tpc_mask = vgpu_gr_get_gpc_tpc_mask, | 133 | .get_gpc_tpc_mask = vgpu_gr_get_gpc_tpc_mask, |
134 | .free_channel_ctx = vgpu_gr_free_channel_ctx, | ||
135 | .alloc_obj_ctx = vgpu_gr_alloc_obj_ctx, | 134 | .alloc_obj_ctx = vgpu_gr_alloc_obj_ctx, |
136 | .bind_ctxsw_zcull = vgpu_gr_bind_ctxsw_zcull, | 135 | .bind_ctxsw_zcull = vgpu_gr_bind_ctxsw_zcull, |
137 | .get_zcull_info = vgpu_gr_get_zcull_info, | 136 | .get_zcull_info = vgpu_gr_get_zcull_info, |
diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/gv11b/vgpu_subctx_gv11b.c b/drivers/gpu/nvgpu/common/linux/vgpu/gv11b/vgpu_subctx_gv11b.c index d59f0381..a0099f03 100644 --- a/drivers/gpu/nvgpu/common/linux/vgpu/gv11b/vgpu_subctx_gv11b.c +++ b/drivers/gpu/nvgpu/common/linux/vgpu/gv11b/vgpu_subctx_gv11b.c | |||
@@ -21,7 +21,7 @@ | |||
21 | 21 | ||
22 | int vgpu_gv11b_alloc_subctx_header(struct channel_gk20a *c) | 22 | int vgpu_gv11b_alloc_subctx_header(struct channel_gk20a *c) |
23 | { | 23 | { |
24 | struct ctx_header_desc *ctx = &c->ch_ctx.ctx_header; | 24 | struct ctx_header_desc *ctx = &c->ctx_header; |
25 | struct tegra_vgpu_cmd_msg msg = {}; | 25 | struct tegra_vgpu_cmd_msg msg = {}; |
26 | struct tegra_vgpu_alloc_ctx_header_params *p = | 26 | struct tegra_vgpu_alloc_ctx_header_params *p = |
27 | &msg.params.alloc_ctx_header; | 27 | &msg.params.alloc_ctx_header; |
@@ -52,7 +52,7 @@ int vgpu_gv11b_alloc_subctx_header(struct channel_gk20a *c) | |||
52 | 52 | ||
53 | void vgpu_gv11b_free_subctx_header(struct channel_gk20a *c) | 53 | void vgpu_gv11b_free_subctx_header(struct channel_gk20a *c) |
54 | { | 54 | { |
55 | struct ctx_header_desc *ctx = &c->ch_ctx.ctx_header; | 55 | struct ctx_header_desc *ctx = &c->ctx_header; |
56 | struct tegra_vgpu_cmd_msg msg = {}; | 56 | struct tegra_vgpu_cmd_msg msg = {}; |
57 | struct tegra_vgpu_free_ctx_header_params *p = | 57 | struct tegra_vgpu_free_ctx_header_params *p = |
58 | &msg.params.free_ctx_header; | 58 | &msg.params.free_ctx_header; |
diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/vgpu.h b/drivers/gpu/nvgpu/common/linux/vgpu/vgpu.h index 8c306ea0..20624240 100644 --- a/drivers/gpu/nvgpu/common/linux/vgpu/vgpu.h +++ b/drivers/gpu/nvgpu/common/linux/vgpu/vgpu.h | |||
@@ -79,12 +79,12 @@ int vgpu_gr_isr(struct gk20a *g, struct tegra_vgpu_gr_intr_info *info); | |||
79 | int vgpu_gr_nonstall_isr(struct gk20a *g, | 79 | int vgpu_gr_nonstall_isr(struct gk20a *g, |
80 | struct tegra_vgpu_gr_nonstall_intr_info *info); | 80 | struct tegra_vgpu_gr_nonstall_intr_info *info); |
81 | int vgpu_gr_alloc_gr_ctx(struct gk20a *g, | 81 | int vgpu_gr_alloc_gr_ctx(struct gk20a *g, |
82 | struct gr_ctx_desc **__gr_ctx, | 82 | struct nvgpu_gr_ctx *gr_ctx, |
83 | struct vm_gk20a *vm, | 83 | struct vm_gk20a *vm, |
84 | u32 class, | 84 | u32 class, |
85 | u32 flags); | 85 | u32 flags); |
86 | void vgpu_gr_free_gr_ctx(struct gk20a *g, struct vm_gk20a *vm, | 86 | void vgpu_gr_free_gr_ctx(struct gk20a *g, struct vm_gk20a *vm, |
87 | struct gr_ctx_desc *gr_ctx); | 87 | struct nvgpu_gr_ctx *gr_ctx); |
88 | void vgpu_gr_handle_sm_esr_event(struct gk20a *g, | 88 | void vgpu_gr_handle_sm_esr_event(struct gk20a *g, |
89 | struct tegra_vgpu_sm_esr_info *info); | 89 | struct tegra_vgpu_sm_esr_info *info); |
90 | int vgpu_gr_init_ctx_state(struct gk20a *g); | 90 | int vgpu_gr_init_ctx_state(struct gk20a *g); |
@@ -141,7 +141,7 @@ static inline int vgpu_gr_isr(struct gk20a *g, | |||
141 | return 0; | 141 | return 0; |
142 | } | 142 | } |
143 | static inline int vgpu_gr_alloc_gr_ctx(struct gk20a *g, | 143 | static inline int vgpu_gr_alloc_gr_ctx(struct gk20a *g, |
144 | struct gr_ctx_desc **__gr_ctx, | 144 | struct nvgpu_gr_ctx *gr_ctx, |
145 | struct vm_gk20a *vm, | 145 | struct vm_gk20a *vm, |
146 | u32 class, | 146 | u32 class, |
147 | u32 flags) | 147 | u32 flags) |
@@ -149,7 +149,7 @@ static inline int vgpu_gr_alloc_gr_ctx(struct gk20a *g, | |||
149 | return -ENOSYS; | 149 | return -ENOSYS; |
150 | } | 150 | } |
151 | static inline void vgpu_gr_free_gr_ctx(struct gk20a *g, struct vm_gk20a *vm, | 151 | static inline void vgpu_gr_free_gr_ctx(struct gk20a *g, struct vm_gk20a *vm, |
152 | struct gr_ctx_desc *gr_ctx) | 152 | struct nvgpu_gr_ctx *gr_ctx) |
153 | { | 153 | { |
154 | } | 154 | } |
155 | static inline int vgpu_gr_init_ctx_state(struct gk20a *g) | 155 | static inline int vgpu_gr_init_ctx_state(struct gk20a *g) |
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c index 16d4711f..64266fe5 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c | |||
@@ -259,7 +259,7 @@ void gk20a_channel_abort(struct channel_gk20a *ch, bool channel_preempt) | |||
259 | 259 | ||
260 | ch->g->ops.fifo.disable_channel(ch); | 260 | ch->g->ops.fifo.disable_channel(ch); |
261 | 261 | ||
262 | if (channel_preempt && ch->ch_ctx.gr_ctx) | 262 | if (channel_preempt && gk20a_is_channel_marked_as_tsg(ch)) |
263 | ch->g->ops.fifo.preempt_channel(ch->g, ch->chid); | 263 | ch->g->ops.fifo.preempt_channel(ch->g, ch->chid); |
264 | 264 | ||
265 | gk20a_channel_abort_clean_up(ch); | 265 | gk20a_channel_abort_clean_up(ch); |
@@ -421,8 +421,8 @@ static void gk20a_free_channel(struct channel_gk20a *ch, bool force) | |||
421 | g->ops.fecs_trace.unbind_channel(g, ch); | 421 | g->ops.fecs_trace.unbind_channel(g, ch); |
422 | #endif | 422 | #endif |
423 | 423 | ||
424 | /* release channel ctx */ | 424 | if(g->ops.fifo.free_channel_ctx_header) |
425 | g->ops.gr.free_channel_ctx(ch, was_tsg); | 425 | g->ops.fifo.free_channel_ctx_header(ch); |
426 | 426 | ||
427 | gk20a_gr_flush_channel_tlb(gr); | 427 | gk20a_gr_flush_channel_tlb(gr); |
428 | 428 | ||
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h index c13b1c58..29fa302f 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h | |||
@@ -31,7 +31,6 @@ | |||
31 | #include <nvgpu/atomic.h> | 31 | #include <nvgpu/atomic.h> |
32 | 32 | ||
33 | struct gk20a; | 33 | struct gk20a; |
34 | struct gr_gk20a; | ||
35 | struct dbg_session_gk20a; | 34 | struct dbg_session_gk20a; |
36 | struct gk20a_fence; | 35 | struct gk20a_fence; |
37 | struct fifo_profile_gk20a; | 36 | struct fifo_profile_gk20a; |
@@ -50,10 +49,6 @@ struct fifo_profile_gk20a; | |||
50 | #define NVGPU_GPFIFO_FLAGS_SUPPORT_DETERMINISTIC (1 << 1) | 49 | #define NVGPU_GPFIFO_FLAGS_SUPPORT_DETERMINISTIC (1 << 1) |
51 | #define NVGPU_GPFIFO_FLAGS_REPLAYABLE_FAULTS_ENABLE (1 << 2) | 50 | #define NVGPU_GPFIFO_FLAGS_REPLAYABLE_FAULTS_ENABLE (1 << 2) |
52 | 51 | ||
53 | /* Flags to be passed to g->ops.gr.alloc_obj_ctx() */ | ||
54 | #define NVGPU_OBJ_CTX_FLAGS_SUPPORT_GFXP (1 << 1) | ||
55 | #define NVGPU_OBJ_CTX_FLAGS_SUPPORT_CILP (1 << 2) | ||
56 | |||
57 | struct notification { | 52 | struct notification { |
58 | struct { | 53 | struct { |
59 | u32 nanoseconds[2]; | 54 | u32 nanoseconds[2]; |
@@ -63,19 +58,6 @@ struct notification { | |||
63 | u16 status; | 58 | u16 status; |
64 | }; | 59 | }; |
65 | 60 | ||
66 | /* contexts associated with a channel */ | ||
67 | struct channel_ctx_gk20a { | ||
68 | struct gr_ctx_desc *gr_ctx; | ||
69 | struct patch_desc patch_ctx; | ||
70 | struct zcull_ctx_desc zcull_ctx; | ||
71 | struct pm_ctx_desc pm_ctx; | ||
72 | u64 global_ctx_buffer_va[NR_GLOBAL_CTX_BUF_VA]; | ||
73 | u64 global_ctx_buffer_size[NR_GLOBAL_CTX_BUF_VA]; | ||
74 | int global_ctx_buffer_index[NR_GLOBAL_CTX_BUF_VA]; | ||
75 | bool global_ctx_buffer_mapped; | ||
76 | struct ctx_header_desc ctx_header; | ||
77 | }; | ||
78 | |||
79 | struct channel_gk20a_job { | 61 | struct channel_gk20a_job { |
80 | struct nvgpu_mapped_buf **mapped_buffers; | 62 | struct nvgpu_mapped_buf **mapped_buffers; |
81 | int num_mapped_buffers; | 63 | int num_mapped_buffers; |
@@ -190,7 +172,6 @@ struct channel_gk20a { | |||
190 | int chid; | 172 | int chid; |
191 | bool wdt_enabled; | 173 | bool wdt_enabled; |
192 | nvgpu_atomic_t bound; | 174 | nvgpu_atomic_t bound; |
193 | bool first_init; | ||
194 | bool vpr; | 175 | bool vpr; |
195 | bool deterministic; | 176 | bool deterministic; |
196 | /* deterministic, but explicitly idle and submits disallowed */ | 177 | /* deterministic, but explicitly idle and submits disallowed */ |
@@ -210,8 +191,6 @@ struct channel_gk20a { | |||
210 | 191 | ||
211 | struct gpfifo_desc gpfifo; | 192 | struct gpfifo_desc gpfifo; |
212 | 193 | ||
213 | struct channel_ctx_gk20a ch_ctx; | ||
214 | |||
215 | struct nvgpu_mem inst_block; | 194 | struct nvgpu_mem inst_block; |
216 | 195 | ||
217 | u64 userd_iova; | 196 | u64 userd_iova; |
@@ -262,6 +241,8 @@ struct channel_gk20a { | |||
262 | struct channel_t19x t19x; | 241 | struct channel_t19x t19x; |
263 | #endif | 242 | #endif |
264 | 243 | ||
244 | struct ctx_header_desc ctx_header; | ||
245 | |||
265 | /* Any operating system specific data. */ | 246 | /* Any operating system specific data. */ |
266 | void *os_priv; | 247 | void *os_priv; |
267 | }; | 248 | }; |
diff --git a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c index d283a82e..409661fc 100644 --- a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c | |||
@@ -625,9 +625,10 @@ int gk20a_fecs_trace_bind_channel(struct gk20a *g, | |||
625 | u32 lo; | 625 | u32 lo; |
626 | u32 hi; | 626 | u32 hi; |
627 | u64 pa; | 627 | u64 pa; |
628 | struct channel_ctx_gk20a *ch_ctx = &ch->ch_ctx; | 628 | struct tsg_gk20a *tsg; |
629 | struct nvgpu_gr_ctx *ch_ctx; | ||
629 | struct gk20a_fecs_trace *trace = g->fecs_trace; | 630 | struct gk20a_fecs_trace *trace = g->fecs_trace; |
630 | struct nvgpu_mem *mem = &ch_ctx->gr_ctx->mem; | 631 | struct nvgpu_mem *mem; |
631 | u32 context_ptr = gk20a_fecs_trace_fecs_context_ptr(g, ch); | 632 | u32 context_ptr = gk20a_fecs_trace_fecs_context_ptr(g, ch); |
632 | pid_t pid; | 633 | pid_t pid; |
633 | u32 aperture; | 634 | u32 aperture; |
@@ -637,6 +638,13 @@ int gk20a_fecs_trace_bind_channel(struct gk20a *g, | |||
637 | ch->chid, context_ptr, | 638 | ch->chid, context_ptr, |
638 | nvgpu_inst_block_addr(g, &ch->inst_block)); | 639 | nvgpu_inst_block_addr(g, &ch->inst_block)); |
639 | 640 | ||
641 | tsg = tsg_gk20a_from_ch(ch); | ||
642 | if (!tsg) | ||
643 | return -EINVAL; | ||
644 | |||
645 | ch_ctx = &tsg->gr_ctx; | ||
646 | mem = &ch_ctx->mem; | ||
647 | |||
640 | if (!trace) | 648 | if (!trace) |
641 | return -ENOMEM; | 649 | return -ENOMEM; |
642 | 650 | ||
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h index 070b26b6..685976b1 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h | |||
@@ -187,16 +187,16 @@ struct gpu_ops { | |||
187 | void (*cb_size_default)(struct gk20a *g); | 187 | void (*cb_size_default)(struct gk20a *g); |
188 | int (*calc_global_ctx_buffer_size)(struct gk20a *g); | 188 | int (*calc_global_ctx_buffer_size)(struct gk20a *g); |
189 | void (*commit_global_attrib_cb)(struct gk20a *g, | 189 | void (*commit_global_attrib_cb)(struct gk20a *g, |
190 | struct channel_ctx_gk20a *ch_ctx, | 190 | struct nvgpu_gr_ctx *ch_ctx, |
191 | u64 addr, bool patch); | 191 | u64 addr, bool patch); |
192 | void (*commit_global_bundle_cb)(struct gk20a *g, | 192 | void (*commit_global_bundle_cb)(struct gk20a *g, |
193 | struct channel_ctx_gk20a *ch_ctx, | 193 | struct nvgpu_gr_ctx *ch_ctx, |
194 | u64 addr, u64 size, bool patch); | 194 | u64 addr, u64 size, bool patch); |
195 | int (*commit_global_cb_manager)(struct gk20a *g, | 195 | int (*commit_global_cb_manager)(struct gk20a *g, |
196 | struct channel_gk20a *ch, | 196 | struct channel_gk20a *ch, |
197 | bool patch); | 197 | bool patch); |
198 | void (*commit_global_pagepool)(struct gk20a *g, | 198 | void (*commit_global_pagepool)(struct gk20a *g, |
199 | struct channel_ctx_gk20a *ch_ctx, | 199 | struct nvgpu_gr_ctx *ch_ctx, |
200 | u64 addr, u32 size, bool patch); | 200 | u64 addr, u32 size, bool patch); |
201 | void (*init_gpc_mmu)(struct gk20a *g); | 201 | void (*init_gpc_mmu)(struct gk20a *g); |
202 | int (*handle_sw_method)(struct gk20a *g, u32 addr, | 202 | int (*handle_sw_method)(struct gk20a *g, u32 addr, |
@@ -230,7 +230,6 @@ struct gpu_ops { | |||
230 | int (*load_ctxsw_ucode)(struct gk20a *g); | 230 | int (*load_ctxsw_ucode)(struct gk20a *g); |
231 | u32 (*get_gpc_tpc_mask)(struct gk20a *g, u32 gpc_index); | 231 | u32 (*get_gpc_tpc_mask)(struct gk20a *g, u32 gpc_index); |
232 | void (*set_gpc_tpc_mask)(struct gk20a *g, u32 gpc_index); | 232 | void (*set_gpc_tpc_mask)(struct gk20a *g, u32 gpc_index); |
233 | void (*free_channel_ctx)(struct channel_gk20a *c, bool is_tsg); | ||
234 | int (*alloc_obj_ctx)(struct channel_gk20a *c, | 233 | int (*alloc_obj_ctx)(struct channel_gk20a *c, |
235 | u32 class_num, u32 flags); | 234 | u32 class_num, u32 flags); |
236 | int (*bind_ctxsw_zcull)(struct gk20a *g, struct gr_gk20a *gr, | 235 | int (*bind_ctxsw_zcull)(struct gk20a *g, struct gr_gk20a *gr, |
@@ -285,13 +284,12 @@ struct gpu_ops { | |||
285 | u32 (*pagepool_default_size)(struct gk20a *g); | 284 | u32 (*pagepool_default_size)(struct gk20a *g); |
286 | int (*init_ctx_state)(struct gk20a *g); | 285 | int (*init_ctx_state)(struct gk20a *g); |
287 | int (*alloc_gr_ctx)(struct gk20a *g, | 286 | int (*alloc_gr_ctx)(struct gk20a *g, |
288 | struct gr_ctx_desc **__gr_ctx, struct vm_gk20a *vm, | 287 | struct nvgpu_gr_ctx *gr_ctx, struct vm_gk20a *vm, |
289 | u32 class, u32 padding); | 288 | u32 class, u32 padding); |
290 | void (*free_gr_ctx)(struct gk20a *g, | 289 | void (*free_gr_ctx)(struct gk20a *g, |
291 | struct vm_gk20a *vm, | 290 | struct vm_gk20a *vm, struct nvgpu_gr_ctx *gr_ctx); |
292 | struct gr_ctx_desc *gr_ctx); | ||
293 | void (*update_ctxsw_preemption_mode)(struct gk20a *g, | 291 | void (*update_ctxsw_preemption_mode)(struct gk20a *g, |
294 | struct channel_ctx_gk20a *ch_ctx, | 292 | struct channel_gk20a *c, |
295 | struct nvgpu_mem *mem); | 293 | struct nvgpu_mem *mem); |
296 | int (*update_smpc_ctxsw_mode)(struct gk20a *g, | 294 | int (*update_smpc_ctxsw_mode)(struct gk20a *g, |
297 | struct channel_gk20a *c, | 295 | struct channel_gk20a *c, |
@@ -384,14 +382,14 @@ struct gpu_ops { | |||
384 | int (*get_preemption_mode_flags)(struct gk20a *g, | 382 | int (*get_preemption_mode_flags)(struct gk20a *g, |
385 | struct nvgpu_preemption_modes_rec *preemption_modes_rec); | 383 | struct nvgpu_preemption_modes_rec *preemption_modes_rec); |
386 | int (*set_ctxsw_preemption_mode)(struct gk20a *g, | 384 | int (*set_ctxsw_preemption_mode)(struct gk20a *g, |
387 | struct gr_ctx_desc *gr_ctx, | 385 | struct nvgpu_gr_ctx *gr_ctx, |
388 | struct vm_gk20a *vm, u32 class, | 386 | struct vm_gk20a *vm, u32 class, |
389 | u32 graphics_preempt_mode, | 387 | u32 graphics_preempt_mode, |
390 | u32 compute_preempt_mode); | 388 | u32 compute_preempt_mode); |
391 | int (*set_boosted_ctx)(struct channel_gk20a *ch, bool boost); | 389 | int (*set_boosted_ctx)(struct channel_gk20a *ch, bool boost); |
392 | void (*update_boosted_ctx)(struct gk20a *g, | 390 | void (*update_boosted_ctx)(struct gk20a *g, |
393 | struct nvgpu_mem *mem, | 391 | struct nvgpu_mem *mem, |
394 | struct gr_ctx_desc *gr_ctx); | 392 | struct nvgpu_gr_ctx *gr_ctx); |
395 | int (*init_sm_id_table)(struct gk20a *g); | 393 | int (*init_sm_id_table)(struct gk20a *g); |
396 | int (*load_smid_config)(struct gk20a *g); | 394 | int (*load_smid_config)(struct gk20a *g); |
397 | void (*program_sm_id_numbering)(struct gk20a *g, | 395 | void (*program_sm_id_numbering)(struct gk20a *g, |
@@ -440,7 +438,7 @@ struct gpu_ops { | |||
440 | u32 (*get_gpcs_swdx_dss_zbc_c_format_reg)(struct gk20a *g); | 438 | u32 (*get_gpcs_swdx_dss_zbc_c_format_reg)(struct gk20a *g); |
441 | u32 (*get_gpcs_swdx_dss_zbc_z_format_reg)(struct gk20a *g); | 439 | u32 (*get_gpcs_swdx_dss_zbc_z_format_reg)(struct gk20a *g); |
442 | void (*dump_ctxsw_stats)(struct gk20a *g, struct vm_gk20a *vm, | 440 | void (*dump_ctxsw_stats)(struct gk20a *g, struct vm_gk20a *vm, |
443 | struct gr_ctx_desc *gr_ctx); | 441 | struct nvgpu_gr_ctx *gr_ctx); |
444 | } gr; | 442 | } gr; |
445 | struct { | 443 | struct { |
446 | void (*init_hw)(struct gk20a *g); | 444 | void (*init_hw)(struct gk20a *g); |
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index 263ae030..f8af091b 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c | |||
@@ -85,18 +85,19 @@ static int gr_gk20a_alloc_global_ctx_buffers(struct gk20a *g); | |||
85 | static void gr_gk20a_free_global_ctx_buffers(struct gk20a *g); | 85 | static void gr_gk20a_free_global_ctx_buffers(struct gk20a *g); |
86 | static int gr_gk20a_map_global_ctx_buffers(struct gk20a *g, | 86 | static int gr_gk20a_map_global_ctx_buffers(struct gk20a *g, |
87 | struct channel_gk20a *c); | 87 | struct channel_gk20a *c); |
88 | static void gr_gk20a_unmap_global_ctx_buffers(struct channel_gk20a *c); | 88 | static void gr_gk20a_unmap_global_ctx_buffers(struct gk20a *g, |
89 | 89 | struct vm_gk20a *vm, | |
90 | /* channel gr ctx buffer */ | 90 | struct nvgpu_gr_ctx *gr_ctx); |
91 | static int gr_gk20a_alloc_channel_gr_ctx(struct gk20a *g, | 91 | static void gr_gk20a_free_channel_pm_ctx(struct gk20a *g, |
92 | struct channel_gk20a *c, | 92 | struct vm_gk20a *vm, |
93 | u32 class, u32 padding); | 93 | struct nvgpu_gr_ctx *gr_ctx); |
94 | static void gr_gk20a_free_channel_gr_ctx(struct channel_gk20a *c); | ||
95 | 94 | ||
96 | /* channel patch ctx buffer */ | 95 | /* channel patch ctx buffer */ |
97 | static int gr_gk20a_alloc_channel_patch_ctx(struct gk20a *g, | 96 | static int gr_gk20a_alloc_channel_patch_ctx(struct gk20a *g, |
98 | struct channel_gk20a *c); | 97 | struct channel_gk20a *c); |
99 | static void gr_gk20a_free_channel_patch_ctx(struct channel_gk20a *c); | 98 | static void gr_gk20a_free_channel_patch_ctx(struct gk20a *g, |
99 | struct vm_gk20a *vm, | ||
100 | struct nvgpu_gr_ctx *gr_ctx); | ||
100 | 101 | ||
101 | /* golden ctx image */ | 102 | /* golden ctx image */ |
102 | static int gr_gk20a_init_golden_ctx_image(struct gk20a *g, | 103 | static int gr_gk20a_init_golden_ctx_image(struct gk20a *g, |
@@ -108,8 +109,16 @@ int gr_gk20a_get_ctx_id(struct gk20a *g, | |||
108 | struct channel_gk20a *c, | 109 | struct channel_gk20a *c, |
109 | u32 *ctx_id) | 110 | u32 *ctx_id) |
110 | { | 111 | { |
111 | struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx; | 112 | struct tsg_gk20a *tsg; |
112 | struct nvgpu_mem *mem = &ch_ctx->gr_ctx->mem; | 113 | struct nvgpu_gr_ctx *gr_ctx = NULL; |
114 | struct nvgpu_mem *mem = NULL; | ||
115 | |||
116 | tsg = tsg_gk20a_from_ch(c); | ||
117 | if (!tsg) | ||
118 | return -EINVAL; | ||
119 | |||
120 | gr_ctx = &tsg->gr_ctx; | ||
121 | mem = &gr_ctx->mem; | ||
113 | 122 | ||
114 | /* Channel gr_ctx buffer is gpu cacheable. | 123 | /* Channel gr_ctx buffer is gpu cacheable. |
115 | Flush and invalidate before cpu update. */ | 124 | Flush and invalidate before cpu update. */ |
@@ -671,62 +680,62 @@ int gr_gk20a_commit_inst(struct channel_gk20a *c, u64 gpu_va) | |||
671 | */ | 680 | */ |
672 | 681 | ||
673 | int gr_gk20a_ctx_patch_write_begin(struct gk20a *g, | 682 | int gr_gk20a_ctx_patch_write_begin(struct gk20a *g, |
674 | struct channel_ctx_gk20a *ch_ctx, | 683 | struct nvgpu_gr_ctx *gr_ctx, |
675 | bool update_patch_count) | 684 | bool update_patch_count) |
676 | { | 685 | { |
677 | int err = 0; | 686 | int err = 0; |
678 | 687 | ||
679 | err = nvgpu_mem_begin(g, &ch_ctx->patch_ctx.mem); | 688 | err = nvgpu_mem_begin(g, &gr_ctx->patch_ctx.mem); |
680 | if (err) | 689 | if (err) |
681 | return err; | 690 | return err; |
682 | 691 | ||
683 | if (update_patch_count) { | 692 | if (update_patch_count) { |
684 | /* reset patch count if ucode has already processed it */ | 693 | /* reset patch count if ucode has already processed it */ |
685 | ch_ctx->patch_ctx.data_count = nvgpu_mem_rd(g, | 694 | gr_ctx->patch_ctx.data_count = nvgpu_mem_rd(g, |
686 | &ch_ctx->gr_ctx->mem, | 695 | &gr_ctx->mem, |
687 | ctxsw_prog_main_image_patch_count_o()); | 696 | ctxsw_prog_main_image_patch_count_o()); |
688 | nvgpu_log(g, gpu_dbg_info, "patch count reset to %d", | 697 | nvgpu_log(g, gpu_dbg_info, "patch count reset to %d", |
689 | ch_ctx->patch_ctx.data_count); | 698 | gr_ctx->patch_ctx.data_count); |
690 | } | 699 | } |
691 | return 0; | 700 | return 0; |
692 | } | 701 | } |
693 | 702 | ||
694 | void gr_gk20a_ctx_patch_write_end(struct gk20a *g, | 703 | void gr_gk20a_ctx_patch_write_end(struct gk20a *g, |
695 | struct channel_ctx_gk20a *ch_ctx, | 704 | struct nvgpu_gr_ctx *gr_ctx, |
696 | bool update_patch_count) | 705 | bool update_patch_count) |
697 | { | 706 | { |
698 | nvgpu_mem_end(g, &ch_ctx->patch_ctx.mem); | 707 | nvgpu_mem_end(g, &gr_ctx->patch_ctx.mem); |
699 | 708 | ||
700 | /* Write context count to context image if it is mapped */ | 709 | /* Write context count to context image if it is mapped */ |
701 | if (update_patch_count) { | 710 | if (update_patch_count) { |
702 | nvgpu_mem_wr(g, &ch_ctx->gr_ctx->mem, | 711 | nvgpu_mem_wr(g, &gr_ctx->mem, |
703 | ctxsw_prog_main_image_patch_count_o(), | 712 | ctxsw_prog_main_image_patch_count_o(), |
704 | ch_ctx->patch_ctx.data_count); | 713 | gr_ctx->patch_ctx.data_count); |
705 | nvgpu_log(g, gpu_dbg_info, "write patch count %d", | 714 | nvgpu_log(g, gpu_dbg_info, "write patch count %d", |
706 | ch_ctx->patch_ctx.data_count); | 715 | gr_ctx->patch_ctx.data_count); |
707 | } | 716 | } |
708 | } | 717 | } |
709 | 718 | ||
710 | void gr_gk20a_ctx_patch_write(struct gk20a *g, | 719 | void gr_gk20a_ctx_patch_write(struct gk20a *g, |
711 | struct channel_ctx_gk20a *ch_ctx, | 720 | struct nvgpu_gr_ctx *gr_ctx, |
712 | u32 addr, u32 data, bool patch) | 721 | u32 addr, u32 data, bool patch) |
713 | { | 722 | { |
714 | if (patch) { | 723 | if (patch) { |
715 | u32 patch_slot = ch_ctx->patch_ctx.data_count * | 724 | u32 patch_slot = gr_ctx->patch_ctx.data_count * |
716 | PATCH_CTX_SLOTS_REQUIRED_PER_ENTRY; | 725 | PATCH_CTX_SLOTS_REQUIRED_PER_ENTRY; |
717 | if (patch_slot > (PATCH_CTX_ENTRIES_FROM_SIZE( | 726 | if (patch_slot > (PATCH_CTX_ENTRIES_FROM_SIZE( |
718 | ch_ctx->patch_ctx.mem.size) - | 727 | gr_ctx->patch_ctx.mem.size) - |
719 | PATCH_CTX_SLOTS_REQUIRED_PER_ENTRY)) { | 728 | PATCH_CTX_SLOTS_REQUIRED_PER_ENTRY)) { |
720 | nvgpu_err(g, "failed to access patch_slot %d", | 729 | nvgpu_err(g, "failed to access patch_slot %d", |
721 | patch_slot); | 730 | patch_slot); |
722 | return; | 731 | return; |
723 | } | 732 | } |
724 | nvgpu_mem_wr32(g, &ch_ctx->patch_ctx.mem, patch_slot, addr); | 733 | nvgpu_mem_wr32(g, &gr_ctx->patch_ctx.mem, patch_slot, addr); |
725 | nvgpu_mem_wr32(g, &ch_ctx->patch_ctx.mem, patch_slot + 1, data); | 734 | nvgpu_mem_wr32(g, &gr_ctx->patch_ctx.mem, patch_slot + 1, data); |
726 | ch_ctx->patch_ctx.data_count++; | 735 | gr_ctx->patch_ctx.data_count++; |
727 | nvgpu_log(g, gpu_dbg_info, | 736 | nvgpu_log(g, gpu_dbg_info, |
728 | "patch addr = 0x%x data = 0x%x data_count %d", | 737 | "patch addr = 0x%x data = 0x%x data_count %d", |
729 | addr, data, ch_ctx->patch_ctx.data_count); | 738 | addr, data, gr_ctx->patch_ctx.data_count); |
730 | } else { | 739 | } else { |
731 | gk20a_writel(g, addr, data); | 740 | gk20a_writel(g, addr, data); |
732 | } | 741 | } |
@@ -793,14 +802,22 @@ void gr_gk20a_write_pm_ptr(struct gk20a *g, | |||
793 | 802 | ||
794 | static int gr_gk20a_ctx_zcull_setup(struct gk20a *g, struct channel_gk20a *c) | 803 | static int gr_gk20a_ctx_zcull_setup(struct gk20a *g, struct channel_gk20a *c) |
795 | { | 804 | { |
796 | struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx; | 805 | struct tsg_gk20a *tsg; |
797 | struct nvgpu_mem *mem = &ch_ctx->gr_ctx->mem; | 806 | struct nvgpu_gr_ctx *gr_ctx = NULL; |
798 | struct ctx_header_desc *ctx = &c->ch_ctx.ctx_header; | 807 | struct nvgpu_mem *mem = NULL; |
808 | struct ctx_header_desc *ctx = &c->ctx_header; | ||
799 | struct nvgpu_mem *ctxheader = &ctx->mem; | 809 | struct nvgpu_mem *ctxheader = &ctx->mem; |
800 | int ret = 0; | 810 | int ret = 0; |
801 | 811 | ||
802 | gk20a_dbg_fn(""); | 812 | gk20a_dbg_fn(""); |
803 | 813 | ||
814 | tsg = tsg_gk20a_from_ch(c); | ||
815 | if (!tsg) | ||
816 | return -EINVAL; | ||
817 | |||
818 | gr_ctx = &tsg->gr_ctx; | ||
819 | mem = &gr_ctx->mem; | ||
820 | |||
804 | if (nvgpu_mem_begin(g, mem)) | 821 | if (nvgpu_mem_begin(g, mem)) |
805 | return -ENOMEM; | 822 | return -ENOMEM; |
806 | 823 | ||
@@ -809,8 +826,8 @@ static int gr_gk20a_ctx_zcull_setup(struct gk20a *g, struct channel_gk20a *c) | |||
809 | goto clean_up_mem; | 826 | goto clean_up_mem; |
810 | } | 827 | } |
811 | 828 | ||
812 | if (ch_ctx->zcull_ctx.gpu_va == 0 && | 829 | if (gr_ctx->zcull_ctx.gpu_va == 0 && |
813 | ch_ctx->zcull_ctx.ctx_sw_mode == | 830 | gr_ctx->zcull_ctx.ctx_sw_mode == |
814 | ctxsw_prog_main_image_zcull_mode_separate_buffer_v()) { | 831 | ctxsw_prog_main_image_zcull_mode_separate_buffer_v()) { |
815 | ret = -EINVAL; | 832 | ret = -EINVAL; |
816 | goto clean_up; | 833 | goto clean_up; |
@@ -830,13 +847,13 @@ static int gr_gk20a_ctx_zcull_setup(struct gk20a *g, struct channel_gk20a *c) | |||
830 | 847 | ||
831 | nvgpu_mem_wr(g, mem, | 848 | nvgpu_mem_wr(g, mem, |
832 | ctxsw_prog_main_image_zcull_o(), | 849 | ctxsw_prog_main_image_zcull_o(), |
833 | ch_ctx->zcull_ctx.ctx_sw_mode); | 850 | gr_ctx->zcull_ctx.ctx_sw_mode); |
834 | 851 | ||
835 | if (ctxheader->gpu_va) | 852 | if (ctxheader->gpu_va) |
836 | g->ops.gr.write_zcull_ptr(g, ctxheader, | 853 | g->ops.gr.write_zcull_ptr(g, ctxheader, |
837 | ch_ctx->zcull_ctx.gpu_va); | 854 | gr_ctx->zcull_ctx.gpu_va); |
838 | else | 855 | else |
839 | g->ops.gr.write_zcull_ptr(g, mem, ch_ctx->zcull_ctx.gpu_va); | 856 | g->ops.gr.write_zcull_ptr(g, mem, gr_ctx->zcull_ctx.gpu_va); |
840 | 857 | ||
841 | gk20a_enable_channel_tsg(g, c); | 858 | gk20a_enable_channel_tsg(g, c); |
842 | 859 | ||
@@ -869,22 +886,29 @@ static int gr_gk20a_commit_global_ctx_buffers(struct gk20a *g, | |||
869 | struct channel_gk20a *c, bool patch) | 886 | struct channel_gk20a *c, bool patch) |
870 | { | 887 | { |
871 | struct gr_gk20a *gr = &g->gr; | 888 | struct gr_gk20a *gr = &g->gr; |
872 | struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx; | 889 | struct tsg_gk20a *tsg; |
890 | struct nvgpu_gr_ctx *gr_ctx = NULL; | ||
873 | u64 addr; | 891 | u64 addr; |
874 | u32 size; | 892 | u32 size; |
875 | 893 | ||
876 | gk20a_dbg_fn(""); | 894 | gk20a_dbg_fn(""); |
895 | |||
896 | tsg = tsg_gk20a_from_ch(c); | ||
897 | if (!tsg) | ||
898 | return -EINVAL; | ||
899 | |||
900 | gr_ctx = &tsg->gr_ctx; | ||
877 | if (patch) { | 901 | if (patch) { |
878 | int err; | 902 | int err; |
879 | err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx, false); | 903 | err = gr_gk20a_ctx_patch_write_begin(g, gr_ctx, false); |
880 | if (err) | 904 | if (err) |
881 | return err; | 905 | return err; |
882 | } | 906 | } |
883 | 907 | ||
884 | /* global pagepool buffer */ | 908 | /* global pagepool buffer */ |
885 | addr = (u64_lo32(ch_ctx->global_ctx_buffer_va[PAGEPOOL_VA]) >> | 909 | addr = (u64_lo32(gr_ctx->global_ctx_buffer_va[PAGEPOOL_VA]) >> |
886 | gr_scc_pagepool_base_addr_39_8_align_bits_v()) | | 910 | gr_scc_pagepool_base_addr_39_8_align_bits_v()) | |
887 | (u64_hi32(ch_ctx->global_ctx_buffer_va[PAGEPOOL_VA]) << | 911 | (u64_hi32(gr_ctx->global_ctx_buffer_va[PAGEPOOL_VA]) << |
888 | (32 - gr_scc_pagepool_base_addr_39_8_align_bits_v())); | 912 | (32 - gr_scc_pagepool_base_addr_39_8_align_bits_v())); |
889 | 913 | ||
890 | size = gr->global_ctx_buffer[PAGEPOOL].mem.size / | 914 | size = gr->global_ctx_buffer[PAGEPOOL].mem.size / |
@@ -896,12 +920,12 @@ static int gr_gk20a_commit_global_ctx_buffers(struct gk20a *g, | |||
896 | gk20a_dbg_info("pagepool buffer addr : 0x%016llx, size : %d", | 920 | gk20a_dbg_info("pagepool buffer addr : 0x%016llx, size : %d", |
897 | addr, size); | 921 | addr, size); |
898 | 922 | ||
899 | g->ops.gr.commit_global_pagepool(g, ch_ctx, addr, size, patch); | 923 | g->ops.gr.commit_global_pagepool(g, gr_ctx, addr, size, patch); |
900 | 924 | ||
901 | /* global bundle cb */ | 925 | /* global bundle cb */ |
902 | addr = (u64_lo32(ch_ctx->global_ctx_buffer_va[CIRCULAR_VA]) >> | 926 | addr = (u64_lo32(gr_ctx->global_ctx_buffer_va[CIRCULAR_VA]) >> |
903 | gr_scc_bundle_cb_base_addr_39_8_align_bits_v()) | | 927 | gr_scc_bundle_cb_base_addr_39_8_align_bits_v()) | |
904 | (u64_hi32(ch_ctx->global_ctx_buffer_va[CIRCULAR_VA]) << | 928 | (u64_hi32(gr_ctx->global_ctx_buffer_va[CIRCULAR_VA]) << |
905 | (32 - gr_scc_bundle_cb_base_addr_39_8_align_bits_v())); | 929 | (32 - gr_scc_bundle_cb_base_addr_39_8_align_bits_v())); |
906 | 930 | ||
907 | size = gr->bundle_cb_default_size; | 931 | size = gr->bundle_cb_default_size; |
@@ -909,20 +933,20 @@ static int gr_gk20a_commit_global_ctx_buffers(struct gk20a *g, | |||
909 | gk20a_dbg_info("bundle cb addr : 0x%016llx, size : %d", | 933 | gk20a_dbg_info("bundle cb addr : 0x%016llx, size : %d", |
910 | addr, size); | 934 | addr, size); |
911 | 935 | ||
912 | g->ops.gr.commit_global_bundle_cb(g, ch_ctx, addr, size, patch); | 936 | g->ops.gr.commit_global_bundle_cb(g, gr_ctx, addr, size, patch); |
913 | 937 | ||
914 | /* global attrib cb */ | 938 | /* global attrib cb */ |
915 | addr = (u64_lo32(ch_ctx->global_ctx_buffer_va[ATTRIBUTE_VA]) >> | 939 | addr = (u64_lo32(gr_ctx->global_ctx_buffer_va[ATTRIBUTE_VA]) >> |
916 | gr_gpcs_setup_attrib_cb_base_addr_39_12_align_bits_v()) | | 940 | gr_gpcs_setup_attrib_cb_base_addr_39_12_align_bits_v()) | |
917 | (u64_hi32(ch_ctx->global_ctx_buffer_va[ATTRIBUTE_VA]) << | 941 | (u64_hi32(gr_ctx->global_ctx_buffer_va[ATTRIBUTE_VA]) << |
918 | (32 - gr_gpcs_setup_attrib_cb_base_addr_39_12_align_bits_v())); | 942 | (32 - gr_gpcs_setup_attrib_cb_base_addr_39_12_align_bits_v())); |
919 | 943 | ||
920 | gk20a_dbg_info("attrib cb addr : 0x%016llx", addr); | 944 | gk20a_dbg_info("attrib cb addr : 0x%016llx", addr); |
921 | g->ops.gr.commit_global_attrib_cb(g, ch_ctx, addr, patch); | 945 | g->ops.gr.commit_global_attrib_cb(g, gr_ctx, addr, patch); |
922 | g->ops.gr.commit_global_cb_manager(g, c, patch); | 946 | g->ops.gr.commit_global_cb_manager(g, c, patch); |
923 | 947 | ||
924 | if (patch) | 948 | if (patch) |
925 | gr_gk20a_ctx_patch_write_end(g, ch_ctx, false); | 949 | gr_gk20a_ctx_patch_write_end(g, gr_ctx, false); |
926 | 950 | ||
927 | return 0; | 951 | return 0; |
928 | } | 952 | } |
@@ -930,7 +954,7 @@ static int gr_gk20a_commit_global_ctx_buffers(struct gk20a *g, | |||
930 | int gr_gk20a_commit_global_timeslice(struct gk20a *g, struct channel_gk20a *c) | 954 | int gr_gk20a_commit_global_timeslice(struct gk20a *g, struct channel_gk20a *c) |
931 | { | 955 | { |
932 | struct gr_gk20a *gr = &g->gr; | 956 | struct gr_gk20a *gr = &g->gr; |
933 | struct channel_ctx_gk20a *ch_ctx = NULL; | 957 | struct nvgpu_gr_ctx *gr_ctx = NULL; |
934 | u32 gpm_pd_cfg; | 958 | u32 gpm_pd_cfg; |
935 | u32 pd_ab_dist_cfg0; | 959 | u32 pd_ab_dist_cfg0; |
936 | u32 ds_debug; | 960 | u32 ds_debug; |
@@ -956,22 +980,22 @@ int gr_gk20a_commit_global_timeslice(struct gk20a *g, struct channel_gk20a *c) | |||
956 | ds_debug = gr_ds_debug_timeslice_mode_enable_f() | ds_debug; | 980 | ds_debug = gr_ds_debug_timeslice_mode_enable_f() | ds_debug; |
957 | mpc_vtg_debug = gr_gpcs_tpcs_mpc_vtg_debug_timeslice_mode_enabled_f() | mpc_vtg_debug; | 981 | mpc_vtg_debug = gr_gpcs_tpcs_mpc_vtg_debug_timeslice_mode_enabled_f() | mpc_vtg_debug; |
958 | 982 | ||
959 | gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_gpm_pd_cfg_r(), gpm_pd_cfg, false); | 983 | gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_gpm_pd_cfg_r(), gpm_pd_cfg, false); |
960 | gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_pe_vaf_r(), pe_vaf, false); | 984 | gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_tpcs_pe_vaf_r(), pe_vaf, false); |
961 | gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_pes_vsc_vpc_r(), pe_vsc_vpc, false); | 985 | gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_tpcs_pes_vsc_vpc_r(), pe_vsc_vpc, false); |
962 | gr_gk20a_ctx_patch_write(g, ch_ctx, gr_pd_ab_dist_cfg0_r(), pd_ab_dist_cfg0, false); | 986 | gr_gk20a_ctx_patch_write(g, gr_ctx, gr_pd_ab_dist_cfg0_r(), pd_ab_dist_cfg0, false); |
963 | gr_gk20a_ctx_patch_write(g, ch_ctx, gr_ds_debug_r(), ds_debug, false); | 987 | gr_gk20a_ctx_patch_write(g, gr_ctx, gr_ds_debug_r(), ds_debug, false); |
964 | gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_mpc_vtg_debug_r(), mpc_vtg_debug, false); | 988 | gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_tpcs_mpc_vtg_debug_r(), mpc_vtg_debug, false); |
965 | } else { | 989 | } else { |
966 | gpm_pd_cfg = gr_gpcs_gpm_pd_cfg_timeslice_mode_disable_f() | gpm_pd_cfg; | 990 | gpm_pd_cfg = gr_gpcs_gpm_pd_cfg_timeslice_mode_disable_f() | gpm_pd_cfg; |
967 | pd_ab_dist_cfg0 = gr_pd_ab_dist_cfg0_timeslice_enable_dis_f() | pd_ab_dist_cfg0; | 991 | pd_ab_dist_cfg0 = gr_pd_ab_dist_cfg0_timeslice_enable_dis_f() | pd_ab_dist_cfg0; |
968 | ds_debug = gr_ds_debug_timeslice_mode_disable_f() | ds_debug; | 992 | ds_debug = gr_ds_debug_timeslice_mode_disable_f() | ds_debug; |
969 | mpc_vtg_debug = gr_gpcs_tpcs_mpc_vtg_debug_timeslice_mode_disabled_f() | mpc_vtg_debug; | 993 | mpc_vtg_debug = gr_gpcs_tpcs_mpc_vtg_debug_timeslice_mode_disabled_f() | mpc_vtg_debug; |
970 | 994 | ||
971 | gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_gpm_pd_cfg_r(), gpm_pd_cfg, false); | 995 | gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_gpm_pd_cfg_r(), gpm_pd_cfg, false); |
972 | gr_gk20a_ctx_patch_write(g, ch_ctx, gr_pd_ab_dist_cfg0_r(), pd_ab_dist_cfg0, false); | 996 | gr_gk20a_ctx_patch_write(g, gr_ctx, gr_pd_ab_dist_cfg0_r(), pd_ab_dist_cfg0, false); |
973 | gr_gk20a_ctx_patch_write(g, ch_ctx, gr_ds_debug_r(), ds_debug, false); | 997 | gr_gk20a_ctx_patch_write(g, gr_ctx, gr_ds_debug_r(), ds_debug, false); |
974 | gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_mpc_vtg_debug_r(), mpc_vtg_debug, false); | 998 | gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_tpcs_mpc_vtg_debug_r(), mpc_vtg_debug, false); |
975 | } | 999 | } |
976 | 1000 | ||
977 | return 0; | 1001 | return 0; |
@@ -1360,13 +1384,14 @@ static int gr_gk20a_init_golden_ctx_image(struct gk20a *g, | |||
1360 | struct channel_gk20a *c) | 1384 | struct channel_gk20a *c) |
1361 | { | 1385 | { |
1362 | struct gr_gk20a *gr = &g->gr; | 1386 | struct gr_gk20a *gr = &g->gr; |
1363 | struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx; | 1387 | struct tsg_gk20a *tsg; |
1388 | struct nvgpu_gr_ctx *gr_ctx = NULL; | ||
1364 | u32 ctx_header_bytes = ctxsw_prog_fecs_header_v(); | 1389 | u32 ctx_header_bytes = ctxsw_prog_fecs_header_v(); |
1365 | u32 ctx_header_words; | 1390 | u32 ctx_header_words; |
1366 | u32 i; | 1391 | u32 i; |
1367 | u32 data; | 1392 | u32 data; |
1368 | struct nvgpu_mem *gold_mem = &gr->global_ctx_buffer[GOLDEN_CTX].mem; | 1393 | struct nvgpu_mem *gold_mem = &gr->global_ctx_buffer[GOLDEN_CTX].mem; |
1369 | struct nvgpu_mem *gr_mem = &ch_ctx->gr_ctx->mem; | 1394 | struct nvgpu_mem *gr_mem; |
1370 | u32 err = 0; | 1395 | u32 err = 0; |
1371 | struct aiv_list_gk20a *sw_ctx_load = &g->gr.ctx_vars.sw_ctx_load; | 1396 | struct aiv_list_gk20a *sw_ctx_load = &g->gr.ctx_vars.sw_ctx_load; |
1372 | struct av_list_gk20a *sw_method_init = &g->gr.ctx_vars.sw_method_init; | 1397 | struct av_list_gk20a *sw_method_init = &g->gr.ctx_vars.sw_method_init; |
@@ -1374,6 +1399,13 @@ static int gr_gk20a_init_golden_ctx_image(struct gk20a *g, | |||
1374 | 1399 | ||
1375 | gk20a_dbg_fn(""); | 1400 | gk20a_dbg_fn(""); |
1376 | 1401 | ||
1402 | tsg = tsg_gk20a_from_ch(c); | ||
1403 | if (!tsg) | ||
1404 | return -EINVAL; | ||
1405 | |||
1406 | gr_ctx = &tsg->gr_ctx; | ||
1407 | gr_mem = &gr_ctx->mem; | ||
1408 | |||
1377 | /* golden ctx is global to all channels. Although only the first | 1409 | /* golden ctx is global to all channels. Although only the first |
1378 | channel initializes golden image, driver needs to prevent multiple | 1410 | channel initializes golden image, driver needs to prevent multiple |
1379 | channels from initializing golden ctx at the same time */ | 1411 | channels from initializing golden ctx at the same time */ |
@@ -1565,7 +1597,7 @@ restore_fe_go_idle: | |||
1565 | 1597 | ||
1566 | g->ops.gr.write_zcull_ptr(g, gold_mem, 0); | 1598 | g->ops.gr.write_zcull_ptr(g, gold_mem, 0); |
1567 | 1599 | ||
1568 | err = g->ops.gr.commit_inst(c, ch_ctx->global_ctx_buffer_va[GOLDEN_CTX_VA]); | 1600 | err = g->ops.gr.commit_inst(c, gr_ctx->global_ctx_buffer_va[GOLDEN_CTX_VA]); |
1569 | if (err) | 1601 | if (err) |
1570 | goto clean_up; | 1602 | goto clean_up; |
1571 | 1603 | ||
@@ -1614,20 +1646,25 @@ int gr_gk20a_update_smpc_ctxsw_mode(struct gk20a *g, | |||
1614 | struct channel_gk20a *c, | 1646 | struct channel_gk20a *c, |
1615 | bool enable_smpc_ctxsw) | 1647 | bool enable_smpc_ctxsw) |
1616 | { | 1648 | { |
1617 | struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx; | 1649 | struct tsg_gk20a *tsg; |
1618 | struct nvgpu_mem *mem; | 1650 | struct nvgpu_gr_ctx *gr_ctx = NULL; |
1651 | struct nvgpu_mem *mem = NULL; | ||
1619 | u32 data; | 1652 | u32 data; |
1620 | int ret; | 1653 | int ret; |
1621 | 1654 | ||
1622 | gk20a_dbg_fn(""); | 1655 | gk20a_dbg_fn(""); |
1623 | 1656 | ||
1624 | if (!ch_ctx->gr_ctx) { | 1657 | tsg = tsg_gk20a_from_ch(c); |
1658 | if (!tsg) | ||
1659 | return -EINVAL; | ||
1660 | |||
1661 | gr_ctx = &tsg->gr_ctx; | ||
1662 | mem = &gr_ctx->mem; | ||
1663 | if (!nvgpu_mem_is_valid(mem)) { | ||
1625 | nvgpu_err(g, "no graphics context allocated"); | 1664 | nvgpu_err(g, "no graphics context allocated"); |
1626 | return -EFAULT; | 1665 | return -EFAULT; |
1627 | } | 1666 | } |
1628 | 1667 | ||
1629 | mem = &ch_ctx->gr_ctx->mem; | ||
1630 | |||
1631 | ret = gk20a_disable_channel_tsg(g, c); | 1668 | ret = gk20a_disable_channel_tsg(g, c); |
1632 | if (ret) { | 1669 | if (ret) { |
1633 | nvgpu_err(g, "failed to disable channel/TSG"); | 1670 | nvgpu_err(g, "failed to disable channel/TSG"); |
@@ -1670,24 +1707,30 @@ int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g, | |||
1670 | struct channel_gk20a *c, | 1707 | struct channel_gk20a *c, |
1671 | bool enable_hwpm_ctxsw) | 1708 | bool enable_hwpm_ctxsw) |
1672 | { | 1709 | { |
1673 | struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx; | 1710 | struct tsg_gk20a *tsg; |
1674 | struct pm_ctx_desc *pm_ctx = &ch_ctx->pm_ctx; | 1711 | struct nvgpu_mem *gr_mem = NULL; |
1675 | struct nvgpu_mem *gr_mem; | 1712 | struct nvgpu_gr_ctx *gr_ctx; |
1713 | struct pm_ctx_desc *pm_ctx; | ||
1676 | u32 data; | 1714 | u32 data; |
1677 | u64 virt_addr; | 1715 | u64 virt_addr; |
1678 | struct ctx_header_desc *ctx = &c->ch_ctx.ctx_header; | 1716 | struct ctx_header_desc *ctx = &c->ctx_header; |
1679 | struct nvgpu_mem *ctxheader = &ctx->mem; | 1717 | struct nvgpu_mem *ctxheader = &ctx->mem; |
1680 | int ret; | 1718 | int ret; |
1681 | 1719 | ||
1682 | gk20a_dbg_fn(""); | 1720 | gk20a_dbg_fn(""); |
1683 | 1721 | ||
1684 | if (!ch_ctx->gr_ctx) { | 1722 | tsg = tsg_gk20a_from_ch(c); |
1723 | if (!tsg) | ||
1724 | return -EINVAL; | ||
1725 | |||
1726 | gr_ctx = &tsg->gr_ctx; | ||
1727 | pm_ctx = &gr_ctx->pm_ctx; | ||
1728 | gr_mem = &gr_ctx->mem; | ||
1729 | if (!nvgpu_mem_is_valid(gr_mem)) { | ||
1685 | nvgpu_err(g, "no graphics context allocated"); | 1730 | nvgpu_err(g, "no graphics context allocated"); |
1686 | return -EFAULT; | 1731 | return -EFAULT; |
1687 | } | 1732 | } |
1688 | 1733 | ||
1689 | gr_mem = &ch_ctx->gr_ctx->mem; | ||
1690 | |||
1691 | if (enable_hwpm_ctxsw) { | 1734 | if (enable_hwpm_ctxsw) { |
1692 | if (pm_ctx->pm_mode == ctxsw_prog_main_image_pm_mode_ctxsw_f()) | 1735 | if (pm_ctx->pm_mode == ctxsw_prog_main_image_pm_mode_ctxsw_f()) |
1693 | return 0; | 1736 | return 0; |
@@ -1816,20 +1859,25 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g, | |||
1816 | struct channel_gk20a *c) | 1859 | struct channel_gk20a *c) |
1817 | { | 1860 | { |
1818 | struct gr_gk20a *gr = &g->gr; | 1861 | struct gr_gk20a *gr = &g->gr; |
1819 | struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx; | 1862 | struct tsg_gk20a *tsg; |
1863 | struct nvgpu_gr_ctx *gr_ctx; | ||
1820 | u32 virt_addr_lo; | 1864 | u32 virt_addr_lo; |
1821 | u32 virt_addr_hi; | 1865 | u32 virt_addr_hi; |
1822 | u64 virt_addr = 0; | 1866 | u64 virt_addr = 0; |
1823 | u32 v, data; | 1867 | u32 v, data; |
1824 | int ret = 0; | 1868 | int ret = 0; |
1825 | struct nvgpu_mem *mem = &ch_ctx->gr_ctx->mem; | 1869 | struct nvgpu_mem *mem; |
1826 | struct ctx_header_desc *ctx = &c->ch_ctx.ctx_header; | ||
1827 | struct nvgpu_mem *ctxheader = &ctx->mem; | ||
1828 | 1870 | ||
1829 | gk20a_dbg_fn(""); | 1871 | gk20a_dbg_fn(""); |
1830 | 1872 | ||
1873 | tsg = tsg_gk20a_from_ch(c); | ||
1874 | if (!tsg) | ||
1875 | return -EINVAL; | ||
1876 | |||
1877 | gr_ctx = &tsg->gr_ctx; | ||
1878 | mem = &gr_ctx->mem; | ||
1831 | if (gr->ctx_vars.local_golden_image == NULL) | 1879 | if (gr->ctx_vars.local_golden_image == NULL) |
1832 | return -1; | 1880 | return -EINVAL; |
1833 | 1881 | ||
1834 | /* Channel gr_ctx buffer is gpu cacheable. | 1882 | /* Channel gr_ctx buffer is gpu cacheable. |
1835 | Flush and invalidate before cpu update. */ | 1883 | Flush and invalidate before cpu update. */ |
@@ -1838,11 +1886,6 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g, | |||
1838 | if (nvgpu_mem_begin(g, mem)) | 1886 | if (nvgpu_mem_begin(g, mem)) |
1839 | return -ENOMEM; | 1887 | return -ENOMEM; |
1840 | 1888 | ||
1841 | if (nvgpu_mem_begin(g, ctxheader)) { | ||
1842 | ret = -ENOMEM; | ||
1843 | goto clean_up_mem; | ||
1844 | } | ||
1845 | |||
1846 | nvgpu_mem_wr_n(g, mem, 0, | 1889 | nvgpu_mem_wr_n(g, mem, 0, |
1847 | gr->ctx_vars.local_golden_image, | 1890 | gr->ctx_vars.local_golden_image, |
1848 | gr->ctx_vars.golden_image_size); | 1891 | gr->ctx_vars.golden_image_size); |
@@ -1855,9 +1898,9 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g, | |||
1855 | 1898 | ||
1856 | /* set priv access map */ | 1899 | /* set priv access map */ |
1857 | virt_addr_lo = | 1900 | virt_addr_lo = |
1858 | u64_lo32(ch_ctx->global_ctx_buffer_va[PRIV_ACCESS_MAP_VA]); | 1901 | u64_lo32(gr_ctx->global_ctx_buffer_va[PRIV_ACCESS_MAP_VA]); |
1859 | virt_addr_hi = | 1902 | virt_addr_hi = |
1860 | u64_hi32(ch_ctx->global_ctx_buffer_va[PRIV_ACCESS_MAP_VA]); | 1903 | u64_hi32(gr_ctx->global_ctx_buffer_va[PRIV_ACCESS_MAP_VA]); |
1861 | 1904 | ||
1862 | if (g->allow_all) | 1905 | if (g->allow_all) |
1863 | data = ctxsw_prog_main_image_priv_access_map_config_mode_allow_all_f(); | 1906 | data = ctxsw_prog_main_image_priv_access_map_config_mode_allow_all_f(); |
@@ -1867,21 +1910,13 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g, | |||
1867 | nvgpu_mem_wr(g, mem, ctxsw_prog_main_image_priv_access_map_config_o(), | 1910 | nvgpu_mem_wr(g, mem, ctxsw_prog_main_image_priv_access_map_config_o(), |
1868 | data); | 1911 | data); |
1869 | 1912 | ||
1870 | if (ctxheader->gpu_va) { | 1913 | nvgpu_mem_wr(g, mem, |
1871 | nvgpu_mem_wr(g, ctxheader, | 1914 | ctxsw_prog_main_image_priv_access_map_addr_lo_o(), |
1872 | ctxsw_prog_main_image_priv_access_map_addr_lo_o(), | 1915 | virt_addr_lo); |
1873 | virt_addr_lo); | 1916 | nvgpu_mem_wr(g, mem, |
1874 | nvgpu_mem_wr(g, ctxheader, | 1917 | ctxsw_prog_main_image_priv_access_map_addr_hi_o(), |
1875 | ctxsw_prog_main_image_priv_access_map_addr_hi_o(), | 1918 | virt_addr_hi); |
1876 | virt_addr_hi); | 1919 | |
1877 | } else { | ||
1878 | nvgpu_mem_wr(g, mem, | ||
1879 | ctxsw_prog_main_image_priv_access_map_addr_lo_o(), | ||
1880 | virt_addr_lo); | ||
1881 | nvgpu_mem_wr(g, mem, | ||
1882 | ctxsw_prog_main_image_priv_access_map_addr_hi_o(), | ||
1883 | virt_addr_hi); | ||
1884 | } | ||
1885 | /* disable verif features */ | 1920 | /* disable verif features */ |
1886 | v = nvgpu_mem_rd(g, mem, ctxsw_prog_main_image_misc_options_o()); | 1921 | v = nvgpu_mem_rd(g, mem, ctxsw_prog_main_image_misc_options_o()); |
1887 | v = v & ~(ctxsw_prog_main_image_misc_options_verif_features_m()); | 1922 | v = v & ~(ctxsw_prog_main_image_misc_options_verif_features_m()); |
@@ -1889,65 +1924,50 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g, | |||
1889 | nvgpu_mem_wr(g, mem, ctxsw_prog_main_image_misc_options_o(), v); | 1924 | nvgpu_mem_wr(g, mem, ctxsw_prog_main_image_misc_options_o(), v); |
1890 | 1925 | ||
1891 | if (g->ops.gr.update_ctxsw_preemption_mode) | 1926 | if (g->ops.gr.update_ctxsw_preemption_mode) |
1892 | g->ops.gr.update_ctxsw_preemption_mode(g, ch_ctx, mem); | 1927 | g->ops.gr.update_ctxsw_preemption_mode(g, c, mem); |
1893 | 1928 | ||
1894 | if (g->ops.gr.update_boosted_ctx) | 1929 | if (g->ops.gr.update_boosted_ctx) |
1895 | g->ops.gr.update_boosted_ctx(g, mem, ch_ctx->gr_ctx); | 1930 | g->ops.gr.update_boosted_ctx(g, mem, gr_ctx); |
1896 | 1931 | ||
1897 | virt_addr_lo = u64_lo32(ch_ctx->patch_ctx.mem.gpu_va); | 1932 | virt_addr_lo = u64_lo32(gr_ctx->patch_ctx.mem.gpu_va); |
1898 | virt_addr_hi = u64_hi32(ch_ctx->patch_ctx.mem.gpu_va); | 1933 | virt_addr_hi = u64_hi32(gr_ctx->patch_ctx.mem.gpu_va); |
1899 | 1934 | ||
1900 | nvgpu_log(g, gpu_dbg_info, "write patch count = %d", | 1935 | nvgpu_log(g, gpu_dbg_info, "write patch count = %d", |
1901 | ch_ctx->patch_ctx.data_count); | 1936 | gr_ctx->patch_ctx.data_count); |
1902 | nvgpu_mem_wr(g, mem, ctxsw_prog_main_image_patch_count_o(), | 1937 | nvgpu_mem_wr(g, mem, ctxsw_prog_main_image_patch_count_o(), |
1903 | ch_ctx->patch_ctx.data_count); | 1938 | gr_ctx->patch_ctx.data_count); |
1904 | 1939 | ||
1905 | if (ctxheader->gpu_va) { | 1940 | nvgpu_mem_wr(g, mem, |
1906 | nvgpu_mem_wr(g, ctxheader, | 1941 | ctxsw_prog_main_image_patch_adr_lo_o(), |
1907 | ctxsw_prog_main_image_patch_adr_lo_o(), | 1942 | virt_addr_lo); |
1908 | virt_addr_lo); | 1943 | nvgpu_mem_wr(g, mem, |
1909 | nvgpu_mem_wr(g, ctxheader, | 1944 | ctxsw_prog_main_image_patch_adr_hi_o(), |
1910 | ctxsw_prog_main_image_patch_adr_hi_o(), | 1945 | virt_addr_hi); |
1911 | virt_addr_hi); | ||
1912 | } else { | ||
1913 | nvgpu_mem_wr(g, mem, | ||
1914 | ctxsw_prog_main_image_patch_adr_lo_o(), | ||
1915 | virt_addr_lo); | ||
1916 | nvgpu_mem_wr(g, mem, | ||
1917 | ctxsw_prog_main_image_patch_adr_hi_o(), | ||
1918 | virt_addr_hi); | ||
1919 | } | ||
1920 | 1946 | ||
1921 | /* Update main header region of the context buffer with the info needed | 1947 | /* Update main header region of the context buffer with the info needed |
1922 | * for PM context switching, including mode and possibly a pointer to | 1948 | * for PM context switching, including mode and possibly a pointer to |
1923 | * the PM backing store. | 1949 | * the PM backing store. |
1924 | */ | 1950 | */ |
1925 | if (ch_ctx->pm_ctx.pm_mode == ctxsw_prog_main_image_pm_mode_ctxsw_f()) { | 1951 | if (gr_ctx->pm_ctx.pm_mode == ctxsw_prog_main_image_pm_mode_ctxsw_f()) { |
1926 | if (ch_ctx->pm_ctx.mem.gpu_va == 0) { | 1952 | if (gr_ctx->pm_ctx.mem.gpu_va == 0) { |
1927 | nvgpu_err(g, | 1953 | nvgpu_err(g, |
1928 | "context switched pm with no pm buffer!"); | 1954 | "context switched pm with no pm buffer!"); |
1929 | nvgpu_mem_end(g, mem); | 1955 | nvgpu_mem_end(g, mem); |
1930 | return -EFAULT; | 1956 | return -EFAULT; |
1931 | } | 1957 | } |
1932 | 1958 | ||
1933 | virt_addr = ch_ctx->pm_ctx.mem.gpu_va; | 1959 | virt_addr = gr_ctx->pm_ctx.mem.gpu_va; |
1934 | } else | 1960 | } else |
1935 | virt_addr = 0; | 1961 | virt_addr = 0; |
1936 | 1962 | ||
1937 | data = nvgpu_mem_rd(g, mem, ctxsw_prog_main_image_pm_o()); | 1963 | data = nvgpu_mem_rd(g, mem, ctxsw_prog_main_image_pm_o()); |
1938 | data = data & ~ctxsw_prog_main_image_pm_mode_m(); | 1964 | data = data & ~ctxsw_prog_main_image_pm_mode_m(); |
1939 | data |= ch_ctx->pm_ctx.pm_mode; | 1965 | data |= gr_ctx->pm_ctx.pm_mode; |
1940 | 1966 | ||
1941 | nvgpu_mem_wr(g, mem, ctxsw_prog_main_image_pm_o(), data); | 1967 | nvgpu_mem_wr(g, mem, ctxsw_prog_main_image_pm_o(), data); |
1942 | 1968 | ||
1943 | if (ctxheader->gpu_va) | 1969 | g->ops.gr.write_pm_ptr(g, mem, virt_addr); |
1944 | g->ops.gr.write_pm_ptr(g, ctxheader, virt_addr); | ||
1945 | else | ||
1946 | g->ops.gr.write_pm_ptr(g, mem, virt_addr); | ||
1947 | |||
1948 | 1970 | ||
1949 | nvgpu_mem_end(g, ctxheader); | ||
1950 | clean_up_mem: | ||
1951 | nvgpu_mem_end(g, mem); | 1971 | nvgpu_mem_end(g, mem); |
1952 | 1972 | ||
1953 | return ret; | 1973 | return ret; |
@@ -2568,13 +2588,13 @@ static int gr_gk20a_alloc_global_ctx_buffers(struct gk20a *g) | |||
2568 | return -ENOMEM; | 2588 | return -ENOMEM; |
2569 | } | 2589 | } |
2570 | 2590 | ||
2571 | static void gr_gk20a_unmap_global_ctx_buffers(struct channel_gk20a *c) | 2591 | static void gr_gk20a_unmap_global_ctx_buffers(struct gk20a *g, |
2592 | struct vm_gk20a *vm, | ||
2593 | struct nvgpu_gr_ctx *gr_ctx) | ||
2572 | { | 2594 | { |
2573 | struct vm_gk20a *ch_vm = c->vm; | 2595 | u64 *g_bfr_va = gr_ctx->global_ctx_buffer_va; |
2574 | struct gr_gk20a *gr = &c->g->gr; | 2596 | u64 *g_bfr_size = gr_ctx->global_ctx_buffer_size; |
2575 | u64 *g_bfr_va = c->ch_ctx.global_ctx_buffer_va; | 2597 | int *g_bfr_index = gr_ctx->global_ctx_buffer_index; |
2576 | u64 *g_bfr_size = c->ch_ctx.global_ctx_buffer_size; | ||
2577 | int *g_bfr_index = c->ch_ctx.global_ctx_buffer_index; | ||
2578 | u32 i; | 2598 | u32 i; |
2579 | 2599 | ||
2580 | gk20a_dbg_fn(""); | 2600 | gk20a_dbg_fn(""); |
@@ -2588,32 +2608,41 @@ static void gr_gk20a_unmap_global_ctx_buffers(struct channel_gk20a *c) | |||
2588 | * the correct struct nvgpu_mem to use. Handles the VPR | 2608 | * the correct struct nvgpu_mem to use. Handles the VPR |
2589 | * vs non-VPR difference in context images. | 2609 | * vs non-VPR difference in context images. |
2590 | */ | 2610 | */ |
2591 | mem = &gr->global_ctx_buffer[g_bfr_index[i]].mem; | 2611 | mem = &g->gr.global_ctx_buffer[g_bfr_index[i]].mem; |
2592 | 2612 | ||
2593 | nvgpu_gmmu_unmap(ch_vm, mem, g_bfr_va[i]); | 2613 | nvgpu_gmmu_unmap(vm, mem, g_bfr_va[i]); |
2594 | } | 2614 | } |
2595 | } | 2615 | } |
2596 | 2616 | ||
2597 | memset(g_bfr_va, 0, sizeof(c->ch_ctx.global_ctx_buffer_va)); | 2617 | memset(g_bfr_va, 0, sizeof(gr_ctx->global_ctx_buffer_va)); |
2598 | memset(g_bfr_size, 0, sizeof(c->ch_ctx.global_ctx_buffer_size)); | 2618 | memset(g_bfr_size, 0, sizeof(gr_ctx->global_ctx_buffer_size)); |
2599 | memset(g_bfr_index, 0, sizeof(c->ch_ctx.global_ctx_buffer_index)); | 2619 | memset(g_bfr_index, 0, sizeof(gr_ctx->global_ctx_buffer_index)); |
2600 | 2620 | ||
2601 | c->ch_ctx.global_ctx_buffer_mapped = false; | 2621 | gr_ctx->global_ctx_buffer_mapped = false; |
2602 | } | 2622 | } |
2603 | 2623 | ||
2604 | static int gr_gk20a_map_global_ctx_buffers(struct gk20a *g, | 2624 | static int gr_gk20a_map_global_ctx_buffers(struct gk20a *g, |
2605 | struct channel_gk20a *c) | 2625 | struct channel_gk20a *c) |
2606 | { | 2626 | { |
2627 | struct tsg_gk20a *tsg; | ||
2607 | struct vm_gk20a *ch_vm = c->vm; | 2628 | struct vm_gk20a *ch_vm = c->vm; |
2608 | u64 *g_bfr_va = c->ch_ctx.global_ctx_buffer_va; | 2629 | u64 *g_bfr_va; |
2609 | u64 *g_bfr_size = c->ch_ctx.global_ctx_buffer_size; | 2630 | u64 *g_bfr_size; |
2610 | int *g_bfr_index = c->ch_ctx.global_ctx_buffer_index; | 2631 | int *g_bfr_index; |
2611 | struct gr_gk20a *gr = &g->gr; | 2632 | struct gr_gk20a *gr = &g->gr; |
2612 | struct nvgpu_mem *mem; | 2633 | struct nvgpu_mem *mem; |
2613 | u64 gpu_va; | 2634 | u64 gpu_va; |
2614 | 2635 | ||
2615 | gk20a_dbg_fn(""); | 2636 | gk20a_dbg_fn(""); |
2616 | 2637 | ||
2638 | tsg = tsg_gk20a_from_ch(c); | ||
2639 | if (!tsg) | ||
2640 | return -EINVAL; | ||
2641 | |||
2642 | g_bfr_va = tsg->gr_ctx.global_ctx_buffer_va; | ||
2643 | g_bfr_size = tsg->gr_ctx.global_ctx_buffer_size; | ||
2644 | g_bfr_index = tsg->gr_ctx.global_ctx_buffer_index; | ||
2645 | |||
2617 | /* Circular Buffer */ | 2646 | /* Circular Buffer */ |
2618 | if (c->vpr && | 2647 | if (c->vpr && |
2619 | nvgpu_mem_is_valid(&gr->global_ctx_buffer[CIRCULAR_VPR].mem)) { | 2648 | nvgpu_mem_is_valid(&gr->global_ctx_buffer[CIRCULAR_VPR].mem)) { |
@@ -2688,21 +2717,20 @@ static int gr_gk20a_map_global_ctx_buffers(struct gk20a *g, | |||
2688 | g_bfr_size[PRIV_ACCESS_MAP_VA] = mem->size; | 2717 | g_bfr_size[PRIV_ACCESS_MAP_VA] = mem->size; |
2689 | g_bfr_index[PRIV_ACCESS_MAP_VA] = PRIV_ACCESS_MAP; | 2718 | g_bfr_index[PRIV_ACCESS_MAP_VA] = PRIV_ACCESS_MAP; |
2690 | 2719 | ||
2691 | c->ch_ctx.global_ctx_buffer_mapped = true; | 2720 | tsg->gr_ctx.global_ctx_buffer_mapped = true; |
2692 | return 0; | 2721 | return 0; |
2693 | 2722 | ||
2694 | clean_up: | 2723 | clean_up: |
2695 | gr_gk20a_unmap_global_ctx_buffers(c); | 2724 | gr_gk20a_unmap_global_ctx_buffers(g, ch_vm, &tsg->gr_ctx); |
2696 | 2725 | ||
2697 | return -ENOMEM; | 2726 | return -ENOMEM; |
2698 | } | 2727 | } |
2699 | 2728 | ||
2700 | int gr_gk20a_alloc_gr_ctx(struct gk20a *g, | 2729 | int gr_gk20a_alloc_gr_ctx(struct gk20a *g, |
2701 | struct gr_ctx_desc **__gr_ctx, struct vm_gk20a *vm, | 2730 | struct nvgpu_gr_ctx *gr_ctx, struct vm_gk20a *vm, |
2702 | u32 class, | 2731 | u32 class, |
2703 | u32 padding) | 2732 | u32 padding) |
2704 | { | 2733 | { |
2705 | struct gr_ctx_desc *gr_ctx = NULL; | ||
2706 | struct gr_gk20a *gr = &g->gr; | 2734 | struct gr_gk20a *gr = &g->gr; |
2707 | int err = 0; | 2735 | int err = 0; |
2708 | 2736 | ||
@@ -2715,15 +2743,11 @@ int gr_gk20a_alloc_gr_ctx(struct gk20a *g, | |||
2715 | gr->ctx_vars.buffer_size = gr->ctx_vars.golden_image_size; | 2743 | gr->ctx_vars.buffer_size = gr->ctx_vars.golden_image_size; |
2716 | gr->ctx_vars.buffer_total_size = gr->ctx_vars.golden_image_size; | 2744 | gr->ctx_vars.buffer_total_size = gr->ctx_vars.golden_image_size; |
2717 | 2745 | ||
2718 | gr_ctx = nvgpu_kzalloc(g, sizeof(*gr_ctx)); | ||
2719 | if (!gr_ctx) | ||
2720 | return -ENOMEM; | ||
2721 | |||
2722 | err = nvgpu_dma_alloc_flags(g, NVGPU_DMA_NO_KERNEL_MAPPING, | 2746 | err = nvgpu_dma_alloc_flags(g, NVGPU_DMA_NO_KERNEL_MAPPING, |
2723 | gr->ctx_vars.buffer_total_size, | 2747 | gr->ctx_vars.buffer_total_size, |
2724 | &gr_ctx->mem); | 2748 | &gr_ctx->mem); |
2725 | if (err) | 2749 | if (err) |
2726 | goto err_free_ctx; | 2750 | return err; |
2727 | 2751 | ||
2728 | gr_ctx->mem.gpu_va = nvgpu_gmmu_map(vm, | 2752 | gr_ctx->mem.gpu_va = nvgpu_gmmu_map(vm, |
2729 | &gr_ctx->mem, | 2753 | &gr_ctx->mem, |
@@ -2734,15 +2758,10 @@ int gr_gk20a_alloc_gr_ctx(struct gk20a *g, | |||
2734 | if (!gr_ctx->mem.gpu_va) | 2758 | if (!gr_ctx->mem.gpu_va) |
2735 | goto err_free_mem; | 2759 | goto err_free_mem; |
2736 | 2760 | ||
2737 | *__gr_ctx = gr_ctx; | ||
2738 | |||
2739 | return 0; | 2761 | return 0; |
2740 | 2762 | ||
2741 | err_free_mem: | 2763 | err_free_mem: |
2742 | nvgpu_dma_free(g, &gr_ctx->mem); | 2764 | nvgpu_dma_free(g, &gr_ctx->mem); |
2743 | err_free_ctx: | ||
2744 | nvgpu_kfree(g, gr_ctx); | ||
2745 | gr_ctx = NULL; | ||
2746 | 2765 | ||
2747 | return err; | 2766 | return err; |
2748 | } | 2767 | } |
@@ -2750,7 +2769,7 @@ int gr_gk20a_alloc_gr_ctx(struct gk20a *g, | |||
2750 | static int gr_gk20a_alloc_tsg_gr_ctx(struct gk20a *g, | 2769 | static int gr_gk20a_alloc_tsg_gr_ctx(struct gk20a *g, |
2751 | struct tsg_gk20a *tsg, u32 class, u32 padding) | 2770 | struct tsg_gk20a *tsg, u32 class, u32 padding) |
2752 | { | 2771 | { |
2753 | struct gr_ctx_desc **gr_ctx = &tsg->tsg_gr_ctx; | 2772 | struct nvgpu_gr_ctx *gr_ctx = &tsg->gr_ctx; |
2754 | int err; | 2773 | int err; |
2755 | 2774 | ||
2756 | if (!tsg->vm) { | 2775 | if (!tsg->vm) { |
@@ -2762,57 +2781,44 @@ static int gr_gk20a_alloc_tsg_gr_ctx(struct gk20a *g, | |||
2762 | if (err) | 2781 | if (err) |
2763 | return err; | 2782 | return err; |
2764 | 2783 | ||
2765 | return 0; | 2784 | gr_ctx->tsgid = tsg->tsgid; |
2766 | } | ||
2767 | |||
2768 | static int gr_gk20a_alloc_channel_gr_ctx(struct gk20a *g, | ||
2769 | struct channel_gk20a *c, | ||
2770 | u32 class, | ||
2771 | u32 padding) | ||
2772 | { | ||
2773 | struct gr_ctx_desc **gr_ctx = &c->ch_ctx.gr_ctx; | ||
2774 | int err = g->ops.gr.alloc_gr_ctx(g, gr_ctx, c->vm, class, padding); | ||
2775 | if (err) | ||
2776 | return err; | ||
2777 | 2785 | ||
2778 | return 0; | 2786 | return 0; |
2779 | } | 2787 | } |
2780 | 2788 | ||
2781 | void gr_gk20a_free_gr_ctx(struct gk20a *g, | 2789 | void gr_gk20a_free_gr_ctx(struct gk20a *g, |
2782 | struct vm_gk20a *vm, struct gr_ctx_desc *gr_ctx) | 2790 | struct vm_gk20a *vm, struct nvgpu_gr_ctx *gr_ctx) |
2783 | { | 2791 | { |
2784 | gk20a_dbg_fn(""); | 2792 | gk20a_dbg_fn(""); |
2785 | 2793 | ||
2786 | if (!gr_ctx || !gr_ctx->mem.gpu_va) | 2794 | if (gr_ctx->mem.gpu_va) { |
2787 | return; | 2795 | gr_gk20a_unmap_global_ctx_buffers(g, vm, gr_ctx); |
2796 | gr_gk20a_free_channel_patch_ctx(g, vm, gr_ctx); | ||
2797 | gr_gk20a_free_channel_pm_ctx(g, vm, gr_ctx); | ||
2788 | 2798 | ||
2789 | if (g->ops.gr.dump_ctxsw_stats && | 2799 | if (g->ops.gr.dump_ctxsw_stats && |
2790 | g->gr.ctx_vars.dump_ctxsw_stats_on_channel_close) | 2800 | g->gr.ctx_vars.dump_ctxsw_stats_on_channel_close) |
2791 | g->ops.gr.dump_ctxsw_stats(g, vm, gr_ctx); | 2801 | g->ops.gr.dump_ctxsw_stats(g, vm, gr_ctx); |
2792 | 2802 | ||
2793 | nvgpu_dma_unmap_free(vm, &gr_ctx->pagepool_ctxsw_buffer); | 2803 | nvgpu_dma_unmap_free(vm, &gr_ctx->pagepool_ctxsw_buffer); |
2794 | nvgpu_dma_unmap_free(vm, &gr_ctx->betacb_ctxsw_buffer); | 2804 | nvgpu_dma_unmap_free(vm, &gr_ctx->betacb_ctxsw_buffer); |
2795 | nvgpu_dma_unmap_free(vm, &gr_ctx->spill_ctxsw_buffer); | 2805 | nvgpu_dma_unmap_free(vm, &gr_ctx->spill_ctxsw_buffer); |
2796 | nvgpu_dma_unmap_free(vm, &gr_ctx->preempt_ctxsw_buffer); | 2806 | nvgpu_dma_unmap_free(vm, &gr_ctx->preempt_ctxsw_buffer); |
2797 | nvgpu_gmmu_unmap(vm, &gr_ctx->mem, gr_ctx->mem.gpu_va); | 2807 | nvgpu_dma_unmap_free(vm, &gr_ctx->mem); |
2798 | nvgpu_dma_free(g, &gr_ctx->mem); | 2808 | |
2799 | nvgpu_kfree(g, gr_ctx); | 2809 | memset(gr_ctx, 0, sizeof(*gr_ctx)); |
2810 | } | ||
2800 | } | 2811 | } |
2801 | 2812 | ||
2802 | void gr_gk20a_free_tsg_gr_ctx(struct tsg_gk20a *tsg) | 2813 | void gr_gk20a_free_tsg_gr_ctx(struct tsg_gk20a *tsg) |
2803 | { | 2814 | { |
2815 | struct gk20a *g = tsg->g; | ||
2816 | |||
2804 | if (!tsg->vm) { | 2817 | if (!tsg->vm) { |
2805 | nvgpu_err(tsg->g, "No address space bound"); | 2818 | nvgpu_err(g, "No address space bound"); |
2806 | return; | 2819 | return; |
2807 | } | 2820 | } |
2808 | tsg->g->ops.gr.free_gr_ctx(tsg->g, tsg->vm, tsg->tsg_gr_ctx); | 2821 | tsg->g->ops.gr.free_gr_ctx(g, tsg->vm, &tsg->gr_ctx); |
2809 | tsg->tsg_gr_ctx = NULL; | ||
2810 | } | ||
2811 | |||
2812 | static void gr_gk20a_free_channel_gr_ctx(struct channel_gk20a *c) | ||
2813 | { | ||
2814 | c->g->ops.gr.free_gr_ctx(c->g, c->vm, c->ch_ctx.gr_ctx); | ||
2815 | c->ch_ctx.gr_ctx = NULL; | ||
2816 | } | 2822 | } |
2817 | 2823 | ||
2818 | u32 gr_gk20a_get_patch_slots(struct gk20a *g) | 2824 | u32 gr_gk20a_get_patch_slots(struct gk20a *g) |
@@ -2823,13 +2829,19 @@ u32 gr_gk20a_get_patch_slots(struct gk20a *g) | |||
2823 | static int gr_gk20a_alloc_channel_patch_ctx(struct gk20a *g, | 2829 | static int gr_gk20a_alloc_channel_patch_ctx(struct gk20a *g, |
2824 | struct channel_gk20a *c) | 2830 | struct channel_gk20a *c) |
2825 | { | 2831 | { |
2826 | struct patch_desc *patch_ctx = &c->ch_ctx.patch_ctx; | 2832 | struct tsg_gk20a *tsg; |
2833 | struct patch_desc *patch_ctx; | ||
2827 | struct vm_gk20a *ch_vm = c->vm; | 2834 | struct vm_gk20a *ch_vm = c->vm; |
2828 | u32 alloc_size; | 2835 | u32 alloc_size; |
2829 | int err = 0; | 2836 | int err = 0; |
2830 | 2837 | ||
2831 | gk20a_dbg_fn(""); | 2838 | gk20a_dbg_fn(""); |
2832 | 2839 | ||
2840 | tsg = tsg_gk20a_from_ch(c); | ||
2841 | if (!tsg) | ||
2842 | return -EINVAL; | ||
2843 | |||
2844 | patch_ctx = &tsg->gr_ctx.patch_ctx; | ||
2833 | alloc_size = g->ops.gr.get_patch_slots(g) * | 2845 | alloc_size = g->ops.gr.get_patch_slots(g) * |
2834 | PATCH_CTX_SLOTS_REQUIRED_PER_ENTRY; | 2846 | PATCH_CTX_SLOTS_REQUIRED_PER_ENTRY; |
2835 | 2847 | ||
@@ -2845,57 +2857,42 @@ static int gr_gk20a_alloc_channel_patch_ctx(struct gk20a *g, | |||
2845 | return 0; | 2857 | return 0; |
2846 | } | 2858 | } |
2847 | 2859 | ||
2848 | static void gr_gk20a_free_channel_patch_ctx(struct channel_gk20a *c) | 2860 | static void gr_gk20a_free_channel_patch_ctx(struct gk20a *g, |
2861 | struct vm_gk20a *vm, | ||
2862 | struct nvgpu_gr_ctx *gr_ctx) | ||
2849 | { | 2863 | { |
2850 | struct patch_desc *patch_ctx = &c->ch_ctx.patch_ctx; | 2864 | struct patch_desc *patch_ctx = &gr_ctx->patch_ctx; |
2851 | struct gk20a *g = c->g; | ||
2852 | 2865 | ||
2853 | gk20a_dbg_fn(""); | 2866 | gk20a_dbg_fn(""); |
2854 | 2867 | ||
2855 | if (patch_ctx->mem.gpu_va) | 2868 | if (patch_ctx->mem.gpu_va) |
2856 | nvgpu_gmmu_unmap(c->vm, &patch_ctx->mem, | 2869 | nvgpu_gmmu_unmap(vm, &patch_ctx->mem, |
2857 | patch_ctx->mem.gpu_va); | 2870 | patch_ctx->mem.gpu_va); |
2858 | 2871 | ||
2859 | nvgpu_dma_free(g, &patch_ctx->mem); | 2872 | nvgpu_dma_free(g, &patch_ctx->mem); |
2860 | patch_ctx->data_count = 0; | 2873 | patch_ctx->data_count = 0; |
2861 | } | 2874 | } |
2862 | 2875 | ||
2863 | static void gr_gk20a_free_channel_pm_ctx(struct channel_gk20a *c) | 2876 | static void gr_gk20a_free_channel_pm_ctx(struct gk20a *g, |
2877 | struct vm_gk20a *vm, | ||
2878 | struct nvgpu_gr_ctx *gr_ctx) | ||
2864 | { | 2879 | { |
2865 | struct pm_ctx_desc *pm_ctx = &c->ch_ctx.pm_ctx; | 2880 | struct pm_ctx_desc *pm_ctx = &gr_ctx->pm_ctx; |
2866 | struct gk20a *g = c->g; | ||
2867 | 2881 | ||
2868 | gk20a_dbg_fn(""); | 2882 | gk20a_dbg_fn(""); |
2869 | 2883 | ||
2870 | if (pm_ctx->mem.gpu_va) { | 2884 | if (pm_ctx->mem.gpu_va) { |
2871 | nvgpu_gmmu_unmap(c->vm, &pm_ctx->mem, pm_ctx->mem.gpu_va); | 2885 | nvgpu_gmmu_unmap(vm, &pm_ctx->mem, pm_ctx->mem.gpu_va); |
2872 | 2886 | ||
2873 | nvgpu_dma_free(g, &pm_ctx->mem); | 2887 | nvgpu_dma_free(g, &pm_ctx->mem); |
2874 | } | 2888 | } |
2875 | } | 2889 | } |
2876 | 2890 | ||
2877 | void gk20a_free_channel_ctx(struct channel_gk20a *c, bool is_tsg) | ||
2878 | { | ||
2879 | if(c->g->ops.fifo.free_channel_ctx_header) | ||
2880 | c->g->ops.fifo.free_channel_ctx_header(c); | ||
2881 | gr_gk20a_unmap_global_ctx_buffers(c); | ||
2882 | gr_gk20a_free_channel_patch_ctx(c); | ||
2883 | gr_gk20a_free_channel_pm_ctx(c); | ||
2884 | if (!is_tsg) | ||
2885 | gr_gk20a_free_channel_gr_ctx(c); | ||
2886 | |||
2887 | /* zcull_ctx */ | ||
2888 | |||
2889 | memset(&c->ch_ctx, 0, sizeof(struct channel_ctx_gk20a)); | ||
2890 | |||
2891 | c->first_init = false; | ||
2892 | } | ||
2893 | |||
2894 | int gk20a_alloc_obj_ctx(struct channel_gk20a *c, u32 class_num, u32 flags) | 2891 | int gk20a_alloc_obj_ctx(struct channel_gk20a *c, u32 class_num, u32 flags) |
2895 | { | 2892 | { |
2896 | struct gk20a *g = c->g; | 2893 | struct gk20a *g = c->g; |
2897 | struct fifo_gk20a *f = &g->fifo; | 2894 | struct fifo_gk20a *f = &g->fifo; |
2898 | struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx; | 2895 | struct nvgpu_gr_ctx *gr_ctx; |
2899 | struct tsg_gk20a *tsg = NULL; | 2896 | struct tsg_gk20a *tsg = NULL; |
2900 | int err = 0; | 2897 | int err = 0; |
2901 | 2898 | ||
@@ -2917,92 +2914,64 @@ int gk20a_alloc_obj_ctx(struct channel_gk20a *c, u32 class_num, u32 flags) | |||
2917 | } | 2914 | } |
2918 | c->obj_class = class_num; | 2915 | c->obj_class = class_num; |
2919 | 2916 | ||
2920 | if (gk20a_is_channel_marked_as_tsg(c)) | 2917 | if (!gk20a_is_channel_marked_as_tsg(c)) |
2921 | tsg = &f->tsg[c->tsgid]; | 2918 | return -EINVAL; |
2922 | 2919 | ||
2923 | /* allocate gr ctx buffer */ | 2920 | tsg = &f->tsg[c->tsgid]; |
2924 | if (!tsg) { | 2921 | gr_ctx = &tsg->gr_ctx; |
2925 | if (!ch_ctx->gr_ctx) { | 2922 | |
2926 | err = gr_gk20a_alloc_channel_gr_ctx(g, c, | 2923 | if (!nvgpu_mem_is_valid(&gr_ctx->mem)) { |
2927 | class_num, | 2924 | tsg->vm = c->vm; |
2928 | flags); | 2925 | nvgpu_vm_get(tsg->vm); |
2929 | if (err) { | 2926 | err = gr_gk20a_alloc_tsg_gr_ctx(g, tsg, |
2930 | nvgpu_err(g, | 2927 | class_num, |
2931 | "fail to allocate gr ctx buffer"); | 2928 | flags); |
2932 | goto out; | 2929 | if (err) { |
2933 | } | ||
2934 | } else { | ||
2935 | /*TBD: needs to be more subtle about which is | ||
2936 | * being allocated as some are allowed to be | ||
2937 | * allocated along same channel */ | ||
2938 | nvgpu_err(g, | 2930 | nvgpu_err(g, |
2939 | "too many classes alloc'd on same channel"); | 2931 | "fail to allocate TSG gr ctx buffer"); |
2940 | err = -EINVAL; | 2932 | nvgpu_vm_put(tsg->vm); |
2933 | tsg->vm = NULL; | ||
2941 | goto out; | 2934 | goto out; |
2942 | } | 2935 | } |
2943 | } else { | 2936 | |
2944 | if (!tsg->tsg_gr_ctx) { | 2937 | /* allocate patch buffer */ |
2945 | tsg->vm = c->vm; | 2938 | if (!nvgpu_mem_is_valid(&gr_ctx->patch_ctx.mem)) { |
2946 | nvgpu_vm_get(tsg->vm); | 2939 | gr_ctx->patch_ctx.data_count = 0; |
2947 | err = gr_gk20a_alloc_tsg_gr_ctx(g, tsg, | 2940 | err = gr_gk20a_alloc_channel_patch_ctx(g, c); |
2948 | class_num, | ||
2949 | flags); | ||
2950 | if (err) { | 2941 | if (err) { |
2951 | nvgpu_err(g, | 2942 | nvgpu_err(g, |
2952 | "fail to allocate TSG gr ctx buffer"); | 2943 | "fail to allocate patch buffer"); |
2953 | nvgpu_vm_put(tsg->vm); | ||
2954 | tsg->vm = NULL; | ||
2955 | goto out; | 2944 | goto out; |
2956 | } | 2945 | } |
2957 | } | 2946 | } |
2958 | ch_ctx->gr_ctx = tsg->tsg_gr_ctx; | ||
2959 | } | ||
2960 | |||
2961 | /* PM ctxt switch is off by default */ | ||
2962 | ch_ctx->pm_ctx.pm_mode = ctxsw_prog_main_image_pm_mode_no_ctxsw_f(); | ||
2963 | 2947 | ||
2964 | /* commit gr ctx buffer */ | 2948 | /* map global buffer to channel gpu_va and commit */ |
2965 | err = g->ops.gr.commit_inst(c, ch_ctx->gr_ctx->mem.gpu_va); | 2949 | err = gr_gk20a_map_global_ctx_buffers(g, c); |
2966 | if (err) { | ||
2967 | nvgpu_err(g, | ||
2968 | "fail to commit gr ctx buffer"); | ||
2969 | goto out; | ||
2970 | } | ||
2971 | |||
2972 | /* allocate patch buffer */ | ||
2973 | if (!nvgpu_mem_is_valid(&ch_ctx->patch_ctx.mem)) { | ||
2974 | ch_ctx->patch_ctx.data_count = 0; | ||
2975 | err = gr_gk20a_alloc_channel_patch_ctx(g, c); | ||
2976 | if (err) { | 2950 | if (err) { |
2977 | nvgpu_err(g, | 2951 | nvgpu_err(g, |
2978 | "fail to allocate patch buffer"); | 2952 | "fail to map global ctx buffer"); |
2979 | goto out; | 2953 | goto out; |
2980 | } | 2954 | } |
2981 | } | 2955 | gr_gk20a_commit_global_ctx_buffers(g, c, true); |
2982 | 2956 | ||
2983 | /* map global buffer to channel gpu_va and commit */ | 2957 | /* commit gr ctx buffer */ |
2984 | if (!ch_ctx->global_ctx_buffer_mapped) { | 2958 | err = g->ops.gr.commit_inst(c, gr_ctx->mem.gpu_va); |
2985 | err = gr_gk20a_map_global_ctx_buffers(g, c); | ||
2986 | if (err) { | 2959 | if (err) { |
2987 | nvgpu_err(g, | 2960 | nvgpu_err(g, |
2988 | "fail to map global ctx buffer"); | 2961 | "fail to commit gr ctx buffer"); |
2989 | goto out; | 2962 | goto out; |
2990 | } | 2963 | } |
2991 | gr_gk20a_commit_global_ctx_buffers(g, c, true); | ||
2992 | } | ||
2993 | 2964 | ||
2994 | /* init golden image, ELPG enabled after this is done */ | 2965 | /* init golden image, ELPG enabled after this is done */ |
2995 | err = gr_gk20a_init_golden_ctx_image(g, c); | 2966 | err = gr_gk20a_init_golden_ctx_image(g, c); |
2996 | if (err) { | 2967 | if (err) { |
2997 | nvgpu_err(g, | 2968 | nvgpu_err(g, |
2998 | "fail to init golden ctx image"); | 2969 | "fail to init golden ctx image"); |
2999 | goto out; | 2970 | goto out; |
3000 | } | 2971 | } |
3001 | 2972 | ||
3002 | /* load golden image */ | 2973 | /* load golden image */ |
3003 | if (!c->first_init) { | 2974 | gr_gk20a_load_golden_ctx_image(g, c); |
3004 | err = gr_gk20a_elpg_protected_call(g, | ||
3005 | gr_gk20a_load_golden_ctx_image(g, c)); | ||
3006 | if (err) { | 2975 | if (err) { |
3007 | nvgpu_err(g, | 2976 | nvgpu_err(g, |
3008 | "fail to load golden ctx image"); | 2977 | "fail to load golden ctx image"); |
@@ -3016,11 +2985,21 @@ int gk20a_alloc_obj_ctx(struct channel_gk20a *c, u32 class_num, u32 flags) | |||
3016 | "fail to bind channel for ctxsw trace"); | 2985 | "fail to bind channel for ctxsw trace"); |
3017 | } | 2986 | } |
3018 | #endif | 2987 | #endif |
3019 | c->first_init = true; | ||
3020 | } | ||
3021 | 2988 | ||
3022 | if (g->ops.gr.set_czf_bypass) | 2989 | if (g->ops.gr.set_czf_bypass) |
3023 | g->ops.gr.set_czf_bypass(g, c); | 2990 | g->ops.gr.set_czf_bypass(g, c); |
2991 | |||
2992 | /* PM ctxt switch is off by default */ | ||
2993 | gr_ctx->pm_ctx.pm_mode = ctxsw_prog_main_image_pm_mode_no_ctxsw_f(); | ||
2994 | } else { | ||
2995 | /* commit gr ctx buffer */ | ||
2996 | err = g->ops.gr.commit_inst(c, gr_ctx->mem.gpu_va); | ||
2997 | if (err) { | ||
2998 | nvgpu_err(g, | ||
2999 | "fail to commit gr ctx buffer"); | ||
3000 | goto out; | ||
3001 | } | ||
3002 | } | ||
3024 | 3003 | ||
3025 | gk20a_dbg_fn("done"); | 3004 | gk20a_dbg_fn("done"); |
3026 | return 0; | 3005 | return 0; |
@@ -3553,8 +3532,14 @@ u32 gr_gk20a_get_ctxsw_zcull_size(struct gk20a *g, struct gr_gk20a *gr) | |||
3553 | int gr_gk20a_bind_ctxsw_zcull(struct gk20a *g, struct gr_gk20a *gr, | 3532 | int gr_gk20a_bind_ctxsw_zcull(struct gk20a *g, struct gr_gk20a *gr, |
3554 | struct channel_gk20a *c, u64 zcull_va, u32 mode) | 3533 | struct channel_gk20a *c, u64 zcull_va, u32 mode) |
3555 | { | 3534 | { |
3556 | struct zcull_ctx_desc *zcull_ctx = &c->ch_ctx.zcull_ctx; | 3535 | struct tsg_gk20a *tsg; |
3536 | struct zcull_ctx_desc *zcull_ctx; | ||
3557 | 3537 | ||
3538 | tsg = tsg_gk20a_from_ch(c); | ||
3539 | if (!tsg) | ||
3540 | return -EINVAL; | ||
3541 | |||
3542 | zcull_ctx = &tsg->gr_ctx.zcull_ctx; | ||
3558 | zcull_ctx->ctx_sw_mode = mode; | 3543 | zcull_ctx->ctx_sw_mode = mode; |
3559 | zcull_ctx->gpu_va = zcull_va; | 3544 | zcull_ctx->gpu_va = zcull_va; |
3560 | 3545 | ||
@@ -6516,7 +6501,7 @@ void gk20a_gr_init_ovr_sm_dsm_perf(void) | |||
6516 | * write will actually occur. so later we should put a lazy, | 6501 | * write will actually occur. so later we should put a lazy, |
6517 | * map-and-hold system in the patch write state */ | 6502 | * map-and-hold system in the patch write state */ |
6518 | static int gr_gk20a_ctx_patch_smpc(struct gk20a *g, | 6503 | static int gr_gk20a_ctx_patch_smpc(struct gk20a *g, |
6519 | struct channel_ctx_gk20a *ch_ctx, | 6504 | struct channel_gk20a *ch, |
6520 | u32 addr, u32 data, | 6505 | u32 addr, u32 data, |
6521 | struct nvgpu_mem *mem) | 6506 | struct nvgpu_mem *mem) |
6522 | { | 6507 | { |
@@ -6531,9 +6516,16 @@ static int gr_gk20a_ctx_patch_smpc(struct gk20a *g, | |||
6531 | u32 *ovr_perf_regs = NULL; | 6516 | u32 *ovr_perf_regs = NULL; |
6532 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); | 6517 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); |
6533 | u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); | 6518 | u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); |
6534 | struct ctx_header_desc *ctx = &ch_ctx->ctx_header; | 6519 | struct tsg_gk20a *tsg; |
6520 | struct nvgpu_gr_ctx *gr_ctx; | ||
6521 | struct ctx_header_desc *ctx = &ch->ctx_header; | ||
6535 | struct nvgpu_mem *ctxheader = &ctx->mem; | 6522 | struct nvgpu_mem *ctxheader = &ctx->mem; |
6536 | 6523 | ||
6524 | tsg = tsg_gk20a_from_ch(ch); | ||
6525 | if (!tsg) | ||
6526 | return -EINVAL; | ||
6527 | |||
6528 | gr_ctx = &tsg->gr_ctx; | ||
6537 | g->ops.gr.init_ovr_sm_dsm_perf(); | 6529 | g->ops.gr.init_ovr_sm_dsm_perf(); |
6538 | g->ops.gr.init_sm_dsm_reg_info(); | 6530 | g->ops.gr.init_sm_dsm_reg_info(); |
6539 | g->ops.gr.get_ovr_perf_regs(g, &num_ovr_perf_regs, &ovr_perf_regs); | 6531 | g->ops.gr.get_ovr_perf_regs(g, &num_ovr_perf_regs, &ovr_perf_regs); |
@@ -6556,17 +6548,17 @@ static int gr_gk20a_ctx_patch_smpc(struct gk20a *g, | |||
6556 | ctxsw_prog_main_image_patch_count_o()); | 6548 | ctxsw_prog_main_image_patch_count_o()); |
6557 | 6549 | ||
6558 | if (!tmp) | 6550 | if (!tmp) |
6559 | ch_ctx->patch_ctx.data_count = 0; | 6551 | gr_ctx->patch_ctx.data_count = 0; |
6560 | 6552 | ||
6561 | gr_gk20a_ctx_patch_write(g, ch_ctx, | 6553 | gr_gk20a_ctx_patch_write(g, gr_ctx, |
6562 | addr, data, true); | 6554 | addr, data, true); |
6563 | 6555 | ||
6564 | vaddr_lo = u64_lo32(ch_ctx->patch_ctx.mem.gpu_va); | 6556 | vaddr_lo = u64_lo32(gr_ctx->patch_ctx.mem.gpu_va); |
6565 | vaddr_hi = u64_hi32(ch_ctx->patch_ctx.mem.gpu_va); | 6557 | vaddr_hi = u64_hi32(gr_ctx->patch_ctx.mem.gpu_va); |
6566 | 6558 | ||
6567 | nvgpu_mem_wr(g, mem, | 6559 | nvgpu_mem_wr(g, mem, |
6568 | ctxsw_prog_main_image_patch_count_o(), | 6560 | ctxsw_prog_main_image_patch_count_o(), |
6569 | ch_ctx->patch_ctx.data_count); | 6561 | gr_ctx->patch_ctx.data_count); |
6570 | if (ctxheader->gpu_va) { | 6562 | if (ctxheader->gpu_va) { |
6571 | /* | 6563 | /* |
6572 | * Main context can be gr_ctx or pm_ctx. | 6564 | * Main context can be gr_ctx or pm_ctx. |
@@ -6575,7 +6567,7 @@ static int gr_gk20a_ctx_patch_smpc(struct gk20a *g, | |||
6575 | * __gr_gk20a_exec_ctx_ops. Need to take | 6567 | * __gr_gk20a_exec_ctx_ops. Need to take |
6576 | * care of cpu access to ctxheader here. | 6568 | * care of cpu access to ctxheader here. |
6577 | */ | 6569 | */ |
6578 | if (nvgpu_mem_begin(g, ctxheader)) | 6570 | if (nvgpu_mem_begin(g, ctxheader)) |
6579 | return -ENOMEM; | 6571 | return -ENOMEM; |
6580 | nvgpu_mem_wr(g, ctxheader, | 6572 | nvgpu_mem_wr(g, ctxheader, |
6581 | ctxsw_prog_main_image_patch_adr_lo_o(), | 6573 | ctxsw_prog_main_image_patch_adr_lo_o(), |
@@ -7690,7 +7682,8 @@ int __gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch, | |||
7690 | bool ch_is_curr_ctx) | 7682 | bool ch_is_curr_ctx) |
7691 | { | 7683 | { |
7692 | struct gk20a *g = ch->g; | 7684 | struct gk20a *g = ch->g; |
7693 | struct channel_ctx_gk20a *ch_ctx = &ch->ch_ctx; | 7685 | struct tsg_gk20a *tsg; |
7686 | struct nvgpu_gr_ctx *gr_ctx; | ||
7694 | bool gr_ctx_ready = false; | 7687 | bool gr_ctx_ready = false; |
7695 | bool pm_ctx_ready = false; | 7688 | bool pm_ctx_ready = false; |
7696 | struct nvgpu_mem *current_mem = NULL; | 7689 | struct nvgpu_mem *current_mem = NULL; |
@@ -7707,6 +7700,12 @@ int __gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch, | |||
7707 | gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "wr_ops=%d rd_ops=%d", | 7700 | gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "wr_ops=%d rd_ops=%d", |
7708 | num_ctx_wr_ops, num_ctx_rd_ops); | 7701 | num_ctx_wr_ops, num_ctx_rd_ops); |
7709 | 7702 | ||
7703 | tsg = tsg_gk20a_from_ch(ch); | ||
7704 | if (!tsg) | ||
7705 | return -EINVAL; | ||
7706 | |||
7707 | gr_ctx = &tsg->gr_ctx; | ||
7708 | |||
7710 | if (ch_is_curr_ctx) { | 7709 | if (ch_is_curr_ctx) { |
7711 | for (pass = 0; pass < 2; pass++) { | 7710 | for (pass = 0; pass < 2; pass++) { |
7712 | ctx_op_nr = 0; | 7711 | ctx_op_nr = 0; |
@@ -7778,7 +7777,7 @@ int __gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch, | |||
7778 | } | 7777 | } |
7779 | offset_addrs = offsets + max_offsets; | 7778 | offset_addrs = offsets + max_offsets; |
7780 | 7779 | ||
7781 | err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx, false); | 7780 | err = gr_gk20a_ctx_patch_write_begin(g, gr_ctx, false); |
7782 | if (err) | 7781 | if (err) |
7783 | goto cleanup; | 7782 | goto cleanup; |
7784 | 7783 | ||
@@ -7812,13 +7811,13 @@ int __gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch, | |||
7812 | * gr_gk20a_apply_instmem_overrides, | 7811 | * gr_gk20a_apply_instmem_overrides, |
7813 | * recoded in-place instead. | 7812 | * recoded in-place instead. |
7814 | */ | 7813 | */ |
7815 | if (nvgpu_mem_begin(g, &ch_ctx->gr_ctx->mem)) { | 7814 | if (nvgpu_mem_begin(g, &gr_ctx->mem)) { |
7816 | err = -ENOMEM; | 7815 | err = -ENOMEM; |
7817 | goto cleanup; | 7816 | goto cleanup; |
7818 | } | 7817 | } |
7819 | gr_ctx_ready = true; | 7818 | gr_ctx_ready = true; |
7820 | } | 7819 | } |
7821 | current_mem = &ch_ctx->gr_ctx->mem; | 7820 | current_mem = &gr_ctx->mem; |
7822 | } else { | 7821 | } else { |
7823 | err = gr_gk20a_get_pm_ctx_buffer_offsets(g, | 7822 | err = gr_gk20a_get_pm_ctx_buffer_offsets(g, |
7824 | ctx_ops[i].offset, | 7823 | ctx_ops[i].offset, |
@@ -7835,19 +7834,19 @@ int __gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch, | |||
7835 | } | 7834 | } |
7836 | if (!pm_ctx_ready) { | 7835 | if (!pm_ctx_ready) { |
7837 | /* Make sure ctx buffer was initialized */ | 7836 | /* Make sure ctx buffer was initialized */ |
7838 | if (!nvgpu_mem_is_valid(&ch_ctx->pm_ctx.mem)) { | 7837 | if (!nvgpu_mem_is_valid(&gr_ctx->pm_ctx.mem)) { |
7839 | nvgpu_err(g, | 7838 | nvgpu_err(g, |
7840 | "Invalid ctx buffer"); | 7839 | "Invalid ctx buffer"); |
7841 | err = -EINVAL; | 7840 | err = -EINVAL; |
7842 | goto cleanup; | 7841 | goto cleanup; |
7843 | } | 7842 | } |
7844 | if (nvgpu_mem_begin(g, &ch_ctx->pm_ctx.mem)) { | 7843 | if (nvgpu_mem_begin(g, &gr_ctx->pm_ctx.mem)) { |
7845 | err = -ENOMEM; | 7844 | err = -ENOMEM; |
7846 | goto cleanup; | 7845 | goto cleanup; |
7847 | } | 7846 | } |
7848 | pm_ctx_ready = true; | 7847 | pm_ctx_ready = true; |
7849 | } | 7848 | } |
7850 | current_mem = &ch_ctx->pm_ctx.mem; | 7849 | current_mem = &gr_ctx->pm_ctx.mem; |
7851 | } | 7850 | } |
7852 | 7851 | ||
7853 | /* if this is a quad access, setup for special access*/ | 7852 | /* if this is a quad access, setup for special access*/ |
@@ -7860,7 +7859,7 @@ int __gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch, | |||
7860 | /* sanity check gr ctxt offsets, | 7859 | /* sanity check gr ctxt offsets, |
7861 | * don't write outside, worst case | 7860 | * don't write outside, worst case |
7862 | */ | 7861 | */ |
7863 | if ((current_mem == &ch_ctx->gr_ctx->mem) && | 7862 | if ((current_mem == &gr_ctx->mem) && |
7864 | (offsets[j] >= g->gr.ctx_vars.golden_image_size)) | 7863 | (offsets[j] >= g->gr.ctx_vars.golden_image_size)) |
7865 | continue; | 7864 | continue; |
7866 | if (pass == 0) { /* write pass */ | 7865 | if (pass == 0) { /* write pass */ |
@@ -7886,7 +7885,7 @@ int __gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch, | |||
7886 | 7885 | ||
7887 | /* check to see if we need to add a special WAR | 7886 | /* check to see if we need to add a special WAR |
7888 | for some of the SMPC perf regs */ | 7887 | for some of the SMPC perf regs */ |
7889 | gr_gk20a_ctx_patch_smpc(g, ch_ctx, offset_addrs[j], | 7888 | gr_gk20a_ctx_patch_smpc(g, ch, offset_addrs[j], |
7890 | v, current_mem); | 7889 | v, current_mem); |
7891 | 7890 | ||
7892 | } else { /* read pass */ | 7891 | } else { /* read pass */ |
@@ -7915,12 +7914,12 @@ int __gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch, | |||
7915 | if (offsets) | 7914 | if (offsets) |
7916 | nvgpu_kfree(g, offsets); | 7915 | nvgpu_kfree(g, offsets); |
7917 | 7916 | ||
7918 | if (ch_ctx->patch_ctx.mem.cpu_va) | 7917 | if (gr_ctx->patch_ctx.mem.cpu_va) |
7919 | gr_gk20a_ctx_patch_write_end(g, ch_ctx, gr_ctx_ready); | 7918 | gr_gk20a_ctx_patch_write_end(g, gr_ctx, gr_ctx_ready); |
7920 | if (gr_ctx_ready) | 7919 | if (gr_ctx_ready) |
7921 | nvgpu_mem_end(g, &ch_ctx->gr_ctx->mem); | 7920 | nvgpu_mem_end(g, &gr_ctx->mem); |
7922 | if (pm_ctx_ready) | 7921 | if (pm_ctx_ready) |
7923 | nvgpu_mem_end(g, &ch_ctx->pm_ctx.mem); | 7922 | nvgpu_mem_end(g, &gr_ctx->pm_ctx.mem); |
7924 | 7923 | ||
7925 | return err; | 7924 | return err; |
7926 | } | 7925 | } |
@@ -7962,23 +7961,23 @@ int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch, | |||
7962 | } | 7961 | } |
7963 | 7962 | ||
7964 | void gr_gk20a_commit_global_pagepool(struct gk20a *g, | 7963 | void gr_gk20a_commit_global_pagepool(struct gk20a *g, |
7965 | struct channel_ctx_gk20a *ch_ctx, | 7964 | struct nvgpu_gr_ctx *gr_ctx, |
7966 | u64 addr, u32 size, bool patch) | 7965 | u64 addr, u32 size, bool patch) |
7967 | { | 7966 | { |
7968 | gr_gk20a_ctx_patch_write(g, ch_ctx, gr_scc_pagepool_base_r(), | 7967 | gr_gk20a_ctx_patch_write(g, gr_ctx, gr_scc_pagepool_base_r(), |
7969 | gr_scc_pagepool_base_addr_39_8_f(addr), patch); | 7968 | gr_scc_pagepool_base_addr_39_8_f(addr), patch); |
7970 | 7969 | ||
7971 | gr_gk20a_ctx_patch_write(g, ch_ctx, gr_scc_pagepool_r(), | 7970 | gr_gk20a_ctx_patch_write(g, gr_ctx, gr_scc_pagepool_r(), |
7972 | gr_scc_pagepool_total_pages_f(size) | | 7971 | gr_scc_pagepool_total_pages_f(size) | |
7973 | gr_scc_pagepool_valid_true_f(), patch); | 7972 | gr_scc_pagepool_valid_true_f(), patch); |
7974 | 7973 | ||
7975 | gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_gcc_pagepool_base_r(), | 7974 | gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_gcc_pagepool_base_r(), |
7976 | gr_gpcs_gcc_pagepool_base_addr_39_8_f(addr), patch); | 7975 | gr_gpcs_gcc_pagepool_base_addr_39_8_f(addr), patch); |
7977 | 7976 | ||
7978 | gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_gcc_pagepool_r(), | 7977 | gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_gcc_pagepool_r(), |
7979 | gr_gpcs_gcc_pagepool_total_pages_f(size), patch); | 7978 | gr_gpcs_gcc_pagepool_total_pages_f(size), patch); |
7980 | 7979 | ||
7981 | gr_gk20a_ctx_patch_write(g, ch_ctx, gr_pd_pagepool_r(), | 7980 | gr_gk20a_ctx_patch_write(g, gr_ctx, gr_pd_pagepool_r(), |
7982 | gr_pd_pagepool_total_pages_f(size) | | 7981 | gr_pd_pagepool_total_pages_f(size) | |
7983 | gr_pd_pagepool_valid_true_f(), patch); | 7982 | gr_pd_pagepool_valid_true_f(), patch); |
7984 | } | 7983 | } |
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h index 1c22923b..6cc15c94 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h | |||
@@ -28,7 +28,6 @@ | |||
28 | #include "gr_t19x.h" | 28 | #include "gr_t19x.h" |
29 | #endif | 29 | #endif |
30 | 30 | ||
31 | #include "tsg_gk20a.h" | ||
32 | #include "gr_ctx_gk20a.h" | 31 | #include "gr_ctx_gk20a.h" |
33 | #include "mm_gk20a.h" | 32 | #include "mm_gk20a.h" |
34 | 33 | ||
@@ -48,6 +47,10 @@ | |||
48 | 47 | ||
49 | #define GK20A_TIMEOUT_FPGA 100000 /* 100 sec */ | 48 | #define GK20A_TIMEOUT_FPGA 100000 /* 100 sec */ |
50 | 49 | ||
50 | /* Flags to be passed to g->ops.gr.alloc_obj_ctx() */ | ||
51 | #define NVGPU_OBJ_CTX_FLAGS_SUPPORT_GFXP (1 << 1) | ||
52 | #define NVGPU_OBJ_CTX_FLAGS_SUPPORT_CILP (1 << 2) | ||
53 | |||
51 | /* | 54 | /* |
52 | * allocate a minimum of 1 page (4KB) worth of patch space, this is 512 entries | 55 | * allocate a minimum of 1 page (4KB) worth of patch space, this is 512 entries |
53 | * of address and data pairs | 56 | * of address and data pairs |
@@ -64,6 +67,7 @@ | |||
64 | #define NVGPU_PREEMPTION_MODE_COMPUTE_CTA (1 << 1) | 67 | #define NVGPU_PREEMPTION_MODE_COMPUTE_CTA (1 << 1) |
65 | #define NVGPU_PREEMPTION_MODE_COMPUTE_CILP (1 << 2) | 68 | #define NVGPU_PREEMPTION_MODE_COMPUTE_CILP (1 << 2) |
66 | 69 | ||
70 | struct tsg_gk20a; | ||
67 | struct channel_gk20a; | 71 | struct channel_gk20a; |
68 | struct nvgpu_warpstate; | 72 | struct nvgpu_warpstate; |
69 | 73 | ||
@@ -433,7 +437,12 @@ struct gr_gk20a { | |||
433 | 437 | ||
434 | void gk20a_fecs_dump_falcon_stats(struct gk20a *g); | 438 | void gk20a_fecs_dump_falcon_stats(struct gk20a *g); |
435 | 439 | ||
436 | struct gr_ctx_desc { | 440 | struct ctx_header_desc { |
441 | struct nvgpu_mem mem; | ||
442 | }; | ||
443 | |||
444 | /* contexts associated with a TSG */ | ||
445 | struct nvgpu_gr_ctx { | ||
437 | struct nvgpu_mem mem; | 446 | struct nvgpu_mem mem; |
438 | 447 | ||
439 | u32 graphics_preempt_mode; | 448 | u32 graphics_preempt_mode; |
@@ -452,10 +461,16 @@ struct gr_ctx_desc { | |||
452 | u64 virt_ctx; | 461 | u64 virt_ctx; |
453 | #endif | 462 | #endif |
454 | bool golden_img_loaded; | 463 | bool golden_img_loaded; |
455 | }; | ||
456 | 464 | ||
457 | struct ctx_header_desc { | 465 | struct patch_desc patch_ctx; |
458 | struct nvgpu_mem mem; | 466 | struct zcull_ctx_desc zcull_ctx; |
467 | struct pm_ctx_desc pm_ctx; | ||
468 | u64 global_ctx_buffer_va[NR_GLOBAL_CTX_BUF_VA]; | ||
469 | u64 global_ctx_buffer_size[NR_GLOBAL_CTX_BUF_VA]; | ||
470 | int global_ctx_buffer_index[NR_GLOBAL_CTX_BUF_VA]; | ||
471 | bool global_ctx_buffer_mapped; | ||
472 | |||
473 | u32 tsgid; | ||
459 | }; | 474 | }; |
460 | 475 | ||
461 | struct gk20a_ctxsw_ucode_segment { | 476 | struct gk20a_ctxsw_ucode_segment { |
@@ -552,7 +567,6 @@ int gk20a_init_gr_channel(struct channel_gk20a *ch_gk20a); | |||
552 | int gr_gk20a_init_ctx_vars(struct gk20a *g, struct gr_gk20a *gr); | 567 | int gr_gk20a_init_ctx_vars(struct gk20a *g, struct gr_gk20a *gr); |
553 | 568 | ||
554 | int gk20a_alloc_obj_ctx(struct channel_gk20a *c, u32 class_num, u32 flags); | 569 | int gk20a_alloc_obj_ctx(struct channel_gk20a *c, u32 class_num, u32 flags); |
555 | void gk20a_free_channel_ctx(struct channel_gk20a *c, bool is_tsg); | ||
556 | 570 | ||
557 | int gk20a_gr_isr(struct gk20a *g); | 571 | int gk20a_gr_isr(struct gk20a *g); |
558 | int gk20a_gr_nonstall_isr(struct gk20a *g); | 572 | int gk20a_gr_nonstall_isr(struct gk20a *g); |
@@ -633,17 +647,17 @@ int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g, | |||
633 | struct channel_gk20a *c, | 647 | struct channel_gk20a *c, |
634 | bool enable_hwpm_ctxsw); | 648 | bool enable_hwpm_ctxsw); |
635 | 649 | ||
636 | struct channel_ctx_gk20a; | 650 | struct nvgpu_gr_ctx; |
637 | void gr_gk20a_ctx_patch_write(struct gk20a *g, struct channel_ctx_gk20a *ch_ctx, | 651 | void gr_gk20a_ctx_patch_write(struct gk20a *g, struct nvgpu_gr_ctx *ch_ctx, |
638 | u32 addr, u32 data, bool patch); | 652 | u32 addr, u32 data, bool patch); |
639 | int gr_gk20a_ctx_patch_write_begin(struct gk20a *g, | 653 | int gr_gk20a_ctx_patch_write_begin(struct gk20a *g, |
640 | struct channel_ctx_gk20a *ch_ctx, | 654 | struct nvgpu_gr_ctx *ch_ctx, |
641 | bool update_patch_count); | 655 | bool update_patch_count); |
642 | void gr_gk20a_ctx_patch_write_end(struct gk20a *g, | 656 | void gr_gk20a_ctx_patch_write_end(struct gk20a *g, |
643 | struct channel_ctx_gk20a *ch_ctx, | 657 | struct nvgpu_gr_ctx *ch_ctx, |
644 | bool update_patch_count); | 658 | bool update_patch_count); |
645 | void gr_gk20a_commit_global_pagepool(struct gk20a *g, | 659 | void gr_gk20a_commit_global_pagepool(struct gk20a *g, |
646 | struct channel_ctx_gk20a *ch_ctx, | 660 | struct nvgpu_gr_ctx *ch_ctx, |
647 | u64 addr, u32 size, bool patch); | 661 | u64 addr, u32 size, bool patch); |
648 | void gk20a_gr_set_shader_exceptions(struct gk20a *g, u32 data); | 662 | void gk20a_gr_set_shader_exceptions(struct gk20a *g, u32 data); |
649 | void gr_gk20a_enable_hww_exceptions(struct gk20a *g); | 663 | void gr_gk20a_enable_hww_exceptions(struct gk20a *g); |
@@ -694,10 +708,10 @@ int gr_gk20a_submit_fecs_method_op(struct gk20a *g, | |||
694 | int gr_gk20a_submit_fecs_sideband_method_op(struct gk20a *g, | 708 | int gr_gk20a_submit_fecs_sideband_method_op(struct gk20a *g, |
695 | struct fecs_method_op_gk20a op); | 709 | struct fecs_method_op_gk20a op); |
696 | int gr_gk20a_alloc_gr_ctx(struct gk20a *g, | 710 | int gr_gk20a_alloc_gr_ctx(struct gk20a *g, |
697 | struct gr_ctx_desc **__gr_ctx, struct vm_gk20a *vm, | 711 | struct nvgpu_gr_ctx *gr_ctx, struct vm_gk20a *vm, |
698 | u32 class, u32 padding); | 712 | u32 class, u32 padding); |
699 | void gr_gk20a_free_gr_ctx(struct gk20a *g, | 713 | void gr_gk20a_free_gr_ctx(struct gk20a *g, |
700 | struct vm_gk20a *vm, struct gr_ctx_desc *gr_ctx); | 714 | struct vm_gk20a *vm, struct nvgpu_gr_ctx *gr_ctx); |
701 | int gr_gk20a_halt_pipe(struct gk20a *g); | 715 | int gr_gk20a_halt_pipe(struct gk20a *g); |
702 | 716 | ||
703 | #if defined(CONFIG_GK20A_CYCLE_STATS) | 717 | #if defined(CONFIG_GK20A_CYCLE_STATS) |
diff --git a/drivers/gpu/nvgpu/gk20a/tsg_gk20a.c b/drivers/gpu/nvgpu/gk20a/tsg_gk20a.c index d9ddc011..19d0ecce 100644 --- a/drivers/gpu/nvgpu/gk20a/tsg_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/tsg_gk20a.c | |||
@@ -280,7 +280,6 @@ struct tsg_gk20a *gk20a_tsg_open(struct gk20a *g) | |||
280 | tsg->num_active_channels = 0; | 280 | tsg->num_active_channels = 0; |
281 | nvgpu_ref_init(&tsg->refcount); | 281 | nvgpu_ref_init(&tsg->refcount); |
282 | 282 | ||
283 | tsg->tsg_gr_ctx = NULL; | ||
284 | tsg->vm = NULL; | 283 | tsg->vm = NULL; |
285 | tsg->interleave_level = NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_LOW; | 284 | tsg->interleave_level = NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_LOW; |
286 | tsg->timeslice_us = 0; | 285 | tsg->timeslice_us = 0; |
@@ -319,10 +318,8 @@ void gk20a_tsg_release(struct nvgpu_ref *ref) | |||
319 | if (g->ops.fifo.tsg_release) | 318 | if (g->ops.fifo.tsg_release) |
320 | g->ops.fifo.tsg_release(tsg); | 319 | g->ops.fifo.tsg_release(tsg); |
321 | 320 | ||
322 | if (tsg->tsg_gr_ctx) { | 321 | if (nvgpu_mem_is_valid(&tsg->gr_ctx.mem)) |
323 | gr_gk20a_free_tsg_gr_ctx(tsg); | 322 | gr_gk20a_free_tsg_gr_ctx(tsg); |
324 | tsg->tsg_gr_ctx = NULL; | ||
325 | } | ||
326 | 323 | ||
327 | if (g->ops.fifo.deinit_eng_method_buffers) | 324 | if (g->ops.fifo.deinit_eng_method_buffers) |
328 | g->ops.fifo.deinit_eng_method_buffers(g, tsg); | 325 | g->ops.fifo.deinit_eng_method_buffers(g, tsg); |
diff --git a/drivers/gpu/nvgpu/gk20a/tsg_gk20a.h b/drivers/gpu/nvgpu/gk20a/tsg_gk20a.h index 08fe0365..2168cb4f 100644 --- a/drivers/gpu/nvgpu/gk20a/tsg_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/tsg_gk20a.h | |||
@@ -26,6 +26,8 @@ | |||
26 | #include <nvgpu/kref.h> | 26 | #include <nvgpu/kref.h> |
27 | #include <nvgpu/rwsem.h> | 27 | #include <nvgpu/rwsem.h> |
28 | 28 | ||
29 | #include "gr_gk20a.h" | ||
30 | |||
29 | #ifdef CONFIG_TEGRA_19x_GPU | 31 | #ifdef CONFIG_TEGRA_19x_GPU |
30 | #include "tsg_t19x.h" | 32 | #include "tsg_t19x.h" |
31 | #endif | 33 | #endif |
@@ -56,8 +58,6 @@ struct tsg_gk20a { | |||
56 | unsigned int timeslice_timeout; | 58 | unsigned int timeslice_timeout; |
57 | unsigned int timeslice_scale; | 59 | unsigned int timeslice_scale; |
58 | 60 | ||
59 | struct gr_ctx_desc *tsg_gr_ctx; | ||
60 | |||
61 | struct vm_gk20a *vm; | 61 | struct vm_gk20a *vm; |
62 | 62 | ||
63 | u32 interleave_level; | 63 | u32 interleave_level; |
@@ -71,6 +71,8 @@ struct tsg_gk20a { | |||
71 | #ifdef CONFIG_TEGRA_19x_GPU | 71 | #ifdef CONFIG_TEGRA_19x_GPU |
72 | struct tsg_t19x t19x; | 72 | struct tsg_t19x t19x; |
73 | #endif | 73 | #endif |
74 | |||
75 | struct nvgpu_gr_ctx gr_ctx; | ||
74 | }; | 76 | }; |
75 | 77 | ||
76 | int gk20a_enable_tsg(struct tsg_gk20a *tsg); | 78 | int gk20a_enable_tsg(struct tsg_gk20a *tsg); |
diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c index 36fad8b3..a2434320 100644 --- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c | |||
@@ -124,7 +124,7 @@ int gr_gm20b_calc_global_ctx_buffer_size(struct gk20a *g) | |||
124 | } | 124 | } |
125 | 125 | ||
126 | void gr_gm20b_commit_global_attrib_cb(struct gk20a *g, | 126 | void gr_gm20b_commit_global_attrib_cb(struct gk20a *g, |
127 | struct channel_ctx_gk20a *ch_ctx, | 127 | struct nvgpu_gr_ctx *ch_ctx, |
128 | u64 addr, bool patch) | 128 | u64 addr, bool patch) |
129 | { | 129 | { |
130 | gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_setup_attrib_cb_base_r(), | 130 | gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_setup_attrib_cb_base_r(), |
@@ -141,7 +141,7 @@ void gr_gm20b_commit_global_attrib_cb(struct gk20a *g, | |||
141 | } | 141 | } |
142 | 142 | ||
143 | void gr_gm20b_commit_global_bundle_cb(struct gk20a *g, | 143 | void gr_gm20b_commit_global_bundle_cb(struct gk20a *g, |
144 | struct channel_ctx_gk20a *ch_ctx, | 144 | struct nvgpu_gr_ctx *ch_ctx, |
145 | u64 addr, u64 size, bool patch) | 145 | u64 addr, u64 size, bool patch) |
146 | { | 146 | { |
147 | u32 data; | 147 | u32 data; |
@@ -180,7 +180,8 @@ int gr_gm20b_commit_global_cb_manager(struct gk20a *g, | |||
180 | struct channel_gk20a *c, bool patch) | 180 | struct channel_gk20a *c, bool patch) |
181 | { | 181 | { |
182 | struct gr_gk20a *gr = &g->gr; | 182 | struct gr_gk20a *gr = &g->gr; |
183 | struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx; | 183 | struct tsg_gk20a *tsg; |
184 | struct nvgpu_gr_ctx *ch_ctx; | ||
184 | u32 attrib_offset_in_chunk = 0; | 185 | u32 attrib_offset_in_chunk = 0; |
185 | u32 alpha_offset_in_chunk = 0; | 186 | u32 alpha_offset_in_chunk = 0; |
186 | u32 pd_ab_max_output; | 187 | u32 pd_ab_max_output; |
@@ -193,6 +194,12 @@ int gr_gm20b_commit_global_cb_manager(struct gk20a *g, | |||
193 | 194 | ||
194 | gk20a_dbg_fn(""); | 195 | gk20a_dbg_fn(""); |
195 | 196 | ||
197 | tsg = tsg_gk20a_from_ch(c); | ||
198 | if (!tsg) | ||
199 | return -EINVAL; | ||
200 | |||
201 | ch_ctx = &tsg->gr_ctx; | ||
202 | |||
196 | gr_gk20a_ctx_patch_write(g, ch_ctx, gr_ds_tga_constraintlogic_r(), | 203 | gr_gk20a_ctx_patch_write(g, ch_ctx, gr_ds_tga_constraintlogic_r(), |
197 | gr_ds_tga_constraintlogic_beta_cbsize_f(gr->attrib_cb_default_size) | | 204 | gr_ds_tga_constraintlogic_beta_cbsize_f(gr->attrib_cb_default_size) | |
198 | gr_ds_tga_constraintlogic_alpha_cbsize_f(gr->alpha_cb_default_size), | 205 | gr_ds_tga_constraintlogic_alpha_cbsize_f(gr->alpha_cb_default_size), |
@@ -257,7 +264,7 @@ int gr_gm20b_commit_global_cb_manager(struct gk20a *g, | |||
257 | } | 264 | } |
258 | 265 | ||
259 | void gr_gm20b_commit_global_pagepool(struct gk20a *g, | 266 | void gr_gm20b_commit_global_pagepool(struct gk20a *g, |
260 | struct channel_ctx_gk20a *ch_ctx, | 267 | struct nvgpu_gr_ctx *ch_ctx, |
261 | u64 addr, u32 size, bool patch) | 268 | u64 addr, u32 size, bool patch) |
262 | { | 269 | { |
263 | gr_gk20a_commit_global_pagepool(g, ch_ctx, addr, size, patch); | 270 | gr_gk20a_commit_global_pagepool(g, ch_ctx, addr, size, patch); |
@@ -845,7 +852,7 @@ u32 gr_gm20b_pagepool_default_size(struct gk20a *g) | |||
845 | } | 852 | } |
846 | 853 | ||
847 | int gr_gm20b_alloc_gr_ctx(struct gk20a *g, | 854 | int gr_gm20b_alloc_gr_ctx(struct gk20a *g, |
848 | struct gr_ctx_desc **gr_ctx, struct vm_gk20a *vm, | 855 | struct nvgpu_gr_ctx *gr_ctx, struct vm_gk20a *vm, |
849 | u32 class, | 856 | u32 class, |
850 | u32 flags) | 857 | u32 flags) |
851 | { | 858 | { |
@@ -858,7 +865,7 @@ int gr_gm20b_alloc_gr_ctx(struct gk20a *g, | |||
858 | return err; | 865 | return err; |
859 | 866 | ||
860 | if (class == MAXWELL_COMPUTE_B) | 867 | if (class == MAXWELL_COMPUTE_B) |
861 | (*gr_ctx)->compute_preempt_mode = NVGPU_PREEMPTION_MODE_COMPUTE_CTA; | 868 | gr_ctx->compute_preempt_mode = NVGPU_PREEMPTION_MODE_COMPUTE_CTA; |
862 | 869 | ||
863 | gk20a_dbg_fn("done"); | 870 | gk20a_dbg_fn("done"); |
864 | 871 | ||
@@ -866,15 +873,21 @@ int gr_gm20b_alloc_gr_ctx(struct gk20a *g, | |||
866 | } | 873 | } |
867 | 874 | ||
868 | void gr_gm20b_update_ctxsw_preemption_mode(struct gk20a *g, | 875 | void gr_gm20b_update_ctxsw_preemption_mode(struct gk20a *g, |
869 | struct channel_ctx_gk20a *ch_ctx, | 876 | struct channel_gk20a *c, |
870 | struct nvgpu_mem *mem) | 877 | struct nvgpu_mem *mem) |
871 | { | 878 | { |
872 | struct gr_ctx_desc *gr_ctx = ch_ctx->gr_ctx; | 879 | struct tsg_gk20a *tsg; |
880 | struct nvgpu_gr_ctx *gr_ctx; | ||
873 | u32 cta_preempt_option = | 881 | u32 cta_preempt_option = |
874 | ctxsw_prog_main_image_preemption_options_control_cta_enabled_f(); | 882 | ctxsw_prog_main_image_preemption_options_control_cta_enabled_f(); |
875 | 883 | ||
876 | gk20a_dbg_fn(""); | 884 | gk20a_dbg_fn(""); |
877 | 885 | ||
886 | tsg = tsg_gk20a_from_ch(c); | ||
887 | if (!tsg) | ||
888 | return; | ||
889 | |||
890 | gr_ctx = &tsg->gr_ctx; | ||
878 | if (gr_ctx->compute_preempt_mode == NVGPU_PREEMPTION_MODE_COMPUTE_CTA) { | 891 | if (gr_ctx->compute_preempt_mode == NVGPU_PREEMPTION_MODE_COMPUTE_CTA) { |
879 | gk20a_dbg_info("CTA: %x", cta_preempt_option); | 892 | gk20a_dbg_info("CTA: %x", cta_preempt_option); |
880 | nvgpu_mem_wr(g, mem, | 893 | nvgpu_mem_wr(g, mem, |
@@ -1026,16 +1039,22 @@ int gr_gm20b_dump_gr_status_regs(struct gk20a *g, | |||
1026 | int gr_gm20b_update_pc_sampling(struct channel_gk20a *c, | 1039 | int gr_gm20b_update_pc_sampling(struct channel_gk20a *c, |
1027 | bool enable) | 1040 | bool enable) |
1028 | { | 1041 | { |
1029 | struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx; | 1042 | struct tsg_gk20a *tsg; |
1043 | struct nvgpu_gr_ctx *gr_ctx; | ||
1030 | struct nvgpu_mem *mem; | 1044 | struct nvgpu_mem *mem; |
1031 | u32 v; | 1045 | u32 v; |
1032 | 1046 | ||
1033 | gk20a_dbg_fn(""); | 1047 | gk20a_dbg_fn(""); |
1034 | 1048 | ||
1035 | if (!ch_ctx || !ch_ctx->gr_ctx || c->vpr) | 1049 | tsg = tsg_gk20a_from_ch(c); |
1050 | if (!tsg) | ||
1051 | return -EINVAL; | ||
1052 | |||
1053 | gr_ctx = &tsg->gr_ctx; | ||
1054 | mem = &gr_ctx->mem; | ||
1055 | if (!nvgpu_mem_is_valid(mem) || c->vpr) | ||
1036 | return -EINVAL; | 1056 | return -EINVAL; |
1037 | 1057 | ||
1038 | mem = &ch_ctx->gr_ctx->mem; | ||
1039 | 1058 | ||
1040 | if (nvgpu_mem_begin(c->g, mem)) | 1059 | if (nvgpu_mem_begin(c->g, mem)) |
1041 | return -ENOMEM; | 1060 | return -ENOMEM; |
@@ -1289,12 +1308,19 @@ int gm20b_gr_update_sm_error_state(struct gk20a *g, | |||
1289 | { | 1308 | { |
1290 | u32 gpc, tpc, offset; | 1309 | u32 gpc, tpc, offset; |
1291 | struct gr_gk20a *gr = &g->gr; | 1310 | struct gr_gk20a *gr = &g->gr; |
1292 | struct channel_ctx_gk20a *ch_ctx = &ch->ch_ctx; | 1311 | struct tsg_gk20a *tsg; |
1312 | struct nvgpu_gr_ctx *ch_ctx; | ||
1293 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); | 1313 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); |
1294 | u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, | 1314 | u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, |
1295 | GPU_LIT_TPC_IN_GPC_STRIDE); | 1315 | GPU_LIT_TPC_IN_GPC_STRIDE); |
1296 | int err = 0; | 1316 | int err = 0; |
1297 | 1317 | ||
1318 | tsg = tsg_gk20a_from_ch(ch); | ||
1319 | if (!tsg) | ||
1320 | return -EINVAL; | ||
1321 | |||
1322 | ch_ctx = &tsg->gr_ctx; | ||
1323 | |||
1298 | nvgpu_mutex_acquire(&g->dbg_sessions_lock); | 1324 | nvgpu_mutex_acquire(&g->dbg_sessions_lock); |
1299 | 1325 | ||
1300 | gr->sm_error_states[sm_id].hww_global_esr = | 1326 | gr->sm_error_states[sm_id].hww_global_esr = |
diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.h b/drivers/gpu/nvgpu/gm20b/gr_gm20b.h index 18e6b032..bddf6412 100644 --- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.h +++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.h | |||
@@ -46,7 +46,7 @@ enum { | |||
46 | #define NVA297_SET_SHADER_EXCEPTIONS_ENABLE_FALSE 0 | 46 | #define NVA297_SET_SHADER_EXCEPTIONS_ENABLE_FALSE 0 |
47 | 47 | ||
48 | void gr_gm20b_commit_global_attrib_cb(struct gk20a *g, | 48 | void gr_gm20b_commit_global_attrib_cb(struct gk20a *g, |
49 | struct channel_ctx_gk20a *ch_ctx, | 49 | struct nvgpu_gr_ctx *ch_ctx, |
50 | u64 addr, bool patch); | 50 | u64 addr, bool patch); |
51 | int gr_gm20b_init_fs_state(struct gk20a *g); | 51 | int gr_gm20b_init_fs_state(struct gk20a *g); |
52 | int gm20b_gr_tpc_disable_override(struct gk20a *g, u32 mask); | 52 | int gm20b_gr_tpc_disable_override(struct gk20a *g, u32 mask); |
@@ -57,12 +57,12 @@ void gr_gm20b_bundle_cb_defaults(struct gk20a *g); | |||
57 | void gr_gm20b_cb_size_default(struct gk20a *g); | 57 | void gr_gm20b_cb_size_default(struct gk20a *g); |
58 | int gr_gm20b_calc_global_ctx_buffer_size(struct gk20a *g); | 58 | int gr_gm20b_calc_global_ctx_buffer_size(struct gk20a *g); |
59 | void gr_gm20b_commit_global_bundle_cb(struct gk20a *g, | 59 | void gr_gm20b_commit_global_bundle_cb(struct gk20a *g, |
60 | struct channel_ctx_gk20a *ch_ctx, | 60 | struct nvgpu_gr_ctx *ch_ctx, |
61 | u64 addr, u64 size, bool patch); | 61 | u64 addr, u64 size, bool patch); |
62 | int gr_gm20b_commit_global_cb_manager(struct gk20a *g, | 62 | int gr_gm20b_commit_global_cb_manager(struct gk20a *g, |
63 | struct channel_gk20a *c, bool patch); | 63 | struct channel_gk20a *c, bool patch); |
64 | void gr_gm20b_commit_global_pagepool(struct gk20a *g, | 64 | void gr_gm20b_commit_global_pagepool(struct gk20a *g, |
65 | struct channel_ctx_gk20a *ch_ctx, | 65 | struct nvgpu_gr_ctx *ch_ctx, |
66 | u64 addr, u32 size, bool patch); | 66 | u64 addr, u32 size, bool patch); |
67 | int gr_gm20b_handle_sw_method(struct gk20a *g, u32 addr, | 67 | int gr_gm20b_handle_sw_method(struct gk20a *g, u32 addr, |
68 | u32 class_num, u32 offset, u32 data); | 68 | u32 class_num, u32 offset, u32 data); |
@@ -96,11 +96,11 @@ int gr_gm20b_load_ctxsw_ucode(struct gk20a *g); | |||
96 | void gr_gm20b_detect_sm_arch(struct gk20a *g); | 96 | void gr_gm20b_detect_sm_arch(struct gk20a *g); |
97 | u32 gr_gm20b_pagepool_default_size(struct gk20a *g); | 97 | u32 gr_gm20b_pagepool_default_size(struct gk20a *g); |
98 | int gr_gm20b_alloc_gr_ctx(struct gk20a *g, | 98 | int gr_gm20b_alloc_gr_ctx(struct gk20a *g, |
99 | struct gr_ctx_desc **gr_ctx, struct vm_gk20a *vm, | 99 | struct nvgpu_gr_ctx *gr_ctx, struct vm_gk20a *vm, |
100 | u32 class, | 100 | u32 class, |
101 | u32 flags); | 101 | u32 flags); |
102 | void gr_gm20b_update_ctxsw_preemption_mode(struct gk20a *g, | 102 | void gr_gm20b_update_ctxsw_preemption_mode(struct gk20a *g, |
103 | struct channel_ctx_gk20a *ch_ctx, | 103 | struct channel_gk20a *c, |
104 | struct nvgpu_mem *mem); | 104 | struct nvgpu_mem *mem); |
105 | int gr_gm20b_dump_gr_status_regs(struct gk20a *g, | 105 | int gr_gm20b_dump_gr_status_regs(struct gk20a *g, |
106 | struct gk20a_debug_output *o); | 106 | struct gk20a_debug_output *o); |
diff --git a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c index c29f7267..3ee22ed1 100644 --- a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c | |||
@@ -226,7 +226,6 @@ static const struct gpu_ops gm20b_ops = { | |||
226 | .load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode, | 226 | .load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode, |
227 | .set_gpc_tpc_mask = gr_gm20b_set_gpc_tpc_mask, | 227 | .set_gpc_tpc_mask = gr_gm20b_set_gpc_tpc_mask, |
228 | .get_gpc_tpc_mask = gr_gm20b_get_gpc_tpc_mask, | 228 | .get_gpc_tpc_mask = gr_gm20b_get_gpc_tpc_mask, |
229 | .free_channel_ctx = gk20a_free_channel_ctx, | ||
230 | .alloc_obj_ctx = gk20a_alloc_obj_ctx, | 229 | .alloc_obj_ctx = gk20a_alloc_obj_ctx, |
231 | .bind_ctxsw_zcull = gr_gk20a_bind_ctxsw_zcull, | 230 | .bind_ctxsw_zcull = gr_gk20a_bind_ctxsw_zcull, |
232 | .get_zcull_info = gr_gk20a_get_zcull_info, | 231 | .get_zcull_info = gr_gk20a_get_zcull_info, |
diff --git a/drivers/gpu/nvgpu/gp106/gr_gp106.c b/drivers/gpu/nvgpu/gp106/gr_gp106.c index bedc0b78..02cecf53 100644 --- a/drivers/gpu/nvgpu/gp106/gr_gp106.c +++ b/drivers/gpu/nvgpu/gp106/gr_gp106.c | |||
@@ -135,7 +135,7 @@ void gr_gp106_cb_size_default(struct gk20a *g) | |||
135 | } | 135 | } |
136 | 136 | ||
137 | int gr_gp106_set_ctxsw_preemption_mode(struct gk20a *g, | 137 | int gr_gp106_set_ctxsw_preemption_mode(struct gk20a *g, |
138 | struct gr_ctx_desc *gr_ctx, | 138 | struct nvgpu_gr_ctx *gr_ctx, |
139 | struct vm_gk20a *vm, u32 class, | 139 | struct vm_gk20a *vm, u32 class, |
140 | u32 graphics_preempt_mode, | 140 | u32 graphics_preempt_mode, |
141 | u32 compute_preempt_mode) | 141 | u32 compute_preempt_mode) |
diff --git a/drivers/gpu/nvgpu/gp106/gr_gp106.h b/drivers/gpu/nvgpu/gp106/gr_gp106.h index 9f76e4ac..491ced4e 100644 --- a/drivers/gpu/nvgpu/gp106/gr_gp106.h +++ b/drivers/gpu/nvgpu/gp106/gr_gp106.h | |||
@@ -38,7 +38,7 @@ int gr_gp106_handle_sw_method(struct gk20a *g, u32 addr, | |||
38 | u32 class_num, u32 offset, u32 data); | 38 | u32 class_num, u32 offset, u32 data); |
39 | void gr_gp106_cb_size_default(struct gk20a *g); | 39 | void gr_gp106_cb_size_default(struct gk20a *g); |
40 | int gr_gp106_set_ctxsw_preemption_mode(struct gk20a *g, | 40 | int gr_gp106_set_ctxsw_preemption_mode(struct gk20a *g, |
41 | struct gr_ctx_desc *gr_ctx, | 41 | struct nvgpu_gr_ctx *gr_ctx, |
42 | struct vm_gk20a *vm, u32 class, | 42 | struct vm_gk20a *vm, u32 class, |
43 | u32 graphics_preempt_mode, | 43 | u32 graphics_preempt_mode, |
44 | u32 compute_preempt_mode); | 44 | u32 compute_preempt_mode); |
diff --git a/drivers/gpu/nvgpu/gp106/hal_gp106.c b/drivers/gpu/nvgpu/gp106/hal_gp106.c index 1498d1c0..3073668e 100644 --- a/drivers/gpu/nvgpu/gp106/hal_gp106.c +++ b/drivers/gpu/nvgpu/gp106/hal_gp106.c | |||
@@ -272,7 +272,6 @@ static const struct gpu_ops gp106_ops = { | |||
272 | .falcon_load_ucode = gr_gm20b_load_ctxsw_ucode_segments, | 272 | .falcon_load_ucode = gr_gm20b_load_ctxsw_ucode_segments, |
273 | .set_gpc_tpc_mask = gr_gp10b_set_gpc_tpc_mask, | 273 | .set_gpc_tpc_mask = gr_gp10b_set_gpc_tpc_mask, |
274 | .get_gpc_tpc_mask = gr_gm20b_get_gpc_tpc_mask, | 274 | .get_gpc_tpc_mask = gr_gm20b_get_gpc_tpc_mask, |
275 | .free_channel_ctx = gk20a_free_channel_ctx, | ||
276 | .alloc_obj_ctx = gk20a_alloc_obj_ctx, | 275 | .alloc_obj_ctx = gk20a_alloc_obj_ctx, |
277 | .bind_ctxsw_zcull = gr_gk20a_bind_ctxsw_zcull, | 276 | .bind_ctxsw_zcull = gr_gk20a_bind_ctxsw_zcull, |
278 | .get_zcull_info = gr_gk20a_get_zcull_info, | 277 | .get_zcull_info = gr_gk20a_get_zcull_info, |
diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c index 56acc732..549a4da4 100644 --- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c +++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c | |||
@@ -389,9 +389,9 @@ int gr_gp10b_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc, | |||
389 | int gr_gp10b_commit_global_cb_manager(struct gk20a *g, | 389 | int gr_gp10b_commit_global_cb_manager(struct gk20a *g, |
390 | struct channel_gk20a *c, bool patch) | 390 | struct channel_gk20a *c, bool patch) |
391 | { | 391 | { |
392 | struct tsg_gk20a *tsg; | ||
392 | struct gr_gk20a *gr = &g->gr; | 393 | struct gr_gk20a *gr = &g->gr; |
393 | struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx; | 394 | struct nvgpu_gr_ctx *gr_ctx; |
394 | struct gr_ctx_desc *gr_ctx = ch_ctx->gr_ctx; | ||
395 | u32 attrib_offset_in_chunk = 0; | 395 | u32 attrib_offset_in_chunk = 0; |
396 | u32 alpha_offset_in_chunk = 0; | 396 | u32 alpha_offset_in_chunk = 0; |
397 | u32 pd_ab_max_output; | 397 | u32 pd_ab_max_output; |
@@ -405,6 +405,12 @@ int gr_gp10b_commit_global_cb_manager(struct gk20a *g, | |||
405 | 405 | ||
406 | gk20a_dbg_fn(""); | 406 | gk20a_dbg_fn(""); |
407 | 407 | ||
408 | tsg = tsg_gk20a_from_ch(c); | ||
409 | if (!tsg) | ||
410 | return -EINVAL; | ||
411 | |||
412 | gr_ctx = &tsg->gr_ctx; | ||
413 | |||
408 | if (gr_ctx->graphics_preempt_mode == NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP) { | 414 | if (gr_ctx->graphics_preempt_mode == NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP) { |
409 | attrib_size_in_chunk = gr->attrib_cb_gfxp_size; | 415 | attrib_size_in_chunk = gr->attrib_cb_gfxp_size; |
410 | cb_attrib_cache_size_init = gr->attrib_cb_gfxp_default_size; | 416 | cb_attrib_cache_size_init = gr->attrib_cb_gfxp_default_size; |
@@ -413,9 +419,9 @@ int gr_gp10b_commit_global_cb_manager(struct gk20a *g, | |||
413 | cb_attrib_cache_size_init = gr->attrib_cb_default_size; | 419 | cb_attrib_cache_size_init = gr->attrib_cb_default_size; |
414 | } | 420 | } |
415 | 421 | ||
416 | gr_gk20a_ctx_patch_write(g, ch_ctx, gr_ds_tga_constraintlogic_beta_r(), | 422 | gr_gk20a_ctx_patch_write(g, gr_ctx, gr_ds_tga_constraintlogic_beta_r(), |
417 | gr->attrib_cb_default_size, patch); | 423 | gr->attrib_cb_default_size, patch); |
418 | gr_gk20a_ctx_patch_write(g, ch_ctx, gr_ds_tga_constraintlogic_alpha_r(), | 424 | gr_gk20a_ctx_patch_write(g, gr_ctx, gr_ds_tga_constraintlogic_alpha_r(), |
419 | gr->alpha_cb_default_size, patch); | 425 | gr->alpha_cb_default_size, patch); |
420 | 426 | ||
421 | pd_ab_max_output = (gr->alpha_cb_default_size * | 427 | pd_ab_max_output = (gr->alpha_cb_default_size * |
@@ -423,11 +429,11 @@ int gr_gp10b_commit_global_cb_manager(struct gk20a *g, | |||
423 | gr_pd_ab_dist_cfg1_max_output_granularity_v(); | 429 | gr_pd_ab_dist_cfg1_max_output_granularity_v(); |
424 | 430 | ||
425 | if (g->gr.pd_max_batches) { | 431 | if (g->gr.pd_max_batches) { |
426 | gr_gk20a_ctx_patch_write(g, ch_ctx, gr_pd_ab_dist_cfg1_r(), | 432 | gr_gk20a_ctx_patch_write(g, gr_ctx, gr_pd_ab_dist_cfg1_r(), |
427 | gr_pd_ab_dist_cfg1_max_output_f(pd_ab_max_output) | | 433 | gr_pd_ab_dist_cfg1_max_output_f(pd_ab_max_output) | |
428 | gr_pd_ab_dist_cfg1_max_batches_f(g->gr.pd_max_batches), patch); | 434 | gr_pd_ab_dist_cfg1_max_batches_f(g->gr.pd_max_batches), patch); |
429 | } else { | 435 | } else { |
430 | gr_gk20a_ctx_patch_write(g, ch_ctx, gr_pd_ab_dist_cfg1_r(), | 436 | gr_gk20a_ctx_patch_write(g, gr_ctx, gr_pd_ab_dist_cfg1_r(), |
431 | gr_pd_ab_dist_cfg1_max_output_f(pd_ab_max_output) | | 437 | gr_pd_ab_dist_cfg1_max_output_f(pd_ab_max_output) | |
432 | gr_pd_ab_dist_cfg1_max_batches_init_f(), patch); | 438 | gr_pd_ab_dist_cfg1_max_batches_init_f(), patch); |
433 | } | 439 | } |
@@ -447,17 +453,17 @@ int gr_gp10b_commit_global_cb_manager(struct gk20a *g, | |||
447 | cbm_cfg_size_steadystate = gr->attrib_cb_default_size * | 453 | cbm_cfg_size_steadystate = gr->attrib_cb_default_size * |
448 | gr->pes_tpc_count[ppc_index][gpc_index]; | 454 | gr->pes_tpc_count[ppc_index][gpc_index]; |
449 | 455 | ||
450 | gr_gk20a_ctx_patch_write(g, ch_ctx, | 456 | gr_gk20a_ctx_patch_write(g, gr_ctx, |
451 | gr_gpc0_ppc0_cbm_beta_cb_size_r() + temp + | 457 | gr_gpc0_ppc0_cbm_beta_cb_size_r() + temp + |
452 | ppc_in_gpc_stride * ppc_index, | 458 | ppc_in_gpc_stride * ppc_index, |
453 | cbm_cfg_size_beta, patch); | 459 | cbm_cfg_size_beta, patch); |
454 | 460 | ||
455 | gr_gk20a_ctx_patch_write(g, ch_ctx, | 461 | gr_gk20a_ctx_patch_write(g, gr_ctx, |
456 | gr_gpc0_ppc0_cbm_beta_cb_offset_r() + temp + | 462 | gr_gpc0_ppc0_cbm_beta_cb_offset_r() + temp + |
457 | ppc_in_gpc_stride * ppc_index, | 463 | ppc_in_gpc_stride * ppc_index, |
458 | attrib_offset_in_chunk, patch); | 464 | attrib_offset_in_chunk, patch); |
459 | 465 | ||
460 | gr_gk20a_ctx_patch_write(g, ch_ctx, | 466 | gr_gk20a_ctx_patch_write(g, gr_ctx, |
461 | gr_gpc0_ppc0_cbm_beta_steady_state_cb_size_r() + temp + | 467 | gr_gpc0_ppc0_cbm_beta_steady_state_cb_size_r() + temp + |
462 | ppc_in_gpc_stride * ppc_index, | 468 | ppc_in_gpc_stride * ppc_index, |
463 | cbm_cfg_size_steadystate, | 469 | cbm_cfg_size_steadystate, |
@@ -466,12 +472,12 @@ int gr_gp10b_commit_global_cb_manager(struct gk20a *g, | |||
466 | attrib_offset_in_chunk += attrib_size_in_chunk * | 472 | attrib_offset_in_chunk += attrib_size_in_chunk * |
467 | gr->pes_tpc_count[ppc_index][gpc_index]; | 473 | gr->pes_tpc_count[ppc_index][gpc_index]; |
468 | 474 | ||
469 | gr_gk20a_ctx_patch_write(g, ch_ctx, | 475 | gr_gk20a_ctx_patch_write(g, gr_ctx, |
470 | gr_gpc0_ppc0_cbm_alpha_cb_size_r() + temp + | 476 | gr_gpc0_ppc0_cbm_alpha_cb_size_r() + temp + |
471 | ppc_in_gpc_stride * ppc_index, | 477 | ppc_in_gpc_stride * ppc_index, |
472 | cbm_cfg_size_alpha, patch); | 478 | cbm_cfg_size_alpha, patch); |
473 | 479 | ||
474 | gr_gk20a_ctx_patch_write(g, ch_ctx, | 480 | gr_gk20a_ctx_patch_write(g, gr_ctx, |
475 | gr_gpc0_ppc0_cbm_alpha_cb_offset_r() + temp + | 481 | gr_gpc0_ppc0_cbm_alpha_cb_offset_r() + temp + |
476 | ppc_in_gpc_stride * ppc_index, | 482 | ppc_in_gpc_stride * ppc_index, |
477 | alpha_offset_in_chunk, patch); | 483 | alpha_offset_in_chunk, patch); |
@@ -479,7 +485,7 @@ int gr_gp10b_commit_global_cb_manager(struct gk20a *g, | |||
479 | alpha_offset_in_chunk += gr->alpha_cb_size * | 485 | alpha_offset_in_chunk += gr->alpha_cb_size * |
480 | gr->pes_tpc_count[ppc_index][gpc_index]; | 486 | gr->pes_tpc_count[ppc_index][gpc_index]; |
481 | 487 | ||
482 | gr_gk20a_ctx_patch_write(g, ch_ctx, | 488 | gr_gk20a_ctx_patch_write(g, gr_ctx, |
483 | gr_gpcs_swdx_tc_beta_cb_size_r(ppc_index + temp2), | 489 | gr_gpcs_swdx_tc_beta_cb_size_r(ppc_index + temp2), |
484 | gr_gpcs_swdx_tc_beta_cb_size_v_f(cbm_cfg_size_steadystate), | 490 | gr_gpcs_swdx_tc_beta_cb_size_v_f(cbm_cfg_size_steadystate), |
485 | patch); | 491 | patch); |
@@ -490,20 +496,20 @@ int gr_gp10b_commit_global_cb_manager(struct gk20a *g, | |||
490 | } | 496 | } |
491 | 497 | ||
492 | void gr_gp10b_commit_global_pagepool(struct gk20a *g, | 498 | void gr_gp10b_commit_global_pagepool(struct gk20a *g, |
493 | struct channel_ctx_gk20a *ch_ctx, | 499 | struct nvgpu_gr_ctx *gr_ctx, |
494 | u64 addr, u32 size, bool patch) | 500 | u64 addr, u32 size, bool patch) |
495 | { | 501 | { |
496 | gr_gk20a_ctx_patch_write(g, ch_ctx, gr_scc_pagepool_base_r(), | 502 | gr_gk20a_ctx_patch_write(g, gr_ctx, gr_scc_pagepool_base_r(), |
497 | gr_scc_pagepool_base_addr_39_8_f(addr), patch); | 503 | gr_scc_pagepool_base_addr_39_8_f(addr), patch); |
498 | 504 | ||
499 | gr_gk20a_ctx_patch_write(g, ch_ctx, gr_scc_pagepool_r(), | 505 | gr_gk20a_ctx_patch_write(g, gr_ctx, gr_scc_pagepool_r(), |
500 | gr_scc_pagepool_total_pages_f(size) | | 506 | gr_scc_pagepool_total_pages_f(size) | |
501 | gr_scc_pagepool_valid_true_f(), patch); | 507 | gr_scc_pagepool_valid_true_f(), patch); |
502 | 508 | ||
503 | gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_gcc_pagepool_base_r(), | 509 | gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_gcc_pagepool_base_r(), |
504 | gr_gpcs_gcc_pagepool_base_addr_39_8_f(addr), patch); | 510 | gr_gpcs_gcc_pagepool_base_addr_39_8_f(addr), patch); |
505 | 511 | ||
506 | gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_gcc_pagepool_r(), | 512 | gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_gcc_pagepool_r(), |
507 | gr_gpcs_gcc_pagepool_total_pages_f(size), patch); | 513 | gr_gpcs_gcc_pagepool_total_pages_f(size), patch); |
508 | } | 514 | } |
509 | 515 | ||
@@ -947,7 +953,7 @@ fail_free: | |||
947 | } | 953 | } |
948 | 954 | ||
949 | int gr_gp10b_set_ctxsw_preemption_mode(struct gk20a *g, | 955 | int gr_gp10b_set_ctxsw_preemption_mode(struct gk20a *g, |
950 | struct gr_ctx_desc *gr_ctx, | 956 | struct nvgpu_gr_ctx *gr_ctx, |
951 | struct vm_gk20a *vm, u32 class, | 957 | struct vm_gk20a *vm, u32 class, |
952 | u32 graphics_preempt_mode, | 958 | u32 graphics_preempt_mode, |
953 | u32 compute_preempt_mode) | 959 | u32 compute_preempt_mode) |
@@ -1071,7 +1077,7 @@ fail: | |||
1071 | } | 1077 | } |
1072 | 1078 | ||
1073 | int gr_gp10b_alloc_gr_ctx(struct gk20a *g, | 1079 | int gr_gp10b_alloc_gr_ctx(struct gk20a *g, |
1074 | struct gr_ctx_desc **gr_ctx, struct vm_gk20a *vm, | 1080 | struct nvgpu_gr_ctx *gr_ctx, struct vm_gk20a *vm, |
1075 | u32 class, | 1081 | u32 class, |
1076 | u32 flags) | 1082 | u32 flags) |
1077 | { | 1083 | { |
@@ -1085,7 +1091,7 @@ int gr_gp10b_alloc_gr_ctx(struct gk20a *g, | |||
1085 | if (err) | 1091 | if (err) |
1086 | return err; | 1092 | return err; |
1087 | 1093 | ||
1088 | (*gr_ctx)->ctx_id_valid = false; | 1094 | gr_ctx->ctx_id_valid = false; |
1089 | 1095 | ||
1090 | if (flags & NVGPU_OBJ_CTX_FLAGS_SUPPORT_GFXP) | 1096 | if (flags & NVGPU_OBJ_CTX_FLAGS_SUPPORT_GFXP) |
1091 | graphics_preempt_mode = NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP; | 1097 | graphics_preempt_mode = NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP; |
@@ -1094,7 +1100,7 @@ int gr_gp10b_alloc_gr_ctx(struct gk20a *g, | |||
1094 | 1100 | ||
1095 | if (graphics_preempt_mode || compute_preempt_mode) { | 1101 | if (graphics_preempt_mode || compute_preempt_mode) { |
1096 | if (g->ops.gr.set_ctxsw_preemption_mode) { | 1102 | if (g->ops.gr.set_ctxsw_preemption_mode) { |
1097 | err = g->ops.gr.set_ctxsw_preemption_mode(g, *gr_ctx, vm, | 1103 | err = g->ops.gr.set_ctxsw_preemption_mode(g, gr_ctx, vm, |
1098 | class, graphics_preempt_mode, compute_preempt_mode); | 1104 | class, graphics_preempt_mode, compute_preempt_mode); |
1099 | if (err) { | 1105 | if (err) { |
1100 | nvgpu_err(g, "set_ctxsw_preemption_mode failed"); | 1106 | nvgpu_err(g, "set_ctxsw_preemption_mode failed"); |
@@ -1109,14 +1115,13 @@ int gr_gp10b_alloc_gr_ctx(struct gk20a *g, | |||
1109 | return 0; | 1115 | return 0; |
1110 | 1116 | ||
1111 | fail_free_gk20a_ctx: | 1117 | fail_free_gk20a_ctx: |
1112 | gr_gk20a_free_gr_ctx(g, vm, *gr_ctx); | 1118 | gr_gk20a_free_gr_ctx(g, vm, gr_ctx); |
1113 | *gr_ctx = NULL; | ||
1114 | 1119 | ||
1115 | return err; | 1120 | return err; |
1116 | } | 1121 | } |
1117 | 1122 | ||
1118 | void gr_gp10b_dump_ctxsw_stats(struct gk20a *g, struct vm_gk20a *vm, | 1123 | void gr_gp10b_dump_ctxsw_stats(struct gk20a *g, struct vm_gk20a *vm, |
1119 | struct gr_ctx_desc *gr_ctx) | 1124 | struct nvgpu_gr_ctx *gr_ctx) |
1120 | { | 1125 | { |
1121 | struct nvgpu_mem *mem = &gr_ctx->mem; | 1126 | struct nvgpu_mem *mem = &gr_ctx->mem; |
1122 | 1127 | ||
@@ -1168,13 +1173,13 @@ void gr_gp10b_dump_ctxsw_stats(struct gk20a *g, struct vm_gk20a *vm, | |||
1168 | } | 1173 | } |
1169 | 1174 | ||
1170 | void gr_gp10b_update_ctxsw_preemption_mode(struct gk20a *g, | 1175 | void gr_gp10b_update_ctxsw_preemption_mode(struct gk20a *g, |
1171 | struct channel_ctx_gk20a *ch_ctx, | 1176 | struct channel_gk20a *c, |
1172 | struct nvgpu_mem *mem) | 1177 | struct nvgpu_mem *mem) |
1173 | { | 1178 | { |
1174 | struct gr_ctx_desc *gr_ctx = ch_ctx->gr_ctx; | 1179 | struct tsg_gk20a *tsg; |
1175 | struct ctx_header_desc *ctx = &ch_ctx->ctx_header; | 1180 | struct nvgpu_gr_ctx *gr_ctx; |
1181 | struct ctx_header_desc *ctx = &c->ctx_header; | ||
1176 | struct nvgpu_mem *ctxheader = &ctx->mem; | 1182 | struct nvgpu_mem *ctxheader = &ctx->mem; |
1177 | |||
1178 | u32 gfxp_preempt_option = | 1183 | u32 gfxp_preempt_option = |
1179 | ctxsw_prog_main_image_graphics_preemption_options_control_gfxp_f(); | 1184 | ctxsw_prog_main_image_graphics_preemption_options_control_gfxp_f(); |
1180 | u32 cilp_preempt_option = | 1185 | u32 cilp_preempt_option = |
@@ -1185,6 +1190,12 @@ void gr_gp10b_update_ctxsw_preemption_mode(struct gk20a *g, | |||
1185 | 1190 | ||
1186 | gk20a_dbg_fn(""); | 1191 | gk20a_dbg_fn(""); |
1187 | 1192 | ||
1193 | tsg = tsg_gk20a_from_ch(c); | ||
1194 | if (!tsg) | ||
1195 | return; | ||
1196 | |||
1197 | gr_ctx = &tsg->gr_ctx; | ||
1198 | |||
1188 | if (gr_ctx->graphics_preempt_mode == NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP) { | 1199 | if (gr_ctx->graphics_preempt_mode == NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP) { |
1189 | gk20a_dbg_info("GfxP: %x", gfxp_preempt_option); | 1200 | gk20a_dbg_info("GfxP: %x", gfxp_preempt_option); |
1190 | nvgpu_mem_wr(g, mem, | 1201 | nvgpu_mem_wr(g, mem, |
@@ -1220,7 +1231,7 @@ void gr_gp10b_update_ctxsw_preemption_mode(struct gk20a *g, | |||
1220 | gr_ctx->preempt_ctxsw_buffer.gpu_va); | 1231 | gr_ctx->preempt_ctxsw_buffer.gpu_va); |
1221 | } | 1232 | } |
1222 | 1233 | ||
1223 | err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx, true); | 1234 | err = gr_gk20a_ctx_patch_write_begin(g, gr_ctx, true); |
1224 | if (err) { | 1235 | if (err) { |
1225 | nvgpu_err(g, "can't map patch context"); | 1236 | nvgpu_err(g, "can't map patch context"); |
1226 | goto out; | 1237 | goto out; |
@@ -1232,7 +1243,7 @@ void gr_gp10b_update_ctxsw_preemption_mode(struct gk20a *g, | |||
1232 | (32 - gr_gpcs_setup_attrib_cb_base_addr_39_12_align_bits_v())); | 1243 | (32 - gr_gpcs_setup_attrib_cb_base_addr_39_12_align_bits_v())); |
1233 | 1244 | ||
1234 | gk20a_dbg_info("attrib cb addr : 0x%016x", addr); | 1245 | gk20a_dbg_info("attrib cb addr : 0x%016x", addr); |
1235 | g->ops.gr.commit_global_attrib_cb(g, ch_ctx, addr, true); | 1246 | g->ops.gr.commit_global_attrib_cb(g, gr_ctx, addr, true); |
1236 | 1247 | ||
1237 | addr = (u64_lo32(gr_ctx->pagepool_ctxsw_buffer.gpu_va) >> | 1248 | addr = (u64_lo32(gr_ctx->pagepool_ctxsw_buffer.gpu_va) >> |
1238 | gr_scc_pagepool_base_addr_39_8_align_bits_v()) | | 1249 | gr_scc_pagepool_base_addr_39_8_align_bits_v()) | |
@@ -1243,7 +1254,7 @@ void gr_gp10b_update_ctxsw_preemption_mode(struct gk20a *g, | |||
1243 | if (size == g->ops.gr.pagepool_default_size(g)) | 1254 | if (size == g->ops.gr.pagepool_default_size(g)) |
1244 | size = gr_scc_pagepool_total_pages_hwmax_v(); | 1255 | size = gr_scc_pagepool_total_pages_hwmax_v(); |
1245 | 1256 | ||
1246 | g->ops.gr.commit_global_pagepool(g, ch_ctx, addr, size, true); | 1257 | g->ops.gr.commit_global_pagepool(g, gr_ctx, addr, size, true); |
1247 | 1258 | ||
1248 | addr = (u64_lo32(gr_ctx->spill_ctxsw_buffer.gpu_va) >> | 1259 | addr = (u64_lo32(gr_ctx->spill_ctxsw_buffer.gpu_va) >> |
1249 | gr_gpc0_swdx_rm_spill_buffer_addr_39_8_align_bits_v()) | | 1260 | gr_gpc0_swdx_rm_spill_buffer_addr_39_8_align_bits_v()) | |
@@ -1252,28 +1263,28 @@ void gr_gp10b_update_ctxsw_preemption_mode(struct gk20a *g, | |||
1252 | size = gr_ctx->spill_ctxsw_buffer.size / | 1263 | size = gr_ctx->spill_ctxsw_buffer.size / |
1253 | gr_gpc0_swdx_rm_spill_buffer_size_256b_byte_granularity_v(); | 1264 | gr_gpc0_swdx_rm_spill_buffer_size_256b_byte_granularity_v(); |
1254 | 1265 | ||
1255 | gr_gk20a_ctx_patch_write(g, ch_ctx, | 1266 | gr_gk20a_ctx_patch_write(g, gr_ctx, |
1256 | gr_gpc0_swdx_rm_spill_buffer_addr_r(), | 1267 | gr_gpc0_swdx_rm_spill_buffer_addr_r(), |
1257 | gr_gpc0_swdx_rm_spill_buffer_addr_39_8_f(addr), | 1268 | gr_gpc0_swdx_rm_spill_buffer_addr_39_8_f(addr), |
1258 | true); | 1269 | true); |
1259 | gr_gk20a_ctx_patch_write(g, ch_ctx, | 1270 | gr_gk20a_ctx_patch_write(g, gr_ctx, |
1260 | gr_gpc0_swdx_rm_spill_buffer_size_r(), | 1271 | gr_gpc0_swdx_rm_spill_buffer_size_r(), |
1261 | gr_gpc0_swdx_rm_spill_buffer_size_256b_f(size), | 1272 | gr_gpc0_swdx_rm_spill_buffer_size_256b_f(size), |
1262 | true); | 1273 | true); |
1263 | 1274 | ||
1264 | cbes_reserve = gr_gpcs_swdx_beta_cb_ctrl_cbes_reserve_gfxp_v(); | 1275 | cbes_reserve = gr_gpcs_swdx_beta_cb_ctrl_cbes_reserve_gfxp_v(); |
1265 | gr_gk20a_ctx_patch_write(g, ch_ctx, | 1276 | gr_gk20a_ctx_patch_write(g, gr_ctx, |
1266 | gr_gpcs_swdx_beta_cb_ctrl_r(), | 1277 | gr_gpcs_swdx_beta_cb_ctrl_r(), |
1267 | gr_gpcs_swdx_beta_cb_ctrl_cbes_reserve_f( | 1278 | gr_gpcs_swdx_beta_cb_ctrl_cbes_reserve_f( |
1268 | cbes_reserve), | 1279 | cbes_reserve), |
1269 | true); | 1280 | true); |
1270 | gr_gk20a_ctx_patch_write(g, ch_ctx, | 1281 | gr_gk20a_ctx_patch_write(g, gr_ctx, |
1271 | gr_gpcs_ppcs_cbm_beta_cb_ctrl_r(), | 1282 | gr_gpcs_ppcs_cbm_beta_cb_ctrl_r(), |
1272 | gr_gpcs_ppcs_cbm_beta_cb_ctrl_cbes_reserve_f( | 1283 | gr_gpcs_ppcs_cbm_beta_cb_ctrl_cbes_reserve_f( |
1273 | cbes_reserve), | 1284 | cbes_reserve), |
1274 | true); | 1285 | true); |
1275 | 1286 | ||
1276 | gr_gk20a_ctx_patch_write_end(g, ch_ctx, true); | 1287 | gr_gk20a_ctx_patch_write_end(g, gr_ctx, true); |
1277 | } | 1288 | } |
1278 | 1289 | ||
1279 | out: | 1290 | out: |
@@ -1478,10 +1489,9 @@ int gr_gp10b_wait_empty(struct gk20a *g, unsigned long duration_ms, | |||
1478 | } | 1489 | } |
1479 | 1490 | ||
1480 | void gr_gp10b_commit_global_attrib_cb(struct gk20a *g, | 1491 | void gr_gp10b_commit_global_attrib_cb(struct gk20a *g, |
1481 | struct channel_ctx_gk20a *ch_ctx, | 1492 | struct nvgpu_gr_ctx *gr_ctx, |
1482 | u64 addr, bool patch) | 1493 | u64 addr, bool patch) |
1483 | { | 1494 | { |
1484 | struct gr_ctx_desc *gr_ctx = ch_ctx->gr_ctx; | ||
1485 | int attrBufferSize; | 1495 | int attrBufferSize; |
1486 | 1496 | ||
1487 | if (gr_ctx->preempt_ctxsw_buffer.gpu_va) | 1497 | if (gr_ctx->preempt_ctxsw_buffer.gpu_va) |
@@ -1491,37 +1501,37 @@ void gr_gp10b_commit_global_attrib_cb(struct gk20a *g, | |||
1491 | 1501 | ||
1492 | attrBufferSize /= gr_gpcs_tpcs_tex_rm_cb_1_size_div_128b_granularity_f(); | 1502 | attrBufferSize /= gr_gpcs_tpcs_tex_rm_cb_1_size_div_128b_granularity_f(); |
1493 | 1503 | ||
1494 | gr_gm20b_commit_global_attrib_cb(g, ch_ctx, addr, patch); | 1504 | gr_gm20b_commit_global_attrib_cb(g, gr_ctx, addr, patch); |
1495 | 1505 | ||
1496 | gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_r(), | 1506 | gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_r(), |
1497 | gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_v_f(addr) | | 1507 | gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_v_f(addr) | |
1498 | gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_valid_true_f(), patch); | 1508 | gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_valid_true_f(), patch); |
1499 | 1509 | ||
1500 | gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_tex_rm_cb_0_r(), | 1510 | gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_tpcs_tex_rm_cb_0_r(), |
1501 | gr_gpcs_tpcs_tex_rm_cb_0_base_addr_43_12_f(addr), patch); | 1511 | gr_gpcs_tpcs_tex_rm_cb_0_base_addr_43_12_f(addr), patch); |
1502 | 1512 | ||
1503 | gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_tex_rm_cb_1_r(), | 1513 | gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_tpcs_tex_rm_cb_1_r(), |
1504 | gr_gpcs_tpcs_tex_rm_cb_1_size_div_128b_f(attrBufferSize) | | 1514 | gr_gpcs_tpcs_tex_rm_cb_1_size_div_128b_f(attrBufferSize) | |
1505 | gr_gpcs_tpcs_tex_rm_cb_1_valid_true_f(), patch); | 1515 | gr_gpcs_tpcs_tex_rm_cb_1_valid_true_f(), patch); |
1506 | } | 1516 | } |
1507 | 1517 | ||
1508 | void gr_gp10b_commit_global_bundle_cb(struct gk20a *g, | 1518 | void gr_gp10b_commit_global_bundle_cb(struct gk20a *g, |
1509 | struct channel_ctx_gk20a *ch_ctx, | 1519 | struct nvgpu_gr_ctx *gr_ctx, |
1510 | u64 addr, u64 size, bool patch) | 1520 | u64 addr, u64 size, bool patch) |
1511 | { | 1521 | { |
1512 | u32 data; | 1522 | u32 data; |
1513 | 1523 | ||
1514 | gr_gk20a_ctx_patch_write(g, ch_ctx, gr_scc_bundle_cb_base_r(), | 1524 | gr_gk20a_ctx_patch_write(g, gr_ctx, gr_scc_bundle_cb_base_r(), |
1515 | gr_scc_bundle_cb_base_addr_39_8_f(addr), patch); | 1525 | gr_scc_bundle_cb_base_addr_39_8_f(addr), patch); |
1516 | 1526 | ||
1517 | gr_gk20a_ctx_patch_write(g, ch_ctx, gr_scc_bundle_cb_size_r(), | 1527 | gr_gk20a_ctx_patch_write(g, gr_ctx, gr_scc_bundle_cb_size_r(), |
1518 | gr_scc_bundle_cb_size_div_256b_f(size) | | 1528 | gr_scc_bundle_cb_size_div_256b_f(size) | |
1519 | gr_scc_bundle_cb_size_valid_true_f(), patch); | 1529 | gr_scc_bundle_cb_size_valid_true_f(), patch); |
1520 | 1530 | ||
1521 | gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_swdx_bundle_cb_base_r(), | 1531 | gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_swdx_bundle_cb_base_r(), |
1522 | gr_gpcs_swdx_bundle_cb_base_addr_39_8_f(addr), patch); | 1532 | gr_gpcs_swdx_bundle_cb_base_addr_39_8_f(addr), patch); |
1523 | 1533 | ||
1524 | gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_swdx_bundle_cb_size_r(), | 1534 | gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_swdx_bundle_cb_size_r(), |
1525 | gr_gpcs_swdx_bundle_cb_size_div_256b_f(size) | | 1535 | gr_gpcs_swdx_bundle_cb_size_div_256b_f(size) | |
1526 | gr_gpcs_swdx_bundle_cb_size_valid_true_f(), patch); | 1536 | gr_gpcs_swdx_bundle_cb_size_valid_true_f(), patch); |
1527 | 1537 | ||
@@ -1535,7 +1545,7 @@ void gr_gp10b_commit_global_bundle_cb(struct gk20a *g, | |||
1535 | gk20a_dbg_info("bundle cb token limit : %d, state limit : %d", | 1545 | gk20a_dbg_info("bundle cb token limit : %d, state limit : %d", |
1536 | g->gr.bundle_cb_token_limit, data); | 1546 | g->gr.bundle_cb_token_limit, data); |
1537 | 1547 | ||
1538 | gr_gk20a_ctx_patch_write(g, ch_ctx, gr_pd_ab_dist_cfg2_r(), | 1548 | gr_gk20a_ctx_patch_write(g, gr_ctx, gr_pd_ab_dist_cfg2_r(), |
1539 | gr_pd_ab_dist_cfg2_token_limit_f(g->gr.bundle_cb_token_limit) | | 1549 | gr_pd_ab_dist_cfg2_token_limit_f(g->gr.bundle_cb_token_limit) | |
1540 | gr_pd_ab_dist_cfg2_state_limit_f(data), patch); | 1550 | gr_pd_ab_dist_cfg2_state_limit_f(data), patch); |
1541 | } | 1551 | } |
@@ -1706,14 +1716,17 @@ int gr_gp10b_set_cilp_preempt_pending(struct gk20a *g, | |||
1706 | struct channel_gk20a *fault_ch) | 1716 | struct channel_gk20a *fault_ch) |
1707 | { | 1717 | { |
1708 | int ret; | 1718 | int ret; |
1709 | struct gr_ctx_desc *gr_ctx = fault_ch->ch_ctx.gr_ctx; | ||
1710 | struct tsg_gk20a *tsg; | 1719 | struct tsg_gk20a *tsg; |
1720 | struct nvgpu_gr_ctx *gr_ctx; | ||
1711 | 1721 | ||
1712 | gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, ""); | 1722 | gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, ""); |
1713 | 1723 | ||
1714 | if (!gr_ctx) | 1724 | tsg = tsg_gk20a_from_ch(fault_ch); |
1725 | if (!tsg) | ||
1715 | return -EINVAL; | 1726 | return -EINVAL; |
1716 | 1727 | ||
1728 | gr_ctx = &tsg->gr_ctx; | ||
1729 | |||
1717 | if (gr_ctx->cilp_preempt_pending) { | 1730 | if (gr_ctx->cilp_preempt_pending) { |
1718 | gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, | 1731 | gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, |
1719 | "CILP is already pending for chid %d", | 1732 | "CILP is already pending for chid %d", |
@@ -1783,13 +1796,17 @@ int gr_gp10b_set_cilp_preempt_pending(struct gk20a *g, | |||
1783 | static int gr_gp10b_clear_cilp_preempt_pending(struct gk20a *g, | 1796 | static int gr_gp10b_clear_cilp_preempt_pending(struct gk20a *g, |
1784 | struct channel_gk20a *fault_ch) | 1797 | struct channel_gk20a *fault_ch) |
1785 | { | 1798 | { |
1786 | struct gr_ctx_desc *gr_ctx = fault_ch->ch_ctx.gr_ctx; | 1799 | struct tsg_gk20a *tsg; |
1800 | struct nvgpu_gr_ctx *gr_ctx; | ||
1787 | 1801 | ||
1788 | gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, ""); | 1802 | gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, ""); |
1789 | 1803 | ||
1790 | if (!gr_ctx) | 1804 | tsg = tsg_gk20a_from_ch(fault_ch); |
1805 | if (!tsg) | ||
1791 | return -EINVAL; | 1806 | return -EINVAL; |
1792 | 1807 | ||
1808 | gr_ctx = &tsg->gr_ctx; | ||
1809 | |||
1793 | /* The ucode is self-clearing, so all we need to do here is | 1810 | /* The ucode is self-clearing, so all we need to do here is |
1794 | to clear cilp_preempt_pending. */ | 1811 | to clear cilp_preempt_pending. */ |
1795 | if (!gr_ctx->cilp_preempt_pending) { | 1812 | if (!gr_ctx->cilp_preempt_pending) { |
@@ -1820,13 +1837,19 @@ int gr_gp10b_pre_process_sm_exception(struct gk20a *g, | |||
1820 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); | 1837 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); |
1821 | u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); | 1838 | u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); |
1822 | u32 offset = gpc_stride * gpc + tpc_in_gpc_stride * tpc; | 1839 | u32 offset = gpc_stride * gpc + tpc_in_gpc_stride * tpc; |
1840 | struct tsg_gk20a *tsg; | ||
1823 | 1841 | ||
1824 | *early_exit = false; | 1842 | *early_exit = false; |
1825 | *ignore_debugger = false; | 1843 | *ignore_debugger = false; |
1826 | 1844 | ||
1827 | if (fault_ch) | 1845 | if (fault_ch) { |
1828 | cilp_enabled = (fault_ch->ch_ctx.gr_ctx->compute_preempt_mode == | 1846 | tsg = tsg_gk20a_from_ch(fault_ch); |
1847 | if (!tsg) | ||
1848 | return -EINVAL; | ||
1849 | |||
1850 | cilp_enabled = (tsg->gr_ctx.compute_preempt_mode == | ||
1829 | NVGPU_PREEMPTION_MODE_COMPUTE_CILP); | 1851 | NVGPU_PREEMPTION_MODE_COMPUTE_CILP); |
1852 | } | ||
1830 | 1853 | ||
1831 | gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "SM Exception received on gpc %d tpc %d = %u\n", | 1854 | gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "SM Exception received on gpc %d tpc %d = %u\n", |
1832 | gpc, tpc, global_esr); | 1855 | gpc, tpc, global_esr); |
@@ -1911,8 +1934,9 @@ int gr_gp10b_pre_process_sm_exception(struct gk20a *g, | |||
1911 | 1934 | ||
1912 | static int gr_gp10b_get_cilp_preempt_pending_chid(struct gk20a *g, int *__chid) | 1935 | static int gr_gp10b_get_cilp_preempt_pending_chid(struct gk20a *g, int *__chid) |
1913 | { | 1936 | { |
1914 | struct gr_ctx_desc *gr_ctx; | 1937 | struct nvgpu_gr_ctx *gr_ctx; |
1915 | struct channel_gk20a *ch; | 1938 | struct channel_gk20a *ch; |
1939 | struct tsg_gk20a *tsg; | ||
1916 | int chid; | 1940 | int chid; |
1917 | int ret = -EINVAL; | 1941 | int ret = -EINVAL; |
1918 | 1942 | ||
@@ -1922,7 +1946,11 @@ static int gr_gp10b_get_cilp_preempt_pending_chid(struct gk20a *g, int *__chid) | |||
1922 | if (!ch) | 1946 | if (!ch) |
1923 | return ret; | 1947 | return ret; |
1924 | 1948 | ||
1925 | gr_ctx = ch->ch_ctx.gr_ctx; | 1949 | tsg = tsg_gk20a_from_ch(ch); |
1950 | if (!tsg) | ||
1951 | return -EINVAL; | ||
1952 | |||
1953 | gr_ctx = &tsg->gr_ctx; | ||
1926 | 1954 | ||
1927 | if (gr_ctx->cilp_preempt_pending) { | 1955 | if (gr_ctx->cilp_preempt_pending) { |
1928 | *__chid = chid; | 1956 | *__chid = chid; |
@@ -2022,11 +2050,17 @@ static bool gr_gp10b_suspend_context(struct channel_gk20a *ch, | |||
2022 | bool *cilp_preempt_pending) | 2050 | bool *cilp_preempt_pending) |
2023 | { | 2051 | { |
2024 | struct gk20a *g = ch->g; | 2052 | struct gk20a *g = ch->g; |
2025 | struct channel_ctx_gk20a *ch_ctx = &ch->ch_ctx; | 2053 | struct tsg_gk20a *tsg; |
2026 | struct gr_ctx_desc *gr_ctx = ch_ctx->gr_ctx; | 2054 | struct nvgpu_gr_ctx *gr_ctx; |
2027 | bool ctx_resident = false; | 2055 | bool ctx_resident = false; |
2028 | int err = 0; | 2056 | int err = 0; |
2029 | 2057 | ||
2058 | tsg = tsg_gk20a_from_ch(ch); | ||
2059 | if (!tsg) | ||
2060 | return -EINVAL; | ||
2061 | |||
2062 | gr_ctx = &tsg->gr_ctx; | ||
2063 | |||
2030 | *cilp_preempt_pending = false; | 2064 | *cilp_preempt_pending = false; |
2031 | 2065 | ||
2032 | if (gk20a_is_channel_ctx_resident(ch)) { | 2066 | if (gk20a_is_channel_ctx_resident(ch)) { |
@@ -2097,15 +2131,22 @@ int gr_gp10b_suspend_contexts(struct gk20a *g, | |||
2097 | nvgpu_mutex_release(&g->dbg_sessions_lock); | 2131 | nvgpu_mutex_release(&g->dbg_sessions_lock); |
2098 | 2132 | ||
2099 | if (cilp_preempt_pending_ch) { | 2133 | if (cilp_preempt_pending_ch) { |
2100 | struct channel_ctx_gk20a *ch_ctx = | 2134 | struct tsg_gk20a *tsg; |
2101 | &cilp_preempt_pending_ch->ch_ctx; | 2135 | struct nvgpu_gr_ctx *gr_ctx; |
2102 | struct gr_ctx_desc *gr_ctx = ch_ctx->gr_ctx; | ||
2103 | struct nvgpu_timeout timeout; | 2136 | struct nvgpu_timeout timeout; |
2104 | 2137 | ||
2105 | gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, | 2138 | gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, |
2106 | "CILP preempt pending, waiting %lu msecs for preemption", | 2139 | "CILP preempt pending, waiting %lu msecs for preemption", |
2107 | gk20a_get_gr_idle_timeout(g)); | 2140 | gk20a_get_gr_idle_timeout(g)); |
2108 | 2141 | ||
2142 | tsg = tsg_gk20a_from_ch(cilp_preempt_pending_ch); | ||
2143 | if (!tsg) { | ||
2144 | err = -EINVAL; | ||
2145 | goto clean_up; | ||
2146 | } | ||
2147 | |||
2148 | gr_ctx = &tsg->gr_ctx; | ||
2149 | |||
2109 | nvgpu_timeout_init(g, &timeout, gk20a_get_gr_idle_timeout(g), | 2150 | nvgpu_timeout_init(g, &timeout, gk20a_get_gr_idle_timeout(g), |
2110 | NVGPU_TIMER_CPU_TIMER); | 2151 | NVGPU_TIMER_CPU_TIMER); |
2111 | do { | 2152 | do { |
@@ -2130,12 +2171,19 @@ clean_up: | |||
2130 | int gr_gp10b_set_boosted_ctx(struct channel_gk20a *ch, | 2171 | int gr_gp10b_set_boosted_ctx(struct channel_gk20a *ch, |
2131 | bool boost) | 2172 | bool boost) |
2132 | { | 2173 | { |
2133 | struct gr_ctx_desc *gr_ctx = ch->ch_ctx.gr_ctx; | 2174 | struct tsg_gk20a *tsg; |
2175 | struct nvgpu_gr_ctx *gr_ctx; | ||
2134 | struct gk20a *g = ch->g; | 2176 | struct gk20a *g = ch->g; |
2135 | struct nvgpu_mem *mem = &gr_ctx->mem; | 2177 | struct nvgpu_mem *mem; |
2136 | int err = 0; | 2178 | int err = 0; |
2137 | 2179 | ||
2180 | tsg = tsg_gk20a_from_ch(ch); | ||
2181 | if (!tsg) | ||
2182 | return -EINVAL; | ||
2183 | |||
2184 | gr_ctx = &tsg->gr_ctx; | ||
2138 | gr_ctx->boosted_ctx = boost; | 2185 | gr_ctx->boosted_ctx = boost; |
2186 | mem = &gr_ctx->mem; | ||
2139 | 2187 | ||
2140 | if (nvgpu_mem_begin(g, mem)) | 2188 | if (nvgpu_mem_begin(g, mem)) |
2141 | return -ENOMEM; | 2189 | return -ENOMEM; |
@@ -2162,7 +2210,7 @@ unmap_ctx: | |||
2162 | } | 2210 | } |
2163 | 2211 | ||
2164 | void gr_gp10b_update_boosted_ctx(struct gk20a *g, struct nvgpu_mem *mem, | 2212 | void gr_gp10b_update_boosted_ctx(struct gk20a *g, struct nvgpu_mem *mem, |
2165 | struct gr_ctx_desc *gr_ctx) { | 2213 | struct nvgpu_gr_ctx *gr_ctx) { |
2166 | u32 v; | 2214 | u32 v; |
2167 | 2215 | ||
2168 | v = ctxsw_prog_main_image_pmu_options_boost_clock_frequencies_f( | 2216 | v = ctxsw_prog_main_image_pmu_options_boost_clock_frequencies_f( |
@@ -2174,13 +2222,12 @@ int gr_gp10b_set_preemption_mode(struct channel_gk20a *ch, | |||
2174 | u32 graphics_preempt_mode, | 2222 | u32 graphics_preempt_mode, |
2175 | u32 compute_preempt_mode) | 2223 | u32 compute_preempt_mode) |
2176 | { | 2224 | { |
2177 | struct gr_ctx_desc *gr_ctx = ch->ch_ctx.gr_ctx; | 2225 | struct nvgpu_gr_ctx *gr_ctx; |
2178 | struct channel_ctx_gk20a *ch_ctx = &ch->ch_ctx; | ||
2179 | struct gk20a *g = ch->g; | 2226 | struct gk20a *g = ch->g; |
2180 | struct tsg_gk20a *tsg; | 2227 | struct tsg_gk20a *tsg; |
2181 | struct vm_gk20a *vm; | 2228 | struct vm_gk20a *vm; |
2182 | struct nvgpu_mem *mem = &gr_ctx->mem; | 2229 | struct nvgpu_mem *mem; |
2183 | struct ctx_header_desc *ctx = &ch->ch_ctx.ctx_header; | 2230 | struct ctx_header_desc *ctx = &ch->ctx_header; |
2184 | struct nvgpu_mem *ctxheader = &ctx->mem; | 2231 | struct nvgpu_mem *ctxheader = &ctx->mem; |
2185 | u32 class; | 2232 | u32 class; |
2186 | int err = 0; | 2233 | int err = 0; |
@@ -2189,12 +2236,13 @@ int gr_gp10b_set_preemption_mode(struct channel_gk20a *ch, | |||
2189 | if (!class) | 2236 | if (!class) |
2190 | return -EINVAL; | 2237 | return -EINVAL; |
2191 | 2238 | ||
2192 | if (gk20a_is_channel_marked_as_tsg(ch)) { | 2239 | tsg = tsg_gk20a_from_ch(ch); |
2193 | tsg = &g->fifo.tsg[ch->tsgid]; | 2240 | if (!tsg) |
2194 | vm = tsg->vm; | 2241 | return -EINVAL; |
2195 | } else { | 2242 | |
2196 | vm = ch->vm; | 2243 | vm = tsg->vm; |
2197 | } | 2244 | gr_ctx = &tsg->gr_ctx; |
2245 | mem = &gr_ctx->mem; | ||
2198 | 2246 | ||
2199 | /* skip setting anything if both modes are already set */ | 2247 | /* skip setting anything if both modes are already set */ |
2200 | if (graphics_preempt_mode && | 2248 | if (graphics_preempt_mode && |
@@ -2241,15 +2289,15 @@ int gr_gp10b_set_preemption_mode(struct channel_gk20a *ch, | |||
2241 | 2289 | ||
2242 | if (g->ops.gr.update_ctxsw_preemption_mode) { | 2290 | if (g->ops.gr.update_ctxsw_preemption_mode) { |
2243 | g->ops.gr.update_ctxsw_preemption_mode(ch->g, | 2291 | g->ops.gr.update_ctxsw_preemption_mode(ch->g, |
2244 | ch_ctx, mem); | 2292 | ch, mem); |
2245 | 2293 | ||
2246 | err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx, true); | 2294 | err = gr_gk20a_ctx_patch_write_begin(g, gr_ctx, true); |
2247 | if (err) { | 2295 | if (err) { |
2248 | nvgpu_err(g, "can't map patch context"); | 2296 | nvgpu_err(g, "can't map patch context"); |
2249 | goto enable_ch; | 2297 | goto enable_ch; |
2250 | } | 2298 | } |
2251 | g->ops.gr.commit_global_cb_manager(g, ch, true); | 2299 | g->ops.gr.commit_global_cb_manager(g, ch, true); |
2252 | gr_gk20a_ctx_patch_write_end(g, ch_ctx, true); | 2300 | gr_gk20a_ctx_patch_write_end(g, gr_ctx, true); |
2253 | } | 2301 | } |
2254 | 2302 | ||
2255 | enable_ch: | 2303 | enable_ch: |
diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.h b/drivers/gpu/nvgpu/gp10b/gr_gp10b.h index e3ef6304..8d553d37 100644 --- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.h +++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.h | |||
@@ -29,9 +29,8 @@ | |||
29 | 29 | ||
30 | struct gk20a; | 30 | struct gk20a; |
31 | struct gr_gk20a_isr_data; | 31 | struct gr_gk20a_isr_data; |
32 | struct channel_ctx_gk20a; | 32 | struct nvgpu_gr_ctx; |
33 | struct zbc_entry; | 33 | struct zbc_entry; |
34 | struct gr_ctx_desc; | ||
35 | struct nvgpu_preemption_modes_rec; | 34 | struct nvgpu_preemption_modes_rec; |
36 | struct gk20a_debug_output; | 35 | struct gk20a_debug_output; |
37 | 36 | ||
@@ -75,7 +74,7 @@ int gr_gp10b_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc, | |||
75 | int gr_gp10b_commit_global_cb_manager(struct gk20a *g, | 74 | int gr_gp10b_commit_global_cb_manager(struct gk20a *g, |
76 | struct channel_gk20a *c, bool patch); | 75 | struct channel_gk20a *c, bool patch); |
77 | void gr_gp10b_commit_global_pagepool(struct gk20a *g, | 76 | void gr_gp10b_commit_global_pagepool(struct gk20a *g, |
78 | struct channel_ctx_gk20a *ch_ctx, | 77 | struct nvgpu_gr_ctx *ch_ctx, |
79 | u64 addr, u32 size, bool patch); | 78 | u64 addr, u32 size, bool patch); |
80 | u32 gr_gp10b_get_gpcs_swdx_dss_zbc_c_format_reg(struct gk20a *g); | 79 | u32 gr_gp10b_get_gpcs_swdx_dss_zbc_c_format_reg(struct gk20a *g); |
81 | u32 gr_gp10b_get_gpcs_swdx_dss_zbc_z_format_reg(struct gk20a *g); | 80 | u32 gr_gp10b_get_gpcs_swdx_dss_zbc_z_format_reg(struct gk20a *g); |
@@ -93,28 +92,28 @@ void gr_gp10b_set_alpha_circular_buffer_size(struct gk20a *g, u32 data); | |||
93 | void gr_gp10b_set_circular_buffer_size(struct gk20a *g, u32 data); | 92 | void gr_gp10b_set_circular_buffer_size(struct gk20a *g, u32 data); |
94 | int gr_gp10b_init_ctx_state(struct gk20a *g); | 93 | int gr_gp10b_init_ctx_state(struct gk20a *g); |
95 | int gr_gp10b_set_ctxsw_preemption_mode(struct gk20a *g, | 94 | int gr_gp10b_set_ctxsw_preemption_mode(struct gk20a *g, |
96 | struct gr_ctx_desc *gr_ctx, | 95 | struct nvgpu_gr_ctx *gr_ctx, |
97 | struct vm_gk20a *vm, u32 class, | 96 | struct vm_gk20a *vm, u32 class, |
98 | u32 graphics_preempt_mode, | 97 | u32 graphics_preempt_mode, |
99 | u32 compute_preempt_mode); | 98 | u32 compute_preempt_mode); |
100 | int gr_gp10b_alloc_gr_ctx(struct gk20a *g, | 99 | int gr_gp10b_alloc_gr_ctx(struct gk20a *g, |
101 | struct gr_ctx_desc **gr_ctx, struct vm_gk20a *vm, | 100 | struct nvgpu_gr_ctx *gr_ctx, struct vm_gk20a *vm, |
102 | u32 class, | 101 | u32 class, |
103 | u32 flags); | 102 | u32 flags); |
104 | void gr_gp10b_update_ctxsw_preemption_mode(struct gk20a *g, | 103 | void gr_gp10b_update_ctxsw_preemption_mode(struct gk20a *g, |
105 | struct channel_ctx_gk20a *ch_ctx, | 104 | struct channel_gk20a *c, |
106 | struct nvgpu_mem *mem); | 105 | struct nvgpu_mem *mem); |
107 | int gr_gp10b_dump_gr_status_regs(struct gk20a *g, | 106 | int gr_gp10b_dump_gr_status_regs(struct gk20a *g, |
108 | struct gk20a_debug_output *o); | 107 | struct gk20a_debug_output *o); |
109 | void gr_gp10b_dump_ctxsw_stats(struct gk20a *g, struct vm_gk20a *vm, | 108 | void gr_gp10b_dump_ctxsw_stats(struct gk20a *g, struct vm_gk20a *vm, |
110 | struct gr_ctx_desc *gr_ctx); | 109 | struct nvgpu_gr_ctx *gr_ctx); |
111 | int gr_gp10b_wait_empty(struct gk20a *g, unsigned long duration_ms, | 110 | int gr_gp10b_wait_empty(struct gk20a *g, unsigned long duration_ms, |
112 | u32 expect_delay); | 111 | u32 expect_delay); |
113 | void gr_gp10b_commit_global_attrib_cb(struct gk20a *g, | 112 | void gr_gp10b_commit_global_attrib_cb(struct gk20a *g, |
114 | struct channel_ctx_gk20a *ch_ctx, | 113 | struct nvgpu_gr_ctx *ch_ctx, |
115 | u64 addr, bool patch); | 114 | u64 addr, bool patch); |
116 | void gr_gp10b_commit_global_bundle_cb(struct gk20a *g, | 115 | void gr_gp10b_commit_global_bundle_cb(struct gk20a *g, |
117 | struct channel_ctx_gk20a *ch_ctx, | 116 | struct nvgpu_gr_ctx *ch_ctx, |
118 | u64 addr, u64 size, bool patch); | 117 | u64 addr, u64 size, bool patch); |
119 | int gr_gp10b_load_smid_config(struct gk20a *g); | 118 | int gr_gp10b_load_smid_config(struct gk20a *g); |
120 | void gr_gp10b_set_gpc_tpc_mask(struct gk20a *g, u32 gpc_index); | 119 | void gr_gp10b_set_gpc_tpc_mask(struct gk20a *g, u32 gpc_index); |
@@ -133,7 +132,7 @@ int gr_gp10b_suspend_contexts(struct gk20a *g, | |||
133 | int gr_gp10b_set_boosted_ctx(struct channel_gk20a *ch, | 132 | int gr_gp10b_set_boosted_ctx(struct channel_gk20a *ch, |
134 | bool boost); | 133 | bool boost); |
135 | void gr_gp10b_update_boosted_ctx(struct gk20a *g, struct nvgpu_mem *mem, | 134 | void gr_gp10b_update_boosted_ctx(struct gk20a *g, struct nvgpu_mem *mem, |
136 | struct gr_ctx_desc *gr_ctx); | 135 | struct nvgpu_gr_ctx *gr_ctx); |
137 | int gr_gp10b_set_preemption_mode(struct channel_gk20a *ch, | 136 | int gr_gp10b_set_preemption_mode(struct channel_gk20a *ch, |
138 | u32 graphics_preempt_mode, | 137 | u32 graphics_preempt_mode, |
139 | u32 compute_preempt_mode); | 138 | u32 compute_preempt_mode); |
diff --git a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c index aaee595d..7041c5bd 100644 --- a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c +++ b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c | |||
@@ -236,7 +236,6 @@ static const struct gpu_ops gp10b_ops = { | |||
236 | .load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode, | 236 | .load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode, |
237 | .set_gpc_tpc_mask = gr_gp10b_set_gpc_tpc_mask, | 237 | .set_gpc_tpc_mask = gr_gp10b_set_gpc_tpc_mask, |
238 | .get_gpc_tpc_mask = gr_gm20b_get_gpc_tpc_mask, | 238 | .get_gpc_tpc_mask = gr_gm20b_get_gpc_tpc_mask, |
239 | .free_channel_ctx = gk20a_free_channel_ctx, | ||
240 | .alloc_obj_ctx = gk20a_alloc_obj_ctx, | 239 | .alloc_obj_ctx = gk20a_alloc_obj_ctx, |
241 | .bind_ctxsw_zcull = gr_gk20a_bind_ctxsw_zcull, | 240 | .bind_ctxsw_zcull = gr_gk20a_bind_ctxsw_zcull, |
242 | .get_zcull_info = gr_gk20a_get_zcull_info, | 241 | .get_zcull_info = gr_gk20a_get_zcull_info, |
diff --git a/drivers/gpu/nvgpu/gv100/hal_gv100.c b/drivers/gpu/nvgpu/gv100/hal_gv100.c index b29a73d4..95d1f076 100644 --- a/drivers/gpu/nvgpu/gv100/hal_gv100.c +++ b/drivers/gpu/nvgpu/gv100/hal_gv100.c | |||
@@ -305,7 +305,6 @@ static const struct gpu_ops gv100_ops = { | |||
305 | .load_ctxsw_ucode = gr_gm20b_load_ctxsw_ucode, | 305 | .load_ctxsw_ucode = gr_gm20b_load_ctxsw_ucode, |
306 | .set_gpc_tpc_mask = gr_gv100_set_gpc_tpc_mask, | 306 | .set_gpc_tpc_mask = gr_gv100_set_gpc_tpc_mask, |
307 | .get_gpc_tpc_mask = gr_gm20b_get_gpc_tpc_mask, | 307 | .get_gpc_tpc_mask = gr_gm20b_get_gpc_tpc_mask, |
308 | .free_channel_ctx = gk20a_free_channel_ctx, | ||
309 | .alloc_obj_ctx = gk20a_alloc_obj_ctx, | 308 | .alloc_obj_ctx = gk20a_alloc_obj_ctx, |
310 | .bind_ctxsw_zcull = gr_gk20a_bind_ctxsw_zcull, | 309 | .bind_ctxsw_zcull = gr_gk20a_bind_ctxsw_zcull, |
311 | .get_zcull_info = gr_gk20a_get_zcull_info, | 310 | .get_zcull_info = gr_gk20a_get_zcull_info, |
diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index d5924169..3030def8 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c | |||
@@ -1373,7 +1373,7 @@ fail_free: | |||
1373 | } | 1373 | } |
1374 | 1374 | ||
1375 | int gr_gv11b_set_ctxsw_preemption_mode(struct gk20a *g, | 1375 | int gr_gv11b_set_ctxsw_preemption_mode(struct gk20a *g, |
1376 | struct gr_ctx_desc *gr_ctx, | 1376 | struct nvgpu_gr_ctx *gr_ctx, |
1377 | struct vm_gk20a *vm, u32 class, | 1377 | struct vm_gk20a *vm, u32 class, |
1378 | u32 graphics_preempt_mode, | 1378 | u32 graphics_preempt_mode, |
1379 | u32 compute_preempt_mode) | 1379 | u32 compute_preempt_mode) |
@@ -1497,13 +1497,13 @@ fail: | |||
1497 | } | 1497 | } |
1498 | 1498 | ||
1499 | void gr_gv11b_update_ctxsw_preemption_mode(struct gk20a *g, | 1499 | void gr_gv11b_update_ctxsw_preemption_mode(struct gk20a *g, |
1500 | struct channel_ctx_gk20a *ch_ctx, | 1500 | struct channel_gk20a *c, |
1501 | struct nvgpu_mem *mem) | 1501 | struct nvgpu_mem *mem) |
1502 | { | 1502 | { |
1503 | struct gr_ctx_desc *gr_ctx = ch_ctx->gr_ctx; | 1503 | struct tsg_gk20a *tsg; |
1504 | struct ctx_header_desc *ctx = &ch_ctx->ctx_header; | 1504 | struct nvgpu_gr_ctx *gr_ctx; |
1505 | struct ctx_header_desc *ctx = &c->ctx_header; | ||
1505 | struct nvgpu_mem *ctxheader = &ctx->mem; | 1506 | struct nvgpu_mem *ctxheader = &ctx->mem; |
1506 | |||
1507 | u32 gfxp_preempt_option = | 1507 | u32 gfxp_preempt_option = |
1508 | ctxsw_prog_main_image_graphics_preemption_options_control_gfxp_f(); | 1508 | ctxsw_prog_main_image_graphics_preemption_options_control_gfxp_f(); |
1509 | u32 cilp_preempt_option = | 1509 | u32 cilp_preempt_option = |
@@ -1514,6 +1514,12 @@ void gr_gv11b_update_ctxsw_preemption_mode(struct gk20a *g, | |||
1514 | 1514 | ||
1515 | gk20a_dbg_fn(""); | 1515 | gk20a_dbg_fn(""); |
1516 | 1516 | ||
1517 | tsg = tsg_gk20a_from_ch(c); | ||
1518 | if (!tsg) | ||
1519 | return; | ||
1520 | |||
1521 | gr_ctx = &tsg->gr_ctx; | ||
1522 | |||
1517 | if (gr_ctx->graphics_preempt_mode == | 1523 | if (gr_ctx->graphics_preempt_mode == |
1518 | NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP) { | 1524 | NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP) { |
1519 | gk20a_dbg_info("GfxP: %x", gfxp_preempt_option); | 1525 | gk20a_dbg_info("GfxP: %x", gfxp_preempt_option); |
@@ -1552,7 +1558,7 @@ void gr_gv11b_update_ctxsw_preemption_mode(struct gk20a *g, | |||
1552 | gr_ctx->preempt_ctxsw_buffer.gpu_va); | 1558 | gr_ctx->preempt_ctxsw_buffer.gpu_va); |
1553 | } | 1559 | } |
1554 | 1560 | ||
1555 | err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx, true); | 1561 | err = gr_gk20a_ctx_patch_write_begin(g, gr_ctx, true); |
1556 | if (err) { | 1562 | if (err) { |
1557 | nvgpu_err(g, "can't map patch context"); | 1563 | nvgpu_err(g, "can't map patch context"); |
1558 | goto out; | 1564 | goto out; |
@@ -1564,7 +1570,7 @@ void gr_gv11b_update_ctxsw_preemption_mode(struct gk20a *g, | |||
1564 | (32 - gr_gpcs_setup_attrib_cb_base_addr_39_12_align_bits_v())); | 1570 | (32 - gr_gpcs_setup_attrib_cb_base_addr_39_12_align_bits_v())); |
1565 | 1571 | ||
1566 | gk20a_dbg_info("attrib cb addr : 0x%016x", addr); | 1572 | gk20a_dbg_info("attrib cb addr : 0x%016x", addr); |
1567 | g->ops.gr.commit_global_attrib_cb(g, ch_ctx, addr, true); | 1573 | g->ops.gr.commit_global_attrib_cb(g, gr_ctx, addr, true); |
1568 | 1574 | ||
1569 | addr = (u64_lo32(gr_ctx->pagepool_ctxsw_buffer.gpu_va) >> | 1575 | addr = (u64_lo32(gr_ctx->pagepool_ctxsw_buffer.gpu_va) >> |
1570 | gr_scc_pagepool_base_addr_39_8_align_bits_v()) | | 1576 | gr_scc_pagepool_base_addr_39_8_align_bits_v()) | |
@@ -1575,7 +1581,7 @@ void gr_gv11b_update_ctxsw_preemption_mode(struct gk20a *g, | |||
1575 | if (size == g->ops.gr.pagepool_default_size(g)) | 1581 | if (size == g->ops.gr.pagepool_default_size(g)) |
1576 | size = gr_scc_pagepool_total_pages_hwmax_v(); | 1582 | size = gr_scc_pagepool_total_pages_hwmax_v(); |
1577 | 1583 | ||
1578 | g->ops.gr.commit_global_pagepool(g, ch_ctx, addr, size, true); | 1584 | g->ops.gr.commit_global_pagepool(g, gr_ctx, addr, size, true); |
1579 | 1585 | ||
1580 | addr = (u64_lo32(gr_ctx->spill_ctxsw_buffer.gpu_va) >> | 1586 | addr = (u64_lo32(gr_ctx->spill_ctxsw_buffer.gpu_va) >> |
1581 | gr_gpc0_swdx_rm_spill_buffer_addr_39_8_align_bits_v()) | | 1587 | gr_gpc0_swdx_rm_spill_buffer_addr_39_8_align_bits_v()) | |
@@ -1584,28 +1590,28 @@ void gr_gv11b_update_ctxsw_preemption_mode(struct gk20a *g, | |||
1584 | size = gr_ctx->spill_ctxsw_buffer.size / | 1590 | size = gr_ctx->spill_ctxsw_buffer.size / |
1585 | gr_gpc0_swdx_rm_spill_buffer_size_256b_byte_granularity_v(); | 1591 | gr_gpc0_swdx_rm_spill_buffer_size_256b_byte_granularity_v(); |
1586 | 1592 | ||
1587 | gr_gk20a_ctx_patch_write(g, ch_ctx, | 1593 | gr_gk20a_ctx_patch_write(g, gr_ctx, |
1588 | gr_gpc0_swdx_rm_spill_buffer_addr_r(), | 1594 | gr_gpc0_swdx_rm_spill_buffer_addr_r(), |
1589 | gr_gpc0_swdx_rm_spill_buffer_addr_39_8_f(addr), | 1595 | gr_gpc0_swdx_rm_spill_buffer_addr_39_8_f(addr), |
1590 | true); | 1596 | true); |
1591 | gr_gk20a_ctx_patch_write(g, ch_ctx, | 1597 | gr_gk20a_ctx_patch_write(g, gr_ctx, |
1592 | gr_gpc0_swdx_rm_spill_buffer_size_r(), | 1598 | gr_gpc0_swdx_rm_spill_buffer_size_r(), |
1593 | gr_gpc0_swdx_rm_spill_buffer_size_256b_f(size), | 1599 | gr_gpc0_swdx_rm_spill_buffer_size_256b_f(size), |
1594 | true); | 1600 | true); |
1595 | 1601 | ||
1596 | cbes_reserve = gr_gpcs_swdx_beta_cb_ctrl_cbes_reserve_gfxp_v(); | 1602 | cbes_reserve = gr_gpcs_swdx_beta_cb_ctrl_cbes_reserve_gfxp_v(); |
1597 | gr_gk20a_ctx_patch_write(g, ch_ctx, | 1603 | gr_gk20a_ctx_patch_write(g, gr_ctx, |
1598 | gr_gpcs_swdx_beta_cb_ctrl_r(), | 1604 | gr_gpcs_swdx_beta_cb_ctrl_r(), |
1599 | gr_gpcs_swdx_beta_cb_ctrl_cbes_reserve_f( | 1605 | gr_gpcs_swdx_beta_cb_ctrl_cbes_reserve_f( |
1600 | cbes_reserve), | 1606 | cbes_reserve), |
1601 | true); | 1607 | true); |
1602 | gr_gk20a_ctx_patch_write(g, ch_ctx, | 1608 | gr_gk20a_ctx_patch_write(g, gr_ctx, |
1603 | gr_gpcs_ppcs_cbm_beta_cb_ctrl_r(), | 1609 | gr_gpcs_ppcs_cbm_beta_cb_ctrl_r(), |
1604 | gr_gpcs_ppcs_cbm_beta_cb_ctrl_cbes_reserve_f( | 1610 | gr_gpcs_ppcs_cbm_beta_cb_ctrl_cbes_reserve_f( |
1605 | cbes_reserve), | 1611 | cbes_reserve), |
1606 | true); | 1612 | true); |
1607 | 1613 | ||
1608 | gr_gk20a_ctx_patch_write_end(g, ch_ctx, true); | 1614 | gr_gk20a_ctx_patch_write_end(g, gr_ctx, true); |
1609 | } | 1615 | } |
1610 | 1616 | ||
1611 | out: | 1617 | out: |
@@ -1902,10 +1908,9 @@ int gr_gv11b_wait_empty(struct gk20a *g, unsigned long duration_ms, | |||
1902 | } | 1908 | } |
1903 | 1909 | ||
1904 | void gr_gv11b_commit_global_attrib_cb(struct gk20a *g, | 1910 | void gr_gv11b_commit_global_attrib_cb(struct gk20a *g, |
1905 | struct channel_ctx_gk20a *ch_ctx, | 1911 | struct nvgpu_gr_ctx *gr_ctx, |
1906 | u64 addr, bool patch) | 1912 | u64 addr, bool patch) |
1907 | { | 1913 | { |
1908 | struct gr_ctx_desc *gr_ctx = ch_ctx->gr_ctx; | ||
1909 | int attrBufferSize; | 1914 | int attrBufferSize; |
1910 | 1915 | ||
1911 | if (gr_ctx->preempt_ctxsw_buffer.gpu_va) | 1916 | if (gr_ctx->preempt_ctxsw_buffer.gpu_va) |
@@ -1915,16 +1920,16 @@ void gr_gv11b_commit_global_attrib_cb(struct gk20a *g, | |||
1915 | 1920 | ||
1916 | attrBufferSize /= gr_gpcs_tpcs_tex_rm_cb_1_size_div_128b_granularity_f(); | 1921 | attrBufferSize /= gr_gpcs_tpcs_tex_rm_cb_1_size_div_128b_granularity_f(); |
1917 | 1922 | ||
1918 | gr_gm20b_commit_global_attrib_cb(g, ch_ctx, addr, patch); | 1923 | gr_gm20b_commit_global_attrib_cb(g, gr_ctx, addr, patch); |
1919 | 1924 | ||
1920 | gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_r(), | 1925 | gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_r(), |
1921 | gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_v_f(addr) | | 1926 | gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_v_f(addr) | |
1922 | gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_valid_true_f(), patch); | 1927 | gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_valid_true_f(), patch); |
1923 | 1928 | ||
1924 | gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_tex_rm_cb_0_r(), | 1929 | gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_tpcs_tex_rm_cb_0_r(), |
1925 | gr_gpcs_tpcs_tex_rm_cb_0_base_addr_43_12_f(addr), patch); | 1930 | gr_gpcs_tpcs_tex_rm_cb_0_base_addr_43_12_f(addr), patch); |
1926 | 1931 | ||
1927 | gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_tex_rm_cb_1_r(), | 1932 | gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_tpcs_tex_rm_cb_1_r(), |
1928 | gr_gpcs_tpcs_tex_rm_cb_1_size_div_128b_f(attrBufferSize) | | 1933 | gr_gpcs_tpcs_tex_rm_cb_1_size_div_128b_f(attrBufferSize) | |
1929 | gr_gpcs_tpcs_tex_rm_cb_1_valid_true_f(), patch); | 1934 | gr_gpcs_tpcs_tex_rm_cb_1_valid_true_f(), patch); |
1930 | } | 1935 | } |
@@ -2042,6 +2047,7 @@ int gr_gv11b_pre_process_sm_exception(struct gk20a *g, | |||
2042 | u32 offset = gk20a_gr_gpc_offset(g, gpc) + | 2047 | u32 offset = gk20a_gr_gpc_offset(g, gpc) + |
2043 | gk20a_gr_tpc_offset(g, tpc) + | 2048 | gk20a_gr_tpc_offset(g, tpc) + |
2044 | gv11b_gr_sm_offset(g, sm); | 2049 | gv11b_gr_sm_offset(g, sm); |
2050 | struct tsg_gk20a *tsg; | ||
2045 | 2051 | ||
2046 | *early_exit = false; | 2052 | *early_exit = false; |
2047 | *ignore_debugger = false; | 2053 | *ignore_debugger = false; |
@@ -2054,9 +2060,14 @@ int gr_gv11b_pre_process_sm_exception(struct gk20a *g, | |||
2054 | return gr_gv11b_handle_warp_esr_error_mmu_nack(g, gpc, tpc, sm, | 2060 | return gr_gv11b_handle_warp_esr_error_mmu_nack(g, gpc, tpc, sm, |
2055 | warp_esr, fault_ch); | 2061 | warp_esr, fault_ch); |
2056 | 2062 | ||
2057 | if (fault_ch) | 2063 | if (fault_ch) { |
2058 | cilp_enabled = (fault_ch->ch_ctx.gr_ctx->compute_preempt_mode == | 2064 | tsg = tsg_gk20a_from_ch(fault_ch); |
2065 | if (!tsg) | ||
2066 | return -EINVAL; | ||
2067 | |||
2068 | cilp_enabled = (tsg->gr_ctx.compute_preempt_mode == | ||
2059 | NVGPU_PREEMPTION_MODE_COMPUTE_CILP); | 2069 | NVGPU_PREEMPTION_MODE_COMPUTE_CILP); |
2070 | } | ||
2060 | 2071 | ||
2061 | gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, | 2072 | gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, |
2062 | "SM Exception received on gpc %d tpc %d sm %d = 0x%08x", | 2073 | "SM Exception received on gpc %d tpc %d sm %d = 0x%08x", |
@@ -2509,7 +2520,7 @@ int gr_gv11b_commit_inst(struct channel_gk20a *c, u64 gpu_va) | |||
2509 | if (err) | 2520 | if (err) |
2510 | return err; | 2521 | return err; |
2511 | 2522 | ||
2512 | ctx = &c->ch_ctx.ctx_header; | 2523 | ctx = &c->ctx_header; |
2513 | addr_lo = u64_lo32(ctx->mem.gpu_va) >> ram_in_base_shift_v(); | 2524 | addr_lo = u64_lo32(ctx->mem.gpu_va) >> ram_in_base_shift_v(); |
2514 | addr_hi = u64_hi32(ctx->mem.gpu_va); | 2525 | addr_hi = u64_hi32(ctx->mem.gpu_va); |
2515 | 2526 | ||
@@ -2529,7 +2540,7 @@ int gr_gv11b_commit_inst(struct channel_gk20a *c, u64 gpu_va) | |||
2529 | 2540 | ||
2530 | int gr_gv11b_commit_global_timeslice(struct gk20a *g, struct channel_gk20a *c) | 2541 | int gr_gv11b_commit_global_timeslice(struct gk20a *g, struct channel_gk20a *c) |
2531 | { | 2542 | { |
2532 | struct channel_ctx_gk20a *ch_ctx = NULL; | 2543 | struct nvgpu_gr_ctx *ch_ctx = NULL; |
2533 | u32 pd_ab_dist_cfg0; | 2544 | u32 pd_ab_dist_cfg0; |
2534 | u32 ds_debug; | 2545 | u32 ds_debug; |
2535 | u32 mpc_vtg_debug; | 2546 | u32 mpc_vtg_debug; |
@@ -2836,11 +2847,18 @@ int gv11b_gr_update_sm_error_state(struct gk20a *g, | |||
2836 | struct channel_gk20a *ch, u32 sm_id, | 2847 | struct channel_gk20a *ch, u32 sm_id, |
2837 | struct nvgpu_gr_sm_error_state *sm_error_state) | 2848 | struct nvgpu_gr_sm_error_state *sm_error_state) |
2838 | { | 2849 | { |
2850 | struct tsg_gk20a *tsg; | ||
2839 | u32 gpc, tpc, sm, offset; | 2851 | u32 gpc, tpc, sm, offset; |
2840 | struct gr_gk20a *gr = &g->gr; | 2852 | struct gr_gk20a *gr = &g->gr; |
2841 | struct channel_ctx_gk20a *ch_ctx = &ch->ch_ctx; | 2853 | struct nvgpu_gr_ctx *ch_ctx; |
2842 | int err = 0; | 2854 | int err = 0; |
2843 | 2855 | ||
2856 | tsg = tsg_gk20a_from_ch(ch); | ||
2857 | if (!tsg) | ||
2858 | return -EINVAL; | ||
2859 | |||
2860 | ch_ctx = &tsg->gr_ctx; | ||
2861 | |||
2844 | nvgpu_mutex_acquire(&g->dbg_sessions_lock); | 2862 | nvgpu_mutex_acquire(&g->dbg_sessions_lock); |
2845 | 2863 | ||
2846 | gr->sm_error_states[sm_id].hww_global_esr = | 2864 | gr->sm_error_states[sm_id].hww_global_esr = |
diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.h b/drivers/gpu/nvgpu/gv11b/gr_gv11b.h index b69e69bd..022a7698 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.h +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.h | |||
@@ -41,9 +41,10 @@ struct zbc_s_table { | |||
41 | }; | 41 | }; |
42 | 42 | ||
43 | struct gk20a; | 43 | struct gk20a; |
44 | struct gr_gk20a; | ||
44 | struct zbc_entry; | 45 | struct zbc_entry; |
45 | struct zbc_query_params; | 46 | struct zbc_query_params; |
46 | struct channel_ctx_gk20a; | 47 | struct nvgpu_gr_ctx; |
47 | struct nvgpu_warpstate; | 48 | struct nvgpu_warpstate; |
48 | struct nvgpu_gr_sm_error_state; | 49 | struct nvgpu_gr_sm_error_state; |
49 | struct gr_ctx_desc; | 50 | struct gr_ctx_desc; |
@@ -128,7 +129,7 @@ int gr_gv11b_dump_gr_status_regs(struct gk20a *g, | |||
128 | int gr_gv11b_wait_empty(struct gk20a *g, unsigned long duration_ms, | 129 | int gr_gv11b_wait_empty(struct gk20a *g, unsigned long duration_ms, |
129 | u32 expect_delay); | 130 | u32 expect_delay); |
130 | void gr_gv11b_commit_global_attrib_cb(struct gk20a *g, | 131 | void gr_gv11b_commit_global_attrib_cb(struct gk20a *g, |
131 | struct channel_ctx_gk20a *ch_ctx, | 132 | struct nvgpu_gr_ctx *ch_ctx, |
132 | u64 addr, bool patch); | 133 | u64 addr, bool patch); |
133 | void gr_gv11b_set_gpc_tpc_mask(struct gk20a *g, u32 gpc_index); | 134 | void gr_gv11b_set_gpc_tpc_mask(struct gk20a *g, u32 gpc_index); |
134 | void gr_gv11b_get_access_map(struct gk20a *g, | 135 | void gr_gv11b_get_access_map(struct gk20a *g, |
@@ -222,13 +223,13 @@ unsigned long gr_gv11b_get_max_gfxp_wfi_timeout_count(struct gk20a *g); | |||
222 | void gr_gv11b_ecc_init_scrub_reg(struct gk20a *g); | 223 | void gr_gv11b_ecc_init_scrub_reg(struct gk20a *g); |
223 | 224 | ||
224 | int gr_gv11b_set_ctxsw_preemption_mode(struct gk20a *g, | 225 | int gr_gv11b_set_ctxsw_preemption_mode(struct gk20a *g, |
225 | struct gr_ctx_desc *gr_ctx, | 226 | struct nvgpu_gr_ctx *gr_ctx, |
226 | struct vm_gk20a *vm, u32 class, | 227 | struct vm_gk20a *vm, u32 class, |
227 | u32 graphics_preempt_mode, | 228 | u32 graphics_preempt_mode, |
228 | u32 compute_preempt_mode); | 229 | u32 compute_preempt_mode); |
229 | 230 | ||
230 | void gr_gv11b_update_ctxsw_preemption_mode(struct gk20a *g, | 231 | void gr_gv11b_update_ctxsw_preemption_mode(struct gk20a *g, |
231 | struct channel_ctx_gk20a *ch_ctx, | 232 | struct channel_gk20a *ch_ctx, |
232 | struct nvgpu_mem *mem); | 233 | struct nvgpu_mem *mem); |
233 | 234 | ||
234 | #endif | 235 | #endif |
diff --git a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c index aa3d52af..0a552f5b 100644 --- a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c | |||
@@ -272,7 +272,6 @@ static const struct gpu_ops gv11b_ops = { | |||
272 | .load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode, | 272 | .load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode, |
273 | .set_gpc_tpc_mask = gr_gv11b_set_gpc_tpc_mask, | 273 | .set_gpc_tpc_mask = gr_gv11b_set_gpc_tpc_mask, |
274 | .get_gpc_tpc_mask = gr_gm20b_get_gpc_tpc_mask, | 274 | .get_gpc_tpc_mask = gr_gm20b_get_gpc_tpc_mask, |
275 | .free_channel_ctx = gk20a_free_channel_ctx, | ||
276 | .alloc_obj_ctx = gk20a_alloc_obj_ctx, | 275 | .alloc_obj_ctx = gk20a_alloc_obj_ctx, |
277 | .bind_ctxsw_zcull = gr_gk20a_bind_ctxsw_zcull, | 276 | .bind_ctxsw_zcull = gr_gk20a_bind_ctxsw_zcull, |
278 | .get_zcull_info = gr_gk20a_get_zcull_info, | 277 | .get_zcull_info = gr_gk20a_get_zcull_info, |
diff --git a/drivers/gpu/nvgpu/gv11b/subctx_gv11b.c b/drivers/gpu/nvgpu/gv11b/subctx_gv11b.c index fe1aa8a5..607fff91 100644 --- a/drivers/gpu/nvgpu/gv11b/subctx_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/subctx_gv11b.c | |||
@@ -43,7 +43,7 @@ static void gv11b_subctx_commit_pdb(struct channel_gk20a *c, | |||
43 | 43 | ||
44 | void gv11b_free_subctx_header(struct channel_gk20a *c) | 44 | void gv11b_free_subctx_header(struct channel_gk20a *c) |
45 | { | 45 | { |
46 | struct ctx_header_desc *ctx = &c->ch_ctx.ctx_header; | 46 | struct ctx_header_desc *ctx = &c->ctx_header; |
47 | struct gk20a *g = c->g; | 47 | struct gk20a *g = c->g; |
48 | 48 | ||
49 | nvgpu_log(g, gpu_dbg_fn, "gv11b_free_subctx_header"); | 49 | nvgpu_log(g, gpu_dbg_fn, "gv11b_free_subctx_header"); |
@@ -57,13 +57,13 @@ void gv11b_free_subctx_header(struct channel_gk20a *c) | |||
57 | 57 | ||
58 | int gv11b_alloc_subctx_header(struct channel_gk20a *c) | 58 | int gv11b_alloc_subctx_header(struct channel_gk20a *c) |
59 | { | 59 | { |
60 | struct ctx_header_desc *ctx = &c->ch_ctx.ctx_header; | 60 | struct ctx_header_desc *ctx = &c->ctx_header; |
61 | struct gk20a *g = c->g; | 61 | struct gk20a *g = c->g; |
62 | int ret = 0; | 62 | int ret = 0; |
63 | 63 | ||
64 | nvgpu_log(g, gpu_dbg_fn, "gv11b_alloc_subctx_header"); | 64 | nvgpu_log(g, gpu_dbg_fn, "gv11b_alloc_subctx_header"); |
65 | 65 | ||
66 | if (ctx->mem.gpu_va == 0) { | 66 | if (!nvgpu_mem_is_valid(&ctx->mem)) { |
67 | ret = nvgpu_dma_alloc_flags_sys(g, | 67 | ret = nvgpu_dma_alloc_flags_sys(g, |
68 | 0, /* No Special flags */ | 68 | 0, /* No Special flags */ |
69 | ctxsw_prog_fecs_header_v(), | 69 | ctxsw_prog_fecs_header_v(), |
@@ -111,20 +111,50 @@ static void gv11b_init_subcontext_pdb(struct channel_gk20a *c, | |||
111 | 111 | ||
112 | int gv11b_update_subctx_header(struct channel_gk20a *c, u64 gpu_va) | 112 | int gv11b_update_subctx_header(struct channel_gk20a *c, u64 gpu_va) |
113 | { | 113 | { |
114 | struct ctx_header_desc *ctx = &c->ch_ctx.ctx_header; | 114 | struct ctx_header_desc *ctx = &c->ctx_header; |
115 | struct nvgpu_mem *gr_mem; | 115 | struct nvgpu_mem *gr_mem; |
116 | struct gk20a *g = c->g; | 116 | struct gk20a *g = c->g; |
117 | int ret = 0; | 117 | int ret = 0; |
118 | u32 addr_lo, addr_hi; | 118 | u32 addr_lo, addr_hi; |
119 | struct tsg_gk20a *tsg; | ||
120 | struct nvgpu_gr_ctx *gr_ctx; | ||
119 | 121 | ||
120 | addr_lo = u64_lo32(gpu_va); | 122 | tsg = tsg_gk20a_from_ch(c); |
121 | addr_hi = u64_hi32(gpu_va); | 123 | if (!tsg) |
124 | return -EINVAL; | ||
125 | |||
126 | gr_ctx = &tsg->gr_ctx; | ||
122 | 127 | ||
123 | gr_mem = &ctx->mem; | 128 | gr_mem = &ctx->mem; |
124 | g->ops.mm.l2_flush(g, true); | 129 | g->ops.mm.l2_flush(g, true); |
125 | if (nvgpu_mem_begin(g, gr_mem)) | 130 | if (nvgpu_mem_begin(g, gr_mem)) |
126 | return -ENOMEM; | 131 | return -ENOMEM; |
127 | 132 | ||
133 | /* set priv access map */ | ||
134 | addr_lo = u64_lo32(gr_ctx->global_ctx_buffer_va[PRIV_ACCESS_MAP_VA]); | ||
135 | addr_hi = u64_hi32(gr_ctx->global_ctx_buffer_va[PRIV_ACCESS_MAP_VA]); | ||
136 | nvgpu_mem_wr(g, gr_mem, | ||
137 | ctxsw_prog_main_image_priv_access_map_addr_lo_o(), | ||
138 | addr_lo); | ||
139 | nvgpu_mem_wr(g, gr_mem, | ||
140 | ctxsw_prog_main_image_priv_access_map_addr_hi_o(), | ||
141 | addr_hi); | ||
142 | |||
143 | addr_lo = u64_lo32(gr_ctx->patch_ctx.mem.gpu_va); | ||
144 | addr_hi = u64_hi32(gr_ctx->patch_ctx.mem.gpu_va); | ||
145 | nvgpu_mem_wr(g, gr_mem, | ||
146 | ctxsw_prog_main_image_patch_adr_lo_o(), | ||
147 | addr_lo); | ||
148 | nvgpu_mem_wr(g, gr_mem, | ||
149 | ctxsw_prog_main_image_patch_adr_hi_o(), | ||
150 | addr_hi); | ||
151 | |||
152 | g->ops.gr.write_pm_ptr(g, gr_mem, gr_ctx->pm_ctx.mem.gpu_va); | ||
153 | g->ops.gr.write_zcull_ptr(g, gr_mem, gr_ctx->zcull_ctx.gpu_va); | ||
154 | |||
155 | addr_lo = u64_lo32(gpu_va); | ||
156 | addr_hi = u64_hi32(gpu_va); | ||
157 | |||
128 | nvgpu_mem_wr(g, gr_mem, | 158 | nvgpu_mem_wr(g, gr_mem, |
129 | ctxsw_prog_main_image_context_buffer_ptr_hi_o(), addr_hi); | 159 | ctxsw_prog_main_image_context_buffer_ptr_hi_o(), addr_hi); |
130 | nvgpu_mem_wr(g, gr_mem, | 160 | nvgpu_mem_wr(g, gr_mem, |