author     Terje Bergstrom <tbergstrom@nvidia.com>              2017-12-15 12:04:15 -0500
committer  mobile promotions <svcmobile_promotions@nvidia.com>  2018-01-17 15:29:09 -0500
commit     2f6698b863c9cc1db6455637b7c72e812b470b93 (patch)
tree       d0c8abf32d6994b9f54bf5eddafd8316e038c829
parent     6a73114788ffafe4c53771c707ecbd9c9ea0a117 (diff)
gpu: nvgpu: Make graphics context property of TSG
Move graphics context ownership to TSG instead of channel. Combine
channel_ctx_gk20a and gr_ctx_desc into one structure, because the split
between them was arbitrary. Move the context header to be a property of
the channel.

Bug 1842197

Change-Id: I410e3262f80b318d8528bcbec270b63a2d8d2ff9
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1639532
Reviewed-by: Seshendra Gadagottu <sgadagottu@nvidia.com>
Tested-by: Seshendra Gadagottu <sgadagottu@nvidia.com>
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
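The shape of the merged structure can be read off the hunks below. As a rough,
non-authoritative sketch -- assembled only from the fields this diff touches, not
copied from the real gr_gk20a.h/tsg_gk20a.h definitions, with supporting types such
as struct nvgpu_mem, patch_desc, zcull_ctx_desc and pm_ctx_desc coming from the
nvgpu headers -- the per-TSG context now looks roughly like:

    /* Sketch only: field list inferred from this patch, not copied from the header. */
    struct nvgpu_gr_ctx {
            struct nvgpu_mem mem;           /* graphics context image (was gr_ctx_desc) */

            u32 graphics_preempt_mode;
            u32 compute_preempt_mode;
            struct nvgpu_mem preempt_ctxsw_buffer;
            struct nvgpu_mem spill_ctxsw_buffer;
            struct nvgpu_mem betacb_ctxsw_buffer;
            struct nvgpu_mem pagepool_ctxsw_buffer;

            /* members that used to live in channel_ctx_gk20a */
            struct patch_desc patch_ctx;
            struct zcull_ctx_desc zcull_ctx;
            struct pm_ctx_desc pm_ctx;
            u64 global_ctx_buffer_va[NR_GLOBAL_CTX_BUF_VA];
            u64 global_ctx_buffer_size[NR_GLOBAL_CTX_BUF_VA];
            bool global_ctx_buffer_mapped;

            u64 virt_ctx;                   /* vGPU server handle */
            u32 tsgid;                      /* owning TSG, used at free time */
    };

Only the subcontext header stays with the channel (struct ctx_header_desc ctx_header
in struct channel_gk20a); everything else is owned, bound and freed by the TSG.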
-rw-r--r--  drivers/gpu/nvgpu/common/linux/debug_fifo.c | 4
-rw-r--r--  drivers/gpu/nvgpu/common/linux/ioctl_channel.c | 8
-rw-r--r--  drivers/gpu/nvgpu/common/linux/sched.c | 13
-rw-r--r--  drivers/gpu/nvgpu/common/linux/vgpu/gp10b/vgpu_gr_gp10b.c | 25
-rw-r--r--  drivers/gpu/nvgpu/common/linux/vgpu/gp10b/vgpu_gr_gp10b.h | 4
-rw-r--r--  drivers/gpu/nvgpu/common/linux/vgpu/gp10b/vgpu_hal_gp10b.c | 1
-rw-r--r--  drivers/gpu/nvgpu/common/linux/vgpu/gr_vgpu.c | 343
-rw-r--r--  drivers/gpu/nvgpu/common/linux/vgpu/gr_vgpu.h | 1
-rw-r--r--  drivers/gpu/nvgpu/common/linux/vgpu/gv11b/vgpu_hal_gv11b.c | 1
-rw-r--r--  drivers/gpu/nvgpu/common/linux/vgpu/gv11b/vgpu_subctx_gv11b.c | 4
-rw-r--r--  drivers/gpu/nvgpu/common/linux/vgpu/vgpu.h | 8
-rw-r--r--  drivers/gpu/nvgpu/gk20a/channel_gk20a.c | 6
-rw-r--r--  drivers/gpu/nvgpu/gk20a/channel_gk20a.h | 23
-rw-r--r--  drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c | 12
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gk20a.h | 20
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 641
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gr_gk20a.h | 40
-rw-r--r--  drivers/gpu/nvgpu/gk20a/tsg_gk20a.c | 5
-rw-r--r--  drivers/gpu/nvgpu/gk20a/tsg_gk20a.h | 6
-rw-r--r--  drivers/gpu/nvgpu/gm20b/gr_gm20b.c | 50
-rw-r--r--  drivers/gpu/nvgpu/gm20b/gr_gm20b.h | 10
-rw-r--r--  drivers/gpu/nvgpu/gm20b/hal_gm20b.c | 1
-rw-r--r--  drivers/gpu/nvgpu/gp106/gr_gp106.c | 2
-rw-r--r--  drivers/gpu/nvgpu/gp106/gr_gp106.h | 2
-rw-r--r--  drivers/gpu/nvgpu/gp106/hal_gp106.c | 1
-rw-r--r--  drivers/gpu/nvgpu/gp10b/gr_gp10b.c | 202
-rw-r--r--  drivers/gpu/nvgpu/gp10b/gr_gp10b.h | 19
-rw-r--r--  drivers/gpu/nvgpu/gp10b/hal_gp10b.c | 1
-rw-r--r--  drivers/gpu/nvgpu/gv100/hal_gv100.c | 1
-rw-r--r--  drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 66
-rw-r--r--  drivers/gpu/nvgpu/gv11b/gr_gv11b.h | 9
-rw-r--r--  drivers/gpu/nvgpu/gv11b/hal_gv11b.c | 1
-rw-r--r--  drivers/gpu/nvgpu/gv11b/subctx_gv11b.c | 42
33 files changed, 833 insertions, 739 deletions
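Most hunks that follow apply the same mechanical transformation: code that used to
dereference ch->ch_ctx.gr_ctx (or fall back to per-channel state when no TSG
existed) now resolves the owning TSG first and uses its embedded context, failing
with -EINVAL for bare channels. A hypothetical helper -- not part of the patch,
since the hunks open-code these steps at each call site -- capturing the repeated
lookup pattern:

    /* Hypothetical, for illustration only: not added by this patch. */
    static struct nvgpu_gr_ctx *gr_ctx_of_channel(struct channel_gk20a *c)
    {
            struct tsg_gk20a *tsg = tsg_gk20a_from_ch(c);

            if (!tsg)
                    return NULL;    /* channel not bound to a TSG: no gr ctx */

            return &tsg->gr_ctx;
    }

This is also why the hunks below drop several gk20a_is_channel_marked_as_tsg()
special cases and remove free_channel_ctx from the gr HAL ops.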
diff --git a/drivers/gpu/nvgpu/common/linux/debug_fifo.c b/drivers/gpu/nvgpu/common/linux/debug_fifo.c
index ad157ee7..aeab0c92 100644
--- a/drivers/gpu/nvgpu/common/linux/debug_fifo.c
+++ b/drivers/gpu/nvgpu/common/linux/debug_fifo.c
@@ -91,8 +91,8 @@ static int gk20a_fifo_sched_debugfs_seq_show(
91 tsg->timeslice_us, 91 tsg->timeslice_us,
92 ch->timeout_ms_max, 92 ch->timeout_ms_max,
93 tsg->interleave_level, 93 tsg->interleave_level,
94 ch->ch_ctx.gr_ctx ? ch->ch_ctx.gr_ctx->graphics_preempt_mode : U32_MAX, 94 tsg->gr_ctx.graphics_preempt_mode,
95 ch->ch_ctx.gr_ctx ? ch->ch_ctx.gr_ctx->compute_preempt_mode : U32_MAX); 95 tsg->gr_ctx.compute_preempt_mode);
96 gk20a_channel_put(ch); 96 gk20a_channel_put(ch);
97 } 97 }
98 return 0; 98 return 0;
diff --git a/drivers/gpu/nvgpu/common/linux/ioctl_channel.c b/drivers/gpu/nvgpu/common/linux/ioctl_channel.c
index 94501a89..e8f4c14b 100644
--- a/drivers/gpu/nvgpu/common/linux/ioctl_channel.c
+++ b/drivers/gpu/nvgpu/common/linux/ioctl_channel.c
@@ -85,10 +85,10 @@ static void gk20a_channel_trace_sched_param(
85 tsg_gk20a_from_ch(ch)->timeslice_us, 85 tsg_gk20a_from_ch(ch)->timeslice_us,
86 ch->timeout_ms_max, 86 ch->timeout_ms_max,
87 gk20a_fifo_interleave_level_name(tsg->interleave_level), 87 gk20a_fifo_interleave_level_name(tsg->interleave_level),
88 gr_gk20a_graphics_preempt_mode_name(ch->ch_ctx.gr_ctx ? 88 gr_gk20a_graphics_preempt_mode_name(
89 ch->ch_ctx.gr_ctx->graphics_preempt_mode : 0), 89 tsg->gr_ctx.graphics_preempt_mode),
90 gr_gk20a_compute_preempt_mode_name(ch->ch_ctx.gr_ctx ? 90 gr_gk20a_compute_preempt_mode_name(
91 ch->ch_ctx.gr_ctx->compute_preempt_mode : 0)); 91 tsg->gr_ctx.compute_preempt_mode));
92} 92}
93 93
94/* 94/*
diff --git a/drivers/gpu/nvgpu/common/linux/sched.c b/drivers/gpu/nvgpu/common/linux/sched.c
index fc3f6ed8..e6211790 100644
--- a/drivers/gpu/nvgpu/common/linux/sched.c
+++ b/drivers/gpu/nvgpu/common/linux/sched.c
@@ -198,15 +198,10 @@ static int gk20a_sched_dev_ioctl_get_params(struct gk20a_sched_ctrl *sched,
198 arg->runlist_interleave = tsg->interleave_level; 198 arg->runlist_interleave = tsg->interleave_level;
199 arg->timeslice = tsg->timeslice_us; 199 arg->timeslice = tsg->timeslice_us;
200 200
201 if (tsg->tsg_gr_ctx) { 201 arg->graphics_preempt_mode =
202 arg->graphics_preempt_mode = 202 tsg->gr_ctx.graphics_preempt_mode;
203 tsg->tsg_gr_ctx->graphics_preempt_mode; 203 arg->compute_preempt_mode =
204 arg->compute_preempt_mode = 204 tsg->gr_ctx.compute_preempt_mode;
205 tsg->tsg_gr_ctx->compute_preempt_mode;
206 } else {
207 arg->graphics_preempt_mode = 0;
208 arg->compute_preempt_mode = 0;
209 }
210 205
211 nvgpu_ref_put(&tsg->refcount, nvgpu_ioctl_tsg_release); 206 nvgpu_ref_put(&tsg->refcount, nvgpu_ioctl_tsg_release);
212 207
diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/gp10b/vgpu_gr_gp10b.c b/drivers/gpu/nvgpu/common/linux/vgpu/gp10b/vgpu_gr_gp10b.c
index ed61f16b..9adf20d1 100644
--- a/drivers/gpu/nvgpu/common/linux/vgpu/gp10b/vgpu_gr_gp10b.c
+++ b/drivers/gpu/nvgpu/common/linux/vgpu/gp10b/vgpu_gr_gp10b.c
@@ -27,12 +27,11 @@
27#include <nvgpu/hw/gp10b/hw_gr_gp10b.h> 27#include <nvgpu/hw/gp10b/hw_gr_gp10b.h>
28 28
29int vgpu_gr_gp10b_alloc_gr_ctx(struct gk20a *g, 29int vgpu_gr_gp10b_alloc_gr_ctx(struct gk20a *g,
30 struct gr_ctx_desc **__gr_ctx, 30 struct nvgpu_gr_ctx *gr_ctx,
31 struct vm_gk20a *vm, 31 struct vm_gk20a *vm,
32 u32 class, 32 u32 class,
33 u32 flags) 33 u32 flags)
34{ 34{
35 struct gr_ctx_desc *gr_ctx;
36 u32 graphics_preempt_mode = 0; 35 u32 graphics_preempt_mode = 0;
37 u32 compute_preempt_mode = 0; 36 u32 compute_preempt_mode = 0;
38 struct vgpu_priv_data *priv = vgpu_get_priv_data(g); 37 struct vgpu_priv_data *priv = vgpu_get_priv_data(g);
@@ -40,12 +39,10 @@ int vgpu_gr_gp10b_alloc_gr_ctx(struct gk20a *g,
40 39
41 gk20a_dbg_fn(""); 40 gk20a_dbg_fn("");
42 41
43 err = vgpu_gr_alloc_gr_ctx(g, __gr_ctx, vm, class, flags); 42 err = vgpu_gr_alloc_gr_ctx(g, gr_ctx, vm, class, flags);
44 if (err) 43 if (err)
45 return err; 44 return err;
46 45
47 gr_ctx = *__gr_ctx;
48
49 if (flags & NVGPU_OBJ_CTX_FLAGS_SUPPORT_GFXP) 46 if (flags & NVGPU_OBJ_CTX_FLAGS_SUPPORT_GFXP)
50 graphics_preempt_mode = NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP; 47 graphics_preempt_mode = NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP;
51 if (flags & NVGPU_OBJ_CTX_FLAGS_SUPPORT_CILP) 48 if (flags & NVGPU_OBJ_CTX_FLAGS_SUPPORT_CILP)
@@ -84,7 +81,7 @@ fail:
84} 81}
85 82
86int vgpu_gr_gp10b_set_ctxsw_preemption_mode(struct gk20a *g, 83int vgpu_gr_gp10b_set_ctxsw_preemption_mode(struct gk20a *g,
87 struct gr_ctx_desc *gr_ctx, 84 struct nvgpu_gr_ctx *gr_ctx,
88 struct vm_gk20a *vm, u32 class, 85 struct vm_gk20a *vm, u32 class,
89 u32 graphics_preempt_mode, 86 u32 graphics_preempt_mode,
90 u32 compute_preempt_mode) 87 u32 compute_preempt_mode)
@@ -240,7 +237,7 @@ int vgpu_gr_gp10b_set_preemption_mode(struct channel_gk20a *ch,
240 u32 graphics_preempt_mode, 237 u32 graphics_preempt_mode,
241 u32 compute_preempt_mode) 238 u32 compute_preempt_mode)
242{ 239{
243 struct gr_ctx_desc *gr_ctx = ch->ch_ctx.gr_ctx; 240 struct nvgpu_gr_ctx *gr_ctx;
244 struct gk20a *g = ch->g; 241 struct gk20a *g = ch->g;
245 struct tsg_gk20a *tsg; 242 struct tsg_gk20a *tsg;
246 struct vm_gk20a *vm; 243 struct vm_gk20a *vm;
@@ -251,6 +248,13 @@ int vgpu_gr_gp10b_set_preemption_mode(struct channel_gk20a *ch,
251 if (!class) 248 if (!class)
252 return -EINVAL; 249 return -EINVAL;
253 250
251 tsg = tsg_gk20a_from_ch(ch);
252 if (!tsg)
253 return -EINVAL;
254
255 vm = tsg->vm;
256 gr_ctx = &tsg->gr_ctx;
257
254 /* skip setting anything if both modes are already set */ 258 /* skip setting anything if both modes are already set */
255 if (graphics_preempt_mode && 259 if (graphics_preempt_mode &&
256 (graphics_preempt_mode == gr_ctx->graphics_preempt_mode)) 260 (graphics_preempt_mode == gr_ctx->graphics_preempt_mode))
@@ -263,13 +267,6 @@ int vgpu_gr_gp10b_set_preemption_mode(struct channel_gk20a *ch,
263 if (graphics_preempt_mode == 0 && compute_preempt_mode == 0) 267 if (graphics_preempt_mode == 0 && compute_preempt_mode == 0)
264 return 0; 268 return 0;
265 269
266 if (gk20a_is_channel_marked_as_tsg(ch)) {
267 tsg = &g->fifo.tsg[ch->tsgid];
268 vm = tsg->vm;
269 } else {
270 vm = ch->vm;
271 }
272
273 if (g->ops.gr.set_ctxsw_preemption_mode) { 270 if (g->ops.gr.set_ctxsw_preemption_mode) {
274 err = g->ops.gr.set_ctxsw_preemption_mode(g, gr_ctx, vm, class, 271 err = g->ops.gr.set_ctxsw_preemption_mode(g, gr_ctx, vm, class,
275 graphics_preempt_mode, 272 graphics_preempt_mode,
diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/gp10b/vgpu_gr_gp10b.h b/drivers/gpu/nvgpu/common/linux/vgpu/gp10b/vgpu_gr_gp10b.h
index 31b88d19..559bd227 100644
--- a/drivers/gpu/nvgpu/common/linux/vgpu/gp10b/vgpu_gr_gp10b.h
+++ b/drivers/gpu/nvgpu/common/linux/vgpu/gp10b/vgpu_gr_gp10b.h
@@ -20,12 +20,12 @@
20#include "gk20a/gk20a.h" 20#include "gk20a/gk20a.h"
21 21
22int vgpu_gr_gp10b_alloc_gr_ctx(struct gk20a *g, 22int vgpu_gr_gp10b_alloc_gr_ctx(struct gk20a *g,
23 struct gr_ctx_desc **__gr_ctx, 23 struct nvgpu_gr_ctx *gr_ctx,
24 struct vm_gk20a *vm, 24 struct vm_gk20a *vm,
25 u32 class, 25 u32 class,
26 u32 flags); 26 u32 flags);
27int vgpu_gr_gp10b_set_ctxsw_preemption_mode(struct gk20a *g, 27int vgpu_gr_gp10b_set_ctxsw_preemption_mode(struct gk20a *g,
28 struct gr_ctx_desc *gr_ctx, 28 struct nvgpu_gr_ctx *gr_ctx,
29 struct vm_gk20a *vm, u32 class, 29 struct vm_gk20a *vm, u32 class,
30 u32 graphics_preempt_mode, 30 u32 graphics_preempt_mode,
31 u32 compute_preempt_mode); 31 u32 compute_preempt_mode);
diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/gp10b/vgpu_hal_gp10b.c b/drivers/gpu/nvgpu/common/linux/vgpu/gp10b/vgpu_hal_gp10b.c
index e8cb96b4..d5fd5102 100644
--- a/drivers/gpu/nvgpu/common/linux/vgpu/gp10b/vgpu_hal_gp10b.c
+++ b/drivers/gpu/nvgpu/common/linux/vgpu/gp10b/vgpu_hal_gp10b.c
@@ -112,7 +112,6 @@ static const struct gpu_ops vgpu_gp10b_ops = {
112 .load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode, 112 .load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode,
113 .set_gpc_tpc_mask = gr_gp10b_set_gpc_tpc_mask, 113 .set_gpc_tpc_mask = gr_gp10b_set_gpc_tpc_mask,
114 .get_gpc_tpc_mask = vgpu_gr_get_gpc_tpc_mask, 114 .get_gpc_tpc_mask = vgpu_gr_get_gpc_tpc_mask,
115 .free_channel_ctx = vgpu_gr_free_channel_ctx,
116 .alloc_obj_ctx = vgpu_gr_alloc_obj_ctx, 115 .alloc_obj_ctx = vgpu_gr_alloc_obj_ctx,
117 .bind_ctxsw_zcull = vgpu_gr_bind_ctxsw_zcull, 116 .bind_ctxsw_zcull = vgpu_gr_bind_ctxsw_zcull,
118 .get_zcull_info = vgpu_gr_get_zcull_info, 117 .get_zcull_info = vgpu_gr_get_zcull_info,
diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/gr_vgpu.c b/drivers/gpu/nvgpu/common/linux/vgpu/gr_vgpu.c
index e8790587..8f1c5d78 100644
--- a/drivers/gpu/nvgpu/common/linux/vgpu/gr_vgpu.c
+++ b/drivers/gpu/nvgpu/common/linux/vgpu/gr_vgpu.c
@@ -20,14 +20,18 @@
20 20
21#include <nvgpu/kmem.h> 21#include <nvgpu/kmem.h>
22#include <nvgpu/bug.h> 22#include <nvgpu/bug.h>
23#include <nvgpu/dma.h>
23#include <nvgpu/error_notifier.h> 24#include <nvgpu/error_notifier.h>
24#include <nvgpu/dma.h> 25#include <nvgpu/dma.h>
25 26
26#include "vgpu.h" 27#include "vgpu.h"
27#include "gr_vgpu.h" 28#include "gr_vgpu.h"
28#include "gk20a/dbg_gpu_gk20a.h" 29#include "gk20a/dbg_gpu_gk20a.h"
30#include "gk20a/channel_gk20a.h"
31#include "gk20a/tsg_gk20a.h"
29 32
30#include <nvgpu/hw/gk20a/hw_gr_gk20a.h> 33#include <nvgpu/hw/gk20a/hw_gr_gk20a.h>
34#include <nvgpu/hw/gk20a/hw_ctxsw_prog_gk20a.h>
31 35
32void vgpu_gr_detect_sm_arch(struct gk20a *g) 36void vgpu_gr_detect_sm_arch(struct gk20a *g)
33{ 37{
@@ -152,8 +156,9 @@ static int vgpu_gr_map_global_ctx_buffers(struct gk20a *g,
152 struct tegra_vgpu_cmd_msg msg; 156 struct tegra_vgpu_cmd_msg msg;
153 struct tegra_vgpu_ch_ctx_params *p = &msg.params.ch_ctx; 157 struct tegra_vgpu_ch_ctx_params *p = &msg.params.ch_ctx;
154 struct vm_gk20a *ch_vm = c->vm; 158 struct vm_gk20a *ch_vm = c->vm;
155 u64 *g_bfr_va = c->ch_ctx.global_ctx_buffer_va; 159 struct tsg_gk20a *tsg;
156 u64 *g_bfr_size = c->ch_ctx.global_ctx_buffer_size; 160 u64 *g_bfr_va;
161 u64 *g_bfr_size;
157 struct gr_gk20a *gr = &g->gr; 162 struct gr_gk20a *gr = &g->gr;
158 u64 gpu_va; 163 u64 gpu_va;
159 u32 i; 164 u32 i;
@@ -161,7 +166,12 @@ static int vgpu_gr_map_global_ctx_buffers(struct gk20a *g,
161 166
162 gk20a_dbg_fn(""); 167 gk20a_dbg_fn("");
163 168
164 /* FIXME: add VPR support */ 169 tsg = tsg_gk20a_from_ch(c);
170 if (!tsg)
171 return -EINVAL;
172
173 g_bfr_va = tsg->gr_ctx.global_ctx_buffer_va;
174 g_bfr_size = tsg->gr_ctx.global_ctx_buffer_size;
165 175
166 /* Circular Buffer */ 176 /* Circular Buffer */
167 gpu_va = __nvgpu_vm_alloc_va(ch_vm, 177 gpu_va = __nvgpu_vm_alloc_va(ch_vm,
@@ -213,7 +223,7 @@ static int vgpu_gr_map_global_ctx_buffers(struct gk20a *g,
213 if (err || msg.ret) 223 if (err || msg.ret)
214 goto clean_up; 224 goto clean_up;
215 225
216 c->ch_ctx.global_ctx_buffer_mapped = true; 226 tsg->gr_ctx.global_ctx_buffer_mapped = true;
217 return 0; 227 return 0;
218 228
219 clean_up: 229 clean_up:
@@ -227,40 +237,33 @@ static int vgpu_gr_map_global_ctx_buffers(struct gk20a *g,
227 return -ENOMEM; 237 return -ENOMEM;
228} 238}
229 239
230static void vgpu_gr_unmap_global_ctx_buffers(struct channel_gk20a *c) 240static void vgpu_gr_unmap_global_ctx_buffers(struct tsg_gk20a *tsg)
231{ 241{
232 struct vm_gk20a *ch_vm = c->vm; 242 struct vm_gk20a *ch_vm = tsg->vm;
233 u64 *g_bfr_va = c->ch_ctx.global_ctx_buffer_va; 243 u64 *g_bfr_va = tsg->gr_ctx.global_ctx_buffer_va;
234 u64 *g_bfr_size = c->ch_ctx.global_ctx_buffer_size; 244 u64 *g_bfr_size = tsg->gr_ctx.global_ctx_buffer_size;
235 u32 i; 245 u32 i;
236 246
237 gk20a_dbg_fn(""); 247 gk20a_dbg_fn("");
238 248
239 if (c->ch_ctx.global_ctx_buffer_mapped) { 249 if (tsg->gr_ctx.global_ctx_buffer_mapped) {
240 struct tegra_vgpu_cmd_msg msg; 250 /* server will unmap on channel close */
241 struct tegra_vgpu_ch_ctx_params *p = &msg.params.ch_ctx;
242 int err;
243 251
244 msg.cmd = TEGRA_VGPU_CMD_CHANNEL_UNMAP_GR_GLOBAL_CTX; 252 for (i = 0; i < NR_GLOBAL_CTX_BUF_VA; i++) {
245 msg.handle = vgpu_get_handle(c->g); 253 if (g_bfr_va[i]) {
246 p->handle = c->virt_ctx; 254 __nvgpu_vm_free_va(ch_vm, g_bfr_va[i],
247 err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); 255 gmmu_page_size_kernel);
248 WARN_ON(err || msg.ret); 256 g_bfr_va[i] = 0;
249 } 257 g_bfr_size[i] = 0;
250 258 }
251 for (i = 0; i < NR_GLOBAL_CTX_BUF_VA; i++) {
252 if (g_bfr_va[i]) {
253 __nvgpu_vm_free_va(ch_vm, g_bfr_va[i],
254 gmmu_page_size_kernel);
255 g_bfr_va[i] = 0;
256 g_bfr_size[i] = 0;
257 } 259 }
260
261 tsg->gr_ctx.global_ctx_buffer_mapped = false;
258 } 262 }
259 c->ch_ctx.global_ctx_buffer_mapped = false;
260} 263}
261 264
262int vgpu_gr_alloc_gr_ctx(struct gk20a *g, 265int vgpu_gr_alloc_gr_ctx(struct gk20a *g,
263 struct gr_ctx_desc **__gr_ctx, 266 struct nvgpu_gr_ctx *gr_ctx,
264 struct vm_gk20a *vm, 267 struct vm_gk20a *vm,
265 u32 class, 268 u32 class,
266 u32 flags) 269 u32 flags)
@@ -268,7 +271,6 @@ int vgpu_gr_alloc_gr_ctx(struct gk20a *g,
268 struct tegra_vgpu_cmd_msg msg = {0}; 271 struct tegra_vgpu_cmd_msg msg = {0};
269 struct tegra_vgpu_gr_ctx_params *p = &msg.params.gr_ctx; 272 struct tegra_vgpu_gr_ctx_params *p = &msg.params.gr_ctx;
270 struct gr_gk20a *gr = &g->gr; 273 struct gr_gk20a *gr = &g->gr;
271 struct gr_ctx_desc *gr_ctx;
272 int err; 274 int err;
273 275
274 gk20a_dbg_fn(""); 276 gk20a_dbg_fn("");
@@ -280,19 +282,14 @@ int vgpu_gr_alloc_gr_ctx(struct gk20a *g,
280 gr->ctx_vars.buffer_size = gr->ctx_vars.golden_image_size; 282 gr->ctx_vars.buffer_size = gr->ctx_vars.golden_image_size;
281 gr->ctx_vars.buffer_total_size = gr->ctx_vars.golden_image_size; 283 gr->ctx_vars.buffer_total_size = gr->ctx_vars.golden_image_size;
282 284
283 gr_ctx = nvgpu_kzalloc(g, sizeof(*gr_ctx));
284 if (!gr_ctx)
285 return -ENOMEM;
286
287 gr_ctx->mem.size = gr->ctx_vars.buffer_total_size;
288 gr_ctx->mem.gpu_va = __nvgpu_vm_alloc_va(vm, 285 gr_ctx->mem.gpu_va = __nvgpu_vm_alloc_va(vm,
289 gr_ctx->mem.size, 286 gr->ctx_vars.buffer_total_size,
290 gmmu_page_size_kernel); 287 gmmu_page_size_kernel);
291 288
292 if (!gr_ctx->mem.gpu_va) { 289 if (!gr_ctx->mem.gpu_va)
293 nvgpu_kfree(g, gr_ctx);
294 return -ENOMEM; 290 return -ENOMEM;
295 } 291 gr_ctx->mem.size = gr->ctx_vars.buffer_total_size;
292 gr_ctx->mem.aperture = APERTURE_SYSMEM;
296 293
297 msg.cmd = TEGRA_VGPU_CMD_GR_CTX_ALLOC; 294 msg.cmd = TEGRA_VGPU_CMD_GR_CTX_ALLOC;
298 msg.handle = vgpu_get_handle(g); 295 msg.handle = vgpu_get_handle(g);
@@ -306,57 +303,19 @@ int vgpu_gr_alloc_gr_ctx(struct gk20a *g,
306 nvgpu_err(g, "fail to alloc gr_ctx"); 303 nvgpu_err(g, "fail to alloc gr_ctx");
307 __nvgpu_vm_free_va(vm, gr_ctx->mem.gpu_va, 304 __nvgpu_vm_free_va(vm, gr_ctx->mem.gpu_va,
308 gmmu_page_size_kernel); 305 gmmu_page_size_kernel);
309 nvgpu_kfree(g, gr_ctx); 306 gr_ctx->mem.aperture = APERTURE_INVALID;
310 } else { 307 } else {
311 gr_ctx->virt_ctx = p->gr_ctx_handle; 308 gr_ctx->virt_ctx = p->gr_ctx_handle;
312 *__gr_ctx = gr_ctx;
313 } 309 }
314 310
315 return err; 311 return err;
316} 312}
317 313
318void vgpu_gr_free_gr_ctx(struct gk20a *g, struct vm_gk20a *vm,
319 struct gr_ctx_desc *gr_ctx)
320{
321 struct tegra_vgpu_cmd_msg msg;
322 struct tegra_vgpu_gr_ctx_params *p = &msg.params.gr_ctx;
323 int err;
324
325 gk20a_dbg_fn("");
326
327 if (!gr_ctx || !gr_ctx->mem.gpu_va)
328 return;
329
330
331 msg.cmd = TEGRA_VGPU_CMD_GR_CTX_FREE;
332 msg.handle = vgpu_get_handle(g);
333 p->gr_ctx_handle = gr_ctx->virt_ctx;
334 err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
335 WARN_ON(err || msg.ret);
336
337 __nvgpu_vm_free_va(vm, gr_ctx->mem.gpu_va,
338 gmmu_page_size_kernel);
339
340 nvgpu_dma_unmap_free(vm, &gr_ctx->pagepool_ctxsw_buffer);
341 nvgpu_dma_unmap_free(vm, &gr_ctx->betacb_ctxsw_buffer);
342 nvgpu_dma_unmap_free(vm, &gr_ctx->spill_ctxsw_buffer);
343 nvgpu_dma_unmap_free(vm, &gr_ctx->preempt_ctxsw_buffer);
344
345 nvgpu_kfree(g, gr_ctx);
346}
347
348static void vgpu_gr_free_channel_gr_ctx(struct channel_gk20a *c)
349{
350 gk20a_dbg_fn("");
351
352 c->g->ops.gr.free_gr_ctx(c->g, c->vm, c->ch_ctx.gr_ctx);
353 c->ch_ctx.gr_ctx = NULL;
354}
355
356static int vgpu_gr_alloc_channel_patch_ctx(struct gk20a *g, 314static int vgpu_gr_alloc_channel_patch_ctx(struct gk20a *g,
357 struct channel_gk20a *c) 315 struct channel_gk20a *c)
358{ 316{
359 struct patch_desc *patch_ctx = &c->ch_ctx.patch_ctx; 317 struct tsg_gk20a *tsg;
318 struct patch_desc *patch_ctx;
360 struct vm_gk20a *ch_vm = c->vm; 319 struct vm_gk20a *ch_vm = c->vm;
361 struct tegra_vgpu_cmd_msg msg; 320 struct tegra_vgpu_cmd_msg msg;
362 struct tegra_vgpu_ch_ctx_params *p = &msg.params.ch_ctx; 321 struct tegra_vgpu_ch_ctx_params *p = &msg.params.ch_ctx;
@@ -364,6 +323,11 @@ static int vgpu_gr_alloc_channel_patch_ctx(struct gk20a *g,
364 323
365 gk20a_dbg_fn(""); 324 gk20a_dbg_fn("");
366 325
326 tsg = tsg_gk20a_from_ch(c);
327 if (!tsg)
328 return -EINVAL;
329
330 patch_ctx = &tsg->gr_ctx.patch_ctx;
367 patch_ctx->mem.size = 128 * sizeof(u32); 331 patch_ctx->mem.size = 128 * sizeof(u32);
368 patch_ctx->mem.gpu_va = __nvgpu_vm_alloc_va(ch_vm, 332 patch_ctx->mem.gpu_va = __nvgpu_vm_alloc_va(ch_vm,
369 patch_ctx->mem.size, 333 patch_ctx->mem.size,
@@ -385,37 +349,25 @@ static int vgpu_gr_alloc_channel_patch_ctx(struct gk20a *g,
385 return err; 349 return err;
386} 350}
387 351
388static void vgpu_gr_free_channel_patch_ctx(struct channel_gk20a *c) 352static void vgpu_gr_free_channel_patch_ctx(struct tsg_gk20a *tsg)
389{ 353{
390 struct patch_desc *patch_ctx = &c->ch_ctx.patch_ctx; 354 struct patch_desc *patch_ctx = &tsg->gr_ctx.patch_ctx;
391 struct vm_gk20a *ch_vm = c->vm;
392 355
393 gk20a_dbg_fn(""); 356 gk20a_dbg_fn("");
394 357
395 if (patch_ctx->mem.gpu_va) { 358 if (patch_ctx->mem.gpu_va) {
396 struct tegra_vgpu_cmd_msg msg; 359 /* server will free on channel close */
397 struct tegra_vgpu_ch_ctx_params *p = &msg.params.ch_ctx;
398 int err;
399 360
400 msg.cmd = TEGRA_VGPU_CMD_CHANNEL_FREE_GR_PATCH_CTX; 361 __nvgpu_vm_free_va(tsg->vm, patch_ctx->mem.gpu_va,
401 msg.handle = vgpu_get_handle(c->g);
402 p->handle = c->virt_ctx;
403 err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
404 WARN_ON(err || msg.ret);
405
406 __nvgpu_vm_free_va(ch_vm, patch_ctx->mem.gpu_va,
407 gmmu_page_size_kernel); 362 gmmu_page_size_kernel);
408 patch_ctx->mem.gpu_va = 0; 363 patch_ctx->mem.gpu_va = 0;
409 } 364 }
410} 365}
411 366
412static void vgpu_gr_free_channel_pm_ctx(struct channel_gk20a *c) 367static void vgpu_gr_free_channel_pm_ctx(struct tsg_gk20a *tsg)
413{ 368{
414 struct tegra_vgpu_cmd_msg msg; 369 struct nvgpu_gr_ctx *ch_ctx = &tsg->gr_ctx;
415 struct tegra_vgpu_channel_free_hwpm_ctx *p = &msg.params.free_hwpm_ctx;
416 struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
417 struct pm_ctx_desc *pm_ctx = &ch_ctx->pm_ctx; 370 struct pm_ctx_desc *pm_ctx = &ch_ctx->pm_ctx;
418 int err;
419 371
420 gk20a_dbg_fn(""); 372 gk20a_dbg_fn("");
421 373
@@ -423,44 +375,63 @@ static void vgpu_gr_free_channel_pm_ctx(struct channel_gk20a *c)
423 if (pm_ctx->mem.gpu_va == 0) 375 if (pm_ctx->mem.gpu_va == 0)
424 return; 376 return;
425 377
426 msg.cmd = TEGRA_VGPU_CMD_CHANNEL_FREE_HWPM_CTX; 378 /* server will free on channel close */
427 msg.handle = vgpu_get_handle(c->g);
428 p->handle = c->virt_ctx;
429 err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
430 WARN_ON(err || msg.ret);
431 379
432 __nvgpu_vm_free_va(c->vm, pm_ctx->mem.gpu_va, 380 __nvgpu_vm_free_va(tsg->vm, pm_ctx->mem.gpu_va,
433 gmmu_page_size_kernel); 381 gmmu_page_size_kernel);
434 pm_ctx->mem.gpu_va = 0; 382 pm_ctx->mem.gpu_va = 0;
435} 383}
436 384
437void vgpu_gr_free_channel_ctx(struct channel_gk20a *c, bool is_tsg) 385void vgpu_gr_free_gr_ctx(struct gk20a *g,
386 struct vm_gk20a *vm, struct nvgpu_gr_ctx *gr_ctx)
438{ 387{
388 struct tsg_gk20a *tsg;
389
439 gk20a_dbg_fn(""); 390 gk20a_dbg_fn("");
440 391
441 if (c->g->ops.fifo.free_channel_ctx_header) 392 if (gr_ctx->mem.gpu_va) {
442 c->g->ops.fifo.free_channel_ctx_header(c); 393 struct tegra_vgpu_cmd_msg msg;
443 vgpu_gr_unmap_global_ctx_buffers(c); 394 struct tegra_vgpu_gr_ctx_params *p = &msg.params.gr_ctx;
444 vgpu_gr_free_channel_patch_ctx(c); 395 int err;
445 vgpu_gr_free_channel_pm_ctx(c);
446 if (!is_tsg)
447 vgpu_gr_free_channel_gr_ctx(c);
448 396
449 /* zcull_ctx, pm_ctx */ 397 msg.cmd = TEGRA_VGPU_CMD_GR_CTX_FREE;
398 msg.handle = vgpu_get_handle(g);
399 p->gr_ctx_handle = gr_ctx->virt_ctx;
400 err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
401 WARN_ON(err || msg.ret);
450 402
451 memset(&c->ch_ctx, 0, sizeof(struct channel_ctx_gk20a)); 403 __nvgpu_vm_free_va(vm, gr_ctx->mem.gpu_va,
404 gmmu_page_size_kernel);
405
406 tsg = &g->fifo.tsg[gr_ctx->tsgid];
407 vgpu_gr_unmap_global_ctx_buffers(tsg);
408 vgpu_gr_free_channel_patch_ctx(tsg);
409 vgpu_gr_free_channel_pm_ctx(tsg);
410
411 nvgpu_dma_unmap_free(vm, &gr_ctx->pagepool_ctxsw_buffer);
412 nvgpu_dma_unmap_free(vm, &gr_ctx->betacb_ctxsw_buffer);
413 nvgpu_dma_unmap_free(vm, &gr_ctx->spill_ctxsw_buffer);
414 nvgpu_dma_unmap_free(vm, &gr_ctx->preempt_ctxsw_buffer);
452 415
453 c->first_init = false; 416 memset(gr_ctx, 0, sizeof(*gr_ctx));
417 }
454} 418}
455 419
456static int vgpu_gr_ch_bind_gr_ctx(struct channel_gk20a *c) 420static int vgpu_gr_ch_bind_gr_ctx(struct channel_gk20a *c)
457{ 421{
458 struct gr_ctx_desc *gr_ctx = c->ch_ctx.gr_ctx; 422 struct tsg_gk20a *tsg;
423 struct nvgpu_gr_ctx *gr_ctx;
459 struct tegra_vgpu_cmd_msg msg = {0}; 424 struct tegra_vgpu_cmd_msg msg = {0};
460 struct tegra_vgpu_channel_bind_gr_ctx_params *p = 425 struct tegra_vgpu_channel_bind_gr_ctx_params *p =
461 &msg.params.ch_bind_gr_ctx; 426 &msg.params.ch_bind_gr_ctx;
462 int err; 427 int err;
463 428
429 tsg = tsg_gk20a_from_ch(c);
430 if (!tsg)
431 return -EINVAL;
432
433 gr_ctx = &tsg->gr_ctx;
434
464 msg.cmd = TEGRA_VGPU_CMD_CHANNEL_BIND_GR_CTX; 435 msg.cmd = TEGRA_VGPU_CMD_CHANNEL_BIND_GR_CTX;
465 msg.handle = vgpu_get_handle(c->g); 436 msg.handle = vgpu_get_handle(c->g);
466 p->ch_handle = c->virt_ctx; 437 p->ch_handle = c->virt_ctx;
@@ -474,7 +445,7 @@ static int vgpu_gr_ch_bind_gr_ctx(struct channel_gk20a *c)
474 445
475static int vgpu_gr_tsg_bind_gr_ctx(struct tsg_gk20a *tsg) 446static int vgpu_gr_tsg_bind_gr_ctx(struct tsg_gk20a *tsg)
476{ 447{
477 struct gr_ctx_desc *gr_ctx = tsg->tsg_gr_ctx; 448 struct nvgpu_gr_ctx *gr_ctx = &tsg->gr_ctx;
478 struct tegra_vgpu_cmd_msg msg = {0}; 449 struct tegra_vgpu_cmd_msg msg = {0};
479 struct tegra_vgpu_tsg_bind_gr_ctx_params *p = 450 struct tegra_vgpu_tsg_bind_gr_ctx_params *p =
480 &msg.params.tsg_bind_gr_ctx; 451 &msg.params.tsg_bind_gr_ctx;
@@ -495,7 +466,7 @@ int vgpu_gr_alloc_obj_ctx(struct channel_gk20a *c, u32 class_num, u32 flags)
495{ 466{
496 struct gk20a *g = c->g; 467 struct gk20a *g = c->g;
497 struct fifo_gk20a *f = &g->fifo; 468 struct fifo_gk20a *f = &g->fifo;
498 struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx; 469 struct nvgpu_gr_ctx *gr_ctx = NULL;
499 struct tsg_gk20a *tsg = NULL; 470 struct tsg_gk20a *tsg = NULL;
500 int err = 0; 471 int err = 0;
501 472
@@ -515,95 +486,87 @@ int vgpu_gr_alloc_obj_ctx(struct channel_gk20a *c, u32 class_num, u32 flags)
515 } 486 }
516 c->obj_class = class_num; 487 c->obj_class = class_num;
517 488
518 if (gk20a_is_channel_marked_as_tsg(c)) 489 if (!gk20a_is_channel_marked_as_tsg(c))
519 tsg = &f->tsg[c->tsgid]; 490 return -EINVAL;
520 491
521 if (!tsg) { 492 tsg = &f->tsg[c->tsgid];
522 /* allocate gr ctx buffer */ 493 gr_ctx = &tsg->gr_ctx;
523 if (!ch_ctx->gr_ctx) { 494
524 err = g->ops.gr.alloc_gr_ctx(g, &c->ch_ctx.gr_ctx, 495 if (!nvgpu_mem_is_valid(&gr_ctx->mem)) {
525 c->vm, 496 tsg->vm = c->vm;
526 class_num, 497 nvgpu_vm_get(tsg->vm);
527 flags); 498 err = g->ops.gr.alloc_gr_ctx(g, gr_ctx,
528 if (!err) 499 c->vm,
529 err = vgpu_gr_ch_bind_gr_ctx(c); 500 class_num,
530 if (err) { 501 flags);
531 nvgpu_err(g, "fail to allocate gr ctx buffer"); 502 if (!err)
532 goto out; 503 err = vgpu_gr_tsg_bind_gr_ctx(tsg);
533 } 504 if (err) {
534 } else {
535 /*TBD: needs to be more subtle about which is
536 * being allocated as some are allowed to be
537 * allocated along same channel */
538 nvgpu_err(g, 505 nvgpu_err(g,
539 "too many classes alloc'd on same channel"); 506 "fail to allocate TSG gr ctx buffer, err=%d", err);
540 err = -EINVAL; 507 nvgpu_vm_put(tsg->vm);
508 tsg->vm = NULL;
541 goto out; 509 goto out;
542 } 510 }
543 } else {
544 if (!tsg->tsg_gr_ctx) {
545 tsg->vm = c->vm;
546 nvgpu_vm_get(tsg->vm);
547 err = g->ops.gr.alloc_gr_ctx(g, &tsg->tsg_gr_ctx,
548 c->vm,
549 class_num,
550 flags);
551 if (!err)
552 err = vgpu_gr_tsg_bind_gr_ctx(tsg);
553 if (err) {
554 nvgpu_err(g,
555 "fail to allocate TSG gr ctx buffer, err=%d", err);
556 nvgpu_vm_put(tsg->vm);
557 tsg->vm = NULL;
558 goto out;
559 }
560 }
561 511
562 ch_ctx->gr_ctx = tsg->tsg_gr_ctx;
563 err = vgpu_gr_ch_bind_gr_ctx(c); 512 err = vgpu_gr_ch_bind_gr_ctx(c);
564 if (err) { 513 if (err) {
565 nvgpu_err(g, "fail to bind gr ctx buffer"); 514 nvgpu_err(g, "fail to bind gr ctx buffer");
566 goto out; 515 goto out;
567 } 516 }
568 }
569 517
570 /* commit gr ctx buffer */ 518 /* commit gr ctx buffer */
571 err = g->ops.gr.commit_inst(c, ch_ctx->gr_ctx->mem.gpu_va); 519 err = g->ops.gr.commit_inst(c, gr_ctx->mem.gpu_va);
572 if (err) { 520 if (err) {
573 nvgpu_err(g, "fail to commit gr ctx buffer"); 521 nvgpu_err(g, "fail to commit gr ctx buffer");
574 goto out; 522 goto out;
575 } 523 }
576 524
577 /* allocate patch buffer */ 525 /* allocate patch buffer */
578 if (ch_ctx->patch_ctx.mem.priv.pages == NULL) {
579 err = vgpu_gr_alloc_channel_patch_ctx(g, c); 526 err = vgpu_gr_alloc_channel_patch_ctx(g, c);
580 if (err) { 527 if (err) {
581 nvgpu_err(g, "fail to allocate patch buffer"); 528 nvgpu_err(g, "fail to allocate patch buffer");
582 goto out; 529 goto out;
583 } 530 }
584 }
585 531
586 /* map global buffer to channel gpu_va and commit */ 532 /* map global buffer to channel gpu_va and commit */
587 if (!ch_ctx->global_ctx_buffer_mapped) {
588 err = vgpu_gr_map_global_ctx_buffers(g, c); 533 err = vgpu_gr_map_global_ctx_buffers(g, c);
589 if (err) { 534 if (err) {
590 nvgpu_err(g, "fail to map global ctx buffer"); 535 nvgpu_err(g, "fail to map global ctx buffer");
591 goto out; 536 goto out;
592 } 537 }
593 vgpu_gr_commit_global_ctx_buffers(g, c, true);
594 }
595 538
596 /* load golden image */ 539 err = vgpu_gr_commit_global_ctx_buffers(g, c, true);
597 if (!c->first_init) { 540 if (err) {
541 nvgpu_err(g, "fail to commit global ctx buffers");
542 goto out;
543 }
544
545 /* load golden image */
598 err = gr_gk20a_elpg_protected_call(g, 546 err = gr_gk20a_elpg_protected_call(g,
599 vgpu_gr_load_golden_ctx_image(g, c)); 547 vgpu_gr_load_golden_ctx_image(g, c));
600 if (err) { 548 if (err) {
601 nvgpu_err(g, "fail to load golden ctx image"); 549 nvgpu_err(g, "fail to load golden ctx image");
602 goto out; 550 goto out;
603 } 551 }
604 c->first_init = true; 552 } else {
553 err = vgpu_gr_ch_bind_gr_ctx(c);
554 if (err) {
555 nvgpu_err(g, "fail to bind gr ctx buffer");
556 goto out;
557 }
558
559 /* commit gr ctx buffer */
560 err = g->ops.gr.commit_inst(c, gr_ctx->mem.gpu_va);
561 if (err) {
562 nvgpu_err(g, "fail to commit gr ctx buffer");
563 goto out;
564 }
605 } 565 }
606 566
567 /* PM ctxt switch is off by default */
568 gr_ctx->pm_ctx.pm_mode = ctxsw_prog_main_image_pm_mode_no_ctxsw_f();
569
607 gk20a_dbg_fn("done"); 570 gk20a_dbg_fn("done");
608 return 0; 571 return 0;
609out: 572out:
@@ -1055,15 +1018,30 @@ int vgpu_gr_update_smpc_ctxsw_mode(struct gk20a *g,
1055int vgpu_gr_update_hwpm_ctxsw_mode(struct gk20a *g, 1018int vgpu_gr_update_hwpm_ctxsw_mode(struct gk20a *g,
1056 struct channel_gk20a *ch, bool enable) 1019 struct channel_gk20a *ch, bool enable)
1057{ 1020{
1058 struct channel_ctx_gk20a *ch_ctx = &ch->ch_ctx; 1021 struct tsg_gk20a *tsg;
1059 struct pm_ctx_desc *pm_ctx = &ch_ctx->pm_ctx; 1022 struct nvgpu_gr_ctx *ch_ctx;
1023 struct pm_ctx_desc *pm_ctx;
1060 struct tegra_vgpu_cmd_msg msg; 1024 struct tegra_vgpu_cmd_msg msg;
1061 struct tegra_vgpu_channel_set_ctxsw_mode *p = &msg.params.set_ctxsw_mode; 1025 struct tegra_vgpu_channel_set_ctxsw_mode *p = &msg.params.set_ctxsw_mode;
1062 int err; 1026 int err;
1063 1027
1064 gk20a_dbg_fn(""); 1028 gk20a_dbg_fn("");
1065 1029
1030 tsg = tsg_gk20a_from_ch(ch);
1031 if (!tsg)
1032 return -EINVAL;
1033
1034 ch_ctx = &tsg->gr_ctx;
1035 pm_ctx = &ch_ctx->pm_ctx;
1036
1066 if (enable) { 1037 if (enable) {
1038 /*
1039 * send command to enable HWPM only once - otherwise server
1040 * will return an error due to using the same GPU VA twice.
1041 */
1042 if (pm_ctx->pm_mode == ctxsw_prog_main_image_pm_mode_ctxsw_f())
1043 return 0;
1044
1067 p->mode = TEGRA_VGPU_CTXSW_MODE_CTXSW; 1045 p->mode = TEGRA_VGPU_CTXSW_MODE_CTXSW;
1068 1046
1069 /* Allocate buffer if necessary */ 1047 /* Allocate buffer if necessary */
@@ -1076,8 +1054,12 @@ int vgpu_gr_update_hwpm_ctxsw_mode(struct gk20a *g,
1076 return -ENOMEM; 1054 return -ENOMEM;
1077 pm_ctx->mem.size = g->gr.ctx_vars.pm_ctxsw_image_size; 1055 pm_ctx->mem.size = g->gr.ctx_vars.pm_ctxsw_image_size;
1078 } 1056 }
1079 } else 1057 } else {
1058 if (pm_ctx->pm_mode == ctxsw_prog_main_image_pm_mode_no_ctxsw_f())
1059 return 0;
1060
1080 p->mode = TEGRA_VGPU_CTXSW_MODE_NO_CTXSW; 1061 p->mode = TEGRA_VGPU_CTXSW_MODE_NO_CTXSW;
1062 }
1081 1063
1082 msg.cmd = TEGRA_VGPU_CMD_CHANNEL_SET_HWPM_CTXSW_MODE; 1064 msg.cmd = TEGRA_VGPU_CMD_CHANNEL_SET_HWPM_CTXSW_MODE;
1083 msg.handle = vgpu_get_handle(g); 1065 msg.handle = vgpu_get_handle(g);
@@ -1086,8 +1068,13 @@ int vgpu_gr_update_hwpm_ctxsw_mode(struct gk20a *g,
1086 1068
1087 err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); 1069 err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
1088 WARN_ON(err || msg.ret); 1070 WARN_ON(err || msg.ret);
1071 err = err ? err : msg.ret;
1072 if (!err)
1073 pm_ctx->pm_mode = enable ?
1074 ctxsw_prog_main_image_pm_mode_ctxsw_f() :
1075 ctxsw_prog_main_image_pm_mode_no_ctxsw_f();
1089 1076
1090 return err ? err : msg.ret; 1077 return err;
1091} 1078}
1092 1079
1093int vgpu_gr_clear_sm_error_state(struct gk20a *g, 1080int vgpu_gr_clear_sm_error_state(struct gk20a *g,
diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/gr_vgpu.h b/drivers/gpu/nvgpu/common/linux/vgpu/gr_vgpu.h
index 16aa92a9..4b81da91 100644
--- a/drivers/gpu/nvgpu/common/linux/vgpu/gr_vgpu.h
+++ b/drivers/gpu/nvgpu/common/linux/vgpu/gr_vgpu.h
@@ -29,6 +29,7 @@ struct dbg_session_gk20a;
29 29
30void vgpu_gr_detect_sm_arch(struct gk20a *g); 30void vgpu_gr_detect_sm_arch(struct gk20a *g);
31void vgpu_gr_free_channel_ctx(struct channel_gk20a *c, bool is_tsg); 31void vgpu_gr_free_channel_ctx(struct channel_gk20a *c, bool is_tsg);
32void vgpu_gr_free_tsg_ctx(struct tsg_gk20a *tsg);
32int vgpu_gr_alloc_obj_ctx(struct channel_gk20a *c, u32 class_num, u32 flags); 33int vgpu_gr_alloc_obj_ctx(struct channel_gk20a *c, u32 class_num, u32 flags);
33int vgpu_gr_bind_ctxsw_zcull(struct gk20a *g, struct gr_gk20a *gr, 34int vgpu_gr_bind_ctxsw_zcull(struct gk20a *g, struct gr_gk20a *gr,
34 struct channel_gk20a *c, u64 zcull_va, 35 struct channel_gk20a *c, u64 zcull_va,
diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/gv11b/vgpu_hal_gv11b.c b/drivers/gpu/nvgpu/common/linux/vgpu/gv11b/vgpu_hal_gv11b.c
index 968eae10..132ce6e5 100644
--- a/drivers/gpu/nvgpu/common/linux/vgpu/gv11b/vgpu_hal_gv11b.c
+++ b/drivers/gpu/nvgpu/common/linux/vgpu/gv11b/vgpu_hal_gv11b.c
@@ -131,7 +131,6 @@ static const struct gpu_ops vgpu_gv11b_ops = {
131 .load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode, 131 .load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode,
132 .set_gpc_tpc_mask = gr_gv11b_set_gpc_tpc_mask, 132 .set_gpc_tpc_mask = gr_gv11b_set_gpc_tpc_mask,
133 .get_gpc_tpc_mask = vgpu_gr_get_gpc_tpc_mask, 133 .get_gpc_tpc_mask = vgpu_gr_get_gpc_tpc_mask,
134 .free_channel_ctx = vgpu_gr_free_channel_ctx,
135 .alloc_obj_ctx = vgpu_gr_alloc_obj_ctx, 134 .alloc_obj_ctx = vgpu_gr_alloc_obj_ctx,
136 .bind_ctxsw_zcull = vgpu_gr_bind_ctxsw_zcull, 135 .bind_ctxsw_zcull = vgpu_gr_bind_ctxsw_zcull,
137 .get_zcull_info = vgpu_gr_get_zcull_info, 136 .get_zcull_info = vgpu_gr_get_zcull_info,
diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/gv11b/vgpu_subctx_gv11b.c b/drivers/gpu/nvgpu/common/linux/vgpu/gv11b/vgpu_subctx_gv11b.c
index d59f0381..a0099f03 100644
--- a/drivers/gpu/nvgpu/common/linux/vgpu/gv11b/vgpu_subctx_gv11b.c
+++ b/drivers/gpu/nvgpu/common/linux/vgpu/gv11b/vgpu_subctx_gv11b.c
@@ -21,7 +21,7 @@
21 21
22int vgpu_gv11b_alloc_subctx_header(struct channel_gk20a *c) 22int vgpu_gv11b_alloc_subctx_header(struct channel_gk20a *c)
23{ 23{
24 struct ctx_header_desc *ctx = &c->ch_ctx.ctx_header; 24 struct ctx_header_desc *ctx = &c->ctx_header;
25 struct tegra_vgpu_cmd_msg msg = {}; 25 struct tegra_vgpu_cmd_msg msg = {};
26 struct tegra_vgpu_alloc_ctx_header_params *p = 26 struct tegra_vgpu_alloc_ctx_header_params *p =
27 &msg.params.alloc_ctx_header; 27 &msg.params.alloc_ctx_header;
@@ -52,7 +52,7 @@ int vgpu_gv11b_alloc_subctx_header(struct channel_gk20a *c)
52 52
53void vgpu_gv11b_free_subctx_header(struct channel_gk20a *c) 53void vgpu_gv11b_free_subctx_header(struct channel_gk20a *c)
54{ 54{
55 struct ctx_header_desc *ctx = &c->ch_ctx.ctx_header; 55 struct ctx_header_desc *ctx = &c->ctx_header;
56 struct tegra_vgpu_cmd_msg msg = {}; 56 struct tegra_vgpu_cmd_msg msg = {};
57 struct tegra_vgpu_free_ctx_header_params *p = 57 struct tegra_vgpu_free_ctx_header_params *p =
58 &msg.params.free_ctx_header; 58 &msg.params.free_ctx_header;
diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/vgpu.h b/drivers/gpu/nvgpu/common/linux/vgpu/vgpu.h
index 8c306ea0..20624240 100644
--- a/drivers/gpu/nvgpu/common/linux/vgpu/vgpu.h
+++ b/drivers/gpu/nvgpu/common/linux/vgpu/vgpu.h
@@ -79,12 +79,12 @@ int vgpu_gr_isr(struct gk20a *g, struct tegra_vgpu_gr_intr_info *info);
79int vgpu_gr_nonstall_isr(struct gk20a *g, 79int vgpu_gr_nonstall_isr(struct gk20a *g,
80 struct tegra_vgpu_gr_nonstall_intr_info *info); 80 struct tegra_vgpu_gr_nonstall_intr_info *info);
81int vgpu_gr_alloc_gr_ctx(struct gk20a *g, 81int vgpu_gr_alloc_gr_ctx(struct gk20a *g,
82 struct gr_ctx_desc **__gr_ctx, 82 struct nvgpu_gr_ctx *gr_ctx,
83 struct vm_gk20a *vm, 83 struct vm_gk20a *vm,
84 u32 class, 84 u32 class,
85 u32 flags); 85 u32 flags);
86void vgpu_gr_free_gr_ctx(struct gk20a *g, struct vm_gk20a *vm, 86void vgpu_gr_free_gr_ctx(struct gk20a *g, struct vm_gk20a *vm,
87 struct gr_ctx_desc *gr_ctx); 87 struct nvgpu_gr_ctx *gr_ctx);
88void vgpu_gr_handle_sm_esr_event(struct gk20a *g, 88void vgpu_gr_handle_sm_esr_event(struct gk20a *g,
89 struct tegra_vgpu_sm_esr_info *info); 89 struct tegra_vgpu_sm_esr_info *info);
90int vgpu_gr_init_ctx_state(struct gk20a *g); 90int vgpu_gr_init_ctx_state(struct gk20a *g);
@@ -141,7 +141,7 @@ static inline int vgpu_gr_isr(struct gk20a *g,
141 return 0; 141 return 0;
142} 142}
143static inline int vgpu_gr_alloc_gr_ctx(struct gk20a *g, 143static inline int vgpu_gr_alloc_gr_ctx(struct gk20a *g,
144 struct gr_ctx_desc **__gr_ctx, 144 struct nvgpu_gr_ctx *gr_ctx,
145 struct vm_gk20a *vm, 145 struct vm_gk20a *vm,
146 u32 class, 146 u32 class,
147 u32 flags) 147 u32 flags)
@@ -149,7 +149,7 @@ static inline int vgpu_gr_alloc_gr_ctx(struct gk20a *g,
149 return -ENOSYS; 149 return -ENOSYS;
150} 150}
151static inline void vgpu_gr_free_gr_ctx(struct gk20a *g, struct vm_gk20a *vm, 151static inline void vgpu_gr_free_gr_ctx(struct gk20a *g, struct vm_gk20a *vm,
152 struct gr_ctx_desc *gr_ctx) 152 struct nvgpu_gr_ctx *gr_ctx)
153{ 153{
154} 154}
155static inline int vgpu_gr_init_ctx_state(struct gk20a *g) 155static inline int vgpu_gr_init_ctx_state(struct gk20a *g)
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
index 16d4711f..64266fe5 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -259,7 +259,7 @@ void gk20a_channel_abort(struct channel_gk20a *ch, bool channel_preempt)
259 259
260 ch->g->ops.fifo.disable_channel(ch); 260 ch->g->ops.fifo.disable_channel(ch);
261 261
262 if (channel_preempt && ch->ch_ctx.gr_ctx) 262 if (channel_preempt && gk20a_is_channel_marked_as_tsg(ch))
263 ch->g->ops.fifo.preempt_channel(ch->g, ch->chid); 263 ch->g->ops.fifo.preempt_channel(ch->g, ch->chid);
264 264
265 gk20a_channel_abort_clean_up(ch); 265 gk20a_channel_abort_clean_up(ch);
@@ -421,8 +421,8 @@ static void gk20a_free_channel(struct channel_gk20a *ch, bool force)
421 g->ops.fecs_trace.unbind_channel(g, ch); 421 g->ops.fecs_trace.unbind_channel(g, ch);
422#endif 422#endif
423 423
424 /* release channel ctx */ 424 if(g->ops.fifo.free_channel_ctx_header)
425 g->ops.gr.free_channel_ctx(ch, was_tsg); 425 g->ops.fifo.free_channel_ctx_header(ch);
426 426
427 gk20a_gr_flush_channel_tlb(gr); 427 gk20a_gr_flush_channel_tlb(gr);
428 428
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
index c13b1c58..29fa302f 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
@@ -31,7 +31,6 @@
31#include <nvgpu/atomic.h> 31#include <nvgpu/atomic.h>
32 32
33struct gk20a; 33struct gk20a;
34struct gr_gk20a;
35struct dbg_session_gk20a; 34struct dbg_session_gk20a;
36struct gk20a_fence; 35struct gk20a_fence;
37struct fifo_profile_gk20a; 36struct fifo_profile_gk20a;
@@ -50,10 +49,6 @@ struct fifo_profile_gk20a;
50#define NVGPU_GPFIFO_FLAGS_SUPPORT_DETERMINISTIC (1 << 1) 49#define NVGPU_GPFIFO_FLAGS_SUPPORT_DETERMINISTIC (1 << 1)
51#define NVGPU_GPFIFO_FLAGS_REPLAYABLE_FAULTS_ENABLE (1 << 2) 50#define NVGPU_GPFIFO_FLAGS_REPLAYABLE_FAULTS_ENABLE (1 << 2)
52 51
53/* Flags to be passed to g->ops.gr.alloc_obj_ctx() */
54#define NVGPU_OBJ_CTX_FLAGS_SUPPORT_GFXP (1 << 1)
55#define NVGPU_OBJ_CTX_FLAGS_SUPPORT_CILP (1 << 2)
56
57struct notification { 52struct notification {
58 struct { 53 struct {
59 u32 nanoseconds[2]; 54 u32 nanoseconds[2];
@@ -63,19 +58,6 @@ struct notification {
63 u16 status; 58 u16 status;
64}; 59};
65 60
66/* contexts associated with a channel */
67struct channel_ctx_gk20a {
68 struct gr_ctx_desc *gr_ctx;
69 struct patch_desc patch_ctx;
70 struct zcull_ctx_desc zcull_ctx;
71 struct pm_ctx_desc pm_ctx;
72 u64 global_ctx_buffer_va[NR_GLOBAL_CTX_BUF_VA];
73 u64 global_ctx_buffer_size[NR_GLOBAL_CTX_BUF_VA];
74 int global_ctx_buffer_index[NR_GLOBAL_CTX_BUF_VA];
75 bool global_ctx_buffer_mapped;
76 struct ctx_header_desc ctx_header;
77};
78
79struct channel_gk20a_job { 61struct channel_gk20a_job {
80 struct nvgpu_mapped_buf **mapped_buffers; 62 struct nvgpu_mapped_buf **mapped_buffers;
81 int num_mapped_buffers; 63 int num_mapped_buffers;
@@ -190,7 +172,6 @@ struct channel_gk20a {
190 int chid; 172 int chid;
191 bool wdt_enabled; 173 bool wdt_enabled;
192 nvgpu_atomic_t bound; 174 nvgpu_atomic_t bound;
193 bool first_init;
194 bool vpr; 175 bool vpr;
195 bool deterministic; 176 bool deterministic;
196 /* deterministic, but explicitly idle and submits disallowed */ 177 /* deterministic, but explicitly idle and submits disallowed */
@@ -210,8 +191,6 @@ struct channel_gk20a {
210 191
211 struct gpfifo_desc gpfifo; 192 struct gpfifo_desc gpfifo;
212 193
213 struct channel_ctx_gk20a ch_ctx;
214
215 struct nvgpu_mem inst_block; 194 struct nvgpu_mem inst_block;
216 195
217 u64 userd_iova; 196 u64 userd_iova;
@@ -262,6 +241,8 @@ struct channel_gk20a {
262 struct channel_t19x t19x; 241 struct channel_t19x t19x;
263#endif 242#endif
264 243
244 struct ctx_header_desc ctx_header;
245
265 /* Any operating system specific data. */ 246 /* Any operating system specific data. */
266 void *os_priv; 247 void *os_priv;
267}; 248};
diff --git a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c
index d283a82e..409661fc 100644
--- a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c
@@ -625,9 +625,10 @@ int gk20a_fecs_trace_bind_channel(struct gk20a *g,
625 u32 lo; 625 u32 lo;
626 u32 hi; 626 u32 hi;
627 u64 pa; 627 u64 pa;
628 struct channel_ctx_gk20a *ch_ctx = &ch->ch_ctx; 628 struct tsg_gk20a *tsg;
629 struct nvgpu_gr_ctx *ch_ctx;
629 struct gk20a_fecs_trace *trace = g->fecs_trace; 630 struct gk20a_fecs_trace *trace = g->fecs_trace;
630 struct nvgpu_mem *mem = &ch_ctx->gr_ctx->mem; 631 struct nvgpu_mem *mem;
631 u32 context_ptr = gk20a_fecs_trace_fecs_context_ptr(g, ch); 632 u32 context_ptr = gk20a_fecs_trace_fecs_context_ptr(g, ch);
632 pid_t pid; 633 pid_t pid;
633 u32 aperture; 634 u32 aperture;
@@ -637,6 +638,13 @@ int gk20a_fecs_trace_bind_channel(struct gk20a *g,
637 ch->chid, context_ptr, 638 ch->chid, context_ptr,
638 nvgpu_inst_block_addr(g, &ch->inst_block)); 639 nvgpu_inst_block_addr(g, &ch->inst_block));
639 640
641 tsg = tsg_gk20a_from_ch(ch);
642 if (!tsg)
643 return -EINVAL;
644
645 ch_ctx = &tsg->gr_ctx;
646 mem = &ch_ctx->mem;
647
640 if (!trace) 648 if (!trace)
641 return -ENOMEM; 649 return -ENOMEM;
642 650
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index 070b26b6..685976b1 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -187,16 +187,16 @@ struct gpu_ops {
187 void (*cb_size_default)(struct gk20a *g); 187 void (*cb_size_default)(struct gk20a *g);
188 int (*calc_global_ctx_buffer_size)(struct gk20a *g); 188 int (*calc_global_ctx_buffer_size)(struct gk20a *g);
189 void (*commit_global_attrib_cb)(struct gk20a *g, 189 void (*commit_global_attrib_cb)(struct gk20a *g,
190 struct channel_ctx_gk20a *ch_ctx, 190 struct nvgpu_gr_ctx *ch_ctx,
191 u64 addr, bool patch); 191 u64 addr, bool patch);
192 void (*commit_global_bundle_cb)(struct gk20a *g, 192 void (*commit_global_bundle_cb)(struct gk20a *g,
193 struct channel_ctx_gk20a *ch_ctx, 193 struct nvgpu_gr_ctx *ch_ctx,
194 u64 addr, u64 size, bool patch); 194 u64 addr, u64 size, bool patch);
195 int (*commit_global_cb_manager)(struct gk20a *g, 195 int (*commit_global_cb_manager)(struct gk20a *g,
196 struct channel_gk20a *ch, 196 struct channel_gk20a *ch,
197 bool patch); 197 bool patch);
198 void (*commit_global_pagepool)(struct gk20a *g, 198 void (*commit_global_pagepool)(struct gk20a *g,
199 struct channel_ctx_gk20a *ch_ctx, 199 struct nvgpu_gr_ctx *ch_ctx,
200 u64 addr, u32 size, bool patch); 200 u64 addr, u32 size, bool patch);
201 void (*init_gpc_mmu)(struct gk20a *g); 201 void (*init_gpc_mmu)(struct gk20a *g);
202 int (*handle_sw_method)(struct gk20a *g, u32 addr, 202 int (*handle_sw_method)(struct gk20a *g, u32 addr,
@@ -230,7 +230,6 @@ struct gpu_ops {
230 int (*load_ctxsw_ucode)(struct gk20a *g); 230 int (*load_ctxsw_ucode)(struct gk20a *g);
231 u32 (*get_gpc_tpc_mask)(struct gk20a *g, u32 gpc_index); 231 u32 (*get_gpc_tpc_mask)(struct gk20a *g, u32 gpc_index);
232 void (*set_gpc_tpc_mask)(struct gk20a *g, u32 gpc_index); 232 void (*set_gpc_tpc_mask)(struct gk20a *g, u32 gpc_index);
233 void (*free_channel_ctx)(struct channel_gk20a *c, bool is_tsg);
234 int (*alloc_obj_ctx)(struct channel_gk20a *c, 233 int (*alloc_obj_ctx)(struct channel_gk20a *c,
235 u32 class_num, u32 flags); 234 u32 class_num, u32 flags);
236 int (*bind_ctxsw_zcull)(struct gk20a *g, struct gr_gk20a *gr, 235 int (*bind_ctxsw_zcull)(struct gk20a *g, struct gr_gk20a *gr,
@@ -285,13 +284,12 @@ struct gpu_ops {
285 u32 (*pagepool_default_size)(struct gk20a *g); 284 u32 (*pagepool_default_size)(struct gk20a *g);
286 int (*init_ctx_state)(struct gk20a *g); 285 int (*init_ctx_state)(struct gk20a *g);
287 int (*alloc_gr_ctx)(struct gk20a *g, 286 int (*alloc_gr_ctx)(struct gk20a *g,
288 struct gr_ctx_desc **__gr_ctx, struct vm_gk20a *vm, 287 struct nvgpu_gr_ctx *gr_ctx, struct vm_gk20a *vm,
289 u32 class, u32 padding); 288 u32 class, u32 padding);
290 void (*free_gr_ctx)(struct gk20a *g, 289 void (*free_gr_ctx)(struct gk20a *g,
291 struct vm_gk20a *vm, 290 struct vm_gk20a *vm, struct nvgpu_gr_ctx *gr_ctx);
292 struct gr_ctx_desc *gr_ctx);
293 void (*update_ctxsw_preemption_mode)(struct gk20a *g, 291 void (*update_ctxsw_preemption_mode)(struct gk20a *g,
294 struct channel_ctx_gk20a *ch_ctx, 292 struct channel_gk20a *c,
295 struct nvgpu_mem *mem); 293 struct nvgpu_mem *mem);
296 int (*update_smpc_ctxsw_mode)(struct gk20a *g, 294 int (*update_smpc_ctxsw_mode)(struct gk20a *g,
297 struct channel_gk20a *c, 295 struct channel_gk20a *c,
@@ -384,14 +382,14 @@ struct gpu_ops {
384 int (*get_preemption_mode_flags)(struct gk20a *g, 382 int (*get_preemption_mode_flags)(struct gk20a *g,
385 struct nvgpu_preemption_modes_rec *preemption_modes_rec); 383 struct nvgpu_preemption_modes_rec *preemption_modes_rec);
386 int (*set_ctxsw_preemption_mode)(struct gk20a *g, 384 int (*set_ctxsw_preemption_mode)(struct gk20a *g,
387 struct gr_ctx_desc *gr_ctx, 385 struct nvgpu_gr_ctx *gr_ctx,
388 struct vm_gk20a *vm, u32 class, 386 struct vm_gk20a *vm, u32 class,
389 u32 graphics_preempt_mode, 387 u32 graphics_preempt_mode,
390 u32 compute_preempt_mode); 388 u32 compute_preempt_mode);
391 int (*set_boosted_ctx)(struct channel_gk20a *ch, bool boost); 389 int (*set_boosted_ctx)(struct channel_gk20a *ch, bool boost);
392 void (*update_boosted_ctx)(struct gk20a *g, 390 void (*update_boosted_ctx)(struct gk20a *g,
393 struct nvgpu_mem *mem, 391 struct nvgpu_mem *mem,
394 struct gr_ctx_desc *gr_ctx); 392 struct nvgpu_gr_ctx *gr_ctx);
395 int (*init_sm_id_table)(struct gk20a *g); 393 int (*init_sm_id_table)(struct gk20a *g);
396 int (*load_smid_config)(struct gk20a *g); 394 int (*load_smid_config)(struct gk20a *g);
397 void (*program_sm_id_numbering)(struct gk20a *g, 395 void (*program_sm_id_numbering)(struct gk20a *g,
@@ -440,7 +438,7 @@ struct gpu_ops {
440 u32 (*get_gpcs_swdx_dss_zbc_c_format_reg)(struct gk20a *g); 438 u32 (*get_gpcs_swdx_dss_zbc_c_format_reg)(struct gk20a *g);
441 u32 (*get_gpcs_swdx_dss_zbc_z_format_reg)(struct gk20a *g); 439 u32 (*get_gpcs_swdx_dss_zbc_z_format_reg)(struct gk20a *g);
442 void (*dump_ctxsw_stats)(struct gk20a *g, struct vm_gk20a *vm, 440 void (*dump_ctxsw_stats)(struct gk20a *g, struct vm_gk20a *vm,
443 struct gr_ctx_desc *gr_ctx); 441 struct nvgpu_gr_ctx *gr_ctx);
444 } gr; 442 } gr;
445 struct { 443 struct {
446 void (*init_hw)(struct gk20a *g); 444 void (*init_hw)(struct gk20a *g);
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index 263ae030..f8af091b 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -85,18 +85,19 @@ static int gr_gk20a_alloc_global_ctx_buffers(struct gk20a *g);
85static void gr_gk20a_free_global_ctx_buffers(struct gk20a *g); 85static void gr_gk20a_free_global_ctx_buffers(struct gk20a *g);
86static int gr_gk20a_map_global_ctx_buffers(struct gk20a *g, 86static int gr_gk20a_map_global_ctx_buffers(struct gk20a *g,
87 struct channel_gk20a *c); 87 struct channel_gk20a *c);
88static void gr_gk20a_unmap_global_ctx_buffers(struct channel_gk20a *c); 88static void gr_gk20a_unmap_global_ctx_buffers(struct gk20a *g,
89 89 struct vm_gk20a *vm,
90/* channel gr ctx buffer */ 90 struct nvgpu_gr_ctx *gr_ctx);
91static int gr_gk20a_alloc_channel_gr_ctx(struct gk20a *g, 91static void gr_gk20a_free_channel_pm_ctx(struct gk20a *g,
92 struct channel_gk20a *c, 92 struct vm_gk20a *vm,
93 u32 class, u32 padding); 93 struct nvgpu_gr_ctx *gr_ctx);
94static void gr_gk20a_free_channel_gr_ctx(struct channel_gk20a *c);
95 94
96/* channel patch ctx buffer */ 95/* channel patch ctx buffer */
97static int gr_gk20a_alloc_channel_patch_ctx(struct gk20a *g, 96static int gr_gk20a_alloc_channel_patch_ctx(struct gk20a *g,
98 struct channel_gk20a *c); 97 struct channel_gk20a *c);
99static void gr_gk20a_free_channel_patch_ctx(struct channel_gk20a *c); 98static void gr_gk20a_free_channel_patch_ctx(struct gk20a *g,
99 struct vm_gk20a *vm,
100 struct nvgpu_gr_ctx *gr_ctx);
100 101
101/* golden ctx image */ 102/* golden ctx image */
102static int gr_gk20a_init_golden_ctx_image(struct gk20a *g, 103static int gr_gk20a_init_golden_ctx_image(struct gk20a *g,
@@ -108,8 +109,16 @@ int gr_gk20a_get_ctx_id(struct gk20a *g,
108 struct channel_gk20a *c, 109 struct channel_gk20a *c,
109 u32 *ctx_id) 110 u32 *ctx_id)
110{ 111{
111 struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx; 112 struct tsg_gk20a *tsg;
112 struct nvgpu_mem *mem = &ch_ctx->gr_ctx->mem; 113 struct nvgpu_gr_ctx *gr_ctx = NULL;
114 struct nvgpu_mem *mem = NULL;
115
116 tsg = tsg_gk20a_from_ch(c);
117 if (!tsg)
118 return -EINVAL;
119
120 gr_ctx = &tsg->gr_ctx;
121 mem = &gr_ctx->mem;
113 122
114 /* Channel gr_ctx buffer is gpu cacheable. 123 /* Channel gr_ctx buffer is gpu cacheable.
115 Flush and invalidate before cpu update. */ 124 Flush and invalidate before cpu update. */
@@ -671,62 +680,62 @@ int gr_gk20a_commit_inst(struct channel_gk20a *c, u64 gpu_va)
671 */ 680 */
672 681
673int gr_gk20a_ctx_patch_write_begin(struct gk20a *g, 682int gr_gk20a_ctx_patch_write_begin(struct gk20a *g,
674 struct channel_ctx_gk20a *ch_ctx, 683 struct nvgpu_gr_ctx *gr_ctx,
675 bool update_patch_count) 684 bool update_patch_count)
676{ 685{
677 int err = 0; 686 int err = 0;
678 687
679 err = nvgpu_mem_begin(g, &ch_ctx->patch_ctx.mem); 688 err = nvgpu_mem_begin(g, &gr_ctx->patch_ctx.mem);
680 if (err) 689 if (err)
681 return err; 690 return err;
682 691
683 if (update_patch_count) { 692 if (update_patch_count) {
684 /* reset patch count if ucode has already processed it */ 693 /* reset patch count if ucode has already processed it */
685 ch_ctx->patch_ctx.data_count = nvgpu_mem_rd(g, 694 gr_ctx->patch_ctx.data_count = nvgpu_mem_rd(g,
686 &ch_ctx->gr_ctx->mem, 695 &gr_ctx->mem,
687 ctxsw_prog_main_image_patch_count_o()); 696 ctxsw_prog_main_image_patch_count_o());
688 nvgpu_log(g, gpu_dbg_info, "patch count reset to %d", 697 nvgpu_log(g, gpu_dbg_info, "patch count reset to %d",
689 ch_ctx->patch_ctx.data_count); 698 gr_ctx->patch_ctx.data_count);
690 } 699 }
691 return 0; 700 return 0;
692} 701}
693 702
694void gr_gk20a_ctx_patch_write_end(struct gk20a *g, 703void gr_gk20a_ctx_patch_write_end(struct gk20a *g,
695 struct channel_ctx_gk20a *ch_ctx, 704 struct nvgpu_gr_ctx *gr_ctx,
696 bool update_patch_count) 705 bool update_patch_count)
697{ 706{
698 nvgpu_mem_end(g, &ch_ctx->patch_ctx.mem); 707 nvgpu_mem_end(g, &gr_ctx->patch_ctx.mem);
699 708
700 /* Write context count to context image if it is mapped */ 709 /* Write context count to context image if it is mapped */
701 if (update_patch_count) { 710 if (update_patch_count) {
702 nvgpu_mem_wr(g, &ch_ctx->gr_ctx->mem, 711 nvgpu_mem_wr(g, &gr_ctx->mem,
703 ctxsw_prog_main_image_patch_count_o(), 712 ctxsw_prog_main_image_patch_count_o(),
704 ch_ctx->patch_ctx.data_count); 713 gr_ctx->patch_ctx.data_count);
705 nvgpu_log(g, gpu_dbg_info, "write patch count %d", 714 nvgpu_log(g, gpu_dbg_info, "write patch count %d",
706 ch_ctx->patch_ctx.data_count); 715 gr_ctx->patch_ctx.data_count);
707 } 716 }
708} 717}
709 718
710void gr_gk20a_ctx_patch_write(struct gk20a *g, 719void gr_gk20a_ctx_patch_write(struct gk20a *g,
711 struct channel_ctx_gk20a *ch_ctx, 720 struct nvgpu_gr_ctx *gr_ctx,
712 u32 addr, u32 data, bool patch) 721 u32 addr, u32 data, bool patch)
713{ 722{
714 if (patch) { 723 if (patch) {
715 u32 patch_slot = ch_ctx->patch_ctx.data_count * 724 u32 patch_slot = gr_ctx->patch_ctx.data_count *
716 PATCH_CTX_SLOTS_REQUIRED_PER_ENTRY; 725 PATCH_CTX_SLOTS_REQUIRED_PER_ENTRY;
717 if (patch_slot > (PATCH_CTX_ENTRIES_FROM_SIZE( 726 if (patch_slot > (PATCH_CTX_ENTRIES_FROM_SIZE(
718 ch_ctx->patch_ctx.mem.size) - 727 gr_ctx->patch_ctx.mem.size) -
719 PATCH_CTX_SLOTS_REQUIRED_PER_ENTRY)) { 728 PATCH_CTX_SLOTS_REQUIRED_PER_ENTRY)) {
720 nvgpu_err(g, "failed to access patch_slot %d", 729 nvgpu_err(g, "failed to access patch_slot %d",
721 patch_slot); 730 patch_slot);
722 return; 731 return;
723 } 732 }
724 nvgpu_mem_wr32(g, &ch_ctx->patch_ctx.mem, patch_slot, addr); 733 nvgpu_mem_wr32(g, &gr_ctx->patch_ctx.mem, patch_slot, addr);
725 nvgpu_mem_wr32(g, &ch_ctx->patch_ctx.mem, patch_slot + 1, data); 734 nvgpu_mem_wr32(g, &gr_ctx->patch_ctx.mem, patch_slot + 1, data);
726 ch_ctx->patch_ctx.data_count++; 735 gr_ctx->patch_ctx.data_count++;
727 nvgpu_log(g, gpu_dbg_info, 736 nvgpu_log(g, gpu_dbg_info,
728 "patch addr = 0x%x data = 0x%x data_count %d", 737 "patch addr = 0x%x data = 0x%x data_count %d",
729 addr, data, ch_ctx->patch_ctx.data_count); 738 addr, data, gr_ctx->patch_ctx.data_count);
730 } else { 739 } else {
731 gk20a_writel(g, addr, data); 740 gk20a_writel(g, addr, data);
732 } 741 }
@@ -793,14 +802,22 @@ void gr_gk20a_write_pm_ptr(struct gk20a *g,
793 802
794static int gr_gk20a_ctx_zcull_setup(struct gk20a *g, struct channel_gk20a *c) 803static int gr_gk20a_ctx_zcull_setup(struct gk20a *g, struct channel_gk20a *c)
795{ 804{
796 struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx; 805 struct tsg_gk20a *tsg;
797 struct nvgpu_mem *mem = &ch_ctx->gr_ctx->mem; 806 struct nvgpu_gr_ctx *gr_ctx = NULL;
798 struct ctx_header_desc *ctx = &c->ch_ctx.ctx_header; 807 struct nvgpu_mem *mem = NULL;
808 struct ctx_header_desc *ctx = &c->ctx_header;
799 struct nvgpu_mem *ctxheader = &ctx->mem; 809 struct nvgpu_mem *ctxheader = &ctx->mem;
800 int ret = 0; 810 int ret = 0;
801 811
802 gk20a_dbg_fn(""); 812 gk20a_dbg_fn("");
803 813
814 tsg = tsg_gk20a_from_ch(c);
815 if (!tsg)
816 return -EINVAL;
817
818 gr_ctx = &tsg->gr_ctx;
819 mem = &gr_ctx->mem;
820
804 if (nvgpu_mem_begin(g, mem)) 821 if (nvgpu_mem_begin(g, mem))
805 return -ENOMEM; 822 return -ENOMEM;
806 823
@@ -809,8 +826,8 @@ static int gr_gk20a_ctx_zcull_setup(struct gk20a *g, struct channel_gk20a *c)
809 goto clean_up_mem; 826 goto clean_up_mem;
810 } 827 }
811 828
812 if (ch_ctx->zcull_ctx.gpu_va == 0 && 829 if (gr_ctx->zcull_ctx.gpu_va == 0 &&
813 ch_ctx->zcull_ctx.ctx_sw_mode == 830 gr_ctx->zcull_ctx.ctx_sw_mode ==
814 ctxsw_prog_main_image_zcull_mode_separate_buffer_v()) { 831 ctxsw_prog_main_image_zcull_mode_separate_buffer_v()) {
815 ret = -EINVAL; 832 ret = -EINVAL;
816 goto clean_up; 833 goto clean_up;
@@ -830,13 +847,13 @@ static int gr_gk20a_ctx_zcull_setup(struct gk20a *g, struct channel_gk20a *c)
830 847
831 nvgpu_mem_wr(g, mem, 848 nvgpu_mem_wr(g, mem,
832 ctxsw_prog_main_image_zcull_o(), 849 ctxsw_prog_main_image_zcull_o(),
833 ch_ctx->zcull_ctx.ctx_sw_mode); 850 gr_ctx->zcull_ctx.ctx_sw_mode);
834 851
835 if (ctxheader->gpu_va) 852 if (ctxheader->gpu_va)
836 g->ops.gr.write_zcull_ptr(g, ctxheader, 853 g->ops.gr.write_zcull_ptr(g, ctxheader,
837 ch_ctx->zcull_ctx.gpu_va); 854 gr_ctx->zcull_ctx.gpu_va);
838 else 855 else
839 g->ops.gr.write_zcull_ptr(g, mem, ch_ctx->zcull_ctx.gpu_va); 856 g->ops.gr.write_zcull_ptr(g, mem, gr_ctx->zcull_ctx.gpu_va);
840 857
841 gk20a_enable_channel_tsg(g, c); 858 gk20a_enable_channel_tsg(g, c);
842 859
@@ -869,22 +886,29 @@ static int gr_gk20a_commit_global_ctx_buffers(struct gk20a *g,
869 struct channel_gk20a *c, bool patch) 886 struct channel_gk20a *c, bool patch)
870{ 887{
871 struct gr_gk20a *gr = &g->gr; 888 struct gr_gk20a *gr = &g->gr;
872 struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx; 889 struct tsg_gk20a *tsg;
890 struct nvgpu_gr_ctx *gr_ctx = NULL;
873 u64 addr; 891 u64 addr;
874 u32 size; 892 u32 size;
875 893
876 gk20a_dbg_fn(""); 894 gk20a_dbg_fn("");
895
896 tsg = tsg_gk20a_from_ch(c);
897 if (!tsg)
898 return -EINVAL;
899
900 gr_ctx = &tsg->gr_ctx;
877 if (patch) { 901 if (patch) {
878 int err; 902 int err;
879 err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx, false); 903 err = gr_gk20a_ctx_patch_write_begin(g, gr_ctx, false);
880 if (err) 904 if (err)
881 return err; 905 return err;
882 } 906 }
883 907
884 /* global pagepool buffer */ 908 /* global pagepool buffer */
885 addr = (u64_lo32(ch_ctx->global_ctx_buffer_va[PAGEPOOL_VA]) >> 909 addr = (u64_lo32(gr_ctx->global_ctx_buffer_va[PAGEPOOL_VA]) >>
886 gr_scc_pagepool_base_addr_39_8_align_bits_v()) | 910 gr_scc_pagepool_base_addr_39_8_align_bits_v()) |
887 (u64_hi32(ch_ctx->global_ctx_buffer_va[PAGEPOOL_VA]) << 911 (u64_hi32(gr_ctx->global_ctx_buffer_va[PAGEPOOL_VA]) <<
888 (32 - gr_scc_pagepool_base_addr_39_8_align_bits_v())); 912 (32 - gr_scc_pagepool_base_addr_39_8_align_bits_v()));
889 913
890 size = gr->global_ctx_buffer[PAGEPOOL].mem.size / 914 size = gr->global_ctx_buffer[PAGEPOOL].mem.size /
@@ -896,12 +920,12 @@ static int gr_gk20a_commit_global_ctx_buffers(struct gk20a *g,
896 gk20a_dbg_info("pagepool buffer addr : 0x%016llx, size : %d", 920 gk20a_dbg_info("pagepool buffer addr : 0x%016llx, size : %d",
897 addr, size); 921 addr, size);
898 922
899 g->ops.gr.commit_global_pagepool(g, ch_ctx, addr, size, patch); 923 g->ops.gr.commit_global_pagepool(g, gr_ctx, addr, size, patch);
900 924
901 /* global bundle cb */ 925 /* global bundle cb */
902 addr = (u64_lo32(ch_ctx->global_ctx_buffer_va[CIRCULAR_VA]) >> 926 addr = (u64_lo32(gr_ctx->global_ctx_buffer_va[CIRCULAR_VA]) >>
903 gr_scc_bundle_cb_base_addr_39_8_align_bits_v()) | 927 gr_scc_bundle_cb_base_addr_39_8_align_bits_v()) |
904 (u64_hi32(ch_ctx->global_ctx_buffer_va[CIRCULAR_VA]) << 928 (u64_hi32(gr_ctx->global_ctx_buffer_va[CIRCULAR_VA]) <<
905 (32 - gr_scc_bundle_cb_base_addr_39_8_align_bits_v())); 929 (32 - gr_scc_bundle_cb_base_addr_39_8_align_bits_v()));
906 930
907 size = gr->bundle_cb_default_size; 931 size = gr->bundle_cb_default_size;
@@ -909,20 +933,20 @@ static int gr_gk20a_commit_global_ctx_buffers(struct gk20a *g,
909 gk20a_dbg_info("bundle cb addr : 0x%016llx, size : %d", 933 gk20a_dbg_info("bundle cb addr : 0x%016llx, size : %d",
910 addr, size); 934 addr, size);
911 935
912 g->ops.gr.commit_global_bundle_cb(g, ch_ctx, addr, size, patch); 936 g->ops.gr.commit_global_bundle_cb(g, gr_ctx, addr, size, patch);
913 937
914 /* global attrib cb */ 938 /* global attrib cb */
915 addr = (u64_lo32(ch_ctx->global_ctx_buffer_va[ATTRIBUTE_VA]) >> 939 addr = (u64_lo32(gr_ctx->global_ctx_buffer_va[ATTRIBUTE_VA]) >>
916 gr_gpcs_setup_attrib_cb_base_addr_39_12_align_bits_v()) | 940 gr_gpcs_setup_attrib_cb_base_addr_39_12_align_bits_v()) |
917 (u64_hi32(ch_ctx->global_ctx_buffer_va[ATTRIBUTE_VA]) << 941 (u64_hi32(gr_ctx->global_ctx_buffer_va[ATTRIBUTE_VA]) <<
918 (32 - gr_gpcs_setup_attrib_cb_base_addr_39_12_align_bits_v())); 942 (32 - gr_gpcs_setup_attrib_cb_base_addr_39_12_align_bits_v()));
919 943
920 gk20a_dbg_info("attrib cb addr : 0x%016llx", addr); 944 gk20a_dbg_info("attrib cb addr : 0x%016llx", addr);
921 g->ops.gr.commit_global_attrib_cb(g, ch_ctx, addr, patch); 945 g->ops.gr.commit_global_attrib_cb(g, gr_ctx, addr, patch);
922 g->ops.gr.commit_global_cb_manager(g, c, patch); 946 g->ops.gr.commit_global_cb_manager(g, c, patch);
923 947
924 if (patch) 948 if (patch)
925 gr_gk20a_ctx_patch_write_end(g, ch_ctx, false); 949 gr_gk20a_ctx_patch_write_end(g, gr_ctx, false);
926 950
927 return 0; 951 return 0;
928} 952}
@@ -930,7 +954,7 @@ static int gr_gk20a_commit_global_ctx_buffers(struct gk20a *g,
930int gr_gk20a_commit_global_timeslice(struct gk20a *g, struct channel_gk20a *c) 954int gr_gk20a_commit_global_timeslice(struct gk20a *g, struct channel_gk20a *c)
931{ 955{
932 struct gr_gk20a *gr = &g->gr; 956 struct gr_gk20a *gr = &g->gr;
933 struct channel_ctx_gk20a *ch_ctx = NULL; 957 struct nvgpu_gr_ctx *gr_ctx = NULL;
934 u32 gpm_pd_cfg; 958 u32 gpm_pd_cfg;
935 u32 pd_ab_dist_cfg0; 959 u32 pd_ab_dist_cfg0;
936 u32 ds_debug; 960 u32 ds_debug;
@@ -956,22 +980,22 @@ int gr_gk20a_commit_global_timeslice(struct gk20a *g, struct channel_gk20a *c)
956 ds_debug = gr_ds_debug_timeslice_mode_enable_f() | ds_debug; 980 ds_debug = gr_ds_debug_timeslice_mode_enable_f() | ds_debug;
957 mpc_vtg_debug = gr_gpcs_tpcs_mpc_vtg_debug_timeslice_mode_enabled_f() | mpc_vtg_debug; 981 mpc_vtg_debug = gr_gpcs_tpcs_mpc_vtg_debug_timeslice_mode_enabled_f() | mpc_vtg_debug;
958 982
959 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_gpm_pd_cfg_r(), gpm_pd_cfg, false); 983 gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_gpm_pd_cfg_r(), gpm_pd_cfg, false);
960 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_pe_vaf_r(), pe_vaf, false); 984 gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_tpcs_pe_vaf_r(), pe_vaf, false);
961 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_pes_vsc_vpc_r(), pe_vsc_vpc, false); 985 gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_tpcs_pes_vsc_vpc_r(), pe_vsc_vpc, false);
962 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_pd_ab_dist_cfg0_r(), pd_ab_dist_cfg0, false); 986 gr_gk20a_ctx_patch_write(g, gr_ctx, gr_pd_ab_dist_cfg0_r(), pd_ab_dist_cfg0, false);
963 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_ds_debug_r(), ds_debug, false); 987 gr_gk20a_ctx_patch_write(g, gr_ctx, gr_ds_debug_r(), ds_debug, false);
964 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_mpc_vtg_debug_r(), mpc_vtg_debug, false); 988 gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_tpcs_mpc_vtg_debug_r(), mpc_vtg_debug, false);
965 } else { 989 } else {
966 gpm_pd_cfg = gr_gpcs_gpm_pd_cfg_timeslice_mode_disable_f() | gpm_pd_cfg; 990 gpm_pd_cfg = gr_gpcs_gpm_pd_cfg_timeslice_mode_disable_f() | gpm_pd_cfg;
967 pd_ab_dist_cfg0 = gr_pd_ab_dist_cfg0_timeslice_enable_dis_f() | pd_ab_dist_cfg0; 991 pd_ab_dist_cfg0 = gr_pd_ab_dist_cfg0_timeslice_enable_dis_f() | pd_ab_dist_cfg0;
968 ds_debug = gr_ds_debug_timeslice_mode_disable_f() | ds_debug; 992 ds_debug = gr_ds_debug_timeslice_mode_disable_f() | ds_debug;
969 mpc_vtg_debug = gr_gpcs_tpcs_mpc_vtg_debug_timeslice_mode_disabled_f() | mpc_vtg_debug; 993 mpc_vtg_debug = gr_gpcs_tpcs_mpc_vtg_debug_timeslice_mode_disabled_f() | mpc_vtg_debug;
970 994
971 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_gpm_pd_cfg_r(), gpm_pd_cfg, false); 995 gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_gpm_pd_cfg_r(), gpm_pd_cfg, false);
972 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_pd_ab_dist_cfg0_r(), pd_ab_dist_cfg0, false); 996 gr_gk20a_ctx_patch_write(g, gr_ctx, gr_pd_ab_dist_cfg0_r(), pd_ab_dist_cfg0, false);
973 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_ds_debug_r(), ds_debug, false); 997 gr_gk20a_ctx_patch_write(g, gr_ctx, gr_ds_debug_r(), ds_debug, false);
974 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_mpc_vtg_debug_r(), mpc_vtg_debug, false); 998 gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_tpcs_mpc_vtg_debug_r(), mpc_vtg_debug, false);
975 } 999 }
976 1000
977 return 0; 1001 return 0;
@@ -1360,13 +1384,14 @@ static int gr_gk20a_init_golden_ctx_image(struct gk20a *g,
1360 struct channel_gk20a *c) 1384 struct channel_gk20a *c)
1361{ 1385{
1362 struct gr_gk20a *gr = &g->gr; 1386 struct gr_gk20a *gr = &g->gr;
1363 struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx; 1387 struct tsg_gk20a *tsg;
1388 struct nvgpu_gr_ctx *gr_ctx = NULL;
1364 u32 ctx_header_bytes = ctxsw_prog_fecs_header_v(); 1389 u32 ctx_header_bytes = ctxsw_prog_fecs_header_v();
1365 u32 ctx_header_words; 1390 u32 ctx_header_words;
1366 u32 i; 1391 u32 i;
1367 u32 data; 1392 u32 data;
1368 struct nvgpu_mem *gold_mem = &gr->global_ctx_buffer[GOLDEN_CTX].mem; 1393 struct nvgpu_mem *gold_mem = &gr->global_ctx_buffer[GOLDEN_CTX].mem;
1369 struct nvgpu_mem *gr_mem = &ch_ctx->gr_ctx->mem; 1394 struct nvgpu_mem *gr_mem;
1370 u32 err = 0; 1395 u32 err = 0;
1371 struct aiv_list_gk20a *sw_ctx_load = &g->gr.ctx_vars.sw_ctx_load; 1396 struct aiv_list_gk20a *sw_ctx_load = &g->gr.ctx_vars.sw_ctx_load;
1372 struct av_list_gk20a *sw_method_init = &g->gr.ctx_vars.sw_method_init; 1397 struct av_list_gk20a *sw_method_init = &g->gr.ctx_vars.sw_method_init;
@@ -1374,6 +1399,13 @@ static int gr_gk20a_init_golden_ctx_image(struct gk20a *g,
1374 1399
1375 gk20a_dbg_fn(""); 1400 gk20a_dbg_fn("");
1376 1401
1402 tsg = tsg_gk20a_from_ch(c);
1403 if (!tsg)
1404 return -EINVAL;
1405
1406 gr_ctx = &tsg->gr_ctx;
1407 gr_mem = &gr_ctx->mem;
1408
1377 /* golden ctx is global to all channels. Although only the first 1409 /* golden ctx is global to all channels. Although only the first
1378 channel initializes golden image, driver needs to prevent multiple 1410 channel initializes golden image, driver needs to prevent multiple
1379 channels from initializing golden ctx at the same time */ 1411 channels from initializing golden ctx at the same time */
@@ -1565,7 +1597,7 @@ restore_fe_go_idle:
1565 1597
1566 g->ops.gr.write_zcull_ptr(g, gold_mem, 0); 1598 g->ops.gr.write_zcull_ptr(g, gold_mem, 0);
1567 1599
1568 err = g->ops.gr.commit_inst(c, ch_ctx->global_ctx_buffer_va[GOLDEN_CTX_VA]); 1600 err = g->ops.gr.commit_inst(c, gr_ctx->global_ctx_buffer_va[GOLDEN_CTX_VA]);
1569 if (err) 1601 if (err)
1570 goto clean_up; 1602 goto clean_up;
1571 1603
@@ -1614,20 +1646,25 @@ int gr_gk20a_update_smpc_ctxsw_mode(struct gk20a *g,
1614 struct channel_gk20a *c, 1646 struct channel_gk20a *c,
1615 bool enable_smpc_ctxsw) 1647 bool enable_smpc_ctxsw)
1616{ 1648{
1617 struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx; 1649 struct tsg_gk20a *tsg;
1618 struct nvgpu_mem *mem; 1650 struct nvgpu_gr_ctx *gr_ctx = NULL;
1651 struct nvgpu_mem *mem = NULL;
1619 u32 data; 1652 u32 data;
1620 int ret; 1653 int ret;
1621 1654
1622 gk20a_dbg_fn(""); 1655 gk20a_dbg_fn("");
1623 1656
1624 if (!ch_ctx->gr_ctx) { 1657 tsg = tsg_gk20a_from_ch(c);
1658 if (!tsg)
1659 return -EINVAL;
1660
1661 gr_ctx = &tsg->gr_ctx;
1662 mem = &gr_ctx->mem;
1663 if (!nvgpu_mem_is_valid(mem)) {
1625 nvgpu_err(g, "no graphics context allocated"); 1664 nvgpu_err(g, "no graphics context allocated");
1626 return -EFAULT; 1665 return -EFAULT;
1627 } 1666 }
1628 1667
1629 mem = &ch_ctx->gr_ctx->mem;
1630
1631 ret = gk20a_disable_channel_tsg(g, c); 1668 ret = gk20a_disable_channel_tsg(g, c);
1632 if (ret) { 1669 if (ret) {
1633 nvgpu_err(g, "failed to disable channel/TSG"); 1670 nvgpu_err(g, "failed to disable channel/TSG");
@@ -1670,24 +1707,30 @@ int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g,
1670 struct channel_gk20a *c, 1707 struct channel_gk20a *c,
1671 bool enable_hwpm_ctxsw) 1708 bool enable_hwpm_ctxsw)
1672{ 1709{
1673 struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx; 1710 struct tsg_gk20a *tsg;
1674 struct pm_ctx_desc *pm_ctx = &ch_ctx->pm_ctx; 1711 struct nvgpu_mem *gr_mem = NULL;
1675 struct nvgpu_mem *gr_mem; 1712 struct nvgpu_gr_ctx *gr_ctx;
1713 struct pm_ctx_desc *pm_ctx;
1676 u32 data; 1714 u32 data;
1677 u64 virt_addr; 1715 u64 virt_addr;
1678 struct ctx_header_desc *ctx = &c->ch_ctx.ctx_header; 1716 struct ctx_header_desc *ctx = &c->ctx_header;
1679 struct nvgpu_mem *ctxheader = &ctx->mem; 1717 struct nvgpu_mem *ctxheader = &ctx->mem;
1680 int ret; 1718 int ret;
1681 1719
1682 gk20a_dbg_fn(""); 1720 gk20a_dbg_fn("");
1683 1721
1684 if (!ch_ctx->gr_ctx) { 1722 tsg = tsg_gk20a_from_ch(c);
1723 if (!tsg)
1724 return -EINVAL;
1725
1726 gr_ctx = &tsg->gr_ctx;
1727 pm_ctx = &gr_ctx->pm_ctx;
1728 gr_mem = &gr_ctx->mem;
1729 if (!nvgpu_mem_is_valid(gr_mem)) {
1685 nvgpu_err(g, "no graphics context allocated"); 1730 nvgpu_err(g, "no graphics context allocated");
1686 return -EFAULT; 1731 return -EFAULT;
1687 } 1732 }
1688 1733
1689 gr_mem = &ch_ctx->gr_ctx->mem;
1690
1691 if (enable_hwpm_ctxsw) { 1734 if (enable_hwpm_ctxsw) {
1692 if (pm_ctx->pm_mode == ctxsw_prog_main_image_pm_mode_ctxsw_f()) 1735 if (pm_ctx->pm_mode == ctxsw_prog_main_image_pm_mode_ctxsw_f())
1693 return 0; 1736 return 0;
@@ -1816,20 +1859,25 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
1816 struct channel_gk20a *c) 1859 struct channel_gk20a *c)
1817{ 1860{
1818 struct gr_gk20a *gr = &g->gr; 1861 struct gr_gk20a *gr = &g->gr;
1819 struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx; 1862 struct tsg_gk20a *tsg;
1863 struct nvgpu_gr_ctx *gr_ctx;
1820 u32 virt_addr_lo; 1864 u32 virt_addr_lo;
1821 u32 virt_addr_hi; 1865 u32 virt_addr_hi;
1822 u64 virt_addr = 0; 1866 u64 virt_addr = 0;
1823 u32 v, data; 1867 u32 v, data;
1824 int ret = 0; 1868 int ret = 0;
1825 struct nvgpu_mem *mem = &ch_ctx->gr_ctx->mem; 1869 struct nvgpu_mem *mem;
1826 struct ctx_header_desc *ctx = &c->ch_ctx.ctx_header;
1827 struct nvgpu_mem *ctxheader = &ctx->mem;
1828 1870
1829 gk20a_dbg_fn(""); 1871 gk20a_dbg_fn("");
1830 1872
1873 tsg = tsg_gk20a_from_ch(c);
1874 if (!tsg)
1875 return -EINVAL;
1876
1877 gr_ctx = &tsg->gr_ctx;
1878 mem = &gr_ctx->mem;
1831 if (gr->ctx_vars.local_golden_image == NULL) 1879 if (gr->ctx_vars.local_golden_image == NULL)
1832 return -1; 1880 return -EINVAL;
1833 1881
1834 /* Channel gr_ctx buffer is gpu cacheable. 1882 /* Channel gr_ctx buffer is gpu cacheable.
1835 Flush and invalidate before cpu update. */ 1883 Flush and invalidate before cpu update. */
@@ -1838,11 +1886,6 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
1838 if (nvgpu_mem_begin(g, mem)) 1886 if (nvgpu_mem_begin(g, mem))
1839 return -ENOMEM; 1887 return -ENOMEM;
1840 1888
1841 if (nvgpu_mem_begin(g, ctxheader)) {
1842 ret = -ENOMEM;
1843 goto clean_up_mem;
1844 }
1845
1846 nvgpu_mem_wr_n(g, mem, 0, 1889 nvgpu_mem_wr_n(g, mem, 0,
1847 gr->ctx_vars.local_golden_image, 1890 gr->ctx_vars.local_golden_image,
1848 gr->ctx_vars.golden_image_size); 1891 gr->ctx_vars.golden_image_size);
@@ -1855,9 +1898,9 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
1855 1898
1856 /* set priv access map */ 1899 /* set priv access map */
1857 virt_addr_lo = 1900 virt_addr_lo =
1858 u64_lo32(ch_ctx->global_ctx_buffer_va[PRIV_ACCESS_MAP_VA]); 1901 u64_lo32(gr_ctx->global_ctx_buffer_va[PRIV_ACCESS_MAP_VA]);
1859 virt_addr_hi = 1902 virt_addr_hi =
1860 u64_hi32(ch_ctx->global_ctx_buffer_va[PRIV_ACCESS_MAP_VA]); 1903 u64_hi32(gr_ctx->global_ctx_buffer_va[PRIV_ACCESS_MAP_VA]);
1861 1904
1862 if (g->allow_all) 1905 if (g->allow_all)
1863 data = ctxsw_prog_main_image_priv_access_map_config_mode_allow_all_f(); 1906 data = ctxsw_prog_main_image_priv_access_map_config_mode_allow_all_f();
@@ -1867,21 +1910,13 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
1867 nvgpu_mem_wr(g, mem, ctxsw_prog_main_image_priv_access_map_config_o(), 1910 nvgpu_mem_wr(g, mem, ctxsw_prog_main_image_priv_access_map_config_o(),
1868 data); 1911 data);
1869 1912
1870 if (ctxheader->gpu_va) { 1913 nvgpu_mem_wr(g, mem,
1871 nvgpu_mem_wr(g, ctxheader, 1914 ctxsw_prog_main_image_priv_access_map_addr_lo_o(),
1872 ctxsw_prog_main_image_priv_access_map_addr_lo_o(), 1915 virt_addr_lo);
1873 virt_addr_lo); 1916 nvgpu_mem_wr(g, mem,
1874 nvgpu_mem_wr(g, ctxheader, 1917 ctxsw_prog_main_image_priv_access_map_addr_hi_o(),
1875 ctxsw_prog_main_image_priv_access_map_addr_hi_o(), 1918 virt_addr_hi);
1876 virt_addr_hi); 1919
1877 } else {
1878 nvgpu_mem_wr(g, mem,
1879 ctxsw_prog_main_image_priv_access_map_addr_lo_o(),
1880 virt_addr_lo);
1881 nvgpu_mem_wr(g, mem,
1882 ctxsw_prog_main_image_priv_access_map_addr_hi_o(),
1883 virt_addr_hi);
1884 }
1885 /* disable verif features */ 1920 /* disable verif features */
1886 v = nvgpu_mem_rd(g, mem, ctxsw_prog_main_image_misc_options_o()); 1921 v = nvgpu_mem_rd(g, mem, ctxsw_prog_main_image_misc_options_o());
1887 v = v & ~(ctxsw_prog_main_image_misc_options_verif_features_m()); 1922 v = v & ~(ctxsw_prog_main_image_misc_options_verif_features_m());
@@ -1889,65 +1924,50 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
1889 nvgpu_mem_wr(g, mem, ctxsw_prog_main_image_misc_options_o(), v); 1924 nvgpu_mem_wr(g, mem, ctxsw_prog_main_image_misc_options_o(), v);
1890 1925
1891 if (g->ops.gr.update_ctxsw_preemption_mode) 1926 if (g->ops.gr.update_ctxsw_preemption_mode)
1892 g->ops.gr.update_ctxsw_preemption_mode(g, ch_ctx, mem); 1927 g->ops.gr.update_ctxsw_preemption_mode(g, c, mem);
1893 1928
1894 if (g->ops.gr.update_boosted_ctx) 1929 if (g->ops.gr.update_boosted_ctx)
1895 g->ops.gr.update_boosted_ctx(g, mem, ch_ctx->gr_ctx); 1930 g->ops.gr.update_boosted_ctx(g, mem, gr_ctx);
1896 1931
1897 virt_addr_lo = u64_lo32(ch_ctx->patch_ctx.mem.gpu_va); 1932 virt_addr_lo = u64_lo32(gr_ctx->patch_ctx.mem.gpu_va);
1898 virt_addr_hi = u64_hi32(ch_ctx->patch_ctx.mem.gpu_va); 1933 virt_addr_hi = u64_hi32(gr_ctx->patch_ctx.mem.gpu_va);
1899 1934
1900 nvgpu_log(g, gpu_dbg_info, "write patch count = %d", 1935 nvgpu_log(g, gpu_dbg_info, "write patch count = %d",
1901 ch_ctx->patch_ctx.data_count); 1936 gr_ctx->patch_ctx.data_count);
1902 nvgpu_mem_wr(g, mem, ctxsw_prog_main_image_patch_count_o(), 1937 nvgpu_mem_wr(g, mem, ctxsw_prog_main_image_patch_count_o(),
1903 ch_ctx->patch_ctx.data_count); 1938 gr_ctx->patch_ctx.data_count);
1904 1939
1905 if (ctxheader->gpu_va) { 1940 nvgpu_mem_wr(g, mem,
1906 nvgpu_mem_wr(g, ctxheader, 1941 ctxsw_prog_main_image_patch_adr_lo_o(),
1907 ctxsw_prog_main_image_patch_adr_lo_o(), 1942 virt_addr_lo);
1908 virt_addr_lo); 1943 nvgpu_mem_wr(g, mem,
1909 nvgpu_mem_wr(g, ctxheader, 1944 ctxsw_prog_main_image_patch_adr_hi_o(),
1910 ctxsw_prog_main_image_patch_adr_hi_o(), 1945 virt_addr_hi);
1911 virt_addr_hi);
1912 } else {
1913 nvgpu_mem_wr(g, mem,
1914 ctxsw_prog_main_image_patch_adr_lo_o(),
1915 virt_addr_lo);
1916 nvgpu_mem_wr(g, mem,
1917 ctxsw_prog_main_image_patch_adr_hi_o(),
1918 virt_addr_hi);
1919 }
1920 1946
1921 /* Update main header region of the context buffer with the info needed 1947 /* Update main header region of the context buffer with the info needed
1922 * for PM context switching, including mode and possibly a pointer to 1948 * for PM context switching, including mode and possibly a pointer to
1923 * the PM backing store. 1949 * the PM backing store.
1924 */ 1950 */
1925 if (ch_ctx->pm_ctx.pm_mode == ctxsw_prog_main_image_pm_mode_ctxsw_f()) { 1951 if (gr_ctx->pm_ctx.pm_mode == ctxsw_prog_main_image_pm_mode_ctxsw_f()) {
1926 if (ch_ctx->pm_ctx.mem.gpu_va == 0) { 1952 if (gr_ctx->pm_ctx.mem.gpu_va == 0) {
1927 nvgpu_err(g, 1953 nvgpu_err(g,
1928 "context switched pm with no pm buffer!"); 1954 "context switched pm with no pm buffer!");
1929 nvgpu_mem_end(g, mem); 1955 nvgpu_mem_end(g, mem);
1930 return -EFAULT; 1956 return -EFAULT;
1931 } 1957 }
1932 1958
1933 virt_addr = ch_ctx->pm_ctx.mem.gpu_va; 1959 virt_addr = gr_ctx->pm_ctx.mem.gpu_va;
1934 } else 1960 } else
1935 virt_addr = 0; 1961 virt_addr = 0;
1936 1962
1937 data = nvgpu_mem_rd(g, mem, ctxsw_prog_main_image_pm_o()); 1963 data = nvgpu_mem_rd(g, mem, ctxsw_prog_main_image_pm_o());
1938 data = data & ~ctxsw_prog_main_image_pm_mode_m(); 1964 data = data & ~ctxsw_prog_main_image_pm_mode_m();
1939 data |= ch_ctx->pm_ctx.pm_mode; 1965 data |= gr_ctx->pm_ctx.pm_mode;
1940 1966
1941 nvgpu_mem_wr(g, mem, ctxsw_prog_main_image_pm_o(), data); 1967 nvgpu_mem_wr(g, mem, ctxsw_prog_main_image_pm_o(), data);
1942 1968
1943 if (ctxheader->gpu_va) 1969 g->ops.gr.write_pm_ptr(g, mem, virt_addr);
1944 g->ops.gr.write_pm_ptr(g, ctxheader, virt_addr);
1945 else
1946 g->ops.gr.write_pm_ptr(g, mem, virt_addr);
1947
1948 1970
1949 nvgpu_mem_end(g, ctxheader);
1950clean_up_mem:
1951 nvgpu_mem_end(g, mem); 1971 nvgpu_mem_end(g, mem);
1952 1972
1953 return ret; 1973 return ret;
@@ -2568,13 +2588,13 @@ static int gr_gk20a_alloc_global_ctx_buffers(struct gk20a *g)
2568 return -ENOMEM; 2588 return -ENOMEM;
2569} 2589}
2570 2590
2571static void gr_gk20a_unmap_global_ctx_buffers(struct channel_gk20a *c) 2591static void gr_gk20a_unmap_global_ctx_buffers(struct gk20a *g,
2592 struct vm_gk20a *vm,
2593 struct nvgpu_gr_ctx *gr_ctx)
2572{ 2594{
2573 struct vm_gk20a *ch_vm = c->vm; 2595 u64 *g_bfr_va = gr_ctx->global_ctx_buffer_va;
2574 struct gr_gk20a *gr = &c->g->gr; 2596 u64 *g_bfr_size = gr_ctx->global_ctx_buffer_size;
2575 u64 *g_bfr_va = c->ch_ctx.global_ctx_buffer_va; 2597 int *g_bfr_index = gr_ctx->global_ctx_buffer_index;
2576 u64 *g_bfr_size = c->ch_ctx.global_ctx_buffer_size;
2577 int *g_bfr_index = c->ch_ctx.global_ctx_buffer_index;
2578 u32 i; 2598 u32 i;
2579 2599
2580 gk20a_dbg_fn(""); 2600 gk20a_dbg_fn("");
@@ -2588,32 +2608,41 @@ static void gr_gk20a_unmap_global_ctx_buffers(struct channel_gk20a *c)
2588 * the correct struct nvgpu_mem to use. Handles the VPR 2608 * the correct struct nvgpu_mem to use. Handles the VPR
2589 * vs non-VPR difference in context images. 2609 * vs non-VPR difference in context images.
2590 */ 2610 */
2591 mem = &gr->global_ctx_buffer[g_bfr_index[i]].mem; 2611 mem = &g->gr.global_ctx_buffer[g_bfr_index[i]].mem;
2592 2612
2593 nvgpu_gmmu_unmap(ch_vm, mem, g_bfr_va[i]); 2613 nvgpu_gmmu_unmap(vm, mem, g_bfr_va[i]);
2594 } 2614 }
2595 } 2615 }
2596 2616
2597 memset(g_bfr_va, 0, sizeof(c->ch_ctx.global_ctx_buffer_va)); 2617 memset(g_bfr_va, 0, sizeof(gr_ctx->global_ctx_buffer_va));
2598 memset(g_bfr_size, 0, sizeof(c->ch_ctx.global_ctx_buffer_size)); 2618 memset(g_bfr_size, 0, sizeof(gr_ctx->global_ctx_buffer_size));
2599 memset(g_bfr_index, 0, sizeof(c->ch_ctx.global_ctx_buffer_index)); 2619 memset(g_bfr_index, 0, sizeof(gr_ctx->global_ctx_buffer_index));
2600 2620
2601 c->ch_ctx.global_ctx_buffer_mapped = false; 2621 gr_ctx->global_ctx_buffer_mapped = false;
2602} 2622}
2603 2623
2604static int gr_gk20a_map_global_ctx_buffers(struct gk20a *g, 2624static int gr_gk20a_map_global_ctx_buffers(struct gk20a *g,
2605 struct channel_gk20a *c) 2625 struct channel_gk20a *c)
2606{ 2626{
2627 struct tsg_gk20a *tsg;
2607 struct vm_gk20a *ch_vm = c->vm; 2628 struct vm_gk20a *ch_vm = c->vm;
2608 u64 *g_bfr_va = c->ch_ctx.global_ctx_buffer_va; 2629 u64 *g_bfr_va;
2609 u64 *g_bfr_size = c->ch_ctx.global_ctx_buffer_size; 2630 u64 *g_bfr_size;
2610 int *g_bfr_index = c->ch_ctx.global_ctx_buffer_index; 2631 int *g_bfr_index;
2611 struct gr_gk20a *gr = &g->gr; 2632 struct gr_gk20a *gr = &g->gr;
2612 struct nvgpu_mem *mem; 2633 struct nvgpu_mem *mem;
2613 u64 gpu_va; 2634 u64 gpu_va;
2614 2635
2615 gk20a_dbg_fn(""); 2636 gk20a_dbg_fn("");
2616 2637
2638 tsg = tsg_gk20a_from_ch(c);
2639 if (!tsg)
2640 return -EINVAL;
2641
2642 g_bfr_va = tsg->gr_ctx.global_ctx_buffer_va;
2643 g_bfr_size = tsg->gr_ctx.global_ctx_buffer_size;
2644 g_bfr_index = tsg->gr_ctx.global_ctx_buffer_index;
2645
2617 /* Circular Buffer */ 2646 /* Circular Buffer */
2618 if (c->vpr && 2647 if (c->vpr &&
2619 nvgpu_mem_is_valid(&gr->global_ctx_buffer[CIRCULAR_VPR].mem)) { 2648 nvgpu_mem_is_valid(&gr->global_ctx_buffer[CIRCULAR_VPR].mem)) {
@@ -2688,21 +2717,20 @@ static int gr_gk20a_map_global_ctx_buffers(struct gk20a *g,
2688 g_bfr_size[PRIV_ACCESS_MAP_VA] = mem->size; 2717 g_bfr_size[PRIV_ACCESS_MAP_VA] = mem->size;
2689 g_bfr_index[PRIV_ACCESS_MAP_VA] = PRIV_ACCESS_MAP; 2718 g_bfr_index[PRIV_ACCESS_MAP_VA] = PRIV_ACCESS_MAP;
2690 2719
2691 c->ch_ctx.global_ctx_buffer_mapped = true; 2720 tsg->gr_ctx.global_ctx_buffer_mapped = true;
2692 return 0; 2721 return 0;
2693 2722
2694clean_up: 2723clean_up:
2695 gr_gk20a_unmap_global_ctx_buffers(c); 2724 gr_gk20a_unmap_global_ctx_buffers(g, ch_vm, &tsg->gr_ctx);
2696 2725
2697 return -ENOMEM; 2726 return -ENOMEM;
2698} 2727}
2699 2728
2700int gr_gk20a_alloc_gr_ctx(struct gk20a *g, 2729int gr_gk20a_alloc_gr_ctx(struct gk20a *g,
2701 struct gr_ctx_desc **__gr_ctx, struct vm_gk20a *vm, 2730 struct nvgpu_gr_ctx *gr_ctx, struct vm_gk20a *vm,
2702 u32 class, 2731 u32 class,
2703 u32 padding) 2732 u32 padding)
2704{ 2733{
2705 struct gr_ctx_desc *gr_ctx = NULL;
2706 struct gr_gk20a *gr = &g->gr; 2734 struct gr_gk20a *gr = &g->gr;
2707 int err = 0; 2735 int err = 0;
2708 2736
@@ -2715,15 +2743,11 @@ int gr_gk20a_alloc_gr_ctx(struct gk20a *g,
2715 gr->ctx_vars.buffer_size = gr->ctx_vars.golden_image_size; 2743 gr->ctx_vars.buffer_size = gr->ctx_vars.golden_image_size;
2716 gr->ctx_vars.buffer_total_size = gr->ctx_vars.golden_image_size; 2744 gr->ctx_vars.buffer_total_size = gr->ctx_vars.golden_image_size;
2717 2745
2718 gr_ctx = nvgpu_kzalloc(g, sizeof(*gr_ctx));
2719 if (!gr_ctx)
2720 return -ENOMEM;
2721
2722 err = nvgpu_dma_alloc_flags(g, NVGPU_DMA_NO_KERNEL_MAPPING, 2746 err = nvgpu_dma_alloc_flags(g, NVGPU_DMA_NO_KERNEL_MAPPING,
2723 gr->ctx_vars.buffer_total_size, 2747 gr->ctx_vars.buffer_total_size,
2724 &gr_ctx->mem); 2748 &gr_ctx->mem);
2725 if (err) 2749 if (err)
2726 goto err_free_ctx; 2750 return err;
2727 2751
2728 gr_ctx->mem.gpu_va = nvgpu_gmmu_map(vm, 2752 gr_ctx->mem.gpu_va = nvgpu_gmmu_map(vm,
2729 &gr_ctx->mem, 2753 &gr_ctx->mem,
@@ -2734,15 +2758,10 @@ int gr_gk20a_alloc_gr_ctx(struct gk20a *g,
2734 if (!gr_ctx->mem.gpu_va) 2758 if (!gr_ctx->mem.gpu_va)
2735 goto err_free_mem; 2759 goto err_free_mem;
2736 2760
2737 *__gr_ctx = gr_ctx;
2738
2739 return 0; 2761 return 0;
2740 2762
2741 err_free_mem: 2763 err_free_mem:
2742 nvgpu_dma_free(g, &gr_ctx->mem); 2764 nvgpu_dma_free(g, &gr_ctx->mem);
2743 err_free_ctx:
2744 nvgpu_kfree(g, gr_ctx);
2745 gr_ctx = NULL;
2746 2765
2747 return err; 2766 return err;
2748} 2767}
@@ -2750,7 +2769,7 @@ int gr_gk20a_alloc_gr_ctx(struct gk20a *g,
2750static int gr_gk20a_alloc_tsg_gr_ctx(struct gk20a *g, 2769static int gr_gk20a_alloc_tsg_gr_ctx(struct gk20a *g,
2751 struct tsg_gk20a *tsg, u32 class, u32 padding) 2770 struct tsg_gk20a *tsg, u32 class, u32 padding)
2752{ 2771{
2753 struct gr_ctx_desc **gr_ctx = &tsg->tsg_gr_ctx; 2772 struct nvgpu_gr_ctx *gr_ctx = &tsg->gr_ctx;
2754 int err; 2773 int err;
2755 2774
2756 if (!tsg->vm) { 2775 if (!tsg->vm) {
@@ -2762,57 +2781,44 @@ static int gr_gk20a_alloc_tsg_gr_ctx(struct gk20a *g,
2762 if (err) 2781 if (err)
2763 return err; 2782 return err;
2764 2783
2765 return 0; 2784 gr_ctx->tsgid = tsg->tsgid;
2766}
2767
2768static int gr_gk20a_alloc_channel_gr_ctx(struct gk20a *g,
2769 struct channel_gk20a *c,
2770 u32 class,
2771 u32 padding)
2772{
2773 struct gr_ctx_desc **gr_ctx = &c->ch_ctx.gr_ctx;
2774 int err = g->ops.gr.alloc_gr_ctx(g, gr_ctx, c->vm, class, padding);
2775 if (err)
2776 return err;
2777 2785
2778 return 0; 2786 return 0;
2779} 2787}
2780 2788
2781void gr_gk20a_free_gr_ctx(struct gk20a *g, 2789void gr_gk20a_free_gr_ctx(struct gk20a *g,
2782 struct vm_gk20a *vm, struct gr_ctx_desc *gr_ctx) 2790 struct vm_gk20a *vm, struct nvgpu_gr_ctx *gr_ctx)
2783{ 2791{
2784 gk20a_dbg_fn(""); 2792 gk20a_dbg_fn("");
2785 2793
2786 if (!gr_ctx || !gr_ctx->mem.gpu_va) 2794 if (gr_ctx->mem.gpu_va) {
2787 return; 2795 gr_gk20a_unmap_global_ctx_buffers(g, vm, gr_ctx);
2796 gr_gk20a_free_channel_patch_ctx(g, vm, gr_ctx);
2797 gr_gk20a_free_channel_pm_ctx(g, vm, gr_ctx);
2788 2798
2789 if (g->ops.gr.dump_ctxsw_stats && 2799 if (g->ops.gr.dump_ctxsw_stats &&
2790 g->gr.ctx_vars.dump_ctxsw_stats_on_channel_close) 2800 g->gr.ctx_vars.dump_ctxsw_stats_on_channel_close)
2791 g->ops.gr.dump_ctxsw_stats(g, vm, gr_ctx); 2801 g->ops.gr.dump_ctxsw_stats(g, vm, gr_ctx);
2792 2802
2793 nvgpu_dma_unmap_free(vm, &gr_ctx->pagepool_ctxsw_buffer); 2803 nvgpu_dma_unmap_free(vm, &gr_ctx->pagepool_ctxsw_buffer);
2794 nvgpu_dma_unmap_free(vm, &gr_ctx->betacb_ctxsw_buffer); 2804 nvgpu_dma_unmap_free(vm, &gr_ctx->betacb_ctxsw_buffer);
2795 nvgpu_dma_unmap_free(vm, &gr_ctx->spill_ctxsw_buffer); 2805 nvgpu_dma_unmap_free(vm, &gr_ctx->spill_ctxsw_buffer);
2796 nvgpu_dma_unmap_free(vm, &gr_ctx->preempt_ctxsw_buffer); 2806 nvgpu_dma_unmap_free(vm, &gr_ctx->preempt_ctxsw_buffer);
2797 nvgpu_gmmu_unmap(vm, &gr_ctx->mem, gr_ctx->mem.gpu_va); 2807 nvgpu_dma_unmap_free(vm, &gr_ctx->mem);
2798 nvgpu_dma_free(g, &gr_ctx->mem); 2808
2799 nvgpu_kfree(g, gr_ctx); 2809 memset(gr_ctx, 0, sizeof(*gr_ctx));
2810 }
2800} 2811}
2801 2812
2802void gr_gk20a_free_tsg_gr_ctx(struct tsg_gk20a *tsg) 2813void gr_gk20a_free_tsg_gr_ctx(struct tsg_gk20a *tsg)
2803{ 2814{
2815 struct gk20a *g = tsg->g;
2816
2804 if (!tsg->vm) { 2817 if (!tsg->vm) {
2805 nvgpu_err(tsg->g, "No address space bound"); 2818 nvgpu_err(g, "No address space bound");
2806 return; 2819 return;
2807 } 2820 }
2808 tsg->g->ops.gr.free_gr_ctx(tsg->g, tsg->vm, tsg->tsg_gr_ctx); 2821 tsg->g->ops.gr.free_gr_ctx(g, tsg->vm, &tsg->gr_ctx);
2809 tsg->tsg_gr_ctx = NULL;
2810}
2811
2812static void gr_gk20a_free_channel_gr_ctx(struct channel_gk20a *c)
2813{
2814 c->g->ops.gr.free_gr_ctx(c->g, c->vm, c->ch_ctx.gr_ctx);
2815 c->ch_ctx.gr_ctx = NULL;
2816} 2822}
2817 2823
2818u32 gr_gk20a_get_patch_slots(struct gk20a *g) 2824u32 gr_gk20a_get_patch_slots(struct gk20a *g)
@@ -2823,13 +2829,19 @@ u32 gr_gk20a_get_patch_slots(struct gk20a *g)
2823static int gr_gk20a_alloc_channel_patch_ctx(struct gk20a *g, 2829static int gr_gk20a_alloc_channel_patch_ctx(struct gk20a *g,
2824 struct channel_gk20a *c) 2830 struct channel_gk20a *c)
2825{ 2831{
2826 struct patch_desc *patch_ctx = &c->ch_ctx.patch_ctx; 2832 struct tsg_gk20a *tsg;
2833 struct patch_desc *patch_ctx;
2827 struct vm_gk20a *ch_vm = c->vm; 2834 struct vm_gk20a *ch_vm = c->vm;
2828 u32 alloc_size; 2835 u32 alloc_size;
2829 int err = 0; 2836 int err = 0;
2830 2837
2831 gk20a_dbg_fn(""); 2838 gk20a_dbg_fn("");
2832 2839
2840 tsg = tsg_gk20a_from_ch(c);
2841 if (!tsg)
2842 return -EINVAL;
2843
2844 patch_ctx = &tsg->gr_ctx.patch_ctx;
2833 alloc_size = g->ops.gr.get_patch_slots(g) * 2845 alloc_size = g->ops.gr.get_patch_slots(g) *
2834 PATCH_CTX_SLOTS_REQUIRED_PER_ENTRY; 2846 PATCH_CTX_SLOTS_REQUIRED_PER_ENTRY;
2835 2847
@@ -2845,57 +2857,42 @@ static int gr_gk20a_alloc_channel_patch_ctx(struct gk20a *g,
2845 return 0; 2857 return 0;
2846} 2858}
2847 2859
2848static void gr_gk20a_free_channel_patch_ctx(struct channel_gk20a *c) 2860static void gr_gk20a_free_channel_patch_ctx(struct gk20a *g,
2861 struct vm_gk20a *vm,
2862 struct nvgpu_gr_ctx *gr_ctx)
2849{ 2863{
2850 struct patch_desc *patch_ctx = &c->ch_ctx.patch_ctx; 2864 struct patch_desc *patch_ctx = &gr_ctx->patch_ctx;
2851 struct gk20a *g = c->g;
2852 2865
2853 gk20a_dbg_fn(""); 2866 gk20a_dbg_fn("");
2854 2867
2855 if (patch_ctx->mem.gpu_va) 2868 if (patch_ctx->mem.gpu_va)
2856 nvgpu_gmmu_unmap(c->vm, &patch_ctx->mem, 2869 nvgpu_gmmu_unmap(vm, &patch_ctx->mem,
2857 patch_ctx->mem.gpu_va); 2870 patch_ctx->mem.gpu_va);
2858 2871
2859 nvgpu_dma_free(g, &patch_ctx->mem); 2872 nvgpu_dma_free(g, &patch_ctx->mem);
2860 patch_ctx->data_count = 0; 2873 patch_ctx->data_count = 0;
2861} 2874}
2862 2875
2863static void gr_gk20a_free_channel_pm_ctx(struct channel_gk20a *c) 2876static void gr_gk20a_free_channel_pm_ctx(struct gk20a *g,
2877 struct vm_gk20a *vm,
2878 struct nvgpu_gr_ctx *gr_ctx)
2864{ 2879{
2865 struct pm_ctx_desc *pm_ctx = &c->ch_ctx.pm_ctx; 2880 struct pm_ctx_desc *pm_ctx = &gr_ctx->pm_ctx;
2866 struct gk20a *g = c->g;
2867 2881
2868 gk20a_dbg_fn(""); 2882 gk20a_dbg_fn("");
2869 2883
2870 if (pm_ctx->mem.gpu_va) { 2884 if (pm_ctx->mem.gpu_va) {
2871 nvgpu_gmmu_unmap(c->vm, &pm_ctx->mem, pm_ctx->mem.gpu_va); 2885 nvgpu_gmmu_unmap(vm, &pm_ctx->mem, pm_ctx->mem.gpu_va);
2872 2886
2873 nvgpu_dma_free(g, &pm_ctx->mem); 2887 nvgpu_dma_free(g, &pm_ctx->mem);
2874 } 2888 }
2875} 2889}
2876 2890
2877void gk20a_free_channel_ctx(struct channel_gk20a *c, bool is_tsg)
2878{
2879 if(c->g->ops.fifo.free_channel_ctx_header)
2880 c->g->ops.fifo.free_channel_ctx_header(c);
2881 gr_gk20a_unmap_global_ctx_buffers(c);
2882 gr_gk20a_free_channel_patch_ctx(c);
2883 gr_gk20a_free_channel_pm_ctx(c);
2884 if (!is_tsg)
2885 gr_gk20a_free_channel_gr_ctx(c);
2886
2887 /* zcull_ctx */
2888
2889 memset(&c->ch_ctx, 0, sizeof(struct channel_ctx_gk20a));
2890
2891 c->first_init = false;
2892}
2893
2894int gk20a_alloc_obj_ctx(struct channel_gk20a *c, u32 class_num, u32 flags) 2891int gk20a_alloc_obj_ctx(struct channel_gk20a *c, u32 class_num, u32 flags)
2895{ 2892{
2896 struct gk20a *g = c->g; 2893 struct gk20a *g = c->g;
2897 struct fifo_gk20a *f = &g->fifo; 2894 struct fifo_gk20a *f = &g->fifo;
2898 struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx; 2895 struct nvgpu_gr_ctx *gr_ctx;
2899 struct tsg_gk20a *tsg = NULL; 2896 struct tsg_gk20a *tsg = NULL;
2900 int err = 0; 2897 int err = 0;
2901 2898
@@ -2917,92 +2914,64 @@ int gk20a_alloc_obj_ctx(struct channel_gk20a *c, u32 class_num, u32 flags)
2917 } 2914 }
2918 c->obj_class = class_num; 2915 c->obj_class = class_num;
2919 2916
2920 if (gk20a_is_channel_marked_as_tsg(c)) 2917 if (!gk20a_is_channel_marked_as_tsg(c))
2921 tsg = &f->tsg[c->tsgid]; 2918 return -EINVAL;
2922 2919
2923 /* allocate gr ctx buffer */ 2920 tsg = &f->tsg[c->tsgid];
2924 if (!tsg) { 2921 gr_ctx = &tsg->gr_ctx;
2925 if (!ch_ctx->gr_ctx) { 2922
2926 err = gr_gk20a_alloc_channel_gr_ctx(g, c, 2923 if (!nvgpu_mem_is_valid(&gr_ctx->mem)) {
2927 class_num, 2924 tsg->vm = c->vm;
2928 flags); 2925 nvgpu_vm_get(tsg->vm);
2929 if (err) { 2926 err = gr_gk20a_alloc_tsg_gr_ctx(g, tsg,
2930 nvgpu_err(g, 2927 class_num,
2931 "fail to allocate gr ctx buffer"); 2928 flags);
2932 goto out; 2929 if (err) {
2933 }
2934 } else {
2935 /*TBD: needs to be more subtle about which is
2936 * being allocated as some are allowed to be
2937 * allocated along same channel */
2938 nvgpu_err(g, 2930 nvgpu_err(g,
2939 "too many classes alloc'd on same channel"); 2931 "fail to allocate TSG gr ctx buffer");
2940 err = -EINVAL; 2932 nvgpu_vm_put(tsg->vm);
2933 tsg->vm = NULL;
2941 goto out; 2934 goto out;
2942 } 2935 }
2943 } else { 2936
2944 if (!tsg->tsg_gr_ctx) { 2937 /* allocate patch buffer */
2945 tsg->vm = c->vm; 2938 if (!nvgpu_mem_is_valid(&gr_ctx->patch_ctx.mem)) {
2946 nvgpu_vm_get(tsg->vm); 2939 gr_ctx->patch_ctx.data_count = 0;
2947 err = gr_gk20a_alloc_tsg_gr_ctx(g, tsg, 2940 err = gr_gk20a_alloc_channel_patch_ctx(g, c);
2948 class_num,
2949 flags);
2950 if (err) { 2941 if (err) {
2951 nvgpu_err(g, 2942 nvgpu_err(g,
2952 "fail to allocate TSG gr ctx buffer"); 2943 "fail to allocate patch buffer");
2953 nvgpu_vm_put(tsg->vm);
2954 tsg->vm = NULL;
2955 goto out; 2944 goto out;
2956 } 2945 }
2957 } 2946 }
2958 ch_ctx->gr_ctx = tsg->tsg_gr_ctx;
2959 }
2960
2961 /* PM ctxt switch is off by default */
2962 ch_ctx->pm_ctx.pm_mode = ctxsw_prog_main_image_pm_mode_no_ctxsw_f();
2963 2947
2964 /* commit gr ctx buffer */ 2948 /* map global buffer to channel gpu_va and commit */
2965 err = g->ops.gr.commit_inst(c, ch_ctx->gr_ctx->mem.gpu_va); 2949 err = gr_gk20a_map_global_ctx_buffers(g, c);
2966 if (err) {
2967 nvgpu_err(g,
2968 "fail to commit gr ctx buffer");
2969 goto out;
2970 }
2971
2972 /* allocate patch buffer */
2973 if (!nvgpu_mem_is_valid(&ch_ctx->patch_ctx.mem)) {
2974 ch_ctx->patch_ctx.data_count = 0;
2975 err = gr_gk20a_alloc_channel_patch_ctx(g, c);
2976 if (err) { 2950 if (err) {
2977 nvgpu_err(g, 2951 nvgpu_err(g,
2978 "fail to allocate patch buffer"); 2952 "fail to map global ctx buffer");
2979 goto out; 2953 goto out;
2980 } 2954 }
2981 } 2955 gr_gk20a_commit_global_ctx_buffers(g, c, true);
2982 2956
2983 /* map global buffer to channel gpu_va and commit */ 2957 /* commit gr ctx buffer */
2984 if (!ch_ctx->global_ctx_buffer_mapped) { 2958 err = g->ops.gr.commit_inst(c, gr_ctx->mem.gpu_va);
2985 err = gr_gk20a_map_global_ctx_buffers(g, c);
2986 if (err) { 2959 if (err) {
2987 nvgpu_err(g, 2960 nvgpu_err(g,
2988 "fail to map global ctx buffer"); 2961 "fail to commit gr ctx buffer");
2989 goto out; 2962 goto out;
2990 } 2963 }
2991 gr_gk20a_commit_global_ctx_buffers(g, c, true);
2992 }
2993 2964
2994 /* init golden image, ELPG enabled after this is done */ 2965 /* init golden image, ELPG enabled after this is done */
2995 err = gr_gk20a_init_golden_ctx_image(g, c); 2966 err = gr_gk20a_init_golden_ctx_image(g, c);
2996 if (err) { 2967 if (err) {
2997 nvgpu_err(g, 2968 nvgpu_err(g,
2998 "fail to init golden ctx image"); 2969 "fail to init golden ctx image");
2999 goto out; 2970 goto out;
3000 } 2971 }
3001 2972
3002 /* load golden image */ 2973 /* load golden image */
3003 if (!c->first_init) { 2974 gr_gk20a_load_golden_ctx_image(g, c);
3004 err = gr_gk20a_elpg_protected_call(g,
3005 gr_gk20a_load_golden_ctx_image(g, c));
3006 if (err) { 2975 if (err) {
3007 nvgpu_err(g, 2976 nvgpu_err(g,
3008 "fail to load golden ctx image"); 2977 "fail to load golden ctx image");
@@ -3016,11 +2985,21 @@ int gk20a_alloc_obj_ctx(struct channel_gk20a *c, u32 class_num, u32 flags)
3016 "fail to bind channel for ctxsw trace"); 2985 "fail to bind channel for ctxsw trace");
3017 } 2986 }
3018#endif 2987#endif
3019 c->first_init = true;
3020 }
3021 2988
3022 if (g->ops.gr.set_czf_bypass) 2989 if (g->ops.gr.set_czf_bypass)
3023 g->ops.gr.set_czf_bypass(g, c); 2990 g->ops.gr.set_czf_bypass(g, c);
2991
2992 /* PM ctxt switch is off by default */
2993 gr_ctx->pm_ctx.pm_mode = ctxsw_prog_main_image_pm_mode_no_ctxsw_f();
2994 } else {
2995 /* commit gr ctx buffer */
2996 err = g->ops.gr.commit_inst(c, gr_ctx->mem.gpu_va);
2997 if (err) {
2998 nvgpu_err(g,
2999 "fail to commit gr ctx buffer");
3000 goto out;
3001 }
3002 }
3024 3003
3025 gk20a_dbg_fn("done"); 3004 gk20a_dbg_fn("done");
3026 return 0; 3005 return 0;
@@ -3553,8 +3532,14 @@ u32 gr_gk20a_get_ctxsw_zcull_size(struct gk20a *g, struct gr_gk20a *gr)
3553int gr_gk20a_bind_ctxsw_zcull(struct gk20a *g, struct gr_gk20a *gr, 3532int gr_gk20a_bind_ctxsw_zcull(struct gk20a *g, struct gr_gk20a *gr,
3554 struct channel_gk20a *c, u64 zcull_va, u32 mode) 3533 struct channel_gk20a *c, u64 zcull_va, u32 mode)
3555{ 3534{
3556 struct zcull_ctx_desc *zcull_ctx = &c->ch_ctx.zcull_ctx; 3535 struct tsg_gk20a *tsg;
3536 struct zcull_ctx_desc *zcull_ctx;
3557 3537
3538 tsg = tsg_gk20a_from_ch(c);
3539 if (!tsg)
3540 return -EINVAL;
3541
3542 zcull_ctx = &tsg->gr_ctx.zcull_ctx;
3558 zcull_ctx->ctx_sw_mode = mode; 3543 zcull_ctx->ctx_sw_mode = mode;
3559 zcull_ctx->gpu_va = zcull_va; 3544 zcull_ctx->gpu_va = zcull_va;
3560 3545
@@ -6516,7 +6501,7 @@ void gk20a_gr_init_ovr_sm_dsm_perf(void)
6516 * write will actually occur. so later we should put a lazy, 6501 * write will actually occur. so later we should put a lazy,
6517 * map-and-hold system in the patch write state */ 6502 * map-and-hold system in the patch write state */
6518static int gr_gk20a_ctx_patch_smpc(struct gk20a *g, 6503static int gr_gk20a_ctx_patch_smpc(struct gk20a *g,
6519 struct channel_ctx_gk20a *ch_ctx, 6504 struct channel_gk20a *ch,
6520 u32 addr, u32 data, 6505 u32 addr, u32 data,
6521 struct nvgpu_mem *mem) 6506 struct nvgpu_mem *mem)
6522{ 6507{
@@ -6531,9 +6516,16 @@ static int gr_gk20a_ctx_patch_smpc(struct gk20a *g,
6531 u32 *ovr_perf_regs = NULL; 6516 u32 *ovr_perf_regs = NULL;
6532 u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); 6517 u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
6533 u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); 6518 u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
6534 struct ctx_header_desc *ctx = &ch_ctx->ctx_header; 6519 struct tsg_gk20a *tsg;
6520 struct nvgpu_gr_ctx *gr_ctx;
6521 struct ctx_header_desc *ctx = &ch->ctx_header;
6535 struct nvgpu_mem *ctxheader = &ctx->mem; 6522 struct nvgpu_mem *ctxheader = &ctx->mem;
6536 6523
6524 tsg = tsg_gk20a_from_ch(ch);
6525 if (!tsg)
6526 return -EINVAL;
6527
6528 gr_ctx = &tsg->gr_ctx;
6537 g->ops.gr.init_ovr_sm_dsm_perf(); 6529 g->ops.gr.init_ovr_sm_dsm_perf();
6538 g->ops.gr.init_sm_dsm_reg_info(); 6530 g->ops.gr.init_sm_dsm_reg_info();
6539 g->ops.gr.get_ovr_perf_regs(g, &num_ovr_perf_regs, &ovr_perf_regs); 6531 g->ops.gr.get_ovr_perf_regs(g, &num_ovr_perf_regs, &ovr_perf_regs);
@@ -6556,17 +6548,17 @@ static int gr_gk20a_ctx_patch_smpc(struct gk20a *g,
6556 ctxsw_prog_main_image_patch_count_o()); 6548 ctxsw_prog_main_image_patch_count_o());
6557 6549
6558 if (!tmp) 6550 if (!tmp)
6559 ch_ctx->patch_ctx.data_count = 0; 6551 gr_ctx->patch_ctx.data_count = 0;
6560 6552
6561 gr_gk20a_ctx_patch_write(g, ch_ctx, 6553 gr_gk20a_ctx_patch_write(g, gr_ctx,
6562 addr, data, true); 6554 addr, data, true);
6563 6555
6564 vaddr_lo = u64_lo32(ch_ctx->patch_ctx.mem.gpu_va); 6556 vaddr_lo = u64_lo32(gr_ctx->patch_ctx.mem.gpu_va);
6565 vaddr_hi = u64_hi32(ch_ctx->patch_ctx.mem.gpu_va); 6557 vaddr_hi = u64_hi32(gr_ctx->patch_ctx.mem.gpu_va);
6566 6558
6567 nvgpu_mem_wr(g, mem, 6559 nvgpu_mem_wr(g, mem,
6568 ctxsw_prog_main_image_patch_count_o(), 6560 ctxsw_prog_main_image_patch_count_o(),
6569 ch_ctx->patch_ctx.data_count); 6561 gr_ctx->patch_ctx.data_count);
6570 if (ctxheader->gpu_va) { 6562 if (ctxheader->gpu_va) {
6571 /* 6563 /*
6572 * Main context can be gr_ctx or pm_ctx. 6564 * Main context can be gr_ctx or pm_ctx.
@@ -6575,7 +6567,7 @@ static int gr_gk20a_ctx_patch_smpc(struct gk20a *g,
6575 * __gr_gk20a_exec_ctx_ops. Need to take 6567 * __gr_gk20a_exec_ctx_ops. Need to take
6576 * care of cpu access to ctxheader here. 6568 * care of cpu access to ctxheader here.
6577 */ 6569 */
6578 if (nvgpu_mem_begin(g, ctxheader)) 6570 if (nvgpu_mem_begin(g, ctxheader))
6579 return -ENOMEM; 6571 return -ENOMEM;
6580 nvgpu_mem_wr(g, ctxheader, 6572 nvgpu_mem_wr(g, ctxheader,
6581 ctxsw_prog_main_image_patch_adr_lo_o(), 6573 ctxsw_prog_main_image_patch_adr_lo_o(),
@@ -7690,7 +7682,8 @@ int __gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
7690 bool ch_is_curr_ctx) 7682 bool ch_is_curr_ctx)
7691{ 7683{
7692 struct gk20a *g = ch->g; 7684 struct gk20a *g = ch->g;
7693 struct channel_ctx_gk20a *ch_ctx = &ch->ch_ctx; 7685 struct tsg_gk20a *tsg;
7686 struct nvgpu_gr_ctx *gr_ctx;
7694 bool gr_ctx_ready = false; 7687 bool gr_ctx_ready = false;
7695 bool pm_ctx_ready = false; 7688 bool pm_ctx_ready = false;
7696 struct nvgpu_mem *current_mem = NULL; 7689 struct nvgpu_mem *current_mem = NULL;
@@ -7707,6 +7700,12 @@ int __gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
7707 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "wr_ops=%d rd_ops=%d", 7700 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "wr_ops=%d rd_ops=%d",
7708 num_ctx_wr_ops, num_ctx_rd_ops); 7701 num_ctx_wr_ops, num_ctx_rd_ops);
7709 7702
7703 tsg = tsg_gk20a_from_ch(ch);
7704 if (!tsg)
7705 return -EINVAL;
7706
7707 gr_ctx = &tsg->gr_ctx;
7708
7710 if (ch_is_curr_ctx) { 7709 if (ch_is_curr_ctx) {
7711 for (pass = 0; pass < 2; pass++) { 7710 for (pass = 0; pass < 2; pass++) {
7712 ctx_op_nr = 0; 7711 ctx_op_nr = 0;
@@ -7778,7 +7777,7 @@ int __gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
7778 } 7777 }
7779 offset_addrs = offsets + max_offsets; 7778 offset_addrs = offsets + max_offsets;
7780 7779
7781 err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx, false); 7780 err = gr_gk20a_ctx_patch_write_begin(g, gr_ctx, false);
7782 if (err) 7781 if (err)
7783 goto cleanup; 7782 goto cleanup;
7784 7783
@@ -7812,13 +7811,13 @@ int __gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
7812 * gr_gk20a_apply_instmem_overrides, 7811 * gr_gk20a_apply_instmem_overrides,
7813 * recoded in-place instead. 7812 * recoded in-place instead.
7814 */ 7813 */
7815 if (nvgpu_mem_begin(g, &ch_ctx->gr_ctx->mem)) { 7814 if (nvgpu_mem_begin(g, &gr_ctx->mem)) {
7816 err = -ENOMEM; 7815 err = -ENOMEM;
7817 goto cleanup; 7816 goto cleanup;
7818 } 7817 }
7819 gr_ctx_ready = true; 7818 gr_ctx_ready = true;
7820 } 7819 }
7821 current_mem = &ch_ctx->gr_ctx->mem; 7820 current_mem = &gr_ctx->mem;
7822 } else { 7821 } else {
7823 err = gr_gk20a_get_pm_ctx_buffer_offsets(g, 7822 err = gr_gk20a_get_pm_ctx_buffer_offsets(g,
7824 ctx_ops[i].offset, 7823 ctx_ops[i].offset,
@@ -7835,19 +7834,19 @@ int __gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
7835 } 7834 }
7836 if (!pm_ctx_ready) { 7835 if (!pm_ctx_ready) {
7837 /* Make sure ctx buffer was initialized */ 7836 /* Make sure ctx buffer was initialized */
7838 if (!nvgpu_mem_is_valid(&ch_ctx->pm_ctx.mem)) { 7837 if (!nvgpu_mem_is_valid(&gr_ctx->pm_ctx.mem)) {
7839 nvgpu_err(g, 7838 nvgpu_err(g,
7840 "Invalid ctx buffer"); 7839 "Invalid ctx buffer");
7841 err = -EINVAL; 7840 err = -EINVAL;
7842 goto cleanup; 7841 goto cleanup;
7843 } 7842 }
7844 if (nvgpu_mem_begin(g, &ch_ctx->pm_ctx.mem)) { 7843 if (nvgpu_mem_begin(g, &gr_ctx->pm_ctx.mem)) {
7845 err = -ENOMEM; 7844 err = -ENOMEM;
7846 goto cleanup; 7845 goto cleanup;
7847 } 7846 }
7848 pm_ctx_ready = true; 7847 pm_ctx_ready = true;
7849 } 7848 }
7850 current_mem = &ch_ctx->pm_ctx.mem; 7849 current_mem = &gr_ctx->pm_ctx.mem;
7851 } 7850 }
7852 7851
7853 /* if this is a quad access, setup for special access*/ 7852 /* if this is a quad access, setup for special access*/
@@ -7860,7 +7859,7 @@ int __gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
7860 /* sanity check gr ctxt offsets, 7859 /* sanity check gr ctxt offsets,
7861 * don't write outside, worst case 7860 * don't write outside, worst case
7862 */ 7861 */
7863 if ((current_mem == &ch_ctx->gr_ctx->mem) && 7862 if ((current_mem == &gr_ctx->mem) &&
7864 (offsets[j] >= g->gr.ctx_vars.golden_image_size)) 7863 (offsets[j] >= g->gr.ctx_vars.golden_image_size))
7865 continue; 7864 continue;
7866 if (pass == 0) { /* write pass */ 7865 if (pass == 0) { /* write pass */
@@ -7886,7 +7885,7 @@ int __gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
7886 7885
7887 /* check to see if we need to add a special WAR 7886 /* check to see if we need to add a special WAR
7888 for some of the SMPC perf regs */ 7887 for some of the SMPC perf regs */
7889 gr_gk20a_ctx_patch_smpc(g, ch_ctx, offset_addrs[j], 7888 gr_gk20a_ctx_patch_smpc(g, ch, offset_addrs[j],
7890 v, current_mem); 7889 v, current_mem);
7891 7890
7892 } else { /* read pass */ 7891 } else { /* read pass */
@@ -7915,12 +7914,12 @@ int __gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
7915 if (offsets) 7914 if (offsets)
7916 nvgpu_kfree(g, offsets); 7915 nvgpu_kfree(g, offsets);
7917 7916
7918 if (ch_ctx->patch_ctx.mem.cpu_va) 7917 if (gr_ctx->patch_ctx.mem.cpu_va)
7919 gr_gk20a_ctx_patch_write_end(g, ch_ctx, gr_ctx_ready); 7918 gr_gk20a_ctx_patch_write_end(g, gr_ctx, gr_ctx_ready);
7920 if (gr_ctx_ready) 7919 if (gr_ctx_ready)
7921 nvgpu_mem_end(g, &ch_ctx->gr_ctx->mem); 7920 nvgpu_mem_end(g, &gr_ctx->mem);
7922 if (pm_ctx_ready) 7921 if (pm_ctx_ready)
7923 nvgpu_mem_end(g, &ch_ctx->pm_ctx.mem); 7922 nvgpu_mem_end(g, &gr_ctx->pm_ctx.mem);
7924 7923
7925 return err; 7924 return err;
7926} 7925}
@@ -7962,23 +7961,23 @@ int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
7962} 7961}
7963 7962
7964void gr_gk20a_commit_global_pagepool(struct gk20a *g, 7963void gr_gk20a_commit_global_pagepool(struct gk20a *g,
7965 struct channel_ctx_gk20a *ch_ctx, 7964 struct nvgpu_gr_ctx *gr_ctx,
7966 u64 addr, u32 size, bool patch) 7965 u64 addr, u32 size, bool patch)
7967{ 7966{
7968 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_scc_pagepool_base_r(), 7967 gr_gk20a_ctx_patch_write(g, gr_ctx, gr_scc_pagepool_base_r(),
7969 gr_scc_pagepool_base_addr_39_8_f(addr), patch); 7968 gr_scc_pagepool_base_addr_39_8_f(addr), patch);
7970 7969
7971 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_scc_pagepool_r(), 7970 gr_gk20a_ctx_patch_write(g, gr_ctx, gr_scc_pagepool_r(),
7972 gr_scc_pagepool_total_pages_f(size) | 7971 gr_scc_pagepool_total_pages_f(size) |
7973 gr_scc_pagepool_valid_true_f(), patch); 7972 gr_scc_pagepool_valid_true_f(), patch);
7974 7973
7975 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_gcc_pagepool_base_r(), 7974 gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_gcc_pagepool_base_r(),
7976 gr_gpcs_gcc_pagepool_base_addr_39_8_f(addr), patch); 7975 gr_gpcs_gcc_pagepool_base_addr_39_8_f(addr), patch);
7977 7976
7978 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_gcc_pagepool_r(), 7977 gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_gcc_pagepool_r(),
7979 gr_gpcs_gcc_pagepool_total_pages_f(size), patch); 7978 gr_gpcs_gcc_pagepool_total_pages_f(size), patch);
7980 7979
7981 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_pd_pagepool_r(), 7980 gr_gk20a_ctx_patch_write(g, gr_ctx, gr_pd_pagepool_r(),
7982 gr_pd_pagepool_total_pages_f(size) | 7981 gr_pd_pagepool_total_pages_f(size) |
7983 gr_pd_pagepool_valid_true_f(), patch); 7982 gr_pd_pagepool_valid_true_f(), patch);
7984} 7983}
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
index 1c22923b..6cc15c94 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
@@ -28,7 +28,6 @@
28#include "gr_t19x.h" 28#include "gr_t19x.h"
29#endif 29#endif
30 30
31#include "tsg_gk20a.h"
32#include "gr_ctx_gk20a.h" 31#include "gr_ctx_gk20a.h"
33#include "mm_gk20a.h" 32#include "mm_gk20a.h"
34 33
@@ -48,6 +47,10 @@
48 47
49#define GK20A_TIMEOUT_FPGA 100000 /* 100 sec */ 48#define GK20A_TIMEOUT_FPGA 100000 /* 100 sec */
50 49
50/* Flags to be passed to g->ops.gr.alloc_obj_ctx() */
51#define NVGPU_OBJ_CTX_FLAGS_SUPPORT_GFXP (1 << 1)
52#define NVGPU_OBJ_CTX_FLAGS_SUPPORT_CILP (1 << 2)
53
51/* 54/*
52 * allocate a minimum of 1 page (4KB) worth of patch space, this is 512 entries 55 * allocate a minimum of 1 page (4KB) worth of patch space, this is 512 entries
53 * of address and data pairs 56 * of address and data pairs
@@ -64,6 +67,7 @@
64#define NVGPU_PREEMPTION_MODE_COMPUTE_CTA (1 << 1) 67#define NVGPU_PREEMPTION_MODE_COMPUTE_CTA (1 << 1)
65#define NVGPU_PREEMPTION_MODE_COMPUTE_CILP (1 << 2) 68#define NVGPU_PREEMPTION_MODE_COMPUTE_CILP (1 << 2)
66 69
70struct tsg_gk20a;
67struct channel_gk20a; 71struct channel_gk20a;
68struct nvgpu_warpstate; 72struct nvgpu_warpstate;
69 73
@@ -433,7 +437,12 @@ struct gr_gk20a {
433 437
434void gk20a_fecs_dump_falcon_stats(struct gk20a *g); 438void gk20a_fecs_dump_falcon_stats(struct gk20a *g);
435 439
436struct gr_ctx_desc { 440struct ctx_header_desc {
441 struct nvgpu_mem mem;
442};
443
444/* contexts associated with a TSG */
445struct nvgpu_gr_ctx {
437 struct nvgpu_mem mem; 446 struct nvgpu_mem mem;
438 447
439 u32 graphics_preempt_mode; 448 u32 graphics_preempt_mode;
@@ -452,10 +461,16 @@ struct gr_ctx_desc {
452 u64 virt_ctx; 461 u64 virt_ctx;
453#endif 462#endif
454 bool golden_img_loaded; 463 bool golden_img_loaded;
455};
456 464
457struct ctx_header_desc { 465 struct patch_desc patch_ctx;
458 struct nvgpu_mem mem; 466 struct zcull_ctx_desc zcull_ctx;
467 struct pm_ctx_desc pm_ctx;
468 u64 global_ctx_buffer_va[NR_GLOBAL_CTX_BUF_VA];
469 u64 global_ctx_buffer_size[NR_GLOBAL_CTX_BUF_VA];
470 int global_ctx_buffer_index[NR_GLOBAL_CTX_BUF_VA];
471 bool global_ctx_buffer_mapped;
472
473 u32 tsgid;
459}; 474};
460 475
461struct gk20a_ctxsw_ucode_segment { 476struct gk20a_ctxsw_ucode_segment {
@@ -552,7 +567,6 @@ int gk20a_init_gr_channel(struct channel_gk20a *ch_gk20a);
552int gr_gk20a_init_ctx_vars(struct gk20a *g, struct gr_gk20a *gr); 567int gr_gk20a_init_ctx_vars(struct gk20a *g, struct gr_gk20a *gr);
553 568
554int gk20a_alloc_obj_ctx(struct channel_gk20a *c, u32 class_num, u32 flags); 569int gk20a_alloc_obj_ctx(struct channel_gk20a *c, u32 class_num, u32 flags);
555void gk20a_free_channel_ctx(struct channel_gk20a *c, bool is_tsg);
556 570
557int gk20a_gr_isr(struct gk20a *g); 571int gk20a_gr_isr(struct gk20a *g);
558int gk20a_gr_nonstall_isr(struct gk20a *g); 572int gk20a_gr_nonstall_isr(struct gk20a *g);
@@ -633,17 +647,17 @@ int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g,
633 struct channel_gk20a *c, 647 struct channel_gk20a *c,
634 bool enable_hwpm_ctxsw); 648 bool enable_hwpm_ctxsw);
635 649
636struct channel_ctx_gk20a; 650struct nvgpu_gr_ctx;
637void gr_gk20a_ctx_patch_write(struct gk20a *g, struct channel_ctx_gk20a *ch_ctx, 651void gr_gk20a_ctx_patch_write(struct gk20a *g, struct nvgpu_gr_ctx *ch_ctx,
638 u32 addr, u32 data, bool patch); 652 u32 addr, u32 data, bool patch);
639int gr_gk20a_ctx_patch_write_begin(struct gk20a *g, 653int gr_gk20a_ctx_patch_write_begin(struct gk20a *g,
640 struct channel_ctx_gk20a *ch_ctx, 654 struct nvgpu_gr_ctx *ch_ctx,
641 bool update_patch_count); 655 bool update_patch_count);
642void gr_gk20a_ctx_patch_write_end(struct gk20a *g, 656void gr_gk20a_ctx_patch_write_end(struct gk20a *g,
643 struct channel_ctx_gk20a *ch_ctx, 657 struct nvgpu_gr_ctx *ch_ctx,
644 bool update_patch_count); 658 bool update_patch_count);
645void gr_gk20a_commit_global_pagepool(struct gk20a *g, 659void gr_gk20a_commit_global_pagepool(struct gk20a *g,
646 struct channel_ctx_gk20a *ch_ctx, 660 struct nvgpu_gr_ctx *ch_ctx,
647 u64 addr, u32 size, bool patch); 661 u64 addr, u32 size, bool patch);
648void gk20a_gr_set_shader_exceptions(struct gk20a *g, u32 data); 662void gk20a_gr_set_shader_exceptions(struct gk20a *g, u32 data);
649void gr_gk20a_enable_hww_exceptions(struct gk20a *g); 663void gr_gk20a_enable_hww_exceptions(struct gk20a *g);
@@ -694,10 +708,10 @@ int gr_gk20a_submit_fecs_method_op(struct gk20a *g,
694int gr_gk20a_submit_fecs_sideband_method_op(struct gk20a *g, 708int gr_gk20a_submit_fecs_sideband_method_op(struct gk20a *g,
695 struct fecs_method_op_gk20a op); 709 struct fecs_method_op_gk20a op);
696int gr_gk20a_alloc_gr_ctx(struct gk20a *g, 710int gr_gk20a_alloc_gr_ctx(struct gk20a *g,
697 struct gr_ctx_desc **__gr_ctx, struct vm_gk20a *vm, 711 struct nvgpu_gr_ctx *gr_ctx, struct vm_gk20a *vm,
698 u32 class, u32 padding); 712 u32 class, u32 padding);
699void gr_gk20a_free_gr_ctx(struct gk20a *g, 713void gr_gk20a_free_gr_ctx(struct gk20a *g,
700 struct vm_gk20a *vm, struct gr_ctx_desc *gr_ctx); 714 struct vm_gk20a *vm, struct nvgpu_gr_ctx *gr_ctx);
701int gr_gk20a_halt_pipe(struct gk20a *g); 715int gr_gk20a_halt_pipe(struct gk20a *g);
702 716
703#if defined(CONFIG_GK20A_CYCLE_STATS) 717#if defined(CONFIG_GK20A_CYCLE_STATS)
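Note on the gr_gk20a.h declarations above: the patch-write helpers now take struct nvgpu_gr_ctx * instead of struct channel_ctx_gk20a *. A hedged sketch of a caller using the new signatures, following the begin/write/end sequence used in gr_gk20a.c (the function name is illustrative only):

static int example_patch_one_reg(struct gk20a *g,
				 struct nvgpu_gr_ctx *gr_ctx,
				 u32 addr, u32 data)
{
	/* map the TSG's patch buffer for CPU access */
	int err = gr_gk20a_ctx_patch_write_begin(g, gr_ctx, false);

	if (err)
		return err;

	/* append one addr/data pair to the patch context */
	gr_gk20a_ctx_patch_write(g, gr_ctx, addr, data, true);

	/* unmap; with update_patch_count the new data_count is written
	 * into the context image, as in the hunk at the top of this diff */
	gr_gk20a_ctx_patch_write_end(g, gr_ctx, true);
	return 0;
}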
diff --git a/drivers/gpu/nvgpu/gk20a/tsg_gk20a.c b/drivers/gpu/nvgpu/gk20a/tsg_gk20a.c
index d9ddc011..19d0ecce 100644
--- a/drivers/gpu/nvgpu/gk20a/tsg_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/tsg_gk20a.c
@@ -280,7 +280,6 @@ struct tsg_gk20a *gk20a_tsg_open(struct gk20a *g)
280 tsg->num_active_channels = 0; 280 tsg->num_active_channels = 0;
281 nvgpu_ref_init(&tsg->refcount); 281 nvgpu_ref_init(&tsg->refcount);
282 282
283 tsg->tsg_gr_ctx = NULL;
284 tsg->vm = NULL; 283 tsg->vm = NULL;
285 tsg->interleave_level = NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_LOW; 284 tsg->interleave_level = NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_LOW;
286 tsg->timeslice_us = 0; 285 tsg->timeslice_us = 0;
@@ -319,10 +318,8 @@ void gk20a_tsg_release(struct nvgpu_ref *ref)
319 if (g->ops.fifo.tsg_release) 318 if (g->ops.fifo.tsg_release)
320 g->ops.fifo.tsg_release(tsg); 319 g->ops.fifo.tsg_release(tsg);
321 320
322 if (tsg->tsg_gr_ctx) { 321 if (nvgpu_mem_is_valid(&tsg->gr_ctx.mem))
323 gr_gk20a_free_tsg_gr_ctx(tsg); 322 gr_gk20a_free_tsg_gr_ctx(tsg);
324 tsg->tsg_gr_ctx = NULL;
325 }
326 323
327 if (g->ops.fifo.deinit_eng_method_buffers) 324 if (g->ops.fifo.deinit_eng_method_buffers)
328 g->ops.fifo.deinit_eng_method_buffers(g, tsg); 325 g->ops.fifo.deinit_eng_method_buffers(g, tsg);
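Note on the tsg_gk20a.c hunks above: with the context embedded in the TSG, "is a graphics context allocated" is answered by the validity of its backing memory rather than by a NULL pointer, both here in gk20a_tsg_release() and in gk20a_alloc_obj_ctx() earlier in the diff. A small sketch of that check (helper name is illustrative only):

static bool example_tsg_has_gr_ctx(struct tsg_gk20a *tsg)
{
	/* gr_gk20a_free_gr_ctx() memsets the embedded context back to
	 * zero (see the gr_gk20a.c hunk above), so this stays correct
	 * when a TSG slot is reused after release. */
	return nvgpu_mem_is_valid(&tsg->gr_ctx.mem);
}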
diff --git a/drivers/gpu/nvgpu/gk20a/tsg_gk20a.h b/drivers/gpu/nvgpu/gk20a/tsg_gk20a.h
index 08fe0365..2168cb4f 100644
--- a/drivers/gpu/nvgpu/gk20a/tsg_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/tsg_gk20a.h
@@ -26,6 +26,8 @@
26#include <nvgpu/kref.h> 26#include <nvgpu/kref.h>
27#include <nvgpu/rwsem.h> 27#include <nvgpu/rwsem.h>
28 28
29#include "gr_gk20a.h"
30
29#ifdef CONFIG_TEGRA_19x_GPU 31#ifdef CONFIG_TEGRA_19x_GPU
30#include "tsg_t19x.h" 32#include "tsg_t19x.h"
31#endif 33#endif
@@ -56,8 +58,6 @@ struct tsg_gk20a {
56 unsigned int timeslice_timeout; 58 unsigned int timeslice_timeout;
57 unsigned int timeslice_scale; 59 unsigned int timeslice_scale;
58 60
59 struct gr_ctx_desc *tsg_gr_ctx;
60
61 struct vm_gk20a *vm; 61 struct vm_gk20a *vm;
62 62
63 u32 interleave_level; 63 u32 interleave_level;
@@ -71,6 +71,8 @@ struct tsg_gk20a {
71#ifdef CONFIG_TEGRA_19x_GPU 71#ifdef CONFIG_TEGRA_19x_GPU
72 struct tsg_t19x t19x; 72 struct tsg_t19x t19x;
73#endif 73#endif
74
75 struct nvgpu_gr_ctx gr_ctx;
74}; 76};
75 77
76int gk20a_enable_tsg(struct tsg_gk20a *tsg); 78int gk20a_enable_tsg(struct tsg_gk20a *tsg);
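Note on the tsg_gk20a.h hunks above: struct nvgpu_gr_ctx is now embedded in struct tsg_gk20a by value, so gr_gk20a_alloc_gr_ctx() fills in a caller-owned structure instead of kzalloc'ing one and returning it through a double pointer. A hedged sketch of the allocation step with the new signature; the in-tree helper presumably dispatches through g->ops.gr.alloc_gr_ctx, while this sketch calls the gk20a implementation directly and trims error handling:

static int example_alloc_tsg_gr_ctx(struct gk20a *g, struct tsg_gk20a *tsg,
				    u32 class_num, u32 padding)
{
	/* embedded in the TSG, no separate gr_ctx_desc allocation */
	struct nvgpu_gr_ctx *gr_ctx = &tsg->gr_ctx;
	int err;

	err = gr_gk20a_alloc_gr_ctx(g, gr_ctx, tsg->vm, class_num, padding);
	if (err)
		return err;

	/* tag the context with its owning TSG, as the diff does */
	gr_ctx->tsgid = tsg->tsgid;
	return 0;
}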
diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
index 36fad8b3..a2434320 100644
--- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
@@ -124,7 +124,7 @@ int gr_gm20b_calc_global_ctx_buffer_size(struct gk20a *g)
124} 124}
125 125
126void gr_gm20b_commit_global_attrib_cb(struct gk20a *g, 126void gr_gm20b_commit_global_attrib_cb(struct gk20a *g,
127 struct channel_ctx_gk20a *ch_ctx, 127 struct nvgpu_gr_ctx *ch_ctx,
128 u64 addr, bool patch) 128 u64 addr, bool patch)
129{ 129{
130 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_setup_attrib_cb_base_r(), 130 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_setup_attrib_cb_base_r(),
@@ -141,7 +141,7 @@ void gr_gm20b_commit_global_attrib_cb(struct gk20a *g,
141} 141}
142 142
143void gr_gm20b_commit_global_bundle_cb(struct gk20a *g, 143void gr_gm20b_commit_global_bundle_cb(struct gk20a *g,
144 struct channel_ctx_gk20a *ch_ctx, 144 struct nvgpu_gr_ctx *ch_ctx,
145 u64 addr, u64 size, bool patch) 145 u64 addr, u64 size, bool patch)
146{ 146{
147 u32 data; 147 u32 data;
@@ -180,7 +180,8 @@ int gr_gm20b_commit_global_cb_manager(struct gk20a *g,
180 struct channel_gk20a *c, bool patch) 180 struct channel_gk20a *c, bool patch)
181{ 181{
182 struct gr_gk20a *gr = &g->gr; 182 struct gr_gk20a *gr = &g->gr;
183 struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx; 183 struct tsg_gk20a *tsg;
184 struct nvgpu_gr_ctx *ch_ctx;
184 u32 attrib_offset_in_chunk = 0; 185 u32 attrib_offset_in_chunk = 0;
185 u32 alpha_offset_in_chunk = 0; 186 u32 alpha_offset_in_chunk = 0;
186 u32 pd_ab_max_output; 187 u32 pd_ab_max_output;
@@ -193,6 +194,12 @@ int gr_gm20b_commit_global_cb_manager(struct gk20a *g,
193 194
194 gk20a_dbg_fn(""); 195 gk20a_dbg_fn("");
195 196
197 tsg = tsg_gk20a_from_ch(c);
198 if (!tsg)
199 return -EINVAL;
200
201 ch_ctx = &tsg->gr_ctx;
202
196 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_ds_tga_constraintlogic_r(), 203 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_ds_tga_constraintlogic_r(),
197 gr_ds_tga_constraintlogic_beta_cbsize_f(gr->attrib_cb_default_size) | 204 gr_ds_tga_constraintlogic_beta_cbsize_f(gr->attrib_cb_default_size) |
198 gr_ds_tga_constraintlogic_alpha_cbsize_f(gr->alpha_cb_default_size), 205 gr_ds_tga_constraintlogic_alpha_cbsize_f(gr->alpha_cb_default_size),
@@ -257,7 +264,7 @@ int gr_gm20b_commit_global_cb_manager(struct gk20a *g,
257} 264}
258 265
259void gr_gm20b_commit_global_pagepool(struct gk20a *g, 266void gr_gm20b_commit_global_pagepool(struct gk20a *g,
260 struct channel_ctx_gk20a *ch_ctx, 267 struct nvgpu_gr_ctx *ch_ctx,
261 u64 addr, u32 size, bool patch) 268 u64 addr, u32 size, bool patch)
262{ 269{
263 gr_gk20a_commit_global_pagepool(g, ch_ctx, addr, size, patch); 270 gr_gk20a_commit_global_pagepool(g, ch_ctx, addr, size, patch);
@@ -845,7 +852,7 @@ u32 gr_gm20b_pagepool_default_size(struct gk20a *g)
845} 852}
846 853
847int gr_gm20b_alloc_gr_ctx(struct gk20a *g, 854int gr_gm20b_alloc_gr_ctx(struct gk20a *g,
848 struct gr_ctx_desc **gr_ctx, struct vm_gk20a *vm, 855 struct nvgpu_gr_ctx *gr_ctx, struct vm_gk20a *vm,
849 u32 class, 856 u32 class,
850 u32 flags) 857 u32 flags)
851{ 858{
@@ -858,7 +865,7 @@ int gr_gm20b_alloc_gr_ctx(struct gk20a *g,
858 return err; 865 return err;
859 866
860 if (class == MAXWELL_COMPUTE_B) 867 if (class == MAXWELL_COMPUTE_B)
861 (*gr_ctx)->compute_preempt_mode = NVGPU_PREEMPTION_MODE_COMPUTE_CTA; 868 gr_ctx->compute_preempt_mode = NVGPU_PREEMPTION_MODE_COMPUTE_CTA;
862 869
863 gk20a_dbg_fn("done"); 870 gk20a_dbg_fn("done");
864 871
@@ -866,15 +873,21 @@ int gr_gm20b_alloc_gr_ctx(struct gk20a *g,
866} 873}
867 874
868void gr_gm20b_update_ctxsw_preemption_mode(struct gk20a *g, 875void gr_gm20b_update_ctxsw_preemption_mode(struct gk20a *g,
869 struct channel_ctx_gk20a *ch_ctx, 876 struct channel_gk20a *c,
870 struct nvgpu_mem *mem) 877 struct nvgpu_mem *mem)
871{ 878{
872 struct gr_ctx_desc *gr_ctx = ch_ctx->gr_ctx; 879 struct tsg_gk20a *tsg;
880 struct nvgpu_gr_ctx *gr_ctx;
873 u32 cta_preempt_option = 881 u32 cta_preempt_option =
874 ctxsw_prog_main_image_preemption_options_control_cta_enabled_f(); 882 ctxsw_prog_main_image_preemption_options_control_cta_enabled_f();
875 883
876 gk20a_dbg_fn(""); 884 gk20a_dbg_fn("");
877 885
886 tsg = tsg_gk20a_from_ch(c);
887 if (!tsg)
888 return;
889
890 gr_ctx = &tsg->gr_ctx;
878 if (gr_ctx->compute_preempt_mode == NVGPU_PREEMPTION_MODE_COMPUTE_CTA) { 891 if (gr_ctx->compute_preempt_mode == NVGPU_PREEMPTION_MODE_COMPUTE_CTA) {
879 gk20a_dbg_info("CTA: %x", cta_preempt_option); 892 gk20a_dbg_info("CTA: %x", cta_preempt_option);
880 nvgpu_mem_wr(g, mem, 893 nvgpu_mem_wr(g, mem,
@@ -1026,16 +1039,22 @@ int gr_gm20b_dump_gr_status_regs(struct gk20a *g,
1026int gr_gm20b_update_pc_sampling(struct channel_gk20a *c, 1039int gr_gm20b_update_pc_sampling(struct channel_gk20a *c,
1027 bool enable) 1040 bool enable)
1028{ 1041{
1029 struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx; 1042 struct tsg_gk20a *tsg;
1043 struct nvgpu_gr_ctx *gr_ctx;
1030 struct nvgpu_mem *mem; 1044 struct nvgpu_mem *mem;
1031 u32 v; 1045 u32 v;
1032 1046
1033 gk20a_dbg_fn(""); 1047 gk20a_dbg_fn("");
1034 1048
1035 if (!ch_ctx || !ch_ctx->gr_ctx || c->vpr) 1049 tsg = tsg_gk20a_from_ch(c);
1050 if (!tsg)
1051 return -EINVAL;
1052
1053 gr_ctx = &tsg->gr_ctx;
1054 mem = &gr_ctx->mem;
1055 if (!nvgpu_mem_is_valid(mem) || c->vpr)
1036 return -EINVAL; 1056 return -EINVAL;
1037 1057
1038 mem = &ch_ctx->gr_ctx->mem;
1039 1058
1040 if (nvgpu_mem_begin(c->g, mem)) 1059 if (nvgpu_mem_begin(c->g, mem))
1041 return -ENOMEM; 1060 return -ENOMEM;
@@ -1289,12 +1308,19 @@ int gm20b_gr_update_sm_error_state(struct gk20a *g,
1289{ 1308{
1290 u32 gpc, tpc, offset; 1309 u32 gpc, tpc, offset;
1291 struct gr_gk20a *gr = &g->gr; 1310 struct gr_gk20a *gr = &g->gr;
1292 struct channel_ctx_gk20a *ch_ctx = &ch->ch_ctx; 1311 struct tsg_gk20a *tsg;
1312 struct nvgpu_gr_ctx *ch_ctx;
1293 u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); 1313 u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
1294 u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, 1314 u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g,
1295 GPU_LIT_TPC_IN_GPC_STRIDE); 1315 GPU_LIT_TPC_IN_GPC_STRIDE);
1296 int err = 0; 1316 int err = 0;
1297 1317
1318 tsg = tsg_gk20a_from_ch(ch);
1319 if (!tsg)
1320 return -EINVAL;
1321
1322 ch_ctx = &tsg->gr_ctx;
1323
1298 nvgpu_mutex_acquire(&g->dbg_sessions_lock); 1324 nvgpu_mutex_acquire(&g->dbg_sessions_lock);
1299 1325
1300 gr->sm_error_states[sm_id].hww_global_esr = 1326 gr->sm_error_states[sm_id].hww_global_esr =
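[Editorial note] The gm20b changes above all follow one pattern: code that used to reach the context through ch->ch_ctx now resolves the channel's TSG first and uses the TSG-owned context, failing with -EINVAL when the channel is not bound to a TSG. A condensed sketch of that pattern; tsg_gk20a_from_ch() and the embedded gr_ctx are as in this patch, the wrapper itself is illustrative:

	static struct nvgpu_gr_ctx *example_gr_ctx_from_ch(struct channel_gk20a *c)
	{
		struct tsg_gk20a *tsg = tsg_gk20a_from_ch(c);

		/* Channels without a TSG no longer carry a graphics context. */
		return tsg ? &tsg->gr_ctx : NULL;
	}

Callers that previously tested ch_ctx->gr_ctx for NULL now test the TSG lookup instead, which is why the -EINVAL early returns appear throughout these hunks.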
diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.h b/drivers/gpu/nvgpu/gm20b/gr_gm20b.h
index 18e6b032..bddf6412 100644
--- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.h
+++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.h
@@ -46,7 +46,7 @@ enum {
46#define NVA297_SET_SHADER_EXCEPTIONS_ENABLE_FALSE 0 46#define NVA297_SET_SHADER_EXCEPTIONS_ENABLE_FALSE 0
47 47
48void gr_gm20b_commit_global_attrib_cb(struct gk20a *g, 48void gr_gm20b_commit_global_attrib_cb(struct gk20a *g,
49 struct channel_ctx_gk20a *ch_ctx, 49 struct nvgpu_gr_ctx *ch_ctx,
50 u64 addr, bool patch); 50 u64 addr, bool patch);
51int gr_gm20b_init_fs_state(struct gk20a *g); 51int gr_gm20b_init_fs_state(struct gk20a *g);
52int gm20b_gr_tpc_disable_override(struct gk20a *g, u32 mask); 52int gm20b_gr_tpc_disable_override(struct gk20a *g, u32 mask);
@@ -57,12 +57,12 @@ void gr_gm20b_bundle_cb_defaults(struct gk20a *g);
57void gr_gm20b_cb_size_default(struct gk20a *g); 57void gr_gm20b_cb_size_default(struct gk20a *g);
58int gr_gm20b_calc_global_ctx_buffer_size(struct gk20a *g); 58int gr_gm20b_calc_global_ctx_buffer_size(struct gk20a *g);
59void gr_gm20b_commit_global_bundle_cb(struct gk20a *g, 59void gr_gm20b_commit_global_bundle_cb(struct gk20a *g,
60 struct channel_ctx_gk20a *ch_ctx, 60 struct nvgpu_gr_ctx *ch_ctx,
61 u64 addr, u64 size, bool patch); 61 u64 addr, u64 size, bool patch);
62int gr_gm20b_commit_global_cb_manager(struct gk20a *g, 62int gr_gm20b_commit_global_cb_manager(struct gk20a *g,
63 struct channel_gk20a *c, bool patch); 63 struct channel_gk20a *c, bool patch);
64void gr_gm20b_commit_global_pagepool(struct gk20a *g, 64void gr_gm20b_commit_global_pagepool(struct gk20a *g,
65 struct channel_ctx_gk20a *ch_ctx, 65 struct nvgpu_gr_ctx *ch_ctx,
66 u64 addr, u32 size, bool patch); 66 u64 addr, u32 size, bool patch);
67int gr_gm20b_handle_sw_method(struct gk20a *g, u32 addr, 67int gr_gm20b_handle_sw_method(struct gk20a *g, u32 addr,
68 u32 class_num, u32 offset, u32 data); 68 u32 class_num, u32 offset, u32 data);
@@ -96,11 +96,11 @@ int gr_gm20b_load_ctxsw_ucode(struct gk20a *g);
96void gr_gm20b_detect_sm_arch(struct gk20a *g); 96void gr_gm20b_detect_sm_arch(struct gk20a *g);
97u32 gr_gm20b_pagepool_default_size(struct gk20a *g); 97u32 gr_gm20b_pagepool_default_size(struct gk20a *g);
98int gr_gm20b_alloc_gr_ctx(struct gk20a *g, 98int gr_gm20b_alloc_gr_ctx(struct gk20a *g,
99 struct gr_ctx_desc **gr_ctx, struct vm_gk20a *vm, 99 struct nvgpu_gr_ctx *gr_ctx, struct vm_gk20a *vm,
100 u32 class, 100 u32 class,
101 u32 flags); 101 u32 flags);
102void gr_gm20b_update_ctxsw_preemption_mode(struct gk20a *g, 102void gr_gm20b_update_ctxsw_preemption_mode(struct gk20a *g,
103 struct channel_ctx_gk20a *ch_ctx, 103 struct channel_gk20a *c,
104 struct nvgpu_mem *mem); 104 struct nvgpu_mem *mem);
105int gr_gm20b_dump_gr_status_regs(struct gk20a *g, 105int gr_gm20b_dump_gr_status_regs(struct gk20a *g,
106 struct gk20a_debug_output *o); 106 struct gk20a_debug_output *o);
diff --git a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c
index c29f7267..3ee22ed1 100644
--- a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c
@@ -226,7 +226,6 @@ static const struct gpu_ops gm20b_ops = {
226 .load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode, 226 .load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode,
227 .set_gpc_tpc_mask = gr_gm20b_set_gpc_tpc_mask, 227 .set_gpc_tpc_mask = gr_gm20b_set_gpc_tpc_mask,
228 .get_gpc_tpc_mask = gr_gm20b_get_gpc_tpc_mask, 228 .get_gpc_tpc_mask = gr_gm20b_get_gpc_tpc_mask,
229 .free_channel_ctx = gk20a_free_channel_ctx,
230 .alloc_obj_ctx = gk20a_alloc_obj_ctx, 229 .alloc_obj_ctx = gk20a_alloc_obj_ctx,
231 .bind_ctxsw_zcull = gr_gk20a_bind_ctxsw_zcull, 230 .bind_ctxsw_zcull = gr_gk20a_bind_ctxsw_zcull,
232 .get_zcull_info = gr_gk20a_get_zcull_info, 231 .get_zcull_info = gr_gk20a_get_zcull_info,
diff --git a/drivers/gpu/nvgpu/gp106/gr_gp106.c b/drivers/gpu/nvgpu/gp106/gr_gp106.c
index bedc0b78..02cecf53 100644
--- a/drivers/gpu/nvgpu/gp106/gr_gp106.c
+++ b/drivers/gpu/nvgpu/gp106/gr_gp106.c
@@ -135,7 +135,7 @@ void gr_gp106_cb_size_default(struct gk20a *g)
135} 135}
136 136
137int gr_gp106_set_ctxsw_preemption_mode(struct gk20a *g, 137int gr_gp106_set_ctxsw_preemption_mode(struct gk20a *g,
138 struct gr_ctx_desc *gr_ctx, 138 struct nvgpu_gr_ctx *gr_ctx,
139 struct vm_gk20a *vm, u32 class, 139 struct vm_gk20a *vm, u32 class,
140 u32 graphics_preempt_mode, 140 u32 graphics_preempt_mode,
141 u32 compute_preempt_mode) 141 u32 compute_preempt_mode)
diff --git a/drivers/gpu/nvgpu/gp106/gr_gp106.h b/drivers/gpu/nvgpu/gp106/gr_gp106.h
index 9f76e4ac..491ced4e 100644
--- a/drivers/gpu/nvgpu/gp106/gr_gp106.h
+++ b/drivers/gpu/nvgpu/gp106/gr_gp106.h
@@ -38,7 +38,7 @@ int gr_gp106_handle_sw_method(struct gk20a *g, u32 addr,
38 u32 class_num, u32 offset, u32 data); 38 u32 class_num, u32 offset, u32 data);
39void gr_gp106_cb_size_default(struct gk20a *g); 39void gr_gp106_cb_size_default(struct gk20a *g);
40int gr_gp106_set_ctxsw_preemption_mode(struct gk20a *g, 40int gr_gp106_set_ctxsw_preemption_mode(struct gk20a *g,
41 struct gr_ctx_desc *gr_ctx, 41 struct nvgpu_gr_ctx *gr_ctx,
42 struct vm_gk20a *vm, u32 class, 42 struct vm_gk20a *vm, u32 class,
43 u32 graphics_preempt_mode, 43 u32 graphics_preempt_mode,
44 u32 compute_preempt_mode); 44 u32 compute_preempt_mode);
diff --git a/drivers/gpu/nvgpu/gp106/hal_gp106.c b/drivers/gpu/nvgpu/gp106/hal_gp106.c
index 1498d1c0..3073668e 100644
--- a/drivers/gpu/nvgpu/gp106/hal_gp106.c
+++ b/drivers/gpu/nvgpu/gp106/hal_gp106.c
@@ -272,7 +272,6 @@ static const struct gpu_ops gp106_ops = {
272 .falcon_load_ucode = gr_gm20b_load_ctxsw_ucode_segments, 272 .falcon_load_ucode = gr_gm20b_load_ctxsw_ucode_segments,
273 .set_gpc_tpc_mask = gr_gp10b_set_gpc_tpc_mask, 273 .set_gpc_tpc_mask = gr_gp10b_set_gpc_tpc_mask,
274 .get_gpc_tpc_mask = gr_gm20b_get_gpc_tpc_mask, 274 .get_gpc_tpc_mask = gr_gm20b_get_gpc_tpc_mask,
275 .free_channel_ctx = gk20a_free_channel_ctx,
276 .alloc_obj_ctx = gk20a_alloc_obj_ctx, 275 .alloc_obj_ctx = gk20a_alloc_obj_ctx,
277 .bind_ctxsw_zcull = gr_gk20a_bind_ctxsw_zcull, 276 .bind_ctxsw_zcull = gr_gk20a_bind_ctxsw_zcull,
278 .get_zcull_info = gr_gk20a_get_zcull_info, 277 .get_zcull_info = gr_gk20a_get_zcull_info,
diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index 56acc732..549a4da4 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -389,9 +389,9 @@ int gr_gp10b_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc,
389int gr_gp10b_commit_global_cb_manager(struct gk20a *g, 389int gr_gp10b_commit_global_cb_manager(struct gk20a *g,
390 struct channel_gk20a *c, bool patch) 390 struct channel_gk20a *c, bool patch)
391{ 391{
392 struct tsg_gk20a *tsg;
392 struct gr_gk20a *gr = &g->gr; 393 struct gr_gk20a *gr = &g->gr;
393 struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx; 394 struct nvgpu_gr_ctx *gr_ctx;
394 struct gr_ctx_desc *gr_ctx = ch_ctx->gr_ctx;
395 u32 attrib_offset_in_chunk = 0; 395 u32 attrib_offset_in_chunk = 0;
396 u32 alpha_offset_in_chunk = 0; 396 u32 alpha_offset_in_chunk = 0;
397 u32 pd_ab_max_output; 397 u32 pd_ab_max_output;
@@ -405,6 +405,12 @@ int gr_gp10b_commit_global_cb_manager(struct gk20a *g,
405 405
406 gk20a_dbg_fn(""); 406 gk20a_dbg_fn("");
407 407
408 tsg = tsg_gk20a_from_ch(c);
409 if (!tsg)
410 return -EINVAL;
411
412 gr_ctx = &tsg->gr_ctx;
413
408 if (gr_ctx->graphics_preempt_mode == NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP) { 414 if (gr_ctx->graphics_preempt_mode == NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP) {
409 attrib_size_in_chunk = gr->attrib_cb_gfxp_size; 415 attrib_size_in_chunk = gr->attrib_cb_gfxp_size;
410 cb_attrib_cache_size_init = gr->attrib_cb_gfxp_default_size; 416 cb_attrib_cache_size_init = gr->attrib_cb_gfxp_default_size;
@@ -413,9 +419,9 @@ int gr_gp10b_commit_global_cb_manager(struct gk20a *g,
413 cb_attrib_cache_size_init = gr->attrib_cb_default_size; 419 cb_attrib_cache_size_init = gr->attrib_cb_default_size;
414 } 420 }
415 421
416 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_ds_tga_constraintlogic_beta_r(), 422 gr_gk20a_ctx_patch_write(g, gr_ctx, gr_ds_tga_constraintlogic_beta_r(),
417 gr->attrib_cb_default_size, patch); 423 gr->attrib_cb_default_size, patch);
418 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_ds_tga_constraintlogic_alpha_r(), 424 gr_gk20a_ctx_patch_write(g, gr_ctx, gr_ds_tga_constraintlogic_alpha_r(),
419 gr->alpha_cb_default_size, patch); 425 gr->alpha_cb_default_size, patch);
420 426
421 pd_ab_max_output = (gr->alpha_cb_default_size * 427 pd_ab_max_output = (gr->alpha_cb_default_size *
@@ -423,11 +429,11 @@ int gr_gp10b_commit_global_cb_manager(struct gk20a *g,
423 gr_pd_ab_dist_cfg1_max_output_granularity_v(); 429 gr_pd_ab_dist_cfg1_max_output_granularity_v();
424 430
425 if (g->gr.pd_max_batches) { 431 if (g->gr.pd_max_batches) {
426 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_pd_ab_dist_cfg1_r(), 432 gr_gk20a_ctx_patch_write(g, gr_ctx, gr_pd_ab_dist_cfg1_r(),
427 gr_pd_ab_dist_cfg1_max_output_f(pd_ab_max_output) | 433 gr_pd_ab_dist_cfg1_max_output_f(pd_ab_max_output) |
428 gr_pd_ab_dist_cfg1_max_batches_f(g->gr.pd_max_batches), patch); 434 gr_pd_ab_dist_cfg1_max_batches_f(g->gr.pd_max_batches), patch);
429 } else { 435 } else {
430 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_pd_ab_dist_cfg1_r(), 436 gr_gk20a_ctx_patch_write(g, gr_ctx, gr_pd_ab_dist_cfg1_r(),
431 gr_pd_ab_dist_cfg1_max_output_f(pd_ab_max_output) | 437 gr_pd_ab_dist_cfg1_max_output_f(pd_ab_max_output) |
432 gr_pd_ab_dist_cfg1_max_batches_init_f(), patch); 438 gr_pd_ab_dist_cfg1_max_batches_init_f(), patch);
433 } 439 }
@@ -447,17 +453,17 @@ int gr_gp10b_commit_global_cb_manager(struct gk20a *g,
447 cbm_cfg_size_steadystate = gr->attrib_cb_default_size * 453 cbm_cfg_size_steadystate = gr->attrib_cb_default_size *
448 gr->pes_tpc_count[ppc_index][gpc_index]; 454 gr->pes_tpc_count[ppc_index][gpc_index];
449 455
450 gr_gk20a_ctx_patch_write(g, ch_ctx, 456 gr_gk20a_ctx_patch_write(g, gr_ctx,
451 gr_gpc0_ppc0_cbm_beta_cb_size_r() + temp + 457 gr_gpc0_ppc0_cbm_beta_cb_size_r() + temp +
452 ppc_in_gpc_stride * ppc_index, 458 ppc_in_gpc_stride * ppc_index,
453 cbm_cfg_size_beta, patch); 459 cbm_cfg_size_beta, patch);
454 460
455 gr_gk20a_ctx_patch_write(g, ch_ctx, 461 gr_gk20a_ctx_patch_write(g, gr_ctx,
456 gr_gpc0_ppc0_cbm_beta_cb_offset_r() + temp + 462 gr_gpc0_ppc0_cbm_beta_cb_offset_r() + temp +
457 ppc_in_gpc_stride * ppc_index, 463 ppc_in_gpc_stride * ppc_index,
458 attrib_offset_in_chunk, patch); 464 attrib_offset_in_chunk, patch);
459 465
460 gr_gk20a_ctx_patch_write(g, ch_ctx, 466 gr_gk20a_ctx_patch_write(g, gr_ctx,
461 gr_gpc0_ppc0_cbm_beta_steady_state_cb_size_r() + temp + 467 gr_gpc0_ppc0_cbm_beta_steady_state_cb_size_r() + temp +
462 ppc_in_gpc_stride * ppc_index, 468 ppc_in_gpc_stride * ppc_index,
463 cbm_cfg_size_steadystate, 469 cbm_cfg_size_steadystate,
@@ -466,12 +472,12 @@ int gr_gp10b_commit_global_cb_manager(struct gk20a *g,
466 attrib_offset_in_chunk += attrib_size_in_chunk * 472 attrib_offset_in_chunk += attrib_size_in_chunk *
467 gr->pes_tpc_count[ppc_index][gpc_index]; 473 gr->pes_tpc_count[ppc_index][gpc_index];
468 474
469 gr_gk20a_ctx_patch_write(g, ch_ctx, 475 gr_gk20a_ctx_patch_write(g, gr_ctx,
470 gr_gpc0_ppc0_cbm_alpha_cb_size_r() + temp + 476 gr_gpc0_ppc0_cbm_alpha_cb_size_r() + temp +
471 ppc_in_gpc_stride * ppc_index, 477 ppc_in_gpc_stride * ppc_index,
472 cbm_cfg_size_alpha, patch); 478 cbm_cfg_size_alpha, patch);
473 479
474 gr_gk20a_ctx_patch_write(g, ch_ctx, 480 gr_gk20a_ctx_patch_write(g, gr_ctx,
475 gr_gpc0_ppc0_cbm_alpha_cb_offset_r() + temp + 481 gr_gpc0_ppc0_cbm_alpha_cb_offset_r() + temp +
476 ppc_in_gpc_stride * ppc_index, 482 ppc_in_gpc_stride * ppc_index,
477 alpha_offset_in_chunk, patch); 483 alpha_offset_in_chunk, patch);
@@ -479,7 +485,7 @@ int gr_gp10b_commit_global_cb_manager(struct gk20a *g,
479 alpha_offset_in_chunk += gr->alpha_cb_size * 485 alpha_offset_in_chunk += gr->alpha_cb_size *
480 gr->pes_tpc_count[ppc_index][gpc_index]; 486 gr->pes_tpc_count[ppc_index][gpc_index];
481 487
482 gr_gk20a_ctx_patch_write(g, ch_ctx, 488 gr_gk20a_ctx_patch_write(g, gr_ctx,
483 gr_gpcs_swdx_tc_beta_cb_size_r(ppc_index + temp2), 489 gr_gpcs_swdx_tc_beta_cb_size_r(ppc_index + temp2),
484 gr_gpcs_swdx_tc_beta_cb_size_v_f(cbm_cfg_size_steadystate), 490 gr_gpcs_swdx_tc_beta_cb_size_v_f(cbm_cfg_size_steadystate),
485 patch); 491 patch);
@@ -490,20 +496,20 @@ int gr_gp10b_commit_global_cb_manager(struct gk20a *g,
490} 496}
491 497
492void gr_gp10b_commit_global_pagepool(struct gk20a *g, 498void gr_gp10b_commit_global_pagepool(struct gk20a *g,
493 struct channel_ctx_gk20a *ch_ctx, 499 struct nvgpu_gr_ctx *gr_ctx,
494 u64 addr, u32 size, bool patch) 500 u64 addr, u32 size, bool patch)
495{ 501{
496 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_scc_pagepool_base_r(), 502 gr_gk20a_ctx_patch_write(g, gr_ctx, gr_scc_pagepool_base_r(),
497 gr_scc_pagepool_base_addr_39_8_f(addr), patch); 503 gr_scc_pagepool_base_addr_39_8_f(addr), patch);
498 504
499 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_scc_pagepool_r(), 505 gr_gk20a_ctx_patch_write(g, gr_ctx, gr_scc_pagepool_r(),
500 gr_scc_pagepool_total_pages_f(size) | 506 gr_scc_pagepool_total_pages_f(size) |
501 gr_scc_pagepool_valid_true_f(), patch); 507 gr_scc_pagepool_valid_true_f(), patch);
502 508
503 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_gcc_pagepool_base_r(), 509 gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_gcc_pagepool_base_r(),
504 gr_gpcs_gcc_pagepool_base_addr_39_8_f(addr), patch); 510 gr_gpcs_gcc_pagepool_base_addr_39_8_f(addr), patch);
505 511
506 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_gcc_pagepool_r(), 512 gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_gcc_pagepool_r(),
507 gr_gpcs_gcc_pagepool_total_pages_f(size), patch); 513 gr_gpcs_gcc_pagepool_total_pages_f(size), patch);
508} 514}
509 515
@@ -947,7 +953,7 @@ fail_free:
947} 953}
948 954
949int gr_gp10b_set_ctxsw_preemption_mode(struct gk20a *g, 955int gr_gp10b_set_ctxsw_preemption_mode(struct gk20a *g,
950 struct gr_ctx_desc *gr_ctx, 956 struct nvgpu_gr_ctx *gr_ctx,
951 struct vm_gk20a *vm, u32 class, 957 struct vm_gk20a *vm, u32 class,
952 u32 graphics_preempt_mode, 958 u32 graphics_preempt_mode,
953 u32 compute_preempt_mode) 959 u32 compute_preempt_mode)
@@ -1071,7 +1077,7 @@ fail:
1071} 1077}
1072 1078
1073int gr_gp10b_alloc_gr_ctx(struct gk20a *g, 1079int gr_gp10b_alloc_gr_ctx(struct gk20a *g,
1074 struct gr_ctx_desc **gr_ctx, struct vm_gk20a *vm, 1080 struct nvgpu_gr_ctx *gr_ctx, struct vm_gk20a *vm,
1075 u32 class, 1081 u32 class,
1076 u32 flags) 1082 u32 flags)
1077{ 1083{
@@ -1085,7 +1091,7 @@ int gr_gp10b_alloc_gr_ctx(struct gk20a *g,
1085 if (err) 1091 if (err)
1086 return err; 1092 return err;
1087 1093
1088 (*gr_ctx)->ctx_id_valid = false; 1094 gr_ctx->ctx_id_valid = false;
1089 1095
1090 if (flags & NVGPU_OBJ_CTX_FLAGS_SUPPORT_GFXP) 1096 if (flags & NVGPU_OBJ_CTX_FLAGS_SUPPORT_GFXP)
1091 graphics_preempt_mode = NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP; 1097 graphics_preempt_mode = NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP;
@@ -1094,7 +1100,7 @@ int gr_gp10b_alloc_gr_ctx(struct gk20a *g,
1094 1100
1095 if (graphics_preempt_mode || compute_preempt_mode) { 1101 if (graphics_preempt_mode || compute_preempt_mode) {
1096 if (g->ops.gr.set_ctxsw_preemption_mode) { 1102 if (g->ops.gr.set_ctxsw_preemption_mode) {
1097 err = g->ops.gr.set_ctxsw_preemption_mode(g, *gr_ctx, vm, 1103 err = g->ops.gr.set_ctxsw_preemption_mode(g, gr_ctx, vm,
1098 class, graphics_preempt_mode, compute_preempt_mode); 1104 class, graphics_preempt_mode, compute_preempt_mode);
1099 if (err) { 1105 if (err) {
1100 nvgpu_err(g, "set_ctxsw_preemption_mode failed"); 1106 nvgpu_err(g, "set_ctxsw_preemption_mode failed");
@@ -1109,14 +1115,13 @@ int gr_gp10b_alloc_gr_ctx(struct gk20a *g,
1109 return 0; 1115 return 0;
1110 1116
1111fail_free_gk20a_ctx: 1117fail_free_gk20a_ctx:
1112 gr_gk20a_free_gr_ctx(g, vm, *gr_ctx); 1118 gr_gk20a_free_gr_ctx(g, vm, gr_ctx);
1113 *gr_ctx = NULL;
1114 1119
1115 return err; 1120 return err;
1116} 1121}
1117 1122
1118void gr_gp10b_dump_ctxsw_stats(struct gk20a *g, struct vm_gk20a *vm, 1123void gr_gp10b_dump_ctxsw_stats(struct gk20a *g, struct vm_gk20a *vm,
1119 struct gr_ctx_desc *gr_ctx) 1124 struct nvgpu_gr_ctx *gr_ctx)
1120{ 1125{
1121 struct nvgpu_mem *mem = &gr_ctx->mem; 1126 struct nvgpu_mem *mem = &gr_ctx->mem;
1122 1127
@@ -1168,13 +1173,13 @@ void gr_gp10b_dump_ctxsw_stats(struct gk20a *g, struct vm_gk20a *vm,
1168} 1173}
1169 1174
1170void gr_gp10b_update_ctxsw_preemption_mode(struct gk20a *g, 1175void gr_gp10b_update_ctxsw_preemption_mode(struct gk20a *g,
1171 struct channel_ctx_gk20a *ch_ctx, 1176 struct channel_gk20a *c,
1172 struct nvgpu_mem *mem) 1177 struct nvgpu_mem *mem)
1173{ 1178{
1174 struct gr_ctx_desc *gr_ctx = ch_ctx->gr_ctx; 1179 struct tsg_gk20a *tsg;
1175 struct ctx_header_desc *ctx = &ch_ctx->ctx_header; 1180 struct nvgpu_gr_ctx *gr_ctx;
1181 struct ctx_header_desc *ctx = &c->ctx_header;
1176 struct nvgpu_mem *ctxheader = &ctx->mem; 1182 struct nvgpu_mem *ctxheader = &ctx->mem;
1177
1178 u32 gfxp_preempt_option = 1183 u32 gfxp_preempt_option =
1179 ctxsw_prog_main_image_graphics_preemption_options_control_gfxp_f(); 1184 ctxsw_prog_main_image_graphics_preemption_options_control_gfxp_f();
1180 u32 cilp_preempt_option = 1185 u32 cilp_preempt_option =
@@ -1185,6 +1190,12 @@ void gr_gp10b_update_ctxsw_preemption_mode(struct gk20a *g,
1185 1190
1186 gk20a_dbg_fn(""); 1191 gk20a_dbg_fn("");
1187 1192
1193 tsg = tsg_gk20a_from_ch(c);
1194 if (!tsg)
1195 return;
1196
1197 gr_ctx = &tsg->gr_ctx;
1198
1188 if (gr_ctx->graphics_preempt_mode == NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP) { 1199 if (gr_ctx->graphics_preempt_mode == NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP) {
1189 gk20a_dbg_info("GfxP: %x", gfxp_preempt_option); 1200 gk20a_dbg_info("GfxP: %x", gfxp_preempt_option);
1190 nvgpu_mem_wr(g, mem, 1201 nvgpu_mem_wr(g, mem,
@@ -1220,7 +1231,7 @@ void gr_gp10b_update_ctxsw_preemption_mode(struct gk20a *g,
1220 gr_ctx->preempt_ctxsw_buffer.gpu_va); 1231 gr_ctx->preempt_ctxsw_buffer.gpu_va);
1221 } 1232 }
1222 1233
1223 err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx, true); 1234 err = gr_gk20a_ctx_patch_write_begin(g, gr_ctx, true);
1224 if (err) { 1235 if (err) {
1225 nvgpu_err(g, "can't map patch context"); 1236 nvgpu_err(g, "can't map patch context");
1226 goto out; 1237 goto out;
@@ -1232,7 +1243,7 @@ void gr_gp10b_update_ctxsw_preemption_mode(struct gk20a *g,
1232 (32 - gr_gpcs_setup_attrib_cb_base_addr_39_12_align_bits_v())); 1243 (32 - gr_gpcs_setup_attrib_cb_base_addr_39_12_align_bits_v()));
1233 1244
1234 gk20a_dbg_info("attrib cb addr : 0x%016x", addr); 1245 gk20a_dbg_info("attrib cb addr : 0x%016x", addr);
1235 g->ops.gr.commit_global_attrib_cb(g, ch_ctx, addr, true); 1246 g->ops.gr.commit_global_attrib_cb(g, gr_ctx, addr, true);
1236 1247
1237 addr = (u64_lo32(gr_ctx->pagepool_ctxsw_buffer.gpu_va) >> 1248 addr = (u64_lo32(gr_ctx->pagepool_ctxsw_buffer.gpu_va) >>
1238 gr_scc_pagepool_base_addr_39_8_align_bits_v()) | 1249 gr_scc_pagepool_base_addr_39_8_align_bits_v()) |
@@ -1243,7 +1254,7 @@ void gr_gp10b_update_ctxsw_preemption_mode(struct gk20a *g,
1243 if (size == g->ops.gr.pagepool_default_size(g)) 1254 if (size == g->ops.gr.pagepool_default_size(g))
1244 size = gr_scc_pagepool_total_pages_hwmax_v(); 1255 size = gr_scc_pagepool_total_pages_hwmax_v();
1245 1256
1246 g->ops.gr.commit_global_pagepool(g, ch_ctx, addr, size, true); 1257 g->ops.gr.commit_global_pagepool(g, gr_ctx, addr, size, true);
1247 1258
1248 addr = (u64_lo32(gr_ctx->spill_ctxsw_buffer.gpu_va) >> 1259 addr = (u64_lo32(gr_ctx->spill_ctxsw_buffer.gpu_va) >>
1249 gr_gpc0_swdx_rm_spill_buffer_addr_39_8_align_bits_v()) | 1260 gr_gpc0_swdx_rm_spill_buffer_addr_39_8_align_bits_v()) |
@@ -1252,28 +1263,28 @@ void gr_gp10b_update_ctxsw_preemption_mode(struct gk20a *g,
1252 size = gr_ctx->spill_ctxsw_buffer.size / 1263 size = gr_ctx->spill_ctxsw_buffer.size /
1253 gr_gpc0_swdx_rm_spill_buffer_size_256b_byte_granularity_v(); 1264 gr_gpc0_swdx_rm_spill_buffer_size_256b_byte_granularity_v();
1254 1265
1255 gr_gk20a_ctx_patch_write(g, ch_ctx, 1266 gr_gk20a_ctx_patch_write(g, gr_ctx,
1256 gr_gpc0_swdx_rm_spill_buffer_addr_r(), 1267 gr_gpc0_swdx_rm_spill_buffer_addr_r(),
1257 gr_gpc0_swdx_rm_spill_buffer_addr_39_8_f(addr), 1268 gr_gpc0_swdx_rm_spill_buffer_addr_39_8_f(addr),
1258 true); 1269 true);
1259 gr_gk20a_ctx_patch_write(g, ch_ctx, 1270 gr_gk20a_ctx_patch_write(g, gr_ctx,
1260 gr_gpc0_swdx_rm_spill_buffer_size_r(), 1271 gr_gpc0_swdx_rm_spill_buffer_size_r(),
1261 gr_gpc0_swdx_rm_spill_buffer_size_256b_f(size), 1272 gr_gpc0_swdx_rm_spill_buffer_size_256b_f(size),
1262 true); 1273 true);
1263 1274
1264 cbes_reserve = gr_gpcs_swdx_beta_cb_ctrl_cbes_reserve_gfxp_v(); 1275 cbes_reserve = gr_gpcs_swdx_beta_cb_ctrl_cbes_reserve_gfxp_v();
1265 gr_gk20a_ctx_patch_write(g, ch_ctx, 1276 gr_gk20a_ctx_patch_write(g, gr_ctx,
1266 gr_gpcs_swdx_beta_cb_ctrl_r(), 1277 gr_gpcs_swdx_beta_cb_ctrl_r(),
1267 gr_gpcs_swdx_beta_cb_ctrl_cbes_reserve_f( 1278 gr_gpcs_swdx_beta_cb_ctrl_cbes_reserve_f(
1268 cbes_reserve), 1279 cbes_reserve),
1269 true); 1280 true);
1270 gr_gk20a_ctx_patch_write(g, ch_ctx, 1281 gr_gk20a_ctx_patch_write(g, gr_ctx,
1271 gr_gpcs_ppcs_cbm_beta_cb_ctrl_r(), 1282 gr_gpcs_ppcs_cbm_beta_cb_ctrl_r(),
1272 gr_gpcs_ppcs_cbm_beta_cb_ctrl_cbes_reserve_f( 1283 gr_gpcs_ppcs_cbm_beta_cb_ctrl_cbes_reserve_f(
1273 cbes_reserve), 1284 cbes_reserve),
1274 true); 1285 true);
1275 1286
1276 gr_gk20a_ctx_patch_write_end(g, ch_ctx, true); 1287 gr_gk20a_ctx_patch_write_end(g, gr_ctx, true);
1277 } 1288 }
1278 1289
1279out: 1290out:
@@ -1478,10 +1489,9 @@ int gr_gp10b_wait_empty(struct gk20a *g, unsigned long duration_ms,
1478} 1489}
1479 1490
1480void gr_gp10b_commit_global_attrib_cb(struct gk20a *g, 1491void gr_gp10b_commit_global_attrib_cb(struct gk20a *g,
1481 struct channel_ctx_gk20a *ch_ctx, 1492 struct nvgpu_gr_ctx *gr_ctx,
1482 u64 addr, bool patch) 1493 u64 addr, bool patch)
1483{ 1494{
1484 struct gr_ctx_desc *gr_ctx = ch_ctx->gr_ctx;
1485 int attrBufferSize; 1495 int attrBufferSize;
1486 1496
1487 if (gr_ctx->preempt_ctxsw_buffer.gpu_va) 1497 if (gr_ctx->preempt_ctxsw_buffer.gpu_va)
@@ -1491,37 +1501,37 @@ void gr_gp10b_commit_global_attrib_cb(struct gk20a *g,
1491 1501
1492 attrBufferSize /= gr_gpcs_tpcs_tex_rm_cb_1_size_div_128b_granularity_f(); 1502 attrBufferSize /= gr_gpcs_tpcs_tex_rm_cb_1_size_div_128b_granularity_f();
1493 1503
1494 gr_gm20b_commit_global_attrib_cb(g, ch_ctx, addr, patch); 1504 gr_gm20b_commit_global_attrib_cb(g, gr_ctx, addr, patch);
1495 1505
1496 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_r(), 1506 gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_r(),
1497 gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_v_f(addr) | 1507 gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_v_f(addr) |
1498 gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_valid_true_f(), patch); 1508 gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_valid_true_f(), patch);
1499 1509
1500 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_tex_rm_cb_0_r(), 1510 gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_tpcs_tex_rm_cb_0_r(),
1501 gr_gpcs_tpcs_tex_rm_cb_0_base_addr_43_12_f(addr), patch); 1511 gr_gpcs_tpcs_tex_rm_cb_0_base_addr_43_12_f(addr), patch);
1502 1512
1503 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_tex_rm_cb_1_r(), 1513 gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_tpcs_tex_rm_cb_1_r(),
1504 gr_gpcs_tpcs_tex_rm_cb_1_size_div_128b_f(attrBufferSize) | 1514 gr_gpcs_tpcs_tex_rm_cb_1_size_div_128b_f(attrBufferSize) |
1505 gr_gpcs_tpcs_tex_rm_cb_1_valid_true_f(), patch); 1515 gr_gpcs_tpcs_tex_rm_cb_1_valid_true_f(), patch);
1506} 1516}
1507 1517
1508void gr_gp10b_commit_global_bundle_cb(struct gk20a *g, 1518void gr_gp10b_commit_global_bundle_cb(struct gk20a *g,
1509 struct channel_ctx_gk20a *ch_ctx, 1519 struct nvgpu_gr_ctx *gr_ctx,
1510 u64 addr, u64 size, bool patch) 1520 u64 addr, u64 size, bool patch)
1511{ 1521{
1512 u32 data; 1522 u32 data;
1513 1523
1514 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_scc_bundle_cb_base_r(), 1524 gr_gk20a_ctx_patch_write(g, gr_ctx, gr_scc_bundle_cb_base_r(),
1515 gr_scc_bundle_cb_base_addr_39_8_f(addr), patch); 1525 gr_scc_bundle_cb_base_addr_39_8_f(addr), patch);
1516 1526
1517 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_scc_bundle_cb_size_r(), 1527 gr_gk20a_ctx_patch_write(g, gr_ctx, gr_scc_bundle_cb_size_r(),
1518 gr_scc_bundle_cb_size_div_256b_f(size) | 1528 gr_scc_bundle_cb_size_div_256b_f(size) |
1519 gr_scc_bundle_cb_size_valid_true_f(), patch); 1529 gr_scc_bundle_cb_size_valid_true_f(), patch);
1520 1530
1521 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_swdx_bundle_cb_base_r(), 1531 gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_swdx_bundle_cb_base_r(),
1522 gr_gpcs_swdx_bundle_cb_base_addr_39_8_f(addr), patch); 1532 gr_gpcs_swdx_bundle_cb_base_addr_39_8_f(addr), patch);
1523 1533
1524 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_swdx_bundle_cb_size_r(), 1534 gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_swdx_bundle_cb_size_r(),
1525 gr_gpcs_swdx_bundle_cb_size_div_256b_f(size) | 1535 gr_gpcs_swdx_bundle_cb_size_div_256b_f(size) |
1526 gr_gpcs_swdx_bundle_cb_size_valid_true_f(), patch); 1536 gr_gpcs_swdx_bundle_cb_size_valid_true_f(), patch);
1527 1537
@@ -1535,7 +1545,7 @@ void gr_gp10b_commit_global_bundle_cb(struct gk20a *g,
1535 gk20a_dbg_info("bundle cb token limit : %d, state limit : %d", 1545 gk20a_dbg_info("bundle cb token limit : %d, state limit : %d",
1536 g->gr.bundle_cb_token_limit, data); 1546 g->gr.bundle_cb_token_limit, data);
1537 1547
1538 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_pd_ab_dist_cfg2_r(), 1548 gr_gk20a_ctx_patch_write(g, gr_ctx, gr_pd_ab_dist_cfg2_r(),
1539 gr_pd_ab_dist_cfg2_token_limit_f(g->gr.bundle_cb_token_limit) | 1549 gr_pd_ab_dist_cfg2_token_limit_f(g->gr.bundle_cb_token_limit) |
1540 gr_pd_ab_dist_cfg2_state_limit_f(data), patch); 1550 gr_pd_ab_dist_cfg2_state_limit_f(data), patch);
1541} 1551}
@@ -1706,14 +1716,17 @@ int gr_gp10b_set_cilp_preempt_pending(struct gk20a *g,
1706 struct channel_gk20a *fault_ch) 1716 struct channel_gk20a *fault_ch)
1707{ 1717{
1708 int ret; 1718 int ret;
1709 struct gr_ctx_desc *gr_ctx = fault_ch->ch_ctx.gr_ctx;
1710 struct tsg_gk20a *tsg; 1719 struct tsg_gk20a *tsg;
1720 struct nvgpu_gr_ctx *gr_ctx;
1711 1721
1712 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, ""); 1722 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, "");
1713 1723
1714 if (!gr_ctx) 1724 tsg = tsg_gk20a_from_ch(fault_ch);
1725 if (!tsg)
1715 return -EINVAL; 1726 return -EINVAL;
1716 1727
1728 gr_ctx = &tsg->gr_ctx;
1729
1717 if (gr_ctx->cilp_preempt_pending) { 1730 if (gr_ctx->cilp_preempt_pending) {
1718 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, 1731 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr,
1719 "CILP is already pending for chid %d", 1732 "CILP is already pending for chid %d",
@@ -1783,13 +1796,17 @@ int gr_gp10b_set_cilp_preempt_pending(struct gk20a *g,
1783static int gr_gp10b_clear_cilp_preempt_pending(struct gk20a *g, 1796static int gr_gp10b_clear_cilp_preempt_pending(struct gk20a *g,
1784 struct channel_gk20a *fault_ch) 1797 struct channel_gk20a *fault_ch)
1785{ 1798{
1786 struct gr_ctx_desc *gr_ctx = fault_ch->ch_ctx.gr_ctx; 1799 struct tsg_gk20a *tsg;
1800 struct nvgpu_gr_ctx *gr_ctx;
1787 1801
1788 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, ""); 1802 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, "");
1789 1803
1790 if (!gr_ctx) 1804 tsg = tsg_gk20a_from_ch(fault_ch);
1805 if (!tsg)
1791 return -EINVAL; 1806 return -EINVAL;
1792 1807
1808 gr_ctx = &tsg->gr_ctx;
1809
1793 /* The ucode is self-clearing, so all we need to do here is 1810 /* The ucode is self-clearing, so all we need to do here is
1794 to clear cilp_preempt_pending. */ 1811 to clear cilp_preempt_pending. */
1795 if (!gr_ctx->cilp_preempt_pending) { 1812 if (!gr_ctx->cilp_preempt_pending) {
@@ -1820,13 +1837,19 @@ int gr_gp10b_pre_process_sm_exception(struct gk20a *g,
1820 u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); 1837 u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
1821 u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); 1838 u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
1822 u32 offset = gpc_stride * gpc + tpc_in_gpc_stride * tpc; 1839 u32 offset = gpc_stride * gpc + tpc_in_gpc_stride * tpc;
1840 struct tsg_gk20a *tsg;
1823 1841
1824 *early_exit = false; 1842 *early_exit = false;
1825 *ignore_debugger = false; 1843 *ignore_debugger = false;
1826 1844
1827 if (fault_ch) 1845 if (fault_ch) {
1828 cilp_enabled = (fault_ch->ch_ctx.gr_ctx->compute_preempt_mode == 1846 tsg = tsg_gk20a_from_ch(fault_ch);
1847 if (!tsg)
1848 return -EINVAL;
1849
1850 cilp_enabled = (tsg->gr_ctx.compute_preempt_mode ==
1829 NVGPU_PREEMPTION_MODE_COMPUTE_CILP); 1851 NVGPU_PREEMPTION_MODE_COMPUTE_CILP);
1852 }
1830 1853
1831 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "SM Exception received on gpc %d tpc %d = %u\n", 1854 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "SM Exception received on gpc %d tpc %d = %u\n",
1832 gpc, tpc, global_esr); 1855 gpc, tpc, global_esr);
@@ -1911,8 +1934,9 @@ int gr_gp10b_pre_process_sm_exception(struct gk20a *g,
1911 1934
1912static int gr_gp10b_get_cilp_preempt_pending_chid(struct gk20a *g, int *__chid) 1935static int gr_gp10b_get_cilp_preempt_pending_chid(struct gk20a *g, int *__chid)
1913{ 1936{
1914 struct gr_ctx_desc *gr_ctx; 1937 struct nvgpu_gr_ctx *gr_ctx;
1915 struct channel_gk20a *ch; 1938 struct channel_gk20a *ch;
1939 struct tsg_gk20a *tsg;
1916 int chid; 1940 int chid;
1917 int ret = -EINVAL; 1941 int ret = -EINVAL;
1918 1942
@@ -1922,7 +1946,11 @@ static int gr_gp10b_get_cilp_preempt_pending_chid(struct gk20a *g, int *__chid)
1922 if (!ch) 1946 if (!ch)
1923 return ret; 1947 return ret;
1924 1948
1925 gr_ctx = ch->ch_ctx.gr_ctx; 1949 tsg = tsg_gk20a_from_ch(ch);
1950 if (!tsg)
1951 return -EINVAL;
1952
1953 gr_ctx = &tsg->gr_ctx;
1926 1954
1927 if (gr_ctx->cilp_preempt_pending) { 1955 if (gr_ctx->cilp_preempt_pending) {
1928 *__chid = chid; 1956 *__chid = chid;
@@ -2022,11 +2050,17 @@ static bool gr_gp10b_suspend_context(struct channel_gk20a *ch,
2022 bool *cilp_preempt_pending) 2050 bool *cilp_preempt_pending)
2023{ 2051{
2024 struct gk20a *g = ch->g; 2052 struct gk20a *g = ch->g;
2025 struct channel_ctx_gk20a *ch_ctx = &ch->ch_ctx; 2053 struct tsg_gk20a *tsg;
2026 struct gr_ctx_desc *gr_ctx = ch_ctx->gr_ctx; 2054 struct nvgpu_gr_ctx *gr_ctx;
2027 bool ctx_resident = false; 2055 bool ctx_resident = false;
2028 int err = 0; 2056 int err = 0;
2029 2057
2058 tsg = tsg_gk20a_from_ch(ch);
2059 if (!tsg)
2060 return -EINVAL;
2061
2062 gr_ctx = &tsg->gr_ctx;
2063
2030 *cilp_preempt_pending = false; 2064 *cilp_preempt_pending = false;
2031 2065
2032 if (gk20a_is_channel_ctx_resident(ch)) { 2066 if (gk20a_is_channel_ctx_resident(ch)) {
@@ -2097,15 +2131,22 @@ int gr_gp10b_suspend_contexts(struct gk20a *g,
2097 nvgpu_mutex_release(&g->dbg_sessions_lock); 2131 nvgpu_mutex_release(&g->dbg_sessions_lock);
2098 2132
2099 if (cilp_preempt_pending_ch) { 2133 if (cilp_preempt_pending_ch) {
2100 struct channel_ctx_gk20a *ch_ctx = 2134 struct tsg_gk20a *tsg;
2101 &cilp_preempt_pending_ch->ch_ctx; 2135 struct nvgpu_gr_ctx *gr_ctx;
2102 struct gr_ctx_desc *gr_ctx = ch_ctx->gr_ctx;
2103 struct nvgpu_timeout timeout; 2136 struct nvgpu_timeout timeout;
2104 2137
2105 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, 2138 gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr,
2106 "CILP preempt pending, waiting %lu msecs for preemption", 2139 "CILP preempt pending, waiting %lu msecs for preemption",
2107 gk20a_get_gr_idle_timeout(g)); 2140 gk20a_get_gr_idle_timeout(g));
2108 2141
2142 tsg = tsg_gk20a_from_ch(cilp_preempt_pending_ch);
2143 if (!tsg) {
2144 err = -EINVAL;
2145 goto clean_up;
2146 }
2147
2148 gr_ctx = &tsg->gr_ctx;
2149
2109 nvgpu_timeout_init(g, &timeout, gk20a_get_gr_idle_timeout(g), 2150 nvgpu_timeout_init(g, &timeout, gk20a_get_gr_idle_timeout(g),
2110 NVGPU_TIMER_CPU_TIMER); 2151 NVGPU_TIMER_CPU_TIMER);
2111 do { 2152 do {
@@ -2130,12 +2171,19 @@ clean_up:
2130int gr_gp10b_set_boosted_ctx(struct channel_gk20a *ch, 2171int gr_gp10b_set_boosted_ctx(struct channel_gk20a *ch,
2131 bool boost) 2172 bool boost)
2132{ 2173{
2133 struct gr_ctx_desc *gr_ctx = ch->ch_ctx.gr_ctx; 2174 struct tsg_gk20a *tsg;
2175 struct nvgpu_gr_ctx *gr_ctx;
2134 struct gk20a *g = ch->g; 2176 struct gk20a *g = ch->g;
2135 struct nvgpu_mem *mem = &gr_ctx->mem; 2177 struct nvgpu_mem *mem;
2136 int err = 0; 2178 int err = 0;
2137 2179
2180 tsg = tsg_gk20a_from_ch(ch);
2181 if (!tsg)
2182 return -EINVAL;
2183
2184 gr_ctx = &tsg->gr_ctx;
2138 gr_ctx->boosted_ctx = boost; 2185 gr_ctx->boosted_ctx = boost;
2186 mem = &gr_ctx->mem;
2139 2187
2140 if (nvgpu_mem_begin(g, mem)) 2188 if (nvgpu_mem_begin(g, mem))
2141 return -ENOMEM; 2189 return -ENOMEM;
@@ -2162,7 +2210,7 @@ unmap_ctx:
2162} 2210}
2163 2211
2164void gr_gp10b_update_boosted_ctx(struct gk20a *g, struct nvgpu_mem *mem, 2212void gr_gp10b_update_boosted_ctx(struct gk20a *g, struct nvgpu_mem *mem,
2165 struct gr_ctx_desc *gr_ctx) { 2213 struct nvgpu_gr_ctx *gr_ctx) {
2166 u32 v; 2214 u32 v;
2167 2215
2168 v = ctxsw_prog_main_image_pmu_options_boost_clock_frequencies_f( 2216 v = ctxsw_prog_main_image_pmu_options_boost_clock_frequencies_f(
@@ -2174,13 +2222,12 @@ int gr_gp10b_set_preemption_mode(struct channel_gk20a *ch,
2174 u32 graphics_preempt_mode, 2222 u32 graphics_preempt_mode,
2175 u32 compute_preempt_mode) 2223 u32 compute_preempt_mode)
2176{ 2224{
2177 struct gr_ctx_desc *gr_ctx = ch->ch_ctx.gr_ctx; 2225 struct nvgpu_gr_ctx *gr_ctx;
2178 struct channel_ctx_gk20a *ch_ctx = &ch->ch_ctx;
2179 struct gk20a *g = ch->g; 2226 struct gk20a *g = ch->g;
2180 struct tsg_gk20a *tsg; 2227 struct tsg_gk20a *tsg;
2181 struct vm_gk20a *vm; 2228 struct vm_gk20a *vm;
2182 struct nvgpu_mem *mem = &gr_ctx->mem; 2229 struct nvgpu_mem *mem;
2183 struct ctx_header_desc *ctx = &ch->ch_ctx.ctx_header; 2230 struct ctx_header_desc *ctx = &ch->ctx_header;
2184 struct nvgpu_mem *ctxheader = &ctx->mem; 2231 struct nvgpu_mem *ctxheader = &ctx->mem;
2185 u32 class; 2232 u32 class;
2186 int err = 0; 2233 int err = 0;
@@ -2189,12 +2236,13 @@ int gr_gp10b_set_preemption_mode(struct channel_gk20a *ch,
2189 if (!class) 2236 if (!class)
2190 return -EINVAL; 2237 return -EINVAL;
2191 2238
2192 if (gk20a_is_channel_marked_as_tsg(ch)) { 2239 tsg = tsg_gk20a_from_ch(ch);
2193 tsg = &g->fifo.tsg[ch->tsgid]; 2240 if (!tsg)
2194 vm = tsg->vm; 2241 return -EINVAL;
2195 } else { 2242
2196 vm = ch->vm; 2243 vm = tsg->vm;
2197 } 2244 gr_ctx = &tsg->gr_ctx;
2245 mem = &gr_ctx->mem;
2198 2246
2199 /* skip setting anything if both modes are already set */ 2247 /* skip setting anything if both modes are already set */
2200 if (graphics_preempt_mode && 2248 if (graphics_preempt_mode &&
@@ -2241,15 +2289,15 @@ int gr_gp10b_set_preemption_mode(struct channel_gk20a *ch,
2241 2289
2242 if (g->ops.gr.update_ctxsw_preemption_mode) { 2290 if (g->ops.gr.update_ctxsw_preemption_mode) {
2243 g->ops.gr.update_ctxsw_preemption_mode(ch->g, 2291 g->ops.gr.update_ctxsw_preemption_mode(ch->g,
2244 ch_ctx, mem); 2292 ch, mem);
2245 2293
2246 err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx, true); 2294 err = gr_gk20a_ctx_patch_write_begin(g, gr_ctx, true);
2247 if (err) { 2295 if (err) {
2248 nvgpu_err(g, "can't map patch context"); 2296 nvgpu_err(g, "can't map patch context");
2249 goto enable_ch; 2297 goto enable_ch;
2250 } 2298 }
2251 g->ops.gr.commit_global_cb_manager(g, ch, true); 2299 g->ops.gr.commit_global_cb_manager(g, ch, true);
2252 gr_gk20a_ctx_patch_write_end(g, ch_ctx, true); 2300 gr_gk20a_ctx_patch_write_end(g, gr_ctx, true);
2253 } 2301 }
2254 2302
2255enable_ch: 2303enable_ch:
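[Editorial note] Worth calling out in the gp10b hunks above: the patch-write helpers keep their names but now take the nvgpu_gr_ctx directly, so the begin/write/end bracket is addressed through the TSG-owned context rather than the removed channel_ctx_gk20a. A trimmed sketch of the bracket as used after this change; the wrapper function is illustrative and error handling is reduced for brevity:

	static int example_commit_pagepool(struct gk20a *g,
					   struct nvgpu_gr_ctx *gr_ctx,
					   u64 addr, u32 size)
	{
		int err = gr_gk20a_ctx_patch_write_begin(g, gr_ctx, true);

		if (err) {
			nvgpu_err(g, "can't map patch context");
			return err;
		}

		/* Same HAL hook as in the hunks above, now fed the
		 * TSG-owned context. */
		g->ops.gr.commit_global_pagepool(g, gr_ctx, addr, size, true);

		gr_gk20a_ctx_patch_write_end(g, gr_ctx, true);
		return 0;
	}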
diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.h b/drivers/gpu/nvgpu/gp10b/gr_gp10b.h
index e3ef6304..8d553d37 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.h
@@ -29,9 +29,8 @@
29 29
30struct gk20a; 30struct gk20a;
31struct gr_gk20a_isr_data; 31struct gr_gk20a_isr_data;
32struct channel_ctx_gk20a; 32struct nvgpu_gr_ctx;
33struct zbc_entry; 33struct zbc_entry;
34struct gr_ctx_desc;
35struct nvgpu_preemption_modes_rec; 34struct nvgpu_preemption_modes_rec;
36struct gk20a_debug_output; 35struct gk20a_debug_output;
37 36
@@ -75,7 +74,7 @@ int gr_gp10b_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc,
75int gr_gp10b_commit_global_cb_manager(struct gk20a *g, 74int gr_gp10b_commit_global_cb_manager(struct gk20a *g,
76 struct channel_gk20a *c, bool patch); 75 struct channel_gk20a *c, bool patch);
77void gr_gp10b_commit_global_pagepool(struct gk20a *g, 76void gr_gp10b_commit_global_pagepool(struct gk20a *g,
78 struct channel_ctx_gk20a *ch_ctx, 77 struct nvgpu_gr_ctx *ch_ctx,
79 u64 addr, u32 size, bool patch); 78 u64 addr, u32 size, bool patch);
80u32 gr_gp10b_get_gpcs_swdx_dss_zbc_c_format_reg(struct gk20a *g); 79u32 gr_gp10b_get_gpcs_swdx_dss_zbc_c_format_reg(struct gk20a *g);
81u32 gr_gp10b_get_gpcs_swdx_dss_zbc_z_format_reg(struct gk20a *g); 80u32 gr_gp10b_get_gpcs_swdx_dss_zbc_z_format_reg(struct gk20a *g);
@@ -93,28 +92,28 @@ void gr_gp10b_set_alpha_circular_buffer_size(struct gk20a *g, u32 data);
93void gr_gp10b_set_circular_buffer_size(struct gk20a *g, u32 data); 92void gr_gp10b_set_circular_buffer_size(struct gk20a *g, u32 data);
94int gr_gp10b_init_ctx_state(struct gk20a *g); 93int gr_gp10b_init_ctx_state(struct gk20a *g);
95int gr_gp10b_set_ctxsw_preemption_mode(struct gk20a *g, 94int gr_gp10b_set_ctxsw_preemption_mode(struct gk20a *g,
96 struct gr_ctx_desc *gr_ctx, 95 struct nvgpu_gr_ctx *gr_ctx,
97 struct vm_gk20a *vm, u32 class, 96 struct vm_gk20a *vm, u32 class,
98 u32 graphics_preempt_mode, 97 u32 graphics_preempt_mode,
99 u32 compute_preempt_mode); 98 u32 compute_preempt_mode);
100int gr_gp10b_alloc_gr_ctx(struct gk20a *g, 99int gr_gp10b_alloc_gr_ctx(struct gk20a *g,
101 struct gr_ctx_desc **gr_ctx, struct vm_gk20a *vm, 100 struct nvgpu_gr_ctx *gr_ctx, struct vm_gk20a *vm,
102 u32 class, 101 u32 class,
103 u32 flags); 102 u32 flags);
104void gr_gp10b_update_ctxsw_preemption_mode(struct gk20a *g, 103void gr_gp10b_update_ctxsw_preemption_mode(struct gk20a *g,
105 struct channel_ctx_gk20a *ch_ctx, 104 struct channel_gk20a *c,
106 struct nvgpu_mem *mem); 105 struct nvgpu_mem *mem);
107int gr_gp10b_dump_gr_status_regs(struct gk20a *g, 106int gr_gp10b_dump_gr_status_regs(struct gk20a *g,
108 struct gk20a_debug_output *o); 107 struct gk20a_debug_output *o);
109void gr_gp10b_dump_ctxsw_stats(struct gk20a *g, struct vm_gk20a *vm, 108void gr_gp10b_dump_ctxsw_stats(struct gk20a *g, struct vm_gk20a *vm,
110 struct gr_ctx_desc *gr_ctx); 109 struct nvgpu_gr_ctx *gr_ctx);
111int gr_gp10b_wait_empty(struct gk20a *g, unsigned long duration_ms, 110int gr_gp10b_wait_empty(struct gk20a *g, unsigned long duration_ms,
112 u32 expect_delay); 111 u32 expect_delay);
113void gr_gp10b_commit_global_attrib_cb(struct gk20a *g, 112void gr_gp10b_commit_global_attrib_cb(struct gk20a *g,
114 struct channel_ctx_gk20a *ch_ctx, 113 struct nvgpu_gr_ctx *ch_ctx,
115 u64 addr, bool patch); 114 u64 addr, bool patch);
116void gr_gp10b_commit_global_bundle_cb(struct gk20a *g, 115void gr_gp10b_commit_global_bundle_cb(struct gk20a *g,
117 struct channel_ctx_gk20a *ch_ctx, 116 struct nvgpu_gr_ctx *ch_ctx,
118 u64 addr, u64 size, bool patch); 117 u64 addr, u64 size, bool patch);
119int gr_gp10b_load_smid_config(struct gk20a *g); 118int gr_gp10b_load_smid_config(struct gk20a *g);
120void gr_gp10b_set_gpc_tpc_mask(struct gk20a *g, u32 gpc_index); 119void gr_gp10b_set_gpc_tpc_mask(struct gk20a *g, u32 gpc_index);
@@ -133,7 +132,7 @@ int gr_gp10b_suspend_contexts(struct gk20a *g,
133int gr_gp10b_set_boosted_ctx(struct channel_gk20a *ch, 132int gr_gp10b_set_boosted_ctx(struct channel_gk20a *ch,
134 bool boost); 133 bool boost);
135void gr_gp10b_update_boosted_ctx(struct gk20a *g, struct nvgpu_mem *mem, 134void gr_gp10b_update_boosted_ctx(struct gk20a *g, struct nvgpu_mem *mem,
136 struct gr_ctx_desc *gr_ctx); 135 struct nvgpu_gr_ctx *gr_ctx);
137int gr_gp10b_set_preemption_mode(struct channel_gk20a *ch, 136int gr_gp10b_set_preemption_mode(struct channel_gk20a *ch,
138 u32 graphics_preempt_mode, 137 u32 graphics_preempt_mode,
139 u32 compute_preempt_mode); 138 u32 compute_preempt_mode);
diff --git a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
index aaee595d..7041c5bd 100644
--- a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
@@ -236,7 +236,6 @@ static const struct gpu_ops gp10b_ops = {
236 .load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode, 236 .load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode,
237 .set_gpc_tpc_mask = gr_gp10b_set_gpc_tpc_mask, 237 .set_gpc_tpc_mask = gr_gp10b_set_gpc_tpc_mask,
238 .get_gpc_tpc_mask = gr_gm20b_get_gpc_tpc_mask, 238 .get_gpc_tpc_mask = gr_gm20b_get_gpc_tpc_mask,
239 .free_channel_ctx = gk20a_free_channel_ctx,
240 .alloc_obj_ctx = gk20a_alloc_obj_ctx, 239 .alloc_obj_ctx = gk20a_alloc_obj_ctx,
241 .bind_ctxsw_zcull = gr_gk20a_bind_ctxsw_zcull, 240 .bind_ctxsw_zcull = gr_gk20a_bind_ctxsw_zcull,
242 .get_zcull_info = gr_gk20a_get_zcull_info, 241 .get_zcull_info = gr_gk20a_get_zcull_info,
diff --git a/drivers/gpu/nvgpu/gv100/hal_gv100.c b/drivers/gpu/nvgpu/gv100/hal_gv100.c
index b29a73d4..95d1f076 100644
--- a/drivers/gpu/nvgpu/gv100/hal_gv100.c
+++ b/drivers/gpu/nvgpu/gv100/hal_gv100.c
@@ -305,7 +305,6 @@ static const struct gpu_ops gv100_ops = {
305 .load_ctxsw_ucode = gr_gm20b_load_ctxsw_ucode, 305 .load_ctxsw_ucode = gr_gm20b_load_ctxsw_ucode,
306 .set_gpc_tpc_mask = gr_gv100_set_gpc_tpc_mask, 306 .set_gpc_tpc_mask = gr_gv100_set_gpc_tpc_mask,
307 .get_gpc_tpc_mask = gr_gm20b_get_gpc_tpc_mask, 307 .get_gpc_tpc_mask = gr_gm20b_get_gpc_tpc_mask,
308 .free_channel_ctx = gk20a_free_channel_ctx,
309 .alloc_obj_ctx = gk20a_alloc_obj_ctx, 308 .alloc_obj_ctx = gk20a_alloc_obj_ctx,
310 .bind_ctxsw_zcull = gr_gk20a_bind_ctxsw_zcull, 309 .bind_ctxsw_zcull = gr_gk20a_bind_ctxsw_zcull,
311 .get_zcull_info = gr_gk20a_get_zcull_info, 310 .get_zcull_info = gr_gk20a_get_zcull_info,
diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
index d5924169..3030def8 100644
--- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
@@ -1373,7 +1373,7 @@ fail_free:
1373} 1373}
1374 1374
1375int gr_gv11b_set_ctxsw_preemption_mode(struct gk20a *g, 1375int gr_gv11b_set_ctxsw_preemption_mode(struct gk20a *g,
1376 struct gr_ctx_desc *gr_ctx, 1376 struct nvgpu_gr_ctx *gr_ctx,
1377 struct vm_gk20a *vm, u32 class, 1377 struct vm_gk20a *vm, u32 class,
1378 u32 graphics_preempt_mode, 1378 u32 graphics_preempt_mode,
1379 u32 compute_preempt_mode) 1379 u32 compute_preempt_mode)
@@ -1497,13 +1497,13 @@ fail:
1497} 1497}
1498 1498
1499void gr_gv11b_update_ctxsw_preemption_mode(struct gk20a *g, 1499void gr_gv11b_update_ctxsw_preemption_mode(struct gk20a *g,
1500 struct channel_ctx_gk20a *ch_ctx, 1500 struct channel_gk20a *c,
1501 struct nvgpu_mem *mem) 1501 struct nvgpu_mem *mem)
1502{ 1502{
1503 struct gr_ctx_desc *gr_ctx = ch_ctx->gr_ctx; 1503 struct tsg_gk20a *tsg;
1504 struct ctx_header_desc *ctx = &ch_ctx->ctx_header; 1504 struct nvgpu_gr_ctx *gr_ctx;
1505 struct ctx_header_desc *ctx = &c->ctx_header;
1505 struct nvgpu_mem *ctxheader = &ctx->mem; 1506 struct nvgpu_mem *ctxheader = &ctx->mem;
1506
1507 u32 gfxp_preempt_option = 1507 u32 gfxp_preempt_option =
1508 ctxsw_prog_main_image_graphics_preemption_options_control_gfxp_f(); 1508 ctxsw_prog_main_image_graphics_preemption_options_control_gfxp_f();
1509 u32 cilp_preempt_option = 1509 u32 cilp_preempt_option =
@@ -1514,6 +1514,12 @@ void gr_gv11b_update_ctxsw_preemption_mode(struct gk20a *g,
1514 1514
1515 gk20a_dbg_fn(""); 1515 gk20a_dbg_fn("");
1516 1516
1517 tsg = tsg_gk20a_from_ch(c);
1518 if (!tsg)
1519 return;
1520
1521 gr_ctx = &tsg->gr_ctx;
1522
1517 if (gr_ctx->graphics_preempt_mode == 1523 if (gr_ctx->graphics_preempt_mode ==
1518 NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP) { 1524 NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP) {
1519 gk20a_dbg_info("GfxP: %x", gfxp_preempt_option); 1525 gk20a_dbg_info("GfxP: %x", gfxp_preempt_option);
@@ -1552,7 +1558,7 @@ void gr_gv11b_update_ctxsw_preemption_mode(struct gk20a *g,
1552 gr_ctx->preempt_ctxsw_buffer.gpu_va); 1558 gr_ctx->preempt_ctxsw_buffer.gpu_va);
1553 } 1559 }
1554 1560
1555 err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx, true); 1561 err = gr_gk20a_ctx_patch_write_begin(g, gr_ctx, true);
1556 if (err) { 1562 if (err) {
1557 nvgpu_err(g, "can't map patch context"); 1563 nvgpu_err(g, "can't map patch context");
1558 goto out; 1564 goto out;
@@ -1564,7 +1570,7 @@ void gr_gv11b_update_ctxsw_preemption_mode(struct gk20a *g,
1564 (32 - gr_gpcs_setup_attrib_cb_base_addr_39_12_align_bits_v())); 1570 (32 - gr_gpcs_setup_attrib_cb_base_addr_39_12_align_bits_v()));
1565 1571
1566 gk20a_dbg_info("attrib cb addr : 0x%016x", addr); 1572 gk20a_dbg_info("attrib cb addr : 0x%016x", addr);
1567 g->ops.gr.commit_global_attrib_cb(g, ch_ctx, addr, true); 1573 g->ops.gr.commit_global_attrib_cb(g, gr_ctx, addr, true);
1568 1574
1569 addr = (u64_lo32(gr_ctx->pagepool_ctxsw_buffer.gpu_va) >> 1575 addr = (u64_lo32(gr_ctx->pagepool_ctxsw_buffer.gpu_va) >>
1570 gr_scc_pagepool_base_addr_39_8_align_bits_v()) | 1576 gr_scc_pagepool_base_addr_39_8_align_bits_v()) |
@@ -1575,7 +1581,7 @@ void gr_gv11b_update_ctxsw_preemption_mode(struct gk20a *g,
1575 if (size == g->ops.gr.pagepool_default_size(g)) 1581 if (size == g->ops.gr.pagepool_default_size(g))
1576 size = gr_scc_pagepool_total_pages_hwmax_v(); 1582 size = gr_scc_pagepool_total_pages_hwmax_v();
1577 1583
1578 g->ops.gr.commit_global_pagepool(g, ch_ctx, addr, size, true); 1584 g->ops.gr.commit_global_pagepool(g, gr_ctx, addr, size, true);
1579 1585
1580 addr = (u64_lo32(gr_ctx->spill_ctxsw_buffer.gpu_va) >> 1586 addr = (u64_lo32(gr_ctx->spill_ctxsw_buffer.gpu_va) >>
1581 gr_gpc0_swdx_rm_spill_buffer_addr_39_8_align_bits_v()) | 1587 gr_gpc0_swdx_rm_spill_buffer_addr_39_8_align_bits_v()) |
@@ -1584,28 +1590,28 @@ void gr_gv11b_update_ctxsw_preemption_mode(struct gk20a *g,
1584 size = gr_ctx->spill_ctxsw_buffer.size / 1590 size = gr_ctx->spill_ctxsw_buffer.size /
1585 gr_gpc0_swdx_rm_spill_buffer_size_256b_byte_granularity_v(); 1591 gr_gpc0_swdx_rm_spill_buffer_size_256b_byte_granularity_v();
1586 1592
1587 gr_gk20a_ctx_patch_write(g, ch_ctx, 1593 gr_gk20a_ctx_patch_write(g, gr_ctx,
1588 gr_gpc0_swdx_rm_spill_buffer_addr_r(), 1594 gr_gpc0_swdx_rm_spill_buffer_addr_r(),
1589 gr_gpc0_swdx_rm_spill_buffer_addr_39_8_f(addr), 1595 gr_gpc0_swdx_rm_spill_buffer_addr_39_8_f(addr),
1590 true); 1596 true);
1591 gr_gk20a_ctx_patch_write(g, ch_ctx, 1597 gr_gk20a_ctx_patch_write(g, gr_ctx,
1592 gr_gpc0_swdx_rm_spill_buffer_size_r(), 1598 gr_gpc0_swdx_rm_spill_buffer_size_r(),
1593 gr_gpc0_swdx_rm_spill_buffer_size_256b_f(size), 1599 gr_gpc0_swdx_rm_spill_buffer_size_256b_f(size),
1594 true); 1600 true);
1595 1601
1596 cbes_reserve = gr_gpcs_swdx_beta_cb_ctrl_cbes_reserve_gfxp_v(); 1602 cbes_reserve = gr_gpcs_swdx_beta_cb_ctrl_cbes_reserve_gfxp_v();
1597 gr_gk20a_ctx_patch_write(g, ch_ctx, 1603 gr_gk20a_ctx_patch_write(g, gr_ctx,
1598 gr_gpcs_swdx_beta_cb_ctrl_r(), 1604 gr_gpcs_swdx_beta_cb_ctrl_r(),
1599 gr_gpcs_swdx_beta_cb_ctrl_cbes_reserve_f( 1605 gr_gpcs_swdx_beta_cb_ctrl_cbes_reserve_f(
1600 cbes_reserve), 1606 cbes_reserve),
1601 true); 1607 true);
1602 gr_gk20a_ctx_patch_write(g, ch_ctx, 1608 gr_gk20a_ctx_patch_write(g, gr_ctx,
1603 gr_gpcs_ppcs_cbm_beta_cb_ctrl_r(), 1609 gr_gpcs_ppcs_cbm_beta_cb_ctrl_r(),
1604 gr_gpcs_ppcs_cbm_beta_cb_ctrl_cbes_reserve_f( 1610 gr_gpcs_ppcs_cbm_beta_cb_ctrl_cbes_reserve_f(
1605 cbes_reserve), 1611 cbes_reserve),
1606 true); 1612 true);
1607 1613
1608 gr_gk20a_ctx_patch_write_end(g, ch_ctx, true); 1614 gr_gk20a_ctx_patch_write_end(g, gr_ctx, true);
1609 } 1615 }
1610 1616
1611out: 1617out:
@@ -1902,10 +1908,9 @@ int gr_gv11b_wait_empty(struct gk20a *g, unsigned long duration_ms,
1902} 1908}
1903 1909
1904void gr_gv11b_commit_global_attrib_cb(struct gk20a *g, 1910void gr_gv11b_commit_global_attrib_cb(struct gk20a *g,
1905 struct channel_ctx_gk20a *ch_ctx, 1911 struct nvgpu_gr_ctx *gr_ctx,
1906 u64 addr, bool patch) 1912 u64 addr, bool patch)
1907{ 1913{
1908 struct gr_ctx_desc *gr_ctx = ch_ctx->gr_ctx;
1909 int attrBufferSize; 1914 int attrBufferSize;
1910 1915
1911 if (gr_ctx->preempt_ctxsw_buffer.gpu_va) 1916 if (gr_ctx->preempt_ctxsw_buffer.gpu_va)
@@ -1915,16 +1920,16 @@ void gr_gv11b_commit_global_attrib_cb(struct gk20a *g,
1915 1920
1916 attrBufferSize /= gr_gpcs_tpcs_tex_rm_cb_1_size_div_128b_granularity_f(); 1921 attrBufferSize /= gr_gpcs_tpcs_tex_rm_cb_1_size_div_128b_granularity_f();
1917 1922
1918 gr_gm20b_commit_global_attrib_cb(g, ch_ctx, addr, patch); 1923 gr_gm20b_commit_global_attrib_cb(g, gr_ctx, addr, patch);
1919 1924
1920 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_r(), 1925 gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_r(),
1921 gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_v_f(addr) | 1926 gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_v_f(addr) |
1922 gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_valid_true_f(), patch); 1927 gr_gpcs_tpcs_mpc_vtg_cb_global_base_addr_valid_true_f(), patch);
1923 1928
1924 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_tex_rm_cb_0_r(), 1929 gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_tpcs_tex_rm_cb_0_r(),
1925 gr_gpcs_tpcs_tex_rm_cb_0_base_addr_43_12_f(addr), patch); 1930 gr_gpcs_tpcs_tex_rm_cb_0_base_addr_43_12_f(addr), patch);
1926 1931
1927 gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_tex_rm_cb_1_r(), 1932 gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_tpcs_tex_rm_cb_1_r(),
1928 gr_gpcs_tpcs_tex_rm_cb_1_size_div_128b_f(attrBufferSize) | 1933 gr_gpcs_tpcs_tex_rm_cb_1_size_div_128b_f(attrBufferSize) |
1929 gr_gpcs_tpcs_tex_rm_cb_1_valid_true_f(), patch); 1934 gr_gpcs_tpcs_tex_rm_cb_1_valid_true_f(), patch);
1930} 1935}
@@ -2042,6 +2047,7 @@ int gr_gv11b_pre_process_sm_exception(struct gk20a *g,
 	u32 offset = gk20a_gr_gpc_offset(g, gpc) +
 			gk20a_gr_tpc_offset(g, tpc) +
 			gv11b_gr_sm_offset(g, sm);
+	struct tsg_gk20a *tsg;
 
 	*early_exit = false;
 	*ignore_debugger = false;
@@ -2054,9 +2060,14 @@ int gr_gv11b_pre_process_sm_exception(struct gk20a *g,
 		return gr_gv11b_handle_warp_esr_error_mmu_nack(g, gpc, tpc, sm,
 				warp_esr, fault_ch);
 
-	if (fault_ch)
-		cilp_enabled = (fault_ch->ch_ctx.gr_ctx->compute_preempt_mode ==
+	if (fault_ch) {
+		tsg = tsg_gk20a_from_ch(fault_ch);
+		if (!tsg)
+			return -EINVAL;
+
+		cilp_enabled = (tsg->gr_ctx.compute_preempt_mode ==
 			NVGPU_PREEMPTION_MODE_COMPUTE_CILP);
+	}
 
 	gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg,
 		"SM Exception received on gpc %d tpc %d sm %d = 0x%08x",
@@ -2509,7 +2520,7 @@ int gr_gv11b_commit_inst(struct channel_gk20a *c, u64 gpu_va)
 	if (err)
 		return err;
 
-	ctx = &c->ch_ctx.ctx_header;
+	ctx = &c->ctx_header;
 	addr_lo = u64_lo32(ctx->mem.gpu_va) >> ram_in_base_shift_v();
 	addr_hi = u64_hi32(ctx->mem.gpu_va);
 
@@ -2529,7 +2540,7 @@ int gr_gv11b_commit_inst(struct channel_gk20a *c, u64 gpu_va)
 
 int gr_gv11b_commit_global_timeslice(struct gk20a *g, struct channel_gk20a *c)
 {
-	struct channel_ctx_gk20a *ch_ctx = NULL;
+	struct nvgpu_gr_ctx *ch_ctx = NULL;
 	u32 pd_ab_dist_cfg0;
 	u32 ds_debug;
 	u32 mpc_vtg_debug;
@@ -2836,11 +2847,18 @@ int gv11b_gr_update_sm_error_state(struct gk20a *g,
 		struct channel_gk20a *ch, u32 sm_id,
 		struct nvgpu_gr_sm_error_state *sm_error_state)
 {
+	struct tsg_gk20a *tsg;
 	u32 gpc, tpc, sm, offset;
 	struct gr_gk20a *gr = &g->gr;
-	struct channel_ctx_gk20a *ch_ctx = &ch->ch_ctx;
+	struct nvgpu_gr_ctx *ch_ctx;
 	int err = 0;
 
+	tsg = tsg_gk20a_from_ch(ch);
+	if (!tsg)
+		return -EINVAL;
+
+	ch_ctx = &tsg->gr_ctx;
+
 	nvgpu_mutex_acquire(&g->dbg_sessions_lock);
 
 	gr->sm_error_states[sm_id].hww_global_esr =
diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.h b/drivers/gpu/nvgpu/gv11b/gr_gv11b.h
index b69e69bd..022a7698 100644
--- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.h
+++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.h
@@ -41,9 +41,10 @@ struct zbc_s_table {
 };
 
 struct gk20a;
+struct gr_gk20a;
 struct zbc_entry;
 struct zbc_query_params;
-struct channel_ctx_gk20a;
+struct nvgpu_gr_ctx;
 struct nvgpu_warpstate;
 struct nvgpu_gr_sm_error_state;
 struct gr_ctx_desc;
@@ -128,7 +129,7 @@ int gr_gv11b_dump_gr_status_regs(struct gk20a *g,
 int gr_gv11b_wait_empty(struct gk20a *g, unsigned long duration_ms,
 		u32 expect_delay);
 void gr_gv11b_commit_global_attrib_cb(struct gk20a *g,
-		struct channel_ctx_gk20a *ch_ctx,
+		struct nvgpu_gr_ctx *ch_ctx,
 		u64 addr, bool patch);
 void gr_gv11b_set_gpc_tpc_mask(struct gk20a *g, u32 gpc_index);
 void gr_gv11b_get_access_map(struct gk20a *g,
@@ -222,13 +223,13 @@ unsigned long gr_gv11b_get_max_gfxp_wfi_timeout_count(struct gk20a *g);
 void gr_gv11b_ecc_init_scrub_reg(struct gk20a *g);
 
 int gr_gv11b_set_ctxsw_preemption_mode(struct gk20a *g,
-		struct gr_ctx_desc *gr_ctx,
+		struct nvgpu_gr_ctx *gr_ctx,
 		struct vm_gk20a *vm, u32 class,
 		u32 graphics_preempt_mode,
 		u32 compute_preempt_mode);
 
 void gr_gv11b_update_ctxsw_preemption_mode(struct gk20a *g,
-		struct channel_ctx_gk20a *ch_ctx,
+		struct channel_gk20a *ch_ctx,
 		struct nvgpu_mem *mem);
 
 #endif
diff --git a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c
index aa3d52af..0a552f5b 100644
--- a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c
@@ -272,7 +272,6 @@ static const struct gpu_ops gv11b_ops = {
 		.load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode,
 		.set_gpc_tpc_mask = gr_gv11b_set_gpc_tpc_mask,
 		.get_gpc_tpc_mask = gr_gm20b_get_gpc_tpc_mask,
-		.free_channel_ctx = gk20a_free_channel_ctx,
 		.alloc_obj_ctx = gk20a_alloc_obj_ctx,
 		.bind_ctxsw_zcull = gr_gk20a_bind_ctxsw_zcull,
 		.get_zcull_info = gr_gk20a_get_zcull_info,
diff --git a/drivers/gpu/nvgpu/gv11b/subctx_gv11b.c b/drivers/gpu/nvgpu/gv11b/subctx_gv11b.c
index fe1aa8a5..607fff91 100644
--- a/drivers/gpu/nvgpu/gv11b/subctx_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/subctx_gv11b.c
@@ -43,7 +43,7 @@ static void gv11b_subctx_commit_pdb(struct channel_gk20a *c,
 
 void gv11b_free_subctx_header(struct channel_gk20a *c)
 {
-	struct ctx_header_desc *ctx = &c->ch_ctx.ctx_header;
+	struct ctx_header_desc *ctx = &c->ctx_header;
 	struct gk20a *g = c->g;
 
 	nvgpu_log(g, gpu_dbg_fn, "gv11b_free_subctx_header");
@@ -57,13 +57,13 @@ void gv11b_free_subctx_header(struct channel_gk20a *c)
 
 int gv11b_alloc_subctx_header(struct channel_gk20a *c)
 {
-	struct ctx_header_desc *ctx = &c->ch_ctx.ctx_header;
+	struct ctx_header_desc *ctx = &c->ctx_header;
 	struct gk20a *g = c->g;
 	int ret = 0;
 
 	nvgpu_log(g, gpu_dbg_fn, "gv11b_alloc_subctx_header");
 
-	if (ctx->mem.gpu_va == 0) {
+	if (!nvgpu_mem_is_valid(&ctx->mem)) {
 		ret = nvgpu_dma_alloc_flags_sys(g,
 				0, /* No Special flags */
 				ctxsw_prog_fecs_header_v(),
@@ -111,20 +111,50 @@ static void gv11b_init_subcontext_pdb(struct channel_gk20a *c,
 
 int gv11b_update_subctx_header(struct channel_gk20a *c, u64 gpu_va)
 {
-	struct ctx_header_desc *ctx = &c->ch_ctx.ctx_header;
+	struct ctx_header_desc *ctx = &c->ctx_header;
 	struct nvgpu_mem *gr_mem;
 	struct gk20a *g = c->g;
 	int ret = 0;
 	u32 addr_lo, addr_hi;
+	struct tsg_gk20a *tsg;
+	struct nvgpu_gr_ctx *gr_ctx;
 
-	addr_lo = u64_lo32(gpu_va);
-	addr_hi = u64_hi32(gpu_va);
+	tsg = tsg_gk20a_from_ch(c);
+	if (!tsg)
+		return -EINVAL;
+
+	gr_ctx = &tsg->gr_ctx;
 
 	gr_mem = &ctx->mem;
 	g->ops.mm.l2_flush(g, true);
 	if (nvgpu_mem_begin(g, gr_mem))
 		return -ENOMEM;
 
+	/* set priv access map */
+	addr_lo = u64_lo32(gr_ctx->global_ctx_buffer_va[PRIV_ACCESS_MAP_VA]);
+	addr_hi = u64_hi32(gr_ctx->global_ctx_buffer_va[PRIV_ACCESS_MAP_VA]);
+	nvgpu_mem_wr(g, gr_mem,
+		ctxsw_prog_main_image_priv_access_map_addr_lo_o(),
+		addr_lo);
+	nvgpu_mem_wr(g, gr_mem,
+		ctxsw_prog_main_image_priv_access_map_addr_hi_o(),
+		addr_hi);
+
+	addr_lo = u64_lo32(gr_ctx->patch_ctx.mem.gpu_va);
+	addr_hi = u64_hi32(gr_ctx->patch_ctx.mem.gpu_va);
+	nvgpu_mem_wr(g, gr_mem,
+		ctxsw_prog_main_image_patch_adr_lo_o(),
+		addr_lo);
+	nvgpu_mem_wr(g, gr_mem,
+		ctxsw_prog_main_image_patch_adr_hi_o(),
+		addr_hi);
+
+	g->ops.gr.write_pm_ptr(g, gr_mem, gr_ctx->pm_ctx.mem.gpu_va);
+	g->ops.gr.write_zcull_ptr(g, gr_mem, gr_ctx->zcull_ctx.gpu_va);
+
+	addr_lo = u64_lo32(gpu_va);
+	addr_hi = u64_hi32(gpu_va);
+
 	nvgpu_mem_wr(g, gr_mem,
 		ctxsw_prog_main_image_context_buffer_ptr_hi_o(), addr_hi);
 	nvgpu_mem_wr(g, gr_mem,