diff options
author | Terje Bergstrom <tbergstrom@nvidia.com> | 2017-12-15 12:04:15 -0500 |
---|---|---|
committer | mobile promotions <svcmobile_promotions@nvidia.com> | 2018-01-17 15:29:09 -0500 |
commit | 2f6698b863c9cc1db6455637b7c72e812b470b93 (patch) | |
tree | d0c8abf32d6994b9f54bf5eddafd8316e038c829 /drivers/gpu/nvgpu/common/linux | |
parent | 6a73114788ffafe4c53771c707ecbd9c9ea0a117 (diff) |
gpu: nvgpu: Make graphics context property of TSG
Move graphics context ownership to the TSG instead of the channel. Combine
channel_ctx_gk20a and gr_ctx_desc into one structure, because the split
between them was arbitrary. Move the context header to be a property of the
channel.
Bug 1842197
Change-Id: I410e3262f80b318d8528bcbec270b63a2d8d2ff9
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1639532
Reviewed-by: Seshendra Gadagottu <sgadagottu@nvidia.com>
Tested-by: Seshendra Gadagottu <sgadagottu@nvidia.com>
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/common/linux')
11 files changed, 195 insertions, 217 deletions
diff --git a/drivers/gpu/nvgpu/common/linux/debug_fifo.c b/drivers/gpu/nvgpu/common/linux/debug_fifo.c index ad157ee7..aeab0c92 100644 --- a/drivers/gpu/nvgpu/common/linux/debug_fifo.c +++ b/drivers/gpu/nvgpu/common/linux/debug_fifo.c | |||
@@ -91,8 +91,8 @@ static int gk20a_fifo_sched_debugfs_seq_show( | |||
91 | tsg->timeslice_us, | 91 | tsg->timeslice_us, |
92 | ch->timeout_ms_max, | 92 | ch->timeout_ms_max, |
93 | tsg->interleave_level, | 93 | tsg->interleave_level, |
94 | ch->ch_ctx.gr_ctx ? ch->ch_ctx.gr_ctx->graphics_preempt_mode : U32_MAX, | 94 | tsg->gr_ctx.graphics_preempt_mode, |
95 | ch->ch_ctx.gr_ctx ? ch->ch_ctx.gr_ctx->compute_preempt_mode : U32_MAX); | 95 | tsg->gr_ctx.compute_preempt_mode); |
96 | gk20a_channel_put(ch); | 96 | gk20a_channel_put(ch); |
97 | } | 97 | } |
98 | return 0; | 98 | return 0; |
diff --git a/drivers/gpu/nvgpu/common/linux/ioctl_channel.c b/drivers/gpu/nvgpu/common/linux/ioctl_channel.c index 94501a89..e8f4c14b 100644 --- a/drivers/gpu/nvgpu/common/linux/ioctl_channel.c +++ b/drivers/gpu/nvgpu/common/linux/ioctl_channel.c | |||
@@ -85,10 +85,10 @@ static void gk20a_channel_trace_sched_param( | |||
85 | tsg_gk20a_from_ch(ch)->timeslice_us, | 85 | tsg_gk20a_from_ch(ch)->timeslice_us, |
86 | ch->timeout_ms_max, | 86 | ch->timeout_ms_max, |
87 | gk20a_fifo_interleave_level_name(tsg->interleave_level), | 87 | gk20a_fifo_interleave_level_name(tsg->interleave_level), |
88 | gr_gk20a_graphics_preempt_mode_name(ch->ch_ctx.gr_ctx ? | 88 | gr_gk20a_graphics_preempt_mode_name( |
89 | ch->ch_ctx.gr_ctx->graphics_preempt_mode : 0), | 89 | tsg->gr_ctx.graphics_preempt_mode), |
90 | gr_gk20a_compute_preempt_mode_name(ch->ch_ctx.gr_ctx ? | 90 | gr_gk20a_compute_preempt_mode_name( |
91 | ch->ch_ctx.gr_ctx->compute_preempt_mode : 0)); | 91 | tsg->gr_ctx.compute_preempt_mode)); |
92 | } | 92 | } |
93 | 93 | ||
94 | /* | 94 | /* |
diff --git a/drivers/gpu/nvgpu/common/linux/sched.c b/drivers/gpu/nvgpu/common/linux/sched.c index fc3f6ed8..e6211790 100644 --- a/drivers/gpu/nvgpu/common/linux/sched.c +++ b/drivers/gpu/nvgpu/common/linux/sched.c | |||
@@ -198,15 +198,10 @@ static int gk20a_sched_dev_ioctl_get_params(struct gk20a_sched_ctrl *sched, | |||
198 | arg->runlist_interleave = tsg->interleave_level; | 198 | arg->runlist_interleave = tsg->interleave_level; |
199 | arg->timeslice = tsg->timeslice_us; | 199 | arg->timeslice = tsg->timeslice_us; |
200 | 200 | ||
201 | if (tsg->tsg_gr_ctx) { | 201 | arg->graphics_preempt_mode = |
202 | arg->graphics_preempt_mode = | 202 | tsg->gr_ctx.graphics_preempt_mode; |
203 | tsg->tsg_gr_ctx->graphics_preempt_mode; | 203 | arg->compute_preempt_mode = |
204 | arg->compute_preempt_mode = | 204 | tsg->gr_ctx.compute_preempt_mode; |
205 | tsg->tsg_gr_ctx->compute_preempt_mode; | ||
206 | } else { | ||
207 | arg->graphics_preempt_mode = 0; | ||
208 | arg->compute_preempt_mode = 0; | ||
209 | } | ||
210 | 205 | ||
211 | nvgpu_ref_put(&tsg->refcount, nvgpu_ioctl_tsg_release); | 206 | nvgpu_ref_put(&tsg->refcount, nvgpu_ioctl_tsg_release); |
212 | 207 | ||
diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/gp10b/vgpu_gr_gp10b.c b/drivers/gpu/nvgpu/common/linux/vgpu/gp10b/vgpu_gr_gp10b.c index ed61f16b..9adf20d1 100644 --- a/drivers/gpu/nvgpu/common/linux/vgpu/gp10b/vgpu_gr_gp10b.c +++ b/drivers/gpu/nvgpu/common/linux/vgpu/gp10b/vgpu_gr_gp10b.c | |||
@@ -27,12 +27,11 @@ | |||
27 | #include <nvgpu/hw/gp10b/hw_gr_gp10b.h> | 27 | #include <nvgpu/hw/gp10b/hw_gr_gp10b.h> |
28 | 28 | ||
29 | int vgpu_gr_gp10b_alloc_gr_ctx(struct gk20a *g, | 29 | int vgpu_gr_gp10b_alloc_gr_ctx(struct gk20a *g, |
30 | struct gr_ctx_desc **__gr_ctx, | 30 | struct nvgpu_gr_ctx *gr_ctx, |
31 | struct vm_gk20a *vm, | 31 | struct vm_gk20a *vm, |
32 | u32 class, | 32 | u32 class, |
33 | u32 flags) | 33 | u32 flags) |
34 | { | 34 | { |
35 | struct gr_ctx_desc *gr_ctx; | ||
36 | u32 graphics_preempt_mode = 0; | 35 | u32 graphics_preempt_mode = 0; |
37 | u32 compute_preempt_mode = 0; | 36 | u32 compute_preempt_mode = 0; |
38 | struct vgpu_priv_data *priv = vgpu_get_priv_data(g); | 37 | struct vgpu_priv_data *priv = vgpu_get_priv_data(g); |
@@ -40,12 +39,10 @@ int vgpu_gr_gp10b_alloc_gr_ctx(struct gk20a *g, | |||
40 | 39 | ||
41 | gk20a_dbg_fn(""); | 40 | gk20a_dbg_fn(""); |
42 | 41 | ||
43 | err = vgpu_gr_alloc_gr_ctx(g, __gr_ctx, vm, class, flags); | 42 | err = vgpu_gr_alloc_gr_ctx(g, gr_ctx, vm, class, flags); |
44 | if (err) | 43 | if (err) |
45 | return err; | 44 | return err; |
46 | 45 | ||
47 | gr_ctx = *__gr_ctx; | ||
48 | |||
49 | if (flags & NVGPU_OBJ_CTX_FLAGS_SUPPORT_GFXP) | 46 | if (flags & NVGPU_OBJ_CTX_FLAGS_SUPPORT_GFXP) |
50 | graphics_preempt_mode = NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP; | 47 | graphics_preempt_mode = NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP; |
51 | if (flags & NVGPU_OBJ_CTX_FLAGS_SUPPORT_CILP) | 48 | if (flags & NVGPU_OBJ_CTX_FLAGS_SUPPORT_CILP) |
@@ -84,7 +81,7 @@ fail: | |||
84 | } | 81 | } |
85 | 82 | ||
86 | int vgpu_gr_gp10b_set_ctxsw_preemption_mode(struct gk20a *g, | 83 | int vgpu_gr_gp10b_set_ctxsw_preemption_mode(struct gk20a *g, |
87 | struct gr_ctx_desc *gr_ctx, | 84 | struct nvgpu_gr_ctx *gr_ctx, |
88 | struct vm_gk20a *vm, u32 class, | 85 | struct vm_gk20a *vm, u32 class, |
89 | u32 graphics_preempt_mode, | 86 | u32 graphics_preempt_mode, |
90 | u32 compute_preempt_mode) | 87 | u32 compute_preempt_mode) |
@@ -240,7 +237,7 @@ int vgpu_gr_gp10b_set_preemption_mode(struct channel_gk20a *ch, | |||
240 | u32 graphics_preempt_mode, | 237 | u32 graphics_preempt_mode, |
241 | u32 compute_preempt_mode) | 238 | u32 compute_preempt_mode) |
242 | { | 239 | { |
243 | struct gr_ctx_desc *gr_ctx = ch->ch_ctx.gr_ctx; | 240 | struct nvgpu_gr_ctx *gr_ctx; |
244 | struct gk20a *g = ch->g; | 241 | struct gk20a *g = ch->g; |
245 | struct tsg_gk20a *tsg; | 242 | struct tsg_gk20a *tsg; |
246 | struct vm_gk20a *vm; | 243 | struct vm_gk20a *vm; |
@@ -251,6 +248,13 @@ int vgpu_gr_gp10b_set_preemption_mode(struct channel_gk20a *ch, | |||
251 | if (!class) | 248 | if (!class) |
252 | return -EINVAL; | 249 | return -EINVAL; |
253 | 250 | ||
251 | tsg = tsg_gk20a_from_ch(ch); | ||
252 | if (!tsg) | ||
253 | return -EINVAL; | ||
254 | |||
255 | vm = tsg->vm; | ||
256 | gr_ctx = &tsg->gr_ctx; | ||
257 | |||
254 | /* skip setting anything if both modes are already set */ | 258 | /* skip setting anything if both modes are already set */ |
255 | if (graphics_preempt_mode && | 259 | if (graphics_preempt_mode && |
256 | (graphics_preempt_mode == gr_ctx->graphics_preempt_mode)) | 260 | (graphics_preempt_mode == gr_ctx->graphics_preempt_mode)) |
@@ -263,13 +267,6 @@ int vgpu_gr_gp10b_set_preemption_mode(struct channel_gk20a *ch, | |||
263 | if (graphics_preempt_mode == 0 && compute_preempt_mode == 0) | 267 | if (graphics_preempt_mode == 0 && compute_preempt_mode == 0) |
264 | return 0; | 268 | return 0; |
265 | 269 | ||
266 | if (gk20a_is_channel_marked_as_tsg(ch)) { | ||
267 | tsg = &g->fifo.tsg[ch->tsgid]; | ||
268 | vm = tsg->vm; | ||
269 | } else { | ||
270 | vm = ch->vm; | ||
271 | } | ||
272 | |||
273 | if (g->ops.gr.set_ctxsw_preemption_mode) { | 270 | if (g->ops.gr.set_ctxsw_preemption_mode) { |
274 | err = g->ops.gr.set_ctxsw_preemption_mode(g, gr_ctx, vm, class, | 271 | err = g->ops.gr.set_ctxsw_preemption_mode(g, gr_ctx, vm, class, |
275 | graphics_preempt_mode, | 272 | graphics_preempt_mode, |
diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/gp10b/vgpu_gr_gp10b.h b/drivers/gpu/nvgpu/common/linux/vgpu/gp10b/vgpu_gr_gp10b.h index 31b88d19..559bd227 100644 --- a/drivers/gpu/nvgpu/common/linux/vgpu/gp10b/vgpu_gr_gp10b.h +++ b/drivers/gpu/nvgpu/common/linux/vgpu/gp10b/vgpu_gr_gp10b.h | |||
@@ -20,12 +20,12 @@ | |||
20 | #include "gk20a/gk20a.h" | 20 | #include "gk20a/gk20a.h" |
21 | 21 | ||
22 | int vgpu_gr_gp10b_alloc_gr_ctx(struct gk20a *g, | 22 | int vgpu_gr_gp10b_alloc_gr_ctx(struct gk20a *g, |
23 | struct gr_ctx_desc **__gr_ctx, | 23 | struct nvgpu_gr_ctx *gr_ctx, |
24 | struct vm_gk20a *vm, | 24 | struct vm_gk20a *vm, |
25 | u32 class, | 25 | u32 class, |
26 | u32 flags); | 26 | u32 flags); |
27 | int vgpu_gr_gp10b_set_ctxsw_preemption_mode(struct gk20a *g, | 27 | int vgpu_gr_gp10b_set_ctxsw_preemption_mode(struct gk20a *g, |
28 | struct gr_ctx_desc *gr_ctx, | 28 | struct nvgpu_gr_ctx *gr_ctx, |
29 | struct vm_gk20a *vm, u32 class, | 29 | struct vm_gk20a *vm, u32 class, |
30 | u32 graphics_preempt_mode, | 30 | u32 graphics_preempt_mode, |
31 | u32 compute_preempt_mode); | 31 | u32 compute_preempt_mode); |
diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/gp10b/vgpu_hal_gp10b.c b/drivers/gpu/nvgpu/common/linux/vgpu/gp10b/vgpu_hal_gp10b.c index e8cb96b4..d5fd5102 100644 --- a/drivers/gpu/nvgpu/common/linux/vgpu/gp10b/vgpu_hal_gp10b.c +++ b/drivers/gpu/nvgpu/common/linux/vgpu/gp10b/vgpu_hal_gp10b.c | |||
@@ -112,7 +112,6 @@ static const struct gpu_ops vgpu_gp10b_ops = { | |||
112 | .load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode, | 112 | .load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode, |
113 | .set_gpc_tpc_mask = gr_gp10b_set_gpc_tpc_mask, | 113 | .set_gpc_tpc_mask = gr_gp10b_set_gpc_tpc_mask, |
114 | .get_gpc_tpc_mask = vgpu_gr_get_gpc_tpc_mask, | 114 | .get_gpc_tpc_mask = vgpu_gr_get_gpc_tpc_mask, |
115 | .free_channel_ctx = vgpu_gr_free_channel_ctx, | ||
116 | .alloc_obj_ctx = vgpu_gr_alloc_obj_ctx, | 115 | .alloc_obj_ctx = vgpu_gr_alloc_obj_ctx, |
117 | .bind_ctxsw_zcull = vgpu_gr_bind_ctxsw_zcull, | 116 | .bind_ctxsw_zcull = vgpu_gr_bind_ctxsw_zcull, |
118 | .get_zcull_info = vgpu_gr_get_zcull_info, | 117 | .get_zcull_info = vgpu_gr_get_zcull_info, |
diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/gr_vgpu.c b/drivers/gpu/nvgpu/common/linux/vgpu/gr_vgpu.c index e8790587..8f1c5d78 100644 --- a/drivers/gpu/nvgpu/common/linux/vgpu/gr_vgpu.c +++ b/drivers/gpu/nvgpu/common/linux/vgpu/gr_vgpu.c | |||
@@ -20,14 +20,18 @@ | |||
20 | 20 | ||
21 | #include <nvgpu/kmem.h> | 21 | #include <nvgpu/kmem.h> |
22 | #include <nvgpu/bug.h> | 22 | #include <nvgpu/bug.h> |
23 | #include <nvgpu/dma.h> | ||
23 | #include <nvgpu/error_notifier.h> | 24 | #include <nvgpu/error_notifier.h> |
24 | #include <nvgpu/dma.h> | 25 | #include <nvgpu/dma.h> |
25 | 26 | ||
26 | #include "vgpu.h" | 27 | #include "vgpu.h" |
27 | #include "gr_vgpu.h" | 28 | #include "gr_vgpu.h" |
28 | #include "gk20a/dbg_gpu_gk20a.h" | 29 | #include "gk20a/dbg_gpu_gk20a.h" |
30 | #include "gk20a/channel_gk20a.h" | ||
31 | #include "gk20a/tsg_gk20a.h" | ||
29 | 32 | ||
30 | #include <nvgpu/hw/gk20a/hw_gr_gk20a.h> | 33 | #include <nvgpu/hw/gk20a/hw_gr_gk20a.h> |
34 | #include <nvgpu/hw/gk20a/hw_ctxsw_prog_gk20a.h> | ||
31 | 35 | ||
32 | void vgpu_gr_detect_sm_arch(struct gk20a *g) | 36 | void vgpu_gr_detect_sm_arch(struct gk20a *g) |
33 | { | 37 | { |
@@ -152,8 +156,9 @@ static int vgpu_gr_map_global_ctx_buffers(struct gk20a *g, | |||
152 | struct tegra_vgpu_cmd_msg msg; | 156 | struct tegra_vgpu_cmd_msg msg; |
153 | struct tegra_vgpu_ch_ctx_params *p = &msg.params.ch_ctx; | 157 | struct tegra_vgpu_ch_ctx_params *p = &msg.params.ch_ctx; |
154 | struct vm_gk20a *ch_vm = c->vm; | 158 | struct vm_gk20a *ch_vm = c->vm; |
155 | u64 *g_bfr_va = c->ch_ctx.global_ctx_buffer_va; | 159 | struct tsg_gk20a *tsg; |
156 | u64 *g_bfr_size = c->ch_ctx.global_ctx_buffer_size; | 160 | u64 *g_bfr_va; |
161 | u64 *g_bfr_size; | ||
157 | struct gr_gk20a *gr = &g->gr; | 162 | struct gr_gk20a *gr = &g->gr; |
158 | u64 gpu_va; | 163 | u64 gpu_va; |
159 | u32 i; | 164 | u32 i; |
@@ -161,7 +166,12 @@ static int vgpu_gr_map_global_ctx_buffers(struct gk20a *g, | |||
161 | 166 | ||
162 | gk20a_dbg_fn(""); | 167 | gk20a_dbg_fn(""); |
163 | 168 | ||
164 | /* FIXME: add VPR support */ | 169 | tsg = tsg_gk20a_from_ch(c); |
170 | if (!tsg) | ||
171 | return -EINVAL; | ||
172 | |||
173 | g_bfr_va = tsg->gr_ctx.global_ctx_buffer_va; | ||
174 | g_bfr_size = tsg->gr_ctx.global_ctx_buffer_size; | ||
165 | 175 | ||
166 | /* Circular Buffer */ | 176 | /* Circular Buffer */ |
167 | gpu_va = __nvgpu_vm_alloc_va(ch_vm, | 177 | gpu_va = __nvgpu_vm_alloc_va(ch_vm, |
@@ -213,7 +223,7 @@ static int vgpu_gr_map_global_ctx_buffers(struct gk20a *g, | |||
213 | if (err || msg.ret) | 223 | if (err || msg.ret) |
214 | goto clean_up; | 224 | goto clean_up; |
215 | 225 | ||
216 | c->ch_ctx.global_ctx_buffer_mapped = true; | 226 | tsg->gr_ctx.global_ctx_buffer_mapped = true; |
217 | return 0; | 227 | return 0; |
218 | 228 | ||
219 | clean_up: | 229 | clean_up: |
@@ -227,40 +237,33 @@ static int vgpu_gr_map_global_ctx_buffers(struct gk20a *g, | |||
227 | return -ENOMEM; | 237 | return -ENOMEM; |
228 | } | 238 | } |
229 | 239 | ||
230 | static void vgpu_gr_unmap_global_ctx_buffers(struct channel_gk20a *c) | 240 | static void vgpu_gr_unmap_global_ctx_buffers(struct tsg_gk20a *tsg) |
231 | { | 241 | { |
232 | struct vm_gk20a *ch_vm = c->vm; | 242 | struct vm_gk20a *ch_vm = tsg->vm; |
233 | u64 *g_bfr_va = c->ch_ctx.global_ctx_buffer_va; | 243 | u64 *g_bfr_va = tsg->gr_ctx.global_ctx_buffer_va; |
234 | u64 *g_bfr_size = c->ch_ctx.global_ctx_buffer_size; | 244 | u64 *g_bfr_size = tsg->gr_ctx.global_ctx_buffer_size; |
235 | u32 i; | 245 | u32 i; |
236 | 246 | ||
237 | gk20a_dbg_fn(""); | 247 | gk20a_dbg_fn(""); |
238 | 248 | ||
239 | if (c->ch_ctx.global_ctx_buffer_mapped) { | 249 | if (tsg->gr_ctx.global_ctx_buffer_mapped) { |
240 | struct tegra_vgpu_cmd_msg msg; | 250 | /* server will unmap on channel close */ |
241 | struct tegra_vgpu_ch_ctx_params *p = &msg.params.ch_ctx; | ||
242 | int err; | ||
243 | 251 | ||
244 | msg.cmd = TEGRA_VGPU_CMD_CHANNEL_UNMAP_GR_GLOBAL_CTX; | 252 | for (i = 0; i < NR_GLOBAL_CTX_BUF_VA; i++) { |
245 | msg.handle = vgpu_get_handle(c->g); | 253 | if (g_bfr_va[i]) { |
246 | p->handle = c->virt_ctx; | 254 | __nvgpu_vm_free_va(ch_vm, g_bfr_va[i], |
247 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); | 255 | gmmu_page_size_kernel); |
248 | WARN_ON(err || msg.ret); | 256 | g_bfr_va[i] = 0; |
249 | } | 257 | g_bfr_size[i] = 0; |
250 | 258 | } | |
251 | for (i = 0; i < NR_GLOBAL_CTX_BUF_VA; i++) { | ||
252 | if (g_bfr_va[i]) { | ||
253 | __nvgpu_vm_free_va(ch_vm, g_bfr_va[i], | ||
254 | gmmu_page_size_kernel); | ||
255 | g_bfr_va[i] = 0; | ||
256 | g_bfr_size[i] = 0; | ||
257 | } | 259 | } |
260 | |||
261 | tsg->gr_ctx.global_ctx_buffer_mapped = false; | ||
258 | } | 262 | } |
259 | c->ch_ctx.global_ctx_buffer_mapped = false; | ||
260 | } | 263 | } |
261 | 264 | ||
262 | int vgpu_gr_alloc_gr_ctx(struct gk20a *g, | 265 | int vgpu_gr_alloc_gr_ctx(struct gk20a *g, |
263 | struct gr_ctx_desc **__gr_ctx, | 266 | struct nvgpu_gr_ctx *gr_ctx, |
264 | struct vm_gk20a *vm, | 267 | struct vm_gk20a *vm, |
265 | u32 class, | 268 | u32 class, |
266 | u32 flags) | 269 | u32 flags) |
@@ -268,7 +271,6 @@ int vgpu_gr_alloc_gr_ctx(struct gk20a *g, | |||
268 | struct tegra_vgpu_cmd_msg msg = {0}; | 271 | struct tegra_vgpu_cmd_msg msg = {0}; |
269 | struct tegra_vgpu_gr_ctx_params *p = &msg.params.gr_ctx; | 272 | struct tegra_vgpu_gr_ctx_params *p = &msg.params.gr_ctx; |
270 | struct gr_gk20a *gr = &g->gr; | 273 | struct gr_gk20a *gr = &g->gr; |
271 | struct gr_ctx_desc *gr_ctx; | ||
272 | int err; | 274 | int err; |
273 | 275 | ||
274 | gk20a_dbg_fn(""); | 276 | gk20a_dbg_fn(""); |
@@ -280,19 +282,14 @@ int vgpu_gr_alloc_gr_ctx(struct gk20a *g, | |||
280 | gr->ctx_vars.buffer_size = gr->ctx_vars.golden_image_size; | 282 | gr->ctx_vars.buffer_size = gr->ctx_vars.golden_image_size; |
281 | gr->ctx_vars.buffer_total_size = gr->ctx_vars.golden_image_size; | 283 | gr->ctx_vars.buffer_total_size = gr->ctx_vars.golden_image_size; |
282 | 284 | ||
283 | gr_ctx = nvgpu_kzalloc(g, sizeof(*gr_ctx)); | ||
284 | if (!gr_ctx) | ||
285 | return -ENOMEM; | ||
286 | |||
287 | gr_ctx->mem.size = gr->ctx_vars.buffer_total_size; | ||
288 | gr_ctx->mem.gpu_va = __nvgpu_vm_alloc_va(vm, | 285 | gr_ctx->mem.gpu_va = __nvgpu_vm_alloc_va(vm, |
289 | gr_ctx->mem.size, | 286 | gr->ctx_vars.buffer_total_size, |
290 | gmmu_page_size_kernel); | 287 | gmmu_page_size_kernel); |
291 | 288 | ||
292 | if (!gr_ctx->mem.gpu_va) { | 289 | if (!gr_ctx->mem.gpu_va) |
293 | nvgpu_kfree(g, gr_ctx); | ||
294 | return -ENOMEM; | 290 | return -ENOMEM; |
295 | } | 291 | gr_ctx->mem.size = gr->ctx_vars.buffer_total_size; |
292 | gr_ctx->mem.aperture = APERTURE_SYSMEM; | ||
296 | 293 | ||
297 | msg.cmd = TEGRA_VGPU_CMD_GR_CTX_ALLOC; | 294 | msg.cmd = TEGRA_VGPU_CMD_GR_CTX_ALLOC; |
298 | msg.handle = vgpu_get_handle(g); | 295 | msg.handle = vgpu_get_handle(g); |
@@ -306,57 +303,19 @@ int vgpu_gr_alloc_gr_ctx(struct gk20a *g, | |||
306 | nvgpu_err(g, "fail to alloc gr_ctx"); | 303 | nvgpu_err(g, "fail to alloc gr_ctx"); |
307 | __nvgpu_vm_free_va(vm, gr_ctx->mem.gpu_va, | 304 | __nvgpu_vm_free_va(vm, gr_ctx->mem.gpu_va, |
308 | gmmu_page_size_kernel); | 305 | gmmu_page_size_kernel); |
309 | nvgpu_kfree(g, gr_ctx); | 306 | gr_ctx->mem.aperture = APERTURE_INVALID; |
310 | } else { | 307 | } else { |
311 | gr_ctx->virt_ctx = p->gr_ctx_handle; | 308 | gr_ctx->virt_ctx = p->gr_ctx_handle; |
312 | *__gr_ctx = gr_ctx; | ||
313 | } | 309 | } |
314 | 310 | ||
315 | return err; | 311 | return err; |
316 | } | 312 | } |
317 | 313 | ||
318 | void vgpu_gr_free_gr_ctx(struct gk20a *g, struct vm_gk20a *vm, | ||
319 | struct gr_ctx_desc *gr_ctx) | ||
320 | { | ||
321 | struct tegra_vgpu_cmd_msg msg; | ||
322 | struct tegra_vgpu_gr_ctx_params *p = &msg.params.gr_ctx; | ||
323 | int err; | ||
324 | |||
325 | gk20a_dbg_fn(""); | ||
326 | |||
327 | if (!gr_ctx || !gr_ctx->mem.gpu_va) | ||
328 | return; | ||
329 | |||
330 | |||
331 | msg.cmd = TEGRA_VGPU_CMD_GR_CTX_FREE; | ||
332 | msg.handle = vgpu_get_handle(g); | ||
333 | p->gr_ctx_handle = gr_ctx->virt_ctx; | ||
334 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); | ||
335 | WARN_ON(err || msg.ret); | ||
336 | |||
337 | __nvgpu_vm_free_va(vm, gr_ctx->mem.gpu_va, | ||
338 | gmmu_page_size_kernel); | ||
339 | |||
340 | nvgpu_dma_unmap_free(vm, &gr_ctx->pagepool_ctxsw_buffer); | ||
341 | nvgpu_dma_unmap_free(vm, &gr_ctx->betacb_ctxsw_buffer); | ||
342 | nvgpu_dma_unmap_free(vm, &gr_ctx->spill_ctxsw_buffer); | ||
343 | nvgpu_dma_unmap_free(vm, &gr_ctx->preempt_ctxsw_buffer); | ||
344 | |||
345 | nvgpu_kfree(g, gr_ctx); | ||
346 | } | ||
347 | |||
348 | static void vgpu_gr_free_channel_gr_ctx(struct channel_gk20a *c) | ||
349 | { | ||
350 | gk20a_dbg_fn(""); | ||
351 | |||
352 | c->g->ops.gr.free_gr_ctx(c->g, c->vm, c->ch_ctx.gr_ctx); | ||
353 | c->ch_ctx.gr_ctx = NULL; | ||
354 | } | ||
355 | |||
356 | static int vgpu_gr_alloc_channel_patch_ctx(struct gk20a *g, | 314 | static int vgpu_gr_alloc_channel_patch_ctx(struct gk20a *g, |
357 | struct channel_gk20a *c) | 315 | struct channel_gk20a *c) |
358 | { | 316 | { |
359 | struct patch_desc *patch_ctx = &c->ch_ctx.patch_ctx; | 317 | struct tsg_gk20a *tsg; |
318 | struct patch_desc *patch_ctx; | ||
360 | struct vm_gk20a *ch_vm = c->vm; | 319 | struct vm_gk20a *ch_vm = c->vm; |
361 | struct tegra_vgpu_cmd_msg msg; | 320 | struct tegra_vgpu_cmd_msg msg; |
362 | struct tegra_vgpu_ch_ctx_params *p = &msg.params.ch_ctx; | 321 | struct tegra_vgpu_ch_ctx_params *p = &msg.params.ch_ctx; |
@@ -364,6 +323,11 @@ static int vgpu_gr_alloc_channel_patch_ctx(struct gk20a *g, | |||
364 | 323 | ||
365 | gk20a_dbg_fn(""); | 324 | gk20a_dbg_fn(""); |
366 | 325 | ||
326 | tsg = tsg_gk20a_from_ch(c); | ||
327 | if (!tsg) | ||
328 | return -EINVAL; | ||
329 | |||
330 | patch_ctx = &tsg->gr_ctx.patch_ctx; | ||
367 | patch_ctx->mem.size = 128 * sizeof(u32); | 331 | patch_ctx->mem.size = 128 * sizeof(u32); |
368 | patch_ctx->mem.gpu_va = __nvgpu_vm_alloc_va(ch_vm, | 332 | patch_ctx->mem.gpu_va = __nvgpu_vm_alloc_va(ch_vm, |
369 | patch_ctx->mem.size, | 333 | patch_ctx->mem.size, |
@@ -385,37 +349,25 @@ static int vgpu_gr_alloc_channel_patch_ctx(struct gk20a *g, | |||
385 | return err; | 349 | return err; |
386 | } | 350 | } |
387 | 351 | ||
388 | static void vgpu_gr_free_channel_patch_ctx(struct channel_gk20a *c) | 352 | static void vgpu_gr_free_channel_patch_ctx(struct tsg_gk20a *tsg) |
389 | { | 353 | { |
390 | struct patch_desc *patch_ctx = &c->ch_ctx.patch_ctx; | 354 | struct patch_desc *patch_ctx = &tsg->gr_ctx.patch_ctx; |
391 | struct vm_gk20a *ch_vm = c->vm; | ||
392 | 355 | ||
393 | gk20a_dbg_fn(""); | 356 | gk20a_dbg_fn(""); |
394 | 357 | ||
395 | if (patch_ctx->mem.gpu_va) { | 358 | if (patch_ctx->mem.gpu_va) { |
396 | struct tegra_vgpu_cmd_msg msg; | 359 | /* server will free on channel close */ |
397 | struct tegra_vgpu_ch_ctx_params *p = &msg.params.ch_ctx; | ||
398 | int err; | ||
399 | 360 | ||
400 | msg.cmd = TEGRA_VGPU_CMD_CHANNEL_FREE_GR_PATCH_CTX; | 361 | __nvgpu_vm_free_va(tsg->vm, patch_ctx->mem.gpu_va, |
401 | msg.handle = vgpu_get_handle(c->g); | ||
402 | p->handle = c->virt_ctx; | ||
403 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); | ||
404 | WARN_ON(err || msg.ret); | ||
405 | |||
406 | __nvgpu_vm_free_va(ch_vm, patch_ctx->mem.gpu_va, | ||
407 | gmmu_page_size_kernel); | 362 | gmmu_page_size_kernel); |
408 | patch_ctx->mem.gpu_va = 0; | 363 | patch_ctx->mem.gpu_va = 0; |
409 | } | 364 | } |
410 | } | 365 | } |
411 | 366 | ||
412 | static void vgpu_gr_free_channel_pm_ctx(struct channel_gk20a *c) | 367 | static void vgpu_gr_free_channel_pm_ctx(struct tsg_gk20a *tsg) |
413 | { | 368 | { |
414 | struct tegra_vgpu_cmd_msg msg; | 369 | struct nvgpu_gr_ctx *ch_ctx = &tsg->gr_ctx; |
415 | struct tegra_vgpu_channel_free_hwpm_ctx *p = &msg.params.free_hwpm_ctx; | ||
416 | struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx; | ||
417 | struct pm_ctx_desc *pm_ctx = &ch_ctx->pm_ctx; | 370 | struct pm_ctx_desc *pm_ctx = &ch_ctx->pm_ctx; |
418 | int err; | ||
419 | 371 | ||
420 | gk20a_dbg_fn(""); | 372 | gk20a_dbg_fn(""); |
421 | 373 | ||
@@ -423,44 +375,63 @@ static void vgpu_gr_free_channel_pm_ctx(struct channel_gk20a *c) | |||
423 | if (pm_ctx->mem.gpu_va == 0) | 375 | if (pm_ctx->mem.gpu_va == 0) |
424 | return; | 376 | return; |
425 | 377 | ||
426 | msg.cmd = TEGRA_VGPU_CMD_CHANNEL_FREE_HWPM_CTX; | 378 | /* server will free on channel close */ |
427 | msg.handle = vgpu_get_handle(c->g); | ||
428 | p->handle = c->virt_ctx; | ||
429 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); | ||
430 | WARN_ON(err || msg.ret); | ||
431 | 379 | ||
432 | __nvgpu_vm_free_va(c->vm, pm_ctx->mem.gpu_va, | 380 | __nvgpu_vm_free_va(tsg->vm, pm_ctx->mem.gpu_va, |
433 | gmmu_page_size_kernel); | 381 | gmmu_page_size_kernel); |
434 | pm_ctx->mem.gpu_va = 0; | 382 | pm_ctx->mem.gpu_va = 0; |
435 | } | 383 | } |
436 | 384 | ||
437 | void vgpu_gr_free_channel_ctx(struct channel_gk20a *c, bool is_tsg) | 385 | void vgpu_gr_free_gr_ctx(struct gk20a *g, |
386 | struct vm_gk20a *vm, struct nvgpu_gr_ctx *gr_ctx) | ||
438 | { | 387 | { |
388 | struct tsg_gk20a *tsg; | ||
389 | |||
439 | gk20a_dbg_fn(""); | 390 | gk20a_dbg_fn(""); |
440 | 391 | ||
441 | if (c->g->ops.fifo.free_channel_ctx_header) | 392 | if (gr_ctx->mem.gpu_va) { |
442 | c->g->ops.fifo.free_channel_ctx_header(c); | 393 | struct tegra_vgpu_cmd_msg msg; |
443 | vgpu_gr_unmap_global_ctx_buffers(c); | 394 | struct tegra_vgpu_gr_ctx_params *p = &msg.params.gr_ctx; |
444 | vgpu_gr_free_channel_patch_ctx(c); | 395 | int err; |
445 | vgpu_gr_free_channel_pm_ctx(c); | ||
446 | if (!is_tsg) | ||
447 | vgpu_gr_free_channel_gr_ctx(c); | ||
448 | 396 | ||
449 | /* zcull_ctx, pm_ctx */ | 397 | msg.cmd = TEGRA_VGPU_CMD_GR_CTX_FREE; |
398 | msg.handle = vgpu_get_handle(g); | ||
399 | p->gr_ctx_handle = gr_ctx->virt_ctx; | ||
400 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); | ||
401 | WARN_ON(err || msg.ret); | ||
450 | 402 | ||
451 | memset(&c->ch_ctx, 0, sizeof(struct channel_ctx_gk20a)); | 403 | __nvgpu_vm_free_va(vm, gr_ctx->mem.gpu_va, |
404 | gmmu_page_size_kernel); | ||
405 | |||
406 | tsg = &g->fifo.tsg[gr_ctx->tsgid]; | ||
407 | vgpu_gr_unmap_global_ctx_buffers(tsg); | ||
408 | vgpu_gr_free_channel_patch_ctx(tsg); | ||
409 | vgpu_gr_free_channel_pm_ctx(tsg); | ||
410 | |||
411 | nvgpu_dma_unmap_free(vm, &gr_ctx->pagepool_ctxsw_buffer); | ||
412 | nvgpu_dma_unmap_free(vm, &gr_ctx->betacb_ctxsw_buffer); | ||
413 | nvgpu_dma_unmap_free(vm, &gr_ctx->spill_ctxsw_buffer); | ||
414 | nvgpu_dma_unmap_free(vm, &gr_ctx->preempt_ctxsw_buffer); | ||
452 | 415 | ||
453 | c->first_init = false; | 416 | memset(gr_ctx, 0, sizeof(*gr_ctx)); |
417 | } | ||
454 | } | 418 | } |
455 | 419 | ||
456 | static int vgpu_gr_ch_bind_gr_ctx(struct channel_gk20a *c) | 420 | static int vgpu_gr_ch_bind_gr_ctx(struct channel_gk20a *c) |
457 | { | 421 | { |
458 | struct gr_ctx_desc *gr_ctx = c->ch_ctx.gr_ctx; | 422 | struct tsg_gk20a *tsg; |
423 | struct nvgpu_gr_ctx *gr_ctx; | ||
459 | struct tegra_vgpu_cmd_msg msg = {0}; | 424 | struct tegra_vgpu_cmd_msg msg = {0}; |
460 | struct tegra_vgpu_channel_bind_gr_ctx_params *p = | 425 | struct tegra_vgpu_channel_bind_gr_ctx_params *p = |
461 | &msg.params.ch_bind_gr_ctx; | 426 | &msg.params.ch_bind_gr_ctx; |
462 | int err; | 427 | int err; |
463 | 428 | ||
429 | tsg = tsg_gk20a_from_ch(c); | ||
430 | if (!tsg) | ||
431 | return -EINVAL; | ||
432 | |||
433 | gr_ctx = &tsg->gr_ctx; | ||
434 | |||
464 | msg.cmd = TEGRA_VGPU_CMD_CHANNEL_BIND_GR_CTX; | 435 | msg.cmd = TEGRA_VGPU_CMD_CHANNEL_BIND_GR_CTX; |
465 | msg.handle = vgpu_get_handle(c->g); | 436 | msg.handle = vgpu_get_handle(c->g); |
466 | p->ch_handle = c->virt_ctx; | 437 | p->ch_handle = c->virt_ctx; |
@@ -474,7 +445,7 @@ static int vgpu_gr_ch_bind_gr_ctx(struct channel_gk20a *c) | |||
474 | 445 | ||
475 | static int vgpu_gr_tsg_bind_gr_ctx(struct tsg_gk20a *tsg) | 446 | static int vgpu_gr_tsg_bind_gr_ctx(struct tsg_gk20a *tsg) |
476 | { | 447 | { |
477 | struct gr_ctx_desc *gr_ctx = tsg->tsg_gr_ctx; | 448 | struct nvgpu_gr_ctx *gr_ctx = &tsg->gr_ctx; |
478 | struct tegra_vgpu_cmd_msg msg = {0}; | 449 | struct tegra_vgpu_cmd_msg msg = {0}; |
479 | struct tegra_vgpu_tsg_bind_gr_ctx_params *p = | 450 | struct tegra_vgpu_tsg_bind_gr_ctx_params *p = |
480 | &msg.params.tsg_bind_gr_ctx; | 451 | &msg.params.tsg_bind_gr_ctx; |
@@ -495,7 +466,7 @@ int vgpu_gr_alloc_obj_ctx(struct channel_gk20a *c, u32 class_num, u32 flags) | |||
495 | { | 466 | { |
496 | struct gk20a *g = c->g; | 467 | struct gk20a *g = c->g; |
497 | struct fifo_gk20a *f = &g->fifo; | 468 | struct fifo_gk20a *f = &g->fifo; |
498 | struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx; | 469 | struct nvgpu_gr_ctx *gr_ctx = NULL; |
499 | struct tsg_gk20a *tsg = NULL; | 470 | struct tsg_gk20a *tsg = NULL; |
500 | int err = 0; | 471 | int err = 0; |
501 | 472 | ||
@@ -515,95 +486,87 @@ int vgpu_gr_alloc_obj_ctx(struct channel_gk20a *c, u32 class_num, u32 flags) | |||
515 | } | 486 | } |
516 | c->obj_class = class_num; | 487 | c->obj_class = class_num; |
517 | 488 | ||
518 | if (gk20a_is_channel_marked_as_tsg(c)) | 489 | if (!gk20a_is_channel_marked_as_tsg(c)) |
519 | tsg = &f->tsg[c->tsgid]; | 490 | return -EINVAL; |
520 | 491 | ||
521 | if (!tsg) { | 492 | tsg = &f->tsg[c->tsgid]; |
522 | /* allocate gr ctx buffer */ | 493 | gr_ctx = &tsg->gr_ctx; |
523 | if (!ch_ctx->gr_ctx) { | 494 | |
524 | err = g->ops.gr.alloc_gr_ctx(g, &c->ch_ctx.gr_ctx, | 495 | if (!nvgpu_mem_is_valid(&gr_ctx->mem)) { |
525 | c->vm, | 496 | tsg->vm = c->vm; |
526 | class_num, | 497 | nvgpu_vm_get(tsg->vm); |
527 | flags); | 498 | err = g->ops.gr.alloc_gr_ctx(g, gr_ctx, |
528 | if (!err) | 499 | c->vm, |
529 | err = vgpu_gr_ch_bind_gr_ctx(c); | 500 | class_num, |
530 | if (err) { | 501 | flags); |
531 | nvgpu_err(g, "fail to allocate gr ctx buffer"); | 502 | if (!err) |
532 | goto out; | 503 | err = vgpu_gr_tsg_bind_gr_ctx(tsg); |
533 | } | 504 | if (err) { |
534 | } else { | ||
535 | /*TBD: needs to be more subtle about which is | ||
536 | * being allocated as some are allowed to be | ||
537 | * allocated along same channel */ | ||
538 | nvgpu_err(g, | 505 | nvgpu_err(g, |
539 | "too many classes alloc'd on same channel"); | 506 | "fail to allocate TSG gr ctx buffer, err=%d", err); |
540 | err = -EINVAL; | 507 | nvgpu_vm_put(tsg->vm); |
508 | tsg->vm = NULL; | ||
541 | goto out; | 509 | goto out; |
542 | } | 510 | } |
543 | } else { | ||
544 | if (!tsg->tsg_gr_ctx) { | ||
545 | tsg->vm = c->vm; | ||
546 | nvgpu_vm_get(tsg->vm); | ||
547 | err = g->ops.gr.alloc_gr_ctx(g, &tsg->tsg_gr_ctx, | ||
548 | c->vm, | ||
549 | class_num, | ||
550 | flags); | ||
551 | if (!err) | ||
552 | err = vgpu_gr_tsg_bind_gr_ctx(tsg); | ||
553 | if (err) { | ||
554 | nvgpu_err(g, | ||
555 | "fail to allocate TSG gr ctx buffer, err=%d", err); | ||
556 | nvgpu_vm_put(tsg->vm); | ||
557 | tsg->vm = NULL; | ||
558 | goto out; | ||
559 | } | ||
560 | } | ||
561 | 511 | ||
562 | ch_ctx->gr_ctx = tsg->tsg_gr_ctx; | ||
563 | err = vgpu_gr_ch_bind_gr_ctx(c); | 512 | err = vgpu_gr_ch_bind_gr_ctx(c); |
564 | if (err) { | 513 | if (err) { |
565 | nvgpu_err(g, "fail to bind gr ctx buffer"); | 514 | nvgpu_err(g, "fail to bind gr ctx buffer"); |
566 | goto out; | 515 | goto out; |
567 | } | 516 | } |
568 | } | ||
569 | 517 | ||
570 | /* commit gr ctx buffer */ | 518 | /* commit gr ctx buffer */ |
571 | err = g->ops.gr.commit_inst(c, ch_ctx->gr_ctx->mem.gpu_va); | 519 | err = g->ops.gr.commit_inst(c, gr_ctx->mem.gpu_va); |
572 | if (err) { | 520 | if (err) { |
573 | nvgpu_err(g, "fail to commit gr ctx buffer"); | 521 | nvgpu_err(g, "fail to commit gr ctx buffer"); |
574 | goto out; | 522 | goto out; |
575 | } | 523 | } |
576 | 524 | ||
577 | /* allocate patch buffer */ | 525 | /* allocate patch buffer */ |
578 | if (ch_ctx->patch_ctx.mem.priv.pages == NULL) { | ||
579 | err = vgpu_gr_alloc_channel_patch_ctx(g, c); | 526 | err = vgpu_gr_alloc_channel_patch_ctx(g, c); |
580 | if (err) { | 527 | if (err) { |
581 | nvgpu_err(g, "fail to allocate patch buffer"); | 528 | nvgpu_err(g, "fail to allocate patch buffer"); |
582 | goto out; | 529 | goto out; |
583 | } | 530 | } |
584 | } | ||
585 | 531 | ||
586 | /* map global buffer to channel gpu_va and commit */ | 532 | /* map global buffer to channel gpu_va and commit */ |
587 | if (!ch_ctx->global_ctx_buffer_mapped) { | ||
588 | err = vgpu_gr_map_global_ctx_buffers(g, c); | 533 | err = vgpu_gr_map_global_ctx_buffers(g, c); |
589 | if (err) { | 534 | if (err) { |
590 | nvgpu_err(g, "fail to map global ctx buffer"); | 535 | nvgpu_err(g, "fail to map global ctx buffer"); |
591 | goto out; | 536 | goto out; |
592 | } | 537 | } |
593 | vgpu_gr_commit_global_ctx_buffers(g, c, true); | ||
594 | } | ||
595 | 538 | ||
596 | /* load golden image */ | 539 | err = vgpu_gr_commit_global_ctx_buffers(g, c, true); |
597 | if (!c->first_init) { | 540 | if (err) { |
541 | nvgpu_err(g, "fail to commit global ctx buffers"); | ||
542 | goto out; | ||
543 | } | ||
544 | |||
545 | /* load golden image */ | ||
598 | err = gr_gk20a_elpg_protected_call(g, | 546 | err = gr_gk20a_elpg_protected_call(g, |
599 | vgpu_gr_load_golden_ctx_image(g, c)); | 547 | vgpu_gr_load_golden_ctx_image(g, c)); |
600 | if (err) { | 548 | if (err) { |
601 | nvgpu_err(g, "fail to load golden ctx image"); | 549 | nvgpu_err(g, "fail to load golden ctx image"); |
602 | goto out; | 550 | goto out; |
603 | } | 551 | } |
604 | c->first_init = true; | 552 | } else { |
553 | err = vgpu_gr_ch_bind_gr_ctx(c); | ||
554 | if (err) { | ||
555 | nvgpu_err(g, "fail to bind gr ctx buffer"); | ||
556 | goto out; | ||
557 | } | ||
558 | |||
559 | /* commit gr ctx buffer */ | ||
560 | err = g->ops.gr.commit_inst(c, gr_ctx->mem.gpu_va); | ||
561 | if (err) { | ||
562 | nvgpu_err(g, "fail to commit gr ctx buffer"); | ||
563 | goto out; | ||
564 | } | ||
605 | } | 565 | } |
606 | 566 | ||
567 | /* PM ctxt switch is off by default */ | ||
568 | gr_ctx->pm_ctx.pm_mode = ctxsw_prog_main_image_pm_mode_no_ctxsw_f(); | ||
569 | |||
607 | gk20a_dbg_fn("done"); | 570 | gk20a_dbg_fn("done"); |
608 | return 0; | 571 | return 0; |
609 | out: | 572 | out: |
@@ -1055,15 +1018,30 @@ int vgpu_gr_update_smpc_ctxsw_mode(struct gk20a *g, | |||
1055 | int vgpu_gr_update_hwpm_ctxsw_mode(struct gk20a *g, | 1018 | int vgpu_gr_update_hwpm_ctxsw_mode(struct gk20a *g, |
1056 | struct channel_gk20a *ch, bool enable) | 1019 | struct channel_gk20a *ch, bool enable) |
1057 | { | 1020 | { |
1058 | struct channel_ctx_gk20a *ch_ctx = &ch->ch_ctx; | 1021 | struct tsg_gk20a *tsg; |
1059 | struct pm_ctx_desc *pm_ctx = &ch_ctx->pm_ctx; | 1022 | struct nvgpu_gr_ctx *ch_ctx; |
1023 | struct pm_ctx_desc *pm_ctx; | ||
1060 | struct tegra_vgpu_cmd_msg msg; | 1024 | struct tegra_vgpu_cmd_msg msg; |
1061 | struct tegra_vgpu_channel_set_ctxsw_mode *p = &msg.params.set_ctxsw_mode; | 1025 | struct tegra_vgpu_channel_set_ctxsw_mode *p = &msg.params.set_ctxsw_mode; |
1062 | int err; | 1026 | int err; |
1063 | 1027 | ||
1064 | gk20a_dbg_fn(""); | 1028 | gk20a_dbg_fn(""); |
1065 | 1029 | ||
1030 | tsg = tsg_gk20a_from_ch(ch); | ||
1031 | if (!tsg) | ||
1032 | return -EINVAL; | ||
1033 | |||
1034 | ch_ctx = &tsg->gr_ctx; | ||
1035 | pm_ctx = &ch_ctx->pm_ctx; | ||
1036 | |||
1066 | if (enable) { | 1037 | if (enable) { |
1038 | /* | ||
1039 | * send command to enable HWPM only once - otherwise server | ||
1040 | * will return an error due to using the same GPU VA twice. | ||
1041 | */ | ||
1042 | if (pm_ctx->pm_mode == ctxsw_prog_main_image_pm_mode_ctxsw_f()) | ||
1043 | return 0; | ||
1044 | |||
1067 | p->mode = TEGRA_VGPU_CTXSW_MODE_CTXSW; | 1045 | p->mode = TEGRA_VGPU_CTXSW_MODE_CTXSW; |
1068 | 1046 | ||
1069 | /* Allocate buffer if necessary */ | 1047 | /* Allocate buffer if necessary */ |
@@ -1076,8 +1054,12 @@ int vgpu_gr_update_hwpm_ctxsw_mode(struct gk20a *g, | |||
1076 | return -ENOMEM; | 1054 | return -ENOMEM; |
1077 | pm_ctx->mem.size = g->gr.ctx_vars.pm_ctxsw_image_size; | 1055 | pm_ctx->mem.size = g->gr.ctx_vars.pm_ctxsw_image_size; |
1078 | } | 1056 | } |
1079 | } else | 1057 | } else { |
1058 | if (pm_ctx->pm_mode == ctxsw_prog_main_image_pm_mode_no_ctxsw_f()) | ||
1059 | return 0; | ||
1060 | |||
1080 | p->mode = TEGRA_VGPU_CTXSW_MODE_NO_CTXSW; | 1061 | p->mode = TEGRA_VGPU_CTXSW_MODE_NO_CTXSW; |
1062 | } | ||
1081 | 1063 | ||
1082 | msg.cmd = TEGRA_VGPU_CMD_CHANNEL_SET_HWPM_CTXSW_MODE; | 1064 | msg.cmd = TEGRA_VGPU_CMD_CHANNEL_SET_HWPM_CTXSW_MODE; |
1083 | msg.handle = vgpu_get_handle(g); | 1065 | msg.handle = vgpu_get_handle(g); |
@@ -1086,8 +1068,13 @@ int vgpu_gr_update_hwpm_ctxsw_mode(struct gk20a *g, | |||
1086 | 1068 | ||
1087 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); | 1069 | err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); |
1088 | WARN_ON(err || msg.ret); | 1070 | WARN_ON(err || msg.ret); |
1071 | err = err ? err : msg.ret; | ||
1072 | if (!err) | ||
1073 | pm_ctx->pm_mode = enable ? | ||
1074 | ctxsw_prog_main_image_pm_mode_ctxsw_f() : | ||
1075 | ctxsw_prog_main_image_pm_mode_no_ctxsw_f(); | ||
1089 | 1076 | ||
1090 | return err ? err : msg.ret; | 1077 | return err; |
1091 | } | 1078 | } |
1092 | 1079 | ||
1093 | int vgpu_gr_clear_sm_error_state(struct gk20a *g, | 1080 | int vgpu_gr_clear_sm_error_state(struct gk20a *g, |
diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/gr_vgpu.h b/drivers/gpu/nvgpu/common/linux/vgpu/gr_vgpu.h index 16aa92a9..4b81da91 100644 --- a/drivers/gpu/nvgpu/common/linux/vgpu/gr_vgpu.h +++ b/drivers/gpu/nvgpu/common/linux/vgpu/gr_vgpu.h | |||
@@ -29,6 +29,7 @@ struct dbg_session_gk20a; | |||
29 | 29 | ||
30 | void vgpu_gr_detect_sm_arch(struct gk20a *g); | 30 | void vgpu_gr_detect_sm_arch(struct gk20a *g); |
31 | void vgpu_gr_free_channel_ctx(struct channel_gk20a *c, bool is_tsg); | 31 | void vgpu_gr_free_channel_ctx(struct channel_gk20a *c, bool is_tsg); |
32 | void vgpu_gr_free_tsg_ctx(struct tsg_gk20a *tsg); | ||
32 | int vgpu_gr_alloc_obj_ctx(struct channel_gk20a *c, u32 class_num, u32 flags); | 33 | int vgpu_gr_alloc_obj_ctx(struct channel_gk20a *c, u32 class_num, u32 flags); |
33 | int vgpu_gr_bind_ctxsw_zcull(struct gk20a *g, struct gr_gk20a *gr, | 34 | int vgpu_gr_bind_ctxsw_zcull(struct gk20a *g, struct gr_gk20a *gr, |
34 | struct channel_gk20a *c, u64 zcull_va, | 35 | struct channel_gk20a *c, u64 zcull_va, |
diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/gv11b/vgpu_hal_gv11b.c b/drivers/gpu/nvgpu/common/linux/vgpu/gv11b/vgpu_hal_gv11b.c index 968eae10..132ce6e5 100644 --- a/drivers/gpu/nvgpu/common/linux/vgpu/gv11b/vgpu_hal_gv11b.c +++ b/drivers/gpu/nvgpu/common/linux/vgpu/gv11b/vgpu_hal_gv11b.c | |||
@@ -131,7 +131,6 @@ static const struct gpu_ops vgpu_gv11b_ops = { | |||
131 | .load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode, | 131 | .load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode, |
132 | .set_gpc_tpc_mask = gr_gv11b_set_gpc_tpc_mask, | 132 | .set_gpc_tpc_mask = gr_gv11b_set_gpc_tpc_mask, |
133 | .get_gpc_tpc_mask = vgpu_gr_get_gpc_tpc_mask, | 133 | .get_gpc_tpc_mask = vgpu_gr_get_gpc_tpc_mask, |
134 | .free_channel_ctx = vgpu_gr_free_channel_ctx, | ||
135 | .alloc_obj_ctx = vgpu_gr_alloc_obj_ctx, | 134 | .alloc_obj_ctx = vgpu_gr_alloc_obj_ctx, |
136 | .bind_ctxsw_zcull = vgpu_gr_bind_ctxsw_zcull, | 135 | .bind_ctxsw_zcull = vgpu_gr_bind_ctxsw_zcull, |
137 | .get_zcull_info = vgpu_gr_get_zcull_info, | 136 | .get_zcull_info = vgpu_gr_get_zcull_info, |
diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/gv11b/vgpu_subctx_gv11b.c b/drivers/gpu/nvgpu/common/linux/vgpu/gv11b/vgpu_subctx_gv11b.c index d59f0381..a0099f03 100644 --- a/drivers/gpu/nvgpu/common/linux/vgpu/gv11b/vgpu_subctx_gv11b.c +++ b/drivers/gpu/nvgpu/common/linux/vgpu/gv11b/vgpu_subctx_gv11b.c | |||
@@ -21,7 +21,7 @@ | |||
21 | 21 | ||
22 | int vgpu_gv11b_alloc_subctx_header(struct channel_gk20a *c) | 22 | int vgpu_gv11b_alloc_subctx_header(struct channel_gk20a *c) |
23 | { | 23 | { |
24 | struct ctx_header_desc *ctx = &c->ch_ctx.ctx_header; | 24 | struct ctx_header_desc *ctx = &c->ctx_header; |
25 | struct tegra_vgpu_cmd_msg msg = {}; | 25 | struct tegra_vgpu_cmd_msg msg = {}; |
26 | struct tegra_vgpu_alloc_ctx_header_params *p = | 26 | struct tegra_vgpu_alloc_ctx_header_params *p = |
27 | &msg.params.alloc_ctx_header; | 27 | &msg.params.alloc_ctx_header; |
@@ -52,7 +52,7 @@ int vgpu_gv11b_alloc_subctx_header(struct channel_gk20a *c) | |||
52 | 52 | ||
53 | void vgpu_gv11b_free_subctx_header(struct channel_gk20a *c) | 53 | void vgpu_gv11b_free_subctx_header(struct channel_gk20a *c) |
54 | { | 54 | { |
55 | struct ctx_header_desc *ctx = &c->ch_ctx.ctx_header; | 55 | struct ctx_header_desc *ctx = &c->ctx_header; |
56 | struct tegra_vgpu_cmd_msg msg = {}; | 56 | struct tegra_vgpu_cmd_msg msg = {}; |
57 | struct tegra_vgpu_free_ctx_header_params *p = | 57 | struct tegra_vgpu_free_ctx_header_params *p = |
58 | &msg.params.free_ctx_header; | 58 | &msg.params.free_ctx_header; |
diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/vgpu.h b/drivers/gpu/nvgpu/common/linux/vgpu/vgpu.h index 8c306ea0..20624240 100644 --- a/drivers/gpu/nvgpu/common/linux/vgpu/vgpu.h +++ b/drivers/gpu/nvgpu/common/linux/vgpu/vgpu.h | |||
@@ -79,12 +79,12 @@ int vgpu_gr_isr(struct gk20a *g, struct tegra_vgpu_gr_intr_info *info); | |||
79 | int vgpu_gr_nonstall_isr(struct gk20a *g, | 79 | int vgpu_gr_nonstall_isr(struct gk20a *g, |
80 | struct tegra_vgpu_gr_nonstall_intr_info *info); | 80 | struct tegra_vgpu_gr_nonstall_intr_info *info); |
81 | int vgpu_gr_alloc_gr_ctx(struct gk20a *g, | 81 | int vgpu_gr_alloc_gr_ctx(struct gk20a *g, |
82 | struct gr_ctx_desc **__gr_ctx, | 82 | struct nvgpu_gr_ctx *gr_ctx, |
83 | struct vm_gk20a *vm, | 83 | struct vm_gk20a *vm, |
84 | u32 class, | 84 | u32 class, |
85 | u32 flags); | 85 | u32 flags); |
86 | void vgpu_gr_free_gr_ctx(struct gk20a *g, struct vm_gk20a *vm, | 86 | void vgpu_gr_free_gr_ctx(struct gk20a *g, struct vm_gk20a *vm, |
87 | struct gr_ctx_desc *gr_ctx); | 87 | struct nvgpu_gr_ctx *gr_ctx); |
88 | void vgpu_gr_handle_sm_esr_event(struct gk20a *g, | 88 | void vgpu_gr_handle_sm_esr_event(struct gk20a *g, |
89 | struct tegra_vgpu_sm_esr_info *info); | 89 | struct tegra_vgpu_sm_esr_info *info); |
90 | int vgpu_gr_init_ctx_state(struct gk20a *g); | 90 | int vgpu_gr_init_ctx_state(struct gk20a *g); |
@@ -141,7 +141,7 @@ static inline int vgpu_gr_isr(struct gk20a *g, | |||
141 | return 0; | 141 | return 0; |
142 | } | 142 | } |
143 | static inline int vgpu_gr_alloc_gr_ctx(struct gk20a *g, | 143 | static inline int vgpu_gr_alloc_gr_ctx(struct gk20a *g, |
144 | struct gr_ctx_desc **__gr_ctx, | 144 | struct nvgpu_gr_ctx *gr_ctx, |
145 | struct vm_gk20a *vm, | 145 | struct vm_gk20a *vm, |
146 | u32 class, | 146 | u32 class, |
147 | u32 flags) | 147 | u32 flags) |
@@ -149,7 +149,7 @@ static inline int vgpu_gr_alloc_gr_ctx(struct gk20a *g, | |||
149 | return -ENOSYS; | 149 | return -ENOSYS; |
150 | } | 150 | } |
151 | static inline void vgpu_gr_free_gr_ctx(struct gk20a *g, struct vm_gk20a *vm, | 151 | static inline void vgpu_gr_free_gr_ctx(struct gk20a *g, struct vm_gk20a *vm, |
152 | struct gr_ctx_desc *gr_ctx) | 152 | struct nvgpu_gr_ctx *gr_ctx) |
153 | { | 153 | { |
154 | } | 154 | } |
155 | static inline int vgpu_gr_init_ctx_state(struct gk20a *g) | 155 | static inline int vgpu_gr_init_ctx_state(struct gk20a *g) |