author		Terje Bergstrom <tbergstrom@nvidia.com>	2017-12-15 12:04:15 -0500
committer	mobile promotions <svcmobile_promotions@nvidia.com>	2018-01-17 15:29:09 -0500
commit		2f6698b863c9cc1db6455637b7c72e812b470b93 (patch)
tree		d0c8abf32d6994b9f54bf5eddafd8316e038c829 /drivers/gpu/nvgpu/gk20a
parent		6a73114788ffafe4c53771c707ecbd9c9ea0a117 (diff)
gpu: nvgpu: Make graphics context property of TSG

Move graphics context ownership to TSG instead of channel. Combine
channel_ctx_gk20a and gr_ctx_desc into one structure, because the split
between them was arbitrary. Move the context header to be a property of
the channel.
Bug 1842197
Change-Id: I410e3262f80b318d8528bcbec270b63a2d8d2ff9
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1639532
Reviewed-by: Seshendra Gadagottu <sgadagottu@nvidia.com>
Tested-by: Seshendra Gadagottu <sgadagottu@nvidia.com>
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
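
The diff below repeatedly replaces direct uses of ch->ch_ctx with a lookup
through the owning TSG. As a minimal illustrative sketch (not code from this
patch; the helper name example_get_gr_ctx_mem is invented for illustration),
the new access pattern looks like this:

/*
 * Illustrative sketch only, distilled from the diff below: with the
 * graphics context owned by the TSG, callers resolve the TSG first and
 * bail out when the channel is not bound to one.
 */
static int example_get_gr_ctx_mem(struct channel_gk20a *c,
				  struct nvgpu_mem **mem)
{
	struct tsg_gk20a *tsg;
	struct nvgpu_gr_ctx *gr_ctx;

	tsg = tsg_gk20a_from_ch(c);	/* channel must belong to a TSG */
	if (!tsg)
		return -EINVAL;

	gr_ctx = &tsg->gr_ctx;		/* context now lives in the TSG */
	*mem = &gr_ctx->mem;
	return 0;
}

This is exactly the shape of the lookups added in gr_gk20a.c and
fecs_trace_gk20a.c below.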
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a')
-rw-r--r--	drivers/gpu/nvgpu/gk20a/channel_gk20a.c		6
-rw-r--r--	drivers/gpu/nvgpu/gk20a/channel_gk20a.h		23
-rw-r--r--	drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c	12
-rw-r--r--	drivers/gpu/nvgpu/gk20a/gk20a.h			20
-rw-r--r--	drivers/gpu/nvgpu/gk20a/gr_gk20a.c		641
-rw-r--r--	drivers/gpu/nvgpu/gk20a/gr_gk20a.h		40
-rw-r--r--	drivers/gpu/nvgpu/gk20a/tsg_gk20a.c		5
-rw-r--r--	drivers/gpu/nvgpu/gk20a/tsg_gk20a.h		6
8 files changed, 376 insertions, 377 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
index 16d4711f..64266fe5 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -259,7 +259,7 @@ void gk20a_channel_abort(struct channel_gk20a *ch, bool channel_preempt)
 
 	ch->g->ops.fifo.disable_channel(ch);
 
-	if (channel_preempt && ch->ch_ctx.gr_ctx)
+	if (channel_preempt && gk20a_is_channel_marked_as_tsg(ch))
 		ch->g->ops.fifo.preempt_channel(ch->g, ch->chid);
 
 	gk20a_channel_abort_clean_up(ch);
@@ -421,8 +421,8 @@ static void gk20a_free_channel(struct channel_gk20a *ch, bool force)
 	g->ops.fecs_trace.unbind_channel(g, ch);
 #endif
 
-	/* release channel ctx */
-	g->ops.gr.free_channel_ctx(ch, was_tsg);
+	if(g->ops.fifo.free_channel_ctx_header)
+		g->ops.fifo.free_channel_ctx_header(ch);
 
 	gk20a_gr_flush_channel_tlb(gr);
 
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
index c13b1c58..29fa302f 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
@@ -31,7 +31,6 @@
 #include <nvgpu/atomic.h>
 
 struct gk20a;
-struct gr_gk20a;
 struct dbg_session_gk20a;
 struct gk20a_fence;
 struct fifo_profile_gk20a;
@@ -50,10 +49,6 @@ struct fifo_profile_gk20a;
 #define NVGPU_GPFIFO_FLAGS_SUPPORT_DETERMINISTIC	(1 << 1)
 #define NVGPU_GPFIFO_FLAGS_REPLAYABLE_FAULTS_ENABLE	(1 << 2)
 
-/* Flags to be passed to g->ops.gr.alloc_obj_ctx() */
-#define NVGPU_OBJ_CTX_FLAGS_SUPPORT_GFXP		(1 << 1)
-#define NVGPU_OBJ_CTX_FLAGS_SUPPORT_CILP		(1 << 2)
-
 struct notification {
 	struct {
 		u32 nanoseconds[2];
@@ -63,19 +58,6 @@ struct notification {
 	u16 status;
 };
 
-/* contexts associated with a channel */
-struct channel_ctx_gk20a {
-	struct gr_ctx_desc *gr_ctx;
-	struct patch_desc patch_ctx;
-	struct zcull_ctx_desc zcull_ctx;
-	struct pm_ctx_desc pm_ctx;
-	u64 global_ctx_buffer_va[NR_GLOBAL_CTX_BUF_VA];
-	u64 global_ctx_buffer_size[NR_GLOBAL_CTX_BUF_VA];
-	int global_ctx_buffer_index[NR_GLOBAL_CTX_BUF_VA];
-	bool global_ctx_buffer_mapped;
-	struct ctx_header_desc ctx_header;
-};
-
 struct channel_gk20a_job {
 	struct nvgpu_mapped_buf **mapped_buffers;
 	int num_mapped_buffers;
@@ -190,7 +172,6 @@ struct channel_gk20a {
 	int chid;
 	bool wdt_enabled;
 	nvgpu_atomic_t bound;
-	bool first_init;
 	bool vpr;
 	bool deterministic;
 	/* deterministic, but explicitly idle and submits disallowed */
@@ -210,8 +191,6 @@ struct channel_gk20a {
 
 	struct gpfifo_desc gpfifo;
 
-	struct channel_ctx_gk20a ch_ctx;
-
 	struct nvgpu_mem inst_block;
 
 	u64 userd_iova;
@@ -262,6 +241,8 @@ struct channel_gk20a {
 	struct channel_t19x t19x;
 #endif
 
+	struct ctx_header_desc ctx_header;
+
 	/* Any operating system specific data. */
 	void *os_priv;
 };
diff --git a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c
index d283a82e..409661fc 100644
--- a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c
@@ -625,9 +625,10 @@ int gk20a_fecs_trace_bind_channel(struct gk20a *g,
 	u32 lo;
 	u32 hi;
 	u64 pa;
-	struct channel_ctx_gk20a *ch_ctx = &ch->ch_ctx;
+	struct tsg_gk20a *tsg;
+	struct nvgpu_gr_ctx *ch_ctx;
 	struct gk20a_fecs_trace *trace = g->fecs_trace;
-	struct nvgpu_mem *mem = &ch_ctx->gr_ctx->mem;
+	struct nvgpu_mem *mem;
 	u32 context_ptr = gk20a_fecs_trace_fecs_context_ptr(g, ch);
 	pid_t pid;
 	u32 aperture;
@@ -637,6 +638,13 @@
 			ch->chid, context_ptr,
 			nvgpu_inst_block_addr(g, &ch->inst_block));
 
+	tsg = tsg_gk20a_from_ch(ch);
+	if (!tsg)
+		return -EINVAL;
+
+	ch_ctx = &tsg->gr_ctx;
+	mem = &ch_ctx->mem;
+
 	if (!trace)
 		return -ENOMEM;
 
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index 070b26b6..685976b1 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -187,16 +187,16 @@ struct gpu_ops {
 		void (*cb_size_default)(struct gk20a *g);
 		int (*calc_global_ctx_buffer_size)(struct gk20a *g);
 		void (*commit_global_attrib_cb)(struct gk20a *g,
-						struct channel_ctx_gk20a *ch_ctx,
+						struct nvgpu_gr_ctx *ch_ctx,
 						u64 addr, bool patch);
 		void (*commit_global_bundle_cb)(struct gk20a *g,
-						struct channel_ctx_gk20a *ch_ctx,
+						struct nvgpu_gr_ctx *ch_ctx,
 						u64 addr, u64 size, bool patch);
 		int (*commit_global_cb_manager)(struct gk20a *g,
 						struct channel_gk20a *ch,
 						bool patch);
 		void (*commit_global_pagepool)(struct gk20a *g,
-					       struct channel_ctx_gk20a *ch_ctx,
+					       struct nvgpu_gr_ctx *ch_ctx,
 					       u64 addr, u32 size, bool patch);
 		void (*init_gpc_mmu)(struct gk20a *g);
 		int (*handle_sw_method)(struct gk20a *g, u32 addr,
@@ -230,7 +230,6 @@ struct gpu_ops {
 		int (*load_ctxsw_ucode)(struct gk20a *g);
 		u32 (*get_gpc_tpc_mask)(struct gk20a *g, u32 gpc_index);
 		void (*set_gpc_tpc_mask)(struct gk20a *g, u32 gpc_index);
-		void (*free_channel_ctx)(struct channel_gk20a *c, bool is_tsg);
 		int (*alloc_obj_ctx)(struct channel_gk20a *c,
 				     u32 class_num, u32 flags);
 		int (*bind_ctxsw_zcull)(struct gk20a *g, struct gr_gk20a *gr,
@@ -285,13 +284,12 @@ struct gpu_ops {
 		u32 (*pagepool_default_size)(struct gk20a *g);
 		int (*init_ctx_state)(struct gk20a *g);
 		int (*alloc_gr_ctx)(struct gk20a *g,
-				    struct gr_ctx_desc **__gr_ctx, struct vm_gk20a *vm,
+				    struct nvgpu_gr_ctx *gr_ctx, struct vm_gk20a *vm,
 				    u32 class, u32 padding);
 		void (*free_gr_ctx)(struct gk20a *g,
-				    struct vm_gk20a *vm,
-				    struct gr_ctx_desc *gr_ctx);
+				    struct vm_gk20a *vm, struct nvgpu_gr_ctx *gr_ctx);
 		void (*update_ctxsw_preemption_mode)(struct gk20a *g,
-				struct channel_ctx_gk20a *ch_ctx,
+				struct channel_gk20a *c,
 				struct nvgpu_mem *mem);
 		int (*update_smpc_ctxsw_mode)(struct gk20a *g,
 				struct channel_gk20a *c,
@@ -384,14 +382,14 @@ struct gpu_ops {
 		int (*get_preemption_mode_flags)(struct gk20a *g,
 			struct nvgpu_preemption_modes_rec *preemption_modes_rec);
 		int (*set_ctxsw_preemption_mode)(struct gk20a *g,
-				struct gr_ctx_desc *gr_ctx,
+				struct nvgpu_gr_ctx *gr_ctx,
 				struct vm_gk20a *vm, u32 class,
 				u32 graphics_preempt_mode,
 				u32 compute_preempt_mode);
 		int (*set_boosted_ctx)(struct channel_gk20a *ch, bool boost);
 		void (*update_boosted_ctx)(struct gk20a *g,
 				struct nvgpu_mem *mem,
-				struct gr_ctx_desc *gr_ctx);
+				struct nvgpu_gr_ctx *gr_ctx);
 		int (*init_sm_id_table)(struct gk20a *g);
 		int (*load_smid_config)(struct gk20a *g);
 		void (*program_sm_id_numbering)(struct gk20a *g,
@@ -440,7 +438,7 @@ struct gpu_ops {
 		u32 (*get_gpcs_swdx_dss_zbc_c_format_reg)(struct gk20a *g);
 		u32 (*get_gpcs_swdx_dss_zbc_z_format_reg)(struct gk20a *g);
 		void (*dump_ctxsw_stats)(struct gk20a *g, struct vm_gk20a *vm,
-				struct gr_ctx_desc *gr_ctx);
+				struct nvgpu_gr_ctx *gr_ctx);
 	} gr;
 	struct {
 		void (*init_hw)(struct gk20a *g);
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index 263ae030..f8af091b 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -85,18 +85,19 @@ static int gr_gk20a_alloc_global_ctx_buffers(struct gk20a *g);
 static void gr_gk20a_free_global_ctx_buffers(struct gk20a *g);
 static int gr_gk20a_map_global_ctx_buffers(struct gk20a *g,
 					    struct channel_gk20a *c);
-static void gr_gk20a_unmap_global_ctx_buffers(struct channel_gk20a *c);
-
-/* channel gr ctx buffer */
-static int gr_gk20a_alloc_channel_gr_ctx(struct gk20a *g,
-					struct channel_gk20a *c,
-					u32 class, u32 padding);
-static void gr_gk20a_free_channel_gr_ctx(struct channel_gk20a *c);
+static void gr_gk20a_unmap_global_ctx_buffers(struct gk20a *g,
+					      struct vm_gk20a *vm,
+					      struct nvgpu_gr_ctx *gr_ctx);
+static void gr_gk20a_free_channel_pm_ctx(struct gk20a *g,
+					 struct vm_gk20a *vm,
+					 struct nvgpu_gr_ctx *gr_ctx);
 
 /* channel patch ctx buffer */
 static int gr_gk20a_alloc_channel_patch_ctx(struct gk20a *g,
 					struct channel_gk20a *c);
-static void gr_gk20a_free_channel_patch_ctx(struct channel_gk20a *c);
+static void gr_gk20a_free_channel_patch_ctx(struct gk20a *g,
+					    struct vm_gk20a *vm,
+					    struct nvgpu_gr_ctx *gr_ctx);
 
 /* golden ctx image */
 static int gr_gk20a_init_golden_ctx_image(struct gk20a *g,
@@ -108,8 +109,16 @@ int gr_gk20a_get_ctx_id(struct gk20a *g,
 		struct channel_gk20a *c,
 		u32 *ctx_id)
 {
-	struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
-	struct nvgpu_mem *mem = &ch_ctx->gr_ctx->mem;
+	struct tsg_gk20a *tsg;
+	struct nvgpu_gr_ctx *gr_ctx = NULL;
+	struct nvgpu_mem *mem = NULL;
+
+	tsg = tsg_gk20a_from_ch(c);
+	if (!tsg)
+		return -EINVAL;
+
+	gr_ctx = &tsg->gr_ctx;
+	mem = &gr_ctx->mem;
 
 	/* Channel gr_ctx buffer is gpu cacheable.
 	   Flush and invalidate before cpu update. */
@@ -671,62 +680,62 @@ int gr_gk20a_commit_inst(struct channel_gk20a *c, u64 gpu_va)
  */
 
 int gr_gk20a_ctx_patch_write_begin(struct gk20a *g,
-				   struct channel_ctx_gk20a *ch_ctx,
+				   struct nvgpu_gr_ctx *gr_ctx,
 				   bool update_patch_count)
 {
 	int err = 0;
 
-	err = nvgpu_mem_begin(g, &ch_ctx->patch_ctx.mem);
+	err = nvgpu_mem_begin(g, &gr_ctx->patch_ctx.mem);
 	if (err)
 		return err;
 
 	if (update_patch_count) {
 		/* reset patch count if ucode has already processed it */
-		ch_ctx->patch_ctx.data_count = nvgpu_mem_rd(g,
-					&ch_ctx->gr_ctx->mem,
+		gr_ctx->patch_ctx.data_count = nvgpu_mem_rd(g,
+					&gr_ctx->mem,
 					ctxsw_prog_main_image_patch_count_o());
 		nvgpu_log(g, gpu_dbg_info, "patch count reset to %d",
-			ch_ctx->patch_ctx.data_count);
+			gr_ctx->patch_ctx.data_count);
 	}
 	return 0;
 }
 
 void gr_gk20a_ctx_patch_write_end(struct gk20a *g,
-				  struct channel_ctx_gk20a *ch_ctx,
+				  struct nvgpu_gr_ctx *gr_ctx,
 				  bool update_patch_count)
 {
-	nvgpu_mem_end(g, &ch_ctx->patch_ctx.mem);
+	nvgpu_mem_end(g, &gr_ctx->patch_ctx.mem);
 
 	/* Write context count to context image if it is mapped */
 	if (update_patch_count) {
-		nvgpu_mem_wr(g, &ch_ctx->gr_ctx->mem,
+		nvgpu_mem_wr(g, &gr_ctx->mem,
 			ctxsw_prog_main_image_patch_count_o(),
-			ch_ctx->patch_ctx.data_count);
+			gr_ctx->patch_ctx.data_count);
 		nvgpu_log(g, gpu_dbg_info, "write patch count %d",
-			ch_ctx->patch_ctx.data_count);
+			gr_ctx->patch_ctx.data_count);
 	}
 }
 
 void gr_gk20a_ctx_patch_write(struct gk20a *g,
-			      struct channel_ctx_gk20a *ch_ctx,
+			      struct nvgpu_gr_ctx *gr_ctx,
 			      u32 addr, u32 data, bool patch)
 {
 	if (patch) {
-		u32 patch_slot = ch_ctx->patch_ctx.data_count *
+		u32 patch_slot = gr_ctx->patch_ctx.data_count *
 				PATCH_CTX_SLOTS_REQUIRED_PER_ENTRY;
 		if (patch_slot > (PATCH_CTX_ENTRIES_FROM_SIZE(
-				ch_ctx->patch_ctx.mem.size) -
+				gr_ctx->patch_ctx.mem.size) -
 				PATCH_CTX_SLOTS_REQUIRED_PER_ENTRY)) {
 			nvgpu_err(g, "failed to access patch_slot %d",
 				patch_slot);
 			return;
 		}
-		nvgpu_mem_wr32(g, &ch_ctx->patch_ctx.mem, patch_slot, addr);
-		nvgpu_mem_wr32(g, &ch_ctx->patch_ctx.mem, patch_slot + 1, data);
-		ch_ctx->patch_ctx.data_count++;
+		nvgpu_mem_wr32(g, &gr_ctx->patch_ctx.mem, patch_slot, addr);
+		nvgpu_mem_wr32(g, &gr_ctx->patch_ctx.mem, patch_slot + 1, data);
+		gr_ctx->patch_ctx.data_count++;
 		nvgpu_log(g, gpu_dbg_info,
 			"patch addr = 0x%x data = 0x%x data_count %d",
-			addr, data, ch_ctx->patch_ctx.data_count);
+			addr, data, gr_ctx->patch_ctx.data_count);
 	} else {
 		gk20a_writel(g, addr, data);
 	}
@@ -793,14 +802,22 @@ void gr_gk20a_write_pm_ptr(struct gk20a *g,
 
 static int gr_gk20a_ctx_zcull_setup(struct gk20a *g, struct channel_gk20a *c)
 {
-	struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
-	struct nvgpu_mem *mem = &ch_ctx->gr_ctx->mem;
-	struct ctx_header_desc *ctx = &c->ch_ctx.ctx_header;
+	struct tsg_gk20a *tsg;
+	struct nvgpu_gr_ctx *gr_ctx = NULL;
+	struct nvgpu_mem *mem = NULL;
+	struct ctx_header_desc *ctx = &c->ctx_header;
 	struct nvgpu_mem *ctxheader = &ctx->mem;
 	int ret = 0;
 
 	gk20a_dbg_fn("");
 
+	tsg = tsg_gk20a_from_ch(c);
+	if (!tsg)
+		return -EINVAL;
+
+	gr_ctx = &tsg->gr_ctx;
+	mem = &gr_ctx->mem;
+
 	if (nvgpu_mem_begin(g, mem))
 		return -ENOMEM;
 
@@ -809,8 +826,8 @@ static int gr_gk20a_ctx_zcull_setup(struct gk20a *g, struct channel_gk20a *c)
 		goto clean_up_mem;
 	}
 
-	if (ch_ctx->zcull_ctx.gpu_va == 0 &&
-	    ch_ctx->zcull_ctx.ctx_sw_mode ==
+	if (gr_ctx->zcull_ctx.gpu_va == 0 &&
+	    gr_ctx->zcull_ctx.ctx_sw_mode ==
 			ctxsw_prog_main_image_zcull_mode_separate_buffer_v()) {
 		ret = -EINVAL;
 		goto clean_up;
@@ -830,13 +847,13 @@ static int gr_gk20a_ctx_zcull_setup(struct gk20a *g, struct channel_gk20a *c)
 
 	nvgpu_mem_wr(g, mem,
 		ctxsw_prog_main_image_zcull_o(),
-		ch_ctx->zcull_ctx.ctx_sw_mode);
+		gr_ctx->zcull_ctx.ctx_sw_mode);
 
 	if (ctxheader->gpu_va)
 		g->ops.gr.write_zcull_ptr(g, ctxheader,
-			ch_ctx->zcull_ctx.gpu_va);
+			gr_ctx->zcull_ctx.gpu_va);
 	else
-		g->ops.gr.write_zcull_ptr(g, mem, ch_ctx->zcull_ctx.gpu_va);
+		g->ops.gr.write_zcull_ptr(g, mem, gr_ctx->zcull_ctx.gpu_va);
 
 	gk20a_enable_channel_tsg(g, c);
 
@@ -869,22 +886,29 @@ static int gr_gk20a_commit_global_ctx_buffers(struct gk20a *g,
 		struct channel_gk20a *c, bool patch)
 {
 	struct gr_gk20a *gr = &g->gr;
-	struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
+	struct tsg_gk20a *tsg;
+	struct nvgpu_gr_ctx *gr_ctx = NULL;
 	u64 addr;
 	u32 size;
 
 	gk20a_dbg_fn("");
+
+	tsg = tsg_gk20a_from_ch(c);
+	if (!tsg)
+		return -EINVAL;
+
+	gr_ctx = &tsg->gr_ctx;
 	if (patch) {
 		int err;
-		err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx, false);
+		err = gr_gk20a_ctx_patch_write_begin(g, gr_ctx, false);
 		if (err)
 			return err;
 	}
 
 	/* global pagepool buffer */
-	addr = (u64_lo32(ch_ctx->global_ctx_buffer_va[PAGEPOOL_VA]) >>
+	addr = (u64_lo32(gr_ctx->global_ctx_buffer_va[PAGEPOOL_VA]) >>
 		gr_scc_pagepool_base_addr_39_8_align_bits_v()) |
-	       (u64_hi32(ch_ctx->global_ctx_buffer_va[PAGEPOOL_VA]) <<
+	       (u64_hi32(gr_ctx->global_ctx_buffer_va[PAGEPOOL_VA]) <<
 		(32 - gr_scc_pagepool_base_addr_39_8_align_bits_v()));
 
 	size = gr->global_ctx_buffer[PAGEPOOL].mem.size /
@@ -896,12 +920,12 @@ static int gr_gk20a_commit_global_ctx_buffers(struct gk20a *g,
 	gk20a_dbg_info("pagepool buffer addr : 0x%016llx, size : %d",
 		addr, size);
 
-	g->ops.gr.commit_global_pagepool(g, ch_ctx, addr, size, patch);
+	g->ops.gr.commit_global_pagepool(g, gr_ctx, addr, size, patch);
 
 	/* global bundle cb */
-	addr = (u64_lo32(ch_ctx->global_ctx_buffer_va[CIRCULAR_VA]) >>
+	addr = (u64_lo32(gr_ctx->global_ctx_buffer_va[CIRCULAR_VA]) >>
 		gr_scc_bundle_cb_base_addr_39_8_align_bits_v()) |
-	       (u64_hi32(ch_ctx->global_ctx_buffer_va[CIRCULAR_VA]) <<
+	       (u64_hi32(gr_ctx->global_ctx_buffer_va[CIRCULAR_VA]) <<
 		(32 - gr_scc_bundle_cb_base_addr_39_8_align_bits_v()));
 
 	size = gr->bundle_cb_default_size;
@@ -909,20 +933,20 @@ static int gr_gk20a_commit_global_ctx_buffers(struct gk20a *g,
 	gk20a_dbg_info("bundle cb addr : 0x%016llx, size : %d",
 		addr, size);
 
-	g->ops.gr.commit_global_bundle_cb(g, ch_ctx, addr, size, patch);
+	g->ops.gr.commit_global_bundle_cb(g, gr_ctx, addr, size, patch);
 
 	/* global attrib cb */
-	addr = (u64_lo32(ch_ctx->global_ctx_buffer_va[ATTRIBUTE_VA]) >>
+	addr = (u64_lo32(gr_ctx->global_ctx_buffer_va[ATTRIBUTE_VA]) >>
 		gr_gpcs_setup_attrib_cb_base_addr_39_12_align_bits_v()) |
-	       (u64_hi32(ch_ctx->global_ctx_buffer_va[ATTRIBUTE_VA]) <<
+	       (u64_hi32(gr_ctx->global_ctx_buffer_va[ATTRIBUTE_VA]) <<
 		(32 - gr_gpcs_setup_attrib_cb_base_addr_39_12_align_bits_v()));
 
 	gk20a_dbg_info("attrib cb addr : 0x%016llx", addr);
-	g->ops.gr.commit_global_attrib_cb(g, ch_ctx, addr, patch);
+	g->ops.gr.commit_global_attrib_cb(g, gr_ctx, addr, patch);
 	g->ops.gr.commit_global_cb_manager(g, c, patch);
 
 	if (patch)
-		gr_gk20a_ctx_patch_write_end(g, ch_ctx, false);
+		gr_gk20a_ctx_patch_write_end(g, gr_ctx, false);
 
 	return 0;
 }
@@ -930,7 +954,7 @@ static int gr_gk20a_commit_global_ctx_buffers(struct gk20a *g,
 int gr_gk20a_commit_global_timeslice(struct gk20a *g, struct channel_gk20a *c)
 {
 	struct gr_gk20a *gr = &g->gr;
-	struct channel_ctx_gk20a *ch_ctx = NULL;
+	struct nvgpu_gr_ctx *gr_ctx = NULL;
 	u32 gpm_pd_cfg;
 	u32 pd_ab_dist_cfg0;
 	u32 ds_debug;
@@ -956,22 +980,22 @@ int gr_gk20a_commit_global_timeslice(struct gk20a *g, struct channel_gk20a *c)
 		ds_debug = gr_ds_debug_timeslice_mode_enable_f() | ds_debug;
 		mpc_vtg_debug = gr_gpcs_tpcs_mpc_vtg_debug_timeslice_mode_enabled_f() | mpc_vtg_debug;
 
-		gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_gpm_pd_cfg_r(), gpm_pd_cfg, false);
-		gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_pe_vaf_r(), pe_vaf, false);
-		gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_pes_vsc_vpc_r(), pe_vsc_vpc, false);
-		gr_gk20a_ctx_patch_write(g, ch_ctx, gr_pd_ab_dist_cfg0_r(), pd_ab_dist_cfg0, false);
-		gr_gk20a_ctx_patch_write(g, ch_ctx, gr_ds_debug_r(), ds_debug, false);
-		gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_mpc_vtg_debug_r(), mpc_vtg_debug, false);
+		gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_gpm_pd_cfg_r(), gpm_pd_cfg, false);
+		gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_tpcs_pe_vaf_r(), pe_vaf, false);
+		gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_tpcs_pes_vsc_vpc_r(), pe_vsc_vpc, false);
+		gr_gk20a_ctx_patch_write(g, gr_ctx, gr_pd_ab_dist_cfg0_r(), pd_ab_dist_cfg0, false);
+		gr_gk20a_ctx_patch_write(g, gr_ctx, gr_ds_debug_r(), ds_debug, false);
+		gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_tpcs_mpc_vtg_debug_r(), mpc_vtg_debug, false);
 	} else {
 		gpm_pd_cfg = gr_gpcs_gpm_pd_cfg_timeslice_mode_disable_f() | gpm_pd_cfg;
 		pd_ab_dist_cfg0 = gr_pd_ab_dist_cfg0_timeslice_enable_dis_f() | pd_ab_dist_cfg0;
 		ds_debug = gr_ds_debug_timeslice_mode_disable_f() | ds_debug;
 		mpc_vtg_debug = gr_gpcs_tpcs_mpc_vtg_debug_timeslice_mode_disabled_f() | mpc_vtg_debug;
 
-		gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_gpm_pd_cfg_r(), gpm_pd_cfg, false);
-		gr_gk20a_ctx_patch_write(g, ch_ctx, gr_pd_ab_dist_cfg0_r(), pd_ab_dist_cfg0, false);
-		gr_gk20a_ctx_patch_write(g, ch_ctx, gr_ds_debug_r(), ds_debug, false);
-		gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_tpcs_mpc_vtg_debug_r(), mpc_vtg_debug, false);
+		gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_gpm_pd_cfg_r(), gpm_pd_cfg, false);
+		gr_gk20a_ctx_patch_write(g, gr_ctx, gr_pd_ab_dist_cfg0_r(), pd_ab_dist_cfg0, false);
+		gr_gk20a_ctx_patch_write(g, gr_ctx, gr_ds_debug_r(), ds_debug, false);
+		gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_tpcs_mpc_vtg_debug_r(), mpc_vtg_debug, false);
 	}
 
 	return 0;
@@ -1360,13 +1384,14 @@ static int gr_gk20a_init_golden_ctx_image(struct gk20a *g,
 		struct channel_gk20a *c)
 {
 	struct gr_gk20a *gr = &g->gr;
-	struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
+	struct tsg_gk20a *tsg;
+	struct nvgpu_gr_ctx *gr_ctx = NULL;
 	u32 ctx_header_bytes = ctxsw_prog_fecs_header_v();
 	u32 ctx_header_words;
 	u32 i;
 	u32 data;
 	struct nvgpu_mem *gold_mem = &gr->global_ctx_buffer[GOLDEN_CTX].mem;
-	struct nvgpu_mem *gr_mem = &ch_ctx->gr_ctx->mem;
+	struct nvgpu_mem *gr_mem;
 	u32 err = 0;
 	struct aiv_list_gk20a *sw_ctx_load = &g->gr.ctx_vars.sw_ctx_load;
 	struct av_list_gk20a *sw_method_init = &g->gr.ctx_vars.sw_method_init;
@@ -1374,6 +1399,13 @@ static int gr_gk20a_init_golden_ctx_image(struct gk20a *g,
 
 	gk20a_dbg_fn("");
 
+	tsg = tsg_gk20a_from_ch(c);
+	if (!tsg)
+		return -EINVAL;
+
+	gr_ctx = &tsg->gr_ctx;
+	gr_mem = &gr_ctx->mem;
+
 	/* golden ctx is global to all channels. Although only the first
 	   channel initializes golden image, driver needs to prevent multiple
 	   channels from initializing golden ctx at the same time */
@@ -1565,7 +1597,7 @@ restore_fe_go_idle:
 
 	g->ops.gr.write_zcull_ptr(g, gold_mem, 0);
 
-	err = g->ops.gr.commit_inst(c, ch_ctx->global_ctx_buffer_va[GOLDEN_CTX_VA]);
+	err = g->ops.gr.commit_inst(c, gr_ctx->global_ctx_buffer_va[GOLDEN_CTX_VA]);
 	if (err)
 		goto clean_up;
 
@@ -1614,20 +1646,25 @@ int gr_gk20a_update_smpc_ctxsw_mode(struct gk20a *g,
 		struct channel_gk20a *c,
 		bool enable_smpc_ctxsw)
 {
-	struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
-	struct nvgpu_mem *mem;
+	struct tsg_gk20a *tsg;
+	struct nvgpu_gr_ctx *gr_ctx = NULL;
+	struct nvgpu_mem *mem = NULL;
 	u32 data;
 	int ret;
 
 	gk20a_dbg_fn("");
 
-	if (!ch_ctx->gr_ctx) {
+	tsg = tsg_gk20a_from_ch(c);
+	if (!tsg)
+		return -EINVAL;
+
+	gr_ctx = &tsg->gr_ctx;
+	mem = &gr_ctx->mem;
+	if (!nvgpu_mem_is_valid(mem)) {
 		nvgpu_err(g, "no graphics context allocated");
 		return -EFAULT;
 	}
 
-	mem = &ch_ctx->gr_ctx->mem;
-
 	ret = gk20a_disable_channel_tsg(g, c);
 	if (ret) {
 		nvgpu_err(g, "failed to disable channel/TSG");
@@ -1670,24 +1707,30 @@ int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g,
 		struct channel_gk20a *c,
 		bool enable_hwpm_ctxsw)
 {
-	struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
-	struct pm_ctx_desc *pm_ctx = &ch_ctx->pm_ctx;
-	struct nvgpu_mem *gr_mem;
+	struct tsg_gk20a *tsg;
+	struct nvgpu_mem *gr_mem = NULL;
+	struct nvgpu_gr_ctx *gr_ctx;
+	struct pm_ctx_desc *pm_ctx;
 	u32 data;
 	u64 virt_addr;
-	struct ctx_header_desc *ctx = &c->ch_ctx.ctx_header;
+	struct ctx_header_desc *ctx = &c->ctx_header;
 	struct nvgpu_mem *ctxheader = &ctx->mem;
 	int ret;
 
 	gk20a_dbg_fn("");
 
-	if (!ch_ctx->gr_ctx) {
+	tsg = tsg_gk20a_from_ch(c);
+	if (!tsg)
+		return -EINVAL;
+
+	gr_ctx = &tsg->gr_ctx;
+	pm_ctx = &gr_ctx->pm_ctx;
+	gr_mem = &gr_ctx->mem;
+	if (!nvgpu_mem_is_valid(gr_mem)) {
 		nvgpu_err(g, "no graphics context allocated");
 		return -EFAULT;
 	}
 
-	gr_mem = &ch_ctx->gr_ctx->mem;
-
 	if (enable_hwpm_ctxsw) {
 		if (pm_ctx->pm_mode == ctxsw_prog_main_image_pm_mode_ctxsw_f())
 			return 0;
@@ -1816,20 +1859,25 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
 		struct channel_gk20a *c)
 {
 	struct gr_gk20a *gr = &g->gr;
-	struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
+	struct tsg_gk20a *tsg;
+	struct nvgpu_gr_ctx *gr_ctx;
 	u32 virt_addr_lo;
 	u32 virt_addr_hi;
 	u64 virt_addr = 0;
 	u32 v, data;
 	int ret = 0;
-	struct nvgpu_mem *mem = &ch_ctx->gr_ctx->mem;
-	struct ctx_header_desc *ctx = &c->ch_ctx.ctx_header;
-	struct nvgpu_mem *ctxheader = &ctx->mem;
+	struct nvgpu_mem *mem;
 
 	gk20a_dbg_fn("");
 
+	tsg = tsg_gk20a_from_ch(c);
+	if (!tsg)
+		return -EINVAL;
+
+	gr_ctx = &tsg->gr_ctx;
+	mem = &gr_ctx->mem;
 	if (gr->ctx_vars.local_golden_image == NULL)
-		return -1;
+		return -EINVAL;
 
 	/* Channel gr_ctx buffer is gpu cacheable.
 	   Flush and invalidate before cpu update. */
@@ -1838,11 +1886,6 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
 	if (nvgpu_mem_begin(g, mem))
 		return -ENOMEM;
 
-	if (nvgpu_mem_begin(g, ctxheader)) {
-		ret = -ENOMEM;
-		goto clean_up_mem;
-	}
-
 	nvgpu_mem_wr_n(g, mem, 0,
 		gr->ctx_vars.local_golden_image,
 		gr->ctx_vars.golden_image_size);
@@ -1855,9 +1898,9 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
 
 	/* set priv access map */
 	virt_addr_lo =
-		u64_lo32(ch_ctx->global_ctx_buffer_va[PRIV_ACCESS_MAP_VA]);
+		u64_lo32(gr_ctx->global_ctx_buffer_va[PRIV_ACCESS_MAP_VA]);
 	virt_addr_hi =
-		u64_hi32(ch_ctx->global_ctx_buffer_va[PRIV_ACCESS_MAP_VA]);
+		u64_hi32(gr_ctx->global_ctx_buffer_va[PRIV_ACCESS_MAP_VA]);
 
 	if (g->allow_all)
 		data = ctxsw_prog_main_image_priv_access_map_config_mode_allow_all_f();
@@ -1867,21 +1910,13 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
 	nvgpu_mem_wr(g, mem, ctxsw_prog_main_image_priv_access_map_config_o(),
 		data);
 
-	if (ctxheader->gpu_va) {
-		nvgpu_mem_wr(g, ctxheader,
-			ctxsw_prog_main_image_priv_access_map_addr_lo_o(),
-			virt_addr_lo);
-		nvgpu_mem_wr(g, ctxheader,
-			ctxsw_prog_main_image_priv_access_map_addr_hi_o(),
-			virt_addr_hi);
-	} else {
-		nvgpu_mem_wr(g, mem,
-			ctxsw_prog_main_image_priv_access_map_addr_lo_o(),
-			virt_addr_lo);
-		nvgpu_mem_wr(g, mem,
-			ctxsw_prog_main_image_priv_access_map_addr_hi_o(),
-			virt_addr_hi);
-	}
+	nvgpu_mem_wr(g, mem,
+		ctxsw_prog_main_image_priv_access_map_addr_lo_o(),
+		virt_addr_lo);
+	nvgpu_mem_wr(g, mem,
+		ctxsw_prog_main_image_priv_access_map_addr_hi_o(),
+		virt_addr_hi);
+
 	/* disable verif features */
 	v = nvgpu_mem_rd(g, mem, ctxsw_prog_main_image_misc_options_o());
 	v = v & ~(ctxsw_prog_main_image_misc_options_verif_features_m());
@@ -1889,65 +1924,50 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
 	nvgpu_mem_wr(g, mem, ctxsw_prog_main_image_misc_options_o(), v);
 
 	if (g->ops.gr.update_ctxsw_preemption_mode)
-		g->ops.gr.update_ctxsw_preemption_mode(g, ch_ctx, mem);
+		g->ops.gr.update_ctxsw_preemption_mode(g, c, mem);
 
 	if (g->ops.gr.update_boosted_ctx)
-		g->ops.gr.update_boosted_ctx(g, mem, ch_ctx->gr_ctx);
+		g->ops.gr.update_boosted_ctx(g, mem, gr_ctx);
 
-	virt_addr_lo = u64_lo32(ch_ctx->patch_ctx.mem.gpu_va);
-	virt_addr_hi = u64_hi32(ch_ctx->patch_ctx.mem.gpu_va);
+	virt_addr_lo = u64_lo32(gr_ctx->patch_ctx.mem.gpu_va);
+	virt_addr_hi = u64_hi32(gr_ctx->patch_ctx.mem.gpu_va);
 
 	nvgpu_log(g, gpu_dbg_info, "write patch count = %d",
-		ch_ctx->patch_ctx.data_count);
+		gr_ctx->patch_ctx.data_count);
 	nvgpu_mem_wr(g, mem, ctxsw_prog_main_image_patch_count_o(),
-		ch_ctx->patch_ctx.data_count);
+		gr_ctx->patch_ctx.data_count);
 
-	if (ctxheader->gpu_va) {
-		nvgpu_mem_wr(g, ctxheader,
-			ctxsw_prog_main_image_patch_adr_lo_o(),
-			virt_addr_lo);
-		nvgpu_mem_wr(g, ctxheader,
-			ctxsw_prog_main_image_patch_adr_hi_o(),
-			virt_addr_hi);
-	} else {
-		nvgpu_mem_wr(g, mem,
-			ctxsw_prog_main_image_patch_adr_lo_o(),
-			virt_addr_lo);
-		nvgpu_mem_wr(g, mem,
-			ctxsw_prog_main_image_patch_adr_hi_o(),
-			virt_addr_hi);
-	}
+	nvgpu_mem_wr(g, mem,
+		ctxsw_prog_main_image_patch_adr_lo_o(),
+		virt_addr_lo);
+	nvgpu_mem_wr(g, mem,
+		ctxsw_prog_main_image_patch_adr_hi_o(),
+		virt_addr_hi);
 
 	/* Update main header region of the context buffer with the info needed
 	 * for PM context switching, including mode and possibly a pointer to
 	 * the PM backing store.
 	 */
-	if (ch_ctx->pm_ctx.pm_mode == ctxsw_prog_main_image_pm_mode_ctxsw_f()) {
-		if (ch_ctx->pm_ctx.mem.gpu_va == 0) {
+	if (gr_ctx->pm_ctx.pm_mode == ctxsw_prog_main_image_pm_mode_ctxsw_f()) {
+		if (gr_ctx->pm_ctx.mem.gpu_va == 0) {
 			nvgpu_err(g,
 				"context switched pm with no pm buffer!");
 			nvgpu_mem_end(g, mem);
 			return -EFAULT;
 		}
 
-		virt_addr = ch_ctx->pm_ctx.mem.gpu_va;
+		virt_addr = gr_ctx->pm_ctx.mem.gpu_va;
 	} else
 		virt_addr = 0;
 
 	data = nvgpu_mem_rd(g, mem, ctxsw_prog_main_image_pm_o());
 	data = data & ~ctxsw_prog_main_image_pm_mode_m();
-	data |= ch_ctx->pm_ctx.pm_mode;
+	data |= gr_ctx->pm_ctx.pm_mode;
 
 	nvgpu_mem_wr(g, mem, ctxsw_prog_main_image_pm_o(), data);
 
-	if (ctxheader->gpu_va)
-		g->ops.gr.write_pm_ptr(g, ctxheader, virt_addr);
-	else
-		g->ops.gr.write_pm_ptr(g, mem, virt_addr);
-
+	g->ops.gr.write_pm_ptr(g, mem, virt_addr);
 
-	nvgpu_mem_end(g, ctxheader);
-clean_up_mem:
 	nvgpu_mem_end(g, mem);
 
 	return ret;
@@ -2568,13 +2588,13 @@ static int gr_gk20a_alloc_global_ctx_buffers(struct gk20a *g)
 	return -ENOMEM;
 }
 
-static void gr_gk20a_unmap_global_ctx_buffers(struct channel_gk20a *c)
+static void gr_gk20a_unmap_global_ctx_buffers(struct gk20a *g,
+					      struct vm_gk20a *vm,
+					      struct nvgpu_gr_ctx *gr_ctx)
 {
-	struct vm_gk20a *ch_vm = c->vm;
-	struct gr_gk20a *gr = &c->g->gr;
-	u64 *g_bfr_va = c->ch_ctx.global_ctx_buffer_va;
-	u64 *g_bfr_size = c->ch_ctx.global_ctx_buffer_size;
-	int *g_bfr_index = c->ch_ctx.global_ctx_buffer_index;
+	u64 *g_bfr_va = gr_ctx->global_ctx_buffer_va;
+	u64 *g_bfr_size = gr_ctx->global_ctx_buffer_size;
+	int *g_bfr_index = gr_ctx->global_ctx_buffer_index;
 	u32 i;
 
 	gk20a_dbg_fn("");
@@ -2588,32 +2608,41 @@ static void gr_gk20a_unmap_global_ctx_buffers(struct channel_gk20a *c)
 		 * the correct struct nvgpu_mem to use. Handles the VPR
 		 * vs non-VPR difference in context images.
 		 */
-		mem = &gr->global_ctx_buffer[g_bfr_index[i]].mem;
+		mem = &g->gr.global_ctx_buffer[g_bfr_index[i]].mem;
 
-		nvgpu_gmmu_unmap(ch_vm, mem, g_bfr_va[i]);
+		nvgpu_gmmu_unmap(vm, mem, g_bfr_va[i]);
 		}
 	}
 
-	memset(g_bfr_va, 0, sizeof(c->ch_ctx.global_ctx_buffer_va));
-	memset(g_bfr_size, 0, sizeof(c->ch_ctx.global_ctx_buffer_size));
-	memset(g_bfr_index, 0, sizeof(c->ch_ctx.global_ctx_buffer_index));
+	memset(g_bfr_va, 0, sizeof(gr_ctx->global_ctx_buffer_va));
+	memset(g_bfr_size, 0, sizeof(gr_ctx->global_ctx_buffer_size));
+	memset(g_bfr_index, 0, sizeof(gr_ctx->global_ctx_buffer_index));
 
-	c->ch_ctx.global_ctx_buffer_mapped = false;
+	gr_ctx->global_ctx_buffer_mapped = false;
 }
 
 static int gr_gk20a_map_global_ctx_buffers(struct gk20a *g,
 					struct channel_gk20a *c)
 {
+	struct tsg_gk20a *tsg;
 	struct vm_gk20a *ch_vm = c->vm;
-	u64 *g_bfr_va = c->ch_ctx.global_ctx_buffer_va;
-	u64 *g_bfr_size = c->ch_ctx.global_ctx_buffer_size;
-	int *g_bfr_index = c->ch_ctx.global_ctx_buffer_index;
+	u64 *g_bfr_va;
+	u64 *g_bfr_size;
+	int *g_bfr_index;
 	struct gr_gk20a *gr = &g->gr;
 	struct nvgpu_mem *mem;
 	u64 gpu_va;
 
 	gk20a_dbg_fn("");
 
+	tsg = tsg_gk20a_from_ch(c);
+	if (!tsg)
+		return -EINVAL;
+
+	g_bfr_va = tsg->gr_ctx.global_ctx_buffer_va;
+	g_bfr_size = tsg->gr_ctx.global_ctx_buffer_size;
+	g_bfr_index = tsg->gr_ctx.global_ctx_buffer_index;
+
 	/* Circular Buffer */
 	if (c->vpr &&
 	    nvgpu_mem_is_valid(&gr->global_ctx_buffer[CIRCULAR_VPR].mem)) {
@@ -2688,21 +2717,20 @@ static int gr_gk20a_map_global_ctx_buffers(struct gk20a *g,
 	g_bfr_size[PRIV_ACCESS_MAP_VA] = mem->size;
 	g_bfr_index[PRIV_ACCESS_MAP_VA] = PRIV_ACCESS_MAP;
 
-	c->ch_ctx.global_ctx_buffer_mapped = true;
+	tsg->gr_ctx.global_ctx_buffer_mapped = true;
 	return 0;
 
 clean_up:
-	gr_gk20a_unmap_global_ctx_buffers(c);
+	gr_gk20a_unmap_global_ctx_buffers(g, ch_vm, &tsg->gr_ctx);
 
 	return -ENOMEM;
 }
 
 int gr_gk20a_alloc_gr_ctx(struct gk20a *g,
-			  struct gr_ctx_desc **__gr_ctx, struct vm_gk20a *vm,
+			  struct nvgpu_gr_ctx *gr_ctx, struct vm_gk20a *vm,
 			  u32 class,
 			  u32 padding)
 {
-	struct gr_ctx_desc *gr_ctx = NULL;
 	struct gr_gk20a *gr = &g->gr;
 	int err = 0;
 
@@ -2715,15 +2743,11 @@ int gr_gk20a_alloc_gr_ctx(struct gk20a *g,
 	gr->ctx_vars.buffer_size = gr->ctx_vars.golden_image_size;
 	gr->ctx_vars.buffer_total_size = gr->ctx_vars.golden_image_size;
 
-	gr_ctx = nvgpu_kzalloc(g, sizeof(*gr_ctx));
-	if (!gr_ctx)
-		return -ENOMEM;
-
 	err = nvgpu_dma_alloc_flags(g, NVGPU_DMA_NO_KERNEL_MAPPING,
 				    gr->ctx_vars.buffer_total_size,
 				    &gr_ctx->mem);
 	if (err)
-		goto err_free_ctx;
+		return err;
 
 	gr_ctx->mem.gpu_va = nvgpu_gmmu_map(vm,
 					&gr_ctx->mem,
@@ -2734,15 +2758,10 @@ int gr_gk20a_alloc_gr_ctx(struct gk20a *g,
 	if (!gr_ctx->mem.gpu_va)
 		goto err_free_mem;
 
-	*__gr_ctx = gr_ctx;
-
 	return 0;
 
 err_free_mem:
 	nvgpu_dma_free(g, &gr_ctx->mem);
-err_free_ctx:
-	nvgpu_kfree(g, gr_ctx);
-	gr_ctx = NULL;
 
 	return err;
 }
@@ -2750,7 +2769,7 @@ int gr_gk20a_alloc_gr_ctx(struct gk20a *g,
 static int gr_gk20a_alloc_tsg_gr_ctx(struct gk20a *g,
 			struct tsg_gk20a *tsg, u32 class, u32 padding)
 {
-	struct gr_ctx_desc **gr_ctx = &tsg->tsg_gr_ctx;
+	struct nvgpu_gr_ctx *gr_ctx = &tsg->gr_ctx;
 	int err;
 
 	if (!tsg->vm) {
@@ -2762,57 +2781,44 @@ static int gr_gk20a_alloc_tsg_gr_ctx(struct gk20a *g,
 	if (err)
 		return err;
 
-	return 0;
-}
-
-static int gr_gk20a_alloc_channel_gr_ctx(struct gk20a *g,
-				struct channel_gk20a *c,
-				u32 class,
-				u32 padding)
-{
-	struct gr_ctx_desc **gr_ctx = &c->ch_ctx.gr_ctx;
-	int err = g->ops.gr.alloc_gr_ctx(g, gr_ctx, c->vm, class, padding);
-	if (err)
-		return err;
+	gr_ctx->tsgid = tsg->tsgid;
 
 	return 0;
 }
 
 void gr_gk20a_free_gr_ctx(struct gk20a *g,
-			  struct vm_gk20a *vm, struct gr_ctx_desc *gr_ctx)
+			  struct vm_gk20a *vm, struct nvgpu_gr_ctx *gr_ctx)
 {
 	gk20a_dbg_fn("");
 
-	if (!gr_ctx || !gr_ctx->mem.gpu_va)
-		return;
+	if (gr_ctx->mem.gpu_va) {
+		gr_gk20a_unmap_global_ctx_buffers(g, vm, gr_ctx);
+		gr_gk20a_free_channel_patch_ctx(g, vm, gr_ctx);
+		gr_gk20a_free_channel_pm_ctx(g, vm, gr_ctx);
 
 	if (g->ops.gr.dump_ctxsw_stats &&
 	    g->gr.ctx_vars.dump_ctxsw_stats_on_channel_close)
 		g->ops.gr.dump_ctxsw_stats(g, vm, gr_ctx);
 
 	nvgpu_dma_unmap_free(vm, &gr_ctx->pagepool_ctxsw_buffer);
 	nvgpu_dma_unmap_free(vm, &gr_ctx->betacb_ctxsw_buffer);
 	nvgpu_dma_unmap_free(vm, &gr_ctx->spill_ctxsw_buffer);
 	nvgpu_dma_unmap_free(vm, &gr_ctx->preempt_ctxsw_buffer);
-	nvgpu_gmmu_unmap(vm, &gr_ctx->mem, gr_ctx->mem.gpu_va);
-	nvgpu_dma_free(g, &gr_ctx->mem);
-	nvgpu_kfree(g, gr_ctx);
+	nvgpu_dma_unmap_free(vm, &gr_ctx->mem);
+
+	memset(gr_ctx, 0, sizeof(*gr_ctx));
+	}
 }
 
 void gr_gk20a_free_tsg_gr_ctx(struct tsg_gk20a *tsg)
 {
+	struct gk20a *g = tsg->g;
+
 	if (!tsg->vm) {
-		nvgpu_err(tsg->g, "No address space bound");
+		nvgpu_err(g, "No address space bound");
 		return;
 	}
-	tsg->g->ops.gr.free_gr_ctx(tsg->g, tsg->vm, tsg->tsg_gr_ctx);
-	tsg->tsg_gr_ctx = NULL;
-}
-
-static void gr_gk20a_free_channel_gr_ctx(struct channel_gk20a *c)
-{
-	c->g->ops.gr.free_gr_ctx(c->g, c->vm, c->ch_ctx.gr_ctx);
-	c->ch_ctx.gr_ctx = NULL;
+	tsg->g->ops.gr.free_gr_ctx(g, tsg->vm, &tsg->gr_ctx);
 }
 
 u32 gr_gk20a_get_patch_slots(struct gk20a *g)
@@ -2823,13 +2829,19 @@ u32 gr_gk20a_get_patch_slots(struct gk20a *g) | |||
2823 | static int gr_gk20a_alloc_channel_patch_ctx(struct gk20a *g, | 2829 | static int gr_gk20a_alloc_channel_patch_ctx(struct gk20a *g, |
2824 | struct channel_gk20a *c) | 2830 | struct channel_gk20a *c) |
2825 | { | 2831 | { |
2826 | struct patch_desc *patch_ctx = &c->ch_ctx.patch_ctx; | 2832 | struct tsg_gk20a *tsg; |
2833 | struct patch_desc *patch_ctx; | ||
2827 | struct vm_gk20a *ch_vm = c->vm; | 2834 | struct vm_gk20a *ch_vm = c->vm; |
2828 | u32 alloc_size; | 2835 | u32 alloc_size; |
2829 | int err = 0; | 2836 | int err = 0; |
2830 | 2837 | ||
2831 | gk20a_dbg_fn(""); | 2838 | gk20a_dbg_fn(""); |
2832 | 2839 | ||
2840 | tsg = tsg_gk20a_from_ch(c); | ||
2841 | if (!tsg) | ||
2842 | return -EINVAL; | ||
2843 | |||
2844 | patch_ctx = &tsg->gr_ctx.patch_ctx; | ||
2833 | alloc_size = g->ops.gr.get_patch_slots(g) * | 2845 | alloc_size = g->ops.gr.get_patch_slots(g) * |
2834 | PATCH_CTX_SLOTS_REQUIRED_PER_ENTRY; | 2846 | PATCH_CTX_SLOTS_REQUIRED_PER_ENTRY; |
2835 | 2847 | ||
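
[Editor's note] Helpers that used to reach through ch->ch_ctx now resolve the owning TSG first and refuse to run for a bare channel. A small sketch of that guard pattern, with simplified stand-ins for tsg_gk20a_from_ch() and the channel/TSG tables:

    #include <errno.h>
    #include <stdio.h>

    struct tsg { int id; };

    struct channel {
        int tsgid;             /* -1 when not bound to any TSG */
        struct tsg *tsg_table;
    };

    /* analogue of tsg_gk20a_from_ch(): NULL for a bare channel */
    static struct tsg *tsg_from_ch(struct channel *ch)
    {
        return ch->tsgid < 0 ? NULL : &ch->tsg_table[ch->tsgid];
    }

    static int alloc_patch_ctx(struct channel *ch)
    {
        struct tsg *tsg = tsg_from_ch(ch);

        if (!tsg)
            return -EINVAL;    /* TSG membership is now mandatory */
        printf("allocating patch ctx for tsg %d\n", tsg->id);
        return 0;
    }

    int main(void)
    {
        struct tsg tsgs[1] = { { 0 } };
        struct channel bare  = { .tsgid = -1, .tsg_table = tsgs };
        struct channel bound = { .tsgid = 0,  .tsg_table = tsgs };

        printf("bare: %d\n", alloc_patch_ctx(&bare));
        printf("bound: %d\n", alloc_patch_ctx(&bound));
        return 0;
    }
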
@@ -2845,57 +2857,42 @@ static int gr_gk20a_alloc_channel_patch_ctx(struct gk20a *g, | |||
2845 | return 0; | 2857 | return 0; |
2846 | } | 2858 | } |
2847 | 2859 | ||
2848 | static void gr_gk20a_free_channel_patch_ctx(struct channel_gk20a *c) | 2860 | static void gr_gk20a_free_channel_patch_ctx(struct gk20a *g, |
2861 | struct vm_gk20a *vm, | ||
2862 | struct nvgpu_gr_ctx *gr_ctx) | ||
2849 | { | 2863 | { |
2850 | struct patch_desc *patch_ctx = &c->ch_ctx.patch_ctx; | 2864 | struct patch_desc *patch_ctx = &gr_ctx->patch_ctx; |
2851 | struct gk20a *g = c->g; | ||
2852 | 2865 | ||
2853 | gk20a_dbg_fn(""); | 2866 | gk20a_dbg_fn(""); |
2854 | 2867 | ||
2855 | if (patch_ctx->mem.gpu_va) | 2868 | if (patch_ctx->mem.gpu_va) |
2856 | nvgpu_gmmu_unmap(c->vm, &patch_ctx->mem, | 2869 | nvgpu_gmmu_unmap(vm, &patch_ctx->mem, |
2857 | patch_ctx->mem.gpu_va); | 2870 | patch_ctx->mem.gpu_va); |
2858 | 2871 | ||
2859 | nvgpu_dma_free(g, &patch_ctx->mem); | 2872 | nvgpu_dma_free(g, &patch_ctx->mem); |
2860 | patch_ctx->data_count = 0; | 2873 | patch_ctx->data_count = 0; |
2861 | } | 2874 | } |
2862 | 2875 | ||
2863 | static void gr_gk20a_free_channel_pm_ctx(struct channel_gk20a *c) | 2876 | static void gr_gk20a_free_channel_pm_ctx(struct gk20a *g, |
2877 | struct vm_gk20a *vm, | ||
2878 | struct nvgpu_gr_ctx *gr_ctx) | ||
2864 | { | 2879 | { |
2865 | struct pm_ctx_desc *pm_ctx = &c->ch_ctx.pm_ctx; | 2880 | struct pm_ctx_desc *pm_ctx = &gr_ctx->pm_ctx; |
2866 | struct gk20a *g = c->g; | ||
2867 | 2881 | ||
2868 | gk20a_dbg_fn(""); | 2882 | gk20a_dbg_fn(""); |
2869 | 2883 | ||
2870 | if (pm_ctx->mem.gpu_va) { | 2884 | if (pm_ctx->mem.gpu_va) { |
2871 | nvgpu_gmmu_unmap(c->vm, &pm_ctx->mem, pm_ctx->mem.gpu_va); | 2885 | nvgpu_gmmu_unmap(vm, &pm_ctx->mem, pm_ctx->mem.gpu_va); |
2872 | 2886 | ||
2873 | nvgpu_dma_free(g, &pm_ctx->mem); | 2887 | nvgpu_dma_free(g, &pm_ctx->mem); |
2874 | } | 2888 | } |
2875 | } | 2889 | } |
2876 | 2890 | ||
2877 | void gk20a_free_channel_ctx(struct channel_gk20a *c, bool is_tsg) | ||
2878 | { | ||
2879 | if(c->g->ops.fifo.free_channel_ctx_header) | ||
2880 | c->g->ops.fifo.free_channel_ctx_header(c); | ||
2881 | gr_gk20a_unmap_global_ctx_buffers(c); | ||
2882 | gr_gk20a_free_channel_patch_ctx(c); | ||
2883 | gr_gk20a_free_channel_pm_ctx(c); | ||
2884 | if (!is_tsg) | ||
2885 | gr_gk20a_free_channel_gr_ctx(c); | ||
2886 | |||
2887 | /* zcull_ctx */ | ||
2888 | |||
2889 | memset(&c->ch_ctx, 0, sizeof(struct channel_ctx_gk20a)); | ||
2890 | |||
2891 | c->first_init = false; | ||
2892 | } | ||
2893 | |||
2894 | int gk20a_alloc_obj_ctx(struct channel_gk20a *c, u32 class_num, u32 flags) | 2891 | int gk20a_alloc_obj_ctx(struct channel_gk20a *c, u32 class_num, u32 flags) |
2895 | { | 2892 | { |
2896 | struct gk20a *g = c->g; | 2893 | struct gk20a *g = c->g; |
2897 | struct fifo_gk20a *f = &g->fifo; | 2894 | struct fifo_gk20a *f = &g->fifo; |
2898 | struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx; | 2895 | struct nvgpu_gr_ctx *gr_ctx; |
2899 | struct tsg_gk20a *tsg = NULL; | 2896 | struct tsg_gk20a *tsg = NULL; |
2900 | int err = 0; | 2897 | int err = 0; |
2901 | 2898 | ||
@@ -2917,92 +2914,64 @@ int gk20a_alloc_obj_ctx(struct channel_gk20a *c, u32 class_num, u32 flags) | |||
2917 | } | 2914 | } |
2918 | c->obj_class = class_num; | 2915 | c->obj_class = class_num; |
2919 | 2916 | ||
2920 | if (gk20a_is_channel_marked_as_tsg(c)) | 2917 | if (!gk20a_is_channel_marked_as_tsg(c)) |
2921 | tsg = &f->tsg[c->tsgid]; | 2918 | return -EINVAL; |
2922 | 2919 | ||
2923 | /* allocate gr ctx buffer */ | 2920 | tsg = &f->tsg[c->tsgid]; |
2924 | if (!tsg) { | 2921 | gr_ctx = &tsg->gr_ctx; |
2925 | if (!ch_ctx->gr_ctx) { | 2922 | |
2926 | err = gr_gk20a_alloc_channel_gr_ctx(g, c, | 2923 | if (!nvgpu_mem_is_valid(&gr_ctx->mem)) { |
2927 | class_num, | 2924 | tsg->vm = c->vm; |
2928 | flags); | 2925 | nvgpu_vm_get(tsg->vm); |
2929 | if (err) { | 2926 | err = gr_gk20a_alloc_tsg_gr_ctx(g, tsg, |
2930 | nvgpu_err(g, | 2927 | class_num, |
2931 | "fail to allocate gr ctx buffer"); | 2928 | flags); |
2932 | goto out; | 2929 | if (err) { |
2933 | } | ||
2934 | } else { | ||
2935 | /*TBD: needs to be more subtle about which is | ||
2936 | * being allocated as some are allowed to be | ||
2937 | * allocated along same channel */ | ||
2938 | nvgpu_err(g, | 2930 | nvgpu_err(g, |
2939 | "too many classes alloc'd on same channel"); | 2931 | "fail to allocate TSG gr ctx buffer"); |
2940 | err = -EINVAL; | 2932 | nvgpu_vm_put(tsg->vm); |
2933 | tsg->vm = NULL; | ||
2941 | goto out; | 2934 | goto out; |
2942 | } | 2935 | } |
2943 | } else { | 2936 | |
2944 | if (!tsg->tsg_gr_ctx) { | 2937 | /* allocate patch buffer */ |
2945 | tsg->vm = c->vm; | 2938 | if (!nvgpu_mem_is_valid(&gr_ctx->patch_ctx.mem)) { |
2946 | nvgpu_vm_get(tsg->vm); | 2939 | gr_ctx->patch_ctx.data_count = 0; |
2947 | err = gr_gk20a_alloc_tsg_gr_ctx(g, tsg, | 2940 | err = gr_gk20a_alloc_channel_patch_ctx(g, c); |
2948 | class_num, | ||
2949 | flags); | ||
2950 | if (err) { | 2941 | if (err) { |
2951 | nvgpu_err(g, | 2942 | nvgpu_err(g, |
2952 | "fail to allocate TSG gr ctx buffer"); | 2943 | "fail to allocate patch buffer"); |
2953 | nvgpu_vm_put(tsg->vm); | ||
2954 | tsg->vm = NULL; | ||
2955 | goto out; | 2944 | goto out; |
2956 | } | 2945 | } |
2957 | } | 2946 | } |
2958 | ch_ctx->gr_ctx = tsg->tsg_gr_ctx; | ||
2959 | } | ||
2960 | |||
2961 | /* PM ctxt switch is off by default */ | ||
2962 | ch_ctx->pm_ctx.pm_mode = ctxsw_prog_main_image_pm_mode_no_ctxsw_f(); | ||
2963 | 2947 | ||
2964 | /* commit gr ctx buffer */ | 2948 | /* map global buffer to channel gpu_va and commit */ |
2965 | err = g->ops.gr.commit_inst(c, ch_ctx->gr_ctx->mem.gpu_va); | 2949 | err = gr_gk20a_map_global_ctx_buffers(g, c); |
2966 | if (err) { | ||
2967 | nvgpu_err(g, | ||
2968 | "fail to commit gr ctx buffer"); | ||
2969 | goto out; | ||
2970 | } | ||
2971 | |||
2972 | /* allocate patch buffer */ | ||
2973 | if (!nvgpu_mem_is_valid(&ch_ctx->patch_ctx.mem)) { | ||
2974 | ch_ctx->patch_ctx.data_count = 0; | ||
2975 | err = gr_gk20a_alloc_channel_patch_ctx(g, c); | ||
2976 | if (err) { | 2950 | if (err) { |
2977 | nvgpu_err(g, | 2951 | nvgpu_err(g, |
2978 | "fail to allocate patch buffer"); | 2952 | "fail to map global ctx buffer"); |
2979 | goto out; | 2953 | goto out; |
2980 | } | 2954 | } |
2981 | } | 2955 | gr_gk20a_commit_global_ctx_buffers(g, c, true); |
2982 | 2956 | ||
2983 | /* map global buffer to channel gpu_va and commit */ | 2957 | /* commit gr ctx buffer */ |
2984 | if (!ch_ctx->global_ctx_buffer_mapped) { | 2958 | err = g->ops.gr.commit_inst(c, gr_ctx->mem.gpu_va); |
2985 | err = gr_gk20a_map_global_ctx_buffers(g, c); | ||
2986 | if (err) { | 2959 | if (err) { |
2987 | nvgpu_err(g, | 2960 | nvgpu_err(g, |
2988 | "fail to map global ctx buffer"); | 2961 | "fail to commit gr ctx buffer"); |
2989 | goto out; | 2962 | goto out; |
2990 | } | 2963 | } |
2991 | gr_gk20a_commit_global_ctx_buffers(g, c, true); | ||
2992 | } | ||
2993 | 2964 | ||
2994 | /* init golden image, ELPG enabled after this is done */ | 2965 | /* init golden image, ELPG enabled after this is done */ |
2995 | err = gr_gk20a_init_golden_ctx_image(g, c); | 2966 | err = gr_gk20a_init_golden_ctx_image(g, c); |
2996 | if (err) { | 2967 | if (err) { |
2997 | nvgpu_err(g, | 2968 | nvgpu_err(g, |
2998 | "fail to init golden ctx image"); | 2969 | "fail to init golden ctx image"); |
2999 | goto out; | 2970 | goto out; |
3000 | } | 2971 | } |
3001 | 2972 | ||
3002 | /* load golden image */ | 2973 | /* load golden image */ |
3003 | if (!c->first_init) { | 2974 | gr_gk20a_load_golden_ctx_image(g, c); |
3004 | err = gr_gk20a_elpg_protected_call(g, | ||
3005 | gr_gk20a_load_golden_ctx_image(g, c)); | ||
3006 | if (err) { | 2975 | if (err) { |
3007 | nvgpu_err(g, | 2976 | nvgpu_err(g, |
3008 | "fail to load golden ctx image"); | 2977 | "fail to load golden ctx image"); |
@@ -3016,11 +2985,21 @@ int gk20a_alloc_obj_ctx(struct channel_gk20a *c, u32 class_num, u32 flags) | |||
3016 | "fail to bind channel for ctxsw trace"); | 2985 | "fail to bind channel for ctxsw trace"); |
3017 | } | 2986 | } |
3018 | #endif | 2987 | #endif |
3019 | c->first_init = true; | ||
3020 | } | ||
3021 | 2988 | ||
3022 | if (g->ops.gr.set_czf_bypass) | 2989 | if (g->ops.gr.set_czf_bypass) |
3023 | g->ops.gr.set_czf_bypass(g, c); | 2990 | g->ops.gr.set_czf_bypass(g, c); |
2991 | |||
2992 | /* PM ctxt switch is off by default */ | ||
2993 | gr_ctx->pm_ctx.pm_mode = ctxsw_prog_main_image_pm_mode_no_ctxsw_f(); | ||
2994 | } else { | ||
2995 | /* commit gr ctx buffer */ | ||
2996 | err = g->ops.gr.commit_inst(c, gr_ctx->mem.gpu_va); | ||
2997 | if (err) { | ||
2998 | nvgpu_err(g, | ||
2999 | "fail to commit gr ctx buffer"); | ||
3000 | goto out; | ||
3001 | } | ||
3002 | } | ||
3024 | 3003 | ||
3025 | gk20a_dbg_fn("done"); | 3004 | gk20a_dbg_fn("done"); |
3026 | return 0; | 3005 | return 0; |
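
[Editor's note] The rewritten gk20a_alloc_obj_ctx() allocates the shared context only for the first channel of the TSG; every later channel takes the new else branch and merely commits the existing context address into its instance block. A compact sketch of that allocate-once-then-share flow (the 0x100000 address and the helper names are invented for illustration):

    #include <stdio.h>

    struct gr_ctx { unsigned long gpu_va; };  /* 0 == not yet allocated */
    struct tsg { struct gr_ctx gr_ctx; };

    static int commit_inst(int chid, unsigned long gpu_va)
    {
        printf("ch %d: inst block -> ctx va 0x%lx\n", chid, gpu_va);
        return 0;
    }

    /* the first channel in the TSG allocates the shared context; every
     * later channel only commits the existing address */
    static int alloc_obj_ctx(struct tsg *tsg, int chid)
    {
        if (!tsg->gr_ctx.gpu_va) {
            tsg->gr_ctx.gpu_va = 0x100000;  /* pretend allocation */
            printf("ch %d: allocated TSG gr ctx\n", chid);
        }
        return commit_inst(chid, tsg->gr_ctx.gpu_va);
    }

    int main(void)
    {
        struct tsg tsg = { { 0 } };

        alloc_obj_ctx(&tsg, 1);  /* allocates and commits */
        alloc_obj_ctx(&tsg, 2);  /* commits only */
        return 0;
    }
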
@@ -3553,8 +3532,14 @@ u32 gr_gk20a_get_ctxsw_zcull_size(struct gk20a *g, struct gr_gk20a *gr) | |||
3553 | int gr_gk20a_bind_ctxsw_zcull(struct gk20a *g, struct gr_gk20a *gr, | 3532 | int gr_gk20a_bind_ctxsw_zcull(struct gk20a *g, struct gr_gk20a *gr, |
3554 | struct channel_gk20a *c, u64 zcull_va, u32 mode) | 3533 | struct channel_gk20a *c, u64 zcull_va, u32 mode) |
3555 | { | 3534 | { |
3556 | struct zcull_ctx_desc *zcull_ctx = &c->ch_ctx.zcull_ctx; | 3535 | struct tsg_gk20a *tsg; |
3536 | struct zcull_ctx_desc *zcull_ctx; | ||
3557 | 3537 | ||
3538 | tsg = tsg_gk20a_from_ch(c); | ||
3539 | if (!tsg) | ||
3540 | return -EINVAL; | ||
3541 | |||
3542 | zcull_ctx = &tsg->gr_ctx.zcull_ctx; | ||
3558 | zcull_ctx->ctx_sw_mode = mode; | 3543 | zcull_ctx->ctx_sw_mode = mode; |
3559 | zcull_ctx->gpu_va = zcull_va; | 3544 | zcull_ctx->gpu_va = zcull_va; |
3560 | 3545 | ||
@@ -6516,7 +6501,7 @@ void gk20a_gr_init_ovr_sm_dsm_perf(void) | |||
6516 | * write will actually occur. so later we should put a lazy, | 6501 | * write will actually occur. so later we should put a lazy, |
6517 | * map-and-hold system in the patch write state */ | 6502 | * map-and-hold system in the patch write state */ |
6518 | static int gr_gk20a_ctx_patch_smpc(struct gk20a *g, | 6503 | static int gr_gk20a_ctx_patch_smpc(struct gk20a *g, |
6519 | struct channel_ctx_gk20a *ch_ctx, | 6504 | struct channel_gk20a *ch, |
6520 | u32 addr, u32 data, | 6505 | u32 addr, u32 data, |
6521 | struct nvgpu_mem *mem) | 6506 | struct nvgpu_mem *mem) |
6522 | { | 6507 | { |
@@ -6531,9 +6516,16 @@ static int gr_gk20a_ctx_patch_smpc(struct gk20a *g, | |||
6531 | u32 *ovr_perf_regs = NULL; | 6516 | u32 *ovr_perf_regs = NULL; |
6532 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); | 6517 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); |
6533 | u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); | 6518 | u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); |
6534 | struct ctx_header_desc *ctx = &ch_ctx->ctx_header; | 6519 | struct tsg_gk20a *tsg; |
6520 | struct nvgpu_gr_ctx *gr_ctx; | ||
6521 | struct ctx_header_desc *ctx = &ch->ctx_header; | ||
6535 | struct nvgpu_mem *ctxheader = &ctx->mem; | 6522 | struct nvgpu_mem *ctxheader = &ctx->mem; |
6536 | 6523 | ||
6524 | tsg = tsg_gk20a_from_ch(ch); | ||
6525 | if (!tsg) | ||
6526 | return -EINVAL; | ||
6527 | |||
6528 | gr_ctx = &tsg->gr_ctx; | ||
6537 | g->ops.gr.init_ovr_sm_dsm_perf(); | 6529 | g->ops.gr.init_ovr_sm_dsm_perf(); |
6538 | g->ops.gr.init_sm_dsm_reg_info(); | 6530 | g->ops.gr.init_sm_dsm_reg_info(); |
6539 | g->ops.gr.get_ovr_perf_regs(g, &num_ovr_perf_regs, &ovr_perf_regs); | 6531 | g->ops.gr.get_ovr_perf_regs(g, &num_ovr_perf_regs, &ovr_perf_regs); |
@@ -6556,17 +6548,17 @@ static int gr_gk20a_ctx_patch_smpc(struct gk20a *g, | |||
6556 | ctxsw_prog_main_image_patch_count_o()); | 6548 | ctxsw_prog_main_image_patch_count_o()); |
6557 | 6549 | ||
6558 | if (!tmp) | 6550 | if (!tmp) |
6559 | ch_ctx->patch_ctx.data_count = 0; | 6551 | gr_ctx->patch_ctx.data_count = 0; |
6560 | 6552 | ||
6561 | gr_gk20a_ctx_patch_write(g, ch_ctx, | 6553 | gr_gk20a_ctx_patch_write(g, gr_ctx, |
6562 | addr, data, true); | 6554 | addr, data, true); |
6563 | 6555 | ||
6564 | vaddr_lo = u64_lo32(ch_ctx->patch_ctx.mem.gpu_va); | 6556 | vaddr_lo = u64_lo32(gr_ctx->patch_ctx.mem.gpu_va); |
6565 | vaddr_hi = u64_hi32(ch_ctx->patch_ctx.mem.gpu_va); | 6557 | vaddr_hi = u64_hi32(gr_ctx->patch_ctx.mem.gpu_va); |
6566 | 6558 | ||
6567 | nvgpu_mem_wr(g, mem, | 6559 | nvgpu_mem_wr(g, mem, |
6568 | ctxsw_prog_main_image_patch_count_o(), | 6560 | ctxsw_prog_main_image_patch_count_o(), |
6569 | ch_ctx->patch_ctx.data_count); | 6561 | gr_ctx->patch_ctx.data_count); |
6570 | if (ctxheader->gpu_va) { | 6562 | if (ctxheader->gpu_va) { |
6571 | /* | 6563 | /* |
6572 | * Main context can be gr_ctx or pm_ctx. | 6564 | * Main context can be gr_ctx or pm_ctx. |
@@ -6575,7 +6567,7 @@ static int gr_gk20a_ctx_patch_smpc(struct gk20a *g, | |||
6575 | * __gr_gk20a_exec_ctx_ops. Need to take | 6567 | * __gr_gk20a_exec_ctx_ops. Need to take |
6576 | * care of cpu access to ctxheader here. | 6568 | * care of cpu access to ctxheader here. |
6577 | */ | 6569 | */ |
6578 | if (nvgpu_mem_begin(g, ctxheader)) | 6570 | if (nvgpu_mem_begin(g, ctxheader)) |
6579 | return -ENOMEM; | 6571 | return -ENOMEM; |
6580 | nvgpu_mem_wr(g, ctxheader, | 6572 | nvgpu_mem_wr(g, ctxheader, |
6581 | ctxsw_prog_main_image_patch_adr_lo_o(), | 6573 | ctxsw_prog_main_image_patch_adr_lo_o(), |
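
[Editor's note] The patch-buffer address is written into the context image as two 32-bit words, which is why the code above splits gr_ctx->patch_ctx.mem.gpu_va with u64_lo32()/u64_hi32(). A tiny self-contained equivalent of that split:

    #include <stdint.h>
    #include <stdio.h>

    /* same idea as nvgpu's u64_lo32()/u64_hi32() helpers */
    static uint32_t lo32(uint64_t v) { return (uint32_t)v; }
    static uint32_t hi32(uint64_t v) { return (uint32_t)(v >> 32); }

    int main(void)
    {
        uint64_t patch_va = 0x0000004512340000ULL;  /* made-up GPU VA */

        /* the context image stores the patch buffer address as two
         * 32-bit words (patch_adr_lo / patch_adr_hi) */
        printf("lo=0x%08x hi=0x%08x\n",
               (unsigned)lo32(patch_va), (unsigned)hi32(patch_va));
        return 0;
    }
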
@@ -7690,7 +7682,8 @@ int __gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch, | |||
7690 | bool ch_is_curr_ctx) | 7682 | bool ch_is_curr_ctx) |
7691 | { | 7683 | { |
7692 | struct gk20a *g = ch->g; | 7684 | struct gk20a *g = ch->g; |
7693 | struct channel_ctx_gk20a *ch_ctx = &ch->ch_ctx; | 7685 | struct tsg_gk20a *tsg; |
7686 | struct nvgpu_gr_ctx *gr_ctx; | ||
7694 | bool gr_ctx_ready = false; | 7687 | bool gr_ctx_ready = false; |
7695 | bool pm_ctx_ready = false; | 7688 | bool pm_ctx_ready = false; |
7696 | struct nvgpu_mem *current_mem = NULL; | 7689 | struct nvgpu_mem *current_mem = NULL; |
@@ -7707,6 +7700,12 @@ int __gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch, | |||
7707 | gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "wr_ops=%d rd_ops=%d", | 7700 | gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "wr_ops=%d rd_ops=%d", |
7708 | num_ctx_wr_ops, num_ctx_rd_ops); | 7701 | num_ctx_wr_ops, num_ctx_rd_ops); |
7709 | 7702 | ||
7703 | tsg = tsg_gk20a_from_ch(ch); | ||
7704 | if (!tsg) | ||
7705 | return -EINVAL; | ||
7706 | |||
7707 | gr_ctx = &tsg->gr_ctx; | ||
7708 | |||
7710 | if (ch_is_curr_ctx) { | 7709 | if (ch_is_curr_ctx) { |
7711 | for (pass = 0; pass < 2; pass++) { | 7710 | for (pass = 0; pass < 2; pass++) { |
7712 | ctx_op_nr = 0; | 7711 | ctx_op_nr = 0; |
@@ -7778,7 +7777,7 @@ int __gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch, | |||
7778 | } | 7777 | } |
7779 | offset_addrs = offsets + max_offsets; | 7778 | offset_addrs = offsets + max_offsets; |
7780 | 7779 | ||
7781 | err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx, false); | 7780 | err = gr_gk20a_ctx_patch_write_begin(g, gr_ctx, false); |
7782 | if (err) | 7781 | if (err) |
7783 | goto cleanup; | 7782 | goto cleanup; |
7784 | 7783 | ||
@@ -7812,13 +7811,13 @@ int __gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch, | |||
7812 | * gr_gk20a_apply_instmem_overrides, | 7811 | * gr_gk20a_apply_instmem_overrides, |
7813 | * recoded in-place instead. | 7812 | * recoded in-place instead. |
7814 | */ | 7813 | */ |
7815 | if (nvgpu_mem_begin(g, &ch_ctx->gr_ctx->mem)) { | 7814 | if (nvgpu_mem_begin(g, &gr_ctx->mem)) { |
7816 | err = -ENOMEM; | 7815 | err = -ENOMEM; |
7817 | goto cleanup; | 7816 | goto cleanup; |
7818 | } | 7817 | } |
7819 | gr_ctx_ready = true; | 7818 | gr_ctx_ready = true; |
7820 | } | 7819 | } |
7821 | current_mem = &ch_ctx->gr_ctx->mem; | 7820 | current_mem = &gr_ctx->mem; |
7822 | } else { | 7821 | } else { |
7823 | err = gr_gk20a_get_pm_ctx_buffer_offsets(g, | 7822 | err = gr_gk20a_get_pm_ctx_buffer_offsets(g, |
7824 | ctx_ops[i].offset, | 7823 | ctx_ops[i].offset, |
@@ -7835,19 +7834,19 @@ int __gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch, | |||
7835 | } | 7834 | } |
7836 | if (!pm_ctx_ready) { | 7835 | if (!pm_ctx_ready) { |
7837 | /* Make sure ctx buffer was initialized */ | 7836 | /* Make sure ctx buffer was initialized */ |
7838 | if (!nvgpu_mem_is_valid(&ch_ctx->pm_ctx.mem)) { | 7837 | if (!nvgpu_mem_is_valid(&gr_ctx->pm_ctx.mem)) { |
7839 | nvgpu_err(g, | 7838 | nvgpu_err(g, |
7840 | "Invalid ctx buffer"); | 7839 | "Invalid ctx buffer"); |
7841 | err = -EINVAL; | 7840 | err = -EINVAL; |
7842 | goto cleanup; | 7841 | goto cleanup; |
7843 | } | 7842 | } |
7844 | if (nvgpu_mem_begin(g, &ch_ctx->pm_ctx.mem)) { | 7843 | if (nvgpu_mem_begin(g, &gr_ctx->pm_ctx.mem)) { |
7845 | err = -ENOMEM; | 7844 | err = -ENOMEM; |
7846 | goto cleanup; | 7845 | goto cleanup; |
7847 | } | 7846 | } |
7848 | pm_ctx_ready = true; | 7847 | pm_ctx_ready = true; |
7849 | } | 7848 | } |
7850 | current_mem = &ch_ctx->pm_ctx.mem; | 7849 | current_mem = &gr_ctx->pm_ctx.mem; |
7851 | } | 7850 | } |
7852 | 7851 | ||
7853 | /* if this is a quad access, setup for special access*/ | 7852 | /* if this is a quad access, setup for special access*/ |
@@ -7860,7 +7859,7 @@ int __gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch, | |||
7860 | /* sanity check gr ctxt offsets, | 7859 | /* sanity check gr ctxt offsets, |
7861 | * don't write outside, worst case | 7860 | * don't write outside, worst case |
7862 | */ | 7861 | */ |
7863 | if ((current_mem == &ch_ctx->gr_ctx->mem) && | 7862 | if ((current_mem == &gr_ctx->mem) && |
7864 | (offsets[j] >= g->gr.ctx_vars.golden_image_size)) | 7863 | (offsets[j] >= g->gr.ctx_vars.golden_image_size)) |
7865 | continue; | 7864 | continue; |
7866 | if (pass == 0) { /* write pass */ | 7865 | if (pass == 0) { /* write pass */ |
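
[Editor's note] Inside the write pass, offsets computed for the gr context are checked against the golden image size before any store, so a bad offset table cannot scribble past the context image. A simplified sketch of that guard (sizes and values are made up):

    #include <stdio.h>

    /* mirrors the "don't write outside, worst case" guard: any offset
     * at or past the known image size is skipped */
    static void ctx_write(unsigned int *image, unsigned int image_size,
                          unsigned int offset, unsigned int value)
    {
        if (offset >= image_size) {
            printf("skip offset 0x%x (outside golden image)\n", offset);
            return;
        }
        image[offset / 4] = value;
    }

    int main(void)
    {
        unsigned int image[256] = {0};

        ctx_write(image, sizeof(image), 0x10, 0xdeadbeef);  /* stored */
        ctx_write(image, sizeof(image), 0x4000, 0x1);       /* skipped */
        printf("image[4] = 0x%x\n", image[4]);
        return 0;
    }
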
@@ -7886,7 +7885,7 @@ int __gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch, | |||
7886 | 7885 | ||
7887 | /* check to see if we need to add a special WAR | 7886 | /* check to see if we need to add a special WAR |
7888 | for some of the SMPC perf regs */ | 7887 | for some of the SMPC perf regs */ |
7889 | gr_gk20a_ctx_patch_smpc(g, ch_ctx, offset_addrs[j], | 7888 | gr_gk20a_ctx_patch_smpc(g, ch, offset_addrs[j], |
7890 | v, current_mem); | 7889 | v, current_mem); |
7891 | 7890 | ||
7892 | } else { /* read pass */ | 7891 | } else { /* read pass */ |
@@ -7915,12 +7914,12 @@ int __gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch, | |||
7915 | if (offsets) | 7914 | if (offsets) |
7916 | nvgpu_kfree(g, offsets); | 7915 | nvgpu_kfree(g, offsets); |
7917 | 7916 | ||
7918 | if (ch_ctx->patch_ctx.mem.cpu_va) | 7917 | if (gr_ctx->patch_ctx.mem.cpu_va) |
7919 | gr_gk20a_ctx_patch_write_end(g, ch_ctx, gr_ctx_ready); | 7918 | gr_gk20a_ctx_patch_write_end(g, gr_ctx, gr_ctx_ready); |
7920 | if (gr_ctx_ready) | 7919 | if (gr_ctx_ready) |
7921 | nvgpu_mem_end(g, &ch_ctx->gr_ctx->mem); | 7920 | nvgpu_mem_end(g, &gr_ctx->mem); |
7922 | if (pm_ctx_ready) | 7921 | if (pm_ctx_ready) |
7923 | nvgpu_mem_end(g, &ch_ctx->pm_ctx.mem); | 7922 | nvgpu_mem_end(g, &gr_ctx->pm_ctx.mem); |
7924 | 7923 | ||
7925 | return err; | 7924 | return err; |
7926 | } | 7925 | } |
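
[Editor's note] The cleanup path above relies on the gr_ctx_ready/pm_ctx_ready flags so that only buffers actually mapped with nvgpu_mem_begin() get unmapped with nvgpu_mem_end(). A generic sketch of that lazily-map-and-unwind shape, with printf stubs standing in for the real mapping calls:

    #include <stdbool.h>
    #include <stdio.h>

    static int mem_begin(const char *name) { printf("map %s\n", name); return 0; }
    static void mem_end(const char *name)  { printf("unmap %s\n", name); }

    /* lazily map each buffer the first time an op touches it, and
     * record that in a *_ready flag so cleanup unmaps exactly what
     * was mapped, the same shape as __gr_gk20a_exec_ctx_ops() */
    static int exec_ops(bool touch_gr, bool touch_pm)
    {
        bool gr_ready = false, pm_ready = false;
        int err = 0;

        if (touch_gr) {
            err = mem_begin("gr_ctx");
            if (err)
                goto cleanup;
            gr_ready = true;
        }
        if (touch_pm) {
            err = mem_begin("pm_ctx");
            if (err)
                goto cleanup;
            pm_ready = true;
        }
        /* ... perform the context reads and writes here ... */
    cleanup:
        if (gr_ready)
            mem_end("gr_ctx");
        if (pm_ready)
            mem_end("pm_ctx");
        return err;
    }

    int main(void)
    {
        return exec_ops(true, true);
    }
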
@@ -7962,23 +7961,23 @@ int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch, | |||
7962 | } | 7961 | } |
7963 | 7962 | ||
7964 | void gr_gk20a_commit_global_pagepool(struct gk20a *g, | 7963 | void gr_gk20a_commit_global_pagepool(struct gk20a *g, |
7965 | struct channel_ctx_gk20a *ch_ctx, | 7964 | struct nvgpu_gr_ctx *gr_ctx, |
7966 | u64 addr, u32 size, bool patch) | 7965 | u64 addr, u32 size, bool patch) |
7967 | { | 7966 | { |
7968 | gr_gk20a_ctx_patch_write(g, ch_ctx, gr_scc_pagepool_base_r(), | 7967 | gr_gk20a_ctx_patch_write(g, gr_ctx, gr_scc_pagepool_base_r(), |
7969 | gr_scc_pagepool_base_addr_39_8_f(addr), patch); | 7968 | gr_scc_pagepool_base_addr_39_8_f(addr), patch); |
7970 | 7969 | ||
7971 | gr_gk20a_ctx_patch_write(g, ch_ctx, gr_scc_pagepool_r(), | 7970 | gr_gk20a_ctx_patch_write(g, gr_ctx, gr_scc_pagepool_r(), |
7972 | gr_scc_pagepool_total_pages_f(size) | | 7971 | gr_scc_pagepool_total_pages_f(size) | |
7973 | gr_scc_pagepool_valid_true_f(), patch); | 7972 | gr_scc_pagepool_valid_true_f(), patch); |
7974 | 7973 | ||
7975 | gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_gcc_pagepool_base_r(), | 7974 | gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_gcc_pagepool_base_r(), |
7976 | gr_gpcs_gcc_pagepool_base_addr_39_8_f(addr), patch); | 7975 | gr_gpcs_gcc_pagepool_base_addr_39_8_f(addr), patch); |
7977 | 7976 | ||
7978 | gr_gk20a_ctx_patch_write(g, ch_ctx, gr_gpcs_gcc_pagepool_r(), | 7977 | gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_gcc_pagepool_r(), |
7979 | gr_gpcs_gcc_pagepool_total_pages_f(size), patch); | 7978 | gr_gpcs_gcc_pagepool_total_pages_f(size), patch); |
7980 | 7979 | ||
7981 | gr_gk20a_ctx_patch_write(g, ch_ctx, gr_pd_pagepool_r(), | 7980 | gr_gk20a_ctx_patch_write(g, gr_ctx, gr_pd_pagepool_r(), |
7982 | gr_pd_pagepool_total_pages_f(size) | | 7981 | gr_pd_pagepool_total_pages_f(size) | |
7983 | gr_pd_pagepool_valid_true_f(), patch); | 7982 | gr_pd_pagepool_valid_true_f(), patch); |
7984 | } | 7983 | } |
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
index 1c22923b..6cc15c94 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
@@ -28,7 +28,6 @@ | |||
28 | #include "gr_t19x.h" | 28 | #include "gr_t19x.h" |
29 | #endif | 29 | #endif |
30 | 30 | ||
31 | #include "tsg_gk20a.h" | ||
32 | #include "gr_ctx_gk20a.h" | 31 | #include "gr_ctx_gk20a.h" |
33 | #include "mm_gk20a.h" | 32 | #include "mm_gk20a.h" |
34 | 33 | ||
@@ -48,6 +47,10 @@ | |||
48 | 47 | ||
49 | #define GK20A_TIMEOUT_FPGA 100000 /* 100 sec */ | 48 | #define GK20A_TIMEOUT_FPGA 100000 /* 100 sec */ |
50 | 49 | ||
50 | /* Flags to be passed to g->ops.gr.alloc_obj_ctx() */ | ||
51 | #define NVGPU_OBJ_CTX_FLAGS_SUPPORT_GFXP (1 << 1) | ||
52 | #define NVGPU_OBJ_CTX_FLAGS_SUPPORT_CILP (1 << 2) | ||
53 | |||
51 | /* | 54 | /* |
52 | * allocate a minimum of 1 page (4KB) worth of patch space, this is 512 entries | 55 | * allocate a minimum of 1 page (4KB) worth of patch space, this is 512 entries |
53 | * of address and data pairs | 56 | * of address and data pairs |
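
[Editor's note] The comment above sizes the patch space at one 4KB page, i.e. 512 address/data pairs. A miniature of how such a patch context accumulates entries and tracks data_count (the structure and bound are illustrative, not the real layout):

    #include <stdio.h>

    #define PATCH_SLOTS 512  /* one 4KB page of address/data pairs */

    /* illustrative miniature of the patch context: a list of
     * (register, value) pairs plus a data_count that later gets
     * flushed to the context image */
    struct patch_ctx {
        unsigned int words[PATCH_SLOTS * 2];
        unsigned int data_count;
    };

    static void patch_write(struct patch_ctx *p, unsigned int addr,
                            unsigned int data)
    {
        if (p->data_count >= PATCH_SLOTS)
            return;  /* out of slots; the real code would error out */
        p->words[p->data_count * 2] = addr;
        p->words[p->data_count * 2 + 1] = data;
        p->data_count++;
    }

    int main(void)
    {
        static struct patch_ctx p;  /* zero-initialized */

        patch_write(&p, 0x00419e04, 0x1);  /* made-up reg/value pair */
        patch_write(&p, 0x00419e08, 0x2);
        printf("%u patch entries staged\n", p.data_count);
        return 0;
    }
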
@@ -64,6 +67,7 @@ | |||
64 | #define NVGPU_PREEMPTION_MODE_COMPUTE_CTA (1 << 1) | 67 | #define NVGPU_PREEMPTION_MODE_COMPUTE_CTA (1 << 1) |
65 | #define NVGPU_PREEMPTION_MODE_COMPUTE_CILP (1 << 2) | 68 | #define NVGPU_PREEMPTION_MODE_COMPUTE_CILP (1 << 2) |
66 | 69 | ||
70 | struct tsg_gk20a; | ||
67 | struct channel_gk20a; | 71 | struct channel_gk20a; |
68 | struct nvgpu_warpstate; | 72 | struct nvgpu_warpstate; |
69 | 73 | ||
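
[Editor's note] The header now replaces its #include of tsg_gk20a.h with a forward declaration of struct tsg_gk20a, while tsg_gk20a.h includes gr_gk20a.h so it can embed struct nvgpu_gr_ctx by value. A two-headers-in-one-file sketch of why that direction works:

    #include <stdio.h>

    /* "gr_gk20a.h" side: code here only passes TSG pointers around,
     * so a forward declaration is enough and the #include can go */
    struct tsg;                          /* forward declaration */
    struct gr_ctx { int tsgid; };
    static void describe(struct tsg *t); /* pointer use is fine */

    /* "tsg_gk20a.h" side: embedding gr_ctx by value needs the full
     * definition above, hence tsg_gk20a.h now includes gr_gk20a.h */
    struct tsg { struct gr_ctx gr_ctx; };

    static void describe(struct tsg *t)
    {
        printf("tsg owns ctx for tsgid %d\n", t->gr_ctx.tsgid);
    }

    int main(void)
    {
        struct tsg t = { { 7 } };

        describe(&t);
        return 0;
    }
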
@@ -433,7 +437,12 @@ struct gr_gk20a { | |||
433 | 437 | ||
434 | void gk20a_fecs_dump_falcon_stats(struct gk20a *g); | 438 | void gk20a_fecs_dump_falcon_stats(struct gk20a *g); |
435 | 439 | ||
436 | struct gr_ctx_desc { | 440 | struct ctx_header_desc { |
441 | struct nvgpu_mem mem; | ||
442 | }; | ||
443 | |||
444 | /* contexts associated with a TSG */ | ||
445 | struct nvgpu_gr_ctx { | ||
437 | struct nvgpu_mem mem; | 446 | struct nvgpu_mem mem; |
438 | 447 | ||
439 | u32 graphics_preempt_mode; | 448 | u32 graphics_preempt_mode; |
@@ -452,10 +461,16 @@ struct gr_ctx_desc { | |||
452 | u64 virt_ctx; | 461 | u64 virt_ctx; |
453 | #endif | 462 | #endif |
454 | bool golden_img_loaded; | 463 | bool golden_img_loaded; |
455 | }; | ||
456 | 464 | ||
457 | struct ctx_header_desc { | 465 | struct patch_desc patch_ctx; |
458 | struct nvgpu_mem mem; | 466 | struct zcull_ctx_desc zcull_ctx; |
467 | struct pm_ctx_desc pm_ctx; | ||
468 | u64 global_ctx_buffer_va[NR_GLOBAL_CTX_BUF_VA]; | ||
469 | u64 global_ctx_buffer_size[NR_GLOBAL_CTX_BUF_VA]; | ||
470 | int global_ctx_buffer_index[NR_GLOBAL_CTX_BUF_VA]; | ||
471 | bool global_ctx_buffer_mapped; | ||
472 | |||
473 | u32 tsgid; | ||
459 | }; | 474 | }; |
460 | 475 | ||
461 | struct gk20a_ctxsw_ucode_segment { | 476 | struct gk20a_ctxsw_ucode_segment { |
@@ -552,7 +567,6 @@ int gk20a_init_gr_channel(struct channel_gk20a *ch_gk20a); | |||
552 | int gr_gk20a_init_ctx_vars(struct gk20a *g, struct gr_gk20a *gr); | 567 | int gr_gk20a_init_ctx_vars(struct gk20a *g, struct gr_gk20a *gr); |
553 | 568 | ||
554 | int gk20a_alloc_obj_ctx(struct channel_gk20a *c, u32 class_num, u32 flags); | 569 | int gk20a_alloc_obj_ctx(struct channel_gk20a *c, u32 class_num, u32 flags); |
555 | void gk20a_free_channel_ctx(struct channel_gk20a *c, bool is_tsg); | ||
556 | 570 | ||
557 | int gk20a_gr_isr(struct gk20a *g); | 571 | int gk20a_gr_isr(struct gk20a *g); |
558 | int gk20a_gr_nonstall_isr(struct gk20a *g); | 572 | int gk20a_gr_nonstall_isr(struct gk20a *g); |
@@ -633,17 +647,17 @@ int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g, | |||
633 | struct channel_gk20a *c, | 647 | struct channel_gk20a *c, |
634 | bool enable_hwpm_ctxsw); | 648 | bool enable_hwpm_ctxsw); |
635 | 649 | ||
636 | struct channel_ctx_gk20a; | 650 | struct nvgpu_gr_ctx; |
637 | void gr_gk20a_ctx_patch_write(struct gk20a *g, struct channel_ctx_gk20a *ch_ctx, | 651 | void gr_gk20a_ctx_patch_write(struct gk20a *g, struct nvgpu_gr_ctx *ch_ctx, |
638 | u32 addr, u32 data, bool patch); | 652 | u32 addr, u32 data, bool patch); |
639 | int gr_gk20a_ctx_patch_write_begin(struct gk20a *g, | 653 | int gr_gk20a_ctx_patch_write_begin(struct gk20a *g, |
640 | struct channel_ctx_gk20a *ch_ctx, | 654 | struct nvgpu_gr_ctx *ch_ctx, |
641 | bool update_patch_count); | 655 | bool update_patch_count); |
642 | void gr_gk20a_ctx_patch_write_end(struct gk20a *g, | 656 | void gr_gk20a_ctx_patch_write_end(struct gk20a *g, |
643 | struct channel_ctx_gk20a *ch_ctx, | 657 | struct nvgpu_gr_ctx *ch_ctx, |
644 | bool update_patch_count); | 658 | bool update_patch_count); |
645 | void gr_gk20a_commit_global_pagepool(struct gk20a *g, | 659 | void gr_gk20a_commit_global_pagepool(struct gk20a *g, |
646 | struct channel_ctx_gk20a *ch_ctx, | 660 | struct nvgpu_gr_ctx *ch_ctx, |
647 | u64 addr, u32 size, bool patch); | 661 | u64 addr, u32 size, bool patch); |
648 | void gk20a_gr_set_shader_exceptions(struct gk20a *g, u32 data); | 662 | void gk20a_gr_set_shader_exceptions(struct gk20a *g, u32 data); |
649 | void gr_gk20a_enable_hww_exceptions(struct gk20a *g); | 663 | void gr_gk20a_enable_hww_exceptions(struct gk20a *g); |
@@ -694,10 +708,10 @@ int gr_gk20a_submit_fecs_method_op(struct gk20a *g, | |||
694 | int gr_gk20a_submit_fecs_sideband_method_op(struct gk20a *g, | 708 | int gr_gk20a_submit_fecs_sideband_method_op(struct gk20a *g, |
695 | struct fecs_method_op_gk20a op); | 709 | struct fecs_method_op_gk20a op); |
696 | int gr_gk20a_alloc_gr_ctx(struct gk20a *g, | 710 | int gr_gk20a_alloc_gr_ctx(struct gk20a *g, |
697 | struct gr_ctx_desc **__gr_ctx, struct vm_gk20a *vm, | 711 | struct nvgpu_gr_ctx *gr_ctx, struct vm_gk20a *vm, |
698 | u32 class, u32 padding); | 712 | u32 class, u32 padding); |
699 | void gr_gk20a_free_gr_ctx(struct gk20a *g, | 713 | void gr_gk20a_free_gr_ctx(struct gk20a *g, |
700 | struct vm_gk20a *vm, struct gr_ctx_desc *gr_ctx); | 714 | struct vm_gk20a *vm, struct nvgpu_gr_ctx *gr_ctx); |
701 | int gr_gk20a_halt_pipe(struct gk20a *g); | 715 | int gr_gk20a_halt_pipe(struct gk20a *g); |
702 | 716 | ||
703 | #if defined(CONFIG_GK20A_CYCLE_STATS) | 717 | #if defined(CONFIG_GK20A_CYCLE_STATS) |
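
[Editor's note] Several of these prototypes are reached through the per-chip HAL table, e.g. g->ops.gr.free_gr_ctx in the .c hunks above, so changing a signature here changes the hook every chip implementation must match. A minimal sketch of that function-pointer dispatch, with simplified stand-in types:

    #include <stdio.h>

    struct gk20a;  /* forward declaration, as in the real headers */

    /* miniature of the per-chip HAL table: g->ops.gr.free_gr_ctx is
     * one hook whose signature this patch changes for every chip */
    struct gr_ops {
        void (*free_gr_ctx)(struct gk20a *g);
    };

    struct gpu_ops { struct gr_ops gr; };
    struct gk20a { struct gpu_ops ops; };

    static void gk20a_variant_free_gr_ctx(struct gk20a *g)
    {
        (void)g;
        printf("gk20a variant of free_gr_ctx\n");
    }

    int main(void)
    {
        struct gk20a g = { .ops.gr.free_gr_ctx = gk20a_variant_free_gr_ctx };

        g.ops.gr.free_gr_ctx(&g);  /* dispatched through the ops table */
        return 0;
    }
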
diff --git a/drivers/gpu/nvgpu/gk20a/tsg_gk20a.c b/drivers/gpu/nvgpu/gk20a/tsg_gk20a.c
index d9ddc011..19d0ecce 100644
--- a/drivers/gpu/nvgpu/gk20a/tsg_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/tsg_gk20a.c
@@ -280,7 +280,6 @@ struct tsg_gk20a *gk20a_tsg_open(struct gk20a *g) | |||
280 | tsg->num_active_channels = 0; | 280 | tsg->num_active_channels = 0; |
281 | nvgpu_ref_init(&tsg->refcount); | 281 | nvgpu_ref_init(&tsg->refcount); |
282 | 282 | ||
283 | tsg->tsg_gr_ctx = NULL; | ||
284 | tsg->vm = NULL; | 283 | tsg->vm = NULL; |
285 | tsg->interleave_level = NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_LOW; | 284 | tsg->interleave_level = NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_LOW; |
286 | tsg->timeslice_us = 0; | 285 | tsg->timeslice_us = 0; |
@@ -319,10 +318,8 @@ void gk20a_tsg_release(struct nvgpu_ref *ref) | |||
319 | if (g->ops.fifo.tsg_release) | 318 | if (g->ops.fifo.tsg_release) |
320 | g->ops.fifo.tsg_release(tsg); | 319 | g->ops.fifo.tsg_release(tsg); |
321 | 320 | ||
322 | if (tsg->tsg_gr_ctx) { | 321 | if (nvgpu_mem_is_valid(&tsg->gr_ctx.mem)) |
323 | gr_gk20a_free_tsg_gr_ctx(tsg); | 322 | gr_gk20a_free_tsg_gr_ctx(tsg); |
324 | tsg->tsg_gr_ctx = NULL; | ||
325 | } | ||
326 | 323 | ||
327 | if (g->ops.fifo.deinit_eng_method_buffers) | 324 | if (g->ops.fifo.deinit_eng_method_buffers) |
328 | g->ops.fifo.deinit_eng_method_buffers(g, tsg); | 325 | g->ops.fifo.deinit_eng_method_buffers(g, tsg); |
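
[Editor's note] Because the context now belongs to the TSG, its lifetime follows the TSG refcount: the last channel's reference drop, not an individual channel close, triggers the free in gk20a_tsg_release(). A small refcounting sketch of that ordering, where the ref type is a simplified stand-in for nvgpu_ref:

    #include <stdio.h>

    /* simplified stand-in for nvgpu_ref */
    struct ref {
        int count;
        void (*release)(struct ref *r);
    };

    static void ref_get(struct ref *r) { r->count++; }
    static void ref_put(struct ref *r)
    {
        if (--r->count == 0)
            r->release(r);
    }

    struct tsg {
        struct ref refcount;  /* kept as the first member here */
        int has_ctx;
    };

    /* the shared context dies with the TSG: the last ref_put(), not a
     * channel close, triggers the free */
    static void tsg_release(struct ref *r)
    {
        struct tsg *tsg = (struct tsg *)r;  /* container_of in spirit */

        if (tsg->has_ctx)
            printf("last ref dropped: freeing TSG gr ctx\n");
    }

    int main(void)
    {
        struct tsg tsg = { { 1, tsg_release }, 1 };

        ref_get(&tsg.refcount);  /* second channel joins */
        ref_put(&tsg.refcount);  /* first channel leaves: ctx kept */
        ref_put(&tsg.refcount);  /* last channel leaves: ctx freed */
        return 0;
    }
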
diff --git a/drivers/gpu/nvgpu/gk20a/tsg_gk20a.h b/drivers/gpu/nvgpu/gk20a/tsg_gk20a.h
index 08fe0365..2168cb4f 100644
--- a/drivers/gpu/nvgpu/gk20a/tsg_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/tsg_gk20a.h
@@ -26,6 +26,8 @@ | |||
26 | #include <nvgpu/kref.h> | 26 | #include <nvgpu/kref.h> |
27 | #include <nvgpu/rwsem.h> | 27 | #include <nvgpu/rwsem.h> |
28 | 28 | ||
29 | #include "gr_gk20a.h" | ||
30 | |||
29 | #ifdef CONFIG_TEGRA_19x_GPU | 31 | #ifdef CONFIG_TEGRA_19x_GPU |
30 | #include "tsg_t19x.h" | 32 | #include "tsg_t19x.h" |
31 | #endif | 33 | #endif |
@@ -56,8 +58,6 @@ struct tsg_gk20a { | |||
56 | unsigned int timeslice_timeout; | 58 | unsigned int timeslice_timeout; |
57 | unsigned int timeslice_scale; | 59 | unsigned int timeslice_scale; |
58 | 60 | ||
59 | struct gr_ctx_desc *tsg_gr_ctx; | ||
60 | |||
61 | struct vm_gk20a *vm; | 61 | struct vm_gk20a *vm; |
62 | 62 | ||
63 | u32 interleave_level; | 63 | u32 interleave_level; |
@@ -71,6 +71,8 @@ struct tsg_gk20a { | |||
71 | #ifdef CONFIG_TEGRA_19x_GPU | 71 | #ifdef CONFIG_TEGRA_19x_GPU |
72 | struct tsg_t19x t19x; | 72 | struct tsg_t19x t19x; |
73 | #endif | 73 | #endif |
74 | |||
75 | struct nvgpu_gr_ctx gr_ctx; | ||
74 | }; | 76 | }; |
75 | 77 | ||
76 | int gk20a_enable_tsg(struct tsg_gk20a *tsg); | 78 | int gk20a_enable_tsg(struct tsg_gk20a *tsg); |