summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: seshendra Gadagottu <sgadagottu@nvidia.com> 2017-01-24 18:38:02 -0500
committer: mobile promotions <svcmobile_promotions@nvidia.com> 2017-05-10 17:16:06 -0400
commit: 2baab4e49e8bb2d5c25759660bc1ae2382d9cb1b (patch)
tree: 268411c22d20b9245676a192e85b63f55e005f1c
parent: 8b981f3c64897128fd2a94c03a819bbca5edd4da (diff)
gpu: nvgpu: changes related to preemption
Added function pointers to check for chip-specific valid gfx and compute classes.
Also added a function pointer to update the ctx header with preemption buffer pointers.

Bug 200292090

Change-Id: I8119ee082e2abb67186a8ac07088f8db7f410ba1
Signed-off-by: seshendra Gadagottu <sgadagottu@nvidia.com>
Reviewed-on: http://git-master/r/1293502
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
-rw-r--r-- drivers/gpu/nvgpu/gk20a/gk20a.h 4
-rw-r--r-- drivers/gpu/nvgpu/gk20a/gr_gk20a.c 18
-rw-r--r-- drivers/gpu/nvgpu/gm20b/gr_gm20b.c 19
-rw-r--r-- drivers/gpu/nvgpu/gp10b/gr_gp10b.c 59
4 files changed, 93 insertions, 7 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index c8d06b4f..96ca69a3 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -194,6 +194,8 @@ struct gpu_ops {
194 void (*set_circular_buffer_size)(struct gk20a *g, u32 data); 194 void (*set_circular_buffer_size)(struct gk20a *g, u32 data);
195 void (*enable_hww_exceptions)(struct gk20a *g); 195 void (*enable_hww_exceptions)(struct gk20a *g);
196 bool (*is_valid_class)(struct gk20a *g, u32 class_num); 196 bool (*is_valid_class)(struct gk20a *g, u32 class_num);
197 bool (*is_valid_gfx_class)(struct gk20a *g, u32 class_num);
198 bool (*is_valid_compute_class)(struct gk20a *g, u32 class_num);
197 void (*get_sm_dsm_perf_regs)(struct gk20a *g, 199 void (*get_sm_dsm_perf_regs)(struct gk20a *g,
198 u32 *num_sm_dsm_perf_regs, 200 u32 *num_sm_dsm_perf_regs,
199 u32 **sm_dsm_perf_regs, 201 u32 **sm_dsm_perf_regs,
@@ -354,6 +356,8 @@ struct gpu_ops {
354 struct nvgpu_mem *mem, u64 gpu_va); 356 struct nvgpu_mem *mem, u64 gpu_va);
355 void (*write_pm_ptr)(struct gk20a *g, 357 void (*write_pm_ptr)(struct gk20a *g,
356 struct nvgpu_mem *mem, u64 gpu_va); 358 struct nvgpu_mem *mem, u64 gpu_va);
359 void (*write_preemption_ptr)(struct gk20a *g,
360 struct nvgpu_mem *mem, u64 gpu_va);
357 void (*init_elcg_mode)(struct gk20a *g, u32 mode, u32 engine); 361 void (*init_elcg_mode)(struct gk20a *g, u32 mode, u32 engine);
358 void (*load_tpc_mask)(struct gk20a *g); 362 void (*load_tpc_mask)(struct gk20a *g);
359 int (*inval_icache)(struct gk20a *g, struct channel_gk20a *ch); 363 int (*inval_icache)(struct gk20a *g, struct channel_gk20a *ch);
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index 3140c285..77a947de 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -3113,6 +3113,22 @@ static bool gr_gk20a_is_valid_class(struct gk20a *g, u32 class_num)
3113 return valid; 3113 return valid;
3114} 3114}
3115 3115
3116static bool gr_gk20a_is_valid_gfx_class(struct gk20a *g, u32 class_num)
3117{
3118 if (class_num == KEPLER_C)
3119 return true;
3120 else
3121 return false;
3122}
3123
3124static bool gr_gk20a_is_valid_compute_class(struct gk20a *g, u32 class_num)
3125{
3126 if (class_num == KEPLER_COMPUTE_A)
3127 return true;
3128 else
3129 return false;
3130}
3131
3116int gk20a_alloc_obj_ctx(struct channel_gk20a *c, 3132int gk20a_alloc_obj_ctx(struct channel_gk20a *c,
3117 struct nvgpu_alloc_obj_ctx_args *args) 3133 struct nvgpu_alloc_obj_ctx_args *args)
3118{ 3134{
@@ -9462,6 +9478,8 @@ void gk20a_init_gr_ops(struct gpu_ops *gops)
9462 gk20a_gr_set_alpha_circular_buffer_size; 9478 gk20a_gr_set_alpha_circular_buffer_size;
9463 gops->gr.enable_hww_exceptions = gr_gk20a_enable_hww_exceptions; 9479 gops->gr.enable_hww_exceptions = gr_gk20a_enable_hww_exceptions;
9464 gops->gr.is_valid_class = gr_gk20a_is_valid_class; 9480 gops->gr.is_valid_class = gr_gk20a_is_valid_class;
9481 gops->gr.is_valid_gfx_class = gr_gk20a_is_valid_gfx_class;
9482 gops->gr.is_valid_compute_class = gr_gk20a_is_valid_compute_class;
9465 gops->gr.get_sm_dsm_perf_regs = gr_gk20a_get_sm_dsm_perf_regs; 9483 gops->gr.get_sm_dsm_perf_regs = gr_gk20a_get_sm_dsm_perf_regs;
9466 gops->gr.get_sm_dsm_perf_ctrl_regs = gr_gk20a_get_sm_dsm_perf_ctrl_regs; 9484 gops->gr.get_sm_dsm_perf_ctrl_regs = gr_gk20a_get_sm_dsm_perf_ctrl_regs;
9467 gops->gr.init_fs_state = gr_gk20a_init_fs_state; 9485 gops->gr.init_fs_state = gr_gk20a_init_fs_state;
diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
index 717e5487..99bccd0e 100644
--- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
@@ -474,6 +474,23 @@ static bool gr_gm20b_is_valid_class(struct gk20a *g, u32 class_num)
474 return valid; 474 return valid;
475} 475}
476 476
477static bool gr_gm20b_is_valid_gfx_class(struct gk20a *g, u32 class_num)
478{
479 if (class_num == MAXWELL_B)
480 return true;
481 else
482 return false;
483}
484
485static bool gr_gm20b_is_valid_compute_class(struct gk20a *g, u32 class_num)
486{
487 if (class_num == MAXWELL_COMPUTE_B)
488 return true;
489 else
490 return false;
491}
492
493
477/* Following are the blocks of registers that the ucode 494/* Following are the blocks of registers that the ucode
478 stores in the extended region.*/ 495 stores in the extended region.*/
479/* == ctxsw_extended_sm_dsm_perf_counter_register_stride_v() ? */ 496/* == ctxsw_extended_sm_dsm_perf_counter_register_stride_v() ? */
@@ -1527,6 +1544,8 @@ void gm20b_init_gr(struct gpu_ops *gops)
1527 gops->gr.set_circular_buffer_size = gr_gm20b_set_circular_buffer_size; 1544 gops->gr.set_circular_buffer_size = gr_gm20b_set_circular_buffer_size;
1528 gops->gr.enable_hww_exceptions = gr_gk20a_enable_hww_exceptions; 1545 gops->gr.enable_hww_exceptions = gr_gk20a_enable_hww_exceptions;
1529 gops->gr.is_valid_class = gr_gm20b_is_valid_class; 1546 gops->gr.is_valid_class = gr_gm20b_is_valid_class;
1547 gops->gr.is_valid_gfx_class = gr_gm20b_is_valid_gfx_class;
1548 gops->gr.is_valid_compute_class = gr_gm20b_is_valid_compute_class;
1530 gops->gr.get_sm_dsm_perf_regs = gr_gm20b_get_sm_dsm_perf_regs; 1549 gops->gr.get_sm_dsm_perf_regs = gr_gm20b_get_sm_dsm_perf_regs;
1531 gops->gr.get_sm_dsm_perf_ctrl_regs = gr_gm20b_get_sm_dsm_perf_ctrl_regs; 1550 gops->gr.get_sm_dsm_perf_ctrl_regs = gr_gm20b_get_sm_dsm_perf_ctrl_regs;
1532 gops->gr.init_fs_state = gr_gm20b_init_fs_state; 1551 gops->gr.init_fs_state = gr_gm20b_init_fs_state;
diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index 7ae6abc2..855c2b14 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -65,6 +65,23 @@ static bool gr_gp10b_is_valid_class(struct gk20a *g, u32 class_num)
65 return valid; 65 return valid;
66} 66}
67 67
68static bool gr_gp10b_is_valid_gfx_class(struct gk20a *g, u32 class_num)
69{
70 if (class_num == PASCAL_A || class_num == MAXWELL_B)
71 return true;
72 else
73 return false;
74}
75
76static bool gr_gp10b_is_valid_compute_class(struct gk20a *g, u32 class_num)
77{
78 if (class_num == PASCAL_COMPUTE_A || class_num == MAXWELL_COMPUTE_B)
79 return true;
80 else
81 return false;
82}
83
84
68static void gr_gp10b_sm_lrf_ecc_overcount_war(int single_err, 85static void gr_gp10b_sm_lrf_ecc_overcount_war(int single_err,
69 u32 sed_status, 86 u32 sed_status,
70 u32 ded_status, 87 u32 ded_status,
@@ -869,10 +886,11 @@ static int gr_gp10b_set_ctxsw_preemption_mode(struct gk20a *g,
869{ 886{
870 int err = 0; 887 int err = 0;
871 888
872 if (class == PASCAL_A && g->gr.t18x.ctx_vars.force_preemption_gfxp) 889 if (g->ops.gr.is_valid_gfx_class(g, class) &&
890 g->gr.t18x.ctx_vars.force_preemption_gfxp)
873 graphics_preempt_mode = NVGPU_GRAPHICS_PREEMPTION_MODE_GFXP; 891 graphics_preempt_mode = NVGPU_GRAPHICS_PREEMPTION_MODE_GFXP;
874 892
875 if (class == PASCAL_COMPUTE_A && 893 if (g->ops.gr.is_valid_compute_class(g, class) &&
876 g->gr.t18x.ctx_vars.force_preemption_cilp) 894 g->gr.t18x.ctx_vars.force_preemption_cilp)
877 compute_preempt_mode = NVGPU_COMPUTE_PREEMPTION_MODE_CILP; 895 compute_preempt_mode = NVGPU_COMPUTE_PREEMPTION_MODE_CILP;
878 896
@@ -959,7 +977,8 @@ static int gr_gp10b_set_ctxsw_preemption_mode(struct gk20a *g,
959 break; 977 break;
960 } 978 }
961 979
962 if (class == PASCAL_COMPUTE_A || class == PASCAL_A) { 980 if (g->ops.gr.is_valid_compute_class(g, class) ||
981 g->ops.gr.is_valid_gfx_class(g, class)) {
963 switch (compute_preempt_mode) { 982 switch (compute_preempt_mode) {
964 case NVGPU_COMPUTE_PREEMPTION_MODE_WFI: 983 case NVGPU_COMPUTE_PREEMPTION_MODE_WFI:
965 case NVGPU_COMPUTE_PREEMPTION_MODE_CTA: 984 case NVGPU_COMPUTE_PREEMPTION_MODE_CTA:
@@ -1141,9 +1160,9 @@ static void gr_gp10b_update_ctxsw_preemption_mode(struct gk20a *g,
1141 u32 size; 1160 u32 size;
1142 u32 cbes_reserve; 1161 u32 cbes_reserve;
1143 1162
1144 nvgpu_mem_wr(g, mem, 1163 if (g->ops.gr.write_preemption_ptr)
1145 ctxsw_prog_main_image_full_preemption_ptr_o(), 1164 g->ops.gr.write_preemption_ptr(g, mem,
1146 gr_ctx->t18x.preempt_ctxsw_buffer.gpu_va >> 8); 1165 gr_ctx->t18x.preempt_ctxsw_buffer.gpu_va);
1147 1166
1148 err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx); 1167 err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx);
1149 if (err) { 1168 if (err) {
@@ -2110,6 +2129,8 @@ static int gr_gp10b_set_preemption_mode(struct channel_gk20a *ch,
2110 struct tsg_gk20a *tsg; 2129 struct tsg_gk20a *tsg;
2111 struct vm_gk20a *vm; 2130 struct vm_gk20a *vm;
2112 struct nvgpu_mem *mem = &gr_ctx->mem; 2131 struct nvgpu_mem *mem = &gr_ctx->mem;
2132 struct ctx_header_desc *ctx = &ch->ch_ctx.ctx_header;
2133 struct nvgpu_mem *ctxheader = &ctx->mem;
2113 u32 class; 2134 u32 class;
2114 int err = 0; 2135 int err = 0;
2115 2136
@@ -2156,6 +2177,9 @@ static int gr_gp10b_set_preemption_mode(struct channel_gk20a *ch,
2156 if (nvgpu_mem_begin(g, mem)) 2177 if (nvgpu_mem_begin(g, mem))
2157 return -ENOMEM; 2178 return -ENOMEM;
2158 2179
2180 if (nvgpu_mem_begin(g, ctxheader))
2181 goto unamp_ctx_header;
2182
2159 err = gk20a_disable_channel_tsg(g, ch); 2183 err = gk20a_disable_channel_tsg(g, ch);
2160 if (err) 2184 if (err)
2161 goto unmap_ctx; 2185 goto unmap_ctx;
@@ -2165,7 +2189,12 @@ static int gr_gp10b_set_preemption_mode(struct channel_gk20a *ch,
2165 goto enable_ch; 2189 goto enable_ch;
2166 2190
2167 if (g->ops.gr.update_ctxsw_preemption_mode) { 2191 if (g->ops.gr.update_ctxsw_preemption_mode) {
2168 g->ops.gr.update_ctxsw_preemption_mode(ch->g, ch_ctx, mem); 2192 if (ctxheader->gpu_va)
2193 g->ops.gr.update_ctxsw_preemption_mode(ch->g,
2194 ch_ctx, ctxheader);
2195 else
2196 g->ops.gr.update_ctxsw_preemption_mode(ch->g,
2197 ch_ctx, mem);
2169 2198
2170 err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx); 2199 err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx);
2171 if (err) { 2200 if (err) {
@@ -2179,6 +2208,8 @@ static int gr_gp10b_set_preemption_mode(struct channel_gk20a *ch,
2179enable_ch: 2208enable_ch:
2180 gk20a_enable_channel_tsg(g, ch); 2209 gk20a_enable_channel_tsg(g, ch);
2181unmap_ctx: 2210unmap_ctx:
2211 nvgpu_mem_end(g, ctxheader);
2212unamp_ctx_header:
2182 nvgpu_mem_end(g, mem); 2213 nvgpu_mem_end(g, mem);
2183 2214
2184 return err; 2215 return err;
@@ -2262,14 +2293,28 @@ static int gr_gp10b_init_preemption_state(struct gk20a *g)
2262 return 0; 2293 return 0;
2263} 2294}
2264 2295
2296static void gr_gp10b_write_preemption_ptr(struct gk20a *g,
2297 struct nvgpu_mem *mem, u64 gpu_va)
2298{
2299 u32 va = u64_lo32(gpu_va >> 8);
2300
2301 nvgpu_mem_wr(g, mem,
2302 ctxsw_prog_main_image_full_preemption_ptr_o(), va);
2303
2304}
2305
2306
2265void gp10b_init_gr(struct gpu_ops *gops) 2307void gp10b_init_gr(struct gpu_ops *gops)
2266{ 2308{
2267 gm20b_init_gr(gops); 2309 gm20b_init_gr(gops);
2268 gops->gr.init_fs_state = gr_gp10b_init_fs_state; 2310 gops->gr.init_fs_state = gr_gp10b_init_fs_state;
2269 gops->gr.init_preemption_state = gr_gp10b_init_preemption_state; 2311 gops->gr.init_preemption_state = gr_gp10b_init_preemption_state;
2270 gops->gr.is_valid_class = gr_gp10b_is_valid_class; 2312 gops->gr.is_valid_class = gr_gp10b_is_valid_class;
2313 gops->gr.is_valid_gfx_class = gr_gp10b_is_valid_gfx_class;
2314 gops->gr.is_valid_compute_class = gr_gp10b_is_valid_compute_class;
2271 gops->gr.commit_global_cb_manager = gr_gp10b_commit_global_cb_manager; 2315 gops->gr.commit_global_cb_manager = gr_gp10b_commit_global_cb_manager;
2272 gops->gr.commit_global_pagepool = gr_gp10b_commit_global_pagepool; 2316 gops->gr.commit_global_pagepool = gr_gp10b_commit_global_pagepool;
2317 gops->gr.write_preemption_ptr = gr_gp10b_write_preemption_ptr;
2273 gops->gr.add_zbc_color = gr_gp10b_add_zbc_color; 2318 gops->gr.add_zbc_color = gr_gp10b_add_zbc_color;
2274 gops->gr.add_zbc_depth = gr_gp10b_add_zbc_depth; 2319 gops->gr.add_zbc_depth = gr_gp10b_add_zbc_depth;
2275 gops->gr.pagepool_default_size = gr_gp10b_pagepool_default_size; 2320 gops->gr.pagepool_default_size = gr_gp10b_pagepool_default_size;