-rw-r--r--  drivers/gpu/nvgpu/gk20a/gk20a.h    |  4
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 18
-rw-r--r--  drivers/gpu/nvgpu/gm20b/gr_gm20b.c | 19
-rw-r--r--  drivers/gpu/nvgpu/gp10b/gr_gp10b.c | 59
4 files changed, 93 insertions(+), 7 deletions(-)
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index c8d06b4f..96ca69a3 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -194,6 +194,8 @@ struct gpu_ops {
         void (*set_circular_buffer_size)(struct gk20a *g, u32 data);
         void (*enable_hww_exceptions)(struct gk20a *g);
         bool (*is_valid_class)(struct gk20a *g, u32 class_num);
+        bool (*is_valid_gfx_class)(struct gk20a *g, u32 class_num);
+        bool (*is_valid_compute_class)(struct gk20a *g, u32 class_num);
         void (*get_sm_dsm_perf_regs)(struct gk20a *g,
                 u32 *num_sm_dsm_perf_regs,
                 u32 **sm_dsm_perf_regs,
@@ -354,6 +356,8 @@ struct gpu_ops {
                 struct nvgpu_mem *mem, u64 gpu_va);
         void (*write_pm_ptr)(struct gk20a *g,
                 struct nvgpu_mem *mem, u64 gpu_va);
+        void (*write_preemption_ptr)(struct gk20a *g,
+                struct nvgpu_mem *mem, u64 gpu_va);
         void (*init_elcg_mode)(struct gk20a *g, u32 mode, u32 engine);
         void (*load_tpc_mask)(struct gk20a *g);
         int (*inval_icache)(struct gk20a *g, struct channel_gk20a *ch);
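
The two new is_valid_*_class hooks let common code ask the per-chip HAL whether a class number is a graphics or a compute class instead of comparing against chip-specific constants; the gr_gp10b.c hunks below convert gr_gp10b_set_ctxsw_preemption_mode to exactly this pattern. A minimal caller-side sketch, assuming only the ops added above (the helper nvgpu_classify_obj_class and its enum are illustrative and not part of this patch):

enum nvgpu_obj_class_kind {
        NVGPU_OBJ_CLASS_GFX,
        NVGPU_OBJ_CLASS_COMPUTE,
        NVGPU_OBJ_CLASS_OTHER,
};

/* Illustrative helper: classify an object class through the per-chip HAL
 * instead of hard-coding PASCAL_A / PASCAL_COMPUTE_A comparisons. */
static enum nvgpu_obj_class_kind nvgpu_classify_obj_class(struct gk20a *g,
                u32 class_num)
{
        if (g->ops.gr.is_valid_gfx_class(g, class_num))
                return NVGPU_OBJ_CLASS_GFX;
        if (g->ops.gr.is_valid_compute_class(g, class_num))
                return NVGPU_OBJ_CLASS_COMPUTE;
        return NVGPU_OBJ_CLASS_OTHER;
}
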
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index 3140c285..77a947de 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -3113,6 +3113,22 @@ static bool gr_gk20a_is_valid_class(struct gk20a *g, u32 class_num)
         return valid;
 }
 
+static bool gr_gk20a_is_valid_gfx_class(struct gk20a *g, u32 class_num)
+{
+        if (class_num == KEPLER_C)
+                return true;
+        else
+                return false;
+}
+
+static bool gr_gk20a_is_valid_compute_class(struct gk20a *g, u32 class_num)
+{
+        if (class_num == KEPLER_COMPUTE_A)
+                return true;
+        else
+                return false;
+}
+
 int gk20a_alloc_obj_ctx(struct channel_gk20a *c,
                         struct nvgpu_alloc_obj_ctx_args *args)
 {
@@ -9462,6 +9478,8 @@ void gk20a_init_gr_ops(struct gpu_ops *gops)
                 gk20a_gr_set_alpha_circular_buffer_size;
         gops->gr.enable_hww_exceptions = gr_gk20a_enable_hww_exceptions;
         gops->gr.is_valid_class = gr_gk20a_is_valid_class;
+        gops->gr.is_valid_gfx_class = gr_gk20a_is_valid_gfx_class;
+        gops->gr.is_valid_compute_class = gr_gk20a_is_valid_compute_class;
         gops->gr.get_sm_dsm_perf_regs = gr_gk20a_get_sm_dsm_perf_regs;
         gops->gr.get_sm_dsm_perf_ctrl_regs = gr_gk20a_get_sm_dsm_perf_ctrl_regs;
         gops->gr.init_fs_state = gr_gk20a_init_fs_state;
diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
index 717e5487..99bccd0e 100644
--- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
@@ -474,6 +474,23 @@ static bool gr_gm20b_is_valid_class(struct gk20a *g, u32 class_num)
         return valid;
 }
 
+static bool gr_gm20b_is_valid_gfx_class(struct gk20a *g, u32 class_num)
+{
+        if (class_num == MAXWELL_B)
+                return true;
+        else
+                return false;
+}
+
+static bool gr_gm20b_is_valid_compute_class(struct gk20a *g, u32 class_num)
+{
+        if (class_num == MAXWELL_COMPUTE_B)
+                return true;
+        else
+                return false;
+}
+
+
 /* Following are the blocks of registers that the ucode
    stores in the extended region.*/
 /* == ctxsw_extended_sm_dsm_perf_counter_register_stride_v() ? */
@@ -1527,6 +1544,8 @@ void gm20b_init_gr(struct gpu_ops *gops)
         gops->gr.set_circular_buffer_size = gr_gm20b_set_circular_buffer_size;
         gops->gr.enable_hww_exceptions = gr_gk20a_enable_hww_exceptions;
         gops->gr.is_valid_class = gr_gm20b_is_valid_class;
+        gops->gr.is_valid_gfx_class = gr_gm20b_is_valid_gfx_class;
+        gops->gr.is_valid_compute_class = gr_gm20b_is_valid_compute_class;
         gops->gr.get_sm_dsm_perf_regs = gr_gm20b_get_sm_dsm_perf_regs;
         gops->gr.get_sm_dsm_perf_ctrl_regs = gr_gm20b_get_sm_dsm_perf_ctrl_regs;
         gops->gr.init_fs_state = gr_gm20b_init_fs_state;
diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index 7ae6abc2..855c2b14 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -65,6 +65,23 @@ static bool gr_gp10b_is_valid_class(struct gk20a *g, u32 class_num)
         return valid;
 }
 
+static bool gr_gp10b_is_valid_gfx_class(struct gk20a *g, u32 class_num)
+{
+        if (class_num == PASCAL_A || class_num == MAXWELL_B)
+                return true;
+        else
+                return false;
+}
+
+static bool gr_gp10b_is_valid_compute_class(struct gk20a *g, u32 class_num)
+{
+        if (class_num == PASCAL_COMPUTE_A || class_num == MAXWELL_COMPUTE_B)
+                return true;
+        else
+                return false;
+}
+
+
 static void gr_gp10b_sm_lrf_ecc_overcount_war(int single_err,
                                                 u32 sed_status,
                                                 u32 ded_status,
@@ -869,10 +886,11 @@ static int gr_gp10b_set_ctxsw_preemption_mode(struct gk20a *g,
 {
         int err = 0;
 
-        if (class == PASCAL_A && g->gr.t18x.ctx_vars.force_preemption_gfxp)
+        if (g->ops.gr.is_valid_gfx_class(g, class) &&
+                        g->gr.t18x.ctx_vars.force_preemption_gfxp)
                 graphics_preempt_mode = NVGPU_GRAPHICS_PREEMPTION_MODE_GFXP;
 
-        if (class == PASCAL_COMPUTE_A &&
+        if (g->ops.gr.is_valid_compute_class(g, class) &&
                 g->gr.t18x.ctx_vars.force_preemption_cilp)
                 compute_preempt_mode = NVGPU_COMPUTE_PREEMPTION_MODE_CILP;
 
@@ -959,7 +977,8 @@ static int gr_gp10b_set_ctxsw_preemption_mode(struct gk20a *g,
                 break;
         }
 
-        if (class == PASCAL_COMPUTE_A || class == PASCAL_A) {
+        if (g->ops.gr.is_valid_compute_class(g, class) ||
+                        g->ops.gr.is_valid_gfx_class(g, class)) {
                 switch (compute_preempt_mode) {
                 case NVGPU_COMPUTE_PREEMPTION_MODE_WFI:
                 case NVGPU_COMPUTE_PREEMPTION_MODE_CTA:
@@ -1141,9 +1160,9 @@ static void gr_gp10b_update_ctxsw_preemption_mode(struct gk20a *g,
         u32 size;
         u32 cbes_reserve;
 
-        nvgpu_mem_wr(g, mem,
-                ctxsw_prog_main_image_full_preemption_ptr_o(),
-                gr_ctx->t18x.preempt_ctxsw_buffer.gpu_va >> 8);
+        if (g->ops.gr.write_preemption_ptr)
+                g->ops.gr.write_preemption_ptr(g, mem,
+                                gr_ctx->t18x.preempt_ctxsw_buffer.gpu_va);
 
         err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx);
         if (err) {
@@ -2110,6 +2129,8 @@ static int gr_gp10b_set_preemption_mode(struct channel_gk20a *ch,
         struct tsg_gk20a *tsg;
         struct vm_gk20a *vm;
         struct nvgpu_mem *mem = &gr_ctx->mem;
+        struct ctx_header_desc *ctx = &ch->ch_ctx.ctx_header;
+        struct nvgpu_mem *ctxheader = &ctx->mem;
         u32 class;
         int err = 0;
 
@@ -2156,6 +2177,9 @@ static int gr_gp10b_set_preemption_mode(struct channel_gk20a *ch,
         if (nvgpu_mem_begin(g, mem))
                 return -ENOMEM;
 
+        if (nvgpu_mem_begin(g, ctxheader))
+                goto unamp_ctx_header;
+
         err = gk20a_disable_channel_tsg(g, ch);
         if (err)
                 goto unmap_ctx;
@@ -2165,7 +2189,12 @@ static int gr_gp10b_set_preemption_mode(struct channel_gk20a *ch,
                 goto enable_ch;
 
         if (g->ops.gr.update_ctxsw_preemption_mode) {
-                g->ops.gr.update_ctxsw_preemption_mode(ch->g, ch_ctx, mem);
+                if (ctxheader->gpu_va)
+                        g->ops.gr.update_ctxsw_preemption_mode(ch->g,
+                                        ch_ctx, ctxheader);
+                else
+                        g->ops.gr.update_ctxsw_preemption_mode(ch->g,
+                                        ch_ctx, mem);
 
                 err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx);
                 if (err) {
@@ -2179,6 +2208,8 @@ static int gr_gp10b_set_preemption_mode(struct channel_gk20a *ch,
 enable_ch:
         gk20a_enable_channel_tsg(g, ch);
 unmap_ctx:
+        nvgpu_mem_end(g, ctxheader);
+unamp_ctx_header:
         nvgpu_mem_end(g, mem);
 
         return err;
@@ -2262,14 +2293,28 @@ static int gr_gp10b_init_preemption_state(struct gk20a *g)
         return 0;
 }
 
+static void gr_gp10b_write_preemption_ptr(struct gk20a *g,
+                struct nvgpu_mem *mem, u64 gpu_va)
+{
+        u32 va = u64_lo32(gpu_va >> 8);
+
+        nvgpu_mem_wr(g, mem,
+                ctxsw_prog_main_image_full_preemption_ptr_o(), va);
+
+}
+
+
 void gp10b_init_gr(struct gpu_ops *gops)
 {
         gm20b_init_gr(gops);
         gops->gr.init_fs_state = gr_gp10b_init_fs_state;
         gops->gr.init_preemption_state = gr_gp10b_init_preemption_state;
         gops->gr.is_valid_class = gr_gp10b_is_valid_class;
+        gops->gr.is_valid_gfx_class = gr_gp10b_is_valid_gfx_class;
+        gops->gr.is_valid_compute_class = gr_gp10b_is_valid_compute_class;
         gops->gr.commit_global_cb_manager = gr_gp10b_commit_global_cb_manager;
         gops->gr.commit_global_pagepool = gr_gp10b_commit_global_pagepool;
+        gops->gr.write_preemption_ptr = gr_gp10b_write_preemption_ptr;
         gops->gr.add_zbc_color = gr_gp10b_add_zbc_color;
         gops->gr.add_zbc_depth = gr_gp10b_add_zbc_depth;
         gops->gr.pagepool_default_size = gr_gp10b_pagepool_default_size;
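
For reference, gr_gp10b_write_preemption_ptr stores only u64_lo32(gpu_va >> 8), i.e. bits [39:8] of the preemption buffer address, so the context image holds a 256-byte-granular pointer. A standalone sketch of that encoding (plain C outside the driver, with u64_lo32 re-expressed as a cast; purely illustrative):

#include <stdint.h>
#include <stdio.h>

/* Same encoding as gr_gp10b_write_preemption_ptr(): drop the low 8 bits
 * (256-byte granularity) and keep the low 32 bits of what remains. */
static uint32_t preemption_ptr_word(uint64_t gpu_va)
{
        return (uint32_t)(gpu_va >> 8);
}

int main(void)
{
        uint64_t gpu_va = 0x123456700ULL;        /* example buffer VA */

        /* Prints "word = 0x01234567" */
        printf("word = 0x%08x\n", preemption_ptr_word(gpu_va));
        return 0;
}
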