diff options
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gk20a.h | 4 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 18 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gm20b/gr_gm20b.c | 19 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gp10b/gr_gp10b.c | 59 |
4 files changed, 93 insertions, 7 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h index c8d06b4f..96ca69a3 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h | |||
@@ -194,6 +194,8 @@ struct gpu_ops { | |||
194 | void (*set_circular_buffer_size)(struct gk20a *g, u32 data); | 194 | void (*set_circular_buffer_size)(struct gk20a *g, u32 data); |
195 | void (*enable_hww_exceptions)(struct gk20a *g); | 195 | void (*enable_hww_exceptions)(struct gk20a *g); |
196 | bool (*is_valid_class)(struct gk20a *g, u32 class_num); | 196 | bool (*is_valid_class)(struct gk20a *g, u32 class_num); |
197 | bool (*is_valid_gfx_class)(struct gk20a *g, u32 class_num); | ||
198 | bool (*is_valid_compute_class)(struct gk20a *g, u32 class_num); | ||
197 | void (*get_sm_dsm_perf_regs)(struct gk20a *g, | 199 | void (*get_sm_dsm_perf_regs)(struct gk20a *g, |
198 | u32 *num_sm_dsm_perf_regs, | 200 | u32 *num_sm_dsm_perf_regs, |
199 | u32 **sm_dsm_perf_regs, | 201 | u32 **sm_dsm_perf_regs, |
@@ -354,6 +356,8 @@ struct gpu_ops { | |||
354 | struct nvgpu_mem *mem, u64 gpu_va); | 356 | struct nvgpu_mem *mem, u64 gpu_va); |
355 | void (*write_pm_ptr)(struct gk20a *g, | 357 | void (*write_pm_ptr)(struct gk20a *g, |
356 | struct nvgpu_mem *mem, u64 gpu_va); | 358 | struct nvgpu_mem *mem, u64 gpu_va); |
359 | void (*write_preemption_ptr)(struct gk20a *g, | ||
360 | struct nvgpu_mem *mem, u64 gpu_va); | ||
357 | void (*init_elcg_mode)(struct gk20a *g, u32 mode, u32 engine); | 361 | void (*init_elcg_mode)(struct gk20a *g, u32 mode, u32 engine); |
358 | void (*load_tpc_mask)(struct gk20a *g); | 362 | void (*load_tpc_mask)(struct gk20a *g); |
359 | int (*inval_icache)(struct gk20a *g, struct channel_gk20a *ch); | 363 | int (*inval_icache)(struct gk20a *g, struct channel_gk20a *ch); |
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index 3140c285..77a947de 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c | |||
@@ -3113,6 +3113,22 @@ static bool gr_gk20a_is_valid_class(struct gk20a *g, u32 class_num) | |||
3113 | return valid; | 3113 | return valid; |
3114 | } | 3114 | } |
3115 | 3115 | ||
3116 | static bool gr_gk20a_is_valid_gfx_class(struct gk20a *g, u32 class_num) | ||
3117 | { | ||
3118 | if (class_num == KEPLER_C) | ||
3119 | return true; | ||
3120 | else | ||
3121 | return false; | ||
3122 | } | ||
3123 | |||
3124 | static bool gr_gk20a_is_valid_compute_class(struct gk20a *g, u32 class_num) | ||
3125 | { | ||
3126 | if (class_num == KEPLER_COMPUTE_A) | ||
3127 | return true; | ||
3128 | else | ||
3129 | return false; | ||
3130 | } | ||
3131 | |||
3116 | int gk20a_alloc_obj_ctx(struct channel_gk20a *c, | 3132 | int gk20a_alloc_obj_ctx(struct channel_gk20a *c, |
3117 | struct nvgpu_alloc_obj_ctx_args *args) | 3133 | struct nvgpu_alloc_obj_ctx_args *args) |
3118 | { | 3134 | { |
@@ -9462,6 +9478,8 @@ void gk20a_init_gr_ops(struct gpu_ops *gops) | |||
9462 | gk20a_gr_set_alpha_circular_buffer_size; | 9478 | gk20a_gr_set_alpha_circular_buffer_size; |
9463 | gops->gr.enable_hww_exceptions = gr_gk20a_enable_hww_exceptions; | 9479 | gops->gr.enable_hww_exceptions = gr_gk20a_enable_hww_exceptions; |
9464 | gops->gr.is_valid_class = gr_gk20a_is_valid_class; | 9480 | gops->gr.is_valid_class = gr_gk20a_is_valid_class; |
9481 | gops->gr.is_valid_gfx_class = gr_gk20a_is_valid_gfx_class; | ||
9482 | gops->gr.is_valid_compute_class = gr_gk20a_is_valid_compute_class; | ||
9465 | gops->gr.get_sm_dsm_perf_regs = gr_gk20a_get_sm_dsm_perf_regs; | 9483 | gops->gr.get_sm_dsm_perf_regs = gr_gk20a_get_sm_dsm_perf_regs; |
9466 | gops->gr.get_sm_dsm_perf_ctrl_regs = gr_gk20a_get_sm_dsm_perf_ctrl_regs; | 9484 | gops->gr.get_sm_dsm_perf_ctrl_regs = gr_gk20a_get_sm_dsm_perf_ctrl_regs; |
9467 | gops->gr.init_fs_state = gr_gk20a_init_fs_state; | 9485 | gops->gr.init_fs_state = gr_gk20a_init_fs_state; |
diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c index 717e5487..99bccd0e 100644 --- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c | |||
@@ -474,6 +474,23 @@ static bool gr_gm20b_is_valid_class(struct gk20a *g, u32 class_num) | |||
474 | return valid; | 474 | return valid; |
475 | } | 475 | } |
476 | 476 | ||
477 | static bool gr_gm20b_is_valid_gfx_class(struct gk20a *g, u32 class_num) | ||
478 | { | ||
479 | if (class_num == MAXWELL_B) | ||
480 | return true; | ||
481 | else | ||
482 | return false; | ||
483 | } | ||
484 | |||
485 | static bool gr_gm20b_is_valid_compute_class(struct gk20a *g, u32 class_num) | ||
486 | { | ||
487 | if (class_num == MAXWELL_COMPUTE_B) | ||
488 | return true; | ||
489 | else | ||
490 | return false; | ||
491 | } | ||
492 | |||
493 | |||
477 | /* Following are the blocks of registers that the ucode | 494 | /* Following are the blocks of registers that the ucode |
478 | stores in the extended region.*/ | 495 | stores in the extended region.*/ |
479 | /* == ctxsw_extended_sm_dsm_perf_counter_register_stride_v() ? */ | 496 | /* == ctxsw_extended_sm_dsm_perf_counter_register_stride_v() ? */ |
@@ -1527,6 +1544,8 @@ void gm20b_init_gr(struct gpu_ops *gops) | |||
1527 | gops->gr.set_circular_buffer_size = gr_gm20b_set_circular_buffer_size; | 1544 | gops->gr.set_circular_buffer_size = gr_gm20b_set_circular_buffer_size; |
1528 | gops->gr.enable_hww_exceptions = gr_gk20a_enable_hww_exceptions; | 1545 | gops->gr.enable_hww_exceptions = gr_gk20a_enable_hww_exceptions; |
1529 | gops->gr.is_valid_class = gr_gm20b_is_valid_class; | 1546 | gops->gr.is_valid_class = gr_gm20b_is_valid_class; |
1547 | gops->gr.is_valid_gfx_class = gr_gm20b_is_valid_gfx_class; | ||
1548 | gops->gr.is_valid_compute_class = gr_gm20b_is_valid_compute_class; | ||
1530 | gops->gr.get_sm_dsm_perf_regs = gr_gm20b_get_sm_dsm_perf_regs; | 1549 | gops->gr.get_sm_dsm_perf_regs = gr_gm20b_get_sm_dsm_perf_regs; |
1531 | gops->gr.get_sm_dsm_perf_ctrl_regs = gr_gm20b_get_sm_dsm_perf_ctrl_regs; | 1550 | gops->gr.get_sm_dsm_perf_ctrl_regs = gr_gm20b_get_sm_dsm_perf_ctrl_regs; |
1532 | gops->gr.init_fs_state = gr_gm20b_init_fs_state; | 1551 | gops->gr.init_fs_state = gr_gm20b_init_fs_state; |
diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c index 7ae6abc2..855c2b14 100644 --- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c +++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c | |||
@@ -65,6 +65,23 @@ static bool gr_gp10b_is_valid_class(struct gk20a *g, u32 class_num) | |||
65 | return valid; | 65 | return valid; |
66 | } | 66 | } |
67 | 67 | ||
68 | static bool gr_gp10b_is_valid_gfx_class(struct gk20a *g, u32 class_num) | ||
69 | { | ||
70 | if (class_num == PASCAL_A || class_num == MAXWELL_B) | ||
71 | return true; | ||
72 | else | ||
73 | return false; | ||
74 | } | ||
75 | |||
76 | static bool gr_gp10b_is_valid_compute_class(struct gk20a *g, u32 class_num) | ||
77 | { | ||
78 | if (class_num == PASCAL_COMPUTE_A || class_num == MAXWELL_COMPUTE_B) | ||
79 | return true; | ||
80 | else | ||
81 | return false; | ||
82 | } | ||
83 | |||
84 | |||
68 | static void gr_gp10b_sm_lrf_ecc_overcount_war(int single_err, | 85 | static void gr_gp10b_sm_lrf_ecc_overcount_war(int single_err, |
69 | u32 sed_status, | 86 | u32 sed_status, |
70 | u32 ded_status, | 87 | u32 ded_status, |
@@ -869,10 +886,11 @@ static int gr_gp10b_set_ctxsw_preemption_mode(struct gk20a *g, | |||
869 | { | 886 | { |
870 | int err = 0; | 887 | int err = 0; |
871 | 888 | ||
872 | if (class == PASCAL_A && g->gr.t18x.ctx_vars.force_preemption_gfxp) | 889 | if (g->ops.gr.is_valid_gfx_class(g, class) && |
890 | g->gr.t18x.ctx_vars.force_preemption_gfxp) | ||
873 | graphics_preempt_mode = NVGPU_GRAPHICS_PREEMPTION_MODE_GFXP; | 891 | graphics_preempt_mode = NVGPU_GRAPHICS_PREEMPTION_MODE_GFXP; |
874 | 892 | ||
875 | if (class == PASCAL_COMPUTE_A && | 893 | if (g->ops.gr.is_valid_compute_class(g, class) && |
876 | g->gr.t18x.ctx_vars.force_preemption_cilp) | 894 | g->gr.t18x.ctx_vars.force_preemption_cilp) |
877 | compute_preempt_mode = NVGPU_COMPUTE_PREEMPTION_MODE_CILP; | 895 | compute_preempt_mode = NVGPU_COMPUTE_PREEMPTION_MODE_CILP; |
878 | 896 | ||
@@ -959,7 +977,8 @@ static int gr_gp10b_set_ctxsw_preemption_mode(struct gk20a *g, | |||
959 | break; | 977 | break; |
960 | } | 978 | } |
961 | 979 | ||
962 | if (class == PASCAL_COMPUTE_A || class == PASCAL_A) { | 980 | if (g->ops.gr.is_valid_compute_class(g, class) || |
981 | g->ops.gr.is_valid_gfx_class(g, class)) { | ||
963 | switch (compute_preempt_mode) { | 982 | switch (compute_preempt_mode) { |
964 | case NVGPU_COMPUTE_PREEMPTION_MODE_WFI: | 983 | case NVGPU_COMPUTE_PREEMPTION_MODE_WFI: |
965 | case NVGPU_COMPUTE_PREEMPTION_MODE_CTA: | 984 | case NVGPU_COMPUTE_PREEMPTION_MODE_CTA: |
@@ -1141,9 +1160,9 @@ static void gr_gp10b_update_ctxsw_preemption_mode(struct gk20a *g, | |||
1141 | u32 size; | 1160 | u32 size; |
1142 | u32 cbes_reserve; | 1161 | u32 cbes_reserve; |
1143 | 1162 | ||
1144 | nvgpu_mem_wr(g, mem, | 1163 | if (g->ops.gr.write_preemption_ptr) |
1145 | ctxsw_prog_main_image_full_preemption_ptr_o(), | 1164 | g->ops.gr.write_preemption_ptr(g, mem, |
1146 | gr_ctx->t18x.preempt_ctxsw_buffer.gpu_va >> 8); | 1165 | gr_ctx->t18x.preempt_ctxsw_buffer.gpu_va); |
1147 | 1166 | ||
1148 | err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx); | 1167 | err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx); |
1149 | if (err) { | 1168 | if (err) { |
@@ -2110,6 +2129,8 @@ static int gr_gp10b_set_preemption_mode(struct channel_gk20a *ch, | |||
2110 | struct tsg_gk20a *tsg; | 2129 | struct tsg_gk20a *tsg; |
2111 | struct vm_gk20a *vm; | 2130 | struct vm_gk20a *vm; |
2112 | struct nvgpu_mem *mem = &gr_ctx->mem; | 2131 | struct nvgpu_mem *mem = &gr_ctx->mem; |
2132 | struct ctx_header_desc *ctx = &ch->ch_ctx.ctx_header; | ||
2133 | struct nvgpu_mem *ctxheader = &ctx->mem; | ||
2113 | u32 class; | 2134 | u32 class; |
2114 | int err = 0; | 2135 | int err = 0; |
2115 | 2136 | ||
@@ -2156,6 +2177,9 @@ static int gr_gp10b_set_preemption_mode(struct channel_gk20a *ch, | |||
2156 | if (nvgpu_mem_begin(g, mem)) | 2177 | if (nvgpu_mem_begin(g, mem)) |
2157 | return -ENOMEM; | 2178 | return -ENOMEM; |
2158 | 2179 | ||
2180 | if (nvgpu_mem_begin(g, ctxheader)) | ||
2181 | goto unmap_ctx_header; | ||
2182 | |||
2159 | err = gk20a_disable_channel_tsg(g, ch); | 2183 | err = gk20a_disable_channel_tsg(g, ch); |
2160 | if (err) | 2184 | if (err) |
2161 | goto unmap_ctx; | 2185 | goto unmap_ctx; |
@@ -2165,7 +2189,12 @@ static int gr_gp10b_set_preemption_mode(struct channel_gk20a *ch, | |||
2165 | goto enable_ch; | 2189 | goto enable_ch; |
2166 | 2190 | ||
2167 | if (g->ops.gr.update_ctxsw_preemption_mode) { | 2191 | if (g->ops.gr.update_ctxsw_preemption_mode) { |
2168 | g->ops.gr.update_ctxsw_preemption_mode(ch->g, ch_ctx, mem); | 2192 | if (ctxheader->gpu_va) |
2193 | g->ops.gr.update_ctxsw_preemption_mode(ch->g, | ||
2194 | ch_ctx, ctxheader); | ||
2195 | else | ||
2196 | g->ops.gr.update_ctxsw_preemption_mode(ch->g, | ||
2197 | ch_ctx, mem); | ||
2169 | 2198 | ||
2170 | err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx); | 2199 | err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx); |
2171 | if (err) { | 2200 | if (err) { |
@@ -2179,6 +2208,8 @@ static int gr_gp10b_set_preemption_mode(struct channel_gk20a *ch, | |||
2179 | enable_ch: | 2208 | enable_ch: |
2180 | gk20a_enable_channel_tsg(g, ch); | 2209 | gk20a_enable_channel_tsg(g, ch); |
2181 | unmap_ctx: | 2210 | unmap_ctx: |
2211 | nvgpu_mem_end(g, ctxheader); | ||
2212 | unmap_ctx_header: | ||
2182 | nvgpu_mem_end(g, mem); | 2213 | nvgpu_mem_end(g, mem); |
2183 | 2214 | ||
2184 | return err; | 2215 | return err; |
@@ -2262,14 +2293,28 @@ static int gr_gp10b_init_preemption_state(struct gk20a *g) | |||
2262 | return 0; | 2293 | return 0; |
2263 | } | 2294 | } |
2264 | 2295 | ||
2296 | static void gr_gp10b_write_preemption_ptr(struct gk20a *g, | ||
2297 | struct nvgpu_mem *mem, u64 gpu_va) | ||
2298 | { | ||
2299 | u32 va = u64_lo32(gpu_va >> 8); | ||
2300 | |||
2301 | nvgpu_mem_wr(g, mem, | ||
2302 | ctxsw_prog_main_image_full_preemption_ptr_o(), va); | ||
2303 | |||
2304 | } | ||
2305 | |||
2306 | |||
2265 | void gp10b_init_gr(struct gpu_ops *gops) | 2307 | void gp10b_init_gr(struct gpu_ops *gops) |
2266 | { | 2308 | { |
2267 | gm20b_init_gr(gops); | 2309 | gm20b_init_gr(gops); |
2268 | gops->gr.init_fs_state = gr_gp10b_init_fs_state; | 2310 | gops->gr.init_fs_state = gr_gp10b_init_fs_state; |
2269 | gops->gr.init_preemption_state = gr_gp10b_init_preemption_state; | 2311 | gops->gr.init_preemption_state = gr_gp10b_init_preemption_state; |
2270 | gops->gr.is_valid_class = gr_gp10b_is_valid_class; | 2312 | gops->gr.is_valid_class = gr_gp10b_is_valid_class; |
2313 | gops->gr.is_valid_gfx_class = gr_gp10b_is_valid_gfx_class; | ||
2314 | gops->gr.is_valid_compute_class = gr_gp10b_is_valid_compute_class; | ||
2271 | gops->gr.commit_global_cb_manager = gr_gp10b_commit_global_cb_manager; | 2315 | gops->gr.commit_global_cb_manager = gr_gp10b_commit_global_cb_manager; |
2272 | gops->gr.commit_global_pagepool = gr_gp10b_commit_global_pagepool; | 2316 | gops->gr.commit_global_pagepool = gr_gp10b_commit_global_pagepool; |
2317 | gops->gr.write_preemption_ptr = gr_gp10b_write_preemption_ptr; | ||
2273 | gops->gr.add_zbc_color = gr_gp10b_add_zbc_color; | 2318 | gops->gr.add_zbc_color = gr_gp10b_add_zbc_color; |
2274 | gops->gr.add_zbc_depth = gr_gp10b_add_zbc_depth; | 2319 | gops->gr.add_zbc_depth = gr_gp10b_add_zbc_depth; |
2275 | gops->gr.pagepool_default_size = gr_gp10b_pagepool_default_size; | 2320 | gops->gr.pagepool_default_size = gr_gp10b_pagepool_default_size; |