diff options
author | seshendra Gadagottu <sgadagottu@nvidia.com> | 2017-01-24 18:38:02 -0500 |
---|---|---|
committer | mobile promotions <svcmobile_promotions@nvidia.com> | 2017-05-10 17:16:06 -0400 |
commit | 2baab4e49e8bb2d5c25759660bc1ae2382d9cb1b (patch) | |
tree | 268411c22d20b9245676a192e85b63f55e005f1c /drivers/gpu | |
parent | 8b981f3c64897128fd2a94c03a819bbca5edd4da (diff) |
gpu: nvgpu: changes related to preemption
Added function pointers to check whether a class is a valid
chip-specific gfx class or compute class. Also added a function
pointer to update the ctx header with preemption buffer pointers.
Bug 200292090
Change-Id: I8119ee082e2abb67186a8ac07088f8db7f410ba1
Signed-off-by: seshendra Gadagottu <sgadagottu@nvidia.com>
Reviewed-on: http://git-master/r/1293502
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gk20a.h | 4 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 18 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gm20b/gr_gm20b.c | 19 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gp10b/gr_gp10b.c | 59 |
4 files changed, 93 insertions, 7 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h index c8d06b4f..96ca69a3 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h | |||
@@ -194,6 +194,8 @@ struct gpu_ops { | |||
194 | void (*set_circular_buffer_size)(struct gk20a *g, u32 data); | 194 | void (*set_circular_buffer_size)(struct gk20a *g, u32 data); |
195 | void (*enable_hww_exceptions)(struct gk20a *g); | 195 | void (*enable_hww_exceptions)(struct gk20a *g); |
196 | bool (*is_valid_class)(struct gk20a *g, u32 class_num); | 196 | bool (*is_valid_class)(struct gk20a *g, u32 class_num); |
197 | bool (*is_valid_gfx_class)(struct gk20a *g, u32 class_num); | ||
198 | bool (*is_valid_compute_class)(struct gk20a *g, u32 class_num); | ||
197 | void (*get_sm_dsm_perf_regs)(struct gk20a *g, | 199 | void (*get_sm_dsm_perf_regs)(struct gk20a *g, |
198 | u32 *num_sm_dsm_perf_regs, | 200 | u32 *num_sm_dsm_perf_regs, |
199 | u32 **sm_dsm_perf_regs, | 201 | u32 **sm_dsm_perf_regs, |
@@ -354,6 +356,8 @@ struct gpu_ops { | |||
354 | struct nvgpu_mem *mem, u64 gpu_va); | 356 | struct nvgpu_mem *mem, u64 gpu_va); |
355 | void (*write_pm_ptr)(struct gk20a *g, | 357 | void (*write_pm_ptr)(struct gk20a *g, |
356 | struct nvgpu_mem *mem, u64 gpu_va); | 358 | struct nvgpu_mem *mem, u64 gpu_va); |
359 | void (*write_preemption_ptr)(struct gk20a *g, | ||
360 | struct nvgpu_mem *mem, u64 gpu_va); | ||
357 | void (*init_elcg_mode)(struct gk20a *g, u32 mode, u32 engine); | 361 | void (*init_elcg_mode)(struct gk20a *g, u32 mode, u32 engine); |
358 | void (*load_tpc_mask)(struct gk20a *g); | 362 | void (*load_tpc_mask)(struct gk20a *g); |
359 | int (*inval_icache)(struct gk20a *g, struct channel_gk20a *ch); | 363 | int (*inval_icache)(struct gk20a *g, struct channel_gk20a *ch); |
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index 3140c285..77a947de 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c | |||
@@ -3113,6 +3113,22 @@ static bool gr_gk20a_is_valid_class(struct gk20a *g, u32 class_num) | |||
3113 | return valid; | 3113 | return valid; |
3114 | } | 3114 | } |
3115 | 3115 | ||
3116 | static bool gr_gk20a_is_valid_gfx_class(struct gk20a *g, u32 class_num) | ||
3117 | { | ||
3118 | if (class_num == KEPLER_C) | ||
3119 | return true; | ||
3120 | else | ||
3121 | return false; | ||
3122 | } | ||
3123 | |||
3124 | static bool gr_gk20a_is_valid_compute_class(struct gk20a *g, u32 class_num) | ||
3125 | { | ||
3126 | if (class_num == KEPLER_COMPUTE_A) | ||
3127 | return true; | ||
3128 | else | ||
3129 | return false; | ||
3130 | } | ||
3131 | |||
3116 | int gk20a_alloc_obj_ctx(struct channel_gk20a *c, | 3132 | int gk20a_alloc_obj_ctx(struct channel_gk20a *c, |
3117 | struct nvgpu_alloc_obj_ctx_args *args) | 3133 | struct nvgpu_alloc_obj_ctx_args *args) |
3118 | { | 3134 | { |
@@ -9462,6 +9478,8 @@ void gk20a_init_gr_ops(struct gpu_ops *gops) | |||
9462 | gk20a_gr_set_alpha_circular_buffer_size; | 9478 | gk20a_gr_set_alpha_circular_buffer_size; |
9463 | gops->gr.enable_hww_exceptions = gr_gk20a_enable_hww_exceptions; | 9479 | gops->gr.enable_hww_exceptions = gr_gk20a_enable_hww_exceptions; |
9464 | gops->gr.is_valid_class = gr_gk20a_is_valid_class; | 9480 | gops->gr.is_valid_class = gr_gk20a_is_valid_class; |
9481 | gops->gr.is_valid_gfx_class = gr_gk20a_is_valid_gfx_class; | ||
9482 | gops->gr.is_valid_compute_class = gr_gk20a_is_valid_compute_class; | ||
9465 | gops->gr.get_sm_dsm_perf_regs = gr_gk20a_get_sm_dsm_perf_regs; | 9483 | gops->gr.get_sm_dsm_perf_regs = gr_gk20a_get_sm_dsm_perf_regs; |
9466 | gops->gr.get_sm_dsm_perf_ctrl_regs = gr_gk20a_get_sm_dsm_perf_ctrl_regs; | 9484 | gops->gr.get_sm_dsm_perf_ctrl_regs = gr_gk20a_get_sm_dsm_perf_ctrl_regs; |
9467 | gops->gr.init_fs_state = gr_gk20a_init_fs_state; | 9485 | gops->gr.init_fs_state = gr_gk20a_init_fs_state; |
diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c index 717e5487..99bccd0e 100644 --- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c | |||
@@ -474,6 +474,23 @@ static bool gr_gm20b_is_valid_class(struct gk20a *g, u32 class_num) | |||
474 | return valid; | 474 | return valid; |
475 | } | 475 | } |
476 | 476 | ||
477 | static bool gr_gm20b_is_valid_gfx_class(struct gk20a *g, u32 class_num) | ||
478 | { | ||
479 | if (class_num == MAXWELL_B) | ||
480 | return true; | ||
481 | else | ||
482 | return false; | ||
483 | } | ||
484 | |||
485 | static bool gr_gm20b_is_valid_compute_class(struct gk20a *g, u32 class_num) | ||
486 | { | ||
487 | if (class_num == MAXWELL_COMPUTE_B) | ||
488 | return true; | ||
489 | else | ||
490 | return false; | ||
491 | } | ||
492 | |||
493 | |||
477 | /* Following are the blocks of registers that the ucode | 494 | /* Following are the blocks of registers that the ucode |
478 | stores in the extended region.*/ | 495 | stores in the extended region.*/ |
479 | /* == ctxsw_extended_sm_dsm_perf_counter_register_stride_v() ? */ | 496 | /* == ctxsw_extended_sm_dsm_perf_counter_register_stride_v() ? */ |
@@ -1527,6 +1544,8 @@ void gm20b_init_gr(struct gpu_ops *gops) | |||
1527 | gops->gr.set_circular_buffer_size = gr_gm20b_set_circular_buffer_size; | 1544 | gops->gr.set_circular_buffer_size = gr_gm20b_set_circular_buffer_size; |
1528 | gops->gr.enable_hww_exceptions = gr_gk20a_enable_hww_exceptions; | 1545 | gops->gr.enable_hww_exceptions = gr_gk20a_enable_hww_exceptions; |
1529 | gops->gr.is_valid_class = gr_gm20b_is_valid_class; | 1546 | gops->gr.is_valid_class = gr_gm20b_is_valid_class; |
1547 | gops->gr.is_valid_gfx_class = gr_gm20b_is_valid_gfx_class; | ||
1548 | gops->gr.is_valid_compute_class = gr_gm20b_is_valid_compute_class; | ||
1530 | gops->gr.get_sm_dsm_perf_regs = gr_gm20b_get_sm_dsm_perf_regs; | 1549 | gops->gr.get_sm_dsm_perf_regs = gr_gm20b_get_sm_dsm_perf_regs; |
1531 | gops->gr.get_sm_dsm_perf_ctrl_regs = gr_gm20b_get_sm_dsm_perf_ctrl_regs; | 1550 | gops->gr.get_sm_dsm_perf_ctrl_regs = gr_gm20b_get_sm_dsm_perf_ctrl_regs; |
1532 | gops->gr.init_fs_state = gr_gm20b_init_fs_state; | 1551 | gops->gr.init_fs_state = gr_gm20b_init_fs_state; |
diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c index 7ae6abc2..855c2b14 100644 --- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c +++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c | |||
@@ -65,6 +65,23 @@ static bool gr_gp10b_is_valid_class(struct gk20a *g, u32 class_num) | |||
65 | return valid; | 65 | return valid; |
66 | } | 66 | } |
67 | 67 | ||
68 | static bool gr_gp10b_is_valid_gfx_class(struct gk20a *g, u32 class_num) | ||
69 | { | ||
70 | if (class_num == PASCAL_A || class_num == MAXWELL_B) | ||
71 | return true; | ||
72 | else | ||
73 | return false; | ||
74 | } | ||
75 | |||
76 | static bool gr_gp10b_is_valid_compute_class(struct gk20a *g, u32 class_num) | ||
77 | { | ||
78 | if (class_num == PASCAL_COMPUTE_A || class_num == MAXWELL_COMPUTE_B) | ||
79 | return true; | ||
80 | else | ||
81 | return false; | ||
82 | } | ||
83 | |||
84 | |||
68 | static void gr_gp10b_sm_lrf_ecc_overcount_war(int single_err, | 85 | static void gr_gp10b_sm_lrf_ecc_overcount_war(int single_err, |
69 | u32 sed_status, | 86 | u32 sed_status, |
70 | u32 ded_status, | 87 | u32 ded_status, |
@@ -869,10 +886,11 @@ static int gr_gp10b_set_ctxsw_preemption_mode(struct gk20a *g, | |||
869 | { | 886 | { |
870 | int err = 0; | 887 | int err = 0; |
871 | 888 | ||
872 | if (class == PASCAL_A && g->gr.t18x.ctx_vars.force_preemption_gfxp) | 889 | if (g->ops.gr.is_valid_gfx_class(g, class) && |
890 | g->gr.t18x.ctx_vars.force_preemption_gfxp) | ||
873 | graphics_preempt_mode = NVGPU_GRAPHICS_PREEMPTION_MODE_GFXP; | 891 | graphics_preempt_mode = NVGPU_GRAPHICS_PREEMPTION_MODE_GFXP; |
874 | 892 | ||
875 | if (class == PASCAL_COMPUTE_A && | 893 | if (g->ops.gr.is_valid_compute_class(g, class) && |
876 | g->gr.t18x.ctx_vars.force_preemption_cilp) | 894 | g->gr.t18x.ctx_vars.force_preemption_cilp) |
877 | compute_preempt_mode = NVGPU_COMPUTE_PREEMPTION_MODE_CILP; | 895 | compute_preempt_mode = NVGPU_COMPUTE_PREEMPTION_MODE_CILP; |
878 | 896 | ||
@@ -959,7 +977,8 @@ static int gr_gp10b_set_ctxsw_preemption_mode(struct gk20a *g, | |||
959 | break; | 977 | break; |
960 | } | 978 | } |
961 | 979 | ||
962 | if (class == PASCAL_COMPUTE_A || class == PASCAL_A) { | 980 | if (g->ops.gr.is_valid_compute_class(g, class) || |
981 | g->ops.gr.is_valid_gfx_class(g, class)) { | ||
963 | switch (compute_preempt_mode) { | 982 | switch (compute_preempt_mode) { |
964 | case NVGPU_COMPUTE_PREEMPTION_MODE_WFI: | 983 | case NVGPU_COMPUTE_PREEMPTION_MODE_WFI: |
965 | case NVGPU_COMPUTE_PREEMPTION_MODE_CTA: | 984 | case NVGPU_COMPUTE_PREEMPTION_MODE_CTA: |
@@ -1141,9 +1160,9 @@ static void gr_gp10b_update_ctxsw_preemption_mode(struct gk20a *g, | |||
1141 | u32 size; | 1160 | u32 size; |
1142 | u32 cbes_reserve; | 1161 | u32 cbes_reserve; |
1143 | 1162 | ||
1144 | nvgpu_mem_wr(g, mem, | 1163 | if (g->ops.gr.write_preemption_ptr) |
1145 | ctxsw_prog_main_image_full_preemption_ptr_o(), | 1164 | g->ops.gr.write_preemption_ptr(g, mem, |
1146 | gr_ctx->t18x.preempt_ctxsw_buffer.gpu_va >> 8); | 1165 | gr_ctx->t18x.preempt_ctxsw_buffer.gpu_va); |
1147 | 1166 | ||
1148 | err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx); | 1167 | err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx); |
1149 | if (err) { | 1168 | if (err) { |
@@ -2110,6 +2129,8 @@ static int gr_gp10b_set_preemption_mode(struct channel_gk20a *ch, | |||
2110 | struct tsg_gk20a *tsg; | 2129 | struct tsg_gk20a *tsg; |
2111 | struct vm_gk20a *vm; | 2130 | struct vm_gk20a *vm; |
2112 | struct nvgpu_mem *mem = &gr_ctx->mem; | 2131 | struct nvgpu_mem *mem = &gr_ctx->mem; |
2132 | struct ctx_header_desc *ctx = &ch->ch_ctx.ctx_header; | ||
2133 | struct nvgpu_mem *ctxheader = &ctx->mem; | ||
2113 | u32 class; | 2134 | u32 class; |
2114 | int err = 0; | 2135 | int err = 0; |
2115 | 2136 | ||
@@ -2156,6 +2177,9 @@ static int gr_gp10b_set_preemption_mode(struct channel_gk20a *ch, | |||
2156 | if (nvgpu_mem_begin(g, mem)) | 2177 | if (nvgpu_mem_begin(g, mem)) |
2157 | return -ENOMEM; | 2178 | return -ENOMEM; |
2158 | 2179 | ||
2180 | if (nvgpu_mem_begin(g, ctxheader)) | ||
2181 | goto unamp_ctx_header; | ||
2182 | |||
2159 | err = gk20a_disable_channel_tsg(g, ch); | 2183 | err = gk20a_disable_channel_tsg(g, ch); |
2160 | if (err) | 2184 | if (err) |
2161 | goto unmap_ctx; | 2185 | goto unmap_ctx; |
@@ -2165,7 +2189,12 @@ static int gr_gp10b_set_preemption_mode(struct channel_gk20a *ch, | |||
2165 | goto enable_ch; | 2189 | goto enable_ch; |
2166 | 2190 | ||
2167 | if (g->ops.gr.update_ctxsw_preemption_mode) { | 2191 | if (g->ops.gr.update_ctxsw_preemption_mode) { |
2168 | g->ops.gr.update_ctxsw_preemption_mode(ch->g, ch_ctx, mem); | 2192 | if (ctxheader->gpu_va) |
2193 | g->ops.gr.update_ctxsw_preemption_mode(ch->g, | ||
2194 | ch_ctx, ctxheader); | ||
2195 | else | ||
2196 | g->ops.gr.update_ctxsw_preemption_mode(ch->g, | ||
2197 | ch_ctx, mem); | ||
2169 | 2198 | ||
2170 | err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx); | 2199 | err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx); |
2171 | if (err) { | 2200 | if (err) { |
@@ -2179,6 +2208,8 @@ static int gr_gp10b_set_preemption_mode(struct channel_gk20a *ch, | |||
2179 | enable_ch: | 2208 | enable_ch: |
2180 | gk20a_enable_channel_tsg(g, ch); | 2209 | gk20a_enable_channel_tsg(g, ch); |
2181 | unmap_ctx: | 2210 | unmap_ctx: |
2211 | nvgpu_mem_end(g, ctxheader); | ||
2212 | unamp_ctx_header: | ||
2182 | nvgpu_mem_end(g, mem); | 2213 | nvgpu_mem_end(g, mem); |
2183 | 2214 | ||
2184 | return err; | 2215 | return err; |
@@ -2262,14 +2293,28 @@ static int gr_gp10b_init_preemption_state(struct gk20a *g) | |||
2262 | return 0; | 2293 | return 0; |
2263 | } | 2294 | } |
2264 | 2295 | ||
2296 | static void gr_gp10b_write_preemption_ptr(struct gk20a *g, | ||
2297 | struct nvgpu_mem *mem, u64 gpu_va) | ||
2298 | { | ||
2299 | u32 va = u64_lo32(gpu_va >> 8); | ||
2300 | |||
2301 | nvgpu_mem_wr(g, mem, | ||
2302 | ctxsw_prog_main_image_full_preemption_ptr_o(), va); | ||
2303 | |||
2304 | } | ||
2305 | |||
2306 | |||
2265 | void gp10b_init_gr(struct gpu_ops *gops) | 2307 | void gp10b_init_gr(struct gpu_ops *gops) |
2266 | { | 2308 | { |
2267 | gm20b_init_gr(gops); | 2309 | gm20b_init_gr(gops); |
2268 | gops->gr.init_fs_state = gr_gp10b_init_fs_state; | 2310 | gops->gr.init_fs_state = gr_gp10b_init_fs_state; |
2269 | gops->gr.init_preemption_state = gr_gp10b_init_preemption_state; | 2311 | gops->gr.init_preemption_state = gr_gp10b_init_preemption_state; |
2270 | gops->gr.is_valid_class = gr_gp10b_is_valid_class; | 2312 | gops->gr.is_valid_class = gr_gp10b_is_valid_class; |
2313 | gops->gr.is_valid_gfx_class = gr_gp10b_is_valid_gfx_class; | ||
2314 | gops->gr.is_valid_compute_class = gr_gp10b_is_valid_compute_class; | ||
2271 | gops->gr.commit_global_cb_manager = gr_gp10b_commit_global_cb_manager; | 2315 | gops->gr.commit_global_cb_manager = gr_gp10b_commit_global_cb_manager; |
2272 | gops->gr.commit_global_pagepool = gr_gp10b_commit_global_pagepool; | 2316 | gops->gr.commit_global_pagepool = gr_gp10b_commit_global_pagepool; |
2317 | gops->gr.write_preemption_ptr = gr_gp10b_write_preemption_ptr; | ||
2273 | gops->gr.add_zbc_color = gr_gp10b_add_zbc_color; | 2318 | gops->gr.add_zbc_color = gr_gp10b_add_zbc_color; |
2274 | gops->gr.add_zbc_depth = gr_gp10b_add_zbc_depth; | 2319 | gops->gr.add_zbc_depth = gr_gp10b_add_zbc_depth; |
2275 | gops->gr.pagepool_default_size = gr_gp10b_pagepool_default_size; | 2320 | gops->gr.pagepool_default_size = gr_gp10b_pagepool_default_size; |