diff options
author | Terje Bergstrom <tbergstrom@nvidia.com> | 2016-05-06 18:13:54 -0400 |
---|---|---|
committer | Terje Bergstrom <tbergstrom@nvidia.com> | 2016-05-16 13:57:48 -0400 |
commit | 211edaefb71d06d34c2835a93249da58673bff8a (patch) | |
tree | 3bd5eed1cc9020fcc8af4e4ffd9653268d59eb9b /drivers/gpu/nvgpu/gk20a | |
parent | 3a1321ddcd33accd6a8a6efee2921ebf088b0f50 (diff) |
gpu: nvgpu: Fix CWD floorsweep programming
Program CWD TPC and SM registers correctly. The old code did not work
when there are more than 4 TPCs.
Refactor init_fs_state to reduce code duplication.
Change-Id: Id93c1f8df24f1b7ee60314c3204e288b91951a88
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/1143697
GVS: Gerrit_Virtual_Submit
Reviewed-by: Konsta Holtta <kholtta@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gk20a.h | 4 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 116 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gr_gk20a.h | 1 |
3 files changed, 84 insertions, 37 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h index 8dfe8eda..5d06a441 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h | |||
@@ -260,6 +260,10 @@ struct gpu_ops { | |||
260 | int (*get_preemption_mode_flags)(struct gk20a *g, | 260 | int (*get_preemption_mode_flags)(struct gk20a *g, |
261 | struct nvgpu_preemption_modes_rec *preemption_modes_rec); | 261 | struct nvgpu_preemption_modes_rec *preemption_modes_rec); |
262 | int (*fuse_override)(struct gk20a *g); | 262 | int (*fuse_override)(struct gk20a *g); |
263 | int (*load_smid_config)(struct gk20a *g); | ||
264 | void (*program_sm_id_numbering)(struct gk20a *g, | ||
265 | u32 gpc, u32 tpc, u32 smid); | ||
266 | void (*program_active_tpc_counts)(struct gk20a *g, u32 gpc); | ||
263 | } gr; | 267 | } gr; |
264 | const char *name; | 268 | const char *name; |
265 | struct { | 269 | struct { |
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index c98da273..901fea8c 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c | |||
@@ -1286,54 +1286,82 @@ static u32 gr_gk20a_get_gpc_tpc_mask(struct gk20a *g, u32 gpc_index) | |||
1286 | return 0x1; | 1286 | return 0x1; |
1287 | } | 1287 | } |
1288 | 1288 | ||
1289 | static int gr_gk20a_ctx_state_floorsweep(struct gk20a *g) | 1289 | static void gr_gk20a_program_active_tpc_counts(struct gk20a *g, u32 gpc_index) |
1290 | { | ||
1291 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); | ||
1292 | u32 gpc_offset = gpc_stride * gpc_index; | ||
1293 | struct gr_gk20a *gr = &g->gr; | ||
1294 | |||
1295 | gk20a_writel(g, gr_gpc0_gpm_pd_active_tpcs_r() + gpc_offset, | ||
1296 | gr_gpc0_gpm_pd_active_tpcs_num_f(gr->gpc_tpc_count[gpc_index])); | ||
1297 | gk20a_writel(g, gr_gpc0_gpm_sd_active_tpcs_r() + gpc_offset, | ||
1298 | gr_gpc0_gpm_sd_active_tpcs_num_f(gr->gpc_tpc_count[gpc_index])); | ||
1299 | } | ||
1300 | |||
1301 | static void gr_gk20a_init_sm_id_table(struct gk20a *g) | ||
1302 | { | ||
1303 | u32 gpc, tpc; | ||
1304 | u32 sm_id = 0; | ||
1305 | |||
1306 | for (tpc = 0; tpc < g->gr.max_tpc_per_gpc_count; tpc++) { | ||
1307 | for (gpc = 0; gpc < g->gr.gpc_count; gpc++) { | ||
1308 | |||
1309 | if (tpc < g->gr.gpc_tpc_count[gpc]) { | ||
1310 | g->gr.sm_to_cluster[sm_id].tpc_index = tpc; | ||
1311 | g->gr.sm_to_cluster[sm_id].gpc_index = gpc; | ||
1312 | sm_id++; | ||
1313 | } | ||
1314 | } | ||
1315 | } | ||
1316 | g->gr.no_of_sm = sm_id; | ||
1317 | } | ||
1318 | |||
1319 | static void gr_gk20a_program_sm_id_numbering(struct gk20a *g, | ||
1320 | u32 gpc, u32 tpc, u32 sm_id) | ||
1321 | { | ||
1322 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); | ||
1323 | u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); | ||
1324 | u32 gpc_offset = gpc_stride * gpc; | ||
1325 | u32 tpc_offset = tpc_in_gpc_stride * tpc; | ||
1326 | |||
1327 | gk20a_writel(g, gr_gpc0_tpc0_sm_cfg_r() + gpc_offset + tpc_offset, | ||
1328 | gr_gpc0_tpc0_sm_cfg_sm_id_f(sm_id)); | ||
1329 | gk20a_writel(g, gr_gpc0_tpc0_l1c_cfg_smid_r() + gpc_offset + tpc_offset, | ||
1330 | gr_gpc0_tpc0_l1c_cfg_smid_value_f(sm_id)); | ||
1331 | gk20a_writel(g, gr_gpc0_gpm_pd_sm_id_r(tpc) + gpc_offset, | ||
1332 | gr_gpc0_gpm_pd_sm_id_id_f(sm_id)); | ||
1333 | gk20a_writel(g, gr_gpc0_tpc0_pe_cfg_smid_r() + gpc_offset + tpc_offset, | ||
1334 | gr_gpc0_tpc0_pe_cfg_smid_value_f(sm_id)); | ||
1335 | } | ||
1336 | |||
1337 | int gr_gk20a_init_fs_state(struct gk20a *g) | ||
1290 | { | 1338 | { |
1291 | struct gr_gk20a *gr = &g->gr; | 1339 | struct gr_gk20a *gr = &g->gr; |
1292 | u32 tpc_index, gpc_index; | 1340 | u32 tpc_index, gpc_index; |
1293 | u32 tpc_offset, gpc_offset; | ||
1294 | u32 sm_id = 0, gpc_id = 0; | 1341 | u32 sm_id = 0, gpc_id = 0; |
1295 | u32 tpc_per_gpc; | 1342 | u32 tpc_per_gpc; |
1296 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); | 1343 | u32 fuse_tpc_mask; |
1297 | u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); | ||
1298 | 1344 | ||
1299 | gk20a_dbg_fn(""); | 1345 | gk20a_dbg_fn(""); |
1300 | 1346 | ||
1301 | for (tpc_index = 0; tpc_index < gr->max_tpc_per_gpc_count; tpc_index++) { | 1347 | gr_gk20a_init_sm_id_table(g); |
1302 | for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { | ||
1303 | gpc_offset = gpc_stride * gpc_index; | ||
1304 | if (tpc_index < gr->gpc_tpc_count[gpc_index]) { | ||
1305 | tpc_offset = tpc_in_gpc_stride * tpc_index; | ||
1306 | |||
1307 | gk20a_writel(g, gr_gpc0_tpc0_sm_cfg_r() + gpc_offset + tpc_offset, | ||
1308 | gr_gpc0_tpc0_sm_cfg_sm_id_f(sm_id)); | ||
1309 | gk20a_writel(g, gr_gpc0_tpc0_l1c_cfg_smid_r() + gpc_offset + tpc_offset, | ||
1310 | gr_gpc0_tpc0_l1c_cfg_smid_value_f(sm_id)); | ||
1311 | gk20a_writel(g, gr_gpc0_gpm_pd_sm_id_r(tpc_index) + gpc_offset, | ||
1312 | gr_gpc0_gpm_pd_sm_id_id_f(sm_id)); | ||
1313 | gk20a_writel(g, gr_gpc0_tpc0_pe_cfg_smid_r() + gpc_offset + tpc_offset, | ||
1314 | gr_gpc0_tpc0_pe_cfg_smid_value_f(sm_id)); | ||
1315 | |||
1316 | g->gr.sm_to_cluster[sm_id].tpc_index = tpc_index; | ||
1317 | g->gr.sm_to_cluster[sm_id].gpc_index = gpc_index; | ||
1318 | 1348 | ||
1319 | sm_id++; | 1349 | for (sm_id = 0; sm_id < gr->tpc_count; sm_id++) { |
1320 | } | 1350 | tpc_index = g->gr.sm_to_cluster[sm_id].tpc_index; |
1351 | gpc_index = g->gr.sm_to_cluster[sm_id].gpc_index; | ||
1321 | 1352 | ||
1322 | gk20a_writel(g, gr_gpc0_gpm_pd_active_tpcs_r() + gpc_offset, | 1353 | g->ops.gr.program_sm_id_numbering(g, gpc_index, tpc_index, sm_id); |
1323 | gr_gpc0_gpm_pd_active_tpcs_num_f(gr->gpc_tpc_count[gpc_index])); | ||
1324 | gk20a_writel(g, gr_gpc0_gpm_sd_active_tpcs_r() + gpc_offset, | ||
1325 | gr_gpc0_gpm_sd_active_tpcs_num_f(gr->gpc_tpc_count[gpc_index])); | ||
1326 | } | ||
1327 | } | ||
1328 | 1354 | ||
1329 | gr->no_of_sm = sm_id; | 1355 | if (g->ops.gr.program_active_tpc_counts) |
1356 | g->ops.gr.program_active_tpc_counts(g, gpc_index); | ||
1357 | } | ||
1330 | 1358 | ||
1331 | for (tpc_index = 0, gpc_id = 0; | 1359 | for (tpc_index = 0, gpc_id = 0; |
1332 | tpc_index < gr_pd_num_tpc_per_gpc__size_1_v(); | 1360 | tpc_index < gr_pd_num_tpc_per_gpc__size_1_v(); |
1333 | tpc_index++, gpc_id += 8) { | 1361 | tpc_index++, gpc_id += 8) { |
1334 | 1362 | ||
1335 | if (gpc_id >= gr->gpc_count) | 1363 | if (gpc_id >= gr->gpc_count) |
1336 | gpc_id = 0; | 1364 | continue; |
1337 | 1365 | ||
1338 | tpc_per_gpc = | 1366 | tpc_per_gpc = |
1339 | gr_pd_num_tpc_per_gpc_count0_f(gr->gpc_tpc_count[gpc_id + 0]) | | 1367 | gr_pd_num_tpc_per_gpc_count0_f(gr->gpc_tpc_count[gpc_id + 0]) | |
@@ -1365,9 +1393,19 @@ static int gr_gk20a_ctx_state_floorsweep(struct gk20a *g) | |||
1365 | gr_pd_dist_skip_table_gpc_4n3_mask_f(gr->gpc_skip_mask[gpc_index + 3])); | 1393 | gr_pd_dist_skip_table_gpc_4n3_mask_f(gr->gpc_skip_mask[gpc_index + 3])); |
1366 | } | 1394 | } |
1367 | 1395 | ||
1368 | gk20a_writel(g, gr_cwd_fs_r(), | 1396 | fuse_tpc_mask = g->ops.gr.get_gpc_tpc_mask(g, 0); |
1369 | gr_cwd_fs_num_gpcs_f(gr->gpc_count) | | 1397 | if (g->tpc_fs_mask_user && |
1370 | gr_cwd_fs_num_tpcs_f(gr->tpc_count)); | 1398 | fuse_tpc_mask == (0x1 << gr->max_tpc_count) - 1) { |
1399 | u32 val = g->tpc_fs_mask_user; | ||
1400 | val &= (0x1 << gr->max_tpc_count) - 1; | ||
1401 | gk20a_writel(g, gr_cwd_fs_r(), | ||
1402 | gr_cwd_fs_num_gpcs_f(gr->gpc_count) | | ||
1403 | gr_cwd_fs_num_tpcs_f(hweight32(val))); | ||
1404 | } else { | ||
1405 | gk20a_writel(g, gr_cwd_fs_r(), | ||
1406 | gr_cwd_fs_num_gpcs_f(gr->gpc_count) | | ||
1407 | gr_cwd_fs_num_tpcs_f(gr->tpc_count)); | ||
1408 | } | ||
1371 | 1409 | ||
1372 | gk20a_writel(g, gr_bes_zrop_settings_r(), | 1410 | gk20a_writel(g, gr_bes_zrop_settings_r(), |
1373 | gr_bes_zrop_settings_num_active_fbps_f(gr->num_fbps)); | 1411 | gr_bes_zrop_settings_num_active_fbps_f(gr->num_fbps)); |
@@ -4413,7 +4451,9 @@ static int gk20a_init_gr_setup_hw(struct gk20a *g) | |||
4413 | gr_gk20a_commit_global_timeslice(g, NULL, false); | 4451 | gr_gk20a_commit_global_timeslice(g, NULL, false); |
4414 | 4452 | ||
4415 | /* floorsweep anything left */ | 4453 | /* floorsweep anything left */ |
4416 | g->ops.gr.init_fs_state(g); | 4454 | err = g->ops.gr.init_fs_state(g); |
4455 | if (err) | ||
4456 | goto out; | ||
4417 | 4457 | ||
4418 | err = gr_gk20a_wait_idle(g, end_jiffies, GR_IDLE_CHECK_DEFAULT); | 4458 | err = gr_gk20a_wait_idle(g, end_jiffies, GR_IDLE_CHECK_DEFAULT); |
4419 | if (err) | 4459 | if (err) |
@@ -4466,7 +4506,7 @@ restore_fe_go_idle: | |||
4466 | 4506 | ||
4467 | out: | 4507 | out: |
4468 | gk20a_dbg_fn("done"); | 4508 | gk20a_dbg_fn("done"); |
4469 | return 0; | 4509 | return err; |
4470 | } | 4510 | } |
4471 | 4511 | ||
4472 | static void gr_gk20a_load_gating_prod(struct gk20a *g) | 4512 | static void gr_gk20a_load_gating_prod(struct gk20a *g) |
@@ -8633,7 +8673,7 @@ void gk20a_init_gr_ops(struct gpu_ops *gops) | |||
8633 | gops->gr.is_valid_class = gr_gk20a_is_valid_class; | 8673 | gops->gr.is_valid_class = gr_gk20a_is_valid_class; |
8634 | gops->gr.get_sm_dsm_perf_regs = gr_gk20a_get_sm_dsm_perf_regs; | 8674 | gops->gr.get_sm_dsm_perf_regs = gr_gk20a_get_sm_dsm_perf_regs; |
8635 | gops->gr.get_sm_dsm_perf_ctrl_regs = gr_gk20a_get_sm_dsm_perf_ctrl_regs; | 8675 | gops->gr.get_sm_dsm_perf_ctrl_regs = gr_gk20a_get_sm_dsm_perf_ctrl_regs; |
8636 | gops->gr.init_fs_state = gr_gk20a_ctx_state_floorsweep; | 8676 | gops->gr.init_fs_state = gr_gk20a_init_fs_state; |
8637 | gops->gr.set_hww_esr_report_mask = gr_gk20a_set_hww_esr_report_mask; | 8677 | gops->gr.set_hww_esr_report_mask = gr_gk20a_set_hww_esr_report_mask; |
8638 | gops->gr.setup_alpha_beta_tables = gr_gk20a_setup_alpha_beta_tables; | 8678 | gops->gr.setup_alpha_beta_tables = gr_gk20a_setup_alpha_beta_tables; |
8639 | gops->gr.falcon_load_ucode = gr_gk20a_load_ctxsw_ucode_segments; | 8679 | gops->gr.falcon_load_ucode = gr_gk20a_load_ctxsw_ucode_segments; |
@@ -8681,4 +8721,6 @@ void gk20a_init_gr_ops(struct gpu_ops *gops) | |||
8681 | gops->gr.clear_sm_error_state = gk20a_gr_clear_sm_error_state; | 8721 | gops->gr.clear_sm_error_state = gk20a_gr_clear_sm_error_state; |
8682 | gops->gr.suspend_contexts = gr_gk20a_suspend_contexts; | 8722 | gops->gr.suspend_contexts = gr_gk20a_suspend_contexts; |
8683 | gops->gr.get_preemption_mode_flags = gr_gk20a_get_preemption_mode_flags; | 8723 | gops->gr.get_preemption_mode_flags = gr_gk20a_get_preemption_mode_flags; |
8724 | gops->gr.program_active_tpc_counts = gr_gk20a_program_active_tpc_counts; | ||
8725 | gops->gr.program_sm_id_numbering = gr_gk20a_program_sm_id_numbering; | ||
8684 | } | 8726 | } |
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h index 15d1ea7d..b5d97727 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h | |||
@@ -533,6 +533,7 @@ void gr_gk20a_commit_global_pagepool(struct gk20a *g, | |||
533 | u64 addr, u32 size, bool patch); | 533 | u64 addr, u32 size, bool patch); |
534 | void gk20a_gr_set_shader_exceptions(struct gk20a *g, u32 data); | 534 | void gk20a_gr_set_shader_exceptions(struct gk20a *g, u32 data); |
535 | void gr_gk20a_enable_hww_exceptions(struct gk20a *g); | 535 | void gr_gk20a_enable_hww_exceptions(struct gk20a *g); |
536 | int gr_gk20a_init_fs_state(struct gk20a *g); | ||
536 | int gr_gk20a_setup_rop_mapping(struct gk20a *g, struct gr_gk20a *gr); | 537 | int gr_gk20a_setup_rop_mapping(struct gk20a *g, struct gr_gk20a *gr); |
537 | int gr_gk20a_init_ctxsw_ucode(struct gk20a *g); | 538 | int gr_gk20a_init_ctxsw_ucode(struct gk20a *g); |
538 | int gr_gk20a_load_ctxsw_ucode(struct gk20a *g); | 539 | int gr_gk20a_load_ctxsw_ucode(struct gk20a *g); |