diff options
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gk20a.h | 4 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 116 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gr_gk20a.h | 1 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gm20b/gr_gm20b.c | 133 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gm20b/gr_gm20b.h | 5 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gm20b/hw_gr_gm20b.h | 16 |
6 files changed, 159 insertions, 116 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h index 8dfe8eda..5d06a441 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h | |||
@@ -260,6 +260,10 @@ struct gpu_ops { | |||
260 | int (*get_preemption_mode_flags)(struct gk20a *g, | 260 | int (*get_preemption_mode_flags)(struct gk20a *g, |
261 | struct nvgpu_preemption_modes_rec *preemption_modes_rec); | 261 | struct nvgpu_preemption_modes_rec *preemption_modes_rec); |
262 | int (*fuse_override)(struct gk20a *g); | 262 | int (*fuse_override)(struct gk20a *g); |
263 | int (*load_smid_config)(struct gk20a *g); | ||
264 | void (*program_sm_id_numbering)(struct gk20a *g, | ||
265 | u32 gpc, u32 tpc, u32 smid); | ||
266 | void (*program_active_tpc_counts)(struct gk20a *g, u32 gpc); | ||
263 | } gr; | 267 | } gr; |
264 | const char *name; | 268 | const char *name; |
265 | struct { | 269 | struct { |
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index c98da273..901fea8c 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c | |||
@@ -1286,54 +1286,82 @@ static u32 gr_gk20a_get_gpc_tpc_mask(struct gk20a *g, u32 gpc_index) | |||
1286 | return 0x1; | 1286 | return 0x1; |
1287 | } | 1287 | } |
1288 | 1288 | ||
1289 | static int gr_gk20a_ctx_state_floorsweep(struct gk20a *g) | 1289 | static void gr_gk20a_program_active_tpc_counts(struct gk20a *g, u32 gpc_index) |
1290 | { | ||
1291 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); | ||
1292 | u32 gpc_offset = gpc_stride * gpc_index; | ||
1293 | struct gr_gk20a *gr = &g->gr; | ||
1294 | |||
1295 | gk20a_writel(g, gr_gpc0_gpm_pd_active_tpcs_r() + gpc_offset, | ||
1296 | gr_gpc0_gpm_pd_active_tpcs_num_f(gr->gpc_tpc_count[gpc_index])); | ||
1297 | gk20a_writel(g, gr_gpc0_gpm_sd_active_tpcs_r() + gpc_offset, | ||
1298 | gr_gpc0_gpm_sd_active_tpcs_num_f(gr->gpc_tpc_count[gpc_index])); | ||
1299 | } | ||
1300 | |||
1301 | static void gr_gk20a_init_sm_id_table(struct gk20a *g) | ||
1302 | { | ||
1303 | u32 gpc, tpc; | ||
1304 | u32 sm_id = 0; | ||
1305 | |||
1306 | for (tpc = 0; tpc < g->gr.max_tpc_per_gpc_count; tpc++) { | ||
1307 | for (gpc = 0; gpc < g->gr.gpc_count; gpc++) { | ||
1308 | |||
1309 | if (tpc < g->gr.gpc_tpc_count[gpc]) { | ||
1310 | g->gr.sm_to_cluster[sm_id].tpc_index = tpc; | ||
1311 | g->gr.sm_to_cluster[sm_id].gpc_index = gpc; | ||
1312 | sm_id++; | ||
1313 | } | ||
1314 | } | ||
1315 | } | ||
1316 | g->gr.no_of_sm = sm_id; | ||
1317 | } | ||
1318 | |||
1319 | static void gr_gk20a_program_sm_id_numbering(struct gk20a *g, | ||
1320 | u32 gpc, u32 tpc, u32 sm_id) | ||
1321 | { | ||
1322 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); | ||
1323 | u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); | ||
1324 | u32 gpc_offset = gpc_stride * gpc; | ||
1325 | u32 tpc_offset = tpc_in_gpc_stride * tpc; | ||
1326 | |||
1327 | gk20a_writel(g, gr_gpc0_tpc0_sm_cfg_r() + gpc_offset + tpc_offset, | ||
1328 | gr_gpc0_tpc0_sm_cfg_sm_id_f(sm_id)); | ||
1329 | gk20a_writel(g, gr_gpc0_tpc0_l1c_cfg_smid_r() + gpc_offset + tpc_offset, | ||
1330 | gr_gpc0_tpc0_l1c_cfg_smid_value_f(sm_id)); | ||
1331 | gk20a_writel(g, gr_gpc0_gpm_pd_sm_id_r(tpc) + gpc_offset, | ||
1332 | gr_gpc0_gpm_pd_sm_id_id_f(sm_id)); | ||
1333 | gk20a_writel(g, gr_gpc0_tpc0_pe_cfg_smid_r() + gpc_offset + tpc_offset, | ||
1334 | gr_gpc0_tpc0_pe_cfg_smid_value_f(sm_id)); | ||
1335 | } | ||
1336 | |||
1337 | int gr_gk20a_init_fs_state(struct gk20a *g) | ||
1290 | { | 1338 | { |
1291 | struct gr_gk20a *gr = &g->gr; | 1339 | struct gr_gk20a *gr = &g->gr; |
1292 | u32 tpc_index, gpc_index; | 1340 | u32 tpc_index, gpc_index; |
1293 | u32 tpc_offset, gpc_offset; | ||
1294 | u32 sm_id = 0, gpc_id = 0; | 1341 | u32 sm_id = 0, gpc_id = 0; |
1295 | u32 tpc_per_gpc; | 1342 | u32 tpc_per_gpc; |
1296 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); | 1343 | u32 fuse_tpc_mask; |
1297 | u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); | ||
1298 | 1344 | ||
1299 | gk20a_dbg_fn(""); | 1345 | gk20a_dbg_fn(""); |
1300 | 1346 | ||
1301 | for (tpc_index = 0; tpc_index < gr->max_tpc_per_gpc_count; tpc_index++) { | 1347 | gr_gk20a_init_sm_id_table(g); |
1302 | for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { | ||
1303 | gpc_offset = gpc_stride * gpc_index; | ||
1304 | if (tpc_index < gr->gpc_tpc_count[gpc_index]) { | ||
1305 | tpc_offset = tpc_in_gpc_stride * tpc_index; | ||
1306 | |||
1307 | gk20a_writel(g, gr_gpc0_tpc0_sm_cfg_r() + gpc_offset + tpc_offset, | ||
1308 | gr_gpc0_tpc0_sm_cfg_sm_id_f(sm_id)); | ||
1309 | gk20a_writel(g, gr_gpc0_tpc0_l1c_cfg_smid_r() + gpc_offset + tpc_offset, | ||
1310 | gr_gpc0_tpc0_l1c_cfg_smid_value_f(sm_id)); | ||
1311 | gk20a_writel(g, gr_gpc0_gpm_pd_sm_id_r(tpc_index) + gpc_offset, | ||
1312 | gr_gpc0_gpm_pd_sm_id_id_f(sm_id)); | ||
1313 | gk20a_writel(g, gr_gpc0_tpc0_pe_cfg_smid_r() + gpc_offset + tpc_offset, | ||
1314 | gr_gpc0_tpc0_pe_cfg_smid_value_f(sm_id)); | ||
1315 | |||
1316 | g->gr.sm_to_cluster[sm_id].tpc_index = tpc_index; | ||
1317 | g->gr.sm_to_cluster[sm_id].gpc_index = gpc_index; | ||
1318 | 1348 | ||
1319 | sm_id++; | 1349 | for (sm_id = 0; sm_id < gr->tpc_count; sm_id++) { |
1320 | } | 1350 | tpc_index = g->gr.sm_to_cluster[sm_id].tpc_index; |
1351 | gpc_index = g->gr.sm_to_cluster[sm_id].gpc_index; | ||
1321 | 1352 | ||
1322 | gk20a_writel(g, gr_gpc0_gpm_pd_active_tpcs_r() + gpc_offset, | 1353 | g->ops.gr.program_sm_id_numbering(g, gpc_index, tpc_index, sm_id); |
1323 | gr_gpc0_gpm_pd_active_tpcs_num_f(gr->gpc_tpc_count[gpc_index])); | ||
1324 | gk20a_writel(g, gr_gpc0_gpm_sd_active_tpcs_r() + gpc_offset, | ||
1325 | gr_gpc0_gpm_sd_active_tpcs_num_f(gr->gpc_tpc_count[gpc_index])); | ||
1326 | } | ||
1327 | } | ||
1328 | 1354 | ||
1329 | gr->no_of_sm = sm_id; | 1355 | if (g->ops.gr.program_active_tpc_counts) |
1356 | g->ops.gr.program_active_tpc_counts(g, gpc_index); | ||
1357 | } | ||
1330 | 1358 | ||
1331 | for (tpc_index = 0, gpc_id = 0; | 1359 | for (tpc_index = 0, gpc_id = 0; |
1332 | tpc_index < gr_pd_num_tpc_per_gpc__size_1_v(); | 1360 | tpc_index < gr_pd_num_tpc_per_gpc__size_1_v(); |
1333 | tpc_index++, gpc_id += 8) { | 1361 | tpc_index++, gpc_id += 8) { |
1334 | 1362 | ||
1335 | if (gpc_id >= gr->gpc_count) | 1363 | if (gpc_id >= gr->gpc_count) |
1336 | gpc_id = 0; | 1364 | continue; |
1337 | 1365 | ||
1338 | tpc_per_gpc = | 1366 | tpc_per_gpc = |
1339 | gr_pd_num_tpc_per_gpc_count0_f(gr->gpc_tpc_count[gpc_id + 0]) | | 1367 | gr_pd_num_tpc_per_gpc_count0_f(gr->gpc_tpc_count[gpc_id + 0]) | |
@@ -1365,9 +1393,19 @@ static int gr_gk20a_ctx_state_floorsweep(struct gk20a *g) | |||
1365 | gr_pd_dist_skip_table_gpc_4n3_mask_f(gr->gpc_skip_mask[gpc_index + 3])); | 1393 | gr_pd_dist_skip_table_gpc_4n3_mask_f(gr->gpc_skip_mask[gpc_index + 3])); |
1366 | } | 1394 | } |
1367 | 1395 | ||
1368 | gk20a_writel(g, gr_cwd_fs_r(), | 1396 | fuse_tpc_mask = g->ops.gr.get_gpc_tpc_mask(g, 0); |
1369 | gr_cwd_fs_num_gpcs_f(gr->gpc_count) | | 1397 | if (g->tpc_fs_mask_user && |
1370 | gr_cwd_fs_num_tpcs_f(gr->tpc_count)); | 1398 | fuse_tpc_mask == (0x1 << gr->max_tpc_count) - 1) { |
1399 | u32 val = g->tpc_fs_mask_user; | ||
1400 | val &= (0x1 << gr->max_tpc_count) - 1; | ||
1401 | gk20a_writel(g, gr_cwd_fs_r(), | ||
1402 | gr_cwd_fs_num_gpcs_f(gr->gpc_count) | | ||
1403 | gr_cwd_fs_num_tpcs_f(hweight32(val))); | ||
1404 | } else { | ||
1405 | gk20a_writel(g, gr_cwd_fs_r(), | ||
1406 | gr_cwd_fs_num_gpcs_f(gr->gpc_count) | | ||
1407 | gr_cwd_fs_num_tpcs_f(gr->tpc_count)); | ||
1408 | } | ||
1371 | 1409 | ||
1372 | gk20a_writel(g, gr_bes_zrop_settings_r(), | 1410 | gk20a_writel(g, gr_bes_zrop_settings_r(), |
1373 | gr_bes_zrop_settings_num_active_fbps_f(gr->num_fbps)); | 1411 | gr_bes_zrop_settings_num_active_fbps_f(gr->num_fbps)); |
@@ -4413,7 +4451,9 @@ static int gk20a_init_gr_setup_hw(struct gk20a *g) | |||
4413 | gr_gk20a_commit_global_timeslice(g, NULL, false); | 4451 | gr_gk20a_commit_global_timeslice(g, NULL, false); |
4414 | 4452 | ||
4415 | /* floorsweep anything left */ | 4453 | /* floorsweep anything left */ |
4416 | g->ops.gr.init_fs_state(g); | 4454 | err = g->ops.gr.init_fs_state(g); |
4455 | if (err) | ||
4456 | goto out; | ||
4417 | 4457 | ||
4418 | err = gr_gk20a_wait_idle(g, end_jiffies, GR_IDLE_CHECK_DEFAULT); | 4458 | err = gr_gk20a_wait_idle(g, end_jiffies, GR_IDLE_CHECK_DEFAULT); |
4419 | if (err) | 4459 | if (err) |
@@ -4466,7 +4506,7 @@ restore_fe_go_idle: | |||
4466 | 4506 | ||
4467 | out: | 4507 | out: |
4468 | gk20a_dbg_fn("done"); | 4508 | gk20a_dbg_fn("done"); |
4469 | return 0; | 4509 | return err; |
4470 | } | 4510 | } |
4471 | 4511 | ||
4472 | static void gr_gk20a_load_gating_prod(struct gk20a *g) | 4512 | static void gr_gk20a_load_gating_prod(struct gk20a *g) |
@@ -8633,7 +8673,7 @@ void gk20a_init_gr_ops(struct gpu_ops *gops) | |||
8633 | gops->gr.is_valid_class = gr_gk20a_is_valid_class; | 8673 | gops->gr.is_valid_class = gr_gk20a_is_valid_class; |
8634 | gops->gr.get_sm_dsm_perf_regs = gr_gk20a_get_sm_dsm_perf_regs; | 8674 | gops->gr.get_sm_dsm_perf_regs = gr_gk20a_get_sm_dsm_perf_regs; |
8635 | gops->gr.get_sm_dsm_perf_ctrl_regs = gr_gk20a_get_sm_dsm_perf_ctrl_regs; | 8675 | gops->gr.get_sm_dsm_perf_ctrl_regs = gr_gk20a_get_sm_dsm_perf_ctrl_regs; |
8636 | gops->gr.init_fs_state = gr_gk20a_ctx_state_floorsweep; | 8676 | gops->gr.init_fs_state = gr_gk20a_init_fs_state; |
8637 | gops->gr.set_hww_esr_report_mask = gr_gk20a_set_hww_esr_report_mask; | 8677 | gops->gr.set_hww_esr_report_mask = gr_gk20a_set_hww_esr_report_mask; |
8638 | gops->gr.setup_alpha_beta_tables = gr_gk20a_setup_alpha_beta_tables; | 8678 | gops->gr.setup_alpha_beta_tables = gr_gk20a_setup_alpha_beta_tables; |
8639 | gops->gr.falcon_load_ucode = gr_gk20a_load_ctxsw_ucode_segments; | 8679 | gops->gr.falcon_load_ucode = gr_gk20a_load_ctxsw_ucode_segments; |
@@ -8681,4 +8721,6 @@ void gk20a_init_gr_ops(struct gpu_ops *gops) | |||
8681 | gops->gr.clear_sm_error_state = gk20a_gr_clear_sm_error_state; | 8721 | gops->gr.clear_sm_error_state = gk20a_gr_clear_sm_error_state; |
8682 | gops->gr.suspend_contexts = gr_gk20a_suspend_contexts; | 8722 | gops->gr.suspend_contexts = gr_gk20a_suspend_contexts; |
8683 | gops->gr.get_preemption_mode_flags = gr_gk20a_get_preemption_mode_flags; | 8723 | gops->gr.get_preemption_mode_flags = gr_gk20a_get_preemption_mode_flags; |
8724 | gops->gr.program_active_tpc_counts = gr_gk20a_program_active_tpc_counts; | ||
8725 | gops->gr.program_sm_id_numbering = gr_gk20a_program_sm_id_numbering; | ||
8684 | } | 8726 | } |
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h index 15d1ea7d..b5d97727 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h | |||
@@ -533,6 +533,7 @@ void gr_gk20a_commit_global_pagepool(struct gk20a *g, | |||
533 | u64 addr, u32 size, bool patch); | 533 | u64 addr, u32 size, bool patch); |
534 | void gk20a_gr_set_shader_exceptions(struct gk20a *g, u32 data); | 534 | void gk20a_gr_set_shader_exceptions(struct gk20a *g, u32 data); |
535 | void gr_gk20a_enable_hww_exceptions(struct gk20a *g); | 535 | void gr_gk20a_enable_hww_exceptions(struct gk20a *g); |
536 | int gr_gk20a_init_fs_state(struct gk20a *g); | ||
536 | int gr_gk20a_setup_rop_mapping(struct gk20a *g, struct gr_gk20a *gr); | 537 | int gr_gk20a_setup_rop_mapping(struct gk20a *g, struct gr_gk20a *gr); |
537 | int gr_gk20a_init_ctxsw_ucode(struct gk20a *g); | 538 | int gr_gk20a_init_ctxsw_ucode(struct gk20a *g); |
538 | int gr_gk20a_load_ctxsw_ucode(struct gk20a *g); | 539 | int gr_gk20a_load_ctxsw_ucode(struct gk20a *g); |
diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c index 2197bae5..0659eefd 100644 --- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c | |||
@@ -552,79 +552,71 @@ static void gr_gm20b_load_tpc_mask(struct gk20a *g) | |||
552 | } | 552 | } |
553 | } | 553 | } |
554 | 554 | ||
555 | int gr_gm20b_ctx_state_floorsweep(struct gk20a *g) | 555 | static void gr_gm20b_program_sm_id_numbering(struct gk20a *g, |
556 | u32 gpc, u32 tpc, u32 smid) | ||
556 | { | 557 | { |
557 | struct gr_gk20a *gr = &g->gr; | ||
558 | u32 tpc_index, gpc_index; | ||
559 | u32 tpc_offset, gpc_offset; | ||
560 | u32 sm_id = 0; | ||
561 | u32 tpc_per_gpc = 0; | ||
562 | u32 tpc_sm_id = 0, gpc_tpc_id = 0; | ||
563 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); | 558 | u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); |
564 | u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); | 559 | u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); |
565 | u32 fuse_tpc_mask; | 560 | u32 gpc_offset = gpc_stride * gpc; |
561 | u32 tpc_offset = tpc_in_gpc_stride * tpc; | ||
562 | |||
563 | gk20a_writel(g, gr_gpc0_tpc0_sm_cfg_r() + gpc_offset + tpc_offset, | ||
564 | gr_gpc0_tpc0_sm_cfg_sm_id_f(smid)); | ||
565 | gk20a_writel(g, gr_gpc0_gpm_pd_sm_id_r(tpc) + gpc_offset, | ||
566 | gr_gpc0_gpm_pd_sm_id_id_f(smid)); | ||
567 | gk20a_writel(g, gr_gpc0_tpc0_pe_cfg_smid_r() + gpc_offset + tpc_offset, | ||
568 | gr_gpc0_tpc0_pe_cfg_smid_value_f(smid)); | ||
569 | } | ||
566 | 570 | ||
567 | gk20a_dbg_fn(""); | 571 | static int gr_gm20b_load_smid_config(struct gk20a *g) |
572 | { | ||
573 | u32 *tpc_sm_id; | ||
574 | u32 i, j; | ||
575 | u32 tpc_index, gpc_index; | ||
568 | 576 | ||
569 | for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { | 577 | tpc_sm_id = kcalloc(gr_cwd_sm_id__size_1_v(), sizeof(u32), GFP_KERNEL); |
570 | gpc_offset = gpc_stride * gpc_index; | 578 | if (!tpc_sm_id) |
571 | for (tpc_index = 0; tpc_index < gr->gpc_tpc_count[gpc_index]; | 579 | return -ENOMEM; |
572 | tpc_index++) { | ||
573 | tpc_offset = tpc_in_gpc_stride * tpc_index; | ||
574 | |||
575 | gk20a_writel(g, gr_gpc0_tpc0_sm_cfg_r() | ||
576 | + gpc_offset + tpc_offset, | ||
577 | gr_gpc0_tpc0_sm_cfg_sm_id_f(sm_id)); | ||
578 | gk20a_writel(g, gr_gpc0_gpm_pd_sm_id_r(tpc_index) | ||
579 | + gpc_offset, | ||
580 | gr_gpc0_gpm_pd_sm_id_id_f(sm_id)); | ||
581 | gk20a_writel(g, gr_gpc0_tpc0_pe_cfg_smid_r() | ||
582 | + gpc_offset + tpc_offset, | ||
583 | gr_gpc0_tpc0_pe_cfg_smid_value_f(sm_id)); | ||
584 | |||
585 | g->gr.sm_to_cluster[sm_id].tpc_index = tpc_index; | ||
586 | g->gr.sm_to_cluster[sm_id].gpc_index = gpc_index; | ||
587 | |||
588 | sm_id++; | ||
589 | } | ||
590 | } | ||
591 | 580 | ||
592 | gr->no_of_sm = sm_id; | 581 | /* Each NV_PGRAPH_PRI_CWD_GPC_TPC_ID can store 4 TPCs.*/ |
582 | for (i = 0; i <= ((g->gr.tpc_count-1) / 4); i++) { | ||
583 | u32 reg = 0; | ||
584 | u32 bit_stride = gr_cwd_gpc_tpc_id_gpc0_s() + | ||
585 | gr_cwd_gpc_tpc_id_tpc0_s(); | ||
593 | 586 | ||
594 | for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) | 587 | for (j = 0; j < 4; j++) { |
595 | tpc_per_gpc |= gr->gpc_tpc_count[gpc_index] | 588 | u32 sm_id = (i / 4) + j; |
596 | << (gr_pd_num_tpc_per_gpc__size_1_v() * gpc_index); | 589 | u32 bits; |
597 | gk20a_writel(g, gr_pd_num_tpc_per_gpc_r(0), tpc_per_gpc); | ||
598 | gk20a_writel(g, gr_ds_num_tpc_per_gpc_r(0), tpc_per_gpc); | ||
599 | 590 | ||
600 | /* gr__setup_pd_mapping stubbed for gk20a */ | 591 | if (sm_id >= g->gr.tpc_count) |
601 | gr_gk20a_setup_rop_mapping(g, gr); | 592 | break; |
602 | 593 | ||
603 | for (gpc_index = 0; | 594 | gpc_index = g->gr.sm_to_cluster[sm_id].gpc_index; |
604 | gpc_index < gr_pd_dist_skip_table__size_1_v() * 4; | 595 | tpc_index = g->gr.sm_to_cluster[sm_id].tpc_index; |
605 | gpc_index += 4) { | ||
606 | 596 | ||
607 | gk20a_writel(g, gr_pd_dist_skip_table_r(gpc_index/4), | 597 | bits = gr_cwd_gpc_tpc_id_gpc0_f(gpc_index) | |
608 | gr_pd_dist_skip_table_gpc_4n0_mask_f(gr->gpc_skip_mask[gpc_index]) | | 598 | gr_cwd_gpc_tpc_id_tpc0_f(tpc_index); |
609 | gr_pd_dist_skip_table_gpc_4n1_mask_f(gr->gpc_skip_mask[gpc_index + 1]) | | 599 | reg |= bits << (j * bit_stride); |
610 | gr_pd_dist_skip_table_gpc_4n2_mask_f(gr->gpc_skip_mask[gpc_index + 2]) | | ||
611 | gr_pd_dist_skip_table_gpc_4n3_mask_f(gr->gpc_skip_mask[gpc_index + 3])); | ||
612 | } | ||
613 | 600 | ||
614 | fuse_tpc_mask = g->ops.gr.get_gpc_tpc_mask(g, 0); | 601 | tpc_sm_id[gpc_index] |= sm_id << tpc_index * bit_stride; |
615 | if (g->tpc_fs_mask_user && | 602 | } |
616 | fuse_tpc_mask == (0x1 << gr->max_tpc_count) - 1) { | 603 | gk20a_writel(g, gr_cwd_gpc_tpc_id_r(i), reg); |
617 | u32 val = g->tpc_fs_mask_user; | ||
618 | val &= (0x1 << gr->max_tpc_count) - 1; | ||
619 | gk20a_writel(g, gr_cwd_fs_r(), | ||
620 | gr_cwd_fs_num_gpcs_f(gr->gpc_count) | | ||
621 | gr_cwd_fs_num_tpcs_f(hweight32(val))); | ||
622 | } else { | ||
623 | gk20a_writel(g, gr_cwd_fs_r(), | ||
624 | gr_cwd_fs_num_gpcs_f(gr->gpc_count) | | ||
625 | gr_cwd_fs_num_tpcs_f(gr->tpc_count)); | ||
626 | } | 604 | } |
627 | 605 | ||
606 | for (i = 0; i < gr_cwd_sm_id__size_1_v(); i++) | ||
607 | gk20a_writel(g, gr_cwd_sm_id_r(i), tpc_sm_id[i]); | ||
608 | |||
609 | kfree(tpc_sm_id); | ||
610 | |||
611 | return 0; | ||
612 | } | ||
613 | |||
614 | int gr_gm20b_init_fs_state(struct gk20a *g) | ||
615 | { | ||
616 | gk20a_dbg_fn(""); | ||
617 | |||
618 | gr_gk20a_init_fs_state(g); | ||
619 | |||
628 | gr_gm20b_load_tpc_mask(g); | 620 | gr_gm20b_load_tpc_mask(g); |
629 | 621 | ||
630 | gk20a_writel(g, gr_bes_zrop_settings_r(), | 622 | gk20a_writel(g, gr_bes_zrop_settings_r(), |
@@ -636,22 +628,7 @@ int gr_gm20b_ctx_state_floorsweep(struct gk20a *g) | |||
636 | gk20a_readl(g, gr_be0_crop_debug3_r()) | | 628 | gk20a_readl(g, gr_be0_crop_debug3_r()) | |
637 | gr_bes_crop_debug3_comp_vdc_4to2_disable_m()); | 629 | gr_bes_crop_debug3_comp_vdc_4to2_disable_m()); |
638 | 630 | ||
639 | for (tpc_index = 0; tpc_index < gr->tpc_count; tpc_index++) { | 631 | g->ops.gr.load_smid_config(g); |
640 | if (tpc_index == 0) { | ||
641 | gpc_tpc_id |= gr_cwd_gpc_tpc_id_tpc0_f(tpc_index); | ||
642 | tpc_sm_id |= gr_cwd_sm_id_tpc0_f(tpc_index); | ||
643 | } else if (tpc_index == 1) { | ||
644 | gpc_tpc_id |= gr_cwd_gpc_tpc_id_tpc1_f(tpc_index); | ||
645 | tpc_sm_id |= gr_cwd_sm_id_tpc1_f(tpc_index); | ||
646 | } | ||
647 | } | ||
648 | |||
649 | /* Each NV_PGRAPH_PRI_CWD_GPC_TPC_ID can store 4 TPCs. | ||
650 | * Since we know TPC number is less than 5. We select | ||
651 | * index 0 directly. */ | ||
652 | gk20a_writel(g, gr_cwd_gpc_tpc_id_r(0), gpc_tpc_id); | ||
653 | |||
654 | gk20a_writel(g, gr_cwd_sm_id_r(0), tpc_sm_id); | ||
655 | 632 | ||
656 | return 0; | 633 | return 0; |
657 | } | 634 | } |
@@ -1443,7 +1420,7 @@ void gm20b_init_gr(struct gpu_ops *gops) | |||
1443 | gops->gr.is_valid_class = gr_gm20b_is_valid_class; | 1420 | gops->gr.is_valid_class = gr_gm20b_is_valid_class; |
1444 | gops->gr.get_sm_dsm_perf_regs = gr_gm20b_get_sm_dsm_perf_regs; | 1421 | gops->gr.get_sm_dsm_perf_regs = gr_gm20b_get_sm_dsm_perf_regs; |
1445 | gops->gr.get_sm_dsm_perf_ctrl_regs = gr_gm20b_get_sm_dsm_perf_ctrl_regs; | 1422 | gops->gr.get_sm_dsm_perf_ctrl_regs = gr_gm20b_get_sm_dsm_perf_ctrl_regs; |
1446 | gops->gr.init_fs_state = gr_gm20b_ctx_state_floorsweep; | 1423 | gops->gr.init_fs_state = gr_gm20b_init_fs_state; |
1447 | gops->gr.set_hww_esr_report_mask = gr_gm20b_set_hww_esr_report_mask; | 1424 | gops->gr.set_hww_esr_report_mask = gr_gm20b_set_hww_esr_report_mask; |
1448 | gops->gr.falcon_load_ucode = gr_gm20b_load_ctxsw_ucode_segments; | 1425 | gops->gr.falcon_load_ucode = gr_gm20b_load_ctxsw_ucode_segments; |
1449 | if (gops->privsecurity) | 1426 | if (gops->privsecurity) |
@@ -1499,4 +1476,6 @@ void gm20b_init_gr(struct gpu_ops *gops) | |||
1499 | gops->gr.suspend_contexts = gr_gk20a_suspend_contexts; | 1476 | gops->gr.suspend_contexts = gr_gk20a_suspend_contexts; |
1500 | gops->gr.get_preemption_mode_flags = gr_gm20b_get_preemption_mode_flags; | 1477 | gops->gr.get_preemption_mode_flags = gr_gm20b_get_preemption_mode_flags; |
1501 | gops->gr.fuse_override = gm20b_gr_fuse_override; | 1478 | gops->gr.fuse_override = gm20b_gr_fuse_override; |
1479 | gops->gr.load_smid_config = gr_gm20b_load_smid_config; | ||
1480 | gops->gr.program_sm_id_numbering = gr_gm20b_program_sm_id_numbering; | ||
1502 | } | 1481 | } |
diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.h b/drivers/gpu/nvgpu/gm20b/gr_gm20b.h index 90f933bd..2a585e63 100644 --- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.h +++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.h | |||
@@ -1,7 +1,7 @@ | |||
1 | /* | 1 | /* |
2 | * GM20B GPC MMU | 2 | * GM20B GPC MMU |
3 | * | 3 | * |
4 | * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved. | 4 | * Copyright (c) 2014-2016, NVIDIA CORPORATION. All rights reserved. |
5 | * | 5 | * |
6 | * This program is free software; you can redistribute it and/or modify it | 6 | * This program is free software; you can redistribute it and/or modify it |
7 | * under the terms and conditions of the GNU General Public License, | 7 | * under the terms and conditions of the GNU General Public License, |
@@ -46,5 +46,6 @@ void gm20b_init_gr(struct gpu_ops *gops); | |||
46 | void gr_gm20b_commit_global_attrib_cb(struct gk20a *g, | 46 | void gr_gm20b_commit_global_attrib_cb(struct gk20a *g, |
47 | struct channel_ctx_gk20a *ch_ctx, | 47 | struct channel_ctx_gk20a *ch_ctx, |
48 | u64 addr, bool patch); | 48 | u64 addr, bool patch); |
49 | int gr_gm20b_ctx_state_floorsweep(struct gk20a *g); | 49 | int gr_gm20b_init_fs_state(struct gk20a *g); |
50 | |||
50 | #endif | 51 | #endif |
diff --git a/drivers/gpu/nvgpu/gm20b/hw_gr_gm20b.h b/drivers/gpu/nvgpu/gm20b/hw_gr_gm20b.h index 73861c07..45240e97 100644 --- a/drivers/gpu/nvgpu/gm20b/hw_gr_gm20b.h +++ b/drivers/gpu/nvgpu/gm20b/hw_gr_gm20b.h | |||
@@ -1962,10 +1962,22 @@ static inline u32 gr_cwd_gpc_tpc_id_r(u32 i) | |||
1962 | { | 1962 | { |
1963 | return 0x00405b60 + i*4; | 1963 | return 0x00405b60 + i*4; |
1964 | } | 1964 | } |
1965 | static inline u32 gr_cwd_gpc_tpc_id_tpc0_s(void) | ||
1966 | { | ||
1967 | return 4; | ||
1968 | } | ||
1965 | static inline u32 gr_cwd_gpc_tpc_id_tpc0_f(u32 v) | 1969 | static inline u32 gr_cwd_gpc_tpc_id_tpc0_f(u32 v) |
1966 | { | 1970 | { |
1967 | return (v & 0xf) << 0; | 1971 | return (v & 0xf) << 0; |
1968 | } | 1972 | } |
1973 | static inline u32 gr_cwd_gpc_tpc_id_gpc0_s(void) | ||
1974 | { | ||
1975 | return 4; | ||
1976 | } | ||
1977 | static inline u32 gr_cwd_gpc_tpc_id_gpc0_f(u32 v) | ||
1978 | { | ||
1979 | return (v & 0xf) << 4; | ||
1980 | } | ||
1969 | static inline u32 gr_cwd_gpc_tpc_id_tpc1_f(u32 v) | 1981 | static inline u32 gr_cwd_gpc_tpc_id_tpc1_f(u32 v) |
1970 | { | 1982 | { |
1971 | return (v & 0xf) << 8; | 1983 | return (v & 0xf) << 8; |
@@ -1974,6 +1986,10 @@ static inline u32 gr_cwd_sm_id_r(u32 i) | |||
1974 | { | 1986 | { |
1975 | return 0x00405ba0 + i*4; | 1987 | return 0x00405ba0 + i*4; |
1976 | } | 1988 | } |
1989 | static inline u32 gr_cwd_sm_id__size_1_v(void) | ||
1990 | { | ||
1991 | return 0x00000006; | ||
1992 | } | ||
1977 | static inline u32 gr_cwd_sm_id_tpc0_f(u32 v) | 1993 | static inline u32 gr_cwd_sm_id_tpc0_f(u32 v) |
1978 | { | 1994 | { |
1979 | return (v & 0xff) << 0; | 1995 | return (v & 0xff) << 0; |