author	Terje Bergstrom <tbergstrom@nvidia.com>	2016-05-06 18:13:54 -0400
committer	Terje Bergstrom <tbergstrom@nvidia.com>	2016-05-16 13:57:48 -0400
commit	211edaefb71d06d34c2835a93249da58673bff8a (patch)
tree	3bd5eed1cc9020fcc8af4e4ffd9653268d59eb9b /drivers
parent	3a1321ddcd33accd6a8a6efee2921ebf088b0f50 (diff)
gpu: nvgpu: Fix CWD floorsweep programming
Program CWD TPC and SM registers correctly. The old code did not work
when there are more than 4 TPCs.

Refactor init_fs_mask to reduce code duplication.

Change-Id: Id93c1f8df24f1b7ee60314c3204e288b91951a88
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/1143697
GVS: Gerrit_Virtual_Submit
Reviewed-by: Konsta Holtta <kholtta@nvidia.com>
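To make the register layout concrete, below is a minimal standalone C sketch (not part of the commit) of the packing scheme the new gr_gm20b_load_smid_config() path implements: each 32-bit NV_PGRAPH_PRI_CWD_GPC_TPC_ID register carries four TPC entries, with a 4-bit TPC field and a 4-bit GPC field per entry, so a GPU with more than 4 TPCs has to touch register indices beyond 0. The field widths and the example floorsweep configuration are illustrative assumptions, and the sketch indexes SM IDs sequentially for clarity rather than copying the driver loop line for line.

/* Hypothetical standalone illustration; field widths assumed to be 4 bits
 * (matching gr_cwd_gpc_tpc_id_tpc0_s()/gpc0_s() in this patch). Not driver
 * code: it only shows how (GPC, TPC) pairs pack four-per-register.
 */
#include <stdio.h>
#include <stdint.h>

#define TPC_BITS        4       /* assumed TPC field width */
#define GPC_BITS        4       /* assumed GPC field width */
#define ENTRIES_PER_REG 4       /* four SM entries per 32-bit register */

struct sm_info {
	uint32_t gpc_index;
	uint32_t tpc_index;
};

int main(void)
{
	/* assumed floorsweep result: 6 SMs interleaved across 2 GPCs */
	struct sm_info sm_to_cluster[] = {
		{0, 0}, {1, 0}, {0, 1}, {1, 1}, {0, 2}, {1, 2},
	};
	uint32_t tpc_count = sizeof(sm_to_cluster) / sizeof(sm_to_cluster[0]);
	uint32_t bit_stride = GPC_BITS + TPC_BITS;
	uint32_t i, j;

	for (i = 0; i <= (tpc_count - 1) / ENTRIES_PER_REG; i++) {
		uint32_t reg = 0;

		for (j = 0; j < ENTRIES_PER_REG; j++) {
			uint32_t sm_id = i * ENTRIES_PER_REG + j;
			uint32_t bits;

			if (sm_id >= tpc_count)
				break;

			/* TPC index in the low field, GPC index above it */
			bits = sm_to_cluster[sm_id].tpc_index |
			       (sm_to_cluster[sm_id].gpc_index << TPC_BITS);
			reg |= bits << (j * bit_stride);
		}
		/* the driver would write this to gr_cwd_gpc_tpc_id_r(i) */
		printf("GPC_TPC_ID[%u] = 0x%08x\n", (unsigned)i, (unsigned)reg);
	}
	return 0;
}

With 6 TPCs this touches GPC_TPC_ID[0] and GPC_TPC_ID[1], which is exactly the case the old single write to index 0 could not cover.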
Diffstat (limited to 'drivers')
-rw-r--r--drivers/gpu/nvgpu/gk20a/gk20a.h4
-rw-r--r--drivers/gpu/nvgpu/gk20a/gr_gk20a.c116
-rw-r--r--drivers/gpu/nvgpu/gk20a/gr_gk20a.h1
-rw-r--r--drivers/gpu/nvgpu/gm20b/gr_gm20b.c133
-rw-r--r--drivers/gpu/nvgpu/gm20b/gr_gm20b.h5
-rw-r--r--drivers/gpu/nvgpu/gm20b/hw_gr_gm20b.h16
6 files changed, 159 insertions, 116 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index 8dfe8eda..5d06a441 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -260,6 +260,10 @@ struct gpu_ops {
 		int (*get_preemption_mode_flags)(struct gk20a *g,
 			struct nvgpu_preemption_modes_rec *preemption_modes_rec);
 		int (*fuse_override)(struct gk20a *g);
+		int (*load_smid_config)(struct gk20a *g);
+		void (*program_sm_id_numbering)(struct gk20a *g,
+				u32 gpc, u32 tpc, u32 smid);
+		void (*program_active_tpc_counts)(struct gk20a *g, u32 gpc);
 	} gr;
 	const char *name;
 	struct {
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index c98da273..901fea8c 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -1286,54 +1286,82 @@ static u32 gr_gk20a_get_gpc_tpc_mask(struct gk20a *g, u32 gpc_index)
 	return 0x1;
 }
 
-static int gr_gk20a_ctx_state_floorsweep(struct gk20a *g)
+static void gr_gk20a_program_active_tpc_counts(struct gk20a *g, u32 gpc_index)
+{
+	u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
+	u32 gpc_offset = gpc_stride * gpc_index;
+	struct gr_gk20a *gr = &g->gr;
+
+	gk20a_writel(g, gr_gpc0_gpm_pd_active_tpcs_r() + gpc_offset,
+		gr_gpc0_gpm_pd_active_tpcs_num_f(gr->gpc_tpc_count[gpc_index]));
+	gk20a_writel(g, gr_gpc0_gpm_sd_active_tpcs_r() + gpc_offset,
+		gr_gpc0_gpm_sd_active_tpcs_num_f(gr->gpc_tpc_count[gpc_index]));
+}
+
+static void gr_gk20a_init_sm_id_table(struct gk20a *g)
+{
+	u32 gpc, tpc;
+	u32 sm_id = 0;
+
+	for (tpc = 0; tpc < g->gr.max_tpc_per_gpc_count; tpc++) {
+		for (gpc = 0; gpc < g->gr.gpc_count; gpc++) {
+
+			if (tpc < g->gr.gpc_tpc_count[gpc]) {
+				g->gr.sm_to_cluster[sm_id].tpc_index = tpc;
+				g->gr.sm_to_cluster[sm_id].gpc_index = gpc;
+				sm_id++;
+			}
+		}
+	}
+	g->gr.no_of_sm = sm_id;
+}
+
+static void gr_gk20a_program_sm_id_numbering(struct gk20a *g,
+					u32 gpc, u32 tpc, u32 sm_id)
+{
+	u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
+	u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
+	u32 gpc_offset = gpc_stride * gpc;
+	u32 tpc_offset = tpc_in_gpc_stride * tpc;
+
+	gk20a_writel(g, gr_gpc0_tpc0_sm_cfg_r() + gpc_offset + tpc_offset,
+			gr_gpc0_tpc0_sm_cfg_sm_id_f(sm_id));
+	gk20a_writel(g, gr_gpc0_tpc0_l1c_cfg_smid_r() + gpc_offset + tpc_offset,
+			gr_gpc0_tpc0_l1c_cfg_smid_value_f(sm_id));
+	gk20a_writel(g, gr_gpc0_gpm_pd_sm_id_r(tpc) + gpc_offset,
+			gr_gpc0_gpm_pd_sm_id_id_f(sm_id));
+	gk20a_writel(g, gr_gpc0_tpc0_pe_cfg_smid_r() + gpc_offset + tpc_offset,
+			gr_gpc0_tpc0_pe_cfg_smid_value_f(sm_id));
+}
+
+int gr_gk20a_init_fs_state(struct gk20a *g)
 {
 	struct gr_gk20a *gr = &g->gr;
 	u32 tpc_index, gpc_index;
-	u32 tpc_offset, gpc_offset;
 	u32 sm_id = 0, gpc_id = 0;
 	u32 tpc_per_gpc;
-	u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
-	u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
+	u32 fuse_tpc_mask;
 
 	gk20a_dbg_fn("");
 
-	for (tpc_index = 0; tpc_index < gr->max_tpc_per_gpc_count; tpc_index++) {
-		for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
-			gpc_offset = gpc_stride * gpc_index;
-			if (tpc_index < gr->gpc_tpc_count[gpc_index]) {
-				tpc_offset = tpc_in_gpc_stride * tpc_index;
-
-				gk20a_writel(g, gr_gpc0_tpc0_sm_cfg_r() + gpc_offset + tpc_offset,
-					gr_gpc0_tpc0_sm_cfg_sm_id_f(sm_id));
-				gk20a_writel(g, gr_gpc0_tpc0_l1c_cfg_smid_r() + gpc_offset + tpc_offset,
-					gr_gpc0_tpc0_l1c_cfg_smid_value_f(sm_id));
-				gk20a_writel(g, gr_gpc0_gpm_pd_sm_id_r(tpc_index) + gpc_offset,
-					gr_gpc0_gpm_pd_sm_id_id_f(sm_id));
-				gk20a_writel(g, gr_gpc0_tpc0_pe_cfg_smid_r() + gpc_offset + tpc_offset,
-					gr_gpc0_tpc0_pe_cfg_smid_value_f(sm_id));
-
-				g->gr.sm_to_cluster[sm_id].tpc_index = tpc_index;
-				g->gr.sm_to_cluster[sm_id].gpc_index = gpc_index;
+	gr_gk20a_init_sm_id_table(g);
 
-				sm_id++;
-			}
+	for (sm_id = 0; sm_id < gr->tpc_count; sm_id++) {
+		tpc_index = g->gr.sm_to_cluster[sm_id].tpc_index;
+		gpc_index = g->gr.sm_to_cluster[sm_id].gpc_index;
 
-			gk20a_writel(g, gr_gpc0_gpm_pd_active_tpcs_r() + gpc_offset,
-				gr_gpc0_gpm_pd_active_tpcs_num_f(gr->gpc_tpc_count[gpc_index]));
-			gk20a_writel(g, gr_gpc0_gpm_sd_active_tpcs_r() + gpc_offset,
-				gr_gpc0_gpm_sd_active_tpcs_num_f(gr->gpc_tpc_count[gpc_index]));
-		}
-	}
+		g->ops.gr.program_sm_id_numbering(g, gpc_index, tpc_index, sm_id);
 
-	gr->no_of_sm = sm_id;
+		if (g->ops.gr.program_active_tpc_counts)
+			g->ops.gr.program_active_tpc_counts(g, gpc_index);
+	}
 
 	for (tpc_index = 0, gpc_id = 0;
 	     tpc_index < gr_pd_num_tpc_per_gpc__size_1_v();
 	     tpc_index++, gpc_id += 8) {
 
 		if (gpc_id >= gr->gpc_count)
-			gpc_id = 0;
+			continue;
 
 		tpc_per_gpc =
 			gr_pd_num_tpc_per_gpc_count0_f(gr->gpc_tpc_count[gpc_id + 0]) |
@@ -1365,9 +1393,19 @@ static int gr_gk20a_ctx_state_floorsweep(struct gk20a *g)
 			gr_pd_dist_skip_table_gpc_4n3_mask_f(gr->gpc_skip_mask[gpc_index + 3]));
 	}
 
-	gk20a_writel(g, gr_cwd_fs_r(),
-		gr_cwd_fs_num_gpcs_f(gr->gpc_count) |
-		gr_cwd_fs_num_tpcs_f(gr->tpc_count));
+	fuse_tpc_mask = g->ops.gr.get_gpc_tpc_mask(g, 0);
+	if (g->tpc_fs_mask_user &&
+		fuse_tpc_mask == (0x1 << gr->max_tpc_count) - 1) {
+		u32 val = g->tpc_fs_mask_user;
+		val &= (0x1 << gr->max_tpc_count) - 1;
+		gk20a_writel(g, gr_cwd_fs_r(),
+			gr_cwd_fs_num_gpcs_f(gr->gpc_count) |
+			gr_cwd_fs_num_tpcs_f(hweight32(val)));
+	} else {
+		gk20a_writel(g, gr_cwd_fs_r(),
+			gr_cwd_fs_num_gpcs_f(gr->gpc_count) |
+			gr_cwd_fs_num_tpcs_f(gr->tpc_count));
+	}
 
 	gk20a_writel(g, gr_bes_zrop_settings_r(),
 		gr_bes_zrop_settings_num_active_fbps_f(gr->num_fbps));
@@ -4413,7 +4451,9 @@ static int gk20a_init_gr_setup_hw(struct gk20a *g)
 	gr_gk20a_commit_global_timeslice(g, NULL, false);
 
 	/* floorsweep anything left */
-	g->ops.gr.init_fs_state(g);
+	err = g->ops.gr.init_fs_state(g);
+	if (err)
+		goto out;
 
 	err = gr_gk20a_wait_idle(g, end_jiffies, GR_IDLE_CHECK_DEFAULT);
 	if (err)
@@ -4466,7 +4506,7 @@ restore_fe_go_idle:
 
 out:
 	gk20a_dbg_fn("done");
-	return 0;
+	return err;
 }
 
 static void gr_gk20a_load_gating_prod(struct gk20a *g)
@@ -8633,7 +8673,7 @@ void gk20a_init_gr_ops(struct gpu_ops *gops)
 	gops->gr.is_valid_class = gr_gk20a_is_valid_class;
 	gops->gr.get_sm_dsm_perf_regs = gr_gk20a_get_sm_dsm_perf_regs;
 	gops->gr.get_sm_dsm_perf_ctrl_regs = gr_gk20a_get_sm_dsm_perf_ctrl_regs;
-	gops->gr.init_fs_state = gr_gk20a_ctx_state_floorsweep;
+	gops->gr.init_fs_state = gr_gk20a_init_fs_state;
 	gops->gr.set_hww_esr_report_mask = gr_gk20a_set_hww_esr_report_mask;
 	gops->gr.setup_alpha_beta_tables = gr_gk20a_setup_alpha_beta_tables;
 	gops->gr.falcon_load_ucode = gr_gk20a_load_ctxsw_ucode_segments;
@@ -8681,4 +8721,6 @@ void gk20a_init_gr_ops(struct gpu_ops *gops)
 	gops->gr.clear_sm_error_state = gk20a_gr_clear_sm_error_state;
 	gops->gr.suspend_contexts = gr_gk20a_suspend_contexts;
 	gops->gr.get_preemption_mode_flags = gr_gk20a_get_preemption_mode_flags;
+	gops->gr.program_active_tpc_counts = gr_gk20a_program_active_tpc_counts;
+	gops->gr.program_sm_id_numbering = gr_gk20a_program_sm_id_numbering;
 }
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
index 15d1ea7d..b5d97727 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
@@ -533,6 +533,7 @@ void gr_gk20a_commit_global_pagepool(struct gk20a *g,
 				u64 addr, u32 size, bool patch);
 void gk20a_gr_set_shader_exceptions(struct gk20a *g, u32 data);
 void gr_gk20a_enable_hww_exceptions(struct gk20a *g);
+int gr_gk20a_init_fs_state(struct gk20a *g);
 int gr_gk20a_setup_rop_mapping(struct gk20a *g, struct gr_gk20a *gr);
 int gr_gk20a_init_ctxsw_ucode(struct gk20a *g);
 int gr_gk20a_load_ctxsw_ucode(struct gk20a *g);
diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
index 2197bae5..0659eefd 100644
--- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
@@ -552,79 +552,71 @@ static void gr_gm20b_load_tpc_mask(struct gk20a *g)
 	}
 }
 
-int gr_gm20b_ctx_state_floorsweep(struct gk20a *g)
+static void gr_gm20b_program_sm_id_numbering(struct gk20a *g,
+					u32 gpc, u32 tpc, u32 smid)
 {
-	struct gr_gk20a *gr = &g->gr;
-	u32 tpc_index, gpc_index;
-	u32 tpc_offset, gpc_offset;
-	u32 sm_id = 0;
-	u32 tpc_per_gpc = 0;
-	u32 tpc_sm_id = 0, gpc_tpc_id = 0;
 	u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
 	u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
-	u32 fuse_tpc_mask;
+	u32 gpc_offset = gpc_stride * gpc;
+	u32 tpc_offset = tpc_in_gpc_stride * tpc;
+
+	gk20a_writel(g, gr_gpc0_tpc0_sm_cfg_r() + gpc_offset + tpc_offset,
+			gr_gpc0_tpc0_sm_cfg_sm_id_f(smid));
+	gk20a_writel(g, gr_gpc0_gpm_pd_sm_id_r(tpc) + gpc_offset,
+			gr_gpc0_gpm_pd_sm_id_id_f(smid));
+	gk20a_writel(g, gr_gpc0_tpc0_pe_cfg_smid_r() + gpc_offset + tpc_offset,
+			gr_gpc0_tpc0_pe_cfg_smid_value_f(smid));
+}
 
-	gk20a_dbg_fn("");
+static int gr_gm20b_load_smid_config(struct gk20a *g)
+{
+	u32 *tpc_sm_id;
+	u32 i, j;
+	u32 tpc_index, gpc_index;
 
-	for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
-		gpc_offset = gpc_stride * gpc_index;
-		for (tpc_index = 0; tpc_index < gr->gpc_tpc_count[gpc_index];
-			tpc_index++) {
-			tpc_offset = tpc_in_gpc_stride * tpc_index;
-
-			gk20a_writel(g, gr_gpc0_tpc0_sm_cfg_r()
-					+ gpc_offset + tpc_offset,
-					gr_gpc0_tpc0_sm_cfg_sm_id_f(sm_id));
-			gk20a_writel(g, gr_gpc0_gpm_pd_sm_id_r(tpc_index)
-					+ gpc_offset,
-					gr_gpc0_gpm_pd_sm_id_id_f(sm_id));
-			gk20a_writel(g, gr_gpc0_tpc0_pe_cfg_smid_r()
-					+ gpc_offset + tpc_offset,
-					gr_gpc0_tpc0_pe_cfg_smid_value_f(sm_id));
-
-			g->gr.sm_to_cluster[sm_id].tpc_index = tpc_index;
-			g->gr.sm_to_cluster[sm_id].gpc_index = gpc_index;
-
-			sm_id++;
-		}
-	}
+	tpc_sm_id = kcalloc(gr_cwd_sm_id__size_1_v(), sizeof(u32), GFP_KERNEL);
+	if (!tpc_sm_id)
+		return -ENOMEM;
 
-	gr->no_of_sm = sm_id;
+	/* Each NV_PGRAPH_PRI_CWD_GPC_TPC_ID can store 4 TPCs.*/
+	for (i = 0; i <= ((g->gr.tpc_count-1) / 4); i++) {
+		u32 reg = 0;
+		u32 bit_stride = gr_cwd_gpc_tpc_id_gpc0_s() +
+				 gr_cwd_gpc_tpc_id_tpc0_s();
 
-	for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++)
-		tpc_per_gpc |= gr->gpc_tpc_count[gpc_index]
-			<< (gr_pd_num_tpc_per_gpc__size_1_v() * gpc_index);
-	gk20a_writel(g, gr_pd_num_tpc_per_gpc_r(0), tpc_per_gpc);
-	gk20a_writel(g, gr_ds_num_tpc_per_gpc_r(0), tpc_per_gpc);
+		for (j = 0; j < 4; j++) {
+			u32 sm_id = (i / 4) + j;
+			u32 bits;
 
-	/* gr__setup_pd_mapping stubbed for gk20a */
-	gr_gk20a_setup_rop_mapping(g, gr);
+			if (sm_id >= g->gr.tpc_count)
+				break;
 
-	for (gpc_index = 0;
-	     gpc_index < gr_pd_dist_skip_table__size_1_v() * 4;
-	     gpc_index += 4) {
+			gpc_index = g->gr.sm_to_cluster[sm_id].gpc_index;
+			tpc_index = g->gr.sm_to_cluster[sm_id].tpc_index;
 
-		gk20a_writel(g, gr_pd_dist_skip_table_r(gpc_index/4),
-			gr_pd_dist_skip_table_gpc_4n0_mask_f(gr->gpc_skip_mask[gpc_index]) |
-			gr_pd_dist_skip_table_gpc_4n1_mask_f(gr->gpc_skip_mask[gpc_index + 1]) |
-			gr_pd_dist_skip_table_gpc_4n2_mask_f(gr->gpc_skip_mask[gpc_index + 2]) |
-			gr_pd_dist_skip_table_gpc_4n3_mask_f(gr->gpc_skip_mask[gpc_index + 3]));
-	}
+			bits = gr_cwd_gpc_tpc_id_gpc0_f(gpc_index) |
+			       gr_cwd_gpc_tpc_id_tpc0_f(tpc_index);
+			reg |= bits << (j * bit_stride);
 
-	fuse_tpc_mask = g->ops.gr.get_gpc_tpc_mask(g, 0);
-	if (g->tpc_fs_mask_user &&
-		fuse_tpc_mask == (0x1 << gr->max_tpc_count) - 1) {
-		u32 val = g->tpc_fs_mask_user;
-		val &= (0x1 << gr->max_tpc_count) - 1;
-		gk20a_writel(g, gr_cwd_fs_r(),
-			gr_cwd_fs_num_gpcs_f(gr->gpc_count) |
-			gr_cwd_fs_num_tpcs_f(hweight32(val)));
-	} else {
-		gk20a_writel(g, gr_cwd_fs_r(),
-			gr_cwd_fs_num_gpcs_f(gr->gpc_count) |
-			gr_cwd_fs_num_tpcs_f(gr->tpc_count));
+			tpc_sm_id[gpc_index] |= sm_id << tpc_index * bit_stride;
+		}
+		gk20a_writel(g, gr_cwd_gpc_tpc_id_r(i), reg);
 	}
 
+	for (i = 0; i < gr_cwd_sm_id__size_1_v(); i++)
+		gk20a_writel(g, gr_cwd_sm_id_r(i), tpc_sm_id[i]);
+
+	kfree(tpc_sm_id);
+
+	return 0;
+}
+
+int gr_gm20b_init_fs_state(struct gk20a *g)
+{
+	gk20a_dbg_fn("");
+
+	gr_gk20a_init_fs_state(g);
+
 	gr_gm20b_load_tpc_mask(g);
 
 	gk20a_writel(g, gr_bes_zrop_settings_r(),
@@ -636,22 +628,7 @@ int gr_gm20b_ctx_state_floorsweep(struct gk20a *g)
 		gk20a_readl(g, gr_be0_crop_debug3_r()) |
 		gr_bes_crop_debug3_comp_vdc_4to2_disable_m());
 
-	for (tpc_index = 0; tpc_index < gr->tpc_count; tpc_index++) {
-		if (tpc_index == 0) {
-			gpc_tpc_id |= gr_cwd_gpc_tpc_id_tpc0_f(tpc_index);
-			tpc_sm_id |= gr_cwd_sm_id_tpc0_f(tpc_index);
-		} else if (tpc_index == 1) {
-			gpc_tpc_id |= gr_cwd_gpc_tpc_id_tpc1_f(tpc_index);
-			tpc_sm_id |= gr_cwd_sm_id_tpc1_f(tpc_index);
-		}
-	}
-
-	/* Each NV_PGRAPH_PRI_CWD_GPC_TPC_ID can store 4 TPCs.
-	 * Since we know TPC number is less than 5. We select
-	 * index 0 directly. */
-	gk20a_writel(g, gr_cwd_gpc_tpc_id_r(0), gpc_tpc_id);
-
-	gk20a_writel(g, gr_cwd_sm_id_r(0), tpc_sm_id);
+	g->ops.gr.load_smid_config(g);
 
 	return 0;
 }
@@ -1443,7 +1420,7 @@ void gm20b_init_gr(struct gpu_ops *gops)
 	gops->gr.is_valid_class = gr_gm20b_is_valid_class;
 	gops->gr.get_sm_dsm_perf_regs = gr_gm20b_get_sm_dsm_perf_regs;
 	gops->gr.get_sm_dsm_perf_ctrl_regs = gr_gm20b_get_sm_dsm_perf_ctrl_regs;
-	gops->gr.init_fs_state = gr_gm20b_ctx_state_floorsweep;
+	gops->gr.init_fs_state = gr_gm20b_init_fs_state;
 	gops->gr.set_hww_esr_report_mask = gr_gm20b_set_hww_esr_report_mask;
 	gops->gr.falcon_load_ucode = gr_gm20b_load_ctxsw_ucode_segments;
 	if (gops->privsecurity)
@@ -1499,4 +1476,6 @@ void gm20b_init_gr(struct gpu_ops *gops)
 	gops->gr.suspend_contexts = gr_gk20a_suspend_contexts;
 	gops->gr.get_preemption_mode_flags = gr_gm20b_get_preemption_mode_flags;
 	gops->gr.fuse_override = gm20b_gr_fuse_override;
+	gops->gr.load_smid_config = gr_gm20b_load_smid_config;
+	gops->gr.program_sm_id_numbering = gr_gm20b_program_sm_id_numbering;
 }
diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.h b/drivers/gpu/nvgpu/gm20b/gr_gm20b.h
index 90f933bd..2a585e63 100644
--- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.h
+++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.h
@@ -1,7 +1,7 @@
 /*
  * GM20B GPC MMU
  *
- * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2014-2016, NVIDIA CORPORATION. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -46,5 +46,6 @@ void gm20b_init_gr(struct gpu_ops *gops);
 void gr_gm20b_commit_global_attrib_cb(struct gk20a *g,
 			struct channel_ctx_gk20a *ch_ctx,
 			u64 addr, bool patch);
-int gr_gm20b_ctx_state_floorsweep(struct gk20a *g);
+int gr_gm20b_init_fs_state(struct gk20a *g);
+
 #endif
diff --git a/drivers/gpu/nvgpu/gm20b/hw_gr_gm20b.h b/drivers/gpu/nvgpu/gm20b/hw_gr_gm20b.h
index 73861c07..45240e97 100644
--- a/drivers/gpu/nvgpu/gm20b/hw_gr_gm20b.h
+++ b/drivers/gpu/nvgpu/gm20b/hw_gr_gm20b.h
@@ -1962,10 +1962,22 @@ static inline u32 gr_cwd_gpc_tpc_id_r(u32 i)
 {
 	return 0x00405b60 + i*4;
 }
+static inline u32 gr_cwd_gpc_tpc_id_tpc0_s(void)
+{
+	return 4;
+}
 static inline u32 gr_cwd_gpc_tpc_id_tpc0_f(u32 v)
 {
 	return (v & 0xf) << 0;
 }
+static inline u32 gr_cwd_gpc_tpc_id_gpc0_s(void)
+{
+	return 4;
+}
+static inline u32 gr_cwd_gpc_tpc_id_gpc0_f(u32 v)
+{
+	return (v & 0xf) << 4;
+}
 static inline u32 gr_cwd_gpc_tpc_id_tpc1_f(u32 v)
 {
 	return (v & 0xf) << 8;
@@ -1974,6 +1986,10 @@ static inline u32 gr_cwd_sm_id_r(u32 i)
 {
 	return 0x00405ba0 + i*4;
 }
+static inline u32 gr_cwd_sm_id__size_1_v(void)
+{
+	return 0x00000006;
+}
 static inline u32 gr_cwd_sm_id_tpc0_f(u32 v)
 {
 	return (v & 0xff) << 0;