summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
diff options
context:
space:
mode:
author: Terje Bergstrom <tbergstrom@nvidia.com>, 2016-05-06 18:00:17 -0400
committer: Deepak Nibade <dnibade@nvidia.com>, 2016-12-27 04:56:15 -0500
commit: a6682186de77b90fa41718d4b0012b35aba95ae0 (patch)
tree: cc0ba093bcb943f790683e9f2616000b758b27e7 /drivers/gpu/nvgpu/gp10b/gr_gp10b.c
parent: 205559cf31212af1c3c602f4889421748a433416 (diff)
gpu: nvgpu: gp10b: Fix CWD floorsweep programming
Program CWD TPC and SM registers correctly. The old code did not work when there are more than 4 TPCs.

Change-Id: I18a14a0f76d97b0962607ec0bbd71aafcd768bca
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/1143075
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Konsta Holtta <kholtta@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gp10b/gr_gp10b.c')
-rw-r--r--drivers/gpu/nvgpu/gp10b/gr_gp10b.c52
1 files changed, 50 insertions, 2 deletions
diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index 07f1014f..ebe11c67 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -1381,10 +1381,57 @@ static void gr_gp10b_commit_global_bundle_cb(struct gk20a *g,
1381 gr_pd_ab_dist_cfg2_state_limit_f(data), patch); 1381 gr_pd_ab_dist_cfg2_state_limit_f(data), patch);
1382} 1382}
1383 1383
1384static int gr_gp10b_init_fs_state(struct gk20a *g) 1384static int gr_gp10b_load_smid_config(struct gk20a *g)
1385{
1386 u32 *tpc_sm_id;
1387 u32 i, j;
1388 u32 tpc_index, gpc_index;
1389 u32 max_gpcs = nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS);
1390
1391 tpc_sm_id = kcalloc(gr_cwd_sm_id__size_1_v(), sizeof(u32), GFP_KERNEL);
1392 if (!tpc_sm_id)
1393 return -ENOMEM;
1394
1395 /* Each NV_PGRAPH_PRI_CWD_GPC_TPC_ID can store 4 TPCs.*/
1396 for (i = 0; i <= ((g->gr.tpc_count-1) / 4); i++) {
1397 u32 reg = 0;
1398 u32 bit_stride = gr_cwd_gpc_tpc_id_gpc0_s() +
1399 gr_cwd_gpc_tpc_id_tpc0_s();
1400
1401 for (j = 0; j < 4; j++) {
1402 u32 sm_id = (i / 4) + j;
1403 u32 bits;
1404
1405 if (sm_id >= g->gr.tpc_count)
1406 break;
1407
1408 gpc_index = g->gr.sm_to_cluster[sm_id].gpc_index;
1409 tpc_index = g->gr.sm_to_cluster[sm_id].tpc_index;
1410
1411 bits = gr_cwd_gpc_tpc_id_gpc0_f(gpc_index) |
1412 gr_cwd_gpc_tpc_id_tpc0_f(tpc_index);
1413 reg |= bits << (j * bit_stride);
1414
1415 tpc_sm_id[gpc_index + max_gpcs * ((tpc_index & 4) >> 2)]
1416 |= sm_id << (bit_stride * (tpc_index & 3));
1417 }
1418 gk20a_writel(g, gr_cwd_gpc_tpc_id_r(i), reg);
1419 }
1420
1421 for (i = 0; i < gr_cwd_sm_id__size_1_v(); i++)
1422 gk20a_writel(g, gr_cwd_sm_id_r(i), tpc_sm_id[i]);
1423
1424 kfree(tpc_sm_id);
1425
1426 return 0;
1427}
1428
1429int gr_gp10b_init_fs_state(struct gk20a *g)
1385{ 1430{
1386 u32 data; 1431 u32 data;
1387 1432
1433 gk20a_dbg_fn("");
1434
1388 data = gk20a_readl(g, gr_gpcs_tpcs_sm_texio_control_r()); 1435 data = gk20a_readl(g, gr_gpcs_tpcs_sm_texio_control_r());
1389 data = set_field(data, gr_gpcs_tpcs_sm_texio_control_oor_addr_check_mode_m(), 1436 data = set_field(data, gr_gpcs_tpcs_sm_texio_control_oor_addr_check_mode_m(),
1390 gr_gpcs_tpcs_sm_texio_control_oor_addr_check_mode_arm_63_48_match_f()); 1437 gr_gpcs_tpcs_sm_texio_control_oor_addr_check_mode_arm_63_48_match_f());
@@ -1401,7 +1448,7 @@ static int gr_gp10b_init_fs_state(struct gk20a *g)
1401 g->gr.t18x.fecs_feature_override_ecc_val); 1448 g->gr.t18x.fecs_feature_override_ecc_val);
1402 } 1449 }
1403 1450
1404 return gr_gm20b_ctx_state_floorsweep(g); 1451 return gr_gm20b_init_fs_state(g);
1405} 1452}
1406 1453
1407static void gr_gp10b_init_cyclestats(struct gk20a *g) 1454static void gr_gp10b_init_cyclestats(struct gk20a *g)
@@ -2076,4 +2123,5 @@ void gp10b_init_gr(struct gpu_ops *gops)
2076 gops->gr.set_preemption_mode = gr_gp10b_set_preemption_mode; 2123 gops->gr.set_preemption_mode = gr_gp10b_set_preemption_mode;
2077 gops->gr.get_preemption_mode_flags = gr_gp10b_get_preemption_mode_flags; 2124 gops->gr.get_preemption_mode_flags = gr_gp10b_get_preemption_mode_flags;
2078 gops->gr.fuse_override = gp10b_gr_fuse_override; 2125 gops->gr.fuse_override = gp10b_gr_fuse_override;
2126 gops->gr.load_smid_config = gr_gp10b_load_smid_config;
2079} 2127}