summary | refs | log | tree | commit | diff | stats
path: root/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/gr_gk20a.c')
-rw-r--r-- drivers/gpu/nvgpu/gk20a/gr_gk20a.c 104
1 files changed, 60 insertions, 44 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index ceb3cb18..39562ec1 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * GK20A Graphics 2 * GK20A Graphics
3 * 3 *
4 * Copyright (c) 2011-2016, NVIDIA CORPORATION. All rights reserved. 4 * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved.
5 * 5 *
6 * This program is free software; you can redistribute it and/or modify it 6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License, 7 * under the terms and conditions of the GNU General Public License,
@@ -1028,6 +1028,18 @@ int gr_gk20a_commit_global_timeslice(struct gk20a *g, struct channel_gk20a *c,
1028 return 0; 1028 return 0;
1029} 1029}
1030 1030
1031/*
1032 * Return map tiles count for given index
1033 * Return 0 if index is out-of-bounds
1034 */
1035static u32 gr_gk20a_get_map_tile_count(struct gr_gk20a *gr, u32 index)
1036{
1037 if (index >= gr->map_tile_count)
1038 return 0;
1039
1040 return gr->map_tiles[index];
1041}
1042
1031int gr_gk20a_setup_rop_mapping(struct gk20a *g, struct gr_gk20a *gr) 1043int gr_gk20a_setup_rop_mapping(struct gk20a *g, struct gr_gk20a *gr)
1032{ 1044{
1033 u32 norm_entries, norm_shift; 1045 u32 norm_entries, norm_shift;
@@ -1043,43 +1055,43 @@ int gr_gk20a_setup_rop_mapping(struct gk20a *g, struct gr_gk20a *gr)
1043 gr_crstr_map_table_cfg_row_offset_f(gr->map_row_offset) | 1055 gr_crstr_map_table_cfg_row_offset_f(gr->map_row_offset) |
1044 gr_crstr_map_table_cfg_num_entries_f(gr->tpc_count)); 1056 gr_crstr_map_table_cfg_num_entries_f(gr->tpc_count));
1045 1057
1046 map0 = gr_crstr_gpc_map0_tile0_f(gr->map_tiles[0]) | 1058 map0 = gr_crstr_gpc_map0_tile0_f(gr_gk20a_get_map_tile_count(gr, 0)) |
1047 gr_crstr_gpc_map0_tile1_f(gr->map_tiles[1]) | 1059 gr_crstr_gpc_map0_tile1_f(gr_gk20a_get_map_tile_count(gr, 1)) |
1048 gr_crstr_gpc_map0_tile2_f(gr->map_tiles[2]) | 1060 gr_crstr_gpc_map0_tile2_f(gr_gk20a_get_map_tile_count(gr, 2)) |
1049 gr_crstr_gpc_map0_tile3_f(gr->map_tiles[3]) | 1061 gr_crstr_gpc_map0_tile3_f(gr_gk20a_get_map_tile_count(gr, 3)) |
1050 gr_crstr_gpc_map0_tile4_f(gr->map_tiles[4]) | 1062 gr_crstr_gpc_map0_tile4_f(gr_gk20a_get_map_tile_count(gr, 4)) |
1051 gr_crstr_gpc_map0_tile5_f(gr->map_tiles[5]); 1063 gr_crstr_gpc_map0_tile5_f(gr_gk20a_get_map_tile_count(gr, 5));
1052 1064
1053 map1 = gr_crstr_gpc_map1_tile6_f(gr->map_tiles[6]) | 1065 map1 = gr_crstr_gpc_map1_tile6_f(gr_gk20a_get_map_tile_count(gr, 6)) |
1054 gr_crstr_gpc_map1_tile7_f(gr->map_tiles[7]) | 1066 gr_crstr_gpc_map1_tile7_f(gr_gk20a_get_map_tile_count(gr, 7)) |
1055 gr_crstr_gpc_map1_tile8_f(gr->map_tiles[8]) | 1067 gr_crstr_gpc_map1_tile8_f(gr_gk20a_get_map_tile_count(gr, 8)) |
1056 gr_crstr_gpc_map1_tile9_f(gr->map_tiles[9]) | 1068 gr_crstr_gpc_map1_tile9_f(gr_gk20a_get_map_tile_count(gr, 9)) |
1057 gr_crstr_gpc_map1_tile10_f(gr->map_tiles[10]) | 1069 gr_crstr_gpc_map1_tile10_f(gr_gk20a_get_map_tile_count(gr, 10)) |
1058 gr_crstr_gpc_map1_tile11_f(gr->map_tiles[11]); 1070 gr_crstr_gpc_map1_tile11_f(gr_gk20a_get_map_tile_count(gr, 11));
1059 1071
1060 map2 = gr_crstr_gpc_map2_tile12_f(gr->map_tiles[12]) | 1072 map2 = gr_crstr_gpc_map2_tile12_f(gr_gk20a_get_map_tile_count(gr, 12)) |
1061 gr_crstr_gpc_map2_tile13_f(gr->map_tiles[13]) | 1073 gr_crstr_gpc_map2_tile13_f(gr_gk20a_get_map_tile_count(gr, 13)) |
1062 gr_crstr_gpc_map2_tile14_f(gr->map_tiles[14]) | 1074 gr_crstr_gpc_map2_tile14_f(gr_gk20a_get_map_tile_count(gr, 14)) |
1063 gr_crstr_gpc_map2_tile15_f(gr->map_tiles[15]) | 1075 gr_crstr_gpc_map2_tile15_f(gr_gk20a_get_map_tile_count(gr, 15)) |
1064 gr_crstr_gpc_map2_tile16_f(gr->map_tiles[16]) | 1076 gr_crstr_gpc_map2_tile16_f(gr_gk20a_get_map_tile_count(gr, 16)) |
1065 gr_crstr_gpc_map2_tile17_f(gr->map_tiles[17]); 1077 gr_crstr_gpc_map2_tile17_f(gr_gk20a_get_map_tile_count(gr, 17));
1066 1078
1067 map3 = gr_crstr_gpc_map3_tile18_f(gr->map_tiles[18]) | 1079 map3 = gr_crstr_gpc_map3_tile18_f(gr_gk20a_get_map_tile_count(gr, 18)) |
1068 gr_crstr_gpc_map3_tile19_f(gr->map_tiles[19]) | 1080 gr_crstr_gpc_map3_tile19_f(gr_gk20a_get_map_tile_count(gr, 19)) |
1069 gr_crstr_gpc_map3_tile20_f(gr->map_tiles[20]) | 1081 gr_crstr_gpc_map3_tile20_f(gr_gk20a_get_map_tile_count(gr, 20)) |
1070 gr_crstr_gpc_map3_tile21_f(gr->map_tiles[21]) | 1082 gr_crstr_gpc_map3_tile21_f(gr_gk20a_get_map_tile_count(gr, 21)) |
1071 gr_crstr_gpc_map3_tile22_f(gr->map_tiles[22]) | 1083 gr_crstr_gpc_map3_tile22_f(gr_gk20a_get_map_tile_count(gr, 22)) |
1072 gr_crstr_gpc_map3_tile23_f(gr->map_tiles[23]); 1084 gr_crstr_gpc_map3_tile23_f(gr_gk20a_get_map_tile_count(gr, 23));
1073 1085
1074 map4 = gr_crstr_gpc_map4_tile24_f(gr->map_tiles[24]) | 1086 map4 = gr_crstr_gpc_map4_tile24_f(gr_gk20a_get_map_tile_count(gr, 24)) |
1075 gr_crstr_gpc_map4_tile25_f(gr->map_tiles[25]) | 1087 gr_crstr_gpc_map4_tile25_f(gr_gk20a_get_map_tile_count(gr, 25)) |
1076 gr_crstr_gpc_map4_tile26_f(gr->map_tiles[26]) | 1088 gr_crstr_gpc_map4_tile26_f(gr_gk20a_get_map_tile_count(gr, 26)) |
1077 gr_crstr_gpc_map4_tile27_f(gr->map_tiles[27]) | 1089 gr_crstr_gpc_map4_tile27_f(gr_gk20a_get_map_tile_count(gr, 27)) |
1078 gr_crstr_gpc_map4_tile28_f(gr->map_tiles[28]) | 1090 gr_crstr_gpc_map4_tile28_f(gr_gk20a_get_map_tile_count(gr, 28)) |
1079 gr_crstr_gpc_map4_tile29_f(gr->map_tiles[29]); 1091 gr_crstr_gpc_map4_tile29_f(gr_gk20a_get_map_tile_count(gr, 29));
1080 1092
1081 map5 = gr_crstr_gpc_map5_tile30_f(gr->map_tiles[30]) | 1093 map5 = gr_crstr_gpc_map5_tile30_f(gr_gk20a_get_map_tile_count(gr, 30)) |
1082 gr_crstr_gpc_map5_tile31_f(gr->map_tiles[31]) | 1094 gr_crstr_gpc_map5_tile31_f(gr_gk20a_get_map_tile_count(gr, 31)) |
1083 gr_crstr_gpc_map5_tile32_f(0) | 1095 gr_crstr_gpc_map5_tile32_f(0) |
1084 gr_crstr_gpc_map5_tile33_f(0) | 1096 gr_crstr_gpc_map5_tile33_f(0) |
1085 gr_crstr_gpc_map5_tile34_f(0) | 1097 gr_crstr_gpc_map5_tile34_f(0) |
@@ -3658,6 +3670,7 @@ static int gr_gk20a_init_map_tiles(struct gk20a *g, struct gr_gk20a *gr)
3658 int ret = 0; 3670 int ret = 0;
3659 int num_gpcs = nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS); 3671 int num_gpcs = nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS);
3660 int num_tpc_per_gpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_TPC_PER_GPC); 3672 int num_tpc_per_gpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_TPC_PER_GPC);
3673 int map_tile_count = num_gpcs * num_tpc_per_gpc;
3661 3674
3662 init_frac = kzalloc(num_gpcs * sizeof(s32), GFP_KERNEL); 3675 init_frac = kzalloc(num_gpcs * sizeof(s32), GFP_KERNEL);
3663 init_err = kzalloc(num_gpcs * sizeof(s32), GFP_KERNEL); 3676 init_err = kzalloc(num_gpcs * sizeof(s32), GFP_KERNEL);
@@ -3721,7 +3734,8 @@ static int gr_gk20a_init_map_tiles(struct gk20a *g, struct gr_gk20a *gr)
3721 delete_map = true; 3734 delete_map = true;
3722 3735
3723 for (tile_count = 0; tile_count < gr->map_tile_count; tile_count++) { 3736 for (tile_count = 0; tile_count < gr->map_tile_count; tile_count++) {
3724 if ((u32)gr->map_tiles[tile_count] >= gr->tpc_count) 3737 if (gr_gk20a_get_map_tile_count(gr, tile_count)
3738 >= gr->tpc_count)
3725 delete_map = true; 3739 delete_map = true;
3726 } 3740 }
3727 3741
@@ -3733,13 +3747,13 @@ static int gr_gk20a_init_map_tiles(struct gk20a *g, struct gr_gk20a *gr)
3733 } 3747 }
3734 3748
3735 if (gr->map_tiles == NULL) { 3749 if (gr->map_tiles == NULL) {
3736 gr->map_tile_count = num_gpcs; 3750 gr->map_tiles = kzalloc(map_tile_count * sizeof(u8),
3737 3751 GFP_KERNEL);
3738 gr->map_tiles = kzalloc(num_gpcs * sizeof(u8), GFP_KERNEL);
3739 if (gr->map_tiles == NULL) { 3752 if (gr->map_tiles == NULL) {
3740 ret = -ENOMEM; 3753 ret = -ENOMEM;
3741 goto clean_up; 3754 goto clean_up;
3742 } 3755 }
3756 gr->map_tile_count = map_tile_count;
3743 3757
3744 for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) { 3758 for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++) {
3745 sorted_num_tpcs[gpc_index] = gr->gpc_tpc_count[gpc_index]; 3759 sorted_num_tpcs[gpc_index] = gr->gpc_tpc_count[gpc_index];
@@ -4508,6 +4522,7 @@ static int gr_gk20a_zcull_init_hw(struct gk20a *g, struct gr_gk20a *gr)
4508 u32 num_tpc_per_gpc = nvgpu_get_litter_value(g, 4522 u32 num_tpc_per_gpc = nvgpu_get_litter_value(g,
4509 GPU_LIT_NUM_TPC_PER_GPC); 4523 GPU_LIT_NUM_TPC_PER_GPC);
4510 u32 zcull_alloc_num = num_gpcs * num_tpc_per_gpc; 4524 u32 zcull_alloc_num = num_gpcs * num_tpc_per_gpc;
4525 u32 map_tile_count;
4511 4526
4512 if (!gr->map_tiles) 4527 if (!gr->map_tiles)
4513 return -1; 4528 return -1;
@@ -4534,9 +4549,10 @@ static int gr_gk20a_zcull_init_hw(struct gk20a *g, struct gr_gk20a *gr)
4534 } 4549 }
4535 4550
4536 for (map_counter = 0; map_counter < gr->tpc_count; map_counter++) { 4551 for (map_counter = 0; map_counter < gr->tpc_count; map_counter++) {
4552 map_tile_count = gr_gk20a_get_map_tile_count(gr, map_counter);
4537 zcull_map_tiles[map_counter] = 4553 zcull_map_tiles[map_counter] =
4538 zcull_bank_counters[gr->map_tiles[map_counter]]; 4554 zcull_bank_counters[map_tile_count];
4539 zcull_bank_counters[gr->map_tiles[map_counter]]++; 4555 zcull_bank_counters[map_tile_count]++;
4540 } 4556 }
4541 4557
4542 if (g->ops.gr.program_zcull_mapping) 4558 if (g->ops.gr.program_zcull_mapping)