diff options
author | Deepak Goyal <dgoyal@nvidia.com> | 2018-09-14 02:15:19 -0400 |
---|---|---|
committer | mobile promotions <svcmobile_promotions@nvidia.com> | 2018-09-27 01:24:52 -0400 |
commit | 34732a14b22f09d8f9d52f756612178f0313f120 (patch) | |
tree | 94f634efcad3179ddbca82dedaf82dfe8f099030 | |
parent | 991179f29cea8ab8272465789496c2f15bad6240 (diff) |
nvgpu: gpu: Support multiple tpc-pg masks.
- TPC powergating should be done before
calling gk20a_enable_gr_hw().
gk20a_enable_gr_hw() issues a GR engine reset.
Without this fix, enabling 1 TPC from each PES
causes a ctxsw timeout error while running GFX Benchmark.
- Adds valid tpc-pg masks for 1/2/3/4 active TPC configs.
TPC Config - TPC-MASK
4 TPC configuration - 0x0
3 TPC configuration - 0x1/0x2/0x4/0x8
2 TPC configuration - 0x5/0x9/0x6/0xa
- We should not write to gr_fe_tpc_pesmask_r()
as part of the TPC-PG sequence. This register is for
debug purposes only.
Bug 200442360
Change-Id: I6fbe1ad8fbc836ace8cbaf00ec3d21a12c73e0bd
Signed-off-by: Deepak Goyal <dgoyal@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1809772
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gk20a.c | 30 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 21 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/include/nvgpu/gk20a.h | 4 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/os/linux/driver_common.c | 6 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/os/linux/platform_gk20a.h | 2 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/os/linux/platform_gv11b_tegra.c | 4 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/os/linux/sysfs.c | 15 |
7 files changed, 47 insertions, 35 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c index 9958d24f..1cad8dcb 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gk20a.c | |||
@@ -259,9 +259,28 @@ int gk20a_finalize_poweron(struct gk20a *g) | |||
259 | 259 | ||
260 | g->ops.mc.intr_enable(g); | 260 | g->ops.mc.intr_enable(g); |
261 | 261 | ||
262 | /* | ||
263 | * Overwrite can_tpc_powergate to false if the chip is ES fused and | ||
264 | * already optimized with some TPCs already floorswept | ||
265 | * via fuse. We will not support TPC-PG in those cases. | ||
266 | */ | ||
267 | |||
268 | if (g->ops.fuse.fuse_status_opt_tpc_gpc(g, 0) != 0x0) { | ||
269 | g->can_tpc_powergate = false; | ||
270 | g->tpc_pg_mask = 0x0; | ||
271 | } | ||
272 | |||
273 | nvgpu_mutex_acquire(&g->tpc_pg_lock); | ||
274 | |||
275 | if (g->can_tpc_powergate) { | ||
276 | if (g->ops.gr.powergate_tpc != NULL) | ||
277 | g->ops.gr.powergate_tpc(g); | ||
278 | } | ||
279 | |||
262 | err = gk20a_enable_gr_hw(g); | 280 | err = gk20a_enable_gr_hw(g); |
263 | if (err) { | 281 | if (err) { |
264 | nvgpu_err(g, "failed to enable gr"); | 282 | nvgpu_err(g, "failed to enable gr"); |
283 | nvgpu_mutex_release(&g->tpc_pg_lock); | ||
265 | goto done; | 284 | goto done; |
266 | } | 285 | } |
267 | 286 | ||
@@ -271,6 +290,7 @@ int gk20a_finalize_poweron(struct gk20a *g) | |||
271 | } | 290 | } |
272 | if (err) { | 291 | if (err) { |
273 | nvgpu_err(g, "failed to init pmu ucode"); | 292 | nvgpu_err(g, "failed to init pmu ucode"); |
293 | nvgpu_mutex_release(&g->tpc_pg_lock); | ||
274 | goto done; | 294 | goto done; |
275 | } | 295 | } |
276 | } | 296 | } |
@@ -279,6 +299,7 @@ int gk20a_finalize_poweron(struct gk20a *g) | |||
279 | err = gk20a_init_pstate_support(g); | 299 | err = gk20a_init_pstate_support(g); |
280 | if (err) { | 300 | if (err) { |
281 | nvgpu_err(g, "failed to init pstates"); | 301 | nvgpu_err(g, "failed to init pstates"); |
302 | nvgpu_mutex_release(&g->tpc_pg_lock); | ||
282 | goto done; | 303 | goto done; |
283 | } | 304 | } |
284 | } | 305 | } |
@@ -296,18 +317,11 @@ int gk20a_finalize_poweron(struct gk20a *g) | |||
296 | err = nvgpu_init_pmu_support(g); | 317 | err = nvgpu_init_pmu_support(g); |
297 | if (err) { | 318 | if (err) { |
298 | nvgpu_err(g, "failed to init gk20a pmu"); | 319 | nvgpu_err(g, "failed to init gk20a pmu"); |
320 | nvgpu_mutex_release(&g->tpc_pg_lock); | ||
299 | goto done; | 321 | goto done; |
300 | } | 322 | } |
301 | } | 323 | } |
302 | 324 | ||
303 | nvgpu_mutex_acquire(&g->tpc_pg_lock); | ||
304 | |||
305 | if (g->can_tpc_powergate) { | ||
306 | if (g->ops.gr.powergate_tpc != NULL) { | ||
307 | g->ops.gr.powergate_tpc(g); | ||
308 | } | ||
309 | } | ||
310 | |||
311 | err = gk20a_init_gr_support(g); | 325 | err = gk20a_init_gr_support(g); |
312 | if (err) { | 326 | if (err) { |
313 | nvgpu_err(g, "failed to init gk20a gr"); | 327 | nvgpu_err(g, "failed to init gk20a gr"); |
diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index 3dedc6b5..288bd583 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c | |||
@@ -73,16 +73,6 @@ | |||
73 | */ | 73 | */ |
74 | #define GR_TPCS_INFO_FOR_MAPREGISTER 6 | 74 | #define GR_TPCS_INFO_FOR_MAPREGISTER 6 |
75 | 75 | ||
76 | /* | ||
77 | * There are 4 TPCs in GV11b ranging from TPC0 to TPC3 | ||
78 | * There are two PES in GV11b each controlling two TPCs | ||
79 | * PES0 is linked to TPC0 & TPC2 | ||
80 | * PES1 is linked to TPC1 & TPC3 | ||
81 | */ | ||
82 | #define TPC_MASK_FOR_PESID_0 (u32) 0x5 | ||
83 | #define TPC_MASK_FOR_PESID_1 (u32) 0xa | ||
84 | |||
85 | |||
86 | bool gr_gv11b_is_valid_class(struct gk20a *g, u32 class_num) | 76 | bool gr_gv11b_is_valid_class(struct gk20a *g, u32 class_num) |
87 | { | 77 | { |
88 | bool valid = false; | 78 | bool valid = false; |
@@ -143,17 +133,6 @@ void gr_gv11b_powergate_tpc(struct gk20a *g) | |||
143 | tpc_pg_status = g->ops.fuse.fuse_status_opt_tpc_gpc(g, 0); | 133 | tpc_pg_status = g->ops.fuse.fuse_status_opt_tpc_gpc(g, 0); |
144 | } while (tpc_pg_status != g->tpc_pg_mask); | 134 | } while (tpc_pg_status != g->tpc_pg_mask); |
145 | 135 | ||
146 | gk20a_writel(g, gr_fe_tpc_pesmask_r(), gr_fe_tpc_pesmask_req_send_f() | | ||
147 | gr_fe_tpc_pesmask_action_write_f() | | ||
148 | gr_fe_tpc_pesmask_pesid_f(0) | | ||
149 | gr_fe_tpc_pesmask_gpcid_f(0) | | ||
150 | ((~g->tpc_pg_mask & (u32) 0xf) & TPC_MASK_FOR_PESID_0)); | ||
151 | gk20a_writel(g, gr_fe_tpc_pesmask_r(), gr_fe_tpc_pesmask_req_send_f() | | ||
152 | gr_fe_tpc_pesmask_action_write_f() | | ||
153 | gr_fe_tpc_pesmask_pesid_f(1) | | ||
154 | gr_fe_tpc_pesmask_gpcid_f(0) | | ||
155 | ((~g->tpc_pg_mask & (u32) 0xf) & TPC_MASK_FOR_PESID_1)); | ||
156 | |||
157 | return; | 136 | return; |
158 | } | 137 | } |
159 | 138 | ||
diff --git a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h index 2ebe0011..8d7ccfa8 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h +++ b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h | |||
@@ -138,6 +138,8 @@ enum gk20a_cbc_op { | |||
138 | 138 | ||
139 | #define nvgpu_get_litter_value(g, v) (g)->ops.get_litter_value((g), v) | 139 | #define nvgpu_get_litter_value(g, v) (g)->ops.get_litter_value((g), v) |
140 | 140 | ||
141 | #define MAX_TPC_PG_CONFIGS 3 | ||
142 | |||
141 | enum nvgpu_unit; | 143 | enum nvgpu_unit; |
142 | 144 | ||
143 | enum nvgpu_flush_op; | 145 | enum nvgpu_flush_op; |
@@ -1592,7 +1594,7 @@ struct gk20a { | |||
1592 | u32 tpc_pg_mask; | 1594 | u32 tpc_pg_mask; |
1593 | bool can_tpc_powergate; | 1595 | bool can_tpc_powergate; |
1594 | 1596 | ||
1595 | u32 valid_tpc_mask; | 1597 | u32 valid_tpc_mask[MAX_TPC_PG_CONFIGS]; |
1596 | 1598 | ||
1597 | struct nvgpu_bios bios; | 1599 | struct nvgpu_bios bios; |
1598 | bool bios_is_init; | 1600 | bool bios_is_init; |
diff --git a/drivers/gpu/nvgpu/os/linux/driver_common.c b/drivers/gpu/nvgpu/os/linux/driver_common.c index 9ff32d68..539f0559 100644 --- a/drivers/gpu/nvgpu/os/linux/driver_common.c +++ b/drivers/gpu/nvgpu/os/linux/driver_common.c | |||
@@ -135,6 +135,7 @@ static void nvgpu_init_timeslice(struct gk20a *g) | |||
135 | static void nvgpu_init_pm_vars(struct gk20a *g) | 135 | static void nvgpu_init_pm_vars(struct gk20a *g) |
136 | { | 136 | { |
137 | struct gk20a_platform *platform = dev_get_drvdata(dev_from_gk20a(g)); | 137 | struct gk20a_platform *platform = dev_get_drvdata(dev_from_gk20a(g)); |
138 | u32 i = 0; | ||
138 | 139 | ||
139 | /* | 140 | /* |
140 | * Set up initial power settings. For non-slicon platforms, disable | 141 | * Set up initial power settings. For non-slicon platforms, disable |
@@ -172,7 +173,10 @@ static void nvgpu_init_pm_vars(struct gk20a *g) | |||
172 | g->support_pmu = support_gk20a_pmu(dev_from_gk20a(g)); | 173 | g->support_pmu = support_gk20a_pmu(dev_from_gk20a(g)); |
173 | __nvgpu_set_enabled(g, NVGPU_CAN_RAILGATE, platform->can_railgate_init); | 174 | __nvgpu_set_enabled(g, NVGPU_CAN_RAILGATE, platform->can_railgate_init); |
174 | g->can_tpc_powergate = platform->can_tpc_powergate; | 175 | g->can_tpc_powergate = platform->can_tpc_powergate; |
175 | g->valid_tpc_mask = platform->valid_tpc_mask; | 176 | |
177 | for (i = 0; i < MAX_TPC_PG_CONFIGS; i++) | ||
178 | g->valid_tpc_mask[i] = platform->valid_tpc_mask[i]; | ||
179 | |||
176 | g->ldiv_slowdown_factor = platform->ldiv_slowdown_factor_init; | 180 | g->ldiv_slowdown_factor = platform->ldiv_slowdown_factor_init; |
177 | /* if default delay is not set, set default delay to 500msec */ | 181 | /* if default delay is not set, set default delay to 500msec */ |
178 | if (platform->railgate_delay_init) | 182 | if (platform->railgate_delay_init) |
diff --git a/drivers/gpu/nvgpu/os/linux/platform_gk20a.h b/drivers/gpu/nvgpu/os/linux/platform_gk20a.h index 3a0227a1..a19d0a7c 100644 --- a/drivers/gpu/nvgpu/os/linux/platform_gk20a.h +++ b/drivers/gpu/nvgpu/os/linux/platform_gk20a.h | |||
@@ -74,7 +74,7 @@ struct gk20a_platform { | |||
74 | struct reset_control *reset_control; | 74 | struct reset_control *reset_control; |
75 | #endif | 75 | #endif |
76 | /* valid TPC-MASK */ | 76 | /* valid TPC-MASK */ |
77 | u32 valid_tpc_mask; | 77 | u32 valid_tpc_mask[MAX_TPC_PG_CONFIGS]; |
78 | 78 | ||
79 | /* Delay before rail gated */ | 79 | /* Delay before rail gated */ |
80 | int railgate_delay_init; | 80 | int railgate_delay_init; |
diff --git a/drivers/gpu/nvgpu/os/linux/platform_gv11b_tegra.c b/drivers/gpu/nvgpu/os/linux/platform_gv11b_tegra.c index 41cba0f0..dafa05e5 100644 --- a/drivers/gpu/nvgpu/os/linux/platform_gv11b_tegra.c +++ b/drivers/gpu/nvgpu/os/linux/platform_gv11b_tegra.c | |||
@@ -233,7 +233,9 @@ struct gk20a_platform gv11b_tegra_platform = { | |||
233 | .can_railgate_init = true, | 233 | .can_railgate_init = true, |
234 | 234 | ||
235 | .can_tpc_powergate = true, | 235 | .can_tpc_powergate = true, |
236 | .valid_tpc_mask = 0xc, | 236 | .valid_tpc_mask[0] = 0x0, |
237 | .valid_tpc_mask[1] = 0x1, | ||
238 | .valid_tpc_mask[2] = 0x5, | ||
237 | 239 | ||
238 | .can_slcg = true, | 240 | .can_slcg = true, |
239 | .can_blcg = true, | 241 | .can_blcg = true, |
diff --git a/drivers/gpu/nvgpu/os/linux/sysfs.c b/drivers/gpu/nvgpu/os/linux/sysfs.c index 9e48e45d..1ffb6539 100644 --- a/drivers/gpu/nvgpu/os/linux/sysfs.c +++ b/drivers/gpu/nvgpu/os/linux/sysfs.c | |||
@@ -865,6 +865,18 @@ static ssize_t tpc_pg_mask_read(struct device *dev, | |||
865 | return snprintf(buf, PAGE_SIZE, "%d\n", g->tpc_pg_mask); | 865 | return snprintf(buf, PAGE_SIZE, "%d\n", g->tpc_pg_mask); |
866 | } | 866 | } |
867 | 867 | ||
868 | static bool is_tpc_mask_valid(struct gk20a *g, u32 tpc_mask) | ||
869 | { | ||
870 | u32 i; | ||
871 | bool valid = false; | ||
872 | |||
873 | for (i = 0; i < MAX_TPC_PG_CONFIGS; i++) { | ||
874 | if (tpc_mask == g->valid_tpc_mask[i]) | ||
875 | valid = true; | ||
876 | } | ||
877 | return valid; | ||
878 | } | ||
879 | |||
868 | static ssize_t tpc_pg_mask_store(struct device *dev, | 880 | static ssize_t tpc_pg_mask_store(struct device *dev, |
869 | struct device_attribute *attr, const char *buf, size_t count) | 881 | struct device_attribute *attr, const char *buf, size_t count) |
870 | { | 882 | { |
@@ -896,10 +908,9 @@ static ssize_t tpc_pg_mask_store(struct device *dev, | |||
896 | return -ENODEV; | 908 | return -ENODEV; |
897 | } | 909 | } |
898 | 910 | ||
899 | if (val == TPC_MASK_FOR_ALL_ACTIVE_TPCs || val == g->valid_tpc_mask) { | 911 | if (is_tpc_mask_valid(g, (u32)val)) { |
900 | g->tpc_pg_mask = val; | 912 | g->tpc_pg_mask = val; |
901 | } else { | 913 | } else { |
902 | |||
903 | nvgpu_err(g, "TPC-PG mask is invalid"); | 914 | nvgpu_err(g, "TPC-PG mask is invalid"); |
904 | nvgpu_mutex_release(&g->tpc_pg_lock); | 915 | nvgpu_mutex_release(&g->tpc_pg_lock); |
905 | return -EINVAL; | 916 | return -EINVAL; |