summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Mahantesh Kumbar <mkumbar@nvidia.com> 2017-10-20 07:00:42 -0400
committer: mobile promotions <svcmobile_promotions@nvidia.com> 2017-11-20 03:34:15 -0500
commit: 1ab4754c052b639427f38202860d064c2fa03b57 (patch)
tree: 0d7740707f0a1c602d1838f256aed07778f6fba4
parent: 9d04e970937657d11620d812c29a5d10828440fc (diff)
gpu: nvgpu: Kill pg init thread if pmu boot fails
- Created nvgpu_kill_task_pg_init() method to set pmu state to
  PMU_STATE_EXIT & make thread stop, and poll to confirm thread stopped.
- Check for PMU/SEC2 ACR secure boot completion status & initiate pg init
  thread kill if ACR boot exits with error, which fails to validate & boot
  LS-PMU.
- Set pmu state to PMU_STATE_OFF after thread kill during ACR boot failure.

Issue: pg init task blocks if PMU boot fails & causes the kernel to show the
message "task nvgpu_pg_init_g:2120 blocked for more than 120 seconds"

Bug 200346134

Change-Id: I5270426080dcd628ccca4df798005294c19767a0
Signed-off-by: Mahantesh Kumbar <mkumbar@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1582593
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Vijayakumar Subbu <vsubbu@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
-rw-r--r-- drivers/gpu/nvgpu/common/pmu/pmu.c 45
-rw-r--r-- drivers/gpu/nvgpu/gm20b/acr_gm20b.c 11
-rw-r--r-- drivers/gpu/nvgpu/gp106/sec2_gp106.c 10
-rw-r--r-- drivers/gpu/nvgpu/include/nvgpu/pmu.h 1
4 files changed, 44 insertions, 23 deletions
diff --git a/drivers/gpu/nvgpu/common/pmu/pmu.c b/drivers/gpu/nvgpu/common/pmu/pmu.c
index 3447f40d..d595097b 100644
--- a/drivers/gpu/nvgpu/common/pmu/pmu.c
+++ b/drivers/gpu/nvgpu/common/pmu/pmu.c
@@ -155,6 +155,31 @@ static int nvgpu_init_task_pg_init(struct gk20a *g)
155 return err; 155 return err;
156} 156}
157 157
158void nvgpu_kill_task_pg_init(struct gk20a *g)
159{
160 struct nvgpu_pmu *pmu = &g->pmu;
161 struct nvgpu_timeout timeout;
162
163 /* make sure the pending operations are finished before we continue */
164 if (nvgpu_thread_is_running(&pmu->pg_init.state_task)) {
165
166 /* post PMU_STATE_EXIT to exit PMU state machine loop */
167 nvgpu_pmu_state_change(g, PMU_STATE_EXIT, true);
168
169 /* Make thread stop*/
170 nvgpu_thread_stop(&pmu->pg_init.state_task);
171
172 /* wait to confirm thread stopped */
173 nvgpu_timeout_init(g, &timeout, 1000, NVGPU_TIMER_RETRY_TIMER);
174 do {
175 if (!nvgpu_thread_is_running(&pmu->pg_init.state_task))
176 break;
177 nvgpu_udelay(2);
178 } while (!nvgpu_timeout_expired_msg(&timeout,
179 "timeout - waiting PMU state machine thread stop"));
180 }
181}
182
158static int nvgpu_init_pmu_setup_sw(struct gk20a *g) 183static int nvgpu_init_pmu_setup_sw(struct gk20a *g)
159{ 184{
160 struct nvgpu_pmu *pmu = &g->pmu; 185 struct nvgpu_pmu *pmu = &g->pmu;
@@ -469,7 +494,6 @@ int nvgpu_pmu_destroy(struct gk20a *g)
469{ 494{
470 struct nvgpu_pmu *pmu = &g->pmu; 495 struct nvgpu_pmu *pmu = &g->pmu;
471 struct pmu_pg_stats_data pg_stat_data = { 0 }; 496 struct pmu_pg_stats_data pg_stat_data = { 0 };
472 struct nvgpu_timeout timeout;
473 int i; 497 int i;
474 498
475 nvgpu_log_fn(g, " "); 499 nvgpu_log_fn(g, " ");
@@ -477,24 +501,7 @@ int nvgpu_pmu_destroy(struct gk20a *g)
477 if (!g->support_pmu) 501 if (!g->support_pmu)
478 return 0; 502 return 0;
479 503
480 /* make sure the pending operations are finished before we continue */ 504 nvgpu_kill_task_pg_init(g);
481 if (nvgpu_thread_is_running(&pmu->pg_init.state_task)) {
482
483 /* post PMU_STATE_EXIT to exit PMU state machine loop */
484 nvgpu_pmu_state_change(g, PMU_STATE_EXIT, true);
485
486 /* Make thread stop*/
487 nvgpu_thread_stop(&pmu->pg_init.state_task);
488
489 /* wait to confirm thread stopped */
490 nvgpu_timeout_init(g, &timeout, 1000, NVGPU_TIMER_RETRY_TIMER);
491 do {
492 if (!nvgpu_thread_is_running(&pmu->pg_init.state_task))
493 break;
494 nvgpu_udelay(2);
495 } while (!nvgpu_timeout_expired_msg(&timeout,
496 "timeout - waiting PMU state machine thread stop"));
497 }
498 505
499 nvgpu_pmu_get_pg_stats(g, 506 nvgpu_pmu_get_pg_stats(g,
500 PMU_PG_ELPG_ENGINE_ID_GRAPHICS, &pg_stat_data); 507 PMU_PG_ELPG_ENGINE_ID_GRAPHICS, &pg_stat_data);
diff --git a/drivers/gpu/nvgpu/gm20b/acr_gm20b.c b/drivers/gpu/nvgpu/gm20b/acr_gm20b.c
index a39cdf2c..e5fd8692 100644
--- a/drivers/gpu/nvgpu/gm20b/acr_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/acr_gm20b.c
@@ -1407,12 +1407,12 @@ int pmu_wait_for_halt(struct gk20a *g, unsigned int timeout_ms)
1407{ 1407{
1408 struct nvgpu_pmu *pmu = &g->pmu; 1408 struct nvgpu_pmu *pmu = &g->pmu;
1409 u32 data = 0; 1409 u32 data = 0;
1410 int ret = -EBUSY; 1410 int ret = 0;
1411 1411
1412 ret = nvgpu_flcn_wait_for_halt(pmu->flcn, timeout_ms); 1412 ret = nvgpu_flcn_wait_for_halt(pmu->flcn, timeout_ms);
1413 if (ret) { 1413 if (ret) {
1414 nvgpu_err(g, "ACR boot timed out"); 1414 nvgpu_err(g, "ACR boot timed out");
1415 return ret; 1415 goto exit;
1416 } 1416 }
1417 1417
1418 g->acr.capabilities = gk20a_readl(g, pwr_falcon_mailbox1_r()); 1418 g->acr.capabilities = gk20a_readl(g, pwr_falcon_mailbox1_r());
@@ -1421,6 +1421,13 @@ int pmu_wait_for_halt(struct gk20a *g, unsigned int timeout_ms)
1421 if (data) { 1421 if (data) {
1422 nvgpu_err(g, "ACR boot failed, err %x", data); 1422 nvgpu_err(g, "ACR boot failed, err %x", data);
1423 ret = -EAGAIN; 1423 ret = -EAGAIN;
1424 goto exit;
1425 }
1426
1427exit:
1428 if (ret) {
1429 nvgpu_kill_task_pg_init(g);
1430 nvgpu_pmu_state_change(g, PMU_STATE_OFF, false);
1424 } 1431 }
1425 1432
1426 return ret; 1433 return ret;
diff --git a/drivers/gpu/nvgpu/gp106/sec2_gp106.c b/drivers/gpu/nvgpu/gp106/sec2_gp106.c
index 26ded39e..332ac794 100644
--- a/drivers/gpu/nvgpu/gp106/sec2_gp106.c
+++ b/drivers/gpu/nvgpu/gp106/sec2_gp106.c
@@ -52,20 +52,26 @@ int sec2_wait_for_halt(struct gk20a *g, unsigned int timeout)
52 completion = nvgpu_flcn_wait_for_halt(&g->sec2_flcn, timeout); 52 completion = nvgpu_flcn_wait_for_halt(&g->sec2_flcn, timeout);
53 if (completion) { 53 if (completion) {
54 nvgpu_err(g, "ACR boot timed out"); 54 nvgpu_err(g, "ACR boot timed out");
55 return completion; 55 goto exit;
56 } 56 }
57 57
58 g->acr.capabilities = gk20a_readl(g, psec_falcon_mailbox1_r()); 58 g->acr.capabilities = gk20a_readl(g, psec_falcon_mailbox1_r());
59 gm20b_dbg_pmu("ACR capabilities %x\n", g->acr.capabilities); 59 gm20b_dbg_pmu("ACR capabilities %x\n", g->acr.capabilities);
60 data = gk20a_readl(g, psec_falcon_mailbox0_r()); 60 data = gk20a_readl(g, psec_falcon_mailbox0_r());
61 if (data) { 61 if (data) {
62
63 nvgpu_err(g, "ACR boot failed, err %x", data); 62 nvgpu_err(g, "ACR boot failed, err %x", data);
64 completion = -EAGAIN; 63 completion = -EAGAIN;
64 goto exit;
65 } 65 }
66 66
67 init_pmu_setup_hw1(g); 67 init_pmu_setup_hw1(g);
68 68
69exit:
70 if (completion) {
71 nvgpu_kill_task_pg_init(g);
72 nvgpu_pmu_state_change(g, PMU_STATE_OFF, false);
73 }
74
69 return completion; 75 return completion;
70} 76}
71 77
diff --git a/drivers/gpu/nvgpu/include/nvgpu/pmu.h b/drivers/gpu/nvgpu/include/nvgpu/pmu.h
index 045bf34c..a818f3d0 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/pmu.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/pmu.h
@@ -421,6 +421,7 @@ int nvgpu_pmu_process_init_msg(struct nvgpu_pmu *pmu,
421 421
422void nvgpu_pmu_state_change(struct gk20a *g, u32 pmu_state, 422void nvgpu_pmu_state_change(struct gk20a *g, u32 pmu_state,
423 bool post_change_event); 423 bool post_change_event);
424void nvgpu_kill_task_pg_init(struct gk20a *g);
424 425
425/* NVGPU-PMU MEM alloc */ 426/* NVGPU-PMU MEM alloc */
426void nvgpu_pmu_surface_free(struct gk20a *g, struct nvgpu_mem *mem); 427void nvgpu_pmu_surface_free(struct gk20a *g, struct nvgpu_mem *mem);