summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
diff options
context:
space:
mode:
authorDebarshi Dutta <ddutta@nvidia.com>2019-04-30 05:03:05 -0400
committermobile promotions <svcmobile_promotions@nvidia.com>2019-05-09 17:41:50 -0400
commitbdaacf544127fcfaa474ccb5466aa93f81382416 (patch)
tree1a5ed5825b3680d5e199ee98ba85d0497c11d208 /drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
parentc81cc032c48a1b25e095b17b77399166c9091ff3 (diff)
gpu: nvgpu: disable elpg before ctxsw_disable
If FECS is sent the stop_ctxsw method, ELPG entry/exit cannot happen and may time out. This could manifest as different error signatures depending on when the stop_ctxsw FECS method gets sent with respect to the PMU ELPG sequence. It could appear as a PMU halt, an abort, or possibly an external error as well. If ctxsw fails to disable, do not read engine info and just abort the TSG. Bug 2092051 Bug 2429295 Bug 2484211 Bug 1890287 Change-Id: I5f3ba07663bcafd3f0083d44c603420b0ccf6945 Signed-off-by: Seema Khowala <seemaj@nvidia.com> Reviewed-on: https://git-master.nvidia.com/r/2014914 Signed-off-by: Debarshi Dutta <ddutta@nvidia.com> Reviewed-on: https://git-master.nvidia.com/r/2018156 GVS: Gerrit_Virtual_Submit Reviewed-by: Bibek Basu <bbasu@nvidia.com> Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com> Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/fifo_gk20a.c')
-rw-r--r--drivers/gpu/nvgpu/gk20a/fifo_gk20a.c35
1 file changed, 31 insertions, 4 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
index 6d89940a..b96372b4 100644
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
@@ -1943,14 +1943,42 @@ void gk20a_fifo_recover_ch(struct gk20a *g, struct channel_gk20a *ch,
1943void gk20a_fifo_recover_tsg(struct gk20a *g, struct tsg_gk20a *tsg, 1943void gk20a_fifo_recover_tsg(struct gk20a *g, struct tsg_gk20a *tsg,
1944 bool verbose, u32 rc_type) 1944 bool verbose, u32 rc_type)
1945{ 1945{
1946 u32 engines; 1946 u32 engines = 0U;
1947 int err;
1947 1948
1948 /* stop context switching to prevent engine assignments from 1949 /* stop context switching to prevent engine assignments from
1949 changing until TSG is recovered */ 1950 changing until TSG is recovered */
1950 nvgpu_mutex_acquire(&g->dbg_sessions_lock); 1951 nvgpu_mutex_acquire(&g->dbg_sessions_lock);
1951 gr_gk20a_disable_ctxsw(g);
1952 1952
1953 engines = gk20a_fifo_engines_on_id(g, tsg->tsgid, true); 1953 /* disable tsg so that it does not get scheduled again */
1954 g->ops.fifo.disable_tsg(tsg);
1955
1956 /*
1957 * stop context switching to prevent engine assignments from
1958 * changing until engine status is checked to make sure tsg
1959 * being recovered is not loaded on the engines
1960 */
1961 err = gr_gk20a_disable_ctxsw(g);
1962
1963 if (err != 0) {
1964 /* if failed to disable ctxsw, just abort tsg */
1965 nvgpu_err(g, "failed to disable ctxsw");
1966 } else {
1967 /* recover engines if tsg is loaded on the engines */
1968 engines = gk20a_fifo_engines_on_id(g, tsg->tsgid, true);
1969
1970 /*
1971 * it is ok to enable ctxsw before tsg is recovered. If engines
1972 * is 0, no engine recovery is needed and if it is non zero,
1973 * gk20a_fifo_recover will call get_engines_mask_on_id again.
1974 * By that time if tsg is not on the engine, engine need not
1975 * be reset.
1976 */
1977 err = gr_gk20a_enable_ctxsw(g);
1978 if (err != 0) {
1979 nvgpu_err(g, "failed to enable ctxsw");
1980 }
1981 }
1954 1982
1955 if (engines) { 1983 if (engines) {
1956 gk20a_fifo_recover(g, engines, tsg->tsgid, true, true, verbose, 1984 gk20a_fifo_recover(g, engines, tsg->tsgid, true, true, verbose,
@@ -1963,7 +1991,6 @@ void gk20a_fifo_recover_tsg(struct gk20a *g, struct tsg_gk20a *tsg,
1963 gk20a_fifo_abort_tsg(g, tsg, false); 1991 gk20a_fifo_abort_tsg(g, tsg, false);
1964 } 1992 }
1965 1993
1966 gr_gk20a_enable_ctxsw(g);
1967 nvgpu_mutex_release(&g->dbg_sessions_lock); 1994 nvgpu_mutex_release(&g->dbg_sessions_lock);
1968} 1995}
1969 1996