summary | refs | log | tree | commit | diff | stats
path: root/drivers
diff options
context:
space:
mode:
author: Deepak Nibade <dnibade@nvidia.com> 2017-10-25 09:21:41 -0400
committer: mobile promotions <svcmobile_promotions@nvidia.com> 2017-10-26 04:06:49 -0400
commit: 0d4272b65733744c7198b57bd03e3d261fb1740f (patch)
tree: c2abdbbfd5546d6330a2c1be6397ae0d450d2893 /drivers
parent: 6bf40e523740279761f3fdc3d84000acc2f62aba (diff)
gpu: nvgpu: don't re-enable TSG if timed out
In gk20a_fifo_tsg_unbind_channel(), we disable/preempt the TSG, unbind one channel from the TSG, and then re-enable the rest of the channels in the TSG. But it is possible that the TSG has already timed out due to some error and is already disabled. If we re-enable all channels in such a case, it can cause random issues right after re-enabling the faulted channel. Hence, do not re-enable the TSG if it has timed out. Since we disable all channels of a TSG if one channel encounters a fatal error, it is safe to assume that the TSG has timed out if one channel has timed out. Bug 1958308. Bug 200327095. Change-Id: I958ca6a2b408ff1338f2e551a79c072f1e203eda Signed-off-by: Deepak Nibade <dnibade@nvidia.com> Reviewed-on: https://git-master.nvidia.com/r/1585421 Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com> Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers')
-rw-r--r-- drivers/gpu/nvgpu/gk20a/fifo_gk20a.c | 18
1 files changed, 16 insertions, 2 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
index 3b7dce32..194824c3 100644
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
@@ -1952,6 +1952,12 @@ int gk20a_fifo_tsg_unbind_channel(struct channel_gk20a *ch)
1952 struct fifo_gk20a *f = &g->fifo; 1952 struct fifo_gk20a *f = &g->fifo;
1953 struct tsg_gk20a *tsg = &f->tsg[ch->tsgid]; 1953 struct tsg_gk20a *tsg = &f->tsg[ch->tsgid];
1954 int err; 1954 int err;
1955 bool tsg_timedout = false;
1956
1957 /* If one channel in TSG times out, we disable all channels */
1958 nvgpu_rwsem_down_write(&tsg->ch_list_lock);
1959 tsg_timedout = ch->has_timedout;
1960 nvgpu_rwsem_up_write(&tsg->ch_list_lock);
1955 1961
1956 /* Disable TSG and examine status before unbinding channel */ 1962 /* Disable TSG and examine status before unbinding channel */
1957 g->ops.fifo.disable_tsg(tsg); 1963 g->ops.fifo.disable_tsg(tsg);
@@ -1976,14 +1982,22 @@ int gk20a_fifo_tsg_unbind_channel(struct channel_gk20a *ch)
1976 nvgpu_list_del(&ch->ch_entry); 1982 nvgpu_list_del(&ch->ch_entry);
1977 nvgpu_rwsem_up_write(&tsg->ch_list_lock); 1983 nvgpu_rwsem_up_write(&tsg->ch_list_lock);
1978 1984
1979 g->ops.fifo.enable_tsg(tsg); 1985 /*
1986 * Don't re-enable all channels if TSG has timed out already
1987 *
1988 * Note that we can skip disabling and preempting TSG too in case of
1989 * time out, but we keep that to ensure TSG is kicked out
1990 */
1991 if (!tsg_timedout)
1992 g->ops.fifo.enable_tsg(tsg);
1980 1993
1981 gk20a_channel_abort_clean_up(ch); 1994 gk20a_channel_abort_clean_up(ch);
1982 1995
1983 return 0; 1996 return 0;
1984 1997
1985fail_enable_tsg: 1998fail_enable_tsg:
1986 g->ops.fifo.enable_tsg(tsg); 1999 if (!tsg_timedout)
2000 g->ops.fifo.enable_tsg(tsg);
1987 return err; 2001 return err;
1988} 2002}
1989 2003