summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Sagar Kamble <skamble@nvidia.com> 2021-02-02 11:32:23 -0500
committer: mobile promotions <svcmobile_promotions@nvidia.com> 2021-03-19 17:39:39 -0400
commit: 13fc430775eb0e39dc06e420d5c92dda7016f6ae (patch)
tree: a29ac9b259819124b8ffd0250feaab53fbb693c7
parent: 9170f2b77cba9aedd957acdde7c758e81e073cc0 (diff)
gpu: nvgpu: retry tsg unbind if NEXT is set
The NEXT bit can remain set for a channel if the timeslice expires before the scheduler clears it. Because of this, nvgpu fails the TSG unbind and, in turn, nvrm_gpu fails the channel close. In this case, rechecking the channel hw state after some time may show that the scheduler has cleared the NEXT bit. Re-enable the TSG and return -EAGAIN to nvrm_gpu so that it can retry. Bug 3144960 Bug 200520811 Change-Id: I35f417f02270e371a4e632986b73a00f8a4f921a Signed-off-by: Sagar Kamble <skamble@nvidia.com> Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2468391 (cherry picked from commit cf287a4ef592e7329f813c076ec8bdad18dc5933) Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2479106 Tested-by: mobile promotions <svcmobile_promotions@nvidia.com> Reviewed-by: Deepak Nibade <dnibade@nvidia.com> Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com> Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com> GVS: Gerrit_Virtual_Submit
-rw-r--r-- drivers/gpu/nvgpu/common/fifo/channel.c 4
-rw-r--r-- drivers/gpu/nvgpu/common/fifo/tsg.c 9
-rw-r--r-- drivers/gpu/nvgpu/gk20a/fifo_gk20a.c 6
-rw-r--r-- drivers/gpu/nvgpu/include/nvgpu/tsg.h 4
-rw-r--r-- drivers/gpu/nvgpu/os/linux/ioctl_tsg.c 7
-rw-r--r-- drivers/gpu/nvgpu/vgpu/gv11b/vgpu_tsg_gv11b.c 4
-rw-r--r-- drivers/gpu/nvgpu/vgpu/tsg_vgpu.c 4
7 files changed, 23 insertions, 15 deletions
diff --git a/drivers/gpu/nvgpu/common/fifo/channel.c b/drivers/gpu/nvgpu/common/fifo/channel.c
index 7179d46e..2be72cb4 100644
--- a/drivers/gpu/nvgpu/common/fifo/channel.c
+++ b/drivers/gpu/nvgpu/common/fifo/channel.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * GK20A Graphics channel 2 * GK20A Graphics channel
3 * 3 *
4 * Copyright (c) 2011-2020, NVIDIA CORPORATION. All rights reserved. 4 * Copyright (c) 2011-2021, NVIDIA CORPORATION. All rights reserved.
5 * 5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a 6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"), 7 * copy of this software and associated documentation files (the "Software"),
@@ -333,7 +333,7 @@ static void gk20a_free_channel(struct channel_gk20a *ch, bool force)
333 * have an open channel fd anymore to use for the unbind 333 * have an open channel fd anymore to use for the unbind
334 * ioctl. 334 * ioctl.
335 */ 335 */
336 err = gk20a_tsg_unbind_channel(ch); 336 err = gk20a_tsg_unbind_channel(ch, true);
337 if (err) { 337 if (err) {
338 nvgpu_err(g, 338 nvgpu_err(g,
339 "failed to unbind channel %d from TSG", 339 "failed to unbind channel %d from TSG",
diff --git a/drivers/gpu/nvgpu/common/fifo/tsg.c b/drivers/gpu/nvgpu/common/fifo/tsg.c
index 841dd465..ebcdd2be 100644
--- a/drivers/gpu/nvgpu/common/fifo/tsg.c
+++ b/drivers/gpu/nvgpu/common/fifo/tsg.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2014-2020, NVIDIA CORPORATION. All rights reserved. 2 * Copyright (c) 2014-2021, NVIDIA CORPORATION. All rights reserved.
3 * 3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a 4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"), 5 * copy of this software and associated documentation files (the "Software"),
@@ -148,7 +148,7 @@ int gk20a_tsg_bind_channel(struct tsg_gk20a *tsg,
148} 148}
149 149
150/* The caller must ensure that channel belongs to a tsg */ 150/* The caller must ensure that channel belongs to a tsg */
151int gk20a_tsg_unbind_channel(struct channel_gk20a *ch) 151int gk20a_tsg_unbind_channel(struct channel_gk20a *ch, bool force)
152{ 152{
153 struct gk20a *g = ch->g; 153 struct gk20a *g = ch->g;
154 struct tsg_gk20a *tsg = tsg_gk20a_from_ch(ch); 154 struct tsg_gk20a *tsg = tsg_gk20a_from_ch(ch);
@@ -159,6 +159,11 @@ int gk20a_tsg_unbind_channel(struct channel_gk20a *ch)
159 } 159 }
160 160
161 err = gk20a_fifo_tsg_unbind_channel(ch); 161 err = gk20a_fifo_tsg_unbind_channel(ch);
162
163 if (!force && err == -EAGAIN) {
164 return err;
165 }
166
162 if (err) { 167 if (err) {
163 nvgpu_err(g, "Channel %d unbind failed, tearing down TSG %d", 168 nvgpu_err(g, "Channel %d unbind failed, tearing down TSG %d",
164 ch->chid, tsg->tsgid); 169 ch->chid, tsg->tsgid);
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
index 072f1777..e91830f8 100644
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * GK20A Graphics FIFO (gr host) 2 * GK20A Graphics FIFO (gr host)
3 * 3 *
4 * Copyright (c) 2011-2020, NVIDIA CORPORATION. All rights reserved. 4 * Copyright (c) 2011-2021, NVIDIA CORPORATION. All rights reserved.
5 * 5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a 6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"), 7 * copy of this software and associated documentation files (the "Software"),
@@ -2196,9 +2196,9 @@ int gk20a_fifo_tsg_unbind_channel_verify_status(struct channel_gk20a *ch)
2196 struct gk20a *g = ch->g; 2196 struct gk20a *g = ch->g;
2197 2197
2198 if (gk20a_fifo_channel_status_is_next(g, ch->chid)) { 2198 if (gk20a_fifo_channel_status_is_next(g, ch->chid)) {
2199 nvgpu_err(g, "Channel %d to be removed from TSG %d has NEXT set!", 2199 nvgpu_log_info(g, "Channel %d to be removed from TSG %d has NEXT set!",
2200 ch->chid, ch->tsgid); 2200 ch->chid, ch->tsgid);
2201 return -EINVAL; 2201 return -EAGAIN;
2202 } 2202 }
2203 2203
2204 if (g->ops.fifo.tsg_verify_status_ctx_reload) { 2204 if (g->ops.fifo.tsg_verify_status_ctx_reload) {
diff --git a/drivers/gpu/nvgpu/include/nvgpu/tsg.h b/drivers/gpu/nvgpu/include/nvgpu/tsg.h
index 7cd97c97..f5391e7c 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/tsg.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/tsg.h
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2014-2020, NVIDIA CORPORATION. All rights reserved. 2 * Copyright (c) 2014-2021, NVIDIA CORPORATION. All rights reserved.
3 * 3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a 4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"), 5 * copy of this software and associated documentation files (the "Software"),
@@ -90,7 +90,7 @@ int gk20a_enable_tsg(struct tsg_gk20a *tsg);
90int gk20a_disable_tsg(struct tsg_gk20a *tsg); 90int gk20a_disable_tsg(struct tsg_gk20a *tsg);
91int gk20a_tsg_bind_channel(struct tsg_gk20a *tsg, 91int gk20a_tsg_bind_channel(struct tsg_gk20a *tsg,
92 struct channel_gk20a *ch); 92 struct channel_gk20a *ch);
93int gk20a_tsg_unbind_channel(struct channel_gk20a *ch); 93int gk20a_tsg_unbind_channel(struct channel_gk20a *ch, bool force);
94 94
95void gk20a_tsg_event_id_post_event(struct tsg_gk20a *tsg, 95void gk20a_tsg_event_id_post_event(struct tsg_gk20a *tsg,
96 int event_id); 96 int event_id);
diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_tsg.c b/drivers/gpu/nvgpu/os/linux/ioctl_tsg.c
index 2f8cb3ae..915a173a 100644
--- a/drivers/gpu/nvgpu/os/linux/ioctl_tsg.c
+++ b/drivers/gpu/nvgpu/os/linux/ioctl_tsg.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2014-2020, NVIDIA CORPORATION. All rights reserved. 2 * Copyright (c) 2014-2021, NVIDIA CORPORATION. All rights reserved.
3 * 3 *
4 * This program is free software; you can redistribute it and/or modify it 4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License, 5 * under the terms and conditions of the GNU General Public License,
@@ -134,7 +134,10 @@ static int gk20a_tsg_unbind_channel_fd(struct tsg_gk20a *tsg, int ch_fd)
134 goto out; 134 goto out;
135 } 135 }
136 136
137 err = gk20a_tsg_unbind_channel(ch); 137 err = gk20a_tsg_unbind_channel(ch, false);
138 if (err == -EAGAIN) {
139 goto out;
140 }
138 141
139 /* 142 /*
140 * Mark the channel timedout since channel unbound from TSG 143 * Mark the channel timedout since channel unbound from TSG
diff --git a/drivers/gpu/nvgpu/vgpu/gv11b/vgpu_tsg_gv11b.c b/drivers/gpu/nvgpu/vgpu/gv11b/vgpu_tsg_gv11b.c
index 8c999161..84f295fe 100644
--- a/drivers/gpu/nvgpu/vgpu/gv11b/vgpu_tsg_gv11b.c
+++ b/drivers/gpu/nvgpu/vgpu/gv11b/vgpu_tsg_gv11b.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved. 2 * Copyright (c) 2016-2021, NVIDIA CORPORATION. All rights reserved.
3 * 3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a 4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"), 5 * copy of this software and associated documentation files (the "Software"),
@@ -54,7 +54,7 @@ int vgpu_gv11b_tsg_bind_channel(struct tsg_gk20a *tsg,
54 nvgpu_err(tsg->g, 54 nvgpu_err(tsg->g,
55 "vgpu_gv11b_tsg_bind_channel failed, ch %d tsgid %d", 55 "vgpu_gv11b_tsg_bind_channel failed, ch %d tsgid %d",
56 ch->chid, tsg->tsgid); 56 ch->chid, tsg->tsgid);
57 gk20a_tsg_unbind_channel(ch); 57 gk20a_tsg_unbind_channel(ch, true);
58 } 58 }
59 59
60 return err; 60 return err;
diff --git a/drivers/gpu/nvgpu/vgpu/tsg_vgpu.c b/drivers/gpu/nvgpu/vgpu/tsg_vgpu.c
index d6060e06..d1f64d97 100644
--- a/drivers/gpu/nvgpu/vgpu/tsg_vgpu.c
+++ b/drivers/gpu/nvgpu/vgpu/tsg_vgpu.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2016-2020, NVIDIA CORPORATION. All rights reserved. 2 * Copyright (c) 2016-2021, NVIDIA CORPORATION. All rights reserved.
3 * 3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a 4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"), 5 * copy of this software and associated documentation files (the "Software"),
@@ -112,7 +112,7 @@ int vgpu_tsg_bind_channel(struct tsg_gk20a *tsg,
112 nvgpu_err(tsg->g, 112 nvgpu_err(tsg->g,
113 "vgpu_tsg_bind_channel failed, ch %d tsgid %d", 113 "vgpu_tsg_bind_channel failed, ch %d tsgid %d",
114 ch->chid, tsg->tsgid); 114 ch->chid, tsg->tsgid);
115 gk20a_tsg_unbind_channel(ch); 115 gk20a_tsg_unbind_channel(ch, true);
116 } 116 }
117 117
118 return err; 118 return err;