From 13fc430775eb0e39dc06e420d5c92dda7016f6ae Mon Sep 17 00:00:00 2001 From: Sagar Kamble Date: Tue, 2 Feb 2021 22:02:23 +0530 Subject: gpu: nvgpu: retry tsg unbind if NEXT is set The NEXT bit can remain set for a channel if its timeslice expires before the scheduler clears it. When this happens, nvgpu fails the TSG unbind and, in turn, nvrm_gpu fails the channel close. In this case, rechecking the channel hw state after some time may find the NEXT bit cleared by the scheduler. Re-enable the TSG and return -EAGAIN to nvrm_gpu so that it can retry. Bug 3144960 Bug 200520811 Change-Id: I35f417f02270e371a4e632986b73a00f8a4f921a Signed-off-by: Sagar Kamble Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2468391 (cherry picked from commit cf287a4ef592e7329f813c076ec8bdad18dc5933) Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2479106 Tested-by: mobile promotions Reviewed-by: Deepak Nibade Reviewed-by: svc-mobile-coverity Reviewed-by: mobile promotions GVS: Gerrit_Virtual_Submit --- drivers/gpu/nvgpu/common/fifo/channel.c | 4 ++-- drivers/gpu/nvgpu/common/fifo/tsg.c | 9 +++++++-- drivers/gpu/nvgpu/gk20a/fifo_gk20a.c | 6 +++--- drivers/gpu/nvgpu/include/nvgpu/tsg.h | 4 ++-- drivers/gpu/nvgpu/os/linux/ioctl_tsg.c | 7 +++++-- drivers/gpu/nvgpu/vgpu/gv11b/vgpu_tsg_gv11b.c | 4 ++-- drivers/gpu/nvgpu/vgpu/tsg_vgpu.c | 4 ++-- 7 files changed, 23 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/nvgpu/common/fifo/channel.c b/drivers/gpu/nvgpu/common/fifo/channel.c index 7179d46e..2be72cb4 100644 --- a/drivers/gpu/nvgpu/common/fifo/channel.c +++ b/drivers/gpu/nvgpu/common/fifo/channel.c @@ -1,7 +1,7 @@ /* * GK20A Graphics channel * - * Copyright (c) 2011-2020, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2011-2021, NVIDIA CORPORATION. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -333,7 +333,7 @@ static void gk20a_free_channel(struct channel_gk20a *ch, bool force) * have an open channel fd anymore to use for the unbind * ioctl. */ - err = gk20a_tsg_unbind_channel(ch); + err = gk20a_tsg_unbind_channel(ch, true); if (err) { nvgpu_err(g, "failed to unbind channel %d from TSG", diff --git a/drivers/gpu/nvgpu/common/fifo/tsg.c b/drivers/gpu/nvgpu/common/fifo/tsg.c index 841dd465..ebcdd2be 100644 --- a/drivers/gpu/nvgpu/common/fifo/tsg.c +++ b/drivers/gpu/nvgpu/common/fifo/tsg.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014-2020, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2014-2021, NVIDIA CORPORATION. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -148,7 +148,7 @@ int gk20a_tsg_bind_channel(struct tsg_gk20a *tsg, } /* The caller must ensure that channel belongs to a tsg */ -int gk20a_tsg_unbind_channel(struct channel_gk20a *ch) +int gk20a_tsg_unbind_channel(struct channel_gk20a *ch, bool force) { struct gk20a *g = ch->g; struct tsg_gk20a *tsg = tsg_gk20a_from_ch(ch); @@ -159,6 +159,11 @@ int gk20a_tsg_unbind_channel(struct channel_gk20a *ch) } err = gk20a_fifo_tsg_unbind_channel(ch); + + if (!force && err == -EAGAIN) { + return err; + } + if (err) { nvgpu_err(g, "Channel %d unbind failed, tearing down TSG %d", ch->chid, tsg->tsgid); diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c index 072f1777..e91830f8 100644 --- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c @@ -1,7 +1,7 @@ /* * GK20A Graphics FIFO (gr host) * - * Copyright (c) 2011-2020, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2011-2021, NVIDIA CORPORATION. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -2196,9 +2196,9 @@ int gk20a_fifo_tsg_unbind_channel_verify_status(struct channel_gk20a *ch) struct gk20a *g = ch->g; if (gk20a_fifo_channel_status_is_next(g, ch->chid)) { - nvgpu_err(g, "Channel %d to be removed from TSG %d has NEXT set!", + nvgpu_log_info(g, "Channel %d to be removed from TSG %d has NEXT set!", ch->chid, ch->tsgid); - return -EINVAL; + return -EAGAIN; } if (g->ops.fifo.tsg_verify_status_ctx_reload) { diff --git a/drivers/gpu/nvgpu/include/nvgpu/tsg.h b/drivers/gpu/nvgpu/include/nvgpu/tsg.h index 7cd97c97..f5391e7c 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/tsg.h +++ b/drivers/gpu/nvgpu/include/nvgpu/tsg.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014-2020, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2014-2021, NVIDIA CORPORATION. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -90,7 +90,7 @@ int gk20a_enable_tsg(struct tsg_gk20a *tsg); int gk20a_disable_tsg(struct tsg_gk20a *tsg); int gk20a_tsg_bind_channel(struct tsg_gk20a *tsg, struct channel_gk20a *ch); -int gk20a_tsg_unbind_channel(struct channel_gk20a *ch); +int gk20a_tsg_unbind_channel(struct channel_gk20a *ch, bool force); void gk20a_tsg_event_id_post_event(struct tsg_gk20a *tsg, int event_id); diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_tsg.c b/drivers/gpu/nvgpu/os/linux/ioctl_tsg.c index 2f8cb3ae..915a173a 100644 --- a/drivers/gpu/nvgpu/os/linux/ioctl_tsg.c +++ b/drivers/gpu/nvgpu/os/linux/ioctl_tsg.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014-2020, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2014-2021, NVIDIA CORPORATION. All rights reserved. 
* * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -134,7 +134,10 @@ static int gk20a_tsg_unbind_channel_fd(struct tsg_gk20a *tsg, int ch_fd) goto out; } - err = gk20a_tsg_unbind_channel(ch); + err = gk20a_tsg_unbind_channel(ch, false); + if (err == -EAGAIN) { + goto out; + } /* * Mark the channel timedout since channel unbound from TSG diff --git a/drivers/gpu/nvgpu/vgpu/gv11b/vgpu_tsg_gv11b.c b/drivers/gpu/nvgpu/vgpu/gv11b/vgpu_tsg_gv11b.c index 8c999161..84f295fe 100644 --- a/drivers/gpu/nvgpu/vgpu/gv11b/vgpu_tsg_gv11b.c +++ b/drivers/gpu/nvgpu/vgpu/gv11b/vgpu_tsg_gv11b.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2016-2021, NVIDIA CORPORATION. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -54,7 +54,7 @@ int vgpu_gv11b_tsg_bind_channel(struct tsg_gk20a *tsg, nvgpu_err(tsg->g, "vgpu_gv11b_tsg_bind_channel failed, ch %d tsgid %d", ch->chid, tsg->tsgid); - gk20a_tsg_unbind_channel(ch); + gk20a_tsg_unbind_channel(ch, true); } return err; diff --git a/drivers/gpu/nvgpu/vgpu/tsg_vgpu.c b/drivers/gpu/nvgpu/vgpu/tsg_vgpu.c index d6060e06..d1f64d97 100644 --- a/drivers/gpu/nvgpu/vgpu/tsg_vgpu.c +++ b/drivers/gpu/nvgpu/vgpu/tsg_vgpu.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2016-2021, NVIDIA CORPORATION. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -112,7 +112,7 @@ int vgpu_tsg_bind_channel(struct tsg_gk20a *tsg, nvgpu_err(tsg->g, "vgpu_tsg_bind_channel failed, ch %d tsgid %d", ch->chid, tsg->tsgid); - gk20a_tsg_unbind_channel(ch); + gk20a_tsg_unbind_channel(ch, true); } return err; -- cgit v1.2.2