summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Peter Daifuku <pdaifuku@nvidia.com> 2020-01-30 13:58:54 -0500
committer: mobile promotions <svcmobile_promotions@nvidia.com> 2020-02-22 07:24:36 -0500
commit: ea14973b149930669a8fe2aa37cdfad41759001d (patch)
tree: 37ede9e1280516550c3da4f28c3f43b6e42917df
parent: e1683ce076db48117cda56597238b90acdfe67cb (diff)
gpu: nvgpu: vgpu: fix tsg_unbind in recovery case
When unbinding a channel from a TSG in the virtualized (vgpu) case, vgpu_tsg_unbind_channel would return an error if unbinding the channel on the guest side failed, and did so before notifying the RM server of the unbind. Later on in the recovery process, the guest OS would remove the channel from the TSG's list, but this would leave the RM server with an out-of-date channel list.

Fix this by making the tsg_unbind_channel HAL optional and implemented only for vgpu: the vgpu version now just notifies the RM server so that it can clean up its version of the TSG. In the vgpu case, the tsg_unbind_channel HAL is always called, whether or not the local unbind succeeded.

Minimal port from dev-main of https://git-master.nvidia.com/r/c/linux-nvgpu/+/2084029

Bug 2766920
Bug 200587845

Change-Id: I75bddf3a28ac20bf4fb7510ff64097a32c7eec3f
Signed-off-by: Peter Daifuku <pdaifuku@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2287774
(cherry picked from commit 471c72c1efcc4fe6d547f556edf7773827fd2674)
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2289928
Reviewed-by: Thomas Steinle <tsteinle@nvidia.com>
Reviewed-by: Satish Arora <satisha@nvidia.com>
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
GVS: Gerrit_Virtual_Submit
-rw-r--r-- drivers/gpu/nvgpu/common/fifo/tsg.c 7
-rw-r--r-- drivers/gpu/nvgpu/gm20b/hal_gm20b.c 2
-rw-r--r-- drivers/gpu/nvgpu/gp106/hal_gp106.c 4
-rw-r--r-- drivers/gpu/nvgpu/gp10b/hal_gp10b.c 2
-rw-r--r-- drivers/gpu/nvgpu/gv100/hal_gv100.c 2
-rw-r--r-- drivers/gpu/nvgpu/gv11b/hal_gv11b.c 2
-rw-r--r-- drivers/gpu/nvgpu/vgpu/tsg_vgpu.c 8
7 files changed, 14 insertions, 13 deletions
diff --git a/drivers/gpu/nvgpu/common/fifo/tsg.c b/drivers/gpu/nvgpu/common/fifo/tsg.c
index 65cee225..f6c718f0 100644
--- a/drivers/gpu/nvgpu/common/fifo/tsg.c
+++ b/drivers/gpu/nvgpu/common/fifo/tsg.c
@@ -158,7 +158,7 @@ int gk20a_tsg_unbind_channel(struct channel_gk20a *ch)
158 return -EINVAL; 158 return -EINVAL;
159 } 159 }
160 160
161 err = g->ops.fifo.tsg_unbind_channel(ch); 161 err = gk20a_fifo_tsg_unbind_channel(ch);
162 if (err) { 162 if (err) {
163 nvgpu_err(g, "Channel %d unbind failed, tearing down TSG %d", 163 nvgpu_err(g, "Channel %d unbind failed, tearing down TSG %d",
164 ch->chid, tsg->tsgid); 164 ch->chid, tsg->tsgid);
@@ -172,6 +172,11 @@ int gk20a_tsg_unbind_channel(struct channel_gk20a *ch)
172 ch->tsgid = NVGPU_INVALID_TSG_ID; 172 ch->tsgid = NVGPU_INVALID_TSG_ID;
173 nvgpu_rwsem_up_write(&tsg->ch_list_lock); 173 nvgpu_rwsem_up_write(&tsg->ch_list_lock);
174 } 174 }
175
176 if (g->ops.fifo.tsg_unbind_channel != NULL) {
177 err = g->ops.fifo.tsg_unbind_channel(ch);
178 }
179
175 nvgpu_log(g, gpu_dbg_fn, "UNBIND tsg:%d channel:%d", 180 nvgpu_log(g, gpu_dbg_fn, "UNBIND tsg:%d channel:%d",
176 tsg->tsgid, ch->chid); 181 tsg->tsgid, ch->chid);
177 182
diff --git a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c
index 9898c683..c470f520 100644
--- a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c
@@ -459,7 +459,7 @@ static const struct gpu_ops gm20b_ops = {
459 .handle_pbdma_intr_0 = gk20a_fifo_handle_pbdma_intr_0, 459 .handle_pbdma_intr_0 = gk20a_fifo_handle_pbdma_intr_0,
460 .handle_pbdma_intr_1 = gk20a_fifo_handle_pbdma_intr_1, 460 .handle_pbdma_intr_1 = gk20a_fifo_handle_pbdma_intr_1,
461 .tsg_bind_channel = gk20a_tsg_bind_channel, 461 .tsg_bind_channel = gk20a_tsg_bind_channel,
462 .tsg_unbind_channel = gk20a_fifo_tsg_unbind_channel, 462 .tsg_unbind_channel = NULL,
463 .post_event_id = gk20a_tsg_event_id_post_event, 463 .post_event_id = gk20a_tsg_event_id_post_event,
464 .ch_abort_clean_up = gk20a_channel_abort_clean_up, 464 .ch_abort_clean_up = gk20a_channel_abort_clean_up,
465 .check_tsg_ctxsw_timeout = gk20a_fifo_check_tsg_ctxsw_timeout, 465 .check_tsg_ctxsw_timeout = gk20a_fifo_check_tsg_ctxsw_timeout,
diff --git a/drivers/gpu/nvgpu/gp106/hal_gp106.c b/drivers/gpu/nvgpu/gp106/hal_gp106.c
index d033a516..17f3ccf5 100644
--- a/drivers/gpu/nvgpu/gp106/hal_gp106.c
+++ b/drivers/gpu/nvgpu/gp106/hal_gp106.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * GP106 HAL interface 2 * GP106 HAL interface
3 * 3 *
4 * Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved. 4 * Copyright (c) 2016-2020, NVIDIA CORPORATION. All rights reserved.
5 * 5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a 6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"), 7 * copy of this software and associated documentation files (the "Software"),
@@ -527,7 +527,7 @@ static const struct gpu_ops gp106_ops = {
527 .handle_pbdma_intr_0 = gk20a_fifo_handle_pbdma_intr_0, 527 .handle_pbdma_intr_0 = gk20a_fifo_handle_pbdma_intr_0,
528 .handle_pbdma_intr_1 = gk20a_fifo_handle_pbdma_intr_1, 528 .handle_pbdma_intr_1 = gk20a_fifo_handle_pbdma_intr_1,
529 .tsg_bind_channel = gk20a_tsg_bind_channel, 529 .tsg_bind_channel = gk20a_tsg_bind_channel,
530 .tsg_unbind_channel = gk20a_fifo_tsg_unbind_channel, 530 .tsg_unbind_channel = NULL,
531 .post_event_id = gk20a_tsg_event_id_post_event, 531 .post_event_id = gk20a_tsg_event_id_post_event,
532 .ch_abort_clean_up = gk20a_channel_abort_clean_up, 532 .ch_abort_clean_up = gk20a_channel_abort_clean_up,
533 .check_tsg_ctxsw_timeout = gk20a_fifo_check_tsg_ctxsw_timeout, 533 .check_tsg_ctxsw_timeout = gk20a_fifo_check_tsg_ctxsw_timeout,
diff --git a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
index 3cdba8ac..d3409b09 100644
--- a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
@@ -496,7 +496,7 @@ static const struct gpu_ops gp10b_ops = {
496 .handle_pbdma_intr_0 = gk20a_fifo_handle_pbdma_intr_0, 496 .handle_pbdma_intr_0 = gk20a_fifo_handle_pbdma_intr_0,
497 .handle_pbdma_intr_1 = gk20a_fifo_handle_pbdma_intr_1, 497 .handle_pbdma_intr_1 = gk20a_fifo_handle_pbdma_intr_1,
498 .tsg_bind_channel = gk20a_tsg_bind_channel, 498 .tsg_bind_channel = gk20a_tsg_bind_channel,
499 .tsg_unbind_channel = gk20a_fifo_tsg_unbind_channel, 499 .tsg_unbind_channel = NULL,
500 .post_event_id = gk20a_tsg_event_id_post_event, 500 .post_event_id = gk20a_tsg_event_id_post_event,
501 .ch_abort_clean_up = gk20a_channel_abort_clean_up, 501 .ch_abort_clean_up = gk20a_channel_abort_clean_up,
502 .check_tsg_ctxsw_timeout = gk20a_fifo_check_tsg_ctxsw_timeout, 502 .check_tsg_ctxsw_timeout = gk20a_fifo_check_tsg_ctxsw_timeout,
diff --git a/drivers/gpu/nvgpu/gv100/hal_gv100.c b/drivers/gpu/nvgpu/gv100/hal_gv100.c
index 0e0417a0..696316f5 100644
--- a/drivers/gpu/nvgpu/gv100/hal_gv100.c
+++ b/drivers/gpu/nvgpu/gv100/hal_gv100.c
@@ -651,7 +651,7 @@ static const struct gpu_ops gv100_ops = {
651 .deinit_eng_method_buffers = 651 .deinit_eng_method_buffers =
652 gv11b_fifo_deinit_eng_method_buffers, 652 gv11b_fifo_deinit_eng_method_buffers,
653 .tsg_bind_channel = gk20a_tsg_bind_channel, 653 .tsg_bind_channel = gk20a_tsg_bind_channel,
654 .tsg_unbind_channel = gk20a_fifo_tsg_unbind_channel, 654 .tsg_unbind_channel = NULL,
655 .post_event_id = gk20a_tsg_event_id_post_event, 655 .post_event_id = gk20a_tsg_event_id_post_event,
656 .ch_abort_clean_up = gk20a_channel_abort_clean_up, 656 .ch_abort_clean_up = gk20a_channel_abort_clean_up,
657 .check_tsg_ctxsw_timeout = gk20a_fifo_check_tsg_ctxsw_timeout, 657 .check_tsg_ctxsw_timeout = gk20a_fifo_check_tsg_ctxsw_timeout,
diff --git a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c
index f7fabf68..2f7b4abc 100644
--- a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c
@@ -618,7 +618,7 @@ static const struct gpu_ops gv11b_ops = {
618 .deinit_eng_method_buffers = 618 .deinit_eng_method_buffers =
619 gv11b_fifo_deinit_eng_method_buffers, 619 gv11b_fifo_deinit_eng_method_buffers,
620 .tsg_bind_channel = gk20a_tsg_bind_channel, 620 .tsg_bind_channel = gk20a_tsg_bind_channel,
621 .tsg_unbind_channel = gk20a_fifo_tsg_unbind_channel, 621 .tsg_unbind_channel = NULL,
622 .post_event_id = gk20a_tsg_event_id_post_event, 622 .post_event_id = gk20a_tsg_event_id_post_event,
623 .ch_abort_clean_up = gk20a_channel_abort_clean_up, 623 .ch_abort_clean_up = gk20a_channel_abort_clean_up,
624 .check_tsg_ctxsw_timeout = gk20a_fifo_check_tsg_ctxsw_timeout, 624 .check_tsg_ctxsw_timeout = gk20a_fifo_check_tsg_ctxsw_timeout,
diff --git a/drivers/gpu/nvgpu/vgpu/tsg_vgpu.c b/drivers/gpu/nvgpu/vgpu/tsg_vgpu.c
index 3553bf51..d6060e06 100644
--- a/drivers/gpu/nvgpu/vgpu/tsg_vgpu.c
+++ b/drivers/gpu/nvgpu/vgpu/tsg_vgpu.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved. 2 * Copyright (c) 2016-2020, NVIDIA CORPORATION. All rights reserved.
3 * 3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a 4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"), 5 * copy of this software and associated documentation files (the "Software"),
@@ -128,12 +128,8 @@ int vgpu_tsg_unbind_channel(struct channel_gk20a *ch)
128 128
129 nvgpu_log_fn(g, " "); 129 nvgpu_log_fn(g, " ");
130 130
131 err = gk20a_fifo_tsg_unbind_channel(ch);
132 if (err)
133 return err;
134
135 msg.cmd = TEGRA_VGPU_CMD_TSG_UNBIND_CHANNEL; 131 msg.cmd = TEGRA_VGPU_CMD_TSG_UNBIND_CHANNEL;
136 msg.handle = vgpu_get_handle(ch->g); 132 msg.handle = vgpu_get_handle(g);
137 p->ch_handle = ch->virt_ctx; 133 p->ch_handle = ch->virt_ctx;
138 err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); 134 err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
139 err = err ? err : msg.ret; 135 err = err ? err : msg.ret;