author    Seema Khowala <seemaj@nvidia.com>    2018-06-26 15:33:02 -0400
committer mobile promotions <svcmobile_promotions@nvidia.com>    2018-07-30 03:21:04 -0400
commit    4cbec6b2c7f74f309fb4bc5d4981c42ae6ea3bcd (patch)
tree      0797b3884c771aaf56eeceac091d7c2516256b6d
parent    5d2058791fa5a917201634b034a8736522585e26 (diff)
gpu: nvgpu: set preempt timeout
-For Si platforms, gk20a_get_gr_idle_timeout returns 3000 ms, i.e. 3 sec.
 Currently this time is used for preempt polling, and it conflicts with the
 channel timeout when polling times out. Use fifo_eng_timeout_us, converted
 to ms, for preempt polling.

-In case of preempt timeout, do not issue recovery for Si platforms; the
 ctxsw timeout will trigger recovery if needed. For non-Si platforms, issue
 preempt timeout recovery if preempt times out.

Bug 2113657
Bug 2064553
Bug 2038366
Bug 2028993
Bug 200426402

Change-Id: I8d9f58be9ac634e94defa92a20fb737bf256d841
Signed-off-by: Seema Khowala <seemaj@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1762076
GVS: Gerrit_Virtual_Submit
Reviewed-by: Alex Waterman <alexw@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
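For context on the first change: the fix derives the preempt poll timeout from the engine timeout instead of the fixed 3000 ms gr idle timeout, so the poll window stays below the 100 ms ctxsw timeout check and the 3000 ms recovery cadence. A minimal user-space sketch of that arithmetic, assuming an illustrative fifo_eng_timeout_us value (the real default is platform-dependent and is not shown in this commit):

    /* Sketch only: derives a preempt poll timeout the way the new
     * gk20a_fifo_get_preempt_timeout() below does. The value of
     * fifo_eng_timeout_us here is an assumed example, not the
     * driver's default. */
    #include <stdio.h>

    static unsigned int preempt_timeout_ms(unsigned int fifo_eng_timeout_us)
    {
            return fifo_eng_timeout_us / 1000; /* us -> ms */
    }

    int main(void)
    {
            unsigned int eng_timeout_us = 100000; /* assumed example: 100 ms */

            printf("preempt poll timeout: %u ms\n",
                   preempt_timeout_ms(eng_timeout_us));
            return 0;
    }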
-rw-r--r--  drivers/gpu/nvgpu/gk20a/fifo_gk20a.c  46
1 file changed, 40 insertions(+), 6 deletions(-)
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
index 1bf38080..306f05a7 100644
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
@@ -2743,6 +2743,17 @@ void gk20a_fifo_issue_preempt(struct gk20a *g, u32 id, bool is_tsg)
 		fifo_preempt_type_channel_f());
 }
 
+static u32 gk20a_fifo_get_preempt_timeout(struct gk20a *g)
+{
+	/* Use fifo_eng_timeout converted to ms for preempt
+	 * polling. gr_idle_timeout, i.e. 3000 ms, is not appropriate
+	 * for polling preempt done, as context switch timeout gets
+	 * triggered every 100 ms and context switch recovery
+	 * happens every 3000 ms */
+
+	return g->fifo_eng_timeout_us / 1000;
+}
+
 int gk20a_fifo_is_preempt_pending(struct gk20a *g, u32 id,
 		unsigned int id_type)
 {
@@ -2750,7 +2761,7 @@ int gk20a_fifo_is_preempt_pending(struct gk20a *g, u32 id,
 	u32 delay = GR_IDLE_CHECK_DEFAULT;
 	int ret = -EBUSY;
 
-	nvgpu_timeout_init(g, &timeout, gk20a_get_gr_idle_timeout(g),
+	nvgpu_timeout_init(g, &timeout, gk20a_fifo_get_preempt_timeout(g),
 			   NVGPU_TIMER_CPU_TIMER);
 	do {
 		if (!(gk20a_readl(g, fifo_preempt_r()) &
@@ -2761,8 +2772,12 @@ int gk20a_fifo_is_preempt_pending(struct gk20a *g, u32 id,
 
 		nvgpu_usleep_range(delay, delay * 2);
 		delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX);
-	} while (!nvgpu_timeout_expired_msg(&timeout, "preempt timeout"));
+	} while (!nvgpu_timeout_expired(&timeout));
 
+	if (ret) {
+		nvgpu_err(g, "preempt timeout: id: %u id_type: %d",
+			id, id_type);
+	}
 	return ret;
 }
 
@@ -2848,8 +2863,16 @@ int gk20a_fifo_preempt_channel(struct gk20a *g, u32 chid)
 	for (i = 0; i < g->fifo.max_runlists; i++)
 		nvgpu_mutex_release(&f->runlist_info[i].runlist_lock);
 
-	if (ret)
-		gk20a_fifo_preempt_timeout_rc(g, chid, false);
+	if (ret) {
+		if (nvgpu_platform_is_silicon(g)) {
+			nvgpu_err(g, "preempt timed out for chid: %u, "
+				"ctxsw timeout will trigger recovery if needed", chid);
+		} else {
+			gk20a_fifo_preempt_timeout_rc(g, chid, false);
+		}
+	}
+
+
 
 	return ret;
 }
@@ -2880,8 +2903,14 @@ int gk20a_fifo_preempt_tsg(struct gk20a *g, u32 tsgid)
 	for (i = 0; i < g->fifo.max_runlists; i++)
 		nvgpu_mutex_release(&f->runlist_info[i].runlist_lock);
 
-	if (ret)
-		gk20a_fifo_preempt_timeout_rc(g, tsgid, true);
+	if (ret) {
+		if (nvgpu_platform_is_silicon(g)) {
+			nvgpu_err(g, "preempt timed out for tsgid: %u, "
+				"ctxsw timeout will trigger recovery if needed", tsgid);
+		} else {
+			gk20a_fifo_preempt_timeout_rc(g, tsgid, true);
+		}
+	}
 
 	return ret;
 }
@@ -3121,6 +3150,11 @@ int gk20a_fifo_runlist_wait_pending(struct gk20a *g, u32 runlist_id)
 		delay = min_t(u32, delay << 1, GR_IDLE_CHECK_MAX);
 	} while (!nvgpu_timeout_expired(&timeout));
 
+	if (ret) {
+		nvgpu_err(g, "runlist wait timeout: runlist id: %u",
+			runlist_id);
+	}
+
 	return ret;
 }
 
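Taken together, the commit splits timeout handling by platform: on silicon the driver only logs the preempt timeout and defers recovery to the ctxsw timeout path, while pre-silicon platforms still invoke gk20a_fifo_preempt_timeout_rc() directly. A minimal sketch of that dispatch, with the nvgpu calls reduced to hypothetical stand-ins:

    /* Sketch only: handle_preempt_timeout() is a stand-in for the
     * if (ret) blocks above; the log string mirrors the diff, but the
     * recovery printf replaces gk20a_fifo_preempt_timeout_rc() and
     * the is_silicon flag replaces nvgpu_platform_is_silicon(). */
    #include <stdbool.h>
    #include <stdio.h>

    static void handle_preempt_timeout(bool is_silicon, unsigned int chid)
    {
            if (is_silicon) {
                    /* Silicon: log only; ctxsw timeout drives recovery. */
                    fprintf(stderr,
                            "preempt timed out for chid %u, "
                            "ctxsw timeout will trigger recovery if needed\n",
                            chid);
            } else {
                    /* Pre-silicon: no ctxsw timeout to fall back on, so
                     * run the recovery path immediately (stand-in). */
                    printf("recovering chid %u now\n", chid);
            }
    }

    int main(void)
    {
            handle_preempt_timeout(true, 511);   /* silicon path */
            handle_preempt_timeout(false, 511);  /* pre-silicon path */
            return 0;
    }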