diff options
author | Deepak Nibade <dnibade@nvidia.com> | 2018-03-20 07:51:23 -0400 |
---|---|---|
committer | mobile promotions <svcmobile_promotions@nvidia.com> | 2018-03-23 11:20:35 -0400 |
commit | b5b4353ca6cc9b6457ddccc00bf87538291870fc (patch) | |
tree | 1e2334728031345a3cb042bcc934bb0d9e3b0f82 | |
parent | 2aead38194fb6f3166a9ccb501467f7b0662f6c1 (diff) |
gpu: nvgpu: set safe state for user managed syncpoints
The MAX/threshold value of a user-managed syncpoint is not tracked by nvgpu,
so if a channel is reset by nvgpu there could be waiters still waiting on some
user syncpoint fence.
Fix this by setting a large safe value on the user-managed syncpoint when aborting
the channel and when closing the channel.
Right now we increment the current value by 0x10000, which should be sufficient
to release any pending waiters.
Bug 200326065
Jira NVGPU-179
Change-Id: Ie6432369bb4c21bd922c14b8d5a74c1477116f0b
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1678768
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-by: Vijayakumar Subbu <vsubbu@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
-rw-r--r-- | drivers/gpu/nvgpu/common/linux/nvhost.c | 19 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/channel_gk20a.c | 18 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c | 19 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h | 10 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/include/nvgpu/nvhost.h | 2 |
5 files changed, 62 insertions, 6 deletions
diff --git a/drivers/gpu/nvgpu/common/linux/nvhost.c b/drivers/gpu/nvgpu/common/linux/nvhost.c index a76953e3..fa169cf0 100644 --- a/drivers/gpu/nvgpu/common/linux/nvhost.c +++ b/drivers/gpu/nvgpu/common/linux/nvhost.c | |||
@@ -166,6 +166,25 @@ u32 nvgpu_nvhost_syncpt_read_maxval( | |||
166 | return nvhost_syncpt_read_maxval(nvhost_dev->host1x_pdev, id); | 166 | return nvhost_syncpt_read_maxval(nvhost_dev->host1x_pdev, id); |
167 | } | 167 | } |
168 | 168 | ||
169 | void nvgpu_nvhost_syncpt_set_safe_state( | ||
170 | struct nvgpu_nvhost_dev *nvhost_dev, u32 id) | ||
171 | { | ||
172 | u32 val; | ||
173 | |||
174 | /* | ||
175 | * Add large number of increments to current value | ||
176 | * so that all waiters on this syncpoint are released | ||
177 | * | ||
178 | * We don't expect any case where more than 0x10000 increments | ||
179 | * are pending | ||
180 | */ | ||
181 | val = nvhost_syncpt_read_minval(nvhost_dev->host1x_pdev, id); | ||
182 | val += 0x10000; | ||
183 | |||
184 | nvhost_syncpt_set_minval(nvhost_dev->host1x_pdev, id, val); | ||
185 | nvhost_syncpt_set_maxval(nvhost_dev->host1x_pdev, id, val); | ||
186 | } | ||
187 | |||
169 | int nvgpu_nvhost_create_symlink(struct gk20a *g) | 188 | int nvgpu_nvhost_create_symlink(struct gk20a *g) |
170 | { | 189 | { |
171 | struct device *dev = dev_from_gk20a(g); | 190 | struct device *dev = dev_from_gk20a(g); |
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c index 78953558..65b17304 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c | |||
@@ -196,6 +196,8 @@ void gk20a_channel_abort_clean_up(struct channel_gk20a *ch) | |||
196 | nvgpu_mutex_acquire(&ch->sync_lock); | 196 | nvgpu_mutex_acquire(&ch->sync_lock); |
197 | if (ch->sync) | 197 | if (ch->sync) |
198 | ch->sync->set_min_eq_max(ch->sync); | 198 | ch->sync->set_min_eq_max(ch->sync); |
199 | if (ch->user_sync) | ||
200 | ch->user_sync->set_safe_state(ch->user_sync); | ||
199 | nvgpu_mutex_release(&ch->sync_lock); | 201 | nvgpu_mutex_release(&ch->sync_lock); |
200 | 202 | ||
201 | /* release all job semaphores (applies only to jobs that use | 203 | /* release all job semaphores (applies only to jobs that use |
@@ -435,11 +437,18 @@ static void gk20a_free_channel(struct channel_gk20a *ch, bool force) | |||
435 | /* sync must be destroyed before releasing channel vm */ | 437 | /* sync must be destroyed before releasing channel vm */ |
436 | nvgpu_mutex_acquire(&ch->sync_lock); | 438 | nvgpu_mutex_acquire(&ch->sync_lock); |
437 | if (ch->sync) { | 439 | if (ch->sync) { |
438 | gk20a_channel_sync_destroy(ch->sync); | 440 | gk20a_channel_sync_destroy(ch->sync, false); |
439 | ch->sync = NULL; | 441 | ch->sync = NULL; |
440 | } | 442 | } |
441 | if (ch->user_sync) { | 443 | if (ch->user_sync) { |
442 | gk20a_channel_sync_destroy(ch->user_sync); | 444 | /* |
445 | * Set user managed syncpoint to safe state | ||
446 | * But it's already done if channel has timedout | ||
447 | */ | ||
448 | if (ch->has_timedout) | ||
449 | gk20a_channel_sync_destroy(ch->user_sync, false); | ||
450 | else | ||
451 | gk20a_channel_sync_destroy(ch->user_sync, true); | ||
443 | ch->user_sync = NULL; | 452 | ch->user_sync = NULL; |
444 | } | 453 | } |
445 | nvgpu_mutex_release(&ch->sync_lock); | 454 | nvgpu_mutex_release(&ch->sync_lock); |
@@ -1211,7 +1220,7 @@ clean_up_prealloc: | |||
1211 | channel_gk20a_free_prealloc_resources(c); | 1220 | channel_gk20a_free_prealloc_resources(c); |
1212 | clean_up_sync: | 1221 | clean_up_sync: |
1213 | if (c->sync) { | 1222 | if (c->sync) { |
1214 | gk20a_channel_sync_destroy(c->sync); | 1223 | gk20a_channel_sync_destroy(c->sync, false); |
1215 | c->sync = NULL; | 1224 | c->sync = NULL; |
1216 | } | 1225 | } |
1217 | clean_up_unmap: | 1226 | clean_up_unmap: |
@@ -1905,7 +1914,8 @@ void gk20a_channel_clean_up_jobs(struct channel_gk20a *c, | |||
1905 | if (nvgpu_atomic_dec_and_test( | 1914 | if (nvgpu_atomic_dec_and_test( |
1906 | &c->sync->refcount) && | 1915 | &c->sync->refcount) && |
1907 | g->aggressive_sync_destroy) { | 1916 | g->aggressive_sync_destroy) { |
1908 | gk20a_channel_sync_destroy(c->sync); | 1917 | gk20a_channel_sync_destroy(c->sync, |
1918 | false); | ||
1909 | c->sync = NULL; | 1919 | c->sync = NULL; |
1910 | } | 1920 | } |
1911 | nvgpu_mutex_release(&c->sync_lock); | 1921 | nvgpu_mutex_release(&c->sync_lock); |
diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c index 3c12147f..236ddaaf 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c | |||
@@ -280,6 +280,13 @@ static void gk20a_channel_syncpt_set_min_eq_max(struct gk20a_channel_sync *s) | |||
280 | nvgpu_nvhost_syncpt_set_min_eq_max_ext(sp->nvhost_dev, sp->id); | 280 | nvgpu_nvhost_syncpt_set_min_eq_max_ext(sp->nvhost_dev, sp->id); |
281 | } | 281 | } |
282 | 282 | ||
283 | static void gk20a_channel_syncpt_set_safe_state(struct gk20a_channel_sync *s) | ||
284 | { | ||
285 | struct gk20a_channel_syncpt *sp = | ||
286 | container_of(s, struct gk20a_channel_syncpt, ops); | ||
287 | nvgpu_nvhost_syncpt_set_safe_state(sp->nvhost_dev, sp->id); | ||
288 | } | ||
289 | |||
283 | static void gk20a_channel_syncpt_signal_timeline( | 290 | static void gk20a_channel_syncpt_signal_timeline( |
284 | struct gk20a_channel_sync *s) | 291 | struct gk20a_channel_sync *s) |
285 | { | 292 | { |
@@ -357,6 +364,7 @@ gk20a_channel_syncpt_create(struct channel_gk20a *c, bool user_managed) | |||
357 | sp->ops.incr_wfi = gk20a_channel_syncpt_incr_wfi; | 364 | sp->ops.incr_wfi = gk20a_channel_syncpt_incr_wfi; |
358 | sp->ops.incr_user = gk20a_channel_syncpt_incr_user; | 365 | sp->ops.incr_user = gk20a_channel_syncpt_incr_user; |
359 | sp->ops.set_min_eq_max = gk20a_channel_syncpt_set_min_eq_max; | 366 | sp->ops.set_min_eq_max = gk20a_channel_syncpt_set_min_eq_max; |
367 | sp->ops.set_safe_state = gk20a_channel_syncpt_set_safe_state; | ||
360 | sp->ops.signal_timeline = gk20a_channel_syncpt_signal_timeline; | 368 | sp->ops.signal_timeline = gk20a_channel_syncpt_signal_timeline; |
361 | sp->ops.syncpt_id = gk20a_channel_syncpt_id; | 369 | sp->ops.syncpt_id = gk20a_channel_syncpt_id; |
362 | sp->ops.syncpt_address = gk20a_channel_syncpt_address; | 370 | sp->ops.syncpt_address = gk20a_channel_syncpt_address; |
@@ -634,6 +642,11 @@ static void gk20a_channel_semaphore_set_min_eq_max(struct gk20a_channel_sync *s) | |||
634 | /* Nothing to do. */ | 642 | /* Nothing to do. */ |
635 | } | 643 | } |
636 | 644 | ||
645 | static void gk20a_channel_semaphore_set_safe_state(struct gk20a_channel_sync *s) | ||
646 | { | ||
647 | /* Nothing to do. */ | ||
648 | } | ||
649 | |||
637 | static void gk20a_channel_semaphore_signal_timeline( | 650 | static void gk20a_channel_semaphore_signal_timeline( |
638 | struct gk20a_channel_sync *s) | 651 | struct gk20a_channel_sync *s) |
639 | { | 652 | { |
@@ -703,6 +716,7 @@ gk20a_channel_semaphore_create(struct channel_gk20a *c, bool user_managed) | |||
703 | sema->ops.incr_wfi = gk20a_channel_semaphore_incr_wfi; | 716 | sema->ops.incr_wfi = gk20a_channel_semaphore_incr_wfi; |
704 | sema->ops.incr_user = gk20a_channel_semaphore_incr_user; | 717 | sema->ops.incr_user = gk20a_channel_semaphore_incr_user; |
705 | sema->ops.set_min_eq_max = gk20a_channel_semaphore_set_min_eq_max; | 718 | sema->ops.set_min_eq_max = gk20a_channel_semaphore_set_min_eq_max; |
719 | sema->ops.set_safe_state = gk20a_channel_semaphore_set_safe_state; | ||
706 | sema->ops.signal_timeline = gk20a_channel_semaphore_signal_timeline; | 720 | sema->ops.signal_timeline = gk20a_channel_semaphore_signal_timeline; |
707 | sema->ops.syncpt_id = gk20a_channel_semaphore_syncpt_id; | 721 | sema->ops.syncpt_id = gk20a_channel_semaphore_syncpt_id; |
708 | sema->ops.syncpt_address = gk20a_channel_semaphore_syncpt_address; | 722 | sema->ops.syncpt_address = gk20a_channel_semaphore_syncpt_address; |
@@ -711,8 +725,11 @@ gk20a_channel_semaphore_create(struct channel_gk20a *c, bool user_managed) | |||
711 | return &sema->ops; | 725 | return &sema->ops; |
712 | } | 726 | } |
713 | 727 | ||
714 | void gk20a_channel_sync_destroy(struct gk20a_channel_sync *sync) | 728 | void gk20a_channel_sync_destroy(struct gk20a_channel_sync *sync, |
729 | bool set_safe_state) | ||
715 | { | 730 | { |
731 | if (set_safe_state) | ||
732 | sync->set_safe_state(sync); | ||
716 | sync->destroy(sync); | 733 | sync->destroy(sync); |
717 | } | 734 | } |
718 | 735 | ||
diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h index 3f44b27a..da8cb251 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h | |||
@@ -86,6 +86,13 @@ struct gk20a_channel_sync { | |||
86 | /* Reset the channel syncpoint/semaphore. */ | 86 | /* Reset the channel syncpoint/semaphore. */ |
87 | void (*set_min_eq_max)(struct gk20a_channel_sync *s); | 87 | void (*set_min_eq_max)(struct gk20a_channel_sync *s); |
88 | 88 | ||
89 | /* | ||
90 | * Set the channel syncpoint/semaphore to safe state | ||
91 | * This should be used to reset User managed syncpoint since we don't | ||
92 | * track threshold values for those syncpoints | ||
93 | */ | ||
94 | void (*set_safe_state)(struct gk20a_channel_sync *s); | ||
95 | |||
89 | /* Signals the sync timeline (if owned by the gk20a_channel_sync layer). | 96 | /* Signals the sync timeline (if owned by the gk20a_channel_sync layer). |
90 | * This should be called when we notice that a gk20a_fence is | 97 | * This should be called when we notice that a gk20a_fence is |
91 | * expired. */ | 98 | * expired. */ |
@@ -101,7 +108,8 @@ struct gk20a_channel_sync { | |||
101 | void (*destroy)(struct gk20a_channel_sync *s); | 108 | void (*destroy)(struct gk20a_channel_sync *s); |
102 | }; | 109 | }; |
103 | 110 | ||
104 | void gk20a_channel_sync_destroy(struct gk20a_channel_sync *sync); | 111 | void gk20a_channel_sync_destroy(struct gk20a_channel_sync *sync, |
112 | bool set_safe_state); | ||
105 | struct gk20a_channel_sync *gk20a_channel_sync_create(struct channel_gk20a *c, | 113 | struct gk20a_channel_sync *gk20a_channel_sync_create(struct channel_gk20a *c, |
106 | bool user_managed); | 114 | bool user_managed); |
107 | bool gk20a_channel_sync_needs_sync_framework(struct gk20a *g); | 115 | bool gk20a_channel_sync_needs_sync_framework(struct gk20a *g); |
diff --git a/drivers/gpu/nvgpu/include/nvgpu/nvhost.h b/drivers/gpu/nvgpu/include/nvgpu/nvhost.h index d5b5831a..13de012a 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/nvhost.h +++ b/drivers/gpu/nvgpu/include/nvgpu/nvhost.h | |||
@@ -52,6 +52,8 @@ int nvgpu_nvhost_syncpt_read_ext_check(struct nvgpu_nvhost_dev *nvhost_dev, | |||
52 | u32 id, u32 *val); | 52 | u32 id, u32 *val); |
53 | u32 nvgpu_nvhost_syncpt_read_maxval(struct nvgpu_nvhost_dev *nvhost_dev, | 53 | u32 nvgpu_nvhost_syncpt_read_maxval(struct nvgpu_nvhost_dev *nvhost_dev, |
54 | u32 id); | 54 | u32 id); |
55 | void nvgpu_nvhost_syncpt_set_safe_state( | ||
56 | struct nvgpu_nvhost_dev *nvhost_dev, u32 id); | ||
55 | 57 | ||
56 | int nvgpu_nvhost_intr_register_notifier(struct nvgpu_nvhost_dev *nvhost_dev, | 58 | int nvgpu_nvhost_intr_register_notifier(struct nvgpu_nvhost_dev *nvhost_dev, |
57 | u32 id, u32 thresh, void (*callback)(void *, int), void *private_data); | 59 | u32 id, u32 thresh, void (*callback)(void *, int), void *private_data); |