author     Debarshi Dutta <ddutta@nvidia.com>                    2018-05-10 23:48:48 -0400
committer  mobile promotions <svcmobile_promotions@nvidia.com>   2018-05-14 09:07:12 -0400
commit     a51eb9da021c2934e196c5d8be04551703e6bb5b (patch)
tree       843036626eff3733759ab003fc577a1ae2e729ae /drivers/gpu/nvgpu/common
parent     4dac924aba27aa46267fb39f3ed968318292a7f5 (diff)
gpu: nvgpu: move sync_gk20a under common/linux directory
The sync_gk20a.* files are no longer used by core code and are only invoked from
the Linux-specific implementations of the OS_FENCE framework, which live under
the common/linux directory. Hence, the sync_gk20a.* files are also moved under
common/linux.

JIRA NVGPU-66

Change-Id: If623524611373d2da39b63cfb3c1e40089bf8d22
Signed-off-by: Debarshi Dutta <ddutta@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1712900
Reviewed-by: Vijayakumar Subbu <vsubbu@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/common')
-rw-r--r--  drivers/gpu/nvgpu/common/linux/channel.c                    2
-rw-r--r--  drivers/gpu/nvgpu/common/linux/os_fence_android_sema.c      3
-rw-r--r--  drivers/gpu/nvgpu/common/linux/os_fence_android_syncpt.c    1
-rw-r--r--  drivers/gpu/nvgpu/common/linux/sync_sema_android.c        419
-rw-r--r--  drivers/gpu/nvgpu/common/linux/sync_sema_android.h         51
5 files changed, 473 insertions, 3 deletions
diff --git a/drivers/gpu/nvgpu/common/linux/channel.c b/drivers/gpu/nvgpu/common/linux/channel.c
index d767374b..1e170b30 100644
--- a/drivers/gpu/nvgpu/common/linux/channel.c
+++ b/drivers/gpu/nvgpu/common/linux/channel.c
@@ -40,7 +40,7 @@
 #include <trace/events/gk20a.h>
 #include <uapi/linux/nvgpu.h>
 
-#include "gk20a/sync_gk20a.h"
+#include "sync_sema_android.h"
 
 u32 nvgpu_submit_gpfifo_user_flags_to_common_flags(u32 user_flags)
 {
diff --git a/drivers/gpu/nvgpu/common/linux/os_fence_android_sema.c b/drivers/gpu/nvgpu/common/linux/os_fence_android_sema.c
index d4aeb6ed..2bb71c99 100644
--- a/drivers/gpu/nvgpu/common/linux/os_fence_android_sema.c
+++ b/drivers/gpu/nvgpu/common/linux/os_fence_android_sema.c
@@ -21,10 +21,11 @@
 #include <nvgpu/linux/os_fence_android.h>
 #include <nvgpu/semaphore.h>
 
-#include "gk20a/sync_gk20a.h"
 #include "gk20a/channel_sync_gk20a.h"
 #include "gk20a/mm_gk20a.h"
 
+#include "sync_sema_android.h"
+
 #include "../drivers/staging/android/sync.h"
 
 int nvgpu_os_fence_sema_wait_gen_cmd(struct nvgpu_os_fence *s,
diff --git a/drivers/gpu/nvgpu/common/linux/os_fence_android_syncpt.c b/drivers/gpu/nvgpu/common/linux/os_fence_android_syncpt.c
index b3712011..d7a72fcd 100644
--- a/drivers/gpu/nvgpu/common/linux/os_fence_android_syncpt.c
+++ b/drivers/gpu/nvgpu/common/linux/os_fence_android_syncpt.c
@@ -24,7 +24,6 @@
 
 #include "gk20a/gk20a.h"
 #include "gk20a/channel_gk20a.h"
-#include "gk20a/sync_gk20a.h"
 #include "gk20a/channel_sync_gk20a.h"
 #include "gk20a/mm_gk20a.h"
 
diff --git a/drivers/gpu/nvgpu/common/linux/sync_sema_android.c b/drivers/gpu/nvgpu/common/linux/sync_sema_android.c
new file mode 100644
index 00000000..fad21351
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/sync_sema_android.c
@@ -0,0 +1,419 @@
/*
 * Semaphore Sync Framework Integration
 *
 * Copyright (c) 2017-2018, NVIDIA Corporation. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

#include <linux/file.h>
#include <linux/fs.h>
#include <linux/hrtimer.h>
#include <linux/module.h>
#include <nvgpu/lock.h>

#include <nvgpu/kmem.h>
#include <nvgpu/semaphore.h>
#include <nvgpu/bug.h>
#include <nvgpu/kref.h>
#include "../common/linux/channel.h"

#include "../drivers/staging/android/sync.h"

#include "sync_sema_android.h"

static const struct sync_timeline_ops gk20a_sync_timeline_ops;

struct gk20a_sync_timeline {
	struct sync_timeline obj;
	u32 max;
	u32 min;
};

/**
 * The sync framework dups pts when merging fences. We share a single
 * refcounted gk20a_sync_pt for each duped pt.
 */
struct gk20a_sync_pt {
	struct gk20a *g;
	struct nvgpu_ref refcount;
	u32 thresh;
	struct nvgpu_semaphore *sema;
	struct gk20a_sync_timeline *obj;

	/*
	 * Use a spin lock here since it will have better performance
	 * than a mutex - there should be very little contention on this
	 * lock.
	 */
	struct nvgpu_spinlock lock;
};

struct gk20a_sync_pt_inst {
	struct sync_pt pt;
	struct gk20a_sync_pt *shared;
};

/**
 * Compares sync pt values a and b, both of which will trigger either before
 * or after ref (i.e. a and b trigger before ref, or a and b trigger after
 * ref). Supplying ref allows us to handle wrapping correctly.
 *
 * Returns -1 if a < b (a triggers before b)
 *          0 if a = b (a and b trigger at the same time)
 *          1 if a > b (b triggers before a)
 */
static int __gk20a_sync_pt_compare_ref(
	u32 ref,
	u32 a,
	u32 b)
{
	/*
	 * We normalize both a and b by subtracting ref from them.
	 * Denote the normalized values by a_n and b_n. Note that because
	 * of wrapping, a_n and/or b_n may be negative.
	 *
	 * The normalized values a_n and b_n satisfy:
	 * - a positive value triggers before a negative value
	 * - a smaller positive value triggers before a greater positive value
	 * - a smaller negative value (greater in absolute value) triggers
	 *   before a greater negative value (smaller in absolute value).
	 *
	 * Thus we can just stick to unsigned arithmetic and compare
	 * (u32)a_n to (u32)b_n.
	 *
	 * Just to reiterate the possible cases:
	 *
	 *	1A) ...ref..a....b....
	 *	1B) ...ref..b....a....
	 *	2A) ...b....ref..a....		b_n < 0
	 *	2B) ...a....ref..b....		a_n > 0
	 *	3A) ...a....b....ref..		a_n < 0, b_n < 0
	 *	3B) ...b....a....ref..		a_n < 0, b_n < 0
	 */
	u32 a_n = a - ref;
	u32 b_n = b - ref;
	if (a_n < b_n)
		return -1;
	else if (a_n > b_n)
		return 1;
	else
		return 0;
}

static struct gk20a_sync_pt *to_gk20a_sync_pt(struct sync_pt *pt)
{
	struct gk20a_sync_pt_inst *pti =
		container_of(pt, struct gk20a_sync_pt_inst, pt);
	return pti->shared;
}
static struct gk20a_sync_timeline *to_gk20a_timeline(struct sync_timeline *obj)
{
	if (WARN_ON(obj->ops != &gk20a_sync_timeline_ops))
		return NULL;
	return (struct gk20a_sync_timeline *)obj;
}

static void gk20a_sync_pt_free_shared(struct nvgpu_ref *ref)
{
	struct gk20a_sync_pt *pt =
		container_of(ref, struct gk20a_sync_pt, refcount);
	struct gk20a *g = pt->g;

	if (pt->sema)
		nvgpu_semaphore_put(pt->sema);
	nvgpu_kfree(g, pt);
}

static struct gk20a_sync_pt *gk20a_sync_pt_create_shared(
		struct gk20a *g,
		struct gk20a_sync_timeline *obj,
		struct nvgpu_semaphore *sema)
{
	struct gk20a_sync_pt *shared;

	shared = nvgpu_kzalloc(g, sizeof(*shared));
	if (!shared)
		return NULL;

	nvgpu_ref_init(&shared->refcount);
	shared->g = g;
	shared->obj = obj;
	shared->sema = sema;
	shared->thresh = ++obj->max; /* sync framework has a lock */

	nvgpu_spinlock_init(&shared->lock);

	nvgpu_semaphore_get(sema);

	return shared;
}

static struct sync_pt *gk20a_sync_pt_create_inst(
		struct gk20a *g,
		struct gk20a_sync_timeline *obj,
		struct nvgpu_semaphore *sema)
{
	struct gk20a_sync_pt_inst *pti;

	pti = (struct gk20a_sync_pt_inst *)
		sync_pt_create(&obj->obj, sizeof(*pti));
	if (!pti)
		return NULL;

	pti->shared = gk20a_sync_pt_create_shared(g, obj, sema);
	if (!pti->shared) {
		sync_pt_free(&pti->pt);
		return NULL;
	}
	return &pti->pt;
}

static void gk20a_sync_pt_free_inst(struct sync_pt *sync_pt)
{
	struct gk20a_sync_pt *pt = to_gk20a_sync_pt(sync_pt);
	if (pt)
		nvgpu_ref_put(&pt->refcount, gk20a_sync_pt_free_shared);
}

static struct sync_pt *gk20a_sync_pt_dup_inst(struct sync_pt *sync_pt)
{
	struct gk20a_sync_pt_inst *pti;
	struct gk20a_sync_pt *pt = to_gk20a_sync_pt(sync_pt);

	pti = (struct gk20a_sync_pt_inst *)
		sync_pt_create(&pt->obj->obj, sizeof(*pti));
	if (!pti)
		return NULL;
	pti->shared = pt;
	nvgpu_ref_get(&pt->refcount);
	return &pti->pt;
}

/*
 * This function must be able to run on the same sync_pt concurrently. This
 * requires a lock to protect access to the sync_pt's internal data structures
 * which are modified as a side effect of calling this function.
 */
static int gk20a_sync_pt_has_signaled(struct sync_pt *sync_pt)
{
	struct gk20a_sync_pt *pt = to_gk20a_sync_pt(sync_pt);
	struct gk20a_sync_timeline *obj = pt->obj;
	bool signaled = true;

	nvgpu_spinlock_acquire(&pt->lock);
	if (!pt->sema)
		goto done;

	/* Acquired == not released yet == active == not signaled. */
	signaled = !nvgpu_semaphore_is_acquired(pt->sema);

	if (signaled) {
		/* Update min if necessary. */
		if (__gk20a_sync_pt_compare_ref(obj->max, pt->thresh,
				obj->min) == 1)
			obj->min = pt->thresh;

		/* Release the semaphore to the pool. */
		nvgpu_semaphore_put(pt->sema);
		pt->sema = NULL;
	}
done:
	nvgpu_spinlock_release(&pt->lock);

	return signaled;
}

static int gk20a_sync_pt_compare(struct sync_pt *a, struct sync_pt *b)
{
	bool a_expired;
	bool b_expired;
	struct gk20a_sync_pt *pt_a = to_gk20a_sync_pt(a);
	struct gk20a_sync_pt *pt_b = to_gk20a_sync_pt(b);

	if (WARN_ON(pt_a->obj != pt_b->obj))
		return 0;

	/* Early out */
	if (a == b)
		return 0;

	a_expired = gk20a_sync_pt_has_signaled(a);
	b_expired = gk20a_sync_pt_has_signaled(b);
	if (a_expired && !b_expired) {
		/* Easy, a was earlier */
		return -1;
	} else if (!a_expired && b_expired) {
		/* Easy, b was earlier */
		return 1;
	}

	/* Both a and b are expired (trigger before min) or not
	 * expired (trigger after min), so we can use min
	 * as a reference value for __gk20a_sync_pt_compare_ref.
	 */
	return __gk20a_sync_pt_compare_ref(pt_a->obj->min,
			pt_a->thresh, pt_b->thresh);
}

static u32 gk20a_sync_timeline_current(struct gk20a_sync_timeline *obj)
{
	return obj->min;
}

static void gk20a_sync_timeline_value_str(struct sync_timeline *timeline,
		char *str, int size)
{
	struct gk20a_sync_timeline *obj =
		(struct gk20a_sync_timeline *)timeline;
	snprintf(str, size, "%d", gk20a_sync_timeline_current(obj));
}

static void gk20a_sync_pt_value_str_for_sema(struct gk20a_sync_pt *pt,
		char *str, int size)
{
	struct nvgpu_semaphore *s = pt->sema;

	snprintf(str, size, "S: pool=%d [v=%u,r_v=%u]",
		s->location.pool->page_idx,
		nvgpu_semaphore_get_value(s),
		nvgpu_semaphore_read(s));
}

static void gk20a_sync_pt_value_str(struct sync_pt *sync_pt, char *str,
		int size)
{
	struct gk20a_sync_pt *pt = to_gk20a_sync_pt(sync_pt);

	if (pt->sema) {
		gk20a_sync_pt_value_str_for_sema(pt, str, size);
		return;
	}

	snprintf(str, size, "%d", pt->thresh);
}

static const struct sync_timeline_ops gk20a_sync_timeline_ops = {
	.driver_name = "nvgpu_semaphore",
	.dup = gk20a_sync_pt_dup_inst,
	.has_signaled = gk20a_sync_pt_has_signaled,
	.compare = gk20a_sync_pt_compare,
	.free_pt = gk20a_sync_pt_free_inst,
	.timeline_value_str = gk20a_sync_timeline_value_str,
	.pt_value_str = gk20a_sync_pt_value_str,
};

/* Public API */

struct sync_fence *gk20a_sync_fence_fdget(int fd)
{
	struct sync_fence *fence = sync_fence_fdget(fd);
	int i;

	if (!fence)
		return NULL;

	for (i = 0; i < fence->num_fences; i++) {
		struct fence *pt = fence->cbs[i].sync_pt;
		struct sync_pt *spt = sync_pt_from_fence(pt);
		struct sync_timeline *t;

		if (spt == NULL) {
			sync_fence_put(fence);
			return NULL;
		}

		t = sync_pt_parent(spt);
		if (t->ops != &gk20a_sync_timeline_ops) {
			sync_fence_put(fence);
			return NULL;
		}
	}

	return fence;
}

struct nvgpu_semaphore *gk20a_sync_pt_sema(struct sync_pt *spt)
{
	struct gk20a_sync_pt *pt = to_gk20a_sync_pt(spt);
	struct nvgpu_semaphore *sema;

	nvgpu_spinlock_acquire(&pt->lock);
	sema = pt->sema;
	if (sema)
		nvgpu_semaphore_get(sema);
	nvgpu_spinlock_release(&pt->lock);

	return sema;
}

void gk20a_sync_timeline_signal(struct sync_timeline *timeline)
{
	sync_timeline_signal(timeline, 0);
}

void gk20a_sync_timeline_destroy(struct sync_timeline *timeline)
{
	sync_timeline_destroy(timeline);
}

struct sync_timeline *gk20a_sync_timeline_create(
		const char *name)
{
	struct gk20a_sync_timeline *obj;

	obj = (struct gk20a_sync_timeline *)
		sync_timeline_create(&gk20a_sync_timeline_ops,
				sizeof(struct gk20a_sync_timeline),
				name);
	if (!obj)
		return NULL;
	obj->max = 0;
	obj->min = 0;
	return &obj->obj;
}

struct sync_fence *gk20a_sync_fence_create(
	struct channel_gk20a *c,
	struct nvgpu_semaphore *sema,
	const char *fmt, ...)
{
	char name[30];
	va_list args;
	struct sync_pt *pt;
	struct sync_fence *fence;
	struct gk20a *g = c->g;

	struct nvgpu_channel_linux *os_channel_priv = c->os_priv;
	struct nvgpu_os_fence_framework *fence_framework = NULL;
	struct gk20a_sync_timeline *timeline = NULL;

	fence_framework = &os_channel_priv->fence_framework;

	timeline = to_gk20a_timeline(fence_framework->timeline);

	pt = gk20a_sync_pt_create_inst(g, timeline, sema);
	if (pt == NULL)
		return NULL;

	va_start(args, fmt);
	vsnprintf(name, sizeof(name), fmt, args);
	va_end(args);

	fence = sync_fence_create(name, pt);
	if (fence == NULL) {
		sync_pt_free(pt);
		return NULL;
	}
	return fence;
}
diff --git a/drivers/gpu/nvgpu/common/linux/sync_sema_android.h b/drivers/gpu/nvgpu/common/linux/sync_sema_android.h
new file mode 100644
index 00000000..4fca7bed
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/sync_sema_android.h
@@ -0,0 +1,51 @@
/*
 * Semaphore Sync Framework Integration
 *
 * Copyright (c) 2017-2018, NVIDIA Corporation. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

#ifndef _GK20A_SYNC_H_
#define _GK20A_SYNC_H_

struct sync_timeline;
struct sync_fence;
struct sync_pt;
struct nvgpu_semaphore;
struct fence;

#ifdef CONFIG_SYNC
struct sync_timeline *gk20a_sync_timeline_create(const char *name);
void gk20a_sync_timeline_destroy(struct sync_timeline *);
void gk20a_sync_timeline_signal(struct sync_timeline *);
struct sync_fence *gk20a_sync_fence_create(
		struct channel_gk20a *c,
		struct nvgpu_semaphore *,
		const char *fmt, ...);
struct sync_fence *gk20a_sync_fence_fdget(int fd);
struct nvgpu_semaphore *gk20a_sync_pt_sema(struct sync_pt *spt);
#else
static inline void gk20a_sync_timeline_destroy(struct sync_timeline *obj) {}
static inline void gk20a_sync_timeline_signal(struct sync_timeline *obj) {}
static inline struct sync_fence *gk20a_sync_fence_fdget(int fd)
{
	return NULL;
}
static inline struct sync_timeline *gk20a_sync_timeline_create(
		const char *name) {
	return NULL;
}
#endif

#endif
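
Editor's note: for orientation, below is a minimal sketch of how the public API
declared in this header hangs together. It is an illustration, not part of the
commit, and is not meant to compile stand-alone outside the kernel tree; it
assumes CONFIG_SYNC is enabled and that the hypothetical 'ch', 'sema' and
'user_fd' arguments come from the surrounding nvgpu submit path.

/* Hypothetical helper, for illustration only. */
static void example_semaphore_fence_flow(struct channel_gk20a *ch,
		struct nvgpu_semaphore *sema, int user_fd)
{
	struct sync_timeline *timeline;
	struct sync_fence *out_fence;
	struct sync_fence *in_fence;

	/*
	 * One timeline per channel. In the driver this is created at channel
	 * setup and stored in the channel's nvgpu_os_fence_framework, which
	 * is where gk20a_sync_fence_create() looks it up.
	 */
	timeline = gk20a_sync_timeline_create("example_timeline");
	if (!timeline)
		return;

	/*
	 * Wrap the semaphore in a sync_fence; in the real driver this fence
	 * is installed as an fd and handed back to user space.
	 */
	out_fence = gk20a_sync_fence_create(ch, sema, "example_fence");

	/*
	 * Validate a fence fd passed in from user space; NULL means the fd
	 * is not backed by the "nvgpu_semaphore" timeline ops.
	 */
	in_fence = gk20a_sync_fence_fdget(user_fd);
	if (in_fence)
		sync_fence_put(in_fence);

	/*
	 * After the GPU releases the backing semaphore, poke the timeline so
	 * the sync framework re-evaluates has_signaled on pending points.
	 */
	gk20a_sync_timeline_signal(timeline);

	/* Torn down together with the channel. */
	gk20a_sync_timeline_destroy(timeline);
}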