diff options
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/sync_gk20a.c')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/sync_gk20a.c | 436 |
1 files changed, 436 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/sync_gk20a.c new file mode 100644 index 00000000..277b3334 --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/sync_gk20a.c | |||
@@ -0,0 +1,436 @@ | |||
1 | /* | ||
2 | * GK20A Sync Framework Integration | ||
3 | * | ||
4 | * Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved. | ||
5 | * | ||
6 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
7 | * copy of this software and associated documentation files (the "Software"), | ||
8 | * to deal in the Software without restriction, including without limitation | ||
9 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
10 | * and/or sell copies of the Software, and to permit persons to whom the | ||
11 | * Software is furnished to do so, subject to the following conditions: | ||
12 | * | ||
13 | * The above copyright notice and this permission notice shall be included in | ||
14 | * all copies or substantial portions of the Software. | ||
15 | * | ||
16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
19 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
21 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER | ||
22 | * DEALINGS IN THE SOFTWARE. | ||
23 | */ | ||
24 | |||
25 | #include <linux/file.h> | ||
26 | #include <linux/fs.h> | ||
27 | #include <linux/hrtimer.h> | ||
28 | #include <linux/module.h> | ||
29 | #include <nvgpu/lock.h> | ||
30 | #include <uapi/linux/nvgpu.h> | ||
31 | |||
32 | #include <nvgpu/kmem.h> | ||
33 | #include <nvgpu/semaphore.h> | ||
34 | #include <nvgpu/bug.h> | ||
35 | #include <nvgpu/kref.h> | ||
36 | |||
37 | #include "../drivers/staging/android/sync.h" | ||
38 | |||
39 | #include "sync_gk20a.h" | ||
40 | |||
41 | static const struct sync_timeline_ops gk20a_sync_timeline_ops; | ||
42 | |||
/*
 * nvgpu-specific timeline wrapper. 'obj' must be the first member: the code
 * upcasts between sync_timeline* and gk20a_sync_timeline* with plain casts.
 */
struct gk20a_sync_timeline {
	struct sync_timeline		obj;
	u32				max;	/* most recently issued threshold (see pt_create_shared) */
	u32				min;	/* threshold of newest pt observed signaled */
};
48 | |||
/**
 * The sync framework dups pts when merging fences. We share a single
 * refcounted gk20a_sync_pt for each duped pt.
 */
struct gk20a_sync_pt {
	struct gk20a			*g;		/* owning GPU, used for kfree */
	struct nvgpu_ref		refcount;	/* shared across duped instances */
	u32				thresh;		/* timeline value at which this pt triggers */
	struct nvgpu_semaphore		*sema;		/* backing semaphore; NULL once signaled */
	struct gk20a_sync_timeline	*obj;		/* parent timeline */

	/*
	 * Use a spin lock here since it will have better performance
	 * than a mutex - there should be very little contention on this
	 * lock.
	 */
	struct nvgpu_spinlock		lock;
};
67 | |||
/*
 * Per-instance wrapper handed to the sync framework. 'pt' must be the first
 * member so container_of()/pointer casts between sync_pt and this struct work.
 * Multiple instances (created by dup) point at one shared gk20a_sync_pt.
 */
struct gk20a_sync_pt_inst {
	struct sync_pt			pt;
	struct gk20a_sync_pt		*shared;
};
72 | |||
73 | /** | ||
74 | * Check if the passed sync_fence is backed by a single GPU semaphore. In such | ||
75 | * cases we can short circuit a lot of SW involved in signaling pre-fences and | ||
76 | * post fences. | ||
77 | * | ||
78 | * For now reject multi-sync_pt fences. This could be changed in future. It | ||
79 | * would require that the sema fast path push a sema acquire for each semaphore | ||
80 | * in the fence. | ||
81 | */ | ||
82 | int gk20a_is_sema_backed_sync_fence(struct sync_fence *fence) | ||
83 | { | ||
84 | struct sync_timeline *t; | ||
85 | |||
86 | struct fence *pt = fence->cbs[0].sync_pt; | ||
87 | struct sync_pt *spt = sync_pt_from_fence(pt); | ||
88 | |||
89 | if (fence->num_fences != 1) | ||
90 | return 0; | ||
91 | |||
92 | if (spt == NULL) | ||
93 | return 0; | ||
94 | |||
95 | t = sync_pt_parent(spt); | ||
96 | |||
97 | if (t->ops == &gk20a_sync_timeline_ops) | ||
98 | return 1; | ||
99 | return 0; | ||
100 | } | ||
101 | |||
102 | struct nvgpu_semaphore *gk20a_sync_fence_get_sema(struct sync_fence *f) | ||
103 | { | ||
104 | struct sync_pt *spt; | ||
105 | struct gk20a_sync_pt_inst *pti; | ||
106 | |||
107 | struct fence *pt; | ||
108 | |||
109 | if (!f) | ||
110 | return NULL; | ||
111 | |||
112 | if (!gk20a_is_sema_backed_sync_fence(f)) | ||
113 | return NULL; | ||
114 | |||
115 | pt = f->cbs[0].sync_pt; | ||
116 | spt = sync_pt_from_fence(pt); | ||
117 | pti = container_of(spt, struct gk20a_sync_pt_inst, pt); | ||
118 | |||
119 | return pti->shared->sema; | ||
120 | } | ||
121 | |||
122 | /** | ||
123 | * Compares sync pt values a and b, both of which will trigger either before | ||
124 | * or after ref (i.e. a and b trigger before ref, or a and b trigger after | ||
125 | * ref). Supplying ref allows us to handle wrapping correctly. | ||
126 | * | ||
127 | * Returns -1 if a < b (a triggers before b) | ||
128 | * 0 if a = b (a and b trigger at the same time) | ||
129 | * 1 if a > b (b triggers before a) | ||
130 | */ | ||
131 | static int __gk20a_sync_pt_compare_ref( | ||
132 | u32 ref, | ||
133 | u32 a, | ||
134 | u32 b) | ||
135 | { | ||
136 | /* | ||
137 | * We normalize both a and b by subtracting ref from them. | ||
138 | * Denote the normalized values by a_n and b_n. Note that because | ||
139 | * of wrapping, a_n and/or b_n may be negative. | ||
140 | * | ||
141 | * The normalized values a_n and b_n satisfy: | ||
142 | * - a positive value triggers before a negative value | ||
143 | * - a smaller positive value triggers before a greater positive value | ||
144 | * - a smaller negative value (greater in absolute value) triggers | ||
145 | * before a greater negative value (smaller in absolute value). | ||
146 | * | ||
147 | * Thus we can just stick to unsigned arithmetic and compare | ||
148 | * (u32)a_n to (u32)b_n. | ||
149 | * | ||
150 | * Just to reiterate the possible cases: | ||
151 | * | ||
152 | * 1A) ...ref..a....b.... | ||
153 | * 1B) ...ref..b....a.... | ||
154 | * 2A) ...b....ref..a.... b_n < 0 | ||
155 | * 2B) ...a....ref..b.... a_n > 0 | ||
156 | * 3A) ...a....b....ref.. a_n < 0, b_n < 0 | ||
157 | * 3A) ...b....a....ref.. a_n < 0, b_n < 0 | ||
158 | */ | ||
159 | u32 a_n = a - ref; | ||
160 | u32 b_n = b - ref; | ||
161 | if (a_n < b_n) | ||
162 | return -1; | ||
163 | else if (a_n > b_n) | ||
164 | return 1; | ||
165 | else | ||
166 | return 0; | ||
167 | } | ||
168 | |||
/* Map a framework sync_pt to the shared gk20a_sync_pt it belongs to. */
static struct gk20a_sync_pt *to_gk20a_sync_pt(struct sync_pt *pt)
{
	struct gk20a_sync_pt_inst *pti =
			container_of(pt, struct gk20a_sync_pt_inst, pt);
	return pti->shared;
}
/*
 * Downcast a sync_timeline to our wrapper, returning NULL (with a WARN) if
 * the timeline was not created with gk20a_sync_timeline_ops.
 */
static struct gk20a_sync_timeline *to_gk20a_timeline(struct sync_timeline *obj)
{
	if (WARN_ON(obj->ops != &gk20a_sync_timeline_ops))
		return NULL;
	return (struct gk20a_sync_timeline *)obj;
}
181 | |||
182 | static void gk20a_sync_pt_free_shared(struct nvgpu_ref *ref) | ||
183 | { | ||
184 | struct gk20a_sync_pt *pt = | ||
185 | container_of(ref, struct gk20a_sync_pt, refcount); | ||
186 | struct gk20a *g = pt->g; | ||
187 | |||
188 | if (pt->sema) | ||
189 | nvgpu_semaphore_put(pt->sema); | ||
190 | nvgpu_kfree(g, pt); | ||
191 | } | ||
192 | |||
193 | static struct gk20a_sync_pt *gk20a_sync_pt_create_shared( | ||
194 | struct gk20a *g, | ||
195 | struct gk20a_sync_timeline *obj, | ||
196 | struct nvgpu_semaphore *sema) | ||
197 | { | ||
198 | struct gk20a_sync_pt *shared; | ||
199 | |||
200 | shared = nvgpu_kzalloc(g, sizeof(*shared)); | ||
201 | if (!shared) | ||
202 | return NULL; | ||
203 | |||
204 | nvgpu_ref_init(&shared->refcount); | ||
205 | shared->g = g; | ||
206 | shared->obj = obj; | ||
207 | shared->sema = sema; | ||
208 | shared->thresh = ++obj->max; /* sync framework has a lock */ | ||
209 | |||
210 | nvgpu_spinlock_init(&shared->lock); | ||
211 | |||
212 | nvgpu_semaphore_get(sema); | ||
213 | |||
214 | return shared; | ||
215 | } | ||
216 | |||
217 | static struct sync_pt *gk20a_sync_pt_create_inst( | ||
218 | struct gk20a *g, | ||
219 | struct gk20a_sync_timeline *obj, | ||
220 | struct nvgpu_semaphore *sema) | ||
221 | { | ||
222 | struct gk20a_sync_pt_inst *pti; | ||
223 | |||
224 | pti = (struct gk20a_sync_pt_inst *) | ||
225 | sync_pt_create(&obj->obj, sizeof(*pti)); | ||
226 | if (!pti) | ||
227 | return NULL; | ||
228 | |||
229 | pti->shared = gk20a_sync_pt_create_shared(g, obj, sema); | ||
230 | if (!pti->shared) { | ||
231 | sync_pt_free(&pti->pt); | ||
232 | return NULL; | ||
233 | } | ||
234 | return &pti->pt; | ||
235 | } | ||
236 | |||
237 | static void gk20a_sync_pt_free_inst(struct sync_pt *sync_pt) | ||
238 | { | ||
239 | struct gk20a_sync_pt *pt = to_gk20a_sync_pt(sync_pt); | ||
240 | if (pt) | ||
241 | nvgpu_ref_put(&pt->refcount, gk20a_sync_pt_free_shared); | ||
242 | } | ||
243 | |||
244 | static struct sync_pt *gk20a_sync_pt_dup_inst(struct sync_pt *sync_pt) | ||
245 | { | ||
246 | struct gk20a_sync_pt_inst *pti; | ||
247 | struct gk20a_sync_pt *pt = to_gk20a_sync_pt(sync_pt); | ||
248 | |||
249 | pti = (struct gk20a_sync_pt_inst *) | ||
250 | sync_pt_create(&pt->obj->obj, sizeof(*pti)); | ||
251 | if (!pti) | ||
252 | return NULL; | ||
253 | pti->shared = pt; | ||
254 | nvgpu_ref_get(&pt->refcount); | ||
255 | return &pti->pt; | ||
256 | } | ||
257 | |||
/*
 * This function must be able to run on the same sync_pt concurrently. This
 * requires a lock to protect access to the sync_pt's internal data structures
 * which are modified as a side effect of calling this function.
 *
 * Returns nonzero once the backing semaphore has been released. The first
 * call that observes the signal advances the timeline's min value and drops
 * the semaphore reference (pt->sema becomes NULL), so later calls take the
 * early-out path.
 */
static int gk20a_sync_pt_has_signaled(struct sync_pt *sync_pt)
{
	struct gk20a_sync_pt *pt = to_gk20a_sync_pt(sync_pt);
	struct gk20a_sync_timeline *obj = pt->obj;
	bool signaled = true;

	nvgpu_spinlock_acquire(&pt->lock);
	if (!pt->sema)
		goto done;	/* semaphore already released: signaled earlier */

	/* Acquired == not released yet == active == not signaled. */
	signaled = !nvgpu_semaphore_is_acquired(pt->sema);

	if (signaled) {
		/* Update min if necessary. */
		if (__gk20a_sync_pt_compare_ref(obj->max, pt->thresh,
				obj->min) == 1)
			obj->min = pt->thresh;

		/* Release the semaphore to the pool. */
		nvgpu_semaphore_put(pt->sema);
		pt->sema = NULL;
	}
done:
	nvgpu_spinlock_release(&pt->lock);

	return signaled;
}
291 | |||
/*
 * compare op: order two sync_pts on the same timeline, returning -1/0/1.
 * Note that calling gk20a_sync_pt_has_signaled() below is not a pure query:
 * it may update each pt's state and advance obj->min as a side effect.
 */
static int gk20a_sync_pt_compare(struct sync_pt *a, struct sync_pt *b)
{
	bool a_expired;
	bool b_expired;
	struct gk20a_sync_pt *pt_a = to_gk20a_sync_pt(a);
	struct gk20a_sync_pt *pt_b = to_gk20a_sync_pt(b);

	/* Comparing pts from different timelines is not meaningful. */
	if (WARN_ON(pt_a->obj != pt_b->obj))
		return 0;

	/* Early out */
	if (a == b)
		return 0;

	a_expired = gk20a_sync_pt_has_signaled(a);
	b_expired = gk20a_sync_pt_has_signaled(b);
	if (a_expired && !b_expired) {
		/* Easy, a was earlier */
		return -1;
	} else if (!a_expired && b_expired) {
		/* Easy, b was earlier */
		return 1;
	}

	/* Both a and b are expired (trigger before min) or not
	 * expired (trigger after min), so we can use min
	 * as a reference value for __gk20a_sync_pt_compare_ref.
	 */
	return __gk20a_sync_pt_compare_ref(pt_a->obj->min,
			pt_a->thresh, pt_b->thresh);
}
323 | |||
/* Current timeline value: the threshold of the newest signaled pt. */
static u32 gk20a_sync_timeline_current(struct gk20a_sync_timeline *obj)
{
	return obj->min;
}
328 | |||
/* timeline_value_str op: render the current timeline value for debugfs. */
static void gk20a_sync_timeline_value_str(struct sync_timeline *timeline,
		char *str, int size)
{
	struct gk20a_sync_timeline *tl =
		(struct gk20a_sync_timeline *)timeline;

	snprintf(str, size, "%d", gk20a_sync_timeline_current(tl));
}
336 | |||
337 | static void gk20a_sync_pt_value_str_for_sema(struct gk20a_sync_pt *pt, | ||
338 | char *str, int size) | ||
339 | { | ||
340 | struct nvgpu_semaphore *s = pt->sema; | ||
341 | |||
342 | snprintf(str, size, "S: c=%d [v=%u,r_v=%u]", | ||
343 | s->hw_sema->ch->chid, | ||
344 | nvgpu_semaphore_get_value(s), | ||
345 | nvgpu_semaphore_read(s)); | ||
346 | } | ||
347 | |||
348 | static void gk20a_sync_pt_value_str(struct sync_pt *sync_pt, char *str, | ||
349 | int size) | ||
350 | { | ||
351 | struct gk20a_sync_pt *pt = to_gk20a_sync_pt(sync_pt); | ||
352 | |||
353 | if (pt->sema) { | ||
354 | gk20a_sync_pt_value_str_for_sema(pt, str, size); | ||
355 | return; | ||
356 | } | ||
357 | |||
358 | snprintf(str, size, "%d", pt->thresh); | ||
359 | } | ||
360 | |||
/* Hooks wiring nvgpu semaphores into the Android sync framework. */
static const struct sync_timeline_ops gk20a_sync_timeline_ops = {
	.driver_name = "nvgpu_semaphore",
	.dup = gk20a_sync_pt_dup_inst,
	.has_signaled = gk20a_sync_pt_has_signaled,
	.compare = gk20a_sync_pt_compare,
	.free_pt = gk20a_sync_pt_free_inst,
	.timeline_value_str = gk20a_sync_timeline_value_str,
	.pt_value_str = gk20a_sync_pt_value_str,
};
370 | |||
371 | /* Public API */ | ||
372 | |||
/* Look up a sync_fence from a user-supplied fd; thin sync framework wrapper. */
struct sync_fence *gk20a_sync_fence_fdget(int fd)
{
	return sync_fence_fdget(fd);
}
377 | |||
/* Re-evaluate the timeline's pts; has_signaled updates state as needed. */
void gk20a_sync_timeline_signal(struct sync_timeline *timeline)
{
	sync_timeline_signal(timeline, 0);
}
382 | |||
/* Destroy a timeline created by gk20a_sync_timeline_create(). */
void gk20a_sync_timeline_destroy(struct sync_timeline *timeline)
{
	sync_timeline_destroy(timeline);
}
387 | |||
388 | struct sync_timeline *gk20a_sync_timeline_create( | ||
389 | const char *fmt, ...) | ||
390 | { | ||
391 | struct gk20a_sync_timeline *obj; | ||
392 | char name[30]; | ||
393 | va_list args; | ||
394 | |||
395 | va_start(args, fmt); | ||
396 | vsnprintf(name, sizeof(name), fmt, args); | ||
397 | va_end(args); | ||
398 | |||
399 | obj = (struct gk20a_sync_timeline *) | ||
400 | sync_timeline_create(&gk20a_sync_timeline_ops, | ||
401 | sizeof(struct gk20a_sync_timeline), | ||
402 | name); | ||
403 | if (!obj) | ||
404 | return NULL; | ||
405 | obj->max = 0; | ||
406 | obj->min = 0; | ||
407 | return &obj->obj; | ||
408 | } | ||
409 | |||
/*
 * Create a sync_fence wrapping a single new pt backed by @sema on timeline
 * @obj. The fence name is built printf-style (truncated to 30 bytes).
 * Returns NULL on failure; the pt is cleaned up if fence creation fails.
 */
struct sync_fence *gk20a_sync_fence_create(
		struct gk20a *g,
		struct sync_timeline *obj,
		struct nvgpu_semaphore *sema,
		const char *fmt, ...)
{
	struct gk20a_sync_timeline *tl = to_gk20a_timeline(obj);
	struct sync_fence *fence;
	struct sync_pt *pt;
	char name[30];
	va_list args;

	pt = gk20a_sync_pt_create_inst(g, tl, sema);
	if (pt == NULL)
		return NULL;

	va_start(args, fmt);
	vsnprintf(name, sizeof(name), fmt, args);
	va_end(args);

	fence = sync_fence_create(name, pt);
	if (fence == NULL) {
		/* Fence owns the pt only on success; free it ourselves. */
		sync_pt_free(pt);
		return NULL;
	}

	return fence;
}