From a51eb9da021c2934e196c5d8be04551703e6bb5b Mon Sep 17 00:00:00 2001
From: Debarshi Dutta <ddutta@nvidia.com>
Date: Fri, 11 May 2018 09:18:48 +0530
Subject: gpu: nvgpu: move sync_gk20a under common/linux directory

sync_gk20a.* files are no longer used by core code and only invoked
from linux specific implementations of the OS_FENCE framework which are
under the common/linux directory. Hence, sync_gk20a.* files are also
moved under common/linux.

JIRA NVGPU-66

Change-Id: If623524611373d2da39b63cfb3c1e40089bf8d22
Signed-off-by: Debarshi Dutta <ddutta@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1712900
Reviewed-by: Vijayakumar Subbu <vsubbu@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
---
 drivers/gpu/nvgpu/common/linux/channel.c           |   2 +-
 .../gpu/nvgpu/common/linux/os_fence_android_sema.c |   3 +-
 .../nvgpu/common/linux/os_fence_android_syncpt.c   |   1 -
 drivers/gpu/nvgpu/common/linux/sync_sema_android.c | 419 +++++++++++++++++++++
 drivers/gpu/nvgpu/common/linux/sync_sema_android.h |  51 +++
 5 files changed, 473 insertions(+), 3 deletions(-)
 create mode 100644 drivers/gpu/nvgpu/common/linux/sync_sema_android.c
 create mode 100644 drivers/gpu/nvgpu/common/linux/sync_sema_android.h

(limited to 'drivers/gpu/nvgpu/common')

diff --git a/drivers/gpu/nvgpu/common/linux/channel.c b/drivers/gpu/nvgpu/common/linux/channel.c
index d767374b..1e170b30 100644
--- a/drivers/gpu/nvgpu/common/linux/channel.c
+++ b/drivers/gpu/nvgpu/common/linux/channel.c
@@ -40,7 +40,7 @@
 #include <trace/events/gk20a.h>
 #include <uapi/linux/nvgpu.h>
 
-#include "gk20a/sync_gk20a.h"
+#include "sync_sema_android.h"
 
 u32 nvgpu_submit_gpfifo_user_flags_to_common_flags(u32 user_flags)
 {
diff --git a/drivers/gpu/nvgpu/common/linux/os_fence_android_sema.c b/drivers/gpu/nvgpu/common/linux/os_fence_android_sema.c
index d4aeb6ed..2bb71c99 100644
--- a/drivers/gpu/nvgpu/common/linux/os_fence_android_sema.c
+++ b/drivers/gpu/nvgpu/common/linux/os_fence_android_sema.c
@@ -21,10 +21,11 @@
 #include <nvgpu/linux/os_fence_android.h>
 #include <nvgpu/semaphore.h>
 
-#include "gk20a/sync_gk20a.h"
 #include "gk20a/channel_sync_gk20a.h"
 #include "gk20a/mm_gk20a.h"
 
+#include "sync_sema_android.h"
+
 #include "../drivers/staging/android/sync.h"
 
 int nvgpu_os_fence_sema_wait_gen_cmd(struct nvgpu_os_fence *s,
diff --git a/drivers/gpu/nvgpu/common/linux/os_fence_android_syncpt.c b/drivers/gpu/nvgpu/common/linux/os_fence_android_syncpt.c
index b3712011..d7a72fcd 100644
--- a/drivers/gpu/nvgpu/common/linux/os_fence_android_syncpt.c
+++ b/drivers/gpu/nvgpu/common/linux/os_fence_android_syncpt.c
@@ -24,7 +24,6 @@
 
 #include "gk20a/gk20a.h"
 #include "gk20a/channel_gk20a.h"
-#include "gk20a/sync_gk20a.h"
 #include "gk20a/channel_sync_gk20a.h"
 #include "gk20a/mm_gk20a.h"
 
diff --git a/drivers/gpu/nvgpu/common/linux/sync_sema_android.c b/drivers/gpu/nvgpu/common/linux/sync_sema_android.c
new file mode 100644
index 00000000..fad21351
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/sync_sema_android.c
@@ -0,0 +1,419 @@
+/*
+ * Semaphore Sync Framework Integration
+ *
+ * Copyright (c) 2017-2018, NVIDIA Corporation.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/hrtimer.h>
+#include <linux/module.h>
+#include <nvgpu/lock.h>
+
+#include <nvgpu/kmem.h>
+#include <nvgpu/semaphore.h>
+#include <nvgpu/bug.h>
+#include <nvgpu/kref.h>
+#include "../common/linux/channel.h"
+
+#include "../drivers/staging/android/sync.h"
+
+#include "sync_sema_android.h"
+
+static const struct sync_timeline_ops gk20a_sync_timeline_ops;
+
+struct gk20a_sync_timeline {
+	struct sync_timeline		obj;	/* first member: cast to/from */
+	u32				max;	/* last thresh handed out */
+	u32				min;	/* newest thresh seen signaled */
+};
+
+/**
+ * The sync framework dups pts when merging fences. We share a single
+ * refcounted gk20a_sync_pt for each duped pt.
+ */
+struct gk20a_sync_pt {
+	struct gk20a			*g;	/* passed to nvgpu_kfree() */
+	struct nvgpu_ref			refcount; /* ref per pt_inst */
+	u32				thresh;	/* position on the timeline */
+	struct nvgpu_semaphore		*sema;	/* NULL once seen signaled */
+	struct gk20a_sync_timeline	*obj;	/* parent timeline */
+
+	/*
+	 * Serializes access to sema. A spin lock is used since it will
+	 * have better performance than a mutex - there should be very
+	 * little contention on this lock.
+	 */
+	struct nvgpu_spinlock			lock;
+};
+
+struct gk20a_sync_pt_inst {
+	struct sync_pt			pt;	/* framework pt, kept first */
+	struct gk20a_sync_pt		*shared; /* state shared by all dups */
+};
+
+/**
+ * Orders sync pt values a and b, both of which are expected to trigger
+ * either before or after ref (i.e. a and b trigger before ref, or a and b
+ * trigger after ref). Supplying ref is what makes wrapping work.
+ *
+ * @ref: reference value that a and b are measured against
+ * @a:   first sync pt value
+ * @b:   second sync pt value
+ *
+ * Returns -1 if a < b (a triggers before b)
+ *	    0 if a = b (a and b trigger at the same time)
+ *	    1 if a > b (b triggers before a)
+ */
+static int __gk20a_sync_pt_compare_ref(u32 ref, u32 a, u32 b)
+{
+	/*
+	 * Both values are normalized by subtracting ref; call the results
+	 * a_n and b_n. Because of wrapping, either of them may be negative
+	 * when viewed as a signed number.
+	 *
+	 * The normalized values a_n and b_n satisfy:
+	 * - a positive value triggers before a negative value
+	 * - a smaller positive value triggers before a greater positive value
+	 * - a smaller negative value (greater in absolute value) triggers
+	 *   before a greater negative value (smaller in absolute value).
+	 *
+	 * That is exactly the ordering of (u32)a_n and (u32)b_n, so plain
+	 * unsigned arithmetic is sufficient.
+	 *
+	 * The possible layouts are:
+	 *
+	 *	1A) ...ref..a....b....
+	 *	1B) ...ref..b....a....
+	 *	2A) ...b....ref..a....              b_n < 0
+	 *	2B) ...a....ref..b....     a_n < 0
+	 *	3A) ...a....b....ref..     a_n < 0, b_n < 0
+	 *	3B) ...b....a....ref..     a_n < 0, b_n < 0
+	 */
+	const u32 a_n = a - ref;
+	const u32 b_n = b - ref;
+
+	if (a_n == b_n)
+		return 0;
+
+	return (a_n < b_n) ? -1 : 1;
+}
+
+/* Get the shared gk20a_sync_pt behind a framework sync_pt instance. */
+static struct gk20a_sync_pt *to_gk20a_sync_pt(struct sync_pt *pt)
+{
+	/* pt is embedded in a gk20a_sync_pt_inst; step out to the wrapper. */
+	return container_of(pt, struct gk20a_sync_pt_inst, pt)->shared;
+}
+/* Downcast to our timeline type; warns and returns NULL if obj is foreign. */
+static struct gk20a_sync_timeline *to_gk20a_timeline(struct sync_timeline *obj)
+{
+	return WARN_ON(obj->ops != &gk20a_sync_timeline_ops) ?
+			NULL : (struct gk20a_sync_timeline *)obj;
+}
+
+static void gk20a_sync_pt_free_shared(struct nvgpu_ref *ref)
+{
+	struct gk20a_sync_pt *pt =
+		container_of(ref, struct gk20a_sync_pt, refcount);
+	struct gk20a *g = pt->g;	/* read before pt is freed below */
+
+	if (pt->sema)	/* NULL if has_signaled() already put it */
+		nvgpu_semaphore_put(pt->sema);
+	nvgpu_kfree(g, pt);	/* release cb of nvgpu_ref_put() */
+}
+
+/*
+ * Allocate the refcounted state shared by a pt and all of its dups. Takes
+ * a reference on sema and claims the next threshold (++max) on obj.
+ */
+static struct gk20a_sync_pt *gk20a_sync_pt_create_shared(struct gk20a *g,
+		struct gk20a_sync_timeline *obj, struct nvgpu_semaphore *sema)
+{
+	struct gk20a_sync_pt *pt = nvgpu_kzalloc(g, sizeof(*pt));
+
+	if (pt == NULL)
+		return NULL;
+
+	nvgpu_semaphore_get(sema);
+	pt->sema = sema;
+	pt->g = g;
+	pt->obj = obj;
+	pt->thresh = ++obj->max; /* sync framework has a lock */
+
+	nvgpu_ref_init(&pt->refcount);
+	nvgpu_spinlock_init(&pt->lock);
+
+	return pt;
+}
+
+/* Create a framework sync_pt on obj with newly allocated shared state. */
+static struct sync_pt *gk20a_sync_pt_create_inst(struct gk20a *g,
+		struct gk20a_sync_timeline *obj, struct nvgpu_semaphore *sema)
+{
+	struct gk20a_sync_pt_inst *inst;
+
+	inst = (struct gk20a_sync_pt_inst *)
+		sync_pt_create(&obj->obj, sizeof(*inst));
+	if (inst == NULL)
+		return NULL;
+
+	inst->shared = gk20a_sync_pt_create_shared(g, obj, sema);
+	if (inst->shared != NULL)
+		return &inst->pt;
+
+	/* No shared state: release the framework pt again. */
+	sync_pt_free(&inst->pt);
+	return NULL;
+}
+
+static void gk20a_sync_pt_free_inst(struct sync_pt *sync_pt)
+{
+	struct gk20a_sync_pt *pt = to_gk20a_sync_pt(sync_pt);
+	if (pt)	/* shared is NULL if gk20a_sync_pt_create_shared() failed */
+		nvgpu_ref_put(&pt->refcount, gk20a_sync_pt_free_shared);
+}
+
+static struct sync_pt *gk20a_sync_pt_dup_inst(struct sync_pt *sync_pt)
+{
+	struct gk20a_sync_pt *shared = to_gk20a_sync_pt(sync_pt);
+	struct gk20a_sync_pt_inst *dup;
+
+	dup = (struct gk20a_sync_pt_inst *)
+		sync_pt_create(&shared->obj->obj, sizeof(*dup));
+	if (dup == NULL)
+		return NULL;
+	nvgpu_ref_get(&shared->refcount);	/* the dup holds its own ref */
+	dup->shared = shared;
+	return &dup->pt;
+}
+
+/*
+ * .has_signaled hook. It may run concurrently on the same sync_pt, and it
+ * drops the semaphore as a side effect once signaled, so that state is
+ * only touched while holding pt->lock.
+ */
+static int gk20a_sync_pt_has_signaled(struct sync_pt *sync_pt)
+{
+	struct gk20a_sync_pt *pt = to_gk20a_sync_pt(sync_pt);
+	struct gk20a_sync_timeline *obj = pt->obj;
+	bool signaled = true;	/* stays true when sema was already dropped */
+
+	nvgpu_spinlock_acquire(&pt->lock);
+	if (!pt->sema)
+		goto done;
+
+	/* Acquired == not released yet == active == not signaled. */
+	signaled = !nvgpu_semaphore_is_acquired(pt->sema);
+
+	if (signaled) {
+		/* Advance min if thresh is past it, using max as reference. */
+		if (__gk20a_sync_pt_compare_ref(obj->max, pt->thresh,
+						obj->min) == 1)
+			obj->min = pt->thresh;
+
+		/* Drop our semaphore reference; NULL marks it as done. */
+		nvgpu_semaphore_put(pt->sema);
+		pt->sema = NULL;
+	}
+done:
+	nvgpu_spinlock_release(&pt->lock);
+
+	return signaled;
+}
+
+/* .compare hook: -1 if a triggers first, 1 if b does, 0 on a tie. */
+static int gk20a_sync_pt_compare(struct sync_pt *a, struct sync_pt *b)
+{
+	struct gk20a_sync_pt *pt_a = to_gk20a_sync_pt(a);
+	struct gk20a_sync_pt *pt_b = to_gk20a_sync_pt(b);
+	bool a_done;
+	bool b_done;
+
+	/* Pts of different timelines cannot be ordered; report a tie. */
+	if (WARN_ON(pt_a->obj != pt_b->obj))
+		return 0;
+
+	/* Early out */
+	if (a == b)
+		return 0;
+
+	a_done = gk20a_sync_pt_has_signaled(a);
+	b_done = gk20a_sync_pt_has_signaled(b);
+
+	/* Easy case: exactly one has signaled, and that one was earlier. */
+	if (a_done != b_done)
+		return a_done ? -1 : 1;
+
+	/*
+	 * Both a and b are expired (trigger before min) or not
+	 * expired (trigger after min), so we can use min
+	 * as a reference value for __gk20a_sync_pt_compare_ref.
+	 */
+	return __gk20a_sync_pt_compare_ref(pt_a->obj->min,
+			pt_a->thresh, pt_b->thresh);
+}
+
+static u32 gk20a_sync_timeline_current(struct gk20a_sync_timeline *obj)
+{
+	return obj->min;	/* newest thresh recorded by has_signaled() */
+}
+
+/* .timeline_value_str hook: print the current value; it is a u32, so %u. */
+static void gk20a_sync_timeline_value_str(struct sync_timeline *timeline,
+		char *str, int size)
+{
+	snprintf(str, size, "%u", gk20a_sync_timeline_current(
+			(struct gk20a_sync_timeline *)timeline));
+}
+
+/* Format the state of pt's semaphore into str; pt->sema must be non-NULL. */
+static void gk20a_sync_pt_value_str_for_sema(struct gk20a_sync_pt *pt,
+					     char *str, int size)
+{
+	struct nvgpu_semaphore *sema = pt->sema;
+
+	snprintf(str, size, "S: pool=%d [v=%u,r_v=%u]",
+		 sema->location.pool->page_idx, nvgpu_semaphore_get_value(sema),
+		 nvgpu_semaphore_read(sema));
+}
+
+/* .pt_value_str hook: semaphore details while held, else the threshold. */
+static void gk20a_sync_pt_value_str(struct sync_pt *sync_pt, char *str,
+		int size)
+{
+	struct gk20a_sync_pt *pt = to_gk20a_sync_pt(sync_pt);
+
+	/* NOTE(review): sema is read without pt->lock here; confirm. */
+	if (pt->sema)
+		gk20a_sync_pt_value_str_for_sema(pt, str, size);
+	else
+		snprintf(str, size, "%u", pt->thresh); /* thresh is a u32 */
+}
+
+static const struct sync_timeline_ops gk20a_sync_timeline_ops = {
+	.driver_name = "nvgpu_semaphore",
+	.dup = gk20a_sync_pt_dup_inst,	/* shares state, takes a ref */
+	.has_signaled = gk20a_sync_pt_has_signaled,
+	.compare = gk20a_sync_pt_compare,
+	.free_pt = gk20a_sync_pt_free_inst,	/* drops the inst's ref */
+	.timeline_value_str = gk20a_sync_timeline_value_str,
+	.pt_value_str = gk20a_sync_pt_value_str,
+};
+
+/* Public API */
+
+/*
+ * Look up the sync fence behind fd. Only fences whose points all sit on
+ * nvgpu semaphore timelines are accepted; any other fence is put again
+ * and NULL is returned.
+ */
+struct sync_fence *gk20a_sync_fence_fdget(int fd)
+{
+	struct sync_fence *fence = sync_fence_fdget(fd);
+	int i;
+
+	if (fence == NULL)
+		return NULL;
+
+	for (i = 0; i < fence->num_fences; i++) {
+		struct sync_pt *spt;
+
+		spt = sync_pt_from_fence(fence->cbs[i].sync_pt);
+		/* Reject anything not backed by one of our timelines. */
+		if (spt == NULL ||
+		    sync_pt_parent(spt)->ops != &gk20a_sync_timeline_ops) {
+			sync_fence_put(fence);
+			return NULL;
+		}
+	}
+
+	return fence;
+}
+
+/* Get a new reference to spt's semaphore; NULL once it has been dropped. */
+struct nvgpu_semaphore *gk20a_sync_pt_sema(struct sync_pt *spt)
+{
+	struct gk20a_sync_pt *shared = to_gk20a_sync_pt(spt);
+	struct nvgpu_semaphore *held;
+
+	nvgpu_spinlock_acquire(&shared->lock);
+	held = shared->sema;
+	if (held != NULL)
+		nvgpu_semaphore_get(held);
+	nvgpu_spinlock_release(&shared->lock);
+	return held;
+}
+
+void gk20a_sync_timeline_signal(struct sync_timeline *timeline)
+{
+	sync_timeline_signal(timeline, 0);	/* 2nd arg fixed at 0 */
+}
+
+void gk20a_sync_timeline_destroy(struct sync_timeline *timeline)
+{
+	sync_timeline_destroy(timeline);	/* plain pass-through */
+}
+
+/* Create a semaphore-backed sync timeline called name; NULL on failure. */
+struct sync_timeline *gk20a_sync_timeline_create(const char *name)
+{
+	struct sync_timeline *timeline;
+	struct gk20a_sync_timeline *obj;
+
+	timeline = sync_timeline_create(&gk20a_sync_timeline_ops,
+					sizeof(*obj), name);
+	if (timeline == NULL)
+		return NULL;
+	obj = (struct gk20a_sync_timeline *)timeline;
+	obj->min = 0;
+	obj->max = 0;
+	return timeline;
+}
+
+/*
+ * Create a sync fence for sema on the channel's semaphore timeline. The
+ * fence name is built printf-style from fmt. Returns NULL on failure,
+ * including when the channel has no usable timeline.
+ */
+struct sync_fence *gk20a_sync_fence_create(struct channel_gk20a *c,
+		struct nvgpu_semaphore *sema, const char *fmt, ...)
+{
+	char name[30];
+	va_list args;
+	struct sync_pt *pt;
+	struct sync_fence *fence;
+	struct nvgpu_channel_linux *os_channel_priv = c->os_priv;
+	struct sync_timeline *tl = os_channel_priv->fence_framework.timeline;
+	struct gk20a_sync_timeline *timeline;
+
+	/* to_gk20a_timeline() dereferences tl and may itself return NULL. */
+	timeline = tl ? to_gk20a_timeline(tl) : NULL;
+	if (timeline == NULL)
+		return NULL;
+
+	pt = gk20a_sync_pt_create_inst(c->g, timeline, sema);
+	if (pt == NULL)
+		return NULL;
+
+	va_start(args, fmt);
+	vsnprintf(name, sizeof(name), fmt, args);
+	va_end(args);
+
+	fence = sync_fence_create(name, pt);
+	if (fence == NULL)
+		sync_pt_free(pt);
+	return fence;
+}
diff --git a/drivers/gpu/nvgpu/common/linux/sync_sema_android.h b/drivers/gpu/nvgpu/common/linux/sync_sema_android.h
new file mode 100644
index 00000000..4fca7bed
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/sync_sema_android.h
@@ -0,0 +1,51 @@
+/*
+ * Semaphore Sync Framework Integration
+ *
+ * Copyright (c) 2017-2018, NVIDIA Corporation.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _GK20A_SYNC_H_
+#define _GK20A_SYNC_H_
+
+struct sync_timeline;
+struct sync_fence;
+struct sync_pt;
+struct nvgpu_semaphore;
+struct fence;
+struct channel_gk20a;	/* named in gk20a_sync_fence_create() below */
+
+#ifdef CONFIG_SYNC
+struct sync_timeline *gk20a_sync_timeline_create(const char *name);
+void gk20a_sync_timeline_destroy(struct sync_timeline *);
+void gk20a_sync_timeline_signal(struct sync_timeline *);
+struct sync_fence *gk20a_sync_fence_create(struct channel_gk20a *c,
+		struct nvgpu_semaphore *, const char *fmt, ...);
+struct sync_fence *gk20a_sync_fence_fdget(int fd);
+struct nvgpu_semaphore *gk20a_sync_pt_sema(struct sync_pt *spt);
+#else
+/* Without CONFIG_SYNC these are no-ops / NULL-returning stubs. */
+static inline void gk20a_sync_timeline_destroy(struct sync_timeline *obj) {}
+static inline void gk20a_sync_timeline_signal(struct sync_timeline *obj) {}
+static inline struct sync_fence *gk20a_sync_fence_fdget(int fd)
+{
+	return NULL;
+}
+static inline struct sync_timeline *gk20a_sync_timeline_create(
+	const char *name) {
+		return NULL;
+}
+#endif
+
+#endif
-- 
cgit v1.2.2