From 70e69e2686527990865b221a60e0ec1e9a53d316 Mon Sep 17 00:00:00 2001
From: Debarshi Dutta <ddutta@nvidia.com>
Date: Mon, 23 Apr 2018 17:26:51 +0530
Subject: gpu: nvgpu: adapt gk20a_channel_syncpt to use os_fence

This patch adapts gk20a_channel_syncpt to use os_fence for both the
post-fence and the pre-fence (wait) use cases.

Jira NVGPU-66

Change-Id: I49627d1f88d52a53511a02f5de60fed6df8350de
Signed-off-by: Debarshi Dutta <ddutta@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1676631
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
---
 drivers/gpu/nvgpu/Makefile                         |   4 +
 drivers/gpu/nvgpu/common/linux/os_fence_android.c  |  14 +-
 .../nvgpu/common/linux/os_fence_android_syncpt.c   | 121 +++++++++++++++++
 drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c       | 148 ++++++++-------------
 drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h       |   4 +
 .../nvgpu/include/nvgpu/linux/os_fence_android.h   |   6 +-
 drivers/gpu/nvgpu/include/nvgpu/os_fence.h         |  19 +++
 7 files changed, 213 insertions(+), 103 deletions(-)
 create mode 100644 drivers/gpu/nvgpu/common/linux/os_fence_android_syncpt.c

diff --git a/drivers/gpu/nvgpu/Makefile b/drivers/gpu/nvgpu/Makefile
index 9a46f12c..5af7de35 100644
--- a/drivers/gpu/nvgpu/Makefile
+++ b/drivers/gpu/nvgpu/Makefile
@@ -164,6 +164,10 @@ nvgpu-$(CONFIG_SYNC) += gk20a/sync_gk20a.o \
 		common/linux/os_fence_android.o \
 		common/linux/os_fence_android_sema.o
 
+ifeq ($(CONFIG_TEGRA_GK20A_NVHOST), y)
+nvgpu-$(CONFIG_SYNC) += common/linux/os_fence_android_syncpt.o
+endif
+
 nvgpu-$(CONFIG_GK20A_PCI) += common/linux/pci.o \
 	common/linux/pci_usermode.o \
 
diff --git a/drivers/gpu/nvgpu/common/linux/os_fence_android.c b/drivers/gpu/nvgpu/common/linux/os_fence_android.c
index d689a2a8..d8f70639 100644
--- a/drivers/gpu/nvgpu/common/linux/os_fence_android.c
+++ b/drivers/gpu/nvgpu/common/linux/os_fence_android.c
@@ -55,15 +55,15 @@ void nvgpu_os_fence_android_drop_ref(struct nvgpu_os_fence *s)
 int nvgpu_os_fence_fdget(struct nvgpu_os_fence *fence_out,
 	struct channel_gk20a *c, int fd)
 {
-	int err;
+	int err = -ENOSYS;
 
-	err = nvgpu_os_fence_sema_fdget(fence_out, c, fd);
+#ifdef CONFIG_TEGRA_GK20A_NVHOST
+	err = nvgpu_os_fence_syncpt_fdget(fence_out, c, fd);
+#endif
+
+	if (err)
+		err = nvgpu_os_fence_sema_fdget(fence_out, c, fd);
 
-	/* TO-DO
-	 * check if fence is empty and if CONFIG_TEGRA_GK20A_NVHOST
-	 * is enabled, try to get a sync_fence using
-	 * corresponding nvhost method.
-	 */
 	if (err)
 		nvgpu_err(c->g, "error obtaining fence from fd %d", fd);
 
diff --git a/drivers/gpu/nvgpu/common/linux/os_fence_android_syncpt.c b/drivers/gpu/nvgpu/common/linux/os_fence_android_syncpt.c
new file mode 100644
index 00000000..76def831
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/linux/os_fence_android_syncpt.c
@@ -0,0 +1,121 @@
+/*
+ * Copyright (c) 2018, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/err.h>
+#include <nvgpu/errno.h>
+#include <nvgpu/types.h>
+#include <nvgpu/os_fence.h>
+#include <nvgpu/linux/os_fence_android.h>
+#include <nvgpu/nvhost.h>
+#include <nvgpu/atomic.h>
+
+#include "gk20a/gk20a.h"
+#include "gk20a/channel_gk20a.h"
+#include "gk20a/sync_gk20a.h"
+#include "gk20a/channel_sync_gk20a.h"
+#include "gk20a/mm_gk20a.h"
+
+#include "../drivers/staging/android/sync.h"
+
+/*
+ * program_waits hook for syncpoint-backed fences: validates every sync point
+ * of the sync_fence in s->priv, then fills one wait command slot per point
+ * in wait_cmd. A non-zero max_wait_cmds caps the accepted number of points.
+ * Returns 0, -EINVAL for a rejected fence, or the cmdbuf allocation error.
+ */
+int nvgpu_os_fence_syncpt_wait_gen_cmd(struct nvgpu_os_fence *s,
+	struct priv_cmd_entry *wait_cmd, struct channel_gk20a *c,
+	int max_wait_cmds)
+{
+	struct sync_fence *sf = (struct sync_fence *)s->priv;
+	struct sync_pt *spt;
+	int slot_words, nr_slots, idx, ret;
+	u32 spt_id, spt_thresh;
+
+	if (max_wait_cmds && sf->num_fences > max_wait_cmds)
+		return -EINVAL;
+
+	/* Reject the whole fence before allocating if any id is bad. */
+	for (idx = 0; idx < sf->num_fences; idx++) {
+		spt = sync_pt_from_fence(sf->cbs[idx].sync_pt);
+		spt_id = nvgpu_nvhost_sync_pt_id(spt);
+		if (!spt_id)
+			return -EINVAL;
+		if (!nvgpu_nvhost_syncpt_is_valid_pt_ext(c->g->nvhost_dev,
+				spt_id))
+			return -EINVAL;
+	}
+
+	nr_slots = nvgpu_nvhost_sync_num_pts(sf);
+	if (!nr_slots)
+		return 0;
+
+	slot_words = c->g->ops.fifo.get_syncpt_wait_cmd_size();
+	ret = gk20a_channel_alloc_priv_cmdbuf(c, slot_words * nr_slots,
+		wait_cmd);
+	if (ret) {
+		nvgpu_err(c->g, "not enough priv cmd buffer space");
+		return ret;
+	}
+
+	/* Slots are preallocated, so the helper's result is not checked. */
+	for (idx = 0; idx < sf->num_fences; idx++) {
+		spt = sync_pt_from_fence(sf->cbs[idx].sync_pt);
+		spt_id = nvgpu_nvhost_sync_pt_id(spt);
+		spt_thresh = nvgpu_nvhost_sync_pt_thresh(spt);
+		(void)gk20a_channel_gen_syncpt_wait_cmd(c, spt_id, spt_thresh,
+			wait_cmd, slot_words, idx, true);
+	}
+
+	WARN_ON(idx != nr_slots);
+
+	return 0;
+}
+
+static const struct nvgpu_os_fence_ops syncpt_ops = { /* syncpt-fence hooks */
+	.program_waits = nvgpu_os_fence_syncpt_wait_gen_cmd,
+	.drop_ref = nvgpu_os_fence_android_drop_ref, /* os_fence_android.c */
+};
+
+/* Wraps a new nvhost fence in fence_out; creation may fail with ERR_PTR. */
+int nvgpu_os_fence_syncpt_create(struct nvgpu_os_fence *fence_out,
+	struct channel_gk20a *c, struct nvgpu_nvhost_dev *nvhost_dev,
+	u32 id, u32 thresh)
+{
+	struct sync_fence *fence = nvgpu_nvhost_sync_create_fence(
+		nvhost_dev, id, thresh, "fence");
+
+	if (IS_ERR_OR_NULL(fence)) {
+		nvgpu_err(c->g, "error constructing fence %s", "fence");
+		return fence ? PTR_ERR(fence) : -ENOMEM;
+	}
+
+	nvgpu_os_fence_init(fence_out, c->g, &syncpt_ops, fence);
+	return 0;
+}
+
+/* Wraps the nvhost fence behind fd; -EINVAL when the lookup returns NULL. */
+int nvgpu_os_fence_syncpt_fdget(struct nvgpu_os_fence *fence_out,
+	struct channel_gk20a *c, int fd)
+{
+	struct sync_fence *fence = nvgpu_nvhost_sync_fdget(fd);
+
+	if (!fence)
+		return -EINVAL;
+
+	nvgpu_os_fence_init(fence_out, c->g, &syncpt_ops, fence);
+	return 0;
+}
\ No newline at end of file
diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
index a0b0ac1e..16898593 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
@@ -51,6 +51,39 @@ struct gk20a_channel_syncpt {
 	struct nvgpu_mem syncpt_buf;
 };
 
+/*
+ * Emit the wait on syncpoint (id, thresh) into slot pos of wait_cmd. An
+ * expired wait zero-fills a preallocated slot and allocates nothing else.
+ */
+int gk20a_channel_gen_syncpt_wait_cmd(struct channel_gk20a *c,
+	u32 id, u32 thresh, struct priv_cmd_entry *wait_cmd,
+	u32 wait_cmd_size, int pos, bool preallocated)
+{
+	struct gk20a *g = c->g;
+
+	if (nvgpu_nvhost_syncpt_is_expired_ext(g->nvhost_dev, id, thresh)) {
+		if (preallocated)
+			nvgpu_memset(g, wait_cmd->mem,
+				(wait_cmd->off + pos * wait_cmd_size) *
+				sizeof(u32), 0, wait_cmd_size * sizeof(u32));
+		return 0;
+	}
+
+	if (!preallocated) {
+		int ret = gk20a_channel_alloc_priv_cmdbuf(c,
+			g->ops.fifo.get_syncpt_wait_cmd_size(), wait_cmd);
+		if (ret) {
+			nvgpu_err(g, "not enough priv cmd buffer space");
+			return ret;
+		}
+	}
+	nvgpu_log(g, gpu_dbg_info, "sp->id %d gpu va %llx",
+			id, c->vm->syncpt_ro_map_gpu_va);
+	g->ops.fifo.add_syncpt_wait_cmd(g, wait_cmd, pos * wait_cmd_size,
+		id, thresh, c->vm->syncpt_ro_map_gpu_va);
+	return 0;
+}
+
 static int gk20a_channel_syncpt_wait_syncpt(struct gk20a_channel_sync *s,
 		u32 id, u32 thresh, struct priv_cmd_entry *wait_cmd)
 {
@@ -58,108 +91,36 @@ static int gk20a_channel_syncpt_wait_syncpt(struct gk20a_channel_sync *s,
 		container_of(s, struct gk20a_channel_syncpt, ops);
 	struct channel_gk20a *c = sp->c;
 	int err = 0;
+	u32 wait_cmd_size = c->g->ops.fifo.get_syncpt_wait_cmd_size();
 
 	if (!nvgpu_nvhost_syncpt_is_valid_pt_ext(sp->nvhost_dev, id))
 		return -EINVAL;
 
-	if (nvgpu_nvhost_syncpt_is_expired_ext(sp->nvhost_dev, id, thresh))
-		return 0;
+	err = gk20a_channel_gen_syncpt_wait_cmd(c, id, thresh,
+			wait_cmd, wait_cmd_size, 0, false);
 
-	err = gk20a_channel_alloc_priv_cmdbuf(c,
-			c->g->ops.fifo.get_syncpt_wait_cmd_size(), wait_cmd);
-	if (err) {
-		nvgpu_err(c->g,
-				"not enough priv cmd buffer space");
-		return err;
-	}
-
-	nvgpu_log(c->g, gpu_dbg_info, "sp->id %d gpu va %llx",
-					id, sp->c->vm->syncpt_ro_map_gpu_va);
-	c->g->ops.fifo.add_syncpt_wait_cmd(c->g, wait_cmd, 0, id,
-					thresh, c->vm->syncpt_ro_map_gpu_va);
-
-	return 0;
+	return err;
 }
 
 static int gk20a_channel_syncpt_wait_fd(struct gk20a_channel_sync *s, int fd,
-		       struct priv_cmd_entry *wait_cmd, int max_wait_cmds)
+	struct priv_cmd_entry *wait_cmd, int max_wait_cmds)
 {
-#ifdef CONFIG_SYNC
-	int i;
-	int num_wait_cmds;
-	struct sync_fence *sync_fence;
-	struct sync_pt *pt;
+	struct nvgpu_os_fence os_fence = {0};
 	struct gk20a_channel_syncpt *sp =
 		container_of(s, struct gk20a_channel_syncpt, ops);
 	struct channel_gk20a *c = sp->c;
-	u32 wait_id;
 	int err = 0;
-	u32 wait_cmd_size = 0;
-
-	sync_fence = nvgpu_nvhost_sync_fdget(fd);
-	if (!sync_fence)
-		return -EINVAL;
 
-	if (max_wait_cmds && sync_fence->num_fences > max_wait_cmds) {
-		sync_fence_put(sync_fence);
+	err = nvgpu_os_fence_fdget(&os_fence, c, fd);
+	if (err)
 		return -EINVAL;
-	}
-
-	/* validate syncpt ids */
-	for (i = 0; i < sync_fence->num_fences; i++) {
-		pt = sync_pt_from_fence(sync_fence->cbs[i].sync_pt);
-		wait_id = nvgpu_nvhost_sync_pt_id(pt);
-		if (!wait_id || !nvgpu_nvhost_syncpt_is_valid_pt_ext(
-					sp->nvhost_dev, wait_id)) {
-			sync_fence_put(sync_fence);
-			return -EINVAL;
-		}
-	}
 
-	num_wait_cmds = nvgpu_nvhost_sync_num_pts(sync_fence);
-	if (num_wait_cmds == 0) {
-		sync_fence_put(sync_fence);
-		return 0;
-	}
-	wait_cmd_size = c->g->ops.fifo.get_syncpt_wait_cmd_size();
-	err = gk20a_channel_alloc_priv_cmdbuf(c,
-		wait_cmd_size * num_wait_cmds,
-		wait_cmd);
-	if (err) {
-		nvgpu_err(c->g,
-				"not enough priv cmd buffer space");
-		sync_fence_put(sync_fence);
-		return err;
-	}
-
-	i = 0;
-	for (i = 0; i < sync_fence->num_fences; i++) {
-		struct fence *f = sync_fence->cbs[i].sync_pt;
-		struct sync_pt *pt = sync_pt_from_fence(f);
-		u32 wait_id = nvgpu_nvhost_sync_pt_id(pt);
-		u32 wait_value = nvgpu_nvhost_sync_pt_thresh(pt);
-
-		if (nvgpu_nvhost_syncpt_is_expired_ext(sp->nvhost_dev,
-				wait_id, wait_value)) {
-			nvgpu_memset(c->g, wait_cmd->mem,
-			(wait_cmd->off + i * wait_cmd_size) * sizeof(u32),
-				0, wait_cmd_size * sizeof(u32));
-		} else {
-			nvgpu_log(c->g, gpu_dbg_info, "sp->id %d gpu va %llx",
-					wait_id, sp->syncpt_buf.gpu_va);
-			c->g->ops.fifo.add_syncpt_wait_cmd(c->g, wait_cmd,
-				i * wait_cmd_size, wait_id, wait_value,
-				c->vm->syncpt_ro_map_gpu_va);
-		}
-	}
+	err = os_fence.ops->program_waits(&os_fence,
+		wait_cmd, c, max_wait_cmds);
 
-	WARN_ON(i != num_wait_cmds);
-	sync_fence_put(sync_fence);
+	os_fence.ops->drop_ref(&os_fence);
 
-	return 0;
-#else
-	return -ENODEV;
-#endif
+	return err;
 }
 
 static void gk20a_channel_syncpt_update(void *priv, int nr_completed)
@@ -185,6 +146,7 @@ static int __gk20a_channel_syncpt_incr(struct gk20a_channel_sync *s,
 		container_of(s, struct gk20a_channel_syncpt, ops);
 	struct channel_gk20a *c = sp->c;
 	struct sync_fence *sync_fence = NULL;
+	struct nvgpu_os_fence os_fence = {0};
 
 	err = gk20a_channel_alloc_priv_cmdbuf(c,
 			c->g->ops.fifo.get_syncpt_incr_cmd_size(wfi_cmd),
@@ -226,26 +188,22 @@ static int __gk20a_channel_syncpt_incr(struct gk20a_channel_sync *s,
 		}
 	}
 
-#ifdef CONFIG_SYNC
 	if (need_sync_fence) {
-		sync_fence = nvgpu_nvhost_sync_create_fence(sp->nvhost_dev,
-			sp->id, thresh, "fence");
+		err = nvgpu_os_fence_syncpt_create(&os_fence, c, sp->nvhost_dev,
+			sp->id, thresh);
 
-		if (IS_ERR(sync_fence)) {
-			err = PTR_ERR(sync_fence);
+		if (err)
 			goto clean_up_priv_cmd;
-		}
+
+		sync_fence = (struct sync_fence *)os_fence.priv;
 	}
-#endif
 
 	err = gk20a_fence_from_syncpt(fence, sp->nvhost_dev,
 	 sp->id, thresh, sync_fence);
 
 	if (err) {
-#ifdef CONFIG_SYNC
-		if (sync_fence)
-			sync_fence_put(sync_fence);
-#endif
+		if (nvgpu_os_fence_is_initialized(&os_fence))
+			os_fence.ops->drop_ref(&os_fence);
 		goto clean_up_priv_cmd;
 	}
 
diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h
index 565b4f86..5c6e91c9 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h
@@ -108,6 +108,10 @@ void gk20a_channel_gen_sema_wait_cmd(struct channel_gk20a *c,
 	struct nvgpu_semaphore *sema, struct priv_cmd_entry *wait_cmd,
 	u32 wait_cmd_size, int pos);
 
+int gk20a_channel_gen_syncpt_wait_cmd(struct channel_gk20a *c,
+		u32 id, u32 thresh, struct priv_cmd_entry *wait_cmd,
+		u32 wait_cmd_size, int pos, bool preallocated);
+
 void gk20a_channel_sync_destroy(struct gk20a_channel_sync *sync,
 	bool set_safe_state);
 struct gk20a_channel_sync *gk20a_channel_sync_create(struct channel_gk20a *c,
diff --git a/drivers/gpu/nvgpu/include/nvgpu/linux/os_fence_android.h b/drivers/gpu/nvgpu/include/nvgpu/linux/os_fence_android.h
index 79cc51ea..39d08339 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/linux/os_fence_android.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/linux/os_fence_android.h
@@ -39,4 +39,8 @@ void nvgpu_os_fence_init(struct nvgpu_os_fence *fence_out,
 	struct gk20a *g, const struct nvgpu_os_fence_ops *fops,
 	struct sync_fence *fence);
 
-#endif
\ No newline at end of file
+int nvgpu_os_fence_syncpt_fdget(
+	struct nvgpu_os_fence *fence_out,
+	struct channel_gk20a *c, int fd);
+
+#endif /* __NVGPU_OS_FENCE_ANDROID_H__ */
\ No newline at end of file
diff --git a/drivers/gpu/nvgpu/include/nvgpu/os_fence.h b/drivers/gpu/nvgpu/include/nvgpu/os_fence.h
index c8d24fc2..a22140da 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/os_fence.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/os_fence.h
@@ -28,6 +28,7 @@
 struct nvgpu_semaphore;
 struct channel_gk20a;
 struct priv_cmd_entry;
+struct nvgpu_nvhost_dev;
 
 /*
  * struct nvgpu_os_fence adds an abstraction to the earlier Android Sync
@@ -108,4 +109,22 @@ static inline int nvgpu_os_fence_fdget(
 
 #endif /* CONFIG_SYNC */
 
+#if defined(CONFIG_TEGRA_GK20A_NVHOST) && defined(CONFIG_SYNC)
+
+int nvgpu_os_fence_syncpt_create(struct nvgpu_os_fence *fence_out,
+	struct channel_gk20a *c, struct nvgpu_nvhost_dev *nvhost_dev,
+	u32 id, u32 thresh);
+
+#else
+
+/* Stub for builds without both CONFIG_TEGRA_GK20A_NVHOST and CONFIG_SYNC. */
+static inline int nvgpu_os_fence_syncpt_create(
+	struct nvgpu_os_fence *fence_out, struct channel_gk20a *c,
+	struct nvgpu_nvhost_dev *nvhost_dev, u32 id, u32 thresh)
+{
+	return -ENOSYS;
+}
+
+#endif /* CONFIG_TEGRA_GK20A_NVHOST && CONFIG_SYNC */
+
 #endif /* __NVGPU_OS_FENCE__ */
-- 
cgit v1.2.2