From c0822cb22e13204e06b145ae950a33d45e95918e Mon Sep 17 00:00:00 2001
From: seshendra Gadagottu <sgadagottu@nvidia.com>
Date: Fri, 17 Mar 2017 11:30:57 -0700
Subject: gpu: nvgpu: add chip specific sync point support

Add support for chip-specific sync point implementations.
The relevant fifo HAL functions are added to gpu_ops, and the
legacy chips (gk20a, gm20b) are updated to use them.

JIRA GPUT19X-2

Change-Id: I9a9c36d71e15c384b5e5af460cd52012f94e0b04
Signed-off-by: seshendra Gadagottu <sgadagottu@nvidia.com>
Reviewed-on: http://git-master/r/1258232
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
---
 drivers/gpu/nvgpu/Makefile.nvgpu             |  1 +
 drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c | 90 +++++++++++-----------------
 drivers/gpu/nvgpu/gk20a/fifo_gk20a.c         | 83 +++++++++++++++++++++++++
 drivers/gpu/nvgpu/gk20a/fifo_gk20a.h         | 16 +++++
 drivers/gpu/nvgpu/gk20a/gk20a.h              | 21 +++++++
 drivers/gpu/nvgpu/gm20b/fifo_gm20b.c         | 10 ++++
 6 files changed, 166 insertions(+), 55 deletions(-)

(limited to 'drivers/gpu')

diff --git a/drivers/gpu/nvgpu/Makefile.nvgpu b/drivers/gpu/nvgpu/Makefile.nvgpu
index 109d1e4d..e21a9426 100644
--- a/drivers/gpu/nvgpu/Makefile.nvgpu
+++ b/drivers/gpu/nvgpu/Makefile.nvgpu
@@ -18,6 +18,7 @@ ccflags-y += -I$(srctree)/../nvgpu-t19x/drivers/gpu/nvgpu
 ccflags-y += -I$(srctree)/../nvgpu-t19x/drivers/gpu/nvgpu/include
 ccflags-y += -I$(srctree)/../nvgpu-t19x/include
 ccflags-y += -I$(srctree)/../nvgpu-t19x/include/uapi
+ccflags-y += -I$(srctree)/../nvhost-t19x/include
 endif
 
 obj-$(CONFIG_GK20A) := nvgpu.o
diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
index eac7dbc3..07157dc9 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
@@ -42,22 +42,9 @@ struct gk20a_channel_syncpt {
 	struct channel_gk20a *c;
 	struct platform_device *host1x_pdev;
 	u32 id;
+	struct nvgpu_mem syncpt_buf;
 };
 
-static void add_wait_cmd(struct gk20a *g, struct priv_cmd_entry *cmd, u32 off,
-		u32 id, u32 thresh)
-{
-	off = cmd->off + off;
-	/* syncpoint_a */
-	nvgpu_mem_wr32(g, cmd->mem, off++, 0x2001001C);
-	/* payload */
-	nvgpu_mem_wr32(g, cmd->mem, off++, thresh);
-	/* syncpoint_b */
-	nvgpu_mem_wr32(g, cmd->mem, off++, 0x2001001D);
-	/* syncpt_id, switch_en, wait */
-	nvgpu_mem_wr32(g, cmd->mem, off++, (id << 8) | 0x10);
-}
-
 static int gk20a_channel_syncpt_wait_syncpt(struct gk20a_channel_sync *s,
 		u32 id, u32 thresh, struct priv_cmd_entry *wait_cmd,
 		struct gk20a_fence *fence)
@@ -75,14 +62,18 @@ static int gk20a_channel_syncpt_wait_syncpt(struct gk20a_channel_sync *s,
 	if (nvhost_syncpt_is_expired_ext(sp->host1x_pdev, id, thresh))
 		return 0;
 
-	err = gk20a_channel_alloc_priv_cmdbuf(c, 4, wait_cmd);
+	err = gk20a_channel_alloc_priv_cmdbuf(c,
+			c->g->ops.fifo.get_syncpt_wait_cmd_size(), wait_cmd);
 	if (err) {
 		nvgpu_err(c->g,
 				"not enough priv cmd buffer space");
 		return err;
 	}
 
-	add_wait_cmd(c->g, wait_cmd, 0, id, thresh);
+	nvgpu_log(c->g, gpu_dbg_info, "sp->id %d gpu va %llx",
+					id, sp->syncpt_buf.gpu_va);
+	c->g->ops.fifo.add_syncpt_wait_cmd(c->g, wait_cmd, 0, id,
+						thresh, sp->syncpt_buf.gpu_va);
 
 	return 0;
 }
@@ -101,6 +92,7 @@ static int gk20a_channel_syncpt_wait_fd(struct gk20a_channel_sync *s, int fd,
 	struct channel_gk20a *c = sp->c;
 	u32 wait_id;
 	int err = 0;
+	u32 wait_cmd_size = 0;
 
 	sync_fence = nvhost_sync_fdget(fd);
 	if (!sync_fence)
@@ -130,8 +122,10 @@ static int gk20a_channel_syncpt_wait_fd(struct gk20a_channel_sync *s, int fd,
 		sync_fence_put(sync_fence);
 		return 0;
 	}
-
-	err = gk20a_channel_alloc_priv_cmdbuf(c, 4 * num_wait_cmds, wait_cmd);
+	wait_cmd_size = c->g->ops.fifo.get_syncpt_wait_cmd_size();
+	err = gk20a_channel_alloc_priv_cmdbuf(c,
+		wait_cmd_size * num_wait_cmds,
+		wait_cmd);
 	if (err) {
 		nvgpu_err(c->g,
 				"not enough priv cmd buffer space");
@@ -152,13 +146,16 @@ static int gk20a_channel_syncpt_wait_fd(struct gk20a_channel_sync *s, int fd,
 
 		if (nvhost_syncpt_is_expired_ext(sp->host1x_pdev,
 				wait_id, wait_value)) {
-			/* each wait_cmd is 4 u32s */
 			nvgpu_memset(c->g, wait_cmd->mem,
-					(wait_cmd->off + i * 4) * sizeof(u32),
-					0, 4 * sizeof(u32));
-		} else
-			add_wait_cmd(c->g, wait_cmd, i * 4, wait_id,
-					wait_value);
+			(wait_cmd->off + i * wait_cmd_size) * sizeof(u32),
+				0, wait_cmd_size * sizeof(u32));
+		} else {
+			nvgpu_log(c->g, gpu_dbg_info, "sp->id %d gpu va %llx",
+					wait_id, sp->syncpt_buf.gpu_va);
+			c->g->ops.fifo.add_syncpt_wait_cmd(c->g, wait_cmd,
+				i * wait_cmd_size, wait_id, wait_value,
+				sp->syncpt_buf.gpu_va);
+		}
 #if LINUX_VERSION_CODE < KERNEL_VERSION(3,18,0)
 		i++;
 	}
@@ -193,45 +190,21 @@ static int __gk20a_channel_syncpt_incr(struct gk20a_channel_sync *s,
 				       bool need_sync_fence)
 {
 	u32 thresh;
-	size_t incr_cmd_size;
-	int off;
 	int err;
 	struct gk20a_channel_syncpt *sp =
 		container_of(s, struct gk20a_channel_syncpt, ops);
 	struct channel_gk20a *c = sp->c;
 
-	incr_cmd_size = 6;
-	if (wfi_cmd)
-		incr_cmd_size += 2;
-
-	err = gk20a_channel_alloc_priv_cmdbuf(c, incr_cmd_size, incr_cmd);
+	err = gk20a_channel_alloc_priv_cmdbuf(c,
+			c->g->ops.fifo.get_syncpt_incr_cmd_size(wfi_cmd),
+			incr_cmd);
 	if (err)
 		return err;
 
-	off = incr_cmd->off;
-
-	/* WAR for hw bug 1491360: syncpt needs to be incremented twice */
-
-	if (wfi_cmd) {
-		/* wfi */
-		nvgpu_mem_wr32(c->g, incr_cmd->mem, off++, 0x2001001E);
-		/* handle, ignored */
-		nvgpu_mem_wr32(c->g, incr_cmd->mem, off++, 0x00000000);
-	}
-	/* syncpoint_a */
-	nvgpu_mem_wr32(c->g, incr_cmd->mem, off++, 0x2001001C);
-	/* payload, ignored */
-	nvgpu_mem_wr32(c->g, incr_cmd->mem, off++, 0);
-	/* syncpoint_b */
-	nvgpu_mem_wr32(c->g, incr_cmd->mem, off++, 0x2001001D);
-	/* syncpt_id, incr */
-	nvgpu_mem_wr32(c->g, incr_cmd->mem, off++, (sp->id << 8) | 0x1);
-	/* syncpoint_b */
-	nvgpu_mem_wr32(c->g, incr_cmd->mem, off++, 0x2001001D);
-	/* syncpt_id, incr */
-	nvgpu_mem_wr32(c->g, incr_cmd->mem, off++, (sp->id << 8) | 0x1);
-
-	WARN_ON(off - incr_cmd->off != incr_cmd_size);
+	nvgpu_log(c->g, gpu_dbg_info, "sp->id %d gpu va %llx",
+				sp->id, sp->syncpt_buf.gpu_va);
+	c->g->ops.fifo.add_syncpt_incr_cmd(c->g, wfi_cmd,
+			incr_cmd, sp->id, sp->syncpt_buf.gpu_va);
 
 	thresh = nvhost_syncpt_incr_max_ext(sp->host1x_pdev, sp->id, 2);
 
@@ -337,6 +310,9 @@ static void gk20a_channel_syncpt_destroy(struct gk20a_channel_sync *s)
 {
 	struct gk20a_channel_syncpt *sp =
 		container_of(s, struct gk20a_channel_syncpt, ops);
+
+	sp->c->g->ops.fifo.free_syncpt_buf(sp->c, &sp->syncpt_buf);
+
 	nvhost_syncpt_set_min_eq_max_ext(sp->host1x_pdev, sp->id);
 	nvhost_syncpt_put_ref_ext(sp->host1x_pdev, sp->id);
 	nvgpu_kfree(sp->c->g, sp);
@@ -366,6 +342,18 @@ gk20a_channel_syncpt_create(struct channel_gk20a *c)
 		return NULL;
 	}
 
+	/*
+	 * Set up the chip-specific syncpoint buffer. On failure, drop the
+	 * syncpoint reference and free the sync object, as destroy would.
+	 */
+	if (sp->c->g->ops.fifo.alloc_syncpt_buf(sp->c, sp->id,
+				&sp->syncpt_buf)) {
+		nvgpu_err(sp->c->g, "failed to allocate syncpt buffer");
+		nvhost_syncpt_put_ref_ext(sp->host1x_pdev, sp->id);
+		nvgpu_kfree(sp->c->g, sp);
+		return NULL;
+	}
+
 	nvhost_syncpt_set_min_eq_max_ext(sp->host1x_pdev, sp->id);
 
 	atomic_set(&sp->ops.refcount, 0);
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
index 05c13374..b8ff84df 100644
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
@@ -4265,6 +4265,99 @@ u32 gk20a_fifo_pbdma_acquire_val(u64 timeout)
 	return val;
 }
 
+#ifdef CONFIG_TEGRA_GK20A_NVHOST
+/*
+ * Emit a syncpoint wait into @cmd, @off words past the start of the entry:
+ * wait for syncpoint @id to reach @thresh. Writes exactly
+ * gk20a_fifo_get_syncpt_wait_cmd_size() words. @gpu_va belongs to the fifo
+ * HAL signature and is not used by this implementation.
+ */
+void gk20a_fifo_add_syncpt_wait_cmd(struct gk20a *g,
+		struct priv_cmd_entry *cmd, u32 off,
+		u32 id, u32 thresh, u64 gpu_va)
+{
+	gk20a_dbg_fn("");
+
+	off = cmd->off + off;
+	/* syncpoint_a */
+	nvgpu_mem_wr32(g, cmd->mem, off++, 0x2001001C);
+	/* payload */
+	nvgpu_mem_wr32(g, cmd->mem, off++, thresh);
+	/* syncpoint_b */
+	nvgpu_mem_wr32(g, cmd->mem, off++, 0x2001001D);
+	/* syncpt_id, switch_en, wait */
+	nvgpu_mem_wr32(g, cmd->mem, off++, (id << 8) | 0x10);
+}
+
+/* Size, in 32-bit words, of the command add_syncpt_wait_cmd writes. */
+u32 gk20a_fifo_get_syncpt_wait_cmd_size(void)
+{
+	return 4;
+}
+
+/*
+ * Size, in 32-bit words, of the command add_syncpt_incr_cmd writes: six
+ * words for the two increments, plus two when a WFI is prepended.
+ */
+u32 gk20a_fifo_get_syncpt_incr_cmd_size(bool wfi_cmd)
+{
+	if (wfi_cmd)
+		return 8;
+	else
+		return 6;
+}
+
+/*
+ * Emit the increment sequence for syncpoint @id at the start of @cmd,
+ * preceded by a WFI when @wfi_cmd is set. @gpu_va belongs to the fifo HAL
+ * signature and is not used by this implementation.
+ */
+void gk20a_fifo_add_syncpt_incr_cmd(struct gk20a *g,
+		bool wfi_cmd, struct priv_cmd_entry *cmd,
+		u32 id, u64 gpu_va)
+{
+	u32 off = cmd->off;
+
+	gk20a_dbg_fn("");
+	if (wfi_cmd) {
+		/* wfi */
+		nvgpu_mem_wr32(g, cmd->mem, off++, 0x2001001E);
+		/* handle, ignored */
+		nvgpu_mem_wr32(g, cmd->mem, off++, 0x00000000);
+	}
+	/* syncpoint_a */
+	nvgpu_mem_wr32(g, cmd->mem, off++, 0x2001001C);
+	/* payload, ignored */
+	nvgpu_mem_wr32(g, cmd->mem, off++, 0);
+	/* syncpoint_b */
+	nvgpu_mem_wr32(g, cmd->mem, off++, 0x2001001D);
+	/* syncpt_id, incr */
+	nvgpu_mem_wr32(g, cmd->mem, off++, (id << 8) | 0x1);
+	/* WAR for hw bug 1491360: syncpt needs to be incremented twice */
+	/* syncpoint_b */
+	nvgpu_mem_wr32(g, cmd->mem, off++, 0x2001001D);
+	/* syncpt_id, incr */
+	nvgpu_mem_wr32(g, cmd->mem, off++, (id << 8) | 0x1);
+
+	/* the emitted length must match what callers allocate for it */
+	WARN_ON(off - cmd->off !=
+		gk20a_fifo_get_syncpt_incr_cmd_size(wfi_cmd));
+}
+
+/* No-op: this implementation does not use a syncpoint buffer. */
+void gk20a_fifo_free_syncpt_buf(struct channel_gk20a *c,
+				struct nvgpu_mem *syncpt_buf)
+{
+}
+
+/* No-op: no syncpoint buffer is allocated here. Always returns 0. */
+int gk20a_fifo_alloc_syncpt_buf(struct channel_gk20a *c,
+			u32 syncpt_id, struct nvgpu_mem *syncpt_buf)
+{
+	return 0;
+}
+#endif
+
 void gk20a_init_fifo(struct gpu_ops *gops)
 {
 	gops->fifo.disable_channel = gk20a_fifo_disable_channel;
@@ -4312,4 +4405,14 @@ void gk20a_init_fifo(struct gpu_ops *gops)
 	gops->fifo.teardown_ch_tsg = gk20a_fifo_teardown_ch_tsg;
 	gops->fifo.handle_sched_error = gk20a_fifo_handle_sched_error;
 	gops->fifo.handle_pbdma_intr_0 = gk20a_fifo_handle_pbdma_intr_0;
+#ifdef CONFIG_TEGRA_GK20A_NVHOST
+	gops->fifo.alloc_syncpt_buf = gk20a_fifo_alloc_syncpt_buf;
+	gops->fifo.free_syncpt_buf = gk20a_fifo_free_syncpt_buf;
+	gops->fifo.add_syncpt_wait_cmd = gk20a_fifo_add_syncpt_wait_cmd;
+	gops->fifo.get_syncpt_wait_cmd_size =
+				gk20a_fifo_get_syncpt_wait_cmd_size;
+	gops->fifo.add_syncpt_incr_cmd = gk20a_fifo_add_syncpt_incr_cmd;
+	gops->fifo.get_syncpt_incr_cmd_size =
+				gk20a_fifo_get_syncpt_incr_cmd_size;
+#endif
 }
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h
index 7351478a..80f1853c 100644
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h
@@ -405,4 +405,20 @@ unsigned int gk20a_fifo_handle_pbdma_intr_0(struct gk20a *g, u32 pbdma_id,
 			u32 pbdma_intr_0, u32 *handled, u32 *error_notifier);
 
 u32 gk20a_fifo_default_timeslice_us(struct gk20a *g);
+
+#ifdef CONFIG_TEGRA_GK20A_NVHOST
+void gk20a_fifo_add_syncpt_wait_cmd(struct gk20a *g,
+			struct priv_cmd_entry *cmd, u32 off,
+			u32 id, u32 thresh, u64 gpu_va);
+u32 gk20a_fifo_get_syncpt_wait_cmd_size(void);
+void gk20a_fifo_add_syncpt_incr_cmd(struct gk20a *g,
+			bool wfi_cmd, struct priv_cmd_entry *cmd,
+			u32 id, u64 gpu_va);
+u32 gk20a_fifo_get_syncpt_incr_cmd_size(bool wfi_cmd);
+void gk20a_fifo_free_syncpt_buf(struct channel_gk20a *c,
+			struct nvgpu_mem *syncpt_buf);
+int gk20a_fifo_alloc_syncpt_buf(struct channel_gk20a *c,
+			u32 syncpt_id, struct nvgpu_mem *syncpt_buf);
+#endif
+
 #endif /*__GR_GK20A_H__*/
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index 29ac4763..f246b857 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -499,6 +499,20 @@ struct gpu_ops {
 		unsigned int (*handle_pbdma_intr_0)(struct gk20a *g,
 					u32 pbdma_id, u32 pbdma_intr_0,
 					u32 *handled, u32 *error_notifier);
+#ifdef CONFIG_TEGRA_GK20A_NVHOST
+		int (*alloc_syncpt_buf)(struct channel_gk20a *c,
+				u32 syncpt_id, struct nvgpu_mem *syncpt_buf);
+		void (*free_syncpt_buf)(struct channel_gk20a *c,
+				struct nvgpu_mem *syncpt_buf);
+		void (*add_syncpt_wait_cmd)(struct gk20a *g,
+					struct priv_cmd_entry *cmd, u32 off,
+					u32 id, u32 thresh, u64 gpu_va);
+		u32 (*get_syncpt_wait_cmd_size)(void);
+		void (*add_syncpt_incr_cmd)(struct gk20a *g,
+			bool wfi_cmd, struct priv_cmd_entry *cmd,
+			u32 id, u64 gpu_va);
+		u32 (*get_syncpt_incr_cmd_size)(bool wfi_cmd);
+#endif
 	} fifo;
 	struct pmu_v {
 		/*used for change of enum zbc update cmd id from ver 0 to ver1*/
@@ -1243,6 +1257,13 @@ struct gk20a {
 	/* Some boards might be missing power sensor, preventing
 	 * from monitoring power, current and voltage */
 	bool power_sensor_missing;
+
+#if defined(CONFIG_TEGRA_GK20A_NVHOST) && defined(CONFIG_TEGRA_19x_GPU)
+	phys_addr_t	syncpt_unit_base;
+	size_t		syncpt_unit_size;
+	u32		syncpt_size;
+#endif
+
 };
 
 static inline unsigned long gk20a_get_gr_idle_timeout(struct gk20a *g)
diff --git a/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c b/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c
index 0931d226..6fb5802b 100644
--- a/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c
@@ -230,4 +230,14 @@ void gm20b_init_fifo(struct gpu_ops *gops)
 	gops->fifo.teardown_ch_tsg = gk20a_fifo_teardown_ch_tsg;
 	gops->fifo.handle_sched_error = gk20a_fifo_handle_sched_error;
 	gops->fifo.handle_pbdma_intr_0 = gk20a_fifo_handle_pbdma_intr_0;
+#ifdef CONFIG_TEGRA_GK20A_NVHOST
+	gops->fifo.alloc_syncpt_buf = gk20a_fifo_alloc_syncpt_buf;
+	gops->fifo.free_syncpt_buf = gk20a_fifo_free_syncpt_buf;
+	gops->fifo.add_syncpt_wait_cmd = gk20a_fifo_add_syncpt_wait_cmd;
+	gops->fifo.get_syncpt_wait_cmd_size =
+			gk20a_fifo_get_syncpt_wait_cmd_size;
+	gops->fifo.add_syncpt_incr_cmd = gk20a_fifo_add_syncpt_incr_cmd;
+	gops->fifo.get_syncpt_incr_cmd_size =
+			gk20a_fifo_get_syncpt_incr_cmd_size;
+#endif
 }
-- 
cgit v1.2.2