From 8d5536271f989e01018a543016340a3d76a2fae2 Mon Sep 17 00:00:00 2001
From: Deepak Nibade <dnibade@nvidia.com>
Date: Tue, 13 Feb 2018 04:02:38 -0800
Subject: gpu: nvgpu: add user API to get a syncpoint

Add a new user API, NVGPU_IOCTL_CHANNEL_GET_USER_SYNCPOINT, which exposes the
per-channel allocated syncpoint to user space.
The API also returns the current max value of the syncpoint.
On supported platforms, the API also returns a RW semaphore address
(corresponding to the syncpoint shim) to user space.

Add a new characteristics flag, NVGPU_GPU_FLAGS_SUPPORT_USER_SYNCPOINT, to
indicate support for this new API.
Add a new flag, NVGPU_SUPPORT_USER_SYNCPOINT, for use by the core driver.

Set this flag for GV11B and GP10B for now.

Add a new callback, (*syncpt_address), to struct gk20a_channel_sync to get the
GPU VA of a syncpoint.

Add a new API, nvgpu_nvhost_syncpt_read_maxval(), which reads and returns the
max value of a syncpoint.

Bug 200326065
Jira NVGPU-179

Change-Id: I9da6f17b85996f4fc6731c0bf94fca6f3181c3e0
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1658009
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-by: Vijayakumar Subbu <vsubbu@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
---
 drivers/gpu/nvgpu/common/linux/ioctl_channel.c | 67 ++++++++++++++++++++++++++
 drivers/gpu/nvgpu/common/linux/ioctl_ctrl.c    |  2 +
 drivers/gpu/nvgpu/common/linux/nvhost.c        |  6 +++
 drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c   | 14 ++++++
 drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h   |  3 ++
 drivers/gpu/nvgpu/gp10b/gp10b.c                |  1 +
 drivers/gpu/nvgpu/gv11b/gv11b.c                |  2 +
 drivers/gpu/nvgpu/include/nvgpu/enabled.h      |  4 +-
 drivers/gpu/nvgpu/include/nvgpu/nvhost.h       |  2 +
 include/uapi/linux/nvgpu.h                     | 12 ++++-
 10 files changed, 111 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/nvgpu/common/linux/ioctl_channel.c b/drivers/gpu/nvgpu/common/linux/ioctl_channel.c
index b7856ca0..ab6ac9b9 100644
--- a/drivers/gpu/nvgpu/common/linux/ioctl_channel.c
+++ b/drivers/gpu/nvgpu/common/linux/ioctl_channel.c
@@ -32,6 +32,7 @@
 #include <nvgpu/enabled.h>
 #include <nvgpu/error_notifier.h>
 #include <nvgpu/barrier.h>
+#include <nvgpu/nvhost.h>
 
 #include "gk20a/gk20a.h"
 #include "gk20a/dbg_gpu_gk20a.h"
@@ -948,6 +949,60 @@ static int nvgpu_ioctl_channel_set_preemption_mode(struct channel_gk20a *ch,
 	return err;
 }
 
+/* Report the channel's syncpoint id, max value and, if supported, GPU VA. */
+static int nvgpu_ioctl_channel_get_user_syncpoint(struct channel_gk20a *ch,
+	struct nvgpu_get_user_syncpoint_args *args)
+{
+#ifdef CONFIG_TEGRA_GK20A_NVHOST
+	struct gk20a *g = ch->g;
+	bool created = false;
+	int err;
+
+	if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_USER_SYNCPOINT)) {
+		nvgpu_err(g, "user syncpoints not supported");
+		return -EINVAL;
+	}
+
+	if (!gk20a_platform_has_syncpoints(g)) {
+		nvgpu_err(g, "syncpoints not supported");
+		return -EINVAL;
+	}
+
+	if (g->aggressive_sync_destroy_thresh) {
+		nvgpu_err(g, "sufficient syncpoints not available");
+		return -EINVAL;
+	}
+
+	/* Create the sync on first use; only a new sync re-sets up ramfc. */
+	nvgpu_mutex_acquire(&ch->sync_lock);
+	if (!ch->sync) {
+		ch->sync = gk20a_channel_sync_create(ch);
+		if (!ch->sync) {
+			nvgpu_mutex_release(&ch->sync_lock);
+			return -ENOMEM;
+		}
+		created = true;
+	}
+	nvgpu_mutex_release(&ch->sync_lock);
+
+	if (created && g->ops.fifo.resetup_ramfc) {
+		err = g->ops.fifo.resetup_ramfc(ch);
+		if (err)
+			return err;
+	}
+
+	args->syncpoint_id = ch->sync->syncpt_id(ch->sync);
+	args->syncpoint_max = nvgpu_nvhost_syncpt_read_maxval(g->nvhost_dev,
+						args->syncpoint_id);
+	args->gpu_va = nvgpu_is_enabled(g, NVGPU_SUPPORT_SYNCPOINT_ADDRESS) ?
+		       ch->sync->syncpt_address(ch->sync) : 0;
+
+	return 0;
+#else /* !CONFIG_TEGRA_GK20A_NVHOST */
+	return -EINVAL;
+#endif /* CONFIG_TEGRA_GK20A_NVHOST */
+}
+
 long gk20a_channel_ioctl(struct file *filp,
 	unsigned int cmd, unsigned long arg)
 {
@@ -1239,6 +1294,18 @@ long gk20a_channel_ioctl(struct file *filp,
 			err = -EINVAL;
 		}
 		break;
+	case NVGPU_IOCTL_CHANNEL_GET_USER_SYNCPOINT:
+		err = gk20a_busy(ch->g);
+		if (err) {
+			dev_err(dev,
+				"%s: failed to host gk20a for ioctl cmd: 0x%x",
+				__func__, cmd);
+			break;
+		}
+		err = nvgpu_ioctl_channel_get_user_syncpoint(ch,
+		      (struct nvgpu_get_user_syncpoint_args *)buf);
+		gk20a_idle(ch->g);
+		break;
 	default:
 		dev_dbg(dev, "unrecognized ioctl cmd: 0x%x", cmd);
 		err = -ENOTTY;
diff --git a/drivers/gpu/nvgpu/common/linux/ioctl_ctrl.c b/drivers/gpu/nvgpu/common/linux/ioctl_ctrl.c
index 71a9bee6..257c04b2 100644
--- a/drivers/gpu/nvgpu/common/linux/ioctl_ctrl.c
+++ b/drivers/gpu/nvgpu/common/linux/ioctl_ctrl.c
@@ -158,6 +158,8 @@ static struct nvgpu_flags_mapping flags_mapping[] = {
 		NVGPU_SUPPORT_DETERMINISTIC_OPTS},
 	{NVGPU_GPU_FLAGS_SUPPORT_SYNCPOINT_ADDRESS,
 		NVGPU_SUPPORT_SYNCPOINT_ADDRESS},
+	{NVGPU_GPU_FLAGS_SUPPORT_USER_SYNCPOINT,
+		NVGPU_SUPPORT_USER_SYNCPOINT},
 	{NVGPU_GPU_FLAGS_SUPPORT_IO_COHERENCE,
 		NVGPU_SUPPORT_IO_COHERENCE},
 	{NVGPU_GPU_FLAGS_SUPPORT_RESCHEDULE_RUNLIST,
diff --git a/drivers/gpu/nvgpu/common/linux/nvhost.c b/drivers/gpu/nvgpu/common/linux/nvhost.c
index e0f83612..94bbfd70 100644
--- a/drivers/gpu/nvgpu/common/linux/nvhost.c
+++ b/drivers/gpu/nvgpu/common/linux/nvhost.c
@@ -152,6 +152,12 @@ int nvgpu_nvhost_syncpt_read_ext_check(
 	return nvhost_syncpt_read_ext_check(nvhost_dev->host1x_pdev, id, val);
 }
 
+/* Return nvhost's max value for syncpoint @id on the host1x device. */
+u32 nvgpu_nvhost_syncpt_read_maxval(struct nvgpu_nvhost_dev *nvhost_dev, u32 id)
+{
+	return nvhost_syncpt_read_maxval(nvhost_dev->host1x_pdev, id);
+}
+
 int nvgpu_nvhost_create_symlink(struct gk20a *g)
 {
 	struct device *dev = dev_from_gk20a(g);
diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
index 4a6b8162..e965a329 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
@@ -294,6 +294,13 @@ static int gk20a_channel_syncpt_id(struct gk20a_channel_sync *s)
 	return sp->id;
 }
 
+/* Return the GPU VA stored in the syncpt_buf of the syncpoint behind @s. */
+static u64 gk20a_channel_syncpt_address(struct gk20a_channel_sync *s)
+{
+	return container_of(s, struct gk20a_channel_syncpt,
+			    ops)->syncpt_buf.gpu_va;
+}
+
 static void gk20a_channel_syncpt_destroy(struct gk20a_channel_sync *s)
 {
 	struct gk20a_channel_syncpt *sp =
@@ -345,6 +352,7 @@ gk20a_channel_syncpt_create(struct channel_gk20a *c)
 	sp->ops.set_min_eq_max		= gk20a_channel_syncpt_set_min_eq_max;
 	sp->ops.signal_timeline		= gk20a_channel_syncpt_signal_timeline;
 	sp->ops.syncpt_id		= gk20a_channel_syncpt_id;
+	sp->ops.syncpt_address		= gk20a_channel_syncpt_address;
 	sp->ops.destroy			= gk20a_channel_syncpt_destroy;
 
 	return &sp->ops;
@@ -865,6 +873,11 @@ static int gk20a_channel_semaphore_syncpt_id(struct gk20a_channel_sync *s)
 	return -EINVAL;
 }
 
+static u64 gk20a_channel_semaphore_syncpt_address(struct gk20a_channel_sync *s)
+{
+	return 0;	/* no sync point address for this sync type */
+}
+
 static void gk20a_channel_semaphore_destroy(struct gk20a_channel_sync *s)
 {
 	struct gk20a_channel_semaphore *sema =
@@ -916,6 +929,7 @@ gk20a_channel_semaphore_create(struct channel_gk20a *c)
 	sema->ops.set_min_eq_max = gk20a_channel_semaphore_set_min_eq_max;
 	sema->ops.signal_timeline = gk20a_channel_semaphore_signal_timeline;
 	sema->ops.syncpt_id	= gk20a_channel_semaphore_syncpt_id;
+	sema->ops.syncpt_address = gk20a_channel_semaphore_syncpt_address;
 	sema->ops.destroy	= gk20a_channel_semaphore_destroy;
 
 	return &sema->ops;
diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h
index 2fd009df..fe1d8526 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h
@@ -102,6 +102,9 @@ struct gk20a_channel_sync {
 	/* Returns the sync point id or negative number if no syncpt*/
 	int (*syncpt_id)(struct gk20a_channel_sync *s);
 
+	/* Returns the GPU VA of the sync point, or 0 if not supported */
+	u64 (*syncpt_address)(struct gk20a_channel_sync *s);
+
 	/* Free the resources allocated by gk20a_channel_sync_create. */
 	void (*destroy)(struct gk20a_channel_sync *s);
 };
diff --git a/drivers/gpu/nvgpu/gp10b/gp10b.c b/drivers/gpu/nvgpu/gp10b/gp10b.c
index 51dc4301..d0a21fe5 100644
--- a/drivers/gpu/nvgpu/gp10b/gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gp10b.c
@@ -116,5 +116,6 @@ int gp10b_init_gpu_characteristics(struct gk20a *g)
 	gk20a_init_gpu_characteristics(g);
 	gp10b_detect_ecc_enabled_units(g);
 	__nvgpu_set_enabled(g, NVGPU_SUPPORT_RESCHEDULE_RUNLIST, true);
+	__nvgpu_set_enabled(g, NVGPU_SUPPORT_USER_SYNCPOINT, true);
 	return 0;
 }
diff --git a/drivers/gpu/nvgpu/gv11b/gv11b.c b/drivers/gpu/nvgpu/gv11b/gv11b.c
index 2670c986..c1ad7944 100644
--- a/drivers/gpu/nvgpu/gv11b/gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/gv11b.c
@@ -155,5 +155,7 @@ int gv11b_init_gpu_characteristics(struct gk20a *g)
 	__nvgpu_set_enabled(g, NVGPU_SUPPORT_IO_COHERENCE, true);
 	__nvgpu_set_enabled(g, NVGPU_SUPPORT_SCG, true);
 	__nvgpu_set_enabled(g, NVGPU_SUPPORT_SYNCPOINT_ADDRESS, true);
+	__nvgpu_set_enabled(g, NVGPU_SUPPORT_USER_SYNCPOINT, true);
+
 	return 0;
 }
diff --git a/drivers/gpu/nvgpu/include/nvgpu/enabled.h b/drivers/gpu/nvgpu/include/nvgpu/enabled.h
index e6f9525d..a3d9df24 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/enabled.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/enabled.h
@@ -157,11 +157,13 @@ struct gk20a;
 
 /* GPU_VA address of a syncpoint is supported */
 #define NVGPU_SUPPORT_SYNCPOINT_ADDRESS		65
+/* Exposing the per-channel syncpoint to user space is supported */
+#define NVGPU_SUPPORT_USER_SYNCPOINT		66
 
 /*
  * Must be greater than the largest bit offset in the above list.
  */
-#define NVGPU_MAX_ENABLED_BITS			66
+#define NVGPU_MAX_ENABLED_BITS			67
 
 /**
  * nvgpu_is_enabled - Check if the passed flag is enabled.
diff --git a/drivers/gpu/nvgpu/include/nvgpu/nvhost.h b/drivers/gpu/nvgpu/include/nvgpu/nvhost.h
index 6e92637a..cb70f436 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/nvhost.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/nvhost.h
@@ -50,6 +50,8 @@ void nvgpu_nvhost_syncpt_set_min_eq_max_ext(struct nvgpu_nvhost_dev *nvhost_dev,
 	u32 id);
 int nvgpu_nvhost_syncpt_read_ext_check(struct nvgpu_nvhost_dev *nvhost_dev,
 	u32 id, u32 *val);
+u32 nvgpu_nvhost_syncpt_read_maxval(struct nvgpu_nvhost_dev *nvhost_dev,
+	u32 id);
 
 int nvgpu_nvhost_intr_register_notifier(struct nvgpu_nvhost_dev *nvhost_dev,
 	u32 id, u32 thresh, void (*callback)(void *, int), void *private_data);
diff --git a/include/uapi/linux/nvgpu.h b/include/uapi/linux/nvgpu.h
index f8211374..cf75595a 100644
--- a/include/uapi/linux/nvgpu.h
+++ b/include/uapi/linux/nvgpu.h
@@ -154,6 +154,8 @@ struct nvgpu_gpu_zbc_query_table_args {
 #define NVGPU_GPU_FLAGS_SUPPORT_SYNCPOINT_ADDRESS	(1ULL << 26)
 /* VPR is supported */
 #define NVGPU_GPU_FLAGS_SUPPORT_VPR			(1ULL << 27)
+/* Exposing the per-channel syncpoint to user space is supported */
+#define NVGPU_GPU_FLAGS_SUPPORT_USER_SYNCPOINT		(1ULL << 28)
 /* SM LRF ECC is enabled */
 #define NVGPU_GPU_FLAGS_ECC_ENABLED_SM_LRF	(1ULL << 60)
 /* SM SHM ECC is enabled */
@@ -1648,6 +1650,12 @@ struct nvgpu_boosted_ctx_args {
 	__u32 padding;
 };
 
+struct nvgpu_get_user_syncpoint_args {
+	__u64 gpu_va;		/* out: syncpoint GPU VA, 0 if unsupported */
+	__u32 syncpoint_id;	/* out: id of the channel's syncpoint */
+	__u32 syncpoint_max;	/* out: max value read for that syncpoint */
+};
+
 #define NVGPU_IOCTL_CHANNEL_SET_NVMAP_FD	\
 	_IOW(NVGPU_IOCTL_MAGIC, 5, struct nvgpu_set_nvmap_fd_args)
 #define NVGPU_IOCTL_CHANNEL_SET_TIMEOUT	\
@@ -1698,9 +1706,11 @@ struct nvgpu_boosted_ctx_args {
 	_IOW(NVGPU_IOCTL_MAGIC, 124, struct nvgpu_boosted_ctx_args)
 #define NVGPU_IOCTL_CHANNEL_GET_TIMESLICE \
 	_IOW(NVGPU_IOCTL_MAGIC, 125, struct nvgpu_timeslice_args)
+#define NVGPU_IOCTL_CHANNEL_GET_USER_SYNCPOINT \
+	_IOR(NVGPU_IOCTL_MAGIC, 126, struct nvgpu_get_user_syncpoint_args)
 
 #define NVGPU_IOCTL_CHANNEL_LAST	\
-	_IOC_NR(NVGPU_IOCTL_CHANNEL_GET_TIMESLICE)
+	_IOC_NR(NVGPU_IOCTL_CHANNEL_GET_USER_SYNCPOINT)
 #define NVGPU_IOCTL_CHANNEL_MAX_ARG_SIZE sizeof(struct nvgpu_alloc_gpfifo_ex_args)
 
 /*
-- 
cgit v1.2.2