From 8d5536271f989e01018a543016340a3d76a2fae2 Mon Sep 17 00:00:00 2001 From: Deepak Nibade Date: Tue, 13 Feb 2018 04:02:38 -0800 Subject: gpu: nvgpu: add user API to get a syncpoint Add new user API NVGPU_IOCTL_CHANNEL_GET_USER_SYNCPOINT which will expose per-channel allocated syncpoint to user space. The API will also return the current max value of the syncpoint. On supported platforms, this API will also return a RW semaphore address (corresponding to syncpoint shim) to user space. Add new characteristics flag NVGPU_GPU_FLAGS_SUPPORT_USER_SYNCPOINT to indicate support for this new API. Add new flag NVGPU_SUPPORT_USER_SYNCPOINT for use of core driver. Set this flag for GV11B and GP10B for now. Add a new API (*syncpt_address) in struct gk20a_channel_sync to get GPU_VA address of a syncpoint. Add new API nvgpu_nvhost_syncpt_read_maxval() which will read and return MAX value of syncpoint. Bug 200326065 Jira NVGPU-179 Change-Id: I9da6f17b85996f4fc6731c0bf94fca6f3181c3e0 Signed-off-by: Deepak Nibade Reviewed-on: https://git-master.nvidia.com/r/1658009 Reviewed-by: svc-mobile-coverity GVS: Gerrit_Virtual_Submit Reviewed-by: Konsta Holtta Reviewed-by: Vijayakumar Subbu Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/common/linux/ioctl_channel.c | 67 ++++++++++++++++++++++++++ drivers/gpu/nvgpu/common/linux/ioctl_ctrl.c | 2 + drivers/gpu/nvgpu/common/linux/nvhost.c | 6 +++ drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c | 14 ++++++ drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h | 3 ++ drivers/gpu/nvgpu/gp10b/gp10b.c | 1 + drivers/gpu/nvgpu/gv11b/gv11b.c | 2 + drivers/gpu/nvgpu/include/nvgpu/enabled.h | 4 +- drivers/gpu/nvgpu/include/nvgpu/nvhost.h | 2 + include/uapi/linux/nvgpu.h | 12 ++++- 10 files changed, 111 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/nvgpu/common/linux/ioctl_channel.c b/drivers/gpu/nvgpu/common/linux/ioctl_channel.c index b7856ca0..ab6ac9b9 100644 --- a/drivers/gpu/nvgpu/common/linux/ioctl_channel.c +++ 
b/drivers/gpu/nvgpu/common/linux/ioctl_channel.c @@ -32,6 +32,7 @@ #include #include #include +#include #include "gk20a/gk20a.h" #include "gk20a/dbg_gpu_gk20a.h" @@ -948,6 +949,60 @@ static int nvgpu_ioctl_channel_set_preemption_mode(struct channel_gk20a *ch, return err; } +static int nvgpu_ioctl_channel_get_user_syncpoint(struct channel_gk20a *ch, + struct nvgpu_get_user_syncpoint_args *args) +{ +#ifdef CONFIG_TEGRA_GK20A_NVHOST + struct gk20a *g = ch->g; + int err; + + if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_USER_SYNCPOINT)) { + nvgpu_err(g, "user syncpoints not supported"); + return -EINVAL; + } + + if (!gk20a_platform_has_syncpoints(g)) { + nvgpu_err(g, "syncpoints not supported"); + return -EINVAL; + } + + if (g->aggressive_sync_destroy_thresh) { + nvgpu_err(g, "sufficient syncpoints not available"); + return -EINVAL; + } + + nvgpu_mutex_acquire(&ch->sync_lock); + if (ch->sync) { + nvgpu_mutex_release(&ch->sync_lock); + } else { + ch->sync = gk20a_channel_sync_create(ch); + if (!ch->sync) { + nvgpu_mutex_release(&ch->sync_lock); + return -ENOMEM; + } + nvgpu_mutex_release(&ch->sync_lock); + + if (g->ops.fifo.resetup_ramfc) { + err = g->ops.fifo.resetup_ramfc(ch); + if (err) + return err; + } + } + + args->syncpoint_id = ch->sync->syncpt_id(ch->sync); + args->syncpoint_max = nvgpu_nvhost_syncpt_read_maxval(g->nvhost_dev, + args->syncpoint_id); + if (nvgpu_is_enabled(g, NVGPU_SUPPORT_SYNCPOINT_ADDRESS)) + args->gpu_va = ch->sync->syncpt_address(ch->sync); + else + args->gpu_va = 0; + + return 0; +#else + return -EINVAL; +#endif +} + long gk20a_channel_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { @@ -1239,6 +1294,18 @@ long gk20a_channel_ioctl(struct file *filp, err = -EINVAL; } break; + case NVGPU_IOCTL_CHANNEL_GET_USER_SYNCPOINT: + err = gk20a_busy(ch->g); + if (err) { + dev_err(dev, + "%s: failed to host gk20a for ioctl cmd: 0x%x", + __func__, cmd); + break; + } + err = nvgpu_ioctl_channel_get_user_syncpoint(ch, + (struct 
nvgpu_get_user_syncpoint_args *)buf); + gk20a_idle(ch->g); + break; default: dev_dbg(dev, "unrecognized ioctl cmd: 0x%x", cmd); err = -ENOTTY; diff --git a/drivers/gpu/nvgpu/common/linux/ioctl_ctrl.c b/drivers/gpu/nvgpu/common/linux/ioctl_ctrl.c index 71a9bee6..257c04b2 100644 --- a/drivers/gpu/nvgpu/common/linux/ioctl_ctrl.c +++ b/drivers/gpu/nvgpu/common/linux/ioctl_ctrl.c @@ -158,6 +158,8 @@ static struct nvgpu_flags_mapping flags_mapping[] = { NVGPU_SUPPORT_DETERMINISTIC_OPTS}, {NVGPU_GPU_FLAGS_SUPPORT_SYNCPOINT_ADDRESS, NVGPU_SUPPORT_SYNCPOINT_ADDRESS}, + {NVGPU_GPU_FLAGS_SUPPORT_USER_SYNCPOINT, + NVGPU_SUPPORT_USER_SYNCPOINT}, {NVGPU_GPU_FLAGS_SUPPORT_IO_COHERENCE, NVGPU_SUPPORT_IO_COHERENCE}, {NVGPU_GPU_FLAGS_SUPPORT_RESCHEDULE_RUNLIST, diff --git a/drivers/gpu/nvgpu/common/linux/nvhost.c b/drivers/gpu/nvgpu/common/linux/nvhost.c index e0f83612..94bbfd70 100644 --- a/drivers/gpu/nvgpu/common/linux/nvhost.c +++ b/drivers/gpu/nvgpu/common/linux/nvhost.c @@ -152,6 +152,12 @@ int nvgpu_nvhost_syncpt_read_ext_check( return nvhost_syncpt_read_ext_check(nvhost_dev->host1x_pdev, id, val); } +u32 nvgpu_nvhost_syncpt_read_maxval( + struct nvgpu_nvhost_dev *nvhost_dev, u32 id) +{ + return nvhost_syncpt_read_maxval(nvhost_dev->host1x_pdev, id); +} + int nvgpu_nvhost_create_symlink(struct gk20a *g) { struct device *dev = dev_from_gk20a(g); diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c index 4a6b8162..e965a329 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c @@ -294,6 +294,13 @@ static int gk20a_channel_syncpt_id(struct gk20a_channel_sync *s) return sp->id; } +static u64 gk20a_channel_syncpt_address(struct gk20a_channel_sync *s) +{ + struct gk20a_channel_syncpt *sp = + container_of(s, struct gk20a_channel_syncpt, ops); + return sp->syncpt_buf.gpu_va; +} + static void gk20a_channel_syncpt_destroy(struct gk20a_channel_sync *s) { struct gk20a_channel_syncpt 
*sp = @@ -345,6 +352,7 @@ gk20a_channel_syncpt_create(struct channel_gk20a *c) sp->ops.set_min_eq_max = gk20a_channel_syncpt_set_min_eq_max; sp->ops.signal_timeline = gk20a_channel_syncpt_signal_timeline; sp->ops.syncpt_id = gk20a_channel_syncpt_id; + sp->ops.syncpt_address = gk20a_channel_syncpt_address; sp->ops.destroy = gk20a_channel_syncpt_destroy; return &sp->ops; @@ -865,6 +873,11 @@ static int gk20a_channel_semaphore_syncpt_id(struct gk20a_channel_sync *s) return -EINVAL; } +static u64 gk20a_channel_semaphore_syncpt_address(struct gk20a_channel_sync *s) +{ + return 0; +} + static void gk20a_channel_semaphore_destroy(struct gk20a_channel_sync *s) { struct gk20a_channel_semaphore *sema = @@ -916,6 +929,7 @@ gk20a_channel_semaphore_create(struct channel_gk20a *c) sema->ops.set_min_eq_max = gk20a_channel_semaphore_set_min_eq_max; sema->ops.signal_timeline = gk20a_channel_semaphore_signal_timeline; sema->ops.syncpt_id = gk20a_channel_semaphore_syncpt_id; + sema->ops.syncpt_address = gk20a_channel_semaphore_syncpt_address; sema->ops.destroy = gk20a_channel_semaphore_destroy; return &sema->ops; diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h index 2fd009df..fe1d8526 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h @@ -102,6 +102,9 @@ struct gk20a_channel_sync { /* Returns the sync point id or negative number if no syncpt*/ int (*syncpt_id)(struct gk20a_channel_sync *s); + /* Returns the sync point address of sync point or 0 if not supported */ + u64 (*syncpt_address)(struct gk20a_channel_sync *s); + /* Free the resources allocated by gk20a_channel_sync_create. 
*/ void (*destroy)(struct gk20a_channel_sync *s); }; diff --git a/drivers/gpu/nvgpu/gp10b/gp10b.c b/drivers/gpu/nvgpu/gp10b/gp10b.c index 51dc4301..d0a21fe5 100644 --- a/drivers/gpu/nvgpu/gp10b/gp10b.c +++ b/drivers/gpu/nvgpu/gp10b/gp10b.c @@ -116,5 +116,6 @@ int gp10b_init_gpu_characteristics(struct gk20a *g) gk20a_init_gpu_characteristics(g); gp10b_detect_ecc_enabled_units(g); __nvgpu_set_enabled(g, NVGPU_SUPPORT_RESCHEDULE_RUNLIST, true); + __nvgpu_set_enabled(g, NVGPU_SUPPORT_USER_SYNCPOINT, true); return 0; } diff --git a/drivers/gpu/nvgpu/gv11b/gv11b.c b/drivers/gpu/nvgpu/gv11b/gv11b.c index 2670c986..c1ad7944 100644 --- a/drivers/gpu/nvgpu/gv11b/gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gv11b.c @@ -155,5 +155,7 @@ int gv11b_init_gpu_characteristics(struct gk20a *g) __nvgpu_set_enabled(g, NVGPU_SUPPORT_IO_COHERENCE, true); __nvgpu_set_enabled(g, NVGPU_SUPPORT_SCG, true); __nvgpu_set_enabled(g, NVGPU_SUPPORT_SYNCPOINT_ADDRESS, true); + __nvgpu_set_enabled(g, NVGPU_SUPPORT_USER_SYNCPOINT, true); + return 0; } diff --git a/drivers/gpu/nvgpu/include/nvgpu/enabled.h b/drivers/gpu/nvgpu/include/nvgpu/enabled.h index e6f9525d..a3d9df24 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/enabled.h +++ b/drivers/gpu/nvgpu/include/nvgpu/enabled.h @@ -157,11 +157,13 @@ struct gk20a; /* GPU_VA address of a syncpoint is supported */ #define NVGPU_SUPPORT_SYNCPOINT_ADDRESS 65 +/* Allocating per-channel syncpoint in user space is supported */ +#define NVGPU_SUPPORT_USER_SYNCPOINT 66 /* * Must be greater than the largest bit offset in the above list. */ -#define NVGPU_MAX_ENABLED_BITS 66 +#define NVGPU_MAX_ENABLED_BITS 67 /** * nvgpu_is_enabled - Check if the passed flag is enabled. 
diff --git a/drivers/gpu/nvgpu/include/nvgpu/nvhost.h b/drivers/gpu/nvgpu/include/nvgpu/nvhost.h index 6e92637a..cb70f436 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/nvhost.h +++ b/drivers/gpu/nvgpu/include/nvgpu/nvhost.h @@ -50,6 +50,8 @@ void nvgpu_nvhost_syncpt_set_min_eq_max_ext(struct nvgpu_nvhost_dev *nvhost_dev, u32 id); int nvgpu_nvhost_syncpt_read_ext_check(struct nvgpu_nvhost_dev *nvhost_dev, u32 id, u32 *val); +u32 nvgpu_nvhost_syncpt_read_maxval(struct nvgpu_nvhost_dev *nvhost_dev, + u32 id); int nvgpu_nvhost_intr_register_notifier(struct nvgpu_nvhost_dev *nvhost_dev, u32 id, u32 thresh, void (*callback)(void *, int), void *private_data); diff --git a/include/uapi/linux/nvgpu.h b/include/uapi/linux/nvgpu.h index f8211374..cf75595a 100644 --- a/include/uapi/linux/nvgpu.h +++ b/include/uapi/linux/nvgpu.h @@ -154,6 +154,8 @@ struct nvgpu_gpu_zbc_query_table_args { #define NVGPU_GPU_FLAGS_SUPPORT_SYNCPOINT_ADDRESS (1ULL << 26) /* VPR is supported */ #define NVGPU_GPU_FLAGS_SUPPORT_VPR (1ULL << 27) +/* Allocating per-channel syncpoint in user space is supported */ +#define NVGPU_GPU_FLAGS_SUPPORT_USER_SYNCPOINT (1ULL << 28) /* SM LRF ECC is enabled */ #define NVGPU_GPU_FLAGS_ECC_ENABLED_SM_LRF (1ULL << 60) /* SM SHM ECC is enabled */ @@ -1648,6 +1650,12 @@ struct nvgpu_boosted_ctx_args { __u32 padding; }; +struct nvgpu_get_user_syncpoint_args { + __u64 gpu_va; /* out */ + __u32 syncpoint_id; /* out */ + __u32 syncpoint_max; /* out */ +}; + #define NVGPU_IOCTL_CHANNEL_SET_NVMAP_FD \ _IOW(NVGPU_IOCTL_MAGIC, 5, struct nvgpu_set_nvmap_fd_args) #define NVGPU_IOCTL_CHANNEL_SET_TIMEOUT \ @@ -1698,9 +1706,11 @@ struct nvgpu_boosted_ctx_args { _IOW(NVGPU_IOCTL_MAGIC, 124, struct nvgpu_boosted_ctx_args) #define NVGPU_IOCTL_CHANNEL_GET_TIMESLICE \ _IOW(NVGPU_IOCTL_MAGIC, 125, struct nvgpu_timeslice_args) +#define NVGPU_IOCTL_CHANNEL_GET_USER_SYNCPOINT \ + _IOR(NVGPU_IOCTL_MAGIC, 126, struct nvgpu_get_user_syncpoint_args) #define NVGPU_IOCTL_CHANNEL_LAST \ - 
_IOC_NR(NVGPU_IOCTL_CHANNEL_GET_TIMESLICE) + _IOC_NR(NVGPU_IOCTL_CHANNEL_GET_USER_SYNCPOINT) #define NVGPU_IOCTL_CHANNEL_MAX_ARG_SIZE sizeof(struct nvgpu_alloc_gpfifo_ex_args) /* -- cgit v1.2.2