From d6efa8fc625fc297b9baa0755407cee1180fbbe6 Mon Sep 17 00:00:00 2001 From: Thomas Fleury Date: Fri, 24 Jun 2016 08:52:18 -0700 Subject: gpu: nvgpu: add ref counting for GPU sched ctrl Jira VFND-1968 Change-Id: Id84c5732e312e44db3d412df5c21e429227dd7fa Signed-off-by: Thomas Fleury Reviewed-on: http://git-master/r/1171286 (cherry picked from commit 13a3a4355914635ed175708affef17dc8ef0b133) Reviewed-on: http://git-master/r/1177824 Reviewed-by: Automatic_Commit_Validation_User Reviewed-by: Richard Zhao GVS: Gerrit_Virtual_Submit Reviewed-by: Alex Waterman Reviewed-by: Vijayakumar Subbu --- drivers/gpu/nvgpu/gk20a/sched_gk20a.c | 195 +++++++++++++++++++++++----------- drivers/gpu/nvgpu/gk20a/sched_gk20a.h | 1 + drivers/gpu/nvgpu/gk20a/tsg_gk20a.c | 4 +- drivers/gpu/nvgpu/gk20a/tsg_gk20a.h | 1 + include/uapi/linux/nvgpu.h | 22 +++- 5 files changed, 157 insertions(+), 66 deletions(-) diff --git a/drivers/gpu/nvgpu/gk20a/sched_gk20a.c b/drivers/gpu/nvgpu/gk20a/sched_gk20a.c index bcbbbe8b..3d7e8bd7 100644 --- a/drivers/gpu/nvgpu/gk20a/sched_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/sched_gk20a.c @@ -105,8 +105,6 @@ static int gk20a_sched_dev_ioctl_get_tsgs(struct gk20a_sched_ctrl *sched, mutex_unlock(&sched->status_lock); return -EFAULT; } - - memset(sched->recent_tsg_bitmap, 0, sched->bitmap_size); mutex_unlock(&sched->status_lock); return 0; @@ -159,13 +157,15 @@ static int gk20a_sched_dev_ioctl_get_tsgs_by_pid(struct gk20a_sched_ctrl *sched, if (!bitmap) return -ENOMEM; - mutex_lock(&f->tsg_inuse_mutex); + mutex_lock(&sched->status_lock); for (tsgid = 0; tsgid < f->num_channels; tsgid++) { - tsg = &f->tsg[tsgid]; - if ((tsg->in_use) && (tsg->tgid == tgid)) - NVGPU_SCHED_SET(tsgid, bitmap); + if (NVGPU_SCHED_ISSET(tsgid, sched->active_tsg_bitmap)) { + tsg = &f->tsg[tsgid]; + if (tsg->tgid == tgid) + NVGPU_SCHED_SET(tsgid, bitmap); + } } - mutex_unlock(&f->tsg_inuse_mutex); + mutex_unlock(&sched->status_lock); if (copy_to_user((void __user *)(uintptr_t)arg->buffer, bitmap, sched->bitmap_size)) @@ -183,23 +183,15 @@ static int gk20a_sched_dev_ioctl_get_params(struct gk20a_sched_ctrl *sched, struct fifo_gk20a *f = &g->fifo; struct tsg_gk20a *tsg; u32 tsgid = arg->tsgid; - int err = -ENXIO; gk20a_dbg(gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsgid); if (tsgid >= f->num_channels) return -EINVAL; - mutex_lock(&f->tsg_inuse_mutex); tsg = &f->tsg[tsgid]; - if (!tsg->in_use) - goto unlock_in_use; - - mutex_lock(&sched->status_lock); - if (!NVGPU_SCHED_ISSET(tsgid, sched->active_tsg_bitmap)) { - gk20a_dbg(gpu_dbg_sched, "tsgid=%u not active", tsgid); - goto unlock_status; - } + if (!kref_get_unless_zero(&tsg->refcount)) + return -ENXIO; arg->pid = tsg->tgid; /* kernel tgid corresponds to user pid */ arg->runlist_interleave = tsg->interleave_level; @@ -215,15 +207,9 @@ static int gk20a_sched_dev_ioctl_get_params(struct gk20a_sched_ctrl *sched, arg->compute_preempt_mode = 0; } - err = 0; - -unlock_status: - mutex_unlock(&sched->status_lock); - -unlock_in_use: - mutex_unlock(&f->tsg_inuse_mutex); + kref_put(&tsg->refcount, gk20a_tsg_release); - return err; + return 0; } static int gk20a_sched_dev_ioctl_tsg_set_timeslice( @@ -234,37 +220,27 @@ static int gk20a_sched_dev_ioctl_tsg_set_timeslice( struct fifo_gk20a *f = &g->fifo; struct tsg_gk20a *tsg; u32 tsgid = arg->tsgid; - int err = -ENXIO; + int err; gk20a_dbg(gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsgid); if (tsgid >= f->num_channels) return -EINVAL; - mutex_lock(&f->tsg_inuse_mutex); tsg = &f->tsg[tsgid]; - if (!tsg->in_use) - goto unlock_in_use; - - mutex_lock(&sched->status_lock); - if (NVGPU_SCHED_ISSET(tsgid, sched->recent_tsg_bitmap)) { - gk20a_dbg(gpu_dbg_sched, "tsgid=%u was re-allocated", tsgid); - goto unlock_status; - } + if (!kref_get_unless_zero(&tsg->refcount)) + return -ENXIO; err = gk20a_busy(g->dev); if (err) - goto unlock_status; + goto done; err = gk20a_tsg_set_timeslice(tsg, arg->timeslice); gk20a_idle(g->dev); -unlock_status: - mutex_unlock(&sched->status_lock); - -unlock_in_use: - mutex_unlock(&f->tsg_inuse_mutex); +done: + kref_put(&tsg->refcount, gk20a_tsg_release); return err; } @@ -277,37 +253,27 @@ static int gk20a_sched_dev_ioctl_tsg_set_runlist_interleave( struct fifo_gk20a *f = &g->fifo; struct tsg_gk20a *tsg; u32 tsgid = arg->tsgid; - int err = -ENXIO; + int err; gk20a_dbg(gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsgid); if (tsgid >= f->num_channels) return -EINVAL; - mutex_lock(&f->tsg_inuse_mutex); tsg = &f->tsg[tsgid]; - if (!tsg->in_use) - goto unlock_in_use; - - mutex_lock(&sched->status_lock); - if (NVGPU_SCHED_ISSET(tsgid, sched->recent_tsg_bitmap)) { - gk20a_dbg(gpu_dbg_sched, "tsgid=%u was re-allocated", tsgid); - goto unlock_status; - } + if (!kref_get_unless_zero(&tsg->refcount)) + return -ENXIO; err = gk20a_busy(g->dev); if (err) - goto unlock_status; + goto done; err = gk20a_tsg_set_runlist_interleave(tsg, arg->runlist_interleave); gk20a_idle(g->dev); -unlock_status: - mutex_unlock(&sched->status_lock); - -unlock_in_use: - mutex_unlock(&f->tsg_inuse_mutex); +done: + kref_put(&tsg->refcount, gk20a_tsg_release); return err; } @@ -332,6 +298,80 @@ static int gk20a_sched_dev_ioctl_unlock_control(struct gk20a_sched_ctrl *sched) return 0; } +static int gk20a_sched_dev_ioctl_get_api_version(struct gk20a_sched_ctrl *sched, + struct nvgpu_sched_api_version_args *args) +{ + gk20a_dbg(gpu_dbg_fn | gpu_dbg_sched, ""); + + args->version = NVGPU_SCHED_API_VERSION; + return 0; +} + +static int gk20a_sched_dev_ioctl_get_tsg(struct gk20a_sched_ctrl *sched, + struct nvgpu_sched_tsg_refcount_args *arg) +{ + struct gk20a *g = sched->g; + struct fifo_gk20a *f = &g->fifo; + struct tsg_gk20a *tsg; + u32 tsgid = arg->tsgid; + + gk20a_dbg(gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsgid); + + if (tsgid >= f->num_channels) + return -EINVAL; + + tsg = &f->tsg[tsgid]; + if (!kref_get_unless_zero(&tsg->refcount)) + return -ENXIO; + + mutex_lock(&sched->status_lock); + if (NVGPU_SCHED_ISSET(tsgid, sched->ref_tsg_bitmap)) { + gk20a_warn(dev_from_gk20a(g), + "tsgid=%d already referenced", tsgid); + /* unlock status_lock as gk20a_tsg_release locks it */ + mutex_unlock(&sched->status_lock); + kref_put(&tsg->refcount, gk20a_tsg_release); + return -ENXIO; + } + + /* keep reference on TSG, will be released on + * NVGPU_SCHED_IOCTL_PUT_TSG ioctl, or close + */ + NVGPU_SCHED_SET(tsgid, sched->ref_tsg_bitmap); + mutex_unlock(&sched->status_lock); + + return 0; +} + +static int gk20a_sched_dev_ioctl_put_tsg(struct gk20a_sched_ctrl *sched, + struct nvgpu_sched_tsg_refcount_args *arg) +{ + struct gk20a *g = sched->g; + struct fifo_gk20a *f = &g->fifo; + struct tsg_gk20a *tsg; + u32 tsgid = arg->tsgid; + + gk20a_dbg(gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsgid); + + if (tsgid >= f->num_channels) + return -EINVAL; + + mutex_lock(&sched->status_lock); + if (!NVGPU_SCHED_ISSET(tsgid, sched->ref_tsg_bitmap)) { + mutex_unlock(&sched->status_lock); + gk20a_warn(dev_from_gk20a(g), + "tsgid=%d not previously referenced", tsgid); + return -ENXIO; + } + NVGPU_SCHED_CLR(tsgid, sched->ref_tsg_bitmap); + mutex_unlock(&sched->status_lock); + + tsg = &f->tsg[tsgid]; + kref_put(&tsg->refcount, gk20a_tsg_release); + + return 0; +} + int gk20a_sched_dev_open(struct inode *inode, struct file *filp) { struct gk20a *g = container_of(inode->i_cdev, @@ -354,6 +394,7 @@ int gk20a_sched_dev_open(struct inode *inode, struct file *filp) memcpy(sched->recent_tsg_bitmap, sched->active_tsg_bitmap, sched->bitmap_size); + memset(sched->ref_tsg_bitmap, 0, sched->bitmap_size); filp->private_data = sched; gk20a_dbg(gpu_dbg_sched, "filp=%p sched=%p", filp, sched); @@ -414,6 +455,18 @@ long gk20a_sched_dev_ioctl(struct file *filp, unsigned int cmd, case NVGPU_SCHED_IOCTL_UNLOCK_CONTROL: err = gk20a_sched_dev_ioctl_unlock_control(sched); break; + case NVGPU_SCHED_IOCTL_GET_API_VERSION: + err = gk20a_sched_dev_ioctl_get_api_version(sched, + (struct nvgpu_sched_api_version_args *)buf); + break; + case NVGPU_SCHED_IOCTL_GET_TSG: + err = gk20a_sched_dev_ioctl_get_tsg(sched, + (struct nvgpu_sched_tsg_refcount_args *)buf); + break; + case NVGPU_SCHED_IOCTL_PUT_TSG: + err = gk20a_sched_dev_ioctl_put_tsg(sched, + (struct nvgpu_sched_tsg_refcount_args *)buf); + break; default: dev_dbg(dev_from_gk20a(g), "unrecognized gpu ioctl cmd: 0x%x", cmd); @@ -436,9 +489,21 @@ long gk20a_sched_dev_ioctl(struct file *filp, unsigned int cmd, int gk20a_sched_dev_release(struct inode *inode, struct file *filp) { struct gk20a_sched_ctrl *sched = filp->private_data; + struct gk20a *g = sched->g; + struct fifo_gk20a *f = &g->fifo; + struct tsg_gk20a *tsg; + int tsgid; gk20a_dbg(gpu_dbg_fn | gpu_dbg_sched, "sched: %p", sched); + /* release any reference to TSGs */ + for (tsgid = 0; tsgid < f->num_channels; tsgid++) { + if (NVGPU_SCHED_ISSET(tsgid, sched->ref_tsg_bitmap)) { + tsg = &f->tsg[tsgid]; + kref_put(&tsg->refcount, gk20a_tsg_release); + } + } + /* unlock control */ mutex_lock(&sched->control_lock); sched->control_locked = false; @@ -569,11 +634,15 @@ int gk20a_sched_ctrl_init(struct gk20a *g) sched->active_tsg_bitmap = kzalloc(sched->bitmap_size, GFP_KERNEL); if (!sched->active_tsg_bitmap) - goto fail_active; + return -ENOMEM; sched->recent_tsg_bitmap = kzalloc(sched->bitmap_size, GFP_KERNEL); if (!sched->recent_tsg_bitmap) - goto fail_recent; + goto free_active; + + sched->ref_tsg_bitmap = kzalloc(sched->bitmap_size, GFP_KERNEL); + if (!sched->ref_tsg_bitmap) + goto free_recent; init_waitqueue_head(&sched->readout_wq); mutex_init(&sched->status_lock); @@ -584,10 +653,12 @@ int gk20a_sched_ctrl_init(struct gk20a *g) return 0; -fail_recent: +free_recent: + kfree(sched->recent_tsg_bitmap); + +free_active: kfree(sched->active_tsg_bitmap); -fail_active: return -ENOMEM; } @@ -597,7 +668,9 @@ void gk20a_sched_ctrl_cleanup(struct gk20a *g) kfree(sched->active_tsg_bitmap); kfree(sched->recent_tsg_bitmap); + kfree(sched->ref_tsg_bitmap); sched->active_tsg_bitmap = NULL; sched->recent_tsg_bitmap = NULL; + sched->ref_tsg_bitmap = NULL; sched->sw_ready = false; } diff --git a/drivers/gpu/nvgpu/gk20a/sched_gk20a.h b/drivers/gpu/nvgpu/gk20a/sched_gk20a.h index 8f533056..0ae13783 100644 --- a/drivers/gpu/nvgpu/gk20a/sched_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/sched_gk20a.h @@ -32,6 +32,7 @@ struct gk20a_sched_ctrl { size_t bitmap_size; u64 *active_tsg_bitmap; u64 *recent_tsg_bitmap; + u64 *ref_tsg_bitmap; wait_queue_head_t readout_wq; }; diff --git a/drivers/gpu/nvgpu/gk20a/tsg_gk20a.c b/drivers/gpu/nvgpu/gk20a/tsg_gk20a.c index af8f0f7b..3e83cd06 100644 --- a/drivers/gpu/nvgpu/gk20a/tsg_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/tsg_gk20a.c @@ -28,8 +28,6 @@ #define NVGPU_TSG_MIN_TIMESLICE_US 1000 #define NVGPU_TSG_MAX_TIMESLICE_US 50000 -static void gk20a_tsg_release(struct kref *ref); - bool gk20a_is_channel_marked_as_tsg(struct channel_gk20a *ch) { return !(ch->tsgid == NVGPU_INVALID_TSG_ID); @@ -449,7 +447,7 @@ int gk20a_tsg_dev_open(struct inode *inode, struct file *filp) return ret; } -static void gk20a_tsg_release(struct kref *ref) +void gk20a_tsg_release(struct kref *ref) { struct tsg_gk20a *tsg = container_of(ref, struct tsg_gk20a, refcount); struct gk20a *g = tsg->g; diff --git a/drivers/gpu/nvgpu/gk20a/tsg_gk20a.h b/drivers/gpu/nvgpu/gk20a/tsg_gk20a.h index 2819dd1c..e1960102 100644 --- a/drivers/gpu/nvgpu/gk20a/tsg_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/tsg_gk20a.h @@ -23,6 +23,7 @@ bool gk20a_is_channel_marked_as_tsg(struct channel_gk20a *ch); int gk20a_tsg_dev_release(struct inode *inode, struct file *filp); int gk20a_tsg_dev_open(struct inode *inode, struct file *filp); +void gk20a_tsg_release(struct kref *ref); int gk20a_tsg_open(struct gk20a *g, struct file *filp); long gk20a_tsg_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg); diff --git a/include/uapi/linux/nvgpu.h b/include/uapi/linux/nvgpu.h index 17604d32..aa950dfa 100644 --- a/include/uapi/linux/nvgpu.h +++ b/include/uapi/linux/nvgpu.h @@ -1576,6 +1576,14 @@ struct nvgpu_sched_tsg_runlist_interleave_args { __u32 runlist_interleave; }; +struct nvgpu_sched_api_version_args { + __u32 version; +}; + +struct nvgpu_sched_tsg_refcount_args { + __u32 tsgid; /* in: TSG identifier */ +}; + #define NVGPU_SCHED_IOCTL_GET_TSGS \ _IOWR(NVGPU_SCHED_IOCTL_MAGIC, 1, \ struct nvgpu_sched_get_tsgs_args) @@ -1598,9 +1606,17 @@ struct nvgpu_sched_tsg_runlist_interleave_args { _IO(NVGPU_SCHED_IOCTL_MAGIC, 7) #define NVGPU_SCHED_IOCTL_UNLOCK_CONTROL \ _IO(NVGPU_SCHED_IOCTL_MAGIC, 8) - +#define NVGPU_SCHED_IOCTL_GET_API_VERSION \ + _IOR(NVGPU_SCHED_IOCTL_MAGIC, 9, \ + struct nvgpu_sched_api_version_args) +#define NVGPU_SCHED_IOCTL_GET_TSG \ + _IOW(NVGPU_SCHED_IOCTL_MAGIC, 10, \ + struct nvgpu_sched_tsg_refcount_args) +#define NVGPU_SCHED_IOCTL_PUT_TSG \ + _IOW(NVGPU_SCHED_IOCTL_MAGIC, 11, \ + struct nvgpu_sched_tsg_refcount_args) #define NVGPU_SCHED_IOCTL_LAST \ - _IOC_NR(NVGPU_SCHED_IOCTL_UNLOCK_CONTROL) + _IOC_NR(NVGPU_SCHED_IOCTL_PUT_TSG) #define NVGPU_SCHED_IOCTL_MAX_ARG_SIZE \ sizeof(struct nvgpu_sched_tsg_get_params_args) @@ -1620,4 +1636,6 @@ struct nvgpu_sched_event_arg { __u64 status; }; +#define NVGPU_SCHED_API_VERSION 1 + #endif -- cgit v1.2.2