From 63e8592e06939e20c7b9e56b430353ebbee31ad6 Mon Sep 17 00:00:00 2001
From: Sachit Kadle
Date: Mon, 22 Aug 2016 18:06:30 -0700
Subject: gpu: nvgpu: use inplace allocation in sync framework

This change is the first of a series of changes to support the use of
pre-allocated job tracking resources in the submit path. With this
change, we still maintain a dynamically-allocated joblist, but make
the necessary changes in the channel_sync & fence framework to use
in-place allocations. Specifically, we:

1) Update channel sync framework routines to take in pre-allocated
   priv_cmd_entry(s) & gk20a_fence(s) rather than dynamically
   allocating them internally
2) Move allocation of priv_cmd_entry(s) & gk20a_fence(s) to
   gk20a_submit_prepare_syncs
3) Modify the fence framework to have separate allocation and init
   APIs. We expose allocation as a separate API, so the client can
   allocate the object before passing it in to the channel sync
   framework.
4) Fix the clean-up logic in the channel sync framework

Bug 1795076

Change-Id: I96db457683cd207fd029c31c45f548f98055e844
Signed-off-by: Sachit Kadle
Reviewed-on: http://git-master/r/1206725
(cherry picked from commit 9d196fd10db6c2f934c2a53b1fc0500eb4626624)
Reviewed-on: http://git-master/r/1223933
Reviewed-by: mobile promotions
Tested-by: mobile promotions
---
 drivers/gpu/nvgpu/gk20a/channel_gk20a.c      | 156 ++++++++++++++++---------
 drivers/gpu/nvgpu/gk20a/channel_gk20a.h      |   2 +-
 drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c | 168 +++++++++++++++------------
 drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h |  20 ++--
 drivers/gpu/nvgpu/gk20a/fence_gk20a.c        |  88 +++++++++-----
 drivers/gpu/nvgpu/gk20a/fence_gk20a.h        |  15 ++-
 drivers/gpu/nvgpu/gk20a/mm_gk20a.h           |   1 +
 7 files changed, 275 insertions(+), 175 deletions(-)
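For context before the diff: the ownership model this series moves toward is that the submit path allocates and owns the job-tracking objects, while the channel_sync and fence code only initializes storage handed to it. A minimal sketch of that contract (hypothetical, simplified types and parameters; not part of the patch):

	/* Sketch: the caller owns the storage; callees fill it in place. */
	struct job {
		struct priv_cmd_entry *wait_cmd; /* allocated by submit path */
		struct gk20a_fence *pre_fence;   /* allocated by submit path */
	};

	static int prepare_syncs(struct channel_gk20a *c, struct job *job,
				 u32 id, u32 thresh)
	{
		job->wait_cmd = kzalloc(sizeof(*job->wait_cmd), GFP_KERNEL);
		job->pre_fence = gk20a_alloc_fence(c);
		if (!job->wait_cmd || !job->pre_fence)
			return -ENOMEM; /* caller frees whatever was set up */

		/* the sync framework writes into the caller's objects */
		return c->sync->wait_syncpt(c->sync, id, thresh,
					    job->wait_cmd, job->pre_fence);
	}

The joblist itself is still dynamically allocated at this point; later patches in the series pre-allocate the jobs as well.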
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
index f60a92b4..4019721a 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -1376,16 +1376,20 @@ static void channel_gk20a_free_priv_cmdbuf(struct channel_gk20a *c)
 
 /* allocate a cmd buffer with given size. size is number of u32 entries */
 int gk20a_channel_alloc_priv_cmdbuf(struct channel_gk20a *c, u32 orig_size,
-			     struct priv_cmd_entry **entry)
+			     struct priv_cmd_entry *e)
 {
 	struct priv_cmd_queue *q = &c->priv_cmd_q;
-	struct priv_cmd_entry *e;
 	u32 free_count;
 	u32 size = orig_size;
 
 	gk20a_dbg_fn("size %d", orig_size);
 
-	*entry = NULL;
+	if (!e) {
+		gk20a_err(dev_from_gk20a(c->g),
+			"ch %d: priv cmd entry is null",
+			c->hw_chid);
+		return -EINVAL;
+	}
 
 	/* if free space in the end is less than requested, increase the size
 	 * to make the real allocated space start from beginning.
 	 */
@@ -1400,14 +1404,6 @@ int gk20a_channel_alloc_priv_cmdbuf(struct channel_gk20a *c, u32 orig_size,
 	if (size > free_count)
 		return -EAGAIN;
 
-	e = kzalloc(sizeof(struct priv_cmd_entry), GFP_KERNEL);
-	if (!e) {
-		gk20a_err(dev_from_gk20a(c->g),
-			"ch %d: fail to allocate priv cmd entry",
-			c->hw_chid);
-		return -ENOMEM;
-	}
-
 	e->size = orig_size;
 	e->mem = &q->mem;
 
@@ -1426,8 +1422,10 @@ int gk20a_channel_alloc_priv_cmdbuf(struct channel_gk20a *c, u32 orig_size,
 	/* we already handled q->put + size > q->size so BUG_ON this */
 	BUG_ON(q->put > q->size);
 
-	*entry = e;
+	/* commit the previous writes before making the entry valid */
+	wmb();
+	e->valid = true;
 	gk20a_dbg_fn("done");
 
 	return 0;
@@ -1441,6 +1439,21 @@ static void free_priv_cmdbuf(struct channel_gk20a *c,
 	kfree(e);
 }
 
+static struct channel_gk20a_job *channel_gk20a_alloc_job(
+		struct channel_gk20a *c)
+{
+	struct channel_gk20a_job *job = NULL;
+
+	job = kzalloc(sizeof(*job), GFP_KERNEL);
+	return job;
+}
+
+static void channel_gk20a_free_job(struct channel_gk20a *c,
+		struct channel_gk20a_job *job)
+{
+	kfree(job);
+}
+
 int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c,
 		struct nvgpu_alloc_gpfifo_args *args)
 {
@@ -1818,10 +1831,15 @@ int gk20a_free_priv_cmdbuf(struct channel_gk20a *c, struct priv_cmd_entry *e)
 	if (!e)
 		return 0;
 
-	if ((q->get != e->off) && e->off != 0)
-		gk20a_err(d, "requests out-of-order, ch=%d\n", c->hw_chid);
+	if (e->valid) {
+		/* read the entry's valid flag before reading its contents */
+		rmb();
+		if ((q->get != e->off) && e->off != 0)
+			gk20a_err(d, "requests out-of-order, ch=%d\n",
+				c->hw_chid);
+		q->get = e->off + e->size;
+	}
 
-	q->get = e->off + e->size;
 	free_priv_cmdbuf(c, e);
 
 	return 0;
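The valid flag introduced above is a publish/consume handshake between the two ends of the private command queue: the producer fully initializes the entry, issues a write barrier, and only then sets valid; the consumer tests valid and issues a read barrier before trusting the entry's fields. Condensed from the two hunks above:

	/* producer: gk20a_channel_alloc_priv_cmdbuf() */
	e->size = orig_size;
	e->mem = &q->mem;
	/* ... e->off and q->put are updated here ... */
	wmb();		/* order the writes above before the flag */
	e->valid = true;

	/* consumer: gk20a_free_priv_cmdbuf() */
	if (e->valid) {
		rmb();	/* pairs with the producer's wmb() */
		q->get = e->off + e->size;
	}

This matters because entries are now zero-allocated by the caller and can reach gk20a_free_priv_cmdbuf() without ever having been filled in (for example, after an early submit failure); such entries simply skip the queue-pointer update.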
@@ -1854,14 +1872,10 @@ static void gk20a_channel_cancel_job_clean_up(struct channel_gk20a *c,
 }
 
 static int gk20a_channel_add_job(struct channel_gk20a *c,
-				 struct gk20a_fence *pre_fence,
-				 struct gk20a_fence *post_fence,
-				 struct priv_cmd_entry *wait_cmd,
-				 struct priv_cmd_entry *incr_cmd,
+				 struct channel_gk20a_job *job,
 				 bool skip_buffer_refcounting)
 {
 	struct vm_gk20a *vm = c->vm;
-	struct channel_gk20a_job *job = NULL;
 	struct mapped_buffer_node **mapped_buffers = NULL;
 	int err = 0, num_mapped_buffers = 0;
 
@@ -1875,22 +1889,12 @@ static int gk20a_channel_add_job(struct channel_gk20a *c,
 		goto err_put_vm;
 	}
 
-	job = kzalloc(sizeof(*job), GFP_KERNEL);
-	if (!job) {
-		err = -ENOMEM;
-		goto err_put_buffers;
-	}
-
 	/* put() is done in gk20a_channel_update() when the job is done */
 	c = gk20a_channel_get(c);
 	if (c) {
 		job->num_mapped_buffers = num_mapped_buffers;
 		job->mapped_buffers = mapped_buffers;
-		job->pre_fence = pre_fence;
-		job->post_fence = post_fence;
-		job->wait_cmd = wait_cmd;
-		job->incr_cmd = incr_cmd;
 
 		gk20a_channel_timeout_start(c, job);
 
@@ -1899,13 +1903,11 @@ static int gk20a_channel_add_job(struct channel_gk20a *c,
 		spin_unlock(&c->jobs_lock);
 	} else {
 		err = -ETIMEDOUT;
-		goto err_free_job;
+		goto err_put_buffers;
 	}
 
 	return 0;
 
-err_free_job:
-	kfree(job);
 err_put_buffers:
 	gk20a_vm_put_buffers(vm, mapped_buffers, num_mapped_buffers);
 err_put_vm:
@@ -2000,7 +2002,7 @@ static void gk20a_channel_clean_up_jobs(struct work_struct *work)
 		list_del_init(&job->list);
 		spin_unlock(&c->jobs_lock);
 
-		kfree(job);
+		channel_gk20a_free_job(c, job);
 		job_finished = 1;
 		gk20a_idle(g->dev);
 	}
@@ -2143,6 +2145,7 @@ out:
  */
 static int gk20a_submit_prepare_syncs(struct channel_gk20a *c,
 				      struct nvgpu_fence *fence,
+				      struct channel_gk20a_job *job,
 				      struct priv_cmd_entry **wait_cmd,
 				      struct priv_cmd_entry **incr_cmd,
 				      struct gk20a_fence **pre_fence,
@@ -2194,18 +2197,32 @@ static int gk20a_submit_prepare_syncs(struct channel_gk20a *c,
 	 * this condition.
 	 */
 	if (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT) {
+		job->wait_cmd = kzalloc(sizeof(struct priv_cmd_entry),
+					GFP_KERNEL);
+		job->pre_fence = gk20a_alloc_fence(c);
+
+		if (!job->wait_cmd || !job->pre_fence) {
+			err = -ENOMEM;
+			goto clean_up_pre_fence;
+		}
+
 		if (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE) {
 			wait_fence_fd = fence->id;
 			err = c->sync->wait_fd(c->sync, wait_fence_fd,
-					wait_cmd, pre_fence);
+					job->wait_cmd, job->pre_fence);
 		} else {
 			err = c->sync->wait_syncpt(c->sync, fence->id,
-					fence->value, wait_cmd,
-					pre_fence);
+					fence->value, job->wait_cmd,
+					job->pre_fence);
 		}
+
+		if (!err) {
+			if (job->wait_cmd->valid)
+				*wait_cmd = job->wait_cmd;
+			*pre_fence = job->pre_fence;
+		} else
+			goto clean_up_pre_fence;
 	}
-	if (err)
-		goto fail;
 
 	if ((flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET) &&
 	    (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE))
@@ -2216,22 +2233,41 @@ static int gk20a_submit_prepare_syncs(struct channel_gk20a *c,
 	 * is used to keep track of method completion for idle railgating. The
 	 * sync_pt/semaphore PB is added to the GPFIFO later on in submit.
 	 */
+	job->incr_cmd = kzalloc(sizeof(struct priv_cmd_entry), GFP_KERNEL);
+	job->post_fence = gk20a_alloc_fence(c);
+
+	if (!job->incr_cmd || !job->post_fence) {
+		err = -ENOMEM;
+		goto clean_up_post_fence;
+	}
+
 	if (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET)
-		err = c->sync->incr_user(c->sync, wait_fence_fd, incr_cmd,
-					 post_fence, need_wfi, need_sync_fence);
+		err = c->sync->incr_user(c->sync, wait_fence_fd, job->incr_cmd,
+				 job->post_fence, need_wfi, need_sync_fence);
 	else
-		err = c->sync->incr(c->sync, incr_cmd,
-				    post_fence, need_sync_fence);
-	if (err)
-		goto fail;
+		err = c->sync->incr(c->sync, job->incr_cmd,
+				    job->post_fence, need_sync_fence);
+	if (!err) {
+		*incr_cmd = job->incr_cmd;
+		*post_fence = job->post_fence;
+	} else
+		goto clean_up_post_fence;
 
 	return 0;
 
+clean_up_post_fence:
+	gk20a_free_priv_cmdbuf(c, job->incr_cmd);
+	gk20a_fence_put(job->post_fence);
+	job->incr_cmd = NULL;
+	job->post_fence = NULL;
+clean_up_pre_fence:
+	gk20a_free_priv_cmdbuf(c, job->wait_cmd);
+	gk20a_fence_put(job->pre_fence);
+	job->wait_cmd = NULL;
+	job->pre_fence = NULL;
+	*wait_cmd = NULL;
+	*pre_fence = NULL;
 fail:
-	/*
-	 * Cleanup is handled by gk20a_submit_channel_gpfifo() since it is the
-	 * real owner of the objects we make here.
-	 */
 	return err;
 }
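The clean-up labels above follow the usual kernel unwind idiom: a failure on the increment side falls through clean_up_post_fence into clean_up_pre_fence, releasing resources in reverse order of acquisition, and every freed pointer is reset so that neither the job struct nor the caller's out-parameters can trigger a double free later. The shape of the pattern, condensed:

	return 0;			/* success: the job owns everything */

clean_up_post_fence:			/* incr side failed */
	gk20a_free_priv_cmdbuf(c, job->incr_cmd);
	gk20a_fence_put(job->post_fence);
	job->incr_cmd = NULL;
	job->post_fence = NULL;
clean_up_pre_fence:			/* wait side failed, or fell through */
	/* both helpers tolerate objects that were never set up */
	gk20a_free_priv_cmdbuf(c, job->wait_cmd);
	gk20a_fence_put(job->pre_fence);
	job->wait_cmd = NULL;
	job->pre_fence = NULL;
	*wait_cmd = NULL;
	*pre_fence = NULL;
	return err;

Note that the fall-through can reach the pre-fence labels even when FLAGS_FENCE_WAIT was never set; that is safe only because the job was zero-allocated and the free/put helpers accept NULL.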
@@ -2250,6 +2286,7 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
 	struct priv_cmd_entry *incr_cmd = NULL;
 	struct gk20a_fence *pre_fence = NULL;
 	struct gk20a_fence *post_fence = NULL;
+	struct channel_gk20a_job *job = NULL;
 	/* we might need two extra gpfifo entries - one for pre fence
 	 * and one for post fence. */
 	const int extra_entries = 2;
@@ -2351,11 +2388,18 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
 	}
 
 	if (need_job_tracking) {
-		err = gk20a_submit_prepare_syncs(c, fence, &wait_cmd, &incr_cmd,
+		job = channel_gk20a_alloc_job(c);
+		if (!job) {
+			err = -ENOMEM;
+			goto clean_up;
+		}
+
+		err = gk20a_submit_prepare_syncs(c, fence, job,
+						 &wait_cmd, &incr_cmd,
 						 &pre_fence, &post_fence,
 						 force_need_sync_fence,
 						 flags);
 		if (err)
-			goto clean_up;
+			goto clean_up_job;
 	}
 
 	if (wait_cmd)
@@ -2365,7 +2409,7 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
 	err = gk20a_submit_append_gpfifo(c, gpfifo, user_gpfifo,
 			num_entries);
 	if (err)
-		goto clean_up;
+		goto clean_up_job;
 
 	/*
 	 * And here's where we add the incr_cmd we generated earlier. It should
@@ -2379,9 +2423,7 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
 
 	if (need_job_tracking)
 		/* TODO! Check for errors... */
-		gk20a_channel_add_job(c, pre_fence, post_fence,
-				      wait_cmd, incr_cmd,
-				      skip_buffer_refcounting);
+		gk20a_channel_add_job(c, job, skip_buffer_refcounting);
 
 	g->ops.fifo.userd_gp_put(g, c);
 
@@ -2398,10 +2440,10 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
 	gk20a_dbg_fn("done");
 	return err;
 
+clean_up_job:
+	channel_gk20a_free_job(c, job);
 clean_up:
 	gk20a_dbg_fn("fail");
-	free_priv_cmdbuf(c, wait_cmd);
-	free_priv_cmdbuf(c, incr_cmd);
 	gk20a_fence_put(pre_fence);
 	gk20a_fence_put(post_fence);
 	if (need_job_tracking)
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
index f6571b6f..0d8746b8 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
@@ -218,7 +218,7 @@ void gk20a_channel_abort_clean_up(struct channel_gk20a *ch);
 void gk20a_set_error_notifier(struct channel_gk20a *ch, __u32 error);
 void gk20a_channel_semaphore_wakeup(struct gk20a *g, bool post_events);
 int gk20a_channel_alloc_priv_cmdbuf(struct channel_gk20a *c, u32 size,
-			     struct priv_cmd_entry **entry);
+			     struct priv_cmd_entry *entry);
 int gk20a_free_priv_cmdbuf(struct channel_gk20a *c, struct priv_cmd_entry *e);
 
 int gk20a_enable_channel_tsg(struct gk20a *g, struct channel_gk20a *ch);
diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
index 7a71c4eb..767738ea 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
@@ -57,12 +57,11 @@ static void add_wait_cmd(struct gk20a *g, struct priv_cmd_entry *cmd, u32 off,
 }
 
 static int gk20a_channel_syncpt_wait_syncpt(struct gk20a_channel_sync *s,
-		u32 id, u32 thresh, struct priv_cmd_entry **entry,
-		struct gk20a_fence **fence)
+		u32 id, u32 thresh, struct priv_cmd_entry *wait_cmd,
+		struct gk20a_fence *fence)
 {
 	struct gk20a_channel_syncpt *sp =
 		container_of(s, struct gk20a_channel_syncpt, ops);
-	struct priv_cmd_entry *wait_cmd = NULL;
 	struct channel_gk20a *c = sp->c;
 	int err = 0;
 
@@ -75,7 +74,7 @@ static int gk20a_channel_syncpt_wait_syncpt(struct gk20a_channel_sync *s,
 	if (nvhost_syncpt_is_expired_ext(sp->host1x_pdev, id, thresh))
 		return 0;
 
-	err = gk20a_channel_alloc_priv_cmdbuf(c, 4, &wait_cmd);
+	err = gk20a_channel_alloc_priv_cmdbuf(c, 4, wait_cmd);
 	if (err) {
 		gk20a_err(dev_from_gk20a(c->g),
 				"not enough priv cmd buffer space");
@@ -84,21 +83,18 @@ static int gk20a_channel_syncpt_wait_syncpt(struct gk20a_channel_sync *s,
 
 	add_wait_cmd(c->g, wait_cmd, 0, id, thresh);
 
-	*entry = wait_cmd;
-	*fence = NULL;
 	return 0;
 }
 
 static int gk20a_channel_syncpt_wait_fd(struct gk20a_channel_sync *s, int fd,
-		       struct priv_cmd_entry **entry,
-		       struct gk20a_fence **fence)
+		       struct priv_cmd_entry *wait_cmd,
+		       struct gk20a_fence *fence)
 {
 #ifdef CONFIG_SYNC
 	int i;
 	int num_wait_cmds;
 	struct sync_fence *sync_fence;
 	struct sync_pt *pt;
-	struct priv_cmd_entry *wait_cmd = NULL;
 	struct gk20a_channel_syncpt *sp =
 		container_of(s, struct gk20a_channel_syncpt, ops);
 	struct channel_gk20a *c = sp->c;
@@ -134,7 +130,7 @@ static int gk20a_channel_syncpt_wait_fd(struct gk20a_channel_sync *s, int fd,
 		return 0;
 	}
 
-	err = gk20a_channel_alloc_priv_cmdbuf(c, 4 * num_wait_cmds, &wait_cmd);
+	err = gk20a_channel_alloc_priv_cmdbuf(c, 4 * num_wait_cmds, wait_cmd);
 	if (err) {
 		gk20a_err(dev_from_gk20a(c->g),
 				"not enough priv cmd buffer space");
@@ -172,8 +168,6 @@ static int gk20a_channel_syncpt_wait_fd(struct gk20a_channel_sync *s, int fd,
 	WARN_ON(i != num_wait_cmds);
 	sync_fence_put(sync_fence);
 
-	*entry = wait_cmd;
-	*fence = NULL;
 	return 0;
 #else
 	return -ENODEV;
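The signature change repeated through this file is mechanical: each op used to allocate its own priv_cmd_entry and gk20a_fence and return them through double pointers; it now receives caller-owned storage and initializes it in place. From a caller's point of view (a simplified before/after sketch; error handling elided):

	/* before: the op allocates and hands back pointers */
	struct priv_cmd_entry *wait_cmd = NULL;
	struct gk20a_fence *fence = NULL;
	err = c->sync->wait_syncpt(c->sync, id, thresh, &wait_cmd, &fence);

	/* after: the caller allocates; the op initializes in place */
	struct priv_cmd_entry *wait_cmd =
		kzalloc(sizeof(*wait_cmd), GFP_KERNEL);
	struct gk20a_fence *fence = gk20a_alloc_fence(c);
	err = c->sync->wait_syncpt(c->sync, id, thresh, wait_cmd, fence);

Ops that used to report "no wait needed" by leaving *entry NULL (an already-expired syncpoint, for instance) now express the same thing by returning 0 with the zeroed entry's valid flag still unset.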
@@ -193,15 +187,14 @@ static void gk20a_channel_syncpt_update(void *priv, int nr_completed)
 
 static int __gk20a_channel_syncpt_incr(struct gk20a_channel_sync *s,
 				       bool wfi_cmd,
 				       bool register_irq,
-				       struct priv_cmd_entry **entry,
-				       struct gk20a_fence **fence,
+				       struct priv_cmd_entry *incr_cmd,
+				       struct gk20a_fence *fence,
 				       bool need_sync_fence)
 {
 	u32 thresh;
 	int incr_cmd_size;
 	int off;
 	int err;
-	struct priv_cmd_entry *incr_cmd = NULL;
 	struct gk20a_channel_syncpt *sp =
 		container_of(s, struct gk20a_channel_syncpt, ops);
 	struct channel_gk20a *c = sp->c;
@@ -210,7 +203,7 @@ static int __gk20a_channel_syncpt_incr(struct gk20a_channel_sync *s,
 	if (wfi_cmd)
 		incr_cmd_size += 2;
 
-	err = gk20a_channel_alloc_priv_cmdbuf(c, incr_cmd_size, &incr_cmd);
+	err = gk20a_channel_alloc_priv_cmdbuf(c, incr_cmd_size, incr_cmd);
 	if (err)
 		return err;
 
@@ -267,15 +260,21 @@ static int __gk20a_channel_syncpt_incr(struct gk20a_channel_sync *s,
 		}
 	}
 
-	*fence = gk20a_fence_from_syncpt(sp->host1x_pdev, sp->id, thresh,
+	err = gk20a_fence_from_syncpt(fence, sp->host1x_pdev, sp->id, thresh,
 					 wfi_cmd, need_sync_fence);
-	*entry = incr_cmd;
+	if (err)
+		goto clean_up_priv_cmd;
+
 	return 0;
+
+clean_up_priv_cmd:
+	gk20a_free_priv_cmdbuf(c, incr_cmd);
+	return err;
 }
 
 static int gk20a_channel_syncpt_incr_wfi(struct gk20a_channel_sync *s,
-				struct priv_cmd_entry **entry,
-				struct gk20a_fence **fence)
+				struct priv_cmd_entry *entry,
+				struct gk20a_fence *fence)
 {
 	return __gk20a_channel_syncpt_incr(s,
 			true /* wfi */,
@@ -284,8 +283,8 @@ static int gk20a_channel_syncpt_incr_wfi(struct gk20a_channel_sync *s,
 }
 
 static int gk20a_channel_syncpt_incr(struct gk20a_channel_sync *s,
-			      struct priv_cmd_entry **entry,
-			      struct gk20a_fence **fence,
+			      struct priv_cmd_entry *entry,
+			      struct gk20a_fence *fence,
 			      bool need_sync_fence)
 {
 	/* Don't put wfi cmd to this one since we're not returning
@@ -298,8 +297,8 @@ static int gk20a_channel_syncpt_incr(struct gk20a_channel_sync *s,
 
 static int gk20a_channel_syncpt_incr_user(struct gk20a_channel_sync *s,
 				   int wait_fence_fd,
-				   struct priv_cmd_entry **entry,
-				   struct gk20a_fence **fence,
+				   struct priv_cmd_entry *entry,
+				   struct gk20a_fence *fence,
 				   bool wfi,
 				   bool need_sync_fence)
 {
@@ -500,8 +499,8 @@ static void add_sema_cmd(struct gk20a *g, struct channel_gk20a *c,
 
 static int gk20a_channel_semaphore_wait_syncpt(
 		struct gk20a_channel_sync *s, u32 id,
-		u32 thresh, struct priv_cmd_entry **entry,
-		struct gk20a_fence **fence)
+		u32 thresh, struct priv_cmd_entry *entry,
+		struct gk20a_fence *fence)
 {
 	struct gk20a_channel_semaphore *sema =
 		container_of(s, struct gk20a_channel_semaphore, ops);
@@ -525,7 +524,7 @@ static int gk20a_channel_semaphore_wait_syncpt(
  */
 static int __semaphore_wait_fd_fast_path(struct channel_gk20a *c,
 		struct sync_fence *fence,
-		struct priv_cmd_entry **wait_cmd,
+		struct priv_cmd_entry *wait_cmd,
 		struct gk20a_semaphore **fp_sema)
 {
 	struct gk20a_semaphore *sema;
@@ -551,7 +550,7 @@ static int __semaphore_wait_fd_fast_path(struct channel_gk20a *c,
 	gk20a_semaphore_get(sema);
 	BUG_ON(!atomic_read(&sema->value));
-	add_sema_cmd(c->g, c, sema, *wait_cmd, 8, true, false);
+	add_sema_cmd(c->g, c, sema, wait_cmd, 8, true, false);
 
 	/*
 	 * Make sure that gk20a_channel_semaphore_wait_fd() can create another
@@ -565,8 +564,8 @@ static int __semaphore_wait_fd_fast_path(struct channel_gk20a *c,
 
 static int gk20a_channel_semaphore_wait_fd(
 		struct gk20a_channel_sync *s, int fd,
-		struct priv_cmd_entry **entry,
-		struct gk20a_fence **fence)
+		struct priv_cmd_entry *entry,
+		struct gk20a_fence *fence)
 {
 	struct gk20a_channel_semaphore *sema =
 		container_of(s, struct gk20a_channel_semaphore, ops);
@@ -574,7 +573,7 @@ static int gk20a_channel_semaphore_wait_fd(
 #ifdef CONFIG_SYNC
 	struct gk20a_semaphore *fp_sema;
 	struct sync_fence *sync_fence;
-	struct priv_cmd_entry *wait_cmd = NULL;
+	struct priv_cmd_entry *wait_cmd = entry;
 	struct wait_fence_work *w = NULL;
 	int err, ret, status;
 
@@ -582,19 +581,24 @@ static int gk20a_channel_semaphore_wait_fd(
 	if (!sync_fence)
 		return -EINVAL;
 
-	ret = __semaphore_wait_fd_fast_path(c, sync_fence, &wait_cmd, &fp_sema);
+	ret = __semaphore_wait_fd_fast_path(c, sync_fence, wait_cmd, &fp_sema);
 	if (ret == 0) {
-		if (fp_sema)
-			*fence = gk20a_fence_from_semaphore(sema->timeline,
-							    fp_sema,
-							    &c->semaphore_wq,
-							    NULL, false, false);
-		else
+		if (fp_sema) {
+			err = gk20a_fence_from_semaphore(fence,
+					sema->timeline,
+					fp_sema,
+					&c->semaphore_wq,
+					NULL, false, false);
+			if (err) {
+				gk20a_semaphore_put(fp_sema);
+				goto clean_up_priv_cmd;
+			}
+		} else
 			/*
-			 * Allocate an empty fence. It will instantly return
+			 * Init an empty fence. It will instantly return
 			 * from gk20a_fence_wait().
 			 */
-			*fence = gk20a_alloc_fence(NULL, NULL, false);
+			gk20a_init_fence(fence, NULL, NULL, false);
 
 		sync_fence_put(sync_fence);
 		goto skip_slow_path;
@@ -611,18 +615,17 @@ static int gk20a_channel_semaphore_wait_fd(
 		goto skip_slow_path;
 	}
 
-	err = gk20a_channel_alloc_priv_cmdbuf(c, 8, &wait_cmd);
+	err = gk20a_channel_alloc_priv_cmdbuf(c, 8, wait_cmd);
 	if (err) {
 		gk20a_err(dev_from_gk20a(c->g),
 				"not enough priv cmd buffer space");
-		sync_fence_put(sync_fence);
-		return -ENOMEM;
+		goto clean_up_sync_fence;
 	}
 
 	w = kzalloc(sizeof(*w), GFP_KERNEL);
 	if (!w) {
 		err = -ENOMEM;
-		goto fail_free_cmdbuf;
+		goto clean_up_priv_cmd;
 	}
 
 	sync_fence_waiter_init(&w->waiter, gk20a_channel_semaphore_launcher);
@@ -631,7 +634,7 @@ static int gk20a_channel_semaphore_wait_fd(
 	if (!w->sema) {
 		gk20a_err(dev_from_gk20a(c->g), "ran out of semaphores");
 		err = -ENOMEM;
-		goto fail_free_worker;
+		goto clean_up_worker;
 	}
 
 	/* worker takes one reference */
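The next hunk moves creation of the job's gk20a_fence ahead of sync_fence_wait_async(). The ordering matters: the waiter callback may run immediately, even before sync_fence_wait_async() returns, if the Android sync_fence has already signaled, so the fence must be fully initialized (or the error path taken) before the waiter is exposed. Condensed:

	/* 1. init the caller's fence while we still own all the pieces */
	err = gk20a_fence_from_semaphore(fence, sema->timeline, w->sema,
					 &c->semaphore_wq, NULL, false, false);
	if (err)
		goto clean_up_sema;	/* nothing published yet */

	/* 2. only now hand the waiter over; the callback may fire at
	 *    any point after this call */
	ret = sync_fence_wait_async(sync_fence, &w->waiter);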
@@ -641,6 +644,16 @@ static int gk20a_channel_semaphore_wait_fd(
 	/* GPU unblocked when the semaphore value increments. */
 	add_sema_cmd(c->g, c, w->sema, wait_cmd, 8, true, false);
 
+	/*
+	 * We need to create the fence before adding the waiter to ensure
+	 * that we properly clean up in the event the sync_fence has
+	 * already signaled
+	 */
+	err = gk20a_fence_from_semaphore(fence, sema->timeline, w->sema,
+					 &c->semaphore_wq, NULL, false, false);
+	if (err)
+		goto clean_up_sema;
+
 	ret = sync_fence_wait_async(sync_fence, &w->waiter);
 
 	/*
@@ -655,24 +668,22 @@ static int gk20a_channel_semaphore_wait_fd(
 		gk20a_semaphore_put(w->sema);
 	}
 
-	/* XXX - this fixes an actual bug, we need to hold a ref to this
-	   semaphore while the job is in flight. */
-	*fence = gk20a_fence_from_semaphore(sema->timeline, w->sema,
-					    &c->semaphore_wq,
-					    NULL, false, false);
-
 skip_slow_path:
-	*entry = wait_cmd;
 	return 0;
 
-fail_free_worker:
-	if (w && w->sema)
-		gk20a_semaphore_put(w->sema);
+clean_up_sema:
+	/*
+	 * Release the refs to the semaphore, including
+	 * the one for the worker since it will never run.
+	 */
+	gk20a_semaphore_put(w->sema);
+	gk20a_semaphore_put(w->sema);
+clean_up_worker:
 	kfree(w);
+clean_up_priv_cmd:
+	gk20a_free_priv_cmdbuf(c, entry);
+clean_up_sync_fence:
 	sync_fence_put(sync_fence);
-fail_free_cmdbuf:
-	if (wait_cmd)
-		gk20a_free_priv_cmdbuf(c, wait_cmd);
 	return err;
 #else
 	gk20a_err(dev_from_gk20a(c->g),
@@ -684,12 +695,11 @@ fail_free_cmdbuf:
 static int __gk20a_channel_semaphore_incr(
 		struct gk20a_channel_sync *s, bool wfi_cmd,
 		struct sync_fence *dependency,
-		struct priv_cmd_entry **entry,
-		struct gk20a_fence **fence,
+		struct priv_cmd_entry *incr_cmd,
+		struct gk20a_fence *fence,
 		bool need_sync_fence)
 {
 	int incr_cmd_size;
-	struct priv_cmd_entry *incr_cmd = NULL;
 	struct gk20a_channel_semaphore *sp =
 		container_of(s, struct gk20a_channel_semaphore, ops);
 	struct channel_gk20a *c = sp->c;
@@ -704,29 +714,37 @@ static int __gk20a_channel_semaphore_incr(
 	}
 
 	incr_cmd_size = 10;
-	err = gk20a_channel_alloc_priv_cmdbuf(c, incr_cmd_size, &incr_cmd);
+	err = gk20a_channel_alloc_priv_cmdbuf(c, incr_cmd_size, incr_cmd);
 	if (err) {
 		gk20a_err(dev_from_gk20a(c->g),
 				"not enough priv cmd buffer space");
-		gk20a_semaphore_put(semaphore);
-		return err;
+		goto clean_up_sema;
 	}
 
 	/* Release the completion semaphore. */
 	add_sema_cmd(c->g, c, semaphore, incr_cmd, 14, false, wfi_cmd);
 
-	*fence = gk20a_fence_from_semaphore(sp->timeline, semaphore,
-					    &c->semaphore_wq,
-					    dependency, wfi_cmd,
-					    need_sync_fence);
-	*entry = incr_cmd;
+	err = gk20a_fence_from_semaphore(fence,
+					 sp->timeline, semaphore,
+					 &c->semaphore_wq,
+					 dependency, wfi_cmd,
+					 need_sync_fence);
+	if (err)
+		goto clean_up_priv_cmd;
+
 	return 0;
+
+clean_up_priv_cmd:
+	gk20a_free_priv_cmdbuf(c, incr_cmd);
+clean_up_sema:
+	gk20a_semaphore_put(semaphore);
+	return err;
 }
 
 static int gk20a_channel_semaphore_incr_wfi(
 		struct gk20a_channel_sync *s,
-		struct priv_cmd_entry **entry,
-		struct gk20a_fence **fence)
+		struct priv_cmd_entry *entry,
+		struct gk20a_fence *fence)
 {
 	return __gk20a_channel_semaphore_incr(s,
 			true /* wfi */,
@@ -736,8 +754,8 @@ static int gk20a_channel_semaphore_incr_wfi(
 
 static int gk20a_channel_semaphore_incr(
 		struct gk20a_channel_sync *s,
-		struct priv_cmd_entry **entry,
-		struct gk20a_fence **fence,
+		struct priv_cmd_entry *entry,
+		struct gk20a_fence *fence,
 		bool need_sync_fence)
 {
 	/* Don't put wfi cmd to this one since we're not returning
@@ -751,8 +769,8 @@ static int gk20a_channel_semaphore_incr(
 static int gk20a_channel_semaphore_incr_user(
 		struct gk20a_channel_sync *s,
 		int wait_fence_fd,
-		struct priv_cmd_entry **entry,
-		struct gk20a_fence **fence,
+		struct priv_cmd_entry *entry,
+		struct gk20a_fence *fence,
 		bool wfi,
 		bool need_sync_fence)
 {
diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h
index 4b0918de..c3a92ad2 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h
@@ -36,8 +36,8 @@ struct gk20a_channel_sync {
 	 * cmdbuf is executed.
 	 */
 	int (*wait_syncpt)(struct gk20a_channel_sync *s, u32 id, u32 thresh,
-			   struct priv_cmd_entry **entry,
-			   struct gk20a_fence **fence);
+			   struct priv_cmd_entry *entry,
+			   struct gk20a_fence *fence);
 
 	/* Generate a gpu wait cmdbuf from sync fd.
 	 * Returns
@@ -46,8 +46,8 @@ struct gk20a_channel_sync {
 	 * cmdbuf is executed.
 	 */
 	int (*wait_fd)(struct gk20a_channel_sync *s, int fd,
-		       struct priv_cmd_entry **entry,
-		       struct gk20a_fence **fence);
+		       struct priv_cmd_entry *entry,
+		       struct gk20a_fence *fence);
 
 	/* Increment syncpoint/semaphore.
 	 * Returns
@@ -55,8 +55,8 @@ struct gk20a_channel_sync {
 	 * - a fence that can be passed to wait_cpu() and is_expired().
 	 */
 	int (*incr)(struct gk20a_channel_sync *s,
-		    struct priv_cmd_entry **entry,
-		    struct gk20a_fence **fence,
+		    struct priv_cmd_entry *entry,
+		    struct gk20a_fence *fence,
 		    bool need_sync_fence);
 
 	/* Increment syncpoint/semaphore, preceded by a wfi.
@@ -65,8 +65,8 @@ struct gk20a_channel_sync {
 	 * - a fence that can be passed to wait_cpu() and is_expired().
 	 */
 	int (*incr_wfi)(struct gk20a_channel_sync *s,
-			struct priv_cmd_entry **entry,
-			struct gk20a_fence **fence);
+			struct priv_cmd_entry *entry,
+			struct gk20a_fence *fence);
 
 	/* Increment syncpoint/semaphore, so that the returned fence represents
 	 * work completion (may need wfi) and can be returned to user space.
@@ -77,8 +77,8 @@ struct gk20a_channel_sync {
 	 */
 	int (*incr_user)(struct gk20a_channel_sync *s,
 			 int wait_fence_fd,
-			 struct priv_cmd_entry **entry,
-			 struct gk20a_fence **fence,
+			 struct priv_cmd_entry *entry,
+			 struct gk20a_fence *fence,
 			 bool wfi,
 			 bool need_sync_fence);
 
diff --git a/drivers/gpu/nvgpu/gk20a/fence_gk20a.c b/drivers/gpu/nvgpu/gk20a/fence_gk20a.c
index 596dc549..f788829f 100644
--- a/drivers/gpu/nvgpu/gk20a/fence_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fence_gk20a.c
@@ -63,16 +63,27 @@ struct gk20a_fence *gk20a_fence_get(struct gk20a_fence *f)
 	return f;
 }
 
+static inline bool gk20a_fence_is_valid(struct gk20a_fence *f)
+{
+	bool valid = f->valid;
+
+	rmb();
+	return valid;
+}
+
 int gk20a_fence_wait(struct gk20a_fence *f, int timeout)
 {
-	if (!tegra_platform_is_silicon())
-		timeout = (u32)MAX_SCHEDULE_TIMEOUT;
-	return f->ops->wait(f, timeout);
+	if (f && gk20a_fence_is_valid(f)) {
+		if (!tegra_platform_is_silicon())
+			timeout = (u32)MAX_SCHEDULE_TIMEOUT;
+		return f->ops->wait(f, timeout);
+	}
+	return 0;
 }
 
 bool gk20a_fence_is_expired(struct gk20a_fence *f)
 {
-	if (f && f->ops)
+	if (f && gk20a_fence_is_valid(f) && f->ops)
 		return f->ops->is_expired(f);
 	else
 		return true;
@@ -83,7 +94,7 @@ int gk20a_fence_install_fd(struct gk20a_fence *f)
 #ifdef CONFIG_SYNC
 	int fd;
 
-	if (!f->sync_fence)
+	if (!f || !gk20a_fence_is_valid(f) || !f->sync_fence)
 		return -EINVAL;
 
 	fd = get_unused_fd_flags(O_RDWR);
@@ -98,18 +109,28 @@ int gk20a_fence_install_fd(struct gk20a_fence *f)
 #endif
 }
 
-struct gk20a_fence *gk20a_alloc_fence(const struct gk20a_fence_ops *ops,
-				      struct sync_fence *sync_fence, bool wfi)
+struct gk20a_fence *gk20a_alloc_fence(struct channel_gk20a *c)
 {
-	struct gk20a_fence *f = kzalloc(sizeof(*f), GFP_KERNEL);
-	if (!f)
+	struct gk20a_fence *fence;
+
+	fence = kzalloc(sizeof(struct gk20a_fence), GFP_KERNEL);
+	if (!fence)
 		return NULL;
-	kref_init(&f->ref);
+
+	kref_init(&fence->ref);
+	return fence;
+}
+
+void gk20a_init_fence(struct gk20a_fence *f,
+		const struct gk20a_fence_ops *ops,
+		struct sync_fence *sync_fence, bool wfi)
+{
+	if (!f)
+		return;
 	f->ops = ops;
 	f->sync_fence = sync_fence;
 	f->wfi = wfi;
 	f->syncpt_id = -1;
-	return f;
 }
 
 /* Fences that are backed by GPU semaphores: */
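The allocation/init split above gives gk20a_fence a two-phase lifecycle: gk20a_alloc_fence() produces only zeroed, refcounted storage, and the fence becomes meaningful once an init path fills it in and flips valid. Until then the accessors degrade safely, as the hunks show. A hypothetical caller under this contract (host1x_pdev, id and value stand in for real syncpoint parameters):

	struct gk20a_fence *f = gk20a_alloc_fence(c); /* storage + kref only */
	int err;

	if (!f)
		return -ENOMEM;

	/* f->valid is still false: gk20a_fence_wait() would return 0
	 * without waiting, and gk20a_fence_is_expired() reports true */

	err = gk20a_fence_from_syncpt(f, host1x_pdev, id, value,
				      false /* wfi */, false);
	if (err) {
		gk20a_fence_put(f);	/* drop the allocation-time ref */
		return err;
	}
	/* only now is f safe to hand out and wait on */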
@@ -143,14 +164,15 @@ static const struct gk20a_fence_ops gk20a_semaphore_fence_ops = {
 };
 
 /* This function takes ownership of the semaphore */
-struct gk20a_fence *gk20a_fence_from_semaphore(
+int gk20a_fence_from_semaphore(
+		struct gk20a_fence *fence_out,
 		struct sync_timeline *timeline,
 		struct gk20a_semaphore *semaphore,
 		wait_queue_head_t *semaphore_wq,
 		struct sync_fence *dependency,
 		bool wfi, bool need_sync_fence)
 {
-	struct gk20a_fence *f;
+	struct gk20a_fence *f = fence_out;
 	struct sync_fence *sync_fence = NULL;
 
 #ifdef CONFIG_SYNC
@@ -159,21 +181,26 @@ struct gk20a_fence *gk20a_fence_from_semaphore(
 			dependency, "f-gk20a-0x%04x",
 			gk20a_semaphore_gpu_ro_va(semaphore));
 		if (!sync_fence)
-			return NULL;
+			return -1;
 	}
 #endif
 
-	f = gk20a_alloc_fence(&gk20a_semaphore_fence_ops, sync_fence, wfi);
+	gk20a_init_fence(f, &gk20a_semaphore_fence_ops, sync_fence, wfi);
 	if (!f) {
 #ifdef CONFIG_SYNC
 		sync_fence_put(sync_fence);
 #endif
-		return NULL;
+		return -EINVAL;
 	}
 
 	f->semaphore = semaphore;
 	f->semaphore_wq = semaphore_wq;
-	return f;
+
+	/* commit previous writes before setting the valid flag */
+	wmb();
+	f->valid = true;
+
+	return 0;
 }
 
 #ifdef CONFIG_TEGRA_GK20A
@@ -197,11 +224,13 @@ static const struct gk20a_fence_ops gk20a_syncpt_fence_ops = {
 	.is_expired = &gk20a_syncpt_fence_is_expired,
 };
 
-struct gk20a_fence *gk20a_fence_from_syncpt(struct platform_device *host1x_pdev,
-					    u32 id, u32 value, bool wfi,
-					    bool need_sync_fence)
+int gk20a_fence_from_syncpt(
+		struct gk20a_fence *fence_out,
+		struct platform_device *host1x_pdev,
+		u32 id, u32 value, bool wfi,
+		bool need_sync_fence)
 {
-	struct gk20a_fence *f;
+	struct gk20a_fence *f = fence_out;
 	struct sync_fence *sync_fence = NULL;
 
 #ifdef CONFIG_SYNC
@@ -214,27 +243,32 @@ struct gk20a_fence *gk20a_fence_from_syncpt(struct platform_device *host1x_pdev,
 
 	sync_fence = nvhost_sync_create_fence(host1x_pdev, &pt, 1,
 					      "fence");
 	if (IS_ERR(sync_fence))
-		return NULL;
+		return -1;
 	}
 #endif
 
-	f = gk20a_alloc_fence(&gk20a_syncpt_fence_ops, sync_fence, wfi);
+	gk20a_init_fence(f, &gk20a_syncpt_fence_ops, sync_fence, wfi);
 	if (!f) {
 #ifdef CONFIG_SYNC
 		if (sync_fence)
 			sync_fence_put(sync_fence);
 #endif
-		return NULL;
+		return -EINVAL;
 	}
 
 	f->host1x_pdev = host1x_pdev;
 	f->syncpt_id = id;
 	f->syncpt_value = value;
-	return f;
+
+	/* commit previous writes before setting the valid flag */
+	wmb();
+	f->valid = true;
+
+	return 0;
 }
 
 #else
-struct gk20a_fence *gk20a_fence_from_syncpt(struct platform_device *host1x_pdev,
+int gk20a_fence_from_syncpt(struct platform_device *host1x_pdev,
 					    u32 id, u32 value, bool wfi)
 {
-	return NULL;
+	return -EINVAL;
 }
 #endif
diff --git a/drivers/gpu/nvgpu/gk20a/fence_gk20a.h b/drivers/gpu/nvgpu/gk20a/fence_gk20a.h
index 35488ea3..3fe2d8b2 100644
--- a/drivers/gpu/nvgpu/gk20a/fence_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/fence_gk20a.h
@@ -31,6 +31,7 @@ struct gk20a_fence_ops;
 
 struct gk20a_fence {
 	/* Valid for all fence types: */
+	bool valid;
 	struct kref ref;
 	bool wfi;
 	struct sync_fence *sync_fence;
@@ -47,21 +48,25 @@ struct gk20a_fence {
 };
 
 /* Fences can be created from semaphores or syncpoint (id, value) pairs */
-struct gk20a_fence *gk20a_fence_from_semaphore(
+int gk20a_fence_from_semaphore(
+		struct gk20a_fence *fence_out,
 		struct sync_timeline *timeline,
 		struct gk20a_semaphore *semaphore,
 		wait_queue_head_t *semaphore_wq,
 		struct sync_fence *dependency,
 		bool wfi, bool need_sync_fence);
 
-struct gk20a_fence *gk20a_fence_from_syncpt(
+int gk20a_fence_from_syncpt(
+		struct gk20a_fence *fence_out,
 		struct platform_device *host1x_pdev,
 		u32 id, u32 value, bool wfi,
 		bool need_sync_fence);
 
-struct gk20a_fence *gk20a_alloc_fence(const struct gk20a_fence_ops *ops,
-				      struct sync_fence *sync_fence,
-				      bool wfi);
+struct gk20a_fence *gk20a_alloc_fence(struct channel_gk20a *c);
+
+void gk20a_init_fence(struct gk20a_fence *f,
+		const struct gk20a_fence_ops *ops,
+		struct sync_fence *sync_fence, bool wfi);
 
 /* Fence operations */
 void gk20a_fence_put(struct gk20a_fence *f);
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index b34ff4a7..b2cca072 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -198,6 +198,7 @@ struct priv_cmd_queue {
 };
 
 struct priv_cmd_entry {
+	bool valid;
 	struct mem_desc *mem;
 	u32 off;	/* offset in mem, in u32 entries */
 	u64 gva;
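Taken together, the submit path after this patch runs roughly as follows (a condensed sketch of the control flow in gk20a_submit_channel_gpfifo; locking, gpfifo bookkeeping and most error handling elided):

	if (need_job_tracking) {
		job = channel_gk20a_alloc_job(c); /* still kzalloc'd per submit;
						   * later patches pre-allocate */
		if (!job) {
			err = -ENOMEM;
			goto clean_up;
		}

		/* allocates job->wait_cmd/pre_fence/incr_cmd/post_fence and
		 * lets the sync framework initialize them in place */
		err = gk20a_submit_prepare_syncs(c, fence, job,
						 &wait_cmd, &incr_cmd,
						 &pre_fence, &post_fence,
						 force_need_sync_fence, flags);
		if (err)
			goto clean_up_job; /* frees only the job wrapper */
	}

	/* ... wait_cmd, the user's gpfifo entries and incr_cmd are appended ... */

	if (need_job_tracking)
		gk20a_channel_add_job(c, job, skip_buffer_refcounting);

On success the job, rather than the submit function's locals, owns the four sync objects, which is what allows the failure unwinding to stay local to gk20a_submit_prepare_syncs.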