path: root/drivers/gpu/nvgpu
author	Sachit Kadle <skadle@nvidia.com>	2016-09-14 14:45:38 -0400
committer	mobile promotions <svcmobile_promotions@nvidia.com>	2016-10-21 14:23:53 -0400
commit	ab593b9ccd3132404406ef6340348fdb28c23bf0 (patch)
tree	394624e853fdb5e7fbe380c1ee8f4f2fca9fe24d /drivers/gpu/nvgpu
parent	55dba9f1a91a6f94fb63090880ae28eedf4189a2 (diff)
gpu: nvgpu: make deferred clean-up conditional
This change makes the invocation of the deferred job clean-up
mechanism conditional. For submissions that require job tracking,
deferred clean-up is only required if any of the following
conditions are met:

1) Channel's deterministic flag is not set
2) Rail-gating is enabled
3) Channel WDT is enabled
4) Buffer refcounting is enabled
5) Dependency on Sync Framework

In case deferred clean-up is not needed, we clean up a single
job-tracking resource in the submit path. For deterministic channels,
we do not allow deferred clean-up to occur and fail any submits that
require it.

Bug 1795076

Change-Id: I4021dffe8a71aa58f12db6b58518d3f4021f3313
Signed-off-by: Sachit Kadle <skadle@nvidia.com>
Reviewed-on: http://git-master/r/1220920
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
(cherry picked from commit b09f7589d5ad3c496e7350f1ed583a4fe2db574a)
Reviewed-on: http://git-master/r/1223941
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
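In essence, the submit path now decides per submission whether clean-up
can run inline. A condensed sketch of the decision logic this change
introduces (identifiers as in the diff below; not the verbatim driver
source):

	/* Inside gk20a_submit_channel_gpfifo(), once job tracking is
	 * known to be required: */
	need_deferred_cleanup = !c->deterministic ||
				need_sync_framework ||
				c->wdt_enabled ||
				platform->can_railgate ||
				!skip_buffer_refcounting;

	/* Deterministic channels must never defer clean-up; fail the
	 * submit instead. */
	if (c->deterministic && need_deferred_cleanup)
		return -EINVAL;

	/* If no deferral is needed, reclaim a single job-tracking
	 * resource inline in the submit path. */
	if (!need_deferred_cleanup)
		gk20a_channel_clean_up_jobs(c, false /* clean one job */);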
Diffstat (limited to 'drivers/gpu/nvgpu')
-rw-r--r--	drivers/gpu/nvgpu/gk20a/channel_gk20a.c	87
-rw-r--r--	drivers/gpu/nvgpu/gk20a/channel_gk20a.h	2
-rw-r--r--	drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c	25
-rw-r--r--	drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h	7
-rw-r--r--	drivers/gpu/nvgpu/gk20a/fence_gk20a.c	20
5 files changed, 117 insertions(+), 24 deletions(-)
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
index 6d4b4f60..f839bfbc 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -76,7 +76,8 @@ static void gk20a_free_error_notifiers(struct channel_gk20a *ch);
 
 static u32 gk20a_get_channel_watchdog_timeout(struct channel_gk20a *ch);
 
-static void gk20a_channel_clean_up_jobs(struct work_struct *work);
+static void gk20a_channel_clean_up_jobs(struct channel_gk20a *c,
+				bool clean_all);
 static void gk20a_channel_cancel_job_clean_up(struct channel_gk20a *c,
 				bool wait_for_completion);
 
@@ -1029,6 +1030,7 @@ unbind:
 	g->ops.fifo.free_inst(g, ch);
 
 	ch->vpr = false;
+	ch->deterministic = false;
 	ch->vm = NULL;
 
 	WARN_ON(ch->sync);
@@ -1703,9 +1705,12 @@ int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c,
 
 	gpfifo_size = args->num_entries;
 
-	if (args->flags & NVGPU_ALLOC_GPFIFO_FLAGS_VPR_ENABLED)
+	if (args->flags & NVGPU_ALLOC_GPFIFO_EX_FLAGS_VPR_ENABLED)
 		c->vpr = true;
 
+	if (args->flags & NVGPU_ALLOC_GPFIFO_EX_FLAGS_DETERMINISTIC)
+		c->deterministic = true;
+
 	/* an address space needs to have been bound at this point. */
 	if (!gk20a_channel_as_bound(c)) {
 		gk20a_err(d,
@@ -2173,10 +2178,17 @@ err_put_vm:
 	return err;
 }
 
-static void gk20a_channel_clean_up_jobs(struct work_struct *work)
+static void gk20a_channel_clean_up_runcb_fn(struct work_struct *work)
 {
 	struct channel_gk20a *c = container_of(to_delayed_work(work),
 			struct channel_gk20a, clean_up.wq);
+
+	gk20a_channel_clean_up_jobs(c, true);
+}
+
+static void gk20a_channel_clean_up_jobs(struct channel_gk20a *c,
+					bool clean_all)
+{
 	struct vm_gk20a *vm;
 	struct channel_gk20a_job *job;
 	struct gk20a_platform *platform;
@@ -2273,6 +2285,9 @@ static void gk20a_channel_clean_up_jobs(struct work_struct *work)
 		channel_gk20a_free_job(c, job);
 		job_finished = 1;
 		gk20a_idle(g->dev);
+
+		if (!clean_all)
+			break;
 	}
 
 	if (job_finished && c->update_fn)
@@ -2419,6 +2434,7 @@ static int gk20a_submit_prepare_syncs(struct channel_gk20a *c,
 				      struct gk20a_fence **pre_fence,
 				      struct gk20a_fence **post_fence,
 				      bool force_need_sync_fence,
+				      bool register_irq,
 				      u32 flags)
 {
 	struct gk20a *g = c->g;
@@ -2515,10 +2531,12 @@ static int gk20a_submit_prepare_syncs(struct channel_gk20a *c,
 
 	if (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET)
 		err = c->sync->incr_user(c->sync, wait_fence_fd, job->incr_cmd,
-				 job->post_fence, need_wfi, need_sync_fence);
+				 job->post_fence, need_wfi, need_sync_fence,
+				 register_irq);
 	else
 		err = c->sync->incr(c->sync, job->incr_cmd,
-			    job->post_fence, need_sync_fence);
+			    job->post_fence, need_sync_fence,
+			    register_irq);
 	if (!err) {
 		*incr_cmd = job->incr_cmd;
 		*post_fence = job->post_fence;
@@ -2568,6 +2586,7 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
 			NVGPU_SUBMIT_GPFIFO_FLAGS_SKIP_BUFFER_REFCOUNTING);
 	int err = 0;
 	bool need_job_tracking;
+	bool need_deferred_cleanup = false;
 	struct nvgpu_gpfifo __user *user_gpfifo = args ?
 		(struct nvgpu_gpfifo __user *)(uintptr_t)args->gpfifo : NULL;
 	struct gk20a_platform *platform = gk20a_get_platform(d);
@@ -2626,13 +2645,48 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
 			!skip_buffer_refcounting;
 
 	if (need_job_tracking) {
+		bool need_sync_framework = false;
+
 		/*
-		 * If the submit is to have deterministic latency and
+		 * If the channel is to have deterministic latency and
 		 * job tracking is required, the channel must have
 		 * pre-allocated resources. Otherwise, we fail the submit here
 		 */
-		if ((flags & NVGPU_SUBMIT_GPFIFO_FLAGS_DETERMINISTIC) &&
-		    !channel_gk20a_is_prealloc_enabled(c))
+		if (c->deterministic && !channel_gk20a_is_prealloc_enabled(c))
+			return -EINVAL;
+
+		need_sync_framework = force_need_sync_fence ||
+			gk20a_channel_sync_needs_sync_framework(c) ||
+			(flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE &&
+			 (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT ||
+			  flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET));
+
+		/*
+		 * Deferred clean-up is necessary for any of the following
+		 * conditions:
+		 * - channel's deterministic flag is not set
+		 * - dependency on sync framework, which could make the
+		 *   behavior of the clean-up operation non-deterministic
+		 *   (should not be performed in the submit path)
+		 * - channel wdt
+		 * - GPU rail-gating
+		 * - buffer refcounting
+		 *
+		 * If none of the conditions are met, then deferred clean-up
+		 * is not required, and we clean-up one job-tracking
+		 * resource in the submit path.
+		 */
+		need_deferred_cleanup = !c->deterministic ||
+					need_sync_framework ||
+					c->wdt_enabled ||
+					platform->can_railgate ||
+					!skip_buffer_refcounting;
+
+		/*
+		 * For deterministic channels, we don't allow deferred clean_up
+		 * processing to occur. In cases we hit this, we fail the submit
+		 */
+		if (c->deterministic && need_deferred_cleanup)
 			return -EINVAL;
 
 		/* gk20a_channel_update releases this ref. */
@@ -2641,6 +2695,11 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
 			gk20a_err(d, "failed to host gk20a to submit gpfifo");
 			return err;
 		}
+
+		if (!need_deferred_cleanup) {
+			/* clean up a single job */
+			gk20a_channel_clean_up_jobs(c, false);
+		}
 	}
 
 	trace_gk20a_channel_submit_gpfifo(dev_name(c->g->dev),
@@ -2678,7 +2737,9 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
 		err = gk20a_submit_prepare_syncs(c, fence, job,
 						 &wait_cmd, &incr_cmd,
 						 &pre_fence, &post_fence,
-						 force_need_sync_fence, flags);
+						 force_need_sync_fence,
+						 need_deferred_cleanup,
+						 flags);
 		if (err)
 			goto clean_up_job;
 	}
@@ -2727,7 +2788,7 @@ clean_up:
 	gk20a_dbg_fn("fail");
 	gk20a_fence_put(pre_fence);
 	gk20a_fence_put(post_fence);
-	if (need_job_tracking)
+	if (need_deferred_cleanup)
 		gk20a_idle(g->dev);
 	return err;
 }
@@ -2749,7 +2810,7 @@ int gk20a_init_channel_support(struct gk20a *g, u32 chid)
 	raw_spin_lock_init(&c->timeout.lock);
 	mutex_init(&c->sync_lock);
 	INIT_DELAYED_WORK(&c->timeout.wq, gk20a_channel_timeout_handler);
-	INIT_DELAYED_WORK(&c->clean_up.wq, gk20a_channel_clean_up_jobs);
+	INIT_DELAYED_WORK(&c->clean_up.wq, gk20a_channel_clean_up_runcb_fn);
 	mutex_init(&c->clean_up.lock);
 	INIT_LIST_HEAD(&c->joblist.dynamic.jobs);
 #if defined(CONFIG_GK20A_CYCLE_STATS)
@@ -3416,10 +3477,10 @@ long gk20a_channel_ioctl(struct file *filp,
 
 		if (!is_power_of_2(alloc_gpfifo_ex_args->num_entries)) {
 			err = -EINVAL;
+			gk20a_idle(dev);
 			break;
 		}
-		err = gk20a_alloc_channel_gpfifo(ch,
-			(struct nvgpu_alloc_gpfifo_ex_args *)buf);
+		err = gk20a_alloc_channel_gpfifo(ch, alloc_gpfifo_ex_args);
 		gk20a_idle(dev);
 		break;
 	}
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
index 8cceb6b2..92b51cca 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
@@ -131,7 +131,7 @@ struct channel_gk20a {
 	bool bound;
 	bool first_init;
 	bool vpr;
-	bool no_block;
+	bool deterministic;
 	bool cde;
 	pid_t pid;
 	pid_t tgid;
diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
index 767738ea..febea719 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
@@ -285,13 +285,14 @@ static int gk20a_channel_syncpt_incr_wfi(struct gk20a_channel_sync *s,
 static int gk20a_channel_syncpt_incr(struct gk20a_channel_sync *s,
 			      struct priv_cmd_entry *entry,
 			      struct gk20a_fence *fence,
-			      bool need_sync_fence)
+			      bool need_sync_fence,
+			      bool register_irq)
 {
 	/* Don't put wfi cmd to this one since we're not returning
 	 * a fence to user space. */
 	return __gk20a_channel_syncpt_incr(s,
 			false /* no wfi */,
-			true /* register irq */,
+			register_irq /* register irq */,
 			entry, fence, need_sync_fence);
 }
 
297 298
@@ -300,13 +301,14 @@ static int gk20a_channel_syncpt_incr_user(struct gk20a_channel_sync *s,
 		struct priv_cmd_entry *entry,
 		struct gk20a_fence *fence,
 		bool wfi,
-		bool need_sync_fence)
+		bool need_sync_fence,
+		bool register_irq)
 {
 	/* Need to do 'wfi + host incr' since we return the fence
 	 * to user space. */
 	return __gk20a_channel_syncpt_incr(s,
 			wfi,
-			true /* register irq */,
+			register_irq /* register irq */,
 			entry, fence, need_sync_fence);
 }
 
@@ -756,7 +758,8 @@ static int gk20a_channel_semaphore_incr(
 		struct gk20a_channel_sync *s,
 		struct priv_cmd_entry *entry,
 		struct gk20a_fence *fence,
-		bool need_sync_fence)
+		bool need_sync_fence,
+		bool register_irq)
 {
 	/* Don't put wfi cmd to this one since we're not returning
 	 * a fence to user space. */
@@ -772,7 +775,8 @@ static int gk20a_channel_semaphore_incr_user(
 		struct priv_cmd_entry *entry,
 		struct gk20a_fence *fence,
 		bool wfi,
-		bool need_sync_fence)
+		bool need_sync_fence,
+		bool register_irq)
 {
 #ifdef CONFIG_SYNC
 	struct sync_fence *dependency = NULL;
@@ -889,3 +893,12 @@ struct gk20a_channel_sync *gk20a_channel_sync_create(struct channel_gk20a *c)
 #endif
 	return gk20a_channel_semaphore_create(c);
 }
+
+bool gk20a_channel_sync_needs_sync_framework(struct channel_gk20a *c)
+{
+#ifdef CONFIG_TEGRA_GK20A
+	if (gk20a_platform_has_syncpoints(c->g->dev))
+		return false;
+#endif
+	return true;
+}
diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h
index c3a92ad2..5e75dd9b 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h
@@ -57,7 +57,8 @@ struct gk20a_channel_sync {
 	int (*incr)(struct gk20a_channel_sync *s,
 		    struct priv_cmd_entry *entry,
 		    struct gk20a_fence *fence,
-		    bool need_sync_fence);
+		    bool need_sync_fence,
+		    bool register_irq);
 
 	/* Increment syncpoint/semaphore, preceded by a wfi.
 	 * Returns
@@ -80,7 +81,8 @@ struct gk20a_channel_sync {
 		    struct priv_cmd_entry *entry,
 		    struct gk20a_fence *fence,
 		    bool wfi,
-		    bool need_sync_fence);
+		    bool need_sync_fence,
+		    bool register_irq);
 
 	/* Reset the channel syncpoint/semaphore. */
 	void (*set_min_eq_max)(struct gk20a_channel_sync *s);
@@ -99,5 +101,6 @@ struct gk20a_channel_sync {
 
 void gk20a_channel_sync_destroy(struct gk20a_channel_sync *sync);
 struct gk20a_channel_sync *gk20a_channel_sync_create(struct channel_gk20a *c);
+bool gk20a_channel_sync_needs_sync_framework(struct channel_gk20a *c);
 
 #endif
diff --git a/drivers/gpu/nvgpu/gk20a/fence_gk20a.c b/drivers/gpu/nvgpu/gk20a/fence_gk20a.c
index 7228f6f7..4673f28c 100644
--- a/drivers/gpu/nvgpu/gk20a/fence_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fence_gk20a.c
@@ -272,8 +272,24 @@ static int gk20a_syncpt_fence_wait(struct gk20a_fence *f, long timeout)
 
 static bool gk20a_syncpt_fence_is_expired(struct gk20a_fence *f)
 {
-	return nvhost_syncpt_is_expired_ext(f->host1x_pdev, f->syncpt_id,
-					    f->syncpt_value);
+
+	/*
+	 * In cases we don't register a notifier, we can't expect the
+	 * syncpt value to be updated. For this case, we force a read
+	 * of the value from HW, and then check for expiration.
+	 */
+	if (!nvhost_syncpt_is_expired_ext(f->host1x_pdev, f->syncpt_id,
+					  f->syncpt_value)) {
+		u32 val;
+
+		if (!nvhost_syncpt_read_ext_check(f->host1x_pdev,
+				f->syncpt_id, &val)) {
+			return nvhost_syncpt_is_expired_ext(f->host1x_pdev,
+					f->syncpt_id, f->syncpt_value);
+		}
+	}
+
+	return true;
 }
 
 static const struct gk20a_fence_ops gk20a_syncpt_fence_ops = {
279static const struct gk20a_fence_ops gk20a_syncpt_fence_ops = { 295static const struct gk20a_fence_ops gk20a_syncpt_fence_ops = {