path: root/drivers/gpu/nvgpu
author	Sachit Kadle <skadle@nvidia.com>	2016-09-14 14:45:38 -0400
committer	mobile promotions <svcmobile_promotions@nvidia.com>	2016-10-21 14:23:53 -0400
commit	ab593b9ccd3132404406ef6340348fdb28c23bf0 (patch)
tree	394624e853fdb5e7fbe380c1ee8f4f2fca9fe24d /drivers/gpu/nvgpu
parent	55dba9f1a91a6f94fb63090880ae28eedf4189a2 (diff)
gpu: nvgpu: make deferred clean-up conditional
This change makes the invocation of the deferred job clean-up
mechanism conditional. For submissions that require job tracking,
deferred clean-up is only required if any of the following
conditions are met:

1) Channel's deterministic flag is not set
2) Rail-gating is enabled
3) Channel WDT is enabled
4) Buffer refcounting is enabled
5) Dependency on Sync Framework

In case deferred clean-up is not needed, we clean up a single
job-tracking resource in the submit path. For deterministic channels,
we do not allow deferred clean-up to occur and fail any submits that
require it.

Bug 1795076

Change-Id: I4021dffe8a71aa58f12db6b58518d3f4021f3313
Signed-off-by: Sachit Kadle <skadle@nvidia.com>
Reviewed-on: http://git-master/r/1220920
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
(cherry picked from commit b09f7589d5ad3c496e7350f1ed583a4fe2db574a)
Reviewed-on: http://git-master/r/1223941
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
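In essence, the submit path now decides per submission whether clean-up
can run inline. A condensed sketch of the decision logic this change
introduces (identifiers as in the diff below; not the verbatim driver
source):

	/* Inside gk20a_submit_channel_gpfifo(), once job tracking is
	 * known to be required: */
	need_deferred_cleanup = !c->deterministic ||
				need_sync_framework ||
				c->wdt_enabled ||
				platform->can_railgate ||
				!skip_buffer_refcounting;

	/* Deterministic channels must never defer clean-up; fail the
	 * submit instead. */
	if (c->deterministic && need_deferred_cleanup)
		return -EINVAL;

	/* If no deferral is needed, reclaim a single job-tracking
	 * resource inline in the submit path. */
	if (!need_deferred_cleanup)
		gk20a_channel_clean_up_jobs(c, false /* clean one job */);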
Diffstat (limited to 'drivers/gpu/nvgpu')
-rw-r--r--	drivers/gpu/nvgpu/gk20a/channel_gk20a.c	87
-rw-r--r--	drivers/gpu/nvgpu/gk20a/channel_gk20a.h	2
-rw-r--r--	drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c	25
-rw-r--r--	drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h	7
-rw-r--r--	drivers/gpu/nvgpu/gk20a/fence_gk20a.c	20
5 files changed, 117 insertions(+), 24 deletions(-)
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
index 6d4b4f60..f839bfbc 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -76,7 +76,8 @@ static void gk20a_free_error_notifiers(struct channel_gk20a *ch);
 
 static u32 gk20a_get_channel_watchdog_timeout(struct channel_gk20a *ch);
 
-static void gk20a_channel_clean_up_jobs(struct work_struct *work);
+static void gk20a_channel_clean_up_jobs(struct channel_gk20a *c,
+				bool clean_all);
 static void gk20a_channel_cancel_job_clean_up(struct channel_gk20a *c,
 				bool wait_for_completion);
 
@@ -1029,6 +1030,7 @@ unbind:
 	g->ops.fifo.free_inst(g, ch);
 
 	ch->vpr = false;
+	ch->deterministic = false;
 	ch->vm = NULL;
 
 	WARN_ON(ch->sync);
@@ -1703,9 +1705,12 @@ int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c,
 
 	gpfifo_size = args->num_entries;
 
-	if (args->flags & NVGPU_ALLOC_GPFIFO_FLAGS_VPR_ENABLED)
+	if (args->flags & NVGPU_ALLOC_GPFIFO_EX_FLAGS_VPR_ENABLED)
 		c->vpr = true;
 
+	if (args->flags & NVGPU_ALLOC_GPFIFO_EX_FLAGS_DETERMINISTIC)
+		c->deterministic = true;
+
 	/* an address space needs to have been bound at this point. */
 	if (!gk20a_channel_as_bound(c)) {
 		gk20a_err(d,
@@ -2173,10 +2178,17 @@ err_put_vm:
 	return err;
 }
 
-static void gk20a_channel_clean_up_jobs(struct work_struct *work)
+static void gk20a_channel_clean_up_runcb_fn(struct work_struct *work)
 {
 	struct channel_gk20a *c = container_of(to_delayed_work(work),
 			struct channel_gk20a, clean_up.wq);
+
+	gk20a_channel_clean_up_jobs(c, true);
+}
+
+static void gk20a_channel_clean_up_jobs(struct channel_gk20a *c,
+					bool clean_all)
+{
 	struct vm_gk20a *vm;
 	struct channel_gk20a_job *job;
 	struct gk20a_platform *platform;
@@ -2273,6 +2285,9 @@ static void gk20a_channel_clean_up_jobs(struct work_struct *work)
 		channel_gk20a_free_job(c, job);
 		job_finished = 1;
 		gk20a_idle(g->dev);
+
+		if (!clean_all)
+			break;
 	}
 
 	if (job_finished && c->update_fn)
@@ -2419,6 +2434,7 @@ static int gk20a_submit_prepare_syncs(struct channel_gk20a *c,
 				      struct gk20a_fence **pre_fence,
 				      struct gk20a_fence **post_fence,
 				      bool force_need_sync_fence,
+				      bool register_irq,
 				      u32 flags)
 {
 	struct gk20a *g = c->g;
@@ -2515,10 +2531,12 @@ static int gk20a_submit_prepare_syncs(struct channel_gk20a *c,
 
 	if (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET)
 		err = c->sync->incr_user(c->sync, wait_fence_fd, job->incr_cmd,
-				 job->post_fence, need_wfi, need_sync_fence);
+				 job->post_fence, need_wfi, need_sync_fence,
+				 register_irq);
 	else
 		err = c->sync->incr(c->sync, job->incr_cmd,
-			    job->post_fence, need_sync_fence);
+			    job->post_fence, need_sync_fence,
+			    register_irq);
 	if (!err) {
 		*incr_cmd = job->incr_cmd;
 		*post_fence = job->post_fence;
@@ -2568,6 +2586,7 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
 			NVGPU_SUBMIT_GPFIFO_FLAGS_SKIP_BUFFER_REFCOUNTING);
 	int err = 0;
 	bool need_job_tracking;
+	bool need_deferred_cleanup = false;
 	struct nvgpu_gpfifo __user *user_gpfifo = args ?
 		(struct nvgpu_gpfifo __user *)(uintptr_t)args->gpfifo : NULL;
 	struct gk20a_platform *platform = gk20a_get_platform(d);
@@ -2626,13 +2645,48 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
 			!skip_buffer_refcounting;
 
 	if (need_job_tracking) {
+		bool need_sync_framework = false;
+
 		/*
-		 * If the submit is to have deterministic latency and
+		 * If the channel is to have deterministic latency and
 		 * job tracking is required, the channel must have
 		 * pre-allocated resources. Otherwise, we fail the submit here
 		 */
-		if ((flags & NVGPU_SUBMIT_GPFIFO_FLAGS_DETERMINISTIC) &&
-		    !channel_gk20a_is_prealloc_enabled(c))
+		if (c->deterministic && !channel_gk20a_is_prealloc_enabled(c))
+			return -EINVAL;
+
+		need_sync_framework = force_need_sync_fence ||
+			gk20a_channel_sync_needs_sync_framework(c) ||
+			(flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE &&
+			 (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT ||
+			  flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET));
+
+		/*
+		 * Deferred clean-up is necessary for any of the following
+		 * conditions:
+		 * - channel's deterministic flag is not set
+		 * - dependency on sync framework, which could make the
+		 *   behavior of the clean-up operation non-deterministic
+		 *   (should not be performed in the submit path)
+		 * - channel wdt
+		 * - GPU rail-gating
+		 * - buffer refcounting
+		 *
+		 * If none of the conditions are met, then deferred clean-up
+		 * is not required, and we clean-up one job-tracking
+		 * resource in the submit path.
+		 */
+		need_deferred_cleanup = !c->deterministic ||
+					need_sync_framework ||
+					c->wdt_enabled ||
+					platform->can_railgate ||
+					!skip_buffer_refcounting;
+
+		/*
+		 * For deterministic channels, we don't allow deferred clean_up
+		 * processing to occur. In cases we hit this, we fail the submit
+		 */
+		if (c->deterministic && need_deferred_cleanup)
 			return -EINVAL;
 
 		/* gk20a_channel_update releases this ref. */
@@ -2641,6 +2695,11 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
 			gk20a_err(d, "failed to host gk20a to submit gpfifo");
 			return err;
 		}
+
+		if (!need_deferred_cleanup) {
+			/* clean up a single job */
+			gk20a_channel_clean_up_jobs(c, false);
+		}
 	}
 
 	trace_gk20a_channel_submit_gpfifo(dev_name(c->g->dev),
@@ -2678,7 +2737,9 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
 		err = gk20a_submit_prepare_syncs(c, fence, job,
 						 &wait_cmd, &incr_cmd,
 						 &pre_fence, &post_fence,
-						 force_need_sync_fence, flags);
+						 force_need_sync_fence,
+						 need_deferred_cleanup,
+						 flags);
 		if (err)
 			goto clean_up_job;
 	}
@@ -2727,7 +2788,7 @@ clean_up:
 	gk20a_dbg_fn("fail");
 	gk20a_fence_put(pre_fence);
 	gk20a_fence_put(post_fence);
-	if (need_job_tracking)
+	if (need_deferred_cleanup)
 		gk20a_idle(g->dev);
 	return err;
 }
@@ -2749,7 +2810,7 @@ int gk20a_init_channel_support(struct gk20a *g, u32 chid)
 	raw_spin_lock_init(&c->timeout.lock);
 	mutex_init(&c->sync_lock);
 	INIT_DELAYED_WORK(&c->timeout.wq, gk20a_channel_timeout_handler);
-	INIT_DELAYED_WORK(&c->clean_up.wq, gk20a_channel_clean_up_jobs);
+	INIT_DELAYED_WORK(&c->clean_up.wq, gk20a_channel_clean_up_runcb_fn);
 	mutex_init(&c->clean_up.lock);
 	INIT_LIST_HEAD(&c->joblist.dynamic.jobs);
 #if defined(CONFIG_GK20A_CYCLE_STATS)
@@ -3416,10 +3477,10 @@ long gk20a_channel_ioctl(struct file *filp,
 
 		if (!is_power_of_2(alloc_gpfifo_ex_args->num_entries)) {
 			err = -EINVAL;
+			gk20a_idle(dev);
 			break;
 		}
-		err = gk20a_alloc_channel_gpfifo(ch,
-			(struct nvgpu_alloc_gpfifo_ex_args *)buf);
+		err = gk20a_alloc_channel_gpfifo(ch, alloc_gpfifo_ex_args);
 		gk20a_idle(dev);
 		break;
 	}
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
index 8cceb6b2..92b51cca 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
@@ -131,7 +131,7 @@ struct channel_gk20a {
 	bool bound;
 	bool first_init;
 	bool vpr;
-	bool no_block;
+	bool deterministic;
 	bool cde;
 	pid_t pid;
 	pid_t tgid;
diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
index 767738ea..febea719 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
@@ -285,13 +285,14 @@ static int gk20a_channel_syncpt_incr_wfi(struct gk20a_channel_sync *s,
 static int gk20a_channel_syncpt_incr(struct gk20a_channel_sync *s,
 			      struct priv_cmd_entry *entry,
 			      struct gk20a_fence *fence,
-			      bool need_sync_fence)
+			      bool need_sync_fence,
+			      bool register_irq)
 {
 	/* Don't put wfi cmd to this one since we're not returning
 	 * a fence to user space. */
 	return __gk20a_channel_syncpt_incr(s,
 			false /* no wfi */,
-			true /* register irq */,
+			register_irq /* register irq */,
 			entry, fence, need_sync_fence);
 }
 
297 298
@@ -300,13 +301,14 @@ static int gk20a_channel_syncpt_incr_user(struct gk20a_channel_sync *s,
 		struct priv_cmd_entry *entry,
 		struct gk20a_fence *fence,
 		bool wfi,
-		bool need_sync_fence)
+		bool need_sync_fence,
+		bool register_irq)
 {
 	/* Need to do 'wfi + host incr' since we return the fence
 	 * to user space. */
 	return __gk20a_channel_syncpt_incr(s,
 			wfi,
-			true /* register irq */,
+			register_irq /* register irq */,
 			entry, fence, need_sync_fence);
 }
 
@@ -756,7 +758,8 @@ static int gk20a_channel_semaphore_incr(
 		struct gk20a_channel_sync *s,
 		struct priv_cmd_entry *entry,
 		struct gk20a_fence *fence,
-		bool need_sync_fence)
+		bool need_sync_fence,
+		bool register_irq)
 {
 	/* Don't put wfi cmd to this one since we're not returning
 	 * a fence to user space. */
@@ -772,7 +775,8 @@ static int gk20a_channel_semaphore_incr_user(
 		struct priv_cmd_entry *entry,
 		struct gk20a_fence *fence,
 		bool wfi,
-		bool need_sync_fence)
+		bool need_sync_fence,
+		bool register_irq)
 {
 #ifdef CONFIG_SYNC
 	struct sync_fence *dependency = NULL;
@@ -889,3 +893,12 @@ struct gk20a_channel_sync *gk20a_channel_sync_create(struct channel_gk20a *c)
 #endif
 	return gk20a_channel_semaphore_create(c);
 }
+
+bool gk20a_channel_sync_needs_sync_framework(struct channel_gk20a *c)
+{
+#ifdef CONFIG_TEGRA_GK20A
+	if (gk20a_platform_has_syncpoints(c->g->dev))
+		return false;
+#endif
+	return true;
+}
diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h
index c3a92ad2..5e75dd9b 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h
@@ -57,7 +57,8 @@ struct gk20a_channel_sync {
 	int (*incr)(struct gk20a_channel_sync *s,
 		    struct priv_cmd_entry *entry,
 		    struct gk20a_fence *fence,
-		    bool need_sync_fence);
+		    bool need_sync_fence,
+		    bool register_irq);
 
 	/* Increment syncpoint/semaphore, preceded by a wfi.
 	 * Returns
@@ -80,7 +81,8 @@ struct gk20a_channel_sync {
 		    struct priv_cmd_entry *entry,
 		    struct gk20a_fence *fence,
 		    bool wfi,
-		    bool need_sync_fence);
+		    bool need_sync_fence,
+		    bool register_irq);
 
 	/* Reset the channel syncpoint/semaphore. */
 	void (*set_min_eq_max)(struct gk20a_channel_sync *s);
@@ -99,5 +101,6 @@ struct gk20a_channel_sync {
 
 void gk20a_channel_sync_destroy(struct gk20a_channel_sync *sync);
 struct gk20a_channel_sync *gk20a_channel_sync_create(struct channel_gk20a *c);
+bool gk20a_channel_sync_needs_sync_framework(struct channel_gk20a *c);
 
 #endif
diff --git a/drivers/gpu/nvgpu/gk20a/fence_gk20a.c b/drivers/gpu/nvgpu/gk20a/fence_gk20a.c
index 7228f6f7..4673f28c 100644
--- a/drivers/gpu/nvgpu/gk20a/fence_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fence_gk20a.c
@@ -272,8 +272,24 @@ static int gk20a_syncpt_fence_wait(struct gk20a_fence *f, long timeout)
 
 static bool gk20a_syncpt_fence_is_expired(struct gk20a_fence *f)
 {
-	return nvhost_syncpt_is_expired_ext(f->host1x_pdev, f->syncpt_id,
-					    f->syncpt_value);
+
+	/*
+	 * In cases we don't register a notifier, we can't expect the
+	 * syncpt value to be updated. For this case, we force a read
+	 * of the value from HW, and then check for expiration.
+	 */
+	if (!nvhost_syncpt_is_expired_ext(f->host1x_pdev, f->syncpt_id,
+					  f->syncpt_value)) {
+		u32 val;
+
+		if (!nvhost_syncpt_read_ext_check(f->host1x_pdev,
+				f->syncpt_id, &val)) {
+			return nvhost_syncpt_is_expired_ext(f->host1x_pdev,
+					f->syncpt_id, f->syncpt_value);
+		}
+	}
+
+	return true;
 }
 
 static const struct gk20a_fence_ops gk20a_syncpt_fence_ops = {
279static const struct gk20a_fence_ops gk20a_syncpt_fence_ops = { 295static const struct gk20a_fence_ops gk20a_syncpt_fence_ops = {