diff options
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/channel_gk20a.c')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/channel_gk20a.c | 87 |
1 files changed, 74 insertions, 13 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
index 6d4b4f60..f839bfbc 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -76,7 +76,8 @@ static void gk20a_free_error_notifiers(struct channel_gk20a *ch); | |||
76 | 76 | ||
77 | static u32 gk20a_get_channel_watchdog_timeout(struct channel_gk20a *ch); | 77 | static u32 gk20a_get_channel_watchdog_timeout(struct channel_gk20a *ch); |
78 | 78 | ||
79 | static void gk20a_channel_clean_up_jobs(struct work_struct *work); | 79 | static void gk20a_channel_clean_up_jobs(struct channel_gk20a *c, |
80 | bool clean_all); | ||
80 | static void gk20a_channel_cancel_job_clean_up(struct channel_gk20a *c, | 81 | static void gk20a_channel_cancel_job_clean_up(struct channel_gk20a *c, |
81 | bool wait_for_completion); | 82 | bool wait_for_completion); |
82 | 83 | ||
@@ -1029,6 +1030,7 @@ unbind: | |||
1029 | g->ops.fifo.free_inst(g, ch); | 1030 | g->ops.fifo.free_inst(g, ch); |
1030 | 1031 | ||
1031 | ch->vpr = false; | 1032 | ch->vpr = false; |
1033 | ch->deterministic = false; | ||
1032 | ch->vm = NULL; | 1034 | ch->vm = NULL; |
1033 | 1035 | ||
1034 | WARN_ON(ch->sync); | 1036 | WARN_ON(ch->sync); |
@@ -1703,9 +1705,12 @@ int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c, | |||
1703 | 1705 | ||
1704 | gpfifo_size = args->num_entries; | 1706 | gpfifo_size = args->num_entries; |
1705 | 1707 | ||
1706 | if (args->flags & NVGPU_ALLOC_GPFIFO_FLAGS_VPR_ENABLED) | 1708 | if (args->flags & NVGPU_ALLOC_GPFIFO_EX_FLAGS_VPR_ENABLED) |
1707 | c->vpr = true; | 1709 | c->vpr = true; |
1708 | 1710 | ||
1711 | if (args->flags & NVGPU_ALLOC_GPFIFO_EX_FLAGS_DETERMINISTIC) | ||
1712 | c->deterministic = true; | ||
1713 | |||
1709 | /* an address space needs to have been bound at this point. */ | 1714 | /* an address space needs to have been bound at this point. */ |
1710 | if (!gk20a_channel_as_bound(c)) { | 1715 | if (!gk20a_channel_as_bound(c)) { |
1711 | gk20a_err(d, | 1716 | gk20a_err(d, |
@@ -2173,10 +2178,17 @@ err_put_vm: | |||
2173 | return err; | 2178 | return err; |
2174 | } | 2179 | } |
2175 | 2180 | ||
2176 | static void gk20a_channel_clean_up_jobs(struct work_struct *work) | 2181 | static void gk20a_channel_clean_up_runcb_fn(struct work_struct *work) |
2177 | { | 2182 | { |
2178 | struct channel_gk20a *c = container_of(to_delayed_work(work), | 2183 | struct channel_gk20a *c = container_of(to_delayed_work(work), |
2179 | struct channel_gk20a, clean_up.wq); | 2184 | struct channel_gk20a, clean_up.wq); |
2185 | |||
2186 | gk20a_channel_clean_up_jobs(c, true); | ||
2187 | } | ||
2188 | |||
2189 | static void gk20a_channel_clean_up_jobs(struct channel_gk20a *c, | ||
2190 | bool clean_all) | ||
2191 | { | ||
2180 | struct vm_gk20a *vm; | 2192 | struct vm_gk20a *vm; |
2181 | struct channel_gk20a_job *job; | 2193 | struct channel_gk20a_job *job; |
2182 | struct gk20a_platform *platform; | 2194 | struct gk20a_platform *platform; |
@@ -2273,6 +2285,9 @@ static void gk20a_channel_clean_up_jobs(struct work_struct *work) | |||
2273 | channel_gk20a_free_job(c, job); | 2285 | channel_gk20a_free_job(c, job); |
2274 | job_finished = 1; | 2286 | job_finished = 1; |
2275 | gk20a_idle(g->dev); | 2287 | gk20a_idle(g->dev); |
2288 | |||
2289 | if (!clean_all) | ||
2290 | break; | ||
2276 | } | 2291 | } |
2277 | 2292 | ||
2278 | if (job_finished && c->update_fn) | 2293 | if (job_finished && c->update_fn) |
@@ -2419,6 +2434,7 @@ static int gk20a_submit_prepare_syncs(struct channel_gk20a *c, | |||
2419 | struct gk20a_fence **pre_fence, | 2434 | struct gk20a_fence **pre_fence, |
2420 | struct gk20a_fence **post_fence, | 2435 | struct gk20a_fence **post_fence, |
2421 | bool force_need_sync_fence, | 2436 | bool force_need_sync_fence, |
2437 | bool register_irq, | ||
2422 | u32 flags) | 2438 | u32 flags) |
2423 | { | 2439 | { |
2424 | struct gk20a *g = c->g; | 2440 | struct gk20a *g = c->g; |
@@ -2515,10 +2531,12 @@ static int gk20a_submit_prepare_syncs(struct channel_gk20a *c, | |||
2515 | 2531 | ||
2516 | if (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET) | 2532 | if (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET) |
2517 | err = c->sync->incr_user(c->sync, wait_fence_fd, job->incr_cmd, | 2533 | err = c->sync->incr_user(c->sync, wait_fence_fd, job->incr_cmd, |
2518 | job->post_fence, need_wfi, need_sync_fence); | 2534 | job->post_fence, need_wfi, need_sync_fence, |
2535 | register_irq); | ||
2519 | else | 2536 | else |
2520 | err = c->sync->incr(c->sync, job->incr_cmd, | 2537 | err = c->sync->incr(c->sync, job->incr_cmd, |
2521 | job->post_fence, need_sync_fence); | 2538 | job->post_fence, need_sync_fence, |
2539 | register_irq); | ||
2522 | if (!err) { | 2540 | if (!err) { |
2523 | *incr_cmd = job->incr_cmd; | 2541 | *incr_cmd = job->incr_cmd; |
2524 | *post_fence = job->post_fence; | 2542 | *post_fence = job->post_fence; |
@@ -2568,6 +2586,7 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c, | |||
2568 | NVGPU_SUBMIT_GPFIFO_FLAGS_SKIP_BUFFER_REFCOUNTING); | 2586 | NVGPU_SUBMIT_GPFIFO_FLAGS_SKIP_BUFFER_REFCOUNTING); |
2569 | int err = 0; | 2587 | int err = 0; |
2570 | bool need_job_tracking; | 2588 | bool need_job_tracking; |
2589 | bool need_deferred_cleanup = false; | ||
2571 | struct nvgpu_gpfifo __user *user_gpfifo = args ? | 2590 | struct nvgpu_gpfifo __user *user_gpfifo = args ? |
2572 | (struct nvgpu_gpfifo __user *)(uintptr_t)args->gpfifo : NULL; | 2591 | (struct nvgpu_gpfifo __user *)(uintptr_t)args->gpfifo : NULL; |
2573 | struct gk20a_platform *platform = gk20a_get_platform(d); | 2592 | struct gk20a_platform *platform = gk20a_get_platform(d); |
@@ -2626,13 +2645,48 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c, | |||
2626 | !skip_buffer_refcounting; | 2645 | !skip_buffer_refcounting; |
2627 | 2646 | ||
2628 | if (need_job_tracking) { | 2647 | if (need_job_tracking) { |
2648 | bool need_sync_framework = false; | ||
2649 | |||
2629 | /* | 2650 | /* |
2630 | * If the submit is to have deterministic latency and | 2651 | * If the channel is to have deterministic latency and |
2631 | * job tracking is required, the channel must have | 2652 | * job tracking is required, the channel must have |
2632 | * pre-allocated resources. Otherwise, we fail the submit here | 2653 | * pre-allocated resources. Otherwise, we fail the submit here |
2633 | */ | 2654 | */ |
2634 | if ((flags & NVGPU_SUBMIT_GPFIFO_FLAGS_DETERMINISTIC) && | 2655 | if (c->deterministic && !channel_gk20a_is_prealloc_enabled(c)) |
2635 | !channel_gk20a_is_prealloc_enabled(c)) | 2656 | return -EINVAL; |
2657 | |||
2658 | need_sync_framework = force_need_sync_fence || | ||
2659 | gk20a_channel_sync_needs_sync_framework(c) || | ||
2660 | (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE && | ||
2661 | (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT || | ||
2662 | flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET)); | ||
2663 | |||
2664 | /* | ||
2665 | * Deferred clean-up is necessary for any of the following | ||
2666 | * conditions: | ||
2667 | * - channel's deterministic flag is not set | ||
2668 | * - dependency on sync framework, which could make the | ||
2669 | * behavior of the clean-up operation non-deterministic | ||
2670 | * (should not be performed in the submit path) | ||
2671 | * - channel wdt | ||
2672 | * - GPU rail-gating | ||
2673 | * - buffer refcounting | ||
2674 | * | ||
2675 | * If none of the conditions are met, then deferred clean-up | ||
2676 | * is not required, and we clean-up one job-tracking | ||
2677 | * resource in the submit path. | ||
2678 | */ | ||
2679 | need_deferred_cleanup = !c->deterministic || | ||
2680 | need_sync_framework || | ||
2681 | c->wdt_enabled || | ||
2682 | platform->can_railgate || | ||
2683 | !skip_buffer_refcounting; | ||
2684 | |||
2685 | /* | ||
2686 | * For deterministic channels, we don't allow deferred clean_up | ||
2687 | * processing to occur. In cases we hit this, we fail the submit | ||
2688 | */ | ||
2689 | if (c->deterministic && need_deferred_cleanup) | ||
2636 | return -EINVAL; | 2690 | return -EINVAL; |
2637 | 2691 | ||
2638 | /* gk20a_channel_update releases this ref. */ | 2692 | /* gk20a_channel_update releases this ref. */ |
@@ -2641,6 +2695,11 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c, | |||
2641 | gk20a_err(d, "failed to host gk20a to submit gpfifo"); | 2695 | gk20a_err(d, "failed to host gk20a to submit gpfifo"); |
2642 | return err; | 2696 | return err; |
2643 | } | 2697 | } |
2698 | |||
2699 | if (!need_deferred_cleanup) { | ||
2700 | /* clean up a single job */ | ||
2701 | gk20a_channel_clean_up_jobs(c, false); | ||
2702 | } | ||
2644 | } | 2703 | } |
2645 | 2704 | ||
2646 | trace_gk20a_channel_submit_gpfifo(dev_name(c->g->dev), | 2705 | trace_gk20a_channel_submit_gpfifo(dev_name(c->g->dev), |
@@ -2678,7 +2737,9 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c, | |||
2678 | err = gk20a_submit_prepare_syncs(c, fence, job, | 2737 | err = gk20a_submit_prepare_syncs(c, fence, job, |
2679 | &wait_cmd, &incr_cmd, | 2738 | &wait_cmd, &incr_cmd, |
2680 | &pre_fence, &post_fence, | 2739 | &pre_fence, &post_fence, |
2681 | force_need_sync_fence, flags); | 2740 | force_need_sync_fence, |
2741 | need_deferred_cleanup, | ||
2742 | flags); | ||
2682 | if (err) | 2743 | if (err) |
2683 | goto clean_up_job; | 2744 | goto clean_up_job; |
2684 | } | 2745 | } |
@@ -2727,7 +2788,7 @@ clean_up: | |||
2727 | gk20a_dbg_fn("fail"); | 2788 | gk20a_dbg_fn("fail"); |
2728 | gk20a_fence_put(pre_fence); | 2789 | gk20a_fence_put(pre_fence); |
2729 | gk20a_fence_put(post_fence); | 2790 | gk20a_fence_put(post_fence); |
2730 | if (need_job_tracking) | 2791 | if (need_deferred_cleanup) |
2731 | gk20a_idle(g->dev); | 2792 | gk20a_idle(g->dev); |
2732 | return err; | 2793 | return err; |
2733 | } | 2794 | } |
@@ -2749,7 +2810,7 @@ int gk20a_init_channel_support(struct gk20a *g, u32 chid) | |||
2749 | raw_spin_lock_init(&c->timeout.lock); | 2810 | raw_spin_lock_init(&c->timeout.lock); |
2750 | mutex_init(&c->sync_lock); | 2811 | mutex_init(&c->sync_lock); |
2751 | INIT_DELAYED_WORK(&c->timeout.wq, gk20a_channel_timeout_handler); | 2812 | INIT_DELAYED_WORK(&c->timeout.wq, gk20a_channel_timeout_handler); |
2752 | INIT_DELAYED_WORK(&c->clean_up.wq, gk20a_channel_clean_up_jobs); | 2813 | INIT_DELAYED_WORK(&c->clean_up.wq, gk20a_channel_clean_up_runcb_fn); |
2753 | mutex_init(&c->clean_up.lock); | 2814 | mutex_init(&c->clean_up.lock); |
2754 | INIT_LIST_HEAD(&c->joblist.dynamic.jobs); | 2815 | INIT_LIST_HEAD(&c->joblist.dynamic.jobs); |
2755 | #if defined(CONFIG_GK20A_CYCLE_STATS) | 2816 | #if defined(CONFIG_GK20A_CYCLE_STATS) |
@@ -3416,10 +3477,10 @@ long gk20a_channel_ioctl(struct file *filp, | |||
3416 | 3477 | ||
3417 | if (!is_power_of_2(alloc_gpfifo_ex_args->num_entries)) { | 3478 | if (!is_power_of_2(alloc_gpfifo_ex_args->num_entries)) { |
3418 | err = -EINVAL; | 3479 | err = -EINVAL; |
3480 | gk20a_idle(dev); | ||
3419 | break; | 3481 | break; |
3420 | } | 3482 | } |
3421 | err = gk20a_alloc_channel_gpfifo(ch, | 3483 | err = gk20a_alloc_channel_gpfifo(ch, alloc_gpfifo_ex_args); |
3422 | (struct nvgpu_alloc_gpfifo_ex_args *)buf); | ||
3423 | gk20a_idle(dev); | 3484 | gk20a_idle(dev); |
3424 | break; | 3485 | break; |
3425 | } | 3486 | } |