author     Chris Wilson <chris@chris-wilson.co.uk>   2016-09-09 09:11:56 -0400
committer  Chris Wilson <chris@chris-wilson.co.uk>   2016-09-09 09:23:06 -0400
commit     a2bc4695bbf24877d75b34da5d11fcb38393eee9
tree       7f87e3e04bf11a321fd6a316bd05a51855e17969
parent     0f25dff6e960c44c5e7d306ff3a3fdad5367a90e
drm/i915: Prepare object synchronisation for asynchronicity
We are about to specialize object synchronisation to enable nonblocking
execbuf submission. First we make a copy of the current object
synchronisation for execbuffer. The general i915_gem_object_sync() will
be removed following the removal of CS flips in the near future.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: John Harrison <john.c.harrison@intel.com>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20160909131201.16673-16-chris@chris-wilson.co.uk
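
For reference, the caller-side conversion performed by this patch looks like the sketch below. It is distilled from the execbuffer and page-flip hunks that follow and is illustrative only, not a self-contained compilation unit: it assumes the driver types and helpers declared in i915_drv.h and i915_gem_request.h, with struct_mutex held as in the real callers.

	/* Before: serialise @obj against all of its outstanding work,
	 * regardless of what @req actually needs.
	 */
	ret = i915_gem_object_sync(obj, req);
	if (ret)
		return ret;

	/* After: record the dependency on the request that will use the
	 * object.  The final argument says whether @req intends to write:
	 * a writer must wait for every outstanding reader, while a reader
	 * only needs to wait for the last writer.
	 */
	ret = i915_gem_request_await_object(req, obj,
					    obj->base.pending_write_domain);
	if (ret)
		return ret;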
 drivers/gpu/drm/i915/i915_drv.h            |  2
 drivers/gpu/drm/i915/i915_gem.c            | 91
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |  3
 drivers/gpu/drm/i915/i915_gem_request.c    | 87
 drivers/gpu/drm/i915/i915_gem_request.h    |  5
 drivers/gpu/drm/i915/intel_display.c       |  2
 6 files changed, 95 insertions(+), 95 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 37978d3f62ce..1e2dda88a483 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3221,8 +3221,6 @@ i915_gem_obj_finish_shmem_access(struct drm_i915_gem_object *obj)
 }
 
 int __must_check i915_mutex_lock_interruptible(struct drm_device *dev);
-int i915_gem_object_sync(struct drm_i915_gem_object *obj,
-			 struct drm_i915_gem_request *to);
 void i915_vma_move_to_active(struct i915_vma *vma,
			     struct drm_i915_gem_request *req,
			     unsigned int flags);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 89a5f8d948e7..4b5364d477f1 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2818,97 +2818,6 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 	return ret;
 }
 
-static int
-__i915_gem_object_sync(struct drm_i915_gem_request *to,
-		       struct drm_i915_gem_request *from)
-{
-	int ret;
-
-	if (to->engine == from->engine)
-		return 0;
-
-	if (!i915.semaphores) {
-		ret = i915_wait_request(from,
-					from->i915->mm.interruptible |
-					I915_WAIT_LOCKED,
-					NULL,
-					NO_WAITBOOST);
-		if (ret)
-			return ret;
-	} else {
-		int idx = intel_engine_sync_index(from->engine, to->engine);
-		if (from->fence.seqno <= from->engine->semaphore.sync_seqno[idx])
-			return 0;
-
-		trace_i915_gem_ring_sync_to(to, from);
-		ret = to->engine->semaphore.sync_to(to, from);
-		if (ret)
-			return ret;
-
-		from->engine->semaphore.sync_seqno[idx] = from->fence.seqno;
-	}
-
-	return 0;
-}
-
-/**
- * i915_gem_object_sync - sync an object to a ring.
- *
- * @obj: object which may be in use on another ring.
- * @to: request we are wishing to use
- *
- * This code is meant to abstract object synchronization with the GPU.
- * Conceptually we serialise writes between engines inside the GPU.
- * We only allow one engine to write into a buffer at any time, but
- * multiple readers. To ensure each has a coherent view of memory, we must:
- *
- * - If there is an outstanding write request to the object, the new
- *   request must wait for it to complete (either CPU or in hw, requests
- *   on the same ring will be naturally ordered).
- *
- * - If we are a write request (pending_write_domain is set), the new
- *   request must wait for outstanding read requests to complete.
- *
- * Returns 0 if successful, else propagates up the lower layer error.
- */
-int
-i915_gem_object_sync(struct drm_i915_gem_object *obj,
-		     struct drm_i915_gem_request *to)
-{
-	struct i915_gem_active *active;
-	unsigned long active_mask;
-	int idx;
-
-	lockdep_assert_held(&obj->base.dev->struct_mutex);
-
-	active_mask = i915_gem_object_get_active(obj);
-	if (!active_mask)
-		return 0;
-
-	if (obj->base.pending_write_domain) {
-		active = obj->last_read;
-	} else {
-		active_mask = 1;
-		active = &obj->last_write;
-	}
-
-	for_each_active(active_mask, idx) {
-		struct drm_i915_gem_request *request;
-		int ret;
-
-		request = i915_gem_active_peek(&active[idx],
-					       &obj->base.dev->struct_mutex);
-		if (!request)
-			continue;
-
-		ret = __i915_gem_object_sync(to, request);
-		if (ret)
-			return ret;
-	}
-
-	return 0;
-}
-
 static void __i915_vma_iounmap(struct i915_vma *vma)
 {
 	GEM_BUG_ON(i915_vma_is_pinned(vma));
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 9432d4ce9ffb..ccaf15ba4e32 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1133,7 +1133,8 @@ i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req,
 		struct drm_i915_gem_object *obj = vma->obj;
 
 		if (obj->flags & other_rings) {
-			ret = i915_gem_object_sync(obj, req);
+			ret = i915_gem_request_await_object
+				(req, obj, obj->base.pending_write_domain);
 			if (ret)
 				return ret;
 		}
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index a149310c82ce..017cadf54d80 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -460,6 +460,93 @@ err:
 	return ERR_PTR(ret);
 }
 
+static int
+i915_gem_request_await_request(struct drm_i915_gem_request *to,
+			       struct drm_i915_gem_request *from)
+{
+	int idx, ret;
+
+	GEM_BUG_ON(to == from);
+
+	if (to->engine == from->engine)
+		return 0;
+
+	idx = intel_engine_sync_index(from->engine, to->engine);
+	if (from->fence.seqno <= from->engine->semaphore.sync_seqno[idx])
+		return 0;
+
+	trace_i915_gem_ring_sync_to(to, from);
+	if (!i915.semaphores) {
+		ret = i915_wait_request(from,
+					I915_WAIT_INTERRUPTIBLE |
+					I915_WAIT_LOCKED,
+					NULL, NO_WAITBOOST);
+		if (ret)
+			return ret;
+	} else {
+		ret = to->engine->semaphore.sync_to(to, from);
+		if (ret)
+			return ret;
+	}
+
+	from->engine->semaphore.sync_seqno[idx] = from->fence.seqno;
+	return 0;
+}
+
+/**
+ * i915_gem_request_await_object - set this request to (async) wait upon a bo
+ *
+ * @to: request we are wishing to use
+ * @obj: object which may be in use on another ring.
+ *
+ * This code is meant to abstract object synchronization with the GPU.
+ * Conceptually we serialise writes between engines inside the GPU.
+ * We only allow one engine to write into a buffer at any time, but
+ * multiple readers. To ensure each has a coherent view of memory, we must:
+ *
+ * - If there is an outstanding write request to the object, the new
+ *   request must wait for it to complete (either CPU or in hw, requests
+ *   on the same ring will be naturally ordered).
+ *
+ * - If we are a write request (pending_write_domain is set), the new
+ *   request must wait for outstanding read requests to complete.
+ *
+ * Returns 0 if successful, else propagates up the lower layer error.
+ */
+int
+i915_gem_request_await_object(struct drm_i915_gem_request *to,
+			      struct drm_i915_gem_object *obj,
+			      bool write)
+{
+	struct i915_gem_active *active;
+	unsigned long active_mask;
+	int idx;
+
+	if (write) {
+		active_mask = i915_gem_object_get_active(obj);
+		active = obj->last_read;
+	} else {
+		active_mask = 1;
+		active = &obj->last_write;
+	}
+
+	for_each_active(active_mask, idx) {
+		struct drm_i915_gem_request *request;
+		int ret;
+
+		request = i915_gem_active_peek(&active[idx],
+					       &obj->base.dev->struct_mutex);
+		if (!request)
+			continue;
+
+		ret = i915_gem_request_await_request(to, request);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
 static void i915_gem_mark_busy(const struct intel_engine_cs *engine)
 {
 	struct drm_i915_private *dev_priv = engine->i915;
diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
index e141b1cca16a..883df3bdb381 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -209,6 +209,11 @@ static inline void i915_gem_request_assign(struct drm_i915_gem_request **pdst,
 	*pdst = src;
 }
 
+int
+i915_gem_request_await_object(struct drm_i915_gem_request *to,
+			      struct drm_i915_gem_object *obj,
+			      bool write);
+
 void __i915_add_request(struct drm_i915_gem_request *req, bool flush_caches);
 #define i915_add_request(req) \
 	__i915_add_request(req, true)
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index b92042f4dc50..69b80d078f06 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -12273,7 +12273,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
 		goto cleanup_unpin;
 	}
 
-	ret = i915_gem_object_sync(obj, request);
+	ret = i915_gem_request_await_object(request, obj, false);
 	if (ret)
 		goto cleanup_request;
 