aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorChris Wilson <chris@chris-wilson.co.uk>2015-12-08 06:55:07 -0500
committerTvrtko Ursulin <tvrtko.ursulin@intel.com>2015-12-09 05:20:17 -0500
commit506a8e87d8d2746b9e9d2433503fe237c54e4750 (patch)
treeef8712f9deffc5f3c1aa35b7d0f8008fff898051
parent0f55564406d7005c2d681b14e2b7e2248d8c412d (diff)
drm/i915: Add soft-pinning API for execbuffer
Userspace can pass in an offset that it presumes the object is located at. The kernel will then do its utmost to fit the object into that location. The assumption is that userspace is handling its own object locations (for example along with full-ppgtt) and that the kernel will rarely have to make space for the user's requests.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

v2: Fixed incorrect eviction found by Michal Winiarski - fix suggested by Chris Wilson. Fixed incorrect error paths causing crash found by Michal Winiarski. (Not published externally)

v3: Rebased because of trivial conflict in object_bind_to_vm. Fixed eviction to allow eviction of soft-pinned objects when another soft-pinned object used by a subsequent execbuffer overlaps, reported by Michal Winiarski. (Not published externally)

v4: Moved soft-pinned objects to the front of ordered_vmas so that they are pinned first after an address conflict happens, to avoid repeated conflicts in rare cases (Suggested by Chris Wilson). Expanded comment on drm_i915_gem_exec_object2.offset to cover this new API.

v5: Added I915_PARAM_HAS_EXEC_SOFTPIN parameter for detecting this capability (Kristian). Added check for multiple pinnings on eviction (Akash). Made sure buffers are not considered misplaced without the user specifying EXEC_OBJECT_SUPPORTS_48B_ADDRESS. User must assume responsibility for any addressing workarounds. Updated object2.offset field comment again to clarify NO_RELOC case (Chris). checkpatch cleanup.

v6: Trivial rebase on latest drm-intel-nightly.

v7: Catch attempts to pin above the max virtual address size and return EINVAL (Tvrtko). Decouple EXEC_OBJECT_SUPPORTS_48B_ADDRESS and EXEC_OBJECT_PINNED flags; user must pass both flags in any attempt to pin something at an offset above 4GB (Chris, Daniel Vetter).
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Akash Goel <akash.goel@intel.com>
Cc: Vinay Belgaumkar <vinay.belgaumkar@intel.com>
Cc: Michal Winiarski <michal.winiarski@intel.com>
Cc: Zou Nanhai <nanhai.zou@intel.com>
Cc: Kristian Høgsberg <hoegsberg@gmail.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
Reviewed-by: Michel Thierry <michel.thierry@intel.com>
Acked-by: PDT
Signed-off-by: Thomas Daniel <thomas.daniel@intel.com>
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/1449575707-20933-1-git-send-email-thomas.daniel@intel.com
-rw-r--r--drivers/gpu/drm/i915/i915_dma.c3
-rw-r--r--drivers/gpu/drm/i915/i915_drv.h2
-rw-r--r--drivers/gpu/drm/i915/i915_gem.c64
-rw-r--r--drivers/gpu/drm/i915/i915_gem_evict.c39
-rw-r--r--drivers/gpu/drm/i915/i915_gem_execbuffer.c16
-rw-r--r--include/uapi/drm/i915_drm.h12
6 files changed, 111 insertions, 25 deletions
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index a81c76603544..52b82893ba42 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -169,6 +169,9 @@ static int i915_getparam(struct drm_device *dev, void *data,
169 case I915_PARAM_HAS_RESOURCE_STREAMER: 169 case I915_PARAM_HAS_RESOURCE_STREAMER:
170 value = HAS_RESOURCE_STREAMER(dev); 170 value = HAS_RESOURCE_STREAMER(dev);
171 break; 171 break;
172 case I915_PARAM_HAS_EXEC_SOFTPIN:
173 value = 1;
174 break;
172 default: 175 default:
173 DRM_DEBUG("Unknown parameter %d\n", param->param); 176 DRM_DEBUG("Unknown parameter %d\n", param->param);
174 return -EINVAL; 177 return -EINVAL;
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 6dee97c0d5d0..547c14269292 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2868,6 +2868,7 @@ void i915_gem_vma_destroy(struct i915_vma *vma);
2868#define PIN_UPDATE (1<<5) 2868#define PIN_UPDATE (1<<5)
2869#define PIN_ZONE_4G (1<<6) 2869#define PIN_ZONE_4G (1<<6)
2870#define PIN_HIGH (1<<7) 2870#define PIN_HIGH (1<<7)
2871#define PIN_OFFSET_FIXED (1<<8)
2871#define PIN_OFFSET_MASK (~4095) 2872#define PIN_OFFSET_MASK (~4095)
2872int __must_check 2873int __must_check
2873i915_gem_object_pin(struct drm_i915_gem_object *obj, 2874i915_gem_object_pin(struct drm_i915_gem_object *obj,
@@ -3213,6 +3214,7 @@ int __must_check i915_gem_evict_something(struct drm_device *dev,
3213 unsigned long start, 3214 unsigned long start,
3214 unsigned long end, 3215 unsigned long end,
3215 unsigned flags); 3216 unsigned flags);
3217int __must_check i915_gem_evict_for_vma(struct i915_vma *target);
3216int i915_gem_evict_vm(struct i915_address_space *vm, bool do_idle); 3218int i915_gem_evict_vm(struct i915_address_space *vm, bool do_idle);
3217 3219
3218/* belongs in i915_gem_gtt.h */ 3220/* belongs in i915_gem_gtt.h */
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index a531cb83295c..d7b8d16ff6e6 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3468,30 +3468,50 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
3468 if (IS_ERR(vma)) 3468 if (IS_ERR(vma))
3469 goto err_unpin; 3469 goto err_unpin;
3470 3470
3471 if (flags & PIN_HIGH) { 3471 if (flags & PIN_OFFSET_FIXED) {
3472 search_flag = DRM_MM_SEARCH_BELOW; 3472 uint64_t offset = flags & PIN_OFFSET_MASK;
3473 alloc_flag = DRM_MM_CREATE_TOP; 3473
3474 if (offset & (alignment - 1) || offset + size > end) {
3475 ret = -EINVAL;
3476 goto err_free_vma;
3477 }
3478 vma->node.start = offset;
3479 vma->node.size = size;
3480 vma->node.color = obj->cache_level;
3481 ret = drm_mm_reserve_node(&vm->mm, &vma->node);
3482 if (ret) {
3483 ret = i915_gem_evict_for_vma(vma);
3484 if (ret == 0)
3485 ret = drm_mm_reserve_node(&vm->mm, &vma->node);
3486 }
3487 if (ret)
3488 goto err_free_vma;
3474 } else { 3489 } else {
3475 search_flag = DRM_MM_SEARCH_DEFAULT; 3490 if (flags & PIN_HIGH) {
3476 alloc_flag = DRM_MM_CREATE_DEFAULT; 3491 search_flag = DRM_MM_SEARCH_BELOW;
3477 } 3492 alloc_flag = DRM_MM_CREATE_TOP;
3493 } else {
3494 search_flag = DRM_MM_SEARCH_DEFAULT;
3495 alloc_flag = DRM_MM_CREATE_DEFAULT;
3496 }
3478 3497
3479search_free: 3498search_free:
3480 ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node, 3499 ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node,
3481 size, alignment, 3500 size, alignment,
3482 obj->cache_level, 3501 obj->cache_level,
3483 start, end, 3502 start, end,
3484 search_flag, 3503 search_flag,
3485 alloc_flag); 3504 alloc_flag);
3486 if (ret) { 3505 if (ret) {
3487 ret = i915_gem_evict_something(dev, vm, size, alignment, 3506 ret = i915_gem_evict_something(dev, vm, size, alignment,
3488 obj->cache_level, 3507 obj->cache_level,
3489 start, end, 3508 start, end,
3490 flags); 3509 flags);
3491 if (ret == 0) 3510 if (ret == 0)
3492 goto search_free; 3511 goto search_free;
3493 3512
3494 goto err_free_vma; 3513 goto err_free_vma;
3514 }
3495 } 3515 }
3496 if (WARN_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level))) { 3516 if (WARN_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level))) {
3497 ret = -EINVAL; 3517 ret = -EINVAL;
@@ -4082,6 +4102,10 @@ i915_vma_misplaced(struct i915_vma *vma, uint32_t alignment, uint64_t flags)
4082 vma->node.start < (flags & PIN_OFFSET_MASK)) 4102 vma->node.start < (flags & PIN_OFFSET_MASK))
4083 return true; 4103 return true;
4084 4104
4105 if (flags & PIN_OFFSET_FIXED &&
4106 vma->node.start != (flags & PIN_OFFSET_MASK))
4107 return true;
4108
4085 return false; 4109 return false;
4086} 4110}
4087 4111
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
index d71a133ceff5..07c6e4d320c9 100644
--- a/drivers/gpu/drm/i915/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/i915_gem_evict.c
@@ -199,6 +199,45 @@ found:
199 return ret; 199 return ret;
200} 200}
201 201
202int
203i915_gem_evict_for_vma(struct i915_vma *target)
204{
205 struct drm_mm_node *node, *next;
206
207 list_for_each_entry_safe(node, next,
208 &target->vm->mm.head_node.node_list,
209 node_list) {
210 struct i915_vma *vma;
211 int ret;
212
213 if (node->start + node->size <= target->node.start)
214 continue;
215 if (node->start >= target->node.start + target->node.size)
216 break;
217
218 vma = container_of(node, typeof(*vma), node);
219
220 if (vma->pin_count) {
221 if (!vma->exec_entry || (vma->pin_count > 1))
222 /* Object is pinned for some other use */
223 return -EBUSY;
224
225 /* We need to evict a buffer in the same batch */
226 if (vma->exec_entry->flags & EXEC_OBJECT_PINNED)
227 /* Overlapping fixed objects in the same batch */
228 return -EINVAL;
229
230 return -ENOSPC;
231 }
232
233 ret = i915_vma_unbind(vma);
234 if (ret)
235 return ret;
236 }
237
238 return 0;
239}
240
202/** 241/**
203 * i915_gem_evict_vm - Evict all idle vmas from a vm 242 * i915_gem_evict_vm - Evict all idle vmas from a vm
204 * @vm: Address space to cleanse 243 * @vm: Address space to cleanse
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index a4c243cec4aa..48ec4846e6f2 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -599,6 +599,8 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
599 flags |= PIN_GLOBAL | PIN_MAPPABLE; 599 flags |= PIN_GLOBAL | PIN_MAPPABLE;
600 if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS) 600 if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS)
601 flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS; 601 flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS;
602 if (entry->flags & EXEC_OBJECT_PINNED)
603 flags |= entry->offset | PIN_OFFSET_FIXED;
602 if ((flags & PIN_MAPPABLE) == 0) 604 if ((flags & PIN_MAPPABLE) == 0)
603 flags |= PIN_HIGH; 605 flags |= PIN_HIGH;
604 } 606 }
@@ -670,6 +672,10 @@ eb_vma_misplaced(struct i915_vma *vma)
670 vma->node.start & (entry->alignment - 1)) 672 vma->node.start & (entry->alignment - 1))
671 return true; 673 return true;
672 674
675 if (entry->flags & EXEC_OBJECT_PINNED &&
676 vma->node.start != entry->offset)
677 return true;
678
673 if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS && 679 if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS &&
674 vma->node.start < BATCH_OFFSET_BIAS) 680 vma->node.start < BATCH_OFFSET_BIAS)
675 return true; 681 return true;
@@ -695,6 +701,7 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring,
695 struct i915_vma *vma; 701 struct i915_vma *vma;
696 struct i915_address_space *vm; 702 struct i915_address_space *vm;
697 struct list_head ordered_vmas; 703 struct list_head ordered_vmas;
704 struct list_head pinned_vmas;
698 bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4; 705 bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
699 int retry; 706 int retry;
700 707
@@ -703,6 +710,7 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring,
703 vm = list_first_entry(vmas, struct i915_vma, exec_list)->vm; 710 vm = list_first_entry(vmas, struct i915_vma, exec_list)->vm;
704 711
705 INIT_LIST_HEAD(&ordered_vmas); 712 INIT_LIST_HEAD(&ordered_vmas);
713 INIT_LIST_HEAD(&pinned_vmas);
706 while (!list_empty(vmas)) { 714 while (!list_empty(vmas)) {
707 struct drm_i915_gem_exec_object2 *entry; 715 struct drm_i915_gem_exec_object2 *entry;
708 bool need_fence, need_mappable; 716 bool need_fence, need_mappable;
@@ -721,7 +729,9 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring,
721 obj->tiling_mode != I915_TILING_NONE; 729 obj->tiling_mode != I915_TILING_NONE;
722 need_mappable = need_fence || need_reloc_mappable(vma); 730 need_mappable = need_fence || need_reloc_mappable(vma);
723 731
724 if (need_mappable) { 732 if (entry->flags & EXEC_OBJECT_PINNED)
733 list_move_tail(&vma->exec_list, &pinned_vmas);
734 else if (need_mappable) {
725 entry->flags |= __EXEC_OBJECT_NEEDS_MAP; 735 entry->flags |= __EXEC_OBJECT_NEEDS_MAP;
726 list_move(&vma->exec_list, &ordered_vmas); 736 list_move(&vma->exec_list, &ordered_vmas);
727 } else 737 } else
@@ -731,6 +741,7 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring,
731 obj->base.pending_write_domain = 0; 741 obj->base.pending_write_domain = 0;
732 } 742 }
733 list_splice(&ordered_vmas, vmas); 743 list_splice(&ordered_vmas, vmas);
744 list_splice(&pinned_vmas, vmas);
734 745
735 /* Attempt to pin all of the buffers into the GTT. 746 /* Attempt to pin all of the buffers into the GTT.
736 * This is done in 3 phases: 747 * This is done in 3 phases:
@@ -1317,7 +1328,8 @@ eb_get_batch(struct eb_vmas *eb)
1317 * Note that actual hangs have only been observed on gen7, but for 1328 * Note that actual hangs have only been observed on gen7, but for
1318 * paranoia do it everywhere. 1329 * paranoia do it everywhere.
1319 */ 1330 */
1320 vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS; 1331 if ((vma->exec_entry->flags & EXEC_OBJECT_PINNED) == 0)
1332 vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS;
1321 1333
1322 return vma->obj; 1334 return vma->obj;
1323} 1335}
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 67ef73a5d6eb..d727b49f07ac 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -356,6 +356,7 @@ typedef struct drm_i915_irq_wait {
356#define I915_PARAM_EU_TOTAL 34 356#define I915_PARAM_EU_TOTAL 34
357#define I915_PARAM_HAS_GPU_RESET 35 357#define I915_PARAM_HAS_GPU_RESET 35
358#define I915_PARAM_HAS_RESOURCE_STREAMER 36 358#define I915_PARAM_HAS_RESOURCE_STREAMER 36
359#define I915_PARAM_HAS_EXEC_SOFTPIN 37
359 360
360typedef struct drm_i915_getparam { 361typedef struct drm_i915_getparam {
361 __s32 param; 362 __s32 param;
@@ -682,8 +683,12 @@ struct drm_i915_gem_exec_object2 {
682 __u64 alignment; 683 __u64 alignment;
683 684
684 /** 685 /**
685 * Returned value of the updated offset of the object, for future 686 * When the EXEC_OBJECT_PINNED flag is specified this is populated by
686 * presumed_offset writes. 687 * the user with the GTT offset at which this object will be pinned.
688 * When the I915_EXEC_NO_RELOC flag is specified this must contain the
689 * presumed_offset of the object.
690 * During execbuffer2 the kernel populates it with the value of the
691 * current GTT offset of the object, for future presumed_offset writes.
687 */ 692 */
688 __u64 offset; 693 __u64 offset;
689 694
@@ -691,7 +696,8 @@ struct drm_i915_gem_exec_object2 {
691#define EXEC_OBJECT_NEEDS_GTT (1<<1) 696#define EXEC_OBJECT_NEEDS_GTT (1<<1)
692#define EXEC_OBJECT_WRITE (1<<2) 697#define EXEC_OBJECT_WRITE (1<<2)
693#define EXEC_OBJECT_SUPPORTS_48B_ADDRESS (1<<3) 698#define EXEC_OBJECT_SUPPORTS_48B_ADDRESS (1<<3)
694#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_SUPPORTS_48B_ADDRESS<<1) 699#define EXEC_OBJECT_PINNED (1<<4)
700#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_PINNED<<1)
695 __u64 flags; 701 __u64 flags;
696 702
697 __u64 rsvd1; 703 __u64 rsvd1;