aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorChris Wilson <chris@chris-wilson.co.uk>2014-05-23 02:48:08 -0400
committerJani Nikula <jani.nikula@intel.com>2014-05-27 04:18:40 -0400
commitd23db88c3ab233daed18709e3a24d6c95344117f (patch)
tree9e1ff705b415f76a882aa5eb3e546aaf543ae0cb
parent9aab8bff7aa3bee567213ad3c1fdfb217bb980a2 (diff)
drm/i915: Prevent negative relocation deltas from wrapping
This is pure evil. Userspace, I'm looking at you SNA, repacks batch buffers on the fly after generation as they are being passed to the kernel for execution. These batches also contain self-referenced relocations as a single buffer encompasses the state commands, kernels, vertices and sampler. During generation the buffers are placed at known offsets within the full batch, and then the relocation deltas (as passed to the kernel) are tweaked as the batch is repacked into a smaller buffer. This means that userspace is passing negative relocation deltas, which subsequently wrap to large values if the batch is at a low address. The GPU hangs when it then tries to use the large value as a base for its address offsets, rather than wrapping back to the real value (as one would hope). As the GPU uses positive offsets from the base, we can treat the relocation address as the minimum address read by the GPU. For the upper bound, we trust that userspace will not read beyond the end of the buffer. So, how do we fix negative relocations from wrapping? We can either check that every relocation looks valid when we write it, and then position each object such that we prevent the offset wraparound, or we just special-case the self-referential behaviour of SNA and force all batches to be above 256k. Daniel prefers the latter approach. This fixes a GPU hang when it tries to use an address (relocation + offset) greater than the GTT size. The issue would occur quite easily with full-ppgtt as each fd gets its own VM space, so low offsets would often be handed out. However, with the rearrangement of the low GTT due to capturing the BIOS framebuffer, it is already affecting kernels 3.15 onwards. I think only IVB+ is susceptible to this bug, but the workaround should only kick in rarely, so it seems sensible to always apply it. v3: Use a bias for batch buffers to prevent small negative delta relocations from wrapping. 
v4 from Daniel: - s/BIAS/BATCH_OFFSET_BIAS/ - Extract eb_vma_misplaced/i915_vma_misplaced since the conditions were growing rather cumbersome. - Add a comment to eb_get_batch explaining why we do this. - Apply the batch offset bias everywhere but mention that we've only observed it on gen7 gpus. - Drop PIN_OFFSET_FIX for now, that slipped in from a feature patch. v5: Add static to eb_get_batch, spotted by 0-day tester. Testcase: igt/gem_bad_reloc Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=78533 Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> (v3) Cc: stable@vger.kernel.org Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
-rw-r--r--drivers/gpu/drm/i915/i915_drv.h6
-rw-r--r--drivers/gpu/drm/i915/i915_gem.c46
-rw-r--r--drivers/gpu/drm/i915/i915_gem_evict.c9
-rw-r--r--drivers/gpu/drm/i915/i915_gem_execbuffer.c76
-rw-r--r--drivers/gpu/drm/i915/i915_gem_gtt.c4
5 files changed, 103 insertions, 38 deletions
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index ec5f6fb42ab3..388c028e223c 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2189,10 +2189,12 @@ void i915_gem_vma_destroy(struct i915_vma *vma);
2189#define PIN_MAPPABLE 0x1 2189#define PIN_MAPPABLE 0x1
2190#define PIN_NONBLOCK 0x2 2190#define PIN_NONBLOCK 0x2
2191#define PIN_GLOBAL 0x4 2191#define PIN_GLOBAL 0x4
2192#define PIN_OFFSET_BIAS 0x8
2193#define PIN_OFFSET_MASK (~4095)
2192int __must_check i915_gem_object_pin(struct drm_i915_gem_object *obj, 2194int __must_check i915_gem_object_pin(struct drm_i915_gem_object *obj,
2193 struct i915_address_space *vm, 2195 struct i915_address_space *vm,
2194 uint32_t alignment, 2196 uint32_t alignment,
2195 unsigned flags); 2197 uint64_t flags);
2196int __must_check i915_vma_unbind(struct i915_vma *vma); 2198int __must_check i915_vma_unbind(struct i915_vma *vma);
2197int i915_gem_object_put_pages(struct drm_i915_gem_object *obj); 2199int i915_gem_object_put_pages(struct drm_i915_gem_object *obj);
2198void i915_gem_release_all_mmaps(struct drm_i915_private *dev_priv); 2200void i915_gem_release_all_mmaps(struct drm_i915_private *dev_priv);
@@ -2445,6 +2447,8 @@ int __must_check i915_gem_evict_something(struct drm_device *dev,
2445 int min_size, 2447 int min_size,
2446 unsigned alignment, 2448 unsigned alignment,
2447 unsigned cache_level, 2449 unsigned cache_level,
2450 unsigned long start,
2451 unsigned long end,
2448 unsigned flags); 2452 unsigned flags);
2449int i915_gem_evict_vm(struct i915_address_space *vm, bool do_idle); 2453int i915_gem_evict_vm(struct i915_address_space *vm, bool do_idle);
2450int i915_gem_evict_everything(struct drm_device *dev); 2454int i915_gem_evict_everything(struct drm_device *dev);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index b391f30f9985..3326770c9ed2 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3326,12 +3326,14 @@ static struct i915_vma *
3326i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj, 3326i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
3327 struct i915_address_space *vm, 3327 struct i915_address_space *vm,
3328 unsigned alignment, 3328 unsigned alignment,
3329 unsigned flags) 3329 uint64_t flags)
3330{ 3330{
3331 struct drm_device *dev = obj->base.dev; 3331 struct drm_device *dev = obj->base.dev;
3332 struct drm_i915_private *dev_priv = dev->dev_private; 3332 struct drm_i915_private *dev_priv = dev->dev_private;
3333 u32 size, fence_size, fence_alignment, unfenced_alignment; 3333 u32 size, fence_size, fence_alignment, unfenced_alignment;
3334 size_t gtt_max = 3334 unsigned long start =
3335 flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0;
3336 unsigned long end =
3335 flags & PIN_MAPPABLE ? dev_priv->gtt.mappable_end : vm->total; 3337 flags & PIN_MAPPABLE ? dev_priv->gtt.mappable_end : vm->total;
3336 struct i915_vma *vma; 3338 struct i915_vma *vma;
3337 int ret; 3339 int ret;
@@ -3360,11 +3362,11 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
3360 /* If the object is bigger than the entire aperture, reject it early 3362 /* If the object is bigger than the entire aperture, reject it early
3361 * before evicting everything in a vain attempt to find space. 3363 * before evicting everything in a vain attempt to find space.
3362 */ 3364 */
3363 if (obj->base.size > gtt_max) { 3365 if (obj->base.size > end) {
3364 DRM_DEBUG("Attempting to bind an object larger than the aperture: object=%zd > %s aperture=%zu\n", 3366 DRM_DEBUG("Attempting to bind an object larger than the aperture: object=%zd > %s aperture=%lu\n",
3365 obj->base.size, 3367 obj->base.size,
3366 flags & PIN_MAPPABLE ? "mappable" : "total", 3368 flags & PIN_MAPPABLE ? "mappable" : "total",
3367 gtt_max); 3369 end);
3368 return ERR_PTR(-E2BIG); 3370 return ERR_PTR(-E2BIG);
3369 } 3371 }
3370 3372
@@ -3381,12 +3383,15 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
3381search_free: 3383search_free:
3382 ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node, 3384 ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node,
3383 size, alignment, 3385 size, alignment,
3384 obj->cache_level, 0, gtt_max, 3386 obj->cache_level,
3387 start, end,
3385 DRM_MM_SEARCH_DEFAULT, 3388 DRM_MM_SEARCH_DEFAULT,
3386 DRM_MM_CREATE_DEFAULT); 3389 DRM_MM_CREATE_DEFAULT);
3387 if (ret) { 3390 if (ret) {
3388 ret = i915_gem_evict_something(dev, vm, size, alignment, 3391 ret = i915_gem_evict_something(dev, vm, size, alignment,
3389 obj->cache_level, flags); 3392 obj->cache_level,
3393 start, end,
3394 flags);
3390 if (ret == 0) 3395 if (ret == 0)
3391 goto search_free; 3396 goto search_free;
3392 3397
@@ -3946,11 +3951,30 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
3946 return ret; 3951 return ret;
3947} 3952}
3948 3953
3954static bool
3955i915_vma_misplaced(struct i915_vma *vma, uint32_t alignment, uint64_t flags)
3956{
3957 struct drm_i915_gem_object *obj = vma->obj;
3958
3959 if (alignment &&
3960 vma->node.start & (alignment - 1))
3961 return true;
3962
3963 if (flags & PIN_MAPPABLE && !obj->map_and_fenceable)
3964 return true;
3965
3966 if (flags & PIN_OFFSET_BIAS &&
3967 vma->node.start < (flags & PIN_OFFSET_MASK))
3968 return true;
3969
3970 return false;
3971}
3972
3949int 3973int
3950i915_gem_object_pin(struct drm_i915_gem_object *obj, 3974i915_gem_object_pin(struct drm_i915_gem_object *obj,
3951 struct i915_address_space *vm, 3975 struct i915_address_space *vm,
3952 uint32_t alignment, 3976 uint32_t alignment,
3953 unsigned flags) 3977 uint64_t flags)
3954{ 3978{
3955 struct i915_vma *vma; 3979 struct i915_vma *vma;
3956 int ret; 3980 int ret;
@@ -3963,15 +3987,13 @@ i915_gem_object_pin(struct drm_i915_gem_object *obj,
3963 if (WARN_ON(vma->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT)) 3987 if (WARN_ON(vma->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT))
3964 return -EBUSY; 3988 return -EBUSY;
3965 3989
3966 if ((alignment && 3990 if (i915_vma_misplaced(vma, alignment, flags)) {
3967 vma->node.start & (alignment - 1)) ||
3968 (flags & PIN_MAPPABLE && !obj->map_and_fenceable)) {
3969 WARN(vma->pin_count, 3991 WARN(vma->pin_count,
3970 "bo is already pinned with incorrect alignment:" 3992 "bo is already pinned with incorrect alignment:"
3971 " offset=%lx, req.alignment=%x, req.map_and_fenceable=%d," 3993 " offset=%lx, req.alignment=%x, req.map_and_fenceable=%d,"
3972 " obj->map_and_fenceable=%d\n", 3994 " obj->map_and_fenceable=%d\n",
3973 i915_gem_obj_offset(obj, vm), alignment, 3995 i915_gem_obj_offset(obj, vm), alignment,
3974 flags & PIN_MAPPABLE, 3996 !!(flags & PIN_MAPPABLE),
3975 obj->map_and_fenceable); 3997 obj->map_and_fenceable);
3976 ret = i915_vma_unbind(vma); 3998 ret = i915_vma_unbind(vma);
3977 if (ret) 3999 if (ret)
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
index 75fca63dc8c1..bbf4b12d842e 100644
--- a/drivers/gpu/drm/i915/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/i915_gem_evict.c
@@ -68,9 +68,9 @@ mark_free(struct i915_vma *vma, struct list_head *unwind)
68int 68int
69i915_gem_evict_something(struct drm_device *dev, struct i915_address_space *vm, 69i915_gem_evict_something(struct drm_device *dev, struct i915_address_space *vm,
70 int min_size, unsigned alignment, unsigned cache_level, 70 int min_size, unsigned alignment, unsigned cache_level,
71 unsigned long start, unsigned long end,
71 unsigned flags) 72 unsigned flags)
72{ 73{
73 struct drm_i915_private *dev_priv = dev->dev_private;
74 struct list_head eviction_list, unwind_list; 74 struct list_head eviction_list, unwind_list;
75 struct i915_vma *vma; 75 struct i915_vma *vma;
76 int ret = 0; 76 int ret = 0;
@@ -102,11 +102,10 @@ i915_gem_evict_something(struct drm_device *dev, struct i915_address_space *vm,
102 */ 102 */
103 103
104 INIT_LIST_HEAD(&unwind_list); 104 INIT_LIST_HEAD(&unwind_list);
105 if (flags & PIN_MAPPABLE) { 105 if (start != 0 || end != vm->total) {
106 BUG_ON(!i915_is_ggtt(vm));
107 drm_mm_init_scan_with_range(&vm->mm, min_size, 106 drm_mm_init_scan_with_range(&vm->mm, min_size,
108 alignment, cache_level, 0, 107 alignment, cache_level,
109 dev_priv->gtt.mappable_end); 108 start, end);
110 } else 109 } else
111 drm_mm_init_scan(&vm->mm, min_size, alignment, cache_level); 110 drm_mm_init_scan(&vm->mm, min_size, alignment, cache_level);
112 111
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 7aaf6390cfaf..20fef6c50267 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -35,6 +35,9 @@
35 35
36#define __EXEC_OBJECT_HAS_PIN (1<<31) 36#define __EXEC_OBJECT_HAS_PIN (1<<31)
37#define __EXEC_OBJECT_HAS_FENCE (1<<30) 37#define __EXEC_OBJECT_HAS_FENCE (1<<30)
38#define __EXEC_OBJECT_NEEDS_BIAS (1<<28)
39
40#define BATCH_OFFSET_BIAS (256*1024)
38 41
39struct eb_vmas { 42struct eb_vmas {
40 struct list_head vmas; 43 struct list_head vmas;
@@ -545,7 +548,7 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
545 struct drm_i915_gem_exec_object2 *entry = vma->exec_entry; 548 struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
546 bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4; 549 bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
547 bool need_fence; 550 bool need_fence;
548 unsigned flags; 551 uint64_t flags;
549 int ret; 552 int ret;
550 553
551 flags = 0; 554 flags = 0;
@@ -559,6 +562,8 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
559 562
560 if (entry->flags & EXEC_OBJECT_NEEDS_GTT) 563 if (entry->flags & EXEC_OBJECT_NEEDS_GTT)
561 flags |= PIN_GLOBAL; 564 flags |= PIN_GLOBAL;
565 if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS)
566 flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS;
562 567
563 ret = i915_gem_object_pin(obj, vma->vm, entry->alignment, flags); 568 ret = i915_gem_object_pin(obj, vma->vm, entry->alignment, flags);
564 if (ret) 569 if (ret)
@@ -592,6 +597,36 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
592 return 0; 597 return 0;
593} 598}
594 599
600static bool
601eb_vma_misplaced(struct i915_vma *vma, bool has_fenced_gpu_access)
602{
603 struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
604 struct drm_i915_gem_object *obj = vma->obj;
605 bool need_fence, need_mappable;
606
607 need_fence =
608 has_fenced_gpu_access &&
609 entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
610 obj->tiling_mode != I915_TILING_NONE;
611 need_mappable = need_fence || need_reloc_mappable(vma);
612
613 WARN_ON((need_mappable || need_fence) &&
614 !i915_is_ggtt(vma->vm));
615
616 if (entry->alignment &&
617 vma->node.start & (entry->alignment - 1))
618 return true;
619
620 if (need_mappable && !obj->map_and_fenceable)
621 return true;
622
623 if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS &&
624 vma->node.start < BATCH_OFFSET_BIAS)
625 return true;
626
627 return false;
628}
629
595static int 630static int
596i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring, 631i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
597 struct list_head *vmas, 632 struct list_head *vmas,
@@ -653,26 +688,10 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
653 688
654 /* Unbind any ill-fitting objects or pin. */ 689 /* Unbind any ill-fitting objects or pin. */
655 list_for_each_entry(vma, vmas, exec_list) { 690 list_for_each_entry(vma, vmas, exec_list) {
656 struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
657 bool need_fence, need_mappable;
658
659 obj = vma->obj;
660
661 if (!drm_mm_node_allocated(&vma->node)) 691 if (!drm_mm_node_allocated(&vma->node))
662 continue; 692 continue;
663 693
664 need_fence = 694 if (eb_vma_misplaced(vma, has_fenced_gpu_access))
665 has_fenced_gpu_access &&
666 entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
667 obj->tiling_mode != I915_TILING_NONE;
668 need_mappable = need_fence || need_reloc_mappable(vma);
669
670 WARN_ON((need_mappable || need_fence) &&
671 !i915_is_ggtt(vma->vm));
672
673 if ((entry->alignment &&
674 vma->node.start & (entry->alignment - 1)) ||
675 (need_mappable && !obj->map_and_fenceable))
676 ret = i915_vma_unbind(vma); 695 ret = i915_vma_unbind(vma);
677 else 696 else
678 ret = i915_gem_execbuffer_reserve_vma(vma, ring, need_relocs); 697 ret = i915_gem_execbuffer_reserve_vma(vma, ring, need_relocs);
@@ -999,6 +1018,25 @@ i915_reset_gen7_sol_offsets(struct drm_device *dev,
999 return 0; 1018 return 0;
1000} 1019}
1001 1020
1021static struct drm_i915_gem_object *
1022eb_get_batch(struct eb_vmas *eb)
1023{
1024 struct i915_vma *vma = list_entry(eb->vmas.prev, typeof(*vma), exec_list);
1025
1026 /*
1027 * SNA is doing fancy tricks with compressing batch buffers, which leads
1028 * to negative relocation deltas. Usually that works out ok since the
1029 * relocate address is still positive, except when the batch is placed
1030 * very low in the GTT. Ensure this doesn't happen.
1031 *
1032 * Note that actual hangs have only been observed on gen7, but for
1033 * paranoia do it everywhere.
1034 */
1035 vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS;
1036
1037 return vma->obj;
1038}
1039
1002static int 1040static int
1003i915_gem_do_execbuffer(struct drm_device *dev, void *data, 1041i915_gem_do_execbuffer(struct drm_device *dev, void *data,
1004 struct drm_file *file, 1042 struct drm_file *file,
@@ -1153,7 +1191,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
1153 goto err; 1191 goto err;
1154 1192
1155 /* take note of the batch buffer before we might reorder the lists */ 1193 /* take note of the batch buffer before we might reorder the lists */
1156 batch_obj = list_entry(eb->vmas.prev, struct i915_vma, exec_list)->obj; 1194 batch_obj = eb_get_batch(eb);
1157 1195
1158 /* Move the objects en-masse into the GTT, evicting if necessary. */ 1196 /* Move the objects en-masse into the GTT, evicting if necessary. */
1159 need_relocs = (args->flags & I915_EXEC_NO_RELOC) == 0; 1197 need_relocs = (args->flags & I915_EXEC_NO_RELOC) == 0;
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 154b0f8bb88d..5deb22864c52 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -1089,7 +1089,9 @@ alloc:
1089 if (ret == -ENOSPC && !retried) { 1089 if (ret == -ENOSPC && !retried) {
1090 ret = i915_gem_evict_something(dev, &dev_priv->gtt.base, 1090 ret = i915_gem_evict_something(dev, &dev_priv->gtt.base,
1091 GEN6_PD_SIZE, GEN6_PD_ALIGN, 1091 GEN6_PD_SIZE, GEN6_PD_ALIGN,
1092 I915_CACHE_NONE, 0); 1092 I915_CACHE_NONE,
1093 0, dev_priv->gtt.base.total,
1094 0);
1093 if (ret) 1095 if (ret)
1094 return ret; 1096 return ret;
1095 1097