author     Chris Wilson <chris@chris-wilson.co.uk>    2016-08-18 12:17:00 -0400
committer  Chris Wilson <chris@chris-wilson.co.uk>    2016-08-18 17:36:50 -0400
commit     49ef5294cda256aa5496ba56bbf859d3c7a17e07 (patch)
tree       d764052e4f8e5979320750f1fd75f0a1b489bf53 /drivers
parent     a1e5afbe4d5b6a0b1e3ffb32ec11dd51887ca7a3 (diff)
drm/i915: Move fence tracking from object to vma
In order to handle tiled partial GTT mmappings, we need to associate
the fence with an individual vma.

v2: A couple of silly drops replaced spotted by Joonas

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20160818161718.27187-21-chris@chris-wilson.co.uk
Diffstat (limited to 'drivers')
-rw-r--r--  drivers/gpu/drm/i915/i915_debugfs.c         |  16
-rw-r--r--  drivers/gpu/drm/i915/i915_drv.h             |  82
-rw-r--r--  drivers/gpu/drm/i915/i915_gem.c             |  30
-rw-r--r--  drivers/gpu/drm/i915/i915_gem_execbuffer.c  |  20
-rw-r--r--  drivers/gpu/drm/i915/i915_gem_fence.c       | 422
-rw-r--r--  drivers/gpu/drm/i915/i915_gem_gtt.c         |   2
-rw-r--r--  drivers/gpu/drm/i915/i915_gem_gtt.h         |   8
-rw-r--r--  drivers/gpu/drm/i915/i915_gem_tiling.c      |  67
-rw-r--r--  drivers/gpu/drm/i915/i915_gpu_error.c       |   2
-rw-r--r--  drivers/gpu/drm/i915/intel_display.c        |  57
-rw-r--r--  drivers/gpu/drm/i915/intel_fbc.c            |  10
-rw-r--r--  drivers/gpu/drm/i915/intel_overlay.c        |   2
12 files changed, 330 insertions, 388 deletions
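
At its core, the patch moves three pieces of per-object fence state onto the
vma: the register index (obj->fence_reg), the dirty flag (obj->fence_dirty)
and the activity tracker (obj->last_fence). A minimal sketch of the resulting
ownership, with the fields cut down to the ones this patch touches (the full
definitions are in the i915_drv.h and i915_gem_gtt.h hunks below):

struct drm_i915_fence_reg {
	struct list_head link;            /* position on dev_priv->mm.fence_list (LRU) */
	struct drm_i915_private *i915;    /* owning device; replaces fence_number() */
	struct i915_vma *vma;             /* vma currently backed by this register, or NULL */
	int pin_count;
	int id;                           /* hardware register index */
	bool dirty;                       /* tiling changed; replaces obj->fence_dirty */
};

struct i915_vma {
	/* ... */
	struct drm_i915_fence_reg *fence;     /* back-pointer; NULL when unfenced */
	struct i915_gem_active last_fence;    /* replaces obj->last_fence */
	/* ... */
};
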
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 36112282f590..d0b4c74974be 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -152,11 +152,9 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
152 seq_printf(m, "%x ", 152 seq_printf(m, "%x ",
153 i915_gem_active_get_seqno(&obj->last_read[id], 153 i915_gem_active_get_seqno(&obj->last_read[id],
154 &obj->base.dev->struct_mutex)); 154 &obj->base.dev->struct_mutex));
155 seq_printf(m, "] %x %x%s%s%s", 155 seq_printf(m, "] %x %s%s%s",
156 i915_gem_active_get_seqno(&obj->last_write, 156 i915_gem_active_get_seqno(&obj->last_write,
157 &obj->base.dev->struct_mutex), 157 &obj->base.dev->struct_mutex),
158 i915_gem_active_get_seqno(&obj->last_fence,
159 &obj->base.dev->struct_mutex),
160 i915_cache_level_str(to_i915(obj->base.dev), obj->cache_level), 158 i915_cache_level_str(to_i915(obj->base.dev), obj->cache_level),
161 obj->dirty ? " dirty" : "", 159 obj->dirty ? " dirty" : "",
162 obj->madv == I915_MADV_DONTNEED ? " purgeable" : ""); 160 obj->madv == I915_MADV_DONTNEED ? " purgeable" : "");
@@ -169,8 +167,6 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
169 seq_printf(m, " (pinned x %d)", pin_count); 167 seq_printf(m, " (pinned x %d)", pin_count);
170 if (obj->pin_display) 168 if (obj->pin_display)
171 seq_printf(m, " (display)"); 169 seq_printf(m, " (display)");
172 if (obj->fence_reg != I915_FENCE_REG_NONE)
173 seq_printf(m, " (fence: %d)", obj->fence_reg);
174 list_for_each_entry(vma, &obj->vma_list, obj_link) { 170 list_for_each_entry(vma, &obj->vma_list, obj_link) {
175 if (!drm_mm_node_allocated(&vma->node)) 171 if (!drm_mm_node_allocated(&vma->node))
176 continue; 172 continue;
@@ -180,6 +176,10 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
180 vma->node.start, vma->node.size); 176 vma->node.start, vma->node.size);
181 if (i915_vma_is_ggtt(vma)) 177 if (i915_vma_is_ggtt(vma))
182 seq_printf(m, ", type: %u", vma->ggtt_view.type); 178 seq_printf(m, ", type: %u", vma->ggtt_view.type);
179 if (vma->fence)
180 seq_printf(m, " , fence: %d%s",
181 vma->fence->id,
182 i915_gem_active_isset(&vma->last_fence) ? "*" : "");
183 seq_puts(m, ")"); 183 seq_puts(m, ")");
184 } 184 }
185 if (obj->stolen) 185 if (obj->stolen)
@@ -938,14 +938,14 @@ static int i915_gem_fence_regs_info(struct seq_file *m, void *data)
938 938
939 seq_printf(m, "Total fences = %d\n", dev_priv->num_fence_regs); 939 seq_printf(m, "Total fences = %d\n", dev_priv->num_fence_regs);
940 for (i = 0; i < dev_priv->num_fence_regs; i++) { 940 for (i = 0; i < dev_priv->num_fence_regs; i++) {
941 struct drm_i915_gem_object *obj = dev_priv->fence_regs[i].obj; 941 struct i915_vma *vma = dev_priv->fence_regs[i].vma;
942 942
943 seq_printf(m, "Fence %d, pin count = %d, object = ", 943 seq_printf(m, "Fence %d, pin count = %d, object = ",
944 i, dev_priv->fence_regs[i].pin_count); 944 i, dev_priv->fence_regs[i].pin_count);
945 if (obj == NULL) 945 if (!vma)
946 seq_puts(m, "unused"); 946 seq_puts(m, "unused");
947 else 947 else
948 describe_obj(m, obj); 948 describe_obj(m, vma->obj);
949 seq_putc(m, '\n'); 949 seq_putc(m, '\n');
950 } 950 }
951 951
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 67ece6db60d9..56d439374fe5 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -455,15 +455,21 @@ struct intel_opregion {
455struct intel_overlay; 455struct intel_overlay;
456struct intel_overlay_error_state; 456struct intel_overlay_error_state;
457 457
458#define I915_FENCE_REG_NONE -1
459#define I915_MAX_NUM_FENCES 32
460/* 32 fences + sign bit for FENCE_REG_NONE */
461#define I915_MAX_NUM_FENCE_BITS 6
462
463struct drm_i915_fence_reg { 458struct drm_i915_fence_reg {
464 struct list_head link; 459 struct list_head link;
465 struct drm_i915_gem_object *obj; 460 struct drm_i915_private *i915;
461 struct i915_vma *vma;
466 int pin_count; 462 int pin_count;
463 int id;
464 /**
465 * Whether the tiling parameters for the currently
466 * associated fence register have changed. Note that
467 * for the purposes of tracking tiling changes we also
468 * treat the unfenced register, the register slot that
469 * the object occupies whilst it executes a fenced
470 * command (such as BLT on gen2/3), as a "fence".
471 */
472 bool dirty;
467}; 473};
468 474
469struct sdvo_device_mapping { 475struct sdvo_device_mapping {
@@ -2172,27 +2178,11 @@ struct drm_i915_gem_object {
2172 unsigned int dirty:1; 2178 unsigned int dirty:1;
2173 2179
2174 /** 2180 /**
2175 * Fence register bits (if any) for this object. Will be set
2176 * as needed when mapped into the GTT.
2177 * Protected by dev->struct_mutex.
2178 */
2179 signed int fence_reg:I915_MAX_NUM_FENCE_BITS;
2180
2181 /**
2182 * Advice: are the backing pages purgeable? 2181 * Advice: are the backing pages purgeable?
2183 */ 2182 */
2184 unsigned int madv:2; 2183 unsigned int madv:2;
2185 2184
2186 /** 2185 /**
2187 * Whether the tiling parameters for the currently associated fence
2188 * register have changed. Note that for the purposes of tracking
2189 * tiling changes we also treat the unfenced register, the register
2190 * slot that the object occupies whilst it executes a fenced
2191 * command (such as BLT on gen2/3), as a "fence".
2192 */
2193 unsigned int fence_dirty:1;
2194
2195 /**
2196 * Whether the current gtt mapping needs to be mappable (and isn't just 2186 * Whether the current gtt mapping needs to be mappable (and isn't just
2197 * mappable by accident). Track pin and fault separate for a more 2187 * mappable by accident). Track pin and fault separate for a more
2198 * accurate mappable working set. 2188 * accurate mappable working set.
@@ -2240,7 +2230,6 @@ struct drm_i915_gem_object {
2240 */ 2230 */
2241 struct i915_gem_active last_read[I915_NUM_ENGINES]; 2231 struct i915_gem_active last_read[I915_NUM_ENGINES];
2242 struct i915_gem_active last_write; 2232 struct i915_gem_active last_write;
2243 struct i915_gem_active last_fence;
2244 2233
2245 /** References from framebuffers, locks out tiling changes. */ 2234 /** References from framebuffers, locks out tiling changes. */
2246 unsigned long framebuffer_references; 2235 unsigned long framebuffer_references;
@@ -3343,11 +3332,50 @@ i915_gem_object_ggtt_offset(struct drm_i915_gem_object *o,
3343} 3332}
3344 3333
3345/* i915_gem_fence.c */ 3334/* i915_gem_fence.c */
3346int __must_check i915_gem_object_get_fence(struct drm_i915_gem_object *obj); 3335int __must_check i915_vma_get_fence(struct i915_vma *vma);
3347int __must_check i915_gem_object_put_fence(struct drm_i915_gem_object *obj); 3336int __must_check i915_vma_put_fence(struct i915_vma *vma);
3337
3338/**
3339 * i915_vma_pin_fence - pin fencing state
3340 * @vma: vma to pin fencing for
3341 *
3342 * This pins the fencing state (whether tiled or untiled) to make sure the
3343 * vma (and its object) is ready to be used as a scanout target. Fencing
3344 * status must be synchronize first by calling i915_vma_get_fence():
3345 *
3346 * The resulting fence pin reference must be released again with
3347 * i915_vma_unpin_fence().
3348 *
3349 * Returns:
3350 *
3351 * True if the vma has a fence, false otherwise.
3352 */
3353static inline bool
3354i915_vma_pin_fence(struct i915_vma *vma)
3355{
3356 if (vma->fence) {
3357 vma->fence->pin_count++;
3358 return true;
3359 } else
3360 return false;
3361}
3348 3362
3349bool i915_gem_object_pin_fence(struct drm_i915_gem_object *obj); 3363/**
3350void i915_gem_object_unpin_fence(struct drm_i915_gem_object *obj); 3364 * i915_vma_unpin_fence - unpin fencing state
3365 * @vma: vma to unpin fencing for
3366 *
3367 * This releases the fence pin reference acquired through
3368 * i915_vma_pin_fence. It will handle both objects with and without an
3369 * attached fence correctly, callers do not need to distinguish this.
3370 */
3371static inline void
3372i915_vma_unpin_fence(struct i915_vma *vma)
3373{
3374 if (vma->fence) {
3375 GEM_BUG_ON(vma->fence->pin_count <= 0);
3376 vma->fence->pin_count--;
3377 }
3378}
3351 3379
3352void i915_gem_restore_fences(struct drm_device *dev); 3380void i915_gem_restore_fences(struct drm_device *dev);
3353 3381
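
The two new inline helpers above are the vma-based replacements for
i915_gem_object_pin_fence()/unpin_fence(): i915_vma_get_fence() does the
allocation and synchronisation, and only afterwards may the fence be pinned.
An illustrative caller, condensed from the execbuffer and display paths later
in this patch:

	/* Illustrative caller only; the real users in this patch are
	 * i915_gem_execbuffer_reserve_vma() and intel_pin_and_fence_fb_obj().
	 */
	ret = i915_vma_get_fence(vma);	/* allocate or refresh the fence register */
	if (ret)
		return ret;

	if (i915_vma_pin_fence(vma)) {
		/* pin_count keeps the register from being stolen while in use */
		/* ... access the fenced (detiled) GTT mapping here ... */
		i915_vma_unpin_fence(vma);
	}
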
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index dd68f0c15801..9276c73f1d81 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -829,7 +829,7 @@ i915_gem_gtt_pread(struct drm_device *dev,
829 if (!IS_ERR(vma)) { 829 if (!IS_ERR(vma)) {
830 node.start = i915_ggtt_offset(vma); 830 node.start = i915_ggtt_offset(vma);
831 node.allocated = false; 831 node.allocated = false;
832 ret = i915_gem_object_put_fence(obj); 832 ret = i915_vma_put_fence(vma);
833 if (ret) { 833 if (ret) {
834 i915_vma_unpin(vma); 834 i915_vma_unpin(vma);
835 vma = ERR_PTR(ret); 835 vma = ERR_PTR(ret);
@@ -1131,7 +1131,7 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_private *i915,
1131 if (!IS_ERR(vma)) { 1131 if (!IS_ERR(vma)) {
1132 node.start = i915_ggtt_offset(vma); 1132 node.start = i915_ggtt_offset(vma);
1133 node.allocated = false; 1133 node.allocated = false;
1134 ret = i915_gem_object_put_fence(obj); 1134 ret = i915_vma_put_fence(vma);
1135 if (ret) { 1135 if (ret) {
1136 i915_vma_unpin(vma); 1136 i915_vma_unpin(vma);
1137 vma = ERR_PTR(ret); 1137 vma = ERR_PTR(ret);
@@ -1751,7 +1751,7 @@ int i915_gem_fault(struct vm_area_struct *area, struct vm_fault *vmf)
1751 if (ret) 1751 if (ret)
1752 goto err_unpin; 1752 goto err_unpin;
1753 1753
1754 ret = i915_gem_object_get_fence(obj); 1754 ret = i915_vma_get_fence(vma);
1755 if (ret) 1755 if (ret)
1756 goto err_unpin; 1756 goto err_unpin;
1757 1757
@@ -2903,7 +2903,7 @@ int i915_vma_unbind(struct i915_vma *vma)
2903 i915_gem_object_finish_gtt(obj); 2903 i915_gem_object_finish_gtt(obj);
2904 2904
2905 /* release the fence reg _after_ flushing */ 2905 /* release the fence reg _after_ flushing */
2906 ret = i915_gem_object_put_fence(obj); 2906 ret = i915_vma_put_fence(vma);
2907 if (ret) 2907 if (ret)
2908 return ret; 2908 return ret;
2909 2909
@@ -3385,9 +3385,11 @@ restart:
3385 * dropped the fence as all snoopable access is 3385 * dropped the fence as all snoopable access is
3386 * supposed to be linear. 3386 * supposed to be linear.
3387 */ 3387 */
3388 ret = i915_gem_object_put_fence(obj); 3388 list_for_each_entry(vma, &obj->vma_list, obj_link) {
3389 if (ret) 3389 ret = i915_vma_put_fence(vma);
3390 return ret; 3390 if (ret)
3391 return ret;
3392 }
3391 } else { 3393 } else {
3392 /* We either have incoherent backing store and 3394 /* We either have incoherent backing store and
3393 * so no GTT access or the architecture is fully 3395 * so no GTT access or the architecture is fully
@@ -4065,14 +4067,12 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
4065 i915_gem_object_retire__read); 4067 i915_gem_object_retire__read);
4066 init_request_active(&obj->last_write, 4068 init_request_active(&obj->last_write,
4067 i915_gem_object_retire__write); 4069 i915_gem_object_retire__write);
4068 init_request_active(&obj->last_fence, NULL);
4069 INIT_LIST_HEAD(&obj->obj_exec_link); 4070 INIT_LIST_HEAD(&obj->obj_exec_link);
4070 INIT_LIST_HEAD(&obj->vma_list); 4071 INIT_LIST_HEAD(&obj->vma_list);
4071 INIT_LIST_HEAD(&obj->batch_pool_link); 4072 INIT_LIST_HEAD(&obj->batch_pool_link);
4072 4073
4073 obj->ops = ops; 4074 obj->ops = ops;
4074 4075
4075 obj->fence_reg = I915_FENCE_REG_NONE;
4076 obj->madv = I915_MADV_WILLNEED; 4076 obj->madv = I915_MADV_WILLNEED;
4077 4077
4078 i915_gem_info_add_obj(to_i915(obj->base.dev), obj->base.size); 4078 i915_gem_info_add_obj(to_i915(obj->base.dev), obj->base.size);
@@ -4502,6 +4502,7 @@ void
4502i915_gem_load_init_fences(struct drm_i915_private *dev_priv) 4502i915_gem_load_init_fences(struct drm_i915_private *dev_priv)
4503{ 4503{
4504 struct drm_device *dev = &dev_priv->drm; 4504 struct drm_device *dev = &dev_priv->drm;
4505 int i;
4505 4506
4506 if (INTEL_INFO(dev_priv)->gen >= 7 && !IS_VALLEYVIEW(dev_priv) && 4507 if (INTEL_INFO(dev_priv)->gen >= 7 && !IS_VALLEYVIEW(dev_priv) &&
4507 !IS_CHERRYVIEW(dev_priv)) 4508 !IS_CHERRYVIEW(dev_priv))
@@ -4517,6 +4518,13 @@ i915_gem_load_init_fences(struct drm_i915_private *dev_priv)
4517 I915_READ(vgtif_reg(avail_rs.fence_num)); 4518 I915_READ(vgtif_reg(avail_rs.fence_num));
4518 4519
4519 /* Initialize fence registers to zero */ 4520 /* Initialize fence registers to zero */
4521 for (i = 0; i < dev_priv->num_fence_regs; i++) {
4522 struct drm_i915_fence_reg *fence = &dev_priv->fence_regs[i];
4523
4524 fence->i915 = dev_priv;
4525 fence->id = i;
4526 list_add_tail(&fence->link, &dev_priv->mm.fence_list);
4527 }
4520 i915_gem_restore_fences(dev); 4528 i915_gem_restore_fences(dev);
4521 4529
4522 i915_gem_detect_bit_6_swizzle(dev); 4530 i915_gem_detect_bit_6_swizzle(dev);
@@ -4552,8 +4560,6 @@ i915_gem_load_init(struct drm_device *dev)
4552 INIT_LIST_HEAD(&dev_priv->mm.fence_list); 4560 INIT_LIST_HEAD(&dev_priv->mm.fence_list);
4553 for (i = 0; i < I915_NUM_ENGINES; i++) 4561 for (i = 0; i < I915_NUM_ENGINES; i++)
4554 init_engine_lists(&dev_priv->engine[i]); 4562 init_engine_lists(&dev_priv->engine[i]);
4555 for (i = 0; i < I915_MAX_NUM_FENCES; i++)
4556 INIT_LIST_HEAD(&dev_priv->fence_regs[i].link);
4557 INIT_DELAYED_WORK(&dev_priv->gt.retire_work, 4563 INIT_DELAYED_WORK(&dev_priv->gt.retire_work,
4558 i915_gem_retire_work_handler); 4564 i915_gem_retire_work_handler);
4559 INIT_DELAYED_WORK(&dev_priv->gt.idle_work, 4565 INIT_DELAYED_WORK(&dev_priv->gt.idle_work,
@@ -4563,8 +4569,6 @@ i915_gem_load_init(struct drm_device *dev)
4563 4569
4564 dev_priv->relative_constants_mode = I915_EXEC_CONSTANTS_REL_GENERAL; 4570 dev_priv->relative_constants_mode = I915_EXEC_CONSTANTS_REL_GENERAL;
4565 4571
4566 INIT_LIST_HEAD(&dev_priv->mm.fence_list);
4567
4568 init_waitqueue_head(&dev_priv->pending_flip_queue); 4572 init_waitqueue_head(&dev_priv->pending_flip_queue);
4569 4573
4570 dev_priv->mm.interruptible = true; 4574 dev_priv->mm.interruptible = true;
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 58cebafa8348..907386630e26 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -250,7 +250,6 @@ static void
250i915_gem_execbuffer_unreserve_vma(struct i915_vma *vma) 250i915_gem_execbuffer_unreserve_vma(struct i915_vma *vma)
251{ 251{
252 struct drm_i915_gem_exec_object2 *entry; 252 struct drm_i915_gem_exec_object2 *entry;
253 struct drm_i915_gem_object *obj = vma->obj;
254 253
255 if (!drm_mm_node_allocated(&vma->node)) 254 if (!drm_mm_node_allocated(&vma->node))
256 return; 255 return;
@@ -258,7 +257,7 @@ i915_gem_execbuffer_unreserve_vma(struct i915_vma *vma)
258 entry = vma->exec_entry; 257 entry = vma->exec_entry;
259 258
260 if (entry->flags & __EXEC_OBJECT_HAS_FENCE) 259 if (entry->flags & __EXEC_OBJECT_HAS_FENCE)
261 i915_gem_object_unpin_fence(obj); 260 i915_vma_unpin_fence(vma);
262 261
263 if (entry->flags & __EXEC_OBJECT_HAS_PIN) 262 if (entry->flags & __EXEC_OBJECT_HAS_PIN)
264 __i915_vma_unpin(vma); 263 __i915_vma_unpin(vma);
@@ -455,7 +454,7 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj,
455 if (ret) 454 if (ret)
456 return ERR_PTR(ret); 455 return ERR_PTR(ret);
457 } else { 456 } else {
458 ret = i915_gem_object_put_fence(obj); 457 ret = i915_vma_put_fence(vma);
459 if (ret) { 458 if (ret) {
460 i915_vma_unpin(vma); 459 i915_vma_unpin(vma);
461 return ERR_PTR(ret); 460 return ERR_PTR(ret);
@@ -811,11 +810,11 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
811 entry->flags |= __EXEC_OBJECT_HAS_PIN; 810 entry->flags |= __EXEC_OBJECT_HAS_PIN;
812 811
813 if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) { 812 if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) {
814 ret = i915_gem_object_get_fence(obj); 813 ret = i915_vma_get_fence(vma);
815 if (ret) 814 if (ret)
816 return ret; 815 return ret;
817 816
818 if (i915_gem_object_pin_fence(obj)) 817 if (i915_vma_pin_fence(vma))
819 entry->flags |= __EXEC_OBJECT_HAS_FENCE; 818 entry->flags |= __EXEC_OBJECT_HAS_FENCE;
820 } 819 }
821 820
@@ -1305,15 +1304,8 @@ void i915_vma_move_to_active(struct i915_vma *vma,
1305 obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS; 1304 obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS;
1306 } 1305 }
1307 1306
1308 if (flags & EXEC_OBJECT_NEEDS_FENCE) { 1307 if (flags & EXEC_OBJECT_NEEDS_FENCE)
1309 i915_gem_active_set(&obj->last_fence, req); 1308 i915_gem_active_set(&vma->last_fence, req);
1310 if (flags & __EXEC_OBJECT_HAS_FENCE) {
1311 struct drm_i915_private *dev_priv = req->i915;
1312
1313 list_move_tail(&dev_priv->fence_regs[obj->fence_reg].link,
1314 &dev_priv->mm.fence_list);
1315 }
1316 }
1317 1309
1318 i915_vma_set_active(vma, idx); 1310 i915_vma_set_active(vma, idx);
1319 i915_gem_active_set(&vma->last_read[idx], req); 1311 i915_gem_active_set(&vma->last_read[idx], req);
diff --git a/drivers/gpu/drm/i915/i915_gem_fence.c b/drivers/gpu/drm/i915/i915_gem_fence.c
index 1b32351aee42..dfe0a1a5e584 100644
--- a/drivers/gpu/drm/i915/i915_gem_fence.c
+++ b/drivers/gpu/drm/i915/i915_gem_fence.c
@@ -55,74 +55,73 @@
55 * CPU ptes into GTT mmaps (not the GTT ptes themselves) as needed. 55 * CPU ptes into GTT mmaps (not the GTT ptes themselves) as needed.
56 */ 56 */
57 57
58static void i965_write_fence_reg(struct drm_device *dev, int reg, 58#define pipelined 0
59 struct drm_i915_gem_object *obj) 59
60static void i965_write_fence_reg(struct drm_i915_fence_reg *fence,
61 struct i915_vma *vma)
60{ 62{
61 struct drm_i915_private *dev_priv = to_i915(dev);
62 i915_reg_t fence_reg_lo, fence_reg_hi; 63 i915_reg_t fence_reg_lo, fence_reg_hi;
63 int fence_pitch_shift; 64 int fence_pitch_shift;
65 u64 val;
64 66
65 if (INTEL_INFO(dev)->gen >= 6) { 67 if (INTEL_INFO(fence->i915)->gen >= 6) {
66 fence_reg_lo = FENCE_REG_GEN6_LO(reg); 68 fence_reg_lo = FENCE_REG_GEN6_LO(fence->id);
67 fence_reg_hi = FENCE_REG_GEN6_HI(reg); 69 fence_reg_hi = FENCE_REG_GEN6_HI(fence->id);
68 fence_pitch_shift = GEN6_FENCE_PITCH_SHIFT; 70 fence_pitch_shift = GEN6_FENCE_PITCH_SHIFT;
71
69 } else { 72 } else {
70 fence_reg_lo = FENCE_REG_965_LO(reg); 73 fence_reg_lo = FENCE_REG_965_LO(fence->id);
71 fence_reg_hi = FENCE_REG_965_HI(reg); 74 fence_reg_hi = FENCE_REG_965_HI(fence->id);
72 fence_pitch_shift = I965_FENCE_PITCH_SHIFT; 75 fence_pitch_shift = I965_FENCE_PITCH_SHIFT;
73 } 76 }
74 77
75 /* To w/a incoherency with non-atomic 64-bit register updates, 78 val = 0;
76 * we split the 64-bit update into two 32-bit writes. In order 79 if (vma) {
77 * for a partial fence not to be evaluated between writes, we 80 unsigned int tiling = i915_gem_object_get_tiling(vma->obj);
78 * precede the update with write to turn off the fence register, 81 bool is_y_tiled = tiling == I915_TILING_Y;
79 * and only enable the fence as the last step. 82 unsigned int stride = i915_gem_object_get_stride(vma->obj);
80 * 83 u32 row_size = stride * (is_y_tiled ? 32 : 8);
81 * For extra levels of paranoia, we make sure each step lands 84 u32 size = rounddown((u32)vma->node.size, row_size);
82 * before applying the next step.
83 */
84 I915_WRITE(fence_reg_lo, 0);
85 POSTING_READ(fence_reg_lo);
86
87 if (obj) {
88 struct i915_vma *vma = i915_gem_object_to_ggtt(obj, NULL);
89 unsigned int tiling = i915_gem_object_get_tiling(obj);
90 unsigned int stride = i915_gem_object_get_stride(obj);
91 u32 size = vma->node.size;
92 u32 row_size = stride * (tiling == I915_TILING_Y ? 32 : 8);
93 u64 val;
94
95 /* Adjust fence size to match tiled area */
96 size = rounddown(size, row_size);
97 85
98 val = ((vma->node.start + size - 4096) & 0xfffff000) << 32; 86 val = ((vma->node.start + size - 4096) & 0xfffff000) << 32;
99 val |= vma->node.start & 0xfffff000; 87 val |= vma->node.start & 0xfffff000;
100 val |= (u64)((stride / 128) - 1) << fence_pitch_shift; 88 val |= (u64)((stride / 128) - 1) << fence_pitch_shift;
101 if (tiling == I915_TILING_Y) 89 if (is_y_tiled)
102 val |= 1 << I965_FENCE_TILING_Y_SHIFT; 90 val |= BIT(I965_FENCE_TILING_Y_SHIFT);
103 val |= I965_FENCE_REG_VALID; 91 val |= I965_FENCE_REG_VALID;
92 }
104 93
105 I915_WRITE(fence_reg_hi, val >> 32); 94 if (!pipelined) {
106 POSTING_READ(fence_reg_hi); 95 struct drm_i915_private *dev_priv = fence->i915;
107 96
108 I915_WRITE(fence_reg_lo, val); 97 /* To w/a incoherency with non-atomic 64-bit register updates,
98 * we split the 64-bit update into two 32-bit writes. In order
99 * for a partial fence not to be evaluated between writes, we
100 * precede the update with write to turn off the fence register,
101 * and only enable the fence as the last step.
102 *
103 * For extra levels of paranoia, we make sure each step lands
104 * before applying the next step.
105 */
106 I915_WRITE(fence_reg_lo, 0);
107 POSTING_READ(fence_reg_lo);
108
109 I915_WRITE(fence_reg_hi, upper_32_bits(val));
110 I915_WRITE(fence_reg_lo, lower_32_bits(val));
109 POSTING_READ(fence_reg_lo); 111 POSTING_READ(fence_reg_lo);
110 } else {
111 I915_WRITE(fence_reg_hi, 0);
112 POSTING_READ(fence_reg_hi);
113 } 112 }
114} 113}
115 114
116static void i915_write_fence_reg(struct drm_device *dev, int reg, 115static void i915_write_fence_reg(struct drm_i915_fence_reg *fence,
117 struct drm_i915_gem_object *obj) 116 struct i915_vma *vma)
118{ 117{
119 struct drm_i915_private *dev_priv = to_i915(dev);
120 u32 val; 118 u32 val;
121 119
122 if (obj) { 120 val = 0;
123 struct i915_vma *vma = i915_gem_object_to_ggtt(obj, NULL); 121 if (vma) {
124 unsigned int tiling = i915_gem_object_get_tiling(obj); 122 unsigned int tiling = i915_gem_object_get_tiling(vma->obj);
125 unsigned int stride = i915_gem_object_get_stride(obj); 123 bool is_y_tiled = tiling == I915_TILING_Y;
124 unsigned int stride = i915_gem_object_get_stride(vma->obj);
126 int pitch_val; 125 int pitch_val;
127 int tile_width; 126 int tile_width;
128 127
@@ -134,7 +133,7 @@ static void i915_write_fence_reg(struct drm_device *dev, int reg,
134 i915_vma_is_map_and_fenceable(vma), 133 i915_vma_is_map_and_fenceable(vma),
135 vma->node.size); 134 vma->node.size);
136 135
137 if (tiling == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev)) 136 if (is_y_tiled && HAS_128_BYTE_Y_TILING(fence->i915))
138 tile_width = 128; 137 tile_width = 128;
139 else 138 else
140 tile_width = 512; 139 tile_width = 512;
@@ -144,28 +143,32 @@ static void i915_write_fence_reg(struct drm_device *dev, int reg,
144 pitch_val = ffs(pitch_val) - 1; 143 pitch_val = ffs(pitch_val) - 1;
145 144
146 val = vma->node.start; 145 val = vma->node.start;
147 if (tiling == I915_TILING_Y) 146 if (is_y_tiled)
148 val |= 1 << I830_FENCE_TILING_Y_SHIFT; 147 val |= BIT(I830_FENCE_TILING_Y_SHIFT);
149 val |= I915_FENCE_SIZE_BITS(vma->node.size); 148 val |= I915_FENCE_SIZE_BITS(vma->node.size);
150 val |= pitch_val << I830_FENCE_PITCH_SHIFT; 149 val |= pitch_val << I830_FENCE_PITCH_SHIFT;
151 val |= I830_FENCE_REG_VALID; 150 val |= I830_FENCE_REG_VALID;
152 } else 151 }
153 val = 0;
154 152
155 I915_WRITE(FENCE_REG(reg), val); 153 if (!pipelined) {
156 POSTING_READ(FENCE_REG(reg)); 154 struct drm_i915_private *dev_priv = fence->i915;
155 i915_reg_t reg = FENCE_REG(fence->id);
156
157 I915_WRITE(reg, val);
158 POSTING_READ(reg);
159 }
157} 160}
158 161
159static void i830_write_fence_reg(struct drm_device *dev, int reg, 162static void i830_write_fence_reg(struct drm_i915_fence_reg *fence,
160 struct drm_i915_gem_object *obj) 163 struct i915_vma *vma)
161{ 164{
162 struct drm_i915_private *dev_priv = to_i915(dev);
163 u32 val; 165 u32 val;
164 166
165 if (obj) { 167 val = 0;
166 struct i915_vma *vma = i915_gem_object_to_ggtt(obj, NULL); 168 if (vma) {
167 unsigned int tiling = i915_gem_object_get_tiling(obj); 169 unsigned int tiling = i915_gem_object_get_tiling(vma->obj);
168 unsigned int stride = i915_gem_object_get_stride(obj); 170 bool is_y_tiled = tiling == I915_TILING_Y;
171 unsigned int stride = i915_gem_object_get_stride(vma->obj);
169 u32 pitch_val; 172 u32 pitch_val;
170 173
171 WARN((vma->node.start & ~I830_FENCE_START_MASK) || 174 WARN((vma->node.start & ~I830_FENCE_START_MASK) ||
@@ -178,104 +181,102 @@ static void i830_write_fence_reg(struct drm_device *dev, int reg,
178 pitch_val = ffs(pitch_val) - 1; 181 pitch_val = ffs(pitch_val) - 1;
179 182
180 val = vma->node.start; 183 val = vma->node.start;
181 if (tiling == I915_TILING_Y) 184 if (is_y_tiled)
182 val |= 1 << I830_FENCE_TILING_Y_SHIFT; 185 val |= BIT(I830_FENCE_TILING_Y_SHIFT);
183 val |= I830_FENCE_SIZE_BITS(vma->node.size); 186 val |= I830_FENCE_SIZE_BITS(vma->node.size);
184 val |= pitch_val << I830_FENCE_PITCH_SHIFT; 187 val |= pitch_val << I830_FENCE_PITCH_SHIFT;
185 val |= I830_FENCE_REG_VALID; 188 val |= I830_FENCE_REG_VALID;
186 } else 189 }
187 val = 0;
188 190
189 I915_WRITE(FENCE_REG(reg), val); 191 if (!pipelined) {
190 POSTING_READ(FENCE_REG(reg)); 192 struct drm_i915_private *dev_priv = fence->i915;
191} 193 i915_reg_t reg = FENCE_REG(fence->id);
192 194
193inline static bool i915_gem_object_needs_mb(struct drm_i915_gem_object *obj) 195 I915_WRITE(reg, val);
194{ 196 POSTING_READ(reg);
195 return obj && obj->base.read_domains & I915_GEM_DOMAIN_GTT; 197 }
196} 198}
197 199
198static void i915_gem_write_fence(struct drm_device *dev, int reg, 200static void fence_write(struct drm_i915_fence_reg *fence,
199 struct drm_i915_gem_object *obj) 201 struct i915_vma *vma)
200{ 202{
201 struct drm_i915_private *dev_priv = to_i915(dev); 203 /* Previous access through the fence register is marshalled by
202 204 * the mb() inside the fault handlers (i915_gem_release_mmaps)
203 /* Ensure that all CPU reads are completed before installing a fence 205 * and explicitly managed for internal users.
204 * and all writes before removing the fence.
205 */ 206 */
206 if (i915_gem_object_needs_mb(dev_priv->fence_regs[reg].obj)) 207
207 mb(); 208 if (IS_GEN2(fence->i915))
208 209 i830_write_fence_reg(fence, vma);
209 WARN(obj && 210 else if (IS_GEN3(fence->i915))
210 (!i915_gem_object_get_stride(obj) || 211 i915_write_fence_reg(fence, vma);
211 !i915_gem_object_get_tiling(obj)), 212 else
212 "bogus fence setup with stride: 0x%x, tiling mode: %i\n", 213 i965_write_fence_reg(fence, vma);
213 i915_gem_object_get_stride(obj), 214
214 i915_gem_object_get_tiling(obj)); 215 /* Access through the fenced region afterwards is
215 216 * ordered by the posting reads whilst writing the registers.
216 if (IS_GEN2(dev))
217 i830_write_fence_reg(dev, reg, obj);
218 else if (IS_GEN3(dev))
219 i915_write_fence_reg(dev, reg, obj);
220 else if (INTEL_INFO(dev)->gen >= 4)
221 i965_write_fence_reg(dev, reg, obj);
222
223 /* And similarly be paranoid that no direct access to this region
224 * is reordered to before the fence is installed.
225 */ 217 */
226 if (i915_gem_object_needs_mb(obj))
227 mb();
228}
229 218
230static inline int fence_number(struct drm_i915_private *dev_priv, 219 fence->dirty = false;
231 struct drm_i915_fence_reg *fence)
232{
233 return fence - dev_priv->fence_regs;
234} 220}
235 221
236static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj, 222static int fence_update(struct drm_i915_fence_reg *fence,
237 struct drm_i915_fence_reg *fence, 223 struct i915_vma *vma)
238 bool enable)
239{ 224{
240 struct drm_i915_private *dev_priv = to_i915(obj->base.dev); 225 int ret;
241 int reg = fence_number(dev_priv, fence);
242 226
243 i915_gem_write_fence(obj->base.dev, reg, enable ? obj : NULL); 227 if (vma) {
228 if (!i915_vma_is_map_and_fenceable(vma))
229 return -EINVAL;
244 230
245 if (enable) { 231 if (WARN(!i915_gem_object_get_stride(vma->obj) ||
246 obj->fence_reg = reg; 232 !i915_gem_object_get_tiling(vma->obj),
247 fence->obj = obj; 233 "bogus fence setup with stride: 0x%x, tiling mode: %i\n",
248 list_move_tail(&fence->link, &dev_priv->mm.fence_list); 234 i915_gem_object_get_stride(vma->obj),
249 } else { 235 i915_gem_object_get_tiling(vma->obj)))
250 obj->fence_reg = I915_FENCE_REG_NONE; 236 return -EINVAL;
251 fence->obj = NULL; 237
252 list_del_init(&fence->link); 238 ret = i915_gem_active_retire(&vma->last_fence,
239 &vma->obj->base.dev->struct_mutex);
240 if (ret)
241 return ret;
253 } 242 }
254 obj->fence_dirty = false;
255}
256 243
257static inline void i915_gem_object_fence_lost(struct drm_i915_gem_object *obj) 244 if (fence->vma) {
258{ 245 ret = i915_gem_active_retire(&fence->vma->last_fence,
259 if (i915_gem_object_is_tiled(obj)) 246 &fence->vma->obj->base.dev->struct_mutex);
260 i915_gem_release_mmap(obj); 247 if (ret)
248 return ret;
249 }
261 250
262 /* As we do not have an associated fence register, we will force 251 if (fence->vma && fence->vma != vma) {
263 * a tiling change if we ever need to acquire one. 252 /* Ensure that all userspace CPU access is completed before
264 */ 253 * stealing the fence.
265 obj->fence_dirty = false; 254 */
266 obj->fence_reg = I915_FENCE_REG_NONE; 255 i915_gem_release_mmap(fence->vma->obj);
267}
268 256
269static int 257 fence->vma->fence = NULL;
270i915_gem_object_wait_fence(struct drm_i915_gem_object *obj) 258 fence->vma = NULL;
271{ 259
272 return i915_gem_active_retire(&obj->last_fence, 260 list_move(&fence->link, &fence->i915->mm.fence_list);
273 &obj->base.dev->struct_mutex); 261 }
262
263 fence_write(fence, vma);
264
265 if (vma) {
266 if (fence->vma != vma) {
267 vma->fence = fence;
268 fence->vma = vma;
269 }
270
271 list_move_tail(&fence->link, &fence->i915->mm.fence_list);
272 }
273
274 return 0;
274} 275}
275 276
276/** 277/**
277 * i915_gem_object_put_fence - force-remove fence for an object 278 * i915_vma_put_fence - force-remove fence for a VMA
278 * @obj: object to map through a fence reg 279 * @vma: vma to map linearly (not through a fence reg)
279 * 280 *
280 * This function force-removes any fence from the given object, which is useful 281 * This function force-removes any fence from the given object, which is useful
281 * if the kernel wants to do untiled GTT access. 282 * if the kernel wants to do untiled GTT access.
@@ -285,70 +286,40 @@ i915_gem_object_wait_fence(struct drm_i915_gem_object *obj)
285 * 0 on success, negative error code on failure. 286 * 0 on success, negative error code on failure.
286 */ 287 */
287int 288int
288i915_gem_object_put_fence(struct drm_i915_gem_object *obj) 289i915_vma_put_fence(struct i915_vma *vma)
289{ 290{
290 struct drm_i915_private *dev_priv = to_i915(obj->base.dev); 291 struct drm_i915_fence_reg *fence = vma->fence;
291 struct drm_i915_fence_reg *fence;
292 int ret;
293
294 ret = i915_gem_object_wait_fence(obj);
295 if (ret)
296 return ret;
297 292
298 if (obj->fence_reg == I915_FENCE_REG_NONE) 293 if (!fence)
299 return 0; 294 return 0;
300 295
301 fence = &dev_priv->fence_regs[obj->fence_reg];
302
303 if (fence->pin_count) 296 if (fence->pin_count)
304 return -EBUSY; 297 return -EBUSY;
305 298
306 i915_gem_object_fence_lost(obj); 299 return fence_update(fence, NULL);
307 i915_gem_object_update_fence(obj, fence, false);
308
309 return 0;
310} 300}
311 301
312static struct drm_i915_fence_reg * 302static struct drm_i915_fence_reg *fence_find(struct drm_i915_private *dev_priv)
313i915_find_fence_reg(struct drm_device *dev)
314{ 303{
315 struct drm_i915_private *dev_priv = to_i915(dev); 304 struct drm_i915_fence_reg *fence;
316 struct drm_i915_fence_reg *reg, *avail;
317 int i;
318
319 /* First try to find a free reg */
320 avail = NULL;
321 for (i = 0; i < dev_priv->num_fence_regs; i++) {
322 reg = &dev_priv->fence_regs[i];
323 if (!reg->obj)
324 return reg;
325
326 if (!reg->pin_count)
327 avail = reg;
328 }
329
330 if (avail == NULL)
331 goto deadlock;
332 305
333 /* None available, try to steal one or wait for a user to finish */ 306 list_for_each_entry(fence, &dev_priv->mm.fence_list, link) {
334 list_for_each_entry(reg, &dev_priv->mm.fence_list, link) { 307 if (fence->pin_count)
335 if (reg->pin_count)
336 continue; 308 continue;
337 309
338 return reg; 310 return fence;
339 } 311 }
340 312
341deadlock:
342 /* Wait for completion of pending flips which consume fences */ 313 /* Wait for completion of pending flips which consume fences */
343 if (intel_has_pending_fb_unpin(dev)) 314 if (intel_has_pending_fb_unpin(&dev_priv->drm))
344 return ERR_PTR(-EAGAIN); 315 return ERR_PTR(-EAGAIN);
345 316
346 return ERR_PTR(-EDEADLK); 317 return ERR_PTR(-EDEADLK);
347} 318}
348 319
349/** 320/**
350 * i915_gem_object_get_fence - set up fencing for an object 321 * i915_vma_get_fence - set up fencing for a vma
351 * @obj: object to map through a fence reg 322 * @vma: vma to map through a fence reg
352 * 323 *
353 * When mapping objects through the GTT, userspace wants to be able to write 324 * When mapping objects through the GTT, userspace wants to be able to write
354 * to them without having to worry about swizzling if the object is tiled. 325 * to them without having to worry about swizzling if the object is tiled.
@@ -365,93 +336,27 @@ deadlock:
365 * 0 on success, negative error code on failure. 336 * 0 on success, negative error code on failure.
366 */ 337 */
367int 338int
368i915_gem_object_get_fence(struct drm_i915_gem_object *obj) 339i915_vma_get_fence(struct i915_vma *vma)
369{ 340{
370 struct drm_device *dev = obj->base.dev; 341 struct drm_i915_fence_reg *fence;
371 struct drm_i915_private *dev_priv = to_i915(dev); 342 struct i915_vma *set = i915_gem_object_is_tiled(vma->obj) ? vma : NULL;
372 bool enable = i915_gem_object_is_tiled(obj);
373 struct drm_i915_fence_reg *reg;
374 int ret;
375
376 /* Have we updated the tiling parameters upon the object and so
377 * will need to serialise the write to the associated fence register?
378 */
379 if (obj->fence_dirty) {
380 ret = i915_gem_object_wait_fence(obj);
381 if (ret)
382 return ret;
383 }
384 343
385 /* Just update our place in the LRU if our fence is getting reused. */ 344 /* Just update our place in the LRU if our fence is getting reused. */
386 if (obj->fence_reg != I915_FENCE_REG_NONE) { 345 if (vma->fence) {
387 reg = &dev_priv->fence_regs[obj->fence_reg]; 346 fence = vma->fence;
388 if (!obj->fence_dirty) { 347 if (!fence->dirty) {
389 list_move_tail(&reg->link, &dev_priv->mm.fence_list); 348 list_move_tail(&fence->link,
349 &fence->i915->mm.fence_list);
390 return 0; 350 return 0;
391 } 351 }
392 } else if (enable) { 352 } else if (set) {
393 reg = i915_find_fence_reg(dev); 353 fence = fence_find(to_i915(vma->vm->dev));
394 if (IS_ERR(reg)) 354 if (IS_ERR(fence))
395 return PTR_ERR(reg); 355 return PTR_ERR(fence);
396
397 if (reg->obj) {
398 struct drm_i915_gem_object *old = reg->obj;
399
400 ret = i915_gem_object_wait_fence(old);
401 if (ret)
402 return ret;
403
404 i915_gem_object_fence_lost(old);
405 }
406 } else 356 } else
407 return 0; 357 return 0;
408 358
409 i915_gem_object_update_fence(obj, reg, enable); 359 return fence_update(fence, set);
410
411 return 0;
412}
413
414/**
415 * i915_gem_object_pin_fence - pin fencing state
416 * @obj: object to pin fencing for
417 *
418 * This pins the fencing state (whether tiled or untiled) to make sure the
419 * object is ready to be used as a scanout target. Fencing status must be
420 * synchronize first by calling i915_gem_object_get_fence():
421 *
422 * The resulting fence pin reference must be released again with
423 * i915_gem_object_unpin_fence().
424 *
425 * Returns:
426 *
427 * True if the object has a fence, false otherwise.
428 */
429bool
430i915_gem_object_pin_fence(struct drm_i915_gem_object *obj)
431{
432 if (obj->fence_reg != I915_FENCE_REG_NONE) {
433 to_i915(obj->base.dev)->fence_regs[obj->fence_reg].pin_count++;
434 return true;
435 } else
436 return false;
437}
438
439/**
440 * i915_gem_object_unpin_fence - unpin fencing state
441 * @obj: object to unpin fencing for
442 *
443 * This releases the fence pin reference acquired through
444 * i915_gem_object_pin_fence. It will handle both objects with and without an
445 * attached fence correctly, callers do not need to distinguish this.
446 */
447void
448i915_gem_object_unpin_fence(struct drm_i915_gem_object *obj)
449{
450 if (obj->fence_reg != I915_FENCE_REG_NONE) {
451 struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
452 WARN_ON(dev_priv->fence_regs[obj->fence_reg].pin_count <= 0);
453 dev_priv->fence_regs[obj->fence_reg].pin_count--;
454 }
455} 360}
456 361
457/** 362/**
@@ -473,12 +378,7 @@ void i915_gem_restore_fences(struct drm_device *dev)
473 * Commit delayed tiling changes if we have an object still 378 * Commit delayed tiling changes if we have an object still
474 * attached to the fence, otherwise just clear the fence. 379 * attached to the fence, otherwise just clear the fence.
475 */ 380 */
476 if (reg->obj) { 381 fence_write(reg, reg->vma);
477 i915_gem_object_update_fence(reg->obj, reg,
478 i915_gem_object_get_tiling(reg->obj));
479 } else {
480 i915_gem_write_fence(dev, i, NULL);
481 }
482 } 382 }
483} 383}
484 384
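
The other half of the API is i915_vma_put_fence(), which several callers in
this patch (GTT pread/pwrite, relocation, overlay, set-cache-level) use to
force linear GTT access. A condensed sketch of that pattern, as it appears in
the hunks above:

	/* Drop any fence before untiled GTT access; fence_update(fence, NULL)
	 * fails with -EBUSY if the register is still pinned (e.g. by a
	 * scanout), in which case the caller backs off and unpins the vma.
	 */
	ret = i915_vma_put_fence(vma);
	if (ret) {
		i915_vma_unpin(vma);
		return ret;
	}
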
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index e31f98df26f6..a18363a0d8c5 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -3322,6 +3322,7 @@ void i915_vma_destroy(struct i915_vma *vma)
3322 GEM_BUG_ON(vma->node.allocated); 3322 GEM_BUG_ON(vma->node.allocated);
3323 GEM_BUG_ON(i915_vma_is_active(vma)); 3323 GEM_BUG_ON(i915_vma_is_active(vma));
3324 GEM_BUG_ON(!i915_vma_is_closed(vma)); 3324 GEM_BUG_ON(!i915_vma_is_closed(vma));
3325 GEM_BUG_ON(vma->fence);
3325 3326
3326 list_del(&vma->vm_link); 3327 list_del(&vma->vm_link);
3327 if (!i915_vma_is_ggtt(vma)) 3328 if (!i915_vma_is_ggtt(vma))
@@ -3357,6 +3358,7 @@ __i915_vma_create(struct drm_i915_gem_object *obj,
3357 INIT_LIST_HEAD(&vma->exec_list); 3358 INIT_LIST_HEAD(&vma->exec_list);
3358 for (i = 0; i < ARRAY_SIZE(vma->last_read); i++) 3359 for (i = 0; i < ARRAY_SIZE(vma->last_read); i++)
3359 init_request_active(&vma->last_read[i], i915_vma_retire); 3360 init_request_active(&vma->last_read[i], i915_vma_retire);
3361 init_request_active(&vma->last_fence, NULL);
3360 list_add(&vma->vm_link, &vm->unbound_list); 3362 list_add(&vma->vm_link, &vm->unbound_list);
3361 vma->vm = vm; 3363 vma->vm = vm;
3362 vma->obj = obj; 3364 vma->obj = obj;
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index d7ff78b46266..c88af2ab5538 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -38,7 +38,13 @@
38 38
39#include "i915_gem_request.h" 39#include "i915_gem_request.h"
40 40
41#define I915_FENCE_REG_NONE -1
42#define I915_MAX_NUM_FENCES 32
43/* 32 fences + sign bit for FENCE_REG_NONE */
44#define I915_MAX_NUM_FENCE_BITS 6
45
41struct drm_i915_file_private; 46struct drm_i915_file_private;
47struct drm_i915_fence_reg;
42 48
43typedef uint32_t gen6_pte_t; 49typedef uint32_t gen6_pte_t;
44typedef uint64_t gen8_pte_t; 50typedef uint64_t gen8_pte_t;
@@ -174,6 +180,7 @@ struct i915_vma {
174 struct drm_mm_node node; 180 struct drm_mm_node node;
175 struct drm_i915_gem_object *obj; 181 struct drm_i915_gem_object *obj;
176 struct i915_address_space *vm; 182 struct i915_address_space *vm;
183 struct drm_i915_fence_reg *fence;
177 struct sg_table *pages; 184 struct sg_table *pages;
178 void __iomem *iomap; 185 void __iomem *iomap;
179 u64 size; 186 u64 size;
@@ -203,6 +210,7 @@ struct i915_vma {
203 210
204 unsigned int active; 211 unsigned int active;
205 struct i915_gem_active last_read[I915_NUM_ENGINES]; 212 struct i915_gem_active last_read[I915_NUM_ENGINES];
213 struct i915_gem_active last_fence;
206 214
207 /** 215 /**
208 * Support different GGTT views into the same object. 216 * Support different GGTT views into the same object.
diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c
index af70d4460a9e..a14b1e3d4c78 100644
--- a/drivers/gpu/drm/i915/i915_gem_tiling.c
+++ b/drivers/gpu/drm/i915/i915_gem_tiling.c
@@ -116,13 +116,39 @@ i915_tiling_ok(struct drm_device *dev, int stride, int size, int tiling_mode)
116 return true; 116 return true;
117} 117}
118 118
119static bool i915_vma_fence_prepare(struct i915_vma *vma, int tiling_mode)
120{
121 struct drm_i915_private *dev_priv = to_i915(vma->vm->dev);
122 u32 size;
123
124 if (!i915_vma_is_map_and_fenceable(vma))
125 return true;
126
127 if (INTEL_GEN(dev_priv) == 3) {
128 if (vma->node.start & ~I915_FENCE_START_MASK)
129 return false;
130 } else {
131 if (vma->node.start & ~I830_FENCE_START_MASK)
132 return false;
133 }
134
135 size = i915_gem_get_ggtt_size(dev_priv, vma->size, tiling_mode);
136 if (vma->node.size < size)
137 return false;
138
139 if (vma->node.start & (size - 1))
140 return false;
141
142 return true;
143}
144
119/* Make the current GTT allocation valid for the change in tiling. */ 145/* Make the current GTT allocation valid for the change in tiling. */
120static int 146static int
121i915_gem_object_fence_prepare(struct drm_i915_gem_object *obj, int tiling_mode) 147i915_gem_object_fence_prepare(struct drm_i915_gem_object *obj, int tiling_mode)
122{ 148{
123 struct drm_i915_private *dev_priv = to_i915(obj->base.dev); 149 struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
124 struct i915_vma *vma; 150 struct i915_vma *vma;
125 u32 size; 151 int ret;
126 152
127 if (tiling_mode == I915_TILING_NONE) 153 if (tiling_mode == I915_TILING_NONE)
128 return 0; 154 return 0;
@@ -130,32 +156,16 @@ i915_gem_object_fence_prepare(struct drm_i915_gem_object *obj, int tiling_mode)
130 if (INTEL_GEN(dev_priv) >= 4) 156 if (INTEL_GEN(dev_priv) >= 4)
131 return 0; 157 return 0;
132 158
133 vma = i915_gem_object_to_ggtt(obj, NULL); 159 list_for_each_entry(vma, &obj->vma_list, obj_link) {
134 if (!vma) 160 if (i915_vma_fence_prepare(vma, tiling_mode))
135 return 0; 161 continue;
136
137 if (!i915_vma_is_map_and_fenceable(vma))
138 return 0;
139 162
140 if (IS_GEN3(dev_priv)) { 163 ret = i915_vma_unbind(vma);
141 if (vma->node.start & ~I915_FENCE_START_MASK) 164 if (ret)
142 goto bad; 165 return ret;
143 } else {
144 if (vma->node.start & ~I830_FENCE_START_MASK)
145 goto bad;
146 } 166 }
147 167
148 size = i915_gem_get_ggtt_size(dev_priv, vma->size, tiling_mode);
149 if (vma->node.size < size)
150 goto bad;
151
152 if (vma->node.start & (size - 1))
153 goto bad;
154
155 return 0; 168 return 0;
156
157bad:
158 return i915_vma_unbind(vma);
159} 169}
160 170
161/** 171/**
@@ -248,6 +258,8 @@ i915_gem_set_tiling(struct drm_device *dev, void *data,
248 258
249 err = i915_gem_object_fence_prepare(obj, args->tiling_mode); 259 err = i915_gem_object_fence_prepare(obj, args->tiling_mode);
250 if (!err) { 260 if (!err) {
261 struct i915_vma *vma;
262
251 if (obj->pages && 263 if (obj->pages &&
252 obj->madv == I915_MADV_WILLNEED && 264 obj->madv == I915_MADV_WILLNEED &&
253 dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) { 265 dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) {
@@ -257,11 +269,12 @@ i915_gem_set_tiling(struct drm_device *dev, void *data,
257 i915_gem_object_pin_pages(obj); 269 i915_gem_object_pin_pages(obj);
258 } 270 }
259 271
260 obj->fence_dirty = 272 list_for_each_entry(vma, &obj->vma_list, obj_link) {
261 !i915_gem_active_is_idle(&obj->last_fence, 273 if (!vma->fence)
262 &dev->struct_mutex) || 274 continue;
263 obj->fence_reg != I915_FENCE_REG_NONE;
264 275
276 vma->fence->dirty = true;
277 }
265 obj->tiling_and_stride = 278 obj->tiling_and_stride =
266 args->stride | args->tiling_mode; 279 args->stride | args->tiling_mode;
267 280
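
i915_vma_fence_prepare() above encodes the gen2/3 fencing constraints per vma:
the binding must satisfy the platform fence-start mask, be at least as large
as the fence region for the new tiling mode, and be naturally aligned to that
(power-of-two) size. A self-contained illustration of the size/alignment part
of the check, where fence_size stands in for the value returned by
i915_gem_get_ggtt_size():

/* Sketch only: fence_size is assumed to be a power of two, as it is for
 * the pre-gen4 fence regions this check applies to.
 */
static bool vma_fits_fence(u64 start, u64 node_size, u64 fence_size)
{
	if (node_size < fence_size)
		return false;

	return (start & (fence_size - 1)) == 0;	/* naturally aligned */
}
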
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 0c3f30ce85c3..84dd5bc06db3 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -797,7 +797,7 @@ static void capture_bo(struct drm_i915_error_buffer *err,
797 err->gtt_offset = vma->node.start; 797 err->gtt_offset = vma->node.start;
798 err->read_domains = obj->base.read_domains; 798 err->read_domains = obj->base.read_domains;
799 err->write_domain = obj->base.write_domain; 799 err->write_domain = obj->base.write_domain;
800 err->fence_reg = obj->fence_reg; 800 err->fence_reg = vma->fence ? vma->fence->id : -1;
801 err->tiling = i915_gem_object_get_tiling(obj); 801 err->tiling = i915_gem_object_get_tiling(obj);
802 err->dirty = obj->dirty; 802 err->dirty = obj->dirty;
803 err->purgeable = obj->madv != I915_MADV_WILLNEED; 803 err->purgeable = obj->madv != I915_MADV_WILLNEED;
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 04a8900f68c1..c81c89adaff3 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -2188,7 +2188,6 @@ intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb, unsigned int rotation)
2188 struct i915_ggtt_view view; 2188 struct i915_ggtt_view view;
2189 struct i915_vma *vma; 2189 struct i915_vma *vma;
2190 u32 alignment; 2190 u32 alignment;
2191 int ret;
2192 2191
2193 WARN_ON(!mutex_is_locked(&dev->struct_mutex)); 2192 WARN_ON(!mutex_is_locked(&dev->struct_mutex));
2194 2193
@@ -2214,43 +2213,33 @@ intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb, unsigned int rotation)
2214 intel_runtime_pm_get(dev_priv); 2213 intel_runtime_pm_get(dev_priv);
2215 2214
2216 vma = i915_gem_object_pin_to_display_plane(obj, alignment, &view); 2215 vma = i915_gem_object_pin_to_display_plane(obj, alignment, &view);
2217 if (IS_ERR(vma)) { 2216 if (IS_ERR(vma))
2218 ret = PTR_ERR(vma); 2217 goto err;
2219 goto err_pm;
2220 }
2221 2218
2222 /* Install a fence for tiled scan-out. Pre-i965 always needs a
2223 * fence, whereas 965+ only requires a fence if using
2224 * framebuffer compression. For simplicity, we always install
2225 * a fence as the cost is not that onerous.
2226 */
2227 if (i915_vma_is_map_and_fenceable(vma)) { 2219 if (i915_vma_is_map_and_fenceable(vma)) {
2228 ret = i915_gem_object_get_fence(obj); 2220 /* Install a fence for tiled scan-out. Pre-i965 always needs a
2229 if (ret == -EDEADLK) { 2221 * fence, whereas 965+ only requires a fence if using
2230 /* 2222 * framebuffer compression. For simplicity, we always, when
2231 * -EDEADLK means there are no free fences 2223 * possible, install a fence as the cost is not that onerous.
2232 * no pending flips. 2224 *
2233 * 2225 * If we fail to fence the tiled scanout, then either the
2234 * This is propagated to atomic, but it uses 2226 * modeset will reject the change (which is highly unlikely as
2235 * -EDEADLK to force a locking recovery, so 2227 * the affected systems, all but one, do not have unmappable
2236 * change the returned error to -EBUSY. 2228 * space) or we will not be able to enable full powersaving
2237 */ 2229 * techniques (also likely not to apply due to various limits
2238 ret = -EBUSY; 2230 * FBC and the like impose on the size of the buffer, which
2239 goto err_unpin; 2231 * presumably we violated anyway with this unmappable buffer).
2240 } else if (ret) 2232 * Anyway, it is presumably better to stumble onwards with
2241 goto err_unpin; 2233 * something and try to run the system in a "less than optimal"
2242 2234 * mode that matches the user configuration.
2243 i915_gem_object_pin_fence(obj); 2235 */
2236 if (i915_vma_get_fence(vma) == 0)
2237 i915_vma_pin_fence(vma);
2244 } 2238 }
2245 2239
2240err:
2246 intel_runtime_pm_put(dev_priv); 2241 intel_runtime_pm_put(dev_priv);
2247 return vma; 2242 return vma;
2248
2249err_unpin:
2250 i915_gem_object_unpin_from_display_plane(vma);
2251err_pm:
2252 intel_runtime_pm_put(dev_priv);
2253 return ERR_PTR(ret);
2254} 2243}
2255 2244
2256void intel_unpin_fb_obj(struct drm_framebuffer *fb, unsigned int rotation) 2245void intel_unpin_fb_obj(struct drm_framebuffer *fb, unsigned int rotation)
@@ -2264,9 +2253,7 @@ void intel_unpin_fb_obj(struct drm_framebuffer *fb, unsigned int rotation)
2264 intel_fill_fb_ggtt_view(&view, fb, rotation); 2253 intel_fill_fb_ggtt_view(&view, fb, rotation);
2265 vma = i915_gem_object_to_ggtt(obj, &view); 2254 vma = i915_gem_object_to_ggtt(obj, &view);
2266 2255
2267 if (i915_vma_is_map_and_fenceable(vma)) 2256 i915_vma_unpin_fence(vma);
2268 i915_gem_object_unpin_fence(obj);
2269
2270 i915_gem_object_unpin_from_display_plane(vma); 2257 i915_gem_object_unpin_from_display_plane(vma);
2271} 2258}
2272 2259
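
With the best-effort fencing above, a pinned scanout vma may legitimately end
up with vma->fence == NULL, so consumers have to check rather than assume. The
FBC change in the next hunk does exactly that; a minimal sketch of the
consumer-side pattern (fbc_fence_id is a hypothetical local name):

	/* Tolerate an unfenced scanout: fall back to I915_FENCE_REG_NONE,
	 * which disables the fence-dependent optimisation, mirroring what
	 * get_fence_id() below does for FBC.
	 */
	int fbc_fence_id = vma->fence ? vma->fence->id : I915_FENCE_REG_NONE;
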
diff --git a/drivers/gpu/drm/i915/intel_fbc.c b/drivers/gpu/drm/i915/intel_fbc.c
index e122052c4081..40bf2e4c804d 100644
--- a/drivers/gpu/drm/i915/intel_fbc.c
+++ b/drivers/gpu/drm/i915/intel_fbc.c
@@ -709,6 +709,14 @@ static bool intel_fbc_hw_tracking_covers_screen(struct intel_crtc *crtc)
709 return effective_w <= max_w && effective_h <= max_h; 709 return effective_w <= max_w && effective_h <= max_h;
710} 710}
711 711
712/* XXX replace me when we have VMA tracking for intel_plane_state */
713static int get_fence_id(struct drm_framebuffer *fb)
714{
715 struct i915_vma *vma = i915_gem_object_to_ggtt(intel_fb_obj(fb), NULL);
716
717 return vma && vma->fence ? vma->fence->id : I915_FENCE_REG_NONE;
718}
719
712static void intel_fbc_update_state_cache(struct intel_crtc *crtc, 720static void intel_fbc_update_state_cache(struct intel_crtc *crtc,
713 struct intel_crtc_state *crtc_state, 721 struct intel_crtc_state *crtc_state,
714 struct intel_plane_state *plane_state) 722 struct intel_plane_state *plane_state)
@@ -740,7 +748,7 @@ static void intel_fbc_update_state_cache(struct intel_crtc *crtc,
740 cache->fb.ilk_ggtt_offset = i915_gem_object_ggtt_offset(obj, NULL); 748 cache->fb.ilk_ggtt_offset = i915_gem_object_ggtt_offset(obj, NULL);
741 cache->fb.pixel_format = fb->pixel_format; 749 cache->fb.pixel_format = fb->pixel_format;
742 cache->fb.stride = fb->pitches[0]; 750 cache->fb.stride = fb->pitches[0];
743 cache->fb.fence_reg = obj->fence_reg; 751 cache->fb.fence_reg = get_fence_id(fb);
744 cache->fb.tiling_mode = i915_gem_object_get_tiling(obj); 752 cache->fb.tiling_mode = i915_gem_object_get_tiling(obj);
745} 753}
746 754
diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c
index 72f8990a13d2..3cf8d02064a8 100644
--- a/drivers/gpu/drm/i915/intel_overlay.c
+++ b/drivers/gpu/drm/i915/intel_overlay.c
@@ -760,7 +760,7 @@ static int intel_overlay_do_put_image(struct intel_overlay *overlay,
760 if (IS_ERR(vma)) 760 if (IS_ERR(vma))
761 return PTR_ERR(vma); 761 return PTR_ERR(vma);
762 762
763 ret = i915_gem_object_put_fence(new_bo); 763 ret = i915_vma_put_fence(vma);
764 if (ret) 764 if (ret)
765 goto out_unpin; 765 goto out_unpin;
766 766