author     Chris Wilson <chris@chris-wilson.co.uk>    2010-11-10 11:40:20 -0500
committer  Chris Wilson <chris@chris-wilson.co.uk>    2010-12-02 05:07:05 -0500
commit     d9e86c0ee60f323e890484628f351bf50fa9a15d (patch)
tree       4755a8ec484340bc12e2d1f88f35fbd266df6001
parent     87ca9c8a7ea9c8c7ce1561edaad1aa8570f1a01e (diff)
drm/i915: Pipelined fencing [infrastructure]
With this change, every batchbuffer can use all available fences (save pinned and scanout, of course) without ever stalling the gpu!

In theory. Currently the actual pipelined update of the register is disabled due to some stability issues. However, just the deferred update is a significant win.

Based on a series of patches by Daniel Vetter.

The premise is that before every access to a buffer through the GTT we have to declare whether we need a register or not. If the access is by the GPU, a pipelined update to the register is made via the ringbuffer, and we track the last seqno of the batches that access it. If by the CPU, we wait for the last GPU access and update the register (either to clear or to set it for the current buffer).

One advantage of being able to pipeline changes is that we can defer the actual updating of the fence register until we first need to access the object through the GTT, i.e. we can eliminate the stall on set_tiling. This is important as the userspace bo cache does not track the tiling status of active buffers which generate frequent stalls on gen3 when enabling tiling for an already bound buffer.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
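[Editorial illustration, not part of the patch: the declare-before-access rule described above, sketched as a few lines of standalone C. Every type and name below (struct ring, struct bo, declare_gtt_access, emit_fence_lri, ...) is a hypothetical stand-in for the driver structures this change touches; a GPU access queues the fence write on the ring and only records the seqno, while a CPU access must wait for the last fenced GPU access before updating the register.]

/* Editorial sketch only -- hypothetical stand-ins, not code from this patch. */
#include <stdint.h>
#include <stdio.h>

struct ring {                        /* stand-in for intel_ring_buffer */
        uint32_t next_seqno;
        uint32_t completed_seqno;
};

struct bo {                          /* stand-in for drm_i915_gem_object */
        struct ring *last_fenced_ring;
        uint32_t last_fenced_seqno;
};

static void emit_fence_lri(struct ring *r, struct bo *o)
{
        (void)o;
        /* the register update rides in the ring; the CPU does not stall */
        printf("ring: queue fence write, completes at seqno %u\n",
               (unsigned)r->next_seqno);
}

static void mmio_write_fence(struct bo *o)
{
        (void)o;
        printf("cpu: immediate fence register write\n");
}

static void wait_seqno(struct ring *r, uint32_t seqno)
{
        while (r->completed_seqno < seqno)
                r->completed_seqno++;    /* pretend the GPU retires requests */
}

/* Declare an upcoming tiled GTT access; pipelined == NULL means CPU access. */
static void declare_gtt_access(struct bo *obj, struct ring *pipelined)
{
        if (pipelined) {
                /* GPU access: pipelined update, only the seqno is tracked */
                emit_fence_lri(pipelined, obj);
                obj->last_fenced_ring = pipelined;
                obj->last_fenced_seqno = pipelined->next_seqno++;
        } else {
                /* CPU access: wait for the last fenced GPU use, then write */
                if (obj->last_fenced_seqno)
                        wait_seqno(obj->last_fenced_ring, obj->last_fenced_seqno);
                mmio_write_fence(obj);
                obj->last_fenced_seqno = 0;
                obj->last_fenced_ring = NULL;
        }
}

int main(void)
{
        struct ring render = { .next_seqno = 1, .completed_seqno = 0 };
        struct bo fb = { 0 };

        declare_gtt_access(&fb, &render);  /* by the GPU: no stall */
        declare_gtt_access(&fb, NULL);     /* by the CPU: waits, then updates */
        return 0;
}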
-rw-r--r--  drivers/gpu/drm/i915/i915_drv.h             |  10
-rw-r--r--  drivers/gpu/drm/i915/i915_gem.c             | 360
-rw-r--r--  drivers/gpu/drm/i915/i915_gem_execbuffer.c  |  20
-rw-r--r--  drivers/gpu/drm/i915/i915_gem_tiling.c      |  23
-rw-r--r--  drivers/gpu/drm/i915/intel_display.c        |  17
-rw-r--r--  drivers/gpu/drm/i915/intel_overlay.c        |  10
6 files changed, 274 insertions, 166 deletions
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 7b37c198cb19..af9ff40b135b 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -126,6 +126,7 @@ struct drm_i915_master_private {
 struct drm_i915_fence_reg {
         struct list_head lru_list;
         struct drm_i915_gem_object *obj;
+        uint32_t setup_seqno;
 };
 
 struct sdvo_device_mapping {
@@ -752,6 +753,7 @@ struct drm_i915_gem_object {
          * Current tiling mode for the object.
          */
         unsigned int tiling_mode : 2;
+        unsigned int tiling_changed : 1;
 
         /** How many users have pinned this object in GTT space. The following
          * users can each hold at most one reference: pwrite/pread, pin_ioctl
@@ -1121,10 +1123,10 @@ i915_gem_next_request_seqno(struct drm_device *dev,
         return ring->outstanding_lazy_request = dev_priv->next_seqno;
 }
 
-int __must_check i915_gem_object_get_fence_reg(struct drm_i915_gem_object *obj,
-                                               bool interruptible);
-int __must_check i915_gem_object_put_fence_reg(struct drm_i915_gem_object *obj,
-                                               bool interruptible);
+int __must_check i915_gem_object_get_fence(struct drm_i915_gem_object *obj,
+                                           struct intel_ring_buffer *pipelined,
+                                           bool interruptible);
+int __must_check i915_gem_object_put_fence(struct drm_i915_gem_object *obj);
 
 void i915_gem_retire_requests(struct drm_device *dev);
 void i915_gem_reset(struct drm_device *dev);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index c3e6d7bda6e1..23d2417a3585 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -47,7 +47,8 @@ static void i915_gem_object_set_to_full_cpu_read_domain(struct drm_i915_gem_obje
 static int i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
                                        unsigned alignment,
                                        bool map_and_fenceable);
-static void i915_gem_clear_fence_reg(struct drm_i915_gem_object *obj);
+static void i915_gem_clear_fence_reg(struct drm_device *dev,
+                                     struct drm_i915_fence_reg *reg);
 static int i915_gem_phys_pwrite(struct drm_device *dev,
                                 struct drm_i915_gem_object *obj,
                                 struct drm_i915_gem_pwrite *args,
@@ -684,7 +685,11 @@ i915_gem_gtt_pwrite_slow(struct drm_device *dev,
                 goto out_unpin_pages;
         }
 
-        ret = i915_gem_object_set_to_gtt_domain(obj, 1);
+        ret = i915_gem_object_set_to_gtt_domain(obj, true);
+        if (ret)
+                goto out_unpin_pages;
+
+        ret = i915_gem_object_put_fence(obj);
         if (ret)
                 goto out_unpin_pages;
 
@@ -966,14 +971,17 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
          */
         if (obj->phys_obj)
                 ret = i915_gem_phys_pwrite(dev, obj, args, file);
-        else if (obj->tiling_mode == I915_TILING_NONE &&
-                 obj->gtt_space &&
+        else if (obj->gtt_space &&
                  obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
                 ret = i915_gem_object_pin(obj, 0, true);
                 if (ret)
                         goto out;
 
-                ret = i915_gem_object_set_to_gtt_domain(obj, 1);
+                ret = i915_gem_object_set_to_gtt_domain(obj, true);
+                if (ret)
+                        goto out_unpin;
+
+                ret = i915_gem_object_put_fence(obj);
                 if (ret)
                         goto out_unpin;
 
@@ -1205,12 +1213,12 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
         if (ret)
                 goto unlock;
 
-        /* Need a new fence register? */
-        if (obj->tiling_mode != I915_TILING_NONE) {
-                ret = i915_gem_object_get_fence_reg(obj, true);
-                if (ret)
-                        goto unlock;
-        }
+        if (obj->tiling_mode == I915_TILING_NONE)
+                ret = i915_gem_object_put_fence(obj);
+        else
+                ret = i915_gem_object_get_fence(obj, NULL, true);
+        if (ret)
+                goto unlock;
 
         if (i915_gem_object_is_inactive(obj))
                 list_move_tail(&obj->mm_list, &dev_priv->mm.inactive_list);
@@ -1608,7 +1616,6 @@ i915_gem_object_move_off_active(struct drm_i915_gem_object *obj)
 {
         list_del_init(&obj->ring_list);
         obj->last_rendering_seqno = 0;
-        obj->last_fenced_seqno = 0;
 }
 
 static void
@@ -1640,7 +1647,6 @@ i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj)
 
         i915_gem_object_move_off_active(obj);
         obj->fenced_gpu_access = false;
-        obj->last_fenced_ring = NULL;
 
         obj->active = 0;
         obj->pending_gpu_write = false;
@@ -1803,7 +1809,11 @@ static void i915_gem_reset_fences(struct drm_device *dev)
                 if (obj->tiling_mode)
                         i915_gem_release_mmap(obj);
 
-                i915_gem_clear_fence_reg(obj);
+                reg->obj->fence_reg = I915_FENCE_REG_NONE;
+                reg->obj->fenced_gpu_access = false;
+                reg->obj->last_fenced_seqno = 0;
+                reg->obj->last_fenced_ring = NULL;
+                i915_gem_clear_fence_reg(dev, reg);
         }
 }
 
@@ -2114,8 +2124,9 @@ i915_gem_object_unbind(struct drm_i915_gem_object *obj)
         }
 
         /* release the fence reg _after_ flushing */
-        if (obj->fence_reg != I915_FENCE_REG_NONE)
-                i915_gem_clear_fence_reg(obj);
+        ret = i915_gem_object_put_fence(obj);
+        if (ret == -ERESTARTSYS)
+                return ret;
 
         i915_gem_gtt_unbind_object(obj);
         i915_gem_object_put_pages_gtt(obj);
@@ -2357,59 +2368,118 @@ static int i830_write_fence_reg(struct drm_i915_gem_object *obj,
         return 0;
 }
 
-static int i915_find_fence_reg(struct drm_device *dev,
-                               bool interruptible)
+static bool ring_passed_seqno(struct intel_ring_buffer *ring, u32 seqno)
+{
+        return i915_seqno_passed(ring->get_seqno(ring), seqno);
+}
+
+static int
+i915_gem_object_flush_fence(struct drm_i915_gem_object *obj,
+                            struct intel_ring_buffer *pipelined,
+                            bool interruptible)
+{
+        int ret;
+
+        if (obj->fenced_gpu_access) {
+                if (obj->base.write_domain & I915_GEM_GPU_DOMAINS)
+                        i915_gem_flush_ring(obj->base.dev,
+                                            obj->last_fenced_ring,
+                                            0, obj->base.write_domain);
+
+                obj->fenced_gpu_access = false;
+        }
+
+        if (obj->last_fenced_seqno && pipelined != obj->last_fenced_ring) {
+                if (!ring_passed_seqno(obj->last_fenced_ring,
+                                       obj->last_fenced_seqno)) {
+                        ret = i915_do_wait_request(obj->base.dev,
+                                                   obj->last_fenced_seqno,
+                                                   interruptible,
+                                                   obj->last_fenced_ring);
+                        if (ret)
+                                return ret;
+                }
+
+                obj->last_fenced_seqno = 0;
+                obj->last_fenced_ring = NULL;
+        }
+
+        return 0;
+}
+
+int
+i915_gem_object_put_fence(struct drm_i915_gem_object *obj)
+{
+        int ret;
+
+        if (obj->tiling_mode)
+                i915_gem_release_mmap(obj);
+
+        ret = i915_gem_object_flush_fence(obj, NULL, true);
+        if (ret)
+                return ret;
+
+        if (obj->fence_reg != I915_FENCE_REG_NONE) {
+                struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
+                i915_gem_clear_fence_reg(obj->base.dev,
+                                         &dev_priv->fence_regs[obj->fence_reg]);
+
+                obj->fence_reg = I915_FENCE_REG_NONE;
+        }
+
+        return 0;
+}
+
+static struct drm_i915_fence_reg *
+i915_find_fence_reg(struct drm_device *dev,
+                    struct intel_ring_buffer *pipelined)
 {
         struct drm_i915_private *dev_priv = dev->dev_private;
-        struct drm_i915_fence_reg *reg;
-        struct drm_i915_gem_object *obj = NULL;
-        int i, avail, ret;
+        struct drm_i915_fence_reg *reg, *first, *avail;
+        int i;
 
         /* First try to find a free reg */
-        avail = 0;
+        avail = NULL;
         for (i = dev_priv->fence_reg_start; i < dev_priv->num_fence_regs; i++) {
                 reg = &dev_priv->fence_regs[i];
                 if (!reg->obj)
-                        return i;
+                        return reg;
 
                 if (!reg->obj->pin_count)
-                        avail++;
+                        avail = reg;
         }
 
-        if (avail == 0)
-                return -ENOSPC;
+        if (avail == NULL)
+                return NULL;
 
         /* None available, try to steal one or wait for a user to finish */
-        avail = I915_FENCE_REG_NONE;
-        list_for_each_entry(reg, &dev_priv->mm.fence_list,
-                            lru_list) {
-                obj = reg->obj;
-                if (obj->pin_count)
+        avail = first = NULL;
+        list_for_each_entry(reg, &dev_priv->mm.fence_list, lru_list) {
+                if (reg->obj->pin_count)
                         continue;
 
-                /* found one! */
-                avail = obj->fence_reg;
-                break;
-        }
+                if (first == NULL)
+                        first = reg;
 
-        BUG_ON(avail == I915_FENCE_REG_NONE);
+                if (!pipelined ||
+                    !reg->obj->last_fenced_ring ||
+                    reg->obj->last_fenced_ring == pipelined) {
+                        avail = reg;
+                        break;
+                }
+        }
 
-        /* We only have a reference on obj from the active list. put_fence_reg
-         * might drop that one, causing a use-after-free in it. So hold a
-         * private reference to obj like the other callers of put_fence_reg
-         * (set_tiling ioctl) do. */
-        drm_gem_object_reference(&obj->base);
-        ret = i915_gem_object_put_fence_reg(obj, interruptible);
-        drm_gem_object_unreference(&obj->base);
-        if (ret != 0)
-                return ret;
+        if (avail == NULL)
+                avail = first;
 
         return avail;
 }
 
 /**
- * i915_gem_object_get_fence_reg - set up a fence reg for an object
+ * i915_gem_object_get_fence - set up a fence reg for an object
  * @obj: object to map through a fence reg
+ * @pipelined: ring on which to queue the change, or NULL for CPU access
+ * @interruptible: must we wait uninterruptibly for the register to retire?
  *
  * When mapping objects through the GTT, userspace wants to be able to write
  * to them without having to worry about swizzling if the object is tiled.
@@ -2421,52 +2491,119 @@ static int i915_find_fence_reg(struct drm_device *dev,
  * and tiling format.
  */
 int
-i915_gem_object_get_fence_reg(struct drm_i915_gem_object *obj,
-                              bool interruptible)
+i915_gem_object_get_fence(struct drm_i915_gem_object *obj,
+                          struct intel_ring_buffer *pipelined,
+                          bool interruptible)
 {
         struct drm_device *dev = obj->base.dev;
         struct drm_i915_private *dev_priv = dev->dev_private;
-        struct drm_i915_fence_reg *reg = NULL;
-        struct intel_ring_buffer *pipelined = NULL;
+        struct drm_i915_fence_reg *reg;
         int ret;
 
-        /* Just update our place in the LRU if our fence is getting used. */
+        /* Just update our place in the LRU if our fence is getting reused. */
         if (obj->fence_reg != I915_FENCE_REG_NONE) {
                 reg = &dev_priv->fence_regs[obj->fence_reg];
                 list_move_tail(&reg->lru_list, &dev_priv->mm.fence_list);
+
+                if (!obj->fenced_gpu_access && !obj->last_fenced_seqno)
+                        pipelined = NULL;
+
+                if (!pipelined) {
+                        if (reg->setup_seqno) {
+                                if (!ring_passed_seqno(obj->last_fenced_ring,
+                                                       reg->setup_seqno)) {
+                                        ret = i915_do_wait_request(obj->base.dev,
+                                                                   reg->setup_seqno,
+                                                                   interruptible,
+                                                                   obj->last_fenced_ring);
+                                        if (ret)
+                                                return ret;
+                                }
+
+                                reg->setup_seqno = 0;
+                        }
+                } else if (obj->last_fenced_ring &&
+                           obj->last_fenced_ring != pipelined) {
+                        ret = i915_gem_object_flush_fence(obj,
+                                                          pipelined,
+                                                          interruptible);
+                        if (ret)
+                                return ret;
+                } else if (obj->tiling_changed) {
+                        if (obj->fenced_gpu_access) {
+                                if (obj->base.write_domain & I915_GEM_GPU_DOMAINS)
+                                        i915_gem_flush_ring(obj->base.dev, obj->ring,
+                                                            0, obj->base.write_domain);
+
+                                obj->fenced_gpu_access = false;
+                        }
+                }
+
+                if (!obj->fenced_gpu_access && !obj->last_fenced_seqno)
+                        pipelined = NULL;
+                BUG_ON(!pipelined && reg->setup_seqno);
+
+                if (obj->tiling_changed) {
+                        if (pipelined) {
+                                reg->setup_seqno =
+                                        i915_gem_next_request_seqno(dev, pipelined);
+                                obj->last_fenced_seqno = reg->setup_seqno;
+                                obj->last_fenced_ring = pipelined;
+                        }
+                        goto update;
+                }
+
                 return 0;
         }
 
-        switch (obj->tiling_mode) {
-        case I915_TILING_NONE:
-                WARN(1, "allocating a fence for non-tiled object?\n");
-                break;
-        case I915_TILING_X:
-                if (!obj->stride)
-                        return -EINVAL;
-                WARN((obj->stride & (512 - 1)),
-                     "object 0x%08x is X tiled but has non-512B pitch\n",
-                     obj->gtt_offset);
-                break;
-        case I915_TILING_Y:
-                if (!obj->stride)
-                        return -EINVAL;
-                WARN((obj->stride & (128 - 1)),
-                     "object 0x%08x is Y tiled but has non-128B pitch\n",
-                     obj->gtt_offset);
-                break;
-        }
+        reg = i915_find_fence_reg(dev, pipelined);
+        if (reg == NULL)
+                return -ENOSPC;
 
-        ret = i915_find_fence_reg(dev, interruptible);
-        if (ret < 0)
+        ret = i915_gem_object_flush_fence(obj, pipelined, interruptible);
+        if (ret)
                 return ret;
 
-        obj->fence_reg = ret;
-        reg = &dev_priv->fence_regs[obj->fence_reg];
-        list_add_tail(&reg->lru_list, &dev_priv->mm.fence_list);
+        if (reg->obj) {
+                struct drm_i915_gem_object *old = reg->obj;
+
+                drm_gem_object_reference(&old->base);
+
+                if (old->tiling_mode)
+                        i915_gem_release_mmap(old);
+
+                /* XXX The pipelined change over appears to be incoherent. */
+                ret = i915_gem_object_flush_fence(old,
+                                                  NULL, //pipelined,
+                                                  interruptible);
+                if (ret) {
+                        drm_gem_object_unreference(&old->base);
+                        return ret;
+                }
+
+                if (old->last_fenced_seqno == 0 && obj->last_fenced_seqno == 0)
+                        pipelined = NULL;
+
+                old->fence_reg = I915_FENCE_REG_NONE;
+                old->last_fenced_ring = pipelined;
+                old->last_fenced_seqno =
+                        pipelined ? i915_gem_next_request_seqno(dev, pipelined) : 0;
+
+                drm_gem_object_unreference(&old->base);
+        } else if (obj->last_fenced_seqno == 0)
+                pipelined = NULL;
 
         reg->obj = obj;
+        list_move_tail(&reg->lru_list, &dev_priv->mm.fence_list);
+        obj->fence_reg = reg - dev_priv->fence_regs;
+        obj->last_fenced_ring = pipelined;
 
+        reg->setup_seqno =
+                pipelined ? i915_gem_next_request_seqno(dev, pipelined) : 0;
+        obj->last_fenced_seqno = reg->setup_seqno;
+
+update:
+        obj->tiling_changed = false;
         switch (INTEL_INFO(dev)->gen) {
         case 6:
                 ret = sandybridge_write_fence_reg(obj, pipelined);
@@ -2497,87 +2634,34 @@ i915_gem_object_get_fence_reg(struct drm_i915_gem_object *obj,
  * data structures in dev_priv and obj.
  */
 static void
-i915_gem_clear_fence_reg(struct drm_i915_gem_object *obj)
+i915_gem_clear_fence_reg(struct drm_device *dev,
+                         struct drm_i915_fence_reg *reg)
 {
-        struct drm_device *dev = obj->base.dev;
         drm_i915_private_t *dev_priv = dev->dev_private;
-        struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[obj->fence_reg];
-        uint32_t fence_reg;
+        uint32_t fence_reg = reg - dev_priv->fence_regs;
 
         switch (INTEL_INFO(dev)->gen) {
         case 6:
-                I915_WRITE64(FENCE_REG_SANDYBRIDGE_0 +
-                             (obj->fence_reg * 8), 0);
+                I915_WRITE64(FENCE_REG_SANDYBRIDGE_0 + fence_reg*8, 0);
                 break;
         case 5:
         case 4:
-                I915_WRITE64(FENCE_REG_965_0 + (obj->fence_reg * 8), 0);
+                I915_WRITE64(FENCE_REG_965_0 + fence_reg*8, 0);
                 break;
         case 3:
-                if (obj->fence_reg >= 8)
-                        fence_reg = FENCE_REG_945_8 + (obj->fence_reg - 8) * 4;
+                if (fence_reg >= 8)
+                        fence_reg = FENCE_REG_945_8 + (fence_reg - 8) * 4;
                 else
         case 2:
-                        fence_reg = FENCE_REG_830_0 + obj->fence_reg * 4;
+                        fence_reg = FENCE_REG_830_0 + fence_reg * 4;
 
                 I915_WRITE(fence_reg, 0);
                 break;
         }
 
-        reg->obj = NULL;
-        obj->fence_reg = I915_FENCE_REG_NONE;
         list_del_init(&reg->lru_list);
-}
-
-/**
- * i915_gem_object_put_fence_reg - waits on outstanding fenced access
- * to the buffer to finish, and then resets the fence register.
- * @obj: tiled object holding a fence register.
- * @bool: whether the wait upon the fence is interruptible
- *
- * Zeroes out the fence register itself and clears out the associated
- * data structures in dev_priv and obj.
- */
-int
-i915_gem_object_put_fence_reg(struct drm_i915_gem_object *obj,
-                              bool interruptible)
-{
-        struct drm_device *dev = obj->base.dev;
-        int ret;
-
-        if (obj->fence_reg == I915_FENCE_REG_NONE)
-                return 0;
-
-        /* If we've changed tiling, GTT-mappings of the object
-         * need to re-fault to ensure that the correct fence register
-         * setup is in place.
-         */
-        i915_gem_release_mmap(obj);
-
-        /* On the i915, GPU access to tiled buffers is via a fence,
-         * therefore we must wait for any outstanding access to complete
-         * before clearing the fence.
-         */
-        if (obj->fenced_gpu_access) {
-                i915_gem_object_flush_gpu_write_domain(obj);
-                obj->fenced_gpu_access = false;
-        }
-
-        if (obj->last_fenced_seqno) {
-                ret = i915_do_wait_request(dev,
-                                           obj->last_fenced_seqno,
-                                           interruptible,
-                                           obj->last_fenced_ring);
-                if (ret)
-                        return ret;
-
-                obj->last_fenced_seqno = false;
-        }
-
-        i915_gem_object_flush_gtt_write_domain(obj);
-        i915_gem_clear_fence_reg(obj);
-
-        return 0;
+        reg->obj = NULL;
+        reg->setup_seqno = 0;
 }
 
 /**
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index af01a58a643b..9bdc495e17bb 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -424,7 +424,7 @@ i915_gem_execbuffer_relocate(struct drm_device *dev,
 }
 
 static int
-i915_gem_execbuffer_reserve(struct drm_device *dev,
+i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
                             struct drm_file *file,
                             struct list_head *objects,
                             struct drm_i915_gem_exec_object2 *exec)
@@ -499,10 +499,15 @@ i915_gem_execbuffer_reserve(struct drm_device *dev,
                 }
 
                 if (need_fence) {
-                        ret = i915_gem_object_get_fence_reg(obj, true);
+                        ret = i915_gem_object_get_fence(obj, ring, 1);
+                        if (ret)
+                                break;
+                } else if (entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
+                           obj->tiling_mode == I915_TILING_NONE) {
+                        /* XXX pipelined! */
+                        ret = i915_gem_object_put_fence(obj);
                         if (ret)
                                 break;
-
                 }
                 obj->pending_fenced_gpu_access = need_fence;
 
@@ -522,7 +527,7 @@ i915_gem_execbuffer_reserve(struct drm_device *dev,
                 /* First attempt, just clear anything that is purgeable.
                  * Second attempt, clear the entire GTT.
                  */
-                ret = i915_gem_evict_everything(dev, retry == 0);
+                ret = i915_gem_evict_everything(ring->dev, retry == 0);
                 if (ret)
                         return ret;
 
@@ -548,6 +553,7 @@ err:
 static int
 i915_gem_execbuffer_relocate_slow(struct drm_device *dev,
                                   struct drm_file *file,
+                                  struct intel_ring_buffer *ring,
                                   struct list_head *objects,
                                   struct drm_i915_gem_exec_object2 *exec,
                                   int count)
@@ -590,7 +596,7 @@ i915_gem_execbuffer_relocate_slow(struct drm_device *dev,
                 goto err;
         }
 
-        ret = i915_gem_execbuffer_reserve(dev, file, objects, exec);
+        ret = i915_gem_execbuffer_reserve(ring, file, objects, exec);
         if (ret)
                 goto err;
 
@@ -930,7 +936,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
         }
 
         /* Move the objects en-masse into the GTT, evicting if necessary. */
-        ret = i915_gem_execbuffer_reserve(dev, file, &objects, exec);
+        ret = i915_gem_execbuffer_reserve(ring, file, &objects, exec);
         if (ret)
                 goto err;
 
@@ -938,7 +944,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
         ret = i915_gem_execbuffer_relocate(dev, file, &objects, exec);
         if (ret) {
                 if (ret == -EFAULT) {
-                        ret = i915_gem_execbuffer_relocate_slow(dev, file,
+                        ret = i915_gem_execbuffer_relocate_slow(dev, file, ring,
                                                                 &objects, exec,
                                                                 args->buffer_count);
                         BUG_ON(!mutex_is_locked(&dev->struct_mutex));
diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c
index 1c5fdb30f272..22a32b9932c5 100644
--- a/drivers/gpu/drm/i915/i915_gem_tiling.c
+++ b/drivers/gpu/drm/i915/i915_gem_tiling.c
@@ -244,9 +244,6 @@ i915_gem_object_fence_ok(struct drm_i915_gem_object *obj, int tiling_mode)
         if (INTEL_INFO(obj->base.dev)->gen >= 4)
                 return true;
 
-        if (!obj->gtt_space)
-                return true;
-
         if (INTEL_INFO(obj->base.dev)->gen == 3) {
                 if (obj->gtt_offset & ~I915_FENCE_START_MASK)
                         return false;
@@ -345,27 +342,21 @@ i915_gem_set_tiling(struct drm_device *dev, void *data,
                  * tiling mode. Otherwise we can just leave it alone, but
                  * need to ensure that any fence register is cleared.
                  */
-                if (!i915_gem_object_fence_ok(obj, args->tiling_mode))
-                        ret = i915_gem_object_unbind(obj);
-                else if (obj->fence_reg != I915_FENCE_REG_NONE)
-                        ret = i915_gem_object_put_fence_reg(obj, true);
-                else
-                        i915_gem_release_mmap(obj);
+                i915_gem_release_mmap(obj);
 
-                if (ret != 0) {
-                        args->tiling_mode = obj->tiling_mode;
-                        args->stride = obj->stride;
-                        goto err;
-                }
+                obj->map_and_fenceable =
+                        obj->gtt_space == NULL ||
+                        (obj->gtt_offset + obj->base.size <= dev_priv->mm.gtt_mappable_end &&
+                         i915_gem_object_fence_ok(obj, args->tiling_mode));
 
+                obj->tiling_changed = true;
                 obj->tiling_mode = args->tiling_mode;
                 obj->stride = args->stride;
         }
-err:
         drm_gem_object_unreference(&obj->base);
         mutex_unlock(&dev->struct_mutex);
 
-        return ret;
+        return 0;
 }
 
 /**
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index c2c94a26f92e..e141dd2e46e5 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -1474,7 +1474,7 @@ intel_pin_and_fence_fb_obj(struct drm_device *dev,
          * a fence as the cost is not that onerous.
          */
         if (obj->tiling_mode != I915_TILING_NONE) {
-                ret = i915_gem_object_get_fence_reg(obj, false);
+                ret = i915_gem_object_get_fence(obj, pipelined, false);
                 if (ret)
                         goto err_unpin;
         }
@@ -4370,6 +4370,12 @@ static int intel_crtc_cursor_set(struct drm_crtc *crtc,
         /* we only need to pin inside GTT if cursor is non-phy */
         mutex_lock(&dev->struct_mutex);
         if (!dev_priv->info->cursor_needs_physical) {
+                if (obj->tiling_mode) {
+                        DRM_ERROR("cursor cannot be tiled\n");
+                        ret = -EINVAL;
+                        goto fail_locked;
+                }
+
                 ret = i915_gem_object_pin(obj, PAGE_SIZE, true);
                 if (ret) {
                         DRM_ERROR("failed to pin cursor bo\n");
@@ -4382,6 +4388,12 @@ static int intel_crtc_cursor_set(struct drm_crtc *crtc,
                         goto fail_unpin;
                 }
 
+                ret = i915_gem_object_put_fence(obj);
+                if (ret) {
+                        DRM_ERROR("failed to move cursor bo into the GTT\n");
+                        goto fail_unpin;
+                }
+
                 addr = obj->gtt_offset;
         } else {
                 int align = IS_I830(dev) ? 16 * 1024 : 256;
@@ -4966,6 +4978,7 @@ static void intel_unpin_work_fn(struct work_struct *__work)
         i915_gem_object_unpin(work->old_fb_obj);
         drm_gem_object_unreference(&work->pending_flip_obj->base);
         drm_gem_object_unreference(&work->old_fb_obj->base);
+
         mutex_unlock(&work->dev->struct_mutex);
         kfree(work);
 }
@@ -5009,10 +5022,12 @@ static void do_intel_finish_page_flip(struct drm_device *dev,
         spin_unlock_irqrestore(&dev->event_lock, flags);
 
         obj = work->old_fb_obj;
+
         atomic_clear_mask(1 << intel_crtc->plane,
                           &obj->pending_flip.counter);
         if (atomic_read(&obj->pending_flip) == 0)
                 wake_up(&dev_priv->pending_flip_queue);
+
         schedule_work(&work->work);
 
         trace_i915_flip_complete(intel_crtc->plane, work->pending_flip_obj);
diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c
index af715cc03ee0..d0c1add393a3 100644
--- a/drivers/gpu/drm/i915/intel_overlay.c
+++ b/drivers/gpu/drm/i915/intel_overlay.c
@@ -787,6 +787,10 @@ static int intel_overlay_do_put_image(struct intel_overlay *overlay,
         if (ret != 0)
                 goto out_unpin;
 
+        ret = i915_gem_object_put_fence(new_bo);
+        if (ret)
+                goto out_unpin;
+
         if (!overlay->active) {
                 regs = intel_overlay_map_regs(overlay);
                 if (!regs) {
@@ -1161,6 +1165,12 @@ int intel_overlay_put_image(struct drm_device *dev, void *data,
         mutex_lock(&dev->mode_config.mutex);
         mutex_lock(&dev->struct_mutex);
 
+        if (new_bo->tiling_mode) {
+                DRM_ERROR("buffer used for overlay image can not be tiled\n");
+                ret = -EINVAL;
+                goto out_unlock;
+        }
+
         ret = intel_overlay_recover_from_interrupt(overlay, true);
         if (ret != 0)
                 goto out_unlock;