aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Jesse Barnes <jbarnes@virtuousgeek.org> 2009-01-26 20:10:45 -0500
committer Dave Airlie <airlied@linux.ie> 2009-02-08 06:38:02 -0500
commit 0f973f27888e4664b253ab2cf69c67c2eb80ab1b (patch)
tree 1f921af14dff65311dd10106dfc10778e3730b1e
parent d9ddcb96e05cfbadf3dbf66859bcaf5eae25af0b (diff)
drm/i915: add fence register management to execbuf
Adds code to set up fence registers at execbuf time on pre-965 chips as necessary. Also fixes up a few bugs in the pre-965 tile register support (get_order != ffs). The number of fences available to the kernel defaults to the hw limit minus 3 (for legacy X front/back/depth), but a new parameter allows userspace to override that as needed. Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org> Signed-off-by: Eric Anholt <eric@anholt.net> Signed-off-by: Dave Airlie <airlied@linux.ie>
-rw-r--r-- drivers/gpu/drm/i915/i915_dma.c 10
-rw-r--r-- drivers/gpu/drm/i915/i915_drv.h 6
-rw-r--r-- drivers/gpu/drm/i915/i915_gem.c 56
-rw-r--r-- drivers/gpu/drm/i915/i915_gem_tiling.c 88
-rw-r--r-- drivers/gpu/drm/i915/i915_reg.h 4
-rw-r--r-- include/drm/i915_drm.h 2
6 files changed, 146 insertions, 20 deletions
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 1e01e7847155..cc0adb428cee 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -731,6 +731,9 @@ static int i915_getparam(struct drm_device *dev, void *data,
731 case I915_PARAM_HAS_GEM: 731 case I915_PARAM_HAS_GEM:
732 value = dev_priv->has_gem; 732 value = dev_priv->has_gem;
733 break; 733 break;
734 case I915_PARAM_NUM_FENCES_AVAIL:
735 value = dev_priv->num_fence_regs - dev_priv->fence_reg_start;
736 break;
734 default: 737 default:
735 DRM_ERROR("Unknown parameter %d\n", param->param); 738 DRM_ERROR("Unknown parameter %d\n", param->param);
736 return -EINVAL; 739 return -EINVAL;
@@ -764,6 +767,13 @@ static int i915_setparam(struct drm_device *dev, void *data,
764 case I915_SETPARAM_ALLOW_BATCHBUFFER: 767 case I915_SETPARAM_ALLOW_BATCHBUFFER:
765 dev_priv->allow_batchbuffer = param->value; 768 dev_priv->allow_batchbuffer = param->value;
766 break; 769 break;
770 case I915_SETPARAM_NUM_USED_FENCES:
771 if (param->value > dev_priv->num_fence_regs ||
772 param->value < 0)
773 return -EINVAL;
774 /* Userspace can use first N regs */
775 dev_priv->fence_reg_start = param->value;
776 break;
767 default: 777 default:
768 DRM_ERROR("unknown parameter %d\n", param->param); 778 DRM_ERROR("unknown parameter %d\n", param->param);
769 return -EINVAL; 779 return -EINVAL;
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index f471d218b89a..a70bf77290fc 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -602,6 +602,7 @@ int i915_gem_init_object(struct drm_gem_object *obj);
602void i915_gem_free_object(struct drm_gem_object *obj); 602void i915_gem_free_object(struct drm_gem_object *obj);
603int i915_gem_object_pin(struct drm_gem_object *obj, uint32_t alignment); 603int i915_gem_object_pin(struct drm_gem_object *obj, uint32_t alignment);
604void i915_gem_object_unpin(struct drm_gem_object *obj); 604void i915_gem_object_unpin(struct drm_gem_object *obj);
605int i915_gem_object_unbind(struct drm_gem_object *obj);
605void i915_gem_lastclose(struct drm_device *dev); 606void i915_gem_lastclose(struct drm_device *dev);
606uint32_t i915_get_gem_seqno(struct drm_device *dev); 607uint32_t i915_get_gem_seqno(struct drm_device *dev);
607void i915_gem_retire_requests(struct drm_device *dev); 608void i915_gem_retire_requests(struct drm_device *dev);
@@ -785,6 +786,11 @@ extern int i915_wait_ring(struct drm_device * dev, int n, const char *caller);
785 IS_I945GM(dev) || IS_I965GM(dev) || IS_GM45(dev)) 786 IS_I945GM(dev) || IS_I965GM(dev) || IS_GM45(dev))
786 787
787#define I915_NEED_GFX_HWS(dev) (IS_G33(dev) || IS_GM45(dev) || IS_G4X(dev)) 788#define I915_NEED_GFX_HWS(dev) (IS_G33(dev) || IS_GM45(dev) || IS_G4X(dev))
789/* With the 945 and later, Y tiling got adjusted so that it was 32 128-byte
790 * rows, which changed the alignment requirements and fence programming.
791 */
792#define HAS_128_BYTE_Y_TILING(dev) (IS_I9XX(dev) && !(IS_I915G(dev) || \
793 IS_I915GM(dev)))
788#define SUPPORTS_INTEGRATED_HDMI(dev) (IS_G4X(dev)) 794#define SUPPORTS_INTEGRATED_HDMI(dev) (IS_G4X(dev))
789 795
790#define PRIMARY_RINGBUFFER_SIZE (128*1024) 796#define PRIMARY_RINGBUFFER_SIZE (128*1024)
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index e1f831f166ca..6a9e3a875083 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -52,7 +52,7 @@ static void i915_gem_object_free_page_list(struct drm_gem_object *obj);
52static int i915_gem_object_wait_rendering(struct drm_gem_object *obj); 52static int i915_gem_object_wait_rendering(struct drm_gem_object *obj);
53static int i915_gem_object_bind_to_gtt(struct drm_gem_object *obj, 53static int i915_gem_object_bind_to_gtt(struct drm_gem_object *obj,
54 unsigned alignment); 54 unsigned alignment);
55static int i915_gem_object_get_fence_reg(struct drm_gem_object *obj); 55static int i915_gem_object_get_fence_reg(struct drm_gem_object *obj, bool write);
56static void i915_gem_clear_fence_reg(struct drm_gem_object *obj); 56static void i915_gem_clear_fence_reg(struct drm_gem_object *obj);
57static int i915_gem_evict_something(struct drm_device *dev); 57static int i915_gem_evict_something(struct drm_device *dev);
58static int i915_gem_phys_pwrite(struct drm_device *dev, struct drm_gem_object *obj, 58static int i915_gem_phys_pwrite(struct drm_device *dev, struct drm_gem_object *obj,
@@ -567,6 +567,7 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
567 pgoff_t page_offset; 567 pgoff_t page_offset;
568 unsigned long pfn; 568 unsigned long pfn;
569 int ret = 0; 569 int ret = 0;
570 bool write = !!(vmf->flags & FAULT_FLAG_WRITE);
570 571
571 /* We don't use vmf->pgoff since that has the fake offset */ 572 /* We don't use vmf->pgoff since that has the fake offset */
572 page_offset = ((unsigned long)vmf->virtual_address - vma->vm_start) >> 573 page_offset = ((unsigned long)vmf->virtual_address - vma->vm_start) >>
@@ -586,7 +587,7 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
586 /* Need a new fence register? */ 587 /* Need a new fence register? */
587 if (obj_priv->fence_reg == I915_FENCE_REG_NONE && 588 if (obj_priv->fence_reg == I915_FENCE_REG_NONE &&
588 obj_priv->tiling_mode != I915_TILING_NONE) { 589 obj_priv->tiling_mode != I915_TILING_NONE) {
589 ret = i915_gem_object_get_fence_reg(obj); 590 ret = i915_gem_object_get_fence_reg(obj, write);
590 if (ret != 0) 591 if (ret != 0)
591 return VM_FAULT_SIGBUS; 592 return VM_FAULT_SIGBUS;
592 } 593 }
@@ -1214,7 +1215,7 @@ i915_gem_object_wait_rendering(struct drm_gem_object *obj)
1214/** 1215/**
1215 * Unbinds an object from the GTT aperture. 1216 * Unbinds an object from the GTT aperture.
1216 */ 1217 */
1217static int 1218int
1218i915_gem_object_unbind(struct drm_gem_object *obj) 1219i915_gem_object_unbind(struct drm_gem_object *obj)
1219{ 1220{
1220 struct drm_device *dev = obj->dev; 1221 struct drm_device *dev = obj->dev;
@@ -1448,21 +1449,26 @@ static void i915_write_fence_reg(struct drm_i915_fence_reg *reg)
1448 drm_i915_private_t *dev_priv = dev->dev_private; 1449 drm_i915_private_t *dev_priv = dev->dev_private;
1449 struct drm_i915_gem_object *obj_priv = obj->driver_private; 1450 struct drm_i915_gem_object *obj_priv = obj->driver_private;
1450 int regnum = obj_priv->fence_reg; 1451 int regnum = obj_priv->fence_reg;
1452 int tile_width;
1451 uint32_t val; 1453 uint32_t val;
1452 uint32_t pitch_val; 1454 uint32_t pitch_val;
1453 1455
1454 if ((obj_priv->gtt_offset & ~I915_FENCE_START_MASK) || 1456 if ((obj_priv->gtt_offset & ~I915_FENCE_START_MASK) ||
1455 (obj_priv->gtt_offset & (obj->size - 1))) { 1457 (obj_priv->gtt_offset & (obj->size - 1))) {
1456 WARN(1, "%s: object not 1M or size aligned\n", __func__); 1458 WARN(1, "%s: object 0x%08x not 1M or size (0x%x) aligned\n",
1459 __func__, obj_priv->gtt_offset, obj->size);
1457 return; 1460 return;
1458 } 1461 }
1459 1462
1460 if (obj_priv->tiling_mode == I915_TILING_Y && (IS_I945G(dev) || 1463 if (obj_priv->tiling_mode == I915_TILING_Y &&
1461 IS_I945GM(dev) || 1464 HAS_128_BYTE_Y_TILING(dev))
1462 IS_G33(dev))) 1465 tile_width = 128;
1463 pitch_val = (obj_priv->stride / 128) - 1;
1464 else 1466 else
1465 pitch_val = (obj_priv->stride / 512) - 1; 1467 tile_width = 512;
1468
1469 /* Note: pitch better be a power of two tile widths */
1470 pitch_val = obj_priv->stride / tile_width;
1471 pitch_val = ffs(pitch_val) - 1;
1466 1472
1467 val = obj_priv->gtt_offset; 1473 val = obj_priv->gtt_offset;
1468 if (obj_priv->tiling_mode == I915_TILING_Y) 1474 if (obj_priv->tiling_mode == I915_TILING_Y)
@@ -1486,7 +1492,8 @@ static void i830_write_fence_reg(struct drm_i915_fence_reg *reg)
1486 1492
1487 if ((obj_priv->gtt_offset & ~I915_FENCE_START_MASK) || 1493 if ((obj_priv->gtt_offset & ~I915_FENCE_START_MASK) ||
1488 (obj_priv->gtt_offset & (obj->size - 1))) { 1494 (obj_priv->gtt_offset & (obj->size - 1))) {
1489 WARN(1, "%s: object not 1M or size aligned\n", __func__); 1495 WARN(1, "%s: object 0x%08x not 1M or size aligned\n",
1496 __func__, obj_priv->gtt_offset);
1490 return; 1497 return;
1491 } 1498 }
1492 1499
@@ -1506,6 +1513,7 @@ static void i830_write_fence_reg(struct drm_i915_fence_reg *reg)
1506/** 1513/**
1507 * i915_gem_object_get_fence_reg - set up a fence reg for an object 1514 * i915_gem_object_get_fence_reg - set up a fence reg for an object
1508 * @obj: object to map through a fence reg 1515 * @obj: object to map through a fence reg
1516 * @write: object is about to be written
1509 * 1517 *
1510 * When mapping objects through the GTT, userspace wants to be able to write 1518 * When mapping objects through the GTT, userspace wants to be able to write
1511 * to them without having to worry about swizzling if the object is tiled. 1519 * to them without having to worry about swizzling if the object is tiled.
@@ -1517,7 +1525,7 @@ static void i830_write_fence_reg(struct drm_i915_fence_reg *reg)
1517 * and tiling format. 1525 * and tiling format.
1518 */ 1526 */
1519static int 1527static int
1520i915_gem_object_get_fence_reg(struct drm_gem_object *obj) 1528i915_gem_object_get_fence_reg(struct drm_gem_object *obj, bool write)
1521{ 1529{
1522 struct drm_device *dev = obj->dev; 1530 struct drm_device *dev = obj->dev;
1523 struct drm_i915_private *dev_priv = dev->dev_private; 1531 struct drm_i915_private *dev_priv = dev->dev_private;
@@ -1530,12 +1538,18 @@ i915_gem_object_get_fence_reg(struct drm_gem_object *obj)
1530 WARN(1, "allocating a fence for non-tiled object?\n"); 1538 WARN(1, "allocating a fence for non-tiled object?\n");
1531 break; 1539 break;
1532 case I915_TILING_X: 1540 case I915_TILING_X:
1533 WARN(obj_priv->stride & (512 - 1), 1541 if (!obj_priv->stride)
1534 "object is X tiled but has non-512B pitch\n"); 1542 return -EINVAL;
1543 WARN((obj_priv->stride & (512 - 1)),
1544 "object 0x%08x is X tiled but has non-512B pitch\n",
1545 obj_priv->gtt_offset);
1535 break; 1546 break;
1536 case I915_TILING_Y: 1547 case I915_TILING_Y:
1537 WARN(obj_priv->stride & (128 - 1), 1548 if (!obj_priv->stride)
1538 "object is Y tiled but has non-128B pitch\n"); 1549 return -EINVAL;
1550 WARN((obj_priv->stride & (128 - 1)),
1551 "object 0x%08x is Y tiled but has non-128B pitch\n",
1552 obj_priv->gtt_offset);
1539 break; 1553 break;
1540 } 1554 }
1541 1555
@@ -1637,7 +1651,7 @@ i915_gem_object_bind_to_gtt(struct drm_gem_object *obj, unsigned alignment)
1637 if (dev_priv->mm.suspended) 1651 if (dev_priv->mm.suspended)
1638 return -EBUSY; 1652 return -EBUSY;
1639 if (alignment == 0) 1653 if (alignment == 0)
1640 alignment = PAGE_SIZE; 1654 alignment = i915_gem_get_gtt_alignment(obj);
1641 if (alignment & (PAGE_SIZE - 1)) { 1655 if (alignment & (PAGE_SIZE - 1)) {
1642 DRM_ERROR("Invalid object alignment requested %u\n", alignment); 1656 DRM_ERROR("Invalid object alignment requested %u\n", alignment);
1643 return -EINVAL; 1657 return -EINVAL;
@@ -2658,6 +2672,14 @@ i915_gem_object_pin(struct drm_gem_object *obj, uint32_t alignment)
2658 DRM_ERROR("Failure to bind: %d", ret); 2672 DRM_ERROR("Failure to bind: %d", ret);
2659 return ret; 2673 return ret;
2660 } 2674 }
2675 /*
2676 * Pre-965 chips need a fence register set up in order to
2677 * properly handle tiled surfaces.
2678 */
2679 if (!IS_I965G(dev) &&
2680 obj_priv->fence_reg == I915_FENCE_REG_NONE &&
2681 obj_priv->tiling_mode != I915_TILING_NONE)
2682 i915_gem_object_get_fence_reg(obj, true);
2661 } 2683 }
2662 obj_priv->pin_count++; 2684 obj_priv->pin_count++;
2663 2685
@@ -3297,7 +3319,7 @@ i915_gem_load(struct drm_device *dev)
3297 /* Old X drivers will take 0-2 for front, back, depth buffers */ 3319 /* Old X drivers will take 0-2 for front, back, depth buffers */
3298 dev_priv->fence_reg_start = 3; 3320 dev_priv->fence_reg_start = 3;
3299 3321
3300 if (IS_I965G(dev)) 3322 if (IS_I965G(dev) || IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev))
3301 dev_priv->num_fence_regs = 16; 3323 dev_priv->num_fence_regs = 16;
3302 else 3324 else
3303 dev_priv->num_fence_regs = 8; 3325 dev_priv->num_fence_regs = 8;
diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c
index 241f39b7f460..2534c792808e 100644
--- a/drivers/gpu/drm/i915/i915_gem_tiling.c
+++ b/drivers/gpu/drm/i915/i915_gem_tiling.c
@@ -173,6 +173,73 @@ i915_gem_detect_bit_6_swizzle(struct drm_device *dev)
173 dev_priv->mm.bit_6_swizzle_y = swizzle_y; 173 dev_priv->mm.bit_6_swizzle_y = swizzle_y;
174} 174}
175 175
176
177/**
178 * Returns the size of the fence for a tiled object of the given size.
179 */
180static int
181i915_get_fence_size(struct drm_device *dev, int size)
182{
183 int i;
184 int start;
185
186 if (IS_I965G(dev)) {
187 /* The 965 can have fences at any page boundary. */
188 return ALIGN(size, 4096);
189 } else {
190 /* Align the size to a power of two greater than the smallest
191 * fence size.
192 */
193 if (IS_I9XX(dev))
194 start = 1024 * 1024;
195 else
196 start = 512 * 1024;
197
198 for (i = start; i < size; i <<= 1)
199 ;
200
201 return i;
202 }
203}
204
205/* Check pitch constraints for all chips & tiling formats */
206static bool
207i915_tiling_ok(struct drm_device *dev, int stride, int size, int tiling_mode)
208{
209 int tile_width;
210
211 /* Linear is always fine */
212 if (tiling_mode == I915_TILING_NONE)
213 return true;
214
215 if (tiling_mode == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev))
216 tile_width = 128;
217 else
218 tile_width = 512;
219
220 /* 965+ just needs multiples of tile width */
221 if (IS_I965G(dev)) {
222 if (stride & (tile_width - 1))
223 return false;
224 return true;
225 }
226
227 /* Pre-965 needs power of two tile widths */
228 if (stride < tile_width)
229 return false;
230
231 if (stride & (stride - 1))
232 return false;
233
234 /* We don't handle the aperture area covered by the fence being bigger
235 * than the object size.
236 */
237 if (i915_get_fence_size(dev, size) != size)
238 return false;
239
240 return true;
241}
242
176/** 243/**
177 * Sets the tiling mode of an object, returning the required swizzling of 244 * Sets the tiling mode of an object, returning the required swizzling of
178 * bit 6 of addresses in the object. 245 * bit 6 of addresses in the object.
@@ -191,6 +258,9 @@ i915_gem_set_tiling(struct drm_device *dev, void *data,
191 return -EINVAL; 258 return -EINVAL;
192 obj_priv = obj->driver_private; 259 obj_priv = obj->driver_private;
193 260
261 if (!i915_tiling_ok(dev, args->stride, obj->size, args->tiling_mode))
262 return -EINVAL;
263
194 mutex_lock(&dev->struct_mutex); 264 mutex_lock(&dev->struct_mutex);
195 265
196 if (args->tiling_mode == I915_TILING_NONE) { 266 if (args->tiling_mode == I915_TILING_NONE) {
@@ -207,7 +277,23 @@ i915_gem_set_tiling(struct drm_device *dev, void *data,
207 args->swizzle_mode = I915_BIT_6_SWIZZLE_NONE; 277 args->swizzle_mode = I915_BIT_6_SWIZZLE_NONE;
208 } 278 }
209 } 279 }
210 obj_priv->tiling_mode = args->tiling_mode; 280 if (args->tiling_mode != obj_priv->tiling_mode) {
281 int ret;
282
283 /* Unbind the object, as switching tiling means we're
284 * switching the cache organization due to fencing, probably.
285 */
286 ret = i915_gem_object_unbind(obj);
287 if (ret != 0) {
288 WARN(ret != -ERESTARTSYS,
289 "failed to unbind object for tiling switch");
290 args->tiling_mode = obj_priv->tiling_mode;
291 mutex_unlock(&dev->struct_mutex);
292
293 return ret;
294 }
295 obj_priv->tiling_mode = args->tiling_mode;
296 }
211 obj_priv->stride = args->stride; 297 obj_priv->stride = args->stride;
212 298
213 mutex_unlock(&dev->struct_mutex); 299 mutex_unlock(&dev->struct_mutex);
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 273162579e1b..928e00462570 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -186,12 +186,12 @@
186#define FENCE_REG_830_0 0x2000 186#define FENCE_REG_830_0 0x2000
187#define I830_FENCE_START_MASK 0x07f80000 187#define I830_FENCE_START_MASK 0x07f80000
188#define I830_FENCE_TILING_Y_SHIFT 12 188#define I830_FENCE_TILING_Y_SHIFT 12
189#define I830_FENCE_SIZE_BITS(size) ((get_order(size >> 19) - 1) << 8) 189#define I830_FENCE_SIZE_BITS(size) ((ffs((size) >> 19) - 1) << 8)
190#define I830_FENCE_PITCH_SHIFT 4 190#define I830_FENCE_PITCH_SHIFT 4
191#define I830_FENCE_REG_VALID (1<<0) 191#define I830_FENCE_REG_VALID (1<<0)
192 192
193#define I915_FENCE_START_MASK 0x0ff00000 193#define I915_FENCE_START_MASK 0x0ff00000
194#define I915_FENCE_SIZE_BITS(size) ((get_order(size >> 20) - 1) << 8) 194#define I915_FENCE_SIZE_BITS(size) ((ffs((size) >> 20) - 1) << 8)
195 195
196#define FENCE_REG_965_0 0x03000 196#define FENCE_REG_965_0 0x03000
197#define I965_FENCE_PITCH_SHIFT 2 197#define I965_FENCE_PITCH_SHIFT 2
diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h
index b3bcf72dc656..912cd52db965 100644
--- a/include/drm/i915_drm.h
+++ b/include/drm/i915_drm.h
@@ -261,6 +261,7 @@ typedef struct drm_i915_irq_wait {
261#define I915_PARAM_LAST_DISPATCH 3 261#define I915_PARAM_LAST_DISPATCH 3
262#define I915_PARAM_CHIPSET_ID 4 262#define I915_PARAM_CHIPSET_ID 4
263#define I915_PARAM_HAS_GEM 5 263#define I915_PARAM_HAS_GEM 5
264#define I915_PARAM_NUM_FENCES_AVAIL 6
264 265
265typedef struct drm_i915_getparam { 266typedef struct drm_i915_getparam {
266 int param; 267 int param;
@@ -272,6 +273,7 @@ typedef struct drm_i915_getparam {
272#define I915_SETPARAM_USE_MI_BATCHBUFFER_START 1 273#define I915_SETPARAM_USE_MI_BATCHBUFFER_START 1
273#define I915_SETPARAM_TEX_LRU_LOG_GRANULARITY 2 274#define I915_SETPARAM_TEX_LRU_LOG_GRANULARITY 2
274#define I915_SETPARAM_ALLOW_BATCHBUFFER 3 275#define I915_SETPARAM_ALLOW_BATCHBUFFER 3
276#define I915_SETPARAM_NUM_USED_FENCES 4
275 277
276typedef struct drm_i915_setparam { 278typedef struct drm_i915_setparam {
277 int param; 279 int param;