aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Chris Wilson <chris@chris-wilson.co.uk> 2010-10-14 07:10:41 -0400
committer Chris Wilson <chris@chris-wilson.co.uk> 2010-10-19 04:18:36 -0400
commit 2549d6c26ce1c85a76990b972a2c7e8f440455cd (patch)
tree 1e9e3f948ca2f15a1c98c76f1307f87174fd7252
parent 55b7d6e8c4690047ac001026cb75a47f747db816 (diff)
drm/i915: Avoid vmallocing a buffer for the relocations
... perform an access validation check up front instead and copy them in on demand, during i915_gem_object_pin_and_relocate().

As around 20% of the CPU overhead may be spent inside vmalloc for the relocation entries when submitting an execbuffer [for x11perf -aa10text], the savings are considerable and result in around a 10% throughput increase [for glyphs].

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
-rw-r--r-- drivers/gpu/drm/i915/i915_gem.c 224
1 files changed, 75 insertions, 149 deletions
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 72ab3032300a..67998e8a2d70 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3291,12 +3291,12 @@ i915_gem_object_set_cpu_read_domain_range(struct drm_gem_object *obj,
3291static int 3291static int
3292i915_gem_object_pin_and_relocate(struct drm_gem_object *obj, 3292i915_gem_object_pin_and_relocate(struct drm_gem_object *obj,
3293 struct drm_file *file_priv, 3293 struct drm_file *file_priv,
3294 struct drm_i915_gem_exec_object2 *entry, 3294 struct drm_i915_gem_exec_object2 *entry)
3295 struct drm_i915_gem_relocation_entry *relocs)
3296{ 3295{
3297 struct drm_device *dev = obj->dev; 3296 struct drm_device *dev = obj->dev;
3298 drm_i915_private_t *dev_priv = dev->dev_private; 3297 drm_i915_private_t *dev_priv = dev->dev_private;
3299 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj); 3298 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
3299 struct drm_i915_gem_relocation_entry __user *user_relocs;
3300 int i, ret; 3300 int i, ret;
3301 void __iomem *reloc_page; 3301 void __iomem *reloc_page;
3302 bool need_fence; 3302 bool need_fence;
@@ -3337,15 +3337,24 @@ i915_gem_object_pin_and_relocate(struct drm_gem_object *obj,
3337 /* Apply the relocations, using the GTT aperture to avoid cache 3337 /* Apply the relocations, using the GTT aperture to avoid cache
3338 * flushing requirements. 3338 * flushing requirements.
3339 */ 3339 */
3340 user_relocs = (void __user *)(uintptr_t)entry->relocs_ptr;
3340 for (i = 0; i < entry->relocation_count; i++) { 3341 for (i = 0; i < entry->relocation_count; i++) {
3341 struct drm_i915_gem_relocation_entry *reloc= &relocs[i]; 3342 struct drm_i915_gem_relocation_entry reloc;
3342 struct drm_gem_object *target_obj; 3343 struct drm_gem_object *target_obj;
3343 struct drm_i915_gem_object *target_obj_priv; 3344 struct drm_i915_gem_object *target_obj_priv;
3344 uint32_t reloc_val, reloc_offset; 3345 uint32_t reloc_val, reloc_offset;
3345 uint32_t __iomem *reloc_entry; 3346 uint32_t __iomem *reloc_entry;
3346 3347
3348 ret = __copy_from_user_inatomic(&reloc,
3349 user_relocs+i,
3350 sizeof(reloc));
3351 if (ret) {
3352 i915_gem_object_unpin(obj);
3353 return -EFAULT;
3354 }
3355
3347 target_obj = drm_gem_object_lookup(obj->dev, file_priv, 3356 target_obj = drm_gem_object_lookup(obj->dev, file_priv,
3348 reloc->target_handle); 3357 reloc.target_handle);
3349 if (target_obj == NULL) { 3358 if (target_obj == NULL) {
3350 i915_gem_object_unpin(obj); 3359 i915_gem_object_unpin(obj);
3351 return -ENOENT; 3360 return -ENOENT;
@@ -3358,13 +3367,13 @@ i915_gem_object_pin_and_relocate(struct drm_gem_object *obj,
3358 "presumed %08x delta %08x\n", 3367 "presumed %08x delta %08x\n",
3359 __func__, 3368 __func__,
3360 obj, 3369 obj,
3361 (int) reloc->offset, 3370 (int) reloc.offset,
3362 (int) reloc->target_handle, 3371 (int) reloc.target_handle,
3363 (int) reloc->read_domains, 3372 (int) reloc.read_domains,
3364 (int) reloc->write_domain, 3373 (int) reloc.write_domain,
3365 (int) target_obj_priv->gtt_offset, 3374 (int) target_obj_priv->gtt_offset,
3366 (int) reloc->presumed_offset, 3375 (int) reloc.presumed_offset,
3367 reloc->delta); 3376 reloc.delta);
3368#endif 3377#endif
3369 3378
3370 /* The target buffer should have appeared before us in the 3379 /* The target buffer should have appeared before us in the
@@ -3372,89 +3381,89 @@ i915_gem_object_pin_and_relocate(struct drm_gem_object *obj,
3372 */ 3381 */
3373 if (target_obj_priv->gtt_space == NULL) { 3382 if (target_obj_priv->gtt_space == NULL) {
3374 DRM_ERROR("No GTT space found for object %d\n", 3383 DRM_ERROR("No GTT space found for object %d\n",
3375 reloc->target_handle); 3384 reloc.target_handle);
3376 drm_gem_object_unreference(target_obj); 3385 drm_gem_object_unreference(target_obj);
3377 i915_gem_object_unpin(obj); 3386 i915_gem_object_unpin(obj);
3378 return -EINVAL; 3387 return -EINVAL;
3379 } 3388 }
3380 3389
3381 /* Validate that the target is in a valid r/w GPU domain */ 3390 /* Validate that the target is in a valid r/w GPU domain */
3382 if (reloc->write_domain & (reloc->write_domain - 1)) { 3391 if (reloc.write_domain & (reloc.write_domain - 1)) {
3383 DRM_ERROR("reloc with multiple write domains: " 3392 DRM_ERROR("reloc with multiple write domains: "
3384 "obj %p target %d offset %d " 3393 "obj %p target %d offset %d "
3385 "read %08x write %08x", 3394 "read %08x write %08x",
3386 obj, reloc->target_handle, 3395 obj, reloc.target_handle,
3387 (int) reloc->offset, 3396 (int) reloc.offset,
3388 reloc->read_domains, 3397 reloc.read_domains,
3389 reloc->write_domain); 3398 reloc.write_domain);
3390 drm_gem_object_unreference(target_obj); 3399 drm_gem_object_unreference(target_obj);
3391 i915_gem_object_unpin(obj); 3400 i915_gem_object_unpin(obj);
3392 return -EINVAL; 3401 return -EINVAL;
3393 } 3402 }
3394 if (reloc->write_domain & I915_GEM_DOMAIN_CPU || 3403 if (reloc.write_domain & I915_GEM_DOMAIN_CPU ||
3395 reloc->read_domains & I915_GEM_DOMAIN_CPU) { 3404 reloc.read_domains & I915_GEM_DOMAIN_CPU) {
3396 DRM_ERROR("reloc with read/write CPU domains: " 3405 DRM_ERROR("reloc with read/write CPU domains: "
3397 "obj %p target %d offset %d " 3406 "obj %p target %d offset %d "
3398 "read %08x write %08x", 3407 "read %08x write %08x",
3399 obj, reloc->target_handle, 3408 obj, reloc.target_handle,
3400 (int) reloc->offset, 3409 (int) reloc.offset,
3401 reloc->read_domains, 3410 reloc.read_domains,
3402 reloc->write_domain); 3411 reloc.write_domain);
3403 drm_gem_object_unreference(target_obj); 3412 drm_gem_object_unreference(target_obj);
3404 i915_gem_object_unpin(obj); 3413 i915_gem_object_unpin(obj);
3405 return -EINVAL; 3414 return -EINVAL;
3406 } 3415 }
3407 if (reloc->write_domain && target_obj->pending_write_domain && 3416 if (reloc.write_domain && target_obj->pending_write_domain &&
3408 reloc->write_domain != target_obj->pending_write_domain) { 3417 reloc.write_domain != target_obj->pending_write_domain) {
3409 DRM_ERROR("Write domain conflict: " 3418 DRM_ERROR("Write domain conflict: "
3410 "obj %p target %d offset %d " 3419 "obj %p target %d offset %d "
3411 "new %08x old %08x\n", 3420 "new %08x old %08x\n",
3412 obj, reloc->target_handle, 3421 obj, reloc.target_handle,
3413 (int) reloc->offset, 3422 (int) reloc.offset,
3414 reloc->write_domain, 3423 reloc.write_domain,
3415 target_obj->pending_write_domain); 3424 target_obj->pending_write_domain);
3416 drm_gem_object_unreference(target_obj); 3425 drm_gem_object_unreference(target_obj);
3417 i915_gem_object_unpin(obj); 3426 i915_gem_object_unpin(obj);
3418 return -EINVAL; 3427 return -EINVAL;
3419 } 3428 }
3420 3429
3421 target_obj->pending_read_domains |= reloc->read_domains; 3430 target_obj->pending_read_domains |= reloc.read_domains;
3422 target_obj->pending_write_domain |= reloc->write_domain; 3431 target_obj->pending_write_domain |= reloc.write_domain;
3423 3432
3424 /* If the relocation already has the right value in it, no 3433 /* If the relocation already has the right value in it, no
3425 * more work needs to be done. 3434 * more work needs to be done.
3426 */ 3435 */
3427 if (target_obj_priv->gtt_offset == reloc->presumed_offset) { 3436 if (target_obj_priv->gtt_offset == reloc.presumed_offset) {
3428 drm_gem_object_unreference(target_obj); 3437 drm_gem_object_unreference(target_obj);
3429 continue; 3438 continue;
3430 } 3439 }
3431 3440
3432 /* Check that the relocation address is valid... */ 3441 /* Check that the relocation address is valid... */
3433 if (reloc->offset > obj->size - 4) { 3442 if (reloc.offset > obj->size - 4) {
3434 DRM_ERROR("Relocation beyond object bounds: " 3443 DRM_ERROR("Relocation beyond object bounds: "
3435 "obj %p target %d offset %d size %d.\n", 3444 "obj %p target %d offset %d size %d.\n",
3436 obj, reloc->target_handle, 3445 obj, reloc.target_handle,
3437 (int) reloc->offset, (int) obj->size); 3446 (int) reloc.offset, (int) obj->size);
3438 drm_gem_object_unreference(target_obj); 3447 drm_gem_object_unreference(target_obj);
3439 i915_gem_object_unpin(obj); 3448 i915_gem_object_unpin(obj);
3440 return -EINVAL; 3449 return -EINVAL;
3441 } 3450 }
3442 if (reloc->offset & 3) { 3451 if (reloc.offset & 3) {
3443 DRM_ERROR("Relocation not 4-byte aligned: " 3452 DRM_ERROR("Relocation not 4-byte aligned: "
3444 "obj %p target %d offset %d.\n", 3453 "obj %p target %d offset %d.\n",
3445 obj, reloc->target_handle, 3454 obj, reloc.target_handle,
3446 (int) reloc->offset); 3455 (int) reloc.offset);
3447 drm_gem_object_unreference(target_obj); 3456 drm_gem_object_unreference(target_obj);
3448 i915_gem_object_unpin(obj); 3457 i915_gem_object_unpin(obj);
3449 return -EINVAL; 3458 return -EINVAL;
3450 } 3459 }
3451 3460
3452 /* and points to somewhere within the target object. */ 3461 /* and points to somewhere within the target object. */
3453 if (reloc->delta >= target_obj->size) { 3462 if (reloc.delta >= target_obj->size) {
3454 DRM_ERROR("Relocation beyond target object bounds: " 3463 DRM_ERROR("Relocation beyond target object bounds: "
3455 "obj %p target %d delta %d size %d.\n", 3464 "obj %p target %d delta %d size %d.\n",
3456 obj, reloc->target_handle, 3465 obj, reloc.target_handle,
3457 (int) reloc->delta, (int) target_obj->size); 3466 (int) reloc.delta, (int) target_obj->size);
3458 drm_gem_object_unreference(target_obj); 3467 drm_gem_object_unreference(target_obj);
3459 i915_gem_object_unpin(obj); 3468 i915_gem_object_unpin(obj);
3460 return -EINVAL; 3469 return -EINVAL;
@@ -3470,23 +3479,18 @@ i915_gem_object_pin_and_relocate(struct drm_gem_object *obj,
3470 /* Map the page containing the relocation we're going to 3479 /* Map the page containing the relocation we're going to
3471 * perform. 3480 * perform.
3472 */ 3481 */
3473 reloc_offset = obj_priv->gtt_offset + reloc->offset; 3482 reloc_offset = obj_priv->gtt_offset + reloc.offset;
3474 reloc_page = io_mapping_map_atomic_wc(dev_priv->mm.gtt_mapping, 3483 reloc_page = io_mapping_map_atomic_wc(dev_priv->mm.gtt_mapping,
3475 (reloc_offset & 3484 (reloc_offset &
3476 ~(PAGE_SIZE - 1)), 3485 ~(PAGE_SIZE - 1)),
3477 KM_USER0); 3486 KM_USER0);
3478 reloc_entry = (uint32_t __iomem *)(reloc_page + 3487 reloc_entry = (uint32_t __iomem *)(reloc_page +
3479 (reloc_offset & (PAGE_SIZE - 1))); 3488 (reloc_offset & (PAGE_SIZE - 1)));
3480 reloc_val = target_obj_priv->gtt_offset + reloc->delta; 3489 reloc_val = target_obj_priv->gtt_offset + reloc.delta;
3481 3490
3482 writel(reloc_val, reloc_entry); 3491 writel(reloc_val, reloc_entry);
3483 io_mapping_unmap_atomic(reloc_page, KM_USER0); 3492 io_mapping_unmap_atomic(reloc_page, KM_USER0);
3484 3493
3485 /* The updated presumed offset for this entry will be
3486 * copied back out to the user.
3487 */
3488 reloc->presumed_offset = target_obj_priv->gtt_offset;
3489
3490 drm_gem_object_unreference(target_obj); 3494 drm_gem_object_unreference(target_obj);
3491 } 3495 }
3492 3496
@@ -3551,98 +3555,40 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
3551} 3555}
3552 3556
3553static int 3557static int
3554i915_gem_get_relocs_from_user(struct drm_i915_gem_exec_object2 *exec_list, 3558i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec,
3555 uint32_t buffer_count, 3559 uint64_t exec_offset)
3556 struct drm_i915_gem_relocation_entry **relocs)
3557{ 3560{
3558 uint32_t reloc_count = 0, reloc_index = 0, i; 3561 uint32_t exec_start, exec_len;
3559 int ret;
3560
3561 *relocs = NULL;
3562 for (i = 0; i < buffer_count; i++) {
3563 if (reloc_count + exec_list[i].relocation_count < reloc_count)
3564 return -EINVAL;
3565 reloc_count += exec_list[i].relocation_count;
3566 }
3567
3568 *relocs = drm_calloc_large(reloc_count, sizeof(**relocs));
3569 if (*relocs == NULL) {
3570 DRM_ERROR("failed to alloc relocs, count %d\n", reloc_count);
3571 return -ENOMEM;
3572 }
3573
3574 for (i = 0; i < buffer_count; i++) {
3575 struct drm_i915_gem_relocation_entry __user *user_relocs;
3576 3562
3577 user_relocs = (void __user *)(uintptr_t)exec_list[i].relocs_ptr; 3563 exec_start = (uint32_t) exec_offset + exec->batch_start_offset;
3564 exec_len = (uint32_t) exec->batch_len;
3578 3565
3579 ret = copy_from_user(&(*relocs)[reloc_index], 3566 if ((exec_start | exec_len) & 0x7)
3580 user_relocs, 3567 return -EINVAL;
3581 exec_list[i].relocation_count *
3582 sizeof(**relocs));
3583 if (ret != 0) {
3584 drm_free_large(*relocs);
3585 *relocs = NULL;
3586 return -EFAULT;
3587 }
3588 3568
3589 reloc_index += exec_list[i].relocation_count; 3569 if (!exec_start)
3590 } 3570 return -EINVAL;
3591 3571
3592 return 0; 3572 return 0;
3593} 3573}
3594 3574
3595static int 3575static int
3596i915_gem_put_relocs_to_user(struct drm_i915_gem_exec_object2 *exec_list, 3576validate_exec_list(struct drm_i915_gem_exec_object2 *exec,
3597 uint32_t buffer_count, 3577 int count)
3598 struct drm_i915_gem_relocation_entry *relocs)
3599{ 3578{
3600 uint32_t reloc_count = 0, i; 3579 int i;
3601 int ret = 0;
3602
3603 if (relocs == NULL)
3604 return 0;
3605
3606 for (i = 0; i < buffer_count; i++) {
3607 struct drm_i915_gem_relocation_entry __user *user_relocs;
3608 int unwritten;
3609
3610 user_relocs = (void __user *)(uintptr_t)exec_list[i].relocs_ptr;
3611 3580
3612 unwritten = copy_to_user(user_relocs, 3581 for (i = 0; i < count; i++) {
3613 &relocs[reloc_count], 3582 char __user *ptr = (char __user *)(uintptr_t)exec[i].relocs_ptr;
3614 exec_list[i].relocation_count * 3583 size_t length = exec[i].relocation_count * sizeof(struct drm_i915_gem_relocation_entry);
3615 sizeof(*relocs));
3616 3584
3617 if (unwritten) { 3585 if (!access_ok(VERIFY_READ, ptr, length))
3618 ret = -EFAULT; 3586 return -EFAULT;
3619 goto err;
3620 }
3621 3587
3622 reloc_count += exec_list[i].relocation_count; 3588 if (fault_in_pages_readable(ptr, length))
3589 return -EFAULT;
3623 } 3590 }
3624 3591
3625err:
3626 drm_free_large(relocs);
3627
3628 return ret;
3629}
3630
3631static int
3632i915_gem_check_execbuffer (struct drm_i915_gem_execbuffer2 *exec,
3633 uint64_t exec_offset)
3634{
3635 uint32_t exec_start, exec_len;
3636
3637 exec_start = (uint32_t) exec_offset + exec->batch_start_offset;
3638 exec_len = (uint32_t) exec->batch_len;
3639
3640 if ((exec_start | exec_len) & 0x7)
3641 return -EINVAL;
3642
3643 if (!exec_start)
3644 return -EINVAL;
3645
3646 return 0; 3592 return 0;
3647} 3593}
3648 3594
@@ -3657,11 +3603,9 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
3657 struct drm_gem_object *batch_obj; 3603 struct drm_gem_object *batch_obj;
3658 struct drm_i915_gem_object *obj_priv; 3604 struct drm_i915_gem_object *obj_priv;
3659 struct drm_clip_rect *cliprects = NULL; 3605 struct drm_clip_rect *cliprects = NULL;
3660 struct drm_i915_gem_relocation_entry *relocs = NULL;
3661 struct drm_i915_gem_request *request = NULL; 3606 struct drm_i915_gem_request *request = NULL;
3662 int ret, ret2, i, pinned = 0; 3607 int ret, i, pinned = 0;
3663 uint64_t exec_offset; 3608 uint64_t exec_offset;
3664 uint32_t reloc_index;
3665 int pin_tries, flips; 3609 int pin_tries, flips;
3666 3610
3667 struct intel_ring_buffer *ring = NULL; 3611 struct intel_ring_buffer *ring = NULL;
@@ -3670,6 +3614,10 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
3670 if (ret) 3614 if (ret)
3671 return ret; 3615 return ret;
3672 3616
3617 ret = validate_exec_list(exec_list, args->buffer_count);
3618 if (ret)
3619 return ret;
3620
3673#if WATCH_EXEC 3621#if WATCH_EXEC
3674 DRM_INFO("buffers_ptr %d buffer_count %d len %08x\n", 3622 DRM_INFO("buffers_ptr %d buffer_count %d len %08x\n",
3675 (int) args->buffers_ptr, args->buffer_count, args->batch_len); 3623 (int) args->buffers_ptr, args->buffer_count, args->batch_len);
@@ -3722,11 +3670,6 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
3722 goto pre_mutex_err; 3670 goto pre_mutex_err;
3723 } 3671 }
3724 3672
3725 ret = i915_gem_get_relocs_from_user(exec_list, args->buffer_count,
3726 &relocs);
3727 if (ret != 0)
3728 goto pre_mutex_err;
3729
3730 ret = i915_mutex_lock_interruptible(dev); 3673 ret = i915_mutex_lock_interruptible(dev);
3731 if (ret) 3674 if (ret)
3732 goto pre_mutex_err; 3675 goto pre_mutex_err;
@@ -3765,19 +3708,16 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
3765 /* Pin and relocate */ 3708 /* Pin and relocate */
3766 for (pin_tries = 0; ; pin_tries++) { 3709 for (pin_tries = 0; ; pin_tries++) {
3767 ret = 0; 3710 ret = 0;
3768 reloc_index = 0;
3769 3711
3770 for (i = 0; i < args->buffer_count; i++) { 3712 for (i = 0; i < args->buffer_count; i++) {
3771 object_list[i]->pending_read_domains = 0; 3713 object_list[i]->pending_read_domains = 0;
3772 object_list[i]->pending_write_domain = 0; 3714 object_list[i]->pending_write_domain = 0;
3773 ret = i915_gem_object_pin_and_relocate(object_list[i], 3715 ret = i915_gem_object_pin_and_relocate(object_list[i],
3774 file_priv, 3716 file_priv,
3775 &exec_list[i], 3717 &exec_list[i]);
3776 &relocs[reloc_index]);
3777 if (ret) 3718 if (ret)
3778 break; 3719 break;
3779 pinned = i + 1; 3720 pinned = i + 1;
3780 reloc_index += exec_list[i].relocation_count;
3781 } 3721 }
3782 /* success */ 3722 /* success */
3783 if (ret == 0) 3723 if (ret == 0)
@@ -3967,20 +3907,6 @@ err:
3967 mutex_unlock(&dev->struct_mutex); 3907 mutex_unlock(&dev->struct_mutex);
3968 3908
3969pre_mutex_err: 3909pre_mutex_err:
3970 /* Copy the updated relocations out regardless of current error
3971 * state. Failure to update the relocs would mean that the next
3972 * time userland calls execbuf, it would do so with presumed offset
3973 * state that didn't match the actual object state.
3974 */
3975 ret2 = i915_gem_put_relocs_to_user(exec_list, args->buffer_count,
3976 relocs);
3977 if (ret2 != 0) {
3978 DRM_ERROR("Failed to copy relocations back out: %d\n", ret2);
3979
3980 if (ret == 0)
3981 ret = ret2;
3982 }
3983
3984 drm_free_large(object_list); 3910 drm_free_large(object_list);
3985 kfree(cliprects); 3911 kfree(cliprects);
3986 kfree(request); 3912 kfree(request);