author     Chris Wilson <chris@chris-wilson.co.uk>   2017-08-16 04:52:06 -0400
committer  Chris Wilson <chris@chris-wilson.co.uk>   2017-08-18 06:57:36 -0400
commit     c7c6e46f913bb3a6ff19e64940ebb54652033677
tree       6da86f21d43db7debd99d0444551604dd4ad678e
parent     8bcbfb12818f811d63801b71d25809690d1798fc
drm/i915: Convert execbuf to use struct-of-array packing for critical fields
When userspace is doing most of the work, avoiding relocs (using NO_RELOC)
and opting out of implicit synchronisation (using ASYNC), we still spend a
lot of time processing the arrays in execbuf, even though we now should
have nothing to do most of the time. One issue that becomes readily
apparent in profiling anv is that iterating over the large execobj[] is
unfriendly to the loop prefetchers of the CPU and it much prefers iterating
over a pair of arrays rather than one big array.

v2: Clear vma[] on construction to handle errors during vma lookup

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20170816085210.4199-3-chris@chris-wilson.co.uk
-rw-r--r--	drivers/gpu/drm/i915/i915_gem_evict.c		4
-rw-r--r--	drivers/gpu/drm/i915/i915_gem_execbuffer.c	301
-rw-r--r--	drivers/gpu/drm/i915/i915_vma.h			2
3 files changed, 156 insertions(+), 151 deletions(-)
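The array-of-structs versus struct-of-arrays trade-off described in the commit message can be illustrated with a small standalone sketch. This is not i915 code: the types, the SKETCH_PINNED bit, and the counting helpers below are made-up stand-ins, used only to show why a hot loop that reads nothing but a per-object flag word walks a dense flags[] array more cheaply than it strides through a large array of wide execobject structs.

/* Illustrative sketch, not i915 code: compare iterating flags stored
 * array-of-structs (AoS) against a struct-of-arrays (SoA) layout like the
 * eb->vma[] / eb->flags[] pair this patch introduces.
 */
#include <stddef.h>

#define SKETCH_PINNED (1u << 0)		/* stand-in for an EXEC_OBJECT_* bit */

struct sketch_execobj {			/* AoS: the flag sits inside a wide struct */
	unsigned long long offset;
	unsigned long long cold[6];	/* fields the hot loop never reads */
	unsigned int flags;
};

struct sketch_eb {			/* SoA: hot flags packed in their own array */
	struct sketch_execobj *exec;
	void **vma;			/* vma[i] is parallel to flags[i] */
	unsigned int *flags;
	size_t count;
};

/* AoS scan: every iteration drags in a mostly-cold struct per object. */
static size_t count_pinned_aos(const struct sketch_execobj *exec, size_t count)
{
	size_t n = 0, i;

	for (i = 0; i < count; i++)
		n += !!(exec[i].flags & SKETCH_PINNED);
	return n;
}

/* SoA scan: the same check streams through a dense unsigned int array,
 * which is far friendlier to the CPU's hardware prefetchers when most
 * objects need no further work.
 */
static size_t count_pinned_soa(const struct sketch_eb *eb)
{
	size_t n = 0, i;

	for (i = 0; i < eb->count; i++)
		n += !!(eb->flags[i] & SKETCH_PINNED);
	return n;
}

The patch below applies the same idea in place: exec[] keeps the ioctl view, while the per-object state the driver touches on every pass lives in the parallel vma[] and flags[] arrays allocated alongside it.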
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
index a193f1b36c67..4df039ef2ce3 100644
--- a/drivers/gpu/drm/i915/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/i915_gem_evict.c
@@ -318,8 +318,8 @@ int i915_gem_evict_for_node(struct i915_address_space *vm,
 		/* Overlap of objects in the same batch? */
 		if (i915_vma_is_pinned(vma)) {
 			ret = -ENOSPC;
-			if (vma->exec_entry &&
-			    vma->exec_entry->flags & EXEC_OBJECT_PINNED)
+			if (vma->exec_flags &&
+			    *vma->exec_flags & EXEC_OBJECT_PINNED)
 				ret = -EINVAL;
 			break;
 		}
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 044fb1205554..da6cb2fe5f85 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -192,6 +192,8 @@ struct i915_execbuffer {
 	struct drm_file *file; /** per-file lookup tables and limits */
 	struct drm_i915_gem_execbuffer2 *args; /** ioctl parameters */
 	struct drm_i915_gem_exec_object2 *exec; /** ioctl execobj[] */
+	struct i915_vma **vma;
+	unsigned int *flags;
 
 	struct intel_engine_cs *engine; /** engine to queue the request to */
 	struct i915_gem_context *ctx; /** context for building the request */
@@ -245,13 +247,7 @@ struct i915_execbuffer {
 	struct hlist_head *buckets; /** ht for relocation handles */
 };
 
-/*
- * As an alternative to creating a hashtable of handle-to-vma for a batch,
- * we used the last available reserved field in the execobject[] and stash
- * a link from the execobj to its vma.
- */
-#define __exec_to_vma(ee) (ee)->rsvd2
-#define exec_to_vma(ee) u64_to_ptr(struct i915_vma, __exec_to_vma(ee))
+#define exec_entry(EB, VMA) (&(EB)->exec[(VMA)->exec_flags - (EB)->flags])
 
 /*
  * Used to convert any address to canonical form.
@@ -320,85 +316,82 @@ static int eb_create(struct i915_execbuffer *eb)
 
 static bool
 eb_vma_misplaced(const struct drm_i915_gem_exec_object2 *entry,
-		 const struct i915_vma *vma)
+		 const struct i915_vma *vma,
+		 unsigned int flags)
 {
-	if (!(entry->flags & __EXEC_OBJECT_HAS_PIN))
-		return true;
-
 	if (vma->node.size < entry->pad_to_size)
 		return true;
 
 	if (entry->alignment && !IS_ALIGNED(vma->node.start, entry->alignment))
 		return true;
 
-	if (entry->flags & EXEC_OBJECT_PINNED &&
+	if (flags & EXEC_OBJECT_PINNED &&
 	    vma->node.start != entry->offset)
 		return true;
 
-	if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS &&
+	if (flags & __EXEC_OBJECT_NEEDS_BIAS &&
 	    vma->node.start < BATCH_OFFSET_BIAS)
 		return true;
 
-	if (!(entry->flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) &&
+	if (!(flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) &&
 	    (vma->node.start + vma->node.size - 1) >> 32)
 		return true;
 
 	return false;
 }
 
-static inline void
+static inline bool
 eb_pin_vma(struct i915_execbuffer *eb,
-	   struct drm_i915_gem_exec_object2 *entry,
+	   const struct drm_i915_gem_exec_object2 *entry,
 	   struct i915_vma *vma)
 {
-	u64 flags;
+	unsigned int exec_flags = *vma->exec_flags;
+	u64 pin_flags;
 
 	if (vma->node.size)
-		flags = vma->node.start;
+		pin_flags = vma->node.start;
 	else
-		flags = entry->offset & PIN_OFFSET_MASK;
+		pin_flags = entry->offset & PIN_OFFSET_MASK;
 
-	flags |= PIN_USER | PIN_NOEVICT | PIN_OFFSET_FIXED;
-	if (unlikely(entry->flags & EXEC_OBJECT_NEEDS_GTT))
-		flags |= PIN_GLOBAL;
+	pin_flags |= PIN_USER | PIN_NOEVICT | PIN_OFFSET_FIXED;
+	if (unlikely(exec_flags & EXEC_OBJECT_NEEDS_GTT))
+		pin_flags |= PIN_GLOBAL;
 
-	if (unlikely(i915_vma_pin(vma, 0, 0, flags)))
-		return;
+	if (unlikely(i915_vma_pin(vma, 0, 0, pin_flags)))
+		return false;
 
-	if (unlikely(entry->flags & EXEC_OBJECT_NEEDS_FENCE)) {
+	if (unlikely(exec_flags & EXEC_OBJECT_NEEDS_FENCE)) {
 		if (unlikely(i915_vma_get_fence(vma))) {
 			i915_vma_unpin(vma);
-			return;
+			return false;
 		}
 
 		if (i915_vma_pin_fence(vma))
-			entry->flags |= __EXEC_OBJECT_HAS_FENCE;
+			exec_flags |= __EXEC_OBJECT_HAS_FENCE;
 	}
 
-	entry->flags |= __EXEC_OBJECT_HAS_PIN;
+	*vma->exec_flags = exec_flags | __EXEC_OBJECT_HAS_PIN;
+	return !eb_vma_misplaced(entry, vma, exec_flags);
 }
 
-static inline void
-__eb_unreserve_vma(struct i915_vma *vma,
-		   const struct drm_i915_gem_exec_object2 *entry)
+static inline void __eb_unreserve_vma(struct i915_vma *vma, unsigned int flags)
 {
-	GEM_BUG_ON(!(entry->flags & __EXEC_OBJECT_HAS_PIN));
+	GEM_BUG_ON(!(flags & __EXEC_OBJECT_HAS_PIN));
 
-	if (unlikely(entry->flags & __EXEC_OBJECT_HAS_FENCE))
+	if (unlikely(flags & __EXEC_OBJECT_HAS_FENCE))
 		i915_vma_unpin_fence(vma);
 
 	__i915_vma_unpin(vma);
 }
 
 static inline void
-eb_unreserve_vma(struct i915_vma *vma,
-		 struct drm_i915_gem_exec_object2 *entry)
+eb_unreserve_vma(struct i915_vma *vma, unsigned int *flags)
 {
-	if (!(entry->flags & __EXEC_OBJECT_HAS_PIN))
+	if (!(*flags & __EXEC_OBJECT_HAS_PIN))
 		return;
 
-	__eb_unreserve_vma(vma, entry);
-	entry->flags &= ~__EXEC_OBJECT_RESERVED;
+	__eb_unreserve_vma(vma, *flags);
+	*flags &= ~__EXEC_OBJECT_RESERVED;
 }
 
 static int
@@ -428,7 +421,7 @@ eb_validate_vma(struct i915_execbuffer *eb,
 		entry->pad_to_size = 0;
 	}
 
-	if (unlikely(vma->exec_entry)) {
+	if (unlikely(vma->exec_flags)) {
 		DRM_DEBUG("Object [handle %d, index %d] appears more than once in object list\n",
 			  entry->handle, (int)(entry - eb->exec));
 		return -EINVAL;
@@ -441,14 +434,25 @@ eb_validate_vma(struct i915_execbuffer *eb,
 	 */
 	entry->offset = gen8_noncanonical_addr(entry->offset);
 
+	if (!eb->reloc_cache.has_fence) {
+		entry->flags &= ~EXEC_OBJECT_NEEDS_FENCE;
+	} else {
+		if ((entry->flags & EXEC_OBJECT_NEEDS_FENCE ||
+		     eb->reloc_cache.needs_unfenced) &&
+		    i915_gem_object_is_tiled(vma->obj))
+			entry->flags |= EXEC_OBJECT_NEEDS_GTT | __EXEC_OBJECT_NEEDS_MAP;
+	}
+
+	if (!(entry->flags & EXEC_OBJECT_PINNED))
+		entry->flags |= eb->context_flags;
+
 	return 0;
 }
 
 static int
-eb_add_vma(struct i915_execbuffer *eb,
-	   struct drm_i915_gem_exec_object2 *entry,
-	   struct i915_vma *vma)
+eb_add_vma(struct i915_execbuffer *eb, unsigned int i, struct i915_vma *vma)
 {
+	struct drm_i915_gem_exec_object2 *entry = &eb->exec[i];
 	int err;
 
 	GEM_BUG_ON(i915_vma_is_closed(vma));
@@ -469,40 +473,28 @@ eb_add_vma(struct i915_execbuffer *eb,
 	if (entry->relocation_count)
 		list_add_tail(&vma->reloc_link, &eb->relocs);
 
-	if (!eb->reloc_cache.has_fence) {
-		entry->flags &= ~EXEC_OBJECT_NEEDS_FENCE;
-	} else {
-		if ((entry->flags & EXEC_OBJECT_NEEDS_FENCE ||
-		     eb->reloc_cache.needs_unfenced) &&
-		    i915_gem_object_is_tiled(vma->obj))
-			entry->flags |= EXEC_OBJECT_NEEDS_GTT | __EXEC_OBJECT_NEEDS_MAP;
-	}
-
-	if (!(entry->flags & EXEC_OBJECT_PINNED))
-		entry->flags |= eb->context_flags;
-
 	/*
 	 * Stash a pointer from the vma to execobj, so we can query its flags,
 	 * size, alignment etc as provided by the user. Also we stash a pointer
 	 * to the vma inside the execobj so that we can use a direct lookup
 	 * to find the right target VMA when doing relocations.
 	 */
-	vma->exec_entry = entry;
-	__exec_to_vma(entry) = (uintptr_t)vma;
+	eb->vma[i] = vma;
+	eb->flags[i] = entry->flags;
+	vma->exec_flags = &eb->flags[i];
 
 	err = 0;
-	eb_pin_vma(eb, entry, vma);
-	if (eb_vma_misplaced(entry, vma)) {
-		eb_unreserve_vma(vma, entry);
-
-		list_add_tail(&vma->exec_link, &eb->unbound);
-		if (drm_mm_node_allocated(&vma->node))
-			err = i915_vma_unbind(vma);
-	} else {
+	if (eb_pin_vma(eb, entry, vma)) {
 		if (entry->offset != vma->node.start) {
 			entry->offset = vma->node.start | UPDATE;
 			eb->args->flags |= __EXEC_HAS_RELOC;
 		}
+	} else {
+		eb_unreserve_vma(vma, vma->exec_flags);
+
+		list_add_tail(&vma->exec_link, &eb->unbound);
+		if (drm_mm_node_allocated(&vma->node))
+			err = i915_vma_unbind(vma);
 	}
 	return err;
 }
@@ -527,32 +519,35 @@ static inline int use_cpu_reloc(const struct reloc_cache *cache,
 static int eb_reserve_vma(const struct i915_execbuffer *eb,
 			  struct i915_vma *vma)
 {
-	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
-	u64 flags;
+	struct drm_i915_gem_exec_object2 *entry = exec_entry(eb, vma);
+	unsigned int exec_flags = *vma->exec_flags;
+	u64 pin_flags;
 	int err;
 
-	flags = PIN_USER | PIN_NONBLOCK;
-	if (entry->flags & EXEC_OBJECT_NEEDS_GTT)
-		flags |= PIN_GLOBAL;
+	pin_flags = PIN_USER | PIN_NONBLOCK;
+	if (exec_flags & EXEC_OBJECT_NEEDS_GTT)
+		pin_flags |= PIN_GLOBAL;
 
 	/*
 	 * Wa32bitGeneralStateOffset & Wa32bitInstructionBaseOffset,
 	 * limit address to the first 4GBs for unflagged objects.
 	 */
-	if (!(entry->flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS))
-		flags |= PIN_ZONE_4G;
+	if (!(exec_flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS))
+		pin_flags |= PIN_ZONE_4G;
 
-	if (entry->flags & __EXEC_OBJECT_NEEDS_MAP)
-		flags |= PIN_MAPPABLE;
+	if (exec_flags & __EXEC_OBJECT_NEEDS_MAP)
+		pin_flags |= PIN_MAPPABLE;
 
-	if (entry->flags & EXEC_OBJECT_PINNED) {
-		flags |= entry->offset | PIN_OFFSET_FIXED;
-		flags &= ~PIN_NONBLOCK; /* force overlapping PINNED checks */
-	} else if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS) {
-		flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS;
+	if (exec_flags & EXEC_OBJECT_PINNED) {
+		pin_flags |= entry->offset | PIN_OFFSET_FIXED;
+		pin_flags &= ~PIN_NONBLOCK; /* force overlapping checks */
+	} else if (exec_flags & __EXEC_OBJECT_NEEDS_BIAS) {
+		pin_flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS;
 	}
 
-	err = i915_vma_pin(vma, entry->pad_to_size, entry->alignment, flags);
+	err = i915_vma_pin(vma,
+			   entry->pad_to_size, entry->alignment,
+			   pin_flags);
 	if (err)
 		return err;
 
@@ -561,7 +556,7 @@ static int eb_reserve_vma(const struct i915_execbuffer *eb,
 		eb->args->flags |= __EXEC_HAS_RELOC;
 	}
 
-	if (unlikely(entry->flags & EXEC_OBJECT_NEEDS_FENCE)) {
+	if (unlikely(exec_flags & EXEC_OBJECT_NEEDS_FENCE)) {
 		err = i915_vma_get_fence(vma);
 		if (unlikely(err)) {
 			i915_vma_unpin(vma);
@@ -569,11 +564,11 @@ static int eb_reserve_vma(const struct i915_execbuffer *eb,
 		}
 
 		if (i915_vma_pin_fence(vma))
-			entry->flags |= __EXEC_OBJECT_HAS_FENCE;
+			exec_flags |= __EXEC_OBJECT_HAS_FENCE;
 	}
 
-	entry->flags |= __EXEC_OBJECT_HAS_PIN;
-	GEM_BUG_ON(eb_vma_misplaced(entry, vma));
+	*vma->exec_flags = exec_flags | __EXEC_OBJECT_HAS_PIN;
+	GEM_BUG_ON(eb_vma_misplaced(entry, vma, exec_flags));
 
 	return 0;
 }
@@ -615,18 +610,18 @@ static int eb_reserve(struct i915_execbuffer *eb)
 	INIT_LIST_HEAD(&eb->unbound);
 	INIT_LIST_HEAD(&last);
 	for (i = 0; i < count; i++) {
-		struct drm_i915_gem_exec_object2 *entry = &eb->exec[i];
+		unsigned int flags = eb->flags[i];
+		struct i915_vma *vma = eb->vma[i];
 
-		if (entry->flags & EXEC_OBJECT_PINNED &&
-		    entry->flags & __EXEC_OBJECT_HAS_PIN)
+		if (flags & EXEC_OBJECT_PINNED &&
+		    flags & __EXEC_OBJECT_HAS_PIN)
 			continue;
 
-		vma = exec_to_vma(entry);
-		eb_unreserve_vma(vma, entry);
+		eb_unreserve_vma(vma, &eb->flags[i]);
 
-		if (entry->flags & EXEC_OBJECT_PINNED)
+		if (flags & EXEC_OBJECT_PINNED)
 			list_add(&vma->exec_link, &eb->unbound);
-		else if (entry->flags & __EXEC_OBJECT_NEEDS_MAP)
+		else if (flags & __EXEC_OBJECT_NEEDS_MAP)
 			list_add_tail(&vma->exec_link, &eb->unbound);
 		else
 			list_add_tail(&vma->exec_link, &last);
@@ -714,18 +709,15 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb)
 	GEM_BUG_ON(lut->ht_size & I915_CTX_RESIZE_IN_PROGRESS);
 
 	for (i = 0; i < count; i++) {
-		__exec_to_vma(&eb->exec[i]) = 0;
-
 		hlist_for_each_entry(vma,
 				     ht_head(lut, eb->exec[i].handle),
 				     ctx_node) {
 			if (vma->ctx_handle != eb->exec[i].handle)
 				continue;
 
-			err = eb_add_vma(eb, &eb->exec[i], vma);
+			err = eb_add_vma(eb, i, vma);
 			if (unlikely(err))
 				return err;
-
 			goto next_vma;
 		}
 
@@ -746,7 +738,7 @@ next_vma: ;
 	for (i = slow_pass; i < count; i++) {
 		struct drm_i915_gem_object *obj;
 
-		if (__exec_to_vma(&eb->exec[i]))
+		if (eb->vma[i])
 			continue;
 
 		obj = to_intel_bo(idr_find(idr, eb->exec[i].handle));
@@ -758,14 +750,17 @@ next_vma: ;
 			goto err;
 		}
 
-		__exec_to_vma(&eb->exec[i]) = INTERMEDIATE | (uintptr_t)obj;
+		eb->vma[i] = (struct i915_vma *)
+			ptr_pack_bits(obj, INTERMEDIATE, 1);
 	}
 	spin_unlock(&eb->file->table_lock);
 
 	for (i = slow_pass; i < count; i++) {
 		struct drm_i915_gem_object *obj;
+		unsigned int is_obj;
 
-		if (!(__exec_to_vma(&eb->exec[i]) & INTERMEDIATE))
+		obj = (typeof(obj))ptr_unpack_bits(eb->vma[i], &is_obj, 1);
+		if (!is_obj)
 			continue;
 
 		/*
@@ -776,8 +771,6 @@ next_vma: ;
 		 * from the (obj, vm) we don't run the risk of creating
 		 * duplicated vmas for the same vm.
 		 */
-		obj = u64_to_ptr(typeof(*obj),
-				 __exec_to_vma(&eb->exec[i]) & ~INTERMEDIATE);
 		vma = i915_vma_instance(obj, eb->vm, NULL);
 		if (unlikely(IS_ERR(vma))) {
 			DRM_DEBUG("Failed to lookup VMA\n");
@@ -801,14 +794,17 @@ next_vma: ;
 			i915_vma_get(vma);
 		}
 
-		err = eb_add_vma(eb, &eb->exec[i], vma);
+		err = eb_add_vma(eb, i, vma);
 		if (unlikely(err))
 			goto err;
 
+		GEM_BUG_ON(vma != eb->vma[i]);
+		GEM_BUG_ON(vma->exec_flags != &eb->flags[i]);
+
 		/* Only after we validated the user didn't use our bits */
 		if (vma->ctx != eb->ctx) {
 			i915_vma_get(vma);
-			eb->exec[i].flags |= __EXEC_OBJECT_HAS_REF;
+			*vma->exec_flags |= __EXEC_OBJECT_HAS_REF;
 		}
 	}
 
@@ -822,7 +818,8 @@ next_vma: ;
 out:
 	/* take note of the batch buffer before we might reorder the lists */
 	i = eb_batch_index(eb);
-	eb->batch = exec_to_vma(&eb->exec[i]);
+	eb->batch = eb->vma[i];
+	GEM_BUG_ON(eb->batch->exec_flags != &eb->flags[i]);
 
 	/*
 	 * SNA is doing fancy tricks with compressing batch buffers, which leads
@@ -833,18 +830,18 @@ out:
 	 * Note that actual hangs have only been observed on gen7, but for
 	 * paranoia do it everywhere.
 	 */
-	if (!(eb->exec[i].flags & EXEC_OBJECT_PINNED))
-		eb->exec[i].flags |= __EXEC_OBJECT_NEEDS_BIAS;
+	if (!(eb->flags[i] & EXEC_OBJECT_PINNED))
+		eb->flags[i] |= __EXEC_OBJECT_NEEDS_BIAS;
 	if (eb->reloc_cache.has_fence)
-		eb->exec[i].flags |= EXEC_OBJECT_NEEDS_FENCE;
+		eb->flags[i] |= EXEC_OBJECT_NEEDS_FENCE;
 
 	eb->args->flags |= __EXEC_VALIDATED;
 	return eb_reserve(eb);
 
 err:
 	for (i = slow_pass; i < count; i++) {
-		if (__exec_to_vma(&eb->exec[i]) & INTERMEDIATE)
-			__exec_to_vma(&eb->exec[i]) = 0;
+		if (ptr_unmask_bits(eb->vma[i], 1))
+			eb->vma[i] = NULL;
 	}
 	lut->ht_size &= ~I915_CTX_RESIZE_IN_PROGRESS;
 	return err;
@@ -857,7 +854,7 @@ eb_get_vma(const struct i915_execbuffer *eb, unsigned long handle)
 	if (eb->lut_size < 0) {
 		if (handle >= -eb->lut_size)
 			return NULL;
-		return exec_to_vma(&eb->exec[handle]);
+		return eb->vma[handle];
 	} else {
 		struct hlist_head *head;
 		struct i915_vma *vma;
@@ -877,24 +874,21 @@ static void eb_release_vmas(const struct i915_execbuffer *eb)
 	unsigned int i;
 
 	for (i = 0; i < count; i++) {
-		struct drm_i915_gem_exec_object2 *entry = &eb->exec[i];
-		struct i915_vma *vma = exec_to_vma(entry);
+		struct i915_vma *vma = eb->vma[i];
+		unsigned int flags = eb->flags[i];
 
 		if (!vma)
 			continue;
 
-		GEM_BUG_ON(vma->exec_entry != entry);
-		vma->exec_entry = NULL;
-		__exec_to_vma(entry) = 0;
+		GEM_BUG_ON(vma->exec_flags != &eb->flags[i]);
+		vma->exec_flags = NULL;
+		eb->vma[i] = NULL;
 
-		if (entry->flags & __EXEC_OBJECT_HAS_PIN)
-			__eb_unreserve_vma(vma, entry);
+		if (flags & __EXEC_OBJECT_HAS_PIN)
+			__eb_unreserve_vma(vma, flags);
 
-		if (entry->flags & __EXEC_OBJECT_HAS_REF)
+		if (flags & __EXEC_OBJECT_HAS_REF)
 			i915_vma_put(vma);
-
-		entry->flags &=
-			~(__EXEC_OBJECT_RESERVED | __EXEC_OBJECT_HAS_REF);
 	}
 }
 
@@ -1383,7 +1377,7 @@ eb_relocate_entry(struct i915_execbuffer *eb,
 	}
 
 	if (reloc->write_domain) {
-		target->exec_entry->flags |= EXEC_OBJECT_WRITE;
+		*target->exec_flags |= EXEC_OBJECT_WRITE;
 
 		/*
 		 * Sandybridge PPGTT errata: We need a global gtt mapping
@@ -1435,7 +1429,7 @@ eb_relocate_entry(struct i915_execbuffer *eb,
 	 * do relocations we are already stalling, disable the user's opt
 	 * of our synchronisation.
 	 */
-	vma->exec_entry->flags &= ~EXEC_OBJECT_ASYNC;
+	*vma->exec_flags &= ~EXEC_OBJECT_ASYNC;
 
 	/* and update the user's relocation entry */
 	return relocate_entry(vma, reloc, eb, target);
@@ -1446,7 +1440,7 @@ static int eb_relocate_vma(struct i915_execbuffer *eb, struct i915_vma *vma)
 #define N_RELOC(x) ((x) / sizeof(struct drm_i915_gem_relocation_entry))
 	struct drm_i915_gem_relocation_entry stack[N_RELOC(512)];
 	struct drm_i915_gem_relocation_entry __user *urelocs;
-	const struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
+	const struct drm_i915_gem_exec_object2 *entry = exec_entry(eb, vma);
 	unsigned int remain;
 
 	urelocs = u64_to_user_ptr(entry->relocs_ptr);
@@ -1529,7 +1523,7 @@ out:
 static int
 eb_relocate_vma_slow(struct i915_execbuffer *eb, struct i915_vma *vma)
 {
-	const struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
+	const struct drm_i915_gem_exec_object2 *entry = exec_entry(eb, vma);
 	struct drm_i915_gem_relocation_entry *relocs =
 		u64_to_ptr(typeof(*relocs), entry->relocs_ptr);
 	unsigned int i;
@@ -1733,6 +1727,8 @@ repeat:
 	if (err)
 		goto err;
 
+	GEM_BUG_ON(!eb->batch);
+
 	list_for_each_entry(vma, &eb->relocs, reloc_link) {
 		if (!have_copy) {
 			pagefault_disable();
@@ -1826,11 +1822,11 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb)
 	int err;
 
 	for (i = 0; i < count; i++) {
-		struct drm_i915_gem_exec_object2 *entry = &eb->exec[i];
-		struct i915_vma *vma = exec_to_vma(entry);
+		unsigned int flags = eb->flags[i];
+		struct i915_vma *vma = eb->vma[i];
 		struct drm_i915_gem_object *obj = vma->obj;
 
-		if (entry->flags & EXEC_OBJECT_CAPTURE) {
+		if (flags & EXEC_OBJECT_CAPTURE) {
 			struct i915_gem_capture_list *capture;
 
 			capture = kmalloc(sizeof(*capture), GFP_KERNEL);
@@ -1838,7 +1834,7 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb)
 				return -ENOMEM;
 
 			capture->next = eb->request->capture_list;
-			capture->vma = vma;
+			capture->vma = eb->vma[i];
 			eb->request->capture_list = capture;
 		}
 
@@ -1856,29 +1852,29 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb)
 		 */
 		if (unlikely(obj->cache_dirty & ~obj->cache_coherent)) {
 			if (i915_gem_clflush_object(obj, 0))
-				entry->flags &= ~EXEC_OBJECT_ASYNC;
+				flags &= ~EXEC_OBJECT_ASYNC;
 		}
 
-		if (entry->flags & EXEC_OBJECT_ASYNC)
-			goto skip_flushes;
+		if (flags & EXEC_OBJECT_ASYNC)
+			continue;
 
 		err = i915_gem_request_await_object
-			(eb->request, obj, entry->flags & EXEC_OBJECT_WRITE);
+			(eb->request, obj, flags & EXEC_OBJECT_WRITE);
 		if (err)
 			return err;
-
-skip_flushes:
-		i915_vma_move_to_active(vma, eb->request, entry->flags);
-		__eb_unreserve_vma(vma, entry);
-		vma->exec_entry = NULL;
 	}
 
 	for (i = 0; i < count; i++) {
-		const struct drm_i915_gem_exec_object2 *entry = &eb->exec[i];
-		struct i915_vma *vma = exec_to_vma(entry);
+		unsigned int flags = eb->flags[i];
+		struct i915_vma *vma = eb->vma[i];
+
+		i915_vma_move_to_active(vma, eb->request, flags);
+		eb_export_fence(vma, eb->request, flags);
 
-		eb_export_fence(vma, eb->request, entry->flags);
-		if (unlikely(entry->flags & __EXEC_OBJECT_HAS_REF))
+		__eb_unreserve_vma(vma, flags);
+		vma->exec_flags = NULL;
+
+		if (unlikely(flags & __EXEC_OBJECT_HAS_REF))
 			i915_vma_put(vma);
 	}
 	eb->exec = NULL;
@@ -2007,11 +2003,11 @@ static struct i915_vma *eb_parse(struct i915_execbuffer *eb, bool is_master)
 	if (IS_ERR(vma))
 		goto out;
 
-	vma->exec_entry =
-		memset(&eb->exec[eb->buffer_count++],
-		       0, sizeof(*vma->exec_entry));
-	vma->exec_entry->flags = __EXEC_OBJECT_HAS_PIN | __EXEC_OBJECT_HAS_REF;
-	__exec_to_vma(vma->exec_entry) = (uintptr_t)i915_vma_get(vma);
+	eb->vma[eb->buffer_count] = i915_vma_get(vma);
+	eb->flags[eb->buffer_count] =
+		__EXEC_OBJECT_HAS_PIN | __EXEC_OBJECT_HAS_REF;
+	vma->exec_flags = &eb->flags[eb->buffer_count];
+	eb->buffer_count++;
 
 out:
 	i915_gem_object_unpin_pages(shadow_batch_obj);
@@ -2270,7 +2266,12 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 	eb.args = args;
 	if (DBG_FORCE_RELOC || !(args->flags & I915_EXEC_NO_RELOC))
 		args->flags |= __EXEC_HAS_RELOC;
+
 	eb.exec = exec;
+	eb.vma = memset(exec + args->buffer_count + 1, 0,
+			(args->buffer_count + 1) * sizeof(*eb.vma));
+	eb.flags = (unsigned int *)(eb.vma + args->buffer_count + 1);
+
 	eb.invalid_flags = __EXEC_OBJECT_UNKNOWN_FLAGS;
 	if (USES_FULL_PPGTT(eb.i915))
 		eb.invalid_flags |= EXEC_OBJECT_NEEDS_GTT;
@@ -2358,7 +2359,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 		goto err_vma;
 	}
 
-	if (unlikely(eb.batch->exec_entry->flags & EXEC_OBJECT_WRITE)) {
+	if (unlikely(*eb.batch->exec_flags & EXEC_OBJECT_WRITE)) {
 		DRM_DEBUG("Attempting to use self-modifying batch buffer\n");
 		err = -EINVAL;
 		goto err_vma;
@@ -2511,7 +2512,9 @@ int
 i915_gem_execbuffer(struct drm_device *dev, void *data,
 		    struct drm_file *file)
 {
-	const size_t sz = sizeof(struct drm_i915_gem_exec_object2);
+	const size_t sz = (sizeof(struct drm_i915_gem_exec_object2) +
+			   sizeof(struct i915_vma *) +
+			   sizeof(unsigned int));
 	struct drm_i915_gem_execbuffer *args = data;
 	struct drm_i915_gem_execbuffer2 exec2;
 	struct drm_i915_gem_exec_object *exec_list = NULL;
@@ -2602,7 +2605,9 @@ int
 i915_gem_execbuffer2(struct drm_device *dev, void *data,
 		     struct drm_file *file)
 {
-	const size_t sz = sizeof(struct drm_i915_gem_exec_object2);
+	const size_t sz = (sizeof(struct drm_i915_gem_exec_object2) +
+			   sizeof(struct i915_vma *) +
+			   sizeof(unsigned int));
 	struct drm_i915_gem_execbuffer2 *args = data;
 	struct drm_i915_gem_exec_object2 *exec2_list;
 	struct drm_syncobj **fences = NULL;
diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index 20cf272c97b1..5c49506d14bc 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -112,7 +112,7 @@ struct i915_vma {
 	/**
 	 * Used for performing relocations during execbuffer insertion.
 	 */
-	struct drm_i915_gem_exec_object2 *exec_entry;
+	unsigned int *exec_flags;
 	struct hlist_node exec_node;
 	u32 exec_handle;
 