aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Chris Wilson <chris@chris-wilson.co.uk> 2012-07-26 06:49:32 -0400
committer Daniel Vetter <daniel.vetter@ffwll.ch> 2012-07-26 06:56:25 -0400
commit 42d6ab4839799b2f246748ce663d6b023f02bb73 (patch)
tree 33c8f55e3a4dfd9f62df68894685af6c11921767
parent f27b92651d72e863c308ea5dca5615fc98e38ca6 (diff)
drm/i915: Segregate memory domains in the GTT using coloring
Several functions of the GPU have the restriction that differing memory domains cannot be placed next to each other (as the GPU may prefetch beyond the end of one domain and hang as it crosses into the other domain). We use the facility of the drm_mm to mark ranges with a particular color that corresponds to the cache attributes of those pages in order to prevent allocating adjacent blocks of differing memory types.

v2: Rebase on top of drm_mm coloring v2.
v3: Fix rebinding existing gtt_space and add a verification routine.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
-rw-r--r-- drivers/gpu/drm/i915/i915_drv.h 5
-rw-r--r-- drivers/gpu/drm/i915/i915_gem.c 111
-rw-r--r-- drivers/gpu/drm/i915/i915_gem_evict.c 7
-rw-r--r-- drivers/gpu/drm/i915/i915_gem_gtt.c 19
4 files changed, 128 insertions, 14 deletions
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index e6e63c1aee68..270b31cabc1a 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -109,6 +109,7 @@ struct intel_pch_pll {
109 109
110#define WATCH_COHERENCY 0 110#define WATCH_COHERENCY 0
111#define WATCH_LISTS 0 111#define WATCH_LISTS 0
112#define WATCH_GTT 0
112 113
113#define I915_GEM_PHYS_CURSOR_0 1 114#define I915_GEM_PHYS_CURSOR_0 1
114#define I915_GEM_PHYS_CURSOR_1 2 115#define I915_GEM_PHYS_CURSOR_1 2
@@ -1406,7 +1407,9 @@ void i915_gem_init_global_gtt(struct drm_device *dev,
1406 1407
1407/* i915_gem_evict.c */ 1408/* i915_gem_evict.c */
1408int __must_check i915_gem_evict_something(struct drm_device *dev, int min_size, 1409int __must_check i915_gem_evict_something(struct drm_device *dev, int min_size,
1409 unsigned alignment, bool mappable); 1410 unsigned alignment,
1411 unsigned cache_level,
1412 bool mappable);
1410int i915_gem_evict_everything(struct drm_device *dev, bool purgeable_only); 1413int i915_gem_evict_everything(struct drm_device *dev, bool purgeable_only);
1411 1414
1412/* i915_gem_stolen.c */ 1415/* i915_gem_stolen.c */
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index b274810eaeab..19bdc245a87a 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2586,6 +2586,76 @@ i915_gem_object_get_fence(struct drm_i915_gem_object *obj)
2586 return 0; 2586 return 0;
2587} 2587}
2588 2588
2589static bool i915_gem_valid_gtt_space(struct drm_device *dev,
2590 struct drm_mm_node *gtt_space,
2591 unsigned long cache_level)
2592{
2593 struct drm_mm_node *other;
2594
2595 /* On non-LLC machines we have to be careful when putting differing
2596 * types of snoopable memory together to avoid the prefetcher
2597 * crossing memory domains and dieing.
2598 */
2599 if (HAS_LLC(dev))
2600 return true;
2601
2602 if (gtt_space == NULL)
2603 return true;
2604
2605 if (list_empty(&gtt_space->node_list))
2606 return true;
2607
2608 other = list_entry(gtt_space->node_list.prev, struct drm_mm_node, node_list);
2609 if (other->allocated && !other->hole_follows && other->color != cache_level)
2610 return false;
2611
2612 other = list_entry(gtt_space->node_list.next, struct drm_mm_node, node_list);
2613 if (other->allocated && !gtt_space->hole_follows && other->color != cache_level)
2614 return false;
2615
2616 return true;
2617}
2618
2619static void i915_gem_verify_gtt(struct drm_device *dev)
2620{
2621#if WATCH_GTT
2622 struct drm_i915_private *dev_priv = dev->dev_private;
2623 struct drm_i915_gem_object *obj;
2624 int err = 0;
2625
2626 list_for_each_entry(obj, &dev_priv->mm.gtt_list, gtt_list) {
2627 if (obj->gtt_space == NULL) {
2628 printk(KERN_ERR "object found on GTT list with no space reserved\n");
2629 err++;
2630 continue;
2631 }
2632
2633 if (obj->cache_level != obj->gtt_space->color) {
2634 printk(KERN_ERR "object reserved space [%08lx, %08lx] with wrong color, cache_level=%x, color=%lx\n",
2635 obj->gtt_space->start,
2636 obj->gtt_space->start + obj->gtt_space->size,
2637 obj->cache_level,
2638 obj->gtt_space->color);
2639 err++;
2640 continue;
2641 }
2642
2643 if (!i915_gem_valid_gtt_space(dev,
2644 obj->gtt_space,
2645 obj->cache_level)) {
2646 printk(KERN_ERR "invalid GTT space found at [%08lx, %08lx] - color=%x\n",
2647 obj->gtt_space->start,
2648 obj->gtt_space->start + obj->gtt_space->size,
2649 obj->cache_level);
2650 err++;
2651 continue;
2652 }
2653 }
2654
2655 WARN_ON(err);
2656#endif
2657}
2658
2589/** 2659/**
2590 * Finds free space in the GTT aperture and binds the object there. 2660 * Finds free space in the GTT aperture and binds the object there.
2591 */ 2661 */
@@ -2640,36 +2710,47 @@ i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
2640 search_free: 2710 search_free:
2641 if (map_and_fenceable) 2711 if (map_and_fenceable)
2642 free_space = 2712 free_space =
2643 drm_mm_search_free_in_range(&dev_priv->mm.gtt_space, 2713 drm_mm_search_free_in_range_color(&dev_priv->mm.gtt_space,
2644 size, alignment, 2714 size, alignment, obj->cache_level,
2645 0, dev_priv->mm.gtt_mappable_end, 2715 0, dev_priv->mm.gtt_mappable_end,
2646 0); 2716 false);
2647 else 2717 else
2648 free_space = drm_mm_search_free(&dev_priv->mm.gtt_space, 2718 free_space = drm_mm_search_free_color(&dev_priv->mm.gtt_space,
2649 size, alignment, 0); 2719 size, alignment, obj->cache_level,
2720 false);
2650 2721
2651 if (free_space != NULL) { 2722 if (free_space != NULL) {
2652 if (map_and_fenceable) 2723 if (map_and_fenceable)
2653 obj->gtt_space = 2724 obj->gtt_space =
2654 drm_mm_get_block_range_generic(free_space, 2725 drm_mm_get_block_range_generic(free_space,
2655 size, alignment, 0, 2726 size, alignment, obj->cache_level,
2656 0, dev_priv->mm.gtt_mappable_end, 2727 0, dev_priv->mm.gtt_mappable_end,
2657 0); 2728 false);
2658 else 2729 else
2659 obj->gtt_space = 2730 obj->gtt_space =
2660 drm_mm_get_block(free_space, size, alignment); 2731 drm_mm_get_block_generic(free_space,
2732 size, alignment, obj->cache_level,
2733 false);
2661 } 2734 }
2662 if (obj->gtt_space == NULL) { 2735 if (obj->gtt_space == NULL) {
2663 /* If the gtt is empty and we're still having trouble 2736 /* If the gtt is empty and we're still having trouble
2664 * fitting our object in, we're out of memory. 2737 * fitting our object in, we're out of memory.
2665 */ 2738 */
2666 ret = i915_gem_evict_something(dev, size, alignment, 2739 ret = i915_gem_evict_something(dev, size, alignment,
2740 obj->cache_level,
2667 map_and_fenceable); 2741 map_and_fenceable);
2668 if (ret) 2742 if (ret)
2669 return ret; 2743 return ret;
2670 2744
2671 goto search_free; 2745 goto search_free;
2672 } 2746 }
2747 if (WARN_ON(!i915_gem_valid_gtt_space(dev,
2748 obj->gtt_space,
2749 obj->cache_level))) {
2750 drm_mm_put_block(obj->gtt_space);
2751 obj->gtt_space = NULL;
2752 return -EINVAL;
2753 }
2673 2754
2674 ret = i915_gem_object_get_pages_gtt(obj, gfpmask); 2755 ret = i915_gem_object_get_pages_gtt(obj, gfpmask);
2675 if (ret) { 2756 if (ret) {
@@ -2732,6 +2813,7 @@ i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
2732 obj->map_and_fenceable = mappable && fenceable; 2813 obj->map_and_fenceable = mappable && fenceable;
2733 2814
2734 trace_i915_gem_object_bind(obj, map_and_fenceable); 2815 trace_i915_gem_object_bind(obj, map_and_fenceable);
2816 i915_gem_verify_gtt(dev);
2735 return 0; 2817 return 0;
2736} 2818}
2737 2819
@@ -2873,6 +2955,12 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
2873 return -EBUSY; 2955 return -EBUSY;
2874 } 2956 }
2875 2957
2958 if (!i915_gem_valid_gtt_space(dev, obj->gtt_space, cache_level)) {
2959 ret = i915_gem_object_unbind(obj);
2960 if (ret)
2961 return ret;
2962 }
2963
2876 if (obj->gtt_space) { 2964 if (obj->gtt_space) {
2877 ret = i915_gem_object_finish_gpu(obj); 2965 ret = i915_gem_object_finish_gpu(obj);
2878 if (ret) 2966 if (ret)
@@ -2884,7 +2972,7 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
2884 * registers with snooped memory, so relinquish any fences 2972 * registers with snooped memory, so relinquish any fences
2885 * currently pointing to our region in the aperture. 2973 * currently pointing to our region in the aperture.
2886 */ 2974 */
2887 if (INTEL_INFO(obj->base.dev)->gen < 6) { 2975 if (INTEL_INFO(dev)->gen < 6) {
2888 ret = i915_gem_object_put_fence(obj); 2976 ret = i915_gem_object_put_fence(obj);
2889 if (ret) 2977 if (ret)
2890 return ret; 2978 return ret;
@@ -2895,6 +2983,8 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
2895 if (obj->has_aliasing_ppgtt_mapping) 2983 if (obj->has_aliasing_ppgtt_mapping)
2896 i915_ppgtt_bind_object(dev_priv->mm.aliasing_ppgtt, 2984 i915_ppgtt_bind_object(dev_priv->mm.aliasing_ppgtt,
2897 obj, cache_level); 2985 obj, cache_level);
2986
2987 obj->gtt_space->color = cache_level;
2898 } 2988 }
2899 2989
2900 if (cache_level == I915_CACHE_NONE) { 2990 if (cache_level == I915_CACHE_NONE) {
@@ -2921,6 +3011,7 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
2921 } 3011 }
2922 3012
2923 obj->cache_level = cache_level; 3013 obj->cache_level = cache_level;
3014 i915_gem_verify_gtt(dev);
2924 return 0; 3015 return 0;
2925} 3016}
2926 3017
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
index 51e547c4ed89..7279c31d4a9a 100644
--- a/drivers/gpu/drm/i915/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/i915_gem_evict.c
@@ -44,7 +44,8 @@ mark_free(struct drm_i915_gem_object *obj, struct list_head *unwind)
44 44
45int 45int
46i915_gem_evict_something(struct drm_device *dev, int min_size, 46i915_gem_evict_something(struct drm_device *dev, int min_size,
47 unsigned alignment, bool mappable) 47 unsigned alignment, unsigned cache_level,
48 bool mappable)
48{ 49{
49 drm_i915_private_t *dev_priv = dev->dev_private; 50 drm_i915_private_t *dev_priv = dev->dev_private;
50 struct list_head eviction_list, unwind_list; 51 struct list_head eviction_list, unwind_list;
@@ -79,11 +80,11 @@ i915_gem_evict_something(struct drm_device *dev, int min_size,
79 INIT_LIST_HEAD(&unwind_list); 80 INIT_LIST_HEAD(&unwind_list);
80 if (mappable) 81 if (mappable)
81 drm_mm_init_scan_with_range(&dev_priv->mm.gtt_space, 82 drm_mm_init_scan_with_range(&dev_priv->mm.gtt_space,
82 min_size, alignment, 0, 83 min_size, alignment, cache_level,
83 0, dev_priv->mm.gtt_mappable_end); 84 0, dev_priv->mm.gtt_mappable_end);
84 else 85 else
85 drm_mm_init_scan(&dev_priv->mm.gtt_space, 86 drm_mm_init_scan(&dev_priv->mm.gtt_space,
86 min_size, alignment, 0); 87 min_size, alignment, cache_level);
87 88
88 /* First see if there is a large enough contiguous idle region... */ 89 /* First see if there is a large enough contiguous idle region... */
89 list_for_each_entry(obj, &dev_priv->mm.inactive_list, mm_list) { 90 list_for_each_entry(obj, &dev_priv->mm.inactive_list, mm_list) {
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 9fd25a435536..4584f7f0063e 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -422,6 +422,23 @@ void i915_gem_gtt_finish_object(struct drm_i915_gem_object *obj)
422 undo_idling(dev_priv, interruptible); 422 undo_idling(dev_priv, interruptible);
423} 423}
424 424
425static void i915_gtt_color_adjust(struct drm_mm_node *node,
426 unsigned long color,
427 unsigned long *start,
428 unsigned long *end)
429{
430 if (node->color != color)
431 *start += 4096;
432
433 if (!list_empty(&node->node_list)) {
434 node = list_entry(node->node_list.next,
435 struct drm_mm_node,
436 node_list);
437 if (node->allocated && node->color != color)
438 *end -= 4096;
439 }
440}
441
425void i915_gem_init_global_gtt(struct drm_device *dev, 442void i915_gem_init_global_gtt(struct drm_device *dev,
426 unsigned long start, 443 unsigned long start,
427 unsigned long mappable_end, 444 unsigned long mappable_end,
@@ -431,6 +448,8 @@ void i915_gem_init_global_gtt(struct drm_device *dev,
431 448
432 /* Substract the guard page ... */ 449 /* Substract the guard page ... */
433 drm_mm_init(&dev_priv->mm.gtt_space, start, end - start - PAGE_SIZE); 450 drm_mm_init(&dev_priv->mm.gtt_space, start, end - start - PAGE_SIZE);
451 if (!HAS_LLC(dev))
452 dev_priv->mm.gtt_space.color_adjust = i915_gtt_color_adjust;
434 453
435 dev_priv->mm.gtt_start = start; 454 dev_priv->mm.gtt_start = start;
436 dev_priv->mm.gtt_mappable_end = mappable_end; 455 dev_priv->mm.gtt_mappable_end = mappable_end;