diff options
author | Chris Wilson <chris@chris-wilson.co.uk> | 2013-01-08 05:53:17 -0500 |
---|---|---|
committer | Daniel Vetter <daniel.vetter@ffwll.ch> | 2013-01-17 16:23:47 -0500 |
commit | eef90ccb8a4d50b219a95cc53878ebb007315b32 (patch) | |
tree | e10ef8121e9b6f0c061df75967c7af77a942feb3 | |
parent | ed5982e6ce5f106abcbf071f80730db344a6da42 (diff) |
drm/i915: Use the reloc.handle as an index into the execbuffer array
Using copywinwin10 as an example that is dependent upon emitting a lot
of relocations (2 per operation), we see improvements of:
c2d/gm45: 618000.0/sec to 623000.0/sec.
i3-330m: 748000.0/sec to 789000.0/sec.
(measured relative to a baseline with neither optimisation applied).
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Imre Deak <imre.deak@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
-rw-r--r-- | drivers/gpu/drm/i915/i915_dma.c | 3 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/i915_gem_execbuffer.c | 100 | ||||
-rw-r--r-- | include/uapi/drm/i915_drm.h | 8 |
3 files changed, 71 insertions, 40 deletions
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index a6e047d533ec..442118293883 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c | |||
@@ -995,6 +995,9 @@ static int i915_getparam(struct drm_device *dev, void *data, | |||
995 | case I915_PARAM_HAS_EXEC_NO_RELOC: | 995 | case I915_PARAM_HAS_EXEC_NO_RELOC: |
996 | value = 1; | 996 | value = 1; |
997 | break; | 997 | break; |
998 | case I915_PARAM_HAS_EXEC_HANDLE_LUT: | ||
999 | value = 1; | ||
1000 | break; | ||
998 | default: | 1001 | default: |
999 | DRM_DEBUG_DRIVER("Unknown parameter %d\n", | 1002 | DRM_DEBUG_DRIVER("Unknown parameter %d\n", |
1000 | param->param); | 1003 | param->param); |
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 34f6cdffa9f8..f5a11ecf5494 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c | |||
@@ -36,24 +36,40 @@ | |||
36 | struct eb_objects { | 36 | struct eb_objects { |
37 | struct list_head objects; | 37 | struct list_head objects; |
38 | int and; | 38 | int and; |
39 | struct hlist_head buckets[0]; | 39 | union { |
40 | struct drm_i915_gem_object *lut[0]; | ||
41 | struct hlist_head buckets[0]; | ||
42 | }; | ||
40 | }; | 43 | }; |
41 | 44 | ||
42 | static struct eb_objects * | 45 | static struct eb_objects * |
43 | eb_create(int size) | 46 | eb_create(struct drm_i915_gem_execbuffer2 *args) |
44 | { | 47 | { |
45 | struct eb_objects *eb; | 48 | struct eb_objects *eb = NULL; |
46 | int count = PAGE_SIZE / sizeof(struct hlist_head) / 2; | 49 | |
47 | BUILD_BUG_ON(!is_power_of_2(PAGE_SIZE / sizeof(struct hlist_head))); | 50 | if (args->flags & I915_EXEC_HANDLE_LUT) { |
48 | while (count > size) | 51 | int size = args->buffer_count; |
49 | count >>= 1; | 52 | size *= sizeof(struct drm_i915_gem_object *); |
50 | eb = kzalloc(count*sizeof(struct hlist_head) + | 53 | size += sizeof(struct eb_objects); |
51 | sizeof(struct eb_objects), | 54 | eb = kmalloc(size, GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY); |
52 | GFP_KERNEL); | 55 | } |
53 | if (eb == NULL) | 56 | |
54 | return eb; | 57 | if (eb == NULL) { |
55 | 58 | int size = args->buffer_count; | |
56 | eb->and = count - 1; | 59 | int count = PAGE_SIZE / sizeof(struct hlist_head) / 2; |
60 | BUILD_BUG_ON(!is_power_of_2(PAGE_SIZE / sizeof(struct hlist_head))); | ||
61 | while (count > 2*size) | ||
62 | count >>= 1; | ||
63 | eb = kzalloc(count*sizeof(struct hlist_head) + | ||
64 | sizeof(struct eb_objects), | ||
65 | GFP_TEMPORARY); | ||
66 | if (eb == NULL) | ||
67 | return eb; | ||
68 | |||
69 | eb->and = count - 1; | ||
70 | } else | ||
71 | eb->and = -args->buffer_count; | ||
72 | |||
57 | INIT_LIST_HEAD(&eb->objects); | 73 | INIT_LIST_HEAD(&eb->objects); |
58 | return eb; | 74 | return eb; |
59 | } | 75 | } |
@@ -61,26 +77,20 @@ eb_create(int size) | |||
61 | static void | 77 | static void |
62 | eb_reset(struct eb_objects *eb) | 78 | eb_reset(struct eb_objects *eb) |
63 | { | 79 | { |
64 | memset(eb->buckets, 0, (eb->and+1)*sizeof(struct hlist_head)); | 80 | if (eb->and >= 0) |
65 | } | 81 | memset(eb->buckets, 0, (eb->and+1)*sizeof(struct hlist_head)); |
66 | |||
67 | static void | ||
68 | eb_add_object(struct eb_objects *eb, struct drm_i915_gem_object *obj) | ||
69 | { | ||
70 | hlist_add_head(&obj->exec_node, | ||
71 | &eb->buckets[obj->exec_handle & eb->and]); | ||
72 | } | 82 | } |
73 | 83 | ||
74 | static int | 84 | static int |
75 | eb_lookup_objects(struct eb_objects *eb, | 85 | eb_lookup_objects(struct eb_objects *eb, |
76 | struct drm_i915_gem_exec_object2 *exec, | 86 | struct drm_i915_gem_exec_object2 *exec, |
77 | int count, | 87 | const struct drm_i915_gem_execbuffer2 *args, |
78 | struct drm_file *file) | 88 | struct drm_file *file) |
79 | { | 89 | { |
80 | int i; | 90 | int i; |
81 | 91 | ||
82 | spin_lock(&file->table_lock); | 92 | spin_lock(&file->table_lock); |
83 | for (i = 0; i < count; i++) { | 93 | for (i = 0; i < args->buffer_count; i++) { |
84 | struct drm_i915_gem_object *obj; | 94 | struct drm_i915_gem_object *obj; |
85 | 95 | ||
86 | obj = to_intel_bo(idr_find(&file->object_idr, exec[i].handle)); | 96 | obj = to_intel_bo(idr_find(&file->object_idr, exec[i].handle)); |
@@ -101,9 +111,15 @@ eb_lookup_objects(struct eb_objects *eb, | |||
101 | drm_gem_object_reference(&obj->base); | 111 | drm_gem_object_reference(&obj->base); |
102 | list_add_tail(&obj->exec_list, &eb->objects); | 112 | list_add_tail(&obj->exec_list, &eb->objects); |
103 | 113 | ||
104 | obj->exec_handle = exec[i].handle; | ||
105 | obj->exec_entry = &exec[i]; | 114 | obj->exec_entry = &exec[i]; |
106 | eb_add_object(eb, obj); | 115 | if (eb->and < 0) { |
116 | eb->lut[i] = obj; | ||
117 | } else { | ||
118 | uint32_t handle = args->flags & I915_EXEC_HANDLE_LUT ? i : exec[i].handle; | ||
119 | obj->exec_handle = handle; | ||
120 | hlist_add_head(&obj->exec_node, | ||
121 | &eb->buckets[handle & eb->and]); | ||
122 | } | ||
107 | } | 123 | } |
108 | spin_unlock(&file->table_lock); | 124 | spin_unlock(&file->table_lock); |
109 | 125 | ||
@@ -113,18 +129,24 @@ eb_lookup_objects(struct eb_objects *eb, | |||
113 | static struct drm_i915_gem_object * | 129 | static struct drm_i915_gem_object * |
114 | eb_get_object(struct eb_objects *eb, unsigned long handle) | 130 | eb_get_object(struct eb_objects *eb, unsigned long handle) |
115 | { | 131 | { |
116 | struct hlist_head *head; | 132 | if (eb->and < 0) { |
117 | struct hlist_node *node; | 133 | if (handle >= -eb->and) |
118 | struct drm_i915_gem_object *obj; | 134 | return NULL; |
135 | return eb->lut[handle]; | ||
136 | } else { | ||
137 | struct hlist_head *head; | ||
138 | struct hlist_node *node; | ||
119 | 139 | ||
120 | head = &eb->buckets[handle & eb->and]; | 140 | head = &eb->buckets[handle & eb->and]; |
121 | hlist_for_each(node, head) { | 141 | hlist_for_each(node, head) { |
122 | obj = hlist_entry(node, struct drm_i915_gem_object, exec_node); | 142 | struct drm_i915_gem_object *obj; |
123 | if (obj->exec_handle == handle) | ||
124 | return obj; | ||
125 | } | ||
126 | 143 | ||
127 | return NULL; | 144 | obj = hlist_entry(node, struct drm_i915_gem_object, exec_node); |
145 | if (obj->exec_handle == handle) | ||
146 | return obj; | ||
147 | } | ||
148 | return NULL; | ||
149 | } | ||
128 | } | 150 | } |
129 | 151 | ||
130 | static void | 152 | static void |
@@ -615,7 +637,7 @@ i915_gem_execbuffer_relocate_slow(struct drm_device *dev, | |||
615 | 637 | ||
616 | /* reacquire the objects */ | 638 | /* reacquire the objects */ |
617 | eb_reset(eb); | 639 | eb_reset(eb); |
618 | ret = eb_lookup_objects(eb, exec, count, file); | 640 | ret = eb_lookup_objects(eb, exec, args, file); |
619 | if (ret) | 641 | if (ret) |
620 | goto err; | 642 | goto err; |
621 | 643 | ||
@@ -919,7 +941,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, | |||
919 | goto pre_mutex_err; | 941 | goto pre_mutex_err; |
920 | } | 942 | } |
921 | 943 | ||
922 | eb = eb_create(args->buffer_count); | 944 | eb = eb_create(args); |
923 | if (eb == NULL) { | 945 | if (eb == NULL) { |
924 | mutex_unlock(&dev->struct_mutex); | 946 | mutex_unlock(&dev->struct_mutex); |
925 | ret = -ENOMEM; | 947 | ret = -ENOMEM; |
@@ -927,7 +949,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, | |||
927 | } | 949 | } |
928 | 950 | ||
929 | /* Look up object handles */ | 951 | /* Look up object handles */ |
930 | ret = eb_lookup_objects(eb, exec, args->buffer_count, file); | 952 | ret = eb_lookup_objects(eb, exec, args, file); |
931 | if (ret) | 953 | if (ret) |
932 | goto err; | 954 | goto err; |
933 | 955 | ||
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 2430b6ad6a85..07d59419fe6b 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h | |||
@@ -309,6 +309,7 @@ typedef struct drm_i915_irq_wait { | |||
309 | #define I915_PARAM_HAS_SECURE_BATCHES 23 | 309 | #define I915_PARAM_HAS_SECURE_BATCHES 23 |
310 | #define I915_PARAM_HAS_PINNED_BATCHES 24 | 310 | #define I915_PARAM_HAS_PINNED_BATCHES 24 |
311 | #define I915_PARAM_HAS_EXEC_NO_RELOC 25 | 311 | #define I915_PARAM_HAS_EXEC_NO_RELOC 25 |
312 | #define I915_PARAM_HAS_EXEC_HANDLE_LUT 26 | ||
312 | 313 | ||
313 | typedef struct drm_i915_getparam { | 314 | typedef struct drm_i915_getparam { |
314 | int param; | 315 | int param; |
@@ -699,7 +700,12 @@ struct drm_i915_gem_execbuffer2 { | |||
699 | */ | 700 | */ |
700 | #define I915_EXEC_NO_RELOC (1<<11) | 701 | #define I915_EXEC_NO_RELOC (1<<11) |
701 | 702 | ||
702 | #define __I915_EXEC_UNKNOWN_FLAGS -(I915_EXEC_NO_RELOC<<1) | 703 | /** Use the reloc.handle as an index into the exec object array rather |
704 | * than as the per-file handle. | ||
705 | */ | ||
706 | #define I915_EXEC_HANDLE_LUT (1<<12) | ||
707 | |||
708 | #define __I915_EXEC_UNKNOWN_FLAGS -(I915_EXEC_HANDLE_LUT<<1) | ||
703 | 709 | ||
704 | #define I915_EXEC_CONTEXT_ID_MASK (0xffffffff) | 710 | #define I915_EXEC_CONTEXT_ID_MASK (0xffffffff) |
705 | #define i915_execbuffer2_set_context_id(eb2, context) \ | 711 | #define i915_execbuffer2_set_context_id(eb2, context) \ |