aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Chris Wilson <chris@chris-wilson.co.uk> 2013-01-08 05:53:17 -0500
committer Daniel Vetter <daniel.vetter@ffwll.ch> 2013-01-17 16:23:47 -0500
commit eef90ccb8a4d50b219a95cc53878ebb007315b32 (patch)
tree e10ef8121e9b6f0c061df75967c7af77a942feb3
parent ed5982e6ce5f106abcbf071f80730db344a6da42 (diff)
drm/i915: Use the reloc.handle as an index into the execbuffer array
Using copywinwin10 as an example that is dependent upon emitting a lot of relocations (2 per operation), we see improvements of:

c2d/gm45: 618000.0/sec to 623000.0/sec.
i3-330m: 748000.0/sec to 789000.0/sec.

(measured relative to a baseline with neither optimisation applied).

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Imre Deak <imre.deak@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
-rw-r--r-- drivers/gpu/drm/i915/i915_dma.c 3
-rw-r--r-- drivers/gpu/drm/i915/i915_gem_execbuffer.c 100
-rw-r--r-- include/uapi/drm/i915_drm.h 8
3 files changed, 71 insertions, 40 deletions
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index a6e047d533ec..442118293883 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -995,6 +995,9 @@ static int i915_getparam(struct drm_device *dev, void *data,
995 case I915_PARAM_HAS_EXEC_NO_RELOC: 995 case I915_PARAM_HAS_EXEC_NO_RELOC:
996 value = 1; 996 value = 1;
997 break; 997 break;
998 case I915_PARAM_HAS_EXEC_HANDLE_LUT:
999 value = 1;
1000 break;
998 default: 1001 default:
999 DRM_DEBUG_DRIVER("Unknown parameter %d\n", 1002 DRM_DEBUG_DRIVER("Unknown parameter %d\n",
1000 param->param); 1003 param->param);
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 34f6cdffa9f8..f5a11ecf5494 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -36,24 +36,40 @@
36struct eb_objects { 36struct eb_objects {
37 struct list_head objects; 37 struct list_head objects;
38 int and; 38 int and;
39 struct hlist_head buckets[0]; 39 union {
40 struct drm_i915_gem_object *lut[0];
41 struct hlist_head buckets[0];
42 };
40}; 43};
41 44
42static struct eb_objects * 45static struct eb_objects *
43eb_create(int size) 46eb_create(struct drm_i915_gem_execbuffer2 *args)
44{ 47{
45 struct eb_objects *eb; 48 struct eb_objects *eb = NULL;
46 int count = PAGE_SIZE / sizeof(struct hlist_head) / 2; 49
47 BUILD_BUG_ON(!is_power_of_2(PAGE_SIZE / sizeof(struct hlist_head))); 50 if (args->flags & I915_EXEC_HANDLE_LUT) {
48 while (count > size) 51 int size = args->buffer_count;
49 count >>= 1; 52 size *= sizeof(struct drm_i915_gem_object *);
50 eb = kzalloc(count*sizeof(struct hlist_head) + 53 size += sizeof(struct eb_objects);
51 sizeof(struct eb_objects), 54 eb = kmalloc(size, GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY);
52 GFP_KERNEL); 55 }
53 if (eb == NULL) 56
54 return eb; 57 if (eb == NULL) {
55 58 int size = args->buffer_count;
56 eb->and = count - 1; 59 int count = PAGE_SIZE / sizeof(struct hlist_head) / 2;
60 BUILD_BUG_ON(!is_power_of_2(PAGE_SIZE / sizeof(struct hlist_head)));
61 while (count > 2*size)
62 count >>= 1;
63 eb = kzalloc(count*sizeof(struct hlist_head) +
64 sizeof(struct eb_objects),
65 GFP_TEMPORARY);
66 if (eb == NULL)
67 return eb;
68
69 eb->and = count - 1;
70 } else
71 eb->and = -args->buffer_count;
72
57 INIT_LIST_HEAD(&eb->objects); 73 INIT_LIST_HEAD(&eb->objects);
58 return eb; 74 return eb;
59} 75}
@@ -61,26 +77,20 @@ eb_create(int size)
61static void 77static void
62eb_reset(struct eb_objects *eb) 78eb_reset(struct eb_objects *eb)
63{ 79{
64 memset(eb->buckets, 0, (eb->and+1)*sizeof(struct hlist_head)); 80 if (eb->and >= 0)
65} 81 memset(eb->buckets, 0, (eb->and+1)*sizeof(struct hlist_head));
66
67static void
68eb_add_object(struct eb_objects *eb, struct drm_i915_gem_object *obj)
69{
70 hlist_add_head(&obj->exec_node,
71 &eb->buckets[obj->exec_handle & eb->and]);
72} 82}
73 83
74static int 84static int
75eb_lookup_objects(struct eb_objects *eb, 85eb_lookup_objects(struct eb_objects *eb,
76 struct drm_i915_gem_exec_object2 *exec, 86 struct drm_i915_gem_exec_object2 *exec,
77 int count, 87 const struct drm_i915_gem_execbuffer2 *args,
78 struct drm_file *file) 88 struct drm_file *file)
79{ 89{
80 int i; 90 int i;
81 91
82 spin_lock(&file->table_lock); 92 spin_lock(&file->table_lock);
83 for (i = 0; i < count; i++) { 93 for (i = 0; i < args->buffer_count; i++) {
84 struct drm_i915_gem_object *obj; 94 struct drm_i915_gem_object *obj;
85 95
86 obj = to_intel_bo(idr_find(&file->object_idr, exec[i].handle)); 96 obj = to_intel_bo(idr_find(&file->object_idr, exec[i].handle));
@@ -101,9 +111,15 @@ eb_lookup_objects(struct eb_objects *eb,
101 drm_gem_object_reference(&obj->base); 111 drm_gem_object_reference(&obj->base);
102 list_add_tail(&obj->exec_list, &eb->objects); 112 list_add_tail(&obj->exec_list, &eb->objects);
103 113
104 obj->exec_handle = exec[i].handle;
105 obj->exec_entry = &exec[i]; 114 obj->exec_entry = &exec[i];
106 eb_add_object(eb, obj); 115 if (eb->and < 0) {
116 eb->lut[i] = obj;
117 } else {
118 uint32_t handle = args->flags & I915_EXEC_HANDLE_LUT ? i : exec[i].handle;
119 obj->exec_handle = handle;
120 hlist_add_head(&obj->exec_node,
121 &eb->buckets[handle & eb->and]);
122 }
107 } 123 }
108 spin_unlock(&file->table_lock); 124 spin_unlock(&file->table_lock);
109 125
@@ -113,18 +129,24 @@ eb_lookup_objects(struct eb_objects *eb,
113static struct drm_i915_gem_object * 129static struct drm_i915_gem_object *
114eb_get_object(struct eb_objects *eb, unsigned long handle) 130eb_get_object(struct eb_objects *eb, unsigned long handle)
115{ 131{
116 struct hlist_head *head; 132 if (eb->and < 0) {
117 struct hlist_node *node; 133 if (handle >= -eb->and)
118 struct drm_i915_gem_object *obj; 134 return NULL;
135 return eb->lut[handle];
136 } else {
137 struct hlist_head *head;
138 struct hlist_node *node;
119 139
120 head = &eb->buckets[handle & eb->and]; 140 head = &eb->buckets[handle & eb->and];
121 hlist_for_each(node, head) { 141 hlist_for_each(node, head) {
122 obj = hlist_entry(node, struct drm_i915_gem_object, exec_node); 142 struct drm_i915_gem_object *obj;
123 if (obj->exec_handle == handle)
124 return obj;
125 }
126 143
127 return NULL; 144 obj = hlist_entry(node, struct drm_i915_gem_object, exec_node);
145 if (obj->exec_handle == handle)
146 return obj;
147 }
148 return NULL;
149 }
128} 150}
129 151
130static void 152static void
@@ -615,7 +637,7 @@ i915_gem_execbuffer_relocate_slow(struct drm_device *dev,
615 637
616 /* reacquire the objects */ 638 /* reacquire the objects */
617 eb_reset(eb); 639 eb_reset(eb);
618 ret = eb_lookup_objects(eb, exec, count, file); 640 ret = eb_lookup_objects(eb, exec, args, file);
619 if (ret) 641 if (ret)
620 goto err; 642 goto err;
621 643
@@ -919,7 +941,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
919 goto pre_mutex_err; 941 goto pre_mutex_err;
920 } 942 }
921 943
922 eb = eb_create(args->buffer_count); 944 eb = eb_create(args);
923 if (eb == NULL) { 945 if (eb == NULL) {
924 mutex_unlock(&dev->struct_mutex); 946 mutex_unlock(&dev->struct_mutex);
925 ret = -ENOMEM; 947 ret = -ENOMEM;
@@ -927,7 +949,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
927 } 949 }
928 950
929 /* Look up object handles */ 951 /* Look up object handles */
930 ret = eb_lookup_objects(eb, exec, args->buffer_count, file); 952 ret = eb_lookup_objects(eb, exec, args, file);
931 if (ret) 953 if (ret)
932 goto err; 954 goto err;
933 955
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 2430b6ad6a85..07d59419fe6b 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -309,6 +309,7 @@ typedef struct drm_i915_irq_wait {
309#define I915_PARAM_HAS_SECURE_BATCHES 23 309#define I915_PARAM_HAS_SECURE_BATCHES 23
310#define I915_PARAM_HAS_PINNED_BATCHES 24 310#define I915_PARAM_HAS_PINNED_BATCHES 24
311#define I915_PARAM_HAS_EXEC_NO_RELOC 25 311#define I915_PARAM_HAS_EXEC_NO_RELOC 25
312#define I915_PARAM_HAS_EXEC_HANDLE_LUT 26
312 313
313typedef struct drm_i915_getparam { 314typedef struct drm_i915_getparam {
314 int param; 315 int param;
@@ -699,7 +700,12 @@ struct drm_i915_gem_execbuffer2 {
699 */ 700 */
700#define I915_EXEC_NO_RELOC (1<<11) 701#define I915_EXEC_NO_RELOC (1<<11)
701 702
702#define __I915_EXEC_UNKNOWN_FLAGS -(I915_EXEC_NO_RELOC<<1) 703/** Use the reloc.handle as an index into the exec object array rather
704 * than as the per-file handle.
705 */
706#define I915_EXEC_HANDLE_LUT (1<<12)
707
708#define __I915_EXEC_UNKNOWN_FLAGS -(I915_EXEC_HANDLE_LUT<<1)
703 709
704#define I915_EXEC_CONTEXT_ID_MASK (0xffffffff) 710#define I915_EXEC_CONTEXT_ID_MASK (0xffffffff)
705#define i915_execbuffer2_set_context_id(eb2, context) \ 711#define i915_execbuffer2_set_context_id(eb2, context) \