commit 95ff7c7dd7098860bc131c7dec0ad76ca61e796a
tree   5f29c1d63c00587df5ef7894ac7009aa3fe31721
parent 7dd4f6729f9243bd7046c6f04c107a456bda38eb
author Chris Wilson <chris@chris-wilson.co.uk> 2017-06-16 10:05:25 -0400
committer Chris Wilson <chris@chris-wilson.co.uk> 2017-06-16 11:54:05 -0400

drm/i915: Stash a pointer to the obj's resv in the vma

During execbuf, a mandatory step is to add the request (the fence) to each
object's reservation_object. Inside execbuf we track the vma, so adding the
fence to the reservation_object means first chasing the obj pointer,
incurring another cache miss. We can reduce the number of cache misses by
stashing a pointer to the reservation_object in the vma itself.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20170616140525.6394-1-chris@chris-wilson.co.uk
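
For illustration, a minimal sketch of the pointer chase this patch removes.
The types below are simplified stand-ins for drm_i915_gem_object and
i915_vma, not the driver's definitions:

/*
 * Illustrative sketch only: simplified stand-ins for the i915 structures,
 * showing the dependent load that caching the resv pointer avoids.
 */
struct reservation_object;

struct gem_object {			/* stands in for drm_i915_gem_object */
	struct reservation_object *resv;
	/* ... many colder fields ... */
};

struct vma {				/* stands in for i915_vma */
	struct gem_object *obj;
	struct reservation_object *resv; /* alias of obj->resv, set at creation */
};

/* Before: two dependent loads (vma->obj, then obj->resv), so a cold obj
 * costs an extra cache miss for every object in the execbuf. */
static inline struct reservation_object *resv_via_obj(const struct vma *vma)
{
	return vma->obj->resv;
}

/* After: a single load from the vma that execbuf is already touching. */
static inline struct reservation_object *resv_cached(const struct vma *vma)
{
	return vma->resv;
}

The alias stays valid because a vma never outlives the object it was created
from, so the copy made in vma_create() below cannot dangle.
---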
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 25 ++++++++++++-------------
 drivers/gpu/drm/i915/i915_vma.c            |  1 +
 drivers/gpu/drm/i915/i915_vma.h            |  3 ++-
 3 files changed, 15 insertions(+), 14 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 2f7a2d2510fc..eb46dfa374a7 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1192,17 +1192,17 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
 	if (err)
 		goto err_request;
 
-	GEM_BUG_ON(!reservation_object_test_signaled_rcu(obj->resv, true));
+	GEM_BUG_ON(!reservation_object_test_signaled_rcu(batch->resv, true));
 	i915_vma_move_to_active(batch, rq, 0);
-	reservation_object_lock(obj->resv, NULL);
-	reservation_object_add_excl_fence(obj->resv, &rq->fence);
-	reservation_object_unlock(obj->resv);
+	reservation_object_lock(batch->resv, NULL);
+	reservation_object_add_excl_fence(batch->resv, &rq->fence);
+	reservation_object_unlock(batch->resv);
 	i915_vma_unpin(batch);
 
 	i915_vma_move_to_active(vma, rq, true);
-	reservation_object_lock(vma->obj->resv, NULL);
-	reservation_object_add_excl_fence(vma->obj->resv, &rq->fence);
-	reservation_object_unlock(vma->obj->resv);
+	reservation_object_lock(vma->resv, NULL);
+	reservation_object_add_excl_fence(vma->resv, &rq->fence);
+	reservation_object_unlock(vma->resv);
 
 	rq->batch = batch;
 
@@ -1252,7 +1252,6 @@ relocate_entry(struct i915_vma *vma,
 	       struct i915_execbuffer *eb,
 	       const struct i915_vma *target)
 {
-	struct drm_i915_gem_object *obj = vma->obj;
 	u64 offset = reloc->offset;
 	u64 target_offset = relocation_target(reloc, target);
 	bool wide = eb->reloc_cache.use_64bit_reloc;
@@ -1260,7 +1259,7 @@ relocate_entry(struct i915_vma *vma,
 
 	if (!eb->reloc_cache.vaddr &&
 	    (DBG_FORCE_RELOC == FORCE_GPU_RELOC ||
-	     !reservation_object_test_signaled_rcu(obj->resv, true))) {
+	     !reservation_object_test_signaled_rcu(vma->resv, true))) {
 		const unsigned int gen = eb->reloc_cache.gen;
 		unsigned int len;
 		u32 *batch;
@@ -1320,7 +1319,7 @@ relocate_entry(struct i915_vma *vma,
 	}
 
 repeat:
-	vaddr = reloc_vaddr(obj, &eb->reloc_cache, offset >> PAGE_SHIFT);
+	vaddr = reloc_vaddr(vma->obj, &eb->reloc_cache, offset >> PAGE_SHIFT);
 	if (IS_ERR(vaddr))
 		return PTR_ERR(vaddr);
 
@@ -1793,11 +1792,11 @@ slow:
 	return eb_relocate_slow(eb);
 }
 
-static void eb_export_fence(struct drm_i915_gem_object *obj,
+static void eb_export_fence(struct i915_vma *vma,
 			    struct drm_i915_gem_request *req,
 			    unsigned int flags)
 {
-	struct reservation_object *resv = obj->resv;
+	struct reservation_object *resv = vma->resv;
 
 	/*
 	 * Ignore errors from failing to allocate the new fence, we can't
@@ -1856,7 +1855,7 @@ skip_flushes:
 		const struct drm_i915_gem_exec_object2 *entry = &eb->exec[i];
 		struct i915_vma *vma = exec_to_vma(entry);
 
-		eb_export_fence(vma->obj, eb->request, entry->flags);
+		eb_export_fence(vma, eb->request, entry->flags);
 		if (unlikely(entry->flags & __EXEC_OBJECT_HAS_REF))
 			i915_vma_put(vma);
 	}
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index f5c57dff288e..532c709febbd 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -90,6 +90,7 @@ vma_create(struct drm_i915_gem_object *obj,
 	init_request_active(&vma->last_fence, NULL);
 	vma->vm = vm;
 	vma->obj = obj;
+	vma->resv = obj->resv;
 	vma->size = obj->base.size;
 	vma->display_alignment = I915_GTT_MIN_ALIGNMENT;
 
diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index 04d7a5da70fd..4a673fc1a432 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -50,6 +50,7 @@ struct i915_vma {
 	struct drm_i915_gem_object *obj;
 	struct i915_address_space *vm;
 	struct drm_i915_fence_reg *fence;
+	struct reservation_object *resv; /** Alias of obj->resv */
 	struct sg_table *pages;
 	void __iomem *iomap;
 	u64 size;
@@ -111,8 +112,8 @@ struct i915_vma {
 	/**
 	 * Used for performing relocations during execbuffer insertion.
 	 */
-	struct hlist_node exec_node;
 	struct drm_i915_gem_exec_object2 *exec_entry;
+	struct hlist_node exec_node;
 	u32 exec_handle;
 
 	struct i915_gem_context *ctx;