author    Chris Wilson <chris@chris-wilson.co.uk>  2017-06-16 10:05:25 -0400
committer Chris Wilson <chris@chris-wilson.co.uk>  2017-06-16 11:54:05 -0400
commit    95ff7c7dd7098860bc131c7dec0ad76ca61e796a (patch)
tree      5f29c1d63c00587df5ef7894ac7009aa3fe31721
parent    7dd4f6729f9243bd7046c6f04c107a456bda38eb (diff)
drm/i915: Stash a pointer to the obj's resv in the vma
During execbuf, a mandatory step is that we add this request (this
fence) to each object's reservation_object. Inside execbuf we track the
vma, so adding the fence to the reservation_object means first chasing
the obj pointer, incurring another cache miss. We can reduce the number
of cache misses by stashing a pointer to the reservation_object in the
vma itself.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20170616140525.6394-1-chris@chris-wilson.co.uk
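[Editor's note: the saving is a pure pointer-chase reduction: instead of
two dependent loads (vma->obj, then obj->resv), the fence paths now do a
single load of vma->resv. Below is a minimal standalone C sketch of the
idea, using simplified stand-in structs rather than the real i915
definitions:

    /* Sketch only: stand-in structs, NOT the real i915 definitions. */
    #include <stdio.h>

    struct reservation_object { int placeholder; };

    struct gem_object {
            struct reservation_object *resv;
    };

    struct vma {
            struct gem_object *obj;
            struct reservation_object *resv; /* alias of obj->resv */
    };

    /* Before: two dependent loads, vma->obj and then obj->resv. */
    static struct reservation_object *resv_via_obj(const struct vma *v)
    {
            return v->obj->resv;
    }

    /* After: a single load of the alias stashed at vma creation. */
    static struct reservation_object *resv_stashed(const struct vma *v)
    {
            return v->resv;
    }

    int main(void)
    {
            struct reservation_object resv = { 0 };
            struct gem_object obj = { .resv = &resv };
            struct vma v = { .obj = &obj, .resv = obj.resv };

            /* Both paths reach the same reservation object. */
            printf("same resv: %d\n", resv_via_obj(&v) == resv_stashed(&v));
            return 0;
    }

The stash can be a plain alias with no extra reference counting because
a vma never outlives the object it maps.]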
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 25 ++++++++++++-------------
 drivers/gpu/drm/i915/i915_vma.c            |  1 +
 drivers/gpu/drm/i915/i915_vma.h            |  3 ++-
 3 files changed, 15 insertions(+), 14 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 2f7a2d2510fc..eb46dfa374a7 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1192,17 +1192,17 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
 	if (err)
 		goto err_request;
 
-	GEM_BUG_ON(!reservation_object_test_signaled_rcu(obj->resv, true));
+	GEM_BUG_ON(!reservation_object_test_signaled_rcu(batch->resv, true));
 	i915_vma_move_to_active(batch, rq, 0);
-	reservation_object_lock(obj->resv, NULL);
-	reservation_object_add_excl_fence(obj->resv, &rq->fence);
-	reservation_object_unlock(obj->resv);
+	reservation_object_lock(batch->resv, NULL);
+	reservation_object_add_excl_fence(batch->resv, &rq->fence);
+	reservation_object_unlock(batch->resv);
 	i915_vma_unpin(batch);
 
 	i915_vma_move_to_active(vma, rq, true);
-	reservation_object_lock(vma->obj->resv, NULL);
-	reservation_object_add_excl_fence(vma->obj->resv, &rq->fence);
-	reservation_object_unlock(vma->obj->resv);
+	reservation_object_lock(vma->resv, NULL);
+	reservation_object_add_excl_fence(vma->resv, &rq->fence);
+	reservation_object_unlock(vma->resv);
 
 	rq->batch = batch;
 
@@ -1252,7 +1252,6 @@ relocate_entry(struct i915_vma *vma,
 	       struct i915_execbuffer *eb,
 	       const struct i915_vma *target)
 {
-	struct drm_i915_gem_object *obj = vma->obj;
 	u64 offset = reloc->offset;
 	u64 target_offset = relocation_target(reloc, target);
 	bool wide = eb->reloc_cache.use_64bit_reloc;
@@ -1260,7 +1259,7 @@ relocate_entry(struct i915_vma *vma,
 
 	if (!eb->reloc_cache.vaddr &&
 	    (DBG_FORCE_RELOC == FORCE_GPU_RELOC ||
-	     !reservation_object_test_signaled_rcu(obj->resv, true))) {
+	     !reservation_object_test_signaled_rcu(vma->resv, true))) {
 		const unsigned int gen = eb->reloc_cache.gen;
 		unsigned int len;
 		u32 *batch;
@@ -1320,7 +1319,7 @@ relocate_entry(struct i915_vma *vma,
 	}
 
 repeat:
-	vaddr = reloc_vaddr(obj, &eb->reloc_cache, offset >> PAGE_SHIFT);
+	vaddr = reloc_vaddr(vma->obj, &eb->reloc_cache, offset >> PAGE_SHIFT);
 	if (IS_ERR(vaddr))
 		return PTR_ERR(vaddr);
 
@@ -1793,11 +1792,11 @@ slow:
 	return eb_relocate_slow(eb);
 }
 
-static void eb_export_fence(struct drm_i915_gem_object *obj,
+static void eb_export_fence(struct i915_vma *vma,
 			    struct drm_i915_gem_request *req,
 			    unsigned int flags)
 {
-	struct reservation_object *resv = obj->resv;
+	struct reservation_object *resv = vma->resv;
 
 	/*
 	 * Ignore errors from failing to allocate the new fence, we can't
@@ -1856,7 +1855,7 @@ skip_flushes:
 		const struct drm_i915_gem_exec_object2 *entry = &eb->exec[i];
 		struct i915_vma *vma = exec_to_vma(entry);
 
-		eb_export_fence(vma->obj, eb->request, entry->flags);
+		eb_export_fence(vma, eb->request, entry->flags);
 		if (unlikely(entry->flags & __EXEC_OBJECT_HAS_REF))
 			i915_vma_put(vma);
 	}
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index f5c57dff288e..532c709febbd 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -90,6 +90,7 @@ vma_create(struct drm_i915_gem_object *obj,
 	init_request_active(&vma->last_fence, NULL);
 	vma->vm = vm;
 	vma->obj = obj;
+	vma->resv = obj->resv;
 	vma->size = obj->base.size;
 	vma->display_alignment = I915_GTT_MIN_ALIGNMENT;
 
diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index 04d7a5da70fd..4a673fc1a432 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -50,6 +50,7 @@ struct i915_vma {
 	struct drm_i915_gem_object *obj;
 	struct i915_address_space *vm;
 	struct drm_i915_fence_reg *fence;
+	struct reservation_object *resv; /** Alias of obj->resv */
 	struct sg_table *pages;
 	void __iomem *iomap;
 	u64 size;
@@ -111,8 +112,8 @@ struct i915_vma {
 	/**
 	 * Used for performing relocations during execbuffer insertion.
 	 */
-	struct hlist_node exec_node;
 	struct drm_i915_gem_exec_object2 *exec_entry;
+	struct hlist_node exec_node;
 	u32 exec_handle;
 
 	struct i915_gem_context *ctx;