 drivers/gpu/drm/i915/i915_gem.c            |   1 -
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 227 +++++++++++++++++++++++++---
 2 files changed, 220 insertions(+), 8 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 96b344901a7b..7dcac3bfb771 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4397,7 +4397,6 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915,
 		GEM_BUG_ON(i915_gem_object_is_active(obj));
 		list_for_each_entry_safe(vma, vn,
 					 &obj->vma_list, obj_link) {
-			GEM_BUG_ON(!i915_vma_is_ggtt(vma));
 			GEM_BUG_ON(i915_vma_is_active(vma));
 			vma->flags &= ~I915_VMA_PIN_MASK;
 			i915_vma_close(vma);
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index e262133a7cf5..2f7a2d2510fc 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -40,7 +40,12 @@
 #include "intel_drv.h"
 #include "intel_frontbuffer.h"
 
-#define DBG_USE_CPU_RELOC 0 /* -1 force GTT relocs; 1 force CPU relocs */
+enum {
+	FORCE_CPU_RELOC = 1,
+	FORCE_GTT_RELOC,
+	FORCE_GPU_RELOC,
+#define DBG_FORCE_RELOC 0 /* choose one of the above! */
+};
 
 #define __EXEC_OBJECT_HAS_REF		BIT(31)
 #define __EXEC_OBJECT_HAS_PIN		BIT(30)
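Editor's aside, not part of the patch: DBG_FORCE_RELOC is an ordinary preprocessor define that only sits lexically inside the enum's braces so it stays next to the values it may be set to; left at 0 it matches none of them, and the compiler folds every forced-relocation branch away. A minimal standalone sketch of the same idiom, with a hypothetical main() added purely for demonstration:

#include <stdio.h>

/* Same idiom as the patch: the #define lives inside the enum braces only
 * for proximity to its possible values; 0 means "force nothing".
 */
enum {
	FORCE_CPU_RELOC = 1,
	FORCE_GTT_RELOC,
	FORCE_GPU_RELOC,
#define DBG_FORCE_RELOC 0 /* choose one of the above! */
};

int main(void)
{
	if (DBG_FORCE_RELOC == FORCE_CPU_RELOC)
		puts("forcing CPU relocations");
	else if (DBG_FORCE_RELOC == FORCE_GTT_RELOC)
		puts("forcing GTT relocations");
	else if (DBG_FORCE_RELOC == FORCE_GPU_RELOC)
		puts("forcing GPU relocations");
	else
		puts("no relocation method forced");
	return 0;
}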
@@ -212,10 +217,15 @@ struct i915_execbuffer {
 		struct drm_mm_node node; /** temporary GTT binding */
 		unsigned long vaddr; /** Current kmap address */
 		unsigned long page; /** Currently mapped page index */
+		unsigned int gen; /** Cached value of INTEL_GEN */
 		bool use_64bit_reloc : 1;
 		bool has_llc : 1;
 		bool has_fence : 1;
 		bool needs_unfenced : 1;
+
+		struct drm_i915_gem_request *rq;
+		u32 *rq_cmd;
+		unsigned int rq_size;
 	} reloc_cache;
 
 	u64 invalid_flags; /** Set of execobj.flags that are invalid */
@@ -496,8 +506,11 @@ static inline int use_cpu_reloc(const struct reloc_cache *cache,
 	if (!i915_gem_object_has_struct_page(obj))
 		return false;
 
-	if (DBG_USE_CPU_RELOC)
-		return DBG_USE_CPU_RELOC > 0;
+	if (DBG_FORCE_RELOC == FORCE_CPU_RELOC)
+		return true;
+
+	if (DBG_FORCE_RELOC == FORCE_GTT_RELOC)
+		return false;
 
 	return (cache->has_llc ||
 		obj->cache_dirty ||
@@ -887,6 +900,8 @@ static void eb_reset_vmas(const struct i915_execbuffer *eb)
 
 static void eb_destroy(const struct i915_execbuffer *eb)
 {
+	GEM_BUG_ON(eb->reloc_cache.rq);
+
 	if (eb->lut_size >= 0)
 		kfree(eb->buckets);
 }
@@ -904,11 +919,14 @@ static void reloc_cache_init(struct reloc_cache *cache,
 	cache->page = -1;
 	cache->vaddr = 0;
 	/* Must be a variable in the struct to allow GCC to unroll. */
+	cache->gen = INTEL_GEN(i915);
 	cache->has_llc = HAS_LLC(i915);
-	cache->has_fence = INTEL_GEN(i915) < 4;
-	cache->needs_unfenced = INTEL_INFO(i915)->unfenced_needs_alignment;
 	cache->use_64bit_reloc = HAS_64BIT_RELOC(i915);
+	cache->has_fence = cache->gen < 4;
+	cache->needs_unfenced = INTEL_INFO(i915)->unfenced_needs_alignment;
 	cache->node.allocated = false;
+	cache->rq = NULL;
+	cache->rq_size = 0;
 }
 
 static inline void *unmask_page(unsigned long p)
@@ -930,10 +948,24 @@ static inline struct i915_ggtt *cache_to_ggtt(struct reloc_cache *cache)
 	return &i915->ggtt;
 }
 
+static void reloc_gpu_flush(struct reloc_cache *cache)
+{
+	GEM_BUG_ON(cache->rq_size >= cache->rq->batch->obj->base.size / sizeof(u32));
+	cache->rq_cmd[cache->rq_size] = MI_BATCH_BUFFER_END;
+	i915_gem_object_unpin_map(cache->rq->batch->obj);
+	i915_gem_chipset_flush(cache->rq->i915);
+
+	__i915_add_request(cache->rq, true);
+	cache->rq = NULL;
+}
+
 static void reloc_cache_reset(struct reloc_cache *cache)
 {
 	void *vaddr;
 
+	if (cache->rq)
+		reloc_gpu_flush(cache);
+
 	if (!cache->vaddr)
 		return;
 
@@ -1099,6 +1131,121 @@ static void clflush_write32(u32 *addr, u32 value, unsigned int flushes)
 	*addr = value;
 }
 
+static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
+			     struct i915_vma *vma,
+			     unsigned int len)
+{
+	struct reloc_cache *cache = &eb->reloc_cache;
+	struct drm_i915_gem_object *obj;
+	struct drm_i915_gem_request *rq;
+	struct i915_vma *batch;
+	u32 *cmd;
+	int err;
+
+	GEM_BUG_ON(vma->obj->base.write_domain & I915_GEM_DOMAIN_CPU);
+
+	obj = i915_gem_batch_pool_get(&eb->engine->batch_pool, PAGE_SIZE);
+	if (IS_ERR(obj))
+		return PTR_ERR(obj);
+
+	cmd = i915_gem_object_pin_map(obj,
+				      cache->has_llc ? I915_MAP_WB : I915_MAP_WC);
+	i915_gem_object_unpin_pages(obj);
+	if (IS_ERR(cmd))
+		return PTR_ERR(cmd);
+
+	err = i915_gem_object_set_to_wc_domain(obj, false);
+	if (err)
+		goto err_unmap;
+
+	batch = i915_vma_instance(obj, vma->vm, NULL);
+	if (IS_ERR(batch)) {
+		err = PTR_ERR(batch);
+		goto err_unmap;
+	}
+
+	err = i915_vma_pin(batch, 0, 0, PIN_USER | PIN_NONBLOCK);
+	if (err)
+		goto err_unmap;
+
+	rq = i915_gem_request_alloc(eb->engine, eb->ctx);
+	if (IS_ERR(rq)) {
+		err = PTR_ERR(rq);
+		goto err_unpin;
+	}
+
+	err = i915_gem_request_await_object(rq, vma->obj, true);
+	if (err)
+		goto err_request;
+
+	err = eb->engine->emit_flush(rq, EMIT_INVALIDATE);
+	if (err)
+		goto err_request;
+
+	err = i915_switch_context(rq);
+	if (err)
+		goto err_request;
+
+	err = eb->engine->emit_bb_start(rq,
+					batch->node.start, PAGE_SIZE,
+					cache->gen > 5 ? 0 : I915_DISPATCH_SECURE);
+	if (err)
+		goto err_request;
+
+	GEM_BUG_ON(!reservation_object_test_signaled_rcu(obj->resv, true));
+	i915_vma_move_to_active(batch, rq, 0);
+	reservation_object_lock(obj->resv, NULL);
+	reservation_object_add_excl_fence(obj->resv, &rq->fence);
+	reservation_object_unlock(obj->resv);
+	i915_vma_unpin(batch);
+
+	i915_vma_move_to_active(vma, rq, true);
+	reservation_object_lock(vma->obj->resv, NULL);
+	reservation_object_add_excl_fence(vma->obj->resv, &rq->fence);
+	reservation_object_unlock(vma->obj->resv);
+
+	rq->batch = batch;
+
+	cache->rq = rq;
+	cache->rq_cmd = cmd;
+	cache->rq_size = 0;
+
+	/* Return with batch mapping (cmd) still pinned */
+	return 0;
+
+err_request:
+	i915_add_request(rq);
+err_unpin:
+	i915_vma_unpin(batch);
+err_unmap:
+	i915_gem_object_unpin_map(obj);
+	return err;
+}
+
+static u32 *reloc_gpu(struct i915_execbuffer *eb,
+		      struct i915_vma *vma,
+		      unsigned int len)
+{
+	struct reloc_cache *cache = &eb->reloc_cache;
+	u32 *cmd;
+
+	if (cache->rq_size > PAGE_SIZE/sizeof(u32) - (len + 1))
+		reloc_gpu_flush(cache);
+
+	if (unlikely(!cache->rq)) {
+		int err;
+
+		err = __reloc_gpu_alloc(eb, vma, len);
+		if (unlikely(err))
+			return ERR_PTR(err);
+	}
+
+	cmd = cache->rq_cmd + cache->rq_size;
+	cache->rq_size += len;
+
+	return cmd;
+}
+
 static u64
 relocate_entry(struct i915_vma *vma,
 	       const struct drm_i915_gem_relocation_entry *reloc,
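Editor's aside, not part of the patch: reloc_gpu() above only keeps appending to the current relocation batch while there is room for the requested dwords plus the single MI_BATCH_BUFFER_END dword that reloc_gpu_flush() writes at the end; otherwise it flushes and lets __reloc_gpu_alloc() start a fresh one-page batch. A minimal userspace sketch of that capacity arithmetic, using a hypothetical BATCH_BYTES constant for the 4096-byte batch page:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define BATCH_BYTES 4096u /* one page, as handed out by the batch pool */

/* Mirrors the space check in reloc_gpu(): rq_size dwords are already
 * emitted, len more are wanted, and one slot must stay free for the
 * MI_BATCH_BUFFER_END terminator appended at flush time.
 */
static int batch_has_room(unsigned int rq_size, unsigned int len)
{
	return rq_size <= BATCH_BYTES / sizeof(uint32_t) - (len + 1);
}

int main(void)
{
	const unsigned int dwords = BATCH_BYTES / sizeof(uint32_t); /* 1024 */

	assert(batch_has_room(0, 8));           /* empty batch: plenty of room */
	assert(batch_has_room(dwords - 9, 8));  /* 8 dwords + terminator just fit */
	assert(!batch_has_room(dwords - 8, 8)); /* would overwrite the terminator slot */

	printf("a %u-dword batch keeps one dword spare for the terminator\n", dwords);
	return 0;
}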
@@ -1111,6 +1258,67 @@ relocate_entry(struct i915_vma *vma,
 	bool wide = eb->reloc_cache.use_64bit_reloc;
 	void *vaddr;
 
+	if (!eb->reloc_cache.vaddr &&
+	    (DBG_FORCE_RELOC == FORCE_GPU_RELOC ||
+	     !reservation_object_test_signaled_rcu(obj->resv, true))) {
+		const unsigned int gen = eb->reloc_cache.gen;
+		unsigned int len;
+		u32 *batch;
+		u64 addr;
+
+		if (wide)
+			len = offset & 7 ? 8 : 5;
+		else if (gen >= 4)
+			len = 4;
+		else if (gen >= 3)
+			len = 3;
+		else /* On gen2 MI_STORE_DWORD_IMM uses a physical address */
+			goto repeat;
+
+		batch = reloc_gpu(eb, vma, len);
+		if (IS_ERR(batch))
+			goto repeat;
+
+		addr = gen8_canonical_addr(vma->node.start + offset);
+		if (wide) {
+			if (offset & 7) {
+				*batch++ = MI_STORE_DWORD_IMM_GEN4;
+				*batch++ = lower_32_bits(addr);
+				*batch++ = upper_32_bits(addr);
+				*batch++ = lower_32_bits(target_offset);
+
+				addr = gen8_canonical_addr(addr + 4);
+
+				*batch++ = MI_STORE_DWORD_IMM_GEN4;
+				*batch++ = lower_32_bits(addr);
+				*batch++ = upper_32_bits(addr);
+				*batch++ = upper_32_bits(target_offset);
+			} else {
+				*batch++ = (MI_STORE_DWORD_IMM_GEN4 | (1 << 21)) + 1;
+				*batch++ = lower_32_bits(addr);
+				*batch++ = upper_32_bits(addr);
+				*batch++ = lower_32_bits(target_offset);
+				*batch++ = upper_32_bits(target_offset);
+			}
+		} else if (gen >= 6) {
+			*batch++ = MI_STORE_DWORD_IMM_GEN4;
+			*batch++ = 0;
+			*batch++ = addr;
+			*batch++ = target_offset;
+		} else if (gen >= 4) {
+			*batch++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
+			*batch++ = 0;
+			*batch++ = addr;
+			*batch++ = target_offset;
+		} else {
+			*batch++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL;
+			*batch++ = addr;
+			*batch++ = target_offset;
+		}
+
+		goto out;
+	}
+
 repeat:
 	vaddr = reloc_vaddr(obj, &eb->reloc_cache, offset >> PAGE_SHIFT);
 	if (IS_ERR(vaddr))
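Editor's aside, not part of the patch: the len chosen above is the per-relocation cost in command-stream dwords that reloc_gpu() must reserve: 5 for a qword-aligned 64-bit write, 8 when the offset straddles qword alignment (two dword writes), 4 for a gen4+ MI_STORE_DWORD_IMM_GEN4, 3 for the older gen3 form, and gen2 bails out to the repeat: path because its store takes a physical address. A small sketch of that sizing rule, with a hypothetical helper name:

#include <stdbool.h>
#include <stdio.h>

/* Hypothetical mirror of the len computation in relocate_entry():
 * returns the dwords to reserve, or 0 when the GPU path cannot be used.
 */
static unsigned int reloc_cmd_dwords(unsigned int gen, bool wide,
				     unsigned long long offset)
{
	if (wide)
		return (offset & 7) ? 8 : 5; /* two dword writes vs one qword write */
	if (gen >= 4)
		return 4; /* MI_STORE_DWORD_IMM_GEN4: header, 0, address, value */
	if (gen >= 3)
		return 3; /* MI_STORE_DWORD_IMM: header, address, value */
	return 0; /* gen2: fall back to CPU/GTT relocations */
}

int main(void)
{
	printf("gen8, qword-aligned offset: %u dwords\n", reloc_cmd_dwords(8, true, 0x1000));
	printf("gen8, unaligned offset:     %u dwords\n", reloc_cmd_dwords(8, true, 0x1004));
	printf("gen6:                       %u dwords\n", reloc_cmd_dwords(6, false, 0x1000));
	printf("gen3:                       %u dwords\n", reloc_cmd_dwords(3, false, 0x1000));
	return 0;
}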
@@ -1127,6 +1335,7 @@ repeat:
 		goto repeat;
 	}
 
+out:
 	return target->node.start | UPDATE;
 }
 
@@ -1189,7 +1398,8 @@ eb_relocate_entry(struct i915_execbuffer *eb,
 	 * If the relocation already has the right value in it, no
 	 * more work needs to be done.
 	 */
-	if (gen8_canonical_addr(target->node.start) == reloc->presumed_offset)
+	if (!DBG_FORCE_RELOC &&
+	    gen8_canonical_addr(target->node.start) == reloc->presumed_offset)
 		return 0;
 
 	/* Check that the relocation address is valid... */
@@ -1915,7 +2125,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 	eb.i915 = to_i915(dev);
 	eb.file = file;
 	eb.args = args;
-	if (!(args->flags & I915_EXEC_NO_RELOC))
+	if (DBG_FORCE_RELOC || !(args->flags & I915_EXEC_NO_RELOC))
 		args->flags |= __EXEC_HAS_RELOC;
 	eb.exec = exec;
 	eb.ctx = NULL;
@@ -2068,6 +2278,9 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 		eb.batch = vma;
 	}
 
+	/* All GPU relocation batches must be submitted prior to the user rq */
+	GEM_BUG_ON(eb.reloc_cache.rq);
+
 	/* Allocate a request for this batch buffer nice and early. */
 	eb.request = i915_gem_request_alloc(eb.engine, eb.ctx);
 	if (IS_ERR(eb.request)) {