 drivers/gpu/drm/i915/i915_gem.c            |   1 -
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 227 ++++++++++++++++++++++++++--
 2 files changed, 220 insertions(+), 8 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 96b344901a7b..7dcac3bfb771 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4397,7 +4397,6 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915,
 		GEM_BUG_ON(i915_gem_object_is_active(obj));
 		list_for_each_entry_safe(vma, vn,
 					 &obj->vma_list, obj_link) {
-			GEM_BUG_ON(!i915_vma_is_ggtt(vma));
 			GEM_BUG_ON(i915_vma_is_active(vma));
 			vma->flags &= ~I915_VMA_PIN_MASK;
 			i915_vma_close(vma);
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index e262133a7cf5..2f7a2d2510fc 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -40,7 +40,12 @@
 #include "intel_drv.h"
 #include "intel_frontbuffer.h"
 
-#define DBG_USE_CPU_RELOC 0 /* -1 force GTT relocs; 1 force CPU relocs */
+enum {
+	FORCE_CPU_RELOC = 1,
+	FORCE_GTT_RELOC,
+	FORCE_GPU_RELOC,
+#define DBG_FORCE_RELOC 0 /* choose one of the above! */
+};
 
 #define __EXEC_OBJECT_HAS_REF		BIT(31)
 #define __EXEC_OBJECT_HAS_PIN		BIT(30)
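Note: DBG_FORCE_RELOC widens the old two-way DBG_USE_CPU_RELOC switch into a three-way debug knob. Left at 0 the driver keeps its normal heuristics; naming one of the enum values pins every relocation to a single path. As a hypothetical local edit (not part of this patch), a developer stress-testing the new path would rebuild with:

	#define DBG_FORCE_RELOC FORCE_GPU_RELOC /* route all relocations through reloc_gpu() */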
@@ -212,10 +217,15 @@ struct i915_execbuffer {
 		struct drm_mm_node node; /** temporary GTT binding */
 		unsigned long vaddr; /** Current kmap address */
 		unsigned long page; /** Currently mapped page index */
+		unsigned int gen; /** Cached value of INTEL_GEN */
 		bool use_64bit_reloc : 1;
 		bool has_llc : 1;
 		bool has_fence : 1;
 		bool needs_unfenced : 1;
+
+		struct drm_i915_gem_request *rq;
+		u32 *rq_cmd;
+		unsigned int rq_size;
 	} reloc_cache;
 
 	u64 invalid_flags; /** Set of execobj.flags that are invalid */
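Note: the three new reloc_cache fields carry the state of an open GPU relocation batch between relocations. An annotated view (the comments are added here for exposition; they are not in the patch):

	struct drm_i915_gem_request *rq;	/* in-flight relocation request, NULL when idle */
	u32 *rq_cmd;				/* pinned CPU mapping of rq's batch buffer */
	unsigned int rq_size;			/* dwords of rq_cmd written so far */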
@@ -496,8 +506,11 @@ static inline int use_cpu_reloc(const struct reloc_cache *cache,
 	if (!i915_gem_object_has_struct_page(obj))
 		return false;
 
-	if (DBG_USE_CPU_RELOC)
-		return DBG_USE_CPU_RELOC > 0;
+	if (DBG_FORCE_RELOC == FORCE_CPU_RELOC)
+		return true;
+
+	if (DBG_FORCE_RELOC == FORCE_GTT_RELOC)
+		return false;
 
 	return (cache->has_llc ||
 		obj->cache_dirty ||
| @@ -887,6 +900,8 @@ static void eb_reset_vmas(const struct i915_execbuffer *eb) | |||
| 887 | 900 | ||
| 888 | static void eb_destroy(const struct i915_execbuffer *eb) | 901 | static void eb_destroy(const struct i915_execbuffer *eb) |
| 889 | { | 902 | { |
| 903 | GEM_BUG_ON(eb->reloc_cache.rq); | ||
| 904 | |||
| 890 | if (eb->lut_size >= 0) | 905 | if (eb->lut_size >= 0) |
| 891 | kfree(eb->buckets); | 906 | kfree(eb->buckets); |
| 892 | } | 907 | } |
@@ -904,11 +919,14 @@ static void reloc_cache_init(struct reloc_cache *cache,
 	cache->page = -1;
 	cache->vaddr = 0;
 	/* Must be a variable in the struct to allow GCC to unroll. */
+	cache->gen = INTEL_GEN(i915);
 	cache->has_llc = HAS_LLC(i915);
-	cache->has_fence = INTEL_GEN(i915) < 4;
-	cache->needs_unfenced = INTEL_INFO(i915)->unfenced_needs_alignment;
 	cache->use_64bit_reloc = HAS_64BIT_RELOC(i915);
+	cache->has_fence = cache->gen < 4;
+	cache->needs_unfenced = INTEL_INFO(i915)->unfenced_needs_alignment;
 	cache->node.allocated = false;
+	cache->rq = NULL;
+	cache->rq_size = 0;
 }
 
 static inline void *unmask_page(unsigned long p)
@@ -930,10 +948,24 @@ static inline struct i915_ggtt *cache_to_ggtt(struct reloc_cache *cache)
 	return &i915->ggtt;
 }
 
+static void reloc_gpu_flush(struct reloc_cache *cache)
+{
+	GEM_BUG_ON(cache->rq_size >= cache->rq->batch->obj->base.size / sizeof(u32));
+	cache->rq_cmd[cache->rq_size] = MI_BATCH_BUFFER_END;
+	i915_gem_object_unpin_map(cache->rq->batch->obj);
+	i915_gem_chipset_flush(cache->rq->i915);
+
+	__i915_add_request(cache->rq, true);
+	cache->rq = NULL;
+}
+
 static void reloc_cache_reset(struct reloc_cache *cache)
 {
 	void *vaddr;
 
+	if (cache->rq)
+		reloc_gpu_flush(cache);
+
 	if (!cache->vaddr)
 		return;
 
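Note: reloc_gpu_flush() can write the terminator unconditionally because reloc_gpu() (added below) always keeps one dword of headroom for MI_BATCH_BUFFER_END. For scale, with 4 KiB batch-pool pages the command buffer holds PAGE_SIZE / sizeof(u32) = 1024 dwords, so up to 1023 command dwords — roughly 200 five-dword 64-bit relocation writes — accumulate before the cache must flush and open a fresh request.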
@@ -1099,6 +1131,121 @@ static void clflush_write32(u32 *addr, u32 value, unsigned int flushes)
 	*addr = value;
 }
 
+static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
+			     struct i915_vma *vma,
+			     unsigned int len)
+{
+	struct reloc_cache *cache = &eb->reloc_cache;
+	struct drm_i915_gem_object *obj;
+	struct drm_i915_gem_request *rq;
+	struct i915_vma *batch;
+	u32 *cmd;
+	int err;
+
+	GEM_BUG_ON(vma->obj->base.write_domain & I915_GEM_DOMAIN_CPU);
+
+	obj = i915_gem_batch_pool_get(&eb->engine->batch_pool, PAGE_SIZE);
+	if (IS_ERR(obj))
+		return PTR_ERR(obj);
+
+	cmd = i915_gem_object_pin_map(obj,
+				      cache->has_llc ? I915_MAP_WB : I915_MAP_WC);
+	i915_gem_object_unpin_pages(obj);
+	if (IS_ERR(cmd))
+		return PTR_ERR(cmd);
+
+	err = i915_gem_object_set_to_wc_domain(obj, false);
+	if (err)
+		goto err_unmap;
+
+	batch = i915_vma_instance(obj, vma->vm, NULL);
+	if (IS_ERR(batch)) {
+		err = PTR_ERR(batch);
+		goto err_unmap;
+	}
+
+	err = i915_vma_pin(batch, 0, 0, PIN_USER | PIN_NONBLOCK);
+	if (err)
+		goto err_unmap;
+
+	rq = i915_gem_request_alloc(eb->engine, eb->ctx);
+	if (IS_ERR(rq)) {
+		err = PTR_ERR(rq);
+		goto err_unpin;
+	}
+
+	err = i915_gem_request_await_object(rq, vma->obj, true);
+	if (err)
+		goto err_request;
+
+	err = eb->engine->emit_flush(rq, EMIT_INVALIDATE);
+	if (err)
+		goto err_request;
+
+	err = i915_switch_context(rq);
+	if (err)
+		goto err_request;
+
+	err = eb->engine->emit_bb_start(rq,
+					batch->node.start, PAGE_SIZE,
+					cache->gen > 5 ? 0 : I915_DISPATCH_SECURE);
+	if (err)
+		goto err_request;
+
+	GEM_BUG_ON(!reservation_object_test_signaled_rcu(obj->resv, true));
+	i915_vma_move_to_active(batch, rq, 0);
+	reservation_object_lock(obj->resv, NULL);
+	reservation_object_add_excl_fence(obj->resv, &rq->fence);
+	reservation_object_unlock(obj->resv);
+	i915_vma_unpin(batch);
+
+	i915_vma_move_to_active(vma, rq, true);
+	reservation_object_lock(vma->obj->resv, NULL);
+	reservation_object_add_excl_fence(vma->obj->resv, &rq->fence);
+	reservation_object_unlock(vma->obj->resv);
+
+	rq->batch = batch;
+
+	cache->rq = rq;
+	cache->rq_cmd = cmd;
+	cache->rq_size = 0;
+
+	/* Return with batch mapping (cmd) still pinned */
+	return 0;
+
+err_request:
+	i915_add_request(rq);
+err_unpin:
+	i915_vma_unpin(batch);
+err_unmap:
+	i915_gem_object_unpin_map(obj);
+	return err;
+}
+
+static u32 *reloc_gpu(struct i915_execbuffer *eb,
+		      struct i915_vma *vma,
+		      unsigned int len)
+{
+	struct reloc_cache *cache = &eb->reloc_cache;
+	u32 *cmd;
+
+	if (cache->rq_size > PAGE_SIZE/sizeof(u32) - (len + 1))
+		reloc_gpu_flush(cache);
+
+	if (unlikely(!cache->rq)) {
+		int err;
+
+		err = __reloc_gpu_alloc(eb, vma, len);
+		if (unlikely(err))
+			return ERR_PTR(err);
+	}
+
+	cmd = cache->rq_cmd + cache->rq_size;
+	cache->rq_size += len;
+
+	return cmd;
+}
+
 static u64
 relocate_entry(struct i915_vma *vma,
 	       const struct drm_i915_gem_relocation_entry *reloc,
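Note: a minimal sketch of the calling convention for reloc_gpu(), modelled on the relocate_entry() caller in the next hunk; the gen checks are elided, and the 4-dword MI_STORE_DWORD_IMM_GEN4 form shown assumes a gen6/gen7 target:

	/* reserve 4 dwords; opens a request on first use, flushes when the page fills */
	u32 *batch = reloc_gpu(eb, vma, 4);
	if (IS_ERR(batch))
		goto repeat;	/* fall back to the CPU/GTT relocation paths */

	*batch++ = MI_STORE_DWORD_IMM_GEN4;
	*batch++ = 0;
	*batch++ = lower_32_bits(vma->node.start + offset);	/* where to write */
	*batch++ = target_offset;				/* value to write */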
@@ -1111,6 +1258,67 @@ relocate_entry(struct i915_vma *vma,
 	bool wide = eb->reloc_cache.use_64bit_reloc;
 	void *vaddr;
 
+	if (!eb->reloc_cache.vaddr &&
+	    (DBG_FORCE_RELOC == FORCE_GPU_RELOC ||
+	     !reservation_object_test_signaled_rcu(obj->resv, true))) {
+		const unsigned int gen = eb->reloc_cache.gen;
+		unsigned int len;
+		u32 *batch;
+		u64 addr;
+
+		if (wide)
+			len = offset & 7 ? 8 : 5;
+		else if (gen >= 4)
+			len = 4;
+		else if (gen >= 3)
+			len = 3;
+		else /* On gen2 MI_STORE_DWORD_IMM uses a physical address */
+			goto repeat;
+
+		batch = reloc_gpu(eb, vma, len);
+		if (IS_ERR(batch))
+			goto repeat;
+
+		addr = gen8_canonical_addr(vma->node.start + offset);
+		if (wide) {
+			if (offset & 7) {
+				*batch++ = MI_STORE_DWORD_IMM_GEN4;
+				*batch++ = lower_32_bits(addr);
+				*batch++ = upper_32_bits(addr);
+				*batch++ = lower_32_bits(target_offset);
+
+				addr = gen8_canonical_addr(addr + 4);
+
+				*batch++ = MI_STORE_DWORD_IMM_GEN4;
+				*batch++ = lower_32_bits(addr);
+				*batch++ = upper_32_bits(addr);
+				*batch++ = upper_32_bits(target_offset);
+			} else {
+				*batch++ = (MI_STORE_DWORD_IMM_GEN4 | (1 << 21)) + 1;
+				*batch++ = lower_32_bits(addr);
+				*batch++ = upper_32_bits(addr);
+				*batch++ = lower_32_bits(target_offset);
+				*batch++ = upper_32_bits(target_offset);
+			}
+		} else if (gen >= 6) {
+			*batch++ = MI_STORE_DWORD_IMM_GEN4;
+			*batch++ = 0;
+			*batch++ = addr;
+			*batch++ = target_offset;
+		} else if (gen >= 4) {
+			*batch++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
+			*batch++ = 0;
+			*batch++ = addr;
+			*batch++ = target_offset;
+		} else {
+			*batch++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL;
+			*batch++ = addr;
+			*batch++ = target_offset;
+		}
+
+		goto out;
+	}
+
 repeat:
 	vaddr = reloc_vaddr(obj, &eb->reloc_cache, offset >> PAGE_SHIFT);
 	if (IS_ERR(vaddr))
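Note: a worked example of the len budget above, for a wide (64-bit) relocation. At a qword-aligned offset the write is a single 5-dword command — opcode, two address dwords, two immediate dwords — where the (1 << 21) flag selects the qword-write variant and the trailing +1 grows the command length field to cover the extra immediate. At an unaligned offset the qword form cannot be used, so the value splits into two 4-dword stores of the lower and upper halves, hence len = 8. Pre-gen8, a 32-bit store costs 4 dwords (3 on gen3), and gen2's MI_STORE_DWORD_IMM takes a physical address, so it falls back to the mappable paths via goto repeat.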
@@ -1127,6 +1335,7 @@ repeat:
 		goto repeat;
 	}
 
+out:
 	return target->node.start | UPDATE;
 }
 
@@ -1189,7 +1398,8 @@ eb_relocate_entry(struct i915_execbuffer *eb,
 	 * If the relocation already has the right value in it, no
 	 * more work needs to be done.
 	 */
-	if (gen8_canonical_addr(target->node.start) == reloc->presumed_offset)
+	if (!DBG_FORCE_RELOC &&
+	    gen8_canonical_addr(target->node.start) == reloc->presumed_offset)
 		return 0;
 
 	/* Check that the relocation address is valid... */
@@ -1915,7 +2125,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 	eb.i915 = to_i915(dev);
 	eb.file = file;
 	eb.args = args;
-	if (!(args->flags & I915_EXEC_NO_RELOC))
+	if (DBG_FORCE_RELOC || !(args->flags & I915_EXEC_NO_RELOC))
 		args->flags |= __EXEC_HAS_RELOC;
 	eb.exec = exec;
 	eb.ctx = NULL;
@@ -2068,6 +2278,9 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 		eb.batch = vma;
 	}
 
+	/* All GPU relocation batches must be submitted prior to the user rq */
+	GEM_BUG_ON(eb.reloc_cache.rq);
+
 	/* Allocate a request for this batch buffer nice and early. */
 	eb.request = i915_gem_request_alloc(eb.engine, eb.ctx);
 	if (IS_ERR(eb.request)) {
