path: root/drivers/gpu/drm/i915/i915_gem_execbuffer.c
Diffstat (limited to 'drivers/gpu/drm/i915/i915_gem_execbuffer.c')
-rw-r--r--  drivers/gpu/drm/i915/i915_gem_execbuffer.c  2038
1 file changed, 1174 insertions(+), 864 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 9c3f6c40270f..a052072fe8b3 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -42,41 +42,195 @@
42 42
43#define DBG_USE_CPU_RELOC 0 /* -1 force GTT relocs; 1 force CPU relocs */ 43#define DBG_USE_CPU_RELOC 0 /* -1 force GTT relocs; 1 force CPU relocs */
44 44
45#define __EXEC_OBJECT_HAS_PIN (1<<31) 45#define __EXEC_OBJECT_HAS_PIN BIT(31)
46#define __EXEC_OBJECT_HAS_FENCE (1<<30) 46#define __EXEC_OBJECT_HAS_FENCE BIT(30)
47#define __EXEC_OBJECT_NEEDS_MAP (1<<29) 47#define __EXEC_OBJECT_NEEDS_MAP BIT(29)
48#define __EXEC_OBJECT_NEEDS_BIAS (1<<28) 48#define __EXEC_OBJECT_NEEDS_BIAS BIT(28)
49#define __EXEC_OBJECT_INTERNAL_FLAGS (0xf<<28) /* all of the above */ 49#define __EXEC_OBJECT_INTERNAL_FLAGS (~0u << 28) /* all of the above */
50#define __EXEC_OBJECT_RESERVED (__EXEC_OBJECT_HAS_PIN | __EXEC_OBJECT_HAS_FENCE)
51
52#define __EXEC_HAS_RELOC BIT(31)
53#define __EXEC_VALIDATED BIT(30)
54#define UPDATE PIN_OFFSET_FIXED
50 55
51#define BATCH_OFFSET_BIAS (256*1024) 56#define BATCH_OFFSET_BIAS (256*1024)
52 57
53#define __I915_EXEC_ILLEGAL_FLAGS \ 58#define __I915_EXEC_ILLEGAL_FLAGS \
54 (__I915_EXEC_UNKNOWN_FLAGS | I915_EXEC_CONSTANTS_MASK) 59 (__I915_EXEC_UNKNOWN_FLAGS | I915_EXEC_CONSTANTS_MASK)
55 60
61/**
62 * DOC: User command execution
63 *
64 * Userspace submits commands to be executed on the GPU as an instruction
65 * stream within a GEM object we call a batchbuffer. These instructions may
66 * refer to other GEM objects containing auxiliary state such as kernels,
67 * samplers, render targets and even secondary batchbuffers. Userspace does
68 * not know where in the GPU memory these objects reside and so before the
69 * batchbuffer is passed to the GPU for execution, those addresses in the
70 * batchbuffer and auxiliary objects are updated. This is known as relocation,
71 * or patching. To try and avoid having to relocate each object on the next
72 * execution, userspace is told the location of those objects in this pass,
73 * but this remains just a hint as the kernel may choose a new location for
74 * any object in the future.
75 *
76 * Processing an execbuf ioctl is conceptually split up into a few phases.
77 *
78 * 1. Validation - Ensure all the pointers, handles and flags are valid.
79 * 2. Reservation - Assign GPU address space for every object
80 * 3. Relocation - Update any addresses to point to the final locations
81 * 4. Serialisation - Order the request with respect to its dependencies
82 * 5. Construction - Construct a request to execute the batchbuffer
83 * 6. Submission (at some point in the future execution)
84 *
85 * Reserving resources for the execbuf is the most complicated phase. We
86 * neither want to have to migrate the object in the address space, nor do
87 * we want to have to update any relocations pointing to this object. Ideally,
88 * we want to leave the object where it is and for all the existing relocations
89 * to match. If the object is given a new address, or if userspace thinks the
90 * object is elsewhere, we have to parse all the relocation entries and update
91 * the addresses. Userspace can set the I915_EXEC_NO_RELOC flag to hint that
92 * all the target addresses in all of its objects match the value in the
93 * relocation entries and that they all match the presumed offsets given by the
94 * list of execbuffer objects. Using this knowledge, we know that if we haven't
95 * moved any buffers, all the relocation entries are valid and we can skip
96 * the update. (If userspace is wrong, the likely outcome is an impromptu GPU
97 * hang.) The requirements for using I915_EXEC_NO_RELOC are:
98 *
99 * The addresses written in the objects must match the corresponding
100 * reloc.presumed_offset which in turn must match the corresponding
101 * execobject.offset.
102 *
103 * Any render targets written to in the batch must be flagged with
104 * EXEC_OBJECT_WRITE.
105 *
106 * To avoid stalling, execobject.offset should match the current
107 * address of that object within the active context.
108 *
109 * The reservation is done in multiple phases. First we try to keep any
110 * object already bound in its current location - so long as it meets the
111 * constraints imposed by the new execbuffer. Any object left unbound after the
112 * first pass is then fitted into any available idle space. If an object does
113 * not fit, all objects are removed from the reservation and the process rerun
114 * after sorting the objects into a priority order (more difficult to fit
115 * objects are tried first). Failing that, the entire VM is cleared and we try
116 * to fit the execbuf one last time before concluding that it simply will not
117 * fit.
118 *
119 * A small complication to all of this is that we allow userspace not only to
120 * specify an alignment and a size for the object in the address space, but
121 * we also allow userspace to specify the exact offset. These objects are
122 * simpler to place (the location is known a priori); all we have to do is make
123 * sure the space is available.
124 *
125 * Once all the objects are in place, patching up the buried pointers to point
126 * to the final locations is a fairly simple job of walking over the relocation
127 * entry arrays, looking up the right address and rewriting the value into
128 * the object. Simple! ... The relocation entries are stored in user memory
129 * and so to access them we have to copy them into a local buffer. That copy
130 * has to avoid taking any pagefaults as they may lead back to a GEM object
131 * requiring the struct_mutex (i.e. recursive deadlock). So once again we split
132 * the relocation into multiple passes. First we try to do everything within an
133 * atomic context (avoid the pagefaults) which requires that we never wait. If
134 * we detect that we may wait, or if we need to fault, then we have to fallback
135 * to a slower path. The slowpath has to drop the mutex. (Can you hear alarm
136 * bells yet?) Dropping the mutex means that we lose all the state we have
137 * built up so far for the execbuf and we must reset any global data. However,
138 * we do leave the objects pinned in their final locations - which is a
139 * potential issue for concurrent execbufs. Once we have left the mutex, we can
140 * allocate and copy all the relocation entries into a large array at our
141 * leisure, reacquire the mutex, reclaim all the objects and other state and
142 * then proceed to update any incorrect addresses with the objects.
143 *
144 * As we process the relocation entries, we maintain a record of whether the
145 * object is being written to. Using NO_RELOC, we expect userspace to provide
146 * this information instead. We also check whether we can skip the relocation
147 * by comparing the expected value inside the relocation entry with the target's
148 * final address. If they differ, we have to map the current object and rewrite
149 * the 4 or 8 byte pointer within.
150 *
151 * Serialising an execbuf is quite simple according to the rules of the GEM
152 * ABI. Execution within each context is ordered by the order of submission.
153 * Writes to any GEM object are in order of submission and are exclusive. Reads
154 * from a GEM object are unordered with respect to other reads, but ordered by
155 * writes. A write submitted after a read cannot occur before the read, and
156 * similarly any read submitted after a write cannot occur before the write.
157 * Writes are ordered between engines such that only one write occurs at any
158 * time (completing any reads beforehand) - using semaphores where available
159 * and CPU serialisation otherwise. Other GEM accesses obey the same rules: any
160 * write (either via mmaps using set-domain, or via pwrite) must flush all GPU
161 * reads before starting, and any read (either using set-domain or pread) must
162 * flush all GPU writes before starting. (Note we only employ a barrier before,
163 * we currently rely on userspace not concurrently starting a new execution
164 * whilst reading or writing to an object. This may be an advantage or not
165 * depending on how much you trust userspace not to shoot themselves in the
166 * foot.) Serialisation may just result in the request being inserted into
167 * a DAG awaiting its turn, but the simplest is to wait on the CPU until
168 * all dependencies are resolved.
169 *
170 * After all of that, it is just a matter of closing the request and handing it to
171 * the hardware (well, leaving it in a queue to be executed). However, we also
172 * offer the ability for batchbuffers to be run with elevated privileges so
173 * that they access otherwise hidden registers. (Used to adjust L3 cache etc.)
174 * Before any batch is given extra privileges we first must check that it
175 * contains no nefarious instructions: we check that each instruction is from
176 * our whitelist and all registers are also from an allowed list. We first
177 * copy the user's batchbuffer to a shadow (so that the user doesn't have
178 * access to it, either by the CPU or GPU as we scan it) and then parse each
179 * instruction. If everything is ok, we set a flag telling the hardware to run
180 * the batchbuffer in trusted mode, otherwise the ioctl is rejected.
181 */
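As a concrete illustration of the NO_RELOC contract described in the comment above, here is a minimal userspace sketch (an editorial example, not part of this patch): the uapi structures and flags are the real i915 ones, while submit_no_reloc(), the GEM handles and the GPU addresses are hypothetical placeholders, and error handling is omitted.

#include <stdint.h>
#include <drm/i915_drm.h>
#include <xf86drm.h>

/*
 * Minimal sketch, assuming the caller has already created the GEM objects
 * and remembers the offsets reported by a previous execbuf. With
 * I915_EXEC_NO_RELOC, the address written into the batch, each
 * reloc.presumed_offset and each execobject.offset must all agree.
 */
static int submit_no_reloc(int fd, uint32_t batch_bo, uint32_t target_bo,
			   uint64_t batch_gpu_addr, uint64_t target_gpu_addr)
{
	struct drm_i915_gem_relocation_entry reloc = {
		.target_handle = target_bo,
		.offset = 128,				/* where the pointer lives in the batch */
		.presumed_offset = target_gpu_addr,	/* must match exec[0].offset */
		.read_domains = I915_GEM_DOMAIN_RENDER,
		.write_domain = I915_GEM_DOMAIN_RENDER,
	};
	struct drm_i915_gem_exec_object2 exec[2] = {
		/* written to by the batch, hence EXEC_OBJECT_WRITE */
		{ .handle = target_bo, .offset = target_gpu_addr,
		  .flags = EXEC_OBJECT_WRITE },
		/* the batch object itself goes last in the array */
		{ .handle = batch_bo, .offset = batch_gpu_addr,
		  .relocation_count = 1, .relocs_ptr = (uintptr_t)&reloc },
	};
	struct drm_i915_gem_execbuffer2 execbuf = {
		.buffers_ptr = (uintptr_t)exec,
		.buffer_count = 2,
		.flags = I915_EXEC_RENDER | I915_EXEC_NO_RELOC,
	};

	/* If nothing has moved, the kernel can skip relocation processing. */
	return drmIoctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf);
}

If the kernel did have to move an object, it still processes the relocations and reports the new execobject.offset back to userspace, so the hint is safe as long as the three values above are kept in sync.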
182
56struct i915_execbuffer { 183struct i915_execbuffer {
57 struct drm_i915_private *i915; 184 struct drm_i915_private *i915; /** i915 backpointer */
58 struct drm_file *file; 185 struct drm_file *file; /** per-file lookup tables and limits */
59 struct drm_i915_gem_execbuffer2 *args; 186 struct drm_i915_gem_execbuffer2 *args; /** ioctl parameters */
60 struct drm_i915_gem_exec_object2 *exec; 187 struct drm_i915_gem_exec_object2 *exec; /** ioctl execobj[] */
61 struct intel_engine_cs *engine; 188
62 struct i915_gem_context *ctx; 189 struct intel_engine_cs *engine; /** engine to queue the request to */
63 struct i915_address_space *vm; 190 struct i915_gem_context *ctx; /** context for building the request */
64 struct i915_vma *batch; 191 struct i915_address_space *vm; /** GTT and vma for the request */
65 struct drm_i915_gem_request *request; 192
66 u32 batch_start_offset; 193 struct drm_i915_gem_request *request; /** our request to build */
67 u32 batch_len; 194 struct i915_vma *batch; /** identity of the batch obj/vma */
68 unsigned int dispatch_flags; 195
69 struct drm_i915_gem_exec_object2 shadow_exec_entry; 196 /** actual size of execobj[] as we may extend it for the cmdparser */
70 bool need_relocs; 197 unsigned int buffer_count;
71 struct list_head vmas; 198
199 /** list of vma not yet bound during reservation phase */
200 struct list_head unbound;
201
202 /** list of vma that have execobj.relocation_count */
203 struct list_head relocs;
204
205 /**
206 * Track the most recently used object for relocations, as we
207 * frequently have to perform multiple relocations within the same
208 * obj/page
209 */
72 struct reloc_cache { 210 struct reloc_cache {
73 struct drm_mm_node node; 211 struct drm_mm_node node; /** temporary GTT binding */
74 unsigned long vaddr; 212 unsigned long vaddr; /** Current kmap address */
75 unsigned int page; 213 unsigned long page; /** Currently mapped page index */
76 bool use_64bit_reloc : 1; 214 bool use_64bit_reloc : 1;
215 bool has_llc : 1;
216 bool has_fence : 1;
217 bool needs_unfenced : 1;
77 } reloc_cache; 218 } reloc_cache;
78 int lut_mask; 219
79 struct hlist_head *buckets; 220 u64 invalid_flags; /** Set of execobj.flags that are invalid */
221 u32 context_flags; /** Set of execobj.flags to insert from the ctx */
222
223 u32 batch_start_offset; /** Location within object of batch */
224 u32 batch_len; /** Length of batch within object */
225 u32 batch_flags; /** Flags composed for emit_bb_start() */
226
227 /**
228 * Indicate either the size of the hashtable used to resolve
229 * relocation handles, or if negative that we are using a direct
230 * index into the execobj[].
231 */
232 int lut_size;
233 struct hlist_head *buckets; /** ht for relocation handles */
80}; 234};
81 235
82/* 236/*
@@ -87,11 +241,41 @@ struct i915_execbuffer {
87#define __exec_to_vma(ee) (ee)->rsvd2 241#define __exec_to_vma(ee) (ee)->rsvd2
88#define exec_to_vma(ee) u64_to_ptr(struct i915_vma, __exec_to_vma(ee)) 242#define exec_to_vma(ee) u64_to_ptr(struct i915_vma, __exec_to_vma(ee))
89 243
244/*
245 * Used to convert any address to canonical form.
246 * Starting from gen8, some commands (e.g. STATE_BASE_ADDRESS,
247 * MI_LOAD_REGISTER_MEM and others, see Broadwell PRM Vol2a) require the
248 * addresses to be in a canonical form:
249 * "GraphicsAddress[63:48] are ignored by the HW and assumed to be in correct
250 * canonical form [63:48] == [47]."
251 */
252#define GEN8_HIGH_ADDRESS_BIT 47
253static inline u64 gen8_canonical_addr(u64 address)
254{
255 return sign_extend64(address, GEN8_HIGH_ADDRESS_BIT);
256}
257
258static inline u64 gen8_noncanonical_addr(u64 address)
259{
260 return address & GENMASK_ULL(GEN8_HIGH_ADDRESS_BIT, 0);
261}
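For illustration, an editorial example with arbitrary addresses (not part of the patch) of how these two helpers behave:

	gen8_canonical_addr(0x0000800000001000ull);    /* 0xffff800000001000: bit 47 set, so bits 63:48 mirror it */
	gen8_canonical_addr(0x00007fffffffe000ull);    /* 0x00007fffffffe000: bit 47 clear, address unchanged */
	gen8_noncanonical_addr(0xffff800000001000ull); /* 0x0000800000001000: bits 63:48 masked off again */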
262
90static int eb_create(struct i915_execbuffer *eb) 263static int eb_create(struct i915_execbuffer *eb)
91{ 264{
92 if ((eb->args->flags & I915_EXEC_HANDLE_LUT) == 0) { 265 if (!(eb->args->flags & I915_EXEC_HANDLE_LUT)) {
93 unsigned int size = 1 + ilog2(eb->args->buffer_count); 266 unsigned int size = 1 + ilog2(eb->buffer_count);
94 267
268 /*
269 * Without a 1:1 association between relocation handles and
270 * the execobject[] index, we instead create a hashtable.
271 * We size it dynamically based on available memory, starting
272 * first with a 1:1 associative hash and scaling back until
273 * the allocation succeeds.
274 *
275 * Later on we use a positive lut_size to indicate we are
276 * using this hashtable, and a negative value to indicate a
277 * direct lookup.
278 */
95 do { 279 do {
96 eb->buckets = kzalloc(sizeof(struct hlist_head) << size, 280 eb->buckets = kzalloc(sizeof(struct hlist_head) << size,
97 GFP_TEMPORARY | 281 GFP_TEMPORARY |
@@ -108,112 +292,411 @@ static int eb_create(struct i915_execbuffer *eb)
108 return -ENOMEM; 292 return -ENOMEM;
109 } 293 }
110 294
111 eb->lut_mask = size; 295 eb->lut_size = size;
112 } else { 296 } else {
113 eb->lut_mask = -eb->args->buffer_count; 297 eb->lut_size = -eb->buffer_count;
114 } 298 }
115 299
116 return 0; 300 return 0;
117} 301}
118 302
303static bool
304eb_vma_misplaced(const struct drm_i915_gem_exec_object2 *entry,
305 const struct i915_vma *vma)
306{
307 if (!(entry->flags & __EXEC_OBJECT_HAS_PIN))
308 return true;
309
310 if (vma->node.size < entry->pad_to_size)
311 return true;
312
313 if (entry->alignment && !IS_ALIGNED(vma->node.start, entry->alignment))
314 return true;
315
316 if (entry->flags & EXEC_OBJECT_PINNED &&
317 vma->node.start != entry->offset)
318 return true;
319
320 if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS &&
321 vma->node.start < BATCH_OFFSET_BIAS)
322 return true;
323
324 if (!(entry->flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) &&
325 (vma->node.start + vma->node.size - 1) >> 32)
326 return true;
327
328 return false;
329}
330
331static inline void
332eb_pin_vma(struct i915_execbuffer *eb,
333 struct drm_i915_gem_exec_object2 *entry,
334 struct i915_vma *vma)
335{
336 u64 flags;
337
338 flags = vma->node.start;
339 flags |= PIN_USER | PIN_NONBLOCK | PIN_OFFSET_FIXED;
340 if (unlikely(entry->flags & EXEC_OBJECT_NEEDS_GTT))
341 flags |= PIN_GLOBAL;
342 if (unlikely(i915_vma_pin(vma, 0, 0, flags)))
343 return;
344
345 if (unlikely(entry->flags & EXEC_OBJECT_NEEDS_FENCE)) {
346 if (unlikely(i915_vma_get_fence(vma))) {
347 i915_vma_unpin(vma);
348 return;
349 }
350
351 if (i915_vma_pin_fence(vma))
352 entry->flags |= __EXEC_OBJECT_HAS_FENCE;
353 }
354
355 entry->flags |= __EXEC_OBJECT_HAS_PIN;
356}
357
119static inline void 358static inline void
120__eb_unreserve_vma(struct i915_vma *vma, 359__eb_unreserve_vma(struct i915_vma *vma,
121 const struct drm_i915_gem_exec_object2 *entry) 360 const struct drm_i915_gem_exec_object2 *entry)
122{ 361{
362 GEM_BUG_ON(!(entry->flags & __EXEC_OBJECT_HAS_PIN));
363
123 if (unlikely(entry->flags & __EXEC_OBJECT_HAS_FENCE)) 364 if (unlikely(entry->flags & __EXEC_OBJECT_HAS_FENCE))
124 i915_vma_unpin_fence(vma); 365 i915_vma_unpin_fence(vma);
125 366
126 if (entry->flags & __EXEC_OBJECT_HAS_PIN) 367 __i915_vma_unpin(vma);
127 __i915_vma_unpin(vma);
128} 368}
129 369
130static void 370static inline void
131eb_unreserve_vma(struct i915_vma *vma) 371eb_unreserve_vma(struct i915_vma *vma,
372 struct drm_i915_gem_exec_object2 *entry)
132{ 373{
133 struct drm_i915_gem_exec_object2 *entry = vma->exec_entry; 374 if (!(entry->flags & __EXEC_OBJECT_HAS_PIN))
375 return;
134 376
135 __eb_unreserve_vma(vma, entry); 377 __eb_unreserve_vma(vma, entry);
136 entry->flags &= ~(__EXEC_OBJECT_HAS_FENCE | __EXEC_OBJECT_HAS_PIN); 378 entry->flags &= ~__EXEC_OBJECT_RESERVED;
137} 379}
138 380
139static void 381static int
140eb_reset(struct i915_execbuffer *eb) 382eb_validate_vma(struct i915_execbuffer *eb,
383 struct drm_i915_gem_exec_object2 *entry,
384 struct i915_vma *vma)
141{ 385{
142 struct i915_vma *vma; 386 if (unlikely(entry->flags & eb->invalid_flags))
387 return -EINVAL;
143 388
144 list_for_each_entry(vma, &eb->vmas, exec_link) { 389 if (unlikely(entry->alignment && !is_power_of_2(entry->alignment)))
145 eb_unreserve_vma(vma); 390 return -EINVAL;
146 i915_vma_put(vma); 391
147 vma->exec_entry = NULL; 392 /*
393 * Offset can be used as input (EXEC_OBJECT_PINNED), reject
394 * any non-page-aligned or non-canonical addresses.
395 */
396 if (unlikely(entry->flags & EXEC_OBJECT_PINNED &&
397 entry->offset != gen8_canonical_addr(entry->offset & PAGE_MASK)))
398 return -EINVAL;
399
400 /* pad_to_size was once a reserved field, so sanitize it */
401 if (entry->flags & EXEC_OBJECT_PAD_TO_SIZE) {
402 if (unlikely(offset_in_page(entry->pad_to_size)))
403 return -EINVAL;
404 } else {
405 entry->pad_to_size = 0;
148 } 406 }
149 407
150 if (eb->lut_mask >= 0) 408 if (unlikely(vma->exec_entry)) {
151 memset(eb->buckets, 0, 409 DRM_DEBUG("Object [handle %d, index %d] appears more than once in object list\n",
152 sizeof(struct hlist_head) << eb->lut_mask); 410 entry->handle, (int)(entry - eb->exec));
411 return -EINVAL;
412 }
413
414 /*
415 * From the drm_mm perspective, the address space is continuous,
416 * so from this point we're always using non-canonical
417 * form internally.
418 */
419 entry->offset = gen8_noncanonical_addr(entry->offset);
420
421 return 0;
153} 422}
154 423
155static bool 424static int
156eb_add_vma(struct i915_execbuffer *eb, struct i915_vma *vma, int i) 425eb_add_vma(struct i915_execbuffer *eb,
426 struct drm_i915_gem_exec_object2 *entry,
427 struct i915_vma *vma)
157{ 428{
158 if (unlikely(vma->exec_entry)) { 429 int err;
159 DRM_DEBUG("Object [handle %d, index %d] appears more than once in object list\n", 430
160 eb->exec[i].handle, i); 431 GEM_BUG_ON(i915_vma_is_closed(vma));
161 return false; 432
433 if (!(eb->args->flags & __EXEC_VALIDATED)) {
434 err = eb_validate_vma(eb, entry, vma);
435 if (unlikely(err))
436 return err;
162 } 437 }
163 list_add_tail(&vma->exec_link, &eb->vmas);
164 438
165 vma->exec_entry = &eb->exec[i]; 439 if (eb->lut_size >= 0) {
166 if (eb->lut_mask >= 0) { 440 vma->exec_handle = entry->handle;
167 vma->exec_handle = eb->exec[i].handle;
168 hlist_add_head(&vma->exec_node, 441 hlist_add_head(&vma->exec_node,
169 &eb->buckets[hash_32(vma->exec_handle, 442 &eb->buckets[hash_32(entry->handle,
170 eb->lut_mask)]); 443 eb->lut_size)]);
171 } 444 }
172 445
173 i915_vma_get(vma); 446 if (entry->relocation_count)
174 __exec_to_vma(&eb->exec[i]) = (uintptr_t)vma; 447 list_add_tail(&vma->reloc_link, &eb->relocs);
175 return true; 448
449 if (!eb->reloc_cache.has_fence) {
450 entry->flags &= ~EXEC_OBJECT_NEEDS_FENCE;
451 } else {
452 if ((entry->flags & EXEC_OBJECT_NEEDS_FENCE ||
453 eb->reloc_cache.needs_unfenced) &&
454 i915_gem_object_is_tiled(vma->obj))
455 entry->flags |= EXEC_OBJECT_NEEDS_GTT | __EXEC_OBJECT_NEEDS_MAP;
456 }
457
458 if (!(entry->flags & EXEC_OBJECT_PINNED))
459 entry->flags |= eb->context_flags;
460
461 /*
462 * Stash a pointer from the vma to execobj, so we can query its flags,
463 * size, alignment etc as provided by the user. Also we stash a pointer
464 * to the vma inside the execobj so that we can use a direct lookup
465 * to find the right target VMA when doing relocations.
466 */
467 vma->exec_entry = entry;
468 __exec_to_vma(entry) = (uintptr_t)i915_vma_get(vma);
469
470 err = 0;
471 if (vma->node.size)
472 eb_pin_vma(eb, entry, vma);
473 if (eb_vma_misplaced(entry, vma)) {
474 eb_unreserve_vma(vma, entry);
475
476 list_add_tail(&vma->exec_link, &eb->unbound);
477 if (drm_mm_node_allocated(&vma->node))
478 err = i915_vma_unbind(vma);
479 } else {
480 if (entry->offset != vma->node.start) {
481 entry->offset = vma->node.start | UPDATE;
482 eb->args->flags |= __EXEC_HAS_RELOC;
483 }
484 }
485 return err;
486}
487
488static inline int use_cpu_reloc(const struct reloc_cache *cache,
489 const struct drm_i915_gem_object *obj)
490{
491 if (!i915_gem_object_has_struct_page(obj))
492 return false;
493
494 if (DBG_USE_CPU_RELOC)
495 return DBG_USE_CPU_RELOC > 0;
496
497 return (cache->has_llc ||
498 obj->cache_dirty ||
499 obj->cache_level != I915_CACHE_NONE);
500}
501
502static int eb_reserve_vma(const struct i915_execbuffer *eb,
503 struct i915_vma *vma)
504{
505 struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
506 u64 flags;
507 int err;
508
509 flags = PIN_USER | PIN_NONBLOCK;
510 if (entry->flags & EXEC_OBJECT_NEEDS_GTT)
511 flags |= PIN_GLOBAL;
512
513 /*
514 * Wa32bitGeneralStateOffset & Wa32bitInstructionBaseOffset,
515 * limit address to the first 4GBs for unflagged objects.
516 */
517 if (!(entry->flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS))
518 flags |= PIN_ZONE_4G;
519
520 if (entry->flags & __EXEC_OBJECT_NEEDS_MAP)
521 flags |= PIN_MAPPABLE;
522
523 if (entry->flags & EXEC_OBJECT_PINNED) {
524 flags |= entry->offset | PIN_OFFSET_FIXED;
525 flags &= ~PIN_NONBLOCK; /* force overlapping PINNED checks */
526 } else if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS) {
527 flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS;
528 }
529
530 err = i915_vma_pin(vma, entry->pad_to_size, entry->alignment, flags);
531 if (err)
532 return err;
533
534 if (entry->offset != vma->node.start) {
535 entry->offset = vma->node.start | UPDATE;
536 eb->args->flags |= __EXEC_HAS_RELOC;
537 }
538
539 entry->flags |= __EXEC_OBJECT_HAS_PIN;
540 GEM_BUG_ON(eb_vma_misplaced(entry, vma));
541
542 if (unlikely(entry->flags & EXEC_OBJECT_NEEDS_FENCE)) {
543 err = i915_vma_get_fence(vma);
544 if (unlikely(err)) {
545 i915_vma_unpin(vma);
546 return err;
547 }
548
549 if (i915_vma_pin_fence(vma))
550 entry->flags |= __EXEC_OBJECT_HAS_FENCE;
551 }
552
553 return 0;
554}
555
556static int eb_reserve(struct i915_execbuffer *eb)
557{
558 const unsigned int count = eb->buffer_count;
559 struct list_head last;
560 struct i915_vma *vma;
561 unsigned int i, pass;
562 int err;
563
564 /*
565 * Attempt to pin all of the buffers into the GTT.
566 * This is done in 3 phases:
567 *
568 * 1a. Unbind all objects that do not match the GTT constraints for
569 * the execbuffer (fenceable, mappable, alignment etc).
570 * 1b. Increment pin count for already bound objects.
571 * 2. Bind new objects.
572 * 3. Decrement pin count.
573 *
574 * This avoids unnecessary unbinding of later objects in order to make
575 * room for the earlier objects *unless* we need to defragment.
576 */
577
578 pass = 0;
579 err = 0;
580 do {
581 list_for_each_entry(vma, &eb->unbound, exec_link) {
582 err = eb_reserve_vma(eb, vma);
583 if (err)
584 break;
585 }
586 if (err != -ENOSPC)
587 return err;
588
589 /* Resort *all* the objects into priority order */
590 INIT_LIST_HEAD(&eb->unbound);
591 INIT_LIST_HEAD(&last);
592 for (i = 0; i < count; i++) {
593 struct drm_i915_gem_exec_object2 *entry = &eb->exec[i];
594
595 if (entry->flags & EXEC_OBJECT_PINNED &&
596 entry->flags & __EXEC_OBJECT_HAS_PIN)
597 continue;
598
599 vma = exec_to_vma(entry);
600 eb_unreserve_vma(vma, entry);
601
602 if (entry->flags & EXEC_OBJECT_PINNED)
603 list_add(&vma->exec_link, &eb->unbound);
604 else if (entry->flags & __EXEC_OBJECT_NEEDS_MAP)
605 list_add_tail(&vma->exec_link, &eb->unbound);
606 else
607 list_add_tail(&vma->exec_link, &last);
608 }
609 list_splice_tail(&last, &eb->unbound);
610
611 switch (pass++) {
612 case 0:
613 break;
614
615 case 1:
616 /* Too fragmented, unbind everything and retry */
617 err = i915_gem_evict_vm(eb->vm);
618 if (err)
619 return err;
620 break;
621
622 default:
623 return -ENOSPC;
624 }
625 } while (1);
176} 626}
177 627
178static inline struct hlist_head * 628static inline struct hlist_head *
179ht_head(const struct i915_gem_context *ctx, u32 handle) 629ht_head(const struct i915_gem_context_vma_lut *lut, u32 handle)
180{ 630{
181 return &ctx->vma_lut.ht[hash_32(handle, ctx->vma_lut.ht_bits)]; 631 return &lut->ht[hash_32(handle, lut->ht_bits)];
182} 632}
183 633
184static inline bool 634static inline bool
185ht_needs_resize(const struct i915_gem_context *ctx) 635ht_needs_resize(const struct i915_gem_context_vma_lut *lut)
186{ 636{
187 return (4*ctx->vma_lut.ht_count > 3*ctx->vma_lut.ht_size || 637 return (4*lut->ht_count > 3*lut->ht_size ||
188 4*ctx->vma_lut.ht_count + 1 < ctx->vma_lut.ht_size); 638 4*lut->ht_count + 1 < lut->ht_size);
189} 639}
190 640
191static int 641static unsigned int eb_batch_index(const struct i915_execbuffer *eb)
192eb_lookup_vmas(struct i915_execbuffer *eb) 642{
643 return eb->buffer_count - 1;
644}
645
646static int eb_select_context(struct i915_execbuffer *eb)
647{
648 struct i915_gem_context *ctx;
649
650 ctx = i915_gem_context_lookup(eb->file->driver_priv, eb->args->rsvd1);
651 if (unlikely(IS_ERR(ctx)))
652 return PTR_ERR(ctx);
653
654 if (unlikely(i915_gem_context_is_banned(ctx))) {
655 DRM_DEBUG("Context %u tried to submit while banned\n",
656 ctx->user_handle);
657 return -EIO;
658 }
659
660 eb->ctx = i915_gem_context_get(ctx);
661 eb->vm = ctx->ppgtt ? &ctx->ppgtt->base : &eb->i915->ggtt.base;
662
663 eb->context_flags = 0;
664 if (ctx->flags & CONTEXT_NO_ZEROMAP)
665 eb->context_flags |= __EXEC_OBJECT_NEEDS_BIAS;
666
667 return 0;
668}
669
670static int eb_lookup_vmas(struct i915_execbuffer *eb)
193{ 671{
194#define INTERMEDIATE BIT(0) 672#define INTERMEDIATE BIT(0)
195 const int count = eb->args->buffer_count; 673 const unsigned int count = eb->buffer_count;
674 struct i915_gem_context_vma_lut *lut = &eb->ctx->vma_lut;
196 struct i915_vma *vma; 675 struct i915_vma *vma;
676 struct idr *idr;
677 unsigned int i;
197 int slow_pass = -1; 678 int slow_pass = -1;
198 int i; 679 int err;
199 680
200 INIT_LIST_HEAD(&eb->vmas); 681 INIT_LIST_HEAD(&eb->relocs);
682 INIT_LIST_HEAD(&eb->unbound);
201 683
202 if (unlikely(eb->ctx->vma_lut.ht_size & I915_CTX_RESIZE_IN_PROGRESS)) 684 if (unlikely(lut->ht_size & I915_CTX_RESIZE_IN_PROGRESS))
203 flush_work(&eb->ctx->vma_lut.resize); 685 flush_work(&lut->resize);
204 GEM_BUG_ON(eb->ctx->vma_lut.ht_size & I915_CTX_RESIZE_IN_PROGRESS); 686 GEM_BUG_ON(lut->ht_size & I915_CTX_RESIZE_IN_PROGRESS);
205 687
206 for (i = 0; i < count; i++) { 688 for (i = 0; i < count; i++) {
207 __exec_to_vma(&eb->exec[i]) = 0; 689 __exec_to_vma(&eb->exec[i]) = 0;
208 690
209 hlist_for_each_entry(vma, 691 hlist_for_each_entry(vma,
210 ht_head(eb->ctx, eb->exec[i].handle), 692 ht_head(lut, eb->exec[i].handle),
211 ctx_node) { 693 ctx_node) {
212 if (vma->ctx_handle != eb->exec[i].handle) 694 if (vma->ctx_handle != eb->exec[i].handle)
213 continue; 695 continue;
214 696
215 if (!eb_add_vma(eb, vma, i)) 697 err = eb_add_vma(eb, &eb->exec[i], vma);
216 return -EINVAL; 698 if (unlikely(err))
699 return err;
217 700
218 goto next_vma; 701 goto next_vma;
219 } 702 }
@@ -224,24 +707,27 @@ next_vma: ;
224 } 707 }
225 708
226 if (slow_pass < 0) 709 if (slow_pass < 0)
227 return 0; 710 goto out;
228 711
229 spin_lock(&eb->file->table_lock); 712 spin_lock(&eb->file->table_lock);
230 /* Grab a reference to the object and release the lock so we can lookup 713 /*
231 * or create the VMA without using GFP_ATOMIC */ 714 * Grab a reference to the object and release the lock so we can lookup
715 * or create the VMA without using GFP_ATOMIC
716 */
717 idr = &eb->file->object_idr;
232 for (i = slow_pass; i < count; i++) { 718 for (i = slow_pass; i < count; i++) {
233 struct drm_i915_gem_object *obj; 719 struct drm_i915_gem_object *obj;
234 720
235 if (__exec_to_vma(&eb->exec[i])) 721 if (__exec_to_vma(&eb->exec[i]))
236 continue; 722 continue;
237 723
238 obj = to_intel_bo(idr_find(&eb->file->object_idr, 724 obj = to_intel_bo(idr_find(idr, eb->exec[i].handle));
239 eb->exec[i].handle));
240 if (unlikely(!obj)) { 725 if (unlikely(!obj)) {
241 spin_unlock(&eb->file->table_lock); 726 spin_unlock(&eb->file->table_lock);
242 DRM_DEBUG("Invalid object handle %d at index %d\n", 727 DRM_DEBUG("Invalid object handle %d at index %d\n",
243 eb->exec[i].handle, i); 728 eb->exec[i].handle, i);
244 return -ENOENT; 729 err = -ENOENT;
730 goto err;
245 } 731 }
246 732
247 __exec_to_vma(&eb->exec[i]) = INTERMEDIATE | (uintptr_t)obj; 733 __exec_to_vma(&eb->exec[i]) = INTERMEDIATE | (uintptr_t)obj;
@@ -251,7 +737,7 @@ next_vma: ;
251 for (i = slow_pass; i < count; i++) { 737 for (i = slow_pass; i < count; i++) {
252 struct drm_i915_gem_object *obj; 738 struct drm_i915_gem_object *obj;
253 739
254 if ((__exec_to_vma(&eb->exec[i]) & INTERMEDIATE) == 0) 740 if (!(__exec_to_vma(&eb->exec[i]) & INTERMEDIATE))
255 continue; 741 continue;
256 742
257 /* 743 /*
@@ -262,12 +748,13 @@ next_vma: ;
262 * from the (obj, vm) we don't run the risk of creating 748 * from the (obj, vm) we don't run the risk of creating
263 * duplicated vmas for the same vm. 749 * duplicated vmas for the same vm.
264 */ 750 */
265 obj = u64_to_ptr(struct drm_i915_gem_object, 751 obj = u64_to_ptr(typeof(*obj),
266 __exec_to_vma(&eb->exec[i]) & ~INTERMEDIATE); 752 __exec_to_vma(&eb->exec[i]) & ~INTERMEDIATE);
267 vma = i915_vma_instance(obj, eb->vm, NULL); 753 vma = i915_vma_instance(obj, eb->vm, NULL);
268 if (unlikely(IS_ERR(vma))) { 754 if (unlikely(IS_ERR(vma))) {
269 DRM_DEBUG("Failed to lookup VMA\n"); 755 DRM_DEBUG("Failed to lookup VMA\n");
270 return PTR_ERR(vma); 756 err = PTR_ERR(vma);
757 goto err;
271 } 758 }
272 759
273 /* First come, first served */ 760 /* First come, first served */
@@ -275,32 +762,31 @@ next_vma: ;
275 vma->ctx = eb->ctx; 762 vma->ctx = eb->ctx;
276 vma->ctx_handle = eb->exec[i].handle; 763 vma->ctx_handle = eb->exec[i].handle;
277 hlist_add_head(&vma->ctx_node, 764 hlist_add_head(&vma->ctx_node,
278 ht_head(eb->ctx, eb->exec[i].handle)); 765 ht_head(lut, eb->exec[i].handle));
279 eb->ctx->vma_lut.ht_count++; 766 lut->ht_count++;
767 lut->ht_size |= I915_CTX_RESIZE_IN_PROGRESS;
280 if (i915_vma_is_ggtt(vma)) { 768 if (i915_vma_is_ggtt(vma)) {
281 GEM_BUG_ON(obj->vma_hashed); 769 GEM_BUG_ON(obj->vma_hashed);
282 obj->vma_hashed = vma; 770 obj->vma_hashed = vma;
283 } 771 }
284 } 772 }
285 773
286 if (!eb_add_vma(eb, vma, i)) 774 err = eb_add_vma(eb, &eb->exec[i], vma);
287 return -EINVAL; 775 if (unlikely(err))
776 goto err;
288 } 777 }
289 778
290 if (ht_needs_resize(eb->ctx)) { 779 if (lut->ht_size & I915_CTX_RESIZE_IN_PROGRESS) {
291 eb->ctx->vma_lut.ht_size |= I915_CTX_RESIZE_IN_PROGRESS; 780 if (ht_needs_resize(lut))
292 queue_work(system_highpri_wq, &eb->ctx->vma_lut.resize); 781 queue_work(system_highpri_wq, &lut->resize);
782 else
783 lut->ht_size &= ~I915_CTX_RESIZE_IN_PROGRESS;
293 } 784 }
294 785
295 return 0; 786out:
296#undef INTERMEDIATE 787 /* take note of the batch buffer before we might reorder the lists */
297} 788 i = eb_batch_index(eb);
298 789 eb->batch = exec_to_vma(&eb->exec[i]);
299static struct i915_vma *
300eb_get_batch(struct i915_execbuffer *eb)
301{
302 struct i915_vma *vma =
303 exec_to_vma(&eb->exec[eb->args->buffer_count - 1]);
304 790
305 /* 791 /*
306 * SNA is doing fancy tricks with compressing batch buffers, which leads 792 * SNA is doing fancy tricks with compressing batch buffers, which leads
@@ -311,24 +797,36 @@ eb_get_batch(struct i915_execbuffer *eb)
311 * Note that actual hangs have only been observed on gen7, but for 797 * Note that actual hangs have only been observed on gen7, but for
312 * paranoia do it everywhere. 798 * paranoia do it everywhere.
313 */ 799 */
314 if ((vma->exec_entry->flags & EXEC_OBJECT_PINNED) == 0) 800 if (!(eb->exec[i].flags & EXEC_OBJECT_PINNED))
315 vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS; 801 eb->exec[i].flags |= __EXEC_OBJECT_NEEDS_BIAS;
802 if (eb->reloc_cache.has_fence)
803 eb->exec[i].flags |= EXEC_OBJECT_NEEDS_FENCE;
316 804
317 return vma; 805 eb->args->flags |= __EXEC_VALIDATED;
806 return eb_reserve(eb);
807
808err:
809 for (i = slow_pass; i < count; i++) {
810 if (__exec_to_vma(&eb->exec[i]) & INTERMEDIATE)
811 __exec_to_vma(&eb->exec[i]) = 0;
812 }
813 lut->ht_size &= ~I915_CTX_RESIZE_IN_PROGRESS;
814 return err;
815#undef INTERMEDIATE
318} 816}
319 817
320static struct i915_vma * 818static struct i915_vma *
321eb_get_vma(struct i915_execbuffer *eb, unsigned long handle) 819eb_get_vma(const struct i915_execbuffer *eb, unsigned long handle)
322{ 820{
323 if (eb->lut_mask < 0) { 821 if (eb->lut_size < 0) {
324 if (handle >= -eb->lut_mask) 822 if (handle >= -eb->lut_size)
325 return NULL; 823 return NULL;
326 return exec_to_vma(&eb->exec[handle]); 824 return exec_to_vma(&eb->exec[handle]);
327 } else { 825 } else {
328 struct hlist_head *head; 826 struct hlist_head *head;
329 struct i915_vma *vma; 827 struct i915_vma *vma;
330 828
331 head = &eb->buckets[hash_32(handle, eb->lut_mask)]; 829 head = &eb->buckets[hash_32(handle, eb->lut_size)];
332 hlist_for_each_entry(vma, head, exec_node) { 830 hlist_for_each_entry(vma, head, exec_node) {
333 if (vma->exec_handle == handle) 831 if (vma->exec_handle == handle)
334 return vma; 832 return vma;
@@ -337,61 +835,46 @@ eb_get_vma(struct i915_execbuffer *eb, unsigned long handle)
337 } 835 }
338} 836}
339 837
340static void eb_destroy(struct i915_execbuffer *eb) 838static void eb_release_vmas(const struct i915_execbuffer *eb)
341{ 839{
342 struct i915_vma *vma; 840 const unsigned int count = eb->buffer_count;
841 unsigned int i;
343 842
344 list_for_each_entry(vma, &eb->vmas, exec_link) { 843 for (i = 0; i < count; i++) {
345 if (!vma->exec_entry) 844 struct drm_i915_gem_exec_object2 *entry = &eb->exec[i];
845 struct i915_vma *vma = exec_to_vma(entry);
846
847 if (!vma)
346 continue; 848 continue;
347 849
348 __eb_unreserve_vma(vma, vma->exec_entry); 850 GEM_BUG_ON(vma->exec_entry != entry);
349 vma->exec_entry = NULL; 851 vma->exec_entry = NULL;
350 i915_vma_put(vma);
351 }
352
353 i915_gem_context_put(eb->ctx);
354 852
355 if (eb->lut_mask >= 0) 853 eb_unreserve_vma(vma, entry);
356 kfree(eb->buckets);
357}
358
359static inline int use_cpu_reloc(struct drm_i915_gem_object *obj)
360{
361 if (!i915_gem_object_has_struct_page(obj))
362 return false;
363 854
364 if (DBG_USE_CPU_RELOC) 855 i915_vma_put(vma);
365 return DBG_USE_CPU_RELOC > 0; 856 }
366
367 return (HAS_LLC(to_i915(obj->base.dev)) ||
368 obj->cache_dirty ||
369 obj->cache_level != I915_CACHE_NONE);
370} 857}
371 858
372/* Used to convert any address to canonical form. 859static void eb_reset_vmas(const struct i915_execbuffer *eb)
373 * Starting from gen8, some commands (e.g. STATE_BASE_ADDRESS,
374 * MI_LOAD_REGISTER_MEM and others, see Broadwell PRM Vol2a) require the
375 * addresses to be in a canonical form:
376 * "GraphicsAddress[63:48] are ignored by the HW and assumed to be in correct
377 * canonical form [63:48] == [47]."
378 */
379#define GEN8_HIGH_ADDRESS_BIT 47
380static inline uint64_t gen8_canonical_addr(uint64_t address)
381{ 860{
382 return sign_extend64(address, GEN8_HIGH_ADDRESS_BIT); 861 eb_release_vmas(eb);
862 if (eb->lut_size >= 0)
863 memset(eb->buckets, 0,
864 sizeof(struct hlist_head) << eb->lut_size);
383} 865}
384 866
385static inline uint64_t gen8_noncanonical_addr(uint64_t address) 867static void eb_destroy(const struct i915_execbuffer *eb)
386{ 868{
387 return address & ((1ULL << (GEN8_HIGH_ADDRESS_BIT + 1)) - 1); 869 if (eb->lut_size >= 0)
870 kfree(eb->buckets);
388} 871}
389 872
390static inline uint64_t 873static inline u64
391relocation_target(const struct drm_i915_gem_relocation_entry *reloc, 874relocation_target(const struct drm_i915_gem_relocation_entry *reloc,
392 uint64_t target_offset) 875 const struct i915_vma *target)
393{ 876{
394 return gen8_canonical_addr((int)reloc->delta + target_offset); 877 return gen8_canonical_addr((int)reloc->delta + target->node.start);
395} 878}
396 879
397static void reloc_cache_init(struct reloc_cache *cache, 880static void reloc_cache_init(struct reloc_cache *cache,
@@ -400,6 +883,9 @@ static void reloc_cache_init(struct reloc_cache *cache,
400 cache->page = -1; 883 cache->page = -1;
401 cache->vaddr = 0; 884 cache->vaddr = 0;
402 /* Must be a variable in the struct to allow GCC to unroll. */ 885 /* Must be a variable in the struct to allow GCC to unroll. */
886 cache->has_llc = HAS_LLC(i915);
887 cache->has_fence = INTEL_GEN(i915) < 4;
888 cache->needs_unfenced = INTEL_INFO(i915)->unfenced_needs_alignment;
403 cache->use_64bit_reloc = HAS_64BIT_RELOC(i915); 889 cache->use_64bit_reloc = HAS_64BIT_RELOC(i915);
404 cache->node.allocated = false; 890 cache->node.allocated = false;
405} 891}
@@ -458,7 +944,7 @@ static void reloc_cache_reset(struct reloc_cache *cache)
458 944
459static void *reloc_kmap(struct drm_i915_gem_object *obj, 945static void *reloc_kmap(struct drm_i915_gem_object *obj,
460 struct reloc_cache *cache, 946 struct reloc_cache *cache,
461 int page) 947 unsigned long page)
462{ 948{
463 void *vaddr; 949 void *vaddr;
464 950
@@ -466,11 +952,11 @@ static void *reloc_kmap(struct drm_i915_gem_object *obj,
466 kunmap_atomic(unmask_page(cache->vaddr)); 952 kunmap_atomic(unmask_page(cache->vaddr));
467 } else { 953 } else {
468 unsigned int flushes; 954 unsigned int flushes;
469 int ret; 955 int err;
470 956
471 ret = i915_gem_obj_prepare_shmem_write(obj, &flushes); 957 err = i915_gem_obj_prepare_shmem_write(obj, &flushes);
472 if (ret) 958 if (err)
473 return ERR_PTR(ret); 959 return ERR_PTR(err);
474 960
475 BUILD_BUG_ON(KMAP & CLFLUSH_FLAGS); 961 BUILD_BUG_ON(KMAP & CLFLUSH_FLAGS);
476 BUILD_BUG_ON((KMAP | CLFLUSH_FLAGS) & PAGE_MASK); 962 BUILD_BUG_ON((KMAP | CLFLUSH_FLAGS) & PAGE_MASK);
@@ -490,7 +976,7 @@ static void *reloc_kmap(struct drm_i915_gem_object *obj,
490 976
491static void *reloc_iomap(struct drm_i915_gem_object *obj, 977static void *reloc_iomap(struct drm_i915_gem_object *obj,
492 struct reloc_cache *cache, 978 struct reloc_cache *cache,
493 int page) 979 unsigned long page)
494{ 980{
495 struct i915_ggtt *ggtt = cache_to_ggtt(cache); 981 struct i915_ggtt *ggtt = cache_to_ggtt(cache);
496 unsigned long offset; 982 unsigned long offset;
@@ -500,31 +986,31 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj,
500 io_mapping_unmap_atomic((void __force __iomem *) unmask_page(cache->vaddr)); 986 io_mapping_unmap_atomic((void __force __iomem *) unmask_page(cache->vaddr));
501 } else { 987 } else {
502 struct i915_vma *vma; 988 struct i915_vma *vma;
503 int ret; 989 int err;
504 990
505 if (use_cpu_reloc(obj)) 991 if (use_cpu_reloc(cache, obj))
506 return NULL; 992 return NULL;
507 993
508 ret = i915_gem_object_set_to_gtt_domain(obj, true); 994 err = i915_gem_object_set_to_gtt_domain(obj, true);
509 if (ret) 995 if (err)
510 return ERR_PTR(ret); 996 return ERR_PTR(err);
511 997
512 vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 998 vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
513 PIN_MAPPABLE | PIN_NONBLOCK); 999 PIN_MAPPABLE | PIN_NONBLOCK);
514 if (IS_ERR(vma)) { 1000 if (IS_ERR(vma)) {
515 memset(&cache->node, 0, sizeof(cache->node)); 1001 memset(&cache->node, 0, sizeof(cache->node));
516 ret = drm_mm_insert_node_in_range 1002 err = drm_mm_insert_node_in_range
517 (&ggtt->base.mm, &cache->node, 1003 (&ggtt->base.mm, &cache->node,
518 PAGE_SIZE, 0, I915_COLOR_UNEVICTABLE, 1004 PAGE_SIZE, 0, I915_COLOR_UNEVICTABLE,
519 0, ggtt->mappable_end, 1005 0, ggtt->mappable_end,
520 DRM_MM_INSERT_LOW); 1006 DRM_MM_INSERT_LOW);
521 if (ret) /* no inactive aperture space, use cpu reloc */ 1007 if (err) /* no inactive aperture space, use cpu reloc */
522 return NULL; 1008 return NULL;
523 } else { 1009 } else {
524 ret = i915_vma_put_fence(vma); 1010 err = i915_vma_put_fence(vma);
525 if (ret) { 1011 if (err) {
526 i915_vma_unpin(vma); 1012 i915_vma_unpin(vma);
527 return ERR_PTR(ret); 1013 return ERR_PTR(err);
528 } 1014 }
529 1015
530 cache->node.start = vma->node.start; 1016 cache->node.start = vma->node.start;
@@ -552,7 +1038,7 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj,
552 1038
553static void *reloc_vaddr(struct drm_i915_gem_object *obj, 1039static void *reloc_vaddr(struct drm_i915_gem_object *obj,
554 struct reloc_cache *cache, 1040 struct reloc_cache *cache,
555 int page) 1041 unsigned long page)
556{ 1042{
557 void *vaddr; 1043 void *vaddr;
558 1044
@@ -579,7 +1065,8 @@ static void clflush_write32(u32 *addr, u32 value, unsigned int flushes)
579 1065
580 *addr = value; 1066 *addr = value;
581 1067
582 /* Writes to the same cacheline are serialised by the CPU 1068 /*
1069 * Writes to the same cacheline are serialised by the CPU
583 * (including clflush). On the write path, we only require 1070 * (including clflush). On the write path, we only require
584 * that it hits memory in an orderly fashion and place 1071 * that it hits memory in an orderly fashion and place
585 * mb barriers at the start and end of the relocation phase 1072 * mb barriers at the start and end of the relocation phase
@@ -591,25 +1078,26 @@ static void clflush_write32(u32 *addr, u32 value, unsigned int flushes)
591 *addr = value; 1078 *addr = value;
592} 1079}
593 1080
594static int 1081static u64
595relocate_entry(struct drm_i915_gem_object *obj, 1082relocate_entry(struct i915_vma *vma,
596 const struct drm_i915_gem_relocation_entry *reloc, 1083 const struct drm_i915_gem_relocation_entry *reloc,
597 struct reloc_cache *cache, 1084 struct i915_execbuffer *eb,
598 u64 target_offset) 1085 const struct i915_vma *target)
599{ 1086{
1087 struct drm_i915_gem_object *obj = vma->obj;
600 u64 offset = reloc->offset; 1088 u64 offset = reloc->offset;
601 bool wide = cache->use_64bit_reloc; 1089 u64 target_offset = relocation_target(reloc, target);
1090 bool wide = eb->reloc_cache.use_64bit_reloc;
602 void *vaddr; 1091 void *vaddr;
603 1092
604 target_offset = relocation_target(reloc, target_offset);
605repeat: 1093repeat:
606 vaddr = reloc_vaddr(obj, cache, offset >> PAGE_SHIFT); 1094 vaddr = reloc_vaddr(obj, &eb->reloc_cache, offset >> PAGE_SHIFT);
607 if (IS_ERR(vaddr)) 1095 if (IS_ERR(vaddr))
608 return PTR_ERR(vaddr); 1096 return PTR_ERR(vaddr);
609 1097
610 clflush_write32(vaddr + offset_in_page(offset), 1098 clflush_write32(vaddr + offset_in_page(offset),
611 lower_32_bits(target_offset), 1099 lower_32_bits(target_offset),
612 cache->vaddr); 1100 eb->reloc_cache.vaddr);
613 1101
614 if (wide) { 1102 if (wide) {
615 offset += sizeof(u32); 1103 offset += sizeof(u32);
@@ -618,17 +1106,16 @@ repeat:
618 goto repeat; 1106 goto repeat;
619 } 1107 }
620 1108
621 return 0; 1109 return target->node.start | UPDATE;
622} 1110}
623 1111
624static int 1112static u64
625eb_relocate_entry(struct i915_vma *vma, 1113eb_relocate_entry(struct i915_execbuffer *eb,
626 struct i915_execbuffer *eb, 1114 struct i915_vma *vma,
627 struct drm_i915_gem_relocation_entry *reloc) 1115 const struct drm_i915_gem_relocation_entry *reloc)
628{ 1116{
629 struct i915_vma *target; 1117 struct i915_vma *target;
630 u64 target_offset; 1118 int err;
631 int ret;
632 1119
633 /* we've already held a reference to all valid objects */ 1120
634 target = eb_get_vma(eb, reloc->target_handle); 1121 target = eb_get_vma(eb, reloc->target_handle);
@@ -658,27 +1145,30 @@ eb_relocate_entry(struct i915_vma *vma,
658 return -EINVAL; 1145 return -EINVAL;
659 } 1146 }
660 1147
661 if (reloc->write_domain) 1148 if (reloc->write_domain) {
662 target->exec_entry->flags |= EXEC_OBJECT_WRITE; 1149 target->exec_entry->flags |= EXEC_OBJECT_WRITE;
663 1150
664 /* 1151 /*
665 * Sandybridge PPGTT errata: We need a global gtt mapping for MI and 1152 * Sandybridge PPGTT errata: We need a global gtt mapping
666 * pipe_control writes because the gpu doesn't properly redirect them 1153 * for MI and pipe_control writes because the gpu doesn't
667 * through the ppgtt for non_secure batchbuffers. 1154 * properly redirect them through the ppgtt for non_secure
668 */ 1155 * batchbuffers.
669 if (unlikely(IS_GEN6(eb->i915) && 1156 */
670 reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION)) { 1157 if (reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION &&
671 ret = i915_vma_bind(target, target->obj->cache_level, 1158 IS_GEN6(eb->i915)) {
672 PIN_GLOBAL); 1159 err = i915_vma_bind(target, target->obj->cache_level,
673 if (WARN_ONCE(ret, "Unexpected failure to bind target VMA!")) 1160 PIN_GLOBAL);
674 return ret; 1161 if (WARN_ONCE(err,
1162 "Unexpected failure to bind target VMA!"))
1163 return err;
1164 }
675 } 1165 }
676 1166
677 /* If the relocation already has the right value in it, no 1167 /*
1168 * If the relocation already has the right value in it, no
678 * more work needs to be done. 1169 * more work needs to be done.
679 */ 1170 */
680 target_offset = gen8_canonical_addr(target->node.start); 1171 if (gen8_canonical_addr(target->node.start) == reloc->presumed_offset)
681 if (target_offset == reloc->presumed_offset)
682 return 0; 1172 return 0;
683 1173
684 /* Check that the relocation address is valid... */ 1174 /* Check that the relocation address is valid... */
@@ -709,35 +1199,39 @@ eb_relocate_entry(struct i915_vma *vma,
709 */ 1199 */
710 vma->exec_entry->flags &= ~EXEC_OBJECT_ASYNC; 1200 vma->exec_entry->flags &= ~EXEC_OBJECT_ASYNC;
711 1201
712 ret = relocate_entry(vma->obj, reloc, &eb->reloc_cache, target_offset);
713 if (ret)
714 return ret;
715
716 /* and update the user's relocation entry */ 1202 /* and update the user's relocation entry */
717 reloc->presumed_offset = target_offset; 1203 return relocate_entry(vma, reloc, eb, target);
718 return 0;
719} 1204}
720 1205
721static int eb_relocate_vma(struct i915_vma *vma, struct i915_execbuffer *eb) 1206static int eb_relocate_vma(struct i915_execbuffer *eb, struct i915_vma *vma)
722{ 1207{
723#define N_RELOC(x) ((x) / sizeof(struct drm_i915_gem_relocation_entry)) 1208#define N_RELOC(x) ((x) / sizeof(struct drm_i915_gem_relocation_entry))
724 struct drm_i915_gem_relocation_entry stack_reloc[N_RELOC(512)]; 1209 struct drm_i915_gem_relocation_entry stack[N_RELOC(512)];
725 struct drm_i915_gem_relocation_entry __user *user_relocs; 1210 struct drm_i915_gem_relocation_entry __user *urelocs;
726 struct drm_i915_gem_exec_object2 *entry = vma->exec_entry; 1211 const struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
727 int remain, ret = 0; 1212 unsigned int remain;
728
729 user_relocs = u64_to_user_ptr(entry->relocs_ptr);
730 1213
1214 urelocs = u64_to_user_ptr(entry->relocs_ptr);
731 remain = entry->relocation_count; 1215 remain = entry->relocation_count;
732 while (remain) { 1216 if (unlikely(remain > N_RELOC(ULONG_MAX)))
733 struct drm_i915_gem_relocation_entry *r = stack_reloc; 1217 return -EINVAL;
734 unsigned long unwritten;
735 unsigned int count;
736 1218
737 count = min_t(unsigned int, remain, ARRAY_SIZE(stack_reloc)); 1219 /*
738 remain -= count; 1220 * We must check that the entire relocation array is safe
1221 * to read. However, if the array is not writable the user loses
1222 * the updated relocation values.
1223 */
1224 if (unlikely(!access_ok(VERIFY_READ, urelocs, remain*sizeof(*urelocs))))
1225 return -EFAULT;
1226
1227 do {
1228 struct drm_i915_gem_relocation_entry *r = stack;
1229 unsigned int count =
1230 min_t(unsigned int, remain, ARRAY_SIZE(stack));
1231 unsigned int copied;
739 1232
740 /* This is the fast path and we cannot handle a pagefault 1233 /*
1234 * This is the fast path and we cannot handle a pagefault
741 * whilst holding the struct mutex lest the user pass in the 1235 * whilst holding the struct mutex lest the user pass in the
742 * relocations contained within a mmaped bo. For in such a case 1236 * relocations contained within a mmaped bo. For in such a case
743 * we, the page fault handler would call i915_gem_fault() and 1237 * we, the page fault handler would call i915_gem_fault() and
@@ -745,409 +1239,357 @@ static int eb_relocate_vma(struct i915_vma *vma, struct i915_execbuffer *eb)
745 * this is bad and so lockdep complains vehemently. 1239 * this is bad and so lockdep complains vehemently.
746 */ 1240 */
747 pagefault_disable(); 1241 pagefault_disable();
748 unwritten = __copy_from_user_inatomic(r, user_relocs, count*sizeof(r[0])); 1242 copied = __copy_from_user_inatomic(r, urelocs, count * sizeof(r[0]));
749 pagefault_enable(); 1243 pagefault_enable();
750 if (unlikely(unwritten)) { 1244 if (unlikely(copied)) {
751 ret = -EFAULT; 1245 remain = -EFAULT;
752 goto out; 1246 goto out;
753 } 1247 }
754 1248
1249 remain -= count;
755 do { 1250 do {
756 u64 offset = r->presumed_offset; 1251 u64 offset = eb_relocate_entry(eb, vma, r);
757 1252
758 ret = eb_relocate_entry(vma, eb, r); 1253 if (likely(offset == 0)) {
759 if (ret) 1254 } else if ((s64)offset < 0) {
1255 remain = (int)offset;
760 goto out; 1256 goto out;
761 1257 } else {
762 if (r->presumed_offset != offset) { 1258 /*
763 pagefault_disable(); 1259 * Note that reporting an error now
764 unwritten = __put_user(r->presumed_offset, 1260 * leaves everything in an inconsistent
765 &user_relocs->presumed_offset); 1261 * state as we have *already* changed
766 pagefault_enable(); 1262 * the relocation value inside the
767 if (unlikely(unwritten)) { 1263 * object. As we have not changed the
768 /* Note that reporting an error now 1264 * reloc.presumed_offset or will not
769 * leaves everything in an inconsistent 1265 * change the execobject.offset, on the
770 * state as we have *already* changed 1266 * call we may not rewrite the value
771 * the relocation value inside the 1267 * inside the object, leaving it
772 * object. As we have not changed the 1268 * dangling and causing a GPU hang. Unless
773 * reloc.presumed_offset or will not 1269 * userspace dynamically rebuilds the
774 * change the execobject.offset, on the 1270 * relocations on each execbuf rather than
775 * call we may not rewrite the value 1271 * presume a static tree.
776 * inside the object, leaving it 1272 *
777 * dangling and causing a GPU hang. 1273 * We did previously check if the relocations
778 */ 1274 * were writable (access_ok), an error now
779 ret = -EFAULT; 1275 * would be a strange race with mprotect,
780 goto out; 1276 * having already demonstrated that we
781 } 1277 * can read from this userspace address.
1278 */
1279 offset = gen8_canonical_addr(offset & ~UPDATE);
1280 __put_user(offset,
1281 &urelocs[r-stack].presumed_offset);
782 } 1282 }
783 1283 } while (r++, --count);
784 user_relocs++; 1284 urelocs += ARRAY_SIZE(stack);
785 r++; 1285 } while (remain);
786 } while (--count);
787 }
788
789out: 1286out:
790 reloc_cache_reset(&eb->reloc_cache); 1287 reloc_cache_reset(&eb->reloc_cache);
791 return ret; 1288 return remain;
792#undef N_RELOC
793} 1289}
794 1290
795static int 1291static int
796eb_relocate_vma_slow(struct i915_vma *vma, 1292eb_relocate_vma_slow(struct i915_execbuffer *eb, struct i915_vma *vma)
797 struct i915_execbuffer *eb,
798 struct drm_i915_gem_relocation_entry *relocs)
799{ 1293{
800 const struct drm_i915_gem_exec_object2 *entry = vma->exec_entry; 1294 const struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
801 int i, ret = 0; 1295 struct drm_i915_gem_relocation_entry *relocs =
1296 u64_to_ptr(typeof(*relocs), entry->relocs_ptr);
1297 unsigned int i;
1298 int err;
802 1299
803 for (i = 0; i < entry->relocation_count; i++) { 1300 for (i = 0; i < entry->relocation_count; i++) {
804 ret = eb_relocate_entry(vma, eb, &relocs[i]); 1301 u64 offset = eb_relocate_entry(eb, vma, &relocs[i]);
805 if (ret) 1302
806 break; 1303 if ((s64)offset < 0) {
1304 err = (int)offset;
1305 goto err;
1306 }
807 } 1307 }
1308 err = 0;
1309err:
808 reloc_cache_reset(&eb->reloc_cache); 1310 reloc_cache_reset(&eb->reloc_cache);
809 return ret; 1311 return err;
810} 1312}
811 1313
812static int eb_relocate(struct i915_execbuffer *eb) 1314static int check_relocations(const struct drm_i915_gem_exec_object2 *entry)
813{ 1315{
814 struct i915_vma *vma; 1316 const char __user *addr, *end;
815 int ret = 0; 1317 unsigned long size;
1318 char __maybe_unused c;
816 1319
817 list_for_each_entry(vma, &eb->vmas, exec_link) { 1320 size = entry->relocation_count;
818 ret = eb_relocate_vma(vma, eb); 1321 if (size == 0)
819 if (ret) 1322 return 0;
820 break;
821 }
822 1323
823 return ret; 1324 if (size > N_RELOC(ULONG_MAX))
824} 1325 return -EINVAL;
825 1326
826static bool only_mappable_for_reloc(unsigned int flags) 1327 addr = u64_to_user_ptr(entry->relocs_ptr);
827{ 1328 size *= sizeof(struct drm_i915_gem_relocation_entry);
828 return (flags & (EXEC_OBJECT_NEEDS_FENCE | __EXEC_OBJECT_NEEDS_MAP)) == 1329 if (!access_ok(VERIFY_READ, addr, size))
829 __EXEC_OBJECT_NEEDS_MAP; 1330 return -EFAULT;
1331
1332 end = addr + size;
1333 for (; addr < end; addr += PAGE_SIZE) {
1334 int err = __get_user(c, addr);
1335 if (err)
1336 return err;
1337 }
1338 return __get_user(c, end - 1);
830} 1339}
831 1340
832static int 1341static int eb_copy_relocations(const struct i915_execbuffer *eb)
833eb_reserve_vma(struct i915_vma *vma,
834 struct intel_engine_cs *engine,
835 bool *need_reloc)
836{ 1342{
837 struct drm_i915_gem_exec_object2 *entry = vma->exec_entry; 1343 const unsigned int count = eb->buffer_count;
838 uint64_t flags; 1344 unsigned int i;
839 int ret; 1345 int err;
840
841 flags = PIN_USER;
842 if (entry->flags & EXEC_OBJECT_NEEDS_GTT)
843 flags |= PIN_GLOBAL;
844
845 if (!drm_mm_node_allocated(&vma->node)) {
846 /* Wa32bitGeneralStateOffset & Wa32bitInstructionBaseOffset,
847 * limit address to the first 4GBs for unflagged objects.
848 */
849 if ((entry->flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) == 0)
850 flags |= PIN_ZONE_4G;
851 if (entry->flags & __EXEC_OBJECT_NEEDS_MAP)
852 flags |= PIN_GLOBAL | PIN_MAPPABLE;
853 if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS)
854 flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS;
855 if (entry->flags & EXEC_OBJECT_PINNED)
856 flags |= entry->offset | PIN_OFFSET_FIXED;
857 if ((flags & PIN_MAPPABLE) == 0)
858 flags |= PIN_HIGH;
859 }
860
861 ret = i915_vma_pin(vma,
862 entry->pad_to_size,
863 entry->alignment,
864 flags);
865 if ((ret == -ENOSPC || ret == -E2BIG) &&
866 only_mappable_for_reloc(entry->flags))
867 ret = i915_vma_pin(vma,
868 entry->pad_to_size,
869 entry->alignment,
870 flags & ~PIN_MAPPABLE);
871 if (ret)
872 return ret;
873 1346
874 entry->flags |= __EXEC_OBJECT_HAS_PIN; 1347 for (i = 0; i < count; i++) {
1348 const unsigned int nreloc = eb->exec[i].relocation_count;
1349 struct drm_i915_gem_relocation_entry __user *urelocs;
1350 struct drm_i915_gem_relocation_entry *relocs;
1351 unsigned long size;
1352 unsigned long copied;
875 1353
876 if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) { 1354 if (nreloc == 0)
877 ret = i915_vma_get_fence(vma); 1355 continue;
878 if (ret)
879 return ret;
880 1356
881 if (i915_vma_pin_fence(vma)) 1357 err = check_relocations(&eb->exec[i]);
882 entry->flags |= __EXEC_OBJECT_HAS_FENCE; 1358 if (err)
883 } 1359 goto err;
884 1360
885 if (entry->offset != vma->node.start) { 1361 urelocs = u64_to_user_ptr(eb->exec[i].relocs_ptr);
886 entry->offset = vma->node.start; 1362 size = nreloc * sizeof(*relocs);
887 *need_reloc = true;
888 }
889 1363
890 return 0; 1364 relocs = kvmalloc_array(size, 1, GFP_TEMPORARY);
891} 1365 if (!relocs) {
1366 kvfree(relocs);
1367 err = -ENOMEM;
1368 goto err;
1369 }
892 1370
893static bool 1371 /* copy_from_user is limited to < 4GiB */
894need_reloc_mappable(struct i915_vma *vma) 1372 copied = 0;
895{ 1373 do {
896 struct drm_i915_gem_exec_object2 *entry = vma->exec_entry; 1374 unsigned int len =
1375 min_t(u64, BIT_ULL(31), size - copied);
1376
1377 if (__copy_from_user((char *)relocs + copied,
1378 (char *)urelocs + copied,
1379 len)) {
1380 kvfree(relocs);
1381 err = -EFAULT;
1382 goto err;
1383 }
897 1384
898 if (entry->relocation_count == 0) 1385 copied += len;
899 return false; 1386 } while (copied < size);
900 1387
901 if (!i915_vma_is_ggtt(vma)) 1388 /*
902 return false; 1389 * As we do not update the known relocation offsets after
1390 * relocating (due to the complexities in lock handling),
1391 * we need to mark them as invalid now so that we force the
1392 * relocation processing next time. Just in case the target
1393 * object is evicted and then rebound into its old
1394 * presumed_offset before the next execbuffer - if that
1395 * happened we would make the mistake of assuming that the
1396 * relocations were valid.
1397 */
1398 user_access_begin();
1399 for (copied = 0; copied < nreloc; copied++)
1400 unsafe_put_user(-1,
1401 &urelocs[copied].presumed_offset,
1402 end_user);
1403end_user:
1404 user_access_end();
903 1405
904 /* See also use_cpu_reloc() */ 1406 eb->exec[i].relocs_ptr = (uintptr_t)relocs;
905 if (HAS_LLC(to_i915(vma->obj->base.dev))) 1407 }
906 return false;
907 1408
908 if (vma->obj->base.write_domain == I915_GEM_DOMAIN_CPU) 1409 return 0;
909 return false;
910 1410
911 return true; 1411err:
1412 while (i--) {
1413 struct drm_i915_gem_relocation_entry *relocs =
1414 u64_to_ptr(typeof(*relocs), eb->exec[i].relocs_ptr);
1415 if (eb->exec[i].relocation_count)
1416 kvfree(relocs);
1417 }
1418 return err;
912} 1419}
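
[Editor's note] Two details of eb_copy_relocations() above are worth pulling out: the copy is chunked because, as the in-line comment says, a single copy_from_user() is limited to less than 4 GiB, and each presumed_offset is then poisoned to -1 so a stale user value can never be mistaken for a validated relocation. Here is a small sketch of the chunking pattern only, with memcpy() standing in for __copy_from_user() and a hypothetical helper name.

#include <stdint.h>
#include <string.h>

/* Copy `size` bytes, never handing more than 2 GiB to a single copy call. */
static void copy_in_chunks(void *dst, const void *src, uint64_t size)
{
	const uint64_t chunk_max = UINT64_C(1) << 31;	/* mirrors BIT_ULL(31) */
	uint64_t copied = 0;

	while (copied < size) {
		uint64_t len = size - copied;

		if (len > chunk_max)
			len = chunk_max;

		memcpy((char *)dst + copied, (const char *)src + copied, len);
		copied += len;
	}
}
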
913 1420
914static bool 1421static int eb_prefault_relocations(const struct i915_execbuffer *eb)
915eb_vma_misplaced(struct i915_vma *vma)
916{ 1422{
917 struct drm_i915_gem_exec_object2 *entry = vma->exec_entry; 1423 const unsigned int count = eb->buffer_count;
918 1424 unsigned int i;
919 WARN_ON(entry->flags & __EXEC_OBJECT_NEEDS_MAP &&
920 !i915_vma_is_ggtt(vma));
921
922 if (entry->alignment && !IS_ALIGNED(vma->node.start, entry->alignment))
923 return true;
924 1425
925 if (vma->node.size < entry->pad_to_size) 1426 if (unlikely(i915.prefault_disable))
926 return true; 1427 return 0;
927
928 if (entry->flags & EXEC_OBJECT_PINNED &&
929 vma->node.start != entry->offset)
930 return true;
931
932 if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS &&
933 vma->node.start < BATCH_OFFSET_BIAS)
934 return true;
935 1428
936 /* avoid costly ping-pong once a batch bo ended up non-mappable */ 1429 for (i = 0; i < count; i++) {
937 if (entry->flags & __EXEC_OBJECT_NEEDS_MAP && 1430 int err;
938 !i915_vma_is_map_and_fenceable(vma))
939 return !only_mappable_for_reloc(entry->flags);
940 1431
941 if ((entry->flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) == 0 && 1432 err = check_relocations(&eb->exec[i]);
942 (vma->node.start + vma->node.size - 1) >> 32) 1433 if (err)
943 return true; 1434 return err;
1435 }
944 1436
945 return false; 1437 return 0;
946} 1438}
947 1439
948static int eb_reserve(struct i915_execbuffer *eb) 1440static noinline int eb_relocate_slow(struct i915_execbuffer *eb)
949{ 1441{
950 const bool has_fenced_gpu_access = INTEL_GEN(eb->i915) < 4; 1442 struct drm_device *dev = &eb->i915->drm;
951 const bool needs_unfenced_map = INTEL_INFO(eb->i915)->unfenced_needs_alignment; 1443 bool have_copy = false;
952 struct i915_vma *vma; 1444 struct i915_vma *vma;
953 struct list_head ordered_vmas; 1445 int err = 0;
954 struct list_head pinned_vmas; 1446
955 int retry; 1447repeat:
956 1448 if (signal_pending(current)) {
957 INIT_LIST_HEAD(&ordered_vmas); 1449 err = -ERESTARTSYS;
958 INIT_LIST_HEAD(&pinned_vmas); 1450 goto out;
959 while (!list_empty(&eb->vmas)) { 1451 }
960 struct drm_i915_gem_exec_object2 *entry; 1452
961 bool need_fence, need_mappable; 1453 /* We may process another execbuffer during the unlock... */
962 1454 eb_reset_vmas(eb);
963 vma = list_first_entry(&eb->vmas, struct i915_vma, exec_link); 1455 mutex_unlock(&dev->struct_mutex);
964 entry = vma->exec_entry; 1456
965 1457 /*
966 if (eb->ctx->flags & CONTEXT_NO_ZEROMAP) 1458 * We take 3 passes through the slowpath.
967 entry->flags |= __EXEC_OBJECT_NEEDS_BIAS;
968
969 if (!has_fenced_gpu_access)
970 entry->flags &= ~EXEC_OBJECT_NEEDS_FENCE;
971 need_fence =
972 (entry->flags & EXEC_OBJECT_NEEDS_FENCE ||
973 needs_unfenced_map) &&
974 i915_gem_object_is_tiled(vma->obj);
975 need_mappable = need_fence || need_reloc_mappable(vma);
976
977 if (entry->flags & EXEC_OBJECT_PINNED)
978 list_move_tail(&vma->exec_link, &pinned_vmas);
979 else if (need_mappable) {
980 entry->flags |= __EXEC_OBJECT_NEEDS_MAP;
981 list_move(&vma->exec_link, &ordered_vmas);
982 } else
983 list_move_tail(&vma->exec_link, &ordered_vmas);
984 }
985 list_splice(&ordered_vmas, &eb->vmas);
986 list_splice(&pinned_vmas, &eb->vmas);
987
988 /* Attempt to pin all of the buffers into the GTT.
989 * This is done in 3 phases:
990 * 1459 *
991 * 1a. Unbind all objects that do not match the GTT constraints for 1460 * 1 - we try to just prefault all the user relocation entries and
992 * the execbuffer (fenceable, mappable, alignment etc). 1461 * then attempt to reuse the atomic pagefault disabled fast path again.
993 * 1b. Increment pin count for already bound objects.
994 * 2. Bind new objects.
995 * 3. Decrement pin count.
996 * 1462 *
997 * This avoids unnecessary unbinding of later objects in order to make 1463 * 2 - we copy the user entries to a local buffer here outside of the
998 * room for the earlier objects *unless* we need to defragment. 1464 * lock and allow ourselves to wait upon any rendering before
1465 * relocations
1466 *
1467 * 3 - we already have a local copy of the relocation entries, but
1468 * were interrupted (EAGAIN) whilst waiting for the objects, try again.
999 */ 1469 */
1000 retry = 0; 1470 if (!err) {
1001 do { 1471 err = eb_prefault_relocations(eb);
1002 int ret = 0; 1472 } else if (!have_copy) {
1003 1473 err = eb_copy_relocations(eb);
1004 /* Unbind any ill-fitting objects or pin. */ 1474 have_copy = err == 0;
1005 list_for_each_entry(vma, &eb->vmas, exec_link) { 1475 } else {
1006 if (!drm_mm_node_allocated(&vma->node)) 1476 cond_resched();
1007 continue; 1477 err = 0;
1478 }
1479 if (err) {
1480 mutex_lock(&dev->struct_mutex);
1481 goto out;
1482 }
1008 1483
1009 if (eb_vma_misplaced(vma)) 1484 err = i915_mutex_lock_interruptible(dev);
1010 ret = i915_vma_unbind(vma); 1485 if (err) {
1011 else 1486 mutex_lock(&dev->struct_mutex);
1012 ret = eb_reserve_vma(vma, eb->engine, &eb->need_relocs); 1487 goto out;
1013 if (ret) 1488 }
1014 goto err;
1015 }
1016 1489
1017 /* Bind fresh objects */ 1490 /* reacquire the objects */
1018 list_for_each_entry(vma, &eb->vmas, exec_link) { 1491 err = eb_lookup_vmas(eb);
1019 if (drm_mm_node_allocated(&vma->node)) 1492 if (err)
1020 continue; 1493 goto err;
1021 1494
1022 ret = eb_reserve_vma(vma, eb->engine, &eb->need_relocs); 1495 list_for_each_entry(vma, &eb->relocs, reloc_link) {
1023 if (ret) 1496 if (!have_copy) {
1497 pagefault_disable();
1498 err = eb_relocate_vma(eb, vma);
1499 pagefault_enable();
1500 if (err)
1501 goto repeat;
1502 } else {
1503 err = eb_relocate_vma_slow(eb, vma);
1504 if (err)
1024 goto err; 1505 goto err;
1025 } 1506 }
1507 }
1026 1508
1027err: 1509 /*
1028 if (ret != -ENOSPC || retry++) 1510 * Leave the user relocations as are, this is the painfully slow path,
1029 return ret; 1511 * and we want to avoid the complication of dropping the lock whilst
1030 1512 * having buffers reserved in the aperture and so causing spurious
1031 /* Decrement pin count for bound objects */ 1513 * ENOSPC for random operations.
1032 list_for_each_entry(vma, &eb->vmas, exec_link) 1514 */
1033 eb_unreserve_vma(vma);
1034 1515
1035 ret = i915_gem_evict_vm(eb->vm, true); 1516err:
1036 if (ret) 1517 if (err == -EAGAIN)
1037 return ret; 1518 goto repeat;
1038 } while (1);
1039}
1040 1519
1041static int 1520out:
1042eb_relocate_slow(struct i915_execbuffer *eb) 1521 if (have_copy) {
1043{ 1522 const unsigned int count = eb->buffer_count;
1044 const unsigned int count = eb->args->buffer_count; 1523 unsigned int i;
1045 struct drm_device *dev = &eb->i915->drm;
1046 struct drm_i915_gem_relocation_entry *reloc;
1047 struct i915_vma *vma;
1048 int *reloc_offset;
1049 int i, total, ret;
1050 1524
1051 /* We may process another execbuffer during the unlock... */ 1525 for (i = 0; i < count; i++) {
1052 eb_reset(eb); 1526 const struct drm_i915_gem_exec_object2 *entry =
1053 mutex_unlock(&dev->struct_mutex); 1527 &eb->exec[i];
1528 struct drm_i915_gem_relocation_entry *relocs;
1054 1529
1055 total = 0; 1530 if (!entry->relocation_count)
1056 for (i = 0; i < count; i++) 1531 continue;
1057 total += eb->exec[i].relocation_count;
1058 1532
1059 reloc_offset = kvmalloc_array(count, sizeof(*reloc_offset), GFP_KERNEL); 1533 relocs = u64_to_ptr(typeof(*relocs), entry->relocs_ptr);
1060 reloc = kvmalloc_array(total, sizeof(*reloc), GFP_KERNEL); 1534 kvfree(relocs);
1061 if (reloc == NULL || reloc_offset == NULL) { 1535 }
1062 kvfree(reloc);
1063 kvfree(reloc_offset);
1064 mutex_lock(&dev->struct_mutex);
1065 return -ENOMEM;
1066 } 1536 }
1067 1537
1068 total = 0; 1538 return err ?: have_copy;
1069 for (i = 0; i < count; i++) { 1539}
1070 struct drm_i915_gem_relocation_entry __user *user_relocs;
1071 u64 invalid_offset = (u64)-1;
1072 int j;
1073 1540
1074 user_relocs = u64_to_user_ptr(eb->exec[i].relocs_ptr); 1541static int eb_relocate(struct i915_execbuffer *eb)
1542{
1543 if (eb_lookup_vmas(eb))
1544 goto slow;
1075 1545
1076 if (copy_from_user(reloc+total, user_relocs, 1546 /* The objects are in their final locations, apply the relocations. */
1077 eb->exec[i].relocation_count * sizeof(*reloc))) { 1547 if (eb->args->flags & __EXEC_HAS_RELOC) {
1078 ret = -EFAULT; 1548 struct i915_vma *vma;
1079 mutex_lock(&dev->struct_mutex);
1080 goto err;
1081 }
1082 1549
1083 /* As we do not update the known relocation offsets after 1550 list_for_each_entry(vma, &eb->relocs, reloc_link) {
1084 * relocating (due to the complexities in lock handling), 1551 if (eb_relocate_vma(eb, vma))
1085 * we need to mark them as invalid now so that we force the 1552 goto slow;
1086 * relocation processing next time. Just in case the target
1087 * object is evicted and then rebound into its old
1088 * presumed_offset before the next execbuffer - if that
1089 * happened we would make the mistake of assuming that the
1090 * relocations were valid.
1091 */
1092 for (j = 0; j < eb->exec[i].relocation_count; j++) {
1093 if (__copy_to_user(&user_relocs[j].presumed_offset,
1094 &invalid_offset,
1095 sizeof(invalid_offset))) {
1096 ret = -EFAULT;
1097 mutex_lock(&dev->struct_mutex);
1098 goto err;
1099 }
1100 } 1553 }
1101
1102 reloc_offset[i] = total;
1103 total += eb->exec[i].relocation_count;
1104 } 1554 }
1105 1555
1106 ret = i915_mutex_lock_interruptible(dev); 1556 return 0;
1107 if (ret) {
1108 mutex_lock(&dev->struct_mutex);
1109 goto err;
1110 }
1111
1112 /* reacquire the objects */
1113 ret = eb_lookup_vmas(eb);
1114 if (ret)
1115 goto err;
1116
1117 ret = eb_reserve(eb);
1118 if (ret)
1119 goto err;
1120 1557
1121 list_for_each_entry(vma, &eb->vmas, exec_link) { 1558slow:
1122 int idx = vma->exec_entry - eb->exec; 1559 return eb_relocate_slow(eb);
1560}
1123 1561
1124 ret = eb_relocate_vma_slow(vma, eb, reloc + reloc_offset[idx]); 1562static void eb_export_fence(struct drm_i915_gem_object *obj,
1125 if (ret) 1563 struct drm_i915_gem_request *req,
1126 goto err; 1564 unsigned int flags)
1127 } 1565{
1566 struct reservation_object *resv = obj->resv;
1128 1567
1129 /* Leave the user relocations as are, this is the painfully slow path, 1568 /*
1130 * and we want to avoid the complication of dropping the lock whilst 1569 * Ignore errors from failing to allocate the new fence, we can't
1131 * having buffers reserved in the aperture and so causing spurious 1570 * handle an error right now. Worst case should be missed
1132 * ENOSPC for random operations. 1571 * synchronisation leading to rendering corruption.
1133 */ 1572 */
1134 1573 reservation_object_lock(resv, NULL);
1135err: 1574 if (flags & EXEC_OBJECT_WRITE)
1136 kvfree(reloc); 1575 reservation_object_add_excl_fence(resv, &req->fence);
1137 kvfree(reloc_offset); 1576 else if (reservation_object_reserve_shared(resv) == 0)
1138 return ret; 1577 reservation_object_add_shared_fence(resv, &req->fence);
1578 reservation_object_unlock(resv);
1139} 1579}
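
[Editor's note] The control flow of the new eb_relocate_slow() is easier to see with the locking and vma handling stripped away: three passes, as its comment describes, selected by whether the previous attempt failed and whether a kernel-side copy of the relocations already exists. The skeleton below is self-contained and every helper is a stub standing in for the real functions; only the control flow is meant to mirror the driver.

#include <errno.h>
#include <stdbool.h>

static int prefault_relocs(void)      { return 0; }		/* pass 1 helper */
static int copy_relocs(void)          { return 0; }		/* pass 2 helper */
static int reloc_fast(bool have_copy) { return have_copy ? 0 : -EFAULT; }

static int relocate_slow(void)
{
	bool have_copy = false;
	int err = 0;

repeat:
	if (!err) {
		err = prefault_relocs();	/* 1: prefault, retry fast path */
	} else if (!have_copy) {
		err = copy_relocs();		/* 2: copy entries into the kernel */
		have_copy = (err == 0);
	} else {
		err = 0;			/* 3: copy exists, just try again */
	}
	if (err)
		return err;

	err = reloc_fast(have_copy);
	if (err == -EFAULT && !have_copy)
		goto repeat;			/* fall back to the next pass */

	return err;
}
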
1140 1580
1141static int 1581static int eb_move_to_gpu(struct i915_execbuffer *eb)
1142eb_move_to_gpu(struct i915_execbuffer *eb)
1143{ 1582{
1144 struct i915_vma *vma; 1583 const unsigned int count = eb->buffer_count;
1145 int ret; 1584 unsigned int i;
1585 int err;
1146 1586
1147 list_for_each_entry(vma, &eb->vmas, exec_link) { 1587 for (i = 0; i < count; i++) {
1588 const struct drm_i915_gem_exec_object2 *entry = &eb->exec[i];
1589 struct i915_vma *vma = exec_to_vma(entry);
1148 struct drm_i915_gem_object *obj = vma->obj; 1590 struct drm_i915_gem_object *obj = vma->obj;
1149 1591
1150 if (vma->exec_entry->flags & EXEC_OBJECT_CAPTURE) { 1592 if (entry->flags & EXEC_OBJECT_CAPTURE) {
1151 struct i915_gem_capture_list *capture; 1593 struct i915_gem_capture_list *capture;
1152 1594
1153 capture = kmalloc(sizeof(*capture), GFP_KERNEL); 1595 capture = kmalloc(sizeof(*capture), GFP_KERNEL);
@@ -1159,18 +1601,32 @@ eb_move_to_gpu(struct i915_execbuffer *eb)
1159 eb->request->capture_list = capture; 1601 eb->request->capture_list = capture;
1160 } 1602 }
1161 1603
1162 if (vma->exec_entry->flags & EXEC_OBJECT_ASYNC) 1604 if (entry->flags & EXEC_OBJECT_ASYNC)
1163 continue; 1605 goto skip_flushes;
1164 1606
1165 if (unlikely(obj->cache_dirty && !obj->cache_coherent)) 1607 if (unlikely(obj->cache_dirty && !obj->cache_coherent))
1166 i915_gem_clflush_object(obj, 0); 1608 i915_gem_clflush_object(obj, 0);
1167 1609
1168 ret = i915_gem_request_await_object 1610 err = i915_gem_request_await_object
1169 (eb->request, obj, vma->exec_entry->flags & EXEC_OBJECT_WRITE); 1611 (eb->request, obj, entry->flags & EXEC_OBJECT_WRITE);
1170 if (ret) 1612 if (err)
1171 return ret; 1613 return err;
1614
1615skip_flushes:
1616 i915_vma_move_to_active(vma, eb->request, entry->flags);
1617 __eb_unreserve_vma(vma, entry);
1618 vma->exec_entry = NULL;
1172 } 1619 }
1173 1620
1621 for (i = 0; i < count; i++) {
1622 const struct drm_i915_gem_exec_object2 *entry = &eb->exec[i];
1623 struct i915_vma *vma = exec_to_vma(entry);
1624
1625 eb_export_fence(vma->obj, eb->request, entry->flags);
1626 i915_vma_put(vma);
1627 }
1628 eb->exec = NULL;
1629
1174 /* Unconditionally flush any chipset caches (for streaming writes). */ 1630 /* Unconditionally flush any chipset caches (for streaming writes). */
1175 i915_gem_chipset_flush(eb->i915); 1631 i915_gem_chipset_flush(eb->i915);
1176 1632
@@ -1178,8 +1634,7 @@ eb_move_to_gpu(struct i915_execbuffer *eb)
1178 return eb->engine->emit_flush(eb->request, EMIT_INVALIDATE); 1634 return eb->engine->emit_flush(eb->request, EMIT_INVALIDATE);
1179} 1635}
1180 1636
1181static bool 1637static bool i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec)
1182i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec)
1183{ 1638{
1184 if (exec->flags & __I915_EXEC_ILLEGAL_FLAGS) 1639 if (exec->flags & __I915_EXEC_ILLEGAL_FLAGS)
1185 return false; 1640 return false;
@@ -1201,103 +1656,6 @@ i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec)
1201 return true; 1656 return true;
1202} 1657}
1203 1658
1204static int
1205validate_exec_list(struct drm_device *dev,
1206 struct drm_i915_gem_exec_object2 *exec,
1207 int count)
1208{
1209 unsigned relocs_total = 0;
1210 unsigned relocs_max = UINT_MAX / sizeof(struct drm_i915_gem_relocation_entry);
1211 unsigned invalid_flags;
1212 int i;
1213
1214 /* INTERNAL flags must not overlap with external ones */
1215 BUILD_BUG_ON(__EXEC_OBJECT_INTERNAL_FLAGS & ~__EXEC_OBJECT_UNKNOWN_FLAGS);
1216
1217 invalid_flags = __EXEC_OBJECT_UNKNOWN_FLAGS;
1218 if (USES_FULL_PPGTT(dev))
1219 invalid_flags |= EXEC_OBJECT_NEEDS_GTT;
1220
1221 for (i = 0; i < count; i++) {
1222 char __user *ptr = u64_to_user_ptr(exec[i].relocs_ptr);
1223 int length; /* limited by fault_in_pages_readable() */
1224
1225 if (exec[i].flags & invalid_flags)
1226 return -EINVAL;
1227
1228 /* Offset can be used as input (EXEC_OBJECT_PINNED), reject
1229 * any non-page-aligned or non-canonical addresses.
1230 */
1231 if (exec[i].flags & EXEC_OBJECT_PINNED) {
1232 if (exec[i].offset !=
1233 gen8_canonical_addr(exec[i].offset & PAGE_MASK))
1234 return -EINVAL;
1235 }
1236
1237 /* From drm_mm perspective address space is continuous,
1238 * so from this point we're always using non-canonical
1239 * form internally.
1240 */
1241 exec[i].offset = gen8_noncanonical_addr(exec[i].offset);
1242
1243 if (exec[i].alignment && !is_power_of_2(exec[i].alignment))
1244 return -EINVAL;
1245
1246 /* pad_to_size was once a reserved field, so sanitize it */
1247 if (exec[i].flags & EXEC_OBJECT_PAD_TO_SIZE) {
1248 if (offset_in_page(exec[i].pad_to_size))
1249 return -EINVAL;
1250 } else {
1251 exec[i].pad_to_size = 0;
1252 }
1253
1254 /* First check for malicious input causing overflow in
1255 * the worst case where we need to allocate the entire
1256 * relocation tree as a single array.
1257 */
1258 if (exec[i].relocation_count > relocs_max - relocs_total)
1259 return -EINVAL;
1260 relocs_total += exec[i].relocation_count;
1261
1262 length = exec[i].relocation_count *
1263 sizeof(struct drm_i915_gem_relocation_entry);
1264 /*
1265 * We must check that the entire relocation array is safe
1266 * to read, but since we may need to update the presumed
1267 * offsets during execution, check for full write access.
1268 */
1269 if (!access_ok(VERIFY_WRITE, ptr, length))
1270 return -EFAULT;
1271
1272 if (likely(!i915.prefault_disable)) {
1273 if (fault_in_pages_readable(ptr, length))
1274 return -EFAULT;
1275 }
1276 }
1277
1278 return 0;
1279}
1280
1281static int eb_select_context(struct i915_execbuffer *eb)
1282{
1283 unsigned int ctx_id = i915_execbuffer2_get_context_id(*eb->args);
1284 struct i915_gem_context *ctx;
1285
1286 ctx = i915_gem_context_lookup(eb->file->driver_priv, ctx_id);
1287 if (unlikely(IS_ERR(ctx)))
1288 return PTR_ERR(ctx);
1289
1290 if (unlikely(i915_gem_context_is_banned(ctx))) {
1291 DRM_DEBUG("Context %u tried to submit while banned\n", ctx_id);
1292 return -EIO;
1293 }
1294
1295 eb->ctx = i915_gem_context_get(ctx);
1296 eb->vm = ctx->ppgtt ? &ctx->ppgtt->base : &eb->i915->ggtt.base;
1297
1298 return 0;
1299}
1300
1301void i915_vma_move_to_active(struct i915_vma *vma, 1659void i915_vma_move_to_active(struct i915_vma *vma,
1302 struct drm_i915_gem_request *req, 1660 struct drm_i915_gem_request *req,
1303 unsigned int flags) 1661 unsigned int flags)
@@ -1308,7 +1666,8 @@ void i915_vma_move_to_active(struct i915_vma *vma,
1308 lockdep_assert_held(&req->i915->drm.struct_mutex); 1666 lockdep_assert_held(&req->i915->drm.struct_mutex);
1309 GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); 1667 GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
1310 1668
1311 /* Add a reference if we're newly entering the active list. 1669 /*
1670 * Add a reference if we're newly entering the active list.
1312 * The order in which we add operations to the retirement queue is 1671 * The order in which we add operations to the retirement queue is
1313 * vital here: mark_active adds to the start of the callback list, 1672 * vital here: mark_active adds to the start of the callback list,
1314 * such that subsequent callbacks are called first. Therefore we 1673 * such that subsequent callbacks are called first. Therefore we
@@ -1336,44 +1695,7 @@ void i915_vma_move_to_active(struct i915_vma *vma,
1336 i915_gem_active_set(&vma->last_fence, req); 1695 i915_gem_active_set(&vma->last_fence, req);
1337} 1696}
1338 1697
1339static void eb_export_fence(struct drm_i915_gem_object *obj, 1698static int i915_reset_gen7_sol_offsets(struct drm_i915_gem_request *req)
1340 struct drm_i915_gem_request *req,
1341 unsigned int flags)
1342{
1343 struct reservation_object *resv = obj->resv;
1344
1345 /* Ignore errors from failing to allocate the new fence, we can't
1346 * handle an error right now. Worst case should be missed
1347 * synchronisation leading to rendering corruption.
1348 */
1349 reservation_object_lock(resv, NULL);
1350 if (flags & EXEC_OBJECT_WRITE)
1351 reservation_object_add_excl_fence(resv, &req->fence);
1352 else if (reservation_object_reserve_shared(resv) == 0)
1353 reservation_object_add_shared_fence(resv, &req->fence);
1354 reservation_object_unlock(resv);
1355}
1356
1357static void
1358eb_move_to_active(struct i915_execbuffer *eb)
1359{
1360 struct i915_vma *vma;
1361
1362 list_for_each_entry(vma, &eb->vmas, exec_link) {
1363 struct drm_i915_gem_object *obj = vma->obj;
1364
1365 obj->base.write_domain = 0;
1366 if (vma->exec_entry->flags & EXEC_OBJECT_WRITE)
1367 obj->base.read_domains = 0;
1368 obj->base.read_domains |= I915_GEM_GPU_DOMAINS;
1369
1370 i915_vma_move_to_active(vma, eb->request, vma->exec_entry->flags);
1371 eb_export_fence(obj, eb->request, vma->exec_entry->flags);
1372 }
1373}
1374
1375static int
1376i915_reset_gen7_sol_offsets(struct drm_i915_gem_request *req)
1377{ 1699{
1378 u32 *cs; 1700 u32 *cs;
1379 int i; 1701 int i;
@@ -1383,16 +1705,16 @@ i915_reset_gen7_sol_offsets(struct drm_i915_gem_request *req)
1383 return -EINVAL; 1705 return -EINVAL;
1384 } 1706 }
1385 1707
1386 cs = intel_ring_begin(req, 4 * 3); 1708 cs = intel_ring_begin(req, 4 * 2 + 2);
1387 if (IS_ERR(cs)) 1709 if (IS_ERR(cs))
1388 return PTR_ERR(cs); 1710 return PTR_ERR(cs);
1389 1711
1712 *cs++ = MI_LOAD_REGISTER_IMM(4);
1390 for (i = 0; i < 4; i++) { 1713 for (i = 0; i < 4; i++) {
1391 *cs++ = MI_LOAD_REGISTER_IMM(1);
1392 *cs++ = i915_mmio_reg_offset(GEN7_SO_WRITE_OFFSET(i)); 1714 *cs++ = i915_mmio_reg_offset(GEN7_SO_WRITE_OFFSET(i));
1393 *cs++ = 0; 1715 *cs++ = 0;
1394 } 1716 }
1395 1717 *cs++ = MI_NOOP;
1396 intel_ring_advance(req, cs); 1718 intel_ring_advance(req, cs);
1397 1719
1398 return 0; 1720 return 0;
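
[Editor's note] The change above replaces four separate MI_LOAD_REGISTER_IMM(1) commands with a single MI_LOAD_REGISTER_IMM(4) covering all four SO_WRITE_OFFSET registers, and appends an MI_NOOP so the emitted length stays even, hence intel_ring_begin(req, 4 * 2 + 2). The sketch below shows the resulting dword layout only; the three macros are placeholders, not the driver's definitions.

#include <stdint.h>

#define FAKE_MI_LRI(n)		(0xDEAD0000u | (unsigned int)(n))	/* placeholder opcode */
#define FAKE_MI_NOOP		0x00000000u				/* placeholder opcode */
#define FAKE_SO_WRITE_OFFSET(i)	(0x5280u + 4u * (unsigned int)(i))	/* placeholder offsets */

/* Fill a command-stream buffer with one batched register-load and padding. */
static unsigned int emit_sol_reset(uint32_t *cs)
{
	uint32_t *start = cs;
	int i;

	*cs++ = FAKE_MI_LRI(4);				/* one header for all four writes */
	for (i = 0; i < 4; i++) {
		*cs++ = FAKE_SO_WRITE_OFFSET(i);	/* register offset */
		*cs++ = 0;				/* value: reset to zero */
	}
	*cs++ = FAKE_MI_NOOP;				/* pad to an even dword count */

	return (unsigned int)(cs - start);		/* 4 * 2 + 2 == 10 dwords */
}
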
@@ -1402,24 +1724,24 @@ static struct i915_vma *eb_parse(struct i915_execbuffer *eb, bool is_master)
1402{ 1724{
1403 struct drm_i915_gem_object *shadow_batch_obj; 1725 struct drm_i915_gem_object *shadow_batch_obj;
1404 struct i915_vma *vma; 1726 struct i915_vma *vma;
1405 int ret; 1727 int err;
1406 1728
1407 shadow_batch_obj = i915_gem_batch_pool_get(&eb->engine->batch_pool, 1729 shadow_batch_obj = i915_gem_batch_pool_get(&eb->engine->batch_pool,
1408 PAGE_ALIGN(eb->batch_len)); 1730 PAGE_ALIGN(eb->batch_len));
1409 if (IS_ERR(shadow_batch_obj)) 1731 if (IS_ERR(shadow_batch_obj))
1410 return ERR_CAST(shadow_batch_obj); 1732 return ERR_CAST(shadow_batch_obj);
1411 1733
1412 ret = intel_engine_cmd_parser(eb->engine, 1734 err = intel_engine_cmd_parser(eb->engine,
1413 eb->batch->obj, 1735 eb->batch->obj,
1414 shadow_batch_obj, 1736 shadow_batch_obj,
1415 eb->batch_start_offset, 1737 eb->batch_start_offset,
1416 eb->batch_len, 1738 eb->batch_len,
1417 is_master); 1739 is_master);
1418 if (ret) { 1740 if (err) {
1419 if (ret == -EACCES) /* unhandled chained batch */ 1741 if (err == -EACCES) /* unhandled chained batch */
1420 vma = NULL; 1742 vma = NULL;
1421 else 1743 else
1422 vma = ERR_PTR(ret); 1744 vma = ERR_PTR(err);
1423 goto out; 1745 goto out;
1424 } 1746 }
1425 1747
@@ -1428,10 +1750,10 @@ static struct i915_vma *eb_parse(struct i915_execbuffer *eb, bool is_master)
1428 goto out; 1750 goto out;
1429 1751
1430 vma->exec_entry = 1752 vma->exec_entry =
1431 memset(&eb->shadow_exec_entry, 0, sizeof(*vma->exec_entry)); 1753 memset(&eb->exec[eb->buffer_count++],
1754 0, sizeof(*vma->exec_entry));
1432 vma->exec_entry->flags = __EXEC_OBJECT_HAS_PIN; 1755 vma->exec_entry->flags = __EXEC_OBJECT_HAS_PIN;
1433 i915_gem_object_get(shadow_batch_obj); 1756 __exec_to_vma(vma->exec_entry) = (uintptr_t)i915_vma_get(vma);
1434 list_add_tail(&vma->exec_link, &eb->vmas);
1435 1757
1436out: 1758out:
1437 i915_gem_object_unpin_pages(shadow_batch_obj); 1759 i915_gem_object_unpin_pages(shadow_batch_obj);
@@ -1439,41 +1761,37 @@ out:
1439} 1761}
1440 1762
1441static void 1763static void
1442add_to_client(struct drm_i915_gem_request *req, 1764add_to_client(struct drm_i915_gem_request *req, struct drm_file *file)
1443 struct drm_file *file)
1444{ 1765{
1445 req->file_priv = file->driver_priv; 1766 req->file_priv = file->driver_priv;
1446 list_add_tail(&req->client_link, &req->file_priv->mm.request_list); 1767 list_add_tail(&req->client_link, &req->file_priv->mm.request_list);
1447} 1768}
1448 1769
1449static int 1770static int eb_submit(struct i915_execbuffer *eb)
1450execbuf_submit(struct i915_execbuffer *eb)
1451{ 1771{
1452 int ret; 1772 int err;
1453 1773
1454 ret = eb_move_to_gpu(eb); 1774 err = eb_move_to_gpu(eb);
1455 if (ret) 1775 if (err)
1456 return ret; 1776 return err;
1457 1777
1458 ret = i915_switch_context(eb->request); 1778 err = i915_switch_context(eb->request);
1459 if (ret) 1779 if (err)
1460 return ret; 1780 return err;
1461 1781
1462 if (eb->args->flags & I915_EXEC_GEN7_SOL_RESET) { 1782 if (eb->args->flags & I915_EXEC_GEN7_SOL_RESET) {
1463 ret = i915_reset_gen7_sol_offsets(eb->request); 1783 err = i915_reset_gen7_sol_offsets(eb->request);
1464 if (ret) 1784 if (err)
1465 return ret; 1785 return err;
1466 } 1786 }
1467 1787
1468 ret = eb->engine->emit_bb_start(eb->request, 1788 err = eb->engine->emit_bb_start(eb->request,
1469 eb->batch->node.start + 1789 eb->batch->node.start +
1470 eb->batch_start_offset, 1790 eb->batch_start_offset,
1471 eb->batch_len, 1791 eb->batch_len,
1472 eb->dispatch_flags); 1792 eb->batch_flags);
1473 if (ret) 1793 if (err)
1474 return ret; 1794 return err;
1475
1476 eb_move_to_active(eb);
1477 1795
1478 return 0; 1796 return 0;
1479} 1797}
@@ -1564,34 +1882,36 @@ i915_gem_do_execbuffer(struct drm_device *dev,
1564 struct dma_fence *in_fence = NULL; 1882 struct dma_fence *in_fence = NULL;
1565 struct sync_file *out_fence = NULL; 1883 struct sync_file *out_fence = NULL;
1566 int out_fence_fd = -1; 1884 int out_fence_fd = -1;
1567 int ret; 1885 int err;
1568 1886
1569 if (!i915_gem_check_execbuffer(args)) 1887 BUILD_BUG_ON(__EXEC_OBJECT_INTERNAL_FLAGS &
1570 return -EINVAL; 1888 ~__EXEC_OBJECT_UNKNOWN_FLAGS);
1571
1572 ret = validate_exec_list(dev, exec, args->buffer_count);
1573 if (ret)
1574 return ret;
1575 1889
1576 eb.i915 = to_i915(dev); 1890 eb.i915 = to_i915(dev);
1577 eb.file = file; 1891 eb.file = file;
1578 eb.args = args; 1892 eb.args = args;
1893 if (!(args->flags & I915_EXEC_NO_RELOC))
1894 args->flags |= __EXEC_HAS_RELOC;
1579 eb.exec = exec; 1895 eb.exec = exec;
1580 eb.need_relocs = (args->flags & I915_EXEC_NO_RELOC) == 0; 1896 eb.ctx = NULL;
1897 eb.invalid_flags = __EXEC_OBJECT_UNKNOWN_FLAGS;
1898 if (USES_FULL_PPGTT(eb.i915))
1899 eb.invalid_flags |= EXEC_OBJECT_NEEDS_GTT;
1581 reloc_cache_init(&eb.reloc_cache, eb.i915); 1900 reloc_cache_init(&eb.reloc_cache, eb.i915);
1582 1901
1902 eb.buffer_count = args->buffer_count;
1583 eb.batch_start_offset = args->batch_start_offset; 1903 eb.batch_start_offset = args->batch_start_offset;
1584 eb.batch_len = args->batch_len; 1904 eb.batch_len = args->batch_len;
1585 1905
1586 eb.dispatch_flags = 0; 1906 eb.batch_flags = 0;
1587 if (args->flags & I915_EXEC_SECURE) { 1907 if (args->flags & I915_EXEC_SECURE) {
1588 if (!drm_is_current_master(file) || !capable(CAP_SYS_ADMIN)) 1908 if (!drm_is_current_master(file) || !capable(CAP_SYS_ADMIN))
1589 return -EPERM; 1909 return -EPERM;
1590 1910
1591 eb.dispatch_flags |= I915_DISPATCH_SECURE; 1911 eb.batch_flags |= I915_DISPATCH_SECURE;
1592 } 1912 }
1593 if (args->flags & I915_EXEC_IS_PINNED) 1913 if (args->flags & I915_EXEC_IS_PINNED)
1594 eb.dispatch_flags |= I915_DISPATCH_PINNED; 1914 eb.batch_flags |= I915_DISPATCH_PINNED;
1595 1915
1596 eb.engine = eb_select_engine(eb.i915, file, args); 1916 eb.engine = eb_select_engine(eb.i915, file, args);
1597 if (!eb.engine) 1917 if (!eb.engine)
@@ -1608,7 +1928,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
1608 return -EINVAL; 1928 return -EINVAL;
1609 } 1929 }
1610 1930
1611 eb.dispatch_flags |= I915_DISPATCH_RS; 1931 eb.batch_flags |= I915_DISPATCH_RS;
1612 } 1932 }
1613 1933
1614 if (args->flags & I915_EXEC_FENCE_IN) { 1934 if (args->flags & I915_EXEC_FENCE_IN) {
@@ -1620,71 +1940,53 @@ i915_gem_do_execbuffer(struct drm_device *dev,
1620 if (args->flags & I915_EXEC_FENCE_OUT) { 1940 if (args->flags & I915_EXEC_FENCE_OUT) {
1621 out_fence_fd = get_unused_fd_flags(O_CLOEXEC); 1941 out_fence_fd = get_unused_fd_flags(O_CLOEXEC);
1622 if (out_fence_fd < 0) { 1942 if (out_fence_fd < 0) {
1623 ret = out_fence_fd; 1943 err = out_fence_fd;
1624 goto err_in_fence; 1944 goto err_in_fence;
1625 } 1945 }
1626 } 1946 }
1627 1947
1628 /* Take a local wakeref for preparing to dispatch the execbuf as 1948 if (eb_create(&eb))
1949 return -ENOMEM;
1950
1951 /*
1952 * Take a local wakeref for preparing to dispatch the execbuf as
1629 * we expect to access the hardware fairly frequently in the 1953 * we expect to access the hardware fairly frequently in the
1630 * process. Upon first dispatch, we acquire another prolonged 1954 * process. Upon first dispatch, we acquire another prolonged
1631 * wakeref that we hold until the GPU has been idle for at least 1955 * wakeref that we hold until the GPU has been idle for at least
1632 * 100ms. 1956 * 100ms.
1633 */ 1957 */
1634 intel_runtime_pm_get(eb.i915); 1958 intel_runtime_pm_get(eb.i915);
1959 err = i915_mutex_lock_interruptible(dev);
1960 if (err)
1961 goto err_rpm;
1635 1962
1636 ret = i915_mutex_lock_interruptible(dev); 1963 err = eb_select_context(&eb);
1637 if (ret) 1964 if (unlikely(err))
1638 goto pre_mutex_err; 1965 goto err_unlock;
1639
1640 ret = eb_select_context(&eb);
1641 if (ret) {
1642 mutex_unlock(&dev->struct_mutex);
1643 goto pre_mutex_err;
1644 }
1645
1646 if (eb_create(&eb)) {
1647 i915_gem_context_put(eb.ctx);
1648 mutex_unlock(&dev->struct_mutex);
1649 ret = -ENOMEM;
1650 goto pre_mutex_err;
1651 }
1652
1653 /* Look up object handles */
1654 ret = eb_lookup_vmas(&eb);
1655 if (ret)
1656 goto err;
1657
1658 /* take note of the batch buffer before we might reorder the lists */
1659 eb.batch = eb_get_batch(&eb);
1660
1661 /* Move the objects en-masse into the GTT, evicting if necessary. */
1662 ret = eb_reserve(&eb);
1663 if (ret)
1664 goto err;
1665 1966
1666 /* The objects are in their final locations, apply the relocations. */ 1967 err = eb_relocate(&eb);
1667 if (eb.need_relocs) 1968 if (err)
1668 ret = eb_relocate(&eb); 1969 /*
1669 if (ret) { 1970 * If the user expects the execobject.offset and
1670 if (ret == -EFAULT) { 1971 * reloc.presumed_offset to be an exact match,
1671 ret = eb_relocate_slow(&eb); 1972 * as for using NO_RELOC, then we cannot update
1672 BUG_ON(!mutex_is_locked(&dev->struct_mutex)); 1973 * the execobject.offset until we have completed
1673 } 1974 * relocation.
1674 if (ret) 1975 */
1675 goto err; 1976 args->flags &= ~__EXEC_HAS_RELOC;
1676 } 1977 if (err < 0)
1978 goto err_vma;
1677 1979
1678 if (eb.batch->exec_entry->flags & EXEC_OBJECT_WRITE) { 1980 if (unlikely(eb.batch->exec_entry->flags & EXEC_OBJECT_WRITE)) {
1679 DRM_DEBUG("Attempting to use self-modifying batch buffer\n"); 1981 DRM_DEBUG("Attempting to use self-modifying batch buffer\n");
1680 ret = -EINVAL; 1982 err = -EINVAL;
1681 goto err; 1983 goto err_vma;
1682 } 1984 }
1683 if (eb.batch_start_offset > eb.batch->size || 1985 if (eb.batch_start_offset > eb.batch->size ||
1684 eb.batch_len > eb.batch->size - eb.batch_start_offset) { 1986 eb.batch_len > eb.batch->size - eb.batch_start_offset) {
1685 DRM_DEBUG("Attempting to use out-of-bounds batch\n"); 1987 DRM_DEBUG("Attempting to use out-of-bounds batch\n");
1686 ret = -EINVAL; 1988 err = -EINVAL;
1687 goto err; 1989 goto err_vma;
1688 } 1990 }
1689 1991
1690 if (eb.engine->needs_cmd_parser && eb.batch_len) { 1992 if (eb.engine->needs_cmd_parser && eb.batch_len) {
@@ -1692,8 +1994,8 @@ i915_gem_do_execbuffer(struct drm_device *dev,
1692 1994
1693 vma = eb_parse(&eb, drm_is_current_master(file)); 1995 vma = eb_parse(&eb, drm_is_current_master(file));
1694 if (IS_ERR(vma)) { 1996 if (IS_ERR(vma)) {
1695 ret = PTR_ERR(vma); 1997 err = PTR_ERR(vma);
1696 goto err; 1998 goto err_vma;
1697 } 1999 }
1698 2000
1699 if (vma) { 2001 if (vma) {
@@ -1706,7 +2008,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
1706 * specifically don't want that set on batches the 2008 * specifically don't want that set on batches the
1707 * command parser has accepted. 2009 * command parser has accepted.
1708 */ 2010 */
1709 eb.dispatch_flags |= I915_DISPATCH_SECURE; 2011 eb.batch_flags |= I915_DISPATCH_SECURE;
1710 eb.batch_start_offset = 0; 2012 eb.batch_start_offset = 0;
1711 eb.batch = vma; 2013 eb.batch = vma;
1712 } 2014 }
@@ -1715,11 +2017,11 @@ i915_gem_do_execbuffer(struct drm_device *dev,
1715 if (eb.batch_len == 0) 2017 if (eb.batch_len == 0)
1716 eb.batch_len = eb.batch->size - eb.batch_start_offset; 2018 eb.batch_len = eb.batch->size - eb.batch_start_offset;
1717 2019
1718 /* snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure 2020 /*
2021 * snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure
1719 * batch" bit. Hence we need to pin secure batches into the global gtt. 2022 * batch" bit. Hence we need to pin secure batches into the global gtt.
1720 * hsw should have this fixed, but bdw mucks it up again. */ 2023 * hsw should have this fixed, but bdw mucks it up again. */
1721 if (eb.dispatch_flags & I915_DISPATCH_SECURE) { 2024 if (eb.batch_flags & I915_DISPATCH_SECURE) {
1722 struct drm_i915_gem_object *obj = eb.batch->obj;
1723 struct i915_vma *vma; 2025 struct i915_vma *vma;
1724 2026
1725 /* 2027 /*
@@ -1732,10 +2034,10 @@ i915_gem_do_execbuffer(struct drm_device *dev,
1732 * fitting due to fragmentation. 2034 * fitting due to fragmentation.
1733 * So this is actually safe. 2035 * So this is actually safe.
1734 */ 2036 */
1735 vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 0); 2037 vma = i915_gem_object_ggtt_pin(eb.batch->obj, NULL, 0, 0, 0);
1736 if (IS_ERR(vma)) { 2038 if (IS_ERR(vma)) {
1737 ret = PTR_ERR(vma); 2039 err = PTR_ERR(vma);
1738 goto err; 2040 goto err_vma;
1739 } 2041 }
1740 2042
1741 eb.batch = vma; 2043 eb.batch = vma;
@@ -1744,25 +2046,26 @@ i915_gem_do_execbuffer(struct drm_device *dev,
1744 /* Allocate a request for this batch buffer nice and early. */ 2046 /* Allocate a request for this batch buffer nice and early. */
1745 eb.request = i915_gem_request_alloc(eb.engine, eb.ctx); 2047 eb.request = i915_gem_request_alloc(eb.engine, eb.ctx);
1746 if (IS_ERR(eb.request)) { 2048 if (IS_ERR(eb.request)) {
1747 ret = PTR_ERR(eb.request); 2049 err = PTR_ERR(eb.request);
1748 goto err_batch_unpin; 2050 goto err_batch_unpin;
1749 } 2051 }
1750 2052
1751 if (in_fence) { 2053 if (in_fence) {
1752 ret = i915_gem_request_await_dma_fence(eb.request, in_fence); 2054 err = i915_gem_request_await_dma_fence(eb.request, in_fence);
1753 if (ret < 0) 2055 if (err < 0)
1754 goto err_request; 2056 goto err_request;
1755 } 2057 }
1756 2058
1757 if (out_fence_fd != -1) { 2059 if (out_fence_fd != -1) {
1758 out_fence = sync_file_create(&eb.request->fence); 2060 out_fence = sync_file_create(&eb.request->fence);
1759 if (!out_fence) { 2061 if (!out_fence) {
1760 ret = -ENOMEM; 2062 err = -ENOMEM;
1761 goto err_request; 2063 goto err_request;
1762 } 2064 }
1763 } 2065 }
1764 2066
1765 /* Whilst this request exists, batch_obj will be on the 2067 /*
2068 * Whilst this request exists, batch_obj will be on the
1766 * active_list, and so will hold the active reference. Only when this 2069 * active_list, and so will hold the active reference. Only when this
1767 * request is retired will the batch_obj be moved onto the 2070
1768 * inactive_list and lose its active reference. Hence we do not need 2071 * inactive_list and lose its active reference. Hence we do not need
@@ -1770,14 +2073,14 @@ i915_gem_do_execbuffer(struct drm_device *dev,
1770 */ 2073 */
1771 eb.request->batch = eb.batch; 2074 eb.request->batch = eb.batch;
1772 2075
1773 trace_i915_gem_request_queue(eb.request, eb.dispatch_flags); 2076 trace_i915_gem_request_queue(eb.request, eb.batch_flags);
1774 ret = execbuf_submit(&eb); 2077 err = eb_submit(&eb);
1775err_request: 2078err_request:
1776 __i915_add_request(eb.request, ret == 0); 2079 __i915_add_request(eb.request, err == 0);
1777 add_to_client(eb.request, file); 2080 add_to_client(eb.request, file);
1778 2081
1779 if (out_fence) { 2082 if (out_fence) {
1780 if (ret == 0) { 2083 if (err == 0) {
1781 fd_install(out_fence_fd, out_fence->file); 2084 fd_install(out_fence_fd, out_fence->file);
1782 args->rsvd2 &= GENMASK_ULL(0, 31); /* keep in-fence */ 2085 args->rsvd2 &= GENMASK_ULL(0, 31); /* keep in-fence */
1783 args->rsvd2 |= (u64)out_fence_fd << 32; 2086 args->rsvd2 |= (u64)out_fence_fd << 32;
@@ -1788,28 +2091,22 @@ err_request:
1788 } 2091 }
1789 2092
1790err_batch_unpin: 2093err_batch_unpin:
1791 /* 2094 if (eb.batch_flags & I915_DISPATCH_SECURE)
1792 * FIXME: We crucially rely upon the active tracking for the (ppgtt)
1793 * batch vma for correctness. For less ugly and less fragility this
1794 * needs to be adjusted to also track the ggtt batch vma properly as
1795 * active.
1796 */
1797 if (eb.dispatch_flags & I915_DISPATCH_SECURE)
1798 i915_vma_unpin(eb.batch); 2095 i915_vma_unpin(eb.batch);
1799err: 2096err_vma:
1800 /* the request owns the ref now */ 2097 if (eb.exec)
1801 eb_destroy(&eb); 2098 eb_release_vmas(&eb);
2099 i915_gem_context_put(eb.ctx);
2100err_unlock:
1802 mutex_unlock(&dev->struct_mutex); 2101 mutex_unlock(&dev->struct_mutex);
1803 2102err_rpm:
1804pre_mutex_err:
1805 /* intel_gpu_busy should also get a ref, so it will free when the device
1806 * is really idle. */
1807 intel_runtime_pm_put(eb.i915); 2103 intel_runtime_pm_put(eb.i915);
2104 eb_destroy(&eb);
1808 if (out_fence_fd != -1) 2105 if (out_fence_fd != -1)
1809 put_unused_fd(out_fence_fd); 2106 put_unused_fd(out_fence_fd);
1810err_in_fence: 2107err_in_fence:
1811 dma_fence_put(in_fence); 2108 dma_fence_put(in_fence);
1812 return ret; 2109 return err;
1813} 2110}
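
[Editor's note] On the userspace side of the fence plumbing above: with I915_EXEC_FENCE_IN the submission waits on a sync-file fd passed in the low 32 bits of rsvd2, and with I915_EXEC_FENCE_OUT the kernel installs a new sync-file fd and returns it in the high 32 bits. The sketch below is a hedged illustration that assumes the libdrm include path and the write-back variant of the execbuffer2 ioctl (DRM_IOCTL_I915_GEM_EXECBUFFER2_WR) so that rsvd2 is copied back to userspace; error handling is minimal.

#include <stdint.h>
#include <xf86drm.h>
#include <i915_drm.h>

static int submit_with_fences(int drm_fd,
			      struct drm_i915_gem_execbuffer2 *execbuf,
			      int in_fence_fd, int *out_fence_fd)
{
	execbuf->flags |= I915_EXEC_FENCE_OUT;
	if (in_fence_fd >= 0) {
		execbuf->flags |= I915_EXEC_FENCE_IN;
		execbuf->rsvd2 = (uint32_t)in_fence_fd;	/* low 32 bits: in-fence */
	}

	if (drmIoctl(drm_fd, DRM_IOCTL_I915_GEM_EXECBUFFER2_WR, execbuf))
		return -1;

	*out_fence_fd = (int)(execbuf->rsvd2 >> 32);	/* high 32 bits: out-fence */
	return 0;
}

The returned fd behaves like any other sync file: it can be polled, waited on, or handed to another driver.
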
1814 2111
1815/* 2112/*
@@ -1820,20 +2117,38 @@ int
1820i915_gem_execbuffer(struct drm_device *dev, void *data, 2117i915_gem_execbuffer(struct drm_device *dev, void *data,
1821 struct drm_file *file) 2118 struct drm_file *file)
1822{ 2119{
2120 const size_t sz = sizeof(struct drm_i915_gem_exec_object2);
1823 struct drm_i915_gem_execbuffer *args = data; 2121 struct drm_i915_gem_execbuffer *args = data;
1824 struct drm_i915_gem_execbuffer2 exec2; 2122 struct drm_i915_gem_execbuffer2 exec2;
1825 struct drm_i915_gem_exec_object *exec_list = NULL; 2123 struct drm_i915_gem_exec_object *exec_list = NULL;
1826 struct drm_i915_gem_exec_object2 *exec2_list = NULL; 2124 struct drm_i915_gem_exec_object2 *exec2_list = NULL;
1827 int ret, i; 2125 unsigned int i;
2126 int err;
1828 2127
1829 if (args->buffer_count < 1) { 2128 if (args->buffer_count < 1 || args->buffer_count > SIZE_MAX / sz - 1) {
1830 DRM_DEBUG("execbuf with %d buffers\n", args->buffer_count); 2129 DRM_DEBUG("execbuf2 with %d buffers\n", args->buffer_count);
1831 return -EINVAL; 2130 return -EINVAL;
1832 } 2131 }
1833 2132
2133 exec2.buffers_ptr = args->buffers_ptr;
2134 exec2.buffer_count = args->buffer_count;
2135 exec2.batch_start_offset = args->batch_start_offset;
2136 exec2.batch_len = args->batch_len;
2137 exec2.DR1 = args->DR1;
2138 exec2.DR4 = args->DR4;
2139 exec2.num_cliprects = args->num_cliprects;
2140 exec2.cliprects_ptr = args->cliprects_ptr;
2141 exec2.flags = I915_EXEC_RENDER;
2142 i915_execbuffer2_set_context_id(exec2, 0);
2143
2144 if (!i915_gem_check_execbuffer(&exec2))
2145 return -EINVAL;
2146
1834 /* Copy in the exec list from userland */ 2147 /* Copy in the exec list from userland */
1835 exec_list = kvmalloc_array(sizeof(*exec_list), args->buffer_count, GFP_KERNEL); 2148 exec_list = kvmalloc_array(args->buffer_count, sizeof(*exec_list),
1836 exec2_list = kvmalloc_array(sizeof(*exec2_list), args->buffer_count, GFP_KERNEL); 2149 __GFP_NOWARN | GFP_TEMPORARY);
2150 exec2_list = kvmalloc_array(args->buffer_count + 1, sz,
2151 __GFP_NOWARN | GFP_TEMPORARY);
1837 if (exec_list == NULL || exec2_list == NULL) { 2152 if (exec_list == NULL || exec2_list == NULL) {
1838 DRM_DEBUG("Failed to allocate exec list for %d buffers\n", 2153 DRM_DEBUG("Failed to allocate exec list for %d buffers\n",
1839 args->buffer_count); 2154 args->buffer_count);
@@ -1841,12 +2156,12 @@ i915_gem_execbuffer(struct drm_device *dev, void *data,
1841 kvfree(exec2_list); 2156 kvfree(exec2_list);
1842 return -ENOMEM; 2157 return -ENOMEM;
1843 } 2158 }
1844 ret = copy_from_user(exec_list, 2159 err = copy_from_user(exec_list,
1845 u64_to_user_ptr(args->buffers_ptr), 2160 u64_to_user_ptr(args->buffers_ptr),
1846 sizeof(*exec_list) * args->buffer_count); 2161 sizeof(*exec_list) * args->buffer_count);
1847 if (ret != 0) { 2162 if (err) {
1848 DRM_DEBUG("copy %d exec entries failed %d\n", 2163 DRM_DEBUG("copy %d exec entries failed %d\n",
1849 args->buffer_count, ret); 2164 args->buffer_count, err);
1850 kvfree(exec_list); 2165 kvfree(exec_list);
1851 kvfree(exec2_list); 2166 kvfree(exec2_list);
1852 return -EFAULT; 2167 return -EFAULT;
@@ -1864,99 +2179,94 @@ i915_gem_execbuffer(struct drm_device *dev, void *data,
1864 exec2_list[i].flags = 0; 2179 exec2_list[i].flags = 0;
1865 } 2180 }
1866 2181
1867 exec2.buffers_ptr = args->buffers_ptr; 2182 err = i915_gem_do_execbuffer(dev, file, &exec2, exec2_list);
1868 exec2.buffer_count = args->buffer_count; 2183 if (exec2.flags & __EXEC_HAS_RELOC) {
1869 exec2.batch_start_offset = args->batch_start_offset;
1870 exec2.batch_len = args->batch_len;
1871 exec2.DR1 = args->DR1;
1872 exec2.DR4 = args->DR4;
1873 exec2.num_cliprects = args->num_cliprects;
1874 exec2.cliprects_ptr = args->cliprects_ptr;
1875 exec2.flags = I915_EXEC_RENDER;
1876 i915_execbuffer2_set_context_id(exec2, 0);
1877
1878 ret = i915_gem_do_execbuffer(dev, file, &exec2, exec2_list);
1879 if (!ret) {
1880 struct drm_i915_gem_exec_object __user *user_exec_list = 2184 struct drm_i915_gem_exec_object __user *user_exec_list =
1881 u64_to_user_ptr(args->buffers_ptr); 2185 u64_to_user_ptr(args->buffers_ptr);
1882 2186
1883 /* Copy the new buffer offsets back to the user's exec list. */ 2187 /* Copy the new buffer offsets back to the user's exec list. */
1884 for (i = 0; i < args->buffer_count; i++) { 2188 for (i = 0; i < args->buffer_count; i++) {
2189 if (!(exec2_list[i].offset & UPDATE))
2190 continue;
2191
1885 exec2_list[i].offset = 2192 exec2_list[i].offset =
1886 gen8_canonical_addr(exec2_list[i].offset); 2193 gen8_canonical_addr(exec2_list[i].offset & PIN_OFFSET_MASK);
1887 ret = __copy_to_user(&user_exec_list[i].offset, 2194 exec2_list[i].offset &= PIN_OFFSET_MASK;
1888 &exec2_list[i].offset, 2195 if (__copy_to_user(&user_exec_list[i].offset,
1889 sizeof(user_exec_list[i].offset)); 2196 &exec2_list[i].offset,
1890 if (ret) { 2197 sizeof(user_exec_list[i].offset)))
1891 ret = -EFAULT;
1892 DRM_DEBUG("failed to copy %d exec entries "
1893 "back to user (%d)\n",
1894 args->buffer_count, ret);
1895 break; 2198 break;
1896 }
1897 } 2199 }
1898 } 2200 }
1899 2201
1900 kvfree(exec_list); 2202 kvfree(exec_list);
1901 kvfree(exec2_list); 2203 kvfree(exec2_list);
1902 return ret; 2204 return err;
1903} 2205}
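
[Editor's note] The middle of the per-object conversion loop above falls outside this hunk, but the idea is a field-by-field mapping from the legacy drm_i915_gem_exec_object layout into drm_i915_gem_exec_object2, with the newer-only fields zeroed (as the surviving exec2_list[i].flags = 0 line shows). A hedged sketch of that kind of conversion, assuming libdrm's i915_drm.h:

#include <string.h>
#include <i915_drm.h>

/* Translate one legacy exec object into the execbuffer2 form. */
static void exec_object_to_exec2(const struct drm_i915_gem_exec_object *in,
				 struct drm_i915_gem_exec_object2 *out)
{
	memset(out, 0, sizeof(*out));		/* flags, pad_to_size, rsvd* start at zero */
	out->handle = in->handle;
	out->relocation_count = in->relocation_count;
	out->relocs_ptr = in->relocs_ptr;
	out->alignment = in->alignment;
	out->offset = in->offset;
}
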
1904 2206
1905int 2207int
1906i915_gem_execbuffer2(struct drm_device *dev, void *data, 2208i915_gem_execbuffer2(struct drm_device *dev, void *data,
1907 struct drm_file *file) 2209 struct drm_file *file)
1908{ 2210{
2211 const size_t sz = sizeof(struct drm_i915_gem_exec_object2);
1909 struct drm_i915_gem_execbuffer2 *args = data; 2212 struct drm_i915_gem_execbuffer2 *args = data;
1910 struct drm_i915_gem_exec_object2 *exec2_list = NULL; 2213 struct drm_i915_gem_exec_object2 *exec2_list;
1911 int ret; 2214 int err;
1912 2215
1913 if (args->buffer_count < 1 || 2216 if (args->buffer_count < 1 || args->buffer_count > SIZE_MAX / sz - 1) {
1914 args->buffer_count > UINT_MAX / sizeof(*exec2_list)) {
1915 DRM_DEBUG("execbuf2 with %d buffers\n", args->buffer_count); 2217 DRM_DEBUG("execbuf2 with %d buffers\n", args->buffer_count);
1916 return -EINVAL; 2218 return -EINVAL;
1917 } 2219 }
1918 2220
1919 exec2_list = kvmalloc_array(args->buffer_count, 2221 if (!i915_gem_check_execbuffer(args))
1920 sizeof(*exec2_list), 2222 return -EINVAL;
1921 GFP_TEMPORARY); 2223
2224 /* Allocate an extra slot for use by the command parser */
2225 exec2_list = kvmalloc_array(args->buffer_count + 1, sz,
2226 __GFP_NOWARN | GFP_TEMPORARY);
1922 if (exec2_list == NULL) { 2227 if (exec2_list == NULL) {
1923 DRM_DEBUG("Failed to allocate exec list for %d buffers\n", 2228 DRM_DEBUG("Failed to allocate exec list for %d buffers\n",
1924 args->buffer_count); 2229 args->buffer_count);
1925 return -ENOMEM; 2230 return -ENOMEM;
1926 } 2231 }
1927 ret = copy_from_user(exec2_list, 2232 if (copy_from_user(exec2_list,
1928 u64_to_user_ptr(args->buffers_ptr), 2233 u64_to_user_ptr(args->buffers_ptr),
1929 sizeof(*exec2_list) * args->buffer_count); 2234 sizeof(*exec2_list) * args->buffer_count)) {
1930 if (ret != 0) { 2235 DRM_DEBUG("copy %d exec entries failed\n", args->buffer_count);
1931 DRM_DEBUG("copy %d exec entries failed %d\n",
1932 args->buffer_count, ret);
1933 kvfree(exec2_list); 2236 kvfree(exec2_list);
1934 return -EFAULT; 2237 return -EFAULT;
1935 } 2238 }
1936 2239
1937 ret = i915_gem_do_execbuffer(dev, file, args, exec2_list); 2240 err = i915_gem_do_execbuffer(dev, file, args, exec2_list);
1938 if (!ret) { 2241
1939 /* Copy the new buffer offsets back to the user's exec list. */ 2242 /*
2243 * Now that we have begun execution of the batchbuffer, we ignore
2244 * any new error after this point. Also given that we have already
2245 * updated the associated relocations, we try to write out the current
2246 * object locations irrespective of any error.
2247 */
2248 if (args->flags & __EXEC_HAS_RELOC) {
1940 struct drm_i915_gem_exec_object2 __user *user_exec_list = 2249 struct drm_i915_gem_exec_object2 __user *user_exec_list =
1941 u64_to_user_ptr(args->buffers_ptr); 2250 u64_to_user_ptr(args->buffers_ptr);
1942 int i; 2251 unsigned int i;
1943 2252
2253 /* Copy the new buffer offsets back to the user's exec list. */
2254 user_access_begin();
1944 for (i = 0; i < args->buffer_count; i++) { 2255 for (i = 0; i < args->buffer_count; i++) {
2256 if (!(exec2_list[i].offset & UPDATE))
2257 continue;
2258
1945 exec2_list[i].offset = 2259 exec2_list[i].offset =
1946 gen8_canonical_addr(exec2_list[i].offset); 2260 gen8_canonical_addr(exec2_list[i].offset & PIN_OFFSET_MASK);
1947 ret = __copy_to_user(&user_exec_list[i].offset, 2261 unsafe_put_user(exec2_list[i].offset,
1948 &exec2_list[i].offset, 2262 &user_exec_list[i].offset,
1949 sizeof(user_exec_list[i].offset)); 2263 end_user);
1950 if (ret) {
1951 ret = -EFAULT;
1952 DRM_DEBUG("failed to copy %d exec entries "
1953 "back to user\n",
1954 args->buffer_count);
1955 break;
1956 }
1957 } 2264 }
2265end_user:
2266 user_access_end();
1958 } 2267 }
1959 2268
2269 args->flags &= ~__I915_EXEC_UNKNOWN_FLAGS;
1960 kvfree(exec2_list); 2270 kvfree(exec2_list);
1961 return ret; 2271 return err;
1962} 2272}