author     Chris Wilson <chris@chris-wilson.co.uk>    2017-06-16 10:05:19 -0400
committer  Chris Wilson <chris@chris-wilson.co.uk>    2017-06-16 11:54:05 -0400
commit     2889caa9232109afc8881f29a2205abeb5709d0c (patch)
tree       395a6e3d57003a20b890860cff70e32430030544
parent     071750e550af46b5d3a84ad56c2a108c3e136284 (diff)
drm/i915: Eliminate lots of iterations over the execobjects array
The major scaling bottleneck in execbuffer is the processing of the execobjects. Creating an auxiliary list is inefficient when compared to using the execobject array we already have allocated.

Reservation is then split into phases. As we look up each VMA, we try to bind it back into its active location. Only if that fails do we add it to the unbound list for phase 2. In phase 2, we try to add all those objects that could not fit into their previous location, with a fallback to retrying all objects and evicting the VM in case of severe fragmentation. (This is the same as before, except that phase 1 is now done inline with looking up the VMA to avoid an iteration over the execobject array. In the ideal case, we eliminate the separate reservation phase.) During the reservation phase, we only evict from the VM between passes (rather than, as currently, every time we try to fit a new VMA). In testing with Unreal Engine's Atlantis demo, which stresses the eviction logic on gen7 class hardware, this speeds up the framerate by a factor of 2.

The second loop amalgamation is between move_to_gpu and move_to_active. As we always submit the request, even if incomplete, we can use the current request to track active VMAs as we perform the flushes and synchronisation required.

The next big advancement is to avoid copying back to the user any execobjects and relocations that are not changed.

v2: Add a Theory of Operation spiel.
v3: Fall back to slow relocations in preparation for flushing userptrs.
v4: Document struct members, factor out eb_validate_vma(), add a few more comments to explain some magic and hide other magic behind macros.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
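For orientation, the multi-pass reservation described above corresponds to the new eb_reserve() added in the i915_gem_execbuffer.c hunk below. The following condensed C sketch is illustrative only, not the verbatim kernel code: resort_unbound_by_priority() is a hypothetical placeholder for the inline loop that releases the pins and re-sorts the execobjects, and error handling is simplified.

/*
 * Sketch of the multi-pass reservation retry loop (compare eb_reserve()
 * in the diff below). Placeholder helpers are marked as such.
 */
static int reservation_sketch(struct i915_execbuffer *eb)
{
	struct i915_vma *vma;
	unsigned int pass = 0;
	int err = 0;

	do {
		/* Phase 2: try to pin every object still left unbound. */
		list_for_each_entry(vma, &eb->unbound, exec_link) {
			err = eb_reserve_vma(eb, vma);
			if (err)
				break;
		}
		if (err != -ENOSPC)
			return err;	/* done, or a genuine error */

		/* Out of space: drop the pins, retry in priority order. */
		resort_unbound_by_priority(eb);	/* placeholder */

		switch (pass++) {
		case 0:
			break;			/* simple retry */
		case 1:
			/* Too fragmented: clear the whole VM first. */
			err = i915_gem_evict_vm(eb->vm);
			if (err)
				return err;
			break;
		default:
			return -ENOSPC;		/* give up */
		}
	} while (1);
}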
-rw-r--r--  drivers/gpu/drm/i915/i915_drv.h                       2
-rw-r--r--  drivers/gpu/drm/i915/i915_gem_evict.c                92
-rw-r--r--  drivers/gpu/drm/i915/i915_gem_execbuffer.c         2038
-rw-r--r--  drivers/gpu/drm/i915/i915_vma.c                       2
-rw-r--r--  drivers/gpu/drm/i915/i915_vma.h                       1
-rw-r--r--  drivers/gpu/drm/i915/selftests/i915_gem_evict.c       4
-rw-r--r--  drivers/gpu/drm/i915/selftests/i915_vma.c            16
7 files changed, 1239 insertions(+), 916 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index af2a54672396..7e182dd7e356 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3581,7 +3581,7 @@ int __must_check i915_gem_evict_something(struct i915_address_space *vm,
 int __must_check i915_gem_evict_for_node(struct i915_address_space *vm,
					  struct drm_mm_node *node,
					  unsigned int flags);
-int i915_gem_evict_vm(struct i915_address_space *vm, bool do_idle);
+int i915_gem_evict_vm(struct i915_address_space *vm);
 
 /* belongs in i915_gem_gtt.h */
 static inline void i915_gem_chipset_flush(struct drm_i915_private *dev_priv)
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
index 204a2d9288ae..a193f1b36c67 100644
--- a/drivers/gpu/drm/i915/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/i915_gem_evict.c
@@ -50,6 +50,29 @@ static bool ggtt_is_idle(struct drm_i915_private *dev_priv)
 	return true;
 }
 
+static int ggtt_flush(struct drm_i915_private *i915)
+{
+	int err;
+
+	/* Not everything in the GGTT is tracked via vma (otherwise we
+	 * could evict as required with minimal stalling) so we are forced
+	 * to idle the GPU and explicitly retire outstanding requests in
+	 * the hopes that we can then remove contexts and the like only
+	 * bound by their active reference.
+	 */
+	err = i915_gem_switch_to_kernel_context(i915);
+	if (err)
+		return err;
+
+	err = i915_gem_wait_for_idle(i915,
+				     I915_WAIT_INTERRUPTIBLE |
+				     I915_WAIT_LOCKED);
+	if (err)
+		return err;
+
+	return 0;
+}
+
 static bool
 mark_free(struct drm_mm_scan *scan,
 	  struct i915_vma *vma,
@@ -175,19 +198,7 @@ search_again:
 		return intel_has_pending_fb_unpin(dev_priv) ? -EAGAIN : -ENOSPC;
 	}
 
-	/* Not everything in the GGTT is tracked via vma (otherwise we
-	 * could evict as required with minimal stalling) so we are forced
-	 * to idle the GPU and explicitly retire outstanding requests in
-	 * the hopes that we can then remove contexts and the like only
-	 * bound by their active reference.
-	 */
-	ret = i915_gem_switch_to_kernel_context(dev_priv);
-	if (ret)
-		return ret;
-
-	ret = i915_gem_wait_for_idle(dev_priv,
-				     I915_WAIT_INTERRUPTIBLE |
-				     I915_WAIT_LOCKED);
+	ret = ggtt_flush(dev_priv);
 	if (ret)
 		return ret;
 
@@ -337,10 +348,8 @@ int i915_gem_evict_for_node(struct i915_address_space *vm,
 /**
  * i915_gem_evict_vm - Evict all idle vmas from a vm
  * @vm: Address space to cleanse
- * @do_idle: Boolean directing whether to idle first.
  *
- * This function evicts all idles vmas from a vm. If all unpinned vmas should be
- * evicted the @do_idle needs to be set to true.
+ * This function evicts all vmas from a vm.
  *
  * This is used by the execbuf code as a last-ditch effort to defragment the
  * address space.
@@ -348,37 +357,50 @@ int i915_gem_evict_for_node(struct i915_address_space *vm,
  * To clarify: This is for freeing up virtual address space, not for freeing
  * memory in e.g. the shrinker.
  */
-int i915_gem_evict_vm(struct i915_address_space *vm, bool do_idle)
+int i915_gem_evict_vm(struct i915_address_space *vm)
 {
+	struct list_head *phases[] = {
+		&vm->inactive_list,
+		&vm->active_list,
+		NULL
+	}, **phase;
+	struct list_head eviction_list;
 	struct i915_vma *vma, *next;
 	int ret;
 
 	lockdep_assert_held(&vm->i915->drm.struct_mutex);
 	trace_i915_gem_evict_vm(vm);
 
-	if (do_idle) {
-		struct drm_i915_private *dev_priv = vm->i915;
-
-		if (i915_is_ggtt(vm)) {
-			ret = i915_gem_switch_to_kernel_context(dev_priv);
-			if (ret)
-				return ret;
-		}
-
-		ret = i915_gem_wait_for_idle(dev_priv,
-					     I915_WAIT_INTERRUPTIBLE |
-					     I915_WAIT_LOCKED);
+	/* Switch back to the default context in order to unpin
+	 * the existing context objects. However, such objects only
+	 * pin themselves inside the global GTT and performing the
+	 * switch otherwise is ineffective.
+	 */
+	if (i915_is_ggtt(vm)) {
+		ret = ggtt_flush(vm->i915);
 		if (ret)
 			return ret;
-
-		WARN_ON(!list_empty(&vm->active_list));
 	}
 
-	list_for_each_entry_safe(vma, next, &vm->inactive_list, vm_link)
-		if (!i915_vma_is_pinned(vma))
-			WARN_ON(i915_vma_unbind(vma));
+	INIT_LIST_HEAD(&eviction_list);
+	phase = phases;
+	do {
+		list_for_each_entry(vma, *phase, vm_link) {
+			if (i915_vma_is_pinned(vma))
+				continue;
 
-	return 0;
+			__i915_vma_pin(vma);
+			list_add(&vma->evict_link, &eviction_list);
+		}
+	} while (*++phase);
+
+	ret = 0;
+	list_for_each_entry_safe(vma, next, &eviction_list, evict_link) {
+		__i915_vma_unpin(vma);
+		if (ret == 0)
+			ret = i915_vma_unbind(vma);
+	}
+	return ret;
 }
 
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 9c3f6c40270f..a052072fe8b3 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -42,41 +42,195 @@
42 42
43#define DBG_USE_CPU_RELOC 0 /* -1 force GTT relocs; 1 force CPU relocs */ 43#define DBG_USE_CPU_RELOC 0 /* -1 force GTT relocs; 1 force CPU relocs */
44 44
45#define __EXEC_OBJECT_HAS_PIN (1<<31) 45#define __EXEC_OBJECT_HAS_PIN BIT(31)
46#define __EXEC_OBJECT_HAS_FENCE (1<<30) 46#define __EXEC_OBJECT_HAS_FENCE BIT(30)
47#define __EXEC_OBJECT_NEEDS_MAP (1<<29) 47#define __EXEC_OBJECT_NEEDS_MAP BIT(29)
48#define __EXEC_OBJECT_NEEDS_BIAS (1<<28) 48#define __EXEC_OBJECT_NEEDS_BIAS BIT(28)
49#define __EXEC_OBJECT_INTERNAL_FLAGS (0xf<<28) /* all of the above */ 49#define __EXEC_OBJECT_INTERNAL_FLAGS (~0u << 28) /* all of the above */
50#define __EXEC_OBJECT_RESERVED (__EXEC_OBJECT_HAS_PIN | __EXEC_OBJECT_HAS_FENCE)
51
52#define __EXEC_HAS_RELOC BIT(31)
53#define __EXEC_VALIDATED BIT(30)
54#define UPDATE PIN_OFFSET_FIXED
50 55
51#define BATCH_OFFSET_BIAS (256*1024) 56#define BATCH_OFFSET_BIAS (256*1024)
52 57
53#define __I915_EXEC_ILLEGAL_FLAGS \ 58#define __I915_EXEC_ILLEGAL_FLAGS \
54 (__I915_EXEC_UNKNOWN_FLAGS | I915_EXEC_CONSTANTS_MASK) 59 (__I915_EXEC_UNKNOWN_FLAGS | I915_EXEC_CONSTANTS_MASK)
55 60
61/**
62 * DOC: User command execution
63 *
64 * Userspace submits commands to be executed on the GPU as an instruction
65 * stream within a GEM object we call a batchbuffer. This instructions may
66 * refer to other GEM objects containing auxiliary state such as kernels,
67 * samplers, render targets and even secondary batchbuffers. Userspace does
68 * not know where in the GPU memory these objects reside and so before the
69 * batchbuffer is passed to the GPU for execution, those addresses in the
70 * batchbuffer and auxiliary objects are updated. This is known as relocation,
71 * or patching. To try and avoid having to relocate each object on the next
72 * execution, userspace is told the location of those objects in this pass,
73 * but this remains just a hint as the kernel may choose a new location for
74 * any object in the future.
75 *
76 * Processing an execbuf ioctl is conceptually split up into a few phases.
77 *
78 * 1. Validation - Ensure all the pointers, handles and flags are valid.
79 * 2. Reservation - Assign GPU address space for every object
80 * 3. Relocation - Update any addresses to point to the final locations
81 * 4. Serialisation - Order the request with respect to its dependencies
82 * 5. Construction - Construct a request to execute the batchbuffer
83 * 6. Submission (at some point in the future execution)
84 *
85 * Reserving resources for the execbuf is the most complicated phase. We
86 * neither want to have to migrate the object in the address space, nor do
87 * we want to have to update any relocations pointing to this object. Ideally,
88 * we want to leave the object where it is and for all the existing relocations
89 * to match. If the object is given a new address, or if userspace thinks the
90 * object is elsewhere, we have to parse all the relocation entries and update
91 * the addresses. Userspace can set the I915_EXEC_NORELOC flag to hint that
92 * all the target addresses in all of its objects match the value in the
93 * relocation entries and that they all match the presumed offsets given by the
94 * list of execbuffer objects. Using this knowledge, we know that if we haven't
95 * moved any buffers, all the relocation entries are valid and we can skip
96 * the update. (If userspace is wrong, the likely outcome is an impromptu GPU
97 * hang.) The requirement for using I915_EXEC_NO_RELOC are:
98 *
99 * The addresses written in the objects must match the corresponding
100 * reloc.presumed_offset which in turn must match the corresponding
101 * execobject.offset.
102 *
103 * Any render targets written to in the batch must be flagged with
104 * EXEC_OBJECT_WRITE.
105 *
106 * To avoid stalling, execobject.offset should match the current
107 * address of that object within the active context.
108 *
109 * The reservation is done is multiple phases. First we try and keep any
110 * object already bound in its current location - so as long as meets the
111 * constraints imposed by the new execbuffer. Any object left unbound after the
112 * first pass is then fitted into any available idle space. If an object does
113 * not fit, all objects are removed from the reservation and the process rerun
114 * after sorting the objects into a priority order (more difficult to fit
115 * objects are tried first). Failing that, the entire VM is cleared and we try
116 * to fit the execbuf once last time before concluding that it simply will not
117 * fit.
118 *
119 * A small complication to all of this is that we allow userspace not only to
120 * specify an alignment and a size for the object in the address space, but
121 * we also allow userspace to specify the exact offset. This objects are
122 * simpler to place (the location is known a priori) all we have to do is make
123 * sure the space is available.
124 *
125 * Once all the objects are in place, patching up the buried pointers to point
126 * to the final locations is a fairly simple job of walking over the relocation
127 * entry arrays, looking up the right address and rewriting the value into
128 * the object. Simple! ... The relocation entries are stored in user memory
129 * and so to access them we have to copy them into a local buffer. That copy
130 * has to avoid taking any pagefaults as they may lead back to a GEM object
131 * requiring the struct_mutex (i.e. recursive deadlock). So once again we split
132 * the relocation into multiple passes. First we try to do everything within an
133 * atomic context (avoid the pagefaults) which requires that we never wait. If
134 * we detect that we may wait, or if we need to fault, then we have to fallback
135 * to a slower path. The slowpath has to drop the mutex. (Can you hear alarm
136 * bells yet?) Dropping the mutex means that we lose all the state we have
137 * built up so far for the execbuf and we must reset any global data. However,
138 * we do leave the objects pinned in their final locations - which is a
139 * potential issue for concurrent execbufs. Once we have left the mutex, we can
140 * allocate and copy all the relocation entries into a large array at our
141 * leisure, reacquire the mutex, reclaim all the objects and other state and
142 * then proceed to update any incorrect addresses with the objects.
143 *
144 * As we process the relocation entries, we maintain a record of whether the
145 * object is being written to. Using NORELOC, we expect userspace to provide
146 * this information instead. We also check whether we can skip the relocation
147 * by comparing the expected value inside the relocation entry with the target's
148 * final address. If they differ, we have to map the current object and rewrite
149 * the 4 or 8 byte pointer within.
150 *
151 * Serialising an execbuf is quite simple according to the rules of the GEM
152 * ABI. Execution within each context is ordered by the order of submission.
153 * Writes to any GEM object are in order of submission and are exclusive. Reads
154 * from a GEM object are unordered with respect to other reads, but ordered by
155 * writes. A write submitted after a read cannot occur before the read, and
156 * similarly any read submitted after a write cannot occur before the write.
157 * Writes are ordered between engines such that only one write occurs at any
158 * time (completing any reads beforehand) - using semaphores where available
159 * and CPU serialisation otherwise. Other GEM access obey the same rules, any
160 * write (either via mmaps using set-domain, or via pwrite) must flush all GPU
161 * reads before starting, and any read (either using set-domain or pread) must
162 * flush all GPU writes before starting. (Note we only employ a barrier before,
163 * we currently rely on userspace not concurrently starting a new execution
164 * whilst reading or writing to an object. This may be an advantage or not
165 * depending on how much you trust userspace not to shoot themselves in the
166 * foot.) Serialisation may just result in the request being inserted into
167 * a DAG awaiting its turn, but most simple is to wait on the CPU until
168 * all dependencies are resolved.
169 *
170 * After all of that, is just a matter of closing the request and handing it to
171 * the hardware (well, leaving it in a queue to be executed). However, we also
172 * offer the ability for batchbuffers to be run with elevated privileges so
173 * that they access otherwise hidden registers. (Used to adjust L3 cache etc.)
174 * Before any batch is given extra privileges we first must check that it
175 * contains no nefarious instructions, we check that each instruction is from
176 * our whitelist and all registers are also from an allowed list. We first
177 * copy the user's batchbuffer to a shadow (so that the user doesn't have
178 * access to it, either by the CPU or GPU as we scan it) and then parse each
179 * instruction. If everything is ok, we set a flag telling the hardware to run
180 * the batchbuffer in trusted mode, otherwise the ioctl is rejected.
181 */
182
56struct i915_execbuffer { 183struct i915_execbuffer {
57 struct drm_i915_private *i915; 184 struct drm_i915_private *i915; /** i915 backpointer */
58 struct drm_file *file; 185 struct drm_file *file; /** per-file lookup tables and limits */
59 struct drm_i915_gem_execbuffer2 *args; 186 struct drm_i915_gem_execbuffer2 *args; /** ioctl parameters */
60 struct drm_i915_gem_exec_object2 *exec; 187 struct drm_i915_gem_exec_object2 *exec; /** ioctl execobj[] */
61 struct intel_engine_cs *engine; 188
62 struct i915_gem_context *ctx; 189 struct intel_engine_cs *engine; /** engine to queue the request to */
63 struct i915_address_space *vm; 190 struct i915_gem_context *ctx; /** context for building the request */
64 struct i915_vma *batch; 191 struct i915_address_space *vm; /** GTT and vma for the request */
65 struct drm_i915_gem_request *request; 192
66 u32 batch_start_offset; 193 struct drm_i915_gem_request *request; /** our request to build */
67 u32 batch_len; 194 struct i915_vma *batch; /** identity of the batch obj/vma */
68 unsigned int dispatch_flags; 195
69 struct drm_i915_gem_exec_object2 shadow_exec_entry; 196 /** actual size of execobj[] as we may extend it for the cmdparser */
70 bool need_relocs; 197 unsigned int buffer_count;
71 struct list_head vmas; 198
199 /** list of vma not yet bound during reservation phase */
200 struct list_head unbound;
201
202 /** list of vma that have execobj.relocation_count */
203 struct list_head relocs;
204
205 /**
206 * Track the most recently used object for relocations, as we
207 * frequently have to perform multiple relocations within the same
208 * obj/page
209 */
72 struct reloc_cache { 210 struct reloc_cache {
73 struct drm_mm_node node; 211 struct drm_mm_node node; /** temporary GTT binding */
74 unsigned long vaddr; 212 unsigned long vaddr; /** Current kmap address */
75 unsigned int page; 213 unsigned long page; /** Currently mapped page index */
76 bool use_64bit_reloc : 1; 214 bool use_64bit_reloc : 1;
215 bool has_llc : 1;
216 bool has_fence : 1;
217 bool needs_unfenced : 1;
77 } reloc_cache; 218 } reloc_cache;
78 int lut_mask; 219
79 struct hlist_head *buckets; 220 u64 invalid_flags; /** Set of execobj.flags that are invalid */
221 u32 context_flags; /** Set of execobj.flags to insert from the ctx */
222
223 u32 batch_start_offset; /** Location within object of batch */
224 u32 batch_len; /** Length of batch within object */
225 u32 batch_flags; /** Flags composed for emit_bb_start() */
226
227 /**
228 * Indicate either the size of the hastable used to resolve
229 * relocation handles, or if negative that we are using a direct
230 * index into the execobj[].
231 */
232 int lut_size;
233 struct hlist_head *buckets; /** ht for relocation handles */
80}; 234};
81 235
82/* 236/*
@@ -87,11 +241,41 @@ struct i915_execbuffer {
87#define __exec_to_vma(ee) (ee)->rsvd2 241#define __exec_to_vma(ee) (ee)->rsvd2
88#define exec_to_vma(ee) u64_to_ptr(struct i915_vma, __exec_to_vma(ee)) 242#define exec_to_vma(ee) u64_to_ptr(struct i915_vma, __exec_to_vma(ee))
89 243
244/*
245 * Used to convert any address to canonical form.
246 * Starting from gen8, some commands (e.g. STATE_BASE_ADDRESS,
247 * MI_LOAD_REGISTER_MEM and others, see Broadwell PRM Vol2a) require the
248 * addresses to be in a canonical form:
249 * "GraphicsAddress[63:48] are ignored by the HW and assumed to be in correct
250 * canonical form [63:48] == [47]."
251 */
252#define GEN8_HIGH_ADDRESS_BIT 47
253static inline u64 gen8_canonical_addr(u64 address)
254{
255 return sign_extend64(address, GEN8_HIGH_ADDRESS_BIT);
256}
257
258static inline u64 gen8_noncanonical_addr(u64 address)
259{
260 return address & GENMASK_ULL(GEN8_HIGH_ADDRESS_BIT, 0);
261}
262
90static int eb_create(struct i915_execbuffer *eb) 263static int eb_create(struct i915_execbuffer *eb)
91{ 264{
92 if ((eb->args->flags & I915_EXEC_HANDLE_LUT) == 0) { 265 if (!(eb->args->flags & I915_EXEC_HANDLE_LUT)) {
93 unsigned int size = 1 + ilog2(eb->args->buffer_count); 266 unsigned int size = 1 + ilog2(eb->buffer_count);
94 267
268 /*
269 * Without a 1:1 association between relocation handles and
270 * the execobject[] index, we instead create a hashtable.
271 * We size it dynamically based on available memory, starting
272 * first with 1:1 assocative hash and scaling back until
273 * the allocation succeeds.
274 *
275 * Later on we use a positive lut_size to indicate we are
276 * using this hashtable, and a negative value to indicate a
277 * direct lookup.
278 */
95 do { 279 do {
96 eb->buckets = kzalloc(sizeof(struct hlist_head) << size, 280 eb->buckets = kzalloc(sizeof(struct hlist_head) << size,
97 GFP_TEMPORARY | 281 GFP_TEMPORARY |
@@ -108,112 +292,411 @@ static int eb_create(struct i915_execbuffer *eb)
108 return -ENOMEM; 292 return -ENOMEM;
109 } 293 }
110 294
111 eb->lut_mask = size; 295 eb->lut_size = size;
112 } else { 296 } else {
113 eb->lut_mask = -eb->args->buffer_count; 297 eb->lut_size = -eb->buffer_count;
114 } 298 }
115 299
116 return 0; 300 return 0;
117} 301}
118 302
303static bool
304eb_vma_misplaced(const struct drm_i915_gem_exec_object2 *entry,
305 const struct i915_vma *vma)
306{
307 if (!(entry->flags & __EXEC_OBJECT_HAS_PIN))
308 return true;
309
310 if (vma->node.size < entry->pad_to_size)
311 return true;
312
313 if (entry->alignment && !IS_ALIGNED(vma->node.start, entry->alignment))
314 return true;
315
316 if (entry->flags & EXEC_OBJECT_PINNED &&
317 vma->node.start != entry->offset)
318 return true;
319
320 if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS &&
321 vma->node.start < BATCH_OFFSET_BIAS)
322 return true;
323
324 if (!(entry->flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) &&
325 (vma->node.start + vma->node.size - 1) >> 32)
326 return true;
327
328 return false;
329}
330
331static inline void
332eb_pin_vma(struct i915_execbuffer *eb,
333 struct drm_i915_gem_exec_object2 *entry,
334 struct i915_vma *vma)
335{
336 u64 flags;
337
338 flags = vma->node.start;
339 flags |= PIN_USER | PIN_NONBLOCK | PIN_OFFSET_FIXED;
340 if (unlikely(entry->flags & EXEC_OBJECT_NEEDS_GTT))
341 flags |= PIN_GLOBAL;
342 if (unlikely(i915_vma_pin(vma, 0, 0, flags)))
343 return;
344
345 if (unlikely(entry->flags & EXEC_OBJECT_NEEDS_FENCE)) {
346 if (unlikely(i915_vma_get_fence(vma))) {
347 i915_vma_unpin(vma);
348 return;
349 }
350
351 if (i915_vma_pin_fence(vma))
352 entry->flags |= __EXEC_OBJECT_HAS_FENCE;
353 }
354
355 entry->flags |= __EXEC_OBJECT_HAS_PIN;
356}
357
119static inline void 358static inline void
120__eb_unreserve_vma(struct i915_vma *vma, 359__eb_unreserve_vma(struct i915_vma *vma,
121 const struct drm_i915_gem_exec_object2 *entry) 360 const struct drm_i915_gem_exec_object2 *entry)
122{ 361{
362 GEM_BUG_ON(!(entry->flags & __EXEC_OBJECT_HAS_PIN));
363
123 if (unlikely(entry->flags & __EXEC_OBJECT_HAS_FENCE)) 364 if (unlikely(entry->flags & __EXEC_OBJECT_HAS_FENCE))
124 i915_vma_unpin_fence(vma); 365 i915_vma_unpin_fence(vma);
125 366
126 if (entry->flags & __EXEC_OBJECT_HAS_PIN) 367 __i915_vma_unpin(vma);
127 __i915_vma_unpin(vma);
128} 368}
129 369
130static void 370static inline void
131eb_unreserve_vma(struct i915_vma *vma) 371eb_unreserve_vma(struct i915_vma *vma,
372 struct drm_i915_gem_exec_object2 *entry)
132{ 373{
133 struct drm_i915_gem_exec_object2 *entry = vma->exec_entry; 374 if (!(entry->flags & __EXEC_OBJECT_HAS_PIN))
375 return;
134 376
135 __eb_unreserve_vma(vma, entry); 377 __eb_unreserve_vma(vma, entry);
136 entry->flags &= ~(__EXEC_OBJECT_HAS_FENCE | __EXEC_OBJECT_HAS_PIN); 378 entry->flags &= ~__EXEC_OBJECT_RESERVED;
137} 379}
138 380
139static void 381static int
140eb_reset(struct i915_execbuffer *eb) 382eb_validate_vma(struct i915_execbuffer *eb,
383 struct drm_i915_gem_exec_object2 *entry,
384 struct i915_vma *vma)
141{ 385{
142 struct i915_vma *vma; 386 if (unlikely(entry->flags & eb->invalid_flags))
387 return -EINVAL;
143 388
144 list_for_each_entry(vma, &eb->vmas, exec_link) { 389 if (unlikely(entry->alignment && !is_power_of_2(entry->alignment)))
145 eb_unreserve_vma(vma); 390 return -EINVAL;
146 i915_vma_put(vma); 391
147 vma->exec_entry = NULL; 392 /*
393 * Offset can be used as input (EXEC_OBJECT_PINNED), reject
394 * any non-page-aligned or non-canonical addresses.
395 */
396 if (unlikely(entry->flags & EXEC_OBJECT_PINNED &&
397 entry->offset != gen8_canonical_addr(entry->offset & PAGE_MASK)))
398 return -EINVAL;
399
400 /* pad_to_size was once a reserved field, so sanitize it */
401 if (entry->flags & EXEC_OBJECT_PAD_TO_SIZE) {
402 if (unlikely(offset_in_page(entry->pad_to_size)))
403 return -EINVAL;
404 } else {
405 entry->pad_to_size = 0;
148 } 406 }
149 407
150 if (eb->lut_mask >= 0) 408 if (unlikely(vma->exec_entry)) {
151 memset(eb->buckets, 0, 409 DRM_DEBUG("Object [handle %d, index %d] appears more than once in object list\n",
152 sizeof(struct hlist_head) << eb->lut_mask); 410 entry->handle, (int)(entry - eb->exec));
411 return -EINVAL;
412 }
413
414 /*
415 * From drm_mm perspective address space is continuous,
416 * so from this point we're always using non-canonical
417 * form internally.
418 */
419 entry->offset = gen8_noncanonical_addr(entry->offset);
420
421 return 0;
153} 422}
154 423
155static bool 424static int
156eb_add_vma(struct i915_execbuffer *eb, struct i915_vma *vma, int i) 425eb_add_vma(struct i915_execbuffer *eb,
426 struct drm_i915_gem_exec_object2 *entry,
427 struct i915_vma *vma)
157{ 428{
158 if (unlikely(vma->exec_entry)) { 429 int err;
159 DRM_DEBUG("Object [handle %d, index %d] appears more than once in object list\n", 430
160 eb->exec[i].handle, i); 431 GEM_BUG_ON(i915_vma_is_closed(vma));
161 return false; 432
433 if (!(eb->args->flags & __EXEC_VALIDATED)) {
434 err = eb_validate_vma(eb, entry, vma);
435 if (unlikely(err))
436 return err;
162 } 437 }
163 list_add_tail(&vma->exec_link, &eb->vmas);
164 438
165 vma->exec_entry = &eb->exec[i]; 439 if (eb->lut_size >= 0) {
166 if (eb->lut_mask >= 0) { 440 vma->exec_handle = entry->handle;
167 vma->exec_handle = eb->exec[i].handle;
168 hlist_add_head(&vma->exec_node, 441 hlist_add_head(&vma->exec_node,
169 &eb->buckets[hash_32(vma->exec_handle, 442 &eb->buckets[hash_32(entry->handle,
170 eb->lut_mask)]); 443 eb->lut_size)]);
171 } 444 }
172 445
173 i915_vma_get(vma); 446 if (entry->relocation_count)
174 __exec_to_vma(&eb->exec[i]) = (uintptr_t)vma; 447 list_add_tail(&vma->reloc_link, &eb->relocs);
175 return true; 448
449 if (!eb->reloc_cache.has_fence) {
450 entry->flags &= ~EXEC_OBJECT_NEEDS_FENCE;
451 } else {
452 if ((entry->flags & EXEC_OBJECT_NEEDS_FENCE ||
453 eb->reloc_cache.needs_unfenced) &&
454 i915_gem_object_is_tiled(vma->obj))
455 entry->flags |= EXEC_OBJECT_NEEDS_GTT | __EXEC_OBJECT_NEEDS_MAP;
456 }
457
458 if (!(entry->flags & EXEC_OBJECT_PINNED))
459 entry->flags |= eb->context_flags;
460
461 /*
462 * Stash a pointer from the vma to execobj, so we can query its flags,
463 * size, alignment etc as provided by the user. Also we stash a pointer
464 * to the vma inside the execobj so that we can use a direct lookup
465 * to find the right target VMA when doing relocations.
466 */
467 vma->exec_entry = entry;
468 __exec_to_vma(entry) = (uintptr_t)i915_vma_get(vma);
469
470 err = 0;
471 if (vma->node.size)
472 eb_pin_vma(eb, entry, vma);
473 if (eb_vma_misplaced(entry, vma)) {
474 eb_unreserve_vma(vma, entry);
475
476 list_add_tail(&vma->exec_link, &eb->unbound);
477 if (drm_mm_node_allocated(&vma->node))
478 err = i915_vma_unbind(vma);
479 } else {
480 if (entry->offset != vma->node.start) {
481 entry->offset = vma->node.start | UPDATE;
482 eb->args->flags |= __EXEC_HAS_RELOC;
483 }
484 }
485 return err;
486}
487
488static inline int use_cpu_reloc(const struct reloc_cache *cache,
489 const struct drm_i915_gem_object *obj)
490{
491 if (!i915_gem_object_has_struct_page(obj))
492 return false;
493
494 if (DBG_USE_CPU_RELOC)
495 return DBG_USE_CPU_RELOC > 0;
496
497 return (cache->has_llc ||
498 obj->cache_dirty ||
499 obj->cache_level != I915_CACHE_NONE);
500}
501
502static int eb_reserve_vma(const struct i915_execbuffer *eb,
503 struct i915_vma *vma)
504{
505 struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
506 u64 flags;
507 int err;
508
509 flags = PIN_USER | PIN_NONBLOCK;
510 if (entry->flags & EXEC_OBJECT_NEEDS_GTT)
511 flags |= PIN_GLOBAL;
512
513 /*
514 * Wa32bitGeneralStateOffset & Wa32bitInstructionBaseOffset,
515 * limit address to the first 4GBs for unflagged objects.
516 */
517 if (!(entry->flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS))
518 flags |= PIN_ZONE_4G;
519
520 if (entry->flags & __EXEC_OBJECT_NEEDS_MAP)
521 flags |= PIN_MAPPABLE;
522
523 if (entry->flags & EXEC_OBJECT_PINNED) {
524 flags |= entry->offset | PIN_OFFSET_FIXED;
525 flags &= ~PIN_NONBLOCK; /* force overlapping PINNED checks */
526 } else if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS) {
527 flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS;
528 }
529
530 err = i915_vma_pin(vma, entry->pad_to_size, entry->alignment, flags);
531 if (err)
532 return err;
533
534 if (entry->offset != vma->node.start) {
535 entry->offset = vma->node.start | UPDATE;
536 eb->args->flags |= __EXEC_HAS_RELOC;
537 }
538
539 entry->flags |= __EXEC_OBJECT_HAS_PIN;
540 GEM_BUG_ON(eb_vma_misplaced(entry, vma));
541
542 if (unlikely(entry->flags & EXEC_OBJECT_NEEDS_FENCE)) {
543 err = i915_vma_get_fence(vma);
544 if (unlikely(err)) {
545 i915_vma_unpin(vma);
546 return err;
547 }
548
549 if (i915_vma_pin_fence(vma))
550 entry->flags |= __EXEC_OBJECT_HAS_FENCE;
551 }
552
553 return 0;
554}
555
556static int eb_reserve(struct i915_execbuffer *eb)
557{
558 const unsigned int count = eb->buffer_count;
559 struct list_head last;
560 struct i915_vma *vma;
561 unsigned int i, pass;
562 int err;
563
564 /*
565 * Attempt to pin all of the buffers into the GTT.
566 * This is done in 3 phases:
567 *
568 * 1a. Unbind all objects that do not match the GTT constraints for
569 * the execbuffer (fenceable, mappable, alignment etc).
570 * 1b. Increment pin count for already bound objects.
571 * 2. Bind new objects.
572 * 3. Decrement pin count.
573 *
574 * This avoid unnecessary unbinding of later objects in order to make
575 * room for the earlier objects *unless* we need to defragment.
576 */
577
578 pass = 0;
579 err = 0;
580 do {
581 list_for_each_entry(vma, &eb->unbound, exec_link) {
582 err = eb_reserve_vma(eb, vma);
583 if (err)
584 break;
585 }
586 if (err != -ENOSPC)
587 return err;
588
589 /* Resort *all* the objects into priority order */
590 INIT_LIST_HEAD(&eb->unbound);
591 INIT_LIST_HEAD(&last);
592 for (i = 0; i < count; i++) {
593 struct drm_i915_gem_exec_object2 *entry = &eb->exec[i];
594
595 if (entry->flags & EXEC_OBJECT_PINNED &&
596 entry->flags & __EXEC_OBJECT_HAS_PIN)
597 continue;
598
599 vma = exec_to_vma(entry);
600 eb_unreserve_vma(vma, entry);
601
602 if (entry->flags & EXEC_OBJECT_PINNED)
603 list_add(&vma->exec_link, &eb->unbound);
604 else if (entry->flags & __EXEC_OBJECT_NEEDS_MAP)
605 list_add_tail(&vma->exec_link, &eb->unbound);
606 else
607 list_add_tail(&vma->exec_link, &last);
608 }
609 list_splice_tail(&last, &eb->unbound);
610
611 switch (pass++) {
612 case 0:
613 break;
614
615 case 1:
616 /* Too fragmented, unbind everything and retry */
617 err = i915_gem_evict_vm(eb->vm);
618 if (err)
619 return err;
620 break;
621
622 default:
623 return -ENOSPC;
624 }
625 } while (1);
176} 626}
177 627
178static inline struct hlist_head * 628static inline struct hlist_head *
179ht_head(const struct i915_gem_context *ctx, u32 handle) 629ht_head(const struct i915_gem_context_vma_lut *lut, u32 handle)
180{ 630{
181 return &ctx->vma_lut.ht[hash_32(handle, ctx->vma_lut.ht_bits)]; 631 return &lut->ht[hash_32(handle, lut->ht_bits)];
182} 632}
183 633
184static inline bool 634static inline bool
185ht_needs_resize(const struct i915_gem_context *ctx) 635ht_needs_resize(const struct i915_gem_context_vma_lut *lut)
186{ 636{
187 return (4*ctx->vma_lut.ht_count > 3*ctx->vma_lut.ht_size || 637 return (4*lut->ht_count > 3*lut->ht_size ||
188 4*ctx->vma_lut.ht_count + 1 < ctx->vma_lut.ht_size); 638 4*lut->ht_count + 1 < lut->ht_size);
189} 639}
190 640
191static int 641static unsigned int eb_batch_index(const struct i915_execbuffer *eb)
192eb_lookup_vmas(struct i915_execbuffer *eb) 642{
643 return eb->buffer_count - 1;
644}
645
646static int eb_select_context(struct i915_execbuffer *eb)
647{
648 struct i915_gem_context *ctx;
649
650 ctx = i915_gem_context_lookup(eb->file->driver_priv, eb->args->rsvd1);
651 if (unlikely(IS_ERR(ctx)))
652 return PTR_ERR(ctx);
653
654 if (unlikely(i915_gem_context_is_banned(ctx))) {
655 DRM_DEBUG("Context %u tried to submit while banned\n",
656 ctx->user_handle);
657 return -EIO;
658 }
659
660 eb->ctx = i915_gem_context_get(ctx);
661 eb->vm = ctx->ppgtt ? &ctx->ppgtt->base : &eb->i915->ggtt.base;
662
663 eb->context_flags = 0;
664 if (ctx->flags & CONTEXT_NO_ZEROMAP)
665 eb->context_flags |= __EXEC_OBJECT_NEEDS_BIAS;
666
667 return 0;
668}
669
670static int eb_lookup_vmas(struct i915_execbuffer *eb)
193{ 671{
194#define INTERMEDIATE BIT(0) 672#define INTERMEDIATE BIT(0)
195 const int count = eb->args->buffer_count; 673 const unsigned int count = eb->buffer_count;
674 struct i915_gem_context_vma_lut *lut = &eb->ctx->vma_lut;
196 struct i915_vma *vma; 675 struct i915_vma *vma;
676 struct idr *idr;
677 unsigned int i;
197 int slow_pass = -1; 678 int slow_pass = -1;
198 int i; 679 int err;
199 680
200 INIT_LIST_HEAD(&eb->vmas); 681 INIT_LIST_HEAD(&eb->relocs);
682 INIT_LIST_HEAD(&eb->unbound);
201 683
202 if (unlikely(eb->ctx->vma_lut.ht_size & I915_CTX_RESIZE_IN_PROGRESS)) 684 if (unlikely(lut->ht_size & I915_CTX_RESIZE_IN_PROGRESS))
203 flush_work(&eb->ctx->vma_lut.resize); 685 flush_work(&lut->resize);
204 GEM_BUG_ON(eb->ctx->vma_lut.ht_size & I915_CTX_RESIZE_IN_PROGRESS); 686 GEM_BUG_ON(lut->ht_size & I915_CTX_RESIZE_IN_PROGRESS);
205 687
206 for (i = 0; i < count; i++) { 688 for (i = 0; i < count; i++) {
207 __exec_to_vma(&eb->exec[i]) = 0; 689 __exec_to_vma(&eb->exec[i]) = 0;
208 690
209 hlist_for_each_entry(vma, 691 hlist_for_each_entry(vma,
210 ht_head(eb->ctx, eb->exec[i].handle), 692 ht_head(lut, eb->exec[i].handle),
211 ctx_node) { 693 ctx_node) {
212 if (vma->ctx_handle != eb->exec[i].handle) 694 if (vma->ctx_handle != eb->exec[i].handle)
213 continue; 695 continue;
214 696
215 if (!eb_add_vma(eb, vma, i)) 697 err = eb_add_vma(eb, &eb->exec[i], vma);
216 return -EINVAL; 698 if (unlikely(err))
699 return err;
217 700
218 goto next_vma; 701 goto next_vma;
219 } 702 }
@@ -224,24 +707,27 @@ next_vma: ;
224 } 707 }
225 708
226 if (slow_pass < 0) 709 if (slow_pass < 0)
227 return 0; 710 goto out;
228 711
229 spin_lock(&eb->file->table_lock); 712 spin_lock(&eb->file->table_lock);
230 /* Grab a reference to the object and release the lock so we can lookup 713 /*
231 * or create the VMA without using GFP_ATOMIC */ 714 * Grab a reference to the object and release the lock so we can lookup
715 * or create the VMA without using GFP_ATOMIC
716 */
717 idr = &eb->file->object_idr;
232 for (i = slow_pass; i < count; i++) { 718 for (i = slow_pass; i < count; i++) {
233 struct drm_i915_gem_object *obj; 719 struct drm_i915_gem_object *obj;
234 720
235 if (__exec_to_vma(&eb->exec[i])) 721 if (__exec_to_vma(&eb->exec[i]))
236 continue; 722 continue;
237 723
238 obj = to_intel_bo(idr_find(&eb->file->object_idr, 724 obj = to_intel_bo(idr_find(idr, eb->exec[i].handle));
239 eb->exec[i].handle));
240 if (unlikely(!obj)) { 725 if (unlikely(!obj)) {
241 spin_unlock(&eb->file->table_lock); 726 spin_unlock(&eb->file->table_lock);
242 DRM_DEBUG("Invalid object handle %d at index %d\n", 727 DRM_DEBUG("Invalid object handle %d at index %d\n",
243 eb->exec[i].handle, i); 728 eb->exec[i].handle, i);
244 return -ENOENT; 729 err = -ENOENT;
730 goto err;
245 } 731 }
246 732
247 __exec_to_vma(&eb->exec[i]) = INTERMEDIATE | (uintptr_t)obj; 733 __exec_to_vma(&eb->exec[i]) = INTERMEDIATE | (uintptr_t)obj;
@@ -251,7 +737,7 @@ next_vma: ;
251 for (i = slow_pass; i < count; i++) { 737 for (i = slow_pass; i < count; i++) {
252 struct drm_i915_gem_object *obj; 738 struct drm_i915_gem_object *obj;
253 739
254 if ((__exec_to_vma(&eb->exec[i]) & INTERMEDIATE) == 0) 740 if (!(__exec_to_vma(&eb->exec[i]) & INTERMEDIATE))
255 continue; 741 continue;
256 742
257 /* 743 /*
@@ -262,12 +748,13 @@ next_vma: ;
262 * from the (obj, vm) we don't run the risk of creating 748 * from the (obj, vm) we don't run the risk of creating
263 * duplicated vmas for the same vm. 749 * duplicated vmas for the same vm.
264 */ 750 */
265 obj = u64_to_ptr(struct drm_i915_gem_object, 751 obj = u64_to_ptr(typeof(*obj),
266 __exec_to_vma(&eb->exec[i]) & ~INTERMEDIATE); 752 __exec_to_vma(&eb->exec[i]) & ~INTERMEDIATE);
267 vma = i915_vma_instance(obj, eb->vm, NULL); 753 vma = i915_vma_instance(obj, eb->vm, NULL);
268 if (unlikely(IS_ERR(vma))) { 754 if (unlikely(IS_ERR(vma))) {
269 DRM_DEBUG("Failed to lookup VMA\n"); 755 DRM_DEBUG("Failed to lookup VMA\n");
270 return PTR_ERR(vma); 756 err = PTR_ERR(vma);
757 goto err;
271 } 758 }
272 759
273 /* First come, first served */ 760 /* First come, first served */
@@ -275,32 +762,31 @@ next_vma: ;
275 vma->ctx = eb->ctx; 762 vma->ctx = eb->ctx;
276 vma->ctx_handle = eb->exec[i].handle; 763 vma->ctx_handle = eb->exec[i].handle;
277 hlist_add_head(&vma->ctx_node, 764 hlist_add_head(&vma->ctx_node,
278 ht_head(eb->ctx, eb->exec[i].handle)); 765 ht_head(lut, eb->exec[i].handle));
279 eb->ctx->vma_lut.ht_count++; 766 lut->ht_count++;
767 lut->ht_size |= I915_CTX_RESIZE_IN_PROGRESS;
280 if (i915_vma_is_ggtt(vma)) { 768 if (i915_vma_is_ggtt(vma)) {
281 GEM_BUG_ON(obj->vma_hashed); 769 GEM_BUG_ON(obj->vma_hashed);
282 obj->vma_hashed = vma; 770 obj->vma_hashed = vma;
283 } 771 }
284 } 772 }
285 773
286 if (!eb_add_vma(eb, vma, i)) 774 err = eb_add_vma(eb, &eb->exec[i], vma);
287 return -EINVAL; 775 if (unlikely(err))
776 goto err;
288 } 777 }
289 778
290 if (ht_needs_resize(eb->ctx)) { 779 if (lut->ht_size & I915_CTX_RESIZE_IN_PROGRESS) {
291 eb->ctx->vma_lut.ht_size |= I915_CTX_RESIZE_IN_PROGRESS; 780 if (ht_needs_resize(lut))
292 queue_work(system_highpri_wq, &eb->ctx->vma_lut.resize); 781 queue_work(system_highpri_wq, &lut->resize);
782 else
783 lut->ht_size &= ~I915_CTX_RESIZE_IN_PROGRESS;
293 } 784 }
294 785
295 return 0; 786out:
296#undef INTERMEDIATE 787 /* take note of the batch buffer before we might reorder the lists */
297} 788 i = eb_batch_index(eb);
298 789 eb->batch = exec_to_vma(&eb->exec[i]);
299static struct i915_vma *
300eb_get_batch(struct i915_execbuffer *eb)
301{
302 struct i915_vma *vma =
303 exec_to_vma(&eb->exec[eb->args->buffer_count - 1]);
304 790
305 /* 791 /*
306 * SNA is doing fancy tricks with compressing batch buffers, which leads 792 * SNA is doing fancy tricks with compressing batch buffers, which leads
@@ -311,24 +797,36 @@ eb_get_batch(struct i915_execbuffer *eb)
311 * Note that actual hangs have only been observed on gen7, but for 797 * Note that actual hangs have only been observed on gen7, but for
312 * paranoia do it everywhere. 798 * paranoia do it everywhere.
313 */ 799 */
314 if ((vma->exec_entry->flags & EXEC_OBJECT_PINNED) == 0) 800 if (!(eb->exec[i].flags & EXEC_OBJECT_PINNED))
315 vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS; 801 eb->exec[i].flags |= __EXEC_OBJECT_NEEDS_BIAS;
802 if (eb->reloc_cache.has_fence)
803 eb->exec[i].flags |= EXEC_OBJECT_NEEDS_FENCE;
316 804
317 return vma; 805 eb->args->flags |= __EXEC_VALIDATED;
806 return eb_reserve(eb);
807
808err:
809 for (i = slow_pass; i < count; i++) {
810 if (__exec_to_vma(&eb->exec[i]) & INTERMEDIATE)
811 __exec_to_vma(&eb->exec[i]) = 0;
812 }
813 lut->ht_size &= ~I915_CTX_RESIZE_IN_PROGRESS;
814 return err;
815#undef INTERMEDIATE
318} 816}
319 817
320static struct i915_vma * 818static struct i915_vma *
321eb_get_vma(struct i915_execbuffer *eb, unsigned long handle) 819eb_get_vma(const struct i915_execbuffer *eb, unsigned long handle)
322{ 820{
323 if (eb->lut_mask < 0) { 821 if (eb->lut_size < 0) {
324 if (handle >= -eb->lut_mask) 822 if (handle >= -eb->lut_size)
325 return NULL; 823 return NULL;
326 return exec_to_vma(&eb->exec[handle]); 824 return exec_to_vma(&eb->exec[handle]);
327 } else { 825 } else {
328 struct hlist_head *head; 826 struct hlist_head *head;
329 struct i915_vma *vma; 827 struct i915_vma *vma;
330 828
331 head = &eb->buckets[hash_32(handle, eb->lut_mask)]; 829 head = &eb->buckets[hash_32(handle, eb->lut_size)];
332 hlist_for_each_entry(vma, head, exec_node) { 830 hlist_for_each_entry(vma, head, exec_node) {
333 if (vma->exec_handle == handle) 831 if (vma->exec_handle == handle)
334 return vma; 832 return vma;
@@ -337,61 +835,46 @@ eb_get_vma(struct i915_execbuffer *eb, unsigned long handle)
337 } 835 }
338} 836}
339 837
340static void eb_destroy(struct i915_execbuffer *eb) 838static void eb_release_vmas(const struct i915_execbuffer *eb)
341{ 839{
342 struct i915_vma *vma; 840 const unsigned int count = eb->buffer_count;
841 unsigned int i;
343 842
344 list_for_each_entry(vma, &eb->vmas, exec_link) { 843 for (i = 0; i < count; i++) {
345 if (!vma->exec_entry) 844 struct drm_i915_gem_exec_object2 *entry = &eb->exec[i];
845 struct i915_vma *vma = exec_to_vma(entry);
846
847 if (!vma)
346 continue; 848 continue;
347 849
348 __eb_unreserve_vma(vma, vma->exec_entry); 850 GEM_BUG_ON(vma->exec_entry != entry);
349 vma->exec_entry = NULL; 851 vma->exec_entry = NULL;
350 i915_vma_put(vma);
351 }
352
353 i915_gem_context_put(eb->ctx);
354 852
355 if (eb->lut_mask >= 0) 853 eb_unreserve_vma(vma, entry);
356 kfree(eb->buckets);
357}
358
359static inline int use_cpu_reloc(struct drm_i915_gem_object *obj)
360{
361 if (!i915_gem_object_has_struct_page(obj))
362 return false;
363 854
364 if (DBG_USE_CPU_RELOC) 855 i915_vma_put(vma);
365 return DBG_USE_CPU_RELOC > 0; 856 }
366
367 return (HAS_LLC(to_i915(obj->base.dev)) ||
368 obj->cache_dirty ||
369 obj->cache_level != I915_CACHE_NONE);
370} 857}
371 858
372/* Used to convert any address to canonical form. 859static void eb_reset_vmas(const struct i915_execbuffer *eb)
373 * Starting from gen8, some commands (e.g. STATE_BASE_ADDRESS,
374 * MI_LOAD_REGISTER_MEM and others, see Broadwell PRM Vol2a) require the
375 * addresses to be in a canonical form:
376 * "GraphicsAddress[63:48] are ignored by the HW and assumed to be in correct
377 * canonical form [63:48] == [47]."
378 */
379#define GEN8_HIGH_ADDRESS_BIT 47
380static inline uint64_t gen8_canonical_addr(uint64_t address)
381{ 860{
382 return sign_extend64(address, GEN8_HIGH_ADDRESS_BIT); 861 eb_release_vmas(eb);
862 if (eb->lut_size >= 0)
863 memset(eb->buckets, 0,
864 sizeof(struct hlist_head) << eb->lut_size);
383} 865}
384 866
385static inline uint64_t gen8_noncanonical_addr(uint64_t address) 867static void eb_destroy(const struct i915_execbuffer *eb)
386{ 868{
387 return address & ((1ULL << (GEN8_HIGH_ADDRESS_BIT + 1)) - 1); 869 if (eb->lut_size >= 0)
870 kfree(eb->buckets);
388} 871}
389 872
390static inline uint64_t 873static inline u64
391relocation_target(const struct drm_i915_gem_relocation_entry *reloc, 874relocation_target(const struct drm_i915_gem_relocation_entry *reloc,
392 uint64_t target_offset) 875 const struct i915_vma *target)
393{ 876{
394 return gen8_canonical_addr((int)reloc->delta + target_offset); 877 return gen8_canonical_addr((int)reloc->delta + target->node.start);
395} 878}
396 879
397static void reloc_cache_init(struct reloc_cache *cache, 880static void reloc_cache_init(struct reloc_cache *cache,
@@ -400,6 +883,9 @@ static void reloc_cache_init(struct reloc_cache *cache,
400 cache->page = -1; 883 cache->page = -1;
401 cache->vaddr = 0; 884 cache->vaddr = 0;
402 /* Must be a variable in the struct to allow GCC to unroll. */ 885 /* Must be a variable in the struct to allow GCC to unroll. */
886 cache->has_llc = HAS_LLC(i915);
887 cache->has_fence = INTEL_GEN(i915) < 4;
888 cache->needs_unfenced = INTEL_INFO(i915)->unfenced_needs_alignment;
403 cache->use_64bit_reloc = HAS_64BIT_RELOC(i915); 889 cache->use_64bit_reloc = HAS_64BIT_RELOC(i915);
404 cache->node.allocated = false; 890 cache->node.allocated = false;
405} 891}
@@ -458,7 +944,7 @@ static void reloc_cache_reset(struct reloc_cache *cache)
458 944
459static void *reloc_kmap(struct drm_i915_gem_object *obj, 945static void *reloc_kmap(struct drm_i915_gem_object *obj,
460 struct reloc_cache *cache, 946 struct reloc_cache *cache,
461 int page) 947 unsigned long page)
462{ 948{
463 void *vaddr; 949 void *vaddr;
464 950
@@ -466,11 +952,11 @@ static void *reloc_kmap(struct drm_i915_gem_object *obj,
466 kunmap_atomic(unmask_page(cache->vaddr)); 952 kunmap_atomic(unmask_page(cache->vaddr));
467 } else { 953 } else {
468 unsigned int flushes; 954 unsigned int flushes;
469 int ret; 955 int err;
470 956
471 ret = i915_gem_obj_prepare_shmem_write(obj, &flushes); 957 err = i915_gem_obj_prepare_shmem_write(obj, &flushes);
472 if (ret) 958 if (err)
473 return ERR_PTR(ret); 959 return ERR_PTR(err);
474 960
475 BUILD_BUG_ON(KMAP & CLFLUSH_FLAGS); 961 BUILD_BUG_ON(KMAP & CLFLUSH_FLAGS);
476 BUILD_BUG_ON((KMAP | CLFLUSH_FLAGS) & PAGE_MASK); 962 BUILD_BUG_ON((KMAP | CLFLUSH_FLAGS) & PAGE_MASK);
@@ -490,7 +976,7 @@ static void *reloc_kmap(struct drm_i915_gem_object *obj,
490 976
491static void *reloc_iomap(struct drm_i915_gem_object *obj, 977static void *reloc_iomap(struct drm_i915_gem_object *obj,
492 struct reloc_cache *cache, 978 struct reloc_cache *cache,
493 int page) 979 unsigned long page)
494{ 980{
495 struct i915_ggtt *ggtt = cache_to_ggtt(cache); 981 struct i915_ggtt *ggtt = cache_to_ggtt(cache);
496 unsigned long offset; 982 unsigned long offset;
@@ -500,31 +986,31 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj,
500 io_mapping_unmap_atomic((void __force __iomem *) unmask_page(cache->vaddr)); 986 io_mapping_unmap_atomic((void __force __iomem *) unmask_page(cache->vaddr));
501 } else { 987 } else {
502 struct i915_vma *vma; 988 struct i915_vma *vma;
503 int ret; 989 int err;
504 990
505 if (use_cpu_reloc(obj)) 991 if (use_cpu_reloc(cache, obj))
506 return NULL; 992 return NULL;
507 993
508 ret = i915_gem_object_set_to_gtt_domain(obj, true); 994 err = i915_gem_object_set_to_gtt_domain(obj, true);
509 if (ret) 995 if (err)
510 return ERR_PTR(ret); 996 return ERR_PTR(err);
511 997
512 vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 998 vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
513 PIN_MAPPABLE | PIN_NONBLOCK); 999 PIN_MAPPABLE | PIN_NONBLOCK);
514 if (IS_ERR(vma)) { 1000 if (IS_ERR(vma)) {
515 memset(&cache->node, 0, sizeof(cache->node)); 1001 memset(&cache->node, 0, sizeof(cache->node));
516 ret = drm_mm_insert_node_in_range 1002 err = drm_mm_insert_node_in_range
517 (&ggtt->base.mm, &cache->node, 1003 (&ggtt->base.mm, &cache->node,
518 PAGE_SIZE, 0, I915_COLOR_UNEVICTABLE, 1004 PAGE_SIZE, 0, I915_COLOR_UNEVICTABLE,
519 0, ggtt->mappable_end, 1005 0, ggtt->mappable_end,
520 DRM_MM_INSERT_LOW); 1006 DRM_MM_INSERT_LOW);
521 if (ret) /* no inactive aperture space, use cpu reloc */ 1007 if (err) /* no inactive aperture space, use cpu reloc */
522 return NULL; 1008 return NULL;
523 } else { 1009 } else {
524 ret = i915_vma_put_fence(vma); 1010 err = i915_vma_put_fence(vma);
525 if (ret) { 1011 if (err) {
526 i915_vma_unpin(vma); 1012 i915_vma_unpin(vma);
527 return ERR_PTR(ret); 1013 return ERR_PTR(err);
528 } 1014 }
529 1015
530 cache->node.start = vma->node.start; 1016 cache->node.start = vma->node.start;
@@ -552,7 +1038,7 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj,
552 1038
553static void *reloc_vaddr(struct drm_i915_gem_object *obj, 1039static void *reloc_vaddr(struct drm_i915_gem_object *obj,
554 struct reloc_cache *cache, 1040 struct reloc_cache *cache,
555 int page) 1041 unsigned long page)
556{ 1042{
557 void *vaddr; 1043 void *vaddr;
558 1044
@@ -579,7 +1065,8 @@ static void clflush_write32(u32 *addr, u32 value, unsigned int flushes)
579 1065
580 *addr = value; 1066 *addr = value;
581 1067
582 /* Writes to the same cacheline are serialised by the CPU 1068 /*
1069 * Writes to the same cacheline are serialised by the CPU
583 * (including clflush). On the write path, we only require 1070 * (including clflush). On the write path, we only require
584 * that it hits memory in an orderly fashion and place 1071 * that it hits memory in an orderly fashion and place
585 * mb barriers at the start and end of the relocation phase 1072 * mb barriers at the start and end of the relocation phase
@@ -591,25 +1078,26 @@ static void clflush_write32(u32 *addr, u32 value, unsigned int flushes)
591 *addr = value; 1078 *addr = value;
592} 1079}
593 1080
594static int 1081static u64
595relocate_entry(struct drm_i915_gem_object *obj, 1082relocate_entry(struct i915_vma *vma,
596 const struct drm_i915_gem_relocation_entry *reloc, 1083 const struct drm_i915_gem_relocation_entry *reloc,
597 struct reloc_cache *cache, 1084 struct i915_execbuffer *eb,
598 u64 target_offset) 1085 const struct i915_vma *target)
599{ 1086{
1087 struct drm_i915_gem_object *obj = vma->obj;
600 u64 offset = reloc->offset; 1088 u64 offset = reloc->offset;
601 bool wide = cache->use_64bit_reloc; 1089 u64 target_offset = relocation_target(reloc, target);
1090 bool wide = eb->reloc_cache.use_64bit_reloc;
602 void *vaddr; 1091 void *vaddr;
603 1092
604 target_offset = relocation_target(reloc, target_offset);
605repeat: 1093repeat:
606 vaddr = reloc_vaddr(obj, cache, offset >> PAGE_SHIFT); 1094 vaddr = reloc_vaddr(obj, &eb->reloc_cache, offset >> PAGE_SHIFT);
607 if (IS_ERR(vaddr)) 1095 if (IS_ERR(vaddr))
608 return PTR_ERR(vaddr); 1096 return PTR_ERR(vaddr);
609 1097
610 clflush_write32(vaddr + offset_in_page(offset), 1098 clflush_write32(vaddr + offset_in_page(offset),
611 lower_32_bits(target_offset), 1099 lower_32_bits(target_offset),
612 cache->vaddr); 1100 eb->reloc_cache.vaddr);
613 1101
614 if (wide) { 1102 if (wide) {
615 offset += sizeof(u32); 1103 offset += sizeof(u32);
@@ -618,17 +1106,16 @@ repeat:
618 goto repeat; 1106 goto repeat;
619 } 1107 }
620 1108
621 return 0; 1109 return target->node.start | UPDATE;
622} 1110}
623 1111
624static int 1112static u64
625eb_relocate_entry(struct i915_vma *vma, 1113eb_relocate_entry(struct i915_execbuffer *eb,
626 struct i915_execbuffer *eb, 1114 struct i915_vma *vma,
627 struct drm_i915_gem_relocation_entry *reloc) 1115 const struct drm_i915_gem_relocation_entry *reloc)
628{ 1116{
629 struct i915_vma *target; 1117 struct i915_vma *target;
630 u64 target_offset; 1118 int err;
631 int ret;
632 1119
633 /* we've already hold a reference to all valid objects */ 1120 /* we've already hold a reference to all valid objects */
634 target = eb_get_vma(eb, reloc->target_handle); 1121 target = eb_get_vma(eb, reloc->target_handle);
@@ -658,27 +1145,30 @@ eb_relocate_entry(struct i915_vma *vma,
658 return -EINVAL; 1145 return -EINVAL;
659 } 1146 }
660 1147
661 if (reloc->write_domain) 1148 if (reloc->write_domain) {
662 target->exec_entry->flags |= EXEC_OBJECT_WRITE; 1149 target->exec_entry->flags |= EXEC_OBJECT_WRITE;
663 1150
664 /* 1151 /*
665 * Sandybridge PPGTT errata: We need a global gtt mapping for MI and 1152 * Sandybridge PPGTT errata: We need a global gtt mapping
666 * pipe_control writes because the gpu doesn't properly redirect them 1153 * for MI and pipe_control writes because the gpu doesn't
667 * through the ppgtt for non_secure batchbuffers. 1154 * properly redirect them through the ppgtt for non_secure
668 */ 1155 * batchbuffers.
669 if (unlikely(IS_GEN6(eb->i915) && 1156 */
670 reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION)) { 1157 if (reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION &&
671 ret = i915_vma_bind(target, target->obj->cache_level, 1158 IS_GEN6(eb->i915)) {
672 PIN_GLOBAL); 1159 err = i915_vma_bind(target, target->obj->cache_level,
673 if (WARN_ONCE(ret, "Unexpected failure to bind target VMA!")) 1160 PIN_GLOBAL);
674 return ret; 1161 if (WARN_ONCE(err,
1162 "Unexpected failure to bind target VMA!"))
1163 return err;
1164 }
675 } 1165 }
676 1166
677 /* If the relocation already has the right value in it, no 1167 /*
1168 * If the relocation already has the right value in it, no
678 * more work needs to be done. 1169 * more work needs to be done.
679 */ 1170 */
680 target_offset = gen8_canonical_addr(target->node.start); 1171 if (gen8_canonical_addr(target->node.start) == reloc->presumed_offset)
681 if (target_offset == reloc->presumed_offset)
682 return 0; 1172 return 0;
683 1173
684 /* Check that the relocation address is valid... */ 1174 /* Check that the relocation address is valid... */
@@ -709,35 +1199,39 @@ eb_relocate_entry(struct i915_vma *vma,
709 */ 1199 */
710 vma->exec_entry->flags &= ~EXEC_OBJECT_ASYNC; 1200 vma->exec_entry->flags &= ~EXEC_OBJECT_ASYNC;
711 1201
712 ret = relocate_entry(vma->obj, reloc, &eb->reloc_cache, target_offset);
713 if (ret)
714 return ret;
715
716 /* and update the user's relocation entry */ 1202 /* and update the user's relocation entry */
717 reloc->presumed_offset = target_offset; 1203 return relocate_entry(vma, reloc, eb, target);
718 return 0;
719} 1204}
720 1205
721static int eb_relocate_vma(struct i915_vma *vma, struct i915_execbuffer *eb) 1206static int eb_relocate_vma(struct i915_execbuffer *eb, struct i915_vma *vma)
722{ 1207{
723#define N_RELOC(x) ((x) / sizeof(struct drm_i915_gem_relocation_entry)) 1208#define N_RELOC(x) ((x) / sizeof(struct drm_i915_gem_relocation_entry))
724 struct drm_i915_gem_relocation_entry stack_reloc[N_RELOC(512)]; 1209 struct drm_i915_gem_relocation_entry stack[N_RELOC(512)];
725 struct drm_i915_gem_relocation_entry __user *user_relocs; 1210 struct drm_i915_gem_relocation_entry __user *urelocs;
726 struct drm_i915_gem_exec_object2 *entry = vma->exec_entry; 1211 const struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
727 int remain, ret = 0; 1212 unsigned int remain;
728
729 user_relocs = u64_to_user_ptr(entry->relocs_ptr);
730 1213
1214 urelocs = u64_to_user_ptr(entry->relocs_ptr);
731 remain = entry->relocation_count; 1215 remain = entry->relocation_count;
732 while (remain) { 1216 if (unlikely(remain > N_RELOC(ULONG_MAX)))
733 struct drm_i915_gem_relocation_entry *r = stack_reloc; 1217 return -EINVAL;
734 unsigned long unwritten;
735 unsigned int count;
736 1218
737 count = min_t(unsigned int, remain, ARRAY_SIZE(stack_reloc)); 1219 /*
738 remain -= count; 1220 * We must check that the entire relocation array is safe
1221 * to read. However, if the array is not writable the user loses
1222 * the updated relocation values.
1223 */
1224 if (unlikely(!access_ok(VERIFY_READ, urelocs, remain*sizeof(urelocs))))
1225 return -EFAULT;
1226
1227 do {
1228 struct drm_i915_gem_relocation_entry *r = stack;
1229 unsigned int count =
1230 min_t(unsigned int, remain, ARRAY_SIZE(stack));
1231 unsigned int copied;
739 1232
740 /* This is the fast path and we cannot handle a pagefault 1233 /*
1234 * This is the fast path and we cannot handle a pagefault
741 * whilst holding the struct mutex lest the user pass in the 1235 * whilst holding the struct mutex lest the user pass in the
742 * relocations contained within a mmaped bo. For in such a case 1236 * relocations contained within a mmaped bo. For in such a case
743 * we, the page fault handler would call i915_gem_fault() and 1237 * we, the page fault handler would call i915_gem_fault() and
@@ -745,409 +1239,357 @@ static int eb_relocate_vma(struct i915_vma *vma, struct i915_execbuffer *eb)
745 * this is bad and so lockdep complains vehemently. 1239 * this is bad and so lockdep complains vehemently.
746 */ 1240 */
747 pagefault_disable(); 1241 pagefault_disable();
748 unwritten = __copy_from_user_inatomic(r, user_relocs, count*sizeof(r[0])); 1242 copied = __copy_from_user_inatomic(r, urelocs, count * sizeof(r[0]));
749 pagefault_enable(); 1243 pagefault_enable();
750 if (unlikely(unwritten)) { 1244 if (unlikely(copied)) {
751 ret = -EFAULT; 1245 remain = -EFAULT;
752 goto out; 1246 goto out;
753 } 1247 }
754 1248
1249 remain -= count;
755 do { 1250 do {
756 u64 offset = r->presumed_offset; 1251 u64 offset = eb_relocate_entry(eb, vma, r);
757 1252
758 ret = eb_relocate_entry(vma, eb, r); 1253 if (likely(offset == 0)) {
759 if (ret) 1254 } else if ((s64)offset < 0) {
1255 remain = (int)offset;
760 goto out; 1256 goto out;
761 1257 } else {
762 if (r->presumed_offset != offset) { 1258 /*
763 pagefault_disable(); 1259 * Note that reporting an error now
764 unwritten = __put_user(r->presumed_offset, 1260 * leaves everything in an inconsistent
765 &user_relocs->presumed_offset); 1261 * state as we have *already* changed
766 pagefault_enable(); 1262 * the relocation value inside the
767 if (unlikely(unwritten)) { 1263 * object. As we have not changed the
768 /* Note that reporting an error now 1264 * reloc.presumed_offset or will not
769 * leaves everything in an inconsistent 1265 * change the execobject.offset, on the
770 * state as we have *already* changed 1266 * call we may not rewrite the value
771 * the relocation value inside the 1267 * inside the object, leaving it
772 * object. As we have not changed the 1268 * dangling and causing a GPU hang. Unless
773 * reloc.presumed_offset or will not 1269 * userspace dynamically rebuilds the
774 * change the execobject.offset, on the 1270 * relocations on each execbuf rather than
775 * call we may not rewrite the value 1271 * presume a static tree.
776 * inside the object, leaving it 1272 *
777 * dangling and causing a GPU hang. 1273 * We did previously check if the relocations
778 */ 1274 * were writable (access_ok), an error now
779 ret = -EFAULT; 1275 * would be a strange race with mprotect,
780 goto out; 1276 * having already demonstrated that we
781 } 1277 * can read from this userspace address.
1278 */
1279 offset = gen8_canonical_addr(offset & ~UPDATE);
1280 __put_user(offset,
1281 &urelocs[r-stack].presumed_offset);
782 } 1282 }
783 1283 } while (r++, --count);
784 user_relocs++; 1284 urelocs += ARRAY_SIZE(stack);
785 r++; 1285 } while (remain);
786 } while (--count);
787 }
788
789out: 1286out:
790 reloc_cache_reset(&eb->reloc_cache); 1287 reloc_cache_reset(&eb->reloc_cache);
791 return ret; 1288 return remain;
792#undef N_RELOC
793} 1289}
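
eb_relocate_entry() now reports three outcomes through a single u64: zero when nothing needs writing back, a negative errno smuggled through the unsigned return, or the new offset tagged so the caller updates the user copy. A minimal standalone sketch of that convention, using an illustrative UPDATE bit rather than the driver's flag:

#include <errno.h>
#include <stdint.h>
#include <stdio.h>

#define UPDATE 0x1ull	/* illustrative tag: offsets are page aligned, so bit 0 is free */

/* Return 0 (no change), a negative errno cast to u64, or offset|UPDATE. */
static uint64_t relocate_one(uint64_t old_presumed, uint64_t new_offset, int fail)
{
	if (fail)
		return (uint64_t)(int64_t)-EINVAL;
	if (new_offset == old_presumed)
		return 0;
	return new_offset | UPDATE;
}

int main(void)
{
	uint64_t ret = relocate_one(0x10000, 0x20000, 0);

	if (ret == 0)
		printf("already up to date\n");
	else if ((int64_t)ret < 0)
		printf("error %d\n", (int)(int64_t)ret);
	else
		printf("write back offset 0x%llx\n",
		       (unsigned long long)(ret & ~UPDATE));
	return 0;
}

Folding the status into the existing return keeps the inner relocation loop free of extra output parameters, which is the point of the rework here.
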
794 1290
795static int 1291static int
796eb_relocate_vma_slow(struct i915_vma *vma, 1292eb_relocate_vma_slow(struct i915_execbuffer *eb, struct i915_vma *vma)
797 struct i915_execbuffer *eb,
798 struct drm_i915_gem_relocation_entry *relocs)
799{ 1293{
800 const struct drm_i915_gem_exec_object2 *entry = vma->exec_entry; 1294 const struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
801 int i, ret = 0; 1295 struct drm_i915_gem_relocation_entry *relocs =
1296 u64_to_ptr(typeof(*relocs), entry->relocs_ptr);
1297 unsigned int i;
1298 int err;
802 1299
803 for (i = 0; i < entry->relocation_count; i++) { 1300 for (i = 0; i < entry->relocation_count; i++) {
804 ret = eb_relocate_entry(vma, eb, &relocs[i]); 1301 u64 offset = eb_relocate_entry(eb, vma, &relocs[i]);
805 if (ret) 1302
806 break; 1303 if ((s64)offset < 0) {
1304 err = (int)offset;
1305 goto err;
1306 }
807 } 1307 }
1308 err = 0;
1309err:
808 reloc_cache_reset(&eb->reloc_cache); 1310 reloc_cache_reset(&eb->reloc_cache);
809 return ret; 1311 return err;
810} 1312}
811 1313
812static int eb_relocate(struct i915_execbuffer *eb) 1314static int check_relocations(const struct drm_i915_gem_exec_object2 *entry)
813{ 1315{
814 struct i915_vma *vma; 1316 const char __user *addr, *end;
815 int ret = 0; 1317 unsigned long size;
1318 char __maybe_unused c;
816 1319
817 list_for_each_entry(vma, &eb->vmas, exec_link) { 1320 size = entry->relocation_count;
818 ret = eb_relocate_vma(vma, eb); 1321 if (size == 0)
819 if (ret) 1322 return 0;
820 break;
821 }
822 1323
823 return ret; 1324 if (size > N_RELOC(ULONG_MAX))
824} 1325 return -EINVAL;
825 1326
826static bool only_mappable_for_reloc(unsigned int flags) 1327 addr = u64_to_user_ptr(entry->relocs_ptr);
827{ 1328 size *= sizeof(struct drm_i915_gem_relocation_entry);
828 return (flags & (EXEC_OBJECT_NEEDS_FENCE | __EXEC_OBJECT_NEEDS_MAP)) == 1329 if (!access_ok(VERIFY_READ, addr, size))
829 __EXEC_OBJECT_NEEDS_MAP; 1330 return -EFAULT;
1331
1332 end = addr + size;
1333 for (; addr < end; addr += PAGE_SIZE) {
1334 int err = __get_user(c, addr);
1335 if (err)
1336 return err;
1337 }
1338 return __get_user(c, end - 1);
830} 1339}
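
check_relocations() validates the array bounds with access_ok() and then reads one byte per page (plus the final byte) so the pages are faulted in before the atomic fast path runs. A userspace analogue of that stride, assuming a 4 KiB page size for the sketch:

#include <stdio.h>
#include <stdlib.h>

#define PAGE_SZ 4096	/* assumed page size for the sketch */

/* Touch one byte per page, plus the last byte, so the whole range is
 * resident before entering a section that cannot tolerate a page fault. */
static void prefault_readable(const volatile char *addr, size_t len)
{
	const volatile char *end = addr + len;

	if (len == 0)
		return;

	for (; addr < end; addr += PAGE_SZ)
		(void)*addr;
	(void)*(end - 1);	/* the stride may stop short of the tail */
}

int main(void)
{
	size_t len = 3 * PAGE_SZ;
	char *buf = calloc(1, len);

	if (!buf)
		return 1;
	prefault_readable(buf, len);
	puts("prefaulted");
	free(buf);
	return 0;
}
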
831 1340
832static int 1341static int eb_copy_relocations(const struct i915_execbuffer *eb)
833eb_reserve_vma(struct i915_vma *vma,
834 struct intel_engine_cs *engine,
835 bool *need_reloc)
836{ 1342{
837 struct drm_i915_gem_exec_object2 *entry = vma->exec_entry; 1343 const unsigned int count = eb->buffer_count;
838 uint64_t flags; 1344 unsigned int i;
839 int ret; 1345 int err;
840
841 flags = PIN_USER;
842 if (entry->flags & EXEC_OBJECT_NEEDS_GTT)
843 flags |= PIN_GLOBAL;
844
845 if (!drm_mm_node_allocated(&vma->node)) {
846 /* Wa32bitGeneralStateOffset & Wa32bitInstructionBaseOffset,
847 * limit address to the first 4GBs for unflagged objects.
848 */
849 if ((entry->flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) == 0)
850 flags |= PIN_ZONE_4G;
851 if (entry->flags & __EXEC_OBJECT_NEEDS_MAP)
852 flags |= PIN_GLOBAL | PIN_MAPPABLE;
853 if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS)
854 flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS;
855 if (entry->flags & EXEC_OBJECT_PINNED)
856 flags |= entry->offset | PIN_OFFSET_FIXED;
857 if ((flags & PIN_MAPPABLE) == 0)
858 flags |= PIN_HIGH;
859 }
860
861 ret = i915_vma_pin(vma,
862 entry->pad_to_size,
863 entry->alignment,
864 flags);
865 if ((ret == -ENOSPC || ret == -E2BIG) &&
866 only_mappable_for_reloc(entry->flags))
867 ret = i915_vma_pin(vma,
868 entry->pad_to_size,
869 entry->alignment,
870 flags & ~PIN_MAPPABLE);
871 if (ret)
872 return ret;
873 1346
874 entry->flags |= __EXEC_OBJECT_HAS_PIN; 1347 for (i = 0; i < count; i++) {
1348 const unsigned int nreloc = eb->exec[i].relocation_count;
1349 struct drm_i915_gem_relocation_entry __user *urelocs;
1350 struct drm_i915_gem_relocation_entry *relocs;
1351 unsigned long size;
1352 unsigned long copied;
875 1353
876 if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) { 1354 if (nreloc == 0)
877 ret = i915_vma_get_fence(vma); 1355 continue;
878 if (ret)
879 return ret;
880 1356
881 if (i915_vma_pin_fence(vma)) 1357 err = check_relocations(&eb->exec[i]);
882 entry->flags |= __EXEC_OBJECT_HAS_FENCE; 1358 if (err)
883 } 1359 goto err;
884 1360
885 if (entry->offset != vma->node.start) { 1361 urelocs = u64_to_user_ptr(eb->exec[i].relocs_ptr);
886 entry->offset = vma->node.start; 1362 size = nreloc * sizeof(*relocs);
887 *need_reloc = true;
888 }
889 1363
890 return 0; 1364 relocs = kvmalloc_array(size, 1, GFP_TEMPORARY);
891} 1365 if (!relocs) {
1366 kvfree(relocs);
1367 err = -ENOMEM;
1368 goto err;
1369 }
892 1370
893static bool 1371 /* copy_from_user is limited to < 4GiB */
894need_reloc_mappable(struct i915_vma *vma) 1372 copied = 0;
895{ 1373 do {
896 struct drm_i915_gem_exec_object2 *entry = vma->exec_entry; 1374 unsigned int len =
1375 min_t(u64, BIT_ULL(31), size - copied);
1376
1377 if (__copy_from_user((char *)relocs + copied,
1378 (char *)urelocs + copied,
1379 len)) {
1380 kvfree(relocs);
1381 err = -EFAULT;
1382 goto err;
1383 }
897 1384
898 if (entry->relocation_count == 0) 1385 copied += len;
899 return false; 1386 } while (copied < size);
900 1387
901 if (!i915_vma_is_ggtt(vma)) 1388 /*
902 return false; 1389 * As we do not update the known relocation offsets after
1390 * relocating (due to the complexities in lock handling),
1391 * we need to mark them as invalid now so that we force the
1392 * relocation processing next time. Just in case the target
1393 * object is evicted and then rebound into its old
1394 * presumed_offset before the next execbuffer - if that
1395 * happened we would make the mistake of assuming that the
1396 * relocations were valid.
1397 */
1398 user_access_begin();
1399 for (copied = 0; copied < nreloc; copied++)
1400 unsafe_put_user(-1,
1401 &urelocs[copied].presumed_offset,
1402 end_user);
1403end_user:
1404 user_access_end();
903 1405
904 /* See also use_cpu_reloc() */ 1406 eb->exec[i].relocs_ptr = (uintptr_t)relocs;
905 if (HAS_LLC(to_i915(vma->obj->base.dev))) 1407 }
906 return false;
907 1408
908 if (vma->obj->base.write_domain == I915_GEM_DOMAIN_CPU) 1409 return 0;
909 return false;
910 1410
911 return true; 1411err:
1412 while (i--) {
1413 struct drm_i915_gem_relocation_entry *relocs =
1414 u64_to_ptr(typeof(*relocs), eb->exec[i].relocs_ptr);
1415 if (eb->exec[i].relocation_count)
1416 kvfree(relocs);
1417 }
1418 return err;
912} 1419}
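
eb_copy_relocations() pulls the whole relocation tree into a kernel-side copy, chunking the copy at 2 GiB because the underlying copy primitive cannot take a larger length, and afterwards poisons every user presumed_offset with -1 so stale values are never trusted on a later execbuf. A standalone sketch of just the chunking arithmetic (plain memcpy stands in for __copy_from_user):

#include <stdint.h>
#include <string.h>

/* Copy 'size' bytes in chunks no larger than 2 GiB, mirroring
 * min_t(u64, BIT_ULL(31), size - copied) in the driver. */
static void copy_chunked(void *dst, const void *src, uint64_t size)
{
	uint64_t copied = 0;

	while (copied < size) {
		uint64_t remain = size - copied;
		uint64_t len = remain < (1ull << 31) ? remain : (1ull << 31);

		memcpy((char *)dst + copied, (const char *)src + copied, len);
		copied += len;
	}
}

int main(void)
{
	static char src[1 << 16], dst[1 << 16];

	copy_chunked(dst, src, sizeof(src));
	return 0;
}
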
913 1420
914static bool 1421static int eb_prefault_relocations(const struct i915_execbuffer *eb)
915eb_vma_misplaced(struct i915_vma *vma)
916{ 1422{
917 struct drm_i915_gem_exec_object2 *entry = vma->exec_entry; 1423 const unsigned int count = eb->buffer_count;
918 1424 unsigned int i;
919 WARN_ON(entry->flags & __EXEC_OBJECT_NEEDS_MAP &&
920 !i915_vma_is_ggtt(vma));
921
922 if (entry->alignment && !IS_ALIGNED(vma->node.start, entry->alignment))
923 return true;
924 1425
925 if (vma->node.size < entry->pad_to_size) 1426 if (unlikely(i915.prefault_disable))
926 return true; 1427 return 0;
927
928 if (entry->flags & EXEC_OBJECT_PINNED &&
929 vma->node.start != entry->offset)
930 return true;
931
932 if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS &&
933 vma->node.start < BATCH_OFFSET_BIAS)
934 return true;
935 1428
936 /* avoid costly ping-pong once a batch bo ended up non-mappable */ 1429 for (i = 0; i < count; i++) {
937 if (entry->flags & __EXEC_OBJECT_NEEDS_MAP && 1430 int err;
938 !i915_vma_is_map_and_fenceable(vma))
939 return !only_mappable_for_reloc(entry->flags);
940 1431
941 if ((entry->flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) == 0 && 1432 err = check_relocations(&eb->exec[i]);
942 (vma->node.start + vma->node.size - 1) >> 32) 1433 if (err)
943 return true; 1434 return err;
1435 }
944 1436
945 return false; 1437 return 0;
946} 1438}
947 1439
948static int eb_reserve(struct i915_execbuffer *eb) 1440static noinline int eb_relocate_slow(struct i915_execbuffer *eb)
949{ 1441{
950 const bool has_fenced_gpu_access = INTEL_GEN(eb->i915) < 4; 1442 struct drm_device *dev = &eb->i915->drm;
951 const bool needs_unfenced_map = INTEL_INFO(eb->i915)->unfenced_needs_alignment; 1443 bool have_copy = false;
952 struct i915_vma *vma; 1444 struct i915_vma *vma;
953 struct list_head ordered_vmas; 1445 int err = 0;
954 struct list_head pinned_vmas; 1446
955 int retry; 1447repeat:
956 1448 if (signal_pending(current)) {
957 INIT_LIST_HEAD(&ordered_vmas); 1449 err = -ERESTARTSYS;
958 INIT_LIST_HEAD(&pinned_vmas); 1450 goto out;
959 while (!list_empty(&eb->vmas)) { 1451 }
960 struct drm_i915_gem_exec_object2 *entry; 1452
961 bool need_fence, need_mappable; 1453 /* We may process another execbuffer during the unlock... */
962 1454 eb_reset_vmas(eb);
963 vma = list_first_entry(&eb->vmas, struct i915_vma, exec_link); 1455 mutex_unlock(&dev->struct_mutex);
964 entry = vma->exec_entry; 1456
965 1457 /*
 966 if (eb->ctx->flags & CONTEXT_NO_ZEROMAP) 1458 * We take 3 passes through the slowpath.
967 entry->flags |= __EXEC_OBJECT_NEEDS_BIAS;
968
969 if (!has_fenced_gpu_access)
970 entry->flags &= ~EXEC_OBJECT_NEEDS_FENCE;
971 need_fence =
972 (entry->flags & EXEC_OBJECT_NEEDS_FENCE ||
973 needs_unfenced_map) &&
974 i915_gem_object_is_tiled(vma->obj);
975 need_mappable = need_fence || need_reloc_mappable(vma);
976
977 if (entry->flags & EXEC_OBJECT_PINNED)
978 list_move_tail(&vma->exec_link, &pinned_vmas);
979 else if (need_mappable) {
980 entry->flags |= __EXEC_OBJECT_NEEDS_MAP;
981 list_move(&vma->exec_link, &ordered_vmas);
982 } else
983 list_move_tail(&vma->exec_link, &ordered_vmas);
984 }
985 list_splice(&ordered_vmas, &eb->vmas);
986 list_splice(&pinned_vmas, &eb->vmas);
987
988 /* Attempt to pin all of the buffers into the GTT.
989 * This is done in 3 phases:
990 * 1459 *
991 * 1a. Unbind all objects that do not match the GTT constraints for 1460 * 1 - we try to just prefault all the user relocation entries and
992 * the execbuffer (fenceable, mappable, alignment etc). 1461 * then attempt to reuse the atomic pagefault disabled fast path again.
993 * 1b. Increment pin count for already bound objects.
994 * 2. Bind new objects.
995 * 3. Decrement pin count.
996 * 1462 *
 997 * This avoids unnecessary unbinding of later objects in order to make 1463 * 2 - we copy the user entries to a local buffer here outside of the
 998 * room for the earlier objects *unless* we need to defragment. 1464 * lock and allow ourselves to wait upon any rendering before
1465 * relocations
1466 *
1467 * 3 - we already have a local copy of the relocation entries, but
1468 * were interrupted (EAGAIN) whilst waiting for the objects, try again.
999 */ 1469 */
1000 retry = 0; 1470 if (!err) {
1001 do { 1471 err = eb_prefault_relocations(eb);
1002 int ret = 0; 1472 } else if (!have_copy) {
1003 1473 err = eb_copy_relocations(eb);
1004 /* Unbind any ill-fitting objects or pin. */ 1474 have_copy = err == 0;
1005 list_for_each_entry(vma, &eb->vmas, exec_link) { 1475 } else {
1006 if (!drm_mm_node_allocated(&vma->node)) 1476 cond_resched();
1007 continue; 1477 err = 0;
1478 }
1479 if (err) {
1480 mutex_lock(&dev->struct_mutex);
1481 goto out;
1482 }
1008 1483
1009 if (eb_vma_misplaced(vma)) 1484 err = i915_mutex_lock_interruptible(dev);
1010 ret = i915_vma_unbind(vma); 1485 if (err) {
1011 else 1486 mutex_lock(&dev->struct_mutex);
1012 ret = eb_reserve_vma(vma, eb->engine, &eb->need_relocs); 1487 goto out;
1013 if (ret) 1488 }
1014 goto err;
1015 }
1016 1489
1017 /* Bind fresh objects */ 1490 /* reacquire the objects */
1018 list_for_each_entry(vma, &eb->vmas, exec_link) { 1491 err = eb_lookup_vmas(eb);
1019 if (drm_mm_node_allocated(&vma->node)) 1492 if (err)
1020 continue; 1493 goto err;
1021 1494
1022 ret = eb_reserve_vma(vma, eb->engine, &eb->need_relocs); 1495 list_for_each_entry(vma, &eb->relocs, reloc_link) {
1023 if (ret) 1496 if (!have_copy) {
1497 pagefault_disable();
1498 err = eb_relocate_vma(eb, vma);
1499 pagefault_enable();
1500 if (err)
1501 goto repeat;
1502 } else {
1503 err = eb_relocate_vma_slow(eb, vma);
1504 if (err)
1024 goto err; 1505 goto err;
1025 } 1506 }
1507 }
1026 1508
1027err: 1509 /*
1028 if (ret != -ENOSPC || retry++) 1510 * Leave the user relocations as are, this is the painfully slow path,
1029 return ret; 1511 * and we want to avoid the complication of dropping the lock whilst
1030 1512 * having buffers reserved in the aperture and so causing spurious
1031 /* Decrement pin count for bound objects */ 1513 * ENOSPC for random operations.
1032 list_for_each_entry(vma, &eb->vmas, exec_link) 1514 */
1033 eb_unreserve_vma(vma);
1034 1515
1035 ret = i915_gem_evict_vm(eb->vm, true); 1516err:
1036 if (ret) 1517 if (err == -EAGAIN)
1037 return ret; 1518 goto repeat;
1038 } while (1);
1039}
1040 1519
1041static int 1520out:
1042eb_relocate_slow(struct i915_execbuffer *eb) 1521 if (have_copy) {
1043{ 1522 const unsigned int count = eb->buffer_count;
1044 const unsigned int count = eb->args->buffer_count; 1523 unsigned int i;
1045 struct drm_device *dev = &eb->i915->drm;
1046 struct drm_i915_gem_relocation_entry *reloc;
1047 struct i915_vma *vma;
1048 int *reloc_offset;
1049 int i, total, ret;
1050 1524
1051 /* We may process another execbuffer during the unlock... */ 1525 for (i = 0; i < count; i++) {
1052 eb_reset(eb); 1526 const struct drm_i915_gem_exec_object2 *entry =
1053 mutex_unlock(&dev->struct_mutex); 1527 &eb->exec[i];
1528 struct drm_i915_gem_relocation_entry *relocs;
1054 1529
1055 total = 0; 1530 if (!entry->relocation_count)
1056 for (i = 0; i < count; i++) 1531 continue;
1057 total += eb->exec[i].relocation_count;
1058 1532
1059 reloc_offset = kvmalloc_array(count, sizeof(*reloc_offset), GFP_KERNEL); 1533 relocs = u64_to_ptr(typeof(*relocs), entry->relocs_ptr);
1060 reloc = kvmalloc_array(total, sizeof(*reloc), GFP_KERNEL); 1534 kvfree(relocs);
1061 if (reloc == NULL || reloc_offset == NULL) { 1535 }
1062 kvfree(reloc);
1063 kvfree(reloc_offset);
1064 mutex_lock(&dev->struct_mutex);
1065 return -ENOMEM;
1066 } 1536 }
1067 1537
1068 total = 0; 1538 return err ?: have_copy;
1069 for (i = 0; i < count; i++) { 1539}
1070 struct drm_i915_gem_relocation_entry __user *user_relocs;
1071 u64 invalid_offset = (u64)-1;
1072 int j;
1073 1540
1074 user_relocs = u64_to_user_ptr(eb->exec[i].relocs_ptr); 1541static int eb_relocate(struct i915_execbuffer *eb)
1542{
1543 if (eb_lookup_vmas(eb))
1544 goto slow;
1075 1545
1076 if (copy_from_user(reloc+total, user_relocs, 1546 /* The objects are in their final locations, apply the relocations. */
1077 eb->exec[i].relocation_count * sizeof(*reloc))) { 1547 if (eb->args->flags & __EXEC_HAS_RELOC) {
1078 ret = -EFAULT; 1548 struct i915_vma *vma;
1079 mutex_lock(&dev->struct_mutex);
1080 goto err;
1081 }
1082 1549
1083 /* As we do not update the known relocation offsets after 1550 list_for_each_entry(vma, &eb->relocs, reloc_link) {
1084 * relocating (due to the complexities in lock handling), 1551 if (eb_relocate_vma(eb, vma))
1085 * we need to mark them as invalid now so that we force the 1552 goto slow;
1086 * relocation processing next time. Just in case the target
1087 * object is evicted and then rebound into its old
1088 * presumed_offset before the next execbuffer - if that
1089 * happened we would make the mistake of assuming that the
1090 * relocations were valid.
1091 */
1092 for (j = 0; j < eb->exec[i].relocation_count; j++) {
1093 if (__copy_to_user(&user_relocs[j].presumed_offset,
1094 &invalid_offset,
1095 sizeof(invalid_offset))) {
1096 ret = -EFAULT;
1097 mutex_lock(&dev->struct_mutex);
1098 goto err;
1099 }
1100 } 1553 }
1101
1102 reloc_offset[i] = total;
1103 total += eb->exec[i].relocation_count;
1104 } 1554 }
1105 1555
1106 ret = i915_mutex_lock_interruptible(dev); 1556 return 0;
1107 if (ret) {
1108 mutex_lock(&dev->struct_mutex);
1109 goto err;
1110 }
1111
1112 /* reacquire the objects */
1113 ret = eb_lookup_vmas(eb);
1114 if (ret)
1115 goto err;
1116
1117 ret = eb_reserve(eb);
1118 if (ret)
1119 goto err;
1120 1557
1121 list_for_each_entry(vma, &eb->vmas, exec_link) { 1558slow:
1122 int idx = vma->exec_entry - eb->exec; 1559 return eb_relocate_slow(eb);
1560}
1123 1561
1124 ret = eb_relocate_vma_slow(vma, eb, reloc + reloc_offset[idx]); 1562static void eb_export_fence(struct drm_i915_gem_object *obj,
1125 if (ret) 1563 struct drm_i915_gem_request *req,
1126 goto err; 1564 unsigned int flags)
1127 } 1565{
1566 struct reservation_object *resv = obj->resv;
1128 1567
1129 /* Leave the user relocations as are, this is the painfully slow path, 1568 /*
1130 * and we want to avoid the complication of dropping the lock whilst 1569 * Ignore errors from failing to allocate the new fence, we can't
1131 * having buffers reserved in the aperture and so causing spurious 1570 * handle an error right now. Worst case should be missed
1132 * ENOSPC for random operations. 1571 * synchronisation leading to rendering corruption.
1133 */ 1572 */
1134 1573 reservation_object_lock(resv, NULL);
1135err: 1574 if (flags & EXEC_OBJECT_WRITE)
1136 kvfree(reloc); 1575 reservation_object_add_excl_fence(resv, &req->fence);
1137 kvfree(reloc_offset); 1576 else if (reservation_object_reserve_shared(resv) == 0)
1138 return ret; 1577 reservation_object_add_shared_fence(resv, &req->fence);
1578 reservation_object_unlock(resv);
1139} 1579}
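
As the comment at the top of eb_relocate_slow() describes, the slow path escalates over three passes: prefault the user relocations and retry the atomic fast path, then take a local copy, then simply reschedule and try again after an interrupted wait. A compressed control-flow sketch of that escalation; every helper below is a stub and none of these names are the driver's:

#include <errno.h>
#include <stdbool.h>

/* Stubs standing in for the real work; each returns 0 or a -errno. */
static int try_fast_path(bool have_copy) { (void)have_copy; return 0; }
static int prefault_user_relocs(void)    { return 0; }
static int copy_user_relocs(void)        { return 0; }

static int relocate_slow(void)
{
	bool have_copy = false;
	int err = 0;

repeat:
	if (!err) {
		err = prefault_user_relocs();	/* pass 1: prefault, retry fast path */
	} else if (!have_copy) {
		err = copy_user_relocs();	/* pass 2: take a local copy */
		have_copy = (err == 0);
	} else {
		err = 0;			/* pass 3: just try again */
	}
	if (err)
		return err;

	err = try_fast_path(have_copy);
	if (err == -EFAULT || err == -EAGAIN)
		goto repeat;	/* a fault escalates; an interrupted wait retries */
	return err;
}

int main(void)
{
	return relocate_slow();
}
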
1140 1580
1141static int 1581static int eb_move_to_gpu(struct i915_execbuffer *eb)
1142eb_move_to_gpu(struct i915_execbuffer *eb)
1143{ 1582{
1144 struct i915_vma *vma; 1583 const unsigned int count = eb->buffer_count;
1145 int ret; 1584 unsigned int i;
1585 int err;
1146 1586
1147 list_for_each_entry(vma, &eb->vmas, exec_link) { 1587 for (i = 0; i < count; i++) {
1588 const struct drm_i915_gem_exec_object2 *entry = &eb->exec[i];
1589 struct i915_vma *vma = exec_to_vma(entry);
1148 struct drm_i915_gem_object *obj = vma->obj; 1590 struct drm_i915_gem_object *obj = vma->obj;
1149 1591
1150 if (vma->exec_entry->flags & EXEC_OBJECT_CAPTURE) { 1592 if (entry->flags & EXEC_OBJECT_CAPTURE) {
1151 struct i915_gem_capture_list *capture; 1593 struct i915_gem_capture_list *capture;
1152 1594
1153 capture = kmalloc(sizeof(*capture), GFP_KERNEL); 1595 capture = kmalloc(sizeof(*capture), GFP_KERNEL);
@@ -1159,18 +1601,32 @@ eb_move_to_gpu(struct i915_execbuffer *eb)
1159 eb->request->capture_list = capture; 1601 eb->request->capture_list = capture;
1160 } 1602 }
1161 1603
1162 if (vma->exec_entry->flags & EXEC_OBJECT_ASYNC) 1604 if (entry->flags & EXEC_OBJECT_ASYNC)
1163 continue; 1605 goto skip_flushes;
1164 1606
1165 if (unlikely(obj->cache_dirty && !obj->cache_coherent)) 1607 if (unlikely(obj->cache_dirty && !obj->cache_coherent))
1166 i915_gem_clflush_object(obj, 0); 1608 i915_gem_clflush_object(obj, 0);
1167 1609
1168 ret = i915_gem_request_await_object 1610 err = i915_gem_request_await_object
1169 (eb->request, obj, vma->exec_entry->flags & EXEC_OBJECT_WRITE); 1611 (eb->request, obj, entry->flags & EXEC_OBJECT_WRITE);
1170 if (ret) 1612 if (err)
1171 return ret; 1613 return err;
1614
1615skip_flushes:
1616 i915_vma_move_to_active(vma, eb->request, entry->flags);
1617 __eb_unreserve_vma(vma, entry);
1618 vma->exec_entry = NULL;
1172 } 1619 }
1173 1620
1621 for (i = 0; i < count; i++) {
1622 const struct drm_i915_gem_exec_object2 *entry = &eb->exec[i];
1623 struct i915_vma *vma = exec_to_vma(entry);
1624
1625 eb_export_fence(vma->obj, eb->request, entry->flags);
1626 i915_vma_put(vma);
1627 }
1628 eb->exec = NULL;
1629
1174 /* Unconditionally flush any chipset caches (for streaming writes). */ 1630 /* Unconditionally flush any chipset caches (for streaming writes). */
1175 i915_gem_chipset_flush(eb->i915); 1631 i915_gem_chipset_flush(eb->i915);
1176 1632
@@ -1178,8 +1634,7 @@ eb_move_to_gpu(struct i915_execbuffer *eb)
1178 return eb->engine->emit_flush(eb->request, EMIT_INVALIDATE); 1634 return eb->engine->emit_flush(eb->request, EMIT_INVALIDATE);
1179} 1635}
1180 1636
1181static bool 1637static bool i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec)
1182i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec)
1183{ 1638{
1184 if (exec->flags & __I915_EXEC_ILLEGAL_FLAGS) 1639 if (exec->flags & __I915_EXEC_ILLEGAL_FLAGS)
1185 return false; 1640 return false;
@@ -1201,103 +1656,6 @@ i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec)
1201 return true; 1656 return true;
1202} 1657}
1203 1658
1204static int
1205validate_exec_list(struct drm_device *dev,
1206 struct drm_i915_gem_exec_object2 *exec,
1207 int count)
1208{
1209 unsigned relocs_total = 0;
1210 unsigned relocs_max = UINT_MAX / sizeof(struct drm_i915_gem_relocation_entry);
1211 unsigned invalid_flags;
1212 int i;
1213
1214 /* INTERNAL flags must not overlap with external ones */
1215 BUILD_BUG_ON(__EXEC_OBJECT_INTERNAL_FLAGS & ~__EXEC_OBJECT_UNKNOWN_FLAGS);
1216
1217 invalid_flags = __EXEC_OBJECT_UNKNOWN_FLAGS;
1218 if (USES_FULL_PPGTT(dev))
1219 invalid_flags |= EXEC_OBJECT_NEEDS_GTT;
1220
1221 for (i = 0; i < count; i++) {
1222 char __user *ptr = u64_to_user_ptr(exec[i].relocs_ptr);
1223 int length; /* limited by fault_in_pages_readable() */
1224
1225 if (exec[i].flags & invalid_flags)
1226 return -EINVAL;
1227
1228 /* Offset can be used as input (EXEC_OBJECT_PINNED), reject
1229 * any non-page-aligned or non-canonical addresses.
1230 */
1231 if (exec[i].flags & EXEC_OBJECT_PINNED) {
1232 if (exec[i].offset !=
1233 gen8_canonical_addr(exec[i].offset & PAGE_MASK))
1234 return -EINVAL;
1235 }
1236
1237 /* From drm_mm perspective address space is continuous,
1238 * so from this point we're always using non-canonical
1239 * form internally.
1240 */
1241 exec[i].offset = gen8_noncanonical_addr(exec[i].offset);
1242
1243 if (exec[i].alignment && !is_power_of_2(exec[i].alignment))
1244 return -EINVAL;
1245
1246 /* pad_to_size was once a reserved field, so sanitize it */
1247 if (exec[i].flags & EXEC_OBJECT_PAD_TO_SIZE) {
1248 if (offset_in_page(exec[i].pad_to_size))
1249 return -EINVAL;
1250 } else {
1251 exec[i].pad_to_size = 0;
1252 }
1253
1254 /* First check for malicious input causing overflow in
1255 * the worst case where we need to allocate the entire
1256 * relocation tree as a single array.
1257 */
1258 if (exec[i].relocation_count > relocs_max - relocs_total)
1259 return -EINVAL;
1260 relocs_total += exec[i].relocation_count;
1261
1262 length = exec[i].relocation_count *
1263 sizeof(struct drm_i915_gem_relocation_entry);
1264 /*
1265 * We must check that the entire relocation array is safe
1266 * to read, but since we may need to update the presumed
1267 * offsets during execution, check for full write access.
1268 */
1269 if (!access_ok(VERIFY_WRITE, ptr, length))
1270 return -EFAULT;
1271
1272 if (likely(!i915.prefault_disable)) {
1273 if (fault_in_pages_readable(ptr, length))
1274 return -EFAULT;
1275 }
1276 }
1277
1278 return 0;
1279}
1280
1281static int eb_select_context(struct i915_execbuffer *eb)
1282{
1283 unsigned int ctx_id = i915_execbuffer2_get_context_id(*eb->args);
1284 struct i915_gem_context *ctx;
1285
1286 ctx = i915_gem_context_lookup(eb->file->driver_priv, ctx_id);
1287 if (unlikely(IS_ERR(ctx)))
1288 return PTR_ERR(ctx);
1289
1290 if (unlikely(i915_gem_context_is_banned(ctx))) {
1291 DRM_DEBUG("Context %u tried to submit while banned\n", ctx_id);
1292 return -EIO;
1293 }
1294
1295 eb->ctx = i915_gem_context_get(ctx);
1296 eb->vm = ctx->ppgtt ? &ctx->ppgtt->base : &eb->i915->ggtt.base;
1297
1298 return 0;
1299}
1300
1301void i915_vma_move_to_active(struct i915_vma *vma, 1659void i915_vma_move_to_active(struct i915_vma *vma,
1302 struct drm_i915_gem_request *req, 1660 struct drm_i915_gem_request *req,
1303 unsigned int flags) 1661 unsigned int flags)
@@ -1308,7 +1666,8 @@ void i915_vma_move_to_active(struct i915_vma *vma,
1308 lockdep_assert_held(&req->i915->drm.struct_mutex); 1666 lockdep_assert_held(&req->i915->drm.struct_mutex);
1309 GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); 1667 GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
1310 1668
1311 /* Add a reference if we're newly entering the active list. 1669 /*
1670 * Add a reference if we're newly entering the active list.
1312 * The order in which we add operations to the retirement queue is 1671 * The order in which we add operations to the retirement queue is
1313 * vital here: mark_active adds to the start of the callback list, 1672 * vital here: mark_active adds to the start of the callback list,
1314 * such that subsequent callbacks are called first. Therefore we 1673 * such that subsequent callbacks are called first. Therefore we
@@ -1336,44 +1695,7 @@ void i915_vma_move_to_active(struct i915_vma *vma,
1336 i915_gem_active_set(&vma->last_fence, req); 1695 i915_gem_active_set(&vma->last_fence, req);
1337} 1696}
1338 1697
1339static void eb_export_fence(struct drm_i915_gem_object *obj, 1698static int i915_reset_gen7_sol_offsets(struct drm_i915_gem_request *req)
1340 struct drm_i915_gem_request *req,
1341 unsigned int flags)
1342{
1343 struct reservation_object *resv = obj->resv;
1344
1345 /* Ignore errors from failing to allocate the new fence, we can't
1346 * handle an error right now. Worst case should be missed
1347 * synchronisation leading to rendering corruption.
1348 */
1349 reservation_object_lock(resv, NULL);
1350 if (flags & EXEC_OBJECT_WRITE)
1351 reservation_object_add_excl_fence(resv, &req->fence);
1352 else if (reservation_object_reserve_shared(resv) == 0)
1353 reservation_object_add_shared_fence(resv, &req->fence);
1354 reservation_object_unlock(resv);
1355}
1356
1357static void
1358eb_move_to_active(struct i915_execbuffer *eb)
1359{
1360 struct i915_vma *vma;
1361
1362 list_for_each_entry(vma, &eb->vmas, exec_link) {
1363 struct drm_i915_gem_object *obj = vma->obj;
1364
1365 obj->base.write_domain = 0;
1366 if (vma->exec_entry->flags & EXEC_OBJECT_WRITE)
1367 obj->base.read_domains = 0;
1368 obj->base.read_domains |= I915_GEM_GPU_DOMAINS;
1369
1370 i915_vma_move_to_active(vma, eb->request, vma->exec_entry->flags);
1371 eb_export_fence(obj, eb->request, vma->exec_entry->flags);
1372 }
1373}
1374
1375static int
1376i915_reset_gen7_sol_offsets(struct drm_i915_gem_request *req)
1377{ 1699{
1378 u32 *cs; 1700 u32 *cs;
1379 int i; 1701 int i;
@@ -1383,16 +1705,16 @@ i915_reset_gen7_sol_offsets(struct drm_i915_gem_request *req)
1383 return -EINVAL; 1705 return -EINVAL;
1384 } 1706 }
1385 1707
1386 cs = intel_ring_begin(req, 4 * 3); 1708 cs = intel_ring_begin(req, 4 * 2 + 2);
1387 if (IS_ERR(cs)) 1709 if (IS_ERR(cs))
1388 return PTR_ERR(cs); 1710 return PTR_ERR(cs);
1389 1711
1712 *cs++ = MI_LOAD_REGISTER_IMM(4);
1390 for (i = 0; i < 4; i++) { 1713 for (i = 0; i < 4; i++) {
1391 *cs++ = MI_LOAD_REGISTER_IMM(1);
1392 *cs++ = i915_mmio_reg_offset(GEN7_SO_WRITE_OFFSET(i)); 1714 *cs++ = i915_mmio_reg_offset(GEN7_SO_WRITE_OFFSET(i));
1393 *cs++ = 0; 1715 *cs++ = 0;
1394 } 1716 }
1395 1717 *cs++ = MI_NOOP;
1396 intel_ring_advance(req, cs); 1718 intel_ring_advance(req, cs);
1397 1719
1398 return 0; 1720 return 0;
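
i915_reset_gen7_sol_offsets() now emits one MI_LOAD_REGISTER_IMM(4) covering all four GEN7_SO_WRITE_OFFSET registers instead of four single-register writes, so the emission shrinks from 4 * 3 = 12 dwords to 1 + 4 * 2 + 1 = 10 (the trailing NOOP keeps the stream qword aligned). A standalone sketch of the packing; the encodings below are illustrative placeholders rather than authoritative hardware values:

#include <stdint.h>
#include <stdio.h>

/* Illustrative encodings for the sketch only. */
#define LRI(n)			((0x22u << 23) | (2u * (n) - 1))	/* load n registers */
#define NOOP			0x00000000u
#define SO_WRITE_OFFSET(i)	(0x5280u + 4u * (i))

int main(void)
{
	uint32_t cs[12];
	unsigned int n = 0, i;

	/* One header, then four (register, value) pairs, then a NOOP. */
	cs[n++] = LRI(4);
	for (i = 0; i < 4; i++) {
		cs[n++] = SO_WRITE_OFFSET(i);
		cs[n++] = 0;
	}
	cs[n++] = NOOP;

	printf("emitted %u dwords (was 4 * 3 = 12 with per-register LRI)\n", n);
	return 0;
}
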
@@ -1402,24 +1724,24 @@ static struct i915_vma *eb_parse(struct i915_execbuffer *eb, bool is_master)
1402{ 1724{
1403 struct drm_i915_gem_object *shadow_batch_obj; 1725 struct drm_i915_gem_object *shadow_batch_obj;
1404 struct i915_vma *vma; 1726 struct i915_vma *vma;
1405 int ret; 1727 int err;
1406 1728
1407 shadow_batch_obj = i915_gem_batch_pool_get(&eb->engine->batch_pool, 1729 shadow_batch_obj = i915_gem_batch_pool_get(&eb->engine->batch_pool,
1408 PAGE_ALIGN(eb->batch_len)); 1730 PAGE_ALIGN(eb->batch_len));
1409 if (IS_ERR(shadow_batch_obj)) 1731 if (IS_ERR(shadow_batch_obj))
1410 return ERR_CAST(shadow_batch_obj); 1732 return ERR_CAST(shadow_batch_obj);
1411 1733
1412 ret = intel_engine_cmd_parser(eb->engine, 1734 err = intel_engine_cmd_parser(eb->engine,
1413 eb->batch->obj, 1735 eb->batch->obj,
1414 shadow_batch_obj, 1736 shadow_batch_obj,
1415 eb->batch_start_offset, 1737 eb->batch_start_offset,
1416 eb->batch_len, 1738 eb->batch_len,
1417 is_master); 1739 is_master);
1418 if (ret) { 1740 if (err) {
1419 if (ret == -EACCES) /* unhandled chained batch */ 1741 if (err == -EACCES) /* unhandled chained batch */
1420 vma = NULL; 1742 vma = NULL;
1421 else 1743 else
1422 vma = ERR_PTR(ret); 1744 vma = ERR_PTR(err);
1423 goto out; 1745 goto out;
1424 } 1746 }
1425 1747
@@ -1428,10 +1750,10 @@ static struct i915_vma *eb_parse(struct i915_execbuffer *eb, bool is_master)
1428 goto out; 1750 goto out;
1429 1751
1430 vma->exec_entry = 1752 vma->exec_entry =
1431 memset(&eb->shadow_exec_entry, 0, sizeof(*vma->exec_entry)); 1753 memset(&eb->exec[eb->buffer_count++],
1754 0, sizeof(*vma->exec_entry));
1432 vma->exec_entry->flags = __EXEC_OBJECT_HAS_PIN; 1755 vma->exec_entry->flags = __EXEC_OBJECT_HAS_PIN;
1433 i915_gem_object_get(shadow_batch_obj); 1756 __exec_to_vma(vma->exec_entry) = (uintptr_t)i915_vma_get(vma);
1434 list_add_tail(&vma->exec_link, &eb->vmas);
1435 1757
1436out: 1758out:
1437 i915_gem_object_unpin_pages(shadow_batch_obj); 1759 i915_gem_object_unpin_pages(shadow_batch_obj);
@@ -1439,41 +1761,37 @@ out:
1439} 1761}
1440 1762
1441static void 1763static void
1442add_to_client(struct drm_i915_gem_request *req, 1764add_to_client(struct drm_i915_gem_request *req, struct drm_file *file)
1443 struct drm_file *file)
1444{ 1765{
1445 req->file_priv = file->driver_priv; 1766 req->file_priv = file->driver_priv;
1446 list_add_tail(&req->client_link, &req->file_priv->mm.request_list); 1767 list_add_tail(&req->client_link, &req->file_priv->mm.request_list);
1447} 1768}
1448 1769
1449static int 1770static int eb_submit(struct i915_execbuffer *eb)
1450execbuf_submit(struct i915_execbuffer *eb)
1451{ 1771{
1452 int ret; 1772 int err;
1453 1773
1454 ret = eb_move_to_gpu(eb); 1774 err = eb_move_to_gpu(eb);
1455 if (ret) 1775 if (err)
1456 return ret; 1776 return err;
1457 1777
1458 ret = i915_switch_context(eb->request); 1778 err = i915_switch_context(eb->request);
1459 if (ret) 1779 if (err)
1460 return ret; 1780 return err;
1461 1781
1462 if (eb->args->flags & I915_EXEC_GEN7_SOL_RESET) { 1782 if (eb->args->flags & I915_EXEC_GEN7_SOL_RESET) {
1463 ret = i915_reset_gen7_sol_offsets(eb->request); 1783 err = i915_reset_gen7_sol_offsets(eb->request);
1464 if (ret) 1784 if (err)
1465 return ret; 1785 return err;
1466 } 1786 }
1467 1787
1468 ret = eb->engine->emit_bb_start(eb->request, 1788 err = eb->engine->emit_bb_start(eb->request,
1469 eb->batch->node.start + 1789 eb->batch->node.start +
1470 eb->batch_start_offset, 1790 eb->batch_start_offset,
1471 eb->batch_len, 1791 eb->batch_len,
1472 eb->dispatch_flags); 1792 eb->batch_flags);
1473 if (ret) 1793 if (err)
1474 return ret; 1794 return err;
1475
1476 eb_move_to_active(eb);
1477 1795
1478 return 0; 1796 return 0;
1479} 1797}
@@ -1564,34 +1882,36 @@ i915_gem_do_execbuffer(struct drm_device *dev,
1564 struct dma_fence *in_fence = NULL; 1882 struct dma_fence *in_fence = NULL;
1565 struct sync_file *out_fence = NULL; 1883 struct sync_file *out_fence = NULL;
1566 int out_fence_fd = -1; 1884 int out_fence_fd = -1;
1567 int ret; 1885 int err;
1568 1886
1569 if (!i915_gem_check_execbuffer(args)) 1887 BUILD_BUG_ON(__EXEC_OBJECT_INTERNAL_FLAGS &
1570 return -EINVAL; 1888 ~__EXEC_OBJECT_UNKNOWN_FLAGS);
1571
1572 ret = validate_exec_list(dev, exec, args->buffer_count);
1573 if (ret)
1574 return ret;
1575 1889
1576 eb.i915 = to_i915(dev); 1890 eb.i915 = to_i915(dev);
1577 eb.file = file; 1891 eb.file = file;
1578 eb.args = args; 1892 eb.args = args;
1893 if (!(args->flags & I915_EXEC_NO_RELOC))
1894 args->flags |= __EXEC_HAS_RELOC;
1579 eb.exec = exec; 1895 eb.exec = exec;
1580 eb.need_relocs = (args->flags & I915_EXEC_NO_RELOC) == 0; 1896 eb.ctx = NULL;
1897 eb.invalid_flags = __EXEC_OBJECT_UNKNOWN_FLAGS;
1898 if (USES_FULL_PPGTT(eb.i915))
1899 eb.invalid_flags |= EXEC_OBJECT_NEEDS_GTT;
1581 reloc_cache_init(&eb.reloc_cache, eb.i915); 1900 reloc_cache_init(&eb.reloc_cache, eb.i915);
1582 1901
1902 eb.buffer_count = args->buffer_count;
1583 eb.batch_start_offset = args->batch_start_offset; 1903 eb.batch_start_offset = args->batch_start_offset;
1584 eb.batch_len = args->batch_len; 1904 eb.batch_len = args->batch_len;
1585 1905
1586 eb.dispatch_flags = 0; 1906 eb.batch_flags = 0;
1587 if (args->flags & I915_EXEC_SECURE) { 1907 if (args->flags & I915_EXEC_SECURE) {
1588 if (!drm_is_current_master(file) || !capable(CAP_SYS_ADMIN)) 1908 if (!drm_is_current_master(file) || !capable(CAP_SYS_ADMIN))
1589 return -EPERM; 1909 return -EPERM;
1590 1910
1591 eb.dispatch_flags |= I915_DISPATCH_SECURE; 1911 eb.batch_flags |= I915_DISPATCH_SECURE;
1592 } 1912 }
1593 if (args->flags & I915_EXEC_IS_PINNED) 1913 if (args->flags & I915_EXEC_IS_PINNED)
1594 eb.dispatch_flags |= I915_DISPATCH_PINNED; 1914 eb.batch_flags |= I915_DISPATCH_PINNED;
1595 1915
1596 eb.engine = eb_select_engine(eb.i915, file, args); 1916 eb.engine = eb_select_engine(eb.i915, file, args);
1597 if (!eb.engine) 1917 if (!eb.engine)
@@ -1608,7 +1928,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
1608 return -EINVAL; 1928 return -EINVAL;
1609 } 1929 }
1610 1930
1611 eb.dispatch_flags |= I915_DISPATCH_RS; 1931 eb.batch_flags |= I915_DISPATCH_RS;
1612 } 1932 }
1613 1933
1614 if (args->flags & I915_EXEC_FENCE_IN) { 1934 if (args->flags & I915_EXEC_FENCE_IN) {
@@ -1620,71 +1940,53 @@ i915_gem_do_execbuffer(struct drm_device *dev,
1620 if (args->flags & I915_EXEC_FENCE_OUT) { 1940 if (args->flags & I915_EXEC_FENCE_OUT) {
1621 out_fence_fd = get_unused_fd_flags(O_CLOEXEC); 1941 out_fence_fd = get_unused_fd_flags(O_CLOEXEC);
1622 if (out_fence_fd < 0) { 1942 if (out_fence_fd < 0) {
1623 ret = out_fence_fd; 1943 err = out_fence_fd;
1624 goto err_in_fence; 1944 goto err_in_fence;
1625 } 1945 }
1626 } 1946 }
1627 1947
1628 /* Take a local wakeref for preparing to dispatch the execbuf as 1948 if (eb_create(&eb))
1949 return -ENOMEM;
1950
1951 /*
1952 * Take a local wakeref for preparing to dispatch the execbuf as
1629 * we expect to access the hardware fairly frequently in the 1953 * we expect to access the hardware fairly frequently in the
1630 * process. Upon first dispatch, we acquire another prolonged 1954 * process. Upon first dispatch, we acquire another prolonged
1631 * wakeref that we hold until the GPU has been idle for at least 1955 * wakeref that we hold until the GPU has been idle for at least
1632 * 100ms. 1956 * 100ms.
1633 */ 1957 */
1634 intel_runtime_pm_get(eb.i915); 1958 intel_runtime_pm_get(eb.i915);
1959 err = i915_mutex_lock_interruptible(dev);
1960 if (err)
1961 goto err_rpm;
1635 1962
1636 ret = i915_mutex_lock_interruptible(dev); 1963 err = eb_select_context(&eb);
1637 if (ret) 1964 if (unlikely(err))
1638 goto pre_mutex_err; 1965 goto err_unlock;
1639
1640 ret = eb_select_context(&eb);
1641 if (ret) {
1642 mutex_unlock(&dev->struct_mutex);
1643 goto pre_mutex_err;
1644 }
1645
1646 if (eb_create(&eb)) {
1647 i915_gem_context_put(eb.ctx);
1648 mutex_unlock(&dev->struct_mutex);
1649 ret = -ENOMEM;
1650 goto pre_mutex_err;
1651 }
1652
1653 /* Look up object handles */
1654 ret = eb_lookup_vmas(&eb);
1655 if (ret)
1656 goto err;
1657
1658 /* take note of the batch buffer before we might reorder the lists */
1659 eb.batch = eb_get_batch(&eb);
1660
1661 /* Move the objects en-masse into the GTT, evicting if necessary. */
1662 ret = eb_reserve(&eb);
1663 if (ret)
1664 goto err;
1665 1966
1666 /* The objects are in their final locations, apply the relocations. */ 1967 err = eb_relocate(&eb);
1667 if (eb.need_relocs) 1968 if (err)
1668 ret = eb_relocate(&eb); 1969 /*
1669 if (ret) { 1970 * If the user expects the execobject.offset and
1670 if (ret == -EFAULT) { 1971 * reloc.presumed_offset to be an exact match,
1671 ret = eb_relocate_slow(&eb); 1972 * as for using NO_RELOC, then we cannot update
1672 BUG_ON(!mutex_is_locked(&dev->struct_mutex)); 1973 * the execobject.offset until we have completed
1673 } 1974 * relocation.
1674 if (ret) 1975 */
1675 goto err; 1976 args->flags &= ~__EXEC_HAS_RELOC;
1676 } 1977 if (err < 0)
1978 goto err_vma;
1677 1979
1678 if (eb.batch->exec_entry->flags & EXEC_OBJECT_WRITE) { 1980 if (unlikely(eb.batch->exec_entry->flags & EXEC_OBJECT_WRITE)) {
1679 DRM_DEBUG("Attempting to use self-modifying batch buffer\n"); 1981 DRM_DEBUG("Attempting to use self-modifying batch buffer\n");
1680 ret = -EINVAL; 1982 err = -EINVAL;
1681 goto err; 1983 goto err_vma;
1682 } 1984 }
1683 if (eb.batch_start_offset > eb.batch->size || 1985 if (eb.batch_start_offset > eb.batch->size ||
1684 eb.batch_len > eb.batch->size - eb.batch_start_offset) { 1986 eb.batch_len > eb.batch->size - eb.batch_start_offset) {
1685 DRM_DEBUG("Attempting to use out-of-bounds batch\n"); 1987 DRM_DEBUG("Attempting to use out-of-bounds batch\n");
1686 ret = -EINVAL; 1988 err = -EINVAL;
1687 goto err; 1989 goto err_vma;
1688 } 1990 }
1689 1991
1690 if (eb.engine->needs_cmd_parser && eb.batch_len) { 1992 if (eb.engine->needs_cmd_parser && eb.batch_len) {
@@ -1692,8 +1994,8 @@ i915_gem_do_execbuffer(struct drm_device *dev,
1692 1994
1693 vma = eb_parse(&eb, drm_is_current_master(file)); 1995 vma = eb_parse(&eb, drm_is_current_master(file));
1694 if (IS_ERR(vma)) { 1996 if (IS_ERR(vma)) {
1695 ret = PTR_ERR(vma); 1997 err = PTR_ERR(vma);
1696 goto err; 1998 goto err_vma;
1697 } 1999 }
1698 2000
1699 if (vma) { 2001 if (vma) {
@@ -1706,7 +2008,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
1706 * specifically don't want that set on batches the 2008 * specifically don't want that set on batches the
1707 * command parser has accepted. 2009 * command parser has accepted.
1708 */ 2010 */
1709 eb.dispatch_flags |= I915_DISPATCH_SECURE; 2011 eb.batch_flags |= I915_DISPATCH_SECURE;
1710 eb.batch_start_offset = 0; 2012 eb.batch_start_offset = 0;
1711 eb.batch = vma; 2013 eb.batch = vma;
1712 } 2014 }
@@ -1715,11 +2017,11 @@ i915_gem_do_execbuffer(struct drm_device *dev,
1715 if (eb.batch_len == 0) 2017 if (eb.batch_len == 0)
1716 eb.batch_len = eb.batch->size - eb.batch_start_offset; 2018 eb.batch_len = eb.batch->size - eb.batch_start_offset;
1717 2019
1718 /* snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure 2020 /*
2021 * snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure
1719 * batch" bit. Hence we need to pin secure batches into the global gtt. 2022 * batch" bit. Hence we need to pin secure batches into the global gtt.
1720 * hsw should have this fixed, but bdw mucks it up again. */ 2023 * hsw should have this fixed, but bdw mucks it up again. */
1721 if (eb.dispatch_flags & I915_DISPATCH_SECURE) { 2024 if (eb.batch_flags & I915_DISPATCH_SECURE) {
1722 struct drm_i915_gem_object *obj = eb.batch->obj;
1723 struct i915_vma *vma; 2025 struct i915_vma *vma;
1724 2026
1725 /* 2027 /*
@@ -1732,10 +2034,10 @@ i915_gem_do_execbuffer(struct drm_device *dev,
1732 * fitting due to fragmentation. 2034 * fitting due to fragmentation.
1733 * So this is actually safe. 2035 * So this is actually safe.
1734 */ 2036 */
1735 vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 0); 2037 vma = i915_gem_object_ggtt_pin(eb.batch->obj, NULL, 0, 0, 0);
1736 if (IS_ERR(vma)) { 2038 if (IS_ERR(vma)) {
1737 ret = PTR_ERR(vma); 2039 err = PTR_ERR(vma);
1738 goto err; 2040 goto err_vma;
1739 } 2041 }
1740 2042
1741 eb.batch = vma; 2043 eb.batch = vma;
@@ -1744,25 +2046,26 @@ i915_gem_do_execbuffer(struct drm_device *dev,
1744 /* Allocate a request for this batch buffer nice and early. */ 2046 /* Allocate a request for this batch buffer nice and early. */
1745 eb.request = i915_gem_request_alloc(eb.engine, eb.ctx); 2047 eb.request = i915_gem_request_alloc(eb.engine, eb.ctx);
1746 if (IS_ERR(eb.request)) { 2048 if (IS_ERR(eb.request)) {
1747 ret = PTR_ERR(eb.request); 2049 err = PTR_ERR(eb.request);
1748 goto err_batch_unpin; 2050 goto err_batch_unpin;
1749 } 2051 }
1750 2052
1751 if (in_fence) { 2053 if (in_fence) {
1752 ret = i915_gem_request_await_dma_fence(eb.request, in_fence); 2054 err = i915_gem_request_await_dma_fence(eb.request, in_fence);
1753 if (ret < 0) 2055 if (err < 0)
1754 goto err_request; 2056 goto err_request;
1755 } 2057 }
1756 2058
1757 if (out_fence_fd != -1) { 2059 if (out_fence_fd != -1) {
1758 out_fence = sync_file_create(&eb.request->fence); 2060 out_fence = sync_file_create(&eb.request->fence);
1759 if (!out_fence) { 2061 if (!out_fence) {
1760 ret = -ENOMEM; 2062 err = -ENOMEM;
1761 goto err_request; 2063 goto err_request;
1762 } 2064 }
1763 } 2065 }
1764 2066
1765 /* Whilst this request exists, batch_obj will be on the 2067 /*
2068 * Whilst this request exists, batch_obj will be on the
1766 * active_list, and so will hold the active reference. Only when this 2069 * active_list, and so will hold the active reference. Only when this
 1767 * request is retired will the batch_obj be moved onto the 2070
1768 * inactive_list and lose its active reference. Hence we do not need 2071 * inactive_list and lose its active reference. Hence we do not need
@@ -1770,14 +2073,14 @@ i915_gem_do_execbuffer(struct drm_device *dev,
1770 */ 2073 */
1771 eb.request->batch = eb.batch; 2074 eb.request->batch = eb.batch;
1772 2075
1773 trace_i915_gem_request_queue(eb.request, eb.dispatch_flags); 2076 trace_i915_gem_request_queue(eb.request, eb.batch_flags);
1774 ret = execbuf_submit(&eb); 2077 err = eb_submit(&eb);
1775err_request: 2078err_request:
1776 __i915_add_request(eb.request, ret == 0); 2079 __i915_add_request(eb.request, err == 0);
1777 add_to_client(eb.request, file); 2080 add_to_client(eb.request, file);
1778 2081
1779 if (out_fence) { 2082 if (out_fence) {
1780 if (ret == 0) { 2083 if (err == 0) {
1781 fd_install(out_fence_fd, out_fence->file); 2084 fd_install(out_fence_fd, out_fence->file);
1782 args->rsvd2 &= GENMASK_ULL(0, 31); /* keep in-fence */ 2085 args->rsvd2 &= GENMASK_ULL(0, 31); /* keep in-fence */
1783 args->rsvd2 |= (u64)out_fence_fd << 32; 2086 args->rsvd2 |= (u64)out_fence_fd << 32;
@@ -1788,28 +2091,22 @@ err_request:
1788 } 2091 }
1789 2092
1790err_batch_unpin: 2093err_batch_unpin:
1791 /* 2094 if (eb.batch_flags & I915_DISPATCH_SECURE)
1792 * FIXME: We crucially rely upon the active tracking for the (ppgtt)
1793 * batch vma for correctness. For less ugly and less fragility this
1794 * needs to be adjusted to also track the ggtt batch vma properly as
1795 * active.
1796 */
1797 if (eb.dispatch_flags & I915_DISPATCH_SECURE)
1798 i915_vma_unpin(eb.batch); 2095 i915_vma_unpin(eb.batch);
1799err: 2096err_vma:
1800 /* the request owns the ref now */ 2097 if (eb.exec)
1801 eb_destroy(&eb); 2098 eb_release_vmas(&eb);
2099 i915_gem_context_put(eb.ctx);
2100err_unlock:
1802 mutex_unlock(&dev->struct_mutex); 2101 mutex_unlock(&dev->struct_mutex);
1803 2102err_rpm:
1804pre_mutex_err:
1805 /* intel_gpu_busy should also get a ref, so it will free when the device
1806 * is really idle. */
1807 intel_runtime_pm_put(eb.i915); 2103 intel_runtime_pm_put(eb.i915);
2104 eb_destroy(&eb);
1808 if (out_fence_fd != -1) 2105 if (out_fence_fd != -1)
1809 put_unused_fd(out_fence_fd); 2106 put_unused_fd(out_fence_fd);
1810err_in_fence: 2107err_in_fence:
1811 dma_fence_put(in_fence); 2108 dma_fence_put(in_fence);
1812 return ret; 2109 return err;
1813} 2110}
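
The rewritten i915_gem_do_execbuffer() unwinds failures through a ladder of labels (err_batch_unpin, err_vma, err_unlock, err_rpm, err_in_fence), releasing in reverse order exactly what had been acquired by the time of the error. A minimal sketch of the idiom, with generic resources standing in for the lock, wakeref and fences:

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

static int do_work(void) { return -EINVAL; }	/* pretend the submit fails */

static int submit(void)
{
	void *a = NULL, *b = NULL;
	int err;

	a = malloc(64);			/* first acquisition */
	if (!a)
		return -ENOMEM;

	b = malloc(64);			/* second acquisition */
	if (!b) {
		err = -ENOMEM;
		goto err_a;
	}

	err = do_work();
	if (err)
		goto err_b;

	/* Success falls through the same labels: both resources here are
	 * released unconditionally, like the wakeref and struct_mutex. */
err_b:
	free(b);			/* release in reverse order of acquire */
err_a:
	free(a);
	return err;
}

int main(void)
{
	printf("submit() = %d\n", submit());
	return 0;
}
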
1814 2111
1815/* 2112/*
@@ -1820,20 +2117,38 @@ int
1820i915_gem_execbuffer(struct drm_device *dev, void *data, 2117i915_gem_execbuffer(struct drm_device *dev, void *data,
1821 struct drm_file *file) 2118 struct drm_file *file)
1822{ 2119{
2120 const size_t sz = sizeof(struct drm_i915_gem_exec_object2);
1823 struct drm_i915_gem_execbuffer *args = data; 2121 struct drm_i915_gem_execbuffer *args = data;
1824 struct drm_i915_gem_execbuffer2 exec2; 2122 struct drm_i915_gem_execbuffer2 exec2;
1825 struct drm_i915_gem_exec_object *exec_list = NULL; 2123 struct drm_i915_gem_exec_object *exec_list = NULL;
1826 struct drm_i915_gem_exec_object2 *exec2_list = NULL; 2124 struct drm_i915_gem_exec_object2 *exec2_list = NULL;
1827 int ret, i; 2125 unsigned int i;
2126 int err;
1828 2127
1829 if (args->buffer_count < 1) { 2128 if (args->buffer_count < 1 || args->buffer_count > SIZE_MAX / sz - 1) {
1830 DRM_DEBUG("execbuf with %d buffers\n", args->buffer_count); 2129 DRM_DEBUG("execbuf2 with %d buffers\n", args->buffer_count);
1831 return -EINVAL; 2130 return -EINVAL;
1832 } 2131 }
1833 2132
2133 exec2.buffers_ptr = args->buffers_ptr;
2134 exec2.buffer_count = args->buffer_count;
2135 exec2.batch_start_offset = args->batch_start_offset;
2136 exec2.batch_len = args->batch_len;
2137 exec2.DR1 = args->DR1;
2138 exec2.DR4 = args->DR4;
2139 exec2.num_cliprects = args->num_cliprects;
2140 exec2.cliprects_ptr = args->cliprects_ptr;
2141 exec2.flags = I915_EXEC_RENDER;
2142 i915_execbuffer2_set_context_id(exec2, 0);
2143
2144 if (!i915_gem_check_execbuffer(&exec2))
2145 return -EINVAL;
2146
1834 /* Copy in the exec list from userland */ 2147 /* Copy in the exec list from userland */
1835 exec_list = kvmalloc_array(sizeof(*exec_list), args->buffer_count, GFP_KERNEL); 2148 exec_list = kvmalloc_array(args->buffer_count, sizeof(*exec_list),
1836 exec2_list = kvmalloc_array(sizeof(*exec2_list), args->buffer_count, GFP_KERNEL); 2149 __GFP_NOWARN | GFP_TEMPORARY);
2150 exec2_list = kvmalloc_array(args->buffer_count + 1, sz,
2151 __GFP_NOWARN | GFP_TEMPORARY);
1837 if (exec_list == NULL || exec2_list == NULL) { 2152 if (exec_list == NULL || exec2_list == NULL) {
1838 DRM_DEBUG("Failed to allocate exec list for %d buffers\n", 2153 DRM_DEBUG("Failed to allocate exec list for %d buffers\n",
1839 args->buffer_count); 2154 args->buffer_count);
@@ -1841,12 +2156,12 @@ i915_gem_execbuffer(struct drm_device *dev, void *data,
1841 kvfree(exec2_list); 2156 kvfree(exec2_list);
1842 return -ENOMEM; 2157 return -ENOMEM;
1843 } 2158 }
1844 ret = copy_from_user(exec_list, 2159 err = copy_from_user(exec_list,
1845 u64_to_user_ptr(args->buffers_ptr), 2160 u64_to_user_ptr(args->buffers_ptr),
1846 sizeof(*exec_list) * args->buffer_count); 2161 sizeof(*exec_list) * args->buffer_count);
1847 if (ret != 0) { 2162 if (err) {
1848 DRM_DEBUG("copy %d exec entries failed %d\n", 2163 DRM_DEBUG("copy %d exec entries failed %d\n",
1849 args->buffer_count, ret); 2164 args->buffer_count, err);
1850 kvfree(exec_list); 2165 kvfree(exec_list);
1851 kvfree(exec2_list); 2166 kvfree(exec2_list);
1852 return -EFAULT; 2167 return -EFAULT;
@@ -1864,99 +2179,94 @@ i915_gem_execbuffer(struct drm_device *dev, void *data,
1864 exec2_list[i].flags = 0; 2179 exec2_list[i].flags = 0;
1865 } 2180 }
1866 2181
1867 exec2.buffers_ptr = args->buffers_ptr; 2182 err = i915_gem_do_execbuffer(dev, file, &exec2, exec2_list);
1868 exec2.buffer_count = args->buffer_count; 2183 if (exec2.flags & __EXEC_HAS_RELOC) {
1869 exec2.batch_start_offset = args->batch_start_offset;
1870 exec2.batch_len = args->batch_len;
1871 exec2.DR1 = args->DR1;
1872 exec2.DR4 = args->DR4;
1873 exec2.num_cliprects = args->num_cliprects;
1874 exec2.cliprects_ptr = args->cliprects_ptr;
1875 exec2.flags = I915_EXEC_RENDER;
1876 i915_execbuffer2_set_context_id(exec2, 0);
1877
1878 ret = i915_gem_do_execbuffer(dev, file, &exec2, exec2_list);
1879 if (!ret) {
1880 struct drm_i915_gem_exec_object __user *user_exec_list = 2184 struct drm_i915_gem_exec_object __user *user_exec_list =
1881 u64_to_user_ptr(args->buffers_ptr); 2185 u64_to_user_ptr(args->buffers_ptr);
1882 2186
1883 /* Copy the new buffer offsets back to the user's exec list. */ 2187 /* Copy the new buffer offsets back to the user's exec list. */
1884 for (i = 0; i < args->buffer_count; i++) { 2188 for (i = 0; i < args->buffer_count; i++) {
2189 if (!(exec2_list[i].offset & UPDATE))
2190 continue;
2191
1885 exec2_list[i].offset = 2192 exec2_list[i].offset =
1886 gen8_canonical_addr(exec2_list[i].offset); 2193 gen8_canonical_addr(exec2_list[i].offset & PIN_OFFSET_MASK);
1887 ret = __copy_to_user(&user_exec_list[i].offset, 2194 exec2_list[i].offset &= PIN_OFFSET_MASK;
1888 &exec2_list[i].offset, 2195 if (__copy_to_user(&user_exec_list[i].offset,
1889 sizeof(user_exec_list[i].offset)); 2196 &exec2_list[i].offset,
1890 if (ret) { 2197 sizeof(user_exec_list[i].offset)))
1891 ret = -EFAULT;
1892 DRM_DEBUG("failed to copy %d exec entries "
1893 "back to user (%d)\n",
1894 args->buffer_count, ret);
1895 break; 2198 break;
1896 }
1897 } 2199 }
1898 } 2200 }
1899 2201
1900 kvfree(exec_list); 2202 kvfree(exec_list);
1901 kvfree(exec2_list); 2203 kvfree(exec2_list);
1902 return ret; 2204 return err;
1903} 2205}
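
The write-back loops above copy an offset to userspace only when the kernel actually moved the object: the new placement carries a low tag bit (UPDATE) in an otherwise page-aligned offset, and the tag is masked off with PIN_OFFSET_MASK before the copy. A small standalone sketch of that tag-and-mask filtering, with illustrative flag values rather than the driver's:

#include <stdint.h>
#include <stdio.h>

#define UPDATE		0x1ull		/* illustrative tag bit */
#define OFFSET_MASK	(~0xfffull)	/* offsets are page aligned */

int main(void)
{
	/* Pretend the kernel left entry 0 alone and moved entry 1. */
	uint64_t offsets[2]   = { 0x10000, 0x24000 | UPDATE };
	uint64_t user_copy[2] = { 0x10000, 0x20000 };
	unsigned int i;

	for (i = 0; i < 2; i++) {
		if (!(offsets[i] & UPDATE))
			continue;	/* nothing changed, skip the copy-back */
		user_copy[i] = offsets[i] & OFFSET_MASK;
	}

	printf("entry 1 now at 0x%llx\n", (unsigned long long)user_copy[1]);
	return 0;
}

Skipping unchanged entries is what lets NO_RELOC users avoid the copy-back traffic entirely, which is one of the stated goals of the series.
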
1904 2206
1905int 2207int
1906i915_gem_execbuffer2(struct drm_device *dev, void *data, 2208i915_gem_execbuffer2(struct drm_device *dev, void *data,
1907 struct drm_file *file) 2209 struct drm_file *file)
1908{ 2210{
2211 const size_t sz = sizeof(struct drm_i915_gem_exec_object2);
1909 struct drm_i915_gem_execbuffer2 *args = data; 2212 struct drm_i915_gem_execbuffer2 *args = data;
1910 struct drm_i915_gem_exec_object2 *exec2_list = NULL; 2213 struct drm_i915_gem_exec_object2 *exec2_list;
1911 int ret; 2214 int err;
1912 2215
1913 if (args->buffer_count < 1 || 2216 if (args->buffer_count < 1 || args->buffer_count > SIZE_MAX / sz - 1) {
1914 args->buffer_count > UINT_MAX / sizeof(*exec2_list)) {
1915 DRM_DEBUG("execbuf2 with %d buffers\n", args->buffer_count); 2217 DRM_DEBUG("execbuf2 with %d buffers\n", args->buffer_count);
1916 return -EINVAL; 2218 return -EINVAL;
1917 } 2219 }
1918 2220
1919 exec2_list = kvmalloc_array(args->buffer_count, 2221 if (!i915_gem_check_execbuffer(args))
1920 sizeof(*exec2_list), 2222 return -EINVAL;
1921 GFP_TEMPORARY); 2223
2224 /* Allocate an extra slot for use by the command parser */
2225 exec2_list = kvmalloc_array(args->buffer_count + 1, sz,
2226 __GFP_NOWARN | GFP_TEMPORARY);
1922 if (exec2_list == NULL) { 2227 if (exec2_list == NULL) {
1923 DRM_DEBUG("Failed to allocate exec list for %d buffers\n", 2228 DRM_DEBUG("Failed to allocate exec list for %d buffers\n",
1924 args->buffer_count); 2229 args->buffer_count);
1925 return -ENOMEM; 2230 return -ENOMEM;
1926 } 2231 }
1927 ret = copy_from_user(exec2_list, 2232 if (copy_from_user(exec2_list,
1928 u64_to_user_ptr(args->buffers_ptr), 2233 u64_to_user_ptr(args->buffers_ptr),
1929 sizeof(*exec2_list) * args->buffer_count); 2234 sizeof(*exec2_list) * args->buffer_count)) {
1930 if (ret != 0) { 2235 DRM_DEBUG("copy %d exec entries failed\n", args->buffer_count);
1931 DRM_DEBUG("copy %d exec entries failed %d\n",
1932 args->buffer_count, ret);
1933 kvfree(exec2_list); 2236 kvfree(exec2_list);
1934 return -EFAULT; 2237 return -EFAULT;
1935 } 2238 }
1936 2239
1937 ret = i915_gem_do_execbuffer(dev, file, args, exec2_list); 2240 err = i915_gem_do_execbuffer(dev, file, args, exec2_list);
1938 if (!ret) { 2241
1939 /* Copy the new buffer offsets back to the user's exec list. */ 2242 /*
2243 * Now that we have begun execution of the batchbuffer, we ignore
2244 * any new error after this point. Also given that we have already
2245 * updated the associated relocations, we try to write out the current
2246 * object locations irrespective of any error.
2247 */
2248 if (args->flags & __EXEC_HAS_RELOC) {
1940 struct drm_i915_gem_exec_object2 __user *user_exec_list = 2249 struct drm_i915_gem_exec_object2 __user *user_exec_list =
1941 u64_to_user_ptr(args->buffers_ptr); 2250 u64_to_user_ptr(args->buffers_ptr);
1942 int i; 2251 unsigned int i;
1943 2252
2253 /* Copy the new buffer offsets back to the user's exec list. */
2254 user_access_begin();
1944 for (i = 0; i < args->buffer_count; i++) { 2255 for (i = 0; i < args->buffer_count; i++) {
2256 if (!(exec2_list[i].offset & UPDATE))
2257 continue;
2258
1945 exec2_list[i].offset = 2259 exec2_list[i].offset =
1946 gen8_canonical_addr(exec2_list[i].offset); 2260 gen8_canonical_addr(exec2_list[i].offset & PIN_OFFSET_MASK);
1947 ret = __copy_to_user(&user_exec_list[i].offset, 2261 unsafe_put_user(exec2_list[i].offset,
1948 &exec2_list[i].offset, 2262 &user_exec_list[i].offset,
1949 sizeof(user_exec_list[i].offset)); 2263 end_user);
1950 if (ret) {
1951 ret = -EFAULT;
1952 DRM_DEBUG("failed to copy %d exec entries "
1953 "back to user\n",
1954 args->buffer_count);
1955 break;
1956 }
1957 } 2264 }
2265end_user:
2266 user_access_end();
1958 } 2267 }
1959 2268
2269 args->flags &= ~__I915_EXEC_UNKNOWN_FLAGS;
1960 kvfree(exec2_list); 2270 kvfree(exec2_list);
1961 return ret; 2271 return err;
1962} 2272}
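
Both ioctls now bound buffer_count against SIZE_MAX / sz - 1 before multiplying, leaving one spare slot for the command parser's shadow batch, so the kvmalloc_array() size can never overflow. A standalone sketch of the same check (the struct below is only a stand-in sized roughly like an exec object):

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

struct exec_object { uint64_t handle, offset, flags, pad[5]; };	/* stand-in, 64 bytes */

static void *alloc_exec_list(size_t count)
{
	const size_t sz = sizeof(struct exec_object);

	/* (count + 1) * sz must not overflow; the +1 mirrors the extra
	 * slot reserved for a shadow batch object. */
	if (count < 1 || count > SIZE_MAX / sz - 1)
		return NULL;

	return calloc(count + 1, sz);
}

int main(void)
{
	printf("4 buffers: %s\n", alloc_exec_list(4) ? "ok" : "rejected");
	printf("SIZE_MAX buffers: %s\n",
	       alloc_exec_list(SIZE_MAX) ? "ok" : "rejected");
	return 0;
}
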
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index ce68194ebff6..9e6a47323362 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -463,7 +463,7 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
463 size, obj->base.size, 463 size, obj->base.size,
464 flags & PIN_MAPPABLE ? "mappable" : "total", 464 flags & PIN_MAPPABLE ? "mappable" : "total",
465 end); 465 end);
466 return -E2BIG; 466 return -ENOSPC;
467 } 467 }
468 468
469 ret = i915_gem_object_pin_pages(obj); 469 ret = i915_gem_object_pin_pages(obj);
diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index ea98e6e4262f..04d7a5da70fd 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -103,6 +103,7 @@ struct i915_vma {
103 103
104 /** This vma's place in the execbuf reservation list */ 104 /** This vma's place in the execbuf reservation list */
105 struct list_head exec_link; 105 struct list_head exec_link;
106 struct list_head reloc_link;
106 107
107 /** This vma's place in the eviction list */ 108 /** This vma's place in the eviction list */
108 struct list_head evict_link; 109 struct list_head evict_link;
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
index 14e9c2fbc4e6..5ea373221f49 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
@@ -304,7 +304,7 @@ static int igt_evict_vm(void *arg)
 		goto cleanup;
 
 	/* Everything is pinned, nothing should happen */
-	err = i915_gem_evict_vm(&ggtt->base, false);
+	err = i915_gem_evict_vm(&ggtt->base);
 	if (err) {
 		pr_err("i915_gem_evict_vm on a full GGTT returned err=%d]\n",
 		       err);
@@ -313,7 +313,7 @@ static int igt_evict_vm(void *arg)
 
 	unpin_ggtt(i915);
 
-	err = i915_gem_evict_vm(&ggtt->base, false);
+	err = i915_gem_evict_vm(&ggtt->base);
 	if (err) {
 		pr_err("i915_gem_evict_vm on a full GGTT returned err=%d]\n",
 		       err);
diff --git a/drivers/gpu/drm/i915/selftests/i915_vma.c b/drivers/gpu/drm/i915/selftests/i915_vma.c
index ad56566e24db..fb9072d5877f 100644
--- a/drivers/gpu/drm/i915/selftests/i915_vma.c
+++ b/drivers/gpu/drm/i915/selftests/i915_vma.c
@@ -225,14 +225,6 @@ static bool assert_pin_valid(const struct i915_vma *vma,
 }
 
 __maybe_unused
-static bool assert_pin_e2big(const struct i915_vma *vma,
-			     const struct pin_mode *mode,
-			     int result)
-{
-	return result == -E2BIG;
-}
-
-__maybe_unused
 static bool assert_pin_enospc(const struct i915_vma *vma,
 			      const struct pin_mode *mode,
 			      int result)
@@ -255,7 +247,6 @@ static int igt_vma_pin1(void *arg)
 #define VALID(sz, fl) { .size = (sz), .flags = (fl), .assert = assert_pin_valid, .string = #sz ", " #fl ", (valid) " }
 #define __INVALID(sz, fl, check, eval) { .size = (sz), .flags = (fl), .assert = (check), .string = #sz ", " #fl ", (invalid " #eval ")" }
 #define INVALID(sz, fl) __INVALID(sz, fl, assert_pin_einval, EINVAL)
-#define TOOBIG(sz, fl) __INVALID(sz, fl, assert_pin_e2big, E2BIG)
 #define NOSPACE(sz, fl) __INVALID(sz, fl, assert_pin_enospc, ENOSPC)
 	VALID(0, PIN_GLOBAL),
 	VALID(0, PIN_GLOBAL | PIN_MAPPABLE),
@@ -276,11 +267,11 @@ static int igt_vma_pin1(void *arg)
 	VALID(8192, PIN_GLOBAL),
 	VALID(i915->ggtt.mappable_end - 4096, PIN_GLOBAL | PIN_MAPPABLE),
 	VALID(i915->ggtt.mappable_end, PIN_GLOBAL | PIN_MAPPABLE),
-	TOOBIG(i915->ggtt.mappable_end + 4096, PIN_GLOBAL | PIN_MAPPABLE),
+	NOSPACE(i915->ggtt.mappable_end + 4096, PIN_GLOBAL | PIN_MAPPABLE),
 	VALID(i915->ggtt.base.total - 4096, PIN_GLOBAL),
 	VALID(i915->ggtt.base.total, PIN_GLOBAL),
-	TOOBIG(i915->ggtt.base.total + 4096, PIN_GLOBAL),
-	TOOBIG(round_down(U64_MAX, PAGE_SIZE), PIN_GLOBAL),
+	NOSPACE(i915->ggtt.base.total + 4096, PIN_GLOBAL),
+	NOSPACE(round_down(U64_MAX, PAGE_SIZE), PIN_GLOBAL),
 	INVALID(8192, PIN_GLOBAL | PIN_MAPPABLE | PIN_OFFSET_FIXED | (i915->ggtt.mappable_end - 4096)),
 	INVALID(8192, PIN_GLOBAL | PIN_OFFSET_FIXED | (i915->ggtt.base.total - 4096)),
 	INVALID(8192, PIN_GLOBAL | PIN_OFFSET_FIXED | (round_down(U64_MAX, PAGE_SIZE) - 4096)),
@@ -300,7 +291,6 @@ static int igt_vma_pin1(void *arg)
 #endif
 	{ },
 #undef NOSPACE
-#undef TOOBIG
 #undef INVALID
 #undef __INVALID
 #undef VALID