Diffstat (limited to 'drivers/gpu/drm/i915/i915_gem_execbuffer.c')
-rw-r--r-- | drivers/gpu/drm/i915/i915_gem_execbuffer.c | 2038 |
1 file changed, 1174 insertions, 864 deletions
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 9c3f6c40270f..a052072fe8b3 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -42,41 +42,195 @@ | |||
42 | 42 | ||
43 | #define DBG_USE_CPU_RELOC 0 /* -1 force GTT relocs; 1 force CPU relocs */ | 43 | #define DBG_USE_CPU_RELOC 0 /* -1 force GTT relocs; 1 force CPU relocs */ |
44 | 44 | ||
45 | #define __EXEC_OBJECT_HAS_PIN (1<<31) | 45 | #define __EXEC_OBJECT_HAS_PIN BIT(31) |
46 | #define __EXEC_OBJECT_HAS_FENCE (1<<30) | 46 | #define __EXEC_OBJECT_HAS_FENCE BIT(30) |
47 | #define __EXEC_OBJECT_NEEDS_MAP (1<<29) | 47 | #define __EXEC_OBJECT_NEEDS_MAP BIT(29) |
48 | #define __EXEC_OBJECT_NEEDS_BIAS (1<<28) | 48 | #define __EXEC_OBJECT_NEEDS_BIAS BIT(28) |
49 | #define __EXEC_OBJECT_INTERNAL_FLAGS (0xf<<28) /* all of the above */ | 49 | #define __EXEC_OBJECT_INTERNAL_FLAGS (~0u << 28) /* all of the above */ |
50 | #define __EXEC_OBJECT_RESERVED (__EXEC_OBJECT_HAS_PIN | __EXEC_OBJECT_HAS_FENCE) | ||
51 | |||
52 | #define __EXEC_HAS_RELOC BIT(31) | ||
53 | #define __EXEC_VALIDATED BIT(30) | ||
54 | #define UPDATE PIN_OFFSET_FIXED | ||
50 | 55 | ||
51 | #define BATCH_OFFSET_BIAS (256*1024) | 56 | #define BATCH_OFFSET_BIAS (256*1024) |
52 | 57 | ||
53 | #define __I915_EXEC_ILLEGAL_FLAGS \ | 58 | #define __I915_EXEC_ILLEGAL_FLAGS \ |
54 | (__I915_EXEC_UNKNOWN_FLAGS | I915_EXEC_CONSTANTS_MASK) | 59 | (__I915_EXEC_UNKNOWN_FLAGS | I915_EXEC_CONSTANTS_MASK) |
55 | 60 | ||
61 | /** | ||
62 | * DOC: User command execution | ||
63 | * | ||
64 | * Userspace submits commands to be executed on the GPU as an instruction | ||
65 | * stream within a GEM object we call a batchbuffer. These instructions may ||
66 | * refer to other GEM objects containing auxiliary state such as kernels, | ||
67 | * samplers, render targets and even secondary batchbuffers. Userspace does | ||
68 | * not know where in the GPU memory these objects reside and so before the | ||
69 | * batchbuffer is passed to the GPU for execution, those addresses in the | ||
70 | * batchbuffer and auxiliary objects are updated. This is known as relocation, | ||
71 | * or patching. To try and avoid having to relocate each object on the next | ||
72 | * execution, userspace is told the location of those objects in this pass, | ||
73 | * but this remains just a hint as the kernel may choose a new location for | ||
74 | * any object in the future. | ||
75 | * | ||
76 | * Processing an execbuf ioctl is conceptually split up into a few phases. | ||
77 | * | ||
78 | * 1. Validation - Ensure all the pointers, handles and flags are valid. | ||
79 | * 2. Reservation - Assign GPU address space for every object | ||
80 | * 3. Relocation - Update any addresses to point to the final locations | ||
81 | * 4. Serialisation - Order the request with respect to its dependencies | ||
82 | * 5. Construction - Construct a request to execute the batchbuffer | ||
83 | * 6. Submission (at some point in the future execution) | ||
84 | * | ||
85 | * Reserving resources for the execbuf is the most complicated phase. We | ||
86 | * neither want to have to migrate the object in the address space, nor do | ||
87 | * we want to have to update any relocations pointing to this object. Ideally, | ||
88 | * we want to leave the object where it is and for all the existing relocations | ||
89 | * to match. If the object is given a new address, or if userspace thinks the | ||
90 | * object is elsewhere, we have to parse all the relocation entries and update | ||
91 | * the addresses. Userspace can set the I915_EXEC_NO_RELOC flag to hint that ||
92 | * all the target addresses in all of its objects match the value in the | ||
93 | * relocation entries and that they all match the presumed offsets given by the | ||
94 | * list of execbuffer objects. Using this knowledge, we know that if we haven't | ||
95 | * moved any buffers, all the relocation entries are valid and we can skip | ||
96 | * the update. (If userspace is wrong, the likely outcome is an impromptu GPU | ||
97 | * hang.) The requirements for using I915_EXEC_NO_RELOC are: ||
98 | * | ||
99 | * The addresses written in the objects must match the corresponding | ||
100 | * reloc.presumed_offset which in turn must match the corresponding | ||
101 | * execobject.offset. | ||
102 | * | ||
103 | * Any render targets written to in the batch must be flagged with | ||
104 | * EXEC_OBJECT_WRITE. | ||
105 | * | ||
106 | * To avoid stalling, execobject.offset should match the current | ||
107 | * address of that object within the active context. | ||
108 | * | ||
109 | * The reservation is done in multiple phases. First we try to keep any ||
110 | * object already bound in its current location - so long as it meets the ||
111 | * constraints imposed by the new execbuffer. Any object left unbound after the | ||
112 | * first pass is then fitted into any available idle space. If an object does | ||
113 | * not fit, all objects are removed from the reservation and the process rerun | ||
114 | * after sorting the objects into a priority order (more difficult to fit | ||
115 | * objects are tried first). Failing that, the entire VM is cleared and we try | ||
116 | * to fit the execbuf one last time before concluding that it simply will not ||
117 | * fit. | ||
118 | * | ||
119 | * A small complication to all of this is that we allow userspace not only to | ||
120 | * specify an alignment and a size for the object in the address space, but | ||
121 | * we also allow userspace to specify the exact offset. Such objects are ||
122 | * simpler to place (the location is known a priori); all we have to do is make ||
123 | * sure the space is available. | ||
124 | * | ||
125 | * Once all the objects are in place, patching up the buried pointers to point | ||
126 | * to the final locations is a fairly simple job of walking over the relocation | ||
127 | * entry arrays, looking up the right address and rewriting the value into | ||
128 | * the object. Simple! ... The relocation entries are stored in user memory | ||
129 | * and so to access them we have to copy them into a local buffer. That copy | ||
130 | * has to avoid taking any pagefaults as they may lead back to a GEM object | ||
131 | * requiring the struct_mutex (i.e. recursive deadlock). So once again we split | ||
132 | * the relocation into multiple passes. First we try to do everything within an | ||
133 | * atomic context (avoid the pagefaults) which requires that we never wait. If | ||
134 | * we detect that we may wait, or if we need to fault, then we have to fall back ||
135 | * to a slower path. The slowpath has to drop the mutex. (Can you hear alarm | ||
136 | * bells yet?) Dropping the mutex means that we lose all the state we have | ||
137 | * built up so far for the execbuf and we must reset any global data. However, | ||
138 | * we do leave the objects pinned in their final locations - which is a | ||
139 | * potential issue for concurrent execbufs. Once we have left the mutex, we can | ||
140 | * allocate and copy all the relocation entries into a large array at our | ||
141 | * leisure, reacquire the mutex, reclaim all the objects and other state and | ||
142 | * then proceed to update any incorrect addresses within the objects. ||
143 | * | ||
144 | * As we process the relocation entries, we maintain a record of whether the | ||
145 | * object is being written to. Using NO_RELOC, we expect userspace to provide ||
146 | * this information instead. We also check whether we can skip the relocation | ||
147 | * by comparing the expected value inside the relocation entry with the target's | ||
148 | * final address. If they differ, we have to map the current object and rewrite | ||
149 | * the 4 or 8 byte pointer within. | ||
150 | * | ||
151 | * Serialising an execbuf is quite simple according to the rules of the GEM | ||
152 | * ABI. Execution within each context is ordered by the order of submission. | ||
153 | * Writes to any GEM object are in order of submission and are exclusive. Reads | ||
154 | * from a GEM object are unordered with respect to other reads, but ordered by | ||
155 | * writes. A write submitted after a read cannot occur before the read, and | ||
156 | * similarly any read submitted after a write cannot occur before the write. | ||
157 | * Writes are ordered between engines such that only one write occurs at any | ||
158 | * time (completing any reads beforehand) - using semaphores where available | ||
159 | * and CPU serialisation otherwise. Other GEM accesses obey the same rules: any ||
160 | * write (either via mmaps using set-domain, or via pwrite) must flush all GPU | ||
161 | * reads before starting, and any read (either using set-domain or pread) must | ||
162 | * flush all GPU writes before starting. (Note we only employ a barrier before, | ||
163 | * we currently rely on userspace not concurrently starting a new execution | ||
164 | * whilst reading or writing to an object. This may be an advantage or not | ||
165 | * depending on how much you trust userspace not to shoot itself in the ||
166 | * foot.) Serialisation may just result in the request being inserted into ||
167 | * a DAG awaiting its turn, but the simplest approach is to wait on the CPU until ||
168 | * all dependencies are resolved. | ||
169 | * | ||
170 | * After all of that, it is just a matter of closing the request and handing it to ||
171 | * the hardware (well, leaving it in a queue to be executed). However, we also | ||
172 | * offer the ability for batchbuffers to be run with elevated privileges so | ||
173 | * that they access otherwise hidden registers. (Used to adjust L3 cache etc.) | ||
174 | * Before any batch is given extra privileges we first must check that it | ||
175 | * contains no nefarious instructions: each instruction must come from ||
176 | * our whitelist and all registers must also be from an allowed list. We first ||
177 | * copy the user's batchbuffer to a shadow (so that the user doesn't have | ||
178 | * access to it, either by the CPU or GPU as we scan it) and then parse each | ||
179 | * instruction. If everything is OK, we set a flag telling the hardware to run ||
180 | * the batchbuffer in trusted mode; otherwise the ioctl is rejected. ||
181 | */ | ||
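
As a rough, non-authoritative illustration of the contract described in the comment above, the sketch below shows how userspace might submit a single batch with I915_EXEC_NO_RELOC: the execobject offsets are the presumed addresses already written into the batch. The drm fd, GEM handles and offsets are placeholders and error handling is omitted; this is not part of the patch.

    /*
     * Hypothetical userspace sketch (not part of this patch): handles,
     * offsets and the drm fd are placeholders, errors are ignored.
     */
    #include <stdint.h>
    #include <string.h>
    #include <sys/ioctl.h>
    #include <drm/i915_drm.h>

    static void submit_batch(int drm_fd, uint32_t target_handle,
                             uint32_t batch_handle, uint32_t batch_len)
    {
            struct drm_i915_gem_exec_object2 obj[2];
            struct drm_i915_gem_execbuffer2 execbuf;

            memset(obj, 0, sizeof(obj));
            obj[0].handle = target_handle;
            obj[0].offset = 0x100000;          /* presumed address from the last run */
            obj[0].flags = EXEC_OBJECT_WRITE;  /* render target written by the batch */

            obj[1].handle = batch_handle;      /* batch goes last in the array */
            obj[1].offset = 0x200000;

            memset(&execbuf, 0, sizeof(execbuf));
            execbuf.buffers_ptr = (uintptr_t)obj;
            execbuf.buffer_count = 2;
            execbuf.batch_len = batch_len;
            execbuf.flags = I915_EXEC_RENDER |
                            I915_EXEC_NO_RELOC |   /* all presumed offsets are valid */
                            I915_EXEC_HANDLE_LUT;  /* relocs index the execobj array */

            ioctl(drm_fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf);
    }

If the kernel does have to move an object, the new address is reported back through the execobject array so that the presumed offsets can be refreshed before the next submission.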
182 | |||
56 | struct i915_execbuffer { | 183 | struct i915_execbuffer { |
57 | struct drm_i915_private *i915; | 184 | struct drm_i915_private *i915; /** i915 backpointer */ |
58 | struct drm_file *file; | 185 | struct drm_file *file; /** per-file lookup tables and limits */ |
59 | struct drm_i915_gem_execbuffer2 *args; | 186 | struct drm_i915_gem_execbuffer2 *args; /** ioctl parameters */ |
60 | struct drm_i915_gem_exec_object2 *exec; | 187 | struct drm_i915_gem_exec_object2 *exec; /** ioctl execobj[] */ |
61 | struct intel_engine_cs *engine; | 188 | |
62 | struct i915_gem_context *ctx; | 189 | struct intel_engine_cs *engine; /** engine to queue the request to */ |
63 | struct i915_address_space *vm; | 190 | struct i915_gem_context *ctx; /** context for building the request */ |
64 | struct i915_vma *batch; | 191 | struct i915_address_space *vm; /** GTT and vma for the request */ |
65 | struct drm_i915_gem_request *request; | 192 | |
66 | u32 batch_start_offset; | 193 | struct drm_i915_gem_request *request; /** our request to build */ |
67 | u32 batch_len; | 194 | struct i915_vma *batch; /** identity of the batch obj/vma */ |
68 | unsigned int dispatch_flags; | 195 | |
69 | struct drm_i915_gem_exec_object2 shadow_exec_entry; | 196 | /** actual size of execobj[] as we may extend it for the cmdparser */ |
70 | bool need_relocs; | 197 | unsigned int buffer_count; |
71 | struct list_head vmas; | 198 | |
199 | /** list of vma not yet bound during reservation phase */ | ||
200 | struct list_head unbound; | ||
201 | |||
202 | /** list of vma that have execobj.relocation_count */ | ||
203 | struct list_head relocs; | ||
204 | |||
205 | /** | ||
206 | * Track the most recently used object for relocations, as we | ||
207 | * frequently have to perform multiple relocations within the same | ||
208 | * obj/page | ||
209 | */ | ||
72 | struct reloc_cache { | 210 | struct reloc_cache { |
73 | struct drm_mm_node node; | 211 | struct drm_mm_node node; /** temporary GTT binding */ |
74 | unsigned long vaddr; | 212 | unsigned long vaddr; /** Current kmap address */ |
75 | unsigned int page; | 213 | unsigned long page; /** Currently mapped page index */ |
76 | bool use_64bit_reloc : 1; | 214 | bool use_64bit_reloc : 1; |
215 | bool has_llc : 1; | ||
216 | bool has_fence : 1; | ||
217 | bool needs_unfenced : 1; | ||
77 | } reloc_cache; | 218 | } reloc_cache; |
78 | int lut_mask; | 219 | |
79 | struct hlist_head *buckets; | 220 | u64 invalid_flags; /** Set of execobj.flags that are invalid */ |
221 | u32 context_flags; /** Set of execobj.flags to insert from the ctx */ | ||
222 | |||
223 | u32 batch_start_offset; /** Location within object of batch */ | ||
224 | u32 batch_len; /** Length of batch within object */ | ||
225 | u32 batch_flags; /** Flags composed for emit_bb_start() */ | ||
226 | |||
227 | /** | ||
228 | * Indicates either the size of the hashtable used to resolve ||
229 | * relocation handles, or if negative that we are using a direct | ||
230 | * index into the execobj[]. | ||
231 | */ | ||
232 | int lut_size; | ||
233 | struct hlist_head *buckets; /** ht for relocation handles */ | ||
80 | }; | 234 | }; |
81 | 235 | ||
82 | /* | 236 | /* |
@@ -87,11 +241,41 @@ struct i915_execbuffer { | |||
87 | #define __exec_to_vma(ee) (ee)->rsvd2 | 241 | #define __exec_to_vma(ee) (ee)->rsvd2 |
88 | #define exec_to_vma(ee) u64_to_ptr(struct i915_vma, __exec_to_vma(ee)) | 242 | #define exec_to_vma(ee) u64_to_ptr(struct i915_vma, __exec_to_vma(ee)) |
89 | 243 | ||
244 | /* | ||
245 | * Used to convert any address to canonical form. | ||
246 | * Starting from gen8, some commands (e.g. STATE_BASE_ADDRESS, | ||
247 | * MI_LOAD_REGISTER_MEM and others, see Broadwell PRM Vol2a) require the | ||
248 | * addresses to be in a canonical form: | ||
249 | * "GraphicsAddress[63:48] are ignored by the HW and assumed to be in correct | ||
250 | * canonical form [63:48] == [47]." | ||
251 | */ | ||
252 | #define GEN8_HIGH_ADDRESS_BIT 47 | ||
253 | static inline u64 gen8_canonical_addr(u64 address) | ||
254 | { | ||
255 | return sign_extend64(address, GEN8_HIGH_ADDRESS_BIT); | ||
256 | } | ||
257 | |||
258 | static inline u64 gen8_noncanonical_addr(u64 address) | ||
259 | { | ||
260 | return address & GENMASK_ULL(GEN8_HIGH_ADDRESS_BIT, 0); | ||
261 | } | ||
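
A small worked example of the two helpers above (values chosen purely for illustration):

    /*
     *   gen8_canonical_addr(0x00007fff00001000)    == 0x00007fff00001000  (bit 47 clear: unchanged)
     *   gen8_canonical_addr(0x0000800000001000)    == 0xffff800000001000  (bit 47 set: copied into [63:48])
     *   gen8_noncanonical_addr(0xffff800000001000) == 0x0000800000001000  (upper copies stripped for drm_mm)
     */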
262 | |||
90 | static int eb_create(struct i915_execbuffer *eb) | 263 | static int eb_create(struct i915_execbuffer *eb) |
91 | { | 264 | { |
92 | if ((eb->args->flags & I915_EXEC_HANDLE_LUT) == 0) { | 265 | if (!(eb->args->flags & I915_EXEC_HANDLE_LUT)) { |
93 | unsigned int size = 1 + ilog2(eb->args->buffer_count); | 266 | unsigned int size = 1 + ilog2(eb->buffer_count); |
94 | 267 | ||
268 | /* | ||
269 | * Without a 1:1 association between relocation handles and | ||
270 | * the execobject[] index, we instead create a hashtable. | ||
271 | * We size it dynamically based on available memory, starting | ||
272 | * first with a 1:1 associative hash and scaling back until ||
273 | * the allocation succeeds. | ||
274 | * | ||
275 | * Later on we use a positive lut_size to indicate we are | ||
276 | * using this hashtable, and a negative value to indicate a | ||
277 | * direct lookup. | ||
278 | */ | ||
95 | do { | 279 | do { |
96 | eb->buckets = kzalloc(sizeof(struct hlist_head) << size, | 280 | eb->buckets = kzalloc(sizeof(struct hlist_head) << size, |
97 | GFP_TEMPORARY | | 281 | GFP_TEMPORARY | |
@@ -108,112 +292,411 @@ static int eb_create(struct i915_execbuffer *eb) | |||
108 | return -ENOMEM; | 292 | return -ENOMEM; |
109 | } | 293 | } |
110 | 294 | ||
111 | eb->lut_mask = size; | 295 | eb->lut_size = size; |
112 | } else { | 296 | } else { |
113 | eb->lut_mask = -eb->args->buffer_count; | 297 | eb->lut_size = -eb->buffer_count; |
114 | } | 298 | } |
115 | 299 | ||
116 | return 0; | 300 | return 0; |
117 | } | 301 | } |
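
As a quick illustration of the initial table sizing above (numbers purely illustrative): with buffer_count = 1 the starting size is 1 + ilog2(1) = 1, i.e. 2 buckets; with buffer_count = 64 it is 7 (128 buckets); with buffer_count = 1000 it is 10 (1024 buckets). If the kzalloc() fails, the table is scaled back as described in the comment until an allocation succeeds, otherwise -ENOMEM is returned as above.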
118 | 302 | ||
303 | static bool | ||
304 | eb_vma_misplaced(const struct drm_i915_gem_exec_object2 *entry, | ||
305 | const struct i915_vma *vma) | ||
306 | { | ||
307 | if (!(entry->flags & __EXEC_OBJECT_HAS_PIN)) | ||
308 | return true; | ||
309 | |||
310 | if (vma->node.size < entry->pad_to_size) | ||
311 | return true; | ||
312 | |||
313 | if (entry->alignment && !IS_ALIGNED(vma->node.start, entry->alignment)) | ||
314 | return true; | ||
315 | |||
316 | if (entry->flags & EXEC_OBJECT_PINNED && | ||
317 | vma->node.start != entry->offset) | ||
318 | return true; | ||
319 | |||
320 | if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS && | ||
321 | vma->node.start < BATCH_OFFSET_BIAS) | ||
322 | return true; | ||
323 | |||
324 | if (!(entry->flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) && | ||
325 | (vma->node.start + vma->node.size - 1) >> 32) | ||
326 | return true; | ||
327 | |||
328 | return false; | ||
329 | } | ||
330 | |||
331 | static inline void | ||
332 | eb_pin_vma(struct i915_execbuffer *eb, | ||
333 | struct drm_i915_gem_exec_object2 *entry, | ||
334 | struct i915_vma *vma) | ||
335 | { | ||
336 | u64 flags; | ||
337 | |||
338 | flags = vma->node.start; | ||
339 | flags |= PIN_USER | PIN_NONBLOCK | PIN_OFFSET_FIXED; | ||
340 | if (unlikely(entry->flags & EXEC_OBJECT_NEEDS_GTT)) | ||
341 | flags |= PIN_GLOBAL; | ||
342 | if (unlikely(i915_vma_pin(vma, 0, 0, flags))) | ||
343 | return; | ||
344 | |||
345 | if (unlikely(entry->flags & EXEC_OBJECT_NEEDS_FENCE)) { | ||
346 | if (unlikely(i915_vma_get_fence(vma))) { | ||
347 | i915_vma_unpin(vma); | ||
348 | return; | ||
349 | } | ||
350 | |||
351 | if (i915_vma_pin_fence(vma)) | ||
352 | entry->flags |= __EXEC_OBJECT_HAS_FENCE; | ||
353 | } | ||
354 | |||
355 | entry->flags |= __EXEC_OBJECT_HAS_PIN; | ||
356 | } | ||
357 | |||
119 | static inline void | 358 | static inline void |
120 | __eb_unreserve_vma(struct i915_vma *vma, | 359 | __eb_unreserve_vma(struct i915_vma *vma, |
121 | const struct drm_i915_gem_exec_object2 *entry) | 360 | const struct drm_i915_gem_exec_object2 *entry) |
122 | { | 361 | { |
362 | GEM_BUG_ON(!(entry->flags & __EXEC_OBJECT_HAS_PIN)); | ||
363 | |||
123 | if (unlikely(entry->flags & __EXEC_OBJECT_HAS_FENCE)) | 364 | if (unlikely(entry->flags & __EXEC_OBJECT_HAS_FENCE)) |
124 | i915_vma_unpin_fence(vma); | 365 | i915_vma_unpin_fence(vma); |
125 | 366 | ||
126 | if (entry->flags & __EXEC_OBJECT_HAS_PIN) | 367 | __i915_vma_unpin(vma); |
127 | __i915_vma_unpin(vma); | ||
128 | } | 368 | } |
129 | 369 | ||
130 | static void | 370 | static inline void |
131 | eb_unreserve_vma(struct i915_vma *vma) | 371 | eb_unreserve_vma(struct i915_vma *vma, |
372 | struct drm_i915_gem_exec_object2 *entry) | ||
132 | { | 373 | { |
133 | struct drm_i915_gem_exec_object2 *entry = vma->exec_entry; | 374 | if (!(entry->flags & __EXEC_OBJECT_HAS_PIN)) |
375 | return; | ||
134 | 376 | ||
135 | __eb_unreserve_vma(vma, entry); | 377 | __eb_unreserve_vma(vma, entry); |
136 | entry->flags &= ~(__EXEC_OBJECT_HAS_FENCE | __EXEC_OBJECT_HAS_PIN); | 378 | entry->flags &= ~__EXEC_OBJECT_RESERVED; |
137 | } | 379 | } |
138 | 380 | ||
139 | static void | 381 | static int |
140 | eb_reset(struct i915_execbuffer *eb) | 382 | eb_validate_vma(struct i915_execbuffer *eb, |
383 | struct drm_i915_gem_exec_object2 *entry, | ||
384 | struct i915_vma *vma) | ||
141 | { | 385 | { |
142 | struct i915_vma *vma; | 386 | if (unlikely(entry->flags & eb->invalid_flags)) |
387 | return -EINVAL; | ||
143 | 388 | ||
144 | list_for_each_entry(vma, &eb->vmas, exec_link) { | 389 | if (unlikely(entry->alignment && !is_power_of_2(entry->alignment))) |
145 | eb_unreserve_vma(vma); | 390 | return -EINVAL; |
146 | i915_vma_put(vma); | 391 | |
147 | vma->exec_entry = NULL; | 392 | /* |
393 | * Offset can be used as input (EXEC_OBJECT_PINNED), reject | ||
394 | * any non-page-aligned or non-canonical addresses. | ||
395 | */ | ||
396 | if (unlikely(entry->flags & EXEC_OBJECT_PINNED && | ||
397 | entry->offset != gen8_canonical_addr(entry->offset & PAGE_MASK))) | ||
398 | return -EINVAL; | ||
399 | |||
400 | /* pad_to_size was once a reserved field, so sanitize it */ | ||
401 | if (entry->flags & EXEC_OBJECT_PAD_TO_SIZE) { | ||
402 | if (unlikely(offset_in_page(entry->pad_to_size))) | ||
403 | return -EINVAL; | ||
404 | } else { | ||
405 | entry->pad_to_size = 0; | ||
148 | } | 406 | } |
149 | 407 | ||
150 | if (eb->lut_mask >= 0) | 408 | if (unlikely(vma->exec_entry)) { |
151 | memset(eb->buckets, 0, | 409 | DRM_DEBUG("Object [handle %d, index %d] appears more than once in object list\n", |
152 | sizeof(struct hlist_head) << eb->lut_mask); | 410 | entry->handle, (int)(entry - eb->exec)); |
411 | return -EINVAL; | ||
412 | } | ||
413 | |||
414 | /* | ||
415 | * From drm_mm perspective address space is continuous, | ||
416 | * so from this point we're always using non-canonical | ||
417 | * form internally. | ||
418 | */ | ||
419 | entry->offset = gen8_noncanonical_addr(entry->offset); | ||
420 | |||
421 | return 0; | ||
153 | } | 422 | } |
154 | 423 | ||
155 | static bool | 424 | static int |
156 | eb_add_vma(struct i915_execbuffer *eb, struct i915_vma *vma, int i) | 425 | eb_add_vma(struct i915_execbuffer *eb, |
426 | struct drm_i915_gem_exec_object2 *entry, | ||
427 | struct i915_vma *vma) | ||
157 | { | 428 | { |
158 | if (unlikely(vma->exec_entry)) { | 429 | int err; |
159 | DRM_DEBUG("Object [handle %d, index %d] appears more than once in object list\n", | 430 | |
160 | eb->exec[i].handle, i); | 431 | GEM_BUG_ON(i915_vma_is_closed(vma)); |
161 | return false; | 432 | |
433 | if (!(eb->args->flags & __EXEC_VALIDATED)) { | ||
434 | err = eb_validate_vma(eb, entry, vma); | ||
435 | if (unlikely(err)) | ||
436 | return err; | ||
162 | } | 437 | } |
163 | list_add_tail(&vma->exec_link, &eb->vmas); | ||
164 | 438 | ||
165 | vma->exec_entry = &eb->exec[i]; | 439 | if (eb->lut_size >= 0) { |
166 | if (eb->lut_mask >= 0) { | 440 | vma->exec_handle = entry->handle; |
167 | vma->exec_handle = eb->exec[i].handle; | ||
168 | hlist_add_head(&vma->exec_node, | 441 | hlist_add_head(&vma->exec_node, |
169 | &eb->buckets[hash_32(vma->exec_handle, | 442 | &eb->buckets[hash_32(entry->handle, |
170 | eb->lut_mask)]); | 443 | eb->lut_size)]); |
171 | } | 444 | } |
172 | 445 | ||
173 | i915_vma_get(vma); | 446 | if (entry->relocation_count) |
174 | __exec_to_vma(&eb->exec[i]) = (uintptr_t)vma; | 447 | list_add_tail(&vma->reloc_link, &eb->relocs); |
175 | return true; | 448 | |
449 | if (!eb->reloc_cache.has_fence) { | ||
450 | entry->flags &= ~EXEC_OBJECT_NEEDS_FENCE; | ||
451 | } else { | ||
452 | if ((entry->flags & EXEC_OBJECT_NEEDS_FENCE || | ||
453 | eb->reloc_cache.needs_unfenced) && | ||
454 | i915_gem_object_is_tiled(vma->obj)) | ||
455 | entry->flags |= EXEC_OBJECT_NEEDS_GTT | __EXEC_OBJECT_NEEDS_MAP; | ||
456 | } | ||
457 | |||
458 | if (!(entry->flags & EXEC_OBJECT_PINNED)) | ||
459 | entry->flags |= eb->context_flags; | ||
460 | |||
461 | /* | ||
462 | * Stash a pointer from the vma to execobj, so we can query its flags, | ||
463 | * size, alignment etc as provided by the user. Also we stash a pointer | ||
464 | * to the vma inside the execobj so that we can use a direct lookup | ||
465 | * to find the right target VMA when doing relocations. | ||
466 | */ | ||
467 | vma->exec_entry = entry; | ||
468 | __exec_to_vma(entry) = (uintptr_t)i915_vma_get(vma); | ||
469 | |||
470 | err = 0; | ||
471 | if (vma->node.size) | ||
472 | eb_pin_vma(eb, entry, vma); | ||
473 | if (eb_vma_misplaced(entry, vma)) { | ||
474 | eb_unreserve_vma(vma, entry); | ||
475 | |||
476 | list_add_tail(&vma->exec_link, &eb->unbound); | ||
477 | if (drm_mm_node_allocated(&vma->node)) | ||
478 | err = i915_vma_unbind(vma); | ||
479 | } else { | ||
480 | if (entry->offset != vma->node.start) { | ||
481 | entry->offset = vma->node.start | UPDATE; | ||
482 | eb->args->flags |= __EXEC_HAS_RELOC; | ||
483 | } | ||
484 | } | ||
485 | return err; | ||
486 | } | ||
487 | |||
488 | static inline int use_cpu_reloc(const struct reloc_cache *cache, | ||
489 | const struct drm_i915_gem_object *obj) | ||
490 | { | ||
491 | if (!i915_gem_object_has_struct_page(obj)) | ||
492 | return false; | ||
493 | |||
494 | if (DBG_USE_CPU_RELOC) | ||
495 | return DBG_USE_CPU_RELOC > 0; | ||
496 | |||
497 | return (cache->has_llc || | ||
498 | obj->cache_dirty || | ||
499 | obj->cache_level != I915_CACHE_NONE); | ||
500 | } | ||
501 | |||
502 | static int eb_reserve_vma(const struct i915_execbuffer *eb, | ||
503 | struct i915_vma *vma) | ||
504 | { | ||
505 | struct drm_i915_gem_exec_object2 *entry = vma->exec_entry; | ||
506 | u64 flags; | ||
507 | int err; | ||
508 | |||
509 | flags = PIN_USER | PIN_NONBLOCK; | ||
510 | if (entry->flags & EXEC_OBJECT_NEEDS_GTT) | ||
511 | flags |= PIN_GLOBAL; | ||
512 | |||
513 | /* | ||
514 | * Wa32bitGeneralStateOffset & Wa32bitInstructionBaseOffset, | ||
515 | * limit address to the first 4GBs for unflagged objects. | ||
516 | */ | ||
517 | if (!(entry->flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS)) | ||
518 | flags |= PIN_ZONE_4G; | ||
519 | |||
520 | if (entry->flags & __EXEC_OBJECT_NEEDS_MAP) | ||
521 | flags |= PIN_MAPPABLE; | ||
522 | |||
523 | if (entry->flags & EXEC_OBJECT_PINNED) { | ||
524 | flags |= entry->offset | PIN_OFFSET_FIXED; | ||
525 | flags &= ~PIN_NONBLOCK; /* force overlapping PINNED checks */ | ||
526 | } else if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS) { | ||
527 | flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS; | ||
528 | } | ||
529 | |||
530 | err = i915_vma_pin(vma, entry->pad_to_size, entry->alignment, flags); | ||
531 | if (err) | ||
532 | return err; | ||
533 | |||
534 | if (entry->offset != vma->node.start) { | ||
535 | entry->offset = vma->node.start | UPDATE; | ||
536 | eb->args->flags |= __EXEC_HAS_RELOC; | ||
537 | } | ||
538 | |||
539 | entry->flags |= __EXEC_OBJECT_HAS_PIN; | ||
540 | GEM_BUG_ON(eb_vma_misplaced(entry, vma)); | ||
541 | |||
542 | if (unlikely(entry->flags & EXEC_OBJECT_NEEDS_FENCE)) { | ||
543 | err = i915_vma_get_fence(vma); | ||
544 | if (unlikely(err)) { | ||
545 | i915_vma_unpin(vma); | ||
546 | return err; | ||
547 | } | ||
548 | |||
549 | if (i915_vma_pin_fence(vma)) | ||
550 | entry->flags |= __EXEC_OBJECT_HAS_FENCE; | ||
551 | } | ||
552 | |||
553 | return 0; | ||
554 | } | ||
555 | |||
556 | static int eb_reserve(struct i915_execbuffer *eb) | ||
557 | { | ||
558 | const unsigned int count = eb->buffer_count; | ||
559 | struct list_head last; | ||
560 | struct i915_vma *vma; | ||
561 | unsigned int i, pass; | ||
562 | int err; | ||
563 | |||
564 | /* | ||
565 | * Attempt to pin all of the buffers into the GTT. | ||
566 | * This is done in 3 phases: | ||
567 | * | ||
568 | * 1a. Unbind all objects that do not match the GTT constraints for | ||
569 | * the execbuffer (fenceable, mappable, alignment etc). | ||
570 | * 1b. Increment pin count for already bound objects. | ||
571 | * 2. Bind new objects. | ||
572 | * 3. Decrement pin count. | ||
573 | * | ||
574 | * This avoids unnecessary unbinding of later objects in order to make ||
575 | * room for the earlier objects *unless* we need to defragment. | ||
576 | */ | ||
577 | |||
578 | pass = 0; | ||
579 | err = 0; | ||
580 | do { | ||
581 | list_for_each_entry(vma, &eb->unbound, exec_link) { | ||
582 | err = eb_reserve_vma(eb, vma); | ||
583 | if (err) | ||
584 | break; | ||
585 | } | ||
586 | if (err != -ENOSPC) | ||
587 | return err; | ||
588 | |||
589 | /* Resort *all* the objects into priority order */ | ||
590 | INIT_LIST_HEAD(&eb->unbound); | ||
591 | INIT_LIST_HEAD(&last); | ||
592 | for (i = 0; i < count; i++) { | ||
593 | struct drm_i915_gem_exec_object2 *entry = &eb->exec[i]; | ||
594 | |||
595 | if (entry->flags & EXEC_OBJECT_PINNED && | ||
596 | entry->flags & __EXEC_OBJECT_HAS_PIN) | ||
597 | continue; | ||
598 | |||
599 | vma = exec_to_vma(entry); | ||
600 | eb_unreserve_vma(vma, entry); | ||
601 | |||
602 | if (entry->flags & EXEC_OBJECT_PINNED) | ||
603 | list_add(&vma->exec_link, &eb->unbound); | ||
604 | else if (entry->flags & __EXEC_OBJECT_NEEDS_MAP) | ||
605 | list_add_tail(&vma->exec_link, &eb->unbound); | ||
606 | else | ||
607 | list_add_tail(&vma->exec_link, &last); | ||
608 | } | ||
609 | list_splice_tail(&last, &eb->unbound); | ||
610 | |||
611 | switch (pass++) { | ||
612 | case 0: | ||
613 | break; | ||
614 | |||
615 | case 1: | ||
616 | /* Too fragmented, unbind everything and retry */ | ||
617 | err = i915_gem_evict_vm(eb->vm); | ||
618 | if (err) | ||
619 | return err; | ||
620 | break; | ||
621 | |||
622 | default: | ||
623 | return -ENOSPC; | ||
624 | } | ||
625 | } while (1); | ||
176 | } | 626 | } |
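
To make the flag composition performed by eb_reserve_vma() above concrete, the comment below sketches the pin flags that would result for a few hypothetical execobjects (the softpin offset is a placeholder):

    /*
     * Hypothetical examples of the flags passed to i915_vma_pin() by
     * eb_reserve_vma():
     *
     *   plain legacy object (no execobj flags set):
     *     PIN_USER | PIN_NONBLOCK | PIN_ZONE_4G
     *
     *   softpinned object, EXEC_OBJECT_PINNED | EXEC_OBJECT_SUPPORTS_48B_ADDRESS,
     *   execobj.offset == 0x100000:
     *     PIN_USER | 0x100000 | PIN_OFFSET_FIXED   (PIN_NONBLOCK cleared)
     *
     *   the batch (given __EXEC_OBJECT_NEEDS_BIAS during lookup):
     *     PIN_USER | PIN_NONBLOCK | PIN_ZONE_4G | BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS
     */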
177 | 627 | ||
178 | static inline struct hlist_head * | 628 | static inline struct hlist_head * |
179 | ht_head(const struct i915_gem_context *ctx, u32 handle) | 629 | ht_head(const struct i915_gem_context_vma_lut *lut, u32 handle) |
180 | { | 630 | { |
181 | return &ctx->vma_lut.ht[hash_32(handle, ctx->vma_lut.ht_bits)]; | 631 | return &lut->ht[hash_32(handle, lut->ht_bits)]; |
182 | } | 632 | } |
183 | 633 | ||
184 | static inline bool | 634 | static inline bool |
185 | ht_needs_resize(const struct i915_gem_context *ctx) | 635 | ht_needs_resize(const struct i915_gem_context_vma_lut *lut) |
186 | { | 636 | { |
187 | return (4*ctx->vma_lut.ht_count > 3*ctx->vma_lut.ht_size || | 637 | return (4*lut->ht_count > 3*lut->ht_size || |
188 | 4*ctx->vma_lut.ht_count + 1 < ctx->vma_lut.ht_size); | 638 | 4*lut->ht_count + 1 < lut->ht_size); |
189 | } | 639 | } |
190 | 640 | ||
191 | static int | 641 | static unsigned int eb_batch_index(const struct i915_execbuffer *eb) |
192 | eb_lookup_vmas(struct i915_execbuffer *eb) | 642 | { |
643 | return eb->buffer_count - 1; | ||
644 | } | ||
645 | |||
646 | static int eb_select_context(struct i915_execbuffer *eb) | ||
647 | { | ||
648 | struct i915_gem_context *ctx; | ||
649 | |||
650 | ctx = i915_gem_context_lookup(eb->file->driver_priv, eb->args->rsvd1); | ||
651 | if (unlikely(IS_ERR(ctx))) | ||
652 | return PTR_ERR(ctx); | ||
653 | |||
654 | if (unlikely(i915_gem_context_is_banned(ctx))) { | ||
655 | DRM_DEBUG("Context %u tried to submit while banned\n", | ||
656 | ctx->user_handle); | ||
657 | return -EIO; | ||
658 | } | ||
659 | |||
660 | eb->ctx = i915_gem_context_get(ctx); | ||
661 | eb->vm = ctx->ppgtt ? &ctx->ppgtt->base : &eb->i915->ggtt.base; | ||
662 | |||
663 | eb->context_flags = 0; | ||
664 | if (ctx->flags & CONTEXT_NO_ZEROMAP) | ||
665 | eb->context_flags |= __EXEC_OBJECT_NEEDS_BIAS; | ||
666 | |||
667 | return 0; | ||
668 | } | ||
669 | |||
670 | static int eb_lookup_vmas(struct i915_execbuffer *eb) | ||
193 | { | 671 | { |
194 | #define INTERMEDIATE BIT(0) | 672 | #define INTERMEDIATE BIT(0) |
195 | const int count = eb->args->buffer_count; | 673 | const unsigned int count = eb->buffer_count; |
674 | struct i915_gem_context_vma_lut *lut = &eb->ctx->vma_lut; | ||
196 | struct i915_vma *vma; | 675 | struct i915_vma *vma; |
676 | struct idr *idr; | ||
677 | unsigned int i; | ||
197 | int slow_pass = -1; | 678 | int slow_pass = -1; |
198 | int i; | 679 | int err; |
199 | 680 | ||
200 | INIT_LIST_HEAD(&eb->vmas); | 681 | INIT_LIST_HEAD(&eb->relocs); |
682 | INIT_LIST_HEAD(&eb->unbound); | ||
201 | 683 | ||
202 | if (unlikely(eb->ctx->vma_lut.ht_size & I915_CTX_RESIZE_IN_PROGRESS)) | 684 | if (unlikely(lut->ht_size & I915_CTX_RESIZE_IN_PROGRESS)) |
203 | flush_work(&eb->ctx->vma_lut.resize); | 685 | flush_work(&lut->resize); |
204 | GEM_BUG_ON(eb->ctx->vma_lut.ht_size & I915_CTX_RESIZE_IN_PROGRESS); | 686 | GEM_BUG_ON(lut->ht_size & I915_CTX_RESIZE_IN_PROGRESS); |
205 | 687 | ||
206 | for (i = 0; i < count; i++) { | 688 | for (i = 0; i < count; i++) { |
207 | __exec_to_vma(&eb->exec[i]) = 0; | 689 | __exec_to_vma(&eb->exec[i]) = 0; |
208 | 690 | ||
209 | hlist_for_each_entry(vma, | 691 | hlist_for_each_entry(vma, |
210 | ht_head(eb->ctx, eb->exec[i].handle), | 692 | ht_head(lut, eb->exec[i].handle), |
211 | ctx_node) { | 693 | ctx_node) { |
212 | if (vma->ctx_handle != eb->exec[i].handle) | 694 | if (vma->ctx_handle != eb->exec[i].handle) |
213 | continue; | 695 | continue; |
214 | 696 | ||
215 | if (!eb_add_vma(eb, vma, i)) | 697 | err = eb_add_vma(eb, &eb->exec[i], vma); |
216 | return -EINVAL; | 698 | if (unlikely(err)) |
699 | return err; | ||
217 | 700 | ||
218 | goto next_vma; | 701 | goto next_vma; |
219 | } | 702 | } |
@@ -224,24 +707,27 @@ next_vma: ; | |||
224 | } | 707 | } |
225 | 708 | ||
226 | if (slow_pass < 0) | 709 | if (slow_pass < 0) |
227 | return 0; | 710 | goto out; |
228 | 711 | ||
229 | spin_lock(&eb->file->table_lock); | 712 | spin_lock(&eb->file->table_lock); |
230 | /* Grab a reference to the object and release the lock so we can lookup | 713 | /* |
231 | * or create the VMA without using GFP_ATOMIC */ | 714 | * Grab a reference to the object and release the lock so we can lookup |
715 | * or create the VMA without using GFP_ATOMIC | ||
716 | */ | ||
717 | idr = &eb->file->object_idr; | ||
232 | for (i = slow_pass; i < count; i++) { | 718 | for (i = slow_pass; i < count; i++) { |
233 | struct drm_i915_gem_object *obj; | 719 | struct drm_i915_gem_object *obj; |
234 | 720 | ||
235 | if (__exec_to_vma(&eb->exec[i])) | 721 | if (__exec_to_vma(&eb->exec[i])) |
236 | continue; | 722 | continue; |
237 | 723 | ||
238 | obj = to_intel_bo(idr_find(&eb->file->object_idr, | 724 | obj = to_intel_bo(idr_find(idr, eb->exec[i].handle)); |
239 | eb->exec[i].handle)); | ||
240 | if (unlikely(!obj)) { | 725 | if (unlikely(!obj)) { |
241 | spin_unlock(&eb->file->table_lock); | 726 | spin_unlock(&eb->file->table_lock); |
242 | DRM_DEBUG("Invalid object handle %d at index %d\n", | 727 | DRM_DEBUG("Invalid object handle %d at index %d\n", |
243 | eb->exec[i].handle, i); | 728 | eb->exec[i].handle, i); |
244 | return -ENOENT; | 729 | err = -ENOENT; |
730 | goto err; | ||
245 | } | 731 | } |
246 | 732 | ||
247 | __exec_to_vma(&eb->exec[i]) = INTERMEDIATE | (uintptr_t)obj; | 733 | __exec_to_vma(&eb->exec[i]) = INTERMEDIATE | (uintptr_t)obj; |
@@ -251,7 +737,7 @@ next_vma: ; | |||
251 | for (i = slow_pass; i < count; i++) { | 737 | for (i = slow_pass; i < count; i++) { |
252 | struct drm_i915_gem_object *obj; | 738 | struct drm_i915_gem_object *obj; |
253 | 739 | ||
254 | if ((__exec_to_vma(&eb->exec[i]) & INTERMEDIATE) == 0) | 740 | if (!(__exec_to_vma(&eb->exec[i]) & INTERMEDIATE)) |
255 | continue; | 741 | continue; |
256 | 742 | ||
257 | /* | 743 | /* |
@@ -262,12 +748,13 @@ next_vma: ; | |||
262 | * from the (obj, vm) we don't run the risk of creating | 748 | * from the (obj, vm) we don't run the risk of creating |
263 | * duplicated vmas for the same vm. | 749 | * duplicated vmas for the same vm. |
264 | */ | 750 | */ |
265 | obj = u64_to_ptr(struct drm_i915_gem_object, | 751 | obj = u64_to_ptr(typeof(*obj), |
266 | __exec_to_vma(&eb->exec[i]) & ~INTERMEDIATE); | 752 | __exec_to_vma(&eb->exec[i]) & ~INTERMEDIATE); |
267 | vma = i915_vma_instance(obj, eb->vm, NULL); | 753 | vma = i915_vma_instance(obj, eb->vm, NULL); |
268 | if (unlikely(IS_ERR(vma))) { | 754 | if (unlikely(IS_ERR(vma))) { |
269 | DRM_DEBUG("Failed to lookup VMA\n"); | 755 | DRM_DEBUG("Failed to lookup VMA\n"); |
270 | return PTR_ERR(vma); | 756 | err = PTR_ERR(vma); |
757 | goto err; | ||
271 | } | 758 | } |
272 | 759 | ||
273 | /* First come, first served */ | 760 | /* First come, first served */ |
@@ -275,32 +762,31 @@ next_vma: ; | |||
275 | vma->ctx = eb->ctx; | 762 | vma->ctx = eb->ctx; |
276 | vma->ctx_handle = eb->exec[i].handle; | 763 | vma->ctx_handle = eb->exec[i].handle; |
277 | hlist_add_head(&vma->ctx_node, | 764 | hlist_add_head(&vma->ctx_node, |
278 | ht_head(eb->ctx, eb->exec[i].handle)); | 765 | ht_head(lut, eb->exec[i].handle)); |
279 | eb->ctx->vma_lut.ht_count++; | 766 | lut->ht_count++; |
767 | lut->ht_size |= I915_CTX_RESIZE_IN_PROGRESS; | ||
280 | if (i915_vma_is_ggtt(vma)) { | 768 | if (i915_vma_is_ggtt(vma)) { |
281 | GEM_BUG_ON(obj->vma_hashed); | 769 | GEM_BUG_ON(obj->vma_hashed); |
282 | obj->vma_hashed = vma; | 770 | obj->vma_hashed = vma; |
283 | } | 771 | } |
284 | } | 772 | } |
285 | 773 | ||
286 | if (!eb_add_vma(eb, vma, i)) | 774 | err = eb_add_vma(eb, &eb->exec[i], vma); |
287 | return -EINVAL; | 775 | if (unlikely(err)) |
776 | goto err; | ||
288 | } | 777 | } |
289 | 778 | ||
290 | if (ht_needs_resize(eb->ctx)) { | 779 | if (lut->ht_size & I915_CTX_RESIZE_IN_PROGRESS) { |
291 | eb->ctx->vma_lut.ht_size |= I915_CTX_RESIZE_IN_PROGRESS; | 780 | if (ht_needs_resize(lut)) |
292 | queue_work(system_highpri_wq, &eb->ctx->vma_lut.resize); | 781 | queue_work(system_highpri_wq, &lut->resize); |
782 | else | ||
783 | lut->ht_size &= ~I915_CTX_RESIZE_IN_PROGRESS; | ||
293 | } | 784 | } |
294 | 785 | ||
295 | return 0; | 786 | out: |
296 | #undef INTERMEDIATE | 787 | /* take note of the batch buffer before we might reorder the lists */ |
297 | } | 788 | i = eb_batch_index(eb); |
298 | 789 | eb->batch = exec_to_vma(&eb->exec[i]); | |
299 | static struct i915_vma * | ||
300 | eb_get_batch(struct i915_execbuffer *eb) | ||
301 | { | ||
302 | struct i915_vma *vma = | ||
303 | exec_to_vma(&eb->exec[eb->args->buffer_count - 1]); | ||
304 | 790 | ||
305 | /* | 791 | /* |
306 | * SNA is doing fancy tricks with compressing batch buffers, which leads | 792 | * SNA is doing fancy tricks with compressing batch buffers, which leads |
@@ -311,24 +797,36 @@ eb_get_batch(struct i915_execbuffer *eb) | |||
311 | * Note that actual hangs have only been observed on gen7, but for | 797 | * Note that actual hangs have only been observed on gen7, but for |
312 | * paranoia do it everywhere. | 798 | * paranoia do it everywhere. |
313 | */ | 799 | */ |
314 | if ((vma->exec_entry->flags & EXEC_OBJECT_PINNED) == 0) | 800 | if (!(eb->exec[i].flags & EXEC_OBJECT_PINNED)) |
315 | vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS; | 801 | eb->exec[i].flags |= __EXEC_OBJECT_NEEDS_BIAS; |
802 | if (eb->reloc_cache.has_fence) | ||
803 | eb->exec[i].flags |= EXEC_OBJECT_NEEDS_FENCE; | ||
316 | 804 | ||
317 | return vma; | 805 | eb->args->flags |= __EXEC_VALIDATED; |
806 | return eb_reserve(eb); | ||
807 | |||
808 | err: | ||
809 | for (i = slow_pass; i < count; i++) { | ||
810 | if (__exec_to_vma(&eb->exec[i]) & INTERMEDIATE) | ||
811 | __exec_to_vma(&eb->exec[i]) = 0; | ||
812 | } | ||
813 | lut->ht_size &= ~I915_CTX_RESIZE_IN_PROGRESS; | ||
814 | return err; | ||
815 | #undef INTERMEDIATE | ||
318 | } | 816 | } |
319 | 817 | ||
320 | static struct i915_vma * | 818 | static struct i915_vma * |
321 | eb_get_vma(struct i915_execbuffer *eb, unsigned long handle) | 819 | eb_get_vma(const struct i915_execbuffer *eb, unsigned long handle) |
322 | { | 820 | { |
323 | if (eb->lut_mask < 0) { | 821 | if (eb->lut_size < 0) { |
324 | if (handle >= -eb->lut_mask) | 822 | if (handle >= -eb->lut_size) |
325 | return NULL; | 823 | return NULL; |
326 | return exec_to_vma(&eb->exec[handle]); | 824 | return exec_to_vma(&eb->exec[handle]); |
327 | } else { | 825 | } else { |
328 | struct hlist_head *head; | 826 | struct hlist_head *head; |
329 | struct i915_vma *vma; | 827 | struct i915_vma *vma; |
330 | 828 | ||
331 | head = &eb->buckets[hash_32(handle, eb->lut_mask)]; | 829 | head = &eb->buckets[hash_32(handle, eb->lut_size)]; |
332 | hlist_for_each_entry(vma, head, exec_node) { | 830 | hlist_for_each_entry(vma, head, exec_node) { |
333 | if (vma->exec_handle == handle) | 831 | if (vma->exec_handle == handle) |
334 | return vma; | 832 | return vma; |
@@ -337,61 +835,46 @@ eb_get_vma(struct i915_execbuffer *eb, unsigned long handle) | |||
337 | } | 835 | } |
338 | } | 836 | } |
339 | 837 | ||
340 | static void eb_destroy(struct i915_execbuffer *eb) | 838 | static void eb_release_vmas(const struct i915_execbuffer *eb) |
341 | { | 839 | { |
342 | struct i915_vma *vma; | 840 | const unsigned int count = eb->buffer_count; |
841 | unsigned int i; | ||
343 | 842 | ||
344 | list_for_each_entry(vma, &eb->vmas, exec_link) { | 843 | for (i = 0; i < count; i++) { |
345 | if (!vma->exec_entry) | 844 | struct drm_i915_gem_exec_object2 *entry = &eb->exec[i]; |
845 | struct i915_vma *vma = exec_to_vma(entry); | ||
846 | |||
847 | if (!vma) | ||
346 | continue; | 848 | continue; |
347 | 849 | ||
348 | __eb_unreserve_vma(vma, vma->exec_entry); | 850 | GEM_BUG_ON(vma->exec_entry != entry); |
349 | vma->exec_entry = NULL; | 851 | vma->exec_entry = NULL; |
350 | i915_vma_put(vma); | ||
351 | } | ||
352 | |||
353 | i915_gem_context_put(eb->ctx); | ||
354 | 852 | ||
355 | if (eb->lut_mask >= 0) | 853 | eb_unreserve_vma(vma, entry); |
356 | kfree(eb->buckets); | ||
357 | } | ||
358 | |||
359 | static inline int use_cpu_reloc(struct drm_i915_gem_object *obj) | ||
360 | { | ||
361 | if (!i915_gem_object_has_struct_page(obj)) | ||
362 | return false; | ||
363 | 854 | ||
364 | if (DBG_USE_CPU_RELOC) | 855 | i915_vma_put(vma); |
365 | return DBG_USE_CPU_RELOC > 0; | 856 | } |
366 | |||
367 | return (HAS_LLC(to_i915(obj->base.dev)) || | ||
368 | obj->cache_dirty || | ||
369 | obj->cache_level != I915_CACHE_NONE); | ||
370 | } | 857 | } |
371 | 858 | ||
372 | /* Used to convert any address to canonical form. | 859 | static void eb_reset_vmas(const struct i915_execbuffer *eb) |
373 | * Starting from gen8, some commands (e.g. STATE_BASE_ADDRESS, | ||
374 | * MI_LOAD_REGISTER_MEM and others, see Broadwell PRM Vol2a) require the | ||
375 | * addresses to be in a canonical form: | ||
376 | * "GraphicsAddress[63:48] are ignored by the HW and assumed to be in correct | ||
377 | * canonical form [63:48] == [47]." | ||
378 | */ | ||
379 | #define GEN8_HIGH_ADDRESS_BIT 47 | ||
380 | static inline uint64_t gen8_canonical_addr(uint64_t address) | ||
381 | { | 860 | { |
382 | return sign_extend64(address, GEN8_HIGH_ADDRESS_BIT); | 861 | eb_release_vmas(eb); |
862 | if (eb->lut_size >= 0) | ||
863 | memset(eb->buckets, 0, | ||
864 | sizeof(struct hlist_head) << eb->lut_size); | ||
383 | } | 865 | } |
384 | 866 | ||
385 | static inline uint64_t gen8_noncanonical_addr(uint64_t address) | 867 | static void eb_destroy(const struct i915_execbuffer *eb) |
386 | { | 868 | { |
387 | return address & ((1ULL << (GEN8_HIGH_ADDRESS_BIT + 1)) - 1); | 869 | if (eb->lut_size >= 0) |
870 | kfree(eb->buckets); | ||
388 | } | 871 | } |
389 | 872 | ||
390 | static inline uint64_t | 873 | static inline u64 |
391 | relocation_target(const struct drm_i915_gem_relocation_entry *reloc, | 874 | relocation_target(const struct drm_i915_gem_relocation_entry *reloc, |
392 | uint64_t target_offset) | 875 | const struct i915_vma *target) |
393 | { | 876 | { |
394 | return gen8_canonical_addr((int)reloc->delta + target_offset); | 877 | return gen8_canonical_addr((int)reloc->delta + target->node.start); |
395 | } | 878 | } |
396 | 879 | ||
397 | static void reloc_cache_init(struct reloc_cache *cache, | 880 | static void reloc_cache_init(struct reloc_cache *cache, |
@@ -400,6 +883,9 @@ static void reloc_cache_init(struct reloc_cache *cache, | |||
400 | cache->page = -1; | 883 | cache->page = -1; |
401 | cache->vaddr = 0; | 884 | cache->vaddr = 0; |
402 | /* Must be a variable in the struct to allow GCC to unroll. */ | 885 | /* Must be a variable in the struct to allow GCC to unroll. */ |
886 | cache->has_llc = HAS_LLC(i915); | ||
887 | cache->has_fence = INTEL_GEN(i915) < 4; | ||
888 | cache->needs_unfenced = INTEL_INFO(i915)->unfenced_needs_alignment; | ||
403 | cache->use_64bit_reloc = HAS_64BIT_RELOC(i915); | 889 | cache->use_64bit_reloc = HAS_64BIT_RELOC(i915); |
404 | cache->node.allocated = false; | 890 | cache->node.allocated = false; |
405 | } | 891 | } |
@@ -458,7 +944,7 @@ static void reloc_cache_reset(struct reloc_cache *cache) | |||
458 | 944 | ||
459 | static void *reloc_kmap(struct drm_i915_gem_object *obj, | 945 | static void *reloc_kmap(struct drm_i915_gem_object *obj, |
460 | struct reloc_cache *cache, | 946 | struct reloc_cache *cache, |
461 | int page) | 947 | unsigned long page) |
462 | { | 948 | { |
463 | void *vaddr; | 949 | void *vaddr; |
464 | 950 | ||
@@ -466,11 +952,11 @@ static void *reloc_kmap(struct drm_i915_gem_object *obj, | |||
466 | kunmap_atomic(unmask_page(cache->vaddr)); | 952 | kunmap_atomic(unmask_page(cache->vaddr)); |
467 | } else { | 953 | } else { |
468 | unsigned int flushes; | 954 | unsigned int flushes; |
469 | int ret; | 955 | int err; |
470 | 956 | ||
471 | ret = i915_gem_obj_prepare_shmem_write(obj, &flushes); | 957 | err = i915_gem_obj_prepare_shmem_write(obj, &flushes); |
472 | if (ret) | 958 | if (err) |
473 | return ERR_PTR(ret); | 959 | return ERR_PTR(err); |
474 | 960 | ||
475 | BUILD_BUG_ON(KMAP & CLFLUSH_FLAGS); | 961 | BUILD_BUG_ON(KMAP & CLFLUSH_FLAGS); |
476 | BUILD_BUG_ON((KMAP | CLFLUSH_FLAGS) & PAGE_MASK); | 962 | BUILD_BUG_ON((KMAP | CLFLUSH_FLAGS) & PAGE_MASK); |
@@ -490,7 +976,7 @@ static void *reloc_kmap(struct drm_i915_gem_object *obj, | |||
490 | 976 | ||
491 | static void *reloc_iomap(struct drm_i915_gem_object *obj, | 977 | static void *reloc_iomap(struct drm_i915_gem_object *obj, |
492 | struct reloc_cache *cache, | 978 | struct reloc_cache *cache, |
493 | int page) | 979 | unsigned long page) |
494 | { | 980 | { |
495 | struct i915_ggtt *ggtt = cache_to_ggtt(cache); | 981 | struct i915_ggtt *ggtt = cache_to_ggtt(cache); |
496 | unsigned long offset; | 982 | unsigned long offset; |
@@ -500,31 +986,31 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj, | |||
500 | io_mapping_unmap_atomic((void __force __iomem *) unmask_page(cache->vaddr)); | 986 | io_mapping_unmap_atomic((void __force __iomem *) unmask_page(cache->vaddr)); |
501 | } else { | 987 | } else { |
502 | struct i915_vma *vma; | 988 | struct i915_vma *vma; |
503 | int ret; | 989 | int err; |
504 | 990 | ||
505 | if (use_cpu_reloc(obj)) | 991 | if (use_cpu_reloc(cache, obj)) |
506 | return NULL; | 992 | return NULL; |
507 | 993 | ||
508 | ret = i915_gem_object_set_to_gtt_domain(obj, true); | 994 | err = i915_gem_object_set_to_gtt_domain(obj, true); |
509 | if (ret) | 995 | if (err) |
510 | return ERR_PTR(ret); | 996 | return ERR_PTR(err); |
511 | 997 | ||
512 | vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, | 998 | vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, |
513 | PIN_MAPPABLE | PIN_NONBLOCK); | 999 | PIN_MAPPABLE | PIN_NONBLOCK); |
514 | if (IS_ERR(vma)) { | 1000 | if (IS_ERR(vma)) { |
515 | memset(&cache->node, 0, sizeof(cache->node)); | 1001 | memset(&cache->node, 0, sizeof(cache->node)); |
516 | ret = drm_mm_insert_node_in_range | 1002 | err = drm_mm_insert_node_in_range |
517 | (&ggtt->base.mm, &cache->node, | 1003 | (&ggtt->base.mm, &cache->node, |
518 | PAGE_SIZE, 0, I915_COLOR_UNEVICTABLE, | 1004 | PAGE_SIZE, 0, I915_COLOR_UNEVICTABLE, |
519 | 0, ggtt->mappable_end, | 1005 | 0, ggtt->mappable_end, |
520 | DRM_MM_INSERT_LOW); | 1006 | DRM_MM_INSERT_LOW); |
521 | if (ret) /* no inactive aperture space, use cpu reloc */ | 1007 | if (err) /* no inactive aperture space, use cpu reloc */ |
522 | return NULL; | 1008 | return NULL; |
523 | } else { | 1009 | } else { |
524 | ret = i915_vma_put_fence(vma); | 1010 | err = i915_vma_put_fence(vma); |
525 | if (ret) { | 1011 | if (err) { |
526 | i915_vma_unpin(vma); | 1012 | i915_vma_unpin(vma); |
527 | return ERR_PTR(ret); | 1013 | return ERR_PTR(err); |
528 | } | 1014 | } |
529 | 1015 | ||
530 | cache->node.start = vma->node.start; | 1016 | cache->node.start = vma->node.start; |
@@ -552,7 +1038,7 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj, | |||
552 | 1038 | ||
553 | static void *reloc_vaddr(struct drm_i915_gem_object *obj, | 1039 | static void *reloc_vaddr(struct drm_i915_gem_object *obj, |
554 | struct reloc_cache *cache, | 1040 | struct reloc_cache *cache, |
555 | int page) | 1041 | unsigned long page) |
556 | { | 1042 | { |
557 | void *vaddr; | 1043 | void *vaddr; |
558 | 1044 | ||
@@ -579,7 +1065,8 @@ static void clflush_write32(u32 *addr, u32 value, unsigned int flushes) | |||
579 | 1065 | ||
580 | *addr = value; | 1066 | *addr = value; |
581 | 1067 | ||
582 | /* Writes to the same cacheline are serialised by the CPU | 1068 | /* |
1069 | * Writes to the same cacheline are serialised by the CPU | ||
583 | * (including clflush). On the write path, we only require | 1070 | * (including clflush). On the write path, we only require |
584 | * that it hits memory in an orderly fashion and place | 1071 | * that it hits memory in an orderly fashion and place |
585 | * mb barriers at the start and end of the relocation phase | 1072 | * mb barriers at the start and end of the relocation phase |
@@ -591,25 +1078,26 @@ static void clflush_write32(u32 *addr, u32 value, unsigned int flushes) | |||
591 | *addr = value; | 1078 | *addr = value; |
592 | } | 1079 | } |
593 | 1080 | ||
594 | static int | 1081 | static u64 |
595 | relocate_entry(struct drm_i915_gem_object *obj, | 1082 | relocate_entry(struct i915_vma *vma, |
596 | const struct drm_i915_gem_relocation_entry *reloc, | 1083 | const struct drm_i915_gem_relocation_entry *reloc, |
597 | struct reloc_cache *cache, | 1084 | struct i915_execbuffer *eb, |
598 | u64 target_offset) | 1085 | const struct i915_vma *target) |
599 | { | 1086 | { |
1087 | struct drm_i915_gem_object *obj = vma->obj; | ||
600 | u64 offset = reloc->offset; | 1088 | u64 offset = reloc->offset; |
601 | bool wide = cache->use_64bit_reloc; | 1089 | u64 target_offset = relocation_target(reloc, target); |
1090 | bool wide = eb->reloc_cache.use_64bit_reloc; | ||
602 | void *vaddr; | 1091 | void *vaddr; |
603 | 1092 | ||
604 | target_offset = relocation_target(reloc, target_offset); | ||
605 | repeat: | 1093 | repeat: |
606 | vaddr = reloc_vaddr(obj, cache, offset >> PAGE_SHIFT); | 1094 | vaddr = reloc_vaddr(obj, &eb->reloc_cache, offset >> PAGE_SHIFT); |
607 | if (IS_ERR(vaddr)) | 1095 | if (IS_ERR(vaddr)) |
608 | return PTR_ERR(vaddr); | 1096 | return PTR_ERR(vaddr); |
609 | 1097 | ||
610 | clflush_write32(vaddr + offset_in_page(offset), | 1098 | clflush_write32(vaddr + offset_in_page(offset), |
611 | lower_32_bits(target_offset), | 1099 | lower_32_bits(target_offset), |
612 | cache->vaddr); | 1100 | eb->reloc_cache.vaddr); |
613 | 1101 | ||
614 | if (wide) { | 1102 | if (wide) { |
615 | offset += sizeof(u32); | 1103 | offset += sizeof(u32); |
@@ -618,17 +1106,16 @@ repeat: | |||
618 | goto repeat; | 1106 | goto repeat; |
619 | } | 1107 | } |
620 | 1108 | ||
621 | return 0; | 1109 | return target->node.start | UPDATE; |
622 | } | 1110 | } |
623 | 1111 | ||
624 | static int | 1112 | static u64 |
625 | eb_relocate_entry(struct i915_vma *vma, | 1113 | eb_relocate_entry(struct i915_execbuffer *eb, |
626 | struct i915_execbuffer *eb, | 1114 | struct i915_vma *vma, |
627 | struct drm_i915_gem_relocation_entry *reloc) | 1115 | const struct drm_i915_gem_relocation_entry *reloc) |
628 | { | 1116 | { |
629 | struct i915_vma *target; | 1117 | struct i915_vma *target; |
630 | u64 target_offset; | 1118 | int err; |
631 | int ret; | ||
632 | 1119 | ||
633 | /* we already hold a reference to all valid objects */ | 1120 | /* we already hold a reference to all valid objects */
634 | target = eb_get_vma(eb, reloc->target_handle); | 1121 | target = eb_get_vma(eb, reloc->target_handle); |
@@ -658,27 +1145,30 @@ eb_relocate_entry(struct i915_vma *vma, | |||
658 | return -EINVAL; | 1145 | return -EINVAL; |
659 | } | 1146 | } |
660 | 1147 | ||
661 | if (reloc->write_domain) | 1148 | if (reloc->write_domain) { |
662 | target->exec_entry->flags |= EXEC_OBJECT_WRITE; | 1149 | target->exec_entry->flags |= EXEC_OBJECT_WRITE; |
663 | 1150 | ||
664 | /* | 1151 | /* |
665 | * Sandybridge PPGTT errata: We need a global gtt mapping for MI and | 1152 | * Sandybridge PPGTT errata: We need a global gtt mapping |
666 | * pipe_control writes because the gpu doesn't properly redirect them | 1153 | * for MI and pipe_control writes because the gpu doesn't |
667 | * through the ppgtt for non_secure batchbuffers. | 1154 | * properly redirect them through the ppgtt for non_secure |
668 | */ | 1155 | * batchbuffers. |
669 | if (unlikely(IS_GEN6(eb->i915) && | 1156 | */ |
670 | reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION)) { | 1157 | if (reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION && |
671 | ret = i915_vma_bind(target, target->obj->cache_level, | 1158 | IS_GEN6(eb->i915)) { |
672 | PIN_GLOBAL); | 1159 | err = i915_vma_bind(target, target->obj->cache_level, |
673 | if (WARN_ONCE(ret, "Unexpected failure to bind target VMA!")) | 1160 | PIN_GLOBAL); |
674 | return ret; | 1161 | if (WARN_ONCE(err, |
1162 | "Unexpected failure to bind target VMA!")) | ||
1163 | return err; | ||
1164 | } | ||
675 | } | 1165 | } |
676 | 1166 | ||
677 | /* If the relocation already has the right value in it, no | 1167 | /* |
1168 | * If the relocation already has the right value in it, no | ||
678 | * more work needs to be done. | 1169 | * more work needs to be done. |
679 | */ | 1170 | */ |
680 | target_offset = gen8_canonical_addr(target->node.start); | 1171 | if (gen8_canonical_addr(target->node.start) == reloc->presumed_offset) |
681 | if (target_offset == reloc->presumed_offset) | ||
682 | return 0; | 1172 | return 0; |
683 | 1173 | ||
684 | /* Check that the relocation address is valid... */ | 1174 | /* Check that the relocation address is valid... */ |
@@ -709,35 +1199,39 @@ eb_relocate_entry(struct i915_vma *vma, | |||
709 | */ | 1199 | */ |
710 | vma->exec_entry->flags &= ~EXEC_OBJECT_ASYNC; | 1200 | vma->exec_entry->flags &= ~EXEC_OBJECT_ASYNC; |
711 | 1201 | ||
712 | ret = relocate_entry(vma->obj, reloc, &eb->reloc_cache, target_offset); | ||
713 | if (ret) | ||
714 | return ret; | ||
715 | |||
716 | /* and update the user's relocation entry */ | 1202 | /* and update the user's relocation entry */ |
717 | reloc->presumed_offset = target_offset; | 1203 | return relocate_entry(vma, reloc, eb, target); |
718 | return 0; | ||
719 | } | 1204 | } |
720 | 1205 | ||
721 | static int eb_relocate_vma(struct i915_vma *vma, struct i915_execbuffer *eb) | 1206 | static int eb_relocate_vma(struct i915_execbuffer *eb, struct i915_vma *vma) |
722 | { | 1207 | { |
723 | #define N_RELOC(x) ((x) / sizeof(struct drm_i915_gem_relocation_entry)) | 1208 | #define N_RELOC(x) ((x) / sizeof(struct drm_i915_gem_relocation_entry)) |
724 | struct drm_i915_gem_relocation_entry stack_reloc[N_RELOC(512)]; | 1209 | struct drm_i915_gem_relocation_entry stack[N_RELOC(512)]; |
725 | struct drm_i915_gem_relocation_entry __user *user_relocs; | 1210 | struct drm_i915_gem_relocation_entry __user *urelocs; |
726 | struct drm_i915_gem_exec_object2 *entry = vma->exec_entry; | 1211 | const struct drm_i915_gem_exec_object2 *entry = vma->exec_entry; |
727 | int remain, ret = 0; | 1212 | unsigned int remain; |
728 | |||
729 | user_relocs = u64_to_user_ptr(entry->relocs_ptr); | ||
730 | 1213 | ||
1214 | urelocs = u64_to_user_ptr(entry->relocs_ptr); | ||
731 | remain = entry->relocation_count; | 1215 | remain = entry->relocation_count; |
732 | while (remain) { | 1216 | if (unlikely(remain > N_RELOC(ULONG_MAX))) |
733 | struct drm_i915_gem_relocation_entry *r = stack_reloc; | 1217 | return -EINVAL; |
734 | unsigned long unwritten; | ||
735 | unsigned int count; | ||
736 | 1218 | ||
737 | count = min_t(unsigned int, remain, ARRAY_SIZE(stack_reloc)); | 1219 | /* |
738 | remain -= count; | 1220 | * We must check that the entire relocation array is safe |
1221 | * to read. However, if the array is not writable the user loses | ||
1222 | * the updated relocation values. | ||
1223 | */ | ||
1224 | if (unlikely(!access_ok(VERIFY_READ, urelocs, remain*sizeof(*urelocs)))) ||
1225 | return -EFAULT; | ||
1226 | |||
1227 | do { | ||
1228 | struct drm_i915_gem_relocation_entry *r = stack; | ||
1229 | unsigned int count = | ||
1230 | min_t(unsigned int, remain, ARRAY_SIZE(stack)); | ||
1231 | unsigned int copied; | ||
739 | 1232 | ||
740 | /* This is the fast path and we cannot handle a pagefault | 1233 | /* |
1234 | * This is the fast path and we cannot handle a pagefault | ||
741 | * whilst holding the struct mutex lest the user pass in the | 1235 | * whilst holding the struct mutex lest the user pass in the |
742 | * relocations contained within a mmaped bo. For in such a case | 1236 | * relocations contained within a mmaped bo. For in such a case |
743 | * we, the page fault handler would call i915_gem_fault() and | 1237 | * we, the page fault handler would call i915_gem_fault() and |
@@ -745,409 +1239,357 @@ static int eb_relocate_vma(struct i915_vma *vma, struct i915_execbuffer *eb) | |||
745 | * this is bad and so lockdep complains vehemently. | 1239 | * this is bad and so lockdep complains vehemently. |
746 | */ | 1240 | */ |
747 | pagefault_disable(); | 1241 | pagefault_disable(); |
748 | unwritten = __copy_from_user_inatomic(r, user_relocs, count*sizeof(r[0])); | 1242 | copied = __copy_from_user_inatomic(r, urelocs, count * sizeof(r[0])); |
749 | pagefault_enable(); | 1243 | pagefault_enable(); |
750 | if (unlikely(unwritten)) { | 1244 | if (unlikely(copied)) { |
751 | ret = -EFAULT; | 1245 | remain = -EFAULT; |
752 | goto out; | 1246 | goto out; |
753 | } | 1247 | } |
754 | 1248 | ||
1249 | remain -= count; | ||
755 | do { | 1250 | do { |
756 | u64 offset = r->presumed_offset; | 1251 | u64 offset = eb_relocate_entry(eb, vma, r); |
757 | 1252 | ||
758 | ret = eb_relocate_entry(vma, eb, r); | 1253 | if (likely(offset == 0)) { |
759 | if (ret) | 1254 | } else if ((s64)offset < 0) { |
1255 | remain = (int)offset; | ||
760 | goto out; | 1256 | goto out; |
761 | 1257 | } else { | |
762 | if (r->presumed_offset != offset) { | 1258 | /* |
763 | pagefault_disable(); | 1259 | * Note that reporting an error now |
764 | unwritten = __put_user(r->presumed_offset, | 1260 | * leaves everything in an inconsistent |
765 | &user_relocs->presumed_offset); | 1261 | * state as we have *already* changed |
766 | pagefault_enable(); | 1262 | * the relocation value inside the |
767 | if (unlikely(unwritten)) { | 1263 | * object. As we have not changed the |
768 | /* Note that reporting an error now | 1264 | * reloc.presumed_offset or will not |
769 | * leaves everything in an inconsistent | 1265 | * change the execobject.offset, on the next |
770 | * state as we have *already* changed | 1266 | * call we may not rewrite the value |
771 | * the relocation value inside the | 1267 | * inside the object, leaving it |
772 | * object. As we have not changed the | 1268 | * dangling and causing a GPU hang. Unless |
773 | * reloc.presumed_offset or will not | 1269 | * userspace dynamically rebuilds the |
774 | * change the execobject.offset, on the | 1270 | * relocations on each execbuf rather than |
775 | * call we may not rewrite the value | 1271 | * presume a static tree. |
776 | * inside the object, leaving it | 1272 | * |
777 | * dangling and causing a GPU hang. | 1273 | * We did previously check if the relocations |
778 | */ | 1274 | * were writable (access_ok), an error now |
779 | ret = -EFAULT; | 1275 | * would be a strange race with mprotect, |
780 | goto out; | 1276 | * having already demonstrated that we |
781 | } | 1277 | * can read from this userspace address. |
1278 | */ | ||
1279 | offset = gen8_canonical_addr(offset & ~UPDATE); | ||
1280 | __put_user(offset, | ||
1281 | &urelocs[r-stack].presumed_offset); | ||
782 | } | 1282 | } |
783 | 1283 | } while (r++, --count); | |
784 | user_relocs++; | 1284 | urelocs += ARRAY_SIZE(stack); |
785 | r++; | 1285 | } while (remain); |
786 | } while (--count); | ||
787 | } | ||
788 | |||
789 | out: | 1286 | out: |
790 | reloc_cache_reset(&eb->reloc_cache); | 1287 | reloc_cache_reset(&eb->reloc_cache); |
791 | return ret; | 1288 | return remain; |
792 | #undef N_RELOC | ||
793 | } | 1289 | } |
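
The loop above walks the user's relocation array in stack-sized batches so that each batch can be copied with page faults disabled while struct_mutex is held, escalating to the slow path on any fault. A simplified, self-contained sketch of that chunking pattern (plain C; fetch_chunk() is a hypothetical stand-in for __copy_from_user_inatomic() and returns the number of bytes it could not copy):

#include <stddef.h>

#define BATCH 64	/* entries handled per copy, playing the role of N_RELOC(512) */

/* Hypothetical: copies n bytes, returns how many bytes were NOT copied. */
extern size_t fetch_chunk(void *dst, const void *src, size_t n);

static int process_relocs(const long *user, size_t remain)
{
	long stack[BATCH];

	do {
		size_t count = remain < BATCH ? remain : BATCH;

		if (fetch_chunk(stack, user, count * sizeof(*stack)))
			return -1;	/* caller escalates to the slow path */

		remain -= count;
		/* ... apply stack[0 .. count-1] here ... */
		user += count;
	} while (remain);

	return 0;
}

The important property, mirrored from the function above, is that a fault never happens implicitly: each copy either succeeds atomically or reports a residue, and the caller decides how to retry.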
794 | 1290 | ||
795 | static int | 1291 | static int |
796 | eb_relocate_vma_slow(struct i915_vma *vma, | 1292 | eb_relocate_vma_slow(struct i915_execbuffer *eb, struct i915_vma *vma) |
797 | struct i915_execbuffer *eb, | ||
798 | struct drm_i915_gem_relocation_entry *relocs) | ||
799 | { | 1293 | { |
800 | const struct drm_i915_gem_exec_object2 *entry = vma->exec_entry; | 1294 | const struct drm_i915_gem_exec_object2 *entry = vma->exec_entry; |
801 | int i, ret = 0; | 1295 | struct drm_i915_gem_relocation_entry *relocs = |
1296 | u64_to_ptr(typeof(*relocs), entry->relocs_ptr); | ||
1297 | unsigned int i; | ||
1298 | int err; | ||
802 | 1299 | ||
803 | for (i = 0; i < entry->relocation_count; i++) { | 1300 | for (i = 0; i < entry->relocation_count; i++) { |
804 | ret = eb_relocate_entry(vma, eb, &relocs[i]); | 1301 | u64 offset = eb_relocate_entry(eb, vma, &relocs[i]); |
805 | if (ret) | 1302 | |
806 | break; | 1303 | if ((s64)offset < 0) { |
1304 | err = (int)offset; | ||
1305 | goto err; | ||
1306 | } | ||
807 | } | 1307 | } |
1308 | err = 0; | ||
1309 | err: | ||
808 | reloc_cache_reset(&eb->reloc_cache); | 1310 | reloc_cache_reset(&eb->reloc_cache); |
809 | return ret; | 1311 | return err; |
810 | } | 1312 | } |
811 | 1313 | ||
812 | static int eb_relocate(struct i915_execbuffer *eb) | 1314 | static int check_relocations(const struct drm_i915_gem_exec_object2 *entry) |
813 | { | 1315 | { |
814 | struct i915_vma *vma; | 1316 | const char __user *addr, *end; |
815 | int ret = 0; | 1317 | unsigned long size; |
1318 | char __maybe_unused c; | ||
816 | 1319 | ||
817 | list_for_each_entry(vma, &eb->vmas, exec_link) { | 1320 | size = entry->relocation_count; |
818 | ret = eb_relocate_vma(vma, eb); | 1321 | if (size == 0) |
819 | if (ret) | 1322 | return 0; |
820 | break; | ||
821 | } | ||
822 | 1323 | ||
823 | return ret; | 1324 | if (size > N_RELOC(ULONG_MAX)) |
824 | } | 1325 | return -EINVAL; |
825 | 1326 | ||
826 | static bool only_mappable_for_reloc(unsigned int flags) | 1327 | addr = u64_to_user_ptr(entry->relocs_ptr); |
827 | { | 1328 | size *= sizeof(struct drm_i915_gem_relocation_entry); |
828 | return (flags & (EXEC_OBJECT_NEEDS_FENCE | __EXEC_OBJECT_NEEDS_MAP)) == | 1329 | if (!access_ok(VERIFY_READ, addr, size)) |
829 | __EXEC_OBJECT_NEEDS_MAP; | 1330 | return -EFAULT; |
1331 | |||
1332 | end = addr + size; | ||
1333 | for (; addr < end; addr += PAGE_SIZE) { | ||
1334 | int err = __get_user(c, addr); | ||
1335 | if (err) | ||
1336 | return err; | ||
1337 | } | ||
1338 | return __get_user(c, end - 1); | ||
830 | } | 1339 | } |
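
check_relocations() above only needs to touch one byte per page, plus the final byte, because a successful read of any byte faults in the whole page containing it. The same stride trick, written as a standalone sketch over an arbitrary buffer (assumes 4KiB pages; the kernel code uses __get_user() rather than a raw dereference):

/* Probe one byte per 4KiB page of [addr, addr + size), then the last byte. */
static int probe_pages(const volatile unsigned char *addr, unsigned long size)
{
	const volatile unsigned char *end = addr + size;
	unsigned char c = 0;

	for (; addr < end; addr += 4096)
		c = *addr;		/* faults in this page if not resident */
	if (size)
		c = end[-1];		/* and explicitly cover the tail byte */
	(void)c;			/* the value is irrelevant, the fault is not */
	return 0;
}

This is also why the relocation array only has to be readable here: the later write-back of presumed_offset is treated as best-effort.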
831 | 1340 | ||
832 | static int | 1341 | static int eb_copy_relocations(const struct i915_execbuffer *eb) |
833 | eb_reserve_vma(struct i915_vma *vma, | ||
834 | struct intel_engine_cs *engine, | ||
835 | bool *need_reloc) | ||
836 | { | 1342 | { |
837 | struct drm_i915_gem_exec_object2 *entry = vma->exec_entry; | 1343 | const unsigned int count = eb->buffer_count; |
838 | uint64_t flags; | 1344 | unsigned int i; |
839 | int ret; | 1345 | int err; |
840 | |||
841 | flags = PIN_USER; | ||
842 | if (entry->flags & EXEC_OBJECT_NEEDS_GTT) | ||
843 | flags |= PIN_GLOBAL; | ||
844 | |||
845 | if (!drm_mm_node_allocated(&vma->node)) { | ||
846 | /* Wa32bitGeneralStateOffset & Wa32bitInstructionBaseOffset, | ||
847 | * limit address to the first 4GBs for unflagged objects. | ||
848 | */ | ||
849 | if ((entry->flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) == 0) | ||
850 | flags |= PIN_ZONE_4G; | ||
851 | if (entry->flags & __EXEC_OBJECT_NEEDS_MAP) | ||
852 | flags |= PIN_GLOBAL | PIN_MAPPABLE; | ||
853 | if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS) | ||
854 | flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS; | ||
855 | if (entry->flags & EXEC_OBJECT_PINNED) | ||
856 | flags |= entry->offset | PIN_OFFSET_FIXED; | ||
857 | if ((flags & PIN_MAPPABLE) == 0) | ||
858 | flags |= PIN_HIGH; | ||
859 | } | ||
860 | |||
861 | ret = i915_vma_pin(vma, | ||
862 | entry->pad_to_size, | ||
863 | entry->alignment, | ||
864 | flags); | ||
865 | if ((ret == -ENOSPC || ret == -E2BIG) && | ||
866 | only_mappable_for_reloc(entry->flags)) | ||
867 | ret = i915_vma_pin(vma, | ||
868 | entry->pad_to_size, | ||
869 | entry->alignment, | ||
870 | flags & ~PIN_MAPPABLE); | ||
871 | if (ret) | ||
872 | return ret; | ||
873 | 1346 | ||
874 | entry->flags |= __EXEC_OBJECT_HAS_PIN; | 1347 | for (i = 0; i < count; i++) { |
1348 | const unsigned int nreloc = eb->exec[i].relocation_count; | ||
1349 | struct drm_i915_gem_relocation_entry __user *urelocs; | ||
1350 | struct drm_i915_gem_relocation_entry *relocs; | ||
1351 | unsigned long size; | ||
1352 | unsigned long copied; | ||
875 | 1353 | ||
876 | if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) { | 1354 | if (nreloc == 0) |
877 | ret = i915_vma_get_fence(vma); | 1355 | continue; |
878 | if (ret) | ||
879 | return ret; | ||
880 | 1356 | ||
881 | if (i915_vma_pin_fence(vma)) | 1357 | err = check_relocations(&eb->exec[i]); |
882 | entry->flags |= __EXEC_OBJECT_HAS_FENCE; | 1358 | if (err) |
883 | } | 1359 | goto err; |
884 | 1360 | ||
885 | if (entry->offset != vma->node.start) { | 1361 | urelocs = u64_to_user_ptr(eb->exec[i].relocs_ptr); |
886 | entry->offset = vma->node.start; | 1362 | size = nreloc * sizeof(*relocs); |
887 | *need_reloc = true; | ||
888 | } | ||
889 | 1363 | ||
890 | return 0; | 1364 | relocs = kvmalloc_array(size, 1, GFP_TEMPORARY); |
891 | } | 1365 | if (!relocs) { |
1366 | kvfree(relocs); | ||
1367 | err = -ENOMEM; | ||
1368 | goto err; | ||
1369 | } | ||
892 | 1370 | ||
893 | static bool | 1371 | /* copy_from_user is limited to < 4GiB */ |
894 | need_reloc_mappable(struct i915_vma *vma) | 1372 | copied = 0; |
895 | { | 1373 | do { |
896 | struct drm_i915_gem_exec_object2 *entry = vma->exec_entry; | 1374 | unsigned int len = |
1375 | min_t(u64, BIT_ULL(31), size - copied); | ||
1376 | |||
1377 | if (__copy_from_user((char *)relocs + copied, | ||
1378 | (char *)urelocs + copied, | ||
1379 | len)) { | ||
1380 | kvfree(relocs); | ||
1381 | err = -EFAULT; | ||
1382 | goto err; | ||
1383 | } | ||
897 | 1384 | ||
898 | if (entry->relocation_count == 0) | 1385 | copied += len; |
899 | return false; | 1386 | } while (copied < size); |
900 | 1387 | ||
901 | if (!i915_vma_is_ggtt(vma)) | 1388 | /* |
902 | return false; | 1389 | * As we do not update the known relocation offsets after |
1390 | * relocating (due to the complexities in lock handling), | ||
1391 | * we need to mark them as invalid now so that we force the | ||
1392 | * relocation processing next time. Just in case the target | ||
1393 | * object is evicted and then rebound into its old | ||
1394 | * presumed_offset before the next execbuffer - if that | ||
1395 | * happened we would make the mistake of assuming that the | ||
1396 | * relocations were valid. | ||
1397 | */ | ||
1398 | user_access_begin(); | ||
1399 | for (copied = 0; copied < nreloc; copied++) | ||
1400 | unsafe_put_user(-1, | ||
1401 | &urelocs[copied].presumed_offset, | ||
1402 | end_user); | ||
1403 | end_user: | ||
1404 | user_access_end(); | ||
903 | 1405 | ||
904 | /* See also use_cpu_reloc() */ | 1406 | eb->exec[i].relocs_ptr = (uintptr_t)relocs; |
905 | if (HAS_LLC(to_i915(vma->obj->base.dev))) | 1407 | } |
906 | return false; | ||
907 | 1408 | ||
908 | if (vma->obj->base.write_domain == I915_GEM_DOMAIN_CPU) | 1409 | return 0; |
909 | return false; | ||
910 | 1410 | ||
911 | return true; | 1411 | err: |
1412 | while (i--) { | ||
1413 | struct drm_i915_gem_relocation_entry *relocs = | ||
1414 | u64_to_ptr(typeof(*relocs), eb->exec[i].relocs_ptr); | ||
1415 | if (eb->exec[i].relocation_count) | ||
1416 | kvfree(relocs); | ||
1417 | } | ||
1418 | return err; | ||
912 | } | 1419 | } |
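
Two details above are worth calling out. First, as the in-line comment notes, a single copy_from_user() call is limited to less than 4GiB, so the bulk copy is split into chunks capped at 2^31 bytes. Second, once the kernel owns a private copy, every user-visible presumed_offset is poisoned with -1 so a later execbuf cannot mistake stale offsets for valid ones. A hedged sketch of just the chunking arithmetic (bulk_copy() is a hypothetical stand-in for __copy_from_user()):

#include <stdint.h>
#include <stddef.h>

/* Hypothetical: returns non-zero if any byte could not be copied. */
extern int bulk_copy(void *dst, const void *src, size_t len);

static int copy_in_chunks(void *dst, const void *src, uint64_t size)
{
	uint64_t copied = 0;

	while (copied < size) {
		uint64_t len = size - copied;

		if (len > (1ull << 31))		/* keep each call below 4GiB */
			len = 1ull << 31;
		if (bulk_copy((char *)dst + copied,
			      (const char *)src + copied, (size_t)len))
			return -1;
		copied += len;
	}
	return 0;
}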
913 | 1420 | ||
914 | static bool | 1421 | static int eb_prefault_relocations(const struct i915_execbuffer *eb) |
915 | eb_vma_misplaced(struct i915_vma *vma) | ||
916 | { | 1422 | { |
917 | struct drm_i915_gem_exec_object2 *entry = vma->exec_entry; | 1423 | const unsigned int count = eb->buffer_count; |
918 | 1424 | unsigned int i; | |
919 | WARN_ON(entry->flags & __EXEC_OBJECT_NEEDS_MAP && | ||
920 | !i915_vma_is_ggtt(vma)); | ||
921 | |||
922 | if (entry->alignment && !IS_ALIGNED(vma->node.start, entry->alignment)) | ||
923 | return true; | ||
924 | 1425 | ||
925 | if (vma->node.size < entry->pad_to_size) | 1426 | if (unlikely(i915.prefault_disable)) |
926 | return true; | 1427 | return 0; |
927 | |||
928 | if (entry->flags & EXEC_OBJECT_PINNED && | ||
929 | vma->node.start != entry->offset) | ||
930 | return true; | ||
931 | |||
932 | if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS && | ||
933 | vma->node.start < BATCH_OFFSET_BIAS) | ||
934 | return true; | ||
935 | 1428 | ||
936 | /* avoid costly ping-pong once a batch bo ended up non-mappable */ | 1429 | for (i = 0; i < count; i++) { |
937 | if (entry->flags & __EXEC_OBJECT_NEEDS_MAP && | 1430 | int err; |
938 | !i915_vma_is_map_and_fenceable(vma)) | ||
939 | return !only_mappable_for_reloc(entry->flags); | ||
940 | 1431 | ||
941 | if ((entry->flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) == 0 && | 1432 | err = check_relocations(&eb->exec[i]); |
942 | (vma->node.start + vma->node.size - 1) >> 32) | 1433 | if (err) |
943 | return true; | 1434 | return err; |
1435 | } | ||
944 | 1436 | ||
945 | return false; | 1437 | return 0; |
946 | } | 1438 | } |
947 | 1439 | ||
948 | static int eb_reserve(struct i915_execbuffer *eb) | 1440 | static noinline int eb_relocate_slow(struct i915_execbuffer *eb) |
949 | { | 1441 | { |
950 | const bool has_fenced_gpu_access = INTEL_GEN(eb->i915) < 4; | 1442 | struct drm_device *dev = &eb->i915->drm; |
951 | const bool needs_unfenced_map = INTEL_INFO(eb->i915)->unfenced_needs_alignment; | 1443 | bool have_copy = false; |
952 | struct i915_vma *vma; | 1444 | struct i915_vma *vma; |
953 | struct list_head ordered_vmas; | 1445 | int err = 0; |
954 | struct list_head pinned_vmas; | 1446 | |
955 | int retry; | 1447 | repeat: |
956 | 1448 | if (signal_pending(current)) { | |
957 | INIT_LIST_HEAD(&ordered_vmas); | 1449 | err = -ERESTARTSYS; |
958 | INIT_LIST_HEAD(&pinned_vmas); | 1450 | goto out; |
959 | while (!list_empty(&eb->vmas)) { | 1451 | } |
960 | struct drm_i915_gem_exec_object2 *entry; | 1452 | |
961 | bool need_fence, need_mappable; | 1453 | /* We may process another execbuffer during the unlock... */ |
962 | 1454 | eb_reset_vmas(eb); | |
963 | vma = list_first_entry(&eb->vmas, struct i915_vma, exec_link); | 1455 | mutex_unlock(&dev->struct_mutex); |
964 | entry = vma->exec_entry; | 1456 | |
965 | 1457 | /* | |
966 | if (eb->ctx->flags & CONTEXT_NO_ZEROMAP) | 1458 | * We take 3 passes through the slowpath. |
967 | entry->flags |= __EXEC_OBJECT_NEEDS_BIAS; | ||
968 | |||
969 | if (!has_fenced_gpu_access) | ||
970 | entry->flags &= ~EXEC_OBJECT_NEEDS_FENCE; | ||
971 | need_fence = | ||
972 | (entry->flags & EXEC_OBJECT_NEEDS_FENCE || | ||
973 | needs_unfenced_map) && | ||
974 | i915_gem_object_is_tiled(vma->obj); | ||
975 | need_mappable = need_fence || need_reloc_mappable(vma); | ||
976 | |||
977 | if (entry->flags & EXEC_OBJECT_PINNED) | ||
978 | list_move_tail(&vma->exec_link, &pinned_vmas); | ||
979 | else if (need_mappable) { | ||
980 | entry->flags |= __EXEC_OBJECT_NEEDS_MAP; | ||
981 | list_move(&vma->exec_link, &ordered_vmas); | ||
982 | } else | ||
983 | list_move_tail(&vma->exec_link, &ordered_vmas); | ||
984 | } | ||
985 | list_splice(&ordered_vmas, &eb->vmas); | ||
986 | list_splice(&pinned_vmas, &eb->vmas); | ||
987 | |||
988 | /* Attempt to pin all of the buffers into the GTT. | ||
989 | * This is done in 3 phases: | ||
990 | * | 1459 | * |
991 | * 1a. Unbind all objects that do not match the GTT constraints for | 1460 | * 1 - we try to just prefault all the user relocation entries and |
992 | * the execbuffer (fenceable, mappable, alignment etc). | 1461 | * then attempt to reuse the atomic pagefault disabled fast path again. |
993 | * 1b. Increment pin count for already bound objects. | ||
994 | * 2. Bind new objects. | ||
995 | * 3. Decrement pin count. | ||
996 | * | 1462 | * |
997 | * This avoids unnecessary unbinding of later objects in order to make | 1463 | * 2 - we copy the user entries to a local buffer here outside of the |
998 | * room for the earlier objects *unless* we need to defragment. | 1464 | * lock and allow ourselves to wait upon any rendering before |
1465 | * relocating. | ||
1466 | * | ||
1467 | * 3 - we already have a local copy of the relocation entries, but | ||
1468 | * were interrupted (EAGAIN) whilst waiting for the objects, try again. | ||
999 | */ | 1469 | */ |
1000 | retry = 0; | 1470 | if (!err) { |
1001 | do { | 1471 | err = eb_prefault_relocations(eb); |
1002 | int ret = 0; | 1472 | } else if (!have_copy) { |
1003 | 1473 | err = eb_copy_relocations(eb); | |
1004 | /* Unbind any ill-fitting objects or pin. */ | 1474 | have_copy = err == 0; |
1005 | list_for_each_entry(vma, &eb->vmas, exec_link) { | 1475 | } else { |
1006 | if (!drm_mm_node_allocated(&vma->node)) | 1476 | cond_resched(); |
1007 | continue; | 1477 | err = 0; |
1478 | } | ||
1479 | if (err) { | ||
1480 | mutex_lock(&dev->struct_mutex); | ||
1481 | goto out; | ||
1482 | } | ||
1008 | 1483 | ||
1009 | if (eb_vma_misplaced(vma)) | 1484 | err = i915_mutex_lock_interruptible(dev); |
1010 | ret = i915_vma_unbind(vma); | 1485 | if (err) { |
1011 | else | 1486 | mutex_lock(&dev->struct_mutex); |
1012 | ret = eb_reserve_vma(vma, eb->engine, &eb->need_relocs); | 1487 | goto out; |
1013 | if (ret) | 1488 | } |
1014 | goto err; | ||
1015 | } | ||
1016 | 1489 | ||
1017 | /* Bind fresh objects */ | 1490 | /* reacquire the objects */ |
1018 | list_for_each_entry(vma, &eb->vmas, exec_link) { | 1491 | err = eb_lookup_vmas(eb); |
1019 | if (drm_mm_node_allocated(&vma->node)) | 1492 | if (err) |
1020 | continue; | 1493 | goto err; |
1021 | 1494 | ||
1022 | ret = eb_reserve_vma(vma, eb->engine, &eb->need_relocs); | 1495 | list_for_each_entry(vma, &eb->relocs, reloc_link) { |
1023 | if (ret) | 1496 | if (!have_copy) { |
1497 | pagefault_disable(); | ||
1498 | err = eb_relocate_vma(eb, vma); | ||
1499 | pagefault_enable(); | ||
1500 | if (err) | ||
1501 | goto repeat; | ||
1502 | } else { | ||
1503 | err = eb_relocate_vma_slow(eb, vma); | ||
1504 | if (err) | ||
1024 | goto err; | 1505 | goto err; |
1025 | } | 1506 | } |
1507 | } | ||
1026 | 1508 | ||
1027 | err: | 1509 | /* |
1028 | if (ret != -ENOSPC || retry++) | 1510 | * Leave the user relocations as they are; this is the painfully slow path, |
1029 | return ret; | 1511 | * and we want to avoid the complication of dropping the lock whilst |
1030 | 1512 | * having buffers reserved in the aperture and so causing spurious | |
1031 | /* Decrement pin count for bound objects */ | 1513 | * ENOSPC for random operations. |
1032 | list_for_each_entry(vma, &eb->vmas, exec_link) | 1514 | */ |
1033 | eb_unreserve_vma(vma); | ||
1034 | 1515 | ||
1035 | ret = i915_gem_evict_vm(eb->vm, true); | 1516 | err: |
1036 | if (ret) | 1517 | if (err == -EAGAIN) |
1037 | return ret; | 1518 | goto repeat; |
1038 | } while (1); | ||
1039 | } | ||
1040 | 1519 | ||
1041 | static int | 1520 | out: |
1042 | eb_relocate_slow(struct i915_execbuffer *eb) | 1521 | if (have_copy) { |
1043 | { | 1522 | const unsigned int count = eb->buffer_count; |
1044 | const unsigned int count = eb->args->buffer_count; | 1523 | unsigned int i; |
1045 | struct drm_device *dev = &eb->i915->drm; | ||
1046 | struct drm_i915_gem_relocation_entry *reloc; | ||
1047 | struct i915_vma *vma; | ||
1048 | int *reloc_offset; | ||
1049 | int i, total, ret; | ||
1050 | 1524 | ||
1051 | /* We may process another execbuffer during the unlock... */ | 1525 | for (i = 0; i < count; i++) { |
1052 | eb_reset(eb); | 1526 | const struct drm_i915_gem_exec_object2 *entry = |
1053 | mutex_unlock(&dev->struct_mutex); | 1527 | &eb->exec[i]; |
1528 | struct drm_i915_gem_relocation_entry *relocs; | ||
1054 | 1529 | ||
1055 | total = 0; | 1530 | if (!entry->relocation_count) |
1056 | for (i = 0; i < count; i++) | 1531 | continue; |
1057 | total += eb->exec[i].relocation_count; | ||
1058 | 1532 | ||
1059 | reloc_offset = kvmalloc_array(count, sizeof(*reloc_offset), GFP_KERNEL); | 1533 | relocs = u64_to_ptr(typeof(*relocs), entry->relocs_ptr); |
1060 | reloc = kvmalloc_array(total, sizeof(*reloc), GFP_KERNEL); | 1534 | kvfree(relocs); |
1061 | if (reloc == NULL || reloc_offset == NULL) { | 1535 | } |
1062 | kvfree(reloc); | ||
1063 | kvfree(reloc_offset); | ||
1064 | mutex_lock(&dev->struct_mutex); | ||
1065 | return -ENOMEM; | ||
1066 | } | 1536 | } |
1067 | 1537 | ||
1068 | total = 0; | 1538 | return err ?: have_copy; |
1069 | for (i = 0; i < count; i++) { | 1539 | } |
1070 | struct drm_i915_gem_relocation_entry __user *user_relocs; | ||
1071 | u64 invalid_offset = (u64)-1; | ||
1072 | int j; | ||
1073 | 1540 | ||
1074 | user_relocs = u64_to_user_ptr(eb->exec[i].relocs_ptr); | 1541 | static int eb_relocate(struct i915_execbuffer *eb) |
1542 | { | ||
1543 | if (eb_lookup_vmas(eb)) | ||
1544 | goto slow; | ||
1075 | 1545 | ||
1076 | if (copy_from_user(reloc+total, user_relocs, | 1546 | /* The objects are in their final locations, apply the relocations. */ |
1077 | eb->exec[i].relocation_count * sizeof(*reloc))) { | 1547 | if (eb->args->flags & __EXEC_HAS_RELOC) { |
1078 | ret = -EFAULT; | 1548 | struct i915_vma *vma; |
1079 | mutex_lock(&dev->struct_mutex); | ||
1080 | goto err; | ||
1081 | } | ||
1082 | 1549 | ||
1083 | /* As we do not update the known relocation offsets after | 1550 | list_for_each_entry(vma, &eb->relocs, reloc_link) { |
1084 | * relocating (due to the complexities in lock handling), | 1551 | if (eb_relocate_vma(eb, vma)) |
1085 | * we need to mark them as invalid now so that we force the | 1552 | goto slow; |
1086 | * relocation processing next time. Just in case the target | ||
1087 | * object is evicted and then rebound into its old | ||
1088 | * presumed_offset before the next execbuffer - if that | ||
1089 | * happened we would make the mistake of assuming that the | ||
1090 | * relocations were valid. | ||
1091 | */ | ||
1092 | for (j = 0; j < eb->exec[i].relocation_count; j++) { | ||
1093 | if (__copy_to_user(&user_relocs[j].presumed_offset, | ||
1094 | &invalid_offset, | ||
1095 | sizeof(invalid_offset))) { | ||
1096 | ret = -EFAULT; | ||
1097 | mutex_lock(&dev->struct_mutex); | ||
1098 | goto err; | ||
1099 | } | ||
1100 | } | 1553 | } |
1101 | |||
1102 | reloc_offset[i] = total; | ||
1103 | total += eb->exec[i].relocation_count; | ||
1104 | } | 1554 | } |
1105 | 1555 | ||
1106 | ret = i915_mutex_lock_interruptible(dev); | 1556 | return 0; |
1107 | if (ret) { | ||
1108 | mutex_lock(&dev->struct_mutex); | ||
1109 | goto err; | ||
1110 | } | ||
1111 | |||
1112 | /* reacquire the objects */ | ||
1113 | ret = eb_lookup_vmas(eb); | ||
1114 | if (ret) | ||
1115 | goto err; | ||
1116 | |||
1117 | ret = eb_reserve(eb); | ||
1118 | if (ret) | ||
1119 | goto err; | ||
1120 | 1557 | ||
1121 | list_for_each_entry(vma, &eb->vmas, exec_link) { | 1558 | slow: |
1122 | int idx = vma->exec_entry - eb->exec; | 1559 | return eb_relocate_slow(eb); |
1560 | } | ||
1123 | 1561 | ||
1124 | ret = eb_relocate_vma_slow(vma, eb, reloc + reloc_offset[idx]); | 1562 | static void eb_export_fence(struct drm_i915_gem_object *obj, |
1125 | if (ret) | 1563 | struct drm_i915_gem_request *req, |
1126 | goto err; | 1564 | unsigned int flags) |
1127 | } | 1565 | { |
1566 | struct reservation_object *resv = obj->resv; | ||
1128 | 1567 | ||
1129 | /* Leave the user relocations as they are; this is the painfully slow path, | 1568 | /* |
1130 | * and we want to avoid the complication of dropping the lock whilst | 1569 | * Ignore errors from failing to allocate the new fence, we can't |
1131 | * having buffers reserved in the aperture and so causing spurious | 1570 | * handle an error right now. Worst case should be missed |
1132 | * ENOSPC for random operations. | 1571 | * synchronisation leading to rendering corruption. |
1133 | */ | 1572 | */ |
1134 | 1573 | reservation_object_lock(resv, NULL); | |
1135 | err: | 1574 | if (flags & EXEC_OBJECT_WRITE) |
1136 | kvfree(reloc); | 1575 | reservation_object_add_excl_fence(resv, &req->fence); |
1137 | kvfree(reloc_offset); | 1576 | else if (reservation_object_reserve_shared(resv) == 0) |
1138 | return ret; | 1577 | reservation_object_add_shared_fence(resv, &req->fence); |
1578 | reservation_object_unlock(resv); | ||
1139 | } | 1579 | } |
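
The rewritten slow path above escalates through three attempts, as the block comment near its top describes: prefault the user array and retry the atomic fast path, then take a kernel copy of the relocations, then simply reschedule and try again. A compressed sketch of that control flow under hypothetical helper names (the real code also drops and retakes struct_mutex around these steps):

#include <errno.h>
#include <stdbool.h>

struct reloc_ctx;					/* opaque, hypothetical */
extern int prefault_user_relocs(struct reloc_ctx *c);	/* pass 1 */
extern int copy_user_relocs(struct reloc_ctx *c);	/* pass 2 */
extern void yield_cpu(void);				/* pass 3 */
extern int apply_relocs(struct reloc_ctx *c, bool have_copy);

static int relocate_slow(struct reloc_ctx *c)
{
	bool have_copy = false;
	int err = 0;

repeat:
	if (!err) {
		err = prefault_user_relocs(c);
	} else if (!have_copy) {
		err = copy_user_relocs(c);
		have_copy = (err == 0);
	} else {
		yield_cpu();
		err = 0;
	}
	if (err)
		return err;

	err = apply_relocs(c, have_copy);
	if (err == -EAGAIN)
		goto repeat;			/* escalate to the next pass */
	return err;
}

Note that escalation is driven by the failure of the previous iteration: only after the prefaulted fast path has failed again does the function pay for a full kernel-side copy of the relocations.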
1140 | 1580 | ||
1141 | static int | 1581 | static int eb_move_to_gpu(struct i915_execbuffer *eb) |
1142 | eb_move_to_gpu(struct i915_execbuffer *eb) | ||
1143 | { | 1582 | { |
1144 | struct i915_vma *vma; | 1583 | const unsigned int count = eb->buffer_count; |
1145 | int ret; | 1584 | unsigned int i; |
1585 | int err; | ||
1146 | 1586 | ||
1147 | list_for_each_entry(vma, &eb->vmas, exec_link) { | 1587 | for (i = 0; i < count; i++) { |
1588 | const struct drm_i915_gem_exec_object2 *entry = &eb->exec[i]; | ||
1589 | struct i915_vma *vma = exec_to_vma(entry); | ||
1148 | struct drm_i915_gem_object *obj = vma->obj; | 1590 | struct drm_i915_gem_object *obj = vma->obj; |
1149 | 1591 | ||
1150 | if (vma->exec_entry->flags & EXEC_OBJECT_CAPTURE) { | 1592 | if (entry->flags & EXEC_OBJECT_CAPTURE) { |
1151 | struct i915_gem_capture_list *capture; | 1593 | struct i915_gem_capture_list *capture; |
1152 | 1594 | ||
1153 | capture = kmalloc(sizeof(*capture), GFP_KERNEL); | 1595 | capture = kmalloc(sizeof(*capture), GFP_KERNEL); |
@@ -1159,18 +1601,32 @@ eb_move_to_gpu(struct i915_execbuffer *eb) | |||
1159 | eb->request->capture_list = capture; | 1601 | eb->request->capture_list = capture; |
1160 | } | 1602 | } |
1161 | 1603 | ||
1162 | if (vma->exec_entry->flags & EXEC_OBJECT_ASYNC) | 1604 | if (entry->flags & EXEC_OBJECT_ASYNC) |
1163 | continue; | 1605 | goto skip_flushes; |
1164 | 1606 | ||
1165 | if (unlikely(obj->cache_dirty && !obj->cache_coherent)) | 1607 | if (unlikely(obj->cache_dirty && !obj->cache_coherent)) |
1166 | i915_gem_clflush_object(obj, 0); | 1608 | i915_gem_clflush_object(obj, 0); |
1167 | 1609 | ||
1168 | ret = i915_gem_request_await_object | 1610 | err = i915_gem_request_await_object |
1169 | (eb->request, obj, vma->exec_entry->flags & EXEC_OBJECT_WRITE); | 1611 | (eb->request, obj, entry->flags & EXEC_OBJECT_WRITE); |
1170 | if (ret) | 1612 | if (err) |
1171 | return ret; | 1613 | return err; |
1614 | |||
1615 | skip_flushes: | ||
1616 | i915_vma_move_to_active(vma, eb->request, entry->flags); | ||
1617 | __eb_unreserve_vma(vma, entry); | ||
1618 | vma->exec_entry = NULL; | ||
1172 | } | 1619 | } |
1173 | 1620 | ||
1621 | for (i = 0; i < count; i++) { | ||
1622 | const struct drm_i915_gem_exec_object2 *entry = &eb->exec[i]; | ||
1623 | struct i915_vma *vma = exec_to_vma(entry); | ||
1624 | |||
1625 | eb_export_fence(vma->obj, eb->request, entry->flags); | ||
1626 | i915_vma_put(vma); | ||
1627 | } | ||
1628 | eb->exec = NULL; | ||
1629 | |||
1174 | /* Unconditionally flush any chipset caches (for streaming writes). */ | 1630 | /* Unconditionally flush any chipset caches (for streaming writes). */ |
1175 | i915_gem_chipset_flush(eb->i915); | 1631 | i915_gem_chipset_flush(eb->i915); |
1176 | 1632 | ||
@@ -1178,8 +1634,7 @@ eb_move_to_gpu(struct i915_execbuffer *eb) | |||
1178 | return eb->engine->emit_flush(eb->request, EMIT_INVALIDATE); | 1634 | return eb->engine->emit_flush(eb->request, EMIT_INVALIDATE); |
1179 | } | 1635 | } |
1180 | 1636 | ||
1181 | static bool | 1637 | static bool i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec) |
1182 | i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec) | ||
1183 | { | 1638 | { |
1184 | if (exec->flags & __I915_EXEC_ILLEGAL_FLAGS) | 1639 | if (exec->flags & __I915_EXEC_ILLEGAL_FLAGS) |
1185 | return false; | 1640 | return false; |
@@ -1201,103 +1656,6 @@ i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec) | |||
1201 | return true; | 1656 | return true; |
1202 | } | 1657 | } |
1203 | 1658 | ||
1204 | static int | ||
1205 | validate_exec_list(struct drm_device *dev, | ||
1206 | struct drm_i915_gem_exec_object2 *exec, | ||
1207 | int count) | ||
1208 | { | ||
1209 | unsigned relocs_total = 0; | ||
1210 | unsigned relocs_max = UINT_MAX / sizeof(struct drm_i915_gem_relocation_entry); | ||
1211 | unsigned invalid_flags; | ||
1212 | int i; | ||
1213 | |||
1214 | /* INTERNAL flags must not overlap with external ones */ | ||
1215 | BUILD_BUG_ON(__EXEC_OBJECT_INTERNAL_FLAGS & ~__EXEC_OBJECT_UNKNOWN_FLAGS); | ||
1216 | |||
1217 | invalid_flags = __EXEC_OBJECT_UNKNOWN_FLAGS; | ||
1218 | if (USES_FULL_PPGTT(dev)) | ||
1219 | invalid_flags |= EXEC_OBJECT_NEEDS_GTT; | ||
1220 | |||
1221 | for (i = 0; i < count; i++) { | ||
1222 | char __user *ptr = u64_to_user_ptr(exec[i].relocs_ptr); | ||
1223 | int length; /* limited by fault_in_pages_readable() */ | ||
1224 | |||
1225 | if (exec[i].flags & invalid_flags) | ||
1226 | return -EINVAL; | ||
1227 | |||
1228 | /* Offset can be used as input (EXEC_OBJECT_PINNED), reject | ||
1229 | * any non-page-aligned or non-canonical addresses. | ||
1230 | */ | ||
1231 | if (exec[i].flags & EXEC_OBJECT_PINNED) { | ||
1232 | if (exec[i].offset != | ||
1233 | gen8_canonical_addr(exec[i].offset & PAGE_MASK)) | ||
1234 | return -EINVAL; | ||
1235 | } | ||
1236 | |||
1237 | /* From drm_mm perspective address space is continuous, | ||
1238 | * so from this point we're always using non-canonical | ||
1239 | * form internally. | ||
1240 | */ | ||
1241 | exec[i].offset = gen8_noncanonical_addr(exec[i].offset); | ||
1242 | |||
1243 | if (exec[i].alignment && !is_power_of_2(exec[i].alignment)) | ||
1244 | return -EINVAL; | ||
1245 | |||
1246 | /* pad_to_size was once a reserved field, so sanitize it */ | ||
1247 | if (exec[i].flags & EXEC_OBJECT_PAD_TO_SIZE) { | ||
1248 | if (offset_in_page(exec[i].pad_to_size)) | ||
1249 | return -EINVAL; | ||
1250 | } else { | ||
1251 | exec[i].pad_to_size = 0; | ||
1252 | } | ||
1253 | |||
1254 | /* First check for malicious input causing overflow in | ||
1255 | * the worst case where we need to allocate the entire | ||
1256 | * relocation tree as a single array. | ||
1257 | */ | ||
1258 | if (exec[i].relocation_count > relocs_max - relocs_total) | ||
1259 | return -EINVAL; | ||
1260 | relocs_total += exec[i].relocation_count; | ||
1261 | |||
1262 | length = exec[i].relocation_count * | ||
1263 | sizeof(struct drm_i915_gem_relocation_entry); | ||
1264 | /* | ||
1265 | * We must check that the entire relocation array is safe | ||
1266 | * to read, but since we may need to update the presumed | ||
1267 | * offsets during execution, check for full write access. | ||
1268 | */ | ||
1269 | if (!access_ok(VERIFY_WRITE, ptr, length)) | ||
1270 | return -EFAULT; | ||
1271 | |||
1272 | if (likely(!i915.prefault_disable)) { | ||
1273 | if (fault_in_pages_readable(ptr, length)) | ||
1274 | return -EFAULT; | ||
1275 | } | ||
1276 | } | ||
1277 | |||
1278 | return 0; | ||
1279 | } | ||
1280 | |||
1281 | static int eb_select_context(struct i915_execbuffer *eb) | ||
1282 | { | ||
1283 | unsigned int ctx_id = i915_execbuffer2_get_context_id(*eb->args); | ||
1284 | struct i915_gem_context *ctx; | ||
1285 | |||
1286 | ctx = i915_gem_context_lookup(eb->file->driver_priv, ctx_id); | ||
1287 | if (unlikely(IS_ERR(ctx))) | ||
1288 | return PTR_ERR(ctx); | ||
1289 | |||
1290 | if (unlikely(i915_gem_context_is_banned(ctx))) { | ||
1291 | DRM_DEBUG("Context %u tried to submit while banned\n", ctx_id); | ||
1292 | return -EIO; | ||
1293 | } | ||
1294 | |||
1295 | eb->ctx = i915_gem_context_get(ctx); | ||
1296 | eb->vm = ctx->ppgtt ? &ctx->ppgtt->base : &eb->i915->ggtt.base; | ||
1297 | |||
1298 | return 0; | ||
1299 | } | ||
1300 | |||
1301 | void i915_vma_move_to_active(struct i915_vma *vma, | 1659 | void i915_vma_move_to_active(struct i915_vma *vma, |
1302 | struct drm_i915_gem_request *req, | 1660 | struct drm_i915_gem_request *req, |
1303 | unsigned int flags) | 1661 | unsigned int flags) |
@@ -1308,7 +1666,8 @@ void i915_vma_move_to_active(struct i915_vma *vma, | |||
1308 | lockdep_assert_held(&req->i915->drm.struct_mutex); | 1666 | lockdep_assert_held(&req->i915->drm.struct_mutex); |
1309 | GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); | 1667 | GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); |
1310 | 1668 | ||
1311 | /* Add a reference if we're newly entering the active list. | 1669 | /* |
1670 | * Add a reference if we're newly entering the active list. | ||
1312 | * The order in which we add operations to the retirement queue is | 1671 | * The order in which we add operations to the retirement queue is |
1313 | * vital here: mark_active adds to the start of the callback list, | 1672 | * vital here: mark_active adds to the start of the callback list, |
1314 | * such that subsequent callbacks are called first. Therefore we | 1673 | * such that subsequent callbacks are called first. Therefore we |
@@ -1336,44 +1695,7 @@ void i915_vma_move_to_active(struct i915_vma *vma, | |||
1336 | i915_gem_active_set(&vma->last_fence, req); | 1695 | i915_gem_active_set(&vma->last_fence, req); |
1337 | } | 1696 | } |
1338 | 1697 | ||
1339 | static void eb_export_fence(struct drm_i915_gem_object *obj, | 1698 | static int i915_reset_gen7_sol_offsets(struct drm_i915_gem_request *req) |
1340 | struct drm_i915_gem_request *req, | ||
1341 | unsigned int flags) | ||
1342 | { | ||
1343 | struct reservation_object *resv = obj->resv; | ||
1344 | |||
1345 | /* Ignore errors from failing to allocate the new fence, we can't | ||
1346 | * handle an error right now. Worst case should be missed | ||
1347 | * synchronisation leading to rendering corruption. | ||
1348 | */ | ||
1349 | reservation_object_lock(resv, NULL); | ||
1350 | if (flags & EXEC_OBJECT_WRITE) | ||
1351 | reservation_object_add_excl_fence(resv, &req->fence); | ||
1352 | else if (reservation_object_reserve_shared(resv) == 0) | ||
1353 | reservation_object_add_shared_fence(resv, &req->fence); | ||
1354 | reservation_object_unlock(resv); | ||
1355 | } | ||
1356 | |||
1357 | static void | ||
1358 | eb_move_to_active(struct i915_execbuffer *eb) | ||
1359 | { | ||
1360 | struct i915_vma *vma; | ||
1361 | |||
1362 | list_for_each_entry(vma, &eb->vmas, exec_link) { | ||
1363 | struct drm_i915_gem_object *obj = vma->obj; | ||
1364 | |||
1365 | obj->base.write_domain = 0; | ||
1366 | if (vma->exec_entry->flags & EXEC_OBJECT_WRITE) | ||
1367 | obj->base.read_domains = 0; | ||
1368 | obj->base.read_domains |= I915_GEM_GPU_DOMAINS; | ||
1369 | |||
1370 | i915_vma_move_to_active(vma, eb->request, vma->exec_entry->flags); | ||
1371 | eb_export_fence(obj, eb->request, vma->exec_entry->flags); | ||
1372 | } | ||
1373 | } | ||
1374 | |||
1375 | static int | ||
1376 | i915_reset_gen7_sol_offsets(struct drm_i915_gem_request *req) | ||
1377 | { | 1699 | { |
1378 | u32 *cs; | 1700 | u32 *cs; |
1379 | int i; | 1701 | int i; |
@@ -1383,16 +1705,16 @@ i915_reset_gen7_sol_offsets(struct drm_i915_gem_request *req) | |||
1383 | return -EINVAL; | 1705 | return -EINVAL; |
1384 | } | 1706 | } |
1385 | 1707 | ||
1386 | cs = intel_ring_begin(req, 4 * 3); | 1708 | cs = intel_ring_begin(req, 4 * 2 + 2); |
1387 | if (IS_ERR(cs)) | 1709 | if (IS_ERR(cs)) |
1388 | return PTR_ERR(cs); | 1710 | return PTR_ERR(cs); |
1389 | 1711 | ||
1712 | *cs++ = MI_LOAD_REGISTER_IMM(4); | ||
1390 | for (i = 0; i < 4; i++) { | 1713 | for (i = 0; i < 4; i++) { |
1391 | *cs++ = MI_LOAD_REGISTER_IMM(1); | ||
1392 | *cs++ = i915_mmio_reg_offset(GEN7_SO_WRITE_OFFSET(i)); | 1714 | *cs++ = i915_mmio_reg_offset(GEN7_SO_WRITE_OFFSET(i)); |
1393 | *cs++ = 0; | 1715 | *cs++ = 0; |
1394 | } | 1716 | } |
1395 | 1717 | *cs++ = MI_NOOP; | |
1396 | intel_ring_advance(req, cs); | 1718 | intel_ring_advance(req, cs); |
1397 | 1719 | ||
1398 | return 0; | 1720 | return 0; |
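
The ring-space request in this hunk shrinks from 4 * 3 to 4 * 2 + 2 dwords because the four individual MI_LOAD_REGISTER_IMM(1) packets are folded into a single MI_LOAD_REGISTER_IMM(4): one header, four register-offset/value pairs, and one trailing MI_NOOP to keep the emitted length even. A quick standalone check of that arithmetic:

#include <assert.h>

int main(void)
{
	const int regs = 4;
	const int old_dwords = regs * 3;	/* LRI(1) + offset + value, per register */
	const int new_dwords = 1 + regs * 2 + 1; /* LRI(4) header + pairs + NOOP */

	assert(old_dwords == 4 * 3);		/* 12, the old intel_ring_begin() size */
	assert(new_dwords == 4 * 2 + 2);	/* 10, the new intel_ring_begin() size */
	return 0;
}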
@@ -1402,24 +1724,24 @@ static struct i915_vma *eb_parse(struct i915_execbuffer *eb, bool is_master) | |||
1402 | { | 1724 | { |
1403 | struct drm_i915_gem_object *shadow_batch_obj; | 1725 | struct drm_i915_gem_object *shadow_batch_obj; |
1404 | struct i915_vma *vma; | 1726 | struct i915_vma *vma; |
1405 | int ret; | 1727 | int err; |
1406 | 1728 | ||
1407 | shadow_batch_obj = i915_gem_batch_pool_get(&eb->engine->batch_pool, | 1729 | shadow_batch_obj = i915_gem_batch_pool_get(&eb->engine->batch_pool, |
1408 | PAGE_ALIGN(eb->batch_len)); | 1730 | PAGE_ALIGN(eb->batch_len)); |
1409 | if (IS_ERR(shadow_batch_obj)) | 1731 | if (IS_ERR(shadow_batch_obj)) |
1410 | return ERR_CAST(shadow_batch_obj); | 1732 | return ERR_CAST(shadow_batch_obj); |
1411 | 1733 | ||
1412 | ret = intel_engine_cmd_parser(eb->engine, | 1734 | err = intel_engine_cmd_parser(eb->engine, |
1413 | eb->batch->obj, | 1735 | eb->batch->obj, |
1414 | shadow_batch_obj, | 1736 | shadow_batch_obj, |
1415 | eb->batch_start_offset, | 1737 | eb->batch_start_offset, |
1416 | eb->batch_len, | 1738 | eb->batch_len, |
1417 | is_master); | 1739 | is_master); |
1418 | if (ret) { | 1740 | if (err) { |
1419 | if (ret == -EACCES) /* unhandled chained batch */ | 1741 | if (err == -EACCES) /* unhandled chained batch */ |
1420 | vma = NULL; | 1742 | vma = NULL; |
1421 | else | 1743 | else |
1422 | vma = ERR_PTR(ret); | 1744 | vma = ERR_PTR(err); |
1423 | goto out; | 1745 | goto out; |
1424 | } | 1746 | } |
1425 | 1747 | ||
@@ -1428,10 +1750,10 @@ static struct i915_vma *eb_parse(struct i915_execbuffer *eb, bool is_master) | |||
1428 | goto out; | 1750 | goto out; |
1429 | 1751 | ||
1430 | vma->exec_entry = | 1752 | vma->exec_entry = |
1431 | memset(&eb->shadow_exec_entry, 0, sizeof(*vma->exec_entry)); | 1753 | memset(&eb->exec[eb->buffer_count++], |
1754 | 0, sizeof(*vma->exec_entry)); | ||
1432 | vma->exec_entry->flags = __EXEC_OBJECT_HAS_PIN; | 1755 | vma->exec_entry->flags = __EXEC_OBJECT_HAS_PIN; |
1433 | i915_gem_object_get(shadow_batch_obj); | 1756 | __exec_to_vma(vma->exec_entry) = (uintptr_t)i915_vma_get(vma); |
1434 | list_add_tail(&vma->exec_link, &eb->vmas); | ||
1435 | 1757 | ||
1436 | out: | 1758 | out: |
1437 | i915_gem_object_unpin_pages(shadow_batch_obj); | 1759 | i915_gem_object_unpin_pages(shadow_batch_obj); |
@@ -1439,41 +1761,37 @@ out: | |||
1439 | } | 1761 | } |
1440 | 1762 | ||
1441 | static void | 1763 | static void |
1442 | add_to_client(struct drm_i915_gem_request *req, | 1764 | add_to_client(struct drm_i915_gem_request *req, struct drm_file *file) |
1443 | struct drm_file *file) | ||
1444 | { | 1765 | { |
1445 | req->file_priv = file->driver_priv; | 1766 | req->file_priv = file->driver_priv; |
1446 | list_add_tail(&req->client_link, &req->file_priv->mm.request_list); | 1767 | list_add_tail(&req->client_link, &req->file_priv->mm.request_list); |
1447 | } | 1768 | } |
1448 | 1769 | ||
1449 | static int | 1770 | static int eb_submit(struct i915_execbuffer *eb) |
1450 | execbuf_submit(struct i915_execbuffer *eb) | ||
1451 | { | 1771 | { |
1452 | int ret; | 1772 | int err; |
1453 | 1773 | ||
1454 | ret = eb_move_to_gpu(eb); | 1774 | err = eb_move_to_gpu(eb); |
1455 | if (ret) | 1775 | if (err) |
1456 | return ret; | 1776 | return err; |
1457 | 1777 | ||
1458 | ret = i915_switch_context(eb->request); | 1778 | err = i915_switch_context(eb->request); |
1459 | if (ret) | 1779 | if (err) |
1460 | return ret; | 1780 | return err; |
1461 | 1781 | ||
1462 | if (eb->args->flags & I915_EXEC_GEN7_SOL_RESET) { | 1782 | if (eb->args->flags & I915_EXEC_GEN7_SOL_RESET) { |
1463 | ret = i915_reset_gen7_sol_offsets(eb->request); | 1783 | err = i915_reset_gen7_sol_offsets(eb->request); |
1464 | if (ret) | 1784 | if (err) |
1465 | return ret; | 1785 | return err; |
1466 | } | 1786 | } |
1467 | 1787 | ||
1468 | ret = eb->engine->emit_bb_start(eb->request, | 1788 | err = eb->engine->emit_bb_start(eb->request, |
1469 | eb->batch->node.start + | 1789 | eb->batch->node.start + |
1470 | eb->batch_start_offset, | 1790 | eb->batch_start_offset, |
1471 | eb->batch_len, | 1791 | eb->batch_len, |
1472 | eb->dispatch_flags); | 1792 | eb->batch_flags); |
1473 | if (ret) | 1793 | if (err) |
1474 | return ret; | 1794 | return err; |
1475 | |||
1476 | eb_move_to_active(eb); | ||
1477 | 1795 | ||
1478 | return 0; | 1796 | return 0; |
1479 | } | 1797 | } |
@@ -1564,34 +1882,36 @@ i915_gem_do_execbuffer(struct drm_device *dev, | |||
1564 | struct dma_fence *in_fence = NULL; | 1882 | struct dma_fence *in_fence = NULL; |
1565 | struct sync_file *out_fence = NULL; | 1883 | struct sync_file *out_fence = NULL; |
1566 | int out_fence_fd = -1; | 1884 | int out_fence_fd = -1; |
1567 | int ret; | 1885 | int err; |
1568 | 1886 | ||
1569 | if (!i915_gem_check_execbuffer(args)) | 1887 | BUILD_BUG_ON(__EXEC_OBJECT_INTERNAL_FLAGS & |
1570 | return -EINVAL; | 1888 | ~__EXEC_OBJECT_UNKNOWN_FLAGS); |
1571 | |||
1572 | ret = validate_exec_list(dev, exec, args->buffer_count); | ||
1573 | if (ret) | ||
1574 | return ret; | ||
1575 | 1889 | ||
1576 | eb.i915 = to_i915(dev); | 1890 | eb.i915 = to_i915(dev); |
1577 | eb.file = file; | 1891 | eb.file = file; |
1578 | eb.args = args; | 1892 | eb.args = args; |
1893 | if (!(args->flags & I915_EXEC_NO_RELOC)) | ||
1894 | args->flags |= __EXEC_HAS_RELOC; | ||
1579 | eb.exec = exec; | 1895 | eb.exec = exec; |
1580 | eb.need_relocs = (args->flags & I915_EXEC_NO_RELOC) == 0; | 1896 | eb.ctx = NULL; |
1897 | eb.invalid_flags = __EXEC_OBJECT_UNKNOWN_FLAGS; | ||
1898 | if (USES_FULL_PPGTT(eb.i915)) | ||
1899 | eb.invalid_flags |= EXEC_OBJECT_NEEDS_GTT; | ||
1581 | reloc_cache_init(&eb.reloc_cache, eb.i915); | 1900 | reloc_cache_init(&eb.reloc_cache, eb.i915); |
1582 | 1901 | ||
1902 | eb.buffer_count = args->buffer_count; | ||
1583 | eb.batch_start_offset = args->batch_start_offset; | 1903 | eb.batch_start_offset = args->batch_start_offset; |
1584 | eb.batch_len = args->batch_len; | 1904 | eb.batch_len = args->batch_len; |
1585 | 1905 | ||
1586 | eb.dispatch_flags = 0; | 1906 | eb.batch_flags = 0; |
1587 | if (args->flags & I915_EXEC_SECURE) { | 1907 | if (args->flags & I915_EXEC_SECURE) { |
1588 | if (!drm_is_current_master(file) || !capable(CAP_SYS_ADMIN)) | 1908 | if (!drm_is_current_master(file) || !capable(CAP_SYS_ADMIN)) |
1589 | return -EPERM; | 1909 | return -EPERM; |
1590 | 1910 | ||
1591 | eb.dispatch_flags |= I915_DISPATCH_SECURE; | 1911 | eb.batch_flags |= I915_DISPATCH_SECURE; |
1592 | } | 1912 | } |
1593 | if (args->flags & I915_EXEC_IS_PINNED) | 1913 | if (args->flags & I915_EXEC_IS_PINNED) |
1594 | eb.dispatch_flags |= I915_DISPATCH_PINNED; | 1914 | eb.batch_flags |= I915_DISPATCH_PINNED; |
1595 | 1915 | ||
1596 | eb.engine = eb_select_engine(eb.i915, file, args); | 1916 | eb.engine = eb_select_engine(eb.i915, file, args); |
1597 | if (!eb.engine) | 1917 | if (!eb.engine) |
@@ -1608,7 +1928,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, | |||
1608 | return -EINVAL; | 1928 | return -EINVAL; |
1609 | } | 1929 | } |
1610 | 1930 | ||
1611 | eb.dispatch_flags |= I915_DISPATCH_RS; | 1931 | eb.batch_flags |= I915_DISPATCH_RS; |
1612 | } | 1932 | } |
1613 | 1933 | ||
1614 | if (args->flags & I915_EXEC_FENCE_IN) { | 1934 | if (args->flags & I915_EXEC_FENCE_IN) { |
@@ -1620,71 +1940,53 @@ i915_gem_do_execbuffer(struct drm_device *dev, | |||
1620 | if (args->flags & I915_EXEC_FENCE_OUT) { | 1940 | if (args->flags & I915_EXEC_FENCE_OUT) { |
1621 | out_fence_fd = get_unused_fd_flags(O_CLOEXEC); | 1941 | out_fence_fd = get_unused_fd_flags(O_CLOEXEC); |
1622 | if (out_fence_fd < 0) { | 1942 | if (out_fence_fd < 0) { |
1623 | ret = out_fence_fd; | 1943 | err = out_fence_fd; |
1624 | goto err_in_fence; | 1944 | goto err_in_fence; |
1625 | } | 1945 | } |
1626 | } | 1946 | } |
1627 | 1947 | ||
1628 | /* Take a local wakeref for preparing to dispatch the execbuf as | 1948 | if (eb_create(&eb)) |
1949 | return -ENOMEM; | ||
1950 | |||
1951 | /* | ||
1952 | * Take a local wakeref for preparing to dispatch the execbuf as | ||
1629 | * we expect to access the hardware fairly frequently in the | 1953 | * we expect to access the hardware fairly frequently in the |
1630 | * process. Upon first dispatch, we acquire another prolonged | 1954 | * process. Upon first dispatch, we acquire another prolonged |
1631 | * wakeref that we hold until the GPU has been idle for at least | 1955 | * wakeref that we hold until the GPU has been idle for at least |
1632 | * 100ms. | 1956 | * 100ms. |
1633 | */ | 1957 | */ |
1634 | intel_runtime_pm_get(eb.i915); | 1958 | intel_runtime_pm_get(eb.i915); |
1959 | err = i915_mutex_lock_interruptible(dev); | ||
1960 | if (err) | ||
1961 | goto err_rpm; | ||
1635 | 1962 | ||
1636 | ret = i915_mutex_lock_interruptible(dev); | 1963 | err = eb_select_context(&eb); |
1637 | if (ret) | 1964 | if (unlikely(err)) |
1638 | goto pre_mutex_err; | 1965 | goto err_unlock; |
1639 | |||
1640 | ret = eb_select_context(&eb); | ||
1641 | if (ret) { | ||
1642 | mutex_unlock(&dev->struct_mutex); | ||
1643 | goto pre_mutex_err; | ||
1644 | } | ||
1645 | |||
1646 | if (eb_create(&eb)) { | ||
1647 | i915_gem_context_put(eb.ctx); | ||
1648 | mutex_unlock(&dev->struct_mutex); | ||
1649 | ret = -ENOMEM; | ||
1650 | goto pre_mutex_err; | ||
1651 | } | ||
1652 | |||
1653 | /* Look up object handles */ | ||
1654 | ret = eb_lookup_vmas(&eb); | ||
1655 | if (ret) | ||
1656 | goto err; | ||
1657 | |||
1658 | /* take note of the batch buffer before we might reorder the lists */ | ||
1659 | eb.batch = eb_get_batch(&eb); | ||
1660 | |||
1661 | /* Move the objects en-masse into the GTT, evicting if necessary. */ | ||
1662 | ret = eb_reserve(&eb); | ||
1663 | if (ret) | ||
1664 | goto err; | ||
1665 | 1966 | ||
1666 | /* The objects are in their final locations, apply the relocations. */ | 1967 | err = eb_relocate(&eb); |
1667 | if (eb.need_relocs) | 1968 | if (err) |
1668 | ret = eb_relocate(&eb); | 1969 | /* |
1669 | if (ret) { | 1970 | * If the user expects the execobject.offset and |
1670 | if (ret == -EFAULT) { | 1971 | * reloc.presumed_offset to be an exact match, |
1671 | ret = eb_relocate_slow(&eb); | 1972 | * as when using NO_RELOC, then we cannot update |
1672 | BUG_ON(!mutex_is_locked(&dev->struct_mutex)); | 1973 | * the execobject.offset until we have completed |
1673 | } | 1974 | * relocation. |
1674 | if (ret) | 1975 | */ |
1675 | goto err; | 1976 | args->flags &= ~__EXEC_HAS_RELOC; |
1676 | } | 1977 | if (err < 0) |
1978 | goto err_vma; | ||
1677 | 1979 | ||
1678 | if (eb.batch->exec_entry->flags & EXEC_OBJECT_WRITE) { | 1980 | if (unlikely(eb.batch->exec_entry->flags & EXEC_OBJECT_WRITE)) { |
1679 | DRM_DEBUG("Attempting to use self-modifying batch buffer\n"); | 1981 | DRM_DEBUG("Attempting to use self-modifying batch buffer\n"); |
1680 | ret = -EINVAL; | 1982 | err = -EINVAL; |
1681 | goto err; | 1983 | goto err_vma; |
1682 | } | 1984 | } |
1683 | if (eb.batch_start_offset > eb.batch->size || | 1985 | if (eb.batch_start_offset > eb.batch->size || |
1684 | eb.batch_len > eb.batch->size - eb.batch_start_offset) { | 1986 | eb.batch_len > eb.batch->size - eb.batch_start_offset) { |
1685 | DRM_DEBUG("Attempting to use out-of-bounds batch\n"); | 1987 | DRM_DEBUG("Attempting to use out-of-bounds batch\n"); |
1686 | ret = -EINVAL; | 1988 | err = -EINVAL; |
1687 | goto err; | 1989 | goto err_vma; |
1688 | } | 1990 | } |
1689 | 1991 | ||
1690 | if (eb.engine->needs_cmd_parser && eb.batch_len) { | 1992 | if (eb.engine->needs_cmd_parser && eb.batch_len) { |
@@ -1692,8 +1994,8 @@ i915_gem_do_execbuffer(struct drm_device *dev, | |||
1692 | 1994 | ||
1693 | vma = eb_parse(&eb, drm_is_current_master(file)); | 1995 | vma = eb_parse(&eb, drm_is_current_master(file)); |
1694 | if (IS_ERR(vma)) { | 1996 | if (IS_ERR(vma)) { |
1695 | ret = PTR_ERR(vma); | 1997 | err = PTR_ERR(vma); |
1696 | goto err; | 1998 | goto err_vma; |
1697 | } | 1999 | } |
1698 | 2000 | ||
1699 | if (vma) { | 2001 | if (vma) { |
@@ -1706,7 +2008,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, | |||
1706 | * specifically don't want that set on batches the | 2008 | * specifically don't want that set on batches the |
1707 | * command parser has accepted. | 2009 | * command parser has accepted. |
1708 | */ | 2010 | */ |
1709 | eb.dispatch_flags |= I915_DISPATCH_SECURE; | 2011 | eb.batch_flags |= I915_DISPATCH_SECURE; |
1710 | eb.batch_start_offset = 0; | 2012 | eb.batch_start_offset = 0; |
1711 | eb.batch = vma; | 2013 | eb.batch = vma; |
1712 | } | 2014 | } |
@@ -1715,11 +2017,11 @@ i915_gem_do_execbuffer(struct drm_device *dev, | |||
1715 | if (eb.batch_len == 0) | 2017 | if (eb.batch_len == 0) |
1716 | eb.batch_len = eb.batch->size - eb.batch_start_offset; | 2018 | eb.batch_len = eb.batch->size - eb.batch_start_offset; |
1717 | 2019 | ||
1718 | /* snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure | 2020 | /* |
2021 | * snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure | ||
1719 | * batch" bit. Hence we need to pin secure batches into the global gtt. | 2022 | * batch" bit. Hence we need to pin secure batches into the global gtt. |
1720 | * hsw should have this fixed, but bdw mucks it up again. */ | 2023 | * hsw should have this fixed, but bdw mucks it up again. */ |
1721 | if (eb.dispatch_flags & I915_DISPATCH_SECURE) { | 2024 | if (eb.batch_flags & I915_DISPATCH_SECURE) { |
1722 | struct drm_i915_gem_object *obj = eb.batch->obj; | ||
1723 | struct i915_vma *vma; | 2025 | struct i915_vma *vma; |
1724 | 2026 | ||
1725 | /* | 2027 | /* |
@@ -1732,10 +2034,10 @@ i915_gem_do_execbuffer(struct drm_device *dev, | |||
1732 | * fitting due to fragmentation. | 2034 | * fitting due to fragmentation. |
1733 | * So this is actually safe. | 2035 | * So this is actually safe. |
1734 | */ | 2036 | */ |
1735 | vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 0); | 2037 | vma = i915_gem_object_ggtt_pin(eb.batch->obj, NULL, 0, 0, 0); |
1736 | if (IS_ERR(vma)) { | 2038 | if (IS_ERR(vma)) { |
1737 | ret = PTR_ERR(vma); | 2039 | err = PTR_ERR(vma); |
1738 | goto err; | 2040 | goto err_vma; |
1739 | } | 2041 | } |
1740 | 2042 | ||
1741 | eb.batch = vma; | 2043 | eb.batch = vma; |
@@ -1744,25 +2046,26 @@ i915_gem_do_execbuffer(struct drm_device *dev, | |||
1744 | /* Allocate a request for this batch buffer nice and early. */ | 2046 | /* Allocate a request for this batch buffer nice and early. */ |
1745 | eb.request = i915_gem_request_alloc(eb.engine, eb.ctx); | 2047 | eb.request = i915_gem_request_alloc(eb.engine, eb.ctx); |
1746 | if (IS_ERR(eb.request)) { | 2048 | if (IS_ERR(eb.request)) { |
1747 | ret = PTR_ERR(eb.request); | 2049 | err = PTR_ERR(eb.request); |
1748 | goto err_batch_unpin; | 2050 | goto err_batch_unpin; |
1749 | } | 2051 | } |
1750 | 2052 | ||
1751 | if (in_fence) { | 2053 | if (in_fence) { |
1752 | ret = i915_gem_request_await_dma_fence(eb.request, in_fence); | 2054 | err = i915_gem_request_await_dma_fence(eb.request, in_fence); |
1753 | if (ret < 0) | 2055 | if (err < 0) |
1754 | goto err_request; | 2056 | goto err_request; |
1755 | } | 2057 | } |
1756 | 2058 | ||
1757 | if (out_fence_fd != -1) { | 2059 | if (out_fence_fd != -1) { |
1758 | out_fence = sync_file_create(&eb.request->fence); | 2060 | out_fence = sync_file_create(&eb.request->fence); |
1759 | if (!out_fence) { | 2061 | if (!out_fence) { |
1760 | ret = -ENOMEM; | 2062 | err = -ENOMEM; |
1761 | goto err_request; | 2063 | goto err_request; |
1762 | } | 2064 | } |
1763 | } | 2065 | } |
1764 | 2066 | ||
1765 | /* Whilst this request exists, batch_obj will be on the | 2067 | /* |
2068 | * Whilst this request exists, batch_obj will be on the | ||
1766 | * active_list, and so will hold the active reference. Only when this | 2069 | * active_list, and so will hold the active reference. Only when this |
1767 | * request is retired will the the batch_obj be moved onto the | 2070 | * request is retired will the the batch_obj be moved onto the |
1768 | * inactive_list and lose its active reference. Hence we do not need | 2071 | * inactive_list and lose its active reference. Hence we do not need |
@@ -1770,14 +2073,14 @@ i915_gem_do_execbuffer(struct drm_device *dev, | |||
1770 | */ | 2073 | */ |
1771 | eb.request->batch = eb.batch; | 2074 | eb.request->batch = eb.batch; |
1772 | 2075 | ||
1773 | trace_i915_gem_request_queue(eb.request, eb.dispatch_flags); | 2076 | trace_i915_gem_request_queue(eb.request, eb.batch_flags); |
1774 | ret = execbuf_submit(&eb); | 2077 | err = eb_submit(&eb); |
1775 | err_request: | 2078 | err_request: |
1776 | __i915_add_request(eb.request, ret == 0); | 2079 | __i915_add_request(eb.request, err == 0); |
1777 | add_to_client(eb.request, file); | 2080 | add_to_client(eb.request, file); |
1778 | 2081 | ||
1779 | if (out_fence) { | 2082 | if (out_fence) { |
1780 | if (ret == 0) { | 2083 | if (err == 0) { |
1781 | fd_install(out_fence_fd, out_fence->file); | 2084 | fd_install(out_fence_fd, out_fence->file); |
1782 | args->rsvd2 &= GENMASK_ULL(0, 31); /* keep in-fence */ | 2085 | args->rsvd2 &= GENMASK_ULL(0, 31); /* keep in-fence */ |
1783 | args->rsvd2 |= (u64)out_fence_fd << 32; | 2086 | args->rsvd2 |= (u64)out_fence_fd << 32; |
@@ -1788,28 +2091,22 @@ err_request: | |||
1788 | } | 2091 | } |
1789 | 2092 | ||
1790 | err_batch_unpin: | 2093 | err_batch_unpin: |
1791 | /* | 2094 | if (eb.batch_flags & I915_DISPATCH_SECURE) |
1792 | * FIXME: We crucially rely upon the active tracking for the (ppgtt) | ||
1793 | * batch vma for correctness. For less ugly and less fragility this | ||
1794 | * needs to be adjusted to also track the ggtt batch vma properly as | ||
1795 | * active. | ||
1796 | */ | ||
1797 | if (eb.dispatch_flags & I915_DISPATCH_SECURE) | ||
1798 | i915_vma_unpin(eb.batch); | 2095 | i915_vma_unpin(eb.batch); |
1799 | err: | 2096 | err_vma: |
1800 | /* the request owns the ref now */ | 2097 | if (eb.exec) |
1801 | eb_destroy(&eb); | 2098 | eb_release_vmas(&eb); |
2099 | i915_gem_context_put(eb.ctx); | ||
2100 | err_unlock: | ||
1802 | mutex_unlock(&dev->struct_mutex); | 2101 | mutex_unlock(&dev->struct_mutex); |
1803 | 2102 | err_rpm: | |
1804 | pre_mutex_err: | ||
1805 | /* intel_gpu_busy should also get a ref, so it will free when the device | ||
1806 | * is really idle. */ | ||
1807 | intel_runtime_pm_put(eb.i915); | 2103 | intel_runtime_pm_put(eb.i915); |
2104 | eb_destroy(&eb); | ||
1808 | if (out_fence_fd != -1) | 2105 | if (out_fence_fd != -1) |
1809 | put_unused_fd(out_fence_fd); | 2106 | put_unused_fd(out_fence_fd); |
1810 | err_in_fence: | 2107 | err_in_fence: |
1811 | dma_fence_put(in_fence); | 2108 | dma_fence_put(in_fence); |
1812 | return ret; | 2109 | return err; |
1813 | } | 2110 | } |
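
The offset handling above is the kernel half of the NO_RELOC contract: buffer addresses are written back to the exec list only when relocations were actually processed (tracked via __EXEC_HAS_RELOC and the UPDATE bit), while NO_RELOC submissions assert that the offsets userspace supplied are already correct. A minimal userspace-side sketch of that round trip (assumes the libdrm uAPI headers, an open DRM fd, valid GEM handles with the batch last, at most 16 objects, and no relocation entries; error handling trimmed):

#include <stdbool.h>
#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

static int submit_batch(int fd, const uint32_t *handles, uint64_t *offsets,
			unsigned int count, uint32_t batch_len, bool no_reloc)
{
	struct drm_i915_gem_exec_object2 obj[16];
	struct drm_i915_gem_execbuffer2 execbuf;
	unsigned int i;

	memset(obj, 0, sizeof(obj));
	for (i = 0; i < count; i++) {
		obj[i].handle = handles[i];
		obj[i].offset = offsets[i];	/* presumed address, 0 if unknown */
	}

	memset(&execbuf, 0, sizeof(execbuf));
	execbuf.buffers_ptr = (uintptr_t)obj;
	execbuf.buffer_count = count;		/* the batch buffer is obj[count - 1] */
	execbuf.batch_len = batch_len;
	execbuf.flags = I915_EXEC_RENDER;
	if (no_reloc)				/* only once the offsets are known */
		execbuf.flags |= I915_EXEC_NO_RELOC;

	if (ioctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf))
		return -1;

	for (i = 0; i < count; i++)
		offsets[i] = obj[i].offset;	/* feed back into the next submit */
	return 0;
}

The first submission goes in with no_reloc == false so the kernel reports where everything landed; subsequent submissions can pass the learned offsets back with I915_EXEC_NO_RELOC set (and matching presumed_offset values in any relocation entries, omitted here) so the kernel can skip the relocation phase whenever nothing has to move.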
1814 | 2111 | ||
1815 | /* | 2112 | /* |
@@ -1820,20 +2117,38 @@ int | |||
1820 | i915_gem_execbuffer(struct drm_device *dev, void *data, | 2117 | i915_gem_execbuffer(struct drm_device *dev, void *data, |
1821 | struct drm_file *file) | 2118 | struct drm_file *file) |
1822 | { | 2119 | { |
2120 | const size_t sz = sizeof(struct drm_i915_gem_exec_object2); | ||
1823 | struct drm_i915_gem_execbuffer *args = data; | 2121 | struct drm_i915_gem_execbuffer *args = data; |
1824 | struct drm_i915_gem_execbuffer2 exec2; | 2122 | struct drm_i915_gem_execbuffer2 exec2; |
1825 | struct drm_i915_gem_exec_object *exec_list = NULL; | 2123 | struct drm_i915_gem_exec_object *exec_list = NULL; |
1826 | struct drm_i915_gem_exec_object2 *exec2_list = NULL; | 2124 | struct drm_i915_gem_exec_object2 *exec2_list = NULL; |
1827 | int ret, i; | 2125 | unsigned int i; |
2126 | int err; | ||
1828 | 2127 | ||
1829 | if (args->buffer_count < 1) { | 2128 | if (args->buffer_count < 1 || args->buffer_count > SIZE_MAX / sz - 1) { |
1830 | DRM_DEBUG("execbuf with %d buffers\n", args->buffer_count); | 2129 | DRM_DEBUG("execbuf with %d buffers\n", args->buffer_count); |
1831 | return -EINVAL; | 2130 | return -EINVAL; |
1832 | } | 2131 | } |
1833 | 2132 | ||
2133 | exec2.buffers_ptr = args->buffers_ptr; | ||
2134 | exec2.buffer_count = args->buffer_count; | ||
2135 | exec2.batch_start_offset = args->batch_start_offset; | ||
2136 | exec2.batch_len = args->batch_len; | ||
2137 | exec2.DR1 = args->DR1; | ||
2138 | exec2.DR4 = args->DR4; | ||
2139 | exec2.num_cliprects = args->num_cliprects; | ||
2140 | exec2.cliprects_ptr = args->cliprects_ptr; | ||
2141 | exec2.flags = I915_EXEC_RENDER; | ||
2142 | i915_execbuffer2_set_context_id(exec2, 0); | ||
2143 | |||
2144 | if (!i915_gem_check_execbuffer(&exec2)) | ||
2145 | return -EINVAL; | ||
2146 | |||
1834 | /* Copy in the exec list from userland */ | 2147 | /* Copy in the exec list from userland */ |
1835 | exec_list = kvmalloc_array(sizeof(*exec_list), args->buffer_count, GFP_KERNEL); | 2148 | exec_list = kvmalloc_array(args->buffer_count, sizeof(*exec_list), |
1836 | exec2_list = kvmalloc_array(sizeof(*exec2_list), args->buffer_count, GFP_KERNEL); | 2149 | __GFP_NOWARN | GFP_TEMPORARY); |
2150 | exec2_list = kvmalloc_array(args->buffer_count + 1, sz, | ||
2151 | __GFP_NOWARN | GFP_TEMPORARY); | ||
1837 | if (exec_list == NULL || exec2_list == NULL) { | 2152 | if (exec_list == NULL || exec2_list == NULL) { |
1838 | DRM_DEBUG("Failed to allocate exec list for %d buffers\n", | 2153 | DRM_DEBUG("Failed to allocate exec list for %d buffers\n", |
1839 | args->buffer_count); | 2154 | args->buffer_count); |
@@ -1841,12 +2156,12 @@ i915_gem_execbuffer(struct drm_device *dev, void *data, | |||
1841 | kvfree(exec2_list); | 2156 | kvfree(exec2_list); |
1842 | return -ENOMEM; | 2157 | return -ENOMEM; |
1843 | } | 2158 | } |
1844 | ret = copy_from_user(exec_list, | 2159 | err = copy_from_user(exec_list, |
1845 | u64_to_user_ptr(args->buffers_ptr), | 2160 | u64_to_user_ptr(args->buffers_ptr), |
1846 | sizeof(*exec_list) * args->buffer_count); | 2161 | sizeof(*exec_list) * args->buffer_count); |
1847 | if (ret != 0) { | 2162 | if (err) { |
1848 | DRM_DEBUG("copy %d exec entries failed %d\n", | 2163 | DRM_DEBUG("copy %d exec entries failed %d\n", |
1849 | args->buffer_count, ret); | 2164 | args->buffer_count, err); |
1850 | kvfree(exec_list); | 2165 | kvfree(exec_list); |
1851 | kvfree(exec2_list); | 2166 | kvfree(exec2_list); |
1852 | return -EFAULT; | 2167 | return -EFAULT; |
@@ -1864,99 +2179,94 @@ i915_gem_execbuffer(struct drm_device *dev, void *data, | |||
1864 | exec2_list[i].flags = 0; | 2179 | exec2_list[i].flags = 0; |
1865 | } | 2180 | } |
1866 | 2181 | ||
1867 | exec2.buffers_ptr = args->buffers_ptr; | 2182 | err = i915_gem_do_execbuffer(dev, file, &exec2, exec2_list); |
1868 | exec2.buffer_count = args->buffer_count; | 2183 | if (exec2.flags & __EXEC_HAS_RELOC) { |
1869 | exec2.batch_start_offset = args->batch_start_offset; | ||
1870 | exec2.batch_len = args->batch_len; | ||
1871 | exec2.DR1 = args->DR1; | ||
1872 | exec2.DR4 = args->DR4; | ||
1873 | exec2.num_cliprects = args->num_cliprects; | ||
1874 | exec2.cliprects_ptr = args->cliprects_ptr; | ||
1875 | exec2.flags = I915_EXEC_RENDER; | ||
1876 | i915_execbuffer2_set_context_id(exec2, 0); | ||
1877 | |||
1878 | ret = i915_gem_do_execbuffer(dev, file, &exec2, exec2_list); | ||
1879 | if (!ret) { | ||
1880 | struct drm_i915_gem_exec_object __user *user_exec_list = | 2184 | struct drm_i915_gem_exec_object __user *user_exec_list = |
1881 | u64_to_user_ptr(args->buffers_ptr); | 2185 | u64_to_user_ptr(args->buffers_ptr); |
1882 | 2186 | ||
1883 | /* Copy the new buffer offsets back to the user's exec list. */ | 2187 | /* Copy the new buffer offsets back to the user's exec list. */ |
1884 | for (i = 0; i < args->buffer_count; i++) { | 2188 | for (i = 0; i < args->buffer_count; i++) { |
2189 | if (!(exec2_list[i].offset & UPDATE)) | ||
2190 | continue; | ||
2191 | |||
1885 | exec2_list[i].offset = | 2192 | exec2_list[i].offset = |
1886 | gen8_canonical_addr(exec2_list[i].offset); | 2193 | gen8_canonical_addr(exec2_list[i].offset & PIN_OFFSET_MASK); |
1887 | ret = __copy_to_user(&user_exec_list[i].offset, | 2194 | exec2_list[i].offset &= PIN_OFFSET_MASK; |
1888 | &exec2_list[i].offset, | 2195 | if (__copy_to_user(&user_exec_list[i].offset, |
1889 | sizeof(user_exec_list[i].offset)); | 2196 | &exec2_list[i].offset, |
1890 | if (ret) { | 2197 | sizeof(user_exec_list[i].offset))) |
1891 | ret = -EFAULT; | ||
1892 | DRM_DEBUG("failed to copy %d exec entries " | ||
1893 | "back to user (%d)\n", | ||
1894 | args->buffer_count, ret); | ||
1895 | break; | 2198 | break; |
1896 | } | ||
1897 | } | 2199 | } |
1898 | } | 2200 | } |
1899 | 2201 | ||
1900 | kvfree(exec_list); | 2202 | kvfree(exec_list); |
1901 | kvfree(exec2_list); | 2203 | kvfree(exec2_list); |
1902 | return ret; | 2204 | return err; |
1903 | } | 2205 | } |
1904 | 2206 | ||
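The legacy execbuffer ioctl above is serviced by translating its arguments into a drm_i915_gem_execbuffer2 and reusing the execbuffer2 path. Note the buffer_count bound of SIZE_MAX / sz - 1: it leaves room for the one extra array slot allocated for the command parser without the allocation size overflowing. A minimal userspace sketch of that overflow-safe sizing follows; struct exec_object2 and alloc_exec_slots() are illustrative stand-ins, not driver types or functions.

/*
 * Sketch: reject counts that would overflow once the extra slot is
 * added, then allocate count + 1 entries.
 */
#include <stdint.h>
#include <stdlib.h>

struct exec_object2 {            /* stand-in for drm_i915_gem_exec_object2 */
        uint64_t handle;
        uint64_t offset;
        uint64_t flags;
};

static struct exec_object2 *alloc_exec_slots(unsigned int count)
{
        const size_t sz = sizeof(struct exec_object2);

        /* (count + 1) * sz must fit in size_t: count <= SIZE_MAX / sz - 1 */
        if (count < 1 || count > SIZE_MAX / sz - 1)
                return NULL;

        return calloc(count + 1, sz); /* extra slot for the command parser */
}

int main(void)
{
        struct exec_object2 *list = alloc_exec_slots(4);

        if (list) {
                /* ... fill in entries copied from the user's exec list ... */
                free(list);
        }
        return 0;
}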
1905 | int | 2207 | int |
1906 | i915_gem_execbuffer2(struct drm_device *dev, void *data, | 2208 | i915_gem_execbuffer2(struct drm_device *dev, void *data, |
1907 | struct drm_file *file) | 2209 | struct drm_file *file) |
1908 | { | 2210 | { |
2211 | const size_t sz = sizeof(struct drm_i915_gem_exec_object2); | ||
1909 | struct drm_i915_gem_execbuffer2 *args = data; | 2212 | struct drm_i915_gem_execbuffer2 *args = data; |
1910 | struct drm_i915_gem_exec_object2 *exec2_list = NULL; | 2213 | struct drm_i915_gem_exec_object2 *exec2_list; |
1911 | int ret; | 2214 | int err; |
1912 | 2215 | ||
1913 | if (args->buffer_count < 1 || | 2216 | if (args->buffer_count < 1 || args->buffer_count > SIZE_MAX / sz - 1) { |
1914 | args->buffer_count > UINT_MAX / sizeof(*exec2_list)) { | ||
1915 | DRM_DEBUG("execbuf2 with %d buffers\n", args->buffer_count); | 2217 | DRM_DEBUG("execbuf2 with %d buffers\n", args->buffer_count); |
1916 | return -EINVAL; | 2218 | return -EINVAL; |
1917 | } | 2219 | } |
1918 | 2220 | ||
1919 | exec2_list = kvmalloc_array(args->buffer_count, | 2221 | if (!i915_gem_check_execbuffer(args)) |
1920 | sizeof(*exec2_list), | 2222 | return -EINVAL; |
1921 | GFP_TEMPORARY); | 2223 | |
2224 | /* Allocate an extra slot for use by the command parser */ | ||
2225 | exec2_list = kvmalloc_array(args->buffer_count + 1, sz, | ||
2226 | __GFP_NOWARN | GFP_TEMPORARY); | ||
1922 | if (exec2_list == NULL) { | 2227 | if (exec2_list == NULL) { |
1923 | DRM_DEBUG("Failed to allocate exec list for %d buffers\n", | 2228 | DRM_DEBUG("Failed to allocate exec list for %d buffers\n", |
1924 | args->buffer_count); | 2229 | args->buffer_count); |
1925 | return -ENOMEM; | 2230 | return -ENOMEM; |
1926 | } | 2231 | } |
1927 | ret = copy_from_user(exec2_list, | 2232 | if (copy_from_user(exec2_list, |
1928 | u64_to_user_ptr(args->buffers_ptr), | 2233 | u64_to_user_ptr(args->buffers_ptr), |
1929 | sizeof(*exec2_list) * args->buffer_count); | 2234 | sizeof(*exec2_list) * args->buffer_count)) { |
1930 | if (ret != 0) { | 2235 | DRM_DEBUG("copy %d exec entries failed\n", args->buffer_count); |
1931 | DRM_DEBUG("copy %d exec entries failed %d\n", | ||
1932 | args->buffer_count, ret); | ||
1933 | kvfree(exec2_list); | 2236 | kvfree(exec2_list); |
1934 | return -EFAULT; | 2237 | return -EFAULT; |
1935 | } | 2238 | } |
1936 | 2239 | ||
1937 | ret = i915_gem_do_execbuffer(dev, file, args, exec2_list); | 2240 | err = i915_gem_do_execbuffer(dev, file, args, exec2_list); |
1938 | if (!ret) { | 2241 | |
1939 | /* Copy the new buffer offsets back to the user's exec list. */ | 2242 | /* |
2243 | * Now that we have begun execution of the batchbuffer, we ignore | ||
2244 | * any new error after this point. Also given that we have already | ||
2245 | * updated the associated relocations, we try to write out the current | ||
2246 | * object locations irrespective of any error. | ||
2247 | */ | ||
2248 | if (args->flags & __EXEC_HAS_RELOC) { | ||
1940 | struct drm_i915_gem_exec_object2 __user *user_exec_list = | 2249 | struct drm_i915_gem_exec_object2 __user *user_exec_list = |
1941 | u64_to_user_ptr(args->buffers_ptr); | 2250 | u64_to_user_ptr(args->buffers_ptr); |
1942 | int i; | 2251 | unsigned int i; |
1943 | 2252 | ||
2253 | /* Copy the new buffer offsets back to the user's exec list. */ | ||
2254 | user_access_begin(); | ||
1944 | for (i = 0; i < args->buffer_count; i++) { | 2255 | for (i = 0; i < args->buffer_count; i++) { |
2256 | if (!(exec2_list[i].offset & UPDATE)) | ||
2257 | continue; | ||
2258 | |||
1945 | exec2_list[i].offset = | 2259 | exec2_list[i].offset = |
1946 | gen8_canonical_addr(exec2_list[i].offset); | 2260 | gen8_canonical_addr(exec2_list[i].offset & PIN_OFFSET_MASK); |
1947 | ret = __copy_to_user(&user_exec_list[i].offset, | 2261 | unsafe_put_user(exec2_list[i].offset, |
1948 | &exec2_list[i].offset, | 2262 | &user_exec_list[i].offset, |
1949 | sizeof(user_exec_list[i].offset)); | 2263 | end_user); |
1950 | if (ret) { | ||
1951 | ret = -EFAULT; | ||
1952 | DRM_DEBUG("failed to copy %d exec entries " | ||
1953 | "back to user\n", | ||
1954 | args->buffer_count); | ||
1955 | break; | ||
1956 | } | ||
1957 | } | 2264 | } |
2265 | end_user: | ||
2266 | user_access_end(); | ||
1958 | } | 2267 | } |
1959 | 2268 | ||
2269 | args->flags &= ~__I915_EXEC_UNKNOWN_FLAGS; | ||
1960 | kvfree(exec2_list); | 2270 | kvfree(exec2_list); |
1961 | return ret; | 2271 | return err; |
1962 | } | 2272 | } |
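In both ioctls the write-back loop only reports offsets whose UPDATE bit is set, masks the flag bits off with PIN_OFFSET_MASK, and converts the result to the canonical (sign-extended) form before handing it back to userspace. Below is a standalone sketch of that scheme, assuming 4 KiB-aligned offsets so the low bits are free to carry a flag, and sign extension from bit 47 as used for 48-bit GPU address spaces; the names (OFFSET_UPDATED, canonical_addr) are illustrative, not the driver's.

/*
 * Sketch: page-aligned offsets temporarily carry an "updated" flag in
 * their low bits; before reporting, the flag is masked off and the
 * address is sign-extended from bit 47 (relies on arithmetic right
 * shift of signed values, as the kernel does).
 */
#include <stdint.h>
#include <stdio.h>

#define PAGE_MASK_U64   (~(uint64_t)4095)   /* offsets are 4 KiB aligned */
#define OFFSET_UPDATED  ((uint64_t)1)       /* low bit marks "was moved" */

static uint64_t canonical_addr(uint64_t addr)
{
        /* sign-extend from bit 47: shift the bit to the top, shift back */
        return (uint64_t)(((int64_t)addr << 16) >> 16);
}

int main(void)
{
        uint64_t slots[2] = {
                (0x0000800000001000ull & PAGE_MASK_U64) | OFFSET_UPDATED,
                0x0000000000002000ull,  /* flag clear: skip the write-back */
        };

        for (int i = 0; i < 2; i++) {
                if (!(slots[i] & OFFSET_UPDATED))
                        continue;       /* only report offsets that changed */
                printf("slot %d -> %#llx\n", i,
                       (unsigned long long)canonical_addr(slots[i] & PAGE_MASK_U64));
        }
        return 0;
}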