Diffstat (limited to 'drivers/gpu/drm/i915/i915_gem_execbuffer.c')
-rw-r--r-- | drivers/gpu/drm/i915/i915_gem_execbuffer.c | 1343 |
1 files changed, 1343 insertions, 0 deletions
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
new file mode 100644
index 000000000000..61129e6759eb
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -0,0 +1,1343 @@
1 | /* | ||
2 | * Copyright © 2008,2010 Intel Corporation | ||
3 | * | ||
4 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
5 | * copy of this software and associated documentation files (the "Software"), | ||
6 | * to deal in the Software without restriction, including without limitation | ||
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
8 | * and/or sell copies of the Software, and to permit persons to whom the | ||
9 | * Software is furnished to do so, subject to the following conditions: | ||
10 | * | ||
11 | * The above copyright notice and this permission notice (including the next | ||
12 | * paragraph) shall be included in all copies or substantial portions of the | ||
13 | * Software. | ||
14 | * | ||
15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
18 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
20 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | ||
21 | * IN THE SOFTWARE. | ||
22 | * | ||
23 | * Authors: | ||
24 | * Eric Anholt <eric@anholt.net> | ||
25 | * Chris Wilson <chris@chris-wilson.co.uk> | ||
26 | * | ||
27 | */ | ||
28 | |||
29 | #include "drmP.h" | ||
30 | #include "drm.h" | ||
31 | #include "i915_drm.h" | ||
32 | #include "i915_drv.h" | ||
33 | #include "i915_trace.h" | ||
34 | #include "intel_drv.h" | ||
35 | |||
36 | struct change_domains { | ||
37 | uint32_t invalidate_domains; | ||
38 | uint32_t flush_domains; | ||
39 | uint32_t flush_rings; | ||
40 | }; | ||
41 | |||
42 | /* | ||
43 | * Set the next domain for the specified object. This | ||
44 | * may not actually perform the necessary flushing/invalidating though, | ||
45 | * as that may want to be batched with other set_domain operations | ||
46 | * | ||
47 | * This is (we hope) the only really tricky part of gem. The goal | ||
48 | * is fairly simple -- track which caches hold bits of the object | ||
49 | * and make sure they remain coherent. A few concrete examples may | ||
50 | * help to explain how it works. For shorthand, we use the notation | ||
51 | * (read_domains, write_domain), e.g. (CPU, CPU) to indicate | ||
52 | * a pair of read and write domain masks. | ||
53 | * | ||
54 | * Case 1: the batch buffer | ||
55 | * | ||
56 | * 1. Allocated | ||
57 | * 2. Written by CPU | ||
58 | * 3. Mapped to GTT | ||
59 | * 4. Read by GPU | ||
60 | * 5. Unmapped from GTT | ||
61 | * 6. Freed | ||
62 | * | ||
63 | * Let's take these a step at a time | ||
64 | * | ||
65 | * 1. Allocated | ||
66 | * Pages allocated from the kernel may still have | ||
67 | * cache contents, so we set them to (CPU, CPU) always. | ||
68 | * 2. Written by CPU (using pwrite) | ||
69 | * The pwrite function calls set_domain (CPU, CPU) and | ||
70 | * this function does nothing (as nothing changes) | ||
71 | * 3. Mapped to GTT | ||
72 | * This function asserts that the object is not | ||
73 | * currently in any GPU-based read or write domains | ||
74 | * 4. Read by GPU | ||
75 | * i915_gem_execbuffer calls set_domain (COMMAND, 0). | ||
76 | * As write_domain is zero, this function adds in the | ||
77 | * current read domains (CPU+COMMAND, 0). | ||
78 | * flush_domains is set to CPU. | ||
79 | * invalidate_domains is set to COMMAND | ||
80 | * clflush is run to get data out of the CPU caches | ||
81 | * then i915_dev_set_domain calls i915_gem_flush to | ||
82 | * emit an MI_FLUSH and drm_agp_chipset_flush | ||
83 | * 5. Unmapped from GTT | ||
84 | * i915_gem_object_unbind calls set_domain (CPU, CPU) | ||
85 | * flush_domains and invalidate_domains end up both zero | ||
86 | * so no flushing/invalidating happens | ||
87 | * 6. Freed | ||
88 | * yay, done | ||
89 | * | ||
90 | * Case 2: The shared render buffer | ||
91 | * | ||
92 | * 1. Allocated | ||
93 | * 2. Mapped to GTT | ||
94 | * 3. Read/written by GPU | ||
95 | * 4. set_domain to (CPU,CPU) | ||
96 | * 5. Read/written by CPU | ||
97 | * 6. Read/written by GPU | ||
98 | * | ||
99 | * 1. Allocated | ||
100 | * Same as last example, (CPU, CPU) | ||
101 | * 2. Mapped to GTT | ||
102 | * Nothing changes (assertions find that it is not in the GPU) | ||
103 | * 3. Read/written by GPU | ||
104 | * execbuffer calls set_domain (RENDER, RENDER) | ||
105 | * flush_domains gets CPU | ||
106 | * invalidate_domains gets GPU | ||
107 | * clflush (obj) | ||
108 | * MI_FLUSH and drm_agp_chipset_flush | ||
109 | * 4. set_domain (CPU, CPU) | ||
110 | * flush_domains gets GPU | ||
111 | * invalidate_domains gets CPU | ||
112 | * wait_rendering (obj) to make sure all drawing is complete. | ||
113 | * This will include an MI_FLUSH to get the data from GPU | ||
114 | * to memory | ||
115 | * clflush (obj) to invalidate the CPU cache | ||
116 | * Another MI_FLUSH in i915_gem_flush (eliminate this somehow?) | ||
117 | * 5. Read/written by CPU | ||
118 | * cache lines are loaded and dirtied | ||
119 | * 6. Read/written by GPU | ||
120 | * Same as last GPU access | ||
121 | * | ||
122 | * Case 3: The constant buffer | ||
123 | * | ||
124 | * 1. Allocated | ||
125 | * 2. Written by CPU | ||
126 | * 3. Read by GPU | ||
127 | * 4. Updated (written) by CPU again | ||
128 | * 5. Read by GPU | ||
129 | * | ||
130 | * 1. Allocated | ||
131 | * (CPU, CPU) | ||
132 | * 2. Written by CPU | ||
133 | * (CPU, CPU) | ||
134 | * 3. Read by GPU | ||
135 | * (CPU+RENDER, 0) | ||
136 | * flush_domains = CPU | ||
137 | * invalidate_domains = RENDER | ||
138 | * clflush (obj) | ||
139 | * MI_FLUSH | ||
140 | * drm_agp_chipset_flush | ||
141 | * 4. Updated (written) by CPU again | ||
142 | * (CPU, CPU) | ||
143 | * flush_domains = 0 (no previous write domain) | ||
144 | * invalidate_domains = 0 (no new read domains) | ||
145 | * 5. Read by GPU | ||
146 | * (CPU+RENDER, 0) | ||
147 | * flush_domains = CPU | ||
148 | * invalidate_domains = RENDER | ||
149 | * clflush (obj) | ||
150 | * MI_FLUSH | ||
151 | * drm_agp_chipset_flush | ||
152 | */ | ||
153 | static void | ||
154 | i915_gem_object_set_to_gpu_domain(struct drm_i915_gem_object *obj, | ||
155 | struct intel_ring_buffer *ring, | ||
156 | struct change_domains *cd) | ||
157 | { | ||
158 | uint32_t invalidate_domains = 0, flush_domains = 0; | ||
159 | |||
160 | /* | ||
161 | * If the object isn't moving to a new write domain, | ||
162 | * let the object stay in multiple read domains | ||
163 | */ | ||
164 | if (obj->base.pending_write_domain == 0) | ||
165 | obj->base.pending_read_domains |= obj->base.read_domains; | ||
166 | |||
167 | /* | ||
168 | * Flush the current write domain if | ||
169 | * the new read domains don't match. Invalidate | ||
170 | * any read domains which differ from the old | ||
171 | * write domain | ||
172 | */ | ||
173 | if (obj->base.write_domain && | ||
174 | (((obj->base.write_domain != obj->base.pending_read_domains || | ||
175 | obj->ring != ring)) || | ||
176 | (obj->fenced_gpu_access && !obj->pending_fenced_gpu_access))) { | ||
177 | flush_domains |= obj->base.write_domain; | ||
178 | invalidate_domains |= | ||
179 | obj->base.pending_read_domains & ~obj->base.write_domain; | ||
180 | } | ||
181 | /* | ||
182 | * Invalidate any read caches which may have | ||
183 | * stale data. That is, any new read domains. | ||
184 | */ | ||
185 | invalidate_domains |= obj->base.pending_read_domains & ~obj->base.read_domains; | ||
186 | if ((flush_domains | invalidate_domains) & I915_GEM_DOMAIN_CPU) | ||
187 | i915_gem_clflush_object(obj); | ||
188 | |||
189 | /* blow away mappings if mapped through GTT */ | ||
190 | if ((flush_domains | invalidate_domains) & I915_GEM_DOMAIN_GTT) | ||
191 | i915_gem_release_mmap(obj); | ||
192 | |||
193 | /* The actual obj->write_domain will be updated with | ||
194 | * pending_write_domain after we emit the accumulated flush for all | ||
195 | * of our domain changes in execbuffers (which clears objects' | ||
196 | * write_domains). So if we have a current write domain that we | ||
197 | * aren't changing, set pending_write_domain to that. | ||
198 | */ | ||
199 | if (flush_domains == 0 && obj->base.pending_write_domain == 0) | ||
200 | obj->base.pending_write_domain = obj->base.write_domain; | ||
201 | |||
202 | cd->invalidate_domains |= invalidate_domains; | ||
203 | cd->flush_domains |= flush_domains; | ||
204 | if (flush_domains & I915_GEM_GPU_DOMAINS) | ||
205 | cd->flush_rings |= obj->ring->id; | ||
206 | if (invalidate_domains & I915_GEM_GPU_DOMAINS) | ||
207 | cd->flush_rings |= ring->id; | ||
208 | } | ||
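/*
 * A short illustration (a hypothetical two-ring case, not taken from the
 * patch): when an object last written on one ring is read by the ring
 * executing this batch, both rings end up in the accumulated mask:
 *
 *	cd->flush_rings |= obj->ring->id;	(flush the old writer)
 *	cd->flush_rings |= ring->id;		(invalidate on the new reader)
 *
 * A single i915_gem_execbuffer_flush() pass later then flushes every ring
 * whose bit is set, rather than emitting one flush per object.
 */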
209 | |||
210 | struct eb_objects { | ||
211 | int and; | ||
212 | struct hlist_head buckets[0]; | ||
213 | }; | ||
214 | |||
215 | static struct eb_objects * | ||
216 | eb_create(int size) | ||
217 | { | ||
218 | struct eb_objects *eb; | ||
219 | int count = PAGE_SIZE / sizeof(struct hlist_head) / 2; | ||
220 | while (count > size) | ||
221 | count >>= 1; | ||
222 | eb = kzalloc(count*sizeof(struct hlist_head) + | ||
223 | sizeof(struct eb_objects), | ||
224 | GFP_KERNEL); | ||
225 | if (eb == NULL) | ||
226 | return eb; | ||
227 | |||
228 | eb->and = count - 1; | ||
229 | return eb; | ||
230 | } | ||
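/*
 * A worked example of the sizing above (numbers assumed: 4096-byte pages and
 * an 8-byte struct hlist_head on a 64-bit build): count starts at 256, and
 * for an execbuffer of 40 objects the loop halves it down to 32 buckets, so
 * eb->and is 31 and a handle such as 0x105 is bucketed as
 *
 *	handle & eb->and  ==  0x105 & 31  ==  5
 *
 * Keeping the bucket count a power of two is what makes this mask a valid
 * hash in eb_add_object() and eb_get_object() below.
 */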
231 | |||
232 | static void | ||
233 | eb_reset(struct eb_objects *eb) | ||
234 | { | ||
235 | memset(eb->buckets, 0, (eb->and+1)*sizeof(struct hlist_head)); | ||
236 | } | ||
237 | |||
238 | static void | ||
239 | eb_add_object(struct eb_objects *eb, struct drm_i915_gem_object *obj) | ||
240 | { | ||
241 | hlist_add_head(&obj->exec_node, | ||
242 | &eb->buckets[obj->exec_handle & eb->and]); | ||
243 | } | ||
244 | |||
245 | static struct drm_i915_gem_object * | ||
246 | eb_get_object(struct eb_objects *eb, unsigned long handle) | ||
247 | { | ||
248 | struct hlist_head *head; | ||
249 | struct hlist_node *node; | ||
250 | struct drm_i915_gem_object *obj; | ||
251 | |||
252 | head = &eb->buckets[handle & eb->and]; | ||
253 | hlist_for_each(node, head) { | ||
254 | obj = hlist_entry(node, struct drm_i915_gem_object, exec_node); | ||
255 | if (obj->exec_handle == handle) | ||
256 | return obj; | ||
257 | } | ||
258 | |||
259 | return NULL; | ||
260 | } | ||
261 | |||
262 | static void | ||
263 | eb_destroy(struct eb_objects *eb) | ||
264 | { | ||
265 | kfree(eb); | ||
266 | } | ||
267 | |||
268 | static int | ||
269 | i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj, | ||
270 | struct eb_objects *eb, | ||
271 | struct drm_i915_gem_exec_object2 *entry, | ||
272 | struct drm_i915_gem_relocation_entry *reloc) | ||
273 | { | ||
274 | struct drm_device *dev = obj->base.dev; | ||
275 | struct drm_gem_object *target_obj; | ||
276 | uint32_t target_offset; | ||
277 | int ret = -EINVAL; | ||
278 | |||
279 | /* we already hold a reference to all valid objects */ | ||
280 | target_obj = &eb_get_object(eb, reloc->target_handle)->base; | ||
281 | if (unlikely(target_obj == NULL)) | ||
282 | return -ENOENT; | ||
283 | |||
284 | target_offset = to_intel_bo(target_obj)->gtt_offset; | ||
285 | |||
286 | #if WATCH_RELOC | ||
287 | DRM_INFO("%s: obj %p offset %08x target %d " | ||
288 | "read %08x write %08x gtt %08x " | ||
289 | "presumed %08x delta %08x\n", | ||
290 | __func__, | ||
291 | obj, | ||
292 | (int) reloc->offset, | ||
293 | (int) reloc->target_handle, | ||
294 | (int) reloc->read_domains, | ||
295 | (int) reloc->write_domain, | ||
296 | (int) target_offset, | ||
297 | (int) reloc->presumed_offset, | ||
298 | reloc->delta); | ||
299 | #endif | ||
300 | |||
301 | /* The target buffer should have appeared before us in the | ||
302 | * exec_object list, so it should have a GTT space bound by now. | ||
303 | */ | ||
304 | if (unlikely(target_offset == 0)) { | ||
305 | DRM_ERROR("No GTT space found for object %d\n", | ||
306 | reloc->target_handle); | ||
307 | return ret; | ||
308 | } | ||
309 | |||
310 | /* Validate that the target is in a valid r/w GPU domain */ | ||
311 | if (unlikely(reloc->write_domain & (reloc->write_domain - 1))) { | ||
312 | DRM_ERROR("reloc with multiple write domains: " | ||
313 | "obj %p target %d offset %d " | ||
314 | "read %08x write %08x", | ||
315 | obj, reloc->target_handle, | ||
316 | (int) reloc->offset, | ||
317 | reloc->read_domains, | ||
318 | reloc->write_domain); | ||
319 | return ret; | ||
320 | } | ||
321 | if (unlikely((reloc->write_domain | reloc->read_domains) & I915_GEM_DOMAIN_CPU)) { | ||
322 | DRM_ERROR("reloc with read/write CPU domains: " | ||
323 | "obj %p target %d offset %d " | ||
324 | "read %08x write %08x", | ||
325 | obj, reloc->target_handle, | ||
326 | (int) reloc->offset, | ||
327 | reloc->read_domains, | ||
328 | reloc->write_domain); | ||
329 | return ret; | ||
330 | } | ||
331 | if (unlikely(reloc->write_domain && target_obj->pending_write_domain && | ||
332 | reloc->write_domain != target_obj->pending_write_domain)) { | ||
333 | DRM_ERROR("Write domain conflict: " | ||
334 | "obj %p target %d offset %d " | ||
335 | "new %08x old %08x\n", | ||
336 | obj, reloc->target_handle, | ||
337 | (int) reloc->offset, | ||
338 | reloc->write_domain, | ||
339 | target_obj->pending_write_domain); | ||
340 | return ret; | ||
341 | } | ||
342 | |||
343 | target_obj->pending_read_domains |= reloc->read_domains; | ||
344 | target_obj->pending_write_domain |= reloc->write_domain; | ||
345 | |||
346 | /* If the relocation already has the right value in it, no | ||
347 | * more work needs to be done. | ||
348 | */ | ||
349 | if (target_offset == reloc->presumed_offset) | ||
350 | return 0; | ||
351 | |||
352 | /* Check that the relocation address is valid... */ | ||
353 | if (unlikely(reloc->offset > obj->base.size - 4)) { | ||
354 | DRM_ERROR("Relocation beyond object bounds: " | ||
355 | "obj %p target %d offset %d size %d.\n", | ||
356 | obj, reloc->target_handle, | ||
357 | (int) reloc->offset, | ||
358 | (int) obj->base.size); | ||
359 | return ret; | ||
360 | } | ||
361 | if (unlikely(reloc->offset & 3)) { | ||
362 | DRM_ERROR("Relocation not 4-byte aligned: " | ||
363 | "obj %p target %d offset %d.\n", | ||
364 | obj, reloc->target_handle, | ||
365 | (int) reloc->offset); | ||
366 | return ret; | ||
367 | } | ||
368 | |||
369 | /* and points to somewhere within the target object. */ | ||
370 | if (unlikely(reloc->delta >= target_obj->size)) { | ||
371 | DRM_ERROR("Relocation beyond target object bounds: " | ||
372 | "obj %p target %d delta %d size %d.\n", | ||
373 | obj, reloc->target_handle, | ||
374 | (int) reloc->delta, | ||
375 | (int) target_obj->size); | ||
376 | return ret; | ||
377 | } | ||
378 | |||
379 | reloc->delta += target_offset; | ||
380 | if (obj->base.write_domain == I915_GEM_DOMAIN_CPU) { | ||
381 | uint32_t page_offset = reloc->offset & ~PAGE_MASK; | ||
382 | char *vaddr; | ||
383 | |||
384 | vaddr = kmap_atomic(obj->pages[reloc->offset >> PAGE_SHIFT]); | ||
385 | *(uint32_t *)(vaddr + page_offset) = reloc->delta; | ||
386 | kunmap_atomic(vaddr); | ||
387 | } else { | ||
388 | struct drm_i915_private *dev_priv = dev->dev_private; | ||
389 | uint32_t __iomem *reloc_entry; | ||
390 | void __iomem *reloc_page; | ||
391 | |||
392 | ret = i915_gem_object_set_to_gtt_domain(obj, 1); | ||
393 | if (ret) | ||
394 | return ret; | ||
395 | |||
396 | /* Map the page containing the relocation we're going to perform. */ | ||
397 | reloc->offset += obj->gtt_offset; | ||
398 | reloc_page = io_mapping_map_atomic_wc(dev_priv->mm.gtt_mapping, | ||
399 | reloc->offset & PAGE_MASK); | ||
400 | reloc_entry = (uint32_t __iomem *) | ||
401 | (reloc_page + (reloc->offset & ~PAGE_MASK)); | ||
402 | iowrite32(reloc->delta, reloc_entry); | ||
403 | io_mapping_unmap_atomic(reloc_page); | ||
404 | } | ||
405 | |||
406 | /* and update the user's relocation entry */ | ||
407 | reloc->presumed_offset = target_offset; | ||
408 | |||
409 | return 0; | ||
410 | } | ||
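/*
 * A concrete example of the arithmetic above (all values hypothetical): a
 * relocation at offset 0x80 into this object, with delta 0x40, whose target
 * is bound at gtt_offset 0x00210000, results in
 *
 *	reloc->delta + target_offset  ==  0x40 + 0x00210000  ==  0x00210040
 *
 * being written at byte 0x80, either through kmap_atomic() or through the
 * GTT mapping, after which presumed_offset is updated to 0x00210000 so that
 * an unmoved target can take the early-return path on the next execbuffer.
 */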
411 | |||
412 | static int | ||
413 | i915_gem_execbuffer_relocate_object(struct drm_i915_gem_object *obj, | ||
414 | struct eb_objects *eb, | ||
415 | struct drm_i915_gem_exec_object2 *entry) | ||
416 | { | ||
417 | struct drm_i915_gem_relocation_entry __user *user_relocs; | ||
418 | int i, ret; | ||
419 | |||
420 | user_relocs = (void __user *)(uintptr_t)entry->relocs_ptr; | ||
421 | for (i = 0; i < entry->relocation_count; i++) { | ||
422 | struct drm_i915_gem_relocation_entry reloc; | ||
423 | |||
424 | if (__copy_from_user_inatomic(&reloc, | ||
425 | user_relocs+i, | ||
426 | sizeof(reloc))) | ||
427 | return -EFAULT; | ||
428 | |||
429 | ret = i915_gem_execbuffer_relocate_entry(obj, eb, entry, &reloc); | ||
430 | if (ret) | ||
431 | return ret; | ||
432 | |||
433 | if (__copy_to_user_inatomic(&user_relocs[i].presumed_offset, | ||
434 | &reloc.presumed_offset, | ||
435 | sizeof(reloc.presumed_offset))) | ||
436 | return -EFAULT; | ||
437 | } | ||
438 | |||
439 | return 0; | ||
440 | } | ||
441 | |||
442 | static int | ||
443 | i915_gem_execbuffer_relocate_object_slow(struct drm_i915_gem_object *obj, | ||
444 | struct eb_objects *eb, | ||
445 | struct drm_i915_gem_exec_object2 *entry, | ||
446 | struct drm_i915_gem_relocation_entry *relocs) | ||
447 | { | ||
448 | int i, ret; | ||
449 | |||
450 | for (i = 0; i < entry->relocation_count; i++) { | ||
451 | ret = i915_gem_execbuffer_relocate_entry(obj, eb, entry, &relocs[i]); | ||
452 | if (ret) | ||
453 | return ret; | ||
454 | } | ||
455 | |||
456 | return 0; | ||
457 | } | ||
458 | |||
459 | static int | ||
460 | i915_gem_execbuffer_relocate(struct drm_device *dev, | ||
461 | struct eb_objects *eb, | ||
462 | struct list_head *objects, | ||
463 | struct drm_i915_gem_exec_object2 *exec) | ||
464 | { | ||
465 | struct drm_i915_gem_object *obj; | ||
466 | int ret; | ||
467 | |||
468 | list_for_each_entry(obj, objects, exec_list) { | ||
469 | obj->base.pending_read_domains = 0; | ||
470 | obj->base.pending_write_domain = 0; | ||
471 | ret = i915_gem_execbuffer_relocate_object(obj, eb, exec++); | ||
472 | if (ret) | ||
473 | return ret; | ||
474 | } | ||
475 | |||
476 | return 0; | ||
477 | } | ||
478 | |||
479 | static int | ||
480 | i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring, | ||
481 | struct drm_file *file, | ||
482 | struct list_head *objects, | ||
483 | struct drm_i915_gem_exec_object2 *exec) | ||
484 | { | ||
485 | struct drm_i915_gem_object *obj; | ||
486 | struct drm_i915_gem_exec_object2 *entry; | ||
487 | int ret, retry; | ||
488 | bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4; | ||
489 | |||
490 | /* Attempt to pin all of the buffers into the GTT. | ||
491 | * This is done in 3 phases: | ||
492 | * | ||
493 | * 1a. Unbind all objects that do not match the GTT constraints for | ||
494 | * the execbuffer (fenceable, mappable, alignment etc). | ||
495 | * 1b. Increment pin count for already bound objects. | ||
496 | * 2. Bind new objects. | ||
497 | * 3. Decrement pin count. | ||
498 | * | ||
499 | * This avoids unnecessary unbinding of later objects in order to make | ||
500 | * room for the earlier objects *unless* we need to defragment. | ||
501 | */ | ||
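/*
 * For example (a hypothetical buffer, not from the patch): an object that
 * still has relocations to process but is currently bound outside the
 * mappable aperture (obj->map_and_fenceable == false) fails the
 * need_mappable test in pass 1a and is unbound, then rebound into a
 * mappable location in pass 2.  Objects that already meet their alignment
 * and fencing constraints simply have their pin count raised and keep
 * their current offset.
 */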
502 | retry = 0; | ||
503 | do { | ||
504 | ret = 0; | ||
505 | |||
506 | /* Unbind any ill-fitting objects or pin. */ | ||
507 | entry = exec; | ||
508 | list_for_each_entry(obj, objects, exec_list) { | ||
509 | bool need_fence, need_mappable; | ||
510 | |||
511 | if (!obj->gtt_space) { | ||
512 | entry++; | ||
513 | continue; | ||
514 | } | ||
515 | |||
516 | need_fence = | ||
517 | has_fenced_gpu_access && | ||
518 | entry->flags & EXEC_OBJECT_NEEDS_FENCE && | ||
519 | obj->tiling_mode != I915_TILING_NONE; | ||
520 | need_mappable = | ||
521 | entry->relocation_count ? true : need_fence; | ||
522 | |||
523 | if ((entry->alignment && obj->gtt_offset & (entry->alignment - 1)) || | ||
524 | (need_mappable && !obj->map_and_fenceable)) | ||
525 | ret = i915_gem_object_unbind(obj); | ||
526 | else | ||
527 | ret = i915_gem_object_pin(obj, | ||
528 | entry->alignment, | ||
529 | need_mappable); | ||
530 | if (ret) | ||
531 | goto err; | ||
532 | |||
533 | entry++; | ||
534 | } | ||
535 | |||
536 | /* Bind fresh objects */ | ||
537 | entry = exec; | ||
538 | list_for_each_entry(obj, objects, exec_list) { | ||
539 | bool need_fence; | ||
540 | |||
541 | need_fence = | ||
542 | has_fenced_gpu_access && | ||
543 | entry->flags & EXEC_OBJECT_NEEDS_FENCE && | ||
544 | obj->tiling_mode != I915_TILING_NONE; | ||
545 | |||
546 | if (!obj->gtt_space) { | ||
547 | bool need_mappable = | ||
548 | entry->relocation_count ? true : need_fence; | ||
549 | |||
550 | ret = i915_gem_object_pin(obj, | ||
551 | entry->alignment, | ||
552 | need_mappable); | ||
553 | if (ret) | ||
554 | break; | ||
555 | } | ||
556 | |||
557 | if (has_fenced_gpu_access) { | ||
558 | if (need_fence) { | ||
559 | ret = i915_gem_object_get_fence(obj, ring, 1); | ||
560 | if (ret) | ||
561 | break; | ||
562 | } else if (entry->flags & EXEC_OBJECT_NEEDS_FENCE && | ||
563 | obj->tiling_mode == I915_TILING_NONE) { | ||
564 | /* XXX pipelined! */ | ||
565 | ret = i915_gem_object_put_fence(obj); | ||
566 | if (ret) | ||
567 | break; | ||
568 | } | ||
569 | obj->pending_fenced_gpu_access = need_fence; | ||
570 | } | ||
571 | |||
572 | entry->offset = obj->gtt_offset; | ||
573 | entry++; | ||
574 | } | ||
575 | |||
576 | /* Decrement pin count for bound objects */ | ||
577 | list_for_each_entry(obj, objects, exec_list) { | ||
578 | if (obj->gtt_space) | ||
579 | i915_gem_object_unpin(obj); | ||
580 | } | ||
581 | |||
582 | if (ret != -ENOSPC || retry > 1) | ||
583 | return ret; | ||
584 | |||
585 | /* First attempt, just clear anything that is purgeable. | ||
586 | * Second attempt, clear the entire GTT. | ||
587 | */ | ||
588 | ret = i915_gem_evict_everything(ring->dev, retry == 0); | ||
589 | if (ret) | ||
590 | return ret; | ||
591 | |||
592 | retry++; | ||
593 | } while (1); | ||
594 | |||
595 | err: | ||
596 | obj = list_entry(obj->exec_list.prev, | ||
597 | struct drm_i915_gem_object, | ||
598 | exec_list); | ||
599 | while (objects != &obj->exec_list) { | ||
600 | if (obj->gtt_space) | ||
601 | i915_gem_object_unpin(obj); | ||
602 | |||
603 | obj = list_entry(obj->exec_list.prev, | ||
604 | struct drm_i915_gem_object, | ||
605 | exec_list); | ||
606 | } | ||
607 | |||
608 | return ret; | ||
609 | } | ||
610 | |||
611 | static int | ||
612 | i915_gem_execbuffer_relocate_slow(struct drm_device *dev, | ||
613 | struct drm_file *file, | ||
614 | struct intel_ring_buffer *ring, | ||
615 | struct list_head *objects, | ||
616 | struct eb_objects *eb, | ||
617 | struct drm_i915_gem_exec_object2 *exec, | ||
618 | int count) | ||
619 | { | ||
620 | struct drm_i915_gem_relocation_entry *reloc; | ||
621 | struct drm_i915_gem_object *obj; | ||
622 | int i, total, ret; | ||
623 | |||
624 | /* We may process another execbuffer during the unlock... */ | ||
625 | while (!list_empty(objects)) { | ||
626 | obj = list_first_entry(objects, | ||
627 | struct drm_i915_gem_object, | ||
628 | exec_list); | ||
629 | list_del_init(&obj->exec_list); | ||
630 | drm_gem_object_unreference(&obj->base); | ||
631 | } | ||
632 | |||
633 | mutex_unlock(&dev->struct_mutex); | ||
634 | |||
635 | total = 0; | ||
636 | for (i = 0; i < count; i++) | ||
637 | total += exec[i].relocation_count; | ||
638 | |||
639 | reloc = drm_malloc_ab(total, sizeof(*reloc)); | ||
640 | if (reloc == NULL) { | ||
641 | mutex_lock(&dev->struct_mutex); | ||
642 | return -ENOMEM; | ||
643 | } | ||
644 | |||
645 | total = 0; | ||
646 | for (i = 0; i < count; i++) { | ||
647 | struct drm_i915_gem_relocation_entry __user *user_relocs; | ||
648 | |||
649 | user_relocs = (void __user *)(uintptr_t)exec[i].relocs_ptr; | ||
650 | |||
651 | if (copy_from_user(reloc+total, user_relocs, | ||
652 | exec[i].relocation_count * sizeof(*reloc))) { | ||
653 | ret = -EFAULT; | ||
654 | mutex_lock(&dev->struct_mutex); | ||
655 | goto err; | ||
656 | } | ||
657 | |||
658 | total += exec[i].relocation_count; | ||
659 | } | ||
660 | |||
661 | ret = i915_mutex_lock_interruptible(dev); | ||
662 | if (ret) { | ||
663 | mutex_lock(&dev->struct_mutex); | ||
664 | goto err; | ||
665 | } | ||
666 | |||
667 | /* reacquire the objects */ | ||
668 | INIT_LIST_HEAD(objects); | ||
669 | eb_reset(eb); | ||
670 | for (i = 0; i < count; i++) { | ||
671 | struct drm_i915_gem_object *obj; | ||
672 | |||
673 | obj = to_intel_bo(drm_gem_object_lookup(dev, file, | ||
674 | exec[i].handle)); | ||
675 | if (obj == NULL) { | ||
676 | DRM_ERROR("Invalid object handle %d at index %d\n", | ||
677 | exec[i].handle, i); | ||
678 | ret = -ENOENT; | ||
679 | goto err; | ||
680 | } | ||
681 | |||
682 | list_add_tail(&obj->exec_list, objects); | ||
683 | obj->exec_handle = exec[i].handle; | ||
684 | eb_add_object(eb, obj); | ||
685 | } | ||
686 | |||
687 | ret = i915_gem_execbuffer_reserve(ring, file, objects, exec); | ||
688 | if (ret) | ||
689 | goto err; | ||
690 | |||
691 | total = 0; | ||
692 | list_for_each_entry(obj, objects, exec_list) { | ||
693 | obj->base.pending_read_domains = 0; | ||
694 | obj->base.pending_write_domain = 0; | ||
695 | ret = i915_gem_execbuffer_relocate_object_slow(obj, eb, | ||
696 | exec, | ||
697 | reloc + total); | ||
698 | if (ret) | ||
699 | goto err; | ||
700 | |||
701 | total += exec->relocation_count; | ||
702 | exec++; | ||
703 | } | ||
704 | |||
705 | /* Leave the user relocations as they are: this is the painfully slow path, | ||
706 | * and we want to avoid the complication of dropping the lock whilst | ||
707 | * having buffers reserved in the aperture and so causing spurious | ||
708 | * ENOSPC for random operations. | ||
709 | */ | ||
710 | |||
711 | err: | ||
712 | drm_free_large(reloc); | ||
713 | return ret; | ||
714 | } | ||
715 | |||
716 | static void | ||
717 | i915_gem_execbuffer_flush(struct drm_device *dev, | ||
718 | uint32_t invalidate_domains, | ||
719 | uint32_t flush_domains, | ||
720 | uint32_t flush_rings) | ||
721 | { | ||
722 | drm_i915_private_t *dev_priv = dev->dev_private; | ||
723 | int i; | ||
724 | |||
725 | if (flush_domains & I915_GEM_DOMAIN_CPU) | ||
726 | intel_gtt_chipset_flush(); | ||
727 | |||
728 | if ((flush_domains | invalidate_domains) & I915_GEM_GPU_DOMAINS) { | ||
729 | for (i = 0; i < I915_NUM_RINGS; i++) | ||
730 | if (flush_rings & (1 << i)) | ||
731 | i915_gem_flush_ring(dev, &dev_priv->ring[i], | ||
732 | invalidate_domains, | ||
733 | flush_domains); | ||
734 | } | ||
735 | } | ||
736 | |||
737 | static int | ||
738 | i915_gem_execbuffer_sync_rings(struct drm_i915_gem_object *obj, | ||
739 | struct intel_ring_buffer *to) | ||
740 | { | ||
741 | struct intel_ring_buffer *from = obj->ring; | ||
742 | u32 seqno; | ||
743 | int ret, idx; | ||
744 | |||
745 | if (from == NULL || to == from) | ||
746 | return 0; | ||
747 | |||
748 | if (INTEL_INFO(obj->base.dev)->gen < 6) | ||
749 | return i915_gem_object_wait_rendering(obj, true); | ||
750 | |||
751 | idx = intel_ring_sync_index(from, to); | ||
752 | |||
753 | seqno = obj->last_rendering_seqno; | ||
754 | if (seqno <= from->sync_seqno[idx]) | ||
755 | return 0; | ||
756 | |||
757 | if (seqno == from->outstanding_lazy_request) { | ||
758 | struct drm_i915_gem_request *request; | ||
759 | |||
760 | request = kzalloc(sizeof(*request), GFP_KERNEL); | ||
761 | if (request == NULL) | ||
762 | return -ENOMEM; | ||
763 | |||
764 | ret = i915_add_request(obj->base.dev, NULL, request, from); | ||
765 | if (ret) { | ||
766 | kfree(request); | ||
767 | return ret; | ||
768 | } | ||
769 | |||
770 | seqno = request->seqno; | ||
771 | } | ||
772 | |||
773 | from->sync_seqno[idx] = seqno; | ||
774 | return intel_ring_sync(to, from, seqno - 1); | ||
775 | } | ||
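/*
 * A sketch of the flow above (seqno values assumed): if the object was last
 * rendered on another ring with seqno 1200 and from->sync_seqno[idx] is only
 * 1100, the function first turns an outstanding lazy request into a real one
 * (so the seqno is actually emitted), records 1200 in sync_seqno[idx], and
 * asks the destination ring to wait on the source ring's semaphore before it
 * touches the object.  Had 1200 already been covered by sync_seqno[idx], no
 * new wait would be emitted.  On pre-gen6 hardware there are no semaphores,
 * so the function simply blocks on the CPU until rendering completes.
 */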
776 | |||
777 | static int | ||
778 | i915_gem_execbuffer_move_to_gpu(struct intel_ring_buffer *ring, | ||
779 | struct list_head *objects) | ||
780 | { | ||
781 | struct drm_i915_gem_object *obj; | ||
782 | struct change_domains cd; | ||
783 | int ret; | ||
784 | |||
785 | cd.invalidate_domains = 0; | ||
786 | cd.flush_domains = 0; | ||
787 | cd.flush_rings = 0; | ||
788 | list_for_each_entry(obj, objects, exec_list) | ||
789 | i915_gem_object_set_to_gpu_domain(obj, ring, &cd); | ||
790 | |||
791 | if (cd.invalidate_domains | cd.flush_domains) { | ||
792 | #if WATCH_EXEC | ||
793 | DRM_INFO("%s: invalidate_domains %08x flush_domains %08x\n", | ||
794 | __func__, | ||
795 | cd.invalidate_domains, | ||
796 | cd.flush_domains); | ||
797 | #endif | ||
798 | i915_gem_execbuffer_flush(ring->dev, | ||
799 | cd.invalidate_domains, | ||
800 | cd.flush_domains, | ||
801 | cd.flush_rings); | ||
802 | } | ||
803 | |||
804 | list_for_each_entry(obj, objects, exec_list) { | ||
805 | ret = i915_gem_execbuffer_sync_rings(obj, ring); | ||
806 | if (ret) | ||
807 | return ret; | ||
808 | } | ||
809 | |||
810 | return 0; | ||
811 | } | ||
812 | |||
813 | static bool | ||
814 | i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec) | ||
815 | { | ||
816 | return ((exec->batch_start_offset | exec->batch_len) & 0x7) == 0; | ||
817 | } | ||
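/*
 * Illustrative values: batch_start_offset == 0x100 with batch_len == 0x1000
 * passes, since (0x100 | 0x1000) & 0x7 == 0, whereas batch_len == 0x1002
 * leaves bit 1 set and the execbuffer is rejected before any locks are taken.
 */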
818 | |||
819 | static int | ||
820 | validate_exec_list(struct drm_i915_gem_exec_object2 *exec, | ||
821 | int count) | ||
822 | { | ||
823 | int i; | ||
824 | |||
825 | for (i = 0; i < count; i++) { | ||
826 | char __user *ptr = (char __user *)(uintptr_t)exec[i].relocs_ptr; | ||
827 | int length; /* limited by fault_in_pages_readable() */ | ||
828 | |||
829 | /* First check for malicious input causing overflow */ | ||
830 | if (exec[i].relocation_count > | ||
831 | INT_MAX / sizeof(struct drm_i915_gem_relocation_entry)) | ||
832 | return -EINVAL; | ||
833 | |||
834 | length = exec[i].relocation_count * | ||
835 | sizeof(struct drm_i915_gem_relocation_entry); | ||
836 | if (!access_ok(VERIFY_READ, ptr, length)) | ||
837 | return -EFAULT; | ||
838 | |||
839 | /* we may also need to update the presumed offsets */ | ||
840 | if (!access_ok(VERIFY_WRITE, ptr, length)) | ||
841 | return -EFAULT; | ||
842 | |||
843 | if (fault_in_pages_readable(ptr, length)) | ||
844 | return -EFAULT; | ||
845 | } | ||
846 | |||
847 | return 0; | ||
848 | } | ||
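/*
 * The overflow check above is what keeps the later computation sane: assuming
 * the 32-byte struct drm_i915_gem_relocation_entry, any relocation_count above
 * INT_MAX / 32 (roughly 67 million entries) is rejected outright.  Without it,
 * the product stored in the int "length" could wrap, and access_ok() together
 * with fault_in_pages_readable() would validate far less memory than the
 * relocations actually walked afterwards.
 */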
849 | |||
850 | static int | ||
851 | i915_gem_execbuffer_wait_for_flips(struct intel_ring_buffer *ring, | ||
852 | struct list_head *objects) | ||
853 | { | ||
854 | struct drm_i915_gem_object *obj; | ||
855 | int flips; | ||
856 | |||
857 | /* Check for any pending flips. As we only maintain a flip queue depth | ||
858 | * of 1, we can simply insert a WAIT for the next display flip prior | ||
859 | * to executing the batch and avoid stalling the CPU. | ||
860 | */ | ||
861 | flips = 0; | ||
862 | list_for_each_entry(obj, objects, exec_list) { | ||
863 | if (obj->base.write_domain) | ||
864 | flips |= atomic_read(&obj->pending_flip); | ||
865 | } | ||
866 | if (flips) { | ||
867 | int plane, flip_mask, ret; | ||
868 | |||
869 | for (plane = 0; flips >> plane; plane++) { | ||
870 | if (((flips >> plane) & 1) == 0) | ||
871 | continue; | ||
872 | |||
873 | if (plane) | ||
874 | flip_mask = MI_WAIT_FOR_PLANE_B_FLIP; | ||
875 | else | ||
876 | flip_mask = MI_WAIT_FOR_PLANE_A_FLIP; | ||
877 | |||
878 | ret = intel_ring_begin(ring, 2); | ||
879 | if (ret) | ||
880 | return ret; | ||
881 | |||
882 | intel_ring_emit(ring, MI_WAIT_FOR_EVENT | flip_mask); | ||
883 | intel_ring_emit(ring, MI_NOOP); | ||
884 | intel_ring_advance(ring); | ||
885 | } | ||
886 | } | ||
887 | |||
888 | return 0; | ||
889 | } | ||
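/*
 * Example (assumed state): if one object in the list has a flip pending on
 * plane B, bit 1 of "flips" is set and the loop emits
 *
 *	MI_WAIT_FOR_EVENT | MI_WAIT_FOR_PLANE_B_FLIP
 *	MI_NOOP
 *
 * into the ring ahead of the batch, so that the GPU rather than the CPU
 * waits for the display flip to complete before the buffer is reused.
 */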
890 | |||
891 | static void | ||
892 | i915_gem_execbuffer_move_to_active(struct list_head *objects, | ||
893 | struct intel_ring_buffer *ring, | ||
894 | u32 seqno) | ||
895 | { | ||
896 | struct drm_i915_gem_object *obj; | ||
897 | |||
898 | list_for_each_entry(obj, objects, exec_list) { | ||
899 | obj->base.read_domains = obj->base.pending_read_domains; | ||
900 | obj->base.write_domain = obj->base.pending_write_domain; | ||
901 | obj->fenced_gpu_access = obj->pending_fenced_gpu_access; | ||
902 | |||
903 | i915_gem_object_move_to_active(obj, ring, seqno); | ||
904 | if (obj->base.write_domain) { | ||
905 | obj->dirty = 1; | ||
906 | obj->pending_gpu_write = true; | ||
907 | list_move_tail(&obj->gpu_write_list, | ||
908 | &ring->gpu_write_list); | ||
909 | intel_mark_busy(ring->dev, obj); | ||
910 | } | ||
911 | |||
912 | trace_i915_gem_object_change_domain(obj, | ||
913 | obj->base.read_domains, | ||
914 | obj->base.write_domain); | ||
915 | } | ||
916 | } | ||
917 | |||
918 | static void | ||
919 | i915_gem_execbuffer_retire_commands(struct drm_device *dev, | ||
920 | struct drm_file *file, | ||
921 | struct intel_ring_buffer *ring) | ||
922 | { | ||
923 | struct drm_i915_gem_request *request; | ||
924 | u32 flush_domains; | ||
925 | |||
926 | /* | ||
927 | * Ensure that the commands in the batch buffer are | ||
928 | * finished before the interrupt fires. | ||
929 | * | ||
930 | * The sampler always gets flushed on i965 (sigh). | ||
931 | */ | ||
932 | flush_domains = 0; | ||
933 | if (INTEL_INFO(dev)->gen >= 4) | ||
934 | flush_domains |= I915_GEM_DOMAIN_SAMPLER; | ||
935 | |||
936 | ring->flush(ring, I915_GEM_DOMAIN_COMMAND, flush_domains); | ||
937 | |||
938 | /* Add a breadcrumb for the completion of the batch buffer */ | ||
939 | request = kzalloc(sizeof(*request), GFP_KERNEL); | ||
940 | if (request == NULL || i915_add_request(dev, file, request, ring)) { | ||
941 | i915_gem_next_request_seqno(dev, ring); | ||
942 | kfree(request); | ||
943 | } | ||
944 | } | ||
945 | |||
946 | static int | ||
947 | i915_gem_do_execbuffer(struct drm_device *dev, void *data, | ||
948 | struct drm_file *file, | ||
949 | struct drm_i915_gem_execbuffer2 *args, | ||
950 | struct drm_i915_gem_exec_object2 *exec) | ||
951 | { | ||
952 | drm_i915_private_t *dev_priv = dev->dev_private; | ||
953 | struct list_head objects; | ||
954 | struct eb_objects *eb; | ||
955 | struct drm_i915_gem_object *batch_obj; | ||
956 | struct drm_clip_rect *cliprects = NULL; | ||
957 | struct intel_ring_buffer *ring; | ||
958 | u32 exec_start, exec_len; | ||
959 | u32 seqno; | ||
960 | int ret, mode, i; | ||
961 | |||
962 | if (!i915_gem_check_execbuffer(args)) { | ||
963 | DRM_ERROR("execbuf with invalid offset/length\n"); | ||
964 | return -EINVAL; | ||
965 | } | ||
966 | |||
967 | ret = validate_exec_list(exec, args->buffer_count); | ||
968 | if (ret) | ||
969 | return ret; | ||
970 | |||
971 | #if WATCH_EXEC | ||
972 | DRM_INFO("buffers_ptr %d buffer_count %d len %08x\n", | ||
973 | (int) args->buffers_ptr, args->buffer_count, args->batch_len); | ||
974 | #endif | ||
975 | switch (args->flags & I915_EXEC_RING_MASK) { | ||
976 | case I915_EXEC_DEFAULT: | ||
977 | case I915_EXEC_RENDER: | ||
978 | ring = &dev_priv->ring[RCS]; | ||
979 | break; | ||
980 | case I915_EXEC_BSD: | ||
981 | if (!HAS_BSD(dev)) { | ||
982 | DRM_ERROR("execbuf with invalid ring (BSD)\n"); | ||
983 | return -EINVAL; | ||
984 | } | ||
985 | ring = &dev_priv->ring[VCS]; | ||
986 | break; | ||
987 | case I915_EXEC_BLT: | ||
988 | if (!HAS_BLT(dev)) { | ||
989 | DRM_ERROR("execbuf with invalid ring (BLT)\n"); | ||
990 | return -EINVAL; | ||
991 | } | ||
992 | ring = &dev_priv->ring[BCS]; | ||
993 | break; | ||
994 | default: | ||
995 | DRM_ERROR("execbuf with unknown ring: %d\n", | ||
996 | (int)(args->flags & I915_EXEC_RING_MASK)); | ||
997 | return -EINVAL; | ||
998 | } | ||
999 | |||
1000 | mode = args->flags & I915_EXEC_CONSTANTS_MASK; | ||
1001 | switch (mode) { | ||
1002 | case I915_EXEC_CONSTANTS_REL_GENERAL: | ||
1003 | case I915_EXEC_CONSTANTS_ABSOLUTE: | ||
1004 | case I915_EXEC_CONSTANTS_REL_SURFACE: | ||
1005 | if (ring == &dev_priv->ring[RCS] && | ||
1006 | mode != dev_priv->relative_constants_mode) { | ||
1007 | if (INTEL_INFO(dev)->gen < 4) | ||
1008 | return -EINVAL; | ||
1009 | |||
1010 | if (INTEL_INFO(dev)->gen > 5 && | ||
1011 | mode == I915_EXEC_CONSTANTS_REL_SURFACE) | ||
1012 | return -EINVAL; | ||
1013 | |||
1014 | ret = intel_ring_begin(ring, 4); | ||
1015 | if (ret) | ||
1016 | return ret; | ||
1017 | |||
1018 | intel_ring_emit(ring, MI_NOOP); | ||
1019 | intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); | ||
1020 | intel_ring_emit(ring, INSTPM); | ||
1021 | intel_ring_emit(ring, | ||
1022 | I915_EXEC_CONSTANTS_MASK << 16 | mode); | ||
1023 | intel_ring_advance(ring); | ||
1024 | |||
1025 | dev_priv->relative_constants_mode = mode; | ||
1026 | } | ||
1027 | break; | ||
1028 | default: | ||
1029 | DRM_ERROR("execbuf with unknown constants: %d\n", mode); | ||
1030 | return -EINVAL; | ||
1031 | } | ||
1032 | |||
1033 | if (args->buffer_count < 1) { | ||
1034 | DRM_ERROR("execbuf with %d buffers\n", args->buffer_count); | ||
1035 | return -EINVAL; | ||
1036 | } | ||
1037 | |||
1038 | if (args->num_cliprects != 0) { | ||
1039 | if (ring != &dev_priv->ring[RCS]) { | ||
1040 | DRM_ERROR("clip rectangles are only valid with the render ring\n"); | ||
1041 | return -EINVAL; | ||
1042 | } | ||
1043 | |||
1044 | cliprects = kmalloc(args->num_cliprects * sizeof(*cliprects), | ||
1045 | GFP_KERNEL); | ||
1046 | if (cliprects == NULL) { | ||
1047 | ret = -ENOMEM; | ||
1048 | goto pre_mutex_err; | ||
1049 | } | ||
1050 | |||
1051 | if (copy_from_user(cliprects, | ||
1052 | (struct drm_clip_rect __user *)(uintptr_t) | ||
1053 | args->cliprects_ptr, | ||
1054 | sizeof(*cliprects)*args->num_cliprects)) { | ||
1055 | ret = -EFAULT; | ||
1056 | goto pre_mutex_err; | ||
1057 | } | ||
1058 | } | ||
1059 | |||
1060 | ret = i915_mutex_lock_interruptible(dev); | ||
1061 | if (ret) | ||
1062 | goto pre_mutex_err; | ||
1063 | |||
1064 | if (dev_priv->mm.suspended) { | ||
1065 | mutex_unlock(&dev->struct_mutex); | ||
1066 | ret = -EBUSY; | ||
1067 | goto pre_mutex_err; | ||
1068 | } | ||
1069 | |||
1070 | eb = eb_create(args->buffer_count); | ||
1071 | if (eb == NULL) { | ||
1072 | mutex_unlock(&dev->struct_mutex); | ||
1073 | ret = -ENOMEM; | ||
1074 | goto pre_mutex_err; | ||
1075 | } | ||
1076 | |||
1077 | /* Look up object handles */ | ||
1078 | INIT_LIST_HEAD(&objects); | ||
1079 | for (i = 0; i < args->buffer_count; i++) { | ||
1080 | struct drm_i915_gem_object *obj; | ||
1081 | |||
1082 | obj = to_intel_bo(drm_gem_object_lookup(dev, file, | ||
1083 | exec[i].handle)); | ||
1084 | if (obj == NULL) { | ||
1085 | DRM_ERROR("Invalid object handle %d at index %d\n", | ||
1086 | exec[i].handle, i); | ||
1087 | /* prevent error path from reading uninitialized data */ | ||
1088 | ret = -ENOENT; | ||
1089 | goto err; | ||
1090 | } | ||
1091 | |||
1092 | if (!list_empty(&obj->exec_list)) { | ||
1093 | DRM_ERROR("Object %p [handle %d, index %d] appears more than once in object list\n", | ||
1094 | obj, exec[i].handle, i); | ||
1095 | ret = -EINVAL; | ||
1096 | goto err; | ||
1097 | } | ||
1098 | |||
1099 | list_add_tail(&obj->exec_list, &objects); | ||
1100 | obj->exec_handle = exec[i].handle; | ||
1101 | eb_add_object(eb, obj); | ||
1102 | } | ||
1103 | |||
1104 | /* Move the objects en-masse into the GTT, evicting if necessary. */ | ||
1105 | ret = i915_gem_execbuffer_reserve(ring, file, &objects, exec); | ||
1106 | if (ret) | ||
1107 | goto err; | ||
1108 | |||
1109 | /* The objects are in their final locations, apply the relocations. */ | ||
1110 | ret = i915_gem_execbuffer_relocate(dev, eb, &objects, exec); | ||
1111 | if (ret) { | ||
1112 | if (ret == -EFAULT) { | ||
1113 | ret = i915_gem_execbuffer_relocate_slow(dev, file, ring, | ||
1114 | &objects, eb, | ||
1115 | exec, | ||
1116 | args->buffer_count); | ||
1117 | BUG_ON(!mutex_is_locked(&dev->struct_mutex)); | ||
1118 | } | ||
1119 | if (ret) | ||
1120 | goto err; | ||
1121 | } | ||
1122 | |||
1123 | /* Set the pending read domains for the batch buffer to COMMAND */ | ||
1124 | batch_obj = list_entry(objects.prev, | ||
1125 | struct drm_i915_gem_object, | ||
1126 | exec_list); | ||
1127 | if (batch_obj->base.pending_write_domain) { | ||
1128 | DRM_ERROR("Attempting to use self-modifying batch buffer\n"); | ||
1129 | ret = -EINVAL; | ||
1130 | goto err; | ||
1131 | } | ||
1132 | batch_obj->base.pending_read_domains |= I915_GEM_DOMAIN_COMMAND; | ||
1133 | |||
1134 | ret = i915_gem_execbuffer_move_to_gpu(ring, &objects); | ||
1135 | if (ret) | ||
1136 | goto err; | ||
1137 | |||
1138 | ret = i915_gem_execbuffer_wait_for_flips(ring, &objects); | ||
1139 | if (ret) | ||
1140 | goto err; | ||
1141 | |||
1142 | seqno = i915_gem_next_request_seqno(dev, ring); | ||
1143 | for (i = 0; i < I915_NUM_RINGS-1; i++) { | ||
1144 | if (seqno < ring->sync_seqno[i]) { | ||
1145 | /* The GPU can not handle its semaphore value wrapping, | ||
1146 | * so every billion or so execbuffers, we need to stall | ||
1147 | * the GPU in order to reset the counters. | ||
1148 | */ | ||
1149 | ret = i915_gpu_idle(dev); | ||
1150 | if (ret) | ||
1151 | goto err; | ||
1152 | |||
1153 | BUG_ON(ring->sync_seqno[i]); | ||
1154 | } | ||
1155 | } | ||
1156 | |||
1157 | exec_start = batch_obj->gtt_offset + args->batch_start_offset; | ||
1158 | exec_len = args->batch_len; | ||
1159 | if (cliprects) { | ||
1160 | for (i = 0; i < args->num_cliprects; i++) { | ||
1161 | ret = i915_emit_box(dev, &cliprects[i], | ||
1162 | args->DR1, args->DR4); | ||
1163 | if (ret) | ||
1164 | goto err; | ||
1165 | |||
1166 | ret = ring->dispatch_execbuffer(ring, | ||
1167 | exec_start, exec_len); | ||
1168 | if (ret) | ||
1169 | goto err; | ||
1170 | } | ||
1171 | } else { | ||
1172 | ret = ring->dispatch_execbuffer(ring, exec_start, exec_len); | ||
1173 | if (ret) | ||
1174 | goto err; | ||
1175 | } | ||
1176 | |||
1177 | i915_gem_execbuffer_move_to_active(&objects, ring, seqno); | ||
1178 | i915_gem_execbuffer_retire_commands(dev, file, ring); | ||
1179 | |||
1180 | err: | ||
1181 | eb_destroy(eb); | ||
1182 | while (!list_empty(&objects)) { | ||
1183 | struct drm_i915_gem_object *obj; | ||
1184 | |||
1185 | obj = list_first_entry(&objects, | ||
1186 | struct drm_i915_gem_object, | ||
1187 | exec_list); | ||
1188 | list_del_init(&obj->exec_list); | ||
1189 | drm_gem_object_unreference(&obj->base); | ||
1190 | } | ||
1191 | |||
1192 | mutex_unlock(&dev->struct_mutex); | ||
1193 | |||
1194 | pre_mutex_err: | ||
1195 | kfree(cliprects); | ||
1196 | return ret; | ||
1197 | } | ||
1198 | |||
1199 | /* | ||
1200 | * Legacy execbuffer just creates an exec2 list from the original exec object | ||
1201 | * list array and passes it to the real function. | ||
1202 | */ | ||
1203 | int | ||
1204 | i915_gem_execbuffer(struct drm_device *dev, void *data, | ||
1205 | struct drm_file *file) | ||
1206 | { | ||
1207 | struct drm_i915_gem_execbuffer *args = data; | ||
1208 | struct drm_i915_gem_execbuffer2 exec2; | ||
1209 | struct drm_i915_gem_exec_object *exec_list = NULL; | ||
1210 | struct drm_i915_gem_exec_object2 *exec2_list = NULL; | ||
1211 | int ret, i; | ||
1212 | |||
1213 | #if WATCH_EXEC | ||
1214 | DRM_INFO("buffers_ptr %d buffer_count %d len %08x\n", | ||
1215 | (int) args->buffers_ptr, args->buffer_count, args->batch_len); | ||
1216 | #endif | ||
1217 | |||
1218 | if (args->buffer_count < 1) { | ||
1219 | DRM_ERROR("execbuf with %d buffers\n", args->buffer_count); | ||
1220 | return -EINVAL; | ||
1221 | } | ||
1222 | |||
1223 | /* Copy in the exec list from userland */ | ||
1224 | exec_list = drm_malloc_ab(sizeof(*exec_list), args->buffer_count); | ||
1225 | exec2_list = drm_malloc_ab(sizeof(*exec2_list), args->buffer_count); | ||
1226 | if (exec_list == NULL || exec2_list == NULL) { | ||
1227 | DRM_ERROR("Failed to allocate exec list for %d buffers\n", | ||
1228 | args->buffer_count); | ||
1229 | drm_free_large(exec_list); | ||
1230 | drm_free_large(exec2_list); | ||
1231 | return -ENOMEM; | ||
1232 | } | ||
1233 | ret = copy_from_user(exec_list, | ||
1234 | (struct drm_i915_relocation_entry __user *) | ||
1235 | (uintptr_t) args->buffers_ptr, | ||
1236 | sizeof(*exec_list) * args->buffer_count); | ||
1237 | if (ret != 0) { | ||
1238 | DRM_ERROR("copy %d exec entries failed %d\n", | ||
1239 | args->buffer_count, ret); | ||
1240 | drm_free_large(exec_list); | ||
1241 | drm_free_large(exec2_list); | ||
1242 | return -EFAULT; | ||
1243 | } | ||
1244 | |||
1245 | for (i = 0; i < args->buffer_count; i++) { | ||
1246 | exec2_list[i].handle = exec_list[i].handle; | ||
1247 | exec2_list[i].relocation_count = exec_list[i].relocation_count; | ||
1248 | exec2_list[i].relocs_ptr = exec_list[i].relocs_ptr; | ||
1249 | exec2_list[i].alignment = exec_list[i].alignment; | ||
1250 | exec2_list[i].offset = exec_list[i].offset; | ||
1251 | if (INTEL_INFO(dev)->gen < 4) | ||
1252 | exec2_list[i].flags = EXEC_OBJECT_NEEDS_FENCE; | ||
1253 | else | ||
1254 | exec2_list[i].flags = 0; | ||
1255 | } | ||
1256 | |||
1257 | exec2.buffers_ptr = args->buffers_ptr; | ||
1258 | exec2.buffer_count = args->buffer_count; | ||
1259 | exec2.batch_start_offset = args->batch_start_offset; | ||
1260 | exec2.batch_len = args->batch_len; | ||
1261 | exec2.DR1 = args->DR1; | ||
1262 | exec2.DR4 = args->DR4; | ||
1263 | exec2.num_cliprects = args->num_cliprects; | ||
1264 | exec2.cliprects_ptr = args->cliprects_ptr; | ||
1265 | exec2.flags = I915_EXEC_RENDER; | ||
1266 | |||
1267 | ret = i915_gem_do_execbuffer(dev, data, file, &exec2, exec2_list); | ||
1268 | if (!ret) { | ||
1269 | /* Copy the new buffer offsets back to the user's exec list. */ | ||
1270 | for (i = 0; i < args->buffer_count; i++) | ||
1271 | exec_list[i].offset = exec2_list[i].offset; | ||
1272 | /* ... and back out to userspace */ | ||
1273 | ret = copy_to_user((struct drm_i915_relocation_entry __user *) | ||
1274 | (uintptr_t) args->buffers_ptr, | ||
1275 | exec_list, | ||
1276 | sizeof(*exec_list) * args->buffer_count); | ||
1277 | if (ret) { | ||
1278 | ret = -EFAULT; | ||
1279 | DRM_ERROR("failed to copy %d exec entries " | ||
1280 | "back to user (%d)\n", | ||
1281 | args->buffer_count, ret); | ||
1282 | } | ||
1283 | } | ||
1284 | |||
1285 | drm_free_large(exec_list); | ||
1286 | drm_free_large(exec2_list); | ||
1287 | return ret; | ||
1288 | } | ||
1289 | |||
1290 | int | ||
1291 | i915_gem_execbuffer2(struct drm_device *dev, void *data, | ||
1292 | struct drm_file *file) | ||
1293 | { | ||
1294 | struct drm_i915_gem_execbuffer2 *args = data; | ||
1295 | struct drm_i915_gem_exec_object2 *exec2_list = NULL; | ||
1296 | int ret; | ||
1297 | |||
1298 | #if WATCH_EXEC | ||
1299 | DRM_INFO("buffers_ptr %d buffer_count %d len %08x\n", | ||
1300 | (int) args->buffers_ptr, args->buffer_count, args->batch_len); | ||
1301 | #endif | ||
1302 | |||
1303 | if (args->buffer_count < 1) { | ||
1304 | DRM_ERROR("execbuf2 with %d buffers\n", args->buffer_count); | ||
1305 | return -EINVAL; | ||
1306 | } | ||
1307 | |||
1308 | exec2_list = drm_malloc_ab(sizeof(*exec2_list), args->buffer_count); | ||
1309 | if (exec2_list == NULL) { | ||
1310 | DRM_ERROR("Failed to allocate exec list for %d buffers\n", | ||
1311 | args->buffer_count); | ||
1312 | return -ENOMEM; | ||
1313 | } | ||
1314 | ret = copy_from_user(exec2_list, | ||
1315 | (struct drm_i915_relocation_entry __user *) | ||
1316 | (uintptr_t) args->buffers_ptr, | ||
1317 | sizeof(*exec2_list) * args->buffer_count); | ||
1318 | if (ret != 0) { | ||
1319 | DRM_ERROR("copy %d exec entries failed %d\n", | ||
1320 | args->buffer_count, ret); | ||
1321 | drm_free_large(exec2_list); | ||
1322 | return -EFAULT; | ||
1323 | } | ||
1324 | |||
1325 | ret = i915_gem_do_execbuffer(dev, data, file, args, exec2_list); | ||
1326 | if (!ret) { | ||
1327 | /* Copy the new buffer offsets back to the user's exec list. */ | ||
1328 | ret = copy_to_user((struct drm_i915_relocation_entry __user *) | ||
1329 | (uintptr_t) args->buffers_ptr, | ||
1330 | exec2_list, | ||
1331 | sizeof(*exec2_list) * args->buffer_count); | ||
1332 | if (ret) { | ||
1333 | ret = -EFAULT; | ||
1334 | DRM_ERROR("failed to copy %d exec entries " | ||
1335 | "back to user (%d)\n", | ||
1336 | args->buffer_count, ret); | ||
1337 | } | ||
1338 | } | ||
1339 | |||
1340 | drm_free_large(exec2_list); | ||
1341 | return ret; | ||
1342 | } | ||
1343 | |||