path: root/drivers/gpu/drm/i915/i915_gem.c
Diffstat (limited to 'drivers/gpu/drm/i915/i915_gem.c')
-rw-r--r--  drivers/gpu/drm/i915/i915_gem.c  1916
1 file changed, 837 insertions, 1079 deletions
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 0d1e4b7b4b99..c1e5c66553df 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -35,31 +35,41 @@
35#include <linux/slab.h> 35#include <linux/slab.h>
36#include <linux/swap.h> 36#include <linux/swap.h>
37#include <linux/pci.h> 37#include <linux/pci.h>
38#include <linux/dma-buf.h>
38 39
39static __must_check int i915_gem_object_flush_gpu_write_domain(struct drm_i915_gem_object *obj); 40static __must_check int i915_gem_object_flush_gpu_write_domain(struct drm_i915_gem_object *obj);
40static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj); 41static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj);
41static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj); 42static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj);
42static __must_check int i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj,
43 bool write);
44static __must_check int i915_gem_object_set_cpu_read_domain_range(struct drm_i915_gem_object *obj,
45 uint64_t offset,
46 uint64_t size);
47static void i915_gem_object_set_to_full_cpu_read_domain(struct drm_i915_gem_object *obj);
48static __must_check int i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj, 43static __must_check int i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
49 unsigned alignment, 44 unsigned alignment,
50 bool map_and_fenceable); 45 bool map_and_fenceable);
51static void i915_gem_clear_fence_reg(struct drm_device *dev,
52 struct drm_i915_fence_reg *reg);
53static int i915_gem_phys_pwrite(struct drm_device *dev, 46static int i915_gem_phys_pwrite(struct drm_device *dev,
54 struct drm_i915_gem_object *obj, 47 struct drm_i915_gem_object *obj,
55 struct drm_i915_gem_pwrite *args, 48 struct drm_i915_gem_pwrite *args,
56 struct drm_file *file); 49 struct drm_file *file);
57static void i915_gem_free_object_tail(struct drm_i915_gem_object *obj); 50
51static void i915_gem_write_fence(struct drm_device *dev, int reg,
52 struct drm_i915_gem_object *obj);
53static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj,
54 struct drm_i915_fence_reg *fence,
55 bool enable);
58 56
59static int i915_gem_inactive_shrink(struct shrinker *shrinker, 57static int i915_gem_inactive_shrink(struct shrinker *shrinker,
60 struct shrink_control *sc); 58 struct shrink_control *sc);
61static void i915_gem_object_truncate(struct drm_i915_gem_object *obj); 59static void i915_gem_object_truncate(struct drm_i915_gem_object *obj);
62 60
61static inline void i915_gem_object_fence_lost(struct drm_i915_gem_object *obj)
62{
63 if (obj->tiling_mode)
64 i915_gem_release_mmap(obj);
65
66 /* As we do not have an associated fence register, we will force
67 * a tiling change if we ever need to acquire one.
68 */
69 obj->fence_dirty = false;
70 obj->fence_reg = I915_FENCE_REG_NONE;
71}
72
63/* some bookkeeping */ 73/* some bookkeeping */
64static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv, 74static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv,
65 size_t size) 75 size_t size)
@@ -122,26 +132,7 @@ int i915_mutex_lock_interruptible(struct drm_device *dev)
122static inline bool 132static inline bool
123i915_gem_object_is_inactive(struct drm_i915_gem_object *obj) 133i915_gem_object_is_inactive(struct drm_i915_gem_object *obj)
124{ 134{
125 return obj->gtt_space && !obj->active && obj->pin_count == 0; 135 return !obj->active;
126}
127
128void i915_gem_do_init(struct drm_device *dev,
129 unsigned long start,
130 unsigned long mappable_end,
131 unsigned long end)
132{
133 drm_i915_private_t *dev_priv = dev->dev_private;
134
135 drm_mm_init(&dev_priv->mm.gtt_space, start, end - start);
136
137 dev_priv->mm.gtt_start = start;
138 dev_priv->mm.gtt_mappable_end = mappable_end;
139 dev_priv->mm.gtt_end = end;
140 dev_priv->mm.gtt_total = end - start;
141 dev_priv->mm.mappable_gtt_total = min(end, mappable_end) - start;
142
143 /* Take over this portion of the GTT */
144 intel_gtt_clear_range(start / PAGE_SIZE, (end-start) / PAGE_SIZE);
145} 136}
146 137
147int 138int
@@ -150,12 +141,20 @@ i915_gem_init_ioctl(struct drm_device *dev, void *data,
150{ 141{
151 struct drm_i915_gem_init *args = data; 142 struct drm_i915_gem_init *args = data;
152 143
144 if (drm_core_check_feature(dev, DRIVER_MODESET))
145 return -ENODEV;
146
153 if (args->gtt_start >= args->gtt_end || 147 if (args->gtt_start >= args->gtt_end ||
154 (args->gtt_end | args->gtt_start) & (PAGE_SIZE - 1)) 148 (args->gtt_end | args->gtt_start) & (PAGE_SIZE - 1))
155 return -EINVAL; 149 return -EINVAL;
156 150
151 /* GEM with user mode setting was never supported on ilk and later. */
152 if (INTEL_INFO(dev)->gen >= 5)
153 return -ENODEV;
154
157 mutex_lock(&dev->struct_mutex); 155 mutex_lock(&dev->struct_mutex);
158 i915_gem_do_init(dev, args->gtt_start, args->gtt_end, args->gtt_end); 156 i915_gem_init_global_gtt(dev, args->gtt_start,
157 args->gtt_end, args->gtt_end);
159 mutex_unlock(&dev->struct_mutex); 158 mutex_unlock(&dev->struct_mutex);
160 159
161 return 0; 160 return 0;
@@ -170,13 +169,11 @@ i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
170 struct drm_i915_gem_object *obj; 169 struct drm_i915_gem_object *obj;
171 size_t pinned; 170 size_t pinned;
172 171
173 if (!(dev->driver->driver_features & DRIVER_GEM))
174 return -ENODEV;
175
176 pinned = 0; 172 pinned = 0;
177 mutex_lock(&dev->struct_mutex); 173 mutex_lock(&dev->struct_mutex);
178 list_for_each_entry(obj, &dev_priv->mm.pinned_list, mm_list) 174 list_for_each_entry(obj, &dev_priv->mm.gtt_list, gtt_list)
179 pinned += obj->gtt_space->size; 175 if (obj->pin_count)
176 pinned += obj->gtt_space->size;
180 mutex_unlock(&dev->struct_mutex); 177 mutex_unlock(&dev->struct_mutex);
181 178
182 args->aper_size = dev_priv->mm.gtt_total; 179 args->aper_size = dev_priv->mm.gtt_total;
@@ -247,6 +244,7 @@ i915_gem_create_ioctl(struct drm_device *dev, void *data,
247 struct drm_file *file) 244 struct drm_file *file)
248{ 245{
249 struct drm_i915_gem_create *args = data; 246 struct drm_i915_gem_create *args = data;
247
250 return i915_gem_create(file, dev, 248 return i915_gem_create(file, dev,
251 args->size, &args->handle); 249 args->size, &args->handle);
252} 250}
@@ -259,66 +257,6 @@ static int i915_gem_object_needs_bit17_swizzle(struct drm_i915_gem_object *obj)
259 obj->tiling_mode != I915_TILING_NONE; 257 obj->tiling_mode != I915_TILING_NONE;
260} 258}
261 259
262/**
263 * This is the fast shmem pread path, which attempts to copy_from_user directly
264 * from the backing pages of the object to the user's address space. On a
265 * fault, it fails so we can fall back to i915_gem_shmem_pwrite_slow().
266 */
267static int
268i915_gem_shmem_pread_fast(struct drm_device *dev,
269 struct drm_i915_gem_object *obj,
270 struct drm_i915_gem_pread *args,
271 struct drm_file *file)
272{
273 struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
274 ssize_t remain;
275 loff_t offset;
276 char __user *user_data;
277 int page_offset, page_length;
278
279 user_data = (char __user *) (uintptr_t) args->data_ptr;
280 remain = args->size;
281
282 offset = args->offset;
283
284 while (remain > 0) {
285 struct page *page;
286 char *vaddr;
287 int ret;
288
289 /* Operation in this page
290 *
291 * page_offset = offset within page
292 * page_length = bytes to copy for this page
293 */
294 page_offset = offset_in_page(offset);
295 page_length = remain;
296 if ((page_offset + remain) > PAGE_SIZE)
297 page_length = PAGE_SIZE - page_offset;
298
299 page = shmem_read_mapping_page(mapping, offset >> PAGE_SHIFT);
300 if (IS_ERR(page))
301 return PTR_ERR(page);
302
303 vaddr = kmap_atomic(page);
304 ret = __copy_to_user_inatomic(user_data,
305 vaddr + page_offset,
306 page_length);
307 kunmap_atomic(vaddr);
308
309 mark_page_accessed(page);
310 page_cache_release(page);
311 if (ret)
312 return -EFAULT;
313
314 remain -= page_length;
315 user_data += page_length;
316 offset += page_length;
317 }
318
319 return 0;
320}
321
322static inline int 260static inline int
323__copy_to_user_swizzled(char __user *cpu_vaddr, 261__copy_to_user_swizzled(char __user *cpu_vaddr,
324 const char *gpu_vaddr, int gpu_offset, 262 const char *gpu_vaddr, int gpu_offset,
@@ -346,8 +284,8 @@ __copy_to_user_swizzled(char __user *cpu_vaddr,
346} 284}
347 285
348static inline int 286static inline int
349__copy_from_user_swizzled(char __user *gpu_vaddr, int gpu_offset, 287__copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset,
350 const char *cpu_vaddr, 288 const char __user *cpu_vaddr,
351 int length) 289 int length)
352{ 290{
353 int ret, cpu_offset = 0; 291 int ret, cpu_offset = 0;
@@ -371,37 +309,121 @@ __copy_from_user_swizzled(char __user *gpu_vaddr, int gpu_offset,
371 return 0; 309 return 0;
372} 310}
373 311
374/** 312/* Per-page copy function for the shmem pread fastpath.
375 * This is the fallback shmem pread path, which allocates temporary storage 313 * Flushes invalid cachelines before reading the target if
376 * in kernel space to copy_to_user into outside of the struct_mutex, so we 314 * needs_clflush is set. */
377 * can copy out of the object's backing pages while holding the struct mutex
378 * and not take page faults.
379 */
380static int 315static int
381i915_gem_shmem_pread_slow(struct drm_device *dev, 316shmem_pread_fast(struct page *page, int shmem_page_offset, int page_length,
382 struct drm_i915_gem_object *obj, 317 char __user *user_data,
383 struct drm_i915_gem_pread *args, 318 bool page_do_bit17_swizzling, bool needs_clflush)
384 struct drm_file *file) 319{
320 char *vaddr;
321 int ret;
322
323 if (unlikely(page_do_bit17_swizzling))
324 return -EINVAL;
325
326 vaddr = kmap_atomic(page);
327 if (needs_clflush)
328 drm_clflush_virt_range(vaddr + shmem_page_offset,
329 page_length);
330 ret = __copy_to_user_inatomic(user_data,
331 vaddr + shmem_page_offset,
332 page_length);
333 kunmap_atomic(vaddr);
334
335 return ret;
336}
337
338static void
339shmem_clflush_swizzled_range(char *addr, unsigned long length,
340 bool swizzled)
341{
342 if (unlikely(swizzled)) {
343 unsigned long start = (unsigned long) addr;
344 unsigned long end = (unsigned long) addr + length;
345
346 /* For swizzling simply ensure that we always flush both
347 * channels. Lame, but simple and it works. Swizzled
348 * pwrite/pread is far from a hotpath - current userspace
349 * doesn't use it at all. */
350 start = round_down(start, 128);
351 end = round_up(end, 128);
352
353 drm_clflush_virt_range((void *)start, end - start);
354 } else {
355 drm_clflush_virt_range(addr, length);
356 }
357
358}
359
360/* Only difference to the fast-path function is that this can handle bit17
361 * and uses non-atomic copy and kmap functions. */
362static int
363shmem_pread_slow(struct page *page, int shmem_page_offset, int page_length,
364 char __user *user_data,
365 bool page_do_bit17_swizzling, bool needs_clflush)
366{
367 char *vaddr;
368 int ret;
369
370 vaddr = kmap(page);
371 if (needs_clflush)
372 shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
373 page_length,
374 page_do_bit17_swizzling);
375
376 if (page_do_bit17_swizzling)
377 ret = __copy_to_user_swizzled(user_data,
378 vaddr, shmem_page_offset,
379 page_length);
380 else
381 ret = __copy_to_user(user_data,
382 vaddr + shmem_page_offset,
383 page_length);
384 kunmap(page);
385
386 return ret;
387}
388
389static int
390i915_gem_shmem_pread(struct drm_device *dev,
391 struct drm_i915_gem_object *obj,
392 struct drm_i915_gem_pread *args,
393 struct drm_file *file)
385{ 394{
386 struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping; 395 struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
387 char __user *user_data; 396 char __user *user_data;
388 ssize_t remain; 397 ssize_t remain;
389 loff_t offset; 398 loff_t offset;
390 int shmem_page_offset, page_length, ret; 399 int shmem_page_offset, page_length, ret = 0;
391 int obj_do_bit17_swizzling, page_do_bit17_swizzling; 400 int obj_do_bit17_swizzling, page_do_bit17_swizzling;
401 int hit_slowpath = 0;
402 int prefaulted = 0;
403 int needs_clflush = 0;
404 int release_page;
392 405
393 user_data = (char __user *) (uintptr_t) args->data_ptr; 406 user_data = (char __user *) (uintptr_t) args->data_ptr;
394 remain = args->size; 407 remain = args->size;
395 408
396 obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj); 409 obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
397 410
398 offset = args->offset; 411 if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) {
412 /* If we're not in the cpu read domain, set ourself into the gtt
413 * read domain and manually flush cachelines (if required). This
414 * optimizes for the case when the gpu will dirty the data
415 * anyway again before the next pread happens. */
416 if (obj->cache_level == I915_CACHE_NONE)
417 needs_clflush = 1;
418 ret = i915_gem_object_set_to_gtt_domain(obj, false);
419 if (ret)
420 return ret;
421 }
399 422
400 mutex_unlock(&dev->struct_mutex); 423 offset = args->offset;
401 424
402 while (remain > 0) { 425 while (remain > 0) {
403 struct page *page; 426 struct page *page;
404 char *vaddr;
405 427
406 /* Operation in this page 428 /* Operation in this page
407 * 429 *
@@ -413,28 +435,51 @@ i915_gem_shmem_pread_slow(struct drm_device *dev,
413 if ((shmem_page_offset + page_length) > PAGE_SIZE) 435 if ((shmem_page_offset + page_length) > PAGE_SIZE)
414 page_length = PAGE_SIZE - shmem_page_offset; 436 page_length = PAGE_SIZE - shmem_page_offset;
415 437
416 page = shmem_read_mapping_page(mapping, offset >> PAGE_SHIFT); 438 if (obj->pages) {
417 if (IS_ERR(page)) { 439 page = obj->pages[offset >> PAGE_SHIFT];
418 ret = PTR_ERR(page); 440 release_page = 0;
419 goto out; 441 } else {
442 page = shmem_read_mapping_page(mapping, offset >> PAGE_SHIFT);
443 if (IS_ERR(page)) {
444 ret = PTR_ERR(page);
445 goto out;
446 }
447 release_page = 1;
420 } 448 }
421 449
422 page_do_bit17_swizzling = obj_do_bit17_swizzling && 450 page_do_bit17_swizzling = obj_do_bit17_swizzling &&
423 (page_to_phys(page) & (1 << 17)) != 0; 451 (page_to_phys(page) & (1 << 17)) != 0;
424 452
425 vaddr = kmap(page); 453 ret = shmem_pread_fast(page, shmem_page_offset, page_length,
426 if (page_do_bit17_swizzling) 454 user_data, page_do_bit17_swizzling,
427 ret = __copy_to_user_swizzled(user_data, 455 needs_clflush);
428 vaddr, shmem_page_offset, 456 if (ret == 0)
429 page_length); 457 goto next_page;
430 else
431 ret = __copy_to_user(user_data,
432 vaddr + shmem_page_offset,
433 page_length);
434 kunmap(page);
435 458
436 mark_page_accessed(page); 459 hit_slowpath = 1;
460 page_cache_get(page);
461 mutex_unlock(&dev->struct_mutex);
462
463 if (!prefaulted) {
464 ret = fault_in_multipages_writeable(user_data, remain);
465 /* Userspace is tricking us, but we've already clobbered
466 * its pages with the prefault and promised to write the
467 * data up to the first fault. Hence ignore any errors
468 * and just continue. */
469 (void)ret;
470 prefaulted = 1;
471 }
472
473 ret = shmem_pread_slow(page, shmem_page_offset, page_length,
474 user_data, page_do_bit17_swizzling,
475 needs_clflush);
476
477 mutex_lock(&dev->struct_mutex);
437 page_cache_release(page); 478 page_cache_release(page);
479next_page:
480 mark_page_accessed(page);
481 if (release_page)
482 page_cache_release(page);
438 483
439 if (ret) { 484 if (ret) {
440 ret = -EFAULT; 485 ret = -EFAULT;
@@ -447,10 +492,11 @@ i915_gem_shmem_pread_slow(struct drm_device *dev,
447 } 492 }
448 493
449out: 494out:
450 mutex_lock(&dev->struct_mutex); 495 if (hit_slowpath) {
451 /* Fixup: Kill any reinstated backing storage pages */ 496 /* Fixup: Kill any reinstated backing storage pages */
452 if (obj->madv == __I915_MADV_PURGED) 497 if (obj->madv == __I915_MADV_PURGED)
453 i915_gem_object_truncate(obj); 498 i915_gem_object_truncate(obj);
499 }
454 500
455 return ret; 501 return ret;
456} 502}
@@ -476,11 +522,6 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data,
476 args->size)) 522 args->size))
477 return -EFAULT; 523 return -EFAULT;
478 524
479 ret = fault_in_pages_writeable((char __user *)(uintptr_t)args->data_ptr,
480 args->size);
481 if (ret)
482 return -EFAULT;
483
484 ret = i915_mutex_lock_interruptible(dev); 525 ret = i915_mutex_lock_interruptible(dev);
485 if (ret) 526 if (ret)
486 return ret; 527 return ret;
@@ -498,19 +539,17 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data,
498 goto out; 539 goto out;
499 } 540 }
500 541
501 trace_i915_gem_object_pread(obj, args->offset, args->size); 542 /* prime objects have no backing filp to GEM pread/pwrite
502 543 * pages from.
503 ret = i915_gem_object_set_cpu_read_domain_range(obj, 544 */
504 args->offset, 545 if (!obj->base.filp) {
505 args->size); 546 ret = -EINVAL;
506 if (ret)
507 goto out; 547 goto out;
548 }
508 549
509 ret = -EFAULT; 550 trace_i915_gem_object_pread(obj, args->offset, args->size);
510 if (!i915_gem_object_needs_bit17_swizzle(obj)) 551
511 ret = i915_gem_shmem_pread_fast(dev, obj, args, file); 552 ret = i915_gem_shmem_pread(dev, obj, args, file);
512 if (ret == -EFAULT)
513 ret = i915_gem_shmem_pread_slow(dev, obj, args, file);
514 553
515out: 554out:
516 drm_gem_object_unreference(&obj->base); 555 drm_gem_object_unreference(&obj->base);
@@ -529,40 +568,19 @@ fast_user_write(struct io_mapping *mapping,
529 char __user *user_data, 568 char __user *user_data,
530 int length) 569 int length)
531{ 570{
532 char *vaddr_atomic; 571 void __iomem *vaddr_atomic;
572 void *vaddr;
533 unsigned long unwritten; 573 unsigned long unwritten;
534 574
535 vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base); 575 vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base);
536 unwritten = __copy_from_user_inatomic_nocache(vaddr_atomic + page_offset, 576 /* We can use the cpu mem copy function because this is X86. */
577 vaddr = (void __force*)vaddr_atomic + page_offset;
578 unwritten = __copy_from_user_inatomic_nocache(vaddr,
537 user_data, length); 579 user_data, length);
538 io_mapping_unmap_atomic(vaddr_atomic); 580 io_mapping_unmap_atomic(vaddr_atomic);
539 return unwritten; 581 return unwritten;
540} 582}
541 583
542/* Here's the write path which can sleep for
543 * page faults
544 */
545
546static inline void
547slow_kernel_write(struct io_mapping *mapping,
548 loff_t gtt_base, int gtt_offset,
549 struct page *user_page, int user_offset,
550 int length)
551{
552 char __iomem *dst_vaddr;
553 char *src_vaddr;
554
555 dst_vaddr = io_mapping_map_wc(mapping, gtt_base);
556 src_vaddr = kmap(user_page);
557
558 memcpy_toio(dst_vaddr + gtt_offset,
559 src_vaddr + user_offset,
560 length);
561
562 kunmap(user_page);
563 io_mapping_unmap(dst_vaddr);
564}
565
566/** 584/**
567 * This is the fast pwrite path, where we copy the data directly from the 585 * This is the fast pwrite path, where we copy the data directly from the
568 * user into the GTT, uncached. 586 * user into the GTT, uncached.
@@ -577,7 +595,19 @@ i915_gem_gtt_pwrite_fast(struct drm_device *dev,
577 ssize_t remain; 595 ssize_t remain;
578 loff_t offset, page_base; 596 loff_t offset, page_base;
579 char __user *user_data; 597 char __user *user_data;
580 int page_offset, page_length; 598 int page_offset, page_length, ret;
599
600 ret = i915_gem_object_pin(obj, 0, true);
601 if (ret)
602 goto out;
603
604 ret = i915_gem_object_set_to_gtt_domain(obj, true);
605 if (ret)
606 goto out_unpin;
607
608 ret = i915_gem_object_put_fence(obj);
609 if (ret)
610 goto out_unpin;
581 611
582 user_data = (char __user *) (uintptr_t) args->data_ptr; 612 user_data = (char __user *) (uintptr_t) args->data_ptr;
583 remain = args->size; 613 remain = args->size;
@@ -602,214 +632,133 @@ i915_gem_gtt_pwrite_fast(struct drm_device *dev,
602 * retry in the slow path. 632 * retry in the slow path.
603 */ 633 */
604 if (fast_user_write(dev_priv->mm.gtt_mapping, page_base, 634 if (fast_user_write(dev_priv->mm.gtt_mapping, page_base,
605 page_offset, user_data, page_length)) 635 page_offset, user_data, page_length)) {
606 return -EFAULT; 636 ret = -EFAULT;
637 goto out_unpin;
638 }
607 639
608 remain -= page_length; 640 remain -= page_length;
609 user_data += page_length; 641 user_data += page_length;
610 offset += page_length; 642 offset += page_length;
611 } 643 }
612 644
613 return 0; 645out_unpin:
646 i915_gem_object_unpin(obj);
647out:
648 return ret;
614} 649}
615 650
616/** 651/* Per-page copy function for the shmem pwrite fastpath.
617 * This is the fallback GTT pwrite path, which uses get_user_pages to pin 652 * Flushes invalid cachelines before writing to the target if
618 * the memory and maps it using kmap_atomic for copying. 653 * needs_clflush_before is set and flushes out any written cachelines after
619 * 654 * writing if needs_clflush is set. */
620 * This code resulted in x11perf -rgb10text consuming about 10% more CPU
621 * than using i915_gem_gtt_pwrite_fast on a G45 (32-bit).
622 */
623static int 655static int
624i915_gem_gtt_pwrite_slow(struct drm_device *dev, 656shmem_pwrite_fast(struct page *page, int shmem_page_offset, int page_length,
625 struct drm_i915_gem_object *obj, 657 char __user *user_data,
626 struct drm_i915_gem_pwrite *args, 658 bool page_do_bit17_swizzling,
627 struct drm_file *file) 659 bool needs_clflush_before,
660 bool needs_clflush_after)
628{ 661{
629 drm_i915_private_t *dev_priv = dev->dev_private; 662 char *vaddr;
630 ssize_t remain;
631 loff_t gtt_page_base, offset;
632 loff_t first_data_page, last_data_page, num_pages;
633 loff_t pinned_pages, i;
634 struct page **user_pages;
635 struct mm_struct *mm = current->mm;
636 int gtt_page_offset, data_page_offset, data_page_index, page_length;
637 int ret; 663 int ret;
638 uint64_t data_ptr = args->data_ptr;
639
640 remain = args->size;
641
642 /* Pin the user pages containing the data. We can't fault while
643 * holding the struct mutex, and all of the pwrite implementations
644 * want to hold it while dereferencing the user data.
645 */
646 first_data_page = data_ptr / PAGE_SIZE;
647 last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE;
648 num_pages = last_data_page - first_data_page + 1;
649
650 user_pages = drm_malloc_ab(num_pages, sizeof(struct page *));
651 if (user_pages == NULL)
652 return -ENOMEM;
653
654 mutex_unlock(&dev->struct_mutex);
655 down_read(&mm->mmap_sem);
656 pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr,
657 num_pages, 0, 0, user_pages, NULL);
658 up_read(&mm->mmap_sem);
659 mutex_lock(&dev->struct_mutex);
660 if (pinned_pages < num_pages) {
661 ret = -EFAULT;
662 goto out_unpin_pages;
663 }
664
665 ret = i915_gem_object_set_to_gtt_domain(obj, true);
666 if (ret)
667 goto out_unpin_pages;
668
669 ret = i915_gem_object_put_fence(obj);
670 if (ret)
671 goto out_unpin_pages;
672
673 offset = obj->gtt_offset + args->offset;
674
675 while (remain > 0) {
676 /* Operation in this page
677 *
678 * gtt_page_base = page offset within aperture
679 * gtt_page_offset = offset within page in aperture
680 * data_page_index = page number in get_user_pages return
681 * data_page_offset = offset with data_page_index page.
682 * page_length = bytes to copy for this page
683 */
684 gtt_page_base = offset & PAGE_MASK;
685 gtt_page_offset = offset_in_page(offset);
686 data_page_index = data_ptr / PAGE_SIZE - first_data_page;
687 data_page_offset = offset_in_page(data_ptr);
688
689 page_length = remain;
690 if ((gtt_page_offset + page_length) > PAGE_SIZE)
691 page_length = PAGE_SIZE - gtt_page_offset;
692 if ((data_page_offset + page_length) > PAGE_SIZE)
693 page_length = PAGE_SIZE - data_page_offset;
694 664
695 slow_kernel_write(dev_priv->mm.gtt_mapping, 665 if (unlikely(page_do_bit17_swizzling))
696 gtt_page_base, gtt_page_offset, 666 return -EINVAL;
697 user_pages[data_page_index],
698 data_page_offset,
699 page_length);
700
701 remain -= page_length;
702 offset += page_length;
703 data_ptr += page_length;
704 }
705 667
706out_unpin_pages: 668 vaddr = kmap_atomic(page);
707 for (i = 0; i < pinned_pages; i++) 669 if (needs_clflush_before)
708 page_cache_release(user_pages[i]); 670 drm_clflush_virt_range(vaddr + shmem_page_offset,
709 drm_free_large(user_pages); 671 page_length);
672 ret = __copy_from_user_inatomic_nocache(vaddr + shmem_page_offset,
673 user_data,
674 page_length);
675 if (needs_clflush_after)
676 drm_clflush_virt_range(vaddr + shmem_page_offset,
677 page_length);
678 kunmap_atomic(vaddr);
710 679
711 return ret; 680 return ret;
712} 681}
713 682
714/** 683/* Only difference to the fast-path function is that this can handle bit17
715 * This is the fast shmem pwrite path, which attempts to directly 684 * and uses non-atomic copy and kmap functions. */
716 * copy_from_user into the kmapped pages backing the object.
717 */
718static int 685static int
719i915_gem_shmem_pwrite_fast(struct drm_device *dev, 686shmem_pwrite_slow(struct page *page, int shmem_page_offset, int page_length,
720 struct drm_i915_gem_object *obj, 687 char __user *user_data,
721 struct drm_i915_gem_pwrite *args, 688 bool page_do_bit17_swizzling,
722 struct drm_file *file) 689 bool needs_clflush_before,
690 bool needs_clflush_after)
723{ 691{
724 struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping; 692 char *vaddr;
725 ssize_t remain; 693 int ret;
726 loff_t offset;
727 char __user *user_data;
728 int page_offset, page_length;
729
730 user_data = (char __user *) (uintptr_t) args->data_ptr;
731 remain = args->size;
732
733 offset = args->offset;
734 obj->dirty = 1;
735
736 while (remain > 0) {
737 struct page *page;
738 char *vaddr;
739 int ret;
740
741 /* Operation in this page
742 *
743 * page_offset = offset within page
744 * page_length = bytes to copy for this page
745 */
746 page_offset = offset_in_page(offset);
747 page_length = remain;
748 if ((page_offset + remain) > PAGE_SIZE)
749 page_length = PAGE_SIZE - page_offset;
750
751 page = shmem_read_mapping_page(mapping, offset >> PAGE_SHIFT);
752 if (IS_ERR(page))
753 return PTR_ERR(page);
754 694
755 vaddr = kmap_atomic(page); 695 vaddr = kmap(page);
756 ret = __copy_from_user_inatomic(vaddr + page_offset, 696 if (unlikely(needs_clflush_before || page_do_bit17_swizzling))
697 shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
698 page_length,
699 page_do_bit17_swizzling);
700 if (page_do_bit17_swizzling)
701 ret = __copy_from_user_swizzled(vaddr, shmem_page_offset,
757 user_data, 702 user_data,
758 page_length); 703 page_length);
759 kunmap_atomic(vaddr); 704 else
760 705 ret = __copy_from_user(vaddr + shmem_page_offset,
761 set_page_dirty(page); 706 user_data,
762 mark_page_accessed(page); 707 page_length);
763 page_cache_release(page); 708 if (needs_clflush_after)
764 709 shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
765 /* If we get a fault while copying data, then (presumably) our 710 page_length,
766 * source page isn't available. Return the error and we'll 711 page_do_bit17_swizzling);
767 * retry in the slow path. 712 kunmap(page);
768 */
769 if (ret)
770 return -EFAULT;
771
772 remain -= page_length;
773 user_data += page_length;
774 offset += page_length;
775 }
776 713
777 return 0; 714 return ret;
778} 715}
779 716
780/**
781 * This is the fallback shmem pwrite path, which uses get_user_pages to pin
782 * the memory and maps it using kmap_atomic for copying.
783 *
784 * This avoids taking mmap_sem for faulting on the user's address while the
785 * struct_mutex is held.
786 */
787static int 717static int
788i915_gem_shmem_pwrite_slow(struct drm_device *dev, 718i915_gem_shmem_pwrite(struct drm_device *dev,
789 struct drm_i915_gem_object *obj, 719 struct drm_i915_gem_object *obj,
790 struct drm_i915_gem_pwrite *args, 720 struct drm_i915_gem_pwrite *args,
791 struct drm_file *file) 721 struct drm_file *file)
792{ 722{
793 struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping; 723 struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
794 ssize_t remain; 724 ssize_t remain;
795 loff_t offset; 725 loff_t offset;
796 char __user *user_data; 726 char __user *user_data;
797 int shmem_page_offset, page_length, ret; 727 int shmem_page_offset, page_length, ret = 0;
798 int obj_do_bit17_swizzling, page_do_bit17_swizzling; 728 int obj_do_bit17_swizzling, page_do_bit17_swizzling;
729 int hit_slowpath = 0;
730 int needs_clflush_after = 0;
731 int needs_clflush_before = 0;
732 int release_page;
799 733
800 user_data = (char __user *) (uintptr_t) args->data_ptr; 734 user_data = (char __user *) (uintptr_t) args->data_ptr;
801 remain = args->size; 735 remain = args->size;
802 736
803 obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj); 737 obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
804 738
739 if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
740 /* If we're not in the cpu write domain, set ourself into the gtt
741 * write domain and manually flush cachelines (if required). This
742 * optimizes for the case when the gpu will use the data
743 * right away and we therefore have to clflush anyway. */
744 if (obj->cache_level == I915_CACHE_NONE)
745 needs_clflush_after = 1;
746 ret = i915_gem_object_set_to_gtt_domain(obj, true);
747 if (ret)
748 return ret;
749 }
750 /* Same trick applies for invalidate partially written cachelines before
751 * writing. */
752 if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)
753 && obj->cache_level == I915_CACHE_NONE)
754 needs_clflush_before = 1;
755
805 offset = args->offset; 756 offset = args->offset;
806 obj->dirty = 1; 757 obj->dirty = 1;
807 758
808 mutex_unlock(&dev->struct_mutex);
809
810 while (remain > 0) { 759 while (remain > 0) {
811 struct page *page; 760 struct page *page;
812 char *vaddr; 761 int partial_cacheline_write;
813 762
814 /* Operation in this page 763 /* Operation in this page
815 * 764 *
@@ -822,29 +771,51 @@ i915_gem_shmem_pwrite_slow(struct drm_device *dev,
822 if ((shmem_page_offset + page_length) > PAGE_SIZE) 771 if ((shmem_page_offset + page_length) > PAGE_SIZE)
823 page_length = PAGE_SIZE - shmem_page_offset; 772 page_length = PAGE_SIZE - shmem_page_offset;
824 773
825 page = shmem_read_mapping_page(mapping, offset >> PAGE_SHIFT); 774 /* If we don't overwrite a cacheline completely we need to be
826 if (IS_ERR(page)) { 775 * careful to have up-to-date data by first clflushing. Don't
827 ret = PTR_ERR(page); 776 * overcomplicate things and flush the entire patch. */
828 goto out; 777 partial_cacheline_write = needs_clflush_before &&
778 ((shmem_page_offset | page_length)
779 & (boot_cpu_data.x86_clflush_size - 1));
780
781 if (obj->pages) {
782 page = obj->pages[offset >> PAGE_SHIFT];
783 release_page = 0;
784 } else {
785 page = shmem_read_mapping_page(mapping, offset >> PAGE_SHIFT);
786 if (IS_ERR(page)) {
787 ret = PTR_ERR(page);
788 goto out;
789 }
790 release_page = 1;
829 } 791 }
830 792
831 page_do_bit17_swizzling = obj_do_bit17_swizzling && 793 page_do_bit17_swizzling = obj_do_bit17_swizzling &&
832 (page_to_phys(page) & (1 << 17)) != 0; 794 (page_to_phys(page) & (1 << 17)) != 0;
833 795
834 vaddr = kmap(page); 796 ret = shmem_pwrite_fast(page, shmem_page_offset, page_length,
835 if (page_do_bit17_swizzling) 797 user_data, page_do_bit17_swizzling,
836 ret = __copy_from_user_swizzled(vaddr, shmem_page_offset, 798 partial_cacheline_write,
837 user_data, 799 needs_clflush_after);
838 page_length); 800 if (ret == 0)
839 else 801 goto next_page;
840 ret = __copy_from_user(vaddr + shmem_page_offset, 802
841 user_data, 803 hit_slowpath = 1;
842 page_length); 804 page_cache_get(page);
843 kunmap(page); 805 mutex_unlock(&dev->struct_mutex);
806
807 ret = shmem_pwrite_slow(page, shmem_page_offset, page_length,
808 user_data, page_do_bit17_swizzling,
809 partial_cacheline_write,
810 needs_clflush_after);
844 811
812 mutex_lock(&dev->struct_mutex);
813 page_cache_release(page);
814next_page:
845 set_page_dirty(page); 815 set_page_dirty(page);
846 mark_page_accessed(page); 816 mark_page_accessed(page);
847 page_cache_release(page); 817 if (release_page)
818 page_cache_release(page);
848 819
849 if (ret) { 820 if (ret) {
850 ret = -EFAULT; 821 ret = -EFAULT;
@@ -857,17 +828,21 @@ i915_gem_shmem_pwrite_slow(struct drm_device *dev,
857 } 828 }
858 829
859out: 830out:
860 mutex_lock(&dev->struct_mutex); 831 if (hit_slowpath) {
861 /* Fixup: Kill any reinstated backing storage pages */ 832 /* Fixup: Kill any reinstated backing storage pages */
862 if (obj->madv == __I915_MADV_PURGED) 833 if (obj->madv == __I915_MADV_PURGED)
863 i915_gem_object_truncate(obj); 834 i915_gem_object_truncate(obj);
864 /* and flush dirty cachelines in case the object isn't in the cpu write 835 /* and flush dirty cachelines in case the object isn't in the cpu write
865 * domain anymore. */ 836 * domain anymore. */
866 if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) { 837 if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
867 i915_gem_clflush_object(obj); 838 i915_gem_clflush_object(obj);
868 intel_gtt_chipset_flush(); 839 intel_gtt_chipset_flush();
840 }
869 } 841 }
870 842
843 if (needs_clflush_after)
844 intel_gtt_chipset_flush();
845
871 return ret; 846 return ret;
872} 847}
873 848
@@ -892,8 +867,8 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
892 args->size)) 867 args->size))
893 return -EFAULT; 868 return -EFAULT;
894 869
895 ret = fault_in_pages_readable((char __user *)(uintptr_t)args->data_ptr, 870 ret = fault_in_multipages_readable((char __user *)(uintptr_t)args->data_ptr,
896 args->size); 871 args->size);
897 if (ret) 872 if (ret)
898 return -EFAULT; 873 return -EFAULT;
899 874
@@ -914,8 +889,17 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
914 goto out; 889 goto out;
915 } 890 }
916 891
892 /* prime objects have no backing filp to GEM pread/pwrite
893 * pages from.
894 */
895 if (!obj->base.filp) {
896 ret = -EINVAL;
897 goto out;
898 }
899
917 trace_i915_gem_object_pwrite(obj, args->offset, args->size); 900 trace_i915_gem_object_pwrite(obj, args->offset, args->size);
918 901
902 ret = -EFAULT;
919 /* We can only do the GTT pwrite on untiled buffers, as otherwise 903 /* We can only do the GTT pwrite on untiled buffers, as otherwise
920 * it would end up going through the fenced access, and we'll get 904 * it would end up going through the fenced access, and we'll get
921 * different detiling behavior between reading and writing. 905 * different detiling behavior between reading and writing.
@@ -928,42 +912,18 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
928 } 912 }
929 913
930 if (obj->gtt_space && 914 if (obj->gtt_space &&
915 obj->cache_level == I915_CACHE_NONE &&
916 obj->tiling_mode == I915_TILING_NONE &&
917 obj->map_and_fenceable &&
931 obj->base.write_domain != I915_GEM_DOMAIN_CPU) { 918 obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
932 ret = i915_gem_object_pin(obj, 0, true);
933 if (ret)
934 goto out;
935
936 ret = i915_gem_object_set_to_gtt_domain(obj, true);
937 if (ret)
938 goto out_unpin;
939
940 ret = i915_gem_object_put_fence(obj);
941 if (ret)
942 goto out_unpin;
943
944 ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file); 919 ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file);
945 if (ret == -EFAULT) 920 /* Note that the gtt paths might fail with non-page-backed user
946 ret = i915_gem_gtt_pwrite_slow(dev, obj, args, file); 921 * pointers (e.g. gtt mappings when moving data between
947 922 * textures). Fallback to the shmem path in that case. */
948out_unpin:
949 i915_gem_object_unpin(obj);
950
951 if (ret != -EFAULT)
952 goto out;
953 /* Fall through to the shmfs paths because the gtt paths might
954 * fail with non-page-backed user pointers (e.g. gtt mappings
955 * when moving data between textures). */
956 } 923 }
957 924
958 ret = i915_gem_object_set_to_cpu_domain(obj, 1);
959 if (ret)
960 goto out;
961
962 ret = -EFAULT;
963 if (!i915_gem_object_needs_bit17_swizzle(obj))
964 ret = i915_gem_shmem_pwrite_fast(dev, obj, args, file);
965 if (ret == -EFAULT) 925 if (ret == -EFAULT)
966 ret = i915_gem_shmem_pwrite_slow(dev, obj, args, file); 926 ret = i915_gem_shmem_pwrite(dev, obj, args, file);
967 927
968out: 928out:
969 drm_gem_object_unreference(&obj->base); 929 drm_gem_object_unreference(&obj->base);
@@ -986,9 +946,6 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
986 uint32_t write_domain = args->write_domain; 946 uint32_t write_domain = args->write_domain;
987 int ret; 947 int ret;
988 948
989 if (!(dev->driver->driver_features & DRIVER_GEM))
990 return -ENODEV;
991
992 /* Only handle setting domains to types used by the CPU. */ 949 /* Only handle setting domains to types used by the CPU. */
993 if (write_domain & I915_GEM_GPU_DOMAINS) 950 if (write_domain & I915_GEM_GPU_DOMAINS)
994 return -EINVAL; 951 return -EINVAL;
@@ -1042,9 +999,6 @@ i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
1042 struct drm_i915_gem_object *obj; 999 struct drm_i915_gem_object *obj;
1043 int ret = 0; 1000 int ret = 0;
1044 1001
1045 if (!(dev->driver->driver_features & DRIVER_GEM))
1046 return -ENODEV;
1047
1048 ret = i915_mutex_lock_interruptible(dev); 1002 ret = i915_mutex_lock_interruptible(dev);
1049 if (ret) 1003 if (ret)
1050 return ret; 1004 return ret;
@@ -1080,13 +1034,18 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
1080 struct drm_gem_object *obj; 1034 struct drm_gem_object *obj;
1081 unsigned long addr; 1035 unsigned long addr;
1082 1036
1083 if (!(dev->driver->driver_features & DRIVER_GEM))
1084 return -ENODEV;
1085
1086 obj = drm_gem_object_lookup(dev, file, args->handle); 1037 obj = drm_gem_object_lookup(dev, file, args->handle);
1087 if (obj == NULL) 1038 if (obj == NULL)
1088 return -ENOENT; 1039 return -ENOENT;
1089 1040
1041 /* prime objects have no backing filp to GEM mmap
1042 * pages from.
1043 */
1044 if (!obj->filp) {
1045 drm_gem_object_unreference_unlocked(obj);
1046 return -EINVAL;
1047 }
1048
1090 addr = vm_mmap(obj->filp, 0, args->size, 1049 addr = vm_mmap(obj->filp, 0, args->size,
1091 PROT_READ | PROT_WRITE, MAP_SHARED, 1050 PROT_READ | PROT_WRITE, MAP_SHARED,
1092 args->offset); 1051 args->offset);
@@ -1151,10 +1110,10 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
1151 goto unlock; 1110 goto unlock;
1152 } 1111 }
1153 1112
1154 if (obj->tiling_mode == I915_TILING_NONE) 1113 if (!obj->has_global_gtt_mapping)
1155 ret = i915_gem_object_put_fence(obj); 1114 i915_gem_gtt_bind_object(obj, obj->cache_level);
1156 else 1115
1157 ret = i915_gem_object_get_fence(obj, NULL); 1116 ret = i915_gem_object_get_fence(obj);
1158 if (ret) 1117 if (ret)
1159 goto unlock; 1118 goto unlock;
1160 1119
@@ -1308,9 +1267,6 @@ i915_gem_mmap_gtt(struct drm_file *file,
1308 struct drm_i915_gem_object *obj; 1267 struct drm_i915_gem_object *obj;
1309 int ret; 1268 int ret;
1310 1269
1311 if (!(dev->driver->driver_features & DRIVER_GEM))
1312 return -ENODEV;
1313
1314 ret = i915_mutex_lock_interruptible(dev); 1270 ret = i915_mutex_lock_interruptible(dev);
1315 if (ret) 1271 if (ret)
1316 return ret; 1272 return ret;
@@ -1368,14 +1324,10 @@ i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
1368{ 1324{
1369 struct drm_i915_gem_mmap_gtt *args = data; 1325 struct drm_i915_gem_mmap_gtt *args = data;
1370 1326
1371 if (!(dev->driver->driver_features & DRIVER_GEM))
1372 return -ENODEV;
1373
1374 return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset); 1327 return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset);
1375} 1328}
1376 1329
1377 1330int
1378static int
1379i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj, 1331i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj,
1380 gfp_t gfpmask) 1332 gfp_t gfpmask)
1381{ 1333{
@@ -1384,6 +1336,9 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj,
1384 struct inode *inode; 1336 struct inode *inode;
1385 struct page *page; 1337 struct page *page;
1386 1338
1339 if (obj->pages || obj->sg_table)
1340 return 0;
1341
1387 /* Get the list of pages out of our struct file. They'll be pinned 1342 /* Get the list of pages out of our struct file. They'll be pinned
1388 * at this point until we release them. 1343 * at this point until we release them.
1389 */ 1344 */
@@ -1425,6 +1380,9 @@ i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj)
1425 int page_count = obj->base.size / PAGE_SIZE; 1380 int page_count = obj->base.size / PAGE_SIZE;
1426 int i; 1381 int i;
1427 1382
1383 if (!obj->pages)
1384 return;
1385
1428 BUG_ON(obj->madv == __I915_MADV_PURGED); 1386 BUG_ON(obj->madv == __I915_MADV_PURGED);
1429 1387
1430 if (i915_gem_object_needs_bit17_swizzle(obj)) 1388 if (i915_gem_object_needs_bit17_swizzle(obj))
@@ -1473,7 +1431,6 @@ i915_gem_object_move_to_active(struct drm_i915_gem_object *obj,
1473 1431
1474 if (obj->fenced_gpu_access) { 1432 if (obj->fenced_gpu_access) {
1475 obj->last_fenced_seqno = seqno; 1433 obj->last_fenced_seqno = seqno;
1476 obj->last_fenced_ring = ring;
1477 1434
1478 /* Bump MRU to take account of the delayed flush */ 1435 /* Bump MRU to take account of the delayed flush */
1479 if (obj->fence_reg != I915_FENCE_REG_NONE) { 1436 if (obj->fence_reg != I915_FENCE_REG_NONE) {
@@ -1512,15 +1469,11 @@ i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj)
1512 struct drm_device *dev = obj->base.dev; 1469 struct drm_device *dev = obj->base.dev;
1513 struct drm_i915_private *dev_priv = dev->dev_private; 1470 struct drm_i915_private *dev_priv = dev->dev_private;
1514 1471
1515 if (obj->pin_count != 0) 1472 list_move_tail(&obj->mm_list, &dev_priv->mm.inactive_list);
1516 list_move_tail(&obj->mm_list, &dev_priv->mm.pinned_list);
1517 else
1518 list_move_tail(&obj->mm_list, &dev_priv->mm.inactive_list);
1519 1473
1520 BUG_ON(!list_empty(&obj->gpu_write_list)); 1474 BUG_ON(!list_empty(&obj->gpu_write_list));
1521 BUG_ON(!obj->active); 1475 BUG_ON(!obj->active);
1522 obj->ring = NULL; 1476 obj->ring = NULL;
1523 obj->last_fenced_ring = NULL;
1524 1477
1525 i915_gem_object_move_off_active(obj); 1478 i915_gem_object_move_off_active(obj);
1526 obj->fenced_gpu_access = false; 1479 obj->fenced_gpu_access = false;
@@ -1546,6 +1499,9 @@ i915_gem_object_truncate(struct drm_i915_gem_object *obj)
1546 inode = obj->base.filp->f_path.dentry->d_inode; 1499 inode = obj->base.filp->f_path.dentry->d_inode;
1547 shmem_truncate_range(inode, 0, (loff_t)-1); 1500 shmem_truncate_range(inode, 0, (loff_t)-1);
1548 1501
1502 if (obj->base.map_list.map)
1503 drm_gem_free_mmap_offset(&obj->base);
1504
1549 obj->madv = __I915_MADV_PURGED; 1505 obj->madv = __I915_MADV_PURGED;
1550} 1506}
1551 1507
@@ -1711,30 +1667,29 @@ static void i915_gem_reset_fences(struct drm_device *dev)
1711 1667
1712 for (i = 0; i < dev_priv->num_fence_regs; i++) { 1668 for (i = 0; i < dev_priv->num_fence_regs; i++) {
1713 struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i]; 1669 struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i];
1714 struct drm_i915_gem_object *obj = reg->obj;
1715 1670
1716 if (!obj) 1671 i915_gem_write_fence(dev, i, NULL);
1717 continue;
1718 1672
1719 if (obj->tiling_mode) 1673 if (reg->obj)
1720 i915_gem_release_mmap(obj); 1674 i915_gem_object_fence_lost(reg->obj);
1721 1675
1722 reg->obj->fence_reg = I915_FENCE_REG_NONE; 1676 reg->pin_count = 0;
1723 reg->obj->fenced_gpu_access = false; 1677 reg->obj = NULL;
1724 reg->obj->last_fenced_seqno = 0; 1678 INIT_LIST_HEAD(&reg->lru_list);
1725 reg->obj->last_fenced_ring = NULL;
1726 i915_gem_clear_fence_reg(dev, reg);
1727 } 1679 }
1680
1681 INIT_LIST_HEAD(&dev_priv->mm.fence_list);
1728} 1682}
1729 1683
1730void i915_gem_reset(struct drm_device *dev) 1684void i915_gem_reset(struct drm_device *dev)
1731{ 1685{
1732 struct drm_i915_private *dev_priv = dev->dev_private; 1686 struct drm_i915_private *dev_priv = dev->dev_private;
1733 struct drm_i915_gem_object *obj; 1687 struct drm_i915_gem_object *obj;
1688 struct intel_ring_buffer *ring;
1734 int i; 1689 int i;
1735 1690
1736 for (i = 0; i < I915_NUM_RINGS; i++) 1691 for_each_ring(ring, dev_priv, i)
1737 i915_gem_reset_ring_lists(dev_priv, &dev_priv->ring[i]); 1692 i915_gem_reset_ring_lists(dev_priv, ring);
1738 1693
1739 /* Remove anything from the flushing lists. The GPU cache is likely 1694 /* Remove anything from the flushing lists. The GPU cache is likely
1740 * to be lost on reset along with the data, so simply move the 1695 * to be lost on reset along with the data, so simply move the
@@ -1839,24 +1794,11 @@ void
1839i915_gem_retire_requests(struct drm_device *dev) 1794i915_gem_retire_requests(struct drm_device *dev)
1840{ 1795{
1841 drm_i915_private_t *dev_priv = dev->dev_private; 1796 drm_i915_private_t *dev_priv = dev->dev_private;
1797 struct intel_ring_buffer *ring;
1842 int i; 1798 int i;
1843 1799
1844 if (!list_empty(&dev_priv->mm.deferred_free_list)) { 1800 for_each_ring(ring, dev_priv, i)
1845 struct drm_i915_gem_object *obj, *next; 1801 i915_gem_retire_requests_ring(ring);
1846
1847 /* We must be careful that during unbind() we do not
1848 * accidentally infinitely recurse into retire requests.
1849 * Currently:
1850 * retire -> free -> unbind -> wait -> retire_ring
1851 */
1852 list_for_each_entry_safe(obj, next,
1853 &dev_priv->mm.deferred_free_list,
1854 mm_list)
1855 i915_gem_free_object_tail(obj);
1856 }
1857
1858 for (i = 0; i < I915_NUM_RINGS; i++)
1859 i915_gem_retire_requests_ring(&dev_priv->ring[i]);
1860} 1802}
1861 1803
1862static void 1804static void
@@ -1864,6 +1806,7 @@ i915_gem_retire_work_handler(struct work_struct *work)
1864{ 1806{
1865 drm_i915_private_t *dev_priv; 1807 drm_i915_private_t *dev_priv;
1866 struct drm_device *dev; 1808 struct drm_device *dev;
1809 struct intel_ring_buffer *ring;
1867 bool idle; 1810 bool idle;
1868 int i; 1811 int i;
1869 1812
@@ -1883,9 +1826,7 @@ i915_gem_retire_work_handler(struct work_struct *work)
1883 * objects indefinitely. 1826 * objects indefinitely.
1884 */ 1827 */
1885 idle = true; 1828 idle = true;
1886 for (i = 0; i < I915_NUM_RINGS; i++) { 1829 for_each_ring(ring, dev_priv, i) {
1887 struct intel_ring_buffer *ring = &dev_priv->ring[i];
1888
1889 if (!list_empty(&ring->gpu_write_list)) { 1830 if (!list_empty(&ring->gpu_write_list)) {
1890 struct drm_i915_gem_request *request; 1831 struct drm_i915_gem_request *request;
1891 int ret; 1832 int ret;
@@ -1907,20 +1848,10 @@ i915_gem_retire_work_handler(struct work_struct *work)
1907 mutex_unlock(&dev->struct_mutex); 1848 mutex_unlock(&dev->struct_mutex);
1908} 1849}
1909 1850
1910/** 1851static int
1911 * Waits for a sequence number to be signaled, and cleans up the 1852i915_gem_check_wedge(struct drm_i915_private *dev_priv)
1912 * request and object lists appropriately for that event.
1913 */
1914int
1915i915_wait_request(struct intel_ring_buffer *ring,
1916 uint32_t seqno,
1917 bool do_retire)
1918{ 1853{
1919 drm_i915_private_t *dev_priv = ring->dev->dev_private; 1854 BUG_ON(!mutex_is_locked(&dev_priv->dev->struct_mutex));
1920 u32 ier;
1921 int ret = 0;
1922
1923 BUG_ON(seqno == 0);
1924 1855
1925 if (atomic_read(&dev_priv->mm.wedged)) { 1856 if (atomic_read(&dev_priv->mm.wedged)) {
1926 struct completion *x = &dev_priv->error_completion; 1857 struct completion *x = &dev_priv->error_completion;
@@ -1935,6 +1866,20 @@ i915_wait_request(struct intel_ring_buffer *ring,
1935 return recovery_complete ? -EIO : -EAGAIN; 1866 return recovery_complete ? -EIO : -EAGAIN;
1936 } 1867 }
1937 1868
1869 return 0;
1870}
1871
1872/*
1873 * Compare seqno against outstanding lazy request. Emit a request if they are
1874 * equal.
1875 */
1876static int
1877i915_gem_check_olr(struct intel_ring_buffer *ring, u32 seqno)
1878{
1879 int ret = 0;
1880
1881 BUG_ON(!mutex_is_locked(&ring->dev->struct_mutex));
1882
1938 if (seqno == ring->outstanding_lazy_request) { 1883 if (seqno == ring->outstanding_lazy_request) {
1939 struct drm_i915_gem_request *request; 1884 struct drm_i915_gem_request *request;
1940 1885
@@ -1948,54 +1893,67 @@ i915_wait_request(struct intel_ring_buffer *ring,
1948 return ret; 1893 return ret;
1949 } 1894 }
1950 1895
1951 seqno = request->seqno; 1896 BUG_ON(seqno != request->seqno);
1952 } 1897 }
1953 1898
1954 if (!i915_seqno_passed(ring->get_seqno(ring), seqno)) { 1899 return ret;
1955 if (HAS_PCH_SPLIT(ring->dev)) 1900}
1956 ier = I915_READ(DEIER) | I915_READ(GTIER);
1957 else
1958 ier = I915_READ(IER);
1959 if (!ier) {
1960 DRM_ERROR("something (likely vbetool) disabled "
1961 "interrupts, re-enabling\n");
1962 ring->dev->driver->irq_preinstall(ring->dev);
1963 ring->dev->driver->irq_postinstall(ring->dev);
1964 }
1965 1901
1966 trace_i915_gem_request_wait_begin(ring, seqno); 1902static int __wait_seqno(struct intel_ring_buffer *ring, u32 seqno,
1967 1903 bool interruptible)
1968 ring->waiting_seqno = seqno; 1904{
1969 if (ring->irq_get(ring)) { 1905 drm_i915_private_t *dev_priv = ring->dev->dev_private;
1970 if (dev_priv->mm.interruptible) 1906 int ret = 0;
1971 ret = wait_event_interruptible(ring->irq_queue, 1907
1972 i915_seqno_passed(ring->get_seqno(ring), seqno) 1908 if (i915_seqno_passed(ring->get_seqno(ring), seqno))
1973 || atomic_read(&dev_priv->mm.wedged)); 1909 return 0;
1974 else 1910
1975 wait_event(ring->irq_queue, 1911 trace_i915_gem_request_wait_begin(ring, seqno);
1976 i915_seqno_passed(ring->get_seqno(ring), seqno) 1912 if (WARN_ON(!ring->irq_get(ring)))
1977 || atomic_read(&dev_priv->mm.wedged)); 1913 return -ENODEV;
1978 1914
1979 ring->irq_put(ring); 1915#define EXIT_COND \
1980 } else if (wait_for_atomic(i915_seqno_passed(ring->get_seqno(ring), 1916 (i915_seqno_passed(ring->get_seqno(ring), seqno) || \
1981 seqno) || 1917 atomic_read(&dev_priv->mm.wedged))
1982 atomic_read(&dev_priv->mm.wedged), 3000)) 1918
1983 ret = -EBUSY; 1919 if (interruptible)
1984 ring->waiting_seqno = 0; 1920 ret = wait_event_interruptible(ring->irq_queue,
1985 1921 EXIT_COND);
1986 trace_i915_gem_request_wait_end(ring, seqno); 1922 else
1987 } 1923 wait_event(ring->irq_queue, EXIT_COND);
1924
1925 ring->irq_put(ring);
1926 trace_i915_gem_request_wait_end(ring, seqno);
1927#undef EXIT_COND
1928
1929 return ret;
1930}
1931
1932/**
1933 * Waits for a sequence number to be signaled, and cleans up the
1934 * request and object lists appropriately for that event.
1935 */
1936int
1937i915_wait_request(struct intel_ring_buffer *ring,
1938 uint32_t seqno)
1939{
1940 drm_i915_private_t *dev_priv = ring->dev->dev_private;
1941 int ret = 0;
1942
1943 BUG_ON(seqno == 0);
1944
1945 ret = i915_gem_check_wedge(dev_priv);
1946 if (ret)
1947 return ret;
1948
1949 ret = i915_gem_check_olr(ring, seqno);
1950 if (ret)
1951 return ret;
1952
1953 ret = __wait_seqno(ring, seqno, dev_priv->mm.interruptible);
1988 if (atomic_read(&dev_priv->mm.wedged)) 1954 if (atomic_read(&dev_priv->mm.wedged))
1989 ret = -EAGAIN; 1955 ret = -EAGAIN;
1990 1956
1991 /* Directly dispatch request retiring. While we have the work queue
1992 * to handle this, the waiter on a request often wants an associated
1993 * buffer to have made it to the inactive list, and we would need
1994 * a separate wait queue to handle that.
1995 */
1996 if (ret == 0 && do_retire)
1997 i915_gem_retire_requests_ring(ring);
1998
1999 return ret; 1957 return ret;
2000} 1958}
2001 1959
@@ -2017,15 +1975,58 @@ i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj)
2017 * it. 1975 * it.
2018 */ 1976 */
2019 if (obj->active) { 1977 if (obj->active) {
2020 ret = i915_wait_request(obj->ring, obj->last_rendering_seqno, 1978 ret = i915_wait_request(obj->ring, obj->last_rendering_seqno);
2021 true);
2022 if (ret) 1979 if (ret)
2023 return ret; 1980 return ret;
1981 i915_gem_retire_requests_ring(obj->ring);
2024 } 1982 }
2025 1983
2026 return 0; 1984 return 0;
2027} 1985}
2028 1986
1987/**
1988 * i915_gem_object_sync - sync an object to a ring.
1989 *
1990 * @obj: object which may be in use on another ring.
1991 * @to: ring we wish to use the object on. May be NULL.
1992 *
1993 * This code is meant to abstract object synchronization with the GPU.
1994 * Calling with NULL implies synchronizing the object with the CPU
1995 * rather than a particular GPU ring.
1996 *
1997 * Returns 0 if successful, else propagates up the lower layer error.
1998 */
1999int
2000i915_gem_object_sync(struct drm_i915_gem_object *obj,
2001 struct intel_ring_buffer *to)
2002{
2003 struct intel_ring_buffer *from = obj->ring;
2004 u32 seqno;
2005 int ret, idx;
2006
2007 if (from == NULL || to == from)
2008 return 0;
2009
2010 if (to == NULL || !i915_semaphore_is_enabled(obj->base.dev))
2011 return i915_gem_object_wait_rendering(obj);
2012
2013 idx = intel_ring_sync_index(from, to);
2014
2015 seqno = obj->last_rendering_seqno;
2016 if (seqno <= from->sync_seqno[idx])
2017 return 0;
2018
2019 ret = i915_gem_check_olr(obj->ring, seqno);
2020 if (ret)
2021 return ret;
2022
2023 ret = to->sync_to(to, from, seqno);
2024 if (!ret)
2025 from->sync_seqno[idx] = seqno;
2026
2027 return ret;
2028}
2029
2029static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj) 2030static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj)
2030{ 2031{
2031 u32 old_write_domain, old_read_domains; 2032 u32 old_write_domain, old_read_domains;
@@ -2068,7 +2069,7 @@ i915_gem_object_unbind(struct drm_i915_gem_object *obj)
2068 } 2069 }
2069 2070
2070 ret = i915_gem_object_finish_gpu(obj); 2071 ret = i915_gem_object_finish_gpu(obj);
2071 if (ret == -ERESTARTSYS) 2072 if (ret)
2072 return ret; 2073 return ret;
2073 /* Continue on if we fail due to EIO, the GPU is hung so we 2074 /* Continue on if we fail due to EIO, the GPU is hung so we
2074 * should be safe and we need to cleanup or else we might 2075 * should be safe and we need to cleanup or else we might
@@ -2095,16 +2096,18 @@ i915_gem_object_unbind(struct drm_i915_gem_object *obj)
2095 2096
2096 /* release the fence reg _after_ flushing */ 2097 /* release the fence reg _after_ flushing */
2097 ret = i915_gem_object_put_fence(obj); 2098 ret = i915_gem_object_put_fence(obj);
2098 if (ret == -ERESTARTSYS) 2099 if (ret)
2099 return ret; 2100 return ret;
2100 2101
2101 trace_i915_gem_object_unbind(obj); 2102 trace_i915_gem_object_unbind(obj);
2102 2103
2103 i915_gem_gtt_unbind_object(obj); 2104 if (obj->has_global_gtt_mapping)
2105 i915_gem_gtt_unbind_object(obj);
2104 if (obj->has_aliasing_ppgtt_mapping) { 2106 if (obj->has_aliasing_ppgtt_mapping) {
2105 i915_ppgtt_unbind_object(dev_priv->mm.aliasing_ppgtt, obj); 2107 i915_ppgtt_unbind_object(dev_priv->mm.aliasing_ppgtt, obj);
2106 obj->has_aliasing_ppgtt_mapping = 0; 2108 obj->has_aliasing_ppgtt_mapping = 0;
2107 } 2109 }
2110 i915_gem_gtt_finish_object(obj);
2108 2111
2109 i915_gem_object_put_pages_gtt(obj); 2112 i915_gem_object_put_pages_gtt(obj);
2110 2113
@@ -2145,7 +2148,7 @@ i915_gem_flush_ring(struct intel_ring_buffer *ring,
2145 return 0; 2148 return 0;
2146} 2149}
2147 2150
2148static int i915_ring_idle(struct intel_ring_buffer *ring, bool do_retire) 2151static int i915_ring_idle(struct intel_ring_buffer *ring)
2149{ 2152{
2150 int ret; 2153 int ret;
2151 2154
@@ -2159,208 +2162,201 @@ static int i915_ring_idle(struct intel_ring_buffer *ring, bool do_retire)
2159 return ret; 2162 return ret;
2160 } 2163 }
2161 2164
2162 return i915_wait_request(ring, i915_gem_next_request_seqno(ring), 2165 return i915_wait_request(ring, i915_gem_next_request_seqno(ring));
2163 do_retire);
2164} 2166}
2165 2167
2166int i915_gpu_idle(struct drm_device *dev, bool do_retire) 2168int i915_gpu_idle(struct drm_device *dev)
2167{ 2169{
2168 drm_i915_private_t *dev_priv = dev->dev_private; 2170 drm_i915_private_t *dev_priv = dev->dev_private;
2171 struct intel_ring_buffer *ring;
2169 int ret, i; 2172 int ret, i;
2170 2173
2171 /* Flush everything onto the inactive list. */ 2174 /* Flush everything onto the inactive list. */
2172 for (i = 0; i < I915_NUM_RINGS; i++) { 2175 for_each_ring(ring, dev_priv, i) {
2173 ret = i915_ring_idle(&dev_priv->ring[i], do_retire); 2176 ret = i915_ring_idle(ring);
2174 if (ret) 2177 if (ret)
2175 return ret; 2178 return ret;
2179
2180 /* Is the device fubar? */
2181 if (WARN_ON(!list_empty(&ring->gpu_write_list)))
2182 return -EBUSY;
2176 } 2183 }
2177 2184
2178 return 0; 2185 return 0;
2179} 2186}
2180 2187
2181static int sandybridge_write_fence_reg(struct drm_i915_gem_object *obj, 2188static void sandybridge_write_fence_reg(struct drm_device *dev, int reg,
2182 struct intel_ring_buffer *pipelined) 2189 struct drm_i915_gem_object *obj)
2183{ 2190{
2184 struct drm_device *dev = obj->base.dev;
2185 drm_i915_private_t *dev_priv = dev->dev_private; 2191 drm_i915_private_t *dev_priv = dev->dev_private;
2186 u32 size = obj->gtt_space->size;
2187 int regnum = obj->fence_reg;
2188 uint64_t val; 2192 uint64_t val;
2189 2193
2190 val = (uint64_t)((obj->gtt_offset + size - 4096) & 2194 if (obj) {
2191 0xfffff000) << 32; 2195 u32 size = obj->gtt_space->size;
2192 val |= obj->gtt_offset & 0xfffff000;
2193 val |= (uint64_t)((obj->stride / 128) - 1) <<
2194 SANDYBRIDGE_FENCE_PITCH_SHIFT;
2195 2196
2196 if (obj->tiling_mode == I915_TILING_Y) 2197 val = (uint64_t)((obj->gtt_offset + size - 4096) &
2197 val |= 1 << I965_FENCE_TILING_Y_SHIFT; 2198 0xfffff000) << 32;
2198 val |= I965_FENCE_REG_VALID; 2199 val |= obj->gtt_offset & 0xfffff000;
2200 val |= (uint64_t)((obj->stride / 128) - 1) <<
2201 SANDYBRIDGE_FENCE_PITCH_SHIFT;
2199 2202
2200 if (pipelined) { 2203 if (obj->tiling_mode == I915_TILING_Y)
2201 int ret = intel_ring_begin(pipelined, 6); 2204 val |= 1 << I965_FENCE_TILING_Y_SHIFT;
2202 if (ret) 2205 val |= I965_FENCE_REG_VALID;
2203 return ret;
2204
2205 intel_ring_emit(pipelined, MI_NOOP);
2206 intel_ring_emit(pipelined, MI_LOAD_REGISTER_IMM(2));
2207 intel_ring_emit(pipelined, FENCE_REG_SANDYBRIDGE_0 + regnum*8);
2208 intel_ring_emit(pipelined, (u32)val);
2209 intel_ring_emit(pipelined, FENCE_REG_SANDYBRIDGE_0 + regnum*8 + 4);
2210 intel_ring_emit(pipelined, (u32)(val >> 32));
2211 intel_ring_advance(pipelined);
2212 } else 2206 } else
2213 I915_WRITE64(FENCE_REG_SANDYBRIDGE_0 + regnum * 8, val); 2207 val = 0;
2214 2208
2215 return 0; 2209 I915_WRITE64(FENCE_REG_SANDYBRIDGE_0 + reg * 8, val);
2210 POSTING_READ(FENCE_REG_SANDYBRIDGE_0 + reg * 8);
2216} 2211}
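
On gen6/7 the fence is a single 64-bit value: the upper dword carries the page-aligned address of the object's last page, the lower dword its start address, with the pitch (stride / 128, minus one) and the Y-tiling bit folded in, plus the valid bit. A stand-alone sketch of that packing, using hypothetical offset, size and stride values and locally defined shift constants rather than the ones in i915_reg.h:

#include <stdint.h>
#include <stdio.h>

/* Illustrative constants; the real definitions live in i915_reg.h. */
#define SANDYBRIDGE_FENCE_PITCH_SHIFT 32
#define I965_FENCE_TILING_Y_SHIFT      1
#define I965_FENCE_REG_VALID          (1u << 0)

int main(void)
{
    uint32_t gtt_offset = 0x00100000;    /* hypothetical 1 MiB-aligned offset */
    uint32_t size       = 0x00100000;    /* hypothetical 1 MiB object */
    uint32_t stride     = 4096;          /* bytes per row */
    int      tiling_y   = 1;

    uint64_t val;
    val  = (uint64_t)((gtt_offset + size - 4096) & 0xfffff000) << 32;
    val |= gtt_offset & 0xfffff000;
    val |= (uint64_t)((stride / 128) - 1) << SANDYBRIDGE_FENCE_PITCH_SHIFT;
    if (tiling_y)
        val |= 1 << I965_FENCE_TILING_Y_SHIFT;
    val |= I965_FENCE_REG_VALID;

    printf("fence value: 0x%016llx\n", (unsigned long long)val);
    return 0;
}
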
2217 2212
2218static int i965_write_fence_reg(struct drm_i915_gem_object *obj, 2213static void i965_write_fence_reg(struct drm_device *dev, int reg,
2219 struct intel_ring_buffer *pipelined) 2214 struct drm_i915_gem_object *obj)
2220{ 2215{
2221 struct drm_device *dev = obj->base.dev;
2222 drm_i915_private_t *dev_priv = dev->dev_private; 2216 drm_i915_private_t *dev_priv = dev->dev_private;
2223 u32 size = obj->gtt_space->size;
2224 int regnum = obj->fence_reg;
2225 uint64_t val; 2217 uint64_t val;
2226 2218
2227 val = (uint64_t)((obj->gtt_offset + size - 4096) & 2219 if (obj) {
2228 0xfffff000) << 32; 2220 u32 size = obj->gtt_space->size;
2229 val |= obj->gtt_offset & 0xfffff000;
2230 val |= ((obj->stride / 128) - 1) << I965_FENCE_PITCH_SHIFT;
2231 if (obj->tiling_mode == I915_TILING_Y)
2232 val |= 1 << I965_FENCE_TILING_Y_SHIFT;
2233 val |= I965_FENCE_REG_VALID;
2234 2221
2235 if (pipelined) { 2222 val = (uint64_t)((obj->gtt_offset + size - 4096) &
2236 int ret = intel_ring_begin(pipelined, 6); 2223 0xfffff000) << 32;
2237 if (ret) 2224 val |= obj->gtt_offset & 0xfffff000;
2238 return ret; 2225 val |= ((obj->stride / 128) - 1) << I965_FENCE_PITCH_SHIFT;
2239 2226 if (obj->tiling_mode == I915_TILING_Y)
2240 intel_ring_emit(pipelined, MI_NOOP); 2227 val |= 1 << I965_FENCE_TILING_Y_SHIFT;
2241 intel_ring_emit(pipelined, MI_LOAD_REGISTER_IMM(2)); 2228 val |= I965_FENCE_REG_VALID;
2242 intel_ring_emit(pipelined, FENCE_REG_965_0 + regnum*8);
2243 intel_ring_emit(pipelined, (u32)val);
2244 intel_ring_emit(pipelined, FENCE_REG_965_0 + regnum*8 + 4);
2245 intel_ring_emit(pipelined, (u32)(val >> 32));
2246 intel_ring_advance(pipelined);
2247 } else 2229 } else
2248 I915_WRITE64(FENCE_REG_965_0 + regnum * 8, val); 2230 val = 0;
2249 2231
2250 return 0; 2232 I915_WRITE64(FENCE_REG_965_0 + reg * 8, val);
2233 POSTING_READ(FENCE_REG_965_0 + reg * 8);
2251} 2234}
2252 2235
2253static int i915_write_fence_reg(struct drm_i915_gem_object *obj, 2236static void i915_write_fence_reg(struct drm_device *dev, int reg,
2254 struct intel_ring_buffer *pipelined) 2237 struct drm_i915_gem_object *obj)
2255{ 2238{
2256 struct drm_device *dev = obj->base.dev;
2257 drm_i915_private_t *dev_priv = dev->dev_private; 2239 drm_i915_private_t *dev_priv = dev->dev_private;
2258 u32 size = obj->gtt_space->size; 2240 u32 val;
2259 u32 fence_reg, val, pitch_val;
2260 int tile_width;
2261
2262 if (WARN((obj->gtt_offset & ~I915_FENCE_START_MASK) ||
2263 (size & -size) != size ||
2264 (obj->gtt_offset & (size - 1)),
2265 "object 0x%08x [fenceable? %d] not 1M or pot-size (0x%08x) aligned\n",
2266 obj->gtt_offset, obj->map_and_fenceable, size))
2267 return -EINVAL;
2268 2241
2269 if (obj->tiling_mode == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev)) 2242 if (obj) {
2270 tile_width = 128; 2243 u32 size = obj->gtt_space->size;
2271 else 2244 int pitch_val;
2272 tile_width = 512; 2245 int tile_width;
2273
2274 /* Note: pitch better be a power of two tile widths */
2275 pitch_val = obj->stride / tile_width;
2276 pitch_val = ffs(pitch_val) - 1;
2277
2278 val = obj->gtt_offset;
2279 if (obj->tiling_mode == I915_TILING_Y)
2280 val |= 1 << I830_FENCE_TILING_Y_SHIFT;
2281 val |= I915_FENCE_SIZE_BITS(size);
2282 val |= pitch_val << I830_FENCE_PITCH_SHIFT;
2283 val |= I830_FENCE_REG_VALID;
2284
2285 fence_reg = obj->fence_reg;
2286 if (fence_reg < 8)
2287 fence_reg = FENCE_REG_830_0 + fence_reg * 4;
2288 else
2289 fence_reg = FENCE_REG_945_8 + (fence_reg - 8) * 4;
2290 2246
2291 if (pipelined) { 2247 WARN((obj->gtt_offset & ~I915_FENCE_START_MASK) ||
2292 int ret = intel_ring_begin(pipelined, 4); 2248 (size & -size) != size ||
2293 if (ret) 2249 (obj->gtt_offset & (size - 1)),
2294 return ret; 2250 "object 0x%08x [fenceable? %d] not 1M or pot-size (0x%08x) aligned\n",
2251 obj->gtt_offset, obj->map_and_fenceable, size);
2295 2252
2296 intel_ring_emit(pipelined, MI_NOOP); 2253 if (obj->tiling_mode == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev))
2297 intel_ring_emit(pipelined, MI_LOAD_REGISTER_IMM(1)); 2254 tile_width = 128;
2298 intel_ring_emit(pipelined, fence_reg); 2255 else
2299 intel_ring_emit(pipelined, val); 2256 tile_width = 512;
2300 intel_ring_advance(pipelined); 2257
2258 /* Note: pitch better be a power of two tile widths */
2259 pitch_val = obj->stride / tile_width;
2260 pitch_val = ffs(pitch_val) - 1;
2261
2262 val = obj->gtt_offset;
2263 if (obj->tiling_mode == I915_TILING_Y)
2264 val |= 1 << I830_FENCE_TILING_Y_SHIFT;
2265 val |= I915_FENCE_SIZE_BITS(size);
2266 val |= pitch_val << I830_FENCE_PITCH_SHIFT;
2267 val |= I830_FENCE_REG_VALID;
2301 } else 2268 } else
2302 I915_WRITE(fence_reg, val); 2269 val = 0;
2303 2270
2304 return 0; 2271 if (reg < 8)
2272 reg = FENCE_REG_830_0 + reg * 4;
2273 else
2274 reg = FENCE_REG_945_8 + (reg - 8) * 4;
2275
2276 I915_WRITE(reg, val);
2277 POSTING_READ(reg);
2305} 2278}
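
The pre-965 fence pitch field stores log2 of the stride measured in tile widths, which is why the code divides by the tile width and then takes ffs() - 1; as the in-code note says, the stride therefore has to be a power-of-two number of tiles. A small worked example with a hypothetical stride and the 512-byte X-tile width:

#include <stdio.h>
#include <strings.h>    /* ffs() */

int main(void)
{
    unsigned int stride = 2048;          /* hypothetical X-tiled stride, bytes */
    unsigned int tile_width = 512;       /* X-tile width; Y tiles can be 128   */
    unsigned int pitch_val;

    pitch_val = stride / tile_width;     /* 4 tiles per row */
    pitch_val = ffs(pitch_val) - 1;      /* log2(4) == 2    */

    printf("pitch field: %u\n", pitch_val);
    return 0;
}
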
2306 2279
2307static int i830_write_fence_reg(struct drm_i915_gem_object *obj, 2280static void i830_write_fence_reg(struct drm_device *dev, int reg,
2308 struct intel_ring_buffer *pipelined) 2281 struct drm_i915_gem_object *obj)
2309{ 2282{
2310 struct drm_device *dev = obj->base.dev;
2311 drm_i915_private_t *dev_priv = dev->dev_private; 2283 drm_i915_private_t *dev_priv = dev->dev_private;
2312 u32 size = obj->gtt_space->size;
2313 int regnum = obj->fence_reg;
2314 uint32_t val; 2284 uint32_t val;
2315 uint32_t pitch_val;
2316 2285
2317 if (WARN((obj->gtt_offset & ~I830_FENCE_START_MASK) || 2286 if (obj) {
2318 (size & -size) != size || 2287 u32 size = obj->gtt_space->size;
2319 (obj->gtt_offset & (size - 1)), 2288 uint32_t pitch_val;
2320 "object 0x%08x not 512K or pot-size 0x%08x aligned\n", 2289
2321 obj->gtt_offset, size)) 2290 WARN((obj->gtt_offset & ~I830_FENCE_START_MASK) ||
2322 return -EINVAL; 2291 (size & -size) != size ||
2323 2292 (obj->gtt_offset & (size - 1)),
2324 pitch_val = obj->stride / 128; 2293 "object 0x%08x not 512K or pot-size 0x%08x aligned\n",
2325 pitch_val = ffs(pitch_val) - 1; 2294 obj->gtt_offset, size);
2326 2295
2327 val = obj->gtt_offset; 2296 pitch_val = obj->stride / 128;
2328 if (obj->tiling_mode == I915_TILING_Y) 2297 pitch_val = ffs(pitch_val) - 1;
2329 val |= 1 << I830_FENCE_TILING_Y_SHIFT; 2298
2330 val |= I830_FENCE_SIZE_BITS(size); 2299 val = obj->gtt_offset;
2331 val |= pitch_val << I830_FENCE_PITCH_SHIFT; 2300 if (obj->tiling_mode == I915_TILING_Y)
2332 val |= I830_FENCE_REG_VALID; 2301 val |= 1 << I830_FENCE_TILING_Y_SHIFT;
2302 val |= I830_FENCE_SIZE_BITS(size);
2303 val |= pitch_val << I830_FENCE_PITCH_SHIFT;
2304 val |= I830_FENCE_REG_VALID;
2305 } else
2306 val = 0;
2333 2307
2334 if (pipelined) { 2308 I915_WRITE(FENCE_REG_830_0 + reg * 4, val);
2335 int ret = intel_ring_begin(pipelined, 4); 2309 POSTING_READ(FENCE_REG_830_0 + reg * 4);
2336 if (ret) 2310}
2337 return ret;
2338 2311
2339 intel_ring_emit(pipelined, MI_NOOP); 2312static void i915_gem_write_fence(struct drm_device *dev, int reg,
2340 intel_ring_emit(pipelined, MI_LOAD_REGISTER_IMM(1)); 2313 struct drm_i915_gem_object *obj)
2341 intel_ring_emit(pipelined, FENCE_REG_830_0 + regnum*4); 2314{
2342 intel_ring_emit(pipelined, val); 2315 switch (INTEL_INFO(dev)->gen) {
2343 intel_ring_advance(pipelined); 2316 case 7:
2344 } else 2317 case 6: sandybridge_write_fence_reg(dev, reg, obj); break;
2345 I915_WRITE(FENCE_REG_830_0 + regnum * 4, val); 2318 case 5:
2319 case 4: i965_write_fence_reg(dev, reg, obj); break;
2320 case 3: i915_write_fence_reg(dev, reg, obj); break;
2321 case 2: i830_write_fence_reg(dev, reg, obj); break;
2322 default: break;
2323 }
2324}
2346 2325
2347 return 0; 2326static inline int fence_number(struct drm_i915_private *dev_priv,
2327 struct drm_i915_fence_reg *fence)
2328{
2329 return fence - dev_priv->fence_regs;
2348} 2330}
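
fence_number() recovers the register index purely by pointer subtraction against the dev_priv->fence_regs array; in C the difference of two pointers into the same array is the number of elements between them. A trivial sketch:

#include <stdio.h>

struct fence_reg { int dummy; };

int main(void)
{
    struct fence_reg regs[16];
    struct fence_reg *fence = &regs[5];

    printf("fence number: %td\n", fence - regs);    /* prints 5 */
    return 0;
}
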
2349 2331
2350static bool ring_passed_seqno(struct intel_ring_buffer *ring, u32 seqno) 2332static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj,
2333 struct drm_i915_fence_reg *fence,
2334 bool enable)
2351{ 2335{
2352 return i915_seqno_passed(ring->get_seqno(ring), seqno); 2336 struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
2337 int reg = fence_number(dev_priv, fence);
2338
2339 i915_gem_write_fence(obj->base.dev, reg, enable ? obj : NULL);
2340
2341 if (enable) {
2342 obj->fence_reg = reg;
2343 fence->obj = obj;
2344 list_move_tail(&fence->lru_list, &dev_priv->mm.fence_list);
2345 } else {
2346 obj->fence_reg = I915_FENCE_REG_NONE;
2347 fence->obj = NULL;
2348 list_del_init(&fence->lru_list);
2349 }
2353} 2350}
2354 2351
2355static int 2352static int
2356i915_gem_object_flush_fence(struct drm_i915_gem_object *obj, 2353i915_gem_object_flush_fence(struct drm_i915_gem_object *obj)
2357 struct intel_ring_buffer *pipelined)
2358{ 2354{
2359 int ret; 2355 int ret;
2360 2356
2361 if (obj->fenced_gpu_access) { 2357 if (obj->fenced_gpu_access) {
2362 if (obj->base.write_domain & I915_GEM_GPU_DOMAINS) { 2358 if (obj->base.write_domain & I915_GEM_GPU_DOMAINS) {
2363 ret = i915_gem_flush_ring(obj->last_fenced_ring, 2359 ret = i915_gem_flush_ring(obj->ring,
2364 0, obj->base.write_domain); 2360 0, obj->base.write_domain);
2365 if (ret) 2361 if (ret)
2366 return ret; 2362 return ret;
@@ -2369,18 +2365,12 @@ i915_gem_object_flush_fence(struct drm_i915_gem_object *obj,
2369 obj->fenced_gpu_access = false; 2365 obj->fenced_gpu_access = false;
2370 } 2366 }
2371 2367
2372 if (obj->last_fenced_seqno && pipelined != obj->last_fenced_ring) { 2368 if (obj->last_fenced_seqno) {
2373 if (!ring_passed_seqno(obj->last_fenced_ring, 2369 ret = i915_wait_request(obj->ring, obj->last_fenced_seqno);
2374 obj->last_fenced_seqno)) { 2370 if (ret)
2375 ret = i915_wait_request(obj->last_fenced_ring, 2371 return ret;
2376 obj->last_fenced_seqno,
2377 true);
2378 if (ret)
2379 return ret;
2380 }
2381 2372
2382 obj->last_fenced_seqno = 0; 2373 obj->last_fenced_seqno = 0;
2383 obj->last_fenced_ring = NULL;
2384 } 2374 }
2385 2375
2386 /* Ensure that all CPU reads are completed before installing a fence 2376 /* Ensure that all CPU reads are completed before installing a fence
@@ -2395,34 +2385,29 @@ i915_gem_object_flush_fence(struct drm_i915_gem_object *obj,
2395int 2385int
2396i915_gem_object_put_fence(struct drm_i915_gem_object *obj) 2386i915_gem_object_put_fence(struct drm_i915_gem_object *obj)
2397{ 2387{
2388 struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
2398 int ret; 2389 int ret;
2399 2390
2400 if (obj->tiling_mode) 2391 ret = i915_gem_object_flush_fence(obj);
2401 i915_gem_release_mmap(obj);
2402
2403 ret = i915_gem_object_flush_fence(obj, NULL);
2404 if (ret) 2392 if (ret)
2405 return ret; 2393 return ret;
2406 2394
2407 if (obj->fence_reg != I915_FENCE_REG_NONE) { 2395 if (obj->fence_reg == I915_FENCE_REG_NONE)
2408 struct drm_i915_private *dev_priv = obj->base.dev->dev_private; 2396 return 0;
2409
2410 WARN_ON(dev_priv->fence_regs[obj->fence_reg].pin_count);
2411 i915_gem_clear_fence_reg(obj->base.dev,
2412 &dev_priv->fence_regs[obj->fence_reg]);
2413 2397
2414 obj->fence_reg = I915_FENCE_REG_NONE; 2398 i915_gem_object_update_fence(obj,
2415 } 2399 &dev_priv->fence_regs[obj->fence_reg],
2400 false);
2401 i915_gem_object_fence_lost(obj);
2416 2402
2417 return 0; 2403 return 0;
2418} 2404}
2419 2405
2420static struct drm_i915_fence_reg * 2406static struct drm_i915_fence_reg *
2421i915_find_fence_reg(struct drm_device *dev, 2407i915_find_fence_reg(struct drm_device *dev)
2422 struct intel_ring_buffer *pipelined)
2423{ 2408{
2424 struct drm_i915_private *dev_priv = dev->dev_private; 2409 struct drm_i915_private *dev_priv = dev->dev_private;
2425 struct drm_i915_fence_reg *reg, *first, *avail; 2410 struct drm_i915_fence_reg *reg, *avail;
2426 int i; 2411 int i;
2427 2412
2428 /* First try to find a free reg */ 2413 /* First try to find a free reg */
@@ -2440,204 +2425,77 @@ i915_find_fence_reg(struct drm_device *dev,
2440 return NULL; 2425 return NULL;
2441 2426
2442 /* None available, try to steal one or wait for a user to finish */ 2427 /* None available, try to steal one or wait for a user to finish */
2443 avail = first = NULL;
2444 list_for_each_entry(reg, &dev_priv->mm.fence_list, lru_list) { 2428 list_for_each_entry(reg, &dev_priv->mm.fence_list, lru_list) {
2445 if (reg->pin_count) 2429 if (reg->pin_count)
2446 continue; 2430 continue;
2447 2431
2448 if (first == NULL) 2432 return reg;
2449 first = reg;
2450
2451 if (!pipelined ||
2452 !reg->obj->last_fenced_ring ||
2453 reg->obj->last_fenced_ring == pipelined) {
2454 avail = reg;
2455 break;
2456 }
2457 } 2433 }
2458 2434
2459 if (avail == NULL) 2435 return NULL;
2460 avail = first;
2461
2462 return avail;
2463} 2436}
2464 2437
2465/** 2438/**
2466 * i915_gem_object_get_fence - set up a fence reg for an object 2439 * i915_gem_object_get_fence - set up fencing for an object
2467 * @obj: object to map through a fence reg 2440 * @obj: object to map through a fence reg
2468 * @pipelined: ring on which to queue the change, or NULL for CPU access
2469 * @interruptible: must we wait uninterruptibly for the register to retire?
2470 * 2441 *
2471 * When mapping objects through the GTT, userspace wants to be able to write 2442 * When mapping objects through the GTT, userspace wants to be able to write
2472 * to them without having to worry about swizzling if the object is tiled. 2443 * to them without having to worry about swizzling if the object is tiled.
2473 *
2474 * This function walks the fence regs looking for a free one for @obj, 2444 * This function walks the fence regs looking for a free one for @obj,
2475 * stealing one if it can't find any. 2445 * stealing one if it can't find any.
2476 * 2446 *
2477 * It then sets up the reg based on the object's properties: address, pitch 2447 * It then sets up the reg based on the object's properties: address, pitch
2478 * and tiling format. 2448 * and tiling format.
2449 *
2450 * For an untiled surface, this removes any existing fence.
2479 */ 2451 */
2480int 2452int
2481i915_gem_object_get_fence(struct drm_i915_gem_object *obj, 2453i915_gem_object_get_fence(struct drm_i915_gem_object *obj)
2482 struct intel_ring_buffer *pipelined)
2483{ 2454{
2484 struct drm_device *dev = obj->base.dev; 2455 struct drm_device *dev = obj->base.dev;
2485 struct drm_i915_private *dev_priv = dev->dev_private; 2456 struct drm_i915_private *dev_priv = dev->dev_private;
2457 bool enable = obj->tiling_mode != I915_TILING_NONE;
2486 struct drm_i915_fence_reg *reg; 2458 struct drm_i915_fence_reg *reg;
2487 int ret; 2459 int ret;
2488 2460
2489 /* XXX disable pipelining. There are bugs. Shocking. */ 2461 /* Have we updated the tiling parameters upon the object and so
2490 pipelined = NULL; 2462 * will need to serialise the write to the associated fence register?
2463 */
2464 if (obj->fence_dirty) {
2465 ret = i915_gem_object_flush_fence(obj);
2466 if (ret)
2467 return ret;
2468 }
2491 2469
2492 /* Just update our place in the LRU if our fence is getting reused. */ 2470 /* Just update our place in the LRU if our fence is getting reused. */
2493 if (obj->fence_reg != I915_FENCE_REG_NONE) { 2471 if (obj->fence_reg != I915_FENCE_REG_NONE) {
2494 reg = &dev_priv->fence_regs[obj->fence_reg]; 2472 reg = &dev_priv->fence_regs[obj->fence_reg];
2495 list_move_tail(&reg->lru_list, &dev_priv->mm.fence_list); 2473 if (!obj->fence_dirty) {
2496 2474 list_move_tail(&reg->lru_list,
2497 if (obj->tiling_changed) { 2475 &dev_priv->mm.fence_list);
2498 ret = i915_gem_object_flush_fence(obj, pipelined); 2476 return 0;
2499 if (ret)
2500 return ret;
2501
2502 if (!obj->fenced_gpu_access && !obj->last_fenced_seqno)
2503 pipelined = NULL;
2504
2505 if (pipelined) {
2506 reg->setup_seqno =
2507 i915_gem_next_request_seqno(pipelined);
2508 obj->last_fenced_seqno = reg->setup_seqno;
2509 obj->last_fenced_ring = pipelined;
2510 }
2511
2512 goto update;
2513 } 2477 }
2478 } else if (enable) {
2479 reg = i915_find_fence_reg(dev);
2480 if (reg == NULL)
2481 return -EDEADLK;
2514 2482
2515 if (!pipelined) { 2483 if (reg->obj) {
2516 if (reg->setup_seqno) { 2484 struct drm_i915_gem_object *old = reg->obj;
2517 if (!ring_passed_seqno(obj->last_fenced_ring,
2518 reg->setup_seqno)) {
2519 ret = i915_wait_request(obj->last_fenced_ring,
2520 reg->setup_seqno,
2521 true);
2522 if (ret)
2523 return ret;
2524 }
2525 2485
2526 reg->setup_seqno = 0; 2486 ret = i915_gem_object_flush_fence(old);
2527 }
2528 } else if (obj->last_fenced_ring &&
2529 obj->last_fenced_ring != pipelined) {
2530 ret = i915_gem_object_flush_fence(obj, pipelined);
2531 if (ret) 2487 if (ret)
2532 return ret; 2488 return ret;
2533 }
2534
2535 return 0;
2536 }
2537
2538 reg = i915_find_fence_reg(dev, pipelined);
2539 if (reg == NULL)
2540 return -EDEADLK;
2541
2542 ret = i915_gem_object_flush_fence(obj, pipelined);
2543 if (ret)
2544 return ret;
2545
2546 if (reg->obj) {
2547 struct drm_i915_gem_object *old = reg->obj;
2548
2549 drm_gem_object_reference(&old->base);
2550
2551 if (old->tiling_mode)
2552 i915_gem_release_mmap(old);
2553 2489
2554 ret = i915_gem_object_flush_fence(old, pipelined); 2490 i915_gem_object_fence_lost(old);
2555 if (ret) {
2556 drm_gem_object_unreference(&old->base);
2557 return ret;
2558 } 2491 }
2492 } else
2493 return 0;
2559 2494
2560 if (old->last_fenced_seqno == 0 && obj->last_fenced_seqno == 0) 2495 i915_gem_object_update_fence(obj, reg, enable);
2561 pipelined = NULL; 2496 obj->fence_dirty = false;
2562
2563 old->fence_reg = I915_FENCE_REG_NONE;
2564 old->last_fenced_ring = pipelined;
2565 old->last_fenced_seqno =
2566 pipelined ? i915_gem_next_request_seqno(pipelined) : 0;
2567
2568 drm_gem_object_unreference(&old->base);
2569 } else if (obj->last_fenced_seqno == 0)
2570 pipelined = NULL;
2571
2572 reg->obj = obj;
2573 list_move_tail(&reg->lru_list, &dev_priv->mm.fence_list);
2574 obj->fence_reg = reg - dev_priv->fence_regs;
2575 obj->last_fenced_ring = pipelined;
2576
2577 reg->setup_seqno =
2578 pipelined ? i915_gem_next_request_seqno(pipelined) : 0;
2579 obj->last_fenced_seqno = reg->setup_seqno;
2580
2581update:
2582 obj->tiling_changed = false;
2583 switch (INTEL_INFO(dev)->gen) {
2584 case 7:
2585 case 6:
2586 ret = sandybridge_write_fence_reg(obj, pipelined);
2587 break;
2588 case 5:
2589 case 4:
2590 ret = i965_write_fence_reg(obj, pipelined);
2591 break;
2592 case 3:
2593 ret = i915_write_fence_reg(obj, pipelined);
2594 break;
2595 case 2:
2596 ret = i830_write_fence_reg(obj, pipelined);
2597 break;
2598 }
2599
2600 return ret;
2601}
2602
2603/**
2604 * i915_gem_clear_fence_reg - clear out fence register info
2605 * @obj: object to clear
2606 *
2607 * Zeroes out the fence register itself and clears out the associated
2608 * data structures in dev_priv and obj.
2609 */
2610static void
2611i915_gem_clear_fence_reg(struct drm_device *dev,
2612 struct drm_i915_fence_reg *reg)
2613{
2614 drm_i915_private_t *dev_priv = dev->dev_private;
2615 uint32_t fence_reg = reg - dev_priv->fence_regs;
2616
2617 switch (INTEL_INFO(dev)->gen) {
2618 case 7:
2619 case 6:
2620 I915_WRITE64(FENCE_REG_SANDYBRIDGE_0 + fence_reg*8, 0);
2621 break;
2622 case 5:
2623 case 4:
2624 I915_WRITE64(FENCE_REG_965_0 + fence_reg*8, 0);
2625 break;
2626 case 3:
2627 if (fence_reg >= 8)
2628 fence_reg = FENCE_REG_945_8 + (fence_reg - 8) * 4;
2629 else
2630 case 2:
2631 fence_reg = FENCE_REG_830_0 + fence_reg * 4;
2632
2633 I915_WRITE(fence_reg, 0);
2634 break;
2635 }
2636 2497
2637 list_del_init(&reg->lru_list); 2498 return 0;
2638 reg->obj = NULL;
2639 reg->setup_seqno = 0;
2640 reg->pin_count = 0;
2641} 2499}
2642 2500
2643/** 2501/**
@@ -2749,7 +2607,7 @@ i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
2749 return ret; 2607 return ret;
2750 } 2608 }
2751 2609
2752 ret = i915_gem_gtt_bind_object(obj); 2610 ret = i915_gem_gtt_prepare_object(obj);
2753 if (ret) { 2611 if (ret) {
2754 i915_gem_object_put_pages_gtt(obj); 2612 i915_gem_object_put_pages_gtt(obj);
2755 drm_mm_put_block(obj->gtt_space); 2613 drm_mm_put_block(obj->gtt_space);
@@ -2761,6 +2619,9 @@ i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
2761 goto search_free; 2619 goto search_free;
2762 } 2620 }
2763 2621
2622 if (!dev_priv->mm.aliasing_ppgtt)
2623 i915_gem_gtt_bind_object(obj, obj->cache_level);
2624
2764 list_add_tail(&obj->gtt_list, &dev_priv->mm.gtt_list); 2625 list_add_tail(&obj->gtt_list, &dev_priv->mm.gtt_list);
2765 list_add_tail(&obj->mm_list, &dev_priv->mm.inactive_list); 2626 list_add_tail(&obj->mm_list, &dev_priv->mm.inactive_list);
2766 2627
@@ -2878,6 +2739,7 @@ i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj)
2878int 2739int
2879i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) 2740i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
2880{ 2741{
2742 drm_i915_private_t *dev_priv = obj->base.dev->dev_private;
2881 uint32_t old_write_domain, old_read_domains; 2743 uint32_t old_write_domain, old_read_domains;
2882 int ret; 2744 int ret;
2883 2745
@@ -2918,6 +2780,10 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
2918 old_read_domains, 2780 old_read_domains,
2919 old_write_domain); 2781 old_write_domain);
2920 2782
2783 /* And bump the LRU for this access */
2784 if (i915_gem_object_is_inactive(obj))
2785 list_move_tail(&obj->mm_list, &dev_priv->mm.inactive_list);
2786
2921 return 0; 2787 return 0;
2922} 2788}
2923 2789
@@ -2953,7 +2819,8 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
2953 return ret; 2819 return ret;
2954 } 2820 }
2955 2821
2956 i915_gem_gtt_rebind_object(obj, cache_level); 2822 if (obj->has_global_gtt_mapping)
2823 i915_gem_gtt_bind_object(obj, cache_level);
2957 if (obj->has_aliasing_ppgtt_mapping) 2824 if (obj->has_aliasing_ppgtt_mapping)
2958 i915_ppgtt_bind_object(dev_priv->mm.aliasing_ppgtt, 2825 i915_ppgtt_bind_object(dev_priv->mm.aliasing_ppgtt,
2959 obj, cache_level); 2826 obj, cache_level);
@@ -2990,11 +2857,6 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
2990 * Prepare buffer for display plane (scanout, cursors, etc). 2857 * Prepare buffer for display plane (scanout, cursors, etc).
2991 * Can be called from an uninterruptible phase (modesetting) and allows 2858 * Can be called from an uninterruptible phase (modesetting) and allows
2992 * any flushes to be pipelined (for pageflips). 2859 * any flushes to be pipelined (for pageflips).
2993 *
2994 * For the display plane, we want to be in the GTT but out of any write
2995 * domains. So in many ways this looks like set_to_gtt_domain() apart from the
2996 * ability to pipeline the waits, pinning and any additional subtleties
2997 * that may differentiate the display plane from ordinary buffers.
2998 */ 2860 */
2999int 2861int
3000i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, 2862i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
@@ -3009,8 +2871,8 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
3009 return ret; 2871 return ret;
3010 2872
3011 if (pipelined != obj->ring) { 2873 if (pipelined != obj->ring) {
3012 ret = i915_gem_object_wait_rendering(obj); 2874 ret = i915_gem_object_sync(obj, pipelined);
3013 if (ret == -ERESTARTSYS) 2875 if (ret)
3014 return ret; 2876 return ret;
3015 } 2877 }
3016 2878
@@ -3082,7 +2944,7 @@ i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj)
3082 * This function returns when the move is complete, including waiting on 2944 * This function returns when the move is complete, including waiting on
3083 * flushes to occur. 2945 * flushes to occur.
3084 */ 2946 */
3085static int 2947int
3086i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) 2948i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
3087{ 2949{
3088 uint32_t old_write_domain, old_read_domains; 2950 uint32_t old_write_domain, old_read_domains;
@@ -3095,17 +2957,14 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
3095 if (ret) 2957 if (ret)
3096 return ret; 2958 return ret;
3097 2959
3098 ret = i915_gem_object_wait_rendering(obj); 2960 if (write || obj->pending_gpu_write) {
3099 if (ret) 2961 ret = i915_gem_object_wait_rendering(obj);
3100 return ret; 2962 if (ret)
2963 return ret;
2964 }
3101 2965
3102 i915_gem_object_flush_gtt_write_domain(obj); 2966 i915_gem_object_flush_gtt_write_domain(obj);
3103 2967
3104 /* If we have a partially-valid cache of the object in the CPU,
3105 * finish invalidating it and free the per-page flags.
3106 */
3107 i915_gem_object_set_to_full_cpu_read_domain(obj);
3108
3109 old_write_domain = obj->base.write_domain; 2968 old_write_domain = obj->base.write_domain;
3110 old_read_domains = obj->base.read_domains; 2969 old_read_domains = obj->base.read_domains;
3111 2970
@@ -3136,113 +2995,6 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
3136 return 0; 2995 return 0;
3137} 2996}
3138 2997
3139/**
3140 * Moves the object from a partially CPU read to a full one.
3141 *
3142 * Note that this only resolves i915_gem_object_set_cpu_read_domain_range(),
3143 * and doesn't handle transitioning from !(read_domains & I915_GEM_DOMAIN_CPU).
3144 */
3145static void
3146i915_gem_object_set_to_full_cpu_read_domain(struct drm_i915_gem_object *obj)
3147{
3148 if (!obj->page_cpu_valid)
3149 return;
3150
3151 /* If we're partially in the CPU read domain, finish moving it in.
3152 */
3153 if (obj->base.read_domains & I915_GEM_DOMAIN_CPU) {
3154 int i;
3155
3156 for (i = 0; i <= (obj->base.size - 1) / PAGE_SIZE; i++) {
3157 if (obj->page_cpu_valid[i])
3158 continue;
3159 drm_clflush_pages(obj->pages + i, 1);
3160 }
3161 }
3162
3163 /* Free the page_cpu_valid mappings which are now stale, whether
3164 * or not we've got I915_GEM_DOMAIN_CPU.
3165 */
3166 kfree(obj->page_cpu_valid);
3167 obj->page_cpu_valid = NULL;
3168}
3169
3170/**
3171 * Set the CPU read domain on a range of the object.
3172 *
3173 * The object ends up with I915_GEM_DOMAIN_CPU in its read flags although it's
3174 * not entirely valid. The page_cpu_valid member of the object flags which
3175 * pages have been flushed, and will be respected by
3176 * i915_gem_object_set_to_cpu_domain() if it's called on to get a valid mapping
3177 * of the whole object.
3178 *
3179 * This function returns when the move is complete, including waiting on
3180 * flushes to occur.
3181 */
3182static int
3183i915_gem_object_set_cpu_read_domain_range(struct drm_i915_gem_object *obj,
3184 uint64_t offset, uint64_t size)
3185{
3186 uint32_t old_read_domains;
3187 int i, ret;
3188
3189 if (offset == 0 && size == obj->base.size)
3190 return i915_gem_object_set_to_cpu_domain(obj, 0);
3191
3192 ret = i915_gem_object_flush_gpu_write_domain(obj);
3193 if (ret)
3194 return ret;
3195
3196 ret = i915_gem_object_wait_rendering(obj);
3197 if (ret)
3198 return ret;
3199
3200 i915_gem_object_flush_gtt_write_domain(obj);
3201
3202 /* If we're already fully in the CPU read domain, we're done. */
3203 if (obj->page_cpu_valid == NULL &&
3204 (obj->base.read_domains & I915_GEM_DOMAIN_CPU) != 0)
3205 return 0;
3206
3207 /* Otherwise, create/clear the per-page CPU read domain flag if we're
3208 * newly adding I915_GEM_DOMAIN_CPU
3209 */
3210 if (obj->page_cpu_valid == NULL) {
3211 obj->page_cpu_valid = kzalloc(obj->base.size / PAGE_SIZE,
3212 GFP_KERNEL);
3213 if (obj->page_cpu_valid == NULL)
3214 return -ENOMEM;
3215 } else if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0)
3216 memset(obj->page_cpu_valid, 0, obj->base.size / PAGE_SIZE);
3217
3218 /* Flush the cache on any pages that are still invalid from the CPU's
3219 * perspective.
3220 */
3221 for (i = offset / PAGE_SIZE; i <= (offset + size - 1) / PAGE_SIZE;
3222 i++) {
3223 if (obj->page_cpu_valid[i])
3224 continue;
3225
3226 drm_clflush_pages(obj->pages + i, 1);
3227
3228 obj->page_cpu_valid[i] = 1;
3229 }
3230
3231 /* It should now be out of any other write domains, and we can update
3232 * the domain values for our changes.
3233 */
3234 BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_CPU) != 0);
3235
3236 old_read_domains = obj->base.read_domains;
3237 obj->base.read_domains |= I915_GEM_DOMAIN_CPU;
3238
3239 trace_i915_gem_object_change_domain(obj,
3240 old_read_domains,
3241 obj->base.write_domain);
3242
3243 return 0;
3244}
3245
3246/* Throttle our rendering by waiting until the ring has completed our requests 2998/* Throttle our rendering by waiting until the ring has completed our requests
3247 * emitted over 20 msec ago. 2999 * emitted over 20 msec ago.
3248 * 3000 *
@@ -3280,28 +3032,7 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
3280 if (seqno == 0) 3032 if (seqno == 0)
3281 return 0; 3033 return 0;
3282 3034
3283 ret = 0; 3035 ret = __wait_seqno(ring, seqno, true);
3284 if (!i915_seqno_passed(ring->get_seqno(ring), seqno)) {
3285 /* And wait for the seqno passing without holding any locks and
3286 * causing extra latency for others. This is safe as the irq
3287 * generation is designed to be run atomically and so is
3288 * lockless.
3289 */
3290 if (ring->irq_get(ring)) {
3291 ret = wait_event_interruptible(ring->irq_queue,
3292 i915_seqno_passed(ring->get_seqno(ring), seqno)
3293 || atomic_read(&dev_priv->mm.wedged));
3294 ring->irq_put(ring);
3295
3296 if (ret == 0 && atomic_read(&dev_priv->mm.wedged))
3297 ret = -EIO;
3298 } else if (wait_for_atomic(i915_seqno_passed(ring->get_seqno(ring),
3299 seqno) ||
3300 atomic_read(&dev_priv->mm.wedged), 3000)) {
3301 ret = -EBUSY;
3302 }
3303 }
3304
3305 if (ret == 0) 3036 if (ret == 0)
3306 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 0); 3037 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 0);
3307 3038
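
The throttle path now delegates the wait to __wait_seqno(ring, seqno, true) instead of open-coding the irq_get / wait_event_interruptible sequence shown on the left. Both forms rest on the same completion test, i915_seqno_passed(), which is assumed here to compare seqnos with wrap-safe signed 32-bit arithmetic. A stand-alone sketch of that comparison (the helper name below is illustrative, not the driver's):

#include <stdint.h>
#include <stdio.h>

/* A seqno has "passed" when current minus wanted is non-negative as int32. */
static int seqno_passed(uint32_t current, uint32_t wanted)
{
    return (int32_t)(current - wanted) >= 0;
}

int main(void)
{
    /* Still correct across 32-bit wraparound: 5 comes "after" 0xfffffffb. */
    printf("%d\n", seqno_passed(5u, 0xfffffffbu));    /* 1 */
    printf("%d\n", seqno_passed(0xfffffffbu, 5u));    /* 0 */
    return 0;
}
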
@@ -3313,12 +3044,9 @@ i915_gem_object_pin(struct drm_i915_gem_object *obj,
3313 uint32_t alignment, 3044 uint32_t alignment,
3314 bool map_and_fenceable) 3045 bool map_and_fenceable)
3315{ 3046{
3316 struct drm_device *dev = obj->base.dev;
3317 struct drm_i915_private *dev_priv = dev->dev_private;
3318 int ret; 3047 int ret;
3319 3048
3320 BUG_ON(obj->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT); 3049 BUG_ON(obj->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT);
3321 WARN_ON(i915_verify_lists(dev));
3322 3050
3323 if (obj->gtt_space != NULL) { 3051 if (obj->gtt_space != NULL) {
3324 if ((alignment && obj->gtt_offset & (alignment - 1)) || 3052 if ((alignment && obj->gtt_offset & (alignment - 1)) ||
@@ -3343,34 +3071,23 @@ i915_gem_object_pin(struct drm_i915_gem_object *obj,
3343 return ret; 3071 return ret;
3344 } 3072 }
3345 3073
3346 if (obj->pin_count++ == 0) { 3074 if (!obj->has_global_gtt_mapping && map_and_fenceable)
3347 if (!obj->active) 3075 i915_gem_gtt_bind_object(obj, obj->cache_level);
3348 list_move_tail(&obj->mm_list, 3076
3349 &dev_priv->mm.pinned_list); 3077 obj->pin_count++;
3350 }
3351 obj->pin_mappable |= map_and_fenceable; 3078 obj->pin_mappable |= map_and_fenceable;
3352 3079
3353 WARN_ON(i915_verify_lists(dev));
3354 return 0; 3080 return 0;
3355} 3081}
3356 3082
3357void 3083void
3358i915_gem_object_unpin(struct drm_i915_gem_object *obj) 3084i915_gem_object_unpin(struct drm_i915_gem_object *obj)
3359{ 3085{
3360 struct drm_device *dev = obj->base.dev;
3361 drm_i915_private_t *dev_priv = dev->dev_private;
3362
3363 WARN_ON(i915_verify_lists(dev));
3364 BUG_ON(obj->pin_count == 0); 3086 BUG_ON(obj->pin_count == 0);
3365 BUG_ON(obj->gtt_space == NULL); 3087 BUG_ON(obj->gtt_space == NULL);
3366 3088
3367 if (--obj->pin_count == 0) { 3089 if (--obj->pin_count == 0)
3368 if (!obj->active)
3369 list_move_tail(&obj->mm_list,
3370 &dev_priv->mm.inactive_list);
3371 obj->pin_mappable = false; 3090 obj->pin_mappable = false;
3372 }
3373 WARN_ON(i915_verify_lists(dev));
3374} 3091}
3375 3092
3376int 3093int
@@ -3494,20 +3211,9 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data,
3494 if (obj->base.write_domain & I915_GEM_GPU_DOMAINS) { 3211 if (obj->base.write_domain & I915_GEM_GPU_DOMAINS) {
3495 ret = i915_gem_flush_ring(obj->ring, 3212 ret = i915_gem_flush_ring(obj->ring,
3496 0, obj->base.write_domain); 3213 0, obj->base.write_domain);
3497 } else if (obj->ring->outstanding_lazy_request == 3214 } else {
3498 obj->last_rendering_seqno) { 3215 ret = i915_gem_check_olr(obj->ring,
3499 struct drm_i915_gem_request *request; 3216 obj->last_rendering_seqno);
3500
3501 /* This ring is not being cleared by active usage,
3502 * so emit a request to do so.
3503 */
3504 request = kzalloc(sizeof(*request), GFP_KERNEL);
3505 if (request) {
3506 ret = i915_add_request(obj->ring, NULL, request);
3507 if (ret)
3508 kfree(request);
3509 } else
3510 ret = -ENOMEM;
3511 } 3217 }
3512 3218
3513 /* Update the active list for the hardware's current position. 3219 /* Update the active list for the hardware's current position.
@@ -3643,46 +3349,42 @@ int i915_gem_init_object(struct drm_gem_object *obj)
3643 return 0; 3349 return 0;
3644} 3350}
3645 3351
3646static void i915_gem_free_object_tail(struct drm_i915_gem_object *obj) 3352void i915_gem_free_object(struct drm_gem_object *gem_obj)
3647{ 3353{
3354 struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
3648 struct drm_device *dev = obj->base.dev; 3355 struct drm_device *dev = obj->base.dev;
3649 drm_i915_private_t *dev_priv = dev->dev_private; 3356 drm_i915_private_t *dev_priv = dev->dev_private;
3650 int ret;
3651
3652 ret = i915_gem_object_unbind(obj);
3653 if (ret == -ERESTARTSYS) {
3654 list_move(&obj->mm_list,
3655 &dev_priv->mm.deferred_free_list);
3656 return;
3657 }
3658 3357
3659 trace_i915_gem_object_destroy(obj); 3358 trace_i915_gem_object_destroy(obj);
3660 3359
3360 if (gem_obj->import_attach)
3361 drm_prime_gem_destroy(gem_obj, obj->sg_table);
3362
3363 if (obj->phys_obj)
3364 i915_gem_detach_phys_object(dev, obj);
3365
3366 obj->pin_count = 0;
3367 if (WARN_ON(i915_gem_object_unbind(obj) == -ERESTARTSYS)) {
3368 bool was_interruptible;
3369
3370 was_interruptible = dev_priv->mm.interruptible;
3371 dev_priv->mm.interruptible = false;
3372
3373 WARN_ON(i915_gem_object_unbind(obj));
3374
3375 dev_priv->mm.interruptible = was_interruptible;
3376 }
3377
3661 if (obj->base.map_list.map) 3378 if (obj->base.map_list.map)
3662 drm_gem_free_mmap_offset(&obj->base); 3379 drm_gem_free_mmap_offset(&obj->base);
3663 3380
3664 drm_gem_object_release(&obj->base); 3381 drm_gem_object_release(&obj->base);
3665 i915_gem_info_remove_obj(dev_priv, obj->base.size); 3382 i915_gem_info_remove_obj(dev_priv, obj->base.size);
3666 3383
3667 kfree(obj->page_cpu_valid);
3668 kfree(obj->bit_17); 3384 kfree(obj->bit_17);
3669 kfree(obj); 3385 kfree(obj);
3670} 3386}
3671 3387
3672void i915_gem_free_object(struct drm_gem_object *gem_obj)
3673{
3674 struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
3675 struct drm_device *dev = obj->base.dev;
3676
3677 while (obj->pin_count > 0)
3678 i915_gem_object_unpin(obj);
3679
3680 if (obj->phys_obj)
3681 i915_gem_detach_phys_object(dev, obj);
3682
3683 i915_gem_free_object_tail(obj);
3684}
3685
3686int 3388int
3687i915_gem_idle(struct drm_device *dev) 3389i915_gem_idle(struct drm_device *dev)
3688{ 3390{
@@ -3696,20 +3398,16 @@ i915_gem_idle(struct drm_device *dev)
3696 return 0; 3398 return 0;
3697 } 3399 }
3698 3400
3699 ret = i915_gpu_idle(dev, true); 3401 ret = i915_gpu_idle(dev);
3700 if (ret) { 3402 if (ret) {
3701 mutex_unlock(&dev->struct_mutex); 3403 mutex_unlock(&dev->struct_mutex);
3702 return ret; 3404 return ret;
3703 } 3405 }
3406 i915_gem_retire_requests(dev);
3704 3407
3705 /* Under UMS, be paranoid and evict. */ 3408 /* Under UMS, be paranoid and evict. */
3706 if (!drm_core_check_feature(dev, DRIVER_MODESET)) { 3409 if (!drm_core_check_feature(dev, DRIVER_MODESET))
3707 ret = i915_gem_evict_inactive(dev, false); 3410 i915_gem_evict_everything(dev, false);
3708 if (ret) {
3709 mutex_unlock(&dev->struct_mutex);
3710 return ret;
3711 }
3712 }
3713 3411
3714 i915_gem_reset_fences(dev); 3412 i915_gem_reset_fences(dev);
3715 3413
@@ -3747,9 +3445,9 @@ void i915_gem_init_swizzling(struct drm_device *dev)
3747 3445
3748 I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL); 3446 I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL);
3749 if (IS_GEN6(dev)) 3447 if (IS_GEN6(dev))
3750 I915_WRITE(ARB_MODE, ARB_MODE_ENABLE(ARB_MODE_SWIZZLE_SNB)); 3448 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_SNB));
3751 else 3449 else
3752 I915_WRITE(ARB_MODE, ARB_MODE_ENABLE(ARB_MODE_SWIZZLE_IVB)); 3450 I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_IVB));
3753} 3451}
3754 3452
3755void i915_gem_init_ppgtt(struct drm_device *dev) 3453void i915_gem_init_ppgtt(struct drm_device *dev)
@@ -3787,21 +3485,27 @@ void i915_gem_init_ppgtt(struct drm_device *dev)
3787 pd_offset <<= 16; 3485 pd_offset <<= 16;
3788 3486
3789 if (INTEL_INFO(dev)->gen == 6) { 3487 if (INTEL_INFO(dev)->gen == 6) {
3790 uint32_t ecochk = I915_READ(GAM_ECOCHK); 3488 uint32_t ecochk, gab_ctl, ecobits;
3489
3490 ecobits = I915_READ(GAC_ECO_BITS);
3491 I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_PPGTT_CACHE64B);
3492
3493 gab_ctl = I915_READ(GAB_CTL);
3494 I915_WRITE(GAB_CTL, gab_ctl | GAB_CTL_CONT_AFTER_PAGEFAULT);
3495
3496 ecochk = I915_READ(GAM_ECOCHK);
3791 I915_WRITE(GAM_ECOCHK, ecochk | ECOCHK_SNB_BIT | 3497 I915_WRITE(GAM_ECOCHK, ecochk | ECOCHK_SNB_BIT |
3792 ECOCHK_PPGTT_CACHE64B); 3498 ECOCHK_PPGTT_CACHE64B);
3793 I915_WRITE(GFX_MODE, GFX_MODE_ENABLE(GFX_PPGTT_ENABLE)); 3499 I915_WRITE(GFX_MODE, _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
3794 } else if (INTEL_INFO(dev)->gen >= 7) { 3500 } else if (INTEL_INFO(dev)->gen >= 7) {
3795 I915_WRITE(GAM_ECOCHK, ECOCHK_PPGTT_CACHE64B); 3501 I915_WRITE(GAM_ECOCHK, ECOCHK_PPGTT_CACHE64B);
3796 /* GFX_MODE is per-ring on gen7+ */ 3502 /* GFX_MODE is per-ring on gen7+ */
3797 } 3503 }
3798 3504
3799 for (i = 0; i < I915_NUM_RINGS; i++) { 3505 for_each_ring(ring, dev_priv, i) {
3800 ring = &dev_priv->ring[i];
3801
3802 if (INTEL_INFO(dev)->gen >= 7) 3506 if (INTEL_INFO(dev)->gen >= 7)
3803 I915_WRITE(RING_MODE_GEN7(ring), 3507 I915_WRITE(RING_MODE_GEN7(ring),
3804 GFX_MODE_ENABLE(GFX_PPGTT_ENABLE)); 3508 _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
3805 3509
3806 I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G); 3510 I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G);
3807 I915_WRITE(RING_PP_DIR_BASE(ring), pd_offset); 3511 I915_WRITE(RING_PP_DIR_BASE(ring), pd_offset);
@@ -3845,14 +3549,80 @@ cleanup_render_ring:
3845 return ret; 3549 return ret;
3846} 3550}
3847 3551
3552static bool
3553intel_enable_ppgtt(struct drm_device *dev)
3554{
3555 if (i915_enable_ppgtt >= 0)
3556 return i915_enable_ppgtt;
3557
3558#ifdef CONFIG_INTEL_IOMMU
3559 /* Disable ppgtt on SNB if VT-d is on. */
3560 if (INTEL_INFO(dev)->gen == 6 && intel_iommu_gfx_mapped)
3561 return false;
3562#endif
3563
3564 return true;
3565}
3566
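
intel_enable_ppgtt() treats the i915_enable_ppgtt module parameter as a tristate: any non-negative value forces the feature off or on, while a negative value means auto-detect (here, disabled on gen6 when VT-d has mapped the GPU). A small sketch of that resolution pattern, with hypothetical names:

#include <stdbool.h>
#include <stdio.h>

/* Tristate option: <0 means auto, 0/1 force the setting. */
static bool resolve_tristate(int param, bool hw_default)
{
    if (param >= 0)
        return param;        /* user forced the setting */
    return hw_default;       /* auto: use the per-platform default */
}

int main(void)
{
    printf("%d %d %d\n",
           resolve_tristate(-1, true),    /* auto       -> 1 */
           resolve_tristate(0,  true),    /* forced off -> 0 */
           resolve_tristate(1,  false));  /* forced on  -> 1 */
    return 0;
}
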
3567int i915_gem_init(struct drm_device *dev)
3568{
3569 struct drm_i915_private *dev_priv = dev->dev_private;
3570 unsigned long gtt_size, mappable_size;
3571 int ret;
3572
3573 gtt_size = dev_priv->mm.gtt->gtt_total_entries << PAGE_SHIFT;
3574 mappable_size = dev_priv->mm.gtt->gtt_mappable_entries << PAGE_SHIFT;
3575
3576 mutex_lock(&dev->struct_mutex);
3577 if (intel_enable_ppgtt(dev) && HAS_ALIASING_PPGTT(dev)) {
3578 /* PPGTT pdes are stolen from global gtt ptes, so shrink the
3579 * aperture accordingly when using aliasing ppgtt. */
3580 gtt_size -= I915_PPGTT_PD_ENTRIES*PAGE_SIZE;
3581
3582 i915_gem_init_global_gtt(dev, 0, mappable_size, gtt_size);
3583
3584 ret = i915_gem_init_aliasing_ppgtt(dev);
3585 if (ret) {
3586 mutex_unlock(&dev->struct_mutex);
3587 return ret;
3588 }
3589 } else {
3590 /* Let GEM Manage all of the aperture.
3591 *
3592 * However, leave one page at the end still bound to the scratch
3593 * page. There are a number of places where the hardware
3594 * apparently prefetches past the end of the object, and we've
3595 * seen multiple hangs with the GPU head pointer stuck in a
3596 * batchbuffer bound at the last page of the aperture. One page
3597 * should be enough to keep any prefetching inside of the
3598 * aperture.
3599 */
3600 i915_gem_init_global_gtt(dev, 0, mappable_size,
3601 gtt_size);
3602 }
3603
3604 ret = i915_gem_init_hw(dev);
3605 mutex_unlock(&dev->struct_mutex);
3606 if (ret) {
3607 i915_gem_cleanup_aliasing_ppgtt(dev);
3608 return ret;
3609 }
3610
3611 /* Allow hardware batchbuffers unless told otherwise, but not for KMS. */
3612 if (!drm_core_check_feature(dev, DRIVER_MODESET))
3613 dev_priv->dri1.allow_batchbuffer = 1;
3614 return 0;
3615}
3616
3848void 3617void
3849i915_gem_cleanup_ringbuffer(struct drm_device *dev) 3618i915_gem_cleanup_ringbuffer(struct drm_device *dev)
3850{ 3619{
3851 drm_i915_private_t *dev_priv = dev->dev_private; 3620 drm_i915_private_t *dev_priv = dev->dev_private;
3621 struct intel_ring_buffer *ring;
3852 int i; 3622 int i;
3853 3623
3854 for (i = 0; i < I915_NUM_RINGS; i++) 3624 for_each_ring(ring, dev_priv, i)
3855 intel_cleanup_ring_buffer(&dev_priv->ring[i]); 3625 intel_cleanup_ring_buffer(ring);
3856} 3626}
3857 3627
3858int 3628int
@@ -3860,7 +3630,7 @@ i915_gem_entervt_ioctl(struct drm_device *dev, void *data,
3860 struct drm_file *file_priv) 3630 struct drm_file *file_priv)
3861{ 3631{
3862 drm_i915_private_t *dev_priv = dev->dev_private; 3632 drm_i915_private_t *dev_priv = dev->dev_private;
3863 int ret, i; 3633 int ret;
3864 3634
3865 if (drm_core_check_feature(dev, DRIVER_MODESET)) 3635 if (drm_core_check_feature(dev, DRIVER_MODESET))
3866 return 0; 3636 return 0;
@@ -3882,10 +3652,6 @@ i915_gem_entervt_ioctl(struct drm_device *dev, void *data,
3882 BUG_ON(!list_empty(&dev_priv->mm.active_list)); 3652 BUG_ON(!list_empty(&dev_priv->mm.active_list));
3883 BUG_ON(!list_empty(&dev_priv->mm.flushing_list)); 3653 BUG_ON(!list_empty(&dev_priv->mm.flushing_list));
3884 BUG_ON(!list_empty(&dev_priv->mm.inactive_list)); 3654 BUG_ON(!list_empty(&dev_priv->mm.inactive_list));
3885 for (i = 0; i < I915_NUM_RINGS; i++) {
3886 BUG_ON(!list_empty(&dev_priv->ring[i].active_list));
3887 BUG_ON(!list_empty(&dev_priv->ring[i].request_list));
3888 }
3889 mutex_unlock(&dev->struct_mutex); 3655 mutex_unlock(&dev->struct_mutex);
3890 3656
3891 ret = drm_irq_install(dev); 3657 ret = drm_irq_install(dev);
@@ -3944,9 +3710,7 @@ i915_gem_load(struct drm_device *dev)
3944 INIT_LIST_HEAD(&dev_priv->mm.active_list); 3710 INIT_LIST_HEAD(&dev_priv->mm.active_list);
3945 INIT_LIST_HEAD(&dev_priv->mm.flushing_list); 3711 INIT_LIST_HEAD(&dev_priv->mm.flushing_list);
3946 INIT_LIST_HEAD(&dev_priv->mm.inactive_list); 3712 INIT_LIST_HEAD(&dev_priv->mm.inactive_list);
3947 INIT_LIST_HEAD(&dev_priv->mm.pinned_list);
3948 INIT_LIST_HEAD(&dev_priv->mm.fence_list); 3713 INIT_LIST_HEAD(&dev_priv->mm.fence_list);
3949 INIT_LIST_HEAD(&dev_priv->mm.deferred_free_list);
3950 INIT_LIST_HEAD(&dev_priv->mm.gtt_list); 3714 INIT_LIST_HEAD(&dev_priv->mm.gtt_list);
3951 for (i = 0; i < I915_NUM_RINGS; i++) 3715 for (i = 0; i < I915_NUM_RINGS; i++)
3952 init_ring_lists(&dev_priv->ring[i]); 3716 init_ring_lists(&dev_priv->ring[i]);
@@ -3958,12 +3722,8 @@ i915_gem_load(struct drm_device *dev)
3958 3722
3959 /* On GEN3 we really need to make sure the ARB C3 LP bit is set */ 3723 /* On GEN3 we really need to make sure the ARB C3 LP bit is set */
3960 if (IS_GEN3(dev)) { 3724 if (IS_GEN3(dev)) {
3961 u32 tmp = I915_READ(MI_ARB_STATE); 3725 I915_WRITE(MI_ARB_STATE,
3962 if (!(tmp & MI_ARB_C3_LP_WRITE_ENABLE)) { 3726 _MASKED_BIT_ENABLE(MI_ARB_C3_LP_WRITE_ENABLE));
3963 /* arb state is a masked write, so set bit + bit in mask */
3964 tmp = MI_ARB_C3_LP_WRITE_ENABLE | (MI_ARB_C3_LP_WRITE_ENABLE << MI_ARB_MASK_SHIFT);
3965 I915_WRITE(MI_ARB_STATE, tmp);
3966 }
3967 } 3727 }
3968 3728
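
MI_ARB_STATE, like ARB_MODE and GFX_MODE elsewhere in this patch, is a masked register: the upper 16 bits select which of the lower 16 bits the write actually changes, so enabling a bit means writing the bit together with the same bit shifted into the mask half. The removed open-coded sequence above does exactly that, and _MASKED_BIT_ENABLE() is presumably just a tidier spelling of the same expression. A sketch of the assumed expansion:

#include <stdint.h>
#include <stdio.h>

/* Assumed expansion, matching the open-coded "bit | bit << mask shift" pattern. */
#define MASKED_BIT_ENABLE(a)  (((a) << 16) | (a))

int main(void)
{
    uint32_t bit = 1u << 2;                        /* hypothetical enable bit */

    printf("0x%08x\n", MASKED_BIT_ENABLE(bit));    /* prints 0x00040004 */
    return 0;
}
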
3969 dev_priv->relative_constants_mode = I915_EXEC_CONSTANTS_REL_GENERAL; 3729 dev_priv->relative_constants_mode = I915_EXEC_CONSTANTS_REL_GENERAL;
@@ -3978,9 +3738,7 @@ i915_gem_load(struct drm_device *dev)
3978 dev_priv->num_fence_regs = 8; 3738 dev_priv->num_fence_regs = 8;
3979 3739
3980 /* Initialize fence registers to zero */ 3740 /* Initialize fence registers to zero */
3981 for (i = 0; i < dev_priv->num_fence_regs; i++) { 3741 i915_gem_reset_fences(dev);
3982 i915_gem_clear_fence_reg(dev, &dev_priv->fence_regs[i]);
3983 }
3984 3742
3985 i915_gem_detect_bit_6_swizzle(dev); 3743 i915_gem_detect_bit_6_swizzle(dev);
3986 init_waitqueue_head(&dev_priv->pending_flip_queue); 3744 init_waitqueue_head(&dev_priv->pending_flip_queue);
@@ -4268,7 +4026,7 @@ rescan:
4268 * This has a dramatic impact to reduce the number of 4026 * This has a dramatic impact to reduce the number of
4269 * OOM-killer events whilst running the GPU aggressively. 4027 * OOM-killer events whilst running the GPU aggressively.
4270 */ 4028 */
4271 if (i915_gpu_idle(dev, true) == 0) 4029 if (i915_gpu_idle(dev) == 0)
4272 goto rescan; 4030 goto rescan;
4273 } 4031 }
4274 mutex_unlock(&dev->struct_mutex); 4032 mutex_unlock(&dev->struct_mutex);