Diffstat (limited to 'drivers/gpu/drm/i915/i915_gem.c')
-rw-r--r-- | drivers/gpu/drm/i915/i915_gem.c | 1916
1 file changed, 837 insertions, 1079 deletions
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 0d1e4b7b4b99..c1e5c66553df 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -35,31 +35,41 @@ | |||
35 | #include <linux/slab.h> | 35 | #include <linux/slab.h> |
36 | #include <linux/swap.h> | 36 | #include <linux/swap.h> |
37 | #include <linux/pci.h> | 37 | #include <linux/pci.h> |
38 | #include <linux/dma-buf.h> | ||
38 | 39 | ||
39 | static __must_check int i915_gem_object_flush_gpu_write_domain(struct drm_i915_gem_object *obj); | 40 | static __must_check int i915_gem_object_flush_gpu_write_domain(struct drm_i915_gem_object *obj); |
40 | static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj); | 41 | static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj); |
41 | static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj); | 42 | static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj); |
42 | static __must_check int i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, | ||
43 | bool write); | ||
44 | static __must_check int i915_gem_object_set_cpu_read_domain_range(struct drm_i915_gem_object *obj, | ||
45 | uint64_t offset, | ||
46 | uint64_t size); | ||
47 | static void i915_gem_object_set_to_full_cpu_read_domain(struct drm_i915_gem_object *obj); | ||
48 | static __must_check int i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj, | 43 | static __must_check int i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj, |
49 | unsigned alignment, | 44 | unsigned alignment, |
50 | bool map_and_fenceable); | 45 | bool map_and_fenceable); |
51 | static void i915_gem_clear_fence_reg(struct drm_device *dev, | ||
52 | struct drm_i915_fence_reg *reg); | ||
53 | static int i915_gem_phys_pwrite(struct drm_device *dev, | 46 | static int i915_gem_phys_pwrite(struct drm_device *dev, |
54 | struct drm_i915_gem_object *obj, | 47 | struct drm_i915_gem_object *obj, |
55 | struct drm_i915_gem_pwrite *args, | 48 | struct drm_i915_gem_pwrite *args, |
56 | struct drm_file *file); | 49 | struct drm_file *file); |
57 | static void i915_gem_free_object_tail(struct drm_i915_gem_object *obj); | 50 | |
51 | static void i915_gem_write_fence(struct drm_device *dev, int reg, | ||
52 | struct drm_i915_gem_object *obj); | ||
53 | static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj, | ||
54 | struct drm_i915_fence_reg *fence, | ||
55 | bool enable); | ||
58 | 56 | ||
59 | static int i915_gem_inactive_shrink(struct shrinker *shrinker, | 57 | static int i915_gem_inactive_shrink(struct shrinker *shrinker, |
60 | struct shrink_control *sc); | 58 | struct shrink_control *sc); |
61 | static void i915_gem_object_truncate(struct drm_i915_gem_object *obj); | 59 | static void i915_gem_object_truncate(struct drm_i915_gem_object *obj); |
62 | 60 | ||
61 | static inline void i915_gem_object_fence_lost(struct drm_i915_gem_object *obj) | ||
62 | { | ||
63 | if (obj->tiling_mode) | ||
64 | i915_gem_release_mmap(obj); | ||
65 | |||
66 | /* As we do not have an associated fence register, we will force | ||
67 | * a tiling change if we ever need to acquire one. | ||
68 | */ | ||
69 | obj->fence_dirty = false; | ||
70 | obj->fence_reg = I915_FENCE_REG_NONE; | ||
71 | } | ||
72 | |||
63 | /* some bookkeeping */ | 73 | /* some bookkeeping */ |
64 | static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv, | 74 | static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv, |
65 | size_t size) | 75 | size_t size) |
@@ -122,26 +132,7 @@ int i915_mutex_lock_interruptible(struct drm_device *dev) | |||
122 | static inline bool | 132 | static inline bool |
123 | i915_gem_object_is_inactive(struct drm_i915_gem_object *obj) | 133 | i915_gem_object_is_inactive(struct drm_i915_gem_object *obj) |
124 | { | 134 | { |
125 | return obj->gtt_space && !obj->active && obj->pin_count == 0; | 135 | return !obj->active; |
126 | } | ||
127 | |||
128 | void i915_gem_do_init(struct drm_device *dev, | ||
129 | unsigned long start, | ||
130 | unsigned long mappable_end, | ||
131 | unsigned long end) | ||
132 | { | ||
133 | drm_i915_private_t *dev_priv = dev->dev_private; | ||
134 | |||
135 | drm_mm_init(&dev_priv->mm.gtt_space, start, end - start); | ||
136 | |||
137 | dev_priv->mm.gtt_start = start; | ||
138 | dev_priv->mm.gtt_mappable_end = mappable_end; | ||
139 | dev_priv->mm.gtt_end = end; | ||
140 | dev_priv->mm.gtt_total = end - start; | ||
141 | dev_priv->mm.mappable_gtt_total = min(end, mappable_end) - start; | ||
142 | |||
143 | /* Take over this portion of the GTT */ | ||
144 | intel_gtt_clear_range(start / PAGE_SIZE, (end-start) / PAGE_SIZE); | ||
145 | } | 136 | } |
146 | 137 | ||
147 | int | 138 | int |
@@ -150,12 +141,20 @@ i915_gem_init_ioctl(struct drm_device *dev, void *data, | |||
150 | { | 141 | { |
151 | struct drm_i915_gem_init *args = data; | 142 | struct drm_i915_gem_init *args = data; |
152 | 143 | ||
144 | if (drm_core_check_feature(dev, DRIVER_MODESET)) | ||
145 | return -ENODEV; | ||
146 | |||
153 | if (args->gtt_start >= args->gtt_end || | 147 | if (args->gtt_start >= args->gtt_end || |
154 | (args->gtt_end | args->gtt_start) & (PAGE_SIZE - 1)) | 148 | (args->gtt_end | args->gtt_start) & (PAGE_SIZE - 1)) |
155 | return -EINVAL; | 149 | return -EINVAL; |
156 | 150 | ||
151 | /* GEM with user mode setting was never supported on ilk and later. */ | ||
152 | if (INTEL_INFO(dev)->gen >= 5) | ||
153 | return -ENODEV; | ||
154 | |||
157 | mutex_lock(&dev->struct_mutex); | 155 | mutex_lock(&dev->struct_mutex); |
158 | i915_gem_do_init(dev, args->gtt_start, args->gtt_end, args->gtt_end); | 156 | i915_gem_init_global_gtt(dev, args->gtt_start, |
157 | args->gtt_end, args->gtt_end); | ||
159 | mutex_unlock(&dev->struct_mutex); | 158 | mutex_unlock(&dev->struct_mutex); |
160 | 159 | ||
161 | return 0; | 160 | return 0; |
@@ -170,13 +169,11 @@ i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data, | |||
170 | struct drm_i915_gem_object *obj; | 169 | struct drm_i915_gem_object *obj; |
171 | size_t pinned; | 170 | size_t pinned; |
172 | 171 | ||
173 | if (!(dev->driver->driver_features & DRIVER_GEM)) | ||
174 | return -ENODEV; | ||
175 | |||
176 | pinned = 0; | 172 | pinned = 0; |
177 | mutex_lock(&dev->struct_mutex); | 173 | mutex_lock(&dev->struct_mutex); |
178 | list_for_each_entry(obj, &dev_priv->mm.pinned_list, mm_list) | 174 | list_for_each_entry(obj, &dev_priv->mm.gtt_list, gtt_list) |
179 | pinned += obj->gtt_space->size; | 175 | if (obj->pin_count) |
176 | pinned += obj->gtt_space->size; | ||
180 | mutex_unlock(&dev->struct_mutex); | 177 | mutex_unlock(&dev->struct_mutex); |
181 | 178 | ||
182 | args->aper_size = dev_priv->mm.gtt_total; | 179 | args->aper_size = dev_priv->mm.gtt_total; |
@@ -247,6 +244,7 @@ i915_gem_create_ioctl(struct drm_device *dev, void *data, | |||
247 | struct drm_file *file) | 244 | struct drm_file *file) |
248 | { | 245 | { |
249 | struct drm_i915_gem_create *args = data; | 246 | struct drm_i915_gem_create *args = data; |
247 | |||
250 | return i915_gem_create(file, dev, | 248 | return i915_gem_create(file, dev, |
251 | args->size, &args->handle); | 249 | args->size, &args->handle); |
252 | } | 250 | } |
@@ -259,66 +257,6 @@ static int i915_gem_object_needs_bit17_swizzle(struct drm_i915_gem_object *obj) | |||
259 | obj->tiling_mode != I915_TILING_NONE; | 257 | obj->tiling_mode != I915_TILING_NONE; |
260 | } | 258 | } |
261 | 259 | ||
262 | /** | ||
263 | * This is the fast shmem pread path, which attempts to copy_from_user directly | ||
264 | * from the backing pages of the object to the user's address space. On a | ||
265 | * fault, it fails so we can fall back to i915_gem_shmem_pwrite_slow(). | ||
266 | */ | ||
267 | static int | ||
268 | i915_gem_shmem_pread_fast(struct drm_device *dev, | ||
269 | struct drm_i915_gem_object *obj, | ||
270 | struct drm_i915_gem_pread *args, | ||
271 | struct drm_file *file) | ||
272 | { | ||
273 | struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping; | ||
274 | ssize_t remain; | ||
275 | loff_t offset; | ||
276 | char __user *user_data; | ||
277 | int page_offset, page_length; | ||
278 | |||
279 | user_data = (char __user *) (uintptr_t) args->data_ptr; | ||
280 | remain = args->size; | ||
281 | |||
282 | offset = args->offset; | ||
283 | |||
284 | while (remain > 0) { | ||
285 | struct page *page; | ||
286 | char *vaddr; | ||
287 | int ret; | ||
288 | |||
289 | /* Operation in this page | ||
290 | * | ||
291 | * page_offset = offset within page | ||
292 | * page_length = bytes to copy for this page | ||
293 | */ | ||
294 | page_offset = offset_in_page(offset); | ||
295 | page_length = remain; | ||
296 | if ((page_offset + remain) > PAGE_SIZE) | ||
297 | page_length = PAGE_SIZE - page_offset; | ||
298 | |||
299 | page = shmem_read_mapping_page(mapping, offset >> PAGE_SHIFT); | ||
300 | if (IS_ERR(page)) | ||
301 | return PTR_ERR(page); | ||
302 | |||
303 | vaddr = kmap_atomic(page); | ||
304 | ret = __copy_to_user_inatomic(user_data, | ||
305 | vaddr + page_offset, | ||
306 | page_length); | ||
307 | kunmap_atomic(vaddr); | ||
308 | |||
309 | mark_page_accessed(page); | ||
310 | page_cache_release(page); | ||
311 | if (ret) | ||
312 | return -EFAULT; | ||
313 | |||
314 | remain -= page_length; | ||
315 | user_data += page_length; | ||
316 | offset += page_length; | ||
317 | } | ||
318 | |||
319 | return 0; | ||
320 | } | ||
321 | |||
322 | static inline int | 260 | static inline int |
323 | __copy_to_user_swizzled(char __user *cpu_vaddr, | 261 | __copy_to_user_swizzled(char __user *cpu_vaddr, |
324 | const char *gpu_vaddr, int gpu_offset, | 262 | const char *gpu_vaddr, int gpu_offset, |
@@ -346,8 +284,8 @@ __copy_to_user_swizzled(char __user *cpu_vaddr, | |||
346 | } | 284 | } |
347 | 285 | ||
348 | static inline int | 286 | static inline int |
349 | __copy_from_user_swizzled(char __user *gpu_vaddr, int gpu_offset, | 287 | __copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset, |
350 | const char *cpu_vaddr, | 288 | const char __user *cpu_vaddr, |
351 | int length) | 289 | int length) |
352 | { | 290 | { |
353 | int ret, cpu_offset = 0; | 291 | int ret, cpu_offset = 0; |
@@ -371,37 +309,121 @@ __copy_from_user_swizzled(char __user *gpu_vaddr, int gpu_offset, | |||
371 | return 0; | 309 | return 0; |
372 | } | 310 | } |
373 | 311 | ||
374 | /** | 312 | /* Per-page copy function for the shmem pread fastpath. |
375 | * This is the fallback shmem pread path, which allocates temporary storage | 313 | * Flushes invalid cachelines before reading the target if |
376 | * in kernel space to copy_to_user into outside of the struct_mutex, so we | 314 | * needs_clflush is set. */ |
377 | * can copy out of the object's backing pages while holding the struct mutex | ||
378 | * and not take page faults. | ||
379 | */ | ||
380 | static int | 315 | static int |
381 | i915_gem_shmem_pread_slow(struct drm_device *dev, | 316 | shmem_pread_fast(struct page *page, int shmem_page_offset, int page_length, |
382 | struct drm_i915_gem_object *obj, | 317 | char __user *user_data, |
383 | struct drm_i915_gem_pread *args, | 318 | bool page_do_bit17_swizzling, bool needs_clflush) |
384 | struct drm_file *file) | 319 | { |
320 | char *vaddr; | ||
321 | int ret; | ||
322 | |||
323 | if (unlikely(page_do_bit17_swizzling)) | ||
324 | return -EINVAL; | ||
325 | |||
326 | vaddr = kmap_atomic(page); | ||
327 | if (needs_clflush) | ||
328 | drm_clflush_virt_range(vaddr + shmem_page_offset, | ||
329 | page_length); | ||
330 | ret = __copy_to_user_inatomic(user_data, | ||
331 | vaddr + shmem_page_offset, | ||
332 | page_length); | ||
333 | kunmap_atomic(vaddr); | ||
334 | |||
335 | return ret; | ||
336 | } | ||
337 | |||
338 | static void | ||
339 | shmem_clflush_swizzled_range(char *addr, unsigned long length, | ||
340 | bool swizzled) | ||
341 | { | ||
342 | if (unlikely(swizzled)) { | ||
343 | unsigned long start = (unsigned long) addr; | ||
344 | unsigned long end = (unsigned long) addr + length; | ||
345 | |||
346 | /* For swizzling simply ensure that we always flush both | ||
347 | * channels. Lame, but simple and it works. Swizzled | ||
348 | * pwrite/pread is far from a hotpath - current userspace | ||
349 | * doesn't use it at all. */ | ||
350 | start = round_down(start, 128); | ||
351 | end = round_up(end, 128); | ||
352 | |||
353 | drm_clflush_virt_range((void *)start, end - start); | ||
354 | } else { | ||
355 | drm_clflush_virt_range(addr, length); | ||
356 | } | ||
357 | |||
358 | } | ||
359 | |||
360 | /* Only difference to the fast-path function is that this can handle bit17 | ||
361 | * and uses non-atomic copy and kmap functions. */ | ||
362 | static int | ||
363 | shmem_pread_slow(struct page *page, int shmem_page_offset, int page_length, | ||
364 | char __user *user_data, | ||
365 | bool page_do_bit17_swizzling, bool needs_clflush) | ||
366 | { | ||
367 | char *vaddr; | ||
368 | int ret; | ||
369 | |||
370 | vaddr = kmap(page); | ||
371 | if (needs_clflush) | ||
372 | shmem_clflush_swizzled_range(vaddr + shmem_page_offset, | ||
373 | page_length, | ||
374 | page_do_bit17_swizzling); | ||
375 | |||
376 | if (page_do_bit17_swizzling) | ||
377 | ret = __copy_to_user_swizzled(user_data, | ||
378 | vaddr, shmem_page_offset, | ||
379 | page_length); | ||
380 | else | ||
381 | ret = __copy_to_user(user_data, | ||
382 | vaddr + shmem_page_offset, | ||
383 | page_length); | ||
384 | kunmap(page); | ||
385 | |||
386 | return ret; | ||
387 | } | ||
388 | |||
389 | static int | ||
390 | i915_gem_shmem_pread(struct drm_device *dev, | ||
391 | struct drm_i915_gem_object *obj, | ||
392 | struct drm_i915_gem_pread *args, | ||
393 | struct drm_file *file) | ||
385 | { | 394 | { |
386 | struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping; | 395 | struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping; |
387 | char __user *user_data; | 396 | char __user *user_data; |
388 | ssize_t remain; | 397 | ssize_t remain; |
389 | loff_t offset; | 398 | loff_t offset; |
390 | int shmem_page_offset, page_length, ret; | 399 | int shmem_page_offset, page_length, ret = 0; |
391 | int obj_do_bit17_swizzling, page_do_bit17_swizzling; | 400 | int obj_do_bit17_swizzling, page_do_bit17_swizzling; |
401 | int hit_slowpath = 0; | ||
402 | int prefaulted = 0; | ||
403 | int needs_clflush = 0; | ||
404 | int release_page; | ||
392 | 405 | ||
393 | user_data = (char __user *) (uintptr_t) args->data_ptr; | 406 | user_data = (char __user *) (uintptr_t) args->data_ptr; |
394 | remain = args->size; | 407 | remain = args->size; |
395 | 408 | ||
396 | obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj); | 409 | obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj); |
397 | 410 | ||
398 | offset = args->offset; | 411 | if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) { |
412 | /* If we're not in the cpu read domain, set ourself into the gtt | ||
413 | * read domain and manually flush cachelines (if required). This | ||
414 | * optimizes for the case when the gpu will dirty the data | ||
415 | * anyway again before the next pread happens. */ | ||
416 | if (obj->cache_level == I915_CACHE_NONE) | ||
417 | needs_clflush = 1; | ||
418 | ret = i915_gem_object_set_to_gtt_domain(obj, false); | ||
419 | if (ret) | ||
420 | return ret; | ||
421 | } | ||
399 | 422 | ||
400 | mutex_unlock(&dev->struct_mutex); | 423 | offset = args->offset; |
401 | 424 | ||
402 | while (remain > 0) { | 425 | while (remain > 0) { |
403 | struct page *page; | 426 | struct page *page; |
404 | char *vaddr; | ||
405 | 427 | ||
406 | /* Operation in this page | 428 | /* Operation in this page |
407 | * | 429 | * |
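The hunk above introduces shmem_clflush_swizzled_range(), which rounds the flush window out to 128 bytes so that a swizzled cacheline and its partner (both channels) are always flushed together. A minimal standalone sketch of that rounding arithmetic, using hypothetical names and userspace C rather than the kernel helpers:

#include <stdio.h>

/* Mirrors the round_down/round_up(..., 128) arithmetic above; assumes
 * 64-byte cachelines paired inside a 128-byte window. */
static void swizzle_flush_window(unsigned long addr, unsigned long length,
				 unsigned long *start, unsigned long *end)
{
	*start = addr & ~127UL;				/* round_down(addr, 128) */
	*end = (addr + length + 127UL) & ~127UL;	/* round_up(addr + length, 128) */
}

int main(void)
{
	unsigned long start, end;

	/* A 16-byte range at 0x1234 expands to 0x1200..0x1280, covering
	 * both 64-byte halves of the 128-byte pair. */
	swizzle_flush_window(0x1234, 16, &start, &end);
	printf("flush %#lx..%#lx (%lu bytes)\n", start, end, end - start);
	return 0;
}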
@@ -413,28 +435,51 @@ i915_gem_shmem_pread_slow(struct drm_device *dev, | |||
413 | if ((shmem_page_offset + page_length) > PAGE_SIZE) | 435 | if ((shmem_page_offset + page_length) > PAGE_SIZE) |
414 | page_length = PAGE_SIZE - shmem_page_offset; | 436 | page_length = PAGE_SIZE - shmem_page_offset; |
415 | 437 | ||
416 | page = shmem_read_mapping_page(mapping, offset >> PAGE_SHIFT); | 438 | if (obj->pages) { |
417 | if (IS_ERR(page)) { | 439 | page = obj->pages[offset >> PAGE_SHIFT]; |
418 | ret = PTR_ERR(page); | 440 | release_page = 0; |
419 | goto out; | 441 | } else { |
442 | page = shmem_read_mapping_page(mapping, offset >> PAGE_SHIFT); | ||
443 | if (IS_ERR(page)) { | ||
444 | ret = PTR_ERR(page); | ||
445 | goto out; | ||
446 | } | ||
447 | release_page = 1; | ||
420 | } | 448 | } |
421 | 449 | ||
422 | page_do_bit17_swizzling = obj_do_bit17_swizzling && | 450 | page_do_bit17_swizzling = obj_do_bit17_swizzling && |
423 | (page_to_phys(page) & (1 << 17)) != 0; | 451 | (page_to_phys(page) & (1 << 17)) != 0; |
424 | 452 | ||
425 | vaddr = kmap(page); | 453 | ret = shmem_pread_fast(page, shmem_page_offset, page_length, |
426 | if (page_do_bit17_swizzling) | 454 | user_data, page_do_bit17_swizzling, |
427 | ret = __copy_to_user_swizzled(user_data, | 455 | needs_clflush); |
428 | vaddr, shmem_page_offset, | 456 | if (ret == 0) |
429 | page_length); | 457 | goto next_page; |
430 | else | ||
431 | ret = __copy_to_user(user_data, | ||
432 | vaddr + shmem_page_offset, | ||
433 | page_length); | ||
434 | kunmap(page); | ||
435 | 458 | ||
436 | mark_page_accessed(page); | 459 | hit_slowpath = 1; |
460 | page_cache_get(page); | ||
461 | mutex_unlock(&dev->struct_mutex); | ||
462 | |||
463 | if (!prefaulted) { | ||
464 | ret = fault_in_multipages_writeable(user_data, remain); | ||
465 | /* Userspace is tricking us, but we've already clobbered | ||
466 | * its pages with the prefault and promised to write the | ||
467 | * data up to the first fault. Hence ignore any errors | ||
468 | * and just continue. */ | ||
469 | (void)ret; | ||
470 | prefaulted = 1; | ||
471 | } | ||
472 | |||
473 | ret = shmem_pread_slow(page, shmem_page_offset, page_length, | ||
474 | user_data, page_do_bit17_swizzling, | ||
475 | needs_clflush); | ||
476 | |||
477 | mutex_lock(&dev->struct_mutex); | ||
437 | page_cache_release(page); | 478 | page_cache_release(page); |
479 | next_page: | ||
480 | mark_page_accessed(page); | ||
481 | if (release_page) | ||
482 | page_cache_release(page); | ||
438 | 483 | ||
439 | if (ret) { | 484 | if (ret) { |
440 | ret = -EFAULT; | 485 | ret = -EFAULT; |
@@ -447,10 +492,11 @@ i915_gem_shmem_pread_slow(struct drm_device *dev, | |||
447 | } | 492 | } |
448 | 493 | ||
449 | out: | 494 | out: |
450 | mutex_lock(&dev->struct_mutex); | 495 | if (hit_slowpath) { |
451 | /* Fixup: Kill any reinstated backing storage pages */ | 496 | /* Fixup: Kill any reinstated backing storage pages */ |
452 | if (obj->madv == __I915_MADV_PURGED) | 497 | if (obj->madv == __I915_MADV_PURGED) |
453 | i915_gem_object_truncate(obj); | 498 | i915_gem_object_truncate(obj); |
499 | } | ||
454 | 500 | ||
455 | return ret; | 501 | return ret; |
456 | } | 502 | } |
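The rewritten pread loop above first attempts a non-sleeping copy (shmem_pread_fast) while struct_mutex is held and, only if that faults, pins the page, drops the lock, prefaults the user buffer and retries with the sleeping copy. A rough userspace analogue of that fast/slow split, with made-up names, to illustrate why the lock must be dropped before any copy that can block:

#include <pthread.h>
#include <stdio.h>
#include <string.h>

static pthread_mutex_t big_lock = PTHREAD_MUTEX_INITIALIZER;

/* Stand-in for the atomic fast path: it must not block, and it is allowed
 * to fail. Here it always fails so the slow path below is exercised. */
static int copy_fast(char *dst, const char *src, size_t len)
{
	(void)dst; (void)src; (void)len;
	return -1;
}

static int copy_one_page(char *dst, const char *src, size_t len)
{
	pthread_mutex_lock(&big_lock);
	if (copy_fast(dst, src, len) == 0) {
		pthread_mutex_unlock(&big_lock);
		return 0;
	}

	/* Slow path: drop the lock first, because the blocking copy may
	 * fault and the fault handler may itself need big_lock. */
	pthread_mutex_unlock(&big_lock);
	memcpy(dst, src, len);			/* may sleep / fault */
	pthread_mutex_lock(&big_lock);
	/* ... redo any bookkeeping the lock protects ... */
	pthread_mutex_unlock(&big_lock);
	return 0;
}

int main(void)
{
	char src[32] = "slow path", dst[32] = "";

	copy_one_page(dst, src, sizeof(src));
	printf("%s\n", dst);
	return 0;
}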
@@ -476,11 +522,6 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data, | |||
476 | args->size)) | 522 | args->size)) |
477 | return -EFAULT; | 523 | return -EFAULT; |
478 | 524 | ||
479 | ret = fault_in_pages_writeable((char __user *)(uintptr_t)args->data_ptr, | ||
480 | args->size); | ||
481 | if (ret) | ||
482 | return -EFAULT; | ||
483 | |||
484 | ret = i915_mutex_lock_interruptible(dev); | 525 | ret = i915_mutex_lock_interruptible(dev); |
485 | if (ret) | 526 | if (ret) |
486 | return ret; | 527 | return ret; |
@@ -498,19 +539,17 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data, | |||
498 | goto out; | 539 | goto out; |
499 | } | 540 | } |
500 | 541 | ||
501 | trace_i915_gem_object_pread(obj, args->offset, args->size); | 542 | /* prime objects have no backing filp to GEM pread/pwrite |
502 | 543 | * pages from. | |
503 | ret = i915_gem_object_set_cpu_read_domain_range(obj, | 544 | */ |
504 | args->offset, | 545 | if (!obj->base.filp) { |
505 | args->size); | 546 | ret = -EINVAL; |
506 | if (ret) | ||
507 | goto out; | 547 | goto out; |
548 | } | ||
508 | 549 | ||
509 | ret = -EFAULT; | 550 | trace_i915_gem_object_pread(obj, args->offset, args->size); |
510 | if (!i915_gem_object_needs_bit17_swizzle(obj)) | 551 | |
511 | ret = i915_gem_shmem_pread_fast(dev, obj, args, file); | 552 | ret = i915_gem_shmem_pread(dev, obj, args, file); |
512 | if (ret == -EFAULT) | ||
513 | ret = i915_gem_shmem_pread_slow(dev, obj, args, file); | ||
514 | 553 | ||
515 | out: | 554 | out: |
516 | drm_gem_object_unreference(&obj->base); | 555 | drm_gem_object_unreference(&obj->base); |
@@ -529,40 +568,19 @@ fast_user_write(struct io_mapping *mapping, | |||
529 | char __user *user_data, | 568 | char __user *user_data, |
530 | int length) | 569 | int length) |
531 | { | 570 | { |
532 | char *vaddr_atomic; | 571 | void __iomem *vaddr_atomic; |
572 | void *vaddr; | ||
533 | unsigned long unwritten; | 573 | unsigned long unwritten; |
534 | 574 | ||
535 | vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base); | 575 | vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base); |
536 | unwritten = __copy_from_user_inatomic_nocache(vaddr_atomic + page_offset, | 576 | /* We can use the cpu mem copy function because this is X86. */ |
577 | vaddr = (void __force*)vaddr_atomic + page_offset; | ||
578 | unwritten = __copy_from_user_inatomic_nocache(vaddr, | ||
537 | user_data, length); | 579 | user_data, length); |
538 | io_mapping_unmap_atomic(vaddr_atomic); | 580 | io_mapping_unmap_atomic(vaddr_atomic); |
539 | return unwritten; | 581 | return unwritten; |
540 | } | 582 | } |
541 | 583 | ||
542 | /* Here's the write path which can sleep for | ||
543 | * page faults | ||
544 | */ | ||
545 | |||
546 | static inline void | ||
547 | slow_kernel_write(struct io_mapping *mapping, | ||
548 | loff_t gtt_base, int gtt_offset, | ||
549 | struct page *user_page, int user_offset, | ||
550 | int length) | ||
551 | { | ||
552 | char __iomem *dst_vaddr; | ||
553 | char *src_vaddr; | ||
554 | |||
555 | dst_vaddr = io_mapping_map_wc(mapping, gtt_base); | ||
556 | src_vaddr = kmap(user_page); | ||
557 | |||
558 | memcpy_toio(dst_vaddr + gtt_offset, | ||
559 | src_vaddr + user_offset, | ||
560 | length); | ||
561 | |||
562 | kunmap(user_page); | ||
563 | io_mapping_unmap(dst_vaddr); | ||
564 | } | ||
565 | |||
566 | /** | 584 | /** |
567 | * This is the fast pwrite path, where we copy the data directly from the | 585 | * This is the fast pwrite path, where we copy the data directly from the |
568 | * user into the GTT, uncached. | 586 | * user into the GTT, uncached. |
@@ -577,7 +595,19 @@ i915_gem_gtt_pwrite_fast(struct drm_device *dev, | |||
577 | ssize_t remain; | 595 | ssize_t remain; |
578 | loff_t offset, page_base; | 596 | loff_t offset, page_base; |
579 | char __user *user_data; | 597 | char __user *user_data; |
580 | int page_offset, page_length; | 598 | int page_offset, page_length, ret; |
599 | |||
600 | ret = i915_gem_object_pin(obj, 0, true); | ||
601 | if (ret) | ||
602 | goto out; | ||
603 | |||
604 | ret = i915_gem_object_set_to_gtt_domain(obj, true); | ||
605 | if (ret) | ||
606 | goto out_unpin; | ||
607 | |||
608 | ret = i915_gem_object_put_fence(obj); | ||
609 | if (ret) | ||
610 | goto out_unpin; | ||
581 | 611 | ||
582 | user_data = (char __user *) (uintptr_t) args->data_ptr; | 612 | user_data = (char __user *) (uintptr_t) args->data_ptr; |
583 | remain = args->size; | 613 | remain = args->size; |
@@ -602,214 +632,133 @@ i915_gem_gtt_pwrite_fast(struct drm_device *dev, | |||
602 | * retry in the slow path. | 632 | * retry in the slow path. |
603 | */ | 633 | */ |
604 | if (fast_user_write(dev_priv->mm.gtt_mapping, page_base, | 634 | if (fast_user_write(dev_priv->mm.gtt_mapping, page_base, |
605 | page_offset, user_data, page_length)) | 635 | page_offset, user_data, page_length)) { |
606 | return -EFAULT; | 636 | ret = -EFAULT; |
637 | goto out_unpin; | ||
638 | } | ||
607 | 639 | ||
608 | remain -= page_length; | 640 | remain -= page_length; |
609 | user_data += page_length; | 641 | user_data += page_length; |
610 | offset += page_length; | 642 | offset += page_length; |
611 | } | 643 | } |
612 | 644 | ||
613 | return 0; | 645 | out_unpin: |
646 | i915_gem_object_unpin(obj); | ||
647 | out: | ||
648 | return ret; | ||
614 | } | 649 | } |
615 | 650 | ||
616 | /** | 651 | /* Per-page copy function for the shmem pwrite fastpath. |
617 | * This is the fallback GTT pwrite path, which uses get_user_pages to pin | 652 | * Flushes invalid cachelines before writing to the target if |
618 | * the memory and maps it using kmap_atomic for copying. | 653 | * needs_clflush_before is set and flushes out any written cachelines after |
619 | * | 654 | * writing if needs_clflush is set. */ |
620 | * This code resulted in x11perf -rgb10text consuming about 10% more CPU | ||
621 | * than using i915_gem_gtt_pwrite_fast on a G45 (32-bit). | ||
622 | */ | ||
623 | static int | 655 | static int |
624 | i915_gem_gtt_pwrite_slow(struct drm_device *dev, | 656 | shmem_pwrite_fast(struct page *page, int shmem_page_offset, int page_length, |
625 | struct drm_i915_gem_object *obj, | 657 | char __user *user_data, |
626 | struct drm_i915_gem_pwrite *args, | 658 | bool page_do_bit17_swizzling, |
627 | struct drm_file *file) | 659 | bool needs_clflush_before, |
660 | bool needs_clflush_after) | ||
628 | { | 661 | { |
629 | drm_i915_private_t *dev_priv = dev->dev_private; | 662 | char *vaddr; |
630 | ssize_t remain; | ||
631 | loff_t gtt_page_base, offset; | ||
632 | loff_t first_data_page, last_data_page, num_pages; | ||
633 | loff_t pinned_pages, i; | ||
634 | struct page **user_pages; | ||
635 | struct mm_struct *mm = current->mm; | ||
636 | int gtt_page_offset, data_page_offset, data_page_index, page_length; | ||
637 | int ret; | 663 | int ret; |
638 | uint64_t data_ptr = args->data_ptr; | ||
639 | |||
640 | remain = args->size; | ||
641 | |||
642 | /* Pin the user pages containing the data. We can't fault while | ||
643 | * holding the struct mutex, and all of the pwrite implementations | ||
644 | * want to hold it while dereferencing the user data. | ||
645 | */ | ||
646 | first_data_page = data_ptr / PAGE_SIZE; | ||
647 | last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE; | ||
648 | num_pages = last_data_page - first_data_page + 1; | ||
649 | |||
650 | user_pages = drm_malloc_ab(num_pages, sizeof(struct page *)); | ||
651 | if (user_pages == NULL) | ||
652 | return -ENOMEM; | ||
653 | |||
654 | mutex_unlock(&dev->struct_mutex); | ||
655 | down_read(&mm->mmap_sem); | ||
656 | pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr, | ||
657 | num_pages, 0, 0, user_pages, NULL); | ||
658 | up_read(&mm->mmap_sem); | ||
659 | mutex_lock(&dev->struct_mutex); | ||
660 | if (pinned_pages < num_pages) { | ||
661 | ret = -EFAULT; | ||
662 | goto out_unpin_pages; | ||
663 | } | ||
664 | |||
665 | ret = i915_gem_object_set_to_gtt_domain(obj, true); | ||
666 | if (ret) | ||
667 | goto out_unpin_pages; | ||
668 | |||
669 | ret = i915_gem_object_put_fence(obj); | ||
670 | if (ret) | ||
671 | goto out_unpin_pages; | ||
672 | |||
673 | offset = obj->gtt_offset + args->offset; | ||
674 | |||
675 | while (remain > 0) { | ||
676 | /* Operation in this page | ||
677 | * | ||
678 | * gtt_page_base = page offset within aperture | ||
679 | * gtt_page_offset = offset within page in aperture | ||
680 | * data_page_index = page number in get_user_pages return | ||
681 | * data_page_offset = offset with data_page_index page. | ||
682 | * page_length = bytes to copy for this page | ||
683 | */ | ||
684 | gtt_page_base = offset & PAGE_MASK; | ||
685 | gtt_page_offset = offset_in_page(offset); | ||
686 | data_page_index = data_ptr / PAGE_SIZE - first_data_page; | ||
687 | data_page_offset = offset_in_page(data_ptr); | ||
688 | |||
689 | page_length = remain; | ||
690 | if ((gtt_page_offset + page_length) > PAGE_SIZE) | ||
691 | page_length = PAGE_SIZE - gtt_page_offset; | ||
692 | if ((data_page_offset + page_length) > PAGE_SIZE) | ||
693 | page_length = PAGE_SIZE - data_page_offset; | ||
694 | 664 | ||
695 | slow_kernel_write(dev_priv->mm.gtt_mapping, | 665 | if (unlikely(page_do_bit17_swizzling)) |
696 | gtt_page_base, gtt_page_offset, | 666 | return -EINVAL; |
697 | user_pages[data_page_index], | ||
698 | data_page_offset, | ||
699 | page_length); | ||
700 | |||
701 | remain -= page_length; | ||
702 | offset += page_length; | ||
703 | data_ptr += page_length; | ||
704 | } | ||
705 | 667 | ||
706 | out_unpin_pages: | 668 | vaddr = kmap_atomic(page); |
707 | for (i = 0; i < pinned_pages; i++) | 669 | if (needs_clflush_before) |
708 | page_cache_release(user_pages[i]); | 670 | drm_clflush_virt_range(vaddr + shmem_page_offset, |
709 | drm_free_large(user_pages); | 671 | page_length); |
672 | ret = __copy_from_user_inatomic_nocache(vaddr + shmem_page_offset, | ||
673 | user_data, | ||
674 | page_length); | ||
675 | if (needs_clflush_after) | ||
676 | drm_clflush_virt_range(vaddr + shmem_page_offset, | ||
677 | page_length); | ||
678 | kunmap_atomic(vaddr); | ||
710 | 679 | ||
711 | return ret; | 680 | return ret; |
712 | } | 681 | } |
713 | 682 | ||
714 | /** | 683 | /* Only difference to the fast-path function is that this can handle bit17 |
715 | * This is the fast shmem pwrite path, which attempts to directly | 684 | * and uses non-atomic copy and kmap functions. */ |
716 | * copy_from_user into the kmapped pages backing the object. | ||
717 | */ | ||
718 | static int | 685 | static int |
719 | i915_gem_shmem_pwrite_fast(struct drm_device *dev, | 686 | shmem_pwrite_slow(struct page *page, int shmem_page_offset, int page_length, |
720 | struct drm_i915_gem_object *obj, | 687 | char __user *user_data, |
721 | struct drm_i915_gem_pwrite *args, | 688 | bool page_do_bit17_swizzling, |
722 | struct drm_file *file) | 689 | bool needs_clflush_before, |
690 | bool needs_clflush_after) | ||
723 | { | 691 | { |
724 | struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping; | 692 | char *vaddr; |
725 | ssize_t remain; | 693 | int ret; |
726 | loff_t offset; | ||
727 | char __user *user_data; | ||
728 | int page_offset, page_length; | ||
729 | |||
730 | user_data = (char __user *) (uintptr_t) args->data_ptr; | ||
731 | remain = args->size; | ||
732 | |||
733 | offset = args->offset; | ||
734 | obj->dirty = 1; | ||
735 | |||
736 | while (remain > 0) { | ||
737 | struct page *page; | ||
738 | char *vaddr; | ||
739 | int ret; | ||
740 | |||
741 | /* Operation in this page | ||
742 | * | ||
743 | * page_offset = offset within page | ||
744 | * page_length = bytes to copy for this page | ||
745 | */ | ||
746 | page_offset = offset_in_page(offset); | ||
747 | page_length = remain; | ||
748 | if ((page_offset + remain) > PAGE_SIZE) | ||
749 | page_length = PAGE_SIZE - page_offset; | ||
750 | |||
751 | page = shmem_read_mapping_page(mapping, offset >> PAGE_SHIFT); | ||
752 | if (IS_ERR(page)) | ||
753 | return PTR_ERR(page); | ||
754 | 694 | ||
755 | vaddr = kmap_atomic(page); | 695 | vaddr = kmap(page); |
756 | ret = __copy_from_user_inatomic(vaddr + page_offset, | 696 | if (unlikely(needs_clflush_before || page_do_bit17_swizzling)) |
697 | shmem_clflush_swizzled_range(vaddr + shmem_page_offset, | ||
698 | page_length, | ||
699 | page_do_bit17_swizzling); | ||
700 | if (page_do_bit17_swizzling) | ||
701 | ret = __copy_from_user_swizzled(vaddr, shmem_page_offset, | ||
757 | user_data, | 702 | user_data, |
758 | page_length); | 703 | page_length); |
759 | kunmap_atomic(vaddr); | 704 | else |
760 | 705 | ret = __copy_from_user(vaddr + shmem_page_offset, | |
761 | set_page_dirty(page); | 706 | user_data, |
762 | mark_page_accessed(page); | 707 | page_length); |
763 | page_cache_release(page); | 708 | if (needs_clflush_after) |
764 | 709 | shmem_clflush_swizzled_range(vaddr + shmem_page_offset, | |
765 | /* If we get a fault while copying data, then (presumably) our | 710 | page_length, |
766 | * source page isn't available. Return the error and we'll | 711 | page_do_bit17_swizzling); |
767 | * retry in the slow path. | 712 | kunmap(page); |
768 | */ | ||
769 | if (ret) | ||
770 | return -EFAULT; | ||
771 | |||
772 | remain -= page_length; | ||
773 | user_data += page_length; | ||
774 | offset += page_length; | ||
775 | } | ||
776 | 713 | ||
777 | return 0; | 714 | return ret; |
778 | } | 715 | } |
779 | 716 | ||
780 | /** | ||
781 | * This is the fallback shmem pwrite path, which uses get_user_pages to pin | ||
782 | * the memory and maps it using kmap_atomic for copying. | ||
783 | * | ||
784 | * This avoids taking mmap_sem for faulting on the user's address while the | ||
785 | * struct_mutex is held. | ||
786 | */ | ||
787 | static int | 717 | static int |
788 | i915_gem_shmem_pwrite_slow(struct drm_device *dev, | 718 | i915_gem_shmem_pwrite(struct drm_device *dev, |
789 | struct drm_i915_gem_object *obj, | 719 | struct drm_i915_gem_object *obj, |
790 | struct drm_i915_gem_pwrite *args, | 720 | struct drm_i915_gem_pwrite *args, |
791 | struct drm_file *file) | 721 | struct drm_file *file) |
792 | { | 722 | { |
793 | struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping; | 723 | struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping; |
794 | ssize_t remain; | 724 | ssize_t remain; |
795 | loff_t offset; | 725 | loff_t offset; |
796 | char __user *user_data; | 726 | char __user *user_data; |
797 | int shmem_page_offset, page_length, ret; | 727 | int shmem_page_offset, page_length, ret = 0; |
798 | int obj_do_bit17_swizzling, page_do_bit17_swizzling; | 728 | int obj_do_bit17_swizzling, page_do_bit17_swizzling; |
729 | int hit_slowpath = 0; | ||
730 | int needs_clflush_after = 0; | ||
731 | int needs_clflush_before = 0; | ||
732 | int release_page; | ||
799 | 733 | ||
800 | user_data = (char __user *) (uintptr_t) args->data_ptr; | 734 | user_data = (char __user *) (uintptr_t) args->data_ptr; |
801 | remain = args->size; | 735 | remain = args->size; |
802 | 736 | ||
803 | obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj); | 737 | obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj); |
804 | 738 | ||
739 | if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) { | ||
740 | /* If we're not in the cpu write domain, set ourself into the gtt | ||
741 | * write domain and manually flush cachelines (if required). This | ||
742 | * optimizes for the case when the gpu will use the data | ||
743 | * right away and we therefore have to clflush anyway. */ | ||
744 | if (obj->cache_level == I915_CACHE_NONE) | ||
745 | needs_clflush_after = 1; | ||
746 | ret = i915_gem_object_set_to_gtt_domain(obj, true); | ||
747 | if (ret) | ||
748 | return ret; | ||
749 | } | ||
750 | /* Same trick applies for invalidate partially written cachelines before | ||
751 | * writing. */ | ||
752 | if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU) | ||
753 | && obj->cache_level == I915_CACHE_NONE) | ||
754 | needs_clflush_before = 1; | ||
755 | |||
805 | offset = args->offset; | 756 | offset = args->offset; |
806 | obj->dirty = 1; | 757 | obj->dirty = 1; |
807 | 758 | ||
808 | mutex_unlock(&dev->struct_mutex); | ||
809 | |||
810 | while (remain > 0) { | 759 | while (remain > 0) { |
811 | struct page *page; | 760 | struct page *page; |
812 | char *vaddr; | 761 | int partial_cacheline_write; |
813 | 762 | ||
814 | /* Operation in this page | 763 | /* Operation in this page |
815 | * | 764 | * |
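The top of the new i915_gem_shmem_pwrite() above decides up front whether cachelines must be flushed before the copy (stale data could otherwise survive in a partially written line) or after it (the GPU will read the uncached backing store right away). A standalone restatement of that decision, using mock flags instead of the real object state:

#include <stdbool.h>
#include <stdio.h>

struct clflush_policy { bool before; bool after; };

static struct clflush_policy
pwrite_clflush_policy(bool in_cpu_write_domain, bool in_cpu_read_domain,
		      bool uncached)
{
	struct clflush_policy p = { false, false };

	/* Writes the GPU will consume soon: flush them out afterwards
	 * instead of bouncing the object into the CPU domain. */
	if (!in_cpu_write_domain && uncached)
		p.after = true;
	/* Partially written cachelines must be made up to date first
	 * (the per-page loop further skips this when whole lines are
	 * overwritten, see the next sketch). */
	if (!in_cpu_read_domain && uncached)
		p.before = true;
	return p;
}

int main(void)
{
	struct clflush_policy p = pwrite_clflush_policy(false, false, true);

	printf("before=%d after=%d\n", p.before, p.after);
	return 0;
}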
@@ -822,29 +771,51 @@ i915_gem_shmem_pwrite_slow(struct drm_device *dev, | |||
822 | if ((shmem_page_offset + page_length) > PAGE_SIZE) | 771 | if ((shmem_page_offset + page_length) > PAGE_SIZE) |
823 | page_length = PAGE_SIZE - shmem_page_offset; | 772 | page_length = PAGE_SIZE - shmem_page_offset; |
824 | 773 | ||
825 | page = shmem_read_mapping_page(mapping, offset >> PAGE_SHIFT); | 774 | /* If we don't overwrite a cacheline completely we need to be |
826 | if (IS_ERR(page)) { | 775 | * careful to have up-to-date data by first clflushing. Don't |
827 | ret = PTR_ERR(page); | 776 | * overcomplicate things and flush the entire patch. */ |
828 | goto out; | 777 | partial_cacheline_write = needs_clflush_before && |
778 | ((shmem_page_offset | page_length) | ||
779 | & (boot_cpu_data.x86_clflush_size - 1)); | ||
780 | |||
781 | if (obj->pages) { | ||
782 | page = obj->pages[offset >> PAGE_SHIFT]; | ||
783 | release_page = 0; | ||
784 | } else { | ||
785 | page = shmem_read_mapping_page(mapping, offset >> PAGE_SHIFT); | ||
786 | if (IS_ERR(page)) { | ||
787 | ret = PTR_ERR(page); | ||
788 | goto out; | ||
789 | } | ||
790 | release_page = 1; | ||
829 | } | 791 | } |
830 | 792 | ||
831 | page_do_bit17_swizzling = obj_do_bit17_swizzling && | 793 | page_do_bit17_swizzling = obj_do_bit17_swizzling && |
832 | (page_to_phys(page) & (1 << 17)) != 0; | 794 | (page_to_phys(page) & (1 << 17)) != 0; |
833 | 795 | ||
834 | vaddr = kmap(page); | 796 | ret = shmem_pwrite_fast(page, shmem_page_offset, page_length, |
835 | if (page_do_bit17_swizzling) | 797 | user_data, page_do_bit17_swizzling, |
836 | ret = __copy_from_user_swizzled(vaddr, shmem_page_offset, | 798 | partial_cacheline_write, |
837 | user_data, | 799 | needs_clflush_after); |
838 | page_length); | 800 | if (ret == 0) |
839 | else | 801 | goto next_page; |
840 | ret = __copy_from_user(vaddr + shmem_page_offset, | 802 | |
841 | user_data, | 803 | hit_slowpath = 1; |
842 | page_length); | 804 | page_cache_get(page); |
843 | kunmap(page); | 805 | mutex_unlock(&dev->struct_mutex); |
806 | |||
807 | ret = shmem_pwrite_slow(page, shmem_page_offset, page_length, | ||
808 | user_data, page_do_bit17_swizzling, | ||
809 | partial_cacheline_write, | ||
810 | needs_clflush_after); | ||
844 | 811 | ||
812 | mutex_lock(&dev->struct_mutex); | ||
813 | page_cache_release(page); | ||
814 | next_page: | ||
845 | set_page_dirty(page); | 815 | set_page_dirty(page); |
846 | mark_page_accessed(page); | 816 | mark_page_accessed(page); |
847 | page_cache_release(page); | 817 | if (release_page) |
818 | page_cache_release(page); | ||
848 | 819 | ||
849 | if (ret) { | 820 | if (ret) { |
850 | ret = -EFAULT; | 821 | ret = -EFAULT; |
@@ -857,17 +828,21 @@ i915_gem_shmem_pwrite_slow(struct drm_device *dev, | |||
857 | } | 828 | } |
858 | 829 | ||
859 | out: | 830 | out: |
860 | mutex_lock(&dev->struct_mutex); | 831 | if (hit_slowpath) { |
861 | /* Fixup: Kill any reinstated backing storage pages */ | 832 | /* Fixup: Kill any reinstated backing storage pages */ |
862 | if (obj->madv == __I915_MADV_PURGED) | 833 | if (obj->madv == __I915_MADV_PURGED) |
863 | i915_gem_object_truncate(obj); | 834 | i915_gem_object_truncate(obj); |
864 | /* and flush dirty cachelines in case the object isn't in the cpu write | 835 | /* and flush dirty cachelines in case the object isn't in the cpu write |
865 | * domain anymore. */ | 836 | * domain anymore. */ |
866 | if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) { | 837 | if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) { |
867 | i915_gem_clflush_object(obj); | 838 | i915_gem_clflush_object(obj); |
868 | intel_gtt_chipset_flush(); | 839 | intel_gtt_chipset_flush(); |
840 | } | ||
869 | } | 841 | } |
870 | 842 | ||
843 | if (needs_clflush_after) | ||
844 | intel_gtt_chipset_flush(); | ||
845 | |||
871 | return ret; | 846 | return ret; |
872 | } | 847 | } |
873 | 848 | ||
@@ -892,8 +867,8 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, | |||
892 | args->size)) | 867 | args->size)) |
893 | return -EFAULT; | 868 | return -EFAULT; |
894 | 869 | ||
895 | ret = fault_in_pages_readable((char __user *)(uintptr_t)args->data_ptr, | 870 | ret = fault_in_multipages_readable((char __user *)(uintptr_t)args->data_ptr, |
896 | args->size); | 871 | args->size); |
897 | if (ret) | 872 | if (ret) |
898 | return -EFAULT; | 873 | return -EFAULT; |
899 | 874 | ||
@@ -914,8 +889,17 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, | |||
914 | goto out; | 889 | goto out; |
915 | } | 890 | } |
916 | 891 | ||
892 | /* prime objects have no backing filp to GEM pread/pwrite | ||
893 | * pages from. | ||
894 | */ | ||
895 | if (!obj->base.filp) { | ||
896 | ret = -EINVAL; | ||
897 | goto out; | ||
898 | } | ||
899 | |||
917 | trace_i915_gem_object_pwrite(obj, args->offset, args->size); | 900 | trace_i915_gem_object_pwrite(obj, args->offset, args->size); |
918 | 901 | ||
902 | ret = -EFAULT; | ||
919 | /* We can only do the GTT pwrite on untiled buffers, as otherwise | 903 | /* We can only do the GTT pwrite on untiled buffers, as otherwise |
920 | * it would end up going through the fenced access, and we'll get | 904 | * it would end up going through the fenced access, and we'll get |
921 | * different detiling behavior between reading and writing. | 905 | * different detiling behavior between reading and writing. |
@@ -928,42 +912,18 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, | |||
928 | } | 912 | } |
929 | 913 | ||
930 | if (obj->gtt_space && | 914 | if (obj->gtt_space && |
915 | obj->cache_level == I915_CACHE_NONE && | ||
916 | obj->tiling_mode == I915_TILING_NONE && | ||
917 | obj->map_and_fenceable && | ||
931 | obj->base.write_domain != I915_GEM_DOMAIN_CPU) { | 918 | obj->base.write_domain != I915_GEM_DOMAIN_CPU) { |
932 | ret = i915_gem_object_pin(obj, 0, true); | ||
933 | if (ret) | ||
934 | goto out; | ||
935 | |||
936 | ret = i915_gem_object_set_to_gtt_domain(obj, true); | ||
937 | if (ret) | ||
938 | goto out_unpin; | ||
939 | |||
940 | ret = i915_gem_object_put_fence(obj); | ||
941 | if (ret) | ||
942 | goto out_unpin; | ||
943 | |||
944 | ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file); | 919 | ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file); |
945 | if (ret == -EFAULT) | 920 | /* Note that the gtt paths might fail with non-page-backed user |
946 | ret = i915_gem_gtt_pwrite_slow(dev, obj, args, file); | 921 | * pointers (e.g. gtt mappings when moving data between |
947 | 922 | * textures). Fallback to the shmem path in that case. */ | |
948 | out_unpin: | ||
949 | i915_gem_object_unpin(obj); | ||
950 | |||
951 | if (ret != -EFAULT) | ||
952 | goto out; | ||
953 | /* Fall through to the shmfs paths because the gtt paths might | ||
954 | * fail with non-page-backed user pointers (e.g. gtt mappings | ||
955 | * when moving data between textures). */ | ||
956 | } | 923 | } |
957 | 924 | ||
958 | ret = i915_gem_object_set_to_cpu_domain(obj, 1); | ||
959 | if (ret) | ||
960 | goto out; | ||
961 | |||
962 | ret = -EFAULT; | ||
963 | if (!i915_gem_object_needs_bit17_swizzle(obj)) | ||
964 | ret = i915_gem_shmem_pwrite_fast(dev, obj, args, file); | ||
965 | if (ret == -EFAULT) | 925 | if (ret == -EFAULT) |
966 | ret = i915_gem_shmem_pwrite_slow(dev, obj, args, file); | 926 | ret = i915_gem_shmem_pwrite(dev, obj, args, file); |
967 | 927 | ||
968 | out: | 928 | out: |
969 | drm_gem_object_unreference(&obj->base); | 929 | drm_gem_object_unreference(&obj->base); |
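The pwrite ioctl above now gates the GTT fast path on the object already being bound, uncached, untiled, mappable and not in the CPU write domain, falling back to the shmem path otherwise. The same predicate, restated over a mock structure so it compiles outside the kernel (field names follow the diff, everything else is hypothetical):

#include <stdbool.h>
#include <stdio.h>

enum { CACHE_NONE, CACHE_LLC };
enum { TILING_NONE, TILING_X, TILING_Y };

struct gem_obj {
	bool bound_to_gtt;	/* stands in for obj->gtt_space != NULL */
	int cache_level;
	int tiling_mode;
	bool map_and_fenceable;
	bool cpu_write_domain;	/* write_domain == I915_GEM_DOMAIN_CPU */
};

static bool use_gtt_pwrite_fast(const struct gem_obj *o)
{
	return o->bound_to_gtt &&
	       o->cache_level == CACHE_NONE &&	/* WC writes stay coherent */
	       o->tiling_mode == TILING_NONE &&	/* no fenced detiling needed */
	       o->map_and_fenceable &&		/* reachable via mappable aperture */
	       !o->cpu_write_domain;		/* otherwise shmem path flushes */
}

int main(void)
{
	struct gem_obj linear = { true, CACHE_NONE, TILING_NONE, true, false };
	struct gem_obj tiled  = { true, CACHE_NONE, TILING_X,    true, false };

	printf("linear: %d, tiled: %d\n",
	       use_gtt_pwrite_fast(&linear), use_gtt_pwrite_fast(&tiled));
	return 0;
}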
@@ -986,9 +946,6 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, | |||
986 | uint32_t write_domain = args->write_domain; | 946 | uint32_t write_domain = args->write_domain; |
987 | int ret; | 947 | int ret; |
988 | 948 | ||
989 | if (!(dev->driver->driver_features & DRIVER_GEM)) | ||
990 | return -ENODEV; | ||
991 | |||
992 | /* Only handle setting domains to types used by the CPU. */ | 949 | /* Only handle setting domains to types used by the CPU. */ |
993 | if (write_domain & I915_GEM_GPU_DOMAINS) | 950 | if (write_domain & I915_GEM_GPU_DOMAINS) |
994 | return -EINVAL; | 951 | return -EINVAL; |
@@ -1042,9 +999,6 @@ i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data, | |||
1042 | struct drm_i915_gem_object *obj; | 999 | struct drm_i915_gem_object *obj; |
1043 | int ret = 0; | 1000 | int ret = 0; |
1044 | 1001 | ||
1045 | if (!(dev->driver->driver_features & DRIVER_GEM)) | ||
1046 | return -ENODEV; | ||
1047 | |||
1048 | ret = i915_mutex_lock_interruptible(dev); | 1002 | ret = i915_mutex_lock_interruptible(dev); |
1049 | if (ret) | 1003 | if (ret) |
1050 | return ret; | 1004 | return ret; |
@@ -1080,13 +1034,18 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data, | |||
1080 | struct drm_gem_object *obj; | 1034 | struct drm_gem_object *obj; |
1081 | unsigned long addr; | 1035 | unsigned long addr; |
1082 | 1036 | ||
1083 | if (!(dev->driver->driver_features & DRIVER_GEM)) | ||
1084 | return -ENODEV; | ||
1085 | |||
1086 | obj = drm_gem_object_lookup(dev, file, args->handle); | 1037 | obj = drm_gem_object_lookup(dev, file, args->handle); |
1087 | if (obj == NULL) | 1038 | if (obj == NULL) |
1088 | return -ENOENT; | 1039 | return -ENOENT; |
1089 | 1040 | ||
1041 | /* prime objects have no backing filp to GEM mmap | ||
1042 | * pages from. | ||
1043 | */ | ||
1044 | if (!obj->filp) { | ||
1045 | drm_gem_object_unreference_unlocked(obj); | ||
1046 | return -EINVAL; | ||
1047 | } | ||
1048 | |||
1090 | addr = vm_mmap(obj->filp, 0, args->size, | 1049 | addr = vm_mmap(obj->filp, 0, args->size, |
1091 | PROT_READ | PROT_WRITE, MAP_SHARED, | 1050 | PROT_READ | PROT_WRITE, MAP_SHARED, |
1092 | args->offset); | 1051 | args->offset); |
@@ -1151,10 +1110,10 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
1151 | goto unlock; | 1110 | goto unlock; |
1152 | } | 1111 | } |
1153 | 1112 | ||
1154 | if (obj->tiling_mode == I915_TILING_NONE) | 1113 | if (!obj->has_global_gtt_mapping) |
1155 | ret = i915_gem_object_put_fence(obj); | 1114 | i915_gem_gtt_bind_object(obj, obj->cache_level); |
1156 | else | 1115 | |
1157 | ret = i915_gem_object_get_fence(obj, NULL); | 1116 | ret = i915_gem_object_get_fence(obj); |
1158 | if (ret) | 1117 | if (ret) |
1159 | goto unlock; | 1118 | goto unlock; |
1160 | 1119 | ||
@@ -1308,9 +1267,6 @@ i915_gem_mmap_gtt(struct drm_file *file, | |||
1308 | struct drm_i915_gem_object *obj; | 1267 | struct drm_i915_gem_object *obj; |
1309 | int ret; | 1268 | int ret; |
1310 | 1269 | ||
1311 | if (!(dev->driver->driver_features & DRIVER_GEM)) | ||
1312 | return -ENODEV; | ||
1313 | |||
1314 | ret = i915_mutex_lock_interruptible(dev); | 1270 | ret = i915_mutex_lock_interruptible(dev); |
1315 | if (ret) | 1271 | if (ret) |
1316 | return ret; | 1272 | return ret; |
@@ -1368,14 +1324,10 @@ i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data, | |||
1368 | { | 1324 | { |
1369 | struct drm_i915_gem_mmap_gtt *args = data; | 1325 | struct drm_i915_gem_mmap_gtt *args = data; |
1370 | 1326 | ||
1371 | if (!(dev->driver->driver_features & DRIVER_GEM)) | ||
1372 | return -ENODEV; | ||
1373 | |||
1374 | return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset); | 1327 | return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset); |
1375 | } | 1328 | } |
1376 | 1329 | ||
1377 | 1330 | int | |
1378 | static int | ||
1379 | i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj, | 1331 | i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj, |
1380 | gfp_t gfpmask) | 1332 | gfp_t gfpmask) |
1381 | { | 1333 | { |
@@ -1384,6 +1336,9 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj, | |||
1384 | struct inode *inode; | 1336 | struct inode *inode; |
1385 | struct page *page; | 1337 | struct page *page; |
1386 | 1338 | ||
1339 | if (obj->pages || obj->sg_table) | ||
1340 | return 0; | ||
1341 | |||
1387 | /* Get the list of pages out of our struct file. They'll be pinned | 1342 | /* Get the list of pages out of our struct file. They'll be pinned |
1388 | * at this point until we release them. | 1343 | * at this point until we release them. |
1389 | */ | 1344 | */ |
@@ -1425,6 +1380,9 @@ i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj) | |||
1425 | int page_count = obj->base.size / PAGE_SIZE; | 1380 | int page_count = obj->base.size / PAGE_SIZE; |
1426 | int i; | 1381 | int i; |
1427 | 1382 | ||
1383 | if (!obj->pages) | ||
1384 | return; | ||
1385 | |||
1428 | BUG_ON(obj->madv == __I915_MADV_PURGED); | 1386 | BUG_ON(obj->madv == __I915_MADV_PURGED); |
1429 | 1387 | ||
1430 | if (i915_gem_object_needs_bit17_swizzle(obj)) | 1388 | if (i915_gem_object_needs_bit17_swizzle(obj)) |
@@ -1473,7 +1431,6 @@ i915_gem_object_move_to_active(struct drm_i915_gem_object *obj, | |||
1473 | 1431 | ||
1474 | if (obj->fenced_gpu_access) { | 1432 | if (obj->fenced_gpu_access) { |
1475 | obj->last_fenced_seqno = seqno; | 1433 | obj->last_fenced_seqno = seqno; |
1476 | obj->last_fenced_ring = ring; | ||
1477 | 1434 | ||
1478 | /* Bump MRU to take account of the delayed flush */ | 1435 | /* Bump MRU to take account of the delayed flush */ |
1479 | if (obj->fence_reg != I915_FENCE_REG_NONE) { | 1436 | if (obj->fence_reg != I915_FENCE_REG_NONE) { |
@@ -1512,15 +1469,11 @@ i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj) | |||
1512 | struct drm_device *dev = obj->base.dev; | 1469 | struct drm_device *dev = obj->base.dev; |
1513 | struct drm_i915_private *dev_priv = dev->dev_private; | 1470 | struct drm_i915_private *dev_priv = dev->dev_private; |
1514 | 1471 | ||
1515 | if (obj->pin_count != 0) | 1472 | list_move_tail(&obj->mm_list, &dev_priv->mm.inactive_list); |
1516 | list_move_tail(&obj->mm_list, &dev_priv->mm.pinned_list); | ||
1517 | else | ||
1518 | list_move_tail(&obj->mm_list, &dev_priv->mm.inactive_list); | ||
1519 | 1473 | ||
1520 | BUG_ON(!list_empty(&obj->gpu_write_list)); | 1474 | BUG_ON(!list_empty(&obj->gpu_write_list)); |
1521 | BUG_ON(!obj->active); | 1475 | BUG_ON(!obj->active); |
1522 | obj->ring = NULL; | 1476 | obj->ring = NULL; |
1523 | obj->last_fenced_ring = NULL; | ||
1524 | 1477 | ||
1525 | i915_gem_object_move_off_active(obj); | 1478 | i915_gem_object_move_off_active(obj); |
1526 | obj->fenced_gpu_access = false; | 1479 | obj->fenced_gpu_access = false; |
@@ -1546,6 +1499,9 @@ i915_gem_object_truncate(struct drm_i915_gem_object *obj) | |||
1546 | inode = obj->base.filp->f_path.dentry->d_inode; | 1499 | inode = obj->base.filp->f_path.dentry->d_inode; |
1547 | shmem_truncate_range(inode, 0, (loff_t)-1); | 1500 | shmem_truncate_range(inode, 0, (loff_t)-1); |
1548 | 1501 | ||
1502 | if (obj->base.map_list.map) | ||
1503 | drm_gem_free_mmap_offset(&obj->base); | ||
1504 | |||
1549 | obj->madv = __I915_MADV_PURGED; | 1505 | obj->madv = __I915_MADV_PURGED; |
1550 | } | 1506 | } |
1551 | 1507 | ||
@@ -1711,30 +1667,29 @@ static void i915_gem_reset_fences(struct drm_device *dev) | |||
1711 | 1667 | ||
1712 | for (i = 0; i < dev_priv->num_fence_regs; i++) { | 1668 | for (i = 0; i < dev_priv->num_fence_regs; i++) { |
1713 | struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i]; | 1669 | struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i]; |
1714 | struct drm_i915_gem_object *obj = reg->obj; | ||
1715 | 1670 | ||
1716 | if (!obj) | 1671 | i915_gem_write_fence(dev, i, NULL); |
1717 | continue; | ||
1718 | 1672 | ||
1719 | if (obj->tiling_mode) | 1673 | if (reg->obj) |
1720 | i915_gem_release_mmap(obj); | 1674 | i915_gem_object_fence_lost(reg->obj); |
1721 | 1675 | ||
1722 | reg->obj->fence_reg = I915_FENCE_REG_NONE; | 1676 | reg->pin_count = 0; |
1723 | reg->obj->fenced_gpu_access = false; | 1677 | reg->obj = NULL; |
1724 | reg->obj->last_fenced_seqno = 0; | 1678 | INIT_LIST_HEAD(®->lru_list); |
1725 | reg->obj->last_fenced_ring = NULL; | ||
1726 | i915_gem_clear_fence_reg(dev, reg); | ||
1727 | } | 1679 | } |
1680 | |||
1681 | INIT_LIST_HEAD(&dev_priv->mm.fence_list); | ||
1728 | } | 1682 | } |
1729 | 1683 | ||
1730 | void i915_gem_reset(struct drm_device *dev) | 1684 | void i915_gem_reset(struct drm_device *dev) |
1731 | { | 1685 | { |
1732 | struct drm_i915_private *dev_priv = dev->dev_private; | 1686 | struct drm_i915_private *dev_priv = dev->dev_private; |
1733 | struct drm_i915_gem_object *obj; | 1687 | struct drm_i915_gem_object *obj; |
1688 | struct intel_ring_buffer *ring; | ||
1734 | int i; | 1689 | int i; |
1735 | 1690 | ||
1736 | for (i = 0; i < I915_NUM_RINGS; i++) | 1691 | for_each_ring(ring, dev_priv, i) |
1737 | i915_gem_reset_ring_lists(dev_priv, &dev_priv->ring[i]); | 1692 | i915_gem_reset_ring_lists(dev_priv, ring); |
1738 | 1693 | ||
1739 | /* Remove anything from the flushing lists. The GPU cache is likely | 1694 | /* Remove anything from the flushing lists. The GPU cache is likely |
1740 | * to be lost on reset along with the data, so simply move the | 1695 | * to be lost on reset along with the data, so simply move the |
@@ -1839,24 +1794,11 @@ void | |||
1839 | i915_gem_retire_requests(struct drm_device *dev) | 1794 | i915_gem_retire_requests(struct drm_device *dev) |
1840 | { | 1795 | { |
1841 | drm_i915_private_t *dev_priv = dev->dev_private; | 1796 | drm_i915_private_t *dev_priv = dev->dev_private; |
1797 | struct intel_ring_buffer *ring; | ||
1842 | int i; | 1798 | int i; |
1843 | 1799 | ||
1844 | if (!list_empty(&dev_priv->mm.deferred_free_list)) { | 1800 | for_each_ring(ring, dev_priv, i) |
1845 | struct drm_i915_gem_object *obj, *next; | 1801 | i915_gem_retire_requests_ring(ring); |
1846 | |||
1847 | /* We must be careful that during unbind() we do not | ||
1848 | * accidentally infinitely recurse into retire requests. | ||
1849 | * Currently: | ||
1850 | * retire -> free -> unbind -> wait -> retire_ring | ||
1851 | */ | ||
1852 | list_for_each_entry_safe(obj, next, | ||
1853 | &dev_priv->mm.deferred_free_list, | ||
1854 | mm_list) | ||
1855 | i915_gem_free_object_tail(obj); | ||
1856 | } | ||
1857 | |||
1858 | for (i = 0; i < I915_NUM_RINGS; i++) | ||
1859 | i915_gem_retire_requests_ring(&dev_priv->ring[i]); | ||
1860 | } | 1802 | } |
1861 | 1803 | ||
1862 | static void | 1804 | static void |
@@ -1864,6 +1806,7 @@ i915_gem_retire_work_handler(struct work_struct *work) | |||
1864 | { | 1806 | { |
1865 | drm_i915_private_t *dev_priv; | 1807 | drm_i915_private_t *dev_priv; |
1866 | struct drm_device *dev; | 1808 | struct drm_device *dev; |
1809 | struct intel_ring_buffer *ring; | ||
1867 | bool idle; | 1810 | bool idle; |
1868 | int i; | 1811 | int i; |
1869 | 1812 | ||
@@ -1883,9 +1826,7 @@ i915_gem_retire_work_handler(struct work_struct *work) | |||
1883 | * objects indefinitely. | 1826 | * objects indefinitely. |
1884 | */ | 1827 | */ |
1885 | idle = true; | 1828 | idle = true; |
1886 | for (i = 0; i < I915_NUM_RINGS; i++) { | 1829 | for_each_ring(ring, dev_priv, i) { |
1887 | struct intel_ring_buffer *ring = &dev_priv->ring[i]; | ||
1888 | |||
1889 | if (!list_empty(&ring->gpu_write_list)) { | 1830 | if (!list_empty(&ring->gpu_write_list)) { |
1890 | struct drm_i915_gem_request *request; | 1831 | struct drm_i915_gem_request *request; |
1891 | int ret; | 1832 | int ret; |
@@ -1907,20 +1848,10 @@ i915_gem_retire_work_handler(struct work_struct *work) | |||
1907 | mutex_unlock(&dev->struct_mutex); | 1848 | mutex_unlock(&dev->struct_mutex); |
1908 | } | 1849 | } |
1909 | 1850 | ||
1910 | /** | 1851 | static int |
1911 | * Waits for a sequence number to be signaled, and cleans up the | 1852 | i915_gem_check_wedge(struct drm_i915_private *dev_priv) |
1912 | * request and object lists appropriately for that event. | ||
1913 | */ | ||
1914 | int | ||
1915 | i915_wait_request(struct intel_ring_buffer *ring, | ||
1916 | uint32_t seqno, | ||
1917 | bool do_retire) | ||
1918 | { | 1853 | { |
1919 | drm_i915_private_t *dev_priv = ring->dev->dev_private; | 1854 | BUG_ON(!mutex_is_locked(&dev_priv->dev->struct_mutex)); |
1920 | u32 ier; | ||
1921 | int ret = 0; | ||
1922 | |||
1923 | BUG_ON(seqno == 0); | ||
1924 | 1855 | ||
1925 | if (atomic_read(&dev_priv->mm.wedged)) { | 1856 | if (atomic_read(&dev_priv->mm.wedged)) { |
1926 | struct completion *x = &dev_priv->error_completion; | 1857 | struct completion *x = &dev_priv->error_completion; |
@@ -1935,6 +1866,20 @@ i915_wait_request(struct intel_ring_buffer *ring, | |||
1935 | return recovery_complete ? -EIO : -EAGAIN; | 1866 | return recovery_complete ? -EIO : -EAGAIN; |
1936 | } | 1867 | } |
1937 | 1868 | ||
1869 | return 0; | ||
1870 | } | ||
1871 | |||
1872 | /* | ||
1873 | * Compare seqno against outstanding lazy request. Emit a request if they are | ||
1874 | * equal. | ||
1875 | */ | ||
1876 | static int | ||
1877 | i915_gem_check_olr(struct intel_ring_buffer *ring, u32 seqno) | ||
1878 | { | ||
1879 | int ret = 0; | ||
1880 | |||
1881 | BUG_ON(!mutex_is_locked(&ring->dev->struct_mutex)); | ||
1882 | |||
1938 | if (seqno == ring->outstanding_lazy_request) { | 1883 | if (seqno == ring->outstanding_lazy_request) { |
1939 | struct drm_i915_gem_request *request; | 1884 | struct drm_i915_gem_request *request; |
1940 | 1885 | ||
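The wait-path rework in the following hunk leans on i915_seqno_passed() to compare 32-bit seqnos in a wraparound-safe way. A standalone sketch of that style of comparison (illustrative only, not the kernel's definition):

#include <stdint.h>
#include <stdio.h>

static int seqno_passed(uint32_t seq1, uint32_t seq2)
{
	/* Signed difference survives the 32-bit wrap as long as the two
	 * seqnos are less than 2^31 apart. */
	return (int32_t)(seq1 - seq2) >= 0;
}

int main(void)
{
	printf("%d\n", seqno_passed(10, 5));			/* 1 */
	printf("%d\n", seqno_passed(0x00000002u, 0xfffffffeu));	/* 1: after the wrap */
	printf("%d\n", seqno_passed(0xfffffffeu, 0x00000002u));	/* 0 */
	return 0;
}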
@@ -1948,54 +1893,67 @@ i915_wait_request(struct intel_ring_buffer *ring, | |||
1948 | return ret; | 1893 | return ret; |
1949 | } | 1894 | } |
1950 | 1895 | ||
1951 | seqno = request->seqno; | 1896 | BUG_ON(seqno != request->seqno); |
1952 | } | 1897 | } |
1953 | 1898 | ||
1954 | if (!i915_seqno_passed(ring->get_seqno(ring), seqno)) { | 1899 | return ret; |
1955 | if (HAS_PCH_SPLIT(ring->dev)) | 1900 | } |
1956 | ier = I915_READ(DEIER) | I915_READ(GTIER); | ||
1957 | else | ||
1958 | ier = I915_READ(IER); | ||
1959 | if (!ier) { | ||
1960 | DRM_ERROR("something (likely vbetool) disabled " | ||
1961 | "interrupts, re-enabling\n"); | ||
1962 | ring->dev->driver->irq_preinstall(ring->dev); | ||
1963 | ring->dev->driver->irq_postinstall(ring->dev); | ||
1964 | } | ||
1965 | 1901 | ||
1966 | trace_i915_gem_request_wait_begin(ring, seqno); | 1902 | static int __wait_seqno(struct intel_ring_buffer *ring, u32 seqno, |
1967 | 1903 | bool interruptible) | |
1968 | ring->waiting_seqno = seqno; | 1904 | { |
1969 | if (ring->irq_get(ring)) { | 1905 | drm_i915_private_t *dev_priv = ring->dev->dev_private; |
1970 | if (dev_priv->mm.interruptible) | 1906 | int ret = 0; |
1971 | ret = wait_event_interruptible(ring->irq_queue, | 1907 | |
1972 | i915_seqno_passed(ring->get_seqno(ring), seqno) | 1908 | if (i915_seqno_passed(ring->get_seqno(ring), seqno)) |
1973 | || atomic_read(&dev_priv->mm.wedged)); | 1909 | return 0; |
1974 | else | 1910 | |
1975 | wait_event(ring->irq_queue, | 1911 | trace_i915_gem_request_wait_begin(ring, seqno); |
1976 | i915_seqno_passed(ring->get_seqno(ring), seqno) | 1912 | if (WARN_ON(!ring->irq_get(ring))) |
1977 | || atomic_read(&dev_priv->mm.wedged)); | 1913 | return -ENODEV; |
1978 | 1914 | ||
1979 | ring->irq_put(ring); | 1915 | #define EXIT_COND \ |
1980 | } else if (wait_for_atomic(i915_seqno_passed(ring->get_seqno(ring), | 1916 | (i915_seqno_passed(ring->get_seqno(ring), seqno) || \ |
1981 | seqno) || | 1917 | atomic_read(&dev_priv->mm.wedged)) |
1982 | atomic_read(&dev_priv->mm.wedged), 3000)) | 1918 | |
1983 | ret = -EBUSY; | 1919 | if (interruptible) |
1984 | ring->waiting_seqno = 0; | 1920 | ret = wait_event_interruptible(ring->irq_queue, |
1985 | 1921 | EXIT_COND); | |
1986 | trace_i915_gem_request_wait_end(ring, seqno); | 1922 | else |
1987 | } | 1923 | wait_event(ring->irq_queue, EXIT_COND); |
1924 | |||
1925 | ring->irq_put(ring); | ||
1926 | trace_i915_gem_request_wait_end(ring, seqno); | ||
1927 | #undef EXIT_COND | ||
1928 | |||
1929 | return ret; | ||
1930 | } | ||
1931 | |||
1932 | /** | ||
1933 | * Waits for a sequence number to be signaled, and cleans up the | ||
1934 | * request and object lists appropriately for that event. | ||
1935 | */ | ||
1936 | int | ||
1937 | i915_wait_request(struct intel_ring_buffer *ring, | ||
1938 | uint32_t seqno) | ||
1939 | { | ||
1940 | drm_i915_private_t *dev_priv = ring->dev->dev_private; | ||
1941 | int ret = 0; | ||
1942 | |||
1943 | BUG_ON(seqno == 0); | ||
1944 | |||
1945 | ret = i915_gem_check_wedge(dev_priv); | ||
1946 | if (ret) | ||
1947 | return ret; | ||
1948 | |||
1949 | ret = i915_gem_check_olr(ring, seqno); | ||
1950 | if (ret) | ||
1951 | return ret; | ||
1952 | |||
1953 | ret = __wait_seqno(ring, seqno, dev_priv->mm.interruptible); | ||
1988 | if (atomic_read(&dev_priv->mm.wedged)) | 1954 | if (atomic_read(&dev_priv->mm.wedged)) |
1989 | ret = -EAGAIN; | 1955 | ret = -EAGAIN; |
1990 | 1956 | ||
1991 | /* Directly dispatch request retiring. While we have the work queue | ||
1992 | * to handle this, the waiter on a request often wants an associated | ||
1993 | * buffer to have made it to the inactive list, and we would need | ||
1994 | * a separate wait queue to handle that. | ||
1995 | */ | ||
1996 | if (ret == 0 && do_retire) | ||
1997 | i915_gem_retire_requests_ring(ring); | ||
1998 | |||
1999 | return ret; | 1957 | return ret; |
2000 | } | 1958 | } |
2001 | 1959 | ||
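The rewritten wait path is split into i915_gem_check_wedge(), i915_gem_check_olr() and __wait_seqno(), and every exit condition above funnels through i915_seqno_passed(). That helper has to keep working when the 32-bit seqno counter wraps, which is conventionally done with a signed-difference comparison; the standalone sketch below illustrates that idiom only (it is not the driver's code, just the technique the wait loop relies on).

#include <assert.h>
#include <stdint.h>

/* True when seq1 is at or ahead of seq2, even across a u32 wrap. */
static int seqno_passed(uint32_t seq1, uint32_t seq2)
{
        return (int32_t)(seq1 - seq2) >= 0;
}

int main(void)
{
        assert(seqno_passed(100, 100));        /* equal counts as passed */
        assert(seqno_passed(101, 100));
        assert(!seqno_passed(100, 101));
        assert(seqno_passed(5, 0xfffffff0u));  /* survives wraparound */
        return 0;
}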
@@ -2017,15 +1975,58 @@ i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj) | |||
2017 | * it. | 1975 | * it. |
2018 | */ | 1976 | */ |
2019 | if (obj->active) { | 1977 | if (obj->active) { |
2020 | ret = i915_wait_request(obj->ring, obj->last_rendering_seqno, | 1978 | ret = i915_wait_request(obj->ring, obj->last_rendering_seqno); |
2021 | true); | ||
2022 | if (ret) | 1979 | if (ret) |
2023 | return ret; | 1980 | return ret; |
1981 | i915_gem_retire_requests_ring(obj->ring); | ||
2024 | } | 1982 | } |
2025 | 1983 | ||
2026 | return 0; | 1984 | return 0; |
2027 | } | 1985 | } |
2028 | 1986 | ||
1987 | /** | ||
1988 | * i915_gem_object_sync - sync an object to a ring. | ||
1989 | * | ||
1990 | * @obj: object which may be in use on another ring. | ||
1991 | * @to: ring we wish to use the object on. May be NULL. | ||
1992 | * | ||
1993 | * This code is meant to abstract object synchronization with the GPU. | ||
1994 | * Calling with NULL implies synchronizing the object with the CPU | ||
1995 | * rather than a particular GPU ring. | ||
1996 | * | ||
1997 | * Returns 0 if successful, else propagates up the lower layer error. | ||
1998 | */ | ||
1999 | int | ||
2000 | i915_gem_object_sync(struct drm_i915_gem_object *obj, | ||
2001 | struct intel_ring_buffer *to) | ||
2002 | { | ||
2003 | struct intel_ring_buffer *from = obj->ring; | ||
2004 | u32 seqno; | ||
2005 | int ret, idx; | ||
2006 | |||
2007 | if (from == NULL || to == from) | ||
2008 | return 0; | ||
2009 | |||
2010 | if (to == NULL || !i915_semaphore_is_enabled(obj->base.dev)) | ||
2011 | return i915_gem_object_wait_rendering(obj); | ||
2012 | |||
2013 | idx = intel_ring_sync_index(from, to); | ||
2014 | |||
2015 | seqno = obj->last_rendering_seqno; | ||
2016 | if (seqno <= from->sync_seqno[idx]) | ||
2017 | return 0; | ||
2018 | |||
2019 | ret = i915_gem_check_olr(obj->ring, seqno); | ||
2020 | if (ret) | ||
2021 | return ret; | ||
2022 | |||
2023 | ret = to->sync_to(to, from, seqno); | ||
2024 | if (!ret) | ||
2025 | from->sync_seqno[idx] = seqno; | ||
2026 | |||
2027 | return ret; | ||
2028 | } | ||
2029 | |||
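i915_gem_object_sync() boils ring-to-ring synchronisation down to a handful of early-outs: nothing to do when the object is idle or already on the target ring, a CPU-side wait when semaphores are unavailable, and a semaphore hand-off only when the target ring has not already synced past the object's last seqno. Here is a standalone sketch of just that decision logic, with a stub ring structure standing in for struct intel_ring_buffer and the flush/emit steps reduced to comments.

#include <stdio.h>
#include <stdint.h>

enum sync_action { SYNC_NOOP, SYNC_CPU_WAIT, SYNC_SKIP, SYNC_SEMAPHORE };

/* Stub ring: only the per-peer "already synced up to" bookkeeping. */
struct ring { uint32_t sync_seqno[2]; };

static enum sync_action classify(struct ring *from, struct ring *to,
                                 int semaphores, uint32_t seqno, int idx)
{
        if (from == NULL || to == from)
                return SYNC_NOOP;        /* idle, or already on the target ring */
        if (to == NULL || !semaphores)
                return SYNC_CPU_WAIT;    /* no semaphores: wait for rendering on the CPU */
        if (seqno <= from->sync_seqno[idx])
                return SYNC_SKIP;        /* target already waited past this seqno */
        return SYNC_SEMAPHORE;           /* emit a semaphore wait, then record the seqno */
}

int main(void)
{
        struct ring render = { { 10, 0 } };
        struct ring blt = { { 0, 0 } };

        printf("%d %d %d\n",
               classify(&render, &blt, 1, 42, 0),  /* 3: semaphore hand-off */
               classify(&render, &blt, 1, 7, 0),   /* 2: already synced */
               classify(NULL, &blt, 1, 42, 0));    /* 0: object is idle */
        return 0;
}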
2029 | static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj) | 2030 | static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj) |
2030 | { | 2031 | { |
2031 | u32 old_write_domain, old_read_domains; | 2032 | u32 old_write_domain, old_read_domains; |
@@ -2068,7 +2069,7 @@ i915_gem_object_unbind(struct drm_i915_gem_object *obj) | |||
2068 | } | 2069 | } |
2069 | 2070 | ||
2070 | ret = i915_gem_object_finish_gpu(obj); | 2071 | ret = i915_gem_object_finish_gpu(obj); |
2071 | if (ret == -ERESTARTSYS) | 2072 | if (ret) |
2072 | return ret; | 2073 | return ret; |
2073 | /* Continue on if we fail due to EIO, the GPU is hung so we | 2074 | /* Continue on if we fail due to EIO, the GPU is hung so we |
2074 | * should be safe and we need to cleanup or else we might | 2075 | * should be safe and we need to cleanup or else we might |
@@ -2095,16 +2096,18 @@ i915_gem_object_unbind(struct drm_i915_gem_object *obj) | |||
2095 | 2096 | ||
2096 | /* release the fence reg _after_ flushing */ | 2097 | /* release the fence reg _after_ flushing */ |
2097 | ret = i915_gem_object_put_fence(obj); | 2098 | ret = i915_gem_object_put_fence(obj); |
2098 | if (ret == -ERESTARTSYS) | 2099 | if (ret) |
2099 | return ret; | 2100 | return ret; |
2100 | 2101 | ||
2101 | trace_i915_gem_object_unbind(obj); | 2102 | trace_i915_gem_object_unbind(obj); |
2102 | 2103 | ||
2103 | i915_gem_gtt_unbind_object(obj); | 2104 | if (obj->has_global_gtt_mapping) |
2105 | i915_gem_gtt_unbind_object(obj); | ||
2104 | if (obj->has_aliasing_ppgtt_mapping) { | 2106 | if (obj->has_aliasing_ppgtt_mapping) { |
2105 | i915_ppgtt_unbind_object(dev_priv->mm.aliasing_ppgtt, obj); | 2107 | i915_ppgtt_unbind_object(dev_priv->mm.aliasing_ppgtt, obj); |
2106 | obj->has_aliasing_ppgtt_mapping = 0; | 2108 | obj->has_aliasing_ppgtt_mapping = 0; |
2107 | } | 2109 | } |
2110 | i915_gem_gtt_finish_object(obj); | ||
2108 | 2111 | ||
2109 | i915_gem_object_put_pages_gtt(obj); | 2112 | i915_gem_object_put_pages_gtt(obj); |
2110 | 2113 | ||
@@ -2145,7 +2148,7 @@ i915_gem_flush_ring(struct intel_ring_buffer *ring, | |||
2145 | return 0; | 2148 | return 0; |
2146 | } | 2149 | } |
2147 | 2150 | ||
2148 | static int i915_ring_idle(struct intel_ring_buffer *ring, bool do_retire) | 2151 | static int i915_ring_idle(struct intel_ring_buffer *ring) |
2149 | { | 2152 | { |
2150 | int ret; | 2153 | int ret; |
2151 | 2154 | ||
@@ -2159,208 +2162,201 @@ static int i915_ring_idle(struct intel_ring_buffer *ring, bool do_retire) | |||
2159 | return ret; | 2162 | return ret; |
2160 | } | 2163 | } |
2161 | 2164 | ||
2162 | return i915_wait_request(ring, i915_gem_next_request_seqno(ring), | 2165 | return i915_wait_request(ring, i915_gem_next_request_seqno(ring)); |
2163 | do_retire); | ||
2164 | } | 2166 | } |
2165 | 2167 | ||
2166 | int i915_gpu_idle(struct drm_device *dev, bool do_retire) | 2168 | int i915_gpu_idle(struct drm_device *dev) |
2167 | { | 2169 | { |
2168 | drm_i915_private_t *dev_priv = dev->dev_private; | 2170 | drm_i915_private_t *dev_priv = dev->dev_private; |
2171 | struct intel_ring_buffer *ring; | ||
2169 | int ret, i; | 2172 | int ret, i; |
2170 | 2173 | ||
2171 | /* Flush everything onto the inactive list. */ | 2174 | /* Flush everything onto the inactive list. */ |
2172 | for (i = 0; i < I915_NUM_RINGS; i++) { | 2175 | for_each_ring(ring, dev_priv, i) { |
2173 | ret = i915_ring_idle(&dev_priv->ring[i], do_retire); | 2176 | ret = i915_ring_idle(ring); |
2174 | if (ret) | 2177 | if (ret) |
2175 | return ret; | 2178 | return ret; |
2179 | |||
2180 | /* Is the device fubar? */ | ||
2181 | if (WARN_ON(!list_empty(&ring->gpu_write_list))) | ||
2182 | return -EBUSY; | ||
2176 | } | 2183 | } |
2177 | 2184 | ||
2178 | return 0; | 2185 | return 0; |
2179 | } | 2186 | } |
2180 | 2187 | ||
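Several hunks in this change swap the open-coded for (i = 0; i < I915_NUM_RINGS; i++) loops for for_each_ring(), which only visits rings that were actually initialised (not every generation has a BSD or blitter ring). The macro's real definition lives in the driver headers, not in this file; the sketch below only illustrates the skip-uninitialised iteration pattern with a hypothetical macro of the same shape.

#include <stdio.h>

#define NUM_RINGS 3

struct ring { const char *name; int initialized; };

/* Illustration only: visit each array slot but skip the ones not set up. */
#define for_each_initialized_ring(r, rings, i) \
        for ((i) = 0; (i) < NUM_RINGS; (i)++) \
                if (((r) = &(rings)[(i)]), !(r)->initialized) {} else

int main(void)
{
        struct ring rings[NUM_RINGS] = {
                { "render", 1 }, { "bsd", 0 }, { "blt", 1 },
        };
        struct ring *ring;
        int i;

        for_each_initialized_ring(ring, rings, i)
                printf("%s\n", ring->name);     /* prints render and blt only */
        return 0;
}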
2181 | static int sandybridge_write_fence_reg(struct drm_i915_gem_object *obj, | 2188 | static void sandybridge_write_fence_reg(struct drm_device *dev, int reg, |
2182 | struct intel_ring_buffer *pipelined) | 2189 | struct drm_i915_gem_object *obj) |
2183 | { | 2190 | { |
2184 | struct drm_device *dev = obj->base.dev; | ||
2185 | drm_i915_private_t *dev_priv = dev->dev_private; | 2191 | drm_i915_private_t *dev_priv = dev->dev_private; |
2186 | u32 size = obj->gtt_space->size; | ||
2187 | int regnum = obj->fence_reg; | ||
2188 | uint64_t val; | 2192 | uint64_t val; |
2189 | 2193 | ||
2190 | val = (uint64_t)((obj->gtt_offset + size - 4096) & | 2194 | if (obj) { |
2191 | 0xfffff000) << 32; | 2195 | u32 size = obj->gtt_space->size; |
2192 | val |= obj->gtt_offset & 0xfffff000; | ||
2193 | val |= (uint64_t)((obj->stride / 128) - 1) << | ||
2194 | SANDYBRIDGE_FENCE_PITCH_SHIFT; | ||
2195 | 2196 | ||
2196 | if (obj->tiling_mode == I915_TILING_Y) | 2197 | val = (uint64_t)((obj->gtt_offset + size - 4096) & |
2197 | val |= 1 << I965_FENCE_TILING_Y_SHIFT; | 2198 | 0xfffff000) << 32; |
2198 | val |= I965_FENCE_REG_VALID; | 2199 | val |= obj->gtt_offset & 0xfffff000; |
2200 | val |= (uint64_t)((obj->stride / 128) - 1) << | ||
2201 | SANDYBRIDGE_FENCE_PITCH_SHIFT; | ||
2199 | 2202 | ||
2200 | if (pipelined) { | 2203 | if (obj->tiling_mode == I915_TILING_Y) |
2201 | int ret = intel_ring_begin(pipelined, 6); | 2204 | val |= 1 << I965_FENCE_TILING_Y_SHIFT; |
2202 | if (ret) | 2205 | val |= I965_FENCE_REG_VALID; |
2203 | return ret; | ||
2204 | |||
2205 | intel_ring_emit(pipelined, MI_NOOP); | ||
2206 | intel_ring_emit(pipelined, MI_LOAD_REGISTER_IMM(2)); | ||
2207 | intel_ring_emit(pipelined, FENCE_REG_SANDYBRIDGE_0 + regnum*8); | ||
2208 | intel_ring_emit(pipelined, (u32)val); | ||
2209 | intel_ring_emit(pipelined, FENCE_REG_SANDYBRIDGE_0 + regnum*8 + 4); | ||
2210 | intel_ring_emit(pipelined, (u32)(val >> 32)); | ||
2211 | intel_ring_advance(pipelined); | ||
2212 | } else | 2206 | } else |
2213 | I915_WRITE64(FENCE_REG_SANDYBRIDGE_0 + regnum * 8, val); | 2207 | val = 0; |
2214 | 2208 | ||
2215 | return 0; | 2209 | I915_WRITE64(FENCE_REG_SANDYBRIDGE_0 + reg * 8, val); |
2210 | POSTING_READ(FENCE_REG_SANDYBRIDGE_0 + reg * 8); | ||
2216 | } | 2211 | } |
2217 | 2212 | ||
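The fence helpers now take a register index plus an object (or NULL to clear the slot) and simply compute the register value and write it, followed by a posting read. For the gen6 layout above, the 64-bit value packs the end and start of the object's GTT range together with the pitch, tiling and valid bits. The sketch below redoes that packing in userspace; the shift and flag constants are placeholders for the ones defined in i915_reg.h.

#include <stdio.h>
#include <stdint.h>

/* Placeholder bit positions; the real values live in i915_reg.h. */
#define FENCE_PITCH_SHIFT   32
#define FENCE_TILING_Y      (1ull << 1)
#define FENCE_REG_VALID     (1ull << 0)

static uint64_t pack_fence(uint32_t gtt_offset, uint32_t size,
                           uint32_t stride, int tiling_y)
{
        uint64_t val;

        /* upper dword: last page of the object, lower dword: first page */
        val = (uint64_t)((gtt_offset + size - 4096) & 0xfffff000) << 32;
        val |= gtt_offset & 0xfffff000;
        val |= (uint64_t)((stride / 128) - 1) << FENCE_PITCH_SHIFT;
        if (tiling_y)
                val |= FENCE_TILING_Y;
        return val | FENCE_REG_VALID;
}

int main(void)
{
        /* 1 MiB X-tiled object at GTT offset 16 MiB with a 512-byte stride */
        printf("0x%016llx\n",
               (unsigned long long)pack_fence(16u << 20, 1u << 20, 512, 0));
        return 0;
}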
2218 | static int i965_write_fence_reg(struct drm_i915_gem_object *obj, | 2213 | static void i965_write_fence_reg(struct drm_device *dev, int reg, |
2219 | struct intel_ring_buffer *pipelined) | 2214 | struct drm_i915_gem_object *obj) |
2220 | { | 2215 | { |
2221 | struct drm_device *dev = obj->base.dev; | ||
2222 | drm_i915_private_t *dev_priv = dev->dev_private; | 2216 | drm_i915_private_t *dev_priv = dev->dev_private; |
2223 | u32 size = obj->gtt_space->size; | ||
2224 | int regnum = obj->fence_reg; | ||
2225 | uint64_t val; | 2217 | uint64_t val; |
2226 | 2218 | ||
2227 | val = (uint64_t)((obj->gtt_offset + size - 4096) & | 2219 | if (obj) { |
2228 | 0xfffff000) << 32; | 2220 | u32 size = obj->gtt_space->size; |
2229 | val |= obj->gtt_offset & 0xfffff000; | ||
2230 | val |= ((obj->stride / 128) - 1) << I965_FENCE_PITCH_SHIFT; | ||
2231 | if (obj->tiling_mode == I915_TILING_Y) | ||
2232 | val |= 1 << I965_FENCE_TILING_Y_SHIFT; | ||
2233 | val |= I965_FENCE_REG_VALID; | ||
2234 | 2221 | ||
2235 | if (pipelined) { | 2222 | val = (uint64_t)((obj->gtt_offset + size - 4096) & |
2236 | int ret = intel_ring_begin(pipelined, 6); | 2223 | 0xfffff000) << 32; |
2237 | if (ret) | 2224 | val |= obj->gtt_offset & 0xfffff000; |
2238 | return ret; | 2225 | val |= ((obj->stride / 128) - 1) << I965_FENCE_PITCH_SHIFT; |
2239 | 2226 | if (obj->tiling_mode == I915_TILING_Y) | |
2240 | intel_ring_emit(pipelined, MI_NOOP); | 2227 | val |= 1 << I965_FENCE_TILING_Y_SHIFT; |
2241 | intel_ring_emit(pipelined, MI_LOAD_REGISTER_IMM(2)); | 2228 | val |= I965_FENCE_REG_VALID; |
2242 | intel_ring_emit(pipelined, FENCE_REG_965_0 + regnum*8); | ||
2243 | intel_ring_emit(pipelined, (u32)val); | ||
2244 | intel_ring_emit(pipelined, FENCE_REG_965_0 + regnum*8 + 4); | ||
2245 | intel_ring_emit(pipelined, (u32)(val >> 32)); | ||
2246 | intel_ring_advance(pipelined); | ||
2247 | } else | 2229 | } else |
2248 | I915_WRITE64(FENCE_REG_965_0 + regnum * 8, val); | 2230 | val = 0; |
2249 | 2231 | ||
2250 | return 0; | 2232 | I915_WRITE64(FENCE_REG_965_0 + reg * 8, val); |
2233 | POSTING_READ(FENCE_REG_965_0 + reg * 8); | ||
2251 | } | 2234 | } |
2252 | 2235 | ||
2253 | static int i915_write_fence_reg(struct drm_i915_gem_object *obj, | 2236 | static void i915_write_fence_reg(struct drm_device *dev, int reg, |
2254 | struct intel_ring_buffer *pipelined) | 2237 | struct drm_i915_gem_object *obj) |
2255 | { | 2238 | { |
2256 | struct drm_device *dev = obj->base.dev; | ||
2257 | drm_i915_private_t *dev_priv = dev->dev_private; | 2239 | drm_i915_private_t *dev_priv = dev->dev_private; |
2258 | u32 size = obj->gtt_space->size; | 2240 | u32 val; |
2259 | u32 fence_reg, val, pitch_val; | ||
2260 | int tile_width; | ||
2261 | |||
2262 | if (WARN((obj->gtt_offset & ~I915_FENCE_START_MASK) || | ||
2263 | (size & -size) != size || | ||
2264 | (obj->gtt_offset & (size - 1)), | ||
2265 | "object 0x%08x [fenceable? %d] not 1M or pot-size (0x%08x) aligned\n", | ||
2266 | obj->gtt_offset, obj->map_and_fenceable, size)) | ||
2267 | return -EINVAL; | ||
2268 | 2241 | ||
2269 | if (obj->tiling_mode == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev)) | 2242 | if (obj) { |
2270 | tile_width = 128; | 2243 | u32 size = obj->gtt_space->size; |
2271 | else | 2244 | int pitch_val; |
2272 | tile_width = 512; | 2245 | int tile_width; |
2273 | |||
2274 | /* Note: pitch better be a power of two tile widths */ | ||
2275 | pitch_val = obj->stride / tile_width; | ||
2276 | pitch_val = ffs(pitch_val) - 1; | ||
2277 | |||
2278 | val = obj->gtt_offset; | ||
2279 | if (obj->tiling_mode == I915_TILING_Y) | ||
2280 | val |= 1 << I830_FENCE_TILING_Y_SHIFT; | ||
2281 | val |= I915_FENCE_SIZE_BITS(size); | ||
2282 | val |= pitch_val << I830_FENCE_PITCH_SHIFT; | ||
2283 | val |= I830_FENCE_REG_VALID; | ||
2284 | |||
2285 | fence_reg = obj->fence_reg; | ||
2286 | if (fence_reg < 8) | ||
2287 | fence_reg = FENCE_REG_830_0 + fence_reg * 4; | ||
2288 | else | ||
2289 | fence_reg = FENCE_REG_945_8 + (fence_reg - 8) * 4; | ||
2290 | 2246 | ||
2291 | if (pipelined) { | 2247 | WARN((obj->gtt_offset & ~I915_FENCE_START_MASK) || |
2292 | int ret = intel_ring_begin(pipelined, 4); | 2248 | (size & -size) != size || |
2293 | if (ret) | 2249 | (obj->gtt_offset & (size - 1)), |
2294 | return ret; | 2250 | "object 0x%08x [fenceable? %d] not 1M or pot-size (0x%08x) aligned\n", |
2251 | obj->gtt_offset, obj->map_and_fenceable, size); | ||
2295 | 2252 | ||
2296 | intel_ring_emit(pipelined, MI_NOOP); | 2253 | if (obj->tiling_mode == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev)) |
2297 | intel_ring_emit(pipelined, MI_LOAD_REGISTER_IMM(1)); | 2254 | tile_width = 128; |
2298 | intel_ring_emit(pipelined, fence_reg); | 2255 | else |
2299 | intel_ring_emit(pipelined, val); | 2256 | tile_width = 512; |
2300 | intel_ring_advance(pipelined); | 2257 | |
2258 | /* Note: pitch better be a power of two tile widths */ | ||
2259 | pitch_val = obj->stride / tile_width; | ||
2260 | pitch_val = ffs(pitch_val) - 1; | ||
2261 | |||
2262 | val = obj->gtt_offset; | ||
2263 | if (obj->tiling_mode == I915_TILING_Y) | ||
2264 | val |= 1 << I830_FENCE_TILING_Y_SHIFT; | ||
2265 | val |= I915_FENCE_SIZE_BITS(size); | ||
2266 | val |= pitch_val << I830_FENCE_PITCH_SHIFT; | ||
2267 | val |= I830_FENCE_REG_VALID; | ||
2301 | } else | 2268 | } else |
2302 | I915_WRITE(fence_reg, val); | 2269 | val = 0; |
2303 | 2270 | ||
2304 | return 0; | 2271 | if (reg < 8) |
2272 | reg = FENCE_REG_830_0 + reg * 4; | ||
2273 | else | ||
2274 | reg = FENCE_REG_945_8 + (reg - 8) * 4; | ||
2275 | |||
2276 | I915_WRITE(reg, val); | ||
2277 | POSTING_READ(reg); | ||
2305 | } | 2278 | } |
2306 | 2279 | ||
2307 | static int i830_write_fence_reg(struct drm_i915_gem_object *obj, | 2280 | static void i830_write_fence_reg(struct drm_device *dev, int reg, |
2308 | struct intel_ring_buffer *pipelined) | 2281 | struct drm_i915_gem_object *obj) |
2309 | { | 2282 | { |
2310 | struct drm_device *dev = obj->base.dev; | ||
2311 | drm_i915_private_t *dev_priv = dev->dev_private; | 2283 | drm_i915_private_t *dev_priv = dev->dev_private; |
2312 | u32 size = obj->gtt_space->size; | ||
2313 | int regnum = obj->fence_reg; | ||
2314 | uint32_t val; | 2284 | uint32_t val; |
2315 | uint32_t pitch_val; | ||
2316 | 2285 | ||
2317 | if (WARN((obj->gtt_offset & ~I830_FENCE_START_MASK) || | 2286 | if (obj) { |
2318 | (size & -size) != size || | 2287 | u32 size = obj->gtt_space->size; |
2319 | (obj->gtt_offset & (size - 1)), | 2288 | uint32_t pitch_val; |
2320 | "object 0x%08x not 512K or pot-size 0x%08x aligned\n", | 2289 | |
2321 | obj->gtt_offset, size)) | 2290 | WARN((obj->gtt_offset & ~I830_FENCE_START_MASK) || |
2322 | return -EINVAL; | 2291 | (size & -size) != size || |
2323 | 2292 | (obj->gtt_offset & (size - 1)), | |
2324 | pitch_val = obj->stride / 128; | 2293 | "object 0x%08x not 512K or pot-size 0x%08x aligned\n", |
2325 | pitch_val = ffs(pitch_val) - 1; | 2294 | obj->gtt_offset, size); |
2326 | 2295 | ||
2327 | val = obj->gtt_offset; | 2296 | pitch_val = obj->stride / 128; |
2328 | if (obj->tiling_mode == I915_TILING_Y) | 2297 | pitch_val = ffs(pitch_val) - 1; |
2329 | val |= 1 << I830_FENCE_TILING_Y_SHIFT; | 2298 | |
2330 | val |= I830_FENCE_SIZE_BITS(size); | 2299 | val = obj->gtt_offset; |
2331 | val |= pitch_val << I830_FENCE_PITCH_SHIFT; | 2300 | if (obj->tiling_mode == I915_TILING_Y) |
2332 | val |= I830_FENCE_REG_VALID; | 2301 | val |= 1 << I830_FENCE_TILING_Y_SHIFT; |
2302 | val |= I830_FENCE_SIZE_BITS(size); | ||
2303 | val |= pitch_val << I830_FENCE_PITCH_SHIFT; | ||
2304 | val |= I830_FENCE_REG_VALID; | ||
2305 | } else | ||
2306 | val = 0; | ||
2333 | 2307 | ||
2334 | if (pipelined) { | 2308 | I915_WRITE(FENCE_REG_830_0 + reg * 4, val); |
2335 | int ret = intel_ring_begin(pipelined, 4); | 2309 | POSTING_READ(FENCE_REG_830_0 + reg * 4); |
2336 | if (ret) | 2310 | } |
2337 | return ret; | ||
2338 | 2311 | ||
2339 | intel_ring_emit(pipelined, MI_NOOP); | 2312 | static void i915_gem_write_fence(struct drm_device *dev, int reg, |
2340 | intel_ring_emit(pipelined, MI_LOAD_REGISTER_IMM(1)); | 2313 | struct drm_i915_gem_object *obj) |
2341 | intel_ring_emit(pipelined, FENCE_REG_830_0 + regnum*4); | 2314 | { |
2342 | intel_ring_emit(pipelined, val); | 2315 | switch (INTEL_INFO(dev)->gen) { |
2343 | intel_ring_advance(pipelined); | 2316 | case 7: |
2344 | } else | 2317 | case 6: sandybridge_write_fence_reg(dev, reg, obj); break; |
2345 | I915_WRITE(FENCE_REG_830_0 + regnum * 4, val); | 2318 | case 5: |
2319 | case 4: i965_write_fence_reg(dev, reg, obj); break; | ||
2320 | case 3: i915_write_fence_reg(dev, reg, obj); break; | ||
2321 | case 2: i830_write_fence_reg(dev, reg, obj); break; | ||
2322 | default: break; | ||
2323 | } | ||
2324 | } | ||
2346 | 2325 | ||
2347 | return 0; | 2326 | static inline int fence_number(struct drm_i915_private *dev_priv, |
2327 | struct drm_i915_fence_reg *fence) | ||
2328 | { | ||
2329 | return fence - dev_priv->fence_regs; | ||
2348 | } | 2330 | } |
2349 | 2331 | ||
2350 | static bool ring_passed_seqno(struct intel_ring_buffer *ring, u32 seqno) | 2332 | static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj, |
2333 | struct drm_i915_fence_reg *fence, | ||
2334 | bool enable) | ||
2351 | { | 2335 | { |
2352 | return i915_seqno_passed(ring->get_seqno(ring), seqno); | 2336 | struct drm_i915_private *dev_priv = obj->base.dev->dev_private; |
2337 | int reg = fence_number(dev_priv, fence); | ||
2338 | |||
2339 | i915_gem_write_fence(obj->base.dev, reg, enable ? obj : NULL); | ||
2340 | |||
2341 | if (enable) { | ||
2342 | obj->fence_reg = reg; | ||
2343 | fence->obj = obj; | ||
2344 | list_move_tail(&fence->lru_list, &dev_priv->mm.fence_list); | ||
2345 | } else { | ||
2346 | obj->fence_reg = I915_FENCE_REG_NONE; | ||
2347 | fence->obj = NULL; | ||
2348 | list_del_init(&fence->lru_list); | ||
2349 | } | ||
2353 | } | 2350 | } |
2354 | 2351 | ||
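fence_number() recovers the register index by pointer arithmetic against the dev_priv->fence_regs array, and i915_gem_object_update_fence() keeps the object and the register slot pointing at each other in the enable case and detached in the disable case (the fence LRU list moves are left out below). A minimal sketch of that bookkeeping with stub structures:

#include <assert.h>
#include <stddef.h>

#define NUM_FENCES 16
#define FENCE_NONE (-1)

struct object { int fence_reg; };
struct fence  { struct object *obj; };

static struct fence fence_regs[NUM_FENCES];

static int fence_number(struct fence *f)
{
        return f - fence_regs;          /* index from the pointer difference */
}

static void update_fence(struct object *obj, struct fence *f, int enable)
{
        int reg = fence_number(f);

        if (enable) {
                obj->fence_reg = reg;   /* object remembers its register... */
                f->obj = obj;           /* ...and the register remembers the object */
        } else {
                obj->fence_reg = FENCE_NONE;
                f->obj = NULL;
        }
}

int main(void)
{
        struct object obj = { FENCE_NONE };

        update_fence(&obj, &fence_regs[3], 1);
        assert(obj.fence_reg == 3 && fence_regs[3].obj == &obj);
        update_fence(&obj, &fence_regs[3], 0);
        assert(obj.fence_reg == FENCE_NONE && fence_regs[3].obj == NULL);
        return 0;
}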
2355 | static int | 2352 | static int |
2356 | i915_gem_object_flush_fence(struct drm_i915_gem_object *obj, | 2353 | i915_gem_object_flush_fence(struct drm_i915_gem_object *obj) |
2357 | struct intel_ring_buffer *pipelined) | ||
2358 | { | 2354 | { |
2359 | int ret; | 2355 | int ret; |
2360 | 2356 | ||
2361 | if (obj->fenced_gpu_access) { | 2357 | if (obj->fenced_gpu_access) { |
2362 | if (obj->base.write_domain & I915_GEM_GPU_DOMAINS) { | 2358 | if (obj->base.write_domain & I915_GEM_GPU_DOMAINS) { |
2363 | ret = i915_gem_flush_ring(obj->last_fenced_ring, | 2359 | ret = i915_gem_flush_ring(obj->ring, |
2364 | 0, obj->base.write_domain); | 2360 | 0, obj->base.write_domain); |
2365 | if (ret) | 2361 | if (ret) |
2366 | return ret; | 2362 | return ret; |
@@ -2369,18 +2365,12 @@ i915_gem_object_flush_fence(struct drm_i915_gem_object *obj, | |||
2369 | obj->fenced_gpu_access = false; | 2365 | obj->fenced_gpu_access = false; |
2370 | } | 2366 | } |
2371 | 2367 | ||
2372 | if (obj->last_fenced_seqno && pipelined != obj->last_fenced_ring) { | 2368 | if (obj->last_fenced_seqno) { |
2373 | if (!ring_passed_seqno(obj->last_fenced_ring, | 2369 | ret = i915_wait_request(obj->ring, obj->last_fenced_seqno); |
2374 | obj->last_fenced_seqno)) { | 2370 | if (ret) |
2375 | ret = i915_wait_request(obj->last_fenced_ring, | 2371 | return ret; |
2376 | obj->last_fenced_seqno, | ||
2377 | true); | ||
2378 | if (ret) | ||
2379 | return ret; | ||
2380 | } | ||
2381 | 2372 | ||
2382 | obj->last_fenced_seqno = 0; | 2373 | obj->last_fenced_seqno = 0; |
2383 | obj->last_fenced_ring = NULL; | ||
2384 | } | 2374 | } |
2385 | 2375 | ||
2386 | /* Ensure that all CPU reads are completed before installing a fence | 2376 | /* Ensure that all CPU reads are completed before installing a fence |
@@ -2395,34 +2385,29 @@ i915_gem_object_flush_fence(struct drm_i915_gem_object *obj, | |||
2395 | int | 2385 | int |
2396 | i915_gem_object_put_fence(struct drm_i915_gem_object *obj) | 2386 | i915_gem_object_put_fence(struct drm_i915_gem_object *obj) |
2397 | { | 2387 | { |
2388 | struct drm_i915_private *dev_priv = obj->base.dev->dev_private; | ||
2398 | int ret; | 2389 | int ret; |
2399 | 2390 | ||
2400 | if (obj->tiling_mode) | 2391 | ret = i915_gem_object_flush_fence(obj); |
2401 | i915_gem_release_mmap(obj); | ||
2402 | |||
2403 | ret = i915_gem_object_flush_fence(obj, NULL); | ||
2404 | if (ret) | 2392 | if (ret) |
2405 | return ret; | 2393 | return ret; |
2406 | 2394 | ||
2407 | if (obj->fence_reg != I915_FENCE_REG_NONE) { | 2395 | if (obj->fence_reg == I915_FENCE_REG_NONE) |
2408 | struct drm_i915_private *dev_priv = obj->base.dev->dev_private; | 2396 | return 0; |
2409 | |||
2410 | WARN_ON(dev_priv->fence_regs[obj->fence_reg].pin_count); | ||
2411 | i915_gem_clear_fence_reg(obj->base.dev, | ||
2412 | &dev_priv->fence_regs[obj->fence_reg]); | ||
2413 | 2397 | ||
2414 | obj->fence_reg = I915_FENCE_REG_NONE; | 2398 | i915_gem_object_update_fence(obj, |
2415 | } | 2399 | &dev_priv->fence_regs[obj->fence_reg], |
2400 | false); | ||
2401 | i915_gem_object_fence_lost(obj); | ||
2416 | 2402 | ||
2417 | return 0; | 2403 | return 0; |
2418 | } | 2404 | } |
2419 | 2405 | ||
2420 | static struct drm_i915_fence_reg * | 2406 | static struct drm_i915_fence_reg * |
2421 | i915_find_fence_reg(struct drm_device *dev, | 2407 | i915_find_fence_reg(struct drm_device *dev) |
2422 | struct intel_ring_buffer *pipelined) | ||
2423 | { | 2408 | { |
2424 | struct drm_i915_private *dev_priv = dev->dev_private; | 2409 | struct drm_i915_private *dev_priv = dev->dev_private; |
2425 | struct drm_i915_fence_reg *reg, *first, *avail; | 2410 | struct drm_i915_fence_reg *reg, *avail; |
2426 | int i; | 2411 | int i; |
2427 | 2412 | ||
2428 | /* First try to find a free reg */ | 2413 | /* First try to find a free reg */ |
@@ -2440,204 +2425,77 @@ i915_find_fence_reg(struct drm_device *dev, | |||
2440 | return NULL; | 2425 | return NULL; |
2441 | 2426 | ||
2442 | /* None available, try to steal one or wait for a user to finish */ | 2427 | /* None available, try to steal one or wait for a user to finish */ |
2443 | avail = first = NULL; | ||
2444 | list_for_each_entry(reg, &dev_priv->mm.fence_list, lru_list) { | 2428 | list_for_each_entry(reg, &dev_priv->mm.fence_list, lru_list) { |
2445 | if (reg->pin_count) | 2429 | if (reg->pin_count) |
2446 | continue; | 2430 | continue; |
2447 | 2431 | ||
2448 | if (first == NULL) | 2432 | return reg; |
2449 | first = reg; | ||
2450 | |||
2451 | if (!pipelined || | ||
2452 | !reg->obj->last_fenced_ring || | ||
2453 | reg->obj->last_fenced_ring == pipelined) { | ||
2454 | avail = reg; | ||
2455 | break; | ||
2456 | } | ||
2457 | } | 2433 | } |
2458 | 2434 | ||
2459 | if (avail == NULL) | 2435 | return NULL; |
2460 | avail = first; | ||
2461 | |||
2462 | return avail; | ||
2463 | } | 2436 | } |
2464 | 2437 | ||
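With pipelined fencing removed, i915_find_fence_reg() reduces to: hand back the first completely free register, otherwise steal the oldest unpinned one from the fence LRU, otherwise give up. The sketch below models the same selection over a small array, with array order standing in for the real lru_list ordering.

#include <stdio.h>

struct fence { int has_obj; int pin_count; };

/* Array order stands in for dev_priv->mm.fence_list (oldest first). */
static struct fence *find_fence(struct fence *regs, int n)
{
        int i;

        for (i = 0; i < n; i++)         /* first try a completely free reg */
                if (!regs[i].has_obj)
                        return &regs[i];

        for (i = 0; i < n; i++)         /* otherwise steal the oldest unpinned one */
                if (!regs[i].pin_count)
                        return &regs[i];

        return NULL;                    /* caller turns this into -EDEADLK */
}

int main(void)
{
        struct fence regs[3] = { { 1, 1 }, { 1, 0 }, { 1, 2 } };

        printf("stole reg %d\n", (int)(find_fence(regs, 3) - regs));  /* reg 1 */
        return 0;
}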
2465 | /** | 2438 | /** |
2466 | * i915_gem_object_get_fence - set up a fence reg for an object | 2439 | * i915_gem_object_get_fence - set up fencing for an object |
2467 | * @obj: object to map through a fence reg | 2440 | * @obj: object to map through a fence reg |
2468 | * @pipelined: ring on which to queue the change, or NULL for CPU access | ||
2469 | * @interruptible: must we wait uninterruptibly for the register to retire? | ||
2470 | * | 2441 | * |
2471 | * When mapping objects through the GTT, userspace wants to be able to write | 2442 | * When mapping objects through the GTT, userspace wants to be able to write |
2472 | * to them without having to worry about swizzling if the object is tiled. | 2443 | * to them without having to worry about swizzling if the object is tiled. |
2473 | * | ||
2474 | * This function walks the fence regs looking for a free one for @obj, | 2444 | * This function walks the fence regs looking for a free one for @obj, |
2475 | * stealing one if it can't find any. | 2445 | * stealing one if it can't find any. |
2476 | * | 2446 | * |
2477 | * It then sets up the reg based on the object's properties: address, pitch | 2447 | * It then sets up the reg based on the object's properties: address, pitch |
2478 | * and tiling format. | 2448 | * and tiling format. |
2449 | * | ||
2450 | * For an untiled surface, this removes any existing fence. | ||
2479 | */ | 2451 | */ |
2480 | int | 2452 | int |
2481 | i915_gem_object_get_fence(struct drm_i915_gem_object *obj, | 2453 | i915_gem_object_get_fence(struct drm_i915_gem_object *obj) |
2482 | struct intel_ring_buffer *pipelined) | ||
2483 | { | 2454 | { |
2484 | struct drm_device *dev = obj->base.dev; | 2455 | struct drm_device *dev = obj->base.dev; |
2485 | struct drm_i915_private *dev_priv = dev->dev_private; | 2456 | struct drm_i915_private *dev_priv = dev->dev_private; |
2457 | bool enable = obj->tiling_mode != I915_TILING_NONE; | ||
2486 | struct drm_i915_fence_reg *reg; | 2458 | struct drm_i915_fence_reg *reg; |
2487 | int ret; | 2459 | int ret; |
2488 | 2460 | ||
2489 | /* XXX disable pipelining. There are bugs. Shocking. */ | 2461 | /* Have we updated the tiling parameters upon the object and so |
2490 | pipelined = NULL; | 2462 | * will need to serialise the write to the associated fence register? |
2463 | */ | ||
2464 | if (obj->fence_dirty) { | ||
2465 | ret = i915_gem_object_flush_fence(obj); | ||
2466 | if (ret) | ||
2467 | return ret; | ||
2468 | } | ||
2491 | 2469 | ||
2492 | /* Just update our place in the LRU if our fence is getting reused. */ | 2470 | /* Just update our place in the LRU if our fence is getting reused. */ |
2493 | if (obj->fence_reg != I915_FENCE_REG_NONE) { | 2471 | if (obj->fence_reg != I915_FENCE_REG_NONE) { |
2494 | reg = &dev_priv->fence_regs[obj->fence_reg]; | 2472 | reg = &dev_priv->fence_regs[obj->fence_reg]; |
2495 | list_move_tail(®->lru_list, &dev_priv->mm.fence_list); | 2473 | if (!obj->fence_dirty) { |
2496 | 2474 | list_move_tail(®->lru_list, | |
2497 | if (obj->tiling_changed) { | 2475 | &dev_priv->mm.fence_list); |
2498 | ret = i915_gem_object_flush_fence(obj, pipelined); | 2476 | return 0; |
2499 | if (ret) | ||
2500 | return ret; | ||
2501 | |||
2502 | if (!obj->fenced_gpu_access && !obj->last_fenced_seqno) | ||
2503 | pipelined = NULL; | ||
2504 | |||
2505 | if (pipelined) { | ||
2506 | reg->setup_seqno = | ||
2507 | i915_gem_next_request_seqno(pipelined); | ||
2508 | obj->last_fenced_seqno = reg->setup_seqno; | ||
2509 | obj->last_fenced_ring = pipelined; | ||
2510 | } | ||
2511 | |||
2512 | goto update; | ||
2513 | } | 2477 | } |
2478 | } else if (enable) { | ||
2479 | reg = i915_find_fence_reg(dev); | ||
2480 | if (reg == NULL) | ||
2481 | return -EDEADLK; | ||
2514 | 2482 | ||
2515 | if (!pipelined) { | 2483 | if (reg->obj) { |
2516 | if (reg->setup_seqno) { | 2484 | struct drm_i915_gem_object *old = reg->obj; |
2517 | if (!ring_passed_seqno(obj->last_fenced_ring, | ||
2518 | reg->setup_seqno)) { | ||
2519 | ret = i915_wait_request(obj->last_fenced_ring, | ||
2520 | reg->setup_seqno, | ||
2521 | true); | ||
2522 | if (ret) | ||
2523 | return ret; | ||
2524 | } | ||
2525 | 2485 | ||
2526 | reg->setup_seqno = 0; | 2486 | ret = i915_gem_object_flush_fence(old); |
2527 | } | ||
2528 | } else if (obj->last_fenced_ring && | ||
2529 | obj->last_fenced_ring != pipelined) { | ||
2530 | ret = i915_gem_object_flush_fence(obj, pipelined); | ||
2531 | if (ret) | 2487 | if (ret) |
2532 | return ret; | 2488 | return ret; |
2533 | } | ||
2534 | |||
2535 | return 0; | ||
2536 | } | ||
2537 | |||
2538 | reg = i915_find_fence_reg(dev, pipelined); | ||
2539 | if (reg == NULL) | ||
2540 | return -EDEADLK; | ||
2541 | |||
2542 | ret = i915_gem_object_flush_fence(obj, pipelined); | ||
2543 | if (ret) | ||
2544 | return ret; | ||
2545 | |||
2546 | if (reg->obj) { | ||
2547 | struct drm_i915_gem_object *old = reg->obj; | ||
2548 | |||
2549 | drm_gem_object_reference(&old->base); | ||
2550 | |||
2551 | if (old->tiling_mode) | ||
2552 | i915_gem_release_mmap(old); | ||
2553 | 2489 | ||
2554 | ret = i915_gem_object_flush_fence(old, pipelined); | 2490 | i915_gem_object_fence_lost(old); |
2555 | if (ret) { | ||
2556 | drm_gem_object_unreference(&old->base); | ||
2557 | return ret; | ||
2558 | } | 2491 | } |
2492 | } else | ||
2493 | return 0; | ||
2559 | 2494 | ||
2560 | if (old->last_fenced_seqno == 0 && obj->last_fenced_seqno == 0) | 2495 | i915_gem_object_update_fence(obj, reg, enable); |
2561 | pipelined = NULL; | 2496 | obj->fence_dirty = false; |
2562 | |||
2563 | old->fence_reg = I915_FENCE_REG_NONE; | ||
2564 | old->last_fenced_ring = pipelined; | ||
2565 | old->last_fenced_seqno = | ||
2566 | pipelined ? i915_gem_next_request_seqno(pipelined) : 0; | ||
2567 | |||
2568 | drm_gem_object_unreference(&old->base); | ||
2569 | } else if (obj->last_fenced_seqno == 0) | ||
2570 | pipelined = NULL; | ||
2571 | |||
2572 | reg->obj = obj; | ||
2573 | list_move_tail(®->lru_list, &dev_priv->mm.fence_list); | ||
2574 | obj->fence_reg = reg - dev_priv->fence_regs; | ||
2575 | obj->last_fenced_ring = pipelined; | ||
2576 | |||
2577 | reg->setup_seqno = | ||
2578 | pipelined ? i915_gem_next_request_seqno(pipelined) : 0; | ||
2579 | obj->last_fenced_seqno = reg->setup_seqno; | ||
2580 | |||
2581 | update: | ||
2582 | obj->tiling_changed = false; | ||
2583 | switch (INTEL_INFO(dev)->gen) { | ||
2584 | case 7: | ||
2585 | case 6: | ||
2586 | ret = sandybridge_write_fence_reg(obj, pipelined); | ||
2587 | break; | ||
2588 | case 5: | ||
2589 | case 4: | ||
2590 | ret = i965_write_fence_reg(obj, pipelined); | ||
2591 | break; | ||
2592 | case 3: | ||
2593 | ret = i915_write_fence_reg(obj, pipelined); | ||
2594 | break; | ||
2595 | case 2: | ||
2596 | ret = i830_write_fence_reg(obj, pipelined); | ||
2597 | break; | ||
2598 | } | ||
2599 | |||
2600 | return ret; | ||
2601 | } | ||
2602 | |||
2603 | /** | ||
2604 | * i915_gem_clear_fence_reg - clear out fence register info | ||
2605 | * @obj: object to clear | ||
2606 | * | ||
2607 | * Zeroes out the fence register itself and clears out the associated | ||
2608 | * data structures in dev_priv and obj. | ||
2609 | */ | ||
2610 | static void | ||
2611 | i915_gem_clear_fence_reg(struct drm_device *dev, | ||
2612 | struct drm_i915_fence_reg *reg) | ||
2613 | { | ||
2614 | drm_i915_private_t *dev_priv = dev->dev_private; | ||
2615 | uint32_t fence_reg = reg - dev_priv->fence_regs; | ||
2616 | |||
2617 | switch (INTEL_INFO(dev)->gen) { | ||
2618 | case 7: | ||
2619 | case 6: | ||
2620 | I915_WRITE64(FENCE_REG_SANDYBRIDGE_0 + fence_reg*8, 0); | ||
2621 | break; | ||
2622 | case 5: | ||
2623 | case 4: | ||
2624 | I915_WRITE64(FENCE_REG_965_0 + fence_reg*8, 0); | ||
2625 | break; | ||
2626 | case 3: | ||
2627 | if (fence_reg >= 8) | ||
2628 | fence_reg = FENCE_REG_945_8 + (fence_reg - 8) * 4; | ||
2629 | else | ||
2630 | case 2: | ||
2631 | fence_reg = FENCE_REG_830_0 + fence_reg * 4; | ||
2632 | |||
2633 | I915_WRITE(fence_reg, 0); | ||
2634 | break; | ||
2635 | } | ||
2636 | 2497 | ||
2637 | list_del_init(®->lru_list); | 2498 | return 0; |
2638 | reg->obj = NULL; | ||
2639 | reg->setup_seqno = 0; | ||
2640 | reg->pin_count = 0; | ||
2641 | } | 2499 | } |
2642 | 2500 | ||
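The new i915_gem_object_get_fence() therefore has four outcomes: rewrite the register it already owns when the tiling parameters changed (fence_dirty), just bump the LRU when the existing fence is still clean, find or steal a register only when the object is tiled, and do nothing for an untiled object. A standalone sketch of that control flow, with the flush, steal and register-write steps reduced to comments and a stub object structure:

#include <stdio.h>

enum fence_path { FENCE_NONE_NEEDED, FENCE_REUSE, FENCE_REWRITE, FENCE_ACQUIRE };

struct object { int tiled; int fence_reg; int fence_dirty; };

static enum fence_path get_fence_path(const struct object *obj)
{
        /* a dirty fence is flushed before any register write below */
        if (obj->fence_reg >= 0) {
                if (!obj->fence_dirty)
                        return FENCE_REUSE;     /* just bump the fence LRU */
                return FENCE_REWRITE;           /* rewrite the same register */
        }
        if (obj->tiled)
                return FENCE_ACQUIRE;           /* find/steal a register, may -EDEADLK */
        return FENCE_NONE_NEEDED;               /* untiled: no fence required */
}

int main(void)
{
        struct object tiled_new = { 1, -1, 0 };
        struct object linear = { 0, -1, 0 };

        printf("%d %d\n", get_fence_path(&tiled_new), get_fence_path(&linear));
        return 0;
}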
2643 | /** | 2501 | /** |
@@ -2749,7 +2607,7 @@ i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj, | |||
2749 | return ret; | 2607 | return ret; |
2750 | } | 2608 | } |
2751 | 2609 | ||
2752 | ret = i915_gem_gtt_bind_object(obj); | 2610 | ret = i915_gem_gtt_prepare_object(obj); |
2753 | if (ret) { | 2611 | if (ret) { |
2754 | i915_gem_object_put_pages_gtt(obj); | 2612 | i915_gem_object_put_pages_gtt(obj); |
2755 | drm_mm_put_block(obj->gtt_space); | 2613 | drm_mm_put_block(obj->gtt_space); |
@@ -2761,6 +2619,9 @@ i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj, | |||
2761 | goto search_free; | 2619 | goto search_free; |
2762 | } | 2620 | } |
2763 | 2621 | ||
2622 | if (!dev_priv->mm.aliasing_ppgtt) | ||
2623 | i915_gem_gtt_bind_object(obj, obj->cache_level); | ||
2624 | |||
2764 | list_add_tail(&obj->gtt_list, &dev_priv->mm.gtt_list); | 2625 | list_add_tail(&obj->gtt_list, &dev_priv->mm.gtt_list); |
2765 | list_add_tail(&obj->mm_list, &dev_priv->mm.inactive_list); | 2626 | list_add_tail(&obj->mm_list, &dev_priv->mm.inactive_list); |
2766 | 2627 | ||
@@ -2878,6 +2739,7 @@ i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj) | |||
2878 | int | 2739 | int |
2879 | i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) | 2740 | i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) |
2880 | { | 2741 | { |
2742 | drm_i915_private_t *dev_priv = obj->base.dev->dev_private; | ||
2881 | uint32_t old_write_domain, old_read_domains; | 2743 | uint32_t old_write_domain, old_read_domains; |
2882 | int ret; | 2744 | int ret; |
2883 | 2745 | ||
@@ -2918,6 +2780,10 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) | |||
2918 | old_read_domains, | 2780 | old_read_domains, |
2919 | old_write_domain); | 2781 | old_write_domain); |
2920 | 2782 | ||
2783 | /* And bump the LRU for this access */ | ||
2784 | if (i915_gem_object_is_inactive(obj)) | ||
2785 | list_move_tail(&obj->mm_list, &dev_priv->mm.inactive_list); | ||
2786 | |||
2921 | return 0; | 2787 | return 0; |
2922 | } | 2788 | } |
2923 | 2789 | ||
@@ -2953,7 +2819,8 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, | |||
2953 | return ret; | 2819 | return ret; |
2954 | } | 2820 | } |
2955 | 2821 | ||
2956 | i915_gem_gtt_rebind_object(obj, cache_level); | 2822 | if (obj->has_global_gtt_mapping) |
2823 | i915_gem_gtt_bind_object(obj, cache_level); | ||
2957 | if (obj->has_aliasing_ppgtt_mapping) | 2824 | if (obj->has_aliasing_ppgtt_mapping) |
2958 | i915_ppgtt_bind_object(dev_priv->mm.aliasing_ppgtt, | 2825 | i915_ppgtt_bind_object(dev_priv->mm.aliasing_ppgtt, |
2959 | obj, cache_level); | 2826 | obj, cache_level); |
@@ -2990,11 +2857,6 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, | |||
2990 | * Prepare buffer for display plane (scanout, cursors, etc). | 2857 | * Prepare buffer for display plane (scanout, cursors, etc). |
2991 | * Can be called from an uninterruptible phase (modesetting) and allows | 2858 | * Can be called from an uninterruptible phase (modesetting) and allows |
2992 | * any flushes to be pipelined (for pageflips). | 2859 | * any flushes to be pipelined (for pageflips). |
2993 | * | ||
2994 | * For the display plane, we want to be in the GTT but out of any write | ||
2995 | * domains. So in many ways this looks like set_to_gtt_domain() apart from the | ||
2996 | * ability to pipeline the waits, pinning and any additional subtleties | ||
2997 | * that may differentiate the display plane from ordinary buffers. | ||
2998 | */ | 2860 | */ |
2999 | int | 2861 | int |
3000 | i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, | 2862 | i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, |
@@ -3009,8 +2871,8 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, | |||
3009 | return ret; | 2871 | return ret; |
3010 | 2872 | ||
3011 | if (pipelined != obj->ring) { | 2873 | if (pipelined != obj->ring) { |
3012 | ret = i915_gem_object_wait_rendering(obj); | 2874 | ret = i915_gem_object_sync(obj, pipelined); |
3013 | if (ret == -ERESTARTSYS) | 2875 | if (ret) |
3014 | return ret; | 2876 | return ret; |
3015 | } | 2877 | } |
3016 | 2878 | ||
@@ -3082,7 +2944,7 @@ i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj) | |||
3082 | * This function returns when the move is complete, including waiting on | 2944 | * This function returns when the move is complete, including waiting on |
3083 | * flushes to occur. | 2945 | * flushes to occur. |
3084 | */ | 2946 | */ |
3085 | static int | 2947 | int |
3086 | i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) | 2948 | i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) |
3087 | { | 2949 | { |
3088 | uint32_t old_write_domain, old_read_domains; | 2950 | uint32_t old_write_domain, old_read_domains; |
@@ -3095,17 +2957,14 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) | |||
3095 | if (ret) | 2957 | if (ret) |
3096 | return ret; | 2958 | return ret; |
3097 | 2959 | ||
3098 | ret = i915_gem_object_wait_rendering(obj); | 2960 | if (write || obj->pending_gpu_write) { |
3099 | if (ret) | 2961 | ret = i915_gem_object_wait_rendering(obj); |
3100 | return ret; | 2962 | if (ret) |
2963 | return ret; | ||
2964 | } | ||
3101 | 2965 | ||
3102 | i915_gem_object_flush_gtt_write_domain(obj); | 2966 | i915_gem_object_flush_gtt_write_domain(obj); |
3103 | 2967 | ||
3104 | /* If we have a partially-valid cache of the object in the CPU, | ||
3105 | * finish invalidating it and free the per-page flags. | ||
3106 | */ | ||
3107 | i915_gem_object_set_to_full_cpu_read_domain(obj); | ||
3108 | |||
3109 | old_write_domain = obj->base.write_domain; | 2968 | old_write_domain = obj->base.write_domain; |
3110 | old_read_domains = obj->base.read_domains; | 2969 | old_read_domains = obj->base.read_domains; |
3111 | 2970 | ||
@@ -3136,113 +2995,6 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) | |||
3136 | return 0; | 2995 | return 0; |
3137 | } | 2996 | } |
3138 | 2997 | ||
3139 | /** | ||
3140 | * Moves the object from a partially CPU read to a full one. | ||
3141 | * | ||
3142 | * Note that this only resolves i915_gem_object_set_cpu_read_domain_range(), | ||
3143 | * and doesn't handle transitioning from !(read_domains & I915_GEM_DOMAIN_CPU). | ||
3144 | */ | ||
3145 | static void | ||
3146 | i915_gem_object_set_to_full_cpu_read_domain(struct drm_i915_gem_object *obj) | ||
3147 | { | ||
3148 | if (!obj->page_cpu_valid) | ||
3149 | return; | ||
3150 | |||
3151 | /* If we're partially in the CPU read domain, finish moving it in. | ||
3152 | */ | ||
3153 | if (obj->base.read_domains & I915_GEM_DOMAIN_CPU) { | ||
3154 | int i; | ||
3155 | |||
3156 | for (i = 0; i <= (obj->base.size - 1) / PAGE_SIZE; i++) { | ||
3157 | if (obj->page_cpu_valid[i]) | ||
3158 | continue; | ||
3159 | drm_clflush_pages(obj->pages + i, 1); | ||
3160 | } | ||
3161 | } | ||
3162 | |||
3163 | /* Free the page_cpu_valid mappings which are now stale, whether | ||
3164 | * or not we've got I915_GEM_DOMAIN_CPU. | ||
3165 | */ | ||
3166 | kfree(obj->page_cpu_valid); | ||
3167 | obj->page_cpu_valid = NULL; | ||
3168 | } | ||
3169 | |||
3170 | /** | ||
3171 | * Set the CPU read domain on a range of the object. | ||
3172 | * | ||
3173 | * The object ends up with I915_GEM_DOMAIN_CPU in its read flags although it's | ||
3174 | * not entirely valid. The page_cpu_valid member of the object flags which | ||
3175 | * pages have been flushed, and will be respected by | ||
3176 | * i915_gem_object_set_to_cpu_domain() if it's called on to get a valid mapping | ||
3177 | * of the whole object. | ||
3178 | * | ||
3179 | * This function returns when the move is complete, including waiting on | ||
3180 | * flushes to occur. | ||
3181 | */ | ||
3182 | static int | ||
3183 | i915_gem_object_set_cpu_read_domain_range(struct drm_i915_gem_object *obj, | ||
3184 | uint64_t offset, uint64_t size) | ||
3185 | { | ||
3186 | uint32_t old_read_domains; | ||
3187 | int i, ret; | ||
3188 | |||
3189 | if (offset == 0 && size == obj->base.size) | ||
3190 | return i915_gem_object_set_to_cpu_domain(obj, 0); | ||
3191 | |||
3192 | ret = i915_gem_object_flush_gpu_write_domain(obj); | ||
3193 | if (ret) | ||
3194 | return ret; | ||
3195 | |||
3196 | ret = i915_gem_object_wait_rendering(obj); | ||
3197 | if (ret) | ||
3198 | return ret; | ||
3199 | |||
3200 | i915_gem_object_flush_gtt_write_domain(obj); | ||
3201 | |||
3202 | /* If we're already fully in the CPU read domain, we're done. */ | ||
3203 | if (obj->page_cpu_valid == NULL && | ||
3204 | (obj->base.read_domains & I915_GEM_DOMAIN_CPU) != 0) | ||
3205 | return 0; | ||
3206 | |||
3207 | /* Otherwise, create/clear the per-page CPU read domain flag if we're | ||
3208 | * newly adding I915_GEM_DOMAIN_CPU | ||
3209 | */ | ||
3210 | if (obj->page_cpu_valid == NULL) { | ||
3211 | obj->page_cpu_valid = kzalloc(obj->base.size / PAGE_SIZE, | ||
3212 | GFP_KERNEL); | ||
3213 | if (obj->page_cpu_valid == NULL) | ||
3214 | return -ENOMEM; | ||
3215 | } else if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) | ||
3216 | memset(obj->page_cpu_valid, 0, obj->base.size / PAGE_SIZE); | ||
3217 | |||
3218 | /* Flush the cache on any pages that are still invalid from the CPU's | ||
3219 | * perspective. | ||
3220 | */ | ||
3221 | for (i = offset / PAGE_SIZE; i <= (offset + size - 1) / PAGE_SIZE; | ||
3222 | i++) { | ||
3223 | if (obj->page_cpu_valid[i]) | ||
3224 | continue; | ||
3225 | |||
3226 | drm_clflush_pages(obj->pages + i, 1); | ||
3227 | |||
3228 | obj->page_cpu_valid[i] = 1; | ||
3229 | } | ||
3230 | |||
3231 | /* It should now be out of any other write domains, and we can update | ||
3232 | * the domain values for our changes. | ||
3233 | */ | ||
3234 | BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_CPU) != 0); | ||
3235 | |||
3236 | old_read_domains = obj->base.read_domains; | ||
3237 | obj->base.read_domains |= I915_GEM_DOMAIN_CPU; | ||
3238 | |||
3239 | trace_i915_gem_object_change_domain(obj, | ||
3240 | old_read_domains, | ||
3241 | obj->base.write_domain); | ||
3242 | |||
3243 | return 0; | ||
3244 | } | ||
3245 | |||
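The deleted partial-read-domain code kept a per-page valid array and clflushed only the pages of the requested [offset, offset + size) range that were still stale. The page-range arithmetic it used is the standard offset-to-page-span calculation, sketched here on its own:

#include <stdio.h>
#include <stdint.h>

#define PAGE_SIZE 4096ull

/* First and last page indices touched by the byte range [offset, offset + size) */
static void page_span(uint64_t offset, uint64_t size,
                      uint64_t *first, uint64_t *last)
{
        *first = offset / PAGE_SIZE;
        *last  = (offset + size - 1) / PAGE_SIZE;
}

int main(void)
{
        uint64_t first, last;

        page_span(4000, 200, &first, &last);    /* straddles a page boundary */
        printf("pages %llu..%llu\n",
               (unsigned long long)first, (unsigned long long)last);   /* 0..1 */
        return 0;
}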
3246 | /* Throttle our rendering by waiting until the ring has completed our requests | 2998 | /* Throttle our rendering by waiting until the ring has completed our requests |
3247 | * emitted over 20 msec ago. | 2999 | * emitted over 20 msec ago. |
3248 | * | 3000 | * |
@@ -3280,28 +3032,7 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file) | |||
3280 | if (seqno == 0) | 3032 | if (seqno == 0) |
3281 | return 0; | 3033 | return 0; |
3282 | 3034 | ||
3283 | ret = 0; | 3035 | ret = __wait_seqno(ring, seqno, true); |
3284 | if (!i915_seqno_passed(ring->get_seqno(ring), seqno)) { | ||
3285 | /* And wait for the seqno passing without holding any locks and | ||
3286 | * causing extra latency for others. This is safe as the irq | ||
3287 | * generation is designed to be run atomically and so is | ||
3288 | * lockless. | ||
3289 | */ | ||
3290 | if (ring->irq_get(ring)) { | ||
3291 | ret = wait_event_interruptible(ring->irq_queue, | ||
3292 | i915_seqno_passed(ring->get_seqno(ring), seqno) | ||
3293 | || atomic_read(&dev_priv->mm.wedged)); | ||
3294 | ring->irq_put(ring); | ||
3295 | |||
3296 | if (ret == 0 && atomic_read(&dev_priv->mm.wedged)) | ||
3297 | ret = -EIO; | ||
3298 | } else if (wait_for_atomic(i915_seqno_passed(ring->get_seqno(ring), | ||
3299 | seqno) || | ||
3300 | atomic_read(&dev_priv->mm.wedged), 3000)) { | ||
3301 | ret = -EBUSY; | ||
3302 | } | ||
3303 | } | ||
3304 | |||
3305 | if (ret == 0) | 3036 | if (ret == 0) |
3306 | queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 0); | 3037 | queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 0); |
3307 | 3038 | ||
@@ -3313,12 +3044,9 @@ i915_gem_object_pin(struct drm_i915_gem_object *obj, | |||
3313 | uint32_t alignment, | 3044 | uint32_t alignment, |
3314 | bool map_and_fenceable) | 3045 | bool map_and_fenceable) |
3315 | { | 3046 | { |
3316 | struct drm_device *dev = obj->base.dev; | ||
3317 | struct drm_i915_private *dev_priv = dev->dev_private; | ||
3318 | int ret; | 3047 | int ret; |
3319 | 3048 | ||
3320 | BUG_ON(obj->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT); | 3049 | BUG_ON(obj->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT); |
3321 | WARN_ON(i915_verify_lists(dev)); | ||
3322 | 3050 | ||
3323 | if (obj->gtt_space != NULL) { | 3051 | if (obj->gtt_space != NULL) { |
3324 | if ((alignment && obj->gtt_offset & (alignment - 1)) || | 3052 | if ((alignment && obj->gtt_offset & (alignment - 1)) || |
@@ -3343,34 +3071,23 @@ i915_gem_object_pin(struct drm_i915_gem_object *obj, | |||
3343 | return ret; | 3071 | return ret; |
3344 | } | 3072 | } |
3345 | 3073 | ||
3346 | if (obj->pin_count++ == 0) { | 3074 | if (!obj->has_global_gtt_mapping && map_and_fenceable) |
3347 | if (!obj->active) | 3075 | i915_gem_gtt_bind_object(obj, obj->cache_level); |
3348 | list_move_tail(&obj->mm_list, | 3076 | |
3349 | &dev_priv->mm.pinned_list); | 3077 | obj->pin_count++; |
3350 | } | ||
3351 | obj->pin_mappable |= map_and_fenceable; | 3078 | obj->pin_mappable |= map_and_fenceable; |
3352 | 3079 | ||
3353 | WARN_ON(i915_verify_lists(dev)); | ||
3354 | return 0; | 3080 | return 0; |
3355 | } | 3081 | } |
3356 | 3082 | ||
3357 | void | 3083 | void |
3358 | i915_gem_object_unpin(struct drm_i915_gem_object *obj) | 3084 | i915_gem_object_unpin(struct drm_i915_gem_object *obj) |
3359 | { | 3085 | { |
3360 | struct drm_device *dev = obj->base.dev; | ||
3361 | drm_i915_private_t *dev_priv = dev->dev_private; | ||
3362 | |||
3363 | WARN_ON(i915_verify_lists(dev)); | ||
3364 | BUG_ON(obj->pin_count == 0); | 3086 | BUG_ON(obj->pin_count == 0); |
3365 | BUG_ON(obj->gtt_space == NULL); | 3087 | BUG_ON(obj->gtt_space == NULL); |
3366 | 3088 | ||
3367 | if (--obj->pin_count == 0) { | 3089 | if (--obj->pin_count == 0) |
3368 | if (!obj->active) | ||
3369 | list_move_tail(&obj->mm_list, | ||
3370 | &dev_priv->mm.inactive_list); | ||
3371 | obj->pin_mappable = false; | 3090 | obj->pin_mappable = false; |
3372 | } | ||
3373 | WARN_ON(i915_verify_lists(dev)); | ||
3374 | } | 3091 | } |
3375 | 3092 | ||
3376 | int | 3093 | int |
@@ -3494,20 +3211,9 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data, | |||
3494 | if (obj->base.write_domain & I915_GEM_GPU_DOMAINS) { | 3211 | if (obj->base.write_domain & I915_GEM_GPU_DOMAINS) { |
3495 | ret = i915_gem_flush_ring(obj->ring, | 3212 | ret = i915_gem_flush_ring(obj->ring, |
3496 | 0, obj->base.write_domain); | 3213 | 0, obj->base.write_domain); |
3497 | } else if (obj->ring->outstanding_lazy_request == | 3214 | } else { |
3498 | obj->last_rendering_seqno) { | 3215 | ret = i915_gem_check_olr(obj->ring, |
3499 | struct drm_i915_gem_request *request; | 3216 | obj->last_rendering_seqno); |
3500 | |||
3501 | /* This ring is not being cleared by active usage, | ||
3502 | * so emit a request to do so. | ||
3503 | */ | ||
3504 | request = kzalloc(sizeof(*request), GFP_KERNEL); | ||
3505 | if (request) { | ||
3506 | ret = i915_add_request(obj->ring, NULL, request); | ||
3507 | if (ret) | ||
3508 | kfree(request); | ||
3509 | } else | ||
3510 | ret = -ENOMEM; | ||
3511 | } | 3217 | } |
3512 | 3218 | ||
3513 | /* Update the active list for the hardware's current position. | 3219 | /* Update the active list for the hardware's current position. |
@@ -3643,46 +3349,42 @@ int i915_gem_init_object(struct drm_gem_object *obj) | |||
3643 | return 0; | 3349 | return 0; |
3644 | } | 3350 | } |
3645 | 3351 | ||
3646 | static void i915_gem_free_object_tail(struct drm_i915_gem_object *obj) | 3352 | void i915_gem_free_object(struct drm_gem_object *gem_obj) |
3647 | { | 3353 | { |
3354 | struct drm_i915_gem_object *obj = to_intel_bo(gem_obj); | ||
3648 | struct drm_device *dev = obj->base.dev; | 3355 | struct drm_device *dev = obj->base.dev; |
3649 | drm_i915_private_t *dev_priv = dev->dev_private; | 3356 | drm_i915_private_t *dev_priv = dev->dev_private; |
3650 | int ret; | ||
3651 | |||
3652 | ret = i915_gem_object_unbind(obj); | ||
3653 | if (ret == -ERESTARTSYS) { | ||
3654 | list_move(&obj->mm_list, | ||
3655 | &dev_priv->mm.deferred_free_list); | ||
3656 | return; | ||
3657 | } | ||
3658 | 3357 | ||
3659 | trace_i915_gem_object_destroy(obj); | 3358 | trace_i915_gem_object_destroy(obj); |
3660 | 3359 | ||
3360 | if (gem_obj->import_attach) | ||
3361 | drm_prime_gem_destroy(gem_obj, obj->sg_table); | ||
3362 | |||
3363 | if (obj->phys_obj) | ||
3364 | i915_gem_detach_phys_object(dev, obj); | ||
3365 | |||
3366 | obj->pin_count = 0; | ||
3367 | if (WARN_ON(i915_gem_object_unbind(obj) == -ERESTARTSYS)) { | ||
3368 | bool was_interruptible; | ||
3369 | |||
3370 | was_interruptible = dev_priv->mm.interruptible; | ||
3371 | dev_priv->mm.interruptible = false; | ||
3372 | |||
3373 | WARN_ON(i915_gem_object_unbind(obj)); | ||
3374 | |||
3375 | dev_priv->mm.interruptible = was_interruptible; | ||
3376 | } | ||
3377 | |||
3661 | if (obj->base.map_list.map) | 3378 | if (obj->base.map_list.map) |
3662 | drm_gem_free_mmap_offset(&obj->base); | 3379 | drm_gem_free_mmap_offset(&obj->base); |
3663 | 3380 | ||
3664 | drm_gem_object_release(&obj->base); | 3381 | drm_gem_object_release(&obj->base); |
3665 | i915_gem_info_remove_obj(dev_priv, obj->base.size); | 3382 | i915_gem_info_remove_obj(dev_priv, obj->base.size); |
3666 | 3383 | ||
3667 | kfree(obj->page_cpu_valid); | ||
3668 | kfree(obj->bit_17); | 3384 | kfree(obj->bit_17); |
3669 | kfree(obj); | 3385 | kfree(obj); |
3670 | } | 3386 | } |
3671 | 3387 | ||
3672 | void i915_gem_free_object(struct drm_gem_object *gem_obj) | ||
3673 | { | ||
3674 | struct drm_i915_gem_object *obj = to_intel_bo(gem_obj); | ||
3675 | struct drm_device *dev = obj->base.dev; | ||
3676 | |||
3677 | while (obj->pin_count > 0) | ||
3678 | i915_gem_object_unpin(obj); | ||
3679 | |||
3680 | if (obj->phys_obj) | ||
3681 | i915_gem_detach_phys_object(dev, obj); | ||
3682 | |||
3683 | i915_gem_free_object_tail(obj); | ||
3684 | } | ||
3685 | |||
3686 | int | 3388 | int |
3687 | i915_gem_idle(struct drm_device *dev) | 3389 | i915_gem_idle(struct drm_device *dev) |
3688 | { | 3390 | { |
@@ -3696,20 +3398,16 @@ i915_gem_idle(struct drm_device *dev) | |||
3696 | return 0; | 3398 | return 0; |
3697 | } | 3399 | } |
3698 | 3400 | ||
3699 | ret = i915_gpu_idle(dev, true); | 3401 | ret = i915_gpu_idle(dev); |
3700 | if (ret) { | 3402 | if (ret) { |
3701 | mutex_unlock(&dev->struct_mutex); | 3403 | mutex_unlock(&dev->struct_mutex); |
3702 | return ret; | 3404 | return ret; |
3703 | } | 3405 | } |
3406 | i915_gem_retire_requests(dev); | ||
3704 | 3407 | ||
3705 | /* Under UMS, be paranoid and evict. */ | 3408 | /* Under UMS, be paranoid and evict. */ |
3706 | if (!drm_core_check_feature(dev, DRIVER_MODESET)) { | 3409 | if (!drm_core_check_feature(dev, DRIVER_MODESET)) |
3707 | ret = i915_gem_evict_inactive(dev, false); | 3410 | i915_gem_evict_everything(dev, false); |
3708 | if (ret) { | ||
3709 | mutex_unlock(&dev->struct_mutex); | ||
3710 | return ret; | ||
3711 | } | ||
3712 | } | ||
3713 | 3411 | ||
3714 | i915_gem_reset_fences(dev); | 3412 | i915_gem_reset_fences(dev); |
3715 | 3413 | ||
@@ -3747,9 +3445,9 @@ void i915_gem_init_swizzling(struct drm_device *dev) | |||
3747 | 3445 | ||
3748 | I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL); | 3446 | I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL); |
3749 | if (IS_GEN6(dev)) | 3447 | if (IS_GEN6(dev)) |
3750 | I915_WRITE(ARB_MODE, ARB_MODE_ENABLE(ARB_MODE_SWIZZLE_SNB)); | 3448 | I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_SNB)); |
3751 | else | 3449 | else |
3752 | I915_WRITE(ARB_MODE, ARB_MODE_ENABLE(ARB_MODE_SWIZZLE_IVB)); | 3450 | I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_IVB)); |
3753 | } | 3451 | } |
3754 | 3452 | ||
3755 | void i915_gem_init_ppgtt(struct drm_device *dev) | 3453 | void i915_gem_init_ppgtt(struct drm_device *dev) |
@@ -3787,21 +3485,27 @@ void i915_gem_init_ppgtt(struct drm_device *dev) | |||
3787 | pd_offset <<= 16; | 3485 | pd_offset <<= 16; |
3788 | 3486 | ||
3789 | if (INTEL_INFO(dev)->gen == 6) { | 3487 | if (INTEL_INFO(dev)->gen == 6) { |
3790 | uint32_t ecochk = I915_READ(GAM_ECOCHK); | 3488 | uint32_t ecochk, gab_ctl, ecobits; |
3489 | |||
3490 | ecobits = I915_READ(GAC_ECO_BITS); | ||
3491 | I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_PPGTT_CACHE64B); | ||
3492 | |||
3493 | gab_ctl = I915_READ(GAB_CTL); | ||
3494 | I915_WRITE(GAB_CTL, gab_ctl | GAB_CTL_CONT_AFTER_PAGEFAULT); | ||
3495 | |||
3496 | ecochk = I915_READ(GAM_ECOCHK); | ||
3791 | I915_WRITE(GAM_ECOCHK, ecochk | ECOCHK_SNB_BIT | | 3497 | I915_WRITE(GAM_ECOCHK, ecochk | ECOCHK_SNB_BIT | |
3792 | ECOCHK_PPGTT_CACHE64B); | 3498 | ECOCHK_PPGTT_CACHE64B); |
3793 | I915_WRITE(GFX_MODE, GFX_MODE_ENABLE(GFX_PPGTT_ENABLE)); | 3499 | I915_WRITE(GFX_MODE, _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE)); |
3794 | } else if (INTEL_INFO(dev)->gen >= 7) { | 3500 | } else if (INTEL_INFO(dev)->gen >= 7) { |
3795 | I915_WRITE(GAM_ECOCHK, ECOCHK_PPGTT_CACHE64B); | 3501 | I915_WRITE(GAM_ECOCHK, ECOCHK_PPGTT_CACHE64B); |
3796 | /* GFX_MODE is per-ring on gen7+ */ | 3502 | /* GFX_MODE is per-ring on gen7+ */ |
3797 | } | 3503 | } |
3798 | 3504 | ||
3799 | for (i = 0; i < I915_NUM_RINGS; i++) { | 3505 | for_each_ring(ring, dev_priv, i) { |
3800 | ring = &dev_priv->ring[i]; | ||
3801 | |||
3802 | if (INTEL_INFO(dev)->gen >= 7) | 3506 | if (INTEL_INFO(dev)->gen >= 7) |
3803 | I915_WRITE(RING_MODE_GEN7(ring), | 3507 | I915_WRITE(RING_MODE_GEN7(ring), |
3804 | GFX_MODE_ENABLE(GFX_PPGTT_ENABLE)); | 3508 | _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE)); |
3805 | 3509 | ||
3806 | I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G); | 3510 | I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G); |
3807 | I915_WRITE(RING_PP_DIR_BASE(ring), pd_offset); | 3511 | I915_WRITE(RING_PP_DIR_BASE(ring), pd_offset); |
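Note: the ring loop above (and the one in i915_gem_cleanup_ringbuffer() further down) now uses the for_each_ring() iterator instead of walking all I915_NUM_RINGS slots of dev_priv->ring[], so rings that were never initialized on a given GPU are skipped. A sketch of the macro as introduced around this time (hedged; the authoritative definition lives in the driver headers):

	/* Visit only rings that have actually been set up. */
	#define for_each_ring(ring__, dev_priv__, i__)			\
		for ((i__) = 0; (i__) < I915_NUM_RINGS; (i__)++)	\
			if (((ring__) = &(dev_priv__)->ring[(i__)]),	\
			    intel_ring_initialized((ring__)))
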
@@ -3845,14 +3549,80 @@ cleanup_render_ring: | |||
3845 | return ret; | 3549 | return ret; |
3846 | } | 3550 | } |
3847 | 3551 | ||
3552 | static bool | ||
3553 | intel_enable_ppgtt(struct drm_device *dev) | ||
3554 | { | ||
3555 | if (i915_enable_ppgtt >= 0) | ||
3556 | return i915_enable_ppgtt; | ||
3557 | |||
3558 | #ifdef CONFIG_INTEL_IOMMU | ||
3559 | /* Disable ppgtt on SNB if VT-d is on. */ | ||
3560 | if (INTEL_INFO(dev)->gen == 6 && intel_iommu_gfx_mapped) | ||
3561 | return false; | ||
3562 | #endif | ||
3563 | |||
3564 | return true; | ||
3565 | } | ||
3566 | |||
3567 | int i915_gem_init(struct drm_device *dev) | ||
3568 | { | ||
3569 | struct drm_i915_private *dev_priv = dev->dev_private; | ||
3570 | unsigned long gtt_size, mappable_size; | ||
3571 | int ret; | ||
3572 | |||
3573 | gtt_size = dev_priv->mm.gtt->gtt_total_entries << PAGE_SHIFT; | ||
3574 | mappable_size = dev_priv->mm.gtt->gtt_mappable_entries << PAGE_SHIFT; | ||
3575 | |||
3576 | mutex_lock(&dev->struct_mutex); | ||
3577 | if (intel_enable_ppgtt(dev) && HAS_ALIASING_PPGTT(dev)) { | ||
3578 | /* PPGTT pdes are stolen from global gtt ptes, so shrink the | ||
3579 | * aperture accordingly when using aliasing ppgtt. */ | ||
3580 | gtt_size -= I915_PPGTT_PD_ENTRIES*PAGE_SIZE; | ||
3581 | |||
3582 | i915_gem_init_global_gtt(dev, 0, mappable_size, gtt_size); | ||
3583 | |||
3584 | ret = i915_gem_init_aliasing_ppgtt(dev); | ||
3585 | if (ret) { | ||
3586 | mutex_unlock(&dev->struct_mutex); | ||
3587 | return ret; | ||
3588 | } | ||
3589 | } else { | ||
3590 | /* Let GEM Manage all of the aperture. | ||
3591 | * | ||
3592 | * However, leave one page at the end still bound to the scratch | ||
3593 | * page. There are a number of places where the hardware | ||
3594 | * apparently prefetches past the end of the object, and we've | ||
3595 | * seen multiple hangs with the GPU head pointer stuck in a | ||
3596 | * batchbuffer bound at the last page of the aperture. One page | ||
3597 | * should be enough to keep any prefetching inside of the | ||
3598 | * aperture. | ||
3599 | */ | ||
3600 | i915_gem_init_global_gtt(dev, 0, mappable_size, | ||
3601 | gtt_size); | ||
3602 | } | ||
3603 | |||
3604 | ret = i915_gem_init_hw(dev); | ||
3605 | mutex_unlock(&dev->struct_mutex); | ||
3606 | if (ret) { | ||
3607 | i915_gem_cleanup_aliasing_ppgtt(dev); | ||
3608 | return ret; | ||
3609 | } | ||
3610 | |||
3611 | /* Allow hardware batchbuffers unless told otherwise, but not for KMS. */ | ||
3612 | if (!drm_core_check_feature(dev, DRIVER_MODESET)) | ||
3613 | dev_priv->dri1.allow_batchbuffer = 1; | ||
3614 | return 0; | ||
3615 | } | ||
3616 | |||
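Note on the new i915_gem_init() above: intel_enable_ppgtt() honours the tri-state i915_enable_ppgtt module parameter (a negative value means auto-detect) and refuses PPGTT on gen6 when VT-d is active; when aliasing PPGTT is used, its page-directory entries are stolen from global GTT PTE slots, so the usable aperture is shrunk accordingly. A worked sketch of that arithmetic, assuming the gen6 value I915_PPGTT_PD_ENTRIES == 512 and 4 KiB pages:

	/* GGTT space consumed by the aliasing-PPGTT page directory:
	 *   512 PDEs stolen from GGTT PTE slots * 4 KiB per slot = 2 MiB
	 * Address space those PDEs can in turn map:
	 *   512 PDEs * 1024 PTEs per page * 4 KiB = 2 GiB   (cf. PP_DIR_DCLV_2G)
	 */
	gtt_size -= I915_PPGTT_PD_ENTRIES * PAGE_SIZE;	/* 2 MiB */
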
3848 | void | 3617 | void |
3849 | i915_gem_cleanup_ringbuffer(struct drm_device *dev) | 3618 | i915_gem_cleanup_ringbuffer(struct drm_device *dev) |
3850 | { | 3619 | { |
3851 | drm_i915_private_t *dev_priv = dev->dev_private; | 3620 | drm_i915_private_t *dev_priv = dev->dev_private; |
3621 | struct intel_ring_buffer *ring; | ||
3852 | int i; | 3622 | int i; |
3853 | 3623 | ||
3854 | for (i = 0; i < I915_NUM_RINGS; i++) | 3624 | for_each_ring(ring, dev_priv, i) |
3855 | intel_cleanup_ring_buffer(&dev_priv->ring[i]); | 3625 | intel_cleanup_ring_buffer(ring); |
3856 | } | 3626 | } |
3857 | 3627 | ||
3858 | int | 3628 | int |
@@ -3860,7 +3630,7 @@ i915_gem_entervt_ioctl(struct drm_device *dev, void *data, | |||
3860 | struct drm_file *file_priv) | 3630 | struct drm_file *file_priv) |
3861 | { | 3631 | { |
3862 | drm_i915_private_t *dev_priv = dev->dev_private; | 3632 | drm_i915_private_t *dev_priv = dev->dev_private; |
3863 | int ret, i; | 3633 | int ret; |
3864 | 3634 | ||
3865 | if (drm_core_check_feature(dev, DRIVER_MODESET)) | 3635 | if (drm_core_check_feature(dev, DRIVER_MODESET)) |
3866 | return 0; | 3636 | return 0; |
@@ -3882,10 +3652,6 @@ i915_gem_entervt_ioctl(struct drm_device *dev, void *data, | |||
3882 | BUG_ON(!list_empty(&dev_priv->mm.active_list)); | 3652 | BUG_ON(!list_empty(&dev_priv->mm.active_list)); |
3883 | BUG_ON(!list_empty(&dev_priv->mm.flushing_list)); | 3653 | BUG_ON(!list_empty(&dev_priv->mm.flushing_list)); |
3884 | BUG_ON(!list_empty(&dev_priv->mm.inactive_list)); | 3654 | BUG_ON(!list_empty(&dev_priv->mm.inactive_list)); |
3885 | for (i = 0; i < I915_NUM_RINGS; i++) { | ||
3886 | BUG_ON(!list_empty(&dev_priv->ring[i].active_list)); | ||
3887 | BUG_ON(!list_empty(&dev_priv->ring[i].request_list)); | ||
3888 | } | ||
3889 | mutex_unlock(&dev->struct_mutex); | 3655 | mutex_unlock(&dev->struct_mutex); |
3890 | 3656 | ||
3891 | ret = drm_irq_install(dev); | 3657 | ret = drm_irq_install(dev); |
@@ -3944,9 +3710,7 @@ i915_gem_load(struct drm_device *dev) | |||
3944 | INIT_LIST_HEAD(&dev_priv->mm.active_list); | 3710 | INIT_LIST_HEAD(&dev_priv->mm.active_list); |
3945 | INIT_LIST_HEAD(&dev_priv->mm.flushing_list); | 3711 | INIT_LIST_HEAD(&dev_priv->mm.flushing_list); |
3946 | INIT_LIST_HEAD(&dev_priv->mm.inactive_list); | 3712 | INIT_LIST_HEAD(&dev_priv->mm.inactive_list); |
3947 | INIT_LIST_HEAD(&dev_priv->mm.pinned_list); | ||
3948 | INIT_LIST_HEAD(&dev_priv->mm.fence_list); | 3713 | INIT_LIST_HEAD(&dev_priv->mm.fence_list); |
3949 | INIT_LIST_HEAD(&dev_priv->mm.deferred_free_list); | ||
3950 | INIT_LIST_HEAD(&dev_priv->mm.gtt_list); | 3714 | INIT_LIST_HEAD(&dev_priv->mm.gtt_list); |
3951 | for (i = 0; i < I915_NUM_RINGS; i++) | 3715 | for (i = 0; i < I915_NUM_RINGS; i++) |
3952 | init_ring_lists(&dev_priv->ring[i]); | 3716 | init_ring_lists(&dev_priv->ring[i]); |
@@ -3958,12 +3722,8 @@ i915_gem_load(struct drm_device *dev) | |||
3958 | 3722 | ||
3959 | /* On GEN3 we really need to make sure the ARB C3 LP bit is set */ | 3723 | /* On GEN3 we really need to make sure the ARB C3 LP bit is set */ |
3960 | if (IS_GEN3(dev)) { | 3724 | if (IS_GEN3(dev)) { |
3961 | u32 tmp = I915_READ(MI_ARB_STATE); | 3725 | I915_WRITE(MI_ARB_STATE, |
3962 | if (!(tmp & MI_ARB_C3_LP_WRITE_ENABLE)) { | 3726 | _MASKED_BIT_ENABLE(MI_ARB_C3_LP_WRITE_ENABLE)); |
3963 | /* arb state is a masked write, so set bit + bit in mask */ | ||
3964 | tmp = MI_ARB_C3_LP_WRITE_ENABLE | (MI_ARB_C3_LP_WRITE_ENABLE << MI_ARB_MASK_SHIFT); | ||
3965 | I915_WRITE(MI_ARB_STATE, tmp); | ||
3966 | } | ||
3967 | } | 3727 | } |
3968 | 3728 | ||
3969 | dev_priv->relative_constants_mode = I915_EXEC_CONSTANTS_REL_GENERAL; | 3729 | dev_priv->relative_constants_mode = I915_EXEC_CONSTANTS_REL_GENERAL; |
@@ -3978,9 +3738,7 @@ i915_gem_load(struct drm_device *dev) | |||
3978 | dev_priv->num_fence_regs = 8; | 3738 | dev_priv->num_fence_regs = 8; |
3979 | 3739 | ||
3980 | /* Initialize fence registers to zero */ | 3740 | /* Initialize fence registers to zero */ |
3981 | for (i = 0; i < dev_priv->num_fence_regs; i++) { | 3741 | i915_gem_reset_fences(dev); |
3982 | i915_gem_clear_fence_reg(dev, &dev_priv->fence_regs[i]); | ||
3983 | } | ||
3984 | 3742 | ||
3985 | i915_gem_detect_bit_6_swizzle(dev); | 3743 | i915_gem_detect_bit_6_swizzle(dev); |
3986 | init_waitqueue_head(&dev_priv->pending_flip_queue); | 3744 | init_waitqueue_head(&dev_priv->pending_flip_queue); |
@@ -4268,7 +4026,7 @@ rescan: | |||
4268 | * This has a dramatic impact to reduce the number of | 4026 | * This has a dramatic impact to reduce the number of |
4269 | * OOM-killer events whilst running the GPU aggressively. | 4027 | * OOM-killer events whilst running the GPU aggressively. |
4270 | */ | 4028 | */ |
4271 | if (i915_gpu_idle(dev, true) == 0) | 4029 | if (i915_gpu_idle(dev) == 0) |
4272 | goto rescan; | 4030 | goto rescan; |
4273 | } | 4031 | } |
4274 | mutex_unlock(&dev->struct_mutex); | 4032 | mutex_unlock(&dev->struct_mutex); |