author    Chris Wilson <chris@chris-wilson.co.uk>    2012-08-20 05:40:46 -0400
committer Daniel Vetter <daniel.vetter@ffwll.ch>    2012-08-21 08:34:11 -0400
commit    6c085a728cf000ac1865d66f8c9b52935558b328 (patch)
tree      416d56b284e9aba4da4aee1d6c6a8f5fa16a934e
parent    225067eedf1f4d4065940232c9069fcb255206ee (diff)
drm/i915: Track unbound pages
When dealing with a working set larger than the GATT, or even the mappable
aperture when touching through the GTT, we end up evicting objects only to
rebind them at a new offset again later. Moving an object into and out of
the GTT requires clflushing the pages, thus causing a double-clflush
penalty for rebinding.

To avoid having to clflush on rebinding, we can track the pages as they are
evicted from the GTT and only relinquish those pages on memory pressure.

As usual, if it were not for the handling of the out-of-memory condition
and having to manually shrink our own bo caches, it would be a net
reduction of code. Alas.

Note: The patch also contains a few changes to the last-hope
evict_everything logic in i915_gem_execbuffer.c - we no longer try to only
evict the purgeable stuff in a first try (since that's superfluous and only
helps in OOM corner-cases, not fragmented-gtt thrashing situations).

Also, the extraction of the get_pages retry loop from bind_to_gtt (and
other callsites) to get_pages should imo have been a separate patch.

v2: Ditch the newly added put_pages (for unbound objects only) in
i915_gem_reset. A quick irc discussion hasn't revealed any important reason
for this, so if we need this, I'd like to have a git blame'able explanation
for it.

v3: Undo the s/drm_malloc_ab/kmalloc/ in get_pages that Chris noticed.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
[danvet: Split out code movements and rant a bit in the commit message
 with a few Notes. Done v2]
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
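[Editor's note: the following is a minimal, self-contained user-space sketch of the bookkeeping described above, not the driver code. All names (struct object, unbind(), purge()) are illustrative; the real implementation is in the diff below (i915_gem_object_put_pages_gtt(), i915_gem_purge(), i915_gem_shrink_all()). The point it models: an unbind keeps the backing pages and merely moves the object to an "unbound" list, and only a purge pass under memory pressure releases the pages of purgeable objects.]

/*
 * Sketch only: an object's pages outlive its GTT binding; a purge pass
 * frees purgeable unbound pages when memory is tight.
 */
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>

struct object {
	size_t size;		/* size of the backing store in bytes */
	bool purgeable;		/* userspace said the contents may be discarded */
	void *pages;		/* backing pages, kept across unbind */
	struct object *next;
};

static struct object *unbound_list;	/* evicted from the GTT, pages kept */

/* Unbind: drop the (notional) GTT binding but keep, and do not flush, the pages. */
static void unbind(struct object *obj)
{
	obj->next = unbound_list;
	unbound_list = obj;
}

/* Purge: under memory pressure, free purgeable unbound pages until 'target' bytes. */
static size_t purge(size_t target)
{
	size_t freed = 0;
	struct object **p = &unbound_list;

	while (*p && freed < target) {
		struct object *obj = *p;

		if (obj->purgeable && obj->pages) {
			free(obj->pages);
			obj->pages = NULL;
			freed += obj->size;
			*p = obj->next;		/* unlink from the unbound list */
		} else {
			p = &obj->next;
		}
	}
	return freed;
}

int main(void)
{
	struct object obj = {
		.size = 4096,
		.purgeable = true,
		.pages = malloc(4096),
	};

	unbind(&obj);			/* pages survive the unbind */
	printf("reclaimed %zu bytes\n", purge(1 << 20));
	return 0;
}

The design point, as the commit message argues, is that the expensive cache flush is deferred until the pages are actually released (or truly needed in a new domain), so an evict/rebind cycle no longer pays the clflush twice.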
-rw-r--r--    drivers/gpu/drm/i915/i915_debugfs.c          14
-rw-r--r--    drivers/gpu/drm/i915/i915_drv.h              13
-rw-r--r--    drivers/gpu/drm/i915/i915_gem.c             288
-rw-r--r--    drivers/gpu/drm/i915/i915_gem_dmabuf.c       20
-rw-r--r--    drivers/gpu/drm/i915/i915_gem_evict.c        13
-rw-r--r--    drivers/gpu/drm/i915/i915_gem_execbuffer.c    9
-rw-r--r--    drivers/gpu/drm/i915/i915_gem_gtt.c           2
-rw-r--r--    drivers/gpu/drm/i915/i915_irq.c               4
-rw-r--r--    drivers/gpu/drm/i915/i915_trace.h            10
9 files changed, 184 insertions(+), 189 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index a18e93687b8..608d3ae2b5b 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -211,7 +211,7 @@ static int i915_gem_object_info(struct seq_file *m, void* data)
 		   dev_priv->mm.object_memory);
 
 	size = count = mappable_size = mappable_count = 0;
-	count_objects(&dev_priv->mm.gtt_list, gtt_list);
+	count_objects(&dev_priv->mm.bound_list, gtt_list);
 	seq_printf(m, "%u [%u] objects, %zu [%zu] bytes in gtt\n",
 		   count, mappable_count, size, mappable_size);
 
@@ -225,8 +225,13 @@ static int i915_gem_object_info(struct seq_file *m, void* data)
 	seq_printf(m, " %u [%u] inactive objects, %zu [%zu] bytes\n",
 		   count, mappable_count, size, mappable_size);
 
+	size = count = 0;
+	list_for_each_entry(obj, &dev_priv->mm.unbound_list, gtt_list)
+		size += obj->base.size, ++count;
+	seq_printf(m, "%u unbound objects, %zu bytes\n", count, size);
+
 	size = count = mappable_size = mappable_count = 0;
-	list_for_each_entry(obj, &dev_priv->mm.gtt_list, gtt_list) {
+	list_for_each_entry(obj, &dev_priv->mm.bound_list, gtt_list) {
 		if (obj->fault_mappable) {
 			size += obj->gtt_space->size;
 			++count;
@@ -264,7 +269,7 @@ static int i915_gem_gtt_info(struct seq_file *m, void* data)
 		return ret;
 
 	total_obj_size = total_gtt_size = count = 0;
-	list_for_each_entry(obj, &dev_priv->mm.gtt_list, gtt_list) {
+	list_for_each_entry(obj, &dev_priv->mm.bound_list, gtt_list) {
 		if (list == PINNED_LIST && obj->pin_count == 0)
 			continue;
 
@@ -526,7 +531,8 @@ static int i915_gem_fence_regs_info(struct seq_file *m, void *data)
 	for (i = 0; i < dev_priv->num_fence_regs; i++) {
 		struct drm_i915_gem_object *obj = dev_priv->fence_regs[i].obj;
 
-		seq_printf(m, "Fenced object[%2d] = ", i);
+		seq_printf(m, "Fence %d, pin count = %d, object = ",
+			   i, dev_priv->fence_regs[i].pin_count);
 		if (obj == NULL)
 			seq_printf(m, "unused");
 		else
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index ed3ba70923a..a2382a175d9 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -685,7 +685,13 @@ typedef struct drm_i915_private {
 	struct drm_mm gtt_space;
 	/** List of all objects in gtt_space. Used to restore gtt
 	 *  mappings on resume */
-	struct list_head gtt_list;
+	struct list_head bound_list;
+	/**
+	 * List of objects which are not bound to the GTT (thus
+	 * are idle and not used by the GPU) but still have
+	 * (presumably uncached) pages still attached.
+	 */
+	struct list_head unbound_list;
 
 	/** Usable portion of the GTT for GEM */
 	unsigned long gtt_start;
@@ -1306,8 +1312,7 @@ int __must_check i915_gem_object_unbind(struct drm_i915_gem_object *obj);
 void i915_gem_release_mmap(struct drm_i915_gem_object *obj);
 void i915_gem_lastclose(struct drm_device *dev);
 
-int i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj,
-				  gfp_t gfpmask);
+int __must_check i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj);
 int __must_check i915_mutex_lock_interruptible(struct drm_device *dev);
 int i915_gem_object_sync(struct drm_i915_gem_object *obj,
 			 struct intel_ring_buffer *to);
@@ -1449,7 +1454,7 @@ int __must_check i915_gem_evict_something(struct drm_device *dev, int min_size,
 					  unsigned alignment,
 					  unsigned cache_level,
 					  bool mappable);
-int i915_gem_evict_everything(struct drm_device *dev, bool purgeable_only);
+int i915_gem_evict_everything(struct drm_device *dev);
 
 /* i915_gem_stolen.c */
 int i915_gem_init_stolen(struct drm_device *dev);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 0f70c2acfef..462a8f2ad3a 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -55,6 +55,8 @@ static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj,
 
 static int i915_gem_inactive_shrink(struct shrinker *shrinker,
 				    struct shrink_control *sc);
+static long i915_gem_purge(struct drm_i915_private *dev_priv, long target);
+static void i915_gem_shrink_all(struct drm_i915_private *dev_priv);
 static void i915_gem_object_truncate(struct drm_i915_gem_object *obj);
 
 static inline void i915_gem_object_fence_lost(struct drm_i915_gem_object *obj)
@@ -140,7 +142,7 @@ int i915_mutex_lock_interruptible(struct drm_device *dev)
 static inline bool
 i915_gem_object_is_inactive(struct drm_i915_gem_object *obj)
 {
-	return !obj->active;
+	return obj->gtt_space && !obj->active;
 }
 
 int
@@ -179,7 +181,7 @@ i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
 
 	pinned = 0;
 	mutex_lock(&dev->struct_mutex);
-	list_for_each_entry(obj, &dev_priv->mm.gtt_list, gtt_list)
+	list_for_each_entry(obj, &dev_priv->mm.bound_list, gtt_list)
 		if (obj->pin_count)
 			pinned += obj->gtt_space->size;
 	mutex_unlock(&dev->struct_mutex);
@@ -423,9 +425,11 @@ i915_gem_shmem_pread(struct drm_device *dev,
 		 * anyway again before the next pread happens. */
 		if (obj->cache_level == I915_CACHE_NONE)
 			needs_clflush = 1;
-		ret = i915_gem_object_set_to_gtt_domain(obj, false);
-		if (ret)
-			return ret;
+		if (obj->gtt_space) {
+			ret = i915_gem_object_set_to_gtt_domain(obj, false);
+			if (ret)
+				return ret;
+		}
 	}
 
 	offset = args->offset;
@@ -751,9 +755,11 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
 		 * right away and we therefore have to clflush anyway. */
 		if (obj->cache_level == I915_CACHE_NONE)
 			needs_clflush_after = 1;
-		ret = i915_gem_object_set_to_gtt_domain(obj, true);
-		if (ret)
-			return ret;
+		if (obj->gtt_space) {
+			ret = i915_gem_object_set_to_gtt_domain(obj, true);
+			if (ret)
+				return ret;
+		}
 	}
 	/* Same trick applies for invalidate partially written cachelines before
 	 * writing. */
@@ -1366,17 +1372,28 @@ i915_gem_object_is_purgeable(struct drm_i915_gem_object *obj)
 	return obj->madv == I915_MADV_DONTNEED;
 }
 
-static void
+static int
 i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj)
 {
 	int page_count = obj->base.size / PAGE_SIZE;
-	int i;
+	int ret, i;
 
-	if (!obj->pages)
-		return;
+	if (obj->pages == NULL)
+		return 0;
 
+	BUG_ON(obj->gtt_space);
 	BUG_ON(obj->madv == __I915_MADV_PURGED);
 
+	ret = i915_gem_object_set_to_cpu_domain(obj, true);
+	if (ret) {
+		/* In the event of a disaster, abandon all caches and
+		 * hope for the best.
+		 */
+		WARN_ON(ret != -EIO);
+		i915_gem_clflush_object(obj);
+		obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;
+	}
+
 	if (i915_gem_object_needs_bit17_swizzle(obj))
 		i915_gem_object_save_bit_17_swizzle(obj);
 
@@ -1396,37 +1413,112 @@ i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj)
 
 	drm_free_large(obj->pages);
 	obj->pages = NULL;
+
+	list_del(&obj->gtt_list);
+
+	if (i915_gem_object_is_purgeable(obj))
+		i915_gem_object_truncate(obj);
+
+	return 0;
+}
+
+static long
+i915_gem_purge(struct drm_i915_private *dev_priv, long target)
+{
+	struct drm_i915_gem_object *obj, *next;
+	long count = 0;
+
+	list_for_each_entry_safe(obj, next,
+				 &dev_priv->mm.unbound_list,
+				 gtt_list) {
+		if (i915_gem_object_is_purgeable(obj) &&
+		    i915_gem_object_put_pages_gtt(obj) == 0) {
+			count += obj->base.size >> PAGE_SHIFT;
+			if (count >= target)
+				return count;
+		}
+	}
+
+	list_for_each_entry_safe(obj, next,
+				 &dev_priv->mm.inactive_list,
+				 mm_list) {
+		if (i915_gem_object_is_purgeable(obj) &&
+		    i915_gem_object_unbind(obj) == 0 &&
+		    i915_gem_object_put_pages_gtt(obj) == 0) {
+			count += obj->base.size >> PAGE_SHIFT;
+			if (count >= target)
+				return count;
+		}
+	}
+
+	return count;
+}
+
+static void
+i915_gem_shrink_all(struct drm_i915_private *dev_priv)
+{
+	struct drm_i915_gem_object *obj, *next;
+
+	i915_gem_evict_everything(dev_priv->dev);
+
+	list_for_each_entry_safe(obj, next, &dev_priv->mm.unbound_list, gtt_list)
+		i915_gem_object_put_pages_gtt(obj);
 }
 
 int
-i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj,
-			      gfp_t gfpmask)
+i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
 {
+	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
 	int page_count, i;
 	struct address_space *mapping;
-	struct inode *inode;
 	struct page *page;
+	gfp_t gfp;
 
 	if (obj->pages || obj->sg_table)
 		return 0;
 
+	/* Assert that the object is not currently in any GPU domain. As it
+	 * wasn't in the GTT, there shouldn't be any way it could have been in
+	 * a GPU cache
+	 */
+	BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS);
+	BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS);
+
 	/* Get the list of pages out of our struct file.  They'll be pinned
 	 * at this point until we release them.
 	 */
 	page_count = obj->base.size / PAGE_SIZE;
-	BUG_ON(obj->pages != NULL);
 	obj->pages = drm_malloc_ab(page_count, sizeof(struct page *));
 	if (obj->pages == NULL)
 		return -ENOMEM;
 
-	inode = obj->base.filp->f_path.dentry->d_inode;
-	mapping = inode->i_mapping;
-	gfpmask |= mapping_gfp_mask(mapping);
-
+	/* Fail silently without starting the shrinker */
+	mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
+	gfp = mapping_gfp_mask(mapping);
+	gfp |= __GFP_NORETRY | __GFP_NOWARN | __GFP_NO_KSWAPD;
+	gfp &= ~(__GFP_IO | __GFP_WAIT);
 	for (i = 0; i < page_count; i++) {
-		page = shmem_read_mapping_page_gfp(mapping, i, gfpmask);
-		if (IS_ERR(page))
-			goto err_pages;
+		page = shmem_read_mapping_page_gfp(mapping, i, gfp);
+		if (IS_ERR(page)) {
+			i915_gem_purge(dev_priv, page_count);
+			page = shmem_read_mapping_page_gfp(mapping, i, gfp);
+		}
+		if (IS_ERR(page)) {
+			/* We've tried hard to allocate the memory by reaping
+			 * our own buffer, now let the real VM do its job and
+			 * go down in flames if truly OOM.
+			 */
+			gfp &= ~(__GFP_NORETRY | __GFP_NOWARN | __GFP_NO_KSWAPD);
+			gfp |= __GFP_IO | __GFP_WAIT;
+
+			i915_gem_shrink_all(dev_priv);
+			page = shmem_read_mapping_page_gfp(mapping, i, gfp);
+			if (IS_ERR(page))
+				goto err_pages;
+
+			gfp |= __GFP_NORETRY | __GFP_NOWARN | __GFP_NO_KSWAPD;
+			gfp &= ~(__GFP_IO | __GFP_WAIT);
+		}
 
 		obj->pages[i] = page;
 	}
@@ -1434,6 +1526,7 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj,
 	if (i915_gem_object_needs_bit17_swizzle(obj))
 		i915_gem_object_do_bit_17_swizzle(obj);
 
+	list_add_tail(&obj->gtt_list, &dev_priv->mm.unbound_list);
 	return 0;
 
 err_pages:
@@ -1698,6 +1791,7 @@ void i915_gem_reset(struct drm_device *dev)
 		obj->base.read_domains &= ~I915_GEM_GPU_DOMAINS;
 	}
 
+
 	/* The fence registers are invalidated so clear them out */
 	i915_gem_reset_fences(dev);
 }
@@ -2209,22 +2303,6 @@ i915_gem_object_unbind(struct drm_i915_gem_object *obj)
 
 	i915_gem_object_finish_gtt(obj);
 
-	/* Move the object to the CPU domain to ensure that
-	 * any possible CPU writes while it's not in the GTT
-	 * are flushed when we go to remap it.
-	 */
-	if (ret == 0)
-		ret = i915_gem_object_set_to_cpu_domain(obj, 1);
-	if (ret == -ERESTARTSYS)
-		return ret;
-	if (ret) {
-		/* In the event of a disaster, abandon all caches and
-		 * hope for the best.
-		 */
-		i915_gem_clflush_object(obj);
-		obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;
-	}
-
 	/* release the fence reg _after_ flushing */
 	ret = i915_gem_object_put_fence(obj);
 	if (ret)
@@ -2240,10 +2318,8 @@ i915_gem_object_unbind(struct drm_i915_gem_object *obj)
 	}
 	i915_gem_gtt_finish_object(obj);
 
-	i915_gem_object_put_pages_gtt(obj);
-
-	list_del_init(&obj->gtt_list);
-	list_del_init(&obj->mm_list);
+	list_del(&obj->mm_list);
+	list_move_tail(&obj->gtt_list, &dev_priv->mm.unbound_list);
 	/* Avoid an unnecessary call to unbind on rebind. */
 	obj->map_and_fenceable = true;
 
@@ -2251,10 +2327,7 @@ i915_gem_object_unbind(struct drm_i915_gem_object *obj)
 	obj->gtt_space = NULL;
 	obj->gtt_offset = 0;
 
-	if (i915_gem_object_is_purgeable(obj))
-		i915_gem_object_truncate(obj);
-
-	return ret;
+	return 0;
 }
 
 static int i915_ring_idle(struct intel_ring_buffer *ring)
@@ -2667,7 +2740,6 @@ i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
 	struct drm_device *dev = obj->base.dev;
 	drm_i915_private_t *dev_priv = dev->dev_private;
 	struct drm_mm_node *free_space;
-	gfp_t gfpmask = __GFP_NORETRY | __GFP_NOWARN;
 	u32 size, fence_size, fence_alignment, unfenced_alignment;
 	bool mappable, fenceable;
 	int ret;
@@ -2707,6 +2779,10 @@ i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
 		return -E2BIG;
 	}
 
+	ret = i915_gem_object_get_pages_gtt(obj);
+	if (ret)
+		return ret;
+
  search_free:
 	if (map_and_fenceable)
 		free_space =
@@ -2733,9 +2809,6 @@ i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
 						false);
 	}
 	if (obj->gtt_space == NULL) {
-		/* If the gtt is empty and we're still having trouble
-		 * fitting our object in, we're out of memory.
-		 */
 		ret = i915_gem_evict_something(dev, size, alignment,
 					       obj->cache_level,
 					       map_and_fenceable);
@@ -2752,55 +2825,20 @@ i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
 		return -EINVAL;
 	}
 
-	ret = i915_gem_object_get_pages_gtt(obj, gfpmask);
-	if (ret) {
-		drm_mm_put_block(obj->gtt_space);
-		obj->gtt_space = NULL;
-
-		if (ret == -ENOMEM) {
-			/* first try to reclaim some memory by clearing the GTT */
-			ret = i915_gem_evict_everything(dev, false);
-			if (ret) {
-				/* now try to shrink everyone else */
-				if (gfpmask) {
-					gfpmask = 0;
-					goto search_free;
-				}
-
-				return -ENOMEM;
-			}
-
-			goto search_free;
-		}
-
-		return ret;
-	}
 
 	ret = i915_gem_gtt_prepare_object(obj);
 	if (ret) {
-		i915_gem_object_put_pages_gtt(obj);
 		drm_mm_put_block(obj->gtt_space);
 		obj->gtt_space = NULL;
-
-		if (i915_gem_evict_everything(dev, false))
-			return ret;
-
-		goto search_free;
+		return ret;
 	}
 
 	if (!dev_priv->mm.aliasing_ppgtt)
 		i915_gem_gtt_bind_object(obj, obj->cache_level);
 
-	list_add_tail(&obj->gtt_list, &dev_priv->mm.gtt_list);
+	list_move_tail(&obj->gtt_list, &dev_priv->mm.bound_list);
 	list_add_tail(&obj->mm_list, &dev_priv->mm.inactive_list);
 
-	/* Assert that the object is not currently in any GPU domain. As it
-	 * wasn't in the GTT, there shouldn't be any way it could have been in
-	 * a GPU cache
-	 */
-	BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS);
-	BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS);
-
 	obj->gtt_offset = obj->gtt_space->start;
 
 	fenceable =
@@ -3464,9 +3502,8 @@ i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
 	if (obj->madv != __I915_MADV_PURGED)
 		obj->madv = args->madv;
 
-	/* if the object is no longer bound, discard its backing storage */
-	if (i915_gem_object_is_purgeable(obj) &&
-	    obj->gtt_space == NULL)
+	/* if the object is no longer attached, discard its backing storage */
+	if (i915_gem_object_is_purgeable(obj) && obj->pages == NULL)
 		i915_gem_object_truncate(obj);
 
 	args->retained = obj->madv != __I915_MADV_PURGED;
@@ -3573,6 +3610,7 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj)
 		dev_priv->mm.interruptible = was_interruptible;
 	}
 
+	i915_gem_object_put_pages_gtt(obj);
 	if (obj->base.map_list.map)
 		drm_gem_free_mmap_offset(&obj->base);
 
@@ -3605,7 +3643,7 @@ i915_gem_idle(struct drm_device *dev)
 
 	/* Under UMS, be paranoid and evict. */
 	if (!drm_core_check_feature(dev, DRIVER_MODESET))
-		i915_gem_evict_everything(dev, false);
+		i915_gem_evict_everything(dev);
 
 	i915_gem_reset_fences(dev);
 
@@ -3963,8 +4001,9 @@ i915_gem_load(struct drm_device *dev)
 
 	INIT_LIST_HEAD(&dev_priv->mm.active_list);
 	INIT_LIST_HEAD(&dev_priv->mm.inactive_list);
+	INIT_LIST_HEAD(&dev_priv->mm.unbound_list);
+	INIT_LIST_HEAD(&dev_priv->mm.bound_list);
 	INIT_LIST_HEAD(&dev_priv->mm.fence_list);
-	INIT_LIST_HEAD(&dev_priv->mm.gtt_list);
 	for (i = 0; i < I915_NUM_RINGS; i++)
 		init_ring_lists(&dev_priv->ring[i]);
 	for (i = 0; i < I915_MAX_NUM_FENCES; i++)
@@ -4209,13 +4248,6 @@ void i915_gem_release(struct drm_device *dev, struct drm_file *file)
 }
 
 static int
-i915_gpu_is_active(struct drm_device *dev)
-{
-	drm_i915_private_t *dev_priv = dev->dev_private;
-	return !list_empty(&dev_priv->mm.active_list);
-}
-
-static int
 i915_gem_inactive_shrink(struct shrinker *shrinker, struct shrink_control *sc)
 {
 	struct drm_i915_private *dev_priv =
@@ -4223,60 +4255,26 @@ i915_gem_inactive_shrink(struct shrinker *shrinker, struct shrink_control *sc)
 			     struct drm_i915_private,
 			     mm.inactive_shrinker);
 	struct drm_device *dev = dev_priv->dev;
-	struct drm_i915_gem_object *obj, *next;
+	struct drm_i915_gem_object *obj;
 	int nr_to_scan = sc->nr_to_scan;
 	int cnt;
 
 	if (!mutex_trylock(&dev->struct_mutex))
 		return 0;
 
-	/* "fast-path" to count number of available objects */
-	if (nr_to_scan == 0) {
-		cnt = 0;
-		list_for_each_entry(obj,
-				    &dev_priv->mm.inactive_list,
-				    mm_list)
-			cnt++;
-		mutex_unlock(&dev->struct_mutex);
-		return cnt / 100 * sysctl_vfs_cache_pressure;
+	if (nr_to_scan) {
+		nr_to_scan -= i915_gem_purge(dev_priv, nr_to_scan);
+		if (nr_to_scan > 0)
+			i915_gem_shrink_all(dev_priv);
 	}
 
-rescan:
-	/* first scan for clean buffers */
-	i915_gem_retire_requests(dev);
-
-	list_for_each_entry_safe(obj, next,
-				 &dev_priv->mm.inactive_list,
-				 mm_list) {
-		if (i915_gem_object_is_purgeable(obj)) {
-			if (i915_gem_object_unbind(obj) == 0 &&
-			    --nr_to_scan == 0)
-				break;
-		}
-	}
-
-	/* second pass, evict/count anything still on the inactive list */
 	cnt = 0;
-	list_for_each_entry_safe(obj, next,
-				 &dev_priv->mm.inactive_list,
-				 mm_list) {
-		if (nr_to_scan &&
-		    i915_gem_object_unbind(obj) == 0)
-			nr_to_scan--;
-		else
-			cnt++;
-	}
+	list_for_each_entry(obj, &dev_priv->mm.unbound_list, gtt_list)
+		cnt += obj->base.size >> PAGE_SHIFT;
+	list_for_each_entry(obj, &dev_priv->mm.bound_list, gtt_list)
+		if (obj->pin_count == 0)
+			cnt += obj->base.size >> PAGE_SHIFT;
 
-	if (nr_to_scan && i915_gpu_is_active(dev)) {
-		/*
-		 * We are desperate for pages, so as a last resort, wait
-		 * for the GPU to finish and discard whatever we can.
-		 * This has a dramatic impact to reduce the number of
-		 * OOM-killer events whilst running the GPU aggressively.
-		 */
-		if (i915_gpu_idle(dev) == 0)
-			goto rescan;
-	}
 	mutex_unlock(&dev->struct_mutex);
-	return cnt / 100 * sysctl_vfs_cache_pressure;
+	return cnt;
 }
diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
index ceaad5af01a..43c95307f99 100644
--- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
@@ -33,7 +33,7 @@ static struct sg_table *i915_gem_map_dma_buf(struct dma_buf_attachment *attachme
 	struct drm_i915_gem_object *obj = attachment->dmabuf->priv;
 	struct drm_device *dev = obj->base.dev;
 	int npages = obj->base.size / PAGE_SIZE;
-	struct sg_table *sg = NULL;
+	struct sg_table *sg;
 	int ret;
 	int nents;
 
@@ -41,10 +41,10 @@ static struct sg_table *i915_gem_map_dma_buf(struct dma_buf_attachment *attachme
 	if (ret)
 		return ERR_PTR(ret);
 
-	if (!obj->pages) {
-		ret = i915_gem_object_get_pages_gtt(obj, __GFP_NORETRY | __GFP_NOWARN);
-		if (ret)
-			goto out;
+	ret = i915_gem_object_get_pages_gtt(obj);
+	if (ret) {
+		sg = ERR_PTR(ret);
+		goto out;
 	}
 
 	/* link the pages into an SG then map the sg */
@@ -89,12 +89,10 @@ static void *i915_gem_dmabuf_vmap(struct dma_buf *dma_buf)
 		goto out_unlock;
 	}
 
-	if (!obj->pages) {
-		ret = i915_gem_object_get_pages_gtt(obj, __GFP_NORETRY | __GFP_NOWARN);
-		if (ret) {
-			mutex_unlock(&dev->struct_mutex);
-			return ERR_PTR(ret);
-		}
+	ret = i915_gem_object_get_pages_gtt(obj);
+	if (ret) {
+		mutex_unlock(&dev->struct_mutex);
+		return ERR_PTR(ret);
 	}
 
 	obj->dma_buf_vmapping = vmap(obj->pages, obj->base.size / PAGE_SIZE, 0, PAGE_KERNEL);
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
index 7279c31d4a9..74635da7c49 100644
--- a/drivers/gpu/drm/i915/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/i915_gem_evict.c
@@ -148,7 +148,7 @@ found:
 }
 
 int
-i915_gem_evict_everything(struct drm_device *dev, bool purgeable_only)
+i915_gem_evict_everything(struct drm_device *dev)
 {
 	drm_i915_private_t *dev_priv = dev->dev_private;
 	struct drm_i915_gem_object *obj, *next;
@@ -160,7 +160,7 @@ i915_gem_evict_everything(struct drm_device *dev, bool purgeable_only)
 	if (lists_empty)
 		return -ENOSPC;
 
-	trace_i915_gem_evict_everything(dev, purgeable_only);
+	trace_i915_gem_evict_everything(dev);
 
 	/* The gpu_idle will flush everything in the write domain to the
 	 * active list. Then we must move everything off the active list
@@ -174,12 +174,9 @@ i915_gem_evict_everything(struct drm_device *dev, bool purgeable_only)
 
 	/* Having flushed everything, unbind() should never raise an error */
 	list_for_each_entry_safe(obj, next,
-				 &dev_priv->mm.inactive_list, mm_list) {
-		if (!purgeable_only || obj->madv != I915_MADV_WILLNEED) {
-			if (obj->pin_count == 0)
-				WARN_ON(i915_gem_object_unbind(obj));
-		}
-	}
+				 &dev_priv->mm.inactive_list, mm_list)
+		if (obj->pin_count == 0)
+			WARN_ON(i915_gem_object_unbind(obj));
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index afb312ee050..834a636b44f 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -502,17 +502,12 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
 		}
 	}
 
-	if (ret != -ENOSPC || retry > 1)
+	if (ret != -ENOSPC || retry++)
 		return ret;
 
-	/* First attempt, just clear anything that is purgeable.
-	 * Second attempt, clear the entire GTT.
-	 */
-	ret = i915_gem_evict_everything(ring->dev, retry == 0);
+	ret = i915_gem_evict_everything(ring->dev);
 	if (ret)
 		return ret;
-
-	retry++;
 	} while (1);
 
 err:
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 3b3b731a17c..8329a14862e 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -348,7 +348,7 @@ void i915_gem_restore_gtt_mappings(struct drm_device *dev)
 	intel_gtt_clear_range(dev_priv->mm.gtt_start / PAGE_SIZE,
 			      (dev_priv->mm.gtt_end - dev_priv->mm.gtt_start) / PAGE_SIZE);
 
-	list_for_each_entry(obj, &dev_priv->mm.gtt_list, gtt_list) {
+	list_for_each_entry(obj, &dev_priv->mm.bound_list, gtt_list) {
 		i915_gem_clflush_object(obj);
 		i915_gem_gtt_bind_object(obj, obj->cache_level);
 	}
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index a61b41a8c60..002dceebbea 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1221,7 +1221,7 @@ static void i915_capture_error_state(struct drm_device *dev)
 	list_for_each_entry(obj, &dev_priv->mm.active_list, mm_list)
 		i++;
 	error->active_bo_count = i;
-	list_for_each_entry(obj, &dev_priv->mm.gtt_list, gtt_list)
+	list_for_each_entry(obj, &dev_priv->mm.bound_list, gtt_list)
 		if (obj->pin_count)
 			i++;
 	error->pinned_bo_count = i - error->active_bo_count;
@@ -1246,7 +1246,7 @@ static void i915_capture_error_state(struct drm_device *dev)
 		error->pinned_bo_count =
 			capture_pinned_bo(error->pinned_bo,
 					  error->pinned_bo_count,
-					  &dev_priv->mm.gtt_list);
+					  &dev_priv->mm.bound_list);
 
 	do_gettimeofday(&error->time);
 
diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h
index fe90b3a84a6..3c4093d91f6 100644
--- a/drivers/gpu/drm/i915/i915_trace.h
+++ b/drivers/gpu/drm/i915/i915_trace.h
@@ -214,22 +214,18 @@ TRACE_EVENT(i915_gem_evict,
 );
 
 TRACE_EVENT(i915_gem_evict_everything,
-	    TP_PROTO(struct drm_device *dev, bool purgeable),
-	    TP_ARGS(dev, purgeable),
+	    TP_PROTO(struct drm_device *dev),
+	    TP_ARGS(dev),
 
 	    TP_STRUCT__entry(
 			     __field(u32, dev)
-			     __field(bool, purgeable)
 			    ),
 
 	    TP_fast_assign(
 			   __entry->dev = dev->primary->index;
-			   __entry->purgeable = purgeable;
 			  ),
 
-	    TP_printk("dev=%d%s",
-		      __entry->dev,
-		      __entry->purgeable ? ", purgeable only" : "")
+	    TP_printk("dev=%d", __entry->dev)
 );
 
 TRACE_EVENT(i915_gem_ring_dispatch,