path: root/mm/vmscan.c
author		Jiri Kosina <jkosina@suse.cz>	2012-06-29 08:45:58 -0400
committer	Jiri Kosina <jkosina@suse.cz>	2012-06-29 08:45:58 -0400
commit		59f91e5dd0504dc0ebfaa0b6f3a55e6931f96266 (patch)
tree		b913718405d44a921905ac71044fbde410256865	/mm/vmscan.c
parent		57bdfdd80077addf518a9b90c4a66890efc4f70e (diff)
parent		89abfab133ef1f5902abafb744df72793213ac19 (diff)
Merge branch 'master' into for-next
Conflicts:
	include/linux/mmzone.h

Synced with Linus' tree so that trivial patch can be applied on top of
up-to-date code properly.

Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Diffstat (limited to 'mm/vmscan.c')
-rw-r--r--	mm/vmscan.c	306
1 file changed, 39 insertions(+), 267 deletions(-)
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 3d1365c1786..8deb5f4da4d 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -53,24 +53,6 @@
 #define CREATE_TRACE_POINTS
 #include <trace/events/vmscan.h>
 
-/*
- * reclaim_mode determines how the inactive list is shrunk
- * RECLAIM_MODE_SINGLE: Reclaim only order-0 pages
- * RECLAIM_MODE_ASYNC: Do not block
- * RECLAIM_MODE_SYNC: Allow blocking e.g. call wait_on_page_writeback
- * RECLAIM_MODE_LUMPYRECLAIM: For high-order allocations, take a reference
- *			page from the LRU and reclaim all pages within a
- *			naturally aligned range
- * RECLAIM_MODE_COMPACTION: For high-order allocations, reclaim a number of
- *			order-0 pages and then compact the zone
- */
-typedef unsigned __bitwise__ reclaim_mode_t;
-#define RECLAIM_MODE_SINGLE		((__force reclaim_mode_t)0x01u)
-#define RECLAIM_MODE_ASYNC		((__force reclaim_mode_t)0x02u)
-#define RECLAIM_MODE_SYNC		((__force reclaim_mode_t)0x04u)
-#define RECLAIM_MODE_LUMPYRECLAIM	((__force reclaim_mode_t)0x08u)
-#define RECLAIM_MODE_COMPACTION		((__force reclaim_mode_t)0x10u)
-
 struct scan_control {
 	/* Incremented by the number of inactive pages that were scanned */
 	unsigned long nr_scanned;
@@ -97,12 +79,6 @@ struct scan_control {
 	int order;
 
 	/*
-	 * Intend to reclaim enough continuous memory rather than reclaim
-	 * enough amount of memory. i.e, mode for high order allocation.
-	 */
-	reclaim_mode_t reclaim_mode;
-
-	/*
 	 * The memory cgroup that hit its limit and as a result is the
 	 * primary target of this reclaim invocation.
 	 */
@@ -164,35 +140,22 @@ static bool global_reclaim(struct scan_control *sc)
 {
 	return !sc->target_mem_cgroup;
 }
-
-static bool scanning_global_lru(struct mem_cgroup_zone *mz)
-{
-	return !mz->mem_cgroup;
-}
 #else
 static bool global_reclaim(struct scan_control *sc)
 {
 	return true;
 }
-
-static bool scanning_global_lru(struct mem_cgroup_zone *mz)
-{
-	return true;
-}
 #endif
 
 static struct zone_reclaim_stat *get_reclaim_stat(struct mem_cgroup_zone *mz)
 {
-	if (!scanning_global_lru(mz))
-		return mem_cgroup_get_reclaim_stat(mz->mem_cgroup, mz->zone);
-
-	return &mz->zone->reclaim_stat;
+	return &mem_cgroup_zone_lruvec(mz->zone, mz->mem_cgroup)->reclaim_stat;
 }
 
 static unsigned long zone_nr_lru_pages(struct mem_cgroup_zone *mz,
 				       enum lru_list lru)
 {
-	if (!scanning_global_lru(mz))
+	if (!mem_cgroup_disabled())
 		return mem_cgroup_zone_nr_lru_pages(mz->mem_cgroup,
 						    zone_to_nid(mz->zone),
 						    zone_idx(mz->zone),
@@ -364,39 +327,6 @@ out:
 	return ret;
 }
 
-static void set_reclaim_mode(int priority, struct scan_control *sc,
-			     bool sync)
-{
-	reclaim_mode_t syncmode = sync ? RECLAIM_MODE_SYNC : RECLAIM_MODE_ASYNC;
-
-	/*
-	 * Initially assume we are entering either lumpy reclaim or
-	 * reclaim/compaction. Depending on the order, we will either set the
-	 * sync mode or just reclaim order-0 pages later.
-	 */
-	if (COMPACTION_BUILD)
-		sc->reclaim_mode = RECLAIM_MODE_COMPACTION;
-	else
-		sc->reclaim_mode = RECLAIM_MODE_LUMPYRECLAIM;
-
-	/*
-	 * Avoid using lumpy reclaim or reclaim/compaction if possible by
-	 * restricting when its set to either costly allocations or when
-	 * under memory pressure
-	 */
-	if (sc->order > PAGE_ALLOC_COSTLY_ORDER)
-		sc->reclaim_mode |= syncmode;
-	else if (sc->order && priority < DEF_PRIORITY - 2)
-		sc->reclaim_mode |= syncmode;
-	else
-		sc->reclaim_mode = RECLAIM_MODE_SINGLE | RECLAIM_MODE_ASYNC;
-}
-
-static void reset_reclaim_mode(struct scan_control *sc)
-{
-	sc->reclaim_mode = RECLAIM_MODE_SINGLE | RECLAIM_MODE_ASYNC;
-}
-
 static inline int is_page_cache_freeable(struct page *page)
 {
 	/*
@@ -416,10 +346,6 @@ static int may_write_to_queue(struct backing_dev_info *bdi,
 		return 1;
 	if (bdi == current->backing_dev_info)
 		return 1;
-
-	/* lumpy reclaim for hugepage often need a lot of write */
-	if (sc->order > PAGE_ALLOC_COSTLY_ORDER)
-		return 1;
 	return 0;
 }
 
@@ -523,8 +449,7 @@ static pageout_t pageout(struct page *page, struct address_space *mapping,
 		/* synchronous write or broken a_ops? */
 			ClearPageReclaim(page);
 		}
-		trace_mm_vmscan_writepage(page,
-			trace_reclaim_flags(page, sc->reclaim_mode));
+		trace_mm_vmscan_writepage(page, trace_reclaim_flags(page));
 		inc_zone_page_state(page, NR_VMSCAN_WRITE);
 		return PAGE_SUCCESS;
 	}
@@ -707,13 +632,10 @@ static enum page_references page_check_references(struct page *page,
 	int referenced_ptes, referenced_page;
 	unsigned long vm_flags;
 
-	referenced_ptes = page_referenced(page, 1, mz->mem_cgroup, &vm_flags);
+	referenced_ptes = page_referenced(page, 1, sc->target_mem_cgroup,
+					  &vm_flags);
 	referenced_page = TestClearPageReferenced(page);
 
-	/* Lumpy reclaim - ignore references */
-	if (sc->reclaim_mode & RECLAIM_MODE_LUMPYRECLAIM)
-		return PAGEREF_RECLAIM;
-
 	/*
 	 * Mlock lost the isolation race with us. Let try_to_unmap()
 	 * move the page to the unevictable list.
@@ -722,7 +644,7 @@ static enum page_references page_check_references(struct page *page,
 		return PAGEREF_RECLAIM;
 
 	if (referenced_ptes) {
-		if (PageAnon(page))
+		if (PageSwapBacked(page))
 			return PAGEREF_ACTIVATE;
 		/*
 		 * All mapped pages start out with page table
@@ -813,19 +735,8 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 
 		if (PageWriteback(page)) {
 			nr_writeback++;
-			/*
-			 * Synchronous reclaim cannot queue pages for
-			 * writeback due to the possibility of stack overflow
-			 * but if it encounters a page under writeback, wait
-			 * for the IO to complete.
-			 */
-			if ((sc->reclaim_mode & RECLAIM_MODE_SYNC) &&
-			    may_enter_fs)
-				wait_on_page_writeback(page);
-			else {
-				unlock_page(page);
-				goto keep_lumpy;
-			}
+			unlock_page(page);
+			goto keep;
 		}
 
 		references = page_check_references(page, mz, sc);
@@ -908,7 +819,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 			goto activate_locked;
 		case PAGE_SUCCESS:
 			if (PageWriteback(page))
-				goto keep_lumpy;
+				goto keep;
 			if (PageDirty(page))
 				goto keep;
 
@@ -994,7 +905,6 @@ cull_mlocked:
 		try_to_free_swap(page);
 		unlock_page(page);
 		putback_lru_page(page);
-		reset_reclaim_mode(sc);
 		continue;
 
 activate_locked:
@@ -1007,8 +917,6 @@ activate_locked:
 keep_locked:
 		unlock_page(page);
 keep:
-		reset_reclaim_mode(sc);
-keep_lumpy:
 		list_add(&page->lru, &ret_pages);
 		VM_BUG_ON(PageLRU(page) || PageUnevictable(page));
 	}
@@ -1064,11 +972,7 @@ int __isolate_lru_page(struct page *page, isolate_mode_t mode, int file)
 	if (!all_lru_mode && !!page_is_file_cache(page) != file)
 		return ret;
 
-	/*
-	 * When this function is being called for lumpy reclaim, we
-	 * initially look into all LRU pages, active, inactive and
-	 * unevictable; only give shrink_page_list evictable pages.
-	 */
+	/* Do not give back unevictable pages for compaction */
 	if (PageUnevictable(page))
 		return ret;
 
@@ -1153,9 +1057,6 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
 	struct lruvec *lruvec;
 	struct list_head *src;
 	unsigned long nr_taken = 0;
-	unsigned long nr_lumpy_taken = 0;
-	unsigned long nr_lumpy_dirty = 0;
-	unsigned long nr_lumpy_failed = 0;
 	unsigned long scan;
 	int lru = LRU_BASE;
 
@@ -1168,10 +1069,6 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
 
 	for (scan = 0; scan < nr_to_scan && !list_empty(src); scan++) {
 		struct page *page;
-		unsigned long pfn;
-		unsigned long end_pfn;
-		unsigned long page_pfn;
-		int zone_id;
 
 		page = lru_to_page(src);
 		prefetchw_prev_lru_page(page, src, flags);
@@ -1193,84 +1090,6 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
 		default:
 			BUG();
 		}
-
-		if (!sc->order || !(sc->reclaim_mode & RECLAIM_MODE_LUMPYRECLAIM))
-			continue;
-
-		/*
-		 * Attempt to take all pages in the order aligned region
-		 * surrounding the tag page. Only take those pages of
-		 * the same active state as that tag page. We may safely
-		 * round the target page pfn down to the requested order
-		 * as the mem_map is guaranteed valid out to MAX_ORDER,
-		 * where that page is in a different zone we will detect
-		 * it from its zone id and abort this block scan.
-		 */
-		zone_id = page_zone_id(page);
-		page_pfn = page_to_pfn(page);
-		pfn = page_pfn & ~((1 << sc->order) - 1);
-		end_pfn = pfn + (1 << sc->order);
-		for (; pfn < end_pfn; pfn++) {
-			struct page *cursor_page;
-
-			/* The target page is in the block, ignore it. */
-			if (unlikely(pfn == page_pfn))
-				continue;
-
-			/* Avoid holes within the zone. */
-			if (unlikely(!pfn_valid_within(pfn)))
-				break;
-
-			cursor_page = pfn_to_page(pfn);
-
-			/* Check that we have not crossed a zone boundary. */
-			if (unlikely(page_zone_id(cursor_page) != zone_id))
-				break;
-
-			/*
-			 * If we don't have enough swap space, reclaiming of
-			 * anon page which don't already have a swap slot is
-			 * pointless.
-			 */
-			if (nr_swap_pages <= 0 && PageSwapBacked(cursor_page) &&
-			    !PageSwapCache(cursor_page))
-				break;
-
-			if (__isolate_lru_page(cursor_page, mode, file) == 0) {
-				unsigned int isolated_pages;
-
-				mem_cgroup_lru_del(cursor_page);
-				list_move(&cursor_page->lru, dst);
-				isolated_pages = hpage_nr_pages(cursor_page);
-				nr_taken += isolated_pages;
-				nr_lumpy_taken += isolated_pages;
-				if (PageDirty(cursor_page))
-					nr_lumpy_dirty += isolated_pages;
-				scan++;
-				pfn += isolated_pages - 1;
-			} else {
-				/*
-				 * Check if the page is freed already.
-				 *
-				 * We can't use page_count() as that
-				 * requires compound_head and we don't
-				 * have a pin on the page here. If a
-				 * page is tail, we may or may not
-				 * have isolated the head, so assume
-				 * it's not free, it'd be tricky to
-				 * track the head status without a
-				 * page pin.
-				 */
-				if (!PageTail(cursor_page) &&
-				    !atomic_read(&cursor_page->_count))
-					continue;
-				break;
-			}
-		}
-
-		/* If we break out of the loop above, lumpy reclaim failed */
-		if (pfn < end_pfn)
-			nr_lumpy_failed++;
 	}
 
 	*nr_scanned = scan;
@@ -1278,7 +1097,6 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
 	trace_mm_vmscan_lru_isolate(sc->order,
 			nr_to_scan, scan,
 			nr_taken,
-			nr_lumpy_taken, nr_lumpy_dirty, nr_lumpy_failed,
 			mode, file);
 	return nr_taken;
 }
@@ -1454,47 +1272,6 @@ update_isolated_counts(struct mem_cgroup_zone *mz,
 }
 
 /*
- * Returns true if a direct reclaim should wait on pages under writeback.
- *
- * If we are direct reclaiming for contiguous pages and we do not reclaim
- * everything in the list, try again and wait for writeback IO to complete.
- * This will stall high-order allocations noticeably. Only do that when really
- * need to free the pages under high memory pressure.
- */
-static inline bool should_reclaim_stall(unsigned long nr_taken,
-					unsigned long nr_freed,
-					int priority,
-					struct scan_control *sc)
-{
-	int lumpy_stall_priority;
-
-	/* kswapd should not stall on sync IO */
-	if (current_is_kswapd())
-		return false;
-
-	/* Only stall on lumpy reclaim */
-	if (sc->reclaim_mode & RECLAIM_MODE_SINGLE)
-		return false;
-
-	/* If we have reclaimed everything on the isolated list, no stall */
-	if (nr_freed == nr_taken)
-		return false;
-
-	/*
-	 * For high-order allocations, there are two stall thresholds.
-	 * High-cost allocations stall immediately where as lower
-	 * order allocations such as stacks require the scanning
-	 * priority to be much higher before stalling.
-	 */
-	if (sc->order > PAGE_ALLOC_COSTLY_ORDER)
-		lumpy_stall_priority = DEF_PRIORITY;
-	else
-		lumpy_stall_priority = DEF_PRIORITY / 3;
-
-	return priority <= lumpy_stall_priority;
-}
-
-/*
  * shrink_inactive_list() is a helper for shrink_zone(). It returns the number
  * of reclaimed pages
  */
@@ -1522,10 +1299,6 @@ shrink_inactive_list(unsigned long nr_to_scan, struct mem_cgroup_zone *mz,
 		return SWAP_CLUSTER_MAX;
 	}
 
-	set_reclaim_mode(priority, sc, false);
-	if (sc->reclaim_mode & RECLAIM_MODE_LUMPYRECLAIM)
-		isolate_mode |= ISOLATE_ACTIVE;
-
 	lru_add_drain();
 
 	if (!sc->may_unmap)
@@ -1556,13 +1329,6 @@ shrink_inactive_list(unsigned long nr_to_scan, struct mem_cgroup_zone *mz,
 	nr_reclaimed = shrink_page_list(&page_list, mz, sc, priority,
 						&nr_dirty, &nr_writeback);
 
-	/* Check if we should syncronously wait for writeback */
-	if (should_reclaim_stall(nr_taken, nr_reclaimed, priority, sc)) {
-		set_reclaim_mode(priority, sc, true);
-		nr_reclaimed += shrink_page_list(&page_list, mz, sc,
-					priority, &nr_dirty, &nr_writeback);
-	}
-
 	spin_lock_irq(&zone->lru_lock);
 
 	reclaim_stat->recent_scanned[0] += nr_anon;
@@ -1616,7 +1382,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct mem_cgroup_zone *mz,
 		zone_idx(zone),
 		nr_scanned, nr_reclaimed,
 		priority,
-		trace_shrink_flags(file, sc->reclaim_mode));
+		trace_shrink_flags(file));
 	return nr_reclaimed;
 }
 
@@ -1695,8 +1461,6 @@ static void shrink_active_list(unsigned long nr_to_scan,
 
 	lru_add_drain();
 
-	reset_reclaim_mode(sc);
-
 	if (!sc->may_unmap)
 		isolate_mode |= ISOLATE_UNMAPPED;
 	if (!sc->may_writepage)
@@ -1737,7 +1501,8 @@ static void shrink_active_list(unsigned long nr_to_scan,
 			}
 		}
 
-		if (page_referenced(page, 0, mz->mem_cgroup, &vm_flags)) {
+		if (page_referenced(page, 0, sc->target_mem_cgroup,
+				    &vm_flags)) {
 			nr_rotated += hpage_nr_pages(page);
 			/*
 			 * Identify referenced, file-backed active pages and
@@ -1811,7 +1576,7 @@ static int inactive_anon_is_low(struct mem_cgroup_zone *mz)
 	if (!total_swap_pages)
 		return 0;
 
-	if (!scanning_global_lru(mz))
+	if (!mem_cgroup_disabled())
 		return mem_cgroup_inactive_anon_is_low(mz->mem_cgroup,
 						       mz->zone);
 
@@ -1850,7 +1615,7 @@ static int inactive_file_is_low_global(struct zone *zone)
  */
 static int inactive_file_is_low(struct mem_cgroup_zone *mz)
 {
-	if (!scanning_global_lru(mz))
+	if (!mem_cgroup_disabled())
 		return mem_cgroup_inactive_file_is_low(mz->mem_cgroup,
 						       mz->zone);
 
@@ -1984,10 +1749,10 @@ static void get_scan_count(struct mem_cgroup_zone *mz, struct scan_control *sc,
 	 * proportional to the fraction of recently scanned pages on
 	 * each list that were recently referenced and in active use.
 	 */
-	ap = (anon_prio + 1) * (reclaim_stat->recent_scanned[0] + 1);
+	ap = anon_prio * (reclaim_stat->recent_scanned[0] + 1);
 	ap /= reclaim_stat->recent_rotated[0] + 1;
 
-	fp = (file_prio + 1) * (reclaim_stat->recent_scanned[1] + 1);
+	fp = file_prio * (reclaim_stat->recent_scanned[1] + 1);
 	fp /= reclaim_stat->recent_rotated[1] + 1;
 	spin_unlock_irq(&mz->zone->lru_lock);
 
@@ -2000,7 +1765,7 @@ out:
 		unsigned long scan;
 
 		scan = zone_nr_lru_pages(mz, lru);
-		if (priority || noswap) {
+		if (priority || noswap || !vmscan_swappiness(mz, sc)) {
 			scan >>= priority;
 			if (!scan && force_scan)
 				scan = SWAP_CLUSTER_MAX;
@@ -2010,23 +1775,35 @@ out:
 	}
 }
 
+/* Use reclaim/compaction for costly allocs or under memory pressure */
+static bool in_reclaim_compaction(int priority, struct scan_control *sc)
+{
+	if (COMPACTION_BUILD && sc->order &&
+			(sc->order > PAGE_ALLOC_COSTLY_ORDER ||
+			 priority < DEF_PRIORITY - 2))
+		return true;
+
+	return false;
+}
+
 /*
- * Reclaim/compaction depends on a number of pages being freed. To avoid
- * disruption to the system, a small number of order-0 pages continue to be
- * rotated and reclaimed in the normal fashion. However, by the time we get
- * back to the allocator and call try_to_compact_zone(), we ensure that
- * there are enough free pages for it to be likely successful
+ * Reclaim/compaction is used for high-order allocation requests. It reclaims
+ * order-0 pages before compacting the zone. should_continue_reclaim() returns
+ * true if more pages should be reclaimed such that when the page allocator
+ * calls try_to_compact_zone() that it will have enough free pages to succeed.
+ * It will give up earlier than that if there is difficulty reclaiming pages.
  */
 static inline bool should_continue_reclaim(struct mem_cgroup_zone *mz,
 					   unsigned long nr_reclaimed,
 					   unsigned long nr_scanned,
+					   int priority,
 					   struct scan_control *sc)
 {
 	unsigned long pages_for_compaction;
 	unsigned long inactive_lru_pages;
 
 	/* If not in reclaim/compaction mode, stop */
-	if (!(sc->reclaim_mode & RECLAIM_MODE_COMPACTION))
+	if (!in_reclaim_compaction(priority, sc))
 		return false;
 
 	/* Consider stopping depending on scan and reclaim activity */
@@ -2128,7 +1905,8 @@ restart:
 
 	/* reclaim/compaction might need reclaim to continue */
 	if (should_continue_reclaim(mz, nr_reclaimed,
-					sc->nr_scanned - nr_scanned, sc))
+					sc->nr_scanned - nr_scanned,
+					priority, sc))
 		goto restart;
 
 	throttle_vm_writeout(sc->gfp_mask);
@@ -2353,8 +2131,6 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
 
 	for (priority = DEF_PRIORITY; priority >= 0; priority--) {
 		sc->nr_scanned = 0;
-		if (!priority)
-			disable_swap_token(sc->target_mem_cgroup);
 		aborted_reclaim = shrink_zones(priority, zonelist, sc);
 
 		/*
@@ -2705,10 +2481,6 @@ loop_again:
 		unsigned long lru_pages = 0;
 		int has_under_min_watermark_zone = 0;
 
-		/* The swap token gets in the way of swapout... */
-		if (!priority)
-			disable_swap_token(NULL);
-
 		all_zones_ok = 1;
 		balanced = 0;
 
@@ -3537,7 +3309,7 @@ int page_evictable(struct page *page, struct vm_area_struct *vma)
 	if (mapping_unevictable(page_mapping(page)))
 		return 0;
 
-	if (PageMlocked(page) || (vma && is_mlocked_vma(vma, page)))
+	if (PageMlocked(page) || (vma && mlocked_vma_newpage(vma, page)))
 		return 0;
 
 	return 1;