-rw-r--r--  include/linux/compaction.h |  14
-rw-r--r--  include/linux/kernel.h     |   7
-rw-r--r--  mm/compaction.c            |  89
-rw-r--r--  mm/migrate.c               |  17
-rw-r--r--  mm/page_alloc.c            |  16
-rw-r--r--  mm/vmscan.c                | 102
6 files changed, 196 insertions, 49 deletions
diff --git a/include/linux/compaction.h b/include/linux/compaction.h
index 5ac51552d908..2592883d862d 100644
--- a/include/linux/compaction.h
+++ b/include/linux/compaction.h
@@ -22,6 +22,9 @@ extern int sysctl_extfrag_handler(struct ctl_table *table, int write,
 extern int fragmentation_index(struct zone *zone, unsigned int order);
 extern unsigned long try_to_compact_pages(struct zonelist *zonelist,
 			int order, gfp_t gfp_mask, nodemask_t *mask);
+extern unsigned long compaction_suitable(struct zone *zone, int order);
+extern unsigned long compact_zone_order(struct zone *zone, int order,
+			gfp_t gfp_mask);
 
 /* Do not skip compaction more than 64 times */
 #define COMPACT_MAX_DEFER_SHIFT 6
@@ -59,6 +62,17 @@ static inline unsigned long try_to_compact_pages(struct zonelist *zonelist,
 	return COMPACT_CONTINUE;
 }
 
+static inline unsigned long compaction_suitable(struct zone *zone, int order)
+{
+	return COMPACT_SKIPPED;
+}
+
+static inline unsigned long compact_zone_order(struct zone *zone, int order,
+			gfp_t gfp_mask)
+{
+	return 0;
+}
+
 static inline void defer_compaction(struct zone *zone)
 {
 }
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 57dac7022b63..5a9d9059520b 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -600,6 +600,13 @@ struct sysinfo {
 #define NUMA_BUILD 0
 #endif
 
+/* This helps us avoid #ifdef CONFIG_COMPACTION */
+#ifdef CONFIG_COMPACTION
+#define COMPACTION_BUILD 1
+#else
+#define COMPACTION_BUILD 0
+#endif
+
 /* Rebuild everything on CONFIG_FTRACE_MCOUNT_RECORD */
 #ifdef CONFIG_FTRACE_MCOUNT_RECORD
 # define REBUILD_DUE_TO_FTRACE_MCOUNT_RECORD
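The new COMPACTION_BUILD constant follows the NUMA_BUILD convention directly above it: feature-dependent code can sit inside an ordinary C if statement that is still type-checked in every configuration, while the compiler discards the dead branch. A minimal standalone sketch of the same pattern follows; CONFIG_FEATURE, FEATURE_BUILD and pick_mode() are illustrative names, not kernel code.

/*
 * Standalone illustration of the 0/1 build-constant pattern: the feature
 * branch stays inside a normal C "if", so it is compiled and type-checked
 * in every configuration, and constant folding drops the dead branch when
 * the feature is disabled. All names here are illustrative.
 */
#include <stdio.h>

#ifdef CONFIG_FEATURE
#define FEATURE_BUILD 1
#else
#define FEATURE_BUILD 0
#endif

static const char *pick_mode(void)
{
	/* Both branches compile; only one survives optimization. */
	if (FEATURE_BUILD)
		return "feature path";
	return "fallback path";
}

int main(void)
{
	/* prints "fallback path" unless built with -DCONFIG_FEATURE */
	printf("%s\n", pick_mode());
	return 0;
}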
diff --git a/mm/compaction.c b/mm/compaction.c
index 20011a850fef..8fe917ec7c11 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -384,10 +384,62 @@ static int compact_finished(struct zone *zone,
 	return COMPACT_CONTINUE;
 }
 
+/*
+ * compaction_suitable: Is this suitable to run compaction on this zone now?
+ * Returns
+ *   COMPACT_SKIPPED  - If there are too few free pages for compaction
+ *   COMPACT_PARTIAL  - If the allocation would succeed without compaction
+ *   COMPACT_CONTINUE - If compaction should run now
+ */
+unsigned long compaction_suitable(struct zone *zone, int order)
+{
+	int fragindex;
+	unsigned long watermark;
+
+	/*
+	 * Watermarks for order-0 must be met for compaction. Note the 2UL.
+	 * This is because during migration, copies of pages need to be
+	 * allocated and for a short time, the footprint is higher
+	 */
+	watermark = low_wmark_pages(zone) + (2UL << order);
+	if (!zone_watermark_ok(zone, 0, watermark, 0, 0))
+		return COMPACT_SKIPPED;
+
+	/*
+	 * fragmentation index determines if allocation failures are due to
+	 * low memory or external fragmentation
+	 *
+	 * index of -1 implies allocations might succeed depending on watermarks
+	 * index towards 0 implies failure is due to lack of memory
+	 * index towards 1000 implies failure is due to fragmentation
+	 *
+	 * Only compact if a failure would be due to fragmentation.
+	 */
+	fragindex = fragmentation_index(zone, order);
+	if (fragindex >= 0 && fragindex <= sysctl_extfrag_threshold)
+		return COMPACT_SKIPPED;
+
+	if (fragindex == -1 && zone_watermark_ok(zone, order, watermark, 0, 0))
+		return COMPACT_PARTIAL;
+
+	return COMPACT_CONTINUE;
+}
+
 static int compact_zone(struct zone *zone, struct compact_control *cc)
 {
 	int ret;
 
+	ret = compaction_suitable(zone, cc->order);
+	switch (ret) {
+	case COMPACT_PARTIAL:
+	case COMPACT_SKIPPED:
+		/* Compaction is likely to fail */
+		return ret;
+	case COMPACT_CONTINUE:
+		/* Fall through to compaction */
+		;
+	}
+
 	/* Setup to move all movable pages to the end of the zone */
 	cc->migrate_pfn = zone->zone_start_pfn;
 	cc->free_pfn = cc->migrate_pfn + zone->spanned_pages;
@@ -429,7 +481,7 @@ static int compact_zone(struct zone *zone, struct compact_control *cc)
 	return ret;
 }
 
-static unsigned long compact_zone_order(struct zone *zone,
+unsigned long compact_zone_order(struct zone *zone,
 						int order, gfp_t gfp_mask)
 {
 	struct compact_control cc = {
@@ -462,7 +514,6 @@ unsigned long try_to_compact_pages(struct zonelist *zonelist,
 	enum zone_type high_zoneidx = gfp_zone(gfp_mask);
 	int may_enter_fs = gfp_mask & __GFP_FS;
 	int may_perform_io = gfp_mask & __GFP_IO;
-	unsigned long watermark;
 	struct zoneref *z;
 	struct zone *zone;
 	int rc = COMPACT_SKIPPED;
@@ -480,43 +531,13 @@ unsigned long try_to_compact_pages(struct zonelist *zonelist,
 	/* Compact each zone in the list */
 	for_each_zone_zonelist_nodemask(zone, z, zonelist, high_zoneidx,
 								nodemask) {
-		int fragindex;
 		int status;
 
-		/*
-		 * Watermarks for order-0 must be met for compaction. Note
-		 * the 2UL. This is because during migration, copies of
-		 * pages need to be allocated and for a short time, the
-		 * footprint is higher
-		 */
-		watermark = low_wmark_pages(zone) + (2UL << order);
-		if (!zone_watermark_ok(zone, 0, watermark, 0, 0))
-			continue;
-
-		/*
-		 * fragmentation index determines if allocation failures are
-		 * due to low memory or external fragmentation
-		 *
-		 * index of -1 implies allocations might succeed depending
-		 * on watermarks
-		 * index towards 0 implies failure is due to lack of memory
-		 * index towards 1000 implies failure is due to fragmentation
-		 *
-		 * Only compact if a failure would be due to fragmentation.
-		 */
-		fragindex = fragmentation_index(zone, order);
-		if (fragindex >= 0 && fragindex <= sysctl_extfrag_threshold)
-			continue;
-
-		if (fragindex == -1 && zone_watermark_ok(zone, order, watermark, 0, 0)) {
-			rc = COMPACT_PARTIAL;
-			break;
-		}
-
 		status = compact_zone_order(zone, order, gfp_mask);
 		rc = max(status, rc);
 
-		if (zone_watermark_ok(zone, order, watermark, 0, 0))
+		/* If a normal allocation would succeed, stop compacting */
+		if (zone_watermark_ok(zone, order, low_wmark_pages(zone), 0, 0))
 			break;
 	}
 
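compaction_suitable() centralises a three-way decision that try_to_compact_pages() previously open-coded per zone. A rough userspace sketch of that decision is below, assuming the default extfrag threshold of 500; suitability() and its boolean parameters are illustrative stand-ins for the kernel's watermark checks, not the kernel API.

/*
 * Userspace sketch of the three-way result compaction_suitable() encodes.
 * The watermark checks are reduced to booleans and the extfrag threshold
 * is assumed to be the default of 500; none of this is kernel code.
 */
#include <stdbool.h>
#include <stdio.h>

enum { SKIPPED, PARTIAL, CONTINUE };

static const int extfrag_threshold = 500;

static int suitability(bool order0_watermark_ok, bool order_watermark_ok,
		       int fragindex)
{
	/* No spare order-0 pages to hold migration copies: cannot compact */
	if (!order0_watermark_ok)
		return SKIPPED;
	/* Failure would come from low memory, not fragmentation: reclaim instead */
	if (fragindex >= 0 && fragindex <= extfrag_threshold)
		return SKIPPED;
	/* Enough contiguous memory already: the allocation should just succeed */
	if (fragindex == -1 && order_watermark_ok)
		return PARTIAL;
	return CONTINUE;
}

int main(void)
{
	/* plenty of free pages but heavily fragmented: prints 2 (CONTINUE) */
	printf("%d\n", suitability(true, false, 800));
	return 0;
}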
diff --git a/mm/migrate.c b/mm/migrate.c
index 6ae8a66a7045..94875b265928 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -639,6 +639,23 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private,
 	if (!trylock_page(page)) {
 		if (!force)
 			goto move_newpage;
+
+		/*
+		 * It's not safe for direct compaction to call lock_page.
+		 * For example, during page readahead pages are added locked
+		 * to the LRU. Later, when the IO completes the pages are
+		 * marked uptodate and unlocked. However, the queueing
+		 * could be merging multiple pages for one bio (e.g.
+		 * mpage_readpages). If an allocation happens for the
+		 * second or third page, the process can end up locking
+		 * the same page twice and deadlocking. Rather than
+		 * trying to be clever about what pages can be locked,
+		 * avoid the use of lock_page for direct compaction
+		 * altogether.
+		 */
+		if (current->flags & PF_MEMALLOC)
+			goto move_newpage;
+
 		lock_page(page);
 	}
 
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 22a1bb7723e4..03a66a31bfcd 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1815,12 +1815,15 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
 	int migratetype, unsigned long *did_some_progress)
 {
 	struct page *page;
+	struct task_struct *tsk = current;
 
 	if (!order || compaction_deferred(preferred_zone))
 		return NULL;
 
+	tsk->flags |= PF_MEMALLOC;
 	*did_some_progress = try_to_compact_pages(zonelist, order, gfp_mask,
 								nodemask);
+	tsk->flags &= ~PF_MEMALLOC;
 	if (*did_some_progress != COMPACT_SKIPPED) {
 
 		/* Page migration frees to the PCP lists but we want merging */
@@ -2121,6 +2124,19 @@ rebalance:
 		/* Wait for some write requests to complete then retry */
 		wait_iff_congested(preferred_zone, BLK_RW_ASYNC, HZ/50);
 		goto rebalance;
+	} else {
+		/*
+		 * High-order allocations do not necessarily loop after
+		 * direct reclaim and reclaim/compaction depends on compaction
+		 * being called after reclaim so call directly if necessary
+		 */
+		page = __alloc_pages_direct_compact(gfp_mask, order,
+					zonelist, high_zoneidx,
+					nodemask,
+					alloc_flags, preferred_zone,
+					migratetype, &did_some_progress);
+		if (page)
+			goto got_pg;
 	}
 
 nopage:
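The page_alloc.c hunks pair with the migrate.c change above: the task is marked with PF_MEMALLOC around try_to_compact_pages() so that, deeper in the call chain, migration can tell it is running from direct compaction and must not sleep in lock_page(). A standalone sketch of that mark-the-task idiom follows; TF_IN_COMPACTION, current_task and the helper functions are illustrative, not kernel identifiers.

/*
 * Userspace sketch of the "mark the task, check the mark deeper in the
 * call chain" idiom. The caller sets a flag on the (fake) current task
 * before entering the compaction path, and the callee uses it to decide
 * it must not block. All names are illustrative.
 */
#include <stdbool.h>
#include <stdio.h>

#define TF_IN_COMPACTION 0x1u

struct task { unsigned int flags; };
static struct task current_task;	/* stand-in for the kernel's "current" */

static bool try_lock_page(void) { return false; }	/* pretend the lock is contended */

static void migrate_one_page(void)
{
	if (!try_lock_page()) {
		/* In direct compaction we must not sleep waiting for the lock. */
		if (current_task.flags & TF_IN_COMPACTION) {
			puts("skipping locked page during compaction");
			return;
		}
		puts("would sleep waiting for the page lock");
	}
}

static void direct_compact(void)
{
	current_task.flags |= TF_IN_COMPACTION;
	migrate_one_page();
	current_task.flags &= ~TF_IN_COMPACTION;
}

int main(void)
{
	direct_compact();
	return 0;
}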
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 3464312bde07..10ebd74a423c 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -32,6 +32,7 @@
 #include <linux/topology.h>
 #include <linux/cpu.h>
 #include <linux/cpuset.h>
+#include <linux/compaction.h>
 #include <linux/notifier.h>
 #include <linux/rwsem.h>
 #include <linux/delay.h>
@@ -59,12 +60,15 @@
  * LUMPY_MODE_CONTIGRECLAIM: For high-order allocations, take a reference
  *			page from the LRU and reclaim all pages within a
  *			naturally aligned range
+ * LUMPY_MODE_COMPACTION: For high-order allocations, reclaim a number of
+ *			order-0 pages and then compact the zone
  */
 typedef unsigned __bitwise__ lumpy_mode;
 #define LUMPY_MODE_SINGLE		((__force lumpy_mode)0x01u)
 #define LUMPY_MODE_ASYNC		((__force lumpy_mode)0x02u)
 #define LUMPY_MODE_SYNC			((__force lumpy_mode)0x04u)
 #define LUMPY_MODE_CONTIGRECLAIM	((__force lumpy_mode)0x08u)
+#define LUMPY_MODE_COMPACTION		((__force lumpy_mode)0x10u)
 
 struct scan_control {
 	/* Incremented by the number of inactive pages that were scanned */
@@ -286,18 +290,20 @@ static void set_lumpy_reclaim_mode(int priority, struct scan_control *sc,
 	lumpy_mode syncmode = sync ? LUMPY_MODE_SYNC : LUMPY_MODE_ASYNC;
 
 	/*
-	 * Some reclaim have alredy been failed. No worth to try synchronous
-	 * lumpy reclaim.
+	 * Initially assume we are entering either lumpy reclaim or
+	 * reclaim/compaction. Depending on the order, we will either set the
+	 * sync mode or just reclaim order-0 pages later.
 	 */
-	if (sync && sc->lumpy_reclaim_mode & LUMPY_MODE_SINGLE)
-		return;
+	if (COMPACTION_BUILD)
+		sc->lumpy_reclaim_mode = LUMPY_MODE_COMPACTION;
+	else
+		sc->lumpy_reclaim_mode = LUMPY_MODE_CONTIGRECLAIM;
 
 	/*
-	 * If we need a large contiguous chunk of memory, or have
-	 * trouble getting a small set of contiguous pages, we
-	 * will reclaim both active and inactive pages.
+	 * Avoid using lumpy reclaim or reclaim/compaction if possible by
+	 * restricting when its set to either costly allocations or when
+	 * under memory pressure
 	 */
-	sc->lumpy_reclaim_mode = LUMPY_MODE_CONTIGRECLAIM;
 	if (sc->order > PAGE_ALLOC_COSTLY_ORDER)
 		sc->lumpy_reclaim_mode |= syncmode;
 	else if (sc->order && priority < DEF_PRIORITY - 2)
@@ -1385,8 +1391,8 @@ shrink_inactive_list(unsigned long nr_to_scan, struct zone *zone,
 	if (scanning_global_lru(sc)) {
 		nr_taken = isolate_pages_global(nr_to_scan,
 			&page_list, &nr_scanned, sc->order,
-			sc->lumpy_reclaim_mode & LUMPY_MODE_SINGLE ?
-					ISOLATE_INACTIVE : ISOLATE_BOTH,
+			sc->lumpy_reclaim_mode & LUMPY_MODE_CONTIGRECLAIM ?
+					ISOLATE_BOTH : ISOLATE_INACTIVE,
 			zone, 0, file);
 		zone->pages_scanned += nr_scanned;
 		if (current_is_kswapd())
@@ -1398,8 +1404,8 @@ shrink_inactive_list(unsigned long nr_to_scan, struct zone *zone,
 	} else {
 		nr_taken = mem_cgroup_isolate_pages(nr_to_scan,
 			&page_list, &nr_scanned, sc->order,
-			sc->lumpy_reclaim_mode & LUMPY_MODE_SINGLE ?
-					ISOLATE_INACTIVE : ISOLATE_BOTH,
+			sc->lumpy_reclaim_mode & LUMPY_MODE_CONTIGRECLAIM ?
+					ISOLATE_BOTH : ISOLATE_INACTIVE,
 			zone, sc->mem_cgroup,
 			0, file);
 		/*
@@ -1815,6 +1821,57 @@ out:
 }
 
 /*
+ * Reclaim/compaction depends on a number of pages being freed. To avoid
+ * disruption to the system, a small number of order-0 pages continue to be
+ * rotated and reclaimed in the normal fashion. However, by the time we get
+ * back to the allocator and call try_to_compact_zone(), we ensure that
+ * there are enough free pages for it to be likely successful
+ */
+static inline bool should_continue_reclaim(struct zone *zone,
+					unsigned long nr_reclaimed,
+					unsigned long nr_scanned,
+					struct scan_control *sc)
+{
+	unsigned long pages_for_compaction;
+	unsigned long inactive_lru_pages;
+
+	/* If not in reclaim/compaction mode, stop */
+	if (!(sc->lumpy_reclaim_mode & LUMPY_MODE_COMPACTION))
+		return false;
+
+	/*
+	 * If we failed to reclaim and have scanned the full list, stop.
+	 * NOTE: Checking just nr_reclaimed would exit reclaim/compaction far
+	 *       faster but obviously would be less likely to succeed
+	 *       allocation. If this is desirable, use GFP_REPEAT to decide
+	 *       if both reclaimed and scanned should be checked or just
+	 *       reclaimed
+	 */
+	if (!nr_reclaimed && !nr_scanned)
+		return false;
+
+	/*
+	 * If we have not reclaimed enough pages for compaction and the
+	 * inactive lists are large enough, continue reclaiming
+	 */
+	pages_for_compaction = (2UL << sc->order);
+	inactive_lru_pages = zone_nr_lru_pages(zone, sc, LRU_INACTIVE_ANON) +
+				zone_nr_lru_pages(zone, sc, LRU_INACTIVE_FILE);
+	if (sc->nr_reclaimed < pages_for_compaction &&
+			inactive_lru_pages > pages_for_compaction)
+		return true;
+
+	/* If compaction would go ahead or the allocation would succeed, stop */
+	switch (compaction_suitable(zone, sc->order)) {
+	case COMPACT_PARTIAL:
+	case COMPACT_CONTINUE:
+		return false;
+	default:
+		return true;
+	}
+}
+
+/*
  * This is a basic per-zone page freer. Used by both kswapd and direct reclaim.
  */
 static void shrink_zone(int priority, struct zone *zone,
@@ -1823,9 +1880,12 @@ static void shrink_zone(int priority, struct zone *zone,
 	unsigned long nr[NR_LRU_LISTS];
 	unsigned long nr_to_scan;
 	enum lru_list l;
-	unsigned long nr_reclaimed = sc->nr_reclaimed;
+	unsigned long nr_reclaimed;
 	unsigned long nr_to_reclaim = sc->nr_to_reclaim;
+	unsigned long nr_scanned = sc->nr_scanned;
 
+restart:
+	nr_reclaimed = 0;
 	get_scan_count(zone, sc, nr, priority);
 
 	while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] ||
@@ -1851,8 +1911,7 @@ static void shrink_zone(int priority, struct zone *zone,
 		if (nr_reclaimed >= nr_to_reclaim && priority < DEF_PRIORITY)
 			break;
 	}
-
-	sc->nr_reclaimed = nr_reclaimed;
+	sc->nr_reclaimed += nr_reclaimed;
 
 	/*
 	 * Even if we did not try to evict anon pages at all, we want to
@@ -1861,6 +1920,11 @@ static void shrink_zone(int priority, struct zone *zone,
 	if (inactive_anon_is_low(zone, sc))
 		shrink_active_list(SWAP_CLUSTER_MAX, zone, sc, priority, 0);
 
+	/* reclaim/compaction might need reclaim to continue */
+	if (should_continue_reclaim(zone, nr_reclaimed,
+					sc->nr_scanned - nr_scanned, sc))
+		goto restart;
+
 	throttle_vm_writeout(sc->gfp_mask);
 }
 
@@ -2307,6 +2371,14 @@ loop_again:
 			    total_scanned > sc.nr_reclaimed + sc.nr_reclaimed / 2)
 				sc.may_writepage = 1;
 
+			/*
+			 * Compact the zone for higher orders to reduce
+			 * latencies for higher-order allocations that
+			 * would ordinarily call try_to_compact_pages()
+			 */
+			if (sc.order > PAGE_ALLOC_COSTLY_ORDER)
+				compact_zone_order(zone, sc.order, sc.gfp_mask);
+
 			if (!zone_watermark_ok_safe(zone, order,
 					high_wmark_pages(zone), end_zone, 0)) {
 				all_zones_ok = 0;
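Taken together, the vmscan.c changes turn shrink_zone() into a loop: reclaim modest batches of order-0 pages, then ask should_continue_reclaim() whether compaction now has enough free pages to work with. A rough userspace sketch of that control flow follows, under the simplifying assumption of a fixed reclaim batch; reclaim_batch(), compaction_ready() and shrink_zone_sketch() are illustrative, not kernel functions.

/*
 * Userspace sketch of the restart loop added to shrink_zone(): keep
 * reclaiming small batches until nothing is freed or roughly 2 << order
 * pages have accumulated for compaction. Purely illustrative.
 */
#include <stdbool.h>
#include <stdio.h>

static unsigned long reclaim_batch(void) { return 8; }	/* pretend 8 pages freed per pass */

static bool compaction_ready(unsigned long freed, int order)
{
	return freed >= (2UL << order);
}

static unsigned long shrink_zone_sketch(int order)
{
	unsigned long total = 0;

	for (;;) {
		unsigned long freed = reclaim_batch();

		total += freed;
		/* Stop if nothing was freed or compaction now has headroom. */
		if (!freed || compaction_ready(total, order))
			break;
	}
	return total;
}

int main(void)
{
	/* for order 3 this keeps going until 16 pages are freed */
	printf("reclaimed %lu pages\n", shrink_zone_sketch(3));
	return 0;
}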