Diffstat (limited to 'mm/vmscan.c')

-rw-r--r--	mm/vmscan.c	321
1 file changed, 149 insertions, 172 deletions
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 777af57fd8c8..885207a6b6b7 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -55,6 +55,11 @@ struct scan_control {
 	/* Number of pages freed so far during a call to shrink_zones() */
 	unsigned long nr_reclaimed;
 
+	/* How many pages shrink_list() should reclaim */
+	unsigned long nr_to_reclaim;
+
+	unsigned long hibernation_mode;
+
 	/* This context's GFP mask */
 	gfp_t gfp_mask;
 
@@ -66,12 +71,6 @@ struct scan_control {
 	/* Can pages be swapped as part of reclaim? */
 	int may_swap;
 
-	/* This context's SWAP_CLUSTER_MAX. If freeing memory for
-	 * suspend, we effectively ignore SWAP_CLUSTER_MAX.
-	 * In this context, it doesn't matter that we scan the
-	 * whole list at once. */
-	int swap_cluster_max;
-
 	int swappiness;
 
 	int all_unreclaimable;
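Taken together, the two hunks above drop the per-context swap_cluster_max and make the reclaim target explicit. For orientation, the relevant part of struct scan_control after this patch looks roughly like the sketch below (members not touched by, or not visible in, this diff are elided):

struct scan_control {
	/* Number of pages freed so far during a call to shrink_zones() */
	unsigned long nr_reclaimed;

	/* How many pages shrink_list() should reclaim */
	unsigned long nr_to_reclaim;

	unsigned long hibernation_mode;

	/* This context's GFP mask */
	gfp_t gfp_mask;

	/* Can pages be swapped as part of reclaim? */
	int may_swap;

	int swappiness;

	int all_unreclaimable;

	/* ... */
};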
@@ -358,7 +357,7 @@ static pageout_t pageout(struct page *page, struct address_space *mapping,
 	 * stalls if we need to run get_block(). We could test
 	 * PagePrivate for that.
 	 *
-	 * If this process is currently in generic_file_write() against
+	 * If this process is currently in __generic_file_aio_write() against
 	 * this page's queue, we can perform writeback even if that
 	 * will block.
 	 *
@@ -1132,7 +1131,7 @@ static unsigned long shrink_inactive_list(unsigned long max_scan,
 		unsigned long nr_anon;
 		unsigned long nr_file;
 
-		nr_taken = sc->isolate_pages(sc->swap_cluster_max,
+		nr_taken = sc->isolate_pages(SWAP_CLUSTER_MAX,
 					&page_list, &nr_scan, sc->order, mode,
 					zone, sc->mem_cgroup, 0, file);
 
@@ -1166,10 +1165,8 @@ static unsigned long shrink_inactive_list(unsigned long max_scan,
 		__mod_zone_page_state(zone, NR_ISOLATED_ANON, nr_anon);
 		__mod_zone_page_state(zone, NR_ISOLATED_FILE, nr_file);
 
-		reclaim_stat->recent_scanned[0] += count[LRU_INACTIVE_ANON];
-		reclaim_stat->recent_scanned[0] += count[LRU_ACTIVE_ANON];
-		reclaim_stat->recent_scanned[1] += count[LRU_INACTIVE_FILE];
-		reclaim_stat->recent_scanned[1] += count[LRU_ACTIVE_FILE];
+		reclaim_stat->recent_scanned[0] += nr_anon;
+		reclaim_stat->recent_scanned[1] += nr_file;
 
 		spin_unlock_irq(&zone->lru_lock);
 
@@ -1464,20 +1461,26 @@ static int inactive_file_is_low(struct zone *zone, struct scan_control *sc)
 	return low;
 }
 
+static int inactive_list_is_low(struct zone *zone, struct scan_control *sc,
+				int file)
+{
+	if (file)
+		return inactive_file_is_low(zone, sc);
+	else
+		return inactive_anon_is_low(zone, sc);
+}
+
 static unsigned long shrink_list(enum lru_list lru, unsigned long nr_to_scan,
 	struct zone *zone, struct scan_control *sc, int priority)
 {
 	int file = is_file_lru(lru);
 
-	if (lru == LRU_ACTIVE_FILE && inactive_file_is_low(zone, sc)) {
-		shrink_active_list(nr_to_scan, zone, sc, priority, file);
+	if (is_active_lru(lru)) {
+		if (inactive_list_is_low(zone, sc, file))
+			shrink_active_list(nr_to_scan, zone, sc, priority, file);
 		return 0;
 	}
 
-	if (lru == LRU_ACTIVE_ANON && inactive_anon_is_low(zone, sc)) {
-		shrink_active_list(nr_to_scan, zone, sc, priority, file);
-		return 0;
-	}
 	return shrink_inactive_list(nr_to_scan, zone, sc, priority, file);
 }
 
@@ -1567,15 +1570,14 @@ static void get_scan_ratio(struct zone *zone, struct scan_control *sc,
  * until we collected @swap_cluster_max pages to scan.
  */
 static unsigned long nr_scan_try_batch(unsigned long nr_to_scan,
-				       unsigned long *nr_saved_scan,
-				       unsigned long swap_cluster_max)
+				       unsigned long *nr_saved_scan)
 {
 	unsigned long nr;
 
 	*nr_saved_scan += nr_to_scan;
 	nr = *nr_saved_scan;
 
-	if (nr >= swap_cluster_max)
+	if (nr >= SWAP_CLUSTER_MAX)
 		*nr_saved_scan = 0;
 	else
 		nr = 0;
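For readers unfamiliar with this helper: nr_scan_try_batch() saves up the small per-priority scan requests in *nr_saved_scan and only releases them once a full batch is pending; after this hunk the batch size is the global SWAP_CLUSTER_MAX rather than the now-removed per-context value. A self-contained, user-space illustration of the same accumulation behaviour (SWAP_CLUSTER_MAX is assumed to be 32 here, matching the usual kernel definition):

#include <stdio.h>

#define SWAP_CLUSTER_MAX 32UL	/* assumed value, for illustration only */

static unsigned long nr_scan_try_batch(unsigned long nr_to_scan,
				       unsigned long *nr_saved_scan)
{
	unsigned long nr;

	*nr_saved_scan += nr_to_scan;
	nr = *nr_saved_scan;

	if (nr >= SWAP_CLUSTER_MAX)
		*nr_saved_scan = 0;	/* release the whole accumulated batch */
	else
		nr = 0;			/* keep saving until a full batch exists */

	return nr;
}

int main(void)
{
	unsigned long saved = 0;
	unsigned long i;

	/* Feed 10 pages per call: prints 0, 0, 0, then 40 on the fourth call. */
	for (i = 0; i < 4; i++)
		printf("batch = %lu\n", nr_scan_try_batch(10, &saved));
	return 0;
}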
@@ -1594,7 +1596,7 @@ static void shrink_zone(int priority, struct zone *zone,
 	unsigned long percent[2];	/* anon @ 0; file @ 1 */
 	enum lru_list l;
 	unsigned long nr_reclaimed = sc->nr_reclaimed;
-	unsigned long swap_cluster_max = sc->swap_cluster_max;
+	unsigned long nr_to_reclaim = sc->nr_to_reclaim;
 	struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(zone, sc);
 	int noswap = 0;
 
@@ -1616,15 +1618,15 @@ static void shrink_zone(int priority, struct zone *zone,
 			scan = (scan * percent[file]) / 100;
 		}
 		nr[l] = nr_scan_try_batch(scan,
-					  &reclaim_stat->nr_saved_scan[l],
-					  swap_cluster_max);
+					  &reclaim_stat->nr_saved_scan[l]);
 	}
 
 	while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] ||
 					nr[LRU_INACTIVE_FILE]) {
 		for_each_evictable_lru(l) {
 			if (nr[l]) {
-				nr_to_scan = min(nr[l], swap_cluster_max);
+				nr_to_scan = min_t(unsigned long,
+						   nr[l], SWAP_CLUSTER_MAX);
 				nr[l] -= nr_to_scan;
 
 				nr_reclaimed += shrink_list(l, nr_to_scan,
@@ -1639,8 +1641,7 @@ static void shrink_zone(int priority, struct zone *zone,
 		 * with multiple processes reclaiming pages, the total
 		 * freeing target can get unreasonably large.
 		 */
-		if (nr_reclaimed > swap_cluster_max &&
-			priority < DEF_PRIORITY && !current_is_kswapd())
+		if (nr_reclaimed >= nr_to_reclaim && priority < DEF_PRIORITY)
 			break;
 	}
 
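Note that the bail-out test above no longer needs the !current_is_kswapd() special case: that policy is now expressed by the caller through sc->nr_to_reclaim, as the two scan_control initializers later in this diff show. In sketch form:

/* Direct reclaim (try_to_free_pages): stop once a batch worth of pages is freed */
.nr_to_reclaim = SWAP_CLUSTER_MAX,

/* kswapd (balance_pgdat): effectively never bail out, keep equal pressure on zones */
.nr_to_reclaim = ULONG_MAX,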
@@ -1738,6 +1739,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
 	struct zoneref *z;
 	struct zone *zone;
 	enum zone_type high_zoneidx = gfp_zone(sc->gfp_mask);
+	unsigned long writeback_threshold;
 
 	delayacct_freepages_start();
 
@@ -1773,7 +1775,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
 			}
 		}
 		total_scanned += sc->nr_scanned;
-		if (sc->nr_reclaimed >= sc->swap_cluster_max) {
+		if (sc->nr_reclaimed >= sc->nr_to_reclaim) {
 			ret = sc->nr_reclaimed;
 			goto out;
 		}
@@ -1785,14 +1787,15 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
 		 * that's undesirable in laptop mode, where we *want* lumpy
 		 * writeout. So in laptop mode, write out the whole world.
 		 */
-		if (total_scanned > sc->swap_cluster_max +
-					sc->swap_cluster_max / 2) {
+		writeback_threshold = sc->nr_to_reclaim + sc->nr_to_reclaim / 2;
+		if (total_scanned > writeback_threshold) {
 			wakeup_flusher_threads(laptop_mode ? 0 : total_scanned);
 			sc->may_writepage = 1;
 		}
 
 		/* Take a nap, wait for some writeback to complete */
-		if (sc->nr_scanned && priority < DEF_PRIORITY - 2)
+		if (!sc->hibernation_mode && sc->nr_scanned &&
+		    priority < DEF_PRIORITY - 2)
 			congestion_wait(BLK_RW_ASYNC, HZ/10);
 	}
 	/* top priority shrink_zones still had more to do? don't OOM, then */
@@ -1831,7 +1834,7 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
 	struct scan_control sc = {
 		.gfp_mask = gfp_mask,
 		.may_writepage = !laptop_mode,
-		.swap_cluster_max = SWAP_CLUSTER_MAX,
+		.nr_to_reclaim = SWAP_CLUSTER_MAX,
 		.may_unmap = 1,
 		.may_swap = 1,
 		.swappiness = vm_swappiness,
@@ -1855,7 +1858,6 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem,
 		.may_writepage = !laptop_mode,
 		.may_unmap = 1,
 		.may_swap = !noswap,
-		.swap_cluster_max = SWAP_CLUSTER_MAX,
 		.swappiness = swappiness,
 		.order = 0,
 		.mem_cgroup = mem,
@@ -1889,7 +1891,7 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
 		.may_writepage = !laptop_mode,
 		.may_unmap = 1,
 		.may_swap = !noswap,
-		.swap_cluster_max = SWAP_CLUSTER_MAX,
+		.nr_to_reclaim = SWAP_CLUSTER_MAX,
 		.swappiness = swappiness,
 		.order = 0,
 		.mem_cgroup = mem_cont,
@@ -1904,6 +1906,30 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
 }
 #endif
 
+/* is kswapd sleeping prematurely? */
+static int sleeping_prematurely(pg_data_t *pgdat, int order, long remaining)
+{
+	int i;
+
+	/* If a direct reclaimer woke kswapd within HZ/10, it's premature */
+	if (remaining)
+		return 1;
+
+	/* If after HZ/10, a zone is below the high mark, it's premature */
+	for (i = 0; i < pgdat->nr_zones; i++) {
+		struct zone *zone = pgdat->node_zones + i;
+
+		if (!populated_zone(zone))
+			continue;
+
+		if (!zone_watermark_ok(zone, order, high_wmark_pages(zone),
+								0, 0))
+			return 1;
+	}
+
+	return 0;
+}
+
 /*
  * For kswapd, balance_pgdat() will work across all this node's zones until
  * they are all at high_wmark_pages(zone).
@@ -1936,7 +1962,11 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order)
 		.gfp_mask = GFP_KERNEL,
 		.may_unmap = 1,
 		.may_swap = 1,
-		.swap_cluster_max = SWAP_CLUSTER_MAX,
+		/*
+		 * kswapd doesn't want to be bailed out while reclaim. because
+		 * we want to put equal scanning pressure on each zone.
+		 */
+		.nr_to_reclaim = ULONG_MAX,
 		.swappiness = vm_swappiness,
 		.order = order,
 		.mem_cgroup = NULL,
@@ -1961,6 +1991,7 @@ loop_again:
 	for (priority = DEF_PRIORITY; priority >= 0; priority--) {
 		int end_zone = 0;	/* Inclusive. 0 = ZONE_DMA */
 		unsigned long lru_pages = 0;
+		int has_under_min_watermark_zone = 0;
 
 		/* The swap token gets in the way of swapout... */
 		if (!priority)
@@ -2067,6 +2098,15 @@ loop_again:
 			if (total_scanned > SWAP_CLUSTER_MAX * 2 &&
 			    total_scanned > sc.nr_reclaimed + sc.nr_reclaimed / 2)
 				sc.may_writepage = 1;
+
+			/*
+			 * We are still under min water mark. it mean we have
+			 * GFP_ATOMIC allocation failure risk. Hurry up!
+			 */
+			if (!zone_watermark_ok(zone, order, min_wmark_pages(zone),
+					      end_zone, 0))
+				has_under_min_watermark_zone = 1;
+
 		}
 		if (all_zones_ok)
 			break;		/* kswapd: all done */
@@ -2074,8 +2114,12 @@ loop_again:
 		 * OK, kswapd is getting into trouble. Take a nap, then take
 		 * another pass across the zones.
 		 */
-		if (total_scanned && priority < DEF_PRIORITY - 2)
-			congestion_wait(BLK_RW_ASYNC, HZ/10);
+		if (total_scanned && (priority < DEF_PRIORITY - 2)) {
+			if (has_under_min_watermark_zone)
+				count_vm_event(KSWAPD_SKIP_CONGESTION_WAIT);
+			else
+				congestion_wait(BLK_RW_ASYNC, HZ/10);
+		}
 
 		/*
 		 * We do this so kswapd doesn't build up large priorities for
@@ -2173,6 +2217,7 @@ static int kswapd(void *p)
 	order = 0;
 	for ( ; ; ) {
 		unsigned long new_order;
+		int ret;
 
 		prepare_to_wait(&pgdat->kswapd_wait, &wait, TASK_INTERRUPTIBLE);
 		new_order = pgdat->kswapd_max_order;
@@ -2184,19 +2229,45 @@ static int kswapd(void *p)
 			 */
 			order = new_order;
 		} else {
-			if (!freezing(current))
-				schedule();
+			if (!freezing(current) && !kthread_should_stop()) {
+				long remaining = 0;
+
+				/* Try to sleep for a short interval */
+				if (!sleeping_prematurely(pgdat, order, remaining)) {
+					remaining = schedule_timeout(HZ/10);
+					finish_wait(&pgdat->kswapd_wait, &wait);
+					prepare_to_wait(&pgdat->kswapd_wait, &wait, TASK_INTERRUPTIBLE);
+				}
+
+				/*
+				 * After a short sleep, check if it was a
+				 * premature sleep. If not, then go fully
+				 * to sleep until explicitly woken up
+				 */
+				if (!sleeping_prematurely(pgdat, order, remaining))
+					schedule();
+				else {
+					if (remaining)
+						count_vm_event(KSWAPD_LOW_WMARK_HIT_QUICKLY);
+					else
+						count_vm_event(KSWAPD_HIGH_WMARK_HIT_QUICKLY);
+				}
+			}
 
 			order = pgdat->kswapd_max_order;
 		}
 		finish_wait(&pgdat->kswapd_wait, &wait);
 
-		if (!try_to_freeze()) {
-			/* We can speed up thawing tasks if we don't call
-			 * balance_pgdat after returning from the refrigerator
-			 */
+		ret = try_to_freeze();
+		if (kthread_should_stop())
+			break;
+
+		/*
+		 * We can speed up thawing tasks if we don't call balance_pgdat
+		 * after returning from the refrigerator
+		 */
+		if (!ret)
 			balance_pgdat(pgdat, order);
-		}
 	}
 	return 0;
 }
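To summarize the new kswapd sleep logic above: rather than sleeping unconditionally, kswapd first naps for HZ/10, then re-checks sleeping_prematurely() before committing to a full sleep, and the counters record how often that trial nap turned out to be premature. A condensed sketch of the decision flow (freezer and kthread_should_stop() handling omitted; not a drop-in replacement for the loop above):

long remaining = 0;

if (!sleeping_prematurely(pgdat, order, remaining))
	remaining = schedule_timeout(HZ/10);	/* trial nap */

if (!sleeping_prematurely(pgdat, order, remaining))
	schedule();	/* zones still balanced: sleep until explicitly woken */
else if (remaining)
	count_vm_event(KSWAPD_LOW_WMARK_HIT_QUICKLY);	/* woken during the nap */
else
	count_vm_event(KSWAPD_HIGH_WMARK_HIT_QUICKLY);	/* a zone fell below its high watermark */

The three counters (KSWAPD_SKIP_CONGESTION_WAIT, KSWAPD_LOW_WMARK_HIT_QUICKLY, KSWAPD_HIGH_WMARK_HIT_QUICKLY) are new vm_event_item values whose definitions live outside mm/vmscan.c, so they do not appear in this diff.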
@@ -2260,148 +2331,43 @@ unsigned long zone_reclaimable_pages(struct zone *zone)
 
 #ifdef CONFIG_HIBERNATION
 /*
- * Helper function for shrink_all_memory(). Tries to reclaim 'nr_pages' pages
- * from LRU lists system-wide, for given pass and priority.
- *
- * For pass > 3 we also try to shrink the LRU lists that contain a few pages
- */
-static void shrink_all_zones(unsigned long nr_pages, int prio,
-				      int pass, struct scan_control *sc)
-{
-	struct zone *zone;
-	unsigned long nr_reclaimed = 0;
-	struct zone_reclaim_stat *reclaim_stat;
-
-	for_each_populated_zone(zone) {
-		enum lru_list l;
-
-		if (zone_is_all_unreclaimable(zone) && prio != DEF_PRIORITY)
-			continue;
-
-		for_each_evictable_lru(l) {
-			enum zone_stat_item ls = NR_LRU_BASE + l;
-			unsigned long lru_pages = zone_page_state(zone, ls);
-
-			/* For pass = 0, we don't shrink the active list */
-			if (pass == 0 && (l == LRU_ACTIVE_ANON ||
-						l == LRU_ACTIVE_FILE))
-				continue;
-
-			reclaim_stat = get_reclaim_stat(zone, sc);
-			reclaim_stat->nr_saved_scan[l] +=
-						(lru_pages >> prio) + 1;
-			if (reclaim_stat->nr_saved_scan[l]
-						>= nr_pages || pass > 3) {
-				unsigned long nr_to_scan;
-
-				reclaim_stat->nr_saved_scan[l] = 0;
-				nr_to_scan = min(nr_pages, lru_pages);
-				nr_reclaimed += shrink_list(l, nr_to_scan, zone,
-								sc, prio);
-				if (nr_reclaimed >= nr_pages) {
-					sc->nr_reclaimed += nr_reclaimed;
-					return;
-				}
-			}
-		}
-	}
-	sc->nr_reclaimed += nr_reclaimed;
-}
-
-/*
- * Try to free `nr_pages' of memory, system-wide, and return the number of
+ * Try to free `nr_to_reclaim' of memory, system-wide, and return the number of
  * freed pages.
  *
  * Rather than trying to age LRUs the aim is to preserve the overall
  * LRU order by reclaiming preferentially
  * inactive > active > active referenced > active mapped
  */
-unsigned long shrink_all_memory(unsigned long nr_pages)
+unsigned long shrink_all_memory(unsigned long nr_to_reclaim)
 {
-	unsigned long lru_pages, nr_slab;
-	int pass;
 	struct reclaim_state reclaim_state;
 	struct scan_control sc = {
-		.gfp_mask = GFP_KERNEL,
-		.may_unmap = 0,
+		.gfp_mask = GFP_HIGHUSER_MOVABLE,
+		.may_swap = 1,
+		.may_unmap = 1,
 		.may_writepage = 1,
+		.nr_to_reclaim = nr_to_reclaim,
+		.hibernation_mode = 1,
+		.swappiness = vm_swappiness,
+		.order = 0,
 		.isolate_pages = isolate_pages_global,
-		.nr_reclaimed = 0,
 	};
+	struct zonelist * zonelist = node_zonelist(numa_node_id(), sc.gfp_mask);
+	struct task_struct *p = current;
+	unsigned long nr_reclaimed;
 
-	current->reclaim_state = &reclaim_state;
-
-	lru_pages = global_reclaimable_pages();
-	nr_slab = global_page_state(NR_SLAB_RECLAIMABLE);
-	/* If slab caches are huge, it's better to hit them first */
-	while (nr_slab >= lru_pages) {
-		reclaim_state.reclaimed_slab = 0;
-		shrink_slab(nr_pages, sc.gfp_mask, lru_pages);
-		if (!reclaim_state.reclaimed_slab)
-			break;
-
-		sc.nr_reclaimed += reclaim_state.reclaimed_slab;
-		if (sc.nr_reclaimed >= nr_pages)
-			goto out;
-
-		nr_slab -= reclaim_state.reclaimed_slab;
-	}
-
-	/*
-	 * We try to shrink LRUs in 5 passes:
-	 * 0 = Reclaim from inactive_list only
-	 * 1 = Reclaim from active list but don't reclaim mapped
-	 * 2 = 2nd pass of type 1
-	 * 3 = Reclaim mapped (normal reclaim)
-	 * 4 = 2nd pass of type 3
-	 */
-	for (pass = 0; pass < 5; pass++) {
-		int prio;
-
-		/* Force reclaiming mapped pages in the passes #3 and #4 */
-		if (pass > 2)
-			sc.may_unmap = 1;
-
-		for (prio = DEF_PRIORITY; prio >= 0; prio--) {
-			unsigned long nr_to_scan = nr_pages - sc.nr_reclaimed;
-
-			sc.nr_scanned = 0;
-			sc.swap_cluster_max = nr_to_scan;
-			shrink_all_zones(nr_to_scan, prio, pass, &sc);
-			if (sc.nr_reclaimed >= nr_pages)
-				goto out;
-
-			reclaim_state.reclaimed_slab = 0;
-			shrink_slab(sc.nr_scanned, sc.gfp_mask,
-				    global_reclaimable_pages());
-			sc.nr_reclaimed += reclaim_state.reclaimed_slab;
-			if (sc.nr_reclaimed >= nr_pages)
-				goto out;
-
-			if (sc.nr_scanned && prio < DEF_PRIORITY - 2)
-				congestion_wait(BLK_RW_ASYNC, HZ / 10);
-		}
-	}
-
-	/*
-	 * If sc.nr_reclaimed = 0, we could not shrink LRUs, but there may be
-	 * something in slab caches
-	 */
-	if (!sc.nr_reclaimed) {
-		do {
-			reclaim_state.reclaimed_slab = 0;
-			shrink_slab(nr_pages, sc.gfp_mask,
-				    global_reclaimable_pages());
-			sc.nr_reclaimed += reclaim_state.reclaimed_slab;
-		} while (sc.nr_reclaimed < nr_pages &&
-				reclaim_state.reclaimed_slab > 0);
-	}
+	p->flags |= PF_MEMALLOC;
+	lockdep_set_current_reclaim_state(sc.gfp_mask);
+	reclaim_state.reclaimed_slab = 0;
+	p->reclaim_state = &reclaim_state;
 
+	nr_reclaimed = do_try_to_free_pages(zonelist, &sc);
 
-out:
-	current->reclaim_state = NULL;
+	p->reclaim_state = NULL;
+	lockdep_clear_current_reclaim_state();
+	p->flags &= ~PF_MEMALLOC;
 
-	return sc.nr_reclaimed;
+	return nr_reclaimed;
 }
 #endif /* CONFIG_HIBERNATION */
 
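With this hunk, shrink_all_memory() no longer carries its own multi-pass reclaim loop: it builds a scan_control with hibernation_mode = 1 and nr_to_reclaim set to the caller's request, then defers to do_try_to_free_pages(), the same path direct reclaim uses (the hibernation_mode flag also suppresses the congestion_wait() nap, as the do_try_to_free_pages() hunk earlier shows). A hypothetical hibernation-side caller, purely for illustration (the function name and the 16 MB figure are not taken from this patch):

/* Illustrative only: ask reclaim to free roughly 16 MB before snapshotting. */
static unsigned long hypothetical_prepare_image(void)
{
	unsigned long to_free = (16UL << 20) / PAGE_SIZE;

	return shrink_all_memory(to_free);	/* returns the number of pages freed */
}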
@@ -2451,6 +2417,17 @@ int kswapd_run(int nid)
 	return ret;
 }
 
+/*
+ * Called by memory hotplug when all memory in a node is offlined.
+ */
+void kswapd_stop(int nid)
+{
+	struct task_struct *kswapd = NODE_DATA(nid)->kswapd;
+
+	if (kswapd)
+		kthread_stop(kswapd);
+}
+
 static int __init kswapd_init(void)
 {
 	int nid;
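kswapd_stop() is the counterpart of the existing kswapd_run(); together with the kthread_should_stop() checks added to kswapd() earlier in this diff, it lets a node's kswapd thread exit cleanly once memory hotplug has offlined all of that node's memory. A hypothetical hot-remove path (the function name is illustrative) would simply call:

/* Illustrative only: after the last memory section of node 'nid' goes offline. */
static void hypothetical_node_offline(int nid)
{
	kswapd_stop(nid);
}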
@@ -2553,8 +2530,8 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
 		.may_writepage = !!(zone_reclaim_mode & RECLAIM_WRITE),
 		.may_unmap = !!(zone_reclaim_mode & RECLAIM_SWAP),
 		.may_swap = 1,
-		.swap_cluster_max = max_t(unsigned long, nr_pages,
-					SWAP_CLUSTER_MAX),
+		.nr_to_reclaim = max_t(unsigned long, nr_pages,
+				       SWAP_CLUSTER_MAX),
 		.gfp_mask = gfp_mask,
 		.swappiness = vm_swappiness,
 		.order = order,