author		KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>	2011-05-26 19:25:34 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2011-05-26 20:12:35 -0400
commit		246e87a9393448c20873bc5dee64be68ed559e24 (patch)
tree		a17016142b267fcba2e3be9908f8138c8dcb3f3a /mm
parent		889976dbcb1218119fdd950fb7819084e37d7d37 (diff)
memcg: fix get_scan_count() for small targets
During memory reclaim we determine the number of pages to be scanned per zone as

	scan = (anon + file) >> priority

If scan < SWAP_CLUSTER_MAX, the scan is skipped for this round and reclaim moves on to the next, more aggressive priority.  This has several problems:

1. The priority advances by one step without any scanning having been done.  For a scan to happen at DEF_PRIORITY at all, the target must be larger than 512MB.  If pages >> priority < SWAP_CLUSTER_MAX, the amount is recorded and the scan is batched later (but one priority level is lost).  If the target is below 16MB, pages >> priority is 0 and no scan ever happens at DEF_PRIORITY.

2. If zone->all_unreclaimable == true, the zone is scanned only when priority == 0, so x86's ZONE_DMA will never be recovered until the user of those pages frees the memory itself.

3. With memcg the memory limit can be small.  A small memcg very easily reaches priority < DEF_PRIORITY - 2 and then has to call wait_iff_congested().  For any scanning to happen before priority reaches 9, at least 64MB of memory must be in use.

This patch therefore forces a scan of SWAP_CLUSTER_MAX pages when

1. the target is small enough, and
2. reclaim is done by kswapd or by memcg.

This avoids the rapid priority drop and may allow all_unreclaimable to be cleared again for small zones.  The patch also removes nr_saved_scan; scanning can now happen at the current priority even when pages >> priority is very small.

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Acked-by: Ying Han <yinghan@google.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Cc: Mel Gorman <mel@csn.ul.ie>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
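As an illustration of points 1 and 3 above, here is a minimal standalone userspace sketch of the arithmetic (not kernel code; it assumes 4KiB pages and the values SWAP_CLUSTER_MAX == 32 and DEF_PRIORITY == 12 used by kernels of this era):

/*
 * Illustrative userspace sketch of the scan-target arithmetic -- not
 * kernel code.  Assumes 4KiB pages, SWAP_CLUSTER_MAX == 32 and
 * DEF_PRIORITY == 12.
 */
#include <stdio.h>

#define SWAP_CLUSTER_MAX 32UL
#define DEF_PRIORITY	 12

int main(void)
{
	/* LRU sizes for roughly 8MB, 64MB and 512MB targets (4KiB pages) */
	unsigned long targets[] = { 2048UL, 16384UL, 131072UL };
	int i, priority;

	for (i = 0; i < 3; i++) {
		for (priority = DEF_PRIORITY; priority >= 9; priority--) {
			unsigned long scan = targets[i] >> priority;

			printf("%7lu pages, priority %2d -> scan %3lu%s\n",
			       targets[i], priority, scan,
			       scan < SWAP_CLUSTER_MAX ? "  (skipped)" : "");
		}
	}
	return 0;
}

The output shows that an 8MB target never produces a non-zero scan count at DEF_PRIORITY, and a 64MB target only reaches SWAP_CLUSTER_MAX once the priority has dropped to 9, matching the figures quoted above.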
Diffstat (limited to 'mm')
-rw-r--r--	mm/page_alloc.c |  4
-rw-r--r--	mm/vmscan.c     | 60
2 files changed, 34 insertions(+), 30 deletions(-)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 2a00f17c3bf4..a4e1db3f1981 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -4323,10 +4323,8 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat,
 		zone->zone_pgdat = pgdat;
 
 		zone_pcp_init(zone);
-		for_each_lru(l) {
+		for_each_lru(l)
 			INIT_LIST_HEAD(&zone->lru[l].list);
-			zone->reclaim_stat.nr_saved_scan[l] = 0;
-		}
 		zone->reclaim_stat.recent_rotated[0] = 0;
 		zone->reclaim_stat.recent_rotated[1] = 0;
 		zone->reclaim_stat.recent_scanned[0] = 0;
diff --git a/mm/vmscan.c b/mm/vmscan.c
index b0875871820d..2e8fbacd8744 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1718,26 +1718,6 @@ static unsigned long shrink_list(enum lru_list lru, unsigned long nr_to_scan,
 }
 
 /*
- * Smallish @nr_to_scan's are deposited in @nr_saved_scan,
- * until we collected @swap_cluster_max pages to scan.
- */
-static unsigned long nr_scan_try_batch(unsigned long nr_to_scan,
-				       unsigned long *nr_saved_scan)
-{
-	unsigned long nr;
-
-	*nr_saved_scan += nr_to_scan;
-	nr = *nr_saved_scan;
-
-	if (nr >= SWAP_CLUSTER_MAX)
-		*nr_saved_scan = 0;
-	else
-		nr = 0;
-
-	return nr;
-}
-
-/*
  * Determine how aggressively the anon and file LRU lists should be
  * scanned. The relative value of each set of LRU lists is determined
  * by looking at the fraction of the pages scanned we did rotate back
@@ -1755,6 +1735,22 @@ static void get_scan_count(struct zone *zone, struct scan_control *sc,
 	u64 fraction[2], denominator;
 	enum lru_list l;
 	int noswap = 0;
+	int force_scan = 0;
+
+
+	anon = zone_nr_lru_pages(zone, sc, LRU_ACTIVE_ANON) +
+		zone_nr_lru_pages(zone, sc, LRU_INACTIVE_ANON);
+	file = zone_nr_lru_pages(zone, sc, LRU_ACTIVE_FILE) +
+		zone_nr_lru_pages(zone, sc, LRU_INACTIVE_FILE);
+
+	if (((anon + file) >> priority) < SWAP_CLUSTER_MAX) {
+		/* kswapd does zone balancing and need to scan this zone */
+		if (scanning_global_lru(sc) && current_is_kswapd())
+			force_scan = 1;
+		/* memcg may have small limit and need to avoid priority drop */
+		if (!scanning_global_lru(sc))
+			force_scan = 1;
+	}
 
 	/* If we have no swap space, do not bother scanning anon pages. */
 	if (!sc->may_swap || (nr_swap_pages <= 0)) {
@@ -1765,11 +1761,6 @@ static void get_scan_count(struct zone *zone, struct scan_control *sc,
 		goto out;
 	}
 
-	anon = zone_nr_lru_pages(zone, sc, LRU_ACTIVE_ANON) +
-		zone_nr_lru_pages(zone, sc, LRU_INACTIVE_ANON);
-	file = zone_nr_lru_pages(zone, sc, LRU_ACTIVE_FILE) +
-		zone_nr_lru_pages(zone, sc, LRU_INACTIVE_FILE);
-
 	if (scanning_global_lru(sc)) {
 		free = zone_page_state(zone, NR_FREE_PAGES);
 		/* If we have very few page cache pages,
@@ -1836,8 +1827,23 @@ out:
 			scan >>= priority;
 			scan = div64_u64(scan * fraction[file], denominator);
 		}
-		nr[l] = nr_scan_try_batch(scan,
-					  &reclaim_stat->nr_saved_scan[l]);
+
+		/*
+		 * If zone is small or memcg is small, nr[l] can be 0.
+		 * This results no-scan on this priority and priority drop down.
+		 * For global direct reclaim, it can visit next zone and tend
+		 * not to have problems. For global kswapd, it's for zone
+		 * balancing and it need to scan a small amounts. When using
+		 * memcg, priority drop can cause big latency. So, it's better
+		 * to scan small amount. See may_noscan above.
+		 */
+		if (!scan && force_scan) {
+			if (file)
+				scan = SWAP_CLUSTER_MAX;
+			else if (!noswap)
+				scan = SWAP_CLUSTER_MAX;
+		}
+		nr[l] = scan;
 	}
 }
 
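For reference, a hedged standalone sketch contrasting the two behaviours (plain userspace C, not the kernel functions themselves; old_scan_target() and new_scan_target() are invented names, the old one mirroring the removed nr_scan_try_batch() batching and the new one mirroring the added force_scan fallback, with the noswap/file distinction omitted):

/*
 * Standalone illustration only.  SWAP_CLUSTER_MAX == 32 is assumed.
 */
#include <stdio.h>

#define SWAP_CLUSTER_MAX 32UL

/* Old: small requests are saved up until a full batch has accumulated. */
static unsigned long old_scan_target(unsigned long nr_to_scan,
				     unsigned long *nr_saved_scan)
{
	unsigned long nr;

	*nr_saved_scan += nr_to_scan;
	nr = *nr_saved_scan;
	if (nr >= SWAP_CLUSTER_MAX)
		*nr_saved_scan = 0;
	else
		nr = 0;
	return nr;
}

/* New: a zero target is bumped to one batch when force_scan is set,
 * i.e. for kswapd or memcg reclaim against a small zone or memcg. */
static unsigned long new_scan_target(unsigned long scan, int force_scan)
{
	if (!scan && force_scan)
		scan = SWAP_CLUSTER_MAX;
	return scan;
}

int main(void)
{
	unsigned long saved = 0;

	/* A tiny memcg where (anon + file) >> priority evaluates to 0. */
	printf("old: scan %lu pages\n", old_scan_target(0, &saved));
	printf("new: scan %lu pages\n", new_scan_target(0, 1));
	return 0;
}

With the old batching, a target that never reaches SWAP_CLUSTER_MAX pages at the current priority yields a scan count of 0 and only drops the priority; after this patch, kswapd and memcg reclaim fall back to one SWAP_CLUSTER_MAX batch instead.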