author     Johannes Weiner <jweiner@redhat.com>  2012-01-12 20:17:52 -0500
committer  Linus Torvalds <torvalds@linux-foundation.org>  2012-01-12 23:13:04 -0500
commit     f16015fbf2f6ac45505d6ad21455ff9f6c14473d (patch)
tree       ad9c7574566ed492f2b2d9ff655bb90773442c05 /mm
parent     89b5fae5368f6aec62fb09c8e19b6c61f1154603 (diff)
mm: vmscan: distinguish between memcg triggering reclaim and memcg being scanned
Memory cgroup hierarchies are currently handled completely outside of the traditional reclaim code, which is invoked with a single memory cgroup as an argument for the whole call stack.

Subsequent patches will switch this code to do hierarchical reclaim, so there needs to be a distinction between a) the memory cgroup that is triggering reclaim due to hitting its limit and b) the memory cgroup that is being scanned as a child of a).

This patch introduces a struct mem_cgroup_zone that contains the combination of the memory cgroup and the zone being scanned, which is then passed down the stack instead of the zone argument.

[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Johannes Weiner <jweiner@redhat.com>
Reviewed-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Reviewed-by: Michal Hocko <mhocko@suse.cz>
Reviewed-by: Kirill A. Shutemov <kirill@shutemov.name>
Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Cc: Balbir Singh <bsingharora@gmail.com>
Cc: Ying Han <yinghan@google.com>
Cc: Greg Thelen <gthelen@google.com>
Cc: Michel Lespinasse <walken@google.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Hugh Dickins <hughd@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm')
-rw-r--r--  mm/vmscan.c  249
1 file changed, 141 insertions(+), 108 deletions(-)
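Editorial note: before reading the diff, the sketch below condenses the calling convention the patch introduces. It is not kernel code from this commit; the stubbed types, the empty reclaim body, and the omitted fields are placeholders so the snippet stands alone, and only the relationship between scan_control's renamed target_mem_cgroup field, the new struct mem_cgroup_zone pair, and the shrink_zone() wrapper is taken from the patch itself.

/*
 * Editorial sketch of the convention introduced by this patch.
 * Real definitions live in mm/vmscan.c and the memcg headers.
 */
struct mem_cgroup;
struct zone;

struct scan_control {
	/* cgroup that hit its limit and triggered this reclaim invocation */
	struct mem_cgroup *target_mem_cgroup;
	/* other fields omitted */
};

/* pairs the memcg being scanned with the zone being scanned */
struct mem_cgroup_zone {
	struct mem_cgroup *mem_cgroup;
	struct zone *zone;
};

/* per-memcg, per-zone reclaim; body omitted in this sketch */
static void shrink_mem_cgroup_zone(int priority, struct mem_cgroup_zone *mz,
				   struct scan_control *sc)
{
}

/*
 * The old per-zone entry point keeps its signature; for now the scanned
 * memcg is simply the target memcg, until later patches walk the hierarchy.
 */
static void shrink_zone(int priority, struct zone *zone,
			struct scan_control *sc)
{
	struct mem_cgroup_zone mz = {
		.mem_cgroup = sc->target_mem_cgroup,
		.zone = zone,
	};

	shrink_mem_cgroup_zone(priority, &mz, sc);
}
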
diff --git a/mm/vmscan.c b/mm/vmscan.c
index ee4a46b8ae33..e0627d07c3ac 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -103,8 +103,11 @@ struct scan_control {
 	 */
 	reclaim_mode_t reclaim_mode;
 
-	/* Which cgroup do we reclaim from */
-	struct mem_cgroup *mem_cgroup;
+	/*
+	 * The memory cgroup that hit its limit and as a result is the
+	 * primary target of this reclaim invocation.
+	 */
+	struct mem_cgroup *target_mem_cgroup;
 
 	/*
 	 * Nodemask of nodes allowed by the caller. If NULL, all nodes
@@ -113,6 +116,11 @@ struct scan_control {
 	nodemask_t *nodemask;
 };
 
+struct mem_cgroup_zone {
+	struct mem_cgroup *mem_cgroup;
+	struct zone *zone;
+};
+
 #define lru_to_page(_head) (list_entry((_head)->prev, struct page, lru))
 
 #ifdef ARCH_HAS_PREFETCH
@@ -155,12 +163,12 @@ static DECLARE_RWSEM(shrinker_rwsem);
 #ifdef CONFIG_CGROUP_MEM_RES_CTLR
 static bool global_reclaim(struct scan_control *sc)
 {
-	return !sc->mem_cgroup;
+	return !sc->target_mem_cgroup;
 }
 
-static bool scanning_global_lru(struct scan_control *sc)
+static bool scanning_global_lru(struct mem_cgroup_zone *mz)
 {
-	return !sc->mem_cgroup;
+	return !mz->mem_cgroup;
 }
 #else
 static bool global_reclaim(struct scan_control *sc)
@@ -168,29 +176,30 @@ static bool global_reclaim(struct scan_control *sc)
 	return true;
 }
 
-static bool scanning_global_lru(struct scan_control *sc)
+static bool scanning_global_lru(struct mem_cgroup_zone *mz)
 {
 	return true;
 }
 #endif
 
-static struct zone_reclaim_stat *get_reclaim_stat(struct zone *zone,
-						  struct scan_control *sc)
+static struct zone_reclaim_stat *get_reclaim_stat(struct mem_cgroup_zone *mz)
 {
-	if (!scanning_global_lru(sc))
-		return mem_cgroup_get_reclaim_stat(sc->mem_cgroup, zone);
+	if (!scanning_global_lru(mz))
+		return mem_cgroup_get_reclaim_stat(mz->mem_cgroup, mz->zone);
 
-	return &zone->reclaim_stat;
+	return &mz->zone->reclaim_stat;
 }
 
-static unsigned long zone_nr_lru_pages(struct zone *zone,
-				struct scan_control *sc, enum lru_list lru)
+static unsigned long zone_nr_lru_pages(struct mem_cgroup_zone *mz,
+				       enum lru_list lru)
 {
-	if (!scanning_global_lru(sc))
-		return mem_cgroup_zone_nr_lru_pages(sc->mem_cgroup,
-				zone_to_nid(zone), zone_idx(zone), BIT(lru));
+	if (!scanning_global_lru(mz))
+		return mem_cgroup_zone_nr_lru_pages(mz->mem_cgroup,
+						    zone_to_nid(mz->zone),
+						    zone_idx(mz->zone),
+						    BIT(lru));
 
-	return zone_page_state(zone, NR_LRU_BASE + lru);
+	return zone_page_state(mz->zone, NR_LRU_BASE + lru);
 }
 
 
@@ -693,12 +702,13 @@ enum page_references {
 };
 
 static enum page_references page_check_references(struct page *page,
+						  struct mem_cgroup_zone *mz,
 						  struct scan_control *sc)
 {
 	int referenced_ptes, referenced_page;
 	unsigned long vm_flags;
 
-	referenced_ptes = page_referenced(page, 1, sc->mem_cgroup, &vm_flags);
+	referenced_ptes = page_referenced(page, 1, mz->mem_cgroup, &vm_flags);
 	referenced_page = TestClearPageReferenced(page);
 
 	/* Lumpy reclaim - ignore references */
@@ -754,7 +764,7 @@ static enum page_references page_check_references(struct page *page,
  * shrink_page_list() returns the number of reclaimed pages
  */
 static unsigned long shrink_page_list(struct list_head *page_list,
-				      struct zone *zone,
+				      struct mem_cgroup_zone *mz,
 				      struct scan_control *sc,
 				      int priority,
 				      unsigned long *ret_nr_dirty,
@@ -785,7 +795,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 			goto keep;
 
 		VM_BUG_ON(PageActive(page));
-		VM_BUG_ON(page_zone(page) != zone);
+		VM_BUG_ON(page_zone(page) != mz->zone);
 
 		sc->nr_scanned++;
 
@@ -819,7 +829,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 			}
 		}
 
-		references = page_check_references(page, sc);
+		references = page_check_references(page, mz, sc);
 		switch (references) {
 		case PAGEREF_ACTIVATE:
 			goto activate_locked;
@@ -1011,7 +1021,7 @@ keep_lumpy:
 	 * will encounter the same problem
 	 */
 	if (nr_dirty && nr_dirty == nr_congested && global_reclaim(sc))
-		zone_set_flag(zone, ZONE_CONGESTED);
+		zone_set_flag(mz->zone, ZONE_CONGESTED);
 
 	free_hot_cold_page_list(&free_pages, 1);
 
@@ -1347,13 +1357,14 @@ static int too_many_isolated(struct zone *zone, int file,
  * TODO: Try merging with migrations version of putback_lru_pages
  */
 static noinline_for_stack void
-putback_lru_pages(struct zone *zone, struct scan_control *sc,
+putback_lru_pages(struct mem_cgroup_zone *mz, struct scan_control *sc,
 		  unsigned long nr_anon, unsigned long nr_file,
 		  struct list_head *page_list)
 {
 	struct page *page;
 	struct pagevec pvec;
-	struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(zone, sc);
+	struct zone *zone = mz->zone;
+	struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(mz);
 
 	pagevec_init(&pvec, 1);
 
@@ -1393,15 +1404,17 @@ putback_lru_pages(struct zone *zone, struct scan_control *sc,
 	pagevec_release(&pvec);
 }
 
-static noinline_for_stack void update_isolated_counts(struct zone *zone,
-					struct scan_control *sc,
-					unsigned long *nr_anon,
-					unsigned long *nr_file,
-					struct list_head *isolated_list)
+static noinline_for_stack void
+update_isolated_counts(struct mem_cgroup_zone *mz,
+		       struct scan_control *sc,
+		       unsigned long *nr_anon,
+		       unsigned long *nr_file,
+		       struct list_head *isolated_list)
 {
 	unsigned long nr_active;
+	struct zone *zone = mz->zone;
 	unsigned int count[NR_LRU_LISTS] = { 0, };
-	struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(zone, sc);
+	struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(mz);
 
 	nr_active = clear_active_flags(isolated_list, count);
 	__count_vm_events(PGDEACTIVATE, nr_active);
@@ -1470,8 +1483,8 @@ static inline bool should_reclaim_stall(unsigned long nr_taken,
  * of reclaimed pages
  */
 static noinline_for_stack unsigned long
-shrink_inactive_list(unsigned long nr_to_scan, struct zone *zone,
+shrink_inactive_list(unsigned long nr_to_scan, struct mem_cgroup_zone *mz,
 		     struct scan_control *sc, int priority, int file)
 {
 	LIST_HEAD(page_list);
 	unsigned long nr_scanned;
@@ -1482,6 +1495,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct zone *zone,
 	unsigned long nr_dirty = 0;
 	unsigned long nr_writeback = 0;
 	isolate_mode_t reclaim_mode = ISOLATE_INACTIVE;
+	struct zone *zone = mz->zone;
 
 	while (unlikely(too_many_isolated(zone, file, sc))) {
 		congestion_wait(BLK_RW_ASYNC, HZ/10);
@@ -1504,13 +1518,13 @@ shrink_inactive_list(unsigned long nr_to_scan, struct zone *zone,
 
 	spin_lock_irq(&zone->lru_lock);
 
-	if (scanning_global_lru(sc)) {
+	if (scanning_global_lru(mz)) {
 		nr_taken = isolate_pages_global(nr_to_scan, &page_list,
 			&nr_scanned, sc->order, reclaim_mode, zone, 0, file);
 	} else {
 		nr_taken = mem_cgroup_isolate_pages(nr_to_scan, &page_list,
 			&nr_scanned, sc->order, reclaim_mode, zone,
-			sc->mem_cgroup, 0, file);
+			mz->mem_cgroup, 0, file);
 	}
 	if (global_reclaim(sc)) {
 		zone->pages_scanned += nr_scanned;
@@ -1527,17 +1541,17 @@ shrink_inactive_list(unsigned long nr_to_scan, struct zone *zone,
 		return 0;
 	}
 
-	update_isolated_counts(zone, sc, &nr_anon, &nr_file, &page_list);
+	update_isolated_counts(mz, sc, &nr_anon, &nr_file, &page_list);
 
 	spin_unlock_irq(&zone->lru_lock);
 
-	nr_reclaimed = shrink_page_list(&page_list, zone, sc, priority,
+	nr_reclaimed = shrink_page_list(&page_list, mz, sc, priority,
 						&nr_dirty, &nr_writeback);
 
 	/* Check if we should syncronously wait for writeback */
 	if (should_reclaim_stall(nr_taken, nr_reclaimed, priority, sc)) {
 		set_reclaim_mode(priority, sc, true);
-		nr_reclaimed += shrink_page_list(&page_list, zone, sc,
+		nr_reclaimed += shrink_page_list(&page_list, mz, sc,
 					priority, &nr_dirty, &nr_writeback);
 	}
 
@@ -1546,7 +1560,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct zone *zone,
 		__count_vm_events(KSWAPD_STEAL, nr_reclaimed);
 	__count_zone_vm_events(PGSTEAL, zone, nr_reclaimed);
 
-	putback_lru_pages(zone, sc, nr_anon, nr_file, &page_list);
+	putback_lru_pages(mz, sc, nr_anon, nr_file, &page_list);
 
 	/*
 	 * If reclaim is isolating dirty pages under writeback, it implies
@@ -1633,8 +1647,10 @@ static void move_active_pages_to_lru(struct zone *zone,
 		__count_vm_events(PGDEACTIVATE, pgmoved);
 }
 
-static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
-			struct scan_control *sc, int priority, int file)
+static void shrink_active_list(unsigned long nr_pages,
+			       struct mem_cgroup_zone *mz,
+			       struct scan_control *sc,
+			       int priority, int file)
 {
 	unsigned long nr_taken;
 	unsigned long pgscanned;
@@ -1643,9 +1659,10 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
 	LIST_HEAD(l_active);
 	LIST_HEAD(l_inactive);
 	struct page *page;
-	struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(zone, sc);
+	struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(mz);
 	unsigned long nr_rotated = 0;
 	isolate_mode_t reclaim_mode = ISOLATE_ACTIVE;
+	struct zone *zone = mz->zone;
 
 	lru_add_drain();
 
@@ -1655,7 +1672,7 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
 		reclaim_mode |= ISOLATE_CLEAN;
 
 	spin_lock_irq(&zone->lru_lock);
-	if (scanning_global_lru(sc)) {
+	if (scanning_global_lru(mz)) {
 		nr_taken = isolate_pages_global(nr_pages, &l_hold,
 						&pgscanned, sc->order,
 						reclaim_mode, zone,
@@ -1664,7 +1681,7 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
 		nr_taken = mem_cgroup_isolate_pages(nr_pages, &l_hold,
 						&pgscanned, sc->order,
 						reclaim_mode, zone,
-						sc->mem_cgroup, 1, file);
+						mz->mem_cgroup, 1, file);
 	}
 
 	if (global_reclaim(sc))
@@ -1690,7 +1707,7 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
 			continue;
 		}
 
-		if (page_referenced(page, 0, sc->mem_cgroup, &vm_flags)) {
+		if (page_referenced(page, 0, mz->mem_cgroup, &vm_flags)) {
 			nr_rotated += hpage_nr_pages(page);
 			/*
 			 * Identify referenced, file-backed active pages and
@@ -1753,10 +1770,8 @@ static int inactive_anon_is_low_global(struct zone *zone)
  * Returns true if the zone does not have enough inactive anon pages,
  * meaning some active anon pages need to be deactivated.
  */
-static int inactive_anon_is_low(struct zone *zone, struct scan_control *sc)
+static int inactive_anon_is_low(struct mem_cgroup_zone *mz)
 {
-	int low;
-
 	/*
 	 * If we don't have swap space, anonymous page deactivation
 	 * is pointless.
@@ -1764,15 +1779,14 @@ static int inactive_anon_is_low(struct zone *zone, struct scan_control *sc)
 	if (!total_swap_pages)
 		return 0;
 
-	if (scanning_global_lru(sc))
-		low = inactive_anon_is_low_global(zone);
-	else
-		low = mem_cgroup_inactive_anon_is_low(sc->mem_cgroup, zone);
-	return low;
+	if (!scanning_global_lru(mz))
+		return mem_cgroup_inactive_anon_is_low(mz->mem_cgroup,
+						       mz->zone);
+
+	return inactive_anon_is_low_global(mz->zone);
 }
 #else
-static inline int inactive_anon_is_low(struct zone *zone,
-					struct scan_control *sc)
+static inline int inactive_anon_is_low(struct mem_cgroup_zone *mz)
 {
 	return 0;
 }
@@ -1790,8 +1804,7 @@ static int inactive_file_is_low_global(struct zone *zone)
 
 /**
  * inactive_file_is_low - check if file pages need to be deactivated
- * @zone: zone to check
- * @sc:   scan control of this context
+ * @mz: memory cgroup and zone to check
  *
  * When the system is doing streaming IO, memory pressure here
  * ensures that active file pages get deactivated, until more
@@ -1803,45 +1816,44 @@ static int inactive_file_is_low_global(struct zone *zone)
  * This uses a different ratio than the anonymous pages, because
  * the page cache uses a use-once replacement algorithm.
  */
-static int inactive_file_is_low(struct zone *zone, struct scan_control *sc)
+static int inactive_file_is_low(struct mem_cgroup_zone *mz)
 {
-	int low;
+	if (!scanning_global_lru(mz))
+		return mem_cgroup_inactive_file_is_low(mz->mem_cgroup,
+						       mz->zone);
 
-	if (scanning_global_lru(sc))
-		low = inactive_file_is_low_global(zone);
-	else
-		low = mem_cgroup_inactive_file_is_low(sc->mem_cgroup, zone);
-	return low;
+	return inactive_file_is_low_global(mz->zone);
 }
 
-static int inactive_list_is_low(struct zone *zone, struct scan_control *sc,
-				int file)
+static int inactive_list_is_low(struct mem_cgroup_zone *mz, int file)
 {
 	if (file)
-		return inactive_file_is_low(zone, sc);
+		return inactive_file_is_low(mz);
 	else
-		return inactive_anon_is_low(zone, sc);
+		return inactive_anon_is_low(mz);
 }
 
 static unsigned long shrink_list(enum lru_list lru, unsigned long nr_to_scan,
-	struct zone *zone, struct scan_control *sc, int priority)
+				 struct mem_cgroup_zone *mz,
+				 struct scan_control *sc, int priority)
 {
 	int file = is_file_lru(lru);
 
 	if (is_active_lru(lru)) {
-		if (inactive_list_is_low(zone, sc, file))
-			shrink_active_list(nr_to_scan, zone, sc, priority, file);
+		if (inactive_list_is_low(mz, file))
+			shrink_active_list(nr_to_scan, mz, sc, priority, file);
 		return 0;
 	}
 
-	return shrink_inactive_list(nr_to_scan, zone, sc, priority, file);
+	return shrink_inactive_list(nr_to_scan, mz, sc, priority, file);
 }
 
-static int vmscan_swappiness(struct scan_control *sc)
+static int vmscan_swappiness(struct mem_cgroup_zone *mz,
+			     struct scan_control *sc)
 {
 	if (global_reclaim(sc))
 		return vm_swappiness;
-	return mem_cgroup_swappiness(sc->mem_cgroup);
+	return mem_cgroup_swappiness(mz->mem_cgroup);
 }
 
 /*
@@ -1852,13 +1864,13 @@ static int vmscan_swappiness(struct scan_control *sc)
  *
  * nr[0] = anon pages to scan; nr[1] = file pages to scan
  */
-static void get_scan_count(struct zone *zone, struct scan_control *sc,
+static void get_scan_count(struct mem_cgroup_zone *mz, struct scan_control *sc,
 			   unsigned long *nr, int priority)
 {
 	unsigned long anon, file, free;
 	unsigned long anon_prio, file_prio;
 	unsigned long ap, fp;
-	struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(zone, sc);
+	struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(mz);
 	u64 fraction[2], denominator;
 	enum lru_list l;
 	int noswap = 0;
@@ -1888,16 +1900,16 @@ static void get_scan_count(struct zone *zone, struct scan_control *sc,
 		goto out;
 	}
 
-	anon = zone_nr_lru_pages(zone, sc, LRU_ACTIVE_ANON) +
-		zone_nr_lru_pages(zone, sc, LRU_INACTIVE_ANON);
-	file = zone_nr_lru_pages(zone, sc, LRU_ACTIVE_FILE) +
-		zone_nr_lru_pages(zone, sc, LRU_INACTIVE_FILE);
+	anon = zone_nr_lru_pages(mz, LRU_ACTIVE_ANON) +
+		zone_nr_lru_pages(mz, LRU_INACTIVE_ANON);
+	file = zone_nr_lru_pages(mz, LRU_ACTIVE_FILE) +
+		zone_nr_lru_pages(mz, LRU_INACTIVE_FILE);
 
 	if (global_reclaim(sc)) {
-		free = zone_page_state(zone, NR_FREE_PAGES);
+		free = zone_page_state(mz->zone, NR_FREE_PAGES);
 		/* If we have very few page cache pages,
 		   force-scan anon pages. */
-		if (unlikely(file + free <= high_wmark_pages(zone))) {
+		if (unlikely(file + free <= high_wmark_pages(mz->zone))) {
 			fraction[0] = 1;
 			fraction[1] = 0;
 			denominator = 1;
@@ -1909,8 +1921,8 @@ static void get_scan_count(struct zone *zone, struct scan_control *sc,
 	 * With swappiness at 100, anonymous and file have the same priority.
 	 * This scanning priority is essentially the inverse of IO cost.
 	 */
-	anon_prio = vmscan_swappiness(sc);
-	file_prio = 200 - vmscan_swappiness(sc);
+	anon_prio = vmscan_swappiness(mz, sc);
+	file_prio = 200 - vmscan_swappiness(mz, sc);
 
 	/*
 	 * OK, so we have swap space and a fair amount of page cache
@@ -1923,7 +1935,7 @@ static void get_scan_count(struct zone *zone, struct scan_control *sc,
 	 *
 	 * anon in [0], file in [1]
 	 */
-	spin_lock_irq(&zone->lru_lock);
+	spin_lock_irq(&mz->zone->lru_lock);
 	if (unlikely(reclaim_stat->recent_scanned[0] > anon / 4)) {
 		reclaim_stat->recent_scanned[0] /= 2;
 		reclaim_stat->recent_rotated[0] /= 2;
@@ -1944,7 +1956,7 @@ static void get_scan_count(struct zone *zone, struct scan_control *sc,
 
 	fp = (file_prio + 1) * (reclaim_stat->recent_scanned[1] + 1);
 	fp /= reclaim_stat->recent_rotated[1] + 1;
-	spin_unlock_irq(&zone->lru_lock);
+	spin_unlock_irq(&mz->zone->lru_lock);
 
 	fraction[0] = ap;
 	fraction[1] = fp;
@@ -1954,7 +1966,7 @@ out:
 		int file = is_file_lru(l);
 		unsigned long scan;
 
-		scan = zone_nr_lru_pages(zone, sc, l);
+		scan = zone_nr_lru_pages(mz, l);
 		if (priority || noswap) {
 			scan >>= priority;
 			if (!scan && force_scan)
@@ -1972,7 +1984,7 @@ out:
  * back to the allocator and call try_to_compact_zone(), we ensure that
  * there are enough free pages for it to be likely successful
  */
-static inline bool should_continue_reclaim(struct zone *zone,
+static inline bool should_continue_reclaim(struct mem_cgroup_zone *mz,
 					    unsigned long nr_reclaimed,
 					    unsigned long nr_scanned,
 					    struct scan_control *sc)
@@ -2012,15 +2024,15 @@ static inline bool should_continue_reclaim(struct zone *zone,
 	 * inactive lists are large enough, continue reclaiming
 	 */
 	pages_for_compaction = (2UL << sc->order);
-	inactive_lru_pages = zone_nr_lru_pages(zone, sc, LRU_INACTIVE_FILE);
+	inactive_lru_pages = zone_nr_lru_pages(mz, LRU_INACTIVE_FILE);
 	if (nr_swap_pages > 0)
-		inactive_lru_pages += zone_nr_lru_pages(zone, sc, LRU_INACTIVE_ANON);
+		inactive_lru_pages += zone_nr_lru_pages(mz, LRU_INACTIVE_ANON);
 	if (sc->nr_reclaimed < pages_for_compaction &&
 			inactive_lru_pages > pages_for_compaction)
 		return true;
 
 	/* If compaction would go ahead or the allocation would succeed, stop */
-	switch (compaction_suitable(zone, sc->order)) {
+	switch (compaction_suitable(mz->zone, sc->order)) {
 	case COMPACT_PARTIAL:
 	case COMPACT_CONTINUE:
 		return false;
@@ -2032,8 +2044,8 @@ static inline bool should_continue_reclaim(struct zone *zone,
 /*
  * This is a basic per-zone page freer. Used by both kswapd and direct reclaim.
  */
-static void shrink_zone(int priority, struct zone *zone,
+static void shrink_mem_cgroup_zone(int priority, struct mem_cgroup_zone *mz,
 				   struct scan_control *sc)
 {
 	unsigned long nr[NR_LRU_LISTS];
 	unsigned long nr_to_scan;
@@ -2045,7 +2057,7 @@ static void shrink_zone(int priority, struct zone *zone,
 restart:
 	nr_reclaimed = 0;
 	nr_scanned = sc->nr_scanned;
-	get_scan_count(zone, sc, nr, priority);
+	get_scan_count(mz, sc, nr, priority);
 
 	blk_start_plug(&plug);
 	while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] ||
@@ -2057,7 +2069,7 @@ restart:
 			nr[l] -= nr_to_scan;
 
 			nr_reclaimed += shrink_list(l, nr_to_scan,
-						    zone, sc, priority);
+						    mz, sc, priority);
 		}
 	}
 	/*
@@ -2078,17 +2090,28 @@ restart:
 	 * Even if we did not try to evict anon pages at all, we want to
 	 * rebalance the anon lru active/inactive ratio.
 	 */
-	if (inactive_anon_is_low(zone, sc))
-		shrink_active_list(SWAP_CLUSTER_MAX, zone, sc, priority, 0);
+	if (inactive_anon_is_low(mz))
+		shrink_active_list(SWAP_CLUSTER_MAX, mz, sc, priority, 0);
 
 	/* reclaim/compaction might need reclaim to continue */
-	if (should_continue_reclaim(zone, nr_reclaimed,
+	if (should_continue_reclaim(mz, nr_reclaimed,
 					sc->nr_scanned - nr_scanned, sc))
 		goto restart;
 
 	throttle_vm_writeout(sc->gfp_mask);
 }
 
+static void shrink_zone(int priority, struct zone *zone,
+			struct scan_control *sc)
+{
+	struct mem_cgroup_zone mz = {
+		.mem_cgroup = sc->target_mem_cgroup,
+		.zone = zone,
+	};
+
+	shrink_mem_cgroup_zone(priority, &mz, sc);
+}
+
 /*
  * This is the direct reclaim path, for page-allocating processes. We only
  * try to reclaim pages from zones which will satisfy the caller's allocation
@@ -2230,7 +2253,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
 	for (priority = DEF_PRIORITY; priority >= 0; priority--) {
 		sc->nr_scanned = 0;
 		if (!priority)
-			disable_swap_token(sc->mem_cgroup);
+			disable_swap_token(sc->target_mem_cgroup);
 		if (shrink_zones(priority, zonelist, sc))
 			break;
 
@@ -2317,7 +2340,7 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
 		.may_unmap = 1,
 		.may_swap = 1,
 		.order = order,
-		.mem_cgroup = NULL,
+		.target_mem_cgroup = NULL,
 		.nodemask = nodemask,
 	};
 	struct shrink_control shrink = {
@@ -2349,7 +2372,7 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem,
 		.may_unmap = 1,
 		.may_swap = !noswap,
 		.order = 0,
-		.mem_cgroup = mem,
+		.target_mem_cgroup = mem,
 	};
 
 	sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
@@ -2387,7 +2410,7 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
 		.may_swap = !noswap,
 		.nr_to_reclaim = SWAP_CLUSTER_MAX,
 		.order = 0,
-		.mem_cgroup = mem_cont,
+		.target_mem_cgroup = mem_cont,
 		.nodemask = NULL, /* we don't care the placement */
 		.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
 				(GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK),
@@ -2417,6 +2440,18 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
 }
 #endif
 
+static void age_active_anon(struct zone *zone, struct scan_control *sc,
+			    int priority)
+{
+	struct mem_cgroup_zone mz = {
+		.mem_cgroup = NULL,
+		.zone = zone,
+	};
+
+	if (inactive_anon_is_low(&mz))
+		shrink_active_list(SWAP_CLUSTER_MAX, &mz, sc, priority, 0);
+}
+
 /*
  * pgdat_balanced is used when checking if a node is balanced for high-order
  * allocations. Only zones that meet watermarks and are in a zone allowed
@@ -2537,7 +2572,7 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order,
 		 */
 		.nr_to_reclaim = ULONG_MAX,
 		.order = order,
-		.mem_cgroup = NULL,
+		.target_mem_cgroup = NULL,
 	};
 	struct shrink_control shrink = {
 		.gfp_mask = sc.gfp_mask,
@@ -2576,9 +2611,7 @@ loop_again:
 			 * Do some background aging of the anon list, to give
 			 * pages a chance to be referenced before reclaiming.
 			 */
-			if (inactive_anon_is_low(zone, &sc))
-				shrink_active_list(SWAP_CLUSTER_MAX, zone,
-							&sc, priority, 0);
+			age_active_anon(zone, &sc, priority);
 
 			if (!zone_watermark_ok_safe(zone, order,
 						    high_wmark_pages(zone), 0, 0)) {