author     Johannes Weiner <jweiner@redhat.com>            2012-01-12 20:17:52 -0500
committer  Linus Torvalds <torvalds@linux-foundation.org>  2012-01-12 23:13:04 -0500
commit     f16015fbf2f6ac45505d6ad21455ff9f6c14473d (patch)
tree       ad9c7574566ed492f2b2d9ff655bb90773442c05 /mm/vmscan.c
parent     89b5fae5368f6aec62fb09c8e19b6c61f1154603 (diff)
mm: vmscan: distinguish between memcg triggering reclaim and memcg being scanned
Memory cgroup hierarchies are currently handled completely outside of
the traditional reclaim code, which is invoked with a single memory
cgroup as an argument for the whole call stack.
Subsequent patches will switch this code to do hierarchical reclaim, so
there needs to be a distinction between a) the memory cgroup that is
triggering reclaim due to hitting its limit and b) the memory cgroup
that is being scanned as a child of a).
This patch introduces a struct mem_cgroup_zone that contains the
combination of the memory cgroup and the zone being scanned, which is
then passed down the stack instead of the zone argument.
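In other words, reclaim now carries a (memory cgroup, zone) pair down the call
stack instead of a bare zone, and the existing shrink_zone() entry point simply
bundles the reclaim target and the zone into that pair.  A condensed sketch,
mirroring the definitions added further down in this patch:

	struct mem_cgroup_zone {
		struct mem_cgroup *mem_cgroup;	/* memcg whose LRU lists are scanned */
		struct zone *zone;		/* zone those LRU lists belong to */
	};

	static void shrink_zone(int priority, struct zone *zone,
				struct scan_control *sc)
	{
		struct mem_cgroup_zone mz = {
			.mem_cgroup = sc->target_mem_cgroup,	/* memcg that hit its limit */
			.zone = zone,
		};

		shrink_mem_cgroup_zone(priority, &mz, sc);
	}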
[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Johannes Weiner <jweiner@redhat.com>
Reviewed-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Reviewed-by: Michal Hocko <mhocko@suse.cz>
Reviewed-by: Kirill A. Shutemov <kirill@shutemov.name>
Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Cc: Balbir Singh <bsingharora@gmail.com>
Cc: Ying Han <yinghan@google.com>
Cc: Greg Thelen <gthelen@google.com>
Cc: Michel Lespinasse <walken@google.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Hugh Dickins <hughd@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/vmscan.c')
-rw-r--r--  mm/vmscan.c  249
1 file changed, 141 insertions(+), 108 deletions(-)
diff --git a/mm/vmscan.c b/mm/vmscan.c
index ee4a46b8ae33..e0627d07c3ac 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -103,8 +103,11 @@ struct scan_control {
 	 */
 	reclaim_mode_t reclaim_mode;
 
-	/* Which cgroup do we reclaim from */
-	struct mem_cgroup *mem_cgroup;
+	/*
+	 * The memory cgroup that hit its limit and as a result is the
+	 * primary target of this reclaim invocation.
+	 */
+	struct mem_cgroup *target_mem_cgroup;
 
 	/*
 	 * Nodemask of nodes allowed by the caller. If NULL, all nodes
@@ -113,6 +116,11 @@ struct scan_control {
 	nodemask_t *nodemask;
 };
 
+struct mem_cgroup_zone {
+	struct mem_cgroup *mem_cgroup;
+	struct zone *zone;
+};
+
 #define lru_to_page(_head) (list_entry((_head)->prev, struct page, lru))
 
 #ifdef ARCH_HAS_PREFETCH
@@ -155,12 +163,12 @@ static DECLARE_RWSEM(shrinker_rwsem);
 #ifdef CONFIG_CGROUP_MEM_RES_CTLR
 static bool global_reclaim(struct scan_control *sc)
 {
-	return !sc->mem_cgroup;
+	return !sc->target_mem_cgroup;
 }
 
-static bool scanning_global_lru(struct scan_control *sc)
+static bool scanning_global_lru(struct mem_cgroup_zone *mz)
 {
-	return !sc->mem_cgroup;
+	return !mz->mem_cgroup;
 }
 #else
 static bool global_reclaim(struct scan_control *sc)
@@ -168,29 +176,30 @@ static bool global_reclaim(struct scan_control *sc)
 	return true;
 }
 
-static bool scanning_global_lru(struct scan_control *sc)
+static bool scanning_global_lru(struct mem_cgroup_zone *mz)
 {
 	return true;
 }
 #endif
 
-static struct zone_reclaim_stat *get_reclaim_stat(struct zone *zone,
-						  struct scan_control *sc)
+static struct zone_reclaim_stat *get_reclaim_stat(struct mem_cgroup_zone *mz)
 {
-	if (!scanning_global_lru(sc))
-		return mem_cgroup_get_reclaim_stat(sc->mem_cgroup, zone);
+	if (!scanning_global_lru(mz))
+		return mem_cgroup_get_reclaim_stat(mz->mem_cgroup, mz->zone);
 
-	return &zone->reclaim_stat;
+	return &mz->zone->reclaim_stat;
 }
 
-static unsigned long zone_nr_lru_pages(struct zone *zone,
-				struct scan_control *sc, enum lru_list lru)
+static unsigned long zone_nr_lru_pages(struct mem_cgroup_zone *mz,
+				       enum lru_list lru)
 {
-	if (!scanning_global_lru(sc))
-		return mem_cgroup_zone_nr_lru_pages(sc->mem_cgroup,
-				zone_to_nid(zone), zone_idx(zone), BIT(lru));
+	if (!scanning_global_lru(mz))
+		return mem_cgroup_zone_nr_lru_pages(mz->mem_cgroup,
+						    zone_to_nid(mz->zone),
+						    zone_idx(mz->zone),
+						    BIT(lru));
 
-	return zone_page_state(zone, NR_LRU_BASE + lru);
+	return zone_page_state(mz->zone, NR_LRU_BASE + lru);
 }
 
 
@@ -693,12 +702,13 @@ enum page_references {
 };
 
 static enum page_references page_check_references(struct page *page,
+						  struct mem_cgroup_zone *mz,
 						  struct scan_control *sc)
 {
 	int referenced_ptes, referenced_page;
 	unsigned long vm_flags;
 
-	referenced_ptes = page_referenced(page, 1, sc->mem_cgroup, &vm_flags);
+	referenced_ptes = page_referenced(page, 1, mz->mem_cgroup, &vm_flags);
 	referenced_page = TestClearPageReferenced(page);
 
 	/* Lumpy reclaim - ignore references */
@@ -754,7 +764,7 @@ static enum page_references page_check_references(struct page *page,
  * shrink_page_list() returns the number of reclaimed pages
  */
 static unsigned long shrink_page_list(struct list_head *page_list,
-				      struct zone *zone,
+				      struct mem_cgroup_zone *mz,
 				      struct scan_control *sc,
 				      int priority,
 				      unsigned long *ret_nr_dirty,
@@ -785,7 +795,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 			goto keep;
 
 		VM_BUG_ON(PageActive(page));
-		VM_BUG_ON(page_zone(page) != zone);
+		VM_BUG_ON(page_zone(page) != mz->zone);
 
 		sc->nr_scanned++;
 
@@ -819,7 +829,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 			}
 		}
 
-		references = page_check_references(page, sc);
+		references = page_check_references(page, mz, sc);
 		switch (references) {
 		case PAGEREF_ACTIVATE:
 			goto activate_locked;
@@ -1011,7 +1021,7 @@ keep_lumpy:
 	 * will encounter the same problem
 	 */
 	if (nr_dirty && nr_dirty == nr_congested && global_reclaim(sc))
-		zone_set_flag(zone, ZONE_CONGESTED);
+		zone_set_flag(mz->zone, ZONE_CONGESTED);
 
 	free_hot_cold_page_list(&free_pages, 1);
 
@@ -1347,13 +1357,14 @@ static int too_many_isolated(struct zone *zone, int file,
  * TODO: Try merging with migrations version of putback_lru_pages
  */
 static noinline_for_stack void
-putback_lru_pages(struct zone *zone, struct scan_control *sc,
+putback_lru_pages(struct mem_cgroup_zone *mz, struct scan_control *sc,
 				unsigned long nr_anon, unsigned long nr_file,
 				struct list_head *page_list)
 {
 	struct page *page;
 	struct pagevec pvec;
-	struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(zone, sc);
+	struct zone *zone = mz->zone;
+	struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(mz);
 
 	pagevec_init(&pvec, 1);
 
@@ -1393,15 +1404,17 @@ putback_lru_pages(struct zone *zone, struct scan_control *sc,
 	pagevec_release(&pvec);
 }
 
-static noinline_for_stack void update_isolated_counts(struct zone *zone,
-					struct scan_control *sc,
-					unsigned long *nr_anon,
-					unsigned long *nr_file,
-					struct list_head *isolated_list)
+static noinline_for_stack void
+update_isolated_counts(struct mem_cgroup_zone *mz,
+		       struct scan_control *sc,
+		       unsigned long *nr_anon,
+		       unsigned long *nr_file,
+		       struct list_head *isolated_list)
 {
 	unsigned long nr_active;
+	struct zone *zone = mz->zone;
 	unsigned int count[NR_LRU_LISTS] = { 0, };
-	struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(zone, sc);
+	struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(mz);
 
 	nr_active = clear_active_flags(isolated_list, count);
 	__count_vm_events(PGDEACTIVATE, nr_active);
@@ -1470,8 +1483,8 @@ static inline bool should_reclaim_stall(unsigned long nr_taken,
  * of reclaimed pages
  */
 static noinline_for_stack unsigned long
-shrink_inactive_list(unsigned long nr_to_scan, struct zone *zone,
+shrink_inactive_list(unsigned long nr_to_scan, struct mem_cgroup_zone *mz,
 		     struct scan_control *sc, int priority, int file)
 {
 	LIST_HEAD(page_list);
 	unsigned long nr_scanned;
@@ -1482,6 +1495,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct zone *zone,
 	unsigned long nr_dirty = 0;
 	unsigned long nr_writeback = 0;
 	isolate_mode_t reclaim_mode = ISOLATE_INACTIVE;
+	struct zone *zone = mz->zone;
 
 	while (unlikely(too_many_isolated(zone, file, sc))) {
 		congestion_wait(BLK_RW_ASYNC, HZ/10);
@@ -1504,13 +1518,13 @@ shrink_inactive_list(unsigned long nr_to_scan, struct zone *zone,
 
 	spin_lock_irq(&zone->lru_lock);
 
-	if (scanning_global_lru(sc)) {
+	if (scanning_global_lru(mz)) {
 		nr_taken = isolate_pages_global(nr_to_scan, &page_list,
 			&nr_scanned, sc->order, reclaim_mode, zone, 0, file);
 	} else {
 		nr_taken = mem_cgroup_isolate_pages(nr_to_scan, &page_list,
 			&nr_scanned, sc->order, reclaim_mode, zone,
-			sc->mem_cgroup, 0, file);
+			mz->mem_cgroup, 0, file);
 	}
 	if (global_reclaim(sc)) {
 		zone->pages_scanned += nr_scanned;
@@ -1527,17 +1541,17 @@ shrink_inactive_list(unsigned long nr_to_scan, struct zone *zone,
 		return 0;
 	}
 
-	update_isolated_counts(zone, sc, &nr_anon, &nr_file, &page_list);
+	update_isolated_counts(mz, sc, &nr_anon, &nr_file, &page_list);
 
 	spin_unlock_irq(&zone->lru_lock);
 
-	nr_reclaimed = shrink_page_list(&page_list, zone, sc, priority,
+	nr_reclaimed = shrink_page_list(&page_list, mz, sc, priority,
 						&nr_dirty, &nr_writeback);
 
 	/* Check if we should syncronously wait for writeback */
 	if (should_reclaim_stall(nr_taken, nr_reclaimed, priority, sc)) {
 		set_reclaim_mode(priority, sc, true);
-		nr_reclaimed += shrink_page_list(&page_list, zone, sc,
+		nr_reclaimed += shrink_page_list(&page_list, mz, sc,
 					priority, &nr_dirty, &nr_writeback);
 	}
 
@@ -1546,7 +1560,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct zone *zone,
 		__count_vm_events(KSWAPD_STEAL, nr_reclaimed);
 	__count_zone_vm_events(PGSTEAL, zone, nr_reclaimed);
 
-	putback_lru_pages(zone, sc, nr_anon, nr_file, &page_list);
+	putback_lru_pages(mz, sc, nr_anon, nr_file, &page_list);
 
 	/*
 	 * If reclaim is isolating dirty pages under writeback, it implies
@@ -1633,8 +1647,10 @@ static void move_active_pages_to_lru(struct zone *zone,
 		__count_vm_events(PGDEACTIVATE, pgmoved);
 }
 
-static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
-			struct scan_control *sc, int priority, int file)
+static void shrink_active_list(unsigned long nr_pages,
+			       struct mem_cgroup_zone *mz,
+			       struct scan_control *sc,
+			       int priority, int file)
 {
 	unsigned long nr_taken;
 	unsigned long pgscanned;
@@ -1643,9 +1659,10 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
 	LIST_HEAD(l_active);
 	LIST_HEAD(l_inactive);
 	struct page *page;
-	struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(zone, sc);
+	struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(mz);
 	unsigned long nr_rotated = 0;
 	isolate_mode_t reclaim_mode = ISOLATE_ACTIVE;
+	struct zone *zone = mz->zone;
 
 	lru_add_drain();
 
@@ -1655,7 +1672,7 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
 		reclaim_mode |= ISOLATE_CLEAN;
 
 	spin_lock_irq(&zone->lru_lock);
-	if (scanning_global_lru(sc)) {
+	if (scanning_global_lru(mz)) {
 		nr_taken = isolate_pages_global(nr_pages, &l_hold,
 						&pgscanned, sc->order,
 						reclaim_mode, zone,
@@ -1664,7 +1681,7 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
 		nr_taken = mem_cgroup_isolate_pages(nr_pages, &l_hold,
 						&pgscanned, sc->order,
 						reclaim_mode, zone,
-						sc->mem_cgroup, 1, file);
+						mz->mem_cgroup, 1, file);
 	}
 
 	if (global_reclaim(sc))
@@ -1690,7 +1707,7 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
 			continue;
 		}
 
-		if (page_referenced(page, 0, sc->mem_cgroup, &vm_flags)) {
+		if (page_referenced(page, 0, mz->mem_cgroup, &vm_flags)) {
 			nr_rotated += hpage_nr_pages(page);
 			/*
 			 * Identify referenced, file-backed active pages and
@@ -1753,10 +1770,8 @@ static int inactive_anon_is_low_global(struct zone *zone)
 * Returns true if the zone does not have enough inactive anon pages,
 * meaning some active anon pages need to be deactivated.
 */
-static int inactive_anon_is_low(struct zone *zone, struct scan_control *sc)
+static int inactive_anon_is_low(struct mem_cgroup_zone *mz)
 {
-	int low;
-
 	/*
 	 * If we don't have swap space, anonymous page deactivation
 	 * is pointless.
@@ -1764,15 +1779,14 @@ static int inactive_anon_is_low(struct zone *zone, struct scan_control *sc)
 	if (!total_swap_pages)
 		return 0;
 
-	if (scanning_global_lru(sc))
-		low = inactive_anon_is_low_global(zone);
-	else
-		low = mem_cgroup_inactive_anon_is_low(sc->mem_cgroup, zone);
-	return low;
+	if (!scanning_global_lru(mz))
+		return mem_cgroup_inactive_anon_is_low(mz->mem_cgroup,
+						       mz->zone);
+
+	return inactive_anon_is_low_global(mz->zone);
 }
 #else
-static inline int inactive_anon_is_low(struct zone *zone,
-					struct scan_control *sc)
+static inline int inactive_anon_is_low(struct mem_cgroup_zone *mz)
 {
 	return 0;
 }
@@ -1790,8 +1804,7 @@ static int inactive_file_is_low_global(struct zone *zone)
 
 /**
 * inactive_file_is_low - check if file pages need to be deactivated
- * @zone: zone to check
- * @sc:   scan control of this context
+ * @mz: memory cgroup and zone to check
 *
 * When the system is doing streaming IO, memory pressure here
 * ensures that active file pages get deactivated, until more
@@ -1803,45 +1816,44 @@ static int inactive_file_is_low_global(struct zone *zone)
 * This uses a different ratio than the anonymous pages, because
 * the page cache uses a use-once replacement algorithm.
 */
-static int inactive_file_is_low(struct zone *zone, struct scan_control *sc)
+static int inactive_file_is_low(struct mem_cgroup_zone *mz)
 {
-	int low;
+	if (!scanning_global_lru(mz))
+		return mem_cgroup_inactive_file_is_low(mz->mem_cgroup,
+						       mz->zone);
 
-	if (scanning_global_lru(sc))
-		low = inactive_file_is_low_global(zone);
-	else
-		low = mem_cgroup_inactive_file_is_low(sc->mem_cgroup, zone);
-	return low;
+	return inactive_file_is_low_global(mz->zone);
 }
 
-static int inactive_list_is_low(struct zone *zone, struct scan_control *sc,
-				int file)
+static int inactive_list_is_low(struct mem_cgroup_zone *mz, int file)
 {
 	if (file)
-		return inactive_file_is_low(zone, sc);
+		return inactive_file_is_low(mz);
 	else
-		return inactive_anon_is_low(zone, sc);
+		return inactive_anon_is_low(mz);
 }
 
 static unsigned long shrink_list(enum lru_list lru, unsigned long nr_to_scan,
-	struct zone *zone, struct scan_control *sc, int priority)
+				 struct mem_cgroup_zone *mz,
+				 struct scan_control *sc, int priority)
 {
 	int file = is_file_lru(lru);
 
 	if (is_active_lru(lru)) {
-		if (inactive_list_is_low(zone, sc, file))
-			shrink_active_list(nr_to_scan, zone, sc, priority, file);
+		if (inactive_list_is_low(mz, file))
+			shrink_active_list(nr_to_scan, mz, sc, priority, file);
 		return 0;
 	}
 
-	return shrink_inactive_list(nr_to_scan, zone, sc, priority, file);
+	return shrink_inactive_list(nr_to_scan, mz, sc, priority, file);
 }
 
-static int vmscan_swappiness(struct scan_control *sc)
+static int vmscan_swappiness(struct mem_cgroup_zone *mz,
+			     struct scan_control *sc)
 {
 	if (global_reclaim(sc))
 		return vm_swappiness;
-	return mem_cgroup_swappiness(sc->mem_cgroup);
+	return mem_cgroup_swappiness(mz->mem_cgroup);
 }
 
 /*
@@ -1852,13 +1864,13 @@ static int vmscan_swappiness(struct scan_control *sc)
 *
 * nr[0] = anon pages to scan; nr[1] = file pages to scan
 */
-static void get_scan_count(struct zone *zone, struct scan_control *sc,
+static void get_scan_count(struct mem_cgroup_zone *mz, struct scan_control *sc,
 			   unsigned long *nr, int priority)
 {
 	unsigned long anon, file, free;
 	unsigned long anon_prio, file_prio;
 	unsigned long ap, fp;
-	struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(zone, sc);
+	struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(mz);
 	u64 fraction[2], denominator;
 	enum lru_list l;
 	int noswap = 0;
@@ -1888,16 +1900,16 @@ static void get_scan_count(struct zone *zone, struct scan_control *sc,
 		goto out;
 	}
 
-	anon = zone_nr_lru_pages(zone, sc, LRU_ACTIVE_ANON) +
-		zone_nr_lru_pages(zone, sc, LRU_INACTIVE_ANON);
-	file = zone_nr_lru_pages(zone, sc, LRU_ACTIVE_FILE) +
-		zone_nr_lru_pages(zone, sc, LRU_INACTIVE_FILE);
+	anon = zone_nr_lru_pages(mz, LRU_ACTIVE_ANON) +
+		zone_nr_lru_pages(mz, LRU_INACTIVE_ANON);
+	file = zone_nr_lru_pages(mz, LRU_ACTIVE_FILE) +
+		zone_nr_lru_pages(mz, LRU_INACTIVE_FILE);
 
 	if (global_reclaim(sc)) {
-		free = zone_page_state(zone, NR_FREE_PAGES);
+		free = zone_page_state(mz->zone, NR_FREE_PAGES);
 		/* If we have very few page cache pages,
 		   force-scan anon pages. */
-		if (unlikely(file + free <= high_wmark_pages(zone))) {
+		if (unlikely(file + free <= high_wmark_pages(mz->zone))) {
 			fraction[0] = 1;
 			fraction[1] = 0;
 			denominator = 1;
@@ -1909,8 +1921,8 @@ static void get_scan_count(struct zone *zone, struct scan_control *sc,
 	 * With swappiness at 100, anonymous and file have the same priority.
 	 * This scanning priority is essentially the inverse of IO cost.
 	 */
-	anon_prio = vmscan_swappiness(sc);
-	file_prio = 200 - vmscan_swappiness(sc);
+	anon_prio = vmscan_swappiness(mz, sc);
+	file_prio = 200 - vmscan_swappiness(mz, sc);
 
 	/*
 	 * OK, so we have swap space and a fair amount of page cache
@@ -1923,7 +1935,7 @@ static void get_scan_count(struct zone *zone, struct scan_control *sc,
 	 *
 	 * anon in [0], file in [1]
 	 */
-	spin_lock_irq(&zone->lru_lock);
+	spin_lock_irq(&mz->zone->lru_lock);
 	if (unlikely(reclaim_stat->recent_scanned[0] > anon / 4)) {
 		reclaim_stat->recent_scanned[0] /= 2;
 		reclaim_stat->recent_rotated[0] /= 2;
@@ -1944,7 +1956,7 @@ static void get_scan_count(struct zone *zone, struct scan_control *sc,
 
 	fp = (file_prio + 1) * (reclaim_stat->recent_scanned[1] + 1);
 	fp /= reclaim_stat->recent_rotated[1] + 1;
-	spin_unlock_irq(&zone->lru_lock);
+	spin_unlock_irq(&mz->zone->lru_lock);
 
 	fraction[0] = ap;
 	fraction[1] = fp;
@@ -1954,7 +1966,7 @@ out:
 		int file = is_file_lru(l);
 		unsigned long scan;
 
-		scan = zone_nr_lru_pages(zone, sc, l);
+		scan = zone_nr_lru_pages(mz, l);
 		if (priority || noswap) {
 			scan >>= priority;
 			if (!scan && force_scan)
@@ -1972,7 +1984,7 @@ out:
 * back to the allocator and call try_to_compact_zone(), we ensure that
 * there are enough free pages for it to be likely successful
 */
-static inline bool should_continue_reclaim(struct zone *zone,
+static inline bool should_continue_reclaim(struct mem_cgroup_zone *mz,
 					unsigned long nr_reclaimed,
 					unsigned long nr_scanned,
 					struct scan_control *sc)
@@ -2012,15 +2024,15 @@ static inline bool should_continue_reclaim(struct zone *zone,
 	 * inactive lists are large enough, continue reclaiming
 	 */
 	pages_for_compaction = (2UL << sc->order);
-	inactive_lru_pages = zone_nr_lru_pages(zone, sc, LRU_INACTIVE_FILE);
+	inactive_lru_pages = zone_nr_lru_pages(mz, LRU_INACTIVE_FILE);
 	if (nr_swap_pages > 0)
-		inactive_lru_pages += zone_nr_lru_pages(zone, sc, LRU_INACTIVE_ANON);
+		inactive_lru_pages += zone_nr_lru_pages(mz, LRU_INACTIVE_ANON);
 	if (sc->nr_reclaimed < pages_for_compaction &&
 			inactive_lru_pages > pages_for_compaction)
 		return true;
 
 	/* If compaction would go ahead or the allocation would succeed, stop */
-	switch (compaction_suitable(zone, sc->order)) {
+	switch (compaction_suitable(mz->zone, sc->order)) {
 	case COMPACT_PARTIAL:
 	case COMPACT_CONTINUE:
 		return false;
@@ -2032,8 +2044,8 @@ static inline bool should_continue_reclaim(struct zone *zone,
 /*
 * This is a basic per-zone page freer. Used by both kswapd and direct reclaim.
 */
-static void shrink_zone(int priority, struct zone *zone,
+static void shrink_mem_cgroup_zone(int priority, struct mem_cgroup_zone *mz,
 				struct scan_control *sc)
 {
 	unsigned long nr[NR_LRU_LISTS];
 	unsigned long nr_to_scan;
@@ -2045,7 +2057,7 @@ static void shrink_zone(int priority, struct zone *zone,
 restart:
 	nr_reclaimed = 0;
 	nr_scanned = sc->nr_scanned;
-	get_scan_count(zone, sc, nr, priority);
+	get_scan_count(mz, sc, nr, priority);
 
 	blk_start_plug(&plug);
 	while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] ||
@@ -2057,7 +2069,7 @@ restart:
 			nr[l] -= nr_to_scan;
 
 			nr_reclaimed += shrink_list(l, nr_to_scan,
-							zone, sc, priority);
+							mz, sc, priority);
 		}
 	}
 	/*
@@ -2078,17 +2090,28 @@ restart:
 	 * Even if we did not try to evict anon pages at all, we want to
 	 * rebalance the anon lru active/inactive ratio.
 	 */
-	if (inactive_anon_is_low(zone, sc))
-		shrink_active_list(SWAP_CLUSTER_MAX, zone, sc, priority, 0);
+	if (inactive_anon_is_low(mz))
+		shrink_active_list(SWAP_CLUSTER_MAX, mz, sc, priority, 0);
 
 	/* reclaim/compaction might need reclaim to continue */
-	if (should_continue_reclaim(zone, nr_reclaimed,
+	if (should_continue_reclaim(mz, nr_reclaimed,
 					sc->nr_scanned - nr_scanned, sc))
 		goto restart;
 
 	throttle_vm_writeout(sc->gfp_mask);
 }
 
+static void shrink_zone(int priority, struct zone *zone,
+			struct scan_control *sc)
+{
+	struct mem_cgroup_zone mz = {
+		.mem_cgroup = sc->target_mem_cgroup,
+		.zone = zone,
+	};
+
+	shrink_mem_cgroup_zone(priority, &mz, sc);
+}
+
 /*
 * This is the direct reclaim path, for page-allocating processes. We only
 * try to reclaim pages from zones which will satisfy the caller's allocation
@@ -2230,7 +2253,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
 	for (priority = DEF_PRIORITY; priority >= 0; priority--) {
 		sc->nr_scanned = 0;
 		if (!priority)
-			disable_swap_token(sc->mem_cgroup);
+			disable_swap_token(sc->target_mem_cgroup);
 		if (shrink_zones(priority, zonelist, sc))
 			break;
 
@@ -2317,7 +2340,7 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
 		.may_unmap = 1,
 		.may_swap = 1,
 		.order = order,
-		.mem_cgroup = NULL,
+		.target_mem_cgroup = NULL,
 		.nodemask = nodemask,
 	};
 	struct shrink_control shrink = {
@@ -2349,7 +2372,7 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem,
 		.may_unmap = 1,
 		.may_swap = !noswap,
 		.order = 0,
-		.mem_cgroup = mem,
+		.target_mem_cgroup = mem,
 	};
 
 	sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
@@ -2387,7 +2410,7 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
 		.may_swap = !noswap,
 		.nr_to_reclaim = SWAP_CLUSTER_MAX,
 		.order = 0,
-		.mem_cgroup = mem_cont,
+		.target_mem_cgroup = mem_cont,
 		.nodemask = NULL, /* we don't care the placement */
 		.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
 				(GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK),
@@ -2417,6 +2440,18 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
 }
 #endif
 
+static void age_active_anon(struct zone *zone, struct scan_control *sc,
+			    int priority)
+{
+	struct mem_cgroup_zone mz = {
+		.mem_cgroup = NULL,
+		.zone = zone,
+	};
+
+	if (inactive_anon_is_low(&mz))
+		shrink_active_list(SWAP_CLUSTER_MAX, &mz, sc, priority, 0);
+}
+
 /*
 * pgdat_balanced is used when checking if a node is balanced for high-order
 * allocations. Only zones that meet watermarks and are in a zone allowed
@@ -2537,7 +2572,7 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order,
 		 */
 		.nr_to_reclaim = ULONG_MAX,
 		.order = order,
-		.mem_cgroup = NULL,
+		.target_mem_cgroup = NULL,
 	};
 	struct shrink_control shrink = {
 		.gfp_mask = sc.gfp_mask,
@@ -2576,9 +2611,7 @@ loop_again:
 			 * Do some background aging of the anon list, to give
 			 * pages a chance to be referenced before reclaiming.
 			 */
-			if (inactive_anon_is_low(zone, &sc))
-				shrink_active_list(SWAP_CLUSTER_MAX, zone,
-							&sc, priority, 0);
+			age_active_anon(zone, &sc, priority);
 
 			if (!zone_watermark_ok_safe(zone, order,
 						high_wmark_pages(zone), 0, 0)) {