path: root/mm
author    Johannes Weiner <hannes@cmpxchg.org>        2014-08-08 17:19:24 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>  2014-08-08 18:57:18 -0400
commit    747db954cab64c6b7a95b121b517165f34751898 (patch)
tree      35149b31a5f3a0bb85df2e40c79c46ed2df4f4ed /mm
parent    0a31bc97c80c3fa87b32c091d9a930ac19cd0c40 (diff)
mm: memcontrol: use page lists for uncharge batching
Pages are now uncharged at release time, and all sources of batched
uncharges operate on lists of pages.  Directly use those lists, and
get rid of the per-task batching state.

This also batches statistics accounting, in addition to the res
counter charges, to reduce IRQ-disabling and re-enabling.

Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Michal Hocko <mhocko@suse.cz>
Cc: Hugh Dickins <hughd@google.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Vladimir Davydov <vdavydov@parallels.com>
Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm')
-rw-r--r--  mm/memcontrol.c  206
-rw-r--r--  mm/swap.c          6
-rw-r--r--  mm/vmscan.c       12
3 files changed, 115 insertions, 109 deletions
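
The new caller-side pattern, as a minimal illustrative sketch (not part of the diff below; example_release_batch() is a hypothetical helper modeled on the release_pages() change in mm/swap.c): instead of bracketing a free loop with mem_cgroup_uncharge_start()/mem_cgroup_uncharge_end() and uncharging each page individually, a caller collects the pages it is about to free on a local list and uncharges the whole list at once, so res_counter and statistics updates are batched per memcg with a single IRQ-disabled section per batch.

#include <linux/mm.h>
#include <linux/list.h>
#include <linux/memcontrol.h>
#include "internal.h"	/* free_hot_cold_page_list(), as used by mm/swap.c */

/* Hypothetical caller, sketching the pattern this commit introduces. */
static void example_release_batch(struct page **pages, int nr)
{
	LIST_HEAD(pages_to_free);
	int i;

	for (i = 0; i < nr; i++) {
		struct page *page = pages[i];

		/* Drop our reference; skip pages someone else still holds. */
		if (!put_page_testzero(page))
			continue;

		/* Real callers also detach the page from the LRU here. */
		list_add(&page->lru, &pages_to_free);
	}

	/* One batched uncharge per memcg for the whole list ... */
	mem_cgroup_uncharge_list(&pages_to_free);
	/* ... then hand the pages back to the page allocator. */
	free_hot_cold_page_list(&pages_to_free, false);
}
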
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 9106f1b12f56..a6e2be0241af 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -3581,53 +3581,6 @@ out:
 	return ret;
 }
 
-/*
- * Batch_start/batch_end is called in unmap_page_range/invlidate/trucate.
- * In that cases, pages are freed continuously and we can expect pages
- * are in the same memcg. All these calls itself limits the number of
- * pages freed at once, then uncharge_start/end() is called properly.
- * This may be called prural(2) times in a context,
- */
-
-void mem_cgroup_uncharge_start(void)
-{
-	unsigned long flags;
-
-	local_irq_save(flags);
-	current->memcg_batch.do_batch++;
-	/* We can do nest. */
-	if (current->memcg_batch.do_batch == 1) {
-		current->memcg_batch.memcg = NULL;
-		current->memcg_batch.nr_pages = 0;
-		current->memcg_batch.memsw_nr_pages = 0;
-	}
-	local_irq_restore(flags);
-}
-
-void mem_cgroup_uncharge_end(void)
-{
-	struct memcg_batch_info *batch = &current->memcg_batch;
-	unsigned long flags;
-
-	local_irq_save(flags);
-	VM_BUG_ON(!batch->do_batch);
-	if (--batch->do_batch) /* If stacked, do nothing */
-		goto out;
-	/*
-	 * This "batch->memcg" is valid without any css_get/put etc...
-	 * bacause we hide charges behind us.
-	 */
-	if (batch->nr_pages)
-		res_counter_uncharge(&batch->memcg->res,
-				     batch->nr_pages * PAGE_SIZE);
-	if (batch->memsw_nr_pages)
-		res_counter_uncharge(&batch->memcg->memsw,
-				     batch->memsw_nr_pages * PAGE_SIZE);
-	memcg_oom_recover(batch->memcg);
-out:
-	local_irq_restore(flags);
-}
-
 #ifdef CONFIG_MEMCG_SWAP
 static void mem_cgroup_swap_statistics(struct mem_cgroup *memcg,
 				       bool charge)
@@ -6554,6 +6507,98 @@ void mem_cgroup_cancel_charge(struct page *page, struct mem_cgroup *memcg)
 	cancel_charge(memcg, nr_pages);
 }
 
+static void uncharge_batch(struct mem_cgroup *memcg, unsigned long pgpgout,
+			   unsigned long nr_mem, unsigned long nr_memsw,
+			   unsigned long nr_anon, unsigned long nr_file,
+			   unsigned long nr_huge, struct page *dummy_page)
+{
+	unsigned long flags;
+
+	if (nr_mem)
+		res_counter_uncharge(&memcg->res, nr_mem * PAGE_SIZE);
+	if (nr_memsw)
+		res_counter_uncharge(&memcg->memsw, nr_memsw * PAGE_SIZE);
+
+	memcg_oom_recover(memcg);
+
+	local_irq_save(flags);
+	__this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_RSS], nr_anon);
+	__this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_CACHE], nr_file);
+	__this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_RSS_HUGE], nr_huge);
+	__this_cpu_add(memcg->stat->events[MEM_CGROUP_EVENTS_PGPGOUT], pgpgout);
+	__this_cpu_add(memcg->stat->nr_page_events, nr_anon + nr_file);
+	memcg_check_events(memcg, dummy_page);
+	local_irq_restore(flags);
+}
+
+static void uncharge_list(struct list_head *page_list)
+{
+	struct mem_cgroup *memcg = NULL;
+	unsigned long nr_memsw = 0;
+	unsigned long nr_anon = 0;
+	unsigned long nr_file = 0;
+	unsigned long nr_huge = 0;
+	unsigned long pgpgout = 0;
+	unsigned long nr_mem = 0;
+	struct list_head *next;
+	struct page *page;
+
+	next = page_list->next;
+	do {
+		unsigned int nr_pages = 1;
+		struct page_cgroup *pc;
+
+		page = list_entry(next, struct page, lru);
+		next = page->lru.next;
+
+		VM_BUG_ON_PAGE(PageLRU(page), page);
+		VM_BUG_ON_PAGE(page_count(page), page);
+
+		pc = lookup_page_cgroup(page);
+		if (!PageCgroupUsed(pc))
+			continue;
+
+		/*
+		 * Nobody should be changing or seriously looking at
+		 * pc->mem_cgroup and pc->flags at this point, we have
+		 * fully exclusive access to the page.
+		 */
+
+		if (memcg != pc->mem_cgroup) {
+			if (memcg) {
+				uncharge_batch(memcg, pgpgout, nr_mem, nr_memsw,
+					       nr_anon, nr_file, nr_huge, page);
+				pgpgout = nr_mem = nr_memsw = 0;
+				nr_anon = nr_file = nr_huge = 0;
+			}
+			memcg = pc->mem_cgroup;
+		}
+
+		if (PageTransHuge(page)) {
+			nr_pages <<= compound_order(page);
+			VM_BUG_ON_PAGE(!PageTransHuge(page), page);
+			nr_huge += nr_pages;
+		}
+
+		if (PageAnon(page))
+			nr_anon += nr_pages;
+		else
+			nr_file += nr_pages;
+
+		if (pc->flags & PCG_MEM)
+			nr_mem += nr_pages;
+		if (pc->flags & PCG_MEMSW)
+			nr_memsw += nr_pages;
+		pc->flags = 0;
+
+		pgpgout++;
+	} while (next != page_list);
+
+	if (memcg)
+		uncharge_batch(memcg, pgpgout, nr_mem, nr_memsw,
+			       nr_anon, nr_file, nr_huge, page);
+}
+
 /**
  * mem_cgroup_uncharge - uncharge a page
  * @page: page to uncharge
@@ -6563,67 +6608,34 @@ void mem_cgroup_cancel_charge(struct page *page, struct mem_cgroup *memcg)
  */
 void mem_cgroup_uncharge(struct page *page)
 {
-	struct memcg_batch_info *batch;
-	unsigned int nr_pages = 1;
-	struct mem_cgroup *memcg;
 	struct page_cgroup *pc;
-	unsigned long pc_flags;
-	unsigned long flags;
-
-	VM_BUG_ON_PAGE(PageLRU(page), page);
-	VM_BUG_ON_PAGE(page_count(page), page);
 
 	if (mem_cgroup_disabled())
 		return;
 
+	/* Don't touch page->lru of any random page, pre-check: */
 	pc = lookup_page_cgroup(page);
-
-	/* Every final put_page() ends up here */
 	if (!PageCgroupUsed(pc))
 		return;
 
-	if (PageTransHuge(page)) {
-		nr_pages <<= compound_order(page);
-		VM_BUG_ON_PAGE(!PageTransHuge(page), page);
-	}
-	/*
-	 * Nobody should be changing or seriously looking at
-	 * pc->mem_cgroup and pc->flags at this point, we have fully
-	 * exclusive access to the page.
-	 */
-	memcg = pc->mem_cgroup;
-	pc_flags = pc->flags;
-	pc->flags = 0;
-
-	local_irq_save(flags);
+	INIT_LIST_HEAD(&page->lru);
+	uncharge_list(&page->lru);
+}
 
-	if (nr_pages > 1)
-		goto direct;
-	if (unlikely(test_thread_flag(TIF_MEMDIE)))
-		goto direct;
-	batch = &current->memcg_batch;
-	if (!batch->do_batch)
-		goto direct;
-	if (batch->memcg && batch->memcg != memcg)
-		goto direct;
-	if (!batch->memcg)
-		batch->memcg = memcg;
-	if (pc_flags & PCG_MEM)
-		batch->nr_pages++;
-	if (pc_flags & PCG_MEMSW)
-		batch->memsw_nr_pages++;
-	goto out;
-direct:
-	if (pc_flags & PCG_MEM)
-		res_counter_uncharge(&memcg->res, nr_pages * PAGE_SIZE);
-	if (pc_flags & PCG_MEMSW)
-		res_counter_uncharge(&memcg->memsw, nr_pages * PAGE_SIZE);
-	memcg_oom_recover(memcg);
-out:
-	mem_cgroup_charge_statistics(memcg, page, -nr_pages);
-	memcg_check_events(memcg, page);
+/**
+ * mem_cgroup_uncharge_list - uncharge a list of page
+ * @page_list: list of pages to uncharge
+ *
+ * Uncharge a list of pages previously charged with
+ * mem_cgroup_try_charge() and mem_cgroup_commit_charge().
+ */
+void mem_cgroup_uncharge_list(struct list_head *page_list)
+{
+	if (mem_cgroup_disabled())
+		return;
 
-	local_irq_restore(flags);
+	if (!list_empty(page_list))
+		uncharge_list(page_list);
 }
 
 /**
diff --git a/mm/swap.c b/mm/swap.c
index 00523fffa5ed..6b2dc3897cd5 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -908,8 +908,6 @@ void release_pages(struct page **pages, int nr, bool cold)
 	struct lruvec *lruvec;
 	unsigned long uninitialized_var(flags);
 
-	mem_cgroup_uncharge_start();
-
 	for (i = 0; i < nr; i++) {
 		struct page *page = pages[i];
 
@@ -941,7 +939,6 @@ void release_pages(struct page **pages, int nr, bool cold)
 			__ClearPageLRU(page);
 			del_page_from_lru_list(page, lruvec, page_off_lru(page));
 		}
-		mem_cgroup_uncharge(page);
 
 		/* Clear Active bit in case of parallel mark_page_accessed */
 		__ClearPageActive(page);
@@ -951,8 +948,7 @@ void release_pages(struct page **pages, int nr, bool cold)
 	if (zone)
 		spin_unlock_irqrestore(&zone->lru_lock, flags);
 
-	mem_cgroup_uncharge_end();
-
+	mem_cgroup_uncharge_list(&pages_to_free);
 	free_hot_cold_page_list(&pages_to_free, cold);
 }
 EXPORT_SYMBOL(release_pages);
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 7068e838d22b..2836b5373b2e 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -822,7 +822,6 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 
 	cond_resched();
 
-	mem_cgroup_uncharge_start();
 	while (!list_empty(page_list)) {
 		struct address_space *mapping;
 		struct page *page;
@@ -1103,7 +1102,6 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 		 */
 		__clear_page_locked(page);
 free_it:
-		mem_cgroup_uncharge(page);
 		nr_reclaimed++;
 
 		/*
@@ -1133,8 +1131,8 @@ keep:
 		list_add(&page->lru, &ret_pages);
 		VM_BUG_ON_PAGE(PageLRU(page) || PageUnevictable(page), page);
 	}
-	mem_cgroup_uncharge_end();
 
+	mem_cgroup_uncharge_list(&free_pages);
 	free_hot_cold_page_list(&free_pages, true);
 
 	list_splice(&ret_pages, page_list);
@@ -1437,10 +1435,9 @@ putback_inactive_pages(struct lruvec *lruvec, struct list_head *page_list)
 			__ClearPageActive(page);
 			del_page_from_lru_list(page, lruvec, lru);
 
-			mem_cgroup_uncharge(page);
-
 			if (unlikely(PageCompound(page))) {
 				spin_unlock_irq(&zone->lru_lock);
+				mem_cgroup_uncharge(page);
 				(*get_compound_page_dtor(page))(page);
 				spin_lock_irq(&zone->lru_lock);
 			} else
@@ -1548,6 +1545,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
 
 	spin_unlock_irq(&zone->lru_lock);
 
+	mem_cgroup_uncharge_list(&page_list);
 	free_hot_cold_page_list(&page_list, true);
 
 	/*
@@ -1660,10 +1658,9 @@ static void move_active_pages_to_lru(struct lruvec *lruvec,
 			__ClearPageActive(page);
 			del_page_from_lru_list(page, lruvec, lru);
 
-			mem_cgroup_uncharge(page);
-
 			if (unlikely(PageCompound(page))) {
 				spin_unlock_irq(&zone->lru_lock);
+				mem_cgroup_uncharge(page);
 				(*get_compound_page_dtor(page))(page);
 				spin_lock_irq(&zone->lru_lock);
 			} else
@@ -1771,6 +1768,7 @@ static void shrink_active_list(unsigned long nr_to_scan,
 	__mod_zone_page_state(zone, NR_ISOLATED_ANON + file, -nr_taken);
 	spin_unlock_irq(&zone->lru_lock);
 
+	mem_cgroup_uncharge_list(&l_hold);
 	free_hot_cold_page_list(&l_hold, true);
 }
 