path: root/mm/memcontrol.c
author    KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>  2008-07-25 04:47:10 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>  2008-07-25 13:53:37 -0400
commit    e8589cc189f96b87348ae83ea4db38eaac624135 (patch)
tree      6693422dc81e6da78c4ad892b0d326fb7f946dda /mm/memcontrol.c
parent    508b7be0a5b06b64203512ed9b34191cddc83f56 (diff)
memcg: better migration handling
This patch changes page migration under the memory controller to use a different algorithm (thanks to Christoph for the new idea).

Before:
- page_cgroup is migrated from an old page to a new page.

After:
- a new page is accounted; page_cgroup is not reused.

Pros:
- We can avoid complicated lock dependencies and races in migration.

Cons:
- new parameter to mem_cgroup_charge_common().
- mem_cgroup_getref() is added to handle ref_cnt ping-pong.

This version simplifies the complicated lock dependency in page migration under the memory resource controller. The new refcnt sequence is as follows.

a mapped page:
  prepare_migration()  ..... +1 to the NEW page
  try_to_unmap()       ..... all refs to the OLD page are gone.
  move_pages()         ..... +1 to the NEW page if it is page cache.
  remap...             ..... all refs from the *map* are added to the NEW one.
  end_migration()      ..... -1 to the NEW page.

  The page's mapcount + (page_is_cache) refs end up on the NEW page.

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: Pavel Emelyanov <xemul@openvz.org>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
Cc: Hugh Dickins <hugh@veritas.com>
Cc: Christoph Lameter <cl@linux-foundation.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
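To make the refcnt sequence above concrete, here is a minimal sketch of how a migration path could drive the two memcg hooks this patch introduces. Only mem_cgroup_prepare_migration() and mem_cgroup_end_migration() come from the patch; the wrapper function and the numbered step comments are illustrative and do not reproduce the actual mm/migrate.c call sites.

/*
 * Illustrative sketch only: the memcg charging sequence during page
 * migration, as described in the changelog.  The wrapper function is
 * hypothetical; only the two mem_cgroup_*_migration() calls are real.
 */
static int memcg_migration_sequence_sketch(struct page *page,
                                           struct page *newpage)
{
        int ret;

        /* 1. account the NEW page before migration starts (+1 charge) */
        ret = mem_cgroup_prepare_migration(page, newpage);
        if (ret)
                return ret;

        /* 2. try_to_unmap(): all mapped references to the OLD page go away */
        /* 3. move_pages():   the page cache, if any, now references the NEW page */
        /* 4. remap:          mappings are re-established against the NEW page */

        /*
         * 5. drop the extra charge taken in step 1; what remains is the
         *    NEW page's mapcount (+ page-cache) accounting.
         */
        mem_cgroup_end_migration(newpage);
        return 0;
}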
Diffstat (limited to 'mm/memcontrol.c')
-rw-r--r--  mm/memcontrol.c  128
1 file changed, 65 insertions(+), 63 deletions(-)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 90ccc1326356..da5912b84551 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -524,7 +524,8 @@ unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
524 * < 0 if the cgroup is over its limit 524 * < 0 if the cgroup is over its limit
525 */ 525 */
526static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm, 526static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
527 gfp_t gfp_mask, enum charge_type ctype) 527 gfp_t gfp_mask, enum charge_type ctype,
528 struct mem_cgroup *memcg)
528{ 529{
529 struct mem_cgroup *mem; 530 struct mem_cgroup *mem;
530 struct page_cgroup *pc; 531 struct page_cgroup *pc;
@@ -569,16 +570,21 @@ retry:
          * thread group leader migrates. It's possible that mm is not
          * set, if so charge the init_mm (happens for pagecache usage).
          */
-        if (!mm)
-                mm = &init_mm;
+        if (!memcg) {
+                if (!mm)
+                        mm = &init_mm;
 
-        rcu_read_lock();
-        mem = mem_cgroup_from_task(rcu_dereference(mm->owner));
-        /*
-         * For every charge from the cgroup, increment reference count
-         */
-        css_get(&mem->css);
-        rcu_read_unlock();
+                rcu_read_lock();
+                mem = mem_cgroup_from_task(rcu_dereference(mm->owner));
+                /*
+                 * For every charge from the cgroup, increment reference count
+                 */
+                css_get(&mem->css);
+                rcu_read_unlock();
+        } else {
+                mem = memcg;
+                css_get(&memcg->css);
+        }
 
         while (res_counter_charge(&mem->res, PAGE_SIZE)) {
                 if (!(gfp_mask & __GFP_WAIT))
@@ -648,7 +654,7 @@ err:
 int mem_cgroup_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask)
 {
         return mem_cgroup_charge_common(page, mm, gfp_mask,
-                                MEM_CGROUP_CHARGE_TYPE_MAPPED);
+                                MEM_CGROUP_CHARGE_TYPE_MAPPED, NULL);
 }
 
 int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
@@ -657,7 +663,22 @@ int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
         if (!mm)
                 mm = &init_mm;
         return mem_cgroup_charge_common(page, mm, gfp_mask,
-                                MEM_CGROUP_CHARGE_TYPE_CACHE);
+                                MEM_CGROUP_CHARGE_TYPE_CACHE, NULL);
+}
+
+int mem_cgroup_getref(struct page *page)
+{
+        struct page_cgroup *pc;
+
+        if (mem_cgroup_subsys.disabled)
+                return 0;
+
+        lock_page_cgroup(page);
+        pc = page_get_page_cgroup(page);
+        VM_BUG_ON(!pc);
+        pc->ref_cnt++;
+        unlock_page_cgroup(page);
+        return 0;
 }
 
 /*
@@ -707,65 +728,39 @@ unlock:
 }
 
 /*
- * Returns non-zero if a page (under migration) has valid page_cgroup member.
- * Refcnt of page_cgroup is incremented.
+ * Before starting migration, account against new page.
  */
-int mem_cgroup_prepare_migration(struct page *page)
+int mem_cgroup_prepare_migration(struct page *page, struct page *newpage)
 {
         struct page_cgroup *pc;
+        struct mem_cgroup *mem = NULL;
+        enum charge_type ctype = MEM_CGROUP_CHARGE_TYPE_MAPPED;
+        int ret = 0;
 
         if (mem_cgroup_subsys.disabled)
                 return 0;
 
         lock_page_cgroup(page);
         pc = page_get_page_cgroup(page);
-        if (pc)
-                pc->ref_cnt++;
+        if (pc) {
+                mem = pc->mem_cgroup;
+                css_get(&mem->css);
+                if (pc->flags & PAGE_CGROUP_FLAG_CACHE)
+                        ctype = MEM_CGROUP_CHARGE_TYPE_CACHE;
+        }
         unlock_page_cgroup(page);
-        return pc != NULL;
-}
-
-void mem_cgroup_end_migration(struct page *page)
-{
-        mem_cgroup_uncharge_page(page);
-}
-
-/*
- * We know both *page* and *newpage* are now not-on-LRU and PG_locked.
- * And no race with uncharge() routines because page_cgroup for *page*
- * has extra one reference by mem_cgroup_prepare_migration.
- */
-void mem_cgroup_page_migration(struct page *page, struct page *newpage)
-{
-        struct page_cgroup *pc;
-        struct mem_cgroup_per_zone *mz;
-        unsigned long flags;
-
-        lock_page_cgroup(page);
-        pc = page_get_page_cgroup(page);
-        if (!pc) {
-                unlock_page_cgroup(page);
-                return;
-        }
-
-        mz = page_cgroup_zoneinfo(pc);
-        spin_lock_irqsave(&mz->lru_lock, flags);
-        __mem_cgroup_remove_list(mz, pc);
-        spin_unlock_irqrestore(&mz->lru_lock, flags);
-
-        page_assign_page_cgroup(page, NULL);
-        unlock_page_cgroup(page);
-
-        pc->page = newpage;
-        lock_page_cgroup(newpage);
-        page_assign_page_cgroup(newpage, pc);
-
-        mz = page_cgroup_zoneinfo(pc);
-        spin_lock_irqsave(&mz->lru_lock, flags);
-        __mem_cgroup_add_list(mz, pc);
-        spin_unlock_irqrestore(&mz->lru_lock, flags);
-
-        unlock_page_cgroup(newpage);
+        if (mem) {
+                ret = mem_cgroup_charge_common(newpage, NULL, GFP_KERNEL,
+                        ctype, mem);
+                css_put(&mem->css);
+        }
+        return ret;
+}
+
+/* remove redundant charge */
+void mem_cgroup_end_migration(struct page *newpage)
+{
+        mem_cgroup_uncharge_page(newpage);
 }
 
 /*
@@ -795,12 +790,19 @@ static void mem_cgroup_force_empty_list(struct mem_cgroup *mem,
                 page = pc->page;
                 get_page(page);
                 spin_unlock_irqrestore(&mz->lru_lock, flags);
-                mem_cgroup_uncharge_page(page);
-                put_page(page);
-                if (--count <= 0) {
-                        count = FORCE_UNCHARGE_BATCH;
+                /*
+                 * Check if this page is on LRU. !LRU page can be found
+                 * if it's under page migration.
+                 */
+                if (PageLRU(page)) {
+                        mem_cgroup_uncharge_page(page);
+                        put_page(page);
+                        if (--count <= 0) {
+                                count = FORCE_UNCHARGE_BATCH;
+                                cond_resched();
+                        }
+                } else
                         cond_resched();
-                }
                 spin_lock_irqsave(&mz->lru_lock, flags);
         }
         spin_unlock_irqrestore(&mz->lru_lock, flags);