aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>2008-07-25 04:47:10 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2008-07-25 13:53:37 -0400
commite8589cc189f96b87348ae83ea4db38eaac624135 (patch)
tree6693422dc81e6da78c4ad892b0d326fb7f946dda
parent508b7be0a5b06b64203512ed9b34191cddc83f56 (diff)
memcg: better migration handling
This patch changes page migration under the memory controller to use a different algorithm. (Thanks to Christoph for the new idea.) Before: - page_cgroup is migrated from an old page to a new page. After: - a new page is accounted, with no reuse of page_cgroup. Pros: - We can avoid complicated lock dependencies and races in migration. Cons: - new param to mem_cgroup_charge_common(). - mem_cgroup_getref() is added for handling ref_cnt ping-pong. This version simplifies the complicated lock dependency in page migration under the memory resource controller. The new refcnt sequence is as follows. A mapped page: prepare_migration() ..... +1 to NEW page try_to_unmap() ..... all refs to OLD page are gone. move_pages() ..... +1 to NEW page if page cache. remap... ..... all refs from *map* are added to NEW one. end_migration() ..... -1 to NEW page. page's mapcount + (page_is_cache) refs are added to NEW one. Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> Cc: Balbir Singh <balbir@in.ibm.com> Cc: Pavel Emelyanov <xemul@openvz.org> Cc: Li Zefan <lizf@cn.fujitsu.com> Cc: YAMAMOTO Takashi <yamamoto@valinux.co.jp> Cc: Hugh Dickins <hugh@veritas.com> Cc: Christoph Lameter <cl@linux-foundation.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--include/linux/memcontrol.h11
-rw-r--r--mm/memcontrol.c128
-rw-r--r--mm/migrate.c22
3 files changed, 86 insertions, 75 deletions
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index e6608776bc96..84ead2aa6f18 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -50,9 +50,10 @@ extern struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p);
50#define mm_match_cgroup(mm, cgroup) \ 50#define mm_match_cgroup(mm, cgroup) \
51 ((cgroup) == mem_cgroup_from_task((mm)->owner)) 51 ((cgroup) == mem_cgroup_from_task((mm)->owner))
52 52
53extern int mem_cgroup_prepare_migration(struct page *page); 53extern int
54mem_cgroup_prepare_migration(struct page *page, struct page *newpage);
54extern void mem_cgroup_end_migration(struct page *page); 55extern void mem_cgroup_end_migration(struct page *page);
55extern void mem_cgroup_page_migration(struct page *page, struct page *newpage); 56extern int mem_cgroup_getref(struct page *page);
56 57
57/* 58/*
58 * For memory reclaim. 59 * For memory reclaim.
@@ -112,7 +113,8 @@ static inline int task_in_mem_cgroup(struct task_struct *task,
112 return 1; 113 return 1;
113} 114}
114 115
115static inline int mem_cgroup_prepare_migration(struct page *page) 116static inline int
117mem_cgroup_prepare_migration(struct page *page, struct page *newpage)
116{ 118{
117 return 0; 119 return 0;
118} 120}
@@ -121,8 +123,7 @@ static inline void mem_cgroup_end_migration(struct page *page)
121{ 123{
122} 124}
123 125
124static inline void 126static inline void mem_cgroup_getref(struct page *page)
125mem_cgroup_page_migration(struct page *page, struct page *newpage)
126{ 127{
127} 128}
128 129
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 90ccc1326356..da5912b84551 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -524,7 +524,8 @@ unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
524 * < 0 if the cgroup is over its limit 524 * < 0 if the cgroup is over its limit
525 */ 525 */
526static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm, 526static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
527 gfp_t gfp_mask, enum charge_type ctype) 527 gfp_t gfp_mask, enum charge_type ctype,
528 struct mem_cgroup *memcg)
528{ 529{
529 struct mem_cgroup *mem; 530 struct mem_cgroup *mem;
530 struct page_cgroup *pc; 531 struct page_cgroup *pc;
@@ -569,16 +570,21 @@ retry:
569 * thread group leader migrates. It's possible that mm is not 570 * thread group leader migrates. It's possible that mm is not
570 * set, if so charge the init_mm (happens for pagecache usage). 571 * set, if so charge the init_mm (happens for pagecache usage).
571 */ 572 */
572 if (!mm) 573 if (!memcg) {
573 mm = &init_mm; 574 if (!mm)
575 mm = &init_mm;
574 576
575 rcu_read_lock(); 577 rcu_read_lock();
576 mem = mem_cgroup_from_task(rcu_dereference(mm->owner)); 578 mem = mem_cgroup_from_task(rcu_dereference(mm->owner));
577 /* 579 /*
578 * For every charge from the cgroup, increment reference count 580 * For every charge from the cgroup, increment reference count
579 */ 581 */
580 css_get(&mem->css); 582 css_get(&mem->css);
581 rcu_read_unlock(); 583 rcu_read_unlock();
584 } else {
585 mem = memcg;
586 css_get(&memcg->css);
587 }
582 588
583 while (res_counter_charge(&mem->res, PAGE_SIZE)) { 589 while (res_counter_charge(&mem->res, PAGE_SIZE)) {
584 if (!(gfp_mask & __GFP_WAIT)) 590 if (!(gfp_mask & __GFP_WAIT))
@@ -648,7 +654,7 @@ err:
648int mem_cgroup_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask) 654int mem_cgroup_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask)
649{ 655{
650 return mem_cgroup_charge_common(page, mm, gfp_mask, 656 return mem_cgroup_charge_common(page, mm, gfp_mask,
651 MEM_CGROUP_CHARGE_TYPE_MAPPED); 657 MEM_CGROUP_CHARGE_TYPE_MAPPED, NULL);
652} 658}
653 659
654int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm, 660int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
@@ -657,7 +663,22 @@ int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
657 if (!mm) 663 if (!mm)
658 mm = &init_mm; 664 mm = &init_mm;
659 return mem_cgroup_charge_common(page, mm, gfp_mask, 665 return mem_cgroup_charge_common(page, mm, gfp_mask,
660 MEM_CGROUP_CHARGE_TYPE_CACHE); 666 MEM_CGROUP_CHARGE_TYPE_CACHE, NULL);
667}
668
669int mem_cgroup_getref(struct page *page)
670{
671 struct page_cgroup *pc;
672
673 if (mem_cgroup_subsys.disabled)
674 return 0;
675
676 lock_page_cgroup(page);
677 pc = page_get_page_cgroup(page);
678 VM_BUG_ON(!pc);
679 pc->ref_cnt++;
680 unlock_page_cgroup(page);
681 return 0;
661} 682}
662 683
663/* 684/*
@@ -707,65 +728,39 @@ unlock:
707} 728}
708 729
709/* 730/*
710 * Returns non-zero if a page (under migration) has valid page_cgroup member. 731 * Before starting migration, account against new page.
711 * Refcnt of page_cgroup is incremented.
712 */ 732 */
713int mem_cgroup_prepare_migration(struct page *page) 733int mem_cgroup_prepare_migration(struct page *page, struct page *newpage)
714{ 734{
715 struct page_cgroup *pc; 735 struct page_cgroup *pc;
736 struct mem_cgroup *mem = NULL;
737 enum charge_type ctype = MEM_CGROUP_CHARGE_TYPE_MAPPED;
738 int ret = 0;
716 739
717 if (mem_cgroup_subsys.disabled) 740 if (mem_cgroup_subsys.disabled)
718 return 0; 741 return 0;
719 742
720 lock_page_cgroup(page); 743 lock_page_cgroup(page);
721 pc = page_get_page_cgroup(page); 744 pc = page_get_page_cgroup(page);
722 if (pc) 745 if (pc) {
723 pc->ref_cnt++; 746 mem = pc->mem_cgroup;
747 css_get(&mem->css);
748 if (pc->flags & PAGE_CGROUP_FLAG_CACHE)
749 ctype = MEM_CGROUP_CHARGE_TYPE_CACHE;
750 }
724 unlock_page_cgroup(page); 751 unlock_page_cgroup(page);
725 return pc != NULL; 752 if (mem) {
726} 753 ret = mem_cgroup_charge_common(newpage, NULL, GFP_KERNEL,
727 754 ctype, mem);
728void mem_cgroup_end_migration(struct page *page) 755 css_put(&mem->css);
729{ 756 }
730 mem_cgroup_uncharge_page(page); 757 return ret;
731} 758}
732 759
733/* 760/* remove redundant charge */
734 * We know both *page* and *newpage* are now not-on-LRU and PG_locked. 761void mem_cgroup_end_migration(struct page *newpage)
735 * And no race with uncharge() routines because page_cgroup for *page*
736 * has extra one reference by mem_cgroup_prepare_migration.
737 */
738void mem_cgroup_page_migration(struct page *page, struct page *newpage)
739{ 762{
740 struct page_cgroup *pc; 763 mem_cgroup_uncharge_page(newpage);
741 struct mem_cgroup_per_zone *mz;
742 unsigned long flags;
743
744 lock_page_cgroup(page);
745 pc = page_get_page_cgroup(page);
746 if (!pc) {
747 unlock_page_cgroup(page);
748 return;
749 }
750
751 mz = page_cgroup_zoneinfo(pc);
752 spin_lock_irqsave(&mz->lru_lock, flags);
753 __mem_cgroup_remove_list(mz, pc);
754 spin_unlock_irqrestore(&mz->lru_lock, flags);
755
756 page_assign_page_cgroup(page, NULL);
757 unlock_page_cgroup(page);
758
759 pc->page = newpage;
760 lock_page_cgroup(newpage);
761 page_assign_page_cgroup(newpage, pc);
762
763 mz = page_cgroup_zoneinfo(pc);
764 spin_lock_irqsave(&mz->lru_lock, flags);
765 __mem_cgroup_add_list(mz, pc);
766 spin_unlock_irqrestore(&mz->lru_lock, flags);
767
768 unlock_page_cgroup(newpage);
769} 764}
770 765
771/* 766/*
@@ -795,12 +790,19 @@ static void mem_cgroup_force_empty_list(struct mem_cgroup *mem,
795 page = pc->page; 790 page = pc->page;
796 get_page(page); 791 get_page(page);
797 spin_unlock_irqrestore(&mz->lru_lock, flags); 792 spin_unlock_irqrestore(&mz->lru_lock, flags);
798 mem_cgroup_uncharge_page(page); 793 /*
799 put_page(page); 794 * Check if this page is on LRU. !LRU page can be found
800 if (--count <= 0) { 795 * if it's under page migration.
801 count = FORCE_UNCHARGE_BATCH; 796 */
797 if (PageLRU(page)) {
798 mem_cgroup_uncharge_page(page);
799 put_page(page);
800 if (--count <= 0) {
801 count = FORCE_UNCHARGE_BATCH;
802 cond_resched();
803 }
804 } else
802 cond_resched(); 805 cond_resched();
803 }
804 spin_lock_irqsave(&mz->lru_lock, flags); 806 spin_lock_irqsave(&mz->lru_lock, flags);
805 } 807 }
806 spin_unlock_irqrestore(&mz->lru_lock, flags); 808 spin_unlock_irqrestore(&mz->lru_lock, flags);
diff --git a/mm/migrate.c b/mm/migrate.c
index 376cceba82f9..f6d7f8efd1a8 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -358,6 +358,10 @@ static int migrate_page_move_mapping(struct address_space *mapping,
358 __inc_zone_page_state(newpage, NR_FILE_PAGES); 358 __inc_zone_page_state(newpage, NR_FILE_PAGES);
359 359
360 write_unlock_irq(&mapping->tree_lock); 360 write_unlock_irq(&mapping->tree_lock);
361 if (!PageSwapCache(newpage)) {
362 mem_cgroup_uncharge_page(page);
363 mem_cgroup_getref(newpage);
364 }
361 365
362 return 0; 366 return 0;
363} 367}
@@ -611,7 +615,6 @@ static int move_to_new_page(struct page *newpage, struct page *page)
611 rc = fallback_migrate_page(mapping, newpage, page); 615 rc = fallback_migrate_page(mapping, newpage, page);
612 616
613 if (!rc) { 617 if (!rc) {
614 mem_cgroup_page_migration(page, newpage);
615 remove_migration_ptes(page, newpage); 618 remove_migration_ptes(page, newpage);
616 } else 619 } else
617 newpage->mapping = NULL; 620 newpage->mapping = NULL;
@@ -641,6 +644,14 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private,
641 /* page was freed from under us. So we are done. */ 644 /* page was freed from under us. So we are done. */
642 goto move_newpage; 645 goto move_newpage;
643 646
647 charge = mem_cgroup_prepare_migration(page, newpage);
648 if (charge == -ENOMEM) {
649 rc = -ENOMEM;
650 goto move_newpage;
651 }
652 /* prepare cgroup just returns 0 or -ENOMEM */
653 BUG_ON(charge);
654
644 rc = -EAGAIN; 655 rc = -EAGAIN;
645 if (TestSetPageLocked(page)) { 656 if (TestSetPageLocked(page)) {
646 if (!force) 657 if (!force)
@@ -692,19 +703,14 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private,
692 goto rcu_unlock; 703 goto rcu_unlock;
693 } 704 }
694 705
695 charge = mem_cgroup_prepare_migration(page);
696 /* Establish migration ptes or remove ptes */ 706 /* Establish migration ptes or remove ptes */
697 try_to_unmap(page, 1); 707 try_to_unmap(page, 1);
698 708
699 if (!page_mapped(page)) 709 if (!page_mapped(page))
700 rc = move_to_new_page(newpage, page); 710 rc = move_to_new_page(newpage, page);
701 711
702 if (rc) { 712 if (rc)
703 remove_migration_ptes(page, page); 713 remove_migration_ptes(page, page);
704 if (charge)
705 mem_cgroup_end_migration(page);
706 } else if (charge)
707 mem_cgroup_end_migration(newpage);
708rcu_unlock: 714rcu_unlock:
709 if (rcu_locked) 715 if (rcu_locked)
710 rcu_read_unlock(); 716 rcu_read_unlock();
@@ -725,6 +731,8 @@ unlock:
725 } 731 }
726 732
727move_newpage: 733move_newpage:
734 if (!charge)
735 mem_cgroup_end_migration(newpage);
728 /* 736 /*
729 * Move the new page to the LRU. If migration was not successful 737 * Move the new page to the LRU. If migration was not successful
730 * then this will free the page. 738 * then this will free the page.