author    KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>  2008-07-25 04:47:14 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>      2008-07-25 13:53:37 -0400
commit    69029cd550284e32de13d6dd2f77b723c8a0e444 (patch)
tree      b57b87e5025b6c01722f39302cb98d0dfcd58940 /mm/memcontrol.c
parent    e8589cc189f96b87348ae83ea4db38eaac624135 (diff)
memcg: remove refcnt from page_cgroup
memcg: performance improvements

Patch description:
 1/5 ... remove refcnt from page_cgroup patch (shmem handling is fixed)
 2/5 ... swapcache handling patch
 3/5 ... add helper function for shmem's memory reclaim patch
 4/5 ... optimize by likely/unlikely patch
 5/5 ... remove redundant check patch (shmem handling is fixed)

Unix bench results:

== 2.6.26-rc2-mm1 + memory resource controller ==
Execl Throughput                            2915.4 lps   (29.6 secs, 3 samples)
C Compiler Throughput                       1019.3 lpm   (60.0 secs, 3 samples)
Shell Scripts (1 concurrent)                5796.0 lpm   (60.0 secs, 3 samples)
Shell Scripts (8 concurrent)                1097.7 lpm   (60.0 secs, 3 samples)
Shell Scripts (16 concurrent)                565.3 lpm   (60.0 secs, 3 samples)
File Read 1024 bufsize 2000 maxblocks    1022128.0 KBps  (30.0 secs, 3 samples)
File Write 1024 bufsize 2000 maxblocks    544057.0 KBps  (30.0 secs, 3 samples)
File Copy 1024 bufsize 2000 maxblocks     346481.0 KBps  (30.0 secs, 3 samples)
File Read 256 bufsize 500 maxblocks       319325.0 KBps  (30.0 secs, 3 samples)
File Write 256 bufsize 500 maxblocks      148788.0 KBps  (30.0 secs, 3 samples)
File Copy 256 bufsize 500 maxblocks        99051.0 KBps  (30.0 secs, 3 samples)
File Read 4096 bufsize 8000 maxblocks    2058917.0 KBps  (30.0 secs, 3 samples)
File Write 4096 bufsize 8000 maxblocks   1606109.0 KBps  (30.0 secs, 3 samples)
File Copy 4096 bufsize 8000 maxblocks     854789.0 KBps  (30.0 secs, 3 samples)
Dc: sqrt(2) to 99 decimal places          126145.2 lpm   (30.0 secs, 3 samples)

                                  INDEX VALUES
TEST                                      BASELINE       RESULT      INDEX
Execl Throughput                              43.0       2915.4      678.0
File Copy 1024 bufsize 2000 maxblocks       3960.0     346481.0      875.0
File Copy 256 bufsize 500 maxblocks         1655.0      99051.0      598.5
File Copy 4096 bufsize 8000 maxblocks       5800.0     854789.0     1473.8
Shell Scripts (8 concurrent)                   6.0       1097.7     1829.5
                                                                 =========
FINAL SCORE                                                          991.3

== 2.6.26-rc2-mm1 + this set ==
Execl Throughput                            3012.9 lps   (29.9 secs, 3 samples)
C Compiler Throughput                        981.0 lpm   (60.0 secs, 3 samples)
Shell Scripts (1 concurrent)                5872.0 lpm   (60.0 secs, 3 samples)
Shell Scripts (8 concurrent)                1120.3 lpm   (60.0 secs, 3 samples)
Shell Scripts (16 concurrent)                578.0 lpm   (60.0 secs, 3 samples)
File Read 1024 bufsize 2000 maxblocks    1003993.0 KBps  (30.0 secs, 3 samples)
File Write 1024 bufsize 2000 maxblocks    550452.0 KBps  (30.0 secs, 3 samples)
File Copy 1024 bufsize 2000 maxblocks     347159.0 KBps  (30.0 secs, 3 samples)
File Read 256 bufsize 500 maxblocks       314644.0 KBps  (30.0 secs, 3 samples)
File Write 256 bufsize 500 maxblocks      151852.0 KBps  (30.0 secs, 3 samples)
File Copy 256 bufsize 500 maxblocks       101000.0 KBps  (30.0 secs, 3 samples)
File Read 4096 bufsize 8000 maxblocks    2033256.0 KBps  (30.0 secs, 3 samples)
File Write 4096 bufsize 8000 maxblocks   1611814.0 KBps  (30.0 secs, 3 samples)
File Copy 4096 bufsize 8000 maxblocks     847979.0 KBps  (30.0 secs, 3 samples)
Dc: sqrt(2) to 99 decimal places          128148.7 lpm   (30.0 secs, 3 samples)

                                  INDEX VALUES
TEST                                      BASELINE       RESULT      INDEX
Execl Throughput                              43.0       3012.9      700.7
File Copy 1024 bufsize 2000 maxblocks       3960.0     347159.0      876.7
File Copy 256 bufsize 500 maxblocks         1655.0     101000.0      610.3
File Copy 4096 bufsize 8000 maxblocks       5800.0     847979.0     1462.0
Shell Scripts (8 concurrent)                   6.0       1120.3     1867.2
                                                                 =========
FINAL SCORE                                                         1004.6

This patch:

Remove refcnt from page_cgroup(). After this:

 * A page is charged only when !page_mapped() && no page_cgroup is assigned:
	* an anon page is newly mapped, or
	* a file page is added to mapping->tree.

 * A page is uncharged only when:
	* an anon page is fully unmapped, or
	* a file page is removed from the LRU.

There is no change in behavior from the user's point of view.
This patch also removes unnecessary calls in rmap.c which were used only for
refcnt management.

[akpm@linux-foundation.org: fix warning]
[hugh@veritas.com: fix shmem_unuse_inode charging]
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Pavel Emelyanov <xemul@openvz.org>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: Hugh Dickins <hugh@veritas.com>
Cc: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
Cc: Paul Menage <menage@google.com>
Cc: David Rientjes <rientjes@google.com>
Signed-off-by: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
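For illustration only (not part of the patch): a minimal userspace C sketch of
the charge rule described above. struct page, page_mapped() and PageAnon()
below are simplified stand-ins for the kernel's definitions, assumed here only
to make the condition concrete.

    #include <stdbool.h>

    /* Simplified stand-ins for the kernel's struct page and predicates. */
    struct page {
        void *mapping;  /* address_space for file pages; anon_vma or NULL for anon */
        int mapcount;   /* > 0 once the page is mapped into some page table */
        bool anon;      /* models PageAnon() */
    };

    static bool page_mapped(const struct page *p) { return p->mapcount > 0; }
    static bool PageAnon(const struct page *p)    { return p->anon; }

    /*
     * Models the entry check this patch adds to mem_cgroup_charge():
     * charge only when the page is not yet mapped and no address_space
     * owns it. A stale anon_vma left in page->mapping is filtered out
     * by the PageAnon() test, since a newly mapped anon page still has
     * a NULL mapping at charge time.
     */
    static bool should_charge(const struct page *p)
    {
        if (page_mapped(p) || (p->mapping && !PageAnon(p)))
            return false;   /* already accounted for */
        return true;        /* first mapping / first insertion: charge */
    }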
Diffstat (limited to 'mm/memcontrol.c')
-rw-r--r--  mm/memcontrol.c  109
1 file changed, 63 insertions, 46 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index da5912b84551..a61706193c31 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -166,7 +166,6 @@ struct page_cgroup {
 	struct list_head lru;		/* per cgroup LRU list */
 	struct page *page;
 	struct mem_cgroup *mem_cgroup;
-	int ref_cnt;			/* cached, mapped, migrating */
 	int flags;
 };
 #define PAGE_CGROUP_FLAG_CACHE	(0x1)	/* charged as cache */
@@ -185,6 +184,7 @@ static enum zone_type page_cgroup_zid(struct page_cgroup *pc)
 enum charge_type {
 	MEM_CGROUP_CHARGE_TYPE_CACHE = 0,
 	MEM_CGROUP_CHARGE_TYPE_MAPPED,
+	MEM_CGROUP_CHARGE_TYPE_FORCE,	/* used by force_empty */
 };
 
 /*
@@ -552,9 +552,7 @@ retry:
 	 */
 	if (pc) {
 		VM_BUG_ON(pc->page != page);
-		VM_BUG_ON(pc->ref_cnt <= 0);
-
-		pc->ref_cnt++;
+		VM_BUG_ON(!pc->mem_cgroup);
 		unlock_page_cgroup(page);
 		goto done;
 	}
@@ -570,10 +568,7 @@ retry:
 	 * thread group leader migrates. It's possible that mm is not
 	 * set, if so charge the init_mm (happens for pagecache usage).
 	 */
-	if (!memcg) {
-		if (!mm)
-			mm = &init_mm;
-
+	if (likely(!memcg)) {
 		rcu_read_lock();
 		mem = mem_cgroup_from_task(rcu_dereference(mm->owner));
 		/*
@@ -609,7 +604,6 @@ retry:
 		}
 	}
 
-	pc->ref_cnt = 1;
 	pc->mem_cgroup = mem;
 	pc->page = page;
 	/*
@@ -653,6 +647,17 @@ err:
 
 int mem_cgroup_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask)
 {
+	/*
+	 * If already mapped, we don't have to account.
+	 * If page cache, page->mapping has address_space.
+	 * But page->mapping may hold an out-of-use anon_vma pointer;
+	 * detect it by the PageAnon() check. A newly-mapped anon page's
+	 * page->mapping is NULL.
+	 */
+	if (page_mapped(page) || (page->mapping && !PageAnon(page)))
+		return 0;
+	if (unlikely(!mm))
+		mm = &init_mm;
 	return mem_cgroup_charge_common(page, mm, gfp_mask,
 				MEM_CGROUP_CHARGE_TYPE_MAPPED, NULL);
 }
@@ -660,32 +665,17 @@ int mem_cgroup_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask)
 int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
 				gfp_t gfp_mask)
 {
-	if (!mm)
+	if (unlikely(!mm))
 		mm = &init_mm;
 	return mem_cgroup_charge_common(page, mm, gfp_mask,
 				MEM_CGROUP_CHARGE_TYPE_CACHE, NULL);
 }
 
-int mem_cgroup_getref(struct page *page)
-{
-	struct page_cgroup *pc;
-
-	if (mem_cgroup_subsys.disabled)
-		return 0;
-
-	lock_page_cgroup(page);
-	pc = page_get_page_cgroup(page);
-	VM_BUG_ON(!pc);
-	pc->ref_cnt++;
-	unlock_page_cgroup(page);
-	return 0;
-}
-
 /*
- * Uncharging is always a welcome operation, we never complain, simply
- * uncharge.
+ * uncharge if !page_mapped(page)
  */
-void mem_cgroup_uncharge_page(struct page *page)
+static void
+__mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
 {
 	struct page_cgroup *pc;
 	struct mem_cgroup *mem;
@@ -704,29 +694,41 @@ void mem_cgroup_uncharge_page(struct page *page)
 		goto unlock;
 
 	VM_BUG_ON(pc->page != page);
-	VM_BUG_ON(pc->ref_cnt <= 0);
 
-	if (--(pc->ref_cnt) == 0) {
-		mz = page_cgroup_zoneinfo(pc);
-		spin_lock_irqsave(&mz->lru_lock, flags);
-		__mem_cgroup_remove_list(mz, pc);
-		spin_unlock_irqrestore(&mz->lru_lock, flags);
+	if ((ctype == MEM_CGROUP_CHARGE_TYPE_MAPPED)
+	    && ((pc->flags & PAGE_CGROUP_FLAG_CACHE)
+		|| page_mapped(page)))
+		goto unlock;
 
-		page_assign_page_cgroup(page, NULL);
-		unlock_page_cgroup(page);
+	mz = page_cgroup_zoneinfo(pc);
+	spin_lock_irqsave(&mz->lru_lock, flags);
+	__mem_cgroup_remove_list(mz, pc);
+	spin_unlock_irqrestore(&mz->lru_lock, flags);
 
-		mem = pc->mem_cgroup;
-		res_counter_uncharge(&mem->res, PAGE_SIZE);
-		css_put(&mem->css);
+	page_assign_page_cgroup(page, NULL);
+	unlock_page_cgroup(page);
 
-		kmem_cache_free(page_cgroup_cache, pc);
-		return;
-	}
+	mem = pc->mem_cgroup;
+	res_counter_uncharge(&mem->res, PAGE_SIZE);
+	css_put(&mem->css);
 
+	kmem_cache_free(page_cgroup_cache, pc);
+	return;
 unlock:
 	unlock_page_cgroup(page);
 }
 
+void mem_cgroup_uncharge_page(struct page *page)
+{
+	__mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_MAPPED);
+}
+
+void mem_cgroup_uncharge_cache_page(struct page *page)
+{
+	VM_BUG_ON(page_mapped(page));
+	__mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_CACHE);
+}
+
 /*
  * Before starting migration, account against new page.
  */
@@ -757,15 +759,29 @@ int mem_cgroup_prepare_migration(struct page *page, struct page *newpage)
 	return ret;
 }
 
-/* remove redundant charge */
+/* remove redundant charge if migration failed */
 void mem_cgroup_end_migration(struct page *newpage)
 {
-	mem_cgroup_uncharge_page(newpage);
+	/*
+	 * At success, page->mapping is not NULL.
+	 * Special rollback care is necessary when
+	 * 1. migration fails (newpage->mapping is cleared in this case), or
+	 * 2. the newpage was moved but not remapped again because the task
+	 *    exits and the newpage is obsolete. In this case, the new page
+	 *    may be a swapcache. So, we just call mem_cgroup_uncharge_page()
+	 *    always to avoid a mess. The page_cgroup will be removed if
+	 *    unnecessary. File cache pages are still on the radix-tree;
+	 *    don't care about them.
+	 */
+	if (!newpage->mapping)
+		__mem_cgroup_uncharge_common(newpage,
+				MEM_CGROUP_CHARGE_TYPE_FORCE);
+	else if (PageAnon(newpage))
+		mem_cgroup_uncharge_page(newpage);
 }
 
 /*
  * This routine traverse page_cgroup in given list and drop them all.
- * This routine ignores page_cgroup->ref_cnt.
  * *And* this routine doesn't reclaim page itself, just removes page_cgroup.
  */
 #define FORCE_UNCHARGE_BATCH	(128)
@@ -795,7 +811,8 @@ static void mem_cgroup_force_empty_list(struct mem_cgroup *mem,
 		 * if it's under page migration.
 		 */
 		if (PageLRU(page)) {
-			mem_cgroup_uncharge_page(page);
+			__mem_cgroup_uncharge_common(page,
+					MEM_CGROUP_CHARGE_TYPE_FORCE);
 			put_page(page);
 			if (--count <= 0) {
 				count = FORCE_UNCHARGE_BATCH;
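
For readers tracing the new uncharge path: a minimal userspace sketch (with a
plain int and bool standing in for the real struct page_cgroup state) of the
ctype test at the top of __mem_cgroup_uncharge_common(), which replaces the
old ref_cnt bookkeeping.

    #include <stdbool.h>

    enum charge_type {
        MEM_CGROUP_CHARGE_TYPE_CACHE = 0,
        MEM_CGROUP_CHARGE_TYPE_MAPPED,
        MEM_CGROUP_CHARGE_TYPE_FORCE,   /* used by force_empty */
    };

    #define PAGE_CGROUP_FLAG_CACHE 0x1

    /*
     * Models the early-out in __mem_cgroup_uncharge_common(): a MAPPED
     * uncharge is skipped while the page is still charged as cache or
     * still mapped somewhere; CACHE and FORCE uncharges always proceed
     * (callers guarantee their own preconditions, e.g.
     * mem_cgroup_uncharge_cache_page() asserts !page_mapped()).
     */
    static bool should_uncharge(enum charge_type ctype, int pc_flags, bool mapped)
    {
        if (ctype == MEM_CGROUP_CHARGE_TYPE_MAPPED &&
            ((pc_flags & PAGE_CGROUP_FLAG_CACHE) || mapped))
            return false;
        return true;
    }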