 include/linux/memcontrol.h |  10
 mm/filemap.c               |   6
 mm/memcontrol.c            | 109
 mm/migrate.c               |   3
 mm/rmap.c                  |  14
 mm/shmem.c                 |  35
 6 files changed, 97 insertions(+), 80 deletions(-)
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 84ead2aa6f18..b4980b8f048e 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -35,6 +35,7 @@ extern int mem_cgroup_charge(struct page *page, struct mm_struct *mm,
 extern int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
                                 gfp_t gfp_mask);
 extern void mem_cgroup_uncharge_page(struct page *page);
+extern void mem_cgroup_uncharge_cache_page(struct page *page);
 extern void mem_cgroup_move_lists(struct page *page, bool active);
 extern unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
                                         struct list_head *dst,
@@ -53,7 +54,6 @@ extern struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p);
 extern int
 mem_cgroup_prepare_migration(struct page *page, struct page *newpage);
 extern void mem_cgroup_end_migration(struct page *page);
-extern int mem_cgroup_getref(struct page *page);
 
 /*
  * For memory reclaim.
@@ -98,6 +98,10 @@ static inline void mem_cgroup_uncharge_page(struct page *page)
 {
 }
 
+static inline void mem_cgroup_uncharge_cache_page(struct page *page)
+{
+}
+
 static inline void mem_cgroup_move_lists(struct page *page, bool active)
 {
 }
@@ -123,10 +127,6 @@ static inline void mem_cgroup_end_migration(struct page *page)
 {
 }
 
-static inline void mem_cgroup_getref(struct page *page)
-{
-}
-
 static inline int mem_cgroup_calc_mapped_ratio(struct mem_cgroup *mem)
 {
         return 0;
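
The header change above replaces the old per-page refcount API (mem_cgroup_getref) with an explicit uncharge for page cache. A minimal sketch of the pairing the new API expects; insert_into_cache() is a hypothetical stand-in for the caller's own radix-tree insertion, not part of this patch:

/*
 * Hedged sketch: charge before inserting into the page cache, and
 * compensate with the cache-specific uncharge if insertion fails.
 * insert_into_cache() is hypothetical.
 */
static int cache_add_example(struct page *page, struct mm_struct *mm,
                             gfp_t gfp_mask)
{
        int error = mem_cgroup_cache_charge(page, mm, gfp_mask);
        if (error)
                return error;
        error = insert_into_cache(page);        /* hypothetical */
        if (error)
                mem_cgroup_uncharge_cache_page(page);   /* roll back */
        return error;
}
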
diff --git a/mm/filemap.c b/mm/filemap.c
index 5d4c880d7cd9..2d3ec1ffc66e 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -115,7 +115,7 @@ void __remove_from_page_cache(struct page *page)
 {
         struct address_space *mapping = page->mapping;
 
-        mem_cgroup_uncharge_page(page);
+        mem_cgroup_uncharge_cache_page(page);
         radix_tree_delete(&mapping->page_tree, page->index);
         page->mapping = NULL;
         mapping->nrpages--;
@@ -474,12 +474,12 @@ int add_to_page_cache(struct page *page, struct address_space *mapping,
                         mapping->nrpages++;
                         __inc_zone_page_state(page, NR_FILE_PAGES);
                 } else
-                        mem_cgroup_uncharge_page(page);
+                        mem_cgroup_uncharge_cache_page(page);
 
                 write_unlock_irq(&mapping->tree_lock);
                 radix_tree_preload_end();
         } else
-                mem_cgroup_uncharge_page(page);
+                mem_cgroup_uncharge_cache_page(page);
 out:
         return error;
 }
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index da5912b84551..a61706193c31 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -166,7 +166,6 @@ struct page_cgroup {
         struct list_head lru;           /* per cgroup LRU list */
         struct page *page;
         struct mem_cgroup *mem_cgroup;
-        int ref_cnt;                    /* cached, mapped, migrating */
         int flags;
 };
 #define PAGE_CGROUP_FLAG_CACHE  (0x1)   /* charged as cache */
@@ -185,6 +184,7 @@ static enum zone_type page_cgroup_zid(struct page_cgroup *pc)
 enum charge_type {
         MEM_CGROUP_CHARGE_TYPE_CACHE = 0,
         MEM_CGROUP_CHARGE_TYPE_MAPPED,
+        MEM_CGROUP_CHARGE_TYPE_FORCE,   /* used by force_empty */
 };
 
 /*
@@ -552,9 +552,7 @@ retry:
          */
         if (pc) {
                 VM_BUG_ON(pc->page != page);
-                VM_BUG_ON(pc->ref_cnt <= 0);
-
-                pc->ref_cnt++;
+                VM_BUG_ON(!pc->mem_cgroup);
                 unlock_page_cgroup(page);
                 goto done;
         }
@@ -570,10 +568,7 @@ retry:
          * thread group leader migrates. It's possible that mm is not
          * set, if so charge the init_mm (happens for pagecache usage).
          */
-        if (!memcg) {
-                if (!mm)
-                        mm = &init_mm;
-
+        if (likely(!memcg)) {
                 rcu_read_lock();
                 mem = mem_cgroup_from_task(rcu_dereference(mm->owner));
                 /*
@@ -609,7 +604,6 @@ retry:
                 }
         }
 
-        pc->ref_cnt = 1;
         pc->mem_cgroup = mem;
         pc->page = page;
         /*
@@ -653,6 +647,17 @@ err:
 
 int mem_cgroup_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask)
 {
+        /*
+         * If already mapped, we don't have to account.
+         * If page cache, page->mapping has address_space.
+         * But page->mapping may hold an out-of-use anon_vma pointer;
+         * detect it by the PageAnon() check: a newly mapped anonymous
+         * page's page->mapping is NULL.
+         */
+        if (page_mapped(page) || (page->mapping && !PageAnon(page)))
+                return 0;
+        if (unlikely(!mm))
+                mm = &init_mm;
         return mem_cgroup_charge_common(page, mm, gfp_mask,
                                 MEM_CGROUP_CHARGE_TYPE_MAPPED, NULL);
 }
@@ -660,32 +665,17 @@ int mem_cgroup_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask)
 int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
                                 gfp_t gfp_mask)
 {
-        if (!mm)
+        if (unlikely(!mm))
                 mm = &init_mm;
         return mem_cgroup_charge_common(page, mm, gfp_mask,
                                 MEM_CGROUP_CHARGE_TYPE_CACHE, NULL);
 }
 
-int mem_cgroup_getref(struct page *page)
-{
-        struct page_cgroup *pc;
-
-        if (mem_cgroup_subsys.disabled)
-                return 0;
-
-        lock_page_cgroup(page);
-        pc = page_get_page_cgroup(page);
-        VM_BUG_ON(!pc);
-        pc->ref_cnt++;
-        unlock_page_cgroup(page);
-        return 0;
-}
-
 /*
- * Uncharging is always a welcome operation, we never complain, simply
- * uncharge.
+ * uncharge if !page_mapped(page)
  */
-void mem_cgroup_uncharge_page(struct page *page)
+static void
+__mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
 {
         struct page_cgroup *pc;
         struct mem_cgroup *mem;
@@ -704,29 +694,41 @@ void mem_cgroup_uncharge_page(struct page *page)
                 goto unlock;
 
         VM_BUG_ON(pc->page != page);
-        VM_BUG_ON(pc->ref_cnt <= 0);
 
-        if (--(pc->ref_cnt) == 0) {
-                mz = page_cgroup_zoneinfo(pc);
-                spin_lock_irqsave(&mz->lru_lock, flags);
-                __mem_cgroup_remove_list(mz, pc);
-                spin_unlock_irqrestore(&mz->lru_lock, flags);
+        if ((ctype == MEM_CGROUP_CHARGE_TYPE_MAPPED)
+            && ((pc->flags & PAGE_CGROUP_FLAG_CACHE)
+                || page_mapped(page)))
+                goto unlock;
 
-                page_assign_page_cgroup(page, NULL);
-                unlock_page_cgroup(page);
+        mz = page_cgroup_zoneinfo(pc);
+        spin_lock_irqsave(&mz->lru_lock, flags);
+        __mem_cgroup_remove_list(mz, pc);
+        spin_unlock_irqrestore(&mz->lru_lock, flags);
 
-                mem = pc->mem_cgroup;
-                res_counter_uncharge(&mem->res, PAGE_SIZE);
-                css_put(&mem->css);
+        page_assign_page_cgroup(page, NULL);
+        unlock_page_cgroup(page);
 
-                kmem_cache_free(page_cgroup_cache, pc);
-                return;
-        }
+        mem = pc->mem_cgroup;
+        res_counter_uncharge(&mem->res, PAGE_SIZE);
+        css_put(&mem->css);
 
+        kmem_cache_free(page_cgroup_cache, pc);
+        return;
 unlock:
         unlock_page_cgroup(page);
 }
 
+void mem_cgroup_uncharge_page(struct page *page)
+{
+        __mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_MAPPED);
+}
+
+void mem_cgroup_uncharge_cache_page(struct page *page)
+{
+        VM_BUG_ON(page_mapped(page));
+        __mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_CACHE);
+}
+
 /*
  * Before starting migration, account against new page.
  */
@@ -757,15 +759,29 @@ int mem_cgroup_prepare_migration(struct page *page, struct page *newpage)
         return ret;
 }
 
-/* remove redundant charge */
+/* remove redundant charge if migration failed */
 void mem_cgroup_end_migration(struct page *newpage)
 {
-        mem_cgroup_uncharge_page(newpage);
+        /*
+         * On success, newpage->mapping is not NULL.
+         * Special rollback care is necessary when
+         * 1. migration fails (newpage->mapping is cleared in this case), or
+         * 2. the newpage was moved but not remapped again because the task
+         *    exited and the newpage is obsolete. In this case, the new page
+         *    may be a swapcache. So, we just call mem_cgroup_uncharge_page()
+         *    always, to avoid a mess. The page_cgroup will be removed if
+         *    unnecessary. File cache pages are still on the radix-tree;
+         *    don't care about them.
+         */
+        if (!newpage->mapping)
+                __mem_cgroup_uncharge_common(newpage,
+                                MEM_CGROUP_CHARGE_TYPE_FORCE);
+        else if (PageAnon(newpage))
+                mem_cgroup_uncharge_page(newpage);
 }
 
 /*
  * This routine traverse page_cgroup in given list and drop them all.
- * This routine ignores page_cgroup->ref_cnt.
  * *And* this routine doesn't reclaim page itself, just removes page_cgroup.
  */
 #define FORCE_UNCHARGE_BATCH    (128)
@@ -795,7 +811,8 @@ static void mem_cgroup_force_empty_list(struct mem_cgroup *mem,
                  * if it's under page migration.
                  */
                 if (PageLRU(page)) {
-                        mem_cgroup_uncharge_page(page);
+                        __mem_cgroup_uncharge_common(page,
+                                        MEM_CGROUP_CHARGE_TYPE_FORCE);
                         put_page(page);
                         if (--count <= 0) {
                                 count = FORCE_UNCHARGE_BATCH;
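
With ref_cnt gone, __mem_cgroup_uncharge_common() above decides from the page state itself whether the charge may be dropped. The guard condition, restated as a standalone predicate (same logic as the hunk above, not new kernel code):

/*
 * Restated guard from __mem_cgroup_uncharge_common(): a MAPPED-type
 * uncharge is skipped while the page is still a charged cache page or
 * is still mapped by someone; CACHE and FORCE types always fall
 * through and uncharge.
 */
static bool uncharge_must_skip(enum charge_type ctype,
                               struct page_cgroup *pc, struct page *page)
{
        return ctype == MEM_CGROUP_CHARGE_TYPE_MAPPED &&
               ((pc->flags & PAGE_CGROUP_FLAG_CACHE) || page_mapped(page));
}
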
diff --git a/mm/migrate.c b/mm/migrate.c
index f6d7f8efd1a8..d8c65a65c61d 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -359,8 +359,7 @@ static int migrate_page_move_mapping(struct address_space *mapping,
 
         write_unlock_irq(&mapping->tree_lock);
         if (!PageSwapCache(newpage)) {
-                mem_cgroup_uncharge_page(page);
-                mem_cgroup_getref(newpage);
+                mem_cgroup_uncharge_cache_page(page);
         }
 
         return 0;
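
For context, the accounting flow around migration after this patch, as a hedged sketch; do_migrate_sketch() and move_page_contents() are hypothetical glue, and only the mem_cgroup_* calls are from this series:

/*
 * Hedged sketch: the new page is charged up front, and
 * mem_cgroup_end_migration() drops that charge again if migration
 * failed or the new page ended up unused.
 */
static int do_migrate_sketch(struct page *page, struct page *newpage)
{
        int rc = mem_cgroup_prepare_migration(page, newpage);
        if (rc)
                return rc;
        rc = move_page_contents(page, newpage); /* hypothetical */
        mem_cgroup_end_migration(newpage);      /* rolls back if unused */
        return rc;
}
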
diff --git a/mm/rmap.c b/mm/rmap.c
index bf0a5b7cfb8e..abbd29f7c43f 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -576,14 +576,8 @@ void page_add_anon_rmap(struct page *page,
         VM_BUG_ON(address < vma->vm_start || address >= vma->vm_end);
         if (atomic_inc_and_test(&page->_mapcount))
                 __page_set_anon_rmap(page, vma, address);
-        else {
+        else
                 __page_check_anon_rmap(page, vma, address);
-                /*
-                 * We unconditionally charged during prepare, we uncharge here
-                 * This takes care of balancing the reference counts
-                 */
-                mem_cgroup_uncharge_page(page);
-        }
 }
588 582
589/** 583/**
@@ -614,12 +608,6 @@ void page_add_file_rmap(struct page *page)
 {
         if (atomic_inc_and_test(&page->_mapcount))
                 __inc_zone_page_state(page, NR_FILE_MAPPED);
-        else
-                /*
-                 * We unconditionally charged during prepare, we uncharge here
-                 * This takes care of balancing the reference counts
-                 */
-                mem_cgroup_uncharge_page(page);
 }
 
 #ifdef CONFIG_DEBUG_VM
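
The rmap hooks can drop their compensating uncharge because mem_cgroup_charge() now filters out pages that are already accounted. The filter, restated from the mm/memcontrol.c hunk above (not new code):

/*
 * Restated from mem_cgroup_charge(): a page that is already mapped, or
 * that is live page cache (page->mapping set and not a stale anon_vma
 * pointer), has been charged already and must not be charged again.
 */
static bool already_accounted(struct page *page)
{
        return page_mapped(page) || (page->mapping && !PageAnon(page));
}
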
diff --git a/mm/shmem.c b/mm/shmem.c
index 9ffbea9b79e1..d58305e8a484 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -922,20 +922,26 @@ found:
         error = 1;
         if (!inode)
                 goto out;
-        /* Precharge page while we can wait, compensate afterwards */
+        /* Precharge page using GFP_KERNEL while we can wait */
         error = mem_cgroup_cache_charge(page, current->mm, GFP_KERNEL);
         if (error)
                 goto out;
         error = radix_tree_preload(GFP_KERNEL);
-        if (error)
-                goto uncharge;
+        if (error) {
+                mem_cgroup_uncharge_cache_page(page);
+                goto out;
+        }
         error = 1;
 
         spin_lock(&info->lock);
         ptr = shmem_swp_entry(info, idx, NULL);
-        if (ptr && ptr->val == entry.val)
+        if (ptr && ptr->val == entry.val) {
                 error = add_to_page_cache(page, inode->i_mapping,
                                                 idx, GFP_NOWAIT);
+                /* does mem_cgroup_uncharge_cache_page on error */
+        } else /* we must compensate for our precharge above */
+                mem_cgroup_uncharge_cache_page(page);
+
         if (error == -EEXIST) {
                 struct page *filepage = find_get_page(inode->i_mapping, idx);
                 error = 1;
@@ -961,8 +967,6 @@ found:
         shmem_swp_unmap(ptr);
         spin_unlock(&info->lock);
         radix_tree_preload_end();
-uncharge:
-        mem_cgroup_uncharge_page(page);
 out:
         unlock_page(page);
         page_cache_release(page);
@@ -1319,7 +1323,7 @@ repeat:
                                 page_cache_release(swappage);
                                 goto failed;
                         }
-                        mem_cgroup_uncharge_page(swappage);
+                        mem_cgroup_uncharge_cache_page(swappage);
                 }
                 page_cache_release(swappage);
                 goto repeat;
@@ -1358,6 +1362,8 @@ repeat:
                 }
 
                 if (!filepage) {
+                        int ret;
+
                         spin_unlock(&info->lock);
                         filepage = shmem_alloc_page(gfp, info, idx);
                         if (!filepage) {
@@ -1386,10 +1392,18 @@ repeat:
                                 swap = *entry;
                                 shmem_swp_unmap(entry);
                         }
-                        if (error || swap.val || 0 != add_to_page_cache_lru(
-                                        filepage, mapping, idx, GFP_NOWAIT)) {
+                        ret = error || swap.val;
+                        if (ret)
+                                mem_cgroup_uncharge_cache_page(filepage);
+                        else
+                                ret = add_to_page_cache_lru(filepage, mapping,
+                                                idx, GFP_NOWAIT);
+                        /*
+                         * On add_to_page_cache_lru() failure, uncharge will
+                         * be done automatically.
+                         */
+                        if (ret) {
                                 spin_unlock(&info->lock);
-                                mem_cgroup_uncharge_page(filepage);
                                 page_cache_release(filepage);
                                 shmem_unacct_blocks(info->flags, 1);
                                 shmem_free_blocks(inode, 1);
@@ -1398,7 +1412,6 @@ repeat:
                                         goto failed;
                                 goto repeat;
                         }
-                        mem_cgroup_uncharge_page(filepage);
                         info->flags |= SHMEM_PAGEIN;
                 }
 
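
The shmem changes follow one pattern throughout: precharge with GFP_KERNEL while sleeping is still allowed, then uncharge by hand on every path where add_to_page_cache() is never reached; when add_to_page_cache() itself fails, it now does the uncharge internally. A condensed, hedged sketch of that pattern; still_wanted() is a hypothetical recheck, not shmem code:

/*
 * Hedged sketch of the shmem precharge/compensate pattern. Only the
 * mem_cgroup_* and add_to_page_cache() calls are real; still_wanted()
 * is hypothetical.
 */
static int shmem_precharge_sketch(struct page *page,
                                  struct address_space *mapping, pgoff_t idx)
{
        int error = mem_cgroup_cache_charge(page, current->mm, GFP_KERNEL);
        if (error)
                return error;
        if (!still_wanted(page)) {      /* hypothetical recheck */
                mem_cgroup_uncharge_cache_page(page);   /* compensate */
                return -ENOENT;
        }
        /* on failure, add_to_page_cache() uncharges internally */
        return add_to_page_cache(page, mapping, idx, GFP_NOWAIT);
}
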