Diffstat (limited to 'mm/memcontrol.c')
 mm/memcontrol.c | 385 +++++++++++++++++++++++++++++++++-----------------
 1 file changed, 238 insertions(+), 147 deletions(-)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index e46451e1d9b7..36896f3eb7f5 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -35,9 +35,9 @@
 
 #include <asm/uaccess.h>
 
-struct cgroup_subsys mem_cgroup_subsys;
-static const int MEM_CGROUP_RECLAIM_RETRIES = 5;
-static struct kmem_cache *page_cgroup_cache;
+struct cgroup_subsys mem_cgroup_subsys __read_mostly;
+static struct kmem_cache *page_cgroup_cache __read_mostly;
+#define MEM_CGROUP_RECLAIM_RETRIES	5
 
 /*
  * Statistics for memory cgroup.
@@ -166,7 +166,6 @@ struct page_cgroup {
 	struct list_head lru;		/* per cgroup LRU list */
 	struct page *page;
 	struct mem_cgroup *mem_cgroup;
-	int ref_cnt;			/* cached, mapped, migrating */
 	int flags;
 };
 #define PAGE_CGROUP_FLAG_CACHE	(0x1)	/* charged as cache */
@@ -185,6 +184,7 @@ static enum zone_type page_cgroup_zid(struct page_cgroup *pc)
 enum charge_type {
 	MEM_CGROUP_CHARGE_TYPE_CACHE = 0,
 	MEM_CGROUP_CHARGE_TYPE_MAPPED,
+	MEM_CGROUP_CHARGE_TYPE_FORCE,	/* used by force_empty */
 };
 
 /*
@@ -250,6 +250,14 @@ static struct mem_cgroup *mem_cgroup_from_cont(struct cgroup *cont)
 
 struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p)
 {
+	/*
+	 * mm_update_next_owner() may clear mm->owner to NULL
+	 * if it races with swapoff, page migration, etc.
+	 * So this can be called with p == NULL.
+	 */
+	if (unlikely(!p))
+		return NULL;
+
 	return container_of(task_subsys_state(p, mem_cgroup_subsys_id),
 				struct mem_cgroup, css);
 }
@@ -296,7 +304,7 @@ static void __mem_cgroup_remove_list(struct mem_cgroup_per_zone *mz,
 		MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_INACTIVE) -= 1;
 
 	mem_cgroup_charge_statistics(pc->mem_cgroup, pc->flags, false);
-	list_del_init(&pc->lru);
+	list_del(&pc->lru);
 }
 
 static void __mem_cgroup_add_list(struct mem_cgroup_per_zone *mz,
@@ -354,6 +362,9 @@ void mem_cgroup_move_lists(struct page *page, bool active)
 	struct mem_cgroup_per_zone *mz;
 	unsigned long flags;
 
+	if (mem_cgroup_subsys.disabled)
+		return;
+
 	/*
 	 * We cannot lock_page_cgroup while holding zone's lru_lock,
 	 * because other holders of lock_page_cgroup can be interrupted
@@ -524,7 +535,8 @@ unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
  * < 0 if the cgroup is over its limit
  */
 static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
-				gfp_t gfp_mask, enum charge_type ctype)
+				gfp_t gfp_mask, enum charge_type ctype,
+				struct mem_cgroup *memcg)
 {
 	struct mem_cgroup *mem;
 	struct page_cgroup *pc;
@@ -532,35 +544,8 @@ static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
 	unsigned long nr_retries = MEM_CGROUP_RECLAIM_RETRIES;
 	struct mem_cgroup_per_zone *mz;
 
-	if (mem_cgroup_subsys.disabled)
-		return 0;
-
-	/*
-	 * Should page_cgroup's go to their own slab?
-	 * One could optimize the performance of the charging routine
-	 * by saving a bit in the page_flags and using it as a lock
-	 * to see if the cgroup page already has a page_cgroup associated
-	 * with it
-	 */
-retry:
-	lock_page_cgroup(page);
-	pc = page_get_page_cgroup(page);
-	/*
-	 * The page_cgroup exists and
-	 * the page has already been accounted.
-	 */
-	if (pc) {
-		VM_BUG_ON(pc->page != page);
-		VM_BUG_ON(pc->ref_cnt <= 0);
-
-		pc->ref_cnt++;
-		unlock_page_cgroup(page);
-		goto done;
-	}
-	unlock_page_cgroup(page);
-
-	pc = kmem_cache_zalloc(page_cgroup_cache, gfp_mask);
-	if (pc == NULL)
+	pc = kmem_cache_alloc(page_cgroup_cache, gfp_mask);
+	if (unlikely(pc == NULL))
 		goto err;
 
 	/*
@@ -569,16 +554,23 @@ retry:
 	 * thread group leader migrates. It's possible that mm is not
 	 * set, if so charge the init_mm (happens for pagecache usage).
 	 */
-	if (!mm)
-		mm = &init_mm;
-
-	rcu_read_lock();
-	mem = mem_cgroup_from_task(rcu_dereference(mm->owner));
-	/*
-	 * For every charge from the cgroup, increment reference count
-	 */
-	css_get(&mem->css);
-	rcu_read_unlock();
+	if (likely(!memcg)) {
+		rcu_read_lock();
+		mem = mem_cgroup_from_task(rcu_dereference(mm->owner));
+		if (unlikely(!mem)) {
+			rcu_read_unlock();
+			kmem_cache_free(page_cgroup_cache, pc);
+			return 0;
+		}
+		/*
+		 * For every charge from the cgroup, increment reference count
+		 */
+		css_get(&mem->css);
+		rcu_read_unlock();
+	} else {
+		mem = memcg;
+		css_get(&memcg->css);
+	}
 
 	while (res_counter_charge(&mem->res, PAGE_SIZE)) {
 		if (!(gfp_mask & __GFP_WAIT))
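mem_cgroup_charge_common() now has two entry modes: with memcg == NULL it resolves the group from mm->owner as before, while a non-NULL memcg (used by the migration path later in this patch) charges that group directly. The two call forms, as they appear elsewhere in this diff (shown here out of their surrounding context):

/* Fault / page-cache path: derive the group from the mm. */
ret = mem_cgroup_charge_common(page, mm, gfp_mask,
			MEM_CGROUP_CHARGE_TYPE_MAPPED, NULL);

/* Migration path: charge the new page against a known group. */
ret = mem_cgroup_charge_common(newpage, NULL, GFP_KERNEL,
			ctype, mem);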
@@ -603,25 +595,24 @@ retry:
 		}
 	}
 
-	pc->ref_cnt = 1;
 	pc->mem_cgroup = mem;
 	pc->page = page;
-	pc->flags = PAGE_CGROUP_FLAG_ACTIVE;
+	/*
+	 * If a page is accounted as a page cache, insert to inactive list.
+	 * If anon, insert to active list.
+	 */
 	if (ctype == MEM_CGROUP_CHARGE_TYPE_CACHE)
 		pc->flags = PAGE_CGROUP_FLAG_CACHE;
+	else
+		pc->flags = PAGE_CGROUP_FLAG_ACTIVE;
 
 	lock_page_cgroup(page);
-	if (page_get_page_cgroup(page)) {
+	if (unlikely(page_get_page_cgroup(page))) {
 		unlock_page_cgroup(page);
-		/*
-		 * Another charge has been added to this page already.
-		 * We take lock_page_cgroup(page) again and read
-		 * page->cgroup, increment refcnt.... just retry is OK.
-		 */
 		res_counter_uncharge(&mem->res, PAGE_SIZE);
 		css_put(&mem->css);
 		kmem_cache_free(page_cgroup_cache, pc);
-		goto retry;
+		goto done;
 	}
 	page_assign_page_cgroup(page, pc);
 
@@ -642,24 +633,65 @@ err:
 
 int mem_cgroup_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask)
 {
+	if (mem_cgroup_subsys.disabled)
+		return 0;
+
+	/*
+	 * If already mapped, we don't have to account.
+	 * If page cache, page->mapping has address_space.
+	 * But page->mapping may have out-of-use anon_vma pointer,
+	 * detecit it by PageAnon() check. newly-mapped-anon's page->mapping
+	 * is NULL.
+	 */
+	if (page_mapped(page) || (page->mapping && !PageAnon(page)))
+		return 0;
+	if (unlikely(!mm))
+		mm = &init_mm;
 	return mem_cgroup_charge_common(page, mm, gfp_mask,
-				MEM_CGROUP_CHARGE_TYPE_MAPPED);
+				MEM_CGROUP_CHARGE_TYPE_MAPPED, NULL);
 }
 
 int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
 				gfp_t gfp_mask)
 {
-	if (!mm)
+	if (mem_cgroup_subsys.disabled)
+		return 0;
+
+	/*
+	 * Corner case handling. This is called from add_to_page_cache()
+	 * in usual. But some FS (shmem) precharges this page before calling it
+	 * and call add_to_page_cache() with GFP_NOWAIT.
+	 *
+	 * For GFP_NOWAIT case, the page may be pre-charged before calling
+	 * add_to_page_cache(). (See shmem.c) check it here and avoid to call
+	 * charge twice. (It works but has to pay a bit larger cost.)
+	 */
+	if (!(gfp_mask & __GFP_WAIT)) {
+		struct page_cgroup *pc;
+
+		lock_page_cgroup(page);
+		pc = page_get_page_cgroup(page);
+		if (pc) {
+			VM_BUG_ON(pc->page != page);
+			VM_BUG_ON(!pc->mem_cgroup);
+			unlock_page_cgroup(page);
+			return 0;
+		}
+		unlock_page_cgroup(page);
+	}
+
+	if (unlikely(!mm))
 		mm = &init_mm;
+
 	return mem_cgroup_charge_common(page, mm, gfp_mask,
-				MEM_CGROUP_CHARGE_TYPE_CACHE);
+				MEM_CGROUP_CHARGE_TYPE_CACHE, NULL);
 }
 
 /*
- * Uncharging is always a welcome operation, we never complain, simply
- * uncharge.
+ * uncharge if !page_mapped(page)
  */
-void mem_cgroup_uncharge_page(struct page *page)
+static void
+__mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
 {
 	struct page_cgroup *pc;
 	struct mem_cgroup *mem;
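The GFP_NOWAIT branch above exists because a filesystem such as shmem may charge a page itself before inserting it into the page cache. A rough, hypothetical sketch of that calling sequence (locals such as page, mapping and index are assumed to exist in the caller; error handling is trimmed):

/* Hypothetical shmem-style caller: charge first, then insert. */
error = mem_cgroup_cache_charge(page, current->mm, GFP_KERNEL);
if (error)
	goto out;
/*
 * add_to_page_cache() with GFP_NOWAIT re-enters
 * mem_cgroup_cache_charge(), which now finds the existing
 * page_cgroup and returns 0 instead of charging twice.
 */
error = add_to_page_cache(page, mapping, index, GFP_NOWAIT);
if (error)
	mem_cgroup_uncharge_cache_page(page);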
@@ -674,98 +706,158 @@ void mem_cgroup_uncharge_page(struct page *page)
 	 */
 	lock_page_cgroup(page);
 	pc = page_get_page_cgroup(page);
-	if (!pc)
+	if (unlikely(!pc))
 		goto unlock;
 
 	VM_BUG_ON(pc->page != page);
-	VM_BUG_ON(pc->ref_cnt <= 0);
 
-	if (--(pc->ref_cnt) == 0) {
-		mz = page_cgroup_zoneinfo(pc);
-		spin_lock_irqsave(&mz->lru_lock, flags);
-		__mem_cgroup_remove_list(mz, pc);
-		spin_unlock_irqrestore(&mz->lru_lock, flags);
+	if ((ctype == MEM_CGROUP_CHARGE_TYPE_MAPPED)
+	    && ((pc->flags & PAGE_CGROUP_FLAG_CACHE)
+		|| page_mapped(page)))
+		goto unlock;
 
-		page_assign_page_cgroup(page, NULL);
-		unlock_page_cgroup(page);
+	mz = page_cgroup_zoneinfo(pc);
+	spin_lock_irqsave(&mz->lru_lock, flags);
+	__mem_cgroup_remove_list(mz, pc);
+	spin_unlock_irqrestore(&mz->lru_lock, flags);
 
-		mem = pc->mem_cgroup;
-		res_counter_uncharge(&mem->res, PAGE_SIZE);
-		css_put(&mem->css);
+	page_assign_page_cgroup(page, NULL);
+	unlock_page_cgroup(page);
 
-		kmem_cache_free(page_cgroup_cache, pc);
-		return;
-	}
+	mem = pc->mem_cgroup;
+	res_counter_uncharge(&mem->res, PAGE_SIZE);
+	css_put(&mem->css);
 
+	kmem_cache_free(page_cgroup_cache, pc);
+	return;
 unlock:
 	unlock_page_cgroup(page);
 }
 
+void mem_cgroup_uncharge_page(struct page *page)
+{
+	__mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_MAPPED);
+}
+
+void mem_cgroup_uncharge_cache_page(struct page *page)
+{
+	VM_BUG_ON(page_mapped(page));
+	__mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_CACHE);
+}
+
 /*
- * Returns non-zero if a page (under migration) has valid page_cgroup member.
- * Refcnt of page_cgroup is incremented.
+ * Before starting migration, account against new page.
  */
-int mem_cgroup_prepare_migration(struct page *page)
+int mem_cgroup_prepare_migration(struct page *page, struct page *newpage)
 {
 	struct page_cgroup *pc;
+	struct mem_cgroup *mem = NULL;
+	enum charge_type ctype = MEM_CGROUP_CHARGE_TYPE_MAPPED;
+	int ret = 0;
 
 	if (mem_cgroup_subsys.disabled)
 		return 0;
 
 	lock_page_cgroup(page);
 	pc = page_get_page_cgroup(page);
-	if (pc)
-		pc->ref_cnt++;
+	if (pc) {
+		mem = pc->mem_cgroup;
+		css_get(&mem->css);
+		if (pc->flags & PAGE_CGROUP_FLAG_CACHE)
+			ctype = MEM_CGROUP_CHARGE_TYPE_CACHE;
+	}
 	unlock_page_cgroup(page);
-	return pc != NULL;
+	if (mem) {
+		ret = mem_cgroup_charge_common(newpage, NULL, GFP_KERNEL,
+					ctype, mem);
+		css_put(&mem->css);
+	}
+	return ret;
 }
 
-void mem_cgroup_end_migration(struct page *page)
+/* remove redundant charge if migration failed*/
+void mem_cgroup_end_migration(struct page *newpage)
 {
-	mem_cgroup_uncharge_page(page);
+	/*
+	 * At success, page->mapping is not NULL.
+	 * special rollback care is necessary when
+	 * 1. at migration failure. (newpage->mapping is cleared in this case)
+	 * 2. the newpage was moved but not remapped again because the task
+	 *    exits and the newpage is obsolete. In this case, the new page
+	 *    may be a swapcache. So, we just call mem_cgroup_uncharge_page()
+	 *    always for avoiding mess. The page_cgroup will be removed if
+	 *    unnecessary. File cache pages is still on radix-tree. Don't
+	 *    care it.
+	 */
+	if (!newpage->mapping)
+		__mem_cgroup_uncharge_common(newpage,
+					MEM_CGROUP_CHARGE_TYPE_FORCE);
+	else if (PageAnon(newpage))
+		mem_cgroup_uncharge_page(newpage);
 }
 
 /*
- * We know both *page* and *newpage* are now not-on-LRU and PG_locked.
- * And no race with uncharge() routines because page_cgroup for *page*
- * has extra one reference by mem_cgroup_prepare_migration.
+ * A call to try to shrink memory usage under specified resource controller.
+ * This is typically used for page reclaiming for shmem for reducing side
+ * effect of page allocation from shmem, which is used by some mem_cgroup.
  */
-void mem_cgroup_page_migration(struct page *page, struct page *newpage)
+int mem_cgroup_shrink_usage(struct mm_struct *mm, gfp_t gfp_mask)
 {
-	struct page_cgroup *pc;
-	struct mem_cgroup_per_zone *mz;
-	unsigned long flags;
+	struct mem_cgroup *mem;
+	int progress = 0;
+	int retry = MEM_CGROUP_RECLAIM_RETRIES;
 
-	lock_page_cgroup(page);
-	pc = page_get_page_cgroup(page);
-	if (!pc) {
-		unlock_page_cgroup(page);
-		return;
+	if (mem_cgroup_subsys.disabled)
+		return 0;
+	if (!mm)
+		return 0;
+
+	rcu_read_lock();
+	mem = mem_cgroup_from_task(rcu_dereference(mm->owner));
+	if (unlikely(!mem)) {
+		rcu_read_unlock();
+		return 0;
 	}
+	css_get(&mem->css);
+	rcu_read_unlock();
 
-	mz = page_cgroup_zoneinfo(pc);
-	spin_lock_irqsave(&mz->lru_lock, flags);
-	__mem_cgroup_remove_list(mz, pc);
-	spin_unlock_irqrestore(&mz->lru_lock, flags);
+	do {
+		progress = try_to_free_mem_cgroup_pages(mem, gfp_mask);
+		progress += res_counter_check_under_limit(&mem->res);
+	} while (!progress && --retry);
 
-	page_assign_page_cgroup(page, NULL);
-	unlock_page_cgroup(page);
+	css_put(&mem->css);
+	if (!retry)
+		return -ENOMEM;
+	return 0;
+}
 
-	pc->page = newpage;
-	lock_page_cgroup(newpage);
-	page_assign_page_cgroup(newpage, pc);
+int mem_cgroup_resize_limit(struct mem_cgroup *memcg, unsigned long long val)
+{
 
-	mz = page_cgroup_zoneinfo(pc);
-	spin_lock_irqsave(&mz->lru_lock, flags);
-	__mem_cgroup_add_list(mz, pc);
-	spin_unlock_irqrestore(&mz->lru_lock, flags);
+	int retry_count = MEM_CGROUP_RECLAIM_RETRIES;
+	int progress;
+	int ret = 0;
 
-	unlock_page_cgroup(newpage);
+	while (res_counter_set_limit(&memcg->res, val)) {
+		if (signal_pending(current)) {
+			ret = -EINTR;
+			break;
+		}
+		if (!retry_count) {
+			ret = -EBUSY;
+			break;
+		}
+		progress = try_to_free_mem_cgroup_pages(memcg, GFP_KERNEL);
+		if (!progress)
+			retry_count--;
+	}
+	return ret;
 }
 
+
 /*
  * This routine traverse page_cgroup in given list and drop them all.
- * This routine ignores page_cgroup->ref_cnt.
  * *And* this routine doesn't reclaim page itself, just removes page_cgroup.
  */
 #define FORCE_UNCHARGE_BATCH	(128)
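With the page_cgroup refcount gone, the migration helpers pre-charge the new page and roll back afterwards instead of handing a page_cgroup from the old page to the new one. A hedged sketch of how a migration loop is expected to drive them; the function and move_page_contents() are illustrative placeholders, not the actual migrate.c code:

/* Illustrative migration sequence for one page. */
static int migrate_one_page(struct page *page, struct page *newpage)
{
	int rc;

	/* Account the new page against page's cgroup up front. */
	rc = mem_cgroup_prepare_migration(page, newpage);
	if (rc)
		return rc;

	rc = move_page_contents(page, newpage);	/* hypothetical */

	/*
	 * Always called; it drops the redundant charge when the copy
	 * failed or the new page never got remapped, and is a no-op
	 * for a successfully remapped file page.
	 */
	mem_cgroup_end_migration(newpage);
	return rc;
}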
@@ -790,12 +882,20 @@ static void mem_cgroup_force_empty_list(struct mem_cgroup *mem,
 		page = pc->page;
 		get_page(page);
 		spin_unlock_irqrestore(&mz->lru_lock, flags);
-		mem_cgroup_uncharge_page(page);
-		put_page(page);
-		if (--count <= 0) {
-			count = FORCE_UNCHARGE_BATCH;
+		/*
+		 * Check if this page is on LRU. !LRU page can be found
+		 * if it's under page migration.
+		 */
+		if (PageLRU(page)) {
+			__mem_cgroup_uncharge_common(page,
+					MEM_CGROUP_CHARGE_TYPE_FORCE);
+			put_page(page);
+			if (--count <= 0) {
+				count = FORCE_UNCHARGE_BATCH;
+				cond_resched();
+			}
+		} else
 			cond_resched();
-		}
 		spin_lock_irqsave(&mz->lru_lock, flags);
 	}
 	spin_unlock_irqrestore(&mz->lru_lock, flags);
@@ -810,9 +910,6 @@ static int mem_cgroup_force_empty(struct mem_cgroup *mem)
 	int ret = -EBUSY;
 	int node, zid;
 
-	if (mem_cgroup_subsys.disabled)
-		return 0;
-
 	css_get(&mem->css);
 	/*
 	 * page reclaim code (kswapd etc..) will move pages between
@@ -838,32 +935,34 @@ out:
 	return ret;
 }
 
-static int mem_cgroup_write_strategy(char *buf, unsigned long long *tmp)
-{
-	*tmp = memparse(buf, &buf);
-	if (*buf != '\0')
-		return -EINVAL;
-
-	/*
-	 * Round up the value to the closest page size
-	 */
-	*tmp = ((*tmp + PAGE_SIZE - 1) >> PAGE_SHIFT) << PAGE_SHIFT;
-	return 0;
-}
-
 static u64 mem_cgroup_read(struct cgroup *cont, struct cftype *cft)
 {
 	return res_counter_read_u64(&mem_cgroup_from_cont(cont)->res,
 				    cft->private);
 }
-
-static ssize_t mem_cgroup_write(struct cgroup *cont, struct cftype *cft,
-				struct file *file, const char __user *userbuf,
-				size_t nbytes, loff_t *ppos)
+/*
+ * The user of this function is...
+ * RES_LIMIT.
+ */
+static int mem_cgroup_write(struct cgroup *cont, struct cftype *cft,
+			    const char *buffer)
 {
-	return res_counter_write(&mem_cgroup_from_cont(cont)->res,
-				cft->private, userbuf, nbytes, ppos,
-				mem_cgroup_write_strategy);
+	struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
+	unsigned long long val;
+	int ret;
+
+	switch (cft->private) {
+	case RES_LIMIT:
+		/* This function does all necessary parse...reuse it */
+		ret = res_counter_memparse_write_strategy(buffer, &val);
+		if (!ret)
+			ret = mem_cgroup_resize_limit(memcg, val);
+		break;
+	default:
+		ret = -EINVAL; /* should be BUG() ? */
+		break;
+	}
+	return ret;
 }
 
 static int mem_cgroup_reset(struct cgroup *cont, unsigned int event)
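Because the limit is now parsed with res_counter_memparse_write_strategy(), the limit_in_bytes control file accepts human-readable suffixes such as "K", "M" and "G", and the write fails with -EBUSY or -EINTR when mem_cgroup_resize_limit() cannot bring usage under the new value. A small userspace sketch; the mount point /cgroup and the group name foo are assumptions, not part of the patch:

/* build: cc -o setlimit setlimit.c */
#include <stdio.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>

int main(void)
{
	const char *path = "/cgroup/foo/memory.limit_in_bytes";
	const char *limit = "64M";	/* parsed by memparse() in the kernel */
	int fd = open(path, O_WRONLY);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	if (write(fd, limit, strlen(limit)) < 0)
		perror("write");	/* EBUSY/EINTR if the limit cannot be applied */
	close(fd);
	return 0;
}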
@@ -940,7 +1039,7 @@ static struct cftype mem_cgroup_files[] = {
 	{
 		.name = "limit_in_bytes",
 		.private = RES_LIMIT,
-		.write = mem_cgroup_write,
+		.write_string = mem_cgroup_write,
 		.read_u64 = mem_cgroup_read,
 	},
 	{
@@ -1070,8 +1169,6 @@ static void mem_cgroup_destroy(struct cgroup_subsys *ss,
 static int mem_cgroup_populate(struct cgroup_subsys *ss,
 				struct cgroup *cont)
 {
-	if (mem_cgroup_subsys.disabled)
-		return 0;
 	return cgroup_add_files(cont, ss, mem_cgroup_files,
 					ARRAY_SIZE(mem_cgroup_files));
 }
@@ -1084,9 +1181,6 @@ static void mem_cgroup_move_task(struct cgroup_subsys *ss,
 	struct mm_struct *mm;
 	struct mem_cgroup *mem, *old_mem;
 
-	if (mem_cgroup_subsys.disabled)
-		return;
-
 	mm = get_task_mm(p);
 	if (mm == NULL)
 		return;
@@ -1094,9 +1188,6 @@ static void mem_cgroup_move_task(struct cgroup_subsys *ss,
 	mem = mem_cgroup_from_cont(cont);
 	old_mem = mem_cgroup_from_cont(old_cont);
 
-	if (mem == old_mem)
-		goto out;
-
 	/*
 	 * Only thread group leaders are allowed to migrate, the mm_struct is
 	 * in effect owned by the leader