author		Ingo Molnar <mingo@elte.hu>	2008-07-26 11:48:49 -0400
committer	Ingo Molnar <mingo@elte.hu>	2008-07-26 11:48:49 -0400
commit		c3cc99ff5d24e2eeaf7ec2032e720681916990e3 (patch)
tree		c3e74171bbbd2adde9d60b9db1c440415c8d2831 /mm/memcontrol.c
parent		38ffbe66d59051fd9cfcfc8545f164700e2fa3bc (diff)
parent		024e8ac04453b3525448c31ef39848cf675ba6db (diff)

Merge branch 'linus' into x86/xen

Diffstat (limited to 'mm/memcontrol.c')

-rw-r--r--	mm/memcontrol.c	364
1 file changed, 219 insertions(+), 145 deletions(-)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index e46451e1d9b7..fba566c51322 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -35,9 +35,9 @@
 
 #include <asm/uaccess.h>
 
-struct cgroup_subsys mem_cgroup_subsys;
-static const int MEM_CGROUP_RECLAIM_RETRIES = 5;
-static struct kmem_cache *page_cgroup_cache;
+struct cgroup_subsys mem_cgroup_subsys __read_mostly;
+static struct kmem_cache *page_cgroup_cache __read_mostly;
+#define MEM_CGROUP_RECLAIM_RETRIES 5
 
 /*
  * Statistics for memory cgroup.
@@ -166,7 +166,6 @@ struct page_cgroup {
 	struct list_head lru;		/* per cgroup LRU list */
 	struct page *page;
 	struct mem_cgroup *mem_cgroup;
-	int ref_cnt;			/* cached, mapped, migrating */
 	int flags;
 };
 #define PAGE_CGROUP_FLAG_CACHE	(0x1)	/* charged as cache */
@@ -185,6 +184,7 @@ static enum zone_type page_cgroup_zid(struct page_cgroup *pc)
 enum charge_type {
 	MEM_CGROUP_CHARGE_TYPE_CACHE = 0,
 	MEM_CGROUP_CHARGE_TYPE_MAPPED,
+	MEM_CGROUP_CHARGE_TYPE_FORCE,	/* used by force_empty */
 };
 
 /*
@@ -296,7 +296,7 @@ static void __mem_cgroup_remove_list(struct mem_cgroup_per_zone *mz,
 	MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_INACTIVE) -= 1;
 
 	mem_cgroup_charge_statistics(pc->mem_cgroup, pc->flags, false);
-	list_del_init(&pc->lru);
+	list_del(&pc->lru);
 }
 
 static void __mem_cgroup_add_list(struct mem_cgroup_per_zone *mz,
@@ -354,6 +354,9 @@ void mem_cgroup_move_lists(struct page *page, bool active)
 	struct mem_cgroup_per_zone *mz;
 	unsigned long flags;
 
+	if (mem_cgroup_subsys.disabled)
+		return;
+
 	/*
 	 * We cannot lock_page_cgroup while holding zone's lru_lock,
 	 * because other holders of lock_page_cgroup can be interrupted
@@ -524,7 +527,8 @@ unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
  * < 0 if the cgroup is over its limit
  */
 static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
-				gfp_t gfp_mask, enum charge_type ctype)
+				gfp_t gfp_mask, enum charge_type ctype,
+				struct mem_cgroup *memcg)
 {
 	struct mem_cgroup *mem;
 	struct page_cgroup *pc;
@@ -532,35 +536,8 @@ static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
 	unsigned long nr_retries = MEM_CGROUP_RECLAIM_RETRIES;
 	struct mem_cgroup_per_zone *mz;
 
-	if (mem_cgroup_subsys.disabled)
-		return 0;
-
-	/*
-	 * Should page_cgroup's go to their own slab?
-	 * One could optimize the performance of the charging routine
-	 * by saving a bit in the page_flags and using it as a lock
-	 * to see if the cgroup page already has a page_cgroup associated
-	 * with it
-	 */
-retry:
-	lock_page_cgroup(page);
-	pc = page_get_page_cgroup(page);
-	/*
-	 * The page_cgroup exists and
-	 * the page has already been accounted.
-	 */
-	if (pc) {
-		VM_BUG_ON(pc->page != page);
-		VM_BUG_ON(pc->ref_cnt <= 0);
-
-		pc->ref_cnt++;
-		unlock_page_cgroup(page);
-		goto done;
-	}
-	unlock_page_cgroup(page);
-
-	pc = kmem_cache_zalloc(page_cgroup_cache, gfp_mask);
-	if (pc == NULL)
+	pc = kmem_cache_alloc(page_cgroup_cache, gfp_mask);
+	if (unlikely(pc == NULL))
 		goto err;
 
 	/*
@@ -569,16 +546,18 @@ retry:
 	 * thread group leader migrates. It's possible that mm is not
 	 * set, if so charge the init_mm (happens for pagecache usage).
 	 */
-	if (!mm)
-		mm = &init_mm;
-
-	rcu_read_lock();
-	mem = mem_cgroup_from_task(rcu_dereference(mm->owner));
-	/*
-	 * For every charge from the cgroup, increment reference count
-	 */
-	css_get(&mem->css);
-	rcu_read_unlock();
+	if (likely(!memcg)) {
+		rcu_read_lock();
+		mem = mem_cgroup_from_task(rcu_dereference(mm->owner));
+		/*
+		 * For every charge from the cgroup, increment reference count
+		 */
+		css_get(&mem->css);
+		rcu_read_unlock();
+	} else {
+		mem = memcg;
+		css_get(&memcg->css);
+	}
 
 	while (res_counter_charge(&mem->res, PAGE_SIZE)) {
 		if (!(gfp_mask & __GFP_WAIT))
@@ -603,25 +582,24 @@ retry:
 		}
 	}
 
-	pc->ref_cnt = 1;
 	pc->mem_cgroup = mem;
 	pc->page = page;
-	pc->flags = PAGE_CGROUP_FLAG_ACTIVE;
+	/*
+	 * If a page is accounted as a page cache, insert to inactive list.
+	 * If anon, insert to active list.
+	 */
 	if (ctype == MEM_CGROUP_CHARGE_TYPE_CACHE)
 		pc->flags = PAGE_CGROUP_FLAG_CACHE;
+	else
+		pc->flags = PAGE_CGROUP_FLAG_ACTIVE;
 
 	lock_page_cgroup(page);
-	if (page_get_page_cgroup(page)) {
+	if (unlikely(page_get_page_cgroup(page))) {
 		unlock_page_cgroup(page);
-		/*
-		 * Another charge has been added to this page already.
-		 * We take lock_page_cgroup(page) again and read
-		 * page->cgroup, increment refcnt.... just retry is OK.
-		 */
 		res_counter_uncharge(&mem->res, PAGE_SIZE);
 		css_put(&mem->css);
 		kmem_cache_free(page_cgroup_cache, pc);
-		goto retry;
+		goto done;
 	}
 	page_assign_page_cgroup(page, pc);
 
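
With the new memcg argument introduced above, mem_cgroup_charge_common() can charge either the group that owns mm (resolved through mm->owner) or a group supplied explicitly by the caller. Both call forms appear in later hunks of this diff; for orientation, side by side (a sketch quoting those callers, not additional patch content):

	/* fault/page-cache paths: resolve the group from mm->owner */
	return mem_cgroup_charge_common(page, mm, gfp_mask,
				MEM_CGROUP_CHARGE_TYPE_MAPPED, NULL);

	/* migration path: charge the new page against an already-known group */
	ret = mem_cgroup_charge_common(newpage, NULL, GFP_KERNEL,
				ctype, mem);
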
@@ -642,24 +620,65 @@ err:
 
 int mem_cgroup_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask)
 {
+	if (mem_cgroup_subsys.disabled)
+		return 0;
+
+	/*
+	 * If already mapped, we don't have to account.
+	 * If page cache, page->mapping has address_space.
+	 * But page->mapping may have out-of-use anon_vma pointer,
+	 * detecit it by PageAnon() check. newly-mapped-anon's page->mapping
+	 * is NULL.
+	 */
+	if (page_mapped(page) || (page->mapping && !PageAnon(page)))
+		return 0;
+	if (unlikely(!mm))
+		mm = &init_mm;
 	return mem_cgroup_charge_common(page, mm, gfp_mask,
-				MEM_CGROUP_CHARGE_TYPE_MAPPED);
+				MEM_CGROUP_CHARGE_TYPE_MAPPED, NULL);
 }
 
 int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
 				gfp_t gfp_mask)
 {
-	if (!mm)
+	if (mem_cgroup_subsys.disabled)
+		return 0;
+
+	/*
+	 * Corner case handling. This is called from add_to_page_cache()
+	 * in usual. But some FS (shmem) precharges this page before calling it
+	 * and call add_to_page_cache() with GFP_NOWAIT.
+	 *
+	 * For GFP_NOWAIT case, the page may be pre-charged before calling
+	 * add_to_page_cache(). (See shmem.c) check it here and avoid to call
+	 * charge twice. (It works but has to pay a bit larger cost.)
+	 */
+	if (!(gfp_mask & __GFP_WAIT)) {
+		struct page_cgroup *pc;
+
+		lock_page_cgroup(page);
+		pc = page_get_page_cgroup(page);
+		if (pc) {
+			VM_BUG_ON(pc->page != page);
+			VM_BUG_ON(!pc->mem_cgroup);
+			unlock_page_cgroup(page);
+			return 0;
+		}
+		unlock_page_cgroup(page);
+	}
+
+	if (unlikely(!mm))
 		mm = &init_mm;
+
 	return mem_cgroup_charge_common(page, mm, gfp_mask,
-				MEM_CGROUP_CHARGE_TYPE_CACHE);
+				MEM_CGROUP_CHARGE_TYPE_CACHE, NULL);
 }
 
 /*
- * Uncharging is always a welcome operation, we never complain, simply
- * uncharge.
+ * uncharge if !page_mapped(page)
  */
-void mem_cgroup_uncharge_page(struct page *page)
+static void
+__mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
 {
 	struct page_cgroup *pc;
 	struct mem_cgroup *mem;
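
The __GFP_WAIT check added above exists for callers that pre-charge a page and then insert it into the page cache with GFP_NOWAIT, as the comment notes for shmem. Roughly, the pattern it protects against looks like this (a hedged sketch of the caller side described by the comment, not code from this patch; error handling omitted):

	/* filesystem pre-charges with a mask that may sleep and reclaim */
	if (mem_cgroup_cache_charge(page, current->mm, GFP_KERNEL))
		goto out;
	/*
	 * add_to_page_cache() charges again on the caller's behalf, but with
	 * GFP_NOWAIT; the new check finds the existing page_cgroup and
	 * returns 0 instead of double-charging.
	 */
	error = add_to_page_cache(page, mapping, index, GFP_NOWAIT);
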
@@ -674,98 +693,151 @@ void mem_cgroup_uncharge_page(struct page *page)
 	 */
 	lock_page_cgroup(page);
 	pc = page_get_page_cgroup(page);
-	if (!pc)
+	if (unlikely(!pc))
 		goto unlock;
 
 	VM_BUG_ON(pc->page != page);
-	VM_BUG_ON(pc->ref_cnt <= 0);
 
-	if (--(pc->ref_cnt) == 0) {
-		mz = page_cgroup_zoneinfo(pc);
-		spin_lock_irqsave(&mz->lru_lock, flags);
-		__mem_cgroup_remove_list(mz, pc);
-		spin_unlock_irqrestore(&mz->lru_lock, flags);
+	if ((ctype == MEM_CGROUP_CHARGE_TYPE_MAPPED)
+	    && ((pc->flags & PAGE_CGROUP_FLAG_CACHE)
+		|| page_mapped(page)))
+		goto unlock;
 
-		page_assign_page_cgroup(page, NULL);
-		unlock_page_cgroup(page);
+	mz = page_cgroup_zoneinfo(pc);
+	spin_lock_irqsave(&mz->lru_lock, flags);
+	__mem_cgroup_remove_list(mz, pc);
+	spin_unlock_irqrestore(&mz->lru_lock, flags);
 
-		mem = pc->mem_cgroup;
-		res_counter_uncharge(&mem->res, PAGE_SIZE);
-		css_put(&mem->css);
+	page_assign_page_cgroup(page, NULL);
+	unlock_page_cgroup(page);
 
-		kmem_cache_free(page_cgroup_cache, pc);
-		return;
-	}
+	mem = pc->mem_cgroup;
+	res_counter_uncharge(&mem->res, PAGE_SIZE);
+	css_put(&mem->css);
 
+	kmem_cache_free(page_cgroup_cache, pc);
+	return;
 unlock:
 	unlock_page_cgroup(page);
 }
 
+void mem_cgroup_uncharge_page(struct page *page)
+{
+	__mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_MAPPED);
+}
+
+void mem_cgroup_uncharge_cache_page(struct page *page)
+{
+	VM_BUG_ON(page_mapped(page));
+	__mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_CACHE);
+}
+
 /*
- * Returns non-zero if a page (under migration) has valid page_cgroup member.
- * Refcnt of page_cgroup is incremented.
+ * Before starting migration, account against new page.
  */
-int mem_cgroup_prepare_migration(struct page *page)
+int mem_cgroup_prepare_migration(struct page *page, struct page *newpage)
 {
 	struct page_cgroup *pc;
+	struct mem_cgroup *mem = NULL;
+	enum charge_type ctype = MEM_CGROUP_CHARGE_TYPE_MAPPED;
+	int ret = 0;
 
 	if (mem_cgroup_subsys.disabled)
 		return 0;
 
 	lock_page_cgroup(page);
 	pc = page_get_page_cgroup(page);
-	if (pc)
-		pc->ref_cnt++;
+	if (pc) {
+		mem = pc->mem_cgroup;
+		css_get(&mem->css);
+		if (pc->flags & PAGE_CGROUP_FLAG_CACHE)
+			ctype = MEM_CGROUP_CHARGE_TYPE_CACHE;
+	}
 	unlock_page_cgroup(page);
-	return pc != NULL;
+	if (mem) {
+		ret = mem_cgroup_charge_common(newpage, NULL, GFP_KERNEL,
+			ctype, mem);
+		css_put(&mem->css);
+	}
+	return ret;
 }
 
-void mem_cgroup_end_migration(struct page *page)
+/* remove redundant charge if migration failed*/
+void mem_cgroup_end_migration(struct page *newpage)
 {
-	mem_cgroup_uncharge_page(page);
+	/*
+	 * At success, page->mapping is not NULL.
+	 * special rollback care is necessary when
+	 * 1. at migration failure. (newpage->mapping is cleared in this case)
+	 * 2. the newpage was moved but not remapped again because the task
+	 *    exits and the newpage is obsolete. In this case, the new page
+	 *    may be a swapcache. So, we just call mem_cgroup_uncharge_page()
+	 *    always for avoiding mess. The  page_cgroup will be removed if
+	 *    unnecessary. File cache pages is still on radix-tree. Don't
+	 *    care it.
+	 */
+	if (!newpage->mapping)
+		__mem_cgroup_uncharge_common(newpage,
+					 MEM_CGROUP_CHARGE_TYPE_FORCE);
+	else if (PageAnon(newpage))
+		mem_cgroup_uncharge_page(newpage);
 }
 
 /*
- * We know both *page* and *newpage* are now not-on-LRU and PG_locked.
- * And no race with uncharge() routines because page_cgroup for *page*
- * has extra one reference by mem_cgroup_prepare_migration.
+ * A call to try to shrink memory usage under specified resource controller.
+ * This is typically used for page reclaiming for shmem for reducing side
+ * effect of page allocation from shmem, which is used by some mem_cgroup.
  */
-void mem_cgroup_page_migration(struct page *page, struct page *newpage)
+int mem_cgroup_shrink_usage(struct mm_struct *mm, gfp_t gfp_mask)
 {
-	struct page_cgroup *pc;
-	struct mem_cgroup_per_zone *mz;
-	unsigned long flags;
+	struct mem_cgroup *mem;
+	int progress = 0;
+	int retry = MEM_CGROUP_RECLAIM_RETRIES;
 
-	lock_page_cgroup(page);
-	pc = page_get_page_cgroup(page);
-	if (!pc) {
-		unlock_page_cgroup(page);
-		return;
-	}
+	if (mem_cgroup_subsys.disabled)
+		return 0;
 
-	mz = page_cgroup_zoneinfo(pc);
-	spin_lock_irqsave(&mz->lru_lock, flags);
-	__mem_cgroup_remove_list(mz, pc);
-	spin_unlock_irqrestore(&mz->lru_lock, flags);
+	rcu_read_lock();
+	mem = mem_cgroup_from_task(rcu_dereference(mm->owner));
+	css_get(&mem->css);
+	rcu_read_unlock();
 
-	page_assign_page_cgroup(page, NULL);
-	unlock_page_cgroup(page);
+	do {
+		progress = try_to_free_mem_cgroup_pages(mem, gfp_mask);
+	} while (!progress && --retry);
 
-	pc->page = newpage;
-	lock_page_cgroup(newpage);
-	page_assign_page_cgroup(newpage, pc);
+	css_put(&mem->css);
+	if (!retry)
+		return -ENOMEM;
+	return 0;
+}
 
-	mz = page_cgroup_zoneinfo(pc);
-	spin_lock_irqsave(&mz->lru_lock, flags);
-	__mem_cgroup_add_list(mz, pc);
-	spin_unlock_irqrestore(&mz->lru_lock, flags);
+int mem_cgroup_resize_limit(struct mem_cgroup *memcg, unsigned long long val)
+{
+
+	int retry_count = MEM_CGROUP_RECLAIM_RETRIES;
+	int progress;
+	int ret = 0;
 
-	unlock_page_cgroup(newpage);
+	while (res_counter_set_limit(&memcg->res, val)) {
+		if (signal_pending(current)) {
+			ret = -EINTR;
+			break;
+		}
+		if (!retry_count) {
+			ret = -EBUSY;
+			break;
+		}
+		progress = try_to_free_mem_cgroup_pages(memcg, GFP_KERNEL);
+		if (!progress)
+			retry_count--;
+	}
+	return ret;
 }
 
+
 /*
  * This routine traverse page_cgroup in given list and drop them all.
- * This routine ignores page_cgroup->ref_cnt.
  * *And* this routine doesn't reclaim page itself, just removes page_cgroup.
  */
 #define FORCE_UNCHARGE_BATCH	(128)
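
mem_cgroup_prepare_migration() and mem_cgroup_end_migration() above replace the old ref_cnt/page_migration scheme: the new page is charged to the old page's group before migration, and any charge that turns out to be unnecessary is dropped afterwards. The caller-side ordering this implies is roughly the following (a sketch inferred from the comments above; the real call sites live in mm/migrate.c and are not part of this diff):

	/* before copying: charge newpage to page's group, if page is charged */
	ret = mem_cgroup_prepare_migration(page, newpage);
	if (ret)
		goto out;	/* e.g. -ENOMEM from the charge */

	/* ... unmap page, copy it to newpage, remap ... */

	/* afterwards: uncharge newpage again if it never ended up mapped */
	mem_cgroup_end_migration(newpage);
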
@@ -790,12 +862,20 @@ static void mem_cgroup_force_empty_list(struct mem_cgroup *mem,
 		page = pc->page;
 		get_page(page);
 		spin_unlock_irqrestore(&mz->lru_lock, flags);
-		mem_cgroup_uncharge_page(page);
-		put_page(page);
-		if (--count <= 0) {
-			count = FORCE_UNCHARGE_BATCH;
+		/*
+		 * Check if this page is on LRU. !LRU page can be found
+		 * if it's under page migration.
+		 */
+		if (PageLRU(page)) {
+			__mem_cgroup_uncharge_common(page,
+					MEM_CGROUP_CHARGE_TYPE_FORCE);
+			put_page(page);
+			if (--count <= 0) {
+				count = FORCE_UNCHARGE_BATCH;
+				cond_resched();
+			}
+		} else
 			cond_resched();
-		}
 		spin_lock_irqsave(&mz->lru_lock, flags);
 	}
 	spin_unlock_irqrestore(&mz->lru_lock, flags);
@@ -810,9 +890,6 @@ static int mem_cgroup_force_empty(struct mem_cgroup *mem)
 	int ret = -EBUSY;
 	int node, zid;
 
-	if (mem_cgroup_subsys.disabled)
-		return 0;
-
 	css_get(&mem->css);
 	/*
 	 * page reclaim code (kswapd etc..) will move pages between
@@ -838,32 +915,34 @@ out:
 	return ret;
 }
 
-static int mem_cgroup_write_strategy(char *buf, unsigned long long *tmp)
-{
-	*tmp = memparse(buf, &buf);
-	if (*buf != '\0')
-		return -EINVAL;
-
-	/*
-	 * Round up the value to the closest page size
-	 */
-	*tmp = ((*tmp + PAGE_SIZE - 1) >> PAGE_SHIFT) << PAGE_SHIFT;
-	return 0;
-}
-
 static u64 mem_cgroup_read(struct cgroup *cont, struct cftype *cft)
 {
 	return res_counter_read_u64(&mem_cgroup_from_cont(cont)->res,
 				    cft->private);
 }
-
-static ssize_t mem_cgroup_write(struct cgroup *cont, struct cftype *cft,
-				struct file *file, const char __user *userbuf,
-				size_t nbytes, loff_t *ppos)
+/*
+ * The user of this function is...
+ * RES_LIMIT.
+ */
+static int mem_cgroup_write(struct cgroup *cont, struct cftype *cft,
+			    const char *buffer)
 {
-	return res_counter_write(&mem_cgroup_from_cont(cont)->res,
-				cft->private, userbuf, nbytes, ppos,
-				mem_cgroup_write_strategy);
+	struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
+	unsigned long long val;
+	int ret;
+
+	switch (cft->private) {
+	case RES_LIMIT:
+		/* This function does all necessary parse...reuse it */
+		ret = res_counter_memparse_write_strategy(buffer, &val);
+		if (!ret)
+			ret = mem_cgroup_resize_limit(memcg, val);
+		break;
+	default:
+		ret = -EINVAL; /* should be BUG() ? */
+		break;
+	}
+	return ret;
 }
 
 static int mem_cgroup_reset(struct cgroup *cont, unsigned int event)
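
With the RES_LIMIT handler above and the switch to .write_string in the next hunk, limit_in_bytes is parsed by res_counter_memparse_write_strategy() (so suffixed values such as "64M" are accepted) and the write can now fail with -EBUSY when reclaim cannot push usage below the new limit, or -EINTR if a signal arrives while retrying. From user space that surfaces through write(2); a minimal sketch, assuming the memory controller is mounted at /cgroup with a group named grp0 (both illustrative, not from this patch):

	#include <errno.h>
	#include <fcntl.h>
	#include <stdio.h>
	#include <string.h>
	#include <unistd.h>

	int main(void)
	{
		/* hypothetical path; depends on where cgroupfs is mounted */
		const char *path = "/cgroup/grp0/memory.limit_in_bytes";
		const char *val = "64M";	/* memparse() accepts K/M/G suffixes */
		int fd = open(path, O_WRONLY);

		if (fd < 0) {
			perror("open");
			return 1;
		}
		if (write(fd, val, strlen(val)) < 0) {
			if (errno == EBUSY)
				fprintf(stderr, "could not reclaim below the new limit\n");
			else if (errno == EINTR)
				fprintf(stderr, "interrupted while reclaiming\n");
			else
				perror("write");
		}
		close(fd);
		return 0;
	}
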
@@ -940,7 +1019,7 @@ static struct cftype mem_cgroup_files[] = {
 	{
 		.name = "limit_in_bytes",
 		.private = RES_LIMIT,
-		.write = mem_cgroup_write,
+		.write_string = mem_cgroup_write,
 		.read_u64 = mem_cgroup_read,
 	},
 	{
@@ -1070,8 +1149,6 @@ static void mem_cgroup_destroy(struct cgroup_subsys *ss,
 static int mem_cgroup_populate(struct cgroup_subsys *ss,
 				struct cgroup *cont)
 {
-	if (mem_cgroup_subsys.disabled)
-		return 0;
 	return cgroup_add_files(cont, ss, mem_cgroup_files,
 					ARRAY_SIZE(mem_cgroup_files));
 }
@@ -1084,9 +1161,6 @@ static void mem_cgroup_move_task(struct cgroup_subsys *ss,
 	struct mm_struct *mm;
 	struct mem_cgroup *mem, *old_mem;
 
-	if (mem_cgroup_subsys.disabled)
-		return;
-
 	mm = get_task_mm(p);
 	if (mm == NULL)
 		return;