Diffstat (limited to 'mm/memcontrol.c')
-rw-r--r-- | mm/memcontrol.c | 385 |
1 file changed, 238 insertions, 147 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index e46451e1d9b7..36896f3eb7f5 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -35,9 +35,9 @@ | |||
35 | 35 | ||
36 | #include <asm/uaccess.h> | 36 | #include <asm/uaccess.h> |
37 | 37 | ||
38 | struct cgroup_subsys mem_cgroup_subsys; | 38 | struct cgroup_subsys mem_cgroup_subsys __read_mostly; |
39 | static const int MEM_CGROUP_RECLAIM_RETRIES = 5; | 39 | static struct kmem_cache *page_cgroup_cache __read_mostly; |
40 | static struct kmem_cache *page_cgroup_cache; | 40 | #define MEM_CGROUP_RECLAIM_RETRIES 5 |
41 | 41 | ||
42 | /* | 42 | /* |
43 | * Statistics for memory cgroup. | 43 | * Statistics for memory cgroup. |
@@ -166,7 +166,6 @@ struct page_cgroup { | |||
166 | struct list_head lru; /* per cgroup LRU list */ | 166 | struct list_head lru; /* per cgroup LRU list */ |
167 | struct page *page; | 167 | struct page *page; |
168 | struct mem_cgroup *mem_cgroup; | 168 | struct mem_cgroup *mem_cgroup; |
169 | int ref_cnt; /* cached, mapped, migrating */ | ||
170 | int flags; | 169 | int flags; |
171 | }; | 170 | }; |
172 | #define PAGE_CGROUP_FLAG_CACHE (0x1) /* charged as cache */ | 171 | #define PAGE_CGROUP_FLAG_CACHE (0x1) /* charged as cache */ |
@@ -185,6 +184,7 @@ static enum zone_type page_cgroup_zid(struct page_cgroup *pc) | |||
185 | enum charge_type { | 184 | enum charge_type { |
186 | MEM_CGROUP_CHARGE_TYPE_CACHE = 0, | 185 | MEM_CGROUP_CHARGE_TYPE_CACHE = 0, |
187 | MEM_CGROUP_CHARGE_TYPE_MAPPED, | 186 | MEM_CGROUP_CHARGE_TYPE_MAPPED, |
187 | MEM_CGROUP_CHARGE_TYPE_FORCE, /* used by force_empty */ | ||
188 | }; | 188 | }; |
189 | 189 | ||
190 | /* | 190 | /* |
@@ -250,6 +250,14 @@ static struct mem_cgroup *mem_cgroup_from_cont(struct cgroup *cont) | |||
250 | 250 | ||
251 | struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p) | 251 | struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p) |
252 | { | 252 | { |
253 | /* | ||
254 | * mm_update_next_owner() may clear mm->owner to NULL | ||
255 | * if it races with swapoff, page migration, etc. | ||
256 | * So this can be called with p == NULL. | ||
257 | */ | ||
258 | if (unlikely(!p)) | ||
259 | return NULL; | ||
260 | |||
253 | return container_of(task_subsys_state(p, mem_cgroup_subsys_id), | 261 | return container_of(task_subsys_state(p, mem_cgroup_subsys_id), |
254 | struct mem_cgroup, css); | 262 | struct mem_cgroup, css); |
255 | } | 263 | } |
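For context, the NULL return added above is what lets the charge paths tolerate an mm whose owner has already been cleared. A minimal sketch of that caller-side pattern follows; the helper name is illustrative and not part of this patch, but the same sequence appears verbatim in mem_cgroup_charge_common() and mem_cgroup_shrink_usage() later in this diff:

/*
 * Illustrative helper, not part of this patch: resolve and pin the
 * mem_cgroup that mm->owner belongs to, tolerating a NULL owner.
 */
static struct mem_cgroup *get_charge_target(struct mm_struct *mm)
{
	struct mem_cgroup *mem;

	rcu_read_lock();
	mem = mem_cgroup_from_task(rcu_dereference(mm->owner));
	if (unlikely(!mem)) {
		/* owner was cleared by mm_update_next_owner() */
		rcu_read_unlock();
		return NULL;
	}
	css_get(&mem->css);	/* pin the cgroup before dropping RCU */
	rcu_read_unlock();
	return mem;		/* caller must css_put() when done */
}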
@@ -296,7 +304,7 @@ static void __mem_cgroup_remove_list(struct mem_cgroup_per_zone *mz, | |||
296 | MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_INACTIVE) -= 1; | 304 | MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_INACTIVE) -= 1; |
297 | 305 | ||
298 | mem_cgroup_charge_statistics(pc->mem_cgroup, pc->flags, false); | 306 | mem_cgroup_charge_statistics(pc->mem_cgroup, pc->flags, false); |
299 | list_del_init(&pc->lru); | 307 | list_del(&pc->lru); |
300 | } | 308 | } |
301 | 309 | ||
302 | static void __mem_cgroup_add_list(struct mem_cgroup_per_zone *mz, | 310 | static void __mem_cgroup_add_list(struct mem_cgroup_per_zone *mz, |
@@ -354,6 +362,9 @@ void mem_cgroup_move_lists(struct page *page, bool active) | |||
354 | struct mem_cgroup_per_zone *mz; | 362 | struct mem_cgroup_per_zone *mz; |
355 | unsigned long flags; | 363 | unsigned long flags; |
356 | 364 | ||
365 | if (mem_cgroup_subsys.disabled) | ||
366 | return; | ||
367 | |||
357 | /* | 368 | /* |
358 | * We cannot lock_page_cgroup while holding zone's lru_lock, | 369 | * We cannot lock_page_cgroup while holding zone's lru_lock, |
359 | * because other holders of lock_page_cgroup can be interrupted | 370 | * because other holders of lock_page_cgroup can be interrupted |
@@ -524,7 +535,8 @@ unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan, | |||
524 | * < 0 if the cgroup is over its limit | 535 | * < 0 if the cgroup is over its limit |
525 | */ | 536 | */ |
526 | static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm, | 537 | static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm, |
527 | gfp_t gfp_mask, enum charge_type ctype) | 538 | gfp_t gfp_mask, enum charge_type ctype, |
539 | struct mem_cgroup *memcg) | ||
528 | { | 540 | { |
529 | struct mem_cgroup *mem; | 541 | struct mem_cgroup *mem; |
530 | struct page_cgroup *pc; | 542 | struct page_cgroup *pc; |
@@ -532,35 +544,8 @@ static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm, | |||
532 | unsigned long nr_retries = MEM_CGROUP_RECLAIM_RETRIES; | 544 | unsigned long nr_retries = MEM_CGROUP_RECLAIM_RETRIES; |
533 | struct mem_cgroup_per_zone *mz; | 545 | struct mem_cgroup_per_zone *mz; |
534 | 546 | ||
535 | if (mem_cgroup_subsys.disabled) | 547 | pc = kmem_cache_alloc(page_cgroup_cache, gfp_mask); |
536 | return 0; | 548 | if (unlikely(pc == NULL)) |
537 | |||
538 | /* | ||
539 | * Should page_cgroup's go to their own slab? | ||
540 | * One could optimize the performance of the charging routine | ||
541 | * by saving a bit in the page_flags and using it as a lock | ||
542 | * to see if the cgroup page already has a page_cgroup associated | ||
543 | * with it | ||
544 | */ | ||
545 | retry: | ||
546 | lock_page_cgroup(page); | ||
547 | pc = page_get_page_cgroup(page); | ||
548 | /* | ||
549 | * The page_cgroup exists and | ||
550 | * the page has already been accounted. | ||
551 | */ | ||
552 | if (pc) { | ||
553 | VM_BUG_ON(pc->page != page); | ||
554 | VM_BUG_ON(pc->ref_cnt <= 0); | ||
555 | |||
556 | pc->ref_cnt++; | ||
557 | unlock_page_cgroup(page); | ||
558 | goto done; | ||
559 | } | ||
560 | unlock_page_cgroup(page); | ||
561 | |||
562 | pc = kmem_cache_zalloc(page_cgroup_cache, gfp_mask); | ||
563 | if (pc == NULL) | ||
564 | goto err; | 549 | goto err; |
565 | 550 | ||
566 | /* | 551 | /* |
@@ -569,16 +554,23 @@ retry: | |||
569 | * thread group leader migrates. It's possible that mm is not | 554 | * thread group leader migrates. It's possible that mm is not |
570 | * set, if so charge the init_mm (happens for pagecache usage). | 555 | * set, if so charge the init_mm (happens for pagecache usage). |
571 | */ | 556 | */ |
572 | if (!mm) | 557 | if (likely(!memcg)) { |
573 | mm = &init_mm; | 558 | rcu_read_lock(); |
574 | 559 | mem = mem_cgroup_from_task(rcu_dereference(mm->owner)); | |
575 | rcu_read_lock(); | 560 | if (unlikely(!mem)) { |
576 | mem = mem_cgroup_from_task(rcu_dereference(mm->owner)); | 561 | rcu_read_unlock(); |
577 | /* | 562 | kmem_cache_free(page_cgroup_cache, pc); |
578 | * For every charge from the cgroup, increment reference count | 563 | return 0; |
579 | */ | 564 | } |
580 | css_get(&mem->css); | 565 | /* |
581 | rcu_read_unlock(); | 566 | * For every charge from the cgroup, increment reference count |
567 | */ | ||
568 | css_get(&mem->css); | ||
569 | rcu_read_unlock(); | ||
570 | } else { | ||
571 | mem = memcg; | ||
572 | css_get(&memcg->css); | ||
573 | } | ||
582 | 574 | ||
583 | while (res_counter_charge(&mem->res, PAGE_SIZE)) { | 575 | while (res_counter_charge(&mem->res, PAGE_SIZE)) { |
584 | if (!(gfp_mask & __GFP_WAIT)) | 576 | if (!(gfp_mask & __GFP_WAIT)) |
@@ -603,25 +595,24 @@ retry: | |||
603 | } | 595 | } |
604 | } | 596 | } |
605 | 597 | ||
606 | pc->ref_cnt = 1; | ||
607 | pc->mem_cgroup = mem; | 598 | pc->mem_cgroup = mem; |
608 | pc->page = page; | 599 | pc->page = page; |
609 | pc->flags = PAGE_CGROUP_FLAG_ACTIVE; | 600 | /* |
601 | * If a page is accounted as page cache, put it on the inactive list. ||
602 | * If it is anon, put it on the active list. ||
603 | */ | ||
610 | if (ctype == MEM_CGROUP_CHARGE_TYPE_CACHE) | 604 | if (ctype == MEM_CGROUP_CHARGE_TYPE_CACHE) |
611 | pc->flags = PAGE_CGROUP_FLAG_CACHE; | 605 | pc->flags = PAGE_CGROUP_FLAG_CACHE; |
606 | else | ||
607 | pc->flags = PAGE_CGROUP_FLAG_ACTIVE; | ||
612 | 608 | ||
613 | lock_page_cgroup(page); | 609 | lock_page_cgroup(page); |
614 | if (page_get_page_cgroup(page)) { | 610 | if (unlikely(page_get_page_cgroup(page))) { |
615 | unlock_page_cgroup(page); | 611 | unlock_page_cgroup(page); |
616 | /* | ||
617 | * Another charge has been added to this page already. | ||
618 | * We take lock_page_cgroup(page) again and read | ||
619 | * page->cgroup, increment refcnt.... just retry is OK. | ||
620 | */ | ||
621 | res_counter_uncharge(&mem->res, PAGE_SIZE); | 612 | res_counter_uncharge(&mem->res, PAGE_SIZE); |
622 | css_put(&mem->css); | 613 | css_put(&mem->css); |
623 | kmem_cache_free(page_cgroup_cache, pc); | 614 | kmem_cache_free(page_cgroup_cache, pc); |
624 | goto retry; | 615 | goto done; |
625 | } | 616 | } |
626 | page_assign_page_cgroup(page, pc); | 617 | page_assign_page_cgroup(page, pc); |
627 | 618 | ||
@@ -642,24 +633,65 @@ err: | |||
642 | 633 | ||
643 | int mem_cgroup_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask) | 634 | int mem_cgroup_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask) |
644 | { | 635 | { |
636 | if (mem_cgroup_subsys.disabled) | ||
637 | return 0; | ||
638 | |||
639 | /* | ||
640 | * If already mapped, we don't have to account. | ||
641 | * If page cache, page->mapping has address_space. | ||
642 | * But page->mapping may hold a stale anon_vma pointer; detect that ||
643 | * case with a PageAnon() check. A newly mapped anon page's ||
644 | * page->mapping is NULL. ||
645 | */ | ||
646 | if (page_mapped(page) || (page->mapping && !PageAnon(page))) | ||
647 | return 0; | ||
648 | if (unlikely(!mm)) | ||
649 | mm = &init_mm; | ||
645 | return mem_cgroup_charge_common(page, mm, gfp_mask, | 650 | return mem_cgroup_charge_common(page, mm, gfp_mask, |
646 | MEM_CGROUP_CHARGE_TYPE_MAPPED); | 651 | MEM_CGROUP_CHARGE_TYPE_MAPPED, NULL); |
647 | } | 652 | } |
648 | 653 | ||
649 | int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm, | 654 | int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm, |
650 | gfp_t gfp_mask) | 655 | gfp_t gfp_mask) |
651 | { | 656 | { |
652 | if (!mm) | 657 | if (mem_cgroup_subsys.disabled) |
658 | return 0; | ||
659 | |||
660 | /* | ||
661 | * Corner case handling. This is normally called from add_to_page_cache(), ||
662 | * but some filesystems (shmem) pre-charge the page before calling it ||
663 | * and then call add_to_page_cache() with GFP_NOWAIT. ||
664 | * ||
665 | * In that GFP_NOWAIT case the page may already be charged, so check ||
666 | * for an existing page_cgroup here (see shmem.c) and avoid charging ||
667 | * the page twice. (This works, at the cost of a slightly larger path.) ||
668 | */ | ||
669 | if (!(gfp_mask & __GFP_WAIT)) { | ||
670 | struct page_cgroup *pc; | ||
671 | |||
672 | lock_page_cgroup(page); | ||
673 | pc = page_get_page_cgroup(page); | ||
674 | if (pc) { | ||
675 | VM_BUG_ON(pc->page != page); | ||
676 | VM_BUG_ON(!pc->mem_cgroup); | ||
677 | unlock_page_cgroup(page); | ||
678 | return 0; | ||
679 | } | ||
680 | unlock_page_cgroup(page); | ||
681 | } | ||
682 | |||
683 | if (unlikely(!mm)) | ||
653 | mm = &init_mm; | 684 | mm = &init_mm; |
685 | |||
654 | return mem_cgroup_charge_common(page, mm, gfp_mask, | 686 | return mem_cgroup_charge_common(page, mm, gfp_mask, |
655 | MEM_CGROUP_CHARGE_TYPE_CACHE); | 687 | MEM_CGROUP_CHARGE_TYPE_CACHE, NULL); |
656 | } | 688 | } |
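The corner case described in the comment above boils down to call ordering in the filesystem. Below is a hedged sketch of that ordering; the function name is an illustrative stand-in for the real shmem path, which is not part of this diff:

/*
 * Illustrative only: pre-charge with a blocking mask, then insert with
 * GFP_NOWAIT. The GFP_NOWAIT branch in mem_cgroup_cache_charge() sees
 * the existing page_cgroup and does not charge the page a second time.
 */
static int shmem_style_add_to_page_cache(struct page *page,
					 struct address_space *mapping,
					 pgoff_t index, struct mm_struct *mm)
{
	int err;

	err = mem_cgroup_cache_charge(page, mm, GFP_KERNEL);	/* may reclaim */
	if (err)
		return err;

	err = add_to_page_cache(page, mapping, index, GFP_NOWAIT);
	if (err)	/* insertion failed: drop the pre-charge again */
		mem_cgroup_uncharge_cache_page(page);
	return err;
}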
657 | 689 | ||
658 | /* | 690 | /* |
659 | * Uncharging is always a welcome operation, we never complain, simply | 691 | * uncharge if !page_mapped(page) |
660 | * uncharge. | ||
661 | */ | 692 | */ |
662 | void mem_cgroup_uncharge_page(struct page *page) | 693 | static void |
694 | __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype) | ||
663 | { | 695 | { |
664 | struct page_cgroup *pc; | 696 | struct page_cgroup *pc; |
665 | struct mem_cgroup *mem; | 697 | struct mem_cgroup *mem; |
@@ -674,98 +706,158 @@ void mem_cgroup_uncharge_page(struct page *page) | |||
674 | */ | 706 | */ |
675 | lock_page_cgroup(page); | 707 | lock_page_cgroup(page); |
676 | pc = page_get_page_cgroup(page); | 708 | pc = page_get_page_cgroup(page); |
677 | if (!pc) | 709 | if (unlikely(!pc)) |
678 | goto unlock; | 710 | goto unlock; |
679 | 711 | ||
680 | VM_BUG_ON(pc->page != page); | 712 | VM_BUG_ON(pc->page != page); |
681 | VM_BUG_ON(pc->ref_cnt <= 0); | ||
682 | 713 | ||
683 | if (--(pc->ref_cnt) == 0) { | 714 | if ((ctype == MEM_CGROUP_CHARGE_TYPE_MAPPED) |
684 | mz = page_cgroup_zoneinfo(pc); | 715 | && ((pc->flags & PAGE_CGROUP_FLAG_CACHE) |
685 | spin_lock_irqsave(&mz->lru_lock, flags); | 716 | || page_mapped(page))) |
686 | __mem_cgroup_remove_list(mz, pc); | 717 | goto unlock; |
687 | spin_unlock_irqrestore(&mz->lru_lock, flags); | ||
688 | 718 | ||
689 | page_assign_page_cgroup(page, NULL); | 719 | mz = page_cgroup_zoneinfo(pc); |
690 | unlock_page_cgroup(page); | 720 | spin_lock_irqsave(&mz->lru_lock, flags); |
721 | __mem_cgroup_remove_list(mz, pc); | ||
722 | spin_unlock_irqrestore(&mz->lru_lock, flags); | ||
691 | 723 | ||
692 | mem = pc->mem_cgroup; | 724 | page_assign_page_cgroup(page, NULL); |
693 | res_counter_uncharge(&mem->res, PAGE_SIZE); | 725 | unlock_page_cgroup(page); |
694 | css_put(&mem->css); | ||
695 | 726 | ||
696 | kmem_cache_free(page_cgroup_cache, pc); | 727 | mem = pc->mem_cgroup; |
697 | return; | 728 | res_counter_uncharge(&mem->res, PAGE_SIZE); |
698 | } | 729 | css_put(&mem->css); |
699 | 730 | ||
731 | kmem_cache_free(page_cgroup_cache, pc); | ||
732 | return; | ||
700 | unlock: | 733 | unlock: |
701 | unlock_page_cgroup(page); | 734 | unlock_page_cgroup(page); |
702 | } | 735 | } |
703 | 736 | ||
737 | void mem_cgroup_uncharge_page(struct page *page) | ||
738 | { | ||
739 | __mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_MAPPED); | ||
740 | } | ||
741 | |||
742 | void mem_cgroup_uncharge_cache_page(struct page *page) | ||
743 | { | ||
744 | VM_BUG_ON(page_mapped(page)); | ||
745 | __mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_CACHE); | ||
746 | } | ||
747 | |||
704 | /* | 748 | /* |
705 | * Returns non-zero if a page (under migration) has valid page_cgroup member. | 749 | * Before starting migration, account against new page. |
706 | * Refcnt of page_cgroup is incremented. | ||
707 | */ | 750 | */ |
708 | int mem_cgroup_prepare_migration(struct page *page) | 751 | int mem_cgroup_prepare_migration(struct page *page, struct page *newpage) |
709 | { | 752 | { |
710 | struct page_cgroup *pc; | 753 | struct page_cgroup *pc; |
754 | struct mem_cgroup *mem = NULL; | ||
755 | enum charge_type ctype = MEM_CGROUP_CHARGE_TYPE_MAPPED; | ||
756 | int ret = 0; | ||
711 | 757 | ||
712 | if (mem_cgroup_subsys.disabled) | 758 | if (mem_cgroup_subsys.disabled) |
713 | return 0; | 759 | return 0; |
714 | 760 | ||
715 | lock_page_cgroup(page); | 761 | lock_page_cgroup(page); |
716 | pc = page_get_page_cgroup(page); | 762 | pc = page_get_page_cgroup(page); |
717 | if (pc) | 763 | if (pc) { |
718 | pc->ref_cnt++; | 764 | mem = pc->mem_cgroup; |
765 | css_get(&mem->css); | ||
766 | if (pc->flags & PAGE_CGROUP_FLAG_CACHE) | ||
767 | ctype = MEM_CGROUP_CHARGE_TYPE_CACHE; | ||
768 | } | ||
719 | unlock_page_cgroup(page); | 769 | unlock_page_cgroup(page); |
720 | return pc != NULL; | 770 | if (mem) { |
771 | ret = mem_cgroup_charge_common(newpage, NULL, GFP_KERNEL, | ||
772 | ctype, mem); | ||
773 | css_put(&mem->css); | ||
774 | } | ||
775 | return ret; | ||
721 | } | 776 | } |
722 | 777 | ||
723 | void mem_cgroup_end_migration(struct page *page) | 778 | /* remove the redundant charge if migration failed */ |
779 | void mem_cgroup_end_migration(struct page *newpage) | ||
724 | { | 780 | { |
725 | mem_cgroup_uncharge_page(page); | 781 | /* |
782 | * On success, page->mapping is not NULL. ||
783 | * Special rollback care is necessary when ||
784 | * 1. migration fails (newpage->mapping is cleared in this case), or ||
785 | * 2. the newpage was moved but not remapped again because the task ||
786 | * exited and the newpage is obsolete. In this case the new page ||
787 | * may be swap cache, so we just call mem_cgroup_uncharge_page() ||
788 | * unconditionally to avoid a mess. The page_cgroup will be removed ||
789 | * if it is unnecessary. File cache pages are still on the ||
790 | * radix-tree; leave them alone. ||
791 | */ | ||
792 | if (!newpage->mapping) | ||
793 | __mem_cgroup_uncharge_common(newpage, | ||
794 | MEM_CGROUP_CHARGE_TYPE_FORCE); | ||
795 | else if (PageAnon(newpage)) | ||
796 | mem_cgroup_uncharge_page(newpage); | ||
726 | } | 797 | } |
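Taken together, the two hooks above bracket a migration roughly as in the following sketch. The move step is a hypothetical stand-in for the real work done in mm/migrate.c, which is not shown in this diff:

/* Illustrative bracket only, not the actual migrate_pages() code. */
static int migrate_one_page_sketch(struct page *page, struct page *newpage)
{
	int rc;

	/* charge newpage up front, against the cgroup that owns page */
	rc = mem_cgroup_prepare_migration(page, newpage);
	if (rc)
		return rc;	/* could not charge: do not migrate */

	rc = move_page_contents(page, newpage);	/* hypothetical helper */

	/*
	 * Called unconditionally: drops the pre-charge again if the move
	 * failed or if newpage ended up unused, as described above.
	 */
	mem_cgroup_end_migration(newpage);
	return rc;
}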
727 | 798 | ||
728 | /* | 799 | /* |
729 | * We know both *page* and *newpage* are now not-on-LRU and PG_locked. | 800 | * Try to shrink memory usage under the specified resource controller. |
730 | * And no race with uncharge() routines because page_cgroup for *page* | 801 | * This is typically used when reclaiming shmem pages, to reduce the |
731 | * has extra one reference by mem_cgroup_prepare_migration. | 802 | * side effects of shmem page allocation on the owning mem_cgroup. |
732 | */ | 803 | */ |
733 | void mem_cgroup_page_migration(struct page *page, struct page *newpage) | 804 | int mem_cgroup_shrink_usage(struct mm_struct *mm, gfp_t gfp_mask) |
734 | { | 805 | { |
735 | struct page_cgroup *pc; | 806 | struct mem_cgroup *mem; |
736 | struct mem_cgroup_per_zone *mz; | 807 | int progress = 0; |
737 | unsigned long flags; | 808 | int retry = MEM_CGROUP_RECLAIM_RETRIES; |
738 | 809 | ||
739 | lock_page_cgroup(page); | 810 | if (mem_cgroup_subsys.disabled) |
740 | pc = page_get_page_cgroup(page); | 811 | return 0; |
741 | if (!pc) { | 812 | if (!mm) |
742 | unlock_page_cgroup(page); | 813 | return 0; |
743 | return; | 814 | |
815 | rcu_read_lock(); | ||
816 | mem = mem_cgroup_from_task(rcu_dereference(mm->owner)); | ||
817 | if (unlikely(!mem)) { | ||
818 | rcu_read_unlock(); | ||
819 | return 0; | ||
744 | } | 820 | } |
821 | css_get(&mem->css); | ||
822 | rcu_read_unlock(); | ||
745 | 823 | ||
746 | mz = page_cgroup_zoneinfo(pc); | 824 | do { |
747 | spin_lock_irqsave(&mz->lru_lock, flags); | 825 | progress = try_to_free_mem_cgroup_pages(mem, gfp_mask); |
748 | __mem_cgroup_remove_list(mz, pc); | 826 | progress += res_counter_check_under_limit(&mem->res); |
749 | spin_unlock_irqrestore(&mz->lru_lock, flags); | 827 | } while (!progress && --retry); |
750 | 828 | ||
751 | page_assign_page_cgroup(page, NULL); | 829 | css_put(&mem->css); |
752 | unlock_page_cgroup(page); | 830 | if (!retry) |
831 | return -ENOMEM; | ||
832 | return 0; | ||
833 | } | ||
753 | 834 | ||
754 | pc->page = newpage; | 835 | int mem_cgroup_resize_limit(struct mem_cgroup *memcg, unsigned long long val) |
755 | lock_page_cgroup(newpage); | 836 | { |
756 | page_assign_page_cgroup(newpage, pc); | ||
757 | 837 | ||
758 | mz = page_cgroup_zoneinfo(pc); | 838 | int retry_count = MEM_CGROUP_RECLAIM_RETRIES; |
759 | spin_lock_irqsave(&mz->lru_lock, flags); | 839 | int progress; |
760 | __mem_cgroup_add_list(mz, pc); | 840 | int ret = 0; |
761 | spin_unlock_irqrestore(&mz->lru_lock, flags); | ||
762 | 841 | ||
763 | unlock_page_cgroup(newpage); | 842 | while (res_counter_set_limit(&memcg->res, val)) { |
843 | if (signal_pending(current)) { | ||
844 | ret = -EINTR; | ||
845 | break; | ||
846 | } | ||
847 | if (!retry_count) { | ||
848 | ret = -EBUSY; | ||
849 | break; | ||
850 | } | ||
851 | progress = try_to_free_mem_cgroup_pages(memcg, GFP_KERNEL); | ||
852 | if (!progress) | ||
853 | retry_count--; | ||
854 | } | ||
855 | return ret; | ||
764 | } | 856 | } |
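mem_cgroup_shrink_usage() is meant to be called from outside this file; its expected user (shmem's swap-in path, which is not part of this diff) would use it roughly as follows, with an illustrative wrapper name:

/* Illustrative caller: on charge pressure, reclaim from the faulting
 * mm's cgroup and tell the caller whether retrying makes sense. */
static int example_reclaim_from_owner(struct mm_struct *mm, gfp_t gfp_mask)
{
	if (mem_cgroup_shrink_usage(mm, gfp_mask))
		return -ENOMEM;	/* usage stayed over the limit */
	return 0;		/* caller may retry its allocation */
}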
765 | 857 | ||
858 | |||
766 | /* | 859 | /* |
767 | * This routine traverse page_cgroup in given list and drop them all. | 860 | * This routine traverse page_cgroup in given list and drop them all. |
768 | * This routine ignores page_cgroup->ref_cnt. | ||
769 | * *And* this routine doesn't reclaim page itself, just removes page_cgroup. | 861 | * *And* this routine doesn't reclaim page itself, just removes page_cgroup. |
770 | */ | 862 | */ |
771 | #define FORCE_UNCHARGE_BATCH (128) | 863 | #define FORCE_UNCHARGE_BATCH (128) |
@@ -790,12 +882,20 @@ static void mem_cgroup_force_empty_list(struct mem_cgroup *mem, | |||
790 | page = pc->page; | 882 | page = pc->page; |
791 | get_page(page); | 883 | get_page(page); |
792 | spin_unlock_irqrestore(&mz->lru_lock, flags); | 884 | spin_unlock_irqrestore(&mz->lru_lock, flags); |
793 | mem_cgroup_uncharge_page(page); | 885 | /* |
794 | put_page(page); | 886 | * Check if this page is on the LRU. A page off the LRU can |
795 | if (--count <= 0) { | 887 | * be found here if it is under page migration. |
796 | count = FORCE_UNCHARGE_BATCH; | 888 | */ |
889 | if (PageLRU(page)) { | ||
890 | __mem_cgroup_uncharge_common(page, | ||
891 | MEM_CGROUP_CHARGE_TYPE_FORCE); | ||
892 | put_page(page); | ||
893 | if (--count <= 0) { | ||
894 | count = FORCE_UNCHARGE_BATCH; | ||
895 | cond_resched(); | ||
896 | } | ||
897 | } else | ||
797 | cond_resched(); | 898 | cond_resched(); |
798 | } | ||
799 | spin_lock_irqsave(&mz->lru_lock, flags); | 899 | spin_lock_irqsave(&mz->lru_lock, flags); |
800 | } | 900 | } |
801 | spin_unlock_irqrestore(&mz->lru_lock, flags); | 901 | spin_unlock_irqrestore(&mz->lru_lock, flags); |
@@ -810,9 +910,6 @@ static int mem_cgroup_force_empty(struct mem_cgroup *mem) | |||
810 | int ret = -EBUSY; | 910 | int ret = -EBUSY; |
811 | int node, zid; | 911 | int node, zid; |
812 | 912 | ||
813 | if (mem_cgroup_subsys.disabled) | ||
814 | return 0; | ||
815 | |||
816 | css_get(&mem->css); | 913 | css_get(&mem->css); |
817 | /* | 914 | /* |
818 | * page reclaim code (kswapd etc..) will move pages between | 915 | * page reclaim code (kswapd etc..) will move pages between |
@@ -838,32 +935,34 @@ out: | |||
838 | return ret; | 935 | return ret; |
839 | } | 936 | } |
840 | 937 | ||
841 | static int mem_cgroup_write_strategy(char *buf, unsigned long long *tmp) | ||
842 | { | ||
843 | *tmp = memparse(buf, &buf); | ||
844 | if (*buf != '\0') | ||
845 | return -EINVAL; | ||
846 | |||
847 | /* | ||
848 | * Round up the value to the closest page size | ||
849 | */ | ||
850 | *tmp = ((*tmp + PAGE_SIZE - 1) >> PAGE_SHIFT) << PAGE_SHIFT; | ||
851 | return 0; | ||
852 | } | ||
853 | |||
854 | static u64 mem_cgroup_read(struct cgroup *cont, struct cftype *cft) | 938 | static u64 mem_cgroup_read(struct cgroup *cont, struct cftype *cft) |
855 | { | 939 | { |
856 | return res_counter_read_u64(&mem_cgroup_from_cont(cont)->res, | 940 | return res_counter_read_u64(&mem_cgroup_from_cont(cont)->res, |
857 | cft->private); | 941 | cft->private); |
858 | } | 942 | } |
859 | 943 | /* | |
860 | static ssize_t mem_cgroup_write(struct cgroup *cont, struct cftype *cft, | 944 | * The user of this function is... |
861 | struct file *file, const char __user *userbuf, | 945 | * RES_LIMIT. |
862 | size_t nbytes, loff_t *ppos) | 946 | */ |
947 | static int mem_cgroup_write(struct cgroup *cont, struct cftype *cft, | ||
948 | const char *buffer) | ||
863 | { | 949 | { |
864 | return res_counter_write(&mem_cgroup_from_cont(cont)->res, | 950 | struct mem_cgroup *memcg = mem_cgroup_from_cont(cont); |
865 | cft->private, userbuf, nbytes, ppos, | 951 | unsigned long long val; |
866 | mem_cgroup_write_strategy); | 952 | int ret; |
953 | |||
954 | switch (cft->private) { | ||
955 | case RES_LIMIT: | ||
956 | /* This function does all necessary parse...reuse it */ | ||
957 | ret = res_counter_memparse_write_strategy(buffer, &val); | ||
958 | if (!ret) | ||
959 | ret = mem_cgroup_resize_limit(memcg, val); | ||
960 | break; | ||
961 | default: | ||
962 | ret = -EINVAL; /* should be BUG() ? */ | ||
963 | break; | ||
964 | } | ||
965 | return ret; | ||
867 | } | 966 | } |
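With the switch to a write_string handler, the limit file now accepts memparse-style strings. A hedged userspace illustration follows; the mount point is an example only, while the kernel-side parsing is done by res_counter_memparse_write_strategy() as shown above:

#include <stdio.h>

/* Example only: set the memory limit of cgroup "0" to 64 MiB using a
 * human-readable suffix instead of a raw byte count. */
int main(void)
{
	FILE *f = fopen("/cgroups/0/memory.limit_in_bytes", "w");

	if (!f)
		return 1;
	fputs("64M", f);	/* reaches mem_cgroup_write() as buffer = "64M" */
	return fclose(f) ? 1 : 0;
}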
868 | 967 | ||
869 | static int mem_cgroup_reset(struct cgroup *cont, unsigned int event) | 968 | static int mem_cgroup_reset(struct cgroup *cont, unsigned int event) |
@@ -940,7 +1039,7 @@ static struct cftype mem_cgroup_files[] = { | |||
940 | { | 1039 | { |
941 | .name = "limit_in_bytes", | 1040 | .name = "limit_in_bytes", |
942 | .private = RES_LIMIT, | 1041 | .private = RES_LIMIT, |
943 | .write = mem_cgroup_write, | 1042 | .write_string = mem_cgroup_write, |
944 | .read_u64 = mem_cgroup_read, | 1043 | .read_u64 = mem_cgroup_read, |
945 | }, | 1044 | }, |
946 | { | 1045 | { |
@@ -1070,8 +1169,6 @@ static void mem_cgroup_destroy(struct cgroup_subsys *ss, | |||
1070 | static int mem_cgroup_populate(struct cgroup_subsys *ss, | 1169 | static int mem_cgroup_populate(struct cgroup_subsys *ss, |
1071 | struct cgroup *cont) | 1170 | struct cgroup *cont) |
1072 | { | 1171 | { |
1073 | if (mem_cgroup_subsys.disabled) | ||
1074 | return 0; | ||
1075 | return cgroup_add_files(cont, ss, mem_cgroup_files, | 1172 | return cgroup_add_files(cont, ss, mem_cgroup_files, |
1076 | ARRAY_SIZE(mem_cgroup_files)); | 1173 | ARRAY_SIZE(mem_cgroup_files)); |
1077 | } | 1174 | } |
@@ -1084,9 +1181,6 @@ static void mem_cgroup_move_task(struct cgroup_subsys *ss, | |||
1084 | struct mm_struct *mm; | 1181 | struct mm_struct *mm; |
1085 | struct mem_cgroup *mem, *old_mem; | 1182 | struct mem_cgroup *mem, *old_mem; |
1086 | 1183 | ||
1087 | if (mem_cgroup_subsys.disabled) | ||
1088 | return; | ||
1089 | |||
1090 | mm = get_task_mm(p); | 1184 | mm = get_task_mm(p); |
1091 | if (mm == NULL) | 1185 | if (mm == NULL) |
1092 | return; | 1186 | return; |
@@ -1094,9 +1188,6 @@ static void mem_cgroup_move_task(struct cgroup_subsys *ss, | |||
1094 | mem = mem_cgroup_from_cont(cont); | 1188 | mem = mem_cgroup_from_cont(cont); |
1095 | old_mem = mem_cgroup_from_cont(old_cont); | 1189 | old_mem = mem_cgroup_from_cont(old_cont); |
1096 | 1190 | ||
1097 | if (mem == old_mem) | ||
1098 | goto out; | ||
1099 | |||
1100 | /* | 1191 | /* |
1101 | * Only thread group leaders are allowed to migrate, the mm_struct is | 1192 | * Only thread group leaders are allowed to migrate, the mm_struct is |
1102 | * in effect owned by the leader | 1193 | * in effect owned by the leader |