| field | value | date |
|---|---|---|
| author | Ingo Molnar <mingo@elte.hu> | 2008-08-14 06:19:59 -0400 |
| committer | Ingo Molnar <mingo@elte.hu> | 2008-08-14 06:19:59 -0400 |
| commit | 8d7ccaa545490cdffdfaff0842436a8dd85cf47b (patch) | |
| tree | 8129b5907161bc6ae26deb3645ce1e280c5e1f51 /mm/memcontrol.c | |
| parent | b2139aa0eec330c711c5a279db361e5ef1178e78 (diff) | |
| parent | 30a2f3c60a84092c8084dfe788b710f8d0768cd4 (diff) | |
Merge commit 'v2.6.27-rc3' into x86/prototypes

Conflicts:
	include/asm-x86/dma-mapping.h

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'mm/memcontrol.c')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | mm/memcontrol.c | 369 |

1 file changed, 221 insertions(+), 148 deletions(-)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index e46451e1d9b7..0f1f7a7374ba 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -35,9 +35,9 @@
 
 #include <asm/uaccess.h>
 
-struct cgroup_subsys mem_cgroup_subsys;
-static const int MEM_CGROUP_RECLAIM_RETRIES = 5;
-static struct kmem_cache *page_cgroup_cache;
+struct cgroup_subsys mem_cgroup_subsys __read_mostly;
+static struct kmem_cache *page_cgroup_cache __read_mostly;
+#define MEM_CGROUP_RECLAIM_RETRIES	5
 
 /*
  * Statistics for memory cgroup.
@@ -166,7 +166,6 @@ struct page_cgroup {
 	struct list_head lru;		/* per cgroup LRU list */
 	struct page *page;
 	struct mem_cgroup *mem_cgroup;
-	int ref_cnt;			/* cached, mapped, migrating */
 	int flags;
 };
 #define PAGE_CGROUP_FLAG_CACHE	(0x1)	/* charged as cache */
@@ -185,6 +184,7 @@ static enum zone_type page_cgroup_zid(struct page_cgroup *pc)
 enum charge_type {
 	MEM_CGROUP_CHARGE_TYPE_CACHE = 0,
 	MEM_CGROUP_CHARGE_TYPE_MAPPED,
+	MEM_CGROUP_CHARGE_TYPE_FORCE,	/* used by force_empty */
 };
 
 /*
@@ -296,7 +296,7 @@ static void __mem_cgroup_remove_list(struct mem_cgroup_per_zone *mz,
 		MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_INACTIVE) -= 1;
 
 	mem_cgroup_charge_statistics(pc->mem_cgroup, pc->flags, false);
-	list_del_init(&pc->lru);
+	list_del(&pc->lru);
 }
 
 static void __mem_cgroup_add_list(struct mem_cgroup_per_zone *mz,
@@ -354,6 +354,9 @@ void mem_cgroup_move_lists(struct page *page, bool active)
 	struct mem_cgroup_per_zone *mz;
 	unsigned long flags;
 
+	if (mem_cgroup_subsys.disabled)
+		return;
+
 	/*
 	 * We cannot lock_page_cgroup while holding zone's lru_lock,
 	 * because other holders of lock_page_cgroup can be interrupted
@@ -524,7 +527,8 @@ unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
  * < 0 if the cgroup is over its limit
  */
 static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
-				gfp_t gfp_mask, enum charge_type ctype)
+				gfp_t gfp_mask, enum charge_type ctype,
+				struct mem_cgroup *memcg)
 {
 	struct mem_cgroup *mem;
 	struct page_cgroup *pc;
@@ -532,35 +536,8 @@ static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
 	unsigned long nr_retries = MEM_CGROUP_RECLAIM_RETRIES;
 	struct mem_cgroup_per_zone *mz;
 
-	if (mem_cgroup_subsys.disabled)
-		return 0;
-
-	/*
-	 * Should page_cgroup's go to their own slab?
-	 * One could optimize the performance of the charging routine
-	 * by saving a bit in the page_flags and using it as a lock
-	 * to see if the cgroup page already has a page_cgroup associated
-	 * with it
-	 */
-retry:
-	lock_page_cgroup(page);
-	pc = page_get_page_cgroup(page);
-	/*
-	 * The page_cgroup exists and
-	 * the page has already been accounted.
-	 */
-	if (pc) {
-		VM_BUG_ON(pc->page != page);
-		VM_BUG_ON(pc->ref_cnt <= 0);
-
-		pc->ref_cnt++;
-		unlock_page_cgroup(page);
-		goto done;
-	}
-	unlock_page_cgroup(page);
-
-	pc = kmem_cache_zalloc(page_cgroup_cache, gfp_mask);
-	if (pc == NULL)
+	pc = kmem_cache_alloc(page_cgroup_cache, gfp_mask);
+	if (unlikely(pc == NULL))
 		goto err;
 
 	/*
@@ -569,16 +546,18 @@ retry:
 	 * thread group leader migrates. It's possible that mm is not
 	 * set, if so charge the init_mm (happens for pagecache usage).
 	 */
-	if (!mm)
-		mm = &init_mm;
-
-	rcu_read_lock();
-	mem = mem_cgroup_from_task(rcu_dereference(mm->owner));
-	/*
-	 * For every charge from the cgroup, increment reference count
-	 */
-	css_get(&mem->css);
-	rcu_read_unlock();
+	if (likely(!memcg)) {
+		rcu_read_lock();
+		mem = mem_cgroup_from_task(rcu_dereference(mm->owner));
+		/*
+		 * For every charge from the cgroup, increment reference count
+		 */
+		css_get(&mem->css);
+		rcu_read_unlock();
+	} else {
+		mem = memcg;
+		css_get(&memcg->css);
+	}
 
 	while (res_counter_charge(&mem->res, PAGE_SIZE)) {
 		if (!(gfp_mask & __GFP_WAIT))
@@ -603,25 +582,24 @@ retry:
 		}
 	}
 
-	pc->ref_cnt = 1;
 	pc->mem_cgroup = mem;
 	pc->page = page;
-	pc->flags = PAGE_CGROUP_FLAG_ACTIVE;
+	/*
+	 * If a page is accounted as a page cache, insert to inactive list.
+	 * If anon, insert to active list.
+	 */
 	if (ctype == MEM_CGROUP_CHARGE_TYPE_CACHE)
 		pc->flags = PAGE_CGROUP_FLAG_CACHE;
+	else
+		pc->flags = PAGE_CGROUP_FLAG_ACTIVE;
 
 	lock_page_cgroup(page);
-	if (page_get_page_cgroup(page)) {
+	if (unlikely(page_get_page_cgroup(page))) {
 		unlock_page_cgroup(page);
-		/*
-		 * Another charge has been added to this page already.
-		 * We take lock_page_cgroup(page) again and read
-		 * page->cgroup, increment refcnt.... just retry is OK.
-		 */
 		res_counter_uncharge(&mem->res, PAGE_SIZE);
 		css_put(&mem->css);
 		kmem_cache_free(page_cgroup_cache, pc);
-		goto retry;
+		goto done;
 	}
 	page_assign_page_cgroup(page, pc);
 
@@ -642,24 +620,65 @@ err:
 
 int mem_cgroup_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask)
 {
+	if (mem_cgroup_subsys.disabled)
+		return 0;
+
+	/*
+	 * If already mapped, we don't have to account.
+	 * If page cache, page->mapping has address_space.
+	 * But page->mapping may have out-of-use anon_vma pointer,
+	 * detecit it by PageAnon() check. newly-mapped-anon's page->mapping
+	 * is NULL.
+	 */
+	if (page_mapped(page) || (page->mapping && !PageAnon(page)))
+		return 0;
+	if (unlikely(!mm))
+		mm = &init_mm;
 	return mem_cgroup_charge_common(page, mm, gfp_mask,
-				MEM_CGROUP_CHARGE_TYPE_MAPPED);
+				MEM_CGROUP_CHARGE_TYPE_MAPPED, NULL);
 }
 
 int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
 				gfp_t gfp_mask)
 {
-	if (!mm)
+	if (mem_cgroup_subsys.disabled)
+		return 0;
+
+	/*
+	 * Corner case handling. This is called from add_to_page_cache()
+	 * in usual. But some FS (shmem) precharges this page before calling it
+	 * and call add_to_page_cache() with GFP_NOWAIT.
+	 *
+	 * For GFP_NOWAIT case, the page may be pre-charged before calling
+	 * add_to_page_cache(). (See shmem.c) check it here and avoid to call
+	 * charge twice. (It works but has to pay a bit larger cost.)
+	 */
+	if (!(gfp_mask & __GFP_WAIT)) {
+		struct page_cgroup *pc;
+
+		lock_page_cgroup(page);
+		pc = page_get_page_cgroup(page);
+		if (pc) {
+			VM_BUG_ON(pc->page != page);
+			VM_BUG_ON(!pc->mem_cgroup);
+			unlock_page_cgroup(page);
+			return 0;
+		}
+		unlock_page_cgroup(page);
+	}
+
+	if (unlikely(!mm))
 		mm = &init_mm;
+
 	return mem_cgroup_charge_common(page, mm, gfp_mask,
-				MEM_CGROUP_CHARGE_TYPE_CACHE);
+				MEM_CGROUP_CHARGE_TYPE_CACHE, NULL);
 }
 
 /*
- * Uncharging is always a welcome operation, we never complain, simply
- * uncharge.
+ * uncharge if !page_mapped(page)
  */
-void mem_cgroup_uncharge_page(struct page *page)
+static void
+__mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
 {
 	struct page_cgroup *pc;
 	struct mem_cgroup *mem;
@@ -674,98 +693,153 @@ void mem_cgroup_uncharge_page(struct page *page)
 	 */
 	lock_page_cgroup(page);
 	pc = page_get_page_cgroup(page);
-	if (!pc)
+	if (unlikely(!pc))
 		goto unlock;
 
 	VM_BUG_ON(pc->page != page);
-	VM_BUG_ON(pc->ref_cnt <= 0);
 
-	if (--(pc->ref_cnt) == 0) {
-		mz = page_cgroup_zoneinfo(pc);
-		spin_lock_irqsave(&mz->lru_lock, flags);
-		__mem_cgroup_remove_list(mz, pc);
-		spin_unlock_irqrestore(&mz->lru_lock, flags);
+	if ((ctype == MEM_CGROUP_CHARGE_TYPE_MAPPED)
+	    && ((pc->flags & PAGE_CGROUP_FLAG_CACHE)
+		|| page_mapped(page)))
+		goto unlock;
 
-		page_assign_page_cgroup(page, NULL);
-		unlock_page_cgroup(page);
+	mz = page_cgroup_zoneinfo(pc);
+	spin_lock_irqsave(&mz->lru_lock, flags);
+	__mem_cgroup_remove_list(mz, pc);
+	spin_unlock_irqrestore(&mz->lru_lock, flags);
 
-		mem = pc->mem_cgroup;
-		res_counter_uncharge(&mem->res, PAGE_SIZE);
-		css_put(&mem->css);
+	page_assign_page_cgroup(page, NULL);
+	unlock_page_cgroup(page);
 
-		kmem_cache_free(page_cgroup_cache, pc);
-		return;
-	}
+	mem = pc->mem_cgroup;
+	res_counter_uncharge(&mem->res, PAGE_SIZE);
+	css_put(&mem->css);
 
+	kmem_cache_free(page_cgroup_cache, pc);
+	return;
 unlock:
 	unlock_page_cgroup(page);
 }
 
+void mem_cgroup_uncharge_page(struct page *page)
+{
+	__mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_MAPPED);
+}
+
+void mem_cgroup_uncharge_cache_page(struct page *page)
+{
+	VM_BUG_ON(page_mapped(page));
+	__mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_CACHE);
+}
+
 /*
- * Returns non-zero if a page (under migration) has valid page_cgroup member.
- * Refcnt of page_cgroup is incremented.
+ * Before starting migration, account against new page.
  */
-int mem_cgroup_prepare_migration(struct page *page)
+int mem_cgroup_prepare_migration(struct page *page, struct page *newpage)
 {
 	struct page_cgroup *pc;
+	struct mem_cgroup *mem = NULL;
+	enum charge_type ctype = MEM_CGROUP_CHARGE_TYPE_MAPPED;
+	int ret = 0;
 
 	if (mem_cgroup_subsys.disabled)
 		return 0;
 
 	lock_page_cgroup(page);
 	pc = page_get_page_cgroup(page);
-	if (pc)
-		pc->ref_cnt++;
+	if (pc) {
+		mem = pc->mem_cgroup;
+		css_get(&mem->css);
+		if (pc->flags & PAGE_CGROUP_FLAG_CACHE)
+			ctype = MEM_CGROUP_CHARGE_TYPE_CACHE;
+	}
 	unlock_page_cgroup(page);
-	return pc != NULL;
+	if (mem) {
+		ret = mem_cgroup_charge_common(newpage, NULL, GFP_KERNEL,
+			ctype, mem);
+		css_put(&mem->css);
+	}
+	return ret;
 }
 
-void mem_cgroup_end_migration(struct page *page)
+/* remove redundant charge if migration failed*/
+void mem_cgroup_end_migration(struct page *newpage)
 {
-	mem_cgroup_uncharge_page(page);
+	/*
+	 * At success, page->mapping is not NULL.
+	 * special rollback care is necessary when
+	 * 1. at migration failure. (newpage->mapping is cleared in this case)
+	 * 2. the newpage was moved but not remapped again because the task
+	 *    exits and the newpage is obsolete. In this case, the new page
+	 *    may be a swapcache. So, we just call mem_cgroup_uncharge_page()
+	 *    always for avoiding mess. The page_cgroup will be removed if
+	 *    unnecessary. File cache pages is still on radix-tree. Don't
+	 *    care it.
+	 */
+	if (!newpage->mapping)
+		__mem_cgroup_uncharge_common(newpage,
+				MEM_CGROUP_CHARGE_TYPE_FORCE);
+	else if (PageAnon(newpage))
+		mem_cgroup_uncharge_page(newpage);
 }
 
 /*
- * We know both *page* and *newpage* are now not-on-LRU and PG_locked.
- * And no race with uncharge() routines because page_cgroup for *page*
- * has extra one reference by mem_cgroup_prepare_migration.
+ * A call to try to shrink memory usage under specified resource controller.
+ * This is typically used for page reclaiming for shmem for reducing side
+ * effect of page allocation from shmem, which is used by some mem_cgroup.
  */
-void mem_cgroup_page_migration(struct page *page, struct page *newpage)
+int mem_cgroup_shrink_usage(struct mm_struct *mm, gfp_t gfp_mask)
 {
-	struct page_cgroup *pc;
-	struct mem_cgroup_per_zone *mz;
-	unsigned long flags;
+	struct mem_cgroup *mem;
+	int progress = 0;
+	int retry = MEM_CGROUP_RECLAIM_RETRIES;
 
-	lock_page_cgroup(page);
-	pc = page_get_page_cgroup(page);
-	if (!pc) {
-		unlock_page_cgroup(page);
-		return;
-	}
+	if (mem_cgroup_subsys.disabled)
+		return 0;
+	if (!mm)
+		return 0;
 
-	mz = page_cgroup_zoneinfo(pc);
-	spin_lock_irqsave(&mz->lru_lock, flags);
-	__mem_cgroup_remove_list(mz, pc);
-	spin_unlock_irqrestore(&mz->lru_lock, flags);
+	rcu_read_lock();
+	mem = mem_cgroup_from_task(rcu_dereference(mm->owner));
+	css_get(&mem->css);
+	rcu_read_unlock();
 
-	page_assign_page_cgroup(page, NULL);
-	unlock_page_cgroup(page);
+	do {
+		progress = try_to_free_mem_cgroup_pages(mem, gfp_mask);
+	} while (!progress && --retry);
 
-	pc->page = newpage;
-	lock_page_cgroup(newpage);
-	page_assign_page_cgroup(newpage, pc);
+	css_put(&mem->css);
+	if (!retry)
+		return -ENOMEM;
+	return 0;
+}
 
-	mz = page_cgroup_zoneinfo(pc);
-	spin_lock_irqsave(&mz->lru_lock, flags);
-	__mem_cgroup_add_list(mz, pc);
-	spin_unlock_irqrestore(&mz->lru_lock, flags);
+int mem_cgroup_resize_limit(struct mem_cgroup *memcg, unsigned long long val)
+{
+
+	int retry_count = MEM_CGROUP_RECLAIM_RETRIES;
+	int progress;
+	int ret = 0;
 
-	unlock_page_cgroup(newpage);
+	while (res_counter_set_limit(&memcg->res, val)) {
+		if (signal_pending(current)) {
+			ret = -EINTR;
+			break;
+		}
+		if (!retry_count) {
+			ret = -EBUSY;
+			break;
+		}
+		progress = try_to_free_mem_cgroup_pages(memcg, GFP_KERNEL);
+		if (!progress)
+			retry_count--;
+	}
+	return ret;
 }
 
+
 /*
  * This routine traverse page_cgroup in given list and drop them all.
- * This routine ignores page_cgroup->ref_cnt.
 * *And* this routine doesn't reclaim page itself, just removes page_cgroup.
  */
 #define FORCE_UNCHARGE_BATCH	(128)
@@ -790,12 +864,20 @@ static void mem_cgroup_force_empty_list(struct mem_cgroup *mem,
 		page = pc->page;
 		get_page(page);
 		spin_unlock_irqrestore(&mz->lru_lock, flags);
-		mem_cgroup_uncharge_page(page);
-		put_page(page);
-		if (--count <= 0) {
-			count = FORCE_UNCHARGE_BATCH;
+		/*
+		 * Check if this page is on LRU. !LRU page can be found
+		 * if it's under page migration.
+		 */
+		if (PageLRU(page)) {
+			__mem_cgroup_uncharge_common(page,
+					MEM_CGROUP_CHARGE_TYPE_FORCE);
+			put_page(page);
+			if (--count <= 0) {
+				count = FORCE_UNCHARGE_BATCH;
+				cond_resched();
+			}
+		} else
 			cond_resched();
-		}
 		spin_lock_irqsave(&mz->lru_lock, flags);
 	}
 	spin_unlock_irqrestore(&mz->lru_lock, flags);
@@ -810,9 +892,6 @@ static int mem_cgroup_force_empty(struct mem_cgroup *mem)
 	int ret = -EBUSY;
 	int node, zid;
 
-	if (mem_cgroup_subsys.disabled)
-		return 0;
-
 	css_get(&mem->css);
 	/*
 	 * page reclaim code (kswapd etc..) will move pages between
@@ -838,32 +917,34 @@ out:
 	return ret;
 }
 
-static int mem_cgroup_write_strategy(char *buf, unsigned long long *tmp)
-{
-	*tmp = memparse(buf, &buf);
-	if (*buf != '\0')
-		return -EINVAL;
-
-	/*
-	 * Round up the value to the closest page size
-	 */
-	*tmp = ((*tmp + PAGE_SIZE - 1) >> PAGE_SHIFT) << PAGE_SHIFT;
-	return 0;
-}
-
 static u64 mem_cgroup_read(struct cgroup *cont, struct cftype *cft)
 {
 	return res_counter_read_u64(&mem_cgroup_from_cont(cont)->res,
 				    cft->private);
 }
-
-static ssize_t mem_cgroup_write(struct cgroup *cont, struct cftype *cft,
-				struct file *file, const char __user *userbuf,
-				size_t nbytes, loff_t *ppos)
+/*
+ * The user of this function is...
+ * RES_LIMIT.
+ */
+static int mem_cgroup_write(struct cgroup *cont, struct cftype *cft,
+			    const char *buffer)
 {
-	return res_counter_write(&mem_cgroup_from_cont(cont)->res,
-				cft->private, userbuf, nbytes, ppos,
-				mem_cgroup_write_strategy);
+	struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
+	unsigned long long val;
+	int ret;
+
+	switch (cft->private) {
+	case RES_LIMIT:
+		/* This function does all necessary parse...reuse it */
+		ret = res_counter_memparse_write_strategy(buffer, &val);
+		if (!ret)
+			ret = mem_cgroup_resize_limit(memcg, val);
+		break;
+	default:
+		ret = -EINVAL; /* should be BUG() ? */
+		break;
+	}
+	return ret;
 }
 
 static int mem_cgroup_reset(struct cgroup *cont, unsigned int event)
@@ -940,7 +1021,7 @@ static struct cftype mem_cgroup_files[] = {
 	{
 		.name = "limit_in_bytes",
 		.private = RES_LIMIT,
-		.write = mem_cgroup_write,
+		.write_string = mem_cgroup_write,
 		.read_u64 = mem_cgroup_read,
 	},
 	{
@@ -1070,8 +1151,6 @@ static void mem_cgroup_destroy(struct cgroup_subsys *ss,
 static int mem_cgroup_populate(struct cgroup_subsys *ss,
 				struct cgroup *cont)
 {
-	if (mem_cgroup_subsys.disabled)
-		return 0;
 	return cgroup_add_files(cont, ss, mem_cgroup_files,
 					ARRAY_SIZE(mem_cgroup_files));
 }
@@ -1084,9 +1163,6 @@ static void mem_cgroup_move_task(struct cgroup_subsys *ss,
 	struct mm_struct *mm;
 	struct mem_cgroup *mem, *old_mem;
 
-	if (mem_cgroup_subsys.disabled)
-		return;
-
 	mm = get_task_mm(p);
 	if (mm == NULL)
 		return;
@@ -1094,9 +1170,6 @@ static void mem_cgroup_move_task(struct cgroup_subsys *ss,
 	mem = mem_cgroup_from_cont(cont);
 	old_mem = mem_cgroup_from_cont(old_cont);
 
-	if (mem == old_mem)
-		goto out;
-
 	/*
 	 * Only thread group leaders are allowed to migrate, the mm_struct is
 	 * in effect owned by the leader
