| field | value | date |
|---|---|---|
| author | Hugh Dickins <hugh@veritas.com> | 2008-03-04 17:29:11 -0500 |
| committer | Linus Torvalds <torvalds@woody.linux-foundation.org> | 2008-03-04 19:35:15 -0500 |
| commit | b9c565d5a29a795f970b4a1340393d8fc6722fb9 (patch) | |
| tree | cbd1e0c762738dee438dfd1093a1bb21ce56664b | |
| parent | d5b69e38f8cdb1e41cc022305c86c9739bf1ffdb (diff) | |
memcg: remove clear_page_cgroup and atomics
Remove clear_page_cgroup: it's an unhelpful helper; see, for example, how
mem_cgroup_uncharge_page had to unlock_page_cgroup just in order to call it
(serious races from that? I'm not sure).
Once that's gone, it becomes clear that there's no point in page_cgroup's
ref_cnt being atomic: it is always manipulated under lock_page_cgroup, except
where force_empty unilaterally resets it to 0 (and how does uncharge's
atomic_dec_and_test protect against that?).
Simplify the page_cgroup locking: if you hold the lock and the pc is attached,
then its ref_cnt must be positive. Add VM_BUG_ONs to check that, and to check
that pc->page matches page (we're on the way to finding out why it sometimes
doesn't, but this patch doesn't fix that).
Signed-off-by: Hugh Dickins <hugh@veritas.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Balbir Singh <balbir@linux.vnet.ibm.com>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Hirokazu Takahashi <taka@valinux.co.jp>
Cc: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
Cc: Paul Menage <menage@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
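
To make the locking rule above concrete, here is a minimal, hypothetical userspace sketch (not part of the patch): a pthread mutex stands in for the lock_page_cgroup bit spinlock, and the struct and function names (page_model, pc_model, charge, uncharge) are invented for illustration. Because every read and write of ref_cnt happens under the same lock, a plain int suffices, and an attached descriptor always has ref_cnt > 0 while the lock is held.

```c
/* Hypothetical model only: these names are invented for illustration and
 * are not kernel APIs. The asserts play the role of the VM_BUG_ONs. */
#include <assert.h>
#include <pthread.h>
#include <stdlib.h>

struct pc_model {
        int ref_cnt;                    /* plain int: only touched under the lock */
};

struct page_model {
        pthread_mutex_t cgroup_lock;    /* stands in for lock_page_cgroup() */
        struct pc_model *page_cgroup;   /* stands in for page->page_cgroup */
};

/* Take a reference, attaching a descriptor on the first charge. */
static void charge(struct page_model *page)
{
        pthread_mutex_lock(&page->cgroup_lock);
        struct pc_model *pc = page->page_cgroup;
        if (pc) {
                assert(pc->ref_cnt > 0);        /* attached implies ref_cnt > 0 */
                pc->ref_cnt++;
        } else {
                pc = calloc(1, sizeof(*pc));    /* assume allocation succeeds */
                pc->ref_cnt = 1;
                page->page_cgroup = pc;
        }
        pthread_mutex_unlock(&page->cgroup_lock);
}

/* Drop a reference; detach (still under the lock) and free on the last put. */
static void uncharge(struct page_model *page)
{
        pthread_mutex_lock(&page->cgroup_lock);
        struct pc_model *pc = page->page_cgroup;
        if (pc) {
                assert(pc->ref_cnt > 0);
                if (--pc->ref_cnt == 0) {
                        page->page_cgroup = NULL;   /* cleared while still locked */
                        pthread_mutex_unlock(&page->cgroup_lock);
                        free(pc);
                        return;
                }
        }
        pthread_mutex_unlock(&page->cgroup_lock);
}

int main(void)
{
        struct page_model page = { .cgroup_lock = PTHREAD_MUTEX_INITIALIZER };
        charge(&page);          /* attach: ref_cnt == 1 */
        charge(&page);          /* ref_cnt == 2 */
        uncharge(&page);        /* ref_cnt == 1 */
        uncharge(&page);        /* last put: detach and free */
        assert(page.page_cgroup == NULL);
        return 0;
}
```

Compile with something like `gcc -std=c99 -pthread model.c` to run the sketch; the point it models is only that a refcount never touched outside its lock does not need atomic operations.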
-rw-r--r--  mm/memcontrol.c | 106
1 file changed, 43 insertions(+), 63 deletions(-)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index a59f946c9338..13e9e7d8e49e 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -161,8 +161,7 @@ struct page_cgroup {
         struct list_head lru;          /* per cgroup LRU list */
         struct page *page;
         struct mem_cgroup *mem_cgroup;
-        atomic_t ref_cnt;              /* Helpful when pages move b/w  */
-                                       /* mapped and cached states     */
+        int ref_cnt;                   /* cached, mapped, migrating */
         int flags;
 };
 #define PAGE_CGROUP_FLAG_CACHE (0x1)   /* charged as cache */
@@ -283,27 +282,6 @@ static void unlock_page_cgroup(struct page *page)
         bit_spin_unlock(PAGE_CGROUP_LOCK_BIT, &page->page_cgroup);
 }
 
-/*
- * Clear page->page_cgroup member under lock_page_cgroup().
- * If given "pc" value is different from one page->page_cgroup,
- * page->cgroup is not cleared.
- * Returns a value of page->page_cgroup at lock taken.
- * A can can detect failure of clearing by following
- *      clear_page_cgroup(page, pc) == pc
- */
-static struct page_cgroup *clear_page_cgroup(struct page *page,
-                                               struct page_cgroup *pc)
-{
-        struct page_cgroup *ret;
-        /* lock and clear */
-        lock_page_cgroup(page);
-        ret = page_get_page_cgroup(page);
-        if (likely(ret == pc))
-                page_assign_page_cgroup(page, NULL);
-        unlock_page_cgroup(page);
-        return ret;
-}
-
 static void __mem_cgroup_remove_list(struct page_cgroup *pc)
 {
         int from = pc->flags & PAGE_CGROUP_FLAG_ACTIVE;
@@ -555,15 +533,12 @@ retry:
          * the page has already been accounted.
          */
         if (pc) {
-                if (unlikely(!atomic_inc_not_zero(&pc->ref_cnt))) {
-                        /* this page is under being uncharged ? */
-                        unlock_page_cgroup(page);
-                        cpu_relax();
-                        goto retry;
-                } else {
-                        unlock_page_cgroup(page);
-                        goto done;
-                }
+                VM_BUG_ON(pc->page != page);
+                VM_BUG_ON(pc->ref_cnt <= 0);
+
+                pc->ref_cnt++;
+                unlock_page_cgroup(page);
+                goto done;
         }
         unlock_page_cgroup(page);
 
@@ -612,7 +587,7 @@ retry:
                 congestion_wait(WRITE, HZ/10);
         }
 
-        atomic_set(&pc->ref_cnt, 1);
+        pc->ref_cnt = 1;
         pc->mem_cgroup = mem;
         pc->page = page;
         pc->flags = PAGE_CGROUP_FLAG_ACTIVE;
@@ -683,24 +658,24 @@ void mem_cgroup_uncharge_page(struct page *page)
         if (!pc)
                 goto unlock;
 
-        if (atomic_dec_and_test(&pc->ref_cnt)) {
-                page = pc->page;
-                mz = page_cgroup_zoneinfo(pc);
-                /*
-                 * get page->cgroup and clear it under lock.
-                 * force_empty can drop page->cgroup without checking refcnt.
-                 */
+        VM_BUG_ON(pc->page != page);
+        VM_BUG_ON(pc->ref_cnt <= 0);
+
+        if (--(pc->ref_cnt) == 0) {
+                page_assign_page_cgroup(page, NULL);
                 unlock_page_cgroup(page);
-                if (clear_page_cgroup(page, pc) == pc) {
-                        mem = pc->mem_cgroup;
-                        css_put(&mem->css);
-                        res_counter_uncharge(&mem->res, PAGE_SIZE);
-                        spin_lock_irqsave(&mz->lru_lock, flags);
-                        __mem_cgroup_remove_list(pc);
-                        spin_unlock_irqrestore(&mz->lru_lock, flags);
-                        kfree(pc);
-                }
-                lock_page_cgroup(page);
+
+                mem = pc->mem_cgroup;
+                css_put(&mem->css);
+                res_counter_uncharge(&mem->res, PAGE_SIZE);
+
+                mz = page_cgroup_zoneinfo(pc);
+                spin_lock_irqsave(&mz->lru_lock, flags);
+                __mem_cgroup_remove_list(pc);
+                spin_unlock_irqrestore(&mz->lru_lock, flags);
+
+                kfree(pc);
+                return;
         }
 
 unlock:
@@ -714,14 +689,13 @@ unlock:
 int mem_cgroup_prepare_migration(struct page *page)
 {
         struct page_cgroup *pc;
-        int ret = 0;
 
         lock_page_cgroup(page);
         pc = page_get_page_cgroup(page);
-        if (pc && atomic_inc_not_zero(&pc->ref_cnt))
-                ret = 1;
+        if (pc)
+                pc->ref_cnt++;
         unlock_page_cgroup(page);
-        return ret;
+        return pc != NULL;
 }
 
 void mem_cgroup_end_migration(struct page *page)
@@ -740,15 +714,17 @@ void mem_cgroup_page_migration(struct page *page, struct page *newpage)
         struct mem_cgroup_per_zone *mz;
         unsigned long flags;
 
-retry:
+        lock_page_cgroup(page);
         pc = page_get_page_cgroup(page);
-        if (!pc)
+        if (!pc) {
+                unlock_page_cgroup(page);
                 return;
+        }
 
-        mz = page_cgroup_zoneinfo(pc);
-        if (clear_page_cgroup(page, pc) != pc)
-                goto retry;
+        page_assign_page_cgroup(page, NULL);
+        unlock_page_cgroup(page);
 
+        mz = page_cgroup_zoneinfo(pc);
         spin_lock_irqsave(&mz->lru_lock, flags);
         __mem_cgroup_remove_list(pc);
         spin_unlock_irqrestore(&mz->lru_lock, flags);
@@ -794,15 +770,19 @@ retry:
         while (--count && !list_empty(list)) {
                 pc = list_entry(list->prev, struct page_cgroup, lru);
                 page = pc->page;
-                /* Avoid race with charge */
-                atomic_set(&pc->ref_cnt, 0);
-                if (clear_page_cgroup(page, pc) == pc) {
+                lock_page_cgroup(page);
+                if (page_get_page_cgroup(page) == pc) {
+                        page_assign_page_cgroup(page, NULL);
+                        unlock_page_cgroup(page);
                         css_put(&mem->css);
                         res_counter_uncharge(&mem->res, PAGE_SIZE);
                         __mem_cgroup_remove_list(pc);
                         kfree(pc);
-                } else  /* being uncharged ? ...do relax */
+                } else {
+                        /* racing uncharge: let page go then retry */
+                        unlock_page_cgroup(page);
                         break;
+                }
         }
 
         spin_unlock_irqrestore(&mz->lru_lock, flags);
