diff options
author | Hugh Dickins <hugh@veritas.com> | 2008-03-04 17:29:11 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@woody.linux-foundation.org> | 2008-03-04 19:35:15 -0500 |
commit | b9c565d5a29a795f970b4a1340393d8fc6722fb9 (patch) | |
tree | cbd1e0c762738dee438dfd1093a1bb21ce56664b /mm/memcontrol.c | |
parent | d5b69e38f8cdb1e41cc022305c86c9739bf1ffdb (diff) |
memcg: remove clear_page_cgroup and atomics
Remove clear_page_cgroup: it's an unhelpful helper, see for example how
mem_cgroup_uncharge_page had to unlock_page_cgroup just in order to call it
(serious races from that? I'm not sure).
Once that's gone, you can see it's pointless for page_cgroup's ref_cnt to be
atomic: it's always manipulated under lock_page_cgroup, except where
force_empty unilaterally reset it to 0 (and how does uncharge's
atomic_dec_and_test protect against that?).
Simplify this page_cgroup locking: if you've got the lock and the pc is
attached, then the ref_cnt must be positive: VM_BUG_ONs to check that, and to
check that pc->page matches page (we're on the way to finding why sometimes it
doesn't, but this patch doesn't fix that).
Signed-off-by: Hugh Dickins <hugh@veritas.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Balbir Singh <balbir@linux.vnet.ibm.com>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Hirokazu Takahashi <taka@valinux.co.jp>
Cc: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
Cc: Paul Menage <menage@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/memcontrol.c')
-rw-r--r-- | mm/memcontrol.c | 106 |
1 files changed, 43 insertions, 63 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index a59f946c9338..13e9e7d8e49e 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c | |||
@@ -161,8 +161,7 @@ struct page_cgroup { | |||
161 | struct list_head lru; /* per cgroup LRU list */ | 161 | struct list_head lru; /* per cgroup LRU list */ |
162 | struct page *page; | 162 | struct page *page; |
163 | struct mem_cgroup *mem_cgroup; | 163 | struct mem_cgroup *mem_cgroup; |
164 | atomic_t ref_cnt; /* Helpful when pages move b/w */ | 164 | int ref_cnt; /* cached, mapped, migrating */ |
165 | /* mapped and cached states */ | ||
166 | int flags; | 165 | int flags; |
167 | }; | 166 | }; |
168 | #define PAGE_CGROUP_FLAG_CACHE (0x1) /* charged as cache */ | 167 | #define PAGE_CGROUP_FLAG_CACHE (0x1) /* charged as cache */ |
@@ -283,27 +282,6 @@ static void unlock_page_cgroup(struct page *page) | |||
283 | bit_spin_unlock(PAGE_CGROUP_LOCK_BIT, &page->page_cgroup); | 282 | bit_spin_unlock(PAGE_CGROUP_LOCK_BIT, &page->page_cgroup); |
284 | } | 283 | } |
285 | 284 | ||
286 | /* | ||
287 | * Clear page->page_cgroup member under lock_page_cgroup(). | ||
288 | * If given "pc" value is different from one page->page_cgroup, | ||
289 | * page->cgroup is not cleared. | ||
290 | * Returns a value of page->page_cgroup at lock taken. | ||
291 | * A can can detect failure of clearing by following | ||
292 | * clear_page_cgroup(page, pc) == pc | ||
293 | */ | ||
294 | static struct page_cgroup *clear_page_cgroup(struct page *page, | ||
295 | struct page_cgroup *pc) | ||
296 | { | ||
297 | struct page_cgroup *ret; | ||
298 | /* lock and clear */ | ||
299 | lock_page_cgroup(page); | ||
300 | ret = page_get_page_cgroup(page); | ||
301 | if (likely(ret == pc)) | ||
302 | page_assign_page_cgroup(page, NULL); | ||
303 | unlock_page_cgroup(page); | ||
304 | return ret; | ||
305 | } | ||
306 | |||
307 | static void __mem_cgroup_remove_list(struct page_cgroup *pc) | 285 | static void __mem_cgroup_remove_list(struct page_cgroup *pc) |
308 | { | 286 | { |
309 | int from = pc->flags & PAGE_CGROUP_FLAG_ACTIVE; | 287 | int from = pc->flags & PAGE_CGROUP_FLAG_ACTIVE; |
@@ -555,15 +533,12 @@ retry: | |||
555 | * the page has already been accounted. | 533 | * the page has already been accounted. |
556 | */ | 534 | */ |
557 | if (pc) { | 535 | if (pc) { |
558 | if (unlikely(!atomic_inc_not_zero(&pc->ref_cnt))) { | 536 | VM_BUG_ON(pc->page != page); |
559 | /* this page is under being uncharged ? */ | 537 | VM_BUG_ON(pc->ref_cnt <= 0); |
560 | unlock_page_cgroup(page); | 538 | |
561 | cpu_relax(); | 539 | pc->ref_cnt++; |
562 | goto retry; | 540 | unlock_page_cgroup(page); |
563 | } else { | 541 | goto done; |
564 | unlock_page_cgroup(page); | ||
565 | goto done; | ||
566 | } | ||
567 | } | 542 | } |
568 | unlock_page_cgroup(page); | 543 | unlock_page_cgroup(page); |
569 | 544 | ||
@@ -612,7 +587,7 @@ retry: | |||
612 | congestion_wait(WRITE, HZ/10); | 587 | congestion_wait(WRITE, HZ/10); |
613 | } | 588 | } |
614 | 589 | ||
615 | atomic_set(&pc->ref_cnt, 1); | 590 | pc->ref_cnt = 1; |
616 | pc->mem_cgroup = mem; | 591 | pc->mem_cgroup = mem; |
617 | pc->page = page; | 592 | pc->page = page; |
618 | pc->flags = PAGE_CGROUP_FLAG_ACTIVE; | 593 | pc->flags = PAGE_CGROUP_FLAG_ACTIVE; |
@@ -683,24 +658,24 @@ void mem_cgroup_uncharge_page(struct page *page) | |||
683 | if (!pc) | 658 | if (!pc) |
684 | goto unlock; | 659 | goto unlock; |
685 | 660 | ||
686 | if (atomic_dec_and_test(&pc->ref_cnt)) { | 661 | VM_BUG_ON(pc->page != page); |
687 | page = pc->page; | 662 | VM_BUG_ON(pc->ref_cnt <= 0); |
688 | mz = page_cgroup_zoneinfo(pc); | 663 | |
689 | /* | 664 | if (--(pc->ref_cnt) == 0) { |
690 | * get page->cgroup and clear it under lock. | 665 | page_assign_page_cgroup(page, NULL); |
691 | * force_empty can drop page->cgroup without checking refcnt. | ||
692 | */ | ||
693 | unlock_page_cgroup(page); | 666 | unlock_page_cgroup(page); |
694 | if (clear_page_cgroup(page, pc) == pc) { | 667 | |
695 | mem = pc->mem_cgroup; | 668 | mem = pc->mem_cgroup; |
696 | css_put(&mem->css); | 669 | css_put(&mem->css); |
697 | res_counter_uncharge(&mem->res, PAGE_SIZE); | 670 | res_counter_uncharge(&mem->res, PAGE_SIZE); |
698 | spin_lock_irqsave(&mz->lru_lock, flags); | 671 | |
699 | __mem_cgroup_remove_list(pc); | 672 | mz = page_cgroup_zoneinfo(pc); |
700 | spin_unlock_irqrestore(&mz->lru_lock, flags); | 673 | spin_lock_irqsave(&mz->lru_lock, flags); |
701 | kfree(pc); | 674 | __mem_cgroup_remove_list(pc); |
702 | } | 675 | spin_unlock_irqrestore(&mz->lru_lock, flags); |
703 | lock_page_cgroup(page); | 676 | |
677 | kfree(pc); | ||
678 | return; | ||
704 | } | 679 | } |
705 | 680 | ||
706 | unlock: | 681 | unlock: |
@@ -714,14 +689,13 @@ unlock: | |||
714 | int mem_cgroup_prepare_migration(struct page *page) | 689 | int mem_cgroup_prepare_migration(struct page *page) |
715 | { | 690 | { |
716 | struct page_cgroup *pc; | 691 | struct page_cgroup *pc; |
717 | int ret = 0; | ||
718 | 692 | ||
719 | lock_page_cgroup(page); | 693 | lock_page_cgroup(page); |
720 | pc = page_get_page_cgroup(page); | 694 | pc = page_get_page_cgroup(page); |
721 | if (pc && atomic_inc_not_zero(&pc->ref_cnt)) | 695 | if (pc) |
722 | ret = 1; | 696 | pc->ref_cnt++; |
723 | unlock_page_cgroup(page); | 697 | unlock_page_cgroup(page); |
724 | return ret; | 698 | return pc != NULL; |
725 | } | 699 | } |
726 | 700 | ||
727 | void mem_cgroup_end_migration(struct page *page) | 701 | void mem_cgroup_end_migration(struct page *page) |
@@ -740,15 +714,17 @@ void mem_cgroup_page_migration(struct page *page, struct page *newpage) | |||
740 | struct mem_cgroup_per_zone *mz; | 714 | struct mem_cgroup_per_zone *mz; |
741 | unsigned long flags; | 715 | unsigned long flags; |
742 | 716 | ||
743 | retry: | 717 | lock_page_cgroup(page); |
744 | pc = page_get_page_cgroup(page); | 718 | pc = page_get_page_cgroup(page); |
745 | if (!pc) | 719 | if (!pc) { |
720 | unlock_page_cgroup(page); | ||
746 | return; | 721 | return; |
722 | } | ||
747 | 723 | ||
748 | mz = page_cgroup_zoneinfo(pc); | 724 | page_assign_page_cgroup(page, NULL); |
749 | if (clear_page_cgroup(page, pc) != pc) | 725 | unlock_page_cgroup(page); |
750 | goto retry; | ||
751 | 726 | ||
727 | mz = page_cgroup_zoneinfo(pc); | ||
752 | spin_lock_irqsave(&mz->lru_lock, flags); | 728 | spin_lock_irqsave(&mz->lru_lock, flags); |
753 | __mem_cgroup_remove_list(pc); | 729 | __mem_cgroup_remove_list(pc); |
754 | spin_unlock_irqrestore(&mz->lru_lock, flags); | 730 | spin_unlock_irqrestore(&mz->lru_lock, flags); |
@@ -794,15 +770,19 @@ retry: | |||
794 | while (--count && !list_empty(list)) { | 770 | while (--count && !list_empty(list)) { |
795 | pc = list_entry(list->prev, struct page_cgroup, lru); | 771 | pc = list_entry(list->prev, struct page_cgroup, lru); |
796 | page = pc->page; | 772 | page = pc->page; |
797 | /* Avoid race with charge */ | 773 | lock_page_cgroup(page); |
798 | atomic_set(&pc->ref_cnt, 0); | 774 | if (page_get_page_cgroup(page) == pc) { |
799 | if (clear_page_cgroup(page, pc) == pc) { | 775 | page_assign_page_cgroup(page, NULL); |
776 | unlock_page_cgroup(page); | ||
800 | css_put(&mem->css); | 777 | css_put(&mem->css); |
801 | res_counter_uncharge(&mem->res, PAGE_SIZE); | 778 | res_counter_uncharge(&mem->res, PAGE_SIZE); |
802 | __mem_cgroup_remove_list(pc); | 779 | __mem_cgroup_remove_list(pc); |
803 | kfree(pc); | 780 | kfree(pc); |
804 | } else /* being uncharged ? ...do relax */ | 781 | } else { |
782 | /* racing uncharge: let page go then retry */ | ||
783 | unlock_page_cgroup(page); | ||
805 | break; | 784 | break; |
785 | } | ||
806 | } | 786 | } |
807 | 787 | ||
808 | spin_unlock_irqrestore(&mz->lru_lock, flags); | 788 | spin_unlock_irqrestore(&mz->lru_lock, flags); |