author		KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>	2008-02-07 03:14:39 -0500
committer	Linus Torvalds <torvalds@woody.linux-foundation.org>	2008-02-07 11:42:22 -0500
commit		072c56c13e1302fcdc39961dc64e76485731ad67 (patch)
tree		934896fdd8aeaa72cb93cb3be2c816a2ac0f0ae7 /mm/memcontrol.c
parent		1ecaab2bd221251a3fd148abb08e8b877f1e93c8 (diff)
per-zone and reclaim enhancements for memory controller: per-zone-lock for cgroup
Now the LRU is per-zone, so the lru_lock can (and should) be per-zone, too.
This patch implements a per-zone LRU lock.
lru_lock is placed into the mem_cgroup_per_zone struct.
The lock can be accessed by:
	mz = mem_cgroup_zoneinfo(mem_cgroup, node, zone);
	&mz->lru_lock
or
	mz = page_cgroup_zoneinfo(page_cgroup);
	&mz->lru_lock
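As a quick illustration of that access pattern, here is a minimal, self-contained sketch (not part of the patch itself; the wrapper function example_move_page() is invented for illustration) showing how a caller looks up the zone info for a page and takes that zone's lock around the LRU list manipulation, mirroring what the patch does in mem_cgroup_move_lists():

	/* Per-zone state of a memory cgroup; the LRU lock now lives here. */
	struct mem_cgroup_per_zone {
		spinlock_t		lru_lock;	/* protects the two LRU lists below */
		struct list_head	active_list;
		struct list_head	inactive_list;
		unsigned long		count[NR_MEM_CGROUP_ZSTAT];
	};

	/* Hypothetical caller: find the per-zone structure for this page's
	 * page_cgroup, then hold only that zone's lock while moving the page
	 * between the active and inactive lists. */
	static void example_move_page(struct page_cgroup *pc, bool active)
	{
		struct mem_cgroup_per_zone *mz = page_cgroup_zoneinfo(pc);
		unsigned long flags;

		spin_lock_irqsave(&mz->lru_lock, flags);
		__mem_cgroup_move_lists(pc, active);
		spin_unlock_irqrestore(&mz->lru_lock, flags);
	}

The point of the split is that charge, uncharge, and reclaim operating on pages from different zones no longer serialize on a single per-cgroup lock; each takes only the lock of the zone it touches.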
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Balbir Singh <balbir@linux.vnet.ibm.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Herbert Poetzl <herbert@13thfloor.at>
Cc: Kirill Korotaev <dev@sw.ru>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Cc: Paul Menage <menage@google.com>
Cc: Pavel Emelianov <xemul@openvz.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com>
Cc: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/memcontrol.c')
-rw-r--r--	mm/memcontrol.c	71
1 file changed, 44 insertions, 27 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index f728d67a3267..315dee180129 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -89,6 +89,10 @@ enum mem_cgroup_zstat_index {
 };
 
 struct mem_cgroup_per_zone {
+	/*
+	 * spin_lock to protect the per cgroup LRU
+	 */
+	spinlock_t lru_lock;
 	struct list_head active_list;
 	struct list_head inactive_list;
 	unsigned long count[NR_MEM_CGROUP_ZSTAT];
@@ -126,10 +130,7 @@ struct mem_cgroup {
 	 * per zone LRU lists.
 	 */
 	struct mem_cgroup_lru_info info;
-	/*
-	 * spin_lock to protect the per cgroup LRU
-	 */
-	spinlock_t lru_lock;
+
 	unsigned long control_type;	/* control RSS or RSS+Pagecache */
 	int prev_priority;	/* for recording reclaim priority */
 	/*
@@ -409,15 +410,16 @@ int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem)
  */
 void mem_cgroup_move_lists(struct page_cgroup *pc, bool active)
 {
-	struct mem_cgroup *mem;
+	struct mem_cgroup_per_zone *mz;
+	unsigned long flags;
+
 	if (!pc)
 		return;
 
-	mem = pc->mem_cgroup;
-
-	spin_lock(&mem->lru_lock);
+	mz = page_cgroup_zoneinfo(pc);
+	spin_lock_irqsave(&mz->lru_lock, flags);
 	__mem_cgroup_move_lists(pc, active);
-	spin_unlock(&mem->lru_lock);
+	spin_unlock_irqrestore(&mz->lru_lock, flags);
 }
 
 /*
@@ -527,7 +529,7 @@ unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
 		src = &mz->inactive_list;
 
 
-	spin_lock(&mem_cont->lru_lock);
+	spin_lock(&mz->lru_lock);
 	scan = 0;
 	list_for_each_entry_safe_reverse(pc, tmp, src, lru) {
 		if (scan >= nr_to_scan)
@@ -557,7 +559,7 @@ unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
 	}
 
 	list_splice(&pc_list, src);
-	spin_unlock(&mem_cont->lru_lock);
+	spin_unlock(&mz->lru_lock);
 
 	*scanned = scan;
 	return nr_taken;
@@ -576,6 +578,7 @@ static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
 	struct page_cgroup *pc;
 	unsigned long flags;
 	unsigned long nr_retries = MEM_CGROUP_RECLAIM_RETRIES;
+	struct mem_cgroup_per_zone *mz;
 
 	/*
 	 * Should page_cgroup's go to their own slab?
@@ -677,10 +680,11 @@ retry:
 		goto retry;
 	}
 
-	spin_lock_irqsave(&mem->lru_lock, flags);
+	mz = page_cgroup_zoneinfo(pc);
+	spin_lock_irqsave(&mz->lru_lock, flags);
 	/* Update statistics vector */
 	__mem_cgroup_add_list(pc);
-	spin_unlock_irqrestore(&mem->lru_lock, flags);
+	spin_unlock_irqrestore(&mz->lru_lock, flags);
 
 done:
 	return 0;
@@ -727,6 +731,7 @@ int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
 void mem_cgroup_uncharge(struct page_cgroup *pc)
 {
 	struct mem_cgroup *mem;
+	struct mem_cgroup_per_zone *mz;
 	struct page *page;
 	unsigned long flags;
 
@@ -739,6 +744,7 @@ void mem_cgroup_uncharge(struct page_cgroup *pc)
 
 	if (atomic_dec_and_test(&pc->ref_cnt)) {
 		page = pc->page;
+		mz = page_cgroup_zoneinfo(pc);
 		/*
 		 * get page->cgroup and clear it under lock.
 		 * force_empty can drop page->cgroup without checking refcnt.
@@ -747,9 +753,9 @@ void mem_cgroup_uncharge(struct page_cgroup *pc)
 		mem = pc->mem_cgroup;
 		css_put(&mem->css);
 		res_counter_uncharge(&mem->res, PAGE_SIZE);
-		spin_lock_irqsave(&mem->lru_lock, flags);
+		spin_lock_irqsave(&mz->lru_lock, flags);
 		__mem_cgroup_remove_list(pc);
-		spin_unlock_irqrestore(&mem->lru_lock, flags);
+		spin_unlock_irqrestore(&mz->lru_lock, flags);
 		kfree(pc);
 	}
 }
@@ -788,24 +794,29 @@ void mem_cgroup_page_migration(struct page *page, struct page *newpage)
 	struct page_cgroup *pc;
 	struct mem_cgroup *mem;
 	unsigned long flags;
+	struct mem_cgroup_per_zone *mz;
 retry:
 	pc = page_get_page_cgroup(page);
 	if (!pc)
 		return;
 	mem = pc->mem_cgroup;
+	mz = page_cgroup_zoneinfo(pc);
 	if (clear_page_cgroup(page, pc) != pc)
 		goto retry;
-
-	spin_lock_irqsave(&mem->lru_lock, flags);
+	spin_lock_irqsave(&mz->lru_lock, flags);
 
 	__mem_cgroup_remove_list(pc);
+	spin_unlock_irqrestore(&mz->lru_lock, flags);
+
 	pc->page = newpage;
 	lock_page_cgroup(newpage);
 	page_assign_page_cgroup(newpage, pc);
 	unlock_page_cgroup(newpage);
-	__mem_cgroup_add_list(pc);
 
-	spin_unlock_irqrestore(&mem->lru_lock, flags);
+	mz = page_cgroup_zoneinfo(pc);
+	spin_lock_irqsave(&mz->lru_lock, flags);
+	__mem_cgroup_add_list(pc);
+	spin_unlock_irqrestore(&mz->lru_lock, flags);
 	return;
 }
 
@@ -816,18 +827,26 @@ retry:
  */
 #define FORCE_UNCHARGE_BATCH	(128)
 static void
-mem_cgroup_force_empty_list(struct mem_cgroup *mem, struct list_head *list)
+mem_cgroup_force_empty_list(struct mem_cgroup *mem,
+			struct mem_cgroup_per_zone *mz,
+			int active)
 {
 	struct page_cgroup *pc;
 	struct page *page;
 	int count;
 	unsigned long flags;
+	struct list_head *list;
+
+	if (active)
+		list = &mz->active_list;
+	else
+		list = &mz->inactive_list;
 
 	if (list_empty(list))
 		return;
 retry:
 	count = FORCE_UNCHARGE_BATCH;
-	spin_lock_irqsave(&mem->lru_lock, flags);
+	spin_lock_irqsave(&mz->lru_lock, flags);
 
 	while (--count && !list_empty(list)) {
 		pc = list_entry(list->prev, struct page_cgroup, lru);
@@ -842,7 +861,7 @@ retry:
 		} else /* being uncharged ? ...do relax */
 			break;
 	}
-	spin_unlock_irqrestore(&mem->lru_lock, flags);
+	spin_unlock_irqrestore(&mz->lru_lock, flags);
 	if (!list_empty(list)) {
 		cond_resched();
 		goto retry;
@@ -873,11 +892,9 @@ int mem_cgroup_force_empty(struct mem_cgroup *mem)
 			struct mem_cgroup_per_zone *mz;
 			mz = mem_cgroup_zoneinfo(mem, node, zid);
 			/* drop all page_cgroup in active_list */
-			mem_cgroup_force_empty_list(mem,
-					&mz->active_list);
+			mem_cgroup_force_empty_list(mem, mz, 1);
 			/* drop all page_cgroup in inactive_list */
-			mem_cgroup_force_empty_list(mem,
-					&mz->inactive_list);
+			mem_cgroup_force_empty_list(mem, mz, 0);
 		}
 	}
 	ret = 0;
@@ -1114,6 +1131,7 @@ static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *mem, int node)
 		mz = &pn->zoneinfo[zone];
 		INIT_LIST_HEAD(&mz->active_list);
 		INIT_LIST_HEAD(&mz->inactive_list);
+		spin_lock_init(&mz->lru_lock);
 	}
 	return 0;
 }
@@ -1143,7 +1161,6 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
 
 	res_counter_init(&mem->res);
 
-	spin_lock_init(&mem->lru_lock);
 	mem->control_type = MEM_CGROUP_TYPE_ALL;
 	memset(&mem->info, 0, sizeof(mem->info));
 