author    KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>  2008-02-07 03:14:39 -0500
committer Linus Torvalds <torvalds@woody.linux-foundation.org>  2008-02-07 11:42:22 -0500
commit    072c56c13e1302fcdc39961dc64e76485731ad67 (patch)
tree      934896fdd8aeaa72cb93cb3be2c816a2ac0f0ae7 /mm
parent    1ecaab2bd221251a3fd148abb08e8b877f1e93c8 (diff)
per-zone and reclaim enhancements for memory controller: per-zone-lock for cgroup
Now the LRU is per-zone, so lru_lock can be (and should be) per-zone, too.
This patch implements a per-zone LRU lock. lru_lock is placed into the
mem_cgroup_per_zone struct. The lock can be accessed by

	mz = mem_cgroup_zoneinfo(mem_cgroup, node, zone);
	&mz->lru_lock

or

	mz = page_cgroup_zoneinfo(page_cgroup);
	&mz->lru_lock

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Balbir Singh <balbir@linux.vnet.ibm.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Herbert Poetzl <herbert@13thfloor.at>
Cc: Kirill Korotaev <dev@sw.ru>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Cc: Paul Menage <menage@google.com>
Cc: Pavel Emelianov <xemul@openvz.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com>
Cc: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
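To illustrate the locking convention, here is a minimal sketch of the two
access paths. The wrapper names (lru_lock_by_zone, lru_lock_by_page) are
hypothetical, invented for this sketch only; mem_cgroup_zoneinfo(),
page_cgroup_zoneinfo() and the mem_cgroup_per_zone fields are the ones this
patch series introduces, as visible in the diff below.

	/* Sketch only: these wrappers are hypothetical, not part of the patch. */

	/* (1) The owning cgroup, node and zone are already known (reclaim path). */
	static void lru_lock_by_zone(struct mem_cgroup *mem, int node, int zone)
	{
		struct mem_cgroup_per_zone *mz;
		unsigned long flags;

		mz = mem_cgroup_zoneinfo(mem, node, zone);
		spin_lock_irqsave(&mz->lru_lock, flags);
		/* ... walk mz->active_list / mz->inactive_list ... */
		spin_unlock_irqrestore(&mz->lru_lock, flags);
	}

	/* (2) Only the page_cgroup is at hand (charge/uncharge/migration paths). */
	static void lru_lock_by_page(struct page_cgroup *pc)
	{
		struct mem_cgroup_per_zone *mz;
		unsigned long flags;

		mz = page_cgroup_zoneinfo(pc);
		spin_lock_irqsave(&mz->lru_lock, flags);
		/* ... add, remove or move pc on the per-zone LRU lists ... */
		spin_unlock_irqrestore(&mz->lru_lock, flags);
	}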
Diffstat (limited to 'mm')
-rw-r--r--	mm/memcontrol.c	71
1 files changed, 44 insertions, 27 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index f728d67a3267..315dee180129 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -89,6 +89,10 @@ enum mem_cgroup_zstat_index {
 };
 
 struct mem_cgroup_per_zone {
+	/*
+	 * spin_lock to protect the per cgroup LRU
+	 */
+	spinlock_t		lru_lock;
 	struct list_head	active_list;
 	struct list_head	inactive_list;
 	unsigned long count[NR_MEM_CGROUP_ZSTAT];
@@ -126,10 +130,7 @@ struct mem_cgroup {
 	 * per zone LRU lists.
 	 */
 	struct mem_cgroup_lru_info info;
-	/*
-	 * spin_lock to protect the per cgroup LRU
-	 */
-	spinlock_t lru_lock;
+
 	unsigned long control_type;	/* control RSS or RSS+Pagecache */
 	int	prev_priority;	/* for recording reclaim priority */
 	/*
@@ -409,15 +410,16 @@ int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem)
  */
 void mem_cgroup_move_lists(struct page_cgroup *pc, bool active)
 {
-	struct mem_cgroup *mem;
+	struct mem_cgroup_per_zone *mz;
+	unsigned long flags;
+
 	if (!pc)
 		return;
 
-	mem = pc->mem_cgroup;
-
-	spin_lock(&mem->lru_lock);
+	mz = page_cgroup_zoneinfo(pc);
+	spin_lock_irqsave(&mz->lru_lock, flags);
 	__mem_cgroup_move_lists(pc, active);
-	spin_unlock(&mem->lru_lock);
+	spin_unlock_irqrestore(&mz->lru_lock, flags);
 }
 
 /*
@@ -527,7 +529,7 @@ unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
 		src = &mz->inactive_list;
 
 
-	spin_lock(&mem_cont->lru_lock);
+	spin_lock(&mz->lru_lock);
 	scan = 0;
 	list_for_each_entry_safe_reverse(pc, tmp, src, lru) {
 		if (scan >= nr_to_scan)
@@ -557,7 +559,7 @@ unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
 	}
 
 	list_splice(&pc_list, src);
-	spin_unlock(&mem_cont->lru_lock);
+	spin_unlock(&mz->lru_lock);
 
 	*scanned = scan;
 	return nr_taken;
@@ -576,6 +578,7 @@ static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
 	struct page_cgroup *pc;
 	unsigned long flags;
 	unsigned long nr_retries = MEM_CGROUP_RECLAIM_RETRIES;
+	struct mem_cgroup_per_zone *mz;
 
 	/*
 	 * Should page_cgroup's go to their own slab?
@@ -677,10 +680,11 @@ retry:
 		goto retry;
 	}
 
-	spin_lock_irqsave(&mem->lru_lock, flags);
+	mz = page_cgroup_zoneinfo(pc);
+	spin_lock_irqsave(&mz->lru_lock, flags);
 	/* Update statistics vector */
 	__mem_cgroup_add_list(pc);
-	spin_unlock_irqrestore(&mem->lru_lock, flags);
+	spin_unlock_irqrestore(&mz->lru_lock, flags);
 
 done:
 	return 0;
@@ -727,6 +731,7 @@ int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
 void mem_cgroup_uncharge(struct page_cgroup *pc)
 {
 	struct mem_cgroup *mem;
+	struct mem_cgroup_per_zone *mz;
 	struct page *page;
 	unsigned long flags;
 
@@ -739,6 +744,7 @@ void mem_cgroup_uncharge(struct page_cgroup *pc)
 
 	if (atomic_dec_and_test(&pc->ref_cnt)) {
 		page = pc->page;
+		mz = page_cgroup_zoneinfo(pc);
 		/*
 		 * get page->cgroup and clear it under lock.
 		 * force_empty can drop page->cgroup without checking refcnt.
@@ -747,9 +753,9 @@ void mem_cgroup_uncharge(struct page_cgroup *pc)
 			mem = pc->mem_cgroup;
 			css_put(&mem->css);
 			res_counter_uncharge(&mem->res, PAGE_SIZE);
-			spin_lock_irqsave(&mem->lru_lock, flags);
+			spin_lock_irqsave(&mz->lru_lock, flags);
 			__mem_cgroup_remove_list(pc);
-			spin_unlock_irqrestore(&mem->lru_lock, flags);
+			spin_unlock_irqrestore(&mz->lru_lock, flags);
 			kfree(pc);
 		}
 	}
@@ -788,24 +794,29 @@ void mem_cgroup_page_migration(struct page *page, struct page *newpage)
 	struct page_cgroup *pc;
 	struct mem_cgroup *mem;
 	unsigned long flags;
+	struct mem_cgroup_per_zone *mz;
 retry:
 	pc = page_get_page_cgroup(page);
 	if (!pc)
 		return;
 	mem = pc->mem_cgroup;
+	mz = page_cgroup_zoneinfo(pc);
 	if (clear_page_cgroup(page, pc) != pc)
 		goto retry;
-
-	spin_lock_irqsave(&mem->lru_lock, flags);
+	spin_lock_irqsave(&mz->lru_lock, flags);
 
 	__mem_cgroup_remove_list(pc);
+	spin_unlock_irqrestore(&mz->lru_lock, flags);
+
 	pc->page = newpage;
 	lock_page_cgroup(newpage);
 	page_assign_page_cgroup(newpage, pc);
 	unlock_page_cgroup(newpage);
-	__mem_cgroup_add_list(pc);
 
-	spin_unlock_irqrestore(&mem->lru_lock, flags);
+	mz = page_cgroup_zoneinfo(pc);
+	spin_lock_irqsave(&mz->lru_lock, flags);
+	__mem_cgroup_add_list(pc);
+	spin_unlock_irqrestore(&mz->lru_lock, flags);
 	return;
 }
 
@@ -816,18 +827,26 @@ retry:
  */
 #define FORCE_UNCHARGE_BATCH	(128)
 static void
-mem_cgroup_force_empty_list(struct mem_cgroup *mem, struct list_head *list)
+mem_cgroup_force_empty_list(struct mem_cgroup *mem,
+			    struct mem_cgroup_per_zone *mz,
+			    int active)
 {
 	struct page_cgroup *pc;
 	struct page *page;
 	int count;
 	unsigned long flags;
+	struct list_head *list;
+
+	if (active)
+		list = &mz->active_list;
+	else
+		list = &mz->inactive_list;
 
 	if (list_empty(list))
 		return;
 retry:
 	count = FORCE_UNCHARGE_BATCH;
-	spin_lock_irqsave(&mem->lru_lock, flags);
+	spin_lock_irqsave(&mz->lru_lock, flags);
 
 	while (--count && !list_empty(list)) {
 		pc = list_entry(list->prev, struct page_cgroup, lru);
@@ -842,7 +861,7 @@ retry:
 		} else	/* being uncharged ? ...do relax */
 			break;
 	}
-	spin_unlock_irqrestore(&mem->lru_lock, flags);
+	spin_unlock_irqrestore(&mz->lru_lock, flags);
 	if (!list_empty(list)) {
 		cond_resched();
 		goto retry;
@@ -873,11 +892,9 @@ int mem_cgroup_force_empty(struct mem_cgroup *mem)
 			struct mem_cgroup_per_zone *mz;
 			mz = mem_cgroup_zoneinfo(mem, node, zid);
 			/* drop all page_cgroup in active_list */
-			mem_cgroup_force_empty_list(mem,
-						&mz->active_list);
+			mem_cgroup_force_empty_list(mem, mz, 1);
 			/* drop all page_cgroup in inactive_list */
-			mem_cgroup_force_empty_list(mem,
-						&mz->inactive_list);
+			mem_cgroup_force_empty_list(mem, mz, 0);
 		}
 	}
 	ret = 0;
@@ -1114,6 +1131,7 @@ static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *mem, int node)
 		mz = &pn->zoneinfo[zone];
 		INIT_LIST_HEAD(&mz->active_list);
 		INIT_LIST_HEAD(&mz->inactive_list);
+		spin_lock_init(&mz->lru_lock);
 	}
 	return 0;
 }
@@ -1143,7 +1161,6 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
 
 	res_counter_init(&mem->res);
 
-	spin_lock_init(&mem->lru_lock);
 	mem->control_type = MEM_CGROUP_TYPE_ALL;
 	memset(&mem->info, 0, sizeof(mem->info));
 