author		Johannes Weiner <hannes@cmpxchg.org>	2011-03-23 19:42:30 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2011-03-23 22:46:28 -0400
commit		6b3ae58efca06623c197fd6d91ded4aa3a8fe039 (patch)
tree		6460e4e1ce206d391b862a3d398a9e22e33ecb3c
parent		5564e88ba6fd2f6dcd83a592771810cd84b5ae80 (diff)
memcg: remove direct page_cgroup-to-page pointer
In struct page_cgroup, we have a full word for flags but only a few are
reserved. Use the remaining upper bits to encode, depending on
configuration, the node or the section, to enable page_cgroup-to-page
lookups without a direct pointer.
This saves a full word for every page in a system with memory cgroups
enabled.
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Cc: Balbir Singh <balbir@linux.vnet.ibm.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
Cc: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- | include/linux/page_cgroup.h | 75
-rw-r--r-- | kernel/bounds.c             |  2
-rw-r--r-- | mm/memcontrol.c             |  4
-rw-r--r-- | mm/page_cgroup.c            | 91
4 files changed, 117 insertions(+), 55 deletions(-)
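The trick the changelog describes is plain bit packing: the array ID (a node or section number) occupies the topmost bits of pc->flags, above the PCG_* flag bits, so one word carries both. Below is a minimal stand-alone user-space sketch of that layout; the fixed 16-bit width and every identifier in it are hypothetical stand-ins, not kernel API (the real field is sized from SECTIONS_SHIFT or NODES_SHIFT, as the page_cgroup.h hunk below shows):

	/*
	 * Illustrative model of the pc->flags layout: the upper
	 * ARRAYID_WIDTH bits hold an array ID, the low bits hold
	 * ordinary flags. All names and the 16-bit width here are
	 * hypothetical.
	 */
	#include <assert.h>
	#include <stdio.h>

	#define BITS_PER_ULONG	(8 * sizeof(unsigned long))
	#define ARRAYID_WIDTH	16UL	/* kernel: SECTIONS_SHIFT or NODES_SHIFT */
	#define ARRAYID_MASK	((1UL << ARRAYID_WIDTH) - 1)
	#define ARRAYID_SHIFT	(BITS_PER_ULONG - ARRAYID_WIDTH)

	static void set_array_id(unsigned long *flags, unsigned long id)
	{
		*flags &= ~(ARRAYID_MASK << ARRAYID_SHIFT);	/* clear old ID */
		*flags |= (id & ARRAYID_MASK) << ARRAYID_SHIFT;	/* store new ID */
	}

	static unsigned long get_array_id(unsigned long flags)
	{
		return (flags >> ARRAYID_SHIFT) & ARRAYID_MASK;
	}

	int main(void)
	{
		unsigned long flags = 0x5;	/* pretend two low flag bits are set */

		set_array_id(&flags, 42);	/* e.g. a node or section number */
		assert(get_array_id(flags) == 42);
		assert((flags & 0x5) == 0x5);	/* flag bits survive the update */
		printf("flags=%#lx, id=%lu\n", flags, get_array_id(flags));
		return 0;
	}

Recovering the page is then a shift and a mask to get the ID back, plus pointer arithmetic against the per-node or per-section page_cgroup array; that is exactly what the two lookup_cgroup_page() variants added in mm/page_cgroup.c do.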
diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h
index 6b63679ce8a1..f5de21de31dd 100644
--- a/include/linux/page_cgroup.h
+++ b/include/linux/page_cgroup.h
@@ -1,8 +1,26 @@
 #ifndef __LINUX_PAGE_CGROUP_H
 #define __LINUX_PAGE_CGROUP_H
 
+enum {
+	/* flags for mem_cgroup */
+	PCG_LOCK, /* Lock for pc->mem_cgroup and following bits. */
+	PCG_CACHE, /* charged as cache */
+	PCG_USED, /* this object is in use. */
+	PCG_MIGRATION, /* under page migration */
+	/* flags for mem_cgroup and file and I/O status */
+	PCG_MOVE_LOCK, /* For race between move_account v.s. following bits */
+	PCG_FILE_MAPPED, /* page is accounted as "mapped" */
+	/* No lock in page_cgroup */
+	PCG_ACCT_LRU, /* page has been accounted for (under lru_lock) */
+	__NR_PCG_FLAGS,
+};
+
+#ifndef __GENERATING_BOUNDS_H
+#include <generated/bounds.h>
+
 #ifdef CONFIG_CGROUP_MEM_RES_CTLR
 #include <linux/bit_spinlock.h>
+
 /*
  * Page Cgroup can be considered as an extended mem_map.
  * A page_cgroup page is associated with every page descriptor. The
@@ -13,7 +31,6 @@
 struct page_cgroup {
 	unsigned long flags;
 	struct mem_cgroup *mem_cgroup;
-	struct page *page;
 	struct list_head lru;		/* per cgroup LRU list */
 };
 
@@ -32,19 +49,7 @@ static inline void __init page_cgroup_init(void)
 #endif
 
 struct page_cgroup *lookup_page_cgroup(struct page *page);
-
-enum {
-	/* flags for mem_cgroup */
-	PCG_LOCK, /* Lock for pc->mem_cgroup and following bits. */
-	PCG_CACHE, /* charged as cache */
-	PCG_USED, /* this object is in use. */
-	PCG_MIGRATION, /* under page migration */
-	/* flags for mem_cgroup and file and I/O status */
-	PCG_MOVE_LOCK, /* For race between move_account v.s. following bits */
-	PCG_FILE_MAPPED, /* page is accounted as "mapped" */
-	/* No lock in page_cgroup */
-	PCG_ACCT_LRU, /* page has been accounted for (under lru_lock) */
-};
+struct page *lookup_cgroup_page(struct page_cgroup *pc);
 
 #define TESTPCGFLAG(uname, lname)			\
 static inline int PageCgroup##uname(struct page_cgroup *pc)	\
@@ -117,6 +122,39 @@ static inline void move_unlock_page_cgroup(struct page_cgroup *pc,
 	local_irq_restore(*flags);
 }
 
+#ifdef CONFIG_SPARSEMEM
+#define PCG_ARRAYID_WIDTH	SECTIONS_SHIFT
+#else
+#define PCG_ARRAYID_WIDTH	NODES_SHIFT
+#endif
+
+#if (PCG_ARRAYID_WIDTH > BITS_PER_LONG - NR_PCG_FLAGS)
+#error Not enough space left in pc->flags to store page_cgroup array IDs
+#endif
+
+/* pc->flags: ARRAY-ID | FLAGS */
+
+#define PCG_ARRAYID_MASK	((1UL << PCG_ARRAYID_WIDTH) - 1)
+
+#define PCG_ARRAYID_OFFSET	(BITS_PER_LONG - PCG_ARRAYID_WIDTH)
+/*
+ * Zero the shift count for non-existant fields, to prevent compiler
+ * warnings and ensure references are optimized away.
+ */
+#define PCG_ARRAYID_SHIFT	(PCG_ARRAYID_OFFSET * (PCG_ARRAYID_WIDTH != 0))
+
+static inline void set_page_cgroup_array_id(struct page_cgroup *pc,
+					    unsigned long id)
+{
+	pc->flags &= ~(PCG_ARRAYID_MASK << PCG_ARRAYID_SHIFT);
+	pc->flags |= (id & PCG_ARRAYID_MASK) << PCG_ARRAYID_SHIFT;
+}
+
+static inline unsigned long page_cgroup_array_id(struct page_cgroup *pc)
+{
+	return (pc->flags >> PCG_ARRAYID_SHIFT) & PCG_ARRAYID_MASK;
+}
+
 #else /* CONFIG_CGROUP_MEM_RES_CTLR */
 struct page_cgroup;
 
@@ -137,7 +175,7 @@ static inline void __init page_cgroup_init_flatmem(void)
 {
 }
 
-#endif
+#endif /* CONFIG_CGROUP_MEM_RES_CTLR */
 
 #include <linux/swap.h>
 
@@ -173,5 +211,8 @@ static inline void swap_cgroup_swapoff(int type)
 	return;
 }
 
-#endif
-#endif
+#endif /* CONFIG_CGROUP_MEM_RES_CTLR_SWAP */
+
+#endif /* !__GENERATING_BOUNDS_H */
+
+#endif /* __LINUX_PAGE_CGROUP_H */
diff --git a/kernel/bounds.c b/kernel/bounds.c
index 98a51f26c136..0c9b862292b2 100644
--- a/kernel/bounds.c
+++ b/kernel/bounds.c
@@ -9,11 +9,13 @@
 #include <linux/page-flags.h>
 #include <linux/mmzone.h>
 #include <linux/kbuild.h>
+#include <linux/page_cgroup.h>
 
 void foo(void)
 {
 	/* The enum constants to put into include/generated/bounds.h */
 	DEFINE(NR_PAGEFLAGS, __NR_PAGEFLAGS);
 	DEFINE(MAX_NR_ZONES, __MAX_NR_ZONES);
+	DEFINE(NR_PCG_FLAGS, __NR_PCG_FLAGS);
 	/* End of constants */
 }
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index e286e1603e4f..660dfc27d971 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1080,7 +1080,7 @@ unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
 		if (unlikely(!PageCgroupUsed(pc)))
 			continue;
 
-		page = pc->page;
+		page = lookup_cgroup_page(pc);
 
 		if (unlikely(!PageLRU(page)))
 			continue;
@@ -3344,7 +3344,7 @@ static int mem_cgroup_force_empty_list(struct mem_cgroup *mem,
 		}
 		spin_unlock_irqrestore(&zone->lru_lock, flags);
 
-		page = pc->page;
+		page = lookup_cgroup_page(pc);
 
 		ret = mem_cgroup_move_parent(page, pc, mem, GFP_KERNEL);
 		if (ret == -ENOMEM)
diff --git a/mm/page_cgroup.c b/mm/page_cgroup.c
index 59a3cd4c799d..6c3f7a6a481a 100644
--- a/mm/page_cgroup.c
+++ b/mm/page_cgroup.c
@@ -11,12 +11,11 @@
 #include <linux/swapops.h>
 #include <linux/kmemleak.h>
 
-static void __meminit
-__init_page_cgroup(struct page_cgroup *pc, unsigned long pfn)
+static void __meminit init_page_cgroup(struct page_cgroup *pc, unsigned long id)
 {
 	pc->flags = 0;
+	set_page_cgroup_array_id(pc, id);
 	pc->mem_cgroup = NULL;
-	pc->page = pfn_to_page(pfn);
 	INIT_LIST_HEAD(&pc->lru);
 }
 static unsigned long total_usage;
@@ -43,6 +42,19 @@ struct page_cgroup *lookup_page_cgroup(struct page *page)
 	return base + offset;
 }
 
+struct page *lookup_cgroup_page(struct page_cgroup *pc)
+{
+	unsigned long pfn;
+	struct page *page;
+	pg_data_t *pgdat;
+
+	pgdat = NODE_DATA(page_cgroup_array_id(pc));
+	pfn = pc - pgdat->node_page_cgroup + pgdat->node_start_pfn;
+	page = pfn_to_page(pfn);
+	VM_BUG_ON(pc != lookup_page_cgroup(page));
+	return page;
+}
+
 static int __init alloc_node_page_cgroup(int nid)
 {
 	struct page_cgroup *base, *pc;
@@ -63,7 +75,7 @@ static int __init alloc_node_page_cgroup(int nid)
 		return -ENOMEM;
 	for (index = 0; index < nr_pages; index++) {
 		pc = base + index;
-		__init_page_cgroup(pc, start_pfn + index);
+		init_page_cgroup(pc, nid);
 	}
 	NODE_DATA(nid)->node_page_cgroup = base;
 	total_usage += table_size;
@@ -105,46 +117,53 @@ struct page_cgroup *lookup_page_cgroup(struct page *page)
 	return section->page_cgroup + pfn;
 }
 
+struct page *lookup_cgroup_page(struct page_cgroup *pc)
+{
+	struct mem_section *section;
+	struct page *page;
+	unsigned long nr;
+
+	nr = page_cgroup_array_id(pc);
+	section = __nr_to_section(nr);
+	page = pfn_to_page(pc - section->page_cgroup);
+	VM_BUG_ON(pc != lookup_page_cgroup(page));
+	return page;
+}
+
 /* __alloc_bootmem...() is protected by !slab_available() */
 static int __init_refok init_section_page_cgroup(unsigned long pfn)
 {
-	struct mem_section *section = __pfn_to_section(pfn);
 	struct page_cgroup *base, *pc;
+	struct mem_section *section;
 	unsigned long table_size;
+	unsigned long nr;
 	int nid, index;
 
-	if (!section->page_cgroup) {
-		nid = page_to_nid(pfn_to_page(pfn));
-		table_size = sizeof(struct page_cgroup) * PAGES_PER_SECTION;
-		VM_BUG_ON(!slab_is_available());
-		if (node_state(nid, N_HIGH_MEMORY)) {
-			base = kmalloc_node(table_size,
-					GFP_KERNEL | __GFP_NOWARN, nid);
-			if (!base)
-				base = vmalloc_node(table_size, nid);
-		} else {
-			base = kmalloc(table_size, GFP_KERNEL | __GFP_NOWARN);
-			if (!base)
-				base = vmalloc(table_size);
-		}
-		/*
-		 * The value stored in section->page_cgroup is (base - pfn)
-		 * and it does not point to the memory block allocated above,
-		 * causing kmemleak false positives.
-		 */
-		kmemleak_not_leak(base);
+	nr = pfn_to_section_nr(pfn);
+	section = __nr_to_section(nr);
+
+	if (section->page_cgroup)
+		return 0;
+
+	nid = page_to_nid(pfn_to_page(pfn));
+	table_size = sizeof(struct page_cgroup) * PAGES_PER_SECTION;
+	VM_BUG_ON(!slab_is_available());
+	if (node_state(nid, N_HIGH_MEMORY)) {
+		base = kmalloc_node(table_size,
+				GFP_KERNEL | __GFP_NOWARN, nid);
+		if (!base)
+			base = vmalloc_node(table_size, nid);
 	} else {
-		/*
-		 * We don't have to allocate page_cgroup again, but
-		 * address of memmap may be changed. So, we have to initialize
-		 * again.
-		 */
-		base = section->page_cgroup + pfn;
-		table_size = 0;
-		/* check address of memmap is changed or not. */
-		if (base->page == pfn_to_page(pfn))
-			return 0;
+		base = kmalloc(table_size, GFP_KERNEL | __GFP_NOWARN);
+		if (!base)
+			base = vmalloc(table_size);
 	}
+	/*
+	 * The value stored in section->page_cgroup is (base - pfn)
+	 * and it does not point to the memory block allocated above,
+	 * causing kmemleak false positives.
+	 */
+	kmemleak_not_leak(base);
 
 	if (!base) {
 		printk(KERN_ERR "page cgroup allocation failure\n");
@@ -153,7 +172,7 @@ static int __init_refok init_section_page_cgroup(unsigned long pfn)
 
 	for (index = 0; index < PAGES_PER_SECTION; index++) {
 		pc = base + index;
-		__init_page_cgroup(pc, pfn + index);
+		init_page_cgroup(pc, nr);
 	}
 
 	section->page_cgroup = base - pfn;