diff options
author | Johannes Weiner <hannes@cmpxchg.org> | 2011-03-23 19:42:30 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2011-03-23 22:46:28 -0400 |
commit | 6b3ae58efca06623c197fd6d91ded4aa3a8fe039 (patch) | |
tree | 6460e4e1ce206d391b862a3d398a9e22e33ecb3c /mm | |
parent | 5564e88ba6fd2f6dcd83a592771810cd84b5ae80 (diff) |
memcg: remove direct page_cgroup-to-page pointer
In struct page_cgroup, we have a full word for flags but only a few are
reserved. Use the remaining upper bits to encode, depending on
configuration, the node or the section, to enable page_cgroup-to-page
lookups without a direct pointer.
This saves a full word for every page in a system with memory cgroups
enabled.
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Cc: Balbir Singh <balbir@linux.vnet.ibm.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
Cc: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm')
-rw-r--r-- | mm/memcontrol.c | 4 | ||||
-rw-r--r-- | mm/page_cgroup.c | 91 |
2 files changed, 57 insertions, 38 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index e286e1603e4f..660dfc27d971 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1080,7 +1080,7 @@ unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan, | |||
1080 | if (unlikely(!PageCgroupUsed(pc))) | 1080 | if (unlikely(!PageCgroupUsed(pc))) |
1081 | continue; | 1081 | continue; |
1082 | 1082 | ||
1083 | page = pc->page; | 1083 | page = lookup_cgroup_page(pc); |
1084 | 1084 | ||
1085 | if (unlikely(!PageLRU(page))) | 1085 | if (unlikely(!PageLRU(page))) |
1086 | continue; | 1086 | continue; |
@@ -3344,7 +3344,7 @@ static int mem_cgroup_force_empty_list(struct mem_cgroup *mem, | |||
3344 | } | 3344 | } |
3345 | spin_unlock_irqrestore(&zone->lru_lock, flags); | 3345 | spin_unlock_irqrestore(&zone->lru_lock, flags); |
3346 | 3346 | ||
3347 | page = pc->page; | 3347 | page = lookup_cgroup_page(pc); |
3348 | 3348 | ||
3349 | ret = mem_cgroup_move_parent(page, pc, mem, GFP_KERNEL); | 3349 | ret = mem_cgroup_move_parent(page, pc, mem, GFP_KERNEL); |
3350 | if (ret == -ENOMEM) | 3350 | if (ret == -ENOMEM) |
diff --git a/mm/page_cgroup.c b/mm/page_cgroup.c
index 59a3cd4c799d..6c3f7a6a481a 100644
--- a/mm/page_cgroup.c
+++ b/mm/page_cgroup.c
@@ -11,12 +11,11 @@ | |||
11 | #include <linux/swapops.h> | 11 | #include <linux/swapops.h> |
12 | #include <linux/kmemleak.h> | 12 | #include <linux/kmemleak.h> |
13 | 13 | ||
14 | static void __meminit | 14 | static void __meminit init_page_cgroup(struct page_cgroup *pc, unsigned long id) |
15 | __init_page_cgroup(struct page_cgroup *pc, unsigned long pfn) | ||
16 | { | 15 | { |
17 | pc->flags = 0; | 16 | pc->flags = 0; |
17 | set_page_cgroup_array_id(pc, id); | ||
18 | pc->mem_cgroup = NULL; | 18 | pc->mem_cgroup = NULL; |
19 | pc->page = pfn_to_page(pfn); | ||
20 | INIT_LIST_HEAD(&pc->lru); | 19 | INIT_LIST_HEAD(&pc->lru); |
21 | } | 20 | } |
22 | static unsigned long total_usage; | 21 | static unsigned long total_usage; |
@@ -43,6 +42,19 @@ struct page_cgroup *lookup_page_cgroup(struct page *page) | |||
43 | return base + offset; | 42 | return base + offset; |
44 | } | 43 | } |
45 | 44 | ||
45 | struct page *lookup_cgroup_page(struct page_cgroup *pc) | ||
46 | { | ||
47 | unsigned long pfn; | ||
48 | struct page *page; | ||
49 | pg_data_t *pgdat; | ||
50 | |||
51 | pgdat = NODE_DATA(page_cgroup_array_id(pc)); | ||
52 | pfn = pc - pgdat->node_page_cgroup + pgdat->node_start_pfn; | ||
53 | page = pfn_to_page(pfn); | ||
54 | VM_BUG_ON(pc != lookup_page_cgroup(page)); | ||
55 | return page; | ||
56 | } | ||
57 | |||
46 | static int __init alloc_node_page_cgroup(int nid) | 58 | static int __init alloc_node_page_cgroup(int nid) |
47 | { | 59 | { |
48 | struct page_cgroup *base, *pc; | 60 | struct page_cgroup *base, *pc; |
@@ -63,7 +75,7 @@ static int __init alloc_node_page_cgroup(int nid) | |||
63 | return -ENOMEM; | 75 | return -ENOMEM; |
64 | for (index = 0; index < nr_pages; index++) { | 76 | for (index = 0; index < nr_pages; index++) { |
65 | pc = base + index; | 77 | pc = base + index; |
66 | __init_page_cgroup(pc, start_pfn + index); | 78 | init_page_cgroup(pc, nid); |
67 | } | 79 | } |
68 | NODE_DATA(nid)->node_page_cgroup = base; | 80 | NODE_DATA(nid)->node_page_cgroup = base; |
69 | total_usage += table_size; | 81 | total_usage += table_size; |
@@ -105,46 +117,53 @@ struct page_cgroup *lookup_page_cgroup(struct page *page) | |||
105 | return section->page_cgroup + pfn; | 117 | return section->page_cgroup + pfn; |
106 | } | 118 | } |
107 | 119 | ||
120 | struct page *lookup_cgroup_page(struct page_cgroup *pc) | ||
121 | { | ||
122 | struct mem_section *section; | ||
123 | struct page *page; | ||
124 | unsigned long nr; | ||
125 | |||
126 | nr = page_cgroup_array_id(pc); | ||
127 | section = __nr_to_section(nr); | ||
128 | page = pfn_to_page(pc - section->page_cgroup); | ||
129 | VM_BUG_ON(pc != lookup_page_cgroup(page)); | ||
130 | return page; | ||
131 | } | ||
132 | |||
108 | /* __alloc_bootmem...() is protected by !slab_available() */ | 133 | /* __alloc_bootmem...() is protected by !slab_available() */ |
109 | static int __init_refok init_section_page_cgroup(unsigned long pfn) | 134 | static int __init_refok init_section_page_cgroup(unsigned long pfn) |
110 | { | 135 | { |
111 | struct mem_section *section = __pfn_to_section(pfn); | ||
112 | struct page_cgroup *base, *pc; | 136 | struct page_cgroup *base, *pc; |
137 | struct mem_section *section; | ||
113 | unsigned long table_size; | 138 | unsigned long table_size; |
139 | unsigned long nr; | ||
114 | int nid, index; | 140 | int nid, index; |
115 | 141 | ||
116 | if (!section->page_cgroup) { | 142 | nr = pfn_to_section_nr(pfn); |
117 | nid = page_to_nid(pfn_to_page(pfn)); | 143 | section = __nr_to_section(nr); |
118 | table_size = sizeof(struct page_cgroup) * PAGES_PER_SECTION; | 144 | |
119 | VM_BUG_ON(!slab_is_available()); | 145 | if (section->page_cgroup) |
120 | if (node_state(nid, N_HIGH_MEMORY)) { | 146 | return 0; |
121 | base = kmalloc_node(table_size, | 147 | |
122 | GFP_KERNEL | __GFP_NOWARN, nid); | 148 | nid = page_to_nid(pfn_to_page(pfn)); |
123 | if (!base) | 149 | table_size = sizeof(struct page_cgroup) * PAGES_PER_SECTION; |
124 | base = vmalloc_node(table_size, nid); | 150 | VM_BUG_ON(!slab_is_available()); |
125 | } else { | 151 | if (node_state(nid, N_HIGH_MEMORY)) { |
126 | base = kmalloc(table_size, GFP_KERNEL | __GFP_NOWARN); | 152 | base = kmalloc_node(table_size, |
127 | if (!base) | 153 | GFP_KERNEL | __GFP_NOWARN, nid); |
128 | base = vmalloc(table_size); | 154 | if (!base) |
129 | } | 155 | base = vmalloc_node(table_size, nid); |
130 | /* | ||
131 | * The value stored in section->page_cgroup is (base - pfn) | ||
132 | * and it does not point to the memory block allocated above, | ||
133 | * causing kmemleak false positives. | ||
134 | */ | ||
135 | kmemleak_not_leak(base); | ||
136 | } else { | 156 | } else { |
137 | /* | 157 | base = kmalloc(table_size, GFP_KERNEL | __GFP_NOWARN); |
138 | * We don't have to allocate page_cgroup again, but | 158 | if (!base) |
139 | * address of memmap may be changed. So, we have to initialize | 159 | base = vmalloc(table_size); |
140 | * again. | ||
141 | */ | ||
142 | base = section->page_cgroup + pfn; | ||
143 | table_size = 0; | ||
144 | /* check address of memmap is changed or not. */ | ||
145 | if (base->page == pfn_to_page(pfn)) | ||
146 | return 0; | ||
147 | } | 160 | } |
161 | /* | ||
162 | * The value stored in section->page_cgroup is (base - pfn) | ||
163 | * and it does not point to the memory block allocated above, | ||
164 | * causing kmemleak false positives. | ||
165 | */ | ||
166 | kmemleak_not_leak(base); | ||
148 | 167 | ||
149 | if (!base) { | 168 | if (!base) { |
150 | printk(KERN_ERR "page cgroup allocation failure\n"); | 169 | printk(KERN_ERR "page cgroup allocation failure\n"); |
@@ -153,7 +172,7 @@ static int __init_refok init_section_page_cgroup(unsigned long pfn) | |||
153 | 172 | ||
154 | for (index = 0; index < PAGES_PER_SECTION; index++) { | 173 | for (index = 0; index < PAGES_PER_SECTION; index++) { |
155 | pc = base + index; | 174 | pc = base + index; |
156 | __init_page_cgroup(pc, pfn + index); | 175 | init_page_cgroup(pc, nr); |
157 | } | 176 | } |
158 | 177 | ||
159 | section->page_cgroup = base - pfn; | 178 | section->page_cgroup = base - pfn; |