aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>2011-06-15 18:08:42 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2011-06-15 23:04:01 -0400
commit37573e8c718277103f61f03741bdc5606d31b07e (patch)
tree7142211508420a8ea90b8ae01bfa5ee93ffa1a44
parent8957712710e045044e3c44375c6a87d7ffa17d51 (diff)
memcg: fix init_page_cgroup nid with sparsemem
Commit 21a3c9646873 ("memcg: allocate memory cgroup structures in local nodes") makes page_cgroup allocation as NUMA aware. But that caused a problem https://bugzilla.kernel.org/show_bug.cgi?id=36192. The problem was getting a NID from invalid struct pages, which was not initialized because it was out-of-node, out of [node_start_pfn, node_end_pfn) Now, with sparsemem, page_cgroup_init scans pfn from 0 to max_pfn. But this may scan a pfn which is not on any node and can access memmap which is not initialized. This makes page_cgroup_init() for SPARSEMEM node aware and remove a code to get nid from page->flags. (Then, we'll use valid NID always.) [akpm@linux-foundation.org: try to fix up comments] Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--mm/page_cgroup.c71
1 files changed, 53 insertions, 18 deletions
diff --git a/mm/page_cgroup.c b/mm/page_cgroup.c
index 74ccff61d1be..53bffc6c293e 100644
--- a/mm/page_cgroup.c
+++ b/mm/page_cgroup.c
@@ -162,13 +162,13 @@ static void free_page_cgroup(void *addr)
162} 162}
163#endif 163#endif
164 164
165static int __meminit init_section_page_cgroup(unsigned long pfn) 165static int __meminit init_section_page_cgroup(unsigned long pfn, int nid)
166{ 166{
167 struct page_cgroup *base, *pc; 167 struct page_cgroup *base, *pc;
168 struct mem_section *section; 168 struct mem_section *section;
169 unsigned long table_size; 169 unsigned long table_size;
170 unsigned long nr; 170 unsigned long nr;
171 int nid, index; 171 int index;
172 172
173 nr = pfn_to_section_nr(pfn); 173 nr = pfn_to_section_nr(pfn);
174 section = __nr_to_section(nr); 174 section = __nr_to_section(nr);
@@ -176,7 +176,6 @@ static int __meminit init_section_page_cgroup(unsigned long pfn)
176 if (section->page_cgroup) 176 if (section->page_cgroup)
177 return 0; 177 return 0;
178 178
179 nid = page_to_nid(pfn_to_page(pfn));
180 table_size = sizeof(struct page_cgroup) * PAGES_PER_SECTION; 179 table_size = sizeof(struct page_cgroup) * PAGES_PER_SECTION;
181 base = alloc_page_cgroup(table_size, nid); 180 base = alloc_page_cgroup(table_size, nid);
182 181
@@ -196,7 +195,11 @@ static int __meminit init_section_page_cgroup(unsigned long pfn)
196 pc = base + index; 195 pc = base + index;
197 init_page_cgroup(pc, nr); 196 init_page_cgroup(pc, nr);
198 } 197 }
199 198 /*
199 * The passed "pfn" may not be aligned to SECTION. For the calculation
200 * we need to apply a mask.
201 */
202 pfn &= PAGE_SECTION_MASK;
200 section->page_cgroup = base - pfn; 203 section->page_cgroup = base - pfn;
201 total_usage += table_size; 204 total_usage += table_size;
202 return 0; 205 return 0;
@@ -225,10 +228,20 @@ int __meminit online_page_cgroup(unsigned long start_pfn,
225 start = start_pfn & ~(PAGES_PER_SECTION - 1); 228 start = start_pfn & ~(PAGES_PER_SECTION - 1);
226 end = ALIGN(start_pfn + nr_pages, PAGES_PER_SECTION); 229 end = ALIGN(start_pfn + nr_pages, PAGES_PER_SECTION);
227 230
231 if (nid == -1) {
232 /*
233 * In this case, "nid" already exists and contains valid memory.
234 * "start_pfn" passed to us is a pfn which is an arg for
235 * online__pages(), and start_pfn should exist.
236 */
237 nid = pfn_to_nid(start_pfn);
238 VM_BUG_ON(!node_state(nid, N_ONLINE));
239 }
240
228 for (pfn = start; !fail && pfn < end; pfn += PAGES_PER_SECTION) { 241 for (pfn = start; !fail && pfn < end; pfn += PAGES_PER_SECTION) {
229 if (!pfn_present(pfn)) 242 if (!pfn_present(pfn))
230 continue; 243 continue;
231 fail = init_section_page_cgroup(pfn); 244 fail = init_section_page_cgroup(pfn, nid);
232 } 245 }
233 if (!fail) 246 if (!fail)
234 return 0; 247 return 0;
@@ -284,25 +297,47 @@ static int __meminit page_cgroup_callback(struct notifier_block *self,
284void __init page_cgroup_init(void) 297void __init page_cgroup_init(void)
285{ 298{
286 unsigned long pfn; 299 unsigned long pfn;
287 int fail = 0; 300 int nid;
288 301
289 if (mem_cgroup_disabled()) 302 if (mem_cgroup_disabled())
290 return; 303 return;
291 304
292 for (pfn = 0; !fail && pfn < max_pfn; pfn += PAGES_PER_SECTION) { 305 for_each_node_state(nid, N_HIGH_MEMORY) {
293 if (!pfn_present(pfn)) 306 unsigned long start_pfn, end_pfn;
294 continue; 307
295 fail = init_section_page_cgroup(pfn); 308 start_pfn = node_start_pfn(nid);
296 } 309 end_pfn = node_end_pfn(nid);
297 if (fail) { 310 /*
298 printk(KERN_CRIT "try 'cgroup_disable=memory' boot option\n"); 311 * start_pfn and end_pfn may not be aligned to SECTION and the
299 panic("Out of memory"); 312 * page->flags of out of node pages are not initialized. So we
300 } else { 313 * scan [start_pfn, the biggest section's pfn < end_pfn) here.
301 hotplug_memory_notifier(page_cgroup_callback, 0); 314 */
315 for (pfn = start_pfn;
316 pfn < end_pfn;
317 pfn = ALIGN(pfn + 1, PAGES_PER_SECTION)) {
318
319 if (!pfn_valid(pfn))
320 continue;
321 /*
322 * Nodes's pfns can be overlapping.
323 * We know some arch can have a nodes layout such as
324 * -------------pfn-------------->
325 * N0 | N1 | N2 | N0 | N1 | N2|....
326 */
327 if (pfn_to_nid(pfn) != nid)
328 continue;
329 if (init_section_page_cgroup(pfn, nid))
330 goto oom;
331 }
302 } 332 }
333 hotplug_memory_notifier(page_cgroup_callback, 0);
303 printk(KERN_INFO "allocated %ld bytes of page_cgroup\n", total_usage); 334 printk(KERN_INFO "allocated %ld bytes of page_cgroup\n", total_usage);
304 printk(KERN_INFO "please try 'cgroup_disable=memory' option if you don't" 335 printk(KERN_INFO "please try 'cgroup_disable=memory' option if you "
305 " want memory cgroups\n"); 336 "don't want memory cgroups\n");
337 return;
338oom:
339 printk(KERN_CRIT "try 'cgroup_disable=memory' boot option\n");
340 panic("Out of memory");
306} 341}
307 342
308void __meminit pgdat_page_cgroup_init(struct pglist_data *pgdat) 343void __meminit pgdat_page_cgroup_init(struct pglist_data *pgdat)