diff options
Diffstat (limited to 'mm/sparse.c')
-rw-r--r-- | mm/sparse.c | 145 |
1 files changed, 134 insertions, 11 deletions
diff --git a/mm/sparse.c b/mm/sparse.c
index 98d6b39c3472..dff71f173ae9 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c | |||
@@ -8,6 +8,7 @@ | |||
8 | #include <linux/module.h> | 8 | #include <linux/module.h> |
9 | #include <linux/spinlock.h> | 9 | #include <linux/spinlock.h> |
10 | #include <linux/vmalloc.h> | 10 | #include <linux/vmalloc.h> |
11 | #include "internal.h" | ||
11 | #include <asm/dma.h> | 12 | #include <asm/dma.h> |
12 | #include <asm/pgalloc.h> | 13 | #include <asm/pgalloc.h> |
13 | #include <asm/pgtable.h> | 14 | #include <asm/pgtable.h> |
@@ -208,12 +209,12 @@ static unsigned long sparse_encode_mem_map(struct page *mem_map, unsigned long p | |||
208 | } | 209 | } |
209 | 210 | ||
210 | /* | 211 | /* |
211 | * We need this if we ever free the mem_maps. While not implemented yet, | 212 | * Decode mem_map from the coded memmap |
212 | * this function is included for parity with its sibling. | ||
213 | */ | 213 | */ |
214 | static __attribute((unused)) | ||
215 | struct page *sparse_decode_mem_map(unsigned long coded_mem_map, unsigned long pnum) | 214 | struct page *sparse_decode_mem_map(unsigned long coded_mem_map, unsigned long pnum) |
216 | { | 215 | { |
216 | /* mask off the extra low bits of information */ | ||
217 | coded_mem_map &= SECTION_MAP_MASK; | ||
217 | return ((struct page *)coded_mem_map) + section_nr_to_pfn(pnum); | 218 | return ((struct page *)coded_mem_map) + section_nr_to_pfn(pnum); |
218 | } | 219 | } |
219 | 220 | ||
@@ -232,7 +233,7 @@ static int __meminit sparse_init_one_section(struct mem_section *ms, | |||
232 | return 1; | 233 | return 1; |
233 | } | 234 | } |
234 | 235 | ||
235 | static unsigned long usemap_size(void) | 236 | unsigned long usemap_size(void) |
236 | { | 237 | { |
237 | unsigned long size_bytes; | 238 | unsigned long size_bytes; |
238 | size_bytes = roundup(SECTION_BLOCKFLAGS_BITS, 8) / 8; | 239 | size_bytes = roundup(SECTION_BLOCKFLAGS_BITS, 8) / 8; |
@@ -249,11 +250,22 @@ static unsigned long *__kmalloc_section_usemap(void) | |||
249 | 250 | ||
250 | static unsigned long *__init sparse_early_usemap_alloc(unsigned long pnum) | 251 | static unsigned long *__init sparse_early_usemap_alloc(unsigned long pnum) |
251 | { | 252 | { |
252 | unsigned long *usemap; | 253 | unsigned long *usemap, section_nr; |
253 | struct mem_section *ms = __nr_to_section(pnum); | 254 | struct mem_section *ms = __nr_to_section(pnum); |
254 | int nid = sparse_early_nid(ms); | 255 | int nid = sparse_early_nid(ms); |
256 | struct pglist_data *pgdat = NODE_DATA(nid); | ||
255 | 257 | ||
256 | usemap = alloc_bootmem_node(NODE_DATA(nid), usemap_size()); | 258 | /* |
259 | * A usemap's page can't be freed until the other sections | ||
260 | * which use it are freed. The pgdat has the same constraint. | ||
261 | * If section A has pgdat and section B has usemap for other | ||
262 | * sections (including section A), neither section can be removed, | ||
263 | * because they depend on each other. | ||
264 | * To solve the above issue, this collects all usemaps on the same | ||
265 | * section which has pgdat. | ||
266 | */ | ||
267 | section_nr = pfn_to_section_nr(__pa(pgdat) >> PAGE_SHIFT); | ||
268 | usemap = alloc_bootmem_section(usemap_size(), section_nr); | ||
257 | if (usemap) | 269 | if (usemap) |
258 | return usemap; | 270 | return usemap; |
259 | 271 | ||
@@ -273,8 +285,8 @@ struct page __init *sparse_mem_map_populate(unsigned long pnum, int nid) | |||
273 | if (map) | 285 | if (map) |
274 | return map; | 286 | return map; |
275 | 287 | ||
276 | map = alloc_bootmem_node(NODE_DATA(nid), | 288 | map = alloc_bootmem_pages_node(NODE_DATA(nid), |
277 | sizeof(struct page) * PAGES_PER_SECTION); | 289 | PAGE_ALIGN(sizeof(struct page) * PAGES_PER_SECTION)); |
278 | return map; | 290 | return map; |
279 | } | 291 | } |
280 | #endif /* !CONFIG_SPARSEMEM_VMEMMAP */ | 292 | #endif /* !CONFIG_SPARSEMEM_VMEMMAP */ |
@@ -295,6 +307,9 @@ struct page __init *sparse_early_mem_map_alloc(unsigned long pnum) | |||
295 | return NULL; | 307 | return NULL; |
296 | } | 308 | } |
297 | 309 | ||
310 | void __attribute__((weak)) __meminit vmemmap_populate_print_last(void) | ||
311 | { | ||
312 | } | ||
298 | /* | 313 | /* |
299 | * Allocate the accumulated non-linear sections, allocate a mem_map | 314 | * Allocate the accumulated non-linear sections, allocate a mem_map |
300 | * for each and record the physical to section mapping. | 315 | * for each and record the physical to section mapping. |
@@ -304,22 +319,50 @@ void __init sparse_init(void) | |||
304 | unsigned long pnum; | 319 | unsigned long pnum; |
305 | struct page *map; | 320 | struct page *map; |
306 | unsigned long *usemap; | 321 | unsigned long *usemap; |
322 | unsigned long **usemap_map; | ||
323 | int size; | ||
324 | |||
325 | /* | ||
326 | * map is using big page (e.g. 2M in x86 64 bit) | ||
327 | * usemap is less than one page (e.g. 24 bytes) | ||
328 | * so alloc 2M (with 2M align) and 24 bytes in turn will | ||
329 | * make next 2M slip to one more 2M later. | ||
330 | * then in big system, the memory will have a lot of holes... | ||
331 | * here try to allocate 2M pages continuously. | ||
332 | * | ||
333 | * powerpc needs to call sparse_init_one_section right after each | ||
334 | * sparse_early_mem_map_alloc, so allocate usemap_map first. | ||
335 | */ | ||
336 | size = sizeof(unsigned long *) * NR_MEM_SECTIONS; | ||
337 | usemap_map = alloc_bootmem(size); | ||
338 | if (!usemap_map) | ||
339 | panic("can not allocate usemap_map\n"); | ||
307 | 340 | ||
308 | for (pnum = 0; pnum < NR_MEM_SECTIONS; pnum++) { | 341 | for (pnum = 0; pnum < NR_MEM_SECTIONS; pnum++) { |
309 | if (!present_section_nr(pnum)) | 342 | if (!present_section_nr(pnum)) |
310 | continue; | 343 | continue; |
344 | usemap_map[pnum] = sparse_early_usemap_alloc(pnum); | ||
345 | } | ||
311 | 346 | ||
312 | map = sparse_early_mem_map_alloc(pnum); | 347 | for (pnum = 0; pnum < NR_MEM_SECTIONS; pnum++) { |
313 | if (!map) | 348 | if (!present_section_nr(pnum)) |
314 | continue; | 349 | continue; |
315 | 350 | ||
316 | usemap = sparse_early_usemap_alloc(pnum); | 351 | usemap = usemap_map[pnum]; |
317 | if (!usemap) | 352 | if (!usemap) |
318 | continue; | 353 | continue; |
319 | 354 | ||
355 | map = sparse_early_mem_map_alloc(pnum); | ||
356 | if (!map) | ||
357 | continue; | ||
358 | |||
320 | sparse_init_one_section(__nr_to_section(pnum), pnum, map, | 359 | sparse_init_one_section(__nr_to_section(pnum), pnum, map, |
321 | usemap); | 360 | usemap); |
322 | } | 361 | } |
362 | |||
363 | vmemmap_populate_print_last(); | ||
364 | |||
365 | free_bootmem(__pa(usemap_map), size); | ||
323 | } | 366 | } |
324 | 367 | ||
325 | #ifdef CONFIG_MEMORY_HOTPLUG | 368 | #ifdef CONFIG_MEMORY_HOTPLUG |
@@ -334,6 +377,9 @@ static void __kfree_section_memmap(struct page *memmap, unsigned long nr_pages) | |||
334 | { | 377 | { |
335 | return; /* XXX: Not implemented yet */ | 378 | return; /* XXX: Not implemented yet */ |
336 | } | 379 | } |
380 | static void free_map_bootmem(struct page *page, unsigned long nr_pages) | ||
381 | { | ||
382 | } | ||
337 | #else | 383 | #else |
338 | static struct page *__kmalloc_section_memmap(unsigned long nr_pages) | 384 | static struct page *__kmalloc_section_memmap(unsigned long nr_pages) |
339 | { | 385 | { |
@@ -371,8 +417,69 @@ static void __kfree_section_memmap(struct page *memmap, unsigned long nr_pages) | |||
371 | free_pages((unsigned long)memmap, | 417 | free_pages((unsigned long)memmap, |
372 | get_order(sizeof(struct page) * nr_pages)); | 418 | get_order(sizeof(struct page) * nr_pages)); |
373 | } | 419 | } |
420 | |||
421 | static void free_map_bootmem(struct page *page, unsigned long nr_pages) | ||
422 | { | ||
423 | unsigned long maps_section_nr, removing_section_nr, i; | ||
424 | int magic; | ||
425 | |||
426 | for (i = 0; i < nr_pages; i++, page++) { | ||
427 | magic = atomic_read(&page->_mapcount); | ||
428 | |||
429 | BUG_ON(magic == NODE_INFO); | ||
430 | |||
431 | maps_section_nr = pfn_to_section_nr(page_to_pfn(page)); | ||
432 | removing_section_nr = page->private; | ||
433 | |||
434 | /* | ||
435 | * When this function is called, the section being removed is | ||
436 | * in the logically offlined state. This means all its pages are | ||
437 | * isolated from the page allocator. If the section's memmap is | ||
438 | * placed on that same section, it must not be freed. | ||
439 | * If it were freed, the page allocator might hand out memory | ||
440 | * which will be physically removed soon. | ||
441 | */ | ||
442 | if (maps_section_nr != removing_section_nr) | ||
443 | put_page_bootmem(page); | ||
444 | } | ||
445 | } | ||
374 | #endif /* CONFIG_SPARSEMEM_VMEMMAP */ | 446 | #endif /* CONFIG_SPARSEMEM_VMEMMAP */ |
375 | 447 | ||
448 | static void free_section_usemap(struct page *memmap, unsigned long *usemap) | ||
449 | { | ||
450 | struct page *usemap_page; | ||
451 | unsigned long nr_pages; | ||
452 | |||
453 | if (!usemap) | ||
454 | return; | ||
455 | |||
456 | usemap_page = virt_to_page(usemap); | ||
457 | /* | ||
458 | * Check to see if allocation came from hot-plug-add | ||
459 | */ | ||
460 | if (PageSlab(usemap_page)) { | ||
461 | kfree(usemap); | ||
462 | if (memmap) | ||
463 | __kfree_section_memmap(memmap, PAGES_PER_SECTION); | ||
464 | return; | ||
465 | } | ||
466 | |||
467 | /* | ||
468 | * The usemap came from bootmem. This is packed with other usemaps | ||
469 | * on the section which has pgdat at boot time. Just keep it as is now. | ||
470 | */ | ||
471 | |||
472 | if (memmap) { | ||
473 | struct page *memmap_page; | ||
474 | memmap_page = virt_to_page(memmap); | ||
475 | |||
476 | nr_pages = PAGE_ALIGN(PAGES_PER_SECTION * sizeof(struct page)) | ||
477 | >> PAGE_SHIFT; | ||
478 | |||
479 | free_map_bootmem(memmap_page, nr_pages); | ||
480 | } | ||
481 | } | ||
482 | |||
376 | /* | 483 | /* |
377 | * returns the number of sections whose mem_maps were properly | 484 | * returns the number of sections whose mem_maps were properly |
378 | * set. If this is <=0, then that means that the passed-in | 485 | * set. If this is <=0, then that means that the passed-in |
@@ -425,4 +532,20 @@ out: | |||
425 | } | 532 | } |
426 | return ret; | 533 | return ret; |
427 | } | 534 | } |
535 | |||
536 | void sparse_remove_one_section(struct zone *zone, struct mem_section *ms) | ||
537 | { | ||
538 | struct page *memmap = NULL; | ||
539 | unsigned long *usemap = NULL; | ||
540 | |||
541 | if (ms->section_mem_map) { | ||
542 | usemap = ms->pageblock_flags; | ||
543 | memmap = sparse_decode_mem_map(ms->section_mem_map, | ||
544 | __section_nr(ms)); | ||
545 | ms->section_mem_map = 0; | ||
546 | ms->pageblock_flags = NULL; | ||
547 | } | ||
548 | |||
549 | free_section_usemap(memmap, usemap); | ||
550 | } | ||
428 | #endif | 551 | #endif |