diff options
-rw-r--r-- | arch/ia64/Kconfig | 3 | ||||
-rw-r--r-- | include/linux/kernel.h | 1 | ||||
-rw-r--r-- | include/linux/memory_hotplug.h | 5 | ||||
-rw-r--r-- | mm/Kconfig | 5 | ||||
-rw-r--r-- | mm/memory_hotplug.c | 254 | ||||
-rw-r--r-- | mm/page_alloc.c | 47 |
6 files changed, 314 insertions, 1 deletions
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index f80f5e2aec87..59b91ac861ac 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig | |||
@@ -305,6 +305,9 @@ config HOTPLUG_CPU | |||
305 | config ARCH_ENABLE_MEMORY_HOTPLUG | 305 | config ARCH_ENABLE_MEMORY_HOTPLUG |
306 | def_bool y | 306 | def_bool y |
307 | 307 | ||
308 | config ARCH_ENABLE_MEMORY_HOTREMOVE | ||
309 | def_bool y | ||
310 | |||
308 | config SCHED_SMT | 311 | config SCHED_SMT |
309 | bool "SMT scheduler support" | 312 | bool "SMT scheduler support" |
310 | depends on SMP | 313 | depends on SMP |
diff --git a/include/linux/kernel.h b/include/linux/kernel.h index d9725a28a265..5fdbc814c2eb 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h | |||
@@ -35,6 +35,7 @@ extern const char linux_proc_banner[]; | |||
35 | #define ALIGN(x,a) __ALIGN_MASK(x,(typeof(x))(a)-1) | 35 | #define ALIGN(x,a) __ALIGN_MASK(x,(typeof(x))(a)-1) |
36 | #define __ALIGN_MASK(x,mask) (((x)+(mask))&~(mask)) | 36 | #define __ALIGN_MASK(x,mask) (((x)+(mask))&~(mask)) |
37 | #define PTR_ALIGN(p, a) ((typeof(p))ALIGN((unsigned long)(p), (a))) | 37 | #define PTR_ALIGN(p, a) ((typeof(p))ALIGN((unsigned long)(p), (a))) |
38 | #define IS_ALIGNED(x,a) (((x) % ((typeof(x))(a))) == 0) | ||
38 | 39 | ||
39 | #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]) + __must_be_array(arr)) | 40 | #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]) + __must_be_array(arr)) |
40 | 41 | ||
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 0a14dad95453..665951ef0390 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h | |||
@@ -58,7 +58,10 @@ extern int add_one_highpage(struct page *page, int pfn, int bad_ppro); | |||
58 | extern void online_page(struct page *page); | 58 | extern void online_page(struct page *page); |
59 | /* VM interface that may be used by firmware interface */ | 59 | /* VM interface that may be used by firmware interface */ |
60 | extern int online_pages(unsigned long, unsigned long); | 60 | extern int online_pages(unsigned long, unsigned long); |
61 | 61 | #ifdef CONFIG_MEMORY_HOTREMOVE | |
62 | extern int offline_pages(unsigned long, unsigned long, unsigned long); | ||
63 | extern void __offline_isolated_pages(unsigned long, unsigned long); | ||
64 | #endif | ||
62 | /* reasonably generic interface to expand the physical pages in a zone */ | 65 | /* reasonably generic interface to expand the physical pages in a zone */ |
63 | extern int __add_pages(struct zone *zone, unsigned long start_pfn, | 66 | extern int __add_pages(struct zone *zone, unsigned long start_pfn, |
64 | unsigned long nr_pages); | 67 | unsigned long nr_pages); |
diff --git a/mm/Kconfig b/mm/Kconfig index b06730668412..1cc6cada2bbf 100644 --- a/mm/Kconfig +++ b/mm/Kconfig | |||
@@ -139,6 +139,11 @@ config MEMORY_HOTPLUG_SPARSE | |||
139 | def_bool y | 139 | def_bool y |
140 | depends on SPARSEMEM && MEMORY_HOTPLUG | 140 | depends on SPARSEMEM && MEMORY_HOTPLUG |
141 | 141 | ||
142 | config MEMORY_HOTREMOVE | ||
143 | bool "Allow for memory hot remove" | ||
144 | depends on MEMORY_HOTPLUG && ARCH_ENABLE_MEMORY_HOTREMOVE | ||
145 | depends on MIGRATION | ||
146 | |||
142 | # Heavily threaded applications may benefit from splitting the mm-wide | 147 | # Heavily threaded applications may benefit from splitting the mm-wide |
143 | # page_table_lock, so that faults on different parts of the user address | 148 | # page_table_lock, so that faults on different parts of the user address |
144 | # space can be handled with less contention: split it at this NR_CPUS. | 149 | # space can be handled with less contention: split it at this NR_CPUS. |
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 1cbe9579e233..c4e1b958efde 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c | |||
@@ -23,6 +23,9 @@ | |||
23 | #include <linux/vmalloc.h> | 23 | #include <linux/vmalloc.h> |
24 | #include <linux/ioport.h> | 24 | #include <linux/ioport.h> |
25 | #include <linux/cpuset.h> | 25 | #include <linux/cpuset.h> |
26 | #include <linux/delay.h> | ||
27 | #include <linux/migrate.h> | ||
28 | #include <linux/page-isolation.h> | ||
26 | 29 | ||
27 | #include <asm/tlbflush.h> | 30 | #include <asm/tlbflush.h> |
28 | 31 | ||
@@ -302,3 +305,254 @@ error: | |||
302 | return ret; | 305 | return ret; |
303 | } | 306 | } |
304 | EXPORT_SYMBOL_GPL(add_memory); | 307 | EXPORT_SYMBOL_GPL(add_memory); |
308 | |||
309 | #ifdef CONFIG_MEMORY_HOTREMOVE | ||
310 | /* | ||
311 | * Confirm all pages in a range [start, end) is belongs to the same zone. | ||
312 | */ | ||
313 | static int test_pages_in_a_zone(unsigned long start_pfn, unsigned long end_pfn) | ||
314 | { | ||
315 | unsigned long pfn; | ||
316 | struct zone *zone = NULL; | ||
317 | struct page *page; | ||
318 | int i; | ||
319 | for (pfn = start_pfn; | ||
320 | pfn < end_pfn; | ||
321 | pfn += MAX_ORDER_NR_PAGES) { | ||
322 | i = 0; | ||
323 | /* This is just a CONFIG_HOLES_IN_ZONE check.*/ | ||
324 | while ((i < MAX_ORDER_NR_PAGES) && !pfn_valid_within(pfn + i)) | ||
325 | i++; | ||
326 | if (i == MAX_ORDER_NR_PAGES) | ||
327 | continue; | ||
328 | page = pfn_to_page(pfn + i); | ||
329 | if (zone && page_zone(page) != zone) | ||
330 | return 0; | ||
331 | zone = page_zone(page); | ||
332 | } | ||
333 | return 1; | ||
334 | } | ||
335 | |||
/*
 * Scanning pfns is much easier than scanning the LRU list.
 * Walk [start, end) and return the pfn of the first page found on an LRU,
 * or 0 when no such page exists in the range.
 *
 * The result is a pfn, so it is returned as unsigned long: squeezing it
 * through an int would truncate (or turn negative) any pfn above INT_MAX.
 */
unsigned long scan_lru_pages(unsigned long start, unsigned long end)
{
	unsigned long pfn;

	for (pfn = start; pfn < end; pfn++) {
		/* Holes have no struct page to inspect. */
		if (!pfn_valid(pfn))
			continue;
		if (PageLRU(pfn_to_page(pfn)))
			return pfn;
	}
	return 0;
}
353 | |||
354 | static struct page * | ||
355 | hotremove_migrate_alloc(struct page *page, | ||
356 | unsigned long private, | ||
357 | int **x) | ||
358 | { | ||
359 | /* This should be improoooooved!! */ | ||
360 | return alloc_page(GFP_HIGHUSER_PAGECACHE); | ||
361 | } | ||
362 | |||
363 | |||
364 | #define NR_OFFLINE_AT_ONCE_PAGES (256) | ||
365 | static int | ||
366 | do_migrate_range(unsigned long start_pfn, unsigned long end_pfn) | ||
367 | { | ||
368 | unsigned long pfn; | ||
369 | struct page *page; | ||
370 | int move_pages = NR_OFFLINE_AT_ONCE_PAGES; | ||
371 | int not_managed = 0; | ||
372 | int ret = 0; | ||
373 | LIST_HEAD(source); | ||
374 | |||
375 | for (pfn = start_pfn; pfn < end_pfn && move_pages > 0; pfn++) { | ||
376 | if (!pfn_valid(pfn)) | ||
377 | continue; | ||
378 | page = pfn_to_page(pfn); | ||
379 | if (!page_count(page)) | ||
380 | continue; | ||
381 | /* | ||
382 | * We can skip free pages. And we can only deal with pages on | ||
383 | * LRU. | ||
384 | */ | ||
385 | ret = isolate_lru_page(page, &source); | ||
386 | if (!ret) { /* Success */ | ||
387 | move_pages--; | ||
388 | } else { | ||
389 | /* Becasue we don't have big zone->lock. we should | ||
390 | check this again here. */ | ||
391 | if (page_count(page)) | ||
392 | not_managed++; | ||
393 | #ifdef CONFIG_DEBUG_VM | ||
394 | printk(KERN_INFO "removing from LRU failed" | ||
395 | " %lx/%d/%lx\n", | ||
396 | pfn, page_count(page), page->flags); | ||
397 | #endif | ||
398 | } | ||
399 | } | ||
400 | ret = -EBUSY; | ||
401 | if (not_managed) { | ||
402 | if (!list_empty(&source)) | ||
403 | putback_lru_pages(&source); | ||
404 | goto out; | ||
405 | } | ||
406 | ret = 0; | ||
407 | if (list_empty(&source)) | ||
408 | goto out; | ||
409 | /* this function returns # of failed pages */ | ||
410 | ret = migrate_pages(&source, hotremove_migrate_alloc, 0); | ||
411 | |||
412 | out: | ||
413 | return ret; | ||
414 | } | ||
415 | |||
/*
 * walk_memory_resource() callback: remove the chunk from free_area[] and
 * mark all of its pages as Reserved.  @data is unused.  Always returns 0.
 */
static int
offline_isolated_pages_cb(unsigned long start, unsigned long nr_pages,
			  void *data)
{
	unsigned long end = start + nr_pages;

	__offline_isolated_pages(start, end);
	return 0;
}
426 | |||
427 | static void | ||
428 | offline_isolated_pages(unsigned long start_pfn, unsigned long end_pfn) | ||
429 | { | ||
430 | walk_memory_resource(start_pfn, end_pfn - start_pfn, NULL, | ||
431 | offline_isolated_pages_cb); | ||
432 | } | ||
433 | |||
/*
 * walk_memory_resource() callback: check that all pages in the chunk,
 * recorded as a memory resource, are isolated.
 *
 * @data points to a long accumulator.  When test_pages_isolated() returns
 * 0 for the chunk, nr_pages is added to it.  The test_pages_isolated()
 * result is returned unchanged.
 */
static int
check_pages_isolated_cb(unsigned long start_pfn, unsigned long nr_pages,
			void *data)
{
	long *offlined = data;
	int ret;

	ret = test_pages_isolated(start_pfn, start_pfn + nr_pages);
	if (!ret)
		*offlined += (long)nr_pages;
	return ret;
}
449 | |||
450 | static long | ||
451 | check_pages_isolated(unsigned long start_pfn, unsigned long end_pfn) | ||
452 | { | ||
453 | long offlined = 0; | ||
454 | int ret; | ||
455 | |||
456 | ret = walk_memory_resource(start_pfn, end_pfn - start_pfn, &offlined, | ||
457 | check_pages_isolated_cb); | ||
458 | if (ret < 0) | ||
459 | offlined = (long)ret; | ||
460 | return offlined; | ||
461 | } | ||
462 | |||
463 | extern void drain_all_local_pages(void); | ||
464 | |||
465 | int offline_pages(unsigned long start_pfn, | ||
466 | unsigned long end_pfn, unsigned long timeout) | ||
467 | { | ||
468 | unsigned long pfn, nr_pages, expire; | ||
469 | long offlined_pages; | ||
470 | int ret, drain, retry_max; | ||
471 | struct zone *zone; | ||
472 | |||
473 | BUG_ON(start_pfn >= end_pfn); | ||
474 | /* at least, alignment against pageblock is necessary */ | ||
475 | if (!IS_ALIGNED(start_pfn, pageblock_nr_pages)) | ||
476 | return -EINVAL; | ||
477 | if (!IS_ALIGNED(end_pfn, pageblock_nr_pages)) | ||
478 | return -EINVAL; | ||
479 | /* This makes hotplug much easier...and readable. | ||
480 | we assume this for now. .*/ | ||
481 | if (!test_pages_in_a_zone(start_pfn, end_pfn)) | ||
482 | return -EINVAL; | ||
483 | /* set above range as isolated */ | ||
484 | ret = start_isolate_page_range(start_pfn, end_pfn); | ||
485 | if (ret) | ||
486 | return ret; | ||
487 | nr_pages = end_pfn - start_pfn; | ||
488 | pfn = start_pfn; | ||
489 | expire = jiffies + timeout; | ||
490 | drain = 0; | ||
491 | retry_max = 5; | ||
492 | repeat: | ||
493 | /* start memory hot removal */ | ||
494 | ret = -EAGAIN; | ||
495 | if (time_after(jiffies, expire)) | ||
496 | goto failed_removal; | ||
497 | ret = -EINTR; | ||
498 | if (signal_pending(current)) | ||
499 | goto failed_removal; | ||
500 | ret = 0; | ||
501 | if (drain) { | ||
502 | lru_add_drain_all(); | ||
503 | flush_scheduled_work(); | ||
504 | cond_resched(); | ||
505 | drain_all_local_pages(); | ||
506 | } | ||
507 | |||
508 | pfn = scan_lru_pages(start_pfn, end_pfn); | ||
509 | if (pfn) { /* We have page on LRU */ | ||
510 | ret = do_migrate_range(pfn, end_pfn); | ||
511 | if (!ret) { | ||
512 | drain = 1; | ||
513 | goto repeat; | ||
514 | } else { | ||
515 | if (ret < 0) | ||
516 | if (--retry_max == 0) | ||
517 | goto failed_removal; | ||
518 | yield(); | ||
519 | drain = 1; | ||
520 | goto repeat; | ||
521 | } | ||
522 | } | ||
523 | /* drain all zone's lru pagevec, this is asyncronous... */ | ||
524 | lru_add_drain_all(); | ||
525 | flush_scheduled_work(); | ||
526 | yield(); | ||
527 | /* drain pcp pages , this is synchrouns. */ | ||
528 | drain_all_local_pages(); | ||
529 | /* check again */ | ||
530 | offlined_pages = check_pages_isolated(start_pfn, end_pfn); | ||
531 | if (offlined_pages < 0) { | ||
532 | ret = -EBUSY; | ||
533 | goto failed_removal; | ||
534 | } | ||
535 | printk(KERN_INFO "Offlined Pages %ld\n", offlined_pages); | ||
536 | /* Ok, all of our target is islaoted. | ||
537 | We cannot do rollback at this point. */ | ||
538 | offline_isolated_pages(start_pfn, end_pfn); | ||
539 | /* reset pagetype flags */ | ||
540 | start_isolate_page_range(start_pfn, end_pfn); | ||
541 | /* removal success */ | ||
542 | zone = page_zone(pfn_to_page(start_pfn)); | ||
543 | zone->present_pages -= offlined_pages; | ||
544 | zone->zone_pgdat->node_present_pages -= offlined_pages; | ||
545 | totalram_pages -= offlined_pages; | ||
546 | num_physpages -= offlined_pages; | ||
547 | vm_total_pages = nr_free_pagecache_pages(); | ||
548 | writeback_set_ratelimit(); | ||
549 | return 0; | ||
550 | |||
551 | failed_removal: | ||
552 | printk(KERN_INFO "memory offlining %lx to %lx failed\n", | ||
553 | start_pfn, end_pfn); | ||
554 | /* pushback to free area */ | ||
555 | undo_isolate_page_range(start_pfn, end_pfn); | ||
556 | return ret; | ||
557 | } | ||
558 | #endif /* CONFIG_MEMORY_HOTREMOVE */ | ||
diff --git a/mm/page_alloc.c b/mm/page_alloc.c index a44715e82058..d315e1127dc9 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c | |||
@@ -4477,3 +4477,50 @@ void unset_migratetype_isolate(struct page *page) | |||
4477 | out: | 4477 | out: |
4478 | spin_unlock_irqrestore(&zone->lock, flags); | 4478 | spin_unlock_irqrestore(&zone->lock, flags); |
4479 | } | 4479 | } |
4480 | |||
4481 | #ifdef CONFIG_MEMORY_HOTREMOVE | ||
4482 | /* | ||
4483 | * All pages in the range must be isolated before calling this. | ||
4484 | */ | ||
4485 | void | ||
4486 | __offline_isolated_pages(unsigned long start_pfn, unsigned long end_pfn) | ||
4487 | { | ||
4488 | struct page *page; | ||
4489 | struct zone *zone; | ||
4490 | int order, i; | ||
4491 | unsigned long pfn; | ||
4492 | unsigned long flags; | ||
4493 | /* find the first valid pfn */ | ||
4494 | for (pfn = start_pfn; pfn < end_pfn; pfn++) | ||
4495 | if (pfn_valid(pfn)) | ||
4496 | break; | ||
4497 | if (pfn == end_pfn) | ||
4498 | return; | ||
4499 | zone = page_zone(pfn_to_page(pfn)); | ||
4500 | spin_lock_irqsave(&zone->lock, flags); | ||
4501 | pfn = start_pfn; | ||
4502 | while (pfn < end_pfn) { | ||
4503 | if (!pfn_valid(pfn)) { | ||
4504 | pfn++; | ||
4505 | continue; | ||
4506 | } | ||
4507 | page = pfn_to_page(pfn); | ||
4508 | BUG_ON(page_count(page)); | ||
4509 | BUG_ON(!PageBuddy(page)); | ||
4510 | order = page_order(page); | ||
4511 | #ifdef CONFIG_DEBUG_VM | ||
4512 | printk(KERN_INFO "remove from free list %lx %d %lx\n", | ||
4513 | pfn, 1 << order, end_pfn); | ||
4514 | #endif | ||
4515 | list_del(&page->lru); | ||
4516 | rmv_page_order(page); | ||
4517 | zone->free_area[order].nr_free--; | ||
4518 | __mod_zone_page_state(zone, NR_FREE_PAGES, | ||
4519 | - (1UL << order)); | ||
4520 | for (i = 0; i < (1 << order); i++) | ||
4521 | SetPageReserved((page+i)); | ||
4522 | pfn += (1 << order); | ||
4523 | } | ||
4524 | spin_unlock_irqrestore(&zone->lock, flags); | ||
4525 | } | ||
4526 | #endif | ||