Diffstat (limited to 'mm/memory_hotplug.c')
-rw-r--r--  mm/memory_hotplug.c  131
1 files changed, 74 insertions, 57 deletions
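The central change in this diff is the new lock_memory_hotplug()/unlock_memory_hotplug() pair, which the hotplug entry points below (online_pages(), mem_online_node(), add_memory(), offline_pages()) are converted to use in place of bare lock_system_sleep()/unlock_system_sleep(). A minimal caller sketch, for orientation only; example_hotplug_operation is a hypothetical name, not part of the patch:

/* Hypothetical caller, illustrative only; the real call sites are in the
 * diff below.  lock_memory_hotplug() takes mem_hotplug_mutex to serialize
 * hotplug operations against each other, then lock_system_sleep() to exclude
 * hibernation; unlock_memory_hotplug() releases them in reverse order. */
static int example_hotplug_operation(void)
{
	int ret = 0;

	lock_memory_hotplug();

	/* ... online/offline work runs here, fully serialized ... */

	unlock_memory_hotplug();
	return ret;
}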
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index dd186c1a5d53..c46887b5a11e 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -34,6 +34,23 @@
 
 #include "internal.h"
 
+DEFINE_MUTEX(mem_hotplug_mutex);
+
+void lock_memory_hotplug(void)
+{
+	mutex_lock(&mem_hotplug_mutex);
+
+	/* for exclusive hibernation if CONFIG_HIBERNATION=y */
+	lock_system_sleep();
+}
+
+void unlock_memory_hotplug(void)
+{
+	unlock_system_sleep();
+	mutex_unlock(&mem_hotplug_mutex);
+}
+
+
 /* add this memory to iomem resource */
 static struct resource *register_memory_resource(u64 start, u64 size)
 {
@@ -65,9 +82,10 @@ static void release_memory_resource(struct resource *res)
 
 #ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
 #ifndef CONFIG_SPARSEMEM_VMEMMAP
-static void get_page_bootmem(unsigned long info, struct page *page, int type)
+static void get_page_bootmem(unsigned long info, struct page *page,
+			     unsigned long type)
 {
-	atomic_set(&page->_mapcount, type);
+	page->lru.next = (struct list_head *) type;
 	SetPagePrivate(page);
 	set_page_private(page, info);
 	atomic_inc(&page->_count);
@@ -77,15 +95,16 @@ static void get_page_bootmem(unsigned long info, struct page *page, int type)
  * so use __ref to tell modpost not to generate a warning */
 void __ref put_page_bootmem(struct page *page)
 {
-	int type;
+	unsigned long type;
 
-	type = atomic_read(&page->_mapcount);
-	BUG_ON(type >= -1);
+	type = (unsigned long) page->lru.next;
+	BUG_ON(type < MEMORY_HOTPLUG_MIN_BOOTMEM_TYPE ||
+	       type > MEMORY_HOTPLUG_MAX_BOOTMEM_TYPE);
 
 	if (atomic_dec_return(&page->_count) == 1) {
 		ClearPagePrivate(page);
 		set_page_private(page, 0);
-		reset_page_mapcount(page);
+		INIT_LIST_HEAD(&page->lru);
 		__free_pages_bootmem(page, 0);
 	}
 
@@ -355,10 +374,6 @@ void online_page(struct page *page)
 	totalhigh_pages++;
 #endif
 
-#ifdef CONFIG_FLATMEM
-	max_mapnr = max(page_to_pfn(page), max_mapnr);
-#endif
-
 	ClearPageReserved(page);
 	init_page_count(page);
 	__free_page(page);
@@ -381,7 +396,7 @@ static int online_pages_range(unsigned long start_pfn, unsigned long nr_pages,
 }
 
 
-int online_pages(unsigned long pfn, unsigned long nr_pages)
+int __ref online_pages(unsigned long pfn, unsigned long nr_pages)
 {
 	unsigned long onlined_pages = 0;
 	struct zone *zone;
@@ -390,6 +405,7 @@ int online_pages(unsigned long pfn, unsigned long nr_pages)
 	int ret;
 	struct memory_notify arg;
 
+	lock_memory_hotplug();
 	arg.start_pfn = pfn;
 	arg.nr_pages = nr_pages;
 	arg.status_change_nid = -1;
@@ -402,6 +418,7 @@ int online_pages(unsigned long pfn, unsigned long nr_pages)
 	ret = notifier_to_errno(ret);
 	if (ret) {
 		memory_notify(MEM_CANCEL_ONLINE, &arg);
+		unlock_memory_hotplug();
 		return ret;
 	}
 	/*
@@ -426,6 +443,7 @@ int online_pages(unsigned long pfn, unsigned long nr_pages)
 		printk(KERN_DEBUG "online_pages %lx at %lx failed\n",
 			nr_pages, pfn);
 		memory_notify(MEM_CANCEL_ONLINE, &arg);
+		unlock_memory_hotplug();
 		return ret;
 	}
 
@@ -437,8 +455,9 @@ int online_pages(unsigned long pfn, unsigned long nr_pages)
 		zone_pcp_update(zone);
 
 	mutex_unlock(&zonelists_mutex);
-	setup_per_zone_wmarks();
-	calculate_zone_inactive_ratio(zone);
+
+	init_per_zone_wmark_min();
+
 	if (onlined_pages) {
 		kswapd_run(zone_to_nid(zone));
 		node_set_state(zone_to_nid(zone), N_HIGH_MEMORY);
@@ -450,6 +469,7 @@ int online_pages(unsigned long pfn, unsigned long nr_pages)
 
 	if (onlined_pages)
 		memory_notify(MEM_ONLINE, &arg);
+	unlock_memory_hotplug();
 
 	return 0;
 }
@@ -474,6 +494,14 @@ static pg_data_t __ref *hotadd_new_pgdat(int nid, u64 start)
 	/* init node's zones as empty zones, we don't have any present pages.*/
 	free_area_init_node(nid, zones_size, start_pfn, zholes_size);
 
+	/*
+	 * The node we allocated has no zone fallback lists. For avoiding
+	 * to access not-initialized zonelist, build here.
+	 */
+	mutex_lock(&zonelists_mutex);
+	build_all_zonelists(NULL);
+	mutex_unlock(&zonelists_mutex);
+
 	return pgdat;
 }
 
@@ -493,9 +521,9 @@ int mem_online_node(int nid)
 	pg_data_t *pgdat;
 	int ret;
 
-	lock_system_sleep();
+	lock_memory_hotplug();
 	pgdat = hotadd_new_pgdat(nid, 0);
-	if (pgdat) {
+	if (!pgdat) {
 		ret = -ENOMEM;
 		goto out;
 	}
@@ -504,7 +532,7 @@ int mem_online_node(int nid)
 	BUG_ON(ret);
 
 out:
-	unlock_system_sleep();
+	unlock_memory_hotplug();
 	return ret;
 }
 
@@ -516,7 +544,7 @@ int __ref add_memory(int nid, u64 start, u64 size)
 	struct resource *res;
 	int ret;
 
-	lock_system_sleep();
+	lock_memory_hotplug();
 
 	res = register_memory_resource(start, size);
 	ret = -EEXIST;
@@ -563,7 +591,7 @@ error:
 	release_memory_resource(res);
 
 out:
-	unlock_system_sleep();
+	unlock_memory_hotplug();
 	return ret;
 }
 EXPORT_SYMBOL_GPL(add_memory);
@@ -602,27 +630,14 @@ static struct page *next_active_pageblock(struct page *page)
 /* Checks if this range of memory is likely to be hot-removable. */
 int is_mem_section_removable(unsigned long start_pfn, unsigned long nr_pages)
 {
-	int type;
 	struct page *page = pfn_to_page(start_pfn);
 	struct page *end_page = page + nr_pages;
 
 	/* Check the starting page of each pageblock within the range */
 	for (; page < end_page; page = next_active_pageblock(page)) {
-		type = get_pageblock_migratetype(page);
-
-		/*
-		 * A pageblock containing MOVABLE or free pages is considered
-		 * removable
-		 */
-		if (type != MIGRATE_MOVABLE && !pageblock_free(page))
-			return 0;
-
-		/*
-		 * A pageblock starting with a PageReserved page is not
-		 * considered removable.
-		 */
-		if (PageReserved(page))
+		if (!is_pageblock_removable_nolock(page))
 			return 0;
+		cond_resched();
 	}
 
 	/* All pageblocks in the memory block are likely to be hot-removable */
@@ -659,7 +674,7 @@ static int test_pages_in_a_zone(unsigned long start_pfn, unsigned long end_pfn)
  * Scanning pfn is much easier than scanning lru list.
  * Scan pfn from start to end and Find LRU page.
  */
-int scan_lru_pages(unsigned long start, unsigned long end)
+static unsigned long scan_lru_pages(unsigned long start, unsigned long end)
 {
 	unsigned long pfn;
 	struct page *page;
@@ -695,7 +710,7 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
 		if (!pfn_valid(pfn))
 			continue;
 		page = pfn_to_page(pfn);
-		if (!page_count(page))
+		if (!get_page_unless_zero(page))
 			continue;
 		/*
 		 * We can skip free pages. And we can only deal with pages on
@@ -703,35 +718,39 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
 		 */
 		ret = isolate_lru_page(page);
 		if (!ret) { /* Success */
+			put_page(page);
 			list_add_tail(&page->lru, &source);
 			move_pages--;
 			inc_zone_page_state(page, NR_ISOLATED_ANON +
 					    page_is_file_cache(page));
 
 		} else {
-			/* Becasue we don't have big zone->lock. we should
-			   check this again here. */
-			if (page_count(page))
-				not_managed++;
 #ifdef CONFIG_DEBUG_VM
 			printk(KERN_ALERT "removing pfn %lx from LRU failed\n",
 			       pfn);
 			dump_page(page);
 #endif
+			put_page(page);
+			/* Because we don't have big zone->lock. we should
+			   check this again here. */
+			if (page_count(page)) {
+				not_managed++;
+				ret = -EBUSY;
+				break;
+			}
 		}
 	}
-	ret = -EBUSY;
-	if (not_managed) {
-		if (!list_empty(&source))
-			putback_lru_pages(&source);
-		goto out;
-	}
-	ret = 0;
-	if (list_empty(&source))
-		goto out;
-	/* this function returns # of failed pages */
-	ret = migrate_pages(&source, hotremove_migrate_alloc, 0, 1);
-
+	if (!list_empty(&source)) {
+		if (not_managed) {
+			putback_lru_pages(&source);
+			goto out;
+		}
+		/* this function returns # of failed pages */
+		ret = migrate_pages(&source, hotremove_migrate_alloc, 0,
+								true, true);
+		if (ret)
+			putback_lru_pages(&source);
+	}
 out:
 	return ret;
 }
@@ -783,7 +802,7 @@ check_pages_isolated(unsigned long start_pfn, unsigned long end_pfn)
 	return offlined;
 }
 
-static int offline_pages(unsigned long start_pfn,
+static int __ref offline_pages(unsigned long start_pfn,
 		  unsigned long end_pfn, unsigned long timeout)
 {
 	unsigned long pfn, nr_pages, expire;
@@ -803,7 +822,7 @@ static int offline_pages(unsigned long start_pfn,
 	if (!test_pages_in_a_zone(start_pfn, end_pfn))
 		return -EINVAL;
 
-	lock_system_sleep();
+	lock_memory_hotplug();
 
 	zone = page_zone(pfn_to_page(start_pfn));
 	node = zone_to_nid(zone);
@@ -840,7 +859,6 @@ repeat:
 	ret = 0;
 	if (drain) {
 		lru_add_drain_all();
-		flush_scheduled_work();
 		cond_resched();
 		drain_all_pages();
 	}
@@ -862,7 +880,6 @@ repeat:
 	}
 	/* drain all zone's lru pagevec, this is asyncronous... */
 	lru_add_drain_all();
-	flush_scheduled_work();
 	yield();
 	/* drain pcp pages , this is synchrouns. */
 	drain_all_pages();
@@ -883,8 +900,8 @@ repeat:
 	zone->zone_pgdat->node_present_pages -= offlined_pages;
 	totalram_pages -= offlined_pages;
 
-	setup_per_zone_wmarks();
-	calculate_zone_inactive_ratio(zone);
+	init_per_zone_wmark_min();
+
 	if (!node_present_pages(node)) {
 		node_clear_state(node, N_HIGH_MEMORY);
 		kswapd_stop(node);
@@ -894,7 +911,7 @@ repeat:
 	writeback_set_ratelimit();
 
 	memory_notify(MEM_OFFLINE, &arg);
-	unlock_system_sleep();
+	unlock_memory_hotplug();
 	return 0;
 
 failed_removal:
@@ -905,7 +922,7 @@ failed_removal:
 	undo_isolate_page_range(start_pfn, end_pfn);
 
 out:
-	unlock_system_sleep();
+	unlock_memory_hotplug();
 	return ret;
 }
 
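For context on the do_migrate_range() hunks above: the isolation path now pins each candidate page with get_page_unless_zero() before isolate_lru_page() and drops that pin with put_page() on both outcomes, so the page cannot be freed while it is being isolated. A rough sketch of that pattern, illustrative only; example_isolate_one_pfn is a hypothetical name, while the helpers it calls are the ones the diff actually uses:

/* Illustrative fragment assuming the context of mm/memory_hotplug.c
 * (pfn_to_page(), get_page_unless_zero(), isolate_lru_page() and put_page()
 * are the real kernel helpers; the function itself is made up). */
static void example_isolate_one_pfn(unsigned long pfn, struct list_head *source)
{
	struct page *page = pfn_to_page(pfn);

	/* pin the page so it cannot be freed under us */
	if (!get_page_unless_zero(page))
		return;		/* already free, nothing to migrate */

	if (!isolate_lru_page(page)) {
		/* success: LRU isolation holds its own reference */
		put_page(page);
		list_add_tail(&page->lru, source);
	} else {
		/* failure: drop our pin; the caller re-checks page_count() */
		put_page(page);
	}
}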