about summary refs log tree commit diff stats
path: root/mm/memory_hotplug.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/memory_hotplug.c')
-rw-r--r-- mm/memory_hotplug.c 131
1 files changed, 74 insertions, 57 deletions
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index dd186c1a5d53..c46887b5a11e 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -34,6 +34,23 @@
34 34
35#include "internal.h" 35#include "internal.h"
36 36
37DEFINE_MUTEX(mem_hotplug_mutex);
38
39void lock_memory_hotplug(void)
40{
41 mutex_lock(&mem_hotplug_mutex);
42
43 /* for exclusive hibernation if CONFIG_HIBERNATION=y */
44 lock_system_sleep();
45}
46
47void unlock_memory_hotplug(void)
48{
49 unlock_system_sleep();
50 mutex_unlock(&mem_hotplug_mutex);
51}
52
53
37/* add this memory to iomem resource */ 54/* add this memory to iomem resource */
38static struct resource *register_memory_resource(u64 start, u64 size) 55static struct resource *register_memory_resource(u64 start, u64 size)
39{ 56{
@@ -65,9 +82,10 @@ static void release_memory_resource(struct resource *res)
65 82
66#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE 83#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
67#ifndef CONFIG_SPARSEMEM_VMEMMAP 84#ifndef CONFIG_SPARSEMEM_VMEMMAP
68static void get_page_bootmem(unsigned long info, struct page *page, int type) 85static void get_page_bootmem(unsigned long info, struct page *page,
86 unsigned long type)
69{ 87{
70 atomic_set(&page->_mapcount, type); 88 page->lru.next = (struct list_head *) type;
71 SetPagePrivate(page); 89 SetPagePrivate(page);
72 set_page_private(page, info); 90 set_page_private(page, info);
73 atomic_inc(&page->_count); 91 atomic_inc(&page->_count);
@@ -77,15 +95,16 @@ static void get_page_bootmem(unsigned long info, struct page *page, int type)
77 * so use __ref to tell modpost not to generate a warning */ 95 * so use __ref to tell modpost not to generate a warning */
78void __ref put_page_bootmem(struct page *page) 96void __ref put_page_bootmem(struct page *page)
79{ 97{
80 int type; 98 unsigned long type;
81 99
82 type = atomic_read(&page->_mapcount); 100 type = (unsigned long) page->lru.next;
83 BUG_ON(type >= -1); 101 BUG_ON(type < MEMORY_HOTPLUG_MIN_BOOTMEM_TYPE ||
102 type > MEMORY_HOTPLUG_MAX_BOOTMEM_TYPE);
84 103
85 if (atomic_dec_return(&page->_count) == 1) { 104 if (atomic_dec_return(&page->_count) == 1) {
86 ClearPagePrivate(page); 105 ClearPagePrivate(page);
87 set_page_private(page, 0); 106 set_page_private(page, 0);
88 reset_page_mapcount(page); 107 INIT_LIST_HEAD(&page->lru);
89 __free_pages_bootmem(page, 0); 108 __free_pages_bootmem(page, 0);
90 } 109 }
91 110
@@ -355,10 +374,6 @@ void online_page(struct page *page)
355 totalhigh_pages++; 374 totalhigh_pages++;
356#endif 375#endif
357 376
358#ifdef CONFIG_FLATMEM
359 max_mapnr = max(page_to_pfn(page), max_mapnr);
360#endif
361
362 ClearPageReserved(page); 377 ClearPageReserved(page);
363 init_page_count(page); 378 init_page_count(page);
364 __free_page(page); 379 __free_page(page);
@@ -381,7 +396,7 @@ static int online_pages_range(unsigned long start_pfn, unsigned long nr_pages,
381} 396}
382 397
383 398
384int online_pages(unsigned long pfn, unsigned long nr_pages) 399int __ref online_pages(unsigned long pfn, unsigned long nr_pages)
385{ 400{
386 unsigned long onlined_pages = 0; 401 unsigned long onlined_pages = 0;
387 struct zone *zone; 402 struct zone *zone;
@@ -390,6 +405,7 @@ int online_pages(unsigned long pfn, unsigned long nr_pages)
390 int ret; 405 int ret;
391 struct memory_notify arg; 406 struct memory_notify arg;
392 407
408 lock_memory_hotplug();
393 arg.start_pfn = pfn; 409 arg.start_pfn = pfn;
394 arg.nr_pages = nr_pages; 410 arg.nr_pages = nr_pages;
395 arg.status_change_nid = -1; 411 arg.status_change_nid = -1;
@@ -402,6 +418,7 @@ int online_pages(unsigned long pfn, unsigned long nr_pages)
402 ret = notifier_to_errno(ret); 418 ret = notifier_to_errno(ret);
403 if (ret) { 419 if (ret) {
404 memory_notify(MEM_CANCEL_ONLINE, &arg); 420 memory_notify(MEM_CANCEL_ONLINE, &arg);
421 unlock_memory_hotplug();
405 return ret; 422 return ret;
406 } 423 }
407 /* 424 /*
@@ -426,6 +443,7 @@ int online_pages(unsigned long pfn, unsigned long nr_pages)
426 printk(KERN_DEBUG "online_pages %lx at %lx failed\n", 443 printk(KERN_DEBUG "online_pages %lx at %lx failed\n",
427 nr_pages, pfn); 444 nr_pages, pfn);
428 memory_notify(MEM_CANCEL_ONLINE, &arg); 445 memory_notify(MEM_CANCEL_ONLINE, &arg);
446 unlock_memory_hotplug();
429 return ret; 447 return ret;
430 } 448 }
431 449
@@ -437,8 +455,9 @@ int online_pages(unsigned long pfn, unsigned long nr_pages)
437 zone_pcp_update(zone); 455 zone_pcp_update(zone);
438 456
439 mutex_unlock(&zonelists_mutex); 457 mutex_unlock(&zonelists_mutex);
440 setup_per_zone_wmarks(); 458
441 calculate_zone_inactive_ratio(zone); 459 init_per_zone_wmark_min();
460
442 if (onlined_pages) { 461 if (onlined_pages) {
443 kswapd_run(zone_to_nid(zone)); 462 kswapd_run(zone_to_nid(zone));
444 node_set_state(zone_to_nid(zone), N_HIGH_MEMORY); 463 node_set_state(zone_to_nid(zone), N_HIGH_MEMORY);
@@ -450,6 +469,7 @@ int online_pages(unsigned long pfn, unsigned long nr_pages)
450 469
451 if (onlined_pages) 470 if (onlined_pages)
452 memory_notify(MEM_ONLINE, &arg); 471 memory_notify(MEM_ONLINE, &arg);
472 unlock_memory_hotplug();
453 473
454 return 0; 474 return 0;
455} 475}
@@ -474,6 +494,14 @@ static pg_data_t __ref *hotadd_new_pgdat(int nid, u64 start)
474 /* init node's zones as empty zones, we don't have any present pages.*/ 494 /* init node's zones as empty zones, we don't have any present pages.*/
475 free_area_init_node(nid, zones_size, start_pfn, zholes_size); 495 free_area_init_node(nid, zones_size, start_pfn, zholes_size);
476 496
497 /*
498 * The node we allocated has no zone fallback lists. For avoiding
499 * to access not-initialized zonelist, build here.
500 */
501 mutex_lock(&zonelists_mutex);
502 build_all_zonelists(NULL);
503 mutex_unlock(&zonelists_mutex);
504
477 return pgdat; 505 return pgdat;
478} 506}
479 507
@@ -493,9 +521,9 @@ int mem_online_node(int nid)
493 pg_data_t *pgdat; 521 pg_data_t *pgdat;
494 int ret; 522 int ret;
495 523
496 lock_system_sleep(); 524 lock_memory_hotplug();
497 pgdat = hotadd_new_pgdat(nid, 0); 525 pgdat = hotadd_new_pgdat(nid, 0);
498 if (pgdat) { 526 if (!pgdat) {
499 ret = -ENOMEM; 527 ret = -ENOMEM;
500 goto out; 528 goto out;
501 } 529 }
@@ -504,7 +532,7 @@ int mem_online_node(int nid)
504 BUG_ON(ret); 532 BUG_ON(ret);
505 533
506out: 534out:
507 unlock_system_sleep(); 535 unlock_memory_hotplug();
508 return ret; 536 return ret;
509} 537}
510 538
@@ -516,7 +544,7 @@ int __ref add_memory(int nid, u64 start, u64 size)
516 struct resource *res; 544 struct resource *res;
517 int ret; 545 int ret;
518 546
519 lock_system_sleep(); 547 lock_memory_hotplug();
520 548
521 res = register_memory_resource(start, size); 549 res = register_memory_resource(start, size);
522 ret = -EEXIST; 550 ret = -EEXIST;
@@ -563,7 +591,7 @@ error:
563 release_memory_resource(res); 591 release_memory_resource(res);
564 592
565out: 593out:
566 unlock_system_sleep(); 594 unlock_memory_hotplug();
567 return ret; 595 return ret;
568} 596}
569EXPORT_SYMBOL_GPL(add_memory); 597EXPORT_SYMBOL_GPL(add_memory);
@@ -602,27 +630,14 @@ static struct page *next_active_pageblock(struct page *page)
602/* Checks if this range of memory is likely to be hot-removable. */ 630/* Checks if this range of memory is likely to be hot-removable. */
603int is_mem_section_removable(unsigned long start_pfn, unsigned long nr_pages) 631int is_mem_section_removable(unsigned long start_pfn, unsigned long nr_pages)
604{ 632{
605 int type;
606 struct page *page = pfn_to_page(start_pfn); 633 struct page *page = pfn_to_page(start_pfn);
607 struct page *end_page = page + nr_pages; 634 struct page *end_page = page + nr_pages;
608 635
609 /* Check the starting page of each pageblock within the range */ 636 /* Check the starting page of each pageblock within the range */
610 for (; page < end_page; page = next_active_pageblock(page)) { 637 for (; page < end_page; page = next_active_pageblock(page)) {
611 type = get_pageblock_migratetype(page); 638 if (!is_pageblock_removable_nolock(page))
612
613 /*
614 * A pageblock containing MOVABLE or free pages is considered
615 * removable
616 */
617 if (type != MIGRATE_MOVABLE && !pageblock_free(page))
618 return 0;
619
620 /*
621 * A pageblock starting with a PageReserved page is not
622 * considered removable.
623 */
624 if (PageReserved(page))
625 return 0; 639 return 0;
640 cond_resched();
626 } 641 }
627 642
628 /* All pageblocks in the memory block are likely to be hot-removable */ 643 /* All pageblocks in the memory block are likely to be hot-removable */
@@ -659,7 +674,7 @@ static int test_pages_in_a_zone(unsigned long start_pfn, unsigned long end_pfn)
659 * Scanning pfn is much easier than scanning lru list. 674 * Scanning pfn is much easier than scanning lru list.
660 * Scan pfn from start to end and Find LRU page. 675 * Scan pfn from start to end and Find LRU page.
661 */ 676 */
662int scan_lru_pages(unsigned long start, unsigned long end) 677static unsigned long scan_lru_pages(unsigned long start, unsigned long end)
663{ 678{
664 unsigned long pfn; 679 unsigned long pfn;
665 struct page *page; 680 struct page *page;
@@ -695,7 +710,7 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
695 if (!pfn_valid(pfn)) 710 if (!pfn_valid(pfn))
696 continue; 711 continue;
697 page = pfn_to_page(pfn); 712 page = pfn_to_page(pfn);
698 if (!page_count(page)) 713 if (!get_page_unless_zero(page))
699 continue; 714 continue;
700 /* 715 /*
701 * We can skip free pages. And we can only deal with pages on 716 * We can skip free pages. And we can only deal with pages on
@@ -703,35 +718,39 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
703 */ 718 */
704 ret = isolate_lru_page(page); 719 ret = isolate_lru_page(page);
705 if (!ret) { /* Success */ 720 if (!ret) { /* Success */
721 put_page(page);
706 list_add_tail(&page->lru, &source); 722 list_add_tail(&page->lru, &source);
707 move_pages--; 723 move_pages--;
708 inc_zone_page_state(page, NR_ISOLATED_ANON + 724 inc_zone_page_state(page, NR_ISOLATED_ANON +
709 page_is_file_cache(page)); 725 page_is_file_cache(page));
710 726
711 } else { 727 } else {
712 /* Becasue we don't have big zone->lock. we should
713 check this again here. */
714 if (page_count(page))
715 not_managed++;
716#ifdef CONFIG_DEBUG_VM 728#ifdef CONFIG_DEBUG_VM
717 printk(KERN_ALERT "removing pfn %lx from LRU failed\n", 729 printk(KERN_ALERT "removing pfn %lx from LRU failed\n",
718 pfn); 730 pfn);
719 dump_page(page); 731 dump_page(page);
720#endif 732#endif
733 put_page(page);
734 /* Because we don't have big zone->lock. we should
735 check this again here. */
736 if (page_count(page)) {
737 not_managed++;
738 ret = -EBUSY;
739 break;
740 }
721 } 741 }
722 } 742 }
723 ret = -EBUSY; 743 if (!list_empty(&source)) {
724 if (not_managed) { 744 if (not_managed) {
725 if (!list_empty(&source)) 745 putback_lru_pages(&source);
746 goto out;
747 }
748 /* this function returns # of failed pages */
749 ret = migrate_pages(&source, hotremove_migrate_alloc, 0,
750 true, true);
751 if (ret)
726 putback_lru_pages(&source); 752 putback_lru_pages(&source);
727 goto out;
728 } 753 }
729 ret = 0;
730 if (list_empty(&source))
731 goto out;
732 /* this function returns # of failed pages */
733 ret = migrate_pages(&source, hotremove_migrate_alloc, 0, 1);
734
735out: 754out:
736 return ret; 755 return ret;
737} 756}
@@ -783,7 +802,7 @@ check_pages_isolated(unsigned long start_pfn, unsigned long end_pfn)
783 return offlined; 802 return offlined;
784} 803}
785 804
786static int offline_pages(unsigned long start_pfn, 805static int __ref offline_pages(unsigned long start_pfn,
787 unsigned long end_pfn, unsigned long timeout) 806 unsigned long end_pfn, unsigned long timeout)
788{ 807{
789 unsigned long pfn, nr_pages, expire; 808 unsigned long pfn, nr_pages, expire;
@@ -803,7 +822,7 @@ static int offline_pages(unsigned long start_pfn,
803 if (!test_pages_in_a_zone(start_pfn, end_pfn)) 822 if (!test_pages_in_a_zone(start_pfn, end_pfn))
804 return -EINVAL; 823 return -EINVAL;
805 824
806 lock_system_sleep(); 825 lock_memory_hotplug();
807 826
808 zone = page_zone(pfn_to_page(start_pfn)); 827 zone = page_zone(pfn_to_page(start_pfn));
809 node = zone_to_nid(zone); 828 node = zone_to_nid(zone);
@@ -840,7 +859,6 @@ repeat:
840 ret = 0; 859 ret = 0;
841 if (drain) { 860 if (drain) {
842 lru_add_drain_all(); 861 lru_add_drain_all();
843 flush_scheduled_work();
844 cond_resched(); 862 cond_resched();
845 drain_all_pages(); 863 drain_all_pages();
846 } 864 }
@@ -862,7 +880,6 @@ repeat:
862 } 880 }
863 /* drain all zone's lru pagevec, this is asyncronous... */ 881 /* drain all zone's lru pagevec, this is asyncronous... */
864 lru_add_drain_all(); 882 lru_add_drain_all();
865 flush_scheduled_work();
866 yield(); 883 yield();
867 /* drain pcp pages , this is synchrouns. */ 884 /* drain pcp pages , this is synchrouns. */
868 drain_all_pages(); 885 drain_all_pages();
@@ -883,8 +900,8 @@ repeat:
883 zone->zone_pgdat->node_present_pages -= offlined_pages; 900 zone->zone_pgdat->node_present_pages -= offlined_pages;
884 totalram_pages -= offlined_pages; 901 totalram_pages -= offlined_pages;
885 902
886 setup_per_zone_wmarks(); 903 init_per_zone_wmark_min();
887 calculate_zone_inactive_ratio(zone); 904
888 if (!node_present_pages(node)) { 905 if (!node_present_pages(node)) {
889 node_clear_state(node, N_HIGH_MEMORY); 906 node_clear_state(node, N_HIGH_MEMORY);
890 kswapd_stop(node); 907 kswapd_stop(node);
@@ -894,7 +911,7 @@ repeat:
894 writeback_set_ratelimit(); 911 writeback_set_ratelimit();
895 912
896 memory_notify(MEM_OFFLINE, &arg); 913 memory_notify(MEM_OFFLINE, &arg);
897 unlock_system_sleep(); 914 unlock_memory_hotplug();
898 return 0; 915 return 0;
899 916
900failed_removal: 917failed_removal:
@@ -905,7 +922,7 @@ failed_removal:
905 undo_isolate_page_range(start_pfn, end_pfn); 922 undo_isolate_page_range(start_pfn, end_pfn);
906 923
907out: 924out:
908 unlock_system_sleep(); 925 unlock_memory_hotplug();
909 return ret; 926 return ret;
910} 927}
911 928