-rw-r--r--  arch/ia64/mm/init.c             |   9
-rw-r--r--  arch/powerpc/mm/mem.c           |  10
-rw-r--r--  arch/s390/mm/init.c             |  30
-rw-r--r--  arch/sh/mm/init.c               |   8
-rw-r--r--  arch/x86/mm/init_32.c           |   5
-rw-r--r--  arch/x86/mm/init_64.c           |   9
-rw-r--r--  drivers/base/memory.c           |  52
-rw-r--r--  include/linux/memory_hotplug.h  |  13
-rw-r--r--  include/linux/mmzone.h          |  16
-rw-r--r--  kernel/memremap.c               |   4
-rw-r--r--  mm/memory_hotplug.c             | 201
-rw-r--r--  mm/sparse.c                     |   3
12 files changed, 185 insertions(+), 175 deletions(-)
diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
index 39e2aeb4669d..80db57d063d0 100644
--- a/arch/ia64/mm/init.c
+++ b/arch/ia64/mm/init.c
@@ -648,18 +648,11 @@ mem_init (void)
 #ifdef CONFIG_MEMORY_HOTPLUG
 int arch_add_memory(int nid, u64 start, u64 size, bool for_device)
 {
-	pg_data_t *pgdat;
-	struct zone *zone;
 	unsigned long start_pfn = start >> PAGE_SHIFT;
 	unsigned long nr_pages = size >> PAGE_SHIFT;
 	int ret;
 
-	pgdat = NODE_DATA(nid);
-
-	zone = pgdat->node_zones +
-		zone_for_memory(nid, start, size, ZONE_NORMAL, for_device);
-	ret = __add_pages(nid, zone, start_pfn, nr_pages, !for_device);
-
+	ret = __add_pages(nid, start_pfn, nr_pages, !for_device);
 	if (ret)
 		printk("%s: Problem encountered in __add_pages() as ret=%d\n",
 		       __func__, ret);
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index e6b2e6618b6c..72c46eb53215 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -128,16 +128,12 @@ int __weak remove_section_mapping(unsigned long start, unsigned long end)
 
 int arch_add_memory(int nid, u64 start, u64 size, bool for_device)
 {
-	struct pglist_data *pgdata;
-	struct zone *zone;
 	unsigned long start_pfn = start >> PAGE_SHIFT;
 	unsigned long nr_pages = size >> PAGE_SHIFT;
 	int rc;
 
 	resize_hpt_for_hotplug(memblock_phys_mem_size());
 
-	pgdata = NODE_DATA(nid);
-
 	start = (unsigned long)__va(start);
 	rc = create_section_mapping(start, start + size);
 	if (rc) {
@@ -147,11 +143,7 @@ int arch_add_memory(int nid, u64 start, u64 size, bool for_device)
 		return -EFAULT;
 	}
 
-	/* this should work for most non-highmem platforms */
-	zone = pgdata->node_zones +
-		zone_for_memory(nid, start, size, 0, for_device);
-
-	return __add_pages(nid, zone, start_pfn, nr_pages, !for_device);
+	return __add_pages(nid, start_pfn, nr_pages, !for_device);
 }
 
 #ifdef CONFIG_MEMORY_HOTREMOVE
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index a3d549966b6a..bfa918e3592b 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -168,41 +168,15 @@ unsigned long memory_block_size_bytes(void)
 #ifdef CONFIG_MEMORY_HOTPLUG
 int arch_add_memory(int nid, u64 start, u64 size, bool for_device)
 {
-	unsigned long zone_start_pfn, zone_end_pfn, nr_pages;
 	unsigned long start_pfn = PFN_DOWN(start);
 	unsigned long size_pages = PFN_DOWN(size);
-	pg_data_t *pgdat = NODE_DATA(nid);
-	struct zone *zone;
-	int rc, i;
+	int rc;
 
 	rc = vmem_add_mapping(start, size);
 	if (rc)
 		return rc;
 
-	for (i = 0; i < MAX_NR_ZONES; i++) {
-		zone = pgdat->node_zones + i;
-		if (zone_idx(zone) != ZONE_MOVABLE) {
-			/* Add range within existing zone limits, if possible */
-			zone_start_pfn = zone->zone_start_pfn;
-			zone_end_pfn = zone->zone_start_pfn +
-				       zone->spanned_pages;
-		} else {
-			/* Add remaining range to ZONE_MOVABLE */
-			zone_start_pfn = start_pfn;
-			zone_end_pfn = start_pfn + size_pages;
-		}
-		if (start_pfn < zone_start_pfn || start_pfn >= zone_end_pfn)
-			continue;
-		nr_pages = (start_pfn + size_pages > zone_end_pfn) ?
-			   zone_end_pfn - start_pfn : size_pages;
-		rc = __add_pages(nid, zone, start_pfn, nr_pages, !for_device);
-		if (rc)
-			break;
-		start_pfn += nr_pages;
-		size_pages -= nr_pages;
-		if (!size_pages)
-			break;
-	}
+	rc = __add_pages(nid, start_pfn, size_pages, !for_device);
 	if (rc)
 		vmem_remove_mapping(start, size);
 	return rc;
diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c
index a9d57f75ae8c..3813a610a2bb 100644
--- a/arch/sh/mm/init.c
+++ b/arch/sh/mm/init.c
@@ -487,18 +487,12 @@ void free_initrd_mem(unsigned long start, unsigned long end)
 #ifdef CONFIG_MEMORY_HOTPLUG
 int arch_add_memory(int nid, u64 start, u64 size, bool for_device)
 {
-	pg_data_t *pgdat;
 	unsigned long start_pfn = PFN_DOWN(start);
 	unsigned long nr_pages = size >> PAGE_SHIFT;
 	int ret;
 
-	pgdat = NODE_DATA(nid);
-
 	/* We only have ZONE_NORMAL, so this is easy.. */
-	ret = __add_pages(nid, pgdat->node_zones +
-			zone_for_memory(nid, start, size, ZONE_NORMAL,
-			for_device),
-			start_pfn, nr_pages, !for_device);
+	ret = __add_pages(nid, start_pfn, nr_pages, !for_device);
 	if (unlikely(ret))
 		printk("%s: Failed, __add_pages() == %d\n", __func__, ret);
 
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 94594b889144..a424066d0552 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -825,13 +825,10 @@ void __init mem_init(void)
 #ifdef CONFIG_MEMORY_HOTPLUG
 int arch_add_memory(int nid, u64 start, u64 size, bool for_device)
 {
-	struct pglist_data *pgdata = NODE_DATA(nid);
-	struct zone *zone = pgdata->node_zones +
-		zone_for_memory(nid, start, size, ZONE_HIGHMEM, for_device);
 	unsigned long start_pfn = start >> PAGE_SHIFT;
 	unsigned long nr_pages = size >> PAGE_SHIFT;
 
-	return __add_pages(nid, zone, start_pfn, nr_pages, !for_device);
+	return __add_pages(nid, start_pfn, nr_pages, !for_device);
 }
 
 #ifdef CONFIG_MEMORY_HOTREMOVE
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 9d64291459b6..06afa84ac0a0 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -772,22 +772,15 @@ static void update_end_of_memory_vars(u64 start, u64 size)
 	}
 }
 
-/*
- * Memory is added always to NORMAL zone. This means you will never get
- * additional DMA/DMA32 memory.
- */
 int arch_add_memory(int nid, u64 start, u64 size, bool for_device)
 {
-	struct pglist_data *pgdat = NODE_DATA(nid);
-	struct zone *zone = pgdat->node_zones +
-		zone_for_memory(nid, start, size, ZONE_NORMAL, for_device);
 	unsigned long start_pfn = start >> PAGE_SHIFT;
 	unsigned long nr_pages = size >> PAGE_SHIFT;
 	int ret;
 
 	init_memory_mapping(start, start + size);
 
-	ret = __add_pages(nid, zone, start_pfn, nr_pages, !for_device);
+	ret = __add_pages(nid, start_pfn, nr_pages, !for_device);
 	WARN_ON_ONCE(ret);
 
 	/* update max_pfn, max_low_pfn and high_memory */
diff --git a/drivers/base/memory.c b/drivers/base/memory.c
index 1e884d82af6f..b86fda30ce62 100644
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -392,39 +392,43 @@ static ssize_t show_valid_zones(struct device *dev,
 		struct device_attribute *attr, char *buf)
 {
 	struct memory_block *mem = to_memory_block(dev);
-	unsigned long start_pfn, end_pfn;
-	unsigned long valid_start, valid_end, valid_pages;
+	unsigned long start_pfn = section_nr_to_pfn(mem->start_section_nr);
 	unsigned long nr_pages = PAGES_PER_SECTION * sections_per_block;
-	struct zone *zone;
-	int zone_shift = 0;
+	unsigned long valid_start_pfn, valid_end_pfn;
+	bool append = false;
+	int nid;
 
-	start_pfn = section_nr_to_pfn(mem->start_section_nr);
-	end_pfn = start_pfn + nr_pages;
-
-	/* The block contains more than one zone can not be offlined. */
-	if (!test_pages_in_a_zone(start_pfn, end_pfn, &valid_start, &valid_end))
+	/*
+	 * The block contains more than one zone can not be offlined.
+	 * This can happen e.g. for ZONE_DMA and ZONE_DMA32
+	 */
+	if (!test_pages_in_a_zone(start_pfn, start_pfn + nr_pages, &valid_start_pfn, &valid_end_pfn))
 		return sprintf(buf, "none\n");
 
-	zone = page_zone(pfn_to_page(valid_start));
-	valid_pages = valid_end - valid_start;
-
-	/* MMOP_ONLINE_KEEP */
-	sprintf(buf, "%s", zone->name);
+	start_pfn = valid_start_pfn;
+	nr_pages = valid_end_pfn - start_pfn;
 
-	/* MMOP_ONLINE_KERNEL */
-	zone_can_shift(valid_start, valid_pages, ZONE_NORMAL, &zone_shift);
-	if (zone_shift) {
-		strcat(buf, " ");
-		strcat(buf, (zone + zone_shift)->name);
+	/*
+	 * Check the existing zone. Make sure that we do that only on the
+	 * online nodes otherwise the page_zone is not reliable
+	 */
+	if (mem->state == MEM_ONLINE) {
+		strcat(buf, page_zone(pfn_to_page(start_pfn))->name);
+		goto out;
 	}
 
-	/* MMOP_ONLINE_MOVABLE */
-	zone_can_shift(valid_start, valid_pages, ZONE_MOVABLE, &zone_shift);
-	if (zone_shift) {
-		strcat(buf, " ");
-		strcat(buf, (zone + zone_shift)->name);
+	nid = pfn_to_nid(start_pfn);
+	if (allow_online_pfn_range(nid, start_pfn, nr_pages, MMOP_ONLINE_KERNEL)) {
+		strcat(buf, NODE_DATA(nid)->node_zones[ZONE_NORMAL].name);
+		append = true;
 	}
 
+	if (allow_online_pfn_range(nid, start_pfn, nr_pages, MMOP_ONLINE_MOVABLE)) {
+		if (append)
+			strcat(buf, " ");
+		strcat(buf, NODE_DATA(nid)->node_zones[ZONE_MOVABLE].name);
+	}
+out:
 	strcat(buf, "\n");
 
 	return strlen(buf);
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index a61aede1b391..8a07a49fd8dc 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -123,8 +123,8 @@ extern int __remove_pages(struct zone *zone, unsigned long start_pfn,
 	unsigned long nr_pages);
 #endif /* CONFIG_MEMORY_HOTREMOVE */
 
-/* reasonably generic interface to expand the physical pages in a zone */
-extern int __add_pages(int nid, struct zone *zone, unsigned long start_pfn,
+/* reasonably generic interface to expand the physical pages */
+extern int __add_pages(int nid, unsigned long start_pfn,
 	unsigned long nr_pages, bool want_memblock);
 
 #ifdef CONFIG_NUMA
@@ -299,15 +299,16 @@ extern int add_memory_resource(int nid, struct resource *resource, bool online);
 extern int zone_for_memory(int nid, u64 start, u64 size, int zone_default,
 		bool for_device);
 extern int arch_add_memory(int nid, u64 start, u64 size, bool for_device);
+extern void move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn,
+		unsigned long nr_pages);
 extern int offline_pages(unsigned long start_pfn, unsigned long nr_pages);
 extern bool is_memblock_offlined(struct memory_block *mem);
 extern void remove_memory(int nid, u64 start, u64 size);
-extern int sparse_add_one_section(struct zone *zone, unsigned long start_pfn);
+extern int sparse_add_one_section(struct pglist_data *pgdat, unsigned long start_pfn);
 extern void sparse_remove_one_section(struct zone *zone, struct mem_section *ms,
 		unsigned long map_offset);
 extern struct page *sparse_decode_mem_map(unsigned long coded_mem_map,
 		unsigned long pnum);
-extern bool zone_can_shift(unsigned long pfn, unsigned long nr_pages,
-		enum zone_type target, int *zone_shift);
-
+extern bool allow_online_pfn_range(int nid, unsigned long pfn, unsigned long nr_pages,
+		int online_type);
 #endif /* __LINUX_MEMORY_HOTPLUG_H */
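
With the zone parameter gone from __add_pages(), an architecture's arch_add_memory() only establishes its mappings and adds the sections; picking a zone is deferred to online time. A minimal sketch of a caller against the new prototype (illustrative only, mirroring the converted architectures above, not code from this patch):

	int arch_add_memory(int nid, u64 start, u64 size, bool for_device)
	{
		unsigned long start_pfn = start >> PAGE_SHIFT;
		unsigned long nr_pages = size >> PAGE_SHIFT;

		/* no zone is named here; one is chosen later, when the range is onlined */
		return __add_pages(nid, start_pfn, nr_pages, !for_device);
	}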
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 2aaf7e08c5a8..abc1641011f2 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -533,6 +533,22 @@ static inline bool zone_is_empty(struct zone *zone)
 }
 
 /*
+ * Return true if [start_pfn, start_pfn + nr_pages) range has a non-empty
+ * intersection with the given zone
+ */
+static inline bool zone_intersects(struct zone *zone,
+		unsigned long start_pfn, unsigned long nr_pages)
+{
+	if (zone_is_empty(zone))
+		return false;
+	if (start_pfn >= zone_end_pfn(zone) ||
+	    start_pfn + nr_pages <= zone->zone_start_pfn)
+		return false;
+
+	return true;
+}
+
+/*
  * The "priority" of VM scanning is how much of the queues we will scan in one
 * go. A value of 12 for DEF_PRIORITY implies that we will scan 1/4096th of the
 * queues ("queue_length >> 12") during an aging round.
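
The new zone_intersects() helper is an empty-safe overlap test on pfn ranges. A short sketch of the intended use, with a hypothetical pick_default_zone() wrapper (the in-tree user is move_pfn_range() in mm/memory_hotplug.c below):

	/*
	 * Illustrative sketch only: default to ZONE_NORMAL unless the range
	 * already overlaps ZONE_MOVABLE on this node.
	 */
	static struct zone *pick_default_zone(int nid, unsigned long start_pfn,
					      unsigned long nr_pages)
	{
		struct pglist_data *pgdat = NODE_DATA(nid);
		struct zone *movable = &pgdat->node_zones[ZONE_MOVABLE];

		if (zone_intersects(movable, start_pfn, nr_pages))
			return movable;
		return &pgdat->node_zones[ZONE_NORMAL];
	}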
diff --git a/kernel/memremap.c b/kernel/memremap.c
index 23a6483c3666..281eb478856a 100644
--- a/kernel/memremap.c
+++ b/kernel/memremap.c
@@ -359,6 +359,10 @@ void *devm_memremap_pages(struct device *dev, struct resource *res,
 
 	mem_hotplug_begin();
 	error = arch_add_memory(nid, align_start, align_size, true);
+	if (!error)
+		move_pfn_range_to_zone(&NODE_DATA(nid)->node_zones[ZONE_DEVICE],
+					align_start >> PAGE_SHIFT,
+					align_size >> PAGE_SHIFT);
 	mem_hotplug_done();
 	if (error)
 		goto err_add_memory;
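
Device memory now follows the same two-step pattern as regular hotplug: arch_add_memory() only adds sections, and the caller associates the range with ZONE_DEVICE explicitly. A condensed sketch of that pattern, using a hypothetical wrapper name and with error handling trimmed:

	static int hotadd_device_range(int nid, u64 start, u64 size)
	{
		int rc;

		mem_hotplug_begin();
		rc = arch_add_memory(nid, start, size, true);	/* sections only, no zone */
		if (!rc)
			move_pfn_range_to_zone(&NODE_DATA(nid)->node_zones[ZONE_DEVICE],
					       start >> PAGE_SHIFT,
					       size >> PAGE_SHIFT);
		mem_hotplug_done();
		return rc;
	}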
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index b2ebe9ad7f6c..9438ffe24cb2 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -433,25 +433,6 @@ out_fail:
 	return -1;
 }
 
-static struct zone * __meminit move_pfn_range(int zone_shift,
-		unsigned long start_pfn, unsigned long end_pfn)
-{
-	struct zone *zone = page_zone(pfn_to_page(start_pfn));
-	int ret = 0;
-
-	if (zone_shift < 0)
-		ret = move_pfn_range_left(zone + zone_shift, zone,
-					  start_pfn, end_pfn);
-	else if (zone_shift)
-		ret = move_pfn_range_right(zone, zone + zone_shift,
-					   start_pfn, end_pfn);
-
-	if (ret)
-		return NULL;
-
-	return zone + zone_shift;
-}
-
 static void __meminit grow_pgdat_span(struct pglist_data *pgdat, unsigned long start_pfn,
 			unsigned long end_pfn)
 {
@@ -493,23 +474,35 @@ static int __meminit __add_zone(struct zone *zone, unsigned long phys_start_pfn)
 	return 0;
 }
 
-static int __meminit __add_section(int nid, struct zone *zone,
-		unsigned long phys_start_pfn, bool want_memblock)
+static int __meminit __add_section(int nid, unsigned long phys_start_pfn,
+		bool want_memblock)
 {
 	int ret;
+	int i;
 
 	if (pfn_valid(phys_start_pfn))
 		return -EEXIST;
 
-	ret = sparse_add_one_section(zone, phys_start_pfn);
-
+	ret = sparse_add_one_section(NODE_DATA(nid), phys_start_pfn);
 	if (ret < 0)
 		return ret;
 
-	ret = __add_zone(zone, phys_start_pfn);
+	/*
+	 * Make all the pages reserved so that nobody will stumble over half
+	 * initialized state.
+	 * FIXME: We also have to associate it with a node because pfn_to_node
+	 * relies on having page with the proper node.
+	 */
+	for (i = 0; i < PAGES_PER_SECTION; i++) {
+		unsigned long pfn = phys_start_pfn + i;
+		struct page *page;
+		if (!pfn_valid(pfn))
+			continue;
 
-	if (ret < 0)
-		return ret;
+		page = pfn_to_page(pfn);
+		set_page_node(page, nid);
+		SetPageReserved(page);
+	}
 
 	if (!want_memblock)
 		return 0;
@@ -523,7 +516,7 @@ static int __meminit __add_section(int nid, struct zone *zone,
  * call this function after deciding the zone to which to
  * add the new pages.
  */
-int __ref __add_pages(int nid, struct zone *zone, unsigned long phys_start_pfn,
+int __ref __add_pages(int nid, unsigned long phys_start_pfn,
 	unsigned long nr_pages, bool want_memblock)
 {
 	unsigned long i;
@@ -531,8 +524,6 @@ int __ref __add_pages(int nid, struct zone *zone, unsigned long phys_start_pfn,
 	int start_sec, end_sec;
 	struct vmem_altmap *altmap;
 
-	clear_zone_contiguous(zone);
-
 	/* during initialize mem_map, align hot-added range to section */
 	start_sec = pfn_to_section_nr(phys_start_pfn);
 	end_sec = pfn_to_section_nr(phys_start_pfn + nr_pages - 1);
@@ -552,7 +543,7 @@ int __ref __add_pages(int nid, struct zone *zone, unsigned long phys_start_pfn,
 	}
 
 	for (i = start_sec; i <= end_sec; i++) {
-		err = __add_section(nid, zone, section_nr_to_pfn(i), want_memblock);
+		err = __add_section(nid, section_nr_to_pfn(i), want_memblock);
 
 		/*
 		 * EEXIST is finally dealt with by ioresource collision
@@ -565,7 +556,6 @@ int __ref __add_pages(int nid, struct zone *zone, unsigned long phys_start_pfn,
 	}
 	vmemmap_populate_print_last();
 out:
-	set_zone_contiguous(zone);
 	return err;
 }
 EXPORT_SYMBOL_GPL(__add_pages);
@@ -1034,39 +1024,109 @@ static void node_states_set_node(int node, struct memory_notify *arg)
 	node_set_state(node, N_MEMORY);
 }
 
-bool zone_can_shift(unsigned long pfn, unsigned long nr_pages,
-		    enum zone_type target, int *zone_shift)
+bool allow_online_pfn_range(int nid, unsigned long pfn, unsigned long nr_pages, int online_type)
 {
-	struct zone *zone = page_zone(pfn_to_page(pfn));
-	enum zone_type idx = zone_idx(zone);
-	int i;
+	struct pglist_data *pgdat = NODE_DATA(nid);
+	struct zone *movable_zone = &pgdat->node_zones[ZONE_MOVABLE];
+	struct zone *normal_zone = &pgdat->node_zones[ZONE_NORMAL];
 
-	*zone_shift = 0;
+	/*
+	 * TODO there shouldn't be any inherent reason to have ZONE_NORMAL
+	 * physically before ZONE_MOVABLE. All we need is they do not
+	 * overlap. Historically we didn't allow ZONE_NORMAL after ZONE_MOVABLE
+	 * though so let's stick with it for simplicity for now.
+	 * TODO make sure we do not overlap with ZONE_DEVICE
+	 */
+	if (online_type == MMOP_ONLINE_KERNEL) {
+		if (zone_is_empty(movable_zone))
+			return true;
+		return movable_zone->zone_start_pfn >= pfn + nr_pages;
+	} else if (online_type == MMOP_ONLINE_MOVABLE) {
+		return zone_end_pfn(normal_zone) <= pfn;
+	}
 
-	if (idx < target) {
-		/* pages must be at end of current zone */
-		if (pfn + nr_pages != zone_end_pfn(zone))
-			return false;
+	/* MMOP_ONLINE_KEEP will always succeed and inherits the current zone */
+	return online_type == MMOP_ONLINE_KEEP;
+}
 
-		/* no zones in use between current zone and target */
-		for (i = idx + 1; i < target; i++)
-			if (zone_is_initialized(zone - idx + i))
-				return false;
-	}
+static void __meminit resize_zone_range(struct zone *zone, unsigned long start_pfn,
		unsigned long nr_pages)
+{
+	unsigned long old_end_pfn = zone_end_pfn(zone);
 
-	if (target < idx) {
-		/* pages must be at beginning of current zone */
-		if (pfn != zone->zone_start_pfn)
-			return false;
+	if (zone_is_empty(zone) || start_pfn < zone->zone_start_pfn)
+		zone->zone_start_pfn = start_pfn;
+
+	zone->spanned_pages = max(start_pfn + nr_pages, old_end_pfn) - zone->zone_start_pfn;
+}
+
+static void __meminit resize_pgdat_range(struct pglist_data *pgdat, unsigned long start_pfn,
+		unsigned long nr_pages)
+{
+	unsigned long old_end_pfn = pgdat_end_pfn(pgdat);
 
-		/* no zones in use between current zone and target */
-		for (i = target + 1; i < idx; i++)
-			if (zone_is_initialized(zone - idx + i))
-				return false;
+	if (!pgdat->node_spanned_pages || start_pfn < pgdat->node_start_pfn)
+		pgdat->node_start_pfn = start_pfn;
+
+	pgdat->node_spanned_pages = max(start_pfn + nr_pages, old_end_pfn) - pgdat->node_start_pfn;
+}
+
+void move_pfn_range_to_zone(struct zone *zone,
+		unsigned long start_pfn, unsigned long nr_pages)
+{
+	struct pglist_data *pgdat = zone->zone_pgdat;
+	int nid = pgdat->node_id;
+	unsigned long flags;
+
+	if (zone_is_empty(zone))
+		init_currently_empty_zone(zone, start_pfn, nr_pages);
+
+	clear_zone_contiguous(zone);
+
+	/* TODO Huh pgdat is irqsave while zone is not. It used to be like that before */
+	pgdat_resize_lock(pgdat, &flags);
+	zone_span_writelock(zone);
+	resize_zone_range(zone, start_pfn, nr_pages);
+	zone_span_writeunlock(zone);
+	resize_pgdat_range(pgdat, start_pfn, nr_pages);
+	pgdat_resize_unlock(pgdat, &flags);
+
+	/*
+	 * TODO now we have a visible range of pages which are not associated
+	 * with their zone properly. Not nice but set_pfnblock_flags_mask
+	 * expects the zone spans the pfn range. All the pages in the range
+	 * are reserved so nobody should be touching them so we should be safe
+	 */
+	memmap_init_zone(nr_pages, nid, zone_idx(zone), start_pfn, MEMMAP_HOTPLUG);
+
+	set_zone_contiguous(zone);
+}
+
+/*
+ * Associates the given pfn range with the given node and the zone appropriate
+ * for the given online type.
+ */
+static struct zone * __meminit move_pfn_range(int online_type, int nid,
+		unsigned long start_pfn, unsigned long nr_pages)
+{
+	struct pglist_data *pgdat = NODE_DATA(nid);
+	struct zone *zone = &pgdat->node_zones[ZONE_NORMAL];
+
+	if (online_type == MMOP_ONLINE_KEEP) {
+		struct zone *movable_zone = &pgdat->node_zones[ZONE_MOVABLE];
+		/*
+		 * MMOP_ONLINE_KEEP inherits the current zone which is
+		 * ZONE_NORMAL by default but we might be within ZONE_MOVABLE
+		 * already.
+		 */
+		if (zone_intersects(movable_zone, start_pfn, nr_pages))
+			zone = movable_zone;
+	} else if (online_type == MMOP_ONLINE_MOVABLE) {
+		zone = &pgdat->node_zones[ZONE_MOVABLE];
 	}
 
-	*zone_shift = target - idx;
-	return true;
+	move_pfn_range_to_zone(zone, start_pfn, nr_pages);
+	return zone;
 }
 
 /* Must be protected by mem_hotplug_begin() */
@@ -1079,38 +1139,21 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, int online_typ
 	int nid;
 	int ret;
 	struct memory_notify arg;
-	int zone_shift = 0;
 
-	/*
-	 * This doesn't need a lock to do pfn_to_page().
-	 * The section can't be removed here because of the
-	 * memory_block->state_mutex.
-	 */
-	zone = page_zone(pfn_to_page(pfn));
-
-	if ((zone_idx(zone) > ZONE_NORMAL ||
-	    online_type == MMOP_ONLINE_MOVABLE) &&
-	    !can_online_high_movable(pfn_to_nid(pfn)))
+	nid = pfn_to_nid(pfn);
+	if (!allow_online_pfn_range(nid, pfn, nr_pages, online_type))
 		return -EINVAL;
 
-	if (online_type == MMOP_ONLINE_KERNEL) {
-		if (!zone_can_shift(pfn, nr_pages, ZONE_NORMAL, &zone_shift))
-			return -EINVAL;
-	} else if (online_type == MMOP_ONLINE_MOVABLE) {
-		if (!zone_can_shift(pfn, nr_pages, ZONE_MOVABLE, &zone_shift))
-			return -EINVAL;
-	}
-
-	zone = move_pfn_range(zone_shift, pfn, pfn + nr_pages);
-	if (!zone)
+	if (online_type == MMOP_ONLINE_MOVABLE && !can_online_high_movable(nid))
 		return -EINVAL;
 
+	/* associate pfn range with the zone */
+	zone = move_pfn_range(online_type, nid, pfn, nr_pages);
+
 	arg.start_pfn = pfn;
 	arg.nr_pages = nr_pages;
 	node_states_check_changes_online(nr_pages, zone, &arg);
 
-	nid = zone_to_nid(zone);
-
 	ret = memory_notify(MEM_GOING_ONLINE, &arg);
 	ret = notifier_to_errno(ret);
 	if (ret)
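
Taken together, the hotplug path after this change splits cleanly into an add phase and an online phase. A rough summary of the flow (descriptive sketch only, not code from the patch):

	/*
	 *   arch_add_memory()
	 *     -> __add_pages()             add sections, mark pages reserved and
	 *                                  associate them with the node; no zone yet
	 *
	 *   online_pages()
	 *     -> allow_online_pfn_range()  reject requests that would make
	 *                                  ZONE_NORMAL and ZONE_MOVABLE overlap
	 *     -> move_pfn_range()          pick ZONE_NORMAL or ZONE_MOVABLE
	 *     -> move_pfn_range_to_zone()  resize zone/pgdat spans and run
	 *                                  memmap_init_zone() for the range
	 */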
diff --git a/mm/sparse.c b/mm/sparse.c
index 9d7fd666015e..7b4be3fd5cac 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -761,10 +761,9 @@ static void free_map_bootmem(struct page *memmap)
  * set. If this is <=0, then that means that the passed-in
  * map was not consumed and must be freed.
  */
-int __meminit sparse_add_one_section(struct zone *zone, unsigned long start_pfn)
+int __meminit sparse_add_one_section(struct pglist_data *pgdat, unsigned long start_pfn)
 {
 	unsigned long section_nr = pfn_to_section_nr(start_pfn);
-	struct pglist_data *pgdat = zone->zone_pgdat;
 	struct mem_section *ms;
 	struct page *memmap;
 	unsigned long *usemap;