author    Linus Torvalds <torvalds@linux-foundation.org>  2015-07-01 20:47:51 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>  2015-07-01 20:47:51 -0400
commit    2d01eedf1d14432f4db5388a49dc5596a8c5bd02
tree      646525acc0475b2899827c1bfbd25f05ec1b8092 /mm
parent    6ac15baacb6ecd87c66209627753b96ded3b4515
parent    abdd4a7025282fbe3737e1bcb5f51afc8d8ea1b8
Merge branch 'akpm' (patches from Andrew)
Merge third patchbomb from Andrew Morton:
- the rest of MM
- scripts/gdb updates
- ipc/ updates
- lib/ updates
- MAINTAINERS updates
- various other misc things
* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (67 commits)
genalloc: rename of_get_named_gen_pool() to of_gen_pool_get()
genalloc: rename dev_get_gen_pool() to gen_pool_get()
x86: opt into HAVE_COPY_THREAD_TLS, for both 32-bit and 64-bit
MAINTAINERS: add zpool
MAINTAINERS: BCACHE: Kent Overstreet has changed email address
MAINTAINERS: move Jens Osterkamp to CREDITS
MAINTAINERS: remove unused nbd.h pattern
MAINTAINERS: update brcm gpio filename pattern
MAINTAINERS: update brcm dts pattern
MAINTAINERS: update sound soc intel patterns
MAINTAINERS: remove website for paride
MAINTAINERS: update Emulex ocrdma email addresses
bcache: use kvfree() in various places
libcxgbi: use kvfree() in cxgbi_free_big_mem()
target: use kvfree() in session alloc and free
IB/ehca: use kvfree() in ipz_queue_{cd}tor()
drm/nouveau/gem: use kvfree() in u_free()
drm: use kvfree() in drm_free_large()
cxgb4: use kvfree() in t4_free_mem()
cxgb3: use kvfree() in cxgb_free_mem()
...
Diffstat (limited to 'mm')
-rw-r--r--  mm/Kconfig      |  18
-rw-r--r--  mm/bootmem.c    |  13
-rw-r--r--  mm/internal.h   |  11
-rw-r--r--  mm/memblock.c   |  34
-rw-r--r--  mm/mm_init.c    |   9
-rw-r--r--  mm/nobootmem.c  |   7
-rw-r--r--  mm/page_alloc.c | 442
7 files changed, 453 insertions(+), 81 deletions(-)
diff --git a/mm/Kconfig b/mm/Kconfig
index c180af880ed5..e79de2bd12cd 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -636,3 +636,21 @@ config MAX_STACK_SIZE_MB
 	  changed to a smaller value in which case that is used.
 
 	  A sane initial value is 80 MB.
+
+# For architectures that support deferred memory initialisation
+config ARCH_SUPPORTS_DEFERRED_STRUCT_PAGE_INIT
+	bool
+
+config DEFERRED_STRUCT_PAGE_INIT
+	bool "Defer initialisation of struct pages to kswapd"
+	default n
+	depends on ARCH_SUPPORTS_DEFERRED_STRUCT_PAGE_INIT
+	depends on MEMORY_HOTPLUG
+	help
+	  Ordinarily all struct pages are initialised during early boot in a
+	  single thread. On very large machines this can take a considerable
+	  amount of time. If this option is set, large machines will bring up
+	  a subset of memmap at boot and then initialise the rest in parallel
+	  when kswapd starts. This has a potential performance impact on
+	  processes running early in the lifetime of the system until kswapd
+	  finishes the initialisation.
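The "subset of memmap" promised by this help text is sized by update_defer_init() in the mm/page_alloc.c hunks below: each node initialises at least 2G of its highest zone at boot and defers the remainder. A standalone sketch of that threshold arithmetic, assuming 4 KiB pages (PAGE_SHIFT == 12, our assumption, not part of the patch):

#include <stdio.h>

int main(void)
{
	const unsigned int page_shift = 12;	/* assumption: 4 KiB pages */
	unsigned long pages = 2UL << (30 - page_shift);

	/* 2UL << 18 = 524288 pages, i.e. exactly 2 GiB */
	printf("%lu pages (%lu MiB)\n", pages, (pages << page_shift) >> 20);
	return 0;
}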
diff --git a/mm/bootmem.c b/mm/bootmem.c
index 477be696511d..a23dd1934654 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -164,7 +164,7 @@ void __init free_bootmem_late(unsigned long physaddr, unsigned long size)
 	end = PFN_DOWN(physaddr + size);
 
 	for (; cursor < end; cursor++) {
-		__free_pages_bootmem(pfn_to_page(cursor), 0);
+		__free_pages_bootmem(pfn_to_page(cursor), cursor, 0);
 		totalram_pages++;
 	}
 }
@@ -172,7 +172,7 @@ void __init free_bootmem_late(unsigned long physaddr, unsigned long size)
 static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata)
 {
 	struct page *page;
-	unsigned long *map, start, end, pages, count = 0;
+	unsigned long *map, start, end, pages, cur, count = 0;
 
 	if (!bdata->node_bootmem_map)
 		return 0;
@@ -210,17 +210,17 @@ static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata)
 		if (IS_ALIGNED(start, BITS_PER_LONG) && vec == ~0UL) {
 			int order = ilog2(BITS_PER_LONG);
 
-			__free_pages_bootmem(pfn_to_page(start), order);
+			__free_pages_bootmem(pfn_to_page(start), start, order);
 			count += BITS_PER_LONG;
 			start += BITS_PER_LONG;
 		} else {
-			unsigned long cur = start;
+			cur = start;
 
 			start = ALIGN(start + 1, BITS_PER_LONG);
 			while (vec && cur != start) {
 				if (vec & 1) {
 					page = pfn_to_page(cur);
-					__free_pages_bootmem(page, 0);
+					__free_pages_bootmem(page, cur, 0);
 					count++;
 				}
 				vec >>= 1;
@@ -229,12 +229,13 @@ static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata)
 		}
 	}
 
+	cur = bdata->node_min_pfn;
 	page = virt_to_page(bdata->node_bootmem_map);
 	pages = bdata->node_low_pfn - bdata->node_min_pfn;
 	pages = bootmem_bootmap_pages(pages);
 	count += pages;
 	while (pages--)
-		__free_pages_bootmem(page++, 0);
+		__free_pages_bootmem(page++, cur++, 0);
 
 	bdebug("nid=%td released=%lx\n", bdata - bootmem_node_data, count);
 
diff --git a/mm/internal.h b/mm/internal.h
index a25e359a4039..36b23f1e2ca6 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -155,7 +155,8 @@ __find_buddy_index(unsigned long page_idx, unsigned int order)
 }
 
 extern int __isolate_free_page(struct page *page, unsigned int order);
-extern void __free_pages_bootmem(struct page *page, unsigned int order);
+extern void __free_pages_bootmem(struct page *page, unsigned long pfn,
+					unsigned int order);
 extern void prep_compound_page(struct page *page, unsigned long order);
 #ifdef CONFIG_MEMORY_FAILURE
 extern bool is_free_buddy_page(struct page *page);
@@ -361,10 +362,7 @@ do { \
 } while (0)
 
 extern void mminit_verify_pageflags_layout(void);
-extern void mminit_verify_page_links(struct page *page,
-		enum zone_type zone, unsigned long nid, unsigned long pfn);
 extern void mminit_verify_zonelist(void);
-
 #else
 
 static inline void mminit_dprintk(enum mminit_level level,
@@ -376,11 +374,6 @@ static inline void mminit_verify_pageflags_layout(void)
 {
 }
 
-static inline void mminit_verify_page_links(struct page *page,
-		enum zone_type zone, unsigned long nid, unsigned long pfn)
-{
-}
-
 static inline void mminit_verify_zonelist(void)
 {
 }
diff --git a/mm/memblock.c b/mm/memblock.c
index 1b444c730846..87108e77e476 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -820,6 +820,38 @@ int __init_memblock memblock_mark_mirror(phys_addr_t base, phys_addr_t size)
 
 
 /**
+ * __next_reserved_mem_region - next function for for_each_reserved_region()
+ * @idx: pointer to u64 loop variable
+ * @out_start: ptr to phys_addr_t for start address of the region, can be %NULL
+ * @out_end: ptr to phys_addr_t for end address of the region, can be %NULL
+ *
+ * Iterate over all reserved memory regions.
+ */
+void __init_memblock __next_reserved_mem_region(u64 *idx,
+					   phys_addr_t *out_start,
+					   phys_addr_t *out_end)
+{
+	struct memblock_type *rsv = &memblock.reserved;
+
+	if (*idx >= 0 && *idx < rsv->cnt) {
+		struct memblock_region *r = &rsv->regions[*idx];
+		phys_addr_t base = r->base;
+		phys_addr_t size = r->size;
+
+		if (out_start)
+			*out_start = base;
+		if (out_end)
+			*out_end = base + size - 1;
+
+		*idx += 1;
+		return;
+	}
+
+	/* signal end of iteration */
+	*idx = ULLONG_MAX;
+}
+
+/**
  * __next__mem_range - next function for for_each_free_mem_range() etc.
  * @idx: pointer to u64 loop variable
  * @nid: node selector, %NUMA_NO_NODE for all nodes
@@ -1387,7 +1419,7 @@ void __init __memblock_free_late(phys_addr_t base, phys_addr_t size)
 	end = PFN_DOWN(base + size);
 
 	for (; cursor < end; cursor++) {
-		__free_pages_bootmem(pfn_to_page(cursor), 0);
+		__free_pages_bootmem(pfn_to_page(cursor), cursor, 0);
 		totalram_pages++;
 	}
 }
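__next_reserved_mem_region() exists to back a new for_each_reserved_mem_region() iterator. The macro itself lands in include/linux/memblock.h, which is outside this 'mm'-limited diffstat; from the same series it looks roughly like the following (a sketch from memory, not part of the diff shown here):

/* assumed shape of the include/linux/memblock.h side of this series */
#define for_each_reserved_mem_region(i, p_start, p_end)			\
	for (i = 0UL,							\
	     __next_reserved_mem_region(&i, p_start, p_end);		\
	     i != (u64)ULLONG_MAX;					\
	     __next_reserved_mem_region(&i, p_start, p_end))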
diff --git a/mm/mm_init.c b/mm/mm_init.c
index 5f420f7fafa1..fdadf918de76 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -11,6 +11,7 @@
 #include <linux/export.h>
 #include <linux/memory.h>
 #include <linux/notifier.h>
+#include <linux/sched.h>
 #include "internal.h"
 
 #ifdef CONFIG_DEBUG_MEMORY_INIT
@@ -130,14 +131,6 @@ void __init mminit_verify_pageflags_layout(void)
 	BUG_ON(or_mask != add_mask);
 }
 
-void __meminit mminit_verify_page_links(struct page *page, enum zone_type zone,
-			unsigned long nid, unsigned long pfn)
-{
-	BUG_ON(page_to_nid(page) != nid);
-	BUG_ON(page_zonenum(page) != zone);
-	BUG_ON(page_to_pfn(page) != pfn);
-}
-
 static __init int set_mminit_loglevel(char *str)
 {
 	get_option(&str, &mminit_loglevel);
diff --git a/mm/nobootmem.c b/mm/nobootmem.c
index 5258386fa1be..e57cf24babd6 100644
--- a/mm/nobootmem.c
+++ b/mm/nobootmem.c
@@ -86,7 +86,7 @@ void __init free_bootmem_late(unsigned long addr, unsigned long size)
 	end = PFN_DOWN(addr + size);
 
 	for (; cursor < end; cursor++) {
-		__free_pages_bootmem(pfn_to_page(cursor), 0);
+		__free_pages_bootmem(pfn_to_page(cursor), cursor, 0);
 		totalram_pages++;
 	}
 }
@@ -101,7 +101,7 @@ static void __init __free_pages_memory(unsigned long start, unsigned long end)
 		while (start + (1UL << order) > end)
 			order--;
 
-		__free_pages_bootmem(pfn_to_page(start), order);
+		__free_pages_bootmem(pfn_to_page(start), start, order);
 
 		start += (1UL << order);
 	}
@@ -130,6 +130,9 @@ static unsigned long __init free_low_memory_core_early(void)
 
 	memblock_clear_hotplug(0, -1);
 
+	for_each_reserved_mem_region(i, &start, &end)
+		reserve_bootmem_region(start, end);
+
 	for_each_free_mem_range(i, NUMA_NO_NODE, MEMBLOCK_NONE, &start, &end,
 				NULL)
 		count += __free_memory_core(start, end);
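reserve_bootmem_region(), added to mm/page_alloc.c below, rounds its byte range outward to whole pages, PFN_DOWN() on the start and PFN_UP() on the end, so pages only partially covered by a reserved region are still marked PageReserved. A userspace check of that rounding, again assuming 4 KiB pages:

#include <stdio.h>

#define PAGE_SHIFT 12				/* assumption: 4 KiB pages */
#define PAGE_SIZE  (1UL << PAGE_SHIFT)
#define PFN_DOWN(x) ((x) >> PAGE_SHIFT)
#define PFN_UP(x)   (((x) + PAGE_SIZE - 1) >> PAGE_SHIFT)

int main(void)
{
	unsigned long start = 0x1800, end = 0x2800;	/* hypothetical range */

	/* 0x1800 sits mid-page 1 and 0x2800 mid-page 2: both edge pages count */
	printf("pfns [%lu, %lu)\n", PFN_DOWN(start), PFN_UP(end));
	return 0;
}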
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 5e6fa06f2784..506eac8b38af 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -18,6 +18,7 @@
 #include <linux/mm.h>
 #include <linux/swap.h>
 #include <linux/interrupt.h>
+#include <linux/rwsem.h>
 #include <linux/pagemap.h>
 #include <linux/jiffies.h>
 #include <linux/bootmem.h>
@@ -61,6 +62,7 @@
 #include <linux/hugetlb.h>
 #include <linux/sched/rt.h>
 #include <linux/page_owner.h>
+#include <linux/kthread.h>
 
 #include <asm/sections.h>
 #include <asm/tlbflush.h>
@@ -235,6 +237,77 @@ EXPORT_SYMBOL(nr_online_nodes);
 
 int page_group_by_mobility_disabled __read_mostly;
 
+#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
+static inline void reset_deferred_meminit(pg_data_t *pgdat)
+{
+	pgdat->first_deferred_pfn = ULONG_MAX;
+}
+
+/* Returns true if the struct page for the pfn is uninitialised */
+static inline bool __meminit early_page_uninitialised(unsigned long pfn)
+{
+	int nid = early_pfn_to_nid(pfn);
+
+	if (pfn >= NODE_DATA(nid)->first_deferred_pfn)
+		return true;
+
+	return false;
+}
+
+static inline bool early_page_nid_uninitialised(unsigned long pfn, int nid)
+{
+	if (pfn >= NODE_DATA(nid)->first_deferred_pfn)
+		return true;
+
+	return false;
+}
+
+/*
+ * Returns false when the remaining initialisation should be deferred until
+ * later in the boot cycle when it can be parallelised.
+ */
+static inline bool update_defer_init(pg_data_t *pgdat,
+				unsigned long pfn, unsigned long zone_end,
+				unsigned long *nr_initialised)
+{
+	/* Always populate low zones for address-constrained allocations */
+	if (zone_end < pgdat_end_pfn(pgdat))
+		return true;
+
+	/* Initialise at least 2G of the highest zone */
+	(*nr_initialised)++;
+	if (*nr_initialised > (2UL << (30 - PAGE_SHIFT)) &&
+	    (pfn & (PAGES_PER_SECTION - 1)) == 0) {
+		pgdat->first_deferred_pfn = pfn;
+		return false;
+	}
+
+	return true;
+}
+#else
+static inline void reset_deferred_meminit(pg_data_t *pgdat)
+{
+}
+
+static inline bool early_page_uninitialised(unsigned long pfn)
+{
+	return false;
+}
+
+static inline bool early_page_nid_uninitialised(unsigned long pfn, int nid)
+{
+	return false;
+}
+
+static inline bool update_defer_init(pg_data_t *pgdat,
+				unsigned long pfn, unsigned long zone_end,
+				unsigned long *nr_initialised)
+{
+	return true;
+}
+#endif
+
+
 void set_pageblock_migratetype(struct page *page, int migratetype)
 {
 	if (unlikely(page_group_by_mobility_disabled &&
@@ -764,6 +837,75 @@ static int free_tail_pages_check(struct page *head_page, struct page *page)
 	return 0;
 }
 
+static void __meminit __init_single_page(struct page *page, unsigned long pfn,
+				unsigned long zone, int nid)
+{
+	set_page_links(page, zone, nid, pfn);
+	init_page_count(page);
+	page_mapcount_reset(page);
+	page_cpupid_reset_last(page);
+
+	INIT_LIST_HEAD(&page->lru);
+#ifdef WANT_PAGE_VIRTUAL
+	/* The shift won't overflow because ZONE_NORMAL is below 4G. */
+	if (!is_highmem_idx(zone))
+		set_page_address(page, __va(pfn << PAGE_SHIFT));
+#endif
+}
+
+static void __meminit __init_single_pfn(unsigned long pfn, unsigned long zone,
+					int nid)
+{
+	return __init_single_page(pfn_to_page(pfn), pfn, zone, nid);
+}
+
+#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
+static void init_reserved_page(unsigned long pfn)
+{
+	pg_data_t *pgdat;
+	int nid, zid;
+
+	if (!early_page_uninitialised(pfn))
+		return;
+
+	nid = early_pfn_to_nid(pfn);
+	pgdat = NODE_DATA(nid);
+
+	for (zid = 0; zid < MAX_NR_ZONES; zid++) {
+		struct zone *zone = &pgdat->node_zones[zid];
+
+		if (pfn >= zone->zone_start_pfn && pfn < zone_end_pfn(zone))
+			break;
+	}
+	__init_single_pfn(pfn, zid, nid);
+}
+#else
+static inline void init_reserved_page(unsigned long pfn)
+{
+}
+#endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */
+
+/*
+ * Initialised pages do not have PageReserved set. This function is
+ * called for each range allocated by the bootmem allocator and
+ * marks the pages PageReserved. The remaining valid pages are later
+ * sent to the buddy page allocator.
+ */
+void __meminit reserve_bootmem_region(unsigned long start, unsigned long end)
+{
+	unsigned long start_pfn = PFN_DOWN(start);
+	unsigned long end_pfn = PFN_UP(end);
+
+	for (; start_pfn < end_pfn; start_pfn++) {
+		if (pfn_valid(start_pfn)) {
+			struct page *page = pfn_to_page(start_pfn);
+
+			init_reserved_page(start_pfn);
+			SetPageReserved(page);
+		}
+	}
+}
+
 static bool free_pages_prepare(struct page *page, unsigned int order)
 {
 	bool compound = PageCompound(page);
@@ -818,7 +960,8 @@ static void __free_pages_ok(struct page *page, unsigned int order)
 	local_irq_restore(flags);
 }
 
-void __init __free_pages_bootmem(struct page *page, unsigned int order)
+static void __init __free_pages_boot_core(struct page *page,
+					unsigned long pfn, unsigned int order)
 {
 	unsigned int nr_pages = 1 << order;
 	struct page *p = page;
@@ -838,6 +981,223 @@ void __init __free_pages_bootmem(struct page *page, unsigned int order)
 	__free_pages(page, order);
 }
 
+#if defined(CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID) || \
+	defined(CONFIG_HAVE_MEMBLOCK_NODE_MAP)
+/* Only safe to use early in boot when initialisation is single-threaded */
+static struct mminit_pfnnid_cache early_pfnnid_cache __meminitdata;
+
+int __meminit early_pfn_to_nid(unsigned long pfn)
+{
+	int nid;
+
+	/* The system will behave unpredictably otherwise */
+	BUG_ON(system_state != SYSTEM_BOOTING);
+
+	nid = __early_pfn_to_nid(pfn, &early_pfnnid_cache);
+	if (nid >= 0)
+		return nid;
+	/* just returns 0 */
+	return 0;
+}
+#endif
+
+#ifdef CONFIG_NODES_SPAN_OTHER_NODES
+static inline bool __meminit meminit_pfn_in_nid(unsigned long pfn, int node,
+					struct mminit_pfnnid_cache *state)
+{
+	int nid;
+
+	nid = __early_pfn_to_nid(pfn, state);
+	if (nid >= 0 && nid != node)
+		return false;
+	return true;
+}
+
+/* Only safe to use early in boot when initialisation is single-threaded */
+static inline bool __meminit early_pfn_in_nid(unsigned long pfn, int node)
+{
+	return meminit_pfn_in_nid(pfn, node, &early_pfnnid_cache);
+}
+
+#else
+
+static inline bool __meminit early_pfn_in_nid(unsigned long pfn, int node)
+{
+	return true;
+}
+static inline bool __meminit meminit_pfn_in_nid(unsigned long pfn, int node,
+					struct mminit_pfnnid_cache *state)
+{
+	return true;
+}
+#endif
+
+
+void __init __free_pages_bootmem(struct page *page, unsigned long pfn,
+							unsigned int order)
+{
+	if (early_page_uninitialised(pfn))
+		return;
+	return __free_pages_boot_core(page, pfn, order);
+}
+
+#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
+static void __init deferred_free_range(struct page *page,
+					unsigned long pfn, int nr_pages)
+{
+	int i;
+
+	if (!page)
+		return;
+
+	/* Free a large naturally-aligned chunk if possible */
+	if (nr_pages == MAX_ORDER_NR_PAGES &&
+	    (pfn & (MAX_ORDER_NR_PAGES-1)) == 0) {
+		set_pageblock_migratetype(page, MIGRATE_MOVABLE);
+		__free_pages_boot_core(page, pfn, MAX_ORDER-1);
+		return;
+	}
+
+	for (i = 0; i < nr_pages; i++, page++, pfn++)
+		__free_pages_boot_core(page, pfn, 0);
+}
+
+static __initdata DECLARE_RWSEM(pgdat_init_rwsem);
+
+/* Initialise remaining memory on a node */
+static int __init deferred_init_memmap(void *data)
+{
+	pg_data_t *pgdat = data;
+	int nid = pgdat->node_id;
+	struct mminit_pfnnid_cache nid_init_state = { };
+	unsigned long start = jiffies;
+	unsigned long nr_pages = 0;
+	unsigned long walk_start, walk_end;
+	int i, zid;
+	struct zone *zone;
+	unsigned long first_init_pfn = pgdat->first_deferred_pfn;
+	const struct cpumask *cpumask = cpumask_of_node(pgdat->node_id);
+
+	if (first_init_pfn == ULONG_MAX) {
+		up_read(&pgdat_init_rwsem);
+		return 0;
+	}
+
+	/* Bind memory initialisation thread to a local node if possible */
+	if (!cpumask_empty(cpumask))
+		set_cpus_allowed_ptr(current, cpumask);
+
+	/* Sanity check boundaries */
+	BUG_ON(pgdat->first_deferred_pfn < pgdat->node_start_pfn);
+	BUG_ON(pgdat->first_deferred_pfn > pgdat_end_pfn(pgdat));
+	pgdat->first_deferred_pfn = ULONG_MAX;
+
+	/* Only the highest zone is deferred so find it */
+	for (zid = 0; zid < MAX_NR_ZONES; zid++) {
+		zone = pgdat->node_zones + zid;
+		if (first_init_pfn < zone_end_pfn(zone))
+			break;
+	}
+
+	for_each_mem_pfn_range(i, nid, &walk_start, &walk_end, NULL) {
+		unsigned long pfn, end_pfn;
+		struct page *page = NULL;
+		struct page *free_base_page = NULL;
+		unsigned long free_base_pfn = 0;
+		int nr_to_free = 0;
+
+		end_pfn = min(walk_end, zone_end_pfn(zone));
+		pfn = first_init_pfn;
+		if (pfn < walk_start)
+			pfn = walk_start;
+		if (pfn < zone->zone_start_pfn)
+			pfn = zone->zone_start_pfn;
+
+		for (; pfn < end_pfn; pfn++) {
+			if (!pfn_valid_within(pfn))
+				goto free_range;
+
+			/*
+			 * Ensure pfn_valid is checked every
+			 * MAX_ORDER_NR_PAGES for memory holes
+			 */
+			if ((pfn & (MAX_ORDER_NR_PAGES - 1)) == 0) {
+				if (!pfn_valid(pfn)) {
+					page = NULL;
+					goto free_range;
+				}
+			}
+
+			if (!meminit_pfn_in_nid(pfn, nid, &nid_init_state)) {
+				page = NULL;
+				goto free_range;
+			}
+
+			/* Minimise pfn page lookups and scheduler checks */
+			if (page && (pfn & (MAX_ORDER_NR_PAGES - 1)) != 0) {
+				page++;
+			} else {
+				nr_pages += nr_to_free;
+				deferred_free_range(free_base_page,
+						free_base_pfn, nr_to_free);
+				free_base_page = NULL;
+				free_base_pfn = nr_to_free = 0;
+
+				page = pfn_to_page(pfn);
+				cond_resched();
+			}
+
+			if (page->flags) {
+				VM_BUG_ON(page_zone(page) != zone);
+				goto free_range;
+			}
+
+			__init_single_page(page, pfn, zid, nid);
+			if (!free_base_page) {
+				free_base_page = page;
+				free_base_pfn = pfn;
+				nr_to_free = 0;
+			}
+			nr_to_free++;
+
+			/* Where possible, batch up pages for a single free */
+			continue;
+free_range:
+			/* Free the current block of pages to allocator */
+			nr_pages += nr_to_free;
+			deferred_free_range(free_base_page, free_base_pfn,
+						nr_to_free);
+			free_base_page = NULL;
+			free_base_pfn = nr_to_free = 0;
+		}
+
+		first_init_pfn = max(end_pfn, first_init_pfn);
+	}
+
+	/* Sanity check that the next zone really is unpopulated */
+	WARN_ON(++zid < MAX_NR_ZONES && populated_zone(++zone));
+
+	pr_info("node %d initialised, %lu pages in %ums\n", nid, nr_pages,
+					jiffies_to_msecs(jiffies - start));
+	up_read(&pgdat_init_rwsem);
+	return 0;
+}
+
+void __init page_alloc_init_late(void)
+{
+	int nid;
+
+	for_each_node_state(nid, N_MEMORY) {
+		down_read(&pgdat_init_rwsem);
+		kthread_run(deferred_init_memmap, NODE_DATA(nid), "pgdatinit%d", nid);
+	}
+
+	/* Block until all are initialised */
+	down_write(&pgdat_init_rwsem);
+	up_write(&pgdat_init_rwsem);
+}
+#endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */
+
 #ifdef CONFIG_CMA
 /* Free whole pageblock and set its migration type to MIGRATE_CMA. */
 void __init init_cma_reserved_pageblock(struct page *page)
@@ -4150,6 +4510,9 @@ static void setup_zone_migrate_reserve(struct zone *zone)
 	zone->nr_migrate_reserve_block = reserve;
 
 	for (pfn = start_pfn; pfn < end_pfn; pfn += pageblock_nr_pages) {
+		if (!early_page_nid_uninitialised(pfn, zone_to_nid(zone)))
+			return;
+
 		if (!pfn_valid(pfn))
 			continue;
 		page = pfn_to_page(pfn);
@@ -4212,15 +4575,16 @@
 void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
 		unsigned long start_pfn, enum memmap_context context)
 {
-	struct page *page;
+	pg_data_t *pgdat = NODE_DATA(nid);
 	unsigned long end_pfn = start_pfn + size;
 	unsigned long pfn;
 	struct zone *z;
+	unsigned long nr_initialised = 0;
 
 	if (highest_memmap_pfn < end_pfn - 1)
 		highest_memmap_pfn = end_pfn - 1;
 
-	z = &NODE_DATA(nid)->node_zones[zone];
+	z = &pgdat->node_zones[zone];
 	for (pfn = start_pfn; pfn < end_pfn; pfn++) {
 		/*
 		 * There can be holes in boot-time mem_map[]s
@@ -4232,14 +4596,11 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
 				continue;
 			if (!early_pfn_in_nid(pfn, nid))
 				continue;
+			if (!update_defer_init(pgdat, pfn, end_pfn,
+						&nr_initialised))
+				break;
 		}
-		page = pfn_to_page(pfn);
-		set_page_links(page, zone, nid, pfn);
-		mminit_verify_page_links(page, zone, nid, pfn);
-		init_page_count(page);
-		page_mapcount_reset(page);
-		page_cpupid_reset_last(page);
-		SetPageReserved(page);
+
 		/*
 		 * Mark the block movable so that blocks are reserved for
 		 * movable at startup. This will force kernel allocations
@@ -4254,17 +4615,14 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
 		 * check here not to call set_pageblock_migratetype() against
 		 * pfn out of zone.
 		 */
-		if ((z->zone_start_pfn <= pfn)
-		    && (pfn < zone_end_pfn(z))
-		    && !(pfn & (pageblock_nr_pages - 1)))
-			set_pageblock_migratetype(page, MIGRATE_MOVABLE);
+		if (!(pfn & (pageblock_nr_pages - 1))) {
+			struct page *page = pfn_to_page(pfn);
 
-		INIT_LIST_HEAD(&page->lru);
-#ifdef WANT_PAGE_VIRTUAL
-		/* The shift won't overflow because ZONE_NORMAL is below 4G. */
-		if (!is_highmem_idx(zone))
-			set_page_address(page, __va(pfn << PAGE_SHIFT));
-#endif
+			__init_single_page(page, pfn, zone, nid);
+			set_pageblock_migratetype(page, MIGRATE_MOVABLE);
+		} else {
+			__init_single_pfn(pfn, zone, nid);
+		}
 	}
 }
 
@@ -4522,57 +4880,30 @@ int __meminit init_currently_empty_zone(struct zone *zone,
 
 #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
 #ifndef CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID
+
 /*
  * Required by SPARSEMEM. Given a PFN, return what node the PFN is on.
  */
-int __meminit __early_pfn_to_nid(unsigned long pfn)
+int __meminit __early_pfn_to_nid(unsigned long pfn,
+					struct mminit_pfnnid_cache *state)
 {
 	unsigned long start_pfn, end_pfn;
 	int nid;
-	/*
-	 * NOTE: The following SMP-unsafe globals are only used early in boot
-	 * when the kernel is running single-threaded.
-	 */
-	static unsigned long __meminitdata last_start_pfn, last_end_pfn;
-	static int __meminitdata last_nid;
 
-	if (last_start_pfn <= pfn && pfn < last_end_pfn)
-		return last_nid;
+	if (state->last_start <= pfn && pfn < state->last_end)
+		return state->last_nid;
 
 	nid = memblock_search_pfn_nid(pfn, &start_pfn, &end_pfn);
 	if (nid != -1) {
-		last_start_pfn = start_pfn;
-		last_end_pfn = end_pfn;
-		last_nid = nid;
+		state->last_start = start_pfn;
+		state->last_end = end_pfn;
+		state->last_nid = nid;
 	}
 
 	return nid;
 }
 #endif /* CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID */
 
-int __meminit early_pfn_to_nid(unsigned long pfn)
-{
-	int nid;
-
-	nid = __early_pfn_to_nid(pfn);
-	if (nid >= 0)
-		return nid;
-	/* just returns 0 */
-	return 0;
-}
-
-#ifdef CONFIG_NODES_SPAN_OTHER_NODES
-bool __meminit early_pfn_in_nid(unsigned long pfn, int node)
-{
-	int nid;
-
-	nid = __early_pfn_to_nid(pfn);
-	if (nid >= 0 && nid != node)
-		return false;
-	return true;
-}
-#endif
-
 /**
  * free_bootmem_with_active_regions - Call memblock_free_early_nid for each active range
  * @nid: The node to free memory on. If MAX_NUMNODES, all nodes are freed.
@@ -5090,6 +5421,7 @@ void __paginginit free_area_init_node(int nid, unsigned long *zones_size,
 	/* pg_data_t should be reset to zero when it's allocated */
 	WARN_ON(pgdat->nr_zones || pgdat->classzone_idx);
 
+	reset_deferred_meminit(pgdat);
 	pgdat->node_id = nid;
 	pgdat->node_start_pfn = node_start_pfn;
 #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
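A detail worth noting in deferred_free_range() above: a batch goes to the buddy allocator as a single MAX_ORDER-1 free only when it is a full, naturally aligned MAX_ORDER_NR_PAGES chunk; everything else is freed page by page. The condition, reproduced standalone under the usual MAX_ORDER == 11 default (an assumption here; that gives 1024-page, 4 MiB chunks with 4 KiB pages):

#include <stdio.h>

#define MAX_ORDER 11					/* assumption: default */
#define MAX_ORDER_NR_PAGES (1UL << (MAX_ORDER - 1))	/* 1024 pages */

/* mirrors the fast-path condition in deferred_free_range() */
static int frees_as_one_chunk(unsigned long pfn, unsigned long nr_pages)
{
	return nr_pages == MAX_ORDER_NR_PAGES &&
	       (pfn & (MAX_ORDER_NR_PAGES - 1)) == 0;
}

int main(void)
{
	printf("%d\n", frees_as_one_chunk(2048, 1024));	/* 1: aligned, full */
	printf("%d\n", frees_as_one_chunk(2049, 1024));	/* 0: misaligned */
	printf("%d\n", frees_as_one_chunk(2048, 512));	/* 0: partial chunk */
	return 0;
}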