author		Linus Torvalds <torvalds@linux-foundation.org>	2015-07-01 20:47:51 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2015-07-01 20:47:51 -0400
commit		2d01eedf1d14432f4db5388a49dc5596a8c5bd02 (patch)
tree		646525acc0475b2899827c1bfbd25f05ec1b8092 /mm
parent		6ac15baacb6ecd87c66209627753b96ded3b4515 (diff)
parent		abdd4a7025282fbe3737e1bcb5f51afc8d8ea1b8 (diff)
Merge branch 'akpm' (patches from Andrew)
Merge third patchbomb from Andrew Morton:

 - the rest of MM
 - scripts/gdb updates
 - ipc/ updates
 - lib/ updates
 - MAINTAINERS updates
 - various other misc things

* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (67 commits)
  genalloc: rename of_get_named_gen_pool() to of_gen_pool_get()
  genalloc: rename dev_get_gen_pool() to gen_pool_get()
  x86: opt into HAVE_COPY_THREAD_TLS, for both 32-bit and 64-bit
  MAINTAINERS: add zpool
  MAINTAINERS: BCACHE: Kent Overstreet has changed email address
  MAINTAINERS: move Jens Osterkamp to CREDITS
  MAINTAINERS: remove unused nbd.h pattern
  MAINTAINERS: update brcm gpio filename pattern
  MAINTAINERS: update brcm dts pattern
  MAINTAINERS: update sound soc intel patterns
  MAINTAINERS: remove website for paride
  MAINTAINERS: update Emulex ocrdma email addresses
  bcache: use kvfree() in various places
  libcxgbi: use kvfree() in cxgbi_free_big_mem()
  target: use kvfree() in session alloc and free
  IB/ehca: use kvfree() in ipz_queue_{cd}tor()
  drm/nouveau/gem: use kvfree() in u_free()
  drm: use kvfree() in drm_free_large()
  cxgb4: use kvfree() in t4_free_mem()
  cxgb3: use kvfree() in cxgb_free_mem()
  ...
Diffstat (limited to 'mm')
-rw-r--r--	mm/Kconfig	 18
-rw-r--r--	mm/bootmem.c	 13
-rw-r--r--	mm/internal.h	 11
-rw-r--r--	mm/memblock.c	 34
-rw-r--r--	mm/mm_init.c	  9
-rw-r--r--	mm/nobootmem.c	  7
-rw-r--r--	mm/page_alloc.c	442
7 files changed, 453 insertions(+), 81 deletions(-)
diff --git a/mm/Kconfig b/mm/Kconfig
index c180af880ed5..e79de2bd12cd 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -636,3 +636,21 @@ config MAX_STACK_SIZE_MB
 	  changed to a smaller value in which case that is used.
 
 	  A sane initial value is 80 MB.
+
+# For architectures that support deferred memory initialisation
+config ARCH_SUPPORTS_DEFERRED_STRUCT_PAGE_INIT
+	bool
+
+config DEFERRED_STRUCT_PAGE_INIT
+	bool "Defer initialisation of struct pages to kswapd"
+	default n
+	depends on ARCH_SUPPORTS_DEFERRED_STRUCT_PAGE_INIT
+	depends on MEMORY_HOTPLUG
+	help
+	  Ordinarily all struct pages are initialised during early boot in a
+	  single thread. On very large machines this can take a considerable
+	  amount of time. If this option is set, large machines will bring up
+	  a subset of memmap at boot and then initialise the rest in parallel
+	  when kswapd starts. This has a potential performance impact on
+	  processes running early in the lifetime of the system until kswapd
+	  finishes the initialisation.
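[Editor's note] The help text above describes the general pattern the rest of this series implements: populate only a low portion of each node's memmap at boot, then let per-node worker threads finish the remainder once the scheduler is running. The following toy sketch models that pattern in plain C with POSIX threads; it is not kernel code, and the names (node_desc, deferred_init_worker, EAGER) are invented for illustration.

	/*
	 * Toy model of deferred initialisation (illustration only): the
	 * "boot" path initialises the first EAGER entries of each node's
	 * array, and one worker thread per node later fills in the rest.
	 */
	#include <pthread.h>
	#include <stdio.h>
	#include <stdlib.h>

	#define NODES	2
	#define ENTRIES	(1UL << 20)
	#define EAGER	(1UL << 16)		/* initialised "at boot" */

	struct node_desc {
		int id;
		unsigned long first_deferred;	/* analogous to first_deferred_pfn */
		unsigned int *entries;
	};

	static struct node_desc nodes[NODES];

	static void *deferred_init_worker(void *data)
	{
		struct node_desc *nd = data;

		for (unsigned long i = nd->first_deferred; i < ENTRIES; i++)
			nd->entries[i] = 1;	/* stand-in for __init_single_page() */
		return NULL;
	}

	int main(void)
	{
		pthread_t tids[NODES];

		for (int n = 0; n < NODES; n++) {	/* "early boot": eager part only */
			nodes[n].id = n;
			nodes[n].entries = calloc(ENTRIES, sizeof(unsigned int));
			for (unsigned long i = 0; i < EAGER; i++)
				nodes[n].entries[i] = 1;
			nodes[n].first_deferred = EAGER;
		}

		/* like page_alloc_init_late(): finish each node in parallel */
		for (int n = 0; n < NODES; n++)
			pthread_create(&tids[n], NULL, deferred_init_worker, &nodes[n]);
		for (int n = 0; n < NODES; n++)
			pthread_join(tids[n], NULL);

		printf("all %d nodes initialised\n", NODES);
		return 0;
	}

The diffs below wire the real version of this idea into the page allocator: __free_pages_bootmem() learns which pfn it is freeing, bootmem-reserved ranges get their struct pages initialised on demand, and deferred_init_memmap() performs the per-node catch-up.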
diff --git a/mm/bootmem.c b/mm/bootmem.c
index 477be696511d..a23dd1934654 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -164,7 +164,7 @@ void __init free_bootmem_late(unsigned long physaddr, unsigned long size)
 	end = PFN_DOWN(physaddr + size);
 
 	for (; cursor < end; cursor++) {
-		__free_pages_bootmem(pfn_to_page(cursor), 0);
+		__free_pages_bootmem(pfn_to_page(cursor), cursor, 0);
 		totalram_pages++;
 	}
 }
@@ -172,7 +172,7 @@ void __init free_bootmem_late(unsigned long physaddr, unsigned long size)
 static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata)
 {
 	struct page *page;
-	unsigned long *map, start, end, pages, count = 0;
+	unsigned long *map, start, end, pages, cur, count = 0;
 
 	if (!bdata->node_bootmem_map)
 		return 0;
@@ -210,17 +210,17 @@ static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata)
 		if (IS_ALIGNED(start, BITS_PER_LONG) && vec == ~0UL) {
 			int order = ilog2(BITS_PER_LONG);
 
-			__free_pages_bootmem(pfn_to_page(start), order);
+			__free_pages_bootmem(pfn_to_page(start), start, order);
 			count += BITS_PER_LONG;
 			start += BITS_PER_LONG;
 		} else {
-			unsigned long cur = start;
+			cur = start;
 
 			start = ALIGN(start + 1, BITS_PER_LONG);
 			while (vec && cur != start) {
 				if (vec & 1) {
 					page = pfn_to_page(cur);
-					__free_pages_bootmem(page, 0);
+					__free_pages_bootmem(page, cur, 0);
 					count++;
 				}
 				vec >>= 1;
@@ -229,12 +229,13 @@ static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata)
 		}
 	}
 
+	cur = bdata->node_min_pfn;
 	page = virt_to_page(bdata->node_bootmem_map);
 	pages = bdata->node_low_pfn - bdata->node_min_pfn;
 	pages = bootmem_bootmap_pages(pages);
 	count += pages;
 	while (pages--)
-		__free_pages_bootmem(page++, 0);
+		__free_pages_bootmem(page++, cur++, 0);
 
 	bdebug("nid=%td released=%lx\n", bdata - bootmem_node_data, count);
 
diff --git a/mm/internal.h b/mm/internal.h
index a25e359a4039..36b23f1e2ca6 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -155,7 +155,8 @@ __find_buddy_index(unsigned long page_idx, unsigned int order)
 }
 
 extern int __isolate_free_page(struct page *page, unsigned int order);
-extern void __free_pages_bootmem(struct page *page, unsigned int order);
+extern void __free_pages_bootmem(struct page *page, unsigned long pfn,
+					unsigned int order);
 extern void prep_compound_page(struct page *page, unsigned long order);
 #ifdef CONFIG_MEMORY_FAILURE
 extern bool is_free_buddy_page(struct page *page);
@@ -361,10 +362,7 @@ do { \
 } while (0)
 
 extern void mminit_verify_pageflags_layout(void);
-extern void mminit_verify_page_links(struct page *page,
-		enum zone_type zone, unsigned long nid, unsigned long pfn);
 extern void mminit_verify_zonelist(void);
-
 #else
 
 static inline void mminit_dprintk(enum mminit_level level,
@@ -376,11 +374,6 @@ static inline void mminit_verify_pageflags_layout(void)
 {
 }
 
-static inline void mminit_verify_page_links(struct page *page,
-		enum zone_type zone, unsigned long nid, unsigned long pfn)
-{
-}
-
 static inline void mminit_verify_zonelist(void)
 {
 }
diff --git a/mm/memblock.c b/mm/memblock.c
index 1b444c730846..87108e77e476 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -820,6 +820,38 @@ int __init_memblock memblock_mark_mirror(phys_addr_t base, phys_addr_t size)
 
 
 /**
+ * __next_reserved_mem_region - next function for for_each_reserved_region()
+ * @idx: pointer to u64 loop variable
+ * @out_start: ptr to phys_addr_t for start address of the region, can be %NULL
+ * @out_end: ptr to phys_addr_t for end address of the region, can be %NULL
+ *
+ * Iterate over all reserved memory regions.
+ */
+void __init_memblock __next_reserved_mem_region(u64 *idx,
+					   phys_addr_t *out_start,
+					   phys_addr_t *out_end)
+{
+	struct memblock_type *rsv = &memblock.reserved;
+
+	if (*idx >= 0 && *idx < rsv->cnt) {
+		struct memblock_region *r = &rsv->regions[*idx];
+		phys_addr_t base = r->base;
+		phys_addr_t size = r->size;
+
+		if (out_start)
+			*out_start = base;
+		if (out_end)
+			*out_end = base + size - 1;
+
+		*idx += 1;
+		return;
+	}
+
+	/* signal end of iteration */
+	*idx = ULLONG_MAX;
+}
+
+/**
  * __next__mem_range - next function for for_each_free_mem_range() etc.
  * @idx: pointer to u64 loop variable
  * @nid: node selector, %NUMA_NO_NODE for all nodes
@@ -1387,7 +1419,7 @@ void __init __memblock_free_late(phys_addr_t base, phys_addr_t size)
 	end = PFN_DOWN(base + size);
 
 	for (; cursor < end; cursor++) {
-		__free_pages_bootmem(pfn_to_page(cursor), 0);
+		__free_pages_bootmem(pfn_to_page(cursor), cursor, 0);
 		totalram_pages++;
 	}
 }
diff --git a/mm/mm_init.c b/mm/mm_init.c
index 5f420f7fafa1..fdadf918de76 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -11,6 +11,7 @@
 #include <linux/export.h>
 #include <linux/memory.h>
 #include <linux/notifier.h>
+#include <linux/sched.h>
 #include "internal.h"
 
 #ifdef CONFIG_DEBUG_MEMORY_INIT
@@ -130,14 +131,6 @@ void __init mminit_verify_pageflags_layout(void)
 	BUG_ON(or_mask != add_mask);
 }
 
-void __meminit mminit_verify_page_links(struct page *page, enum zone_type zone,
-			unsigned long nid, unsigned long pfn)
-{
-	BUG_ON(page_to_nid(page) != nid);
-	BUG_ON(page_zonenum(page) != zone);
-	BUG_ON(page_to_pfn(page) != pfn);
-}
-
 static __init int set_mminit_loglevel(char *str)
 {
 	get_option(&str, &mminit_loglevel);
diff --git a/mm/nobootmem.c b/mm/nobootmem.c
index 5258386fa1be..e57cf24babd6 100644
--- a/mm/nobootmem.c
+++ b/mm/nobootmem.c
@@ -86,7 +86,7 @@ void __init free_bootmem_late(unsigned long addr, unsigned long size)
 	end = PFN_DOWN(addr + size);
 
 	for (; cursor < end; cursor++) {
-		__free_pages_bootmem(pfn_to_page(cursor), 0);
+		__free_pages_bootmem(pfn_to_page(cursor), cursor, 0);
 		totalram_pages++;
 	}
 }
@@ -101,7 +101,7 @@ static void __init __free_pages_memory(unsigned long start, unsigned long end)
 		while (start + (1UL << order) > end)
 			order--;
 
-		__free_pages_bootmem(pfn_to_page(start), order);
+		__free_pages_bootmem(pfn_to_page(start), start, order);
 
 		start += (1UL << order);
 	}
@@ -130,6 +130,9 @@ static unsigned long __init free_low_memory_core_early(void)
 
 	memblock_clear_hotplug(0, -1);
 
+	for_each_reserved_mem_region(i, &start, &end)
+		reserve_bootmem_region(start, end);
+
 	for_each_free_mem_range(i, NUMA_NO_NODE, MEMBLOCK_NONE, &start, &end,
 				NULL)
 		count += __free_memory_core(start, end);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 5e6fa06f2784..506eac8b38af 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -18,6 +18,7 @@
 #include <linux/mm.h>
 #include <linux/swap.h>
 #include <linux/interrupt.h>
+#include <linux/rwsem.h>
 #include <linux/pagemap.h>
 #include <linux/jiffies.h>
 #include <linux/bootmem.h>
@@ -61,6 +62,7 @@
 #include <linux/hugetlb.h>
 #include <linux/sched/rt.h>
 #include <linux/page_owner.h>
+#include <linux/kthread.h>
 
 #include <asm/sections.h>
 #include <asm/tlbflush.h>
@@ -235,6 +237,77 @@ EXPORT_SYMBOL(nr_online_nodes);
 
 int page_group_by_mobility_disabled __read_mostly;
 
+#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
+static inline void reset_deferred_meminit(pg_data_t *pgdat)
+{
+	pgdat->first_deferred_pfn = ULONG_MAX;
+}
+
+/* Returns true if the struct page for the pfn is uninitialised */
+static inline bool __meminit early_page_uninitialised(unsigned long pfn)
+{
+	int nid = early_pfn_to_nid(pfn);
+
+	if (pfn >= NODE_DATA(nid)->first_deferred_pfn)
+		return true;
+
+	return false;
+}
+
+static inline bool early_page_nid_uninitialised(unsigned long pfn, int nid)
+{
+	if (pfn >= NODE_DATA(nid)->first_deferred_pfn)
+		return true;
+
+	return false;
+}
+
+/*
+ * Returns false when the remaining initialisation should be deferred until
+ * later in the boot cycle when it can be parallelised.
+ */
+static inline bool update_defer_init(pg_data_t *pgdat,
+				unsigned long pfn, unsigned long zone_end,
+				unsigned long *nr_initialised)
+{
+	/* Always populate low zones for address-constrained allocations */
+	if (zone_end < pgdat_end_pfn(pgdat))
+		return true;
+
+	/* Initialise at least 2G of the highest zone */
+	(*nr_initialised)++;
+	if (*nr_initialised > (2UL << (30 - PAGE_SHIFT)) &&
+	    (pfn & (PAGES_PER_SECTION - 1)) == 0) {
+		pgdat->first_deferred_pfn = pfn;
+		return false;
+	}
+
+	return true;
+}
+#else
+static inline void reset_deferred_meminit(pg_data_t *pgdat)
+{
+}
+
+static inline bool early_page_uninitialised(unsigned long pfn)
+{
+	return false;
+}
+
+static inline bool early_page_nid_uninitialised(unsigned long pfn, int nid)
+{
+	return false;
+}
+
+static inline bool update_defer_init(pg_data_t *pgdat,
+				unsigned long pfn, unsigned long zone_end,
+				unsigned long *nr_initialised)
+{
+	return true;
+}
+#endif
+
+
 void set_pageblock_migratetype(struct page *page, int migratetype)
 {
 	if (unlikely(page_group_by_mobility_disabled &&
@@ -764,6 +837,75 @@ static int free_tail_pages_check(struct page *head_page, struct page *page)
 	return 0;
 }
 
+static void __meminit __init_single_page(struct page *page, unsigned long pfn,
+				unsigned long zone, int nid)
+{
+	set_page_links(page, zone, nid, pfn);
+	init_page_count(page);
+	page_mapcount_reset(page);
+	page_cpupid_reset_last(page);
+
+	INIT_LIST_HEAD(&page->lru);
+#ifdef WANT_PAGE_VIRTUAL
+	/* The shift won't overflow because ZONE_NORMAL is below 4G. */
+	if (!is_highmem_idx(zone))
+		set_page_address(page, __va(pfn << PAGE_SHIFT));
+#endif
+}
+
+static void __meminit __init_single_pfn(unsigned long pfn, unsigned long zone,
+					int nid)
+{
+	return __init_single_page(pfn_to_page(pfn), pfn, zone, nid);
+}
+
+#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
+static void init_reserved_page(unsigned long pfn)
+{
+	pg_data_t *pgdat;
+	int nid, zid;
+
+	if (!early_page_uninitialised(pfn))
+		return;
+
+	nid = early_pfn_to_nid(pfn);
+	pgdat = NODE_DATA(nid);
+
+	for (zid = 0; zid < MAX_NR_ZONES; zid++) {
+		struct zone *zone = &pgdat->node_zones[zid];
+
+		if (pfn >= zone->zone_start_pfn && pfn < zone_end_pfn(zone))
+			break;
+	}
+	__init_single_pfn(pfn, zid, nid);
+}
+#else
+static inline void init_reserved_page(unsigned long pfn)
+{
+}
+#endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */
+
+/*
+ * Initialised pages do not have PageReserved set. This function is
+ * called for each range allocated by the bootmem allocator and
+ * marks the pages PageReserved. The remaining valid pages are later
+ * sent to the buddy page allocator.
+ */
+void __meminit reserve_bootmem_region(unsigned long start, unsigned long end)
+{
+	unsigned long start_pfn = PFN_DOWN(start);
+	unsigned long end_pfn = PFN_UP(end);
+
+	for (; start_pfn < end_pfn; start_pfn++) {
+		if (pfn_valid(start_pfn)) {
+			struct page *page = pfn_to_page(start_pfn);
+
+			init_reserved_page(start_pfn);
+			SetPageReserved(page);
+		}
+	}
+}
+
 static bool free_pages_prepare(struct page *page, unsigned int order)
 {
 	bool compound = PageCompound(page);
@@ -818,7 +960,8 @@ static void __free_pages_ok(struct page *page, unsigned int order)
 	local_irq_restore(flags);
 }
 
-void __init __free_pages_bootmem(struct page *page, unsigned int order)
+static void __init __free_pages_boot_core(struct page *page,
+					unsigned long pfn, unsigned int order)
 {
 	unsigned int nr_pages = 1 << order;
 	struct page *p = page;
@@ -838,6 +981,223 @@ void __init __free_pages_bootmem(struct page *page, unsigned int order)
 	__free_pages(page, order);
 }
 
+#if defined(CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID) || \
+	defined(CONFIG_HAVE_MEMBLOCK_NODE_MAP)
+/* Only safe to use early in boot when initialisation is single-threaded */
+static struct mminit_pfnnid_cache early_pfnnid_cache __meminitdata;
+
+int __meminit early_pfn_to_nid(unsigned long pfn)
+{
+	int nid;
+
+	/* The system will behave unpredictably otherwise */
+	BUG_ON(system_state != SYSTEM_BOOTING);
+
+	nid = __early_pfn_to_nid(pfn, &early_pfnnid_cache);
+	if (nid >= 0)
+		return nid;
+	/* just returns 0 */
+	return 0;
+}
+#endif
+
+#ifdef CONFIG_NODES_SPAN_OTHER_NODES
+static inline bool __meminit meminit_pfn_in_nid(unsigned long pfn, int node,
+					struct mminit_pfnnid_cache *state)
+{
+	int nid;
+
+	nid = __early_pfn_to_nid(pfn, state);
+	if (nid >= 0 && nid != node)
+		return false;
+	return true;
+}
+
+/* Only safe to use early in boot when initialisation is single-threaded */
+static inline bool __meminit early_pfn_in_nid(unsigned long pfn, int node)
+{
+	return meminit_pfn_in_nid(pfn, node, &early_pfnnid_cache);
+}
+
+#else
+
+static inline bool __meminit early_pfn_in_nid(unsigned long pfn, int node)
+{
+	return true;
+}
+static inline bool __meminit meminit_pfn_in_nid(unsigned long pfn, int node,
+					struct mminit_pfnnid_cache *state)
+{
+	return true;
+}
+#endif
+
+
+void __init __free_pages_bootmem(struct page *page, unsigned long pfn,
+							unsigned int order)
+{
+	if (early_page_uninitialised(pfn))
+		return;
+	return __free_pages_boot_core(page, pfn, order);
+}
+
+#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
+static void __init deferred_free_range(struct page *page,
+					unsigned long pfn, int nr_pages)
+{
+	int i;
+
+	if (!page)
+		return;
+
+	/* Free a large naturally-aligned chunk if possible */
+	if (nr_pages == MAX_ORDER_NR_PAGES &&
+	    (pfn & (MAX_ORDER_NR_PAGES-1)) == 0) {
+		set_pageblock_migratetype(page, MIGRATE_MOVABLE);
+		__free_pages_boot_core(page, pfn, MAX_ORDER-1);
+		return;
+	}
+
+	for (i = 0; i < nr_pages; i++, page++, pfn++)
+		__free_pages_boot_core(page, pfn, 0);
+}
+
+static __initdata DECLARE_RWSEM(pgdat_init_rwsem);
+
+/* Initialise remaining memory on a node */
+static int __init deferred_init_memmap(void *data)
+{
+	pg_data_t *pgdat = data;
+	int nid = pgdat->node_id;
+	struct mminit_pfnnid_cache nid_init_state = { };
+	unsigned long start = jiffies;
+	unsigned long nr_pages = 0;
+	unsigned long walk_start, walk_end;
+	int i, zid;
+	struct zone *zone;
+	unsigned long first_init_pfn = pgdat->first_deferred_pfn;
+	const struct cpumask *cpumask = cpumask_of_node(pgdat->node_id);
+
+	if (first_init_pfn == ULONG_MAX) {
+		up_read(&pgdat_init_rwsem);
+		return 0;
+	}
+
+	/* Bind memory initialisation thread to a local node if possible */
+	if (!cpumask_empty(cpumask))
+		set_cpus_allowed_ptr(current, cpumask);
+
+	/* Sanity check boundaries */
+	BUG_ON(pgdat->first_deferred_pfn < pgdat->node_start_pfn);
+	BUG_ON(pgdat->first_deferred_pfn > pgdat_end_pfn(pgdat));
+	pgdat->first_deferred_pfn = ULONG_MAX;
+
+	/* Only the highest zone is deferred so find it */
+	for (zid = 0; zid < MAX_NR_ZONES; zid++) {
+		zone = pgdat->node_zones + zid;
+		if (first_init_pfn < zone_end_pfn(zone))
+			break;
+	}
+
+	for_each_mem_pfn_range(i, nid, &walk_start, &walk_end, NULL) {
+		unsigned long pfn, end_pfn;
+		struct page *page = NULL;
+		struct page *free_base_page = NULL;
+		unsigned long free_base_pfn = 0;
+		int nr_to_free = 0;
+
+		end_pfn = min(walk_end, zone_end_pfn(zone));
+		pfn = first_init_pfn;
+		if (pfn < walk_start)
+			pfn = walk_start;
+		if (pfn < zone->zone_start_pfn)
+			pfn = zone->zone_start_pfn;
+
+		for (; pfn < end_pfn; pfn++) {
+			if (!pfn_valid_within(pfn))
+				goto free_range;
+
+			/*
+			 * Ensure pfn_valid is checked every
+			 * MAX_ORDER_NR_PAGES for memory holes
+			 */
+			if ((pfn & (MAX_ORDER_NR_PAGES - 1)) == 0) {
+				if (!pfn_valid(pfn)) {
+					page = NULL;
+					goto free_range;
+				}
+			}
+
+			if (!meminit_pfn_in_nid(pfn, nid, &nid_init_state)) {
+				page = NULL;
+				goto free_range;
+			}
+
+			/* Minimise pfn page lookups and scheduler checks */
+			if (page && (pfn & (MAX_ORDER_NR_PAGES - 1)) != 0) {
+				page++;
+			} else {
+				nr_pages += nr_to_free;
+				deferred_free_range(free_base_page,
+						free_base_pfn, nr_to_free);
+				free_base_page = NULL;
+				free_base_pfn = nr_to_free = 0;
+
+				page = pfn_to_page(pfn);
+				cond_resched();
+			}
+
+			if (page->flags) {
+				VM_BUG_ON(page_zone(page) != zone);
+				goto free_range;
+			}
+
+			__init_single_page(page, pfn, zid, nid);
+			if (!free_base_page) {
+				free_base_page = page;
+				free_base_pfn = pfn;
+				nr_to_free = 0;
+			}
+			nr_to_free++;
+
+			/* Where possible, batch up pages for a single free */
+			continue;
+free_range:
+			/* Free the current block of pages to allocator */
+			nr_pages += nr_to_free;
+			deferred_free_range(free_base_page, free_base_pfn,
+						nr_to_free);
+			free_base_page = NULL;
+			free_base_pfn = nr_to_free = 0;
+		}
+
+		first_init_pfn = max(end_pfn, first_init_pfn);
+	}
+
+	/* Sanity check that the next zone really is unpopulated */
+	WARN_ON(++zid < MAX_NR_ZONES && populated_zone(++zone));
+
+	pr_info("node %d initialised, %lu pages in %ums\n", nid, nr_pages,
+					jiffies_to_msecs(jiffies - start));
+	up_read(&pgdat_init_rwsem);
+	return 0;
+}
+
+void __init page_alloc_init_late(void)
+{
+	int nid;
+
+	for_each_node_state(nid, N_MEMORY) {
+		down_read(&pgdat_init_rwsem);
+		kthread_run(deferred_init_memmap, NODE_DATA(nid), "pgdatinit%d", nid);
+	}
+
+	/* Block until all are initialised */
+	down_write(&pgdat_init_rwsem);
+	up_write(&pgdat_init_rwsem);
+}
+#endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */
+
 #ifdef CONFIG_CMA
 /* Free whole pageblock and set its migration type to MIGRATE_CMA. */
 void __init init_cma_reserved_pageblock(struct page *page)
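[Editor's note] page_alloc_init_late() in the hunk above uses a read/write semaphore as a completion barrier: the boot path takes pgdat_init_rwsem for read once per node thread it launches, each deferred_init_memmap() thread drops its read hold when it finishes, and a final down_write()/up_write() pair blocks until every reader is gone. The same "wait until every node thread is done" effect can be sketched in userspace with a counter and a condition variable instead of an rwsem; this is an illustration only, and the names (pending_nodes, node_worker, wait_all_nodes) are invented.

	#include <pthread.h>
	#include <stdio.h>

	/* Counts outstanding "node init" threads, like the read holds on
	 * pgdat_init_rwsem in the patch. Illustration only. */
	static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
	static pthread_cond_t  all_done = PTHREAD_COND_INITIALIZER;
	static int pending_nodes;

	static void node_started(void)
	{
		pthread_mutex_lock(&lock);
		pending_nodes++;		/* analogue of down_read() at launch */
		pthread_mutex_unlock(&lock);
	}

	static void node_done(void)
	{
		pthread_mutex_lock(&lock);
		if (--pending_nodes == 0)	/* analogue of up_read() in the worker */
			pthread_cond_signal(&all_done);
		pthread_mutex_unlock(&lock);
	}

	static void wait_all_nodes(void)
	{
		pthread_mutex_lock(&lock);
		while (pending_nodes > 0)	/* analogue of down_write(); up_write() */
			pthread_cond_wait(&all_done, &lock);
		pthread_mutex_unlock(&lock);
	}

	static void *node_worker(void *arg)
	{
		/* stand-in for deferred_init_memmap() doing the real work */
		printf("node %ld initialised\n", (long)arg);
		node_done();
		return NULL;
	}

	int main(void)
	{
		pthread_t t[4];

		for (long n = 0; n < 4; n++) {	/* like the for_each_node_state() loop */
			node_started();
			pthread_create(&t[n], NULL, node_worker, (void *)n);
		}
		wait_all_nodes();		/* block until all are initialised */
		for (int n = 0; n < 4; n++)
			pthread_join(t[n], NULL);
		return 0;
	}

The rwsem trick in the patch avoids tracking the threads explicitly: every launched kthread holds the lock for read, so a single writer acquisition acts as the barrier.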
@@ -4150,6 +4510,9 @@ static void setup_zone_migrate_reserve(struct zone *zone)
 	zone->nr_migrate_reserve_block = reserve;
 
 	for (pfn = start_pfn; pfn < end_pfn; pfn += pageblock_nr_pages) {
+		if (!early_page_nid_uninitialised(pfn, zone_to_nid(zone)))
+			return;
+
 		if (!pfn_valid(pfn))
 			continue;
 		page = pfn_to_page(pfn);
@@ -4212,15 +4575,16 @@ static void setup_zone_migrate_reserve(struct zone *zone)
 void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
 		unsigned long start_pfn, enum memmap_context context)
 {
-	struct page *page;
+	pg_data_t *pgdat = NODE_DATA(nid);
 	unsigned long end_pfn = start_pfn + size;
 	unsigned long pfn;
 	struct zone *z;
+	unsigned long nr_initialised = 0;
 
 	if (highest_memmap_pfn < end_pfn - 1)
 		highest_memmap_pfn = end_pfn - 1;
 
-	z = &NODE_DATA(nid)->node_zones[zone];
+	z = &pgdat->node_zones[zone];
 	for (pfn = start_pfn; pfn < end_pfn; pfn++) {
 		/*
 		 * There can be holes in boot-time mem_map[]s
@@ -4232,14 +4596,11 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
 				continue;
 			if (!early_pfn_in_nid(pfn, nid))
 				continue;
+			if (!update_defer_init(pgdat, pfn, end_pfn,
+						&nr_initialised))
+				break;
 		}
-		page = pfn_to_page(pfn);
-		set_page_links(page, zone, nid, pfn);
-		mminit_verify_page_links(page, zone, nid, pfn);
-		init_page_count(page);
-		page_mapcount_reset(page);
-		page_cpupid_reset_last(page);
-		SetPageReserved(page);
+
 		/*
 		 * Mark the block movable so that blocks are reserved for
 		 * movable at startup. This will force kernel allocations
@@ -4254,17 +4615,14 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
 		 * check here not to call set_pageblock_migratetype() against
 		 * pfn out of zone.
 		 */
-		if ((z->zone_start_pfn <= pfn)
-		    && (pfn < zone_end_pfn(z))
-		    && !(pfn & (pageblock_nr_pages - 1)))
-			set_pageblock_migratetype(page, MIGRATE_MOVABLE);
+		if (!(pfn & (pageblock_nr_pages - 1))) {
+			struct page *page = pfn_to_page(pfn);
 
-		INIT_LIST_HEAD(&page->lru);
-#ifdef WANT_PAGE_VIRTUAL
-		/* The shift won't overflow because ZONE_NORMAL is below 4G. */
-		if (!is_highmem_idx(zone))
-			set_page_address(page, __va(pfn << PAGE_SHIFT));
-#endif
+			__init_single_page(page, pfn, zone, nid);
+			set_pageblock_migratetype(page, MIGRATE_MOVABLE);
+		} else {
+			__init_single_pfn(pfn, zone, nid);
+		}
 	}
 }
 
@@ -4522,57 +4880,30 @@ int __meminit init_currently_empty_zone(struct zone *zone,
 
 #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
 #ifndef CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID
+
 /*
  * Required by SPARSEMEM. Given a PFN, return what node the PFN is on.
  */
-int __meminit __early_pfn_to_nid(unsigned long pfn)
+int __meminit __early_pfn_to_nid(unsigned long pfn,
+					struct mminit_pfnnid_cache *state)
 {
 	unsigned long start_pfn, end_pfn;
 	int nid;
-	/*
-	 * NOTE: The following SMP-unsafe globals are only used early in boot
-	 * when the kernel is running single-threaded.
-	 */
-	static unsigned long __meminitdata last_start_pfn, last_end_pfn;
-	static int __meminitdata last_nid;
 
-	if (last_start_pfn <= pfn && pfn < last_end_pfn)
-		return last_nid;
+	if (state->last_start <= pfn && pfn < state->last_end)
+		return state->last_nid;
 
 	nid = memblock_search_pfn_nid(pfn, &start_pfn, &end_pfn);
 	if (nid != -1) {
-		last_start_pfn = start_pfn;
-		last_end_pfn = end_pfn;
-		last_nid = nid;
+		state->last_start = start_pfn;
+		state->last_end = end_pfn;
+		state->last_nid = nid;
 	}
 
 	return nid;
 }
 #endif /* CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID */
 
-int __meminit early_pfn_to_nid(unsigned long pfn)
-{
-	int nid;
-
-	nid = __early_pfn_to_nid(pfn);
-	if (nid >= 0)
-		return nid;
-	/* just returns 0 */
-	return 0;
-}
-
-#ifdef CONFIG_NODES_SPAN_OTHER_NODES
-bool __meminit early_pfn_in_nid(unsigned long pfn, int node)
-{
-	int nid;
-
-	nid = __early_pfn_to_nid(pfn);
-	if (nid >= 0 && nid != node)
-		return false;
-	return true;
-}
-#endif
-
 /**
  * free_bootmem_with_active_regions - Call memblock_free_early_nid for each active range
  * @nid: The node to free memory on. If MAX_NUMNODES, all nodes are freed.
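[Editor's note] The hunk above replaces two function-local statics with a caller-supplied struct mminit_pfnnid_cache, so each early-boot caller can keep its own last-range memo. The general trick — remember the last [start, end) range that answered a lookup and reuse it while queries stay inside that range — can be shown in isolation like this (illustration only; range_cache, lookup_slow() and the sample table are invented, not kernel code):

	#include <stdio.h>

	/* A looked-up range: keys in [last_start, last_end) map to last_id. */
	struct range_cache {
		unsigned long last_start;
		unsigned long last_end;
		int last_id;
	};

	/* Pretend this walk is expensive, like memblock_search_pfn_nid(). */
	static int lookup_slow(unsigned long key, unsigned long *start,
			       unsigned long *end)
	{
		static const struct { unsigned long start, end; int id; } table[] = {
			{ 0,    1000, 0 },
			{ 1000, 5000, 1 },
			{ 5000, 9000, 2 },
		};

		for (unsigned i = 0; i < sizeof(table) / sizeof(table[0]); i++) {
			if (key >= table[i].start && key < table[i].end) {
				*start = table[i].start;
				*end = table[i].end;
				return table[i].id;
			}
		}
		return -1;
	}

	static int lookup(unsigned long key, struct range_cache *state)
	{
		unsigned long start, end;
		int id;

		/* Fast path: the previous answer still covers this key. */
		if (state->last_start <= key && key < state->last_end)
			return state->last_id;

		id = lookup_slow(key, &start, &end);
		if (id >= 0) {
			state->last_start = start;
			state->last_end = end;
			state->last_id = id;
		}
		return id;
	}

	int main(void)
	{
		struct range_cache cache = { 0 };
		unsigned long keys[] = { 42, 43, 4096, 4097, 8000 };

		for (unsigned i = 0; i < sizeof(keys) / sizeof(keys[0]); i++)
			printf("key %lu -> id %d\n", keys[i], lookup(keys[i], &cache));
		return 0;
	}

Passing the cache in explicitly, as the patch does, is what lets deferred_init_memmap() run one lookup state per node thread instead of sharing SMP-unsafe globals.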
@@ -5090,6 +5421,7 @@ void __paginginit free_area_init_node(int nid, unsigned long *zones_size,
 	/* pg_data_t should be reset to zero when it's allocated */
 	WARN_ON(pgdat->nr_zones || pgdat->classzone_idx);
 
+	reset_deferred_meminit(pgdat);
 	pgdat->node_id = nid;
 	pgdat->node_start_pfn = node_start_pfn;
 #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP