about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Yinghai Lu <yinghai@kernel.org> 2010-02-10 04:20:20 -0500
committer H. Peter Anvin <hpa@zytor.com> 2010-02-12 12:41:59 -0500
commit 08677214e318297f228237be0042aac754f48f1d (patch)
tree 6d03424f7e287fcf66136b44512328afb1aeee49
parent c252a5bb1f57afb1e336d68085217727ca7b2134 (diff)
x86: Make 64 bit use early_res instead of bootmem before slab
Finally, we can now use early_res to replace bootmem on x86_64.
CONFIG_NO_BOOTMEM can still be used to enable it or not.

-v2: fix 32-bit compilation with respect to MAX_DMA32_PFN
-v3: folded in the bug fix from the LKML message below

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
LKML-Reference: <4B747239.4070907@kernel.org>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
-rw-r--r-- arch/x86/Kconfig 13
-rw-r--r-- arch/x86/include/asm/e820.h 6
-rw-r--r-- arch/x86/kernel/e820.c 159
-rw-r--r-- arch/x86/kernel/setup.c 2
-rw-r--r-- arch/x86/mm/init_64.c 4
-rw-r--r-- arch/x86/mm/numa_64.c 20
-rw-r--r-- include/linux/bootmem.h 7
-rw-r--r-- include/linux/mm.h 5
-rw-r--r-- include/linux/mmzone.h 2
-rw-r--r-- mm/bootmem.c 195
-rw-r--r-- mm/page_alloc.c 59
-rw-r--r-- mm/percpu.c 3
-rw-r--r-- mm/sparse-vmemmap.c 2
13 files changed, 454 insertions, 23 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index eb4092568f9e..95439843cebc 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -568,6 +568,19 @@ config PARAVIRT_DEBUG
568 Enable to debug paravirt_ops internals. Specifically, BUG if 568 Enable to debug paravirt_ops internals. Specifically, BUG if
569 a paravirt_op is missing when it is called. 569 a paravirt_op is missing when it is called.
570 570
571config NO_BOOTMEM
572 default y
573 bool "Disable Bootmem code"
574 depends on X86_64
575 ---help---
576 Use early_res directly instead of bootmem before slab is ready.
577 - allocator (buddy) [generic]
578 - early allocator (bootmem) [generic]
579 - very early allocator (reserve_early*()) [x86]
580 - very very early allocator (early brk model) [x86]
581 So reduce one layer between early allocator to final allocator
582
583
571config MEMTEST 584config MEMTEST
572 bool "Memtest" 585 bool "Memtest"
573 ---help--- 586 ---help---
diff --git a/arch/x86/include/asm/e820.h b/arch/x86/include/asm/e820.h
index 761249e396fe..7d72e5fb7008 100644
--- a/arch/x86/include/asm/e820.h
+++ b/arch/x86/include/asm/e820.h
@@ -117,6 +117,12 @@ extern void free_early(u64 start, u64 end);
117extern void early_res_to_bootmem(u64 start, u64 end); 117extern void early_res_to_bootmem(u64 start, u64 end);
118extern u64 early_reserve_e820(u64 startt, u64 sizet, u64 align); 118extern u64 early_reserve_e820(u64 startt, u64 sizet, u64 align);
119 119
120void reserve_early_without_check(u64 start, u64 end, char *name);
121u64 find_early_area(u64 ei_start, u64 ei_last, u64 start, u64 end,
122 u64 size, u64 align);
123#include <linux/range.h>
124int get_free_all_memory_range(struct range **rangep, int nodeid);
125
120extern unsigned long e820_end_of_ram_pfn(void); 126extern unsigned long e820_end_of_ram_pfn(void);
121extern unsigned long e820_end_of_low_ram_pfn(void); 127extern unsigned long e820_end_of_low_ram_pfn(void);
122extern int e820_find_active_region(const struct e820entry *ei, 128extern int e820_find_active_region(const struct e820entry *ei,
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index e09c18c8f3c1..90a85295f332 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -977,6 +977,25 @@ void __init reserve_early(u64 start, u64 end, char *name)
977 __reserve_early(start, end, name, 0); 977 __reserve_early(start, end, name, 0);
978} 978}
979 979
980void __init reserve_early_without_check(u64 start, u64 end, char *name)
981{
982 struct early_res *r;
983
984 if (start >= end)
985 return;
986
987 __check_and_double_early_res(end);
988
989 r = &early_res[early_res_count];
990
991 r->start = start;
992 r->end = end;
993 r->overlap_ok = 0;
994 if (name)
995 strncpy(r->name, name, sizeof(r->name) - 1);
996 early_res_count++;
997}
998
980void __init free_early(u64 start, u64 end) 999void __init free_early(u64 start, u64 end)
981{ 1000{
982 struct early_res *r; 1001 struct early_res *r;
@@ -991,6 +1010,94 @@ void __init free_early(u64 start, u64 end)
991 drop_range(i); 1010 drop_range(i);
992} 1011}
993 1012
1013#ifdef CONFIG_NO_BOOTMEM
1014static void __init subtract_early_res(struct range *range, int az)
1015{
1016 int i, count;
1017 u64 final_start, final_end;
1018 int idx = 0;
1019
1020 count = 0;
1021 for (i = 0; i < max_early_res && early_res[i].end; i++)
1022 count++;
1023
1024 /* need to skip first one ?*/
1025 if (early_res != early_res_x)
1026 idx = 1;
1027
1028#if 1
1029 printk(KERN_INFO "Subtract (%d early reservations)\n", count);
1030#endif
1031 for (i = idx; i < count; i++) {
1032 struct early_res *r = &early_res[i];
1033#if 0
1034 printk(KERN_INFO " #%d [%010llx - %010llx] %15s", i,
1035 r->start, r->end, r->name);
1036#endif
1037 final_start = PFN_DOWN(r->start);
1038 final_end = PFN_UP(r->end);
1039 if (final_start >= final_end) {
1040#if 0
1041 printk(KERN_CONT "\n");
1042#endif
1043 continue;
1044 }
1045#if 0
1046 printk(KERN_CONT " subtract pfn [%010llx - %010llx]\n",
1047 final_start, final_end);
1048#endif
1049 subtract_range(range, az, final_start, final_end);
1050 }
1051
1052}
1053
1054int __init get_free_all_memory_range(struct range **rangep, int nodeid)
1055{
1056 int i, count;
1057 u64 start = 0, end;
1058 u64 size;
1059 u64 mem;
1060 struct range *range;
1061 int nr_range;
1062
1063 count = 0;
1064 for (i = 0; i < max_early_res && early_res[i].end; i++)
1065 count++;
1066
1067 count *= 2;
1068
1069 size = sizeof(struct range) * count;
1070#ifdef MAX_DMA32_PFN
1071 if (max_pfn_mapped > MAX_DMA32_PFN)
1072 start = MAX_DMA32_PFN << PAGE_SHIFT;
1073#endif
1074 end = max_pfn_mapped << PAGE_SHIFT;
1075 mem = find_e820_area(start, end, size, sizeof(struct range));
1076 if (mem == -1ULL)
1077 panic("can not find more space for range free");
1078
1079 range = __va(mem);
1080 /* use early_node_map[] and early_res to get range array at first */
1081 memset(range, 0, size);
1082 nr_range = 0;
1083
1084 /* need to go over early_node_map to find out good range for node */
1085 nr_range = add_from_early_node_map(range, count, nr_range, nodeid);
1086 subtract_early_res(range, count);
1087 nr_range = clean_sort_range(range, count);
1088
1089 /* need to clear it ? */
1090 if (nodeid == MAX_NUMNODES) {
1091 memset(&early_res[0], 0,
1092 sizeof(struct early_res) * max_early_res);
1093 early_res = NULL;
1094 max_early_res = 0;
1095 }
1096
1097 *rangep = range;
1098 return nr_range;
1099}
1100#else
994void __init early_res_to_bootmem(u64 start, u64 end) 1101void __init early_res_to_bootmem(u64 start, u64 end)
995{ 1102{
996 int i, count; 1103 int i, count;
@@ -1028,6 +1135,7 @@ void __init early_res_to_bootmem(u64 start, u64 end)
1028 max_early_res = 0; 1135 max_early_res = 0;
1029 early_res_count = 0; 1136 early_res_count = 0;
1030} 1137}
1138#endif
1031 1139
1032/* Check for already reserved areas */ 1140/* Check for already reserved areas */
1033static inline int __init bad_addr(u64 *addrp, u64 size, u64 align) 1141static inline int __init bad_addr(u64 *addrp, u64 size, u64 align)
@@ -1083,6 +1191,35 @@ again:
1083 1191
1084/* 1192/*
1085 * Find a free area with specified alignment in a specific range. 1193 * Find a free area with specified alignment in a specific range.
1194 * only with the area.between start to end is active range from early_node_map
1195 * so they are good as RAM
1196 */
1197u64 __init find_early_area(u64 ei_start, u64 ei_last, u64 start, u64 end,
1198 u64 size, u64 align)
1199{
1200 u64 addr, last;
1201
1202 addr = round_up(ei_start, align);
1203 if (addr < start)
1204 addr = round_up(start, align);
1205 if (addr >= ei_last)
1206 goto out;
1207 while (bad_addr(&addr, size, align) && addr+size <= ei_last)
1208 ;
1209 last = addr + size;
1210 if (last > ei_last)
1211 goto out;
1212 if (last > end)
1213 goto out;
1214
1215 return addr;
1216
1217out:
1218 return -1ULL;
1219}
1220
1221/*
1222 * Find a free area with specified alignment in a specific range.
1086 */ 1223 */
1087u64 __init find_e820_area(u64 start, u64 end, u64 size, u64 align) 1224u64 __init find_e820_area(u64 start, u64 end, u64 size, u64 align)
1088{ 1225{
@@ -1090,24 +1227,20 @@ u64 __init find_e820_area(u64 start, u64 end, u64 size, u64 align)
1090 1227
1091 for (i = 0; i < e820.nr_map; i++) { 1228 for (i = 0; i < e820.nr_map; i++) {
1092 struct e820entry *ei = &e820.map[i]; 1229 struct e820entry *ei = &e820.map[i];
1093 u64 addr, last; 1230 u64 addr;
1094 u64 ei_last; 1231 u64 ei_start, ei_last;
1095 1232
1096 if (ei->type != E820_RAM) 1233 if (ei->type != E820_RAM)
1097 continue; 1234 continue;
1098 addr = round_up(ei->addr, align); 1235
1099 ei_last = ei->addr + ei->size; 1236 ei_last = ei->addr + ei->size;
1100 if (addr < start) 1237 ei_start = ei->addr;
1101 addr = round_up(start, align); 1238 addr = find_early_area(ei_start, ei_last, start, end,
1102 if (addr >= ei_last) 1239 size, align);
1103 continue; 1240
1104 while (bad_addr(&addr, size, align) && addr+size <= ei_last) 1241 if (addr == -1ULL)
1105 ;
1106 last = addr + size;
1107 if (last > ei_last)
1108 continue;
1109 if (last > end)
1110 continue; 1242 continue;
1243
1111 return addr; 1244 return addr;
1112 } 1245 }
1113 return -1ULL; 1246 return -1ULL;
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index ea4141b48518..d49e168bda8c 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -967,7 +967,9 @@ void __init setup_arch(char **cmdline_p)
967#endif 967#endif
968 968
969 initmem_init(0, max_pfn, acpi, k8); 969 initmem_init(0, max_pfn, acpi, k8);
970#ifndef CONFIG_NO_BOOTMEM
970 early_res_to_bootmem(0, max_low_pfn<<PAGE_SHIFT); 971 early_res_to_bootmem(0, max_low_pfn<<PAGE_SHIFT);
972#endif
971 973
972 dma32_reserve_bootmem(); 974 dma32_reserve_bootmem();
973 975
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index a15abaae5ba4..53158b7e5d46 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -572,6 +572,7 @@ kernel_physical_mapping_init(unsigned long start,
572void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn, 572void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn,
573 int acpi, int k8) 573 int acpi, int k8)
574{ 574{
575#ifndef CONFIG_NO_BOOTMEM
575 unsigned long bootmap_size, bootmap; 576 unsigned long bootmap_size, bootmap;
576 577
577 bootmap_size = bootmem_bootmap_pages(end_pfn)<<PAGE_SHIFT; 578 bootmap_size = bootmem_bootmap_pages(end_pfn)<<PAGE_SHIFT;
@@ -585,6 +586,9 @@ void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn,
585 0, end_pfn); 586 0, end_pfn);
586 e820_register_active_regions(0, start_pfn, end_pfn); 587 e820_register_active_regions(0, start_pfn, end_pfn);
587 free_bootmem_with_active_regions(0, end_pfn); 588 free_bootmem_with_active_regions(0, end_pfn);
589#else
590 e820_register_active_regions(0, start_pfn, end_pfn);
591#endif
588} 592}
589#endif 593#endif
590 594
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 02f13cb99bc2..a20e17059afd 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -198,11 +198,13 @@ static void * __init early_node_mem(int nodeid, unsigned long start,
198void __init 198void __init
199setup_node_bootmem(int nodeid, unsigned long start, unsigned long end) 199setup_node_bootmem(int nodeid, unsigned long start, unsigned long end)
200{ 200{
201 unsigned long start_pfn, last_pfn, bootmap_pages, bootmap_size; 201 unsigned long start_pfn, last_pfn, nodedata_phys;
202 const int pgdat_size = roundup(sizeof(pg_data_t), PAGE_SIZE); 202 const int pgdat_size = roundup(sizeof(pg_data_t), PAGE_SIZE);
203 unsigned long bootmap_start, nodedata_phys;
204 void *bootmap;
205 int nid; 203 int nid;
204#ifndef CONFIG_NO_BOOTMEM
205 unsigned long bootmap_start, bootmap_pages, bootmap_size;
206 void *bootmap;
207#endif
206 208
207 if (!end) 209 if (!end)
208 return; 210 return;
@@ -216,7 +218,7 @@ setup_node_bootmem(int nodeid, unsigned long start, unsigned long end)
216 218
217 start = roundup(start, ZONE_ALIGN); 219 start = roundup(start, ZONE_ALIGN);
218 220
219 printk(KERN_INFO "Bootmem setup node %d %016lx-%016lx\n", nodeid, 221 printk(KERN_INFO "Initmem setup node %d %016lx-%016lx\n", nodeid,
220 start, end); 222 start, end);
221 223
222 start_pfn = start >> PAGE_SHIFT; 224 start_pfn = start >> PAGE_SHIFT;
@@ -235,10 +237,13 @@ setup_node_bootmem(int nodeid, unsigned long start, unsigned long end)
235 printk(KERN_INFO " NODE_DATA(%d) on node %d\n", nodeid, nid); 237 printk(KERN_INFO " NODE_DATA(%d) on node %d\n", nodeid, nid);
236 238
237 memset(NODE_DATA(nodeid), 0, sizeof(pg_data_t)); 239 memset(NODE_DATA(nodeid), 0, sizeof(pg_data_t));
238 NODE_DATA(nodeid)->bdata = &bootmem_node_data[nodeid]; 240 NODE_DATA(nodeid)->node_id = nodeid;
239 NODE_DATA(nodeid)->node_start_pfn = start_pfn; 241 NODE_DATA(nodeid)->node_start_pfn = start_pfn;
240 NODE_DATA(nodeid)->node_spanned_pages = last_pfn - start_pfn; 242 NODE_DATA(nodeid)->node_spanned_pages = last_pfn - start_pfn;
241 243
244#ifndef CONFIG_NO_BOOTMEM
245 NODE_DATA(nodeid)->bdata = &bootmem_node_data[nodeid];
246
242 /* 247 /*
243 * Find a place for the bootmem map 248 * Find a place for the bootmem map
244 * nodedata_phys could be on other nodes by alloc_bootmem, 249 * nodedata_phys could be on other nodes by alloc_bootmem,
@@ -275,6 +280,7 @@ setup_node_bootmem(int nodeid, unsigned long start, unsigned long end)
275 printk(KERN_INFO " bootmap(%d) on node %d\n", nodeid, nid); 280 printk(KERN_INFO " bootmap(%d) on node %d\n", nodeid, nid);
276 281
277 free_bootmem_with_active_regions(nodeid, end); 282 free_bootmem_with_active_regions(nodeid, end);
283#endif
278 284
279 node_set_online(nodeid); 285 node_set_online(nodeid);
280} 286}
@@ -733,6 +739,10 @@ unsigned long __init numa_free_all_bootmem(void)
733 for_each_online_node(i) 739 for_each_online_node(i)
734 pages += free_all_bootmem_node(NODE_DATA(i)); 740 pages += free_all_bootmem_node(NODE_DATA(i));
735 741
742#ifdef CONFIG_NO_BOOTMEM
743 pages += free_all_memory_core_early(MAX_NUMNODES);
744#endif
745
736 return pages; 746 return pages;
737} 747}
738 748
diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h
index b10ec49ee2dd..266ab9291232 100644
--- a/include/linux/bootmem.h
+++ b/include/linux/bootmem.h
@@ -23,6 +23,7 @@ extern unsigned long max_pfn;
23extern unsigned long saved_max_pfn; 23extern unsigned long saved_max_pfn;
24#endif 24#endif
25 25
26#ifndef CONFIG_NO_BOOTMEM
26/* 27/*
27 * node_bootmem_map is a map pointer - the bits represent all physical 28 * node_bootmem_map is a map pointer - the bits represent all physical
28 * memory pages (including holes) on the node. 29 * memory pages (including holes) on the node.
@@ -37,6 +38,7 @@ typedef struct bootmem_data {
37} bootmem_data_t; 38} bootmem_data_t;
38 39
39extern bootmem_data_t bootmem_node_data[]; 40extern bootmem_data_t bootmem_node_data[];
41#endif
40 42
41extern unsigned long bootmem_bootmap_pages(unsigned long); 43extern unsigned long bootmem_bootmap_pages(unsigned long);
42 44
@@ -46,6 +48,7 @@ extern unsigned long init_bootmem_node(pg_data_t *pgdat,
46 unsigned long endpfn); 48 unsigned long endpfn);
47extern unsigned long init_bootmem(unsigned long addr, unsigned long memend); 49extern unsigned long init_bootmem(unsigned long addr, unsigned long memend);
48 50
51unsigned long free_all_memory_core_early(int nodeid);
49extern unsigned long free_all_bootmem_node(pg_data_t *pgdat); 52extern unsigned long free_all_bootmem_node(pg_data_t *pgdat);
50extern unsigned long free_all_bootmem(void); 53extern unsigned long free_all_bootmem(void);
51 54
@@ -84,6 +87,10 @@ extern void *__alloc_bootmem_node(pg_data_t *pgdat,
84 unsigned long size, 87 unsigned long size,
85 unsigned long align, 88 unsigned long align,
86 unsigned long goal); 89 unsigned long goal);
90void *__alloc_bootmem_node_high(pg_data_t *pgdat,
91 unsigned long size,
92 unsigned long align,
93 unsigned long goal);
87extern void *__alloc_bootmem_node_nopanic(pg_data_t *pgdat, 94extern void *__alloc_bootmem_node_nopanic(pg_data_t *pgdat,
88 unsigned long size, 95 unsigned long size,
89 unsigned long align, 96 unsigned long align,
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 8b2fa8593c61..f2c5b3cee8a1 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -12,6 +12,7 @@
12#include <linux/prio_tree.h> 12#include <linux/prio_tree.h>
13#include <linux/debug_locks.h> 13#include <linux/debug_locks.h>
14#include <linux/mm_types.h> 14#include <linux/mm_types.h>
15#include <linux/range.h>
15 16
16struct mempolicy; 17struct mempolicy;
17struct anon_vma; 18struct anon_vma;
@@ -1049,6 +1050,10 @@ extern void get_pfn_range_for_nid(unsigned int nid,
1049extern unsigned long find_min_pfn_with_active_regions(void); 1050extern unsigned long find_min_pfn_with_active_regions(void);
1050extern void free_bootmem_with_active_regions(int nid, 1051extern void free_bootmem_with_active_regions(int nid,
1051 unsigned long max_low_pfn); 1052 unsigned long max_low_pfn);
1053int add_from_early_node_map(struct range *range, int az,
1054 int nr_range, int nid);
1055void *__alloc_memory_core_early(int nodeid, u64 size, u64 align,
1056 u64 goal, u64 limit);
1052typedef int (*work_fn_t)(unsigned long, unsigned long, void *); 1057typedef int (*work_fn_t)(unsigned long, unsigned long, void *);
1053extern void work_with_active_regions(int nid, work_fn_t work_fn, void *data); 1058extern void work_with_active_regions(int nid, work_fn_t work_fn, void *data);
1054extern void sparse_memory_present_with_active_regions(int nid); 1059extern void sparse_memory_present_with_active_regions(int nid);
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 30fe668c2542..eae8387b6007 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -620,7 +620,9 @@ typedef struct pglist_data {
620 struct page_cgroup *node_page_cgroup; 620 struct page_cgroup *node_page_cgroup;
621#endif 621#endif
622#endif 622#endif
623#ifndef CONFIG_NO_BOOTMEM
623 struct bootmem_data *bdata; 624 struct bootmem_data *bdata;
625#endif
624#ifdef CONFIG_MEMORY_HOTPLUG 626#ifdef CONFIG_MEMORY_HOTPLUG
625 /* 627 /*
626 * Must be held any time you expect node_start_pfn, node_present_pages 628 * Must be held any time you expect node_start_pfn, node_present_pages
diff --git a/mm/bootmem.c b/mm/bootmem.c
index 7d1486875e1c..d7c791ef0036 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -13,6 +13,7 @@
13#include <linux/bootmem.h> 13#include <linux/bootmem.h>
14#include <linux/module.h> 14#include <linux/module.h>
15#include <linux/kmemleak.h> 15#include <linux/kmemleak.h>
16#include <linux/range.h>
16 17
17#include <asm/bug.h> 18#include <asm/bug.h>
18#include <asm/io.h> 19#include <asm/io.h>
@@ -32,6 +33,7 @@ unsigned long max_pfn;
32unsigned long saved_max_pfn; 33unsigned long saved_max_pfn;
33#endif 34#endif
34 35
36#ifndef CONFIG_NO_BOOTMEM
35bootmem_data_t bootmem_node_data[MAX_NUMNODES] __initdata; 37bootmem_data_t bootmem_node_data[MAX_NUMNODES] __initdata;
36 38
37static struct list_head bdata_list __initdata = LIST_HEAD_INIT(bdata_list); 39static struct list_head bdata_list __initdata = LIST_HEAD_INIT(bdata_list);
@@ -142,7 +144,7 @@ unsigned long __init init_bootmem(unsigned long start, unsigned long pages)
142 min_low_pfn = start; 144 min_low_pfn = start;
143 return init_bootmem_core(NODE_DATA(0)->bdata, start, 0, pages); 145 return init_bootmem_core(NODE_DATA(0)->bdata, start, 0, pages);
144} 146}
145 147#endif
146/* 148/*
147 * free_bootmem_late - free bootmem pages directly to page allocator 149 * free_bootmem_late - free bootmem pages directly to page allocator
148 * @addr: starting address of the range 150 * @addr: starting address of the range
@@ -167,6 +169,60 @@ void __init free_bootmem_late(unsigned long addr, unsigned long size)
167 } 169 }
168} 170}
169 171
172#ifdef CONFIG_NO_BOOTMEM
173static void __init __free_pages_memory(unsigned long start, unsigned long end)
174{
175 int i;
176 unsigned long start_aligned, end_aligned;
177 int order = ilog2(BITS_PER_LONG);
178
179 start_aligned = (start + (BITS_PER_LONG - 1)) & ~(BITS_PER_LONG - 1);
180 end_aligned = end & ~(BITS_PER_LONG - 1);
181
182 if (end_aligned <= start_aligned) {
183#if 1
184 printk(KERN_DEBUG " %lx - %lx\n", start, end);
185#endif
186 for (i = start; i < end; i++)
187 __free_pages_bootmem(pfn_to_page(i), 0);
188
189 return;
190 }
191
192#if 1
193 printk(KERN_DEBUG " %lx %lx - %lx %lx\n",
194 start, start_aligned, end_aligned, end);
195#endif
196 for (i = start; i < start_aligned; i++)
197 __free_pages_bootmem(pfn_to_page(i), 0);
198
199 for (i = start_aligned; i < end_aligned; i += BITS_PER_LONG)
200 __free_pages_bootmem(pfn_to_page(i), order);
201
202 for (i = end_aligned; i < end; i++)
203 __free_pages_bootmem(pfn_to_page(i), 0);
204}
205
206unsigned long __init free_all_memory_core_early(int nodeid)
207{
208 int i;
209 u64 start, end;
210 unsigned long count = 0;
211 struct range *range = NULL;
212 int nr_range;
213
214 nr_range = get_free_all_memory_range(&range, nodeid);
215
216 for (i = 0; i < nr_range; i++) {
217 start = range[i].start;
218 end = range[i].end;
219 count += end - start;
220 __free_pages_memory(start, end);
221 }
222
223 return count;
224}
225#else
170static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata) 226static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata)
171{ 227{
172 int aligned; 228 int aligned;
@@ -227,6 +283,7 @@ static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata)
227 283
228 return count; 284 return count;
229} 285}
286#endif
230 287
231/** 288/**
232 * free_all_bootmem_node - release a node's free pages to the buddy allocator 289 * free_all_bootmem_node - release a node's free pages to the buddy allocator
@@ -237,7 +294,12 @@ static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata)
237unsigned long __init free_all_bootmem_node(pg_data_t *pgdat) 294unsigned long __init free_all_bootmem_node(pg_data_t *pgdat)
238{ 295{
239 register_page_bootmem_info_node(pgdat); 296 register_page_bootmem_info_node(pgdat);
297#ifdef CONFIG_NO_BOOTMEM
298 /* free_all_memory_core_early(MAX_NUMNODES) will be called later */
299 return 0;
300#else
240 return free_all_bootmem_core(pgdat->bdata); 301 return free_all_bootmem_core(pgdat->bdata);
302#endif
241} 303}
242 304
243/** 305/**
@@ -247,9 +309,14 @@ unsigned long __init free_all_bootmem_node(pg_data_t *pgdat)
247 */ 309 */
248unsigned long __init free_all_bootmem(void) 310unsigned long __init free_all_bootmem(void)
249{ 311{
312#ifdef CONFIG_NO_BOOTMEM
313 return free_all_memory_core_early(NODE_DATA(0)->node_id);
314#else
250 return free_all_bootmem_core(NODE_DATA(0)->bdata); 315 return free_all_bootmem_core(NODE_DATA(0)->bdata);
316#endif
251} 317}
252 318
319#ifndef CONFIG_NO_BOOTMEM
253static void __init __free(bootmem_data_t *bdata, 320static void __init __free(bootmem_data_t *bdata,
254 unsigned long sidx, unsigned long eidx) 321 unsigned long sidx, unsigned long eidx)
255{ 322{
@@ -344,6 +411,7 @@ static int __init mark_bootmem(unsigned long start, unsigned long end,
344 } 411 }
345 BUG(); 412 BUG();
346} 413}
414#endif
347 415
348/** 416/**
349 * free_bootmem_node - mark a page range as usable 417 * free_bootmem_node - mark a page range as usable
@@ -358,6 +426,12 @@ static int __init mark_bootmem(unsigned long start, unsigned long end,
358void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr, 426void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
359 unsigned long size) 427 unsigned long size)
360{ 428{
429#ifdef CONFIG_NO_BOOTMEM
430 free_early(physaddr, physaddr + size);
431#if 0
432 printk(KERN_DEBUG "free %lx %lx\n", physaddr, size);
433#endif
434#else
361 unsigned long start, end; 435 unsigned long start, end;
362 436
363 kmemleak_free_part(__va(physaddr), size); 437 kmemleak_free_part(__va(physaddr), size);
@@ -366,6 +440,7 @@ void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
366 end = PFN_DOWN(physaddr + size); 440 end = PFN_DOWN(physaddr + size);
367 441
368 mark_bootmem_node(pgdat->bdata, start, end, 0, 0); 442 mark_bootmem_node(pgdat->bdata, start, end, 0, 0);
443#endif
369} 444}
370 445
371/** 446/**
@@ -379,6 +454,12 @@ void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
379 */ 454 */
380void __init free_bootmem(unsigned long addr, unsigned long size) 455void __init free_bootmem(unsigned long addr, unsigned long size)
381{ 456{
457#ifdef CONFIG_NO_BOOTMEM
458 free_early(addr, addr + size);
459#if 0
460 printk(KERN_DEBUG "free %lx %lx\n", addr, size);
461#endif
462#else
382 unsigned long start, end; 463 unsigned long start, end;
383 464
384 kmemleak_free_part(__va(addr), size); 465 kmemleak_free_part(__va(addr), size);
@@ -387,6 +468,7 @@ void __init free_bootmem(unsigned long addr, unsigned long size)
387 end = PFN_DOWN(addr + size); 468 end = PFN_DOWN(addr + size);
388 469
389 mark_bootmem(start, end, 0, 0); 470 mark_bootmem(start, end, 0, 0);
471#endif
390} 472}
391 473
392/** 474/**
@@ -403,12 +485,17 @@ void __init free_bootmem(unsigned long addr, unsigned long size)
403int __init reserve_bootmem_node(pg_data_t *pgdat, unsigned long physaddr, 485int __init reserve_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
404 unsigned long size, int flags) 486 unsigned long size, int flags)
405{ 487{
488#ifdef CONFIG_NO_BOOTMEM
489 panic("no bootmem");
490 return 0;
491#else
406 unsigned long start, end; 492 unsigned long start, end;
407 493
408 start = PFN_DOWN(physaddr); 494 start = PFN_DOWN(physaddr);
409 end = PFN_UP(physaddr + size); 495 end = PFN_UP(physaddr + size);
410 496
411 return mark_bootmem_node(pgdat->bdata, start, end, 1, flags); 497 return mark_bootmem_node(pgdat->bdata, start, end, 1, flags);
498#endif
412} 499}
413 500
414/** 501/**
@@ -424,14 +511,20 @@ int __init reserve_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
424int __init reserve_bootmem(unsigned long addr, unsigned long size, 511int __init reserve_bootmem(unsigned long addr, unsigned long size,
425 int flags) 512 int flags)
426{ 513{
514#ifdef CONFIG_NO_BOOTMEM
515 panic("no bootmem");
516 return 0;
517#else
427 unsigned long start, end; 518 unsigned long start, end;
428 519
429 start = PFN_DOWN(addr); 520 start = PFN_DOWN(addr);
430 end = PFN_UP(addr + size); 521 end = PFN_UP(addr + size);
431 522
432 return mark_bootmem(start, end, 1, flags); 523 return mark_bootmem(start, end, 1, flags);
524#endif
433} 525}
434 526
527#ifndef CONFIG_NO_BOOTMEM
435static unsigned long __init align_idx(struct bootmem_data *bdata, 528static unsigned long __init align_idx(struct bootmem_data *bdata,
436 unsigned long idx, unsigned long step) 529 unsigned long idx, unsigned long step)
437{ 530{
@@ -582,12 +675,33 @@ static void * __init alloc_arch_preferred_bootmem(bootmem_data_t *bdata,
582#endif 675#endif
583 return NULL; 676 return NULL;
584} 677}
678#endif
585 679
586static void * __init ___alloc_bootmem_nopanic(unsigned long size, 680static void * __init ___alloc_bootmem_nopanic(unsigned long size,
587 unsigned long align, 681 unsigned long align,
588 unsigned long goal, 682 unsigned long goal,
589 unsigned long limit) 683 unsigned long limit)
590{ 684{
685#ifdef CONFIG_NO_BOOTMEM
686 void *ptr;
687
688 if (WARN_ON_ONCE(slab_is_available()))
689 return kzalloc(size, GFP_NOWAIT);
690
691restart:
692
693 ptr = __alloc_memory_core_early(MAX_NUMNODES, size, align, goal, limit);
694
695 if (ptr)
696 return ptr;
697
698 if (goal != 0) {
699 goal = 0;
700 goto restart;
701 }
702
703 return NULL;
704#else
591 bootmem_data_t *bdata; 705 bootmem_data_t *bdata;
592 void *region; 706 void *region;
593 707
@@ -613,6 +727,7 @@ restart:
613 } 727 }
614 728
615 return NULL; 729 return NULL;
730#endif
616} 731}
617 732
618/** 733/**
@@ -631,7 +746,13 @@ restart:
631void * __init __alloc_bootmem_nopanic(unsigned long size, unsigned long align, 746void * __init __alloc_bootmem_nopanic(unsigned long size, unsigned long align,
632 unsigned long goal) 747 unsigned long goal)
633{ 748{
634 return ___alloc_bootmem_nopanic(size, align, goal, 0); 749 unsigned long limit = 0;
750
751#ifdef CONFIG_NO_BOOTMEM
752 limit = -1UL;
753#endif
754
755 return ___alloc_bootmem_nopanic(size, align, goal, limit);
635} 756}
636 757
637static void * __init ___alloc_bootmem(unsigned long size, unsigned long align, 758static void * __init ___alloc_bootmem(unsigned long size, unsigned long align,
@@ -665,9 +786,16 @@ static void * __init ___alloc_bootmem(unsigned long size, unsigned long align,
665void * __init __alloc_bootmem(unsigned long size, unsigned long align, 786void * __init __alloc_bootmem(unsigned long size, unsigned long align,
666 unsigned long goal) 787 unsigned long goal)
667{ 788{
668 return ___alloc_bootmem(size, align, goal, 0); 789 unsigned long limit = 0;
790
791#ifdef CONFIG_NO_BOOTMEM
792 limit = -1UL;
793#endif
794
795 return ___alloc_bootmem(size, align, goal, limit);
669} 796}
670 797
798#ifndef CONFIG_NO_BOOTMEM
671static void * __init ___alloc_bootmem_node(bootmem_data_t *bdata, 799static void * __init ___alloc_bootmem_node(bootmem_data_t *bdata,
672 unsigned long size, unsigned long align, 800 unsigned long size, unsigned long align,
673 unsigned long goal, unsigned long limit) 801 unsigned long goal, unsigned long limit)
@@ -684,6 +812,7 @@ static void * __init ___alloc_bootmem_node(bootmem_data_t *bdata,
684 812
685 return ___alloc_bootmem(size, align, goal, limit); 813 return ___alloc_bootmem(size, align, goal, limit);
686} 814}
815#endif
687 816
688/** 817/**
689 * __alloc_bootmem_node - allocate boot memory from a specific node 818 * __alloc_bootmem_node - allocate boot memory from a specific node
@@ -706,7 +835,46 @@ void * __init __alloc_bootmem_node(pg_data_t *pgdat, unsigned long size,
706 if (WARN_ON_ONCE(slab_is_available())) 835 if (WARN_ON_ONCE(slab_is_available()))
707 return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id); 836 return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id);
708 837
838#ifdef CONFIG_NO_BOOTMEM
839 return __alloc_memory_core_early(pgdat->node_id, size, align,
840 goal, -1ULL);
841#else
709 return ___alloc_bootmem_node(pgdat->bdata, size, align, goal, 0); 842 return ___alloc_bootmem_node(pgdat->bdata, size, align, goal, 0);
843#endif
844}
845
846void * __init __alloc_bootmem_node_high(pg_data_t *pgdat, unsigned long size,
847 unsigned long align, unsigned long goal)
848{
849#ifdef MAX_DMA32_PFN
850 unsigned long end_pfn;
851
852 if (WARN_ON_ONCE(slab_is_available()))
853 return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id);
854
855 /* update goal according ...MAX_DMA32_PFN */
856 end_pfn = pgdat->node_start_pfn + pgdat->node_spanned_pages;
857
858 if (end_pfn > MAX_DMA32_PFN + (128 >> (20 - PAGE_SHIFT)) &&
859 (goal >> PAGE_SHIFT) < MAX_DMA32_PFN) {
860 void *ptr;
861 unsigned long new_goal;
862
863 new_goal = MAX_DMA32_PFN << PAGE_SHIFT;
864#ifdef CONFIG_NO_BOOTMEM
865 ptr = __alloc_memory_core_early(pgdat->node_id, size, align,
866 new_goal, -1ULL);
867#else
868 ptr = alloc_bootmem_core(pgdat->bdata, size, align,
869 new_goal, 0);
870#endif
871 if (ptr)
872 return ptr;
873 }
874#endif
875
876 return __alloc_bootmem_node(pgdat, size, align, goal);
877
710} 878}
711 879
712#ifdef CONFIG_SPARSEMEM 880#ifdef CONFIG_SPARSEMEM
@@ -720,6 +888,16 @@ void * __init __alloc_bootmem_node(pg_data_t *pgdat, unsigned long size,
720void * __init alloc_bootmem_section(unsigned long size, 888void * __init alloc_bootmem_section(unsigned long size,
721 unsigned long section_nr) 889 unsigned long section_nr)
722{ 890{
891#ifdef CONFIG_NO_BOOTMEM
892 unsigned long pfn, goal, limit;
893
894 pfn = section_nr_to_pfn(section_nr);
895 goal = pfn << PAGE_SHIFT;
896 limit = section_nr_to_pfn(section_nr + 1) << PAGE_SHIFT;
897
898 return __alloc_memory_core_early(early_pfn_to_nid(pfn), size,
899 SMP_CACHE_BYTES, goal, limit);
900#else
723 bootmem_data_t *bdata; 901 bootmem_data_t *bdata;
724 unsigned long pfn, goal, limit; 902 unsigned long pfn, goal, limit;
725 903
@@ -729,6 +907,7 @@ void * __init alloc_bootmem_section(unsigned long size,
729 bdata = &bootmem_node_data[early_pfn_to_nid(pfn)]; 907 bdata = &bootmem_node_data[early_pfn_to_nid(pfn)];
730 908
731 return alloc_bootmem_core(bdata, size, SMP_CACHE_BYTES, goal, limit); 909 return alloc_bootmem_core(bdata, size, SMP_CACHE_BYTES, goal, limit);
910#endif
732} 911}
733#endif 912#endif
734 913
@@ -740,11 +919,16 @@ void * __init __alloc_bootmem_node_nopanic(pg_data_t *pgdat, unsigned long size,
740 if (WARN_ON_ONCE(slab_is_available())) 919 if (WARN_ON_ONCE(slab_is_available()))
741 return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id); 920 return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id);
742 921
922#ifdef CONFIG_NO_BOOTMEM
923 ptr = __alloc_memory_core_early(pgdat->node_id, size, align,
924 goal, -1ULL);
925#else
743 ptr = alloc_arch_preferred_bootmem(pgdat->bdata, size, align, goal, 0); 926 ptr = alloc_arch_preferred_bootmem(pgdat->bdata, size, align, goal, 0);
744 if (ptr) 927 if (ptr)
745 return ptr; 928 return ptr;
746 929
747 ptr = alloc_bootmem_core(pgdat->bdata, size, align, goal, 0); 930 ptr = alloc_bootmem_core(pgdat->bdata, size, align, goal, 0);
931#endif
748 if (ptr) 932 if (ptr)
749 return ptr; 933 return ptr;
750 934
@@ -795,6 +979,11 @@ void * __init __alloc_bootmem_low_node(pg_data_t *pgdat, unsigned long size,
795 if (WARN_ON_ONCE(slab_is_available())) 979 if (WARN_ON_ONCE(slab_is_available()))
796 return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id); 980 return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id);
797 981
982#ifdef CONFIG_NO_BOOTMEM
983 return __alloc_memory_core_early(pgdat->node_id, size, align,
984 goal, ARCH_LOW_ADDRESS_LIMIT);
985#else
798 return ___alloc_bootmem_node(pgdat->bdata, size, align, 986 return ___alloc_bootmem_node(pgdat->bdata, size, align,
799 goal, ARCH_LOW_ADDRESS_LIMIT); 987 goal, ARCH_LOW_ADDRESS_LIMIT);
988#endif
800} 989}
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 8deb9d0fd5b1..78821a28e394 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3435,6 +3435,59 @@ void __init free_bootmem_with_active_regions(int nid,
3435 } 3435 }
3436} 3436}
3437 3437
3438int __init add_from_early_node_map(struct range *range, int az,
3439 int nr_range, int nid)
3440{
3441 int i;
3442 u64 start, end;
3443
3444 /* need to go over early_node_map to find out good range for node */
3445 for_each_active_range_index_in_nid(i, nid) {
3446 start = early_node_map[i].start_pfn;
3447 end = early_node_map[i].end_pfn;
3448 nr_range = add_range(range, az, nr_range, start, end);
3449 }
3450 return nr_range;
3451}
3452
3453void * __init __alloc_memory_core_early(int nid, u64 size, u64 align,
3454 u64 goal, u64 limit)
3455{
3456 int i;
3457 void *ptr;
3458
3459 /* need to go over early_node_map to find out good range for node */
3460 for_each_active_range_index_in_nid(i, nid) {
3461 u64 addr;
3462 u64 ei_start, ei_last;
3463
3464 ei_last = early_node_map[i].end_pfn;
3465 ei_last <<= PAGE_SHIFT;
3466 ei_start = early_node_map[i].start_pfn;
3467 ei_start <<= PAGE_SHIFT;
3468 addr = find_early_area(ei_start, ei_last,
3469 goal, limit, size, align);
3470
3471 if (addr == -1ULL)
3472 continue;
3473
3474#if 0
3475 printk(KERN_DEBUG "alloc (nid=%d %llx - %llx) (%llx - %llx) %llx %llx => %llx\n",
3476 nid,
3477 ei_start, ei_last, goal, limit, size,
3478 align, addr);
3479#endif
3480
3481 ptr = phys_to_virt(addr);
3482 memset(ptr, 0, size);
3483 reserve_early_without_check(addr, addr + size, "BOOTMEM");
3484 return ptr;
3485 }
3486
3487 return NULL;
3488}
3489
3490
3438void __init work_with_active_regions(int nid, work_fn_t work_fn, void *data) 3491void __init work_with_active_regions(int nid, work_fn_t work_fn, void *data)
3439{ 3492{
3440 int i; 3493 int i;
@@ -4467,7 +4520,11 @@ void __init set_dma_reserve(unsigned long new_dma_reserve)
4467} 4520}
4468 4521
4469#ifndef CONFIG_NEED_MULTIPLE_NODES 4522#ifndef CONFIG_NEED_MULTIPLE_NODES
4470struct pglist_data __refdata contig_page_data = { .bdata = &bootmem_node_data[0] }; 4523struct pglist_data __refdata contig_page_data = {
4524#ifndef CONFIG_NO_BOOTMEM
4525 .bdata = &bootmem_node_data[0]
4526#endif
4527 };
4471EXPORT_SYMBOL(contig_page_data); 4528EXPORT_SYMBOL(contig_page_data);
4472#endif 4529#endif
4473 4530
diff --git a/mm/percpu.c b/mm/percpu.c
index 083e7c91e5f6..841defeeef86 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -1929,7 +1929,10 @@ int __init pcpu_embed_first_chunk(size_t reserved_size, ssize_t dyn_size,
1929 } 1929 }
1930 /* copy and return the unused part */ 1930 /* copy and return the unused part */
1931 memcpy(ptr, __per_cpu_load, ai->static_size); 1931 memcpy(ptr, __per_cpu_load, ai->static_size);
1932#ifndef CONFIG_NO_BOOTMEM
1933 /* fix partial free ! */
1932 free_fn(ptr + size_sum, ai->unit_size - size_sum); 1934 free_fn(ptr + size_sum, ai->unit_size - size_sum);
1935#endif
1933 } 1936 }
1934 } 1937 }
1935 1938
diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c
index d9714bdcb4a3..9506c39942f6 100644
--- a/mm/sparse-vmemmap.c
+++ b/mm/sparse-vmemmap.c
@@ -40,7 +40,7 @@ static void * __init_refok __earlyonly_bootmem_alloc(int node,
40 unsigned long align, 40 unsigned long align,
41 unsigned long goal) 41 unsigned long goal)
42{ 42{
43 return __alloc_bootmem_node(NODE_DATA(node), size, align, goal); 43 return __alloc_bootmem_node_high(NODE_DATA(node), size, align, goal);
44} 44}
45 45
46 46