aboutsummaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
authorYinghai Lu <yinghai@kernel.org>2010-02-10 04:20:20 -0500
committerH. Peter Anvin <hpa@zytor.com>2010-02-12 12:41:59 -0500
commit08677214e318297f228237be0042aac754f48f1d (patch)
tree6d03424f7e287fcf66136b44512328afb1aeee49 /arch
parentc252a5bb1f57afb1e336d68085217727ca7b2134 (diff)
x86: Make 64 bit use early_res instead of bootmem before slab
Finally, we can use early_res to replace bootmem on x86_64. CONFIG_NO_BOOTMEM can still be used to enable or disable this.
-v2: fix 32-bit compilation with respect to MAX_DMA32_PFN
-v3: folded in the bug fix from the LKML message below
Signed-off-by: Yinghai Lu <yinghai@kernel.org>
LKML-Reference: <4B747239.4070907@kernel.org>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Diffstat (limited to 'arch')
-rw-r--r--arch/x86/Kconfig13
-rw-r--r--arch/x86/include/asm/e820.h6
-rw-r--r--arch/x86/kernel/e820.c159
-rw-r--r--arch/x86/kernel/setup.c2
-rw-r--r--arch/x86/mm/init_64.c4
-rw-r--r--arch/x86/mm/numa_64.c20
6 files changed, 186 insertions, 18 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index eb4092568f9e..95439843cebc 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -568,6 +568,19 @@ config PARAVIRT_DEBUG
568 Enable to debug paravirt_ops internals. Specifically, BUG if 568 Enable to debug paravirt_ops internals. Specifically, BUG if
569 a paravirt_op is missing when it is called. 569 a paravirt_op is missing when it is called.
570 570
571config NO_BOOTMEM
572 default y
573 bool "Disable Bootmem code"
574 depends on X86_64
575 ---help---
576 Use early_res directly instead of bootmem before slab is ready.
577 - allocator (buddy) [generic]
578 - early allocator (bootmem) [generic]
579 - very early allocator (reserve_early*()) [x86]
580 - very very early allocator (early brk model) [x86]
581 This removes one layer between the early allocator and the final allocator.
582
583
571config MEMTEST 584config MEMTEST
572 bool "Memtest" 585 bool "Memtest"
573 ---help--- 586 ---help---
diff --git a/arch/x86/include/asm/e820.h b/arch/x86/include/asm/e820.h
index 761249e396fe..7d72e5fb7008 100644
--- a/arch/x86/include/asm/e820.h
+++ b/arch/x86/include/asm/e820.h
@@ -117,6 +117,12 @@ extern void free_early(u64 start, u64 end);
117extern void early_res_to_bootmem(u64 start, u64 end); 117extern void early_res_to_bootmem(u64 start, u64 end);
118extern u64 early_reserve_e820(u64 startt, u64 sizet, u64 align); 118extern u64 early_reserve_e820(u64 startt, u64 sizet, u64 align);
119 119
120void reserve_early_without_check(u64 start, u64 end, char *name);
121u64 find_early_area(u64 ei_start, u64 ei_last, u64 start, u64 end,
122 u64 size, u64 align);
123#include <linux/range.h>
124int get_free_all_memory_range(struct range **rangep, int nodeid);
125
120extern unsigned long e820_end_of_ram_pfn(void); 126extern unsigned long e820_end_of_ram_pfn(void);
121extern unsigned long e820_end_of_low_ram_pfn(void); 127extern unsigned long e820_end_of_low_ram_pfn(void);
122extern int e820_find_active_region(const struct e820entry *ei, 128extern int e820_find_active_region(const struct e820entry *ei,
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index e09c18c8f3c1..90a85295f332 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -977,6 +977,25 @@ void __init reserve_early(u64 start, u64 end, char *name)
977 __reserve_early(start, end, name, 0); 977 __reserve_early(start, end, name, 0);
978} 978}
979 979
/*
 * reserve_early_without_check - append [start, end) to the early_res table.
 * @start: start of the region to record
 * @end:   end of the region to record
 * @name:  optional label copied into the entry; may be NULL
 *
 * Writes a new entry at early_res[early_res_count] and bumps the count.
 * This function itself does not compare the new region against existing
 * entries. An empty or inverted region (start >= end) is silently ignored.
 */
980void __init reserve_early_without_check(u64 start, u64 end, char *name)
981{
982 struct early_res *r;
983
984 if (start >= end)
985 return;
986
/* presumably grows the early_res table when it is full - confirm in helper */
987 __check_and_double_early_res(end);
988
989 r = &early_res[early_res_count];
990
991 r->start = start;
992 r->end = end;
993 r->overlap_ok = 0;
/*
 * At most sizeof(r->name) - 1 bytes are copied, so the last byte of
 * r->name is never written here.
 * NOTE(review): NUL termination relies on that byte already being 0, and
 * with name == NULL the slot keeps whatever name it held - confirm that
 * slots are zeroed before reuse.
 */
994 if (name)
995 strncpy(r->name, name, sizeof(r->name) - 1);
996 early_res_count++;
997}
998
980void __init free_early(u64 start, u64 end) 999void __init free_early(u64 start, u64 end)
981{ 1000{
982 struct early_res *r; 1001 struct early_res *r;
@@ -991,6 +1010,94 @@ void __init free_early(u64 start, u64 end)
991 drop_range(i); 1010 drop_range(i);
992} 1011}
993 1012
1013#ifdef CONFIG_NO_BOOTMEM
/*
 * subtract_early_res - remove every early reservation from a range array.
 * @range: array of struct range to be trimmed (values are page frame numbers,
 *         as the reservations are converted with PFN_DOWN/PFN_UP below)
 * @az:    passed through to subtract_range(); presumably the number of
 *         slots in @range - confirm against subtract_range()
 *
 * Each used early_res entry is widened to whole pages and handed to
 * subtract_range().
 */
1014static void __init subtract_early_res(struct range *range, int az)
1015{
1016 int i, count;
1017 u64 final_start, final_end;
1018 int idx = 0;
1019
/* Used entries are those before the first one whose .end is 0. */
1020 count = 0;
1021 for (i = 0; i < max_early_res && early_res[i].end; i++)
1022 count++;
1023
/*
 * When early_res no longer points at early_res_x, entry 0 is skipped.
 * NOTE(review): presumably entry 0 then describes the relocated table
 * itself - confirm against __check_and_double_early_res().
 */
1024 /* need to skip first one ?*/
1025 if (early_res != early_res_x)
1026 idx = 1;
1027
1028#if 1
1029 printk(KERN_INFO "Subtract (%d early reservations)\n", count);
1030#endif
1031 for (i = idx; i < count; i++) {
1032 struct early_res *r = &early_res[i];
1033#if 0
1034 printk(KERN_INFO " #%d [%010llx - %010llx] %15s", i,
1035 r->start, r->end, r->name);
1036#endif
/* Round outwards so partially reserved pages are removed as a whole. */
1037 final_start = PFN_DOWN(r->start);
1038 final_end = PFN_UP(r->end);
1039 if (final_start >= final_end) {
1040#if 0
1041 printk(KERN_CONT "\n");
1042#endif
1043 continue;
1044 }
1045#if 0
1046 printk(KERN_CONT " subtract pfn [%010llx - %010llx]\n",
1047 final_start, final_end);
1048#endif
1049 subtract_range(range, az, final_start, final_end);
1050 }
1051
1052}
1053
/*
 * get_free_all_memory_range - build the array of free ranges for a node.
 * @rangep: out parameter; receives the pointer to the range array
 * @nodeid: node passed to add_from_early_node_map(); the value MAX_NUMNODES
 *          additionally wipes the early_res table (see below)
 *
 * The array is filled from early_node_map, then every early reservation is
 * subtracted from it, and the result is cleaned and sorted.
 *
 * Returns the value of clean_sort_range(), presumably the number of valid
 * entries in *rangep. Panics if no room for the array can be found.
 */
1054int __init get_free_all_memory_range(struct range **rangep, int nodeid)
1055{
1056 int i, count;
1057 u64 start = 0, end;
1058 u64 size;
1059 u64 mem;
1060 struct range *range;
1061 int nr_range;
1062
/* Count the used early_res entries (those before the first .end == 0). */
1063 count = 0;
1064 for (i = 0; i < max_early_res && early_res[i].end; i++)
1065 count++;
1066
/*
 * The array gets twice as many slots as there are reservations.
 * NOTE(review): presumably headroom for ranges that subtraction splits
 * in two - confirm this bound is sufficient.
 */
1067 count *= 2;
1068
1069 size = sizeof(struct range) * count;
/* Search above the MAX_DMA32_PFN boundary when memory is mapped beyond it. */
1070#ifdef MAX_DMA32_PFN
1071 if (max_pfn_mapped > MAX_DMA32_PFN)
1072 start = MAX_DMA32_PFN << PAGE_SHIFT;
1073#endif
1074 end = max_pfn_mapped << PAGE_SHIFT;
/*
 * NOTE(review): the area found here is not recorded as reserved anywhere
 * in this function - confirm the caller treats the array as temporary.
 */
1075 mem = find_e820_area(start, end, size, sizeof(struct range));
1076 if (mem == -1ULL)
1077 panic("can not find more space for range free");
1078
1079 range = __va(mem);
1080 /* use early_node_map[] and early_res to get range array at first */
1081 memset(range, 0, size);
1082 nr_range = 0;
1083
1084 /* need to go over early_node_map to find out good range for node */
1085 nr_range = add_from_early_node_map(range, count, nr_range, nodeid);
1086 subtract_early_res(range, count);
/* The returned count comes from this call and replaces the value above. */
1087 nr_range = clean_sort_range(range, count);
1088
/*
 * For nodeid == MAX_NUMNODES the early_res table is zeroed and detached.
 * NOTE(review): early_res_count is left untouched here, whereas
 * early_res_to_bootmem() resets it to 0 - confirm this is intended.
 */
1089 /* need to clear it ? */
1090 if (nodeid == MAX_NUMNODES) {
1091 memset(&early_res[0], 0,
1092 sizeof(struct early_res) * max_early_res);
1093 early_res = NULL;
1094 max_early_res = 0;
1095 }
1096
1097 *rangep = range;
1098 return nr_range;
1099}
1100#else
994void __init early_res_to_bootmem(u64 start, u64 end) 1101void __init early_res_to_bootmem(u64 start, u64 end)
995{ 1102{
996 int i, count; 1103 int i, count;
@@ -1028,6 +1135,7 @@ void __init early_res_to_bootmem(u64 start, u64 end)
1028 max_early_res = 0; 1135 max_early_res = 0;
1029 early_res_count = 0; 1136 early_res_count = 0;
1030} 1137}
1138#endif
1031 1139
1032/* Check for already reserved areas */ 1140/* Check for already reserved areas */
1033static inline int __init bad_addr(u64 *addrp, u64 size, u64 align) 1141static inline int __init bad_addr(u64 *addrp, u64 size, u64 align)
@@ -1083,6 +1191,35 @@ again:
1083 1191
1084/* 1192/*
1085 * Find a free area with specified alignment in a specific range. 1193 * Find a free area with specified alignment in a specific range.
1194 * only with the area.between start to end is active range from early_node_map
1195 * so they are good as RAM
1196 */
/*
 * find_early_area - look for @size bytes inside one candidate region.
 * @ei_start: start of the candidate region
 * @ei_last:  end of the candidate region
 * @start:    lowest acceptable address for the result
 * @end:      upper limit; the result must satisfy addr + size <= end
 * @size:     number of bytes wanted
 * @align:    alignment applied to the result with round_up()
 *
 * Returns the address found, or -1ULL if the region cannot hold the area.
 */
1197u64 __init find_early_area(u64 ei_start, u64 ei_last, u64 start, u64 end,
1198 u64 size, u64 align)
1199{
1200 u64 addr, last;
1201
/* Begin at the aligned region start, but never below the caller's start. */
1202 addr = round_up(ei_start, align);
1203 if (addr < start)
1204 addr = round_up(start, align);
1205 if (addr >= ei_last)
1206 goto out;
/*
 * Keep calling bad_addr() while it reports a problem and the candidate
 * still fits in the region; presumably bad_addr() moves addr past the
 * reserved area it hit - confirm in bad_addr().
 */
1207 while (bad_addr(&addr, size, align) && addr+size <= ei_last)
1208 ;
/* The candidate must end inside both the region and the caller's limit. */
1209 last = addr + size;
1210 if (last > ei_last)
1211 goto out;
1212 if (last > end)
1213 goto out;
1214
1215 return addr;
1216
1217out:
1218 return -1ULL;
1219}
1220
1221/*
1222 * Find a free area with specified alignment in a specific range.
1086 */ 1223 */
1087u64 __init find_e820_area(u64 start, u64 end, u64 size, u64 align) 1224u64 __init find_e820_area(u64 start, u64 end, u64 size, u64 align)
1088{ 1225{
@@ -1090,24 +1227,20 @@ u64 __init find_e820_area(u64 start, u64 end, u64 size, u64 align)
1090 1227
1091 for (i = 0; i < e820.nr_map; i++) { 1228 for (i = 0; i < e820.nr_map; i++) {
1092 struct e820entry *ei = &e820.map[i]; 1229 struct e820entry *ei = &e820.map[i];
1093 u64 addr, last; 1230 u64 addr;
1094 u64 ei_last; 1231 u64 ei_start, ei_last;
1095 1232
1096 if (ei->type != E820_RAM) 1233 if (ei->type != E820_RAM)
1097 continue; 1234 continue;
1098 addr = round_up(ei->addr, align); 1235
1099 ei_last = ei->addr + ei->size; 1236 ei_last = ei->addr + ei->size;
1100 if (addr < start) 1237 ei_start = ei->addr;
1101 addr = round_up(start, align); 1238 addr = find_early_area(ei_start, ei_last, start, end,
1102 if (addr >= ei_last) 1239 size, align);
1103 continue; 1240
1104 while (bad_addr(&addr, size, align) && addr+size <= ei_last) 1241 if (addr == -1ULL)
1105 ;
1106 last = addr + size;
1107 if (last > ei_last)
1108 continue;
1109 if (last > end)
1110 continue; 1242 continue;
1243
1111 return addr; 1244 return addr;
1112 } 1245 }
1113 return -1ULL; 1246 return -1ULL;
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index ea4141b48518..d49e168bda8c 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -967,7 +967,9 @@ void __init setup_arch(char **cmdline_p)
967#endif 967#endif
968 968
969 initmem_init(0, max_pfn, acpi, k8); 969 initmem_init(0, max_pfn, acpi, k8);
970#ifndef CONFIG_NO_BOOTMEM
970 early_res_to_bootmem(0, max_low_pfn<<PAGE_SHIFT); 971 early_res_to_bootmem(0, max_low_pfn<<PAGE_SHIFT);
972#endif
971 973
972 dma32_reserve_bootmem(); 974 dma32_reserve_bootmem();
973 975
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index a15abaae5ba4..53158b7e5d46 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -572,6 +572,7 @@ kernel_physical_mapping_init(unsigned long start,
572void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn, 572void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn,
573 int acpi, int k8) 573 int acpi, int k8)
574{ 574{
575#ifndef CONFIG_NO_BOOTMEM
575 unsigned long bootmap_size, bootmap; 576 unsigned long bootmap_size, bootmap;
576 577
577 bootmap_size = bootmem_bootmap_pages(end_pfn)<<PAGE_SHIFT; 578 bootmap_size = bootmem_bootmap_pages(end_pfn)<<PAGE_SHIFT;
@@ -585,6 +586,9 @@ void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn,
585 0, end_pfn); 586 0, end_pfn);
586 e820_register_active_regions(0, start_pfn, end_pfn); 587 e820_register_active_regions(0, start_pfn, end_pfn);
587 free_bootmem_with_active_regions(0, end_pfn); 588 free_bootmem_with_active_regions(0, end_pfn);
589#else
590 e820_register_active_regions(0, start_pfn, end_pfn);
591#endif
588} 592}
589#endif 593#endif
590 594
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 02f13cb99bc2..a20e17059afd 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -198,11 +198,13 @@ static void * __init early_node_mem(int nodeid, unsigned long start,
198void __init 198void __init
199setup_node_bootmem(int nodeid, unsigned long start, unsigned long end) 199setup_node_bootmem(int nodeid, unsigned long start, unsigned long end)
200{ 200{
201 unsigned long start_pfn, last_pfn, bootmap_pages, bootmap_size; 201 unsigned long start_pfn, last_pfn, nodedata_phys;
202 const int pgdat_size = roundup(sizeof(pg_data_t), PAGE_SIZE); 202 const int pgdat_size = roundup(sizeof(pg_data_t), PAGE_SIZE);
203 unsigned long bootmap_start, nodedata_phys;
204 void *bootmap;
205 int nid; 203 int nid;
204#ifndef CONFIG_NO_BOOTMEM
205 unsigned long bootmap_start, bootmap_pages, bootmap_size;
206 void *bootmap;
207#endif
206 208
207 if (!end) 209 if (!end)
208 return; 210 return;
@@ -216,7 +218,7 @@ setup_node_bootmem(int nodeid, unsigned long start, unsigned long end)
216 218
217 start = roundup(start, ZONE_ALIGN); 219 start = roundup(start, ZONE_ALIGN);
218 220
219 printk(KERN_INFO "Bootmem setup node %d %016lx-%016lx\n", nodeid, 221 printk(KERN_INFO "Initmem setup node %d %016lx-%016lx\n", nodeid,
220 start, end); 222 start, end);
221 223
222 start_pfn = start >> PAGE_SHIFT; 224 start_pfn = start >> PAGE_SHIFT;
@@ -235,10 +237,13 @@ setup_node_bootmem(int nodeid, unsigned long start, unsigned long end)
235 printk(KERN_INFO " NODE_DATA(%d) on node %d\n", nodeid, nid); 237 printk(KERN_INFO " NODE_DATA(%d) on node %d\n", nodeid, nid);
236 238
237 memset(NODE_DATA(nodeid), 0, sizeof(pg_data_t)); 239 memset(NODE_DATA(nodeid), 0, sizeof(pg_data_t));
238 NODE_DATA(nodeid)->bdata = &bootmem_node_data[nodeid]; 240 NODE_DATA(nodeid)->node_id = nodeid;
239 NODE_DATA(nodeid)->node_start_pfn = start_pfn; 241 NODE_DATA(nodeid)->node_start_pfn = start_pfn;
240 NODE_DATA(nodeid)->node_spanned_pages = last_pfn - start_pfn; 242 NODE_DATA(nodeid)->node_spanned_pages = last_pfn - start_pfn;
241 243
244#ifndef CONFIG_NO_BOOTMEM
245 NODE_DATA(nodeid)->bdata = &bootmem_node_data[nodeid];
246
242 /* 247 /*
243 * Find a place for the bootmem map 248 * Find a place for the bootmem map
244 * nodedata_phys could be on other nodes by alloc_bootmem, 249 * nodedata_phys could be on other nodes by alloc_bootmem,
@@ -275,6 +280,7 @@ setup_node_bootmem(int nodeid, unsigned long start, unsigned long end)
275 printk(KERN_INFO " bootmap(%d) on node %d\n", nodeid, nid); 280 printk(KERN_INFO " bootmap(%d) on node %d\n", nodeid, nid);
276 281
277 free_bootmem_with_active_regions(nodeid, end); 282 free_bootmem_with_active_regions(nodeid, end);
283#endif
278 284
279 node_set_online(nodeid); 285 node_set_online(nodeid);
280} 286}
@@ -733,6 +739,10 @@ unsigned long __init numa_free_all_bootmem(void)
733 for_each_online_node(i) 739 for_each_online_node(i)
734 pages += free_all_bootmem_node(NODE_DATA(i)); 740 pages += free_all_bootmem_node(NODE_DATA(i));
735 741
742#ifdef CONFIG_NO_BOOTMEM
743 pages += free_all_memory_core_early(MAX_NUMNODES);
744#endif
745
736 return pages; 746 return pages;
737} 747}
738 748