diff options
author | Yinghai Lu <yinghai@kernel.org> | 2010-02-10 04:20:20 -0500 |
---|---|---|
committer | H. Peter Anvin <hpa@zytor.com> | 2010-02-12 12:41:59 -0500 |
commit | 08677214e318297f228237be0042aac754f48f1d (patch) | |
tree | 6d03424f7e287fcf66136b44512328afb1aeee49 /arch/x86 | |
parent | c252a5bb1f57afb1e336d68085217727ca7b2134 (diff) |
x86: Make 64 bit use early_res instead of bootmem before slab
Finally, we can use early_res to replace bootmem on x86_64.
CONFIG_NO_BOOTMEM can still be used to enable or disable this.
-v2: fix 32-bit compilation with respect to MAX_DMA32_PFN
-v3: folded bug fix from LKML message below
Signed-off-by: Yinghai Lu <yinghai@kernel.org>
LKML-Reference: <4B747239.4070907@kernel.org>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Diffstat (limited to 'arch/x86')
-rw-r--r-- | arch/x86/Kconfig | 13 | ||||
-rw-r--r-- | arch/x86/include/asm/e820.h | 6 | ||||
-rw-r--r-- | arch/x86/kernel/e820.c | 159 | ||||
-rw-r--r-- | arch/x86/kernel/setup.c | 2 | ||||
-rw-r--r-- | arch/x86/mm/init_64.c | 4 | ||||
-rw-r--r-- | arch/x86/mm/numa_64.c | 20 |
6 files changed, 186 insertions, 18 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index eb4092568f9e..95439843cebc 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig | |||
@@ -568,6 +568,19 @@ config PARAVIRT_DEBUG | |||
568 | Enable to debug paravirt_ops internals. Specifically, BUG if | 568 | Enable to debug paravirt_ops internals. Specifically, BUG if |
569 | a paravirt_op is missing when it is called. | 569 | a paravirt_op is missing when it is called. |
570 | 570 | ||
571 | config NO_BOOTMEM | ||
572 | default y | ||
573 | bool "Disable Bootmem code" | ||
574 | depends on X86_64 | ||
575 | ---help--- | ||
576 | Use early_res directly instead of bootmem before slab is ready. | ||
577 | - allocator (buddy) [generic] | ||
578 | - early allocator (bootmem) [generic] | ||
579 | - very early allocator (reserve_early*()) [x86] | ||
580 | - very very early allocator (early brk model) [x86] | ||
581 | So reduce one layer between early allocator to final allocator | ||
582 | |||
583 | |||
571 | config MEMTEST | 584 | config MEMTEST |
572 | bool "Memtest" | 585 | bool "Memtest" |
573 | ---help--- | 586 | ---help--- |
diff --git a/arch/x86/include/asm/e820.h b/arch/x86/include/asm/e820.h index 761249e396fe..7d72e5fb7008 100644 --- a/arch/x86/include/asm/e820.h +++ b/arch/x86/include/asm/e820.h | |||
@@ -117,6 +117,12 @@ extern void free_early(u64 start, u64 end); | |||
117 | extern void early_res_to_bootmem(u64 start, u64 end); | 117 | extern void early_res_to_bootmem(u64 start, u64 end); |
118 | extern u64 early_reserve_e820(u64 startt, u64 sizet, u64 align); | 118 | extern u64 early_reserve_e820(u64 startt, u64 sizet, u64 align); |
119 | 119 | ||
120 | void reserve_early_without_check(u64 start, u64 end, char *name); | ||
121 | u64 find_early_area(u64 ei_start, u64 ei_last, u64 start, u64 end, | ||
122 | u64 size, u64 align); | ||
123 | #include <linux/range.h> | ||
124 | int get_free_all_memory_range(struct range **rangep, int nodeid); | ||
125 | |||
120 | extern unsigned long e820_end_of_ram_pfn(void); | 126 | extern unsigned long e820_end_of_ram_pfn(void); |
121 | extern unsigned long e820_end_of_low_ram_pfn(void); | 127 | extern unsigned long e820_end_of_low_ram_pfn(void); |
122 | extern int e820_find_active_region(const struct e820entry *ei, | 128 | extern int e820_find_active_region(const struct e820entry *ei, |
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index e09c18c8f3c1..90a85295f332 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c | |||
@@ -977,6 +977,25 @@ void __init reserve_early(u64 start, u64 end, char *name) | |||
977 | __reserve_early(start, end, name, 0); | 977 | __reserve_early(start, end, name, 0); |
978 | } | 978 | } |
979 | 979 | ||
980 | void __init reserve_early_without_check(u64 start, u64 end, char *name) | ||
981 | { | ||
982 | struct early_res *r; | ||
983 | |||
984 | if (start >= end) | ||
985 | return; | ||
986 | |||
987 | __check_and_double_early_res(end); | ||
988 | |||
989 | r = &early_res[early_res_count]; | ||
990 | |||
991 | r->start = start; | ||
992 | r->end = end; | ||
993 | r->overlap_ok = 0; | ||
994 | if (name) | ||
995 | strncpy(r->name, name, sizeof(r->name) - 1); | ||
996 | early_res_count++; | ||
997 | } | ||
998 | |||
980 | void __init free_early(u64 start, u64 end) | 999 | void __init free_early(u64 start, u64 end) |
981 | { | 1000 | { |
982 | struct early_res *r; | 1001 | struct early_res *r; |
@@ -991,6 +1010,94 @@ void __init free_early(u64 start, u64 end) | |||
991 | drop_range(i); | 1010 | drop_range(i); |
992 | } | 1011 | } |
993 | 1012 | ||
1013 | #ifdef CONFIG_NO_BOOTMEM | ||
1014 | static void __init subtract_early_res(struct range *range, int az) | ||
1015 | { | ||
1016 | int i, count; | ||
1017 | u64 final_start, final_end; | ||
1018 | int idx = 0; | ||
1019 | |||
1020 | count = 0; | ||
1021 | for (i = 0; i < max_early_res && early_res[i].end; i++) | ||
1022 | count++; | ||
1023 | |||
1024 | /* need to skip first one ?*/ | ||
1025 | if (early_res != early_res_x) | ||
1026 | idx = 1; | ||
1027 | |||
1028 | #if 1 | ||
1029 | printk(KERN_INFO "Subtract (%d early reservations)\n", count); | ||
1030 | #endif | ||
1031 | for (i = idx; i < count; i++) { | ||
1032 | struct early_res *r = &early_res[i]; | ||
1033 | #if 0 | ||
1034 | printk(KERN_INFO " #%d [%010llx - %010llx] %15s", i, | ||
1035 | r->start, r->end, r->name); | ||
1036 | #endif | ||
1037 | final_start = PFN_DOWN(r->start); | ||
1038 | final_end = PFN_UP(r->end); | ||
1039 | if (final_start >= final_end) { | ||
1040 | #if 0 | ||
1041 | printk(KERN_CONT "\n"); | ||
1042 | #endif | ||
1043 | continue; | ||
1044 | } | ||
1045 | #if 0 | ||
1046 | printk(KERN_CONT " subtract pfn [%010llx - %010llx]\n", | ||
1047 | final_start, final_end); | ||
1048 | #endif | ||
1049 | subtract_range(range, az, final_start, final_end); | ||
1050 | } | ||
1051 | |||
1052 | } | ||
1053 | |||
1054 | int __init get_free_all_memory_range(struct range **rangep, int nodeid) | ||
1055 | { | ||
1056 | int i, count; | ||
1057 | u64 start = 0, end; | ||
1058 | u64 size; | ||
1059 | u64 mem; | ||
1060 | struct range *range; | ||
1061 | int nr_range; | ||
1062 | |||
1063 | count = 0; | ||
1064 | for (i = 0; i < max_early_res && early_res[i].end; i++) | ||
1065 | count++; | ||
1066 | |||
1067 | count *= 2; | ||
1068 | |||
1069 | size = sizeof(struct range) * count; | ||
1070 | #ifdef MAX_DMA32_PFN | ||
1071 | if (max_pfn_mapped > MAX_DMA32_PFN) | ||
1072 | start = MAX_DMA32_PFN << PAGE_SHIFT; | ||
1073 | #endif | ||
1074 | end = max_pfn_mapped << PAGE_SHIFT; | ||
1075 | mem = find_e820_area(start, end, size, sizeof(struct range)); | ||
1076 | if (mem == -1ULL) | ||
1077 | panic("can not find more space for range free"); | ||
1078 | |||
1079 | range = __va(mem); | ||
1080 | /* use early_node_map[] and early_res to get range array at first */ | ||
1081 | memset(range, 0, size); | ||
1082 | nr_range = 0; | ||
1083 | |||
1084 | /* need to go over early_node_map to find out good range for node */ | ||
1085 | nr_range = add_from_early_node_map(range, count, nr_range, nodeid); | ||
1086 | subtract_early_res(range, count); | ||
1087 | nr_range = clean_sort_range(range, count); | ||
1088 | |||
1089 | /* need to clear it ? */ | ||
1090 | if (nodeid == MAX_NUMNODES) { | ||
1091 | memset(&early_res[0], 0, | ||
1092 | sizeof(struct early_res) * max_early_res); | ||
1093 | early_res = NULL; | ||
1094 | max_early_res = 0; | ||
1095 | } | ||
1096 | |||
1097 | *rangep = range; | ||
1098 | return nr_range; | ||
1099 | } | ||
1100 | #else | ||
994 | void __init early_res_to_bootmem(u64 start, u64 end) | 1101 | void __init early_res_to_bootmem(u64 start, u64 end) |
995 | { | 1102 | { |
996 | int i, count; | 1103 | int i, count; |
@@ -1028,6 +1135,7 @@ void __init early_res_to_bootmem(u64 start, u64 end) | |||
1028 | max_early_res = 0; | 1135 | max_early_res = 0; |
1029 | early_res_count = 0; | 1136 | early_res_count = 0; |
1030 | } | 1137 | } |
1138 | #endif | ||
1031 | 1139 | ||
1032 | /* Check for already reserved areas */ | 1140 | /* Check for already reserved areas */ |
1033 | static inline int __init bad_addr(u64 *addrp, u64 size, u64 align) | 1141 | static inline int __init bad_addr(u64 *addrp, u64 size, u64 align) |
@@ -1083,6 +1191,35 @@ again: | |||
1083 | 1191 | ||
1084 | /* | 1192 | /* |
1085 | * Find a free area with specified alignment in a specific range. | 1193 | * Find a free area with specified alignment in a specific range. |
1194 | * only with the area.between start to end is active range from early_node_map | ||
1195 | * so they are good as RAM | ||
1196 | */ | ||
1197 | u64 __init find_early_area(u64 ei_start, u64 ei_last, u64 start, u64 end, | ||
1198 | u64 size, u64 align) | ||
1199 | { | ||
1200 | u64 addr, last; | ||
1201 | |||
1202 | addr = round_up(ei_start, align); | ||
1203 | if (addr < start) | ||
1204 | addr = round_up(start, align); | ||
1205 | if (addr >= ei_last) | ||
1206 | goto out; | ||
1207 | while (bad_addr(&addr, size, align) && addr+size <= ei_last) | ||
1208 | ; | ||
1209 | last = addr + size; | ||
1210 | if (last > ei_last) | ||
1211 | goto out; | ||
1212 | if (last > end) | ||
1213 | goto out; | ||
1214 | |||
1215 | return addr; | ||
1216 | |||
1217 | out: | ||
1218 | return -1ULL; | ||
1219 | } | ||
1220 | |||
1221 | /* | ||
1222 | * Find a free area with specified alignment in a specific range. | ||
1086 | */ | 1223 | */ |
1087 | u64 __init find_e820_area(u64 start, u64 end, u64 size, u64 align) | 1224 | u64 __init find_e820_area(u64 start, u64 end, u64 size, u64 align) |
1088 | { | 1225 | { |
@@ -1090,24 +1227,20 @@ u64 __init find_e820_area(u64 start, u64 end, u64 size, u64 align) | |||
1090 | 1227 | ||
1091 | for (i = 0; i < e820.nr_map; i++) { | 1228 | for (i = 0; i < e820.nr_map; i++) { |
1092 | struct e820entry *ei = &e820.map[i]; | 1229 | struct e820entry *ei = &e820.map[i]; |
1093 | u64 addr, last; | 1230 | u64 addr; |
1094 | u64 ei_last; | 1231 | u64 ei_start, ei_last; |
1095 | 1232 | ||
1096 | if (ei->type != E820_RAM) | 1233 | if (ei->type != E820_RAM) |
1097 | continue; | 1234 | continue; |
1098 | addr = round_up(ei->addr, align); | 1235 | |
1099 | ei_last = ei->addr + ei->size; | 1236 | ei_last = ei->addr + ei->size; |
1100 | if (addr < start) | 1237 | ei_start = ei->addr; |
1101 | addr = round_up(start, align); | 1238 | addr = find_early_area(ei_start, ei_last, start, end, |
1102 | if (addr >= ei_last) | 1239 | size, align); |
1103 | continue; | 1240 | |
1104 | while (bad_addr(&addr, size, align) && addr+size <= ei_last) | 1241 | if (addr == -1ULL) |
1105 | ; | ||
1106 | last = addr + size; | ||
1107 | if (last > ei_last) | ||
1108 | continue; | ||
1109 | if (last > end) | ||
1110 | continue; | 1242 | continue; |
1243 | |||
1111 | return addr; | 1244 | return addr; |
1112 | } | 1245 | } |
1113 | return -1ULL; | 1246 | return -1ULL; |
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index ea4141b48518..d49e168bda8c 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c | |||
@@ -967,7 +967,9 @@ void __init setup_arch(char **cmdline_p) | |||
967 | #endif | 967 | #endif |
968 | 968 | ||
969 | initmem_init(0, max_pfn, acpi, k8); | 969 | initmem_init(0, max_pfn, acpi, k8); |
970 | #ifndef CONFIG_NO_BOOTMEM | ||
970 | early_res_to_bootmem(0, max_low_pfn<<PAGE_SHIFT); | 971 | early_res_to_bootmem(0, max_low_pfn<<PAGE_SHIFT); |
972 | #endif | ||
971 | 973 | ||
972 | dma32_reserve_bootmem(); | 974 | dma32_reserve_bootmem(); |
973 | 975 | ||
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index a15abaae5ba4..53158b7e5d46 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c | |||
@@ -572,6 +572,7 @@ kernel_physical_mapping_init(unsigned long start, | |||
572 | void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn, | 572 | void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn, |
573 | int acpi, int k8) | 573 | int acpi, int k8) |
574 | { | 574 | { |
575 | #ifndef CONFIG_NO_BOOTMEM | ||
575 | unsigned long bootmap_size, bootmap; | 576 | unsigned long bootmap_size, bootmap; |
576 | 577 | ||
577 | bootmap_size = bootmem_bootmap_pages(end_pfn)<<PAGE_SHIFT; | 578 | bootmap_size = bootmem_bootmap_pages(end_pfn)<<PAGE_SHIFT; |
@@ -585,6 +586,9 @@ void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn, | |||
585 | 0, end_pfn); | 586 | 0, end_pfn); |
586 | e820_register_active_regions(0, start_pfn, end_pfn); | 587 | e820_register_active_regions(0, start_pfn, end_pfn); |
587 | free_bootmem_with_active_regions(0, end_pfn); | 588 | free_bootmem_with_active_regions(0, end_pfn); |
589 | #else | ||
590 | e820_register_active_regions(0, start_pfn, end_pfn); | ||
591 | #endif | ||
588 | } | 592 | } |
589 | #endif | 593 | #endif |
590 | 594 | ||
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c index 02f13cb99bc2..a20e17059afd 100644 --- a/arch/x86/mm/numa_64.c +++ b/arch/x86/mm/numa_64.c | |||
@@ -198,11 +198,13 @@ static void * __init early_node_mem(int nodeid, unsigned long start, | |||
198 | void __init | 198 | void __init |
199 | setup_node_bootmem(int nodeid, unsigned long start, unsigned long end) | 199 | setup_node_bootmem(int nodeid, unsigned long start, unsigned long end) |
200 | { | 200 | { |
201 | unsigned long start_pfn, last_pfn, bootmap_pages, bootmap_size; | 201 | unsigned long start_pfn, last_pfn, nodedata_phys; |
202 | const int pgdat_size = roundup(sizeof(pg_data_t), PAGE_SIZE); | 202 | const int pgdat_size = roundup(sizeof(pg_data_t), PAGE_SIZE); |
203 | unsigned long bootmap_start, nodedata_phys; | ||
204 | void *bootmap; | ||
205 | int nid; | 203 | int nid; |
204 | #ifndef CONFIG_NO_BOOTMEM | ||
205 | unsigned long bootmap_start, bootmap_pages, bootmap_size; | ||
206 | void *bootmap; | ||
207 | #endif | ||
206 | 208 | ||
207 | if (!end) | 209 | if (!end) |
208 | return; | 210 | return; |
@@ -216,7 +218,7 @@ setup_node_bootmem(int nodeid, unsigned long start, unsigned long end) | |||
216 | 218 | ||
217 | start = roundup(start, ZONE_ALIGN); | 219 | start = roundup(start, ZONE_ALIGN); |
218 | 220 | ||
219 | printk(KERN_INFO "Bootmem setup node %d %016lx-%016lx\n", nodeid, | 221 | printk(KERN_INFO "Initmem setup node %d %016lx-%016lx\n", nodeid, |
220 | start, end); | 222 | start, end); |
221 | 223 | ||
222 | start_pfn = start >> PAGE_SHIFT; | 224 | start_pfn = start >> PAGE_SHIFT; |
@@ -235,10 +237,13 @@ setup_node_bootmem(int nodeid, unsigned long start, unsigned long end) | |||
235 | printk(KERN_INFO " NODE_DATA(%d) on node %d\n", nodeid, nid); | 237 | printk(KERN_INFO " NODE_DATA(%d) on node %d\n", nodeid, nid); |
236 | 238 | ||
237 | memset(NODE_DATA(nodeid), 0, sizeof(pg_data_t)); | 239 | memset(NODE_DATA(nodeid), 0, sizeof(pg_data_t)); |
238 | NODE_DATA(nodeid)->bdata = &bootmem_node_data[nodeid]; | 240 | NODE_DATA(nodeid)->node_id = nodeid; |
239 | NODE_DATA(nodeid)->node_start_pfn = start_pfn; | 241 | NODE_DATA(nodeid)->node_start_pfn = start_pfn; |
240 | NODE_DATA(nodeid)->node_spanned_pages = last_pfn - start_pfn; | 242 | NODE_DATA(nodeid)->node_spanned_pages = last_pfn - start_pfn; |
241 | 243 | ||
244 | #ifndef CONFIG_NO_BOOTMEM | ||
245 | NODE_DATA(nodeid)->bdata = &bootmem_node_data[nodeid]; | ||
246 | |||
242 | /* | 247 | /* |
243 | * Find a place for the bootmem map | 248 | * Find a place for the bootmem map |
244 | * nodedata_phys could be on other nodes by alloc_bootmem, | 249 | * nodedata_phys could be on other nodes by alloc_bootmem, |
@@ -275,6 +280,7 @@ setup_node_bootmem(int nodeid, unsigned long start, unsigned long end) | |||
275 | printk(KERN_INFO " bootmap(%d) on node %d\n", nodeid, nid); | 280 | printk(KERN_INFO " bootmap(%d) on node %d\n", nodeid, nid); |
276 | 281 | ||
277 | free_bootmem_with_active_regions(nodeid, end); | 282 | free_bootmem_with_active_regions(nodeid, end); |
283 | #endif | ||
278 | 284 | ||
279 | node_set_online(nodeid); | 285 | node_set_online(nodeid); |
280 | } | 286 | } |
@@ -733,6 +739,10 @@ unsigned long __init numa_free_all_bootmem(void) | |||
733 | for_each_online_node(i) | 739 | for_each_online_node(i) |
734 | pages += free_all_bootmem_node(NODE_DATA(i)); | 740 | pages += free_all_bootmem_node(NODE_DATA(i)); |
735 | 741 | ||
742 | #ifdef CONFIG_NO_BOOTMEM | ||
743 | pages += free_all_memory_core_early(MAX_NUMNODES); | ||
744 | #endif | ||
745 | |||
736 | return pages; | 746 | return pages; |
737 | } | 747 | } |
738 | 748 | ||