aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>2009-02-26 20:35:44 -0500
committerH. Peter Anvin <hpa@zytor.com>2009-03-14 18:37:14 -0400
commit93dbda7cbcd70a0bd1a99f39f44a9ccde8ab9040 (patch)
tree132347cd952fb8281247c1fb38ec1c608c19a3f6
parentb9719a4d9cfa5d46fa6a1eb3e3fc2238e7981e4d (diff)
x86: add brk allocation for very, very early allocations
Impact: new interface Add a brk()-like allocator which effectively extends the bss in order to allow very early code to do dynamic allocations. This is better than using statically allocated arrays for data in subsystems which may never get used. The space for brk allocations is in the bss ELF segment, so that the space is mapped properly by the code which maps the kernel, and so that bootloaders keep the space free rather than putting a ramdisk or something into it. The bss itself, delimited by __bss_stop, ends before the brk area (__brk_base to __brk_limit). The kernel text, data and bss is reserved up to __bss_stop. Any brk-allocated data is reserved separately just before the kernel pagetable is built, as that code allocates from unreserved spaces in the e820 map, potentially allocating from any unused brk memory. Ultimately any unused memory in the brk area is used in the general kernel memory pool. Initially the brk space is set to 1MB, which is probably much larger than any user needs (the largest current user is i386 head_32.S's code to build the pagetables to map the kernel, which can get fairly large with a big kernel image and no PSE support). So long as the system has sufficient memory for the bootloader to reserve the kernel+1MB brk, there are no bad effects resulting from an over-large brk. Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com> Signed-off-by: H. Peter Anvin <hpa@zytor.com>
-rw-r--r--arch/x86/include/asm/sections.h7
-rw-r--r--arch/x86/include/asm/setup.h4
-rw-r--r--arch/x86/kernel/head32.c2
-rw-r--r--arch/x86/kernel/head64.c2
-rw-r--r--arch/x86/kernel/setup.c39
-rw-r--r--arch/x86/kernel/vmlinux_32.lds.S7
-rw-r--r--arch/x86/kernel/vmlinux_64.lds.S5
-rw-r--r--arch/x86/mm/pageattr.c5
-rw-r--r--arch/x86/xen/mmu.c6
9 files changed, 65 insertions, 12 deletions
diff --git a/arch/x86/include/asm/sections.h b/arch/x86/include/asm/sections.h
index 2b8c5160388f..1b7ee5d673c2 100644
--- a/arch/x86/include/asm/sections.h
+++ b/arch/x86/include/asm/sections.h
@@ -1 +1,8 @@
1#ifndef _ASM_X86_SECTIONS_H
2#define _ASM_X86_SECTIONS_H
3
1#include <asm-generic/sections.h> 4#include <asm-generic/sections.h>
5
6extern char __brk_base[], __brk_limit[];
7
8#endif /* _ASM_X86_SECTIONS_H */
diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h
index 05c6f6b11fd5..45454f3fa121 100644
--- a/arch/x86/include/asm/setup.h
+++ b/arch/x86/include/asm/setup.h
@@ -100,6 +100,10 @@ extern struct boot_params boot_params;
100 */ 100 */
101#define LOWMEMSIZE() (0x9f000) 101#define LOWMEMSIZE() (0x9f000)
102 102
103/* exceedingly early brk-like allocator */
104extern unsigned long _brk_end;
105void *extend_brk(size_t size, size_t align);
106
103#ifdef __i386__ 107#ifdef __i386__
104 108
105void __init i386_start_kernel(void); 109void __init i386_start_kernel(void);
diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c
index ac108d1fe182..29f1095b0849 100644
--- a/arch/x86/kernel/head32.c
+++ b/arch/x86/kernel/head32.c
@@ -18,7 +18,7 @@ void __init i386_start_kernel(void)
18{ 18{
19 reserve_trampoline_memory(); 19 reserve_trampoline_memory();
20 20
21 reserve_early(__pa_symbol(&_text), __pa_symbol(&_end), "TEXT DATA BSS"); 21 reserve_early(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS");
22 22
23#ifdef CONFIG_BLK_DEV_INITRD 23#ifdef CONFIG_BLK_DEV_INITRD
24 /* Reserve INITRD */ 24 /* Reserve INITRD */
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index f5b272247690..70eaa852c732 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -100,7 +100,7 @@ void __init x86_64_start_reservations(char *real_mode_data)
100 100
101 reserve_trampoline_memory(); 101 reserve_trampoline_memory();
102 102
103 reserve_early(__pa_symbol(&_text), __pa_symbol(&_end), "TEXT DATA BSS"); 103 reserve_early(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS");
104 104
105#ifdef CONFIG_BLK_DEV_INITRD 105#ifdef CONFIG_BLK_DEV_INITRD
106 /* Reserve INITRD */ 106 /* Reserve INITRD */
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index f28c56e6bf94..e6b742d2a3f5 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -114,6 +114,9 @@
114 114
115unsigned int boot_cpu_id __read_mostly; 115unsigned int boot_cpu_id __read_mostly;
116 116
117static __initdata unsigned long _brk_start = (unsigned long)__brk_base;
118unsigned long _brk_end = (unsigned long)__brk_base;
119
117#ifdef CONFIG_X86_64 120#ifdef CONFIG_X86_64
118int default_cpu_present_to_apicid(int mps_cpu) 121int default_cpu_present_to_apicid(int mps_cpu)
119{ 122{
@@ -337,6 +340,34 @@ static void __init relocate_initrd(void)
337} 340}
338#endif 341#endif
339 342
343void * __init extend_brk(size_t size, size_t align)
344{
345 size_t mask = align - 1;
346 void *ret;
347
348 BUG_ON(_brk_start == 0);
349 BUG_ON(align & mask);
350
351 _brk_end = (_brk_end + mask) & ~mask;
352 BUG_ON((char *)(_brk_end + size) > __brk_limit);
353
354 ret = (void *)_brk_end;
355 _brk_end += size;
356
357 memset(ret, 0, size);
358
359 return ret;
360}
361
362static void __init reserve_brk(void)
363{
364 if (_brk_end > _brk_start)
365 reserve_early(__pa(_brk_start), __pa(_brk_end), "BRK");
366
367 /* Mark brk area as locked down and no longer taking any new allocations */
368 _brk_start = 0;
369}
370
340static void __init reserve_initrd(void) 371static void __init reserve_initrd(void)
341{ 372{
342 u64 ramdisk_image = boot_params.hdr.ramdisk_image; 373 u64 ramdisk_image = boot_params.hdr.ramdisk_image;
@@ -717,11 +748,7 @@ void __init setup_arch(char **cmdline_p)
717 init_mm.start_code = (unsigned long) _text; 748 init_mm.start_code = (unsigned long) _text;
718 init_mm.end_code = (unsigned long) _etext; 749 init_mm.end_code = (unsigned long) _etext;
719 init_mm.end_data = (unsigned long) _edata; 750 init_mm.end_data = (unsigned long) _edata;
720#ifdef CONFIG_X86_32 751 init_mm.brk = _brk_end;
721 init_mm.brk = init_pg_tables_end + PAGE_OFFSET;
722#else
723 init_mm.brk = (unsigned long) &_end;
724#endif
725 752
726 code_resource.start = virt_to_phys(_text); 753 code_resource.start = virt_to_phys(_text);
727 code_resource.end = virt_to_phys(_etext)-1; 754 code_resource.end = virt_to_phys(_etext)-1;
@@ -842,6 +869,8 @@ void __init setup_arch(char **cmdline_p)
842 setup_bios_corruption_check(); 869 setup_bios_corruption_check();
843#endif 870#endif
844 871
872 reserve_brk();
873
845 /* max_pfn_mapped is updated here */ 874 /* max_pfn_mapped is updated here */
846 max_low_pfn_mapped = init_memory_mapping(0, max_low_pfn<<PAGE_SHIFT); 875 max_low_pfn_mapped = init_memory_mapping(0, max_low_pfn<<PAGE_SHIFT);
847 max_pfn_mapped = max_low_pfn_mapped; 876 max_pfn_mapped = max_low_pfn_mapped;
diff --git a/arch/x86/kernel/vmlinux_32.lds.S b/arch/x86/kernel/vmlinux_32.lds.S
index 0d860963f268..27e44aa21585 100644
--- a/arch/x86/kernel/vmlinux_32.lds.S
+++ b/arch/x86/kernel/vmlinux_32.lds.S
@@ -189,7 +189,14 @@ SECTIONS
189 *(.bss) 189 *(.bss)
190 . = ALIGN(4); 190 . = ALIGN(4);
191 __bss_stop = .; 191 __bss_stop = .;
192
193 . = ALIGN(PAGE_SIZE);
194 __brk_base = . ;
195 . += 1024 * 1024 ;
196 __brk_limit = . ;
197
192 _end = . ; 198 _end = . ;
199
193 /* This is where the kernel creates the early boot page tables */ 200 /* This is where the kernel creates the early boot page tables */
194 . = ALIGN(PAGE_SIZE); 201 . = ALIGN(PAGE_SIZE);
195 pg0 = . ; 202 pg0 = . ;
diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S
index fe5d21ce7241..ff373423138c 100644
--- a/arch/x86/kernel/vmlinux_64.lds.S
+++ b/arch/x86/kernel/vmlinux_64.lds.S
@@ -247,6 +247,11 @@ SECTIONS
247 *(.bss.page_aligned) 247 *(.bss.page_aligned)
248 *(.bss) 248 *(.bss)
249 __bss_stop = .; 249 __bss_stop = .;
250
251 . = ALIGN(PAGE_SIZE);
252 __brk_base = . ;
253 . += 1024 * 1024 ;
254 __brk_limit = . ;
250 } 255 }
251 256
252 _end = . ; 257 _end = . ;
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 9c4294986af7..1280565670e4 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -16,6 +16,7 @@
16#include <asm/processor.h> 16#include <asm/processor.h>
17#include <asm/tlbflush.h> 17#include <asm/tlbflush.h>
18#include <asm/sections.h> 18#include <asm/sections.h>
19#include <asm/setup.h>
19#include <asm/uaccess.h> 20#include <asm/uaccess.h>
20#include <asm/pgalloc.h> 21#include <asm/pgalloc.h>
21#include <asm/proto.h> 22#include <asm/proto.h>
@@ -95,7 +96,7 @@ static inline unsigned long highmap_start_pfn(void)
95 96
96static inline unsigned long highmap_end_pfn(void) 97static inline unsigned long highmap_end_pfn(void)
97{ 98{
98 return __pa(roundup((unsigned long)_end, PMD_SIZE)) >> PAGE_SHIFT; 99 return __pa(roundup(_brk_end, PMD_SIZE)) >> PAGE_SHIFT;
99} 100}
100 101
101#endif 102#endif
@@ -711,7 +712,7 @@ static int cpa_process_alias(struct cpa_data *cpa)
711 * No need to redo, when the primary call touched the high 712 * No need to redo, when the primary call touched the high
712 * mapping already: 713 * mapping already:
713 */ 714 */
714 if (within(vaddr, (unsigned long) _text, (unsigned long) _end)) 715 if (within(vaddr, (unsigned long) _text, _brk_end))
715 return 0; 716 return 0;
716 717
717 /* 718 /*
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index cb6afa4ec95c..72f6a76dbfb9 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1723,9 +1723,9 @@ __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd,
1723{ 1723{
1724 pmd_t *kernel_pmd; 1724 pmd_t *kernel_pmd;
1725 1725
1726 init_pg_tables_start = __pa(pgd); 1726 max_pfn_mapped = PFN_DOWN(__pa(xen_start_info->pt_base) +
1727 init_pg_tables_end = __pa(pgd) + xen_start_info->nr_pt_frames*PAGE_SIZE; 1727 xen_start_info->nr_pt_frames * PAGE_SIZE +
1728 max_pfn_mapped = PFN_DOWN(init_pg_tables_end + 512*1024); 1728 512*1024);
1729 1729
1730 kernel_pmd = m2v(pgd[KERNEL_PGD_BOUNDARY].pgd); 1730 kernel_pmd = m2v(pgd[KERNEL_PGD_BOUNDARY].pgd);
1731 memcpy(level2_kernel_pgt, kernel_pmd, sizeof(pmd_t) * PTRS_PER_PMD); 1731 memcpy(level2_kernel_pgt, kernel_pmd, sizeof(pmd_t) * PTRS_PER_PMD);