author     H. Peter Anvin <hpa@linux.intel.com>  2013-02-15 12:25:08 -0500
committer  H. Peter Anvin <hpa@linux.intel.com>  2013-02-15 12:25:08 -0500
commit     0da3e7f526fde7a6522a3038b7ce609fc50f6707 (patch)
tree       c6e2dacd96fe7eac8312f3d7c22e0995dc423879 /arch
parent     95c9608478d639dcffc14ea47b31bff021a99ed1 (diff)
parent     68d00bbebb5a48b7a9056a8c03476a71ecbc30a6 (diff)

Merge branch 'x86/mm2' into x86/mm

x86/mm2 is testing out fine, but has developed conflicts with x86/mm
due to patches in adjacent code. Merge them so we can drop x86/mm2
and have a unified branch.

Resolved Conflicts:
	arch/x86/kernel/setup.c

Diffstat (limited to 'arch'): 47 files changed, 1254 insertions(+), 900 deletions(-)
diff --git a/arch/mips/cavium-octeon/dma-octeon.c b/arch/mips/cavium-octeon/dma-octeon.c
index 41dd00884975..02f244475207 100644
--- a/arch/mips/cavium-octeon/dma-octeon.c
+++ b/arch/mips/cavium-octeon/dma-octeon.c
@@ -317,7 +317,8 @@ void __init plat_swiotlb_setup(void)
 
 	octeon_swiotlb = alloc_bootmem_low_pages(swiotlbsize);
 
-	swiotlb_init_with_tbl(octeon_swiotlb, swiotlb_nslabs, 1);
+	if (swiotlb_init_with_tbl(octeon_swiotlb, swiotlb_nslabs, 1) == -ENOMEM)
+		panic("Cannot allocate SWIOTLB buffer");
 
 	mips_dma_map_ops = &octeon_linear_dma_map_ops.dma_map_ops;
 }
diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
index c3b72423c846..fc5a7c4bd9e8 100644
--- a/arch/sparc/mm/init_64.c
+++ b/arch/sparc/mm/init_64.c
@@ -2021,6 +2021,16 @@ static void __init patch_tlb_miss_handler_bitmap(void)
 	flushi(&valid_addr_bitmap_insn[0]);
 }
 
+static void __init register_page_bootmem_info(void)
+{
+#ifdef CONFIG_NEED_MULTIPLE_NODES
+	int i;
+
+	for_each_online_node(i)
+		if (NODE_DATA(i)->node_spanned_pages)
+			register_page_bootmem_info_node(NODE_DATA(i));
+#endif
+}
 void __init mem_init(void)
 {
 	unsigned long codepages, datapages, initpages;
@@ -2038,20 +2048,8 @@ void __init mem_init(void)
 
 	high_memory = __va(last_valid_pfn << PAGE_SHIFT);
 
-#ifdef CONFIG_NEED_MULTIPLE_NODES
-	{
-		int i;
-		for_each_online_node(i) {
-			if (NODE_DATA(i)->node_spanned_pages != 0) {
-				totalram_pages +=
-					free_all_bootmem_node(NODE_DATA(i));
-			}
-		}
-		totalram_pages += free_low_memory_core_early(MAX_NUMNODES);
-	}
-#else
+	register_page_bootmem_info();
 	totalram_pages = free_all_bootmem();
-#endif
 
 	/* We subtract one to account for the mem_map_zero page
	 * allocated below.
diff --git a/arch/x86/boot/boot.h b/arch/x86/boot/boot.h
index 18997e5a1053..5b7531966b84 100644
--- a/arch/x86/boot/boot.h
+++ b/arch/x86/boot/boot.h
@@ -285,16 +285,26 @@ struct biosregs {
 void intcall(u8 int_no, const struct biosregs *ireg, struct biosregs *oreg);
 
 /* cmdline.c */
-int __cmdline_find_option(u32 cmdline_ptr, const char *option, char *buffer, int bufsize);
-int __cmdline_find_option_bool(u32 cmdline_ptr, const char *option);
+int __cmdline_find_option(unsigned long cmdline_ptr, const char *option, char *buffer, int bufsize);
+int __cmdline_find_option_bool(unsigned long cmdline_ptr, const char *option);
 static inline int cmdline_find_option(const char *option, char *buffer, int bufsize)
 {
-	return __cmdline_find_option(boot_params.hdr.cmd_line_ptr, option, buffer, bufsize);
+	unsigned long cmd_line_ptr = boot_params.hdr.cmd_line_ptr;
+
+	if (cmd_line_ptr >= 0x100000)
+		return -1;      /* inaccessible */
+
+	return __cmdline_find_option(cmd_line_ptr, option, buffer, bufsize);
 }
 
 static inline int cmdline_find_option_bool(const char *option)
 {
-	return __cmdline_find_option_bool(boot_params.hdr.cmd_line_ptr, option);
+	unsigned long cmd_line_ptr = boot_params.hdr.cmd_line_ptr;
+
+	if (cmd_line_ptr >= 0x100000)
+		return -1;      /* inaccessible */
+
+	return __cmdline_find_option_bool(cmd_line_ptr, option);
 }
 
 
diff --git a/arch/x86/boot/cmdline.c b/arch/x86/boot/cmdline.c
index 6b3b6f708c04..625d21b0cd3f 100644
--- a/arch/x86/boot/cmdline.c
+++ b/arch/x86/boot/cmdline.c
@@ -27,7 +27,7 @@ static inline int myisspace(u8 c)
  * Returns the length of the argument (regardless of if it was
  * truncated to fit in the buffer), or -1 on not found.
  */
-int __cmdline_find_option(u32 cmdline_ptr, const char *option, char *buffer, int bufsize)
+int __cmdline_find_option(unsigned long cmdline_ptr, const char *option, char *buffer, int bufsize)
 {
 	addr_t cptr;
 	char c;
@@ -41,8 +41,8 @@ int __cmdline_find_option(u32 cmdline_ptr, const char *option, char *buffer, int
 		st_bufcpy	/* Copying this to buffer */
 	} state = st_wordstart;
 
-	if (!cmdline_ptr || cmdline_ptr >= 0x100000)
-		return -1;      /* No command line, or inaccessible */
+	if (!cmdline_ptr)
+		return -1;      /* No command line */
 
 	cptr = cmdline_ptr & 0xf;
 	set_fs(cmdline_ptr >> 4);
@@ -99,7 +99,7 @@ int __cmdline_find_option(u32 cmdline_ptr, const char *option, char *buffer, int
  * Returns the position of that option (starts counting with 1)
  * or 0 on not found
  */
-int __cmdline_find_option_bool(u32 cmdline_ptr, const char *option)
+int __cmdline_find_option_bool(unsigned long cmdline_ptr, const char *option)
 {
 	addr_t cptr;
 	char c;
@@ -111,8 +111,8 @@ int __cmdline_find_option_bool(u32 cmdline_ptr, const char *option)
 		st_wordskip,	/* Miscompare, skip */
 	} state = st_wordstart;
 
-	if (!cmdline_ptr || cmdline_ptr >= 0x100000)
-		return -1;      /* No command line, or inaccessible */
+	if (!cmdline_ptr)
+		return -1;      /* No command line */
 
 	cptr = cmdline_ptr & 0xf;
 	set_fs(cmdline_ptr >> 4);
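
A note on the real-mode pointer math in the context lines above: cptr = cmdline_ptr & 0xf and set_fs(cmdline_ptr >> 4) are the classic sub-1MiB linear-to-segment:offset split, which is why callers must now reject pointers at or above 0x100000 themselves. A minimal standalone sketch of the arithmetic (the helper name and test values are ours, not part of the patch):

    #include <assert.h>

    /* Hypothetical helper: split a sub-1MiB linear address into seg:off. */
    static void linear_to_segoff(unsigned long linear,
                                 unsigned short *seg, unsigned short *off)
    {
            *seg = linear >> 4;     /* 16-byte "paragraph" index */
            *off = linear & 0xf;    /* byte within the paragraph */
    }

    int main(void)
    {
            unsigned short seg, off;

            linear_to_segoff(0x9f104, &seg, &off);
            assert(seg == 0x9f10 && off == 0x4);
            assert(16UL * seg + off == 0x9f104);    /* round-trips exactly */
            return 0;
    }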
diff --git a/arch/x86/boot/compressed/cmdline.c b/arch/x86/boot/compressed/cmdline.c
index 10f6b1178c68..bffd73b45b1f 100644
--- a/arch/x86/boot/compressed/cmdline.c
+++ b/arch/x86/boot/compressed/cmdline.c
@@ -13,13 +13,21 @@ static inline char rdfs8(addr_t addr)
 	return *((char *)(fs + addr));
 }
 #include "../cmdline.c"
+static unsigned long get_cmd_line_ptr(void)
+{
+	unsigned long cmd_line_ptr = real_mode->hdr.cmd_line_ptr;
+
+	cmd_line_ptr |= (u64)real_mode->ext_cmd_line_ptr << 32;
+
+	return cmd_line_ptr;
+}
 int cmdline_find_option(const char *option, char *buffer, int bufsize)
 {
-	return __cmdline_find_option(real_mode->hdr.cmd_line_ptr, option, buffer, bufsize);
+	return __cmdline_find_option(get_cmd_line_ptr(), option, buffer, bufsize);
 }
 int cmdline_find_option_bool(const char *option)
 {
-	return __cmdline_find_option_bool(real_mode->hdr.cmd_line_ptr, option);
+	return __cmdline_find_option_bool(get_cmd_line_ptr(), option);
 }
 
 #endif
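
The new get_cmd_line_ptr() above reassembles a full 64-bit pointer from the legacy 32-bit hdr.cmd_line_ptr plus the new high half in ext_cmd_line_ptr. The loader-side inverse is the natural counterpart; a hedged sketch (field meanings from this series, the struct wrapper and helper name are ours):

    #include <stdint.h>

    struct cmdline_ptr_fields {
            uint32_t cmd_line_ptr;          /* setup_header: low 32 bits */
            uint32_t ext_cmd_line_ptr;      /* boot_params: high 32 bits */
    };

    /* Hypothetical loader helper: publish a command line placed anywhere. */
    static void set_cmd_line_ptr(struct cmdline_ptr_fields *f, uint64_t addr)
    {
            f->cmd_line_ptr     = (uint32_t)addr;
            f->ext_cmd_line_ptr = (uint32_t)(addr >> 32);
    }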
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index 2c4b171eec33..d9ae9a4ffcb9 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -37,6 +37,12 @@
 	__HEAD
 	.code32
 ENTRY(startup_32)
+	/*
+	 * 32bit entry is 0 and it is ABI so immutable!
+	 * If we come here directly from a bootloader,
+	 * kernel(text+data+bss+brk) ramdisk, zero_page, command line
+	 * all need to be under the 4G limit.
+	 */
 	cld
 	/*
 	 * Test KEEP_SEGMENTS flag to see if the bootloader is asking
@@ -154,6 +160,12 @@ ENTRY(startup_32)
 	btsl	$_EFER_LME, %eax
 	wrmsr
 
+	/* After gdt is loaded */
+	xorl	%eax, %eax
+	lldt	%ax
+	movl    $0x20, %eax
+	ltr	%ax
+
 	/*
	 * Setup for the jump to 64bit mode
	 *
@@ -176,28 +188,18 @@ ENTRY(startup_32)
 	lret
 ENDPROC(startup_32)
 
-no_longmode:
-	/* This isn't an x86-64 CPU so hang */
-1:
-	hlt
-	jmp     1b
-
-#include "../../kernel/verify_cpu.S"
-
-	/*
-	 * Be careful here startup_64 needs to be at a predictable
-	 * address so I can export it in an ELF header.  Bootloaders
-	 * should look at the ELF header to find this address, as
-	 * it may change in the future.
-	 */
 	.code64
 	.org 0x200
 ENTRY(startup_64)
 	/*
+	 * 64bit entry is 0x200 and it is ABI so immutable!
 	 * We come here either from startup_32 or directly from a
-	 * 64bit bootloader. If we come here from a bootloader we depend on
-	 * an identity mapped page table being provied that maps our
-	 * entire text+data+bss and hopefully all of memory.
+	 * 64bit bootloader.
+	 * If we come here from a bootloader, kernel(text+data+bss+brk),
+	 * ramdisk, zero_page, command line could be above 4G.
+	 * We depend on an identity mapped page table being provided
+	 * that maps our entire kernel(text+data+bss+brk), zero page
+	 * and command line.
 	 */
 #ifdef CONFIG_EFI_STUB
 	/*
@@ -247,9 +249,6 @@ preferred_addr:
 	movl	%eax, %ss
 	movl	%eax, %fs
 	movl	%eax, %gs
-	lldt	%ax
-	movl    $0x20, %eax
-	ltr	%ax
 
 	/*
	 * Compute the decompressed kernel start address.  It is where
@@ -349,6 +348,15 @@ relocated:
  */
 	jmp	*%rbp
 
+	.code32
+no_longmode:
+	/* This isn't an x86-64 CPU so hang */
+1:
+	hlt
+	jmp     1b
+
+#include "../../kernel/verify_cpu.S"
+
 	.data
 gdt:
 	.word	gdt_end - gdt
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index 88f7ff6da404..7cb56c6ca351 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -325,6 +325,8 @@ asmlinkage void decompress_kernel(void *rmode, memptr heap,
 {
 	real_mode = rmode;
 
+	sanitize_boot_params(real_mode);
+
 	if (real_mode->screen_info.orig_video_mode == 7) {
 		vidmem = (char *) 0xb0000;
 		vidport = 0x3b4;
diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h
index 0e6dc0ee0eea..674019d8e235 100644
--- a/arch/x86/boot/compressed/misc.h
+++ b/arch/x86/boot/compressed/misc.h
@@ -18,6 +18,7 @@
 #include <asm/page.h>
 #include <asm/boot.h>
 #include <asm/bootparam.h>
+#include <asm/bootparam_utils.h>
 
 #define BOOT_BOOT_H
 #include "../ctype.h"
diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S
index 8c132a625b94..9ec06a1f6d61 100644
--- a/arch/x86/boot/header.S
+++ b/arch/x86/boot/header.S
@@ -21,6 +21,7 @@
 #include <asm/e820.h>
 #include <asm/page_types.h>
 #include <asm/setup.h>
+#include <asm/bootparam.h>
 #include "boot.h"
 #include "voffset.h"
 #include "zoffset.h"
@@ -255,6 +256,9 @@ section_table:
 	# header, from the old boot sector.
 
 	.section ".header", "a"
+	.globl	sentinel
+sentinel:	.byte 0xff, 0xff        /* Used to detect broken loaders */
+
 	.globl	hdr
 hdr:
 setup_sects:	.byte 0			/* Filled in by build.c */
@@ -279,7 +283,7 @@ _start:
 	# Part 2 of the header, from the old setup.S
 
 		.ascii	"HdrS"		# header signature
-		.word	0x020b		# header version number (>= 0x0105)
+		.word	0x020c		# header version number (>= 0x0105)
 					# or else old loadlin-1.5 will fail)
 		.globl realmode_swtch
 realmode_swtch:	.word	0, 0		# default_switch, SETUPSEG
@@ -297,13 +301,7 @@ type_of_loader:	.byte	0		# 0 means ancient bootloader, newer
 
 # flags, unused bits must be zero (RFU) bit within loadflags
 loadflags:
-LOADED_HIGH	= 1			# If set, the kernel is loaded high
-CAN_USE_HEAP	= 0x80			# If set, the loader also has set
-					# heap_end_ptr to tell how much
-					# space behind setup.S can be used for
-					# heap purposes.
-					# Only the loader knows what is free
-		.byte	LOADED_HIGH
+		.byte	LOADED_HIGH	# The kernel is to be loaded high
 
 setup_move_size: .word  0x8000		# size to move, when setup is not
 					# loaded at 0x90000. We will move setup
@@ -369,7 +367,31 @@ relocatable_kernel:    .byte 1
 relocatable_kernel:    .byte 0
 #endif
 min_alignment:		.byte MIN_KERNEL_ALIGN_LG2	# minimum alignment
-pad3:			.word 0
+
+xloadflags:
+#ifdef CONFIG_X86_64
+# define XLF0 XLF_KERNEL_64			/* 64-bit kernel */
+#else
+# define XLF0 0
+#endif
+
+#if defined(CONFIG_RELOCATABLE) && defined(CONFIG_X86_64)
+   /* kernel/boot_param/ramdisk could be loaded above 4g */
+# define XLF1 XLF_CAN_BE_LOADED_ABOVE_4G
+#else
+# define XLF1 0
+#endif
+
+#ifdef CONFIG_EFI_STUB
+# ifdef CONFIG_X86_64
+#  define XLF23 XLF_EFI_HANDOVER_64	/* 64-bit EFI handover ok */
+# else
+#  define XLF23 XLF_EFI_HANDOVER_32	/* 32-bit EFI handover ok */
+# endif
+#else
+# define XLF23 0
+#endif
+			.word XLF0 | XLF1 | XLF23
 
 cmdline_size:   .long   COMMAND_LINE_SIZE-1     #length of the command line,
                                                 #added with boot protocol
@@ -397,8 +419,13 @@ pref_address:		.quad LOAD_PHYSICAL_ADDR	# preferred load addr
 #define INIT_SIZE VO_INIT_SIZE
 #endif
 init_size:		.long INIT_SIZE		# kernel initialization size
-handover_offset:	.long 0x30		# offset to the handover
+handover_offset:
+#ifdef CONFIG_EFI_STUB
+			.long 0x30		# offset to the handover
 					# protocol entry point
+#else
+			.long 0
+#endif
 
 # End of setup header #####################################################
 
diff --git a/arch/x86/boot/setup.ld b/arch/x86/boot/setup.ld
index 03c0683636b6..96a6c7563538 100644
--- a/arch/x86/boot/setup.ld
+++ b/arch/x86/boot/setup.ld
@@ -13,7 +13,7 @@ SECTIONS
 	.bstext		: { *(.bstext) }
 	.bsdata		: { *(.bsdata) }
 
-	. = 497;
+	. = 495;
 	.header		: { *(.header) }
 	.entrytext	: { *(.entrytext) }
 	.inittext	: { *(.inittext) }
diff --git a/arch/x86/include/asm/bootparam_utils.h b/arch/x86/include/asm/bootparam_utils.h
new file mode 100644
index 000000000000..5b5e9cb774b5
--- /dev/null
+++ b/arch/x86/include/asm/bootparam_utils.h
@@ -0,0 +1,38 @@
+#ifndef _ASM_X86_BOOTPARAM_UTILS_H
+#define _ASM_X86_BOOTPARAM_UTILS_H
+
+#include <asm/bootparam.h>
+
+/*
+ * This file is included from multiple environments.  Do not
+ * add completing #includes to make it standalone.
+ */
+
+/*
+ * Deal with bootloaders which fail to initialize unknown fields in
+ * boot_params to zero.  The list fields in this list are taken from
+ * analysis of kexec-tools; if other broken bootloaders initialize a
+ * different set of fields we will need to figure out how to disambiguate.
+ *
+ */
+static void sanitize_boot_params(struct boot_params *boot_params)
+{
+	if (boot_params->sentinel) {
+		/*fields in boot_params are not valid, clear them */
+		memset(&boot_params->olpc_ofw_header, 0,
+		       (char *)&boot_params->alt_mem_k -
+			(char *)&boot_params->olpc_ofw_header);
+		memset(&boot_params->kbd_status, 0,
+		       (char *)&boot_params->hdr -
+		       (char *)&boot_params->kbd_status);
+		memset(&boot_params->_pad7[0], 0,
+		       (char *)&boot_params->edd_mbr_sig_buffer[0] -
+			(char *)&boot_params->_pad7[0]);
+		memset(&boot_params->_pad8[0], 0,
+		       (char *)&boot_params->eddbuf[0] -
+		       (char *)&boot_params->_pad8[0]);
+		memset(&boot_params->_pad9[0], 0, sizeof(boot_params->_pad9));
+	}
+}
+
+#endif /* _ASM_X86_BOOTPARAM_UTILS_H */
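
The sentinel trick that drives sanitize_boot_params() is defined further down in header.S and bootparam.h: the bzImage carries 0xff at offset 0x1ef, so a loader that blindly copies the whole first page instead of just setup_header drags the 0xff along. A toy, userspace-only illustration of that failure mode (the 0x1ef offset comes from the patch; everything else is invented for illustration):

    #include <stdio.h>
    #include <string.h>

    #define SENTINEL_OFF 0x1ef              /* boot_params.sentinel */

    int main(void)
    {
            unsigned char image[4096] = { 0 };      /* start of a bzImage */
            unsigned char bp[4096]    = { 0 };      /* loader's boot_params */

            image[SENTINEL_OFF] = 0xff;             /* set by header.S */

            /* Broken loader: whole-page copy instead of just setup_header. */
            memcpy(bp, image, sizeof(bp));

            if (bp[SENTINEL_OFF])   /* the kernel-side test */
                    puts("sentinel survived: sanitize_boot_params() scrubs");
            return 0;
    }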
diff --git a/arch/x86/include/asm/init.h b/arch/x86/include/asm/init.h
index adcc0ae73d09..223042086f4e 100644
--- a/arch/x86/include/asm/init.h
+++ b/arch/x86/include/asm/init.h
@@ -1,20 +1,14 @@
-#ifndef _ASM_X86_INIT_32_H
-#define _ASM_X86_INIT_32_H
+#ifndef _ASM_X86_INIT_H
+#define _ASM_X86_INIT_H
 
-#ifdef CONFIG_X86_32
-extern void __init early_ioremap_page_table_range_init(void);
-#endif
+struct x86_mapping_info {
+	void *(*alloc_pgt_page)(void *); /* allocate buf for page table */
+	void *context;			 /* context for alloc_pgt_page */
+	unsigned long pmd_flag;		 /* page flag for PMD entry */
+	bool kernel_mapping;		 /* kernel mapping or ident mapping */
+};
 
-extern void __init zone_sizes_init(void);
+int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page,
+				unsigned long addr, unsigned long end);
 
-extern unsigned long __init
-kernel_physical_mapping_init(unsigned long start,
-			     unsigned long end,
-			     unsigned long page_size_mask);
-
-
-extern unsigned long __initdata pgt_buf_start;
-extern unsigned long __meminitdata pgt_buf_end;
-extern unsigned long __meminitdata pgt_buf_top;
-
-#endif /* _ASM_X86_INIT_32_H */
+#endif /* _ASM_X86_INIT_H */
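
kernel_ident_mapping_init() (added elsewhere in this series) walks [addr, end) and calls alloc_pgt_page() whenever a fresh page-table page is needed, passing context through untouched. A sketch of a caller-supplied allocator over a preallocated pool; the pool layout is our assumption, only the callback shape comes from the struct above:

    #include <stddef.h>

    struct pgt_pool {
            unsigned char *next;    /* next free 4K page */
            unsigned char *end;     /* one past the pool */
    };

    /* Bump allocator matching x86_mapping_info.alloc_pgt_page. */
    static void *alloc_pgt_page(void *context)
    {
            struct pgt_pool *pool = context;
            void *page = pool->next;

            if (pool->next + 4096 > pool->end)
                    return NULL;    /* pool exhausted: mapping init fails */

            pool->next += 4096;
            return page;
    }

A struct x86_mapping_info would then carry this callback, the pool as context, and the pmd_flag bits to stamp on the large-page entries.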
diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h
index 6080d2694bad..17483a492f18 100644
--- a/arch/x86/include/asm/kexec.h
+++ b/arch/x86/include/asm/kexec.h
@@ -48,11 +48,11 @@
 # define vmcore_elf_check_arch_cross(x) ((x)->e_machine == EM_X86_64)
 #else
 /* Maximum physical address we can use pages from */
-# define KEXEC_SOURCE_MEMORY_LIMIT      (0xFFFFFFFFFFUL)
+# define KEXEC_SOURCE_MEMORY_LIMIT      (MAXMEM-1)
 /* Maximum address we can reach in physical address mode */
-# define KEXEC_DESTINATION_MEMORY_LIMIT (0xFFFFFFFFFFUL)
+# define KEXEC_DESTINATION_MEMORY_LIMIT (MAXMEM-1)
 /* Maximum address we can use for the control pages */
-# define KEXEC_CONTROL_MEMORY_LIMIT     (0xFFFFFFFFFFUL)
+# define KEXEC_CONTROL_MEMORY_LIMIT     (MAXMEM-1)
 
 /* Allocate one page for the pdp and the second for the code */
 # define KEXEC_CONTROL_PAGE_SIZE  (4096UL + 4096UL)
diff --git a/arch/x86/include/asm/numa.h b/arch/x86/include/asm/numa.h
index 49119fcea2dc..52560a2038e1 100644
--- a/arch/x86/include/asm/numa.h
+++ b/arch/x86/include/asm/numa.h
@@ -54,8 +54,6 @@ static inline int numa_cpu_node(int cpu)
 
 #ifdef CONFIG_X86_32
 # include <asm/numa_32.h>
-#else
-# include <asm/numa_64.h>
 #endif
 
 #ifdef CONFIG_NUMA
diff --git a/arch/x86/include/asm/numa_64.h b/arch/x86/include/asm/numa_64.h
deleted file mode 100644
index 0c05f7ae46e8..000000000000
--- a/arch/x86/include/asm/numa_64.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _ASM_X86_NUMA_64_H
-#define _ASM_X86_NUMA_64_H
-
-extern unsigned long numa_free_all_bootmem(void);
-
-#endif /* _ASM_X86_NUMA_64_H */
diff --git a/arch/x86/include/asm/page.h b/arch/x86/include/asm/page.h
index 3698a6a0a940..c87892442e53 100644
--- a/arch/x86/include/asm/page.h
+++ b/arch/x86/include/asm/page.h
@@ -17,6 +17,10 @@
 
 struct page;
 
+#include <linux/range.h>
+extern struct range pfn_mapped[];
+extern int nr_pfn_mapped;
+
 static inline void clear_user_page(void *page, unsigned long vaddr,
 				   struct page *pg)
 {
diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h
index e21fdd10479f..54c97879195e 100644
--- a/arch/x86/include/asm/page_types.h
+++ b/arch/x86/include/asm/page_types.h
@@ -51,6 +51,8 @@ static inline phys_addr_t get_max_mapped(void)
 	return (phys_addr_t)max_pfn_mapped << PAGE_SHIFT;
 }
 
+bool pfn_range_is_mapped(unsigned long start_pfn, unsigned long end_pfn);
+
 extern unsigned long init_memory_mapping(unsigned long start,
 					 unsigned long end);
 
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index bc28e6fe7052..b6e41b8cd659 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -616,6 +616,8 @@ static inline int pgd_none(pgd_t pgd)
 #ifndef __ASSEMBLY__
 
 extern int direct_gbpages;
+void init_mem_mapping(void);
+void early_alloc_pgt_buf(void);
 
 /* local pte updates need not use xchg for locking */
 static inline pte_t native_local_ptep_get_and_clear(pte_t *ptep)
diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h
index 766ea16fbbbd..2d883440cb9a 100644
--- a/arch/x86/include/asm/pgtable_64_types.h
+++ b/arch/x86/include/asm/pgtable_64_types.h
@@ -1,6 +1,8 @@
 #ifndef _ASM_X86_PGTABLE_64_DEFS_H
 #define _ASM_X86_PGTABLE_64_DEFS_H
 
+#include <asm/sparsemem.h>
+
 #ifndef __ASSEMBLY__
 #include <linux/types.h>
 
@@ -60,4 +62,6 @@ typedef struct { pteval_t pte; } pte_t;
 #define MODULES_END      _AC(0xffffffffff000000, UL)
 #define MODULES_LEN   (MODULES_END - MODULES_VADDR)
 
+#define EARLY_DYNAMIC_PAGE_TABLES	64
+
 #endif /* _ASM_X86_PGTABLE_64_DEFS_H */
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index 9f82690f81ed..e6423002c10b 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -321,7 +321,6 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
 /* Install a pte for a particular vaddr in kernel space. */
 void set_pte_vaddr(unsigned long vaddr, pte_t pte);
 
-extern void native_pagetable_reserve(u64 start, u64 end);
 #ifdef CONFIG_X86_32
 extern void native_pagetable_init(void);
 #else
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 888184b2fc85..bdee8bd318ea 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -731,6 +731,7 @@ extern void enable_sep_cpu(void);
 extern int sysenter_setup(void);
 
 extern void early_trap_init(void);
+void early_trap_pf_init(void);
 
 /* Defined in head.S */
 extern struct desc_ptr		early_gdt_descr;
diff --git a/arch/x86/include/asm/realmode.h b/arch/x86/include/asm/realmode.h
index fe1ec5bcd846..9c6b890d5e7a 100644
--- a/arch/x86/include/asm/realmode.h
+++ b/arch/x86/include/asm/realmode.h
@@ -58,6 +58,7 @@ extern unsigned char boot_gdt[];
 extern unsigned char secondary_startup_64[];
 #endif
 
-extern void __init setup_real_mode(void);
+void reserve_real_mode(void);
+void setup_real_mode(void);
 
 #endif /* _ARCH_X86_REALMODE_H */
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
index 57693498519c..3b2ce8fc995a 100644
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -69,17 +69,6 @@ struct x86_init_oem {
 };
 
 /**
- * struct x86_init_mapping - platform specific initial kernel pagetable setup
- * @pagetable_reserve:	reserve a range of addresses for kernel pagetable usage
- *
- * For more details on the purpose of this hook, look in
- * init_memory_mapping and the commit that added it.
- */
-struct x86_init_mapping {
-	void (*pagetable_reserve)(u64 start, u64 end);
-};
-
-/**
  * struct x86_init_paging - platform specific paging functions
  * @pagetable_init:	platform specific paging initialization call to setup
  *			the kernel pagetables and prepare accessors functions.
@@ -136,7 +125,6 @@ struct x86_init_ops {
 	struct x86_init_mpparse		mpparse;
 	struct x86_init_irqs		irqs;
 	struct x86_init_oem		oem;
-	struct x86_init_mapping		mapping;
 	struct x86_init_paging		paging;
 	struct x86_init_timers		timers;
 	struct x86_init_iommu		iommu;
diff --git a/arch/x86/include/uapi/asm/bootparam.h b/arch/x86/include/uapi/asm/bootparam.h
index 92862cd90201..c15ddaf90710 100644
--- a/arch/x86/include/uapi/asm/bootparam.h
+++ b/arch/x86/include/uapi/asm/bootparam.h
@@ -1,6 +1,31 @@
 #ifndef _ASM_X86_BOOTPARAM_H
 #define _ASM_X86_BOOTPARAM_H
 
+/* setup_data types */
+#define SETUP_NONE			0
+#define SETUP_E820_EXT			1
+#define SETUP_DTB			2
+#define SETUP_PCI			3
+
+/* ram_size flags */
+#define RAMDISK_IMAGE_START_MASK	0x07FF
+#define RAMDISK_PROMPT_FLAG		0x8000
+#define RAMDISK_LOAD_FLAG		0x4000
+
+/* loadflags */
+#define LOADED_HIGH	(1<<0)
+#define QUIET_FLAG	(1<<5)
+#define KEEP_SEGMENTS	(1<<6)
+#define CAN_USE_HEAP	(1<<7)
+
+/* xloadflags */
+#define XLF_KERNEL_64			(1<<0)
+#define XLF_CAN_BE_LOADED_ABOVE_4G	(1<<1)
+#define XLF_EFI_HANDOVER_32		(1<<2)
+#define XLF_EFI_HANDOVER_64		(1<<3)
+
+#ifndef __ASSEMBLY__
+
 #include <linux/types.h>
 #include <linux/screen_info.h>
 #include <linux/apm_bios.h>
@@ -9,12 +34,6 @@
 #include <asm/ist.h>
 #include <video/edid.h>
 
-/* setup data types */
-#define SETUP_NONE			0
-#define SETUP_E820_EXT			1
-#define SETUP_DTB			2
-#define SETUP_PCI			3
-
 /* extensible setup data list node */
 struct setup_data {
 	__u64 next;
@@ -28,9 +47,6 @@ struct setup_header {
 	__u16	root_flags;
 	__u32	syssize;
 	__u16	ram_size;
-#define RAMDISK_IMAGE_START_MASK	0x07FF
-#define RAMDISK_PROMPT_FLAG		0x8000
-#define RAMDISK_LOAD_FLAG		0x4000
 	__u16	vid_mode;
 	__u16	root_dev;
 	__u16	boot_flag;
@@ -42,10 +58,6 @@ struct setup_header {
 	__u16	kernel_version;
 	__u8	type_of_loader;
 	__u8	loadflags;
-#define LOADED_HIGH	(1<<0)
-#define QUIET_FLAG	(1<<5)
-#define KEEP_SEGMENTS	(1<<6)
-#define CAN_USE_HEAP	(1<<7)
 	__u16	setup_move_size;
 	__u32	code32_start;
 	__u32	ramdisk_image;
@@ -58,7 +70,8 @@ struct setup_header {
 	__u32	initrd_addr_max;
 	__u32	kernel_alignment;
 	__u8	relocatable_kernel;
-	__u8	_pad2[3];
+	__u8	min_alignment;
+	__u16	xloadflags;
 	__u32	cmdline_size;
 	__u32	hardware_subarch;
 	__u64	hardware_subarch_data;
@@ -106,7 +119,10 @@ struct boot_params {
 	__u8  hd1_info[16];	/* obsolete! */		/* 0x090 */
 	struct sys_desc_table sys_desc_table;		/* 0x0a0 */
 	struct olpc_ofw_header olpc_ofw_header;		/* 0x0b0 */
-	__u8  _pad4[128];				/* 0x0c0 */
+	__u32 ext_ramdisk_image;			/* 0x0c0 */
+	__u32 ext_ramdisk_size;				/* 0x0c4 */
+	__u32 ext_cmd_line_ptr;				/* 0x0c8 */
+	__u8  _pad4[116];				/* 0x0cc */
 	struct edid_info edid_info;			/* 0x140 */
 	struct efi_info efi_info;			/* 0x1c0 */
 	__u32 alt_mem_k;				/* 0x1e0 */
@@ -115,7 +131,20 @@ struct boot_params {
 	__u8  eddbuf_entries;				/* 0x1e9 */
 	__u8  edd_mbr_sig_buf_entries;			/* 0x1ea */
 	__u8  kbd_status;				/* 0x1eb */
-	__u8  _pad6[5];					/* 0x1ec */
+	__u8  _pad5[3];					/* 0x1ec */
+	/*
+	 * The sentinel is set to a nonzero value (0xff) in header.S.
+	 *
+	 * A bootloader is supposed to only take setup_header and put
+	 * it into a clean boot_params buffer. If it turns out that
+	 * it is clumsy or too generous with the buffer, it most
+	 * probably will pick up the sentinel variable too. The fact
+	 * that this variable then is still 0xff will let kernel
+	 * know that some variables in boot_params are invalid and
+	 * kernel should zero out certain portions of boot_params.
+	 */
+	__u8  sentinel;					/* 0x1ef */
+	__u8  _pad6[1];					/* 0x1f0 */
 	struct setup_header hdr;    /* setup header */	/* 0x1f1 */
 	__u8  _pad7[0x290-0x1f1-sizeof(struct setup_header)];
 	__u32 edd_mbr_sig_buffer[EDD_MBR_SIG_MAX];	/* 0x290 */
@@ -134,6 +163,6 @@ enum {
 	X86_NR_SUBARCHS,
 };
 
-
+#endif /* __ASSEMBLY__ */
 
 #endif /* _ASM_X86_BOOTPARAM_H */
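
With xloadflags now part of the published boot protocol, a loader can test the bits before choosing placement for the kernel, ramdisk, and boot_params. A hedged sketch of such a check (bit values from the header above; the policy function itself is invented for illustration):

    #include <stdint.h>

    #define XLF_KERNEL_64                   (1 << 0)
    #define XLF_CAN_BE_LOADED_ABOVE_4G      (1 << 1)

    /* Hypothetical loader policy: highest address we may place things at. */
    static uint64_t load_ceiling(uint16_t xloadflags)
    {
            if (xloadflags & XLF_CAN_BE_LOADED_ABOVE_4G)
                    return UINT64_MAX;      /* anywhere RAM exists */
            return 0x100000000ULL;          /* stay below 4G otherwise */
    }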
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index bacf4b0d91f4..cfc755dc1607 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -51,7 +51,6 @@ EXPORT_SYMBOL(acpi_disabled);
 
 #ifdef CONFIG_X86_64
 # include <asm/proto.h>
-# include <asm/numa_64.h>
 #endif				/* X86 */
 
 #define BAD_MADT_ENTRY(entry, end) (					    \
diff --git a/arch/x86/kernel/amd_gart_64.c b/arch/x86/kernel/amd_gart_64.c
index e66311200cbd..b574b295a2f9 100644
--- a/arch/x86/kernel/amd_gart_64.c
+++ b/arch/x86/kernel/amd_gart_64.c
@@ -768,10 +768,9 @@ int __init gart_iommu_init(void)
 	aper_base	= info.aper_base;
 	end_pfn		= (aper_base>>PAGE_SHIFT) + (aper_size>>PAGE_SHIFT);
 
-	if (end_pfn > max_low_pfn_mapped) {
-		start_pfn = (aper_base>>PAGE_SHIFT);
+	start_pfn = PFN_DOWN(aper_base);
+	if (!pfn_range_is_mapped(start_pfn, end_pfn))
 		init_memory_mapping(start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT);
-	}
 
 	pr_info("PCI-DMA: using GART IOMMU.\n");
 	iommu_size = check_iommu_size(info.aper_base, aper_size);
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 15239fffd6fe..eafb084e80f8 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -12,7 +12,6 @@
 #include <asm/pci-direct.h>
 
 #ifdef CONFIG_X86_64
-# include <asm/numa_64.h>
 # include <asm/mmconfig.h>
 # include <asm/cacheflush.h>
 #endif
@@ -685,12 +684,10 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
 	 * benefit in doing so.
 	 */
 	if (!rdmsrl_safe(MSR_K8_TSEG_ADDR, &tseg)) {
+		unsigned long pfn = tseg >> PAGE_SHIFT;
+
 		printk(KERN_DEBUG "tseg: %010llx\n", tseg);
-		if ((tseg>>PMD_SHIFT) <
-				(max_low_pfn_mapped>>(PMD_SHIFT-PAGE_SHIFT)) ||
-			((tseg>>PMD_SHIFT) <
-				(max_pfn_mapped>>(PMD_SHIFT-PAGE_SHIFT)) &&
-				(tseg>>PMD_SHIFT) >= (1ULL<<(32 - PMD_SHIFT))))
+		if (pfn_range_is_mapped(pfn, pfn + 1))
 			set_memory_4k((unsigned long)__va(tseg), 1);
 	}
 }
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index fdfefa27b948..1905ce98bee0 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -17,7 +17,6 @@
 
 #ifdef CONFIG_X86_64
 #include <linux/topology.h>
-#include <asm/numa_64.h>
 #endif
 
 #include "cpu.h"
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index df06ade26bef..d32abeabbda5 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -835,7 +835,7 @@ static int __init parse_memopt(char *p)
 }
 early_param("mem", parse_memopt);
 
-static int __init parse_memmap_opt(char *p)
+static int __init parse_memmap_one(char *p)
 {
 	char *oldp;
 	u64 start_at, mem_size;
@@ -877,6 +877,20 @@ static int __init parse_memmap_opt(char *p)
 
 	return *p == '\0' ? 0 : -EINVAL;
 }
+static int __init parse_memmap_opt(char *str)
+{
+	while (str) {
+		char *k = strchr(str, ',');
+
+		if (k)
+			*k++ = 0;
+
+		parse_memmap_one(str);
+		str = k;
+	}
+
+	return 0;
+}
 early_param("memmap", parse_memmap_opt);
 
 void __init finish_e820_parsing(void)
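
The rewritten parse_memmap_opt() splits a single memmap= argument on commas and hands each piece to parse_memmap_one(), so several regions can be declared in one option. For example (a plausible invocation; the $/# region syntax itself is unchanged by this patch):

    memmap=64K$0xa0000,16M#0x2000000

reserves 64 KiB at 0xa0000 and marks 16 MiB at 0x2000000 as ACPI data in one go, where previously two separate memmap= options were needed.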
diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c
index e17554832991..138463a24877 100644
--- a/arch/x86/kernel/head32.c
+++ b/arch/x86/kernel/head32.c
@@ -18,6 +18,7 @@
 #include <asm/io_apic.h>
 #include <asm/bios_ebda.h>
 #include <asm/tlbflush.h>
+#include <asm/bootparam_utils.h>
 
 static void __init i386_default_early_setup(void)
 {
@@ -30,19 +31,7 @@ static void __init i386_default_early_setup(void)
 
 void __init i386_start_kernel(void)
 {
-	memblock_reserve(__pa_symbol(_text),
-			 (unsigned long)__bss_stop - (unsigned long)_text);
-
-#ifdef CONFIG_BLK_DEV_INITRD
-	/* Reserve INITRD */
-	if (boot_params.hdr.type_of_loader && boot_params.hdr.ramdisk_image) {
-		/* Assume only end is not page aligned */
-		u64 ramdisk_image = boot_params.hdr.ramdisk_image;
-		u64 ramdisk_size  = boot_params.hdr.ramdisk_size;
-		u64 ramdisk_end   = PAGE_ALIGN(ramdisk_image + ramdisk_size);
-		memblock_reserve(ramdisk_image, ramdisk_end - ramdisk_image);
-	}
-#endif
+	sanitize_boot_params(&boot_params);
 
 	/* Call the subarch specific early setup function */
 	switch (boot_params.hdr.hardware_subarch) {
@@ -57,11 +46,5 @@ void __init i386_start_kernel(void)
 		break;
 	}
 
-	/*
-	 * At this point everything still needed from the boot loader
-	 * or BIOS or kernel text should be early reserved or marked not
-	 * RAM in e820. All other memory is free game.
-	 */
-
 	start_kernel();
 }
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 7b215a50ec1e..57334f4cd3af 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -25,12 +25,83 @@
 #include <asm/kdebug.h>
 #include <asm/e820.h>
 #include <asm/bios_ebda.h>
+#include <asm/bootparam_utils.h>
 
-static void __init zap_identity_mappings(void)
+/*
+ * Manage page tables very early on.
+ */
+extern pgd_t early_level4_pgt[PTRS_PER_PGD];
+extern pmd_t early_dynamic_pgts[EARLY_DYNAMIC_PAGE_TABLES][PTRS_PER_PMD];
+static unsigned int __initdata next_early_pgt = 2;
+
+/* Wipe all early page tables except for the kernel symbol map */
+static void __init reset_early_page_tables(void)
+{
+	unsigned long i;
+
+	for (i = 0; i < PTRS_PER_PGD-1; i++)
+		early_level4_pgt[i].pgd = 0;
+
+	next_early_pgt = 0;
+
+	write_cr3(__pa(early_level4_pgt));
+}
+
+/* Create a new PMD entry */
+int __init early_make_pgtable(unsigned long address)
 {
-	pgd_t *pgd = pgd_offset_k(0UL);
-	pgd_clear(pgd);
-	__flush_tlb_all();
+	unsigned long physaddr = address - __PAGE_OFFSET;
+	unsigned long i;
+	pgdval_t pgd, *pgd_p;
+	pudval_t pud, *pud_p;
+	pmdval_t pmd, *pmd_p;
+
+	/* Invalid address or early pgt is done ? */
+	if (physaddr >= MAXMEM || read_cr3() != __pa(early_level4_pgt))
+		return -1;
+
+again:
+	pgd_p = &early_level4_pgt[pgd_index(address)].pgd;
+	pgd = *pgd_p;
+
+	/*
+	 * The use of __START_KERNEL_map rather than __PAGE_OFFSET here is
+	 * critical -- __PAGE_OFFSET would point us back into the dynamic
+	 * range and we might end up looping forever...
+	 */
+	if (pgd)
+		pud_p = (pudval_t *)((pgd & PTE_PFN_MASK) + __START_KERNEL_map - phys_base);
+	else {
+		if (next_early_pgt >= EARLY_DYNAMIC_PAGE_TABLES) {
+			reset_early_page_tables();
+			goto again;
+		}
+
+		pud_p = (pudval_t *)early_dynamic_pgts[next_early_pgt++];
+		for (i = 0; i < PTRS_PER_PUD; i++)
+			pud_p[i] = 0;
+		*pgd_p = (pgdval_t)pud_p - __START_KERNEL_map + phys_base + _KERNPG_TABLE;
+	}
+	pud_p += pud_index(address);
+	pud = *pud_p;
+
+	if (pud)
+		pmd_p = (pmdval_t *)((pud & PTE_PFN_MASK) + __START_KERNEL_map - phys_base);
+	else {
+		if (next_early_pgt >= EARLY_DYNAMIC_PAGE_TABLES) {
+			reset_early_page_tables();
+			goto again;
+		}
+
+		pmd_p = (pmdval_t *)early_dynamic_pgts[next_early_pgt++];
+		for (i = 0; i < PTRS_PER_PMD; i++)
+			pmd_p[i] = 0;
+		*pud_p = (pudval_t)pmd_p - __START_KERNEL_map + phys_base + _KERNPG_TABLE;
+	}
+	pmd = (physaddr & PMD_MASK) + (__PAGE_KERNEL_LARGE & ~_PAGE_GLOBAL);
+	pmd_p[pmd_index(address)] = pmd;
+
+	return 0;
 }
 
 /* Don't add a printk in there. printk relies on the PDA which is not initialized 
@@ -41,13 +112,25 @@ static void __init clear_bss(void)
 	       (unsigned long) __bss_stop - (unsigned long) __bss_start);
 }
 
+static unsigned long get_cmd_line_ptr(void)
+{
+	unsigned long cmd_line_ptr = boot_params.hdr.cmd_line_ptr;
+
+	cmd_line_ptr |= (u64)boot_params.ext_cmd_line_ptr << 32;
+
+	return cmd_line_ptr;
+}
+
 static void __init copy_bootdata(char *real_mode_data)
 {
 	char * command_line;
+	unsigned long cmd_line_ptr;
 
 	memcpy(&boot_params, real_mode_data, sizeof boot_params);
-	if (boot_params.hdr.cmd_line_ptr) {
-		command_line = __va(boot_params.hdr.cmd_line_ptr);
+	sanitize_boot_params(&boot_params);
+	cmd_line_ptr = get_cmd_line_ptr();
+	if (cmd_line_ptr) {
+		command_line = __va(cmd_line_ptr);
 		memcpy(boot_command_line, command_line, COMMAND_LINE_SIZE);
 	}
 }
@@ -70,14 +153,12 @@ void __init x86_64_start_kernel(char * real_mode_data)
 				(__START_KERNEL & PGDIR_MASK)));
 	BUILD_BUG_ON(__fix_to_virt(__end_of_fixed_addresses) <= MODULES_END);
 
+	/* Kill off the identity-map trampoline */
+	reset_early_page_tables();
+
 	/* clear bss before set_intr_gate with early_idt_handler */
 	clear_bss();
 
-	/* Make NULL pointers segfault */
-	zap_identity_mappings();
-
-	max_pfn_mapped = KERNEL_IMAGE_SIZE >> PAGE_SHIFT;
-
 	for (i = 0; i < NUM_EXCEPTION_VECTORS; i++) {
 #ifdef CONFIG_EARLY_PRINTK
 		set_intr_gate(i, &early_idt_handlers[i]);
@@ -87,37 +168,25 @@ void __init x86_64_start_kernel(char * real_mode_data)
 	}
 	load_idt((const struct desc_ptr *)&idt_descr);
 
+	copy_bootdata(__va(real_mode_data));
+
 	if (console_loglevel == 10)
 		early_printk("Kernel alive\n");
 
+	clear_page(init_level4_pgt);
+	/* set init_level4_pgt kernel high mapping*/
+	init_level4_pgt[511] = early_level4_pgt[511];
+
 	x86_64_start_reservations(real_mode_data);
 }
 
 void __init x86_64_start_reservations(char *real_mode_data)
 {
-	copy_bootdata(__va(real_mode_data));
-
-	memblock_reserve(__pa_symbol(_text),
-			 (unsigned long)__bss_stop - (unsigned long)_text);
-
-#ifdef CONFIG_BLK_DEV_INITRD
-	/* Reserve INITRD */
-	if (boot_params.hdr.type_of_loader && boot_params.hdr.ramdisk_image) {
-		/* Assume only end is not page aligned */
-		unsigned long ramdisk_image = boot_params.hdr.ramdisk_image;
-		unsigned long ramdisk_size  = boot_params.hdr.ramdisk_size;
-		unsigned long ramdisk_end   = PAGE_ALIGN(ramdisk_image + ramdisk_size);
-		memblock_reserve(ramdisk_image, ramdisk_end - ramdisk_image);
-	}
-#endif
+	/* version is always not zero if it is copied */
+	if (!boot_params.hdr.version)
+		copy_bootdata(__va(real_mode_data));
 
 	reserve_ebda_region();
 
-	/*
-	 * At this point everything still needed from the boot loader
-	 * or BIOS or kernel text should be early reserved or marked not
-	 * RAM in e820. All other memory is free game.
-	 */
-
 	start_kernel();
 }
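
early_make_pgtable() above resolves a faulting address one level at a time via pgd_index()/pud_index()/pmd_index(), i.e. fixed 9-bit slices of the virtual address under 4-level paging. A standalone sanity check of that decomposition (the shift constants are the standard x86-64 values, not taken from this diff):

    #include <stdio.h>

    #define PGDIR_SHIFT     39      /* x86-64, 4-level paging */
    #define PUD_SHIFT       30
    #define PMD_SHIFT       21
    #define INDEX_MASK      0x1ff   /* 512 entries per table */

    int main(void)
    {
            unsigned long addr = 0xffff880123456789UL;      /* arbitrary */

            printf("pgd=%lu pud=%lu pmd=%lu\n",
                   (addr >> PGDIR_SHIFT) & INDEX_MASK,
                   (addr >> PUD_SHIFT) & INDEX_MASK,
                   (addr >> PMD_SHIFT) & INDEX_MASK);
            return 0;
    }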
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 980053c4b9cc..d94f6d68be2a 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S | |||
@@ -47,14 +47,13 @@ L3_START_KERNEL = pud_index(__START_KERNEL_map) | |||
47 | .code64 | 47 | .code64 |
48 | .globl startup_64 | 48 | .globl startup_64 |
49 | startup_64: | 49 | startup_64: |
50 | |||
51 | /* | 50 | /* |
52 | * At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 1, | 51 | * At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 1, |
53 | * and someone has loaded an identity mapped page table | 52 | * and someone has loaded an identity mapped page table |
54 | * for us. These identity mapped page tables map all of the | 53 | * for us. These identity mapped page tables map all of the |
55 | * kernel pages and possibly all of memory. | 54 | * kernel pages and possibly all of memory. |
56 | * | 55 | * |
57 | * %esi holds a physical pointer to real_mode_data. | 56 | * %rsi holds a physical pointer to real_mode_data. |
58 | * | 57 | * |
59 | * We come here either directly from a 64bit bootloader, or from | 58 | * We come here either directly from a 64bit bootloader, or from |
60 | * arch/x86_64/boot/compressed/head.S. | 59 | * arch/x86_64/boot/compressed/head.S. |
@@ -66,7 +65,8 @@ startup_64: | |||
66 | * tables and then reload them. | 65 | * tables and then reload them. |
67 | */ | 66 | */ |
68 | 67 | ||
69 | /* Compute the delta between the address I am compiled to run at and the | 68 | /* |
69 | * Compute the delta between the address I am compiled to run at and the | ||
70 | * address I am actually running at. | 70 | * address I am actually running at. |
71 | */ | 71 | */ |
72 | leaq _text(%rip), %rbp | 72 | leaq _text(%rip), %rbp |
@@ -78,45 +78,62 @@ startup_64: | |||
78 | testl %eax, %eax | 78 | testl %eax, %eax |
79 | jnz bad_address | 79 | jnz bad_address |
80 | 80 | ||
81 | /* Is the address too large? */ | 81 | /* |
82 | leaq _text(%rip), %rdx | 82 | * Is the address too large? |
83 | movq $PGDIR_SIZE, %rax | ||
84 | cmpq %rax, %rdx | ||
85 | jae bad_address | ||
86 | |||
87 | /* Fixup the physical addresses in the page table | ||
88 | */ | 83 | */ |
89 | addq %rbp, init_level4_pgt + 0(%rip) | 84 | leaq _text(%rip), %rax |
90 | addq %rbp, init_level4_pgt + (L4_PAGE_OFFSET*8)(%rip) | 85 | shrq $MAX_PHYSMEM_BITS, %rax |
91 | addq %rbp, init_level4_pgt + (L4_START_KERNEL*8)(%rip) | 86 | jnz bad_address |
92 | 87 | ||
93 | addq %rbp, level3_ident_pgt + 0(%rip) | 88 | /* |
89 | * Fixup the physical addresses in the page table | ||
90 | */ | ||
91 | addq %rbp, early_level4_pgt + (L4_START_KERNEL*8)(%rip) | ||
94 | 92 | ||
95 | addq %rbp, level3_kernel_pgt + (510*8)(%rip) | 93 | addq %rbp, level3_kernel_pgt + (510*8)(%rip) |
96 | addq %rbp, level3_kernel_pgt + (511*8)(%rip) | 94 | addq %rbp, level3_kernel_pgt + (511*8)(%rip) |
97 | 95 | ||
98 | addq %rbp, level2_fixmap_pgt + (506*8)(%rip) | 96 | addq %rbp, level2_fixmap_pgt + (506*8)(%rip) |
99 | 97 | ||
100 | /* Add an Identity mapping if I am above 1G */ | 98 | /* |
99 | * Set up the identity mapping for the switchover. These | ||
100 | * entries should *NOT* have the global bit set! This also | ||
101 | * creates a bunch of nonsense entries but that is fine -- | ||
102 | * it avoids problems around wraparound. | ||
103 | */ | ||
101 | leaq _text(%rip), %rdi | 104 | leaq _text(%rip), %rdi |
102 | andq $PMD_PAGE_MASK, %rdi | 105 | leaq early_level4_pgt(%rip), %rbx |
103 | 106 | ||
104 | movq %rdi, %rax | 107 | movq %rdi, %rax |
105 | shrq $PUD_SHIFT, %rax | 108 | shrq $PGDIR_SHIFT, %rax |
106 | andq $(PTRS_PER_PUD - 1), %rax | ||
107 | jz ident_complete | ||
108 | 109 | ||
109 | leaq (level2_spare_pgt - __START_KERNEL_map + _KERNPG_TABLE)(%rbp), %rdx | 110 | leaq (4096 + _KERNPG_TABLE)(%rbx), %rdx |
110 | leaq level3_ident_pgt(%rip), %rbx | 111 | movq %rdx, 0(%rbx,%rax,8) |
111 | movq %rdx, 0(%rbx, %rax, 8) | 112 | movq %rdx, 8(%rbx,%rax,8) |
112 | 113 | ||
114 | addq $4096, %rdx | ||
113 | movq %rdi, %rax | 115 | movq %rdi, %rax |
114 | shrq $PMD_SHIFT, %rax | 116 | shrq $PUD_SHIFT, %rax |
115 | andq $(PTRS_PER_PMD - 1), %rax | 117 | andl $(PTRS_PER_PUD-1), %eax |
116 | leaq __PAGE_KERNEL_IDENT_LARGE_EXEC(%rdi), %rdx | 118 | movq %rdx, (4096+0)(%rbx,%rax,8) |
117 | leaq level2_spare_pgt(%rip), %rbx | 119 | movq %rdx, (4096+8)(%rbx,%rax,8) |
118 | movq %rdx, 0(%rbx, %rax, 8) | 120 | |
119 | ident_complete: | 121 | addq $8192, %rbx |
122 | movq %rdi, %rax | ||
123 | shrq $PMD_SHIFT, %rdi | ||
124 | addq $(__PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL), %rax | ||
125 | leaq (_end - 1)(%rip), %rcx | ||
126 | shrq $PMD_SHIFT, %rcx | ||
127 | subq %rdi, %rcx | ||
128 | incl %ecx | ||
129 | |||
130 | 1: | ||
131 | andq $(PTRS_PER_PMD - 1), %rdi | ||
132 | movq %rax, (%rbx,%rdi,8) | ||
133 | incq %rdi | ||
134 | addq $PMD_SIZE, %rax | ||
135 | decl %ecx | ||
136 | jnz 1b | ||
120 | 137 | ||
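The index arithmetic above is easier to follow in C. Below is a rough, illustrative restatement (not kernel code): the two scratch pages are the first two pages of early_dynamic_pgts, which immediately follow early_level4_pgt in memory, and every address is physical because the CPU is still running on the bootloader-provided identity mapping. _text is assumed to be 2M-aligned.

static void build_switchover_ident_map(unsigned long text, unsigned long end,
				       unsigned long *pgd,  /* early_level4_pgt    */
				       unsigned long *pud,  /* the page after it   */
				       unsigned long *pmd)  /* the page after that */
{
	unsigned long idx, i, nr;

	/* Two consecutive slots per level; the duplicate is one of the
	 * deliberate "nonsense" entries that tolerate wraparound. */
	idx = (text >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1);
	pgd[idx]     = (unsigned long)pud + _KERNPG_TABLE;
	pgd[idx + 1] = (unsigned long)pud + _KERNPG_TABLE;

	idx = (text >> PUD_SHIFT) & (PTRS_PER_PUD - 1);
	pud[idx]     = (unsigned long)pmd + _KERNPG_TABLE;
	pud[idx + 1] = (unsigned long)pmd + _KERNPG_TABLE;

	/* One 2M entry per PMD-sized step of [_text, _end), with the
	 * global bit masked off, exactly as the loop above does. */
	nr = ((end - 1) >> PMD_SHIFT) - (text >> PMD_SHIFT) + 1;
	for (i = 0; i < nr; i++) {
		idx = ((text >> PMD_SHIFT) + i) & (PTRS_PER_PMD - 1);
		pmd[idx] = text + i * PMD_SIZE
			 + (__PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL);
	}
}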
121 | /* | 138 | /* |
122 | * Fixup the kernel text+data virtual addresses. Note that | 139 | * Fixup the kernel text+data virtual addresses. Note that |
@@ -124,7 +141,6 @@ ident_complete: | |||
124 | * cleanup_highmap() fixes this up along with the mappings | 141 | * cleanup_highmap() fixes this up along with the mappings |
125 | * beyond _end. | 142 | * beyond _end. |
126 | */ | 143 | */ |
127 | |||
128 | leaq level2_kernel_pgt(%rip), %rdi | 144 | leaq level2_kernel_pgt(%rip), %rdi |
129 | leaq 4096(%rdi), %r8 | 145 | leaq 4096(%rdi), %r8 |
130 | /* See if it is a valid page table entry */ | 146 | /* See if it is a valid page table entry */ |
@@ -139,17 +155,14 @@ ident_complete: | |||
139 | /* Fixup phys_base */ | 155 | /* Fixup phys_base */ |
140 | addq %rbp, phys_base(%rip) | 156 | addq %rbp, phys_base(%rip) |
141 | 157 | ||
142 | /* Due to ENTRY(), sometimes the empty space gets filled with | 158 | movq $(early_level4_pgt - __START_KERNEL_map), %rax |
143 | * zeros. Better take a jmp than relying on empty space being | 159 | jmp 1f |
144 | * filled with 0x90 (nop) | ||
145 | */ | ||
146 | jmp secondary_startup_64 | ||
147 | ENTRY(secondary_startup_64) | 160 | ENTRY(secondary_startup_64) |
148 | /* | 161 | /* |
149 | * At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 0, | 162 | * At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 0, |
150 | * and someone has loaded a mapped page table. | 163 | * and someone has loaded a mapped page table. |
151 | * | 164 | * |
152 | * %esi holds a physical pointer to real_mode_data. | 165 | * %rsi holds a physical pointer to real_mode_data. |
153 | * | 166 | * |
154 | * We come here either from startup_64 (using physical addresses) | 167 | * We come here either from startup_64 (using physical addresses) |
155 | * or from trampoline.S (using virtual addresses). | 168 | * or from trampoline.S (using virtual addresses). |
@@ -159,12 +172,14 @@ ENTRY(secondary_startup_64) | |||
159 | * after the boot processor executes this code. | 172 | * after the boot processor executes this code. |
160 | */ | 173 | */ |
161 | 174 | ||
175 | movq $(init_level4_pgt - __START_KERNEL_map), %rax | ||
176 | 1: | ||
177 | |||
162 | /* Enable PAE mode and PGE */ | 178 | /* Enable PAE mode and PGE */ |
163 | movl $(X86_CR4_PAE | X86_CR4_PGE), %eax | 179 | movl $(X86_CR4_PAE | X86_CR4_PGE), %ecx |
164 | movq %rax, %cr4 | 180 | movq %rcx, %cr4 |
165 | 181 | ||
166 | /* Setup early boot stage 4 level pagetables. */ | 182 | /* Setup early boot stage 4 level pagetables. */ |
167 | movq $(init_level4_pgt - __START_KERNEL_map), %rax | ||
168 | addq phys_base(%rip), %rax | 183 | addq phys_base(%rip), %rax |
169 | movq %rax, %cr3 | 184 | movq %rax, %cr3 |
170 | 185 | ||
@@ -196,7 +211,7 @@ ENTRY(secondary_startup_64) | |||
196 | movq %rax, %cr0 | 211 | movq %rax, %cr0 |
197 | 212 | ||
198 | /* Setup a boot time stack */ | 213 | /* Setup a boot time stack */ |
199 | movq stack_start(%rip),%rsp | 214 | movq stack_start(%rip), %rsp |
200 | 215 | ||
201 | /* zero EFLAGS after setting rsp */ | 216 | /* zero EFLAGS after setting rsp */ |
202 | pushq $0 | 217 | pushq $0 |
@@ -236,15 +251,33 @@ ENTRY(secondary_startup_64) | |||
236 | movl initial_gs+4(%rip),%edx | 251 | movl initial_gs+4(%rip),%edx |
237 | wrmsr | 252 | wrmsr |
238 | 253 | ||
239 | /* esi is pointer to real mode structure with interesting info. | 254 | /* rsi is pointer to real mode structure with interesting info. |
240 | pass it to C */ | 255 | pass it to C */ |
241 | movl %esi, %edi | 256 | movq %rsi, %rdi |
242 | 257 | ||
243 | /* Finally jump to run C code and to be on real kernel address | 258 | /* Finally jump to run C code and to be on real kernel address |
244 | * Since we are running on identity-mapped space we have to jump | 259 | * Since we are running on identity-mapped space we have to jump |
245 | * to the full 64bit address, this is only possible as indirect | 260 | * to the full 64bit address, this is only possible as indirect |
246 | * jump. In addition we need to ensure %cs is set so we make this | 261 | * jump. In addition we need to ensure %cs is set so we make this |
247 | * a far return. | 262 | * a far return. |
263 | * | ||
264 | * Note: do not change to far jump indirect with 64bit offset. | ||
265 | * | ||
266 | * AMD does not support far jump indirect with 64bit offset. | ||
267 | * AMD64 Architecture Programmer's Manual, Volume 3: states only | ||
268 | * JMP FAR mem16:16 FF /5 Far jump indirect, | ||
269 | * with the target specified by a far pointer in memory. | ||
270 | * JMP FAR mem16:32 FF /5 Far jump indirect, | ||
271 | * with the target specified by a far pointer in memory. | ||
272 | * | ||
273 | * Intel64 does support 64bit offset. | ||
274 | * Software Developer Manual Vol 2: states: | ||
275 | * FF /5 JMP m16:16 Jump far, absolute indirect, | ||
276 | * address given in m16:16 | ||
277 | * FF /5 JMP m16:32 Jump far, absolute indirect, | ||
278 | * address given in m16:32. | ||
279 | * REX.W + FF /5 JMP m16:64 Jump far, absolute indirect, | ||
280 | * address given in m16:64. | ||
248 | */ | 281 | */ |
249 | movq initial_code(%rip),%rax | 282 | movq initial_code(%rip),%rax |
250 | pushq $0 # fake return address to stop unwinder | 283 | pushq $0 # fake return address to stop unwinder |
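The long comment above records a real portability constraint: AMD64 has no JMP m16:64, so an absolute 64-bit far transfer has to be synthesized. The instructions just past this hunk push __KERNEL_CS and the target address and execute lretq, which behaves the same on both vendors. Conceptually, the frame that lretq consumes looks like this (field names invented for illustration; the selector really occupies a full 64-bit stack slot whose upper bits are ignored):

struct far_return_frame {
	u64 rip;	/* pushed last: initial_code, i.e. x86_64_start_kernel */
	u64 cs;		/* pushed before it: __KERNEL_CS                       */
};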
@@ -270,13 +303,13 @@ ENDPROC(start_cpu0) | |||
270 | 303 | ||
271 | /* SMP bootup changes these two */ | 304 | /* SMP bootup changes these two */ |
272 | __REFDATA | 305 | __REFDATA |
273 | .align 8 | 306 | .balign 8 |
274 | ENTRY(initial_code) | 307 | GLOBAL(initial_code) |
275 | .quad x86_64_start_kernel | 308 | .quad x86_64_start_kernel |
276 | ENTRY(initial_gs) | 309 | GLOBAL(initial_gs) |
277 | .quad INIT_PER_CPU_VAR(irq_stack_union) | 310 | .quad INIT_PER_CPU_VAR(irq_stack_union) |
278 | 311 | ||
279 | ENTRY(stack_start) | 312 | GLOBAL(stack_start) |
280 | .quad init_thread_union+THREAD_SIZE-8 | 313 | .quad init_thread_union+THREAD_SIZE-8 |
281 | .word 0 | 314 | .word 0 |
282 | __FINITDATA | 315 | __FINITDATA |
@@ -284,7 +317,7 @@ ENDPROC(start_cpu0) | |||
284 | bad_address: | 317 | bad_address: |
285 | jmp bad_address | 318 | jmp bad_address |
286 | 319 | ||
287 | .section ".init.text","ax" | 320 | __INIT |
288 | .globl early_idt_handlers | 321 | .globl early_idt_handlers |
289 | early_idt_handlers: | 322 | early_idt_handlers: |
290 | # 104(%rsp) %rflags | 323 | # 104(%rsp) %rflags |
@@ -321,14 +354,22 @@ ENTRY(early_idt_handler) | |||
321 | pushq %r11 # 0(%rsp) | 354 | pushq %r11 # 0(%rsp) |
322 | 355 | ||
323 | cmpl $__KERNEL_CS,96(%rsp) | 356 | cmpl $__KERNEL_CS,96(%rsp) |
324 | jne 10f | 357 | jne 11f |
358 | |||
359 | cmpl $14,72(%rsp) # Page fault? | ||
360 | jnz 10f | ||
361 | GET_CR2_INTO(%rdi) # can clobber any volatile register if pv | ||
362 | call early_make_pgtable | ||
363 | andl %eax,%eax | ||
364 | jz 20f # All good | ||
325 | 365 | ||
366 | 10: | ||
326 | leaq 88(%rsp),%rdi # Pointer to %rip | 367 | leaq 88(%rsp),%rdi # Pointer to %rip |
327 | call early_fixup_exception | 368 | call early_fixup_exception |
328 | andl %eax,%eax | 369 | andl %eax,%eax |
329 | jnz 20f # Found an exception entry | 370 | jnz 20f # Found an exception entry |
330 | 371 | ||
331 | 10: | 372 | 11: |
332 | #ifdef CONFIG_EARLY_PRINTK | 373 | #ifdef CONFIG_EARLY_PRINTK |
333 | GET_CR2_INTO(%r9) # can clobber any volatile register if pv | 374 | GET_CR2_INTO(%r9) # can clobber any volatile register if pv |
334 | movl 80(%rsp),%r8d # error code | 375 | movl 80(%rsp),%r8d # error code |
@@ -350,7 +391,7 @@ ENTRY(early_idt_handler) | |||
350 | 1: hlt | 391 | 1: hlt |
351 | jmp 1b | 392 | jmp 1b |
352 | 393 | ||
353 | 20: # Exception table entry found | 394 | 20: # Exception table entry found or page table generated |
354 | popq %r11 | 395 | popq %r11 |
355 | popq %r10 | 396 | popq %r10 |
356 | popq %r9 | 397 | popq %r9 |
@@ -364,6 +405,8 @@ ENTRY(early_idt_handler) | |||
364 | decl early_recursion_flag(%rip) | 405 | decl early_recursion_flag(%rip) |
365 | INTERRUPT_RETURN | 406 | INTERRUPT_RETURN |
366 | 407 | ||
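The page-fault branch added above is what makes the tiny switchover mapping sufficient: any early access outside it faults, and early_make_pgtable() (added to head64.c by this series, not visible in this hunk) builds the missing identity-mapping entries out of the early_dynamic_pgts pool before the instruction is retried. A loose C sketch of the idea, with the address validation, phys_base handling and reuse of already-present entries all omitted:

extern unsigned long early_dynamic_pgts[EARLY_DYNAMIC_PAGE_TABLES][512];
static unsigned int next_early_pgt;	/* cursor into the pool */

static int sketch_early_make_pgtable(unsigned long *pgd, unsigned long vaddr,
				     unsigned long paddr)
{
	unsigned long *pud, *pmd;

	if (next_early_pgt + 2 > EARLY_DYNAMIC_PAGE_TABLES)
		return -1;			/* pool exhausted */

	pud = early_dynamic_pgts[next_early_pgt++];
	pmd = early_dynamic_pgts[next_early_pgt++];

	pgd[(vaddr >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1)] =
		__pa(pud) + _KERNPG_TABLE;
	pud[(vaddr >> PUD_SHIFT) & (PTRS_PER_PUD - 1)] =
		__pa(pmd) + _KERNPG_TABLE;
	pmd[(vaddr >> PMD_SHIFT) & (PTRS_PER_PMD - 1)] =
		(paddr & PMD_MASK) + __PAGE_KERNEL_LARGE;

	return 0;	/* 0 = fixed up: the handler IRETs and the access retries */
}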
408 | __INITDATA | ||
409 | |||
367 | .balign 4 | 410 | .balign 4 |
368 | early_recursion_flag: | 411 | early_recursion_flag: |
369 | .long 0 | 412 | .long 0 |
@@ -374,11 +417,10 @@ early_idt_msg: | |||
374 | early_idt_ripmsg: | 417 | early_idt_ripmsg: |
375 | .asciz "RIP %s\n" | 418 | .asciz "RIP %s\n" |
376 | #endif /* CONFIG_EARLY_PRINTK */ | 419 | #endif /* CONFIG_EARLY_PRINTK */ |
377 | .previous | ||
378 | 420 | ||
379 | #define NEXT_PAGE(name) \ | 421 | #define NEXT_PAGE(name) \ |
380 | .balign PAGE_SIZE; \ | 422 | .balign PAGE_SIZE; \ |
381 | ENTRY(name) | 423 | GLOBAL(name) |
382 | 424 | ||
383 | /* Automate the creation of 1 to 1 mapping pmd entries */ | 425 | /* Automate the creation of 1 to 1 mapping pmd entries */ |
384 | #define PMDS(START, PERM, COUNT) \ | 426 | #define PMDS(START, PERM, COUNT) \ |
@@ -388,24 +430,37 @@ ENTRY(name) | |||
388 | i = i + 1 ; \ | 430 | i = i + 1 ; \ |
389 | .endr | 431 | .endr |
390 | 432 | ||
433 | __INITDATA | ||
434 | NEXT_PAGE(early_level4_pgt) | ||
435 | .fill 511,8,0 | ||
436 | .quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE | ||
437 | |||
438 | NEXT_PAGE(early_dynamic_pgts) | ||
439 | .fill 512*EARLY_DYNAMIC_PAGE_TABLES,8,0 | ||
440 | |||
391 | .data | 441 | .data |
392 | /* | 442 | |
393 | * This default setting generates an ident mapping at address 0x100000 | 443 | #ifndef CONFIG_XEN |
394 | * and a mapping for the kernel that precisely maps virtual address | ||
395 | * 0xffffffff80000000 to physical address 0x000000. (always using | ||
396 | * 2Mbyte large pages provided by PAE mode) | ||
397 | */ | ||
398 | NEXT_PAGE(init_level4_pgt) | 444 | NEXT_PAGE(init_level4_pgt) |
399 | .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE | 445 | .fill 512,8,0 |
400 | .org init_level4_pgt + L4_PAGE_OFFSET*8, 0 | 446 | #else |
401 | .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE | 447 | NEXT_PAGE(init_level4_pgt) |
402 | .org init_level4_pgt + L4_START_KERNEL*8, 0 | 448 | .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE |
449 | .org init_level4_pgt + L4_PAGE_OFFSET*8, 0 | ||
450 | .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE | ||
451 | .org init_level4_pgt + L4_START_KERNEL*8, 0 | ||
403 | /* (2^48-(2*1024*1024*1024))/(2^39) = 511 */ | 452 | /* (2^48-(2*1024*1024*1024))/(2^39) = 511 */ |
404 | .quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE | 453 | .quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE |
405 | 454 | ||
406 | NEXT_PAGE(level3_ident_pgt) | 455 | NEXT_PAGE(level3_ident_pgt) |
407 | .quad level2_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE | 456 | .quad level2_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE |
408 | .fill 511,8,0 | 457 | .fill 511, 8, 0 |
458 | NEXT_PAGE(level2_ident_pgt) | ||
459 | /* Since I easily can, map the first 1G. | ||
460 | * Don't set NX because code runs from these pages. | ||
461 | */ | ||
462 | PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, PTRS_PER_PMD) | ||
463 | #endif | ||
409 | 464 | ||
410 | NEXT_PAGE(level3_kernel_pgt) | 465 | NEXT_PAGE(level3_kernel_pgt) |
411 | .fill L3_START_KERNEL,8,0 | 466 | .fill L3_START_KERNEL,8,0 |
@@ -413,21 +468,6 @@ NEXT_PAGE(level3_kernel_pgt) | |||
413 | .quad level2_kernel_pgt - __START_KERNEL_map + _KERNPG_TABLE | 468 | .quad level2_kernel_pgt - __START_KERNEL_map + _KERNPG_TABLE |
414 | .quad level2_fixmap_pgt - __START_KERNEL_map + _PAGE_TABLE | 469 | .quad level2_fixmap_pgt - __START_KERNEL_map + _PAGE_TABLE |
415 | 470 | ||
416 | NEXT_PAGE(level2_fixmap_pgt) | ||
417 | .fill 506,8,0 | ||
418 | .quad level1_fixmap_pgt - __START_KERNEL_map + _PAGE_TABLE | ||
419 | /* 8MB reserved for vsyscalls + a 2MB hole = 4 + 1 entries */ | ||
420 | .fill 5,8,0 | ||
421 | |||
422 | NEXT_PAGE(level1_fixmap_pgt) | ||
423 | .fill 512,8,0 | ||
424 | |||
425 | NEXT_PAGE(level2_ident_pgt) | ||
426 | /* Since I easily can, map the first 1G. | ||
427 | * Don't set NX because code runs from these pages. | ||
428 | */ | ||
429 | PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, PTRS_PER_PMD) | ||
430 | |||
431 | NEXT_PAGE(level2_kernel_pgt) | 471 | NEXT_PAGE(level2_kernel_pgt) |
432 | /* | 472 | /* |
433 | * 512 MB kernel mapping. We spend a full page on this pagetable | 473 | * 512 MB kernel mapping. We spend a full page on this pagetable |
@@ -442,11 +482,16 @@ NEXT_PAGE(level2_kernel_pgt) | |||
442 | PMDS(0, __PAGE_KERNEL_LARGE_EXEC, | 482 | PMDS(0, __PAGE_KERNEL_LARGE_EXEC, |
443 | KERNEL_IMAGE_SIZE/PMD_SIZE) | 483 | KERNEL_IMAGE_SIZE/PMD_SIZE) |
444 | 484 | ||
445 | NEXT_PAGE(level2_spare_pgt) | 485 | NEXT_PAGE(level2_fixmap_pgt) |
446 | .fill 512, 8, 0 | 486 | .fill 506,8,0 |
487 | .quad level1_fixmap_pgt - __START_KERNEL_map + _PAGE_TABLE | ||
488 | /* 8MB reserved for vsyscalls + a 2MB hole = 4 + 1 entries */ | ||
489 | .fill 5,8,0 | ||
490 | |||
491 | NEXT_PAGE(level1_fixmap_pgt) | ||
492 | .fill 512,8,0 | ||
447 | 493 | ||
448 | #undef PMDS | 494 | #undef PMDS |
449 | #undef NEXT_PAGE | ||
450 | 495 | ||
451 | .data | 496 | .data |
452 | .align 16 | 497 | .align 16 |
@@ -472,6 +517,5 @@ ENTRY(nmi_idt_table) | |||
472 | .skip IDT_ENTRIES * 16 | 517 | .skip IDT_ENTRIES * 16 |
473 | 518 | ||
474 | __PAGE_ALIGNED_BSS | 519 | __PAGE_ALIGNED_BSS |
475 | .align PAGE_SIZE | 520 | NEXT_PAGE(empty_zero_page) |
476 | ENTRY(empty_zero_page) | ||
477 | .skip PAGE_SIZE | 521 | .skip PAGE_SIZE |
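Note what the data-section reshuffle amounts to: early_level4_pgt (init data, discarded after boot) statically carries only the kernel-high mapping in slot 511, early_dynamic_pgts is the pool the early #PF handler draws from, and on non-Xen builds init_level4_pgt now starts out completely empty. The C startup path therefore has to graft the kernel mapping across before anything switches to init_level4_pgt; roughly like the sketch below (the real code lives in head64.c in this series, shown here only for orientation):

static void __init sketch_pgt_handoff(void)
{
	clear_page(init_level4_pgt);
	/* slot 511 is the PGD entry covering __START_KERNEL_map,
	 * per the (2^48 - 2G)/2^39 = 511 comment above */
	init_level4_pgt[511] = early_level4_pgt[511];
}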
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index b3ea9db39db6..4eabc160696f 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c | |||
@@ -16,125 +16,12 @@ | |||
16 | #include <linux/io.h> | 16 | #include <linux/io.h> |
17 | #include <linux/suspend.h> | 17 | #include <linux/suspend.h> |
18 | 18 | ||
19 | #include <asm/init.h> | ||
19 | #include <asm/pgtable.h> | 20 | #include <asm/pgtable.h> |
20 | #include <asm/tlbflush.h> | 21 | #include <asm/tlbflush.h> |
21 | #include <asm/mmu_context.h> | 22 | #include <asm/mmu_context.h> |
22 | #include <asm/debugreg.h> | 23 | #include <asm/debugreg.h> |
23 | 24 | ||
24 | static int init_one_level2_page(struct kimage *image, pgd_t *pgd, | ||
25 | unsigned long addr) | ||
26 | { | ||
27 | pud_t *pud; | ||
28 | pmd_t *pmd; | ||
29 | struct page *page; | ||
30 | int result = -ENOMEM; | ||
31 | |||
32 | addr &= PMD_MASK; | ||
33 | pgd += pgd_index(addr); | ||
34 | if (!pgd_present(*pgd)) { | ||
35 | page = kimage_alloc_control_pages(image, 0); | ||
36 | if (!page) | ||
37 | goto out; | ||
38 | pud = (pud_t *)page_address(page); | ||
39 | clear_page(pud); | ||
40 | set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE)); | ||
41 | } | ||
42 | pud = pud_offset(pgd, addr); | ||
43 | if (!pud_present(*pud)) { | ||
44 | page = kimage_alloc_control_pages(image, 0); | ||
45 | if (!page) | ||
46 | goto out; | ||
47 | pmd = (pmd_t *)page_address(page); | ||
48 | clear_page(pmd); | ||
49 | set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE)); | ||
50 | } | ||
51 | pmd = pmd_offset(pud, addr); | ||
52 | if (!pmd_present(*pmd)) | ||
53 | set_pmd(pmd, __pmd(addr | __PAGE_KERNEL_LARGE_EXEC)); | ||
54 | result = 0; | ||
55 | out: | ||
56 | return result; | ||
57 | } | ||
58 | |||
59 | static void init_level2_page(pmd_t *level2p, unsigned long addr) | ||
60 | { | ||
61 | unsigned long end_addr; | ||
62 | |||
63 | addr &= PAGE_MASK; | ||
64 | end_addr = addr + PUD_SIZE; | ||
65 | while (addr < end_addr) { | ||
66 | set_pmd(level2p++, __pmd(addr | __PAGE_KERNEL_LARGE_EXEC)); | ||
67 | addr += PMD_SIZE; | ||
68 | } | ||
69 | } | ||
70 | |||
71 | static int init_level3_page(struct kimage *image, pud_t *level3p, | ||
72 | unsigned long addr, unsigned long last_addr) | ||
73 | { | ||
74 | unsigned long end_addr; | ||
75 | int result; | ||
76 | |||
77 | result = 0; | ||
78 | addr &= PAGE_MASK; | ||
79 | end_addr = addr + PGDIR_SIZE; | ||
80 | while ((addr < last_addr) && (addr < end_addr)) { | ||
81 | struct page *page; | ||
82 | pmd_t *level2p; | ||
83 | |||
84 | page = kimage_alloc_control_pages(image, 0); | ||
85 | if (!page) { | ||
86 | result = -ENOMEM; | ||
87 | goto out; | ||
88 | } | ||
89 | level2p = (pmd_t *)page_address(page); | ||
90 | init_level2_page(level2p, addr); | ||
91 | set_pud(level3p++, __pud(__pa(level2p) | _KERNPG_TABLE)); | ||
92 | addr += PUD_SIZE; | ||
93 | } | ||
94 | /* clear the unused entries */ | ||
95 | while (addr < end_addr) { | ||
96 | pud_clear(level3p++); | ||
97 | addr += PUD_SIZE; | ||
98 | } | ||
99 | out: | ||
100 | return result; | ||
101 | } | ||
102 | |||
103 | |||
104 | static int init_level4_page(struct kimage *image, pgd_t *level4p, | ||
105 | unsigned long addr, unsigned long last_addr) | ||
106 | { | ||
107 | unsigned long end_addr; | ||
108 | int result; | ||
109 | |||
110 | result = 0; | ||
111 | addr &= PAGE_MASK; | ||
112 | end_addr = addr + (PTRS_PER_PGD * PGDIR_SIZE); | ||
113 | while ((addr < last_addr) && (addr < end_addr)) { | ||
114 | struct page *page; | ||
115 | pud_t *level3p; | ||
116 | |||
117 | page = kimage_alloc_control_pages(image, 0); | ||
118 | if (!page) { | ||
119 | result = -ENOMEM; | ||
120 | goto out; | ||
121 | } | ||
122 | level3p = (pud_t *)page_address(page); | ||
123 | result = init_level3_page(image, level3p, addr, last_addr); | ||
124 | if (result) | ||
125 | goto out; | ||
126 | set_pgd(level4p++, __pgd(__pa(level3p) | _KERNPG_TABLE)); | ||
127 | addr += PGDIR_SIZE; | ||
128 | } | ||
129 | /* clear the unused entries */ | ||
130 | while (addr < end_addr) { | ||
131 | pgd_clear(level4p++); | ||
132 | addr += PGDIR_SIZE; | ||
133 | } | ||
134 | out: | ||
135 | return result; | ||
136 | } | ||
137 | |||
138 | static void free_transition_pgtable(struct kimage *image) | 25 | static void free_transition_pgtable(struct kimage *image) |
139 | { | 26 | { |
140 | free_page((unsigned long)image->arch.pud); | 27 | free_page((unsigned long)image->arch.pud); |
@@ -184,22 +71,62 @@ err: | |||
184 | return result; | 71 | return result; |
185 | } | 72 | } |
186 | 73 | ||
74 | static void *alloc_pgt_page(void *data) | ||
75 | { | ||
76 | struct kimage *image = (struct kimage *)data; | ||
77 | struct page *page; | ||
78 | void *p = NULL; | ||
79 | |||
80 | page = kimage_alloc_control_pages(image, 0); | ||
81 | if (page) { | ||
82 | p = page_address(page); | ||
83 | clear_page(p); | ||
84 | } | ||
85 | |||
86 | return p; | ||
87 | } | ||
187 | 88 | ||
188 | static int init_pgtable(struct kimage *image, unsigned long start_pgtable) | 89 | static int init_pgtable(struct kimage *image, unsigned long start_pgtable) |
189 | { | 90 | { |
91 | struct x86_mapping_info info = { | ||
92 | .alloc_pgt_page = alloc_pgt_page, | ||
93 | .context = image, | ||
94 | .pmd_flag = __PAGE_KERNEL_LARGE_EXEC, | ||
95 | }; | ||
96 | unsigned long mstart, mend; | ||
190 | pgd_t *level4p; | 97 | pgd_t *level4p; |
191 | int result; | 98 | int result; |
99 | int i; | ||
100 | |||
192 | level4p = (pgd_t *)__va(start_pgtable); | 101 | level4p = (pgd_t *)__va(start_pgtable); |
193 | result = init_level4_page(image, level4p, 0, max_pfn << PAGE_SHIFT); | 102 | clear_page(level4p); |
194 | if (result) | 103 | for (i = 0; i < nr_pfn_mapped; i++) { |
195 | return result; | 104 | mstart = pfn_mapped[i].start << PAGE_SHIFT; |
105 | mend = pfn_mapped[i].end << PAGE_SHIFT; | ||
106 | |||
107 | result = kernel_ident_mapping_init(&info, | ||
108 | level4p, mstart, mend); | ||
109 | if (result) | ||
110 | return result; | ||
111 | } | ||
112 | |||
196 | /* | 113 | /* |
197 | * image->start may be outside 0 ~ max_pfn, for example when | 114 | * segments' mem ranges could be outside 0 ~ max_pfn, |
198 | * jump back to original kernel from kexeced kernel | 115 | * for example when jumping back to the original kernel from the |
116 | * kexeced kernel, or when the first kernel is booted with a user | ||
117 | * mem map and the second kernel is loaded out of that range. | ||
199 | */ | 118 | */ |
200 | result = init_one_level2_page(image, level4p, image->start); | 119 | for (i = 0; i < image->nr_segments; i++) { |
201 | if (result) | 120 | mstart = image->segment[i].mem; |
202 | return result; | 121 | mend = mstart + image->segment[i].memsz; |
122 | |||
123 | result = kernel_ident_mapping_init(&info, | ||
124 | level4p, mstart, mend); | ||
125 | |||
126 | if (result) | ||
127 | return result; | ||
128 | } | ||
129 | |||
203 | return init_transition_pgtable(image, level4p); | 130 | return init_transition_pgtable(image, level4p); |
204 | } | 131 | } |
205 | 132 | ||
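The rewrite above retires roughly 120 lines of hand-rolled table building in favor of the shared kernel_ident_mapping_init() helper, parameterized through struct x86_mapping_info with an allocator callback, so kexec keeps drawing its pages from kimage control pages. The same pattern works for any caller with its own page-table memory; a hedged sketch using memblock as the backing allocator instead (illustrative only, no error handling beyond the allocation itself):

static void *memblock_pgt_alloc(void *context)
{
	phys_addr_t phys = memblock_find_in_range(0, PFN_PHYS(max_pfn_mapped),
						  PAGE_SIZE, PAGE_SIZE);

	if (!phys)
		return NULL;
	memblock_reserve(phys, PAGE_SIZE);
	clear_page(__va(phys));		/* the helper expects zeroed pages */
	return __va(phys);
}

static int sketch_map_one_range(pgd_t *pgd, unsigned long start,
				unsigned long end)
{
	struct x86_mapping_info info = {
		.alloc_pgt_page	= memblock_pgt_alloc,
		.context	= NULL,
		.pmd_flag	= __PAGE_KERNEL_LARGE_EXEC,
	};

	return kernel_ident_mapping_init(&info, pgd, start, end);
}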
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 0aebd776018e..878cf1d326e5 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c | |||
@@ -108,17 +108,16 @@ | |||
108 | #include <asm/topology.h> | 108 | #include <asm/topology.h> |
109 | #include <asm/apicdef.h> | 109 | #include <asm/apicdef.h> |
110 | #include <asm/amd_nb.h> | 110 | #include <asm/amd_nb.h> |
111 | #ifdef CONFIG_X86_64 | ||
112 | #include <asm/numa_64.h> | ||
113 | #endif | ||
114 | #include <asm/mce.h> | 111 | #include <asm/mce.h> |
115 | #include <asm/alternative.h> | 112 | #include <asm/alternative.h> |
116 | #include <asm/prom.h> | 113 | #include <asm/prom.h> |
117 | 114 | ||
118 | /* | 115 | /* |
119 | * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries. | 116 | * max_low_pfn_mapped: highest direct mapped pfn under 4GB |
120 | * The direct mapping extends to max_pfn_mapped, so that we can directly access | 117 | * max_pfn_mapped: highest direct mapped pfn over 4GB |
121 | * apertures, ACPI and other tables without having to play with fixmaps. | 118 | * |
119 | * The direct mapping only covers E820_RAM regions, so the ranges and gaps are | ||
120 | * represented by pfn_mapped | ||
122 | */ | 121 | */ |
123 | unsigned long max_low_pfn_mapped; | 122 | unsigned long max_low_pfn_mapped; |
124 | unsigned long max_pfn_mapped; | 123 | unsigned long max_pfn_mapped; |
@@ -276,18 +275,7 @@ void * __init extend_brk(size_t size, size_t align) | |||
276 | return ret; | 275 | return ret; |
277 | } | 276 | } |
278 | 277 | ||
279 | #ifdef CONFIG_X86_64 | 278 | #ifdef CONFIG_X86_32 |
280 | static void __init init_gbpages(void) | ||
281 | { | ||
282 | if (direct_gbpages && cpu_has_gbpages) | ||
283 | printk(KERN_INFO "Using GB pages for direct mapping\n"); | ||
284 | else | ||
285 | direct_gbpages = 0; | ||
286 | } | ||
287 | #else | ||
288 | static inline void init_gbpages(void) | ||
289 | { | ||
290 | } | ||
291 | static void __init cleanup_highmap(void) | 279 | static void __init cleanup_highmap(void) |
292 | { | 280 | { |
293 | } | 281 | } |
@@ -306,27 +294,43 @@ static void __init reserve_brk(void) | |||
306 | 294 | ||
307 | #ifdef CONFIG_BLK_DEV_INITRD | 295 | #ifdef CONFIG_BLK_DEV_INITRD |
308 | 296 | ||
297 | static u64 __init get_ramdisk_image(void) | ||
298 | { | ||
299 | u64 ramdisk_image = boot_params.hdr.ramdisk_image; | ||
300 | |||
301 | ramdisk_image |= (u64)boot_params.ext_ramdisk_image << 32; | ||
302 | |||
303 | return ramdisk_image; | ||
304 | } | ||
305 | static u64 __init get_ramdisk_size(void) | ||
306 | { | ||
307 | u64 ramdisk_size = boot_params.hdr.ramdisk_size; | ||
308 | |||
309 | ramdisk_size |= (u64)boot_params.ext_ramdisk_size << 32; | ||
310 | |||
311 | return ramdisk_size; | ||
312 | } | ||
313 | |||
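These two helpers exist because boot_params.hdr.ramdisk_image and ramdisk_size are 32-bit fields; the newer boot protocol parks the upper halves in boot_params.ext_ramdisk_image and ext_ramdisk_size so an initrd can live above 4 GiB. A worked example with invented numbers:

/* A bootloader placed the initrd at physical 0x1_2000_0000 (above 4G).
 * The legacy header field holds only the low 32 bits; the ext_ field
 * supplies the rest, and get_ramdisk_image() recombines them.
 */
static u64 __init example_ramdisk_image(void)
{
	u32 hdr_lo = 0x20000000;	/* boot_params.hdr.ramdisk_image */
	u32 ext_hi = 0x1;		/* boot_params.ext_ramdisk_image */

	return (u64)ext_hi << 32 | hdr_lo;	/* 0x120000000 */
}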
309 | #define MAX_MAP_CHUNK (NR_FIX_BTMAPS << PAGE_SHIFT) | 314 | #define MAX_MAP_CHUNK (NR_FIX_BTMAPS << PAGE_SHIFT) |
310 | static void __init relocate_initrd(void) | 315 | static void __init relocate_initrd(void) |
311 | { | 316 | { |
312 | /* Assume only end is not page aligned */ | 317 | /* Assume only end is not page aligned */ |
313 | u64 ramdisk_image = boot_params.hdr.ramdisk_image; | 318 | u64 ramdisk_image = get_ramdisk_image(); |
314 | u64 ramdisk_size = boot_params.hdr.ramdisk_size; | 319 | u64 ramdisk_size = get_ramdisk_size(); |
315 | u64 area_size = PAGE_ALIGN(ramdisk_size); | 320 | u64 area_size = PAGE_ALIGN(ramdisk_size); |
316 | u64 end_of_lowmem = max_low_pfn_mapped << PAGE_SHIFT; | ||
317 | u64 ramdisk_here; | 321 | u64 ramdisk_here; |
318 | unsigned long slop, clen, mapaddr; | 322 | unsigned long slop, clen, mapaddr; |
319 | char *p, *q; | 323 | char *p, *q; |
320 | 324 | ||
321 | /* We need to move the initrd down into lowmem */ | 325 | /* We need to move the initrd down into directly mapped mem */ |
322 | ramdisk_here = memblock_find_in_range(0, end_of_lowmem, area_size, | 326 | ramdisk_here = memblock_find_in_range(0, PFN_PHYS(max_pfn_mapped), |
323 | PAGE_SIZE); | 327 | area_size, PAGE_SIZE); |
324 | 328 | ||
325 | if (!ramdisk_here) | 329 | if (!ramdisk_here) |
326 | panic("Cannot find place for new RAMDISK of size %lld\n", | 330 | panic("Cannot find place for new RAMDISK of size %lld\n", |
327 | ramdisk_size); | 331 | ramdisk_size); |
328 | 332 | ||
329 | /* Note: this includes all the lowmem currently occupied by | 333 | /* Note: this includes all the mem currently occupied by |
330 | the initrd, we rely on that fact to keep the data intact. */ | 334 | the initrd, we rely on that fact to keep the data intact. */ |
331 | memblock_reserve(ramdisk_here, area_size); | 335 | memblock_reserve(ramdisk_here, area_size); |
332 | initrd_start = ramdisk_here + PAGE_OFFSET; | 336 | initrd_start = ramdisk_here + PAGE_OFFSET; |
@@ -336,17 +340,7 @@ static void __init relocate_initrd(void) | |||
336 | 340 | ||
337 | q = (char *)initrd_start; | 341 | q = (char *)initrd_start; |
338 | 342 | ||
339 | /* Copy any lowmem portion of the initrd */ | 343 | /* Copy the initrd */ |
340 | if (ramdisk_image < end_of_lowmem) { | ||
341 | clen = end_of_lowmem - ramdisk_image; | ||
342 | p = (char *)__va(ramdisk_image); | ||
343 | memcpy(q, p, clen); | ||
344 | q += clen; | ||
345 | ramdisk_image += clen; | ||
346 | ramdisk_size -= clen; | ||
347 | } | ||
348 | |||
349 | /* Copy the highmem portion of the initrd */ | ||
350 | while (ramdisk_size) { | 344 | while (ramdisk_size) { |
351 | slop = ramdisk_image & ~PAGE_MASK; | 345 | slop = ramdisk_image & ~PAGE_MASK; |
352 | clen = ramdisk_size; | 346 | clen = ramdisk_size; |
@@ -360,22 +354,35 @@ static void __init relocate_initrd(void) | |||
360 | ramdisk_image += clen; | 354 | ramdisk_image += clen; |
361 | ramdisk_size -= clen; | 355 | ramdisk_size -= clen; |
362 | } | 356 | } |
363 | /* high pages is not converted by early_res_to_bootmem */ | 357 | |
364 | ramdisk_image = boot_params.hdr.ramdisk_image; | 358 | ramdisk_image = get_ramdisk_image(); |
365 | ramdisk_size = boot_params.hdr.ramdisk_size; | 359 | ramdisk_size = get_ramdisk_size(); |
366 | printk(KERN_INFO "Move RAMDISK from [mem %#010llx-%#010llx] to" | 360 | printk(KERN_INFO "Move RAMDISK from [mem %#010llx-%#010llx] to" |
367 | " [mem %#010llx-%#010llx]\n", | 361 | " [mem %#010llx-%#010llx]\n", |
368 | ramdisk_image, ramdisk_image + ramdisk_size - 1, | 362 | ramdisk_image, ramdisk_image + ramdisk_size - 1, |
369 | ramdisk_here, ramdisk_here + ramdisk_size - 1); | 363 | ramdisk_here, ramdisk_here + ramdisk_size - 1); |
370 | } | 364 | } |
371 | 365 | ||
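With the lowmem/highmem split gone, the copy loop above relies on one idiom worth spelling out: the source initrd may sit in memory that is not in the direct mapping yet, so it is copied in MAX_MAP_CHUNK pieces, each temporarily mapped with early_memremap(). The same loop extracted into a standalone sketch (error handling omitted):

static void __init copy_unmapped_range(char *dst, u64 src, u64 size)
{
	while (size) {
		u64 slop = src & ~PAGE_MASK;	/* offset within the page */
		u64 clen = min_t(u64, size, MAX_MAP_CHUNK - slop);
		char *p  = early_memremap(src & PAGE_MASK, clen + slop);

		memcpy(dst, p + slop, clen);
		early_iounmap(p, clen + slop);
		dst  += clen;
		src  += clen;
		size -= clen;
	}
}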
366 | static void __init early_reserve_initrd(void) | ||
367 | { | ||
368 | /* Assume only end is not page aligned */ | ||
369 | u64 ramdisk_image = get_ramdisk_image(); | ||
370 | u64 ramdisk_size = get_ramdisk_size(); | ||
371 | u64 ramdisk_end = PAGE_ALIGN(ramdisk_image + ramdisk_size); | ||
372 | |||
373 | if (!boot_params.hdr.type_of_loader || | ||
374 | !ramdisk_image || !ramdisk_size) | ||
375 | return; /* No initrd provided by bootloader */ | ||
376 | |||
377 | memblock_reserve(ramdisk_image, ramdisk_end - ramdisk_image); | ||
378 | } | ||
372 | static void __init reserve_initrd(void) | 379 | static void __init reserve_initrd(void) |
373 | { | 380 | { |
374 | /* Assume only end is not page aligned */ | 381 | /* Assume only end is not page aligned */ |
375 | u64 ramdisk_image = boot_params.hdr.ramdisk_image; | 382 | u64 ramdisk_image = get_ramdisk_image(); |
376 | u64 ramdisk_size = boot_params.hdr.ramdisk_size; | 383 | u64 ramdisk_size = get_ramdisk_size(); |
377 | u64 ramdisk_end = PAGE_ALIGN(ramdisk_image + ramdisk_size); | 384 | u64 ramdisk_end = PAGE_ALIGN(ramdisk_image + ramdisk_size); |
378 | u64 end_of_lowmem = max_low_pfn_mapped << PAGE_SHIFT; | 385 | u64 mapped_size; |
379 | 386 | ||
380 | if (!boot_params.hdr.type_of_loader || | 387 | if (!boot_params.hdr.type_of_loader || |
381 | !ramdisk_image || !ramdisk_size) | 388 | !ramdisk_image || !ramdisk_size) |
@@ -383,22 +390,18 @@ static void __init reserve_initrd(void) | |||
383 | 390 | ||
384 | initrd_start = 0; | 391 | initrd_start = 0; |
385 | 392 | ||
386 | if (ramdisk_size >= (end_of_lowmem>>1)) { | 393 | mapped_size = memblock_mem_size(max_pfn_mapped); |
394 | if (ramdisk_size >= (mapped_size>>1)) | ||
387 | panic("initrd too large to handle, " | 395 | panic("initrd too large to handle, " |
388 | "disabling initrd (%lld needed, %lld available)\n", | 396 | "disabling initrd (%lld needed, %lld available)\n", |
389 | ramdisk_size, end_of_lowmem>>1); | 397 | ramdisk_size, mapped_size>>1); |
390 | } | ||
391 | 398 | ||
392 | printk(KERN_INFO "RAMDISK: [mem %#010llx-%#010llx]\n", ramdisk_image, | 399 | printk(KERN_INFO "RAMDISK: [mem %#010llx-%#010llx]\n", ramdisk_image, |
393 | ramdisk_end - 1); | 400 | ramdisk_end - 1); |
394 | 401 | ||
395 | 402 | if (pfn_range_is_mapped(PFN_DOWN(ramdisk_image), | |
396 | if (ramdisk_end <= end_of_lowmem) { | 403 | PFN_DOWN(ramdisk_end))) { |
397 | /* All in lowmem, easy case */ | 404 | /* All are mapped, easy case */ |
398 | /* | ||
399 | * don't need to reserve again, already reserved early | ||
400 | * in i386_start_kernel | ||
401 | */ | ||
402 | initrd_start = ramdisk_image + PAGE_OFFSET; | 405 | initrd_start = ramdisk_image + PAGE_OFFSET; |
403 | initrd_end = initrd_start + ramdisk_size; | 406 | initrd_end = initrd_start + ramdisk_size; |
404 | return; | 407 | return; |
@@ -409,6 +412,9 @@ static void __init reserve_initrd(void) | |||
409 | memblock_free(ramdisk_image, ramdisk_end - ramdisk_image); | 412 | memblock_free(ramdisk_image, ramdisk_end - ramdisk_image); |
410 | } | 413 | } |
411 | #else | 414 | #else |
415 | static void __init early_reserve_initrd(void) | ||
416 | { | ||
417 | } | ||
412 | static void __init reserve_initrd(void) | 418 | static void __init reserve_initrd(void) |
413 | { | 419 | { |
414 | } | 420 | } |
@@ -419,8 +425,6 @@ static void __init parse_setup_data(void) | |||
419 | struct setup_data *data; | 425 | struct setup_data *data; |
420 | u64 pa_data; | 426 | u64 pa_data; |
421 | 427 | ||
422 | if (boot_params.hdr.version < 0x0209) | ||
423 | return; | ||
424 | pa_data = boot_params.hdr.setup_data; | 428 | pa_data = boot_params.hdr.setup_data; |
425 | while (pa_data) { | 429 | while (pa_data) { |
426 | u32 data_len, map_len; | 430 | u32 data_len, map_len; |
@@ -456,8 +460,6 @@ static void __init e820_reserve_setup_data(void) | |||
456 | u64 pa_data; | 460 | u64 pa_data; |
457 | int found = 0; | 461 | int found = 0; |
458 | 462 | ||
459 | if (boot_params.hdr.version < 0x0209) | ||
460 | return; | ||
461 | pa_data = boot_params.hdr.setup_data; | 463 | pa_data = boot_params.hdr.setup_data; |
462 | while (pa_data) { | 464 | while (pa_data) { |
463 | data = early_memremap(pa_data, sizeof(*data)); | 465 | data = early_memremap(pa_data, sizeof(*data)); |
@@ -481,8 +483,6 @@ static void __init memblock_x86_reserve_range_setup_data(void) | |||
481 | struct setup_data *data; | 483 | struct setup_data *data; |
482 | u64 pa_data; | 484 | u64 pa_data; |
483 | 485 | ||
484 | if (boot_params.hdr.version < 0x0209) | ||
485 | return; | ||
486 | pa_data = boot_params.hdr.setup_data; | 486 | pa_data = boot_params.hdr.setup_data; |
487 | while (pa_data) { | 487 | while (pa_data) { |
488 | data = early_memremap(pa_data, sizeof(*data)); | 488 | data = early_memremap(pa_data, sizeof(*data)); |
@@ -501,17 +501,51 @@ static void __init memblock_x86_reserve_range_setup_data(void) | |||
501 | /* | 501 | /* |
502 | * Keep the crash kernel below this limit. On 32 bits earlier kernels | 502 | * Keep the crash kernel below this limit. On 32 bits earlier kernels |
503 | * would limit the kernel to the low 512 MiB due to mapping restrictions. | 503 | * would limit the kernel to the low 512 MiB due to mapping restrictions. |
504 | * On 64 bits, kexec-tools currently limits us to 896 MiB; increase this | ||
505 | * limit once kexec-tools are fixed. | ||
506 | */ | 504 | */ |
507 | #ifdef CONFIG_X86_32 | 505 | #ifdef CONFIG_X86_32 |
508 | # define CRASH_KERNEL_ADDR_MAX (512 << 20) | 506 | # define CRASH_KERNEL_ADDR_MAX (512 << 20) |
509 | #else | 507 | #else |
510 | # define CRASH_KERNEL_ADDR_MAX (896 << 20) | 508 | # define CRASH_KERNEL_ADDR_MAX MAXMEM |
511 | #endif | 509 | #endif |
512 | 510 | ||
511 | static void __init reserve_crashkernel_low(void) | ||
512 | { | ||
513 | #ifdef CONFIG_X86_64 | ||
514 | const unsigned long long alignment = 16<<20; /* 16M */ | ||
515 | unsigned long long low_base = 0, low_size = 0; | ||
516 | unsigned long total_low_mem; | ||
517 | unsigned long long base; | ||
518 | int ret; | ||
519 | |||
520 | total_low_mem = memblock_mem_size(1UL<<(32-PAGE_SHIFT)); | ||
521 | ret = parse_crashkernel_low(boot_command_line, total_low_mem, | ||
522 | &low_size, &base); | ||
523 | if (ret != 0 || low_size <= 0) | ||
524 | return; | ||
525 | |||
526 | low_base = memblock_find_in_range(low_size, (1ULL<<32), | ||
527 | low_size, alignment); | ||
528 | |||
529 | if (!low_base) { | ||
530 | pr_info("crashkernel low reservation failed - No suitable area found.\n"); | ||
531 | |||
532 | return; | ||
533 | } | ||
534 | |||
535 | memblock_reserve(low_base, low_size); | ||
536 | pr_info("Reserving %ldMB of low memory at %ldMB for crashkernel (System low RAM: %ldMB)\n", | ||
537 | (unsigned long)(low_size >> 20), | ||
538 | (unsigned long)(low_base >> 20), | ||
539 | (unsigned long)(total_low_mem >> 20)); | ||
540 | crashk_low_res.start = low_base; | ||
541 | crashk_low_res.end = low_base + low_size - 1; | ||
542 | insert_resource(&iomem_resource, &crashk_low_res); | ||
543 | #endif | ||
544 | } | ||
545 | |||
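reserve_crashkernel_low() exists because CRASH_KERNEL_ADDR_MAX is now MAXMEM on 64-bit: the main crashkernel window can land above 4 GiB, which would leave the kdump kernel with no memory below 4 GiB for swiotlb and 32-bit DMA. A "crashkernel=...,low" option (parsed by parse_crashkernel_low()) carves that slice out separately. A worked example with invented numbers:

/* E.g. on a 16 GiB box, "crashkernel=256M" may be satisfied near the
 * top of RAM, so a low reservation is added as well (addresses and
 * sizes below are made up for illustration).
 */
static void __init example_crash_reservations(void)
{
	u64 crash_base = 0x3f0000000ULL;	/* 256M found above 4G */
	u64 crash_size = 256ULL << 20;
	u64 low_base   = 0x13000000;		/* 72M found below 4G  */
	u64 low_size   = 72ULL << 20;

	memblock_reserve(crash_base, crash_size);
	if (crash_base >= (1ULL << 32))		/* nothing below 4G yet */
		memblock_reserve(low_base, low_size);
}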
513 | static void __init reserve_crashkernel(void) | 546 | static void __init reserve_crashkernel(void) |
514 | { | 547 | { |
548 | const unsigned long long alignment = 16<<20; /* 16M */ | ||
515 | unsigned long long total_mem; | 549 | unsigned long long total_mem; |
516 | unsigned long long crash_size, crash_base; | 550 | unsigned long long crash_size, crash_base; |
517 | int ret; | 551 | int ret; |
@@ -525,8 +559,6 @@ static void __init reserve_crashkernel(void) | |||
525 | 559 | ||
526 | /* 0 means: find the address automatically */ | 560 | /* 0 means: find the address automatically */ |
527 | if (crash_base <= 0) { | 561 | if (crash_base <= 0) { |
528 | const unsigned long long alignment = 16<<20; /* 16M */ | ||
529 | |||
530 | /* | 562 | /* |
531 | * kexec wants the bzImage below CRASH_KERNEL_ADDR_MAX | 563 | * kexec wants the bzImage below CRASH_KERNEL_ADDR_MAX |
532 | */ | 564 | */ |
@@ -537,6 +569,7 @@ static void __init reserve_crashkernel(void) | |||
537 | pr_info("crashkernel reservation failed - No suitable area found.\n"); | 569 | pr_info("crashkernel reservation failed - No suitable area found.\n"); |
538 | return; | 570 | return; |
539 | } | 571 | } |
572 | |||
540 | } else { | 573 | } else { |
541 | unsigned long long start; | 574 | unsigned long long start; |
542 | 575 | ||
@@ -558,6 +591,9 @@ static void __init reserve_crashkernel(void) | |||
558 | crashk_res.start = crash_base; | 591 | crashk_res.start = crash_base; |
559 | crashk_res.end = crash_base + crash_size - 1; | 592 | crashk_res.end = crash_base + crash_size - 1; |
560 | insert_resource(&iomem_resource, &crashk_res); | 593 | insert_resource(&iomem_resource, &crashk_res); |
594 | |||
595 | if (crash_base >= (1ULL<<32)) | ||
596 | reserve_crashkernel_low(); | ||
561 | } | 597 | } |
562 | #else | 598 | #else |
563 | static void __init reserve_crashkernel(void) | 599 | static void __init reserve_crashkernel(void) |
@@ -708,6 +744,27 @@ static void __init trim_bios_range(void) | |||
708 | sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); | 744 | sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); |
709 | } | 745 | } |
710 | 746 | ||
747 | /* called before trim_bios_range() to spare an extra sanitize pass */ | ||
748 | static void __init e820_add_kernel_range(void) | ||
749 | { | ||
750 | u64 start = __pa_symbol(_text); | ||
751 | u64 size = __pa_symbol(_end) - start; | ||
752 | |||
753 | /* | ||
754 | * Complain if .text .data and .bss are not marked as E820_RAM and | ||
755 | * attempt to fix it by adding the range. We may have a confused BIOS, | ||
756 | * or the user may have used memmap=exactmap or memmap=xxM$yyM to | ||
757 | * exclude the kernel range. If we really are running on top of | ||
758 | * non-RAM, we will crash later anyway. | ||
759 | */ | ||
760 | if (e820_all_mapped(start, start + size, E820_RAM)) | ||
761 | return; | ||
762 | |||
763 | pr_warn(".text .data .bss are not marked as E820_RAM!\n"); | ||
764 | e820_remove_range(start, size, E820_RAM, 0); | ||
765 | e820_add_region(start, size, E820_RAM); | ||
766 | } | ||
767 | |||
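For a concrete (made-up) failure mode the new helper guards against: booting with memmap=exactmap and a map that stops short of the kernel image leaves part of .text/.data/.bss outside E820_RAM, and later mapping code would skip it. The check and fixup then behave like this:

/* Invented numbers: the image spans [16M, 36M) but the user-supplied
 * map only described RAM up to 32M, so e820_all_mapped() fails and the
 * whole image span is re-added as E820_RAM before sanitizing.
 */
static void __init example_kernel_range_fixup(void)
{
	u64 start = 0x1000000;		/* __pa_symbol(_text) */
	u64 size  = 0x1400000;		/* _end - _text       */

	if (!e820_all_mapped(start, start + size, E820_RAM)) {
		e820_remove_range(start, size, E820_RAM, 0);
		e820_add_region(start, size, E820_RAM);
	}
}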
711 | static unsigned reserve_low = CONFIG_X86_RESERVE_LOW << 10; | 768 | static unsigned reserve_low = CONFIG_X86_RESERVE_LOW << 10; |
712 | 769 | ||
713 | static int __init parse_reservelow(char *p) | 770 | static int __init parse_reservelow(char *p) |
@@ -752,6 +809,17 @@ static void __init trim_low_memory_range(void) | |||
752 | 809 | ||
753 | void __init setup_arch(char **cmdline_p) | 810 | void __init setup_arch(char **cmdline_p) |
754 | { | 811 | { |
812 | memblock_reserve(__pa_symbol(_text), | ||
813 | (unsigned long)__bss_stop - (unsigned long)_text); | ||
814 | |||
815 | early_reserve_initrd(); | ||
816 | |||
817 | /* | ||
818 | * At this point everything still needed from the boot loader | ||
819 | * or BIOS or kernel text should be early reserved or marked not | ||
820 | * RAM in e820. All other memory is free game. | ||
821 | */ | ||
822 | |||
755 | #ifdef CONFIG_X86_32 | 823 | #ifdef CONFIG_X86_32 |
756 | memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data)); | 824 | memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data)); |
757 | visws_early_detect(); | 825 | visws_early_detect(); |
@@ -910,6 +978,7 @@ void __init setup_arch(char **cmdline_p) | |||
910 | insert_resource(&iomem_resource, &data_resource); | 978 | insert_resource(&iomem_resource, &data_resource); |
911 | insert_resource(&iomem_resource, &bss_resource); | 979 | insert_resource(&iomem_resource, &bss_resource); |
912 | 980 | ||
981 | e820_add_kernel_range(); | ||
913 | trim_bios_range(); | 982 | trim_bios_range(); |
914 | #ifdef CONFIG_X86_32 | 983 | #ifdef CONFIG_X86_32 |
915 | if (ppro_with_ram_bug()) { | 984 | if (ppro_with_ram_bug()) { |
@@ -959,6 +1028,8 @@ void __init setup_arch(char **cmdline_p) | |||
959 | 1028 | ||
960 | reserve_ibft_region(); | 1029 | reserve_ibft_region(); |
961 | 1030 | ||
1031 | early_alloc_pgt_buf(); | ||
1032 | |||
962 | /* | 1033 | /* |
963 | * Need to conclude brk, before memblock_x86_fill() | 1034 | * Need to conclude brk, before memblock_x86_fill() |
964 | * it could use memblock_find_in_range, could overlap with | 1035 | * it could use memblock_find_in_range, could overlap with |
@@ -968,7 +1039,7 @@ void __init setup_arch(char **cmdline_p) | |||
968 | 1039 | ||
969 | cleanup_highmap(); | 1040 | cleanup_highmap(); |
970 | 1041 | ||
971 | memblock.current_limit = get_max_mapped(); | 1042 | memblock.current_limit = ISA_END_ADDRESS; |
972 | memblock_x86_fill(); | 1043 | memblock_x86_fill(); |
973 | 1044 | ||
974 | /* | 1045 | /* |
@@ -985,42 +1056,22 @@ void __init setup_arch(char **cmdline_p) | |||
985 | setup_bios_corruption_check(); | 1056 | setup_bios_corruption_check(); |
986 | #endif | 1057 | #endif |
987 | 1058 | ||
1059 | #ifdef CONFIG_X86_32 | ||
988 | printk(KERN_DEBUG "initial memory mapped: [mem 0x00000000-%#010lx]\n", | 1060 | printk(KERN_DEBUG "initial memory mapped: [mem 0x00000000-%#010lx]\n", |
989 | (max_pfn_mapped<<PAGE_SHIFT) - 1); | 1061 | (max_pfn_mapped<<PAGE_SHIFT) - 1); |
1062 | #endif | ||
990 | 1063 | ||
991 | setup_real_mode(); | 1064 | reserve_real_mode(); |
992 | 1065 | ||
993 | trim_platform_memory_ranges(); | 1066 | trim_platform_memory_ranges(); |
994 | trim_low_memory_range(); | 1067 | trim_low_memory_range(); |
995 | 1068 | ||
996 | init_gbpages(); | 1069 | init_mem_mapping(); |
997 | |||
998 | /* max_pfn_mapped is updated here */ | ||
999 | max_low_pfn_mapped = init_memory_mapping(0, max_low_pfn<<PAGE_SHIFT); | ||
1000 | max_pfn_mapped = max_low_pfn_mapped; | ||
1001 | |||
1002 | #ifdef CONFIG_X86_64 | ||
1003 | if (max_pfn > max_low_pfn) { | ||
1004 | int i; | ||
1005 | unsigned long start, end; | ||
1006 | unsigned long start_pfn, end_pfn; | ||
1007 | |||
1008 | for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, | ||
1009 | NULL) { | ||
1010 | 1070 | ||
1011 | end = PFN_PHYS(end_pfn); | 1071 | early_trap_pf_init(); |
1012 | if (end <= (1UL<<32)) | ||
1013 | continue; | ||
1014 | 1072 | ||
1015 | start = PFN_PHYS(start_pfn); | 1073 | setup_real_mode(); |
1016 | max_pfn_mapped = init_memory_mapping( | ||
1017 | max((1UL<<32), start), end); | ||
1018 | } | ||
1019 | 1074 | ||
1020 | /* can we preseve max_low_pfn ?*/ | ||
1021 | max_low_pfn = max_pfn; | ||
1022 | } | ||
1023 | #endif | ||
1024 | memblock.current_limit = get_max_mapped(); | 1075 | memblock.current_limit = get_max_mapped(); |
1025 | dma_contiguous_reserve(0); | 1076 | dma_contiguous_reserve(0); |
1026 | 1077 | ||
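Seen end to end, the setup_arch() changes are a re-ordering of early memory bring-up: protect what the bootloader handed over first, keep memblock confined to already-mapped low memory until page tables exist, and only then build the direct mapping and lift the limit. Condensed into a C sketch of the flow (not literal kernel code; unrelated steps elided):

static void __init sketch_boot_mem_flow(void)
{
	/* 1. Protect the kernel image and initrd before anything can
	 *    allocate over them. */
	memblock_reserve(__pa_symbol(_text),
			 (unsigned long)__bss_stop - (unsigned long)_text);
	early_reserve_initrd();

	/* 2. Seed the page-table allocator from brk, then fill memblock
	 *    from e820 while only handing out low, already-mapped RAM. */
	early_alloc_pgt_buf();
	memblock.current_limit = ISA_END_ADDRESS;
	memblock_x86_fill();

	/* 3. Build the direct mapping piecewise, switch to the real #PF
	 *    handler, then open up everything that is now mapped. */
	init_mem_mapping();
	early_trap_pf_init();
	memblock.current_limit = get_max_mapped();
}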
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index ecffca11f4e9..68bda7a84159 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c | |||
@@ -688,10 +688,19 @@ void __init early_trap_init(void) | |||
688 | set_intr_gate_ist(X86_TRAP_DB, &debug, DEBUG_STACK); | 688 | set_intr_gate_ist(X86_TRAP_DB, &debug, DEBUG_STACK); |
689 | /* int3 can be called from all */ | 689 | /* int3 can be called from all */ |
690 | set_system_intr_gate_ist(X86_TRAP_BP, &int3, DEBUG_STACK); | 690 | set_system_intr_gate_ist(X86_TRAP_BP, &int3, DEBUG_STACK); |
691 | #ifdef CONFIG_X86_32 | ||
691 | set_intr_gate(X86_TRAP_PF, &page_fault); | 692 | set_intr_gate(X86_TRAP_PF, &page_fault); |
693 | #endif | ||
692 | load_idt(&idt_descr); | 694 | load_idt(&idt_descr); |
693 | } | 695 | } |
694 | 696 | ||
697 | void __init early_trap_pf_init(void) | ||
698 | { | ||
699 | #ifdef CONFIG_X86_64 | ||
700 | set_intr_gate(X86_TRAP_PF, &page_fault); | ||
701 | #endif | ||
702 | } | ||
703 | |||
695 | void __init trap_init(void) | 704 | void __init trap_init(void) |
696 | { | 705 | { |
697 | int i; | 706 | int i; |
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index 7a3d075a814a..50cf83ecd32e 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c | |||
@@ -62,10 +62,6 @@ struct x86_init_ops x86_init __initdata = { | |||
62 | .banner = default_banner, | 62 | .banner = default_banner, |
63 | }, | 63 | }, |
64 | 64 | ||
65 | .mapping = { | ||
66 | .pagetable_reserve = native_pagetable_reserve, | ||
67 | }, | ||
68 | |||
69 | .paging = { | 65 | .paging = { |
70 | .pagetable_init = native_pagetable_init, | 66 | .pagetable_init = native_pagetable_init, |
71 | }, | 67 | }, |
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index d7aea41563b3..d41815265a0b 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c | |||
@@ -17,86 +17,132 @@ | |||
17 | #include <asm/proto.h> | 17 | #include <asm/proto.h> |
18 | #include <asm/dma.h> /* for MAX_DMA_PFN */ | 18 | #include <asm/dma.h> /* for MAX_DMA_PFN */ |
19 | 19 | ||
20 | unsigned long __initdata pgt_buf_start; | 20 | #include "mm_internal.h" |
21 | unsigned long __meminitdata pgt_buf_end; | ||
22 | unsigned long __meminitdata pgt_buf_top; | ||
23 | 21 | ||
24 | int after_bootmem; | 22 | static unsigned long __initdata pgt_buf_start; |
23 | static unsigned long __initdata pgt_buf_end; | ||
24 | static unsigned long __initdata pgt_buf_top; | ||
25 | 25 | ||
26 | int direct_gbpages | 26 | static unsigned long min_pfn_mapped; |
27 | #ifdef CONFIG_DIRECT_GBPAGES | ||
28 | = 1 | ||
29 | #endif | ||
30 | ; | ||
31 | 27 | ||
32 | struct map_range { | 28 | static bool __initdata can_use_brk_pgt = true; |
33 | unsigned long start; | ||
34 | unsigned long end; | ||
35 | unsigned page_size_mask; | ||
36 | }; | ||
37 | 29 | ||
38 | /* | 30 | /* |
39 | * First calculate space needed for kernel direct mapping page tables to cover | 31 | * Pages returned are already directly mapped. |
40 | * mr[0].start to mr[nr_range - 1].end, while accounting for possible 2M and 1GB | 32 | * |
41 | * pages. Then find enough contiguous space for those page tables. | 33 | * Changing that is likely to break Xen, see commit: |
34 | * | ||
35 | * 279b706 x86,xen: introduce x86_init.mapping.pagetable_reserve | ||
36 | * | ||
37 | * for detailed information. | ||
42 | */ | 38 | */ |
43 | static void __init find_early_table_space(struct map_range *mr, int nr_range) | 39 | __ref void *alloc_low_pages(unsigned int num) |
44 | { | 40 | { |
41 | unsigned long pfn; | ||
45 | int i; | 42 | int i; |
46 | unsigned long puds = 0, pmds = 0, ptes = 0, tables; | ||
47 | unsigned long start = 0, good_end; | ||
48 | phys_addr_t base; | ||
49 | 43 | ||
50 | for (i = 0; i < nr_range; i++) { | 44 | if (after_bootmem) { |
51 | unsigned long range, extra; | 45 | unsigned int order; |
52 | 46 | ||
53 | range = mr[i].end - mr[i].start; | 47 | order = get_order((unsigned long)num << PAGE_SHIFT); |
54 | puds += (range + PUD_SIZE - 1) >> PUD_SHIFT; | 48 | return (void *)__get_free_pages(GFP_ATOMIC | __GFP_NOTRACK | |
49 | __GFP_ZERO, order); | ||
50 | } | ||
55 | 51 | ||
56 | if (mr[i].page_size_mask & (1 << PG_LEVEL_1G)) { | 52 | if ((pgt_buf_end + num) > pgt_buf_top || !can_use_brk_pgt) { |
57 | extra = range - ((range >> PUD_SHIFT) << PUD_SHIFT); | 53 | unsigned long ret; |
58 | pmds += (extra + PMD_SIZE - 1) >> PMD_SHIFT; | 54 | if (min_pfn_mapped >= max_pfn_mapped) |
59 | } else { | 55 | panic("alloc_low_page: ran out of memory"); |
60 | pmds += (range + PMD_SIZE - 1) >> PMD_SHIFT; | 56 | ret = memblock_find_in_range(min_pfn_mapped << PAGE_SHIFT, |
61 | } | 57 | max_pfn_mapped << PAGE_SHIFT, |
58 | PAGE_SIZE * num , PAGE_SIZE); | ||
59 | if (!ret) | ||
60 | panic("alloc_low_page: can not alloc memory"); | ||
61 | memblock_reserve(ret, PAGE_SIZE * num); | ||
62 | pfn = ret >> PAGE_SHIFT; | ||
63 | } else { | ||
64 | pfn = pgt_buf_end; | ||
65 | pgt_buf_end += num; | ||
66 | printk(KERN_DEBUG "BRK [%#010lx, %#010lx] PGTABLE\n", | ||
67 | pfn << PAGE_SHIFT, (pgt_buf_end << PAGE_SHIFT) - 1); | ||
68 | } | ||
62 | 69 | ||
63 | if (mr[i].page_size_mask & (1 << PG_LEVEL_2M)) { | 70 | for (i = 0; i < num; i++) { |
64 | extra = range - ((range >> PMD_SHIFT) << PMD_SHIFT); | 71 | void *adr; |
65 | #ifdef CONFIG_X86_32 | 72 | |
66 | extra += PMD_SIZE; | 73 | adr = __va((pfn + i) << PAGE_SHIFT); |
67 | #endif | 74 | clear_page(adr); |
68 | ptes += (extra + PAGE_SIZE - 1) >> PAGE_SHIFT; | ||
69 | } else { | ||
70 | ptes += (range + PAGE_SIZE - 1) >> PAGE_SHIFT; | ||
71 | } | ||
72 | } | 75 | } |
73 | 76 | ||
74 | tables = roundup(puds * sizeof(pud_t), PAGE_SIZE); | 77 | return __va(pfn << PAGE_SHIFT); |
75 | tables += roundup(pmds * sizeof(pmd_t), PAGE_SIZE); | 78 | } |
76 | tables += roundup(ptes * sizeof(pte_t), PAGE_SIZE); | ||
77 | 79 | ||
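alloc_low_pages() now hides both allocation strategies behind one call: pages come from the small brk-backed pool until it runs out (or can_use_brk_pgt is cleared), then from memblock within the already-mapped window; after boot it falls through to the page allocator. Callers never need to care which path fired. A hedged sketch of a typical caller, modelled on the populate paths in mm/init_64.c (simplified, no locking):

static pmd_t *sketch_fill_pud(pud_t *pud, unsigned long addr)
{
	pmd_t *pmd;

	if (!pud_none(*pud))
		return pmd_offset(pud, addr);

	pmd = alloc_low_pages(1);	/* zeroed and already mapped */
	pud_populate(&init_mm, pud, pmd);
	return pmd + pmd_index(addr);
}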
78 | #ifdef CONFIG_X86_32 | 80 | /* need 4 4k for initial PMD_SIZE, 4k for 0-ISA_END_ADDRESS */ |
79 | /* for fixmap */ | 81 | #define INIT_PGT_BUF_SIZE (5 * PAGE_SIZE) |
80 | tables += roundup(__end_of_fixed_addresses * sizeof(pte_t), PAGE_SIZE); | 82 | RESERVE_BRK(early_pgt_alloc, INIT_PGT_BUF_SIZE); |
81 | #endif | 83 | void __init early_alloc_pgt_buf(void) |
82 | good_end = max_pfn_mapped << PAGE_SHIFT; | 84 | { |
85 | unsigned long tables = INIT_PGT_BUF_SIZE; | ||
86 | phys_addr_t base; | ||
83 | 87 | ||
84 | base = memblock_find_in_range(start, good_end, tables, PAGE_SIZE); | 88 | base = __pa(extend_brk(tables, PAGE_SIZE)); |
85 | if (!base) | ||
86 | panic("Cannot find space for the kernel page tables"); | ||
87 | 89 | ||
88 | pgt_buf_start = base >> PAGE_SHIFT; | 90 | pgt_buf_start = base >> PAGE_SHIFT; |
89 | pgt_buf_end = pgt_buf_start; | 91 | pgt_buf_end = pgt_buf_start; |
90 | pgt_buf_top = pgt_buf_start + (tables >> PAGE_SHIFT); | 92 | pgt_buf_top = pgt_buf_start + (tables >> PAGE_SHIFT); |
93 | } | ||
94 | |||
95 | int after_bootmem; | ||
96 | |||
97 | int direct_gbpages | ||
98 | #ifdef CONFIG_DIRECT_GBPAGES | ||
99 | = 1 | ||
100 | #endif | ||
101 | ; | ||
91 | 102 | ||
92 | printk(KERN_DEBUG "kernel direct mapping tables up to %#lx @ [mem %#010lx-%#010lx]\n", | 103 | static void __init init_gbpages(void) |
93 | mr[nr_range - 1].end - 1, pgt_buf_start << PAGE_SHIFT, | 104 | { |
94 | (pgt_buf_top << PAGE_SHIFT) - 1); | 105 | #ifdef CONFIG_X86_64 |
106 | if (direct_gbpages && cpu_has_gbpages) | ||
107 | printk(KERN_INFO "Using GB pages for direct mapping\n"); | ||
108 | else | ||
109 | direct_gbpages = 0; | ||
110 | #endif | ||
95 | } | 111 | } |
96 | 112 | ||
97 | void __init native_pagetable_reserve(u64 start, u64 end) | 113 | struct map_range { |
114 | unsigned long start; | ||
115 | unsigned long end; | ||
116 | unsigned page_size_mask; | ||
117 | }; | ||
118 | |||
119 | static int page_size_mask; | ||
120 | |||
121 | static void __init probe_page_size_mask(void) | ||
98 | { | 122 | { |
99 | memblock_reserve(start, end - start); | 123 | init_gbpages(); |
124 | |||
125 | #if !defined(CONFIG_DEBUG_PAGEALLOC) && !defined(CONFIG_KMEMCHECK) | ||
126 | /* | ||
127 | * For CONFIG_DEBUG_PAGEALLOC, identity mapping will use small pages. | ||
128 | * This will simplify cpa(), which otherwise needs to support splitting | ||
129 | * large pages into small in interrupt context, etc. | ||
130 | */ | ||
131 | if (direct_gbpages) | ||
132 | page_size_mask |= 1 << PG_LEVEL_1G; | ||
133 | if (cpu_has_pse) | ||
134 | page_size_mask |= 1 << PG_LEVEL_2M; | ||
135 | #endif | ||
136 | |||
137 | /* Enable PSE if available */ | ||
138 | if (cpu_has_pse) | ||
139 | set_in_cr4(X86_CR4_PSE); | ||
140 | |||
141 | /* Enable PGE if available */ | ||
142 | if (cpu_has_pge) { | ||
143 | set_in_cr4(X86_CR4_PGE); | ||
144 | __supported_pte_mask |= _PAGE_GLOBAL; | ||
145 | } | ||
100 | } | 146 | } |
101 | 147 | ||
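probe_page_size_mask() folds what used to be decided inside every init_memory_mapping() call into one boot-time policy. On a typical modern CPU (PSE and GB pages present, no DEBUG_PAGEALLOC/KMEMCHECK) the mask ends up with both large-page bits set; with the enum values from pgtable_types.h (PG_LEVEL_2M == 2, PG_LEVEL_1G == 3) that is:

/* Illustration of the mask computed above on a pse+gbpages CPU. */
static int __init example_page_size_mask(void)
{
	int mask = 0;

	mask |= 1 << PG_LEVEL_1G;	/* direct_gbpages && cpu_has_gbpages */
	mask |= 1 << PG_LEVEL_2M;	/* cpu_has_pse                       */

	return mask;			/* (1 << 3) | (1 << 2) == 0xc */
}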
102 | #ifdef CONFIG_X86_32 | 148 | #ifdef CONFIG_X86_32 |
@@ -122,58 +168,51 @@ static int __meminit save_mr(struct map_range *mr, int nr_range, | |||
122 | } | 168 | } |
123 | 169 | ||
124 | /* | 170 | /* |
125 | * Setup the direct mapping of the physical memory at PAGE_OFFSET. | 171 | * adjust the page_size_mask for a small range to use a big |
126 | * This runs before bootmem is initialized and gets pages directly from | 172 | * page size instead of a small one if the nearby ranges are RAM too. |
127 | * the physical memory. To access them they are temporarily mapped. | ||
128 | */ | 173 | */ |
129 | unsigned long __init_refok init_memory_mapping(unsigned long start, | 174 | static void __init_refok adjust_range_page_size_mask(struct map_range *mr, |
130 | unsigned long end) | 175 | int nr_range) |
131 | { | 176 | { |
132 | unsigned long page_size_mask = 0; | 177 | int i; |
133 | unsigned long start_pfn, end_pfn; | ||
134 | unsigned long ret = 0; | ||
135 | unsigned long pos; | ||
136 | |||
137 | struct map_range mr[NR_RANGE_MR]; | ||
138 | int nr_range, i; | ||
139 | int use_pse, use_gbpages; | ||
140 | 178 | ||
141 | printk(KERN_INFO "init_memory_mapping: [mem %#010lx-%#010lx]\n", | 179 | for (i = 0; i < nr_range; i++) { |
142 | start, end - 1); | 180 | if ((page_size_mask & (1<<PG_LEVEL_2M)) && |
181 | !(mr[i].page_size_mask & (1<<PG_LEVEL_2M))) { | ||
182 | unsigned long start = round_down(mr[i].start, PMD_SIZE); | ||
183 | unsigned long end = round_up(mr[i].end, PMD_SIZE); | ||
143 | 184 | ||
144 | #if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KMEMCHECK) | 185 | #ifdef CONFIG_X86_32 |
145 | /* | 186 | if ((end >> PAGE_SHIFT) > max_low_pfn) |
146 | * For CONFIG_DEBUG_PAGEALLOC, identity mapping will use small pages. | 187 | continue; |
147 | * This will simplify cpa(), which otherwise needs to support splitting | ||
148 | * large pages into small in interrupt context, etc. | ||
149 | */ | ||
150 | use_pse = use_gbpages = 0; | ||
151 | #else | ||
152 | use_pse = cpu_has_pse; | ||
153 | use_gbpages = direct_gbpages; | ||
154 | #endif | 188 | #endif |
155 | 189 | ||
156 | /* Enable PSE if available */ | 190 | if (memblock_is_region_memory(start, end - start)) |
157 | if (cpu_has_pse) | 191 | mr[i].page_size_mask |= 1<<PG_LEVEL_2M; |
158 | set_in_cr4(X86_CR4_PSE); | 192 | } |
193 | if ((page_size_mask & (1<<PG_LEVEL_1G)) && | ||
194 | !(mr[i].page_size_mask & (1<<PG_LEVEL_1G))) { | ||
195 | unsigned long start = round_down(mr[i].start, PUD_SIZE); | ||
196 | unsigned long end = round_up(mr[i].end, PUD_SIZE); | ||
159 | 197 | ||
160 | /* Enable PGE if available */ | 198 | if (memblock_is_region_memory(start, end - start)) |
161 | if (cpu_has_pge) { | 199 | mr[i].page_size_mask |= 1<<PG_LEVEL_1G; |
162 | set_in_cr4(X86_CR4_PGE); | 200 | } |
163 | __supported_pte_mask |= _PAGE_GLOBAL; | ||
164 | } | 201 | } |
202 | } | ||
165 | 203 | ||
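adjust_range_page_size_mask() promotes a small mapping request back to a large page size when the surrounding memory is RAM anyway, so piecewise requests do not fragment the direct mapping. A worked example with invented addresses:

/* A request for [0x1fe00000, 0x20000000) is only 2 MiB, but rounded to
 * PUD_SIZE it spans [0, 0x40000000); if memblock says that whole 1 GiB
 * is RAM, the range may as well be mapped with a 1G page.
 */
static void __init example_promotion(struct map_range *mr)
{
	unsigned long start = round_down(mr->start, PUD_SIZE);	/* 0x0        */
	unsigned long end   = round_up(mr->end, PUD_SIZE);	/* 0x40000000 */

	if (memblock_is_region_memory(start, end - start))
		mr->page_size_mask |= 1 << PG_LEVEL_1G;
}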
166 | if (use_gbpages) | 204 | static int __meminit split_mem_range(struct map_range *mr, int nr_range, |
167 | page_size_mask |= 1 << PG_LEVEL_1G; | 205 | unsigned long start, |
168 | if (use_pse) | 206 | unsigned long end) |
169 | page_size_mask |= 1 << PG_LEVEL_2M; | 207 | { |
208 | unsigned long start_pfn, end_pfn, limit_pfn; | ||
209 | unsigned long pfn; | ||
210 | int i; | ||
170 | 211 | ||
171 | memset(mr, 0, sizeof(mr)); | 212 | limit_pfn = PFN_DOWN(end); |
172 | nr_range = 0; | ||
173 | 213 | ||
174 | /* head if not big page alignment ? */ | 214 | /* head if not big page alignment ? */ |
175 | start_pfn = start >> PAGE_SHIFT; | 215 | pfn = start_pfn = PFN_DOWN(start); |
176 | pos = start_pfn << PAGE_SHIFT; | ||
177 | #ifdef CONFIG_X86_32 | 216 | #ifdef CONFIG_X86_32 |
178 | /* | 217 | /* |
179 | * Don't use a large page for the first 2/4MB of memory | 218 | * Don't use a large page for the first 2/4MB of memory |
@@ -181,66 +220,60 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, | |||
181 | * and overlapping MTRRs into large pages can cause | 220 | * and overlapping MTRRs into large pages can cause |
182 | * slowdowns. | 221 | * slowdowns. |
183 | */ | 222 | */ |
184 | if (pos == 0) | 223 | if (pfn == 0) |
185 | end_pfn = 1<<(PMD_SHIFT - PAGE_SHIFT); | 224 | end_pfn = PFN_DOWN(PMD_SIZE); |
186 | else | 225 | else |
187 | end_pfn = ((pos + (PMD_SIZE - 1))>>PMD_SHIFT) | 226 | end_pfn = round_up(pfn, PFN_DOWN(PMD_SIZE)); |
188 | << (PMD_SHIFT - PAGE_SHIFT); | ||
189 | #else /* CONFIG_X86_64 */ | 227 | #else /* CONFIG_X86_64 */ |
190 | end_pfn = ((pos + (PMD_SIZE - 1)) >> PMD_SHIFT) | 228 | end_pfn = round_up(pfn, PFN_DOWN(PMD_SIZE)); |
191 | << (PMD_SHIFT - PAGE_SHIFT); | ||
192 | #endif | 229 | #endif |
193 | if (end_pfn > (end >> PAGE_SHIFT)) | 230 | if (end_pfn > limit_pfn) |
194 | end_pfn = end >> PAGE_SHIFT; | 231 | end_pfn = limit_pfn; |
195 | if (start_pfn < end_pfn) { | 232 | if (start_pfn < end_pfn) { |
196 | nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0); | 233 | nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0); |
197 | pos = end_pfn << PAGE_SHIFT; | 234 | pfn = end_pfn; |
198 | } | 235 | } |
199 | 236 | ||
200 | /* big page (2M) range */ | 237 | /* big page (2M) range */ |
201 | start_pfn = ((pos + (PMD_SIZE - 1))>>PMD_SHIFT) | 238 | start_pfn = round_up(pfn, PFN_DOWN(PMD_SIZE)); |
202 | << (PMD_SHIFT - PAGE_SHIFT); | ||
203 | #ifdef CONFIG_X86_32 | 239 | #ifdef CONFIG_X86_32 |
204 | end_pfn = (end>>PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT); | 240 | end_pfn = round_down(limit_pfn, PFN_DOWN(PMD_SIZE)); |
205 | #else /* CONFIG_X86_64 */ | 241 | #else /* CONFIG_X86_64 */ |
206 | end_pfn = ((pos + (PUD_SIZE - 1))>>PUD_SHIFT) | 242 | end_pfn = round_up(pfn, PFN_DOWN(PUD_SIZE)); |
207 | << (PUD_SHIFT - PAGE_SHIFT); | 243 | if (end_pfn > round_down(limit_pfn, PFN_DOWN(PMD_SIZE))) |
208 | if (end_pfn > ((end>>PMD_SHIFT)<<(PMD_SHIFT - PAGE_SHIFT))) | 244 | end_pfn = round_down(limit_pfn, PFN_DOWN(PMD_SIZE)); |
209 | end_pfn = ((end>>PMD_SHIFT)<<(PMD_SHIFT - PAGE_SHIFT)); | ||
210 | #endif | 245 | #endif |
211 | 246 | ||
212 | if (start_pfn < end_pfn) { | 247 | if (start_pfn < end_pfn) { |
213 | nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, | 248 | nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, |
214 | page_size_mask & (1<<PG_LEVEL_2M)); | 249 | page_size_mask & (1<<PG_LEVEL_2M)); |
215 | pos = end_pfn << PAGE_SHIFT; | 250 | pfn = end_pfn; |
216 | } | 251 | } |
217 | 252 | ||
218 | #ifdef CONFIG_X86_64 | 253 | #ifdef CONFIG_X86_64 |
219 | /* big page (1G) range */ | 254 | /* big page (1G) range */ |
220 | start_pfn = ((pos + (PUD_SIZE - 1))>>PUD_SHIFT) | 255 | start_pfn = round_up(pfn, PFN_DOWN(PUD_SIZE)); |
221 | << (PUD_SHIFT - PAGE_SHIFT); | 256 | end_pfn = round_down(limit_pfn, PFN_DOWN(PUD_SIZE)); |
222 | end_pfn = (end >> PUD_SHIFT) << (PUD_SHIFT - PAGE_SHIFT); | ||
223 | if (start_pfn < end_pfn) { | 257 | if (start_pfn < end_pfn) { |
224 | nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, | 258 | nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, |
225 | page_size_mask & | 259 | page_size_mask & |
226 | ((1<<PG_LEVEL_2M)|(1<<PG_LEVEL_1G))); | 260 | ((1<<PG_LEVEL_2M)|(1<<PG_LEVEL_1G))); |
227 | pos = end_pfn << PAGE_SHIFT; | 261 | pfn = end_pfn; |
228 | } | 262 | } |
229 | 263 | ||
230 | /* tail is not big page (1G) alignment */ | 264 | /* tail is not big page (1G) alignment */ |
231 | start_pfn = ((pos + (PMD_SIZE - 1))>>PMD_SHIFT) | 265 | start_pfn = round_up(pfn, PFN_DOWN(PMD_SIZE)); |
232 | << (PMD_SHIFT - PAGE_SHIFT); | 266 | end_pfn = round_down(limit_pfn, PFN_DOWN(PMD_SIZE)); |
233 | end_pfn = (end >> PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT); | ||
234 | if (start_pfn < end_pfn) { | 267 | if (start_pfn < end_pfn) { |
235 | nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, | 268 | nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, |
236 | page_size_mask & (1<<PG_LEVEL_2M)); | 269 | page_size_mask & (1<<PG_LEVEL_2M)); |
237 | pos = end_pfn << PAGE_SHIFT; | 270 | pfn = end_pfn; |
238 | } | 271 | } |
239 | #endif | 272 | #endif |
240 | 273 | ||
241 | /* tail is not big page (2M) alignment */ | 274 | /* tail is not big page (2M) alignment */ |
242 | start_pfn = pos>>PAGE_SHIFT; | 275 | start_pfn = pfn; |
243 | end_pfn = end>>PAGE_SHIFT; | 276 | end_pfn = limit_pfn; |
244 | nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0); | 277 | nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0); |
245 | 278 | ||
246 | /* try to merge same page size and continuous */ | 279 | /* try to merge same page size and continuous */ |
@@ -257,59 +290,169 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, | |||
257 | nr_range--; | 290 | nr_range--; |
258 | } | 291 | } |
259 | 292 | ||
293 | if (!after_bootmem) | ||
294 | adjust_range_page_size_mask(mr, nr_range); | ||
295 | |||
260 | for (i = 0; i < nr_range; i++) | 296 | for (i = 0; i < nr_range; i++) |
261 | printk(KERN_DEBUG " [mem %#010lx-%#010lx] page %s\n", | 297 | printk(KERN_DEBUG " [mem %#010lx-%#010lx] page %s\n", |
262 | mr[i].start, mr[i].end - 1, | 298 | mr[i].start, mr[i].end - 1, |
263 | (mr[i].page_size_mask & (1<<PG_LEVEL_1G))?"1G":( | 299 | (mr[i].page_size_mask & (1<<PG_LEVEL_1G))?"1G":( |
264 | (mr[i].page_size_mask & (1<<PG_LEVEL_2M))?"2M":"4k")); | 300 | (mr[i].page_size_mask & (1<<PG_LEVEL_2M))?"2M":"4k")); |
265 | 301 | ||
266 | /* | 302 | return nr_range; |
267 | * Find space for the kernel direct mapping tables. | 303 | } |
268 | * | 304 | |
269 | * Later we should allocate these tables in the local node of the | 305 | struct range pfn_mapped[E820_X_MAX]; |
270 | * memory mapped. Unfortunately this is done currently before the | 306 | int nr_pfn_mapped; |
271 | * nodes are discovered. | 307 | |
272 | */ | 308 | static void add_pfn_range_mapped(unsigned long start_pfn, unsigned long end_pfn) |
273 | if (!after_bootmem) | 309 | { |
274 | find_early_table_space(mr, nr_range); | 310 | nr_pfn_mapped = add_range_with_merge(pfn_mapped, E820_X_MAX, |
311 | nr_pfn_mapped, start_pfn, end_pfn); | ||
312 | nr_pfn_mapped = clean_sort_range(pfn_mapped, E820_X_MAX); | ||
313 | |||
314 | max_pfn_mapped = max(max_pfn_mapped, end_pfn); | ||
315 | |||
316 | if (start_pfn < (1UL<<(32-PAGE_SHIFT))) | ||
317 | max_low_pfn_mapped = max(max_low_pfn_mapped, | ||
318 | min(end_pfn, 1UL<<(32-PAGE_SHIFT))); | ||
319 | } | ||
320 | |||
321 | bool pfn_range_is_mapped(unsigned long start_pfn, unsigned long end_pfn) | ||
322 | { | ||
323 | int i; | ||
324 | |||
325 | for (i = 0; i < nr_pfn_mapped; i++) | ||
326 | if ((start_pfn >= pfn_mapped[i].start) && | ||
327 | (end_pfn <= pfn_mapped[i].end)) | ||
328 | return true; | ||
329 | |||
330 | return false; | ||
331 | } | ||
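The pfn_mapped[] bookkeeping above replaces the old single max_low_pfn_mapped/max_pfn_mapped watermarks: mapped pfn ranges are recorded with merge-on-insert, and lookups are containment tests. Below is a minimal user-space sketch of the same pattern; `struct range` and both helpers are simplified stand-ins for the kernel's add_range_with_merge()/clean_sort_range(), and the single-pass merge skips the sort/re-merge step the kernel performs.

```c
/* Hedged sketch: simplified pfn range bookkeeping, not the kernel code. */
#include <stdbool.h>
#include <stdio.h>

#define MAX_RANGES 16

struct range { unsigned long start, end; };	/* [start, end) in pfns */

static struct range ranges[MAX_RANGES];
static int nr_ranges;

static void add_range_merged(unsigned long start, unsigned long end)
{
	for (int i = 0; i < nr_ranges; i++) {
		/* overlapping or adjacent: grow the existing range */
		if (start <= ranges[i].end && end >= ranges[i].start) {
			if (start < ranges[i].start)
				ranges[i].start = start;
			if (end > ranges[i].end)
				ranges[i].end = end;
			return;
		}
	}
	if (nr_ranges < MAX_RANGES)
		ranges[nr_ranges++] = (struct range){ start, end };
}

static bool range_is_mapped(unsigned long start, unsigned long end)
{
	for (int i = 0; i < nr_ranges; i++)
		if (start >= ranges[i].start && end <= ranges[i].end)
			return true;
	return false;
}

int main(void)
{
	add_range_merged(0, 0x100);		/* ISA range */
	add_range_merged(0x100, 0x8000);	/* merges with the first */
	printf("0x200-0x300 mapped? %d\n", range_is_mapped(0x200, 0x300));
	printf("0x9000-0xa000 mapped? %d\n", range_is_mapped(0x9000, 0xa000));
	return 0;
}
```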
332 | |||
333 | /* | ||
334 | * Set up the direct mapping of the physical memory at PAGE_OFFSET. | ||
335 | * This runs before bootmem is initialized and gets pages directly from | ||
336 | * the physical memory. To access them they are temporarily mapped. | ||
337 | */ | ||
338 | unsigned long __init_refok init_memory_mapping(unsigned long start, | ||
339 | unsigned long end) | ||
340 | { | ||
341 | struct map_range mr[NR_RANGE_MR]; | ||
342 | unsigned long ret = 0; | ||
343 | int nr_range, i; | ||
344 | |||
345 | pr_info("init_memory_mapping: [mem %#010lx-%#010lx]\n", | ||
346 | start, end - 1); | ||
347 | |||
348 | memset(mr, 0, sizeof(mr)); | ||
349 | nr_range = split_mem_range(mr, 0, start, end); | ||
275 | 350 | ||
276 | for (i = 0; i < nr_range; i++) | 351 | for (i = 0; i < nr_range; i++) |
277 | ret = kernel_physical_mapping_init(mr[i].start, mr[i].end, | 352 | ret = kernel_physical_mapping_init(mr[i].start, mr[i].end, |
278 | mr[i].page_size_mask); | 353 | mr[i].page_size_mask); |
279 | 354 | ||
280 | #ifdef CONFIG_X86_32 | 355 | add_pfn_range_mapped(start >> PAGE_SHIFT, ret >> PAGE_SHIFT); |
281 | early_ioremap_page_table_range_init(); | ||
282 | 356 | ||
283 | load_cr3(swapper_pg_dir); | 357 | return ret >> PAGE_SHIFT; |
284 | #endif | 358 | } |
285 | 359 | ||
286 | __flush_tlb_all(); | 360 | /* |
361 | * The range may have holes in the middle or at the ends; only the RAM parts will be mapped. | ||
362 | */ | ||
363 | static unsigned long __init init_range_memory_mapping( | ||
364 | unsigned long r_start, | ||
365 | unsigned long r_end) | ||
366 | { | ||
367 | unsigned long start_pfn, end_pfn; | ||
368 | unsigned long mapped_ram_size = 0; | ||
369 | int i; | ||
287 | 370 | ||
288 | /* | 371 | for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, NULL) { |
289 | * Reserve the kernel pagetable pages we used (pgt_buf_start - | 372 | u64 start = clamp_val(PFN_PHYS(start_pfn), r_start, r_end); |
290 | * pgt_buf_end) and free the other ones (pgt_buf_end - pgt_buf_top) | 373 | u64 end = clamp_val(PFN_PHYS(end_pfn), r_start, r_end); |
291 | * so that they can be reused for other purposes. | 374 | if (start >= end) |
292 | * | 375 | continue; |
293 | * On native it just means calling memblock_reserve, on Xen it also | ||
294 | * means marking RW the pagetable pages that we allocated before | ||
295 | * but that haven't been used. | ||
296 | * | ||
297 | * In fact on xen we mark RO the whole range pgt_buf_start - | ||
298 | * pgt_buf_top, because we have to make sure that when | ||
299 | * init_memory_mapping reaches the pagetable pages area, it maps | ||
300 | * RO all the pagetable pages, including the ones that are beyond | ||
301 | * pgt_buf_end at that time. | ||
302 | */ | ||
303 | if (!after_bootmem && pgt_buf_end > pgt_buf_start) | ||
304 | x86_init.mapping.pagetable_reserve(PFN_PHYS(pgt_buf_start), | ||
305 | PFN_PHYS(pgt_buf_end)); | ||
306 | 376 | ||
307 | if (!after_bootmem) | 377 | /* |
308 | early_memtest(start, end); | 378 | * If it overlaps the brk pgt, we need to |
379 | * allocate the pgt buf from memblock instead. | ||
380 | */ | ||
381 | can_use_brk_pgt = max(start, (u64)pgt_buf_end<<PAGE_SHIFT) >= | ||
382 | min(end, (u64)pgt_buf_top<<PAGE_SHIFT); | ||
383 | init_memory_mapping(start, end); | ||
384 | mapped_ram_size += end - start; | ||
385 | can_use_brk_pgt = true; | ||
386 | } | ||
309 | 387 | ||
310 | return ret >> PAGE_SHIFT; | 388 | return mapped_ram_size; |
311 | } | 389 | } |
312 | 390 | ||
391 | /* (PUD_SHIFT-PMD_SHIFT)/2 */ | ||
392 | #define STEP_SIZE_SHIFT 5 | ||
393 | void __init init_mem_mapping(void) | ||
394 | { | ||
395 | unsigned long end, real_end, start, last_start; | ||
396 | unsigned long step_size; | ||
397 | unsigned long addr; | ||
398 | unsigned long mapped_ram_size = 0; | ||
399 | unsigned long new_mapped_ram_size; | ||
400 | |||
401 | probe_page_size_mask(); | ||
402 | |||
403 | #ifdef CONFIG_X86_64 | ||
404 | end = max_pfn << PAGE_SHIFT; | ||
405 | #else | ||
406 | end = max_low_pfn << PAGE_SHIFT; | ||
407 | #endif | ||
408 | |||
409 | /* the ISA range is always mapped regardless of memory holes */ | ||
410 | init_memory_mapping(0, ISA_END_ADDRESS); | ||
411 | |||
412 | /* Xen has a big reserved range near the end of RAM; skip it at first */ | ||
413 | addr = memblock_find_in_range(ISA_END_ADDRESS, end, PMD_SIZE, | ||
414 | PAGE_SIZE); | ||
415 | real_end = addr + PMD_SIZE; | ||
416 | |||
417 | /* step_size needs to be small so the pgt_buf from BRK can cover it */ | ||
418 | step_size = PMD_SIZE; | ||
419 | max_pfn_mapped = 0; /* will get exact value next */ | ||
420 | min_pfn_mapped = real_end >> PAGE_SHIFT; | ||
421 | last_start = start = real_end; | ||
422 | while (last_start > ISA_END_ADDRESS) { | ||
423 | if (last_start > step_size) { | ||
424 | start = round_down(last_start - 1, step_size); | ||
425 | if (start < ISA_END_ADDRESS) | ||
426 | start = ISA_END_ADDRESS; | ||
427 | } else | ||
428 | start = ISA_END_ADDRESS; | ||
429 | new_mapped_ram_size = init_range_memory_mapping(start, | ||
430 | last_start); | ||
431 | last_start = start; | ||
432 | min_pfn_mapped = last_start >> PAGE_SHIFT; | ||
433 | /* only increase step_size after a big range gets mapped */ | ||
434 | if (new_mapped_ram_size > mapped_ram_size) | ||
435 | step_size <<= STEP_SIZE_SHIFT; | ||
436 | mapped_ram_size += new_mapped_ram_size; | ||
437 | } | ||
438 | |||
439 | if (real_end < end) | ||
440 | init_range_memory_mapping(real_end, end); | ||
441 | |||
442 | #ifdef CONFIG_X86_64 | ||
443 | if (max_pfn > max_low_pfn) { | ||
444 | /* can we preserve max_low_pfn? */ | ||
445 | max_low_pfn = max_pfn; | ||
446 | } | ||
447 | #else | ||
448 | early_ioremap_page_table_range_init(); | ||
449 | #endif | ||
450 | |||
451 | load_cr3(swapper_pg_dir); | ||
452 | __flush_tlb_all(); | ||
453 | |||
454 | early_memtest(0, max_pfn_mapped << PAGE_SHIFT); | ||
455 | } | ||
313 | 456 | ||
314 | /* | 457 | /* |
315 | * devmem_is_allowed() checks to see if /dev/mem access to a certain address | 458 | * devmem_is_allowed() checks to see if /dev/mem access to a certain address |
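For orientation, init_mem_mapping() above walks memory top-down from real_end in exponentially growing steps, so the earliest page tables fit in the small brk-based pgt_buf and later tables can come from RAM that is already mapped. A hedged sketch of just the stepping arithmetic follows; the constants are illustrative, and the kernel grows the step only after a big range actually got mapped, whereas this sketch grows it unconditionally.

```c
/* Hedged sketch of the top-down stepping in init_mem_mapping(). */
#include <stdio.h>

#define ISA_END		0x100000UL	/* 1 MiB */
#define PMD_SIZE	(1UL << 21)	/* initial 2 MiB step */
#define STEP_SIZE_SHIFT	5

static unsigned long round_down_ul(unsigned long x, unsigned long a)
{
	return x & ~(a - 1);		/* a must be a power of two */
}

int main(void)
{
	unsigned long real_end = 1UL << 32;	/* pretend 4 GiB of RAM */
	unsigned long step_size = PMD_SIZE;
	unsigned long last_start = real_end, start;

	while (last_start > ISA_END) {
		if (last_start > step_size) {
			start = round_down_ul(last_start - 1, step_size);
			if (start < ISA_END)
				start = ISA_END;
		} else {
			start = ISA_END;
		}
		printf("map [%#lx, %#lx)\n", start, last_start);
		last_start = start;
		step_size <<= STEP_SIZE_SHIFT;	/* grow for the next chunk */
	}
	return 0;
}
```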
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 745d66b843c8..b299724f6e34 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c | |||
@@ -53,25 +53,14 @@ | |||
53 | #include <asm/page_types.h> | 53 | #include <asm/page_types.h> |
54 | #include <asm/init.h> | 54 | #include <asm/init.h> |
55 | 55 | ||
56 | #include "mm_internal.h" | ||
57 | |||
56 | unsigned long highstart_pfn, highend_pfn; | 58 | unsigned long highstart_pfn, highend_pfn; |
57 | 59 | ||
58 | static noinline int do_test_wp_bit(void); | 60 | static noinline int do_test_wp_bit(void); |
59 | 61 | ||
60 | bool __read_mostly __vmalloc_start_set = false; | 62 | bool __read_mostly __vmalloc_start_set = false; |
61 | 63 | ||
62 | static __init void *alloc_low_page(void) | ||
63 | { | ||
64 | unsigned long pfn = pgt_buf_end++; | ||
65 | void *adr; | ||
66 | |||
67 | if (pfn >= pgt_buf_top) | ||
68 | panic("alloc_low_page: ran out of memory"); | ||
69 | |||
70 | adr = __va(pfn * PAGE_SIZE); | ||
71 | clear_page(adr); | ||
72 | return adr; | ||
73 | } | ||
74 | |||
75 | /* | 64 | /* |
76 | * Creates a middle page table and puts a pointer to it in the | 65 | * Creates a middle page table and puts a pointer to it in the |
77 | * given global directory entry. This only returns the gd entry | 66 | * given global directory entry. This only returns the gd entry |
@@ -84,10 +73,7 @@ static pmd_t * __init one_md_table_init(pgd_t *pgd) | |||
84 | 73 | ||
85 | #ifdef CONFIG_X86_PAE | 74 | #ifdef CONFIG_X86_PAE |
86 | if (!(pgd_val(*pgd) & _PAGE_PRESENT)) { | 75 | if (!(pgd_val(*pgd) & _PAGE_PRESENT)) { |
87 | if (after_bootmem) | 76 | pmd_table = (pmd_t *)alloc_low_page(); |
88 | pmd_table = (pmd_t *)alloc_bootmem_pages(PAGE_SIZE); | ||
89 | else | ||
90 | pmd_table = (pmd_t *)alloc_low_page(); | ||
91 | paravirt_alloc_pmd(&init_mm, __pa(pmd_table) >> PAGE_SHIFT); | 77 | paravirt_alloc_pmd(&init_mm, __pa(pmd_table) >> PAGE_SHIFT); |
92 | set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT)); | 78 | set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT)); |
93 | pud = pud_offset(pgd, 0); | 79 | pud = pud_offset(pgd, 0); |
@@ -109,17 +95,7 @@ static pmd_t * __init one_md_table_init(pgd_t *pgd) | |||
109 | static pte_t * __init one_page_table_init(pmd_t *pmd) | 95 | static pte_t * __init one_page_table_init(pmd_t *pmd) |
110 | { | 96 | { |
111 | if (!(pmd_val(*pmd) & _PAGE_PRESENT)) { | 97 | if (!(pmd_val(*pmd) & _PAGE_PRESENT)) { |
112 | pte_t *page_table = NULL; | 98 | pte_t *page_table = (pte_t *)alloc_low_page(); |
113 | |||
114 | if (after_bootmem) { | ||
115 | #if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KMEMCHECK) | ||
116 | page_table = (pte_t *) alloc_bootmem_pages(PAGE_SIZE); | ||
117 | #endif | ||
118 | if (!page_table) | ||
119 | page_table = | ||
120 | (pte_t *)alloc_bootmem_pages(PAGE_SIZE); | ||
121 | } else | ||
122 | page_table = (pte_t *)alloc_low_page(); | ||
123 | 99 | ||
124 | paravirt_alloc_pte(&init_mm, __pa(page_table) >> PAGE_SHIFT); | 100 | paravirt_alloc_pte(&init_mm, __pa(page_table) >> PAGE_SHIFT); |
125 | set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE)); | 101 | set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE)); |
@@ -146,8 +122,39 @@ pte_t * __init populate_extra_pte(unsigned long vaddr) | |||
146 | return one_page_table_init(pmd) + pte_idx; | 122 | return one_page_table_init(pmd) + pte_idx; |
147 | } | 123 | } |
148 | 124 | ||
125 | static unsigned long __init | ||
126 | page_table_range_init_count(unsigned long start, unsigned long end) | ||
127 | { | ||
128 | unsigned long count = 0; | ||
129 | #ifdef CONFIG_HIGHMEM | ||
130 | int pmd_idx_kmap_begin = fix_to_virt(FIX_KMAP_END) >> PMD_SHIFT; | ||
131 | int pmd_idx_kmap_end = fix_to_virt(FIX_KMAP_BEGIN) >> PMD_SHIFT; | ||
132 | int pgd_idx, pmd_idx; | ||
133 | unsigned long vaddr; | ||
134 | |||
135 | if (pmd_idx_kmap_begin == pmd_idx_kmap_end) | ||
136 | return 0; | ||
137 | |||
138 | vaddr = start; | ||
139 | pgd_idx = pgd_index(vaddr); | ||
140 | |||
141 | for ( ; (pgd_idx < PTRS_PER_PGD) && (vaddr != end); pgd_idx++) { | ||
142 | for (; (pmd_idx < PTRS_PER_PMD) && (vaddr != end); | ||
143 | pmd_idx++) { | ||
144 | if ((vaddr >> PMD_SHIFT) >= pmd_idx_kmap_begin && | ||
145 | (vaddr >> PMD_SHIFT) <= pmd_idx_kmap_end) | ||
146 | count++; | ||
147 | vaddr += PMD_SIZE; | ||
148 | } | ||
149 | pmd_idx = 0; | ||
150 | } | ||
151 | #endif | ||
152 | return count; | ||
153 | } | ||
154 | |||
149 | static pte_t *__init page_table_kmap_check(pte_t *pte, pmd_t *pmd, | 155 | static pte_t *__init page_table_kmap_check(pte_t *pte, pmd_t *pmd, |
150 | unsigned long vaddr, pte_t *lastpte) | 156 | unsigned long vaddr, pte_t *lastpte, |
157 | void **adr) | ||
151 | { | 158 | { |
152 | #ifdef CONFIG_HIGHMEM | 159 | #ifdef CONFIG_HIGHMEM |
153 | /* | 160 | /* |
@@ -161,16 +168,15 @@ static pte_t *__init page_table_kmap_check(pte_t *pte, pmd_t *pmd, | |||
161 | 168 | ||
162 | if (pmd_idx_kmap_begin != pmd_idx_kmap_end | 169 | if (pmd_idx_kmap_begin != pmd_idx_kmap_end |
163 | && (vaddr >> PMD_SHIFT) >= pmd_idx_kmap_begin | 170 | && (vaddr >> PMD_SHIFT) >= pmd_idx_kmap_begin |
164 | && (vaddr >> PMD_SHIFT) <= pmd_idx_kmap_end | 171 | && (vaddr >> PMD_SHIFT) <= pmd_idx_kmap_end) { |
165 | && ((__pa(pte) >> PAGE_SHIFT) < pgt_buf_start | ||
166 | || (__pa(pte) >> PAGE_SHIFT) >= pgt_buf_end)) { | ||
167 | pte_t *newpte; | 172 | pte_t *newpte; |
168 | int i; | 173 | int i; |
169 | 174 | ||
170 | BUG_ON(after_bootmem); | 175 | BUG_ON(after_bootmem); |
171 | newpte = alloc_low_page(); | 176 | newpte = *adr; |
172 | for (i = 0; i < PTRS_PER_PTE; i++) | 177 | for (i = 0; i < PTRS_PER_PTE; i++) |
173 | set_pte(newpte + i, pte[i]); | 178 | set_pte(newpte + i, pte[i]); |
179 | *adr = (void *)(((unsigned long)(*adr)) + PAGE_SIZE); | ||
174 | 180 | ||
175 | paravirt_alloc_pte(&init_mm, __pa(newpte) >> PAGE_SHIFT); | 181 | paravirt_alloc_pte(&init_mm, __pa(newpte) >> PAGE_SHIFT); |
176 | set_pmd(pmd, __pmd(__pa(newpte)|_PAGE_TABLE)); | 182 | set_pmd(pmd, __pmd(__pa(newpte)|_PAGE_TABLE)); |
@@ -204,6 +210,11 @@ page_table_range_init(unsigned long start, unsigned long end, pgd_t *pgd_base) | |||
204 | pgd_t *pgd; | 210 | pgd_t *pgd; |
205 | pmd_t *pmd; | 211 | pmd_t *pmd; |
206 | pte_t *pte = NULL; | 212 | pte_t *pte = NULL; |
213 | unsigned long count = page_table_range_init_count(start, end); | ||
214 | void *adr = NULL; | ||
215 | |||
216 | if (count) | ||
217 | adr = alloc_low_pages(count); | ||
207 | 218 | ||
208 | vaddr = start; | 219 | vaddr = start; |
209 | pgd_idx = pgd_index(vaddr); | 220 | pgd_idx = pgd_index(vaddr); |
@@ -216,7 +227,7 @@ page_table_range_init(unsigned long start, unsigned long end, pgd_t *pgd_base) | |||
216 | for (; (pmd_idx < PTRS_PER_PMD) && (vaddr != end); | 227 | for (; (pmd_idx < PTRS_PER_PMD) && (vaddr != end); |
217 | pmd++, pmd_idx++) { | 228 | pmd++, pmd_idx++) { |
218 | pte = page_table_kmap_check(one_page_table_init(pmd), | 229 | pte = page_table_kmap_check(one_page_table_init(pmd), |
219 | pmd, vaddr, pte); | 230 | pmd, vaddr, pte, &adr); |
220 | 231 | ||
221 | vaddr += PMD_SIZE; | 232 | vaddr += PMD_SIZE; |
222 | } | 233 | } |
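The 32-bit path above now pre-counts how many kmap-area page tables it will need (page_table_range_init_count()) and grabs them in a single alloc_low_pages(count) call, handing pages out one at a time through the adr cursor. Here is a small sketch of that count-then-batch-allocate pattern, with malloc as a stand-in for the kernel allocator:

```c
/* Hedged sketch: batch allocation plus a bump cursor, like *adr above. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define PAGE_SIZE 4096

static void *batch;	/* one contiguous allocation */
static char *cursor;	/* hands out one page at a time */

static void *take_page(void)
{
	void *page = cursor;
	cursor += PAGE_SIZE;
	return page;
}

int main(void)
{
	unsigned long count = 3;	/* would come from a pre-count pass */

	batch = malloc(count * PAGE_SIZE);
	if (!batch)
		return 1;
	memset(batch, 0, count * PAGE_SIZE);	/* page tables start zeroed */
	cursor = batch;

	for (unsigned long i = 0; i < count; i++)
		printf("page table %lu at %p\n", i, take_page());

	free(batch);
	return 0;
}
```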
@@ -310,6 +321,7 @@ repeat: | |||
310 | __pgprot(PTE_IDENT_ATTR | | 321 | __pgprot(PTE_IDENT_ATTR | |
311 | _PAGE_PSE); | 322 | _PAGE_PSE); |
312 | 323 | ||
324 | pfn &= PMD_MASK >> PAGE_SHIFT; | ||
313 | addr2 = (pfn + PTRS_PER_PTE-1) * PAGE_SIZE + | 325 | addr2 = (pfn + PTRS_PER_PTE-1) * PAGE_SIZE + |
314 | PAGE_OFFSET + PAGE_SIZE-1; | 326 | PAGE_OFFSET + PAGE_SIZE-1; |
315 | 327 | ||
@@ -455,9 +467,14 @@ void __init native_pagetable_init(void) | |||
455 | 467 | ||
456 | /* | 468 | /* |
457 | * Remove any mappings which extend past the end of physical | 469 | * Remove any mappings which extend past the end of physical |
458 | * memory from the boot time page table: | 470 | * memory from the boot time page table. |
471 | * In the virtual address space there should be at least two pages | ||
472 | * between VMALLOC_END and pkmap or fixmap, per the VMALLOC_END | ||
473 | * definition, and max_low_pfn corresponds to the VMALLOC_END | ||
474 | * physical address. If the initial memory mapping did its job, | ||
475 | * PTEs are in use near max_low_pfn, or a PMD is not present. | ||
459 | */ | 476 | */ |
460 | for (pfn = max_low_pfn + 1; pfn < 1<<(32-PAGE_SHIFT); pfn++) { | 477 | for (pfn = max_low_pfn; pfn < 1<<(32-PAGE_SHIFT); pfn++) { |
461 | va = PAGE_OFFSET + (pfn<<PAGE_SHIFT); | 478 | va = PAGE_OFFSET + (pfn<<PAGE_SHIFT); |
462 | pgd = base + pgd_index(va); | 479 | pgd = base + pgd_index(va); |
463 | if (!pgd_present(*pgd)) | 480 | if (!pgd_present(*pgd)) |
@@ -468,10 +485,19 @@ void __init native_pagetable_init(void) | |||
468 | if (!pmd_present(*pmd)) | 485 | if (!pmd_present(*pmd)) |
469 | break; | 486 | break; |
470 | 487 | ||
488 | /* there should be no large page here */ | ||
489 | if (pmd_large(*pmd)) { | ||
490 | pr_warn("try to clear pte for ram above max_low_pfn: pfn: %lx pmd: %p pmd phys: %lx, but pmd is big page and is not using pte !\n", | ||
491 | pfn, pmd, __pa(pmd)); | ||
492 | BUG_ON(1); | ||
493 | } | ||
494 | |||
471 | pte = pte_offset_kernel(pmd, va); | 495 | pte = pte_offset_kernel(pmd, va); |
472 | if (!pte_present(*pte)) | 496 | if (!pte_present(*pte)) |
473 | break; | 497 | break; |
474 | 498 | ||
499 | printk(KERN_DEBUG "clearing pte for ram above max_low_pfn: pfn: %lx pmd: %p pmd phys: %lx pte: %p pte phys: %lx\n", | ||
500 | pfn, pmd, __pa(pmd), pte, __pa(pte)); | ||
475 | pte_clear(NULL, va, pte); | 501 | pte_clear(NULL, va, pte); |
476 | } | 502 | } |
477 | paravirt_alloc_pmd(&init_mm, __pa(base) >> PAGE_SHIFT); | 503 | paravirt_alloc_pmd(&init_mm, __pa(base) >> PAGE_SHIFT); |
@@ -550,7 +576,7 @@ early_param("highmem", parse_highmem); | |||
550 | * artificially via the highmem=x boot parameter then create | 576 | * artificially via the highmem=x boot parameter then create |
551 | * it: | 577 | * it: |
552 | */ | 578 | */ |
553 | void __init lowmem_pfn_init(void) | 579 | static void __init lowmem_pfn_init(void) |
554 | { | 580 | { |
555 | /* max_low_pfn is 0, we already have early_res support */ | 581 | /* max_low_pfn is 0, we already have early_res support */ |
556 | max_low_pfn = max_pfn; | 582 | max_low_pfn = max_pfn; |
@@ -586,7 +612,7 @@ void __init lowmem_pfn_init(void) | |||
586 | * We have more RAM than fits into lowmem - we try to put it into | 612 | * We have more RAM than fits into lowmem - we try to put it into |
587 | * highmem, also taking the highmem=x boot parameter into account: | 613 | * highmem, also taking the highmem=x boot parameter into account: |
588 | */ | 614 | */ |
589 | void __init highmem_pfn_init(void) | 615 | static void __init highmem_pfn_init(void) |
590 | { | 616 | { |
591 | max_low_pfn = MAXMEM_PFN; | 617 | max_low_pfn = MAXMEM_PFN; |
592 | 618 | ||
@@ -669,8 +695,6 @@ void __init setup_bootmem_allocator(void) | |||
669 | printk(KERN_INFO " mapped low ram: 0 - %08lx\n", | 695 | printk(KERN_INFO " mapped low ram: 0 - %08lx\n", |
670 | max_pfn_mapped<<PAGE_SHIFT); | 696 | max_pfn_mapped<<PAGE_SHIFT); |
671 | printk(KERN_INFO " low ram: 0 - %08lx\n", max_low_pfn<<PAGE_SHIFT); | 697 | printk(KERN_INFO " low ram: 0 - %08lx\n", max_low_pfn<<PAGE_SHIFT); |
672 | |||
673 | after_bootmem = 1; | ||
674 | } | 698 | } |
675 | 699 | ||
676 | /* | 700 | /* |
@@ -753,6 +777,8 @@ void __init mem_init(void) | |||
753 | if (page_is_ram(tmp) && PageReserved(pfn_to_page(tmp))) | 777 | if (page_is_ram(tmp) && PageReserved(pfn_to_page(tmp))) |
754 | reservedpages++; | 778 | reservedpages++; |
755 | 779 | ||
780 | after_bootmem = 1; | ||
781 | |||
756 | codesize = (unsigned long) &_etext - (unsigned long) &_text; | 782 | codesize = (unsigned long) &_etext - (unsigned long) &_text; |
757 | datasize = (unsigned long) &_edata - (unsigned long) &_etext; | 783 | datasize = (unsigned long) &_edata - (unsigned long) &_etext; |
758 | initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin; | 784 | initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin; |
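One subtle fix in the init_32.c hunk above is `pfn &= PMD_MASK >> PAGE_SHIFT;`: the frame number fed into a PSE (large page) mapping must be aligned to the large-page boundary, and masking in pfn space clears the low bits. A worked sketch of the arithmetic, with constants matching x86's 4 KiB pages and 2 MiB large pages:

```c
/* Hedged sketch of the pfn alignment behind "pfn &= PMD_MASK >> PAGE_SHIFT". */
#include <stdio.h>

#define PAGE_SHIFT	12
#define PMD_SHIFT	21
#define PMD_SIZE	(1UL << PMD_SHIFT)
#define PMD_MASK	(~(PMD_SIZE - 1))

int main(void)
{
	unsigned long pfn = 0x123;	/* arbitrary, not 2M-aligned */
	unsigned long aligned = pfn & (PMD_MASK >> PAGE_SHIFT);

	/* 2 MiB / 4 KiB = 512 frames per large page, so the low
	 * 9 bits of the pfn must be zero for a PSE mapping. */
	printf("pfn %#lx -> %#lx (multiple of %lu)\n",
	       pfn, aligned, PMD_SIZE >> PAGE_SHIFT);
	return 0;
}
```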
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 287c6d6a9ef1..edaa2daf4b37 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c | |||
@@ -54,6 +54,82 @@ | |||
54 | #include <asm/uv/uv.h> | 54 | #include <asm/uv/uv.h> |
55 | #include <asm/setup.h> | 55 | #include <asm/setup.h> |
56 | 56 | ||
57 | #include "mm_internal.h" | ||
58 | |||
59 | static void ident_pmd_init(unsigned long pmd_flag, pmd_t *pmd_page, | ||
60 | unsigned long addr, unsigned long end) | ||
61 | { | ||
62 | addr &= PMD_MASK; | ||
63 | for (; addr < end; addr += PMD_SIZE) { | ||
64 | pmd_t *pmd = pmd_page + pmd_index(addr); | ||
65 | |||
66 | if (!pmd_present(*pmd)) | ||
67 | set_pmd(pmd, __pmd(addr | pmd_flag)); | ||
68 | } | ||
69 | } | ||
70 | static int ident_pud_init(struct x86_mapping_info *info, pud_t *pud_page, | ||
71 | unsigned long addr, unsigned long end) | ||
72 | { | ||
73 | unsigned long next; | ||
74 | |||
75 | for (; addr < end; addr = next) { | ||
76 | pud_t *pud = pud_page + pud_index(addr); | ||
77 | pmd_t *pmd; | ||
78 | |||
79 | next = (addr & PUD_MASK) + PUD_SIZE; | ||
80 | if (next > end) | ||
81 | next = end; | ||
82 | |||
83 | if (pud_present(*pud)) { | ||
84 | pmd = pmd_offset(pud, 0); | ||
85 | ident_pmd_init(info->pmd_flag, pmd, addr, next); | ||
86 | continue; | ||
87 | } | ||
88 | pmd = (pmd_t *)info->alloc_pgt_page(info->context); | ||
89 | if (!pmd) | ||
90 | return -ENOMEM; | ||
91 | ident_pmd_init(info->pmd_flag, pmd, addr, next); | ||
92 | set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE)); | ||
93 | } | ||
94 | |||
95 | return 0; | ||
96 | } | ||
97 | |||
98 | int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page, | ||
99 | unsigned long addr, unsigned long end) | ||
100 | { | ||
101 | unsigned long next; | ||
102 | int result; | ||
103 | int off = info->kernel_mapping ? pgd_index(__PAGE_OFFSET) : 0; | ||
104 | |||
105 | for (; addr < end; addr = next) { | ||
106 | pgd_t *pgd = pgd_page + pgd_index(addr) + off; | ||
107 | pud_t *pud; | ||
108 | |||
109 | next = (addr & PGDIR_MASK) + PGDIR_SIZE; | ||
110 | if (next > end) | ||
111 | next = end; | ||
112 | |||
113 | if (pgd_present(*pgd)) { | ||
114 | pud = pud_offset(pgd, 0); | ||
115 | result = ident_pud_init(info, pud, addr, next); | ||
116 | if (result) | ||
117 | return result; | ||
118 | continue; | ||
119 | } | ||
120 | |||
121 | pud = (pud_t *)info->alloc_pgt_page(info->context); | ||
122 | if (!pud) | ||
123 | return -ENOMEM; | ||
124 | result = ident_pud_init(info, pud, addr, next); | ||
125 | if (result) | ||
126 | return result; | ||
127 | set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE)); | ||
128 | } | ||
129 | |||
130 | return 0; | ||
131 | } | ||
132 | |||
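kernel_ident_mapping_init() above is deliberately allocator-agnostic: it takes an x86_mapping_info and calls info->alloc_pgt_page(info->context) for every table it needs, which is what lets the hibernation code later in this merge reuse it with get_safe_page(). A hedged sketch of that callback shape, with simplified stand-in types and malloc-backed pages (not the kernel's structures):

```c
/* Hedged sketch of the allocator-callback pattern, not kernel code. */
#include <stdio.h>
#include <stdlib.h>

struct mapping_info {
	void *(*alloc_pgt_page)(void *context);
	void *context;
	unsigned long pmd_flag;
};

static void *alloc_from_heap(void *context)
{
	(void)context;		/* hibernation passes no context either */
	return calloc(1, 4096);	/* zeroed page-sized table */
}

static int build_one_level(struct mapping_info *info)
{
	void *table = info->alloc_pgt_page(info->context);

	if (!table)
		return -1;	/* the kernel returns -ENOMEM here */
	printf("allocated page table at %p\n", table);
	free(table);
	return 0;
}

int main(void)
{
	struct mapping_info info = {
		.alloc_pgt_page = alloc_from_heap,
		.pmd_flag = 0x1e3,	/* illustrative flag bits only */
	};
	return build_one_level(&info) ? 1 : 0;
}
```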
57 | static int __init parse_direct_gbpages_off(char *arg) | 133 | static int __init parse_direct_gbpages_off(char *arg) |
58 | { | 134 | { |
59 | direct_gbpages = 0; | 135 | direct_gbpages = 0; |
@@ -302,10 +378,18 @@ void __init init_extra_mapping_uc(unsigned long phys, unsigned long size) | |||
302 | void __init cleanup_highmap(void) | 378 | void __init cleanup_highmap(void) |
303 | { | 379 | { |
304 | unsigned long vaddr = __START_KERNEL_map; | 380 | unsigned long vaddr = __START_KERNEL_map; |
305 | unsigned long vaddr_end = __START_KERNEL_map + (max_pfn_mapped << PAGE_SHIFT); | 381 | unsigned long vaddr_end = __START_KERNEL_map + KERNEL_IMAGE_SIZE; |
306 | unsigned long end = roundup((unsigned long)_brk_end, PMD_SIZE) - 1; | 382 | unsigned long end = roundup((unsigned long)_brk_end, PMD_SIZE) - 1; |
307 | pmd_t *pmd = level2_kernel_pgt; | 383 | pmd_t *pmd = level2_kernel_pgt; |
308 | 384 | ||
385 | /* | ||
386 | * Native path, max_pfn_mapped is not set yet. | ||
387 | * Xen has valid max_pfn_mapped set in | ||
388 | * arch/x86/xen/mmu.c:xen_setup_kernel_pagetable(). | ||
389 | */ | ||
390 | if (max_pfn_mapped) | ||
391 | vaddr_end = __START_KERNEL_map + (max_pfn_mapped << PAGE_SHIFT); | ||
392 | |||
309 | for (; vaddr + PMD_SIZE - 1 < vaddr_end; pmd++, vaddr += PMD_SIZE) { | 393 | for (; vaddr + PMD_SIZE - 1 < vaddr_end; pmd++, vaddr += PMD_SIZE) { |
310 | if (pmd_none(*pmd)) | 394 | if (pmd_none(*pmd)) |
311 | continue; | 395 | continue; |
@@ -314,69 +398,24 @@ void __init cleanup_highmap(void) | |||
314 | } | 398 | } |
315 | } | 399 | } |
316 | 400 | ||
317 | static __ref void *alloc_low_page(unsigned long *phys) | ||
318 | { | ||
319 | unsigned long pfn = pgt_buf_end++; | ||
320 | void *adr; | ||
321 | |||
322 | if (after_bootmem) { | ||
323 | adr = (void *)get_zeroed_page(GFP_ATOMIC | __GFP_NOTRACK); | ||
324 | *phys = __pa(adr); | ||
325 | |||
326 | return adr; | ||
327 | } | ||
328 | |||
329 | if (pfn >= pgt_buf_top) | ||
330 | panic("alloc_low_page: ran out of memory"); | ||
331 | |||
332 | adr = early_memremap(pfn * PAGE_SIZE, PAGE_SIZE); | ||
333 | clear_page(adr); | ||
334 | *phys = pfn * PAGE_SIZE; | ||
335 | return adr; | ||
336 | } | ||
337 | |||
338 | static __ref void *map_low_page(void *virt) | ||
339 | { | ||
340 | void *adr; | ||
341 | unsigned long phys, left; | ||
342 | |||
343 | if (after_bootmem) | ||
344 | return virt; | ||
345 | |||
346 | phys = __pa(virt); | ||
347 | left = phys & (PAGE_SIZE - 1); | ||
348 | adr = early_memremap(phys & PAGE_MASK, PAGE_SIZE); | ||
349 | adr = (void *)(((unsigned long)adr) | left); | ||
350 | |||
351 | return adr; | ||
352 | } | ||
353 | |||
354 | static __ref void unmap_low_page(void *adr) | ||
355 | { | ||
356 | if (after_bootmem) | ||
357 | return; | ||
358 | |||
359 | early_iounmap((void *)((unsigned long)adr & PAGE_MASK), PAGE_SIZE); | ||
360 | } | ||
361 | |||
362 | static unsigned long __meminit | 401 | static unsigned long __meminit |
363 | phys_pte_init(pte_t *pte_page, unsigned long addr, unsigned long end, | 402 | phys_pte_init(pte_t *pte_page, unsigned long addr, unsigned long end, |
364 | pgprot_t prot) | 403 | pgprot_t prot) |
365 | { | 404 | { |
366 | unsigned pages = 0; | 405 | unsigned long pages = 0, next; |
367 | unsigned long last_map_addr = end; | 406 | unsigned long last_map_addr = end; |
368 | int i; | 407 | int i; |
369 | 408 | ||
370 | pte_t *pte = pte_page + pte_index(addr); | 409 | pte_t *pte = pte_page + pte_index(addr); |
371 | 410 | ||
372 | for(i = pte_index(addr); i < PTRS_PER_PTE; i++, addr += PAGE_SIZE, pte++) { | 411 | for (i = pte_index(addr); i < PTRS_PER_PTE; i++, addr = next, pte++) { |
373 | 412 | next = (addr & PAGE_MASK) + PAGE_SIZE; | |
374 | if (addr >= end) { | 413 | if (addr >= end) { |
375 | if (!after_bootmem) { | 414 | if (!after_bootmem && |
376 | for(; i < PTRS_PER_PTE; i++, pte++) | 415 | !e820_any_mapped(addr & PAGE_MASK, next, E820_RAM) && |
377 | set_pte(pte, __pte(0)); | 416 | !e820_any_mapped(addr & PAGE_MASK, next, E820_RESERVED_KERN)) |
378 | } | 417 | set_pte(pte, __pte(0)); |
379 | break; | 418 | continue; |
380 | } | 419 | } |
381 | 420 | ||
382 | /* | 421 | /* |
@@ -414,28 +453,25 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end, | |||
414 | int i = pmd_index(address); | 453 | int i = pmd_index(address); |
415 | 454 | ||
416 | for (; i < PTRS_PER_PMD; i++, address = next) { | 455 | for (; i < PTRS_PER_PMD; i++, address = next) { |
417 | unsigned long pte_phys; | ||
418 | pmd_t *pmd = pmd_page + pmd_index(address); | 456 | pmd_t *pmd = pmd_page + pmd_index(address); |
419 | pte_t *pte; | 457 | pte_t *pte; |
420 | pgprot_t new_prot = prot; | 458 | pgprot_t new_prot = prot; |
421 | 459 | ||
460 | next = (address & PMD_MASK) + PMD_SIZE; | ||
422 | if (address >= end) { | 461 | if (address >= end) { |
423 | if (!after_bootmem) { | 462 | if (!after_bootmem && |
424 | for (; i < PTRS_PER_PMD; i++, pmd++) | 463 | !e820_any_mapped(address & PMD_MASK, next, E820_RAM) && |
425 | set_pmd(pmd, __pmd(0)); | 464 | !e820_any_mapped(address & PMD_MASK, next, E820_RESERVED_KERN)) |
426 | } | 465 | set_pmd(pmd, __pmd(0)); |
427 | break; | 466 | continue; |
428 | } | 467 | } |
429 | 468 | ||
430 | next = (address & PMD_MASK) + PMD_SIZE; | ||
431 | |||
432 | if (pmd_val(*pmd)) { | 469 | if (pmd_val(*pmd)) { |
433 | if (!pmd_large(*pmd)) { | 470 | if (!pmd_large(*pmd)) { |
434 | spin_lock(&init_mm.page_table_lock); | 471 | spin_lock(&init_mm.page_table_lock); |
435 | pte = map_low_page((pte_t *)pmd_page_vaddr(*pmd)); | 472 | pte = (pte_t *)pmd_page_vaddr(*pmd); |
436 | last_map_addr = phys_pte_init(pte, address, | 473 | last_map_addr = phys_pte_init(pte, address, |
437 | end, prot); | 474 | end, prot); |
438 | unmap_low_page(pte); | ||
439 | spin_unlock(&init_mm.page_table_lock); | 475 | spin_unlock(&init_mm.page_table_lock); |
440 | continue; | 476 | continue; |
441 | } | 477 | } |
@@ -464,19 +500,18 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end, | |||
464 | pages++; | 500 | pages++; |
465 | spin_lock(&init_mm.page_table_lock); | 501 | spin_lock(&init_mm.page_table_lock); |
466 | set_pte((pte_t *)pmd, | 502 | set_pte((pte_t *)pmd, |
467 | pfn_pte(address >> PAGE_SHIFT, | 503 | pfn_pte((address & PMD_MASK) >> PAGE_SHIFT, |
468 | __pgprot(pgprot_val(prot) | _PAGE_PSE))); | 504 | __pgprot(pgprot_val(prot) | _PAGE_PSE))); |
469 | spin_unlock(&init_mm.page_table_lock); | 505 | spin_unlock(&init_mm.page_table_lock); |
470 | last_map_addr = next; | 506 | last_map_addr = next; |
471 | continue; | 507 | continue; |
472 | } | 508 | } |
473 | 509 | ||
474 | pte = alloc_low_page(&pte_phys); | 510 | pte = alloc_low_page(); |
475 | last_map_addr = phys_pte_init(pte, address, end, new_prot); | 511 | last_map_addr = phys_pte_init(pte, address, end, new_prot); |
476 | unmap_low_page(pte); | ||
477 | 512 | ||
478 | spin_lock(&init_mm.page_table_lock); | 513 | spin_lock(&init_mm.page_table_lock); |
479 | pmd_populate_kernel(&init_mm, pmd, __va(pte_phys)); | 514 | pmd_populate_kernel(&init_mm, pmd, pte); |
480 | spin_unlock(&init_mm.page_table_lock); | 515 | spin_unlock(&init_mm.page_table_lock); |
481 | } | 516 | } |
482 | update_page_count(PG_LEVEL_2M, pages); | 517 | update_page_count(PG_LEVEL_2M, pages); |
@@ -492,27 +527,24 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end, | |||
492 | int i = pud_index(addr); | 527 | int i = pud_index(addr); |
493 | 528 | ||
494 | for (; i < PTRS_PER_PUD; i++, addr = next) { | 529 | for (; i < PTRS_PER_PUD; i++, addr = next) { |
495 | unsigned long pmd_phys; | ||
496 | pud_t *pud = pud_page + pud_index(addr); | 530 | pud_t *pud = pud_page + pud_index(addr); |
497 | pmd_t *pmd; | 531 | pmd_t *pmd; |
498 | pgprot_t prot = PAGE_KERNEL; | 532 | pgprot_t prot = PAGE_KERNEL; |
499 | 533 | ||
500 | if (addr >= end) | ||
501 | break; | ||
502 | |||
503 | next = (addr & PUD_MASK) + PUD_SIZE; | 534 | next = (addr & PUD_MASK) + PUD_SIZE; |
504 | 535 | if (addr >= end) { | |
505 | if (!after_bootmem && !e820_any_mapped(addr, next, 0)) { | 536 | if (!after_bootmem && |
506 | set_pud(pud, __pud(0)); | 537 | !e820_any_mapped(addr & PUD_MASK, next, E820_RAM) && |
538 | !e820_any_mapped(addr & PUD_MASK, next, E820_RESERVED_KERN)) | ||
539 | set_pud(pud, __pud(0)); | ||
507 | continue; | 540 | continue; |
508 | } | 541 | } |
509 | 542 | ||
510 | if (pud_val(*pud)) { | 543 | if (pud_val(*pud)) { |
511 | if (!pud_large(*pud)) { | 544 | if (!pud_large(*pud)) { |
512 | pmd = map_low_page(pmd_offset(pud, 0)); | 545 | pmd = pmd_offset(pud, 0); |
513 | last_map_addr = phys_pmd_init(pmd, addr, end, | 546 | last_map_addr = phys_pmd_init(pmd, addr, end, |
514 | page_size_mask, prot); | 547 | page_size_mask, prot); |
515 | unmap_low_page(pmd); | ||
516 | __flush_tlb_all(); | 548 | __flush_tlb_all(); |
517 | continue; | 549 | continue; |
518 | } | 550 | } |
@@ -541,19 +573,19 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end, | |||
541 | pages++; | 573 | pages++; |
542 | spin_lock(&init_mm.page_table_lock); | 574 | spin_lock(&init_mm.page_table_lock); |
543 | set_pte((pte_t *)pud, | 575 | set_pte((pte_t *)pud, |
544 | pfn_pte(addr >> PAGE_SHIFT, PAGE_KERNEL_LARGE)); | 576 | pfn_pte((addr & PUD_MASK) >> PAGE_SHIFT, |
577 | PAGE_KERNEL_LARGE)); | ||
545 | spin_unlock(&init_mm.page_table_lock); | 578 | spin_unlock(&init_mm.page_table_lock); |
546 | last_map_addr = next; | 579 | last_map_addr = next; |
547 | continue; | 580 | continue; |
548 | } | 581 | } |
549 | 582 | ||
550 | pmd = alloc_low_page(&pmd_phys); | 583 | pmd = alloc_low_page(); |
551 | last_map_addr = phys_pmd_init(pmd, addr, end, page_size_mask, | 584 | last_map_addr = phys_pmd_init(pmd, addr, end, page_size_mask, |
552 | prot); | 585 | prot); |
553 | unmap_low_page(pmd); | ||
554 | 586 | ||
555 | spin_lock(&init_mm.page_table_lock); | 587 | spin_lock(&init_mm.page_table_lock); |
556 | pud_populate(&init_mm, pud, __va(pmd_phys)); | 588 | pud_populate(&init_mm, pud, pmd); |
557 | spin_unlock(&init_mm.page_table_lock); | 589 | spin_unlock(&init_mm.page_table_lock); |
558 | } | 590 | } |
559 | __flush_tlb_all(); | 591 | __flush_tlb_all(); |
@@ -578,28 +610,23 @@ kernel_physical_mapping_init(unsigned long start, | |||
578 | 610 | ||
579 | for (; start < end; start = next) { | 611 | for (; start < end; start = next) { |
580 | pgd_t *pgd = pgd_offset_k(start); | 612 | pgd_t *pgd = pgd_offset_k(start); |
581 | unsigned long pud_phys; | ||
582 | pud_t *pud; | 613 | pud_t *pud; |
583 | 614 | ||
584 | next = (start + PGDIR_SIZE) & PGDIR_MASK; | 615 | next = (start & PGDIR_MASK) + PGDIR_SIZE; |
585 | if (next > end) | ||
586 | next = end; | ||
587 | 616 | ||
588 | if (pgd_val(*pgd)) { | 617 | if (pgd_val(*pgd)) { |
589 | pud = map_low_page((pud_t *)pgd_page_vaddr(*pgd)); | 618 | pud = (pud_t *)pgd_page_vaddr(*pgd); |
590 | last_map_addr = phys_pud_init(pud, __pa(start), | 619 | last_map_addr = phys_pud_init(pud, __pa(start), |
591 | __pa(end), page_size_mask); | 620 | __pa(end), page_size_mask); |
592 | unmap_low_page(pud); | ||
593 | continue; | 621 | continue; |
594 | } | 622 | } |
595 | 623 | ||
596 | pud = alloc_low_page(&pud_phys); | 624 | pud = alloc_low_page(); |
597 | last_map_addr = phys_pud_init(pud, __pa(start), __pa(next), | 625 | last_map_addr = phys_pud_init(pud, __pa(start), __pa(end), |
598 | page_size_mask); | 626 | page_size_mask); |
599 | unmap_low_page(pud); | ||
600 | 627 | ||
601 | spin_lock(&init_mm.page_table_lock); | 628 | spin_lock(&init_mm.page_table_lock); |
602 | pgd_populate(&init_mm, pgd, __va(pud_phys)); | 629 | pgd_populate(&init_mm, pgd, pud); |
603 | spin_unlock(&init_mm.page_table_lock); | 630 | spin_unlock(&init_mm.page_table_lock); |
604 | pgd_changed = true; | 631 | pgd_changed = true; |
605 | } | 632 | } |
@@ -664,13 +691,11 @@ int arch_add_memory(int nid, u64 start, u64 size) | |||
664 | { | 691 | { |
665 | struct pglist_data *pgdat = NODE_DATA(nid); | 692 | struct pglist_data *pgdat = NODE_DATA(nid); |
666 | struct zone *zone = pgdat->node_zones + ZONE_NORMAL; | 693 | struct zone *zone = pgdat->node_zones + ZONE_NORMAL; |
667 | unsigned long last_mapped_pfn, start_pfn = start >> PAGE_SHIFT; | 694 | unsigned long start_pfn = start >> PAGE_SHIFT; |
668 | unsigned long nr_pages = size >> PAGE_SHIFT; | 695 | unsigned long nr_pages = size >> PAGE_SHIFT; |
669 | int ret; | 696 | int ret; |
670 | 697 | ||
671 | last_mapped_pfn = init_memory_mapping(start, start + size); | 698 | init_memory_mapping(start, start + size); |
672 | if (last_mapped_pfn > max_pfn_mapped) | ||
673 | max_pfn_mapped = last_mapped_pfn; | ||
674 | 699 | ||
675 | ret = __add_pages(nid, zone, start_pfn, nr_pages); | 700 | ret = __add_pages(nid, zone, start_pfn, nr_pages); |
676 | WARN_ON_ONCE(ret); | 701 | WARN_ON_ONCE(ret); |
@@ -686,6 +711,16 @@ EXPORT_SYMBOL_GPL(arch_add_memory); | |||
686 | 711 | ||
687 | static struct kcore_list kcore_vsyscall; | 712 | static struct kcore_list kcore_vsyscall; |
688 | 713 | ||
714 | static void __init register_page_bootmem_info(void) | ||
715 | { | ||
716 | #ifdef CONFIG_NUMA | ||
717 | int i; | ||
718 | |||
719 | for_each_online_node(i) | ||
720 | register_page_bootmem_info_node(NODE_DATA(i)); | ||
721 | #endif | ||
722 | } | ||
723 | |||
689 | void __init mem_init(void) | 724 | void __init mem_init(void) |
690 | { | 725 | { |
691 | long codesize, reservedpages, datasize, initsize; | 726 | long codesize, reservedpages, datasize, initsize; |
@@ -698,11 +733,8 @@ void __init mem_init(void) | |||
698 | reservedpages = 0; | 733 | reservedpages = 0; |
699 | 734 | ||
700 | /* this will put all low memory onto the freelists */ | 735 | /* this will put all low memory onto the freelists */ |
701 | #ifdef CONFIG_NUMA | 736 | register_page_bootmem_info(); |
702 | totalram_pages = numa_free_all_bootmem(); | ||
703 | #else | ||
704 | totalram_pages = free_all_bootmem(); | 737 | totalram_pages = free_all_bootmem(); |
705 | #endif | ||
706 | 738 | ||
707 | absent_pages = absent_pages_in_range(0, max_pfn); | 739 | absent_pages = absent_pages_in_range(0, max_pfn); |
708 | reservedpages = max_pfn - totalram_pages - absent_pages; | 740 | reservedpages = max_pfn - totalram_pages - absent_pages; |
@@ -776,6 +808,7 @@ void mark_rodata_ro(void) | |||
776 | unsigned long end = (unsigned long) &__end_rodata_hpage_align; | 808 | unsigned long end = (unsigned long) &__end_rodata_hpage_align; |
777 | unsigned long text_end = PFN_ALIGN(&__stop___ex_table); | 809 | unsigned long text_end = PFN_ALIGN(&__stop___ex_table); |
778 | unsigned long rodata_end = PFN_ALIGN(&__end_rodata); | 810 | unsigned long rodata_end = PFN_ALIGN(&__end_rodata); |
811 | unsigned long all_end = PFN_ALIGN(&_end); | ||
779 | 812 | ||
780 | printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n", | 813 | printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n", |
781 | (end - start) >> 10); | 814 | (end - start) >> 10); |
@@ -784,10 +817,10 @@ void mark_rodata_ro(void) | |||
784 | kernel_set_to_readonly = 1; | 817 | kernel_set_to_readonly = 1; |
785 | 818 | ||
786 | /* | 819 | /* |
787 | * The rodata section (but not the kernel text!) should also be | 820 | * The rodata/data/bss/brk section (but not the kernel text!) |
788 | * not-executable. | 821 | * should also be not-executable. |
789 | */ | 822 | */ |
790 | set_memory_nx(rodata_start, (end - rodata_start) >> PAGE_SHIFT); | 823 | set_memory_nx(rodata_start, (all_end - rodata_start) >> PAGE_SHIFT); |
791 | 824 | ||
792 | rodata_test(); | 825 | rodata_test(); |
793 | 826 | ||
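The mark_rodata_ro() change above widens the NX region from just rodata to everything up to PFN_ALIGN(&_end), i.e. data/bss/brk as well; set_memory_nx() takes a page count derived from the byte span. A sketch of the arithmetic with made-up section addresses, not real kernel symbols:

```c
/* Hedged sketch of the page-count arithmetic in mark_rodata_ro(). */
#include <stdio.h>

#define PAGE_SHIFT	12
#define PAGE_SIZE	(1UL << PAGE_SHIFT)
#define PFN_ALIGN(x)	(((x) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))

int main(void)
{
	unsigned long rodata_start = 0xffff0000UL;		/* page aligned */
	unsigned long rodata_end   = PFN_ALIGN(0xffff4321UL);
	unsigned long all_end      = PFN_ALIGN(0xffffa000UL);	/* like _end */

	printf("NX rodata only:  %lu pages\n",
	       (rodata_end - rodata_start) >> PAGE_SHIFT);
	printf("NX through brk:  %lu pages\n",
	       (all_end - rodata_start) >> PAGE_SHIFT);
	return 0;
}
```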
diff --git a/arch/x86/mm/mm_internal.h b/arch/x86/mm/mm_internal.h new file mode 100644 index 000000000000..6b563a118891 --- /dev/null +++ b/arch/x86/mm/mm_internal.h | |||
@@ -0,0 +1,19 @@ | |||
1 | #ifndef __X86_MM_INTERNAL_H | ||
2 | #define __X86_MM_INTERNAL_H | ||
3 | |||
4 | void *alloc_low_pages(unsigned int num); | ||
5 | static inline void *alloc_low_page(void) | ||
6 | { | ||
7 | return alloc_low_pages(1); | ||
8 | } | ||
9 | |||
10 | void early_ioremap_page_table_range_init(void); | ||
11 | |||
12 | unsigned long kernel_physical_mapping_init(unsigned long start, | ||
13 | unsigned long end, | ||
14 | unsigned long page_size_mask); | ||
15 | void zone_sizes_init(void); | ||
16 | |||
17 | extern int after_bootmem; | ||
18 | |||
19 | #endif /* __X86_MM_INTERNAL_H */ | ||
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c index 92e27119ee1a..9405ffc91502 100644 --- a/arch/x86/mm/numa_64.c +++ b/arch/x86/mm/numa_64.c | |||
@@ -10,16 +10,3 @@ void __init initmem_init(void) | |||
10 | { | 10 | { |
11 | x86_numa_init(); | 11 | x86_numa_init(); |
12 | } | 12 | } |
13 | |||
14 | unsigned long __init numa_free_all_bootmem(void) | ||
15 | { | ||
16 | unsigned long pages = 0; | ||
17 | int i; | ||
18 | |||
19 | for_each_online_node(i) | ||
20 | pages += free_all_bootmem_node(NODE_DATA(i)); | ||
21 | |||
22 | pages += free_low_memory_core_early(MAX_NUMNODES); | ||
23 | |||
24 | return pages; | ||
25 | } | ||
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 6d13d2a3f825..a1b1c88f9caf 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c | |||
@@ -579,16 +579,10 @@ static int split_large_page(pte_t *kpte, unsigned long address) | |||
579 | for (i = 0; i < PTRS_PER_PTE; i++, pfn += pfninc) | 579 | for (i = 0; i < PTRS_PER_PTE; i++, pfn += pfninc) |
580 | set_pte(&pbase[i], pfn_pte(pfn, ref_prot)); | 580 | set_pte(&pbase[i], pfn_pte(pfn, ref_prot)); |
581 | 581 | ||
582 | if (address >= (unsigned long)__va(0) && | 582 | if (pfn_range_is_mapped(PFN_DOWN(__pa(address)), |
583 | address < (unsigned long)__va(max_low_pfn_mapped << PAGE_SHIFT)) | 583 | PFN_DOWN(__pa(address)) + 1)) |
584 | split_page_count(level); | 584 | split_page_count(level); |
585 | 585 | ||
586 | #ifdef CONFIG_X86_64 | ||
587 | if (address >= (unsigned long)__va(1UL<<32) && | ||
588 | address < (unsigned long)__va(max_pfn_mapped << PAGE_SHIFT)) | ||
589 | split_page_count(level); | ||
590 | #endif | ||
591 | |||
592 | /* | 586 | /* |
593 | * Install the new, split up pagetable. | 587 | * Install the new, split up pagetable. |
594 | * | 588 | * |
@@ -757,13 +751,9 @@ static int cpa_process_alias(struct cpa_data *cpa) | |||
757 | unsigned long vaddr; | 751 | unsigned long vaddr; |
758 | int ret; | 752 | int ret; |
759 | 753 | ||
760 | if (cpa->pfn >= max_pfn_mapped) | 754 | if (!pfn_range_is_mapped(cpa->pfn, cpa->pfn + 1)) |
761 | return 0; | 755 | return 0; |
762 | 756 | ||
763 | #ifdef CONFIG_X86_64 | ||
764 | if (cpa->pfn >= max_low_pfn_mapped && cpa->pfn < (1UL<<(32-PAGE_SHIFT))) | ||
765 | return 0; | ||
766 | #endif | ||
767 | /* | 757 | /* |
768 | * No need to redo, when the primary call touched the direct | 758 | * No need to redo, when the primary call touched the direct |
769 | * mapping already: | 759 | * mapping already: |
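The pageattr conversions above replace watermark comparisons (pfn >= max_pfn_mapped, plus the 64-bit low/high special case) with pfn_range_is_mapped() containment queries, which stay correct once the direct map has holes. A toy illustration with a stubbed query:

```c
/* Hedged sketch: watermark check vs. containment query over a map
 * with a hole; pfn_range_is_mapped() is a stub, not the kernel's. */
#include <stdbool.h>
#include <stdio.h>

static bool pfn_range_is_mapped(unsigned long s, unsigned long e)
{
	/* stand-in map: [0, 0x800) and [0x1000, 0x2000), hole between */
	return e <= 0x800 || (s >= 0x1000 && e <= 0x2000);
}

int main(void)
{
	unsigned long max_pfn_mapped = 0x2000;	/* old single watermark */
	unsigned long pfn = 0xc00;		/* sits in the hole */

	printf("watermark says mapped:   %d\n", pfn < max_pfn_mapped);
	printf("range query says mapped: %d\n",
	       pfn_range_is_mapped(pfn, pfn + 1));
	return 0;
}
```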
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 1b600266265e..1743c1c92411 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c | |||
@@ -835,7 +835,7 @@ void __init efi_enter_virtual_mode(void) | |||
835 | efi_memory_desc_t *md, *prev_md = NULL; | 835 | efi_memory_desc_t *md, *prev_md = NULL; |
836 | efi_status_t status; | 836 | efi_status_t status; |
837 | unsigned long size; | 837 | unsigned long size; |
838 | u64 end, systab, end_pfn; | 838 | u64 end, systab, start_pfn, end_pfn; |
839 | void *p, *va, *new_memmap = NULL; | 839 | void *p, *va, *new_memmap = NULL; |
840 | int count = 0; | 840 | int count = 0; |
841 | 841 | ||
@@ -888,10 +888,9 @@ void __init efi_enter_virtual_mode(void) | |||
888 | size = md->num_pages << EFI_PAGE_SHIFT; | 888 | size = md->num_pages << EFI_PAGE_SHIFT; |
889 | end = md->phys_addr + size; | 889 | end = md->phys_addr + size; |
890 | 890 | ||
891 | start_pfn = PFN_DOWN(md->phys_addr); | ||
891 | end_pfn = PFN_UP(end); | 892 | end_pfn = PFN_UP(end); |
892 | if (end_pfn <= max_low_pfn_mapped | 893 | if (pfn_range_is_mapped(start_pfn, end_pfn)) { |
893 | || (end_pfn > (1UL << (32 - PAGE_SHIFT)) | ||
894 | && end_pfn <= max_pfn_mapped)) { | ||
895 | va = __va(md->phys_addr); | 894 | va = __va(md->phys_addr); |
896 | 895 | ||
897 | if (!(md->attribute & EFI_MEMORY_WB)) | 896 | if (!(md->attribute & EFI_MEMORY_WB)) |
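In the EFI hunk above, a descriptor covering bytes [phys_addr, phys_addr + size) is usable via __va() only if every covering page frame is in the direct map, hence the PFN_DOWN(start)/PFN_UP(end) bracketing. A small sketch of the rounding; the macros mirror the kernel definitions for 4 KiB pages:

```c
/* Hedged sketch of PFN_DOWN/PFN_UP bracketing a byte range. */
#include <stdio.h>

#define PAGE_SHIFT	12
#define PAGE_SIZE	(1UL << PAGE_SHIFT)
#define PFN_DOWN(x)	((x) >> PAGE_SHIFT)
#define PFN_UP(x)	(((x) + PAGE_SIZE - 1) >> PAGE_SHIFT)

int main(void)
{
	unsigned long phys = 0x12345;		/* not page aligned */
	unsigned long size = 3 * PAGE_SIZE;
	unsigned long end  = phys + size;

	/* the descriptor is usable only if every covering frame, from
	 * the rounded-down start to the rounded-up end, is mapped */
	printf("pfns %#lx..%#lx must be mapped\n",
	       PFN_DOWN(phys), PFN_UP(end));
	return 0;
}
```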
diff --git a/arch/x86/power/hibernate_64.c b/arch/x86/power/hibernate_64.c index 460f314d13e5..a0fde91c16cf 100644 --- a/arch/x86/power/hibernate_64.c +++ b/arch/x86/power/hibernate_64.c | |||
@@ -11,6 +11,8 @@ | |||
11 | #include <linux/gfp.h> | 11 | #include <linux/gfp.h> |
12 | #include <linux/smp.h> | 12 | #include <linux/smp.h> |
13 | #include <linux/suspend.h> | 13 | #include <linux/suspend.h> |
14 | |||
15 | #include <asm/init.h> | ||
14 | #include <asm/proto.h> | 16 | #include <asm/proto.h> |
15 | #include <asm/page.h> | 17 | #include <asm/page.h> |
16 | #include <asm/pgtable.h> | 18 | #include <asm/pgtable.h> |
@@ -39,41 +41,21 @@ pgd_t *temp_level4_pgt; | |||
39 | 41 | ||
40 | void *relocated_restore_code; | 42 | void *relocated_restore_code; |
41 | 43 | ||
42 | static int res_phys_pud_init(pud_t *pud, unsigned long address, unsigned long end) | 44 | static void *alloc_pgt_page(void *context) |
43 | { | 45 | { |
44 | long i, j; | 46 | return (void *)get_safe_page(GFP_ATOMIC); |
45 | |||
46 | i = pud_index(address); | ||
47 | pud = pud + i; | ||
48 | for (; i < PTRS_PER_PUD; pud++, i++) { | ||
49 | unsigned long paddr; | ||
50 | pmd_t *pmd; | ||
51 | |||
52 | paddr = address + i*PUD_SIZE; | ||
53 | if (paddr >= end) | ||
54 | break; | ||
55 | |||
56 | pmd = (pmd_t *)get_safe_page(GFP_ATOMIC); | ||
57 | if (!pmd) | ||
58 | return -ENOMEM; | ||
59 | set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE)); | ||
60 | for (j = 0; j < PTRS_PER_PMD; pmd++, j++, paddr += PMD_SIZE) { | ||
61 | unsigned long pe; | ||
62 | |||
63 | if (paddr >= end) | ||
64 | break; | ||
65 | pe = __PAGE_KERNEL_LARGE_EXEC | paddr; | ||
66 | pe &= __supported_pte_mask; | ||
67 | set_pmd(pmd, __pmd(pe)); | ||
68 | } | ||
69 | } | ||
70 | return 0; | ||
71 | } | 47 | } |
72 | 48 | ||
73 | static int set_up_temporary_mappings(void) | 49 | static int set_up_temporary_mappings(void) |
74 | { | 50 | { |
75 | unsigned long start, end, next; | 51 | struct x86_mapping_info info = { |
76 | int error; | 52 | .alloc_pgt_page = alloc_pgt_page, |
53 | .pmd_flag = __PAGE_KERNEL_LARGE_EXEC, | ||
54 | .kernel_mapping = true, | ||
55 | }; | ||
56 | unsigned long mstart, mend; | ||
57 | int result; | ||
58 | int i; | ||
77 | 59 | ||
78 | temp_level4_pgt = (pgd_t *)get_safe_page(GFP_ATOMIC); | 60 | temp_level4_pgt = (pgd_t *)get_safe_page(GFP_ATOMIC); |
79 | if (!temp_level4_pgt) | 61 | if (!temp_level4_pgt) |
@@ -84,21 +66,17 @@ static int set_up_temporary_mappings(void) | |||
84 | init_level4_pgt[pgd_index(__START_KERNEL_map)]); | 66 | init_level4_pgt[pgd_index(__START_KERNEL_map)]); |
85 | 67 | ||
86 | /* Set up the direct mapping from scratch */ | 68 | /* Set up the direct mapping from scratch */ |
87 | start = (unsigned long)pfn_to_kaddr(0); | 69 | for (i = 0; i < nr_pfn_mapped; i++) { |
88 | end = (unsigned long)pfn_to_kaddr(max_pfn); | 70 | mstart = pfn_mapped[i].start << PAGE_SHIFT; |
89 | 71 | mend = pfn_mapped[i].end << PAGE_SHIFT; | |
90 | for (; start < end; start = next) { | 72 | |
91 | pud_t *pud = (pud_t *)get_safe_page(GFP_ATOMIC); | 73 | result = kernel_ident_mapping_init(&info, temp_level4_pgt, |
92 | if (!pud) | 74 | mstart, mend); |
93 | return -ENOMEM; | 75 | |
94 | next = start + PGDIR_SIZE; | 76 | if (result) |
95 | if (next > end) | 77 | return result; |
96 | next = end; | ||
97 | if ((error = res_phys_pud_init(pud, __pa(start), __pa(next)))) | ||
98 | return error; | ||
99 | set_pgd(temp_level4_pgt + pgd_index(start), | ||
100 | mk_kernel_pgd(__pa(pud))); | ||
101 | } | 78 | } |
79 | |||
102 | return 0; | 80 | return 0; |
103 | } | 81 | } |
104 | 82 | ||
diff --git a/arch/x86/realmode/init.c b/arch/x86/realmode/init.c index 80450261215c..a44f457e70a1 100644 --- a/arch/x86/realmode/init.c +++ b/arch/x86/realmode/init.c | |||
@@ -8,9 +8,26 @@ | |||
8 | struct real_mode_header *real_mode_header; | 8 | struct real_mode_header *real_mode_header; |
9 | u32 *trampoline_cr4_features; | 9 | u32 *trampoline_cr4_features; |
10 | 10 | ||
11 | void __init setup_real_mode(void) | 11 | void __init reserve_real_mode(void) |
12 | { | 12 | { |
13 | phys_addr_t mem; | 13 | phys_addr_t mem; |
14 | unsigned char *base; | ||
15 | size_t size = PAGE_ALIGN(real_mode_blob_end - real_mode_blob); | ||
16 | |||
17 | /* Has to be under 1M so we can execute real-mode AP code. */ | ||
18 | mem = memblock_find_in_range(0, 1<<20, size, PAGE_SIZE); | ||
19 | if (!mem) | ||
20 | panic("Cannot allocate trampoline\n"); | ||
21 | |||
22 | base = __va(mem); | ||
23 | memblock_reserve(mem, size); | ||
24 | real_mode_header = (struct real_mode_header *) base; | ||
25 | printk(KERN_DEBUG "Base memory trampoline at [%p] %llx size %zu\n", | ||
26 | base, (unsigned long long)mem, size); | ||
27 | } | ||
28 | |||
29 | void __init setup_real_mode(void) | ||
30 | { | ||
14 | u16 real_mode_seg; | 31 | u16 real_mode_seg; |
15 | u32 *rel; | 32 | u32 *rel; |
16 | u32 count; | 33 | u32 count; |
@@ -25,16 +42,7 @@ void __init setup_real_mode(void) | |||
25 | u64 efer; | 42 | u64 efer; |
26 | #endif | 43 | #endif |
27 | 44 | ||
28 | /* Has to be in very low memory so we can execute real-mode AP code. */ | 45 | base = (unsigned char *)real_mode_header; |
29 | mem = memblock_find_in_range(0, 1<<20, size, PAGE_SIZE); | ||
30 | if (!mem) | ||
31 | panic("Cannot allocate trampoline\n"); | ||
32 | |||
33 | base = __va(mem); | ||
34 | memblock_reserve(mem, size); | ||
35 | real_mode_header = (struct real_mode_header *) base; | ||
36 | printk(KERN_DEBUG "Base memory trampoline at [%p] %llx size %zu\n", | ||
37 | base, (unsigned long long)mem, size); | ||
38 | 46 | ||
39 | memcpy(base, real_mode_blob, size); | 47 | memcpy(base, real_mode_blob, size); |
40 | 48 | ||
@@ -78,16 +86,18 @@ void __init setup_real_mode(void) | |||
78 | *trampoline_cr4_features = read_cr4(); | 86 | *trampoline_cr4_features = read_cr4(); |
79 | 87 | ||
80 | trampoline_pgd = (u64 *) __va(real_mode_header->trampoline_pgd); | 88 | trampoline_pgd = (u64 *) __va(real_mode_header->trampoline_pgd); |
81 | trampoline_pgd[0] = __pa_symbol(level3_ident_pgt) + _KERNPG_TABLE; | 89 | trampoline_pgd[0] = init_level4_pgt[pgd_index(__PAGE_OFFSET)].pgd; |
82 | trampoline_pgd[511] = __pa_symbol(level3_kernel_pgt) + _KERNPG_TABLE; | 90 | trampoline_pgd[511] = init_level4_pgt[511].pgd; |
83 | #endif | 91 | #endif |
84 | } | 92 | } |
85 | 93 | ||
86 | /* | 94 | /* |
87 | * set_real_mode_permissions() gets called very early, to guarantee the | 95 | * reserve_real_mode() gets called very early, to guarantee the |
88 | * availability of low memory. This is before the proper kernel page | 96 | * availability of low memory. This is before the proper kernel page |
89 | * tables are set up, so we cannot set page permissions in that | 97 | * tables are set up, so we cannot set page permissions in that |
90 | * function. Thus, we use an arch_initcall instead. | 98 | * function. Also, the trampoline code will be executed by APs, so we |
99 | * need to mark it executable no later than do_pre_smp_initcalls(); | ||
100 | * thus we run it as an early_initcall(). | ||
91 | */ | 101 | */ |
92 | static int __init set_real_mode_permissions(void) | 102 | static int __init set_real_mode_permissions(void) |
93 | { | 103 | { |
@@ -111,5 +121,4 @@ static int __init set_real_mode_permissions(void) | |||
111 | 121 | ||
112 | return 0; | 122 | return 0; |
113 | } | 123 | } |
114 | 124 | early_initcall(set_real_mode_permissions); | |
115 | arch_initcall(set_real_mode_permissions); | ||
diff --git a/arch/x86/tools/relocs.c b/arch/x86/tools/relocs.c index 5a1847d61930..79d67bd507fa 100644 --- a/arch/x86/tools/relocs.c +++ b/arch/x86/tools/relocs.c | |||
@@ -814,12 +814,14 @@ int main(int argc, char **argv) | |||
814 | read_relocs(fp); | 814 | read_relocs(fp); |
815 | if (show_absolute_syms) { | 815 | if (show_absolute_syms) { |
816 | print_absolute_symbols(); | 816 | print_absolute_symbols(); |
817 | return 0; | 817 | goto out; |
818 | } | 818 | } |
819 | if (show_absolute_relocs) { | 819 | if (show_absolute_relocs) { |
820 | print_absolute_relocs(); | 820 | print_absolute_relocs(); |
821 | return 0; | 821 | goto out; |
822 | } | 822 | } |
823 | emit_relocs(as_text, use_real_mode); | 823 | emit_relocs(as_text, use_real_mode); |
824 | out: | ||
825 | fclose(fp); | ||
824 | return 0; | 826 | return 0; |
825 | } | 827 | } |
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 01de35c77221..f5e86eee4e0e 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c | |||
@@ -1178,20 +1178,6 @@ static void xen_exit_mmap(struct mm_struct *mm) | |||
1178 | 1178 | ||
1179 | static void xen_post_allocator_init(void); | 1179 | static void xen_post_allocator_init(void); |
1180 | 1180 | ||
1181 | static __init void xen_mapping_pagetable_reserve(u64 start, u64 end) | ||
1182 | { | ||
1183 | /* reserve the range used */ | ||
1184 | native_pagetable_reserve(start, end); | ||
1185 | |||
1186 | /* set as RW the rest */ | ||
1187 | printk(KERN_DEBUG "xen: setting RW the range %llx - %llx\n", end, | ||
1188 | PFN_PHYS(pgt_buf_top)); | ||
1189 | while (end < PFN_PHYS(pgt_buf_top)) { | ||
1190 | make_lowmem_page_readwrite(__va(end)); | ||
1191 | end += PAGE_SIZE; | ||
1192 | } | ||
1193 | } | ||
1194 | |||
1195 | #ifdef CONFIG_X86_64 | 1181 | #ifdef CONFIG_X86_64 |
1196 | static void __init xen_cleanhighmap(unsigned long vaddr, | 1182 | static void __init xen_cleanhighmap(unsigned long vaddr, |
1197 | unsigned long vaddr_end) | 1183 | unsigned long vaddr_end) |
@@ -1503,19 +1489,6 @@ static pte_t __init mask_rw_pte(pte_t *ptep, pte_t pte) | |||
1503 | #else /* CONFIG_X86_64 */ | 1489 | #else /* CONFIG_X86_64 */ |
1504 | static pte_t __init mask_rw_pte(pte_t *ptep, pte_t pte) | 1490 | static pte_t __init mask_rw_pte(pte_t *ptep, pte_t pte) |
1505 | { | 1491 | { |
1506 | unsigned long pfn = pte_pfn(pte); | ||
1507 | |||
1508 | /* | ||
1509 | * If the new pfn is within the range of the newly allocated | ||
1510 | * kernel pagetable, and it isn't being mapped into an | ||
1511 | * early_ioremap fixmap slot as a freshly allocated page, make sure | ||
1512 | * it is RO. | ||
1513 | */ | ||
1514 | if (((!is_early_ioremap_ptep(ptep) && | ||
1515 | pfn >= pgt_buf_start && pfn < pgt_buf_top)) || | ||
1516 | (is_early_ioremap_ptep(ptep) && pfn != (pgt_buf_end - 1))) | ||
1517 | pte = pte_wrprotect(pte); | ||
1518 | |||
1519 | return pte; | 1492 | return pte; |
1520 | } | 1493 | } |
1521 | #endif /* CONFIG_X86_64 */ | 1494 | #endif /* CONFIG_X86_64 */ |
@@ -2197,7 +2170,6 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = { | |||
2197 | 2170 | ||
2198 | void __init xen_init_mmu_ops(void) | 2171 | void __init xen_init_mmu_ops(void) |
2199 | { | 2172 | { |
2200 | x86_init.mapping.pagetable_reserve = xen_mapping_pagetable_reserve; | ||
2201 | x86_init.paging.pagetable_init = xen_pagetable_init; | 2173 | x86_init.paging.pagetable_init = xen_pagetable_init; |
2202 | pv_mmu_ops = xen_mmu_ops; | 2174 | pv_mmu_ops = xen_mmu_ops; |
2203 | 2175 | ||