77 files changed, 1542 insertions, 1247 deletions
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 4c5b3f993bbb..9aa8ff3e54dc 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt | |||
@@ -594,6 +594,9 @@ bytes respectively. Such letter suffixes can also be entirely omitted. | |||
594 | is selected automatically. Check | 594 | is selected automatically. Check |
595 | Documentation/kdump/kdump.txt for further details. | 595 | Documentation/kdump/kdump.txt for further details. |
596 | 596 | ||
597 | crashkernel_low=size[KMG] | ||
598 | [KNL, x86] Portion of the crash kernel reservation placed under 4G. | ||
599 | |||
597 | crashkernel=range1:size1[,range2:size2,...][@offset] | 600 | crashkernel=range1:size1[,range2:size2,...][@offset] |
598 | [KNL] Same as above, but depends on the memory | 601 | [KNL] Same as above, but depends on the memory |
599 | in the running system. The syntax of range is | 602 | in the running system. The syntax of range is |
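A brief usage note on the new parameter (illustrative values, not part of the patch): booting with something like "crashkernel=512M crashkernel_low=72M" requests the usual crash-kernel reservation plus a separate reservation below 4G, presumably so the kdump kernel still has DMA-capable memory (for example for swiotlb) when the main reservation ends up above 4G.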
diff --git a/Documentation/x86/boot.txt b/Documentation/x86/boot.txt index b443f1de0e5a..3840b6f28afb 100644 --- a/Documentation/x86/boot.txt +++ b/Documentation/x86/boot.txt | |||
@@ -1055,6 +1055,44 @@ must have read/write permission; CS must be __BOOT_CS and DS, ES, SS | |||
1055 | must be __BOOT_DS; interrupt must be disabled; %esi must hold the base | 1055 | must be __BOOT_DS; interrupt must be disabled; %esi must hold the base |
1056 | address of the struct boot_params; %ebp, %edi and %ebx must be zero. | 1056 | address of the struct boot_params; %ebp, %edi and %ebx must be zero. |
1057 | 1057 | ||
1058 | **** 64-bit BOOT PROTOCOL | ||
1059 | |||
1060 | For machines with a 64-bit CPU and a 64-bit kernel, a 64-bit bootloader | ||
1061 | can be used, and a 64-bit boot protocol is needed. | ||
1062 | |||
1063 | In the 64-bit boot protocol, the first step in loading a Linux kernel | ||
1064 | should be to set up the boot parameters (struct boot_params, | ||
1065 | traditionally known as "zero page"). The memory for struct boot_params | ||
1066 | could be allocated anywhere (even above 4G) and initialized to all zero. | ||
1067 | Then, the setup header at offset 0x01f1 of the kernel image should be | ||
1068 | loaded into struct boot_params and examined. The end of the setup header | ||
1069 | can be calculated as follows: | ||
1070 | |||
1071 | 0x0202 + byte value at offset 0x0201 | ||
1072 | |||
1073 | In addition to reading/modifying/writing the setup header of struct | ||
1074 | boot_params as in the 16-bit boot protocol, the boot loader should | ||
1075 | also fill in the additional fields of struct boot_params as described | ||
1076 | in zero-page.txt. | ||
1077 | |||
1078 | After setting up the struct boot_params, the boot loader can load a | ||
1079 | 64-bit kernel in the same way as with the 16-bit boot protocol, except | ||
1080 | that the kernel may be loaded above 4G. | ||
1081 | |||
1082 | In the 64-bit boot protocol, the kernel is started by jumping to the | ||
1083 | 64-bit kernel entry point, which is the start address of the loaded | ||
1084 | 64-bit kernel plus 0x200. | ||
1085 | |||
1086 | At entry, the CPU must be in 64-bit mode with paging enabled. | ||
1087 | The first setup_header.init_size bytes from the kernel's load address, | ||
1088 | plus the zero page and the command line buffer, must be identity mapped; | ||
1089 | a GDT must be loaded with the descriptors for selectors | ||
1090 | __BOOT_CS(0x10) and __BOOT_DS(0x18); both descriptors must be 4G flat | ||
1091 | segment; __BOOT_CS must have execute/read permission, and __BOOT_DS | ||
1092 | must have read/write permission; CS must be __BOOT_CS and DS, ES, SS | ||
1093 | must be __BOOT_DS; interrupt must be disabled; %rsi must hold the base | ||
1094 | address of the struct boot_params. | ||
1095 | |||
1058 | **** EFI HANDOVER PROTOCOL | 1096 | **** EFI HANDOVER PROTOCOL |
1059 | 1097 | ||
1060 | This protocol allows boot loaders to defer initialisation to the EFI | 1098 | This protocol allows boot loaders to defer initialisation to the EFI |
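To make the 64-bit boot protocol text added above more concrete, here is a minimal, illustrative C sketch of the bootloader-side steps it describes: compute the end of the setup header from the byte at offset 0x0201, copy the header into a zeroed struct boot_params, then enter the kernel at the load address plus 0x200 with a pointer to that structure. The helper name and the flat in-memory copy are assumptions for the example, not part of the patch.

    #include <stdint.h>
    #include <string.h>

    #define SETUP_HDR_OFF   0x01f1          /* start of the setup header      */
    #define ZERO_PAGE_SIZE  4096            /* struct boot_params is one page */

    /*
     * Prepare the "zero page" for the 64-bit boot protocol. 'kernel' points
     * at the raw bzImage in memory, 'zero_page' at a freshly allocated
     * (anywhere, even above 4G) page for struct boot_params.
     */
    static void prepare_zero_page(uint8_t *zero_page, const uint8_t *kernel)
    {
        /* End of the setup header: 0x0202 + byte value at offset 0x0201 */
        size_t hdr_end = 0x0202 + kernel[0x0201];

        memset(zero_page, 0, ZERO_PAGE_SIZE);
        memcpy(zero_page + SETUP_HDR_OFF,
               kernel + SETUP_HDR_OFF,
               hdr_end - SETUP_HDR_OFF);

        /*
         * The loader would now modify the header fields (cmd_line_ptr,
         * ramdisk_image, ...) and the extra zero-page fields, then jump to
         * load_address + 0x200 with %rsi pointing at zero_page.
         */
    }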
diff --git a/arch/mips/cavium-octeon/dma-octeon.c b/arch/mips/cavium-octeon/dma-octeon.c index 41dd00884975..02f244475207 100644 --- a/arch/mips/cavium-octeon/dma-octeon.c +++ b/arch/mips/cavium-octeon/dma-octeon.c | |||
@@ -317,7 +317,8 @@ void __init plat_swiotlb_setup(void) | |||
317 | 317 | ||
318 | octeon_swiotlb = alloc_bootmem_low_pages(swiotlbsize); | 318 | octeon_swiotlb = alloc_bootmem_low_pages(swiotlbsize); |
319 | 319 | ||
320 | swiotlb_init_with_tbl(octeon_swiotlb, swiotlb_nslabs, 1); | 320 | if (swiotlb_init_with_tbl(octeon_swiotlb, swiotlb_nslabs, 1) == -ENOMEM) |
321 | panic("Cannot allocate SWIOTLB buffer"); | ||
321 | 322 | ||
322 | mips_dma_map_ops = &octeon_linear_dma_map_ops.dma_map_ops; | 323 | mips_dma_map_ops = &octeon_linear_dma_map_ops.dma_map_ops; |
323 | } | 324 | } |
diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index 82bbf048a5b0..5c2c6e61facb 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c | |||
@@ -2027,6 +2027,16 @@ static void __init patch_tlb_miss_handler_bitmap(void) | |||
2027 | flushi(&valid_addr_bitmap_insn[0]); | 2027 | flushi(&valid_addr_bitmap_insn[0]); |
2028 | } | 2028 | } |
2029 | 2029 | ||
2030 | static void __init register_page_bootmem_info(void) | ||
2031 | { | ||
2032 | #ifdef CONFIG_NEED_MULTIPLE_NODES | ||
2033 | int i; | ||
2034 | |||
2035 | for_each_online_node(i) | ||
2036 | if (NODE_DATA(i)->node_spanned_pages) | ||
2037 | register_page_bootmem_info_node(NODE_DATA(i)); | ||
2038 | #endif | ||
2039 | } | ||
2030 | void __init mem_init(void) | 2040 | void __init mem_init(void) |
2031 | { | 2041 | { |
2032 | unsigned long codepages, datapages, initpages; | 2042 | unsigned long codepages, datapages, initpages; |
@@ -2044,20 +2054,8 @@ void __init mem_init(void) | |||
2044 | 2054 | ||
2045 | high_memory = __va(last_valid_pfn << PAGE_SHIFT); | 2055 | high_memory = __va(last_valid_pfn << PAGE_SHIFT); |
2046 | 2056 | ||
2047 | #ifdef CONFIG_NEED_MULTIPLE_NODES | 2057 | register_page_bootmem_info(); |
2048 | { | ||
2049 | int i; | ||
2050 | for_each_online_node(i) { | ||
2051 | if (NODE_DATA(i)->node_spanned_pages != 0) { | ||
2052 | totalram_pages += | ||
2053 | free_all_bootmem_node(NODE_DATA(i)); | ||
2054 | } | ||
2055 | } | ||
2056 | totalram_pages += free_low_memory_core_early(MAX_NUMNODES); | ||
2057 | } | ||
2058 | #else | ||
2059 | totalram_pages = free_all_bootmem(); | 2058 | totalram_pages = free_all_bootmem(); |
2060 | #endif | ||
2061 | 2059 | ||
2062 | /* We subtract one to account for the mem_map_zero page | 2060 | /* We subtract one to account for the mem_map_zero page |
2063 | * allocated below. | 2061 | * allocated below. |
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index b44c0b50e569..ff0e5f3c844e 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig | |||
@@ -1277,10 +1277,6 @@ config NODES_SHIFT | |||
1277 | Specify the maximum number of NUMA Nodes available on the target | 1277 | Specify the maximum number of NUMA Nodes available on the target |
1278 | system. Increases memory reserved to accommodate various tables. | 1278 | system. Increases memory reserved to accommodate various tables. |
1279 | 1279 | ||
1280 | config HAVE_ARCH_ALLOC_REMAP | ||
1281 | def_bool y | ||
1282 | depends on X86_32 && NUMA | ||
1283 | |||
1284 | config ARCH_HAVE_MEMORY_PRESENT | 1280 | config ARCH_HAVE_MEMORY_PRESENT |
1285 | def_bool y | 1281 | def_bool y |
1286 | depends on X86_32 && DISCONTIGMEM | 1282 | depends on X86_32 && DISCONTIGMEM |
diff --git a/arch/x86/boot/boot.h b/arch/x86/boot/boot.h index 18997e5a1053..5b7531966b84 100644 --- a/arch/x86/boot/boot.h +++ b/arch/x86/boot/boot.h | |||
@@ -285,16 +285,26 @@ struct biosregs { | |||
285 | void intcall(u8 int_no, const struct biosregs *ireg, struct biosregs *oreg); | 285 | void intcall(u8 int_no, const struct biosregs *ireg, struct biosregs *oreg); |
286 | 286 | ||
287 | /* cmdline.c */ | 287 | /* cmdline.c */ |
288 | int __cmdline_find_option(u32 cmdline_ptr, const char *option, char *buffer, int bufsize); | 288 | int __cmdline_find_option(unsigned long cmdline_ptr, const char *option, char *buffer, int bufsize); |
289 | int __cmdline_find_option_bool(u32 cmdline_ptr, const char *option); | 289 | int __cmdline_find_option_bool(unsigned long cmdline_ptr, const char *option); |
290 | static inline int cmdline_find_option(const char *option, char *buffer, int bufsize) | 290 | static inline int cmdline_find_option(const char *option, char *buffer, int bufsize) |
291 | { | 291 | { |
292 | return __cmdline_find_option(boot_params.hdr.cmd_line_ptr, option, buffer, bufsize); | 292 | unsigned long cmd_line_ptr = boot_params.hdr.cmd_line_ptr; |
293 | |||
294 | if (cmd_line_ptr >= 0x100000) | ||
295 | return -1; /* inaccessible */ | ||
296 | |||
297 | return __cmdline_find_option(cmd_line_ptr, option, buffer, bufsize); | ||
293 | } | 298 | } |
294 | 299 | ||
295 | static inline int cmdline_find_option_bool(const char *option) | 300 | static inline int cmdline_find_option_bool(const char *option) |
296 | { | 301 | { |
297 | return __cmdline_find_option_bool(boot_params.hdr.cmd_line_ptr, option); | 302 | unsigned long cmd_line_ptr = boot_params.hdr.cmd_line_ptr; |
303 | |||
304 | if (cmd_line_ptr >= 0x100000) | ||
305 | return -1; /* inaccessible */ | ||
306 | |||
307 | return __cmdline_find_option_bool(cmd_line_ptr, option); | ||
298 | } | 308 | } |
299 | 309 | ||
300 | 310 | ||
diff --git a/arch/x86/boot/cmdline.c b/arch/x86/boot/cmdline.c index 6b3b6f708c04..625d21b0cd3f 100644 --- a/arch/x86/boot/cmdline.c +++ b/arch/x86/boot/cmdline.c | |||
@@ -27,7 +27,7 @@ static inline int myisspace(u8 c) | |||
27 | * Returns the length of the argument (regardless of if it was | 27 | * Returns the length of the argument (regardless of if it was |
28 | * truncated to fit in the buffer), or -1 on not found. | 28 | * truncated to fit in the buffer), or -1 on not found. |
29 | */ | 29 | */ |
30 | int __cmdline_find_option(u32 cmdline_ptr, const char *option, char *buffer, int bufsize) | 30 | int __cmdline_find_option(unsigned long cmdline_ptr, const char *option, char *buffer, int bufsize) |
31 | { | 31 | { |
32 | addr_t cptr; | 32 | addr_t cptr; |
33 | char c; | 33 | char c; |
@@ -41,8 +41,8 @@ int __cmdline_find_option(u32 cmdline_ptr, const char *option, char *buffer, int | |||
41 | st_bufcpy /* Copying this to buffer */ | 41 | st_bufcpy /* Copying this to buffer */ |
42 | } state = st_wordstart; | 42 | } state = st_wordstart; |
43 | 43 | ||
44 | if (!cmdline_ptr || cmdline_ptr >= 0x100000) | 44 | if (!cmdline_ptr) |
45 | return -1; /* No command line, or inaccessible */ | 45 | return -1; /* No command line */ |
46 | 46 | ||
47 | cptr = cmdline_ptr & 0xf; | 47 | cptr = cmdline_ptr & 0xf; |
48 | set_fs(cmdline_ptr >> 4); | 48 | set_fs(cmdline_ptr >> 4); |
@@ -99,7 +99,7 @@ int __cmdline_find_option(u32 cmdline_ptr, const char *option, char *buffer, int | |||
99 | * Returns the position of that option (starts counting with 1) | 99 | * Returns the position of that option (starts counting with 1) |
100 | * or 0 on not found | 100 | * or 0 on not found |
101 | */ | 101 | */ |
102 | int __cmdline_find_option_bool(u32 cmdline_ptr, const char *option) | 102 | int __cmdline_find_option_bool(unsigned long cmdline_ptr, const char *option) |
103 | { | 103 | { |
104 | addr_t cptr; | 104 | addr_t cptr; |
105 | char c; | 105 | char c; |
@@ -111,8 +111,8 @@ int __cmdline_find_option_bool(u32 cmdline_ptr, const char *option) | |||
111 | st_wordskip, /* Miscompare, skip */ | 111 | st_wordskip, /* Miscompare, skip */ |
112 | } state = st_wordstart; | 112 | } state = st_wordstart; |
113 | 113 | ||
114 | if (!cmdline_ptr || cmdline_ptr >= 0x100000) | 114 | if (!cmdline_ptr) |
115 | return -1; /* No command line, or inaccessible */ | 115 | return -1; /* No command line */ |
116 | 116 | ||
117 | cptr = cmdline_ptr & 0xf; | 117 | cptr = cmdline_ptr & 0xf; |
118 | set_fs(cmdline_ptr >> 4); | 118 | set_fs(cmdline_ptr >> 4); |
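For context on the check that moved into boot.h above: this real-mode setup code reaches the command line through a 16-bit segment:offset pair built with set_fs(cmdline_ptr >> 4), so it can only address roughly the first megabyte; pointers at or above 0x100000 are therefore rejected in the wrappers rather than here. A tiny standalone illustration of that addressing limit (user-space C, not kernel code):

    #include <stdint.h>
    #include <stdio.h>

    /* Real-mode style linear -> segment:offset split, as used via set_fs(). */
    static void linear_to_seg_off(uint32_t linear, uint16_t *seg, uint16_t *off)
    {
        *seg = linear >> 4;     /* truncates once linear >= 0x100000 */
        *off = linear & 0xf;
    }

    int main(void)
    {
        uint16_t seg, off;

        linear_to_seg_off(0x0009f000, &seg, &off);  /* reachable: prints 9f00:0000 */
        printf("%04x:%04x\n", seg, off);

        linear_to_seg_off(0x00100000, &seg, &off);  /* 1 MiB: segment wraps to 0 */
        printf("%04x:%04x\n", seg, off);
        return 0;
    }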
diff --git a/arch/x86/boot/compressed/cmdline.c b/arch/x86/boot/compressed/cmdline.c index 10f6b1178c68..bffd73b45b1f 100644 --- a/arch/x86/boot/compressed/cmdline.c +++ b/arch/x86/boot/compressed/cmdline.c | |||
@@ -13,13 +13,21 @@ static inline char rdfs8(addr_t addr) | |||
13 | return *((char *)(fs + addr)); | 13 | return *((char *)(fs + addr)); |
14 | } | 14 | } |
15 | #include "../cmdline.c" | 15 | #include "../cmdline.c" |
16 | static unsigned long get_cmd_line_ptr(void) | ||
17 | { | ||
18 | unsigned long cmd_line_ptr = real_mode->hdr.cmd_line_ptr; | ||
19 | |||
20 | cmd_line_ptr |= (u64)real_mode->ext_cmd_line_ptr << 32; | ||
21 | |||
22 | return cmd_line_ptr; | ||
23 | } | ||
16 | int cmdline_find_option(const char *option, char *buffer, int bufsize) | 24 | int cmdline_find_option(const char *option, char *buffer, int bufsize) |
17 | { | 25 | { |
18 | return __cmdline_find_option(real_mode->hdr.cmd_line_ptr, option, buffer, bufsize); | 26 | return __cmdline_find_option(get_cmd_line_ptr(), option, buffer, bufsize); |
19 | } | 27 | } |
20 | int cmdline_find_option_bool(const char *option) | 28 | int cmdline_find_option_bool(const char *option) |
21 | { | 29 | { |
22 | return __cmdline_find_option_bool(real_mode->hdr.cmd_line_ptr, option); | 30 | return __cmdline_find_option_bool(get_cmd_line_ptr(), option); |
23 | } | 31 | } |
24 | 32 | ||
25 | #endif | 33 | #endif |
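The get_cmd_line_ptr() helper added above reassembles a 64-bit command-line address from the two halves a bootloader publishes. A minimal sketch of the bootloader side of that contract; the field names cmd_line_ptr and ext_cmd_line_ptr match struct boot_params, but the struct layout here is deliberately simplified for illustration:

    #include <stdint.h>

    /* Simplified stand-ins for the relevant struct boot_params fields. */
    struct setup_header { uint32_t cmd_line_ptr; };
    struct boot_params  { struct setup_header hdr; uint32_t ext_cmd_line_ptr; };

    /* Publish a command line buffer that may live above 4G. */
    static void set_cmd_line(struct boot_params *bp, uint64_t cmdline_addr)
    {
        bp->hdr.cmd_line_ptr = (uint32_t)cmdline_addr;          /* low 32 bits  */
        bp->ext_cmd_line_ptr = (uint32_t)(cmdline_addr >> 32);  /* high 32 bits */
    }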
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index f5d1aaa0dec8..c1d383d1fb7e 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S | |||
@@ -37,6 +37,12 @@ | |||
37 | __HEAD | 37 | __HEAD |
38 | .code32 | 38 | .code32 |
39 | ENTRY(startup_32) | 39 | ENTRY(startup_32) |
40 | /* | ||
41 | * 32bit entry is 0 and it is ABI so immutable! | ||
42 | * If we come here directly from a bootloader, | ||
43 | * kernel(text+data+bss+brk) ramdisk, zero_page, command line | ||
44 | * all need to be under the 4G limit. | ||
45 | */ | ||
40 | cld | 46 | cld |
41 | /* | 47 | /* |
42 | * Test KEEP_SEGMENTS flag to see if the bootloader is asking | 48 | * Test KEEP_SEGMENTS flag to see if the bootloader is asking |
@@ -154,6 +160,12 @@ ENTRY(startup_32) | |||
154 | btsl $_EFER_LME, %eax | 160 | btsl $_EFER_LME, %eax |
155 | wrmsr | 161 | wrmsr |
156 | 162 | ||
163 | /* After gdt is loaded */ | ||
164 | xorl %eax, %eax | ||
165 | lldt %ax | ||
166 | movl $0x20, %eax | ||
167 | ltr %ax | ||
168 | |||
157 | /* | 169 | /* |
158 | * Setup for the jump to 64bit mode | 170 | * Setup for the jump to 64bit mode |
159 | * | 171 | * |
@@ -176,28 +188,18 @@ ENTRY(startup_32) | |||
176 | lret | 188 | lret |
177 | ENDPROC(startup_32) | 189 | ENDPROC(startup_32) |
178 | 190 | ||
179 | no_longmode: | ||
180 | /* This isn't an x86-64 CPU so hang */ | ||
181 | 1: | ||
182 | hlt | ||
183 | jmp 1b | ||
184 | |||
185 | #include "../../kernel/verify_cpu.S" | ||
186 | |||
187 | /* | ||
188 | * Be careful here startup_64 needs to be at a predictable | ||
189 | * address so I can export it in an ELF header. Bootloaders | ||
190 | * should look at the ELF header to find this address, as | ||
191 | * it may change in the future. | ||
192 | */ | ||
193 | .code64 | 191 | .code64 |
194 | .org 0x200 | 192 | .org 0x200 |
195 | ENTRY(startup_64) | 193 | ENTRY(startup_64) |
196 | /* | 194 | /* |
195 | * 64bit entry is 0x200 and it is ABI so immutable! | ||
197 | * We come here either from startup_32 or directly from a | 196 | * We come here either from startup_32 or directly from a |
198 | * 64bit bootloader. If we come here from a bootloader we depend on | 197 | * 64bit bootloader. |
199 | * an identity mapped page table being provied that maps our | 198 | * If we come here from a bootloader, kernel(text+data+bss+brk), |
200 | * entire text+data+bss and hopefully all of memory. | 199 | * ramdisk, zero_page, command line could be above 4G. |
200 | * We depend on an identity mapped page table being provided | ||
201 | * that maps our entire kernel(text+data+bss+brk), zero page | ||
202 | * and command line. | ||
201 | */ | 203 | */ |
202 | #ifdef CONFIG_EFI_STUB | 204 | #ifdef CONFIG_EFI_STUB |
203 | /* | 205 | /* |
@@ -247,9 +249,6 @@ preferred_addr: | |||
247 | movl %eax, %ss | 249 | movl %eax, %ss |
248 | movl %eax, %fs | 250 | movl %eax, %fs |
249 | movl %eax, %gs | 251 | movl %eax, %gs |
250 | lldt %ax | ||
251 | movl $0x20, %eax | ||
252 | ltr %ax | ||
253 | 252 | ||
254 | /* | 253 | /* |
255 | * Compute the decompressed kernel start address. It is where | 254 | * Compute the decompressed kernel start address. It is where |
@@ -349,6 +348,15 @@ relocated: | |||
349 | */ | 348 | */ |
350 | jmp *%rbp | 349 | jmp *%rbp |
351 | 350 | ||
351 | .code32 | ||
352 | no_longmode: | ||
353 | /* This isn't an x86-64 CPU so hang */ | ||
354 | 1: | ||
355 | hlt | ||
356 | jmp 1b | ||
357 | |||
358 | #include "../../kernel/verify_cpu.S" | ||
359 | |||
352 | .data | 360 | .data |
353 | gdt: | 361 | gdt: |
354 | .word gdt_end - gdt | 362 | .word gdt_end - gdt |
diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S index 944ce595f767..9ec06a1f6d61 100644 --- a/arch/x86/boot/header.S +++ b/arch/x86/boot/header.S | |||
@@ -374,6 +374,14 @@ xloadflags: | |||
374 | #else | 374 | #else |
375 | # define XLF0 0 | 375 | # define XLF0 0 |
376 | #endif | 376 | #endif |
377 | |||
378 | #if defined(CONFIG_RELOCATABLE) && defined(CONFIG_X86_64) | ||
379 | /* kernel/boot_param/ramdisk could be loaded above 4g */ | ||
380 | # define XLF1 XLF_CAN_BE_LOADED_ABOVE_4G | ||
381 | #else | ||
382 | # define XLF1 0 | ||
383 | #endif | ||
384 | |||
377 | #ifdef CONFIG_EFI_STUB | 385 | #ifdef CONFIG_EFI_STUB |
378 | # ifdef CONFIG_X86_64 | 386 | # ifdef CONFIG_X86_64 |
379 | # define XLF23 XLF_EFI_HANDOVER_64 /* 64-bit EFI handover ok */ | 387 | # define XLF23 XLF_EFI_HANDOVER_64 /* 64-bit EFI handover ok */ |
@@ -383,7 +391,7 @@ xloadflags: | |||
383 | #else | 391 | #else |
384 | # define XLF23 0 | 392 | # define XLF23 0 |
385 | #endif | 393 | #endif |
386 | .word XLF0 | XLF23 | 394 | .word XLF0 | XLF1 | XLF23 |
387 | 395 | ||
388 | cmdline_size: .long COMMAND_LINE_SIZE-1 #length of the command line, | 396 | cmdline_size: .long COMMAND_LINE_SIZE-1 #length of the command line, |
389 | #added with boot protocol | 397 | #added with boot protocol |
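A bootloader is expected to test the new flag before placing anything above 4G. A minimal, hypothetical check on the loader side (the flag value matches XLF_CAN_BE_LOADED_ABOVE_4G, bit 1, from asm/bootparam.h; the helper name is made up):

    #include <stdbool.h>
    #include <stdint.h>

    #define XLF_CAN_BE_LOADED_ABOVE_4G (1 << 1)   /* the bit set by XLF1 above */

    /* 'xloadflags' is the 16-bit field read from the kernel's setup header. */
    static bool can_load_above_4g(uint16_t xloadflags)
    {
        return (xloadflags & XLF_CAN_BE_LOADED_ABOVE_4G) != 0;
    }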
diff --git a/arch/x86/include/asm/init.h b/arch/x86/include/asm/init.h index adcc0ae73d09..223042086f4e 100644 --- a/arch/x86/include/asm/init.h +++ b/arch/x86/include/asm/init.h | |||
@@ -1,20 +1,14 @@ | |||
1 | #ifndef _ASM_X86_INIT_32_H | 1 | #ifndef _ASM_X86_INIT_H |
2 | #define _ASM_X86_INIT_32_H | 2 | #define _ASM_X86_INIT_H |
3 | 3 | ||
4 | #ifdef CONFIG_X86_32 | 4 | struct x86_mapping_info { |
5 | extern void __init early_ioremap_page_table_range_init(void); | 5 | void *(*alloc_pgt_page)(void *); /* allocate buf for page table */ |
6 | #endif | 6 | void *context; /* context for alloc_pgt_page */ |
7 | unsigned long pmd_flag; /* page flag for PMD entry */ | ||
8 | bool kernel_mapping; /* kernel mapping or ident mapping */ | ||
9 | }; | ||
7 | 10 | ||
8 | extern void __init zone_sizes_init(void); | 11 | int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page, |
12 | unsigned long addr, unsigned long end); | ||
9 | 13 | ||
10 | extern unsigned long __init | 14 | #endif /* _ASM_X86_INIT_H */ |
11 | kernel_physical_mapping_init(unsigned long start, | ||
12 | unsigned long end, | ||
13 | unsigned long page_size_mask); | ||
14 | |||
15 | |||
16 | extern unsigned long __initdata pgt_buf_start; | ||
17 | extern unsigned long __meminitdata pgt_buf_end; | ||
18 | extern unsigned long __meminitdata pgt_buf_top; | ||
19 | |||
20 | #endif /* _ASM_X86_INIT_32_H */ | ||
diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h index 6080d2694bad..17483a492f18 100644 --- a/arch/x86/include/asm/kexec.h +++ b/arch/x86/include/asm/kexec.h | |||
@@ -48,11 +48,11 @@ | |||
48 | # define vmcore_elf_check_arch_cross(x) ((x)->e_machine == EM_X86_64) | 48 | # define vmcore_elf_check_arch_cross(x) ((x)->e_machine == EM_X86_64) |
49 | #else | 49 | #else |
50 | /* Maximum physical address we can use pages from */ | 50 | /* Maximum physical address we can use pages from */ |
51 | # define KEXEC_SOURCE_MEMORY_LIMIT (0xFFFFFFFFFFUL) | 51 | # define KEXEC_SOURCE_MEMORY_LIMIT (MAXMEM-1) |
52 | /* Maximum address we can reach in physical address mode */ | 52 | /* Maximum address we can reach in physical address mode */ |
53 | # define KEXEC_DESTINATION_MEMORY_LIMIT (0xFFFFFFFFFFUL) | 53 | # define KEXEC_DESTINATION_MEMORY_LIMIT (MAXMEM-1) |
54 | /* Maximum address we can use for the control pages */ | 54 | /* Maximum address we can use for the control pages */ |
55 | # define KEXEC_CONTROL_MEMORY_LIMIT (0xFFFFFFFFFFUL) | 55 | # define KEXEC_CONTROL_MEMORY_LIMIT (MAXMEM-1) |
56 | 56 | ||
57 | /* Allocate one page for the pdp and the second for the code */ | 57 | /* Allocate one page for the pdp and the second for the code */ |
58 | # define KEXEC_CONTROL_PAGE_SIZE (4096UL + 4096UL) | 58 | # define KEXEC_CONTROL_PAGE_SIZE (4096UL + 4096UL) |
diff --git a/arch/x86/include/asm/mmzone_32.h b/arch/x86/include/asm/mmzone_32.h index eb05fb3b02fb..8a9b3e288cb4 100644 --- a/arch/x86/include/asm/mmzone_32.h +++ b/arch/x86/include/asm/mmzone_32.h | |||
@@ -14,12 +14,6 @@ extern struct pglist_data *node_data[]; | |||
14 | 14 | ||
15 | #include <asm/numaq.h> | 15 | #include <asm/numaq.h> |
16 | 16 | ||
17 | extern void resume_map_numa_kva(pgd_t *pgd); | ||
18 | |||
19 | #else /* !CONFIG_NUMA */ | ||
20 | |||
21 | static inline void resume_map_numa_kva(pgd_t *pgd) {} | ||
22 | |||
23 | #endif /* CONFIG_NUMA */ | 17 | #endif /* CONFIG_NUMA */ |
24 | 18 | ||
25 | #ifdef CONFIG_DISCONTIGMEM | 19 | #ifdef CONFIG_DISCONTIGMEM |
diff --git a/arch/x86/include/asm/numa.h b/arch/x86/include/asm/numa.h index 49119fcea2dc..52560a2038e1 100644 --- a/arch/x86/include/asm/numa.h +++ b/arch/x86/include/asm/numa.h | |||
@@ -54,8 +54,6 @@ static inline int numa_cpu_node(int cpu) | |||
54 | 54 | ||
55 | #ifdef CONFIG_X86_32 | 55 | #ifdef CONFIG_X86_32 |
56 | # include <asm/numa_32.h> | 56 | # include <asm/numa_32.h> |
57 | #else | ||
58 | # include <asm/numa_64.h> | ||
59 | #endif | 57 | #endif |
60 | 58 | ||
61 | #ifdef CONFIG_NUMA | 59 | #ifdef CONFIG_NUMA |
diff --git a/arch/x86/include/asm/numa_64.h b/arch/x86/include/asm/numa_64.h deleted file mode 100644 index 0c05f7ae46e8..000000000000 --- a/arch/x86/include/asm/numa_64.h +++ /dev/null | |||
@@ -1,6 +0,0 @@ | |||
1 | #ifndef _ASM_X86_NUMA_64_H | ||
2 | #define _ASM_X86_NUMA_64_H | ||
3 | |||
4 | extern unsigned long numa_free_all_bootmem(void); | ||
5 | |||
6 | #endif /* _ASM_X86_NUMA_64_H */ | ||
diff --git a/arch/x86/include/asm/page.h b/arch/x86/include/asm/page.h index 8ca82839288a..c87892442e53 100644 --- a/arch/x86/include/asm/page.h +++ b/arch/x86/include/asm/page.h | |||
@@ -17,6 +17,10 @@ | |||
17 | 17 | ||
18 | struct page; | 18 | struct page; |
19 | 19 | ||
20 | #include <linux/range.h> | ||
21 | extern struct range pfn_mapped[]; | ||
22 | extern int nr_pfn_mapped; | ||
23 | |||
20 | static inline void clear_user_page(void *page, unsigned long vaddr, | 24 | static inline void clear_user_page(void *page, unsigned long vaddr, |
21 | struct page *pg) | 25 | struct page *pg) |
22 | { | 26 | { |
@@ -44,7 +48,8 @@ static inline void copy_user_page(void *to, void *from, unsigned long vaddr, | |||
44 | * case properly. Once all supported versions of gcc understand it, we can | 48 | * case properly. Once all supported versions of gcc understand it, we can |
45 | * remove this Voodoo magic stuff. (i.e. once gcc3.x is deprecated) | 49 | * remove this Voodoo magic stuff. (i.e. once gcc3.x is deprecated) |
46 | */ | 50 | */ |
47 | #define __pa_symbol(x) __pa(__phys_reloc_hide((unsigned long)(x))) | 51 | #define __pa_symbol(x) \ |
52 | __phys_addr_symbol(__phys_reloc_hide((unsigned long)(x))) | ||
48 | 53 | ||
49 | #define __va(x) ((void *)((unsigned long)(x)+PAGE_OFFSET)) | 54 | #define __va(x) ((void *)((unsigned long)(x)+PAGE_OFFSET)) |
50 | 55 | ||
diff --git a/arch/x86/include/asm/page_32.h b/arch/x86/include/asm/page_32.h index da4e762406f7..4d550d04b609 100644 --- a/arch/x86/include/asm/page_32.h +++ b/arch/x86/include/asm/page_32.h | |||
@@ -15,6 +15,7 @@ extern unsigned long __phys_addr(unsigned long); | |||
15 | #else | 15 | #else |
16 | #define __phys_addr(x) __phys_addr_nodebug(x) | 16 | #define __phys_addr(x) __phys_addr_nodebug(x) |
17 | #endif | 17 | #endif |
18 | #define __phys_addr_symbol(x) __phys_addr(x) | ||
18 | #define __phys_reloc_hide(x) RELOC_HIDE((x), 0) | 19 | #define __phys_reloc_hide(x) RELOC_HIDE((x), 0) |
19 | 20 | ||
20 | #ifdef CONFIG_FLATMEM | 21 | #ifdef CONFIG_FLATMEM |
diff --git a/arch/x86/include/asm/page_64.h b/arch/x86/include/asm/page_64.h index 072694ed81a5..0f1ddee6a0ce 100644 --- a/arch/x86/include/asm/page_64.h +++ b/arch/x86/include/asm/page_64.h | |||
@@ -3,4 +3,40 @@ | |||
3 | 3 | ||
4 | #include <asm/page_64_types.h> | 4 | #include <asm/page_64_types.h> |
5 | 5 | ||
6 | #ifndef __ASSEMBLY__ | ||
7 | |||
8 | /* duplicated to the one in bootmem.h */ | ||
9 | extern unsigned long max_pfn; | ||
10 | extern unsigned long phys_base; | ||
11 | |||
12 | static inline unsigned long __phys_addr_nodebug(unsigned long x) | ||
13 | { | ||
14 | unsigned long y = x - __START_KERNEL_map; | ||
15 | |||
16 | /* use the carry flag to determine if x was < __START_KERNEL_map */ | ||
17 | x = y + ((x > y) ? phys_base : (__START_KERNEL_map - PAGE_OFFSET)); | ||
18 | |||
19 | return x; | ||
20 | } | ||
21 | |||
22 | #ifdef CONFIG_DEBUG_VIRTUAL | ||
23 | extern unsigned long __phys_addr(unsigned long); | ||
24 | extern unsigned long __phys_addr_symbol(unsigned long); | ||
25 | #else | ||
26 | #define __phys_addr(x) __phys_addr_nodebug(x) | ||
27 | #define __phys_addr_symbol(x) \ | ||
28 | ((unsigned long)(x) - __START_KERNEL_map + phys_base) | ||
29 | #endif | ||
30 | |||
31 | #define __phys_reloc_hide(x) (x) | ||
32 | |||
33 | #ifdef CONFIG_FLATMEM | ||
34 | #define pfn_valid(pfn) ((pfn) < max_pfn) | ||
35 | #endif | ||
36 | |||
37 | void clear_page(void *page); | ||
38 | void copy_page(void *to, void *from); | ||
39 | |||
40 | #endif /* !__ASSEMBLY__ */ | ||
41 | |||
6 | #endif /* _ASM_X86_PAGE_64_H */ | 42 | #endif /* _ASM_X86_PAGE_64_H */ |
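The branchless conversion in the __phys_addr_nodebug() added above is worth unpacking: the subtraction x - __START_KERNEL_map wraps around for direct-map addresses, the x > y comparison detects that wrap, and the result selects either the kernel-text translation (+ phys_base) or the direct-map translation (- PAGE_OFFSET). A standalone demonstration, with stand-in constants for the kernel's memory layout and an example phys_base:

    #include <stdio.h>

    #define PAGE_OFFSET       0xffff880000000000UL  /* start of direct mapping    */
    #define START_KERNEL_MAP  0xffffffff80000000UL  /* start of kernel text map   */
    static unsigned long phys_base = 0x1000000UL;   /* example physical load base */

    static unsigned long phys_addr_nodebug(unsigned long x)
    {
        unsigned long y = x - START_KERNEL_MAP;     /* wraps for direct-map addresses */

        /* x > y  <=>  no wrap  <=>  x was a kernel-text address */
        return y + ((x > y) ? phys_base : (START_KERNEL_MAP - PAGE_OFFSET));
    }

    int main(void)
    {
        /* kernel-text address: offset into the image plus phys_base */
        printf("%lx\n", phys_addr_nodebug(START_KERNEL_MAP + 0x2000));  /* 1002000 */
        /* direct-map address: simply x - PAGE_OFFSET */
        printf("%lx\n", phys_addr_nodebug(PAGE_OFFSET + 0x100000));     /* 100000  */
        return 0;
    }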
diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h index 320f7bb95f76..8b491e66eaa8 100644 --- a/arch/x86/include/asm/page_64_types.h +++ b/arch/x86/include/asm/page_64_types.h | |||
@@ -50,26 +50,4 @@ | |||
50 | #define KERNEL_IMAGE_SIZE (512 * 1024 * 1024) | 50 | #define KERNEL_IMAGE_SIZE (512 * 1024 * 1024) |
51 | #define KERNEL_IMAGE_START _AC(0xffffffff80000000, UL) | 51 | #define KERNEL_IMAGE_START _AC(0xffffffff80000000, UL) |
52 | 52 | ||
53 | #ifndef __ASSEMBLY__ | ||
54 | void clear_page(void *page); | ||
55 | void copy_page(void *to, void *from); | ||
56 | |||
57 | /* duplicated to the one in bootmem.h */ | ||
58 | extern unsigned long max_pfn; | ||
59 | extern unsigned long phys_base; | ||
60 | |||
61 | extern unsigned long __phys_addr(unsigned long); | ||
62 | #define __phys_reloc_hide(x) (x) | ||
63 | |||
64 | #define vmemmap ((struct page *)VMEMMAP_START) | ||
65 | |||
66 | extern void init_extra_mapping_uc(unsigned long phys, unsigned long size); | ||
67 | extern void init_extra_mapping_wb(unsigned long phys, unsigned long size); | ||
68 | |||
69 | #endif /* !__ASSEMBLY__ */ | ||
70 | |||
71 | #ifdef CONFIG_FLATMEM | ||
72 | #define pfn_valid(pfn) ((pfn) < max_pfn) | ||
73 | #endif | ||
74 | |||
75 | #endif /* _ASM_X86_PAGE_64_DEFS_H */ | 53 | #endif /* _ASM_X86_PAGE_64_DEFS_H */ |
diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h index e21fdd10479f..54c97879195e 100644 --- a/arch/x86/include/asm/page_types.h +++ b/arch/x86/include/asm/page_types.h | |||
@@ -51,6 +51,8 @@ static inline phys_addr_t get_max_mapped(void) | |||
51 | return (phys_addr_t)max_pfn_mapped << PAGE_SHIFT; | 51 | return (phys_addr_t)max_pfn_mapped << PAGE_SHIFT; |
52 | } | 52 | } |
53 | 53 | ||
54 | bool pfn_range_is_mapped(unsigned long start_pfn, unsigned long end_pfn); | ||
55 | |||
54 | extern unsigned long init_memory_mapping(unsigned long start, | 56 | extern unsigned long init_memory_mapping(unsigned long start, |
55 | unsigned long end); | 57 | unsigned long end); |
56 | 58 | ||
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index fc304279b559..1e672234c4ff 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h | |||
@@ -395,6 +395,7 @@ pte_t *populate_extra_pte(unsigned long vaddr); | |||
395 | 395 | ||
396 | #ifndef __ASSEMBLY__ | 396 | #ifndef __ASSEMBLY__ |
397 | #include <linux/mm_types.h> | 397 | #include <linux/mm_types.h> |
398 | #include <linux/log2.h> | ||
398 | 399 | ||
399 | static inline int pte_none(pte_t pte) | 400 | static inline int pte_none(pte_t pte) |
400 | { | 401 | { |
@@ -620,6 +621,8 @@ static inline int pgd_none(pgd_t pgd) | |||
620 | #ifndef __ASSEMBLY__ | 621 | #ifndef __ASSEMBLY__ |
621 | 622 | ||
622 | extern int direct_gbpages; | 623 | extern int direct_gbpages; |
624 | void init_mem_mapping(void); | ||
625 | void early_alloc_pgt_buf(void); | ||
623 | 626 | ||
624 | /* local pte updates need not use xchg for locking */ | 627 | /* local pte updates need not use xchg for locking */ |
625 | static inline pte_t native_local_ptep_get_and_clear(pte_t *ptep) | 628 | static inline pte_t native_local_ptep_get_and_clear(pte_t *ptep) |
@@ -786,6 +789,20 @@ static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count) | |||
786 | memcpy(dst, src, count * sizeof(pgd_t)); | 789 | memcpy(dst, src, count * sizeof(pgd_t)); |
787 | } | 790 | } |
788 | 791 | ||
792 | #define PTE_SHIFT ilog2(PTRS_PER_PTE) | ||
793 | static inline int page_level_shift(enum pg_level level) | ||
794 | { | ||
795 | return (PAGE_SHIFT - PTE_SHIFT) + level * PTE_SHIFT; | ||
796 | } | ||
797 | static inline unsigned long page_level_size(enum pg_level level) | ||
798 | { | ||
799 | return 1UL << page_level_shift(level); | ||
800 | } | ||
801 | static inline unsigned long page_level_mask(enum pg_level level) | ||
802 | { | ||
803 | return ~(page_level_size(level) - 1); | ||
804 | } | ||
805 | |||
789 | /* | 806 | /* |
790 | * The x86 doesn't have any external MMU info: the kernel page | 807 | * The x86 doesn't have any external MMU info: the kernel page |
791 | * tables contain all the necessary information. | 808 | * tables contain all the necessary information. |
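The new page_level_*() helpers derive everything from PTE_SHIFT (9, since PTRS_PER_PTE is 512 on x86-64), so PG_LEVEL_4K, PG_LEVEL_2M and PG_LEVEL_1G map to 4 KiB, 2 MiB and 1 GiB. A small self-contained check of that arithmetic, mirroring the definitions above in plain user-space C:

    #include <stdio.h>

    enum pg_level { PG_LEVEL_NONE, PG_LEVEL_4K, PG_LEVEL_2M, PG_LEVEL_1G };

    #define PAGE_SHIFT 12
    #define PTE_SHIFT  9     /* ilog2(PTRS_PER_PTE) == ilog2(512) */

    static int page_level_shift(enum pg_level level)
    {
        return (PAGE_SHIFT - PTE_SHIFT) + level * PTE_SHIFT;
    }

    static unsigned long page_level_size(enum pg_level level)
    {
        return 1UL << page_level_shift(level);
    }

    int main(void)
    {
        printf("%lu %lu %lu\n",
               page_level_size(PG_LEVEL_4K),    /* 4096       */
               page_level_size(PG_LEVEL_2M),    /* 2097152    */
               page_level_size(PG_LEVEL_1G));   /* 1073741824 */
        return 0;
    }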
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index 615b0c78449f..e22c1dbf7feb 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h | |||
@@ -180,6 +180,11 @@ extern void cleanup_highmap(void); | |||
180 | 180 | ||
181 | #define __HAVE_ARCH_PTE_SAME | 181 | #define __HAVE_ARCH_PTE_SAME |
182 | 182 | ||
183 | #define vmemmap ((struct page *)VMEMMAP_START) | ||
184 | |||
185 | extern void init_extra_mapping_uc(unsigned long phys, unsigned long size); | ||
186 | extern void init_extra_mapping_wb(unsigned long phys, unsigned long size); | ||
187 | |||
183 | #endif /* !__ASSEMBLY__ */ | 188 | #endif /* !__ASSEMBLY__ */ |
184 | 189 | ||
185 | #endif /* _ASM_X86_PGTABLE_64_H */ | 190 | #endif /* _ASM_X86_PGTABLE_64_H */ |
diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h index 766ea16fbbbd..2d883440cb9a 100644 --- a/arch/x86/include/asm/pgtable_64_types.h +++ b/arch/x86/include/asm/pgtable_64_types.h | |||
@@ -1,6 +1,8 @@ | |||
1 | #ifndef _ASM_X86_PGTABLE_64_DEFS_H | 1 | #ifndef _ASM_X86_PGTABLE_64_DEFS_H |
2 | #define _ASM_X86_PGTABLE_64_DEFS_H | 2 | #define _ASM_X86_PGTABLE_64_DEFS_H |
3 | 3 | ||
4 | #include <asm/sparsemem.h> | ||
5 | |||
4 | #ifndef __ASSEMBLY__ | 6 | #ifndef __ASSEMBLY__ |
5 | #include <linux/types.h> | 7 | #include <linux/types.h> |
6 | 8 | ||
@@ -60,4 +62,6 @@ typedef struct { pteval_t pte; } pte_t; | |||
60 | #define MODULES_END _AC(0xffffffffff000000, UL) | 62 | #define MODULES_END _AC(0xffffffffff000000, UL) |
61 | #define MODULES_LEN (MODULES_END - MODULES_VADDR) | 63 | #define MODULES_LEN (MODULES_END - MODULES_VADDR) |
62 | 64 | ||
65 | #define EARLY_DYNAMIC_PAGE_TABLES 64 | ||
66 | |||
63 | #endif /* _ASM_X86_PGTABLE_64_DEFS_H */ | 67 | #endif /* _ASM_X86_PGTABLE_64_DEFS_H */ |
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index 3c32db8c539d..e6423002c10b 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h | |||
@@ -321,7 +321,6 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn, | |||
321 | /* Install a pte for a particular vaddr in kernel space. */ | 321 | /* Install a pte for a particular vaddr in kernel space. */ |
322 | void set_pte_vaddr(unsigned long vaddr, pte_t pte); | 322 | void set_pte_vaddr(unsigned long vaddr, pte_t pte); |
323 | 323 | ||
324 | extern void native_pagetable_reserve(u64 start, u64 end); | ||
325 | #ifdef CONFIG_X86_32 | 324 | #ifdef CONFIG_X86_32 |
326 | extern void native_pagetable_init(void); | 325 | extern void native_pagetable_init(void); |
327 | #else | 326 | #else |
@@ -331,7 +330,7 @@ extern void native_pagetable_init(void); | |||
331 | struct seq_file; | 330 | struct seq_file; |
332 | extern void arch_report_meminfo(struct seq_file *m); | 331 | extern void arch_report_meminfo(struct seq_file *m); |
333 | 332 | ||
334 | enum { | 333 | enum pg_level { |
335 | PG_LEVEL_NONE, | 334 | PG_LEVEL_NONE, |
336 | PG_LEVEL_4K, | 335 | PG_LEVEL_4K, |
337 | PG_LEVEL_2M, | 336 | PG_LEVEL_2M, |
@@ -352,6 +351,7 @@ static inline void update_page_count(int level, unsigned long pages) { } | |||
352 | * as a pte too. | 351 | * as a pte too. |
353 | */ | 352 | */ |
354 | extern pte_t *lookup_address(unsigned long address, unsigned int *level); | 353 | extern pte_t *lookup_address(unsigned long address, unsigned int *level); |
354 | extern phys_addr_t slow_virt_to_phys(void *__address); | ||
355 | 355 | ||
356 | #endif /* !__ASSEMBLY__ */ | 356 | #endif /* !__ASSEMBLY__ */ |
357 | 357 | ||
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index d172588efae5..8277941cbe99 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h | |||
@@ -721,6 +721,7 @@ extern void enable_sep_cpu(void); | |||
721 | extern int sysenter_setup(void); | 721 | extern int sysenter_setup(void); |
722 | 722 | ||
723 | extern void early_trap_init(void); | 723 | extern void early_trap_init(void); |
724 | void early_trap_pf_init(void); | ||
724 | 725 | ||
725 | /* Defined in head.S */ | 726 | /* Defined in head.S */ |
726 | extern struct desc_ptr early_gdt_descr; | 727 | extern struct desc_ptr early_gdt_descr; |
diff --git a/arch/x86/include/asm/realmode.h b/arch/x86/include/asm/realmode.h index fe1ec5bcd846..9c6b890d5e7a 100644 --- a/arch/x86/include/asm/realmode.h +++ b/arch/x86/include/asm/realmode.h | |||
@@ -58,6 +58,7 @@ extern unsigned char boot_gdt[]; | |||
58 | extern unsigned char secondary_startup_64[]; | 58 | extern unsigned char secondary_startup_64[]; |
59 | #endif | 59 | #endif |
60 | 60 | ||
61 | extern void __init setup_real_mode(void); | 61 | void reserve_real_mode(void); |
62 | void setup_real_mode(void); | ||
62 | 63 | ||
63 | #endif /* _ARCH_X86_REALMODE_H */ | 64 | #endif /* _ARCH_X86_REALMODE_H */ |
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index 1709801d18ec..5ee26875baea 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h | |||
@@ -125,13 +125,12 @@ extern int __get_user_4(void); | |||
125 | extern int __get_user_8(void); | 125 | extern int __get_user_8(void); |
126 | extern int __get_user_bad(void); | 126 | extern int __get_user_bad(void); |
127 | 127 | ||
128 | #define __get_user_x(size, ret, x, ptr) \ | 128 | /* |
129 | asm volatile("call __get_user_" #size \ | 129 | * This is a type: either unsigned long, if the argument fits into |
130 | : "=a" (ret), "=d" (x) \ | 130 | * that type, or otherwise unsigned long long. |
131 | : "0" (ptr)) \ | 131 | */ |
132 | 132 | #define __inttype(x) \ | |
133 | /* Careful: we have to cast the result to the type of the pointer | 133 | __typeof__(__builtin_choose_expr(sizeof(x) > sizeof(0UL), 0ULL, 0UL)) |
134 | * for sign reasons */ | ||
135 | 134 | ||
136 | /** | 135 | /** |
137 | * get_user: - Get a simple variable from user space. | 136 | * get_user: - Get a simple variable from user space. |
@@ -150,38 +149,26 @@ extern int __get_user_bad(void); | |||
150 | * Returns zero on success, or -EFAULT on error. | 149 | * Returns zero on success, or -EFAULT on error. |
151 | * On error, the variable @x is set to zero. | 150 | * On error, the variable @x is set to zero. |
152 | */ | 151 | */ |
153 | #ifdef CONFIG_X86_32 | 152 | /* |
154 | #define __get_user_8(__ret_gu, __val_gu, ptr) \ | 153 | * Careful: we have to cast the result to the type of the pointer |
155 | __get_user_x(X, __ret_gu, __val_gu, ptr) | 154 | * for sign reasons. |
156 | #else | 155 | * |
157 | #define __get_user_8(__ret_gu, __val_gu, ptr) \ | 156 | * The use of %edx as the register specifier is a bit of a |
158 | __get_user_x(8, __ret_gu, __val_gu, ptr) | 157 | * simplification, as gcc only cares about it as the starting point |
159 | #endif | 158 | * and not size: for a 64-bit value it will use %ecx:%edx on 32 bits |
160 | 159 | * (%ecx being the next register in gcc's x86 register sequence), and | |
160 | * %rdx on 64 bits. | ||
161 | */ | ||
161 | #define get_user(x, ptr) \ | 162 | #define get_user(x, ptr) \ |
162 | ({ \ | 163 | ({ \ |
163 | int __ret_gu; \ | 164 | int __ret_gu; \ |
164 | unsigned long __val_gu; \ | 165 | register __inttype(*(ptr)) __val_gu asm("%edx"); \ |
165 | __chk_user_ptr(ptr); \ | 166 | __chk_user_ptr(ptr); \ |
166 | might_fault(); \ | 167 | might_fault(); \ |
167 | switch (sizeof(*(ptr))) { \ | 168 | asm volatile("call __get_user_%P3" \ |
168 | case 1: \ | 169 | : "=a" (__ret_gu), "=r" (__val_gu) \ |
169 | __get_user_x(1, __ret_gu, __val_gu, ptr); \ | 170 | : "0" (ptr), "i" (sizeof(*(ptr)))); \ |
170 | break; \ | 171 | (x) = (__typeof__(*(ptr))) __val_gu; \ |
171 | case 2: \ | ||
172 | __get_user_x(2, __ret_gu, __val_gu, ptr); \ | ||
173 | break; \ | ||
174 | case 4: \ | ||
175 | __get_user_x(4, __ret_gu, __val_gu, ptr); \ | ||
176 | break; \ | ||
177 | case 8: \ | ||
178 | __get_user_8(__ret_gu, __val_gu, ptr); \ | ||
179 | break; \ | ||
180 | default: \ | ||
181 | __get_user_x(X, __ret_gu, __val_gu, ptr); \ | ||
182 | break; \ | ||
183 | } \ | ||
184 | (x) = (__typeof__(*(ptr)))__val_gu; \ | ||
185 | __ret_gu; \ | 172 | __ret_gu; \ |
186 | }) | 173 | }) |
187 | 174 | ||
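The new __inttype() macro is what lets a single call sequence handle 64-bit values: __builtin_choose_expr picks 0ULL when the target is wider than unsigned long (a u64 on a 32-bit build) and 0UL otherwise, and __typeof__ turns that into the type of the temporary register variable. A small user-space illustration of the macro's behaviour (GCC extensions, as in the kernel):

    #include <stdio.h>

    #define __inttype(x) \
        __typeof__(__builtin_choose_expr(sizeof(x) > sizeof(0UL), 0ULL, 0UL))

    int main(void)
    {
        char      c  = 0;
        long long ll = 0;

        /* On a 32-bit build the second line prints 8 while the first stays at
         * sizeof(unsigned long); on a 64-bit build both print 8. */
        printf("%zu\n", sizeof(__inttype(c)));
        printf("%zu\n", sizeof(__inttype(ll)));
        return 0;
    }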
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index 7669941cc9d2..d8d99222b36a 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h | |||
@@ -69,17 +69,6 @@ struct x86_init_oem { | |||
69 | }; | 69 | }; |
70 | 70 | ||
71 | /** | 71 | /** |
72 | * struct x86_init_mapping - platform specific initial kernel pagetable setup | ||
73 | * @pagetable_reserve: reserve a range of addresses for kernel pagetable usage | ||
74 | * | ||
75 | * For more details on the purpose of this hook, look in | ||
76 | * init_memory_mapping and the commit that added it. | ||
77 | */ | ||
78 | struct x86_init_mapping { | ||
79 | void (*pagetable_reserve)(u64 start, u64 end); | ||
80 | }; | ||
81 | |||
82 | /** | ||
83 | * struct x86_init_paging - platform specific paging functions | 72 | * struct x86_init_paging - platform specific paging functions |
84 | * @pagetable_init: platform specific paging initialization call to setup | 73 | * @pagetable_init: platform specific paging initialization call to setup |
85 | * the kernel pagetables and prepare accessors functions. | 74 | * the kernel pagetables and prepare accessors functions. |
@@ -136,7 +125,6 @@ struct x86_init_ops { | |||
136 | struct x86_init_mpparse mpparse; | 125 | struct x86_init_mpparse mpparse; |
137 | struct x86_init_irqs irqs; | 126 | struct x86_init_irqs irqs; |
138 | struct x86_init_oem oem; | 127 | struct x86_init_oem oem; |
139 | struct x86_init_mapping mapping; | ||
140 | struct x86_init_paging paging; | 128 | struct x86_init_paging paging; |
141 | struct x86_init_timers timers; | 129 | struct x86_init_timers timers; |
142 | struct x86_init_iommu iommu; | 130 | struct x86_init_iommu iommu; |
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index bacf4b0d91f4..cfc755dc1607 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c | |||
@@ -51,7 +51,6 @@ EXPORT_SYMBOL(acpi_disabled); | |||
51 | 51 | ||
52 | #ifdef CONFIG_X86_64 | 52 | #ifdef CONFIG_X86_64 |
53 | # include <asm/proto.h> | 53 | # include <asm/proto.h> |
54 | # include <asm/numa_64.h> | ||
55 | #endif /* X86 */ | 54 | #endif /* X86 */ |
56 | 55 | ||
57 | #define BAD_MADT_ENTRY(entry, end) ( \ | 56 | #define BAD_MADT_ENTRY(entry, end) ( \ |
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c index d5e0d717005a..0532f5d6e4ef 100644 --- a/arch/x86/kernel/acpi/sleep.c +++ b/arch/x86/kernel/acpi/sleep.c | |||
@@ -69,7 +69,7 @@ int acpi_suspend_lowlevel(void) | |||
69 | 69 | ||
70 | #ifndef CONFIG_64BIT | 70 | #ifndef CONFIG_64BIT |
71 | header->pmode_entry = (u32)&wakeup_pmode_return; | 71 | header->pmode_entry = (u32)&wakeup_pmode_return; |
72 | header->pmode_cr3 = (u32)__pa(&initial_page_table); | 72 | header->pmode_cr3 = (u32)__pa_symbol(initial_page_table); |
73 | saved_magic = 0x12345678; | 73 | saved_magic = 0x12345678; |
74 | #else /* CONFIG_64BIT */ | 74 | #else /* CONFIG_64BIT */ |
75 | #ifdef CONFIG_SMP | 75 | #ifdef CONFIG_SMP |
diff --git a/arch/x86/kernel/amd_gart_64.c b/arch/x86/kernel/amd_gart_64.c index e66311200cbd..b574b295a2f9 100644 --- a/arch/x86/kernel/amd_gart_64.c +++ b/arch/x86/kernel/amd_gart_64.c | |||
@@ -768,10 +768,9 @@ int __init gart_iommu_init(void) | |||
768 | aper_base = info.aper_base; | 768 | aper_base = info.aper_base; |
769 | end_pfn = (aper_base>>PAGE_SHIFT) + (aper_size>>PAGE_SHIFT); | 769 | end_pfn = (aper_base>>PAGE_SHIFT) + (aper_size>>PAGE_SHIFT); |
770 | 770 | ||
771 | if (end_pfn > max_low_pfn_mapped) { | 771 | start_pfn = PFN_DOWN(aper_base); |
772 | start_pfn = (aper_base>>PAGE_SHIFT); | 772 | if (!pfn_range_is_mapped(start_pfn, end_pfn)) |
773 | init_memory_mapping(start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT); | 773 | init_memory_mapping(start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT); |
774 | } | ||
775 | 774 | ||
776 | pr_info("PCI-DMA: using GART IOMMU.\n"); | 775 | pr_info("PCI-DMA: using GART IOMMU.\n"); |
777 | iommu_size = check_iommu_size(info.aper_base, aper_size); | 776 | iommu_size = check_iommu_size(info.aper_base, aper_size); |
diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c index 9c2aa89a11cb..9a9110918ca7 100644 --- a/arch/x86/kernel/apic/apic_numachip.c +++ b/arch/x86/kernel/apic/apic_numachip.c | |||
@@ -28,6 +28,7 @@ | |||
28 | #include <asm/apic.h> | 28 | #include <asm/apic.h> |
29 | #include <asm/ipi.h> | 29 | #include <asm/ipi.h> |
30 | #include <asm/apic_flat_64.h> | 30 | #include <asm/apic_flat_64.h> |
31 | #include <asm/pgtable.h> | ||
31 | 32 | ||
32 | static int numachip_system __read_mostly; | 33 | static int numachip_system __read_mostly; |
33 | 34 | ||
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 84bee67141ad..edd77e7508b3 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c | |||
@@ -12,7 +12,6 @@ | |||
12 | #include <asm/pci-direct.h> | 12 | #include <asm/pci-direct.h> |
13 | 13 | ||
14 | #ifdef CONFIG_X86_64 | 14 | #ifdef CONFIG_X86_64 |
15 | # include <asm/numa_64.h> | ||
16 | # include <asm/mmconfig.h> | 15 | # include <asm/mmconfig.h> |
17 | # include <asm/cacheflush.h> | 16 | # include <asm/cacheflush.h> |
18 | #endif | 17 | #endif |
@@ -680,12 +679,10 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) | |||
680 | * benefit in doing so. | 679 | * benefit in doing so. |
681 | */ | 680 | */ |
682 | if (!rdmsrl_safe(MSR_K8_TSEG_ADDR, &tseg)) { | 681 | if (!rdmsrl_safe(MSR_K8_TSEG_ADDR, &tseg)) { |
682 | unsigned long pfn = tseg >> PAGE_SHIFT; | ||
683 | |||
683 | printk(KERN_DEBUG "tseg: %010llx\n", tseg); | 684 | printk(KERN_DEBUG "tseg: %010llx\n", tseg); |
684 | if ((tseg>>PMD_SHIFT) < | 685 | if (pfn_range_is_mapped(pfn, pfn + 1)) |
685 | (max_low_pfn_mapped>>(PMD_SHIFT-PAGE_SHIFT)) || | ||
686 | ((tseg>>PMD_SHIFT) < | ||
687 | (max_pfn_mapped>>(PMD_SHIFT-PAGE_SHIFT)) && | ||
688 | (tseg>>PMD_SHIFT) >= (1ULL<<(32 - PMD_SHIFT)))) | ||
689 | set_memory_4k((unsigned long)__va(tseg), 1); | 686 | set_memory_4k((unsigned long)__va(tseg), 1); |
690 | } | 687 | } |
691 | } | 688 | } |
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index fcaabd0432c5..1905ce98bee0 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c | |||
@@ -17,7 +17,6 @@ | |||
17 | 17 | ||
18 | #ifdef CONFIG_X86_64 | 18 | #ifdef CONFIG_X86_64 |
19 | #include <linux/topology.h> | 19 | #include <linux/topology.h> |
20 | #include <asm/numa_64.h> | ||
21 | #endif | 20 | #endif |
22 | 21 | ||
23 | #include "cpu.h" | 22 | #include "cpu.h" |
@@ -168,7 +167,7 @@ int __cpuinit ppro_with_ram_bug(void) | |||
168 | #ifdef CONFIG_X86_F00F_BUG | 167 | #ifdef CONFIG_X86_F00F_BUG |
169 | static void __cpuinit trap_init_f00f_bug(void) | 168 | static void __cpuinit trap_init_f00f_bug(void) |
170 | { | 169 | { |
171 | __set_fixmap(FIX_F00F_IDT, __pa(&idt_table), PAGE_KERNEL_RO); | 170 | __set_fixmap(FIX_F00F_IDT, __pa_symbol(idt_table), PAGE_KERNEL_RO); |
172 | 171 | ||
173 | /* | 172 | /* |
174 | * Update the IDT descriptor and reload the IDT so that | 173 | * Update the IDT descriptor and reload the IDT so that |
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index df06ade26bef..d32abeabbda5 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c | |||
@@ -835,7 +835,7 @@ static int __init parse_memopt(char *p) | |||
835 | } | 835 | } |
836 | early_param("mem", parse_memopt); | 836 | early_param("mem", parse_memopt); |
837 | 837 | ||
838 | static int __init parse_memmap_opt(char *p) | 838 | static int __init parse_memmap_one(char *p) |
839 | { | 839 | { |
840 | char *oldp; | 840 | char *oldp; |
841 | u64 start_at, mem_size; | 841 | u64 start_at, mem_size; |
@@ -877,6 +877,20 @@ static int __init parse_memmap_opt(char *p) | |||
877 | 877 | ||
878 | return *p == '\0' ? 0 : -EINVAL; | 878 | return *p == '\0' ? 0 : -EINVAL; |
879 | } | 879 | } |
880 | static int __init parse_memmap_opt(char *str) | ||
881 | { | ||
882 | while (str) { | ||
883 | char *k = strchr(str, ','); | ||
884 | |||
885 | if (k) | ||
886 | *k++ = 0; | ||
887 | |||
888 | parse_memmap_one(str); | ||
889 | str = k; | ||
890 | } | ||
891 | |||
892 | return 0; | ||
893 | } | ||
880 | early_param("memmap", parse_memmap_opt); | 894 | early_param("memmap", parse_memmap_opt); |
881 | 895 | ||
882 | void __init finish_e820_parsing(void) | 896 | void __init finish_e820_parsing(void) |
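With parse_memmap_opt() now splitting on commas, several ranges can be given in one option instead of repeating memmap=. An illustrative command line (values made up) that would previously have required two separate options:

    memmap=64M@16M,16M$512M

Each comma-separated chunk is handed to parse_memmap_one() exactly as a standalone memmap= argument would have been.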
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 1d414029f1d8..42a392a9fd02 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c | |||
@@ -89,7 +89,7 @@ do_ftrace_mod_code(unsigned long ip, const void *new_code) | |||
89 | * kernel identity mapping to modify code. | 89 | * kernel identity mapping to modify code. |
90 | */ | 90 | */ |
91 | if (within(ip, (unsigned long)_text, (unsigned long)_etext)) | 91 | if (within(ip, (unsigned long)_text, (unsigned long)_etext)) |
92 | ip = (unsigned long)__va(__pa(ip)); | 92 | ip = (unsigned long)__va(__pa_symbol(ip)); |
93 | 93 | ||
94 | return probe_kernel_write((void *)ip, new_code, MCOUNT_INSN_SIZE); | 94 | return probe_kernel_write((void *)ip, new_code, MCOUNT_INSN_SIZE); |
95 | } | 95 | } |
@@ -279,7 +279,7 @@ static int ftrace_write(unsigned long ip, const char *val, int size) | |||
279 | * kernel identity mapping to modify code. | 279 | * kernel identity mapping to modify code. |
280 | */ | 280 | */ |
281 | if (within(ip, (unsigned long)_text, (unsigned long)_etext)) | 281 | if (within(ip, (unsigned long)_text, (unsigned long)_etext)) |
282 | ip = (unsigned long)__va(__pa(ip)); | 282 | ip = (unsigned long)__va(__pa_symbol(ip)); |
283 | 283 | ||
284 | return probe_kernel_write((void *)ip, val, size); | 284 | return probe_kernel_write((void *)ip, val, size); |
285 | } | 285 | } |
diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c index 6773c918b8cc..138463a24877 100644 --- a/arch/x86/kernel/head32.c +++ b/arch/x86/kernel/head32.c | |||
@@ -33,20 +33,6 @@ void __init i386_start_kernel(void) | |||
33 | { | 33 | { |
34 | sanitize_boot_params(&boot_params); | 34 | sanitize_boot_params(&boot_params); |
35 | 35 | ||
36 | memblock_reserve(__pa_symbol(&_text), | ||
37 | __pa_symbol(&__bss_stop) - __pa_symbol(&_text)); | ||
38 | |||
39 | #ifdef CONFIG_BLK_DEV_INITRD | ||
40 | /* Reserve INITRD */ | ||
41 | if (boot_params.hdr.type_of_loader && boot_params.hdr.ramdisk_image) { | ||
42 | /* Assume only end is not page aligned */ | ||
43 | u64 ramdisk_image = boot_params.hdr.ramdisk_image; | ||
44 | u64 ramdisk_size = boot_params.hdr.ramdisk_size; | ||
45 | u64 ramdisk_end = PAGE_ALIGN(ramdisk_image + ramdisk_size); | ||
46 | memblock_reserve(ramdisk_image, ramdisk_end - ramdisk_image); | ||
47 | } | ||
48 | #endif | ||
49 | |||
50 | /* Call the subarch specific early setup function */ | 36 | /* Call the subarch specific early setup function */ |
51 | switch (boot_params.hdr.hardware_subarch) { | 37 | switch (boot_params.hdr.hardware_subarch) { |
52 | case X86_SUBARCH_MRST: | 38 | case X86_SUBARCH_MRST: |
@@ -60,11 +46,5 @@ void __init i386_start_kernel(void) | |||
60 | break; | 46 | break; |
61 | } | 47 | } |
62 | 48 | ||
63 | /* | ||
64 | * At this point everything still needed from the boot loader | ||
65 | * or BIOS or kernel text should be early reserved or marked not | ||
66 | * RAM in e820. All other memory is free game. | ||
67 | */ | ||
68 | |||
69 | start_kernel(); | 49 | start_kernel(); |
70 | } | 50 | } |
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 849fc9e63c2f..57334f4cd3af 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c | |||
@@ -27,11 +27,81 @@ | |||
27 | #include <asm/bios_ebda.h> | 27 | #include <asm/bios_ebda.h> |
28 | #include <asm/bootparam_utils.h> | 28 | #include <asm/bootparam_utils.h> |
29 | 29 | ||
30 | static void __init zap_identity_mappings(void) | 30 | /* |
31 | * Manage page tables very early on. | ||
32 | */ | ||
33 | extern pgd_t early_level4_pgt[PTRS_PER_PGD]; | ||
34 | extern pmd_t early_dynamic_pgts[EARLY_DYNAMIC_PAGE_TABLES][PTRS_PER_PMD]; | ||
35 | static unsigned int __initdata next_early_pgt = 2; | ||
36 | |||
37 | /* Wipe all early page tables except for the kernel symbol map */ | ||
38 | static void __init reset_early_page_tables(void) | ||
39 | { | ||
40 | unsigned long i; | ||
41 | |||
42 | for (i = 0; i < PTRS_PER_PGD-1; i++) | ||
43 | early_level4_pgt[i].pgd = 0; | ||
44 | |||
45 | next_early_pgt = 0; | ||
46 | |||
47 | write_cr3(__pa(early_level4_pgt)); | ||
48 | } | ||
49 | |||
50 | /* Create a new PMD entry */ | ||
51 | int __init early_make_pgtable(unsigned long address) | ||
31 | { | 52 | { |
32 | pgd_t *pgd = pgd_offset_k(0UL); | 53 | unsigned long physaddr = address - __PAGE_OFFSET; |
33 | pgd_clear(pgd); | 54 | unsigned long i; |
34 | __flush_tlb_all(); | 55 | pgdval_t pgd, *pgd_p; |
56 | pudval_t pud, *pud_p; | ||
57 | pmdval_t pmd, *pmd_p; | ||
58 | |||
59 | /* Invalid address or early pgt is done ? */ | ||
60 | if (physaddr >= MAXMEM || read_cr3() != __pa(early_level4_pgt)) | ||
61 | return -1; | ||
62 | |||
63 | again: | ||
64 | pgd_p = &early_level4_pgt[pgd_index(address)].pgd; | ||
65 | pgd = *pgd_p; | ||
66 | |||
67 | /* | ||
68 | * The use of __START_KERNEL_map rather than __PAGE_OFFSET here is | ||
69 | * critical -- __PAGE_OFFSET would point us back into the dynamic | ||
70 | * range and we might end up looping forever... | ||
71 | */ | ||
72 | if (pgd) | ||
73 | pud_p = (pudval_t *)((pgd & PTE_PFN_MASK) + __START_KERNEL_map - phys_base); | ||
74 | else { | ||
75 | if (next_early_pgt >= EARLY_DYNAMIC_PAGE_TABLES) { | ||
76 | reset_early_page_tables(); | ||
77 | goto again; | ||
78 | } | ||
79 | |||
80 | pud_p = (pudval_t *)early_dynamic_pgts[next_early_pgt++]; | ||
81 | for (i = 0; i < PTRS_PER_PUD; i++) | ||
82 | pud_p[i] = 0; | ||
83 | *pgd_p = (pgdval_t)pud_p - __START_KERNEL_map + phys_base + _KERNPG_TABLE; | ||
84 | } | ||
85 | pud_p += pud_index(address); | ||
86 | pud = *pud_p; | ||
87 | |||
88 | if (pud) | ||
89 | pmd_p = (pmdval_t *)((pud & PTE_PFN_MASK) + __START_KERNEL_map - phys_base); | ||
90 | else { | ||
91 | if (next_early_pgt >= EARLY_DYNAMIC_PAGE_TABLES) { | ||
92 | reset_early_page_tables(); | ||
93 | goto again; | ||
94 | } | ||
95 | |||
96 | pmd_p = (pmdval_t *)early_dynamic_pgts[next_early_pgt++]; | ||
97 | for (i = 0; i < PTRS_PER_PMD; i++) | ||
98 | pmd_p[i] = 0; | ||
99 | *pud_p = (pudval_t)pmd_p - __START_KERNEL_map + phys_base + _KERNPG_TABLE; | ||
100 | } | ||
101 | pmd = (physaddr & PMD_MASK) + (__PAGE_KERNEL_LARGE & ~_PAGE_GLOBAL); | ||
102 | pmd_p[pmd_index(address)] = pmd; | ||
103 | |||
104 | return 0; | ||
35 | } | 105 | } |
36 | 106 | ||
37 | /* Don't add a printk in there. printk relies on the PDA which is not initialized | 107 | /* Don't add a printk in there. printk relies on the PDA which is not initialized |
@@ -42,14 +112,25 @@ static void __init clear_bss(void) | |||
42 | (unsigned long) __bss_stop - (unsigned long) __bss_start); | 112 | (unsigned long) __bss_stop - (unsigned long) __bss_start); |
43 | } | 113 | } |
44 | 114 | ||
115 | static unsigned long get_cmd_line_ptr(void) | ||
116 | { | ||
117 | unsigned long cmd_line_ptr = boot_params.hdr.cmd_line_ptr; | ||
118 | |||
119 | cmd_line_ptr |= (u64)boot_params.ext_cmd_line_ptr << 32; | ||
120 | |||
121 | return cmd_line_ptr; | ||
122 | } | ||
123 | |||
45 | static void __init copy_bootdata(char *real_mode_data) | 124 | static void __init copy_bootdata(char *real_mode_data) |
46 | { | 125 | { |
47 | char * command_line; | 126 | char * command_line; |
127 | unsigned long cmd_line_ptr; | ||
48 | 128 | ||
49 | memcpy(&boot_params, real_mode_data, sizeof boot_params); | 129 | memcpy(&boot_params, real_mode_data, sizeof boot_params); |
50 | sanitize_boot_params(&boot_params); | 130 | sanitize_boot_params(&boot_params); |
51 | if (boot_params.hdr.cmd_line_ptr) { | 131 | cmd_line_ptr = get_cmd_line_ptr(); |
52 | command_line = __va(boot_params.hdr.cmd_line_ptr); | 132 | if (cmd_line_ptr) { |
133 | command_line = __va(cmd_line_ptr); | ||
53 | memcpy(boot_command_line, command_line, COMMAND_LINE_SIZE); | 134 | memcpy(boot_command_line, command_line, COMMAND_LINE_SIZE); |
54 | } | 135 | } |
55 | } | 136 | } |
@@ -72,14 +153,12 @@ void __init x86_64_start_kernel(char * real_mode_data) | |||
72 | (__START_KERNEL & PGDIR_MASK))); | 153 | (__START_KERNEL & PGDIR_MASK))); |
73 | BUILD_BUG_ON(__fix_to_virt(__end_of_fixed_addresses) <= MODULES_END); | 154 | BUILD_BUG_ON(__fix_to_virt(__end_of_fixed_addresses) <= MODULES_END); |
74 | 155 | ||
156 | /* Kill off the identity-map trampoline */ | ||
157 | reset_early_page_tables(); | ||
158 | |||
75 | /* clear bss before set_intr_gate with early_idt_handler */ | 159 | /* clear bss before set_intr_gate with early_idt_handler */ |
76 | clear_bss(); | 160 | clear_bss(); |
77 | 161 | ||
78 | /* Make NULL pointers segfault */ | ||
79 | zap_identity_mappings(); | ||
80 | |||
81 | max_pfn_mapped = KERNEL_IMAGE_SIZE >> PAGE_SHIFT; | ||
82 | |||
83 | for (i = 0; i < NUM_EXCEPTION_VECTORS; i++) { | 162 | for (i = 0; i < NUM_EXCEPTION_VECTORS; i++) { |
84 | #ifdef CONFIG_EARLY_PRINTK | 163 | #ifdef CONFIG_EARLY_PRINTK |
85 | set_intr_gate(i, &early_idt_handlers[i]); | 164 | set_intr_gate(i, &early_idt_handlers[i]); |
@@ -89,37 +168,25 @@ void __init x86_64_start_kernel(char * real_mode_data) | |||
89 | } | 168 | } |
90 | load_idt((const struct desc_ptr *)&idt_descr); | 169 | load_idt((const struct desc_ptr *)&idt_descr); |
91 | 170 | ||
171 | copy_bootdata(__va(real_mode_data)); | ||
172 | |||
92 | if (console_loglevel == 10) | 173 | if (console_loglevel == 10) |
93 | early_printk("Kernel alive\n"); | 174 | early_printk("Kernel alive\n"); |
94 | 175 | ||
176 | clear_page(init_level4_pgt); | ||
177 | /* set init_level4_pgt kernel high mapping*/ | ||
178 | init_level4_pgt[511] = early_level4_pgt[511]; | ||
179 | |||
95 | x86_64_start_reservations(real_mode_data); | 180 | x86_64_start_reservations(real_mode_data); |
96 | } | 181 | } |
97 | 182 | ||
98 | void __init x86_64_start_reservations(char *real_mode_data) | 183 | void __init x86_64_start_reservations(char *real_mode_data) |
99 | { | 184 | { |
100 | copy_bootdata(__va(real_mode_data)); | 185 | /* version is never zero once the boot data has been copied */ |
101 | 186 | if (!boot_params.hdr.version) | |
102 | memblock_reserve(__pa_symbol(&_text), | 187 | copy_bootdata(__va(real_mode_data)); |
103 | __pa_symbol(&__bss_stop) - __pa_symbol(&_text)); | ||
104 | |||
105 | #ifdef CONFIG_BLK_DEV_INITRD | ||
106 | /* Reserve INITRD */ | ||
107 | if (boot_params.hdr.type_of_loader && boot_params.hdr.ramdisk_image) { | ||
108 | /* Assume only end is not page aligned */ | ||
109 | unsigned long ramdisk_image = boot_params.hdr.ramdisk_image; | ||
110 | unsigned long ramdisk_size = boot_params.hdr.ramdisk_size; | ||
111 | unsigned long ramdisk_end = PAGE_ALIGN(ramdisk_image + ramdisk_size); | ||
112 | memblock_reserve(ramdisk_image, ramdisk_end - ramdisk_image); | ||
113 | } | ||
114 | #endif | ||
115 | 188 | ||
116 | reserve_ebda_region(); | 189 | reserve_ebda_region(); |
117 | 190 | ||
118 | /* | ||
119 | * At this point everything still needed from the boot loader | ||
120 | * or BIOS or kernel text should be early reserved or marked not | ||
121 | * RAM in e820. All other memory is free game. | ||
122 | */ | ||
123 | |||
124 | start_kernel(); | 191 | start_kernel(); |
125 | } | 192 | } |
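With the identity-map trampoline killed off early, the first access to a physical region outside the premapped kernel image now faults into early_idt_handler, which calls early_make_pgtable() to install a 2 MiB identity mapping on demand (the tail of that function is visible at the top of this hunk). A rough userspace model of the index math it performs, assuming the usual 4-level x86-64 layout; it only prints the indices and the 2 MiB frame and does not touch real page tables:

    #include <stdint.h>
    #include <stdio.h>

    #define PMD_SHIFT       21      /* 2 MiB pages */
    #define PUD_SHIFT       30
    #define PGDIR_SHIFT     39
    #define PTRS_PER_TABLE  512
    #define PMD_MASK        (~((1ULL << PMD_SHIFT) - 1))

    static void decompose(uint64_t addr)
    {
            unsigned int pgd = (addr >> PGDIR_SHIFT) & (PTRS_PER_TABLE - 1);
            unsigned int pud = (addr >> PUD_SHIFT)   & (PTRS_PER_TABLE - 1);
            unsigned int pmd = (addr >> PMD_SHIFT)   & (PTRS_PER_TABLE - 1);

            printf("%#018llx -> pgd %3u, pud %3u, pmd %3u, 2M frame %#llx\n",
                   (unsigned long long)addr, pgd, pud, pmd,
                   (unsigned long long)(addr & PMD_MASK));
    }

    int main(void)
    {
            decompose(0x0000000000100000ULL);       /* 1 MiB */
            decompose(0x0000000100000000ULL);       /* 4 GiB */
            decompose(0x000000fedc000000ULL);       /* ~1 TiB */
            return 0;
    }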
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 980053c4b9cc..d94f6d68be2a 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S | |||
@@ -47,14 +47,13 @@ L3_START_KERNEL = pud_index(__START_KERNEL_map) | |||
47 | .code64 | 47 | .code64 |
48 | .globl startup_64 | 48 | .globl startup_64 |
49 | startup_64: | 49 | startup_64: |
50 | |||
51 | /* | 50 | /* |
52 | * At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 1, | 51 | * At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 1, |
53 | * and someone has loaded an identity mapped page table | 52 | * and someone has loaded an identity mapped page table |
54 | * for us. These identity mapped page tables map all of the | 53 | * for us. These identity mapped page tables map all of the |
55 | * kernel pages and possibly all of memory. | 54 | * kernel pages and possibly all of memory. |
56 | * | 55 | * |
57 | * %esi holds a physical pointer to real_mode_data. | 56 | * %rsi holds a physical pointer to real_mode_data. |
58 | * | 57 | * |
59 | * We come here either directly from a 64bit bootloader, or from | 58 | * We come here either directly from a 64bit bootloader, or from |
60 | * arch/x86_64/boot/compressed/head.S. | 59 | * arch/x86_64/boot/compressed/head.S. |
@@ -66,7 +65,8 @@ startup_64: | |||
66 | * tables and then reload them. | 65 | * tables and then reload them. |
67 | */ | 66 | */ |
68 | 67 | ||
69 | /* Compute the delta between the address I am compiled to run at and the | 68 | /* |
69 | * Compute the delta between the address I am compiled to run at and the | ||
70 | * address I am actually running at. | 70 | * address I am actually running at. |
71 | */ | 71 | */ |
72 | leaq _text(%rip), %rbp | 72 | leaq _text(%rip), %rbp |
@@ -78,45 +78,62 @@ startup_64: | |||
78 | testl %eax, %eax | 78 | testl %eax, %eax |
79 | jnz bad_address | 79 | jnz bad_address |
80 | 80 | ||
81 | /* Is the address too large? */ | 81 | /* |
82 | leaq _text(%rip), %rdx | 82 | * Is the address too large? |
83 | movq $PGDIR_SIZE, %rax | ||
84 | cmpq %rax, %rdx | ||
85 | jae bad_address | ||
86 | |||
87 | /* Fixup the physical addresses in the page table | ||
88 | */ | 83 | */ |
89 | addq %rbp, init_level4_pgt + 0(%rip) | 84 | leaq _text(%rip), %rax |
90 | addq %rbp, init_level4_pgt + (L4_PAGE_OFFSET*8)(%rip) | 85 | shrq $MAX_PHYSMEM_BITS, %rax |
91 | addq %rbp, init_level4_pgt + (L4_START_KERNEL*8)(%rip) | 86 | jnz bad_address |
92 | 87 | ||
93 | addq %rbp, level3_ident_pgt + 0(%rip) | 88 | /* |
89 | * Fixup the physical addresses in the page table | ||
90 | */ | ||
91 | addq %rbp, early_level4_pgt + (L4_START_KERNEL*8)(%rip) | ||
94 | 92 | ||
95 | addq %rbp, level3_kernel_pgt + (510*8)(%rip) | 93 | addq %rbp, level3_kernel_pgt + (510*8)(%rip) |
96 | addq %rbp, level3_kernel_pgt + (511*8)(%rip) | 94 | addq %rbp, level3_kernel_pgt + (511*8)(%rip) |
97 | 95 | ||
98 | addq %rbp, level2_fixmap_pgt + (506*8)(%rip) | 96 | addq %rbp, level2_fixmap_pgt + (506*8)(%rip) |
99 | 97 | ||
100 | /* Add an Identity mapping if I am above 1G */ | 98 | /* |
99 | * Set up the identity mapping for the switchover. These | ||
100 | * entries should *NOT* have the global bit set! This also | ||
101 | * creates a bunch of nonsense entries but that is fine -- | ||
102 | * it avoids problems around wraparound. | ||
103 | */ | ||
101 | leaq _text(%rip), %rdi | 104 | leaq _text(%rip), %rdi |
102 | andq $PMD_PAGE_MASK, %rdi | 105 | leaq early_level4_pgt(%rip), %rbx |
103 | 106 | ||
104 | movq %rdi, %rax | 107 | movq %rdi, %rax |
105 | shrq $PUD_SHIFT, %rax | 108 | shrq $PGDIR_SHIFT, %rax |
106 | andq $(PTRS_PER_PUD - 1), %rax | ||
107 | jz ident_complete | ||
108 | 109 | ||
109 | leaq (level2_spare_pgt - __START_KERNEL_map + _KERNPG_TABLE)(%rbp), %rdx | 110 | leaq (4096 + _KERNPG_TABLE)(%rbx), %rdx |
110 | leaq level3_ident_pgt(%rip), %rbx | 111 | movq %rdx, 0(%rbx,%rax,8) |
111 | movq %rdx, 0(%rbx, %rax, 8) | 112 | movq %rdx, 8(%rbx,%rax,8) |
112 | 113 | ||
114 | addq $4096, %rdx | ||
113 | movq %rdi, %rax | 115 | movq %rdi, %rax |
114 | shrq $PMD_SHIFT, %rax | 116 | shrq $PUD_SHIFT, %rax |
115 | andq $(PTRS_PER_PMD - 1), %rax | 117 | andl $(PTRS_PER_PUD-1), %eax |
116 | leaq __PAGE_KERNEL_IDENT_LARGE_EXEC(%rdi), %rdx | 118 | movq %rdx, (4096+0)(%rbx,%rax,8) |
117 | leaq level2_spare_pgt(%rip), %rbx | 119 | movq %rdx, (4096+8)(%rbx,%rax,8) |
118 | movq %rdx, 0(%rbx, %rax, 8) | 120 | |
119 | ident_complete: | 121 | addq $8192, %rbx |
122 | movq %rdi, %rax | ||
123 | shrq $PMD_SHIFT, %rdi | ||
124 | addq $(__PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL), %rax | ||
125 | leaq (_end - 1)(%rip), %rcx | ||
126 | shrq $PMD_SHIFT, %rcx | ||
127 | subq %rdi, %rcx | ||
128 | incl %ecx | ||
129 | |||
130 | 1: | ||
131 | andq $(PTRS_PER_PMD - 1), %rdi | ||
132 | movq %rax, (%rbx,%rdi,8) | ||
133 | incq %rdi | ||
134 | addq $PMD_SIZE, %rax | ||
135 | decl %ecx | ||
136 | jnz 1b | ||
120 | 137 | ||
121 | /* | 138 | /* |
122 | * Fixup the kernel text+data virtual addresses. Note that | 139 | * Fixup the kernel text+data virtual addresses. Note that |
@@ -124,7 +141,6 @@ ident_complete: | |||
124 | * cleanup_highmap() fixes this up along with the mappings | 141 | * cleanup_highmap() fixes this up along with the mappings |
125 | * beyond _end. | 142 | * beyond _end. |
126 | */ | 143 | */ |
127 | |||
128 | leaq level2_kernel_pgt(%rip), %rdi | 144 | leaq level2_kernel_pgt(%rip), %rdi |
129 | leaq 4096(%rdi), %r8 | 145 | leaq 4096(%rdi), %r8 |
130 | /* See if it is a valid page table entry */ | 146 | /* See if it is a valid page table entry */ |
@@ -139,17 +155,14 @@ ident_complete: | |||
139 | /* Fixup phys_base */ | 155 | /* Fixup phys_base */ |
140 | addq %rbp, phys_base(%rip) | 156 | addq %rbp, phys_base(%rip) |
141 | 157 | ||
142 | /* Due to ENTRY(), sometimes the empty space gets filled with | 158 | movq $(early_level4_pgt - __START_KERNEL_map), %rax |
143 | * zeros. Better take a jmp than relying on empty space being | 159 | jmp 1f |
144 | * filled with 0x90 (nop) | ||
145 | */ | ||
146 | jmp secondary_startup_64 | ||
147 | ENTRY(secondary_startup_64) | 160 | ENTRY(secondary_startup_64) |
148 | /* | 161 | /* |
149 | * At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 1, | 162 | * At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 1, |
150 | * and someone has loaded a mapped page table. | 163 | * and someone has loaded a mapped page table. |
151 | * | 164 | * |
152 | * %esi holds a physical pointer to real_mode_data. | 165 | * %rsi holds a physical pointer to real_mode_data. |
153 | * | 166 | * |
154 | * We come here either from startup_64 (using physical addresses) | 167 | * We come here either from startup_64 (using physical addresses) |
155 | * or from trampoline.S (using virtual addresses). | 168 | * or from trampoline.S (using virtual addresses). |
@@ -159,12 +172,14 @@ ENTRY(secondary_startup_64) | |||
159 | * after the boot processor executes this code. | 172 | * after the boot processor executes this code. |
160 | */ | 173 | */ |
161 | 174 | ||
175 | movq $(init_level4_pgt - __START_KERNEL_map), %rax | ||
176 | 1: | ||
177 | |||
162 | /* Enable PAE mode and PGE */ | 178 | /* Enable PAE mode and PGE */ |
163 | movl $(X86_CR4_PAE | X86_CR4_PGE), %eax | 179 | movl $(X86_CR4_PAE | X86_CR4_PGE), %ecx |
164 | movq %rax, %cr4 | 180 | movq %rcx, %cr4 |
165 | 181 | ||
166 | /* Setup early boot stage 4 level pagetables. */ | 182 | /* Setup early boot stage 4 level pagetables. */ |
167 | movq $(init_level4_pgt - __START_KERNEL_map), %rax | ||
168 | addq phys_base(%rip), %rax | 183 | addq phys_base(%rip), %rax |
169 | movq %rax, %cr3 | 184 | movq %rax, %cr3 |
170 | 185 | ||
@@ -196,7 +211,7 @@ ENTRY(secondary_startup_64) | |||
196 | movq %rax, %cr0 | 211 | movq %rax, %cr0 |
197 | 212 | ||
198 | /* Setup a boot time stack */ | 213 | /* Setup a boot time stack */ |
199 | movq stack_start(%rip),%rsp | 214 | movq stack_start(%rip), %rsp |
200 | 215 | ||
201 | /* zero EFLAGS after setting rsp */ | 216 | /* zero EFLAGS after setting rsp */ |
202 | pushq $0 | 217 | pushq $0 |
@@ -236,15 +251,33 @@ ENTRY(secondary_startup_64) | |||
236 | movl initial_gs+4(%rip),%edx | 251 | movl initial_gs+4(%rip),%edx |
237 | wrmsr | 252 | wrmsr |
238 | 253 | ||
239 | /* esi is pointer to real mode structure with interesting info. | 254 | /* rsi is pointer to real mode structure with interesting info. |
240 | pass it to C */ | 255 | pass it to C */ |
241 | movl %esi, %edi | 256 | movq %rsi, %rdi |
242 | 257 | ||
243 | /* Finally jump to run C code and to be on real kernel address | 258 | /* Finally jump to run C code and to be on real kernel address |
244 | * Since we are running on identity-mapped space we have to jump | 259 | * Since we are running on identity-mapped space we have to jump |
245 | * to the full 64bit address, this is only possible as indirect | 260 | * to the full 64bit address, this is only possible as indirect |
246 | * jump. In addition we need to ensure %cs is set so we make this | 261 | * jump. In addition we need to ensure %cs is set so we make this |
247 | * a far return. | 262 | * a far return. |
263 | * | ||
264 | * Note: do not change to far jump indirect with 64bit offset. | ||
265 | * | ||
266 | * AMD does not support far jump indirect with 64bit offset. | ||
267 | * AMD64 Architecture Programmer's Manual, Volume 3: states only | ||
268 | * JMP FAR mem16:16 FF /5 Far jump indirect, | ||
269 | * with the target specified by a far pointer in memory. | ||
270 | * JMP FAR mem16:32 FF /5 Far jump indirect, | ||
271 | * with the target specified by a far pointer in memory. | ||
272 | * | ||
273 | * Intel64 does support 64bit offset. | ||
274 | * Software Developer Manual Vol 2: states: | ||
275 | * FF /5 JMP m16:16 Jump far, absolute indirect, | ||
276 | * address given in m16:16 | ||
277 | * FF /5 JMP m16:32 Jump far, absolute indirect, | ||
278 | * address given in m16:32. | ||
279 | * REX.W + FF /5 JMP m16:64 Jump far, absolute indirect, | ||
280 | * address given in m16:64. | ||
248 | */ | 281 | */ |
249 | movq initial_code(%rip),%rax | 282 | movq initial_code(%rip),%rax |
250 | pushq $0 # fake return address to stop unwinder | 283 | pushq $0 # fake return address to stop unwinder |
@@ -270,13 +303,13 @@ ENDPROC(start_cpu0) | |||
270 | 303 | ||
271 | /* SMP bootup changes these two */ | 304 | /* SMP bootup changes these two */ |
272 | __REFDATA | 305 | __REFDATA |
273 | .align 8 | 306 | .balign 8 |
274 | ENTRY(initial_code) | 307 | GLOBAL(initial_code) |
275 | .quad x86_64_start_kernel | 308 | .quad x86_64_start_kernel |
276 | ENTRY(initial_gs) | 309 | GLOBAL(initial_gs) |
277 | .quad INIT_PER_CPU_VAR(irq_stack_union) | 310 | .quad INIT_PER_CPU_VAR(irq_stack_union) |
278 | 311 | ||
279 | ENTRY(stack_start) | 312 | GLOBAL(stack_start) |
280 | .quad init_thread_union+THREAD_SIZE-8 | 313 | .quad init_thread_union+THREAD_SIZE-8 |
281 | .word 0 | 314 | .word 0 |
282 | __FINITDATA | 315 | __FINITDATA |
@@ -284,7 +317,7 @@ ENDPROC(start_cpu0) | |||
284 | bad_address: | 317 | bad_address: |
285 | jmp bad_address | 318 | jmp bad_address |
286 | 319 | ||
287 | .section ".init.text","ax" | 320 | __INIT |
288 | .globl early_idt_handlers | 321 | .globl early_idt_handlers |
289 | early_idt_handlers: | 322 | early_idt_handlers: |
290 | # 104(%rsp) %rflags | 323 | # 104(%rsp) %rflags |
@@ -321,14 +354,22 @@ ENTRY(early_idt_handler) | |||
321 | pushq %r11 # 0(%rsp) | 354 | pushq %r11 # 0(%rsp) |
322 | 355 | ||
323 | cmpl $__KERNEL_CS,96(%rsp) | 356 | cmpl $__KERNEL_CS,96(%rsp) |
324 | jne 10f | 357 | jne 11f |
358 | |||
359 | cmpl $14,72(%rsp) # Page fault? | ||
360 | jnz 10f | ||
361 | GET_CR2_INTO(%rdi) # can clobber any volatile register if pv | ||
362 | call early_make_pgtable | ||
363 | andl %eax,%eax | ||
364 | jz 20f # All good | ||
325 | 365 | ||
366 | 10: | ||
326 | leaq 88(%rsp),%rdi # Pointer to %rip | 367 | leaq 88(%rsp),%rdi # Pointer to %rip |
327 | call early_fixup_exception | 368 | call early_fixup_exception |
328 | andl %eax,%eax | 369 | andl %eax,%eax |
329 | jnz 20f # Found an exception entry | 370 | jnz 20f # Found an exception entry |
330 | 371 | ||
331 | 10: | 372 | 11: |
332 | #ifdef CONFIG_EARLY_PRINTK | 373 | #ifdef CONFIG_EARLY_PRINTK |
333 | GET_CR2_INTO(%r9) # can clobber any volatile register if pv | 374 | GET_CR2_INTO(%r9) # can clobber any volatile register if pv |
334 | movl 80(%rsp),%r8d # error code | 375 | movl 80(%rsp),%r8d # error code |
@@ -350,7 +391,7 @@ ENTRY(early_idt_handler) | |||
350 | 1: hlt | 391 | 1: hlt |
351 | jmp 1b | 392 | jmp 1b |
352 | 393 | ||
353 | 20: # Exception table entry found | 394 | 20: # Exception table entry found or page table generated |
354 | popq %r11 | 395 | popq %r11 |
355 | popq %r10 | 396 | popq %r10 |
356 | popq %r9 | 397 | popq %r9 |
@@ -364,6 +405,8 @@ ENTRY(early_idt_handler) | |||
364 | decl early_recursion_flag(%rip) | 405 | decl early_recursion_flag(%rip) |
365 | INTERRUPT_RETURN | 406 | INTERRUPT_RETURN |
366 | 407 | ||
408 | __INITDATA | ||
409 | |||
367 | .balign 4 | 410 | .balign 4 |
368 | early_recursion_flag: | 411 | early_recursion_flag: |
369 | .long 0 | 412 | .long 0 |
@@ -374,11 +417,10 @@ early_idt_msg: | |||
374 | early_idt_ripmsg: | 417 | early_idt_ripmsg: |
375 | .asciz "RIP %s\n" | 418 | .asciz "RIP %s\n" |
376 | #endif /* CONFIG_EARLY_PRINTK */ | 419 | #endif /* CONFIG_EARLY_PRINTK */ |
377 | .previous | ||
378 | 420 | ||
379 | #define NEXT_PAGE(name) \ | 421 | #define NEXT_PAGE(name) \ |
380 | .balign PAGE_SIZE; \ | 422 | .balign PAGE_SIZE; \ |
381 | ENTRY(name) | 423 | GLOBAL(name) |
382 | 424 | ||
383 | /* Automate the creation of 1 to 1 mapping pmd entries */ | 425 | /* Automate the creation of 1 to 1 mapping pmd entries */ |
384 | #define PMDS(START, PERM, COUNT) \ | 426 | #define PMDS(START, PERM, COUNT) \ |
@@ -388,24 +430,37 @@ ENTRY(name) | |||
388 | i = i + 1 ; \ | 430 | i = i + 1 ; \ |
389 | .endr | 431 | .endr |
390 | 432 | ||
433 | __INITDATA | ||
434 | NEXT_PAGE(early_level4_pgt) | ||
435 | .fill 511,8,0 | ||
436 | .quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE | ||
437 | |||
438 | NEXT_PAGE(early_dynamic_pgts) | ||
439 | .fill 512*EARLY_DYNAMIC_PAGE_TABLES,8,0 | ||
440 | |||
391 | .data | 441 | .data |
392 | /* | 442 | |
393 | * This default setting generates an ident mapping at address 0x100000 | 443 | #ifndef CONFIG_XEN |
394 | * and a mapping for the kernel that precisely maps virtual address | ||
395 | * 0xffffffff80000000 to physical address 0x000000. (always using | ||
396 | * 2Mbyte large pages provided by PAE mode) | ||
397 | */ | ||
398 | NEXT_PAGE(init_level4_pgt) | 444 | NEXT_PAGE(init_level4_pgt) |
399 | .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE | 445 | .fill 512,8,0 |
400 | .org init_level4_pgt + L4_PAGE_OFFSET*8, 0 | 446 | #else |
401 | .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE | 447 | NEXT_PAGE(init_level4_pgt) |
402 | .org init_level4_pgt + L4_START_KERNEL*8, 0 | 448 | .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE |
449 | .org init_level4_pgt + L4_PAGE_OFFSET*8, 0 | ||
450 | .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE | ||
451 | .org init_level4_pgt + L4_START_KERNEL*8, 0 | ||
403 | /* (2^48-(2*1024*1024*1024))/(2^39) = 511 */ | 452 | /* (2^48-(2*1024*1024*1024))/(2^39) = 511 */ |
404 | .quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE | 453 | .quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE |
405 | 454 | ||
406 | NEXT_PAGE(level3_ident_pgt) | 455 | NEXT_PAGE(level3_ident_pgt) |
407 | .quad level2_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE | 456 | .quad level2_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE |
408 | .fill 511,8,0 | 457 | .fill 511, 8, 0 |
458 | NEXT_PAGE(level2_ident_pgt) | ||
459 | /* Since I easily can, map the first 1G. | ||
460 | * Don't set NX because code runs from these pages. | ||
461 | */ | ||
462 | PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, PTRS_PER_PMD) | ||
463 | #endif | ||
409 | 464 | ||
410 | NEXT_PAGE(level3_kernel_pgt) | 465 | NEXT_PAGE(level3_kernel_pgt) |
411 | .fill L3_START_KERNEL,8,0 | 466 | .fill L3_START_KERNEL,8,0 |
@@ -413,21 +468,6 @@ NEXT_PAGE(level3_kernel_pgt) | |||
413 | .quad level2_kernel_pgt - __START_KERNEL_map + _KERNPG_TABLE | 468 | .quad level2_kernel_pgt - __START_KERNEL_map + _KERNPG_TABLE |
414 | .quad level2_fixmap_pgt - __START_KERNEL_map + _PAGE_TABLE | 469 | .quad level2_fixmap_pgt - __START_KERNEL_map + _PAGE_TABLE |
415 | 470 | ||
416 | NEXT_PAGE(level2_fixmap_pgt) | ||
417 | .fill 506,8,0 | ||
418 | .quad level1_fixmap_pgt - __START_KERNEL_map + _PAGE_TABLE | ||
419 | /* 8MB reserved for vsyscalls + a 2MB hole = 4 + 1 entries */ | ||
420 | .fill 5,8,0 | ||
421 | |||
422 | NEXT_PAGE(level1_fixmap_pgt) | ||
423 | .fill 512,8,0 | ||
424 | |||
425 | NEXT_PAGE(level2_ident_pgt) | ||
426 | /* Since I easily can, map the first 1G. | ||
427 | * Don't set NX because code runs from these pages. | ||
428 | */ | ||
429 | PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, PTRS_PER_PMD) | ||
430 | |||
431 | NEXT_PAGE(level2_kernel_pgt) | 471 | NEXT_PAGE(level2_kernel_pgt) |
432 | /* | 472 | /* |
433 | * 512 MB kernel mapping. We spend a full page on this pagetable | 473 | * 512 MB kernel mapping. We spend a full page on this pagetable |
@@ -442,11 +482,16 @@ NEXT_PAGE(level2_kernel_pgt) | |||
442 | PMDS(0, __PAGE_KERNEL_LARGE_EXEC, | 482 | PMDS(0, __PAGE_KERNEL_LARGE_EXEC, |
443 | KERNEL_IMAGE_SIZE/PMD_SIZE) | 483 | KERNEL_IMAGE_SIZE/PMD_SIZE) |
444 | 484 | ||
445 | NEXT_PAGE(level2_spare_pgt) | 485 | NEXT_PAGE(level2_fixmap_pgt) |
446 | .fill 512, 8, 0 | 486 | .fill 506,8,0 |
487 | .quad level1_fixmap_pgt - __START_KERNEL_map + _PAGE_TABLE | ||
488 | /* 8MB reserved for vsyscalls + a 2MB hole = 4 + 1 entries */ | ||
489 | .fill 5,8,0 | ||
490 | |||
491 | NEXT_PAGE(level1_fixmap_pgt) | ||
492 | .fill 512,8,0 | ||
447 | 493 | ||
448 | #undef PMDS | 494 | #undef PMDS |
449 | #undef NEXT_PAGE | ||
450 | 495 | ||
451 | .data | 496 | .data |
452 | .align 16 | 497 | .align 16 |
@@ -472,6 +517,5 @@ ENTRY(nmi_idt_table) | |||
472 | .skip IDT_ENTRIES * 16 | 517 | .skip IDT_ENTRIES * 16 |
473 | 518 | ||
474 | __PAGE_ALIGNED_BSS | 519 | __PAGE_ALIGNED_BSS |
475 | .align PAGE_SIZE | 520 | NEXT_PAGE(empty_zero_page) |
476 | ENTRY(empty_zero_page) | ||
477 | .skip PAGE_SIZE | 521 | .skip PAGE_SIZE |
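The switchover code in startup_64 above sizes the temporary identity mapping from the loaded image itself: the number of 2 MiB PMD entries is ((_end - 1) >> PMD_SHIFT) - (_text >> PMD_SHIFT) + 1, and the loop then fills that many consecutive slots. A small standalone model of that count calculation (the addresses are made-up inputs, not real symbol values):

    #include <stdint.h>
    #include <stdio.h>

    #define PMD_SHIFT       21                      /* 2 MiB large pages */
    #define PMD_SIZE        (1ULL << PMD_SHIFT)

    /* Mirrors the asm: ((end - 1) >> PMD_SHIFT) - (start >> PMD_SHIFT) + 1 */
    static unsigned int pmds_needed(uint64_t start, uint64_t end)
    {
            return (unsigned int)(((end - 1) >> PMD_SHIFT) -
                                  (start >> PMD_SHIFT) + 1);
    }

    int main(void)
    {
            uint64_t text = 0x0000000001000000ULL;          /* hypothetical load address */
            uint64_t end  = text + 23 * 1024 * 1024 + 4096; /* ~23 MiB image */
            unsigned int n = pmds_needed(text, end);

            printf("%u PMD entries, covering %llu MiB\n",
                   n, (unsigned long long)(n * PMD_SIZE >> 20));
            return 0;
    }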
diff --git a/arch/x86/kernel/i386_ksyms_32.c b/arch/x86/kernel/i386_ksyms_32.c index 9c3bd4a2050e..0fa69127209a 100644 --- a/arch/x86/kernel/i386_ksyms_32.c +++ b/arch/x86/kernel/i386_ksyms_32.c | |||
@@ -26,6 +26,7 @@ EXPORT_SYMBOL(csum_partial_copy_generic); | |||
26 | EXPORT_SYMBOL(__get_user_1); | 26 | EXPORT_SYMBOL(__get_user_1); |
27 | EXPORT_SYMBOL(__get_user_2); | 27 | EXPORT_SYMBOL(__get_user_2); |
28 | EXPORT_SYMBOL(__get_user_4); | 28 | EXPORT_SYMBOL(__get_user_4); |
29 | EXPORT_SYMBOL(__get_user_8); | ||
29 | 30 | ||
30 | EXPORT_SYMBOL(__put_user_1); | 31 | EXPORT_SYMBOL(__put_user_1); |
31 | EXPORT_SYMBOL(__put_user_2); | 32 | EXPORT_SYMBOL(__put_user_2); |
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 2b44ea5f269d..b686a904d7c3 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c | |||
@@ -297,9 +297,9 @@ static void kvm_register_steal_time(void) | |||
297 | 297 | ||
298 | memset(st, 0, sizeof(*st)); | 298 | memset(st, 0, sizeof(*st)); |
299 | 299 | ||
300 | wrmsrl(MSR_KVM_STEAL_TIME, (__pa(st) | KVM_MSR_ENABLED)); | 300 | wrmsrl(MSR_KVM_STEAL_TIME, (slow_virt_to_phys(st) | KVM_MSR_ENABLED)); |
301 | printk(KERN_INFO "kvm-stealtime: cpu %d, msr %lx\n", | 301 | pr_info("kvm-stealtime: cpu %d, msr %llx\n", |
302 | cpu, __pa(st)); | 302 | cpu, (unsigned long long) slow_virt_to_phys(st)); |
303 | } | 303 | } |
304 | 304 | ||
305 | static DEFINE_PER_CPU(unsigned long, kvm_apic_eoi) = KVM_PV_EOI_DISABLED; | 305 | static DEFINE_PER_CPU(unsigned long, kvm_apic_eoi) = KVM_PV_EOI_DISABLED; |
@@ -324,7 +324,7 @@ void __cpuinit kvm_guest_cpu_init(void) | |||
324 | return; | 324 | return; |
325 | 325 | ||
326 | if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF) && kvmapf) { | 326 | if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF) && kvmapf) { |
327 | u64 pa = __pa(&__get_cpu_var(apf_reason)); | 327 | u64 pa = slow_virt_to_phys(&__get_cpu_var(apf_reason)); |
328 | 328 | ||
329 | #ifdef CONFIG_PREEMPT | 329 | #ifdef CONFIG_PREEMPT |
330 | pa |= KVM_ASYNC_PF_SEND_ALWAYS; | 330 | pa |= KVM_ASYNC_PF_SEND_ALWAYS; |
@@ -340,7 +340,8 @@ void __cpuinit kvm_guest_cpu_init(void) | |||
340 | /* Size alignment is implied but just to make it explicit. */ | 340 | /* Size alignment is implied but just to make it explicit. */ |
341 | BUILD_BUG_ON(__alignof__(kvm_apic_eoi) < 4); | 341 | BUILD_BUG_ON(__alignof__(kvm_apic_eoi) < 4); |
342 | __get_cpu_var(kvm_apic_eoi) = 0; | 342 | __get_cpu_var(kvm_apic_eoi) = 0; |
343 | pa = __pa(&__get_cpu_var(kvm_apic_eoi)) | KVM_MSR_ENABLED; | 343 | pa = slow_virt_to_phys(&__get_cpu_var(kvm_apic_eoi)) |
344 | | KVM_MSR_ENABLED; | ||
344 | wrmsrl(MSR_KVM_PV_EOI_EN, pa); | 345 | wrmsrl(MSR_KVM_PV_EOI_EN, pa); |
345 | } | 346 | } |
346 | 347 | ||
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index 220a360010f8..9f966dc0b9e4 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c | |||
@@ -162,8 +162,8 @@ int kvm_register_clock(char *txt) | |||
162 | int low, high, ret; | 162 | int low, high, ret; |
163 | struct pvclock_vcpu_time_info *src = &hv_clock[cpu].pvti; | 163 | struct pvclock_vcpu_time_info *src = &hv_clock[cpu].pvti; |
164 | 164 | ||
165 | low = (int)__pa(src) | 1; | 165 | low = (int)slow_virt_to_phys(src) | 1; |
166 | high = ((u64)__pa(src) >> 32); | 166 | high = ((u64)slow_virt_to_phys(src) >> 32); |
167 | ret = native_write_msr_safe(msr_kvm_system_time, low, high); | 167 | ret = native_write_msr_safe(msr_kvm_system_time, low, high); |
168 | printk(KERN_INFO "kvm-clock: cpu %d, msr %x:%x, %s\n", | 168 | printk(KERN_INFO "kvm-clock: cpu %d, msr %x:%x, %s\n", |
169 | cpu, high, low, txt); | 169 | cpu, high, low, txt); |
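Both KVM hunks above switch from __pa() to slow_virt_to_phys() because the percpu variables involved are no longer guaranteed to sit in the kernel's direct mapping once the kernel can be loaded above 4 GiB, so the physical address has to come from an actual page-table walk. As a loose userspace analogue of such a walk, the sketch below resolves one of this process's own virtual addresses through /proc/self/pagemap (on recent kernels the PFN is only shown to privileged readers, so run as root to see a non-zero result):

    #include <fcntl.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <unistd.h>

    /*
     * Resolve a virtual address of this process to a physical address by
     * reading /proc/self/pagemap.  Returns 0 on failure, or when the kernel
     * hides the PFN from unprivileged readers.
     */
    static uint64_t virt_to_phys(const void *addr)
    {
            long page_size = sysconf(_SC_PAGESIZE);
            uint64_t vpn = (uint64_t)(uintptr_t)addr / page_size;
            uint64_t entry = 0;
            int fd = open("/proc/self/pagemap", O_RDONLY);

            if (fd < 0)
                    return 0;
            if (pread(fd, &entry, sizeof(entry), vpn * sizeof(entry)) != sizeof(entry))
                    entry = 0;
            close(fd);

            if (!(entry & (1ULL << 63)))            /* page not present */
                    return 0;
            return (entry & ((1ULL << 55) - 1)) * page_size +
                   (uint64_t)(uintptr_t)addr % page_size;
    }

    int main(void)
    {
            static int probe = 42;

            probe++;                                /* make sure the page is faulted in */
            printf("virt %p -> phys %#llx\n", (void *)&probe,
                   (unsigned long long)virt_to_phys(&probe));
            return 0;
    }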
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index b3ea9db39db6..4eabc160696f 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c | |||
@@ -16,125 +16,12 @@ | |||
16 | #include <linux/io.h> | 16 | #include <linux/io.h> |
17 | #include <linux/suspend.h> | 17 | #include <linux/suspend.h> |
18 | 18 | ||
19 | #include <asm/init.h> | ||
19 | #include <asm/pgtable.h> | 20 | #include <asm/pgtable.h> |
20 | #include <asm/tlbflush.h> | 21 | #include <asm/tlbflush.h> |
21 | #include <asm/mmu_context.h> | 22 | #include <asm/mmu_context.h> |
22 | #include <asm/debugreg.h> | 23 | #include <asm/debugreg.h> |
23 | 24 | ||
24 | static int init_one_level2_page(struct kimage *image, pgd_t *pgd, | ||
25 | unsigned long addr) | ||
26 | { | ||
27 | pud_t *pud; | ||
28 | pmd_t *pmd; | ||
29 | struct page *page; | ||
30 | int result = -ENOMEM; | ||
31 | |||
32 | addr &= PMD_MASK; | ||
33 | pgd += pgd_index(addr); | ||
34 | if (!pgd_present(*pgd)) { | ||
35 | page = kimage_alloc_control_pages(image, 0); | ||
36 | if (!page) | ||
37 | goto out; | ||
38 | pud = (pud_t *)page_address(page); | ||
39 | clear_page(pud); | ||
40 | set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE)); | ||
41 | } | ||
42 | pud = pud_offset(pgd, addr); | ||
43 | if (!pud_present(*pud)) { | ||
44 | page = kimage_alloc_control_pages(image, 0); | ||
45 | if (!page) | ||
46 | goto out; | ||
47 | pmd = (pmd_t *)page_address(page); | ||
48 | clear_page(pmd); | ||
49 | set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE)); | ||
50 | } | ||
51 | pmd = pmd_offset(pud, addr); | ||
52 | if (!pmd_present(*pmd)) | ||
53 | set_pmd(pmd, __pmd(addr | __PAGE_KERNEL_LARGE_EXEC)); | ||
54 | result = 0; | ||
55 | out: | ||
56 | return result; | ||
57 | } | ||
58 | |||
59 | static void init_level2_page(pmd_t *level2p, unsigned long addr) | ||
60 | { | ||
61 | unsigned long end_addr; | ||
62 | |||
63 | addr &= PAGE_MASK; | ||
64 | end_addr = addr + PUD_SIZE; | ||
65 | while (addr < end_addr) { | ||
66 | set_pmd(level2p++, __pmd(addr | __PAGE_KERNEL_LARGE_EXEC)); | ||
67 | addr += PMD_SIZE; | ||
68 | } | ||
69 | } | ||
70 | |||
71 | static int init_level3_page(struct kimage *image, pud_t *level3p, | ||
72 | unsigned long addr, unsigned long last_addr) | ||
73 | { | ||
74 | unsigned long end_addr; | ||
75 | int result; | ||
76 | |||
77 | result = 0; | ||
78 | addr &= PAGE_MASK; | ||
79 | end_addr = addr + PGDIR_SIZE; | ||
80 | while ((addr < last_addr) && (addr < end_addr)) { | ||
81 | struct page *page; | ||
82 | pmd_t *level2p; | ||
83 | |||
84 | page = kimage_alloc_control_pages(image, 0); | ||
85 | if (!page) { | ||
86 | result = -ENOMEM; | ||
87 | goto out; | ||
88 | } | ||
89 | level2p = (pmd_t *)page_address(page); | ||
90 | init_level2_page(level2p, addr); | ||
91 | set_pud(level3p++, __pud(__pa(level2p) | _KERNPG_TABLE)); | ||
92 | addr += PUD_SIZE; | ||
93 | } | ||
94 | /* clear the unused entries */ | ||
95 | while (addr < end_addr) { | ||
96 | pud_clear(level3p++); | ||
97 | addr += PUD_SIZE; | ||
98 | } | ||
99 | out: | ||
100 | return result; | ||
101 | } | ||
102 | |||
103 | |||
104 | static int init_level4_page(struct kimage *image, pgd_t *level4p, | ||
105 | unsigned long addr, unsigned long last_addr) | ||
106 | { | ||
107 | unsigned long end_addr; | ||
108 | int result; | ||
109 | |||
110 | result = 0; | ||
111 | addr &= PAGE_MASK; | ||
112 | end_addr = addr + (PTRS_PER_PGD * PGDIR_SIZE); | ||
113 | while ((addr < last_addr) && (addr < end_addr)) { | ||
114 | struct page *page; | ||
115 | pud_t *level3p; | ||
116 | |||
117 | page = kimage_alloc_control_pages(image, 0); | ||
118 | if (!page) { | ||
119 | result = -ENOMEM; | ||
120 | goto out; | ||
121 | } | ||
122 | level3p = (pud_t *)page_address(page); | ||
123 | result = init_level3_page(image, level3p, addr, last_addr); | ||
124 | if (result) | ||
125 | goto out; | ||
126 | set_pgd(level4p++, __pgd(__pa(level3p) | _KERNPG_TABLE)); | ||
127 | addr += PGDIR_SIZE; | ||
128 | } | ||
129 | /* clear the unused entries */ | ||
130 | while (addr < end_addr) { | ||
131 | pgd_clear(level4p++); | ||
132 | addr += PGDIR_SIZE; | ||
133 | } | ||
134 | out: | ||
135 | return result; | ||
136 | } | ||
137 | |||
138 | static void free_transition_pgtable(struct kimage *image) | 25 | static void free_transition_pgtable(struct kimage *image) |
139 | { | 26 | { |
140 | free_page((unsigned long)image->arch.pud); | 27 | free_page((unsigned long)image->arch.pud); |
@@ -184,22 +71,62 @@ err: | |||
184 | return result; | 71 | return result; |
185 | } | 72 | } |
186 | 73 | ||
74 | static void *alloc_pgt_page(void *data) | ||
75 | { | ||
76 | struct kimage *image = (struct kimage *)data; | ||
77 | struct page *page; | ||
78 | void *p = NULL; | ||
79 | |||
80 | page = kimage_alloc_control_pages(image, 0); | ||
81 | if (page) { | ||
82 | p = page_address(page); | ||
83 | clear_page(p); | ||
84 | } | ||
85 | |||
86 | return p; | ||
87 | } | ||
187 | 88 | ||
188 | static int init_pgtable(struct kimage *image, unsigned long start_pgtable) | 89 | static int init_pgtable(struct kimage *image, unsigned long start_pgtable) |
189 | { | 90 | { |
91 | struct x86_mapping_info info = { | ||
92 | .alloc_pgt_page = alloc_pgt_page, | ||
93 | .context = image, | ||
94 | .pmd_flag = __PAGE_KERNEL_LARGE_EXEC, | ||
95 | }; | ||
96 | unsigned long mstart, mend; | ||
190 | pgd_t *level4p; | 97 | pgd_t *level4p; |
191 | int result; | 98 | int result; |
99 | int i; | ||
100 | |||
192 | level4p = (pgd_t *)__va(start_pgtable); | 101 | level4p = (pgd_t *)__va(start_pgtable); |
193 | result = init_level4_page(image, level4p, 0, max_pfn << PAGE_SHIFT); | 102 | clear_page(level4p); |
194 | if (result) | 103 | for (i = 0; i < nr_pfn_mapped; i++) { |
195 | return result; | 104 | mstart = pfn_mapped[i].start << PAGE_SHIFT; |
105 | mend = pfn_mapped[i].end << PAGE_SHIFT; | ||
106 | |||
107 | result = kernel_ident_mapping_init(&info, | ||
108 | level4p, mstart, mend); | ||
109 | if (result) | ||
110 | return result; | ||
111 | } | ||
112 | |||
196 | /* | 113 | /* |
197 | * image->start may be outside 0 ~ max_pfn, for example when | 114 | * A segment's memory range could be outside 0 ~ max_pfn, |
198 | * jump back to original kernel from kexeced kernel | 115 | * for example when jumping back to the original kernel from a kexeced kernel, |
116 | * or when the first kernel was booted with a user-supplied mem map and the | ||
117 | * second kernel is loaded outside that range. | ||
199 | */ | 118 | */ |
200 | result = init_one_level2_page(image, level4p, image->start); | 119 | for (i = 0; i < image->nr_segments; i++) { |
201 | if (result) | 120 | mstart = image->segment[i].mem; |
202 | return result; | 121 | mend = mstart + image->segment[i].memsz; |
122 | |||
123 | result = kernel_ident_mapping_init(&info, | ||
124 | level4p, mstart, mend); | ||
125 | |||
126 | if (result) | ||
127 | return result; | ||
128 | } | ||
129 | |||
203 | return init_transition_pgtable(image, level4p); | 130 | return init_transition_pgtable(image, level4p); |
204 | } | 131 | } |
205 | 132 | ||
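The rewritten init_pgtable() above drops the hand-rolled level2/level3/level4 builders and instead hands kernel_ident_mapping_init() an x86_mapping_info whose alloc_pgt_page callback draws page-table pages from kexec control pages. A standalone toy of that allocator-callback pattern (the field names mirror the kernel structs, but the pool here is just malloc-backed and purely illustrative):

    #include <stdio.h>
    #include <stdlib.h>

    /* The mapping code never allocates directly; it asks the caller through a
     * callback plus an opaque context pointer. */
    struct mapping_info {
            void *(*alloc_pgt_page)(void *context);
            void *context;
    };

    struct page_pool {
            size_t used;
            size_t capacity;
    };

    static void *pool_alloc(void *context)
    {
            struct page_pool *pool = context;

            if (pool->used >= pool->capacity)
                    return NULL;            /* mirrors the control-page allocation failing */
            pool->used++;
            return calloc(1, 4096);         /* hand back one zeroed "page table" page */
    }

    int main(void)
    {
            struct page_pool pool = { .capacity = 4 };
            struct mapping_info info = {
                    .alloc_pgt_page = pool_alloc,
                    .context        = &pool,
            };
            void *pgt = info.alloc_pgt_page(info.context);

            printf("allocated %p, %zu of %zu pool pages used\n",
                   pgt, pool.used, pool.capacity);
            free(pgt);
            return 0;
    }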
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 8b24289cc10c..915f5efefcf5 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c | |||
@@ -108,17 +108,16 @@ | |||
108 | #include <asm/topology.h> | 108 | #include <asm/topology.h> |
109 | #include <asm/apicdef.h> | 109 | #include <asm/apicdef.h> |
110 | #include <asm/amd_nb.h> | 110 | #include <asm/amd_nb.h> |
111 | #ifdef CONFIG_X86_64 | ||
112 | #include <asm/numa_64.h> | ||
113 | #endif | ||
114 | #include <asm/mce.h> | 111 | #include <asm/mce.h> |
115 | #include <asm/alternative.h> | 112 | #include <asm/alternative.h> |
116 | #include <asm/prom.h> | 113 | #include <asm/prom.h> |
117 | 114 | ||
118 | /* | 115 | /* |
119 | * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries. | 116 | * max_low_pfn_mapped: highest direct mapped pfn under 4GB |
120 | * The direct mapping extends to max_pfn_mapped, so that we can directly access | 117 | * max_pfn_mapped: highest direct mapped pfn over 4GB |
121 | * apertures, ACPI and other tables without having to play with fixmaps. | 118 | * |
119 | * The direct mapping only covers E820_RAM regions, so the ranges and gaps are | ||
120 | * represented by pfn_mapped | ||
122 | */ | 121 | */ |
123 | unsigned long max_low_pfn_mapped; | 122 | unsigned long max_low_pfn_mapped; |
124 | unsigned long max_pfn_mapped; | 123 | unsigned long max_pfn_mapped; |
@@ -276,18 +275,7 @@ void * __init extend_brk(size_t size, size_t align) | |||
276 | return ret; | 275 | return ret; |
277 | } | 276 | } |
278 | 277 | ||
279 | #ifdef CONFIG_X86_64 | 278 | #ifdef CONFIG_X86_32 |
280 | static void __init init_gbpages(void) | ||
281 | { | ||
282 | if (direct_gbpages && cpu_has_gbpages) | ||
283 | printk(KERN_INFO "Using GB pages for direct mapping\n"); | ||
284 | else | ||
285 | direct_gbpages = 0; | ||
286 | } | ||
287 | #else | ||
288 | static inline void init_gbpages(void) | ||
289 | { | ||
290 | } | ||
291 | static void __init cleanup_highmap(void) | 279 | static void __init cleanup_highmap(void) |
292 | { | 280 | { |
293 | } | 281 | } |
@@ -296,8 +284,8 @@ static void __init cleanup_highmap(void) | |||
296 | static void __init reserve_brk(void) | 284 | static void __init reserve_brk(void) |
297 | { | 285 | { |
298 | if (_brk_end > _brk_start) | 286 | if (_brk_end > _brk_start) |
299 | memblock_reserve(__pa(_brk_start), | 287 | memblock_reserve(__pa_symbol(_brk_start), |
300 | __pa(_brk_end) - __pa(_brk_start)); | 288 | _brk_end - _brk_start); |
301 | 289 | ||
302 | /* Mark brk area as locked down and no longer taking any | 290 | /* Mark brk area as locked down and no longer taking any |
303 | new allocations */ | 291 | new allocations */ |
@@ -306,27 +294,43 @@ static void __init reserve_brk(void) | |||
306 | 294 | ||
307 | #ifdef CONFIG_BLK_DEV_INITRD | 295 | #ifdef CONFIG_BLK_DEV_INITRD |
308 | 296 | ||
297 | static u64 __init get_ramdisk_image(void) | ||
298 | { | ||
299 | u64 ramdisk_image = boot_params.hdr.ramdisk_image; | ||
300 | |||
301 | ramdisk_image |= (u64)boot_params.ext_ramdisk_image << 32; | ||
302 | |||
303 | return ramdisk_image; | ||
304 | } | ||
305 | static u64 __init get_ramdisk_size(void) | ||
306 | { | ||
307 | u64 ramdisk_size = boot_params.hdr.ramdisk_size; | ||
308 | |||
309 | ramdisk_size |= (u64)boot_params.ext_ramdisk_size << 32; | ||
310 | |||
311 | return ramdisk_size; | ||
312 | } | ||
313 | |||
309 | #define MAX_MAP_CHUNK (NR_FIX_BTMAPS << PAGE_SHIFT) | 314 | #define MAX_MAP_CHUNK (NR_FIX_BTMAPS << PAGE_SHIFT) |
310 | static void __init relocate_initrd(void) | 315 | static void __init relocate_initrd(void) |
311 | { | 316 | { |
312 | /* Assume only end is not page aligned */ | 317 | /* Assume only end is not page aligned */ |
313 | u64 ramdisk_image = boot_params.hdr.ramdisk_image; | 318 | u64 ramdisk_image = get_ramdisk_image(); |
314 | u64 ramdisk_size = boot_params.hdr.ramdisk_size; | 319 | u64 ramdisk_size = get_ramdisk_size(); |
315 | u64 area_size = PAGE_ALIGN(ramdisk_size); | 320 | u64 area_size = PAGE_ALIGN(ramdisk_size); |
316 | u64 end_of_lowmem = max_low_pfn_mapped << PAGE_SHIFT; | ||
317 | u64 ramdisk_here; | 321 | u64 ramdisk_here; |
318 | unsigned long slop, clen, mapaddr; | 322 | unsigned long slop, clen, mapaddr; |
319 | char *p, *q; | 323 | char *p, *q; |
320 | 324 | ||
321 | /* We need to move the initrd down into lowmem */ | 325 | /* We need to move the initrd down into directly mapped mem */ |
322 | ramdisk_here = memblock_find_in_range(0, end_of_lowmem, area_size, | 326 | ramdisk_here = memblock_find_in_range(0, PFN_PHYS(max_pfn_mapped), |
323 | PAGE_SIZE); | 327 | area_size, PAGE_SIZE); |
324 | 328 | ||
325 | if (!ramdisk_here) | 329 | if (!ramdisk_here) |
326 | panic("Cannot find place for new RAMDISK of size %lld\n", | 330 | panic("Cannot find place for new RAMDISK of size %lld\n", |
327 | ramdisk_size); | 331 | ramdisk_size); |
328 | 332 | ||
329 | /* Note: this includes all the lowmem currently occupied by | 333 | /* Note: this includes all the mem currently occupied by |
330 | the initrd, we rely on that fact to keep the data intact. */ | 334 | the initrd, we rely on that fact to keep the data intact. */ |
331 | memblock_reserve(ramdisk_here, area_size); | 335 | memblock_reserve(ramdisk_here, area_size); |
332 | initrd_start = ramdisk_here + PAGE_OFFSET; | 336 | initrd_start = ramdisk_here + PAGE_OFFSET; |
@@ -336,17 +340,7 @@ static void __init relocate_initrd(void) | |||
336 | 340 | ||
337 | q = (char *)initrd_start; | 341 | q = (char *)initrd_start; |
338 | 342 | ||
339 | /* Copy any lowmem portion of the initrd */ | 343 | /* Copy the initrd */ |
340 | if (ramdisk_image < end_of_lowmem) { | ||
341 | clen = end_of_lowmem - ramdisk_image; | ||
342 | p = (char *)__va(ramdisk_image); | ||
343 | memcpy(q, p, clen); | ||
344 | q += clen; | ||
345 | ramdisk_image += clen; | ||
346 | ramdisk_size -= clen; | ||
347 | } | ||
348 | |||
349 | /* Copy the highmem portion of the initrd */ | ||
350 | while (ramdisk_size) { | 344 | while (ramdisk_size) { |
351 | slop = ramdisk_image & ~PAGE_MASK; | 345 | slop = ramdisk_image & ~PAGE_MASK; |
352 | clen = ramdisk_size; | 346 | clen = ramdisk_size; |
@@ -360,22 +354,35 @@ static void __init relocate_initrd(void) | |||
360 | ramdisk_image += clen; | 354 | ramdisk_image += clen; |
361 | ramdisk_size -= clen; | 355 | ramdisk_size -= clen; |
362 | } | 356 | } |
363 | /* high pages is not converted by early_res_to_bootmem */ | 357 | |
364 | ramdisk_image = boot_params.hdr.ramdisk_image; | 358 | ramdisk_image = get_ramdisk_image(); |
365 | ramdisk_size = boot_params.hdr.ramdisk_size; | 359 | ramdisk_size = get_ramdisk_size(); |
366 | printk(KERN_INFO "Move RAMDISK from [mem %#010llx-%#010llx] to" | 360 | printk(KERN_INFO "Move RAMDISK from [mem %#010llx-%#010llx] to" |
367 | " [mem %#010llx-%#010llx]\n", | 361 | " [mem %#010llx-%#010llx]\n", |
368 | ramdisk_image, ramdisk_image + ramdisk_size - 1, | 362 | ramdisk_image, ramdisk_image + ramdisk_size - 1, |
369 | ramdisk_here, ramdisk_here + ramdisk_size - 1); | 363 | ramdisk_here, ramdisk_here + ramdisk_size - 1); |
370 | } | 364 | } |
371 | 365 | ||
366 | static void __init early_reserve_initrd(void) | ||
367 | { | ||
368 | /* Assume only end is not page aligned */ | ||
369 | u64 ramdisk_image = get_ramdisk_image(); | ||
370 | u64 ramdisk_size = get_ramdisk_size(); | ||
371 | u64 ramdisk_end = PAGE_ALIGN(ramdisk_image + ramdisk_size); | ||
372 | |||
373 | if (!boot_params.hdr.type_of_loader || | ||
374 | !ramdisk_image || !ramdisk_size) | ||
375 | return; /* No initrd provided by bootloader */ | ||
376 | |||
377 | memblock_reserve(ramdisk_image, ramdisk_end - ramdisk_image); | ||
378 | } | ||
372 | static void __init reserve_initrd(void) | 379 | static void __init reserve_initrd(void) |
373 | { | 380 | { |
374 | /* Assume only end is not page aligned */ | 381 | /* Assume only end is not page aligned */ |
375 | u64 ramdisk_image = boot_params.hdr.ramdisk_image; | 382 | u64 ramdisk_image = get_ramdisk_image(); |
376 | u64 ramdisk_size = boot_params.hdr.ramdisk_size; | 383 | u64 ramdisk_size = get_ramdisk_size(); |
377 | u64 ramdisk_end = PAGE_ALIGN(ramdisk_image + ramdisk_size); | 384 | u64 ramdisk_end = PAGE_ALIGN(ramdisk_image + ramdisk_size); |
378 | u64 end_of_lowmem = max_low_pfn_mapped << PAGE_SHIFT; | 385 | u64 mapped_size; |
379 | 386 | ||
380 | if (!boot_params.hdr.type_of_loader || | 387 | if (!boot_params.hdr.type_of_loader || |
381 | !ramdisk_image || !ramdisk_size) | 388 | !ramdisk_image || !ramdisk_size) |
@@ -383,22 +390,18 @@ static void __init reserve_initrd(void) | |||
383 | 390 | ||
384 | initrd_start = 0; | 391 | initrd_start = 0; |
385 | 392 | ||
386 | if (ramdisk_size >= (end_of_lowmem>>1)) { | 393 | mapped_size = memblock_mem_size(max_pfn_mapped); |
394 | if (ramdisk_size >= (mapped_size>>1)) | ||
387 | panic("initrd too large to handle, " | 395 | panic("initrd too large to handle, " |
388 | "disabling initrd (%lld needed, %lld available)\n", | 396 | "disabling initrd (%lld needed, %lld available)\n", |
389 | ramdisk_size, end_of_lowmem>>1); | 397 | ramdisk_size, mapped_size>>1); |
390 | } | ||
391 | 398 | ||
392 | printk(KERN_INFO "RAMDISK: [mem %#010llx-%#010llx]\n", ramdisk_image, | 399 | printk(KERN_INFO "RAMDISK: [mem %#010llx-%#010llx]\n", ramdisk_image, |
393 | ramdisk_end - 1); | 400 | ramdisk_end - 1); |
394 | 401 | ||
395 | 402 | if (pfn_range_is_mapped(PFN_DOWN(ramdisk_image), | |
396 | if (ramdisk_end <= end_of_lowmem) { | 403 | PFN_DOWN(ramdisk_end))) { |
397 | /* All in lowmem, easy case */ | 404 | /* All are mapped, easy case */ |
398 | /* | ||
399 | * don't need to reserve again, already reserved early | ||
400 | * in i386_start_kernel | ||
401 | */ | ||
402 | initrd_start = ramdisk_image + PAGE_OFFSET; | 405 | initrd_start = ramdisk_image + PAGE_OFFSET; |
403 | initrd_end = initrd_start + ramdisk_size; | 406 | initrd_end = initrd_start + ramdisk_size; |
404 | return; | 407 | return; |
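reserve_initrd() above now asks pfn_range_is_mapped() whether the initrd already falls inside one of the direct-mapped ranges, instead of comparing against a single lowmem boundary. A toy version of that containment check (the two-entry table below is invented, not the kernel's real pfn_mapped array):

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    struct pfn_range { uint64_t start, end; };      /* [start, end) in pages */

    /* True if [start, end) lies fully inside one already-mapped range. */
    static bool pfn_range_is_mapped(const struct pfn_range *mapped, int n,
                                    uint64_t start, uint64_t end)
    {
            for (int i = 0; i < n; i++)
                    if (start >= mapped[i].start && end <= mapped[i].end)
                            return true;
            return false;
    }

    int main(void)
    {
            struct pfn_range mapped[] = {
                    { 0x000, 0x100 },       /* first 1 MiB worth of pages */
                    { 0x100, 0x80000 },     /* up to 2 GiB */
            };

            printf("initrd mapped: %d\n",
                   pfn_range_is_mapped(mapped, 2, 0x40000, 0x48000));
            return 0;
    }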
@@ -409,6 +412,9 @@ static void __init reserve_initrd(void) | |||
409 | memblock_free(ramdisk_image, ramdisk_end - ramdisk_image); | 412 | memblock_free(ramdisk_image, ramdisk_end - ramdisk_image); |
410 | } | 413 | } |
411 | #else | 414 | #else |
415 | static void __init early_reserve_initrd(void) | ||
416 | { | ||
417 | } | ||
412 | static void __init reserve_initrd(void) | 418 | static void __init reserve_initrd(void) |
413 | { | 419 | { |
414 | } | 420 | } |
@@ -419,8 +425,6 @@ static void __init parse_setup_data(void) | |||
419 | struct setup_data *data; | 425 | struct setup_data *data; |
420 | u64 pa_data; | 426 | u64 pa_data; |
421 | 427 | ||
422 | if (boot_params.hdr.version < 0x0209) | ||
423 | return; | ||
424 | pa_data = boot_params.hdr.setup_data; | 428 | pa_data = boot_params.hdr.setup_data; |
425 | while (pa_data) { | 429 | while (pa_data) { |
426 | u32 data_len, map_len; | 430 | u32 data_len, map_len; |
@@ -456,8 +460,6 @@ static void __init e820_reserve_setup_data(void) | |||
456 | u64 pa_data; | 460 | u64 pa_data; |
457 | int found = 0; | 461 | int found = 0; |
458 | 462 | ||
459 | if (boot_params.hdr.version < 0x0209) | ||
460 | return; | ||
461 | pa_data = boot_params.hdr.setup_data; | 463 | pa_data = boot_params.hdr.setup_data; |
462 | while (pa_data) { | 464 | while (pa_data) { |
463 | data = early_memremap(pa_data, sizeof(*data)); | 465 | data = early_memremap(pa_data, sizeof(*data)); |
@@ -481,8 +483,6 @@ static void __init memblock_x86_reserve_range_setup_data(void) | |||
481 | struct setup_data *data; | 483 | struct setup_data *data; |
482 | u64 pa_data; | 484 | u64 pa_data; |
483 | 485 | ||
484 | if (boot_params.hdr.version < 0x0209) | ||
485 | return; | ||
486 | pa_data = boot_params.hdr.setup_data; | 486 | pa_data = boot_params.hdr.setup_data; |
487 | while (pa_data) { | 487 | while (pa_data) { |
488 | data = early_memremap(pa_data, sizeof(*data)); | 488 | data = early_memremap(pa_data, sizeof(*data)); |
@@ -501,17 +501,51 @@ static void __init memblock_x86_reserve_range_setup_data(void) | |||
501 | /* | 501 | /* |
502 | * Keep the crash kernel below this limit. On 32 bits earlier kernels | 502 | * Keep the crash kernel below this limit. On 32 bits earlier kernels |
503 | * would limit the kernel to the low 512 MiB due to mapping restrictions. | 503 | * would limit the kernel to the low 512 MiB due to mapping restrictions. |
504 | * On 64 bits, kexec-tools currently limits us to 896 MiB; increase this | ||
505 | * limit once kexec-tools are fixed. | ||
506 | */ | 504 | */ |
507 | #ifdef CONFIG_X86_32 | 505 | #ifdef CONFIG_X86_32 |
508 | # define CRASH_KERNEL_ADDR_MAX (512 << 20) | 506 | # define CRASH_KERNEL_ADDR_MAX (512 << 20) |
509 | #else | 507 | #else |
510 | # define CRASH_KERNEL_ADDR_MAX (896 << 20) | 508 | # define CRASH_KERNEL_ADDR_MAX MAXMEM |
509 | #endif | ||
510 | |||
511 | static void __init reserve_crashkernel_low(void) | ||
512 | { | ||
513 | #ifdef CONFIG_X86_64 | ||
514 | const unsigned long long alignment = 16<<20; /* 16M */ | ||
515 | unsigned long long low_base = 0, low_size = 0; | ||
516 | unsigned long total_low_mem; | ||
517 | unsigned long long base; | ||
518 | int ret; | ||
519 | |||
520 | total_low_mem = memblock_mem_size(1UL<<(32-PAGE_SHIFT)); | ||
521 | ret = parse_crashkernel_low(boot_command_line, total_low_mem, | ||
522 | &low_size, &base); | ||
523 | if (ret != 0 || low_size <= 0) | ||
524 | return; | ||
525 | |||
526 | low_base = memblock_find_in_range(low_size, (1ULL<<32), | ||
527 | low_size, alignment); | ||
528 | |||
529 | if (!low_base) { | ||
530 | pr_info("crashkernel low reservation failed - No suitable area found.\n"); | ||
531 | |||
532 | return; | ||
533 | } | ||
534 | |||
535 | memblock_reserve(low_base, low_size); | ||
536 | pr_info("Reserving %ldMB of low memory at %ldMB for crashkernel (System low RAM: %ldMB)\n", | ||
537 | (unsigned long)(low_size >> 20), | ||
538 | (unsigned long)(low_base >> 20), | ||
539 | (unsigned long)(total_low_mem >> 20)); | ||
540 | crashk_low_res.start = low_base; | ||
541 | crashk_low_res.end = low_base + low_size - 1; | ||
542 | insert_resource(&iomem_resource, &crashk_low_res); | ||
511 | #endif | 543 | #endif |
544 | } | ||
512 | 545 | ||
513 | static void __init reserve_crashkernel(void) | 546 | static void __init reserve_crashkernel(void) |
514 | { | 547 | { |
548 | const unsigned long long alignment = 16<<20; /* 16M */ | ||
515 | unsigned long long total_mem; | 549 | unsigned long long total_mem; |
516 | unsigned long long crash_size, crash_base; | 550 | unsigned long long crash_size, crash_base; |
517 | int ret; | 551 | int ret; |
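reserve_crashkernel_low() above sets aside an extra chunk under 4 GiB when the main crashkernel region ends up above 4 GiB, so the kdump kernel still has some memory below 4 GiB (typically needed for swiotlb/DMA). A standalone toy of the "find a 16 MiB-aligned block that ends below 4 GiB" step it relies on (memblock's real search policy differs in detail, and the free ranges below are invented):

    #include <stdint.h>
    #include <stdio.h>

    #define ALIGN_16M       (16ULL << 20)
    #define LIMIT_4G        (1ULL << 32)

    struct range { uint64_t start, end; };  /* [start, end) of free RAM */

    /*
     * Find a 16 MiB-aligned block of 'size' bytes that ends below 4 GiB,
     * preferring the highest suitable base address.
     */
    static uint64_t find_low_block(const struct range *avail, int n, uint64_t size)
    {
            uint64_t best = 0;

            for (int i = 0; i < n; i++) {
                    uint64_t end = avail[i].end < LIMIT_4G ? avail[i].end : LIMIT_4G;

                    if (end < size || end - size < avail[i].start)
                            continue;
                    uint64_t base = (end - size) & ~(ALIGN_16M - 1);
                    if (base >= avail[i].start && base > best)
                            best = base;
            }
            return best;
    }

    int main(void)
    {
            struct range avail[] = {
                    { 0x00100000ULL,  0x7f000000ULL },      /* ~2 GiB of low RAM */
                    { 0x100000000ULL, 0x200000000ULL },     /* RAM above 4 GiB   */
            };
            uint64_t base = find_low_block(avail, 2, 256ULL << 20);

            printf("crashkernel_low candidate base: %#llx\n",
                   (unsigned long long)base);
            return 0;
    }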
@@ -525,8 +559,6 @@ static void __init reserve_crashkernel(void) | |||
525 | 559 | ||
526 | /* 0 means: find the address automatically */ | 560 | /* 0 means: find the address automatically */ |
527 | if (crash_base <= 0) { | 561 | if (crash_base <= 0) { |
528 | const unsigned long long alignment = 16<<20; /* 16M */ | ||
529 | |||
530 | /* | 562 | /* |
531 | * kexec wants the bzImage to be below CRASH_KERNEL_ADDR_MAX | 563 | * kexec wants the bzImage to be below CRASH_KERNEL_ADDR_MAX |
532 | */ | 564 | */ |
@@ -537,6 +569,7 @@ static void __init reserve_crashkernel(void) | |||
537 | pr_info("crashkernel reservation failed - No suitable area found.\n"); | 569 | pr_info("crashkernel reservation failed - No suitable area found.\n"); |
538 | return; | 570 | return; |
539 | } | 571 | } |
572 | |||
540 | } else { | 573 | } else { |
541 | unsigned long long start; | 574 | unsigned long long start; |
542 | 575 | ||
@@ -558,6 +591,9 @@ static void __init reserve_crashkernel(void) | |||
558 | crashk_res.start = crash_base; | 591 | crashk_res.start = crash_base; |
559 | crashk_res.end = crash_base + crash_size - 1; | 592 | crashk_res.end = crash_base + crash_size - 1; |
560 | insert_resource(&iomem_resource, &crashk_res); | 593 | insert_resource(&iomem_resource, &crashk_res); |
594 | |||
595 | if (crash_base >= (1ULL<<32)) | ||
596 | reserve_crashkernel_low(); | ||
561 | } | 597 | } |
562 | #else | 598 | #else |
563 | static void __init reserve_crashkernel(void) | 599 | static void __init reserve_crashkernel(void) |
@@ -608,8 +644,6 @@ static __init void reserve_ibft_region(void) | |||
608 | memblock_reserve(addr, size); | 644 | memblock_reserve(addr, size); |
609 | } | 645 | } |
610 | 646 | ||
611 | static unsigned reserve_low = CONFIG_X86_RESERVE_LOW << 10; | ||
612 | |||
613 | static bool __init snb_gfx_workaround_needed(void) | 647 | static bool __init snb_gfx_workaround_needed(void) |
614 | { | 648 | { |
615 | #ifdef CONFIG_PCI | 649 | #ifdef CONFIG_PCI |
@@ -698,8 +732,7 @@ static void __init trim_bios_range(void) | |||
698 | * since some BIOSes are known to corrupt low memory. See the | 732 | * since some BIOSes are known to corrupt low memory. See the |
699 | * Kconfig help text for X86_RESERVE_LOW. | 733 | * Kconfig help text for X86_RESERVE_LOW. |
700 | */ | 734 | */ |
701 | e820_update_range(0, ALIGN(reserve_low, PAGE_SIZE), | 735 | e820_update_range(0, PAGE_SIZE, E820_RAM, E820_RESERVED); |
702 | E820_RAM, E820_RESERVED); | ||
703 | 736 | ||
704 | /* | 737 | /* |
705 | * special case: Some BIOSen report the PC BIOS | 738 | * special case: Some BIOSen report the PC BIOS |
@@ -711,6 +744,29 @@ static void __init trim_bios_range(void) | |||
711 | sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); | 744 | sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); |
712 | } | 745 | } |
713 | 746 | ||
747 | /* called before trim_bios_range() to spare extra sanitize */ | ||
748 | static void __init e820_add_kernel_range(void) | ||
749 | { | ||
750 | u64 start = __pa_symbol(_text); | ||
751 | u64 size = __pa_symbol(_end) - start; | ||
752 | |||
753 | /* | ||
754 | * Complain if .text .data and .bss are not marked as E820_RAM and | ||
755 | * attempt to fix it by adding the range. We may have a confused BIOS, | ||
756 | * or the user may have used memmap=exactmap or memmap=xxM$yyM to | ||
757 | * exclude the kernel range. If we really are running on top of non-RAM, | ||
758 | * we will crash later anyway. | ||
759 | */ | ||
760 | if (e820_all_mapped(start, start + size, E820_RAM)) | ||
761 | return; | ||
762 | |||
763 | pr_warn(".text .data .bss are not marked as E820_RAM!\n"); | ||
764 | e820_remove_range(start, size, E820_RAM, 0); | ||
765 | e820_add_region(start, size, E820_RAM); | ||
766 | } | ||
767 | |||
768 | static unsigned reserve_low = CONFIG_X86_RESERVE_LOW << 10; | ||
769 | |||
714 | static int __init parse_reservelow(char *p) | 770 | static int __init parse_reservelow(char *p) |
715 | { | 771 | { |
716 | unsigned long long size; | 772 | unsigned long long size; |
@@ -733,6 +789,11 @@ static int __init parse_reservelow(char *p) | |||
733 | 789 | ||
734 | early_param("reservelow", parse_reservelow); | 790 | early_param("reservelow", parse_reservelow); |
735 | 791 | ||
792 | static void __init trim_low_memory_range(void) | ||
793 | { | ||
794 | memblock_reserve(0, ALIGN(reserve_low, PAGE_SIZE)); | ||
795 | } | ||
796 | |||
736 | /* | 797 | /* |
737 | * Determine if we were loaded by an EFI loader. If so, then we have also been | 798 | * Determine if we were loaded by an EFI loader. If so, then we have also been |
738 | * passed the efi memmap, systab, etc., so we should use these data structures | 799 | * passed the efi memmap, systab, etc., so we should use these data structures |
@@ -748,6 +809,17 @@ early_param("reservelow", parse_reservelow); | |||
748 | 809 | ||
749 | void __init setup_arch(char **cmdline_p) | 810 | void __init setup_arch(char **cmdline_p) |
750 | { | 811 | { |
812 | memblock_reserve(__pa_symbol(_text), | ||
813 | (unsigned long)__bss_stop - (unsigned long)_text); | ||
814 | |||
815 | early_reserve_initrd(); | ||
816 | |||
817 | /* | ||
818 | * At this point everything still needed from the boot loader | ||
819 | * or BIOS or kernel text should be early reserved or marked not | ||
820 | * RAM in e820. All other memory is free game. | ||
821 | */ | ||
822 | |||
751 | #ifdef CONFIG_X86_32 | 823 | #ifdef CONFIG_X86_32 |
752 | memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data)); | 824 | memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data)); |
753 | visws_early_detect(); | 825 | visws_early_detect(); |
@@ -835,12 +907,12 @@ void __init setup_arch(char **cmdline_p) | |||
835 | init_mm.end_data = (unsigned long) _edata; | 907 | init_mm.end_data = (unsigned long) _edata; |
836 | init_mm.brk = _brk_end; | 908 | init_mm.brk = _brk_end; |
837 | 909 | ||
838 | code_resource.start = virt_to_phys(_text); | 910 | code_resource.start = __pa_symbol(_text); |
839 | code_resource.end = virt_to_phys(_etext)-1; | 911 | code_resource.end = __pa_symbol(_etext)-1; |
840 | data_resource.start = virt_to_phys(_etext); | 912 | data_resource.start = __pa_symbol(_etext); |
841 | data_resource.end = virt_to_phys(_edata)-1; | 913 | data_resource.end = __pa_symbol(_edata)-1; |
842 | bss_resource.start = virt_to_phys(&__bss_start); | 914 | bss_resource.start = __pa_symbol(__bss_start); |
843 | bss_resource.end = virt_to_phys(&__bss_stop)-1; | 915 | bss_resource.end = __pa_symbol(__bss_stop)-1; |
844 | 916 | ||
845 | #ifdef CONFIG_CMDLINE_BOOL | 917 | #ifdef CONFIG_CMDLINE_BOOL |
846 | #ifdef CONFIG_CMDLINE_OVERRIDE | 918 | #ifdef CONFIG_CMDLINE_OVERRIDE |
@@ -906,6 +978,7 @@ void __init setup_arch(char **cmdline_p) | |||
906 | insert_resource(&iomem_resource, &data_resource); | 978 | insert_resource(&iomem_resource, &data_resource); |
907 | insert_resource(&iomem_resource, &bss_resource); | 979 | insert_resource(&iomem_resource, &bss_resource); |
908 | 980 | ||
981 | e820_add_kernel_range(); | ||
909 | trim_bios_range(); | 982 | trim_bios_range(); |
910 | #ifdef CONFIG_X86_32 | 983 | #ifdef CONFIG_X86_32 |
911 | if (ppro_with_ram_bug()) { | 984 | if (ppro_with_ram_bug()) { |
@@ -955,6 +1028,8 @@ void __init setup_arch(char **cmdline_p) | |||
955 | 1028 | ||
956 | reserve_ibft_region(); | 1029 | reserve_ibft_region(); |
957 | 1030 | ||
1031 | early_alloc_pgt_buf(); | ||
1032 | |||
958 | /* | 1033 | /* |
959 | * Need to conclude brk, before memblock_x86_fill() | 1034 | * Need to conclude brk, before memblock_x86_fill() |
960 | * it could use memblock_find_in_range, could overlap with | 1035 | * it could use memblock_find_in_range, could overlap with |
@@ -964,7 +1039,7 @@ void __init setup_arch(char **cmdline_p) | |||
964 | 1039 | ||
965 | cleanup_highmap(); | 1040 | cleanup_highmap(); |
966 | 1041 | ||
967 | memblock.current_limit = get_max_mapped(); | 1042 | memblock.current_limit = ISA_END_ADDRESS; |
968 | memblock_x86_fill(); | 1043 | memblock_x86_fill(); |
969 | 1044 | ||
970 | /* | 1045 | /* |
@@ -981,41 +1056,22 @@ void __init setup_arch(char **cmdline_p) | |||
981 | setup_bios_corruption_check(); | 1056 | setup_bios_corruption_check(); |
982 | #endif | 1057 | #endif |
983 | 1058 | ||
1059 | #ifdef CONFIG_X86_32 | ||
984 | printk(KERN_DEBUG "initial memory mapped: [mem 0x00000000-%#010lx]\n", | 1060 | printk(KERN_DEBUG "initial memory mapped: [mem 0x00000000-%#010lx]\n", |
985 | (max_pfn_mapped<<PAGE_SHIFT) - 1); | 1061 | (max_pfn_mapped<<PAGE_SHIFT) - 1); |
1062 | #endif | ||
986 | 1063 | ||
987 | setup_real_mode(); | 1064 | reserve_real_mode(); |
988 | 1065 | ||
989 | trim_platform_memory_ranges(); | 1066 | trim_platform_memory_ranges(); |
1067 | trim_low_memory_range(); | ||
990 | 1068 | ||
991 | init_gbpages(); | 1069 | init_mem_mapping(); |
992 | |||
993 | /* max_pfn_mapped is updated here */ | ||
994 | max_low_pfn_mapped = init_memory_mapping(0, max_low_pfn<<PAGE_SHIFT); | ||
995 | max_pfn_mapped = max_low_pfn_mapped; | ||
996 | |||
997 | #ifdef CONFIG_X86_64 | ||
998 | if (max_pfn > max_low_pfn) { | ||
999 | int i; | ||
1000 | unsigned long start, end; | ||
1001 | unsigned long start_pfn, end_pfn; | ||
1002 | |||
1003 | for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, | ||
1004 | NULL) { | ||
1005 | 1070 | ||
1006 | end = PFN_PHYS(end_pfn); | 1071 | early_trap_pf_init(); |
1007 | if (end <= (1UL<<32)) | ||
1008 | continue; | ||
1009 | 1072 | ||
1010 | start = PFN_PHYS(start_pfn); | 1073 | setup_real_mode(); |
1011 | max_pfn_mapped = init_memory_mapping( | ||
1012 | max((1UL<<32), start), end); | ||
1013 | } | ||
1014 | 1074 | ||
1015 | /* can we preseve max_low_pfn ?*/ | ||
1016 | max_low_pfn = max_pfn; | ||
1017 | } | ||
1018 | #endif | ||
1019 | memblock.current_limit = get_max_mapped(); | 1075 | memblock.current_limit = get_max_mapped(); |
1020 | dma_contiguous_reserve(0); | 1076 | dma_contiguous_reserve(0); |
1021 | 1077 | ||
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index ecffca11f4e9..68bda7a84159 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c | |||
@@ -688,10 +688,19 @@ void __init early_trap_init(void) | |||
688 | set_intr_gate_ist(X86_TRAP_DB, &debug, DEBUG_STACK); | 688 | set_intr_gate_ist(X86_TRAP_DB, &debug, DEBUG_STACK); |
689 | /* int3 can be called from all */ | 689 | /* int3 can be called from all */ |
690 | set_system_intr_gate_ist(X86_TRAP_BP, &int3, DEBUG_STACK); | 690 | set_system_intr_gate_ist(X86_TRAP_BP, &int3, DEBUG_STACK); |
691 | #ifdef CONFIG_X86_32 | ||
691 | set_intr_gate(X86_TRAP_PF, &page_fault); | 692 | set_intr_gate(X86_TRAP_PF, &page_fault); |
693 | #endif | ||
692 | load_idt(&idt_descr); | 694 | load_idt(&idt_descr); |
693 | } | 695 | } |
694 | 696 | ||
697 | void __init early_trap_pf_init(void) | ||
698 | { | ||
699 | #ifdef CONFIG_X86_64 | ||
700 | set_intr_gate(X86_TRAP_PF, &page_fault); | ||
701 | #endif | ||
702 | } | ||
703 | |||
695 | void __init trap_init(void) | 704 | void __init trap_init(void) |
696 | { | 705 | { |
697 | int i; | 706 | int i; |
diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c index 1330dd102950..b014d9414d08 100644 --- a/arch/x86/kernel/x8664_ksyms_64.c +++ b/arch/x86/kernel/x8664_ksyms_64.c | |||
@@ -59,6 +59,9 @@ EXPORT_SYMBOL(memcpy); | |||
59 | EXPORT_SYMBOL(__memcpy); | 59 | EXPORT_SYMBOL(__memcpy); |
60 | EXPORT_SYMBOL(memmove); | 60 | EXPORT_SYMBOL(memmove); |
61 | 61 | ||
62 | #ifndef CONFIG_DEBUG_VIRTUAL | ||
63 | EXPORT_SYMBOL(phys_base); | ||
64 | #endif | ||
62 | EXPORT_SYMBOL(empty_zero_page); | 65 | EXPORT_SYMBOL(empty_zero_page); |
63 | #ifndef CONFIG_PARAVIRT | 66 | #ifndef CONFIG_PARAVIRT |
64 | EXPORT_SYMBOL(native_load_gs_index); | 67 | EXPORT_SYMBOL(native_load_gs_index); |
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index d065d67c2672..45a14dbbddaf 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c | |||
@@ -63,10 +63,6 @@ struct x86_init_ops x86_init __initdata = { | |||
63 | .banner = default_banner, | 63 | .banner = default_banner, |
64 | }, | 64 | }, |
65 | 65 | ||
66 | .mapping = { | ||
67 | .pagetable_reserve = native_pagetable_reserve, | ||
68 | }, | ||
69 | |||
70 | .paging = { | 66 | .paging = { |
71 | .pagetable_init = native_pagetable_init, | 67 | .pagetable_init = native_pagetable_init, |
72 | }, | 68 | }, |
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index df4176cdbb32..1cbd89ca5569 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c | |||
@@ -552,7 +552,8 @@ static void lguest_write_cr3(unsigned long cr3) | |||
552 | current_cr3 = cr3; | 552 | current_cr3 = cr3; |
553 | 553 | ||
554 | /* These two page tables are simple, linear, and used during boot */ | 554 | /* These two page tables are simple, linear, and used during boot */ |
555 | if (cr3 != __pa(swapper_pg_dir) && cr3 != __pa(initial_page_table)) | 555 | if (cr3 != __pa_symbol(swapper_pg_dir) && |
556 | cr3 != __pa_symbol(initial_page_table)) | ||
556 | cr3_changed = true; | 557 | cr3_changed = true; |
557 | } | 558 | } |
558 | 559 | ||
diff --git a/arch/x86/lib/getuser.S b/arch/x86/lib/getuser.S index 156b9c804670..a4512359656a 100644 --- a/arch/x86/lib/getuser.S +++ b/arch/x86/lib/getuser.S | |||
@@ -15,11 +15,10 @@ | |||
15 | * __get_user_X | 15 | * __get_user_X |
16 | * | 16 | * |
17 | * Inputs: %[r|e]ax contains the address. | 17 | * Inputs: %[r|e]ax contains the address. |
18 | * The register is modified, but all changes are undone | ||
19 | * before returning because the C code doesn't know about it. | ||
20 | * | 18 | * |
21 | * Outputs: %[r|e]ax is error code (0 or -EFAULT) | 19 | * Outputs: %[r|e]ax is error code (0 or -EFAULT) |
22 | * %[r|e]dx contains zero-extended value | 20 | * %[r|e]dx contains zero-extended value |
21 | * %ecx contains the high half for 32-bit __get_user_8 | ||
23 | * | 22 | * |
24 | * | 23 | * |
25 | * These functions should not modify any other registers, | 24 | * These functions should not modify any other registers, |
@@ -42,7 +41,7 @@ ENTRY(__get_user_1) | |||
42 | cmp TI_addr_limit(%_ASM_DX),%_ASM_AX | 41 | cmp TI_addr_limit(%_ASM_DX),%_ASM_AX |
43 | jae bad_get_user | 42 | jae bad_get_user |
44 | ASM_STAC | 43 | ASM_STAC |
45 | 1: movzb (%_ASM_AX),%edx | 44 | 1: movzbl (%_ASM_AX),%edx |
46 | xor %eax,%eax | 45 | xor %eax,%eax |
47 | ASM_CLAC | 46 | ASM_CLAC |
48 | ret | 47 | ret |
@@ -72,29 +71,42 @@ ENTRY(__get_user_4) | |||
72 | cmp TI_addr_limit(%_ASM_DX),%_ASM_AX | 71 | cmp TI_addr_limit(%_ASM_DX),%_ASM_AX |
73 | jae bad_get_user | 72 | jae bad_get_user |
74 | ASM_STAC | 73 | ASM_STAC |
75 | 3: mov -3(%_ASM_AX),%edx | 74 | 3: movl -3(%_ASM_AX),%edx |
76 | xor %eax,%eax | 75 | xor %eax,%eax |
77 | ASM_CLAC | 76 | ASM_CLAC |
78 | ret | 77 | ret |
79 | CFI_ENDPROC | 78 | CFI_ENDPROC |
80 | ENDPROC(__get_user_4) | 79 | ENDPROC(__get_user_4) |
81 | 80 | ||
82 | #ifdef CONFIG_X86_64 | ||
83 | ENTRY(__get_user_8) | 81 | ENTRY(__get_user_8) |
84 | CFI_STARTPROC | 82 | CFI_STARTPROC |
83 | #ifdef CONFIG_X86_64 | ||
85 | add $7,%_ASM_AX | 84 | add $7,%_ASM_AX |
86 | jc bad_get_user | 85 | jc bad_get_user |
87 | GET_THREAD_INFO(%_ASM_DX) | 86 | GET_THREAD_INFO(%_ASM_DX) |
88 | cmp TI_addr_limit(%_ASM_DX),%_ASM_AX | 87 | cmp TI_addr_limit(%_ASM_DX),%_ASM_AX |
89 | jae bad_get_user | 88 | jae bad_get_user |
90 | ASM_STAC | 89 | ASM_STAC |
91 | 4: movq -7(%_ASM_AX),%_ASM_DX | 90 | 4: movq -7(%_ASM_AX),%rdx |
92 | xor %eax,%eax | 91 | xor %eax,%eax |
93 | ASM_CLAC | 92 | ASM_CLAC |
94 | ret | 93 | ret |
94 | #else | ||
95 | add $7,%_ASM_AX | ||
96 | jc bad_get_user_8 | ||
97 | GET_THREAD_INFO(%_ASM_DX) | ||
98 | cmp TI_addr_limit(%_ASM_DX),%_ASM_AX | ||
99 | jae bad_get_user_8 | ||
100 | ASM_STAC | ||
101 | 4: movl -7(%_ASM_AX),%edx | ||
102 | 5: movl -3(%_ASM_AX),%ecx | ||
103 | xor %eax,%eax | ||
104 | ASM_CLAC | ||
105 | ret | ||
106 | #endif | ||
95 | CFI_ENDPROC | 107 | CFI_ENDPROC |
96 | ENDPROC(__get_user_8) | 108 | ENDPROC(__get_user_8) |
97 | #endif | 109 | |
98 | 110 | ||
99 | bad_get_user: | 111 | bad_get_user: |
100 | CFI_STARTPROC | 112 | CFI_STARTPROC |
@@ -105,9 +117,24 @@ bad_get_user: | |||
105 | CFI_ENDPROC | 117 | CFI_ENDPROC |
106 | END(bad_get_user) | 118 | END(bad_get_user) |
107 | 119 | ||
120 | #ifdef CONFIG_X86_32 | ||
121 | bad_get_user_8: | ||
122 | CFI_STARTPROC | ||
123 | xor %edx,%edx | ||
124 | xor %ecx,%ecx | ||
125 | mov $(-EFAULT),%_ASM_AX | ||
126 | ASM_CLAC | ||
127 | ret | ||
128 | CFI_ENDPROC | ||
129 | END(bad_get_user_8) | ||
130 | #endif | ||
131 | |||
108 | _ASM_EXTABLE(1b,bad_get_user) | 132 | _ASM_EXTABLE(1b,bad_get_user) |
109 | _ASM_EXTABLE(2b,bad_get_user) | 133 | _ASM_EXTABLE(2b,bad_get_user) |
110 | _ASM_EXTABLE(3b,bad_get_user) | 134 | _ASM_EXTABLE(3b,bad_get_user) |
111 | #ifdef CONFIG_X86_64 | 135 | #ifdef CONFIG_X86_64 |
112 | _ASM_EXTABLE(4b,bad_get_user) | 136 | _ASM_EXTABLE(4b,bad_get_user) |
137 | #else | ||
138 | _ASM_EXTABLE(4b,bad_get_user_8) | ||
139 | _ASM_EXTABLE(5b,bad_get_user_8) | ||
113 | #endif | 140 | #endif |
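
With the 32-bit __get_user_8 added above, the low 32 bits of the user value come back in %edx and the high 32 bits in %ecx (and bad_get_user_8 clears both). The get_user() wrapper is then responsible for stitching the halves back into a u64; a standalone sketch of that reassembly, with the two registers modelled as plain variables:

    #include <stdint.h>
    #include <stdio.h>

    /*
     * Model of what the caller of the 32-bit __get_user_8 has to do:
     * combine the low half (%edx) and the high half (%ecx) into a u64.
     */
    static uint64_t combine_halves(uint32_t edx_lo, uint32_t ecx_hi)
    {
        return ((uint64_t)ecx_hi << 32) | edx_lo;
    }

    int main(void)
    {
        uint32_t lo = 0x55667788;   /* %edx */
        uint32_t hi = 0x11223344;   /* %ecx */

        printf("%#llx\n", (unsigned long long)combine_halves(lo, hi));
        return 0;
    }
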
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index d7aea41563b3..d41815265a0b 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c | |||
@@ -17,86 +17,132 @@ | |||
17 | #include <asm/proto.h> | 17 | #include <asm/proto.h> |
18 | #include <asm/dma.h> /* for MAX_DMA_PFN */ | 18 | #include <asm/dma.h> /* for MAX_DMA_PFN */ |
19 | 19 | ||
20 | unsigned long __initdata pgt_buf_start; | 20 | #include "mm_internal.h" |
21 | unsigned long __meminitdata pgt_buf_end; | ||
22 | unsigned long __meminitdata pgt_buf_top; | ||
23 | 21 | ||
24 | int after_bootmem; | 22 | static unsigned long __initdata pgt_buf_start; |
23 | static unsigned long __initdata pgt_buf_end; | ||
24 | static unsigned long __initdata pgt_buf_top; | ||
25 | 25 | ||
26 | int direct_gbpages | 26 | static unsigned long min_pfn_mapped; |
27 | #ifdef CONFIG_DIRECT_GBPAGES | ||
28 | = 1 | ||
29 | #endif | ||
30 | ; | ||
31 | 27 | ||
32 | struct map_range { | 28 | static bool __initdata can_use_brk_pgt = true; |
33 | unsigned long start; | ||
34 | unsigned long end; | ||
35 | unsigned page_size_mask; | ||
36 | }; | ||
37 | 29 | ||
38 | /* | 30 | /* |
39 | * First calculate space needed for kernel direct mapping page tables to cover | 31 | * Pages returned are already directly mapped. |
40 | * mr[0].start to mr[nr_range - 1].end, while accounting for possible 2M and 1GB | 32 | * |
41 | * pages. Then find enough contiguous space for those page tables. | 33 | * Changing that is likely to break Xen, see commit: |
34 | * | ||
35 | * 279b706 x86,xen: introduce x86_init.mapping.pagetable_reserve | ||
36 | * | ||
37 | * for detailed information. | ||
42 | */ | 38 | */ |
43 | static void __init find_early_table_space(struct map_range *mr, int nr_range) | 39 | __ref void *alloc_low_pages(unsigned int num) |
44 | { | 40 | { |
41 | unsigned long pfn; | ||
45 | int i; | 42 | int i; |
46 | unsigned long puds = 0, pmds = 0, ptes = 0, tables; | ||
47 | unsigned long start = 0, good_end; | ||
48 | phys_addr_t base; | ||
49 | 43 | ||
50 | for (i = 0; i < nr_range; i++) { | 44 | if (after_bootmem) { |
51 | unsigned long range, extra; | 45 | unsigned int order; |
52 | 46 | ||
53 | range = mr[i].end - mr[i].start; | 47 | order = get_order((unsigned long)num << PAGE_SHIFT); |
54 | puds += (range + PUD_SIZE - 1) >> PUD_SHIFT; | 48 | return (void *)__get_free_pages(GFP_ATOMIC | __GFP_NOTRACK | |
49 | __GFP_ZERO, order); | ||
50 | } | ||
55 | 51 | ||
56 | if (mr[i].page_size_mask & (1 << PG_LEVEL_1G)) { | 52 | if ((pgt_buf_end + num) > pgt_buf_top || !can_use_brk_pgt) { |
57 | extra = range - ((range >> PUD_SHIFT) << PUD_SHIFT); | 53 | unsigned long ret; |
58 | pmds += (extra + PMD_SIZE - 1) >> PMD_SHIFT; | 54 | if (min_pfn_mapped >= max_pfn_mapped) |
59 | } else { | 55 | panic("alloc_low_page: ran out of memory"); |
60 | pmds += (range + PMD_SIZE - 1) >> PMD_SHIFT; | 56 | ret = memblock_find_in_range(min_pfn_mapped << PAGE_SHIFT, |
61 | } | 57 | max_pfn_mapped << PAGE_SHIFT, |
58 | PAGE_SIZE * num , PAGE_SIZE); | ||
59 | if (!ret) | ||
60 | panic("alloc_low_page: can not alloc memory"); | ||
61 | memblock_reserve(ret, PAGE_SIZE * num); | ||
62 | pfn = ret >> PAGE_SHIFT; | ||
63 | } else { | ||
64 | pfn = pgt_buf_end; | ||
65 | pgt_buf_end += num; | ||
66 | printk(KERN_DEBUG "BRK [%#010lx, %#010lx] PGTABLE\n", | ||
67 | pfn << PAGE_SHIFT, (pgt_buf_end << PAGE_SHIFT) - 1); | ||
68 | } | ||
62 | 69 | ||
63 | if (mr[i].page_size_mask & (1 << PG_LEVEL_2M)) { | 70 | for (i = 0; i < num; i++) { |
64 | extra = range - ((range >> PMD_SHIFT) << PMD_SHIFT); | 71 | void *adr; |
65 | #ifdef CONFIG_X86_32 | 72 | |
66 | extra += PMD_SIZE; | 73 | adr = __va((pfn + i) << PAGE_SHIFT); |
67 | #endif | 74 | clear_page(adr); |
68 | ptes += (extra + PAGE_SIZE - 1) >> PAGE_SHIFT; | ||
69 | } else { | ||
70 | ptes += (range + PAGE_SIZE - 1) >> PAGE_SHIFT; | ||
71 | } | ||
72 | } | 75 | } |
73 | 76 | ||
74 | tables = roundup(puds * sizeof(pud_t), PAGE_SIZE); | 77 | return __va(pfn << PAGE_SHIFT); |
75 | tables += roundup(pmds * sizeof(pmd_t), PAGE_SIZE); | 78 | } |
76 | tables += roundup(ptes * sizeof(pte_t), PAGE_SIZE); | ||
77 | 79 | ||
78 | #ifdef CONFIG_X86_32 | 80 | /* need 4 4k for initial PMD_SIZE, 4k for 0-ISA_END_ADDRESS */ |
79 | /* for fixmap */ | 81 | #define INIT_PGT_BUF_SIZE (5 * PAGE_SIZE) |
80 | tables += roundup(__end_of_fixed_addresses * sizeof(pte_t), PAGE_SIZE); | 82 | RESERVE_BRK(early_pgt_alloc, INIT_PGT_BUF_SIZE); |
81 | #endif | 83 | void __init early_alloc_pgt_buf(void) |
82 | good_end = max_pfn_mapped << PAGE_SHIFT; | 84 | { |
85 | unsigned long tables = INIT_PGT_BUF_SIZE; | ||
86 | phys_addr_t base; | ||
83 | 87 | ||
84 | base = memblock_find_in_range(start, good_end, tables, PAGE_SIZE); | 88 | base = __pa(extend_brk(tables, PAGE_SIZE)); |
85 | if (!base) | ||
86 | panic("Cannot find space for the kernel page tables"); | ||
87 | 89 | ||
88 | pgt_buf_start = base >> PAGE_SHIFT; | 90 | pgt_buf_start = base >> PAGE_SHIFT; |
89 | pgt_buf_end = pgt_buf_start; | 91 | pgt_buf_end = pgt_buf_start; |
90 | pgt_buf_top = pgt_buf_start + (tables >> PAGE_SHIFT); | 92 | pgt_buf_top = pgt_buf_start + (tables >> PAGE_SHIFT); |
93 | } | ||
94 | |||
95 | int after_bootmem; | ||
96 | |||
97 | int direct_gbpages | ||
98 | #ifdef CONFIG_DIRECT_GBPAGES | ||
99 | = 1 | ||
100 | #endif | ||
101 | ; | ||
91 | 102 | ||
92 | printk(KERN_DEBUG "kernel direct mapping tables up to %#lx @ [mem %#010lx-%#010lx]\n", | 103 | static void __init init_gbpages(void) |
93 | mr[nr_range - 1].end - 1, pgt_buf_start << PAGE_SHIFT, | 104 | { |
94 | (pgt_buf_top << PAGE_SHIFT) - 1); | 105 | #ifdef CONFIG_X86_64 |
106 | if (direct_gbpages && cpu_has_gbpages) | ||
107 | printk(KERN_INFO "Using GB pages for direct mapping\n"); | ||
108 | else | ||
109 | direct_gbpages = 0; | ||
110 | #endif | ||
95 | } | 111 | } |
96 | 112 | ||
97 | void __init native_pagetable_reserve(u64 start, u64 end) | 113 | struct map_range { |
114 | unsigned long start; | ||
115 | unsigned long end; | ||
116 | unsigned page_size_mask; | ||
117 | }; | ||
118 | |||
119 | static int page_size_mask; | ||
120 | |||
121 | static void __init probe_page_size_mask(void) | ||
98 | { | 122 | { |
99 | memblock_reserve(start, end - start); | 123 | init_gbpages(); |
124 | |||
125 | #if !defined(CONFIG_DEBUG_PAGEALLOC) && !defined(CONFIG_KMEMCHECK) | ||
126 | /* | ||
127 | * For CONFIG_DEBUG_PAGEALLOC, identity mapping will use small pages. | ||
128 | * This will simplify cpa(), which otherwise needs to support splitting | ||
129 | * large pages into small in interrupt context, etc. | ||
130 | */ | ||
131 | if (direct_gbpages) | ||
132 | page_size_mask |= 1 << PG_LEVEL_1G; | ||
133 | if (cpu_has_pse) | ||
134 | page_size_mask |= 1 << PG_LEVEL_2M; | ||
135 | #endif | ||
136 | |||
137 | /* Enable PSE if available */ | ||
138 | if (cpu_has_pse) | ||
139 | set_in_cr4(X86_CR4_PSE); | ||
140 | |||
141 | /* Enable PGE if available */ | ||
142 | if (cpu_has_pge) { | ||
143 | set_in_cr4(X86_CR4_PGE); | ||
144 | __supported_pte_mask |= _PAGE_GLOBAL; | ||
145 | } | ||
100 | } | 146 | } |
101 | 147 | ||
102 | #ifdef CONFIG_X86_32 | 148 | #ifdef CONFIG_X86_32 |
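
alloc_low_pages() above serves early page-table pages from the small RESERVE_BRK pool set up by early_alloc_pgt_buf(), and falls back to memblock (inside the already-mapped pfn window) once the pool runs out or can_use_brk_pgt is false; either way the pages are returned zeroed. A user-space sketch of that "fixed pool first, general allocator as fallback, always zeroed" pattern; pool size and names are illustrative:

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    #define PAGE_SIZE  4096
    #define POOL_PAGES 5    /* stand-in for INIT_PGT_BUF_SIZE */

    static unsigned char pool[POOL_PAGES * PAGE_SIZE];  /* stand-in for the brk area */
    static int pool_used;                               /* stand-in for pgt_buf_end */

    /*
     * Hand out 'num' zeroed pages: from the static pool while it lasts,
     * otherwise from the general allocator (memblock in the kernel).
     */
    static void *alloc_low_pages_model(int num)
    {
        void *p;

        if (pool_used + num <= POOL_PAGES) {
            p = pool + (size_t)pool_used * PAGE_SIZE;
            pool_used += num;
        } else {
            p = aligned_alloc(PAGE_SIZE, (size_t)num * PAGE_SIZE);
            if (!p) {
                fprintf(stderr, "out of memory\n");
                exit(1);
            }
        }
        memset(p, 0, (size_t)num * PAGE_SIZE);  /* callers rely on zeroed pages */
        return p;
    }

    int main(void)
    {
        void *a = alloc_low_pages_model(4);     /* served from the pool */
        void *b = alloc_low_pages_model(2);     /* pool exhausted -> fallback */

        printf("%p %p\n", a, b);
        return 0;
    }
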
@@ -122,58 +168,51 @@ static int __meminit save_mr(struct map_range *mr, int nr_range, | |||
122 | } | 168 | } |
123 | 169 | ||
124 | /* | 170 | /* |
125 | * Setup the direct mapping of the physical memory at PAGE_OFFSET. | 171 | * adjust the page_size_mask for small range to go with |
126 | * This runs before bootmem is initialized and gets pages directly from | 172 | * big page size instead small one if nearby are ram too. |
127 | * the physical memory. To access them they are temporarily mapped. | ||
128 | */ | 173 | */ |
129 | unsigned long __init_refok init_memory_mapping(unsigned long start, | 174 | static void __init_refok adjust_range_page_size_mask(struct map_range *mr, |
130 | unsigned long end) | 175 | int nr_range) |
131 | { | 176 | { |
132 | unsigned long page_size_mask = 0; | 177 | int i; |
133 | unsigned long start_pfn, end_pfn; | ||
134 | unsigned long ret = 0; | ||
135 | unsigned long pos; | ||
136 | |||
137 | struct map_range mr[NR_RANGE_MR]; | ||
138 | int nr_range, i; | ||
139 | int use_pse, use_gbpages; | ||
140 | 178 | ||
141 | printk(KERN_INFO "init_memory_mapping: [mem %#010lx-%#010lx]\n", | 179 | for (i = 0; i < nr_range; i++) { |
142 | start, end - 1); | 180 | if ((page_size_mask & (1<<PG_LEVEL_2M)) && |
181 | !(mr[i].page_size_mask & (1<<PG_LEVEL_2M))) { | ||
182 | unsigned long start = round_down(mr[i].start, PMD_SIZE); | ||
183 | unsigned long end = round_up(mr[i].end, PMD_SIZE); | ||
143 | 184 | ||
144 | #if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KMEMCHECK) | 185 | #ifdef CONFIG_X86_32 |
145 | /* | 186 | if ((end >> PAGE_SHIFT) > max_low_pfn) |
146 | * For CONFIG_DEBUG_PAGEALLOC, identity mapping will use small pages. | 187 | continue; |
147 | * This will simplify cpa(), which otherwise needs to support splitting | ||
148 | * large pages into small in interrupt context, etc. | ||
149 | */ | ||
150 | use_pse = use_gbpages = 0; | ||
151 | #else | ||
152 | use_pse = cpu_has_pse; | ||
153 | use_gbpages = direct_gbpages; | ||
154 | #endif | 188 | #endif |
155 | 189 | ||
156 | /* Enable PSE if available */ | 190 | if (memblock_is_region_memory(start, end - start)) |
157 | if (cpu_has_pse) | 191 | mr[i].page_size_mask |= 1<<PG_LEVEL_2M; |
158 | set_in_cr4(X86_CR4_PSE); | 192 | } |
193 | if ((page_size_mask & (1<<PG_LEVEL_1G)) && | ||
194 | !(mr[i].page_size_mask & (1<<PG_LEVEL_1G))) { | ||
195 | unsigned long start = round_down(mr[i].start, PUD_SIZE); | ||
196 | unsigned long end = round_up(mr[i].end, PUD_SIZE); | ||
159 | 197 | ||
160 | /* Enable PGE if available */ | 198 | if (memblock_is_region_memory(start, end - start)) |
161 | if (cpu_has_pge) { | 199 | mr[i].page_size_mask |= 1<<PG_LEVEL_1G; |
162 | set_in_cr4(X86_CR4_PGE); | 200 | } |
163 | __supported_pte_mask |= _PAGE_GLOBAL; | ||
164 | } | 201 | } |
202 | } | ||
165 | 203 | ||
166 | if (use_gbpages) | 204 | static int __meminit split_mem_range(struct map_range *mr, int nr_range, |
167 | page_size_mask |= 1 << PG_LEVEL_1G; | 205 | unsigned long start, |
168 | if (use_pse) | 206 | unsigned long end) |
169 | page_size_mask |= 1 << PG_LEVEL_2M; | 207 | { |
208 | unsigned long start_pfn, end_pfn, limit_pfn; | ||
209 | unsigned long pfn; | ||
210 | int i; | ||
170 | 211 | ||
171 | memset(mr, 0, sizeof(mr)); | 212 | limit_pfn = PFN_DOWN(end); |
172 | nr_range = 0; | ||
173 | 213 | ||
174 | /* head if not big page alignment ? */ | 214 | /* head if not big page alignment ? */ |
175 | start_pfn = start >> PAGE_SHIFT; | 215 | pfn = start_pfn = PFN_DOWN(start); |
176 | pos = start_pfn << PAGE_SHIFT; | ||
177 | #ifdef CONFIG_X86_32 | 216 | #ifdef CONFIG_X86_32 |
178 | /* | 217 | /* |
179 | * Don't use a large page for the first 2/4MB of memory | 218 | * Don't use a large page for the first 2/4MB of memory |
@@ -181,66 +220,60 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, | |||
181 | * and overlapping MTRRs into large pages can cause | 220 | * and overlapping MTRRs into large pages can cause |
182 | * slowdowns. | 221 | * slowdowns. |
183 | */ | 222 | */ |
184 | if (pos == 0) | 223 | if (pfn == 0) |
185 | end_pfn = 1<<(PMD_SHIFT - PAGE_SHIFT); | 224 | end_pfn = PFN_DOWN(PMD_SIZE); |
186 | else | 225 | else |
187 | end_pfn = ((pos + (PMD_SIZE - 1))>>PMD_SHIFT) | 226 | end_pfn = round_up(pfn, PFN_DOWN(PMD_SIZE)); |
188 | << (PMD_SHIFT - PAGE_SHIFT); | ||
189 | #else /* CONFIG_X86_64 */ | 227 | #else /* CONFIG_X86_64 */ |
190 | end_pfn = ((pos + (PMD_SIZE - 1)) >> PMD_SHIFT) | 228 | end_pfn = round_up(pfn, PFN_DOWN(PMD_SIZE)); |
191 | << (PMD_SHIFT - PAGE_SHIFT); | ||
192 | #endif | 229 | #endif |
193 | if (end_pfn > (end >> PAGE_SHIFT)) | 230 | if (end_pfn > limit_pfn) |
194 | end_pfn = end >> PAGE_SHIFT; | 231 | end_pfn = limit_pfn; |
195 | if (start_pfn < end_pfn) { | 232 | if (start_pfn < end_pfn) { |
196 | nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0); | 233 | nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0); |
197 | pos = end_pfn << PAGE_SHIFT; | 234 | pfn = end_pfn; |
198 | } | 235 | } |
199 | 236 | ||
200 | /* big page (2M) range */ | 237 | /* big page (2M) range */ |
201 | start_pfn = ((pos + (PMD_SIZE - 1))>>PMD_SHIFT) | 238 | start_pfn = round_up(pfn, PFN_DOWN(PMD_SIZE)); |
202 | << (PMD_SHIFT - PAGE_SHIFT); | ||
203 | #ifdef CONFIG_X86_32 | 239 | #ifdef CONFIG_X86_32 |
204 | end_pfn = (end>>PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT); | 240 | end_pfn = round_down(limit_pfn, PFN_DOWN(PMD_SIZE)); |
205 | #else /* CONFIG_X86_64 */ | 241 | #else /* CONFIG_X86_64 */ |
206 | end_pfn = ((pos + (PUD_SIZE - 1))>>PUD_SHIFT) | 242 | end_pfn = round_up(pfn, PFN_DOWN(PUD_SIZE)); |
207 | << (PUD_SHIFT - PAGE_SHIFT); | 243 | if (end_pfn > round_down(limit_pfn, PFN_DOWN(PMD_SIZE))) |
208 | if (end_pfn > ((end>>PMD_SHIFT)<<(PMD_SHIFT - PAGE_SHIFT))) | 244 | end_pfn = round_down(limit_pfn, PFN_DOWN(PMD_SIZE)); |
209 | end_pfn = ((end>>PMD_SHIFT)<<(PMD_SHIFT - PAGE_SHIFT)); | ||
210 | #endif | 245 | #endif |
211 | 246 | ||
212 | if (start_pfn < end_pfn) { | 247 | if (start_pfn < end_pfn) { |
213 | nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, | 248 | nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, |
214 | page_size_mask & (1<<PG_LEVEL_2M)); | 249 | page_size_mask & (1<<PG_LEVEL_2M)); |
215 | pos = end_pfn << PAGE_SHIFT; | 250 | pfn = end_pfn; |
216 | } | 251 | } |
217 | 252 | ||
218 | #ifdef CONFIG_X86_64 | 253 | #ifdef CONFIG_X86_64 |
219 | /* big page (1G) range */ | 254 | /* big page (1G) range */ |
220 | start_pfn = ((pos + (PUD_SIZE - 1))>>PUD_SHIFT) | 255 | start_pfn = round_up(pfn, PFN_DOWN(PUD_SIZE)); |
221 | << (PUD_SHIFT - PAGE_SHIFT); | 256 | end_pfn = round_down(limit_pfn, PFN_DOWN(PUD_SIZE)); |
222 | end_pfn = (end >> PUD_SHIFT) << (PUD_SHIFT - PAGE_SHIFT); | ||
223 | if (start_pfn < end_pfn) { | 257 | if (start_pfn < end_pfn) { |
224 | nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, | 258 | nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, |
225 | page_size_mask & | 259 | page_size_mask & |
226 | ((1<<PG_LEVEL_2M)|(1<<PG_LEVEL_1G))); | 260 | ((1<<PG_LEVEL_2M)|(1<<PG_LEVEL_1G))); |
227 | pos = end_pfn << PAGE_SHIFT; | 261 | pfn = end_pfn; |
228 | } | 262 | } |
229 | 263 | ||
230 | /* tail is not big page (1G) alignment */ | 264 | /* tail is not big page (1G) alignment */ |
231 | start_pfn = ((pos + (PMD_SIZE - 1))>>PMD_SHIFT) | 265 | start_pfn = round_up(pfn, PFN_DOWN(PMD_SIZE)); |
232 | << (PMD_SHIFT - PAGE_SHIFT); | 266 | end_pfn = round_down(limit_pfn, PFN_DOWN(PMD_SIZE)); |
233 | end_pfn = (end >> PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT); | ||
234 | if (start_pfn < end_pfn) { | 267 | if (start_pfn < end_pfn) { |
235 | nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, | 268 | nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, |
236 | page_size_mask & (1<<PG_LEVEL_2M)); | 269 | page_size_mask & (1<<PG_LEVEL_2M)); |
237 | pos = end_pfn << PAGE_SHIFT; | 270 | pfn = end_pfn; |
238 | } | 271 | } |
239 | #endif | 272 | #endif |
240 | 273 | ||
241 | /* tail is not big page (2M) alignment */ | 274 | /* tail is not big page (2M) alignment */ |
242 | start_pfn = pos>>PAGE_SHIFT; | 275 | start_pfn = pfn; |
243 | end_pfn = end>>PAGE_SHIFT; | 276 | end_pfn = limit_pfn; |
244 | nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0); | 277 | nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0); |
245 | 278 | ||
246 | /* try to merge same page size and continuous */ | 279 | /* try to merge same page size and continuous */ |
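
split_mem_range() now does all of its carving in pfns with round_up()/round_down(): a 4k head up to the first 2M boundary, a 2M run up to the first 1G boundary, a 1G middle, and 2M/4k tails. A standalone sketch of that carving for the 64-bit layout (pfn arithmetic only, without the save_mr()/merge bookkeeping; the *_PAGES constants are illustrative):

    #include <stdio.h>

    #define PMD_PAGES (1UL << 9)    /* 2 MiB expressed in 4 KiB pfns */
    #define PUD_PAGES (1UL << 18)   /* 1 GiB expressed in 4 KiB pfns */

    static unsigned long round_up_pfn(unsigned long x, unsigned long a)
    {
        return (x + a - 1) & ~(a - 1);
    }

    static unsigned long round_down_pfn(unsigned long x, unsigned long a)
    {
        return x & ~(a - 1);
    }

    static void emit(const char *sz, unsigned long s, unsigned long e)
    {
        if (s < e)
            printf("  [pfn %#lx-%#lx] %s pages\n", s, e - 1, sz);
    }

    /* Sketch of the 64-bit split_mem_range() carving. */
    static void split(unsigned long pfn, unsigned long limit)
    {
        unsigned long s, e;

        /* head: 4k pages up to the next 2M boundary */
        e = round_up_pfn(pfn, PMD_PAGES);
        if (e > limit)
            e = limit;
        emit("4k", pfn, e);
        pfn = e;

        /* 2M run up to the next 1G boundary (or the last full 2M page) */
        s = round_up_pfn(pfn, PMD_PAGES);
        e = round_up_pfn(pfn, PUD_PAGES);
        if (e > round_down_pfn(limit, PMD_PAGES))
            e = round_down_pfn(limit, PMD_PAGES);
        emit("2M", s, e);
        if (s < e)
            pfn = e;

        /* 1G middle */
        s = round_up_pfn(pfn, PUD_PAGES);
        e = round_down_pfn(limit, PUD_PAGES);
        emit("1G", s, e);
        if (s < e)
            pfn = e;

        /* 2M tail */
        s = round_up_pfn(pfn, PMD_PAGES);
        e = round_down_pfn(limit, PMD_PAGES);
        emit("2M", s, e);
        if (s < e)
            pfn = e;

        /* whatever is left stays 4k */
        emit("4k", pfn, limit);
    }

    int main(void)
    {
        split(0x1ff, 0x80000);  /* ~2 GiB range starting one page below 2 MiB */
        return 0;
    }
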
@@ -257,59 +290,169 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, | |||
257 | nr_range--; | 290 | nr_range--; |
258 | } | 291 | } |
259 | 292 | ||
293 | if (!after_bootmem) | ||
294 | adjust_range_page_size_mask(mr, nr_range); | ||
295 | |||
260 | for (i = 0; i < nr_range; i++) | 296 | for (i = 0; i < nr_range; i++) |
261 | printk(KERN_DEBUG " [mem %#010lx-%#010lx] page %s\n", | 297 | printk(KERN_DEBUG " [mem %#010lx-%#010lx] page %s\n", |
262 | mr[i].start, mr[i].end - 1, | 298 | mr[i].start, mr[i].end - 1, |
263 | (mr[i].page_size_mask & (1<<PG_LEVEL_1G))?"1G":( | 299 | (mr[i].page_size_mask & (1<<PG_LEVEL_1G))?"1G":( |
264 | (mr[i].page_size_mask & (1<<PG_LEVEL_2M))?"2M":"4k")); | 300 | (mr[i].page_size_mask & (1<<PG_LEVEL_2M))?"2M":"4k")); |
265 | 301 | ||
266 | /* | 302 | return nr_range; |
267 | * Find space for the kernel direct mapping tables. | 303 | } |
268 | * | 304 | |
269 | * Later we should allocate these tables in the local node of the | 305 | struct range pfn_mapped[E820_X_MAX]; |
270 | * memory mapped. Unfortunately this is done currently before the | 306 | int nr_pfn_mapped; |
271 | * nodes are discovered. | 307 | |
272 | */ | 308 | static void add_pfn_range_mapped(unsigned long start_pfn, unsigned long end_pfn) |
273 | if (!after_bootmem) | 309 | { |
274 | find_early_table_space(mr, nr_range); | 310 | nr_pfn_mapped = add_range_with_merge(pfn_mapped, E820_X_MAX, |
311 | nr_pfn_mapped, start_pfn, end_pfn); | ||
312 | nr_pfn_mapped = clean_sort_range(pfn_mapped, E820_X_MAX); | ||
313 | |||
314 | max_pfn_mapped = max(max_pfn_mapped, end_pfn); | ||
315 | |||
316 | if (start_pfn < (1UL<<(32-PAGE_SHIFT))) | ||
317 | max_low_pfn_mapped = max(max_low_pfn_mapped, | ||
318 | min(end_pfn, 1UL<<(32-PAGE_SHIFT))); | ||
319 | } | ||
320 | |||
321 | bool pfn_range_is_mapped(unsigned long start_pfn, unsigned long end_pfn) | ||
322 | { | ||
323 | int i; | ||
324 | |||
325 | for (i = 0; i < nr_pfn_mapped; i++) | ||
326 | if ((start_pfn >= pfn_mapped[i].start) && | ||
327 | (end_pfn <= pfn_mapped[i].end)) | ||
328 | return true; | ||
329 | |||
330 | return false; | ||
331 | } | ||
332 | |||
333 | /* | ||
334 | * Setup the direct mapping of the physical memory at PAGE_OFFSET. | ||
335 | * This runs before bootmem is initialized and gets pages directly from | ||
336 | * the physical memory. To access them they are temporarily mapped. | ||
337 | */ | ||
338 | unsigned long __init_refok init_memory_mapping(unsigned long start, | ||
339 | unsigned long end) | ||
340 | { | ||
341 | struct map_range mr[NR_RANGE_MR]; | ||
342 | unsigned long ret = 0; | ||
343 | int nr_range, i; | ||
344 | |||
345 | pr_info("init_memory_mapping: [mem %#010lx-%#010lx]\n", | ||
346 | start, end - 1); | ||
347 | |||
348 | memset(mr, 0, sizeof(mr)); | ||
349 | nr_range = split_mem_range(mr, 0, start, end); | ||
275 | 350 | ||
276 | for (i = 0; i < nr_range; i++) | 351 | for (i = 0; i < nr_range; i++) |
277 | ret = kernel_physical_mapping_init(mr[i].start, mr[i].end, | 352 | ret = kernel_physical_mapping_init(mr[i].start, mr[i].end, |
278 | mr[i].page_size_mask); | 353 | mr[i].page_size_mask); |
279 | 354 | ||
280 | #ifdef CONFIG_X86_32 | 355 | add_pfn_range_mapped(start >> PAGE_SHIFT, ret >> PAGE_SHIFT); |
281 | early_ioremap_page_table_range_init(); | ||
282 | 356 | ||
283 | load_cr3(swapper_pg_dir); | 357 | return ret >> PAGE_SHIFT; |
284 | #endif | 358 | } |
285 | 359 | ||
286 | __flush_tlb_all(); | 360 | /* |
361 | * The range may have holes in the middle or at the ends; only the RAM parts will be mapped. | ||
362 | */ | ||
363 | static unsigned long __init init_range_memory_mapping( | ||
364 | unsigned long r_start, | ||
365 | unsigned long r_end) | ||
366 | { | ||
367 | unsigned long start_pfn, end_pfn; | ||
368 | unsigned long mapped_ram_size = 0; | ||
369 | int i; | ||
287 | 370 | ||
288 | /* | 371 | for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, NULL) { |
289 | * Reserve the kernel pagetable pages we used (pgt_buf_start - | 372 | u64 start = clamp_val(PFN_PHYS(start_pfn), r_start, r_end); |
290 | * pgt_buf_end) and free the other ones (pgt_buf_end - pgt_buf_top) | 373 | u64 end = clamp_val(PFN_PHYS(end_pfn), r_start, r_end); |
291 | * so that they can be reused for other purposes. | 374 | if (start >= end) |
292 | * | 375 | continue; |
293 | * On native it just means calling memblock_reserve, on Xen it also | ||
294 | * means marking RW the pagetable pages that we allocated before | ||
295 | * but that haven't been used. | ||
296 | * | ||
297 | * In fact on xen we mark RO the whole range pgt_buf_start - | ||
298 | * pgt_buf_top, because we have to make sure that when | ||
299 | * init_memory_mapping reaches the pagetable pages area, it maps | ||
300 | * RO all the pagetable pages, including the ones that are beyond | ||
301 | * pgt_buf_end at that time. | ||
302 | */ | ||
303 | if (!after_bootmem && pgt_buf_end > pgt_buf_start) | ||
304 | x86_init.mapping.pagetable_reserve(PFN_PHYS(pgt_buf_start), | ||
305 | PFN_PHYS(pgt_buf_end)); | ||
306 | 376 | ||
307 | if (!after_bootmem) | 377 | /* |
308 | early_memtest(start, end); | 378 | * If it overlaps the brk pgt buffer, we need to |
379 | * allocate the pgt buffer from memblock instead. | ||
380 | */ | ||
381 | can_use_brk_pgt = max(start, (u64)pgt_buf_end<<PAGE_SHIFT) >= | ||
382 | min(end, (u64)pgt_buf_top<<PAGE_SHIFT); | ||
383 | init_memory_mapping(start, end); | ||
384 | mapped_ram_size += end - start; | ||
385 | can_use_brk_pgt = true; | ||
386 | } | ||
309 | 387 | ||
310 | return ret >> PAGE_SHIFT; | 388 | return mapped_ram_size; |
311 | } | 389 | } |
312 | 390 | ||
391 | /* (PUD_SHIFT-PMD_SHIFT)/2 */ | ||
392 | #define STEP_SIZE_SHIFT 5 | ||
393 | void __init init_mem_mapping(void) | ||
394 | { | ||
395 | unsigned long end, real_end, start, last_start; | ||
396 | unsigned long step_size; | ||
397 | unsigned long addr; | ||
398 | unsigned long mapped_ram_size = 0; | ||
399 | unsigned long new_mapped_ram_size; | ||
400 | |||
401 | probe_page_size_mask(); | ||
402 | |||
403 | #ifdef CONFIG_X86_64 | ||
404 | end = max_pfn << PAGE_SHIFT; | ||
405 | #else | ||
406 | end = max_low_pfn << PAGE_SHIFT; | ||
407 | #endif | ||
408 | |||
409 | /* the ISA range is always mapped regardless of memory holes */ | ||
410 | init_memory_mapping(0, ISA_END_ADDRESS); | ||
411 | |||
412 | /* Xen has a big reserved range near the end of RAM; skip it at first */ | ||
413 | addr = memblock_find_in_range(ISA_END_ADDRESS, end, PMD_SIZE, | ||
414 | PAGE_SIZE); | ||
415 | real_end = addr + PMD_SIZE; | ||
416 | |||
417 | /* step_size needs to be small so the pgt_buf from BRK can cover it */ | ||
418 | step_size = PMD_SIZE; | ||
419 | max_pfn_mapped = 0; /* will get exact value next */ | ||
420 | min_pfn_mapped = real_end >> PAGE_SHIFT; | ||
421 | last_start = start = real_end; | ||
422 | while (last_start > ISA_END_ADDRESS) { | ||
423 | if (last_start > step_size) { | ||
424 | start = round_down(last_start - 1, step_size); | ||
425 | if (start < ISA_END_ADDRESS) | ||
426 | start = ISA_END_ADDRESS; | ||
427 | } else | ||
428 | start = ISA_END_ADDRESS; | ||
429 | new_mapped_ram_size = init_range_memory_mapping(start, | ||
430 | last_start); | ||
431 | last_start = start; | ||
432 | min_pfn_mapped = last_start >> PAGE_SHIFT; | ||
433 | /* only increase step_size after a big range gets mapped */ | ||
434 | if (new_mapped_ram_size > mapped_ram_size) | ||
435 | step_size <<= STEP_SIZE_SHIFT; | ||
436 | mapped_ram_size += new_mapped_ram_size; | ||
437 | } | ||
438 | |||
439 | if (real_end < end) | ||
440 | init_range_memory_mapping(real_end, end); | ||
441 | |||
442 | #ifdef CONFIG_X86_64 | ||
443 | if (max_pfn > max_low_pfn) { | ||
444 | /* can we preserve max_low_pfn? */ | ||
445 | max_low_pfn = max_pfn; | ||
446 | } | ||
447 | #else | ||
448 | early_ioremap_page_table_range_init(); | ||
449 | #endif | ||
450 | |||
451 | load_cr3(swapper_pg_dir); | ||
452 | __flush_tlb_all(); | ||
453 | |||
454 | early_memtest(0, max_pfn_mapped << PAGE_SHIFT); | ||
455 | } | ||
313 | 456 | ||
314 | /* | 457 | /* |
315 | * devmem_is_allowed() checks to see if /dev/mem access to a certain address | 458 | * devmem_is_allowed() checks to see if /dev/mem access to a certain address |
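
init_mem_mapping() above maps the ISA range first and then walks RAM top-down from just below real_end, so that each window's page tables can be allocated out of memory that the previous (higher) window already mapped. The window starts at PMD_SIZE, small enough for the brk pool to cover, and is grown by 2^STEP_SIZE_SHIFT once a step maps more than everything mapped so far. A user-space sketch of just that stepping loop (init_range_memory_mapping() is replaced by a stand-in):

    #include <stdio.h>

    #define ISA_END_ADDRESS 0x100000ULL
    #define PMD_SIZE        (2ULL << 20)
    #define STEP_SIZE_SHIFT 5

    static unsigned long long round_down_to(unsigned long long x, unsigned long long a)
    {
        return x & ~(a - 1);
    }

    /*
     * Sketch of the top-down loop in init_mem_mapping(): map [start, last_start)
     * windows from high to low, growing the window once real progress is made.
     */
    static void map_top_down(unsigned long long real_end)
    {
        unsigned long long step_size = PMD_SIZE;
        unsigned long long last_start = real_end;
        unsigned long long mapped = 0;

        while (last_start > ISA_END_ADDRESS) {
            unsigned long long start, new_mapped;

            if (last_start > step_size) {
                start = round_down_to(last_start - 1, step_size);
                if (start < ISA_END_ADDRESS)
                    start = ISA_END_ADDRESS;
            } else {
                start = ISA_END_ADDRESS;
            }

            printf("map [%#llx-%#llx)\n", start, last_start);

            new_mapped = last_start - start;  /* stand-in for init_range_memory_mapping() */
            last_start = start;
            if (new_mapped > mapped)          /* grow only after real progress */
                step_size <<= STEP_SIZE_SHIFT;
            mapped += new_mapped;
        }
    }

    int main(void)
    {
        map_top_down(0x100000000ULL);   /* pretend usable RAM ends at 4 GiB */
        return 0;
    }
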
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 745d66b843c8..b299724f6e34 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c | |||
@@ -53,25 +53,14 @@ | |||
53 | #include <asm/page_types.h> | 53 | #include <asm/page_types.h> |
54 | #include <asm/init.h> | 54 | #include <asm/init.h> |
55 | 55 | ||
56 | #include "mm_internal.h" | ||
57 | |||
56 | unsigned long highstart_pfn, highend_pfn; | 58 | unsigned long highstart_pfn, highend_pfn; |
57 | 59 | ||
58 | static noinline int do_test_wp_bit(void); | 60 | static noinline int do_test_wp_bit(void); |
59 | 61 | ||
60 | bool __read_mostly __vmalloc_start_set = false; | 62 | bool __read_mostly __vmalloc_start_set = false; |
61 | 63 | ||
62 | static __init void *alloc_low_page(void) | ||
63 | { | ||
64 | unsigned long pfn = pgt_buf_end++; | ||
65 | void *adr; | ||
66 | |||
67 | if (pfn >= pgt_buf_top) | ||
68 | panic("alloc_low_page: ran out of memory"); | ||
69 | |||
70 | adr = __va(pfn * PAGE_SIZE); | ||
71 | clear_page(adr); | ||
72 | return adr; | ||
73 | } | ||
74 | |||
75 | /* | 64 | /* |
76 | * Creates a middle page table and puts a pointer to it in the | 65 | * Creates a middle page table and puts a pointer to it in the |
77 | * given global directory entry. This only returns the gd entry | 66 | * given global directory entry. This only returns the gd entry |
@@ -84,10 +73,7 @@ static pmd_t * __init one_md_table_init(pgd_t *pgd) | |||
84 | 73 | ||
85 | #ifdef CONFIG_X86_PAE | 74 | #ifdef CONFIG_X86_PAE |
86 | if (!(pgd_val(*pgd) & _PAGE_PRESENT)) { | 75 | if (!(pgd_val(*pgd) & _PAGE_PRESENT)) { |
87 | if (after_bootmem) | 76 | pmd_table = (pmd_t *)alloc_low_page(); |
88 | pmd_table = (pmd_t *)alloc_bootmem_pages(PAGE_SIZE); | ||
89 | else | ||
90 | pmd_table = (pmd_t *)alloc_low_page(); | ||
91 | paravirt_alloc_pmd(&init_mm, __pa(pmd_table) >> PAGE_SHIFT); | 77 | paravirt_alloc_pmd(&init_mm, __pa(pmd_table) >> PAGE_SHIFT); |
92 | set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT)); | 78 | set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT)); |
93 | pud = pud_offset(pgd, 0); | 79 | pud = pud_offset(pgd, 0); |
@@ -109,17 +95,7 @@ static pmd_t * __init one_md_table_init(pgd_t *pgd) | |||
109 | static pte_t * __init one_page_table_init(pmd_t *pmd) | 95 | static pte_t * __init one_page_table_init(pmd_t *pmd) |
110 | { | 96 | { |
111 | if (!(pmd_val(*pmd) & _PAGE_PRESENT)) { | 97 | if (!(pmd_val(*pmd) & _PAGE_PRESENT)) { |
112 | pte_t *page_table = NULL; | 98 | pte_t *page_table = (pte_t *)alloc_low_page(); |
113 | |||
114 | if (after_bootmem) { | ||
115 | #if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KMEMCHECK) | ||
116 | page_table = (pte_t *) alloc_bootmem_pages(PAGE_SIZE); | ||
117 | #endif | ||
118 | if (!page_table) | ||
119 | page_table = | ||
120 | (pte_t *)alloc_bootmem_pages(PAGE_SIZE); | ||
121 | } else | ||
122 | page_table = (pte_t *)alloc_low_page(); | ||
123 | 99 | ||
124 | paravirt_alloc_pte(&init_mm, __pa(page_table) >> PAGE_SHIFT); | 100 | paravirt_alloc_pte(&init_mm, __pa(page_table) >> PAGE_SHIFT); |
125 | set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE)); | 101 | set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE)); |
@@ -146,8 +122,39 @@ pte_t * __init populate_extra_pte(unsigned long vaddr) | |||
146 | return one_page_table_init(pmd) + pte_idx; | 122 | return one_page_table_init(pmd) + pte_idx; |
147 | } | 123 | } |
148 | 124 | ||
125 | static unsigned long __init | ||
126 | page_table_range_init_count(unsigned long start, unsigned long end) | ||
127 | { | ||
128 | unsigned long count = 0; | ||
129 | #ifdef CONFIG_HIGHMEM | ||
130 | int pmd_idx_kmap_begin = fix_to_virt(FIX_KMAP_END) >> PMD_SHIFT; | ||
131 | int pmd_idx_kmap_end = fix_to_virt(FIX_KMAP_BEGIN) >> PMD_SHIFT; | ||
132 | int pgd_idx, pmd_idx; | ||
133 | unsigned long vaddr; | ||
134 | |||
135 | if (pmd_idx_kmap_begin == pmd_idx_kmap_end) | ||
136 | return 0; | ||
137 | |||
138 | vaddr = start; | ||
139 | pgd_idx = pgd_index(vaddr); | ||
140 | |||
141 | for ( ; (pgd_idx < PTRS_PER_PGD) && (vaddr != end); pgd_idx++) { | ||
142 | for (; (pmd_idx < PTRS_PER_PMD) && (vaddr != end); | ||
143 | pmd_idx++) { | ||
144 | if ((vaddr >> PMD_SHIFT) >= pmd_idx_kmap_begin && | ||
145 | (vaddr >> PMD_SHIFT) <= pmd_idx_kmap_end) | ||
146 | count++; | ||
147 | vaddr += PMD_SIZE; | ||
148 | } | ||
149 | pmd_idx = 0; | ||
150 | } | ||
151 | #endif | ||
152 | return count; | ||
153 | } | ||
154 | |||
149 | static pte_t *__init page_table_kmap_check(pte_t *pte, pmd_t *pmd, | 155 | static pte_t *__init page_table_kmap_check(pte_t *pte, pmd_t *pmd, |
150 | unsigned long vaddr, pte_t *lastpte) | 156 | unsigned long vaddr, pte_t *lastpte, |
157 | void **adr) | ||
151 | { | 158 | { |
152 | #ifdef CONFIG_HIGHMEM | 159 | #ifdef CONFIG_HIGHMEM |
153 | /* | 160 | /* |
@@ -161,16 +168,15 @@ static pte_t *__init page_table_kmap_check(pte_t *pte, pmd_t *pmd, | |||
161 | 168 | ||
162 | if (pmd_idx_kmap_begin != pmd_idx_kmap_end | 169 | if (pmd_idx_kmap_begin != pmd_idx_kmap_end |
163 | && (vaddr >> PMD_SHIFT) >= pmd_idx_kmap_begin | 170 | && (vaddr >> PMD_SHIFT) >= pmd_idx_kmap_begin |
164 | && (vaddr >> PMD_SHIFT) <= pmd_idx_kmap_end | 171 | && (vaddr >> PMD_SHIFT) <= pmd_idx_kmap_end) { |
165 | && ((__pa(pte) >> PAGE_SHIFT) < pgt_buf_start | ||
166 | || (__pa(pte) >> PAGE_SHIFT) >= pgt_buf_end)) { | ||
167 | pte_t *newpte; | 172 | pte_t *newpte; |
168 | int i; | 173 | int i; |
169 | 174 | ||
170 | BUG_ON(after_bootmem); | 175 | BUG_ON(after_bootmem); |
171 | newpte = alloc_low_page(); | 176 | newpte = *adr; |
172 | for (i = 0; i < PTRS_PER_PTE; i++) | 177 | for (i = 0; i < PTRS_PER_PTE; i++) |
173 | set_pte(newpte + i, pte[i]); | 178 | set_pte(newpte + i, pte[i]); |
179 | *adr = (void *)(((unsigned long)(*adr)) + PAGE_SIZE); | ||
174 | 180 | ||
175 | paravirt_alloc_pte(&init_mm, __pa(newpte) >> PAGE_SHIFT); | 181 | paravirt_alloc_pte(&init_mm, __pa(newpte) >> PAGE_SHIFT); |
176 | set_pmd(pmd, __pmd(__pa(newpte)|_PAGE_TABLE)); | 182 | set_pmd(pmd, __pmd(__pa(newpte)|_PAGE_TABLE)); |
@@ -204,6 +210,11 @@ page_table_range_init(unsigned long start, unsigned long end, pgd_t *pgd_base) | |||
204 | pgd_t *pgd; | 210 | pgd_t *pgd; |
205 | pmd_t *pmd; | 211 | pmd_t *pmd; |
206 | pte_t *pte = NULL; | 212 | pte_t *pte = NULL; |
213 | unsigned long count = page_table_range_init_count(start, end); | ||
214 | void *adr = NULL; | ||
215 | |||
216 | if (count) | ||
217 | adr = alloc_low_pages(count); | ||
207 | 218 | ||
208 | vaddr = start; | 219 | vaddr = start; |
209 | pgd_idx = pgd_index(vaddr); | 220 | pgd_idx = pgd_index(vaddr); |
@@ -216,7 +227,7 @@ page_table_range_init(unsigned long start, unsigned long end, pgd_t *pgd_base) | |||
216 | for (; (pmd_idx < PTRS_PER_PMD) && (vaddr != end); | 227 | for (; (pmd_idx < PTRS_PER_PMD) && (vaddr != end); |
217 | pmd++, pmd_idx++) { | 228 | pmd++, pmd_idx++) { |
218 | pte = page_table_kmap_check(one_page_table_init(pmd), | 229 | pte = page_table_kmap_check(one_page_table_init(pmd), |
219 | pmd, vaddr, pte); | 230 | pmd, vaddr, pte, &adr); |
220 | 231 | ||
221 | vaddr += PMD_SIZE; | 232 | vaddr += PMD_SIZE; |
222 | } | 233 | } |
@@ -310,6 +321,7 @@ repeat: | |||
310 | __pgprot(PTE_IDENT_ATTR | | 321 | __pgprot(PTE_IDENT_ATTR | |
311 | _PAGE_PSE); | 322 | _PAGE_PSE); |
312 | 323 | ||
324 | pfn &= PMD_MASK >> PAGE_SHIFT; | ||
313 | addr2 = (pfn + PTRS_PER_PTE-1) * PAGE_SIZE + | 325 | addr2 = (pfn + PTRS_PER_PTE-1) * PAGE_SIZE + |
314 | PAGE_OFFSET + PAGE_SIZE-1; | 326 | PAGE_OFFSET + PAGE_SIZE-1; |
315 | 327 | ||
@@ -455,9 +467,14 @@ void __init native_pagetable_init(void) | |||
455 | 467 | ||
456 | /* | 468 | /* |
457 | * Remove any mappings which extend past the end of physical | 469 | * Remove any mappings which extend past the end of physical |
458 | * memory from the boot time page table: | 470 | * memory from the boot time page table. |
471 | * In virtual address space, we should have at least two pages | ||
472 | * from VMALLOC_END to pkmap or fixmap according to VMALLOC_END | ||
473 | * definition. And max_low_pfn is set to VMALLOC_END physical | ||
474 | * address. If initial memory mapping is doing right job, we | ||
475 | * should have pte used near max_low_pfn or one pmd is not present. | ||
459 | */ | 476 | */ |
460 | for (pfn = max_low_pfn + 1; pfn < 1<<(32-PAGE_SHIFT); pfn++) { | 477 | for (pfn = max_low_pfn; pfn < 1<<(32-PAGE_SHIFT); pfn++) { |
461 | va = PAGE_OFFSET + (pfn<<PAGE_SHIFT); | 478 | va = PAGE_OFFSET + (pfn<<PAGE_SHIFT); |
462 | pgd = base + pgd_index(va); | 479 | pgd = base + pgd_index(va); |
463 | if (!pgd_present(*pgd)) | 480 | if (!pgd_present(*pgd)) |
@@ -468,10 +485,19 @@ void __init native_pagetable_init(void) | |||
468 | if (!pmd_present(*pmd)) | 485 | if (!pmd_present(*pmd)) |
469 | break; | 486 | break; |
470 | 487 | ||
488 | /* should not be large page here */ | ||
489 | if (pmd_large(*pmd)) { | ||
490 | pr_warn("try to clear pte for ram above max_low_pfn: pfn: %lx pmd: %p pmd phys: %lx, but pmd is big page and is not using pte !\n", | ||
491 | pfn, pmd, __pa(pmd)); | ||
492 | BUG_ON(1); | ||
493 | } | ||
494 | |||
471 | pte = pte_offset_kernel(pmd, va); | 495 | pte = pte_offset_kernel(pmd, va); |
472 | if (!pte_present(*pte)) | 496 | if (!pte_present(*pte)) |
473 | break; | 497 | break; |
474 | 498 | ||
499 | printk(KERN_DEBUG "clearing pte for ram above max_low_pfn: pfn: %lx pmd: %p pmd phys: %lx pte: %p pte phys: %lx\n", | ||
500 | pfn, pmd, __pa(pmd), pte, __pa(pte)); | ||
475 | pte_clear(NULL, va, pte); | 501 | pte_clear(NULL, va, pte); |
476 | } | 502 | } |
477 | paravirt_alloc_pmd(&init_mm, __pa(base) >> PAGE_SHIFT); | 503 | paravirt_alloc_pmd(&init_mm, __pa(base) >> PAGE_SHIFT); |
@@ -550,7 +576,7 @@ early_param("highmem", parse_highmem); | |||
550 | * artificially via the highmem=x boot parameter then create | 576 | * artificially via the highmem=x boot parameter then create |
551 | * it: | 577 | * it: |
552 | */ | 578 | */ |
553 | void __init lowmem_pfn_init(void) | 579 | static void __init lowmem_pfn_init(void) |
554 | { | 580 | { |
555 | /* max_low_pfn is 0, we already have early_res support */ | 581 | /* max_low_pfn is 0, we already have early_res support */ |
556 | max_low_pfn = max_pfn; | 582 | max_low_pfn = max_pfn; |
@@ -586,7 +612,7 @@ void __init lowmem_pfn_init(void) | |||
586 | * We have more RAM than fits into lowmem - we try to put it into | 612 | * We have more RAM than fits into lowmem - we try to put it into |
587 | * highmem, also taking the highmem=x boot parameter into account: | 613 | * highmem, also taking the highmem=x boot parameter into account: |
588 | */ | 614 | */ |
589 | void __init highmem_pfn_init(void) | 615 | static void __init highmem_pfn_init(void) |
590 | { | 616 | { |
591 | max_low_pfn = MAXMEM_PFN; | 617 | max_low_pfn = MAXMEM_PFN; |
592 | 618 | ||
@@ -669,8 +695,6 @@ void __init setup_bootmem_allocator(void) | |||
669 | printk(KERN_INFO " mapped low ram: 0 - %08lx\n", | 695 | printk(KERN_INFO " mapped low ram: 0 - %08lx\n", |
670 | max_pfn_mapped<<PAGE_SHIFT); | 696 | max_pfn_mapped<<PAGE_SHIFT); |
671 | printk(KERN_INFO " low ram: 0 - %08lx\n", max_low_pfn<<PAGE_SHIFT); | 697 | printk(KERN_INFO " low ram: 0 - %08lx\n", max_low_pfn<<PAGE_SHIFT); |
672 | |||
673 | after_bootmem = 1; | ||
674 | } | 698 | } |
675 | 699 | ||
676 | /* | 700 | /* |
@@ -753,6 +777,8 @@ void __init mem_init(void) | |||
753 | if (page_is_ram(tmp) && PageReserved(pfn_to_page(tmp))) | 777 | if (page_is_ram(tmp) && PageReserved(pfn_to_page(tmp))) |
754 | reservedpages++; | 778 | reservedpages++; |
755 | 779 | ||
780 | after_bootmem = 1; | ||
781 | |||
756 | codesize = (unsigned long) &_etext - (unsigned long) &_text; | 782 | codesize = (unsigned long) &_etext - (unsigned long) &_text; |
757 | datasize = (unsigned long) &_edata - (unsigned long) &_etext; | 783 | datasize = (unsigned long) &_edata - (unsigned long) &_etext; |
758 | initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin; | 784 | initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin; |
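
Because alloc_low_pages() may now fall back to memblock, page_table_range_init() can no longer allocate the kmap pte pages one at a time deep inside the walk; it counts them up front with page_table_range_init_count(), grabs them in a single alloc_low_pages(count) call, and page_table_kmap_check() consumes them through the *adr cursor. A small sketch of that "count, batch-allocate, consume via a cursor" pattern:

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    #define PAGE_SIZE 4096

    /* First pass: decide how many pages the walk will need. */
    static int count_needed(const int *needs_page, int n)
    {
        int i, count = 0;

        for (i = 0; i < n; i++)
            if (needs_page[i])
                count++;
        return count;
    }

    int main(void)
    {
        const int needs_page[] = { 0, 1, 1, 0, 1 };  /* stand-in for the kmap range check */
        int n = sizeof(needs_page) / sizeof(needs_page[0]);
        int count = count_needed(needs_page, n);
        char *base = count ? calloc(count, PAGE_SIZE) : NULL;
        char *adr = base;
        int i;

        /* Second pass: the walk only consumes pages, it never allocates. */
        for (i = 0; i < n; i++) {
            if (!needs_page[i])
                continue;
            memset(adr, 0xff, PAGE_SIZE);  /* stand-in for copying the ptes over */
            adr += PAGE_SIZE;              /* advance the cursor */
        }

        printf("used %d pre-allocated pages\n", count);
        free(base);
        return 0;
    }
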
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index d6eeead43758..3eba7f429880 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c | |||
@@ -54,6 +54,82 @@ | |||
54 | #include <asm/uv/uv.h> | 54 | #include <asm/uv/uv.h> |
55 | #include <asm/setup.h> | 55 | #include <asm/setup.h> |
56 | 56 | ||
57 | #include "mm_internal.h" | ||
58 | |||
59 | static void ident_pmd_init(unsigned long pmd_flag, pmd_t *pmd_page, | ||
60 | unsigned long addr, unsigned long end) | ||
61 | { | ||
62 | addr &= PMD_MASK; | ||
63 | for (; addr < end; addr += PMD_SIZE) { | ||
64 | pmd_t *pmd = pmd_page + pmd_index(addr); | ||
65 | |||
66 | if (!pmd_present(*pmd)) | ||
67 | set_pmd(pmd, __pmd(addr | pmd_flag)); | ||
68 | } | ||
69 | } | ||
70 | static int ident_pud_init(struct x86_mapping_info *info, pud_t *pud_page, | ||
71 | unsigned long addr, unsigned long end) | ||
72 | { | ||
73 | unsigned long next; | ||
74 | |||
75 | for (; addr < end; addr = next) { | ||
76 | pud_t *pud = pud_page + pud_index(addr); | ||
77 | pmd_t *pmd; | ||
78 | |||
79 | next = (addr & PUD_MASK) + PUD_SIZE; | ||
80 | if (next > end) | ||
81 | next = end; | ||
82 | |||
83 | if (pud_present(*pud)) { | ||
84 | pmd = pmd_offset(pud, 0); | ||
85 | ident_pmd_init(info->pmd_flag, pmd, addr, next); | ||
86 | continue; | ||
87 | } | ||
88 | pmd = (pmd_t *)info->alloc_pgt_page(info->context); | ||
89 | if (!pmd) | ||
90 | return -ENOMEM; | ||
91 | ident_pmd_init(info->pmd_flag, pmd, addr, next); | ||
92 | set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE)); | ||
93 | } | ||
94 | |||
95 | return 0; | ||
96 | } | ||
97 | |||
98 | int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page, | ||
99 | unsigned long addr, unsigned long end) | ||
100 | { | ||
101 | unsigned long next; | ||
102 | int result; | ||
103 | int off = info->kernel_mapping ? pgd_index(__PAGE_OFFSET) : 0; | ||
104 | |||
105 | for (; addr < end; addr = next) { | ||
106 | pgd_t *pgd = pgd_page + pgd_index(addr) + off; | ||
107 | pud_t *pud; | ||
108 | |||
109 | next = (addr & PGDIR_MASK) + PGDIR_SIZE; | ||
110 | if (next > end) | ||
111 | next = end; | ||
112 | |||
113 | if (pgd_present(*pgd)) { | ||
114 | pud = pud_offset(pgd, 0); | ||
115 | result = ident_pud_init(info, pud, addr, next); | ||
116 | if (result) | ||
117 | return result; | ||
118 | continue; | ||
119 | } | ||
120 | |||
121 | pud = (pud_t *)info->alloc_pgt_page(info->context); | ||
122 | if (!pud) | ||
123 | return -ENOMEM; | ||
124 | result = ident_pud_init(info, pud, addr, next); | ||
125 | if (result) | ||
126 | return result; | ||
127 | set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE)); | ||
128 | } | ||
129 | |||
130 | return 0; | ||
131 | } | ||
132 | |||
57 | static int __init parse_direct_gbpages_off(char *arg) | 133 | static int __init parse_direct_gbpages_off(char *arg) |
58 | { | 134 | { |
59 | direct_gbpages = 0; | 135 | direct_gbpages = 0; |
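
kernel_ident_mapping_init() builds a 1:1 (identity) mapping out of 2 MiB pmd entries, taking its page-table pages from the caller through the alloc_pgt_page()/context hooks in struct x86_mapping_info, so whoever calls it can plug in whatever early allocator it has. A self-contained model of the same two-level fill, with plain arrays standing in for the pud/pmd pages; the entry encoding (a bare "present" bit) is simplified, not the real pte flags:

    #include <stdint.h>
    #include <stdio.h>

    #define PMD_SHIFT 21
    #define PUD_SHIFT 30
    #define PTRS      512
    #define PMD_SIZE  (1ULL << PMD_SHIFT)
    #define PRESENT   0x1ULL                /* simplified "present" flag */

    static uint64_t pud[PTRS];              /* one pud page (covers 512 GiB) */
    static uint64_t pmds[8][PTRS];          /* small pool of pmd pages */
    static int next_pmd;                    /* stand-in for alloc_pgt_page() */

    /* Fill one pmd page with 2 MiB identity entries for [addr, end). */
    static void ident_pmd_init(uint64_t *pmd, uint64_t addr, uint64_t end)
    {
        for (addr &= ~(PMD_SIZE - 1); addr < end; addr += PMD_SIZE) {
            int i = (addr >> PMD_SHIFT) & (PTRS - 1);

            if (!(pmd[i] & PRESENT))
                pmd[i] = addr | PRESENT;
        }
    }

    /* Walk the pud slots for [addr, end), allocating one pmd page per 1 GiB. */
    static void ident_pud_init(uint64_t addr, uint64_t end)
    {
        while (addr < end) {
            uint64_t next = ((addr >> PUD_SHIFT) + 1) << PUD_SHIFT;
            int i = (addr >> PUD_SHIFT) & (PTRS - 1);
            uint64_t *pmd;

            if (next > end)
                next = end;
            if (!(pud[i] & PRESENT))
                pud[i] = (uint64_t)(uintptr_t)pmds[next_pmd++] | PRESENT;
            pmd = (uint64_t *)(uintptr_t)(pud[i] & ~PRESENT);
            ident_pmd_init(pmd, addr, next);
            addr = next;
        }
    }

    int main(void)
    {
        ident_pud_init(0, 4ULL << 30);      /* identity-map the first 4 GiB */
        printf("pud[0]=%#llx, pmd pages used: %d\n",
               (unsigned long long)pud[0], next_pmd);
        return 0;
    }
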
@@ -302,10 +378,18 @@ void __init init_extra_mapping_uc(unsigned long phys, unsigned long size) | |||
302 | void __init cleanup_highmap(void) | 378 | void __init cleanup_highmap(void) |
303 | { | 379 | { |
304 | unsigned long vaddr = __START_KERNEL_map; | 380 | unsigned long vaddr = __START_KERNEL_map; |
305 | unsigned long vaddr_end = __START_KERNEL_map + (max_pfn_mapped << PAGE_SHIFT); | 381 | unsigned long vaddr_end = __START_KERNEL_map + KERNEL_IMAGE_SIZE; |
306 | unsigned long end = roundup((unsigned long)_brk_end, PMD_SIZE) - 1; | 382 | unsigned long end = roundup((unsigned long)_brk_end, PMD_SIZE) - 1; |
307 | pmd_t *pmd = level2_kernel_pgt; | 383 | pmd_t *pmd = level2_kernel_pgt; |
308 | 384 | ||
385 | /* | ||
386 | * Native path, max_pfn_mapped is not set yet. | ||
387 | * Xen has valid max_pfn_mapped set in | ||
388 | * arch/x86/xen/mmu.c:xen_setup_kernel_pagetable(). | ||
389 | */ | ||
390 | if (max_pfn_mapped) | ||
391 | vaddr_end = __START_KERNEL_map + (max_pfn_mapped << PAGE_SHIFT); | ||
392 | |||
309 | for (; vaddr + PMD_SIZE - 1 < vaddr_end; pmd++, vaddr += PMD_SIZE) { | 393 | for (; vaddr + PMD_SIZE - 1 < vaddr_end; pmd++, vaddr += PMD_SIZE) { |
310 | if (pmd_none(*pmd)) | 394 | if (pmd_none(*pmd)) |
311 | continue; | 395 | continue; |
@@ -314,69 +398,24 @@ void __init cleanup_highmap(void) | |||
314 | } | 398 | } |
315 | } | 399 | } |
316 | 400 | ||
317 | static __ref void *alloc_low_page(unsigned long *phys) | ||
318 | { | ||
319 | unsigned long pfn = pgt_buf_end++; | ||
320 | void *adr; | ||
321 | |||
322 | if (after_bootmem) { | ||
323 | adr = (void *)get_zeroed_page(GFP_ATOMIC | __GFP_NOTRACK); | ||
324 | *phys = __pa(adr); | ||
325 | |||
326 | return adr; | ||
327 | } | ||
328 | |||
329 | if (pfn >= pgt_buf_top) | ||
330 | panic("alloc_low_page: ran out of memory"); | ||
331 | |||
332 | adr = early_memremap(pfn * PAGE_SIZE, PAGE_SIZE); | ||
333 | clear_page(adr); | ||
334 | *phys = pfn * PAGE_SIZE; | ||
335 | return adr; | ||
336 | } | ||
337 | |||
338 | static __ref void *map_low_page(void *virt) | ||
339 | { | ||
340 | void *adr; | ||
341 | unsigned long phys, left; | ||
342 | |||
343 | if (after_bootmem) | ||
344 | return virt; | ||
345 | |||
346 | phys = __pa(virt); | ||
347 | left = phys & (PAGE_SIZE - 1); | ||
348 | adr = early_memremap(phys & PAGE_MASK, PAGE_SIZE); | ||
349 | adr = (void *)(((unsigned long)adr) | left); | ||
350 | |||
351 | return adr; | ||
352 | } | ||
353 | |||
354 | static __ref void unmap_low_page(void *adr) | ||
355 | { | ||
356 | if (after_bootmem) | ||
357 | return; | ||
358 | |||
359 | early_iounmap((void *)((unsigned long)adr & PAGE_MASK), PAGE_SIZE); | ||
360 | } | ||
361 | |||
362 | static unsigned long __meminit | 401 | static unsigned long __meminit |
363 | phys_pte_init(pte_t *pte_page, unsigned long addr, unsigned long end, | 402 | phys_pte_init(pte_t *pte_page, unsigned long addr, unsigned long end, |
364 | pgprot_t prot) | 403 | pgprot_t prot) |
365 | { | 404 | { |
366 | unsigned pages = 0; | 405 | unsigned long pages = 0, next; |
367 | unsigned long last_map_addr = end; | 406 | unsigned long last_map_addr = end; |
368 | int i; | 407 | int i; |
369 | 408 | ||
370 | pte_t *pte = pte_page + pte_index(addr); | 409 | pte_t *pte = pte_page + pte_index(addr); |
371 | 410 | ||
372 | for(i = pte_index(addr); i < PTRS_PER_PTE; i++, addr += PAGE_SIZE, pte++) { | 411 | for (i = pte_index(addr); i < PTRS_PER_PTE; i++, addr = next, pte++) { |
373 | 412 | next = (addr & PAGE_MASK) + PAGE_SIZE; | |
374 | if (addr >= end) { | 413 | if (addr >= end) { |
375 | if (!after_bootmem) { | 414 | if (!after_bootmem && |
376 | for(; i < PTRS_PER_PTE; i++, pte++) | 415 | !e820_any_mapped(addr & PAGE_MASK, next, E820_RAM) && |
377 | set_pte(pte, __pte(0)); | 416 | !e820_any_mapped(addr & PAGE_MASK, next, E820_RESERVED_KERN)) |
378 | } | 417 | set_pte(pte, __pte(0)); |
379 | break; | 418 | continue; |
380 | } | 419 | } |
381 | 420 | ||
382 | /* | 421 | /* |
@@ -414,28 +453,25 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end, | |||
414 | int i = pmd_index(address); | 453 | int i = pmd_index(address); |
415 | 454 | ||
416 | for (; i < PTRS_PER_PMD; i++, address = next) { | 455 | for (; i < PTRS_PER_PMD; i++, address = next) { |
417 | unsigned long pte_phys; | ||
418 | pmd_t *pmd = pmd_page + pmd_index(address); | 456 | pmd_t *pmd = pmd_page + pmd_index(address); |
419 | pte_t *pte; | 457 | pte_t *pte; |
420 | pgprot_t new_prot = prot; | 458 | pgprot_t new_prot = prot; |
421 | 459 | ||
460 | next = (address & PMD_MASK) + PMD_SIZE; | ||
422 | if (address >= end) { | 461 | if (address >= end) { |
423 | if (!after_bootmem) { | 462 | if (!after_bootmem && |
424 | for (; i < PTRS_PER_PMD; i++, pmd++) | 463 | !e820_any_mapped(address & PMD_MASK, next, E820_RAM) && |
425 | set_pmd(pmd, __pmd(0)); | 464 | !e820_any_mapped(address & PMD_MASK, next, E820_RESERVED_KERN)) |
426 | } | 465 | set_pmd(pmd, __pmd(0)); |
427 | break; | 466 | continue; |
428 | } | 467 | } |
429 | 468 | ||
430 | next = (address & PMD_MASK) + PMD_SIZE; | ||
431 | |||
432 | if (pmd_val(*pmd)) { | 469 | if (pmd_val(*pmd)) { |
433 | if (!pmd_large(*pmd)) { | 470 | if (!pmd_large(*pmd)) { |
434 | spin_lock(&init_mm.page_table_lock); | 471 | spin_lock(&init_mm.page_table_lock); |
435 | pte = map_low_page((pte_t *)pmd_page_vaddr(*pmd)); | 472 | pte = (pte_t *)pmd_page_vaddr(*pmd); |
436 | last_map_addr = phys_pte_init(pte, address, | 473 | last_map_addr = phys_pte_init(pte, address, |
437 | end, prot); | 474 | end, prot); |
438 | unmap_low_page(pte); | ||
439 | spin_unlock(&init_mm.page_table_lock); | 475 | spin_unlock(&init_mm.page_table_lock); |
440 | continue; | 476 | continue; |
441 | } | 477 | } |
@@ -464,19 +500,18 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end, | |||
464 | pages++; | 500 | pages++; |
465 | spin_lock(&init_mm.page_table_lock); | 501 | spin_lock(&init_mm.page_table_lock); |
466 | set_pte((pte_t *)pmd, | 502 | set_pte((pte_t *)pmd, |
467 | pfn_pte(address >> PAGE_SHIFT, | 503 | pfn_pte((address & PMD_MASK) >> PAGE_SHIFT, |
468 | __pgprot(pgprot_val(prot) | _PAGE_PSE))); | 504 | __pgprot(pgprot_val(prot) | _PAGE_PSE))); |
469 | spin_unlock(&init_mm.page_table_lock); | 505 | spin_unlock(&init_mm.page_table_lock); |
470 | last_map_addr = next; | 506 | last_map_addr = next; |
471 | continue; | 507 | continue; |
472 | } | 508 | } |
473 | 509 | ||
474 | pte = alloc_low_page(&pte_phys); | 510 | pte = alloc_low_page(); |
475 | last_map_addr = phys_pte_init(pte, address, end, new_prot); | 511 | last_map_addr = phys_pte_init(pte, address, end, new_prot); |
476 | unmap_low_page(pte); | ||
477 | 512 | ||
478 | spin_lock(&init_mm.page_table_lock); | 513 | spin_lock(&init_mm.page_table_lock); |
479 | pmd_populate_kernel(&init_mm, pmd, __va(pte_phys)); | 514 | pmd_populate_kernel(&init_mm, pmd, pte); |
480 | spin_unlock(&init_mm.page_table_lock); | 515 | spin_unlock(&init_mm.page_table_lock); |
481 | } | 516 | } |
482 | update_page_count(PG_LEVEL_2M, pages); | 517 | update_page_count(PG_LEVEL_2M, pages); |
@@ -492,27 +527,24 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end, | |||
492 | int i = pud_index(addr); | 527 | int i = pud_index(addr); |
493 | 528 | ||
494 | for (; i < PTRS_PER_PUD; i++, addr = next) { | 529 | for (; i < PTRS_PER_PUD; i++, addr = next) { |
495 | unsigned long pmd_phys; | ||
496 | pud_t *pud = pud_page + pud_index(addr); | 530 | pud_t *pud = pud_page + pud_index(addr); |
497 | pmd_t *pmd; | 531 | pmd_t *pmd; |
498 | pgprot_t prot = PAGE_KERNEL; | 532 | pgprot_t prot = PAGE_KERNEL; |
499 | 533 | ||
500 | if (addr >= end) | ||
501 | break; | ||
502 | |||
503 | next = (addr & PUD_MASK) + PUD_SIZE; | 534 | next = (addr & PUD_MASK) + PUD_SIZE; |
504 | 535 | if (addr >= end) { | |
505 | if (!after_bootmem && !e820_any_mapped(addr, next, 0)) { | 536 | if (!after_bootmem && |
506 | set_pud(pud, __pud(0)); | 537 | !e820_any_mapped(addr & PUD_MASK, next, E820_RAM) && |
538 | !e820_any_mapped(addr & PUD_MASK, next, E820_RESERVED_KERN)) | ||
539 | set_pud(pud, __pud(0)); | ||
507 | continue; | 540 | continue; |
508 | } | 541 | } |
509 | 542 | ||
510 | if (pud_val(*pud)) { | 543 | if (pud_val(*pud)) { |
511 | if (!pud_large(*pud)) { | 544 | if (!pud_large(*pud)) { |
512 | pmd = map_low_page(pmd_offset(pud, 0)); | 545 | pmd = pmd_offset(pud, 0); |
513 | last_map_addr = phys_pmd_init(pmd, addr, end, | 546 | last_map_addr = phys_pmd_init(pmd, addr, end, |
514 | page_size_mask, prot); | 547 | page_size_mask, prot); |
515 | unmap_low_page(pmd); | ||
516 | __flush_tlb_all(); | 548 | __flush_tlb_all(); |
517 | continue; | 549 | continue; |
518 | } | 550 | } |
@@ -541,19 +573,19 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end, | |||
541 | pages++; | 573 | pages++; |
542 | spin_lock(&init_mm.page_table_lock); | 574 | spin_lock(&init_mm.page_table_lock); |
543 | set_pte((pte_t *)pud, | 575 | set_pte((pte_t *)pud, |
544 | pfn_pte(addr >> PAGE_SHIFT, PAGE_KERNEL_LARGE)); | 576 | pfn_pte((addr & PUD_MASK) >> PAGE_SHIFT, |
577 | PAGE_KERNEL_LARGE)); | ||
545 | spin_unlock(&init_mm.page_table_lock); | 578 | spin_unlock(&init_mm.page_table_lock); |
546 | last_map_addr = next; | 579 | last_map_addr = next; |
547 | continue; | 580 | continue; |
548 | } | 581 | } |
549 | 582 | ||
550 | pmd = alloc_low_page(&pmd_phys); | 583 | pmd = alloc_low_page(); |
551 | last_map_addr = phys_pmd_init(pmd, addr, end, page_size_mask, | 584 | last_map_addr = phys_pmd_init(pmd, addr, end, page_size_mask, |
552 | prot); | 585 | prot); |
553 | unmap_low_page(pmd); | ||
554 | 586 | ||
555 | spin_lock(&init_mm.page_table_lock); | 587 | spin_lock(&init_mm.page_table_lock); |
556 | pud_populate(&init_mm, pud, __va(pmd_phys)); | 588 | pud_populate(&init_mm, pud, pmd); |
557 | spin_unlock(&init_mm.page_table_lock); | 589 | spin_unlock(&init_mm.page_table_lock); |
558 | } | 590 | } |
559 | __flush_tlb_all(); | 591 | __flush_tlb_all(); |
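
Note the pfn_pte() changes in phys_pmd_init()/phys_pud_init() above: the address is masked down to the 2M/1G boundary before a large-page entry is written, apparently because the reworked callers can now hand down an address that is not large-page aligned, and an unmasked pfn would produce a misaligned mapping. A tiny illustration of the masking:

    #include <stdint.h>
    #include <stdio.h>

    #define PMD_MASK (~((1ULL << 21) - 1))  /* 2 MiB */
    #define PUD_MASK (~((1ULL << 30) - 1))  /* 1 GiB */

    int main(void)
    {
        uint64_t addr = 0x4a123000;         /* an address inside a large page */

        /* A large-page entry must point at the start of that page. */
        printf("2M entry base: %#llx\n", (unsigned long long)(addr & PMD_MASK));
        printf("1G entry base: %#llx\n", (unsigned long long)(addr & PUD_MASK));
        return 0;
    }
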
@@ -578,28 +610,23 @@ kernel_physical_mapping_init(unsigned long start, | |||
578 | 610 | ||
579 | for (; start < end; start = next) { | 611 | for (; start < end; start = next) { |
580 | pgd_t *pgd = pgd_offset_k(start); | 612 | pgd_t *pgd = pgd_offset_k(start); |
581 | unsigned long pud_phys; | ||
582 | pud_t *pud; | 613 | pud_t *pud; |
583 | 614 | ||
584 | next = (start + PGDIR_SIZE) & PGDIR_MASK; | 615 | next = (start & PGDIR_MASK) + PGDIR_SIZE; |
585 | if (next > end) | ||
586 | next = end; | ||
587 | 616 | ||
588 | if (pgd_val(*pgd)) { | 617 | if (pgd_val(*pgd)) { |
589 | pud = map_low_page((pud_t *)pgd_page_vaddr(*pgd)); | 618 | pud = (pud_t *)pgd_page_vaddr(*pgd); |
590 | last_map_addr = phys_pud_init(pud, __pa(start), | 619 | last_map_addr = phys_pud_init(pud, __pa(start), |
591 | __pa(end), page_size_mask); | 620 | __pa(end), page_size_mask); |
592 | unmap_low_page(pud); | ||
593 | continue; | 621 | continue; |
594 | } | 622 | } |
595 | 623 | ||
596 | pud = alloc_low_page(&pud_phys); | 624 | pud = alloc_low_page(); |
597 | last_map_addr = phys_pud_init(pud, __pa(start), __pa(next), | 625 | last_map_addr = phys_pud_init(pud, __pa(start), __pa(end), |
598 | page_size_mask); | 626 | page_size_mask); |
599 | unmap_low_page(pud); | ||
600 | 627 | ||
601 | spin_lock(&init_mm.page_table_lock); | 628 | spin_lock(&init_mm.page_table_lock); |
602 | pgd_populate(&init_mm, pgd, __va(pud_phys)); | 629 | pgd_populate(&init_mm, pgd, pud); |
603 | spin_unlock(&init_mm.page_table_lock); | 630 | spin_unlock(&init_mm.page_table_lock); |
604 | pgd_changed = true; | 631 | pgd_changed = true; |
605 | } | 632 | } |
@@ -664,13 +691,11 @@ int arch_add_memory(int nid, u64 start, u64 size) | |||
664 | { | 691 | { |
665 | struct pglist_data *pgdat = NODE_DATA(nid); | 692 | struct pglist_data *pgdat = NODE_DATA(nid); |
666 | struct zone *zone = pgdat->node_zones + ZONE_NORMAL; | 693 | struct zone *zone = pgdat->node_zones + ZONE_NORMAL; |
667 | unsigned long last_mapped_pfn, start_pfn = start >> PAGE_SHIFT; | 694 | unsigned long start_pfn = start >> PAGE_SHIFT; |
668 | unsigned long nr_pages = size >> PAGE_SHIFT; | 695 | unsigned long nr_pages = size >> PAGE_SHIFT; |
669 | int ret; | 696 | int ret; |
670 | 697 | ||
671 | last_mapped_pfn = init_memory_mapping(start, start + size); | 698 | init_memory_mapping(start, start + size); |
672 | if (last_mapped_pfn > max_pfn_mapped) | ||
673 | max_pfn_mapped = last_mapped_pfn; | ||
674 | 699 | ||
675 | ret = __add_pages(nid, zone, start_pfn, nr_pages); | 700 | ret = __add_pages(nid, zone, start_pfn, nr_pages); |
676 | WARN_ON_ONCE(ret); | 701 | WARN_ON_ONCE(ret); |
@@ -686,6 +711,16 @@ EXPORT_SYMBOL_GPL(arch_add_memory); | |||
686 | 711 | ||
687 | static struct kcore_list kcore_vsyscall; | 712 | static struct kcore_list kcore_vsyscall; |
688 | 713 | ||
714 | static void __init register_page_bootmem_info(void) | ||
715 | { | ||
716 | #ifdef CONFIG_NUMA | ||
717 | int i; | ||
718 | |||
719 | for_each_online_node(i) | ||
720 | register_page_bootmem_info_node(NODE_DATA(i)); | ||
721 | #endif | ||
722 | } | ||
723 | |||
689 | void __init mem_init(void) | 724 | void __init mem_init(void) |
690 | { | 725 | { |
691 | long codesize, reservedpages, datasize, initsize; | 726 | long codesize, reservedpages, datasize, initsize; |
@@ -698,11 +733,8 @@ void __init mem_init(void) | |||
698 | reservedpages = 0; | 733 | reservedpages = 0; |
699 | 734 | ||
700 | /* this will put all low memory onto the freelists */ | 735 | /* this will put all low memory onto the freelists */ |
701 | #ifdef CONFIG_NUMA | 736 | register_page_bootmem_info(); |
702 | totalram_pages = numa_free_all_bootmem(); | ||
703 | #else | ||
704 | totalram_pages = free_all_bootmem(); | 737 | totalram_pages = free_all_bootmem(); |
705 | #endif | ||
706 | 738 | ||
707 | absent_pages = absent_pages_in_range(0, max_pfn); | 739 | absent_pages = absent_pages_in_range(0, max_pfn); |
708 | reservedpages = max_pfn - totalram_pages - absent_pages; | 740 | reservedpages = max_pfn - totalram_pages - absent_pages; |
@@ -772,12 +804,11 @@ void set_kernel_text_ro(void) | |||
772 | void mark_rodata_ro(void) | 804 | void mark_rodata_ro(void) |
773 | { | 805 | { |
774 | unsigned long start = PFN_ALIGN(_text); | 806 | unsigned long start = PFN_ALIGN(_text); |
775 | unsigned long rodata_start = | 807 | unsigned long rodata_start = PFN_ALIGN(__start_rodata); |
776 | ((unsigned long)__start_rodata + PAGE_SIZE - 1) & PAGE_MASK; | ||
777 | unsigned long end = (unsigned long) &__end_rodata_hpage_align; | 808 | unsigned long end = (unsigned long) &__end_rodata_hpage_align; |
778 | unsigned long text_end = PAGE_ALIGN((unsigned long) &__stop___ex_table); | 809 | unsigned long text_end = PFN_ALIGN(&__stop___ex_table); |
779 | unsigned long rodata_end = PAGE_ALIGN((unsigned long) &__end_rodata); | 810 | unsigned long rodata_end = PFN_ALIGN(&__end_rodata); |
780 | unsigned long data_start = (unsigned long) &_sdata; | 811 | unsigned long all_end = PFN_ALIGN(&_end); |
781 | 812 | ||
782 | printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n", | 813 | printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n", |
783 | (end - start) >> 10); | 814 | (end - start) >> 10); |
@@ -786,10 +817,10 @@ void mark_rodata_ro(void) | |||
786 | kernel_set_to_readonly = 1; | 817 | kernel_set_to_readonly = 1; |
787 | 818 | ||
788 | /* | 819 | /* |
789 | * The rodata section (but not the kernel text!) should also be | 820 | * The rodata/data/bss/brk section (but not the kernel text!) |
790 | * not-executable. | 821 | * should also be not-executable. |
791 | */ | 822 | */ |
792 | set_memory_nx(rodata_start, (end - rodata_start) >> PAGE_SHIFT); | 823 | set_memory_nx(rodata_start, (all_end - rodata_start) >> PAGE_SHIFT); |
793 | 824 | ||
794 | rodata_test(); | 825 | rodata_test(); |
795 | 826 | ||
@@ -802,12 +833,12 @@ void mark_rodata_ro(void) | |||
802 | #endif | 833 | #endif |
803 | 834 | ||
804 | free_init_pages("unused kernel memory", | 835 | free_init_pages("unused kernel memory", |
805 | (unsigned long) page_address(virt_to_page(text_end)), | 836 | (unsigned long) __va(__pa_symbol(text_end)), |
806 | (unsigned long) | 837 | (unsigned long) __va(__pa_symbol(rodata_start))); |
807 | page_address(virt_to_page(rodata_start))); | 838 | |
808 | free_init_pages("unused kernel memory", | 839 | free_init_pages("unused kernel memory", |
809 | (unsigned long) page_address(virt_to_page(rodata_end)), | 840 | (unsigned long) __va(__pa_symbol(rodata_end)), |
810 | (unsigned long) page_address(virt_to_page(data_start))); | 841 | (unsigned long) __va(__pa_symbol(_sdata))); |
811 | } | 842 | } |
812 | 843 | ||
813 | #endif | 844 | #endif |
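
The mark_rodata_ro() hunk above swaps the open-coded rounding of __start_rodata for PFN_ALIGN() and extends the NX range up to PFN_ALIGN(&_end), so data/bss/brk are covered as well. A minimal user-space sketch of the rounding identity, assuming 4 KiB pages and 64-bit unsigned long; PFN_ALIGN here is a local stand-in for the kernel macro:

#include <assert.h>
#include <stdio.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1UL << PAGE_SHIFT)
#define PAGE_MASK  (~(PAGE_SIZE - 1))
/* local stand-in for the kernel's PFN_ALIGN(): round up to a page boundary */
#define PFN_ALIGN(x) (((unsigned long)(x) + PAGE_SIZE - 1) & PAGE_MASK)

int main(void)
{
        unsigned long start_rodata = 0xffffffff81a0c123UL;  /* arbitrary example address */

        /* the old open-coded expression and PFN_ALIGN() compute the same value */
        unsigned long old_way = (start_rodata + PAGE_SIZE - 1) & PAGE_MASK;
        assert(old_way == PFN_ALIGN(start_rodata));
        printf("rodata start rounds up to %#lx\n", PFN_ALIGN(start_rodata));
        return 0;
}
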
diff --git a/arch/x86/mm/mm_internal.h b/arch/x86/mm/mm_internal.h new file mode 100644 index 000000000000..6b563a118891 --- /dev/null +++ b/arch/x86/mm/mm_internal.h | |||
@@ -0,0 +1,19 @@ | |||
1 | #ifndef __X86_MM_INTERNAL_H | ||
2 | #define __X86_MM_INTERNAL_H | ||
3 | |||
4 | void *alloc_low_pages(unsigned int num); | ||
5 | static inline void *alloc_low_page(void) | ||
6 | { | ||
7 | return alloc_low_pages(1); | ||
8 | } | ||
9 | |||
10 | void early_ioremap_page_table_range_init(void); | ||
11 | |||
12 | unsigned long kernel_physical_mapping_init(unsigned long start, | ||
13 | unsigned long end, | ||
14 | unsigned long page_size_mask); | ||
15 | void zone_sizes_init(void); | ||
16 | |||
17 | extern int after_bootmem; | ||
18 | |||
19 | #endif /* __X86_MM_INTERNAL_H */ | ||
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c index 2d125be1bae9..8504f3698753 100644 --- a/arch/x86/mm/numa.c +++ b/arch/x86/mm/numa.c | |||
@@ -193,7 +193,6 @@ int __init numa_add_memblk(int nid, u64 start, u64 end) | |||
193 | static void __init setup_node_data(int nid, u64 start, u64 end) | 193 | static void __init setup_node_data(int nid, u64 start, u64 end) |
194 | { | 194 | { |
195 | const size_t nd_size = roundup(sizeof(pg_data_t), PAGE_SIZE); | 195 | const size_t nd_size = roundup(sizeof(pg_data_t), PAGE_SIZE); |
196 | bool remapped = false; | ||
197 | u64 nd_pa; | 196 | u64 nd_pa; |
198 | void *nd; | 197 | void *nd; |
199 | int tnid; | 198 | int tnid; |
@@ -205,37 +204,28 @@ static void __init setup_node_data(int nid, u64 start, u64 end) | |||
205 | if (end && (end - start) < NODE_MIN_SIZE) | 204 | if (end && (end - start) < NODE_MIN_SIZE) |
206 | return; | 205 | return; |
207 | 206 | ||
208 | /* initialize remap allocator before aligning to ZONE_ALIGN */ | ||
209 | init_alloc_remap(nid, start, end); | ||
210 | |||
211 | start = roundup(start, ZONE_ALIGN); | 207 | start = roundup(start, ZONE_ALIGN); |
212 | 208 | ||
213 | printk(KERN_INFO "Initmem setup node %d [mem %#010Lx-%#010Lx]\n", | 209 | printk(KERN_INFO "Initmem setup node %d [mem %#010Lx-%#010Lx]\n", |
214 | nid, start, end - 1); | 210 | nid, start, end - 1); |
215 | 211 | ||
216 | /* | 212 | /* |
217 | * Allocate node data. Try remap allocator first, node-local | 213 | * Allocate node data. Try node-local memory and then any node. |
218 | * memory and then any node. Never allocate in DMA zone. | 214 | * Never allocate in DMA zone. |
219 | */ | 215 | */ |
220 | nd = alloc_remap(nid, nd_size); | 216 | nd_pa = memblock_alloc_nid(nd_size, SMP_CACHE_BYTES, nid); |
221 | if (nd) { | 217 | if (!nd_pa) { |
222 | nd_pa = __pa(nd); | 218 | pr_err("Cannot find %zu bytes in node %d\n", |
223 | remapped = true; | 219 | nd_size, nid); |
224 | } else { | 220 | return; |
225 | nd_pa = memblock_alloc_nid(nd_size, SMP_CACHE_BYTES, nid); | ||
226 | if (!nd_pa) { | ||
227 | pr_err("Cannot find %zu bytes in node %d\n", | ||
228 | nd_size, nid); | ||
229 | return; | ||
230 | } | ||
231 | nd = __va(nd_pa); | ||
232 | } | 221 | } |
222 | nd = __va(nd_pa); | ||
233 | 223 | ||
234 | /* report and initialize */ | 224 | /* report and initialize */ |
235 | printk(KERN_INFO " NODE_DATA [mem %#010Lx-%#010Lx]%s\n", | 225 | printk(KERN_INFO " NODE_DATA [mem %#010Lx-%#010Lx]\n", |
236 | nd_pa, nd_pa + nd_size - 1, remapped ? " (remapped)" : ""); | 226 | nd_pa, nd_pa + nd_size - 1); |
237 | tnid = early_pfn_to_nid(nd_pa >> PAGE_SHIFT); | 227 | tnid = early_pfn_to_nid(nd_pa >> PAGE_SHIFT); |
238 | if (!remapped && tnid != nid) | 228 | if (tnid != nid) |
239 | printk(KERN_INFO " NODE_DATA(%d) on node %d\n", nid, tnid); | 229 | printk(KERN_INFO " NODE_DATA(%d) on node %d\n", nid, tnid); |
240 | 230 | ||
241 | node_data[nid] = nd; | 231 | node_data[nid] = nd; |
diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c index 534255a36b6b..73a6d7395bd3 100644 --- a/arch/x86/mm/numa_32.c +++ b/arch/x86/mm/numa_32.c | |||
@@ -73,167 +73,6 @@ unsigned long node_memmap_size_bytes(int nid, unsigned long start_pfn, | |||
73 | 73 | ||
74 | extern unsigned long highend_pfn, highstart_pfn; | 74 | extern unsigned long highend_pfn, highstart_pfn; |
75 | 75 | ||
76 | #define LARGE_PAGE_BYTES (PTRS_PER_PTE * PAGE_SIZE) | ||
77 | |||
78 | static void *node_remap_start_vaddr[MAX_NUMNODES]; | ||
79 | void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags); | ||
80 | |||
81 | /* | ||
82 | * Remap memory allocator | ||
83 | */ | ||
84 | static unsigned long node_remap_start_pfn[MAX_NUMNODES]; | ||
85 | static void *node_remap_end_vaddr[MAX_NUMNODES]; | ||
86 | static void *node_remap_alloc_vaddr[MAX_NUMNODES]; | ||
87 | |||
88 | /** | ||
89 | * alloc_remap - Allocate remapped memory | ||
90 | * @nid: NUMA node to allocate memory from | ||
91 | * @size: The size of allocation | ||
92 | * | ||
93 | * Allocate @size bytes from the remap area of NUMA node @nid. The | ||
94 | * size of the remap area is predetermined by init_alloc_remap() and | ||
95 | * only the callers considered there should call this function. For | ||
96 | * more info, please read the comment on top of init_alloc_remap(). | ||
97 | * | ||
98 | * The caller must be ready to handle allocation failure from this | ||
99 | * function and fall back to regular memory allocator in such cases. | ||
100 | * | ||
101 | * CONTEXT: | ||
102 | * Single CPU early boot context. | ||
103 | * | ||
104 | * RETURNS: | ||
105 | * Pointer to the allocated memory on success, %NULL on failure. | ||
106 | */ | ||
107 | void *alloc_remap(int nid, unsigned long size) | ||
108 | { | ||
109 | void *allocation = node_remap_alloc_vaddr[nid]; | ||
110 | |||
111 | size = ALIGN(size, L1_CACHE_BYTES); | ||
112 | |||
113 | if (!allocation || (allocation + size) > node_remap_end_vaddr[nid]) | ||
114 | return NULL; | ||
115 | |||
116 | node_remap_alloc_vaddr[nid] += size; | ||
117 | memset(allocation, 0, size); | ||
118 | |||
119 | return allocation; | ||
120 | } | ||
121 | |||
122 | #ifdef CONFIG_HIBERNATION | ||
123 | /** | ||
124 | * resume_map_numa_kva - add KVA mapping to the temporary page tables created | ||
125 | * during resume from hibernation | ||
126 | * @pgd_base - temporary resume page directory | ||
127 | */ | ||
128 | void resume_map_numa_kva(pgd_t *pgd_base) | ||
129 | { | ||
130 | int node; | ||
131 | |||
132 | for_each_online_node(node) { | ||
133 | unsigned long start_va, start_pfn, nr_pages, pfn; | ||
134 | |||
135 | start_va = (unsigned long)node_remap_start_vaddr[node]; | ||
136 | start_pfn = node_remap_start_pfn[node]; | ||
137 | nr_pages = (node_remap_end_vaddr[node] - | ||
138 | node_remap_start_vaddr[node]) >> PAGE_SHIFT; | ||
139 | |||
140 | printk(KERN_DEBUG "%s: node %d\n", __func__, node); | ||
141 | |||
142 | for (pfn = 0; pfn < nr_pages; pfn += PTRS_PER_PTE) { | ||
143 | unsigned long vaddr = start_va + (pfn << PAGE_SHIFT); | ||
144 | pgd_t *pgd = pgd_base + pgd_index(vaddr); | ||
145 | pud_t *pud = pud_offset(pgd, vaddr); | ||
146 | pmd_t *pmd = pmd_offset(pud, vaddr); | ||
147 | |||
148 | set_pmd(pmd, pfn_pmd(start_pfn + pfn, | ||
149 | PAGE_KERNEL_LARGE_EXEC)); | ||
150 | |||
151 | printk(KERN_DEBUG "%s: %08lx -> pfn %08lx\n", | ||
152 | __func__, vaddr, start_pfn + pfn); | ||
153 | } | ||
154 | } | ||
155 | } | ||
156 | #endif | ||
157 | |||
158 | /** | ||
159 | * init_alloc_remap - Initialize remap allocator for a NUMA node | ||
160 | * @nid: NUMA node to initizlie remap allocator for | ||
161 | * | ||
162 | * NUMA nodes may end up without any lowmem. As allocating pgdat and | ||
163 | * memmap on a different node with lowmem is inefficient, a special | ||
164 | * remap allocator is implemented which can be used by alloc_remap(). | ||
165 | * | ||
166 | * For each node, the amount of memory which will be necessary for | ||
167 | * pgdat and memmap is calculated and two memory areas of the size are | ||
168 | * allocated - one in the node and the other in lowmem; then, the area | ||
169 | * in the node is remapped to the lowmem area. | ||
170 | * | ||
171 | * As pgdat and memmap must be allocated in lowmem anyway, this | ||
172 | * doesn't waste lowmem address space; however, the actual lowmem | ||
173 | * which gets remapped over is wasted. The amount shouldn't be | ||
174 | * problematic on machines this feature will be used. | ||
175 | * | ||
176 | * Initialization failure isn't fatal. alloc_remap() is used | ||
177 | * opportunistically and the callers will fall back to other memory | ||
178 | * allocation mechanisms on failure. | ||
179 | */ | ||
180 | void __init init_alloc_remap(int nid, u64 start, u64 end) | ||
181 | { | ||
182 | unsigned long start_pfn = start >> PAGE_SHIFT; | ||
183 | unsigned long end_pfn = end >> PAGE_SHIFT; | ||
184 | unsigned long size, pfn; | ||
185 | u64 node_pa, remap_pa; | ||
186 | void *remap_va; | ||
187 | |||
188 | /* | ||
189 | * The acpi/srat node info can show hot-add memroy zones where | ||
190 | * memory could be added but not currently present. | ||
191 | */ | ||
192 | printk(KERN_DEBUG "node %d pfn: [%lx - %lx]\n", | ||
193 | nid, start_pfn, end_pfn); | ||
194 | |||
195 | /* calculate the necessary space aligned to large page size */ | ||
196 | size = node_memmap_size_bytes(nid, start_pfn, end_pfn); | ||
197 | size += ALIGN(sizeof(pg_data_t), PAGE_SIZE); | ||
198 | size = ALIGN(size, LARGE_PAGE_BYTES); | ||
199 | |||
200 | /* allocate node memory and the lowmem remap area */ | ||
201 | node_pa = memblock_find_in_range(start, end, size, LARGE_PAGE_BYTES); | ||
202 | if (!node_pa) { | ||
203 | pr_warning("remap_alloc: failed to allocate %lu bytes for node %d\n", | ||
204 | size, nid); | ||
205 | return; | ||
206 | } | ||
207 | memblock_reserve(node_pa, size); | ||
208 | |||
209 | remap_pa = memblock_find_in_range(min_low_pfn << PAGE_SHIFT, | ||
210 | max_low_pfn << PAGE_SHIFT, | ||
211 | size, LARGE_PAGE_BYTES); | ||
212 | if (!remap_pa) { | ||
213 | pr_warning("remap_alloc: failed to allocate %lu bytes remap area for node %d\n", | ||
214 | size, nid); | ||
215 | memblock_free(node_pa, size); | ||
216 | return; | ||
217 | } | ||
218 | memblock_reserve(remap_pa, size); | ||
219 | remap_va = phys_to_virt(remap_pa); | ||
220 | |||
221 | /* perform actual remap */ | ||
222 | for (pfn = 0; pfn < size >> PAGE_SHIFT; pfn += PTRS_PER_PTE) | ||
223 | set_pmd_pfn((unsigned long)remap_va + (pfn << PAGE_SHIFT), | ||
224 | (node_pa >> PAGE_SHIFT) + pfn, | ||
225 | PAGE_KERNEL_LARGE); | ||
226 | |||
227 | /* initialize remap allocator parameters */ | ||
228 | node_remap_start_pfn[nid] = node_pa >> PAGE_SHIFT; | ||
229 | node_remap_start_vaddr[nid] = remap_va; | ||
230 | node_remap_end_vaddr[nid] = remap_va + size; | ||
231 | node_remap_alloc_vaddr[nid] = remap_va; | ||
232 | |||
233 | printk(KERN_DEBUG "remap_alloc: node %d [%08llx-%08llx) -> [%p-%p)\n", | ||
234 | nid, node_pa, node_pa + size, remap_va, remap_va + size); | ||
235 | } | ||
236 | |||
237 | void __init initmem_init(void) | 76 | void __init initmem_init(void) |
238 | { | 77 | { |
239 | x86_numa_init(); | 78 | x86_numa_init(); |
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c index 92e27119ee1a..9405ffc91502 100644 --- a/arch/x86/mm/numa_64.c +++ b/arch/x86/mm/numa_64.c | |||
@@ -10,16 +10,3 @@ void __init initmem_init(void) | |||
10 | { | 10 | { |
11 | x86_numa_init(); | 11 | x86_numa_init(); |
12 | } | 12 | } |
13 | |||
14 | unsigned long __init numa_free_all_bootmem(void) | ||
15 | { | ||
16 | unsigned long pages = 0; | ||
17 | int i; | ||
18 | |||
19 | for_each_online_node(i) | ||
20 | pages += free_all_bootmem_node(NODE_DATA(i)); | ||
21 | |||
22 | pages += free_low_memory_core_early(MAX_NUMNODES); | ||
23 | |||
24 | return pages; | ||
25 | } | ||
diff --git a/arch/x86/mm/numa_internal.h b/arch/x86/mm/numa_internal.h index 7178c3afe05e..ad86ec91e640 100644 --- a/arch/x86/mm/numa_internal.h +++ b/arch/x86/mm/numa_internal.h | |||
@@ -21,12 +21,6 @@ void __init numa_reset_distance(void); | |||
21 | 21 | ||
22 | void __init x86_numa_init(void); | 22 | void __init x86_numa_init(void); |
23 | 23 | ||
24 | #ifdef CONFIG_X86_64 | ||
25 | static inline void init_alloc_remap(int nid, u64 start, u64 end) { } | ||
26 | #else | ||
27 | void __init init_alloc_remap(int nid, u64 start, u64 end); | ||
28 | #endif | ||
29 | |||
30 | #ifdef CONFIG_NUMA_EMU | 24 | #ifdef CONFIG_NUMA_EMU |
31 | void __init numa_emulation(struct numa_meminfo *numa_meminfo, | 25 | void __init numa_emulation(struct numa_meminfo *numa_meminfo, |
32 | int numa_dist_cnt); | 26 | int numa_dist_cnt); |
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index a718e0d23503..a1b1c88f9caf 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c | |||
@@ -94,12 +94,12 @@ static inline void split_page_count(int level) { } | |||
94 | 94 | ||
95 | static inline unsigned long highmap_start_pfn(void) | 95 | static inline unsigned long highmap_start_pfn(void) |
96 | { | 96 | { |
97 | return __pa(_text) >> PAGE_SHIFT; | 97 | return __pa_symbol(_text) >> PAGE_SHIFT; |
98 | } | 98 | } |
99 | 99 | ||
100 | static inline unsigned long highmap_end_pfn(void) | 100 | static inline unsigned long highmap_end_pfn(void) |
101 | { | 101 | { |
102 | return __pa(roundup(_brk_end, PMD_SIZE)) >> PAGE_SHIFT; | 102 | return __pa_symbol(roundup(_brk_end, PMD_SIZE)) >> PAGE_SHIFT; |
103 | } | 103 | } |
104 | 104 | ||
105 | #endif | 105 | #endif |
@@ -276,8 +276,8 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address, | |||
276 | * The .rodata section needs to be read-only. Using the pfn | 276 | * The .rodata section needs to be read-only. Using the pfn |
277 | * catches all aliases. | 277 | * catches all aliases. |
278 | */ | 278 | */ |
279 | if (within(pfn, __pa((unsigned long)__start_rodata) >> PAGE_SHIFT, | 279 | if (within(pfn, __pa_symbol(__start_rodata) >> PAGE_SHIFT, |
280 | __pa((unsigned long)__end_rodata) >> PAGE_SHIFT)) | 280 | __pa_symbol(__end_rodata) >> PAGE_SHIFT)) |
281 | pgprot_val(forbidden) |= _PAGE_RW; | 281 | pgprot_val(forbidden) |= _PAGE_RW; |
282 | 282 | ||
283 | #if defined(CONFIG_X86_64) && defined(CONFIG_DEBUG_RODATA) | 283 | #if defined(CONFIG_X86_64) && defined(CONFIG_DEBUG_RODATA) |
@@ -364,6 +364,37 @@ pte_t *lookup_address(unsigned long address, unsigned int *level) | |||
364 | EXPORT_SYMBOL_GPL(lookup_address); | 364 | EXPORT_SYMBOL_GPL(lookup_address); |
365 | 365 | ||
366 | /* | 366 | /* |
367 | * This is necessary because __pa() does not work on some | ||
368 | * kinds of memory, like vmalloc() or the alloc_remap() | ||
369 | * areas on 32-bit NUMA systems. The percpu areas can | ||
370 | * end up in this kind of memory, for instance. | ||
371 | * | ||
372 | * This could be optimized, but it is only intended to be | ||
373 | * used at initialization time, and keeping it | ||
374 | * unoptimized should increase the testing coverage for | ||
375 | * the more obscure platforms. | ||
376 | */ | ||
377 | phys_addr_t slow_virt_to_phys(void *__virt_addr) | ||
378 | { | ||
379 | unsigned long virt_addr = (unsigned long)__virt_addr; | ||
380 | phys_addr_t phys_addr; | ||
381 | unsigned long offset; | ||
382 | enum pg_level level; | ||
383 | unsigned long psize; | ||
384 | unsigned long pmask; | ||
385 | pte_t *pte; | ||
386 | |||
387 | pte = lookup_address(virt_addr, &level); | ||
388 | BUG_ON(!pte); | ||
389 | psize = page_level_size(level); | ||
390 | pmask = page_level_mask(level); | ||
391 | offset = virt_addr & ~pmask; | ||
392 | phys_addr = pte_pfn(*pte) << PAGE_SHIFT; | ||
393 | return (phys_addr | offset); | ||
394 | } | ||
395 | EXPORT_SYMBOL_GPL(slow_virt_to_phys); | ||
396 | |||
397 | /* | ||
367 | * Set the new pmd in all the pgds we know about: | 398 | * Set the new pmd in all the pgds we know about: |
368 | */ | 399 | */ |
369 | static void __set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte) | 400 | static void __set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte) |
@@ -396,7 +427,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address, | |||
396 | pte_t new_pte, old_pte, *tmp; | 427 | pte_t new_pte, old_pte, *tmp; |
397 | pgprot_t old_prot, new_prot, req_prot; | 428 | pgprot_t old_prot, new_prot, req_prot; |
398 | int i, do_split = 1; | 429 | int i, do_split = 1; |
399 | unsigned int level; | 430 | enum pg_level level; |
400 | 431 | ||
401 | if (cpa->force_split) | 432 | if (cpa->force_split) |
402 | return 1; | 433 | return 1; |
@@ -412,15 +443,12 @@ try_preserve_large_page(pte_t *kpte, unsigned long address, | |||
412 | 443 | ||
413 | switch (level) { | 444 | switch (level) { |
414 | case PG_LEVEL_2M: | 445 | case PG_LEVEL_2M: |
415 | psize = PMD_PAGE_SIZE; | ||
416 | pmask = PMD_PAGE_MASK; | ||
417 | break; | ||
418 | #ifdef CONFIG_X86_64 | 446 | #ifdef CONFIG_X86_64 |
419 | case PG_LEVEL_1G: | 447 | case PG_LEVEL_1G: |
420 | psize = PUD_PAGE_SIZE; | ||
421 | pmask = PUD_PAGE_MASK; | ||
422 | break; | ||
423 | #endif | 448 | #endif |
449 | psize = page_level_size(level); | ||
450 | pmask = page_level_mask(level); | ||
451 | break; | ||
424 | default: | 452 | default: |
425 | do_split = -EINVAL; | 453 | do_split = -EINVAL; |
426 | goto out_unlock; | 454 | goto out_unlock; |
@@ -551,16 +579,10 @@ static int split_large_page(pte_t *kpte, unsigned long address) | |||
551 | for (i = 0; i < PTRS_PER_PTE; i++, pfn += pfninc) | 579 | for (i = 0; i < PTRS_PER_PTE; i++, pfn += pfninc) |
552 | set_pte(&pbase[i], pfn_pte(pfn, ref_prot)); | 580 | set_pte(&pbase[i], pfn_pte(pfn, ref_prot)); |
553 | 581 | ||
554 | if (address >= (unsigned long)__va(0) && | 582 | if (pfn_range_is_mapped(PFN_DOWN(__pa(address)), |
555 | address < (unsigned long)__va(max_low_pfn_mapped << PAGE_SHIFT)) | 583 | PFN_DOWN(__pa(address)) + 1)) |
556 | split_page_count(level); | 584 | split_page_count(level); |
557 | 585 | ||
558 | #ifdef CONFIG_X86_64 | ||
559 | if (address >= (unsigned long)__va(1UL<<32) && | ||
560 | address < (unsigned long)__va(max_pfn_mapped << PAGE_SHIFT)) | ||
561 | split_page_count(level); | ||
562 | #endif | ||
563 | |||
564 | /* | 586 | /* |
565 | * Install the new, split up pagetable. | 587 | * Install the new, split up pagetable. |
566 | * | 588 | * |
@@ -729,13 +751,9 @@ static int cpa_process_alias(struct cpa_data *cpa) | |||
729 | unsigned long vaddr; | 751 | unsigned long vaddr; |
730 | int ret; | 752 | int ret; |
731 | 753 | ||
732 | if (cpa->pfn >= max_pfn_mapped) | 754 | if (!pfn_range_is_mapped(cpa->pfn, cpa->pfn + 1)) |
733 | return 0; | 755 | return 0; |
734 | 756 | ||
735 | #ifdef CONFIG_X86_64 | ||
736 | if (cpa->pfn >= max_low_pfn_mapped && cpa->pfn < (1UL<<(32-PAGE_SHIFT))) | ||
737 | return 0; | ||
738 | #endif | ||
739 | /* | 757 | /* |
740 | * No need to redo, when the primary call touched the direct | 758 | * No need to redo, when the primary call touched the direct |
741 | * mapping already: | 759 | * mapping already: |
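
slow_virt_to_phys(), added above, rebuilds a physical address from the PTE's pfn plus whatever offset is left under the page-level mask, so it works for 4K, 2M and 1G mappings alike. A standalone sketch of that arithmetic for a 2 MiB mapping, with the level size and mask hard-coded rather than taken from page_level_size()/page_level_mask():

#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT      12
#define PMD_SHIFT       21                      /* 2 MiB page level */
#define PMD_SIZE        (1UL << PMD_SHIFT)
#define PMD_MASK        (~(PMD_SIZE - 1))

int main(void)
{
        unsigned long virt = 0xffff880123456789UL;      /* example direct-map address */
        unsigned long pte_pfn = 0x123400;               /* pfn stored in the 2M mapping */

        /* offset of virt inside the 2 MiB page */
        unsigned long offset = virt & ~PMD_MASK;
        /* physical base of the large page, then add the offset back */
        uint64_t phys = ((uint64_t)pte_pfn << PAGE_SHIFT) | offset;

        printf("phys = %#llx (offset %#lx)\n", (unsigned long long)phys, offset);
        return 0;
}
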
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index 0eb572eda406..2610bd93c896 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c | |||
@@ -560,10 +560,10 @@ int kernel_map_sync_memtype(u64 base, unsigned long size, unsigned long flags) | |||
560 | { | 560 | { |
561 | unsigned long id_sz; | 561 | unsigned long id_sz; |
562 | 562 | ||
563 | if (base >= __pa(high_memory)) | 563 | if (base > __pa(high_memory-1)) |
564 | return 0; | 564 | return 0; |
565 | 565 | ||
566 | id_sz = (__pa(high_memory) < base + size) ? | 566 | id_sz = (__pa(high_memory-1) <= base + size) ? |
567 | __pa(high_memory) - base : | 567 | __pa(high_memory) - base : |
568 | size; | 568 | size; |
569 | 569 | ||
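
The kernel_map_sync_memtype() hunk compares against __pa(high_memory - 1) rather than __pa(high_memory), presumably because the one-past-the-end physical address can overflow when high_memory sits exactly at the top of the mapped range, while the address of the last byte cannot. A hedged illustration of the boundary arithmetic with 32-bit physical addresses (the 4 GiB boundary is only an example):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        /* pretend the direct map ends exactly at 4 GiB */
        uint64_t high_memory_phys = 0x100000000ULL;     /* one past the last byte */
        uint32_t base = 0xfff00000;                     /* region being checked */

        /* truncating the one-past-the-end address to 32 bits wraps to 0 ... */
        uint32_t wrapped_end = (uint32_t)high_memory_phys;
        printf("base >= wrapped_end : %d  (bogus, wrapped_end=%#x)\n",
               base >= wrapped_end, wrapped_end);

        /* ... while the address of the last byte stays representable */
        uint32_t last_byte = (uint32_t)(high_memory_phys - 1);
        printf("base >  last_byte   : %d  (correct)\n", base > last_byte);
        return 0;
}
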
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index e27fbf887f3b..193350b51f90 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c | |||
@@ -334,7 +334,12 @@ int pmdp_set_access_flags(struct vm_area_struct *vma, | |||
334 | if (changed && dirty) { | 334 | if (changed && dirty) { |
335 | *pmdp = entry; | 335 | *pmdp = entry; |
336 | pmd_update_defer(vma->vm_mm, address, pmdp); | 336 | pmd_update_defer(vma->vm_mm, address, pmdp); |
337 | flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE); | 337 | /* |
338 | * We had a write-protection fault here and changed the pmd | ||
339 | * to be more permissive. No need to flush the TLB for that, | ||
340 | * #PF is architecturally guaranteed to do that and in the | ||
341 | * worst-case we'll generate a spurious fault. | ||
342 | */ | ||
338 | } | 343 | } |
339 | 344 | ||
340 | return changed; | 345 | return changed; |
diff --git a/arch/x86/mm/physaddr.c b/arch/x86/mm/physaddr.c index d2e2735327b4..e666cbbb9261 100644 --- a/arch/x86/mm/physaddr.c +++ b/arch/x86/mm/physaddr.c | |||
@@ -1,3 +1,4 @@ | |||
1 | #include <linux/bootmem.h> | ||
1 | #include <linux/mmdebug.h> | 2 | #include <linux/mmdebug.h> |
2 | #include <linux/module.h> | 3 | #include <linux/module.h> |
3 | #include <linux/mm.h> | 4 | #include <linux/mm.h> |
@@ -8,33 +9,54 @@ | |||
8 | 9 | ||
9 | #ifdef CONFIG_X86_64 | 10 | #ifdef CONFIG_X86_64 |
10 | 11 | ||
12 | #ifdef CONFIG_DEBUG_VIRTUAL | ||
11 | unsigned long __phys_addr(unsigned long x) | 13 | unsigned long __phys_addr(unsigned long x) |
12 | { | 14 | { |
13 | if (x >= __START_KERNEL_map) { | 15 | unsigned long y = x - __START_KERNEL_map; |
14 | x -= __START_KERNEL_map; | 16 | |
15 | VIRTUAL_BUG_ON(x >= KERNEL_IMAGE_SIZE); | 17 | /* use the carry flag to determine if x was < __START_KERNEL_map */ |
16 | x += phys_base; | 18 | if (unlikely(x > y)) { |
19 | x = y + phys_base; | ||
20 | |||
21 | VIRTUAL_BUG_ON(y >= KERNEL_IMAGE_SIZE); | ||
17 | } else { | 22 | } else { |
18 | VIRTUAL_BUG_ON(x < PAGE_OFFSET); | 23 | x = y + (__START_KERNEL_map - PAGE_OFFSET); |
19 | x -= PAGE_OFFSET; | 24 | |
20 | VIRTUAL_BUG_ON(!phys_addr_valid(x)); | 25 | /* carry flag will be set if starting x was >= PAGE_OFFSET */ |
26 | VIRTUAL_BUG_ON((x > y) || !phys_addr_valid(x)); | ||
21 | } | 27 | } |
28 | |||
22 | return x; | 29 | return x; |
23 | } | 30 | } |
24 | EXPORT_SYMBOL(__phys_addr); | 31 | EXPORT_SYMBOL(__phys_addr); |
25 | 32 | ||
33 | unsigned long __phys_addr_symbol(unsigned long x) | ||
34 | { | ||
35 | unsigned long y = x - __START_KERNEL_map; | ||
36 | |||
37 | /* only check upper bounds since lower bounds will trigger carry */ | ||
38 | VIRTUAL_BUG_ON(y >= KERNEL_IMAGE_SIZE); | ||
39 | |||
40 | return y + phys_base; | ||
41 | } | ||
42 | EXPORT_SYMBOL(__phys_addr_symbol); | ||
43 | #endif | ||
44 | |||
26 | bool __virt_addr_valid(unsigned long x) | 45 | bool __virt_addr_valid(unsigned long x) |
27 | { | 46 | { |
28 | if (x >= __START_KERNEL_map) { | 47 | unsigned long y = x - __START_KERNEL_map; |
29 | x -= __START_KERNEL_map; | 48 | |
30 | if (x >= KERNEL_IMAGE_SIZE) | 49 | /* use the carry flag to determine if x was < __START_KERNEL_map */ |
50 | if (unlikely(x > y)) { | ||
51 | x = y + phys_base; | ||
52 | |||
53 | if (y >= KERNEL_IMAGE_SIZE) | ||
31 | return false; | 54 | return false; |
32 | x += phys_base; | ||
33 | } else { | 55 | } else { |
34 | if (x < PAGE_OFFSET) | 56 | x = y + (__START_KERNEL_map - PAGE_OFFSET); |
35 | return false; | 57 | |
36 | x -= PAGE_OFFSET; | 58 | /* carry flag will be set if starting x was >= PAGE_OFFSET */ |
37 | if (!phys_addr_valid(x)) | 59 | if ((x > y) || !phys_addr_valid(x)) |
38 | return false; | 60 | return false; |
39 | } | 61 | } |
40 | 62 | ||
@@ -47,10 +69,16 @@ EXPORT_SYMBOL(__virt_addr_valid); | |||
47 | #ifdef CONFIG_DEBUG_VIRTUAL | 69 | #ifdef CONFIG_DEBUG_VIRTUAL |
48 | unsigned long __phys_addr(unsigned long x) | 70 | unsigned long __phys_addr(unsigned long x) |
49 | { | 71 | { |
72 | unsigned long phys_addr = x - PAGE_OFFSET; | ||
50 | /* VMALLOC_* aren't constants */ | 73 | /* VMALLOC_* aren't constants */ |
51 | VIRTUAL_BUG_ON(x < PAGE_OFFSET); | 74 | VIRTUAL_BUG_ON(x < PAGE_OFFSET); |
52 | VIRTUAL_BUG_ON(__vmalloc_start_set && is_vmalloc_addr((void *) x)); | 75 | VIRTUAL_BUG_ON(__vmalloc_start_set && is_vmalloc_addr((void *) x)); |
53 | return x - PAGE_OFFSET; | 76 | /* max_low_pfn is set early, but not _that_ early */ |
77 | if (max_low_pfn) { | ||
78 | VIRTUAL_BUG_ON((phys_addr >> PAGE_SHIFT) > max_low_pfn); | ||
79 | BUG_ON(slow_virt_to_phys((void *)x) != phys_addr); | ||
80 | } | ||
81 | return phys_addr; | ||
54 | } | 82 | } |
55 | EXPORT_SYMBOL(__phys_addr); | 83 | EXPORT_SYMBOL(__phys_addr); |
56 | #endif | 84 | #endif |
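
The rewritten __phys_addr()/__virt_addr_valid() lean on unsigned wraparound (the "carry flag" in the comments): after y = x - __START_KERNEL_map, the subtraction wraps to a huge value exactly when x was below the kernel-image base, so x > y holds only for kernel-image addresses. A standalone demonstration of the trick with an example base constant:

#include <assert.h>
#include <stdio.h>

#define START_KERNEL_MAP 0xffffffff80000000UL   /* example base, as on x86-64 */

static int in_kernel_image_range(unsigned long x)
{
        unsigned long y = x - START_KERNEL_MAP;

        /* if x < base, the subtraction wraps and y becomes larger than x */
        return x > y;
}

int main(void)
{
        assert(in_kernel_image_range(0xffffffff81000000UL) == 1);  /* kernel text address */
        assert(in_kernel_image_range(0xffff880000100000UL) == 0);  /* direct-map address */
        printf("carry-flag trick works as expected\n");
        return 0;
}
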
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 928bf837040a..70b2a3a305d6 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c | |||
@@ -416,8 +416,8 @@ void __init efi_reserve_boot_services(void) | |||
416 | * - Not within any part of the kernel | 416 | * - Not within any part of the kernel |
417 | * - Not the bios reserved area | 417 | * - Not the bios reserved area |
418 | */ | 418 | */ |
419 | if ((start+size >= virt_to_phys(_text) | 419 | if ((start+size >= __pa_symbol(_text) |
420 | && start <= virt_to_phys(_end)) || | 420 | && start <= __pa_symbol(_end)) || |
421 | !e820_all_mapped(start, start+size, E820_RAM) || | 421 | !e820_all_mapped(start, start+size, E820_RAM) || |
422 | memblock_is_region_reserved(start, size)) { | 422 | memblock_is_region_reserved(start, size)) { |
423 | /* Could not reserve, skip it */ | 423 | /* Could not reserve, skip it */ |
@@ -843,7 +843,7 @@ void __init efi_enter_virtual_mode(void) | |||
843 | efi_memory_desc_t *md, *prev_md = NULL; | 843 | efi_memory_desc_t *md, *prev_md = NULL; |
844 | efi_status_t status; | 844 | efi_status_t status; |
845 | unsigned long size; | 845 | unsigned long size; |
846 | u64 end, systab, end_pfn; | 846 | u64 end, systab, start_pfn, end_pfn; |
847 | void *p, *va, *new_memmap = NULL; | 847 | void *p, *va, *new_memmap = NULL; |
848 | int count = 0; | 848 | int count = 0; |
849 | 849 | ||
@@ -896,10 +896,9 @@ void __init efi_enter_virtual_mode(void) | |||
896 | size = md->num_pages << EFI_PAGE_SHIFT; | 896 | size = md->num_pages << EFI_PAGE_SHIFT; |
897 | end = md->phys_addr + size; | 897 | end = md->phys_addr + size; |
898 | 898 | ||
899 | start_pfn = PFN_DOWN(md->phys_addr); | ||
899 | end_pfn = PFN_UP(end); | 900 | end_pfn = PFN_UP(end); |
900 | if (end_pfn <= max_low_pfn_mapped | 901 | if (pfn_range_is_mapped(start_pfn, end_pfn)) { |
901 | || (end_pfn > (1UL << (32 - PAGE_SHIFT)) | ||
902 | && end_pfn <= max_pfn_mapped)) { | ||
903 | va = __va(md->phys_addr); | 902 | va = __va(md->phys_addr); |
904 | 903 | ||
905 | if (!(md->attribute & EFI_MEMORY_WB)) | 904 | if (!(md->attribute & EFI_MEMORY_WB)) |
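
efi_enter_virtual_mode() now asks pfn_range_is_mapped() whether a region already has a direct mapping, instead of the old max_low_pfn_mapped/max_pfn_mapped tests, which could only describe two contiguous ranges. A deliberately simplified sketch of such a check against an explicit table of mapped pfn ranges (the struct, the table, and the single-entry containment test are illustrative, not the kernel's):

#include <stdbool.h>
#include <stdio.h>

struct range { unsigned long start, end; };     /* [start, end) in pfns */

/* illustrative table of already-mapped pfn ranges */
static const struct range pfn_mapped[] = {
        { 0x000, 0x100 },       /* first 1 MiB */
        { 0x100, 0x80000 },     /* up to 2 GiB, say */
};

static bool pfn_range_is_mapped(unsigned long start_pfn, unsigned long end_pfn)
{
        for (unsigned i = 0; i < sizeof(pfn_mapped)/sizeof(pfn_mapped[0]); i++)
                if (start_pfn >= pfn_mapped[i].start && end_pfn <= pfn_mapped[i].end)
                        return true;
        return false;
}

int main(void)
{
        printf("%d %d\n", pfn_range_is_mapped(0x200, 0x300),     /* 1: inside a range */
                          pfn_range_is_mapped(0x7ff00, 0x80100));/* 0: crosses the end */
        return 0;
}
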
diff --git a/arch/x86/power/hibernate_32.c b/arch/x86/power/hibernate_32.c index 74202c1910cd..7d28c885d238 100644 --- a/arch/x86/power/hibernate_32.c +++ b/arch/x86/power/hibernate_32.c | |||
@@ -129,8 +129,6 @@ static int resume_physical_mapping_init(pgd_t *pgd_base) | |||
129 | } | 129 | } |
130 | } | 130 | } |
131 | 131 | ||
132 | resume_map_numa_kva(pgd_base); | ||
133 | |||
134 | return 0; | 132 | return 0; |
135 | } | 133 | } |
136 | 134 | ||
diff --git a/arch/x86/power/hibernate_64.c b/arch/x86/power/hibernate_64.c index 460f314d13e5..a0fde91c16cf 100644 --- a/arch/x86/power/hibernate_64.c +++ b/arch/x86/power/hibernate_64.c | |||
@@ -11,6 +11,8 @@ | |||
11 | #include <linux/gfp.h> | 11 | #include <linux/gfp.h> |
12 | #include <linux/smp.h> | 12 | #include <linux/smp.h> |
13 | #include <linux/suspend.h> | 13 | #include <linux/suspend.h> |
14 | |||
15 | #include <asm/init.h> | ||
14 | #include <asm/proto.h> | 16 | #include <asm/proto.h> |
15 | #include <asm/page.h> | 17 | #include <asm/page.h> |
16 | #include <asm/pgtable.h> | 18 | #include <asm/pgtable.h> |
@@ -39,41 +41,21 @@ pgd_t *temp_level4_pgt; | |||
39 | 41 | ||
40 | void *relocated_restore_code; | 42 | void *relocated_restore_code; |
41 | 43 | ||
42 | static int res_phys_pud_init(pud_t *pud, unsigned long address, unsigned long end) | 44 | static void *alloc_pgt_page(void *context) |
43 | { | 45 | { |
44 | long i, j; | 46 | return (void *)get_safe_page(GFP_ATOMIC); |
45 | |||
46 | i = pud_index(address); | ||
47 | pud = pud + i; | ||
48 | for (; i < PTRS_PER_PUD; pud++, i++) { | ||
49 | unsigned long paddr; | ||
50 | pmd_t *pmd; | ||
51 | |||
52 | paddr = address + i*PUD_SIZE; | ||
53 | if (paddr >= end) | ||
54 | break; | ||
55 | |||
56 | pmd = (pmd_t *)get_safe_page(GFP_ATOMIC); | ||
57 | if (!pmd) | ||
58 | return -ENOMEM; | ||
59 | set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE)); | ||
60 | for (j = 0; j < PTRS_PER_PMD; pmd++, j++, paddr += PMD_SIZE) { | ||
61 | unsigned long pe; | ||
62 | |||
63 | if (paddr >= end) | ||
64 | break; | ||
65 | pe = __PAGE_KERNEL_LARGE_EXEC | paddr; | ||
66 | pe &= __supported_pte_mask; | ||
67 | set_pmd(pmd, __pmd(pe)); | ||
68 | } | ||
69 | } | ||
70 | return 0; | ||
71 | } | 47 | } |
72 | 48 | ||
73 | static int set_up_temporary_mappings(void) | 49 | static int set_up_temporary_mappings(void) |
74 | { | 50 | { |
75 | unsigned long start, end, next; | 51 | struct x86_mapping_info info = { |
76 | int error; | 52 | .alloc_pgt_page = alloc_pgt_page, |
53 | .pmd_flag = __PAGE_KERNEL_LARGE_EXEC, | ||
54 | .kernel_mapping = true, | ||
55 | }; | ||
56 | unsigned long mstart, mend; | ||
57 | int result; | ||
58 | int i; | ||
77 | 59 | ||
78 | temp_level4_pgt = (pgd_t *)get_safe_page(GFP_ATOMIC); | 60 | temp_level4_pgt = (pgd_t *)get_safe_page(GFP_ATOMIC); |
79 | if (!temp_level4_pgt) | 61 | if (!temp_level4_pgt) |
@@ -84,21 +66,17 @@ static int set_up_temporary_mappings(void) | |||
84 | init_level4_pgt[pgd_index(__START_KERNEL_map)]); | 66 | init_level4_pgt[pgd_index(__START_KERNEL_map)]); |
85 | 67 | ||
86 | /* Set up the direct mapping from scratch */ | 68 | /* Set up the direct mapping from scratch */ |
87 | start = (unsigned long)pfn_to_kaddr(0); | 69 | for (i = 0; i < nr_pfn_mapped; i++) { |
88 | end = (unsigned long)pfn_to_kaddr(max_pfn); | 70 | mstart = pfn_mapped[i].start << PAGE_SHIFT; |
89 | 71 | mend = pfn_mapped[i].end << PAGE_SHIFT; | |
90 | for (; start < end; start = next) { | 72 | |
91 | pud_t *pud = (pud_t *)get_safe_page(GFP_ATOMIC); | 73 | result = kernel_ident_mapping_init(&info, temp_level4_pgt, |
92 | if (!pud) | 74 | mstart, mend); |
93 | return -ENOMEM; | 75 | |
94 | next = start + PGDIR_SIZE; | 76 | if (result) |
95 | if (next > end) | 77 | return result; |
96 | next = end; | ||
97 | if ((error = res_phys_pud_init(pud, __pa(start), __pa(next)))) | ||
98 | return error; | ||
99 | set_pgd(temp_level4_pgt + pgd_index(start), | ||
100 | mk_kernel_pgd(__pa(pud))); | ||
101 | } | 78 | } |
79 | |||
102 | return 0; | 80 | return 0; |
103 | } | 81 | } |
104 | 82 | ||
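
set_up_temporary_mappings() now builds the hibernation page tables with kernel_ident_mapping_init(), feeding it page-table pages through the alloc_pgt_page callback in struct x86_mapping_info. The callback-driven allocator pattern, reduced to a standalone sketch (the struct and function names here are stand-ins, not the kernel's):

#include <stdio.h>
#include <stdlib.h>

/* stand-in for struct x86_mapping_info: who provides page-table pages */
struct mapping_info {
        void *(*alloc_pgt_page)(void *context);
        void *context;
};

/* stand-in for kernel_ident_mapping_init(): grabs one "table page" per 1 GiB */
static int ident_mapping_init(struct mapping_info *info,
                              unsigned long start, unsigned long end)
{
        for (unsigned long addr = start; addr < end; addr += (1UL << 30)) {
                void *pgt = info->alloc_pgt_page(info->context);
                if (!pgt)
                        return -1;      /* caller decides how to unwind */
                printf("mapping %#lx with table page %p\n", addr, pgt);
        }
        return 0;
}

static void *alloc_pgt_page(void *context)
{
        (void)context;
        return calloc(1, 4096);         /* hibernation uses get_safe_page() instead */
}

int main(void)
{
        struct mapping_info info = { .alloc_pgt_page = alloc_pgt_page };
        return ident_mapping_init(&info, 0, 3UL << 30);
}
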
diff --git a/arch/x86/realmode/init.c b/arch/x86/realmode/init.c index cbca565af5bd..a44f457e70a1 100644 --- a/arch/x86/realmode/init.c +++ b/arch/x86/realmode/init.c | |||
@@ -8,9 +8,26 @@ | |||
8 | struct real_mode_header *real_mode_header; | 8 | struct real_mode_header *real_mode_header; |
9 | u32 *trampoline_cr4_features; | 9 | u32 *trampoline_cr4_features; |
10 | 10 | ||
11 | void __init setup_real_mode(void) | 11 | void __init reserve_real_mode(void) |
12 | { | 12 | { |
13 | phys_addr_t mem; | 13 | phys_addr_t mem; |
14 | unsigned char *base; | ||
15 | size_t size = PAGE_ALIGN(real_mode_blob_end - real_mode_blob); | ||
16 | |||
17 | /* Has to be under 1M so we can execute real-mode AP code. */ | ||
18 | mem = memblock_find_in_range(0, 1<<20, size, PAGE_SIZE); | ||
19 | if (!mem) | ||
20 | panic("Cannot allocate trampoline\n"); | ||
21 | |||
22 | base = __va(mem); | ||
23 | memblock_reserve(mem, size); | ||
24 | real_mode_header = (struct real_mode_header *) base; | ||
25 | printk(KERN_DEBUG "Base memory trampoline at [%p] %llx size %zu\n", | ||
26 | base, (unsigned long long)mem, size); | ||
27 | } | ||
28 | |||
29 | void __init setup_real_mode(void) | ||
30 | { | ||
14 | u16 real_mode_seg; | 31 | u16 real_mode_seg; |
15 | u32 *rel; | 32 | u32 *rel; |
16 | u32 count; | 33 | u32 count; |
@@ -25,16 +42,7 @@ void __init setup_real_mode(void) | |||
25 | u64 efer; | 42 | u64 efer; |
26 | #endif | 43 | #endif |
27 | 44 | ||
28 | /* Has to be in very low memory so we can execute real-mode AP code. */ | 45 | base = (unsigned char *)real_mode_header; |
29 | mem = memblock_find_in_range(0, 1<<20, size, PAGE_SIZE); | ||
30 | if (!mem) | ||
31 | panic("Cannot allocate trampoline\n"); | ||
32 | |||
33 | base = __va(mem); | ||
34 | memblock_reserve(mem, size); | ||
35 | real_mode_header = (struct real_mode_header *) base; | ||
36 | printk(KERN_DEBUG "Base memory trampoline at [%p] %llx size %zu\n", | ||
37 | base, (unsigned long long)mem, size); | ||
38 | 46 | ||
39 | memcpy(base, real_mode_blob, size); | 47 | memcpy(base, real_mode_blob, size); |
40 | 48 | ||
@@ -62,9 +70,9 @@ void __init setup_real_mode(void) | |||
62 | __va(real_mode_header->trampoline_header); | 70 | __va(real_mode_header->trampoline_header); |
63 | 71 | ||
64 | #ifdef CONFIG_X86_32 | 72 | #ifdef CONFIG_X86_32 |
65 | trampoline_header->start = __pa(startup_32_smp); | 73 | trampoline_header->start = __pa_symbol(startup_32_smp); |
66 | trampoline_header->gdt_limit = __BOOT_DS + 7; | 74 | trampoline_header->gdt_limit = __BOOT_DS + 7; |
67 | trampoline_header->gdt_base = __pa(boot_gdt); | 75 | trampoline_header->gdt_base = __pa_symbol(boot_gdt); |
68 | #else | 76 | #else |
69 | /* | 77 | /* |
70 | * Some AMD processors will #GP(0) if EFER.LMA is set in WRMSR | 78 | * Some AMD processors will #GP(0) if EFER.LMA is set in WRMSR |
@@ -78,16 +86,18 @@ void __init setup_real_mode(void) | |||
78 | *trampoline_cr4_features = read_cr4(); | 86 | *trampoline_cr4_features = read_cr4(); |
79 | 87 | ||
80 | trampoline_pgd = (u64 *) __va(real_mode_header->trampoline_pgd); | 88 | trampoline_pgd = (u64 *) __va(real_mode_header->trampoline_pgd); |
81 | trampoline_pgd[0] = __pa(level3_ident_pgt) + _KERNPG_TABLE; | 89 | trampoline_pgd[0] = init_level4_pgt[pgd_index(__PAGE_OFFSET)].pgd; |
82 | trampoline_pgd[511] = __pa(level3_kernel_pgt) + _KERNPG_TABLE; | 90 | trampoline_pgd[511] = init_level4_pgt[511].pgd; |
83 | #endif | 91 | #endif |
84 | } | 92 | } |
85 | 93 | ||
86 | /* | 94 | /* |
87 | * set_real_mode_permissions() gets called very early, to guarantee the | 95 | * reserve_real_mode() gets called very early, to guarantee the |
88 | * availability of low memory. This is before the proper kernel page | 96 | * availability of low memory. This is before the proper kernel page |
89 | * tables are set up, so we cannot set page permissions in that | 97 | * tables are set up, so we cannot set page permissions in that |
90 | * function. Thus, we use an arch_initcall instead. | 98 | * function. Also trampoline code will be executed by APs so we |
99 | * need to mark it executable by do_pre_smp_initcalls() at the | ||
100 | * latest, thus we run it as an early_initcall(). | ||
91 | */ | 101 | */ |
92 | static int __init set_real_mode_permissions(void) | 102 | static int __init set_real_mode_permissions(void) |
93 | { | 103 | { |
@@ -111,5 +121,4 @@ static int __init set_real_mode_permissions(void) | |||
111 | 121 | ||
112 | return 0; | 122 | return 0; |
113 | } | 123 | } |
114 | 124 | early_initcall(set_real_mode_permissions); | |
115 | arch_initcall(set_real_mode_permissions); | ||
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 01de35c77221..f5e86eee4e0e 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c | |||
@@ -1178,20 +1178,6 @@ static void xen_exit_mmap(struct mm_struct *mm) | |||
1178 | 1178 | ||
1179 | static void xen_post_allocator_init(void); | 1179 | static void xen_post_allocator_init(void); |
1180 | 1180 | ||
1181 | static __init void xen_mapping_pagetable_reserve(u64 start, u64 end) | ||
1182 | { | ||
1183 | /* reserve the range used */ | ||
1184 | native_pagetable_reserve(start, end); | ||
1185 | |||
1186 | /* set as RW the rest */ | ||
1187 | printk(KERN_DEBUG "xen: setting RW the range %llx - %llx\n", end, | ||
1188 | PFN_PHYS(pgt_buf_top)); | ||
1189 | while (end < PFN_PHYS(pgt_buf_top)) { | ||
1190 | make_lowmem_page_readwrite(__va(end)); | ||
1191 | end += PAGE_SIZE; | ||
1192 | } | ||
1193 | } | ||
1194 | |||
1195 | #ifdef CONFIG_X86_64 | 1181 | #ifdef CONFIG_X86_64 |
1196 | static void __init xen_cleanhighmap(unsigned long vaddr, | 1182 | static void __init xen_cleanhighmap(unsigned long vaddr, |
1197 | unsigned long vaddr_end) | 1183 | unsigned long vaddr_end) |
@@ -1503,19 +1489,6 @@ static pte_t __init mask_rw_pte(pte_t *ptep, pte_t pte) | |||
1503 | #else /* CONFIG_X86_64 */ | 1489 | #else /* CONFIG_X86_64 */ |
1504 | static pte_t __init mask_rw_pte(pte_t *ptep, pte_t pte) | 1490 | static pte_t __init mask_rw_pte(pte_t *ptep, pte_t pte) |
1505 | { | 1491 | { |
1506 | unsigned long pfn = pte_pfn(pte); | ||
1507 | |||
1508 | /* | ||
1509 | * If the new pfn is within the range of the newly allocated | ||
1510 | * kernel pagetable, and it isn't being mapped into an | ||
1511 | * early_ioremap fixmap slot as a freshly allocated page, make sure | ||
1512 | * it is RO. | ||
1513 | */ | ||
1514 | if (((!is_early_ioremap_ptep(ptep) && | ||
1515 | pfn >= pgt_buf_start && pfn < pgt_buf_top)) || | ||
1516 | (is_early_ioremap_ptep(ptep) && pfn != (pgt_buf_end - 1))) | ||
1517 | pte = pte_wrprotect(pte); | ||
1518 | |||
1519 | return pte; | 1492 | return pte; |
1520 | } | 1493 | } |
1521 | #endif /* CONFIG_X86_64 */ | 1494 | #endif /* CONFIG_X86_64 */ |
@@ -2197,7 +2170,6 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = { | |||
2197 | 2170 | ||
2198 | void __init xen_init_mmu_ops(void) | 2171 | void __init xen_init_mmu_ops(void) |
2199 | { | 2172 | { |
2200 | x86_init.mapping.pagetable_reserve = xen_mapping_pagetable_reserve; | ||
2201 | x86_init.paging.pagetable_init = xen_pagetable_init; | 2173 | x86_init.paging.pagetable_init = xen_pagetable_init; |
2202 | pv_mmu_ops = xen_mmu_ops; | 2174 | pv_mmu_ops = xen_mmu_ops; |
2203 | 2175 | ||
diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c index af47e7594460..1d94316f0ea4 100644 --- a/drivers/xen/swiotlb-xen.c +++ b/drivers/xen/swiotlb-xen.c | |||
@@ -231,7 +231,9 @@ retry: | |||
231 | } | 231 | } |
232 | start_dma_addr = xen_virt_to_bus(xen_io_tlb_start); | 232 | start_dma_addr = xen_virt_to_bus(xen_io_tlb_start); |
233 | if (early) { | 233 | if (early) { |
234 | swiotlb_init_with_tbl(xen_io_tlb_start, xen_io_tlb_nslabs, verbose); | 234 | if (swiotlb_init_with_tbl(xen_io_tlb_start, xen_io_tlb_nslabs, |
235 | verbose)) | ||
236 | panic("Cannot allocate SWIOTLB buffer"); | ||
235 | rc = 0; | 237 | rc = 0; |
236 | } else | 238 | } else |
237 | rc = swiotlb_late_init_with_tbl(xen_io_tlb_start, xen_io_tlb_nslabs); | 239 | rc = swiotlb_late_init_with_tbl(xen_io_tlb_start, xen_io_tlb_nslabs); |
diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h index 3f778c27f825..3cd16ba82f15 100644 --- a/include/linux/bootmem.h +++ b/include/linux/bootmem.h | |||
@@ -99,6 +99,9 @@ void *___alloc_bootmem_node_nopanic(pg_data_t *pgdat, | |||
99 | extern void *__alloc_bootmem_low(unsigned long size, | 99 | extern void *__alloc_bootmem_low(unsigned long size, |
100 | unsigned long align, | 100 | unsigned long align, |
101 | unsigned long goal); | 101 | unsigned long goal); |
102 | void *__alloc_bootmem_low_nopanic(unsigned long size, | ||
103 | unsigned long align, | ||
104 | unsigned long goal); | ||
102 | extern void *__alloc_bootmem_low_node(pg_data_t *pgdat, | 105 | extern void *__alloc_bootmem_low_node(pg_data_t *pgdat, |
103 | unsigned long size, | 106 | unsigned long size, |
104 | unsigned long align, | 107 | unsigned long align, |
@@ -132,6 +135,8 @@ extern void *__alloc_bootmem_low_node(pg_data_t *pgdat, | |||
132 | 135 | ||
133 | #define alloc_bootmem_low(x) \ | 136 | #define alloc_bootmem_low(x) \ |
134 | __alloc_bootmem_low(x, SMP_CACHE_BYTES, 0) | 137 | __alloc_bootmem_low(x, SMP_CACHE_BYTES, 0) |
138 | #define alloc_bootmem_low_pages_nopanic(x) \ | ||
139 | __alloc_bootmem_low_nopanic(x, PAGE_SIZE, 0) | ||
135 | #define alloc_bootmem_low_pages(x) \ | 140 | #define alloc_bootmem_low_pages(x) \ |
136 | __alloc_bootmem_low(x, PAGE_SIZE, 0) | 141 | __alloc_bootmem_low(x, PAGE_SIZE, 0) |
137 | #define alloc_bootmem_low_pages_node(pgdat, x) \ | 142 | #define alloc_bootmem_low_pages_node(pgdat, x) \ |
diff --git a/include/linux/kexec.h b/include/linux/kexec.h index d0b8458a703a..d2e6927bbaae 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h | |||
@@ -191,6 +191,7 @@ extern struct kimage *kexec_crash_image; | |||
191 | /* Location of a reserved region to hold the crash kernel. | 191 | /* Location of a reserved region to hold the crash kernel. |
192 | */ | 192 | */ |
193 | extern struct resource crashk_res; | 193 | extern struct resource crashk_res; |
194 | extern struct resource crashk_low_res; | ||
194 | typedef u32 note_buf_t[KEXEC_NOTE_BYTES/4]; | 195 | typedef u32 note_buf_t[KEXEC_NOTE_BYTES/4]; |
195 | extern note_buf_t __percpu *crash_notes; | 196 | extern note_buf_t __percpu *crash_notes; |
196 | extern u32 vmcoreinfo_note[VMCOREINFO_NOTE_SIZE/4]; | 197 | extern u32 vmcoreinfo_note[VMCOREINFO_NOTE_SIZE/4]; |
@@ -199,6 +200,8 @@ extern size_t vmcoreinfo_max_size; | |||
199 | 200 | ||
200 | int __init parse_crashkernel(char *cmdline, unsigned long long system_ram, | 201 | int __init parse_crashkernel(char *cmdline, unsigned long long system_ram, |
201 | unsigned long long *crash_size, unsigned long long *crash_base); | 202 | unsigned long long *crash_size, unsigned long long *crash_base); |
203 | int parse_crashkernel_low(char *cmdline, unsigned long long system_ram, | ||
204 | unsigned long long *crash_size, unsigned long long *crash_base); | ||
202 | int crash_shrink_memory(unsigned long new_size); | 205 | int crash_shrink_memory(unsigned long new_size); |
203 | size_t crash_get_memory_size(void); | 206 | size_t crash_get_memory_size(void); |
204 | void crash_free_reserved_phys_range(unsigned long begin, unsigned long end); | 207 | void crash_free_reserved_phys_range(unsigned long begin, unsigned long end); |
diff --git a/include/linux/memblock.h b/include/linux/memblock.h index d452ee191066..f388203db7e8 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h | |||
@@ -155,6 +155,7 @@ phys_addr_t memblock_alloc_base(phys_addr_t size, phys_addr_t align, | |||
155 | phys_addr_t __memblock_alloc_base(phys_addr_t size, phys_addr_t align, | 155 | phys_addr_t __memblock_alloc_base(phys_addr_t size, phys_addr_t align, |
156 | phys_addr_t max_addr); | 156 | phys_addr_t max_addr); |
157 | phys_addr_t memblock_phys_mem_size(void); | 157 | phys_addr_t memblock_phys_mem_size(void); |
158 | phys_addr_t memblock_mem_size(unsigned long limit_pfn); | ||
158 | phys_addr_t memblock_start_of_DRAM(void); | 159 | phys_addr_t memblock_start_of_DRAM(void); |
159 | phys_addr_t memblock_end_of_DRAM(void); | 160 | phys_addr_t memblock_end_of_DRAM(void); |
160 | void memblock_enforce_memory_limit(phys_addr_t memory_limit); | 161 | void memblock_enforce_memory_limit(phys_addr_t memory_limit); |
diff --git a/include/linux/mm.h b/include/linux/mm.h index 66e2f7c61e5c..9d9dcc35d6a1 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h | |||
@@ -1386,7 +1386,6 @@ extern void __init mmap_init(void); | |||
1386 | extern void show_mem(unsigned int flags); | 1386 | extern void show_mem(unsigned int flags); |
1387 | extern void si_meminfo(struct sysinfo * val); | 1387 | extern void si_meminfo(struct sysinfo * val); |
1388 | extern void si_meminfo_node(struct sysinfo *val, int nid); | 1388 | extern void si_meminfo_node(struct sysinfo *val, int nid); |
1389 | extern int after_bootmem; | ||
1390 | 1389 | ||
1391 | extern __printf(3, 4) | 1390 | extern __printf(3, 4) |
1392 | void warn_alloc_failed(gfp_t gfp_mask, int order, const char *fmt, ...); | 1391 | void warn_alloc_failed(gfp_t gfp_mask, int order, const char *fmt, ...); |
diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index 071d62c214a6..2de42f9401d2 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h | |||
@@ -23,7 +23,7 @@ extern int swiotlb_force; | |||
23 | #define IO_TLB_SHIFT 11 | 23 | #define IO_TLB_SHIFT 11 |
24 | 24 | ||
25 | extern void swiotlb_init(int verbose); | 25 | extern void swiotlb_init(int verbose); |
26 | extern void swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose); | 26 | int swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose); |
27 | extern unsigned long swiotlb_nr_tbl(void); | 27 | extern unsigned long swiotlb_nr_tbl(void); |
28 | extern int swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs); | 28 | extern int swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs); |
29 | 29 | ||
diff --git a/kernel/kexec.c b/kernel/kexec.c index 5e4bd7864c5d..2436ffcec91f 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c | |||
@@ -54,6 +54,12 @@ struct resource crashk_res = { | |||
54 | .end = 0, | 54 | .end = 0, |
55 | .flags = IORESOURCE_BUSY | IORESOURCE_MEM | 55 | .flags = IORESOURCE_BUSY | IORESOURCE_MEM |
56 | }; | 56 | }; |
57 | struct resource crashk_low_res = { | ||
58 | .name = "Crash kernel low", | ||
59 | .start = 0, | ||
60 | .end = 0, | ||
61 | .flags = IORESOURCE_BUSY | IORESOURCE_MEM | ||
62 | }; | ||
57 | 63 | ||
58 | int kexec_should_crash(struct task_struct *p) | 64 | int kexec_should_crash(struct task_struct *p) |
59 | { | 65 | { |
@@ -1369,10 +1375,11 @@ static int __init parse_crashkernel_simple(char *cmdline, | |||
1369 | * That function is the entry point for command line parsing and should be | 1375 | * That function is the entry point for command line parsing and should be |
1370 | * called from the arch-specific code. | 1376 | * called from the arch-specific code. |
1371 | */ | 1377 | */ |
1372 | int __init parse_crashkernel(char *cmdline, | 1378 | static int __init __parse_crashkernel(char *cmdline, |
1373 | unsigned long long system_ram, | 1379 | unsigned long long system_ram, |
1374 | unsigned long long *crash_size, | 1380 | unsigned long long *crash_size, |
1375 | unsigned long long *crash_base) | 1381 | unsigned long long *crash_base, |
1382 | const char *name) | ||
1376 | { | 1383 | { |
1377 | char *p = cmdline, *ck_cmdline = NULL; | 1384 | char *p = cmdline, *ck_cmdline = NULL; |
1378 | char *first_colon, *first_space; | 1385 | char *first_colon, *first_space; |
@@ -1382,16 +1389,16 @@ int __init parse_crashkernel(char *cmdline, | |||
1382 | *crash_base = 0; | 1389 | *crash_base = 0; |
1383 | 1390 | ||
1384 | /* find crashkernel and use the last one if there are more */ | 1391 | /* find crashkernel and use the last one if there are more */ |
1385 | p = strstr(p, "crashkernel="); | 1392 | p = strstr(p, name); |
1386 | while (p) { | 1393 | while (p) { |
1387 | ck_cmdline = p; | 1394 | ck_cmdline = p; |
1388 | p = strstr(p+1, "crashkernel="); | 1395 | p = strstr(p+1, name); |
1389 | } | 1396 | } |
1390 | 1397 | ||
1391 | if (!ck_cmdline) | 1398 | if (!ck_cmdline) |
1392 | return -EINVAL; | 1399 | return -EINVAL; |
1393 | 1400 | ||
1394 | ck_cmdline += 12; /* strlen("crashkernel=") */ | 1401 | ck_cmdline += strlen(name); |
1395 | 1402 | ||
1396 | /* | 1403 | /* |
1397 | * if the commandline contains a ':', then that's the extended | 1404 | * if the commandline contains a ':', then that's the extended |
@@ -1409,6 +1416,23 @@ int __init parse_crashkernel(char *cmdline, | |||
1409 | return 0; | 1416 | return 0; |
1410 | } | 1417 | } |
1411 | 1418 | ||
1419 | int __init parse_crashkernel(char *cmdline, | ||
1420 | unsigned long long system_ram, | ||
1421 | unsigned long long *crash_size, | ||
1422 | unsigned long long *crash_base) | ||
1423 | { | ||
1424 | return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base, | ||
1425 | "crashkernel="); | ||
1426 | } | ||
1427 | |||
1428 | int __init parse_crashkernel_low(char *cmdline, | ||
1429 | unsigned long long system_ram, | ||
1430 | unsigned long long *crash_size, | ||
1431 | unsigned long long *crash_base) | ||
1432 | { | ||
1433 | return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base, | ||
1434 | "crashkernel_low="); | ||
1435 | } | ||
1412 | 1436 | ||
1413 | static void update_vmcoreinfo_note(void) | 1437 | static void update_vmcoreinfo_note(void) |
1414 | { | 1438 | { |
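
__parse_crashkernel() generalizes the old parser by taking the option name, so "crashkernel=" and "crashkernel_low=" share the same scan that keeps only the last occurrence on the command line. A standalone sketch of that scan (the kernel then parses the value further; here it simply runs to the end of the string):

#include <stdio.h>
#include <string.h>

/* return the value part of the last `name` occurrence in cmdline, or NULL */
static const char *find_last_param(const char *cmdline, const char *name)
{
        const char *p = cmdline, *last = NULL;

        p = strstr(p, name);
        while (p) {
                last = p;
                p = strstr(p + 1, name);
        }
        return last ? last + strlen(name) : NULL;
}

int main(void)
{
        const char *cmdline =
                "ro quiet crashkernel=64M crashkernel=128M@16M crashkernel_low=72M";

        /* prints the tail starting at the last match; the kernel stops at the next space */
        printf("crashkernel=     -> %s\n", find_last_param(cmdline, "crashkernel="));
        printf("crashkernel_low= -> %s\n", find_last_param(cmdline, "crashkernel_low="));
        return 0;
}
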
diff --git a/lib/swiotlb.c b/lib/swiotlb.c index 196b06984dec..bfe02b8fc55b 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c | |||
@@ -122,11 +122,18 @@ static dma_addr_t swiotlb_virt_to_bus(struct device *hwdev, | |||
122 | return phys_to_dma(hwdev, virt_to_phys(address)); | 122 | return phys_to_dma(hwdev, virt_to_phys(address)); |
123 | } | 123 | } |
124 | 124 | ||
125 | static bool no_iotlb_memory; | ||
126 | |||
125 | void swiotlb_print_info(void) | 127 | void swiotlb_print_info(void) |
126 | { | 128 | { |
127 | unsigned long bytes = io_tlb_nslabs << IO_TLB_SHIFT; | 129 | unsigned long bytes = io_tlb_nslabs << IO_TLB_SHIFT; |
128 | unsigned char *vstart, *vend; | 130 | unsigned char *vstart, *vend; |
129 | 131 | ||
132 | if (no_iotlb_memory) { | ||
133 | pr_warn("software IO TLB: No low mem\n"); | ||
134 | return; | ||
135 | } | ||
136 | |||
130 | vstart = phys_to_virt(io_tlb_start); | 137 | vstart = phys_to_virt(io_tlb_start); |
131 | vend = phys_to_virt(io_tlb_end); | 138 | vend = phys_to_virt(io_tlb_end); |
132 | 139 | ||
@@ -136,7 +143,7 @@ void swiotlb_print_info(void) | |||
136 | bytes >> 20, vstart, vend - 1); | 143 | bytes >> 20, vstart, vend - 1); |
137 | } | 144 | } |
138 | 145 | ||
139 | void __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose) | 146 | int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose) |
140 | { | 147 | { |
141 | void *v_overflow_buffer; | 148 | void *v_overflow_buffer; |
142 | unsigned long i, bytes; | 149 | unsigned long i, bytes; |
@@ -150,9 +157,10 @@ void __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose) | |||
150 | /* | 157 | /* |
151 | * Get the overflow emergency buffer | 158 | * Get the overflow emergency buffer |
152 | */ | 159 | */ |
153 | v_overflow_buffer = alloc_bootmem_low_pages(PAGE_ALIGN(io_tlb_overflow)); | 160 | v_overflow_buffer = alloc_bootmem_low_pages_nopanic( |
161 | PAGE_ALIGN(io_tlb_overflow)); | ||
154 | if (!v_overflow_buffer) | 162 | if (!v_overflow_buffer) |
155 | panic("Cannot allocate SWIOTLB overflow buffer!\n"); | 163 | return -ENOMEM; |
156 | 164 | ||
157 | io_tlb_overflow_buffer = __pa(v_overflow_buffer); | 165 | io_tlb_overflow_buffer = __pa(v_overflow_buffer); |
158 | 166 | ||
@@ -169,15 +177,19 @@ void __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose) | |||
169 | 177 | ||
170 | if (verbose) | 178 | if (verbose) |
171 | swiotlb_print_info(); | 179 | swiotlb_print_info(); |
180 | |||
181 | return 0; | ||
172 | } | 182 | } |
173 | 183 | ||
174 | /* | 184 | /* |
175 | * Statically reserve bounce buffer space and initialize bounce buffer data | 185 | * Statically reserve bounce buffer space and initialize bounce buffer data |
176 | * structures for the software IO TLB used to implement the DMA API. | 186 | * structures for the software IO TLB used to implement the DMA API. |
177 | */ | 187 | */ |
178 | static void __init | 188 | void __init |
179 | swiotlb_init_with_default_size(size_t default_size, int verbose) | 189 | swiotlb_init(int verbose) |
180 | { | 190 | { |
191 | /* default to 64MB */ | ||
192 | size_t default_size = 64UL<<20; | ||
181 | unsigned char *vstart; | 193 | unsigned char *vstart; |
182 | unsigned long bytes; | 194 | unsigned long bytes; |
183 | 195 | ||
@@ -188,20 +200,16 @@ swiotlb_init_with_default_size(size_t default_size, int verbose) | |||
188 | 200 | ||
189 | bytes = io_tlb_nslabs << IO_TLB_SHIFT; | 201 | bytes = io_tlb_nslabs << IO_TLB_SHIFT; |
190 | 202 | ||
191 | /* | 203 | /* Get IO TLB memory from the low pages */ |
192 | * Get IO TLB memory from the low pages | 204 | vstart = alloc_bootmem_low_pages_nopanic(PAGE_ALIGN(bytes)); |
193 | */ | 205 | if (vstart && !swiotlb_init_with_tbl(vstart, io_tlb_nslabs, verbose)) |
194 | vstart = alloc_bootmem_low_pages(PAGE_ALIGN(bytes)); | 206 | return; |
195 | if (!vstart) | ||
196 | panic("Cannot allocate SWIOTLB buffer"); | ||
197 | |||
198 | swiotlb_init_with_tbl(vstart, io_tlb_nslabs, verbose); | ||
199 | } | ||
200 | 207 | ||
201 | void __init | 208 | if (io_tlb_start) |
202 | swiotlb_init(int verbose) | 209 | free_bootmem(io_tlb_start, |
203 | { | 210 | PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT)); |
204 | swiotlb_init_with_default_size(64 * (1<<20), verbose); /* default to 64MB */ | 211 | pr_warn("Cannot allocate SWIOTLB buffer"); |
212 | no_iotlb_memory = true; | ||
205 | } | 213 | } |
206 | 214 | ||
207 | /* | 215 | /* |
@@ -405,6 +413,9 @@ phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, | |||
405 | unsigned long offset_slots; | 413 | unsigned long offset_slots; |
406 | unsigned long max_slots; | 414 | unsigned long max_slots; |
407 | 415 | ||
416 | if (no_iotlb_memory) | ||
417 | panic("Can not allocate SWIOTLB buffer earlier and can't now provide you with the DMA bounce buffer"); | ||
418 | |||
408 | mask = dma_get_seg_boundary(hwdev); | 419 | mask = dma_get_seg_boundary(hwdev); |
409 | 420 | ||
410 | tbl_dma_addr &= mask; | 421 | tbl_dma_addr &= mask; |
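
swiotlb_init() now uses the _nopanic bootmem allocators and degrades gracefully: on failure it frees whatever it did get, warns, and records no_iotlb_memory, deferring the panic until a bounce buffer is actually requested. A compact sketch of that try-then-degrade shape, with the boot allocator mocked by malloc():

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

static bool no_iotlb_memory;
static void *io_tlb;

/* mock of alloc_bootmem_low_pages_nopanic(): may return NULL instead of panicking */
static void *alloc_low_nopanic(size_t size)
{
        return malloc(size);            /* imagine this failing when low memory is tiny */
}

static void swiotlb_init_demo(size_t bytes)
{
        io_tlb = alloc_low_nopanic(bytes);
        if (io_tlb)
                return;                 /* normal case: bounce buffer is ready */

        fprintf(stderr, "Cannot allocate SWIOTLB buffer\n");
        no_iotlb_memory = true;         /* remember; fail hard only on first use */
}

static void *map_single_demo(void)
{
        if (no_iotlb_memory) {
                fprintf(stderr, "no bounce buffer available\n");
                exit(1);                /* the kernel panics here instead */
        }
        return io_tlb;
}

int main(void)
{
        swiotlb_init_demo(64UL << 20);
        printf("bounce buffer at %p\n", map_single_demo());
        free(io_tlb);
        return 0;
}
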
diff --git a/mm/bootmem.c b/mm/bootmem.c index b93376c39b61..2b0bcb019ec2 100644 --- a/mm/bootmem.c +++ b/mm/bootmem.c | |||
@@ -833,6 +833,14 @@ void * __init __alloc_bootmem_low(unsigned long size, unsigned long align, | |||
833 | return ___alloc_bootmem(size, align, goal, ARCH_LOW_ADDRESS_LIMIT); | 833 | return ___alloc_bootmem(size, align, goal, ARCH_LOW_ADDRESS_LIMIT); |
834 | } | 834 | } |
835 | 835 | ||
836 | void * __init __alloc_bootmem_low_nopanic(unsigned long size, | ||
837 | unsigned long align, | ||
838 | unsigned long goal) | ||
839 | { | ||
840 | return ___alloc_bootmem_nopanic(size, align, goal, | ||
841 | ARCH_LOW_ADDRESS_LIMIT); | ||
842 | } | ||
843 | |||
836 | /** | 844 | /** |
837 | * __alloc_bootmem_low_node - allocate low boot memory from a specific node | 845 | * __alloc_bootmem_low_node - allocate low boot memory from a specific node |
838 | * @pgdat: node to allocate from | 846 | * @pgdat: node to allocate from |
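The new __alloc_bootmem_low_nopanic() mirrors __alloc_bootmem_low() but goes through ___alloc_bootmem_nopanic(), so a failed allocation below ARCH_LOW_ADDRESS_LIMIT returns NULL instead of panicking. A minimal usage sketch (the caller and its message are hypothetical, not from this commit):

    #include <linux/bootmem.h>

    static void __init example_low_alloc(unsigned long size)
    {
    	/* Ask for memory below ARCH_LOW_ADDRESS_LIMIT; NULL means none is left. */
    	void *buf = __alloc_bootmem_low_nopanic(size, PAGE_SIZE, 0);

    	if (!buf)
    		pr_warn("example: no low memory for %lu bytes\n", size);
    }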
diff --git a/mm/memblock.c b/mm/memblock.c index 88adc8afb610..b8d9147e5c08 100644 --- a/mm/memblock.c +++ b/mm/memblock.c | |||
@@ -828,6 +828,23 @@ phys_addr_t __init memblock_phys_mem_size(void) | |||
828 | return memblock.memory.total_size; | 828 | return memblock.memory.total_size; |
829 | } | 829 | } |
830 | 830 | ||
831 | phys_addr_t __init memblock_mem_size(unsigned long limit_pfn) | ||
832 | { | ||
833 | unsigned long pages = 0; | ||
834 | struct memblock_region *r; | ||
835 | unsigned long start_pfn, end_pfn; | ||
836 | |||
837 | for_each_memblock(memory, r) { | ||
838 | start_pfn = memblock_region_memory_base_pfn(r); | ||
839 | end_pfn = memblock_region_memory_end_pfn(r); | ||
840 | start_pfn = min_t(unsigned long, start_pfn, limit_pfn); | ||
841 | end_pfn = min_t(unsigned long, end_pfn, limit_pfn); | ||
842 | pages += end_pfn - start_pfn; | ||
843 | } | ||
844 | |||
845 | return (phys_addr_t)pages << PAGE_SHIFT; | ||
846 | } | ||
847 | |||
831 | /* lowest address */ | 848 | /* lowest address */ |
832 | phys_addr_t __init_memblock memblock_start_of_DRAM(void) | 849 | phys_addr_t __init_memblock memblock_start_of_DRAM(void) |
833 | { | 850 | { |
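memblock_mem_size() walks the memblock memory regions, clamps each one to limit_pfn, and returns the total in bytes, so boot code can find out how much RAM actually sits below a boundary before sizing a reservation there. A small sketch of such a caller (names are hypothetical, not part of this commit):

    #include <linux/memblock.h>

    static void __init example_report_low_mem(void)
    {
    	/* Total memblock memory below the 4G boundary, in bytes. */
    	phys_addr_t low = memblock_mem_size(1UL << (32 - PAGE_SHIFT));

    	pr_info("example: %lluMB of RAM below 4G\n",
    		(unsigned long long)(low >> 20));
    }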
diff --git a/mm/nobootmem.c b/mm/nobootmem.c index b8294fc03df8..5e07d36e381e 100644 --- a/mm/nobootmem.c +++ b/mm/nobootmem.c | |||
@@ -154,21 +154,6 @@ static void reset_node_lowmem_managed_pages(pg_data_t *pgdat) | |||
154 | } | 154 | } |
155 | 155 | ||
156 | /** | 156 | /** |
157 | * free_all_bootmem_node - release a node's free pages to the buddy allocator | ||
158 | * @pgdat: node to be released | ||
159 | * | ||
160 | * Returns the number of pages actually released. | ||
161 | */ | ||
162 | unsigned long __init free_all_bootmem_node(pg_data_t *pgdat) | ||
163 | { | ||
164 | register_page_bootmem_info_node(pgdat); | ||
165 | reset_node_lowmem_managed_pages(pgdat); | ||
166 | |||
167 | /* free_low_memory_core_early(MAX_NUMNODES) will be called later */ | ||
168 | return 0; | ||
169 | } | ||
170 | |||
171 | /** | ||
172 | * free_all_bootmem - release free pages to the buddy allocator | 157 | * free_all_bootmem - release free pages to the buddy allocator |
173 | * | 158 | * |
174 | * Returns the number of pages actually released. | 159 | * Returns the number of pages actually released. |
@@ -406,6 +391,14 @@ void * __init __alloc_bootmem_low(unsigned long size, unsigned long align, | |||
406 | return ___alloc_bootmem(size, align, goal, ARCH_LOW_ADDRESS_LIMIT); | 391 | return ___alloc_bootmem(size, align, goal, ARCH_LOW_ADDRESS_LIMIT); |
407 | } | 392 | } |
408 | 393 | ||
394 | void * __init __alloc_bootmem_low_nopanic(unsigned long size, | ||
395 | unsigned long align, | ||
396 | unsigned long goal) | ||
397 | { | ||
398 | return ___alloc_bootmem_nopanic(size, align, goal, | ||
399 | ARCH_LOW_ADDRESS_LIMIT); | ||
400 | } | ||
401 | |||
409 | /** | 402 | /** |
410 | * __alloc_bootmem_low_node - allocate low boot memory from a specific node | 403 | * __alloc_bootmem_low_node - allocate low boot memory from a specific node |
411 | * @pgdat: node to allocate from | 404 | * @pgdat: node to allocate from |