diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2009-12-08 16:27:33 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2009-12-08 16:27:33 -0500 |
commit | e33c01972239fee4696679ae5f7d1f340f424999 (patch) | |
tree | bd4bc3223ba572719b3a53142fdb98910450fe64 | |
parent | 343036cea2854acf8d4b4c930c0063223bc6b8a2 (diff) | |
parent | ccef086454d4c97e7b722e9303390207d681cb4c (diff) |
Merge branch 'x86-mm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'x86-mm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (36 commits)
x86, mm: Correct the implementation of is_untracked_pat_range()
x86/pat: Trivial: don't create debugfs for memtype if pat is disabled
x86, mtrr: Fix sorting of mtrr after subtracting
x86: Move find_smp_config() earlier and avoid bootmem usage
x86, platform: Change is_untracked_pat_range() to bool; cleanup init
x86: Change is_ISA_range() into an inline function
x86, mm: is_untracked_pat_range() takes a normal semiclosed range
x86, mm: Call is_untracked_pat_range() rather than is_ISA_range()
x86: UV SGI: Don't track GRU space in PAT
x86: SGI UV: Fix BAU initialization
x86, numa: Use near(er) online node instead of roundrobin for NUMA
x86, numa, bootmem: Only free bootmem on NUMA failure path
x86: Change crash kernel to reserve via reserve_early()
x86: Eliminate redundant/contradicting cache line size config options
x86: When cleaning MTRRs, do not fold WP into UC
x86: remove "extern" from function prototypes in <asm/proto.h>
x86, mm: Report state of NX protections during boot
x86, mm: Clean up and simplify NX enablement
x86, pageattr: Make set_memory_(x|nx) aware of NX support
x86, sleep: Always save the value of EFER
...
Fix up conflicts (added both iommu_shutdown and is_untracked_pat_range)
to 'struct x86_platform_ops') in
arch/x86/include/asm/x86_init.h
arch/x86/kernel/x86_init.c
43 files changed, 656 insertions, 343 deletions
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu index 5e99762eb5c2..08e442bc3ab9 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu | |||
@@ -301,15 +301,11 @@ config X86_CPU | |||
301 | 301 | ||
302 | # | 302 | # |
303 | # Define implied options from the CPU selection here | 303 | # Define implied options from the CPU selection here |
304 | config X86_L1_CACHE_BYTES | 304 | config X86_INTERNODE_CACHE_SHIFT |
305 | int | 305 | int |
306 | default "128" if MPSC | 306 | default "12" if X86_VSMP |
307 | default "64" if GENERIC_CPU || MK8 || MCORE2 || MATOM || X86_32 | 307 | default "7" if NUMA |
308 | 308 | default X86_L1_CACHE_SHIFT | |
309 | config X86_INTERNODE_CACHE_BYTES | ||
310 | int | ||
311 | default "4096" if X86_VSMP | ||
312 | default X86_L1_CACHE_BYTES if !X86_VSMP | ||
313 | 309 | ||
314 | config X86_CMPXCHG | 310 | config X86_CMPXCHG |
315 | def_bool X86_64 || (X86_32 && !M386) | 311 | def_bool X86_64 || (X86_32 && !M386) |
@@ -317,9 +313,9 @@ config X86_CMPXCHG | |||
317 | config X86_L1_CACHE_SHIFT | 313 | config X86_L1_CACHE_SHIFT |
318 | int | 314 | int |
319 | default "7" if MPENTIUM4 || MPSC | 315 | default "7" if MPENTIUM4 || MPSC |
316 | default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MATOM || MVIAC7 || X86_GENERIC || GENERIC_CPU | ||
320 | default "4" if X86_ELAN || M486 || M386 || MGEODEGX1 | 317 | default "4" if X86_ELAN || M486 || M386 || MGEODEGX1 |
321 | default "5" if MWINCHIP3D || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX | 318 | default "5" if MWINCHIP3D || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX |
322 | default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MATOM || MVIAC7 || X86_GENERIC || GENERIC_CPU | ||
323 | 319 | ||
324 | config X86_XADD | 320 | config X86_XADD |
325 | def_bool y | 321 | def_bool y |
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index 077e1b69198e..faff0dc9c06a 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S | |||
@@ -107,8 +107,7 @@ ENTRY(startup_32) | |||
107 | lgdt gdt(%ebp) | 107 | lgdt gdt(%ebp) |
108 | 108 | ||
109 | /* Enable PAE mode */ | 109 | /* Enable PAE mode */ |
110 | xorl %eax, %eax | 110 | movl $(X86_CR4_PAE), %eax |
111 | orl $(X86_CR4_PAE), %eax | ||
112 | movl %eax, %cr4 | 111 | movl %eax, %cr4 |
113 | 112 | ||
114 | /* | 113 | /* |
diff --git a/arch/x86/boot/compressed/vmlinux.lds.S b/arch/x86/boot/compressed/vmlinux.lds.S index f4193bb48782..a6f1a59a5b0c 100644 --- a/arch/x86/boot/compressed/vmlinux.lds.S +++ b/arch/x86/boot/compressed/vmlinux.lds.S | |||
@@ -4,6 +4,7 @@ OUTPUT_FORMAT(CONFIG_OUTPUT_FORMAT, CONFIG_OUTPUT_FORMAT, CONFIG_OUTPUT_FORMAT) | |||
4 | 4 | ||
5 | #undef i386 | 5 | #undef i386 |
6 | 6 | ||
7 | #include <asm/cache.h> | ||
7 | #include <asm/page_types.h> | 8 | #include <asm/page_types.h> |
8 | 9 | ||
9 | #ifdef CONFIG_X86_64 | 10 | #ifdef CONFIG_X86_64 |
@@ -46,7 +47,7 @@ SECTIONS | |||
46 | *(.data.*) | 47 | *(.data.*) |
47 | _edata = . ; | 48 | _edata = . ; |
48 | } | 49 | } |
49 | . = ALIGN(CONFIG_X86_L1_CACHE_BYTES); | 50 | . = ALIGN(L1_CACHE_BYTES); |
50 | .bss : { | 51 | .bss : { |
51 | _bss = . ; | 52 | _bss = . ; |
52 | *(.bss) | 53 | *(.bss) |
diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h index 4518dc500903..60d2b2db0bc5 100644 --- a/arch/x86/include/asm/acpi.h +++ b/arch/x86/include/asm/acpi.h | |||
@@ -118,7 +118,7 @@ extern void acpi_restore_state_mem(void); | |||
118 | extern unsigned long acpi_wakeup_address; | 118 | extern unsigned long acpi_wakeup_address; |
119 | 119 | ||
120 | /* early initialization routine */ | 120 | /* early initialization routine */ |
121 | extern void acpi_reserve_bootmem(void); | 121 | extern void acpi_reserve_wakeup_memory(void); |
122 | 122 | ||
123 | /* | 123 | /* |
124 | * Check if the CPU can handle C2 and deeper | 124 | * Check if the CPU can handle C2 and deeper |
@@ -158,6 +158,7 @@ struct bootnode; | |||
158 | 158 | ||
159 | #ifdef CONFIG_ACPI_NUMA | 159 | #ifdef CONFIG_ACPI_NUMA |
160 | extern int acpi_numa; | 160 | extern int acpi_numa; |
161 | extern int acpi_get_nodes(struct bootnode *physnodes); | ||
161 | extern int acpi_scan_nodes(unsigned long start, unsigned long end); | 162 | extern int acpi_scan_nodes(unsigned long start, unsigned long end); |
162 | #define NR_NODE_MEMBLKS (MAX_NUMNODES*2) | 163 | #define NR_NODE_MEMBLKS (MAX_NUMNODES*2) |
163 | extern void acpi_fake_nodes(const struct bootnode *fake_nodes, | 164 | extern void acpi_fake_nodes(const struct bootnode *fake_nodes, |
diff --git a/arch/x86/include/asm/cache.h b/arch/x86/include/asm/cache.h index 549860d3be8f..2f9047cfaaca 100644 --- a/arch/x86/include/asm/cache.h +++ b/arch/x86/include/asm/cache.h | |||
@@ -9,12 +9,13 @@ | |||
9 | 9 | ||
10 | #define __read_mostly __attribute__((__section__(".data.read_mostly"))) | 10 | #define __read_mostly __attribute__((__section__(".data.read_mostly"))) |
11 | 11 | ||
12 | #define INTERNODE_CACHE_SHIFT CONFIG_X86_INTERNODE_CACHE_SHIFT | ||
13 | #define INTERNODE_CACHE_BYTES (1 << INTERNODE_CACHE_SHIFT) | ||
14 | |||
12 | #ifdef CONFIG_X86_VSMP | 15 | #ifdef CONFIG_X86_VSMP |
13 | /* vSMP Internode cacheline shift */ | ||
14 | #define INTERNODE_CACHE_SHIFT (12) | ||
15 | #ifdef CONFIG_SMP | 16 | #ifdef CONFIG_SMP |
16 | #define __cacheline_aligned_in_smp \ | 17 | #define __cacheline_aligned_in_smp \ |
17 | __attribute__((__aligned__(1 << (INTERNODE_CACHE_SHIFT)))) \ | 18 | __attribute__((__aligned__(INTERNODE_CACHE_BYTES))) \ |
18 | __page_aligned_data | 19 | __page_aligned_data |
19 | #endif | 20 | #endif |
20 | #endif | 21 | #endif |
diff --git a/arch/x86/include/asm/cacheflush.h b/arch/x86/include/asm/cacheflush.h index 9076add593a8..634c40a739a6 100644 --- a/arch/x86/include/asm/cacheflush.h +++ b/arch/x86/include/asm/cacheflush.h | |||
@@ -177,6 +177,7 @@ void clflush_cache_range(void *addr, unsigned int size); | |||
177 | #ifdef CONFIG_DEBUG_RODATA | 177 | #ifdef CONFIG_DEBUG_RODATA |
178 | void mark_rodata_ro(void); | 178 | void mark_rodata_ro(void); |
179 | extern const int rodata_test_data; | 179 | extern const int rodata_test_data; |
180 | extern int kernel_set_to_readonly; | ||
180 | void set_kernel_text_rw(void); | 181 | void set_kernel_text_rw(void); |
181 | void set_kernel_text_ro(void); | 182 | void set_kernel_text_ro(void); |
182 | #else | 183 | #else |
diff --git a/arch/x86/include/asm/e820.h b/arch/x86/include/asm/e820.h index 40b4e614fe71..761249e396fe 100644 --- a/arch/x86/include/asm/e820.h +++ b/arch/x86/include/asm/e820.h | |||
@@ -61,6 +61,12 @@ struct e820map { | |||
61 | struct e820entry map[E820_X_MAX]; | 61 | struct e820entry map[E820_X_MAX]; |
62 | }; | 62 | }; |
63 | 63 | ||
64 | #define ISA_START_ADDRESS 0xa0000 | ||
65 | #define ISA_END_ADDRESS 0x100000 | ||
66 | |||
67 | #define BIOS_BEGIN 0x000a0000 | ||
68 | #define BIOS_END 0x00100000 | ||
69 | |||
64 | #ifdef __KERNEL__ | 70 | #ifdef __KERNEL__ |
65 | /* see comment in arch/x86/kernel/e820.c */ | 71 | /* see comment in arch/x86/kernel/e820.c */ |
66 | extern struct e820map e820; | 72 | extern struct e820map e820; |
@@ -126,15 +132,18 @@ extern void e820_reserve_resources(void); | |||
126 | extern void e820_reserve_resources_late(void); | 132 | extern void e820_reserve_resources_late(void); |
127 | extern void setup_memory_map(void); | 133 | extern void setup_memory_map(void); |
128 | extern char *default_machine_specific_memory_setup(void); | 134 | extern char *default_machine_specific_memory_setup(void); |
129 | #endif /* __KERNEL__ */ | ||
130 | #endif /* __ASSEMBLY__ */ | ||
131 | 135 | ||
132 | #define ISA_START_ADDRESS 0xa0000 | 136 | /* |
133 | #define ISA_END_ADDRESS 0x100000 | 137 | * Returns true iff the specified range [s,e) is completely contained inside |
134 | #define is_ISA_range(s, e) ((s) >= ISA_START_ADDRESS && (e) < ISA_END_ADDRESS) | 138 | * the ISA region. |
139 | */ | ||
140 | static inline bool is_ISA_range(u64 s, u64 e) | ||
141 | { | ||
142 | return s >= ISA_START_ADDRESS && e <= ISA_END_ADDRESS; | ||
143 | } | ||
135 | 144 | ||
136 | #define BIOS_BEGIN 0x000a0000 | 145 | #endif /* __KERNEL__ */ |
137 | #define BIOS_END 0x00100000 | 146 | #endif /* __ASSEMBLY__ */ |
138 | 147 | ||
139 | #ifdef __KERNEL__ | 148 | #ifdef __KERNEL__ |
140 | #include <linux/ioport.h> | 149 | #include <linux/ioport.h> |
diff --git a/arch/x86/include/asm/k8.h b/arch/x86/include/asm/k8.h index c2d1f3b58e5f..f70e60071fe8 100644 --- a/arch/x86/include/asm/k8.h +++ b/arch/x86/include/asm/k8.h | |||
@@ -4,13 +4,16 @@ | |||
4 | #include <linux/pci.h> | 4 | #include <linux/pci.h> |
5 | 5 | ||
6 | extern struct pci_device_id k8_nb_ids[]; | 6 | extern struct pci_device_id k8_nb_ids[]; |
7 | struct bootnode; | ||
7 | 8 | ||
8 | extern int early_is_k8_nb(u32 value); | 9 | extern int early_is_k8_nb(u32 value); |
9 | extern struct pci_dev **k8_northbridges; | 10 | extern struct pci_dev **k8_northbridges; |
10 | extern int num_k8_northbridges; | 11 | extern int num_k8_northbridges; |
11 | extern int cache_k8_northbridges(void); | 12 | extern int cache_k8_northbridges(void); |
12 | extern void k8_flush_garts(void); | 13 | extern void k8_flush_garts(void); |
13 | extern int k8_scan_nodes(unsigned long start, unsigned long end); | 14 | extern int k8_get_nodes(struct bootnode *nodes); |
15 | extern int k8_numa_init(unsigned long start_pfn, unsigned long end_pfn); | ||
16 | extern int k8_scan_nodes(void); | ||
14 | 17 | ||
15 | #ifdef CONFIG_K8_NB | 18 | #ifdef CONFIG_K8_NB |
16 | static inline struct pci_dev *node_to_k8_nb_misc(int node) | 19 | static inline struct pci_dev *node_to_k8_nb_misc(int node) |
diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h index 61d90b1331c3..d8bf23a88d05 100644 --- a/arch/x86/include/asm/mpspec.h +++ b/arch/x86/include/asm/mpspec.h | |||
@@ -71,12 +71,7 @@ static inline void early_get_smp_config(void) | |||
71 | 71 | ||
72 | static inline void find_smp_config(void) | 72 | static inline void find_smp_config(void) |
73 | { | 73 | { |
74 | x86_init.mpparse.find_smp_config(1); | 74 | x86_init.mpparse.find_smp_config(); |
75 | } | ||
76 | |||
77 | static inline void early_find_smp_config(void) | ||
78 | { | ||
79 | x86_init.mpparse.find_smp_config(0); | ||
80 | } | 75 | } |
81 | 76 | ||
82 | #ifdef CONFIG_X86_MPPARSE | 77 | #ifdef CONFIG_X86_MPPARSE |
@@ -89,7 +84,7 @@ extern void default_mpc_oem_bus_info(struct mpc_bus *m, char *str); | |||
89 | # else | 84 | # else |
90 | # define default_mpc_oem_bus_info NULL | 85 | # define default_mpc_oem_bus_info NULL |
91 | # endif | 86 | # endif |
92 | extern void default_find_smp_config(unsigned int reserve); | 87 | extern void default_find_smp_config(void); |
93 | extern void default_get_smp_config(unsigned int early); | 88 | extern void default_get_smp_config(unsigned int early); |
94 | #else | 89 | #else |
95 | static inline void early_reserve_e820_mpc_new(void) { } | 90 | static inline void early_reserve_e820_mpc_new(void) { } |
@@ -97,7 +92,7 @@ static inline void early_reserve_e820_mpc_new(void) { } | |||
97 | #define default_mpc_apic_id NULL | 92 | #define default_mpc_apic_id NULL |
98 | #define default_smp_read_mpc_oem NULL | 93 | #define default_smp_read_mpc_oem NULL |
99 | #define default_mpc_oem_bus_info NULL | 94 | #define default_mpc_oem_bus_info NULL |
100 | #define default_find_smp_config x86_init_uint_noop | 95 | #define default_find_smp_config x86_init_noop |
101 | #define default_get_smp_config x86_init_uint_noop | 96 | #define default_get_smp_config x86_init_uint_noop |
102 | #endif | 97 | #endif |
103 | 98 | ||
diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h index 6473f5ccff85..642fe34b36a2 100644 --- a/arch/x86/include/asm/page_types.h +++ b/arch/x86/include/asm/page_types.h | |||
@@ -49,7 +49,8 @@ extern unsigned long max_pfn_mapped; | |||
49 | extern unsigned long init_memory_mapping(unsigned long start, | 49 | extern unsigned long init_memory_mapping(unsigned long start, |
50 | unsigned long end); | 50 | unsigned long end); |
51 | 51 | ||
52 | extern void initmem_init(unsigned long start_pfn, unsigned long end_pfn); | 52 | extern void initmem_init(unsigned long start_pfn, unsigned long end_pfn, |
53 | int acpi, int k8); | ||
53 | extern void free_initmem(void); | 54 | extern void free_initmem(void); |
54 | 55 | ||
55 | #endif /* !__ASSEMBLY__ */ | 56 | #endif /* !__ASSEMBLY__ */ |
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index af6fd360ab35..a34c785c5a63 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h | |||
@@ -16,6 +16,8 @@ | |||
16 | 16 | ||
17 | #ifndef __ASSEMBLY__ | 17 | #ifndef __ASSEMBLY__ |
18 | 18 | ||
19 | #include <asm/x86_init.h> | ||
20 | |||
19 | /* | 21 | /* |
20 | * ZERO_PAGE is a global shared page that is always zero: used | 22 | * ZERO_PAGE is a global shared page that is always zero: used |
21 | * for zero-mapped memory areas etc.. | 23 | * for zero-mapped memory areas etc.. |
@@ -270,9 +272,9 @@ static inline int is_new_memtype_allowed(u64 paddr, unsigned long size, | |||
270 | unsigned long new_flags) | 272 | unsigned long new_flags) |
271 | { | 273 | { |
272 | /* | 274 | /* |
273 | * PAT type is always WB for ISA. So no need to check. | 275 | * PAT type is always WB for untracked ranges, so no need to check. |
274 | */ | 276 | */ |
275 | if (is_ISA_range(paddr, paddr + size - 1)) | 277 | if (x86_platform.is_untracked_pat_range(paddr, paddr + size)) |
276 | return 1; | 278 | return 1; |
277 | 279 | ||
278 | /* | 280 | /* |
diff --git a/arch/x86/include/asm/proto.h b/arch/x86/include/asm/proto.h index 621f56d73121..4009f6534f52 100644 --- a/arch/x86/include/asm/proto.h +++ b/arch/x86/include/asm/proto.h | |||
@@ -5,18 +5,19 @@ | |||
5 | 5 | ||
6 | /* misc architecture specific prototypes */ | 6 | /* misc architecture specific prototypes */ |
7 | 7 | ||
8 | extern void early_idt_handler(void); | 8 | void early_idt_handler(void); |
9 | 9 | ||
10 | extern void system_call(void); | 10 | void system_call(void); |
11 | extern void syscall_init(void); | 11 | void syscall_init(void); |
12 | 12 | ||
13 | extern void ia32_syscall(void); | 13 | void ia32_syscall(void); |
14 | extern void ia32_cstar_target(void); | 14 | void ia32_cstar_target(void); |
15 | extern void ia32_sysenter_target(void); | 15 | void ia32_sysenter_target(void); |
16 | 16 | ||
17 | extern void syscall32_cpu_init(void); | 17 | void syscall32_cpu_init(void); |
18 | 18 | ||
19 | extern void check_efer(void); | 19 | void x86_configure_nx(void); |
20 | void x86_report_nx(void); | ||
20 | 21 | ||
21 | extern int reboot_force; | 22 | extern int reboot_force; |
22 | 23 | ||
diff --git a/arch/x86/include/asm/sections.h b/arch/x86/include/asm/sections.h index 1b7ee5d673c2..0a5242428659 100644 --- a/arch/x86/include/asm/sections.h +++ b/arch/x86/include/asm/sections.h | |||
@@ -2,7 +2,13 @@ | |||
2 | #define _ASM_X86_SECTIONS_H | 2 | #define _ASM_X86_SECTIONS_H |
3 | 3 | ||
4 | #include <asm-generic/sections.h> | 4 | #include <asm-generic/sections.h> |
5 | #include <asm/uaccess.h> | ||
5 | 6 | ||
6 | extern char __brk_base[], __brk_limit[]; | 7 | extern char __brk_base[], __brk_limit[]; |
8 | extern struct exception_table_entry __stop___ex_table[]; | ||
9 | |||
10 | #if defined(CONFIG_X86_64) && defined(CONFIG_DEBUG_RODATA) | ||
11 | extern char __end_rodata_hpage_align[]; | ||
12 | #endif | ||
7 | 13 | ||
8 | #endif /* _ASM_X86_SECTIONS_H */ | 14 | #endif /* _ASM_X86_SECTIONS_H */ |
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index d8e71459f025..ea0e8ea15e15 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h | |||
@@ -26,7 +26,7 @@ struct x86_init_mpparse { | |||
26 | void (*smp_read_mpc_oem)(struct mpc_table *mpc); | 26 | void (*smp_read_mpc_oem)(struct mpc_table *mpc); |
27 | void (*mpc_oem_pci_bus)(struct mpc_bus *m); | 27 | void (*mpc_oem_pci_bus)(struct mpc_bus *m); |
28 | void (*mpc_oem_bus_info)(struct mpc_bus *m, char *name); | 28 | void (*mpc_oem_bus_info)(struct mpc_bus *m, char *name); |
29 | void (*find_smp_config)(unsigned int reserve); | 29 | void (*find_smp_config)(void); |
30 | void (*get_smp_config)(unsigned int early); | 30 | void (*get_smp_config)(unsigned int early); |
31 | }; | 31 | }; |
32 | 32 | ||
@@ -125,12 +125,14 @@ struct x86_cpuinit_ops { | |||
125 | * @calibrate_tsc: calibrate TSC | 125 | * @calibrate_tsc: calibrate TSC |
126 | * @get_wallclock: get time from HW clock like RTC etc. | 126 | * @get_wallclock: get time from HW clock like RTC etc. |
127 | * @set_wallclock: set time back to HW clock | 127 | * @set_wallclock: set time back to HW clock |
128 | * @is_untracked_pat_range exclude from PAT logic | ||
128 | */ | 129 | */ |
129 | struct x86_platform_ops { | 130 | struct x86_platform_ops { |
130 | unsigned long (*calibrate_tsc)(void); | 131 | unsigned long (*calibrate_tsc)(void); |
131 | unsigned long (*get_wallclock)(void); | 132 | unsigned long (*get_wallclock)(void); |
132 | int (*set_wallclock)(unsigned long nowtime); | 133 | int (*set_wallclock)(unsigned long nowtime); |
133 | void (*iommu_shutdown)(void); | 134 | void (*iommu_shutdown)(void); |
135 | bool (*is_untracked_pat_range)(u64 start, u64 end); | ||
134 | }; | 136 | }; |
135 | 137 | ||
136 | extern struct x86_init_ops x86_init; | 138 | extern struct x86_init_ops x86_init; |
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c index ca93638ba430..82e508677b91 100644 --- a/arch/x86/kernel/acpi/sleep.c +++ b/arch/x86/kernel/acpi/sleep.c | |||
@@ -78,12 +78,9 @@ int acpi_save_state_mem(void) | |||
78 | #ifndef CONFIG_64BIT | 78 | #ifndef CONFIG_64BIT |
79 | store_gdt((struct desc_ptr *)&header->pmode_gdt); | 79 | store_gdt((struct desc_ptr *)&header->pmode_gdt); |
80 | 80 | ||
81 | header->pmode_efer_low = nx_enabled; | 81 | if (rdmsr_safe(MSR_EFER, &header->pmode_efer_low, |
82 | if (header->pmode_efer_low & 1) { | 82 | &header->pmode_efer_high)) |
83 | /* This is strange, why not save efer, always? */ | 83 | header->pmode_efer_low = header->pmode_efer_high = 0; |
84 | rdmsr(MSR_EFER, header->pmode_efer_low, | ||
85 | header->pmode_efer_high); | ||
86 | } | ||
87 | #endif /* !CONFIG_64BIT */ | 84 | #endif /* !CONFIG_64BIT */ |
88 | 85 | ||
89 | header->pmode_cr0 = read_cr0(); | 86 | header->pmode_cr0 = read_cr0(); |
@@ -119,29 +116,32 @@ void acpi_restore_state_mem(void) | |||
119 | 116 | ||
120 | 117 | ||
121 | /** | 118 | /** |
122 | * acpi_reserve_bootmem - do _very_ early ACPI initialisation | 119 | * acpi_reserve_wakeup_memory - do _very_ early ACPI initialisation |
123 | * | 120 | * |
124 | * We allocate a page from the first 1MB of memory for the wakeup | 121 | * We allocate a page from the first 1MB of memory for the wakeup |
125 | * routine for when we come back from a sleep state. The | 122 | * routine for when we come back from a sleep state. The |
126 | * runtime allocator allows specification of <16MB pages, but not | 123 | * runtime allocator allows specification of <16MB pages, but not |
127 | * <1MB pages. | 124 | * <1MB pages. |
128 | */ | 125 | */ |
129 | void __init acpi_reserve_bootmem(void) | 126 | void __init acpi_reserve_wakeup_memory(void) |
130 | { | 127 | { |
128 | unsigned long mem; | ||
129 | |||
131 | if ((&wakeup_code_end - &wakeup_code_start) > WAKEUP_SIZE) { | 130 | if ((&wakeup_code_end - &wakeup_code_start) > WAKEUP_SIZE) { |
132 | printk(KERN_ERR | 131 | printk(KERN_ERR |
133 | "ACPI: Wakeup code way too big, S3 disabled.\n"); | 132 | "ACPI: Wakeup code way too big, S3 disabled.\n"); |
134 | return; | 133 | return; |
135 | } | 134 | } |
136 | 135 | ||
137 | acpi_realmode = (unsigned long)alloc_bootmem_low(WAKEUP_SIZE); | 136 | mem = find_e820_area(0, 1<<20, WAKEUP_SIZE, PAGE_SIZE); |
138 | 137 | ||
139 | if (!acpi_realmode) { | 138 | if (mem == -1L) { |
140 | printk(KERN_ERR "ACPI: Cannot allocate lowmem, S3 disabled.\n"); | 139 | printk(KERN_ERR "ACPI: Cannot allocate lowmem, S3 disabled.\n"); |
141 | return; | 140 | return; |
142 | } | 141 | } |
143 | 142 | acpi_realmode = (unsigned long) phys_to_virt(mem); | |
144 | acpi_wakeup_address = virt_to_phys((void *)acpi_realmode); | 143 | acpi_wakeup_address = mem; |
144 | reserve_early(mem, mem + WAKEUP_SIZE, "ACPI WAKEUP"); | ||
145 | } | 145 | } |
146 | 146 | ||
147 | 147 | ||
diff --git a/arch/x86/kernel/apic/numaq_32.c b/arch/x86/kernel/apic/numaq_32.c index 07cdbdcd7a92..98c4665f251c 100644 --- a/arch/x86/kernel/apic/numaq_32.c +++ b/arch/x86/kernel/apic/numaq_32.c | |||
@@ -264,11 +264,6 @@ static void __init smp_read_mpc_oem(struct mpc_table *mpc) | |||
264 | static __init void early_check_numaq(void) | 264 | static __init void early_check_numaq(void) |
265 | { | 265 | { |
266 | /* | 266 | /* |
267 | * Find possible boot-time SMP configuration: | ||
268 | */ | ||
269 | early_find_smp_config(); | ||
270 | |||
271 | /* | ||
272 | * get boot-time SMP configuration: | 267 | * get boot-time SMP configuration: |
273 | */ | 268 | */ |
274 | if (smp_found_config) | 269 | if (smp_found_config) |
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 130c4b934877..b684bb303cbf 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c | |||
@@ -30,10 +30,22 @@ | |||
30 | #include <asm/apic.h> | 30 | #include <asm/apic.h> |
31 | #include <asm/ipi.h> | 31 | #include <asm/ipi.h> |
32 | #include <asm/smp.h> | 32 | #include <asm/smp.h> |
33 | #include <asm/x86_init.h> | ||
33 | 34 | ||
34 | DEFINE_PER_CPU(int, x2apic_extra_bits); | 35 | DEFINE_PER_CPU(int, x2apic_extra_bits); |
35 | 36 | ||
36 | static enum uv_system_type uv_system_type; | 37 | static enum uv_system_type uv_system_type; |
38 | static u64 gru_start_paddr, gru_end_paddr; | ||
39 | |||
40 | static inline bool is_GRU_range(u64 start, u64 end) | ||
41 | { | ||
42 | return start >= gru_start_paddr && end <= gru_end_paddr; | ||
43 | } | ||
44 | |||
45 | static bool uv_is_untracked_pat_range(u64 start, u64 end) | ||
46 | { | ||
47 | return is_ISA_range(start, end) || is_GRU_range(start, end); | ||
48 | } | ||
37 | 49 | ||
38 | static int early_get_nodeid(void) | 50 | static int early_get_nodeid(void) |
39 | { | 51 | { |
@@ -49,6 +61,7 @@ static int early_get_nodeid(void) | |||
49 | static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id) | 61 | static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id) |
50 | { | 62 | { |
51 | if (!strcmp(oem_id, "SGI")) { | 63 | if (!strcmp(oem_id, "SGI")) { |
64 | x86_platform.is_untracked_pat_range = uv_is_untracked_pat_range; | ||
52 | if (!strcmp(oem_table_id, "UVL")) | 65 | if (!strcmp(oem_table_id, "UVL")) |
53 | uv_system_type = UV_LEGACY_APIC; | 66 | uv_system_type = UV_LEGACY_APIC; |
54 | else if (!strcmp(oem_table_id, "UVX")) | 67 | else if (!strcmp(oem_table_id, "UVX")) |
@@ -385,8 +398,12 @@ static __init void map_gru_high(int max_pnode) | |||
385 | int shift = UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT; | 398 | int shift = UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT; |
386 | 399 | ||
387 | gru.v = uv_read_local_mmr(UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR); | 400 | gru.v = uv_read_local_mmr(UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR); |
388 | if (gru.s.enable) | 401 | if (gru.s.enable) { |
389 | map_high("GRU", gru.s.base, shift, max_pnode, map_wb); | 402 | map_high("GRU", gru.s.base, shift, max_pnode, map_wb); |
403 | gru_start_paddr = ((u64)gru.s.base << shift); | ||
404 | gru_end_paddr = gru_start_paddr + (1UL << shift) * (max_pnode + 1); | ||
405 | |||
406 | } | ||
390 | } | 407 | } |
391 | 408 | ||
392 | static __init void map_mmr_high(int max_pnode) | 409 | static __init void map_mmr_high(int max_pnode) |
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index a4ec8b647544..c1afa990a6c8 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c | |||
@@ -1136,7 +1136,7 @@ void __cpuinit cpu_init(void) | |||
1136 | wrmsrl(MSR_KERNEL_GS_BASE, 0); | 1136 | wrmsrl(MSR_KERNEL_GS_BASE, 0); |
1137 | barrier(); | 1137 | barrier(); |
1138 | 1138 | ||
1139 | check_efer(); | 1139 | x86_configure_nx(); |
1140 | if (cpu != 0) | 1140 | if (cpu != 0) |
1141 | enable_x2apic(); | 1141 | enable_x2apic(); |
1142 | 1142 | ||
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 40e1835b35e8..c900b73f9224 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c | |||
@@ -263,8 +263,12 @@ static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c) | |||
263 | /* Don't do the funky fallback heuristics the AMD version employs | 263 | /* Don't do the funky fallback heuristics the AMD version employs |
264 | for now. */ | 264 | for now. */ |
265 | node = apicid_to_node[apicid]; | 265 | node = apicid_to_node[apicid]; |
266 | if (node == NUMA_NO_NODE || !node_online(node)) | 266 | if (node == NUMA_NO_NODE) |
267 | node = first_node(node_online_map); | 267 | node = first_node(node_online_map); |
268 | else if (!node_online(node)) { | ||
269 | /* reuse the value from init_cpu_to_node() */ | ||
270 | node = cpu_to_node(cpu); | ||
271 | } | ||
268 | numa_set_node(cpu, node); | 272 | numa_set_node(cpu, node); |
269 | 273 | ||
270 | printk(KERN_INFO "CPU %d/0x%x -> Node %d\n", cpu, apicid, node); | 274 | printk(KERN_INFO "CPU %d/0x%x -> Node %d\n", cpu, apicid, node); |
diff --git a/arch/x86/kernel/cpu/mtrr/cleanup.c b/arch/x86/kernel/cpu/mtrr/cleanup.c index 73c86db5acbe..09b1698e0466 100644 --- a/arch/x86/kernel/cpu/mtrr/cleanup.c +++ b/arch/x86/kernel/cpu/mtrr/cleanup.c | |||
@@ -170,6 +170,41 @@ static int __init cmp_range(const void *x1, const void *x2) | |||
170 | return start1 - start2; | 170 | return start1 - start2; |
171 | } | 171 | } |
172 | 172 | ||
173 | static int __init clean_sort_range(struct res_range *range, int az) | ||
174 | { | ||
175 | int i, j, k = az - 1, nr_range = 0; | ||
176 | |||
177 | for (i = 0; i < k; i++) { | ||
178 | if (range[i].end) | ||
179 | continue; | ||
180 | for (j = k; j > i; j--) { | ||
181 | if (range[j].end) { | ||
182 | k = j; | ||
183 | break; | ||
184 | } | ||
185 | } | ||
186 | if (j == i) | ||
187 | break; | ||
188 | range[i].start = range[k].start; | ||
189 | range[i].end = range[k].end; | ||
190 | range[k].start = 0; | ||
191 | range[k].end = 0; | ||
192 | k--; | ||
193 | } | ||
194 | /* count it */ | ||
195 | for (i = 0; i < az; i++) { | ||
196 | if (!range[i].end) { | ||
197 | nr_range = i; | ||
198 | break; | ||
199 | } | ||
200 | } | ||
201 | |||
202 | /* sort them */ | ||
203 | sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL); | ||
204 | |||
205 | return nr_range; | ||
206 | } | ||
207 | |||
173 | #define BIOS_BUG_MSG KERN_WARNING \ | 208 | #define BIOS_BUG_MSG KERN_WARNING \ |
174 | "WARNING: BIOS bug: VAR MTRR %d contains strange UC entry under 1M, check with your system vendor!\n" | 209 | "WARNING: BIOS bug: VAR MTRR %d contains strange UC entry under 1M, check with your system vendor!\n" |
175 | 210 | ||
@@ -223,22 +258,18 @@ x86_get_mtrr_mem_range(struct res_range *range, int nr_range, | |||
223 | subtract_range(range, extra_remove_base, | 258 | subtract_range(range, extra_remove_base, |
224 | extra_remove_base + extra_remove_size - 1); | 259 | extra_remove_base + extra_remove_size - 1); |
225 | 260 | ||
226 | /* get new range num */ | ||
227 | nr_range = 0; | ||
228 | for (i = 0; i < RANGE_NUM; i++) { | ||
229 | if (!range[i].end) | ||
230 | continue; | ||
231 | nr_range++; | ||
232 | } | ||
233 | if (debug_print) { | 261 | if (debug_print) { |
234 | printk(KERN_DEBUG "After UC checking\n"); | 262 | printk(KERN_DEBUG "After UC checking\n"); |
235 | for (i = 0; i < nr_range; i++) | 263 | for (i = 0; i < RANGE_NUM; i++) { |
264 | if (!range[i].end) | ||
265 | continue; | ||
236 | printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n", | 266 | printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n", |
237 | range[i].start, range[i].end + 1); | 267 | range[i].start, range[i].end + 1); |
268 | } | ||
238 | } | 269 | } |
239 | 270 | ||
240 | /* sort the ranges */ | 271 | /* sort the ranges */ |
241 | sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL); | 272 | nr_range = clean_sort_range(range, RANGE_NUM); |
242 | if (debug_print) { | 273 | if (debug_print) { |
243 | printk(KERN_DEBUG "After sorting\n"); | 274 | printk(KERN_DEBUG "After sorting\n"); |
244 | for (i = 0; i < nr_range; i++) | 275 | for (i = 0; i < nr_range; i++) |
@@ -689,8 +720,6 @@ static int __init mtrr_need_cleanup(void) | |||
689 | continue; | 720 | continue; |
690 | if (!size) | 721 | if (!size) |
691 | type = MTRR_NUM_TYPES; | 722 | type = MTRR_NUM_TYPES; |
692 | if (type == MTRR_TYPE_WRPROT) | ||
693 | type = MTRR_TYPE_UNCACHABLE; | ||
694 | num[type]++; | 723 | num[type]++; |
695 | } | 724 | } |
696 | 725 | ||
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 5a1b9758fd62..309689245431 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c | |||
@@ -189,9 +189,26 @@ static void wait_for_nmi(void) | |||
189 | nmi_wait_count++; | 189 | nmi_wait_count++; |
190 | } | 190 | } |
191 | 191 | ||
192 | static inline int | ||
193 | within(unsigned long addr, unsigned long start, unsigned long end) | ||
194 | { | ||
195 | return addr >= start && addr < end; | ||
196 | } | ||
197 | |||
192 | static int | 198 | static int |
193 | do_ftrace_mod_code(unsigned long ip, void *new_code) | 199 | do_ftrace_mod_code(unsigned long ip, void *new_code) |
194 | { | 200 | { |
201 | /* | ||
202 | * On x86_64, kernel text mappings are mapped read-only with | ||
203 | * CONFIG_DEBUG_RODATA. So we use the kernel identity mapping instead | ||
204 | * of the kernel text mapping to modify the kernel text. | ||
205 | * | ||
206 | * For 32bit kernels, these mappings are same and we can use | ||
207 | * kernel identity mapping to modify code. | ||
208 | */ | ||
209 | if (within(ip, (unsigned long)_text, (unsigned long)_etext)) | ||
210 | ip = (unsigned long)__va(__pa(ip)); | ||
211 | |||
195 | mod_code_ip = (void *)ip; | 212 | mod_code_ip = (void *)ip; |
196 | mod_code_newcode = new_code; | 213 | mod_code_newcode = new_code; |
197 | 214 | ||
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index 050c278481b1..7fd318bac59c 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S | |||
@@ -18,6 +18,8 @@ | |||
18 | #include <asm/asm-offsets.h> | 18 | #include <asm/asm-offsets.h> |
19 | #include <asm/setup.h> | 19 | #include <asm/setup.h> |
20 | #include <asm/processor-flags.h> | 20 | #include <asm/processor-flags.h> |
21 | #include <asm/msr-index.h> | ||
22 | #include <asm/cpufeature.h> | ||
21 | #include <asm/percpu.h> | 23 | #include <asm/percpu.h> |
22 | 24 | ||
23 | /* Physical address */ | 25 | /* Physical address */ |
@@ -297,25 +299,27 @@ ENTRY(startup_32_smp) | |||
297 | orl %edx,%eax | 299 | orl %edx,%eax |
298 | movl %eax,%cr4 | 300 | movl %eax,%cr4 |
299 | 301 | ||
300 | btl $5, %eax # check if PAE is enabled | 302 | testb $X86_CR4_PAE, %al # check if PAE is enabled |
301 | jnc 6f | 303 | jz 6f |
302 | 304 | ||
303 | /* Check if extended functions are implemented */ | 305 | /* Check if extended functions are implemented */ |
304 | movl $0x80000000, %eax | 306 | movl $0x80000000, %eax |
305 | cpuid | 307 | cpuid |
306 | cmpl $0x80000000, %eax | 308 | /* Value must be in the range 0x80000001 to 0x8000ffff */ |
307 | jbe 6f | 309 | subl $0x80000001, %eax |
310 | cmpl $(0x8000ffff-0x80000001), %eax | ||
311 | ja 6f | ||
308 | mov $0x80000001, %eax | 312 | mov $0x80000001, %eax |
309 | cpuid | 313 | cpuid |
310 | /* Execute Disable bit supported? */ | 314 | /* Execute Disable bit supported? */ |
311 | btl $20, %edx | 315 | btl $(X86_FEATURE_NX & 31), %edx |
312 | jnc 6f | 316 | jnc 6f |
313 | 317 | ||
314 | /* Setup EFER (Extended Feature Enable Register) */ | 318 | /* Setup EFER (Extended Feature Enable Register) */ |
315 | movl $0xc0000080, %ecx | 319 | movl $MSR_EFER, %ecx |
316 | rdmsr | 320 | rdmsr |
317 | 321 | ||
318 | btsl $11, %eax | 322 | btsl $_EFER_NX, %eax |
319 | /* Make changes effective */ | 323 | /* Make changes effective */ |
320 | wrmsr | 324 | wrmsr |
321 | 325 | ||
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 22db86a37643..2d8b5035371c 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S | |||
@@ -262,11 +262,11 @@ ENTRY(secondary_startup_64) | |||
262 | .quad x86_64_start_kernel | 262 | .quad x86_64_start_kernel |
263 | ENTRY(initial_gs) | 263 | ENTRY(initial_gs) |
264 | .quad INIT_PER_CPU_VAR(irq_stack_union) | 264 | .quad INIT_PER_CPU_VAR(irq_stack_union) |
265 | __FINITDATA | ||
266 | 265 | ||
267 | ENTRY(stack_start) | 266 | ENTRY(stack_start) |
268 | .quad init_thread_union+THREAD_SIZE-8 | 267 | .quad init_thread_union+THREAD_SIZE-8 |
269 | .word 0 | 268 | .word 0 |
269 | __FINITDATA | ||
270 | 270 | ||
271 | bad_address: | 271 | bad_address: |
272 | jmp bad_address | 272 | jmp bad_address |
@@ -340,6 +340,7 @@ ENTRY(name) | |||
340 | i = i + 1 ; \ | 340 | i = i + 1 ; \ |
341 | .endr | 341 | .endr |
342 | 342 | ||
343 | .data | ||
343 | /* | 344 | /* |
344 | * This default setting generates an ident mapping at address 0x100000 | 345 | * This default setting generates an ident mapping at address 0x100000 |
345 | * and a mapping for the kernel that precisely maps virtual address | 346 | * and a mapping for the kernel that precisely maps virtual address |
diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c index c843f8406da2..a3fa43ba5d3b 100644 --- a/arch/x86/kernel/machine_kexec_32.c +++ b/arch/x86/kernel/machine_kexec_32.c | |||
@@ -158,8 +158,7 @@ int machine_kexec_prepare(struct kimage *image) | |||
158 | { | 158 | { |
159 | int error; | 159 | int error; |
160 | 160 | ||
161 | if (nx_enabled) | 161 | set_pages_x(image->control_code_page, 1); |
162 | set_pages_x(image->control_code_page, 1); | ||
163 | error = machine_kexec_alloc_page_tables(image); | 162 | error = machine_kexec_alloc_page_tables(image); |
164 | if (error) | 163 | if (error) |
165 | return error; | 164 | return error; |
@@ -173,8 +172,7 @@ int machine_kexec_prepare(struct kimage *image) | |||
173 | */ | 172 | */ |
174 | void machine_kexec_cleanup(struct kimage *image) | 173 | void machine_kexec_cleanup(struct kimage *image) |
175 | { | 174 | { |
176 | if (nx_enabled) | 175 | set_pages_nx(image->control_code_page, 1); |
177 | set_pages_nx(image->control_code_page, 1); | ||
178 | machine_kexec_free_page_tables(image); | 176 | machine_kexec_free_page_tables(image); |
179 | } | 177 | } |
180 | 178 | ||
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 5be95ef4ffec..35a57c963df9 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c | |||
@@ -667,36 +667,18 @@ void __init default_get_smp_config(unsigned int early) | |||
667 | */ | 667 | */ |
668 | } | 668 | } |
669 | 669 | ||
670 | static void __init smp_reserve_bootmem(struct mpf_intel *mpf) | 670 | static void __init smp_reserve_memory(struct mpf_intel *mpf) |
671 | { | 671 | { |
672 | unsigned long size = get_mpc_size(mpf->physptr); | 672 | unsigned long size = get_mpc_size(mpf->physptr); |
673 | #ifdef CONFIG_X86_32 | ||
674 | /* | ||
675 | * We cannot access to MPC table to compute table size yet, | ||
676 | * as only few megabytes from the bottom is mapped now. | ||
677 | * PC-9800's MPC table places on the very last of physical | ||
678 | * memory; so that simply reserving PAGE_SIZE from mpf->physptr | ||
679 | * yields BUG() in reserve_bootmem. | ||
680 | * also need to make sure physptr is below than max_low_pfn | ||
681 | * we don't need reserve the area above max_low_pfn | ||
682 | */ | ||
683 | unsigned long end = max_low_pfn * PAGE_SIZE; | ||
684 | 673 | ||
685 | if (mpf->physptr < end) { | 674 | reserve_early(mpf->physptr, mpf->physptr+size, "MP-table mpc"); |
686 | if (mpf->physptr + size > end) | ||
687 | size = end - mpf->physptr; | ||
688 | reserve_bootmem_generic(mpf->physptr, size, BOOTMEM_DEFAULT); | ||
689 | } | ||
690 | #else | ||
691 | reserve_bootmem_generic(mpf->physptr, size, BOOTMEM_DEFAULT); | ||
692 | #endif | ||
693 | } | 675 | } |
694 | 676 | ||
695 | static int __init smp_scan_config(unsigned long base, unsigned long length, | 677 | static int __init smp_scan_config(unsigned long base, unsigned long length) |
696 | unsigned reserve) | ||
697 | { | 678 | { |
698 | unsigned int *bp = phys_to_virt(base); | 679 | unsigned int *bp = phys_to_virt(base); |
699 | struct mpf_intel *mpf; | 680 | struct mpf_intel *mpf; |
681 | unsigned long mem; | ||
700 | 682 | ||
701 | apic_printk(APIC_VERBOSE, "Scan SMP from %p for %ld bytes.\n", | 683 | apic_printk(APIC_VERBOSE, "Scan SMP from %p for %ld bytes.\n", |
702 | bp, length); | 684 | bp, length); |
@@ -717,12 +699,10 @@ static int __init smp_scan_config(unsigned long base, unsigned long length, | |||
717 | printk(KERN_INFO "found SMP MP-table at [%p] %llx\n", | 699 | printk(KERN_INFO "found SMP MP-table at [%p] %llx\n", |
718 | mpf, (u64)virt_to_phys(mpf)); | 700 | mpf, (u64)virt_to_phys(mpf)); |
719 | 701 | ||
720 | if (!reserve) | 702 | mem = virt_to_phys(mpf); |
721 | return 1; | 703 | reserve_early(mem, mem + sizeof(*mpf), "MP-table mpf"); |
722 | reserve_bootmem_generic(virt_to_phys(mpf), sizeof(*mpf), | ||
723 | BOOTMEM_DEFAULT); | ||
724 | if (mpf->physptr) | 704 | if (mpf->physptr) |
725 | smp_reserve_bootmem(mpf); | 705 | smp_reserve_memory(mpf); |
726 | 706 | ||
727 | return 1; | 707 | return 1; |
728 | } | 708 | } |
@@ -732,7 +712,7 @@ static int __init smp_scan_config(unsigned long base, unsigned long length, | |||
732 | return 0; | 712 | return 0; |
733 | } | 713 | } |
734 | 714 | ||
735 | void __init default_find_smp_config(unsigned int reserve) | 715 | void __init default_find_smp_config(void) |
736 | { | 716 | { |
737 | unsigned int address; | 717 | unsigned int address; |
738 | 718 | ||
@@ -744,9 +724,9 @@ void __init default_find_smp_config(unsigned int reserve) | |||
744 | * 2) Scan the top 1K of base RAM | 724 | * 2) Scan the top 1K of base RAM |
745 | * 3) Scan the 64K of bios | 725 | * 3) Scan the 64K of bios |
746 | */ | 726 | */ |
747 | if (smp_scan_config(0x0, 0x400, reserve) || | 727 | if (smp_scan_config(0x0, 0x400) || |
748 | smp_scan_config(639 * 0x400, 0x400, reserve) || | 728 | smp_scan_config(639 * 0x400, 0x400) || |
749 | smp_scan_config(0xF0000, 0x10000, reserve)) | 729 | smp_scan_config(0xF0000, 0x10000)) |
750 | return; | 730 | return; |
751 | /* | 731 | /* |
752 | * If it is an SMP machine we should know now, unless the | 732 | * If it is an SMP machine we should know now, unless the |
@@ -767,7 +747,7 @@ void __init default_find_smp_config(unsigned int reserve) | |||
767 | 747 | ||
768 | address = get_bios_ebda(); | 748 | address = get_bios_ebda(); |
769 | if (address) | 749 | if (address) |
770 | smp_scan_config(address, 0x400, reserve); | 750 | smp_scan_config(address, 0x400); |
771 | } | 751 | } |
772 | 752 | ||
773 | #ifdef CONFIG_X86_IO_APIC | 753 | #ifdef CONFIG_X86_IO_APIC |
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 82e88cdda9bc..946a311a25c9 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c | |||
@@ -106,6 +106,7 @@ | |||
106 | #include <asm/percpu.h> | 106 | #include <asm/percpu.h> |
107 | #include <asm/topology.h> | 107 | #include <asm/topology.h> |
108 | #include <asm/apicdef.h> | 108 | #include <asm/apicdef.h> |
109 | #include <asm/k8.h> | ||
109 | #ifdef CONFIG_X86_64 | 110 | #ifdef CONFIG_X86_64 |
110 | #include <asm/numa_64.h> | 111 | #include <asm/numa_64.h> |
111 | #endif | 112 | #endif |
@@ -487,42 +488,11 @@ static void __init reserve_early_setup_data(void) | |||
487 | 488 | ||
488 | #ifdef CONFIG_KEXEC | 489 | #ifdef CONFIG_KEXEC |
489 | 490 | ||
490 | /** | ||
491 | * Reserve @size bytes of crashkernel memory at any suitable offset. | ||
492 | * | ||
493 | * @size: Size of the crashkernel memory to reserve. | ||
494 | * Returns the base address on success, and -1ULL on failure. | ||
495 | */ | ||
496 | static | ||
497 | unsigned long long __init find_and_reserve_crashkernel(unsigned long long size) | ||
498 | { | ||
499 | const unsigned long long alignment = 16<<20; /* 16M */ | ||
500 | unsigned long long start = 0LL; | ||
501 | |||
502 | while (1) { | ||
503 | int ret; | ||
504 | |||
505 | start = find_e820_area(start, ULONG_MAX, size, alignment); | ||
506 | if (start == -1ULL) | ||
507 | return start; | ||
508 | |||
509 | /* try to reserve it */ | ||
510 | ret = reserve_bootmem_generic(start, size, BOOTMEM_EXCLUSIVE); | ||
511 | if (ret >= 0) | ||
512 | return start; | ||
513 | |||
514 | start += alignment; | ||
515 | } | ||
516 | } | ||
517 | |||
518 | static inline unsigned long long get_total_mem(void) | 491 | static inline unsigned long long get_total_mem(void) |
519 | { | 492 | { |
520 | unsigned long long total; | 493 | unsigned long long total; |
521 | 494 | ||
522 | total = max_low_pfn - min_low_pfn; | 495 | total = max_pfn - min_low_pfn; |
523 | #ifdef CONFIG_HIGHMEM | ||
524 | total += highend_pfn - highstart_pfn; | ||
525 | #endif | ||
526 | 496 | ||
527 | return total << PAGE_SHIFT; | 497 | return total << PAGE_SHIFT; |
528 | } | 498 | } |
@@ -542,21 +512,25 @@ static void __init reserve_crashkernel(void) | |||
542 | 512 | ||
543 | /* 0 means: find the address automatically */ | 513 | /* 0 means: find the address automatically */ |
544 | if (crash_base <= 0) { | 514 | if (crash_base <= 0) { |
545 | crash_base = find_and_reserve_crashkernel(crash_size); | 515 | const unsigned long long alignment = 16<<20; /* 16M */ |
516 | |||
517 | crash_base = find_e820_area(alignment, ULONG_MAX, crash_size, | ||
518 | alignment); | ||
546 | if (crash_base == -1ULL) { | 519 | if (crash_base == -1ULL) { |
547 | pr_info("crashkernel reservation failed. " | 520 | pr_info("crashkernel reservation failed - No suitable area found.\n"); |
548 | "No suitable area found.\n"); | ||
549 | return; | 521 | return; |
550 | } | 522 | } |
551 | } else { | 523 | } else { |
552 | ret = reserve_bootmem_generic(crash_base, crash_size, | 524 | unsigned long long start; |
553 | BOOTMEM_EXCLUSIVE); | 525 | |
554 | if (ret < 0) { | 526 | start = find_e820_area(crash_base, ULONG_MAX, crash_size, |
555 | pr_info("crashkernel reservation failed - " | 527 | 1<<20); |
556 | "memory is in use\n"); | 528 | if (start != crash_base) { |
529 | pr_info("crashkernel reservation failed - memory is in use.\n"); | ||
557 | return; | 530 | return; |
558 | } | 531 | } |
559 | } | 532 | } |
533 | reserve_early(crash_base, crash_base + crash_size, "CRASH KERNEL"); | ||
560 | 534 | ||
561 | printk(KERN_INFO "Reserving %ldMB of memory at %ldMB " | 535 | printk(KERN_INFO "Reserving %ldMB of memory at %ldMB " |
562 | "for crashkernel (System RAM: %ldMB)\n", | 536 | "for crashkernel (System RAM: %ldMB)\n", |
@@ -699,6 +673,9 @@ static struct dmi_system_id __initdata bad_bios_dmi_table[] = { | |||
699 | 673 | ||
700 | void __init setup_arch(char **cmdline_p) | 674 | void __init setup_arch(char **cmdline_p) |
701 | { | 675 | { |
676 | int acpi = 0; | ||
677 | int k8 = 0; | ||
678 | |||
702 | #ifdef CONFIG_X86_32 | 679 | #ifdef CONFIG_X86_32 |
703 | memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data)); | 680 | memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data)); |
704 | visws_early_detect(); | 681 | visws_early_detect(); |
@@ -791,21 +768,18 @@ void __init setup_arch(char **cmdline_p) | |||
791 | strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE); | 768 | strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE); |
792 | *cmdline_p = command_line; | 769 | *cmdline_p = command_line; |
793 | 770 | ||
794 | #ifdef CONFIG_X86_64 | ||
795 | /* | 771 | /* |
796 | * Must call this twice: Once just to detect whether hardware doesn't | 772 | * x86_configure_nx() is called before parse_early_param() to detect |
797 | * support NX (so that the early EHCI debug console setup can safely | 773 | * whether hardware doesn't support NX (so that the early EHCI debug |
798 | * call set_fixmap(), and then again after parsing early parameters to | 774 | * console setup can safely call set_fixmap()). It may then be called |
799 | * honor the respective command line option. | 775 | * again from within noexec_setup() during parsing early parameters |
776 | * to honor the respective command line option. | ||
800 | */ | 777 | */ |
801 | check_efer(); | 778 | x86_configure_nx(); |
802 | #endif | ||
803 | 779 | ||
804 | parse_early_param(); | 780 | parse_early_param(); |
805 | 781 | ||
806 | #ifdef CONFIG_X86_64 | 782 | x86_report_nx(); |
807 | check_efer(); | ||
808 | #endif | ||
809 | 783 | ||
810 | /* Must be before kernel pagetables are setup */ | 784 | /* Must be before kernel pagetables are setup */ |
811 | vmi_activate(); | 785 | vmi_activate(); |
@@ -901,6 +875,13 @@ void __init setup_arch(char **cmdline_p) | |||
901 | 875 | ||
902 | reserve_brk(); | 876 | reserve_brk(); |
903 | 877 | ||
878 | #ifdef CONFIG_ACPI_SLEEP | ||
879 | /* | ||
880 | * Reserve low memory region for sleep support. | ||
881 | * even before init_memory_mapping | ||
882 | */ | ||
883 | acpi_reserve_wakeup_memory(); | ||
884 | #endif | ||
904 | init_gbpages(); | 885 | init_gbpages(); |
905 | 886 | ||
906 | /* max_pfn_mapped is updated here */ | 887 | /* max_pfn_mapped is updated here */ |
@@ -927,6 +908,8 @@ void __init setup_arch(char **cmdline_p) | |||
927 | 908 | ||
928 | reserve_initrd(); | 909 | reserve_initrd(); |
929 | 910 | ||
911 | reserve_crashkernel(); | ||
912 | |||
930 | vsmp_init(); | 913 | vsmp_init(); |
931 | 914 | ||
932 | io_delay_init(); | 915 | io_delay_init(); |
@@ -938,27 +921,24 @@ void __init setup_arch(char **cmdline_p) | |||
938 | 921 | ||
939 | early_acpi_boot_init(); | 922 | early_acpi_boot_init(); |
940 | 923 | ||
924 | /* | ||
925 | * Find and reserve possible boot-time SMP configuration: | ||
926 | */ | ||
927 | find_smp_config(); | ||
928 | |||
941 | #ifdef CONFIG_ACPI_NUMA | 929 | #ifdef CONFIG_ACPI_NUMA |
942 | /* | 930 | /* |
943 | * Parse SRAT to discover nodes. | 931 | * Parse SRAT to discover nodes. |
944 | */ | 932 | */ |
945 | acpi_numa_init(); | 933 | acpi = acpi_numa_init(); |
946 | #endif | 934 | #endif |
947 | 935 | ||
948 | initmem_init(0, max_pfn); | 936 | #ifdef CONFIG_K8_NUMA |
949 | 937 | if (!acpi) | |
950 | #ifdef CONFIG_ACPI_SLEEP | 938 | k8 = !k8_numa_init(0, max_pfn); |
951 | /* | ||
952 | * Reserve low memory region for sleep support. | ||
953 | */ | ||
954 | acpi_reserve_bootmem(); | ||
955 | #endif | 939 | #endif |
956 | /* | ||
957 | * Find and reserve possible boot-time SMP configuration: | ||
958 | */ | ||
959 | find_smp_config(); | ||
960 | 940 | ||
961 | reserve_crashkernel(); | 941 | initmem_init(0, max_pfn, acpi, k8); |
962 | 942 | ||
963 | #ifdef CONFIG_X86_64 | 943 | #ifdef CONFIG_X86_64 |
964 | /* | 944 | /* |
diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c index 1740c85e24bb..364d015efebc 100644 --- a/arch/x86/kernel/tlb_uv.c +++ b/arch/x86/kernel/tlb_uv.c | |||
@@ -817,10 +817,8 @@ static int __init uv_init_blade(int blade) | |||
817 | */ | 817 | */ |
818 | apicid = blade_to_first_apicid(blade); | 818 | apicid = blade_to_first_apicid(blade); |
819 | pa = uv_read_global_mmr64(pnode, UVH_BAU_DATA_CONFIG); | 819 | pa = uv_read_global_mmr64(pnode, UVH_BAU_DATA_CONFIG); |
820 | if ((pa & 0xff) != UV_BAU_MESSAGE) { | 820 | uv_write_global_mmr64(pnode, UVH_BAU_DATA_CONFIG, |
821 | uv_write_global_mmr64(pnode, UVH_BAU_DATA_CONFIG, | ||
822 | ((apicid << 32) | UV_BAU_MESSAGE)); | 821 | ((apicid << 32) | UV_BAU_MESSAGE)); |
823 | } | ||
824 | return 0; | 822 | return 0; |
825 | } | 823 | } |
826 | 824 | ||
diff --git a/arch/x86/kernel/visws_quirks.c b/arch/x86/kernel/visws_quirks.c index abda6f53e71e..34a279a7471d 100644 --- a/arch/x86/kernel/visws_quirks.c +++ b/arch/x86/kernel/visws_quirks.c | |||
@@ -197,7 +197,7 @@ static void __init MP_processor_info(struct mpc_cpu *m) | |||
197 | apic_version[m->apicid] = ver; | 197 | apic_version[m->apicid] = ver; |
198 | } | 198 | } |
199 | 199 | ||
200 | static void __init visws_find_smp_config(unsigned int reserve) | 200 | static void __init visws_find_smp_config(void) |
201 | { | 201 | { |
202 | struct mpc_cpu *mp = phys_to_virt(CO_CPU_TAB_PHYS); | 202 | struct mpc_cpu *mp = phys_to_virt(CO_CPU_TAB_PHYS); |
203 | unsigned short ncpus = readw(phys_to_virt(CO_CPU_NUM_PHYS)); | 203 | unsigned short ncpus = readw(phys_to_virt(CO_CPU_NUM_PHYS)); |
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 3c68fe2d46cf..f3f2104408d9 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S | |||
@@ -41,6 +41,32 @@ ENTRY(phys_startup_64) | |||
41 | jiffies_64 = jiffies; | 41 | jiffies_64 = jiffies; |
42 | #endif | 42 | #endif |
43 | 43 | ||
44 | #if defined(CONFIG_X86_64) && defined(CONFIG_DEBUG_RODATA) | ||
45 | /* | ||
46 | * On 64-bit, align RODATA to 2MB so that even with CONFIG_DEBUG_RODATA | ||
47 | * we retain large page mappings for boundaries spanning kernel text, rodata | ||
48 | * and data sections. | ||
49 | * | ||
50 | * However, kernel identity mappings will have different RWX permissions | ||
51 | * to the pages mapping to text and to the pages padding (which are freed) the | ||
52 | * text section. Hence kernel identity mappings will be broken to smaller | ||
53 | * pages. For 64-bit, kernel text and kernel identity mappings are different, | ||
54 | * so we can enable protection checks that come with CONFIG_DEBUG_RODATA, | ||
55 | * as well as retain 2MB large page mappings for kernel text. | ||
56 | */ | ||
57 | #define X64_ALIGN_DEBUG_RODATA_BEGIN . = ALIGN(HPAGE_SIZE); | ||
58 | |||
59 | #define X64_ALIGN_DEBUG_RODATA_END \ | ||
60 | . = ALIGN(HPAGE_SIZE); \ | ||
61 | __end_rodata_hpage_align = .; | ||
62 | |||
63 | #else | ||
64 | |||
65 | #define X64_ALIGN_DEBUG_RODATA_BEGIN | ||
66 | #define X64_ALIGN_DEBUG_RODATA_END | ||
67 | |||
68 | #endif | ||
69 | |||
44 | PHDRS { | 70 | PHDRS { |
45 | text PT_LOAD FLAGS(5); /* R_E */ | 71 | text PT_LOAD FLAGS(5); /* R_E */ |
46 | data PT_LOAD FLAGS(7); /* RWE */ | 72 | data PT_LOAD FLAGS(7); /* RWE */ |
@@ -90,7 +116,9 @@ SECTIONS | |||
90 | 116 | ||
91 | EXCEPTION_TABLE(16) :text = 0x9090 | 117 | EXCEPTION_TABLE(16) :text = 0x9090 |
92 | 118 | ||
119 | X64_ALIGN_DEBUG_RODATA_BEGIN | ||
93 | RO_DATA(PAGE_SIZE) | 120 | RO_DATA(PAGE_SIZE) |
121 | X64_ALIGN_DEBUG_RODATA_END | ||
94 | 122 | ||
95 | /* Data */ | 123 | /* Data */ |
96 | .data : AT(ADDR(.data) - LOAD_OFFSET) { | 124 | .data : AT(ADDR(.data) - LOAD_OFFSET) { |
@@ -107,13 +135,13 @@ SECTIONS | |||
107 | 135 | ||
108 | PAGE_ALIGNED_DATA(PAGE_SIZE) | 136 | PAGE_ALIGNED_DATA(PAGE_SIZE) |
109 | 137 | ||
110 | CACHELINE_ALIGNED_DATA(CONFIG_X86_L1_CACHE_BYTES) | 138 | CACHELINE_ALIGNED_DATA(L1_CACHE_BYTES) |
111 | 139 | ||
112 | DATA_DATA | 140 | DATA_DATA |
113 | CONSTRUCTORS | 141 | CONSTRUCTORS |
114 | 142 | ||
115 | /* rarely changed data like cpu maps */ | 143 | /* rarely changed data like cpu maps */ |
116 | READ_MOSTLY_DATA(CONFIG_X86_INTERNODE_CACHE_BYTES) | 144 | READ_MOSTLY_DATA(INTERNODE_CACHE_BYTES) |
117 | 145 | ||
118 | /* End of data section */ | 146 | /* End of data section */ |
119 | _edata = .; | 147 | _edata = .; |
@@ -137,12 +165,12 @@ SECTIONS | |||
137 | *(.vsyscall_0) | 165 | *(.vsyscall_0) |
138 | } :user | 166 | } :user |
139 | 167 | ||
140 | . = ALIGN(CONFIG_X86_L1_CACHE_BYTES); | 168 | . = ALIGN(L1_CACHE_BYTES); |
141 | .vsyscall_fn : AT(VLOAD(.vsyscall_fn)) { | 169 | .vsyscall_fn : AT(VLOAD(.vsyscall_fn)) { |
142 | *(.vsyscall_fn) | 170 | *(.vsyscall_fn) |
143 | } | 171 | } |
144 | 172 | ||
145 | . = ALIGN(CONFIG_X86_L1_CACHE_BYTES); | 173 | . = ALIGN(L1_CACHE_BYTES); |
146 | .vsyscall_gtod_data : AT(VLOAD(.vsyscall_gtod_data)) { | 174 | .vsyscall_gtod_data : AT(VLOAD(.vsyscall_gtod_data)) { |
147 | *(.vsyscall_gtod_data) | 175 | *(.vsyscall_gtod_data) |
148 | } | 176 | } |
@@ -166,7 +194,7 @@ SECTIONS | |||
166 | } | 194 | } |
167 | vgetcpu_mode = VVIRT(.vgetcpu_mode); | 195 | vgetcpu_mode = VVIRT(.vgetcpu_mode); |
168 | 196 | ||
169 | . = ALIGN(CONFIG_X86_L1_CACHE_BYTES); | 197 | . = ALIGN(L1_CACHE_BYTES); |
170 | .jiffies : AT(VLOAD(.jiffies)) { | 198 | .jiffies : AT(VLOAD(.jiffies)) { |
171 | *(.jiffies) | 199 | *(.jiffies) |
172 | } | 200 | } |
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index d11c5ff7c65e..ccd179dec36e 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c | |||
@@ -13,6 +13,7 @@ | |||
13 | #include <asm/e820.h> | 13 | #include <asm/e820.h> |
14 | #include <asm/time.h> | 14 | #include <asm/time.h> |
15 | #include <asm/irq.h> | 15 | #include <asm/irq.h> |
16 | #include <asm/pat.h> | ||
16 | #include <asm/tsc.h> | 17 | #include <asm/tsc.h> |
17 | #include <asm/iommu.h> | 18 | #include <asm/iommu.h> |
18 | 19 | ||
@@ -80,4 +81,5 @@ struct x86_platform_ops x86_platform = { | |||
80 | .get_wallclock = mach_get_cmos_time, | 81 | .get_wallclock = mach_get_cmos_time, |
81 | .set_wallclock = mach_set_rtc_mmss, | 82 | .set_wallclock = mach_set_rtc_mmss, |
82 | .iommu_shutdown = iommu_shutdown_noop, | 83 | .iommu_shutdown = iommu_shutdown_noop, |
84 | .is_untracked_pat_range = is_ISA_range, | ||
83 | }; | 85 | }; |
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 73ffd5536f62..d406c5239019 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c | |||
@@ -146,10 +146,6 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, | |||
146 | use_gbpages = direct_gbpages; | 146 | use_gbpages = direct_gbpages; |
147 | #endif | 147 | #endif |
148 | 148 | ||
149 | set_nx(); | ||
150 | if (nx_enabled) | ||
151 | printk(KERN_INFO "NX (Execute Disable) protection: active\n"); | ||
152 | |||
153 | /* Enable PSE if available */ | 149 | /* Enable PSE if available */ |
154 | if (cpu_has_pse) | 150 | if (cpu_has_pse) |
155 | set_in_cr4(X86_CR4_PSE); | 151 | set_in_cr4(X86_CR4_PSE); |
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 30938c1d8d5d..c973f8e2a6cf 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c | |||
@@ -412,7 +412,7 @@ static void __init permanent_kmaps_init(pgd_t *pgd_base) | |||
412 | pkmap_page_table = pte; | 412 | pkmap_page_table = pte; |
413 | } | 413 | } |
414 | 414 | ||
415 | static void __init add_one_highpage_init(struct page *page, int pfn) | 415 | static void __init add_one_highpage_init(struct page *page) |
416 | { | 416 | { |
417 | ClearPageReserved(page); | 417 | ClearPageReserved(page); |
418 | init_page_count(page); | 418 | init_page_count(page); |
@@ -445,7 +445,7 @@ static int __init add_highpages_work_fn(unsigned long start_pfn, | |||
445 | if (!pfn_valid(node_pfn)) | 445 | if (!pfn_valid(node_pfn)) |
446 | continue; | 446 | continue; |
447 | page = pfn_to_page(node_pfn); | 447 | page = pfn_to_page(node_pfn); |
448 | add_one_highpage_init(page, node_pfn); | 448 | add_one_highpage_init(page); |
449 | } | 449 | } |
450 | 450 | ||
451 | return 0; | 451 | return 0; |
@@ -703,8 +703,8 @@ void __init find_low_pfn_range(void) | |||
703 | } | 703 | } |
704 | 704 | ||
705 | #ifndef CONFIG_NEED_MULTIPLE_NODES | 705 | #ifndef CONFIG_NEED_MULTIPLE_NODES |
706 | void __init initmem_init(unsigned long start_pfn, | 706 | void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn, |
707 | unsigned long end_pfn) | 707 | int acpi, int k8) |
708 | { | 708 | { |
709 | #ifdef CONFIG_HIGHMEM | 709 | #ifdef CONFIG_HIGHMEM |
710 | highstart_pfn = highend_pfn = max_pfn; | 710 | highstart_pfn = highend_pfn = max_pfn; |
@@ -997,7 +997,7 @@ static noinline int do_test_wp_bit(void) | |||
997 | const int rodata_test_data = 0xC3; | 997 | const int rodata_test_data = 0xC3; |
998 | EXPORT_SYMBOL_GPL(rodata_test_data); | 998 | EXPORT_SYMBOL_GPL(rodata_test_data); |
999 | 999 | ||
1000 | static int kernel_set_to_readonly; | 1000 | int kernel_set_to_readonly __read_mostly; |
1001 | 1001 | ||
1002 | void set_kernel_text_rw(void) | 1002 | void set_kernel_text_rw(void) |
1003 | { | 1003 | { |
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 5a4398a6006b..5198b9bb34ef 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c | |||
@@ -568,7 +568,8 @@ kernel_physical_mapping_init(unsigned long start, | |||
568 | } | 568 | } |
569 | 569 | ||
570 | #ifndef CONFIG_NUMA | 570 | #ifndef CONFIG_NUMA |
571 | void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn) | 571 | void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn, |
572 | int acpi, int k8) | ||
572 | { | 573 | { |
573 | unsigned long bootmap_size, bootmap; | 574 | unsigned long bootmap_size, bootmap; |
574 | 575 | ||
@@ -694,12 +695,12 @@ void __init mem_init(void) | |||
694 | const int rodata_test_data = 0xC3; | 695 | const int rodata_test_data = 0xC3; |
695 | EXPORT_SYMBOL_GPL(rodata_test_data); | 696 | EXPORT_SYMBOL_GPL(rodata_test_data); |
696 | 697 | ||
697 | static int kernel_set_to_readonly; | 698 | int kernel_set_to_readonly; |
698 | 699 | ||
699 | void set_kernel_text_rw(void) | 700 | void set_kernel_text_rw(void) |
700 | { | 701 | { |
701 | unsigned long start = PFN_ALIGN(_stext); | 702 | unsigned long start = PFN_ALIGN(_text); |
702 | unsigned long end = PFN_ALIGN(__start_rodata); | 703 | unsigned long end = PFN_ALIGN(__stop___ex_table); |
703 | 704 | ||
704 | if (!kernel_set_to_readonly) | 705 | if (!kernel_set_to_readonly) |
705 | return; | 706 | return; |
@@ -707,13 +708,18 @@ void set_kernel_text_rw(void) | |||
707 | pr_debug("Set kernel text: %lx - %lx for read write\n", | 708 | pr_debug("Set kernel text: %lx - %lx for read write\n", |
708 | start, end); | 709 | start, end); |
709 | 710 | ||
711 | /* | ||
712 | * Make the kernel identity mapping for text RW. Kernel text | ||
713 | * mapping will always be RO. Refer to the comment in | ||
714 | * static_protections() in pageattr.c | ||
715 | */ | ||
710 | set_memory_rw(start, (end - start) >> PAGE_SHIFT); | 716 | set_memory_rw(start, (end - start) >> PAGE_SHIFT); |
711 | } | 717 | } |
712 | 718 | ||
713 | void set_kernel_text_ro(void) | 719 | void set_kernel_text_ro(void) |
714 | { | 720 | { |
715 | unsigned long start = PFN_ALIGN(_stext); | 721 | unsigned long start = PFN_ALIGN(_text); |
716 | unsigned long end = PFN_ALIGN(__start_rodata); | 722 | unsigned long end = PFN_ALIGN(__stop___ex_table); |
717 | 723 | ||
718 | if (!kernel_set_to_readonly) | 724 | if (!kernel_set_to_readonly) |
719 | return; | 725 | return; |
@@ -721,14 +727,21 @@ void set_kernel_text_ro(void) | |||
721 | pr_debug("Set kernel text: %lx - %lx for read only\n", | 727 | pr_debug("Set kernel text: %lx - %lx for read only\n", |
722 | start, end); | 728 | start, end); |
723 | 729 | ||
730 | /* | ||
731 | * Set the kernel identity mapping for text RO. | ||
732 | */ | ||
724 | set_memory_ro(start, (end - start) >> PAGE_SHIFT); | 733 | set_memory_ro(start, (end - start) >> PAGE_SHIFT); |
725 | } | 734 | } |
726 | 735 | ||
727 | void mark_rodata_ro(void) | 736 | void mark_rodata_ro(void) |
728 | { | 737 | { |
729 | unsigned long start = PFN_ALIGN(_stext), end = PFN_ALIGN(__end_rodata); | 738 | unsigned long start = PFN_ALIGN(_text); |
730 | unsigned long rodata_start = | 739 | unsigned long rodata_start = |
731 | ((unsigned long)__start_rodata + PAGE_SIZE - 1) & PAGE_MASK; | 740 | ((unsigned long)__start_rodata + PAGE_SIZE - 1) & PAGE_MASK; |
741 | unsigned long end = (unsigned long) &__end_rodata_hpage_align; | ||
742 | unsigned long text_end = PAGE_ALIGN((unsigned long) &__stop___ex_table); | ||
743 | unsigned long rodata_end = PAGE_ALIGN((unsigned long) &__end_rodata); | ||
744 | unsigned long data_start = (unsigned long) &_sdata; | ||
732 | 745 | ||
733 | printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n", | 746 | printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n", |
734 | (end - start) >> 10); | 747 | (end - start) >> 10); |
@@ -751,6 +764,14 @@ void mark_rodata_ro(void) | |||
751 | printk(KERN_INFO "Testing CPA: again\n"); | 764 | printk(KERN_INFO "Testing CPA: again\n"); |
752 | set_memory_ro(start, (end-start) >> PAGE_SHIFT); | 765 | set_memory_ro(start, (end-start) >> PAGE_SHIFT); |
753 | #endif | 766 | #endif |
767 | |||
768 | free_init_pages("unused kernel memory", | ||
769 | (unsigned long) page_address(virt_to_page(text_end)), | ||
770 | (unsigned long) | ||
771 | page_address(virt_to_page(rodata_start))); | ||
772 | free_init_pages("unused kernel memory", | ||
773 | (unsigned long) page_address(virt_to_page(rodata_end)), | ||
774 | (unsigned long) page_address(virt_to_page(data_start))); | ||
754 | } | 775 | } |
755 | 776 | ||
756 | #endif | 777 | #endif |
diff --git a/arch/x86/mm/k8topology_64.c b/arch/x86/mm/k8topology_64.c index 268f8255280f..970ed579d4e4 100644 --- a/arch/x86/mm/k8topology_64.c +++ b/arch/x86/mm/k8topology_64.c | |||
@@ -24,6 +24,9 @@ | |||
24 | #include <asm/apic.h> | 24 | #include <asm/apic.h> |
25 | #include <asm/k8.h> | 25 | #include <asm/k8.h> |
26 | 26 | ||
27 | static struct bootnode __initdata nodes[8]; | ||
28 | static nodemask_t __initdata nodes_parsed = NODE_MASK_NONE; | ||
29 | |||
27 | static __init int find_northbridge(void) | 30 | static __init int find_northbridge(void) |
28 | { | 31 | { |
29 | int num; | 32 | int num; |
@@ -54,18 +57,6 @@ static __init void early_get_boot_cpu_id(void) | |||
54 | * need to get boot_cpu_id so can use that to create apicid_to_node | 57 | * need to get boot_cpu_id so can use that to create apicid_to_node |
55 | * in k8_scan_nodes() | 58 | * in k8_scan_nodes() |
56 | */ | 59 | */ |
57 | /* | ||
58 | * Find possible boot-time SMP configuration: | ||
59 | */ | ||
60 | #ifdef CONFIG_X86_MPPARSE | ||
61 | early_find_smp_config(); | ||
62 | #endif | ||
63 | #ifdef CONFIG_ACPI | ||
64 | /* | ||
65 | * Read APIC information from ACPI tables. | ||
66 | */ | ||
67 | early_acpi_boot_init(); | ||
68 | #endif | ||
69 | #ifdef CONFIG_X86_MPPARSE | 60 | #ifdef CONFIG_X86_MPPARSE |
70 | /* | 61 | /* |
71 | * get boot-time SMP configuration: | 62 | * get boot-time SMP configuration: |
@@ -76,12 +67,26 @@ static __init void early_get_boot_cpu_id(void) | |||
76 | early_init_lapic_mapping(); | 67 | early_init_lapic_mapping(); |
77 | } | 68 | } |
78 | 69 | ||
79 | int __init k8_scan_nodes(unsigned long start, unsigned long end) | 70 | int __init k8_get_nodes(struct bootnode *physnodes) |
80 | { | 71 | { |
81 | unsigned numnodes, cores, bits, apicid_base; | 72 | int i; |
73 | int ret = 0; | ||
74 | |||
75 | for_each_node_mask(i, nodes_parsed) { | ||
76 | physnodes[ret].start = nodes[i].start; | ||
77 | physnodes[ret].end = nodes[i].end; | ||
78 | ret++; | ||
79 | } | ||
80 | return ret; | ||
81 | } | ||
82 | |||
83 | int __init k8_numa_init(unsigned long start_pfn, unsigned long end_pfn) | ||
84 | { | ||
85 | unsigned long start = PFN_PHYS(start_pfn); | ||
86 | unsigned long end = PFN_PHYS(end_pfn); | ||
87 | unsigned numnodes; | ||
82 | unsigned long prevbase; | 88 | unsigned long prevbase; |
83 | struct bootnode nodes[8]; | 89 | int i, nb, found = 0; |
84 | int i, j, nb, found = 0; | ||
85 | u32 nodeid, reg; | 90 | u32 nodeid, reg; |
86 | 91 | ||
87 | if (!early_pci_allowed()) | 92 | if (!early_pci_allowed()) |
@@ -91,16 +96,15 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end) | |||
91 | if (nb < 0) | 96 | if (nb < 0) |
92 | return nb; | 97 | return nb; |
93 | 98 | ||
94 | printk(KERN_INFO "Scanning NUMA topology in Northbridge %d\n", nb); | 99 | pr_info("Scanning NUMA topology in Northbridge %d\n", nb); |
95 | 100 | ||
96 | reg = read_pci_config(0, nb, 0, 0x60); | 101 | reg = read_pci_config(0, nb, 0, 0x60); |
97 | numnodes = ((reg >> 4) & 0xF) + 1; | 102 | numnodes = ((reg >> 4) & 0xF) + 1; |
98 | if (numnodes <= 1) | 103 | if (numnodes <= 1) |
99 | return -1; | 104 | return -1; |
100 | 105 | ||
101 | printk(KERN_INFO "Number of nodes %d\n", numnodes); | 106 | pr_info("Number of physical nodes %d\n", numnodes); |
102 | 107 | ||
103 | memset(&nodes, 0, sizeof(nodes)); | ||
104 | prevbase = 0; | 108 | prevbase = 0; |
105 | for (i = 0; i < 8; i++) { | 109 | for (i = 0; i < 8; i++) { |
106 | unsigned long base, limit; | 110 | unsigned long base, limit; |
@@ -111,28 +115,28 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end) | |||
111 | nodeid = limit & 7; | 115 | nodeid = limit & 7; |
112 | if ((base & 3) == 0) { | 116 | if ((base & 3) == 0) { |
113 | if (i < numnodes) | 117 | if (i < numnodes) |
114 | printk("Skipping disabled node %d\n", i); | 118 | pr_info("Skipping disabled node %d\n", i); |
115 | continue; | 119 | continue; |
116 | } | 120 | } |
117 | if (nodeid >= numnodes) { | 121 | if (nodeid >= numnodes) { |
118 | printk("Ignoring excess node %d (%lx:%lx)\n", nodeid, | 122 | pr_info("Ignoring excess node %d (%lx:%lx)\n", nodeid, |
119 | base, limit); | 123 | base, limit); |
120 | continue; | 124 | continue; |
121 | } | 125 | } |
122 | 126 | ||
123 | if (!limit) { | 127 | if (!limit) { |
124 | printk(KERN_INFO "Skipping node entry %d (base %lx)\n", | 128 | pr_info("Skipping node entry %d (base %lx)\n", |
125 | i, base); | 129 | i, base); |
126 | continue; | 130 | continue; |
127 | } | 131 | } |
128 | if ((base >> 8) & 3 || (limit >> 8) & 3) { | 132 | if ((base >> 8) & 3 || (limit >> 8) & 3) { |
129 | printk(KERN_ERR "Node %d using interleaving mode %lx/%lx\n", | 133 | pr_err("Node %d using interleaving mode %lx/%lx\n", |
130 | nodeid, (base>>8)&3, (limit>>8) & 3); | 134 | nodeid, (base >> 8) & 3, (limit >> 8) & 3); |
131 | return -1; | 135 | return -1; |
132 | } | 136 | } |
133 | if (node_isset(nodeid, node_possible_map)) { | 137 | if (node_isset(nodeid, nodes_parsed)) { |
134 | printk(KERN_INFO "Node %d already present. Skipping\n", | 138 | pr_info("Node %d already present, skipping\n", |
135 | nodeid); | 139 | nodeid); |
136 | continue; | 140 | continue; |
137 | } | 141 | } |
138 | 142 | ||
@@ -141,8 +145,8 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end) | |||
141 | limit |= (1<<24)-1; | 145 | limit |= (1<<24)-1; |
142 | limit++; | 146 | limit++; |
143 | 147 | ||
144 | if (limit > max_pfn << PAGE_SHIFT) | 148 | if (limit > end) |
145 | limit = max_pfn << PAGE_SHIFT; | 149 | limit = end; |
146 | if (limit <= base) | 150 | if (limit <= base) |
147 | continue; | 151 | continue; |
148 | 152 | ||
@@ -154,24 +158,24 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end) | |||
154 | if (limit > end) | 158 | if (limit > end) |
155 | limit = end; | 159 | limit = end; |
156 | if (limit == base) { | 160 | if (limit == base) { |
157 | printk(KERN_ERR "Empty node %d\n", nodeid); | 161 | pr_err("Empty node %d\n", nodeid); |
158 | continue; | 162 | continue; |
159 | } | 163 | } |
160 | if (limit < base) { | 164 | if (limit < base) { |
161 | printk(KERN_ERR "Node %d bogus settings %lx-%lx.\n", | 165 | pr_err("Node %d bogus settings %lx-%lx.\n", |
162 | nodeid, base, limit); | 166 | nodeid, base, limit); |
163 | continue; | 167 | continue; |
164 | } | 168 | } |
165 | 169 | ||
166 | /* Could sort here, but pun for now. Should not happen anyroads. */ | 170 | /* Could sort here, but pun for now. Should not happen anyroads. */ |
167 | if (prevbase > base) { | 171 | if (prevbase > base) { |
168 | printk(KERN_ERR "Node map not sorted %lx,%lx\n", | 172 | pr_err("Node map not sorted %lx,%lx\n", |
169 | prevbase, base); | 173 | prevbase, base); |
170 | return -1; | 174 | return -1; |
171 | } | 175 | } |
172 | 176 | ||
173 | printk(KERN_INFO "Node %d MemBase %016lx Limit %016lx\n", | 177 | pr_info("Node %d MemBase %016lx Limit %016lx\n", |
174 | nodeid, base, limit); | 178 | nodeid, base, limit); |
175 | 179 | ||
176 | found++; | 180 | found++; |
177 | 181 | ||
@@ -180,18 +184,29 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end) | |||
180 | 184 | ||
181 | prevbase = base; | 185 | prevbase = base; |
182 | 186 | ||
183 | node_set(nodeid, node_possible_map); | 187 | node_set(nodeid, nodes_parsed); |
184 | } | 188 | } |
185 | 189 | ||
186 | if (!found) | 190 | if (!found) |
187 | return -1; | 191 | return -1; |
192 | return 0; | ||
193 | } | ||
194 | |||
195 | int __init k8_scan_nodes(void) | ||
196 | { | ||
197 | unsigned int bits; | ||
198 | unsigned int cores; | ||
199 | unsigned int apicid_base; | ||
200 | int i; | ||
188 | 201 | ||
202 | BUG_ON(nodes_empty(nodes_parsed)); | ||
203 | node_possible_map = nodes_parsed; | ||
189 | memnode_shift = compute_hash_shift(nodes, 8, NULL); | 204 | memnode_shift = compute_hash_shift(nodes, 8, NULL); |
190 | if (memnode_shift < 0) { | 205 | if (memnode_shift < 0) { |
191 | printk(KERN_ERR "No NUMA node hash function found. Contact maintainer\n"); | 206 | pr_err("No NUMA node hash function found. Contact maintainer\n"); |
192 | return -1; | 207 | return -1; |
193 | } | 208 | } |
194 | printk(KERN_INFO "Using node hash shift of %d\n", memnode_shift); | 209 | pr_info("Using node hash shift of %d\n", memnode_shift); |
195 | 210 | ||
196 | /* use the coreid bits from early_identify_cpu */ | 211 | /* use the coreid bits from early_identify_cpu */ |
197 | bits = boot_cpu_data.x86_coreid_bits; | 212 | bits = boot_cpu_data.x86_coreid_bits; |
@@ -200,14 +215,12 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end) | |||
200 | /* need to get boot_cpu_id early for system with apicid lifting */ | 215 | /* need to get boot_cpu_id early for system with apicid lifting */ |
201 | early_get_boot_cpu_id(); | 216 | early_get_boot_cpu_id(); |
202 | if (boot_cpu_physical_apicid > 0) { | 217 | if (boot_cpu_physical_apicid > 0) { |
203 | printk(KERN_INFO "BSP APIC ID: %02x\n", | 218 | pr_info("BSP APIC ID: %02x\n", boot_cpu_physical_apicid); |
204 | boot_cpu_physical_apicid); | ||
205 | apicid_base = boot_cpu_physical_apicid; | 219 | apicid_base = boot_cpu_physical_apicid; |
206 | } | 220 | } |
207 | 221 | ||
208 | for (i = 0; i < 8; i++) { | 222 | for_each_node_mask(i, node_possible_map) { |
209 | if (nodes[i].start == nodes[i].end) | 223 | int j; |
210 | continue; | ||
211 | 224 | ||
212 | e820_register_active_regions(i, | 225 | e820_register_active_regions(i, |
213 | nodes[i].start >> PAGE_SHIFT, | 226 | nodes[i].start >> PAGE_SHIFT, |
diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c index d2530062fe00..b20760ca7244 100644 --- a/arch/x86/mm/numa_32.c +++ b/arch/x86/mm/numa_32.c | |||
@@ -347,8 +347,8 @@ static void init_remap_allocator(int nid) | |||
347 | (ulong) node_remap_end_vaddr[nid]); | 347 | (ulong) node_remap_end_vaddr[nid]); |
348 | } | 348 | } |
349 | 349 | ||
350 | void __init initmem_init(unsigned long start_pfn, | 350 | void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn, |
351 | unsigned long end_pfn) | 351 | int acpi, int k8) |
352 | { | 352 | { |
353 | int nid; | 353 | int nid; |
354 | long kva_target_pfn; | 354 | long kva_target_pfn; |
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c index 459913beac71..83bbc70d11bb 100644 --- a/arch/x86/mm/numa_64.c +++ b/arch/x86/mm/numa_64.c | |||
@@ -239,8 +239,14 @@ setup_node_bootmem(int nodeid, unsigned long start, unsigned long end) | |||
239 | bootmap = early_node_mem(nodeid, bootmap_start, end, | 239 | bootmap = early_node_mem(nodeid, bootmap_start, end, |
240 | bootmap_pages<<PAGE_SHIFT, PAGE_SIZE); | 240 | bootmap_pages<<PAGE_SHIFT, PAGE_SIZE); |
241 | if (bootmap == NULL) { | 241 | if (bootmap == NULL) { |
242 | if (nodedata_phys < start || nodedata_phys >= end) | 242 | if (nodedata_phys < start || nodedata_phys >= end) { |
243 | free_bootmem(nodedata_phys, pgdat_size); | 243 | /* |
244 | * only need to free it if it is from other node | ||
245 | * bootmem | ||
246 | */ | ||
247 | if (nid != nodeid) | ||
248 | free_bootmem(nodedata_phys, pgdat_size); | ||
249 | } | ||
244 | node_data[nodeid] = NULL; | 250 | node_data[nodeid] = NULL; |
245 | return; | 251 | return; |
246 | } | 252 | } |
@@ -306,8 +312,71 @@ void __init numa_init_array(void) | |||
306 | 312 | ||
307 | #ifdef CONFIG_NUMA_EMU | 313 | #ifdef CONFIG_NUMA_EMU |
308 | /* Numa emulation */ | 314 | /* Numa emulation */ |
315 | static struct bootnode nodes[MAX_NUMNODES] __initdata; | ||
316 | static struct bootnode physnodes[MAX_NUMNODES] __initdata; | ||
309 | static char *cmdline __initdata; | 317 | static char *cmdline __initdata; |
310 | 318 | ||
319 | static int __init setup_physnodes(unsigned long start, unsigned long end, | ||
320 | int acpi, int k8) | ||
321 | { | ||
322 | int nr_nodes = 0; | ||
323 | int ret = 0; | ||
324 | int i; | ||
325 | |||
326 | #ifdef CONFIG_ACPI_NUMA | ||
327 | if (acpi) | ||
328 | nr_nodes = acpi_get_nodes(physnodes); | ||
329 | #endif | ||
330 | #ifdef CONFIG_K8_NUMA | ||
331 | if (k8) | ||
332 | nr_nodes = k8_get_nodes(physnodes); | ||
333 | #endif | ||
334 | /* | ||
335 | * Basic sanity checking on the physical node map: there may be errors | ||
336 | * if the SRAT or K8 incorrectly reported the topology or the mem= | ||
337 | * kernel parameter is used. | ||
338 | */ | ||
339 | for (i = 0; i < nr_nodes; i++) { | ||
340 | if (physnodes[i].start == physnodes[i].end) | ||
341 | continue; | ||
342 | if (physnodes[i].start > end) { | ||
343 | physnodes[i].end = physnodes[i].start; | ||
344 | continue; | ||
345 | } | ||
346 | if (physnodes[i].end < start) { | ||
347 | physnodes[i].start = physnodes[i].end; | ||
348 | continue; | ||
349 | } | ||
350 | if (physnodes[i].start < start) | ||
351 | physnodes[i].start = start; | ||
352 | if (physnodes[i].end > end) | ||
353 | physnodes[i].end = end; | ||
354 | } | ||
355 | |||
356 | /* | ||
357 | * Remove all nodes that have no memory or were truncated because of the | ||
358 | * limited address range. | ||
359 | */ | ||
360 | for (i = 0; i < nr_nodes; i++) { | ||
361 | if (physnodes[i].start == physnodes[i].end) | ||
362 | continue; | ||
363 | physnodes[ret].start = physnodes[i].start; | ||
364 | physnodes[ret].end = physnodes[i].end; | ||
365 | ret++; | ||
366 | } | ||
367 | |||
368 | /* | ||
369 | * If no physical topology was detected, a single node is faked to cover | ||
370 | * the entire address space. | ||
371 | */ | ||
372 | if (!ret) { | ||
373 | physnodes[ret].start = start; | ||
374 | physnodes[ret].end = end; | ||
375 | ret = 1; | ||
376 | } | ||
377 | return ret; | ||
378 | } | ||
379 | |||
311 | /* | 380 | /* |
312 | * Setups up nid to range from addr to addr + size. If the end | 381 | * Setups up nid to range from addr to addr + size. If the end |
313 | * boundary is greater than max_addr, then max_addr is used instead. | 382 | * boundary is greater than max_addr, then max_addr is used instead. |
@@ -315,11 +384,9 @@ static char *cmdline __initdata; | |||
315 | * allocation past addr and -1 otherwise. addr is adjusted to be at | 384 | * allocation past addr and -1 otherwise. addr is adjusted to be at |
316 | * the end of the node. | 385 | * the end of the node. |
317 | */ | 386 | */ |
318 | static int __init setup_node_range(int nid, struct bootnode *nodes, u64 *addr, | 387 | static int __init setup_node_range(int nid, u64 *addr, u64 size, u64 max_addr) |
319 | u64 size, u64 max_addr) | ||
320 | { | 388 | { |
321 | int ret = 0; | 389 | int ret = 0; |
322 | |||
323 | nodes[nid].start = *addr; | 390 | nodes[nid].start = *addr; |
324 | *addr += size; | 391 | *addr += size; |
325 | if (*addr >= max_addr) { | 392 | if (*addr >= max_addr) { |
@@ -335,12 +402,111 @@ static int __init setup_node_range(int nid, struct bootnode *nodes, u64 *addr, | |||
335 | } | 402 | } |
336 | 403 | ||
337 | /* | 404 | /* |
405 | * Sets up nr_nodes fake nodes interleaved over physical nodes ranging from addr | ||
406 | * to max_addr. The return value is the number of nodes allocated. | ||
407 | */ | ||
408 | static int __init split_nodes_interleave(u64 addr, u64 max_addr, | ||
409 | int nr_phys_nodes, int nr_nodes) | ||
410 | { | ||
411 | nodemask_t physnode_mask = NODE_MASK_NONE; | ||
412 | u64 size; | ||
413 | int big; | ||
414 | int ret = 0; | ||
415 | int i; | ||
416 | |||
417 | if (nr_nodes <= 0) | ||
418 | return -1; | ||
419 | if (nr_nodes > MAX_NUMNODES) { | ||
420 | pr_info("numa=fake=%d too large, reducing to %d\n", | ||
421 | nr_nodes, MAX_NUMNODES); | ||
422 | nr_nodes = MAX_NUMNODES; | ||
423 | } | ||
424 | |||
425 | size = (max_addr - addr - e820_hole_size(addr, max_addr)) / nr_nodes; | ||
426 | /* | ||
427 | * Calculate the number of big nodes that can be allocated as a result | ||
428 | * of consolidating the remainder. | ||
429 | */ | ||
430 | big = ((size & ~FAKE_NODE_MIN_HASH_MASK) & nr_nodes) / | ||
431 | FAKE_NODE_MIN_SIZE; | ||
432 | |||
433 | size &= FAKE_NODE_MIN_HASH_MASK; | ||
434 | if (!size) { | ||
435 | pr_err("Not enough memory for each node. " | ||
436 | "NUMA emulation disabled.\n"); | ||
437 | return -1; | ||
438 | } | ||
439 | |||
440 | for (i = 0; i < nr_phys_nodes; i++) | ||
441 | if (physnodes[i].start != physnodes[i].end) | ||
442 | node_set(i, physnode_mask); | ||
443 | |||
444 | /* | ||
445 | * Continue to fill physical nodes with fake nodes until there is no | ||
446 | * memory left on any of them. | ||
447 | */ | ||
448 | while (nodes_weight(physnode_mask)) { | ||
449 | for_each_node_mask(i, physnode_mask) { | ||
450 | u64 end = physnodes[i].start + size; | ||
451 | u64 dma32_end = PFN_PHYS(MAX_DMA32_PFN); | ||
452 | |||
453 | if (ret < big) | ||
454 | end += FAKE_NODE_MIN_SIZE; | ||
455 | |||
456 | /* | ||
457 | * Continue to add memory to this fake node if its | ||
458 | * non-reserved memory is less than the per-node size. | ||
459 | */ | ||
460 | while (end - physnodes[i].start - | ||
461 | e820_hole_size(physnodes[i].start, end) < size) { | ||
462 | end += FAKE_NODE_MIN_SIZE; | ||
463 | if (end > physnodes[i].end) { | ||
464 | end = physnodes[i].end; | ||
465 | break; | ||
466 | } | ||
467 | } | ||
468 | |||
469 | /* | ||
470 | * If there won't be at least FAKE_NODE_MIN_SIZE of | ||
471 | * non-reserved memory in ZONE_DMA32 for the next node, | ||
472 | * this one must extend to the boundary. | ||
473 | */ | ||
474 | if (end < dma32_end && dma32_end - end - | ||
475 | e820_hole_size(end, dma32_end) < FAKE_NODE_MIN_SIZE) | ||
476 | end = dma32_end; | ||
477 | |||
478 | /* | ||
479 | * If there won't be enough non-reserved memory for the | ||
480 | * next node, this one must extend to the end of the | ||
481 | * physical node. | ||
482 | */ | ||
483 | if (physnodes[i].end - end - | ||
484 | e820_hole_size(end, physnodes[i].end) < size) | ||
485 | end = physnodes[i].end; | ||
486 | |||
487 | /* | ||
488 | * Avoid allocating more nodes than requested, which can | ||
489 | * happen as a result of rounding down each node's size | ||
490 | * to FAKE_NODE_MIN_SIZE. | ||
491 | */ | ||
492 | if (nodes_weight(physnode_mask) + ret >= nr_nodes) | ||
493 | end = physnodes[i].end; | ||
494 | |||
495 | if (setup_node_range(ret++, &physnodes[i].start, | ||
496 | end - physnodes[i].start, | ||
497 | physnodes[i].end) < 0) | ||
498 | node_clear(i, physnode_mask); | ||
499 | } | ||
500 | } | ||
501 | return ret; | ||
502 | } | ||
503 | |||
504 | /* | ||
338 | * Splits num_nodes nodes up equally starting at node_start. The return value | 505 | * Splits num_nodes nodes up equally starting at node_start. The return value |
339 | * is the number of nodes split up and addr is adjusted to be at the end of the | 506 | * is the number of nodes split up and addr is adjusted to be at the end of the |
340 | * last node allocated. | 507 | * last node allocated. |
341 | */ | 508 | */ |
342 | static int __init split_nodes_equally(struct bootnode *nodes, u64 *addr, | 509 | static int __init split_nodes_equally(u64 *addr, u64 max_addr, int node_start, |
343 | u64 max_addr, int node_start, | ||
344 | int num_nodes) | 510 | int num_nodes) |
345 | { | 511 | { |
346 | unsigned int big; | 512 | unsigned int big; |
@@ -388,7 +554,7 @@ static int __init split_nodes_equally(struct bootnode *nodes, u64 *addr, | |||
388 | break; | 554 | break; |
389 | } | 555 | } |
390 | } | 556 | } |
391 | if (setup_node_range(i, nodes, addr, end - *addr, max_addr) < 0) | 557 | if (setup_node_range(i, addr, end - *addr, max_addr) < 0) |
392 | break; | 558 | break; |
393 | } | 559 | } |
394 | return i - node_start + 1; | 560 | return i - node_start + 1; |
@@ -399,12 +565,12 @@ static int __init split_nodes_equally(struct bootnode *nodes, u64 *addr, | |||
399 | * always assigned to a final node and can be asymmetric. Returns the number of | 565 | * always assigned to a final node and can be asymmetric. Returns the number of |
400 | * nodes split. | 566 | * nodes split. |
401 | */ | 567 | */ |
402 | static int __init split_nodes_by_size(struct bootnode *nodes, u64 *addr, | 568 | static int __init split_nodes_by_size(u64 *addr, u64 max_addr, int node_start, |
403 | u64 max_addr, int node_start, u64 size) | 569 | u64 size) |
404 | { | 570 | { |
405 | int i = node_start; | 571 | int i = node_start; |
406 | size = (size << 20) & FAKE_NODE_MIN_HASH_MASK; | 572 | size = (size << 20) & FAKE_NODE_MIN_HASH_MASK; |
407 | while (!setup_node_range(i++, nodes, addr, size, max_addr)) | 573 | while (!setup_node_range(i++, addr, size, max_addr)) |
408 | ; | 574 | ; |
409 | return i - node_start; | 575 | return i - node_start; |
410 | } | 576 | } |
@@ -413,15 +579,15 @@ static int __init split_nodes_by_size(struct bootnode *nodes, u64 *addr, | |||
413 | * Sets up the system RAM area from start_pfn to last_pfn according to the | 579 | * Sets up the system RAM area from start_pfn to last_pfn according to the |
414 | * numa=fake command-line option. | 580 | * numa=fake command-line option. |
415 | */ | 581 | */ |
416 | static struct bootnode nodes[MAX_NUMNODES] __initdata; | 582 | static int __init numa_emulation(unsigned long start_pfn, |
417 | 583 | unsigned long last_pfn, int acpi, int k8) | |
418 | static int __init numa_emulation(unsigned long start_pfn, unsigned long last_pfn) | ||
419 | { | 584 | { |
420 | u64 size, addr = start_pfn << PAGE_SHIFT; | 585 | u64 size, addr = start_pfn << PAGE_SHIFT; |
421 | u64 max_addr = last_pfn << PAGE_SHIFT; | 586 | u64 max_addr = last_pfn << PAGE_SHIFT; |
422 | int num_nodes = 0, num = 0, coeff_flag, coeff = -1, i; | 587 | int num_nodes = 0, num = 0, coeff_flag, coeff = -1, i; |
588 | int num_phys_nodes; | ||
423 | 589 | ||
424 | memset(&nodes, 0, sizeof(nodes)); | 590 | num_phys_nodes = setup_physnodes(addr, max_addr, acpi, k8); |
425 | /* | 591 | /* |
426 | * If the numa=fake command-line is just a single number N, split the | 592 | * If the numa=fake command-line is just a single number N, split the |
427 | * system RAM into N fake nodes. | 593 | * system RAM into N fake nodes. |
@@ -429,7 +595,8 @@ static int __init numa_emulation(unsigned long start_pfn, unsigned long last_pfn | |||
429 | if (!strchr(cmdline, '*') && !strchr(cmdline, ',')) { | 595 | if (!strchr(cmdline, '*') && !strchr(cmdline, ',')) { |
430 | long n = simple_strtol(cmdline, NULL, 0); | 596 | long n = simple_strtol(cmdline, NULL, 0); |
431 | 597 | ||
432 | num_nodes = split_nodes_equally(nodes, &addr, max_addr, 0, n); | 598 | num_nodes = split_nodes_interleave(addr, max_addr, |
599 | num_phys_nodes, n); | ||
433 | if (num_nodes < 0) | 600 | if (num_nodes < 0) |
434 | return num_nodes; | 601 | return num_nodes; |
435 | goto out; | 602 | goto out; |
@@ -456,8 +623,8 @@ static int __init numa_emulation(unsigned long start_pfn, unsigned long last_pfn | |||
456 | size = ((u64)num << 20) & FAKE_NODE_MIN_HASH_MASK; | 623 | size = ((u64)num << 20) & FAKE_NODE_MIN_HASH_MASK; |
457 | if (size) | 624 | if (size) |
458 | for (i = 0; i < coeff; i++, num_nodes++) | 625 | for (i = 0; i < coeff; i++, num_nodes++) |
459 | if (setup_node_range(num_nodes, nodes, | 626 | if (setup_node_range(num_nodes, &addr, |
460 | &addr, size, max_addr) < 0) | 627 | size, max_addr) < 0) |
461 | goto done; | 628 | goto done; |
462 | if (!*cmdline) | 629 | if (!*cmdline) |
463 | break; | 630 | break; |
@@ -473,7 +640,7 @@ done: | |||
473 | if (addr < max_addr) { | 640 | if (addr < max_addr) { |
474 | if (coeff_flag && coeff < 0) { | 641 | if (coeff_flag && coeff < 0) { |
475 | /* Split remaining nodes into num-sized chunks */ | 642 | /* Split remaining nodes into num-sized chunks */ |
476 | num_nodes += split_nodes_by_size(nodes, &addr, max_addr, | 643 | num_nodes += split_nodes_by_size(&addr, max_addr, |
477 | num_nodes, num); | 644 | num_nodes, num); |
478 | goto out; | 645 | goto out; |
479 | } | 646 | } |
@@ -482,7 +649,7 @@ done: | |||
482 | /* Split remaining nodes into coeff chunks */ | 649 | /* Split remaining nodes into coeff chunks */ |
483 | if (coeff <= 0) | 650 | if (coeff <= 0) |
484 | break; | 651 | break; |
485 | num_nodes += split_nodes_equally(nodes, &addr, max_addr, | 652 | num_nodes += split_nodes_equally(&addr, max_addr, |
486 | num_nodes, coeff); | 653 | num_nodes, coeff); |
487 | break; | 654 | break; |
488 | case ',': | 655 | case ',': |
@@ -490,8 +657,8 @@ done: | |||
490 | break; | 657 | break; |
491 | default: | 658 | default: |
492 | /* Give one final node */ | 659 | /* Give one final node */ |
493 | setup_node_range(num_nodes, nodes, &addr, | 660 | setup_node_range(num_nodes, &addr, max_addr - addr, |
494 | max_addr - addr, max_addr); | 661 | max_addr); |
495 | num_nodes++; | 662 | num_nodes++; |
496 | } | 663 | } |
497 | } | 664 | } |
@@ -505,14 +672,10 @@ out: | |||
505 | } | 672 | } |
506 | 673 | ||
507 | /* | 674 | /* |
508 | * We need to vacate all active ranges that may have been registered by | 675 | * We need to vacate all active ranges that may have been registered for |
509 | * SRAT and set acpi_numa to -1 so that srat_disabled() always returns | 676 | * the e820 memory map. |
510 | * true. NUMA emulation has succeeded so we will not scan ACPI nodes. | ||
511 | */ | 677 | */ |
512 | remove_all_active_ranges(); | 678 | remove_all_active_ranges(); |
513 | #ifdef CONFIG_ACPI_NUMA | ||
514 | acpi_numa = -1; | ||
515 | #endif | ||
516 | for_each_node_mask(i, node_possible_map) { | 679 | for_each_node_mask(i, node_possible_map) { |
517 | e820_register_active_regions(i, nodes[i].start >> PAGE_SHIFT, | 680 | e820_register_active_regions(i, nodes[i].start >> PAGE_SHIFT, |
518 | nodes[i].end >> PAGE_SHIFT); | 681 | nodes[i].end >> PAGE_SHIFT); |
@@ -524,7 +687,8 @@ out: | |||
524 | } | 687 | } |
525 | #endif /* CONFIG_NUMA_EMU */ | 688 | #endif /* CONFIG_NUMA_EMU */ |
526 | 689 | ||
527 | void __init initmem_init(unsigned long start_pfn, unsigned long last_pfn) | 690 | void __init initmem_init(unsigned long start_pfn, unsigned long last_pfn, |
691 | int acpi, int k8) | ||
528 | { | 692 | { |
529 | int i; | 693 | int i; |
530 | 694 | ||
@@ -532,23 +696,22 @@ void __init initmem_init(unsigned long start_pfn, unsigned long last_pfn) | |||
532 | nodes_clear(node_online_map); | 696 | nodes_clear(node_online_map); |
533 | 697 | ||
534 | #ifdef CONFIG_NUMA_EMU | 698 | #ifdef CONFIG_NUMA_EMU |
535 | if (cmdline && !numa_emulation(start_pfn, last_pfn)) | 699 | if (cmdline && !numa_emulation(start_pfn, last_pfn, acpi, k8)) |
536 | return; | 700 | return; |
537 | nodes_clear(node_possible_map); | 701 | nodes_clear(node_possible_map); |
538 | nodes_clear(node_online_map); | 702 | nodes_clear(node_online_map); |
539 | #endif | 703 | #endif |
540 | 704 | ||
541 | #ifdef CONFIG_ACPI_NUMA | 705 | #ifdef CONFIG_ACPI_NUMA |
542 | if (!numa_off && !acpi_scan_nodes(start_pfn << PAGE_SHIFT, | 706 | if (!numa_off && acpi && !acpi_scan_nodes(start_pfn << PAGE_SHIFT, |
543 | last_pfn << PAGE_SHIFT)) | 707 | last_pfn << PAGE_SHIFT)) |
544 | return; | 708 | return; |
545 | nodes_clear(node_possible_map); | 709 | nodes_clear(node_possible_map); |
546 | nodes_clear(node_online_map); | 710 | nodes_clear(node_online_map); |
547 | #endif | 711 | #endif |
548 | 712 | ||
549 | #ifdef CONFIG_K8_NUMA | 713 | #ifdef CONFIG_K8_NUMA |
550 | if (!numa_off && !k8_scan_nodes(start_pfn<<PAGE_SHIFT, | 714 | if (!numa_off && k8 && !k8_scan_nodes()) |
551 | last_pfn<<PAGE_SHIFT)) | ||
552 | return; | 715 | return; |
553 | nodes_clear(node_possible_map); | 716 | nodes_clear(node_possible_map); |
554 | nodes_clear(node_online_map); | 717 | nodes_clear(node_online_map); |
@@ -601,6 +764,25 @@ static __init int numa_setup(char *opt) | |||
601 | early_param("numa", numa_setup); | 764 | early_param("numa", numa_setup); |
602 | 765 | ||
603 | #ifdef CONFIG_NUMA | 766 | #ifdef CONFIG_NUMA |
767 | |||
768 | static __init int find_near_online_node(int node) | ||
769 | { | ||
770 | int n, val; | ||
771 | int min_val = INT_MAX; | ||
772 | int best_node = -1; | ||
773 | |||
774 | for_each_online_node(n) { | ||
775 | val = node_distance(node, n); | ||
776 | |||
777 | if (val < min_val) { | ||
778 | min_val = val; | ||
779 | best_node = n; | ||
780 | } | ||
781 | } | ||
782 | |||
783 | return best_node; | ||
784 | } | ||
785 | |||
604 | /* | 786 | /* |
605 | * Setup early cpu_to_node. | 787 | * Setup early cpu_to_node. |
606 | * | 788 | * |
@@ -632,7 +814,7 @@ void __init init_cpu_to_node(void) | |||
632 | if (node == NUMA_NO_NODE) | 814 | if (node == NUMA_NO_NODE) |
633 | continue; | 815 | continue; |
634 | if (!node_online(node)) | 816 | if (!node_online(node)) |
635 | continue; | 817 | node = find_near_online_node(node); |
636 | numa_set_node(cpu, node); | 818 | numa_set_node(cpu, node); |
637 | } | 819 | } |
638 | } | 820 | } |
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index dd38bfbefd1f..1d4eb93d333c 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c | |||
@@ -279,6 +279,22 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address, | |||
279 | __pa((unsigned long)__end_rodata) >> PAGE_SHIFT)) | 279 | __pa((unsigned long)__end_rodata) >> PAGE_SHIFT)) |
280 | pgprot_val(forbidden) |= _PAGE_RW; | 280 | pgprot_val(forbidden) |= _PAGE_RW; |
281 | 281 | ||
282 | #if defined(CONFIG_X86_64) && defined(CONFIG_DEBUG_RODATA) | ||
283 | /* | ||
284 | * Once the kernel maps the text as RO (kernel_set_to_readonly is set), | ||
285 | * kernel text mappings for the large page aligned text, rodata sections | ||
286 | * will be always read-only. For the kernel identity mappings covering | ||
287 | * the holes caused by this alignment can be anything that user asks. | ||
288 | * | ||
289 | * This will preserve the large page mappings for kernel text/data | ||
290 | * at no extra cost. | ||
291 | */ | ||
292 | if (kernel_set_to_readonly && | ||
293 | within(address, (unsigned long)_text, | ||
294 | (unsigned long)__end_rodata_hpage_align)) | ||
295 | pgprot_val(forbidden) |= _PAGE_RW; | ||
296 | #endif | ||
297 | |||
282 | prot = __pgprot(pgprot_val(prot) & ~pgprot_val(forbidden)); | 298 | prot = __pgprot(pgprot_val(prot) & ~pgprot_val(forbidden)); |
283 | 299 | ||
284 | return prot; | 300 | return prot; |
@@ -1069,12 +1085,18 @@ EXPORT_SYMBOL(set_memory_array_wb); | |||
1069 | 1085 | ||
1070 | int set_memory_x(unsigned long addr, int numpages) | 1086 | int set_memory_x(unsigned long addr, int numpages) |
1071 | { | 1087 | { |
1088 | if (!(__supported_pte_mask & _PAGE_NX)) | ||
1089 | return 0; | ||
1090 | |||
1072 | return change_page_attr_clear(&addr, numpages, __pgprot(_PAGE_NX), 0); | 1091 | return change_page_attr_clear(&addr, numpages, __pgprot(_PAGE_NX), 0); |
1073 | } | 1092 | } |
1074 | EXPORT_SYMBOL(set_memory_x); | 1093 | EXPORT_SYMBOL(set_memory_x); |
1075 | 1094 | ||
1076 | int set_memory_nx(unsigned long addr, int numpages) | 1095 | int set_memory_nx(unsigned long addr, int numpages) |
1077 | { | 1096 | { |
1097 | if (!(__supported_pte_mask & _PAGE_NX)) | ||
1098 | return 0; | ||
1099 | |||
1078 | return change_page_attr_set(&addr, numpages, __pgprot(_PAGE_NX), 0); | 1100 | return change_page_attr_set(&addr, numpages, __pgprot(_PAGE_NX), 0); |
1079 | } | 1101 | } |
1080 | EXPORT_SYMBOL(set_memory_nx); | 1102 | EXPORT_SYMBOL(set_memory_nx); |
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index e78cd0ec2bcf..a81b7e73275d 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c | |||
@@ -20,6 +20,7 @@ | |||
20 | #include <asm/cacheflush.h> | 20 | #include <asm/cacheflush.h> |
21 | #include <asm/processor.h> | 21 | #include <asm/processor.h> |
22 | #include <asm/tlbflush.h> | 22 | #include <asm/tlbflush.h> |
23 | #include <asm/x86_init.h> | ||
23 | #include <asm/pgtable.h> | 24 | #include <asm/pgtable.h> |
24 | #include <asm/fcntl.h> | 25 | #include <asm/fcntl.h> |
25 | #include <asm/e820.h> | 26 | #include <asm/e820.h> |
@@ -388,7 +389,7 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type, | |||
388 | } | 389 | } |
389 | 390 | ||
390 | /* Low ISA region is always mapped WB in page table. No need to track */ | 391 | /* Low ISA region is always mapped WB in page table. No need to track */ |
391 | if (is_ISA_range(start, end - 1)) { | 392 | if (x86_platform.is_untracked_pat_range(start, end)) { |
392 | if (new_type) | 393 | if (new_type) |
393 | *new_type = _PAGE_CACHE_WB; | 394 | *new_type = _PAGE_CACHE_WB; |
394 | return 0; | 395 | return 0; |
@@ -499,7 +500,7 @@ int free_memtype(u64 start, u64 end) | |||
499 | return 0; | 500 | return 0; |
500 | 501 | ||
501 | /* Low ISA region is always mapped WB. No need to track */ | 502 | /* Low ISA region is always mapped WB. No need to track */ |
502 | if (is_ISA_range(start, end - 1)) | 503 | if (x86_platform.is_untracked_pat_range(start, end)) |
503 | return 0; | 504 | return 0; |
504 | 505 | ||
505 | is_range_ram = pat_pagerange_is_ram(start, end); | 506 | is_range_ram = pat_pagerange_is_ram(start, end); |
@@ -582,7 +583,7 @@ static unsigned long lookup_memtype(u64 paddr) | |||
582 | int rettype = _PAGE_CACHE_WB; | 583 | int rettype = _PAGE_CACHE_WB; |
583 | struct memtype *entry; | 584 | struct memtype *entry; |
584 | 585 | ||
585 | if (is_ISA_range(paddr, paddr + PAGE_SIZE - 1)) | 586 | if (x86_platform.is_untracked_pat_range(paddr, paddr + PAGE_SIZE)) |
586 | return rettype; | 587 | return rettype; |
587 | 588 | ||
588 | if (pat_pagerange_is_ram(paddr, paddr + PAGE_SIZE)) { | 589 | if (pat_pagerange_is_ram(paddr, paddr + PAGE_SIZE)) { |
@@ -1018,8 +1019,10 @@ static const struct file_operations memtype_fops = { | |||
1018 | 1019 | ||
1019 | static int __init pat_memtype_list_init(void) | 1020 | static int __init pat_memtype_list_init(void) |
1020 | { | 1021 | { |
1021 | debugfs_create_file("pat_memtype_list", S_IRUSR, arch_debugfs_dir, | 1022 | if (pat_enabled) { |
1022 | NULL, &memtype_fops); | 1023 | debugfs_create_file("pat_memtype_list", S_IRUSR, |
1024 | arch_debugfs_dir, NULL, &memtype_fops); | ||
1025 | } | ||
1023 | return 0; | 1026 | return 0; |
1024 | } | 1027 | } |
1025 | 1028 | ||
diff --git a/arch/x86/mm/setup_nx.c b/arch/x86/mm/setup_nx.c index 513d8ed5d2ec..a3250aa34086 100644 --- a/arch/x86/mm/setup_nx.c +++ b/arch/x86/mm/setup_nx.c | |||
@@ -3,10 +3,8 @@ | |||
3 | #include <linux/init.h> | 3 | #include <linux/init.h> |
4 | 4 | ||
5 | #include <asm/pgtable.h> | 5 | #include <asm/pgtable.h> |
6 | #include <asm/proto.h> | ||
6 | 7 | ||
7 | int nx_enabled; | ||
8 | |||
9 | #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) | ||
10 | static int disable_nx __cpuinitdata; | 8 | static int disable_nx __cpuinitdata; |
11 | 9 | ||
12 | /* | 10 | /* |
@@ -22,48 +20,41 @@ static int __init noexec_setup(char *str) | |||
22 | if (!str) | 20 | if (!str) |
23 | return -EINVAL; | 21 | return -EINVAL; |
24 | if (!strncmp(str, "on", 2)) { | 22 | if (!strncmp(str, "on", 2)) { |
25 | __supported_pte_mask |= _PAGE_NX; | ||
26 | disable_nx = 0; | 23 | disable_nx = 0; |
27 | } else if (!strncmp(str, "off", 3)) { | 24 | } else if (!strncmp(str, "off", 3)) { |
28 | disable_nx = 1; | 25 | disable_nx = 1; |
29 | __supported_pte_mask &= ~_PAGE_NX; | ||
30 | } | 26 | } |
27 | x86_configure_nx(); | ||
31 | return 0; | 28 | return 0; |
32 | } | 29 | } |
33 | early_param("noexec", noexec_setup); | 30 | early_param("noexec", noexec_setup); |
34 | #endif | ||
35 | 31 | ||
36 | #ifdef CONFIG_X86_PAE | 32 | void __cpuinit x86_configure_nx(void) |
37 | void __init set_nx(void) | ||
38 | { | 33 | { |
39 | unsigned int v[4], l, h; | 34 | if (cpu_has_nx && !disable_nx) |
40 | 35 | __supported_pte_mask |= _PAGE_NX; | |
41 | if (cpu_has_pae && (cpuid_eax(0x80000000) > 0x80000001)) { | 36 | else |
42 | cpuid(0x80000001, &v[0], &v[1], &v[2], &v[3]); | 37 | __supported_pte_mask &= ~_PAGE_NX; |
38 | } | ||
43 | 39 | ||
44 | if ((v[3] & (1 << 20)) && !disable_nx) { | 40 | void __init x86_report_nx(void) |
45 | rdmsr(MSR_EFER, l, h); | 41 | { |
46 | l |= EFER_NX; | 42 | if (!cpu_has_nx) { |
47 | wrmsr(MSR_EFER, l, h); | 43 | printk(KERN_NOTICE "Notice: NX (Execute Disable) protection " |
48 | nx_enabled = 1; | 44 | "missing in CPU or disabled in BIOS!\n"); |
49 | __supported_pte_mask |= _PAGE_NX; | 45 | } else { |
46 | #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) | ||
47 | if (disable_nx) { | ||
48 | printk(KERN_INFO "NX (Execute Disable) protection: " | ||
49 | "disabled by kernel command line option\n"); | ||
50 | } else { | ||
51 | printk(KERN_INFO "NX (Execute Disable) protection: " | ||
52 | "active\n"); | ||
50 | } | 53 | } |
51 | } | ||
52 | } | ||
53 | #else | 54 | #else |
54 | void set_nx(void) | 55 | /* 32bit non-PAE kernel, NX cannot be used */ |
55 | { | 56 | printk(KERN_NOTICE "Notice: NX (Execute Disable) protection " |
56 | } | 57 | "cannot be enabled: non-PAE kernel!\n"); |
57 | #endif | 58 | #endif |
58 | 59 | } | |
59 | #ifdef CONFIG_X86_64 | ||
60 | void __cpuinit check_efer(void) | ||
61 | { | ||
62 | unsigned long efer; | ||
63 | |||
64 | rdmsrl(MSR_EFER, efer); | ||
65 | if (!(efer & EFER_NX) || disable_nx) | ||
66 | __supported_pte_mask &= ~_PAGE_NX; | ||
67 | } | 60 | } |
68 | #endif | ||
69 | |||
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c index 9d7ce96e5a5c..d89075489664 100644 --- a/arch/x86/mm/srat_64.c +++ b/arch/x86/mm/srat_64.c | |||
@@ -290,8 +290,6 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma) | |||
290 | 290 | ||
291 | printk(KERN_INFO "SRAT: Node %u PXM %u %lx-%lx\n", node, pxm, | 291 | printk(KERN_INFO "SRAT: Node %u PXM %u %lx-%lx\n", node, pxm, |
292 | start, end); | 292 | start, end); |
293 | e820_register_active_regions(node, start >> PAGE_SHIFT, | ||
294 | end >> PAGE_SHIFT); | ||
295 | 293 | ||
296 | if (ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) { | 294 | if (ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) { |
297 | update_nodes_add(node, start, end); | 295 | update_nodes_add(node, start, end); |
@@ -338,6 +336,19 @@ static int __init nodes_cover_memory(const struct bootnode *nodes) | |||
338 | 336 | ||
339 | void __init acpi_numa_arch_fixup(void) {} | 337 | void __init acpi_numa_arch_fixup(void) {} |
340 | 338 | ||
339 | int __init acpi_get_nodes(struct bootnode *physnodes) | ||
340 | { | ||
341 | int i; | ||
342 | int ret = 0; | ||
343 | |||
344 | for_each_node_mask(i, nodes_parsed) { | ||
345 | physnodes[ret].start = nodes[i].start; | ||
346 | physnodes[ret].end = nodes[i].end; | ||
347 | ret++; | ||
348 | } | ||
349 | return ret; | ||
350 | } | ||
351 | |||
341 | /* Use the information discovered above to actually set up the nodes. */ | 352 | /* Use the information discovered above to actually set up the nodes. */ |
342 | int __init acpi_scan_nodes(unsigned long start, unsigned long end) | 353 | int __init acpi_scan_nodes(unsigned long start, unsigned long end) |
343 | { | 354 | { |
@@ -350,11 +361,6 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end) | |||
350 | for (i = 0; i < MAX_NUMNODES; i++) | 361 | for (i = 0; i < MAX_NUMNODES; i++) |
351 | cutoff_node(i, start, end); | 362 | cutoff_node(i, start, end); |
352 | 363 | ||
353 | if (!nodes_cover_memory(nodes)) { | ||
354 | bad_srat(); | ||
355 | return -1; | ||
356 | } | ||
357 | |||
358 | memnode_shift = compute_hash_shift(node_memblk_range, num_node_memblks, | 364 | memnode_shift = compute_hash_shift(node_memblk_range, num_node_memblks, |
359 | memblk_nodeid); | 365 | memblk_nodeid); |
360 | if (memnode_shift < 0) { | 366 | if (memnode_shift < 0) { |
@@ -364,6 +370,14 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end) | |||
364 | return -1; | 370 | return -1; |
365 | } | 371 | } |
366 | 372 | ||
373 | for_each_node_mask(i, nodes_parsed) | ||
374 | e820_register_active_regions(i, nodes[i].start >> PAGE_SHIFT, | ||
375 | nodes[i].end >> PAGE_SHIFT); | ||
376 | if (!nodes_cover_memory(nodes)) { | ||
377 | bad_srat(); | ||
378 | return -1; | ||
379 | } | ||
380 | |||
367 | /* Account for nodes with cpus and no memory */ | 381 | /* Account for nodes with cpus and no memory */ |
368 | nodes_or(node_possible_map, nodes_parsed, cpu_nodes_parsed); | 382 | nodes_or(node_possible_map, nodes_parsed, cpu_nodes_parsed); |
369 | 383 | ||
@@ -454,7 +468,6 @@ void __init acpi_fake_nodes(const struct bootnode *fake_nodes, int num_nodes) | |||
454 | for (i = 0; i < num_nodes; i++) | 468 | for (i = 0; i < num_nodes; i++) |
455 | if (fake_nodes[i].start != fake_nodes[i].end) | 469 | if (fake_nodes[i].start != fake_nodes[i].end) |
456 | node_set(i, nodes_parsed); | 470 | node_set(i, nodes_parsed); |
457 | WARN_ON(!nodes_cover_memory(fake_nodes)); | ||
458 | } | 471 | } |
459 | 472 | ||
460 | static int null_slit_node_compare(int a, int b) | 473 | static int null_slit_node_compare(int a, int b) |
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 36fe08eeb5c3..65b58e4b0b8b 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c | |||
@@ -8,6 +8,7 @@ | |||
8 | 8 | ||
9 | #include <asm/tlbflush.h> | 9 | #include <asm/tlbflush.h> |
10 | #include <asm/mmu_context.h> | 10 | #include <asm/mmu_context.h> |
11 | #include <asm/cache.h> | ||
11 | #include <asm/apic.h> | 12 | #include <asm/apic.h> |
12 | #include <asm/uv/uv.h> | 13 | #include <asm/uv/uv.h> |
13 | 14 | ||
@@ -43,7 +44,7 @@ union smp_flush_state { | |||
43 | spinlock_t tlbstate_lock; | 44 | spinlock_t tlbstate_lock; |
44 | DECLARE_BITMAP(flush_cpumask, NR_CPUS); | 45 | DECLARE_BITMAP(flush_cpumask, NR_CPUS); |
45 | }; | 46 | }; |
46 | char pad[CONFIG_X86_INTERNODE_CACHE_BYTES]; | 47 | char pad[INTERNODE_CACHE_BYTES]; |
47 | } ____cacheline_internodealigned_in_smp; | 48 | } ____cacheline_internodealigned_in_smp; |
48 | 49 | ||
49 | /* State is put into the per CPU data section, but padded | 50 | /* State is put into the per CPU data section, but padded |
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index dfbf70e65860..c462cea8ef09 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c | |||
@@ -1093,10 +1093,8 @@ asmlinkage void __init xen_start_kernel(void) | |||
1093 | 1093 | ||
1094 | __supported_pte_mask |= _PAGE_IOMAP; | 1094 | __supported_pte_mask |= _PAGE_IOMAP; |
1095 | 1095 | ||
1096 | #ifdef CONFIG_X86_64 | ||
1097 | /* Work out if we support NX */ | 1096 | /* Work out if we support NX */ |
1098 | check_efer(); | 1097 | x86_configure_nx(); |
1099 | #endif | ||
1100 | 1098 | ||
1101 | xen_setup_features(); | 1099 | xen_setup_features(); |
1102 | 1100 | ||
diff --git a/drivers/acpi/numa.c b/drivers/acpi/numa.c index 202dd0c976a3..2be2fb66204e 100644 --- a/drivers/acpi/numa.c +++ b/drivers/acpi/numa.c | |||
@@ -283,22 +283,24 @@ acpi_table_parse_srat(enum acpi_srat_type id, | |||
283 | 283 | ||
284 | int __init acpi_numa_init(void) | 284 | int __init acpi_numa_init(void) |
285 | { | 285 | { |
286 | int ret = 0; | ||
287 | |||
286 | /* SRAT: Static Resource Affinity Table */ | 288 | /* SRAT: Static Resource Affinity Table */ |
287 | if (!acpi_table_parse(ACPI_SIG_SRAT, acpi_parse_srat)) { | 289 | if (!acpi_table_parse(ACPI_SIG_SRAT, acpi_parse_srat)) { |
288 | acpi_table_parse_srat(ACPI_SRAT_TYPE_X2APIC_CPU_AFFINITY, | 290 | acpi_table_parse_srat(ACPI_SRAT_TYPE_X2APIC_CPU_AFFINITY, |
289 | acpi_parse_x2apic_affinity, NR_CPUS); | 291 | acpi_parse_x2apic_affinity, NR_CPUS); |
290 | acpi_table_parse_srat(ACPI_SRAT_TYPE_CPU_AFFINITY, | 292 | acpi_table_parse_srat(ACPI_SRAT_TYPE_CPU_AFFINITY, |
291 | acpi_parse_processor_affinity, NR_CPUS); | 293 | acpi_parse_processor_affinity, NR_CPUS); |
292 | acpi_table_parse_srat(ACPI_SRAT_TYPE_MEMORY_AFFINITY, | 294 | ret = acpi_table_parse_srat(ACPI_SRAT_TYPE_MEMORY_AFFINITY, |
293 | acpi_parse_memory_affinity, | 295 | acpi_parse_memory_affinity, |
294 | NR_NODE_MEMBLKS); | 296 | NR_NODE_MEMBLKS); |
295 | } | 297 | } |
296 | 298 | ||
297 | /* SLIT: System Locality Information Table */ | 299 | /* SLIT: System Locality Information Table */ |
298 | acpi_table_parse(ACPI_SIG_SLIT, acpi_parse_slit); | 300 | acpi_table_parse(ACPI_SIG_SLIT, acpi_parse_slit); |
299 | 301 | ||
300 | acpi_numa_arch_fixup(); | 302 | acpi_numa_arch_fixup(); |
301 | return 0; | 303 | return ret; |
302 | } | 304 | } |
303 | 305 | ||
304 | int acpi_get_pxm(acpi_handle h) | 306 | int acpi_get_pxm(acpi_handle h) |