author		Linus Torvalds <torvalds@linux-foundation.org>	2009-12-08 16:27:33 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2009-12-08 16:27:33 -0500
commit		e33c01972239fee4696679ae5f7d1f340f424999 (patch)
tree		bd4bc3223ba572719b3a53142fdb98910450fe64 /arch/x86/kernel
parent		343036cea2854acf8d4b4c930c0063223bc6b8a2 (diff)
parent		ccef086454d4c97e7b722e9303390207d681cb4c (diff)
Merge branch 'x86-mm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'x86-mm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (36 commits)
  x86, mm: Correct the implementation of is_untracked_pat_range()
  x86/pat: Trivial: don't create debugfs for memtype if pat is disabled
  x86, mtrr: Fix sorting of mtrr after subtracting
  x86: Move find_smp_config() earlier and avoid bootmem usage
  x86, platform: Change is_untracked_pat_range() to bool; cleanup init
  x86: Change is_ISA_range() into an inline function
  x86, mm: is_untracked_pat_range() takes a normal semiclosed range
  x86, mm: Call is_untracked_pat_range() rather than is_ISA_range()
  x86: UV SGI: Don't track GRU space in PAT
  x86: SGI UV: Fix BAU initialization
  x86, numa: Use near(er) online node instead of roundrobin for NUMA
  x86, numa, bootmem: Only free bootmem on NUMA failure path
  x86: Change crash kernel to reserve via reserve_early()
  x86: Eliminate redundant/contradicting cache line size config options
  x86: When cleaning MTRRs, do not fold WP into UC
  x86: remove "extern" from function prototypes in <asm/proto.h>
  x86, mm: Report state of NX protections during boot
  x86, mm: Clean up and simplify NX enablement
  x86, pageattr: Make set_memory_(x|nx) aware of NX support
  x86, sleep: Always save the value of EFER
  ...

Fix up conflicts (added both iommu_shutdown and is_untracked_pat_range to
'struct x86_platform_ops') in arch/x86/include/asm/x86_init.h and
arch/x86/kernel/x86_init.c
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--  arch/x86/kernel/acpi/sleep.c        |  24
-rw-r--r--  arch/x86/kernel/apic/numaq_32.c     |   5
-rw-r--r--  arch/x86/kernel/apic/x2apic_uv_x.c  |  19
-rw-r--r--  arch/x86/kernel/cpu/common.c        |   2
-rw-r--r--  arch/x86/kernel/cpu/intel.c         |   6
-rw-r--r--  arch/x86/kernel/cpu/mtrr/cleanup.c  |  51
-rw-r--r--  arch/x86/kernel/ftrace.c            |  17
-rw-r--r--  arch/x86/kernel/head_32.S           |  18
-rw-r--r--  arch/x86/kernel/head_64.S           |   3
-rw-r--r--  arch/x86/kernel/machine_kexec_32.c  |   6
-rw-r--r--  arch/x86/kernel/mpparse.c           |  44
-rw-r--r--  arch/x86/kernel/setup.c             | 106
-rw-r--r--  arch/x86/kernel/tlb_uv.c            |   4
-rw-r--r--  arch/x86/kernel/visws_quirks.c      |   2
-rw-r--r--  arch/x86/kernel/vmlinux.lds.S       |  38
-rw-r--r--  arch/x86/kernel/x86_init.c          |   2
16 files changed, 200 insertions(+), 147 deletions(-)
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index ca93638ba430..82e508677b91 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -78,12 +78,9 @@ int acpi_save_state_mem(void)
 #ifndef CONFIG_64BIT
 	store_gdt((struct desc_ptr *)&header->pmode_gdt);
 
-	header->pmode_efer_low = nx_enabled;
-	if (header->pmode_efer_low & 1) {
-		/* This is strange, why not save efer, always? */
-		rdmsr(MSR_EFER, header->pmode_efer_low,
-			header->pmode_efer_high);
-	}
+	if (rdmsr_safe(MSR_EFER, &header->pmode_efer_low,
+		       &header->pmode_efer_high))
+		header->pmode_efer_low = header->pmode_efer_high = 0;
 #endif /* !CONFIG_64BIT */
 
 	header->pmode_cr0 = read_cr0();
@@ -119,29 +116,32 @@ void acpi_restore_state_mem(void)
 
 
 /**
- * acpi_reserve_bootmem - do _very_ early ACPI initialisation
+ * acpi_reserve_wakeup_memory - do _very_ early ACPI initialisation
  *
  * We allocate a page from the first 1MB of memory for the wakeup
  * routine for when we come back from a sleep state. The
  * runtime allocator allows specification of <16MB pages, but not
  * <1MB pages.
  */
-void __init acpi_reserve_bootmem(void)
+void __init acpi_reserve_wakeup_memory(void)
 {
+	unsigned long mem;
+
 	if ((&wakeup_code_end - &wakeup_code_start) > WAKEUP_SIZE) {
 		printk(KERN_ERR
 		       "ACPI: Wakeup code way too big, S3 disabled.\n");
 		return;
 	}
 
-	acpi_realmode = (unsigned long)alloc_bootmem_low(WAKEUP_SIZE);
+	mem = find_e820_area(0, 1<<20, WAKEUP_SIZE, PAGE_SIZE);
 
-	if (!acpi_realmode) {
+	if (mem == -1L) {
 		printk(KERN_ERR "ACPI: Cannot allocate lowmem, S3 disabled.\n");
 		return;
 	}
-
-	acpi_wakeup_address = virt_to_phys((void *)acpi_realmode);
+	acpi_realmode = (unsigned long) phys_to_virt(mem);
+	acpi_wakeup_address = mem;
+	reserve_early(mem, mem + WAKEUP_SIZE, "ACPI WAKEUP");
 }
 
 
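Note: the hunk above is the first instance of a pattern that recurs throughout
this merge (mpparse.c and setup.c below do the same): bootmem allocations are
replaced by a find_e820_area() + reserve_early() pair, so memory is claimed
before the bootmem allocator even exists. A minimal sketch of the pattern,
assuming only the signatures visible in the hunks; the helper name is ours,
not the kernel's:

/*
 * Illustrative helper (not from the patch): find a free block in the
 * e820 map and mark it reserved.  find_e820_area() returns -1L when
 * no suitable range exists; reserve_early() records [start, end) in
 * the early-reservation table under the given label.
 */
static unsigned long __init early_grab(unsigned long start, unsigned long end,
				       unsigned long size, const char *name)
{
	unsigned long mem = find_e820_area(start, end, size, PAGE_SIZE);

	if (mem != -1L)
		reserve_early(mem, mem + size, name);
	return mem;	/* physical address, or -1L on failure */
}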
diff --git a/arch/x86/kernel/apic/numaq_32.c b/arch/x86/kernel/apic/numaq_32.c
index 07cdbdcd7a92..98c4665f251c 100644
--- a/arch/x86/kernel/apic/numaq_32.c
+++ b/arch/x86/kernel/apic/numaq_32.c
@@ -264,11 +264,6 @@ static void __init smp_read_mpc_oem(struct mpc_table *mpc)
 static __init void early_check_numaq(void)
 {
 	/*
-	 * Find possible boot-time SMP configuration:
-	 */
-	early_find_smp_config();
-
-	/*
 	 * get boot-time SMP configuration:
 	 */
 	if (smp_found_config)
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 130c4b934877..b684bb303cbf 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -30,10 +30,22 @@
 #include <asm/apic.h>
 #include <asm/ipi.h>
 #include <asm/smp.h>
+#include <asm/x86_init.h>
 
 DEFINE_PER_CPU(int, x2apic_extra_bits);
 
 static enum uv_system_type uv_system_type;
+static u64 gru_start_paddr, gru_end_paddr;
+
+static inline bool is_GRU_range(u64 start, u64 end)
+{
+	return start >= gru_start_paddr && end <= gru_end_paddr;
+}
+
+static bool uv_is_untracked_pat_range(u64 start, u64 end)
+{
+	return is_ISA_range(start, end) || is_GRU_range(start, end);
+}
 
 static int early_get_nodeid(void)
 {
@@ -49,6 +61,7 @@ static int early_get_nodeid(void)
 static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
 {
 	if (!strcmp(oem_id, "SGI")) {
+		x86_platform.is_untracked_pat_range = uv_is_untracked_pat_range;
 		if (!strcmp(oem_table_id, "UVL"))
 			uv_system_type = UV_LEGACY_APIC;
 		else if (!strcmp(oem_table_id, "UVX"))
@@ -385,8 +398,12 @@ static __init void map_gru_high(int max_pnode)
 	int shift = UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT;
 
 	gru.v = uv_read_local_mmr(UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR);
-	if (gru.s.enable)
+	if (gru.s.enable) {
 		map_high("GRU", gru.s.base, shift, max_pnode, map_wb);
+		gru_start_paddr = ((u64)gru.s.base << shift);
+		gru_end_paddr = gru_start_paddr + (1UL << shift) * (max_pnode + 1);
+
+	}
 }
 
 static __init void map_mmr_high(int max_pnode)
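Note: per the "is_untracked_pat_range() takes a normal semiclosed range"
commit in this series, the callback treats a range as [start, end). A
standalone sketch of the containment test used by is_GRU_range() above;
the window addresses here are made up for illustration:

/* Standalone illustration, not kernel code: a range [start, end) is
 * untracked only if it lies entirely inside the window. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static const uint64_t win_start = 0x1000000000ULL;	/* hypothetical */
static const uint64_t win_end   = 0x2000000000ULL;	/* hypothetical */

static bool in_window(uint64_t start, uint64_t end)
{
	return start >= win_start && end <= win_end;
}

int main(void)
{
	printf("%d\n", in_window(0x1000000000ULL, 0x1000001000ULL)); /* 1 */
	printf("%d\n", in_window(0x1fffff0000ULL, 0x2000010000ULL)); /* 0: straddles */
	return 0;
}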
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index a4ec8b647544..c1afa990a6c8 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1136,7 +1136,7 @@ void __cpuinit cpu_init(void)
 	wrmsrl(MSR_KERNEL_GS_BASE, 0);
 	barrier();
 
-	check_efer();
+	x86_configure_nx();
 	if (cpu != 0)
 		enable_x2apic();
 
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 40e1835b35e8..c900b73f9224 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -263,8 +263,12 @@ static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c)
 	/* Don't do the funky fallback heuristics the AMD version employs
 	   for now. */
 	node = apicid_to_node[apicid];
-	if (node == NUMA_NO_NODE || !node_online(node))
+	if (node == NUMA_NO_NODE)
 		node = first_node(node_online_map);
+	else if (!node_online(node)) {
+		/* reuse the value from init_cpu_to_node() */
+		node = cpu_to_node(cpu);
+	}
 	numa_set_node(cpu, node);
 
 	printk(KERN_INFO "CPU %d/0x%x -> Node %d\n", cpu, apicid, node);
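Note: the hunk above implements the "near(er) online node instead of
roundrobin" change. A condensed restatement of the fallback order, as an
illustrative function (not kernel code): prefer the SRAT-derived node, fall
back to the boot-time cpu_to_node() value when that node is offline, and pick
the first online node only when SRAT gave nothing at all.

static int pick_node(int srat_node, int boot_node, int first_online,
		     int (*online)(int))
{
	if (srat_node == -1)		/* NUMA_NO_NODE */
		return first_online;
	if (!online(srat_node))
		return boot_node;	/* the init_cpu_to_node() value */
	return srat_node;
}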
diff --git a/arch/x86/kernel/cpu/mtrr/cleanup.c b/arch/x86/kernel/cpu/mtrr/cleanup.c
index 73c86db5acbe..09b1698e0466 100644
--- a/arch/x86/kernel/cpu/mtrr/cleanup.c
+++ b/arch/x86/kernel/cpu/mtrr/cleanup.c
@@ -170,6 +170,41 @@ static int __init cmp_range(const void *x1, const void *x2)
 	return start1 - start2;
 }
 
+static int __init clean_sort_range(struct res_range *range, int az)
+{
+	int i, j, k = az - 1, nr_range = 0;
+
+	for (i = 0; i < k; i++) {
+		if (range[i].end)
+			continue;
+		for (j = k; j > i; j--) {
+			if (range[j].end) {
+				k = j;
+				break;
+			}
+		}
+		if (j == i)
+			break;
+		range[i].start = range[k].start;
+		range[i].end = range[k].end;
+		range[k].start = 0;
+		range[k].end = 0;
+		k--;
+	}
+	/* count it */
+	for (i = 0; i < az; i++) {
+		if (!range[i].end) {
+			nr_range = i;
+			break;
+		}
+	}
+
+	/* sort them */
+	sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL);
+
+	return nr_range;
+}
+
 #define BIOS_BUG_MSG KERN_WARNING \
 	"WARNING: BIOS bug: VAR MTRR %d contains strange UC entry under 1M, check with your system vendor!\n"
 
@@ -223,22 +258,18 @@ x86_get_mtrr_mem_range(struct res_range *range, int nr_range,
 	subtract_range(range, extra_remove_base,
 			 extra_remove_base + extra_remove_size - 1);
 
-	/* get new range num */
-	nr_range = 0;
-	for (i = 0; i < RANGE_NUM; i++) {
-		if (!range[i].end)
-			continue;
-		nr_range++;
-	}
 	if (debug_print) {
 		printk(KERN_DEBUG "After UC checking\n");
-		for (i = 0; i < nr_range; i++)
+		for (i = 0; i < RANGE_NUM; i++) {
+			if (!range[i].end)
+				continue;
 			printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n",
 				 range[i].start, range[i].end + 1);
+		}
 	}
 
 	/* sort the ranges */
-	sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL);
+	nr_range = clean_sort_range(range, RANGE_NUM);
 	if (debug_print) {
 		printk(KERN_DEBUG "After sorting\n");
 		for (i = 0; i < nr_range; i++)
@@ -689,8 +720,6 @@ static int __init mtrr_need_cleanup(void)
 			continue;
 		if (!size)
 			type = MTRR_NUM_TYPES;
-		if (type == MTRR_TYPE_WRPROT)
-			type = MTRR_TYPE_UNCACHABLE;
 		num[type]++;
 	}
 
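Note: clean_sort_range() above fixes the "Fix sorting of mtrr after
subtracting" bug by compacting the array (swapping empty end==0 slots to the
tail) before counting and sorting, so subtract_range() holes no longer
truncate the live set. A user-space demonstration of the same compact-then-
sort behavior; qsort() stands in for the kernel's sort(), and unlike the
kernel variant (which can assume a spare empty slot), nr_range here defaults
to the full array:

#include <stdio.h>
#include <stdlib.h>

struct res_range { unsigned long start, end; };

static int cmp_range(const void *a, const void *b)
{
	const struct res_range *r1 = a, *r2 = b;

	return (r1->start > r2->start) - (r1->start < r2->start);
}

static int clean_sort_range(struct res_range *range, int az)
{
	int i, j, k = az - 1, nr_range = az;

	for (i = 0; i < k; i++) {
		if (range[i].end)
			continue;
		for (j = k; j > i; j--) {	/* find last live slot */
			if (range[j].end) {
				k = j;
				break;
			}
		}
		if (j == i)
			break;
		range[i] = range[k];		/* move it into the hole */
		range[k].start = range[k].end = 0;
		k--;
	}
	for (i = 0; i < az; i++) {		/* count live entries */
		if (!range[i].end) {
			nr_range = i;
			break;
		}
	}
	qsort(range, nr_range, sizeof(*range), cmp_range);
	return nr_range;
}

int main(void)
{
	struct res_range r[4] = { {9, 10}, {0, 0}, {1, 2}, {5, 6} };
	int i, n = clean_sort_range(r, 4);

	for (i = 0; i < n; i++)			/* prints 1-2, 5-6, 9-10 */
		printf("%lu-%lu\n", r[i].start, r[i].end);
	return 0;
}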
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 5a1b9758fd62..309689245431 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -189,9 +189,26 @@ static void wait_for_nmi(void)
 	nmi_wait_count++;
 }
 
+static inline int
+within(unsigned long addr, unsigned long start, unsigned long end)
+{
+	return addr >= start && addr < end;
+}
+
 static int
 do_ftrace_mod_code(unsigned long ip, void *new_code)
 {
+	/*
+	 * On x86_64, kernel text mappings are mapped read-only with
+	 * CONFIG_DEBUG_RODATA. So we use the kernel identity mapping instead
+	 * of the kernel text mapping to modify the kernel text.
+	 *
+	 * For 32bit kernels, these mappings are same and we can use
+	 * kernel identity mapping to modify code.
+	 */
+	if (within(ip, (unsigned long)_text, (unsigned long)_etext))
+		ip = (unsigned long)__va(__pa(ip));
+
 	mod_code_ip = (void *)ip;
 	mod_code_newcode = new_code;
 
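Note: the __va(__pa(ip)) round-trip above works because the same physical
text page is reachable through two virtual mappings on x86_64: the read-only
kernel text mapping and the writable identity mapping. A standalone Linux
sketch of the same aliasing idea using two mmap() views of one backing page
(error checking omitted for brevity):

#define _GNU_SOURCE
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	long pagesz = sysconf(_SC_PAGESIZE);
	int fd = memfd_create("text", 0);	/* Linux-only */
	char *ro, *rw;

	ftruncate(fd, pagesz);
	ro = mmap(NULL, pagesz, PROT_READ, MAP_SHARED, fd, 0);
	rw = mmap(NULL, pagesz, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);

	strcpy(rw, "patched");	/* write through the writable alias */
	printf("%s\n", ro);	/* the read-only view sees "patched" */
	return 0;
}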
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index 050c278481b1..7fd318bac59c 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -18,6 +18,8 @@
 #include <asm/asm-offsets.h>
 #include <asm/setup.h>
 #include <asm/processor-flags.h>
+#include <asm/msr-index.h>
+#include <asm/cpufeature.h>
 #include <asm/percpu.h>
 
 /* Physical address */
@@ -297,25 +299,27 @@ ENTRY(startup_32_smp)
 	orl %edx,%eax
 	movl %eax,%cr4
 
-	btl $5, %eax		# check if PAE is enabled
-	jnc 6f
+	testb $X86_CR4_PAE, %al		# check if PAE is enabled
+	jz 6f
 
 	/* Check if extended functions are implemented */
 	movl $0x80000000, %eax
 	cpuid
-	cmpl $0x80000000, %eax
-	jbe 6f
+	/* Value must be in the range 0x80000001 to 0x8000ffff */
+	subl $0x80000001, %eax
+	cmpl $(0x8000ffff-0x80000001), %eax
+	ja 6f
 	mov $0x80000001, %eax
 	cpuid
 	/* Execute Disable bit supported? */
-	btl $20, %edx
+	btl $(X86_FEATURE_NX & 31), %edx
 	jnc 6f
 
 	/* Setup EFER (Extended Feature Enable Register) */
-	movl $0xc0000080, %ecx
+	movl $MSR_EFER, %ecx
 	rdmsr
 
-	btsl $11, %eax
+	btsl $_EFER_NX, %eax
 	/* Make changes effective */
 	wrmsr
 
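Note: the subl/cmpl/ja sequence above is the classic unsigned range-check
idiom. A C rendering: subtracting the lower bound first lets one unsigned
comparison reject both "below 0x80000001" (the subtraction wraps to a huge
value) and "above 0x8000ffff", where the old cmpl/jbe only checked the lower
bound.

#include <stdint.h>

static int extended_level_valid(uint32_t max_ext_leaf)
{
	return (uint32_t)(max_ext_leaf - 0x80000001u) <=
	       (0x8000ffffu - 0x80000001u);
}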
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 22db86a37643..2d8b5035371c 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -262,11 +262,11 @@ ENTRY(secondary_startup_64)
 	.quad	x86_64_start_kernel
 ENTRY(initial_gs)
 	.quad	INIT_PER_CPU_VAR(irq_stack_union)
-	__FINITDATA
 
 ENTRY(stack_start)
 	.quad  init_thread_union+THREAD_SIZE-8
 	.word  0
+	__FINITDATA
 
 bad_address:
 	jmp bad_address
@@ -340,6 +340,7 @@ ENTRY(name)
 	i = i + 1 ;				\
 	.endr
 
+	.data
 	/*
 	 * This default setting generates an ident mapping at address 0x100000
 	 * and a mapping for the kernel that precisely maps virtual address
diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c
index c843f8406da2..a3fa43ba5d3b 100644
--- a/arch/x86/kernel/machine_kexec_32.c
+++ b/arch/x86/kernel/machine_kexec_32.c
@@ -158,8 +158,7 @@ int machine_kexec_prepare(struct kimage *image)
 {
 	int error;
 
-	if (nx_enabled)
-		set_pages_x(image->control_code_page, 1);
+	set_pages_x(image->control_code_page, 1);
 	error = machine_kexec_alloc_page_tables(image);
 	if (error)
 		return error;
@@ -173,8 +172,7 @@ int machine_kexec_prepare(struct kimage *image)
  */
 void machine_kexec_cleanup(struct kimage *image)
 {
-	if (nx_enabled)
-		set_pages_nx(image->control_code_page, 1);
+	set_pages_nx(image->control_code_page, 1);
 	machine_kexec_free_page_tables(image);
 }
 
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index 5be95ef4ffec..35a57c963df9 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -667,36 +667,18 @@ void __init default_get_smp_config(unsigned int early)
  */
 }
 
-static void __init smp_reserve_bootmem(struct mpf_intel *mpf)
+static void __init smp_reserve_memory(struct mpf_intel *mpf)
 {
 	unsigned long size = get_mpc_size(mpf->physptr);
-#ifdef CONFIG_X86_32
-	/*
-	 * We cannot access to MPC table to compute table size yet,
-	 * as only few megabytes from the bottom is mapped now.
-	 * PC-9800's MPC table places on the very last of physical
-	 * memory; so that simply reserving PAGE_SIZE from mpf->physptr
-	 * yields BUG() in reserve_bootmem.
-	 * also need to make sure physptr is below than max_low_pfn
-	 * we don't need reserve the area above max_low_pfn
-	 */
-	unsigned long end = max_low_pfn * PAGE_SIZE;
 
-	if (mpf->physptr < end) {
-		if (mpf->physptr + size > end)
-			size = end - mpf->physptr;
-		reserve_bootmem_generic(mpf->physptr, size, BOOTMEM_DEFAULT);
-	}
-#else
-	reserve_bootmem_generic(mpf->physptr, size, BOOTMEM_DEFAULT);
-#endif
+	reserve_early(mpf->physptr, mpf->physptr+size, "MP-table mpc");
 }
 
-static int __init smp_scan_config(unsigned long base, unsigned long length,
-				  unsigned reserve)
+static int __init smp_scan_config(unsigned long base, unsigned long length)
 {
 	unsigned int *bp = phys_to_virt(base);
 	struct mpf_intel *mpf;
+	unsigned long mem;
 
 	apic_printk(APIC_VERBOSE, "Scan SMP from %p for %ld bytes.\n",
 			bp, length);
@@ -717,12 +699,10 @@ static int __init smp_scan_config(unsigned long base, unsigned long length,
 			printk(KERN_INFO "found SMP MP-table at [%p] %llx\n",
 			       mpf, (u64)virt_to_phys(mpf));
 
-			if (!reserve)
-				return 1;
-			reserve_bootmem_generic(virt_to_phys(mpf), sizeof(*mpf),
-						BOOTMEM_DEFAULT);
+			mem = virt_to_phys(mpf);
+			reserve_early(mem, mem + sizeof(*mpf), "MP-table mpf");
 			if (mpf->physptr)
-				smp_reserve_bootmem(mpf);
+				smp_reserve_memory(mpf);
 
 			return 1;
 		}
@@ -732,7 +712,7 @@ static int __init smp_scan_config(unsigned long base, unsigned long length,
 	return 0;
 }
 
-void __init default_find_smp_config(unsigned int reserve)
+void __init default_find_smp_config(void)
 {
 	unsigned int address;
 
@@ -744,9 +724,9 @@ void __init default_find_smp_config(unsigned int reserve)
 	 * 2) Scan the top 1K of base RAM
 	 * 3) Scan the 64K of bios
 	 */
-	if (smp_scan_config(0x0, 0x400, reserve) ||
-	    smp_scan_config(639 * 0x400, 0x400, reserve) ||
-	    smp_scan_config(0xF0000, 0x10000, reserve))
+	if (smp_scan_config(0x0, 0x400) ||
+	    smp_scan_config(639 * 0x400, 0x400) ||
+	    smp_scan_config(0xF0000, 0x10000))
 		return;
 	/*
 	 * If it is an SMP machine we should know now, unless the
@@ -767,7 +747,7 @@ void __init default_find_smp_config(unsigned int reserve)
 
 	address = get_bios_ebda();
 	if (address)
-		smp_scan_config(address, 0x400, reserve);
+		smp_scan_config(address, 0x400);
 }
 
 #ifdef CONFIG_X86_IO_APIC
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 82e88cdda9bc..946a311a25c9 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -106,6 +106,7 @@
 #include <asm/percpu.h>
 #include <asm/topology.h>
 #include <asm/apicdef.h>
+#include <asm/k8.h>
 #ifdef CONFIG_X86_64
 #include <asm/numa_64.h>
 #endif
@@ -487,42 +488,11 @@ static void __init reserve_early_setup_data(void)
 
 #ifdef CONFIG_KEXEC
 
-/**
- * Reserve @size bytes of crashkernel memory at any suitable offset.
- *
- * @size: Size of the crashkernel memory to reserve.
- * Returns the base address on success, and -1ULL on failure.
- */
-static
-unsigned long long __init find_and_reserve_crashkernel(unsigned long long size)
-{
-	const unsigned long long alignment = 16<<20;	/* 16M */
-	unsigned long long start = 0LL;
-
-	while (1) {
-		int ret;
-
-		start = find_e820_area(start, ULONG_MAX, size, alignment);
-		if (start == -1ULL)
-			return start;
-
-		/* try to reserve it */
-		ret = reserve_bootmem_generic(start, size, BOOTMEM_EXCLUSIVE);
-		if (ret >= 0)
-			return start;
-
-		start += alignment;
-	}
-}
-
 static inline unsigned long long get_total_mem(void)
 {
 	unsigned long long total;
 
-	total = max_low_pfn - min_low_pfn;
-#ifdef CONFIG_HIGHMEM
-	total += highend_pfn - highstart_pfn;
-#endif
+	total = max_pfn - min_low_pfn;
 
 	return total << PAGE_SHIFT;
 }
@@ -542,21 +512,25 @@ static void __init reserve_crashkernel(void)
 
 	/* 0 means: find the address automatically */
 	if (crash_base <= 0) {
-		crash_base = find_and_reserve_crashkernel(crash_size);
+		const unsigned long long alignment = 16<<20;	/* 16M */
+
+		crash_base = find_e820_area(alignment, ULONG_MAX, crash_size,
+					    alignment);
 		if (crash_base == -1ULL) {
-			pr_info("crashkernel reservation failed. "
-				"No suitable area found.\n");
+			pr_info("crashkernel reservation failed - No suitable area found.\n");
 			return;
 		}
 	} else {
-		ret = reserve_bootmem_generic(crash_base, crash_size,
-					BOOTMEM_EXCLUSIVE);
-		if (ret < 0) {
-			pr_info("crashkernel reservation failed - "
-				"memory is in use\n");
+		unsigned long long start;
+
+		start = find_e820_area(crash_base, ULONG_MAX, crash_size,
+				       1<<20);
+		if (start != crash_base) {
+			pr_info("crashkernel reservation failed - memory is in use.\n");
 			return;
 		}
 	}
+	reserve_early(crash_base, crash_base + crash_size, "CRASH KERNEL");
 
 	printk(KERN_INFO "Reserving %ldMB of memory at %ldMB "
 			"for crashkernel (System RAM: %ldMB)\n",
@@ -699,6 +673,9 @@ static struct dmi_system_id __initdata bad_bios_dmi_table[] = {
 
 void __init setup_arch(char **cmdline_p)
 {
+	int acpi = 0;
+	int k8 = 0;
+
 #ifdef CONFIG_X86_32
 	memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data));
 	visws_early_detect();
@@ -791,21 +768,18 @@ void __init setup_arch(char **cmdline_p)
 	strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE);
 	*cmdline_p = command_line;
 
-#ifdef CONFIG_X86_64
 	/*
-	 * Must call this twice: Once just to detect whether hardware doesn't
-	 * support NX (so that the early EHCI debug console setup can safely
-	 * call set_fixmap(), and then again after parsing early parameters to
-	 * honor the respective command line option.
+	 * x86_configure_nx() is called before parse_early_param() to detect
+	 * whether hardware doesn't support NX (so that the early EHCI debug
+	 * console setup can safely call set_fixmap()). It may then be called
+	 * again from within noexec_setup() during parsing early parameters
+	 * to honor the respective command line option.
 	 */
-	check_efer();
-#endif
+	x86_configure_nx();
 
 	parse_early_param();
 
-#ifdef CONFIG_X86_64
-	check_efer();
-#endif
+	x86_report_nx();
 
 	/* Must be before kernel pagetables are setup */
 	vmi_activate();
@@ -901,6 +875,13 @@ void __init setup_arch(char **cmdline_p)
 
 	reserve_brk();
 
+#ifdef CONFIG_ACPI_SLEEP
+	/*
+	 * Reserve low memory region for sleep support.
+	 * even before init_memory_mapping
+	 */
+	acpi_reserve_wakeup_memory();
+#endif
 	init_gbpages();
 
 	/* max_pfn_mapped is updated here */
@@ -927,6 +908,8 @@ void __init setup_arch(char **cmdline_p)
 
 	reserve_initrd();
 
+	reserve_crashkernel();
+
 	vsmp_init();
 
 	io_delay_init();
@@ -938,27 +921,24 @@ void __init setup_arch(char **cmdline_p)
 
 	early_acpi_boot_init();
 
+	/*
+	 * Find and reserve possible boot-time SMP configuration:
+	 */
+	find_smp_config();
+
 #ifdef CONFIG_ACPI_NUMA
 	/*
 	 * Parse SRAT to discover nodes.
 	 */
-	acpi_numa_init();
+	acpi = acpi_numa_init();
 #endif
 
-	initmem_init(0, max_pfn);
-
-#ifdef CONFIG_ACPI_SLEEP
-	/*
-	 * Reserve low memory region for sleep support.
-	 */
-	acpi_reserve_bootmem();
+#ifdef CONFIG_K8_NUMA
+	if (!acpi)
+		k8 = !k8_numa_init(0, max_pfn);
 #endif
-	/*
-	 * Find and reserve possible boot-time SMP configuration:
-	 */
-	find_smp_config();
 
-	reserve_crashkernel();
+	initmem_init(0, max_pfn, acpi, k8);
 
 #ifdef CONFIG_X86_64
 	/*
diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c
index 1740c85e24bb..364d015efebc 100644
--- a/arch/x86/kernel/tlb_uv.c
+++ b/arch/x86/kernel/tlb_uv.c
@@ -817,10 +817,8 @@ static int __init uv_init_blade(int blade)
 	 */
 	apicid = blade_to_first_apicid(blade);
 	pa = uv_read_global_mmr64(pnode, UVH_BAU_DATA_CONFIG);
-	if ((pa & 0xff) != UV_BAU_MESSAGE) {
-		uv_write_global_mmr64(pnode, UVH_BAU_DATA_CONFIG,
+	uv_write_global_mmr64(pnode, UVH_BAU_DATA_CONFIG,
 				      ((apicid << 32) | UV_BAU_MESSAGE));
-	}
 	return 0;
 }
 
diff --git a/arch/x86/kernel/visws_quirks.c b/arch/x86/kernel/visws_quirks.c
index abda6f53e71e..34a279a7471d 100644
--- a/arch/x86/kernel/visws_quirks.c
+++ b/arch/x86/kernel/visws_quirks.c
@@ -197,7 +197,7 @@ static void __init MP_processor_info(struct mpc_cpu *m)
 	apic_version[m->apicid] = ver;
 }
 
-static void __init visws_find_smp_config(unsigned int reserve)
+static void __init visws_find_smp_config(void)
 {
 	struct mpc_cpu *mp = phys_to_virt(CO_CPU_TAB_PHYS);
 	unsigned short ncpus = readw(phys_to_virt(CO_CPU_NUM_PHYS));
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 3c68fe2d46cf..f3f2104408d9 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -41,6 +41,32 @@ ENTRY(phys_startup_64)
 jiffies_64 = jiffies;
 #endif
 
+#if defined(CONFIG_X86_64) && defined(CONFIG_DEBUG_RODATA)
+/*
+ * On 64-bit, align RODATA to 2MB so that even with CONFIG_DEBUG_RODATA
+ * we retain large page mappings for boundaries spanning kernel text, rodata
+ * and data sections.
+ *
+ * However, kernel identity mappings will have different RWX permissions
+ * to the pages mapping to text and to the pages padding (which are freed) the
+ * text section. Hence kernel identity mappings will be broken to smaller
+ * pages. For 64-bit, kernel text and kernel identity mappings are different,
+ * so we can enable protection checks that come with CONFIG_DEBUG_RODATA,
+ * as well as retain 2MB large page mappings for kernel text.
+ */
+#define X64_ALIGN_DEBUG_RODATA_BEGIN	. = ALIGN(HPAGE_SIZE);
+
+#define X64_ALIGN_DEBUG_RODATA_END			\
+		. = ALIGN(HPAGE_SIZE);			\
+		__end_rodata_hpage_align = .;
+
+#else
+
+#define X64_ALIGN_DEBUG_RODATA_BEGIN
+#define X64_ALIGN_DEBUG_RODATA_END
+
+#endif
+
 PHDRS {
 	text PT_LOAD FLAGS(5);          /* R_E */
 	data PT_LOAD FLAGS(7);          /* RWE */
@@ -90,7 +116,9 @@ SECTIONS
 
 	EXCEPTION_TABLE(16) :text = 0x9090
 
+	X64_ALIGN_DEBUG_RODATA_BEGIN
 	RO_DATA(PAGE_SIZE)
+	X64_ALIGN_DEBUG_RODATA_END
 
 	/* Data */
 	.data : AT(ADDR(.data) - LOAD_OFFSET) {
@@ -107,13 +135,13 @@ SECTIONS
 
 		PAGE_ALIGNED_DATA(PAGE_SIZE)
 
-		CACHELINE_ALIGNED_DATA(CONFIG_X86_L1_CACHE_BYTES)
+		CACHELINE_ALIGNED_DATA(L1_CACHE_BYTES)
 
 		DATA_DATA
 		CONSTRUCTORS
 
 		/* rarely changed data like cpu maps */
-		READ_MOSTLY_DATA(CONFIG_X86_INTERNODE_CACHE_BYTES)
+		READ_MOSTLY_DATA(INTERNODE_CACHE_BYTES)
 
 		/* End of data section */
 		_edata = .;
@@ -137,12 +165,12 @@ SECTIONS
 		*(.vsyscall_0)
 	} :user
 
-	. = ALIGN(CONFIG_X86_L1_CACHE_BYTES);
+	. = ALIGN(L1_CACHE_BYTES);
 	.vsyscall_fn : AT(VLOAD(.vsyscall_fn)) {
 		*(.vsyscall_fn)
 	}
 
-	. = ALIGN(CONFIG_X86_L1_CACHE_BYTES);
+	. = ALIGN(L1_CACHE_BYTES);
 	.vsyscall_gtod_data : AT(VLOAD(.vsyscall_gtod_data)) {
 		*(.vsyscall_gtod_data)
 	}
@@ -166,7 +194,7 @@ SECTIONS
 	}
 	vgetcpu_mode = VVIRT(.vgetcpu_mode);
 
-	. = ALIGN(CONFIG_X86_L1_CACHE_BYTES);
+	. = ALIGN(L1_CACHE_BYTES);
 	.jiffies : AT(VLOAD(.jiffies)) {
 		*(.jiffies)
 	}
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index d11c5ff7c65e..ccd179dec36e 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -13,6 +13,7 @@
 #include <asm/e820.h>
 #include <asm/time.h>
 #include <asm/irq.h>
+#include <asm/pat.h>
 #include <asm/tsc.h>
 #include <asm/iommu.h>
 
@@ -80,4 +81,5 @@ struct x86_platform_ops x86_platform = {
 	.get_wallclock			= mach_get_cmos_time,
 	.set_wallclock			= mach_set_rtc_mmss,
 	.iommu_shutdown			= iommu_shutdown_noop,
+	.is_untracked_pat_range		= is_ISA_range,
 };
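
Note: the hunk above completes the x86_platform ops pattern this merge relies
on: a struct of function pointers with a safe default, overridden at runtime
by platform probing (as uv_acpi_madt_oem_check() does earlier in this diff).
A user-space sketch of the same indirection; the window values below are
illustrative, not the kernel's:

#include <stdbool.h>
#include <stdio.h>

typedef unsigned long long u64;

static bool default_untracked(u64 start, u64 end)
{
	return start >= 0xa0000 && end <= 0x100000;	/* ISA-like window */
}

static bool uv_untracked(u64 start, u64 end)
{
	return default_untracked(start, end) ||
	       start >= 0x1000000000ULL;		/* made-up GRU window */
}

struct platform_ops {
	bool (*is_untracked_pat_range)(u64 start, u64 end);
};

static struct platform_ops x86_platform = {
	.is_untracked_pat_range = default_untracked,	/* safe default */
};

int main(void)
{
	printf("%d\n", x86_platform.is_untracked_pat_range(0xa0000, 0xc0000));
	x86_platform.is_untracked_pat_range = uv_untracked;	/* probe hook */
	printf("%d\n", x86_platform.is_untracked_pat_range(0x1000000000ULL,
							   0x1000001000ULL));
	return 0;
}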