aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Kconfig37
-rw-r--r--arch/x86/boot/boot.h10
-rw-r--r--arch/x86/boot/cpu.c3
-rw-r--r--arch/x86/boot/cpucheck.c10
-rw-r--r--arch/x86/boot/main.c5
-rw-r--r--arch/x86/boot/memory.c1
-rw-r--r--arch/x86/kernel/acpi/boot.c16
-rw-r--r--arch/x86/kernel/acpi/sleep.c4
-rw-r--r--arch/x86/kernel/amd_iommu.c21
-rw-r--r--arch/x86/kernel/amd_iommu_init.c24
-rw-r--r--arch/x86/kernel/apic_32.c22
-rw-r--r--arch/x86/kernel/apic_64.c7
-rw-r--r--arch/x86/kernel/cpu/addon_cpuid_features.c17
-rw-r--r--arch/x86/kernel/cpu/bugs.c6
-rw-r--r--arch/x86/kernel/cpu/cpufreq/Kconfig4
-rw-r--r--arch/x86/kernel/cpu/cpufreq/elanfreq.c2
-rw-r--r--arch/x86/kernel/cpu/cpufreq/powernow-k8.c3
-rw-r--r--arch/x86/kernel/cpu/cyrix.c18
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_64.c5
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_amd_64.c18
-rw-r--r--arch/x86/kernel/cpu/mtrr/generic.c20
-rw-r--r--arch/x86/kernel/cpu/mtrr/main.c5
-rw-r--r--arch/x86/kernel/cpu/perfctr-watchdog.c8
-rw-r--r--arch/x86/kernel/efi_32.c4
-rw-r--r--arch/x86/kernel/genx2apic_uv_x.c10
-rw-r--r--arch/x86/kernel/head64.c1
-rw-r--r--arch/x86/kernel/hpet.c24
-rw-r--r--arch/x86/kernel/io_apic_32.c6
-rw-r--r--arch/x86/kernel/io_apic_64.c25
-rw-r--r--arch/x86/kernel/machine_kexec_32.c20
-rw-r--r--arch/x86/kernel/mfgpt_32.c52
-rw-r--r--arch/x86/kernel/mmconf-fam10h_64.c2
-rw-r--r--arch/x86/kernel/mpparse.c17
-rw-r--r--arch/x86/kernel/msr.c2
-rw-r--r--arch/x86/kernel/nmi.c28
-rw-r--r--arch/x86/kernel/numaq_32.c2
-rw-r--r--arch/x86/kernel/paravirt.c2
-rw-r--r--arch/x86/kernel/pci-calgary_64.c16
-rw-r--r--arch/x86/kernel/pci-dma.c8
-rw-r--r--arch/x86/kernel/process_32.c5
-rw-r--r--arch/x86/kernel/process_64.c5
-rw-r--r--arch/x86/kernel/relocate_kernel_32.S10
-rw-r--r--arch/x86/kernel/setup.c37
-rw-r--r--arch/x86/kernel/signal_64.c11
-rw-r--r--arch/x86/kernel/smpboot.c68
-rw-r--r--arch/x86/kernel/smpcommon.c17
-rw-r--r--arch/x86/kernel/tlb_uv.c3
-rw-r--r--arch/x86/kernel/traps_64.c9
-rw-r--r--arch/x86/kernel/tsc.c8
-rw-r--r--arch/x86/kernel/tsc_sync.c6
-rw-r--r--arch/x86/kernel/visws_quirks.c6
-rw-r--r--arch/x86/kernel/vmi_32.c3
-rw-r--r--arch/x86/kernel/vmlinux_32.lds.S8
-rw-r--r--arch/x86/kvm/mmu.c100
-rw-r--r--arch/x86/kvm/paging_tmpl.h14
-rw-r--r--arch/x86/kvm/x86.c24
-rw-r--r--arch/x86/mach-rdc321x/platform.c1
-rw-r--r--arch/x86/mm/Makefile3
-rw-r--r--arch/x86/mm/init_64.c48
-rw-r--r--arch/x86/mm/ioremap.c10
-rw-r--r--arch/x86/mm/mmio-mod.c4
-rw-r--r--arch/x86/mm/pageattr-test.c3
-rw-r--r--arch/x86/mm/pageattr.c27
-rw-r--r--arch/x86/mm/pat.c50
-rw-r--r--arch/x86/mm/pgtable.c3
-rw-r--r--arch/x86/mm/srat_32.c12
-rw-r--r--arch/x86/oprofile/nmi_int.c39
-rw-r--r--arch/x86/pci/amd_bus.c52
-rw-r--r--arch/x86/pci/i386.c78
-rw-r--r--arch/x86/pci/irq.c2
-rw-r--r--arch/x86/pci/legacy.c2
-rw-r--r--arch/x86/pci/mmconfig-shared.c67
-rw-r--r--arch/x86/power/cpu_32.c6
-rw-r--r--arch/x86/power/hibernate_asm_32.S26
74 files changed, 887 insertions, 365 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 3d0f2b6a5a16..ed92864d1325 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -22,7 +22,6 @@ config X86
22 select HAVE_IDE 22 select HAVE_IDE
23 select HAVE_OPROFILE 23 select HAVE_OPROFILE
24 select HAVE_IOREMAP_PROT 24 select HAVE_IOREMAP_PROT
25 select HAVE_GET_USER_PAGES_FAST
26 select HAVE_KPROBES 25 select HAVE_KPROBES
27 select ARCH_WANT_OPTIONAL_GPIOLIB 26 select ARCH_WANT_OPTIONAL_GPIOLIB
28 select HAVE_KRETPROBES 27 select HAVE_KRETPROBES
@@ -578,35 +577,29 @@ config SWIOTLB
578 577
579config IOMMU_HELPER 578config IOMMU_HELPER
580 def_bool (CALGARY_IOMMU || GART_IOMMU || SWIOTLB || AMD_IOMMU) 579 def_bool (CALGARY_IOMMU || GART_IOMMU || SWIOTLB || AMD_IOMMU)
580
581config MAXSMP 581config MAXSMP
582 bool "Configure Maximum number of SMP Processors and NUMA Nodes" 582 bool "Configure Maximum number of SMP Processors and NUMA Nodes"
583 depends on X86_64 && SMP 583 depends on X86_64 && SMP && BROKEN
584 default n 584 default n
585 help 585 help
586 Configure maximum number of CPUS and NUMA Nodes for this architecture. 586 Configure maximum number of CPUS and NUMA Nodes for this architecture.
587 If unsure, say N. 587 If unsure, say N.
588 588
589if MAXSMP
590config NR_CPUS
591 int
592 default "4096"
593endif
594
595if !MAXSMP
596config NR_CPUS 589config NR_CPUS
597 int "Maximum number of CPUs (2-4096)" 590 int "Maximum number of CPUs (2-512)" if !MAXSMP
598 range 2 4096 591 range 2 512
599 depends on SMP 592 depends on SMP
593 default "4096" if MAXSMP
600 default "32" if X86_NUMAQ || X86_SUMMIT || X86_BIGSMP || X86_ES7000 594 default "32" if X86_NUMAQ || X86_SUMMIT || X86_BIGSMP || X86_ES7000
601 default "8" 595 default "8"
602 help 596 help
603 This allows you to specify the maximum number of CPUs which this 597 This allows you to specify the maximum number of CPUs which this
604 kernel will support. The maximum supported value is 4096 and the 598 kernel will support. The maximum supported value is 512 and the
605 minimum value which makes sense is 2. 599 minimum value which makes sense is 2.
606 600
607 This is purely to save memory - each supported CPU adds 601 This is purely to save memory - each supported CPU adds
608 approximately eight kilobytes to the kernel image. 602 approximately eight kilobytes to the kernel image.
609endif
610 603
611config SCHED_SMT 604config SCHED_SMT
612 bool "SMT (Hyperthreading) scheduler support" 605 bool "SMT (Hyperthreading) scheduler support"
@@ -952,9 +945,9 @@ config NUMA
952 local memory controller of the CPU and add some more 945 local memory controller of the CPU and add some more
953 NUMA awareness to the kernel. 946 NUMA awareness to the kernel.
954 947
955 For i386 this is currently highly experimental and should be only 948 For 32-bit this is currently highly experimental and should be only
956 used for kernel development. It might also cause boot failures. 949 used for kernel development. It might also cause boot failures.
957 For x86_64 this is recommended on all multiprocessor Opteron systems. 950 For 64-bit this is recommended on all multiprocessor Opteron systems.
958 If the system is EM64T, you should say N unless your system is 951 If the system is EM64T, you should say N unless your system is
959 EM64T NUMA. 952 EM64T NUMA.
960 953
@@ -997,17 +990,10 @@ config NUMA_EMU
997 into virtual nodes when booted with "numa=fake=N", where N is the 990 into virtual nodes when booted with "numa=fake=N", where N is the
998 number of nodes. This is only useful for debugging. 991 number of nodes. This is only useful for debugging.
999 992
1000if MAXSMP
1001
1002config NODES_SHIFT 993config NODES_SHIFT
1003 int 994 int "Maximum NUMA Nodes (as a power of 2)" if !MAXSMP
1004 default "9"
1005endif
1006
1007if !MAXSMP
1008config NODES_SHIFT
1009 int "Maximum NUMA Nodes (as a power of 2)"
1010 range 1 9 if X86_64 995 range 1 9 if X86_64
996 default "9" if MAXSMP
1011 default "6" if X86_64 997 default "6" if X86_64
1012 default "4" if X86_NUMAQ 998 default "4" if X86_NUMAQ
1013 default "3" 999 default "3"
@@ -1015,7 +1001,6 @@ config NODES_SHIFT
1015 help 1001 help
1016 Specify the maximum number of NUMA Nodes available on the target 1002 Specify the maximum number of NUMA Nodes available on the target
1017 system. Increases memory reserved to accomodate various tables. 1003 system. Increases memory reserved to accomodate various tables.
1018endif
1019 1004
1020config HAVE_ARCH_BOOTMEM_NODE 1005config HAVE_ARCH_BOOTMEM_NODE
1021 def_bool y 1006 def_bool y
@@ -1264,7 +1249,7 @@ config KEXEC
1264 strongly in flux, so no good recommendation can be made. 1249 strongly in flux, so no good recommendation can be made.
1265 1250
1266config CRASH_DUMP 1251config CRASH_DUMP
1267 bool "kernel crash dumps (EXPERIMENTAL)" 1252 bool "kernel crash dumps"
1268 depends on X86_64 || (X86_32 && HIGHMEM) 1253 depends on X86_64 || (X86_32 && HIGHMEM)
1269 help 1254 help
1270 Generate crash dump after being started by kexec. 1255 Generate crash dump after being started by kexec.
diff --git a/arch/x86/boot/boot.h b/arch/x86/boot/boot.h
index a34b9982c7cb..cc0ef13fba7a 100644
--- a/arch/x86/boot/boot.h
+++ b/arch/x86/boot/boot.h
@@ -24,10 +24,14 @@
24#include <linux/edd.h> 24#include <linux/edd.h>
25#include <asm/boot.h> 25#include <asm/boot.h>
26#include <asm/setup.h> 26#include <asm/setup.h>
27#include "bitops.h"
28#include <asm/cpufeature.h>
27 29
28/* Useful macros */ 30/* Useful macros */
29#define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)])) 31#define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)]))
30 32
33#define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x)))
34
31extern struct setup_header hdr; 35extern struct setup_header hdr;
32extern struct boot_params boot_params; 36extern struct boot_params boot_params;
33 37
@@ -242,6 +246,12 @@ int cmdline_find_option(const char *option, char *buffer, int bufsize);
242int cmdline_find_option_bool(const char *option); 246int cmdline_find_option_bool(const char *option);
243 247
244/* cpu.c, cpucheck.c */ 248/* cpu.c, cpucheck.c */
249struct cpu_features {
250 int level; /* Family, or 64 for x86-64 */
251 int model;
252 u32 flags[NCAPINTS];
253};
254extern struct cpu_features cpu;
245int check_cpu(int *cpu_level_ptr, int *req_level_ptr, u32 **err_flags_ptr); 255int check_cpu(int *cpu_level_ptr, int *req_level_ptr, u32 **err_flags_ptr);
246int validate_cpu(void); 256int validate_cpu(void);
247 257
diff --git a/arch/x86/boot/cpu.c b/arch/x86/boot/cpu.c
index 92d6fd73dc7d..75298fe2edca 100644
--- a/arch/x86/boot/cpu.c
+++ b/arch/x86/boot/cpu.c
@@ -16,9 +16,6 @@
16 */ 16 */
17 17
18#include "boot.h" 18#include "boot.h"
19#include "bitops.h"
20#include <asm/cpufeature.h>
21
22#include "cpustr.h" 19#include "cpustr.h"
23 20
24static char *cpu_name(int level) 21static char *cpu_name(int level)
diff --git a/arch/x86/boot/cpucheck.c b/arch/x86/boot/cpucheck.c
index 7804389ee005..4b9ae7c56748 100644
--- a/arch/x86/boot/cpucheck.c
+++ b/arch/x86/boot/cpucheck.c
@@ -22,21 +22,13 @@
22 22
23#ifdef _SETUP 23#ifdef _SETUP
24# include "boot.h" 24# include "boot.h"
25# include "bitops.h"
26#endif 25#endif
27#include <linux/types.h> 26#include <linux/types.h>
28#include <asm/cpufeature.h>
29#include <asm/processor-flags.h> 27#include <asm/processor-flags.h>
30#include <asm/required-features.h> 28#include <asm/required-features.h>
31#include <asm/msr-index.h> 29#include <asm/msr-index.h>
32 30
33struct cpu_features { 31struct cpu_features cpu;
34 int level; /* Family, or 64 for x86-64 */
35 int model;
36 u32 flags[NCAPINTS];
37};
38
39static struct cpu_features cpu;
40static u32 cpu_vendor[3]; 32static u32 cpu_vendor[3];
41static u32 err_flags[NCAPINTS]; 33static u32 err_flags[NCAPINTS];
42 34
diff --git a/arch/x86/boot/main.c b/arch/x86/boot/main.c
index 2296164b54d2..197421db1af1 100644
--- a/arch/x86/boot/main.c
+++ b/arch/x86/boot/main.c
@@ -73,6 +73,11 @@ static void keyboard_set_repeat(void)
73 */ 73 */
74static void query_ist(void) 74static void query_ist(void)
75{ 75{
76 /* Some older BIOSes apparently crash on this call, so filter
77 it from machines too old to have SpeedStep at all. */
78 if (cpu.level < 6)
79 return;
80
76 asm("int $0x15" 81 asm("int $0x15"
77 : "=a" (boot_params.ist_info.signature), 82 : "=a" (boot_params.ist_info.signature),
78 "=b" (boot_params.ist_info.command), 83 "=b" (boot_params.ist_info.command),
diff --git a/arch/x86/boot/memory.c b/arch/x86/boot/memory.c
index 53165c97336b..8c3c25f35578 100644
--- a/arch/x86/boot/memory.c
+++ b/arch/x86/boot/memory.c
@@ -13,7 +13,6 @@
13 */ 13 */
14 14
15#include "boot.h" 15#include "boot.h"
16#include <linux/kernel.h>
17 16
18#define SMAP 0x534d4150 /* ASCII "SMAP" */ 17#define SMAP 0x534d4150 /* ASCII "SMAP" */
19 18
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index fa88a1d71290..bfd10fd211cd 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -97,6 +97,8 @@ static u64 acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE;
97#warning ACPI uses CMPXCHG, i486 and later hardware 97#warning ACPI uses CMPXCHG, i486 and later hardware
98#endif 98#endif
99 99
100static int acpi_mcfg_64bit_base_addr __initdata = FALSE;
101
100/* -------------------------------------------------------------------------- 102/* --------------------------------------------------------------------------
101 Boot-time Configuration 103 Boot-time Configuration
102 -------------------------------------------------------------------------- */ 104 -------------------------------------------------------------------------- */
@@ -158,6 +160,14 @@ char *__init __acpi_map_table(unsigned long phys, unsigned long size)
158struct acpi_mcfg_allocation *pci_mmcfg_config; 160struct acpi_mcfg_allocation *pci_mmcfg_config;
159int pci_mmcfg_config_num; 161int pci_mmcfg_config_num;
160 162
163static int __init acpi_mcfg_oem_check(struct acpi_table_mcfg *mcfg)
164{
165 if (!strcmp(mcfg->header.oem_id, "SGI"))
166 acpi_mcfg_64bit_base_addr = TRUE;
167
168 return 0;
169}
170
161int __init acpi_parse_mcfg(struct acpi_table_header *header) 171int __init acpi_parse_mcfg(struct acpi_table_header *header)
162{ 172{
163 struct acpi_table_mcfg *mcfg; 173 struct acpi_table_mcfg *mcfg;
@@ -190,8 +200,12 @@ int __init acpi_parse_mcfg(struct acpi_table_header *header)
190 } 200 }
191 201
192 memcpy(pci_mmcfg_config, &mcfg[1], config_size); 202 memcpy(pci_mmcfg_config, &mcfg[1], config_size);
203
204 acpi_mcfg_oem_check(mcfg);
205
193 for (i = 0; i < pci_mmcfg_config_num; ++i) { 206 for (i = 0; i < pci_mmcfg_config_num; ++i) {
194 if (pci_mmcfg_config[i].address > 0xFFFFFFFF) { 207 if ((pci_mmcfg_config[i].address > 0xFFFFFFFF) &&
208 !acpi_mcfg_64bit_base_addr) {
195 printk(KERN_ERR PREFIX 209 printk(KERN_ERR PREFIX
196 "MMCONFIG not in low 4GB of memory\n"); 210 "MMCONFIG not in low 4GB of memory\n");
197 kfree(pci_mmcfg_config); 211 kfree(pci_mmcfg_config);
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index fa2161d5003b..426e5d91b63a 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -20,7 +20,7 @@ unsigned long acpi_realmode_flags;
20/* address in low memory of the wakeup routine. */ 20/* address in low memory of the wakeup routine. */
21static unsigned long acpi_realmode; 21static unsigned long acpi_realmode;
22 22
23#ifdef CONFIG_64BIT 23#if defined(CONFIG_SMP) && defined(CONFIG_64BIT)
24static char temp_stack[10240]; 24static char temp_stack[10240];
25#endif 25#endif
26 26
@@ -86,7 +86,7 @@ int acpi_save_state_mem(void)
86#endif /* !CONFIG_64BIT */ 86#endif /* !CONFIG_64BIT */
87 87
88 header->pmode_cr0 = read_cr0(); 88 header->pmode_cr0 = read_cr0();
89 header->pmode_cr4 = read_cr4(); 89 header->pmode_cr4 = read_cr4_safe();
90 header->realmode_flags = acpi_realmode_flags; 90 header->realmode_flags = acpi_realmode_flags;
91 header->real_magic = 0x12345678; 91 header->real_magic = 0x12345678;
92 92
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 22d7d050905d..69b4d060b21c 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -65,7 +65,7 @@ static int __iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd)
65 u8 *target; 65 u8 *target;
66 66
67 tail = readl(iommu->mmio_base + MMIO_CMD_TAIL_OFFSET); 67 tail = readl(iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
68 target = (iommu->cmd_buf + tail); 68 target = iommu->cmd_buf + tail;
69 memcpy_toio(target, cmd, sizeof(*cmd)); 69 memcpy_toio(target, cmd, sizeof(*cmd));
70 tail = (tail + sizeof(*cmd)) % iommu->cmd_buf_size; 70 tail = (tail + sizeof(*cmd)) % iommu->cmd_buf_size;
71 head = readl(iommu->mmio_base + MMIO_CMD_HEAD_OFFSET); 71 head = readl(iommu->mmio_base + MMIO_CMD_HEAD_OFFSET);
@@ -101,16 +101,13 @@ static int iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd)
101 */ 101 */
102static int iommu_completion_wait(struct amd_iommu *iommu) 102static int iommu_completion_wait(struct amd_iommu *iommu)
103{ 103{
104 int ret; 104 int ret, ready = 0;
105 unsigned status = 0;
105 struct iommu_cmd cmd; 106 struct iommu_cmd cmd;
106 volatile u64 ready = 0;
107 unsigned long ready_phys = virt_to_phys(&ready);
108 unsigned long i = 0; 107 unsigned long i = 0;
109 108
110 memset(&cmd, 0, sizeof(cmd)); 109 memset(&cmd, 0, sizeof(cmd));
111 cmd.data[0] = LOW_U32(ready_phys) | CMD_COMPL_WAIT_STORE_MASK; 110 cmd.data[0] = CMD_COMPL_WAIT_INT_MASK;
112 cmd.data[1] = upper_32_bits(ready_phys);
113 cmd.data[2] = 1; /* value written to 'ready' */
114 CMD_SET_TYPE(&cmd, CMD_COMPL_WAIT); 111 CMD_SET_TYPE(&cmd, CMD_COMPL_WAIT);
115 112
116 iommu->need_sync = 0; 113 iommu->need_sync = 0;
@@ -122,9 +119,15 @@ static int iommu_completion_wait(struct amd_iommu *iommu)
122 119
123 while (!ready && (i < EXIT_LOOP_COUNT)) { 120 while (!ready && (i < EXIT_LOOP_COUNT)) {
124 ++i; 121 ++i;
125 cpu_relax(); 122 /* wait for the bit to become one */
123 status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
124 ready = status & MMIO_STATUS_COM_WAIT_INT_MASK;
126 } 125 }
127 126
127 /* set bit back to zero */
128 status &= ~MMIO_STATUS_COM_WAIT_INT_MASK;
129 writel(status, iommu->mmio_base + MMIO_STATUS_OFFSET);
130
128 if (unlikely((i == EXIT_LOOP_COUNT) && printk_ratelimit())) 131 if (unlikely((i == EXIT_LOOP_COUNT) && printk_ratelimit()))
129 printk(KERN_WARNING "AMD IOMMU: Completion wait loop failed\n"); 132 printk(KERN_WARNING "AMD IOMMU: Completion wait loop failed\n");
130 133
@@ -161,7 +164,7 @@ static int iommu_queue_inv_iommu_pages(struct amd_iommu *iommu,
161 address &= PAGE_MASK; 164 address &= PAGE_MASK;
162 CMD_SET_TYPE(&cmd, CMD_INV_IOMMU_PAGES); 165 CMD_SET_TYPE(&cmd, CMD_INV_IOMMU_PAGES);
163 cmd.data[1] |= domid; 166 cmd.data[1] |= domid;
164 cmd.data[2] = LOW_U32(address); 167 cmd.data[2] = lower_32_bits(address);
165 cmd.data[3] = upper_32_bits(address); 168 cmd.data[3] = upper_32_bits(address);
166 if (s) /* size bit - we flush more than one 4kb page */ 169 if (s) /* size bit - we flush more than one 4kb page */
167 cmd.data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK; 170 cmd.data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK;
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index d9a9da597e79..a69cc0f52042 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -801,6 +801,21 @@ static int __init init_memory_definitions(struct acpi_table_header *table)
801} 801}
802 802
803/* 803/*
804 * Init the device table to not allow DMA access for devices and
805 * suppress all page faults
806 */
807static void init_device_table(void)
808{
809 u16 devid;
810
811 for (devid = 0; devid <= amd_iommu_last_bdf; ++devid) {
812 set_dev_entry_bit(devid, DEV_ENTRY_VALID);
813 set_dev_entry_bit(devid, DEV_ENTRY_TRANSLATION);
814 set_dev_entry_bit(devid, DEV_ENTRY_NO_PAGE_FAULT);
815 }
816}
817
818/*
804 * This function finally enables all IOMMUs found in the system after 819 * This function finally enables all IOMMUs found in the system after
805 * they have been initialized 820 * they have been initialized
806 */ 821 */
@@ -931,6 +946,9 @@ int __init amd_iommu_init(void)
931 if (amd_iommu_pd_alloc_bitmap == NULL) 946 if (amd_iommu_pd_alloc_bitmap == NULL)
932 goto free; 947 goto free;
933 948
949 /* init the device table */
950 init_device_table();
951
934 /* 952 /*
935 * let all alias entries point to itself 953 * let all alias entries point to itself
936 */ 954 */
@@ -954,15 +972,15 @@ int __init amd_iommu_init(void)
954 if (acpi_table_parse("IVRS", init_memory_definitions) != 0) 972 if (acpi_table_parse("IVRS", init_memory_definitions) != 0)
955 goto free; 973 goto free;
956 974
957 ret = amd_iommu_init_dma_ops(); 975 ret = sysdev_class_register(&amd_iommu_sysdev_class);
958 if (ret) 976 if (ret)
959 goto free; 977 goto free;
960 978
961 ret = sysdev_class_register(&amd_iommu_sysdev_class); 979 ret = sysdev_register(&device_amd_iommu);
962 if (ret) 980 if (ret)
963 goto free; 981 goto free;
964 982
965 ret = sysdev_register(&device_amd_iommu); 983 ret = amd_iommu_init_dma_ops();
966 if (ret) 984 if (ret)
967 goto free; 985 goto free;
968 986
diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c
index d6c898358371..f88bd0d982b0 100644
--- a/arch/x86/kernel/apic_32.c
+++ b/arch/x86/kernel/apic_32.c
@@ -1454,8 +1454,6 @@ void disconnect_bsp_APIC(int virt_wire_setup)
1454 } 1454 }
1455} 1455}
1456 1456
1457unsigned int __cpuinitdata maxcpus = NR_CPUS;
1458
1459void __cpuinit generic_processor_info(int apicid, int version) 1457void __cpuinit generic_processor_info(int apicid, int version)
1460{ 1458{
1461 int cpu; 1459 int cpu;
@@ -1482,12 +1480,6 @@ void __cpuinit generic_processor_info(int apicid, int version)
1482 return; 1480 return;
1483 } 1481 }
1484 1482
1485 if (num_processors >= maxcpus) {
1486 printk(KERN_WARNING "WARNING: maxcpus limit of %i reached."
1487 " Processor ignored.\n", maxcpus);
1488 return;
1489 }
1490
1491 num_processors++; 1483 num_processors++;
1492 cpus_complement(tmp_map, cpu_present_map); 1484 cpus_complement(tmp_map, cpu_present_map);
1493 cpu = first_cpu(tmp_map); 1485 cpu = first_cpu(tmp_map);
@@ -1720,15 +1712,19 @@ static int __init parse_lapic_timer_c2_ok(char *arg)
1720} 1712}
1721early_param("lapic_timer_c2_ok", parse_lapic_timer_c2_ok); 1713early_param("lapic_timer_c2_ok", parse_lapic_timer_c2_ok);
1722 1714
1723static int __init apic_set_verbosity(char *str) 1715static int __init apic_set_verbosity(char *arg)
1724{ 1716{
1725 if (strcmp("debug", str) == 0) 1717 if (!arg)
1718 return -EINVAL;
1719
1720 if (strcmp(arg, "debug") == 0)
1726 apic_verbosity = APIC_DEBUG; 1721 apic_verbosity = APIC_DEBUG;
1727 else if (strcmp("verbose", str) == 0) 1722 else if (strcmp(arg, "verbose") == 0)
1728 apic_verbosity = APIC_VERBOSE; 1723 apic_verbosity = APIC_VERBOSE;
1729 return 1; 1724
1725 return 0;
1730} 1726}
1731__setup("apic=", apic_set_verbosity); 1727early_param("apic", apic_set_verbosity);
1732 1728
1733static int __init lapic_insert_resource(void) 1729static int __init lapic_insert_resource(void)
1734{ 1730{
diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c
index 7f1f030da7ee..446c062e831c 100644
--- a/arch/x86/kernel/apic_64.c
+++ b/arch/x86/kernel/apic_64.c
@@ -90,7 +90,6 @@ static unsigned long apic_phys;
90 90
91unsigned long mp_lapic_addr; 91unsigned long mp_lapic_addr;
92 92
93unsigned int __cpuinitdata maxcpus = NR_CPUS;
94/* 93/*
95 * Get the LAPIC version 94 * Get the LAPIC version
96 */ 95 */
@@ -1062,12 +1061,6 @@ void __cpuinit generic_processor_info(int apicid, int version)
1062 return; 1061 return;
1063 } 1062 }
1064 1063
1065 if (num_processors >= maxcpus) {
1066 printk(KERN_WARNING "WARNING: maxcpus limit of %i reached."
1067 " Processor ignored.\n", maxcpus);
1068 return;
1069 }
1070
1071 num_processors++; 1064 num_processors++;
1072 cpus_complement(tmp_map, cpu_present_map); 1065 cpus_complement(tmp_map, cpu_present_map);
1073 cpu = first_cpu(tmp_map); 1066 cpu = first_cpu(tmp_map);
diff --git a/arch/x86/kernel/cpu/addon_cpuid_features.c b/arch/x86/kernel/cpu/addon_cpuid_features.c
index 84a8220a6072..a6ef672adbba 100644
--- a/arch/x86/kernel/cpu/addon_cpuid_features.c
+++ b/arch/x86/kernel/cpu/addon_cpuid_features.c
@@ -56,9 +56,22 @@ void __cpuinit validate_pat_support(struct cpuinfo_x86 *c)
56 56
57 switch (c->x86_vendor) { 57 switch (c->x86_vendor) {
58 case X86_VENDOR_INTEL: 58 case X86_VENDOR_INTEL:
59 if (c->x86 == 0xF || (c->x86 == 6 && c->x86_model >= 15)) 59 /*
60 * There is a known erratum on Pentium III and Core Solo
61 * and Core Duo CPUs.
62 * " Page with PAT set to WC while associated MTRR is UC
63 * may consolidate to UC "
64 * Because of this erratum, it is better to stick with
65 * setting WC in MTRR rather than using PAT on these CPUs.
66 *
67 * Enable PAT WC only on P4, Core 2 or later CPUs.
68 */
69 if (c->x86 > 0x6 || (c->x86 == 6 && c->x86_model >= 15))
60 return; 70 return;
61 break; 71
72 pat_disable("PAT WC disabled due to known CPU erratum.");
73 return;
74
62 case X86_VENDOR_AMD: 75 case X86_VENDOR_AMD:
63 case X86_VENDOR_CENTAUR: 76 case X86_VENDOR_CENTAUR:
64 case X86_VENDOR_TRANSMETA: 77 case X86_VENDOR_TRANSMETA:
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index c9b58a806e85..c8e315f1aa83 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -50,6 +50,8 @@ static double __initdata y = 3145727.0;
50 */ 50 */
51static void __init check_fpu(void) 51static void __init check_fpu(void)
52{ 52{
53 s32 fdiv_bug;
54
53 if (!boot_cpu_data.hard_math) { 55 if (!boot_cpu_data.hard_math) {
54#ifndef CONFIG_MATH_EMULATION 56#ifndef CONFIG_MATH_EMULATION
55 printk(KERN_EMERG "No coprocessor found and no math emulation present.\n"); 57 printk(KERN_EMERG "No coprocessor found and no math emulation present.\n");
@@ -74,8 +76,10 @@ static void __init check_fpu(void)
74 "fistpl %0\n\t" 76 "fistpl %0\n\t"
75 "fwait\n\t" 77 "fwait\n\t"
76 "fninit" 78 "fninit"
77 : "=m" (*&boot_cpu_data.fdiv_bug) 79 : "=m" (*&fdiv_bug)
78 : "m" (*&x), "m" (*&y)); 80 : "m" (*&x), "m" (*&y));
81
82 boot_cpu_data.fdiv_bug = fdiv_bug;
79 if (boot_cpu_data.fdiv_bug) 83 if (boot_cpu_data.fdiv_bug)
80 printk("Hmm, FPU with FDIV bug.\n"); 84 printk("Hmm, FPU with FDIV bug.\n");
81} 85}
diff --git a/arch/x86/kernel/cpu/cpufreq/Kconfig b/arch/x86/kernel/cpu/cpufreq/Kconfig
index cb7a5715596d..efae3b22a0ff 100644
--- a/arch/x86/kernel/cpu/cpufreq/Kconfig
+++ b/arch/x86/kernel/cpu/cpufreq/Kconfig
@@ -235,9 +235,9 @@ config X86_LONGHAUL
235 If in doubt, say N. 235 If in doubt, say N.
236 236
237config X86_E_POWERSAVER 237config X86_E_POWERSAVER
238 tristate "VIA C7 Enhanced PowerSaver (EXPERIMENTAL)" 238 tristate "VIA C7 Enhanced PowerSaver"
239 select CPU_FREQ_TABLE 239 select CPU_FREQ_TABLE
240 depends on X86_32 && EXPERIMENTAL 240 depends on X86_32
241 help 241 help
242 This adds the CPUFreq driver for VIA C7 processors. 242 This adds the CPUFreq driver for VIA C7 processors.
243 243
diff --git a/arch/x86/kernel/cpu/cpufreq/elanfreq.c b/arch/x86/kernel/cpu/cpufreq/elanfreq.c
index 94619c22f563..e4a4bf870e94 100644
--- a/arch/x86/kernel/cpu/cpufreq/elanfreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/elanfreq.c
@@ -44,7 +44,7 @@ struct s_elan_multiplier {
44 * It is important that the frequencies 44 * It is important that the frequencies
45 * are listed in ascending order here! 45 * are listed in ascending order here!
46 */ 46 */
47struct s_elan_multiplier elan_multiplier[] = { 47static struct s_elan_multiplier elan_multiplier[] = {
48 {1000, 0x02, 0x18}, 48 {1000, 0x02, 0x18},
49 {2000, 0x02, 0x10}, 49 {2000, 0x02, 0x10},
50 {4000, 0x02, 0x08}, 50 {4000, 0x02, 0x08},
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
index c45ca6d4dce1..84bb395038d8 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
@@ -66,7 +66,6 @@ static u32 find_freq_from_fid(u32 fid)
66 return 800 + (fid * 100); 66 return 800 + (fid * 100);
67} 67}
68 68
69
70/* Return a frequency in KHz, given an input fid */ 69/* Return a frequency in KHz, given an input fid */
71static u32 find_khz_freq_from_fid(u32 fid) 70static u32 find_khz_freq_from_fid(u32 fid)
72{ 71{
@@ -78,7 +77,6 @@ static u32 find_khz_freq_from_pstate(struct cpufreq_frequency_table *data, u32 p
78 return data[pstate].frequency; 77 return data[pstate].frequency;
79} 78}
80 79
81
82/* Return the vco fid for an input fid 80/* Return the vco fid for an input fid
83 * 81 *
84 * Each "low" fid has corresponding "high" fid, and you can get to "low" fids 82 * Each "low" fid has corresponding "high" fid, and you can get to "low" fids
@@ -166,7 +164,6 @@ static void fidvid_msr_init(void)
166 wrmsr(MSR_FIDVID_CTL, lo, hi); 164 wrmsr(MSR_FIDVID_CTL, lo, hi);
167} 165}
168 166
169
170/* write the new fid value along with the other control fields to the msr */ 167/* write the new fid value along with the other control fields to the msr */
171static int write_new_fid(struct powernow_k8_data *data, u32 fid) 168static int write_new_fid(struct powernow_k8_data *data, u32 fid)
172{ 169{
diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c
index 3fd7a67bb06a..e710a21bb6e8 100644
--- a/arch/x86/kernel/cpu/cyrix.c
+++ b/arch/x86/kernel/cpu/cyrix.c
@@ -134,23 +134,6 @@ static void __cpuinit set_cx86_memwb(void)
134 setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x14); 134 setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x14);
135} 135}
136 136
137static void __cpuinit set_cx86_inc(void)
138{
139 unsigned char ccr3;
140
141 printk(KERN_INFO "Enable Incrementor on Cyrix/NSC processor.\n");
142
143 ccr3 = getCx86(CX86_CCR3);
144 setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */
145 /* PCR1 -- Performance Control */
146 /* Incrementor on, whatever that is */
147 setCx86(CX86_PCR1, getCx86(CX86_PCR1) | 0x02);
148 /* PCR0 -- Performance Control */
149 /* Incrementor Margin 10 */
150 setCx86(CX86_PCR0, getCx86(CX86_PCR0) | 0x04);
151 setCx86(CX86_CCR3, ccr3); /* disable MAPEN */
152}
153
154/* 137/*
155 * Configure later MediaGX and/or Geode processor. 138 * Configure later MediaGX and/or Geode processor.
156 */ 139 */
@@ -174,7 +157,6 @@ static void __cpuinit geode_configure(void)
174 157
175 set_cx86_memwb(); 158 set_cx86_memwb();
176 set_cx86_reorder(); 159 set_cx86_reorder();
177 set_cx86_inc();
178 160
179 local_irq_restore(flags); 161 local_irq_restore(flags);
180} 162}
diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c
index 65a339678ece..726a5fcdf341 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_64.c
@@ -759,6 +759,7 @@ static struct sysdev_class mce_sysclass = {
759}; 759};
760 760
761DEFINE_PER_CPU(struct sys_device, device_mce); 761DEFINE_PER_CPU(struct sys_device, device_mce);
762void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu) __cpuinitdata;
762 763
763/* Why are there no generic functions for this? */ 764/* Why are there no generic functions for this? */
764#define ACCESSOR(name, var, start) \ 765#define ACCESSOR(name, var, start) \
@@ -883,9 +884,13 @@ static int __cpuinit mce_cpu_callback(struct notifier_block *nfb,
883 case CPU_ONLINE: 884 case CPU_ONLINE:
884 case CPU_ONLINE_FROZEN: 885 case CPU_ONLINE_FROZEN:
885 mce_create_device(cpu); 886 mce_create_device(cpu);
887 if (threshold_cpu_callback)
888 threshold_cpu_callback(action, cpu);
886 break; 889 break;
887 case CPU_DEAD: 890 case CPU_DEAD:
888 case CPU_DEAD_FROZEN: 891 case CPU_DEAD_FROZEN:
892 if (threshold_cpu_callback)
893 threshold_cpu_callback(action, cpu);
889 mce_remove_device(cpu); 894 mce_remove_device(cpu);
890 break; 895 break;
891 } 896 }
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
index 88736cadbaa6..5eb390a4b2e9 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
@@ -628,6 +628,7 @@ static void threshold_remove_bank(unsigned int cpu, int bank)
628 deallocate_threshold_block(cpu, bank); 628 deallocate_threshold_block(cpu, bank);
629 629
630free_out: 630free_out:
631 kobject_del(b->kobj);
631 kobject_put(b->kobj); 632 kobject_put(b->kobj);
632 kfree(b); 633 kfree(b);
633 per_cpu(threshold_banks, cpu)[bank] = NULL; 634 per_cpu(threshold_banks, cpu)[bank] = NULL;
@@ -645,14 +646,11 @@ static void threshold_remove_device(unsigned int cpu)
645} 646}
646 647
647/* get notified when a cpu comes on/off */ 648/* get notified when a cpu comes on/off */
648static int __cpuinit threshold_cpu_callback(struct notifier_block *nfb, 649static void __cpuinit amd_64_threshold_cpu_callback(unsigned long action,
649 unsigned long action, void *hcpu) 650 unsigned int cpu)
650{ 651{
651 /* cpu was unsigned int to begin with */
652 unsigned int cpu = (unsigned long)hcpu;
653
654 if (cpu >= NR_CPUS) 652 if (cpu >= NR_CPUS)
655 goto out; 653 return;
656 654
657 switch (action) { 655 switch (action) {
658 case CPU_ONLINE: 656 case CPU_ONLINE:
@@ -666,14 +664,8 @@ static int __cpuinit threshold_cpu_callback(struct notifier_block *nfb,
666 default: 664 default:
667 break; 665 break;
668 } 666 }
669 out:
670 return NOTIFY_OK;
671} 667}
672 668
673static struct notifier_block threshold_cpu_notifier __cpuinitdata = {
674 .notifier_call = threshold_cpu_callback,
675};
676
677static __init int threshold_init_device(void) 669static __init int threshold_init_device(void)
678{ 670{
679 unsigned lcpu = 0; 671 unsigned lcpu = 0;
@@ -684,7 +676,7 @@ static __init int threshold_init_device(void)
684 if (err) 676 if (err)
685 return err; 677 return err;
686 } 678 }
687 register_hotcpu_notifier(&threshold_cpu_notifier); 679 threshold_cpu_callback = amd_64_threshold_cpu_callback;
688 return 0; 680 return 0;
689} 681}
690 682
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index 509bd3d9eacd..cb7d3b6a80eb 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -379,6 +379,7 @@ static void generic_get_mtrr(unsigned int reg, unsigned long *base,
379 unsigned long *size, mtrr_type *type) 379 unsigned long *size, mtrr_type *type)
380{ 380{
381 unsigned int mask_lo, mask_hi, base_lo, base_hi; 381 unsigned int mask_lo, mask_hi, base_lo, base_hi;
382 unsigned int tmp, hi;
382 383
383 rdmsr(MTRRphysMask_MSR(reg), mask_lo, mask_hi); 384 rdmsr(MTRRphysMask_MSR(reg), mask_lo, mask_hi);
384 if ((mask_lo & 0x800) == 0) { 385 if ((mask_lo & 0x800) == 0) {
@@ -392,8 +393,23 @@ static void generic_get_mtrr(unsigned int reg, unsigned long *base,
392 rdmsr(MTRRphysBase_MSR(reg), base_lo, base_hi); 393 rdmsr(MTRRphysBase_MSR(reg), base_lo, base_hi);
393 394
394 /* Work out the shifted address mask. */ 395 /* Work out the shifted address mask. */
395 mask_lo = size_or_mask | mask_hi << (32 - PAGE_SHIFT) 396 tmp = mask_hi << (32 - PAGE_SHIFT) | mask_lo >> PAGE_SHIFT;
396 | mask_lo >> PAGE_SHIFT; 397 mask_lo = size_or_mask | tmp;
398 /* Expand tmp with high bits to all 1s*/
399 hi = fls(tmp);
400 if (hi > 0) {
401 tmp |= ~((1<<(hi - 1)) - 1);
402
403 if (tmp != mask_lo) {
404 static int once = 1;
405
406 if (once) {
407 printk(KERN_INFO "mtrr: your BIOS has set up an incorrect mask, fixing it up.\n");
408 once = 0;
409 }
410 mask_lo = tmp;
411 }
412 }
397 413
398 /* This works correctly if size is a power of two, i.e. a 414 /* This works correctly if size is a power of two, i.e. a
399 contiguous range. */ 415 contiguous range. */
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index 6f23969c8faf..b117d7f8a564 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -1496,11 +1496,8 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
1496 1496
1497 /* kvm/qemu doesn't have mtrr set right, don't trim them all */ 1497 /* kvm/qemu doesn't have mtrr set right, don't trim them all */
1498 if (!highest_pfn) { 1498 if (!highest_pfn) {
1499 if (!kvm_para_available()) { 1499 WARN(!kvm_para_available(), KERN_WARNING
1500 printk(KERN_WARNING
1501 "WARNING: strange, CPU MTRRs all blank?\n"); 1500 "WARNING: strange, CPU MTRRs all blank?\n");
1502 WARN_ON(1);
1503 }
1504 return 0; 1501 return 0;
1505 } 1502 }
1506 1503
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c
index de7439f82b92..05cc22dbd4ff 100644
--- a/arch/x86/kernel/cpu/perfctr-watchdog.c
+++ b/arch/x86/kernel/cpu/perfctr-watchdog.c
@@ -478,7 +478,13 @@ static int setup_p4_watchdog(unsigned nmi_hz)
478 perfctr_msr = MSR_P4_IQ_PERFCTR1; 478 perfctr_msr = MSR_P4_IQ_PERFCTR1;
479 evntsel_msr = MSR_P4_CRU_ESCR0; 479 evntsel_msr = MSR_P4_CRU_ESCR0;
480 cccr_msr = MSR_P4_IQ_CCCR1; 480 cccr_msr = MSR_P4_IQ_CCCR1;
481 cccr_val = P4_CCCR_OVF_PMI1 | P4_CCCR_ESCR_SELECT(4); 481
482 /* Pentium 4 D processors don't support P4_CCCR_OVF_PMI1 */
483 if (boot_cpu_data.x86_model == 4 && boot_cpu_data.x86_mask == 4)
484 cccr_val = P4_CCCR_OVF_PMI0;
485 else
486 cccr_val = P4_CCCR_OVF_PMI1;
487 cccr_val |= P4_CCCR_ESCR_SELECT(4);
482 } 488 }
483 489
484 evntsel = P4_ESCR_EVENT_SELECT(0x3F) 490 evntsel = P4_ESCR_EVENT_SELECT(0x3F)
diff --git a/arch/x86/kernel/efi_32.c b/arch/x86/kernel/efi_32.c
index 4b63c8e1f13b..5cab48ee61a4 100644
--- a/arch/x86/kernel/efi_32.c
+++ b/arch/x86/kernel/efi_32.c
@@ -53,7 +53,7 @@ void efi_call_phys_prelog(void)
53 * directory. If I have PAE, I just need to duplicate one entry in 53 * directory. If I have PAE, I just need to duplicate one entry in
54 * page directory. 54 * page directory.
55 */ 55 */
56 cr4 = read_cr4(); 56 cr4 = read_cr4_safe();
57 57
58 if (cr4 & X86_CR4_PAE) { 58 if (cr4 & X86_CR4_PAE) {
59 efi_bak_pg_dir_pointer[0].pgd = 59 efi_bak_pg_dir_pointer[0].pgd =
@@ -91,7 +91,7 @@ void efi_call_phys_epilog(void)
91 gdt_descr.size = GDT_SIZE - 1; 91 gdt_descr.size = GDT_SIZE - 1;
92 load_gdt(&gdt_descr); 92 load_gdt(&gdt_descr);
93 93
94 cr4 = read_cr4(); 94 cr4 = read_cr4_safe();
95 95
96 if (cr4 & X86_CR4_PAE) { 96 if (cr4 & X86_CR4_PAE) {
97 swapper_pg_dir[pgd_index(0)].pgd = 97 swapper_pg_dir[pgd_index(0)].pgd =
diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c
index 2cfcbded888a..bfa837cb16be 100644
--- a/arch/x86/kernel/genx2apic_uv_x.c
+++ b/arch/x86/kernel/genx2apic_uv_x.c
@@ -222,7 +222,7 @@ static __init void map_low_mmrs(void)
222 222
223enum map_type {map_wb, map_uc}; 223enum map_type {map_wb, map_uc};
224 224
225static void map_high(char *id, unsigned long base, int shift, enum map_type map_type) 225static __init void map_high(char *id, unsigned long base, int shift, enum map_type map_type)
226{ 226{
227 unsigned long bytes, paddr; 227 unsigned long bytes, paddr;
228 228
@@ -293,7 +293,9 @@ static __init void uv_rtc_init(void)
293 sn_rtc_cycles_per_second = ticks_per_sec; 293 sn_rtc_cycles_per_second = ticks_per_sec;
294} 294}
295 295
296static __init void uv_system_init(void) 296static bool uv_system_inited;
297
298void __init uv_system_init(void)
297{ 299{
298 union uvh_si_addr_map_config_u m_n_config; 300 union uvh_si_addr_map_config_u m_n_config;
299 union uvh_node_id_u node_id; 301 union uvh_node_id_u node_id;
@@ -383,6 +385,7 @@ static __init void uv_system_init(void)
383 map_mmr_high(max_pnode); 385 map_mmr_high(max_pnode);
384 map_config_high(max_pnode); 386 map_config_high(max_pnode);
385 map_mmioh_high(max_pnode); 387 map_mmioh_high(max_pnode);
388 uv_system_inited = true;
386} 389}
387 390
388/* 391/*
@@ -391,8 +394,7 @@ static __init void uv_system_init(void)
391 */ 394 */
392void __cpuinit uv_cpu_init(void) 395void __cpuinit uv_cpu_init(void)
393{ 396{
394 if (!uv_node_to_blade) 397 BUG_ON(!uv_system_inited);
395 uv_system_init();
396 398
397 uv_blade_info[uv_numa_blade_id()].nr_online_cpus++; 399 uv_blade_info[uv_numa_blade_id()].nr_online_cpus++;
398 400
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 1b318e903bf6..9bfc4d72fb2e 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -88,6 +88,7 @@ void __init x86_64_start_kernel(char * real_mode_data)
88 BUILD_BUG_ON(!(MODULES_VADDR > __START_KERNEL)); 88 BUILD_BUG_ON(!(MODULES_VADDR > __START_KERNEL));
89 BUILD_BUG_ON(!(((MODULES_END - 1) & PGDIR_MASK) == 89 BUILD_BUG_ON(!(((MODULES_END - 1) & PGDIR_MASK) ==
90 (__START_KERNEL & PGDIR_MASK))); 90 (__START_KERNEL & PGDIR_MASK)));
91 BUILD_BUG_ON(__fix_to_virt(__end_of_fixed_addresses) <= MODULES_END);
91 92
92 /* clear bss before set_intr_gate with early_idt_handler */ 93 /* clear bss before set_intr_gate with early_idt_handler */
93 clear_bss(); 94 clear_bss();
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index ad2b15a1334d..59fd3b6b1303 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -359,6 +359,7 @@ static int hpet_clocksource_register(void)
359int __init hpet_enable(void) 359int __init hpet_enable(void)
360{ 360{
361 unsigned long id; 361 unsigned long id;
362 int i;
362 363
363 if (!is_hpet_capable()) 364 if (!is_hpet_capable())
364 return 0; 365 return 0;
@@ -369,6 +370,29 @@ int __init hpet_enable(void)
369 * Read the period and check for a sane value: 370 * Read the period and check for a sane value:
370 */ 371 */
371 hpet_period = hpet_readl(HPET_PERIOD); 372 hpet_period = hpet_readl(HPET_PERIOD);
373
374 /*
375 * AMD SB700 based systems with spread spectrum enabled use a
376 * SMM based HPET emulation to provide proper frequency
377 * setting. The SMM code is initialized with the first HPET
378 * register access and takes some time to complete. During
379 * this time the config register reads 0xffffffff. We check
380 * for max. 1000 loops whether the config register reads a non
381 * 0xffffffff value to make sure that HPET is up and running
382 * before we go further. A counting loop is safe, as the HPET
383 * access takes thousands of CPU cycles. On non SB700 based
384 * machines this check is only done once and has no side
385 * effects.
386 */
387 for (i = 0; hpet_readl(HPET_CFG) == 0xFFFFFFFF; i++) {
388 if (i == 1000) {
389 printk(KERN_WARNING
390 "HPET config register value = 0xFFFFFFFF. "
391 "Disabling HPET\n");
392 goto out_nohpet;
393 }
394 }
395
372 if (hpet_period < HPET_MIN_PERIOD || hpet_period > HPET_MAX_PERIOD) 396 if (hpet_period < HPET_MIN_PERIOD || hpet_period > HPET_MAX_PERIOD)
373 goto out_nohpet; 397 goto out_nohpet;
374 398
diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c
index de9aa0e3a9c5..09cddb57bec4 100644
--- a/arch/x86/kernel/io_apic_32.c
+++ b/arch/x86/kernel/io_apic_32.c
@@ -57,7 +57,7 @@ atomic_t irq_mis_count;
57static struct { int pin, apic; } ioapic_i8259 = { -1, -1 }; 57static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
58 58
59static DEFINE_SPINLOCK(ioapic_lock); 59static DEFINE_SPINLOCK(ioapic_lock);
60static DEFINE_SPINLOCK(vector_lock); 60DEFINE_SPINLOCK(vector_lock);
61 61
62int timer_through_8259 __initdata; 62int timer_through_8259 __initdata;
63 63
@@ -1209,10 +1209,6 @@ static int assign_irq_vector(int irq)
1209 return vector; 1209 return vector;
1210} 1210}
1211 1211
1212void setup_vector_irq(int cpu)
1213{
1214}
1215
1216static struct irq_chip ioapic_chip; 1212static struct irq_chip ioapic_chip;
1217 1213
1218#define IOAPIC_AUTO -1 1214#define IOAPIC_AUTO -1
diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c
index 8269434d1707..61a83b70c18f 100644
--- a/arch/x86/kernel/io_apic_64.c
+++ b/arch/x86/kernel/io_apic_64.c
@@ -101,7 +101,7 @@ int timer_through_8259 __initdata;
101static struct { int pin, apic; } ioapic_i8259 = { -1, -1 }; 101static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
102 102
103static DEFINE_SPINLOCK(ioapic_lock); 103static DEFINE_SPINLOCK(ioapic_lock);
104DEFINE_SPINLOCK(vector_lock); 104static DEFINE_SPINLOCK(vector_lock);
105 105
106/* 106/*
107 * # of IRQ routing registers 107 * # of IRQ routing registers
@@ -697,6 +697,19 @@ static int pin_2_irq(int idx, int apic, int pin)
697 return irq; 697 return irq;
698} 698}
699 699
700void lock_vector_lock(void)
701{
702 /* Used to the online set of cpus does not change
703 * during assign_irq_vector.
704 */
705 spin_lock(&vector_lock);
706}
707
708void unlock_vector_lock(void)
709{
710 spin_unlock(&vector_lock);
711}
712
700static int __assign_irq_vector(int irq, cpumask_t mask) 713static int __assign_irq_vector(int irq, cpumask_t mask)
701{ 714{
702 /* 715 /*
@@ -802,7 +815,7 @@ static void __clear_irq_vector(int irq)
802 cpus_clear(cfg->domain); 815 cpus_clear(cfg->domain);
803} 816}
804 817
805static void __setup_vector_irq(int cpu) 818void __setup_vector_irq(int cpu)
806{ 819{
807 /* Initialize vector_irq on a new cpu */ 820 /* Initialize vector_irq on a new cpu */
808 /* This function must be called with vector_lock held */ 821 /* This function must be called with vector_lock held */
@@ -825,14 +838,6 @@ static void __setup_vector_irq(int cpu)
825 } 838 }
826} 839}
827 840
828void setup_vector_irq(int cpu)
829{
830 spin_lock(&vector_lock);
831 __setup_vector_irq(smp_processor_id());
832 spin_unlock(&vector_lock);
833}
834
835
836static struct irq_chip ioapic_chip; 841static struct irq_chip ioapic_chip;
837 842
838static void ioapic_register_intr(int irq, unsigned long trigger) 843static void ioapic_register_intr(int irq, unsigned long trigger)
diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c
index 9fe478d98406..0732adba05ca 100644
--- a/arch/x86/kernel/machine_kexec_32.c
+++ b/arch/x86/kernel/machine_kexec_32.c
@@ -12,6 +12,7 @@
12#include <linux/init.h> 12#include <linux/init.h>
13#include <linux/numa.h> 13#include <linux/numa.h>
14#include <linux/ftrace.h> 14#include <linux/ftrace.h>
15#include <linux/suspend.h>
15 16
16#include <asm/pgtable.h> 17#include <asm/pgtable.h>
17#include <asm/pgalloc.h> 18#include <asm/pgalloc.h>
@@ -78,7 +79,7 @@ static void load_segments(void)
78/* 79/*
79 * A architecture hook called to validate the 80 * A architecture hook called to validate the
80 * proposed image and prepare the control pages 81 * proposed image and prepare the control pages
81 * as needed. The pages for KEXEC_CONTROL_CODE_SIZE 82 * as needed. The pages for KEXEC_CONTROL_PAGE_SIZE
82 * have been allocated, but the segments have yet 83 * have been allocated, but the segments have yet
83 * been copied into the kernel. 84 * been copied into the kernel.
84 * 85 *
@@ -113,6 +114,7 @@ void machine_kexec(struct kimage *image)
113{ 114{
114 unsigned long page_list[PAGES_NR]; 115 unsigned long page_list[PAGES_NR];
115 void *control_page; 116 void *control_page;
117 int save_ftrace_enabled;
116 asmlinkage unsigned long 118 asmlinkage unsigned long
117 (*relocate_kernel_ptr)(unsigned long indirection_page, 119 (*relocate_kernel_ptr)(unsigned long indirection_page,
118 unsigned long control_page, 120 unsigned long control_page,
@@ -120,7 +122,12 @@ void machine_kexec(struct kimage *image)
120 unsigned int has_pae, 122 unsigned int has_pae,
121 unsigned int preserve_context); 123 unsigned int preserve_context);
122 124
123 tracer_disable(); 125#ifdef CONFIG_KEXEC_JUMP
126 if (kexec_image->preserve_context)
127 save_processor_state();
128#endif
129
130 save_ftrace_enabled = __ftrace_enabled_save();
124 131
125 /* Interrupts aren't acceptable while we reboot */ 132 /* Interrupts aren't acceptable while we reboot */
126 local_irq_disable(); 133 local_irq_disable();
@@ -138,7 +145,7 @@ void machine_kexec(struct kimage *image)
138 } 145 }
139 146
140 control_page = page_address(image->control_code_page); 147 control_page = page_address(image->control_code_page);
141 memcpy(control_page, relocate_kernel, PAGE_SIZE/2); 148 memcpy(control_page, relocate_kernel, KEXEC_CONTROL_CODE_MAX_SIZE);
142 149
143 relocate_kernel_ptr = control_page; 150 relocate_kernel_ptr = control_page;
144 page_list[PA_CONTROL_PAGE] = __pa(control_page); 151 page_list[PA_CONTROL_PAGE] = __pa(control_page);
@@ -178,6 +185,13 @@ void machine_kexec(struct kimage *image)
178 (unsigned long)page_list, 185 (unsigned long)page_list,
179 image->start, cpu_has_pae, 186 image->start, cpu_has_pae,
180 image->preserve_context); 187 image->preserve_context);
188
189#ifdef CONFIG_KEXEC_JUMP
190 if (kexec_image->preserve_context)
191 restore_processor_state();
192#endif
193
194 __ftrace_enabled_restore(save_ftrace_enabled);
181} 195}
182 196
183void arch_crash_save_vmcoreinfo(void) 197void arch_crash_save_vmcoreinfo(void)
diff --git a/arch/x86/kernel/mfgpt_32.c b/arch/x86/kernel/mfgpt_32.c
index 07c0f828f488..3b599518c322 100644
--- a/arch/x86/kernel/mfgpt_32.c
+++ b/arch/x86/kernel/mfgpt_32.c
@@ -33,6 +33,8 @@
33#include <linux/module.h> 33#include <linux/module.h>
34#include <asm/geode.h> 34#include <asm/geode.h>
35 35
36#define MFGPT_DEFAULT_IRQ 7
37
36static struct mfgpt_timer_t { 38static struct mfgpt_timer_t {
37 unsigned int avail:1; 39 unsigned int avail:1;
38} mfgpt_timers[MFGPT_MAX_TIMERS]; 40} mfgpt_timers[MFGPT_MAX_TIMERS];
@@ -157,29 +159,48 @@ int geode_mfgpt_toggle_event(int timer, int cmp, int event, int enable)
157} 159}
158EXPORT_SYMBOL_GPL(geode_mfgpt_toggle_event); 160EXPORT_SYMBOL_GPL(geode_mfgpt_toggle_event);
159 161
160int geode_mfgpt_set_irq(int timer, int cmp, int irq, int enable) 162int geode_mfgpt_set_irq(int timer, int cmp, int *irq, int enable)
161{ 163{
162 u32 val, dummy; 164 u32 zsel, lpc, dummy;
163 int offset; 165 int shift;
164 166
165 if (timer < 0 || timer >= MFGPT_MAX_TIMERS) 167 if (timer < 0 || timer >= MFGPT_MAX_TIMERS)
166 return -EIO; 168 return -EIO;
167 169
168 if (geode_mfgpt_toggle_event(timer, cmp, MFGPT_EVENT_IRQ, enable)) 170 /*
171 * Unfortunately, MFGPTs come in pairs sharing their IRQ lines. If VSA
172 * is using the same CMP of the timer's Siamese twin, the IRQ is set to
173 * 2, and we mustn't use nor change it.
174 * XXX: Likewise, 2 Linux drivers might clash if the 2nd overwrites the
175 * IRQ of the 1st. This can only happen if forcing an IRQ, calling this
176 * with *irq==0 is safe. Currently there _are_ no 2 drivers.
177 */
178 rdmsr(MSR_PIC_ZSEL_LOW, zsel, dummy);
179 shift = ((cmp == MFGPT_CMP1 ? 0 : 4) + timer % 4) * 4;
180 if (((zsel >> shift) & 0xF) == 2)
169 return -EIO; 181 return -EIO;
170 182
171 rdmsr(MSR_PIC_ZSEL_LOW, val, dummy); 183 /* Choose IRQ: if none supplied, keep IRQ already set or use default */
184 if (!*irq)
185 *irq = (zsel >> shift) & 0xF;
186 if (!*irq)
187 *irq = MFGPT_DEFAULT_IRQ;
172 188
173 offset = (timer % 4) * 4; 189 /* Can't use IRQ if it's 0 (=disabled), 2, or routed to LPC */
174 190 if (*irq < 1 || *irq == 2 || *irq > 15)
175 val &= ~((0xF << offset) | (0xF << (offset + 16))); 191 return -EIO;
192 rdmsr(MSR_PIC_IRQM_LPC, lpc, dummy);
193 if (lpc & (1 << *irq))
194 return -EIO;
176 195
196 /* All chosen and checked - go for it */
197 if (geode_mfgpt_toggle_event(timer, cmp, MFGPT_EVENT_IRQ, enable))
198 return -EIO;
177 if (enable) { 199 if (enable) {
178 val |= (irq & 0x0F) << (offset); 200 zsel = (zsel & ~(0xF << shift)) | (*irq << shift);
179 val |= (irq & 0x0F) << (offset + 16); 201 wrmsr(MSR_PIC_ZSEL_LOW, zsel, dummy);
180 } 202 }
181 203
182 wrmsr(MSR_PIC_ZSEL_LOW, val, dummy);
183 return 0; 204 return 0;
184} 205}
185 206
@@ -242,7 +263,7 @@ EXPORT_SYMBOL_GPL(geode_mfgpt_alloc_timer);
242static unsigned int mfgpt_tick_mode = CLOCK_EVT_MODE_SHUTDOWN; 263static unsigned int mfgpt_tick_mode = CLOCK_EVT_MODE_SHUTDOWN;
243static u16 mfgpt_event_clock; 264static u16 mfgpt_event_clock;
244 265
245static int irq = 7; 266static int irq;
246static int __init mfgpt_setup(char *str) 267static int __init mfgpt_setup(char *str)
247{ 268{
248 get_option(&str, &irq); 269 get_option(&str, &irq);
@@ -346,7 +367,7 @@ int __init mfgpt_timer_setup(void)
346 mfgpt_event_clock = timer; 367 mfgpt_event_clock = timer;
347 368
348 /* Set up the IRQ on the MFGPT side */ 369 /* Set up the IRQ on the MFGPT side */
349 if (geode_mfgpt_setup_irq(mfgpt_event_clock, MFGPT_CMP2, irq)) { 370 if (geode_mfgpt_setup_irq(mfgpt_event_clock, MFGPT_CMP2, &irq)) {
350 printk(KERN_ERR "mfgpt-timer: Could not set up IRQ %d\n", irq); 371 printk(KERN_ERR "mfgpt-timer: Could not set up IRQ %d\n", irq);
351 return -EIO; 372 return -EIO;
352 } 373 }
@@ -374,13 +395,14 @@ int __init mfgpt_timer_setup(void)
374 &mfgpt_clockevent); 395 &mfgpt_clockevent);
375 396
376 printk(KERN_INFO 397 printk(KERN_INFO
377 "mfgpt-timer: registering the MFGPT timer as a clock event.\n"); 398 "mfgpt-timer: Registering MFGPT timer %d as a clock event, using IRQ %d\n",
399 timer, irq);
378 clockevents_register_device(&mfgpt_clockevent); 400 clockevents_register_device(&mfgpt_clockevent);
379 401
380 return 0; 402 return 0;
381 403
382err: 404err:
383 geode_mfgpt_release_irq(mfgpt_event_clock, MFGPT_CMP2, irq); 405 geode_mfgpt_release_irq(mfgpt_event_clock, MFGPT_CMP2, &irq);
384 printk(KERN_ERR 406 printk(KERN_ERR
385 "mfgpt-timer: Unable to set up the MFGPT clock source\n"); 407 "mfgpt-timer: Unable to set up the MFGPT clock source\n");
386 return -EIO; 408 return -EIO;
diff --git a/arch/x86/kernel/mmconf-fam10h_64.c b/arch/x86/kernel/mmconf-fam10h_64.c
index fdfdc550b366..efc2f361fe85 100644
--- a/arch/x86/kernel/mmconf-fam10h_64.c
+++ b/arch/x86/kernel/mmconf-fam10h_64.c
@@ -238,7 +238,7 @@ static struct dmi_system_id __devinitdata mmconf_dmi_table[] = {
238 {} 238 {}
239}; 239};
240 240
241void __init check_enable_amd_mmconf_dmi(void) 241void __cpuinit check_enable_amd_mmconf_dmi(void)
242{ 242{
243 dmi_check_system(mmconf_dmi_table); 243 dmi_check_system(mmconf_dmi_table);
244} 244}
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index 6ae005ccaed8..b3fb430725cb 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -49,7 +49,7 @@ static int __init mpf_checksum(unsigned char *mp, int len)
49 return sum & 0xFF; 49 return sum & 0xFF;
50} 50}
51 51
52static void __cpuinit MP_processor_info(struct mpc_config_processor *m) 52static void __init MP_processor_info(struct mpc_config_processor *m)
53{ 53{
54 int apicid; 54 int apicid;
55 char *bootup_cpu = ""; 55 char *bootup_cpu = "";
@@ -83,7 +83,7 @@ static void __init MP_bus_info(struct mpc_config_bus *m)
83 if (x86_quirks->mpc_oem_bus_info) 83 if (x86_quirks->mpc_oem_bus_info)
84 x86_quirks->mpc_oem_bus_info(m, str); 84 x86_quirks->mpc_oem_bus_info(m, str);
85 else 85 else
86 printk(KERN_INFO "Bus #%d is %s\n", m->mpc_busid, str); 86 apic_printk(APIC_VERBOSE, "Bus #%d is %s\n", m->mpc_busid, str);
87 87
88#if MAX_MP_BUSSES < 256 88#if MAX_MP_BUSSES < 256
89 if (m->mpc_busid >= MAX_MP_BUSSES) { 89 if (m->mpc_busid >= MAX_MP_BUSSES) {
@@ -154,7 +154,7 @@ static void __init MP_ioapic_info(struct mpc_config_ioapic *m)
154 154
155static void print_MP_intsrc_info(struct mpc_config_intsrc *m) 155static void print_MP_intsrc_info(struct mpc_config_intsrc *m)
156{ 156{
157 printk(KERN_CONT "Int: type %d, pol %d, trig %d, bus %02x," 157 apic_printk(APIC_VERBOSE, "Int: type %d, pol %d, trig %d, bus %02x,"
158 " IRQ %02x, APIC ID %x, APIC INT %02x\n", 158 " IRQ %02x, APIC ID %x, APIC INT %02x\n",
159 m->mpc_irqtype, m->mpc_irqflag & 3, 159 m->mpc_irqtype, m->mpc_irqflag & 3,
160 (m->mpc_irqflag >> 2) & 3, m->mpc_srcbus, 160 (m->mpc_irqflag >> 2) & 3, m->mpc_srcbus,
@@ -163,7 +163,7 @@ static void print_MP_intsrc_info(struct mpc_config_intsrc *m)
163 163
164static void __init print_mp_irq_info(struct mp_config_intsrc *mp_irq) 164static void __init print_mp_irq_info(struct mp_config_intsrc *mp_irq)
165{ 165{
166 printk(KERN_CONT "Int: type %d, pol %d, trig %d, bus %02x," 166 apic_printk(APIC_VERBOSE, "Int: type %d, pol %d, trig %d, bus %02x,"
167 " IRQ %02x, APIC ID %x, APIC INT %02x\n", 167 " IRQ %02x, APIC ID %x, APIC INT %02x\n",
168 mp_irq->mp_irqtype, mp_irq->mp_irqflag & 3, 168 mp_irq->mp_irqtype, mp_irq->mp_irqflag & 3,
169 (mp_irq->mp_irqflag >> 2) & 3, mp_irq->mp_srcbus, 169 (mp_irq->mp_irqflag >> 2) & 3, mp_irq->mp_srcbus,
@@ -235,7 +235,7 @@ static void __init MP_intsrc_info(struct mpc_config_intsrc *m)
235 235
236static void __init MP_lintsrc_info(struct mpc_config_lintsrc *m) 236static void __init MP_lintsrc_info(struct mpc_config_lintsrc *m)
237{ 237{
238 printk(KERN_INFO "Lint: type %d, pol %d, trig %d, bus %02x," 238 apic_printk(APIC_VERBOSE, "Lint: type %d, pol %d, trig %d, bus %02x,"
239 " IRQ %02x, APIC ID %x, APIC LINT %02x\n", 239 " IRQ %02x, APIC ID %x, APIC LINT %02x\n",
240 m->mpc_irqtype, m->mpc_irqflag & 3, 240 m->mpc_irqtype, m->mpc_irqflag & 3,
241 (m->mpc_irqflag >> 2) & 3, m->mpc_srcbusid, 241 (m->mpc_irqflag >> 2) & 3, m->mpc_srcbusid,
@@ -484,7 +484,7 @@ static void __init construct_default_ioirq_mptable(int mpc_default_type)
484} 484}
485 485
486 486
487static void construct_ioapic_table(int mpc_default_type) 487static void __init construct_ioapic_table(int mpc_default_type)
488{ 488{
489 struct mpc_config_ioapic ioapic; 489 struct mpc_config_ioapic ioapic;
490 struct mpc_config_bus bus; 490 struct mpc_config_bus bus;
@@ -529,7 +529,7 @@ static void construct_ioapic_table(int mpc_default_type)
529 construct_default_ioirq_mptable(mpc_default_type); 529 construct_default_ioirq_mptable(mpc_default_type);
530} 530}
531#else 531#else
532static inline void construct_ioapic_table(int mpc_default_type) { } 532static inline void __init construct_ioapic_table(int mpc_default_type) { }
533#endif 533#endif
534 534
535static inline void __init construct_default_ISA_mptable(int mpc_default_type) 535static inline void __init construct_default_ISA_mptable(int mpc_default_type)
@@ -695,7 +695,8 @@ static int __init smp_scan_config(unsigned long base, unsigned long length,
695 unsigned int *bp = phys_to_virt(base); 695 unsigned int *bp = phys_to_virt(base);
696 struct intel_mp_floating *mpf; 696 struct intel_mp_floating *mpf;
697 697
698 printk(KERN_DEBUG "Scan SMP from %p for %ld bytes.\n", bp, length); 698 apic_printk(APIC_VERBOSE, "Scan SMP from %p for %ld bytes.\n",
699 bp, length);
699 BUILD_BUG_ON(sizeof(*mpf) != 16); 700 BUILD_BUG_ON(sizeof(*mpf) != 16);
700 701
701 while (length > 0) { 702 while (length > 0) {
diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c
index 9fd809552447..e43938086885 100644
--- a/arch/x86/kernel/msr.c
+++ b/arch/x86/kernel/msr.c
@@ -131,7 +131,7 @@ static int msr_open(struct inode *inode, struct file *file)
131 ret = -EIO; /* MSR not supported */ 131 ret = -EIO; /* MSR not supported */
132out: 132out:
133 unlock_kernel(); 133 unlock_kernel();
134 return 0; 134 return ret;
135} 135}
136 136
137/* 137/*
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index ac6d51222e7d..abb78a2cc4ad 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -114,6 +114,23 @@ static __init void nmi_cpu_busy(void *data)
114} 114}
115#endif 115#endif
116 116
117static void report_broken_nmi(int cpu, int *prev_nmi_count)
118{
119 printk(KERN_CONT "\n");
120
121 printk(KERN_WARNING
122 "WARNING: CPU#%d: NMI appears to be stuck (%d->%d)!\n",
123 cpu, prev_nmi_count[cpu], get_nmi_count(cpu));
124
125 printk(KERN_WARNING
126 "Please report this to bugzilla.kernel.org,\n");
127 printk(KERN_WARNING
128 "and attach the output of the 'dmesg' command.\n");
129
130 per_cpu(wd_enabled, cpu) = 0;
131 atomic_dec(&nmi_active);
132}
133
117int __init check_nmi_watchdog(void) 134int __init check_nmi_watchdog(void)
118{ 135{
119 unsigned int *prev_nmi_count; 136 unsigned int *prev_nmi_count;
@@ -141,15 +158,8 @@ int __init check_nmi_watchdog(void)
141 for_each_online_cpu(cpu) { 158 for_each_online_cpu(cpu) {
142 if (!per_cpu(wd_enabled, cpu)) 159 if (!per_cpu(wd_enabled, cpu))
143 continue; 160 continue;
144 if (get_nmi_count(cpu) - prev_nmi_count[cpu] <= 5) { 161 if (get_nmi_count(cpu) - prev_nmi_count[cpu] <= 5)
145 printk(KERN_WARNING "WARNING: CPU#%d: NMI " 162 report_broken_nmi(cpu, prev_nmi_count);
146 "appears to be stuck (%d->%d)!\n",
147 cpu,
148 prev_nmi_count[cpu],
149 get_nmi_count(cpu));
150 per_cpu(wd_enabled, cpu) = 0;
151 atomic_dec(&nmi_active);
152 }
153 } 163 }
154 endflag = 1; 164 endflag = 1;
155 if (!atomic_read(&nmi_active)) { 165 if (!atomic_read(&nmi_active)) {
diff --git a/arch/x86/kernel/numaq_32.c b/arch/x86/kernel/numaq_32.c
index b8c45610b20a..eecc8c18f010 100644
--- a/arch/x86/kernel/numaq_32.c
+++ b/arch/x86/kernel/numaq_32.c
@@ -73,7 +73,7 @@ static void __init smp_dump_qct(void)
73} 73}
74 74
75 75
76void __init numaq_tsc_disable(void) 76void __cpuinit numaq_tsc_disable(void)
77{ 77{
78 if (!found_numaq) 78 if (!found_numaq)
79 return; 79 return;
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 94da4d52d798..300da17e61cb 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -471,7 +471,7 @@ struct pv_lock_ops pv_lock_ops = {
471 .spin_unlock = __ticket_spin_unlock, 471 .spin_unlock = __ticket_spin_unlock,
472#endif 472#endif
473}; 473};
474EXPORT_SYMBOL_GPL(pv_lock_ops); 474EXPORT_SYMBOL(pv_lock_ops);
475 475
476EXPORT_SYMBOL_GPL(pv_time_ops); 476EXPORT_SYMBOL_GPL(pv_time_ops);
477EXPORT_SYMBOL (pv_cpu_ops); 477EXPORT_SYMBOL (pv_cpu_ops);
diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c
index b67a4b1d4eae..dcdac6c826e9 100644
--- a/arch/x86/kernel/pci-calgary_64.c
+++ b/arch/x86/kernel/pci-calgary_64.c
@@ -343,9 +343,8 @@ static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
343 /* were we called with bad_dma_address? */ 343 /* were we called with bad_dma_address? */
344 badend = bad_dma_address + (EMERGENCY_PAGES * PAGE_SIZE); 344 badend = bad_dma_address + (EMERGENCY_PAGES * PAGE_SIZE);
345 if (unlikely((dma_addr >= bad_dma_address) && (dma_addr < badend))) { 345 if (unlikely((dma_addr >= bad_dma_address) && (dma_addr < badend))) {
346 printk(KERN_ERR "Calgary: driver tried unmapping bad DMA " 346 WARN(1, KERN_ERR "Calgary: driver tried unmapping bad DMA "
347 "address 0x%Lx\n", dma_addr); 347 "address 0x%Lx\n", dma_addr);
348 WARN_ON(1);
349 return; 348 return;
350 } 349 }
351 350
@@ -1269,13 +1268,15 @@ static inline int __init determine_tce_table_size(u64 ram)
1269static int __init build_detail_arrays(void) 1268static int __init build_detail_arrays(void)
1270{ 1269{
1271 unsigned long ptr; 1270 unsigned long ptr;
1272 int i, scal_detail_size, rio_detail_size; 1271 unsigned numnodes, i;
1272 int scal_detail_size, rio_detail_size;
1273 1273
1274 if (rio_table_hdr->num_scal_dev > MAX_NUMNODES){ 1274 numnodes = rio_table_hdr->num_scal_dev;
1275 if (numnodes > MAX_NUMNODES){
1275 printk(KERN_WARNING 1276 printk(KERN_WARNING
1276 "Calgary: MAX_NUMNODES too low! Defined as %d, " 1277 "Calgary: MAX_NUMNODES too low! Defined as %d, "
1277 "but system has %d nodes.\n", 1278 "but system has %d nodes.\n",
1278 MAX_NUMNODES, rio_table_hdr->num_scal_dev); 1279 MAX_NUMNODES, numnodes);
1279 return -ENODEV; 1280 return -ENODEV;
1280 } 1281 }
1281 1282
@@ -1296,8 +1297,7 @@ static int __init build_detail_arrays(void)
1296 } 1297 }
1297 1298
1298 ptr = ((unsigned long)rio_table_hdr) + 3; 1299 ptr = ((unsigned long)rio_table_hdr) + 3;
1299 for (i = 0; i < rio_table_hdr->num_scal_dev; 1300 for (i = 0; i < numnodes; i++, ptr += scal_detail_size)
1300 i++, ptr += scal_detail_size)
1301 scal_devs[i] = (struct scal_detail *)ptr; 1301 scal_devs[i] = (struct scal_detail *)ptr;
1302 1302
1303 for (i = 0; i < rio_table_hdr->num_rio_dev; 1303 for (i = 0; i < rio_table_hdr->num_rio_dev;
@@ -1350,7 +1350,7 @@ static void calgary_init_bitmap_from_tce_table(struct iommu_table *tbl)
1350 * Function for kdump case. Get the tce tables from first kernel 1350 * Function for kdump case. Get the tce tables from first kernel
1351 * by reading the contents of the base adress register of calgary iommu 1351 * by reading the contents of the base adress register of calgary iommu
1352 */ 1352 */
1353static void get_tce_space_from_tar() 1353static void __init get_tce_space_from_tar(void)
1354{ 1354{
1355 int bus; 1355 int bus;
1356 void __iomem *target; 1356 void __iomem *target;
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index 8dbffb846de9..87d4d6964ec2 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -123,6 +123,14 @@ void __init pci_iommu_alloc(void)
123 123
124 pci_swiotlb_init(); 124 pci_swiotlb_init();
125} 125}
126
127unsigned long iommu_num_pages(unsigned long addr, unsigned long len)
128{
129 unsigned long size = roundup((addr & ~PAGE_MASK) + len, PAGE_SIZE);
130
131 return size >> PAGE_SHIFT;
132}
133EXPORT_SYMBOL(iommu_num_pages);
126#endif 134#endif
127 135
128/* 136/*
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 53bc653ed5ca..3b7a1ddcc0bc 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -95,7 +95,6 @@ static inline void play_dead(void)
95{ 95{
96 /* This must be done before dead CPU ack */ 96 /* This must be done before dead CPU ack */
97 cpu_exit_clear(); 97 cpu_exit_clear();
98 wbinvd();
99 mb(); 98 mb();
100 /* Ack it */ 99 /* Ack it */
101 __get_cpu_var(cpu_state) = CPU_DEAD; 100 __get_cpu_var(cpu_state) = CPU_DEAD;
@@ -104,8 +103,8 @@ static inline void play_dead(void)
104 * With physical CPU hotplug, we should halt the cpu 103 * With physical CPU hotplug, we should halt the cpu
105 */ 104 */
106 local_irq_disable(); 105 local_irq_disable();
107 while (1) 106 /* mask all interrupts, flush any and all caches, and halt */
108 halt(); 107 wbinvd_halt();
109} 108}
110#else 109#else
111static inline void play_dead(void) 110static inline void play_dead(void)
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 3fb62a7d9a16..71553b664e2a 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -93,14 +93,13 @@ DECLARE_PER_CPU(int, cpu_state);
93static inline void play_dead(void) 93static inline void play_dead(void)
94{ 94{
95 idle_task_exit(); 95 idle_task_exit();
96 wbinvd();
97 mb(); 96 mb();
98 /* Ack it */ 97 /* Ack it */
99 __get_cpu_var(cpu_state) = CPU_DEAD; 98 __get_cpu_var(cpu_state) = CPU_DEAD;
100 99
101 local_irq_disable(); 100 local_irq_disable();
102 while (1) 101 /* mask all interrupts, flush any and all caches, and halt */
103 halt(); 102 wbinvd_halt();
104} 103}
105#else 104#else
106static inline void play_dead(void) 105static inline void play_dead(void)
diff --git a/arch/x86/kernel/relocate_kernel_32.S b/arch/x86/kernel/relocate_kernel_32.S
index 703310a99023..6f50664b2ba5 100644
--- a/arch/x86/kernel/relocate_kernel_32.S
+++ b/arch/x86/kernel/relocate_kernel_32.S
@@ -20,10 +20,11 @@
20#define PAGE_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY) 20#define PAGE_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
21#define PAE_PGD_ATTR (_PAGE_PRESENT) 21#define PAE_PGD_ATTR (_PAGE_PRESENT)
22 22
23/* control_page + PAGE_SIZE/2 ~ control_page + PAGE_SIZE * 3/4 are 23/* control_page + KEXEC_CONTROL_CODE_MAX_SIZE
24 * used to save some data for jumping back 24 * ~ control_page + PAGE_SIZE are used as data storage and stack for
25 * jumping back
25 */ 26 */
26#define DATA(offset) (PAGE_SIZE/2+(offset)) 27#define DATA(offset) (KEXEC_CONTROL_CODE_MAX_SIZE+(offset))
27 28
28/* Minimal CPU state */ 29/* Minimal CPU state */
29#define ESP DATA(0x0) 30#define ESP DATA(0x0)
@@ -376,3 +377,6 @@ swap_pages:
376 popl %ebx 377 popl %ebx
377 popl %ebp 378 popl %ebp
378 ret 379 ret
380
381 .globl kexec_control_code_size
382.set kexec_control_code_size, . - relocate_kernel
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index b520dae02bf4..362d4e7f2d38 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -445,7 +445,7 @@ static void __init reserve_early_setup_data(void)
445 * @size: Size of the crashkernel memory to reserve. 445 * @size: Size of the crashkernel memory to reserve.
446 * Returns the base address on success, and -1ULL on failure. 446 * Returns the base address on success, and -1ULL on failure.
447 */ 447 */
448unsigned long long find_and_reserve_crashkernel(unsigned long long size) 448unsigned long long __init find_and_reserve_crashkernel(unsigned long long size)
449{ 449{
450 const unsigned long long alignment = 16<<20; /* 16M */ 450 const unsigned long long alignment = 16<<20; /* 16M */
451 unsigned long long start = 0LL; 451 unsigned long long start = 0LL;
@@ -670,6 +670,14 @@ void __init setup_arch(char **cmdline_p)
670 670
671 parse_early_param(); 671 parse_early_param();
672 672
673#if defined(CONFIG_VMI) && defined(CONFIG_X86_32)
674 /*
675 * Must be before kernel pagetables are setup
676 * or fixmap area is touched.
677 */
678 vmi_init();
679#endif
680
673 /* after early param, so could get panic from serial */ 681 /* after early param, so could get panic from serial */
674 reserve_early_setup_data(); 682 reserve_early_setup_data();
675 683
@@ -788,10 +796,6 @@ void __init setup_arch(char **cmdline_p)
788 796
789 initmem_init(0, max_pfn); 797 initmem_init(0, max_pfn);
790 798
791#ifdef CONFIG_X86_64
792 dma32_reserve_bootmem();
793#endif
794
795#ifdef CONFIG_ACPI_SLEEP 799#ifdef CONFIG_ACPI_SLEEP
796 /* 800 /*
797 * Reserve low memory region for sleep support. 801 * Reserve low memory region for sleep support.
@@ -806,20 +810,21 @@ void __init setup_arch(char **cmdline_p)
806#endif 810#endif
807 reserve_crashkernel(); 811 reserve_crashkernel();
808 812
813#ifdef CONFIG_X86_64
814 /*
815 * dma32_reserve_bootmem() allocates bootmem which may conflict
816 * with the crashkernel command line, so do that after
817 * reserve_crashkernel()
818 */
819 dma32_reserve_bootmem();
820#endif
821
809 reserve_ibft_region(); 822 reserve_ibft_region();
810 823
811#ifdef CONFIG_KVM_CLOCK 824#ifdef CONFIG_KVM_CLOCK
812 kvmclock_init(); 825 kvmclock_init();
813#endif 826#endif
814 827
815#if defined(CONFIG_VMI) && defined(CONFIG_X86_32)
816 /*
817 * Must be after max_low_pfn is determined, and before kernel
818 * pagetables are setup.
819 */
820 vmi_init();
821#endif
822
823 paravirt_pagetable_setup_start(swapper_pg_dir); 828 paravirt_pagetable_setup_start(swapper_pg_dir);
824 paging_init(); 829 paging_init();
825 paravirt_pagetable_setup_done(swapper_pg_dir); 830 paravirt_pagetable_setup_done(swapper_pg_dir);
@@ -856,12 +861,6 @@ void __init setup_arch(char **cmdline_p)
856 init_apic_mappings(); 861 init_apic_mappings();
857 ioapic_init_mappings(); 862 ioapic_init_mappings();
858 863
859#if defined(CONFIG_SMP) && defined(CONFIG_X86_PC) && defined(CONFIG_X86_32)
860 if (def_to_bigsmp)
861 printk(KERN_WARNING "More than 8 CPUs detected and "
862 "CONFIG_X86_PC cannot handle it.\nUse "
863 "CONFIG_X86_GENERICARCH or CONFIG_X86_BIGSMP.\n");
864#endif
865 kvm_guest_init(); 864 kvm_guest_init();
866 865
867 e820_reserve_resources(); 866 e820_reserve_resources();
diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c
index b45ef8ddd651..ca316b5b742c 100644
--- a/arch/x86/kernel/signal_64.c
+++ b/arch/x86/kernel/signal_64.c
@@ -104,7 +104,16 @@ static inline int restore_i387(struct _fpstate __user *buf)
104 clts(); 104 clts();
105 task_thread_info(current)->status |= TS_USEDFPU; 105 task_thread_info(current)->status |= TS_USEDFPU;
106 } 106 }
107 return restore_fpu_checking((__force struct i387_fxsave_struct *)buf); 107 err = restore_fpu_checking((__force struct i387_fxsave_struct *)buf);
108 if (unlikely(err)) {
109 /*
110 * Encountered an error while doing the restore from the
111 * user buffer, clear the fpu state.
112 */
113 clear_fpu(tsk);
114 clear_used_math();
115 }
116 return err;
108} 117}
109 118
110/* 119/*
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 332512767f4f..7985c5b3f916 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -326,12 +326,16 @@ static void __cpuinit start_secondary(void *unused)
326 * for which cpus receive the IPI. Holding this 326 * for which cpus receive the IPI. Holding this
327 * lock helps us to not include this cpu in a currently in progress 327 * lock helps us to not include this cpu in a currently in progress
328 * smp_call_function(). 328 * smp_call_function().
329 *
330 * We need to hold vector_lock so there the set of online cpus
331 * does not change while we are assigning vectors to cpus. Holding
332 * this lock ensures we don't half assign or remove an irq from a cpu.
329 */ 333 */
330 ipi_call_lock_irq(); 334 ipi_call_lock_irq();
331#ifdef CONFIG_X86_IO_APIC 335 lock_vector_lock();
332 setup_vector_irq(smp_processor_id()); 336 __setup_vector_irq(smp_processor_id());
333#endif
334 cpu_set(smp_processor_id(), cpu_online_map); 337 cpu_set(smp_processor_id(), cpu_online_map);
338 unlock_vector_lock();
335 ipi_call_unlock_irq(); 339 ipi_call_unlock_irq();
336 per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE; 340 per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE;
337 341
@@ -752,6 +756,14 @@ static void __cpuinit do_fork_idle(struct work_struct *work)
752} 756}
753 757
754#ifdef CONFIG_X86_64 758#ifdef CONFIG_X86_64
759
760/* __ref because it's safe to call free_bootmem when after_bootmem == 0. */
761static void __ref free_bootmem_pda(struct x8664_pda *oldpda)
762{
763 if (!after_bootmem)
764 free_bootmem((unsigned long)oldpda, sizeof(*oldpda));
765}
766
755/* 767/*
756 * Allocate node local memory for the AP pda. 768 * Allocate node local memory for the AP pda.
757 * 769 *
@@ -780,8 +792,7 @@ int __cpuinit get_local_pda(int cpu)
780 792
781 if (oldpda) { 793 if (oldpda) {
782 memcpy(newpda, oldpda, size); 794 memcpy(newpda, oldpda, size);
783 if (!after_bootmem) 795 free_bootmem_pda(oldpda);
784 free_bootmem((unsigned long)oldpda, size);
785 } 796 }
786 797
787 newpda->in_bootmem = 0; 798 newpda->in_bootmem = 0;
@@ -1044,6 +1055,34 @@ static __init void disable_smp(void)
1044static int __init smp_sanity_check(unsigned max_cpus) 1055static int __init smp_sanity_check(unsigned max_cpus)
1045{ 1056{
1046 preempt_disable(); 1057 preempt_disable();
1058
1059#if defined(CONFIG_X86_PC) && defined(CONFIG_X86_32)
1060 if (def_to_bigsmp && nr_cpu_ids > 8) {
1061 unsigned int cpu;
1062 unsigned nr;
1063
1064 printk(KERN_WARNING
1065 "More than 8 CPUs detected - skipping them.\n"
1066 "Use CONFIG_X86_GENERICARCH and CONFIG_X86_BIGSMP.\n");
1067
1068 nr = 0;
1069 for_each_present_cpu(cpu) {
1070 if (nr >= 8)
1071 cpu_clear(cpu, cpu_present_map);
1072 nr++;
1073 }
1074
1075 nr = 0;
1076 for_each_possible_cpu(cpu) {
1077 if (nr >= 8)
1078 cpu_clear(cpu, cpu_possible_map);
1079 nr++;
1080 }
1081
1082 nr_cpu_ids = 8;
1083 }
1084#endif
1085
1047 if (!physid_isset(hard_smp_processor_id(), phys_cpu_present_map)) { 1086 if (!physid_isset(hard_smp_processor_id(), phys_cpu_present_map)) {
1048 printk(KERN_WARNING "weird, boot CPU (#%d) not listed" 1087 printk(KERN_WARNING "weird, boot CPU (#%d) not listed"
1049 "by the BIOS.\n", hard_smp_processor_id()); 1088 "by the BIOS.\n", hard_smp_processor_id());
@@ -1182,6 +1221,9 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
1182 printk(KERN_INFO "CPU%d: ", 0); 1221 printk(KERN_INFO "CPU%d: ", 0);
1183 print_cpu_info(&cpu_data(0)); 1222 print_cpu_info(&cpu_data(0));
1184 setup_boot_clock(); 1223 setup_boot_clock();
1224
1225 if (is_uv_system())
1226 uv_system_init();
1185out: 1227out:
1186 preempt_enable(); 1228 preempt_enable();
1187} 1229}
@@ -1336,7 +1378,9 @@ int __cpu_disable(void)
1336 remove_siblinginfo(cpu); 1378 remove_siblinginfo(cpu);
1337 1379
1338 /* It's now safe to remove this processor from the online map */ 1380 /* It's now safe to remove this processor from the online map */
1381 lock_vector_lock();
1339 remove_cpu_from_maps(cpu); 1382 remove_cpu_from_maps(cpu);
1383 unlock_vector_lock();
1340 fixup_irqs(cpu_online_map); 1384 fixup_irqs(cpu_online_map);
1341 return 0; 1385 return 0;
1342} 1386}
@@ -1370,17 +1414,3 @@ void __cpu_die(unsigned int cpu)
1370 BUG(); 1414 BUG();
1371} 1415}
1372#endif 1416#endif
1373
1374/*
1375 * If the BIOS enumerates physical processors before logical,
1376 * maxcpus=N at enumeration-time can be used to disable HT.
1377 */
1378static int __init parse_maxcpus(char *arg)
1379{
1380 extern unsigned int maxcpus;
1381
1382 if (arg)
1383 maxcpus = simple_strtoul(arg, NULL, 0);
1384 return 0;
1385}
1386early_param("maxcpus", parse_maxcpus);
diff --git a/arch/x86/kernel/smpcommon.c b/arch/x86/kernel/smpcommon.c
index 99941b37eca0..397e309839dd 100644
--- a/arch/x86/kernel/smpcommon.c
+++ b/arch/x86/kernel/smpcommon.c
@@ -8,18 +8,21 @@
8DEFINE_PER_CPU(unsigned long, this_cpu_off); 8DEFINE_PER_CPU(unsigned long, this_cpu_off);
9EXPORT_PER_CPU_SYMBOL(this_cpu_off); 9EXPORT_PER_CPU_SYMBOL(this_cpu_off);
10 10
11/* Initialize the CPU's GDT. This is either the boot CPU doing itself 11/*
12 (still using the master per-cpu area), or a CPU doing it for a 12 * Initialize the CPU's GDT. This is either the boot CPU doing itself
13 secondary which will soon come up. */ 13 * (still using the master per-cpu area), or a CPU doing it for a
14 * secondary which will soon come up.
15 */
14__cpuinit void init_gdt(int cpu) 16__cpuinit void init_gdt(int cpu)
15{ 17{
16 struct desc_struct *gdt = get_cpu_gdt_table(cpu); 18 struct desc_struct gdt;
17 19
18 pack_descriptor(&gdt[GDT_ENTRY_PERCPU], 20 pack_descriptor(&gdt, __per_cpu_offset[cpu], 0xFFFFF,
19 __per_cpu_offset[cpu], 0xFFFFF,
20 0x2 | DESCTYPE_S, 0x8); 21 0x2 | DESCTYPE_S, 0x8);
22 gdt.s = 1;
21 23
22 gdt[GDT_ENTRY_PERCPU].s = 1; 24 write_gdt_entry(get_cpu_gdt_table(cpu),
25 GDT_ENTRY_PERCPU, &gdt, DESCTYPE_S);
23 26
24 per_cpu(this_cpu_off, cpu) = __per_cpu_offset[cpu]; 27 per_cpu(this_cpu_off, cpu) = __per_cpu_offset[cpu];
25 per_cpu(cpu_number, cpu) = cpu; 28 per_cpu(cpu_number, cpu) = cpu;
diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c
index d0fbb7712ab0..8b8c0d6640fa 100644
--- a/arch/x86/kernel/tlb_uv.c
+++ b/arch/x86/kernel/tlb_uv.c
@@ -17,6 +17,7 @@
17#include <asm/genapic.h> 17#include <asm/genapic.h>
18#include <asm/idle.h> 18#include <asm/idle.h>
19#include <asm/tsc.h> 19#include <asm/tsc.h>
20#include <asm/irq_vectors.h>
20 21
21#include <mach_apic.h> 22#include <mach_apic.h>
22 23
@@ -783,7 +784,7 @@ static int __init uv_bau_init(void)
783 uv_init_blade(blade, node, cur_cpu); 784 uv_init_blade(blade, node, cur_cpu);
784 cur_cpu += uv_blade_nr_possible_cpus(blade); 785 cur_cpu += uv_blade_nr_possible_cpus(blade);
785 } 786 }
786 set_intr_gate(UV_BAU_MESSAGE, uv_bau_message_intr1); 787 alloc_intr_gate(UV_BAU_MESSAGE, uv_bau_message_intr1);
787 uv_enable_timeouts(); 788 uv_enable_timeouts();
788 789
789 return 0; 790 return 0;
diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c
index 3f18d73f420c..513caaca7115 100644
--- a/arch/x86/kernel/traps_64.c
+++ b/arch/x86/kernel/traps_64.c
@@ -1131,7 +1131,14 @@ asmlinkage void math_state_restore(void)
1131 } 1131 }
1132 1132
1133 clts(); /* Allow maths ops (or we recurse) */ 1133 clts(); /* Allow maths ops (or we recurse) */
1134 restore_fpu_checking(&me->thread.xstate->fxsave); 1134 /*
1135 * Paranoid restore. send a SIGSEGV if we fail to restore the state.
1136 */
1137 if (unlikely(restore_fpu_checking(&me->thread.xstate->fxsave))) {
1138 stts();
1139 force_sig(SIGSEGV, me);
1140 return;
1141 }
1135 task_thread_info(me)->status |= TS_USEDFPU; 1142 task_thread_info(me)->status |= TS_USEDFPU;
1136 me->fpu_counter++; 1143 me->fpu_counter++;
1137} 1144}
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 7603c0553909..8e786b0d665a 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -104,7 +104,7 @@ __setup("notsc", notsc_setup);
104/* 104/*
105 * Read TSC and the reference counters. Take care of SMI disturbance 105 * Read TSC and the reference counters. Take care of SMI disturbance
106 */ 106 */
107static u64 __init tsc_read_refs(u64 *pm, u64 *hpet) 107static u64 tsc_read_refs(u64 *pm, u64 *hpet)
108{ 108{
109 u64 t1, t2; 109 u64 t1, t2;
110 int i; 110 int i;
@@ -314,7 +314,7 @@ static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
314 mark_tsc_unstable("cpufreq changes"); 314 mark_tsc_unstable("cpufreq changes");
315 } 315 }
316 316
317 set_cyc2ns_scale(tsc_khz_ref, freq->cpu); 317 set_cyc2ns_scale(tsc_khz, freq->cpu);
318 318
319 return 0; 319 return 0;
320} 320}
@@ -325,6 +325,10 @@ static struct notifier_block time_cpufreq_notifier_block = {
325 325
326static int __init cpufreq_tsc(void) 326static int __init cpufreq_tsc(void)
327{ 327{
328 if (!cpu_has_tsc)
329 return 0;
330 if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
331 return 0;
328 cpufreq_register_notifier(&time_cpufreq_notifier_block, 332 cpufreq_register_notifier(&time_cpufreq_notifier_block,
329 CPUFREQ_TRANSITION_NOTIFIER); 333 CPUFREQ_TRANSITION_NOTIFIER);
330 return 0; 334 return 0;
diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c
index 0577825cf89b..9ffb01c31c40 100644
--- a/arch/x86/kernel/tsc_sync.c
+++ b/arch/x86/kernel/tsc_sync.c
@@ -88,11 +88,9 @@ static __cpuinit void check_tsc_warp(void)
88 __raw_spin_unlock(&sync_lock); 88 __raw_spin_unlock(&sync_lock);
89 } 89 }
90 } 90 }
91 if (!(now-start)) { 91 WARN(!(now-start),
92 printk("Warning: zero tsc calibration delta: %Ld [max: %Ld]\n", 92 "Warning: zero tsc calibration delta: %Ld [max: %Ld]\n",
93 now-start, end-start); 93 now-start, end-start);
94 WARN_ON(1);
95 }
96} 94}
97 95
98/* 96/*
diff --git a/arch/x86/kernel/visws_quirks.c b/arch/x86/kernel/visws_quirks.c
index 41e01b145c48..594ef47f0a63 100644
--- a/arch/x86/kernel/visws_quirks.c
+++ b/arch/x86/kernel/visws_quirks.c
@@ -184,8 +184,6 @@ static int __init visws_get_smp_config(unsigned int early)
184 return 1; 184 return 1;
185} 185}
186 186
187extern unsigned int __cpuinitdata maxcpus;
188
189/* 187/*
190 * The Visual Workstation is Intel MP compliant in the hardware 188 * The Visual Workstation is Intel MP compliant in the hardware
191 * sense, but it doesn't have a BIOS(-configuration table). 189 * sense, but it doesn't have a BIOS(-configuration table).
@@ -244,8 +242,8 @@ static int __init visws_find_smp_config(unsigned int reserve)
244 ncpus = CO_CPU_MAX; 242 ncpus = CO_CPU_MAX;
245 } 243 }
246 244
247 if (ncpus > maxcpus) 245 if (ncpus > setup_max_cpus)
248 ncpus = maxcpus; 246 ncpus = setup_max_cpus;
249 247
250#ifdef CONFIG_X86_LOCAL_APIC 248#ifdef CONFIG_X86_LOCAL_APIC
251 smp_found_config = 1; 249 smp_found_config = 1;
diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c
index 0a1b1a9d922d..6ca515d6db54 100644
--- a/arch/x86/kernel/vmi_32.c
+++ b/arch/x86/kernel/vmi_32.c
@@ -37,6 +37,7 @@
37#include <asm/timer.h> 37#include <asm/timer.h>
38#include <asm/vmi_time.h> 38#include <asm/vmi_time.h>
39#include <asm/kmap_types.h> 39#include <asm/kmap_types.h>
40#include <asm/setup.h>
40 41
41/* Convenient for calling VMI functions indirectly in the ROM */ 42/* Convenient for calling VMI functions indirectly in the ROM */
42typedef u32 __attribute__((regparm(1))) (VROMFUNC)(void); 43typedef u32 __attribute__((regparm(1))) (VROMFUNC)(void);
@@ -683,7 +684,7 @@ void vmi_bringup(void)
683{ 684{
684 /* We must establish the lowmem mapping for MMU ops to work */ 685 /* We must establish the lowmem mapping for MMU ops to work */
685 if (vmi_ops.set_linear_mapping) 686 if (vmi_ops.set_linear_mapping)
686 vmi_ops.set_linear_mapping(0, (void *)__PAGE_OFFSET, max_low_pfn, 0); 687 vmi_ops.set_linear_mapping(0, (void *)__PAGE_OFFSET, MAXMEM_PFN, 0);
687} 688}
688 689
689/* 690/*
diff --git a/arch/x86/kernel/vmlinux_32.lds.S b/arch/x86/kernel/vmlinux_32.lds.S
index cdb2363697d2..af5bdad84604 100644
--- a/arch/x86/kernel/vmlinux_32.lds.S
+++ b/arch/x86/kernel/vmlinux_32.lds.S
@@ -209,3 +209,11 @@ SECTIONS
209 209
210 DWARF_DEBUG 210 DWARF_DEBUG
211} 211}
212
213#ifdef CONFIG_KEXEC
214/* Link time checks */
215#include <asm/kexec.h>
216
217ASSERT(kexec_control_code_size <= KEXEC_CONTROL_CODE_MAX_SIZE,
218 "kexec control code size is too big")
219#endif
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 2fa231923cf7..0bfe2bd305eb 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -653,6 +653,84 @@ static void rmap_write_protect(struct kvm *kvm, u64 gfn)
653 account_shadowed(kvm, gfn); 653 account_shadowed(kvm, gfn);
654} 654}
655 655
656static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp)
657{
658 u64 *spte;
659 int need_tlb_flush = 0;
660
661 while ((spte = rmap_next(kvm, rmapp, NULL))) {
662 BUG_ON(!(*spte & PT_PRESENT_MASK));
663 rmap_printk("kvm_rmap_unmap_hva: spte %p %llx\n", spte, *spte);
664 rmap_remove(kvm, spte);
665 set_shadow_pte(spte, shadow_trap_nonpresent_pte);
666 need_tlb_flush = 1;
667 }
668 return need_tlb_flush;
669}
670
671static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
672 int (*handler)(struct kvm *kvm, unsigned long *rmapp))
673{
674 int i;
675 int retval = 0;
676
677 /*
678 * If mmap_sem isn't taken, we can look the memslots with only
679 * the mmu_lock by skipping over the slots with userspace_addr == 0.
680 */
681 for (i = 0; i < kvm->nmemslots; i++) {
682 struct kvm_memory_slot *memslot = &kvm->memslots[i];
683 unsigned long start = memslot->userspace_addr;
684 unsigned long end;
685
686 /* mmu_lock protects userspace_addr */
687 if (!start)
688 continue;
689
690 end = start + (memslot->npages << PAGE_SHIFT);
691 if (hva >= start && hva < end) {
692 gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT;
693 retval |= handler(kvm, &memslot->rmap[gfn_offset]);
694 retval |= handler(kvm,
695 &memslot->lpage_info[
696 gfn_offset /
697 KVM_PAGES_PER_HPAGE].rmap_pde);
698 }
699 }
700
701 return retval;
702}
703
704int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
705{
706 return kvm_handle_hva(kvm, hva, kvm_unmap_rmapp);
707}
708
709static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp)
710{
711 u64 *spte;
712 int young = 0;
713
714 spte = rmap_next(kvm, rmapp, NULL);
715 while (spte) {
716 int _young;
717 u64 _spte = *spte;
718 BUG_ON(!(_spte & PT_PRESENT_MASK));
719 _young = _spte & PT_ACCESSED_MASK;
720 if (_young) {
721 young = 1;
722 clear_bit(PT_ACCESSED_SHIFT, (unsigned long *)spte);
723 }
724 spte = rmap_next(kvm, rmapp, spte);
725 }
726 return young;
727}
728
729int kvm_age_hva(struct kvm *kvm, unsigned long hva)
730{
731 return kvm_handle_hva(kvm, hva, kvm_age_rmapp);
732}
733
656#ifdef MMU_DEBUG 734#ifdef MMU_DEBUG
657static int is_empty_shadow_page(u64 *spt) 735static int is_empty_shadow_page(u64 *spt)
658{ 736{
@@ -1203,6 +1281,7 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn)
1203 int r; 1281 int r;
1204 int largepage = 0; 1282 int largepage = 0;
1205 pfn_t pfn; 1283 pfn_t pfn;
1284 unsigned long mmu_seq;
1206 1285
1207 down_read(&current->mm->mmap_sem); 1286 down_read(&current->mm->mmap_sem);
1208 if (is_largepage_backed(vcpu, gfn & ~(KVM_PAGES_PER_HPAGE-1))) { 1287 if (is_largepage_backed(vcpu, gfn & ~(KVM_PAGES_PER_HPAGE-1))) {
@@ -1210,6 +1289,8 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn)
1210 largepage = 1; 1289 largepage = 1;
1211 } 1290 }
1212 1291
1292 mmu_seq = vcpu->kvm->mmu_notifier_seq;
1293 /* implicit mb(), we'll read before PT lock is unlocked */
1213 pfn = gfn_to_pfn(vcpu->kvm, gfn); 1294 pfn = gfn_to_pfn(vcpu->kvm, gfn);
1214 up_read(&current->mm->mmap_sem); 1295 up_read(&current->mm->mmap_sem);
1215 1296
@@ -1220,6 +1301,8 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn)
1220 } 1301 }
1221 1302
1222 spin_lock(&vcpu->kvm->mmu_lock); 1303 spin_lock(&vcpu->kvm->mmu_lock);
1304 if (mmu_notifier_retry(vcpu, mmu_seq))
1305 goto out_unlock;
1223 kvm_mmu_free_some_pages(vcpu); 1306 kvm_mmu_free_some_pages(vcpu);
1224 r = __direct_map(vcpu, v, write, largepage, gfn, pfn, 1307 r = __direct_map(vcpu, v, write, largepage, gfn, pfn,
1225 PT32E_ROOT_LEVEL); 1308 PT32E_ROOT_LEVEL);
@@ -1227,6 +1310,11 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn)
1227 1310
1228 1311
1229 return r; 1312 return r;
1313
1314out_unlock:
1315 spin_unlock(&vcpu->kvm->mmu_lock);
1316 kvm_release_pfn_clean(pfn);
1317 return 0;
1230} 1318}
1231 1319
1232 1320
@@ -1345,6 +1433,7 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa,
1345 int r; 1433 int r;
1346 int largepage = 0; 1434 int largepage = 0;
1347 gfn_t gfn = gpa >> PAGE_SHIFT; 1435 gfn_t gfn = gpa >> PAGE_SHIFT;
1436 unsigned long mmu_seq;
1348 1437
1349 ASSERT(vcpu); 1438 ASSERT(vcpu);
1350 ASSERT(VALID_PAGE(vcpu->arch.mmu.root_hpa)); 1439 ASSERT(VALID_PAGE(vcpu->arch.mmu.root_hpa));
@@ -1358,6 +1447,8 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa,
1358 gfn &= ~(KVM_PAGES_PER_HPAGE-1); 1447 gfn &= ~(KVM_PAGES_PER_HPAGE-1);
1359 largepage = 1; 1448 largepage = 1;
1360 } 1449 }
1450 mmu_seq = vcpu->kvm->mmu_notifier_seq;
1451 /* implicit mb(), we'll read before PT lock is unlocked */
1361 pfn = gfn_to_pfn(vcpu->kvm, gfn); 1452 pfn = gfn_to_pfn(vcpu->kvm, gfn);
1362 up_read(&current->mm->mmap_sem); 1453 up_read(&current->mm->mmap_sem);
1363 if (is_error_pfn(pfn)) { 1454 if (is_error_pfn(pfn)) {
@@ -1365,12 +1456,19 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa,
1365 return 1; 1456 return 1;
1366 } 1457 }
1367 spin_lock(&vcpu->kvm->mmu_lock); 1458 spin_lock(&vcpu->kvm->mmu_lock);
1459 if (mmu_notifier_retry(vcpu, mmu_seq))
1460 goto out_unlock;
1368 kvm_mmu_free_some_pages(vcpu); 1461 kvm_mmu_free_some_pages(vcpu);
1369 r = __direct_map(vcpu, gpa, error_code & PFERR_WRITE_MASK, 1462 r = __direct_map(vcpu, gpa, error_code & PFERR_WRITE_MASK,
1370 largepage, gfn, pfn, kvm_x86_ops->get_tdp_level()); 1463 largepage, gfn, pfn, kvm_x86_ops->get_tdp_level());
1371 spin_unlock(&vcpu->kvm->mmu_lock); 1464 spin_unlock(&vcpu->kvm->mmu_lock);
1372 1465
1373 return r; 1466 return r;
1467
1468out_unlock:
1469 spin_unlock(&vcpu->kvm->mmu_lock);
1470 kvm_release_pfn_clean(pfn);
1471 return 0;
1374} 1472}
1375 1473
1376static void nonpaging_free(struct kvm_vcpu *vcpu) 1474static void nonpaging_free(struct kvm_vcpu *vcpu)
@@ -1670,6 +1768,8 @@ static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
1670 gfn &= ~(KVM_PAGES_PER_HPAGE-1); 1768 gfn &= ~(KVM_PAGES_PER_HPAGE-1);
1671 vcpu->arch.update_pte.largepage = 1; 1769 vcpu->arch.update_pte.largepage = 1;
1672 } 1770 }
1771 vcpu->arch.update_pte.mmu_seq = vcpu->kvm->mmu_notifier_seq;
1772 /* implicit mb(), we'll read before PT lock is unlocked */
1673 pfn = gfn_to_pfn(vcpu->kvm, gfn); 1773 pfn = gfn_to_pfn(vcpu->kvm, gfn);
1674 up_read(&current->mm->mmap_sem); 1774 up_read(&current->mm->mmap_sem);
1675 1775
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 4d918220baeb..4a814bff21f2 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -263,6 +263,8 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page,
263 pfn = vcpu->arch.update_pte.pfn; 263 pfn = vcpu->arch.update_pte.pfn;
264 if (is_error_pfn(pfn)) 264 if (is_error_pfn(pfn))
265 return; 265 return;
266 if (mmu_notifier_retry(vcpu, vcpu->arch.update_pte.mmu_seq))
267 return;
266 kvm_get_pfn(pfn); 268 kvm_get_pfn(pfn);
267 mmu_set_spte(vcpu, spte, page->role.access, pte_access, 0, 0, 269 mmu_set_spte(vcpu, spte, page->role.access, pte_access, 0, 0,
268 gpte & PT_DIRTY_MASK, NULL, largepage, gpte_to_gfn(gpte), 270 gpte & PT_DIRTY_MASK, NULL, largepage, gpte_to_gfn(gpte),
@@ -343,7 +345,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
343 shadow_addr = __pa(shadow_page->spt); 345 shadow_addr = __pa(shadow_page->spt);
344 shadow_pte = shadow_addr | PT_PRESENT_MASK | PT_ACCESSED_MASK 346 shadow_pte = shadow_addr | PT_PRESENT_MASK | PT_ACCESSED_MASK
345 | PT_WRITABLE_MASK | PT_USER_MASK; 347 | PT_WRITABLE_MASK | PT_USER_MASK;
346 *shadow_ent = shadow_pte; 348 set_shadow_pte(shadow_ent, shadow_pte);
347 } 349 }
348 350
349 mmu_set_spte(vcpu, shadow_ent, access, walker->pte_access & access, 351 mmu_set_spte(vcpu, shadow_ent, access, walker->pte_access & access,
@@ -380,6 +382,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
380 int r; 382 int r;
381 pfn_t pfn; 383 pfn_t pfn;
382 int largepage = 0; 384 int largepage = 0;
385 unsigned long mmu_seq;
383 386
384 pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code); 387 pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code);
385 kvm_mmu_audit(vcpu, "pre page fault"); 388 kvm_mmu_audit(vcpu, "pre page fault");
@@ -413,6 +416,8 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
413 largepage = 1; 416 largepage = 1;
414 } 417 }
415 } 418 }
419 mmu_seq = vcpu->kvm->mmu_notifier_seq;
420 /* implicit mb(), we'll read before PT lock is unlocked */
416 pfn = gfn_to_pfn(vcpu->kvm, walker.gfn); 421 pfn = gfn_to_pfn(vcpu->kvm, walker.gfn);
417 up_read(&current->mm->mmap_sem); 422 up_read(&current->mm->mmap_sem);
418 423
@@ -424,6 +429,8 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
424 } 429 }
425 430
426 spin_lock(&vcpu->kvm->mmu_lock); 431 spin_lock(&vcpu->kvm->mmu_lock);
432 if (mmu_notifier_retry(vcpu, mmu_seq))
433 goto out_unlock;
427 kvm_mmu_free_some_pages(vcpu); 434 kvm_mmu_free_some_pages(vcpu);
428 shadow_pte = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault, 435 shadow_pte = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault,
429 largepage, &write_pt, pfn); 436 largepage, &write_pt, pfn);
@@ -439,6 +446,11 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
439 spin_unlock(&vcpu->kvm->mmu_lock); 446 spin_unlock(&vcpu->kvm->mmu_lock);
440 447
441 return write_pt; 448 return write_pt;
449
450out_unlock:
451 spin_unlock(&vcpu->kvm->mmu_lock);
452 kvm_release_pfn_clean(pfn);
453 return 0;
442} 454}
443 455
444static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr) 456static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 5916191420c7..0d682fc6aeb3 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -883,6 +883,7 @@ int kvm_dev_ioctl_check_extension(long ext)
883 case KVM_CAP_PIT: 883 case KVM_CAP_PIT:
884 case KVM_CAP_NOP_IO_DELAY: 884 case KVM_CAP_NOP_IO_DELAY:
885 case KVM_CAP_MP_STATE: 885 case KVM_CAP_MP_STATE:
886 case KVM_CAP_SYNC_MMU:
886 r = 1; 887 r = 1;
887 break; 888 break;
888 case KVM_CAP_COALESCED_MMIO: 889 case KVM_CAP_COALESCED_MMIO:
@@ -1495,6 +1496,7 @@ static int kvm_vm_ioctl_set_memory_alias(struct kvm *kvm,
1495 goto out; 1496 goto out;
1496 1497
1497 down_write(&kvm->slots_lock); 1498 down_write(&kvm->slots_lock);
1499 spin_lock(&kvm->mmu_lock);
1498 1500
1499 p = &kvm->arch.aliases[alias->slot]; 1501 p = &kvm->arch.aliases[alias->slot];
1500 p->base_gfn = alias->guest_phys_addr >> PAGE_SHIFT; 1502 p->base_gfn = alias->guest_phys_addr >> PAGE_SHIFT;
@@ -1506,6 +1508,7 @@ static int kvm_vm_ioctl_set_memory_alias(struct kvm *kvm,
1506 break; 1508 break;
1507 kvm->arch.naliases = n; 1509 kvm->arch.naliases = n;
1508 1510
1511 spin_unlock(&kvm->mmu_lock);
1509 kvm_mmu_zap_all(kvm); 1512 kvm_mmu_zap_all(kvm);
1510 1513
1511 up_write(&kvm->slots_lock); 1514 up_write(&kvm->slots_lock);
@@ -3972,16 +3975,23 @@ int kvm_arch_set_memory_region(struct kvm *kvm,
3972 */ 3975 */
3973 if (!user_alloc) { 3976 if (!user_alloc) {
3974 if (npages && !old.rmap) { 3977 if (npages && !old.rmap) {
3978 unsigned long userspace_addr;
3979
3975 down_write(&current->mm->mmap_sem); 3980 down_write(&current->mm->mmap_sem);
3976 memslot->userspace_addr = do_mmap(NULL, 0, 3981 userspace_addr = do_mmap(NULL, 0,
3977 npages * PAGE_SIZE, 3982 npages * PAGE_SIZE,
3978 PROT_READ | PROT_WRITE, 3983 PROT_READ | PROT_WRITE,
3979 MAP_SHARED | MAP_ANONYMOUS, 3984 MAP_SHARED | MAP_ANONYMOUS,
3980 0); 3985 0);
3981 up_write(&current->mm->mmap_sem); 3986 up_write(&current->mm->mmap_sem);
3982 3987
3983 if (IS_ERR((void *)memslot->userspace_addr)) 3988 if (IS_ERR((void *)userspace_addr))
3984 return PTR_ERR((void *)memslot->userspace_addr); 3989 return PTR_ERR((void *)userspace_addr);
3990
3991 /* set userspace_addr atomically for kvm_hva_to_rmapp */
3992 spin_lock(&kvm->mmu_lock);
3993 memslot->userspace_addr = userspace_addr;
3994 spin_unlock(&kvm->mmu_lock);
3985 } else { 3995 } else {
3986 if (!old.user_alloc && old.rmap) { 3996 if (!old.user_alloc && old.rmap) {
3987 int ret; 3997 int ret;
diff --git a/arch/x86/mach-rdc321x/platform.c b/arch/x86/mach-rdc321x/platform.c
index a037041817c7..4f4e50c3ad3b 100644
--- a/arch/x86/mach-rdc321x/platform.c
+++ b/arch/x86/mach-rdc321x/platform.c
@@ -25,7 +25,6 @@
25#include <linux/list.h> 25#include <linux/list.h>
26#include <linux/device.h> 26#include <linux/device.h>
27#include <linux/platform_device.h> 27#include <linux/platform_device.h>
28#include <linux/version.h>
29#include <linux/leds.h> 28#include <linux/leds.h>
30 29
31#include <asm/gpio.h> 30#include <asm/gpio.h>
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 2977ea37791f..dfb932dcf136 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -1,7 +1,6 @@
1obj-y := init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \ 1obj-y := init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \
2 pat.o pgtable.o 2 pat.o pgtable.o gup.o
3 3
4obj-$(CONFIG_HAVE_GET_USER_PAGES_FAST) += gup.o
5obj-$(CONFIG_X86_32) += pgtable_32.o 4obj-$(CONFIG_X86_32) += pgtable_32.o
6 5
7obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o 6obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 129618ca0ea2..d3746efb060d 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -60,7 +60,7 @@ static unsigned long dma_reserve __initdata;
60 60
61DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); 61DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
62 62
63int direct_gbpages __meminitdata 63int direct_gbpages
64#ifdef CONFIG_DIRECT_GBPAGES 64#ifdef CONFIG_DIRECT_GBPAGES
65 = 1 65 = 1
66#endif 66#endif
@@ -88,7 +88,11 @@ early_param("gbpages", parse_direct_gbpages_on);
88 88
89int after_bootmem; 89int after_bootmem;
90 90
91static __init void *spp_getpage(void) 91/*
92 * NOTE: This function is marked __ref because it calls __init function
93 * (alloc_bootmem_pages). It's safe to do it ONLY when after_bootmem == 0.
94 */
95static __ref void *spp_getpage(void)
92{ 96{
93 void *ptr; 97 void *ptr;
94 98
@@ -237,7 +241,7 @@ static unsigned long __initdata table_start;
237static unsigned long __meminitdata table_end; 241static unsigned long __meminitdata table_end;
238static unsigned long __meminitdata table_top; 242static unsigned long __meminitdata table_top;
239 243
240static __meminit void *alloc_low_page(unsigned long *phys) 244static __ref void *alloc_low_page(unsigned long *phys)
241{ 245{
242 unsigned long pfn = table_end++; 246 unsigned long pfn = table_end++;
243 void *adr; 247 void *adr;
@@ -258,7 +262,7 @@ static __meminit void *alloc_low_page(unsigned long *phys)
258 return adr; 262 return adr;
259} 263}
260 264
261static __meminit void unmap_low_page(void *adr) 265static __ref void unmap_low_page(void *adr)
262{ 266{
263 if (after_bootmem) 267 if (after_bootmem)
264 return; 268 return;
@@ -314,6 +318,7 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end,
314{ 318{
315 unsigned long pages = 0; 319 unsigned long pages = 0;
316 unsigned long last_map_addr = end; 320 unsigned long last_map_addr = end;
321 unsigned long start = address;
317 322
318 int i = pmd_index(address); 323 int i = pmd_index(address);
319 324
@@ -331,16 +336,24 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end,
331 } 336 }
332 337
333 if (pmd_val(*pmd)) { 338 if (pmd_val(*pmd)) {
334 if (!pmd_large(*pmd)) 339 if (!pmd_large(*pmd)) {
340 spin_lock(&init_mm.page_table_lock);
335 last_map_addr = phys_pte_update(pmd, address, 341 last_map_addr = phys_pte_update(pmd, address,
336 end); 342 end);
343 spin_unlock(&init_mm.page_table_lock);
344 }
345 /* Count entries we're using from level2_ident_pgt */
346 if (start == 0)
347 pages++;
337 continue; 348 continue;
338 } 349 }
339 350
340 if (page_size_mask & (1<<PG_LEVEL_2M)) { 351 if (page_size_mask & (1<<PG_LEVEL_2M)) {
341 pages++; 352 pages++;
353 spin_lock(&init_mm.page_table_lock);
342 set_pte((pte_t *)pmd, 354 set_pte((pte_t *)pmd,
343 pfn_pte(address >> PAGE_SHIFT, PAGE_KERNEL_LARGE)); 355 pfn_pte(address >> PAGE_SHIFT, PAGE_KERNEL_LARGE));
356 spin_unlock(&init_mm.page_table_lock);
344 last_map_addr = (address & PMD_MASK) + PMD_SIZE; 357 last_map_addr = (address & PMD_MASK) + PMD_SIZE;
345 continue; 358 continue;
346 } 359 }
@@ -349,7 +362,9 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end,
349 last_map_addr = phys_pte_init(pte, address, end); 362 last_map_addr = phys_pte_init(pte, address, end);
350 unmap_low_page(pte); 363 unmap_low_page(pte);
351 364
365 spin_lock(&init_mm.page_table_lock);
352 pmd_populate_kernel(&init_mm, pmd, __va(pte_phys)); 366 pmd_populate_kernel(&init_mm, pmd, __va(pte_phys));
367 spin_unlock(&init_mm.page_table_lock);
353 } 368 }
354 update_page_count(PG_LEVEL_2M, pages); 369 update_page_count(PG_LEVEL_2M, pages);
355 return last_map_addr; 370 return last_map_addr;
@@ -362,9 +377,7 @@ phys_pmd_update(pud_t *pud, unsigned long address, unsigned long end,
362 pmd_t *pmd = pmd_offset(pud, 0); 377 pmd_t *pmd = pmd_offset(pud, 0);
363 unsigned long last_map_addr; 378 unsigned long last_map_addr;
364 379
365 spin_lock(&init_mm.page_table_lock);
366 last_map_addr = phys_pmd_init(pmd, address, end, page_size_mask); 380 last_map_addr = phys_pmd_init(pmd, address, end, page_size_mask);
367 spin_unlock(&init_mm.page_table_lock);
368 __flush_tlb_all(); 381 __flush_tlb_all();
369 return last_map_addr; 382 return last_map_addr;
370} 383}
@@ -400,20 +413,21 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end,
400 413
401 if (page_size_mask & (1<<PG_LEVEL_1G)) { 414 if (page_size_mask & (1<<PG_LEVEL_1G)) {
402 pages++; 415 pages++;
416 spin_lock(&init_mm.page_table_lock);
403 set_pte((pte_t *)pud, 417 set_pte((pte_t *)pud,
404 pfn_pte(addr >> PAGE_SHIFT, PAGE_KERNEL_LARGE)); 418 pfn_pte(addr >> PAGE_SHIFT, PAGE_KERNEL_LARGE));
419 spin_unlock(&init_mm.page_table_lock);
405 last_map_addr = (addr & PUD_MASK) + PUD_SIZE; 420 last_map_addr = (addr & PUD_MASK) + PUD_SIZE;
406 continue; 421 continue;
407 } 422 }
408 423
409 pmd = alloc_low_page(&pmd_phys); 424 pmd = alloc_low_page(&pmd_phys);
410
411 spin_lock(&init_mm.page_table_lock);
412 last_map_addr = phys_pmd_init(pmd, addr, end, page_size_mask); 425 last_map_addr = phys_pmd_init(pmd, addr, end, page_size_mask);
413 unmap_low_page(pmd); 426 unmap_low_page(pmd);
427
428 spin_lock(&init_mm.page_table_lock);
414 pud_populate(&init_mm, pud, __va(pmd_phys)); 429 pud_populate(&init_mm, pud, __va(pmd_phys));
415 spin_unlock(&init_mm.page_table_lock); 430 spin_unlock(&init_mm.page_table_lock);
416
417 } 431 }
418 __flush_tlb_all(); 432 __flush_tlb_all();
419 update_page_count(PG_LEVEL_1G, pages); 433 update_page_count(PG_LEVEL_1G, pages);
@@ -505,16 +519,14 @@ static unsigned long __init kernel_physical_mapping_init(unsigned long start,
505 continue; 519 continue;
506 } 520 }
507 521
508 if (after_bootmem) 522 pud = alloc_low_page(&pud_phys);
509 pud = pud_offset(pgd, start & PGDIR_MASK);
510 else
511 pud = alloc_low_page(&pud_phys);
512
513 last_map_addr = phys_pud_init(pud, __pa(start), __pa(next), 523 last_map_addr = phys_pud_init(pud, __pa(start), __pa(next),
514 page_size_mask); 524 page_size_mask);
515 unmap_low_page(pud); 525 unmap_low_page(pud);
516 pgd_populate(&init_mm, pgd_offset_k(start), 526
517 __va(pud_phys)); 527 spin_lock(&init_mm.page_table_lock);
528 pgd_populate(&init_mm, pgd, __va(pud_phys));
529 spin_unlock(&init_mm.page_table_lock);
518 } 530 }
519 531
520 return last_map_addr; 532 return last_map_addr;
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 016f335bbeea..d4b6e6a29ae3 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -170,7 +170,7 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr,
170 phys_addr &= PAGE_MASK; 170 phys_addr &= PAGE_MASK;
171 size = PAGE_ALIGN(last_addr+1) - phys_addr; 171 size = PAGE_ALIGN(last_addr+1) - phys_addr;
172 172
173 retval = reserve_memtype(phys_addr, phys_addr + size, 173 retval = reserve_memtype(phys_addr, (u64)phys_addr + size,
174 prot_val, &new_prot_val); 174 prot_val, &new_prot_val);
175 if (retval) { 175 if (retval) {
176 pr_debug("Warning: reserve_memtype returned %d\n", retval); 176 pr_debug("Warning: reserve_memtype returned %d\n", retval);
@@ -553,13 +553,11 @@ static int __init check_early_ioremap_leak(void)
553{ 553{
554 if (!early_ioremap_nested) 554 if (!early_ioremap_nested)
555 return 0; 555 return 0;
556 556 WARN(1, KERN_WARNING
557 printk(KERN_WARNING
558 "Debug warning: early ioremap leak of %d areas detected.\n", 557 "Debug warning: early ioremap leak of %d areas detected.\n",
559 early_ioremap_nested); 558 early_ioremap_nested);
560 printk(KERN_WARNING 559 printk(KERN_WARNING
561 "please boot with early_ioremap_debug and report the dmesg.\n"); 560 "please boot with early_ioremap_debug and report the dmesg.\n");
562 WARN_ON(1);
563 561
564 return 1; 562 return 1;
565} 563}
diff --git a/arch/x86/mm/mmio-mod.c b/arch/x86/mm/mmio-mod.c
index e7397e108beb..635b50e85581 100644
--- a/arch/x86/mm/mmio-mod.c
+++ b/arch/x86/mm/mmio-mod.c
@@ -430,7 +430,9 @@ static void enter_uniprocessor(void)
430 "may miss events.\n"); 430 "may miss events.\n");
431} 431}
432 432
433static void leave_uniprocessor(void) 433/* __ref because leave_uniprocessor calls cpu_up which is __cpuinit,
434 but this whole function is ifdefed CONFIG_HOTPLUG_CPU */
435static void __ref leave_uniprocessor(void)
434{ 436{
435 int cpu; 437 int cpu;
436 int err; 438 int err;
diff --git a/arch/x86/mm/pageattr-test.c b/arch/x86/mm/pageattr-test.c
index 0dcd42eb94e6..d4aa503caaa2 100644
--- a/arch/x86/mm/pageattr-test.c
+++ b/arch/x86/mm/pageattr-test.c
@@ -221,8 +221,7 @@ static int pageattr_test(void)
221 failed += print_split(&sc); 221 failed += print_split(&sc);
222 222
223 if (failed) { 223 if (failed) {
224 printk(KERN_ERR "NOT PASSED. Please report.\n"); 224 WARN(1, KERN_ERR "NOT PASSED. Please report.\n");
225 WARN_ON(1);
226 return -EINVAL; 225 return -EINVAL;
227 } else { 226 } else {
228 if (print) 227 if (print)
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 65c6e46bf059..43e2f8483e4f 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -55,13 +55,19 @@ static void split_page_count(int level)
55 55
56int arch_report_meminfo(char *page) 56int arch_report_meminfo(char *page)
57{ 57{
58 int n = sprintf(page, "DirectMap4k: %8lu\n" 58 int n = sprintf(page, "DirectMap4k: %8lu kB\n",
59 "DirectMap2M: %8lu\n", 59 direct_pages_count[PG_LEVEL_4K] << 2);
60 direct_pages_count[PG_LEVEL_4K], 60#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
61 direct_pages_count[PG_LEVEL_2M]); 61 n += sprintf(page + n, "DirectMap2M: %8lu kB\n",
62 direct_pages_count[PG_LEVEL_2M] << 11);
63#else
64 n += sprintf(page + n, "DirectMap4M: %8lu kB\n",
65 direct_pages_count[PG_LEVEL_2M] << 12);
66#endif
62#ifdef CONFIG_X86_64 67#ifdef CONFIG_X86_64
63 n += sprintf(page + n, "DirectMap1G: %8lu\n", 68 if (direct_gbpages)
64 direct_pages_count[PG_LEVEL_1G]); 69 n += sprintf(page + n, "DirectMap1G: %8lu kB\n",
70 direct_pages_count[PG_LEVEL_1G] << 20);
65#endif 71#endif
66 return n; 72 return n;
67} 73}
@@ -592,10 +598,9 @@ repeat:
592 if (!pte_val(old_pte)) { 598 if (!pte_val(old_pte)) {
593 if (!primary) 599 if (!primary)
594 return 0; 600 return 0;
595 printk(KERN_WARNING "CPA: called for zero pte. " 601 WARN(1, KERN_WARNING "CPA: called for zero pte. "
596 "vaddr = %lx cpa->vaddr = %lx\n", address, 602 "vaddr = %lx cpa->vaddr = %lx\n", address,
597 cpa->vaddr); 603 cpa->vaddr);
598 WARN_ON(1);
599 return -EINVAL; 604 return -EINVAL;
600 } 605 }
601 606
@@ -844,7 +849,7 @@ int set_memory_uc(unsigned long addr, int numpages)
844 /* 849 /*
845 * for now UC MINUS. see comments in ioremap_nocache() 850 * for now UC MINUS. see comments in ioremap_nocache()
846 */ 851 */
847 if (reserve_memtype(addr, addr + numpages * PAGE_SIZE, 852 if (reserve_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE,
848 _PAGE_CACHE_UC_MINUS, NULL)) 853 _PAGE_CACHE_UC_MINUS, NULL))
849 return -EINVAL; 854 return -EINVAL;
850 855
@@ -863,7 +868,7 @@ int set_memory_wc(unsigned long addr, int numpages)
863 if (!pat_enabled) 868 if (!pat_enabled)
864 return set_memory_uc(addr, numpages); 869 return set_memory_uc(addr, numpages);
865 870
866 if (reserve_memtype(addr, addr + numpages * PAGE_SIZE, 871 if (reserve_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE,
867 _PAGE_CACHE_WC, NULL)) 872 _PAGE_CACHE_WC, NULL))
868 return -EINVAL; 873 return -EINVAL;
869 874
@@ -879,7 +884,7 @@ int _set_memory_wb(unsigned long addr, int numpages)
879 884
880int set_memory_wb(unsigned long addr, int numpages) 885int set_memory_wb(unsigned long addr, int numpages)
881{ 886{
882 free_memtype(addr, addr + numpages * PAGE_SIZE); 887 free_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE);
883 888
884 return _set_memory_wb(addr, numpages); 889 return _set_memory_wb(addr, numpages);
885} 890}
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index 2fe30916d4b6..2a50e0fa64a5 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -207,6 +207,9 @@ static int chk_conflict(struct memtype *new, struct memtype *entry,
207 return -EBUSY; 207 return -EBUSY;
208} 208}
209 209
210static struct memtype *cached_entry;
211static u64 cached_start;
212
210/* 213/*
211 * req_type typically has one of the: 214 * req_type typically has one of the:
212 * - _PAGE_CACHE_WB 215 * - _PAGE_CACHE_WB
@@ -280,11 +283,17 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
280 283
281 spin_lock(&memtype_lock); 284 spin_lock(&memtype_lock);
282 285
286 if (cached_entry && start >= cached_start)
287 entry = cached_entry;
288 else
289 entry = list_entry(&memtype_list, struct memtype, nd);
290
283 /* Search for existing mapping that overlaps the current range */ 291 /* Search for existing mapping that overlaps the current range */
284 where = NULL; 292 where = NULL;
285 list_for_each_entry(entry, &memtype_list, nd) { 293 list_for_each_entry_continue(entry, &memtype_list, nd) {
286 if (end <= entry->start) { 294 if (end <= entry->start) {
287 where = entry->nd.prev; 295 where = entry->nd.prev;
296 cached_entry = list_entry(where, struct memtype, nd);
288 break; 297 break;
289 } else if (start <= entry->start) { /* end > entry->start */ 298 } else if (start <= entry->start) { /* end > entry->start */
290 err = chk_conflict(new, entry, new_type); 299 err = chk_conflict(new, entry, new_type);
@@ -292,6 +301,8 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
292 dprintk("Overlap at 0x%Lx-0x%Lx\n", 301 dprintk("Overlap at 0x%Lx-0x%Lx\n",
293 entry->start, entry->end); 302 entry->start, entry->end);
294 where = entry->nd.prev; 303 where = entry->nd.prev;
304 cached_entry = list_entry(where,
305 struct memtype, nd);
295 } 306 }
296 break; 307 break;
297 } else if (start < entry->end) { /* start > entry->start */ 308 } else if (start < entry->end) { /* start > entry->start */
@@ -299,7 +310,20 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
299 if (!err) { 310 if (!err) {
300 dprintk("Overlap at 0x%Lx-0x%Lx\n", 311 dprintk("Overlap at 0x%Lx-0x%Lx\n",
301 entry->start, entry->end); 312 entry->start, entry->end);
302 where = &entry->nd; 313 cached_entry = list_entry(entry->nd.prev,
314 struct memtype, nd);
315
316 /*
317 * Move to right position in the linked
318 * list to add this new entry
319 */
320 list_for_each_entry_continue(entry,
321 &memtype_list, nd) {
322 if (start <= entry->start) {
323 where = entry->nd.prev;
324 break;
325 }
326 }
303 } 327 }
304 break; 328 break;
305 } 329 }
@@ -314,6 +338,8 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
314 return err; 338 return err;
315 } 339 }
316 340
341 cached_start = start;
342
317 if (where) 343 if (where)
318 list_add(&new->nd, where); 344 list_add(&new->nd, where);
319 else 345 else
@@ -343,6 +369,9 @@ int free_memtype(u64 start, u64 end)
343 spin_lock(&memtype_lock); 369 spin_lock(&memtype_lock);
344 list_for_each_entry(entry, &memtype_list, nd) { 370 list_for_each_entry(entry, &memtype_list, nd) {
345 if (entry->start == start && entry->end == end) { 371 if (entry->start == start && entry->end == end) {
372 if (cached_entry == entry || cached_start == start)
373 cached_entry = NULL;
374
346 list_del(&entry->nd); 375 list_del(&entry->nd);
347 kfree(entry); 376 kfree(entry);
348 err = 0; 377 err = 0;
@@ -361,14 +390,6 @@ int free_memtype(u64 start, u64 end)
361} 390}
362 391
363 392
364/*
365 * /dev/mem mmap interface. The memtype used for mapping varies:
366 * - Use UC for mappings with O_SYNC flag
367 * - Without O_SYNC flag, if there is any conflict in reserve_memtype,
368 * inherit the memtype from existing mapping.
369 * - Else use UC_MINUS memtype (for backward compatibility with existing
370 * X drivers.
371 */
372pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, 393pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
373 unsigned long size, pgprot_t vma_prot) 394 unsigned long size, pgprot_t vma_prot)
374{ 395{
@@ -406,14 +427,14 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
406 unsigned long size, pgprot_t *vma_prot) 427 unsigned long size, pgprot_t *vma_prot)
407{ 428{
408 u64 offset = ((u64) pfn) << PAGE_SHIFT; 429 u64 offset = ((u64) pfn) << PAGE_SHIFT;
409 unsigned long flags = _PAGE_CACHE_UC_MINUS; 430 unsigned long flags = -1;
410 int retval; 431 int retval;
411 432
412 if (!range_is_allowed(pfn, size)) 433 if (!range_is_allowed(pfn, size))
413 return 0; 434 return 0;
414 435
415 if (file->f_flags & O_SYNC) { 436 if (file->f_flags & O_SYNC) {
416 flags = _PAGE_CACHE_UC; 437 flags = _PAGE_CACHE_UC_MINUS;
417 } 438 }
418 439
419#ifdef CONFIG_X86_32 440#ifdef CONFIG_X86_32
@@ -436,13 +457,14 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
436#endif 457#endif
437 458
438 /* 459 /*
439 * With O_SYNC, we can only take UC mapping. Fail if we cannot. 460 * With O_SYNC, we can only take UC_MINUS mapping. Fail if we cannot.
461 *
440 * Without O_SYNC, we want to get 462 * Without O_SYNC, we want to get
441 * - WB for WB-able memory and no other conflicting mappings 463 * - WB for WB-able memory and no other conflicting mappings
442 * - UC_MINUS for non-WB-able memory with no other conflicting mappings 464 * - UC_MINUS for non-WB-able memory with no other conflicting mappings
443 * - Inherit from confliting mappings otherwise 465 * - Inherit from confliting mappings otherwise
444 */ 466 */
445 if (flags != _PAGE_CACHE_UC_MINUS) { 467 if (flags != -1) {
446 retval = reserve_memtype(offset, offset + size, flags, NULL); 468 retval = reserve_memtype(offset, offset + size, flags, NULL);
447 } else { 469 } else {
448 retval = reserve_memtype(offset, offset + size, -1, &flags); 470 retval = reserve_memtype(offset, offset + size, -1, &flags);
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 557b2abceef8..d50302774fe2 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -207,6 +207,9 @@ static void pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmds[])
207 unsigned long addr; 207 unsigned long addr;
208 int i; 208 int i;
209 209
210 if (PREALLOCATED_PMDS == 0) /* Work around gcc-3.4.x bug */
211 return;
212
210 pud = pud_offset(pgd, 0); 213 pud = pud_offset(pgd, 0);
211 214
212 for (addr = i = 0; i < PREALLOCATED_PMDS; 215 for (addr = i = 0; i < PREALLOCATED_PMDS;
diff --git a/arch/x86/mm/srat_32.c b/arch/x86/mm/srat_32.c
index 1eb2973a301c..16ae70fc57e7 100644
--- a/arch/x86/mm/srat_32.c
+++ b/arch/x86/mm/srat_32.c
@@ -178,7 +178,7 @@ void acpi_numa_arch_fixup(void)
178 * start of the node, and that the current "end" address is after 178 * start of the node, and that the current "end" address is after
179 * the previous one. 179 * the previous one.
180 */ 180 */
181static __init void node_read_chunk(int nid, struct node_memory_chunk_s *memory_chunk) 181static __init int node_read_chunk(int nid, struct node_memory_chunk_s *memory_chunk)
182{ 182{
183 /* 183 /*
184 * Only add present memory as told by the e820. 184 * Only add present memory as told by the e820.
@@ -189,10 +189,10 @@ static __init void node_read_chunk(int nid, struct node_memory_chunk_s *memory_c
189 if (memory_chunk->start_pfn >= max_pfn) { 189 if (memory_chunk->start_pfn >= max_pfn) {
190 printk(KERN_INFO "Ignoring SRAT pfns: %08lx - %08lx\n", 190 printk(KERN_INFO "Ignoring SRAT pfns: %08lx - %08lx\n",
191 memory_chunk->start_pfn, memory_chunk->end_pfn); 191 memory_chunk->start_pfn, memory_chunk->end_pfn);
192 return; 192 return -1;
193 } 193 }
194 if (memory_chunk->nid != nid) 194 if (memory_chunk->nid != nid)
195 return; 195 return -1;
196 196
197 if (!node_has_online_mem(nid)) 197 if (!node_has_online_mem(nid))
198 node_start_pfn[nid] = memory_chunk->start_pfn; 198 node_start_pfn[nid] = memory_chunk->start_pfn;
@@ -202,6 +202,8 @@ static __init void node_read_chunk(int nid, struct node_memory_chunk_s *memory_c
202 202
203 if (node_end_pfn[nid] < memory_chunk->end_pfn) 203 if (node_end_pfn[nid] < memory_chunk->end_pfn)
204 node_end_pfn[nid] = memory_chunk->end_pfn; 204 node_end_pfn[nid] = memory_chunk->end_pfn;
205
206 return 0;
205} 207}
206 208
207int __init get_memcfg_from_srat(void) 209int __init get_memcfg_from_srat(void)
@@ -259,7 +261,9 @@ int __init get_memcfg_from_srat(void)
259 printk(KERN_DEBUG 261 printk(KERN_DEBUG
260 "chunk %d nid %d start_pfn %08lx end_pfn %08lx\n", 262 "chunk %d nid %d start_pfn %08lx end_pfn %08lx\n",
261 j, chunk->nid, chunk->start_pfn, chunk->end_pfn); 263 j, chunk->nid, chunk->start_pfn, chunk->end_pfn);
262 node_read_chunk(chunk->nid, chunk); 264 if (node_read_chunk(chunk->nid, chunk))
265 continue;
266
263 e820_register_active_regions(chunk->nid, chunk->start_pfn, 267 e820_register_active_regions(chunk->nid, chunk->start_pfn,
264 min(chunk->end_pfn, max_pfn)); 268 min(chunk->end_pfn, max_pfn));
265 } 269 }
diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c
index 3f90289410e6..0227694f7dab 100644
--- a/arch/x86/oprofile/nmi_int.c
+++ b/arch/x86/oprofile/nmi_int.c
@@ -15,6 +15,7 @@
15#include <linux/slab.h> 15#include <linux/slab.h>
16#include <linux/moduleparam.h> 16#include <linux/moduleparam.h>
17#include <linux/kdebug.h> 17#include <linux/kdebug.h>
18#include <linux/cpu.h>
18#include <asm/nmi.h> 19#include <asm/nmi.h>
19#include <asm/msr.h> 20#include <asm/msr.h>
20#include <asm/apic.h> 21#include <asm/apic.h>
@@ -28,23 +29,48 @@ static DEFINE_PER_CPU(unsigned long, saved_lvtpc);
28 29
29static int nmi_start(void); 30static int nmi_start(void);
30static void nmi_stop(void); 31static void nmi_stop(void);
32static void nmi_cpu_start(void *dummy);
33static void nmi_cpu_stop(void *dummy);
31 34
32/* 0 == registered but off, 1 == registered and on */ 35/* 0 == registered but off, 1 == registered and on */
33static int nmi_enabled = 0; 36static int nmi_enabled = 0;
34 37
38#ifdef CONFIG_SMP
39static int oprofile_cpu_notifier(struct notifier_block *b, unsigned long action,
40 void *data)
41{
42 int cpu = (unsigned long)data;
43 switch (action) {
44 case CPU_DOWN_FAILED:
45 case CPU_ONLINE:
46 smp_call_function_single(cpu, nmi_cpu_start, NULL, 0);
47 break;
48 case CPU_DOWN_PREPARE:
49 smp_call_function_single(cpu, nmi_cpu_stop, NULL, 1);
50 break;
51 }
52 return NOTIFY_DONE;
53}
54
55static struct notifier_block oprofile_cpu_nb = {
56 .notifier_call = oprofile_cpu_notifier
57};
58#endif
59
35#ifdef CONFIG_PM 60#ifdef CONFIG_PM
36 61
37static int nmi_suspend(struct sys_device *dev, pm_message_t state) 62static int nmi_suspend(struct sys_device *dev, pm_message_t state)
38{ 63{
64 /* Only one CPU left, just stop that one */
39 if (nmi_enabled == 1) 65 if (nmi_enabled == 1)
40 nmi_stop(); 66 nmi_cpu_stop(NULL);
41 return 0; 67 return 0;
42} 68}
43 69
44static int nmi_resume(struct sys_device *dev) 70static int nmi_resume(struct sys_device *dev)
45{ 71{
46 if (nmi_enabled == 1) 72 if (nmi_enabled == 1)
47 nmi_start(); 73 nmi_cpu_start(NULL);
48 return 0; 74 return 0;
49} 75}
50 76
@@ -463,6 +489,9 @@ int __init op_nmi_init(struct oprofile_operations *ops)
463 } 489 }
464 490
465 init_sysfs(); 491 init_sysfs();
492#ifdef CONFIG_SMP
493 register_cpu_notifier(&oprofile_cpu_nb);
494#endif
466 using_nmi = 1; 495 using_nmi = 1;
467 ops->create_files = nmi_create_files; 496 ops->create_files = nmi_create_files;
468 ops->setup = nmi_setup; 497 ops->setup = nmi_setup;
@@ -476,6 +505,10 @@ int __init op_nmi_init(struct oprofile_operations *ops)
476 505
477void op_nmi_exit(void) 506void op_nmi_exit(void)
478{ 507{
479 if (using_nmi) 508 if (using_nmi) {
480 exit_sysfs(); 509 exit_sysfs();
510#ifdef CONFIG_SMP
511 unregister_cpu_notifier(&oprofile_cpu_nb);
512#endif
513 }
481} 514}
diff --git a/arch/x86/pci/amd_bus.c b/arch/x86/pci/amd_bus.c
index dbf532369711..6a0fca78c362 100644
--- a/arch/x86/pci/amd_bus.c
+++ b/arch/x86/pci/amd_bus.c
@@ -1,6 +1,7 @@
1#include <linux/init.h> 1#include <linux/init.h>
2#include <linux/pci.h> 2#include <linux/pci.h>
3#include <linux/topology.h> 3#include <linux/topology.h>
4#include <linux/cpu.h>
4#include "pci.h" 5#include "pci.h"
5 6
6#ifdef CONFIG_X86_64 7#ifdef CONFIG_X86_64
@@ -555,15 +556,17 @@ static int __init early_fill_mp_bus_info(void)
555 return 0; 556 return 0;
556} 557}
557 558
558postcore_initcall(early_fill_mp_bus_info); 559#else /* !CONFIG_X86_64 */
559 560
560#endif 561static int __init early_fill_mp_bus_info(void) { return 0; }
562
563#endif /* !CONFIG_X86_64 */
561 564
562/* common 32/64 bit code */ 565/* common 32/64 bit code */
563 566
564#define ENABLE_CF8_EXT_CFG (1ULL << 46) 567#define ENABLE_CF8_EXT_CFG (1ULL << 46)
565 568
566static void enable_pci_io_ecs_per_cpu(void *unused) 569static void enable_pci_io_ecs(void *unused)
567{ 570{
568 u64 reg; 571 u64 reg;
569 rdmsrl(MSR_AMD64_NB_CFG, reg); 572 rdmsrl(MSR_AMD64_NB_CFG, reg);
@@ -573,14 +576,51 @@ static void enable_pci_io_ecs_per_cpu(void *unused)
573 } 576 }
574} 577}
575 578
576static int __init enable_pci_io_ecs(void) 579static int __cpuinit amd_cpu_notify(struct notifier_block *self,
580 unsigned long action, void *hcpu)
577{ 581{
582 int cpu = (long)hcpu;
583 switch(action) {
584 case CPU_ONLINE:
585 case CPU_ONLINE_FROZEN:
586 smp_call_function_single(cpu, enable_pci_io_ecs, NULL, 0);
587 break;
588 default:
589 break;
590 }
591 return NOTIFY_OK;
592}
593
594static struct notifier_block __cpuinitdata amd_cpu_notifier = {
595 .notifier_call = amd_cpu_notify,
596};
597
598static int __init pci_io_ecs_init(void)
599{
600 int cpu;
601
578 /* assume all cpus from fam10h have IO ECS */ 602 /* assume all cpus from fam10h have IO ECS */
579 if (boot_cpu_data.x86 < 0x10) 603 if (boot_cpu_data.x86 < 0x10)
580 return 0; 604 return 0;
581 on_each_cpu(enable_pci_io_ecs_per_cpu, NULL, 1); 605
606 register_cpu_notifier(&amd_cpu_notifier);
607 for_each_online_cpu(cpu)
608 amd_cpu_notify(&amd_cpu_notifier, (unsigned long)CPU_ONLINE,
609 (void *)(long)cpu);
582 pci_probe |= PCI_HAS_IO_ECS; 610 pci_probe |= PCI_HAS_IO_ECS;
611
612 return 0;
613}
614
615static int __init amd_postcore_init(void)
616{
617 if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
618 return 0;
619
620 early_fill_mp_bus_info();
621 pci_io_ecs_init();
622
583 return 0; 623 return 0;
584} 624}
585 625
586postcore_initcall(enable_pci_io_ecs); 626postcore_initcall(amd_postcore_init);
diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c
index 5807d1bc73f7..d765da913842 100644
--- a/arch/x86/pci/i386.c
+++ b/arch/x86/pci/i386.c
@@ -31,8 +31,11 @@
31#include <linux/ioport.h> 31#include <linux/ioport.h>
32#include <linux/errno.h> 32#include <linux/errno.h>
33#include <linux/bootmem.h> 33#include <linux/bootmem.h>
34#include <linux/acpi.h>
34 35
35#include <asm/pat.h> 36#include <asm/pat.h>
37#include <asm/hpet.h>
38#include <asm/io_apic.h>
36 39
37#include "pci.h" 40#include "pci.h"
38 41
@@ -77,6 +80,77 @@ pcibios_align_resource(void *data, struct resource *res,
77} 80}
78EXPORT_SYMBOL(pcibios_align_resource); 81EXPORT_SYMBOL(pcibios_align_resource);
79 82
83static int check_res_with_valid(struct pci_dev *dev, struct resource *res)
84{
85 unsigned long base;
86 unsigned long size;
87 int i;
88
89 base = res->start;
90 size = (res->start == 0 && res->end == res->start) ? 0 :
91 (res->end - res->start + 1);
92
93 if (!base || !size)
94 return 0;
95
96#ifdef CONFIG_HPET_TIMER
97 /* for hpet */
98 if (base == hpet_address && (res->flags & IORESOURCE_MEM)) {
99 dev_info(&dev->dev, "BAR has HPET at %08lx-%08lx\n",
100 base, base + size - 1);
101 return 1;
102 }
103#endif
104
105#ifdef CONFIG_X86_IO_APIC
106 for (i = 0; i < nr_ioapics; i++) {
107 unsigned long ioapic_phys = mp_ioapics[i].mp_apicaddr;
108
109 if (base == ioapic_phys && (res->flags & IORESOURCE_MEM)) {
110 dev_info(&dev->dev, "BAR has ioapic at %08lx-%08lx\n",
111 base, base + size - 1);
112 return 1;
113 }
114 }
115#endif
116
117#ifdef CONFIG_PCI_MMCONFIG
118 for (i = 0; i < pci_mmcfg_config_num; i++) {
119 unsigned long addr;
120
121 addr = pci_mmcfg_config[i].address;
122 if (base == addr && (res->flags & IORESOURCE_MEM)) {
123 dev_info(&dev->dev, "BAR has MMCONFIG at %08lx-%08lx\n",
124 base, base + size - 1);
125 return 1;
126 }
127 }
128#endif
129
130 return 0;
131}
132
133static int check_platform(struct pci_dev *dev, struct resource *res)
134{
135 struct resource *root = NULL;
136
137 /*
138 * forcibly insert it into the
139 * resource tree
140 */
141 if (res->flags & IORESOURCE_MEM)
142 root = &iomem_resource;
143 else if (res->flags & IORESOURCE_IO)
144 root = &ioport_resource;
145
146 if (root && check_res_with_valid(dev, res)) {
147 insert_resource(root, res);
148
149 return 1;
150 }
151
152 return 0;
153}
80/* 154/*
81 * Handle resources of PCI devices. If the world were perfect, we could 155 * Handle resources of PCI devices. If the world were perfect, we could
82 * just allocate all the resource regions and do nothing more. It isn't. 156 * just allocate all the resource regions and do nothing more. It isn't.
@@ -128,6 +202,8 @@ static void __init pcibios_allocate_bus_resources(struct list_head *bus_list)
128 pr = pci_find_parent_resource(dev, r); 202 pr = pci_find_parent_resource(dev, r);
129 if (!r->start || !pr || 203 if (!r->start || !pr ||
130 request_resource(pr, r) < 0) { 204 request_resource(pr, r) < 0) {
205 if (check_platform(dev, r))
206 continue;
131 dev_err(&dev->dev, "BAR %d: can't " 207 dev_err(&dev->dev, "BAR %d: can't "
132 "allocate resource\n", idx); 208 "allocate resource\n", idx);
133 /* 209 /*
@@ -171,6 +247,8 @@ static void __init pcibios_allocate_resources(int pass)
171 r->flags, disabled, pass); 247 r->flags, disabled, pass);
172 pr = pci_find_parent_resource(dev, r); 248 pr = pci_find_parent_resource(dev, r);
173 if (!pr || request_resource(pr, r) < 0) { 249 if (!pr || request_resource(pr, r) < 0) {
250 if (check_platform(dev, r))
251 continue;
174 dev_err(&dev->dev, "BAR %d: can't " 252 dev_err(&dev->dev, "BAR %d: can't "
175 "allocate resource\n", idx); 253 "allocate resource\n", idx);
176 /* We'll assign a new address later */ 254 /* We'll assign a new address later */
diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c
index fec0123b33a9..8e077185e185 100644
--- a/arch/x86/pci/irq.c
+++ b/arch/x86/pci/irq.c
@@ -590,6 +590,8 @@ static __init int intel_router_probe(struct irq_router *r, struct pci_dev *route
590 case PCI_DEVICE_ID_INTEL_ICH10_1: 590 case PCI_DEVICE_ID_INTEL_ICH10_1:
591 case PCI_DEVICE_ID_INTEL_ICH10_2: 591 case PCI_DEVICE_ID_INTEL_ICH10_2:
592 case PCI_DEVICE_ID_INTEL_ICH10_3: 592 case PCI_DEVICE_ID_INTEL_ICH10_3:
593 case PCI_DEVICE_ID_INTEL_PCH_0:
594 case PCI_DEVICE_ID_INTEL_PCH_1:
593 r->name = "PIIX/ICH"; 595 r->name = "PIIX/ICH";
594 r->get = pirq_piix_get; 596 r->get = pirq_piix_get;
595 r->set = pirq_piix_set; 597 r->set = pirq_piix_set;
diff --git a/arch/x86/pci/legacy.c b/arch/x86/pci/legacy.c
index ec9ce35e44d6..b722dd481b39 100644
--- a/arch/x86/pci/legacy.c
+++ b/arch/x86/pci/legacy.c
@@ -14,7 +14,7 @@ static void __devinit pcibios_fixup_peer_bridges(void)
14 int n, devfn; 14 int n, devfn;
15 long node; 15 long node;
16 16
17 if (pcibios_last_bus <= 0 || pcibios_last_bus >= 0xff) 17 if (pcibios_last_bus <= 0 || pcibios_last_bus > 0xff)
18 return; 18 return;
19 DBG("PCI: Peer bridge fixup\n"); 19 DBG("PCI: Peer bridge fixup\n");
20 20
diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c
index 23faaa890ffc..d9635764ce3d 100644
--- a/arch/x86/pci/mmconfig-shared.c
+++ b/arch/x86/pci/mmconfig-shared.c
@@ -293,7 +293,7 @@ static acpi_status __init find_mboard_resource(acpi_handle handle, u32 lvl,
293 return AE_OK; 293 return AE_OK;
294} 294}
295 295
296static int __init is_acpi_reserved(unsigned long start, unsigned long end) 296static int __init is_acpi_reserved(u64 start, u64 end, unsigned not_used)
297{ 297{
298 struct resource mcfg_res; 298 struct resource mcfg_res;
299 299
@@ -310,6 +310,41 @@ static int __init is_acpi_reserved(unsigned long start, unsigned long end)
310 return mcfg_res.flags; 310 return mcfg_res.flags;
311} 311}
312 312
313typedef int (*check_reserved_t)(u64 start, u64 end, unsigned type);
314
315static int __init is_mmconf_reserved(check_reserved_t is_reserved,
316 u64 addr, u64 size, int i,
317 typeof(pci_mmcfg_config[0]) *cfg, int with_e820)
318{
319 u64 old_size = size;
320 int valid = 0;
321
322 while (!is_reserved(addr, addr + size - 1, E820_RESERVED)) {
323 size >>= 1;
324 if (size < (16UL<<20))
325 break;
326 }
327
328 if (size >= (16UL<<20) || size == old_size) {
329 printk(KERN_NOTICE
330 "PCI: MCFG area at %Lx reserved in %s\n",
331 addr, with_e820?"E820":"ACPI motherboard resources");
332 valid = 1;
333
334 if (old_size != size) {
335 /* update end_bus_number */
336 cfg->end_bus_number = cfg->start_bus_number + ((size>>20) - 1);
337 printk(KERN_NOTICE "PCI: updated MCFG configuration %d: base %lx "
338 "segment %hu buses %u - %u\n",
339 i, (unsigned long)cfg->address, cfg->pci_segment,
340 (unsigned int)cfg->start_bus_number,
341 (unsigned int)cfg->end_bus_number);
342 }
343 }
344
345 return valid;
346}
347
313static void __init pci_mmcfg_reject_broken(int early) 348static void __init pci_mmcfg_reject_broken(int early)
314{ 349{
315 typeof(pci_mmcfg_config[0]) *cfg; 350 typeof(pci_mmcfg_config[0]) *cfg;
@@ -324,21 +359,22 @@ static void __init pci_mmcfg_reject_broken(int early)
324 359
325 for (i = 0; i < pci_mmcfg_config_num; i++) { 360 for (i = 0; i < pci_mmcfg_config_num; i++) {
326 int valid = 0; 361 int valid = 0;
327 u32 size = (cfg->end_bus_number + 1) << 20; 362 u64 addr, size;
363
328 cfg = &pci_mmcfg_config[i]; 364 cfg = &pci_mmcfg_config[i];
365 addr = cfg->start_bus_number;
366 addr <<= 20;
367 addr += cfg->address;
368 size = cfg->end_bus_number + 1 - cfg->start_bus_number;
369 size <<= 20;
329 printk(KERN_NOTICE "PCI: MCFG configuration %d: base %lx " 370 printk(KERN_NOTICE "PCI: MCFG configuration %d: base %lx "
330 "segment %hu buses %u - %u\n", 371 "segment %hu buses %u - %u\n",
331 i, (unsigned long)cfg->address, cfg->pci_segment, 372 i, (unsigned long)cfg->address, cfg->pci_segment,
332 (unsigned int)cfg->start_bus_number, 373 (unsigned int)cfg->start_bus_number,
333 (unsigned int)cfg->end_bus_number); 374 (unsigned int)cfg->end_bus_number);
334 375
335 if (!early && 376 if (!early)
336 is_acpi_reserved(cfg->address, cfg->address + size - 1)) { 377 valid = is_mmconf_reserved(is_acpi_reserved, addr, size, i, cfg, 0);
337 printk(KERN_NOTICE "PCI: MCFG area at %Lx reserved "
338 "in ACPI motherboard resources\n",
339 cfg->address);
340 valid = 1;
341 }
342 378
343 if (valid) 379 if (valid)
344 continue; 380 continue;
@@ -347,16 +383,11 @@ static void __init pci_mmcfg_reject_broken(int early)
347 printk(KERN_ERR "PCI: BIOS Bug: MCFG area at %Lx is not" 383 printk(KERN_ERR "PCI: BIOS Bug: MCFG area at %Lx is not"
348 " reserved in ACPI motherboard resources\n", 384 " reserved in ACPI motherboard resources\n",
349 cfg->address); 385 cfg->address);
386
350 /* Don't try to do this check unless configuration 387 /* Don't try to do this check unless configuration
351 type 1 is available. how about type 2 ?*/ 388 type 1 is available. how about type 2 ?*/
352 if (raw_pci_ops && e820_all_mapped(cfg->address, 389 if (raw_pci_ops)
353 cfg->address + size - 1, 390 valid = is_mmconf_reserved(e820_all_mapped, addr, size, i, cfg, 1);
354 E820_RESERVED)) {
355 printk(KERN_NOTICE
356 "PCI: MCFG area at %Lx reserved in E820\n",
357 cfg->address);
358 valid = 1;
359 }
360 391
361 if (!valid) 392 if (!valid)
362 goto reject; 393 goto reject;
@@ -365,7 +396,7 @@ static void __init pci_mmcfg_reject_broken(int early)
365 return; 396 return;
366 397
367reject: 398reject:
368 printk(KERN_ERR "PCI: Not using MMCONFIG.\n"); 399 printk(KERN_INFO "PCI: Not using MMCONFIG.\n");
369 pci_mmcfg_arch_free(); 400 pci_mmcfg_arch_free();
370 kfree(pci_mmcfg_config); 401 kfree(pci_mmcfg_config);
371 pci_mmcfg_config = NULL; 402 pci_mmcfg_config = NULL;
diff --git a/arch/x86/power/cpu_32.c b/arch/x86/power/cpu_32.c
index 7dc5d5cf50a2..d3e083dea720 100644
--- a/arch/x86/power/cpu_32.c
+++ b/arch/x86/power/cpu_32.c
@@ -45,7 +45,7 @@ static void __save_processor_state(struct saved_context *ctxt)
45 ctxt->cr0 = read_cr0(); 45 ctxt->cr0 = read_cr0();
46 ctxt->cr2 = read_cr2(); 46 ctxt->cr2 = read_cr2();
47 ctxt->cr3 = read_cr3(); 47 ctxt->cr3 = read_cr3();
48 ctxt->cr4 = read_cr4(); 48 ctxt->cr4 = read_cr4_safe();
49} 49}
50 50
51/* Needed by apm.c */ 51/* Needed by apm.c */
@@ -98,7 +98,9 @@ static void __restore_processor_state(struct saved_context *ctxt)
98 /* 98 /*
99 * control registers 99 * control registers
100 */ 100 */
101 write_cr4(ctxt->cr4); 101 /* cr4 was introduced in the Pentium CPU */
102 if (ctxt->cr4)
103 write_cr4(ctxt->cr4);
102 write_cr3(ctxt->cr3); 104 write_cr3(ctxt->cr3);
103 write_cr2(ctxt->cr2); 105 write_cr2(ctxt->cr2);
104 write_cr0(ctxt->cr0); 106 write_cr0(ctxt->cr0);
diff --git a/arch/x86/power/hibernate_asm_32.S b/arch/x86/power/hibernate_asm_32.S
index b95aa6cfe3cb..4fc7e872c85e 100644
--- a/arch/x86/power/hibernate_asm_32.S
+++ b/arch/x86/power/hibernate_asm_32.S
@@ -28,9 +28,9 @@ ENTRY(swsusp_arch_suspend)
28 ret 28 ret
29 29
30ENTRY(restore_image) 30ENTRY(restore_image)
31 movl resume_pg_dir, %ecx 31 movl resume_pg_dir, %eax
32 subl $__PAGE_OFFSET, %ecx 32 subl $__PAGE_OFFSET, %eax
33 movl %ecx, %cr3 33 movl %eax, %cr3
34 34
35 movl restore_pblist, %edx 35 movl restore_pblist, %edx
36 .p2align 4,,7 36 .p2align 4,,7
@@ -52,17 +52,21 @@ copy_loop:
52 52
53done: 53done:
54 /* go back to the original page tables */ 54 /* go back to the original page tables */
55 movl $swapper_pg_dir, %ecx 55 movl $swapper_pg_dir, %eax
56 subl $__PAGE_OFFSET, %ecx 56 subl $__PAGE_OFFSET, %eax
57 movl %ecx, %cr3 57 movl %eax, %cr3
58 /* Flush TLB, including "global" things (vmalloc) */ 58 /* Flush TLB, including "global" things (vmalloc) */
59 movl mmu_cr4_features, %eax 59 movl mmu_cr4_features, %ecx
60 movl %eax, %edx 60 jecxz 1f # cr4 Pentium and higher, skip if zero
61 movl %ecx, %edx
61 andl $~(1<<7), %edx; # PGE 62 andl $~(1<<7), %edx; # PGE
62 movl %edx, %cr4; # turn off PGE 63 movl %edx, %cr4; # turn off PGE
63 movl %cr3, %ecx; # flush TLB 641:
64 movl %ecx, %cr3 65 movl %cr3, %eax; # flush TLB
65 movl %eax, %cr4; # turn PGE back on 66 movl %eax, %cr3
67 jecxz 1f # cr4 Pentium and higher, skip if zero
68 movl %ecx, %cr4; # turn PGE back on
691:
66 70
67 movl saved_context_esp, %esp 71 movl saved_context_esp, %esp
68 movl saved_context_ebp, %ebp 72 movl saved_context_ebp, %ebp