aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Kconfig41
-rw-r--r--arch/x86/Kconfig.debug11
-rw-r--r--arch/x86/boot/compressed/head_64.S2
-rw-r--r--arch/x86/boot/compressed/misc.c2
-rw-r--r--arch/x86/crypto/ghash-clmulni-intel_glue.c1
-rw-r--r--arch/x86/include/asm/alternative.h7
-rw-r--r--arch/x86/include/asm/amd_nb.h49
-rw-r--r--arch/x86/include/asm/apic.h1
-rw-r--r--arch/x86/include/asm/apicdef.h1
-rw-r--r--arch/x86/include/asm/bootparam.h1
-rw-r--r--arch/x86/include/asm/e820.h3
-rw-r--r--arch/x86/include/asm/fixmap.h4
-rw-r--r--arch/x86/include/asm/i387.h24
-rw-r--r--arch/x86/include/asm/io_apic.h8
-rw-r--r--arch/x86/include/asm/irq.h4
-rw-r--r--arch/x86/include/asm/kdebug.h2
-rw-r--r--arch/x86/include/asm/kvm_host.h2
-rw-r--r--arch/x86/include/asm/mce.h3
-rw-r--r--arch/x86/include/asm/microcode.h6
-rw-r--r--arch/x86/include/asm/mpspec.h31
-rw-r--r--arch/x86/include/asm/mpspec_def.h7
-rw-r--r--arch/x86/include/asm/mrst-vrtc.h9
-rw-r--r--arch/x86/include/asm/mrst.h14
-rw-r--r--arch/x86/include/asm/msr-index.h16
-rw-r--r--arch/x86/include/asm/nmi.h53
-rw-r--r--arch/x86/include/asm/paravirt.h2
-rw-r--r--arch/x86/include/asm/pci.h1
-rw-r--r--arch/x86/include/asm/perf_event.h2
-rw-r--r--arch/x86/include/asm/perf_event_p4.h63
-rw-r--r--arch/x86/include/asm/pvclock.h1
-rw-r--r--arch/x86/include/asm/setup.h6
-rw-r--r--arch/x86/include/asm/smpboot_hooks.h1
-rw-r--r--arch/x86/include/asm/stacktrace.h33
-rw-r--r--arch/x86/include/asm/timer.h6
-rw-r--r--arch/x86/include/asm/uv/uv_bau.h9
-rw-r--r--arch/x86/kernel/Makefile2
-rw-r--r--arch/x86/kernel/acpi/boot.c11
-rw-r--r--arch/x86/kernel/alternative.c49
-rw-r--r--arch/x86/kernel/amd_nb.c135
-rw-r--r--arch/x86/kernel/apb_timer.c1
-rw-r--r--arch/x86/kernel/aperture_64.c10
-rw-r--r--arch/x86/kernel/apic/Makefile5
-rw-r--r--arch/x86/kernel/apic/apic.c127
-rw-r--r--arch/x86/kernel/apic/hw_nmi.c36
-rw-r--r--arch/x86/kernel/apic/io_apic.c87
-rw-r--r--arch/x86/kernel/apic/nmi.c567
-rw-r--r--arch/x86/kernel/apic/probe_64.c7
-rw-r--r--arch/x86/kernel/apic/x2apic_uv_x.c61
-rw-r--r--arch/x86/kernel/cpu/common.c1
-rw-r--r--arch/x86/kernel/cpu/intel_cacheinfo.c147
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_amd.c135
-rw-r--r--arch/x86/kernel/cpu/mcheck/therm_throt.c40
-rw-r--r--arch/x86/kernel/cpu/perf_event.c74
-rw-r--r--arch/x86/kernel/cpu/perf_event_amd.c16
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel.c26
-rw-r--r--arch/x86/kernel/cpu/perfctr-watchdog.c644
-rw-r--r--arch/x86/kernel/dumpstack.c12
-rw-r--r--arch/x86/kernel/dumpstack_32.c25
-rw-r--r--arch/x86/kernel/dumpstack_64.c24
-rw-r--r--arch/x86/kernel/early_printk.c3
-rw-r--r--arch/x86/kernel/ftrace.c3
-rw-r--r--arch/x86/kernel/head32.c3
-rw-r--r--arch/x86/kernel/head_32.S22
-rw-r--r--arch/x86/kernel/hpet.c26
-rw-r--r--arch/x86/kernel/kprobes.c113
-rw-r--r--arch/x86/kernel/microcode_amd.c34
-rw-r--r--arch/x86/kernel/microcode_intel.c16
-rw-r--r--arch/x86/kernel/pci-gart_64.c34
-rw-r--r--arch/x86/kernel/process.c10
-rw-r--r--arch/x86/kernel/process_32.c2
-rw-r--r--arch/x86/kernel/process_64.c2
-rw-r--r--arch/x86/kernel/pvclock.c5
-rw-r--r--arch/x86/kernel/reboot_fixups_32.c16
-rw-r--r--arch/x86/kernel/resource.c48
-rw-r--r--arch/x86/kernel/setup.c31
-rw-r--r--arch/x86/kernel/smpboot.c22
-rw-r--r--arch/x86/kernel/stacktrace.c8
-rw-r--r--arch/x86/kernel/time.c18
-rw-r--r--arch/x86/kernel/trampoline_64.S2
-rw-r--r--arch/x86/kernel/traps.c35
-rw-r--r--arch/x86/kernel/tsc.c96
-rw-r--r--arch/x86/kernel/verify_cpu.S (renamed from arch/x86/kernel/verify_cpu_64.S)49
-rw-r--r--arch/x86/kernel/vmlinux.lds.S8
-rw-r--r--arch/x86/kernel/xsave.c3
-rw-r--r--arch/x86/kvm/i8259.c2
-rw-r--r--arch/x86/kvm/mmu.c3
-rw-r--r--arch/x86/kvm/svm.c4
-rw-r--r--arch/x86/kvm/vmx.c5
-rw-r--r--arch/x86/kvm/x86.c11
-rw-r--r--arch/x86/kvm/x86.h5
-rw-r--r--arch/x86/lguest/boot.c16
-rw-r--r--arch/x86/lguest/i386_head.S105
-rw-r--r--arch/x86/mm/Makefile2
-rw-r--r--arch/x86/mm/amdtopology_64.c (renamed from arch/x86/mm/k8topology_64.c)12
-rw-r--r--arch/x86/mm/init.c3
-rw-r--r--arch/x86/mm/init_32.c20
-rw-r--r--arch/x86/mm/kmemcheck/error.c2
-rw-r--r--arch/x86/mm/numa_64.c22
-rw-r--r--arch/x86/mm/pageattr.c33
-rw-r--r--arch/x86/mm/setup_nx.c2
-rw-r--r--arch/x86/mm/srat_32.c1
-rw-r--r--arch/x86/mm/srat_64.c10
-rw-r--r--arch/x86/oprofile/backtrace.c2
-rw-r--r--arch/x86/oprofile/nmi_int.c3
-rw-r--r--arch/x86/oprofile/nmi_timer_int.c3
-rw-r--r--arch/x86/oprofile/op_model_amd.c79
-rw-r--r--arch/x86/oprofile/op_model_p4.c2
-rw-r--r--arch/x86/pci/Makefile1
-rw-r--r--arch/x86/pci/ce4100.c315
-rw-r--r--arch/x86/pci/i386.c18
-rw-r--r--arch/x86/pci/pcbios.c23
-rw-r--r--arch/x86/platform/Makefile2
-rw-r--r--arch/x86/platform/ce4100/Makefile1
-rw-r--r--arch/x86/platform/ce4100/ce4100.c132
-rw-r--r--arch/x86/platform/iris/Makefile1
-rw-r--r--arch/x86/platform/iris/iris.c91
-rw-r--r--arch/x86/platform/mrst/Makefile2
-rw-r--r--arch/x86/platform/mrst/early_printk_mrst.c (renamed from arch/x86/kernel/early_printk_mrst.c)0
-rw-r--r--arch/x86/platform/mrst/mrst.c546
-rw-r--r--arch/x86/platform/mrst/vrtc.c165
-rw-r--r--arch/x86/platform/sfi/sfi.c4
-rw-r--r--arch/x86/platform/uv/tlb_uv.c22
-rw-r--r--arch/x86/platform/visws/visws_quirks.c2
-rw-r--r--arch/x86/vdso/Makefile4
-rw-r--r--arch/x86/xen/time.c2
125 files changed, 2855 insertions, 2100 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index e330da21b84f..b6fccb07123e 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -377,6 +377,18 @@ config X86_ELAN
377 377
378 If unsure, choose "PC-compatible" instead. 378 If unsure, choose "PC-compatible" instead.
379 379
380config X86_INTEL_CE
381 bool "CE4100 TV platform"
382 depends on PCI
383 depends on PCI_GODIRECT
384 depends on X86_32
385 depends on X86_EXTENDED_PLATFORM
386 select X86_REBOOTFIXUPS
387 ---help---
388 Select for the Intel CE media processor (CE4100) SOC.
389 This option compiles in support for the CE4100 SOC for settop
390 boxes and media devices.
391
380config X86_MRST 392config X86_MRST
381 bool "Moorestown MID platform" 393 bool "Moorestown MID platform"
382 depends on PCI 394 depends on PCI
@@ -385,6 +397,10 @@ config X86_MRST
385 depends on X86_EXTENDED_PLATFORM 397 depends on X86_EXTENDED_PLATFORM
386 depends on X86_IO_APIC 398 depends on X86_IO_APIC
387 select APB_TIMER 399 select APB_TIMER
400 select I2C
401 select SPI
402 select INTEL_SCU_IPC
403 select X86_PLATFORM_DEVICES
388 ---help--- 404 ---help---
389 Moorestown is Intel's Low Power Intel Architecture (LPIA) based Moblin 405 Moorestown is Intel's Low Power Intel Architecture (LPIA) based Moblin
390 Internet Device(MID) platform. Moorestown consists of two chips: 406 Internet Device(MID) platform. Moorestown consists of two chips:
@@ -466,6 +482,19 @@ config X86_ES7000
466 Support for Unisys ES7000 systems. Say 'Y' here if this kernel is 482 Support for Unisys ES7000 systems. Say 'Y' here if this kernel is
467 supposed to run on an IA32-based Unisys ES7000 system. 483 supposed to run on an IA32-based Unisys ES7000 system.
468 484
485config X86_32_IRIS
486 tristate "Eurobraille/Iris poweroff module"
487 depends on X86_32
488 ---help---
489 The Iris machines from EuroBraille do not have APM or ACPI support
490 to shut themselves down properly. A special I/O sequence is
491 needed to do so, which is what this module does at
492 kernel shutdown.
493
494 This is only for Iris machines from EuroBraille.
495
496 If unused, say N.
497
469config SCHED_OMIT_FRAME_POINTER 498config SCHED_OMIT_FRAME_POINTER
470 def_bool y 499 def_bool y
471 prompt "Single-depth WCHAN output" 500 prompt "Single-depth WCHAN output"
@@ -1141,16 +1170,16 @@ config NUMA
1141comment "NUMA (Summit) requires SMP, 64GB highmem support, ACPI" 1170comment "NUMA (Summit) requires SMP, 64GB highmem support, ACPI"
1142 depends on X86_32 && X86_SUMMIT && (!HIGHMEM64G || !ACPI) 1171 depends on X86_32 && X86_SUMMIT && (!HIGHMEM64G || !ACPI)
1143 1172
1144config K8_NUMA 1173config AMD_NUMA
1145 def_bool y 1174 def_bool y
1146 prompt "Old style AMD Opteron NUMA detection" 1175 prompt "Old style AMD Opteron NUMA detection"
1147 depends on X86_64 && NUMA && PCI 1176 depends on X86_64 && NUMA && PCI
1148 ---help--- 1177 ---help---
1149 Enable K8 NUMA node topology detection. You should say Y here if 1178 Enable AMD NUMA node topology detection. You should say Y here if
1150 you have a multi processor AMD K8 system. This uses an old 1179 you have a multi processor AMD system. This uses an old method to
1151 method to read the NUMA configuration directly from the builtin 1180 read the NUMA configuration directly from the builtin Northbridge
1152 Northbridge of Opteron. It is recommended to use X86_64_ACPI_NUMA 1181 of Opteron. It is recommended to use X86_64_ACPI_NUMA instead,
1153 instead, which also takes priority if both are compiled in. 1182 which also takes priority if both are compiled in.
1154 1183
1155config X86_64_ACPI_NUMA 1184config X86_64_ACPI_NUMA
1156 def_bool y 1185 def_bool y
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index b59ee765414e..45143bbcfe5e 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -117,6 +117,17 @@ config DEBUG_RODATA_TEST
117 feature as well as for the change_page_attr() infrastructure. 117 feature as well as for the change_page_attr() infrastructure.
118 If in doubt, say "N" 118 If in doubt, say "N"
119 119
120config DEBUG_SET_MODULE_RONX
121 bool "Set loadable kernel module data as NX and text as RO"
122 depends on MODULES
123 ---help---
124 This option helps catch unintended modifications to loadable
125 kernel module's text and read-only data. It also prevents execution
126 of module data. Such protection may interfere with run-time code
127 patching and dynamic kernel tracing - and they might also protect
128 against certain classes of kernel exploits.
129 If in doubt, say "N".
130
120config DEBUG_NX_TEST 131config DEBUG_NX_TEST
121 tristate "Testcase for the NX non-executable stack feature" 132 tristate "Testcase for the NX non-executable stack feature"
122 depends on DEBUG_KERNEL && m 133 depends on DEBUG_KERNEL && m
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index 52f85a196fa0..35af09d13dc1 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -182,7 +182,7 @@ no_longmode:
182 hlt 182 hlt
183 jmp 1b 183 jmp 1b
184 184
185#include "../../kernel/verify_cpu_64.S" 185#include "../../kernel/verify_cpu.S"
186 186
187 /* 187 /*
188 * Be careful here startup_64 needs to be at a predictable 188 * Be careful here startup_64 needs to be at a predictable
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index 23f315c9f215..325c05294fc4 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -355,7 +355,7 @@ asmlinkage void decompress_kernel(void *rmode, memptr heap,
355 if (heap > 0x3fffffffffffUL) 355 if (heap > 0x3fffffffffffUL)
356 error("Destination address too large"); 356 error("Destination address too large");
357#else 357#else
358 if (heap > ((-__PAGE_OFFSET-(512<<20)-1) & 0x7fffffff)) 358 if (heap > ((-__PAGE_OFFSET-(128<<20)-1) & 0x7fffffff))
359 error("Destination address too large"); 359 error("Destination address too large");
360#endif 360#endif
361#ifndef CONFIG_RELOCATABLE 361#ifndef CONFIG_RELOCATABLE
diff --git a/arch/x86/crypto/ghash-clmulni-intel_glue.c b/arch/x86/crypto/ghash-clmulni-intel_glue.c
index cbcc8d8ea93a..7a6e68e4f748 100644
--- a/arch/x86/crypto/ghash-clmulni-intel_glue.c
+++ b/arch/x86/crypto/ghash-clmulni-intel_glue.c
@@ -10,6 +10,7 @@
10 * by the Free Software Foundation. 10 * by the Free Software Foundation.
11 */ 11 */
12 12
13#include <linux/err.h>
13#include <linux/module.h> 14#include <linux/module.h>
14#include <linux/init.h> 15#include <linux/init.h>
15#include <linux/kernel.h> 16#include <linux/kernel.h>
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index 01171f6c2c37..13009d1af99a 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -181,8 +181,15 @@ extern void *text_poke_early(void *addr, const void *opcode, size_t len);
181 * On the local CPU you need to be protected again NMI or MCE handlers seeing an 181 * On the local CPU you need to be protected again NMI or MCE handlers seeing an
182 * inconsistent instruction while you patch. 182 * inconsistent instruction while you patch.
183 */ 183 */
184struct text_poke_param {
185 void *addr;
186 const void *opcode;
187 size_t len;
188};
189
184extern void *text_poke(void *addr, const void *opcode, size_t len); 190extern void *text_poke(void *addr, const void *opcode, size_t len);
185extern void *text_poke_smp(void *addr, const void *opcode, size_t len); 191extern void *text_poke_smp(void *addr, const void *opcode, size_t len);
192extern void text_poke_smp_batch(struct text_poke_param *params, int n);
186 193
187#if defined(CONFIG_DYNAMIC_FTRACE) || defined(HAVE_JUMP_LABEL) 194#if defined(CONFIG_DYNAMIC_FTRACE) || defined(HAVE_JUMP_LABEL)
188#define IDEAL_NOP_SIZE_5 5 195#define IDEAL_NOP_SIZE_5 5
diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h
index c8517f81b21e..6aee50d655d1 100644
--- a/arch/x86/include/asm/amd_nb.h
+++ b/arch/x86/include/asm/amd_nb.h
@@ -3,36 +3,53 @@
3 3
4#include <linux/pci.h> 4#include <linux/pci.h>
5 5
6extern struct pci_device_id k8_nb_ids[]; 6extern struct pci_device_id amd_nb_misc_ids[];
7struct bootnode; 7struct bootnode;
8 8
9extern int early_is_k8_nb(u32 value); 9extern int early_is_amd_nb(u32 value);
10extern int cache_k8_northbridges(void); 10extern int amd_cache_northbridges(void);
11extern void k8_flush_garts(void); 11extern void amd_flush_garts(void);
12extern int k8_get_nodes(struct bootnode *nodes); 12extern int amd_get_nodes(struct bootnode *nodes);
13extern int k8_numa_init(unsigned long start_pfn, unsigned long end_pfn); 13extern int amd_numa_init(unsigned long start_pfn, unsigned long end_pfn);
14extern int k8_scan_nodes(void); 14extern int amd_scan_nodes(void);
15 15
16struct k8_northbridge_info { 16struct amd_northbridge {
17 struct pci_dev *misc;
18};
19
20struct amd_northbridge_info {
17 u16 num; 21 u16 num;
18 u8 gart_supported; 22 u64 flags;
19 struct pci_dev **nb_misc; 23 struct amd_northbridge *nb;
20}; 24};
21extern struct k8_northbridge_info k8_northbridges; 25extern struct amd_northbridge_info amd_northbridges;
26
27#define AMD_NB_GART 0x1
28#define AMD_NB_L3_INDEX_DISABLE 0x2
22 29
23#ifdef CONFIG_AMD_NB 30#ifdef CONFIG_AMD_NB
24 31
25static inline struct pci_dev *node_to_k8_nb_misc(int node) 32static inline int amd_nb_num(void)
26{ 33{
27 return (node < k8_northbridges.num) ? k8_northbridges.nb_misc[node] : NULL; 34 return amd_northbridges.num;
28} 35}
29 36
30#else 37static inline int amd_nb_has_feature(int feature)
38{
39 return ((amd_northbridges.flags & feature) == feature);
40}
31 41
32static inline struct pci_dev *node_to_k8_nb_misc(int node) 42static inline struct amd_northbridge *node_to_amd_nb(int node)
33{ 43{
34 return NULL; 44 return (node < amd_northbridges.num) ? &amd_northbridges.nb[node] : NULL;
35} 45}
46
47#else
48
49#define amd_nb_num(x) 0
50#define amd_nb_has_feature(x) false
51#define node_to_amd_nb(x) NULL
52
36#endif 53#endif
37 54
38 55
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index f6ce0bda3b98..cf12007796db 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -238,6 +238,7 @@ extern void setup_boot_APIC_clock(void);
238extern void setup_secondary_APIC_clock(void); 238extern void setup_secondary_APIC_clock(void);
239extern int APIC_init_uniprocessor(void); 239extern int APIC_init_uniprocessor(void);
240extern void enable_NMI_through_LVT0(void); 240extern void enable_NMI_through_LVT0(void);
241extern int apic_force_enable(void);
241 242
242/* 243/*
243 * On 32bit this is mach-xxx local 244 * On 32bit this is mach-xxx local
diff --git a/arch/x86/include/asm/apicdef.h b/arch/x86/include/asm/apicdef.h
index a859ca461fb0..47a30ff8e517 100644
--- a/arch/x86/include/asm/apicdef.h
+++ b/arch/x86/include/asm/apicdef.h
@@ -145,6 +145,7 @@
145 145
146#ifdef CONFIG_X86_32 146#ifdef CONFIG_X86_32
147# define MAX_IO_APICS 64 147# define MAX_IO_APICS 64
148# define MAX_LOCAL_APIC 256
148#else 149#else
149# define MAX_IO_APICS 128 150# define MAX_IO_APICS 128
150# define MAX_LOCAL_APIC 32768 151# define MAX_LOCAL_APIC 32768
diff --git a/arch/x86/include/asm/bootparam.h b/arch/x86/include/asm/bootparam.h
index 8e6218550e77..c8bfe63a06de 100644
--- a/arch/x86/include/asm/bootparam.h
+++ b/arch/x86/include/asm/bootparam.h
@@ -124,6 +124,7 @@ enum {
124 X86_SUBARCH_LGUEST, 124 X86_SUBARCH_LGUEST,
125 X86_SUBARCH_XEN, 125 X86_SUBARCH_XEN,
126 X86_SUBARCH_MRST, 126 X86_SUBARCH_MRST,
127 X86_SUBARCH_CE4100,
127 X86_NR_SUBARCHS, 128 X86_NR_SUBARCHS,
128}; 129};
129 130
diff --git a/arch/x86/include/asm/e820.h b/arch/x86/include/asm/e820.h
index 5be1542fbfaf..e99d55d74df5 100644
--- a/arch/x86/include/asm/e820.h
+++ b/arch/x86/include/asm/e820.h
@@ -72,6 +72,9 @@ struct e820map {
72#define BIOS_BEGIN 0x000a0000 72#define BIOS_BEGIN 0x000a0000
73#define BIOS_END 0x00100000 73#define BIOS_END 0x00100000
74 74
75#define BIOS_ROM_BASE 0xffe00000
76#define BIOS_ROM_END 0xffffffff
77
75#ifdef __KERNEL__ 78#ifdef __KERNEL__
76/* see comment in arch/x86/kernel/e820.c */ 79/* see comment in arch/x86/kernel/e820.c */
77extern struct e820map e820; 80extern struct e820map e820;
diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
index 9479a037419f..0141b234406f 100644
--- a/arch/x86/include/asm/fixmap.h
+++ b/arch/x86/include/asm/fixmap.h
@@ -117,6 +117,10 @@ enum fixed_addresses {
117 FIX_TEXT_POKE1, /* reserve 2 pages for text_poke() */ 117 FIX_TEXT_POKE1, /* reserve 2 pages for text_poke() */
118 FIX_TEXT_POKE0, /* first page is last, because allocation is backward */ 118 FIX_TEXT_POKE0, /* first page is last, because allocation is backward */
119 __end_of_permanent_fixed_addresses, 119 __end_of_permanent_fixed_addresses,
120
121#ifdef CONFIG_X86_MRST
122 FIX_LNW_VRTC,
123#endif
120 /* 124 /*
121 * 256 temporary boot-time mappings, used by early_ioremap(), 125 * 256 temporary boot-time mappings, used by early_ioremap(),
122 * before ioremap() is functional. 126 * before ioremap() is functional.
diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h
index 4aa2bb3b242a..ef328901c802 100644
--- a/arch/x86/include/asm/i387.h
+++ b/arch/x86/include/asm/i387.h
@@ -93,6 +93,17 @@ static inline int fxrstor_checking(struct i387_fxsave_struct *fx)
93 int err; 93 int err;
94 94
95 /* See comment in fxsave() below. */ 95 /* See comment in fxsave() below. */
96#ifdef CONFIG_AS_FXSAVEQ
97 asm volatile("1: fxrstorq %[fx]\n\t"
98 "2:\n"
99 ".section .fixup,\"ax\"\n"
100 "3: movl $-1,%[err]\n"
101 " jmp 2b\n"
102 ".previous\n"
103 _ASM_EXTABLE(1b, 3b)
104 : [err] "=r" (err)
105 : [fx] "m" (*fx), "0" (0));
106#else
96 asm volatile("1: rex64/fxrstor (%[fx])\n\t" 107 asm volatile("1: rex64/fxrstor (%[fx])\n\t"
97 "2:\n" 108 "2:\n"
98 ".section .fixup,\"ax\"\n" 109 ".section .fixup,\"ax\"\n"
@@ -102,6 +113,7 @@ static inline int fxrstor_checking(struct i387_fxsave_struct *fx)
102 _ASM_EXTABLE(1b, 3b) 113 _ASM_EXTABLE(1b, 3b)
103 : [err] "=r" (err) 114 : [err] "=r" (err)
104 : [fx] "R" (fx), "m" (*fx), "0" (0)); 115 : [fx] "R" (fx), "m" (*fx), "0" (0));
116#endif
105 return err; 117 return err;
106} 118}
107 119
@@ -119,6 +131,17 @@ static inline int fxsave_user(struct i387_fxsave_struct __user *fx)
119 return -EFAULT; 131 return -EFAULT;
120 132
121 /* See comment in fxsave() below. */ 133 /* See comment in fxsave() below. */
134#ifdef CONFIG_AS_FXSAVEQ
135 asm volatile("1: fxsaveq %[fx]\n\t"
136 "2:\n"
137 ".section .fixup,\"ax\"\n"
138 "3: movl $-1,%[err]\n"
139 " jmp 2b\n"
140 ".previous\n"
141 _ASM_EXTABLE(1b, 3b)
142 : [err] "=r" (err), [fx] "=m" (*fx)
143 : "0" (0));
144#else
122 asm volatile("1: rex64/fxsave (%[fx])\n\t" 145 asm volatile("1: rex64/fxsave (%[fx])\n\t"
123 "2:\n" 146 "2:\n"
124 ".section .fixup,\"ax\"\n" 147 ".section .fixup,\"ax\"\n"
@@ -128,6 +151,7 @@ static inline int fxsave_user(struct i387_fxsave_struct __user *fx)
128 _ASM_EXTABLE(1b, 3b) 151 _ASM_EXTABLE(1b, 3b)
129 : [err] "=r" (err), "=m" (*fx) 152 : [err] "=r" (err), "=m" (*fx)
130 : [fx] "R" (fx), "0" (0)); 153 : [fx] "R" (fx), "0" (0));
154#endif
131 if (unlikely(err) && 155 if (unlikely(err) &&
132 __clear_user(fx, sizeof(struct i387_fxsave_struct))) 156 __clear_user(fx, sizeof(struct i387_fxsave_struct)))
133 err = -EFAULT; 157 err = -EFAULT;
diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h
index a6b28d017c2f..0c5ca4e30d7b 100644
--- a/arch/x86/include/asm/io_apic.h
+++ b/arch/x86/include/asm/io_apic.h
@@ -159,7 +159,7 @@ struct io_apic_irq_attr;
159extern int io_apic_set_pci_routing(struct device *dev, int irq, 159extern int io_apic_set_pci_routing(struct device *dev, int irq,
160 struct io_apic_irq_attr *irq_attr); 160 struct io_apic_irq_attr *irq_attr);
161void setup_IO_APIC_irq_extra(u32 gsi); 161void setup_IO_APIC_irq_extra(u32 gsi);
162extern void ioapic_init_mappings(void); 162extern void ioapic_and_gsi_init(void);
163extern void ioapic_insert_resources(void); 163extern void ioapic_insert_resources(void);
164 164
165extern struct IO_APIC_route_entry **alloc_ioapic_entries(void); 165extern struct IO_APIC_route_entry **alloc_ioapic_entries(void);
@@ -168,10 +168,9 @@ extern int save_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries);
168extern void mask_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries); 168extern void mask_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries);
169extern int restore_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries); 169extern int restore_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries);
170 170
171extern void probe_nr_irqs_gsi(void);
172extern int get_nr_irqs_gsi(void); 171extern int get_nr_irqs_gsi(void);
173
174extern void setup_ioapic_ids_from_mpc(void); 172extern void setup_ioapic_ids_from_mpc(void);
173extern void setup_ioapic_ids_from_mpc_nocheck(void);
175 174
176struct mp_ioapic_gsi{ 175struct mp_ioapic_gsi{
177 u32 gsi_base; 176 u32 gsi_base;
@@ -189,9 +188,8 @@ extern void __init pre_init_apic_IRQ0(void);
189#define io_apic_assign_pci_irqs 0 188#define io_apic_assign_pci_irqs 0
190#define setup_ioapic_ids_from_mpc x86_init_noop 189#define setup_ioapic_ids_from_mpc x86_init_noop
191static const int timer_through_8259 = 0; 190static const int timer_through_8259 = 0;
192static inline void ioapic_init_mappings(void) { } 191static inline void ioapic_and_gsi_init(void) { }
193static inline void ioapic_insert_resources(void) { } 192static inline void ioapic_insert_resources(void) { }
194static inline void probe_nr_irqs_gsi(void) { }
195#define gsi_top (NR_IRQS_LEGACY) 193#define gsi_top (NR_IRQS_LEGACY)
196static inline int mp_find_ioapic(u32 gsi) { return 0; } 194static inline int mp_find_ioapic(u32 gsi) { return 0; }
197 195
diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h
index 13b0ebaa512f..ba870bb6dd8e 100644
--- a/arch/x86/include/asm/irq.h
+++ b/arch/x86/include/asm/irq.h
@@ -15,10 +15,6 @@ static inline int irq_canonicalize(int irq)
15 return ((irq == 2) ? 9 : irq); 15 return ((irq == 2) ? 9 : irq);
16} 16}
17 17
18#ifdef CONFIG_X86_LOCAL_APIC
19# define ARCH_HAS_NMI_WATCHDOG
20#endif
21
22#ifdef CONFIG_X86_32 18#ifdef CONFIG_X86_32
23extern void irq_ctx_init(int cpu); 19extern void irq_ctx_init(int cpu);
24#else 20#else
diff --git a/arch/x86/include/asm/kdebug.h b/arch/x86/include/asm/kdebug.h
index 5bdfca86581b..f23eb2528464 100644
--- a/arch/x86/include/asm/kdebug.h
+++ b/arch/x86/include/asm/kdebug.h
@@ -28,7 +28,7 @@ extern void die(const char *, struct pt_regs *,long);
28extern int __must_check __die(const char *, struct pt_regs *, long); 28extern int __must_check __die(const char *, struct pt_regs *, long);
29extern void show_registers(struct pt_regs *regs); 29extern void show_registers(struct pt_regs *regs);
30extern void show_trace(struct task_struct *t, struct pt_regs *regs, 30extern void show_trace(struct task_struct *t, struct pt_regs *regs,
31 unsigned long *sp, unsigned long bp); 31 unsigned long *sp);
32extern void __show_regs(struct pt_regs *regs, int all); 32extern void __show_regs(struct pt_regs *regs, int all);
33extern void show_regs(struct pt_regs *regs); 33extern void show_regs(struct pt_regs *regs);
34extern unsigned long oops_begin(void); 34extern unsigned long oops_begin(void);
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 9e6fe391094e..f702f82aa1eb 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -79,7 +79,7 @@
79#define KVM_NUM_MMU_PAGES (1 << KVM_MMU_HASH_SHIFT) 79#define KVM_NUM_MMU_PAGES (1 << KVM_MMU_HASH_SHIFT)
80#define KVM_MIN_FREE_MMU_PAGES 5 80#define KVM_MIN_FREE_MMU_PAGES 5
81#define KVM_REFILL_PAGES 25 81#define KVM_REFILL_PAGES 25
82#define KVM_MAX_CPUID_ENTRIES 40 82#define KVM_MAX_CPUID_ENTRIES 80
83#define KVM_NR_FIXED_MTRR_REGION 88 83#define KVM_NR_FIXED_MTRR_REGION 88
84#define KVM_NR_VAR_MTRR 8 84#define KVM_NR_VAR_MTRR 8
85 85
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index c62c13cb9788..eb16e94ae04f 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -223,6 +223,9 @@ void intel_init_thermal(struct cpuinfo_x86 *c);
223 223
224void mce_log_therm_throt_event(__u64 status); 224void mce_log_therm_throt_event(__u64 status);
225 225
226/* Interrupt Handler for core thermal thresholds */
227extern int (*platform_thermal_notify)(__u64 msr_val);
228
226#ifdef CONFIG_X86_THERMAL_VECTOR 229#ifdef CONFIG_X86_THERMAL_VECTOR
227extern void mcheck_intel_therm_init(void); 230extern void mcheck_intel_therm_init(void);
228#else 231#else
diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h
index ef51b501e22a..24215072d0e1 100644
--- a/arch/x86/include/asm/microcode.h
+++ b/arch/x86/include/asm/microcode.h
@@ -48,6 +48,12 @@ static inline struct microcode_ops * __init init_intel_microcode(void)
48 48
49#ifdef CONFIG_MICROCODE_AMD 49#ifdef CONFIG_MICROCODE_AMD
50extern struct microcode_ops * __init init_amd_microcode(void); 50extern struct microcode_ops * __init init_amd_microcode(void);
51
52static inline void get_ucode_data(void *to, const u8 *from, size_t n)
53{
54 memcpy(to, from, n);
55}
56
51#else 57#else
52static inline struct microcode_ops * __init init_amd_microcode(void) 58static inline struct microcode_ops * __init init_amd_microcode(void)
53{ 59{
diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h
index c82868e9f905..0c90dd9f0505 100644
--- a/arch/x86/include/asm/mpspec.h
+++ b/arch/x86/include/asm/mpspec.h
@@ -5,8 +5,9 @@
5 5
6#include <asm/mpspec_def.h> 6#include <asm/mpspec_def.h>
7#include <asm/x86_init.h> 7#include <asm/x86_init.h>
8#include <asm/apicdef.h>
8 9
9extern int apic_version[MAX_APICS]; 10extern int apic_version[];
10extern int pic_mode; 11extern int pic_mode;
11 12
12#ifdef CONFIG_X86_32 13#ifdef CONFIG_X86_32
@@ -107,7 +108,7 @@ extern int mp_register_gsi(struct device *dev, u32 gsi, int edge_level,
107 int active_high_low); 108 int active_high_low);
108#endif /* CONFIG_ACPI */ 109#endif /* CONFIG_ACPI */
109 110
110#define PHYSID_ARRAY_SIZE BITS_TO_LONGS(MAX_APICS) 111#define PHYSID_ARRAY_SIZE BITS_TO_LONGS(MAX_LOCAL_APIC)
111 112
112struct physid_mask { 113struct physid_mask {
113 unsigned long mask[PHYSID_ARRAY_SIZE]; 114 unsigned long mask[PHYSID_ARRAY_SIZE];
@@ -122,31 +123,31 @@ typedef struct physid_mask physid_mask_t;
122 test_and_set_bit(physid, (map).mask) 123 test_and_set_bit(physid, (map).mask)
123 124
124#define physids_and(dst, src1, src2) \ 125#define physids_and(dst, src1, src2) \
125 bitmap_and((dst).mask, (src1).mask, (src2).mask, MAX_APICS) 126 bitmap_and((dst).mask, (src1).mask, (src2).mask, MAX_LOCAL_APIC)
126 127
127#define physids_or(dst, src1, src2) \ 128#define physids_or(dst, src1, src2) \
128 bitmap_or((dst).mask, (src1).mask, (src2).mask, MAX_APICS) 129 bitmap_or((dst).mask, (src1).mask, (src2).mask, MAX_LOCAL_APIC)
129 130
130#define physids_clear(map) \ 131#define physids_clear(map) \
131 bitmap_zero((map).mask, MAX_APICS) 132 bitmap_zero((map).mask, MAX_LOCAL_APIC)
132 133
133#define physids_complement(dst, src) \ 134#define physids_complement(dst, src) \
134 bitmap_complement((dst).mask, (src).mask, MAX_APICS) 135 bitmap_complement((dst).mask, (src).mask, MAX_LOCAL_APIC)
135 136
136#define physids_empty(map) \ 137#define physids_empty(map) \
137 bitmap_empty((map).mask, MAX_APICS) 138 bitmap_empty((map).mask, MAX_LOCAL_APIC)
138 139
139#define physids_equal(map1, map2) \ 140#define physids_equal(map1, map2) \
140 bitmap_equal((map1).mask, (map2).mask, MAX_APICS) 141 bitmap_equal((map1).mask, (map2).mask, MAX_LOCAL_APIC)
141 142
142#define physids_weight(map) \ 143#define physids_weight(map) \
143 bitmap_weight((map).mask, MAX_APICS) 144 bitmap_weight((map).mask, MAX_LOCAL_APIC)
144 145
145#define physids_shift_right(d, s, n) \ 146#define physids_shift_right(d, s, n) \
146 bitmap_shift_right((d).mask, (s).mask, n, MAX_APICS) 147 bitmap_shift_right((d).mask, (s).mask, n, MAX_LOCAL_APIC)
147 148
148#define physids_shift_left(d, s, n) \ 149#define physids_shift_left(d, s, n) \
149 bitmap_shift_left((d).mask, (s).mask, n, MAX_APICS) 150 bitmap_shift_left((d).mask, (s).mask, n, MAX_LOCAL_APIC)
150 151
151static inline unsigned long physids_coerce(physid_mask_t *map) 152static inline unsigned long physids_coerce(physid_mask_t *map)
152{ 153{
@@ -159,14 +160,6 @@ static inline void physids_promote(unsigned long physids, physid_mask_t *map)
159 map->mask[0] = physids; 160 map->mask[0] = physids;
160} 161}
161 162
162/* Note: will create very large stack frames if physid_mask_t is big */
163#define physid_mask_of_physid(physid) \
164 ({ \
165 physid_mask_t __physid_mask = PHYSID_MASK_NONE; \
166 physid_set(physid, __physid_mask); \
167 __physid_mask; \
168 })
169
170static inline void physid_set_mask_of_physid(int physid, physid_mask_t *map) 163static inline void physid_set_mask_of_physid(int physid, physid_mask_t *map)
171{ 164{
172 physids_clear(*map); 165 physids_clear(*map);
diff --git a/arch/x86/include/asm/mpspec_def.h b/arch/x86/include/asm/mpspec_def.h
index 4a7f96d7c188..c0a955a9a087 100644
--- a/arch/x86/include/asm/mpspec_def.h
+++ b/arch/x86/include/asm/mpspec_def.h
@@ -15,13 +15,6 @@
15 15
16#ifdef CONFIG_X86_32 16#ifdef CONFIG_X86_32
17# define MAX_MPC_ENTRY 1024 17# define MAX_MPC_ENTRY 1024
18# define MAX_APICS 256
19#else
20# if NR_CPUS <= 255
21# define MAX_APICS 255
22# else
23# define MAX_APICS 32768
24# endif
25#endif 18#endif
26 19
27/* Intel MP Floating Pointer Structure */ 20/* Intel MP Floating Pointer Structure */
diff --git a/arch/x86/include/asm/mrst-vrtc.h b/arch/x86/include/asm/mrst-vrtc.h
new file mode 100644
index 000000000000..73668abdbedf
--- /dev/null
+++ b/arch/x86/include/asm/mrst-vrtc.h
@@ -0,0 +1,9 @@
1#ifndef _MRST_VRTC_H
2#define _MRST_VRTC_H
3
4extern unsigned char vrtc_cmos_read(unsigned char reg);
5extern void vrtc_cmos_write(unsigned char val, unsigned char reg);
6extern unsigned long vrtc_get_time(void);
7extern int vrtc_set_mmss(unsigned long nowtime);
8
9#endif
diff --git a/arch/x86/include/asm/mrst.h b/arch/x86/include/asm/mrst.h
index 4a711a684b17..719f00b28ff5 100644
--- a/arch/x86/include/asm/mrst.h
+++ b/arch/x86/include/asm/mrst.h
@@ -14,7 +14,9 @@
14#include <linux/sfi.h> 14#include <linux/sfi.h>
15 15
16extern int pci_mrst_init(void); 16extern int pci_mrst_init(void);
17int __init sfi_parse_mrtc(struct sfi_table_header *table); 17extern int __init sfi_parse_mrtc(struct sfi_table_header *table);
18extern int sfi_mrtc_num;
19extern struct sfi_rtc_table_entry sfi_mrtc_array[];
18 20
19/* 21/*
20 * Medfield is the follow-up of Moorestown, it combines two chip solution into 22 * Medfield is the follow-up of Moorestown, it combines two chip solution into
@@ -50,4 +52,14 @@ extern void mrst_early_console_init(void);
50 52
51extern struct console early_hsu_console; 53extern struct console early_hsu_console;
52extern void hsu_early_console_init(void); 54extern void hsu_early_console_init(void);
55
56extern void intel_scu_devices_create(void);
57extern void intel_scu_devices_destroy(void);
58
59/* VRTC timer */
60#define MRST_VRTC_MAP_SZ (1024)
61/*#define MRST_VRTC_PGOFFSET (0xc00) */
62
63extern void mrst_rtc_init(void);
64
53#endif /* _ASM_X86_MRST_H */ 65#endif /* _ASM_X86_MRST_H */
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 6b89f5e86021..4d0dfa0d998e 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -123,6 +123,10 @@
123#define MSR_AMD64_IBSCTL 0xc001103a 123#define MSR_AMD64_IBSCTL 0xc001103a
124#define MSR_AMD64_IBSBRTARGET 0xc001103b 124#define MSR_AMD64_IBSBRTARGET 0xc001103b
125 125
126/* Fam 15h MSRs */
127#define MSR_F15H_PERF_CTL 0xc0010200
128#define MSR_F15H_PERF_CTR 0xc0010201
129
126/* Fam 10h MSRs */ 130/* Fam 10h MSRs */
127#define MSR_FAM10H_MMIO_CONF_BASE 0xc0010058 131#define MSR_FAM10H_MMIO_CONF_BASE 0xc0010058
128#define FAM10H_MMIO_CONF_ENABLE (1<<0) 132#define FAM10H_MMIO_CONF_ENABLE (1<<0)
@@ -253,6 +257,18 @@
253#define PACKAGE_THERM_INT_LOW_ENABLE (1 << 1) 257#define PACKAGE_THERM_INT_LOW_ENABLE (1 << 1)
254#define PACKAGE_THERM_INT_PLN_ENABLE (1 << 24) 258#define PACKAGE_THERM_INT_PLN_ENABLE (1 << 24)
255 259
260/* Thermal Thresholds Support */
261#define THERM_INT_THRESHOLD0_ENABLE (1 << 15)
262#define THERM_SHIFT_THRESHOLD0 8
263#define THERM_MASK_THRESHOLD0 (0x7f << THERM_SHIFT_THRESHOLD0)
264#define THERM_INT_THRESHOLD1_ENABLE (1 << 23)
265#define THERM_SHIFT_THRESHOLD1 16
266#define THERM_MASK_THRESHOLD1 (0x7f << THERM_SHIFT_THRESHOLD1)
267#define THERM_STATUS_THRESHOLD0 (1 << 6)
268#define THERM_LOG_THRESHOLD0 (1 << 7)
269#define THERM_STATUS_THRESHOLD1 (1 << 8)
270#define THERM_LOG_THRESHOLD1 (1 << 9)
271
256/* MISC_ENABLE bits: architectural */ 272/* MISC_ENABLE bits: architectural */
257#define MSR_IA32_MISC_ENABLE_FAST_STRING (1ULL << 0) 273#define MSR_IA32_MISC_ENABLE_FAST_STRING (1ULL << 0)
258#define MSR_IA32_MISC_ENABLE_TCC (1ULL << 1) 274#define MSR_IA32_MISC_ENABLE_TCC (1ULL << 1)
diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h
index 932f0f86b4b7..c4021b953510 100644
--- a/arch/x86/include/asm/nmi.h
+++ b/arch/x86/include/asm/nmi.h
@@ -5,41 +5,15 @@
5#include <asm/irq.h> 5#include <asm/irq.h>
6#include <asm/io.h> 6#include <asm/io.h>
7 7
8#ifdef ARCH_HAS_NMI_WATCHDOG 8#ifdef CONFIG_X86_LOCAL_APIC
9
10/**
11 * do_nmi_callback
12 *
13 * Check to see if a callback exists and execute it. Return 1
14 * if the handler exists and was handled successfully.
15 */
16int do_nmi_callback(struct pt_regs *regs, int cpu);
17 9
18extern void die_nmi(char *str, struct pt_regs *regs, int do_panic); 10extern void die_nmi(char *str, struct pt_regs *regs, int do_panic);
19extern int check_nmi_watchdog(void);
20#if !defined(CONFIG_LOCKUP_DETECTOR)
21extern int nmi_watchdog_enabled;
22#endif
23extern int avail_to_resrv_perfctr_nmi_bit(unsigned int); 11extern int avail_to_resrv_perfctr_nmi_bit(unsigned int);
24extern int reserve_perfctr_nmi(unsigned int); 12extern int reserve_perfctr_nmi(unsigned int);
25extern void release_perfctr_nmi(unsigned int); 13extern void release_perfctr_nmi(unsigned int);
26extern int reserve_evntsel_nmi(unsigned int); 14extern int reserve_evntsel_nmi(unsigned int);
27extern void release_evntsel_nmi(unsigned int); 15extern void release_evntsel_nmi(unsigned int);
28 16
29extern void setup_apic_nmi_watchdog(void *);
30extern void stop_apic_nmi_watchdog(void *);
31extern void disable_timer_nmi_watchdog(void);
32extern void enable_timer_nmi_watchdog(void);
33extern int nmi_watchdog_tick(struct pt_regs *regs, unsigned reason);
34extern void cpu_nmi_set_wd_enabled(void);
35
36extern atomic_t nmi_active;
37extern unsigned int nmi_watchdog;
38#define NMI_NONE 0
39#define NMI_IO_APIC 1
40#define NMI_LOCAL_APIC 2
41#define NMI_INVALID 3
42
43struct ctl_table; 17struct ctl_table;
44extern int proc_nmi_enabled(struct ctl_table *, int , 18extern int proc_nmi_enabled(struct ctl_table *, int ,
45 void __user *, size_t *, loff_t *); 19 void __user *, size_t *, loff_t *);
@@ -47,33 +21,8 @@ extern int unknown_nmi_panic;
47 21
48void arch_trigger_all_cpu_backtrace(void); 22void arch_trigger_all_cpu_backtrace(void);
49#define arch_trigger_all_cpu_backtrace arch_trigger_all_cpu_backtrace 23#define arch_trigger_all_cpu_backtrace arch_trigger_all_cpu_backtrace
50
51static inline void localise_nmi_watchdog(void)
52{
53 if (nmi_watchdog == NMI_IO_APIC)
54 nmi_watchdog = NMI_LOCAL_APIC;
55}
56
57/* check if nmi_watchdog is active (ie was specified at boot) */
58static inline int nmi_watchdog_active(void)
59{
60 /*
61 * actually it should be:
62 * return (nmi_watchdog == NMI_LOCAL_APIC ||
63 * nmi_watchdog == NMI_IO_APIC)
64 * but since they are power of two we could use a
65 * cheaper way --cvg
66 */
67 return nmi_watchdog & (NMI_LOCAL_APIC | NMI_IO_APIC);
68}
69#endif 24#endif
70 25
71void lapic_watchdog_stop(void);
72int lapic_watchdog_init(unsigned nmi_hz);
73int lapic_wd_event(unsigned nmi_hz);
74unsigned lapic_adjust_nmi_hz(unsigned hz);
75void disable_lapic_nmi_watchdog(void);
76void enable_lapic_nmi_watchdog(void);
77void stop_nmi(void); 26void stop_nmi(void);
78void restart_nmi(void); 27void restart_nmi(void);
79 28
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index ef9975812c77..7709c12431b8 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -112,7 +112,7 @@ static inline void arch_safe_halt(void)
112 112
113static inline void halt(void) 113static inline void halt(void)
114{ 114{
115 PVOP_VCALL0(pv_irq_ops.safe_halt); 115 PVOP_VCALL0(pv_irq_ops.halt);
116} 116}
117 117
118static inline void wbinvd(void) 118static inline void wbinvd(void)
diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h
index ca0437c714b2..676129229630 100644
--- a/arch/x86/include/asm/pci.h
+++ b/arch/x86/include/asm/pci.h
@@ -65,6 +65,7 @@ extern unsigned long pci_mem_start;
65 65
66#define PCIBIOS_MIN_CARDBUS_IO 0x4000 66#define PCIBIOS_MIN_CARDBUS_IO 0x4000
67 67
68extern int pcibios_enabled;
68void pcibios_config_init(void); 69void pcibios_config_init(void);
69struct pci_bus *pcibios_scan_root(int bus); 70struct pci_bus *pcibios_scan_root(int bus);
70 71
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 550e26b1dbb3..d9d4dae305f6 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -125,7 +125,6 @@ union cpuid10_edx {
125#define IBS_OP_MAX_CNT_EXT 0x007FFFFFULL /* not a register bit mask */ 125#define IBS_OP_MAX_CNT_EXT 0x007FFFFFULL /* not a register bit mask */
126 126
127#ifdef CONFIG_PERF_EVENTS 127#ifdef CONFIG_PERF_EVENTS
128extern void init_hw_perf_events(void);
129extern void perf_events_lapic_init(void); 128extern void perf_events_lapic_init(void);
130 129
131#define PERF_EVENT_INDEX_OFFSET 0 130#define PERF_EVENT_INDEX_OFFSET 0
@@ -156,7 +155,6 @@ extern unsigned long perf_misc_flags(struct pt_regs *regs);
156} 155}
157 156
158#else 157#else
159static inline void init_hw_perf_events(void) { }
160static inline void perf_events_lapic_init(void) { } 158static inline void perf_events_lapic_init(void) { }
161#endif 159#endif
162 160
diff --git a/arch/x86/include/asm/perf_event_p4.h b/arch/x86/include/asm/perf_event_p4.h
index a70cd216be5d..295e2ff18a6a 100644
--- a/arch/x86/include/asm/perf_event_p4.h
+++ b/arch/x86/include/asm/perf_event_p4.h
@@ -744,14 +744,6 @@ enum P4_ESCR_EMASKS {
744}; 744};
745 745
746/* 746/*
747 * P4 PEBS specifics (Replay Event only)
748 *
749 * Format (bits):
750 * 0-6: metric from P4_PEBS_METRIC enum
751 * 7 : reserved
752 * 8 : reserved
753 * 9-11 : reserved
754 *
755 * Note we have UOP and PEBS bits reserved for now 747 * Note we have UOP and PEBS bits reserved for now
756 * just in case if we will need them once 748 * just in case if we will need them once
757 */ 749 */
@@ -788,5 +780,60 @@ enum P4_PEBS_METRIC {
788 P4_PEBS_METRIC__max 780 P4_PEBS_METRIC__max
789}; 781};
790 782
783/*
784 * Notes on internal configuration of ESCR+CCCR tuples
785 *
786 * Since P4 has quite the different architecture of
787 * performance registers in compare with "architectural"
788 * once and we have on 64 bits to keep configuration
789 * of performance event, the following trick is used.
790 *
791 * 1) Since both ESCR and CCCR registers have only low
792 * 32 bits valuable, we pack them into a single 64 bit
793 * configuration. Low 32 bits of such config correspond
794 * to low 32 bits of CCCR register and high 32 bits
795 * correspond to low 32 bits of ESCR register.
796 *
797 * 2) The meaning of every bit of such config field can
798 * be found in Intel SDM but it should be noted that
799 * we "borrow" some reserved bits for own usage and
800 * clean them or set to a proper value when we do
801 * a real write to hardware registers.
802 *
803 * 3) The format of bits of config is the following
804 * and should be either 0 or set to some predefined
805 * values:
806 *
807 * Low 32 bits
808 * -----------
809 * 0-6: P4_PEBS_METRIC enum
810 * 7-11: reserved
811 * 12: reserved (Enable)
812 * 13-15: reserved (ESCR select)
813 * 16-17: Active Thread
814 * 18: Compare
815 * 19: Complement
816 * 20-23: Threshold
817 * 24: Edge
818 * 25: reserved (FORCE_OVF)
819 * 26: reserved (OVF_PMI_T0)
820 * 27: reserved (OVF_PMI_T1)
821 * 28-29: reserved
822 * 30: reserved (Cascade)
823 * 31: reserved (OVF)
824 *
825 * High 32 bits
826 * ------------
827 * 0: reserved (T1_USR)
828 * 1: reserved (T1_OS)
829 * 2: reserved (T0_USR)
830 * 3: reserved (T0_OS)
831 * 4: Tag Enable
832 * 5-8: Tag Value
833 * 9-24: Event Mask (may use P4_ESCR_EMASK_BIT helper)
834 * 25-30: enum P4_EVENTS
835 * 31: reserved (HT thread)
836 */
837
791#endif /* PERF_EVENT_P4_H */ 838#endif /* PERF_EVENT_P4_H */
792 839
diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h
index 7f7e577a0e39..31d84acc1512 100644
--- a/arch/x86/include/asm/pvclock.h
+++ b/arch/x86/include/asm/pvclock.h
@@ -11,6 +11,7 @@ unsigned long pvclock_tsc_khz(struct pvclock_vcpu_time_info *src);
11void pvclock_read_wallclock(struct pvclock_wall_clock *wall, 11void pvclock_read_wallclock(struct pvclock_wall_clock *wall,
12 struct pvclock_vcpu_time_info *vcpu, 12 struct pvclock_vcpu_time_info *vcpu,
13 struct timespec *ts); 13 struct timespec *ts);
14void pvclock_resume(void);
14 15
15/* 16/*
16 * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction, 17 * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction,
diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h
index d6763b139a84..db8aa19a08a2 100644
--- a/arch/x86/include/asm/setup.h
+++ b/arch/x86/include/asm/setup.h
@@ -53,6 +53,12 @@ extern void x86_mrst_early_setup(void);
53static inline void x86_mrst_early_setup(void) { } 53static inline void x86_mrst_early_setup(void) { }
54#endif 54#endif
55 55
56#ifdef CONFIG_X86_INTEL_CE
57extern void x86_ce4100_early_setup(void);
58#else
59static inline void x86_ce4100_early_setup(void) { }
60#endif
61
56#ifndef _SETUP 62#ifndef _SETUP
57 63
58/* 64/*
diff --git a/arch/x86/include/asm/smpboot_hooks.h b/arch/x86/include/asm/smpboot_hooks.h
index 1def60114906..6c22bf353f26 100644
--- a/arch/x86/include/asm/smpboot_hooks.h
+++ b/arch/x86/include/asm/smpboot_hooks.h
@@ -48,7 +48,6 @@ static inline void __init smpboot_setup_io_apic(void)
48 setup_IO_APIC(); 48 setup_IO_APIC();
49 else { 49 else {
50 nr_ioapics = 0; 50 nr_ioapics = 0;
51 localise_nmi_watchdog();
52 } 51 }
53#endif 52#endif
54} 53}
diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h
index 2b16a2ad23dc..52b5c7ed3608 100644
--- a/arch/x86/include/asm/stacktrace.h
+++ b/arch/x86/include/asm/stacktrace.h
@@ -7,6 +7,7 @@
7#define _ASM_X86_STACKTRACE_H 7#define _ASM_X86_STACKTRACE_H
8 8
9#include <linux/uaccess.h> 9#include <linux/uaccess.h>
10#include <linux/ptrace.h>
10 11
11extern int kstack_depth_to_print; 12extern int kstack_depth_to_print;
12 13
@@ -46,7 +47,7 @@ struct stacktrace_ops {
46}; 47};
47 48
48void dump_trace(struct task_struct *tsk, struct pt_regs *regs, 49void dump_trace(struct task_struct *tsk, struct pt_regs *regs,
49 unsigned long *stack, unsigned long bp, 50 unsigned long *stack,
50 const struct stacktrace_ops *ops, void *data); 51 const struct stacktrace_ops *ops, void *data);
51 52
52#ifdef CONFIG_X86_32 53#ifdef CONFIG_X86_32
@@ -57,13 +58,39 @@ void dump_trace(struct task_struct *tsk, struct pt_regs *regs,
57#define get_bp(bp) asm("movq %%rbp, %0" : "=r" (bp) :) 58#define get_bp(bp) asm("movq %%rbp, %0" : "=r" (bp) :)
58#endif 59#endif
59 60
61#ifdef CONFIG_FRAME_POINTER
62static inline unsigned long
63stack_frame(struct task_struct *task, struct pt_regs *regs)
64{
65 unsigned long bp;
66
67 if (regs)
68 return regs->bp;
69
70 if (task == current) {
71 /* Grab bp right from our regs */
72 get_bp(bp);
73 return bp;
74 }
75
76 /* bp is the last reg pushed by switch_to */
77 return *(unsigned long *)task->thread.sp;
78}
79#else
80static inline unsigned long
81stack_frame(struct task_struct *task, struct pt_regs *regs)
82{
83 return 0;
84}
85#endif
86
60extern void 87extern void
61show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, 88show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
62 unsigned long *stack, unsigned long bp, char *log_lvl); 89 unsigned long *stack, char *log_lvl);
63 90
64extern void 91extern void
65show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, 92show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
66 unsigned long *sp, unsigned long bp, char *log_lvl); 93 unsigned long *sp, char *log_lvl);
67 94
68extern unsigned int code_bytes; 95extern unsigned int code_bytes;
69 96
diff --git a/arch/x86/include/asm/timer.h b/arch/x86/include/asm/timer.h
index 5469630b27f5..fa7b9176b76c 100644
--- a/arch/x86/include/asm/timer.h
+++ b/arch/x86/include/asm/timer.h
@@ -10,12 +10,6 @@
10unsigned long long native_sched_clock(void); 10unsigned long long native_sched_clock(void);
11extern int recalibrate_cpu_khz(void); 11extern int recalibrate_cpu_khz(void);
12 12
13#if defined(CONFIG_X86_32) && defined(CONFIG_X86_IO_APIC)
14extern int timer_ack;
15#else
16# define timer_ack (0)
17#endif
18
19extern int no_timer_check; 13extern int no_timer_check;
20 14
21/* Accelerators for sched_clock() 15/* Accelerators for sched_clock()
diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h
index 42d412fd8b02..ce1d54c8a433 100644
--- a/arch/x86/include/asm/uv/uv_bau.h
+++ b/arch/x86/include/asm/uv/uv_bau.h
@@ -26,20 +26,22 @@
26 * BAU_SB_DESCRIPTOR_BASE register, set 1 is located at BASE + 512, 26 * BAU_SB_DESCRIPTOR_BASE register, set 1 is located at BASE + 512,
27 * set 2 is at BASE + 2*512, set 3 at BASE + 3*512, and so on. 27 * set 2 is at BASE + 2*512, set 3 at BASE + 3*512, and so on.
28 * 28 *
29 * We will use 31 sets, one for sending BAU messages from each of the 32 29 * We will use one set for sending BAU messages from each of the
30 * cpu's on the uvhub. 30 * cpu's on the uvhub.
31 * 31 *
32 * TLB shootdown will use the first of the 8 descriptors of each set. 32 * TLB shootdown will use the first of the 8 descriptors of each set.
33 * Each of the descriptors is 64 bytes in size (8*64 = 512 bytes in a set). 33 * Each of the descriptors is 64 bytes in size (8*64 = 512 bytes in a set).
34 */ 34 */
35 35
36#define MAX_CPUS_PER_UVHUB 64
37#define MAX_CPUS_PER_SOCKET 32
38#define UV_ADP_SIZE 64 /* hardware-provided max. */
39#define UV_CPUS_PER_ACT_STATUS 32 /* hardware-provided max. */
36#define UV_ITEMS_PER_DESCRIPTOR 8 40#define UV_ITEMS_PER_DESCRIPTOR 8
37/* the 'throttle' to prevent the hardware stay-busy bug */ 41/* the 'throttle' to prevent the hardware stay-busy bug */
38#define MAX_BAU_CONCURRENT 3 42#define MAX_BAU_CONCURRENT 3
39#define UV_CPUS_PER_ACT_STATUS 32
40#define UV_ACT_STATUS_MASK 0x3 43#define UV_ACT_STATUS_MASK 0x3
41#define UV_ACT_STATUS_SIZE 2 44#define UV_ACT_STATUS_SIZE 2
42#define UV_ADP_SIZE 32
43#define UV_DISTRIBUTION_SIZE 256 45#define UV_DISTRIBUTION_SIZE 256
44#define UV_SW_ACK_NPENDING 8 46#define UV_SW_ACK_NPENDING 8
45#define UV_NET_ENDPOINT_INTD 0x38 47#define UV_NET_ENDPOINT_INTD 0x38
@@ -100,7 +102,6 @@
100 * number of destination side software ack resources 102 * number of destination side software ack resources
101 */ 103 */
102#define DEST_NUM_RESOURCES 8 104#define DEST_NUM_RESOURCES 8
103#define MAX_CPUS_PER_NODE 32
104/* 105/*
105 * completion statuses for sending a TLB flush message 106 * completion statuses for sending a TLB flush message
106 */ 107 */
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 9e13763b6092..34244b2cd880 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -45,6 +45,7 @@ obj-y += pci-dma.o quirks.o i8237.o topology.o kdebugfs.o
45obj-y += alternative.o i8253.o pci-nommu.o hw_breakpoint.o 45obj-y += alternative.o i8253.o pci-nommu.o hw_breakpoint.o
46obj-y += tsc.o io_delay.o rtc.o 46obj-y += tsc.o io_delay.o rtc.o
47obj-y += pci-iommu_table.o 47obj-y += pci-iommu_table.o
48obj-y += resource.o
48 49
49obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o 50obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o
50obj-y += process.o 51obj-y += process.o
@@ -84,7 +85,6 @@ obj-$(CONFIG_DOUBLEFAULT) += doublefault_32.o
84obj-$(CONFIG_KGDB) += kgdb.o 85obj-$(CONFIG_KGDB) += kgdb.o
85obj-$(CONFIG_VM86) += vm86_32.o 86obj-$(CONFIG_VM86) += vm86_32.o
86obj-$(CONFIG_EARLY_PRINTK) += early_printk.o 87obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
87obj-$(CONFIG_EARLY_PRINTK_MRST) += early_printk_mrst.o
88 88
89obj-$(CONFIG_HPET_TIMER) += hpet.o 89obj-$(CONFIG_HPET_TIMER) += hpet.o
90obj-$(CONFIG_APB_TIMER) += apb_timer.o 90obj-$(CONFIG_APB_TIMER) += apb_timer.o
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 71232b941b6c..17c8090fabd4 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -198,6 +198,11 @@ static void __cpuinit acpi_register_lapic(int id, u8 enabled)
198{ 198{
199 unsigned int ver = 0; 199 unsigned int ver = 0;
200 200
201 if (id >= (MAX_LOCAL_APIC-1)) {
202 printk(KERN_INFO PREFIX "skipped apicid that is too big\n");
203 return;
204 }
205
201 if (!enabled) { 206 if (!enabled) {
202 ++disabled_cpus; 207 ++disabled_cpus;
203 return; 208 return;
@@ -910,13 +915,13 @@ static int __init acpi_parse_madt_lapic_entries(void)
910 acpi_register_lapic_address(acpi_lapic_addr); 915 acpi_register_lapic_address(acpi_lapic_addr);
911 916
912 count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_SAPIC, 917 count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_SAPIC,
913 acpi_parse_sapic, MAX_APICS); 918 acpi_parse_sapic, MAX_LOCAL_APIC);
914 919
915 if (!count) { 920 if (!count) {
916 x2count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_X2APIC, 921 x2count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_X2APIC,
917 acpi_parse_x2apic, MAX_APICS); 922 acpi_parse_x2apic, MAX_LOCAL_APIC);
918 count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC, 923 count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC,
919 acpi_parse_lapic, MAX_APICS); 924 acpi_parse_lapic, MAX_LOCAL_APIC);
920 } 925 }
921 if (!count && !x2count) { 926 if (!count && !x2count) {
922 printk(KERN_ERR PREFIX "No LAPIC entries present\n"); 927 printk(KERN_ERR PREFIX "No LAPIC entries present\n");
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 9f98eb400fef..123608531c8f 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -592,17 +592,21 @@ static atomic_t stop_machine_first;
592static int wrote_text; 592static int wrote_text;
593 593
594struct text_poke_params { 594struct text_poke_params {
595 void *addr; 595 struct text_poke_param *params;
596 const void *opcode; 596 int nparams;
597 size_t len;
598}; 597};
599 598
600static int __kprobes stop_machine_text_poke(void *data) 599static int __kprobes stop_machine_text_poke(void *data)
601{ 600{
602 struct text_poke_params *tpp = data; 601 struct text_poke_params *tpp = data;
602 struct text_poke_param *p;
603 int i;
603 604
604 if (atomic_dec_and_test(&stop_machine_first)) { 605 if (atomic_dec_and_test(&stop_machine_first)) {
605 text_poke(tpp->addr, tpp->opcode, tpp->len); 606 for (i = 0; i < tpp->nparams; i++) {
607 p = &tpp->params[i];
608 text_poke(p->addr, p->opcode, p->len);
609 }
606 smp_wmb(); /* Make sure other cpus see that this has run */ 610 smp_wmb(); /* Make sure other cpus see that this has run */
607 wrote_text = 1; 611 wrote_text = 1;
608 } else { 612 } else {
@@ -611,8 +615,12 @@ static int __kprobes stop_machine_text_poke(void *data)
611 smp_mb(); /* Load wrote_text before following execution */ 615 smp_mb(); /* Load wrote_text before following execution */
612 } 616 }
613 617
614 flush_icache_range((unsigned long)tpp->addr, 618 for (i = 0; i < tpp->nparams; i++) {
615 (unsigned long)tpp->addr + tpp->len); 619 p = &tpp->params[i];
620 flush_icache_range((unsigned long)p->addr,
621 (unsigned long)p->addr + p->len);
622 }
623
616 return 0; 624 return 0;
617} 625}
618 626
@@ -632,10 +640,13 @@ static int __kprobes stop_machine_text_poke(void *data)
632void *__kprobes text_poke_smp(void *addr, const void *opcode, size_t len) 640void *__kprobes text_poke_smp(void *addr, const void *opcode, size_t len)
633{ 641{
634 struct text_poke_params tpp; 642 struct text_poke_params tpp;
643 struct text_poke_param p;
635 644
636 tpp.addr = addr; 645 p.addr = addr;
637 tpp.opcode = opcode; 646 p.opcode = opcode;
638 tpp.len = len; 647 p.len = len;
648 tpp.params = &p;
649 tpp.nparams = 1;
639 atomic_set(&stop_machine_first, 1); 650 atomic_set(&stop_machine_first, 1);
640 wrote_text = 0; 651 wrote_text = 0;
641 /* Use __stop_machine() because the caller already got online_cpus. */ 652 /* Use __stop_machine() because the caller already got online_cpus. */
@@ -643,6 +654,26 @@ void *__kprobes text_poke_smp(void *addr, const void *opcode, size_t len)
643 return addr; 654 return addr;
644} 655}
645 656
657/**
658 * text_poke_smp_batch - Update instructions on a live kernel on SMP
659 * @params: an array of text_poke parameters
660 * @n: the number of elements in params.
661 *
662 * Modify multi-byte instruction by using stop_machine() on SMP. Since the
663 * stop_machine() is heavy task, it is better to aggregate text_poke requests
664 * and do it once if possible.
665 *
666 * Note: Must be called under get_online_cpus() and text_mutex.
667 */
668void __kprobes text_poke_smp_batch(struct text_poke_param *params, int n)
669{
670 struct text_poke_params tpp = {.params = params, .nparams = n};
671
672 atomic_set(&stop_machine_first, 1);
673 wrote_text = 0;
674 stop_machine(stop_machine_text_poke, (void *)&tpp, NULL);
675}
676
646#if defined(CONFIG_DYNAMIC_FTRACE) || defined(HAVE_JUMP_LABEL) 677#if defined(CONFIG_DYNAMIC_FTRACE) || defined(HAVE_JUMP_LABEL)
647 678
648#ifdef CONFIG_X86_64 679#ifdef CONFIG_X86_64
diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c
index 8f6463d8ed0d..affacb5e0065 100644
--- a/arch/x86/kernel/amd_nb.c
+++ b/arch/x86/kernel/amd_nb.c
@@ -12,95 +12,116 @@
12 12
13static u32 *flush_words; 13static u32 *flush_words;
14 14
15struct pci_device_id k8_nb_ids[] = { 15struct pci_device_id amd_nb_misc_ids[] = {
16 { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_MISC) }, 16 { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_MISC) },
17 { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC) }, 17 { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC) },
18 { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_MISC) }, 18 { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_MISC) },
19 {} 19 {}
20}; 20};
21EXPORT_SYMBOL(k8_nb_ids); 21EXPORT_SYMBOL(amd_nb_misc_ids);
22 22
23struct k8_northbridge_info k8_northbridges; 23struct amd_northbridge_info amd_northbridges;
24EXPORT_SYMBOL(k8_northbridges); 24EXPORT_SYMBOL(amd_northbridges);
25 25
26static struct pci_dev *next_k8_northbridge(struct pci_dev *dev) 26static struct pci_dev *next_northbridge(struct pci_dev *dev,
27 struct pci_device_id *ids)
27{ 28{
28 do { 29 do {
29 dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev); 30 dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev);
30 if (!dev) 31 if (!dev)
31 break; 32 break;
32 } while (!pci_match_id(&k8_nb_ids[0], dev)); 33 } while (!pci_match_id(ids, dev));
33 return dev; 34 return dev;
34} 35}
35 36
36int cache_k8_northbridges(void) 37int amd_cache_northbridges(void)
37{ 38{
38 int i; 39 int i = 0;
39 struct pci_dev *dev; 40 struct amd_northbridge *nb;
41 struct pci_dev *misc;
40 42
41 if (k8_northbridges.num) 43 if (amd_nb_num())
42 return 0; 44 return 0;
43 45
44 dev = NULL; 46 misc = NULL;
45 while ((dev = next_k8_northbridge(dev)) != NULL) 47 while ((misc = next_northbridge(misc, amd_nb_misc_ids)) != NULL)
46 k8_northbridges.num++; 48 i++;
47 49
48 /* some CPU families (e.g. family 0x11) do not support GART */ 50 if (i == 0)
49 if (boot_cpu_data.x86 == 0xf || boot_cpu_data.x86 == 0x10 || 51 return 0;
50 boot_cpu_data.x86 == 0x15)
51 k8_northbridges.gart_supported = 1;
52 52
53 k8_northbridges.nb_misc = kmalloc((k8_northbridges.num + 1) * 53 nb = kzalloc(i * sizeof(struct amd_northbridge), GFP_KERNEL);
54 sizeof(void *), GFP_KERNEL); 54 if (!nb)
55 if (!k8_northbridges.nb_misc)
56 return -ENOMEM; 55 return -ENOMEM;
57 56
58 if (!k8_northbridges.num) { 57 amd_northbridges.nb = nb;
59 k8_northbridges.nb_misc[0] = NULL; 58 amd_northbridges.num = i;
60 return 0;
61 }
62 59
63 if (k8_northbridges.gart_supported) { 60 misc = NULL;
64 flush_words = kmalloc(k8_northbridges.num * sizeof(u32), 61 for (i = 0; i != amd_nb_num(); i++) {
65 GFP_KERNEL); 62 node_to_amd_nb(i)->misc = misc =
66 if (!flush_words) { 63 next_northbridge(misc, amd_nb_misc_ids);
67 kfree(k8_northbridges.nb_misc); 64 }
68 return -ENOMEM; 65
69 } 66 /* some CPU families (e.g. family 0x11) do not support GART */
70 } 67 if (boot_cpu_data.x86 == 0xf || boot_cpu_data.x86 == 0x10 ||
68 boot_cpu_data.x86 == 0x15)
69 amd_northbridges.flags |= AMD_NB_GART;
70
71 /*
72 * Some CPU families support L3 Cache Index Disable. There are some
73 * limitations because of E382 and E388 on family 0x10.
74 */
75 if (boot_cpu_data.x86 == 0x10 &&
76 boot_cpu_data.x86_model >= 0x8 &&
77 (boot_cpu_data.x86_model > 0x9 ||
78 boot_cpu_data.x86_mask >= 0x1))
79 amd_northbridges.flags |= AMD_NB_L3_INDEX_DISABLE;
71 80
72 dev = NULL;
73 i = 0;
74 while ((dev = next_k8_northbridge(dev)) != NULL) {
75 k8_northbridges.nb_misc[i] = dev;
76 if (k8_northbridges.gart_supported)
77 pci_read_config_dword(dev, 0x9c, &flush_words[i++]);
78 }
79 k8_northbridges.nb_misc[i] = NULL;
80 return 0; 81 return 0;
81} 82}
82EXPORT_SYMBOL_GPL(cache_k8_northbridges); 83EXPORT_SYMBOL_GPL(amd_cache_northbridges);
83 84
84/* Ignores subdevice/subvendor but as far as I can figure out 85/* Ignores subdevice/subvendor but as far as I can figure out
85 they're useless anyways */ 86 they're useless anyways */
86int __init early_is_k8_nb(u32 device) 87int __init early_is_amd_nb(u32 device)
87{ 88{
88 struct pci_device_id *id; 89 struct pci_device_id *id;
89 u32 vendor = device & 0xffff; 90 u32 vendor = device & 0xffff;
90 device >>= 16; 91 device >>= 16;
91 for (id = k8_nb_ids; id->vendor; id++) 92 for (id = amd_nb_misc_ids; id->vendor; id++)
92 if (vendor == id->vendor && device == id->device) 93 if (vendor == id->vendor && device == id->device)
93 return 1; 94 return 1;
94 return 0; 95 return 0;
95} 96}
96 97
97void k8_flush_garts(void) 98int amd_cache_gart(void)
99{
100 int i;
101
102 if (!amd_nb_has_feature(AMD_NB_GART))
103 return 0;
104
105 flush_words = kmalloc(amd_nb_num() * sizeof(u32), GFP_KERNEL);
106 if (!flush_words) {
107 amd_northbridges.flags &= ~AMD_NB_GART;
108 return -ENOMEM;
109 }
110
111 for (i = 0; i != amd_nb_num(); i++)
112 pci_read_config_dword(node_to_amd_nb(i)->misc, 0x9c,
113 &flush_words[i]);
114
115 return 0;
116}
117
118void amd_flush_garts(void)
98{ 119{
99 int flushed, i; 120 int flushed, i;
100 unsigned long flags; 121 unsigned long flags;
101 static DEFINE_SPINLOCK(gart_lock); 122 static DEFINE_SPINLOCK(gart_lock);
102 123
103 if (!k8_northbridges.gart_supported) 124 if (!amd_nb_has_feature(AMD_NB_GART))
104 return; 125 return;
105 126
106 /* Avoid races between AGP and IOMMU. In theory it's not needed 127 /* Avoid races between AGP and IOMMU. In theory it's not needed
@@ -109,16 +130,16 @@ void k8_flush_garts(void)
109 that it doesn't matter to serialize more. -AK */ 130 that it doesn't matter to serialize more. -AK */
110 spin_lock_irqsave(&gart_lock, flags); 131 spin_lock_irqsave(&gart_lock, flags);
111 flushed = 0; 132 flushed = 0;
112 for (i = 0; i < k8_northbridges.num; i++) { 133 for (i = 0; i < amd_nb_num(); i++) {
113 pci_write_config_dword(k8_northbridges.nb_misc[i], 0x9c, 134 pci_write_config_dword(node_to_amd_nb(i)->misc, 0x9c,
114 flush_words[i]|1); 135 flush_words[i] | 1);
115 flushed++; 136 flushed++;
116 } 137 }
117 for (i = 0; i < k8_northbridges.num; i++) { 138 for (i = 0; i < amd_nb_num(); i++) {
118 u32 w; 139 u32 w;
119 /* Make sure the hardware actually executed the flush*/ 140 /* Make sure the hardware actually executed the flush*/
120 for (;;) { 141 for (;;) {
121 pci_read_config_dword(k8_northbridges.nb_misc[i], 142 pci_read_config_dword(node_to_amd_nb(i)->misc,
122 0x9c, &w); 143 0x9c, &w);
123 if (!(w & 1)) 144 if (!(w & 1))
124 break; 145 break;
@@ -129,19 +150,23 @@ void k8_flush_garts(void)
129 if (!flushed) 150 if (!flushed)
130 printk("nothing to flush?\n"); 151 printk("nothing to flush?\n");
131} 152}
132EXPORT_SYMBOL_GPL(k8_flush_garts); 153EXPORT_SYMBOL_GPL(amd_flush_garts);
133 154
134static __init int init_k8_nbs(void) 155static __init int init_amd_nbs(void)
135{ 156{
136 int err = 0; 157 int err = 0;
137 158
138 err = cache_k8_northbridges(); 159 err = amd_cache_northbridges();
139 160
140 if (err < 0) 161 if (err < 0)
141 printk(KERN_NOTICE "K8 NB: Cannot enumerate AMD northbridges.\n"); 162 printk(KERN_NOTICE "AMD NB: Cannot enumerate AMD northbridges.\n");
163
164 if (amd_cache_gart() < 0)
165 printk(KERN_NOTICE "AMD NB: Cannot initialize GART flush words, "
166 "GART support disabled.\n");
142 167
143 return err; 168 return err;
144} 169}
145 170
146/* This has to go after the PCI subsystem */ 171/* This has to go after the PCI subsystem */
147fs_initcall(init_k8_nbs); 172fs_initcall(init_amd_nbs);
diff --git a/arch/x86/kernel/apb_timer.c b/arch/x86/kernel/apb_timer.c
index 92543c73cf8e..7c9ab59653e8 100644
--- a/arch/x86/kernel/apb_timer.c
+++ b/arch/x86/kernel/apb_timer.c
@@ -315,6 +315,7 @@ static void apbt_setup_irq(struct apbt_dev *adev)
315 315
316 if (system_state == SYSTEM_BOOTING) { 316 if (system_state == SYSTEM_BOOTING) {
317 irq_modify_status(adev->irq, 0, IRQ_MOVE_PCNTXT); 317 irq_modify_status(adev->irq, 0, IRQ_MOVE_PCNTXT);
318 irq_set_affinity(adev->irq, cpumask_of(adev->cpu));
318 /* APB timer irqs are set up as mp_irqs, timer is edge type */ 319 /* APB timer irqs are set up as mp_irqs, timer is edge type */
319 __set_irq_handler(adev->irq, handle_edge_irq, 0, "edge"); 320 __set_irq_handler(adev->irq, handle_edge_irq, 0, "edge");
320 if (request_irq(adev->irq, apbt_interrupt_handler, 321 if (request_irq(adev->irq, apbt_interrupt_handler,
diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c
index b3a16e8f0703..dcd7c83e1659 100644
--- a/arch/x86/kernel/aperture_64.c
+++ b/arch/x86/kernel/aperture_64.c
@@ -206,7 +206,7 @@ static u32 __init read_agp(int bus, int slot, int func, int cap, u32 *order)
206 * Do an PCI bus scan by hand because we're running before the PCI 206 * Do an PCI bus scan by hand because we're running before the PCI
207 * subsystem. 207 * subsystem.
208 * 208 *
209 * All K8 AGP bridges are AGPv3 compliant, so we can do this scan 209 * All AMD AGP bridges are AGPv3 compliant, so we can do this scan
210 * generically. It's probably overkill to always scan all slots because 210 * generically. It's probably overkill to always scan all slots because
211 * the AGP bridges should be always an own bus on the HT hierarchy, 211 * the AGP bridges should be always an own bus on the HT hierarchy,
212 * but do it here for future safety. 212 * but do it here for future safety.
@@ -303,7 +303,7 @@ void __init early_gart_iommu_check(void)
303 dev_limit = bus_dev_ranges[i].dev_limit; 303 dev_limit = bus_dev_ranges[i].dev_limit;
304 304
305 for (slot = dev_base; slot < dev_limit; slot++) { 305 for (slot = dev_base; slot < dev_limit; slot++) {
306 if (!early_is_k8_nb(read_pci_config(bus, slot, 3, 0x00))) 306 if (!early_is_amd_nb(read_pci_config(bus, slot, 3, 0x00)))
307 continue; 307 continue;
308 308
309 ctl = read_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL); 309 ctl = read_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL);
@@ -358,7 +358,7 @@ void __init early_gart_iommu_check(void)
358 dev_limit = bus_dev_ranges[i].dev_limit; 358 dev_limit = bus_dev_ranges[i].dev_limit;
359 359
360 for (slot = dev_base; slot < dev_limit; slot++) { 360 for (slot = dev_base; slot < dev_limit; slot++) {
361 if (!early_is_k8_nb(read_pci_config(bus, slot, 3, 0x00))) 361 if (!early_is_amd_nb(read_pci_config(bus, slot, 3, 0x00)))
362 continue; 362 continue;
363 363
364 ctl = read_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL); 364 ctl = read_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL);
@@ -400,7 +400,7 @@ int __init gart_iommu_hole_init(void)
400 dev_limit = bus_dev_ranges[i].dev_limit; 400 dev_limit = bus_dev_ranges[i].dev_limit;
401 401
402 for (slot = dev_base; slot < dev_limit; slot++) { 402 for (slot = dev_base; slot < dev_limit; slot++) {
403 if (!early_is_k8_nb(read_pci_config(bus, slot, 3, 0x00))) 403 if (!early_is_amd_nb(read_pci_config(bus, slot, 3, 0x00)))
404 continue; 404 continue;
405 405
406 iommu_detected = 1; 406 iommu_detected = 1;
@@ -518,7 +518,7 @@ out:
518 dev_base = bus_dev_ranges[i].dev_base; 518 dev_base = bus_dev_ranges[i].dev_base;
519 dev_limit = bus_dev_ranges[i].dev_limit; 519 dev_limit = bus_dev_ranges[i].dev_limit;
520 for (slot = dev_base; slot < dev_limit; slot++) { 520 for (slot = dev_base; slot < dev_limit; slot++) {
521 if (!early_is_k8_nb(read_pci_config(bus, slot, 3, 0x00))) 521 if (!early_is_amd_nb(read_pci_config(bus, slot, 3, 0x00)))
522 continue; 522 continue;
523 523
524 write_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL, ctl); 524 write_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL, ctl);
diff --git a/arch/x86/kernel/apic/Makefile b/arch/x86/kernel/apic/Makefile
index 910f20b457c4..3966b564ea47 100644
--- a/arch/x86/kernel/apic/Makefile
+++ b/arch/x86/kernel/apic/Makefile
@@ -3,10 +3,7 @@
3# 3#
4 4
5obj-$(CONFIG_X86_LOCAL_APIC) += apic.o apic_noop.o probe_$(BITS).o ipi.o 5obj-$(CONFIG_X86_LOCAL_APIC) += apic.o apic_noop.o probe_$(BITS).o ipi.o
6ifneq ($(CONFIG_HARDLOCKUP_DETECTOR),y) 6obj-y += hw_nmi.o
7obj-$(CONFIG_X86_LOCAL_APIC) += nmi.o
8endif
9obj-$(CONFIG_HARDLOCKUP_DETECTOR) += hw_nmi.o
10 7
11obj-$(CONFIG_X86_IO_APIC) += io_apic.o 8obj-$(CONFIG_X86_IO_APIC) += io_apic.o
12obj-$(CONFIG_SMP) += ipi.o 9obj-$(CONFIG_SMP) += ipi.o
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 3f838d537392..879999a5230f 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -31,7 +31,6 @@
31#include <linux/init.h> 31#include <linux/init.h>
32#include <linux/cpu.h> 32#include <linux/cpu.h>
33#include <linux/dmi.h> 33#include <linux/dmi.h>
34#include <linux/nmi.h>
35#include <linux/smp.h> 34#include <linux/smp.h>
36#include <linux/mm.h> 35#include <linux/mm.h>
37 36
@@ -432,17 +431,18 @@ int setup_APIC_eilvt(u8 offset, u8 vector, u8 msg_type, u8 mask)
432 reserved = reserve_eilvt_offset(offset, new); 431 reserved = reserve_eilvt_offset(offset, new);
433 432
434 if (reserved != new) { 433 if (reserved != new) {
435 pr_err(FW_BUG "cpu %d, try to setup vector 0x%x, but " 434 pr_err(FW_BUG "cpu %d, try to use APIC%lX (LVT offset %d) for "
436 "vector 0x%x was already reserved by another core, " 435 "vector 0x%x, but the register is already in use for "
437 "APIC%lX=0x%x\n", 436 "vector 0x%x on another cpu\n",
438 smp_processor_id(), new, reserved, reg, old); 437 smp_processor_id(), reg, offset, new, reserved);
439 return -EINVAL; 438 return -EINVAL;
440 } 439 }
441 440
442 if (!eilvt_entry_is_changeable(old, new)) { 441 if (!eilvt_entry_is_changeable(old, new)) {
443 pr_err(FW_BUG "cpu %d, try to setup vector 0x%x but " 442 pr_err(FW_BUG "cpu %d, try to use APIC%lX (LVT offset %d) for "
444 "register already in use, APIC%lX=0x%x\n", 443 "vector 0x%x, but the register is already in use for "
445 smp_processor_id(), new, reg, old); 444 "vector 0x%x on this cpu\n",
445 smp_processor_id(), reg, offset, new, old);
446 return -EBUSY; 446 return -EBUSY;
447 } 447 }
448 448
@@ -799,11 +799,7 @@ void __init setup_boot_APIC_clock(void)
799 * PIT/HPET going. Otherwise register lapic as a dummy 799 * PIT/HPET going. Otherwise register lapic as a dummy
800 * device. 800 * device.
801 */ 801 */
802 if (nmi_watchdog != NMI_IO_APIC) 802 lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY;
803 lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY;
804 else
805 pr_warning("APIC timer registered as dummy,"
806 " due to nmi_watchdog=%d!\n", nmi_watchdog);
807 803
808 /* Setup the lapic or request the broadcast */ 804 /* Setup the lapic or request the broadcast */
809 setup_APIC_timer(); 805 setup_APIC_timer();
@@ -1387,8 +1383,15 @@ void __cpuinit end_local_APIC_setup(void)
1387 } 1383 }
1388#endif 1384#endif
1389 1385
1390 setup_apic_nmi_watchdog(NULL);
1391 apic_pm_activate(); 1386 apic_pm_activate();
1387
1388 /*
1389 * Now that local APIC setup is completed for BP, configure the fault
1390 * handling for interrupt remapping.
1391 */
1392 if (!smp_processor_id() && intr_remapping_enabled)
1393 enable_drhd_fault_handling();
1394
1392} 1395}
1393 1396
1394#ifdef CONFIG_X86_X2APIC 1397#ifdef CONFIG_X86_X2APIC
@@ -1530,13 +1533,60 @@ static int __init detect_init_APIC(void)
1530 return 0; 1533 return 0;
1531} 1534}
1532#else 1535#else
1536
1537static int apic_verify(void)
1538{
1539 u32 features, h, l;
1540
1541 /*
1542 * The APIC feature bit should now be enabled
1543 * in `cpuid'
1544 */
1545 features = cpuid_edx(1);
1546 if (!(features & (1 << X86_FEATURE_APIC))) {
1547 pr_warning("Could not enable APIC!\n");
1548 return -1;
1549 }
1550 set_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
1551 mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
1552
1553 /* The BIOS may have set up the APIC at some other address */
1554 rdmsr(MSR_IA32_APICBASE, l, h);
1555 if (l & MSR_IA32_APICBASE_ENABLE)
1556 mp_lapic_addr = l & MSR_IA32_APICBASE_BASE;
1557
1558 pr_info("Found and enabled local APIC!\n");
1559 return 0;
1560}
1561
1562int apic_force_enable(void)
1563{
1564 u32 h, l;
1565
1566 if (disable_apic)
1567 return -1;
1568
1569 /*
1570 * Some BIOSes disable the local APIC in the APIC_BASE
1571 * MSR. This can only be done in software for Intel P6 or later
1572 * and AMD K7 (Model > 1) or later.
1573 */
1574 rdmsr(MSR_IA32_APICBASE, l, h);
1575 if (!(l & MSR_IA32_APICBASE_ENABLE)) {
1576 pr_info("Local APIC disabled by BIOS -- reenabling.\n");
1577 l &= ~MSR_IA32_APICBASE_BASE;
1578 l |= MSR_IA32_APICBASE_ENABLE | APIC_DEFAULT_PHYS_BASE;
1579 wrmsr(MSR_IA32_APICBASE, l, h);
1580 enabled_via_apicbase = 1;
1581 }
1582 return apic_verify();
1583}
1584
1533/* 1585/*
1534 * Detect and initialize APIC 1586 * Detect and initialize APIC
1535 */ 1587 */
1536static int __init detect_init_APIC(void) 1588static int __init detect_init_APIC(void)
1537{ 1589{
1538 u32 h, l, features;
1539
1540 /* Disabled by kernel option? */ 1590 /* Disabled by kernel option? */
1541 if (disable_apic) 1591 if (disable_apic)
1542 return -1; 1592 return -1;
@@ -1566,38 +1616,12 @@ static int __init detect_init_APIC(void)
1566 "you can enable it with \"lapic\"\n"); 1616 "you can enable it with \"lapic\"\n");
1567 return -1; 1617 return -1;
1568 } 1618 }
1569 /* 1619 if (apic_force_enable())
1570 * Some BIOSes disable the local APIC in the APIC_BASE 1620 return -1;
1571 * MSR. This can only be done in software for Intel P6 or later 1621 } else {
1572 * and AMD K7 (Model > 1) or later. 1622 if (apic_verify())
1573 */ 1623 return -1;
1574 rdmsr(MSR_IA32_APICBASE, l, h);
1575 if (!(l & MSR_IA32_APICBASE_ENABLE)) {
1576 pr_info("Local APIC disabled by BIOS -- reenabling.\n");
1577 l &= ~MSR_IA32_APICBASE_BASE;
1578 l |= MSR_IA32_APICBASE_ENABLE | APIC_DEFAULT_PHYS_BASE;
1579 wrmsr(MSR_IA32_APICBASE, l, h);
1580 enabled_via_apicbase = 1;
1581 }
1582 }
1583 /*
1584 * The APIC feature bit should now be enabled
1585 * in `cpuid'
1586 */
1587 features = cpuid_edx(1);
1588 if (!(features & (1 << X86_FEATURE_APIC))) {
1589 pr_warning("Could not enable APIC!\n");
1590 return -1;
1591 } 1624 }
1592 set_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
1593 mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
1594
1595 /* The BIOS may have set up the APIC at some other address */
1596 rdmsr(MSR_IA32_APICBASE, l, h);
1597 if (l & MSR_IA32_APICBASE_ENABLE)
1598 mp_lapic_addr = l & MSR_IA32_APICBASE_BASE;
1599
1600 pr_info("Found and enabled local APIC!\n");
1601 1625
1602 apic_pm_activate(); 1626 apic_pm_activate();
1603 1627
@@ -1685,7 +1709,7 @@ void __init init_apic_mappings(void)
1685 * This initializes the IO-APIC and APIC hardware if this is 1709 * This initializes the IO-APIC and APIC hardware if this is
1686 * a UP kernel. 1710 * a UP kernel.
1687 */ 1711 */
1688int apic_version[MAX_APICS]; 1712int apic_version[MAX_LOCAL_APIC];
1689 1713
1690int __init APIC_init_uniprocessor(void) 1714int __init APIC_init_uniprocessor(void)
1691{ 1715{
@@ -1750,17 +1774,10 @@ int __init APIC_init_uniprocessor(void)
1750 setup_IO_APIC(); 1774 setup_IO_APIC();
1751 else { 1775 else {
1752 nr_ioapics = 0; 1776 nr_ioapics = 0;
1753 localise_nmi_watchdog();
1754 } 1777 }
1755#else
1756 localise_nmi_watchdog();
1757#endif 1778#endif
1758 1779
1759 x86_init.timers.setup_percpu_clockev(); 1780 x86_init.timers.setup_percpu_clockev();
1760#ifdef CONFIG_X86_64
1761 check_nmi_watchdog();
1762#endif
1763
1764 return 0; 1781 return 0;
1765} 1782}
1766 1783
diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c
index 62f6e1e55b90..72ec29e1ae06 100644
--- a/arch/x86/kernel/apic/hw_nmi.c
+++ b/arch/x86/kernel/apic/hw_nmi.c
@@ -17,20 +17,31 @@
17#include <linux/nmi.h> 17#include <linux/nmi.h>
18#include <linux/module.h> 18#include <linux/module.h>
19 19
20#ifdef CONFIG_HARDLOCKUP_DETECTOR
20u64 hw_nmi_get_sample_period(void) 21u64 hw_nmi_get_sample_period(void)
21{ 22{
22 return (u64)(cpu_khz) * 1000 * 60; 23 return (u64)(cpu_khz) * 1000 * 60;
23} 24}
25#endif
24 26
25#ifdef ARCH_HAS_NMI_WATCHDOG 27#ifdef arch_trigger_all_cpu_backtrace
26
27/* For reliability, we're prepared to waste bits here. */ 28/* For reliability, we're prepared to waste bits here. */
28static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly; 29static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly;
29 30
31/* "in progress" flag of arch_trigger_all_cpu_backtrace */
32static unsigned long backtrace_flag;
33
30void arch_trigger_all_cpu_backtrace(void) 34void arch_trigger_all_cpu_backtrace(void)
31{ 35{
32 int i; 36 int i;
33 37
38 if (test_and_set_bit(0, &backtrace_flag))
39 /*
40 * If there is already a trigger_all_cpu_backtrace() in progress
41 * (backtrace_flag == 1), don't output double cpu dump infos.
42 */
43 return;
44
34 cpumask_copy(to_cpumask(backtrace_mask), cpu_online_mask); 45 cpumask_copy(to_cpumask(backtrace_mask), cpu_online_mask);
35 46
36 printk(KERN_INFO "sending NMI to all CPUs:\n"); 47 printk(KERN_INFO "sending NMI to all CPUs:\n");
@@ -42,6 +53,9 @@ void arch_trigger_all_cpu_backtrace(void)
42 break; 53 break;
43 mdelay(1); 54 mdelay(1);
44 } 55 }
56
57 clear_bit(0, &backtrace_flag);
58 smp_mb__after_clear_bit();
45} 59}
46 60
47static int __kprobes 61static int __kprobes
@@ -50,7 +64,7 @@ arch_trigger_all_cpu_backtrace_handler(struct notifier_block *self,
50{ 64{
51 struct die_args *args = __args; 65 struct die_args *args = __args;
52 struct pt_regs *regs; 66 struct pt_regs *regs;
53 int cpu = smp_processor_id(); 67 int cpu;
54 68
55 switch (cmd) { 69 switch (cmd) {
56 case DIE_NMI: 70 case DIE_NMI:
@@ -62,6 +76,7 @@ arch_trigger_all_cpu_backtrace_handler(struct notifier_block *self,
62 } 76 }
63 77
64 regs = args->regs; 78 regs = args->regs;
79 cpu = smp_processor_id();
65 80
66 if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) { 81 if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) {
67 static arch_spinlock_t lock = __ARCH_SPIN_LOCK_UNLOCKED; 82 static arch_spinlock_t lock = __ARCH_SPIN_LOCK_UNLOCKED;
@@ -91,18 +106,3 @@ static int __init register_trigger_all_cpu_backtrace(void)
91} 106}
92early_initcall(register_trigger_all_cpu_backtrace); 107early_initcall(register_trigger_all_cpu_backtrace);
93#endif 108#endif
94
95/* STUB calls to mimic old nmi_watchdog behaviour */
96#if defined(CONFIG_X86_LOCAL_APIC)
97unsigned int nmi_watchdog = NMI_NONE;
98EXPORT_SYMBOL(nmi_watchdog);
99void acpi_nmi_enable(void) { return; }
100void acpi_nmi_disable(void) { return; }
101#endif
102atomic_t nmi_active = ATOMIC_INIT(0); /* oprofile uses this */
103EXPORT_SYMBOL(nmi_active);
104int unknown_nmi_panic;
105void cpu_nmi_set_wd_enabled(void) { return; }
106void stop_apic_nmi_watchdog(void *unused) { return; }
107void setup_apic_nmi_watchdog(void *unused) { return; }
108int __init check_nmi_watchdog(void) { return 0; }
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 7cc0a721f628..f6cd5b410770 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -54,7 +54,6 @@
54#include <asm/dma.h> 54#include <asm/dma.h>
55#include <asm/timer.h> 55#include <asm/timer.h>
56#include <asm/i8259.h> 56#include <asm/i8259.h>
57#include <asm/nmi.h>
58#include <asm/msidef.h> 57#include <asm/msidef.h>
59#include <asm/hypertransport.h> 58#include <asm/hypertransport.h>
60#include <asm/setup.h> 59#include <asm/setup.h>
@@ -1934,8 +1933,7 @@ void disable_IO_APIC(void)
1934 * 1933 *
1935 * by Matt Domsch <Matt_Domsch@dell.com> Tue Dec 21 12:25:05 CST 1999 1934 * by Matt Domsch <Matt_Domsch@dell.com> Tue Dec 21 12:25:05 CST 1999
1936 */ 1935 */
1937 1936void __init setup_ioapic_ids_from_mpc_nocheck(void)
1938void __init setup_ioapic_ids_from_mpc(void)
1939{ 1937{
1940 union IO_APIC_reg_00 reg_00; 1938 union IO_APIC_reg_00 reg_00;
1941 physid_mask_t phys_id_present_map; 1939 physid_mask_t phys_id_present_map;
@@ -1944,15 +1942,6 @@ void __init setup_ioapic_ids_from_mpc(void)
1944 unsigned char old_id; 1942 unsigned char old_id;
1945 unsigned long flags; 1943 unsigned long flags;
1946 1944
1947 if (acpi_ioapic)
1948 return;
1949 /*
1950 * Don't check I/O APIC IDs for xAPIC systems. They have
1951 * no meaning without the serial APIC bus.
1952 */
1953 if (!(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
1954 || APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
1955 return;
1956 /* 1945 /*
1957 * This is broken; anything with a real cpu count has to 1946 * This is broken; anything with a real cpu count has to
1958 * circumvent this idiocy regardless. 1947 * circumvent this idiocy regardless.
@@ -2006,7 +1995,6 @@ void __init setup_ioapic_ids_from_mpc(void)
2006 physids_or(phys_id_present_map, phys_id_present_map, tmp); 1995 physids_or(phys_id_present_map, phys_id_present_map, tmp);
2007 } 1996 }
2008 1997
2009
2010 /* 1998 /*
2011 * We need to adjust the IRQ routing table 1999 * We need to adjust the IRQ routing table
2012 * if the ID changed. 2000 * if the ID changed.
@@ -2042,6 +2030,21 @@ void __init setup_ioapic_ids_from_mpc(void)
2042 apic_printk(APIC_VERBOSE, " ok.\n"); 2030 apic_printk(APIC_VERBOSE, " ok.\n");
2043 } 2031 }
2044} 2032}
2033
2034void __init setup_ioapic_ids_from_mpc(void)
2035{
2036
2037 if (acpi_ioapic)
2038 return;
2039 /*
2040 * Don't check I/O APIC IDs for xAPIC systems. They have
2041 * no meaning without the serial APIC bus.
2042 */
2043 if (!(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
2044 || APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
2045 return;
2046 setup_ioapic_ids_from_mpc_nocheck();
2047}
2045#endif 2048#endif
2046 2049
2047int no_timer_check __initdata; 2050int no_timer_check __initdata;
@@ -2430,13 +2433,12 @@ static void ack_apic_level(struct irq_data *data)
2430{ 2433{
2431 struct irq_cfg *cfg = data->chip_data; 2434 struct irq_cfg *cfg = data->chip_data;
2432 int i, do_unmask_irq = 0, irq = data->irq; 2435 int i, do_unmask_irq = 0, irq = data->irq;
2433 struct irq_desc *desc = irq_to_desc(irq);
2434 unsigned long v; 2436 unsigned long v;
2435 2437
2436 irq_complete_move(cfg); 2438 irq_complete_move(cfg);
2437#ifdef CONFIG_GENERIC_PENDING_IRQ 2439#ifdef CONFIG_GENERIC_PENDING_IRQ
2438 /* If we are moving the irq we need to mask it */ 2440 /* If we are moving the irq we need to mask it */
2439 if (unlikely(desc->status & IRQ_MOVE_PENDING)) { 2441 if (unlikely(irq_to_desc(irq)->status & IRQ_MOVE_PENDING)) {
2440 do_unmask_irq = 1; 2442 do_unmask_irq = 1;
2441 mask_ioapic(cfg); 2443 mask_ioapic(cfg);
2442 } 2444 }
@@ -2643,24 +2645,6 @@ static void lapic_register_intr(int irq)
2643 "edge"); 2645 "edge");
2644} 2646}
2645 2647
2646static void __init setup_nmi(void)
2647{
2648 /*
2649 * Dirty trick to enable the NMI watchdog ...
2650 * We put the 8259A master into AEOI mode and
2651 * unmask on all local APICs LVT0 as NMI.
2652 *
2653 * The idea to use the 8259A in AEOI mode ('8259A Virtual Wire')
2654 * is from Maciej W. Rozycki - so we do not have to EOI from
2655 * the NMI handler or the timer interrupt.
2656 */
2657 apic_printk(APIC_VERBOSE, KERN_INFO "activating NMI Watchdog ...");
2658
2659 enable_NMI_through_LVT0();
2660
2661 apic_printk(APIC_VERBOSE, " done.\n");
2662}
2663
2664/* 2648/*
2665 * This looks a bit hackish but it's about the only one way of sending 2649 * This looks a bit hackish but it's about the only one way of sending
2666 * a few INTA cycles to 8259As and any associated glue logic. ICR does 2650 * a few INTA cycles to 8259As and any associated glue logic. ICR does
@@ -2766,15 +2750,6 @@ static inline void __init check_timer(void)
2766 */ 2750 */
2767 apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); 2751 apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
2768 legacy_pic->init(1); 2752 legacy_pic->init(1);
2769#ifdef CONFIG_X86_32
2770 {
2771 unsigned int ver;
2772
2773 ver = apic_read(APIC_LVR);
2774 ver = GET_APIC_VERSION(ver);
2775 timer_ack = (nmi_watchdog == NMI_IO_APIC && !APIC_INTEGRATED(ver));
2776 }
2777#endif
2778 2753
2779 pin1 = find_isa_irq_pin(0, mp_INT); 2754 pin1 = find_isa_irq_pin(0, mp_INT);
2780 apic1 = find_isa_irq_apic(0, mp_INT); 2755 apic1 = find_isa_irq_apic(0, mp_INT);
@@ -2822,10 +2797,6 @@ static inline void __init check_timer(void)
2822 unmask_ioapic(cfg); 2797 unmask_ioapic(cfg);
2823 } 2798 }
2824 if (timer_irq_works()) { 2799 if (timer_irq_works()) {
2825 if (nmi_watchdog == NMI_IO_APIC) {
2826 setup_nmi();
2827 legacy_pic->unmask(0);
2828 }
2829 if (disable_timer_pin_1 > 0) 2800 if (disable_timer_pin_1 > 0)
2830 clear_IO_APIC_pin(0, pin1); 2801 clear_IO_APIC_pin(0, pin1);
2831 goto out; 2802 goto out;
@@ -2851,11 +2822,6 @@ static inline void __init check_timer(void)
2851 if (timer_irq_works()) { 2822 if (timer_irq_works()) {
2852 apic_printk(APIC_QUIET, KERN_INFO "....... works.\n"); 2823 apic_printk(APIC_QUIET, KERN_INFO "....... works.\n");
2853 timer_through_8259 = 1; 2824 timer_through_8259 = 1;
2854 if (nmi_watchdog == NMI_IO_APIC) {
2855 legacy_pic->mask(0);
2856 setup_nmi();
2857 legacy_pic->unmask(0);
2858 }
2859 goto out; 2825 goto out;
2860 } 2826 }
2861 /* 2827 /*
@@ -2867,15 +2833,6 @@ static inline void __init check_timer(void)
2867 apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n"); 2833 apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n");
2868 } 2834 }
2869 2835
2870 if (nmi_watchdog == NMI_IO_APIC) {
2871 apic_printk(APIC_QUIET, KERN_WARNING "timer doesn't work "
2872 "through the IO-APIC - disabling NMI Watchdog!\n");
2873 nmi_watchdog = NMI_NONE;
2874 }
2875#ifdef CONFIG_X86_32
2876 timer_ack = 0;
2877#endif
2878
2879 apic_printk(APIC_QUIET, KERN_INFO 2836 apic_printk(APIC_QUIET, KERN_INFO
2880 "...trying to set up timer as Virtual Wire IRQ...\n"); 2837 "...trying to set up timer as Virtual Wire IRQ...\n");
2881 2838
@@ -3413,6 +3370,7 @@ dmar_msi_set_affinity(struct irq_data *data, const struct cpumask *mask,
3413 msg.data |= MSI_DATA_VECTOR(cfg->vector); 3370 msg.data |= MSI_DATA_VECTOR(cfg->vector);
3414 msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK; 3371 msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
3415 msg.address_lo |= MSI_ADDR_DEST_ID(dest); 3372 msg.address_lo |= MSI_ADDR_DEST_ID(dest);
3373 msg.address_hi = MSI_ADDR_BASE_HI | MSI_ADDR_EXT_DEST_ID(dest);
3416 3374
3417 dmar_msi_write(irq, &msg); 3375 dmar_msi_write(irq, &msg);
3418 3376
@@ -3639,7 +3597,7 @@ int __init io_apic_get_redir_entries (int ioapic)
3639 return reg_01.bits.entries + 1; 3597 return reg_01.bits.entries + 1;
3640} 3598}
3641 3599
3642void __init probe_nr_irqs_gsi(void) 3600static void __init probe_nr_irqs_gsi(void)
3643{ 3601{
3644 int nr; 3602 int nr;
3645 3603
@@ -3956,7 +3914,7 @@ static struct resource * __init ioapic_setup_resources(int nr_ioapics)
3956 return res; 3914 return res;
3957} 3915}
3958 3916
3959void __init ioapic_init_mappings(void) 3917void __init ioapic_and_gsi_init(void)
3960{ 3918{
3961 unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0; 3919 unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0;
3962 struct resource *ioapic_res; 3920 struct resource *ioapic_res;
@@ -3994,6 +3952,8 @@ fake_ioapic_page:
3994 ioapic_res->end = ioapic_phys + IO_APIC_SLOT_SIZE - 1; 3952 ioapic_res->end = ioapic_phys + IO_APIC_SLOT_SIZE - 1;
3995 ioapic_res++; 3953 ioapic_res++;
3996 } 3954 }
3955
3956 probe_nr_irqs_gsi();
3997} 3957}
3998 3958
3999void __init ioapic_insert_resources(void) 3959void __init ioapic_insert_resources(void)
@@ -4103,7 +4063,8 @@ void __init pre_init_apic_IRQ0(void)
4103 4063
4104 printk(KERN_INFO "Early APIC setup for system timer0\n"); 4064 printk(KERN_INFO "Early APIC setup for system timer0\n");
4105#ifndef CONFIG_SMP 4065#ifndef CONFIG_SMP
4106 phys_cpu_present_map = physid_mask_of_physid(boot_cpu_physical_apicid); 4066 physid_set_mask_of_physid(boot_cpu_physical_apicid,
4067 &phys_cpu_present_map);
4107#endif 4068#endif
4108 /* Make sure the irq descriptor is set up */ 4069 /* Make sure the irq descriptor is set up */
4109 cfg = alloc_irq_and_cfg_at(0, 0); 4070 cfg = alloc_irq_and_cfg_at(0, 0);
diff --git a/arch/x86/kernel/apic/nmi.c b/arch/x86/kernel/apic/nmi.c
deleted file mode 100644
index c90041ccb742..000000000000
--- a/arch/x86/kernel/apic/nmi.c
+++ /dev/null
@@ -1,567 +0,0 @@
1/*
2 * NMI watchdog support on APIC systems
3 *
4 * Started by Ingo Molnar <mingo@redhat.com>
5 *
6 * Fixes:
7 * Mikael Pettersson : AMD K7 support for local APIC NMI watchdog.
8 * Mikael Pettersson : Power Management for local APIC NMI watchdog.
9 * Mikael Pettersson : Pentium 4 support for local APIC NMI watchdog.
10 * Pavel Machek and
11 * Mikael Pettersson : PM converted to driver model. Disable/enable API.
12 */
13
14#include <asm/apic.h>
15
16#include <linux/nmi.h>
17#include <linux/mm.h>
18#include <linux/delay.h>
19#include <linux/interrupt.h>
20#include <linux/module.h>
21#include <linux/slab.h>
22#include <linux/sysdev.h>
23#include <linux/sysctl.h>
24#include <linux/percpu.h>
25#include <linux/kprobes.h>
26#include <linux/cpumask.h>
27#include <linux/kernel_stat.h>
28#include <linux/kdebug.h>
29#include <linux/smp.h>
30
31#include <asm/i8259.h>
32#include <asm/io_apic.h>
33#include <asm/proto.h>
34#include <asm/timer.h>
35
36#include <asm/mce.h>
37
38#include <asm/mach_traps.h>
39
40int unknown_nmi_panic;
41int nmi_watchdog_enabled;
42
43/* For reliability, we're prepared to waste bits here. */
44static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly;
45
46/* nmi_active:
47 * >0: the lapic NMI watchdog is active, but can be disabled
48 * <0: the lapic NMI watchdog has not been set up, and cannot
49 * be enabled
50 * 0: the lapic NMI watchdog is disabled, but can be enabled
51 */
52atomic_t nmi_active = ATOMIC_INIT(0); /* oprofile uses this */
53EXPORT_SYMBOL(nmi_active);
54
55unsigned int nmi_watchdog = NMI_NONE;
56EXPORT_SYMBOL(nmi_watchdog);
57
58static int panic_on_timeout;
59
60static unsigned int nmi_hz = HZ;
61static DEFINE_PER_CPU(short, wd_enabled);
62static int endflag __initdata;
63
64static inline unsigned int get_nmi_count(int cpu)
65{
66 return per_cpu(irq_stat, cpu).__nmi_count;
67}
68
69static inline int mce_in_progress(void)
70{
71#if defined(CONFIG_X86_MCE)
72 return atomic_read(&mce_entry) > 0;
73#endif
74 return 0;
75}
76
77/*
78 * Take the local apic timer and PIT/HPET into account. We don't
79 * know which one is active, when we have highres/dyntick on
80 */
81static inline unsigned int get_timer_irqs(int cpu)
82{
83 return per_cpu(irq_stat, cpu).apic_timer_irqs +
84 per_cpu(irq_stat, cpu).irq0_irqs;
85}
86
87#ifdef CONFIG_SMP
88/*
89 * The performance counters used by NMI_LOCAL_APIC don't trigger when
90 * the CPU is idle. To make sure the NMI watchdog really ticks on all
91 * CPUs during the test make them busy.
92 */
93static __init void nmi_cpu_busy(void *data)
94{
95 local_irq_enable_in_hardirq();
96 /*
97 * Intentionally don't use cpu_relax here. This is
98 * to make sure that the performance counter really ticks,
99 * even if there is a simulator or similar that catches the
100 * pause instruction. On a real HT machine this is fine because
101 * all other CPUs are busy with "useless" delay loops and don't
102 * care if they get somewhat less cycles.
103 */
104 while (endflag == 0)
105 mb();
106}
107#endif
108
109static void report_broken_nmi(int cpu, unsigned int *prev_nmi_count)
110{
111 printk(KERN_CONT "\n");
112
113 printk(KERN_WARNING
114 "WARNING: CPU#%d: NMI appears to be stuck (%d->%d)!\n",
115 cpu, prev_nmi_count[cpu], get_nmi_count(cpu));
116
117 printk(KERN_WARNING
118 "Please report this to bugzilla.kernel.org,\n");
119 printk(KERN_WARNING
120 "and attach the output of the 'dmesg' command.\n");
121
122 per_cpu(wd_enabled, cpu) = 0;
123 atomic_dec(&nmi_active);
124}
125
126static void __acpi_nmi_disable(void *__unused)
127{
128 apic_write(APIC_LVT0, APIC_DM_NMI | APIC_LVT_MASKED);
129}
130
131int __init check_nmi_watchdog(void)
132{
133 unsigned int *prev_nmi_count;
134 int cpu;
135
136 if (!nmi_watchdog_active() || !atomic_read(&nmi_active))
137 return 0;
138
139 prev_nmi_count = kmalloc(nr_cpu_ids * sizeof(int), GFP_KERNEL);
140 if (!prev_nmi_count)
141 goto error;
142
143 printk(KERN_INFO "Testing NMI watchdog ... ");
144
145#ifdef CONFIG_SMP
146 if (nmi_watchdog == NMI_LOCAL_APIC)
147 smp_call_function(nmi_cpu_busy, (void *)&endflag, 0);
148#endif
149
150 for_each_possible_cpu(cpu)
151 prev_nmi_count[cpu] = get_nmi_count(cpu);
152 local_irq_enable();
153 mdelay((20 * 1000) / nmi_hz); /* wait 20 ticks */
154
155 for_each_online_cpu(cpu) {
156 if (!per_cpu(wd_enabled, cpu))
157 continue;
158 if (get_nmi_count(cpu) - prev_nmi_count[cpu] <= 5)
159 report_broken_nmi(cpu, prev_nmi_count);
160 }
161 endflag = 1;
162 if (!atomic_read(&nmi_active)) {
163 kfree(prev_nmi_count);
164 atomic_set(&nmi_active, -1);
165 goto error;
166 }
167 printk("OK.\n");
168
169 /*
170 * now that we know it works we can reduce NMI frequency to
171 * something more reasonable; makes a difference in some configs
172 */
173 if (nmi_watchdog == NMI_LOCAL_APIC)
174 nmi_hz = lapic_adjust_nmi_hz(1);
175
176 kfree(prev_nmi_count);
177 return 0;
178error:
179 if (nmi_watchdog == NMI_IO_APIC) {
180 if (!timer_through_8259)
181 legacy_pic->mask(0);
182 on_each_cpu(__acpi_nmi_disable, NULL, 1);
183 }
184
185#ifdef CONFIG_X86_32
186 timer_ack = 0;
187#endif
188 return -1;
189}
190
191static int __init setup_nmi_watchdog(char *str)
192{
193 unsigned int nmi;
194
195 if (!strncmp(str, "panic", 5)) {
196 panic_on_timeout = 1;
197 str = strchr(str, ',');
198 if (!str)
199 return 1;
200 ++str;
201 }
202
203 if (!strncmp(str, "lapic", 5))
204 nmi_watchdog = NMI_LOCAL_APIC;
205 else if (!strncmp(str, "ioapic", 6))
206 nmi_watchdog = NMI_IO_APIC;
207 else {
208 get_option(&str, &nmi);
209 if (nmi >= NMI_INVALID)
210 return 0;
211 nmi_watchdog = nmi;
212 }
213
214 return 1;
215}
216__setup("nmi_watchdog=", setup_nmi_watchdog);
217
218/*
219 * Suspend/resume support
220 */
221#ifdef CONFIG_PM
222
223static int nmi_pm_active; /* nmi_active before suspend */
224
225static int lapic_nmi_suspend(struct sys_device *dev, pm_message_t state)
226{
227 /* only CPU0 goes here, other CPUs should be offline */
228 nmi_pm_active = atomic_read(&nmi_active);
229 stop_apic_nmi_watchdog(NULL);
230 BUG_ON(atomic_read(&nmi_active) != 0);
231 return 0;
232}
233
234static int lapic_nmi_resume(struct sys_device *dev)
235{
236 /* only CPU0 goes here, other CPUs should be offline */
237 if (nmi_pm_active > 0) {
238 setup_apic_nmi_watchdog(NULL);
239 touch_nmi_watchdog();
240 }
241 return 0;
242}
243
244static struct sysdev_class nmi_sysclass = {
245 .name = "lapic_nmi",
246 .resume = lapic_nmi_resume,
247 .suspend = lapic_nmi_suspend,
248};
249
250static struct sys_device device_lapic_nmi = {
251 .id = 0,
252 .cls = &nmi_sysclass,
253};
254
255static int __init init_lapic_nmi_sysfs(void)
256{
257 int error;
258
259 /*
260 * should really be a BUG_ON but b/c this is an
261 * init call, it just doesn't work. -dcz
262 */
263 if (nmi_watchdog != NMI_LOCAL_APIC)
264 return 0;
265
266 if (atomic_read(&nmi_active) < 0)
267 return 0;
268
269 error = sysdev_class_register(&nmi_sysclass);
270 if (!error)
271 error = sysdev_register(&device_lapic_nmi);
272 return error;
273}
274
275/* must come after the local APIC's device_initcall() */
276late_initcall(init_lapic_nmi_sysfs);
277
278#endif /* CONFIG_PM */
279
280static void __acpi_nmi_enable(void *__unused)
281{
282 apic_write(APIC_LVT0, APIC_DM_NMI);
283}
284
285/*
286 * Enable timer based NMIs on all CPUs:
287 */
288void acpi_nmi_enable(void)
289{
290 if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC)
291 on_each_cpu(__acpi_nmi_enable, NULL, 1);
292}
293
294/*
295 * Disable timer based NMIs on all CPUs:
296 */
297void acpi_nmi_disable(void)
298{
299 if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC)
300 on_each_cpu(__acpi_nmi_disable, NULL, 1);
301}
302
303/*
304 * This function is called as soon the LAPIC NMI watchdog driver has everything
305 * in place and it's ready to check if the NMIs belong to the NMI watchdog
306 */
307void cpu_nmi_set_wd_enabled(void)
308{
309 __get_cpu_var(wd_enabled) = 1;
310}
311
312void setup_apic_nmi_watchdog(void *unused)
313{
314 if (__get_cpu_var(wd_enabled))
315 return;
316
317 /* cheap hack to support suspend/resume */
318 /* if cpu0 is not active neither should the other cpus */
319 if (smp_processor_id() != 0 && atomic_read(&nmi_active) <= 0)
320 return;
321
322 switch (nmi_watchdog) {
323 case NMI_LOCAL_APIC:
324 if (lapic_watchdog_init(nmi_hz) < 0) {
325 __get_cpu_var(wd_enabled) = 0;
326 return;
327 }
328 /* FALL THROUGH */
329 case NMI_IO_APIC:
330 __get_cpu_var(wd_enabled) = 1;
331 atomic_inc(&nmi_active);
332 }
333}
334
335void stop_apic_nmi_watchdog(void *unused)
336{
337 /* only support LOCAL and IO APICs for now */
338 if (!nmi_watchdog_active())
339 return;
340 if (__get_cpu_var(wd_enabled) == 0)
341 return;
342 if (nmi_watchdog == NMI_LOCAL_APIC)
343 lapic_watchdog_stop();
344 else
345 __acpi_nmi_disable(NULL);
346 __get_cpu_var(wd_enabled) = 0;
347 atomic_dec(&nmi_active);
348}
349
350/*
351 * the best way to detect whether a CPU has a 'hard lockup' problem
352 * is to check it's local APIC timer IRQ counts. If they are not
353 * changing then that CPU has some problem.
354 *
355 * as these watchdog NMI IRQs are generated on every CPU, we only
356 * have to check the current processor.
357 *
358 * since NMIs don't listen to _any_ locks, we have to be extremely
359 * careful not to rely on unsafe variables. The printk might lock
360 * up though, so we have to break up any console locks first ...
361 * [when there will be more tty-related locks, break them up here too!]
362 */
363
364static DEFINE_PER_CPU(unsigned, last_irq_sum);
365static DEFINE_PER_CPU(long, alert_counter);
366static DEFINE_PER_CPU(int, nmi_touch);
367
368void touch_nmi_watchdog(void)
369{
370 if (nmi_watchdog_active()) {
371 unsigned cpu;
372
373 /*
374 * Tell other CPUs to reset their alert counters. We cannot
375 * do it ourselves because the alert count increase is not
376 * atomic.
377 */
378 for_each_present_cpu(cpu) {
379 if (per_cpu(nmi_touch, cpu) != 1)
380 per_cpu(nmi_touch, cpu) = 1;
381 }
382 }
383
384 /*
385 * Tickle the softlockup detector too:
386 */
387 touch_softlockup_watchdog();
388}
389EXPORT_SYMBOL(touch_nmi_watchdog);
390
391notrace __kprobes int
392nmi_watchdog_tick(struct pt_regs *regs, unsigned reason)
393{
394 /*
395 * Since current_thread_info()-> is always on the stack, and we
396 * always switch the stack NMI-atomically, it's safe to use
397 * smp_processor_id().
398 */
399 unsigned int sum;
400 int touched = 0;
401 int cpu = smp_processor_id();
402 int rc = 0;
403
404 sum = get_timer_irqs(cpu);
405
406 if (__get_cpu_var(nmi_touch)) {
407 __get_cpu_var(nmi_touch) = 0;
408 touched = 1;
409 }
410
411 /* We can be called before check_nmi_watchdog, hence NULL check. */
412 if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) {
413 static DEFINE_RAW_SPINLOCK(lock); /* Serialise the printks */
414
415 raw_spin_lock(&lock);
416 printk(KERN_WARNING "NMI backtrace for cpu %d\n", cpu);
417 show_regs(regs);
418 dump_stack();
419 raw_spin_unlock(&lock);
420 cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask));
421
422 rc = 1;
423 }
424
425 /* Could check oops_in_progress here too, but it's safer not to */
426 if (mce_in_progress())
427 touched = 1;
428
429 /* if the none of the timers isn't firing, this cpu isn't doing much */
430 if (!touched && __get_cpu_var(last_irq_sum) == sum) {
431 /*
432 * Ayiee, looks like this CPU is stuck ...
433 * wait a few IRQs (5 seconds) before doing the oops ...
434 */
435 __this_cpu_inc(alert_counter);
436 if (__this_cpu_read(alert_counter) == 5 * nmi_hz)
437 /*
438 * die_nmi will return ONLY if NOTIFY_STOP happens..
439 */
440 die_nmi("BUG: NMI Watchdog detected LOCKUP",
441 regs, panic_on_timeout);
442 } else {
443 __get_cpu_var(last_irq_sum) = sum;
444 __this_cpu_write(alert_counter, 0);
445 }
446
447 /* see if the nmi watchdog went off */
448 if (!__get_cpu_var(wd_enabled))
449 return rc;
450 switch (nmi_watchdog) {
451 case NMI_LOCAL_APIC:
452 rc |= lapic_wd_event(nmi_hz);
453 break;
454 case NMI_IO_APIC:
455 /*
456 * don't know how to accurately check for this.
457 * just assume it was a watchdog timer interrupt
458 * This matches the old behaviour.
459 */
460 rc = 1;
461 break;
462 }
463 return rc;
464}
465
466#ifdef CONFIG_SYSCTL
467
468static void enable_ioapic_nmi_watchdog_single(void *unused)
469{
470 __get_cpu_var(wd_enabled) = 1;
471 atomic_inc(&nmi_active);
472 __acpi_nmi_enable(NULL);
473}
474
475static void enable_ioapic_nmi_watchdog(void)
476{
477 on_each_cpu(enable_ioapic_nmi_watchdog_single, NULL, 1);
478 touch_nmi_watchdog();
479}
480
481static void disable_ioapic_nmi_watchdog(void)
482{
483 on_each_cpu(stop_apic_nmi_watchdog, NULL, 1);
484}
485
486static int __init setup_unknown_nmi_panic(char *str)
487{
488 unknown_nmi_panic = 1;
489 return 1;
490}
491__setup("unknown_nmi_panic", setup_unknown_nmi_panic);
492
493static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu)
494{
495 unsigned char reason = get_nmi_reason();
496 char buf[64];
497
498 sprintf(buf, "NMI received for unknown reason %02x\n", reason);
499 die_nmi(buf, regs, 1); /* Always panic here */
500 return 0;
501}
502
503/*
504 * proc handler for /proc/sys/kernel/nmi
505 */
506int proc_nmi_enabled(struct ctl_table *table, int write,
507 void __user *buffer, size_t *length, loff_t *ppos)
508{
509 int old_state;
510
511 nmi_watchdog_enabled = (atomic_read(&nmi_active) > 0) ? 1 : 0;
512 old_state = nmi_watchdog_enabled;
513 proc_dointvec(table, write, buffer, length, ppos);
514 if (!!old_state == !!nmi_watchdog_enabled)
515 return 0;
516
517 if (atomic_read(&nmi_active) < 0 || !nmi_watchdog_active()) {
518 printk(KERN_WARNING
519 "NMI watchdog is permanently disabled\n");
520 return -EIO;
521 }
522
523 if (nmi_watchdog == NMI_LOCAL_APIC) {
524 if (nmi_watchdog_enabled)
525 enable_lapic_nmi_watchdog();
526 else
527 disable_lapic_nmi_watchdog();
528 } else if (nmi_watchdog == NMI_IO_APIC) {
529 if (nmi_watchdog_enabled)
530 enable_ioapic_nmi_watchdog();
531 else
532 disable_ioapic_nmi_watchdog();
533 } else {
534 printk(KERN_WARNING
535 "NMI watchdog doesn't know what hardware to touch\n");
536 return -EIO;
537 }
538 return 0;
539}
540
541#endif /* CONFIG_SYSCTL */
542
543int do_nmi_callback(struct pt_regs *regs, int cpu)
544{
545#ifdef CONFIG_SYSCTL
546 if (unknown_nmi_panic)
547 return unknown_nmi_panic_callback(regs, cpu);
548#endif
549 return 0;
550}
551
552void arch_trigger_all_cpu_backtrace(void)
553{
554 int i;
555
556 cpumask_copy(to_cpumask(backtrace_mask), cpu_online_mask);
557
558 printk(KERN_INFO "sending NMI to all CPUs:\n");
559 apic->send_IPI_all(NMI_VECTOR);
560
561 /* Wait for up to 10 seconds for all CPUs to do the backtrace */
562 for (i = 0; i < 10 * 1000; i++) {
563 if (cpumask_empty(to_cpumask(backtrace_mask)))
564 break;
565 mdelay(1);
566 }
567}
diff --git a/arch/x86/kernel/apic/probe_64.c b/arch/x86/kernel/apic/probe_64.c
index f9e4e6a54073..d8c4a6feb286 100644
--- a/arch/x86/kernel/apic/probe_64.c
+++ b/arch/x86/kernel/apic/probe_64.c
@@ -79,13 +79,6 @@ void __init default_setup_apic_routing(void)
79 /* need to update phys_pkg_id */ 79 /* need to update phys_pkg_id */
80 apic->phys_pkg_id = apicid_phys_pkg_id; 80 apic->phys_pkg_id = apicid_phys_pkg_id;
81 } 81 }
82
83 /*
84 * Now that apic routing model is selected, configure the
85 * fault handling for intr remapping.
86 */
87 if (intr_remapping_enabled)
88 enable_drhd_fault_handling();
89} 82}
90 83
91/* Same for both flat and physical. */ 84/* Same for both flat and physical. */
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index c1c52c341f40..2a3f2a7db243 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -48,6 +48,16 @@ unsigned int uv_apicid_hibits;
48EXPORT_SYMBOL_GPL(uv_apicid_hibits); 48EXPORT_SYMBOL_GPL(uv_apicid_hibits);
49static DEFINE_SPINLOCK(uv_nmi_lock); 49static DEFINE_SPINLOCK(uv_nmi_lock);
50 50
51static unsigned long __init uv_early_read_mmr(unsigned long addr)
52{
53 unsigned long val, *mmr;
54
55 mmr = early_ioremap(UV_LOCAL_MMR_BASE | addr, sizeof(*mmr));
56 val = *mmr;
57 early_iounmap(mmr, sizeof(*mmr));
58 return val;
59}
60
51static inline bool is_GRU_range(u64 start, u64 end) 61static inline bool is_GRU_range(u64 start, u64 end)
52{ 62{
53 return start >= gru_start_paddr && end <= gru_end_paddr; 63 return start >= gru_start_paddr && end <= gru_end_paddr;
@@ -58,28 +68,24 @@ static bool uv_is_untracked_pat_range(u64 start, u64 end)
58 return is_ISA_range(start, end) || is_GRU_range(start, end); 68 return is_ISA_range(start, end) || is_GRU_range(start, end);
59} 69}
60 70
61static int early_get_nodeid(void) 71static int __init early_get_pnodeid(void)
62{ 72{
63 union uvh_node_id_u node_id; 73 union uvh_node_id_u node_id;
64 unsigned long *mmr; 74 union uvh_rh_gam_config_mmr_u m_n_config;
65 75 int pnode;
66 mmr = early_ioremap(UV_LOCAL_MMR_BASE | UVH_NODE_ID, sizeof(*mmr));
67 node_id.v = *mmr;
68 early_iounmap(mmr, sizeof(*mmr));
69 76
70 /* Currently, all blades have same revision number */ 77 /* Currently, all blades have same revision number */
78 node_id.v = uv_early_read_mmr(UVH_NODE_ID);
79 m_n_config.v = uv_early_read_mmr(UVH_RH_GAM_CONFIG_MMR);
71 uv_min_hub_revision_id = node_id.s.revision; 80 uv_min_hub_revision_id = node_id.s.revision;
72 81
73 return node_id.s.node_id; 82 pnode = (node_id.s.node_id >> 1) & ((1 << m_n_config.s.n_skt) - 1);
83 return pnode;
74} 84}
75 85
76static void __init early_get_apic_pnode_shift(void) 86static void __init early_get_apic_pnode_shift(void)
77{ 87{
78 unsigned long *mmr; 88 uvh_apicid.v = uv_early_read_mmr(UVH_APICID);
79
80 mmr = early_ioremap(UV_LOCAL_MMR_BASE | UVH_APICID, sizeof(*mmr));
81 uvh_apicid.v = *mmr;
82 early_iounmap(mmr, sizeof(*mmr));
83 if (!uvh_apicid.v) 89 if (!uvh_apicid.v)
84 /* 90 /*
85 * Old bios, use default value 91 * Old bios, use default value
@@ -95,21 +101,17 @@ static void __init early_get_apic_pnode_shift(void)
95static void __init uv_set_apicid_hibit(void) 101static void __init uv_set_apicid_hibit(void)
96{ 102{
97 union uvh_lb_target_physical_apic_id_mask_u apicid_mask; 103 union uvh_lb_target_physical_apic_id_mask_u apicid_mask;
98 unsigned long *mmr;
99 104
100 mmr = early_ioremap(UV_LOCAL_MMR_BASE | 105 apicid_mask.v = uv_early_read_mmr(UVH_LB_TARGET_PHYSICAL_APIC_ID_MASK);
101 UVH_LB_TARGET_PHYSICAL_APIC_ID_MASK, sizeof(*mmr));
102 apicid_mask.v = *mmr;
103 early_iounmap(mmr, sizeof(*mmr));
104 uv_apicid_hibits = apicid_mask.s.bit_enables & UV_APICID_HIBIT_MASK; 106 uv_apicid_hibits = apicid_mask.s.bit_enables & UV_APICID_HIBIT_MASK;
105} 107}
106 108
107static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id) 109static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
108{ 110{
109 int nodeid; 111 int pnodeid;
110 112
111 if (!strcmp(oem_id, "SGI")) { 113 if (!strcmp(oem_id, "SGI")) {
112 nodeid = early_get_nodeid(); 114 pnodeid = early_get_pnodeid();
113 early_get_apic_pnode_shift(); 115 early_get_apic_pnode_shift();
114 x86_platform.is_untracked_pat_range = uv_is_untracked_pat_range; 116 x86_platform.is_untracked_pat_range = uv_is_untracked_pat_range;
115 x86_platform.nmi_init = uv_nmi_init; 117 x86_platform.nmi_init = uv_nmi_init;
@@ -119,7 +121,7 @@ static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
119 uv_system_type = UV_X2APIC; 121 uv_system_type = UV_X2APIC;
120 else if (!strcmp(oem_table_id, "UVH")) { 122 else if (!strcmp(oem_table_id, "UVH")) {
121 __get_cpu_var(x2apic_extra_bits) = 123 __get_cpu_var(x2apic_extra_bits) =
122 nodeid << (uvh_apicid.s.pnode_shift - 1); 124 pnodeid << uvh_apicid.s.pnode_shift;
123 uv_system_type = UV_NON_UNIQUE_APIC; 125 uv_system_type = UV_NON_UNIQUE_APIC;
124 uv_set_apicid_hibit(); 126 uv_set_apicid_hibit();
125 return 1; 127 return 1;
@@ -682,27 +684,32 @@ void uv_nmi_init(void)
682void __init uv_system_init(void) 684void __init uv_system_init(void)
683{ 685{
684 union uvh_rh_gam_config_mmr_u m_n_config; 686 union uvh_rh_gam_config_mmr_u m_n_config;
687 union uvh_rh_gam_mmioh_overlay_config_mmr_u mmioh;
685 union uvh_node_id_u node_id; 688 union uvh_node_id_u node_id;
686 unsigned long gnode_upper, lowmem_redir_base, lowmem_redir_size; 689 unsigned long gnode_upper, lowmem_redir_base, lowmem_redir_size;
687 int bytes, nid, cpu, lcpu, pnode, blade, i, j, m_val, n_val; 690 int bytes, nid, cpu, lcpu, pnode, blade, i, j, m_val, n_val, n_io;
688 int gnode_extra, max_pnode = 0; 691 int gnode_extra, max_pnode = 0;
689 unsigned long mmr_base, present, paddr; 692 unsigned long mmr_base, present, paddr;
690 unsigned short pnode_mask; 693 unsigned short pnode_mask, pnode_io_mask;
691 694
692 map_low_mmrs(); 695 map_low_mmrs();
693 696
694 m_n_config.v = uv_read_local_mmr(UVH_RH_GAM_CONFIG_MMR ); 697 m_n_config.v = uv_read_local_mmr(UVH_RH_GAM_CONFIG_MMR );
695 m_val = m_n_config.s.m_skt; 698 m_val = m_n_config.s.m_skt;
696 n_val = m_n_config.s.n_skt; 699 n_val = m_n_config.s.n_skt;
700 mmioh.v = uv_read_local_mmr(UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR);
701 n_io = mmioh.s.n_io;
697 mmr_base = 702 mmr_base =
698 uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR) & 703 uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR) &
699 ~UV_MMR_ENABLE; 704 ~UV_MMR_ENABLE;
700 pnode_mask = (1 << n_val) - 1; 705 pnode_mask = (1 << n_val) - 1;
706 pnode_io_mask = (1 << n_io) - 1;
707
701 node_id.v = uv_read_local_mmr(UVH_NODE_ID); 708 node_id.v = uv_read_local_mmr(UVH_NODE_ID);
702 gnode_extra = (node_id.s.node_id & ~((1 << n_val) - 1)) >> 1; 709 gnode_extra = (node_id.s.node_id & ~((1 << n_val) - 1)) >> 1;
703 gnode_upper = ((unsigned long)gnode_extra << m_val); 710 gnode_upper = ((unsigned long)gnode_extra << m_val);
704 printk(KERN_DEBUG "UV: N %d, M %d, gnode_upper 0x%lx, gnode_extra 0x%x\n", 711 printk(KERN_INFO "UV: N %d, M %d, N_IO: %d, gnode_upper 0x%lx, gnode_extra 0x%x, pnode_mask 0x%x, pnode_io_mask 0x%x\n",
705 n_val, m_val, gnode_upper, gnode_extra); 712 n_val, m_val, n_io, gnode_upper, gnode_extra, pnode_mask, pnode_io_mask);
706 713
707 printk(KERN_DEBUG "UV: global MMR base 0x%lx\n", mmr_base); 714 printk(KERN_DEBUG "UV: global MMR base 0x%lx\n", mmr_base);
708 715
@@ -735,7 +742,7 @@ void __init uv_system_init(void)
735 for (j = 0; j < 64; j++) { 742 for (j = 0; j < 64; j++) {
736 if (!test_bit(j, &present)) 743 if (!test_bit(j, &present))
737 continue; 744 continue;
738 pnode = (i * 64 + j); 745 pnode = (i * 64 + j) & pnode_mask;
739 uv_blade_info[blade].pnode = pnode; 746 uv_blade_info[blade].pnode = pnode;
740 uv_blade_info[blade].nr_possible_cpus = 0; 747 uv_blade_info[blade].nr_possible_cpus = 0;
741 uv_blade_info[blade].nr_online_cpus = 0; 748 uv_blade_info[blade].nr_online_cpus = 0;
@@ -756,6 +763,7 @@ void __init uv_system_init(void)
756 /* 763 /*
757 * apic_pnode_shift must be set before calling uv_apicid_to_pnode(); 764 * apic_pnode_shift must be set before calling uv_apicid_to_pnode();
758 */ 765 */
766 uv_cpu_hub_info(cpu)->pnode_mask = pnode_mask;
759 uv_cpu_hub_info(cpu)->apic_pnode_shift = uvh_apicid.s.pnode_shift; 767 uv_cpu_hub_info(cpu)->apic_pnode_shift = uvh_apicid.s.pnode_shift;
760 pnode = uv_apicid_to_pnode(apicid); 768 pnode = uv_apicid_to_pnode(apicid);
761 blade = boot_pnode_to_blade(pnode); 769 blade = boot_pnode_to_blade(pnode);
@@ -772,7 +780,6 @@ void __init uv_system_init(void)
772 uv_cpu_hub_info(cpu)->numa_blade_id = blade; 780 uv_cpu_hub_info(cpu)->numa_blade_id = blade;
773 uv_cpu_hub_info(cpu)->blade_processor_id = lcpu; 781 uv_cpu_hub_info(cpu)->blade_processor_id = lcpu;
774 uv_cpu_hub_info(cpu)->pnode = pnode; 782 uv_cpu_hub_info(cpu)->pnode = pnode;
775 uv_cpu_hub_info(cpu)->pnode_mask = pnode_mask;
776 uv_cpu_hub_info(cpu)->gpa_mask = (1UL << (m_val + n_val)) - 1; 783 uv_cpu_hub_info(cpu)->gpa_mask = (1UL << (m_val + n_val)) - 1;
777 uv_cpu_hub_info(cpu)->gnode_upper = gnode_upper; 784 uv_cpu_hub_info(cpu)->gnode_upper = gnode_upper;
778 uv_cpu_hub_info(cpu)->gnode_extra = gnode_extra; 785 uv_cpu_hub_info(cpu)->gnode_extra = gnode_extra;
@@ -796,7 +803,7 @@ void __init uv_system_init(void)
796 803
797 map_gru_high(max_pnode); 804 map_gru_high(max_pnode);
798 map_mmr_high(max_pnode); 805 map_mmr_high(max_pnode);
799 map_mmioh_high(max_pnode); 806 map_mmioh_high(max_pnode & pnode_io_mask);
800 807
801 uv_cpu_init(); 808 uv_cpu_init();
802 uv_scir_register_cpu_notifier(); 809 uv_scir_register_cpu_notifier();
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 4b68bda30938..1d59834396bd 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -894,7 +894,6 @@ void __init identify_boot_cpu(void)
894#else 894#else
895 vgetcpu_set_mode(); 895 vgetcpu_set_mode();
896#endif 896#endif
897 init_hw_perf_events();
898} 897}
899 898
900void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c) 899void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c)
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 17ad03366211..9ecf81f9b90f 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -149,8 +149,7 @@ union _cpuid4_leaf_ecx {
149}; 149};
150 150
151struct amd_l3_cache { 151struct amd_l3_cache {
152 struct pci_dev *dev; 152 struct amd_northbridge *nb;
153 bool can_disable;
154 unsigned indices; 153 unsigned indices;
155 u8 subcaches[4]; 154 u8 subcaches[4];
156}; 155};
@@ -311,14 +310,12 @@ struct _cache_attr {
311/* 310/*
312 * L3 cache descriptors 311 * L3 cache descriptors
313 */ 312 */
314static struct amd_l3_cache **__cpuinitdata l3_caches;
315
316static void __cpuinit amd_calc_l3_indices(struct amd_l3_cache *l3) 313static void __cpuinit amd_calc_l3_indices(struct amd_l3_cache *l3)
317{ 314{
318 unsigned int sc0, sc1, sc2, sc3; 315 unsigned int sc0, sc1, sc2, sc3;
319 u32 val = 0; 316 u32 val = 0;
320 317
321 pci_read_config_dword(l3->dev, 0x1C4, &val); 318 pci_read_config_dword(l3->nb->misc, 0x1C4, &val);
322 319
323 /* calculate subcache sizes */ 320 /* calculate subcache sizes */
324 l3->subcaches[0] = sc0 = !(val & BIT(0)); 321 l3->subcaches[0] = sc0 = !(val & BIT(0));
@@ -330,47 +327,14 @@ static void __cpuinit amd_calc_l3_indices(struct amd_l3_cache *l3)
330 l3->indices = (max(max3(sc0, sc1, sc2), sc3) << 10) - 1; 327 l3->indices = (max(max3(sc0, sc1, sc2), sc3) << 10) - 1;
331} 328}
332 329
333static struct amd_l3_cache * __cpuinit amd_init_l3_cache(int node) 330static void __cpuinit amd_init_l3_cache(struct _cpuid4_info_regs *this_leaf,
334{ 331 int index)
335 struct amd_l3_cache *l3;
336 struct pci_dev *dev = node_to_k8_nb_misc(node);
337
338 l3 = kzalloc(sizeof(struct amd_l3_cache), GFP_ATOMIC);
339 if (!l3) {
340 printk(KERN_WARNING "Error allocating L3 struct\n");
341 return NULL;
342 }
343
344 l3->dev = dev;
345
346 amd_calc_l3_indices(l3);
347
348 return l3;
349}
350
351static void __cpuinit amd_check_l3_disable(struct _cpuid4_info_regs *this_leaf,
352 int index)
353{ 332{
333 static struct amd_l3_cache *__cpuinitdata l3_caches;
354 int node; 334 int node;
355 335
356 if (boot_cpu_data.x86 != 0x10) 336 /* only for L3, and not in virtualized environments */
357 return; 337 if (index < 3 || amd_nb_num() == 0)
358
359 if (index < 3)
360 return;
361
362 /* see errata #382 and #388 */
363 if (boot_cpu_data.x86_model < 0x8)
364 return;
365
366 if ((boot_cpu_data.x86_model == 0x8 ||
367 boot_cpu_data.x86_model == 0x9)
368 &&
369 boot_cpu_data.x86_mask < 0x1)
370 return;
371
372 /* not in virtualized environments */
373 if (k8_northbridges.num == 0)
374 return; 338 return;
375 339
376 /* 340 /*
@@ -378,7 +342,7 @@ static void __cpuinit amd_check_l3_disable(struct _cpuid4_info_regs *this_leaf,
378 * never freed but this is done only on shutdown so it doesn't matter. 342 * never freed but this is done only on shutdown so it doesn't matter.
379 */ 343 */
380 if (!l3_caches) { 344 if (!l3_caches) {
381 int size = k8_northbridges.num * sizeof(struct amd_l3_cache *); 345 int size = amd_nb_num() * sizeof(struct amd_l3_cache);
382 346
383 l3_caches = kzalloc(size, GFP_ATOMIC); 347 l3_caches = kzalloc(size, GFP_ATOMIC);
384 if (!l3_caches) 348 if (!l3_caches)
@@ -387,14 +351,12 @@ static void __cpuinit amd_check_l3_disable(struct _cpuid4_info_regs *this_leaf,
387 351
388 node = amd_get_nb_id(smp_processor_id()); 352 node = amd_get_nb_id(smp_processor_id());
389 353
390 if (!l3_caches[node]) { 354 if (!l3_caches[node].nb) {
391 l3_caches[node] = amd_init_l3_cache(node); 355 l3_caches[node].nb = node_to_amd_nb(node);
392 l3_caches[node]->can_disable = true; 356 amd_calc_l3_indices(&l3_caches[node]);
393 } 357 }
394 358
395 WARN_ON(!l3_caches[node]); 359 this_leaf->l3 = &l3_caches[node];
396
397 this_leaf->l3 = l3_caches[node];
398} 360}
399 361
400/* 362/*
@@ -408,7 +370,7 @@ int amd_get_l3_disable_slot(struct amd_l3_cache *l3, unsigned slot)
408{ 370{
409 unsigned int reg = 0; 371 unsigned int reg = 0;
410 372
411 pci_read_config_dword(l3->dev, 0x1BC + slot * 4, &reg); 373 pci_read_config_dword(l3->nb->misc, 0x1BC + slot * 4, &reg);
412 374
413 /* check whether this slot is activated already */ 375 /* check whether this slot is activated already */
414 if (reg & (3UL << 30)) 376 if (reg & (3UL << 30))
@@ -422,7 +384,8 @@ static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf,
422{ 384{
423 int index; 385 int index;
424 386
425 if (!this_leaf->l3 || !this_leaf->l3->can_disable) 387 if (!this_leaf->l3 ||
388 !amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
426 return -EINVAL; 389 return -EINVAL;
427 390
428 index = amd_get_l3_disable_slot(this_leaf->l3, slot); 391 index = amd_get_l3_disable_slot(this_leaf->l3, slot);
@@ -457,7 +420,7 @@ static void amd_l3_disable_index(struct amd_l3_cache *l3, int cpu,
457 if (!l3->subcaches[i]) 420 if (!l3->subcaches[i])
458 continue; 421 continue;
459 422
460 pci_write_config_dword(l3->dev, 0x1BC + slot * 4, reg); 423 pci_write_config_dword(l3->nb->misc, 0x1BC + slot * 4, reg);
461 424
462 /* 425 /*
463 * We need to WBINVD on a core on the node containing the L3 426 * We need to WBINVD on a core on the node containing the L3
@@ -467,7 +430,7 @@ static void amd_l3_disable_index(struct amd_l3_cache *l3, int cpu,
467 wbinvd_on_cpu(cpu); 430 wbinvd_on_cpu(cpu);
468 431
469 reg |= BIT(31); 432 reg |= BIT(31);
470 pci_write_config_dword(l3->dev, 0x1BC + slot * 4, reg); 433 pci_write_config_dword(l3->nb->misc, 0x1BC + slot * 4, reg);
471 } 434 }
472} 435}
473 436
@@ -524,7 +487,8 @@ static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf,
524 if (!capable(CAP_SYS_ADMIN)) 487 if (!capable(CAP_SYS_ADMIN))
525 return -EPERM; 488 return -EPERM;
526 489
527 if (!this_leaf->l3 || !this_leaf->l3->can_disable) 490 if (!this_leaf->l3 ||
491 !amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
528 return -EINVAL; 492 return -EINVAL;
529 493
530 cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map)); 494 cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map));
@@ -545,7 +509,7 @@ static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf,
545#define STORE_CACHE_DISABLE(slot) \ 509#define STORE_CACHE_DISABLE(slot) \
546static ssize_t \ 510static ssize_t \
547store_cache_disable_##slot(struct _cpuid4_info *this_leaf, \ 511store_cache_disable_##slot(struct _cpuid4_info *this_leaf, \
548 const char *buf, size_t count) \ 512 const char *buf, size_t count) \
549{ \ 513{ \
550 return store_cache_disable(this_leaf, buf, count, slot); \ 514 return store_cache_disable(this_leaf, buf, count, slot); \
551} 515}
@@ -558,10 +522,7 @@ static struct _cache_attr cache_disable_1 = __ATTR(cache_disable_1, 0644,
558 show_cache_disable_1, store_cache_disable_1); 522 show_cache_disable_1, store_cache_disable_1);
559 523
560#else /* CONFIG_AMD_NB */ 524#else /* CONFIG_AMD_NB */
561static void __cpuinit 525#define amd_init_l3_cache(x, y)
562amd_check_l3_disable(struct _cpuid4_info_regs *this_leaf, int index)
563{
564};
565#endif /* CONFIG_AMD_NB */ 526#endif /* CONFIG_AMD_NB */
566 527
567static int 528static int
@@ -575,7 +536,7 @@ __cpuinit cpuid4_cache_lookup_regs(int index,
575 536
576 if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) { 537 if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
577 amd_cpuid4(index, &eax, &ebx, &ecx); 538 amd_cpuid4(index, &eax, &ebx, &ecx);
578 amd_check_l3_disable(this_leaf, index); 539 amd_init_l3_cache(this_leaf, index);
579 } else { 540 } else {
580 cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx); 541 cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx);
581 } 542 }
@@ -983,30 +944,48 @@ define_one_ro(size);
983define_one_ro(shared_cpu_map); 944define_one_ro(shared_cpu_map);
984define_one_ro(shared_cpu_list); 945define_one_ro(shared_cpu_list);
985 946
986#define DEFAULT_SYSFS_CACHE_ATTRS \
987 &type.attr, \
988 &level.attr, \
989 &coherency_line_size.attr, \
990 &physical_line_partition.attr, \
991 &ways_of_associativity.attr, \
992 &number_of_sets.attr, \
993 &size.attr, \
994 &shared_cpu_map.attr, \
995 &shared_cpu_list.attr
996
997static struct attribute *default_attrs[] = { 947static struct attribute *default_attrs[] = {
998 DEFAULT_SYSFS_CACHE_ATTRS, 948 &type.attr,
949 &level.attr,
950 &coherency_line_size.attr,
951 &physical_line_partition.attr,
952 &ways_of_associativity.attr,
953 &number_of_sets.attr,
954 &size.attr,
955 &shared_cpu_map.attr,
956 &shared_cpu_list.attr,
999 NULL 957 NULL
1000}; 958};
1001 959
1002static struct attribute *default_l3_attrs[] = {
1003 DEFAULT_SYSFS_CACHE_ATTRS,
1004#ifdef CONFIG_AMD_NB 960#ifdef CONFIG_AMD_NB
1005 &cache_disable_0.attr, 961static struct attribute ** __cpuinit amd_l3_attrs(void)
1006 &cache_disable_1.attr, 962{
963 static struct attribute **attrs;
964 int n;
965
966 if (attrs)
967 return attrs;
968
969 n = sizeof (default_attrs) / sizeof (struct attribute *);
970
971 if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
972 n += 2;
973
974 attrs = kzalloc(n * sizeof (struct attribute *), GFP_KERNEL);
975 if (attrs == NULL)
976 return attrs = default_attrs;
977
978 for (n = 0; default_attrs[n]; n++)
979 attrs[n] = default_attrs[n];
980
981 if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE)) {
982 attrs[n++] = &cache_disable_0.attr;
983 attrs[n++] = &cache_disable_1.attr;
984 }
985
986 return attrs;
987}
1007#endif 988#endif
1008 NULL
1009};
1010 989
1011static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf) 990static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
1012{ 991{
@@ -1117,11 +1096,11 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev)
1117 1096
1118 this_leaf = CPUID4_INFO_IDX(cpu, i); 1097 this_leaf = CPUID4_INFO_IDX(cpu, i);
1119 1098
1120 if (this_leaf->l3 && this_leaf->l3->can_disable) 1099 ktype_cache.default_attrs = default_attrs;
1121 ktype_cache.default_attrs = default_l3_attrs; 1100#ifdef CONFIG_AMD_NB
1122 else 1101 if (this_leaf->l3)
1123 ktype_cache.default_attrs = default_attrs; 1102 ktype_cache.default_attrs = amd_l3_attrs();
1124 1103#endif
1125 retval = kobject_init_and_add(&(this_object->kobj), 1104 retval = kobject_init_and_add(&(this_object->kobj),
1126 &ktype_cache, 1105 &ktype_cache,
1127 per_cpu(ici_cache_kobject, cpu), 1106 per_cpu(ici_cache_kobject, cpu),
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index 80c482382d5c..5bf2fac52aca 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -31,8 +31,6 @@
31#include <asm/mce.h> 31#include <asm/mce.h>
32#include <asm/msr.h> 32#include <asm/msr.h>
33 33
34#define PFX "mce_threshold: "
35#define VERSION "version 1.1.1"
36#define NR_BANKS 6 34#define NR_BANKS 6
37#define NR_BLOCKS 9 35#define NR_BLOCKS 9
38#define THRESHOLD_MAX 0xFFF 36#define THRESHOLD_MAX 0xFFF
@@ -59,12 +57,6 @@ struct threshold_block {
59 struct list_head miscj; 57 struct list_head miscj;
60}; 58};
61 59
62/* defaults used early on boot */
63static struct threshold_block threshold_defaults = {
64 .interrupt_enable = 0,
65 .threshold_limit = THRESHOLD_MAX,
66};
67
68struct threshold_bank { 60struct threshold_bank {
69 struct kobject *kobj; 61 struct kobject *kobj;
70 struct threshold_block *blocks; 62 struct threshold_block *blocks;
@@ -89,50 +81,101 @@ static void amd_threshold_interrupt(void);
89struct thresh_restart { 81struct thresh_restart {
90 struct threshold_block *b; 82 struct threshold_block *b;
91 int reset; 83 int reset;
84 int set_lvt_off;
85 int lvt_off;
92 u16 old_limit; 86 u16 old_limit;
93}; 87};
94 88
89static int lvt_off_valid(struct threshold_block *b, int apic, u32 lo, u32 hi)
90{
91 int msr = (hi & MASK_LVTOFF_HI) >> 20;
92
93 if (apic < 0) {
94 pr_err(FW_BUG "cpu %d, failed to setup threshold interrupt "
95 "for bank %d, block %d (MSR%08X=0x%x%08x)\n", b->cpu,
96 b->bank, b->block, b->address, hi, lo);
97 return 0;
98 }
99
100 if (apic != msr) {
101 pr_err(FW_BUG "cpu %d, invalid threshold interrupt offset %d "
102 "for bank %d, block %d (MSR%08X=0x%x%08x)\n",
103 b->cpu, apic, b->bank, b->block, b->address, hi, lo);
104 return 0;
105 }
106
107 return 1;
108};
109
95/* must be called with correct cpu affinity */ 110/* must be called with correct cpu affinity */
96/* Called via smp_call_function_single() */ 111/* Called via smp_call_function_single() */
97static void threshold_restart_bank(void *_tr) 112static void threshold_restart_bank(void *_tr)
98{ 113{
99 struct thresh_restart *tr = _tr; 114 struct thresh_restart *tr = _tr;
100 u32 mci_misc_hi, mci_misc_lo; 115 u32 hi, lo;
101 116
102 rdmsr(tr->b->address, mci_misc_lo, mci_misc_hi); 117 rdmsr(tr->b->address, lo, hi);
103 118
104 if (tr->b->threshold_limit < (mci_misc_hi & THRESHOLD_MAX)) 119 if (tr->b->threshold_limit < (hi & THRESHOLD_MAX))
105 tr->reset = 1; /* limit cannot be lower than err count */ 120 tr->reset = 1; /* limit cannot be lower than err count */
106 121
107 if (tr->reset) { /* reset err count and overflow bit */ 122 if (tr->reset) { /* reset err count and overflow bit */
108 mci_misc_hi = 123 hi =
109 (mci_misc_hi & ~(MASK_ERR_COUNT_HI | MASK_OVERFLOW_HI)) | 124 (hi & ~(MASK_ERR_COUNT_HI | MASK_OVERFLOW_HI)) |
110 (THRESHOLD_MAX - tr->b->threshold_limit); 125 (THRESHOLD_MAX - tr->b->threshold_limit);
111 } else if (tr->old_limit) { /* change limit w/o reset */ 126 } else if (tr->old_limit) { /* change limit w/o reset */
112 int new_count = (mci_misc_hi & THRESHOLD_MAX) + 127 int new_count = (hi & THRESHOLD_MAX) +
113 (tr->old_limit - tr->b->threshold_limit); 128 (tr->old_limit - tr->b->threshold_limit);
114 129
115 mci_misc_hi = (mci_misc_hi & ~MASK_ERR_COUNT_HI) | 130 hi = (hi & ~MASK_ERR_COUNT_HI) |
116 (new_count & THRESHOLD_MAX); 131 (new_count & THRESHOLD_MAX);
117 } 132 }
118 133
134 if (tr->set_lvt_off) {
135 if (lvt_off_valid(tr->b, tr->lvt_off, lo, hi)) {
136 /* set new lvt offset */
137 hi &= ~MASK_LVTOFF_HI;
138 hi |= tr->lvt_off << 20;
139 }
140 }
141
119 tr->b->interrupt_enable ? 142 tr->b->interrupt_enable ?
120 (mci_misc_hi = (mci_misc_hi & ~MASK_INT_TYPE_HI) | INT_TYPE_APIC) : 143 (hi = (hi & ~MASK_INT_TYPE_HI) | INT_TYPE_APIC) :
121 (mci_misc_hi &= ~MASK_INT_TYPE_HI); 144 (hi &= ~MASK_INT_TYPE_HI);
122 145
123 mci_misc_hi |= MASK_COUNT_EN_HI; 146 hi |= MASK_COUNT_EN_HI;
124 wrmsr(tr->b->address, mci_misc_lo, mci_misc_hi); 147 wrmsr(tr->b->address, lo, hi);
148}
149
150static void mce_threshold_block_init(struct threshold_block *b, int offset)
151{
152 struct thresh_restart tr = {
153 .b = b,
154 .set_lvt_off = 1,
155 .lvt_off = offset,
156 };
157
158 b->threshold_limit = THRESHOLD_MAX;
159 threshold_restart_bank(&tr);
160};
161
162static int setup_APIC_mce(int reserved, int new)
163{
164 if (reserved < 0 && !setup_APIC_eilvt(new, THRESHOLD_APIC_VECTOR,
165 APIC_EILVT_MSG_FIX, 0))
166 return new;
167
168 return reserved;
125} 169}
126 170
127/* cpu init entry point, called from mce.c with preempt off */ 171/* cpu init entry point, called from mce.c with preempt off */
128void mce_amd_feature_init(struct cpuinfo_x86 *c) 172void mce_amd_feature_init(struct cpuinfo_x86 *c)
129{ 173{
174 struct threshold_block b;
130 unsigned int cpu = smp_processor_id(); 175 unsigned int cpu = smp_processor_id();
131 u32 low = 0, high = 0, address = 0; 176 u32 low = 0, high = 0, address = 0;
132 unsigned int bank, block; 177 unsigned int bank, block;
133 struct thresh_restart tr; 178 int offset = -1;
134 int lvt_off = -1;
135 u8 offset;
136 179
137 for (bank = 0; bank < NR_BANKS; ++bank) { 180 for (bank = 0; bank < NR_BANKS; ++bank) {
138 for (block = 0; block < NR_BLOCKS; ++block) { 181 for (block = 0; block < NR_BLOCKS; ++block) {
@@ -163,39 +206,16 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
163 if (shared_bank[bank] && c->cpu_core_id) 206 if (shared_bank[bank] && c->cpu_core_id)
164 break; 207 break;
165#endif 208#endif
166 offset = (high & MASK_LVTOFF_HI) >> 20; 209 offset = setup_APIC_mce(offset,
167 if (lvt_off < 0) { 210 (high & MASK_LVTOFF_HI) >> 20);
168 if (setup_APIC_eilvt(offset,
169 THRESHOLD_APIC_VECTOR,
170 APIC_EILVT_MSG_FIX, 0)) {
171 pr_err(FW_BUG "cpu %d, failed to "
172 "setup threshold interrupt "
173 "for bank %d, block %d "
174 "(MSR%08X=0x%x%08x)",
175 smp_processor_id(), bank, block,
176 address, high, low);
177 continue;
178 }
179 lvt_off = offset;
180 } else if (lvt_off != offset) {
181 pr_err(FW_BUG "cpu %d, invalid threshold "
182 "interrupt offset %d for bank %d,"
183 "block %d (MSR%08X=0x%x%08x)",
184 smp_processor_id(), lvt_off, bank,
185 block, address, high, low);
186 continue;
187 }
188
189 high &= ~MASK_LVTOFF_HI;
190 high |= lvt_off << 20;
191 wrmsr(address, low, high);
192 211
193 threshold_defaults.address = address; 212 memset(&b, 0, sizeof(b));
194 tr.b = &threshold_defaults; 213 b.cpu = cpu;
195 tr.reset = 0; 214 b.bank = bank;
196 tr.old_limit = 0; 215 b.block = block;
197 threshold_restart_bank(&tr); 216 b.address = address;
198 217
218 mce_threshold_block_init(&b, offset);
199 mce_threshold_vector = amd_threshold_interrupt; 219 mce_threshold_vector = amd_threshold_interrupt;
200 } 220 }
201 } 221 }
@@ -298,9 +318,8 @@ store_interrupt_enable(struct threshold_block *b, const char *buf, size_t size)
298 318
299 b->interrupt_enable = !!new; 319 b->interrupt_enable = !!new;
300 320
321 memset(&tr, 0, sizeof(tr));
301 tr.b = b; 322 tr.b = b;
302 tr.reset = 0;
303 tr.old_limit = 0;
304 323
305 smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1); 324 smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1);
306 325
@@ -321,10 +340,10 @@ store_threshold_limit(struct threshold_block *b, const char *buf, size_t size)
321 if (new < 1) 340 if (new < 1)
322 new = 1; 341 new = 1;
323 342
343 memset(&tr, 0, sizeof(tr));
324 tr.old_limit = b->threshold_limit; 344 tr.old_limit = b->threshold_limit;
325 b->threshold_limit = new; 345 b->threshold_limit = new;
326 tr.b = b; 346 tr.b = b;
327 tr.reset = 0;
328 347
329 smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1); 348 smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1);
330 349
@@ -603,9 +622,9 @@ static __cpuinit int threshold_create_device(unsigned int cpu)
603 continue; 622 continue;
604 err = threshold_create_bank(cpu, bank); 623 err = threshold_create_bank(cpu, bank);
605 if (err) 624 if (err)
606 goto out; 625 return err;
607 } 626 }
608out: 627
609 return err; 628 return err;
610} 629}
611 630
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index 4b683267eca5..e12246ff5aa6 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -53,8 +53,13 @@ struct thermal_state {
53 struct _thermal_state core_power_limit; 53 struct _thermal_state core_power_limit;
54 struct _thermal_state package_throttle; 54 struct _thermal_state package_throttle;
55 struct _thermal_state package_power_limit; 55 struct _thermal_state package_power_limit;
56 struct _thermal_state core_thresh0;
57 struct _thermal_state core_thresh1;
56}; 58};
57 59
60/* Callback to handle core threshold interrupts */
61int (*platform_thermal_notify)(__u64 msr_val);
62
58static DEFINE_PER_CPU(struct thermal_state, thermal_state); 63static DEFINE_PER_CPU(struct thermal_state, thermal_state);
59 64
60static atomic_t therm_throt_en = ATOMIC_INIT(0); 65static atomic_t therm_throt_en = ATOMIC_INIT(0);
@@ -200,6 +205,22 @@ static int therm_throt_process(bool new_event, int event, int level)
200 return 0; 205 return 0;
201} 206}
202 207
208static int thresh_event_valid(int event)
209{
210 struct _thermal_state *state;
211 unsigned int this_cpu = smp_processor_id();
212 struct thermal_state *pstate = &per_cpu(thermal_state, this_cpu);
213 u64 now = get_jiffies_64();
214
215 state = (event == 0) ? &pstate->core_thresh0 : &pstate->core_thresh1;
216
217 if (time_before64(now, state->next_check))
218 return 0;
219
220 state->next_check = now + CHECK_INTERVAL;
221 return 1;
222}
223
203#ifdef CONFIG_SYSFS 224#ifdef CONFIG_SYSFS
204/* Add/Remove thermal_throttle interface for CPU device: */ 225/* Add/Remove thermal_throttle interface for CPU device: */
205static __cpuinit int thermal_throttle_add_dev(struct sys_device *sys_dev, 226static __cpuinit int thermal_throttle_add_dev(struct sys_device *sys_dev,
@@ -313,6 +334,22 @@ device_initcall(thermal_throttle_init_device);
313#define PACKAGE_THROTTLED ((__u64)2 << 62) 334#define PACKAGE_THROTTLED ((__u64)2 << 62)
314#define PACKAGE_POWER_LIMIT ((__u64)3 << 62) 335#define PACKAGE_POWER_LIMIT ((__u64)3 << 62)
315 336
337static void notify_thresholds(__u64 msr_val)
338{
339 /* check whether the interrupt handler is defined;
340 * otherwise simply return
341 */
342 if (!platform_thermal_notify)
343 return;
344
345 /* lower threshold reached */
346 if ((msr_val & THERM_LOG_THRESHOLD0) && thresh_event_valid(0))
347 platform_thermal_notify(msr_val);
348 /* higher threshold reached */
349 if ((msr_val & THERM_LOG_THRESHOLD1) && thresh_event_valid(1))
350 platform_thermal_notify(msr_val);
351}
352
316/* Thermal transition interrupt handler */ 353/* Thermal transition interrupt handler */
317static void intel_thermal_interrupt(void) 354static void intel_thermal_interrupt(void)
318{ 355{
@@ -321,6 +358,9 @@ static void intel_thermal_interrupt(void)
321 358
322 rdmsrl(MSR_IA32_THERM_STATUS, msr_val); 359 rdmsrl(MSR_IA32_THERM_STATUS, msr_val);
323 360
361 /* Check for violation of core thermal thresholds*/
362 notify_thresholds(msr_val);
363
324 if (therm_throt_process(msr_val & THERM_STATUS_PROCHOT, 364 if (therm_throt_process(msr_val & THERM_STATUS_PROCHOT,
325 THERMAL_THROTTLING_EVENT, 365 THERMAL_THROTTLING_EVENT,
326 CORE_LEVEL) != 0) 366 CORE_LEVEL) != 0)
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 6d75b9145b13..0a360d146596 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -330,9 +330,6 @@ static bool reserve_pmc_hardware(void)
330{ 330{
331 int i; 331 int i;
332 332
333 if (nmi_watchdog == NMI_LOCAL_APIC)
334 disable_lapic_nmi_watchdog();
335
336 for (i = 0; i < x86_pmu.num_counters; i++) { 333 for (i = 0; i < x86_pmu.num_counters; i++) {
337 if (!reserve_perfctr_nmi(x86_pmu.perfctr + i)) 334 if (!reserve_perfctr_nmi(x86_pmu.perfctr + i))
338 goto perfctr_fail; 335 goto perfctr_fail;
@@ -355,9 +352,6 @@ perfctr_fail:
355 for (i--; i >= 0; i--) 352 for (i--; i >= 0; i--)
356 release_perfctr_nmi(x86_pmu.perfctr + i); 353 release_perfctr_nmi(x86_pmu.perfctr + i);
357 354
358 if (nmi_watchdog == NMI_LOCAL_APIC)
359 enable_lapic_nmi_watchdog();
360
361 return false; 355 return false;
362} 356}
363 357
@@ -369,9 +363,6 @@ static void release_pmc_hardware(void)
369 release_perfctr_nmi(x86_pmu.perfctr + i); 363 release_perfctr_nmi(x86_pmu.perfctr + i);
370 release_evntsel_nmi(x86_pmu.eventsel + i); 364 release_evntsel_nmi(x86_pmu.eventsel + i);
371 } 365 }
372
373 if (nmi_watchdog == NMI_LOCAL_APIC)
374 enable_lapic_nmi_watchdog();
375} 366}
376 367
377#else 368#else
@@ -384,15 +375,53 @@ static void release_pmc_hardware(void) {}
384static bool check_hw_exists(void) 375static bool check_hw_exists(void)
385{ 376{
386 u64 val, val_new = 0; 377 u64 val, val_new = 0;
387 int ret = 0; 378 int i, reg, ret = 0;
379
380 /*
381 * Check to see if the BIOS enabled any of the counters, if so
382 * complain and bail.
383 */
384 for (i = 0; i < x86_pmu.num_counters; i++) {
385 reg = x86_pmu.eventsel + i;
386 ret = rdmsrl_safe(reg, &val);
387 if (ret)
388 goto msr_fail;
389 if (val & ARCH_PERFMON_EVENTSEL_ENABLE)
390 goto bios_fail;
391 }
388 392
393 if (x86_pmu.num_counters_fixed) {
394 reg = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
395 ret = rdmsrl_safe(reg, &val);
396 if (ret)
397 goto msr_fail;
398 for (i = 0; i < x86_pmu.num_counters_fixed; i++) {
399 if (val & (0x03 << i*4))
400 goto bios_fail;
401 }
402 }
403
404 /*
405 * Now write a value and read it back to see if it matches,
406 * this is needed to detect certain hardware emulators (qemu/kvm)
407 * that don't trap on the MSR access and always return 0s.
408 */
389 val = 0xabcdUL; 409 val = 0xabcdUL;
390 ret |= checking_wrmsrl(x86_pmu.perfctr, val); 410 ret = checking_wrmsrl(x86_pmu.perfctr, val);
391 ret |= rdmsrl_safe(x86_pmu.perfctr, &val_new); 411 ret |= rdmsrl_safe(x86_pmu.perfctr, &val_new);
392 if (ret || val != val_new) 412 if (ret || val != val_new)
393 return false; 413 goto msr_fail;
394 414
395 return true; 415 return true;
416
417bios_fail:
418 printk(KERN_CONT "Broken BIOS detected, using software events only.\n");
419 printk(KERN_ERR FW_BUG "the BIOS has corrupted hw-PMU resources (MSR %x is %Lx)\n", reg, val);
420 return false;
421
422msr_fail:
423 printk(KERN_CONT "Broken PMU hardware detected, using software events only.\n");
424 return false;
396} 425}
397 426
398static void reserve_ds_buffers(void); 427static void reserve_ds_buffers(void);
@@ -451,7 +480,7 @@ static int x86_setup_perfctr(struct perf_event *event)
451 struct hw_perf_event *hwc = &event->hw; 480 struct hw_perf_event *hwc = &event->hw;
452 u64 config; 481 u64 config;
453 482
454 if (!hwc->sample_period) { 483 if (!is_sampling_event(event)) {
455 hwc->sample_period = x86_pmu.max_period; 484 hwc->sample_period = x86_pmu.max_period;
456 hwc->last_period = hwc->sample_period; 485 hwc->last_period = hwc->sample_period;
457 local64_set(&hwc->period_left, hwc->sample_period); 486 local64_set(&hwc->period_left, hwc->sample_period);
@@ -1362,7 +1391,7 @@ static void __init pmu_check_apic(void)
1362 pr_info("no hardware sampling interrupt available.\n"); 1391 pr_info("no hardware sampling interrupt available.\n");
1363} 1392}
1364 1393
1365void __init init_hw_perf_events(void) 1394int __init init_hw_perf_events(void)
1366{ 1395{
1367 struct event_constraint *c; 1396 struct event_constraint *c;
1368 int err; 1397 int err;
@@ -1377,20 +1406,18 @@ void __init init_hw_perf_events(void)
1377 err = amd_pmu_init(); 1406 err = amd_pmu_init();
1378 break; 1407 break;
1379 default: 1408 default:
1380 return; 1409 return 0;
1381 } 1410 }
1382 if (err != 0) { 1411 if (err != 0) {
1383 pr_cont("no PMU driver, software events only.\n"); 1412 pr_cont("no PMU driver, software events only.\n");
1384 return; 1413 return 0;
1385 } 1414 }
1386 1415
1387 pmu_check_apic(); 1416 pmu_check_apic();
1388 1417
1389 /* sanity check that the hardware exists or is emulated */ 1418 /* sanity check that the hardware exists or is emulated */
1390 if (!check_hw_exists()) { 1419 if (!check_hw_exists())
1391 pr_cont("Broken PMU hardware detected, software events only.\n"); 1420 return 0;
1392 return;
1393 }
1394 1421
1395 pr_cont("%s PMU driver.\n", x86_pmu.name); 1422 pr_cont("%s PMU driver.\n", x86_pmu.name);
1396 1423
@@ -1438,9 +1465,12 @@ void __init init_hw_perf_events(void)
1438 pr_info("... fixed-purpose events: %d\n", x86_pmu.num_counters_fixed); 1465 pr_info("... fixed-purpose events: %d\n", x86_pmu.num_counters_fixed);
1439 pr_info("... event mask: %016Lx\n", x86_pmu.intel_ctrl); 1466 pr_info("... event mask: %016Lx\n", x86_pmu.intel_ctrl);
1440 1467
1441 perf_pmu_register(&pmu); 1468 perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW);
1442 perf_cpu_notifier(x86_pmu_notifier); 1469 perf_cpu_notifier(x86_pmu_notifier);
1470
1471 return 0;
1443} 1472}
1473early_initcall(init_hw_perf_events);
1444 1474
1445static inline void x86_pmu_read(struct perf_event *event) 1475static inline void x86_pmu_read(struct perf_event *event)
1446{ 1476{
@@ -1686,7 +1716,7 @@ perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
1686 1716
1687 perf_callchain_store(entry, regs->ip); 1717 perf_callchain_store(entry, regs->ip);
1688 1718
1689 dump_trace(NULL, regs, NULL, regs->bp, &backtrace_ops, entry); 1719 dump_trace(NULL, regs, NULL, &backtrace_ops, entry);
1690} 1720}
1691 1721
1692#ifdef CONFIG_COMPAT 1722#ifdef CONFIG_COMPAT
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index e421b8cd6944..67e2202a6039 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -1,7 +1,5 @@
1#ifdef CONFIG_CPU_SUP_AMD 1#ifdef CONFIG_CPU_SUP_AMD
2 2
3static DEFINE_RAW_SPINLOCK(amd_nb_lock);
4
5static __initconst const u64 amd_hw_cache_event_ids 3static __initconst const u64 amd_hw_cache_event_ids
6 [PERF_COUNT_HW_CACHE_MAX] 4 [PERF_COUNT_HW_CACHE_MAX]
7 [PERF_COUNT_HW_CACHE_OP_MAX] 5 [PERF_COUNT_HW_CACHE_OP_MAX]
@@ -275,7 +273,7 @@ done:
275 return &emptyconstraint; 273 return &emptyconstraint;
276} 274}
277 275
278static struct amd_nb *amd_alloc_nb(int cpu, int nb_id) 276static struct amd_nb *amd_alloc_nb(int cpu)
279{ 277{
280 struct amd_nb *nb; 278 struct amd_nb *nb;
281 int i; 279 int i;
@@ -285,7 +283,7 @@ static struct amd_nb *amd_alloc_nb(int cpu, int nb_id)
285 if (!nb) 283 if (!nb)
286 return NULL; 284 return NULL;
287 285
288 nb->nb_id = nb_id; 286 nb->nb_id = -1;
289 287
290 /* 288 /*
291 * initialize all possible NB constraints 289 * initialize all possible NB constraints
@@ -306,7 +304,7 @@ static int amd_pmu_cpu_prepare(int cpu)
306 if (boot_cpu_data.x86_max_cores < 2) 304 if (boot_cpu_data.x86_max_cores < 2)
307 return NOTIFY_OK; 305 return NOTIFY_OK;
308 306
309 cpuc->amd_nb = amd_alloc_nb(cpu, -1); 307 cpuc->amd_nb = amd_alloc_nb(cpu);
310 if (!cpuc->amd_nb) 308 if (!cpuc->amd_nb)
311 return NOTIFY_BAD; 309 return NOTIFY_BAD;
312 310
@@ -325,8 +323,6 @@ static void amd_pmu_cpu_starting(int cpu)
325 nb_id = amd_get_nb_id(cpu); 323 nb_id = amd_get_nb_id(cpu);
326 WARN_ON_ONCE(nb_id == BAD_APICID); 324 WARN_ON_ONCE(nb_id == BAD_APICID);
327 325
328 raw_spin_lock(&amd_nb_lock);
329
330 for_each_online_cpu(i) { 326 for_each_online_cpu(i) {
331 nb = per_cpu(cpu_hw_events, i).amd_nb; 327 nb = per_cpu(cpu_hw_events, i).amd_nb;
332 if (WARN_ON_ONCE(!nb)) 328 if (WARN_ON_ONCE(!nb))
@@ -341,8 +337,6 @@ static void amd_pmu_cpu_starting(int cpu)
341 337
342 cpuc->amd_nb->nb_id = nb_id; 338 cpuc->amd_nb->nb_id = nb_id;
343 cpuc->amd_nb->refcnt++; 339 cpuc->amd_nb->refcnt++;
344
345 raw_spin_unlock(&amd_nb_lock);
346} 340}
347 341
348static void amd_pmu_cpu_dead(int cpu) 342static void amd_pmu_cpu_dead(int cpu)
@@ -354,8 +348,6 @@ static void amd_pmu_cpu_dead(int cpu)
354 348
355 cpuhw = &per_cpu(cpu_hw_events, cpu); 349 cpuhw = &per_cpu(cpu_hw_events, cpu);
356 350
357 raw_spin_lock(&amd_nb_lock);
358
359 if (cpuhw->amd_nb) { 351 if (cpuhw->amd_nb) {
360 struct amd_nb *nb = cpuhw->amd_nb; 352 struct amd_nb *nb = cpuhw->amd_nb;
361 353
@@ -364,8 +356,6 @@ static void amd_pmu_cpu_dead(int cpu)
364 356
365 cpuhw->amd_nb = NULL; 357 cpuhw->amd_nb = NULL;
366 } 358 }
367
368 raw_spin_unlock(&amd_nb_lock);
369} 359}
370 360
371static __initconst const struct x86_pmu amd_pmu = { 361static __initconst const struct x86_pmu amd_pmu = {
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index c8f5c088cad1..24e390e40f2e 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -816,6 +816,32 @@ static int intel_pmu_hw_config(struct perf_event *event)
816 if (ret) 816 if (ret)
817 return ret; 817 return ret;
818 818
819 if (event->attr.precise_ip &&
820 (event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) {
821 /*
822 * Use an alternative encoding for CPU_CLK_UNHALTED.THREAD_P
823 * (0x003c) so that we can use it with PEBS.
824 *
825 * The regular CPU_CLK_UNHALTED.THREAD_P event (0x003c) isn't
826 * PEBS capable. However we can use INST_RETIRED.ANY_P
827 * (0x00c0), which is a PEBS capable event, to get the same
828 * count.
829 *
830 * INST_RETIRED.ANY_P counts the number of cycles that retires
831 * CNTMASK instructions. By setting CNTMASK to a value (16)
832 * larger than the maximum number of instructions that can be
833 * retired per cycle (4) and then inverting the condition, we
834 * count all cycles that retire 16 or less instructions, which
835 * is every cycle.
836 *
837 * Thereby we gain a PEBS capable cycle counter.
838 */
839 u64 alt_config = 0x108000c0; /* INST_RETIRED.TOTAL_CYCLES */
840
841 alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK);
842 event->hw.config = alt_config;
843 }
844
819 if (event->attr.type != PERF_TYPE_RAW) 845 if (event->attr.type != PERF_TYPE_RAW)
820 return 0; 846 return 0;
821 847
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c
index d9f4ff8fcd69..d5a236615501 100644
--- a/arch/x86/kernel/cpu/perfctr-watchdog.c
+++ b/arch/x86/kernel/cpu/perfctr-watchdog.c
@@ -16,32 +16,12 @@
16#include <linux/kernel.h> 16#include <linux/kernel.h>
17#include <linux/bitops.h> 17#include <linux/bitops.h>
18#include <linux/smp.h> 18#include <linux/smp.h>
19#include <linux/nmi.h> 19#include <asm/nmi.h>
20#include <linux/kprobes.h> 20#include <linux/kprobes.h>
21 21
22#include <asm/apic.h> 22#include <asm/apic.h>
23#include <asm/perf_event.h> 23#include <asm/perf_event.h>
24 24
25struct nmi_watchdog_ctlblk {
26 unsigned int cccr_msr;
27 unsigned int perfctr_msr; /* the MSR to reset in NMI handler */
28 unsigned int evntsel_msr; /* the MSR to select the events to handle */
29};
30
31/* Interface defining a CPU specific perfctr watchdog */
32struct wd_ops {
33 int (*reserve)(void);
34 void (*unreserve)(void);
35 int (*setup)(unsigned nmi_hz);
36 void (*rearm)(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz);
37 void (*stop)(void);
38 unsigned perfctr;
39 unsigned evntsel;
40 u64 checkbit;
41};
42
43static const struct wd_ops *wd_ops;
44
45/* 25/*
46 * this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and it's 26 * this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and it's
47 * offset from MSR_P4_BSU_ESCR0. 27 * offset from MSR_P4_BSU_ESCR0.
@@ -60,8 +40,6 @@ static const struct wd_ops *wd_ops;
60static DECLARE_BITMAP(perfctr_nmi_owner, NMI_MAX_COUNTER_BITS); 40static DECLARE_BITMAP(perfctr_nmi_owner, NMI_MAX_COUNTER_BITS);
61static DECLARE_BITMAP(evntsel_nmi_owner, NMI_MAX_COUNTER_BITS); 41static DECLARE_BITMAP(evntsel_nmi_owner, NMI_MAX_COUNTER_BITS);
62 42
63static DEFINE_PER_CPU(struct nmi_watchdog_ctlblk, nmi_watchdog_ctlblk);
64
65/* converts an msr to an appropriate reservation bit */ 43/* converts an msr to an appropriate reservation bit */
66static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr) 44static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr)
67{ 45{
@@ -172,623 +150,3 @@ void release_evntsel_nmi(unsigned int msr)
172 clear_bit(counter, evntsel_nmi_owner); 150 clear_bit(counter, evntsel_nmi_owner);
173} 151}
174EXPORT_SYMBOL(release_evntsel_nmi); 152EXPORT_SYMBOL(release_evntsel_nmi);
175
176void disable_lapic_nmi_watchdog(void)
177{
178 BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);
179
180 if (atomic_read(&nmi_active) <= 0)
181 return;
182
183 on_each_cpu(stop_apic_nmi_watchdog, NULL, 1);
184
185 if (wd_ops)
186 wd_ops->unreserve();
187
188 BUG_ON(atomic_read(&nmi_active) != 0);
189}
190
191void enable_lapic_nmi_watchdog(void)
192{
193 BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);
194
195 /* are we already enabled */
196 if (atomic_read(&nmi_active) != 0)
197 return;
198
199 /* are we lapic aware */
200 if (!wd_ops)
201 return;
202 if (!wd_ops->reserve()) {
203 printk(KERN_ERR "NMI watchdog: cannot reserve perfctrs\n");
204 return;
205 }
206
207 on_each_cpu(setup_apic_nmi_watchdog, NULL, 1);
208 touch_nmi_watchdog();
209}
210
211/*
212 * Activate the NMI watchdog via the local APIC.
213 */
214
215static unsigned int adjust_for_32bit_ctr(unsigned int hz)
216{
217 u64 counter_val;
218 unsigned int retval = hz;
219
220 /*
221 * On Intel CPUs with P6/ARCH_PERFMON only 32 bits in the counter
222 * are writable, with higher bits sign extending from bit 31.
223 * So, we can only program the counter with 31 bit values and
224 * 32nd bit should be 1, for 33.. to be 1.
225 * Find the appropriate nmi_hz
226 */
227 counter_val = (u64)cpu_khz * 1000;
228 do_div(counter_val, retval);
229 if (counter_val > 0x7fffffffULL) {
230 u64 count = (u64)cpu_khz * 1000;
231 do_div(count, 0x7fffffffUL);
232 retval = count + 1;
233 }
234 return retval;
235}
236
237static void write_watchdog_counter(unsigned int perfctr_msr,
238 const char *descr, unsigned nmi_hz)
239{
240 u64 count = (u64)cpu_khz * 1000;
241
242 do_div(count, nmi_hz);
243 if (descr)
244 pr_debug("setting %s to -0x%08Lx\n", descr, count);
245 wrmsrl(perfctr_msr, 0 - count);
246}
247
248static void write_watchdog_counter32(unsigned int perfctr_msr,
249 const char *descr, unsigned nmi_hz)
250{
251 u64 count = (u64)cpu_khz * 1000;
252
253 do_div(count, nmi_hz);
254 if (descr)
255 pr_debug("setting %s to -0x%08Lx\n", descr, count);
256 wrmsr(perfctr_msr, (u32)(-count), 0);
257}
258
259/*
260 * AMD K7/K8/Family10h/Family11h support.
261 * AMD keeps this interface nicely stable so there is not much variety
262 */
263#define K7_EVNTSEL_ENABLE (1 << 22)
264#define K7_EVNTSEL_INT (1 << 20)
265#define K7_EVNTSEL_OS (1 << 17)
266#define K7_EVNTSEL_USR (1 << 16)
267#define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING 0x76
268#define K7_NMI_EVENT K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING
269
270static int setup_k7_watchdog(unsigned nmi_hz)
271{
272 unsigned int perfctr_msr, evntsel_msr;
273 unsigned int evntsel;
274 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
275
276 perfctr_msr = wd_ops->perfctr;
277 evntsel_msr = wd_ops->evntsel;
278
279 wrmsrl(perfctr_msr, 0UL);
280
281 evntsel = K7_EVNTSEL_INT
282 | K7_EVNTSEL_OS
283 | K7_EVNTSEL_USR
284 | K7_NMI_EVENT;
285
286 /* setup the timer */
287 wrmsr(evntsel_msr, evntsel, 0);
288 write_watchdog_counter(perfctr_msr, "K7_PERFCTR0", nmi_hz);
289
290 /* initialize the wd struct before enabling */
291 wd->perfctr_msr = perfctr_msr;
292 wd->evntsel_msr = evntsel_msr;
293 wd->cccr_msr = 0; /* unused */
294
295 /* ok, everything is initialized, announce that we're set */
296 cpu_nmi_set_wd_enabled();
297
298 apic_write(APIC_LVTPC, APIC_DM_NMI);
299 evntsel |= K7_EVNTSEL_ENABLE;
300 wrmsr(evntsel_msr, evntsel, 0);
301
302 return 1;
303}
304
305static void single_msr_stop_watchdog(void)
306{
307 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
308
309 wrmsr(wd->evntsel_msr, 0, 0);
310}
311
312static int single_msr_reserve(void)
313{
314 if (!reserve_perfctr_nmi(wd_ops->perfctr))
315 return 0;
316
317 if (!reserve_evntsel_nmi(wd_ops->evntsel)) {
318 release_perfctr_nmi(wd_ops->perfctr);
319 return 0;
320 }
321 return 1;
322}
323
324static void single_msr_unreserve(void)
325{
326 release_evntsel_nmi(wd_ops->evntsel);
327 release_perfctr_nmi(wd_ops->perfctr);
328}
329
330static void __kprobes
331single_msr_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
332{
333 /* start the cycle over again */
334 write_watchdog_counter(wd->perfctr_msr, NULL, nmi_hz);
335}
336
337static const struct wd_ops k7_wd_ops = {
338 .reserve = single_msr_reserve,
339 .unreserve = single_msr_unreserve,
340 .setup = setup_k7_watchdog,
341 .rearm = single_msr_rearm,
342 .stop = single_msr_stop_watchdog,
343 .perfctr = MSR_K7_PERFCTR0,
344 .evntsel = MSR_K7_EVNTSEL0,
345 .checkbit = 1ULL << 47,
346};
347
348/*
349 * Intel Model 6 (PPro+,P2,P3,P-M,Core1)
350 */
351#define P6_EVNTSEL0_ENABLE (1 << 22)
352#define P6_EVNTSEL_INT (1 << 20)
353#define P6_EVNTSEL_OS (1 << 17)
354#define P6_EVNTSEL_USR (1 << 16)
355#define P6_EVENT_CPU_CLOCKS_NOT_HALTED 0x79
356#define P6_NMI_EVENT P6_EVENT_CPU_CLOCKS_NOT_HALTED
357
358static int setup_p6_watchdog(unsigned nmi_hz)
359{
360 unsigned int perfctr_msr, evntsel_msr;
361 unsigned int evntsel;
362 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
363
364 perfctr_msr = wd_ops->perfctr;
365 evntsel_msr = wd_ops->evntsel;
366
367 /* KVM doesn't implement this MSR */
368 if (wrmsr_safe(perfctr_msr, 0, 0) < 0)
369 return 0;
370
371 evntsel = P6_EVNTSEL_INT
372 | P6_EVNTSEL_OS
373 | P6_EVNTSEL_USR
374 | P6_NMI_EVENT;
375
376 /* setup the timer */
377 wrmsr(evntsel_msr, evntsel, 0);
378 nmi_hz = adjust_for_32bit_ctr(nmi_hz);
379 write_watchdog_counter32(perfctr_msr, "P6_PERFCTR0", nmi_hz);
380
381 /* initialize the wd struct before enabling */
382 wd->perfctr_msr = perfctr_msr;
383 wd->evntsel_msr = evntsel_msr;
384 wd->cccr_msr = 0; /* unused */
385
386 /* ok, everything is initialized, announce that we're set */
387 cpu_nmi_set_wd_enabled();
388
389 apic_write(APIC_LVTPC, APIC_DM_NMI);
390 evntsel |= P6_EVNTSEL0_ENABLE;
391 wrmsr(evntsel_msr, evntsel, 0);
392
393 return 1;
394}
395
396static void __kprobes p6_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
397{
398 /*
399 * P6 based Pentium M need to re-unmask
400 * the apic vector but it doesn't hurt
401 * other P6 variant.
402 * ArchPerfom/Core Duo also needs this
403 */
404 apic_write(APIC_LVTPC, APIC_DM_NMI);
405
406 /* P6/ARCH_PERFMON has 32 bit counter write */
407 write_watchdog_counter32(wd->perfctr_msr, NULL, nmi_hz);
408}
409
410static const struct wd_ops p6_wd_ops = {
411 .reserve = single_msr_reserve,
412 .unreserve = single_msr_unreserve,
413 .setup = setup_p6_watchdog,
414 .rearm = p6_rearm,
415 .stop = single_msr_stop_watchdog,
416 .perfctr = MSR_P6_PERFCTR0,
417 .evntsel = MSR_P6_EVNTSEL0,
418 .checkbit = 1ULL << 39,
419};
420
421/*
422 * Intel P4 performance counters.
423 * By far the most complicated of all.
424 */
425#define MSR_P4_MISC_ENABLE_PERF_AVAIL (1 << 7)
426#define P4_ESCR_EVENT_SELECT(N) ((N) << 25)
427#define P4_ESCR_OS (1 << 3)
428#define P4_ESCR_USR (1 << 2)
429#define P4_CCCR_OVF_PMI0 (1 << 26)
430#define P4_CCCR_OVF_PMI1 (1 << 27)
431#define P4_CCCR_THRESHOLD(N) ((N) << 20)
432#define P4_CCCR_COMPLEMENT (1 << 19)
433#define P4_CCCR_COMPARE (1 << 18)
434#define P4_CCCR_REQUIRED (3 << 16)
435#define P4_CCCR_ESCR_SELECT(N) ((N) << 13)
436#define P4_CCCR_ENABLE (1 << 12)
437#define P4_CCCR_OVF (1 << 31)
438
439#define P4_CONTROLS 18
440static unsigned int p4_controls[18] = {
441 MSR_P4_BPU_CCCR0,
442 MSR_P4_BPU_CCCR1,
443 MSR_P4_BPU_CCCR2,
444 MSR_P4_BPU_CCCR3,
445 MSR_P4_MS_CCCR0,
446 MSR_P4_MS_CCCR1,
447 MSR_P4_MS_CCCR2,
448 MSR_P4_MS_CCCR3,
449 MSR_P4_FLAME_CCCR0,
450 MSR_P4_FLAME_CCCR1,
451 MSR_P4_FLAME_CCCR2,
452 MSR_P4_FLAME_CCCR3,
453 MSR_P4_IQ_CCCR0,
454 MSR_P4_IQ_CCCR1,
455 MSR_P4_IQ_CCCR2,
456 MSR_P4_IQ_CCCR3,
457 MSR_P4_IQ_CCCR4,
458 MSR_P4_IQ_CCCR5,
459};
460/*
461 * Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter
462 * CRU_ESCR0 (with any non-null event selector) through a complemented
463 * max threshold. [IA32-Vol3, Section 14.9.9]
464 */
465static int setup_p4_watchdog(unsigned nmi_hz)
466{
467 unsigned int perfctr_msr, evntsel_msr, cccr_msr;
468 unsigned int evntsel, cccr_val;
469 unsigned int misc_enable, dummy;
470 unsigned int ht_num;
471 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
472
473 rdmsr(MSR_IA32_MISC_ENABLE, misc_enable, dummy);
474 if (!(misc_enable & MSR_P4_MISC_ENABLE_PERF_AVAIL))
475 return 0;
476
477#ifdef CONFIG_SMP
478 /* detect which hyperthread we are on */
479 if (smp_num_siblings == 2) {
480 unsigned int ebx, apicid;
481
482 ebx = cpuid_ebx(1);
483 apicid = (ebx >> 24) & 0xff;
484 ht_num = apicid & 1;
485 } else
486#endif
487 ht_num = 0;
488
489 /*
490 * performance counters are shared resources
491 * assign each hyperthread its own set
492 * (re-use the ESCR0 register, seems safe
493 * and keeps the cccr_val the same)
494 */
495 if (!ht_num) {
496 /* logical cpu 0 */
497 perfctr_msr = MSR_P4_IQ_PERFCTR0;
498 evntsel_msr = MSR_P4_CRU_ESCR0;
499 cccr_msr = MSR_P4_IQ_CCCR0;
500 cccr_val = P4_CCCR_OVF_PMI0 | P4_CCCR_ESCR_SELECT(4);
501
502 /*
503 * If we're on the kdump kernel or other situation, we may
504 * still have other performance counter registers set to
505 * interrupt and they'll keep interrupting forever because
506 * of the P4_CCCR_OVF quirk. So we need to ACK all the
507 * pending interrupts and disable all the registers here,
508 * before reenabling the NMI delivery. Refer to p4_rearm()
509 * about the P4_CCCR_OVF quirk.
510 */
511 if (reset_devices) {
512 unsigned int low, high;
513 int i;
514
515 for (i = 0; i < P4_CONTROLS; i++) {
516 rdmsr(p4_controls[i], low, high);
517 low &= ~(P4_CCCR_ENABLE | P4_CCCR_OVF);
518 wrmsr(p4_controls[i], low, high);
519 }
520 }
521 } else {
522 /* logical cpu 1 */
523 perfctr_msr = MSR_P4_IQ_PERFCTR1;
524 evntsel_msr = MSR_P4_CRU_ESCR0;
525 cccr_msr = MSR_P4_IQ_CCCR1;
526
527 /* Pentium 4 D processors don't support P4_CCCR_OVF_PMI1 */
528 if (boot_cpu_data.x86_model == 4 && boot_cpu_data.x86_mask == 4)
529 cccr_val = P4_CCCR_OVF_PMI0;
530 else
531 cccr_val = P4_CCCR_OVF_PMI1;
532 cccr_val |= P4_CCCR_ESCR_SELECT(4);
533 }
534
535 evntsel = P4_ESCR_EVENT_SELECT(0x3F)
536 | P4_ESCR_OS
537 | P4_ESCR_USR;
538
539 cccr_val |= P4_CCCR_THRESHOLD(15)
540 | P4_CCCR_COMPLEMENT
541 | P4_CCCR_COMPARE
542 | P4_CCCR_REQUIRED;
543
544 wrmsr(evntsel_msr, evntsel, 0);
545 wrmsr(cccr_msr, cccr_val, 0);
546 write_watchdog_counter(perfctr_msr, "P4_IQ_COUNTER0", nmi_hz);
547
548 wd->perfctr_msr = perfctr_msr;
549 wd->evntsel_msr = evntsel_msr;
550 wd->cccr_msr = cccr_msr;
551
552 /* ok, everything is initialized, announce that we're set */
553 cpu_nmi_set_wd_enabled();
554
555 apic_write(APIC_LVTPC, APIC_DM_NMI);
556 cccr_val |= P4_CCCR_ENABLE;
557 wrmsr(cccr_msr, cccr_val, 0);
558 return 1;
559}
560
561static void stop_p4_watchdog(void)
562{
563 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
564 wrmsr(wd->cccr_msr, 0, 0);
565 wrmsr(wd->evntsel_msr, 0, 0);
566}
567
568static int p4_reserve(void)
569{
570 if (!reserve_perfctr_nmi(MSR_P4_IQ_PERFCTR0))
571 return 0;
572#ifdef CONFIG_SMP
573 if (smp_num_siblings > 1 && !reserve_perfctr_nmi(MSR_P4_IQ_PERFCTR1))
574 goto fail1;
575#endif
576 if (!reserve_evntsel_nmi(MSR_P4_CRU_ESCR0))
577 goto fail2;
578 /* RED-PEN why is ESCR1 not reserved here? */
579 return 1;
580 fail2:
581#ifdef CONFIG_SMP
582 if (smp_num_siblings > 1)
583 release_perfctr_nmi(MSR_P4_IQ_PERFCTR1);
584 fail1:
585#endif
586 release_perfctr_nmi(MSR_P4_IQ_PERFCTR0);
587 return 0;
588}
589
590static void p4_unreserve(void)
591{
592#ifdef CONFIG_SMP
593 if (smp_num_siblings > 1)
594 release_perfctr_nmi(MSR_P4_IQ_PERFCTR1);
595#endif
596 release_evntsel_nmi(MSR_P4_CRU_ESCR0);
597 release_perfctr_nmi(MSR_P4_IQ_PERFCTR0);
598}
599
600static void __kprobes p4_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
601{
602 unsigned dummy;
603 /*
604 * P4 quirks:
605 * - An overflown perfctr will assert its interrupt
606 * until the OVF flag in its CCCR is cleared.
607 * - LVTPC is masked on interrupt and must be
608 * unmasked by the LVTPC handler.
609 */
610 rdmsrl(wd->cccr_msr, dummy);
611 dummy &= ~P4_CCCR_OVF;
612 wrmsrl(wd->cccr_msr, dummy);
613 apic_write(APIC_LVTPC, APIC_DM_NMI);
614 /* start the cycle over again */
615 write_watchdog_counter(wd->perfctr_msr, NULL, nmi_hz);
616}
617
618static const struct wd_ops p4_wd_ops = {
619 .reserve = p4_reserve,
620 .unreserve = p4_unreserve,
621 .setup = setup_p4_watchdog,
622 .rearm = p4_rearm,
623 .stop = stop_p4_watchdog,
624 /* RED-PEN this is wrong for the other sibling */
625 .perfctr = MSR_P4_BPU_PERFCTR0,
626 .evntsel = MSR_P4_BSU_ESCR0,
627 .checkbit = 1ULL << 39,
628};
629
630/*
631 * Watchdog using the Intel architected PerfMon.
632 * Used for Core2 and hopefully all future Intel CPUs.
633 */
634#define ARCH_PERFMON_NMI_EVENT_SEL ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL
635#define ARCH_PERFMON_NMI_EVENT_UMASK ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK
636
637static struct wd_ops intel_arch_wd_ops;
638
639static int setup_intel_arch_watchdog(unsigned nmi_hz)
640{
641 unsigned int ebx;
642 union cpuid10_eax eax;
643 unsigned int unused;
644 unsigned int perfctr_msr, evntsel_msr;
645 unsigned int evntsel;
646 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
647
648 /*
649 * Check whether the Architectural PerfMon supports
650 * Unhalted Core Cycles Event or not.
651 * NOTE: Corresponding bit = 0 in ebx indicates event present.
652 */
653 cpuid(10, &(eax.full), &ebx, &unused, &unused);
654 if ((eax.split.mask_length <
655 (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) ||
656 (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
657 return 0;
658
659 perfctr_msr = wd_ops->perfctr;
660 evntsel_msr = wd_ops->evntsel;
661
662 wrmsrl(perfctr_msr, 0UL);
663
664 evntsel = ARCH_PERFMON_EVENTSEL_INT
665 | ARCH_PERFMON_EVENTSEL_OS
666 | ARCH_PERFMON_EVENTSEL_USR
667 | ARCH_PERFMON_NMI_EVENT_SEL
668 | ARCH_PERFMON_NMI_EVENT_UMASK;
669
670 /* setup the timer */
671 wrmsr(evntsel_msr, evntsel, 0);
672 nmi_hz = adjust_for_32bit_ctr(nmi_hz);
673 write_watchdog_counter32(perfctr_msr, "INTEL_ARCH_PERFCTR0", nmi_hz);
674
675 wd->perfctr_msr = perfctr_msr;
676 wd->evntsel_msr = evntsel_msr;
677 wd->cccr_msr = 0; /* unused */
678
679 /* ok, everything is initialized, announce that we're set */
680 cpu_nmi_set_wd_enabled();
681
682 apic_write(APIC_LVTPC, APIC_DM_NMI);
683 evntsel |= ARCH_PERFMON_EVENTSEL_ENABLE;
684 wrmsr(evntsel_msr, evntsel, 0);
685 intel_arch_wd_ops.checkbit = 1ULL << (eax.split.bit_width - 1);
686 return 1;
687}
688
689static struct wd_ops intel_arch_wd_ops __read_mostly = {
690 .reserve = single_msr_reserve,
691 .unreserve = single_msr_unreserve,
692 .setup = setup_intel_arch_watchdog,
693 .rearm = p6_rearm,
694 .stop = single_msr_stop_watchdog,
695 .perfctr = MSR_ARCH_PERFMON_PERFCTR1,
696 .evntsel = MSR_ARCH_PERFMON_EVENTSEL1,
697};
698
699static void probe_nmi_watchdog(void)
700{
701 switch (boot_cpu_data.x86_vendor) {
702 case X86_VENDOR_AMD:
703 if (boot_cpu_data.x86 == 6 ||
704 (boot_cpu_data.x86 >= 0xf && boot_cpu_data.x86 <= 0x15))
705 wd_ops = &k7_wd_ops;
706 return;
707 case X86_VENDOR_INTEL:
708 /* Work around where perfctr1 doesn't have a working enable
709 * bit as described in the following errata:
710 * AE49 Core Duo and Intel Core Solo 65 nm
711 * AN49 Intel Pentium Dual-Core
712 * AF49 Dual-Core Intel Xeon Processor LV
713 */
714 if ((boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 14) ||
715 ((boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 15 &&
716 boot_cpu_data.x86_mask == 4))) {
717 intel_arch_wd_ops.perfctr = MSR_ARCH_PERFMON_PERFCTR0;
718 intel_arch_wd_ops.evntsel = MSR_ARCH_PERFMON_EVENTSEL0;
719 }
720 if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
721 wd_ops = &intel_arch_wd_ops;
722 break;
723 }
724 switch (boot_cpu_data.x86) {
725 case 6:
726 if (boot_cpu_data.x86_model > 13)
727 return;
728
729 wd_ops = &p6_wd_ops;
730 break;
731 case 15:
732 wd_ops = &p4_wd_ops;
733 break;
734 default:
735 return;
736 }
737 break;
738 }
739}
740
741/* Interface to nmi.c */
742
743int lapic_watchdog_init(unsigned nmi_hz)
744{
745 if (!wd_ops) {
746 probe_nmi_watchdog();
747 if (!wd_ops) {
748 printk(KERN_INFO "NMI watchdog: CPU not supported\n");
749 return -1;
750 }
751
752 if (!wd_ops->reserve()) {
753 printk(KERN_ERR
754 "NMI watchdog: cannot reserve perfctrs\n");
755 return -1;
756 }
757 }
758
759 if (!(wd_ops->setup(nmi_hz))) {
760 printk(KERN_ERR "Cannot setup NMI watchdog on CPU %d\n",
761 raw_smp_processor_id());
762 return -1;
763 }
764
765 return 0;
766}
767
768void lapic_watchdog_stop(void)
769{
770 if (wd_ops)
771 wd_ops->stop();
772}
773
774unsigned lapic_adjust_nmi_hz(unsigned hz)
775{
776 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
777 if (wd->perfctr_msr == MSR_P6_PERFCTR0 ||
778 wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR1)
779 hz = adjust_for_32bit_ctr(hz);
780 return hz;
781}
782
783int __kprobes lapic_wd_event(unsigned nmi_hz)
784{
785 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
786 u64 ctr;
787
788 rdmsrl(wd->perfctr_msr, ctr);
789 if (ctr & wd_ops->checkbit) /* perfctr still running? */
790 return 0;
791
792 wd_ops->rearm(wd, nmi_hz);
793 return 1;
794}
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index 6e8752c1bd52..8474c998cbd4 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -175,21 +175,21 @@ static const struct stacktrace_ops print_trace_ops = {
175 175
176void 176void
177show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, 177show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
178 unsigned long *stack, unsigned long bp, char *log_lvl) 178 unsigned long *stack, char *log_lvl)
179{ 179{
180 printk("%sCall Trace:\n", log_lvl); 180 printk("%sCall Trace:\n", log_lvl);
181 dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl); 181 dump_trace(task, regs, stack, &print_trace_ops, log_lvl);
182} 182}
183 183
184void show_trace(struct task_struct *task, struct pt_regs *regs, 184void show_trace(struct task_struct *task, struct pt_regs *regs,
185 unsigned long *stack, unsigned long bp) 185 unsigned long *stack)
186{ 186{
187 show_trace_log_lvl(task, regs, stack, bp, ""); 187 show_trace_log_lvl(task, regs, stack, "");
188} 188}
189 189
190void show_stack(struct task_struct *task, unsigned long *sp) 190void show_stack(struct task_struct *task, unsigned long *sp)
191{ 191{
192 show_stack_log_lvl(task, NULL, sp, 0, ""); 192 show_stack_log_lvl(task, NULL, sp, "");
193} 193}
194 194
195/* 195/*
@@ -210,7 +210,7 @@ void dump_stack(void)
210 init_utsname()->release, 210 init_utsname()->release,
211 (int)strcspn(init_utsname()->version, " "), 211 (int)strcspn(init_utsname()->version, " "),
212 init_utsname()->version); 212 init_utsname()->version);
213 show_trace(NULL, NULL, &stack, bp); 213 show_trace(NULL, NULL, &stack);
214} 214}
215EXPORT_SYMBOL(dump_stack); 215EXPORT_SYMBOL(dump_stack);
216 216
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c
index 1bc7f75a5bda..74cc1eda384b 100644
--- a/arch/x86/kernel/dumpstack_32.c
+++ b/arch/x86/kernel/dumpstack_32.c
@@ -17,11 +17,12 @@
17#include <asm/stacktrace.h> 17#include <asm/stacktrace.h>
18 18
19 19
20void dump_trace(struct task_struct *task, struct pt_regs *regs, 20void dump_trace(struct task_struct *task,
21 unsigned long *stack, unsigned long bp, 21 struct pt_regs *regs, unsigned long *stack,
22 const struct stacktrace_ops *ops, void *data) 22 const struct stacktrace_ops *ops, void *data)
23{ 23{
24 int graph = 0; 24 int graph = 0;
25 unsigned long bp;
25 26
26 if (!task) 27 if (!task)
27 task = current; 28 task = current;
@@ -34,18 +35,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
34 stack = (unsigned long *)task->thread.sp; 35 stack = (unsigned long *)task->thread.sp;
35 } 36 }
36 37
37#ifdef CONFIG_FRAME_POINTER 38 bp = stack_frame(task, regs);
38 if (!bp) {
39 if (task == current) {
40 /* Grab bp right from our regs */
41 get_bp(bp);
42 } else {
43 /* bp is the last reg pushed by switch_to */
44 bp = *(unsigned long *) task->thread.sp;
45 }
46 }
47#endif
48
49 for (;;) { 39 for (;;) {
50 struct thread_info *context; 40 struct thread_info *context;
51 41
@@ -65,7 +55,7 @@ EXPORT_SYMBOL(dump_trace);
65 55
66void 56void
67show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, 57show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
68 unsigned long *sp, unsigned long bp, char *log_lvl) 58 unsigned long *sp, char *log_lvl)
69{ 59{
70 unsigned long *stack; 60 unsigned long *stack;
71 int i; 61 int i;
@@ -87,7 +77,7 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
87 touch_nmi_watchdog(); 77 touch_nmi_watchdog();
88 } 78 }
89 printk(KERN_CONT "\n"); 79 printk(KERN_CONT "\n");
90 show_trace_log_lvl(task, regs, sp, bp, log_lvl); 80 show_trace_log_lvl(task, regs, sp, log_lvl);
91} 81}
92 82
93 83
@@ -112,8 +102,7 @@ void show_registers(struct pt_regs *regs)
112 u8 *ip; 102 u8 *ip;
113 103
114 printk(KERN_EMERG "Stack:\n"); 104 printk(KERN_EMERG "Stack:\n");
115 show_stack_log_lvl(NULL, regs, &regs->sp, 105 show_stack_log_lvl(NULL, regs, &regs->sp, KERN_EMERG);
116 0, KERN_EMERG);
117 106
118 printk(KERN_EMERG "Code: "); 107 printk(KERN_EMERG "Code: ");
119 108
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index 6a340485249a..64101335de19 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -139,8 +139,8 @@ fixup_bp_irq_link(unsigned long bp, unsigned long *stack,
139 * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack 139 * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack
140 */ 140 */
141 141
142void dump_trace(struct task_struct *task, struct pt_regs *regs, 142void dump_trace(struct task_struct *task,
143 unsigned long *stack, unsigned long bp, 143 struct pt_regs *regs, unsigned long *stack,
144 const struct stacktrace_ops *ops, void *data) 144 const struct stacktrace_ops *ops, void *data)
145{ 145{
146 const unsigned cpu = get_cpu(); 146 const unsigned cpu = get_cpu();
@@ -149,6 +149,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
149 unsigned used = 0; 149 unsigned used = 0;
150 struct thread_info *tinfo; 150 struct thread_info *tinfo;
151 int graph = 0; 151 int graph = 0;
152 unsigned long bp;
152 153
153 if (!task) 154 if (!task)
154 task = current; 155 task = current;
@@ -160,18 +161,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
160 stack = (unsigned long *)task->thread.sp; 161 stack = (unsigned long *)task->thread.sp;
161 } 162 }
162 163
163#ifdef CONFIG_FRAME_POINTER 164 bp = stack_frame(task, regs);
164 if (!bp) {
165 if (task == current) {
166 /* Grab bp right from our regs */
167 get_bp(bp);
168 } else {
169 /* bp is the last reg pushed by switch_to */
170 bp = *(unsigned long *) task->thread.sp;
171 }
172 }
173#endif
174
175 /* 165 /*
176 * Print function call entries in all stacks, starting at the 166 * Print function call entries in all stacks, starting at the
177 * current stack address. If the stacks consist of nested 167 * current stack address. If the stacks consist of nested
@@ -235,7 +225,7 @@ EXPORT_SYMBOL(dump_trace);
235 225
236void 226void
237show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, 227show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
238 unsigned long *sp, unsigned long bp, char *log_lvl) 228 unsigned long *sp, char *log_lvl)
239{ 229{
240 unsigned long *irq_stack_end; 230 unsigned long *irq_stack_end;
241 unsigned long *irq_stack; 231 unsigned long *irq_stack;
@@ -279,7 +269,7 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
279 preempt_enable(); 269 preempt_enable();
280 270
281 printk(KERN_CONT "\n"); 271 printk(KERN_CONT "\n");
282 show_trace_log_lvl(task, regs, sp, bp, log_lvl); 272 show_trace_log_lvl(task, regs, sp, log_lvl);
283} 273}
284 274
285void show_registers(struct pt_regs *regs) 275void show_registers(struct pt_regs *regs)
@@ -308,7 +298,7 @@ void show_registers(struct pt_regs *regs)
308 298
309 printk(KERN_EMERG "Stack:\n"); 299 printk(KERN_EMERG "Stack:\n");
310 show_stack_log_lvl(NULL, regs, (unsigned long *)sp, 300 show_stack_log_lvl(NULL, regs, (unsigned long *)sp,
311 regs->bp, KERN_EMERG); 301 KERN_EMERG);
312 302
313 printk(KERN_EMERG "Code: "); 303 printk(KERN_EMERG "Code: ");
314 304
diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c
index 4572f25f9325..cd28a350f7f9 100644
--- a/arch/x86/kernel/early_printk.c
+++ b/arch/x86/kernel/early_printk.c
@@ -240,7 +240,7 @@ static int __init setup_early_printk(char *buf)
240 if (!strncmp(buf, "xen", 3)) 240 if (!strncmp(buf, "xen", 3))
241 early_console_register(&xenboot_console, keep); 241 early_console_register(&xenboot_console, keep);
242#endif 242#endif
243#ifdef CONFIG_X86_MRST_EARLY_PRINTK 243#ifdef CONFIG_EARLY_PRINTK_MRST
244 if (!strncmp(buf, "mrst", 4)) { 244 if (!strncmp(buf, "mrst", 4)) {
245 mrst_early_console_init(); 245 mrst_early_console_init();
246 early_console_register(&early_mrst_console, keep); 246 early_console_register(&early_mrst_console, keep);
@@ -250,7 +250,6 @@ static int __init setup_early_printk(char *buf)
250 hsu_early_console_init(); 250 hsu_early_console_init();
251 early_console_register(&early_hsu_console, keep); 251 early_console_register(&early_hsu_console, keep);
252 } 252 }
253
254#endif 253#endif
255 buf++; 254 buf++;
256 } 255 }
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 3afb33f14d2d..298448656b60 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -19,6 +19,7 @@
19#include <linux/sched.h> 19#include <linux/sched.h>
20#include <linux/init.h> 20#include <linux/init.h>
21#include <linux/list.h> 21#include <linux/list.h>
22#include <linux/module.h>
22 23
23#include <trace/syscall.h> 24#include <trace/syscall.h>
24 25
@@ -49,6 +50,7 @@ static DEFINE_PER_CPU(int, save_modifying_code);
49int ftrace_arch_code_modify_prepare(void) 50int ftrace_arch_code_modify_prepare(void)
50{ 51{
51 set_kernel_text_rw(); 52 set_kernel_text_rw();
53 set_all_modules_text_rw();
52 modifying_code = 1; 54 modifying_code = 1;
53 return 0; 55 return 0;
54} 56}
@@ -56,6 +58,7 @@ int ftrace_arch_code_modify_prepare(void)
56int ftrace_arch_code_modify_post_process(void) 58int ftrace_arch_code_modify_post_process(void)
57{ 59{
58 modifying_code = 0; 60 modifying_code = 0;
61 set_all_modules_text_ro();
59 set_kernel_text_ro(); 62 set_kernel_text_ro();
60 return 0; 63 return 0;
61} 64}
diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c
index 763310165fa0..7f138b3c3c52 100644
--- a/arch/x86/kernel/head32.c
+++ b/arch/x86/kernel/head32.c
@@ -61,6 +61,9 @@ void __init i386_start_kernel(void)
61 case X86_SUBARCH_MRST: 61 case X86_SUBARCH_MRST:
62 x86_mrst_early_setup(); 62 x86_mrst_early_setup();
63 break; 63 break;
64 case X86_SUBARCH_CE4100:
65 x86_ce4100_early_setup();
66 break;
64 default: 67 default:
65 i386_default_early_setup(); 68 i386_default_early_setup();
66 break; 69 break;
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index bcece91dd311..5707fc8a7a4b 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -60,16 +60,18 @@
60#define PAGE_TABLE_SIZE(pages) ((pages) / PTRS_PER_PGD) 60#define PAGE_TABLE_SIZE(pages) ((pages) / PTRS_PER_PGD)
61#endif 61#endif
62 62
63/* Number of possible pages in the lowmem region */
64LOWMEM_PAGES = (((1<<32) - __PAGE_OFFSET) >> PAGE_SHIFT)
65
63/* Enough space to fit pagetables for the low memory linear map */ 66/* Enough space to fit pagetables for the low memory linear map */
64MAPPING_BEYOND_END = \ 67MAPPING_BEYOND_END = PAGE_TABLE_SIZE(LOWMEM_PAGES) << PAGE_SHIFT
65 PAGE_TABLE_SIZE(((1<<32) - __PAGE_OFFSET) >> PAGE_SHIFT) << PAGE_SHIFT
66 68
67/* 69/*
68 * Worst-case size of the kernel mapping we need to make: 70 * Worst-case size of the kernel mapping we need to make:
69 * the worst-case size of the kernel itself, plus the extra we need 71 * a relocatable kernel can live anywhere in lowmem, so we need to be able
70 * to map for the linear map. 72 * to map all of lowmem.
71 */ 73 */
72KERNEL_PAGES = (KERNEL_IMAGE_SIZE + MAPPING_BEYOND_END)>>PAGE_SHIFT 74KERNEL_PAGES = LOWMEM_PAGES
73 75
74INIT_MAP_SIZE = PAGE_TABLE_SIZE(KERNEL_PAGES) * PAGE_SIZE_asm 76INIT_MAP_SIZE = PAGE_TABLE_SIZE(KERNEL_PAGES) * PAGE_SIZE_asm
75RESERVE_BRK(pagetables, INIT_MAP_SIZE) 77RESERVE_BRK(pagetables, INIT_MAP_SIZE)
@@ -314,6 +316,10 @@ ENTRY(startup_32_smp)
314 subl $0x80000001, %eax 316 subl $0x80000001, %eax
315 cmpl $(0x8000ffff-0x80000001), %eax 317 cmpl $(0x8000ffff-0x80000001), %eax
316 ja 6f 318 ja 6f
319
320 /* Clear bogus XD_DISABLE bits */
321 call verify_cpu
322
317 mov $0x80000001, %eax 323 mov $0x80000001, %eax
318 cpuid 324 cpuid
319 /* Execute Disable bit supported? */ 325 /* Execute Disable bit supported? */
@@ -609,6 +615,8 @@ ignore_int:
609#endif 615#endif
610 iret 616 iret
611 617
618#include "verify_cpu.S"
619
612 __REFDATA 620 __REFDATA
613.align 4 621.align 4
614ENTRY(initial_code) 622ENTRY(initial_code)
@@ -620,13 +628,13 @@ ENTRY(initial_code)
620__PAGE_ALIGNED_BSS 628__PAGE_ALIGNED_BSS
621 .align PAGE_SIZE_asm 629 .align PAGE_SIZE_asm
622#ifdef CONFIG_X86_PAE 630#ifdef CONFIG_X86_PAE
623initial_pg_pmd: 631ENTRY(initial_pg_pmd)
624 .fill 1024*KPMDS,4,0 632 .fill 1024*KPMDS,4,0
625#else 633#else
626ENTRY(initial_page_table) 634ENTRY(initial_page_table)
627 .fill 1024,4,0 635 .fill 1024,4,0
628#endif 636#endif
629initial_pg_fixmap: 637ENTRY(initial_pg_fixmap)
630 .fill 1024,4,0 638 .fill 1024,4,0
631ENTRY(empty_zero_page) 639ENTRY(empty_zero_page)
632 .fill 4096,1,0 640 .fill 4096,1,0
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index ae03cab4352e..4ff5968f12d2 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -27,6 +27,9 @@
27#define HPET_DEV_FSB_CAP 0x1000 27#define HPET_DEV_FSB_CAP 0x1000
28#define HPET_DEV_PERI_CAP 0x2000 28#define HPET_DEV_PERI_CAP 0x2000
29 29
30#define HPET_MIN_CYCLES 128
31#define HPET_MIN_PROG_DELTA (HPET_MIN_CYCLES + (HPET_MIN_CYCLES >> 1))
32
30#define EVT_TO_HPET_DEV(evt) container_of(evt, struct hpet_dev, evt) 33#define EVT_TO_HPET_DEV(evt) container_of(evt, struct hpet_dev, evt)
31 34
32/* 35/*
@@ -299,8 +302,9 @@ static void hpet_legacy_clockevent_register(void)
299 /* Calculate the min / max delta */ 302 /* Calculate the min / max delta */
300 hpet_clockevent.max_delta_ns = clockevent_delta2ns(0x7FFFFFFF, 303 hpet_clockevent.max_delta_ns = clockevent_delta2ns(0x7FFFFFFF,
301 &hpet_clockevent); 304 &hpet_clockevent);
302 /* 5 usec minimum reprogramming delta. */ 305 /* Setup minimum reprogramming delta. */
303 hpet_clockevent.min_delta_ns = 5000; 306 hpet_clockevent.min_delta_ns = clockevent_delta2ns(HPET_MIN_PROG_DELTA,
307 &hpet_clockevent);
304 308
305 /* 309 /*
306 * Start hpet with the boot cpu mask and make it 310 * Start hpet with the boot cpu mask and make it
@@ -393,22 +397,24 @@ static int hpet_next_event(unsigned long delta,
393 * the wraparound into account) nor a simple count down event 397 * the wraparound into account) nor a simple count down event
394 * mode. Further the write to the comparator register is 398 * mode. Further the write to the comparator register is
395 * delayed internally up to two HPET clock cycles in certain 399 * delayed internally up to two HPET clock cycles in certain
396 * chipsets (ATI, ICH9,10). We worked around that by reading 400 * chipsets (ATI, ICH9,10). Some newer AMD chipsets have even
397 * back the compare register, but that required another 401 * longer delays. We worked around that by reading back the
398 * workaround for ICH9,10 chips where the first readout after 402 * compare register, but that required another workaround for
399 * write can return the old stale value. We already have a 403 * ICH9,10 chips where the first readout after write can
400 * minimum delta of 5us enforced, but a NMI or SMI hitting 404 * return the old stale value. We already had a minimum
405 * programming delta of 5us enforced, but a NMI or SMI hitting
401 * between the counter readout and the comparator write can 406 * between the counter readout and the comparator write can
402 * move us behind that point easily. Now instead of reading 407 * move us behind that point easily. Now instead of reading
403 * the compare register back several times, we make the ETIME 408 * the compare register back several times, we make the ETIME
404 * decision based on the following: Return ETIME if the 409 * decision based on the following: Return ETIME if the
405 * counter value after the write is less than 8 HPET cycles 410 * counter value after the write is less than HPET_MIN_CYCLES
406 * away from the event or if the counter is already ahead of 411 * away from the event or if the counter is already ahead of
407 * the event. 412 * the event. The minimum programming delta for the generic
413 * clockevents code is set to 1.5 * HPET_MIN_CYCLES.
408 */ 414 */
409 res = (s32)(cnt - hpet_readl(HPET_COUNTER)); 415 res = (s32)(cnt - hpet_readl(HPET_COUNTER));
410 416
411 return res < 8 ? -ETIME : 0; 417 return res < HPET_MIN_CYCLES ? -ETIME : 0;
412} 418}
413 419
414static void hpet_legacy_set_mode(enum clock_event_mode mode, 420static void hpet_legacy_set_mode(enum clock_event_mode mode,
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index 1cbd54c0df99..5940282bd2f9 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -1184,6 +1184,10 @@ static void __kprobes optimized_callback(struct optimized_kprobe *op,
1184{ 1184{
1185 struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); 1185 struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
1186 1186
1187 /* This is possible if op is under delayed unoptimizing */
1188 if (kprobe_disabled(&op->kp))
1189 return;
1190
1187 preempt_disable(); 1191 preempt_disable();
1188 if (kprobe_running()) { 1192 if (kprobe_running()) {
1189 kprobes_inc_nmissed_count(&op->kp); 1193 kprobes_inc_nmissed_count(&op->kp);
@@ -1401,10 +1405,16 @@ int __kprobes arch_prepare_optimized_kprobe(struct optimized_kprobe *op)
1401 return 0; 1405 return 0;
1402} 1406}
1403 1407
1404/* Replace a breakpoint (int3) with a relative jump. */ 1408#define MAX_OPTIMIZE_PROBES 256
1405int __kprobes arch_optimize_kprobe(struct optimized_kprobe *op) 1409static struct text_poke_param *jump_poke_params;
1410static struct jump_poke_buffer {
1411 u8 buf[RELATIVEJUMP_SIZE];
1412} *jump_poke_bufs;
1413
1414static void __kprobes setup_optimize_kprobe(struct text_poke_param *tprm,
1415 u8 *insn_buf,
1416 struct optimized_kprobe *op)
1406{ 1417{
1407 unsigned char jmp_code[RELATIVEJUMP_SIZE];
1408 s32 rel = (s32)((long)op->optinsn.insn - 1418 s32 rel = (s32)((long)op->optinsn.insn -
1409 ((long)op->kp.addr + RELATIVEJUMP_SIZE)); 1419 ((long)op->kp.addr + RELATIVEJUMP_SIZE));
1410 1420
@@ -1412,16 +1422,79 @@ int __kprobes arch_optimize_kprobe(struct optimized_kprobe *op)
1412 memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_SIZE, 1422 memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_SIZE,
1413 RELATIVE_ADDR_SIZE); 1423 RELATIVE_ADDR_SIZE);
1414 1424
1415 jmp_code[0] = RELATIVEJUMP_OPCODE; 1425 insn_buf[0] = RELATIVEJUMP_OPCODE;
1416 *(s32 *)(&jmp_code[1]) = rel; 1426 *(s32 *)(&insn_buf[1]) = rel;
1427
1428 tprm->addr = op->kp.addr;
1429 tprm->opcode = insn_buf;
1430 tprm->len = RELATIVEJUMP_SIZE;
1431}
1432
1433/*
1434 * Replace breakpoints (int3) with relative jumps.
1435 * Caller must call with locking kprobe_mutex and text_mutex.
1436 */
1437void __kprobes arch_optimize_kprobes(struct list_head *oplist)
1438{
1439 struct optimized_kprobe *op, *tmp;
1440 int c = 0;
1441
1442 list_for_each_entry_safe(op, tmp, oplist, list) {
1443 WARN_ON(kprobe_disabled(&op->kp));
1444 /* Setup param */
1445 setup_optimize_kprobe(&jump_poke_params[c],
1446 jump_poke_bufs[c].buf, op);
1447 list_del_init(&op->list);
1448 if (++c >= MAX_OPTIMIZE_PROBES)
1449 break;
1450 }
1417 1451
1418 /* 1452 /*
1419 * text_poke_smp doesn't support NMI/MCE code modifying. 1453 * text_poke_smp doesn't support NMI/MCE code modifying.
1420 * However, since kprobes itself also doesn't support NMI/MCE 1454 * However, since kprobes itself also doesn't support NMI/MCE
1421 * code probing, it's not a problem. 1455 * code probing, it's not a problem.
1422 */ 1456 */
1423 text_poke_smp(op->kp.addr, jmp_code, RELATIVEJUMP_SIZE); 1457 text_poke_smp_batch(jump_poke_params, c);
1424 return 0; 1458}
1459
1460static void __kprobes setup_unoptimize_kprobe(struct text_poke_param *tprm,
1461 u8 *insn_buf,
1462 struct optimized_kprobe *op)
1463{
1464 /* Set int3 to first byte for kprobes */
1465 insn_buf[0] = BREAKPOINT_INSTRUCTION;
1466 memcpy(insn_buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
1467
1468 tprm->addr = op->kp.addr;
1469 tprm->opcode = insn_buf;
1470 tprm->len = RELATIVEJUMP_SIZE;
1471}
1472
1473/*
1474 * Recover original instructions and breakpoints from relative jumps.
1475 * Caller must call with locking kprobe_mutex.
1476 */
1477extern void arch_unoptimize_kprobes(struct list_head *oplist,
1478 struct list_head *done_list)
1479{
1480 struct optimized_kprobe *op, *tmp;
1481 int c = 0;
1482
1483 list_for_each_entry_safe(op, tmp, oplist, list) {
1484 /* Setup param */
1485 setup_unoptimize_kprobe(&jump_poke_params[c],
1486 jump_poke_bufs[c].buf, op);
1487 list_move(&op->list, done_list);
1488 if (++c >= MAX_OPTIMIZE_PROBES)
1489 break;
1490 }
1491
1492 /*
1493 * text_poke_smp doesn't support NMI/MCE code modifying.
1494 * However, since kprobes itself also doesn't support NMI/MCE
1495 * code probing, it's not a problem.
1496 */
1497 text_poke_smp_batch(jump_poke_params, c);
1425} 1498}
1426 1499
1427/* Replace a relative jump with a breakpoint (int3). */ 1500/* Replace a relative jump with a breakpoint (int3). */
@@ -1453,11 +1526,35 @@ static int __kprobes setup_detour_execution(struct kprobe *p,
1453 } 1526 }
1454 return 0; 1527 return 0;
1455} 1528}
1529
1530static int __kprobes init_poke_params(void)
1531{
1532 /* Allocate code buffer and parameter array */
1533 jump_poke_bufs = kmalloc(sizeof(struct jump_poke_buffer) *
1534 MAX_OPTIMIZE_PROBES, GFP_KERNEL);
1535 if (!jump_poke_bufs)
1536 return -ENOMEM;
1537
1538 jump_poke_params = kmalloc(sizeof(struct text_poke_param) *
1539 MAX_OPTIMIZE_PROBES, GFP_KERNEL);
1540 if (!jump_poke_params) {
1541 kfree(jump_poke_bufs);
1542 jump_poke_bufs = NULL;
1543 return -ENOMEM;
1544 }
1545
1546 return 0;
1547}
1548#else /* !CONFIG_OPTPROBES */
1549static int __kprobes init_poke_params(void)
1550{
1551 return 0;
1552}
1456#endif 1553#endif
1457 1554
1458int __init arch_init_kprobes(void) 1555int __init arch_init_kprobes(void)
1459{ 1556{
1460 return 0; 1557 return init_poke_params();
1461} 1558}
1462 1559
1463int __kprobes arch_trampoline_kprobe(struct kprobe *p) 1560int __kprobes arch_trampoline_kprobe(struct kprobe *p)
diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c
index ce0cb4721c9a..0fe6d1a66c38 100644
--- a/arch/x86/kernel/microcode_amd.c
+++ b/arch/x86/kernel/microcode_amd.c
@@ -155,12 +155,6 @@ static int apply_microcode_amd(int cpu)
155 return 0; 155 return 0;
156} 156}
157 157
158static int get_ucode_data(void *to, const u8 *from, size_t n)
159{
160 memcpy(to, from, n);
161 return 0;
162}
163
164static void * 158static void *
165get_next_ucode(const u8 *buf, unsigned int size, unsigned int *mc_size) 159get_next_ucode(const u8 *buf, unsigned int size, unsigned int *mc_size)
166{ 160{
@@ -168,8 +162,7 @@ get_next_ucode(const u8 *buf, unsigned int size, unsigned int *mc_size)
168 u8 section_hdr[UCODE_CONTAINER_SECTION_HDR]; 162 u8 section_hdr[UCODE_CONTAINER_SECTION_HDR];
169 void *mc; 163 void *mc;
170 164
171 if (get_ucode_data(section_hdr, buf, UCODE_CONTAINER_SECTION_HDR)) 165 get_ucode_data(section_hdr, buf, UCODE_CONTAINER_SECTION_HDR);
172 return NULL;
173 166
174 if (section_hdr[0] != UCODE_UCODE_TYPE) { 167 if (section_hdr[0] != UCODE_UCODE_TYPE) {
175 pr_err("error: invalid type field in container file section header\n"); 168 pr_err("error: invalid type field in container file section header\n");
@@ -183,16 +176,13 @@ get_next_ucode(const u8 *buf, unsigned int size, unsigned int *mc_size)
183 return NULL; 176 return NULL;
184 } 177 }
185 178
186 mc = vmalloc(UCODE_MAX_SIZE); 179 mc = vzalloc(UCODE_MAX_SIZE);
187 if (mc) { 180 if (!mc)
188 memset(mc, 0, UCODE_MAX_SIZE); 181 return NULL;
189 if (get_ucode_data(mc, buf + UCODE_CONTAINER_SECTION_HDR, 182
190 total_size)) { 183 get_ucode_data(mc, buf + UCODE_CONTAINER_SECTION_HDR, total_size);
191 vfree(mc); 184 *mc_size = total_size + UCODE_CONTAINER_SECTION_HDR;
192 mc = NULL; 185
193 } else
194 *mc_size = total_size + UCODE_CONTAINER_SECTION_HDR;
195 }
196 return mc; 186 return mc;
197} 187}
198 188
@@ -202,8 +192,7 @@ static int install_equiv_cpu_table(const u8 *buf)
202 unsigned int *buf_pos = (unsigned int *)container_hdr; 192 unsigned int *buf_pos = (unsigned int *)container_hdr;
203 unsigned long size; 193 unsigned long size;
204 194
205 if (get_ucode_data(&container_hdr, buf, UCODE_CONTAINER_HEADER_SIZE)) 195 get_ucode_data(&container_hdr, buf, UCODE_CONTAINER_HEADER_SIZE);
206 return 0;
207 196
208 size = buf_pos[2]; 197 size = buf_pos[2];
209 198
@@ -219,10 +208,7 @@ static int install_equiv_cpu_table(const u8 *buf)
219 } 208 }
220 209
221 buf += UCODE_CONTAINER_HEADER_SIZE; 210 buf += UCODE_CONTAINER_HEADER_SIZE;
222 if (get_ucode_data(equiv_cpu_table, buf, size)) { 211 get_ucode_data(equiv_cpu_table, buf, size);
223 vfree(equiv_cpu_table);
224 return 0;
225 }
226 212
227 return size + UCODE_CONTAINER_HEADER_SIZE; /* add header length */ 213 return size + UCODE_CONTAINER_HEADER_SIZE; /* add header length */
228} 214}
diff --git a/arch/x86/kernel/microcode_intel.c b/arch/x86/kernel/microcode_intel.c
index dcb65cc0a053..1a1b606d3e92 100644
--- a/arch/x86/kernel/microcode_intel.c
+++ b/arch/x86/kernel/microcode_intel.c
@@ -364,8 +364,7 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size,
364 364
365 /* For performance reasons, reuse mc area when possible */ 365 /* For performance reasons, reuse mc area when possible */
366 if (!mc || mc_size > curr_mc_size) { 366 if (!mc || mc_size > curr_mc_size) {
367 if (mc) 367 vfree(mc);
368 vfree(mc);
369 mc = vmalloc(mc_size); 368 mc = vmalloc(mc_size);
370 if (!mc) 369 if (!mc)
371 break; 370 break;
@@ -374,13 +373,11 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size,
374 373
375 if (get_ucode_data(mc, ucode_ptr, mc_size) || 374 if (get_ucode_data(mc, ucode_ptr, mc_size) ||
376 microcode_sanity_check(mc) < 0) { 375 microcode_sanity_check(mc) < 0) {
377 vfree(mc);
378 break; 376 break;
379 } 377 }
380 378
381 if (get_matching_microcode(&uci->cpu_sig, mc, new_rev)) { 379 if (get_matching_microcode(&uci->cpu_sig, mc, new_rev)) {
382 if (new_mc) 380 vfree(new_mc);
383 vfree(new_mc);
384 new_rev = mc_header.rev; 381 new_rev = mc_header.rev;
385 new_mc = mc; 382 new_mc = mc;
386 mc = NULL; /* trigger new vmalloc */ 383 mc = NULL; /* trigger new vmalloc */
@@ -390,12 +387,10 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size,
390 leftover -= mc_size; 387 leftover -= mc_size;
391 } 388 }
392 389
393 if (mc) 390 vfree(mc);
394 vfree(mc);
395 391
396 if (leftover) { 392 if (leftover) {
397 if (new_mc) 393 vfree(new_mc);
398 vfree(new_mc);
399 state = UCODE_ERROR; 394 state = UCODE_ERROR;
400 goto out; 395 goto out;
401 } 396 }
@@ -405,8 +400,7 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size,
405 goto out; 400 goto out;
406 } 401 }
407 402
408 if (uci->mc) 403 vfree(uci->mc);
409 vfree(uci->mc);
410 uci->mc = (struct microcode_intel *)new_mc; 404 uci->mc = (struct microcode_intel *)new_mc;
411 405
412 pr_debug("CPU%d found a matching microcode update with version 0x%x (current=0x%x)\n", 406 pr_debug("CPU%d found a matching microcode update with version 0x%x (current=0x%x)\n",
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index ba0f0ca9f280..c01ffa5b9b87 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c
@@ -143,7 +143,7 @@ static void flush_gart(void)
143 143
144 spin_lock_irqsave(&iommu_bitmap_lock, flags); 144 spin_lock_irqsave(&iommu_bitmap_lock, flags);
145 if (need_flush) { 145 if (need_flush) {
146 k8_flush_garts(); 146 amd_flush_garts();
147 need_flush = false; 147 need_flush = false;
148 } 148 }
149 spin_unlock_irqrestore(&iommu_bitmap_lock, flags); 149 spin_unlock_irqrestore(&iommu_bitmap_lock, flags);
@@ -561,17 +561,17 @@ static void enable_gart_translations(void)
561{ 561{
562 int i; 562 int i;
563 563
564 if (!k8_northbridges.gart_supported) 564 if (!amd_nb_has_feature(AMD_NB_GART))
565 return; 565 return;
566 566
567 for (i = 0; i < k8_northbridges.num; i++) { 567 for (i = 0; i < amd_nb_num(); i++) {
568 struct pci_dev *dev = k8_northbridges.nb_misc[i]; 568 struct pci_dev *dev = node_to_amd_nb(i)->misc;
569 569
570 enable_gart_translation(dev, __pa(agp_gatt_table)); 570 enable_gart_translation(dev, __pa(agp_gatt_table));
571 } 571 }
572 572
573 /* Flush the GART-TLB to remove stale entries */ 573 /* Flush the GART-TLB to remove stale entries */
574 k8_flush_garts(); 574 amd_flush_garts();
575} 575}
576 576
577/* 577/*
@@ -596,13 +596,13 @@ static void gart_fixup_northbridges(struct sys_device *dev)
596 if (!fix_up_north_bridges) 596 if (!fix_up_north_bridges)
597 return; 597 return;
598 598
599 if (!k8_northbridges.gart_supported) 599 if (!amd_nb_has_feature(AMD_NB_GART))
600 return; 600 return;
601 601
602 pr_info("PCI-DMA: Restoring GART aperture settings\n"); 602 pr_info("PCI-DMA: Restoring GART aperture settings\n");
603 603
604 for (i = 0; i < k8_northbridges.num; i++) { 604 for (i = 0; i < amd_nb_num(); i++) {
605 struct pci_dev *dev = k8_northbridges.nb_misc[i]; 605 struct pci_dev *dev = node_to_amd_nb(i)->misc;
606 606
607 /* 607 /*
608 * Don't enable translations just yet. That is the next 608 * Don't enable translations just yet. That is the next
@@ -644,7 +644,7 @@ static struct sys_device device_gart = {
644 * Private Northbridge GATT initialization in case we cannot use the 644 * Private Northbridge GATT initialization in case we cannot use the
645 * AGP driver for some reason. 645 * AGP driver for some reason.
646 */ 646 */
647static __init int init_k8_gatt(struct agp_kern_info *info) 647static __init int init_amd_gatt(struct agp_kern_info *info)
648{ 648{
649 unsigned aper_size, gatt_size, new_aper_size; 649 unsigned aper_size, gatt_size, new_aper_size;
650 unsigned aper_base, new_aper_base; 650 unsigned aper_base, new_aper_base;
@@ -656,8 +656,8 @@ static __init int init_k8_gatt(struct agp_kern_info *info)
656 656
657 aper_size = aper_base = info->aper_size = 0; 657 aper_size = aper_base = info->aper_size = 0;
658 dev = NULL; 658 dev = NULL;
659 for (i = 0; i < k8_northbridges.num; i++) { 659 for (i = 0; i < amd_nb_num(); i++) {
660 dev = k8_northbridges.nb_misc[i]; 660 dev = node_to_amd_nb(i)->misc;
661 new_aper_base = read_aperture(dev, &new_aper_size); 661 new_aper_base = read_aperture(dev, &new_aper_size);
662 if (!new_aper_base) 662 if (!new_aper_base)
663 goto nommu; 663 goto nommu;
@@ -725,13 +725,13 @@ static void gart_iommu_shutdown(void)
725 if (!no_agp) 725 if (!no_agp)
726 return; 726 return;
727 727
728 if (!k8_northbridges.gart_supported) 728 if (!amd_nb_has_feature(AMD_NB_GART))
729 return; 729 return;
730 730
731 for (i = 0; i < k8_northbridges.num; i++) { 731 for (i = 0; i < amd_nb_num(); i++) {
732 u32 ctl; 732 u32 ctl;
733 733
734 dev = k8_northbridges.nb_misc[i]; 734 dev = node_to_amd_nb(i)->misc;
735 pci_read_config_dword(dev, AMD64_GARTAPERTURECTL, &ctl); 735 pci_read_config_dword(dev, AMD64_GARTAPERTURECTL, &ctl);
736 736
737 ctl &= ~GARTEN; 737 ctl &= ~GARTEN;
@@ -749,14 +749,14 @@ int __init gart_iommu_init(void)
749 unsigned long scratch; 749 unsigned long scratch;
750 long i; 750 long i;
751 751
752 if (!k8_northbridges.gart_supported) 752 if (!amd_nb_has_feature(AMD_NB_GART))
753 return 0; 753 return 0;
754 754
755#ifndef CONFIG_AGP_AMD64 755#ifndef CONFIG_AGP_AMD64
756 no_agp = 1; 756 no_agp = 1;
757#else 757#else
758 /* Makefile puts PCI initialization via subsys_initcall first. */ 758 /* Makefile puts PCI initialization via subsys_initcall first. */
759 /* Add other K8 AGP bridge drivers here */ 759 /* Add other AMD AGP bridge drivers here */
760 no_agp = no_agp || 760 no_agp = no_agp ||
761 (agp_amd64_init() < 0) || 761 (agp_amd64_init() < 0) ||
762 (agp_copy_info(agp_bridge, &info) < 0); 762 (agp_copy_info(agp_bridge, &info) < 0);
@@ -765,7 +765,7 @@ int __init gart_iommu_init(void)
765 if (no_iommu || 765 if (no_iommu ||
766 (!force_iommu && max_pfn <= MAX_DMA32_PFN) || 766 (!force_iommu && max_pfn <= MAX_DMA32_PFN) ||
767 !gart_iommu_aperture || 767 !gart_iommu_aperture ||
768 (no_agp && init_k8_gatt(&info) < 0)) { 768 (no_agp && init_amd_gatt(&info) < 0)) {
769 if (max_pfn > MAX_DMA32_PFN) { 769 if (max_pfn > MAX_DMA32_PFN) {
770 pr_warning("More than 4GB of memory but GART IOMMU not available.\n"); 770 pr_warning("More than 4GB of memory but GART IOMMU not available.\n");
771 pr_warning("falling back to iommu=soft.\n"); 771 pr_warning("falling back to iommu=soft.\n");
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 57d1868a86aa..c852041bfc3d 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -91,8 +91,7 @@ void exit_thread(void)
91void show_regs(struct pt_regs *regs) 91void show_regs(struct pt_regs *regs)
92{ 92{
93 show_registers(regs); 93 show_registers(regs);
94 show_trace(NULL, regs, (unsigned long *)kernel_stack_pointer(regs), 94 show_trace(NULL, regs, (unsigned long *)kernel_stack_pointer(regs));
95 regs->bp);
96} 95}
97 96
98void show_regs_common(void) 97void show_regs_common(void)
@@ -374,6 +373,7 @@ void default_idle(void)
374{ 373{
375 if (hlt_use_halt()) { 374 if (hlt_use_halt()) {
376 trace_power_start(POWER_CSTATE, 1, smp_processor_id()); 375 trace_power_start(POWER_CSTATE, 1, smp_processor_id());
376 trace_cpu_idle(1, smp_processor_id());
377 current_thread_info()->status &= ~TS_POLLING; 377 current_thread_info()->status &= ~TS_POLLING;
378 /* 378 /*
379 * TS_POLLING-cleared state must be visible before we 379 * TS_POLLING-cleared state must be visible before we
@@ -444,6 +444,7 @@ EXPORT_SYMBOL_GPL(cpu_idle_wait);
444void mwait_idle_with_hints(unsigned long ax, unsigned long cx) 444void mwait_idle_with_hints(unsigned long ax, unsigned long cx)
445{ 445{
446 trace_power_start(POWER_CSTATE, (ax>>4)+1, smp_processor_id()); 446 trace_power_start(POWER_CSTATE, (ax>>4)+1, smp_processor_id());
447 trace_cpu_idle((ax>>4)+1, smp_processor_id());
447 if (!need_resched()) { 448 if (!need_resched()) {
448 if (cpu_has(&current_cpu_data, X86_FEATURE_CLFLUSH_MONITOR)) 449 if (cpu_has(&current_cpu_data, X86_FEATURE_CLFLUSH_MONITOR))
449 clflush((void *)&current_thread_info()->flags); 450 clflush((void *)&current_thread_info()->flags);
@@ -460,6 +461,7 @@ static void mwait_idle(void)
460{ 461{
461 if (!need_resched()) { 462 if (!need_resched()) {
462 trace_power_start(POWER_CSTATE, 1, smp_processor_id()); 463 trace_power_start(POWER_CSTATE, 1, smp_processor_id());
464 trace_cpu_idle(1, smp_processor_id());
463 if (cpu_has(&current_cpu_data, X86_FEATURE_CLFLUSH_MONITOR)) 465 if (cpu_has(&current_cpu_data, X86_FEATURE_CLFLUSH_MONITOR))
464 clflush((void *)&current_thread_info()->flags); 466 clflush((void *)&current_thread_info()->flags);
465 467
@@ -481,10 +483,12 @@ static void mwait_idle(void)
481static void poll_idle(void) 483static void poll_idle(void)
482{ 484{
483 trace_power_start(POWER_CSTATE, 0, smp_processor_id()); 485 trace_power_start(POWER_CSTATE, 0, smp_processor_id());
486 trace_cpu_idle(0, smp_processor_id());
484 local_irq_enable(); 487 local_irq_enable();
485 while (!need_resched()) 488 while (!need_resched())
486 cpu_relax(); 489 cpu_relax();
487 trace_power_end(0); 490 trace_power_end(smp_processor_id());
491 trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id());
488} 492}
489 493
490/* 494/*
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 96586c3cbbbf..4b9befa0e347 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -113,8 +113,8 @@ void cpu_idle(void)
113 stop_critical_timings(); 113 stop_critical_timings();
114 pm_idle(); 114 pm_idle();
115 start_critical_timings(); 115 start_critical_timings();
116
117 trace_power_end(smp_processor_id()); 116 trace_power_end(smp_processor_id());
117 trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id());
118 } 118 }
119 tick_nohz_restart_sched_tick(); 119 tick_nohz_restart_sched_tick();
120 preempt_enable_no_resched(); 120 preempt_enable_no_resched();
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index b3d7a3a04f38..4c818a738396 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -142,6 +142,8 @@ void cpu_idle(void)
142 start_critical_timings(); 142 start_critical_timings();
143 143
144 trace_power_end(smp_processor_id()); 144 trace_power_end(smp_processor_id());
145 trace_cpu_idle(PWR_EVENT_EXIT,
146 smp_processor_id());
145 147
146 /* In many cases the interrupt that ended idle 148 /* In many cases the interrupt that ended idle
147 has already called exit_idle. But some idle 149 has already called exit_idle. But some idle
diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c
index 008b91eefa18..42eb3300dfc6 100644
--- a/arch/x86/kernel/pvclock.c
+++ b/arch/x86/kernel/pvclock.c
@@ -83,6 +83,11 @@ unsigned long pvclock_tsc_khz(struct pvclock_vcpu_time_info *src)
83 83
84static atomic64_t last_value = ATOMIC64_INIT(0); 84static atomic64_t last_value = ATOMIC64_INIT(0);
85 85
86void pvclock_resume(void)
87{
88 atomic64_set(&last_value, 0);
89}
90
86cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src) 91cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src)
87{ 92{
88 struct pvclock_shadow_time shadow; 93 struct pvclock_shadow_time shadow;
diff --git a/arch/x86/kernel/reboot_fixups_32.c b/arch/x86/kernel/reboot_fixups_32.c
index fda313ebbb03..c8e41e90f59c 100644
--- a/arch/x86/kernel/reboot_fixups_32.c
+++ b/arch/x86/kernel/reboot_fixups_32.c
@@ -43,17 +43,33 @@ static void rdc321x_reset(struct pci_dev *dev)
43 outb(1, 0x92); 43 outb(1, 0x92);
44} 44}
45 45
46static void ce4100_reset(struct pci_dev *dev)
47{
48 int i;
49
50 for (i = 0; i < 10; i++) {
51 outb(0x2, 0xcf9);
52 udelay(50);
53 }
54}
55
46struct device_fixup { 56struct device_fixup {
47 unsigned int vendor; 57 unsigned int vendor;
48 unsigned int device; 58 unsigned int device;
49 void (*reboot_fixup)(struct pci_dev *); 59 void (*reboot_fixup)(struct pci_dev *);
50}; 60};
51 61
62/*
63 * PCI ids solely used for fixups_table go here
64 */
65#define PCI_DEVICE_ID_INTEL_CE4100 0x0708
66
52static const struct device_fixup fixups_table[] = { 67static const struct device_fixup fixups_table[] = {
53{ PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5530_LEGACY, cs5530a_warm_reset }, 68{ PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5530_LEGACY, cs5530a_warm_reset },
54{ PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CS5536_ISA, cs5536_warm_reset }, 69{ PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CS5536_ISA, cs5536_warm_reset },
55{ PCI_VENDOR_ID_NS, PCI_DEVICE_ID_NS_SC1100_BRIDGE, cs5530a_warm_reset }, 70{ PCI_VENDOR_ID_NS, PCI_DEVICE_ID_NS_SC1100_BRIDGE, cs5530a_warm_reset },
56{ PCI_VENDOR_ID_RDC, PCI_DEVICE_ID_RDC_R6030, rdc321x_reset }, 71{ PCI_VENDOR_ID_RDC, PCI_DEVICE_ID_RDC_R6030, rdc321x_reset },
72{ PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CE4100, ce4100_reset },
57}; 73};
58 74
59/* 75/*
diff --git a/arch/x86/kernel/resource.c b/arch/x86/kernel/resource.c
new file mode 100644
index 000000000000..2a26819bb6a8
--- /dev/null
+++ b/arch/x86/kernel/resource.c
@@ -0,0 +1,48 @@
1#include <linux/ioport.h>
2#include <asm/e820.h>
3
4static void resource_clip(struct resource *res, resource_size_t start,
5 resource_size_t end)
6{
7 resource_size_t low = 0, high = 0;
8
9 if (res->end < start || res->start > end)
10 return; /* no conflict */
11
12 if (res->start < start)
13 low = start - res->start;
14
15 if (res->end > end)
16 high = res->end - end;
17
18 /* Keep the area above or below the conflict, whichever is larger */
19 if (low > high)
20 res->end = start - 1;
21 else
22 res->start = end + 1;
23}
24
25static void remove_e820_regions(struct resource *avail)
26{
27 int i;
28 struct e820entry *entry;
29
30 for (i = 0; i < e820.nr_map; i++) {
31 entry = &e820.map[i];
32
33 resource_clip(avail, entry->addr,
34 entry->addr + entry->size - 1);
35 }
36}
37
38void arch_remove_reservations(struct resource *avail)
39{
40 /* Trim out BIOS areas (low 1MB and high 2MB) and E820 regions */
41 if (avail->flags & IORESOURCE_MEM) {
42 if (avail->start < BIOS_END)
43 avail->start = BIOS_END;
44 resource_clip(avail, BIOS_ROM_BASE, BIOS_ROM_END);
45
46 remove_e820_regions(avail);
47 }
48}
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 21c6746338af..d3cfe26c0252 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -501,7 +501,18 @@ static inline unsigned long long get_total_mem(void)
501 return total << PAGE_SHIFT; 501 return total << PAGE_SHIFT;
502} 502}
503 503
504#define DEFAULT_BZIMAGE_ADDR_MAX 0x37FFFFFF 504/*
505 * Keep the crash kernel below this limit. On 32 bits earlier kernels
506 * would limit the kernel to the low 512 MiB due to mapping restrictions.
507 * On 64 bits, kexec-tools currently limits us to 896 MiB; increase this
508 * limit once kexec-tools are fixed.
509 */
510#ifdef CONFIG_X86_32
511# define CRASH_KERNEL_ADDR_MAX (512 << 20)
512#else
513# define CRASH_KERNEL_ADDR_MAX (896 << 20)
514#endif
515
505static void __init reserve_crashkernel(void) 516static void __init reserve_crashkernel(void)
506{ 517{
507 unsigned long long total_mem; 518 unsigned long long total_mem;
@@ -520,10 +531,10 @@ static void __init reserve_crashkernel(void)
520 const unsigned long long alignment = 16<<20; /* 16M */ 531 const unsigned long long alignment = 16<<20; /* 16M */
521 532
522 /* 533 /*
523 * kexec want bzImage is below DEFAULT_BZIMAGE_ADDR_MAX 534 * kexec want bzImage is below CRASH_KERNEL_ADDR_MAX
524 */ 535 */
525 crash_base = memblock_find_in_range(alignment, 536 crash_base = memblock_find_in_range(alignment,
526 DEFAULT_BZIMAGE_ADDR_MAX, crash_size, alignment); 537 CRASH_KERNEL_ADDR_MAX, crash_size, alignment);
527 538
528 if (crash_base == MEMBLOCK_ERROR) { 539 if (crash_base == MEMBLOCK_ERROR) {
529 pr_info("crashkernel reservation failed - No suitable area found.\n"); 540 pr_info("crashkernel reservation failed - No suitable area found.\n");
@@ -694,7 +705,7 @@ static u64 __init get_max_mapped(void)
694void __init setup_arch(char **cmdline_p) 705void __init setup_arch(char **cmdline_p)
695{ 706{
696 int acpi = 0; 707 int acpi = 0;
697 int k8 = 0; 708 int amd = 0;
698 unsigned long flags; 709 unsigned long flags;
699 710
700#ifdef CONFIG_X86_32 711#ifdef CONFIG_X86_32
@@ -769,7 +780,6 @@ void __init setup_arch(char **cmdline_p)
769 780
770 x86_init.oem.arch_setup(); 781 x86_init.oem.arch_setup();
771 782
772 resource_alloc_from_bottom = 0;
773 iomem_resource.end = (1ULL << boot_cpu_data.x86_phys_bits) - 1; 783 iomem_resource.end = (1ULL << boot_cpu_data.x86_phys_bits) - 1;
774 setup_memory_map(); 784 setup_memory_map();
775 parse_setup_data(); 785 parse_setup_data();
@@ -981,12 +991,12 @@ void __init setup_arch(char **cmdline_p)
981 acpi = acpi_numa_init(); 991 acpi = acpi_numa_init();
982#endif 992#endif
983 993
984#ifdef CONFIG_K8_NUMA 994#ifdef CONFIG_AMD_NUMA
985 if (!acpi) 995 if (!acpi)
986 k8 = !k8_numa_init(0, max_pfn); 996 amd = !amd_numa_init(0, max_pfn);
987#endif 997#endif
988 998
989 initmem_init(0, max_pfn, acpi, k8); 999 initmem_init(0, max_pfn, acpi, amd);
990 memblock_find_dma_reserve(); 1000 memblock_find_dma_reserve();
991 dma32_reserve_bootmem(); 1001 dma32_reserve_bootmem();
992 1002
@@ -1035,10 +1045,7 @@ void __init setup_arch(char **cmdline_p)
1035#endif 1045#endif
1036 1046
1037 init_apic_mappings(); 1047 init_apic_mappings();
1038 ioapic_init_mappings(); 1048 ioapic_and_gsi_init();
1039
1040 /* need to wait for io_apic is mapped */
1041 probe_nr_irqs_gsi();
1042 1049
1043 kvm_guest_init(); 1050 kvm_guest_init();
1044 1051
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 837c81e99edf..ee886fe10ef4 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -281,6 +281,13 @@ static void __cpuinit smp_callin(void)
281 */ 281 */
282 smp_store_cpu_info(cpuid); 282 smp_store_cpu_info(cpuid);
283 283
284 /*
285 * This must be done before setting cpu_online_mask
286 * or calling notify_cpu_starting.
287 */
288 set_cpu_sibling_map(raw_smp_processor_id());
289 wmb();
290
284 notify_cpu_starting(cpuid); 291 notify_cpu_starting(cpuid);
285 292
286 /* 293 /*
@@ -316,16 +323,6 @@ notrace static void __cpuinit start_secondary(void *unused)
316 */ 323 */
317 check_tsc_sync_target(); 324 check_tsc_sync_target();
318 325
319 if (nmi_watchdog == NMI_IO_APIC) {
320 legacy_pic->mask(0);
321 enable_NMI_through_LVT0();
322 legacy_pic->unmask(0);
323 }
324
325 /* This must be done before setting cpu_online_mask */
326 set_cpu_sibling_map(raw_smp_processor_id());
327 wmb();
328
329 /* 326 /*
330 * We need to hold call_lock, so there is no inconsistency 327 * We need to hold call_lock, so there is no inconsistency
331 * between the time smp_call_function() determines number of 328 * between the time smp_call_function() determines number of
@@ -1061,8 +1058,6 @@ static int __init smp_sanity_check(unsigned max_cpus)
1061 printk(KERN_INFO "SMP mode deactivated.\n"); 1058 printk(KERN_INFO "SMP mode deactivated.\n");
1062 smpboot_clear_io_apic(); 1059 smpboot_clear_io_apic();
1063 1060
1064 localise_nmi_watchdog();
1065
1066 connect_bsp_APIC(); 1061 connect_bsp_APIC();
1067 setup_local_APIC(); 1062 setup_local_APIC();
1068 end_local_APIC_setup(); 1063 end_local_APIC_setup();
@@ -1210,7 +1205,6 @@ void __init native_smp_cpus_done(unsigned int max_cpus)
1210#ifdef CONFIG_X86_IO_APIC 1205#ifdef CONFIG_X86_IO_APIC
1211 setup_ioapic_dest(); 1206 setup_ioapic_dest();
1212#endif 1207#endif
1213 check_nmi_watchdog();
1214 mtrr_aps_init(); 1208 mtrr_aps_init();
1215} 1209}
1216 1210
@@ -1355,8 +1349,6 @@ int native_cpu_disable(void)
1355 if (cpu == 0) 1349 if (cpu == 0)
1356 return -EBUSY; 1350 return -EBUSY;
1357 1351
1358 if (nmi_watchdog == NMI_LOCAL_APIC)
1359 stop_apic_nmi_watchdog(NULL);
1360 clear_local_APIC(); 1352 clear_local_APIC();
1361 1353
1362 cpu_disable_common(); 1354 cpu_disable_common();
diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c
index b53c525368a7..938c8e10a19a 100644
--- a/arch/x86/kernel/stacktrace.c
+++ b/arch/x86/kernel/stacktrace.c
@@ -73,22 +73,22 @@ static const struct stacktrace_ops save_stack_ops_nosched = {
73 */ 73 */
74void save_stack_trace(struct stack_trace *trace) 74void save_stack_trace(struct stack_trace *trace)
75{ 75{
76 dump_trace(current, NULL, NULL, 0, &save_stack_ops, trace); 76 dump_trace(current, NULL, NULL, &save_stack_ops, trace);
77 if (trace->nr_entries < trace->max_entries) 77 if (trace->nr_entries < trace->max_entries)
78 trace->entries[trace->nr_entries++] = ULONG_MAX; 78 trace->entries[trace->nr_entries++] = ULONG_MAX;
79} 79}
80EXPORT_SYMBOL_GPL(save_stack_trace); 80EXPORT_SYMBOL_GPL(save_stack_trace);
81 81
82void save_stack_trace_bp(struct stack_trace *trace, unsigned long bp) 82void save_stack_trace_regs(struct stack_trace *trace, struct pt_regs *regs)
83{ 83{
84 dump_trace(current, NULL, NULL, bp, &save_stack_ops, trace); 84 dump_trace(current, regs, NULL, &save_stack_ops, trace);
85 if (trace->nr_entries < trace->max_entries) 85 if (trace->nr_entries < trace->max_entries)
86 trace->entries[trace->nr_entries++] = ULONG_MAX; 86 trace->entries[trace->nr_entries++] = ULONG_MAX;
87} 87}
88 88
89void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) 89void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
90{ 90{
91 dump_trace(tsk, NULL, NULL, 0, &save_stack_ops_nosched, trace); 91 dump_trace(tsk, NULL, NULL, &save_stack_ops_nosched, trace);
92 if (trace->nr_entries < trace->max_entries) 92 if (trace->nr_entries < trace->max_entries)
93 trace->entries[trace->nr_entries++] = ULONG_MAX; 93 trace->entries[trace->nr_entries++] = ULONG_MAX;
94} 94}
diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c
index fb5cc5e14cfa..25a28a245937 100644
--- a/arch/x86/kernel/time.c
+++ b/arch/x86/kernel/time.c
@@ -22,10 +22,6 @@
22#include <asm/hpet.h> 22#include <asm/hpet.h>
23#include <asm/time.h> 23#include <asm/time.h>
24 24
25#if defined(CONFIG_X86_32) && defined(CONFIG_X86_IO_APIC)
26int timer_ack;
27#endif
28
29#ifdef CONFIG_X86_64 25#ifdef CONFIG_X86_64
30volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES; 26volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES;
31#endif 27#endif
@@ -63,20 +59,6 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id)
63 /* Keep nmi watchdog up to date */ 59 /* Keep nmi watchdog up to date */
64 inc_irq_stat(irq0_irqs); 60 inc_irq_stat(irq0_irqs);
65 61
66 /* Optimized out for !IO_APIC and x86_64 */
67 if (timer_ack) {
68 /*
69 * Subtle, when I/O APICs are used we have to ack timer IRQ
70 * manually to deassert NMI lines for the watchdog if run
71 * on an 82489DX-based system.
72 */
73 raw_spin_lock(&i8259A_lock);
74 outb(0x0c, PIC_MASTER_OCW3);
75 /* Ack the IRQ; AEOI will end it automatically. */
76 inb(PIC_MASTER_POLL);
77 raw_spin_unlock(&i8259A_lock);
78 }
79
80 global_clock_event->event_handler(global_clock_event); 62 global_clock_event->event_handler(global_clock_event);
81 63
82 /* MCA bus quirk: Acknowledge irq0 by setting bit 7 in port 0x61 */ 64 /* MCA bus quirk: Acknowledge irq0 by setting bit 7 in port 0x61 */
diff --git a/arch/x86/kernel/trampoline_64.S b/arch/x86/kernel/trampoline_64.S
index 3af2dff58b21..075d130efcf9 100644
--- a/arch/x86/kernel/trampoline_64.S
+++ b/arch/x86/kernel/trampoline_64.S
@@ -127,7 +127,7 @@ startup_64:
127no_longmode: 127no_longmode:
128 hlt 128 hlt
129 jmp no_longmode 129 jmp no_longmode
130#include "verify_cpu_64.S" 130#include "verify_cpu.S"
131 131
132 # Careful these need to be in the same 64K segment as the above; 132 # Careful these need to be in the same 64K segment as the above;
133tidt: 133tidt:
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index cb838ca42c96..c76aaca5694d 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -83,6 +83,8 @@ EXPORT_SYMBOL_GPL(used_vectors);
83 83
84static int ignore_nmis; 84static int ignore_nmis;
85 85
86int unknown_nmi_panic;
87
86static inline void conditional_sti(struct pt_regs *regs) 88static inline void conditional_sti(struct pt_regs *regs)
87{ 89{
88 if (regs->flags & X86_EFLAGS_IF) 90 if (regs->flags & X86_EFLAGS_IF)
@@ -300,6 +302,13 @@ gp_in_kernel:
300 die("general protection fault", regs, error_code); 302 die("general protection fault", regs, error_code);
301} 303}
302 304
305static int __init setup_unknown_nmi_panic(char *str)
306{
307 unknown_nmi_panic = 1;
308 return 1;
309}
310__setup("unknown_nmi_panic", setup_unknown_nmi_panic);
311
303static notrace __kprobes void 312static notrace __kprobes void
304mem_parity_error(unsigned char reason, struct pt_regs *regs) 313mem_parity_error(unsigned char reason, struct pt_regs *regs)
305{ 314{
@@ -342,9 +351,11 @@ io_check_error(unsigned char reason, struct pt_regs *regs)
342 reason = (reason & 0xf) | 8; 351 reason = (reason & 0xf) | 8;
343 outb(reason, 0x61); 352 outb(reason, 0x61);
344 353
345 i = 2000; 354 i = 20000;
346 while (--i) 355 while (--i) {
347 udelay(1000); 356 touch_nmi_watchdog();
357 udelay(100);
358 }
348 359
349 reason &= ~8; 360 reason &= ~8;
350 outb(reason, 0x61); 361 outb(reason, 0x61);
@@ -371,7 +382,7 @@ unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
371 reason, smp_processor_id()); 382 reason, smp_processor_id());
372 383
373 printk(KERN_EMERG "Do you have a strange power saving mode enabled?\n"); 384 printk(KERN_EMERG "Do you have a strange power saving mode enabled?\n");
374 if (panic_on_unrecovered_nmi) 385 if (unknown_nmi_panic || panic_on_unrecovered_nmi)
375 panic("NMI: Not continuing"); 386 panic("NMI: Not continuing");
376 387
377 printk(KERN_EMERG "Dazed and confused, but trying to continue\n"); 388 printk(KERN_EMERG "Dazed and confused, but trying to continue\n");
@@ -397,20 +408,8 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
397 if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) 408 if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT)
398 == NOTIFY_STOP) 409 == NOTIFY_STOP)
399 return; 410 return;
400
401#ifndef CONFIG_LOCKUP_DETECTOR
402 /*
403 * Ok, so this is none of the documented NMI sources,
404 * so it must be the NMI watchdog.
405 */
406 if (nmi_watchdog_tick(regs, reason))
407 return;
408 if (!do_nmi_callback(regs, cpu))
409#endif /* !CONFIG_LOCKUP_DETECTOR */
410 unknown_nmi_error(reason, regs);
411#else
412 unknown_nmi_error(reason, regs);
413#endif 411#endif
412 unknown_nmi_error(reason, regs);
414 413
415 return; 414 return;
416 } 415 }
@@ -446,14 +445,12 @@ do_nmi(struct pt_regs *regs, long error_code)
446 445
447void stop_nmi(void) 446void stop_nmi(void)
448{ 447{
449 acpi_nmi_disable();
450 ignore_nmis++; 448 ignore_nmis++;
451} 449}
452 450
453void restart_nmi(void) 451void restart_nmi(void)
454{ 452{
455 ignore_nmis--; 453 ignore_nmis--;
456 acpi_nmi_enable();
457} 454}
458 455
459/* May run on IST stack. */ 456/* May run on IST stack. */
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 0c40d8b72416..356a0d455cf9 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -872,6 +872,9 @@ __cpuinit int unsynchronized_tsc(void)
872 872
873 if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) 873 if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
874 return 0; 874 return 0;
875
876 if (tsc_clocksource_reliable)
877 return 0;
875 /* 878 /*
876 * Intel systems are normally all synchronized. 879 * Intel systems are normally all synchronized.
877 * Exceptions must mark TSC as unstable: 880 * Exceptions must mark TSC as unstable:
@@ -879,14 +882,92 @@ __cpuinit int unsynchronized_tsc(void)
879 if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) { 882 if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) {
880 /* assume multi socket systems are not synchronized: */ 883 /* assume multi socket systems are not synchronized: */
881 if (num_possible_cpus() > 1) 884 if (num_possible_cpus() > 1)
882 tsc_unstable = 1; 885 return 1;
883 } 886 }
884 887
885 return tsc_unstable; 888 return 0;
889}
890
891
892static void tsc_refine_calibration_work(struct work_struct *work);
893static DECLARE_DELAYED_WORK(tsc_irqwork, tsc_refine_calibration_work);
894/**
895 * tsc_refine_calibration_work - Further refine tsc freq calibration
896 * @work - ignored.
897 *
898 * This functions uses delayed work over a period of a
899 * second to further refine the TSC freq value. Since this is
900 * timer based, instead of loop based, we don't block the boot
901 * process while this longer calibration is done.
902 *
903 * If there are any calibration anomolies (too many SMIs, etc),
904 * or the refined calibration is off by 1% of the fast early
905 * calibration, we throw out the new calibration and use the
906 * early calibration.
907 */
908static void tsc_refine_calibration_work(struct work_struct *work)
909{
910 static u64 tsc_start = -1, ref_start;
911 static int hpet;
912 u64 tsc_stop, ref_stop, delta;
913 unsigned long freq;
914
915 /* Don't bother refining TSC on unstable systems */
916 if (check_tsc_unstable())
917 goto out;
918
919 /*
920 * Since the work is started early in boot, we may be
921 * delayed the first time we expire. So set the workqueue
922 * again once we know timers are working.
923 */
924 if (tsc_start == -1) {
925 /*
926 * Only set hpet once, to avoid mixing hardware
927 * if the hpet becomes enabled later.
928 */
929 hpet = is_hpet_enabled();
930 schedule_delayed_work(&tsc_irqwork, HZ);
931 tsc_start = tsc_read_refs(&ref_start, hpet);
932 return;
933 }
934
935 tsc_stop = tsc_read_refs(&ref_stop, hpet);
936
937 /* hpet or pmtimer available ? */
938 if (!hpet && !ref_start && !ref_stop)
939 goto out;
940
941 /* Check, whether the sampling was disturbed by an SMI */
942 if (tsc_start == ULLONG_MAX || tsc_stop == ULLONG_MAX)
943 goto out;
944
945 delta = tsc_stop - tsc_start;
946 delta *= 1000000LL;
947 if (hpet)
948 freq = calc_hpet_ref(delta, ref_start, ref_stop);
949 else
950 freq = calc_pmtimer_ref(delta, ref_start, ref_stop);
951
952 /* Make sure we're within 1% */
953 if (abs(tsc_khz - freq) > tsc_khz/100)
954 goto out;
955
956 tsc_khz = freq;
957 printk(KERN_INFO "Refined TSC clocksource calibration: "
958 "%lu.%03lu MHz.\n", (unsigned long)tsc_khz / 1000,
959 (unsigned long)tsc_khz % 1000);
960
961out:
962 clocksource_register_khz(&clocksource_tsc, tsc_khz);
886} 963}
887 964
888static void __init init_tsc_clocksource(void) 965
966static int __init init_tsc_clocksource(void)
889{ 967{
968 if (!cpu_has_tsc || tsc_disabled > 0)
969 return 0;
970
890 if (tsc_clocksource_reliable) 971 if (tsc_clocksource_reliable)
891 clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY; 972 clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY;
892 /* lower the rating if we already know its unstable: */ 973 /* lower the rating if we already know its unstable: */
@@ -894,8 +975,14 @@ static void __init init_tsc_clocksource(void)
894 clocksource_tsc.rating = 0; 975 clocksource_tsc.rating = 0;
895 clocksource_tsc.flags &= ~CLOCK_SOURCE_IS_CONTINUOUS; 976 clocksource_tsc.flags &= ~CLOCK_SOURCE_IS_CONTINUOUS;
896 } 977 }
897 clocksource_register_khz(&clocksource_tsc, tsc_khz); 978 schedule_delayed_work(&tsc_irqwork, 0);
979 return 0;
898} 980}
981/*
982 * We use device_initcall here, to ensure we run after the hpet
983 * is fully initialized, which may occur at fs_initcall time.
984 */
985device_initcall(init_tsc_clocksource);
899 986
900void __init tsc_init(void) 987void __init tsc_init(void)
901{ 988{
@@ -949,6 +1036,5 @@ void __init tsc_init(void)
949 mark_tsc_unstable("TSCs unsynchronized"); 1036 mark_tsc_unstable("TSCs unsynchronized");
950 1037
951 check_system_tsc_reliable(); 1038 check_system_tsc_reliable();
952 init_tsc_clocksource();
953} 1039}
954 1040
diff --git a/arch/x86/kernel/verify_cpu_64.S b/arch/x86/kernel/verify_cpu.S
index 56a8c2a867d9..0edefc19a113 100644
--- a/arch/x86/kernel/verify_cpu_64.S
+++ b/arch/x86/kernel/verify_cpu.S
@@ -7,6 +7,7 @@
7 * Copyright (c) 2007 Andi Kleen (ak@suse.de) 7 * Copyright (c) 2007 Andi Kleen (ak@suse.de)
8 * Copyright (c) 2007 Eric Biederman (ebiederm@xmission.com) 8 * Copyright (c) 2007 Eric Biederman (ebiederm@xmission.com)
9 * Copyright (c) 2007 Vivek Goyal (vgoyal@in.ibm.com) 9 * Copyright (c) 2007 Vivek Goyal (vgoyal@in.ibm.com)
10 * Copyright (c) 2010 Kees Cook (kees.cook@canonical.com)
10 * 11 *
11 * This source code is licensed under the GNU General Public License, 12 * This source code is licensed under the GNU General Public License,
12 * Version 2. See the file COPYING for more details. 13 * Version 2. See the file COPYING for more details.
@@ -14,18 +15,17 @@
14 * This is a common code for verification whether CPU supports 15 * This is a common code for verification whether CPU supports
15 * long mode and SSE or not. It is not called directly instead this 16 * long mode and SSE or not. It is not called directly instead this
16 * file is included at various places and compiled in that context. 17 * file is included at various places and compiled in that context.
17 * Following are the current usage. 18 * This file is expected to run in 32bit code. Currently:
18 * 19 *
19 * This file is included by both 16bit and 32bit code. 20 * arch/x86/boot/compressed/head_64.S: Boot cpu verification
21 * arch/x86/kernel/trampoline_64.S: secondary processor verfication
22 * arch/x86/kernel/head_32.S: processor startup
20 * 23 *
21 * arch/x86_64/boot/setup.S : Boot cpu verification (16bit) 24 * verify_cpu, returns the status of longmode and SSE in register %eax.
22 * arch/x86_64/boot/compressed/head.S: Boot cpu verification (32bit)
23 * arch/x86_64/kernel/trampoline.S: secondary processor verfication (16bit)
24 * arch/x86_64/kernel/acpi/wakeup.S:Verfication at resume (16bit)
25 *
26 * verify_cpu, returns the status of cpu check in register %eax.
27 * 0: Success 1: Failure 25 * 0: Success 1: Failure
28 * 26 *
27 * On Intel, the XD_DISABLE flag will be cleared as a side-effect.
28 *
29 * The caller needs to check for the error code and take the action 29 * The caller needs to check for the error code and take the action
30 * appropriately. Either display a message or halt. 30 * appropriately. Either display a message or halt.
31 */ 31 */
@@ -62,8 +62,41 @@ verify_cpu:
62 cmpl $0x444d4163,%ecx 62 cmpl $0x444d4163,%ecx
63 jnz verify_cpu_noamd 63 jnz verify_cpu_noamd
64 mov $1,%di # cpu is from AMD 64 mov $1,%di # cpu is from AMD
65 jmp verify_cpu_check
65 66
66verify_cpu_noamd: 67verify_cpu_noamd:
68 cmpl $0x756e6547,%ebx # GenuineIntel?
69 jnz verify_cpu_check
70 cmpl $0x49656e69,%edx
71 jnz verify_cpu_check
72 cmpl $0x6c65746e,%ecx
73 jnz verify_cpu_check
74
75 # only call IA32_MISC_ENABLE when:
76 # family > 6 || (family == 6 && model >= 0xd)
77 movl $0x1, %eax # check CPU family and model
78 cpuid
79 movl %eax, %ecx
80
81 andl $0x0ff00f00, %eax # mask family and extended family
82 shrl $8, %eax
83 cmpl $6, %eax
84 ja verify_cpu_clear_xd # family > 6, ok
85 jb verify_cpu_check # family < 6, skip
86
87 andl $0x000f00f0, %ecx # mask model and extended model
88 shrl $4, %ecx
89 cmpl $0xd, %ecx
90 jb verify_cpu_check # family == 6, model < 0xd, skip
91
92verify_cpu_clear_xd:
93 movl $MSR_IA32_MISC_ENABLE, %ecx
94 rdmsr
95 btrl $2, %edx # clear MSR_IA32_MISC_ENABLE_XD_DISABLE
96 jnc verify_cpu_check # only write MSR if bit was changed
97 wrmsr
98
99verify_cpu_check:
67 movl $0x1,%eax # Does the cpu have what it takes 100 movl $0x1,%eax # Does the cpu have what it takes
68 cpuid 101 cpuid
69 andl $REQUIRED_MASK0,%edx 102 andl $REQUIRED_MASK0,%edx
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index e03530aebfd0..bf4700755184 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -69,7 +69,7 @@ jiffies_64 = jiffies;
69 69
70PHDRS { 70PHDRS {
71 text PT_LOAD FLAGS(5); /* R_E */ 71 text PT_LOAD FLAGS(5); /* R_E */
72 data PT_LOAD FLAGS(7); /* RWE */ 72 data PT_LOAD FLAGS(6); /* RW_ */
73#ifdef CONFIG_X86_64 73#ifdef CONFIG_X86_64
74 user PT_LOAD FLAGS(5); /* R_E */ 74 user PT_LOAD FLAGS(5); /* R_E */
75#ifdef CONFIG_SMP 75#ifdef CONFIG_SMP
@@ -116,6 +116,10 @@ SECTIONS
116 116
117 EXCEPTION_TABLE(16) :text = 0x9090 117 EXCEPTION_TABLE(16) :text = 0x9090
118 118
119#if defined(CONFIG_DEBUG_RODATA)
120 /* .text should occupy whole number of pages */
121 . = ALIGN(PAGE_SIZE);
122#endif
119 X64_ALIGN_DEBUG_RODATA_BEGIN 123 X64_ALIGN_DEBUG_RODATA_BEGIN
120 RO_DATA(PAGE_SIZE) 124 RO_DATA(PAGE_SIZE)
121 X64_ALIGN_DEBUG_RODATA_END 125 X64_ALIGN_DEBUG_RODATA_END
@@ -335,7 +339,7 @@ SECTIONS
335 __bss_start = .; 339 __bss_start = .;
336 *(.bss..page_aligned) 340 *(.bss..page_aligned)
337 *(.bss) 341 *(.bss)
338 . = ALIGN(4); 342 . = ALIGN(PAGE_SIZE);
339 __bss_stop = .; 343 __bss_stop = .;
340 } 344 }
341 345
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c
index 9c253bd65e24..547128546cc3 100644
--- a/arch/x86/kernel/xsave.c
+++ b/arch/x86/kernel/xsave.c
@@ -394,7 +394,8 @@ static void __init setup_xstate_init(void)
394 * Setup init_xstate_buf to represent the init state of 394 * Setup init_xstate_buf to represent the init state of
395 * all the features managed by the xsave 395 * all the features managed by the xsave
396 */ 396 */
397 init_xstate_buf = alloc_bootmem(xstate_size); 397 init_xstate_buf = alloc_bootmem_align(xstate_size,
398 __alignof__(struct xsave_struct));
398 init_xstate_buf->i387.mxcsr = MXCSR_DEFAULT; 399 init_xstate_buf->i387.mxcsr = MXCSR_DEFAULT;
399 400
400 clts(); 401 clts();
diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c
index f628234fbeca..3cece05e4ac4 100644
--- a/arch/x86/kvm/i8259.c
+++ b/arch/x86/kvm/i8259.c
@@ -575,6 +575,8 @@ struct kvm_pic *kvm_create_pic(struct kvm *kvm)
575 s->pics[1].elcr_mask = 0xde; 575 s->pics[1].elcr_mask = 0xde;
576 s->pics[0].pics_state = s; 576 s->pics[0].pics_state = s;
577 s->pics[1].pics_state = s; 577 s->pics[1].pics_state = s;
578 s->pics[0].isr_ack = 0xff;
579 s->pics[1].isr_ack = 0xff;
578 580
579 /* 581 /*
580 * Initialize PIO device 582 * Initialize PIO device
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index fb8b376bf28c..fbb04aee8301 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2394,7 +2394,8 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu)
2394 ASSERT(!VALID_PAGE(root)); 2394 ASSERT(!VALID_PAGE(root));
2395 spin_lock(&vcpu->kvm->mmu_lock); 2395 spin_lock(&vcpu->kvm->mmu_lock);
2396 kvm_mmu_free_some_pages(vcpu); 2396 kvm_mmu_free_some_pages(vcpu);
2397 sp = kvm_mmu_get_page(vcpu, i << 30, i << 30, 2397 sp = kvm_mmu_get_page(vcpu, i << (30 - PAGE_SHIFT),
2398 i << 30,
2398 PT32_ROOT_LEVEL, 1, ACC_ALL, 2399 PT32_ROOT_LEVEL, 1, ACC_ALL,
2399 NULL); 2400 NULL);
2400 root = __pa(sp->spt); 2401 root = __pa(sp->spt);
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 1ca12298ffc7..b81a9b7c2ca4 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -3494,6 +3494,10 @@ static void svm_cpuid_update(struct kvm_vcpu *vcpu)
3494static void svm_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry) 3494static void svm_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
3495{ 3495{
3496 switch (func) { 3496 switch (func) {
3497 case 0x00000001:
3498 /* Mask out xsave bit as long as it is not supported by SVM */
3499 entry->ecx &= ~(bit(X86_FEATURE_XSAVE));
3500 break;
3497 case 0x80000001: 3501 case 0x80000001:
3498 if (nested) 3502 if (nested)
3499 entry->ecx |= (1 << 2); /* Set SVM bit */ 3503 entry->ecx |= (1 << 2); /* Set SVM bit */
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index ff21fdda0c53..81fcbe9515c5 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -4227,11 +4227,6 @@ static int vmx_get_lpage_level(void)
4227 return PT_PDPE_LEVEL; 4227 return PT_PDPE_LEVEL;
4228} 4228}
4229 4229
4230static inline u32 bit(int bitno)
4231{
4232 return 1 << (bitno & 31);
4233}
4234
4235static void vmx_cpuid_update(struct kvm_vcpu *vcpu) 4230static void vmx_cpuid_update(struct kvm_vcpu *vcpu)
4236{ 4231{
4237 struct kvm_cpuid_entry2 *best; 4232 struct kvm_cpuid_entry2 *best;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index cdac9e592aa5..b989e1f1e5d3 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -155,11 +155,6 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
155 155
156u64 __read_mostly host_xcr0; 156u64 __read_mostly host_xcr0;
157 157
158static inline u32 bit(int bitno)
159{
160 return 1 << (bitno & 31);
161}
162
163static void kvm_on_user_return(struct user_return_notifier *urn) 158static void kvm_on_user_return(struct user_return_notifier *urn)
164{ 159{
165 unsigned slot; 160 unsigned slot;
@@ -4569,9 +4564,11 @@ static void kvm_timer_init(void)
4569#ifdef CONFIG_CPU_FREQ 4564#ifdef CONFIG_CPU_FREQ
4570 struct cpufreq_policy policy; 4565 struct cpufreq_policy policy;
4571 memset(&policy, 0, sizeof(policy)); 4566 memset(&policy, 0, sizeof(policy));
4572 cpufreq_get_policy(&policy, get_cpu()); 4567 cpu = get_cpu();
4568 cpufreq_get_policy(&policy, cpu);
4573 if (policy.cpuinfo.max_freq) 4569 if (policy.cpuinfo.max_freq)
4574 max_tsc_khz = policy.cpuinfo.max_freq; 4570 max_tsc_khz = policy.cpuinfo.max_freq;
4571 put_cpu();
4575#endif 4572#endif
4576 cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block, 4573 cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block,
4577 CPUFREQ_TRANSITION_NOTIFIER); 4574 CPUFREQ_TRANSITION_NOTIFIER);
@@ -5522,6 +5519,8 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
5522 5519
5523 mmu_reset_needed |= kvm_read_cr4(vcpu) != sregs->cr4; 5520 mmu_reset_needed |= kvm_read_cr4(vcpu) != sregs->cr4;
5524 kvm_x86_ops->set_cr4(vcpu, sregs->cr4); 5521 kvm_x86_ops->set_cr4(vcpu, sregs->cr4);
5522 if (sregs->cr4 & X86_CR4_OSXSAVE)
5523 update_cpuid(vcpu);
5525 if (!is_long_mode(vcpu) && is_pae(vcpu)) { 5524 if (!is_long_mode(vcpu) && is_pae(vcpu)) {
5526 load_pdptrs(vcpu, vcpu->arch.walk_mmu, vcpu->arch.cr3); 5525 load_pdptrs(vcpu, vcpu->arch.walk_mmu, vcpu->arch.cr3);
5527 mmu_reset_needed = 1; 5526 mmu_reset_needed = 1;
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 2cea414489f3..c600da830ce0 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -70,6 +70,11 @@ static inline int is_paging(struct kvm_vcpu *vcpu)
70 return kvm_read_cr0_bits(vcpu, X86_CR0_PG); 70 return kvm_read_cr0_bits(vcpu, X86_CR0_PG);
71} 71}
72 72
73static inline u32 bit(int bitno)
74{
75 return 1 << (bitno & 31);
76}
77
73void kvm_before_handle_nmi(struct kvm_vcpu *vcpu); 78void kvm_before_handle_nmi(struct kvm_vcpu *vcpu);
74void kvm_after_handle_nmi(struct kvm_vcpu *vcpu); 79void kvm_after_handle_nmi(struct kvm_vcpu *vcpu);
75int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq); 80int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq);
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index 73b1e1a1f489..4996cf5f73a0 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -531,7 +531,10 @@ static void lguest_write_cr3(unsigned long cr3)
531{ 531{
532 lguest_data.pgdir = cr3; 532 lguest_data.pgdir = cr3;
533 lazy_hcall1(LHCALL_NEW_PGTABLE, cr3); 533 lazy_hcall1(LHCALL_NEW_PGTABLE, cr3);
534 cr3_changed = true; 534
535 /* These two page tables are simple, linear, and used during boot */
536 if (cr3 != __pa(swapper_pg_dir) && cr3 != __pa(initial_page_table))
537 cr3_changed = true;
535} 538}
536 539
537static unsigned long lguest_read_cr3(void) 540static unsigned long lguest_read_cr3(void)
@@ -703,9 +706,9 @@ static void lguest_set_pmd(pmd_t *pmdp, pmd_t pmdval)
703 * to forget all of them. Fortunately, this is very rare. 706 * to forget all of them. Fortunately, this is very rare.
704 * 707 *
705 * ... except in early boot when the kernel sets up the initial pagetables, 708 * ... except in early boot when the kernel sets up the initial pagetables,
706 * which makes booting astonishingly slow: 1.83 seconds! So we don't even tell 709 * which makes booting astonishingly slow: 48 seconds! So we don't even tell
707 * the Host anything changed until we've done the first page table switch, 710 * the Host anything changed until we've done the first real page table switch,
708 * which brings boot back to 0.25 seconds. 711 * which brings boot back to 4.3 seconds.
709 */ 712 */
710static void lguest_set_pte(pte_t *ptep, pte_t pteval) 713static void lguest_set_pte(pte_t *ptep, pte_t pteval)
711{ 714{
@@ -1002,7 +1005,7 @@ static void lguest_time_init(void)
1002 clockevents_register_device(&lguest_clockevent); 1005 clockevents_register_device(&lguest_clockevent);
1003 1006
1004 /* Finally, we unblock the timer interrupt. */ 1007 /* Finally, we unblock the timer interrupt. */
1005 enable_lguest_irq(0); 1008 clear_bit(0, lguest_data.blocked_interrupts);
1006} 1009}
1007 1010
1008/* 1011/*
@@ -1349,9 +1352,6 @@ __init void lguest_init(void)
1349 */ 1352 */
1350 switch_to_new_gdt(0); 1353 switch_to_new_gdt(0);
1351 1354
1352 /* We actually boot with all memory mapped, but let's say 128MB. */
1353 max_pfn_mapped = (128*1024*1024) >> PAGE_SHIFT;
1354
1355 /* 1355 /*
1356 * The Host<->Guest Switcher lives at the top of our address space, and 1356 * The Host<->Guest Switcher lives at the top of our address space, and
1357 * the Host told us how big it is when we made LGUEST_INIT hypercall: 1357 * the Host told us how big it is when we made LGUEST_INIT hypercall:
diff --git a/arch/x86/lguest/i386_head.S b/arch/x86/lguest/i386_head.S
index 4f420c2f2d55..e7d5382ef263 100644
--- a/arch/x86/lguest/i386_head.S
+++ b/arch/x86/lguest/i386_head.S
@@ -4,6 +4,7 @@
4#include <asm/asm-offsets.h> 4#include <asm/asm-offsets.h>
5#include <asm/thread_info.h> 5#include <asm/thread_info.h>
6#include <asm/processor-flags.h> 6#include <asm/processor-flags.h>
7#include <asm/pgtable.h>
7 8
8/*G:020 9/*G:020
9 * Our story starts with the kernel booting into startup_32 in 10 * Our story starts with the kernel booting into startup_32 in
@@ -37,9 +38,113 @@ ENTRY(lguest_entry)
37 /* Set up the initial stack so we can run C code. */ 38 /* Set up the initial stack so we can run C code. */
38 movl $(init_thread_union+THREAD_SIZE),%esp 39 movl $(init_thread_union+THREAD_SIZE),%esp
39 40
41 call init_pagetables
42
40 /* Jumps are relative: we're running __PAGE_OFFSET too low. */ 43 /* Jumps are relative: we're running __PAGE_OFFSET too low. */
41 jmp lguest_init+__PAGE_OFFSET 44 jmp lguest_init+__PAGE_OFFSET
42 45
46/*
47 * Initialize page tables. This creates a PDE and a set of page
48 * tables, which are located immediately beyond __brk_base. The variable
49 * _brk_end is set up to point to the first "safe" location.
50 * Mappings are created both at virtual address 0 (identity mapping)
51 * and PAGE_OFFSET for up to _end.
52 *
53 * FIXME: This code is taken verbatim from arch/x86/kernel/head_32.S: they
54 * don't have a stack at this point, so we can't just use call and ret.
55 */
56init_pagetables:
57#if PTRS_PER_PMD > 1
58#define PAGE_TABLE_SIZE(pages) (((pages) / PTRS_PER_PMD) + PTRS_PER_PGD)
59#else
60#define PAGE_TABLE_SIZE(pages) ((pages) / PTRS_PER_PGD)
61#endif
62#define pa(X) ((X) - __PAGE_OFFSET)
63
64/* Enough space to fit pagetables for the low memory linear map */
65MAPPING_BEYOND_END = \
66 PAGE_TABLE_SIZE(((1<<32) - __PAGE_OFFSET) >> PAGE_SHIFT) << PAGE_SHIFT
67#ifdef CONFIG_X86_PAE
68
69 /*
70 * In PAE mode initial_page_table is statically defined to contain
71 * enough entries to cover the VMSPLIT option (that is the top 1, 2 or 3
72 * entries). The identity mapping is handled by pointing two PGD entries
73 * to the first kernel PMD.
74 *
75 * Note the upper half of each PMD or PTE are always zero at this stage.
76 */
77
78#define KPMDS (((-__PAGE_OFFSET) >> 30) & 3) /* Number of kernel PMDs */
79
80 xorl %ebx,%ebx /* %ebx is kept at zero */
81
82 movl $pa(__brk_base), %edi
83 movl $pa(initial_pg_pmd), %edx
84 movl $PTE_IDENT_ATTR, %eax
8510:
86 leal PDE_IDENT_ATTR(%edi),%ecx /* Create PMD entry */
87 movl %ecx,(%edx) /* Store PMD entry */
88 /* Upper half already zero */
89 addl $8,%edx
90 movl $512,%ecx
9111:
92 stosl
93 xchgl %eax,%ebx
94 stosl
95 xchgl %eax,%ebx
96 addl $0x1000,%eax
97 loop 11b
98
99 /*
100 * End condition: we must map up to the end + MAPPING_BEYOND_END.
101 */
102 movl $pa(_end) + MAPPING_BEYOND_END + PTE_IDENT_ATTR, %ebp
103 cmpl %ebp,%eax
104 jb 10b
1051:
106 addl $__PAGE_OFFSET, %edi
107 movl %edi, pa(_brk_end)
108 shrl $12, %eax
109 movl %eax, pa(max_pfn_mapped)
110
111 /* Do early initialization of the fixmap area */
112 movl $pa(initial_pg_fixmap)+PDE_IDENT_ATTR,%eax
113 movl %eax,pa(initial_pg_pmd+0x1000*KPMDS-8)
114#else /* Not PAE */
115
116page_pde_offset = (__PAGE_OFFSET >> 20);
117
118 movl $pa(__brk_base), %edi
119 movl $pa(initial_page_table), %edx
120 movl $PTE_IDENT_ATTR, %eax
12110:
122 leal PDE_IDENT_ATTR(%edi),%ecx /* Create PDE entry */
123 movl %ecx,(%edx) /* Store identity PDE entry */
124 movl %ecx,page_pde_offset(%edx) /* Store kernel PDE entry */
125 addl $4,%edx
126 movl $1024, %ecx
12711:
128 stosl
129 addl $0x1000,%eax
130 loop 11b
131 /*
132 * End condition: we must map up to the end + MAPPING_BEYOND_END.
133 */
134 movl $pa(_end) + MAPPING_BEYOND_END + PTE_IDENT_ATTR, %ebp
135 cmpl %ebp,%eax
136 jb 10b
137 addl $__PAGE_OFFSET, %edi
138 movl %edi, pa(_brk_end)
139 shrl $12, %eax
140 movl %eax, pa(max_pfn_mapped)
141
142 /* Do early initialization of the fixmap area */
143 movl $pa(initial_pg_fixmap)+PDE_IDENT_ATTR,%eax
144 movl %eax,pa(initial_page_table+0xffc)
145#endif
146 ret
147
43/*G:055 148/*G:055
44 * We create a macro which puts the assembler code between lgstart_ and lgend_ 149 * We create a macro which puts the assembler code between lgstart_ and lgend_
45 * markers. These templates are put in the .text section: they can't be 150 * markers. These templates are put in the .text section: they can't be
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 55543397a8a7..09df2f9a3d69 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -23,7 +23,7 @@ mmiotrace-y := kmmio.o pf_in.o mmio-mod.o
23obj-$(CONFIG_MMIOTRACE_TEST) += testmmiotrace.o 23obj-$(CONFIG_MMIOTRACE_TEST) += testmmiotrace.o
24 24
25obj-$(CONFIG_NUMA) += numa.o numa_$(BITS).o 25obj-$(CONFIG_NUMA) += numa.o numa_$(BITS).o
26obj-$(CONFIG_K8_NUMA) += k8topology_64.o 26obj-$(CONFIG_AMD_NUMA) += amdtopology_64.o
27obj-$(CONFIG_ACPI_NUMA) += srat_$(BITS).o 27obj-$(CONFIG_ACPI_NUMA) += srat_$(BITS).o
28 28
29obj-$(CONFIG_HAVE_MEMBLOCK) += memblock.o 29obj-$(CONFIG_HAVE_MEMBLOCK) += memblock.o
diff --git a/arch/x86/mm/k8topology_64.c b/arch/x86/mm/amdtopology_64.c
index 804a3b6c6e14..51fae9cfdecb 100644
--- a/arch/x86/mm/k8topology_64.c
+++ b/arch/x86/mm/amdtopology_64.c
@@ -1,8 +1,8 @@
1/* 1/*
2 * AMD K8 NUMA support. 2 * AMD NUMA support.
3 * Discover the memory map and associated nodes. 3 * Discover the memory map and associated nodes.
4 * 4 *
5 * This version reads it directly from the K8 northbridge. 5 * This version reads it directly from the AMD northbridge.
6 * 6 *
7 * Copyright 2002,2003 Andi Kleen, SuSE Labs. 7 * Copyright 2002,2003 Andi Kleen, SuSE Labs.
8 */ 8 */
@@ -57,7 +57,7 @@ static __init void early_get_boot_cpu_id(void)
57{ 57{
58 /* 58 /*
59 * need to get the APIC ID of the BSP so can use that to 59 * need to get the APIC ID of the BSP so can use that to
60 * create apicid_to_node in k8_scan_nodes() 60 * create apicid_to_node in amd_scan_nodes()
61 */ 61 */
62#ifdef CONFIG_X86_MPPARSE 62#ifdef CONFIG_X86_MPPARSE
63 /* 63 /*
@@ -69,7 +69,7 @@ static __init void early_get_boot_cpu_id(void)
69 early_init_lapic_mapping(); 69 early_init_lapic_mapping();
70} 70}
71 71
72int __init k8_get_nodes(struct bootnode *physnodes) 72int __init amd_get_nodes(struct bootnode *physnodes)
73{ 73{
74 int i; 74 int i;
75 int ret = 0; 75 int ret = 0;
@@ -82,7 +82,7 @@ int __init k8_get_nodes(struct bootnode *physnodes)
82 return ret; 82 return ret;
83} 83}
84 84
85int __init k8_numa_init(unsigned long start_pfn, unsigned long end_pfn) 85int __init amd_numa_init(unsigned long start_pfn, unsigned long end_pfn)
86{ 86{
87 unsigned long start = PFN_PHYS(start_pfn); 87 unsigned long start = PFN_PHYS(start_pfn);
88 unsigned long end = PFN_PHYS(end_pfn); 88 unsigned long end = PFN_PHYS(end_pfn);
@@ -194,7 +194,7 @@ int __init k8_numa_init(unsigned long start_pfn, unsigned long end_pfn)
194 return 0; 194 return 0;
195} 195}
196 196
197int __init k8_scan_nodes(void) 197int __init amd_scan_nodes(void)
198{ 198{
199 unsigned int bits; 199 unsigned int bits;
200 unsigned int cores; 200 unsigned int cores;
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index c0e28a13de7d..947f42abe820 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -364,8 +364,9 @@ void free_init_pages(char *what, unsigned long begin, unsigned long end)
364 /* 364 /*
365 * We just marked the kernel text read only above, now that 365 * We just marked the kernel text read only above, now that
366 * we are going to free part of that, we need to make that 366 * we are going to free part of that, we need to make that
367 * writeable first. 367 * writeable and non-executable first.
368 */ 368 */
369 set_memory_nx(begin, (end - begin) >> PAGE_SHIFT);
369 set_memory_rw(begin, (end - begin) >> PAGE_SHIFT); 370 set_memory_rw(begin, (end - begin) >> PAGE_SHIFT);
370 371
371 printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10); 372 printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10);
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 0e969f9f401b..f89b5bb4e93f 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -226,7 +226,7 @@ page_table_range_init(unsigned long start, unsigned long end, pgd_t *pgd_base)
226 226
227static inline int is_kernel_text(unsigned long addr) 227static inline int is_kernel_text(unsigned long addr)
228{ 228{
229 if (addr >= PAGE_OFFSET && addr <= (unsigned long)__init_end) 229 if (addr >= (unsigned long)_text && addr <= (unsigned long)__init_end)
230 return 1; 230 return 1;
231 return 0; 231 return 0;
232} 232}
@@ -912,6 +912,23 @@ void set_kernel_text_ro(void)
912 set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT); 912 set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT);
913} 913}
914 914
915static void mark_nxdata_nx(void)
916{
917 /*
918 * When this called, init has already been executed and released,
919 * so everything past _etext sould be NX.
920 */
921 unsigned long start = PFN_ALIGN(_etext);
922 /*
923 * This comes from is_kernel_text upper limit. Also HPAGE where used:
924 */
925 unsigned long size = (((unsigned long)__init_end + HPAGE_SIZE) & HPAGE_MASK) - start;
926
927 if (__supported_pte_mask & _PAGE_NX)
928 printk(KERN_INFO "NX-protecting the kernel data: %luk\n", size >> 10);
929 set_pages_nx(virt_to_page(start), size >> PAGE_SHIFT);
930}
931
915void mark_rodata_ro(void) 932void mark_rodata_ro(void)
916{ 933{
917 unsigned long start = PFN_ALIGN(_text); 934 unsigned long start = PFN_ALIGN(_text);
@@ -946,6 +963,7 @@ void mark_rodata_ro(void)
946 printk(KERN_INFO "Testing CPA: write protecting again\n"); 963 printk(KERN_INFO "Testing CPA: write protecting again\n");
947 set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT); 964 set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT);
948#endif 965#endif
966 mark_nxdata_nx();
949} 967}
950#endif 968#endif
951 969
diff --git a/arch/x86/mm/kmemcheck/error.c b/arch/x86/mm/kmemcheck/error.c
index af3b6c8a436f..704a37cedddb 100644
--- a/arch/x86/mm/kmemcheck/error.c
+++ b/arch/x86/mm/kmemcheck/error.c
@@ -185,7 +185,7 @@ void kmemcheck_error_save(enum kmemcheck_shadow state,
185 e->trace.entries = e->trace_entries; 185 e->trace.entries = e->trace_entries;
186 e->trace.max_entries = ARRAY_SIZE(e->trace_entries); 186 e->trace.max_entries = ARRAY_SIZE(e->trace_entries);
187 e->trace.skip = 0; 187 e->trace.skip = 0;
188 save_stack_trace_bp(&e->trace, regs->bp); 188 save_stack_trace_regs(&e->trace, regs);
189 189
190 /* Round address down to nearest 16 bytes */ 190 /* Round address down to nearest 16 bytes */
191 shadow_copy = kmemcheck_shadow_lookup(address 191 shadow_copy = kmemcheck_shadow_lookup(address
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 7ffc9b727efd..7762a517d69d 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -264,7 +264,7 @@ static struct bootnode physnodes[MAX_NUMNODES] __initdata;
264static char *cmdline __initdata; 264static char *cmdline __initdata;
265 265
266static int __init setup_physnodes(unsigned long start, unsigned long end, 266static int __init setup_physnodes(unsigned long start, unsigned long end,
267 int acpi, int k8) 267 int acpi, int amd)
268{ 268{
269 int nr_nodes = 0; 269 int nr_nodes = 0;
270 int ret = 0; 270 int ret = 0;
@@ -274,13 +274,13 @@ static int __init setup_physnodes(unsigned long start, unsigned long end,
274 if (acpi) 274 if (acpi)
275 nr_nodes = acpi_get_nodes(physnodes); 275 nr_nodes = acpi_get_nodes(physnodes);
276#endif 276#endif
277#ifdef CONFIG_K8_NUMA 277#ifdef CONFIG_AMD_NUMA
278 if (k8) 278 if (amd)
279 nr_nodes = k8_get_nodes(physnodes); 279 nr_nodes = amd_get_nodes(physnodes);
280#endif 280#endif
281 /* 281 /*
282 * Basic sanity checking on the physical node map: there may be errors 282 * Basic sanity checking on the physical node map: there may be errors
283 * if the SRAT or K8 incorrectly reported the topology or the mem= 283 * if the SRAT or AMD code incorrectly reported the topology or the mem=
284 * kernel parameter is used. 284 * kernel parameter is used.
285 */ 285 */
286 for (i = 0; i < nr_nodes; i++) { 286 for (i = 0; i < nr_nodes; i++) {
@@ -549,7 +549,7 @@ static int __init split_nodes_size_interleave(u64 addr, u64 max_addr, u64 size)
549 * numa=fake command-line option. 549 * numa=fake command-line option.
550 */ 550 */
551static int __init numa_emulation(unsigned long start_pfn, 551static int __init numa_emulation(unsigned long start_pfn,
552 unsigned long last_pfn, int acpi, int k8) 552 unsigned long last_pfn, int acpi, int amd)
553{ 553{
554 u64 addr = start_pfn << PAGE_SHIFT; 554 u64 addr = start_pfn << PAGE_SHIFT;
555 u64 max_addr = last_pfn << PAGE_SHIFT; 555 u64 max_addr = last_pfn << PAGE_SHIFT;
@@ -557,7 +557,7 @@ static int __init numa_emulation(unsigned long start_pfn,
557 int num_nodes; 557 int num_nodes;
558 int i; 558 int i;
559 559
560 num_phys_nodes = setup_physnodes(addr, max_addr, acpi, k8); 560 num_phys_nodes = setup_physnodes(addr, max_addr, acpi, amd);
561 /* 561 /*
562 * If the numa=fake command-line contains a 'M' or 'G', it represents 562 * If the numa=fake command-line contains a 'M' or 'G', it represents
563 * the fixed node size. Otherwise, if it is just a single number N, 563 * the fixed node size. Otherwise, if it is just a single number N,
@@ -602,7 +602,7 @@ static int __init numa_emulation(unsigned long start_pfn,
602#endif /* CONFIG_NUMA_EMU */ 602#endif /* CONFIG_NUMA_EMU */
603 603
604void __init initmem_init(unsigned long start_pfn, unsigned long last_pfn, 604void __init initmem_init(unsigned long start_pfn, unsigned long last_pfn,
605 int acpi, int k8) 605 int acpi, int amd)
606{ 606{
607 int i; 607 int i;
608 608
@@ -610,7 +610,7 @@ void __init initmem_init(unsigned long start_pfn, unsigned long last_pfn,
610 nodes_clear(node_online_map); 610 nodes_clear(node_online_map);
611 611
612#ifdef CONFIG_NUMA_EMU 612#ifdef CONFIG_NUMA_EMU
613 if (cmdline && !numa_emulation(start_pfn, last_pfn, acpi, k8)) 613 if (cmdline && !numa_emulation(start_pfn, last_pfn, acpi, amd))
614 return; 614 return;
615 nodes_clear(node_possible_map); 615 nodes_clear(node_possible_map);
616 nodes_clear(node_online_map); 616 nodes_clear(node_online_map);
@@ -624,8 +624,8 @@ void __init initmem_init(unsigned long start_pfn, unsigned long last_pfn,
624 nodes_clear(node_online_map); 624 nodes_clear(node_online_map);
625#endif 625#endif
626 626
627#ifdef CONFIG_K8_NUMA 627#ifdef CONFIG_AMD_NUMA
628 if (!numa_off && k8 && !k8_scan_nodes()) 628 if (!numa_off && amd && !amd_scan_nodes())
629 return; 629 return;
630 nodes_clear(node_possible_map); 630 nodes_clear(node_possible_map);
631 nodes_clear(node_online_map); 631 nodes_clear(node_online_map);
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 532e7933d606..8b830ca14ac4 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -13,6 +13,7 @@
13#include <linux/pfn.h> 13#include <linux/pfn.h>
14#include <linux/percpu.h> 14#include <linux/percpu.h>
15#include <linux/gfp.h> 15#include <linux/gfp.h>
16#include <linux/pci.h>
16 17
17#include <asm/e820.h> 18#include <asm/e820.h>
18#include <asm/processor.h> 19#include <asm/processor.h>
@@ -255,13 +256,16 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address,
255 unsigned long pfn) 256 unsigned long pfn)
256{ 257{
257 pgprot_t forbidden = __pgprot(0); 258 pgprot_t forbidden = __pgprot(0);
259 pgprot_t required = __pgprot(0);
258 260
259 /* 261 /*
260 * The BIOS area between 640k and 1Mb needs to be executable for 262 * The BIOS area between 640k and 1Mb needs to be executable for
261 * PCI BIOS based config access (CONFIG_PCI_GOBIOS) support. 263 * PCI BIOS based config access (CONFIG_PCI_GOBIOS) support.
262 */ 264 */
263 if (within(pfn, BIOS_BEGIN >> PAGE_SHIFT, BIOS_END >> PAGE_SHIFT)) 265#ifdef CONFIG_PCI_BIOS
266 if (pcibios_enabled && within(pfn, BIOS_BEGIN >> PAGE_SHIFT, BIOS_END >> PAGE_SHIFT))
264 pgprot_val(forbidden) |= _PAGE_NX; 267 pgprot_val(forbidden) |= _PAGE_NX;
268#endif
265 269
266 /* 270 /*
267 * The kernel text needs to be executable for obvious reasons 271 * The kernel text needs to be executable for obvious reasons
@@ -278,6 +282,12 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address,
278 if (within(pfn, __pa((unsigned long)__start_rodata) >> PAGE_SHIFT, 282 if (within(pfn, __pa((unsigned long)__start_rodata) >> PAGE_SHIFT,
279 __pa((unsigned long)__end_rodata) >> PAGE_SHIFT)) 283 __pa((unsigned long)__end_rodata) >> PAGE_SHIFT))
280 pgprot_val(forbidden) |= _PAGE_RW; 284 pgprot_val(forbidden) |= _PAGE_RW;
285 /*
286 * .data and .bss should always be writable.
287 */
288 if (within(address, (unsigned long)_sdata, (unsigned long)_edata) ||
289 within(address, (unsigned long)__bss_start, (unsigned long)__bss_stop))
290 pgprot_val(required) |= _PAGE_RW;
281 291
282#if defined(CONFIG_X86_64) && defined(CONFIG_DEBUG_RODATA) 292#if defined(CONFIG_X86_64) && defined(CONFIG_DEBUG_RODATA)
283 /* 293 /*
@@ -317,6 +327,7 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address,
317#endif 327#endif
318 328
319 prot = __pgprot(pgprot_val(prot) & ~pgprot_val(forbidden)); 329 prot = __pgprot(pgprot_val(prot) & ~pgprot_val(forbidden));
330 prot = __pgprot(pgprot_val(prot) | pgprot_val(required));
320 331
321 return prot; 332 return prot;
322} 333}
@@ -393,7 +404,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
393{ 404{
394 unsigned long nextpage_addr, numpages, pmask, psize, flags, addr, pfn; 405 unsigned long nextpage_addr, numpages, pmask, psize, flags, addr, pfn;
395 pte_t new_pte, old_pte, *tmp; 406 pte_t new_pte, old_pte, *tmp;
396 pgprot_t old_prot, new_prot; 407 pgprot_t old_prot, new_prot, req_prot;
397 int i, do_split = 1; 408 int i, do_split = 1;
398 unsigned int level; 409 unsigned int level;
399 410
@@ -438,10 +449,10 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
438 * We are safe now. Check whether the new pgprot is the same: 449 * We are safe now. Check whether the new pgprot is the same:
439 */ 450 */
440 old_pte = *kpte; 451 old_pte = *kpte;
441 old_prot = new_prot = pte_pgprot(old_pte); 452 old_prot = new_prot = req_prot = pte_pgprot(old_pte);
442 453
443 pgprot_val(new_prot) &= ~pgprot_val(cpa->mask_clr); 454 pgprot_val(req_prot) &= ~pgprot_val(cpa->mask_clr);
444 pgprot_val(new_prot) |= pgprot_val(cpa->mask_set); 455 pgprot_val(req_prot) |= pgprot_val(cpa->mask_set);
445 456
446 /* 457 /*
447 * old_pte points to the large page base address. So we need 458 * old_pte points to the large page base address. So we need
@@ -450,17 +461,17 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
450 pfn = pte_pfn(old_pte) + ((address & (psize - 1)) >> PAGE_SHIFT); 461 pfn = pte_pfn(old_pte) + ((address & (psize - 1)) >> PAGE_SHIFT);
451 cpa->pfn = pfn; 462 cpa->pfn = pfn;
452 463
453 new_prot = static_protections(new_prot, address, pfn); 464 new_prot = static_protections(req_prot, address, pfn);
454 465
455 /* 466 /*
456 * We need to check the full range, whether 467 * We need to check the full range, whether
457 * static_protection() requires a different pgprot for one of 468 * static_protection() requires a different pgprot for one of
458 * the pages in the range we try to preserve: 469 * the pages in the range we try to preserve:
459 */ 470 */
460 addr = address + PAGE_SIZE; 471 addr = address & pmask;
461 pfn++; 472 pfn = pte_pfn(old_pte);
462 for (i = 1; i < cpa->numpages; i++, addr += PAGE_SIZE, pfn++) { 473 for (i = 0; i < (psize >> PAGE_SHIFT); i++, addr += PAGE_SIZE, pfn++) {
463 pgprot_t chk_prot = static_protections(new_prot, addr, pfn); 474 pgprot_t chk_prot = static_protections(req_prot, addr, pfn);
464 475
465 if (pgprot_val(chk_prot) != pgprot_val(new_prot)) 476 if (pgprot_val(chk_prot) != pgprot_val(new_prot))
466 goto out_unlock; 477 goto out_unlock;
@@ -483,7 +494,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
483 * that we limited the number of possible pages already to 494 * that we limited the number of possible pages already to
484 * the number of pages in the large page. 495 * the number of pages in the large page.
485 */ 496 */
486 if (address == (nextpage_addr - psize) && cpa->numpages == numpages) { 497 if (address == (address & pmask) && cpa->numpages == (psize >> PAGE_SHIFT)) {
487 /* 498 /*
488 * The address is aligned and the number of pages 499 * The address is aligned and the number of pages
489 * covers the full page. 500 * covers the full page.
diff --git a/arch/x86/mm/setup_nx.c b/arch/x86/mm/setup_nx.c
index a3250aa34086..410531d3c292 100644
--- a/arch/x86/mm/setup_nx.c
+++ b/arch/x86/mm/setup_nx.c
@@ -41,7 +41,7 @@ void __init x86_report_nx(void)
41{ 41{
42 if (!cpu_has_nx) { 42 if (!cpu_has_nx) {
43 printk(KERN_NOTICE "Notice: NX (Execute Disable) protection " 43 printk(KERN_NOTICE "Notice: NX (Execute Disable) protection "
44 "missing in CPU or disabled in BIOS!\n"); 44 "missing in CPU!\n");
45 } else { 45 } else {
46#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) 46#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
47 if (disable_nx) { 47 if (disable_nx) {
diff --git a/arch/x86/mm/srat_32.c b/arch/x86/mm/srat_32.c
index a17dffd136c1..f16434568a51 100644
--- a/arch/x86/mm/srat_32.c
+++ b/arch/x86/mm/srat_32.c
@@ -92,6 +92,7 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *cpu_affinity)
92 /* mark this node as "seen" in node bitmap */ 92 /* mark this node as "seen" in node bitmap */
93 BMAP_SET(pxm_bitmap, cpu_affinity->proximity_domain_lo); 93 BMAP_SET(pxm_bitmap, cpu_affinity->proximity_domain_lo);
94 94
95 /* don't need to check apic_id here, because it is always 8 bits */
95 apicid_to_pxm[cpu_affinity->apic_id] = cpu_affinity->proximity_domain_lo; 96 apicid_to_pxm[cpu_affinity->apic_id] = cpu_affinity->proximity_domain_lo;
96 97
97 printk(KERN_DEBUG "CPU %02x in proximity domain %02x\n", 98 printk(KERN_DEBUG "CPU %02x in proximity domain %02x\n",
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c
index a35cb9d8b060..171a0aacb99a 100644
--- a/arch/x86/mm/srat_64.c
+++ b/arch/x86/mm/srat_64.c
@@ -134,6 +134,10 @@ acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa)
134 } 134 }
135 135
136 apic_id = pa->apic_id; 136 apic_id = pa->apic_id;
137 if (apic_id >= MAX_LOCAL_APIC) {
138 printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%04x -> Node %u skipped apicid that is too big\n", pxm, apic_id, node);
139 return;
140 }
137 apicid_to_node[apic_id] = node; 141 apicid_to_node[apic_id] = node;
138 node_set(node, cpu_nodes_parsed); 142 node_set(node, cpu_nodes_parsed);
139 acpi_numa = 1; 143 acpi_numa = 1;
@@ -168,6 +172,12 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa)
168 apic_id = (pa->apic_id << 8) | pa->local_sapic_eid; 172 apic_id = (pa->apic_id << 8) | pa->local_sapic_eid;
169 else 173 else
170 apic_id = pa->apic_id; 174 apic_id = pa->apic_id;
175
176 if (apic_id >= MAX_LOCAL_APIC) {
177 printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%02x -> Node %u skipped apicid that is too big\n", pxm, apic_id, node);
178 return;
179 }
180
171 apicid_to_node[apic_id] = node; 181 apicid_to_node[apic_id] = node;
172 node_set(node, cpu_nodes_parsed); 182 node_set(node, cpu_nodes_parsed);
173 acpi_numa = 1; 183 acpi_numa = 1;
diff --git a/arch/x86/oprofile/backtrace.c b/arch/x86/oprofile/backtrace.c
index 2d49d4e19a36..72cbec14d783 100644
--- a/arch/x86/oprofile/backtrace.c
+++ b/arch/x86/oprofile/backtrace.c
@@ -126,7 +126,7 @@ x86_backtrace(struct pt_regs * const regs, unsigned int depth)
126 if (!user_mode_vm(regs)) { 126 if (!user_mode_vm(regs)) {
127 unsigned long stack = kernel_stack_pointer(regs); 127 unsigned long stack = kernel_stack_pointer(regs);
128 if (depth) 128 if (depth)
129 dump_trace(NULL, regs, (unsigned long *)stack, 0, 129 dump_trace(NULL, regs, (unsigned long *)stack,
130 &backtrace_ops, &depth); 130 &backtrace_ops, &depth);
131 return; 131 return;
132 } 132 }
diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c
index 4e8baad36d37..358c8b9c96a7 100644
--- a/arch/x86/oprofile/nmi_int.c
+++ b/arch/x86/oprofile/nmi_int.c
@@ -732,6 +732,9 @@ int __init op_nmi_init(struct oprofile_operations *ops)
732 case 0x14: 732 case 0x14:
733 cpu_type = "x86-64/family14h"; 733 cpu_type = "x86-64/family14h";
734 break; 734 break;
735 case 0x15:
736 cpu_type = "x86-64/family15h";
737 break;
735 default: 738 default:
736 return -ENODEV; 739 return -ENODEV;
737 } 740 }
diff --git a/arch/x86/oprofile/nmi_timer_int.c b/arch/x86/oprofile/nmi_timer_int.c
index e3ecb71b5790..0636dd93cef8 100644
--- a/arch/x86/oprofile/nmi_timer_int.c
+++ b/arch/x86/oprofile/nmi_timer_int.c
@@ -58,9 +58,6 @@ static void timer_stop(void)
58 58
59int __init op_nmi_timer_init(struct oprofile_operations *ops) 59int __init op_nmi_timer_init(struct oprofile_operations *ops)
60{ 60{
61 if ((nmi_watchdog != NMI_IO_APIC) || (atomic_read(&nmi_active) <= 0))
62 return -ENODEV;
63
64 ops->start = timer_start; 61 ops->start = timer_start;
65 ops->stop = timer_stop; 62 ops->stop = timer_stop;
66 ops->cpu_type = "timer"; 63 ops->cpu_type = "timer";
diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c
index a011bcc0f943..c3b8e24f2b16 100644
--- a/arch/x86/oprofile/op_model_amd.c
+++ b/arch/x86/oprofile/op_model_amd.c
@@ -29,11 +29,12 @@
29#include "op_x86_model.h" 29#include "op_x86_model.h"
30#include "op_counter.h" 30#include "op_counter.h"
31 31
32#define NUM_COUNTERS 4 32#define NUM_COUNTERS 4
33#define NUM_COUNTERS_F15H 6
33#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX 34#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX
34#define NUM_VIRT_COUNTERS 32 35#define NUM_VIRT_COUNTERS 32
35#else 36#else
36#define NUM_VIRT_COUNTERS NUM_COUNTERS 37#define NUM_VIRT_COUNTERS 0
37#endif 38#endif
38 39
39#define OP_EVENT_MASK 0x0FFF 40#define OP_EVENT_MASK 0x0FFF
@@ -41,7 +42,8 @@
41 42
42#define MSR_AMD_EVENTSEL_RESERVED ((0xFFFFFCF0ULL<<32)|(1ULL<<21)) 43#define MSR_AMD_EVENTSEL_RESERVED ((0xFFFFFCF0ULL<<32)|(1ULL<<21))
43 44
44static unsigned long reset_value[NUM_VIRT_COUNTERS]; 45static int num_counters;
46static unsigned long reset_value[OP_MAX_COUNTER];
45 47
46#define IBS_FETCH_SIZE 6 48#define IBS_FETCH_SIZE 6
47#define IBS_OP_SIZE 12 49#define IBS_OP_SIZE 12
@@ -387,7 +389,7 @@ static void op_mux_switch_ctrl(struct op_x86_model_spec const *model,
387 int i; 389 int i;
388 390
389 /* enable active counters */ 391 /* enable active counters */
390 for (i = 0; i < NUM_COUNTERS; ++i) { 392 for (i = 0; i < num_counters; ++i) {
391 int virt = op_x86_phys_to_virt(i); 393 int virt = op_x86_phys_to_virt(i);
392 if (!reset_value[virt]) 394 if (!reset_value[virt])
393 continue; 395 continue;
@@ -406,7 +408,7 @@ static void op_amd_shutdown(struct op_msrs const * const msrs)
406{ 408{
407 int i; 409 int i;
408 410
409 for (i = 0; i < NUM_COUNTERS; ++i) { 411 for (i = 0; i < num_counters; ++i) {
410 if (!msrs->counters[i].addr) 412 if (!msrs->counters[i].addr)
411 continue; 413 continue;
412 release_perfctr_nmi(MSR_K7_PERFCTR0 + i); 414 release_perfctr_nmi(MSR_K7_PERFCTR0 + i);
@@ -418,7 +420,7 @@ static int op_amd_fill_in_addresses(struct op_msrs * const msrs)
418{ 420{
419 int i; 421 int i;
420 422
421 for (i = 0; i < NUM_COUNTERS; i++) { 423 for (i = 0; i < num_counters; i++) {
422 if (!reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i)) 424 if (!reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i))
423 goto fail; 425 goto fail;
424 if (!reserve_evntsel_nmi(MSR_K7_EVNTSEL0 + i)) { 426 if (!reserve_evntsel_nmi(MSR_K7_EVNTSEL0 + i)) {
@@ -426,8 +428,13 @@ static int op_amd_fill_in_addresses(struct op_msrs * const msrs)
426 goto fail; 428 goto fail;
427 } 429 }
428 /* both registers must be reserved */ 430 /* both registers must be reserved */
429 msrs->counters[i].addr = MSR_K7_PERFCTR0 + i; 431 if (num_counters == NUM_COUNTERS_F15H) {
430 msrs->controls[i].addr = MSR_K7_EVNTSEL0 + i; 432 msrs->counters[i].addr = MSR_F15H_PERF_CTR + (i << 1);
433 msrs->controls[i].addr = MSR_F15H_PERF_CTL + (i << 1);
434 } else {
435 msrs->controls[i].addr = MSR_K7_EVNTSEL0 + i;
436 msrs->counters[i].addr = MSR_K7_PERFCTR0 + i;
437 }
431 continue; 438 continue;
432 fail: 439 fail:
433 if (!counter_config[i].enabled) 440 if (!counter_config[i].enabled)
@@ -447,7 +454,7 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model,
447 int i; 454 int i;
448 455
449 /* setup reset_value */ 456 /* setup reset_value */
450 for (i = 0; i < NUM_VIRT_COUNTERS; ++i) { 457 for (i = 0; i < OP_MAX_COUNTER; ++i) {
451 if (counter_config[i].enabled 458 if (counter_config[i].enabled
452 && msrs->counters[op_x86_virt_to_phys(i)].addr) 459 && msrs->counters[op_x86_virt_to_phys(i)].addr)
453 reset_value[i] = counter_config[i].count; 460 reset_value[i] = counter_config[i].count;
@@ -456,7 +463,7 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model,
456 } 463 }
457 464
458 /* clear all counters */ 465 /* clear all counters */
459 for (i = 0; i < NUM_COUNTERS; ++i) { 466 for (i = 0; i < num_counters; ++i) {
460 if (!msrs->controls[i].addr) 467 if (!msrs->controls[i].addr)
461 continue; 468 continue;
462 rdmsrl(msrs->controls[i].addr, val); 469 rdmsrl(msrs->controls[i].addr, val);
@@ -472,7 +479,7 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model,
472 } 479 }
473 480
474 /* enable active counters */ 481 /* enable active counters */
475 for (i = 0; i < NUM_COUNTERS; ++i) { 482 for (i = 0; i < num_counters; ++i) {
476 int virt = op_x86_phys_to_virt(i); 483 int virt = op_x86_phys_to_virt(i);
477 if (!reset_value[virt]) 484 if (!reset_value[virt])
478 continue; 485 continue;
@@ -503,7 +510,7 @@ static int op_amd_check_ctrs(struct pt_regs * const regs,
503 u64 val; 510 u64 val;
504 int i; 511 int i;
505 512
506 for (i = 0; i < NUM_COUNTERS; ++i) { 513 for (i = 0; i < num_counters; ++i) {
507 int virt = op_x86_phys_to_virt(i); 514 int virt = op_x86_phys_to_virt(i);
508 if (!reset_value[virt]) 515 if (!reset_value[virt])
509 continue; 516 continue;
@@ -526,7 +533,7 @@ static void op_amd_start(struct op_msrs const * const msrs)
526 u64 val; 533 u64 val;
527 int i; 534 int i;
528 535
529 for (i = 0; i < NUM_COUNTERS; ++i) { 536 for (i = 0; i < num_counters; ++i) {
530 if (!reset_value[op_x86_phys_to_virt(i)]) 537 if (!reset_value[op_x86_phys_to_virt(i)])
531 continue; 538 continue;
532 rdmsrl(msrs->controls[i].addr, val); 539 rdmsrl(msrs->controls[i].addr, val);
@@ -546,7 +553,7 @@ static void op_amd_stop(struct op_msrs const * const msrs)
546 * Subtle: stop on all counters to avoid race with setting our 553 * Subtle: stop on all counters to avoid race with setting our
547 * pm callback 554 * pm callback
548 */ 555 */
549 for (i = 0; i < NUM_COUNTERS; ++i) { 556 for (i = 0; i < num_counters; ++i) {
550 if (!reset_value[op_x86_phys_to_virt(i)]) 557 if (!reset_value[op_x86_phys_to_virt(i)])
551 continue; 558 continue;
552 rdmsrl(msrs->controls[i].addr, val); 559 rdmsrl(msrs->controls[i].addr, val);
@@ -603,6 +610,7 @@ static int force_ibs_eilvt_setup(void)
603 ret = setup_ibs_ctl(i); 610 ret = setup_ibs_ctl(i);
604 if (ret) 611 if (ret)
605 return ret; 612 return ret;
613 pr_err(FW_BUG "using offset %d for IBS interrupts\n", i);
606 return 0; 614 return 0;
607 } 615 }
608 616
@@ -630,21 +638,29 @@ static int __init_ibs_nmi(void)
630 return 0; 638 return 0;
631} 639}
632 640
633/* initialize the APIC for the IBS interrupts if available */ 641/*
642 * check and reserve APIC extended interrupt LVT offset for IBS if
643 * available
644 *
645 * init_ibs() preforms implicitly cpu-local operations, so pin this
646 * thread to its current CPU
647 */
648
634static void init_ibs(void) 649static void init_ibs(void)
635{ 650{
636 ibs_caps = get_ibs_caps(); 651 preempt_disable();
637 652
653 ibs_caps = get_ibs_caps();
638 if (!ibs_caps) 654 if (!ibs_caps)
639 return; 655 goto out;
640 656
641 if (__init_ibs_nmi()) { 657 if (__init_ibs_nmi() < 0)
642 ibs_caps = 0; 658 ibs_caps = 0;
643 return; 659 else
644 } 660 printk(KERN_INFO "oprofile: AMD IBS detected (0x%08x)\n", ibs_caps);
645 661
646 printk(KERN_INFO "oprofile: AMD IBS detected (0x%08x)\n", 662out:
647 (unsigned)ibs_caps); 663 preempt_enable();
648} 664}
649 665
650static int (*create_arch_files)(struct super_block *sb, struct dentry *root); 666static int (*create_arch_files)(struct super_block *sb, struct dentry *root);
@@ -698,18 +714,29 @@ static int setup_ibs_files(struct super_block *sb, struct dentry *root)
698 return 0; 714 return 0;
699} 715}
700 716
717struct op_x86_model_spec op_amd_spec;
718
701static int op_amd_init(struct oprofile_operations *ops) 719static int op_amd_init(struct oprofile_operations *ops)
702{ 720{
703 init_ibs(); 721 init_ibs();
704 create_arch_files = ops->create_files; 722 create_arch_files = ops->create_files;
705 ops->create_files = setup_ibs_files; 723 ops->create_files = setup_ibs_files;
724
725 if (boot_cpu_data.x86 == 0x15) {
726 num_counters = NUM_COUNTERS_F15H;
727 } else {
728 num_counters = NUM_COUNTERS;
729 }
730
731 op_amd_spec.num_counters = num_counters;
732 op_amd_spec.num_controls = num_counters;
733 op_amd_spec.num_virt_counters = max(num_counters, NUM_VIRT_COUNTERS);
734
706 return 0; 735 return 0;
707} 736}
708 737
709struct op_x86_model_spec op_amd_spec = { 738struct op_x86_model_spec op_amd_spec = {
710 .num_counters = NUM_COUNTERS, 739 /* num_counters/num_controls filled in at runtime */
711 .num_controls = NUM_COUNTERS,
712 .num_virt_counters = NUM_VIRT_COUNTERS,
713 .reserved = MSR_AMD_EVENTSEL_RESERVED, 740 .reserved = MSR_AMD_EVENTSEL_RESERVED,
714 .event_mask = OP_EVENT_MASK, 741 .event_mask = OP_EVENT_MASK,
715 .init = op_amd_init, 742 .init = op_amd_init,
diff --git a/arch/x86/oprofile/op_model_p4.c b/arch/x86/oprofile/op_model_p4.c
index 182558dd5515..9fadec074142 100644
--- a/arch/x86/oprofile/op_model_p4.c
+++ b/arch/x86/oprofile/op_model_p4.c
@@ -11,7 +11,7 @@
11#include <linux/oprofile.h> 11#include <linux/oprofile.h>
12#include <linux/smp.h> 12#include <linux/smp.h>
13#include <linux/ptrace.h> 13#include <linux/ptrace.h>
14#include <linux/nmi.h> 14#include <asm/nmi.h>
15#include <asm/msr.h> 15#include <asm/msr.h>
16#include <asm/fixmap.h> 16#include <asm/fixmap.h>
17#include <asm/apic.h> 17#include <asm/apic.h>
diff --git a/arch/x86/pci/Makefile b/arch/x86/pci/Makefile
index effd96e33f16..6b8759f7634e 100644
--- a/arch/x86/pci/Makefile
+++ b/arch/x86/pci/Makefile
@@ -7,6 +7,7 @@ obj-$(CONFIG_PCI_OLPC) += olpc.o
7obj-$(CONFIG_PCI_XEN) += xen.o 7obj-$(CONFIG_PCI_XEN) += xen.o
8 8
9obj-y += fixup.o 9obj-y += fixup.o
10obj-$(CONFIG_X86_INTEL_CE) += ce4100.o
10obj-$(CONFIG_ACPI) += acpi.o 11obj-$(CONFIG_ACPI) += acpi.o
11obj-y += legacy.o irq.o 12obj-y += legacy.o irq.o
12 13
diff --git a/arch/x86/pci/ce4100.c b/arch/x86/pci/ce4100.c
new file mode 100644
index 000000000000..85b68ef5e809
--- /dev/null
+++ b/arch/x86/pci/ce4100.c
@@ -0,0 +1,315 @@
1/*
2 * GPL LICENSE SUMMARY
3 *
4 * Copyright(c) 2010 Intel Corporation. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of version 2 of the GNU General Public License as
8 * published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
18 * The full GNU General Public License is included in this distribution
19 * in the file called LICENSE.GPL.
20 *
21 * Contact Information:
22 * Intel Corporation
23 * 2200 Mission College Blvd.
24 * Santa Clara, CA 97052
25 *
26 * This provides access methods for PCI registers that mis-behave on
27 * the CE4100. Each register can be assigned a private init, read and
28 * write routine. The exception to this is the bridge device. The
29 * bridge device is the only device on bus zero (0) that requires any
30 * fixup so it is a special case ATM
31 */
32
33#include <linux/kernel.h>
34#include <linux/pci.h>
35#include <linux/init.h>
36
37#include <asm/pci_x86.h>
38
39struct sim_reg {
40 u32 value;
41 u32 mask;
42};
43
44struct sim_dev_reg {
45 int dev_func;
46 int reg;
47 void (*init)(struct sim_dev_reg *reg);
48 void (*read)(struct sim_dev_reg *reg, u32 *value);
49 void (*write)(struct sim_dev_reg *reg, u32 value);
50 struct sim_reg sim_reg;
51};
52
53struct sim_reg_op {
54 void (*init)(struct sim_dev_reg *reg);
55 void (*read)(struct sim_dev_reg *reg, u32 value);
56 void (*write)(struct sim_dev_reg *reg, u32 value);
57};
58
59#define MB (1024 * 1024)
60#define KB (1024)
61#define SIZE_TO_MASK(size) (~(size - 1))
62
63#define DEFINE_REG(device, func, offset, size, init_op, read_op, write_op)\
64{ PCI_DEVFN(device, func), offset, init_op, read_op, write_op,\
65 {0, SIZE_TO_MASK(size)} },
66
67static void reg_init(struct sim_dev_reg *reg)
68{
69 pci_direct_conf1.read(0, 1, reg->dev_func, reg->reg, 4,
70 &reg->sim_reg.value);
71}
72
73static void reg_read(struct sim_dev_reg *reg, u32 *value)
74{
75 unsigned long flags;
76
77 raw_spin_lock_irqsave(&pci_config_lock, flags);
78 *value = reg->sim_reg.value;
79 raw_spin_unlock_irqrestore(&pci_config_lock, flags);
80}
81
82static void reg_write(struct sim_dev_reg *reg, u32 value)
83{
84 unsigned long flags;
85
86 raw_spin_lock_irqsave(&pci_config_lock, flags);
87 reg->sim_reg.value = (value & reg->sim_reg.mask) |
88 (reg->sim_reg.value & ~reg->sim_reg.mask);
89 raw_spin_unlock_irqrestore(&pci_config_lock, flags);
90}
91
92static void sata_reg_init(struct sim_dev_reg *reg)
93{
94 pci_direct_conf1.read(0, 1, PCI_DEVFN(14, 0), 0x10, 4,
95 &reg->sim_reg.value);
96 reg->sim_reg.value += 0x400;
97}
98
99static void ehci_reg_read(struct sim_dev_reg *reg, u32 *value)
100{
101 reg_read(reg, value);
102 if (*value != reg->sim_reg.mask)
103 *value |= 0x100;
104}
105
106void sata_revid_init(struct sim_dev_reg *reg)
107{
108 reg->sim_reg.value = 0x01060100;
109 reg->sim_reg.mask = 0;
110}
111
112static void sata_revid_read(struct sim_dev_reg *reg, u32 *value)
113{
114 reg_read(reg, value);
115}
116
117static struct sim_dev_reg bus1_fixups[] = {
118 DEFINE_REG(2, 0, 0x10, (16*MB), reg_init, reg_read, reg_write)
119 DEFINE_REG(2, 0, 0x14, (256), reg_init, reg_read, reg_write)
120 DEFINE_REG(2, 1, 0x10, (64*KB), reg_init, reg_read, reg_write)
121 DEFINE_REG(3, 0, 0x10, (64*KB), reg_init, reg_read, reg_write)
122 DEFINE_REG(4, 0, 0x10, (128*KB), reg_init, reg_read, reg_write)
123 DEFINE_REG(4, 1, 0x10, (128*KB), reg_init, reg_read, reg_write)
124 DEFINE_REG(6, 0, 0x10, (512*KB), reg_init, reg_read, reg_write)
125 DEFINE_REG(6, 1, 0x10, (512*KB), reg_init, reg_read, reg_write)
126 DEFINE_REG(6, 2, 0x10, (64*KB), reg_init, reg_read, reg_write)
127 DEFINE_REG(8, 0, 0x10, (1*MB), reg_init, reg_read, reg_write)
128 DEFINE_REG(8, 1, 0x10, (64*KB), reg_init, reg_read, reg_write)
129 DEFINE_REG(8, 2, 0x10, (64*KB), reg_init, reg_read, reg_write)
130 DEFINE_REG(9, 0, 0x10 , (1*MB), reg_init, reg_read, reg_write)
131 DEFINE_REG(9, 0, 0x14, (64*KB), reg_init, reg_read, reg_write)
132 DEFINE_REG(10, 0, 0x10, (256), reg_init, reg_read, reg_write)
133 DEFINE_REG(10, 0, 0x14, (256*MB), reg_init, reg_read, reg_write)
134 DEFINE_REG(11, 0, 0x10, (256), reg_init, reg_read, reg_write)
135 DEFINE_REG(11, 0, 0x14, (256), reg_init, reg_read, reg_write)
136 DEFINE_REG(11, 1, 0x10, (256), reg_init, reg_read, reg_write)
137 DEFINE_REG(11, 2, 0x10, (256), reg_init, reg_read, reg_write)
138 DEFINE_REG(11, 2, 0x14, (256), reg_init, reg_read, reg_write)
139 DEFINE_REG(11, 2, 0x18, (256), reg_init, reg_read, reg_write)
140 DEFINE_REG(11, 3, 0x10, (256), reg_init, reg_read, reg_write)
141 DEFINE_REG(11, 3, 0x14, (256), reg_init, reg_read, reg_write)
142 DEFINE_REG(11, 4, 0x10, (256), reg_init, reg_read, reg_write)
143 DEFINE_REG(11, 5, 0x10, (64*KB), reg_init, reg_read, reg_write)
144 DEFINE_REG(11, 6, 0x10, (256), reg_init, reg_read, reg_write)
145 DEFINE_REG(11, 7, 0x10, (64*KB), reg_init, reg_read, reg_write)
146 DEFINE_REG(12, 0, 0x10, (128*KB), reg_init, reg_read, reg_write)
147 DEFINE_REG(12, 0, 0x14, (256), reg_init, reg_read, reg_write)
148 DEFINE_REG(12, 1, 0x10, (1024), reg_init, reg_read, reg_write)
149 DEFINE_REG(13, 0, 0x10, (32*KB), reg_init, ehci_reg_read, reg_write)
150 DEFINE_REG(13, 1, 0x10, (32*KB), reg_init, ehci_reg_read, reg_write)
151 DEFINE_REG(14, 0, 0x8, 0, sata_revid_init, sata_revid_read, 0)
152 DEFINE_REG(14, 0, 0x10, 0, reg_init, reg_read, reg_write)
153 DEFINE_REG(14, 0, 0x14, 0, reg_init, reg_read, reg_write)
154 DEFINE_REG(14, 0, 0x18, 0, reg_init, reg_read, reg_write)
155 DEFINE_REG(14, 0, 0x1C, 0, reg_init, reg_read, reg_write)
156 DEFINE_REG(14, 0, 0x20, 0, reg_init, reg_read, reg_write)
157 DEFINE_REG(14, 0, 0x24, (0x200), sata_reg_init, reg_read, reg_write)
158 DEFINE_REG(15, 0, 0x10, (64*KB), reg_init, reg_read, reg_write)
159 DEFINE_REG(15, 0, 0x14, (64*KB), reg_init, reg_read, reg_write)
160 DEFINE_REG(16, 0, 0x10, (64*KB), reg_init, reg_read, reg_write)
161 DEFINE_REG(16, 0, 0x14, (64*MB), reg_init, reg_read, reg_write)
162 DEFINE_REG(16, 0, 0x18, (64*MB), reg_init, reg_read, reg_write)
163 DEFINE_REG(17, 0, 0x10, (128*KB), reg_init, reg_read, reg_write)
164 DEFINE_REG(18, 0, 0x10, (1*KB), reg_init, reg_read, reg_write)
165};
166
167static void __init init_sim_regs(void)
168{
169 int i;
170
171 for (i = 0; i < ARRAY_SIZE(bus1_fixups); i++) {
172 if (bus1_fixups[i].init)
173 bus1_fixups[i].init(&bus1_fixups[i]);
174 }
175}
176
177static inline void extract_bytes(u32 *value, int reg, int len)
178{
179 uint32_t mask;
180
181 *value >>= ((reg & 3) * 8);
182 mask = 0xFFFFFFFF >> ((4 - len) * 8);
183 *value &= mask;
184}
185
186int bridge_read(unsigned int devfn, int reg, int len, u32 *value)
187{
188 u32 av_bridge_base, av_bridge_limit;
189 int retval = 0;
190
191 switch (reg) {
192 /* Make BARs appear to not request any memory. */
193 case PCI_BASE_ADDRESS_0:
194 case PCI_BASE_ADDRESS_0 + 1:
195 case PCI_BASE_ADDRESS_0 + 2:
196 case PCI_BASE_ADDRESS_0 + 3:
197 *value = 0;
198 break;
199
200 /* Since subordinate bus number register is hardwired
201 * to zero and read only, so do the simulation.
202 */
203 case PCI_PRIMARY_BUS:
204 if (len == 4)
205 *value = 0x00010100;
206 break;
207
208 case PCI_SUBORDINATE_BUS:
209 *value = 1;
210 break;
211
212 case PCI_MEMORY_BASE:
213 case PCI_MEMORY_LIMIT:
214 /* Get the A/V bridge base address. */
215 pci_direct_conf1.read(0, 0, devfn,
216 PCI_BASE_ADDRESS_0, 4, &av_bridge_base);
217
218 av_bridge_limit = av_bridge_base + (512*MB - 1);
219 av_bridge_limit >>= 16;
220 av_bridge_limit &= 0xFFF0;
221
222 av_bridge_base >>= 16;
223 av_bridge_base &= 0xFFF0;
224
225 if (reg == PCI_MEMORY_LIMIT)
226 *value = av_bridge_limit;
227 else if (len == 2)
228 *value = av_bridge_base;
229 else
230 *value = (av_bridge_limit << 16) | av_bridge_base;
231 break;
232 /* Make prefetchable memory limit smaller than prefetchable
233 * memory base, so not claim prefetchable memory space.
234 */
235 case PCI_PREF_MEMORY_BASE:
236 *value = 0xFFF0;
237 break;
238 case PCI_PREF_MEMORY_LIMIT:
239 *value = 0x0;
240 break;
241 /* Make IO limit smaller than IO base, so not claim IO space. */
242 case PCI_IO_BASE:
243 *value = 0xF0;
244 break;
245 case PCI_IO_LIMIT:
246 *value = 0;
247 break;
248 default:
249 retval = 1;
250 }
251 return retval;
252}
253
254static int ce4100_conf_read(unsigned int seg, unsigned int bus,
255 unsigned int devfn, int reg, int len, u32 *value)
256{
257 int i, retval = 1;
258
259 if (bus == 1) {
260 for (i = 0; i < ARRAY_SIZE(bus1_fixups); i++) {
261 if (bus1_fixups[i].dev_func == devfn &&
262 bus1_fixups[i].reg == (reg & ~3) &&
263 bus1_fixups[i].read) {
264 bus1_fixups[i].read(&(bus1_fixups[i]),
265 value);
266 extract_bytes(value, reg, len);
267 return 0;
268 }
269 }
270 }
271
272 if (bus == 0 && (PCI_DEVFN(1, 0) == devfn) &&
273 !bridge_read(devfn, reg, len, value))
274 return 0;
275
276 return pci_direct_conf1.read(seg, bus, devfn, reg, len, value);
277}
278
279static int ce4100_conf_write(unsigned int seg, unsigned int bus,
280 unsigned int devfn, int reg, int len, u32 value)
281{
282 int i;
283
284 if (bus == 1) {
285 for (i = 0; i < ARRAY_SIZE(bus1_fixups); i++) {
286 if (bus1_fixups[i].dev_func == devfn &&
287 bus1_fixups[i].reg == (reg & ~3) &&
288 bus1_fixups[i].write) {
289 bus1_fixups[i].write(&(bus1_fixups[i]),
290 value);
291 return 0;
292 }
293 }
294 }
295
296 /* Discard writes to A/V bridge BAR. */
297 if (bus == 0 && PCI_DEVFN(1, 0) == devfn &&
298 ((reg & ~3) == PCI_BASE_ADDRESS_0))
299 return 0;
300
301 return pci_direct_conf1.write(seg, bus, devfn, reg, len, value);
302}
303
304struct pci_raw_ops ce4100_pci_conf = {
305 .read = ce4100_conf_read,
306 .write = ce4100_conf_write,
307};
308
309static int __init ce4100_pci_init(void)
310{
311 init_sim_regs();
312 raw_pci_ops = &ce4100_pci_conf;
313 return 0;
314}
315subsys_initcall(ce4100_pci_init);
diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c
index c4bb261c106e..b1805b78842f 100644
--- a/arch/x86/pci/i386.c
+++ b/arch/x86/pci/i386.c
@@ -65,21 +65,13 @@ pcibios_align_resource(void *data, const struct resource *res,
65 resource_size_t size, resource_size_t align) 65 resource_size_t size, resource_size_t align)
66{ 66{
67 struct pci_dev *dev = data; 67 struct pci_dev *dev = data;
68 resource_size_t start = round_down(res->end - size + 1, align); 68 resource_size_t start = res->start;
69 69
70 if (res->flags & IORESOURCE_IO) { 70 if (res->flags & IORESOURCE_IO) {
71 71 if (skip_isa_ioresource_align(dev))
72 /* 72 return start;
73 * If we're avoiding ISA aliases, the largest contiguous I/O 73 if (start & 0x300)
74 * port space is 256 bytes. Clearing bits 9 and 10 preserves 74 start = (start + 0x3ff) & ~0x3ff;
75 * all 256-byte and smaller alignments, so the result will
76 * still be correctly aligned.
77 */
78 if (!skip_isa_ioresource_align(dev))
79 start &= ~0x300;
80 } else if (res->flags & IORESOURCE_MEM) {
81 if (start < BIOS_END)
82 start = res->end; /* fail; no space */
83 } 75 }
84 return start; 76 return start;
85} 77}
diff --git a/arch/x86/pci/pcbios.c b/arch/x86/pci/pcbios.c
index 2492d165096a..a5f7d0d63de0 100644
--- a/arch/x86/pci/pcbios.c
+++ b/arch/x86/pci/pcbios.c
@@ -9,6 +9,7 @@
9#include <linux/uaccess.h> 9#include <linux/uaccess.h>
10#include <asm/pci_x86.h> 10#include <asm/pci_x86.h>
11#include <asm/pci-functions.h> 11#include <asm/pci-functions.h>
12#include <asm/cacheflush.h>
12 13
13/* BIOS32 signature: "_32_" */ 14/* BIOS32 signature: "_32_" */
14#define BIOS32_SIGNATURE (('_' << 0) + ('3' << 8) + ('2' << 16) + ('_' << 24)) 15#define BIOS32_SIGNATURE (('_' << 0) + ('3' << 8) + ('2' << 16) + ('_' << 24))
@@ -25,6 +26,27 @@
25#define PCIBIOS_HW_TYPE1_SPEC 0x10 26#define PCIBIOS_HW_TYPE1_SPEC 0x10
26#define PCIBIOS_HW_TYPE2_SPEC 0x20 27#define PCIBIOS_HW_TYPE2_SPEC 0x20
27 28
29int pcibios_enabled;
30
31/* According to the BIOS specification at:
32 * http://members.datafast.net.au/dft0802/specs/bios21.pdf, we could
33 * restrict the x zone to some pages and make it ro. But this may be
34 * broken on some bios, complex to handle with static_protections.
35 * We could make the 0xe0000-0x100000 range rox, but this can break
36 * some ISA mapping.
37 *
38 * So we let's an rw and x hole when pcibios is used. This shouldn't
39 * happen for modern system with mmconfig, and if you don't want it
40 * you could disable pcibios...
41 */
42static inline void set_bios_x(void)
43{
44 pcibios_enabled = 1;
45 set_memory_x(PAGE_OFFSET + BIOS_BEGIN, (BIOS_END - BIOS_BEGIN) >> PAGE_SHIFT);
46 if (__supported_pte_mask & _PAGE_NX)
47 printk(KERN_INFO "PCI : PCI BIOS aera is rw and x. Use pci=nobios if you want it NX.\n");
48}
49
28/* 50/*
29 * This is the standard structure used to identify the entry point 51 * This is the standard structure used to identify the entry point
30 * to the BIOS32 Service Directory, as documented in 52 * to the BIOS32 Service Directory, as documented in
@@ -332,6 +354,7 @@ static struct pci_raw_ops * __devinit pci_find_bios(void)
332 DBG("PCI: BIOS32 Service Directory entry at 0x%lx\n", 354 DBG("PCI: BIOS32 Service Directory entry at 0x%lx\n",
333 bios32_entry); 355 bios32_entry);
334 bios32_indirect.address = bios32_entry + PAGE_OFFSET; 356 bios32_indirect.address = bios32_entry + PAGE_OFFSET;
357 set_bios_x();
335 if (check_pcibios()) 358 if (check_pcibios())
336 return &pci_bios_access; 359 return &pci_bios_access;
337 } 360 }
diff --git a/arch/x86/platform/Makefile b/arch/x86/platform/Makefile
index 7bf70b812fa2..021eee91c056 100644
--- a/arch/x86/platform/Makefile
+++ b/arch/x86/platform/Makefile
@@ -1,5 +1,7 @@
1# Platform specific code goes here 1# Platform specific code goes here
2obj-y += ce4100/
2obj-y += efi/ 3obj-y += efi/
4obj-y += iris/
3obj-y += mrst/ 5obj-y += mrst/
4obj-y += olpc/ 6obj-y += olpc/
5obj-y += scx200/ 7obj-y += scx200/
diff --git a/arch/x86/platform/ce4100/Makefile b/arch/x86/platform/ce4100/Makefile
new file mode 100644
index 000000000000..91fc92971d94
--- /dev/null
+++ b/arch/x86/platform/ce4100/Makefile
@@ -0,0 +1 @@
obj-$(CONFIG_X86_INTEL_CE) += ce4100.o
diff --git a/arch/x86/platform/ce4100/ce4100.c b/arch/x86/platform/ce4100/ce4100.c
new file mode 100644
index 000000000000..d2c0d51a7178
--- /dev/null
+++ b/arch/x86/platform/ce4100/ce4100.c
@@ -0,0 +1,132 @@
1/*
2 * Intel CE4100 platform specific setup code
3 *
4 * (C) Copyright 2010 Intel Corporation
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; version 2
9 * of the License.
10 */
11#include <linux/init.h>
12#include <linux/kernel.h>
13#include <linux/irq.h>
14#include <linux/module.h>
15#include <linux/serial_reg.h>
16#include <linux/serial_8250.h>
17
18#include <asm/setup.h>
19#include <asm/io.h>
20
21static int ce4100_i8042_detect(void)
22{
23 return 0;
24}
25
26static void __init sdv_find_smp_config(void)
27{
28}
29
30#ifdef CONFIG_SERIAL_8250
31
32
33static unsigned int mem_serial_in(struct uart_port *p, int offset)
34{
35 offset = offset << p->regshift;
36 return readl(p->membase + offset);
37}
38
39/*
40 * The UART Tx interrupts are not set under some conditions and therefore serial
41 * transmission hangs. This is a silicon issue and has not been root caused. The
42 * workaround for this silicon issue checks UART_LSR_THRE bit and UART_LSR_TEMT
43 * bit of LSR register in interrupt handler to see whether at least one of these
44 * two bits is set, if so then process the transmit request. If this workaround
45 * is not applied, then the serial transmission may hang. This workaround is for
46 * errata number 9 in Errata - B step.
47*/
48
49static unsigned int ce4100_mem_serial_in(struct uart_port *p, int offset)
50{
51 unsigned int ret, ier, lsr;
52
53 if (offset == UART_IIR) {
54 offset = offset << p->regshift;
55 ret = readl(p->membase + offset);
56 if (ret & UART_IIR_NO_INT) {
57 /* see if the TX interrupt should have really set */
58 ier = mem_serial_in(p, UART_IER);
59 /* see if the UART's XMIT interrupt is enabled */
60 if (ier & UART_IER_THRI) {
61 lsr = mem_serial_in(p, UART_LSR);
62 /* now check to see if the UART should be
63 generating an interrupt (but isn't) */
64 if (lsr & (UART_LSR_THRE | UART_LSR_TEMT))
65 ret &= ~UART_IIR_NO_INT;
66 }
67 }
68 } else
69 ret = mem_serial_in(p, offset);
70 return ret;
71}
72
73static void ce4100_mem_serial_out(struct uart_port *p, int offset, int value)
74{
75 offset = offset << p->regshift;
76 writel(value, p->membase + offset);
77}
78
79static void ce4100_serial_fixup(int port, struct uart_port *up,
80 unsigned short *capabilites)
81{
82#ifdef CONFIG_EARLY_PRINTK
83 /*
84 * Over ride the legacy port configuration that comes from
85 * asm/serial.h. Using the ioport driver then switching to the
86 * PCI memmaped driver hangs the IOAPIC
87 */
88 if (up->iotype != UPIO_MEM32) {
89 up->uartclk = 14745600;
90 up->mapbase = 0xdffe0200;
91 set_fixmap_nocache(FIX_EARLYCON_MEM_BASE,
92 up->mapbase & PAGE_MASK);
93 up->membase =
94 (void __iomem *)__fix_to_virt(FIX_EARLYCON_MEM_BASE);
95 up->membase += up->mapbase & ~PAGE_MASK;
96 up->iotype = UPIO_MEM32;
97 up->regshift = 2;
98 }
99#endif
100 up->iobase = 0;
101 up->serial_in = ce4100_mem_serial_in;
102 up->serial_out = ce4100_mem_serial_out;
103
104 *capabilites |= (1 << 12);
105}
106
107static __init void sdv_serial_fixup(void)
108{
109 serial8250_set_isa_configurator(ce4100_serial_fixup);
110}
111
112#else
113static inline void sdv_serial_fixup(void);
114#endif
115
116static void __init sdv_arch_setup(void)
117{
118 sdv_serial_fixup();
119}
120
121/*
122 * CE4100 specific x86_init function overrides and early setup
123 * calls.
124 */
125void __init x86_ce4100_early_setup(void)
126{
127 x86_init.oem.arch_setup = sdv_arch_setup;
128 x86_platform.i8042_detect = ce4100_i8042_detect;
129 x86_init.resources.probe_roms = x86_init_noop;
130 x86_init.mpparse.get_smp_config = x86_init_uint_noop;
131 x86_init.mpparse.find_smp_config = sdv_find_smp_config;
132}
diff --git a/arch/x86/platform/iris/Makefile b/arch/x86/platform/iris/Makefile
new file mode 100644
index 000000000000..db921983a102
--- /dev/null
+++ b/arch/x86/platform/iris/Makefile
@@ -0,0 +1 @@
obj-$(CONFIG_X86_32_IRIS) += iris.o
diff --git a/arch/x86/platform/iris/iris.c b/arch/x86/platform/iris/iris.c
new file mode 100644
index 000000000000..1ba7f5ed8c9b
--- /dev/null
+++ b/arch/x86/platform/iris/iris.c
@@ -0,0 +1,91 @@
1/*
2 * Eurobraille/Iris power off support.
3 *
4 * Eurobraille's Iris machine is a PC with no APM or ACPI support.
5 * It is shutdown by a special I/O sequence which this module provides.
6 *
7 * Copyright (C) Shérab <Sebastien.Hinderer@ens-lyon.org>
8 *
9 * This program is free software ; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation ; either version 2 of the License, or
12 * (at your option) any later version.
13 *
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY ; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
18 *
19 * You should have received a copy of the GNU General Public License
20 * along with the program ; if not, write to the Free Software
21 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
22 */
23
24#include <linux/moduleparam.h>
25#include <linux/module.h>
26#include <linux/kernel.h>
27#include <linux/errno.h>
28#include <linux/delay.h>
29#include <linux/init.h>
30#include <linux/pm.h>
31#include <asm/io.h>
32
33#define IRIS_GIO_BASE 0x340
34#define IRIS_GIO_INPUT IRIS_GIO_BASE
35#define IRIS_GIO_OUTPUT (IRIS_GIO_BASE + 1)
36#define IRIS_GIO_PULSE 0x80 /* First byte to send */
37#define IRIS_GIO_REST 0x00 /* Second byte to send */
38#define IRIS_GIO_NODEV 0xff /* Likely not an Iris */
39
40MODULE_LICENSE("GPL");
41MODULE_AUTHOR("Sébastien Hinderer <Sebastien.Hinderer@ens-lyon.org>");
42MODULE_DESCRIPTION("A power_off handler for Iris devices from EuroBraille");
43MODULE_SUPPORTED_DEVICE("Eurobraille/Iris");
44
45static int force;
46
47module_param(force, bool, 0);
48MODULE_PARM_DESC(force, "Set to one to force poweroff handler installation.");
49
50static void (*old_pm_power_off)(void);
51
52static void iris_power_off(void)
53{
54 outb(IRIS_GIO_PULSE, IRIS_GIO_OUTPUT);
55 msleep(850);
56 outb(IRIS_GIO_REST, IRIS_GIO_OUTPUT);
57}
58
59/*
60 * Before installing the power_off handler, try to make sure the OS is
61 * running on an Iris. Since Iris does not support DMI, this is done
62 * by reading its input port and seeing whether the read value is
63 * meaningful.
64 */
65static int iris_init(void)
66{
67 unsigned char status;
68 if (force != 1) {
69 printk(KERN_ERR "The force parameter has not been set to 1 so the Iris poweroff handler will not be installed.\n");
70 return -ENODEV;
71 }
72 status = inb(IRIS_GIO_INPUT);
73 if (status == IRIS_GIO_NODEV) {
74 printk(KERN_ERR "This machine does not seem to be an Iris. Power_off handler not installed.\n");
75 return -ENODEV;
76 }
77 old_pm_power_off = pm_power_off;
78 pm_power_off = &iris_power_off;
79 printk(KERN_INFO "Iris power_off handler installed.\n");
80
81 return 0;
82}
83
84static void iris_exit(void)
85{
86 pm_power_off = old_pm_power_off;
87 printk(KERN_INFO "Iris power_off handler uninstalled.\n");
88}
89
90module_init(iris_init);
91module_exit(iris_exit);
diff --git a/arch/x86/platform/mrst/Makefile b/arch/x86/platform/mrst/Makefile
index efbbc552fa95..f61ccdd49341 100644
--- a/arch/x86/platform/mrst/Makefile
+++ b/arch/x86/platform/mrst/Makefile
@@ -1 +1,3 @@
1obj-$(CONFIG_X86_MRST) += mrst.o 1obj-$(CONFIG_X86_MRST) += mrst.o
2obj-$(CONFIG_X86_MRST) += vrtc.o
3obj-$(CONFIG_EARLY_PRINTK_MRST) += early_printk_mrst.o
diff --git a/arch/x86/kernel/early_printk_mrst.c b/arch/x86/platform/mrst/early_printk_mrst.c
index 65df603622b2..65df603622b2 100644
--- a/arch/x86/kernel/early_printk_mrst.c
+++ b/arch/x86/platform/mrst/early_printk_mrst.c
diff --git a/arch/x86/platform/mrst/mrst.c b/arch/x86/platform/mrst/mrst.c
index 79ae68154e87..fee0b4914e07 100644
--- a/arch/x86/platform/mrst/mrst.c
+++ b/arch/x86/platform/mrst/mrst.c
@@ -9,9 +9,19 @@
9 * as published by the Free Software Foundation; version 2 9 * as published by the Free Software Foundation; version 2
10 * of the License. 10 * of the License.
11 */ 11 */
12
13#define pr_fmt(fmt) "mrst: " fmt
14
12#include <linux/init.h> 15#include <linux/init.h>
13#include <linux/kernel.h> 16#include <linux/kernel.h>
14#include <linux/sfi.h> 17#include <linux/sfi.h>
18#include <linux/intel_pmic_gpio.h>
19#include <linux/spi/spi.h>
20#include <linux/i2c.h>
21#include <linux/i2c/pca953x.h>
22#include <linux/gpio_keys.h>
23#include <linux/input.h>
24#include <linux/platform_device.h>
15#include <linux/irq.h> 25#include <linux/irq.h>
16#include <linux/module.h> 26#include <linux/module.h>
17 27
@@ -23,7 +33,9 @@
23#include <asm/mrst.h> 33#include <asm/mrst.h>
24#include <asm/io.h> 34#include <asm/io.h>
25#include <asm/i8259.h> 35#include <asm/i8259.h>
36#include <asm/intel_scu_ipc.h>
26#include <asm/apb_timer.h> 37#include <asm/apb_timer.h>
38#include <asm/reboot.h>
27 39
28/* 40/*
29 * the clockevent devices on Moorestown/Medfield can be APBT or LAPIC clock, 41 * the clockevent devices on Moorestown/Medfield can be APBT or LAPIC clock,
@@ -102,10 +114,10 @@ static int __init sfi_parse_mtmr(struct sfi_table_header *table)
102 memcpy(sfi_mtimer_array, pentry, totallen); 114 memcpy(sfi_mtimer_array, pentry, totallen);
103 } 115 }
104 116
105 printk(KERN_INFO "SFI: MTIMER info (num = %d):\n", sfi_mtimer_num); 117 pr_debug("SFI MTIMER info (num = %d):\n", sfi_mtimer_num);
106 pentry = sfi_mtimer_array; 118 pentry = sfi_mtimer_array;
107 for (totallen = 0; totallen < sfi_mtimer_num; totallen++, pentry++) { 119 for (totallen = 0; totallen < sfi_mtimer_num; totallen++, pentry++) {
108 printk(KERN_INFO "timer[%d]: paddr = 0x%08x, freq = %dHz," 120 pr_debug("timer[%d]: paddr = 0x%08x, freq = %dHz,"
109 " irq = %d\n", totallen, (u32)pentry->phys_addr, 121 " irq = %d\n", totallen, (u32)pentry->phys_addr,
110 pentry->freq_hz, pentry->irq); 122 pentry->freq_hz, pentry->irq);
111 if (!pentry->irq) 123 if (!pentry->irq)
@@ -176,14 +188,14 @@ int __init sfi_parse_mrtc(struct sfi_table_header *table)
176 memcpy(sfi_mrtc_array, pentry, totallen); 188 memcpy(sfi_mrtc_array, pentry, totallen);
177 } 189 }
178 190
179 printk(KERN_INFO "SFI: RTC info (num = %d):\n", sfi_mrtc_num); 191 pr_debug("SFI RTC info (num = %d):\n", sfi_mrtc_num);
180 pentry = sfi_mrtc_array; 192 pentry = sfi_mrtc_array;
181 for (totallen = 0; totallen < sfi_mrtc_num; totallen++, pentry++) { 193 for (totallen = 0; totallen < sfi_mrtc_num; totallen++, pentry++) {
182 printk(KERN_INFO "RTC[%d]: paddr = 0x%08x, irq = %d\n", 194 pr_debug("RTC[%d]: paddr = 0x%08x, irq = %d\n",
183 totallen, (u32)pentry->phys_addr, pentry->irq); 195 totallen, (u32)pentry->phys_addr, pentry->irq);
184 mp_irq.type = MP_IOAPIC; 196 mp_irq.type = MP_IOAPIC;
185 mp_irq.irqtype = mp_INT; 197 mp_irq.irqtype = mp_INT;
186 mp_irq.irqflag = 0; 198 mp_irq.irqflag = 0xf; /* level trigger and active low */
187 mp_irq.srcbus = 0; 199 mp_irq.srcbus = 0;
188 mp_irq.srcbusirq = pentry->irq; /* IRQ */ 200 mp_irq.srcbusirq = pentry->irq; /* IRQ */
189 mp_irq.dstapic = MP_APIC_ALL; 201 mp_irq.dstapic = MP_APIC_ALL;
@@ -209,6 +221,7 @@ static unsigned long __init mrst_calibrate_tsc(void)
209 221
210void __init mrst_time_init(void) 222void __init mrst_time_init(void)
211{ 223{
224 sfi_table_parse(SFI_SIG_MTMR, NULL, NULL, sfi_parse_mtmr);
212 switch (mrst_timer_options) { 225 switch (mrst_timer_options) {
213 case MRST_TIMER_APBT_ONLY: 226 case MRST_TIMER_APBT_ONLY:
214 break; 227 break;
@@ -224,16 +237,10 @@ void __init mrst_time_init(void)
224 return; 237 return;
225 } 238 }
226 /* we need at least one APB timer */ 239 /* we need at least one APB timer */
227 sfi_table_parse(SFI_SIG_MTMR, NULL, NULL, sfi_parse_mtmr);
228 pre_init_apic_IRQ0(); 240 pre_init_apic_IRQ0();
229 apbt_time_init(); 241 apbt_time_init();
230} 242}
231 243
232void __init mrst_rtc_init(void)
233{
234 sfi_table_parse(SFI_SIG_MRTC, NULL, NULL, sfi_parse_mrtc);
235}
236
237void __cpuinit mrst_arch_setup(void) 244void __cpuinit mrst_arch_setup(void)
238{ 245{
239 if (boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 0x27) 246 if (boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 0x27)
@@ -256,6 +263,17 @@ static int mrst_i8042_detect(void)
256 return 0; 263 return 0;
257} 264}
258 265
266/* Reboot and power off are handled by the SCU on a MID device */
267static void mrst_power_off(void)
268{
269 intel_scu_ipc_simple_command(0xf1, 1);
270}
271
272static void mrst_reboot(void)
273{
274 intel_scu_ipc_simple_command(0xf1, 0);
275}
276
259/* 277/*
260 * Moorestown specific x86_init function overrides and early setup 278 * Moorestown specific x86_init function overrides and early setup
261 * calls. 279 * calls.
@@ -281,6 +299,10 @@ void __init x86_mrst_early_setup(void)
281 299
282 legacy_pic = &null_legacy_pic; 300 legacy_pic = &null_legacy_pic;
283 301
302 /* Moorestown specific power_off/restart method */
303 pm_power_off = mrst_power_off;
304 machine_ops.emergency_restart = mrst_reboot;
305
284 /* Avoid searching for BIOS MP tables */ 306 /* Avoid searching for BIOS MP tables */
285 x86_init.mpparse.find_smp_config = x86_init_noop; 307 x86_init.mpparse.find_smp_config = x86_init_noop;
286 x86_init.mpparse.get_smp_config = x86_init_uint_noop; 308 x86_init.mpparse.get_smp_config = x86_init_uint_noop;
@@ -309,3 +331,505 @@ static inline int __init setup_x86_mrst_timer(char *arg)
309 return 0; 331 return 0;
310} 332}
311__setup("x86_mrst_timer=", setup_x86_mrst_timer); 333__setup("x86_mrst_timer=", setup_x86_mrst_timer);
334
335/*
336 * Parsing GPIO table first, since the DEVS table will need this table
337 * to map the pin name to the actual pin.
338 */
339static struct sfi_gpio_table_entry *gpio_table;
340static int gpio_num_entry;
341
342static int __init sfi_parse_gpio(struct sfi_table_header *table)
343{
344 struct sfi_table_simple *sb;
345 struct sfi_gpio_table_entry *pentry;
346 int num, i;
347
348 if (gpio_table)
349 return 0;
350 sb = (struct sfi_table_simple *)table;
351 num = SFI_GET_NUM_ENTRIES(sb, struct sfi_gpio_table_entry);
352 pentry = (struct sfi_gpio_table_entry *)sb->pentry;
353
354 gpio_table = (struct sfi_gpio_table_entry *)
355 kmalloc(num * sizeof(*pentry), GFP_KERNEL);
356 if (!gpio_table)
357 return -1;
358 memcpy(gpio_table, pentry, num * sizeof(*pentry));
359 gpio_num_entry = num;
360
361 pr_debug("GPIO pin info:\n");
362 for (i = 0; i < num; i++, pentry++)
363 pr_debug("info[%2d]: controller = %16.16s, pin_name = %16.16s,"
364 " pin = %d\n", i,
365 pentry->controller_name,
366 pentry->pin_name,
367 pentry->pin_no);
368 return 0;
369}
370
371static int get_gpio_by_name(const char *name)
372{
373 struct sfi_gpio_table_entry *pentry = gpio_table;
374 int i;
375
376 if (!pentry)
377 return -1;
378 for (i = 0; i < gpio_num_entry; i++, pentry++) {
379 if (!strncmp(name, pentry->pin_name, SFI_NAME_LEN))
380 return pentry->pin_no;
381 }
382 return -1;
383}
384
385/*
386 * Here defines the array of devices platform data that IAFW would export
387 * through SFI "DEVS" table, we use name and type to match the device and
388 * its platform data.
389 */
390struct devs_id {
391 char name[SFI_NAME_LEN + 1];
392 u8 type;
393 u8 delay;
394 void *(*get_platform_data)(void *info);
395};
396
397/* the offset for the mapping of global gpio pin to irq */
398#define MRST_IRQ_OFFSET 0x100
399
400static void __init *pmic_gpio_platform_data(void *info)
401{
402 static struct intel_pmic_gpio_platform_data pmic_gpio_pdata;
403 int gpio_base = get_gpio_by_name("pmic_gpio_base");
404
405 if (gpio_base == -1)
406 gpio_base = 64;
407 pmic_gpio_pdata.gpio_base = gpio_base;
408 pmic_gpio_pdata.irq_base = gpio_base + MRST_IRQ_OFFSET;
409 pmic_gpio_pdata.gpiointr = 0xffffeff8;
410
411 return &pmic_gpio_pdata;
412}
413
414static void __init *max3111_platform_data(void *info)
415{
416 struct spi_board_info *spi_info = info;
417 int intr = get_gpio_by_name("max3111_int");
418
419 if (intr == -1)
420 return NULL;
421 spi_info->irq = intr + MRST_IRQ_OFFSET;
422 return NULL;
423}
424
425/* we have multiple max7315 on the board ... */
426#define MAX7315_NUM 2
427static void __init *max7315_platform_data(void *info)
428{
429 static struct pca953x_platform_data max7315_pdata[MAX7315_NUM];
430 static int nr;
431 struct pca953x_platform_data *max7315 = &max7315_pdata[nr];
432 struct i2c_board_info *i2c_info = info;
433 int gpio_base, intr;
434 char base_pin_name[SFI_NAME_LEN + 1];
435 char intr_pin_name[SFI_NAME_LEN + 1];
436
437 if (nr == MAX7315_NUM) {
438 pr_err("too many max7315s, we only support %d\n",
439 MAX7315_NUM);
440 return NULL;
441 }
442 /* we have several max7315 on the board, we only need load several
443 * instances of the same pca953x driver to cover them
444 */
445 strcpy(i2c_info->type, "max7315");
446 if (nr++) {
447 sprintf(base_pin_name, "max7315_%d_base", nr);
448 sprintf(intr_pin_name, "max7315_%d_int", nr);
449 } else {
450 strcpy(base_pin_name, "max7315_base");
451 strcpy(intr_pin_name, "max7315_int");
452 }
453
454 gpio_base = get_gpio_by_name(base_pin_name);
455 intr = get_gpio_by_name(intr_pin_name);
456
457 if (gpio_base == -1)
458 return NULL;
459 max7315->gpio_base = gpio_base;
460 if (intr != -1) {
461 i2c_info->irq = intr + MRST_IRQ_OFFSET;
462 max7315->irq_base = gpio_base + MRST_IRQ_OFFSET;
463 } else {
464 i2c_info->irq = -1;
465 max7315->irq_base = -1;
466 }
467 return max7315;
468}
469
470static void __init *emc1403_platform_data(void *info)
471{
472 static short intr2nd_pdata;
473 struct i2c_board_info *i2c_info = info;
474 int intr = get_gpio_by_name("thermal_int");
475 int intr2nd = get_gpio_by_name("thermal_alert");
476
477 if (intr == -1 || intr2nd == -1)
478 return NULL;
479
480 i2c_info->irq = intr + MRST_IRQ_OFFSET;
481 intr2nd_pdata = intr2nd + MRST_IRQ_OFFSET;
482
483 return &intr2nd_pdata;
484}
485
486static void __init *lis331dl_platform_data(void *info)
487{
488 static short intr2nd_pdata;
489 struct i2c_board_info *i2c_info = info;
490 int intr = get_gpio_by_name("accel_int");
491 int intr2nd = get_gpio_by_name("accel_2");
492
493 if (intr == -1 || intr2nd == -1)
494 return NULL;
495
496 i2c_info->irq = intr + MRST_IRQ_OFFSET;
497 intr2nd_pdata = intr2nd + MRST_IRQ_OFFSET;
498
499 return &intr2nd_pdata;
500}
501
502static void __init *no_platform_data(void *info)
503{
504 return NULL;
505}
506
507static const struct devs_id __initconst device_ids[] = {
508 {"pmic_gpio", SFI_DEV_TYPE_SPI, 1, &pmic_gpio_platform_data},
509 {"spi_max3111", SFI_DEV_TYPE_SPI, 0, &max3111_platform_data},
510 {"i2c_max7315", SFI_DEV_TYPE_I2C, 1, &max7315_platform_data},
511 {"i2c_max7315_2", SFI_DEV_TYPE_I2C, 1, &max7315_platform_data},
512 {"emc1403", SFI_DEV_TYPE_I2C, 1, &emc1403_platform_data},
513 {"i2c_accel", SFI_DEV_TYPE_I2C, 0, &lis331dl_platform_data},
514 {"pmic_audio", SFI_DEV_TYPE_IPC, 1, &no_platform_data},
515 {"msic_audio", SFI_DEV_TYPE_IPC, 1, &no_platform_data},
516 {},
517};
518
519#define MAX_IPCDEVS 24
520static struct platform_device *ipc_devs[MAX_IPCDEVS];
521static int ipc_next_dev;
522
523#define MAX_SCU_SPI 24
524static struct spi_board_info *spi_devs[MAX_SCU_SPI];
525static int spi_next_dev;
526
527#define MAX_SCU_I2C 24
528static struct i2c_board_info *i2c_devs[MAX_SCU_I2C];
529static int i2c_bus[MAX_SCU_I2C];
530static int i2c_next_dev;
531
532static void __init intel_scu_device_register(struct platform_device *pdev)
533{
534 if(ipc_next_dev == MAX_IPCDEVS)
535 pr_err("too many SCU IPC devices");
536 else
537 ipc_devs[ipc_next_dev++] = pdev;
538}
539
540static void __init intel_scu_spi_device_register(struct spi_board_info *sdev)
541{
542 struct spi_board_info *new_dev;
543
544 if (spi_next_dev == MAX_SCU_SPI) {
545 pr_err("too many SCU SPI devices");
546 return;
547 }
548
549 new_dev = kzalloc(sizeof(*sdev), GFP_KERNEL);
550 if (!new_dev) {
551 pr_err("failed to alloc mem for delayed spi dev %s\n",
552 sdev->modalias);
553 return;
554 }
555 memcpy(new_dev, sdev, sizeof(*sdev));
556
557 spi_devs[spi_next_dev++] = new_dev;
558}
559
560static void __init intel_scu_i2c_device_register(int bus,
561 struct i2c_board_info *idev)
562{
563 struct i2c_board_info *new_dev;
564
565 if (i2c_next_dev == MAX_SCU_I2C) {
566 pr_err("too many SCU I2C devices");
567 return;
568 }
569
570 new_dev = kzalloc(sizeof(*idev), GFP_KERNEL);
571 if (!new_dev) {
572 pr_err("failed to alloc mem for delayed i2c dev %s\n",
573 idev->type);
574 return;
575 }
576 memcpy(new_dev, idev, sizeof(*idev));
577
578 i2c_bus[i2c_next_dev] = bus;
579 i2c_devs[i2c_next_dev++] = new_dev;
580}
581
582/* Called by IPC driver */
583void intel_scu_devices_create(void)
584{
585 int i;
586
587 for (i = 0; i < ipc_next_dev; i++)
588 platform_device_add(ipc_devs[i]);
589
590 for (i = 0; i < spi_next_dev; i++)
591 spi_register_board_info(spi_devs[i], 1);
592
593 for (i = 0; i < i2c_next_dev; i++) {
594 struct i2c_adapter *adapter;
595 struct i2c_client *client;
596
597 adapter = i2c_get_adapter(i2c_bus[i]);
598 if (adapter) {
599 client = i2c_new_device(adapter, i2c_devs[i]);
600 if (!client)
601 pr_err("can't create i2c device %s\n",
602 i2c_devs[i]->type);
603 } else
604 i2c_register_board_info(i2c_bus[i], i2c_devs[i], 1);
605 }
606}
607EXPORT_SYMBOL_GPL(intel_scu_devices_create);
608
609/* Called by IPC driver */
610void intel_scu_devices_destroy(void)
611{
612 int i;
613
614 for (i = 0; i < ipc_next_dev; i++)
615 platform_device_del(ipc_devs[i]);
616}
617EXPORT_SYMBOL_GPL(intel_scu_devices_destroy);
618
619static void __init install_irq_resource(struct platform_device *pdev, int irq)
620{
621 /* Single threaded */
622 static struct resource __initdata res = {
623 .name = "IRQ",
624 .flags = IORESOURCE_IRQ,
625 };
626 res.start = irq;
627 platform_device_add_resources(pdev, &res, 1);
628}
629
630static void __init sfi_handle_ipc_dev(struct platform_device *pdev)
631{
632 const struct devs_id *dev = device_ids;
633 void *pdata = NULL;
634
635 while (dev->name[0]) {
636 if (dev->type == SFI_DEV_TYPE_IPC &&
637 !strncmp(dev->name, pdev->name, SFI_NAME_LEN)) {
638 pdata = dev->get_platform_data(pdev);
639 break;
640 }
641 dev++;
642 }
643 pdev->dev.platform_data = pdata;
644 intel_scu_device_register(pdev);
645}
646
647static void __init sfi_handle_spi_dev(struct spi_board_info *spi_info)
648{
649 const struct devs_id *dev = device_ids;
650 void *pdata = NULL;
651
652 while (dev->name[0]) {
653 if (dev->type == SFI_DEV_TYPE_SPI &&
654 !strncmp(dev->name, spi_info->modalias, SFI_NAME_LEN)) {
655 pdata = dev->get_platform_data(spi_info);
656 break;
657 }
658 dev++;
659 }
660 spi_info->platform_data = pdata;
661 if (dev->delay)
662 intel_scu_spi_device_register(spi_info);
663 else
664 spi_register_board_info(spi_info, 1);
665}
666
667static void __init sfi_handle_i2c_dev(int bus, struct i2c_board_info *i2c_info)
668{
669 const struct devs_id *dev = device_ids;
670 void *pdata = NULL;
671
672 while (dev->name[0]) {
673 if (dev->type == SFI_DEV_TYPE_I2C &&
674 !strncmp(dev->name, i2c_info->type, SFI_NAME_LEN)) {
675 pdata = dev->get_platform_data(i2c_info);
676 break;
677 }
678 dev++;
679 }
680 i2c_info->platform_data = pdata;
681
682 if (dev->delay)
683 intel_scu_i2c_device_register(bus, i2c_info);
684 else
685 i2c_register_board_info(bus, i2c_info, 1);
686 }
687
688
689static int __init sfi_parse_devs(struct sfi_table_header *table)
690{
691 struct sfi_table_simple *sb;
692 struct sfi_device_table_entry *pentry;
693 struct spi_board_info spi_info;
694 struct i2c_board_info i2c_info;
695 struct platform_device *pdev;
696 int num, i, bus;
697 int ioapic;
698 struct io_apic_irq_attr irq_attr;
699
700 sb = (struct sfi_table_simple *)table;
701 num = SFI_GET_NUM_ENTRIES(sb, struct sfi_device_table_entry);
702 pentry = (struct sfi_device_table_entry *)sb->pentry;
703
704 for (i = 0; i < num; i++, pentry++) {
705 if (pentry->irq != (u8)0xff) { /* native RTE case */
706 /* these SPI2 devices are not exposed to system as PCI
707 * devices, but they have separate RTE entry in IOAPIC
708 * so we have to enable them one by one here
709 */
710 ioapic = mp_find_ioapic(pentry->irq);
711 irq_attr.ioapic = ioapic;
712 irq_attr.ioapic_pin = pentry->irq;
713 irq_attr.trigger = 1;
714 irq_attr.polarity = 1;
715 io_apic_set_pci_routing(NULL, pentry->irq, &irq_attr);
716 }
717 switch (pentry->type) {
718 case SFI_DEV_TYPE_IPC:
719 /* ID as IRQ is a hack that will go away */
720 pdev = platform_device_alloc(pentry->name, pentry->irq);
721 if (pdev == NULL) {
722 pr_err("out of memory for SFI platform device '%s'.\n",
723 pentry->name);
724 continue;
725 }
726 install_irq_resource(pdev, pentry->irq);
727 pr_debug("info[%2d]: IPC bus, name = %16.16s, "
728 "irq = 0x%2x\n", i, pentry->name, pentry->irq);
729 sfi_handle_ipc_dev(pdev);
730 break;
731 case SFI_DEV_TYPE_SPI:
732 memset(&spi_info, 0, sizeof(spi_info));
733 strncpy(spi_info.modalias, pentry->name, SFI_NAME_LEN);
734 spi_info.irq = pentry->irq;
735 spi_info.bus_num = pentry->host_num;
736 spi_info.chip_select = pentry->addr;
737 spi_info.max_speed_hz = pentry->max_freq;
738 pr_debug("info[%2d]: SPI bus = %d, name = %16.16s, "
739 "irq = 0x%2x, max_freq = %d, cs = %d\n", i,
740 spi_info.bus_num,
741 spi_info.modalias,
742 spi_info.irq,
743 spi_info.max_speed_hz,
744 spi_info.chip_select);
745 sfi_handle_spi_dev(&spi_info);
746 break;
747 case SFI_DEV_TYPE_I2C:
748 memset(&i2c_info, 0, sizeof(i2c_info));
749 bus = pentry->host_num;
750 strncpy(i2c_info.type, pentry->name, SFI_NAME_LEN);
751 i2c_info.irq = pentry->irq;
752 i2c_info.addr = pentry->addr;
753 pr_debug("info[%2d]: I2C bus = %d, name = %16.16s, "
754 "irq = 0x%2x, addr = 0x%x\n", i, bus,
755 i2c_info.type,
756 i2c_info.irq,
757 i2c_info.addr);
758 sfi_handle_i2c_dev(bus, &i2c_info);
759 break;
760 case SFI_DEV_TYPE_UART:
761 case SFI_DEV_TYPE_HSI:
762 default:
763 ;
764 }
765 }
766 return 0;
767}
768
769static int __init mrst_platform_init(void)
770{
771 sfi_table_parse(SFI_SIG_GPIO, NULL, NULL, sfi_parse_gpio);
772 sfi_table_parse(SFI_SIG_DEVS, NULL, NULL, sfi_parse_devs);
773 return 0;
774}
775arch_initcall(mrst_platform_init);
776
777/*
778 * we will search these buttons in SFI GPIO table (by name)
779 * and register them dynamically. Please add all possible
780 * buttons here, we will shrink them if no GPIO found.
781 */
782static struct gpio_keys_button gpio_button[] = {
783 {KEY_POWER, -1, 1, "power_btn", EV_KEY, 0, 3000},
784 {KEY_PROG1, -1, 1, "prog_btn1", EV_KEY, 0, 20},
785 {KEY_PROG2, -1, 1, "prog_btn2", EV_KEY, 0, 20},
786 {SW_LID, -1, 1, "lid_switch", EV_SW, 0, 20},
787 {KEY_VOLUMEUP, -1, 1, "vol_up", EV_KEY, 0, 20},
788 {KEY_VOLUMEDOWN, -1, 1, "vol_down", EV_KEY, 0, 20},
789 {KEY_CAMERA, -1, 1, "camera_full", EV_KEY, 0, 20},
790 {KEY_CAMERA_FOCUS, -1, 1, "camera_half", EV_KEY, 0, 20},
791 {SW_KEYPAD_SLIDE, -1, 1, "MagSw1", EV_SW, 0, 20},
792 {SW_KEYPAD_SLIDE, -1, 1, "MagSw2", EV_SW, 0, 20},
793};
794
795static struct gpio_keys_platform_data mrst_gpio_keys = {
796 .buttons = gpio_button,
797 .rep = 1,
798 .nbuttons = -1, /* will fill it after search */
799};
800
801static struct platform_device pb_device = {
802 .name = "gpio-keys",
803 .id = -1,
804 .dev = {
805 .platform_data = &mrst_gpio_keys,
806 },
807};
808
809/*
810 * Shrink the non-existent buttons, register the gpio button
811 * device if there is some
812 */
813static int __init pb_keys_init(void)
814{
815 struct gpio_keys_button *gb = gpio_button;
816 int i, num, good = 0;
817
818 num = sizeof(gpio_button) / sizeof(struct gpio_keys_button);
819 for (i = 0; i < num; i++) {
820 gb[i].gpio = get_gpio_by_name(gb[i].desc);
821 if (gb[i].gpio == -1)
822 continue;
823
824 if (i != good)
825 gb[good] = gb[i];
826 good++;
827 }
828
829 if (good) {
830 mrst_gpio_keys.nbuttons = good;
831 return platform_device_register(&pb_device);
832 }
833 return 0;
834}
835late_initcall(pb_keys_init);
diff --git a/arch/x86/platform/mrst/vrtc.c b/arch/x86/platform/mrst/vrtc.c
new file mode 100644
index 000000000000..32cd7edd71a0
--- /dev/null
+++ b/arch/x86/platform/mrst/vrtc.c
@@ -0,0 +1,165 @@
1/*
2 * vrtc.c: Driver for virtual RTC device on Intel MID platform
3 *
4 * (C) Copyright 2009 Intel Corporation
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; version 2
9 * of the License.
10 *
11 * Note:
12 * VRTC is emulated by system controller firmware, the real HW
13 * RTC is located in the PMIC device. SCU FW shadows PMIC RTC
14 * in a memory mapped IO space that is visible to the host IA
15 * processor.
16 *
17 * This driver is based on RTC CMOS driver.
18 */
19
20#include <linux/kernel.h>
21#include <linux/init.h>
22#include <linux/sfi.h>
23#include <linux/platform_device.h>
24
25#include <asm/mrst.h>
26#include <asm/mrst-vrtc.h>
27#include <asm/time.h>
28#include <asm/fixmap.h>
29
30static unsigned char __iomem *vrtc_virt_base;
31
32unsigned char vrtc_cmos_read(unsigned char reg)
33{
34 unsigned char retval;
35
36 /* vRTC's registers range from 0x0 to 0xD */
37 if (reg > 0xd || !vrtc_virt_base)
38 return 0xff;
39
40 lock_cmos_prefix(reg);
41 retval = __raw_readb(vrtc_virt_base + (reg << 2));
42 lock_cmos_suffix(reg);
43 return retval;
44}
45EXPORT_SYMBOL_GPL(vrtc_cmos_read);
46
47void vrtc_cmos_write(unsigned char val, unsigned char reg)
48{
49 if (reg > 0xd || !vrtc_virt_base)
50 return;
51
52 lock_cmos_prefix(reg);
53 __raw_writeb(val, vrtc_virt_base + (reg << 2));
54 lock_cmos_suffix(reg);
55}
56EXPORT_SYMBOL_GPL(vrtc_cmos_write);
57
58unsigned long vrtc_get_time(void)
59{
60 u8 sec, min, hour, mday, mon;
61 u32 year;
62
63 while ((vrtc_cmos_read(RTC_FREQ_SELECT) & RTC_UIP))
64 cpu_relax();
65
66 sec = vrtc_cmos_read(RTC_SECONDS);
67 min = vrtc_cmos_read(RTC_MINUTES);
68 hour = vrtc_cmos_read(RTC_HOURS);
69 mday = vrtc_cmos_read(RTC_DAY_OF_MONTH);
70 mon = vrtc_cmos_read(RTC_MONTH);
71 year = vrtc_cmos_read(RTC_YEAR);
72
73 /* vRTC YEAR reg contains the offset to 1960 */
74 year += 1960;
75
76 printk(KERN_INFO "vRTC: sec: %d min: %d hour: %d day: %d "
77 "mon: %d year: %d\n", sec, min, hour, mday, mon, year);
78
79 return mktime(year, mon, mday, hour, min, sec);
80}
81
82/* Only care about the minutes and seconds */
83int vrtc_set_mmss(unsigned long nowtime)
84{
85 int real_sec, real_min;
86 int vrtc_min;
87
88 vrtc_min = vrtc_cmos_read(RTC_MINUTES);
89
90 real_sec = nowtime % 60;
91 real_min = nowtime / 60;
92 if (((abs(real_min - vrtc_min) + 15)/30) & 1)
93 real_min += 30;
94 real_min %= 60;
95
96 vrtc_cmos_write(real_sec, RTC_SECONDS);
97 vrtc_cmos_write(real_min, RTC_MINUTES);
98 return 0;
99}
100
101void __init mrst_rtc_init(void)
102{
103 unsigned long rtc_paddr;
104 void __iomem *virt_base;
105
106 sfi_table_parse(SFI_SIG_MRTC, NULL, NULL, sfi_parse_mrtc);
107 if (!sfi_mrtc_num)
108 return;
109
110 rtc_paddr = sfi_mrtc_array[0].phys_addr;
111
112 /* vRTC's register address may not be page aligned */
113 set_fixmap_nocache(FIX_LNW_VRTC, rtc_paddr);
114
115 virt_base = (void __iomem *)__fix_to_virt(FIX_LNW_VRTC);
116 virt_base += rtc_paddr & ~PAGE_MASK;
117 vrtc_virt_base = virt_base;
118
119 x86_platform.get_wallclock = vrtc_get_time;
120 x86_platform.set_wallclock = vrtc_set_mmss;
121}
122
123/*
124 * The Moorestown platform has a memory mapped virtual RTC device that emulates
125 * the programming interface of the RTC.
126 */
127
128static struct resource vrtc_resources[] = {
129 [0] = {
130 .flags = IORESOURCE_MEM,
131 },
132 [1] = {
133 .flags = IORESOURCE_IRQ,
134 }
135};
136
137static struct platform_device vrtc_device = {
138 .name = "rtc_mrst",
139 .id = -1,
140 .resource = vrtc_resources,
141 .num_resources = ARRAY_SIZE(vrtc_resources),
142};
143
144/* Register the RTC device if appropriate */
145static int __init mrst_device_create(void)
146{
147 /* No Moorestown, no device */
148 if (!mrst_identify_cpu())
149 return -ENODEV;
150 /* No timer, no device */
151 if (!sfi_mrtc_num)
152 return -ENODEV;
153
154 /* iomem resource */
155 vrtc_resources[0].start = sfi_mrtc_array[0].phys_addr;
156 vrtc_resources[0].end = sfi_mrtc_array[0].phys_addr +
157 MRST_VRTC_MAP_SZ;
158 /* irq resource */
159 vrtc_resources[1].start = sfi_mrtc_array[0].irq;
160 vrtc_resources[1].end = sfi_mrtc_array[0].irq;
161
162 return platform_device_register(&vrtc_device);
163}
164
165module_init(mrst_device_create);
diff --git a/arch/x86/platform/sfi/sfi.c b/arch/x86/platform/sfi/sfi.c
index dd4c281ffe57..ca54875ac795 100644
--- a/arch/x86/platform/sfi/sfi.c
+++ b/arch/x86/platform/sfi/sfi.c
@@ -48,9 +48,9 @@ static void __init mp_sfi_register_lapic_address(unsigned long address)
48/* All CPUs enumerated by SFI must be present and enabled */ 48/* All CPUs enumerated by SFI must be present and enabled */
49static void __cpuinit mp_sfi_register_lapic(u8 id) 49static void __cpuinit mp_sfi_register_lapic(u8 id)
50{ 50{
51 if (MAX_APICS - id <= 0) { 51 if (MAX_LOCAL_APIC - id <= 0) {
52 pr_warning("Processor #%d invalid (max %d)\n", 52 pr_warning("Processor #%d invalid (max %d)\n",
53 id, MAX_APICS); 53 id, MAX_LOCAL_APIC);
54 return; 54 return;
55 } 55 }
56 56
diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c
index ba9caa808a9c..df58e9cad96a 100644
--- a/arch/x86/platform/uv/tlb_uv.c
+++ b/arch/x86/platform/uv/tlb_uv.c
@@ -1341,7 +1341,7 @@ uv_activation_descriptor_init(int node, int pnode)
1341 1341
1342 /* 1342 /*
1343 * each bau_desc is 64 bytes; there are 8 (UV_ITEMS_PER_DESCRIPTOR) 1343 * each bau_desc is 64 bytes; there are 8 (UV_ITEMS_PER_DESCRIPTOR)
1344 * per cpu; and up to 32 (UV_ADP_SIZE) cpu's per uvhub 1344 * per cpu; and one per cpu on the uvhub (UV_ADP_SIZE)
1345 */ 1345 */
1346 bau_desc = kmalloc_node(sizeof(struct bau_desc) * UV_ADP_SIZE 1346 bau_desc = kmalloc_node(sizeof(struct bau_desc) * UV_ADP_SIZE
1347 * UV_ITEMS_PER_DESCRIPTOR, GFP_KERNEL, node); 1347 * UV_ITEMS_PER_DESCRIPTOR, GFP_KERNEL, node);
@@ -1490,7 +1490,7 @@ calculate_destination_timeout(void)
1490/* 1490/*
1491 * initialize the bau_control structure for each cpu 1491 * initialize the bau_control structure for each cpu
1492 */ 1492 */
1493static void __init uv_init_per_cpu(int nuvhubs) 1493static int __init uv_init_per_cpu(int nuvhubs)
1494{ 1494{
1495 int i; 1495 int i;
1496 int cpu; 1496 int cpu;
@@ -1507,7 +1507,7 @@ static void __init uv_init_per_cpu(int nuvhubs)
1507 struct bau_control *smaster = NULL; 1507 struct bau_control *smaster = NULL;
1508 struct socket_desc { 1508 struct socket_desc {
1509 short num_cpus; 1509 short num_cpus;
1510 short cpu_number[16]; 1510 short cpu_number[MAX_CPUS_PER_SOCKET];
1511 }; 1511 };
1512 struct uvhub_desc { 1512 struct uvhub_desc {
1513 unsigned short socket_mask; 1513 unsigned short socket_mask;
@@ -1540,6 +1540,10 @@ static void __init uv_init_per_cpu(int nuvhubs)
1540 sdp = &bdp->socket[socket]; 1540 sdp = &bdp->socket[socket];
1541 sdp->cpu_number[sdp->num_cpus] = cpu; 1541 sdp->cpu_number[sdp->num_cpus] = cpu;
1542 sdp->num_cpus++; 1542 sdp->num_cpus++;
1543 if (sdp->num_cpus > MAX_CPUS_PER_SOCKET) {
1544 printk(KERN_EMERG "%d cpus per socket invalid\n", sdp->num_cpus);
1545 return 1;
1546 }
1543 } 1547 }
1544 for (uvhub = 0; uvhub < nuvhubs; uvhub++) { 1548 for (uvhub = 0; uvhub < nuvhubs; uvhub++) {
1545 if (!(*(uvhub_mask + (uvhub/8)) & (1 << (uvhub%8)))) 1549 if (!(*(uvhub_mask + (uvhub/8)) & (1 << (uvhub%8))))
@@ -1570,6 +1574,12 @@ static void __init uv_init_per_cpu(int nuvhubs)
1570 bcp->uvhub_master = hmaster; 1574 bcp->uvhub_master = hmaster;
1571 bcp->uvhub_cpu = uv_cpu_hub_info(cpu)-> 1575 bcp->uvhub_cpu = uv_cpu_hub_info(cpu)->
1572 blade_processor_id; 1576 blade_processor_id;
1577 if (bcp->uvhub_cpu >= MAX_CPUS_PER_UVHUB) {
1578 printk(KERN_EMERG
1579 "%d cpus per uvhub invalid\n",
1580 bcp->uvhub_cpu);
1581 return 1;
1582 }
1573 } 1583 }
1574nextsocket: 1584nextsocket:
1575 socket++; 1585 socket++;
@@ -1595,6 +1605,7 @@ nextsocket:
1595 bcp->congested_reps = congested_reps; 1605 bcp->congested_reps = congested_reps;
1596 bcp->congested_period = congested_period; 1606 bcp->congested_period = congested_period;
1597 } 1607 }
1608 return 0;
1598} 1609}
1599 1610
1600/* 1611/*
@@ -1625,7 +1636,10 @@ static int __init uv_bau_init(void)
1625 spin_lock_init(&disable_lock); 1636 spin_lock_init(&disable_lock);
1626 congested_cycles = microsec_2_cycles(congested_response_us); 1637 congested_cycles = microsec_2_cycles(congested_response_us);
1627 1638
1628 uv_init_per_cpu(nuvhubs); 1639 if (uv_init_per_cpu(nuvhubs)) {
1640 nobau = 1;
1641 return 0;
1642 }
1629 1643
1630 uv_partition_base_pnode = 0x7fffffff; 1644 uv_partition_base_pnode = 0x7fffffff;
1631 for (uvhub = 0; uvhub < nuvhubs; uvhub++) 1645 for (uvhub = 0; uvhub < nuvhubs; uvhub++)
diff --git a/arch/x86/platform/visws/visws_quirks.c b/arch/x86/platform/visws/visws_quirks.c
index 3371bd053b89..632037671746 100644
--- a/arch/x86/platform/visws/visws_quirks.c
+++ b/arch/x86/platform/visws/visws_quirks.c
@@ -171,7 +171,7 @@ static void __init MP_processor_info(struct mpc_cpu *m)
171 ver = m->apicver; 171 ver = m->apicver;
172 if ((ver >= 0x14 && m->apicid >= 0xff) || m->apicid >= 0xf) { 172 if ((ver >= 0x14 && m->apicid >= 0xff) || m->apicid >= 0xf) {
173 printk(KERN_ERR "Processor #%d INVALID. (Max ID: %d).\n", 173 printk(KERN_ERR "Processor #%d INVALID. (Max ID: %d).\n",
174 m->apicid, MAX_APICS); 174 m->apicid, MAX_LOCAL_APIC);
175 return; 175 return;
176 } 176 }
177 177
diff --git a/arch/x86/vdso/Makefile b/arch/x86/vdso/Makefile
index 4a2afa1bac51..b6552b189bcd 100644
--- a/arch/x86/vdso/Makefile
+++ b/arch/x86/vdso/Makefile
@@ -25,7 +25,7 @@ targets += vdso.so vdso.so.dbg vdso.lds $(vobjs-y)
25 25
26export CPPFLAGS_vdso.lds += -P -C 26export CPPFLAGS_vdso.lds += -P -C
27 27
28VDSO_LDFLAGS_vdso.lds = -m elf_x86_64 -Wl,-soname=linux-vdso.so.1 \ 28VDSO_LDFLAGS_vdso.lds = -m64 -Wl,-soname=linux-vdso.so.1 \
29 -Wl,-z,max-page-size=4096 -Wl,-z,common-page-size=4096 29 -Wl,-z,max-page-size=4096 -Wl,-z,common-page-size=4096
30 30
31$(obj)/vdso.o: $(src)/vdso.S $(obj)/vdso.so 31$(obj)/vdso.o: $(src)/vdso.S $(obj)/vdso.so
@@ -69,7 +69,7 @@ vdso32.so-$(VDSO32-y) += sysenter
69vdso32-images = $(vdso32.so-y:%=vdso32-%.so) 69vdso32-images = $(vdso32.so-y:%=vdso32-%.so)
70 70
71CPPFLAGS_vdso32.lds = $(CPPFLAGS_vdso.lds) 71CPPFLAGS_vdso32.lds = $(CPPFLAGS_vdso.lds)
72VDSO_LDFLAGS_vdso32.lds = -m elf_i386 -Wl,-soname=linux-gate.so.1 72VDSO_LDFLAGS_vdso32.lds = -m32 -Wl,-soname=linux-gate.so.1
73 73
74# This makes sure the $(obj) subdirectory exists even though vdso32/ 74# This makes sure the $(obj) subdirectory exists even though vdso32/
75# is not a kbuild sub-make subdirectory. 75# is not a kbuild sub-make subdirectory.
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index b2bb5aa3b054..5da5e53fb94c 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -426,6 +426,8 @@ void xen_timer_resume(void)
426{ 426{
427 int cpu; 427 int cpu;
428 428
429 pvclock_resume();
430
429 if (xen_clockevent != &xen_vcpuop_clockevent) 431 if (xen_clockevent != &xen_vcpuop_clockevent)
430 return; 432 return;
431 433