aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86
diff options
context:
space:
mode:
authorLen Brown <len.brown@intel.com>2011-01-12 18:06:06 -0500
committerLen Brown <len.brown@intel.com>2011-01-12 18:06:06 -0500
commit56dbed129df3fdd4caf9018b6e7599ee258a5420 (patch)
treeb902491aef3a99efe0d9d49edd0f6e414dba654f /arch/x86
parent2a2d31c8dc6f1ebcf5eab1d93a0cb0fb4ed57c7c (diff)
parentf878133bf022717b880d0e0995b8f91436fd605c (diff)
Merge branch 'linus' into idle-test
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Kconfig41
-rw-r--r--arch/x86/Kconfig.cpu3
-rw-r--r--arch/x86/Kconfig.debug11
-rw-r--r--arch/x86/boot/compressed/head_64.S2
-rw-r--r--arch/x86/include/asm/acpi.h11
-rw-r--r--arch/x86/include/asm/alternative.h8
-rw-r--r--arch/x86/include/asm/amd_nb.h60
-rw-r--r--arch/x86/include/asm/apic.h3
-rw-r--r--arch/x86/include/asm/apicdef.h1
-rw-r--r--arch/x86/include/asm/bootparam.h1
-rw-r--r--arch/x86/include/asm/debugreg.h2
-rw-r--r--arch/x86/include/asm/fixmap.h4
-rw-r--r--arch/x86/include/asm/gpio.h5
-rw-r--r--arch/x86/include/asm/hypervisor.h12
-rw-r--r--arch/x86/include/asm/i387.h24
-rw-r--r--arch/x86/include/asm/io_apic.h9
-rw-r--r--arch/x86/include/asm/irq.h4
-rw-r--r--arch/x86/include/asm/kdebug.h3
-rw-r--r--arch/x86/include/asm/mach_traps.h12
-rw-r--r--arch/x86/include/asm/mce.h3
-rw-r--r--arch/x86/include/asm/microcode.h6
-rw-r--r--arch/x86/include/asm/mpspec.h31
-rw-r--r--arch/x86/include/asm/mpspec_def.h7
-rw-r--r--arch/x86/include/asm/mrst-vrtc.h9
-rw-r--r--arch/x86/include/asm/mrst.h14
-rw-r--r--arch/x86/include/asm/msr-index.h16
-rw-r--r--arch/x86/include/asm/nmi.h71
-rw-r--r--arch/x86/include/asm/numa_64.h2
-rw-r--r--arch/x86/include/asm/paravirt.h2
-rw-r--r--arch/x86/include/asm/pci.h1
-rw-r--r--arch/x86/include/asm/percpu.h158
-rw-r--r--arch/x86/include/asm/perf_event.h2
-rw-r--r--arch/x86/include/asm/perf_event_p4.h66
-rw-r--r--arch/x86/include/asm/processor.h3
-rw-r--r--arch/x86/include/asm/setup.h6
-rw-r--r--arch/x86/include/asm/smpboot_hooks.h1
-rw-r--r--arch/x86/include/asm/stacktrace.h33
-rw-r--r--arch/x86/include/asm/timer.h6
-rw-r--r--arch/x86/include/asm/uv/uv_bau.h9
-rw-r--r--arch/x86/include/asm/xen/hypervisor.h35
-rw-r--r--arch/x86/kernel/Makefile1
-rw-r--r--arch/x86/kernel/acpi/boot.c59
-rw-r--r--arch/x86/kernel/alternative.c52
-rw-r--r--arch/x86/kernel/amd_nb.c142
-rw-r--r--arch/x86/kernel/apb_timer.c1
-rw-r--r--arch/x86/kernel/aperture_64.c54
-rw-r--r--arch/x86/kernel/apic/Makefile5
-rw-r--r--arch/x86/kernel/apic/apic.c192
-rw-r--r--arch/x86/kernel/apic/hw_nmi.c39
-rw-r--r--arch/x86/kernel/apic/io_apic.c115
-rw-r--r--arch/x86/kernel/apic/nmi.c567
-rw-r--r--arch/x86/kernel/apic/x2apic_uv_x.c69
-rw-r--r--arch/x86/kernel/cpu/amd.c2
-rw-r--r--arch/x86/kernel/cpu/common.c1
-rw-r--r--arch/x86/kernel/cpu/cpufreq/powernow-k8.c4
-rw-r--r--arch/x86/kernel/cpu/intel_cacheinfo.c151
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce-inject.c5
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce.c20
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_amd.c135
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_intel.c2
-rw-r--r--arch/x86/kernel/cpu/mcheck/therm_throt.c40
-rw-r--r--arch/x86/kernel/cpu/perf_event.c104
-rw-r--r--arch/x86/kernel/cpu/perf_event_amd.c16
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel.c30
-rw-r--r--arch/x86/kernel/cpu/perf_event_p4.c28
-rw-r--r--arch/x86/kernel/cpu/perfctr-watchdog.c644
-rw-r--r--arch/x86/kernel/dumpstack.c18
-rw-r--r--arch/x86/kernel/dumpstack_32.c25
-rw-r--r--arch/x86/kernel/dumpstack_64.c24
-rw-r--r--arch/x86/kernel/early_printk.c3
-rw-r--r--arch/x86/kernel/entry_64.S36
-rw-r--r--arch/x86/kernel/ftrace.c9
-rw-r--r--arch/x86/kernel/head32.c3
-rw-r--r--arch/x86/kernel/head_32.S83
-rw-r--r--arch/x86/kernel/hw_breakpoint.c12
-rw-r--r--arch/x86/kernel/irq.c6
-rw-r--r--arch/x86/kernel/irq_32.c4
-rw-r--r--arch/x86/kernel/kgdb.c7
-rw-r--r--arch/x86/kernel/kprobes.c127
-rw-r--r--arch/x86/kernel/microcode_amd.c34
-rw-r--r--arch/x86/kernel/mpparse.c114
-rw-r--r--arch/x86/kernel/pci-gart_64.c34
-rw-r--r--arch/x86/kernel/process.c14
-rw-r--r--arch/x86/kernel/process_32.c2
-rw-r--r--arch/x86/kernel/process_64.c2
-rw-r--r--arch/x86/kernel/reboot.c5
-rw-r--r--arch/x86/kernel/reboot_fixups_32.c16
-rw-r--r--arch/x86/kernel/setup.c13
-rw-r--r--arch/x86/kernel/smpboot.c54
-rw-r--r--arch/x86/kernel/stacktrace.c8
-rw-r--r--arch/x86/kernel/time.c18
-rw-r--r--arch/x86/kernel/trampoline_64.S2
-rw-r--r--arch/x86/kernel/traps.c131
-rw-r--r--arch/x86/kernel/tsc.c98
-rw-r--r--arch/x86/kernel/verify_cpu.S (renamed from arch/x86/kernel/verify_cpu_64.S)49
-rw-r--r--arch/x86/kernel/vmlinux.lds.S8
-rw-r--r--arch/x86/kvm/x86.c8
-rw-r--r--arch/x86/lguest/i386_head.S105
-rw-r--r--arch/x86/lib/delay.c2
-rw-r--r--arch/x86/mm/Makefile2
-rw-r--r--arch/x86/mm/amdtopology_64.c (renamed from arch/x86/mm/k8topology_64.c)97
-rw-r--r--arch/x86/mm/init.c3
-rw-r--r--arch/x86/mm/init_32.c20
-rw-r--r--arch/x86/mm/kmemcheck/error.c2
-rw-r--r--arch/x86/mm/numa_64.c175
-rw-r--r--arch/x86/mm/pageattr.c33
-rw-r--r--arch/x86/mm/setup_nx.c2
-rw-r--r--arch/x86/mm/srat_32.c1
-rw-r--r--arch/x86/mm/srat_64.c36
-rw-r--r--arch/x86/oprofile/backtrace.c2
-rw-r--r--arch/x86/oprofile/nmi_int.c8
-rw-r--r--arch/x86/oprofile/nmi_timer_int.c5
-rw-r--r--arch/x86/oprofile/op_model_amd.c55
-rw-r--r--arch/x86/oprofile/op_model_p4.c2
-rw-r--r--arch/x86/oprofile/op_model_ppro.c8
-rw-r--r--arch/x86/pci/Makefile1
-rw-r--r--arch/x86/pci/amd_bus.c33
-rw-r--r--arch/x86/pci/ce4100.c315
-rw-r--r--arch/x86/pci/pcbios.c23
-rw-r--r--arch/x86/platform/Makefile2
-rw-r--r--arch/x86/platform/ce4100/Makefile1
-rw-r--r--arch/x86/platform/ce4100/ce4100.c132
-rw-r--r--arch/x86/platform/iris/Makefile1
-rw-r--r--arch/x86/platform/iris/iris.c91
-rw-r--r--arch/x86/platform/mrst/Makefile2
-rw-r--r--arch/x86/platform/mrst/early_printk_mrst.c (renamed from arch/x86/kernel/early_printk_mrst.c)0
-rw-r--r--arch/x86/platform/mrst/mrst.c576
-rw-r--r--arch/x86/platform/mrst/vrtc.c165
-rw-r--r--arch/x86/platform/sfi/sfi.c17
-rw-r--r--arch/x86/platform/uv/tlb_uv.c22
-rw-r--r--arch/x86/platform/visws/visws_quirks.c2
-rw-r--r--arch/x86/xen/enlighten.c44
-rw-r--r--arch/x86/xen/multicalls.h2
-rw-r--r--arch/x86/xen/spinlock.c8
-rw-r--r--arch/x86/xen/time.c8
135 files changed, 3453 insertions, 2692 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index e330da21b84f..b6fccb07123e 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -377,6 +377,18 @@ config X86_ELAN
377 377
378 If unsure, choose "PC-compatible" instead. 378 If unsure, choose "PC-compatible" instead.
379 379
380config X86_INTEL_CE
381 bool "CE4100 TV platform"
382 depends on PCI
383 depends on PCI_GODIRECT
384 depends on X86_32
385 depends on X86_EXTENDED_PLATFORM
386 select X86_REBOOTFIXUPS
387 ---help---
388 Select for the Intel CE media processor (CE4100) SOC.
389 This option compiles in support for the CE4100 SOC for settop
390 boxes and media devices.
391
380config X86_MRST 392config X86_MRST
381 bool "Moorestown MID platform" 393 bool "Moorestown MID platform"
382 depends on PCI 394 depends on PCI
@@ -385,6 +397,10 @@ config X86_MRST
385 depends on X86_EXTENDED_PLATFORM 397 depends on X86_EXTENDED_PLATFORM
386 depends on X86_IO_APIC 398 depends on X86_IO_APIC
387 select APB_TIMER 399 select APB_TIMER
400 select I2C
401 select SPI
402 select INTEL_SCU_IPC
403 select X86_PLATFORM_DEVICES
388 ---help--- 404 ---help---
389 Moorestown is Intel's Low Power Intel Architecture (LPIA) based Moblin 405 Moorestown is Intel's Low Power Intel Architecture (LPIA) based Moblin
390 Internet Device(MID) platform. Moorestown consists of two chips: 406 Internet Device(MID) platform. Moorestown consists of two chips:
@@ -466,6 +482,19 @@ config X86_ES7000
466 Support for Unisys ES7000 systems. Say 'Y' here if this kernel is 482 Support for Unisys ES7000 systems. Say 'Y' here if this kernel is
467 supposed to run on an IA32-based Unisys ES7000 system. 483 supposed to run on an IA32-based Unisys ES7000 system.
468 484
485config X86_32_IRIS
486 tristate "Eurobraille/Iris poweroff module"
487 depends on X86_32
488 ---help---
489 The Iris machines from EuroBraille do not have APM or ACPI support
490 to shut themselves down properly. A special I/O sequence is
491 needed to do so, which is what this module does at
492 kernel shutdown.
493
494 This is only for Iris machines from EuroBraille.
495
496 If unused, say N.
497
469config SCHED_OMIT_FRAME_POINTER 498config SCHED_OMIT_FRAME_POINTER
470 def_bool y 499 def_bool y
471 prompt "Single-depth WCHAN output" 500 prompt "Single-depth WCHAN output"
@@ -1141,16 +1170,16 @@ config NUMA
1141comment "NUMA (Summit) requires SMP, 64GB highmem support, ACPI" 1170comment "NUMA (Summit) requires SMP, 64GB highmem support, ACPI"
1142 depends on X86_32 && X86_SUMMIT && (!HIGHMEM64G || !ACPI) 1171 depends on X86_32 && X86_SUMMIT && (!HIGHMEM64G || !ACPI)
1143 1172
1144config K8_NUMA 1173config AMD_NUMA
1145 def_bool y 1174 def_bool y
1146 prompt "Old style AMD Opteron NUMA detection" 1175 prompt "Old style AMD Opteron NUMA detection"
1147 depends on X86_64 && NUMA && PCI 1176 depends on X86_64 && NUMA && PCI
1148 ---help--- 1177 ---help---
1149 Enable K8 NUMA node topology detection. You should say Y here if 1178 Enable AMD NUMA node topology detection. You should say Y here if
1150 you have a multi processor AMD K8 system. This uses an old 1179 you have a multi processor AMD system. This uses an old method to
1151 method to read the NUMA configuration directly from the builtin 1180 read the NUMA configuration directly from the builtin Northbridge
1152 Northbridge of Opteron. It is recommended to use X86_64_ACPI_NUMA 1181 of Opteron. It is recommended to use X86_64_ACPI_NUMA instead,
1153 instead, which also takes priority if both are compiled in. 1182 which also takes priority if both are compiled in.
1154 1183
1155config X86_64_ACPI_NUMA 1184config X86_64_ACPI_NUMA
1156 def_bool y 1185 def_bool y
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index 2ac9069890cd..15588a0ef466 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -310,6 +310,9 @@ config X86_INTERNODE_CACHE_SHIFT
310config X86_CMPXCHG 310config X86_CMPXCHG
311 def_bool X86_64 || (X86_32 && !M386) 311 def_bool X86_64 || (X86_32 && !M386)
312 312
313config CMPXCHG_LOCAL
314 def_bool X86_64 || (X86_32 && !M386)
315
313config X86_L1_CACHE_SHIFT 316config X86_L1_CACHE_SHIFT
314 int 317 int
315 default "7" if MPENTIUM4 || MPSC 318 default "7" if MPENTIUM4 || MPSC
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index b59ee765414e..45143bbcfe5e 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -117,6 +117,17 @@ config DEBUG_RODATA_TEST
117 feature as well as for the change_page_attr() infrastructure. 117 feature as well as for the change_page_attr() infrastructure.
118 If in doubt, say "N" 118 If in doubt, say "N"
119 119
120config DEBUG_SET_MODULE_RONX
121 bool "Set loadable kernel module data as NX and text as RO"
122 depends on MODULES
123 ---help---
124 This option helps catch unintended modifications to loadable
125 kernel module's text and read-only data. It also prevents execution
126 of module data. Such protection may interfere with run-time code
127 patching and dynamic kernel tracing - and they might also protect
128 against certain classes of kernel exploits.
129 If in doubt, say "N".
130
120config DEBUG_NX_TEST 131config DEBUG_NX_TEST
121 tristate "Testcase for the NX non-executable stack feature" 132 tristate "Testcase for the NX non-executable stack feature"
122 depends on DEBUG_KERNEL && m 133 depends on DEBUG_KERNEL && m
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index 52f85a196fa0..35af09d13dc1 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -182,7 +182,7 @@ no_longmode:
182 hlt 182 hlt
183 jmp 1b 183 jmp 1b
184 184
185#include "../../kernel/verify_cpu_64.S" 185#include "../../kernel/verify_cpu.S"
186 186
187 /* 187 /*
188 * Be careful here startup_64 needs to be at a predictable 188 * Be careful here startup_64 needs to be at a predictable
diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h
index 55d106b5e31b..211ca3f7fd16 100644
--- a/arch/x86/include/asm/acpi.h
+++ b/arch/x86/include/asm/acpi.h
@@ -185,17 +185,16 @@ struct bootnode;
185 185
186#ifdef CONFIG_ACPI_NUMA 186#ifdef CONFIG_ACPI_NUMA
187extern int acpi_numa; 187extern int acpi_numa;
188extern int acpi_get_nodes(struct bootnode *physnodes); 188extern void acpi_get_nodes(struct bootnode *physnodes, unsigned long start,
189 unsigned long end);
189extern int acpi_scan_nodes(unsigned long start, unsigned long end); 190extern int acpi_scan_nodes(unsigned long start, unsigned long end);
190#define NR_NODE_MEMBLKS (MAX_NUMNODES*2) 191#define NR_NODE_MEMBLKS (MAX_NUMNODES*2)
192
193#ifdef CONFIG_NUMA_EMU
191extern void acpi_fake_nodes(const struct bootnode *fake_nodes, 194extern void acpi_fake_nodes(const struct bootnode *fake_nodes,
192 int num_nodes); 195 int num_nodes);
193#else
194static inline void acpi_fake_nodes(const struct bootnode *fake_nodes,
195 int num_nodes)
196{
197}
198#endif 196#endif
197#endif /* CONFIG_ACPI_NUMA */
199 198
200#define acpi_unlazy_tlb(x) leave_mm(x) 199#define acpi_unlazy_tlb(x) leave_mm(x)
201 200
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index 76561d20ea2f..13009d1af99a 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -66,6 +66,7 @@ extern void alternatives_smp_module_add(struct module *mod, char *name,
66extern void alternatives_smp_module_del(struct module *mod); 66extern void alternatives_smp_module_del(struct module *mod);
67extern void alternatives_smp_switch(int smp); 67extern void alternatives_smp_switch(int smp);
68extern int alternatives_text_reserved(void *start, void *end); 68extern int alternatives_text_reserved(void *start, void *end);
69extern bool skip_smp_alternatives;
69#else 70#else
70static inline void alternatives_smp_module_add(struct module *mod, char *name, 71static inline void alternatives_smp_module_add(struct module *mod, char *name,
71 void *locks, void *locks_end, 72 void *locks, void *locks_end,
@@ -180,8 +181,15 @@ extern void *text_poke_early(void *addr, const void *opcode, size_t len);
180 * On the local CPU you need to be protected again NMI or MCE handlers seeing an 181 * On the local CPU you need to be protected again NMI or MCE handlers seeing an
181 * inconsistent instruction while you patch. 182 * inconsistent instruction while you patch.
182 */ 183 */
184struct text_poke_param {
185 void *addr;
186 const void *opcode;
187 size_t len;
188};
189
183extern void *text_poke(void *addr, const void *opcode, size_t len); 190extern void *text_poke(void *addr, const void *opcode, size_t len);
184extern void *text_poke_smp(void *addr, const void *opcode, size_t len); 191extern void *text_poke_smp(void *addr, const void *opcode, size_t len);
192extern void text_poke_smp_batch(struct text_poke_param *params, int n);
185 193
186#if defined(CONFIG_DYNAMIC_FTRACE) || defined(HAVE_JUMP_LABEL) 194#if defined(CONFIG_DYNAMIC_FTRACE) || defined(HAVE_JUMP_LABEL)
187#define IDEAL_NOP_SIZE_5 5 195#define IDEAL_NOP_SIZE_5 5
diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h
index c8517f81b21e..64dc82ee19f0 100644
--- a/arch/x86/include/asm/amd_nb.h
+++ b/arch/x86/include/asm/amd_nb.h
@@ -3,36 +3,64 @@
3 3
4#include <linux/pci.h> 4#include <linux/pci.h>
5 5
6extern struct pci_device_id k8_nb_ids[]; 6struct amd_nb_bus_dev_range {
7 u8 bus;
8 u8 dev_base;
9 u8 dev_limit;
10};
11
12extern struct pci_device_id amd_nb_misc_ids[];
13extern const struct amd_nb_bus_dev_range amd_nb_bus_dev_ranges[];
7struct bootnode; 14struct bootnode;
8 15
9extern int early_is_k8_nb(u32 value); 16extern int early_is_amd_nb(u32 value);
10extern int cache_k8_northbridges(void); 17extern int amd_cache_northbridges(void);
11extern void k8_flush_garts(void); 18extern void amd_flush_garts(void);
12extern int k8_get_nodes(struct bootnode *nodes); 19extern int amd_numa_init(unsigned long start_pfn, unsigned long end_pfn);
13extern int k8_numa_init(unsigned long start_pfn, unsigned long end_pfn); 20extern int amd_scan_nodes(void);
14extern int k8_scan_nodes(void); 21
22#ifdef CONFIG_NUMA_EMU
23extern void amd_fake_nodes(const struct bootnode *nodes, int nr_nodes);
24extern void amd_get_nodes(struct bootnode *nodes);
25#endif
15 26
16struct k8_northbridge_info { 27struct amd_northbridge {
28 struct pci_dev *misc;
29};
30
31struct amd_northbridge_info {
17 u16 num; 32 u16 num;
18 u8 gart_supported; 33 u64 flags;
19 struct pci_dev **nb_misc; 34 struct amd_northbridge *nb;
20}; 35};
21extern struct k8_northbridge_info k8_northbridges; 36extern struct amd_northbridge_info amd_northbridges;
37
38#define AMD_NB_GART 0x1
39#define AMD_NB_L3_INDEX_DISABLE 0x2
22 40
23#ifdef CONFIG_AMD_NB 41#ifdef CONFIG_AMD_NB
24 42
25static inline struct pci_dev *node_to_k8_nb_misc(int node) 43static inline int amd_nb_num(void)
26{ 44{
27 return (node < k8_northbridges.num) ? k8_northbridges.nb_misc[node] : NULL; 45 return amd_northbridges.num;
28} 46}
29 47
30#else 48static inline int amd_nb_has_feature(int feature)
49{
50 return ((amd_northbridges.flags & feature) == feature);
51}
31 52
32static inline struct pci_dev *node_to_k8_nb_misc(int node) 53static inline struct amd_northbridge *node_to_amd_nb(int node)
33{ 54{
34 return NULL; 55 return (node < amd_northbridges.num) ? &amd_northbridges.nb[node] : NULL;
35} 56}
57
58#else
59
60#define amd_nb_num(x) 0
61#define amd_nb_has_feature(x) false
62#define node_to_amd_nb(x) NULL
63
36#endif 64#endif
37 65
38 66
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index f6ce0bda3b98..5e3969c36d7f 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -234,16 +234,17 @@ extern void init_bsp_APIC(void);
234extern void setup_local_APIC(void); 234extern void setup_local_APIC(void);
235extern void end_local_APIC_setup(void); 235extern void end_local_APIC_setup(void);
236extern void init_apic_mappings(void); 236extern void init_apic_mappings(void);
237void register_lapic_address(unsigned long address);
237extern void setup_boot_APIC_clock(void); 238extern void setup_boot_APIC_clock(void);
238extern void setup_secondary_APIC_clock(void); 239extern void setup_secondary_APIC_clock(void);
239extern int APIC_init_uniprocessor(void); 240extern int APIC_init_uniprocessor(void);
240extern void enable_NMI_through_LVT0(void); 241extern void enable_NMI_through_LVT0(void);
242extern int apic_force_enable(void);
241 243
242/* 244/*
243 * On 32bit this is mach-xxx local 245 * On 32bit this is mach-xxx local
244 */ 246 */
245#ifdef CONFIG_X86_64 247#ifdef CONFIG_X86_64
246extern void early_init_lapic_mapping(void);
247extern int apic_is_clustered_box(void); 248extern int apic_is_clustered_box(void);
248#else 249#else
249static inline int apic_is_clustered_box(void) 250static inline int apic_is_clustered_box(void)
diff --git a/arch/x86/include/asm/apicdef.h b/arch/x86/include/asm/apicdef.h
index a859ca461fb0..47a30ff8e517 100644
--- a/arch/x86/include/asm/apicdef.h
+++ b/arch/x86/include/asm/apicdef.h
@@ -145,6 +145,7 @@
145 145
146#ifdef CONFIG_X86_32 146#ifdef CONFIG_X86_32
147# define MAX_IO_APICS 64 147# define MAX_IO_APICS 64
148# define MAX_LOCAL_APIC 256
148#else 149#else
149# define MAX_IO_APICS 128 150# define MAX_IO_APICS 128
150# define MAX_LOCAL_APIC 32768 151# define MAX_LOCAL_APIC 32768
diff --git a/arch/x86/include/asm/bootparam.h b/arch/x86/include/asm/bootparam.h
index 8e6218550e77..c8bfe63a06de 100644
--- a/arch/x86/include/asm/bootparam.h
+++ b/arch/x86/include/asm/bootparam.h
@@ -124,6 +124,7 @@ enum {
124 X86_SUBARCH_LGUEST, 124 X86_SUBARCH_LGUEST,
125 X86_SUBARCH_XEN, 125 X86_SUBARCH_XEN,
126 X86_SUBARCH_MRST, 126 X86_SUBARCH_MRST,
127 X86_SUBARCH_CE4100,
127 X86_NR_SUBARCHS, 128 X86_NR_SUBARCHS,
128}; 129};
129 130
diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h
index b81002f23614..078ad0caefc6 100644
--- a/arch/x86/include/asm/debugreg.h
+++ b/arch/x86/include/asm/debugreg.h
@@ -94,7 +94,7 @@ static inline void hw_breakpoint_disable(void)
94 94
95static inline int hw_breakpoint_active(void) 95static inline int hw_breakpoint_active(void)
96{ 96{
97 return __get_cpu_var(cpu_dr7) & DR_GLOBAL_ENABLE_MASK; 97 return __this_cpu_read(cpu_dr7) & DR_GLOBAL_ENABLE_MASK;
98} 98}
99 99
100extern void aout_dump_debugregs(struct user *dump); 100extern void aout_dump_debugregs(struct user *dump);
diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
index 9479a037419f..4729b2b63117 100644
--- a/arch/x86/include/asm/fixmap.h
+++ b/arch/x86/include/asm/fixmap.h
@@ -116,7 +116,11 @@ enum fixed_addresses {
116#endif 116#endif
117 FIX_TEXT_POKE1, /* reserve 2 pages for text_poke() */ 117 FIX_TEXT_POKE1, /* reserve 2 pages for text_poke() */
118 FIX_TEXT_POKE0, /* first page is last, because allocation is backward */ 118 FIX_TEXT_POKE0, /* first page is last, because allocation is backward */
119#ifdef CONFIG_X86_MRST
120 FIX_LNW_VRTC,
121#endif
119 __end_of_permanent_fixed_addresses, 122 __end_of_permanent_fixed_addresses,
123
120 /* 124 /*
121 * 256 temporary boot-time mappings, used by early_ioremap(), 125 * 256 temporary boot-time mappings, used by early_ioremap(),
122 * before ioremap() is functional. 126 * before ioremap() is functional.
diff --git a/arch/x86/include/asm/gpio.h b/arch/x86/include/asm/gpio.h
index 49dbfdfa50f9..91d915a65259 100644
--- a/arch/x86/include/asm/gpio.h
+++ b/arch/x86/include/asm/gpio.h
@@ -38,12 +38,9 @@ static inline int gpio_cansleep(unsigned int gpio)
38 return __gpio_cansleep(gpio); 38 return __gpio_cansleep(gpio);
39} 39}
40 40
41/*
42 * Not implemented, yet.
43 */
44static inline int gpio_to_irq(unsigned int gpio) 41static inline int gpio_to_irq(unsigned int gpio)
45{ 42{
46 return -ENOSYS; 43 return __gpio_to_irq(gpio);
47} 44}
48 45
49static inline int irq_to_gpio(unsigned int irq) 46static inline int irq_to_gpio(unsigned int irq)
diff --git a/arch/x86/include/asm/hypervisor.h b/arch/x86/include/asm/hypervisor.h
index ff2546ce7178..7a15153c675d 100644
--- a/arch/x86/include/asm/hypervisor.h
+++ b/arch/x86/include/asm/hypervisor.h
@@ -20,6 +20,9 @@
20#ifndef _ASM_X86_HYPERVISOR_H 20#ifndef _ASM_X86_HYPERVISOR_H
21#define _ASM_X86_HYPERVISOR_H 21#define _ASM_X86_HYPERVISOR_H
22 22
23#include <asm/kvm_para.h>
24#include <asm/xen/hypervisor.h>
25
23extern void init_hypervisor(struct cpuinfo_x86 *c); 26extern void init_hypervisor(struct cpuinfo_x86 *c);
24extern void init_hypervisor_platform(void); 27extern void init_hypervisor_platform(void);
25 28
@@ -47,4 +50,13 @@ extern const struct hypervisor_x86 x86_hyper_vmware;
47extern const struct hypervisor_x86 x86_hyper_ms_hyperv; 50extern const struct hypervisor_x86 x86_hyper_ms_hyperv;
48extern const struct hypervisor_x86 x86_hyper_xen_hvm; 51extern const struct hypervisor_x86 x86_hyper_xen_hvm;
49 52
53static inline bool hypervisor_x2apic_available(void)
54{
55 if (kvm_para_available())
56 return true;
57 if (xen_x2apic_para_available())
58 return true;
59 return false;
60}
61
50#endif 62#endif
diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h
index 4aa2bb3b242a..ef328901c802 100644
--- a/arch/x86/include/asm/i387.h
+++ b/arch/x86/include/asm/i387.h
@@ -93,6 +93,17 @@ static inline int fxrstor_checking(struct i387_fxsave_struct *fx)
93 int err; 93 int err;
94 94
95 /* See comment in fxsave() below. */ 95 /* See comment in fxsave() below. */
96#ifdef CONFIG_AS_FXSAVEQ
97 asm volatile("1: fxrstorq %[fx]\n\t"
98 "2:\n"
99 ".section .fixup,\"ax\"\n"
100 "3: movl $-1,%[err]\n"
101 " jmp 2b\n"
102 ".previous\n"
103 _ASM_EXTABLE(1b, 3b)
104 : [err] "=r" (err)
105 : [fx] "m" (*fx), "0" (0));
106#else
96 asm volatile("1: rex64/fxrstor (%[fx])\n\t" 107 asm volatile("1: rex64/fxrstor (%[fx])\n\t"
97 "2:\n" 108 "2:\n"
98 ".section .fixup,\"ax\"\n" 109 ".section .fixup,\"ax\"\n"
@@ -102,6 +113,7 @@ static inline int fxrstor_checking(struct i387_fxsave_struct *fx)
102 _ASM_EXTABLE(1b, 3b) 113 _ASM_EXTABLE(1b, 3b)
103 : [err] "=r" (err) 114 : [err] "=r" (err)
104 : [fx] "R" (fx), "m" (*fx), "0" (0)); 115 : [fx] "R" (fx), "m" (*fx), "0" (0));
116#endif
105 return err; 117 return err;
106} 118}
107 119
@@ -119,6 +131,17 @@ static inline int fxsave_user(struct i387_fxsave_struct __user *fx)
119 return -EFAULT; 131 return -EFAULT;
120 132
121 /* See comment in fxsave() below. */ 133 /* See comment in fxsave() below. */
134#ifdef CONFIG_AS_FXSAVEQ
135 asm volatile("1: fxsaveq %[fx]\n\t"
136 "2:\n"
137 ".section .fixup,\"ax\"\n"
138 "3: movl $-1,%[err]\n"
139 " jmp 2b\n"
140 ".previous\n"
141 _ASM_EXTABLE(1b, 3b)
142 : [err] "=r" (err), [fx] "=m" (*fx)
143 : "0" (0));
144#else
122 asm volatile("1: rex64/fxsave (%[fx])\n\t" 145 asm volatile("1: rex64/fxsave (%[fx])\n\t"
123 "2:\n" 146 "2:\n"
124 ".section .fixup,\"ax\"\n" 147 ".section .fixup,\"ax\"\n"
@@ -128,6 +151,7 @@ static inline int fxsave_user(struct i387_fxsave_struct __user *fx)
128 _ASM_EXTABLE(1b, 3b) 151 _ASM_EXTABLE(1b, 3b)
129 : [err] "=r" (err), "=m" (*fx) 152 : [err] "=r" (err), "=m" (*fx)
130 : [fx] "R" (fx), "0" (0)); 153 : [fx] "R" (fx), "0" (0));
154#endif
131 if (unlikely(err) && 155 if (unlikely(err) &&
132 __clear_user(fx, sizeof(struct i387_fxsave_struct))) 156 __clear_user(fx, sizeof(struct i387_fxsave_struct)))
133 err = -EFAULT; 157 err = -EFAULT;
diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h
index a6b28d017c2f..f327d386d6cc 100644
--- a/arch/x86/include/asm/io_apic.h
+++ b/arch/x86/include/asm/io_apic.h
@@ -159,7 +159,7 @@ struct io_apic_irq_attr;
159extern int io_apic_set_pci_routing(struct device *dev, int irq, 159extern int io_apic_set_pci_routing(struct device *dev, int irq,
160 struct io_apic_irq_attr *irq_attr); 160 struct io_apic_irq_attr *irq_attr);
161void setup_IO_APIC_irq_extra(u32 gsi); 161void setup_IO_APIC_irq_extra(u32 gsi);
162extern void ioapic_init_mappings(void); 162extern void ioapic_and_gsi_init(void);
163extern void ioapic_insert_resources(void); 163extern void ioapic_insert_resources(void);
164 164
165extern struct IO_APIC_route_entry **alloc_ioapic_entries(void); 165extern struct IO_APIC_route_entry **alloc_ioapic_entries(void);
@@ -168,10 +168,10 @@ extern int save_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries);
168extern void mask_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries); 168extern void mask_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries);
169extern int restore_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries); 169extern int restore_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries);
170 170
171extern void probe_nr_irqs_gsi(void);
172extern int get_nr_irqs_gsi(void); 171extern int get_nr_irqs_gsi(void);
173 172
174extern void setup_ioapic_ids_from_mpc(void); 173extern void setup_ioapic_ids_from_mpc(void);
174extern void setup_ioapic_ids_from_mpc_nocheck(void);
175 175
176struct mp_ioapic_gsi{ 176struct mp_ioapic_gsi{
177 u32 gsi_base; 177 u32 gsi_base;
@@ -184,14 +184,15 @@ int mp_find_ioapic_pin(int ioapic, u32 gsi);
184void __init mp_register_ioapic(int id, u32 address, u32 gsi_base); 184void __init mp_register_ioapic(int id, u32 address, u32 gsi_base);
185extern void __init pre_init_apic_IRQ0(void); 185extern void __init pre_init_apic_IRQ0(void);
186 186
187extern void mp_save_irq(struct mpc_intsrc *m);
188
187#else /* !CONFIG_X86_IO_APIC */ 189#else /* !CONFIG_X86_IO_APIC */
188 190
189#define io_apic_assign_pci_irqs 0 191#define io_apic_assign_pci_irqs 0
190#define setup_ioapic_ids_from_mpc x86_init_noop 192#define setup_ioapic_ids_from_mpc x86_init_noop
191static const int timer_through_8259 = 0; 193static const int timer_through_8259 = 0;
192static inline void ioapic_init_mappings(void) { } 194static inline void ioapic_and_gsi_init(void) { }
193static inline void ioapic_insert_resources(void) { } 195static inline void ioapic_insert_resources(void) { }
194static inline void probe_nr_irqs_gsi(void) { }
195#define gsi_top (NR_IRQS_LEGACY) 196#define gsi_top (NR_IRQS_LEGACY)
196static inline int mp_find_ioapic(u32 gsi) { return 0; } 197static inline int mp_find_ioapic(u32 gsi) { return 0; }
197 198
diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h
index 13b0ebaa512f..ba870bb6dd8e 100644
--- a/arch/x86/include/asm/irq.h
+++ b/arch/x86/include/asm/irq.h
@@ -15,10 +15,6 @@ static inline int irq_canonicalize(int irq)
15 return ((irq == 2) ? 9 : irq); 15 return ((irq == 2) ? 9 : irq);
16} 16}
17 17
18#ifdef CONFIG_X86_LOCAL_APIC
19# define ARCH_HAS_NMI_WATCHDOG
20#endif
21
22#ifdef CONFIG_X86_32 18#ifdef CONFIG_X86_32
23extern void irq_ctx_init(int cpu); 19extern void irq_ctx_init(int cpu);
24#else 20#else
diff --git a/arch/x86/include/asm/kdebug.h b/arch/x86/include/asm/kdebug.h
index 5bdfca86581b..ca242d35e873 100644
--- a/arch/x86/include/asm/kdebug.h
+++ b/arch/x86/include/asm/kdebug.h
@@ -18,7 +18,6 @@ enum die_val {
18 DIE_TRAP, 18 DIE_TRAP,
19 DIE_GPF, 19 DIE_GPF,
20 DIE_CALL, 20 DIE_CALL,
21 DIE_NMI_IPI,
22 DIE_PAGE_FAULT, 21 DIE_PAGE_FAULT,
23 DIE_NMIUNKNOWN, 22 DIE_NMIUNKNOWN,
24}; 23};
@@ -28,7 +27,7 @@ extern void die(const char *, struct pt_regs *,long);
28extern int __must_check __die(const char *, struct pt_regs *, long); 27extern int __must_check __die(const char *, struct pt_regs *, long);
29extern void show_registers(struct pt_regs *regs); 28extern void show_registers(struct pt_regs *regs);
30extern void show_trace(struct task_struct *t, struct pt_regs *regs, 29extern void show_trace(struct task_struct *t, struct pt_regs *regs,
31 unsigned long *sp, unsigned long bp); 30 unsigned long *sp);
32extern void __show_regs(struct pt_regs *regs, int all); 31extern void __show_regs(struct pt_regs *regs, int all);
33extern void show_regs(struct pt_regs *regs); 32extern void show_regs(struct pt_regs *regs);
34extern unsigned long oops_begin(void); 33extern unsigned long oops_begin(void);
diff --git a/arch/x86/include/asm/mach_traps.h b/arch/x86/include/asm/mach_traps.h
index f7920601e472..72a8b52e7dfd 100644
--- a/arch/x86/include/asm/mach_traps.h
+++ b/arch/x86/include/asm/mach_traps.h
@@ -7,9 +7,19 @@
7 7
8#include <asm/mc146818rtc.h> 8#include <asm/mc146818rtc.h>
9 9
10#define NMI_REASON_PORT 0x61
11
12#define NMI_REASON_SERR 0x80
13#define NMI_REASON_IOCHK 0x40
14#define NMI_REASON_MASK (NMI_REASON_SERR | NMI_REASON_IOCHK)
15
16#define NMI_REASON_CLEAR_SERR 0x04
17#define NMI_REASON_CLEAR_IOCHK 0x08
18#define NMI_REASON_CLEAR_MASK 0x0f
19
10static inline unsigned char get_nmi_reason(void) 20static inline unsigned char get_nmi_reason(void)
11{ 21{
12 return inb(0x61); 22 return inb(NMI_REASON_PORT);
13} 23}
14 24
15static inline void reassert_nmi(void) 25static inline void reassert_nmi(void)
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index c62c13cb9788..eb16e94ae04f 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -223,6 +223,9 @@ void intel_init_thermal(struct cpuinfo_x86 *c);
223 223
224void mce_log_therm_throt_event(__u64 status); 224void mce_log_therm_throt_event(__u64 status);
225 225
226/* Interrupt Handler for core thermal thresholds */
227extern int (*platform_thermal_notify)(__u64 msr_val);
228
226#ifdef CONFIG_X86_THERMAL_VECTOR 229#ifdef CONFIG_X86_THERMAL_VECTOR
227extern void mcheck_intel_therm_init(void); 230extern void mcheck_intel_therm_init(void);
228#else 231#else
diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h
index ef51b501e22a..24215072d0e1 100644
--- a/arch/x86/include/asm/microcode.h
+++ b/arch/x86/include/asm/microcode.h
@@ -48,6 +48,12 @@ static inline struct microcode_ops * __init init_intel_microcode(void)
48 48
49#ifdef CONFIG_MICROCODE_AMD 49#ifdef CONFIG_MICROCODE_AMD
50extern struct microcode_ops * __init init_amd_microcode(void); 50extern struct microcode_ops * __init init_amd_microcode(void);
51
52static inline void get_ucode_data(void *to, const u8 *from, size_t n)
53{
54 memcpy(to, from, n);
55}
56
51#else 57#else
52static inline struct microcode_ops * __init init_amd_microcode(void) 58static inline struct microcode_ops * __init init_amd_microcode(void)
53{ 59{
diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h
index c82868e9f905..0c90dd9f0505 100644
--- a/arch/x86/include/asm/mpspec.h
+++ b/arch/x86/include/asm/mpspec.h
@@ -5,8 +5,9 @@
5 5
6#include <asm/mpspec_def.h> 6#include <asm/mpspec_def.h>
7#include <asm/x86_init.h> 7#include <asm/x86_init.h>
8#include <asm/apicdef.h>
8 9
9extern int apic_version[MAX_APICS]; 10extern int apic_version[];
10extern int pic_mode; 11extern int pic_mode;
11 12
12#ifdef CONFIG_X86_32 13#ifdef CONFIG_X86_32
@@ -107,7 +108,7 @@ extern int mp_register_gsi(struct device *dev, u32 gsi, int edge_level,
107 int active_high_low); 108 int active_high_low);
108#endif /* CONFIG_ACPI */ 109#endif /* CONFIG_ACPI */
109 110
110#define PHYSID_ARRAY_SIZE BITS_TO_LONGS(MAX_APICS) 111#define PHYSID_ARRAY_SIZE BITS_TO_LONGS(MAX_LOCAL_APIC)
111 112
112struct physid_mask { 113struct physid_mask {
113 unsigned long mask[PHYSID_ARRAY_SIZE]; 114 unsigned long mask[PHYSID_ARRAY_SIZE];
@@ -122,31 +123,31 @@ typedef struct physid_mask physid_mask_t;
122 test_and_set_bit(physid, (map).mask) 123 test_and_set_bit(physid, (map).mask)
123 124
124#define physids_and(dst, src1, src2) \ 125#define physids_and(dst, src1, src2) \
125 bitmap_and((dst).mask, (src1).mask, (src2).mask, MAX_APICS) 126 bitmap_and((dst).mask, (src1).mask, (src2).mask, MAX_LOCAL_APIC)
126 127
127#define physids_or(dst, src1, src2) \ 128#define physids_or(dst, src1, src2) \
128 bitmap_or((dst).mask, (src1).mask, (src2).mask, MAX_APICS) 129 bitmap_or((dst).mask, (src1).mask, (src2).mask, MAX_LOCAL_APIC)
129 130
130#define physids_clear(map) \ 131#define physids_clear(map) \
131 bitmap_zero((map).mask, MAX_APICS) 132 bitmap_zero((map).mask, MAX_LOCAL_APIC)
132 133
133#define physids_complement(dst, src) \ 134#define physids_complement(dst, src) \
134 bitmap_complement((dst).mask, (src).mask, MAX_APICS) 135 bitmap_complement((dst).mask, (src).mask, MAX_LOCAL_APIC)
135 136
136#define physids_empty(map) \ 137#define physids_empty(map) \
137 bitmap_empty((map).mask, MAX_APICS) 138 bitmap_empty((map).mask, MAX_LOCAL_APIC)
138 139
139#define physids_equal(map1, map2) \ 140#define physids_equal(map1, map2) \
140 bitmap_equal((map1).mask, (map2).mask, MAX_APICS) 141 bitmap_equal((map1).mask, (map2).mask, MAX_LOCAL_APIC)
141 142
142#define physids_weight(map) \ 143#define physids_weight(map) \
143 bitmap_weight((map).mask, MAX_APICS) 144 bitmap_weight((map).mask, MAX_LOCAL_APIC)
144 145
145#define physids_shift_right(d, s, n) \ 146#define physids_shift_right(d, s, n) \
146 bitmap_shift_right((d).mask, (s).mask, n, MAX_APICS) 147 bitmap_shift_right((d).mask, (s).mask, n, MAX_LOCAL_APIC)
147 148
148#define physids_shift_left(d, s, n) \ 149#define physids_shift_left(d, s, n) \
149 bitmap_shift_left((d).mask, (s).mask, n, MAX_APICS) 150 bitmap_shift_left((d).mask, (s).mask, n, MAX_LOCAL_APIC)
150 151
151static inline unsigned long physids_coerce(physid_mask_t *map) 152static inline unsigned long physids_coerce(physid_mask_t *map)
152{ 153{
@@ -159,14 +160,6 @@ static inline void physids_promote(unsigned long physids, physid_mask_t *map)
159 map->mask[0] = physids; 160 map->mask[0] = physids;
160} 161}
161 162
162/* Note: will create very large stack frames if physid_mask_t is big */
163#define physid_mask_of_physid(physid) \
164 ({ \
165 physid_mask_t __physid_mask = PHYSID_MASK_NONE; \
166 physid_set(physid, __physid_mask); \
167 __physid_mask; \
168 })
169
170static inline void physid_set_mask_of_physid(int physid, physid_mask_t *map) 163static inline void physid_set_mask_of_physid(int physid, physid_mask_t *map)
171{ 164{
172 physids_clear(*map); 165 physids_clear(*map);
diff --git a/arch/x86/include/asm/mpspec_def.h b/arch/x86/include/asm/mpspec_def.h
index 4a7f96d7c188..c0a955a9a087 100644
--- a/arch/x86/include/asm/mpspec_def.h
+++ b/arch/x86/include/asm/mpspec_def.h
@@ -15,13 +15,6 @@
15 15
16#ifdef CONFIG_X86_32 16#ifdef CONFIG_X86_32
17# define MAX_MPC_ENTRY 1024 17# define MAX_MPC_ENTRY 1024
18# define MAX_APICS 256
19#else
20# if NR_CPUS <= 255
21# define MAX_APICS 255
22# else
23# define MAX_APICS 32768
24# endif
25#endif 18#endif
26 19
27/* Intel MP Floating Pointer Structure */ 20/* Intel MP Floating Pointer Structure */
diff --git a/arch/x86/include/asm/mrst-vrtc.h b/arch/x86/include/asm/mrst-vrtc.h
new file mode 100644
index 000000000000..73668abdbedf
--- /dev/null
+++ b/arch/x86/include/asm/mrst-vrtc.h
@@ -0,0 +1,9 @@
1#ifndef _MRST_VRTC_H
2#define _MRST_VRTC_H
3
4extern unsigned char vrtc_cmos_read(unsigned char reg);
5extern void vrtc_cmos_write(unsigned char val, unsigned char reg);
6extern unsigned long vrtc_get_time(void);
7extern int vrtc_set_mmss(unsigned long nowtime);
8
9#endif
diff --git a/arch/x86/include/asm/mrst.h b/arch/x86/include/asm/mrst.h
index 4a711a684b17..719f00b28ff5 100644
--- a/arch/x86/include/asm/mrst.h
+++ b/arch/x86/include/asm/mrst.h
@@ -14,7 +14,9 @@
14#include <linux/sfi.h> 14#include <linux/sfi.h>
15 15
16extern int pci_mrst_init(void); 16extern int pci_mrst_init(void);
17int __init sfi_parse_mrtc(struct sfi_table_header *table); 17extern int __init sfi_parse_mrtc(struct sfi_table_header *table);
18extern int sfi_mrtc_num;
19extern struct sfi_rtc_table_entry sfi_mrtc_array[];
18 20
19/* 21/*
20 * Medfield is the follow-up of Moorestown, it combines two chip solution into 22 * Medfield is the follow-up of Moorestown, it combines two chip solution into
@@ -50,4 +52,14 @@ extern void mrst_early_console_init(void);
50 52
51extern struct console early_hsu_console; 53extern struct console early_hsu_console;
52extern void hsu_early_console_init(void); 54extern void hsu_early_console_init(void);
55
56extern void intel_scu_devices_create(void);
57extern void intel_scu_devices_destroy(void);
58
59/* VRTC timer */
60#define MRST_VRTC_MAP_SZ (1024)
61/*#define MRST_VRTC_PGOFFSET (0xc00) */
62
63extern void mrst_rtc_init(void);
64
53#endif /* _ASM_X86_MRST_H */ 65#endif /* _ASM_X86_MRST_H */
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 6b89f5e86021..4d0dfa0d998e 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -123,6 +123,10 @@
123#define MSR_AMD64_IBSCTL 0xc001103a 123#define MSR_AMD64_IBSCTL 0xc001103a
124#define MSR_AMD64_IBSBRTARGET 0xc001103b 124#define MSR_AMD64_IBSBRTARGET 0xc001103b
125 125
126/* Fam 15h MSRs */
127#define MSR_F15H_PERF_CTL 0xc0010200
128#define MSR_F15H_PERF_CTR 0xc0010201
129
126/* Fam 10h MSRs */ 130/* Fam 10h MSRs */
127#define MSR_FAM10H_MMIO_CONF_BASE 0xc0010058 131#define MSR_FAM10H_MMIO_CONF_BASE 0xc0010058
128#define FAM10H_MMIO_CONF_ENABLE (1<<0) 132#define FAM10H_MMIO_CONF_ENABLE (1<<0)
@@ -253,6 +257,18 @@
253#define PACKAGE_THERM_INT_LOW_ENABLE (1 << 1) 257#define PACKAGE_THERM_INT_LOW_ENABLE (1 << 1)
254#define PACKAGE_THERM_INT_PLN_ENABLE (1 << 24) 258#define PACKAGE_THERM_INT_PLN_ENABLE (1 << 24)
255 259
260/* Thermal Thresholds Support */
261#define THERM_INT_THRESHOLD0_ENABLE (1 << 15)
262#define THERM_SHIFT_THRESHOLD0 8
263#define THERM_MASK_THRESHOLD0 (0x7f << THERM_SHIFT_THRESHOLD0)
264#define THERM_INT_THRESHOLD1_ENABLE (1 << 23)
265#define THERM_SHIFT_THRESHOLD1 16
266#define THERM_MASK_THRESHOLD1 (0x7f << THERM_SHIFT_THRESHOLD1)
267#define THERM_STATUS_THRESHOLD0 (1 << 6)
268#define THERM_LOG_THRESHOLD0 (1 << 7)
269#define THERM_STATUS_THRESHOLD1 (1 << 8)
270#define THERM_LOG_THRESHOLD1 (1 << 9)
271
256/* MISC_ENABLE bits: architectural */ 272/* MISC_ENABLE bits: architectural */
257#define MSR_IA32_MISC_ENABLE_FAST_STRING (1ULL << 0) 273#define MSR_IA32_MISC_ENABLE_FAST_STRING (1ULL << 0)
258#define MSR_IA32_MISC_ENABLE_TCC (1ULL << 1) 274#define MSR_IA32_MISC_ENABLE_TCC (1ULL << 1)
diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h
index 932f0f86b4b7..c76f5b92b840 100644
--- a/arch/x86/include/asm/nmi.h
+++ b/arch/x86/include/asm/nmi.h
@@ -5,41 +5,15 @@
5#include <asm/irq.h> 5#include <asm/irq.h>
6#include <asm/io.h> 6#include <asm/io.h>
7 7
8#ifdef ARCH_HAS_NMI_WATCHDOG 8#ifdef CONFIG_X86_LOCAL_APIC
9
10/**
11 * do_nmi_callback
12 *
13 * Check to see if a callback exists and execute it. Return 1
14 * if the handler exists and was handled successfully.
15 */
16int do_nmi_callback(struct pt_regs *regs, int cpu);
17 9
18extern void die_nmi(char *str, struct pt_regs *regs, int do_panic); 10extern void die_nmi(char *str, struct pt_regs *regs, int do_panic);
19extern int check_nmi_watchdog(void);
20#if !defined(CONFIG_LOCKUP_DETECTOR)
21extern int nmi_watchdog_enabled;
22#endif
23extern int avail_to_resrv_perfctr_nmi_bit(unsigned int); 11extern int avail_to_resrv_perfctr_nmi_bit(unsigned int);
24extern int reserve_perfctr_nmi(unsigned int); 12extern int reserve_perfctr_nmi(unsigned int);
25extern void release_perfctr_nmi(unsigned int); 13extern void release_perfctr_nmi(unsigned int);
26extern int reserve_evntsel_nmi(unsigned int); 14extern int reserve_evntsel_nmi(unsigned int);
27extern void release_evntsel_nmi(unsigned int); 15extern void release_evntsel_nmi(unsigned int);
28 16
29extern void setup_apic_nmi_watchdog(void *);
30extern void stop_apic_nmi_watchdog(void *);
31extern void disable_timer_nmi_watchdog(void);
32extern void enable_timer_nmi_watchdog(void);
33extern int nmi_watchdog_tick(struct pt_regs *regs, unsigned reason);
34extern void cpu_nmi_set_wd_enabled(void);
35
36extern atomic_t nmi_active;
37extern unsigned int nmi_watchdog;
38#define NMI_NONE 0
39#define NMI_IO_APIC 1
40#define NMI_LOCAL_APIC 2
41#define NMI_INVALID 3
42
43struct ctl_table; 17struct ctl_table;
44extern int proc_nmi_enabled(struct ctl_table *, int , 18extern int proc_nmi_enabled(struct ctl_table *, int ,
45 void __user *, size_t *, loff_t *); 19 void __user *, size_t *, loff_t *);
@@ -47,33 +21,28 @@ extern int unknown_nmi_panic;
47 21
48void arch_trigger_all_cpu_backtrace(void); 22void arch_trigger_all_cpu_backtrace(void);
49#define arch_trigger_all_cpu_backtrace arch_trigger_all_cpu_backtrace 23#define arch_trigger_all_cpu_backtrace arch_trigger_all_cpu_backtrace
24#endif
50 25
51static inline void localise_nmi_watchdog(void) 26/*
52{ 27 * Define some priorities for the nmi notifier call chain.
53 if (nmi_watchdog == NMI_IO_APIC) 28 *
54 nmi_watchdog = NMI_LOCAL_APIC; 29 * Create a local nmi bit that has a higher priority than
55} 30 * external nmis, because the local ones are more frequent.
31 *
32 * Also set up some default high/normal/low settings for
33 * subsystems to register with. Using 4 bits to separate
34 * the priorities. This can go a lot higher if need be.
35 */
56 36
57/* check if nmi_watchdog is active (ie was specified at boot) */ 37#define NMI_LOCAL_SHIFT 16 /* randomly picked */
58static inline int nmi_watchdog_active(void) 38#define NMI_LOCAL_BIT (1ULL << NMI_LOCAL_SHIFT)
59{ 39#define NMI_HIGH_PRIOR (1ULL << 8)
60 /* 40#define NMI_NORMAL_PRIOR (1ULL << 4)
61 * actually it should be: 41#define NMI_LOW_PRIOR (1ULL << 0)
62 * return (nmi_watchdog == NMI_LOCAL_APIC || 42#define NMI_LOCAL_HIGH_PRIOR (NMI_LOCAL_BIT | NMI_HIGH_PRIOR)
63 * nmi_watchdog == NMI_IO_APIC) 43#define NMI_LOCAL_NORMAL_PRIOR (NMI_LOCAL_BIT | NMI_NORMAL_PRIOR)
64 * but since they are power of two we could use a 44#define NMI_LOCAL_LOW_PRIOR (NMI_LOCAL_BIT | NMI_LOW_PRIOR)
65 * cheaper way --cvg
66 */
67 return nmi_watchdog & (NMI_LOCAL_APIC | NMI_IO_APIC);
68}
69#endif
70 45
71void lapic_watchdog_stop(void);
72int lapic_watchdog_init(unsigned nmi_hz);
73int lapic_wd_event(unsigned nmi_hz);
74unsigned lapic_adjust_nmi_hz(unsigned hz);
75void disable_lapic_nmi_watchdog(void);
76void enable_lapic_nmi_watchdog(void);
77void stop_nmi(void); 46void stop_nmi(void);
78void restart_nmi(void); 47void restart_nmi(void);
79 48
diff --git a/arch/x86/include/asm/numa_64.h b/arch/x86/include/asm/numa_64.h
index 823e070e7c26..5ae87285a502 100644
--- a/arch/x86/include/asm/numa_64.h
+++ b/arch/x86/include/asm/numa_64.h
@@ -38,7 +38,7 @@ extern void __cpuinit numa_add_cpu(int cpu);
38extern void __cpuinit numa_remove_cpu(int cpu); 38extern void __cpuinit numa_remove_cpu(int cpu);
39 39
40#ifdef CONFIG_NUMA_EMU 40#ifdef CONFIG_NUMA_EMU
41#define FAKE_NODE_MIN_SIZE ((u64)64 << 20) 41#define FAKE_NODE_MIN_SIZE ((u64)32 << 20)
42#define FAKE_NODE_MIN_HASH_MASK (~(FAKE_NODE_MIN_SIZE - 1UL)) 42#define FAKE_NODE_MIN_HASH_MASK (~(FAKE_NODE_MIN_SIZE - 1UL))
43#endif /* CONFIG_NUMA_EMU */ 43#endif /* CONFIG_NUMA_EMU */
44#else 44#else
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index ef9975812c77..7709c12431b8 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -112,7 +112,7 @@ static inline void arch_safe_halt(void)
112 112
113static inline void halt(void) 113static inline void halt(void)
114{ 114{
115 PVOP_VCALL0(pv_irq_ops.safe_halt); 115 PVOP_VCALL0(pv_irq_ops.halt);
116} 116}
117 117
118static inline void wbinvd(void) 118static inline void wbinvd(void)
diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h
index ca0437c714b2..676129229630 100644
--- a/arch/x86/include/asm/pci.h
+++ b/arch/x86/include/asm/pci.h
@@ -65,6 +65,7 @@ extern unsigned long pci_mem_start;
65 65
66#define PCIBIOS_MIN_CARDBUS_IO 0x4000 66#define PCIBIOS_MIN_CARDBUS_IO 0x4000
67 67
68extern int pcibios_enabled;
68void pcibios_config_init(void); 69void pcibios_config_init(void);
69struct pci_bus *pcibios_scan_root(int bus); 70struct pci_bus *pcibios_scan_root(int bus);
70 71
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index f899e01a8ac9..8ee45167e817 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -230,6 +230,125 @@ do { \
230}) 230})
231 231
232/* 232/*
233 * Add return operation
234 */
235#define percpu_add_return_op(var, val) \
236({ \
237 typeof(var) paro_ret__ = val; \
238 switch (sizeof(var)) { \
239 case 1: \
240 asm("xaddb %0, "__percpu_arg(1) \
241 : "+q" (paro_ret__), "+m" (var) \
242 : : "memory"); \
243 break; \
244 case 2: \
245 asm("xaddw %0, "__percpu_arg(1) \
246 : "+r" (paro_ret__), "+m" (var) \
247 : : "memory"); \
248 break; \
249 case 4: \
250 asm("xaddl %0, "__percpu_arg(1) \
251 : "+r" (paro_ret__), "+m" (var) \
252 : : "memory"); \
253 break; \
254 case 8: \
255 asm("xaddq %0, "__percpu_arg(1) \
256 : "+re" (paro_ret__), "+m" (var) \
257 : : "memory"); \
258 break; \
259 default: __bad_percpu_size(); \
260 } \
261 paro_ret__ += val; \
262 paro_ret__; \
263})
264
265/*
266 * xchg is implemented using cmpxchg without a lock prefix. xchg is
267 * expensive due to the implied lock prefix. The processor cannot prefetch
268 * cachelines if xchg is used.
269 */
270#define percpu_xchg_op(var, nval) \
271({ \
272 typeof(var) pxo_ret__; \
273 typeof(var) pxo_new__ = (nval); \
274 switch (sizeof(var)) { \
275 case 1: \
276 asm("\n1:mov "__percpu_arg(1)",%%al" \
277 "\n\tcmpxchgb %2, "__percpu_arg(1) \
278 "\n\tjnz 1b" \
279 : "=a" (pxo_ret__), "+m" (var) \
280 : "q" (pxo_new__) \
281 : "memory"); \
282 break; \
283 case 2: \
284 asm("\n1:mov "__percpu_arg(1)",%%ax" \
285 "\n\tcmpxchgw %2, "__percpu_arg(1) \
286 "\n\tjnz 1b" \
287 : "=a" (pxo_ret__), "+m" (var) \
288 : "r" (pxo_new__) \
289 : "memory"); \
290 break; \
291 case 4: \
292 asm("\n1:mov "__percpu_arg(1)",%%eax" \
293 "\n\tcmpxchgl %2, "__percpu_arg(1) \
294 "\n\tjnz 1b" \
295 : "=a" (pxo_ret__), "+m" (var) \
296 : "r" (pxo_new__) \
297 : "memory"); \
298 break; \
299 case 8: \
300 asm("\n1:mov "__percpu_arg(1)",%%rax" \
301 "\n\tcmpxchgq %2, "__percpu_arg(1) \
302 "\n\tjnz 1b" \
303 : "=a" (pxo_ret__), "+m" (var) \
304 : "r" (pxo_new__) \
305 : "memory"); \
306 break; \
307 default: __bad_percpu_size(); \
308 } \
309 pxo_ret__; \
310})
311
312/*
313 * cmpxchg has no such implied lock semantics; as a result it is much
314 * more efficient for cpu local operations.
315 */
316#define percpu_cmpxchg_op(var, oval, nval) \
317({ \
318 typeof(var) pco_ret__; \
319 typeof(var) pco_old__ = (oval); \
320 typeof(var) pco_new__ = (nval); \
321 switch (sizeof(var)) { \
322 case 1: \
323 asm("cmpxchgb %2, "__percpu_arg(1) \
324 : "=a" (pco_ret__), "+m" (var) \
325 : "q" (pco_new__), "0" (pco_old__) \
326 : "memory"); \
327 break; \
328 case 2: \
329 asm("cmpxchgw %2, "__percpu_arg(1) \
330 : "=a" (pco_ret__), "+m" (var) \
331 : "r" (pco_new__), "0" (pco_old__) \
332 : "memory"); \
333 break; \
334 case 4: \
335 asm("cmpxchgl %2, "__percpu_arg(1) \
336 : "=a" (pco_ret__), "+m" (var) \
337 : "r" (pco_new__), "0" (pco_old__) \
338 : "memory"); \
339 break; \
340 case 8: \
341 asm("cmpxchgq %2, "__percpu_arg(1) \
342 : "=a" (pco_ret__), "+m" (var) \
343 : "r" (pco_new__), "0" (pco_old__) \
344 : "memory"); \
345 break; \
346 default: __bad_percpu_size(); \
347 } \
348 pco_ret__; \
349})
350
351/*
233 * percpu_read() makes gcc load the percpu variable every time it is 352 * percpu_read() makes gcc load the percpu variable every time it is
234 * accessed while percpu_read_stable() allows the value to be cached. 353 * accessed while percpu_read_stable() allows the value to be cached.
235 * percpu_read_stable() is more efficient and can be used if its value 354 * percpu_read_stable() is more efficient and can be used if its value
@@ -267,6 +386,12 @@ do { \
267#define __this_cpu_xor_1(pcp, val) percpu_to_op("xor", (pcp), val) 386#define __this_cpu_xor_1(pcp, val) percpu_to_op("xor", (pcp), val)
268#define __this_cpu_xor_2(pcp, val) percpu_to_op("xor", (pcp), val) 387#define __this_cpu_xor_2(pcp, val) percpu_to_op("xor", (pcp), val)
269#define __this_cpu_xor_4(pcp, val) percpu_to_op("xor", (pcp), val) 388#define __this_cpu_xor_4(pcp, val) percpu_to_op("xor", (pcp), val)
389/*
390 * Generic fallback operations for __this_cpu_xchg_[1-4] are okay and much
391 * faster than an xchg with forced lock semantics.
392 */
393#define __this_cpu_xchg_8(pcp, nval) percpu_xchg_op(pcp, nval)
394#define __this_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
270 395
271#define this_cpu_read_1(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) 396#define this_cpu_read_1(pcp) percpu_from_op("mov", (pcp), "m"(pcp))
272#define this_cpu_read_2(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) 397#define this_cpu_read_2(pcp) percpu_from_op("mov", (pcp), "m"(pcp))
@@ -286,6 +411,11 @@ do { \
286#define this_cpu_xor_1(pcp, val) percpu_to_op("xor", (pcp), val) 411#define this_cpu_xor_1(pcp, val) percpu_to_op("xor", (pcp), val)
287#define this_cpu_xor_2(pcp, val) percpu_to_op("xor", (pcp), val) 412#define this_cpu_xor_2(pcp, val) percpu_to_op("xor", (pcp), val)
288#define this_cpu_xor_4(pcp, val) percpu_to_op("xor", (pcp), val) 413#define this_cpu_xor_4(pcp, val) percpu_to_op("xor", (pcp), val)
414#define this_cpu_xchg_1(pcp, nval) percpu_xchg_op(pcp, nval)
415#define this_cpu_xchg_2(pcp, nval) percpu_xchg_op(pcp, nval)
416#define this_cpu_xchg_4(pcp, nval) percpu_xchg_op(pcp, nval)
417#define this_cpu_xchg_8(pcp, nval) percpu_xchg_op(pcp, nval)
418#define this_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
289 419
290#define irqsafe_cpu_add_1(pcp, val) percpu_add_op((pcp), val) 420#define irqsafe_cpu_add_1(pcp, val) percpu_add_op((pcp), val)
291#define irqsafe_cpu_add_2(pcp, val) percpu_add_op((pcp), val) 421#define irqsafe_cpu_add_2(pcp, val) percpu_add_op((pcp), val)
@@ -299,6 +429,31 @@ do { \
299#define irqsafe_cpu_xor_1(pcp, val) percpu_to_op("xor", (pcp), val) 429#define irqsafe_cpu_xor_1(pcp, val) percpu_to_op("xor", (pcp), val)
300#define irqsafe_cpu_xor_2(pcp, val) percpu_to_op("xor", (pcp), val) 430#define irqsafe_cpu_xor_2(pcp, val) percpu_to_op("xor", (pcp), val)
301#define irqsafe_cpu_xor_4(pcp, val) percpu_to_op("xor", (pcp), val) 431#define irqsafe_cpu_xor_4(pcp, val) percpu_to_op("xor", (pcp), val)
432#define irqsafe_cpu_xchg_1(pcp, nval) percpu_xchg_op(pcp, nval)
433#define irqsafe_cpu_xchg_2(pcp, nval) percpu_xchg_op(pcp, nval)
434#define irqsafe_cpu_xchg_4(pcp, nval) percpu_xchg_op(pcp, nval)
435#define irqsafe_cpu_xchg_8(pcp, nval) percpu_xchg_op(pcp, nval)
436#define irqsafe_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
437
438#ifndef CONFIG_M386
439#define __this_cpu_add_return_1(pcp, val) percpu_add_return_op(pcp, val)
440#define __this_cpu_add_return_2(pcp, val) percpu_add_return_op(pcp, val)
441#define __this_cpu_add_return_4(pcp, val) percpu_add_return_op(pcp, val)
442#define __this_cpu_cmpxchg_1(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
443#define __this_cpu_cmpxchg_2(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
444#define __this_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
445
446#define this_cpu_add_return_1(pcp, val) percpu_add_return_op(pcp, val)
447#define this_cpu_add_return_2(pcp, val) percpu_add_return_op(pcp, val)
448#define this_cpu_add_return_4(pcp, val) percpu_add_return_op(pcp, val)
449#define this_cpu_cmpxchg_1(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
450#define this_cpu_cmpxchg_2(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
451#define this_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
452
453#define irqsafe_cpu_cmpxchg_1(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
454#define irqsafe_cpu_cmpxchg_2(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
455#define irqsafe_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
456#endif /* !CONFIG_M386 */
302 457
303/* 458/*
304 * Per cpu atomic 64 bit operations are only available under 64 bit. 459 * Per cpu atomic 64 bit operations are only available under 64 bit.
@@ -311,6 +466,7 @@ do { \
311#define __this_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val) 466#define __this_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val)
312#define __this_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val) 467#define __this_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val)
313#define __this_cpu_xor_8(pcp, val) percpu_to_op("xor", (pcp), val) 468#define __this_cpu_xor_8(pcp, val) percpu_to_op("xor", (pcp), val)
469#define __this_cpu_add_return_8(pcp, val) percpu_add_return_op(pcp, val)
314 470
315#define this_cpu_read_8(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) 471#define this_cpu_read_8(pcp) percpu_from_op("mov", (pcp), "m"(pcp))
316#define this_cpu_write_8(pcp, val) percpu_to_op("mov", (pcp), val) 472#define this_cpu_write_8(pcp, val) percpu_to_op("mov", (pcp), val)
@@ -318,12 +474,12 @@ do { \
318#define this_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val) 474#define this_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val)
319#define this_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val) 475#define this_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val)
320#define this_cpu_xor_8(pcp, val) percpu_to_op("xor", (pcp), val) 476#define this_cpu_xor_8(pcp, val) percpu_to_op("xor", (pcp), val)
477#define this_cpu_add_return_8(pcp, val) percpu_add_return_op(pcp, val)
321 478
322#define irqsafe_cpu_add_8(pcp, val) percpu_add_op((pcp), val) 479#define irqsafe_cpu_add_8(pcp, val) percpu_add_op((pcp), val)
323#define irqsafe_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val) 480#define irqsafe_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val)
324#define irqsafe_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val) 481#define irqsafe_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val)
325#define irqsafe_cpu_xor_8(pcp, val) percpu_to_op("xor", (pcp), val) 482#define irqsafe_cpu_xor_8(pcp, val) percpu_to_op("xor", (pcp), val)
326
327#endif 483#endif
328 484
329/* This is not atomic against other CPUs -- CPU preemption needs to be off */ 485/* This is not atomic against other CPUs -- CPU preemption needs to be off */
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 550e26b1dbb3..d9d4dae305f6 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -125,7 +125,6 @@ union cpuid10_edx {
125#define IBS_OP_MAX_CNT_EXT 0x007FFFFFULL /* not a register bit mask */ 125#define IBS_OP_MAX_CNT_EXT 0x007FFFFFULL /* not a register bit mask */
126 126
127#ifdef CONFIG_PERF_EVENTS 127#ifdef CONFIG_PERF_EVENTS
128extern void init_hw_perf_events(void);
129extern void perf_events_lapic_init(void); 128extern void perf_events_lapic_init(void);
130 129
131#define PERF_EVENT_INDEX_OFFSET 0 130#define PERF_EVENT_INDEX_OFFSET 0
@@ -156,7 +155,6 @@ extern unsigned long perf_misc_flags(struct pt_regs *regs);
156} 155}
157 156
158#else 157#else
159static inline void init_hw_perf_events(void) { }
160static inline void perf_events_lapic_init(void) { } 158static inline void perf_events_lapic_init(void) { }
161#endif 159#endif
162 160
diff --git a/arch/x86/include/asm/perf_event_p4.h b/arch/x86/include/asm/perf_event_p4.h
index a70cd216be5d..e2f6a99f14ab 100644
--- a/arch/x86/include/asm/perf_event_p4.h
+++ b/arch/x86/include/asm/perf_event_p4.h
@@ -20,6 +20,9 @@
20#define ARCH_P4_MAX_ESCR (ARCH_P4_TOTAL_ESCR - ARCH_P4_RESERVED_ESCR) 20#define ARCH_P4_MAX_ESCR (ARCH_P4_TOTAL_ESCR - ARCH_P4_RESERVED_ESCR)
21#define ARCH_P4_MAX_CCCR (18) 21#define ARCH_P4_MAX_CCCR (18)
22 22
23#define ARCH_P4_CNTRVAL_BITS (40)
24#define ARCH_P4_CNTRVAL_MASK ((1ULL << ARCH_P4_CNTRVAL_BITS) - 1)
25
23#define P4_ESCR_EVENT_MASK 0x7e000000U 26#define P4_ESCR_EVENT_MASK 0x7e000000U
24#define P4_ESCR_EVENT_SHIFT 25 27#define P4_ESCR_EVENT_SHIFT 25
25#define P4_ESCR_EVENTMASK_MASK 0x01fffe00U 28#define P4_ESCR_EVENTMASK_MASK 0x01fffe00U
@@ -744,14 +747,6 @@ enum P4_ESCR_EMASKS {
744}; 747};
745 748
746/* 749/*
747 * P4 PEBS specifics (Replay Event only)
748 *
749 * Format (bits):
750 * 0-6: metric from P4_PEBS_METRIC enum
751 * 7 : reserved
752 * 8 : reserved
753 * 9-11 : reserved
754 *
755 * Note we have UOP and PEBS bits reserved for now 750 * Note we have UOP and PEBS bits reserved for now
756 * just in case if we will need them once 751 * just in case if we will need them once
757 */ 752 */
@@ -788,5 +783,60 @@ enum P4_PEBS_METRIC {
788 P4_PEBS_METRIC__max 783 P4_PEBS_METRIC__max
789}; 784};
790 785
786/*
787 * Notes on internal configuration of ESCR+CCCR tuples
788 *
789 * Since P4 has quite the different architecture of
790 * performance registers in compare with "architectural"
791 * once and we have on 64 bits to keep configuration
792 * of performance event, the following trick is used.
793 *
794 * 1) Since both ESCR and CCCR registers have only low
795 * 32 bits valuable, we pack them into a single 64 bit
796 * configuration. Low 32 bits of such config correspond
797 * to low 32 bits of CCCR register and high 32 bits
798 * correspond to low 32 bits of ESCR register.
799 *
800 * 2) The meaning of every bit of such config field can
801 * be found in Intel SDM but it should be noted that
802 * we "borrow" some reserved bits for own usage and
803 * clean them or set to a proper value when we do
804 * a real write to hardware registers.
805 *
806 * 3) The format of bits of config is the following
807 * and should be either 0 or set to some predefined
808 * values:
809 *
810 * Low 32 bits
811 * -----------
812 * 0-6: P4_PEBS_METRIC enum
813 * 7-11: reserved
814 * 12: reserved (Enable)
815 * 13-15: reserved (ESCR select)
816 * 16-17: Active Thread
817 * 18: Compare
818 * 19: Complement
819 * 20-23: Threshold
820 * 24: Edge
821 * 25: reserved (FORCE_OVF)
822 * 26: reserved (OVF_PMI_T0)
823 * 27: reserved (OVF_PMI_T1)
824 * 28-29: reserved
825 * 30: reserved (Cascade)
826 * 31: reserved (OVF)
827 *
828 * High 32 bits
829 * ------------
830 * 0: reserved (T1_USR)
831 * 1: reserved (T1_OS)
832 * 2: reserved (T0_USR)
833 * 3: reserved (T0_OS)
834 * 4: Tag Enable
835 * 5-8: Tag Value
836 * 9-24: Event Mask (may use P4_ESCR_EMASK_BIT helper)
837 * 25-30: enum P4_EVENTS
838 * 31: reserved (HT thread)
839 */
840
791#endif /* PERF_EVENT_P4_H */ 841#endif /* PERF_EVENT_P4_H */
792 842
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index b79bd980461c..521acfc47e7d 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -141,10 +141,9 @@ extern __u32 cpu_caps_set[NCAPINTS];
141#ifdef CONFIG_SMP 141#ifdef CONFIG_SMP
142DECLARE_PER_CPU_SHARED_ALIGNED(struct cpuinfo_x86, cpu_info); 142DECLARE_PER_CPU_SHARED_ALIGNED(struct cpuinfo_x86, cpu_info);
143#define cpu_data(cpu) per_cpu(cpu_info, cpu) 143#define cpu_data(cpu) per_cpu(cpu_info, cpu)
144#define current_cpu_data __get_cpu_var(cpu_info)
145#else 144#else
145#define cpu_info boot_cpu_data
146#define cpu_data(cpu) boot_cpu_data 146#define cpu_data(cpu) boot_cpu_data
147#define current_cpu_data boot_cpu_data
148#endif 147#endif
149 148
150extern const struct seq_operations cpuinfo_op; 149extern const struct seq_operations cpuinfo_op;
diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h
index d6763b139a84..db8aa19a08a2 100644
--- a/arch/x86/include/asm/setup.h
+++ b/arch/x86/include/asm/setup.h
@@ -53,6 +53,12 @@ extern void x86_mrst_early_setup(void);
53static inline void x86_mrst_early_setup(void) { } 53static inline void x86_mrst_early_setup(void) { }
54#endif 54#endif
55 55
56#ifdef CONFIG_X86_INTEL_CE
57extern void x86_ce4100_early_setup(void);
58#else
59static inline void x86_ce4100_early_setup(void) { }
60#endif
61
56#ifndef _SETUP 62#ifndef _SETUP
57 63
58/* 64/*
diff --git a/arch/x86/include/asm/smpboot_hooks.h b/arch/x86/include/asm/smpboot_hooks.h
index 1def60114906..6c22bf353f26 100644
--- a/arch/x86/include/asm/smpboot_hooks.h
+++ b/arch/x86/include/asm/smpboot_hooks.h
@@ -48,7 +48,6 @@ static inline void __init smpboot_setup_io_apic(void)
48 setup_IO_APIC(); 48 setup_IO_APIC();
49 else { 49 else {
50 nr_ioapics = 0; 50 nr_ioapics = 0;
51 localise_nmi_watchdog();
52 } 51 }
53#endif 52#endif
54} 53}
diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h
index 2b16a2ad23dc..52b5c7ed3608 100644
--- a/arch/x86/include/asm/stacktrace.h
+++ b/arch/x86/include/asm/stacktrace.h
@@ -7,6 +7,7 @@
7#define _ASM_X86_STACKTRACE_H 7#define _ASM_X86_STACKTRACE_H
8 8
9#include <linux/uaccess.h> 9#include <linux/uaccess.h>
10#include <linux/ptrace.h>
10 11
11extern int kstack_depth_to_print; 12extern int kstack_depth_to_print;
12 13
@@ -46,7 +47,7 @@ struct stacktrace_ops {
46}; 47};
47 48
48void dump_trace(struct task_struct *tsk, struct pt_regs *regs, 49void dump_trace(struct task_struct *tsk, struct pt_regs *regs,
49 unsigned long *stack, unsigned long bp, 50 unsigned long *stack,
50 const struct stacktrace_ops *ops, void *data); 51 const struct stacktrace_ops *ops, void *data);
51 52
52#ifdef CONFIG_X86_32 53#ifdef CONFIG_X86_32
@@ -57,13 +58,39 @@ void dump_trace(struct task_struct *tsk, struct pt_regs *regs,
57#define get_bp(bp) asm("movq %%rbp, %0" : "=r" (bp) :) 58#define get_bp(bp) asm("movq %%rbp, %0" : "=r" (bp) :)
58#endif 59#endif
59 60
61#ifdef CONFIG_FRAME_POINTER
62static inline unsigned long
63stack_frame(struct task_struct *task, struct pt_regs *regs)
64{
65 unsigned long bp;
66
67 if (regs)
68 return regs->bp;
69
70 if (task == current) {
71 /* Grab bp right from our regs */
72 get_bp(bp);
73 return bp;
74 }
75
76 /* bp is the last reg pushed by switch_to */
77 return *(unsigned long *)task->thread.sp;
78}
79#else
80static inline unsigned long
81stack_frame(struct task_struct *task, struct pt_regs *regs)
82{
83 return 0;
84}
85#endif
86
60extern void 87extern void
61show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, 88show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
62 unsigned long *stack, unsigned long bp, char *log_lvl); 89 unsigned long *stack, char *log_lvl);
63 90
64extern void 91extern void
65show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, 92show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
66 unsigned long *sp, unsigned long bp, char *log_lvl); 93 unsigned long *sp, char *log_lvl);
67 94
68extern unsigned int code_bytes; 95extern unsigned int code_bytes;
69 96
diff --git a/arch/x86/include/asm/timer.h b/arch/x86/include/asm/timer.h
index 5469630b27f5..fa7b9176b76c 100644
--- a/arch/x86/include/asm/timer.h
+++ b/arch/x86/include/asm/timer.h
@@ -10,12 +10,6 @@
10unsigned long long native_sched_clock(void); 10unsigned long long native_sched_clock(void);
11extern int recalibrate_cpu_khz(void); 11extern int recalibrate_cpu_khz(void);
12 12
13#if defined(CONFIG_X86_32) && defined(CONFIG_X86_IO_APIC)
14extern int timer_ack;
15#else
16# define timer_ack (0)
17#endif
18
19extern int no_timer_check; 13extern int no_timer_check;
20 14
21/* Accelerators for sched_clock() 15/* Accelerators for sched_clock()
diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h
index 42d412fd8b02..ce1d54c8a433 100644
--- a/arch/x86/include/asm/uv/uv_bau.h
+++ b/arch/x86/include/asm/uv/uv_bau.h
@@ -26,20 +26,22 @@
26 * BAU_SB_DESCRIPTOR_BASE register, set 1 is located at BASE + 512, 26 * BAU_SB_DESCRIPTOR_BASE register, set 1 is located at BASE + 512,
27 * set 2 is at BASE + 2*512, set 3 at BASE + 3*512, and so on. 27 * set 2 is at BASE + 2*512, set 3 at BASE + 3*512, and so on.
28 * 28 *
29 * We will use 31 sets, one for sending BAU messages from each of the 32 29 * We will use one set for sending BAU messages from each of the
30 * cpu's on the uvhub. 30 * cpu's on the uvhub.
31 * 31 *
32 * TLB shootdown will use the first of the 8 descriptors of each set. 32 * TLB shootdown will use the first of the 8 descriptors of each set.
33 * Each of the descriptors is 64 bytes in size (8*64 = 512 bytes in a set). 33 * Each of the descriptors is 64 bytes in size (8*64 = 512 bytes in a set).
34 */ 34 */
35 35
36#define MAX_CPUS_PER_UVHUB 64
37#define MAX_CPUS_PER_SOCKET 32
38#define UV_ADP_SIZE 64 /* hardware-provided max. */
39#define UV_CPUS_PER_ACT_STATUS 32 /* hardware-provided max. */
36#define UV_ITEMS_PER_DESCRIPTOR 8 40#define UV_ITEMS_PER_DESCRIPTOR 8
37/* the 'throttle' to prevent the hardware stay-busy bug */ 41/* the 'throttle' to prevent the hardware stay-busy bug */
38#define MAX_BAU_CONCURRENT 3 42#define MAX_BAU_CONCURRENT 3
39#define UV_CPUS_PER_ACT_STATUS 32
40#define UV_ACT_STATUS_MASK 0x3 43#define UV_ACT_STATUS_MASK 0x3
41#define UV_ACT_STATUS_SIZE 2 44#define UV_ACT_STATUS_SIZE 2
42#define UV_ADP_SIZE 32
43#define UV_DISTRIBUTION_SIZE 256 45#define UV_DISTRIBUTION_SIZE 256
44#define UV_SW_ACK_NPENDING 8 46#define UV_SW_ACK_NPENDING 8
45#define UV_NET_ENDPOINT_INTD 0x38 47#define UV_NET_ENDPOINT_INTD 0x38
@@ -100,7 +102,6 @@
100 * number of destination side software ack resources 102 * number of destination side software ack resources
101 */ 103 */
102#define DEST_NUM_RESOURCES 8 104#define DEST_NUM_RESOURCES 8
103#define MAX_CPUS_PER_NODE 32
104/* 105/*
105 * completion statuses for sending a TLB flush message 106 * completion statuses for sending a TLB flush message
106 */ 107 */
diff --git a/arch/x86/include/asm/xen/hypervisor.h b/arch/x86/include/asm/xen/hypervisor.h
index 396ff4cc8ed4..66d0fff1ee84 100644
--- a/arch/x86/include/asm/xen/hypervisor.h
+++ b/arch/x86/include/asm/xen/hypervisor.h
@@ -37,4 +37,39 @@
37extern struct shared_info *HYPERVISOR_shared_info; 37extern struct shared_info *HYPERVISOR_shared_info;
38extern struct start_info *xen_start_info; 38extern struct start_info *xen_start_info;
39 39
40#include <asm/processor.h>
41
42static inline uint32_t xen_cpuid_base(void)
43{
44 uint32_t base, eax, ebx, ecx, edx;
45 char signature[13];
46
47 for (base = 0x40000000; base < 0x40010000; base += 0x100) {
48 cpuid(base, &eax, &ebx, &ecx, &edx);
49 *(uint32_t *)(signature + 0) = ebx;
50 *(uint32_t *)(signature + 4) = ecx;
51 *(uint32_t *)(signature + 8) = edx;
52 signature[12] = 0;
53
54 if (!strcmp("XenVMMXenVMM", signature) && ((eax - base) >= 2))
55 return base;
56 }
57
58 return 0;
59}
60
61#ifdef CONFIG_XEN
62extern bool xen_hvm_need_lapic(void);
63
64static inline bool xen_x2apic_para_available(void)
65{
66 return xen_hvm_need_lapic();
67}
68#else
69static inline bool xen_x2apic_para_available(void)
70{
71 return (xen_cpuid_base() != 0);
72}
73#endif
74
40#endif /* _ASM_X86_XEN_HYPERVISOR_H */ 75#endif /* _ASM_X86_XEN_HYPERVISOR_H */
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 1e994754d323..34244b2cd880 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -85,7 +85,6 @@ obj-$(CONFIG_DOUBLEFAULT) += doublefault_32.o
85obj-$(CONFIG_KGDB) += kgdb.o 85obj-$(CONFIG_KGDB) += kgdb.o
86obj-$(CONFIG_VM86) += vm86_32.o 86obj-$(CONFIG_VM86) += vm86_32.o
87obj-$(CONFIG_EARLY_PRINTK) += early_printk.o 87obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
88obj-$(CONFIG_EARLY_PRINTK_MRST) += early_printk_mrst.o
89 88
90obj-$(CONFIG_HPET_TIMER) += hpet.o 89obj-$(CONFIG_HPET_TIMER) += hpet.o
91obj-$(CONFIG_APB_TIMER) += apb_timer.o 90obj-$(CONFIG_APB_TIMER) += apb_timer.o
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 71232b941b6c..ec881c6bfee0 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -198,6 +198,11 @@ static void __cpuinit acpi_register_lapic(int id, u8 enabled)
198{ 198{
199 unsigned int ver = 0; 199 unsigned int ver = 0;
200 200
201 if (id >= (MAX_LOCAL_APIC-1)) {
202 printk(KERN_INFO PREFIX "skipped apicid that is too big\n");
203 return;
204 }
205
201 if (!enabled) { 206 if (!enabled) {
202 ++disabled_cpus; 207 ++disabled_cpus;
203 return; 208 return;
@@ -847,18 +852,6 @@ static int __init acpi_parse_fadt(struct acpi_table_header *table)
847 * returns 0 on success, < 0 on error 852 * returns 0 on success, < 0 on error
848 */ 853 */
849 854
850static void __init acpi_register_lapic_address(unsigned long address)
851{
852 mp_lapic_addr = address;
853
854 set_fixmap_nocache(FIX_APIC_BASE, address);
855 if (boot_cpu_physical_apicid == -1U) {
856 boot_cpu_physical_apicid = read_apic_id();
857 apic_version[boot_cpu_physical_apicid] =
858 GET_APIC_VERSION(apic_read(APIC_LVR));
859 }
860}
861
862static int __init early_acpi_parse_madt_lapic_addr_ovr(void) 855static int __init early_acpi_parse_madt_lapic_addr_ovr(void)
863{ 856{
864 int count; 857 int count;
@@ -880,7 +873,7 @@ static int __init early_acpi_parse_madt_lapic_addr_ovr(void)
880 return count; 873 return count;
881 } 874 }
882 875
883 acpi_register_lapic_address(acpi_lapic_addr); 876 register_lapic_address(acpi_lapic_addr);
884 877
885 return count; 878 return count;
886} 879}
@@ -907,16 +900,16 @@ static int __init acpi_parse_madt_lapic_entries(void)
907 return count; 900 return count;
908 } 901 }
909 902
910 acpi_register_lapic_address(acpi_lapic_addr); 903 register_lapic_address(acpi_lapic_addr);
911 904
912 count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_SAPIC, 905 count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_SAPIC,
913 acpi_parse_sapic, MAX_APICS); 906 acpi_parse_sapic, MAX_LOCAL_APIC);
914 907
915 if (!count) { 908 if (!count) {
916 x2count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_X2APIC, 909 x2count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_X2APIC,
917 acpi_parse_x2apic, MAX_APICS); 910 acpi_parse_x2apic, MAX_LOCAL_APIC);
918 count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC, 911 count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC,
919 acpi_parse_lapic, MAX_APICS); 912 acpi_parse_lapic, MAX_LOCAL_APIC);
920 } 913 }
921 if (!count && !x2count) { 914 if (!count && !x2count) {
922 printk(KERN_ERR PREFIX "No LAPIC entries present\n"); 915 printk(KERN_ERR PREFIX "No LAPIC entries present\n");
@@ -949,32 +942,6 @@ static int __init acpi_parse_madt_lapic_entries(void)
949extern int es7000_plat; 942extern int es7000_plat;
950#endif 943#endif
951 944
952static void assign_to_mp_irq(struct mpc_intsrc *m,
953 struct mpc_intsrc *mp_irq)
954{
955 memcpy(mp_irq, m, sizeof(struct mpc_intsrc));
956}
957
958static int mp_irq_cmp(struct mpc_intsrc *mp_irq,
959 struct mpc_intsrc *m)
960{
961 return memcmp(mp_irq, m, sizeof(struct mpc_intsrc));
962}
963
964static void save_mp_irq(struct mpc_intsrc *m)
965{
966 int i;
967
968 for (i = 0; i < mp_irq_entries; i++) {
969 if (!mp_irq_cmp(&mp_irqs[i], m))
970 return;
971 }
972
973 assign_to_mp_irq(m, &mp_irqs[mp_irq_entries]);
974 if (++mp_irq_entries == MAX_IRQ_SOURCES)
975 panic("Max # of irq sources exceeded!!\n");
976}
977
978void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi) 945void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi)
979{ 946{
980 int ioapic; 947 int ioapic;
@@ -1005,7 +972,7 @@ void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi)
1005 mp_irq.dstapic = mp_ioapics[ioapic].apicid; /* APIC ID */ 972 mp_irq.dstapic = mp_ioapics[ioapic].apicid; /* APIC ID */
1006 mp_irq.dstirq = pin; /* INTIN# */ 973 mp_irq.dstirq = pin; /* INTIN# */
1007 974
1008 save_mp_irq(&mp_irq); 975 mp_save_irq(&mp_irq);
1009 976
1010 isa_irq_to_gsi[bus_irq] = gsi; 977 isa_irq_to_gsi[bus_irq] = gsi;
1011} 978}
@@ -1080,7 +1047,7 @@ void __init mp_config_acpi_legacy_irqs(void)
1080 mp_irq.srcbusirq = i; /* Identity mapped */ 1047 mp_irq.srcbusirq = i; /* Identity mapped */
1081 mp_irq.dstirq = pin; 1048 mp_irq.dstirq = pin;
1082 1049
1083 save_mp_irq(&mp_irq); 1050 mp_save_irq(&mp_irq);
1084 } 1051 }
1085} 1052}
1086 1053
@@ -1117,7 +1084,7 @@ static int mp_config_acpi_gsi(struct device *dev, u32 gsi, int trigger,
1117 mp_irq.dstapic = mp_ioapics[ioapic].apicid; 1084 mp_irq.dstapic = mp_ioapics[ioapic].apicid;
1118 mp_irq.dstirq = mp_find_ioapic_pin(ioapic, gsi); 1085 mp_irq.dstirq = mp_find_ioapic_pin(ioapic, gsi);
1119 1086
1120 save_mp_irq(&mp_irq); 1087 mp_save_irq(&mp_irq);
1121#endif 1088#endif
1122 return 0; 1089 return 0;
1123} 1090}
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 5079f24c955a..123608531c8f 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -353,6 +353,7 @@ void __init_or_module alternatives_smp_module_del(struct module *mod)
353 mutex_unlock(&smp_alt); 353 mutex_unlock(&smp_alt);
354} 354}
355 355
356bool skip_smp_alternatives;
356void alternatives_smp_switch(int smp) 357void alternatives_smp_switch(int smp)
357{ 358{
358 struct smp_alt_module *mod; 359 struct smp_alt_module *mod;
@@ -368,7 +369,7 @@ void alternatives_smp_switch(int smp)
368 printk("lockdep: fixing up alternatives.\n"); 369 printk("lockdep: fixing up alternatives.\n");
369#endif 370#endif
370 371
371 if (noreplace_smp || smp_alt_once) 372 if (noreplace_smp || smp_alt_once || skip_smp_alternatives)
372 return; 373 return;
373 BUG_ON(!smp && (num_online_cpus() > 1)); 374 BUG_ON(!smp && (num_online_cpus() > 1));
374 375
@@ -591,17 +592,21 @@ static atomic_t stop_machine_first;
591static int wrote_text; 592static int wrote_text;
592 593
593struct text_poke_params { 594struct text_poke_params {
594 void *addr; 595 struct text_poke_param *params;
595 const void *opcode; 596 int nparams;
596 size_t len;
597}; 597};
598 598
599static int __kprobes stop_machine_text_poke(void *data) 599static int __kprobes stop_machine_text_poke(void *data)
600{ 600{
601 struct text_poke_params *tpp = data; 601 struct text_poke_params *tpp = data;
602 struct text_poke_param *p;
603 int i;
602 604
603 if (atomic_dec_and_test(&stop_machine_first)) { 605 if (atomic_dec_and_test(&stop_machine_first)) {
604 text_poke(tpp->addr, tpp->opcode, tpp->len); 606 for (i = 0; i < tpp->nparams; i++) {
607 p = &tpp->params[i];
608 text_poke(p->addr, p->opcode, p->len);
609 }
605 smp_wmb(); /* Make sure other cpus see that this has run */ 610 smp_wmb(); /* Make sure other cpus see that this has run */
606 wrote_text = 1; 611 wrote_text = 1;
607 } else { 612 } else {
@@ -610,8 +615,12 @@ static int __kprobes stop_machine_text_poke(void *data)
610 smp_mb(); /* Load wrote_text before following execution */ 615 smp_mb(); /* Load wrote_text before following execution */
611 } 616 }
612 617
613 flush_icache_range((unsigned long)tpp->addr, 618 for (i = 0; i < tpp->nparams; i++) {
614 (unsigned long)tpp->addr + tpp->len); 619 p = &tpp->params[i];
620 flush_icache_range((unsigned long)p->addr,
621 (unsigned long)p->addr + p->len);
622 }
623
615 return 0; 624 return 0;
616} 625}
617 626
@@ -631,10 +640,13 @@ static int __kprobes stop_machine_text_poke(void *data)
631void *__kprobes text_poke_smp(void *addr, const void *opcode, size_t len) 640void *__kprobes text_poke_smp(void *addr, const void *opcode, size_t len)
632{ 641{
633 struct text_poke_params tpp; 642 struct text_poke_params tpp;
643 struct text_poke_param p;
634 644
635 tpp.addr = addr; 645 p.addr = addr;
636 tpp.opcode = opcode; 646 p.opcode = opcode;
637 tpp.len = len; 647 p.len = len;
648 tpp.params = &p;
649 tpp.nparams = 1;
638 atomic_set(&stop_machine_first, 1); 650 atomic_set(&stop_machine_first, 1);
639 wrote_text = 0; 651 wrote_text = 0;
640 /* Use __stop_machine() because the caller already got online_cpus. */ 652 /* Use __stop_machine() because the caller already got online_cpus. */
@@ -642,6 +654,26 @@ void *__kprobes text_poke_smp(void *addr, const void *opcode, size_t len)
642 return addr; 654 return addr;
643} 655}
644 656
657/**
658 * text_poke_smp_batch - Update instructions on a live kernel on SMP
659 * @params: an array of text_poke parameters
660 * @n: the number of elements in params.
661 *
662 * Modify multi-byte instruction by using stop_machine() on SMP. Since the
663 * stop_machine() is heavy task, it is better to aggregate text_poke requests
664 * and do it once if possible.
665 *
666 * Note: Must be called under get_online_cpus() and text_mutex.
667 */
668void __kprobes text_poke_smp_batch(struct text_poke_param *params, int n)
669{
670 struct text_poke_params tpp = {.params = params, .nparams = n};
671
672 atomic_set(&stop_machine_first, 1);
673 wrote_text = 0;
674 stop_machine(stop_machine_text_poke, (void *)&tpp, NULL);
675}
676
645#if defined(CONFIG_DYNAMIC_FTRACE) || defined(HAVE_JUMP_LABEL) 677#if defined(CONFIG_DYNAMIC_FTRACE) || defined(HAVE_JUMP_LABEL)
646 678
647#ifdef CONFIG_X86_64 679#ifdef CONFIG_X86_64
diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c
index 8f6463d8ed0d..0a99f7198bc3 100644
--- a/arch/x86/kernel/amd_nb.c
+++ b/arch/x86/kernel/amd_nb.c
@@ -12,95 +12,123 @@
12 12
13static u32 *flush_words; 13static u32 *flush_words;
14 14
15struct pci_device_id k8_nb_ids[] = { 15struct pci_device_id amd_nb_misc_ids[] = {
16 { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_MISC) }, 16 { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_MISC) },
17 { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC) }, 17 { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC) },
18 { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_MISC) }, 18 { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_MISC) },
19 {} 19 {}
20}; 20};
21EXPORT_SYMBOL(k8_nb_ids); 21EXPORT_SYMBOL(amd_nb_misc_ids);
22 22
23struct k8_northbridge_info k8_northbridges; 23const struct amd_nb_bus_dev_range amd_nb_bus_dev_ranges[] __initconst = {
24EXPORT_SYMBOL(k8_northbridges); 24 { 0x00, 0x18, 0x20 },
25 { 0xff, 0x00, 0x20 },
26 { 0xfe, 0x00, 0x20 },
27 { }
28};
29
30struct amd_northbridge_info amd_northbridges;
31EXPORT_SYMBOL(amd_northbridges);
25 32
26static struct pci_dev *next_k8_northbridge(struct pci_dev *dev) 33static struct pci_dev *next_northbridge(struct pci_dev *dev,
34 struct pci_device_id *ids)
27{ 35{
28 do { 36 do {
29 dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev); 37 dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev);
30 if (!dev) 38 if (!dev)
31 break; 39 break;
32 } while (!pci_match_id(&k8_nb_ids[0], dev)); 40 } while (!pci_match_id(ids, dev));
33 return dev; 41 return dev;
34} 42}
35 43
36int cache_k8_northbridges(void) 44int amd_cache_northbridges(void)
37{ 45{
38 int i; 46 int i = 0;
39 struct pci_dev *dev; 47 struct amd_northbridge *nb;
48 struct pci_dev *misc;
40 49
41 if (k8_northbridges.num) 50 if (amd_nb_num())
42 return 0; 51 return 0;
43 52
44 dev = NULL; 53 misc = NULL;
45 while ((dev = next_k8_northbridge(dev)) != NULL) 54 while ((misc = next_northbridge(misc, amd_nb_misc_ids)) != NULL)
46 k8_northbridges.num++; 55 i++;
47 56
48 /* some CPU families (e.g. family 0x11) do not support GART */ 57 if (i == 0)
49 if (boot_cpu_data.x86 == 0xf || boot_cpu_data.x86 == 0x10 || 58 return 0;
50 boot_cpu_data.x86 == 0x15)
51 k8_northbridges.gart_supported = 1;
52 59
53 k8_northbridges.nb_misc = kmalloc((k8_northbridges.num + 1) * 60 nb = kzalloc(i * sizeof(struct amd_northbridge), GFP_KERNEL);
54 sizeof(void *), GFP_KERNEL); 61 if (!nb)
55 if (!k8_northbridges.nb_misc)
56 return -ENOMEM; 62 return -ENOMEM;
57 63
58 if (!k8_northbridges.num) { 64 amd_northbridges.nb = nb;
59 k8_northbridges.nb_misc[0] = NULL; 65 amd_northbridges.num = i;
60 return 0;
61 }
62 66
63 if (k8_northbridges.gart_supported) { 67 misc = NULL;
64 flush_words = kmalloc(k8_northbridges.num * sizeof(u32), 68 for (i = 0; i != amd_nb_num(); i++) {
65 GFP_KERNEL); 69 node_to_amd_nb(i)->misc = misc =
66 if (!flush_words) { 70 next_northbridge(misc, amd_nb_misc_ids);
67 kfree(k8_northbridges.nb_misc); 71 }
68 return -ENOMEM; 72
69 } 73 /* some CPU families (e.g. family 0x11) do not support GART */
70 } 74 if (boot_cpu_data.x86 == 0xf || boot_cpu_data.x86 == 0x10 ||
75 boot_cpu_data.x86 == 0x15)
76 amd_northbridges.flags |= AMD_NB_GART;
77
78 /*
79 * Some CPU families support L3 Cache Index Disable. There are some
80 * limitations because of E382 and E388 on family 0x10.
81 */
82 if (boot_cpu_data.x86 == 0x10 &&
83 boot_cpu_data.x86_model >= 0x8 &&
84 (boot_cpu_data.x86_model > 0x9 ||
85 boot_cpu_data.x86_mask >= 0x1))
86 amd_northbridges.flags |= AMD_NB_L3_INDEX_DISABLE;
71 87
72 dev = NULL;
73 i = 0;
74 while ((dev = next_k8_northbridge(dev)) != NULL) {
75 k8_northbridges.nb_misc[i] = dev;
76 if (k8_northbridges.gart_supported)
77 pci_read_config_dword(dev, 0x9c, &flush_words[i++]);
78 }
79 k8_northbridges.nb_misc[i] = NULL;
80 return 0; 88 return 0;
81} 89}
82EXPORT_SYMBOL_GPL(cache_k8_northbridges); 90EXPORT_SYMBOL_GPL(amd_cache_northbridges);
83 91
84/* Ignores subdevice/subvendor but as far as I can figure out 92/* Ignores subdevice/subvendor but as far as I can figure out
85 they're useless anyways */ 93 they're useless anyways */
86int __init early_is_k8_nb(u32 device) 94int __init early_is_amd_nb(u32 device)
87{ 95{
88 struct pci_device_id *id; 96 struct pci_device_id *id;
89 u32 vendor = device & 0xffff; 97 u32 vendor = device & 0xffff;
90 device >>= 16; 98 device >>= 16;
91 for (id = k8_nb_ids; id->vendor; id++) 99 for (id = amd_nb_misc_ids; id->vendor; id++)
92 if (vendor == id->vendor && device == id->device) 100 if (vendor == id->vendor && device == id->device)
93 return 1; 101 return 1;
94 return 0; 102 return 0;
95} 103}
96 104
97void k8_flush_garts(void) 105int amd_cache_gart(void)
106{
107 int i;
108
109 if (!amd_nb_has_feature(AMD_NB_GART))
110 return 0;
111
112 flush_words = kmalloc(amd_nb_num() * sizeof(u32), GFP_KERNEL);
113 if (!flush_words) {
114 amd_northbridges.flags &= ~AMD_NB_GART;
115 return -ENOMEM;
116 }
117
118 for (i = 0; i != amd_nb_num(); i++)
119 pci_read_config_dword(node_to_amd_nb(i)->misc, 0x9c,
120 &flush_words[i]);
121
122 return 0;
123}
124
125void amd_flush_garts(void)
98{ 126{
99 int flushed, i; 127 int flushed, i;
100 unsigned long flags; 128 unsigned long flags;
101 static DEFINE_SPINLOCK(gart_lock); 129 static DEFINE_SPINLOCK(gart_lock);
102 130
103 if (!k8_northbridges.gart_supported) 131 if (!amd_nb_has_feature(AMD_NB_GART))
104 return; 132 return;
105 133
106 /* Avoid races between AGP and IOMMU. In theory it's not needed 134 /* Avoid races between AGP and IOMMU. In theory it's not needed
@@ -109,16 +137,16 @@ void k8_flush_garts(void)
109 that it doesn't matter to serialize more. -AK */ 137 that it doesn't matter to serialize more. -AK */
110 spin_lock_irqsave(&gart_lock, flags); 138 spin_lock_irqsave(&gart_lock, flags);
111 flushed = 0; 139 flushed = 0;
112 for (i = 0; i < k8_northbridges.num; i++) { 140 for (i = 0; i < amd_nb_num(); i++) {
113 pci_write_config_dword(k8_northbridges.nb_misc[i], 0x9c, 141 pci_write_config_dword(node_to_amd_nb(i)->misc, 0x9c,
114 flush_words[i]|1); 142 flush_words[i] | 1);
115 flushed++; 143 flushed++;
116 } 144 }
117 for (i = 0; i < k8_northbridges.num; i++) { 145 for (i = 0; i < amd_nb_num(); i++) {
118 u32 w; 146 u32 w;
119 /* Make sure the hardware actually executed the flush*/ 147 /* Make sure the hardware actually executed the flush*/
120 for (;;) { 148 for (;;) {
121 pci_read_config_dword(k8_northbridges.nb_misc[i], 149 pci_read_config_dword(node_to_amd_nb(i)->misc,
122 0x9c, &w); 150 0x9c, &w);
123 if (!(w & 1)) 151 if (!(w & 1))
124 break; 152 break;
@@ -129,19 +157,23 @@ void k8_flush_garts(void)
129 if (!flushed) 157 if (!flushed)
130 printk("nothing to flush?\n"); 158 printk("nothing to flush?\n");
131} 159}
132EXPORT_SYMBOL_GPL(k8_flush_garts); 160EXPORT_SYMBOL_GPL(amd_flush_garts);
133 161
134static __init int init_k8_nbs(void) 162static __init int init_amd_nbs(void)
135{ 163{
136 int err = 0; 164 int err = 0;
137 165
138 err = cache_k8_northbridges(); 166 err = amd_cache_northbridges();
139 167
140 if (err < 0) 168 if (err < 0)
141 printk(KERN_NOTICE "K8 NB: Cannot enumerate AMD northbridges.\n"); 169 printk(KERN_NOTICE "AMD NB: Cannot enumerate AMD northbridges.\n");
170
171 if (amd_cache_gart() < 0)
172 printk(KERN_NOTICE "AMD NB: Cannot initialize GART flush words, "
173 "GART support disabled.\n");
142 174
143 return err; 175 return err;
144} 176}
145 177
146/* This has to go after the PCI subsystem */ 178/* This has to go after the PCI subsystem */
147fs_initcall(init_k8_nbs); 179fs_initcall(init_amd_nbs);
diff --git a/arch/x86/kernel/apb_timer.c b/arch/x86/kernel/apb_timer.c
index 92543c73cf8e..7c9ab59653e8 100644
--- a/arch/x86/kernel/apb_timer.c
+++ b/arch/x86/kernel/apb_timer.c
@@ -315,6 +315,7 @@ static void apbt_setup_irq(struct apbt_dev *adev)
315 315
316 if (system_state == SYSTEM_BOOTING) { 316 if (system_state == SYSTEM_BOOTING) {
317 irq_modify_status(adev->irq, 0, IRQ_MOVE_PCNTXT); 317 irq_modify_status(adev->irq, 0, IRQ_MOVE_PCNTXT);
318 irq_set_affinity(adev->irq, cpumask_of(adev->cpu));
318 /* APB timer irqs are set up as mp_irqs, timer is edge type */ 319 /* APB timer irqs are set up as mp_irqs, timer is edge type */
319 __set_irq_handler(adev->irq, handle_edge_irq, 0, "edge"); 320 __set_irq_handler(adev->irq, handle_edge_irq, 0, "edge");
320 if (request_irq(adev->irq, apbt_interrupt_handler, 321 if (request_irq(adev->irq, apbt_interrupt_handler,
diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c
index b3a16e8f0703..5955a7800a96 100644
--- a/arch/x86/kernel/aperture_64.c
+++ b/arch/x86/kernel/aperture_64.c
@@ -39,18 +39,6 @@ int fallback_aper_force __initdata;
39 39
40int fix_aperture __initdata = 1; 40int fix_aperture __initdata = 1;
41 41
42struct bus_dev_range {
43 int bus;
44 int dev_base;
45 int dev_limit;
46};
47
48static struct bus_dev_range bus_dev_ranges[] __initdata = {
49 { 0x00, 0x18, 0x20},
50 { 0xff, 0x00, 0x20},
51 { 0xfe, 0x00, 0x20}
52};
53
54static struct resource gart_resource = { 42static struct resource gart_resource = {
55 .name = "GART", 43 .name = "GART",
56 .flags = IORESOURCE_MEM, 44 .flags = IORESOURCE_MEM,
@@ -206,7 +194,7 @@ static u32 __init read_agp(int bus, int slot, int func, int cap, u32 *order)
206 * Do an PCI bus scan by hand because we're running before the PCI 194 * Do an PCI bus scan by hand because we're running before the PCI
207 * subsystem. 195 * subsystem.
208 * 196 *
209 * All K8 AGP bridges are AGPv3 compliant, so we can do this scan 197 * All AMD AGP bridges are AGPv3 compliant, so we can do this scan
210 * generically. It's probably overkill to always scan all slots because 198 * generically. It's probably overkill to always scan all slots because
211 * the AGP bridges should be always an own bus on the HT hierarchy, 199 * the AGP bridges should be always an own bus on the HT hierarchy,
212 * but do it here for future safety. 200 * but do it here for future safety.
@@ -294,16 +282,16 @@ void __init early_gart_iommu_check(void)
294 search_agp_bridge(&agp_aper_order, &valid_agp); 282 search_agp_bridge(&agp_aper_order, &valid_agp);
295 283
296 fix = 0; 284 fix = 0;
297 for (i = 0; i < ARRAY_SIZE(bus_dev_ranges); i++) { 285 for (i = 0; amd_nb_bus_dev_ranges[i].dev_limit; i++) {
298 int bus; 286 int bus;
299 int dev_base, dev_limit; 287 int dev_base, dev_limit;
300 288
301 bus = bus_dev_ranges[i].bus; 289 bus = amd_nb_bus_dev_ranges[i].bus;
302 dev_base = bus_dev_ranges[i].dev_base; 290 dev_base = amd_nb_bus_dev_ranges[i].dev_base;
303 dev_limit = bus_dev_ranges[i].dev_limit; 291 dev_limit = amd_nb_bus_dev_ranges[i].dev_limit;
304 292
305 for (slot = dev_base; slot < dev_limit; slot++) { 293 for (slot = dev_base; slot < dev_limit; slot++) {
306 if (!early_is_k8_nb(read_pci_config(bus, slot, 3, 0x00))) 294 if (!early_is_amd_nb(read_pci_config(bus, slot, 3, 0x00)))
307 continue; 295 continue;
308 296
309 ctl = read_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL); 297 ctl = read_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL);
@@ -349,16 +337,16 @@ void __init early_gart_iommu_check(void)
349 return; 337 return;
350 338
351 /* disable them all at first */ 339 /* disable them all at first */
352 for (i = 0; i < ARRAY_SIZE(bus_dev_ranges); i++) { 340 for (i = 0; i < amd_nb_bus_dev_ranges[i].dev_limit; i++) {
353 int bus; 341 int bus;
354 int dev_base, dev_limit; 342 int dev_base, dev_limit;
355 343
356 bus = bus_dev_ranges[i].bus; 344 bus = amd_nb_bus_dev_ranges[i].bus;
357 dev_base = bus_dev_ranges[i].dev_base; 345 dev_base = amd_nb_bus_dev_ranges[i].dev_base;
358 dev_limit = bus_dev_ranges[i].dev_limit; 346 dev_limit = amd_nb_bus_dev_ranges[i].dev_limit;
359 347
360 for (slot = dev_base; slot < dev_limit; slot++) { 348 for (slot = dev_base; slot < dev_limit; slot++) {
361 if (!early_is_k8_nb(read_pci_config(bus, slot, 3, 0x00))) 349 if (!early_is_amd_nb(read_pci_config(bus, slot, 3, 0x00)))
362 continue; 350 continue;
363 351
364 ctl = read_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL); 352 ctl = read_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL);
@@ -390,17 +378,17 @@ int __init gart_iommu_hole_init(void)
390 378
391 fix = 0; 379 fix = 0;
392 node = 0; 380 node = 0;
393 for (i = 0; i < ARRAY_SIZE(bus_dev_ranges); i++) { 381 for (i = 0; i < amd_nb_bus_dev_ranges[i].dev_limit; i++) {
394 int bus; 382 int bus;
395 int dev_base, dev_limit; 383 int dev_base, dev_limit;
396 u32 ctl; 384 u32 ctl;
397 385
398 bus = bus_dev_ranges[i].bus; 386 bus = amd_nb_bus_dev_ranges[i].bus;
399 dev_base = bus_dev_ranges[i].dev_base; 387 dev_base = amd_nb_bus_dev_ranges[i].dev_base;
400 dev_limit = bus_dev_ranges[i].dev_limit; 388 dev_limit = amd_nb_bus_dev_ranges[i].dev_limit;
401 389
402 for (slot = dev_base; slot < dev_limit; slot++) { 390 for (slot = dev_base; slot < dev_limit; slot++) {
403 if (!early_is_k8_nb(read_pci_config(bus, slot, 3, 0x00))) 391 if (!early_is_amd_nb(read_pci_config(bus, slot, 3, 0x00)))
404 continue; 392 continue;
405 393
406 iommu_detected = 1; 394 iommu_detected = 1;
@@ -505,7 +493,7 @@ out:
505 } 493 }
506 494
507 /* Fix up the north bridges */ 495 /* Fix up the north bridges */
508 for (i = 0; i < ARRAY_SIZE(bus_dev_ranges); i++) { 496 for (i = 0; i < amd_nb_bus_dev_ranges[i].dev_limit; i++) {
509 int bus, dev_base, dev_limit; 497 int bus, dev_base, dev_limit;
510 498
511 /* 499 /*
@@ -514,11 +502,11 @@ out:
514 */ 502 */
515 u32 ctl = DISTLBWALKPRB | aper_order << 1; 503 u32 ctl = DISTLBWALKPRB | aper_order << 1;
516 504
517 bus = bus_dev_ranges[i].bus; 505 bus = amd_nb_bus_dev_ranges[i].bus;
518 dev_base = bus_dev_ranges[i].dev_base; 506 dev_base = amd_nb_bus_dev_ranges[i].dev_base;
519 dev_limit = bus_dev_ranges[i].dev_limit; 507 dev_limit = amd_nb_bus_dev_ranges[i].dev_limit;
520 for (slot = dev_base; slot < dev_limit; slot++) { 508 for (slot = dev_base; slot < dev_limit; slot++) {
521 if (!early_is_k8_nb(read_pci_config(bus, slot, 3, 0x00))) 509 if (!early_is_amd_nb(read_pci_config(bus, slot, 3, 0x00)))
522 continue; 510 continue;
523 511
524 write_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL, ctl); 512 write_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL, ctl);
diff --git a/arch/x86/kernel/apic/Makefile b/arch/x86/kernel/apic/Makefile
index 910f20b457c4..3966b564ea47 100644
--- a/arch/x86/kernel/apic/Makefile
+++ b/arch/x86/kernel/apic/Makefile
@@ -3,10 +3,7 @@
3# 3#
4 4
5obj-$(CONFIG_X86_LOCAL_APIC) += apic.o apic_noop.o probe_$(BITS).o ipi.o 5obj-$(CONFIG_X86_LOCAL_APIC) += apic.o apic_noop.o probe_$(BITS).o ipi.o
6ifneq ($(CONFIG_HARDLOCKUP_DETECTOR),y) 6obj-y += hw_nmi.o
7obj-$(CONFIG_X86_LOCAL_APIC) += nmi.o
8endif
9obj-$(CONFIG_HARDLOCKUP_DETECTOR) += hw_nmi.o
10 7
11obj-$(CONFIG_X86_IO_APIC) += io_apic.o 8obj-$(CONFIG_X86_IO_APIC) += io_apic.o
12obj-$(CONFIG_SMP) += ipi.o 9obj-$(CONFIG_SMP) += ipi.o
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 78218135b48e..06c196d7e59c 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -31,7 +31,6 @@
31#include <linux/init.h> 31#include <linux/init.h>
32#include <linux/cpu.h> 32#include <linux/cpu.h>
33#include <linux/dmi.h> 33#include <linux/dmi.h>
34#include <linux/nmi.h>
35#include <linux/smp.h> 34#include <linux/smp.h>
36#include <linux/mm.h> 35#include <linux/mm.h>
37 36
@@ -50,8 +49,8 @@
50#include <asm/mtrr.h> 49#include <asm/mtrr.h>
51#include <asm/smp.h> 50#include <asm/smp.h>
52#include <asm/mce.h> 51#include <asm/mce.h>
53#include <asm/kvm_para.h>
54#include <asm/tsc.h> 52#include <asm/tsc.h>
53#include <asm/hypervisor.h>
55 54
56unsigned int num_processors; 55unsigned int num_processors;
57 56
@@ -432,17 +431,18 @@ int setup_APIC_eilvt(u8 offset, u8 vector, u8 msg_type, u8 mask)
432 reserved = reserve_eilvt_offset(offset, new); 431 reserved = reserve_eilvt_offset(offset, new);
433 432
434 if (reserved != new) { 433 if (reserved != new) {
435 pr_err(FW_BUG "cpu %d, try to setup vector 0x%x, but " 434 pr_err(FW_BUG "cpu %d, try to use APIC%lX (LVT offset %d) for "
436 "vector 0x%x was already reserved by another core, " 435 "vector 0x%x, but the register is already in use for "
437 "APIC%lX=0x%x\n", 436 "vector 0x%x on another cpu\n",
438 smp_processor_id(), new, reserved, reg, old); 437 smp_processor_id(), reg, offset, new, reserved);
439 return -EINVAL; 438 return -EINVAL;
440 } 439 }
441 440
442 if (!eilvt_entry_is_changeable(old, new)) { 441 if (!eilvt_entry_is_changeable(old, new)) {
443 pr_err(FW_BUG "cpu %d, try to setup vector 0x%x but " 442 pr_err(FW_BUG "cpu %d, try to use APIC%lX (LVT offset %d) for "
444 "register already in use, APIC%lX=0x%x\n", 443 "vector 0x%x, but the register is already in use for "
445 smp_processor_id(), new, reg, old); 444 "vector 0x%x on this cpu\n",
445 smp_processor_id(), reg, offset, new, old);
446 return -EBUSY; 446 return -EBUSY;
447 } 447 }
448 448
@@ -516,7 +516,7 @@ static void __cpuinit setup_APIC_timer(void)
516{ 516{
517 struct clock_event_device *levt = &__get_cpu_var(lapic_events); 517 struct clock_event_device *levt = &__get_cpu_var(lapic_events);
518 518
519 if (cpu_has(&current_cpu_data, X86_FEATURE_ARAT)) { 519 if (cpu_has(__this_cpu_ptr(&cpu_info), X86_FEATURE_ARAT)) {
520 lapic_clockevent.features &= ~CLOCK_EVT_FEAT_C3STOP; 520 lapic_clockevent.features &= ~CLOCK_EVT_FEAT_C3STOP;
521 /* Make LAPIC timer preferrable over percpu HPET */ 521 /* Make LAPIC timer preferrable over percpu HPET */
522 lapic_clockevent.rating = 150; 522 lapic_clockevent.rating = 150;
@@ -684,7 +684,7 @@ static int __init calibrate_APIC_clock(void)
684 lapic_clockevent.mult = div_sc(delta, TICK_NSEC * LAPIC_CAL_LOOPS, 684 lapic_clockevent.mult = div_sc(delta, TICK_NSEC * LAPIC_CAL_LOOPS,
685 lapic_clockevent.shift); 685 lapic_clockevent.shift);
686 lapic_clockevent.max_delta_ns = 686 lapic_clockevent.max_delta_ns =
687 clockevent_delta2ns(0x7FFFFF, &lapic_clockevent); 687 clockevent_delta2ns(0x7FFFFFFF, &lapic_clockevent);
688 lapic_clockevent.min_delta_ns = 688 lapic_clockevent.min_delta_ns =
689 clockevent_delta2ns(0xF, &lapic_clockevent); 689 clockevent_delta2ns(0xF, &lapic_clockevent);
690 690
@@ -799,11 +799,7 @@ void __init setup_boot_APIC_clock(void)
799 * PIT/HPET going. Otherwise register lapic as a dummy 799 * PIT/HPET going. Otherwise register lapic as a dummy
800 * device. 800 * device.
801 */ 801 */
802 if (nmi_watchdog != NMI_IO_APIC) 802 lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY;
803 lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY;
804 else
805 pr_warning("APIC timer registered as dummy,"
806 " due to nmi_watchdog=%d!\n", nmi_watchdog);
807 803
808 /* Setup the lapic or request the broadcast */ 804 /* Setup the lapic or request the broadcast */
809 setup_APIC_timer(); 805 setup_APIC_timer();
@@ -1195,12 +1191,15 @@ static void __cpuinit lapic_setup_esr(void)
1195 oldvalue, value); 1191 oldvalue, value);
1196} 1192}
1197 1193
1198
1199/** 1194/**
1200 * setup_local_APIC - setup the local APIC 1195 * setup_local_APIC - setup the local APIC
1196 *
1197 * Used to setup local APIC while initializing BSP or bringin up APs.
1198 * Always called with preemption disabled.
1201 */ 1199 */
1202void __cpuinit setup_local_APIC(void) 1200void __cpuinit setup_local_APIC(void)
1203{ 1201{
1202 int cpu = smp_processor_id();
1204 unsigned int value, queued; 1203 unsigned int value, queued;
1205 int i, j, acked = 0; 1204 int i, j, acked = 0;
1206 unsigned long long tsc = 0, ntsc; 1205 unsigned long long tsc = 0, ntsc;
@@ -1225,8 +1224,6 @@ void __cpuinit setup_local_APIC(void)
1225#endif 1224#endif
1226 perf_events_lapic_init(); 1225 perf_events_lapic_init();
1227 1226
1228 preempt_disable();
1229
1230 /* 1227 /*
1231 * Double-check whether this APIC is really registered. 1228 * Double-check whether this APIC is really registered.
1232 * This is meaningless in clustered apic mode, so we skip it. 1229 * This is meaningless in clustered apic mode, so we skip it.
@@ -1342,21 +1339,19 @@ void __cpuinit setup_local_APIC(void)
1342 * TODO: set up through-local-APIC from through-I/O-APIC? --macro 1339 * TODO: set up through-local-APIC from through-I/O-APIC? --macro
1343 */ 1340 */
1344 value = apic_read(APIC_LVT0) & APIC_LVT_MASKED; 1341 value = apic_read(APIC_LVT0) & APIC_LVT_MASKED;
1345 if (!smp_processor_id() && (pic_mode || !value)) { 1342 if (!cpu && (pic_mode || !value)) {
1346 value = APIC_DM_EXTINT; 1343 value = APIC_DM_EXTINT;
1347 apic_printk(APIC_VERBOSE, "enabled ExtINT on CPU#%d\n", 1344 apic_printk(APIC_VERBOSE, "enabled ExtINT on CPU#%d\n", cpu);
1348 smp_processor_id());
1349 } else { 1345 } else {
1350 value = APIC_DM_EXTINT | APIC_LVT_MASKED; 1346 value = APIC_DM_EXTINT | APIC_LVT_MASKED;
1351 apic_printk(APIC_VERBOSE, "masked ExtINT on CPU#%d\n", 1347 apic_printk(APIC_VERBOSE, "masked ExtINT on CPU#%d\n", cpu);
1352 smp_processor_id());
1353 } 1348 }
1354 apic_write(APIC_LVT0, value); 1349 apic_write(APIC_LVT0, value);
1355 1350
1356 /* 1351 /*
1357 * only the BP should see the LINT1 NMI signal, obviously. 1352 * only the BP should see the LINT1 NMI signal, obviously.
1358 */ 1353 */
1359 if (!smp_processor_id()) 1354 if (!cpu)
1360 value = APIC_DM_NMI; 1355 value = APIC_DM_NMI;
1361 else 1356 else
1362 value = APIC_DM_NMI | APIC_LVT_MASKED; 1357 value = APIC_DM_NMI | APIC_LVT_MASKED;
@@ -1364,11 +1359,9 @@ void __cpuinit setup_local_APIC(void)
1364 value |= APIC_LVT_LEVEL_TRIGGER; 1359 value |= APIC_LVT_LEVEL_TRIGGER;
1365 apic_write(APIC_LVT1, value); 1360 apic_write(APIC_LVT1, value);
1366 1361
1367 preempt_enable();
1368
1369#ifdef CONFIG_X86_MCE_INTEL 1362#ifdef CONFIG_X86_MCE_INTEL
1370 /* Recheck CMCI information after local APIC is up on CPU #0 */ 1363 /* Recheck CMCI information after local APIC is up on CPU #0 */
1371 if (smp_processor_id() == 0) 1364 if (!cpu)
1372 cmci_recheck(); 1365 cmci_recheck();
1373#endif 1366#endif
1374} 1367}
@@ -1387,7 +1380,6 @@ void __cpuinit end_local_APIC_setup(void)
1387 } 1380 }
1388#endif 1381#endif
1389 1382
1390 setup_apic_nmi_watchdog(NULL);
1391 apic_pm_activate(); 1383 apic_pm_activate();
1392 1384
1393 /* 1385 /*
@@ -1484,7 +1476,8 @@ void __init enable_IR_x2apic(void)
1484 /* IR is required if there is APIC ID > 255 even when running 1476 /* IR is required if there is APIC ID > 255 even when running
1485 * under KVM 1477 * under KVM
1486 */ 1478 */
1487 if (max_physical_apicid > 255 || !kvm_para_available()) 1479 if (max_physical_apicid > 255 ||
1480 !hypervisor_x2apic_available())
1488 goto nox2apic; 1481 goto nox2apic;
1489 /* 1482 /*
1490 * without IR all CPUs can be addressed by IOAPIC/MSI 1483 * without IR all CPUs can be addressed by IOAPIC/MSI
@@ -1538,13 +1531,60 @@ static int __init detect_init_APIC(void)
1538 return 0; 1531 return 0;
1539} 1532}
1540#else 1533#else
1534
1535static int apic_verify(void)
1536{
1537 u32 features, h, l;
1538
1539 /*
1540 * The APIC feature bit should now be enabled
1541 * in `cpuid'
1542 */
1543 features = cpuid_edx(1);
1544 if (!(features & (1 << X86_FEATURE_APIC))) {
1545 pr_warning("Could not enable APIC!\n");
1546 return -1;
1547 }
1548 set_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
1549 mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
1550
1551 /* The BIOS may have set up the APIC at some other address */
1552 rdmsr(MSR_IA32_APICBASE, l, h);
1553 if (l & MSR_IA32_APICBASE_ENABLE)
1554 mp_lapic_addr = l & MSR_IA32_APICBASE_BASE;
1555
1556 pr_info("Found and enabled local APIC!\n");
1557 return 0;
1558}
1559
1560int apic_force_enable(void)
1561{
1562 u32 h, l;
1563
1564 if (disable_apic)
1565 return -1;
1566
1567 /*
1568 * Some BIOSes disable the local APIC in the APIC_BASE
1569 * MSR. This can only be done in software for Intel P6 or later
1570 * and AMD K7 (Model > 1) or later.
1571 */
1572 rdmsr(MSR_IA32_APICBASE, l, h);
1573 if (!(l & MSR_IA32_APICBASE_ENABLE)) {
1574 pr_info("Local APIC disabled by BIOS -- reenabling.\n");
1575 l &= ~MSR_IA32_APICBASE_BASE;
1576 l |= MSR_IA32_APICBASE_ENABLE | APIC_DEFAULT_PHYS_BASE;
1577 wrmsr(MSR_IA32_APICBASE, l, h);
1578 enabled_via_apicbase = 1;
1579 }
1580 return apic_verify();
1581}
1582
1541/* 1583/*
1542 * Detect and initialize APIC 1584 * Detect and initialize APIC
1543 */ 1585 */
1544static int __init detect_init_APIC(void) 1586static int __init detect_init_APIC(void)
1545{ 1587{
1546 u32 h, l, features;
1547
1548 /* Disabled by kernel option? */ 1588 /* Disabled by kernel option? */
1549 if (disable_apic) 1589 if (disable_apic)
1550 return -1; 1590 return -1;
@@ -1574,38 +1614,12 @@ static int __init detect_init_APIC(void)
1574 "you can enable it with \"lapic\"\n"); 1614 "you can enable it with \"lapic\"\n");
1575 return -1; 1615 return -1;
1576 } 1616 }
1577 /* 1617 if (apic_force_enable())
1578 * Some BIOSes disable the local APIC in the APIC_BASE 1618 return -1;
1579 * MSR. This can only be done in software for Intel P6 or later 1619 } else {
1580 * and AMD K7 (Model > 1) or later. 1620 if (apic_verify())
1581 */ 1621 return -1;
1582 rdmsr(MSR_IA32_APICBASE, l, h);
1583 if (!(l & MSR_IA32_APICBASE_ENABLE)) {
1584 pr_info("Local APIC disabled by BIOS -- reenabling.\n");
1585 l &= ~MSR_IA32_APICBASE_BASE;
1586 l |= MSR_IA32_APICBASE_ENABLE | APIC_DEFAULT_PHYS_BASE;
1587 wrmsr(MSR_IA32_APICBASE, l, h);
1588 enabled_via_apicbase = 1;
1589 }
1590 }
1591 /*
1592 * The APIC feature bit should now be enabled
1593 * in `cpuid'
1594 */
1595 features = cpuid_edx(1);
1596 if (!(features & (1 << X86_FEATURE_APIC))) {
1597 pr_warning("Could not enable APIC!\n");
1598 return -1;
1599 } 1622 }
1600 set_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
1601 mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
1602
1603 /* The BIOS may have set up the APIC at some other address */
1604 rdmsr(MSR_IA32_APICBASE, l, h);
1605 if (l & MSR_IA32_APICBASE_ENABLE)
1606 mp_lapic_addr = l & MSR_IA32_APICBASE_BASE;
1607
1608 pr_info("Found and enabled local APIC!\n");
1609 1623
1610 apic_pm_activate(); 1624 apic_pm_activate();
1611 1625
@@ -1617,28 +1631,6 @@ no_apic:
1617} 1631}
1618#endif 1632#endif
1619 1633
1620#ifdef CONFIG_X86_64
1621void __init early_init_lapic_mapping(void)
1622{
1623 /*
1624 * If no local APIC can be found then go out
1625 * : it means there is no mpatable and MADT
1626 */
1627 if (!smp_found_config)
1628 return;
1629
1630 set_fixmap_nocache(FIX_APIC_BASE, mp_lapic_addr);
1631 apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n",
1632 APIC_BASE, mp_lapic_addr);
1633
1634 /*
1635 * Fetch the APIC ID of the BSP in case we have a
1636 * default configuration (or the MP table is broken).
1637 */
1638 boot_cpu_physical_apicid = read_apic_id();
1639}
1640#endif
1641
1642/** 1634/**
1643 * init_apic_mappings - initialize APIC mappings 1635 * init_apic_mappings - initialize APIC mappings
1644 */ 1636 */
@@ -1664,10 +1656,7 @@ void __init init_apic_mappings(void)
1664 * acpi_register_lapic_address() 1656 * acpi_register_lapic_address()
1665 */ 1657 */
1666 if (!acpi_lapic && !smp_found_config) 1658 if (!acpi_lapic && !smp_found_config)
1667 set_fixmap_nocache(FIX_APIC_BASE, apic_phys); 1659 register_lapic_address(apic_phys);
1668
1669 apic_printk(APIC_VERBOSE, "mapped APIC to %08lx (%08lx)\n",
1670 APIC_BASE, apic_phys);
1671 } 1660 }
1672 1661
1673 /* 1662 /*
@@ -1689,11 +1678,27 @@ void __init init_apic_mappings(void)
1689 } 1678 }
1690} 1679}
1691 1680
1681void __init register_lapic_address(unsigned long address)
1682{
1683 mp_lapic_addr = address;
1684
1685 if (!x2apic_mode) {
1686 set_fixmap_nocache(FIX_APIC_BASE, address);
1687 apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n",
1688 APIC_BASE, mp_lapic_addr);
1689 }
1690 if (boot_cpu_physical_apicid == -1U) {
1691 boot_cpu_physical_apicid = read_apic_id();
1692 apic_version[boot_cpu_physical_apicid] =
1693 GET_APIC_VERSION(apic_read(APIC_LVR));
1694 }
1695}
1696
1692/* 1697/*
1693 * This initializes the IO-APIC and APIC hardware if this is 1698 * This initializes the IO-APIC and APIC hardware if this is
1694 * a UP kernel. 1699 * a UP kernel.
1695 */ 1700 */
1696int apic_version[MAX_APICS]; 1701int apic_version[MAX_LOCAL_APIC];
1697 1702
1698int __init APIC_init_uniprocessor(void) 1703int __init APIC_init_uniprocessor(void)
1699{ 1704{
@@ -1758,17 +1763,10 @@ int __init APIC_init_uniprocessor(void)
1758 setup_IO_APIC(); 1763 setup_IO_APIC();
1759 else { 1764 else {
1760 nr_ioapics = 0; 1765 nr_ioapics = 0;
1761 localise_nmi_watchdog();
1762 } 1766 }
1763#else
1764 localise_nmi_watchdog();
1765#endif 1767#endif
1766 1768
1767 x86_init.timers.setup_percpu_clockev(); 1769 x86_init.timers.setup_percpu_clockev();
1768#ifdef CONFIG_X86_64
1769 check_nmi_watchdog();
1770#endif
1771
1772 return 0; 1770 return 0;
1773} 1771}
1774 1772
diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c
index 62f6e1e55b90..79fd43ca6f96 100644
--- a/arch/x86/kernel/apic/hw_nmi.c
+++ b/arch/x86/kernel/apic/hw_nmi.c
@@ -17,20 +17,31 @@
17#include <linux/nmi.h> 17#include <linux/nmi.h>
18#include <linux/module.h> 18#include <linux/module.h>
19 19
20#ifdef CONFIG_HARDLOCKUP_DETECTOR
20u64 hw_nmi_get_sample_period(void) 21u64 hw_nmi_get_sample_period(void)
21{ 22{
22 return (u64)(cpu_khz) * 1000 * 60; 23 return (u64)(cpu_khz) * 1000 * 60;
23} 24}
25#endif
24 26
25#ifdef ARCH_HAS_NMI_WATCHDOG 27#ifdef arch_trigger_all_cpu_backtrace
26
27/* For reliability, we're prepared to waste bits here. */ 28/* For reliability, we're prepared to waste bits here. */
28static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly; 29static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly;
29 30
31/* "in progress" flag of arch_trigger_all_cpu_backtrace */
32static unsigned long backtrace_flag;
33
30void arch_trigger_all_cpu_backtrace(void) 34void arch_trigger_all_cpu_backtrace(void)
31{ 35{
32 int i; 36 int i;
33 37
38 if (test_and_set_bit(0, &backtrace_flag))
39 /*
40 * If there is already a trigger_all_cpu_backtrace() in progress
41 * (backtrace_flag == 1), don't output double cpu dump infos.
42 */
43 return;
44
34 cpumask_copy(to_cpumask(backtrace_mask), cpu_online_mask); 45 cpumask_copy(to_cpumask(backtrace_mask), cpu_online_mask);
35 46
36 printk(KERN_INFO "sending NMI to all CPUs:\n"); 47 printk(KERN_INFO "sending NMI to all CPUs:\n");
@@ -42,6 +53,9 @@ void arch_trigger_all_cpu_backtrace(void)
42 break; 53 break;
43 mdelay(1); 54 mdelay(1);
44 } 55 }
56
57 clear_bit(0, &backtrace_flag);
58 smp_mb__after_clear_bit();
45} 59}
46 60
47static int __kprobes 61static int __kprobes
@@ -50,11 +64,10 @@ arch_trigger_all_cpu_backtrace_handler(struct notifier_block *self,
50{ 64{
51 struct die_args *args = __args; 65 struct die_args *args = __args;
52 struct pt_regs *regs; 66 struct pt_regs *regs;
53 int cpu = smp_processor_id(); 67 int cpu;
54 68
55 switch (cmd) { 69 switch (cmd) {
56 case DIE_NMI: 70 case DIE_NMI:
57 case DIE_NMI_IPI:
58 break; 71 break;
59 72
60 default: 73 default:
@@ -62,6 +75,7 @@ arch_trigger_all_cpu_backtrace_handler(struct notifier_block *self,
62 } 75 }
63 76
64 regs = args->regs; 77 regs = args->regs;
78 cpu = smp_processor_id();
65 79
66 if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) { 80 if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) {
67 static arch_spinlock_t lock = __ARCH_SPIN_LOCK_UNLOCKED; 81 static arch_spinlock_t lock = __ARCH_SPIN_LOCK_UNLOCKED;
@@ -81,7 +95,7 @@ arch_trigger_all_cpu_backtrace_handler(struct notifier_block *self,
81static __read_mostly struct notifier_block backtrace_notifier = { 95static __read_mostly struct notifier_block backtrace_notifier = {
82 .notifier_call = arch_trigger_all_cpu_backtrace_handler, 96 .notifier_call = arch_trigger_all_cpu_backtrace_handler,
83 .next = NULL, 97 .next = NULL,
84 .priority = 1 98 .priority = NMI_LOCAL_LOW_PRIOR,
85}; 99};
86 100
87static int __init register_trigger_all_cpu_backtrace(void) 101static int __init register_trigger_all_cpu_backtrace(void)
@@ -91,18 +105,3 @@ static int __init register_trigger_all_cpu_backtrace(void)
91} 105}
92early_initcall(register_trigger_all_cpu_backtrace); 106early_initcall(register_trigger_all_cpu_backtrace);
93#endif 107#endif
94
95/* STUB calls to mimic old nmi_watchdog behaviour */
96#if defined(CONFIG_X86_LOCAL_APIC)
97unsigned int nmi_watchdog = NMI_NONE;
98EXPORT_SYMBOL(nmi_watchdog);
99void acpi_nmi_enable(void) { return; }
100void acpi_nmi_disable(void) { return; }
101#endif
102atomic_t nmi_active = ATOMIC_INIT(0); /* oprofile uses this */
103EXPORT_SYMBOL(nmi_active);
104int unknown_nmi_panic;
105void cpu_nmi_set_wd_enabled(void) { return; }
106void stop_apic_nmi_watchdog(void *unused) { return; }
107void setup_apic_nmi_watchdog(void *unused) { return; }
108int __init check_nmi_watchdog(void) { return 0; }
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index fadcd743a74f..697dc34b7b87 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -54,7 +54,6 @@
54#include <asm/dma.h> 54#include <asm/dma.h>
55#include <asm/timer.h> 55#include <asm/timer.h>
56#include <asm/i8259.h> 56#include <asm/i8259.h>
57#include <asm/nmi.h>
58#include <asm/msidef.h> 57#include <asm/msidef.h>
59#include <asm/hypertransport.h> 58#include <asm/hypertransport.h>
60#include <asm/setup.h> 59#include <asm/setup.h>
@@ -126,6 +125,26 @@ static int __init parse_noapic(char *str)
126} 125}
127early_param("noapic", parse_noapic); 126early_param("noapic", parse_noapic);
128 127
128/* Will be called in mpparse/acpi/sfi codes for saving IRQ info */
129void mp_save_irq(struct mpc_intsrc *m)
130{
131 int i;
132
133 apic_printk(APIC_VERBOSE, "Int: type %d, pol %d, trig %d, bus %02x,"
134 " IRQ %02x, APIC ID %x, APIC INT %02x\n",
135 m->irqtype, m->irqflag & 3, (m->irqflag >> 2) & 3, m->srcbus,
136 m->srcbusirq, m->dstapic, m->dstirq);
137
138 for (i = 0; i < mp_irq_entries; i++) {
139 if (!memcmp(&mp_irqs[i], m, sizeof(*m)))
140 return;
141 }
142
143 memcpy(&mp_irqs[mp_irq_entries], m, sizeof(*m));
144 if (++mp_irq_entries == MAX_IRQ_SOURCES)
145 panic("Max # of irq sources exceeded!!\n");
146}
147
129struct irq_pin_list { 148struct irq_pin_list {
130 int apic, pin; 149 int apic, pin;
131 struct irq_pin_list *next; 150 struct irq_pin_list *next;
@@ -136,6 +155,7 @@ static struct irq_pin_list *alloc_irq_pin_list(int node)
136 return kzalloc_node(sizeof(struct irq_pin_list), GFP_KERNEL, node); 155 return kzalloc_node(sizeof(struct irq_pin_list), GFP_KERNEL, node);
137} 156}
138 157
158
139/* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */ 159/* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
140#ifdef CONFIG_SPARSE_IRQ 160#ifdef CONFIG_SPARSE_IRQ
141static struct irq_cfg irq_cfgx[NR_IRQS_LEGACY]; 161static struct irq_cfg irq_cfgx[NR_IRQS_LEGACY];
@@ -1934,8 +1954,7 @@ void disable_IO_APIC(void)
1934 * 1954 *
1935 * by Matt Domsch <Matt_Domsch@dell.com> Tue Dec 21 12:25:05 CST 1999 1955 * by Matt Domsch <Matt_Domsch@dell.com> Tue Dec 21 12:25:05 CST 1999
1936 */ 1956 */
1937 1957void __init setup_ioapic_ids_from_mpc_nocheck(void)
1938void __init setup_ioapic_ids_from_mpc(void)
1939{ 1958{
1940 union IO_APIC_reg_00 reg_00; 1959 union IO_APIC_reg_00 reg_00;
1941 physid_mask_t phys_id_present_map; 1960 physid_mask_t phys_id_present_map;
@@ -1944,15 +1963,6 @@ void __init setup_ioapic_ids_from_mpc(void)
1944 unsigned char old_id; 1963 unsigned char old_id;
1945 unsigned long flags; 1964 unsigned long flags;
1946 1965
1947 if (acpi_ioapic)
1948 return;
1949 /*
1950 * Don't check I/O APIC IDs for xAPIC systems. They have
1951 * no meaning without the serial APIC bus.
1952 */
1953 if (!(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
1954 || APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
1955 return;
1956 /* 1966 /*
1957 * This is broken; anything with a real cpu count has to 1967 * This is broken; anything with a real cpu count has to
1958 * circumvent this idiocy regardless. 1968 * circumvent this idiocy regardless.
@@ -2006,7 +2016,6 @@ void __init setup_ioapic_ids_from_mpc(void)
2006 physids_or(phys_id_present_map, phys_id_present_map, tmp); 2016 physids_or(phys_id_present_map, phys_id_present_map, tmp);
2007 } 2017 }
2008 2018
2009
2010 /* 2019 /*
2011 * We need to adjust the IRQ routing table 2020 * We need to adjust the IRQ routing table
2012 * if the ID changed. 2021 * if the ID changed.
@@ -2018,9 +2027,12 @@ void __init setup_ioapic_ids_from_mpc(void)
2018 = mp_ioapics[apic_id].apicid; 2027 = mp_ioapics[apic_id].apicid;
2019 2028
2020 /* 2029 /*
2021 * Read the right value from the MPC table and 2030 * Update the ID register according to the right value
2022 * write it into the ID register. 2031 * from the MPC table if they are different.
2023 */ 2032 */
2033 if (mp_ioapics[apic_id].apicid == reg_00.bits.ID)
2034 continue;
2035
2024 apic_printk(APIC_VERBOSE, KERN_INFO 2036 apic_printk(APIC_VERBOSE, KERN_INFO
2025 "...changing IO-APIC physical APIC ID to %d ...", 2037 "...changing IO-APIC physical APIC ID to %d ...",
2026 mp_ioapics[apic_id].apicid); 2038 mp_ioapics[apic_id].apicid);
@@ -2042,6 +2054,21 @@ void __init setup_ioapic_ids_from_mpc(void)
2042 apic_printk(APIC_VERBOSE, " ok.\n"); 2054 apic_printk(APIC_VERBOSE, " ok.\n");
2043 } 2055 }
2044} 2056}
2057
2058void __init setup_ioapic_ids_from_mpc(void)
2059{
2060
2061 if (acpi_ioapic)
2062 return;
2063 /*
2064 * Don't check I/O APIC IDs for xAPIC systems. They have
2065 * no meaning without the serial APIC bus.
2066 */
2067 if (!(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
2068 || APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
2069 return;
2070 setup_ioapic_ids_from_mpc_nocheck();
2071}
2045#endif 2072#endif
2046 2073
2047int no_timer_check __initdata; 2074int no_timer_check __initdata;
@@ -2302,7 +2329,7 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void)
2302 unsigned int irr; 2329 unsigned int irr;
2303 struct irq_desc *desc; 2330 struct irq_desc *desc;
2304 struct irq_cfg *cfg; 2331 struct irq_cfg *cfg;
2305 irq = __get_cpu_var(vector_irq)[vector]; 2332 irq = __this_cpu_read(vector_irq[vector]);
2306 2333
2307 if (irq == -1) 2334 if (irq == -1)
2308 continue; 2335 continue;
@@ -2336,7 +2363,7 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void)
2336 apic->send_IPI_self(IRQ_MOVE_CLEANUP_VECTOR); 2363 apic->send_IPI_self(IRQ_MOVE_CLEANUP_VECTOR);
2337 goto unlock; 2364 goto unlock;
2338 } 2365 }
2339 __get_cpu_var(vector_irq)[vector] = -1; 2366 __this_cpu_write(vector_irq[vector], -1);
2340unlock: 2367unlock:
2341 raw_spin_unlock(&desc->lock); 2368 raw_spin_unlock(&desc->lock);
2342 } 2369 }
@@ -2642,24 +2669,6 @@ static void lapic_register_intr(int irq)
2642 "edge"); 2669 "edge");
2643} 2670}
2644 2671
2645static void __init setup_nmi(void)
2646{
2647 /*
2648 * Dirty trick to enable the NMI watchdog ...
2649 * We put the 8259A master into AEOI mode and
2650 * unmask on all local APICs LVT0 as NMI.
2651 *
2652 * The idea to use the 8259A in AEOI mode ('8259A Virtual Wire')
2653 * is from Maciej W. Rozycki - so we do not have to EOI from
2654 * the NMI handler or the timer interrupt.
2655 */
2656 apic_printk(APIC_VERBOSE, KERN_INFO "activating NMI Watchdog ...");
2657
2658 enable_NMI_through_LVT0();
2659
2660 apic_printk(APIC_VERBOSE, " done.\n");
2661}
2662
2663/* 2672/*
2664 * This looks a bit hackish but it's about the only one way of sending 2673 * This looks a bit hackish but it's about the only one way of sending
2665 * a few INTA cycles to 8259As and any associated glue logic. ICR does 2674 * a few INTA cycles to 8259As and any associated glue logic. ICR does
@@ -2765,15 +2774,6 @@ static inline void __init check_timer(void)
2765 */ 2774 */
2766 apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); 2775 apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
2767 legacy_pic->init(1); 2776 legacy_pic->init(1);
2768#ifdef CONFIG_X86_32
2769 {
2770 unsigned int ver;
2771
2772 ver = apic_read(APIC_LVR);
2773 ver = GET_APIC_VERSION(ver);
2774 timer_ack = (nmi_watchdog == NMI_IO_APIC && !APIC_INTEGRATED(ver));
2775 }
2776#endif
2777 2777
2778 pin1 = find_isa_irq_pin(0, mp_INT); 2778 pin1 = find_isa_irq_pin(0, mp_INT);
2779 apic1 = find_isa_irq_apic(0, mp_INT); 2779 apic1 = find_isa_irq_apic(0, mp_INT);
@@ -2821,10 +2821,6 @@ static inline void __init check_timer(void)
2821 unmask_ioapic(cfg); 2821 unmask_ioapic(cfg);
2822 } 2822 }
2823 if (timer_irq_works()) { 2823 if (timer_irq_works()) {
2824 if (nmi_watchdog == NMI_IO_APIC) {
2825 setup_nmi();
2826 legacy_pic->unmask(0);
2827 }
2828 if (disable_timer_pin_1 > 0) 2824 if (disable_timer_pin_1 > 0)
2829 clear_IO_APIC_pin(0, pin1); 2825 clear_IO_APIC_pin(0, pin1);
2830 goto out; 2826 goto out;
@@ -2850,11 +2846,6 @@ static inline void __init check_timer(void)
2850 if (timer_irq_works()) { 2846 if (timer_irq_works()) {
2851 apic_printk(APIC_QUIET, KERN_INFO "....... works.\n"); 2847 apic_printk(APIC_QUIET, KERN_INFO "....... works.\n");
2852 timer_through_8259 = 1; 2848 timer_through_8259 = 1;
2853 if (nmi_watchdog == NMI_IO_APIC) {
2854 legacy_pic->mask(0);
2855 setup_nmi();
2856 legacy_pic->unmask(0);
2857 }
2858 goto out; 2849 goto out;
2859 } 2850 }
2860 /* 2851 /*
@@ -2866,15 +2857,6 @@ static inline void __init check_timer(void)
2866 apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n"); 2857 apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n");
2867 } 2858 }
2868 2859
2869 if (nmi_watchdog == NMI_IO_APIC) {
2870 apic_printk(APIC_QUIET, KERN_WARNING "timer doesn't work "
2871 "through the IO-APIC - disabling NMI Watchdog!\n");
2872 nmi_watchdog = NMI_NONE;
2873 }
2874#ifdef CONFIG_X86_32
2875 timer_ack = 0;
2876#endif
2877
2878 apic_printk(APIC_QUIET, KERN_INFO 2860 apic_printk(APIC_QUIET, KERN_INFO
2879 "...trying to set up timer as Virtual Wire IRQ...\n"); 2861 "...trying to set up timer as Virtual Wire IRQ...\n");
2880 2862
@@ -3639,7 +3621,7 @@ int __init io_apic_get_redir_entries (int ioapic)
3639 return reg_01.bits.entries + 1; 3621 return reg_01.bits.entries + 1;
3640} 3622}
3641 3623
3642void __init probe_nr_irqs_gsi(void) 3624static void __init probe_nr_irqs_gsi(void)
3643{ 3625{
3644 int nr; 3626 int nr;
3645 3627
@@ -3956,7 +3938,7 @@ static struct resource * __init ioapic_setup_resources(int nr_ioapics)
3956 return res; 3938 return res;
3957} 3939}
3958 3940
3959void __init ioapic_init_mappings(void) 3941void __init ioapic_and_gsi_init(void)
3960{ 3942{
3961 unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0; 3943 unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0;
3962 struct resource *ioapic_res; 3944 struct resource *ioapic_res;
@@ -3994,6 +3976,8 @@ fake_ioapic_page:
3994 ioapic_res->end = ioapic_phys + IO_APIC_SLOT_SIZE - 1; 3976 ioapic_res->end = ioapic_phys + IO_APIC_SLOT_SIZE - 1;
3995 ioapic_res++; 3977 ioapic_res++;
3996 } 3978 }
3979
3980 probe_nr_irqs_gsi();
3997} 3981}
3998 3982
3999void __init ioapic_insert_resources(void) 3983void __init ioapic_insert_resources(void)
@@ -4103,7 +4087,8 @@ void __init pre_init_apic_IRQ0(void)
4103 4087
4104 printk(KERN_INFO "Early APIC setup for system timer0\n"); 4088 printk(KERN_INFO "Early APIC setup for system timer0\n");
4105#ifndef CONFIG_SMP 4089#ifndef CONFIG_SMP
4106 phys_cpu_present_map = physid_mask_of_physid(boot_cpu_physical_apicid); 4090 physid_set_mask_of_physid(boot_cpu_physical_apicid,
4091 &phys_cpu_present_map);
4107#endif 4092#endif
4108 /* Make sure the irq descriptor is set up */ 4093 /* Make sure the irq descriptor is set up */
4109 cfg = alloc_irq_and_cfg_at(0, 0); 4094 cfg = alloc_irq_and_cfg_at(0, 0);
diff --git a/arch/x86/kernel/apic/nmi.c b/arch/x86/kernel/apic/nmi.c
deleted file mode 100644
index c90041ccb742..000000000000
--- a/arch/x86/kernel/apic/nmi.c
+++ /dev/null
@@ -1,567 +0,0 @@
1/*
2 * NMI watchdog support on APIC systems
3 *
4 * Started by Ingo Molnar <mingo@redhat.com>
5 *
6 * Fixes:
7 * Mikael Pettersson : AMD K7 support for local APIC NMI watchdog.
8 * Mikael Pettersson : Power Management for local APIC NMI watchdog.
9 * Mikael Pettersson : Pentium 4 support for local APIC NMI watchdog.
10 * Pavel Machek and
11 * Mikael Pettersson : PM converted to driver model. Disable/enable API.
12 */
13
14#include <asm/apic.h>
15
16#include <linux/nmi.h>
17#include <linux/mm.h>
18#include <linux/delay.h>
19#include <linux/interrupt.h>
20#include <linux/module.h>
21#include <linux/slab.h>
22#include <linux/sysdev.h>
23#include <linux/sysctl.h>
24#include <linux/percpu.h>
25#include <linux/kprobes.h>
26#include <linux/cpumask.h>
27#include <linux/kernel_stat.h>
28#include <linux/kdebug.h>
29#include <linux/smp.h>
30
31#include <asm/i8259.h>
32#include <asm/io_apic.h>
33#include <asm/proto.h>
34#include <asm/timer.h>
35
36#include <asm/mce.h>
37
38#include <asm/mach_traps.h>
39
40int unknown_nmi_panic;
41int nmi_watchdog_enabled;
42
43/* For reliability, we're prepared to waste bits here. */
44static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly;
45
46/* nmi_active:
47 * >0: the lapic NMI watchdog is active, but can be disabled
48 * <0: the lapic NMI watchdog has not been set up, and cannot
49 * be enabled
50 * 0: the lapic NMI watchdog is disabled, but can be enabled
51 */
52atomic_t nmi_active = ATOMIC_INIT(0); /* oprofile uses this */
53EXPORT_SYMBOL(nmi_active);
54
55unsigned int nmi_watchdog = NMI_NONE;
56EXPORT_SYMBOL(nmi_watchdog);
57
58static int panic_on_timeout;
59
60static unsigned int nmi_hz = HZ;
61static DEFINE_PER_CPU(short, wd_enabled);
62static int endflag __initdata;
63
64static inline unsigned int get_nmi_count(int cpu)
65{
66 return per_cpu(irq_stat, cpu).__nmi_count;
67}
68
69static inline int mce_in_progress(void)
70{
71#if defined(CONFIG_X86_MCE)
72 return atomic_read(&mce_entry) > 0;
73#endif
74 return 0;
75}
76
77/*
78 * Take the local apic timer and PIT/HPET into account. We don't
79 * know which one is active, when we have highres/dyntick on
80 */
81static inline unsigned int get_timer_irqs(int cpu)
82{
83 return per_cpu(irq_stat, cpu).apic_timer_irqs +
84 per_cpu(irq_stat, cpu).irq0_irqs;
85}
86
87#ifdef CONFIG_SMP
88/*
89 * The performance counters used by NMI_LOCAL_APIC don't trigger when
90 * the CPU is idle. To make sure the NMI watchdog really ticks on all
91 * CPUs during the test make them busy.
92 */
93static __init void nmi_cpu_busy(void *data)
94{
95 local_irq_enable_in_hardirq();
96 /*
97 * Intentionally don't use cpu_relax here. This is
98 * to make sure that the performance counter really ticks,
99 * even if there is a simulator or similar that catches the
100 * pause instruction. On a real HT machine this is fine because
101 * all other CPUs are busy with "useless" delay loops and don't
102 * care if they get somewhat less cycles.
103 */
104 while (endflag == 0)
105 mb();
106}
107#endif
108
109static void report_broken_nmi(int cpu, unsigned int *prev_nmi_count)
110{
111 printk(KERN_CONT "\n");
112
113 printk(KERN_WARNING
114 "WARNING: CPU#%d: NMI appears to be stuck (%d->%d)!\n",
115 cpu, prev_nmi_count[cpu], get_nmi_count(cpu));
116
117 printk(KERN_WARNING
118 "Please report this to bugzilla.kernel.org,\n");
119 printk(KERN_WARNING
120 "and attach the output of the 'dmesg' command.\n");
121
122 per_cpu(wd_enabled, cpu) = 0;
123 atomic_dec(&nmi_active);
124}
125
126static void __acpi_nmi_disable(void *__unused)
127{
128 apic_write(APIC_LVT0, APIC_DM_NMI | APIC_LVT_MASKED);
129}
130
131int __init check_nmi_watchdog(void)
132{
133 unsigned int *prev_nmi_count;
134 int cpu;
135
136 if (!nmi_watchdog_active() || !atomic_read(&nmi_active))
137 return 0;
138
139 prev_nmi_count = kmalloc(nr_cpu_ids * sizeof(int), GFP_KERNEL);
140 if (!prev_nmi_count)
141 goto error;
142
143 printk(KERN_INFO "Testing NMI watchdog ... ");
144
145#ifdef CONFIG_SMP
146 if (nmi_watchdog == NMI_LOCAL_APIC)
147 smp_call_function(nmi_cpu_busy, (void *)&endflag, 0);
148#endif
149
150 for_each_possible_cpu(cpu)
151 prev_nmi_count[cpu] = get_nmi_count(cpu);
152 local_irq_enable();
153 mdelay((20 * 1000) / nmi_hz); /* wait 20 ticks */
154
155 for_each_online_cpu(cpu) {
156 if (!per_cpu(wd_enabled, cpu))
157 continue;
158 if (get_nmi_count(cpu) - prev_nmi_count[cpu] <= 5)
159 report_broken_nmi(cpu, prev_nmi_count);
160 }
161 endflag = 1;
162 if (!atomic_read(&nmi_active)) {
163 kfree(prev_nmi_count);
164 atomic_set(&nmi_active, -1);
165 goto error;
166 }
167 printk("OK.\n");
168
169 /*
170 * now that we know it works we can reduce NMI frequency to
171 * something more reasonable; makes a difference in some configs
172 */
173 if (nmi_watchdog == NMI_LOCAL_APIC)
174 nmi_hz = lapic_adjust_nmi_hz(1);
175
176 kfree(prev_nmi_count);
177 return 0;
178error:
179 if (nmi_watchdog == NMI_IO_APIC) {
180 if (!timer_through_8259)
181 legacy_pic->mask(0);
182 on_each_cpu(__acpi_nmi_disable, NULL, 1);
183 }
184
185#ifdef CONFIG_X86_32
186 timer_ack = 0;
187#endif
188 return -1;
189}
190
191static int __init setup_nmi_watchdog(char *str)
192{
193 unsigned int nmi;
194
195 if (!strncmp(str, "panic", 5)) {
196 panic_on_timeout = 1;
197 str = strchr(str, ',');
198 if (!str)
199 return 1;
200 ++str;
201 }
202
203 if (!strncmp(str, "lapic", 5))
204 nmi_watchdog = NMI_LOCAL_APIC;
205 else if (!strncmp(str, "ioapic", 6))
206 nmi_watchdog = NMI_IO_APIC;
207 else {
208 get_option(&str, &nmi);
209 if (nmi >= NMI_INVALID)
210 return 0;
211 nmi_watchdog = nmi;
212 }
213
214 return 1;
215}
216__setup("nmi_watchdog=", setup_nmi_watchdog);
217
218/*
219 * Suspend/resume support
220 */
221#ifdef CONFIG_PM
222
223static int nmi_pm_active; /* nmi_active before suspend */
224
225static int lapic_nmi_suspend(struct sys_device *dev, pm_message_t state)
226{
227 /* only CPU0 goes here, other CPUs should be offline */
228 nmi_pm_active = atomic_read(&nmi_active);
229 stop_apic_nmi_watchdog(NULL);
230 BUG_ON(atomic_read(&nmi_active) != 0);
231 return 0;
232}
233
234static int lapic_nmi_resume(struct sys_device *dev)
235{
236 /* only CPU0 goes here, other CPUs should be offline */
237 if (nmi_pm_active > 0) {
238 setup_apic_nmi_watchdog(NULL);
239 touch_nmi_watchdog();
240 }
241 return 0;
242}
243
244static struct sysdev_class nmi_sysclass = {
245 .name = "lapic_nmi",
246 .resume = lapic_nmi_resume,
247 .suspend = lapic_nmi_suspend,
248};
249
250static struct sys_device device_lapic_nmi = {
251 .id = 0,
252 .cls = &nmi_sysclass,
253};
254
255static int __init init_lapic_nmi_sysfs(void)
256{
257 int error;
258
259 /*
260 * should really be a BUG_ON but b/c this is an
261 * init call, it just doesn't work. -dcz
262 */
263 if (nmi_watchdog != NMI_LOCAL_APIC)
264 return 0;
265
266 if (atomic_read(&nmi_active) < 0)
267 return 0;
268
269 error = sysdev_class_register(&nmi_sysclass);
270 if (!error)
271 error = sysdev_register(&device_lapic_nmi);
272 return error;
273}
274
275/* must come after the local APIC's device_initcall() */
276late_initcall(init_lapic_nmi_sysfs);
277
278#endif /* CONFIG_PM */
279
280static void __acpi_nmi_enable(void *__unused)
281{
282 apic_write(APIC_LVT0, APIC_DM_NMI);
283}
284
285/*
286 * Enable timer based NMIs on all CPUs:
287 */
288void acpi_nmi_enable(void)
289{
290 if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC)
291 on_each_cpu(__acpi_nmi_enable, NULL, 1);
292}
293
294/*
295 * Disable timer based NMIs on all CPUs:
296 */
297void acpi_nmi_disable(void)
298{
299 if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC)
300 on_each_cpu(__acpi_nmi_disable, NULL, 1);
301}
302
303/*
304 * This function is called as soon the LAPIC NMI watchdog driver has everything
305 * in place and it's ready to check if the NMIs belong to the NMI watchdog
306 */
307void cpu_nmi_set_wd_enabled(void)
308{
309 __get_cpu_var(wd_enabled) = 1;
310}
311
312void setup_apic_nmi_watchdog(void *unused)
313{
314 if (__get_cpu_var(wd_enabled))
315 return;
316
317 /* cheap hack to support suspend/resume */
318 /* if cpu0 is not active neither should the other cpus */
319 if (smp_processor_id() != 0 && atomic_read(&nmi_active) <= 0)
320 return;
321
322 switch (nmi_watchdog) {
323 case NMI_LOCAL_APIC:
324 if (lapic_watchdog_init(nmi_hz) < 0) {
325 __get_cpu_var(wd_enabled) = 0;
326 return;
327 }
328 /* FALL THROUGH */
329 case NMI_IO_APIC:
330 __get_cpu_var(wd_enabled) = 1;
331 atomic_inc(&nmi_active);
332 }
333}
334
335void stop_apic_nmi_watchdog(void *unused)
336{
337 /* only support LOCAL and IO APICs for now */
338 if (!nmi_watchdog_active())
339 return;
340 if (__get_cpu_var(wd_enabled) == 0)
341 return;
342 if (nmi_watchdog == NMI_LOCAL_APIC)
343 lapic_watchdog_stop();
344 else
345 __acpi_nmi_disable(NULL);
346 __get_cpu_var(wd_enabled) = 0;
347 atomic_dec(&nmi_active);
348}
349
350/*
351 * the best way to detect whether a CPU has a 'hard lockup' problem
352 * is to check its local APIC timer IRQ counts. If they are not
353 * changing then that CPU has some problem.
354 *
355 * as these watchdog NMI IRQs are generated on every CPU, we only
356 * have to check the current processor.
357 *
358 * since NMIs don't listen to _any_ locks, we have to be extremely
359 * careful not to rely on unsafe variables. The printk might lock
360 * up though, so we have to break up any console locks first ...
361 * [when there will be more tty-related locks, break them up here too!]
362 */
363
364static DEFINE_PER_CPU(unsigned, last_irq_sum);
365static DEFINE_PER_CPU(long, alert_counter);
366static DEFINE_PER_CPU(int, nmi_touch);
367
368void touch_nmi_watchdog(void)
369{
370 if (nmi_watchdog_active()) {
371 unsigned cpu;
372
373 /*
374 * Tell other CPUs to reset their alert counters. We cannot
375 * do it ourselves because the alert count increase is not
376 * atomic.
377 */
378 for_each_present_cpu(cpu) {
379 if (per_cpu(nmi_touch, cpu) != 1)
380 per_cpu(nmi_touch, cpu) = 1;
381 }
382 }
383
384 /*
385 * Tickle the softlockup detector too:
386 */
387 touch_softlockup_watchdog();
388}
389EXPORT_SYMBOL(touch_nmi_watchdog);
390
391notrace __kprobes int
392nmi_watchdog_tick(struct pt_regs *regs, unsigned reason)
393{
394 /*
395 * Since current_thread_info()-> is always on the stack, and we
396 * always switch the stack NMI-atomically, it's safe to use
397 * smp_processor_id().
398 */
399 unsigned int sum;
400 int touched = 0;
401 int cpu = smp_processor_id();
402 int rc = 0;
403
404 sum = get_timer_irqs(cpu);
405
406 if (__get_cpu_var(nmi_touch)) {
407 __get_cpu_var(nmi_touch) = 0;
408 touched = 1;
409 }
410
411 /* We can be called before check_nmi_watchdog, hence NULL check. */
412 if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) {
413 static DEFINE_RAW_SPINLOCK(lock); /* Serialise the printks */
414
415 raw_spin_lock(&lock);
416 printk(KERN_WARNING "NMI backtrace for cpu %d\n", cpu);
417 show_regs(regs);
418 dump_stack();
419 raw_spin_unlock(&lock);
420 cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask));
421
422 rc = 1;
423 }
424
425 /* Could check oops_in_progress here too, but it's safer not to */
426 if (mce_in_progress())
427 touched = 1;
428
429 /* if the none of the timers isn't firing, this cpu isn't doing much */
430 if (!touched && __get_cpu_var(last_irq_sum) == sum) {
431 /*
432 * Ayiee, looks like this CPU is stuck ...
433 * wait a few IRQs (5 seconds) before doing the oops ...
434 */
435 __this_cpu_inc(alert_counter);
436 if (__this_cpu_read(alert_counter) == 5 * nmi_hz)
437 /*
438 * die_nmi will return ONLY if NOTIFY_STOP happens..
439 */
440 die_nmi("BUG: NMI Watchdog detected LOCKUP",
441 regs, panic_on_timeout);
442 } else {
443 __get_cpu_var(last_irq_sum) = sum;
444 __this_cpu_write(alert_counter, 0);
445 }
446
447 /* see if the nmi watchdog went off */
448 if (!__get_cpu_var(wd_enabled))
449 return rc;
450 switch (nmi_watchdog) {
451 case NMI_LOCAL_APIC:
452 rc |= lapic_wd_event(nmi_hz);
453 break;
454 case NMI_IO_APIC:
455 /*
456 * don't know how to accurately check for this.
457 * just assume it was a watchdog timer interrupt
458 * This matches the old behaviour.
459 */
460 rc = 1;
461 break;
462 }
463 return rc;
464}
465
466#ifdef CONFIG_SYSCTL
467
468static void enable_ioapic_nmi_watchdog_single(void *unused)
469{
470 __get_cpu_var(wd_enabled) = 1;
471 atomic_inc(&nmi_active);
472 __acpi_nmi_enable(NULL);
473}
474
475static void enable_ioapic_nmi_watchdog(void)
476{
477 on_each_cpu(enable_ioapic_nmi_watchdog_single, NULL, 1);
478 touch_nmi_watchdog();
479}
480
481static void disable_ioapic_nmi_watchdog(void)
482{
483 on_each_cpu(stop_apic_nmi_watchdog, NULL, 1);
484}
485
486static int __init setup_unknown_nmi_panic(char *str)
487{
488 unknown_nmi_panic = 1;
489 return 1;
490}
491__setup("unknown_nmi_panic", setup_unknown_nmi_panic);
492
/*
 * Report an NMI whose reason is unknown: formats the reason byte returned
 * by get_nmi_reason() into a message and hands it to die_nmi() with the
 * panic argument set.  Returns 0 if die_nmi() returns.
 */
static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu)
{
	unsigned char reason = get_nmi_reason();
	char buf[64];

	/* bounded formatting: never write past the end of buf */
	snprintf(buf, sizeof(buf),
		 "NMI received for unknown reason %02x\n", reason);
	die_nmi(buf, regs, 1); /* Always panic here */
	return 0;
}
502
503/*
504 * proc handler for /proc/sys/kernel/nmi
505 */
506int proc_nmi_enabled(struct ctl_table *table, int write,
507 void __user *buffer, size_t *length, loff_t *ppos)
508{
509 int old_state;
510
511 nmi_watchdog_enabled = (atomic_read(&nmi_active) > 0) ? 1 : 0;
512 old_state = nmi_watchdog_enabled;
513 proc_dointvec(table, write, buffer, length, ppos);
514 if (!!old_state == !!nmi_watchdog_enabled)
515 return 0;
516
517 if (atomic_read(&nmi_active) < 0 || !nmi_watchdog_active()) {
518 printk(KERN_WARNING
519 "NMI watchdog is permanently disabled\n");
520 return -EIO;
521 }
522
523 if (nmi_watchdog == NMI_LOCAL_APIC) {
524 if (nmi_watchdog_enabled)
525 enable_lapic_nmi_watchdog();
526 else
527 disable_lapic_nmi_watchdog();
528 } else if (nmi_watchdog == NMI_IO_APIC) {
529 if (nmi_watchdog_enabled)
530 enable_ioapic_nmi_watchdog();
531 else
532 disable_ioapic_nmi_watchdog();
533 } else {
534 printk(KERN_WARNING
535 "NMI watchdog doesn't know what hardware to touch\n");
536 return -EIO;
537 }
538 return 0;
539}
540
541#endif /* CONFIG_SYSCTL */
542
/*
 * With CONFIG_SYSCTL and the unknown_nmi_panic flag set, forwards to
 * unknown_nmi_panic_callback() and returns its result; otherwise
 * returns 0.
 */
int do_nmi_callback(struct pt_regs *regs, int cpu)
{
	int ret = 0;

#ifdef CONFIG_SYSCTL
	if (unknown_nmi_panic)
		ret = unknown_nmi_panic_callback(regs, cpu);
#endif
	return ret;
}
551
552void arch_trigger_all_cpu_backtrace(void)
553{
554 int i;
555
556 cpumask_copy(to_cpumask(backtrace_mask), cpu_online_mask);
557
558 printk(KERN_INFO "sending NMI to all CPUs:\n");
559 apic->send_IPI_all(NMI_VECTOR);
560
561 /* Wait for up to 10 seconds for all CPUs to do the backtrace */
562 for (i = 0; i < 10 * 1000; i++) {
563 if (cpumask_empty(to_cpumask(backtrace_mask)))
564 break;
565 mdelay(1);
566 }
567}
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index c1c52c341f40..bd16b58b8850 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -48,6 +48,16 @@ unsigned int uv_apicid_hibits;
48EXPORT_SYMBOL_GPL(uv_apicid_hibits); 48EXPORT_SYMBOL_GPL(uv_apicid_hibits);
49static DEFINE_SPINLOCK(uv_nmi_lock); 49static DEFINE_SPINLOCK(uv_nmi_lock);
50 50
51static unsigned long __init uv_early_read_mmr(unsigned long addr)
52{
53 unsigned long val, *mmr;
54
55 mmr = early_ioremap(UV_LOCAL_MMR_BASE | addr, sizeof(*mmr));
56 val = *mmr;
57 early_iounmap(mmr, sizeof(*mmr));
58 return val;
59}
60
51static inline bool is_GRU_range(u64 start, u64 end) 61static inline bool is_GRU_range(u64 start, u64 end)
52{ 62{
53 return start >= gru_start_paddr && end <= gru_end_paddr; 63 return start >= gru_start_paddr && end <= gru_end_paddr;
@@ -58,28 +68,24 @@ static bool uv_is_untracked_pat_range(u64 start, u64 end)
58 return is_ISA_range(start, end) || is_GRU_range(start, end); 68 return is_ISA_range(start, end) || is_GRU_range(start, end);
59} 69}
60 70
61static int early_get_nodeid(void) 71static int __init early_get_pnodeid(void)
62{ 72{
63 union uvh_node_id_u node_id; 73 union uvh_node_id_u node_id;
64 unsigned long *mmr; 74 union uvh_rh_gam_config_mmr_u m_n_config;
65 75 int pnode;
66 mmr = early_ioremap(UV_LOCAL_MMR_BASE | UVH_NODE_ID, sizeof(*mmr));
67 node_id.v = *mmr;
68 early_iounmap(mmr, sizeof(*mmr));
69 76
70 /* Currently, all blades have same revision number */ 77 /* Currently, all blades have same revision number */
78 node_id.v = uv_early_read_mmr(UVH_NODE_ID);
79 m_n_config.v = uv_early_read_mmr(UVH_RH_GAM_CONFIG_MMR);
71 uv_min_hub_revision_id = node_id.s.revision; 80 uv_min_hub_revision_id = node_id.s.revision;
72 81
73 return node_id.s.node_id; 82 pnode = (node_id.s.node_id >> 1) & ((1 << m_n_config.s.n_skt) - 1);
83 return pnode;
74} 84}
75 85
76static void __init early_get_apic_pnode_shift(void) 86static void __init early_get_apic_pnode_shift(void)
77{ 87{
78 unsigned long *mmr; 88 uvh_apicid.v = uv_early_read_mmr(UVH_APICID);
79
80 mmr = early_ioremap(UV_LOCAL_MMR_BASE | UVH_APICID, sizeof(*mmr));
81 uvh_apicid.v = *mmr;
82 early_iounmap(mmr, sizeof(*mmr));
83 if (!uvh_apicid.v) 89 if (!uvh_apicid.v)
84 /* 90 /*
85 * Old bios, use default value 91 * Old bios, use default value
@@ -95,21 +101,17 @@ static void __init early_get_apic_pnode_shift(void)
95static void __init uv_set_apicid_hibit(void) 101static void __init uv_set_apicid_hibit(void)
96{ 102{
97 union uvh_lb_target_physical_apic_id_mask_u apicid_mask; 103 union uvh_lb_target_physical_apic_id_mask_u apicid_mask;
98 unsigned long *mmr;
99 104
100 mmr = early_ioremap(UV_LOCAL_MMR_BASE | 105 apicid_mask.v = uv_early_read_mmr(UVH_LB_TARGET_PHYSICAL_APIC_ID_MASK);
101 UVH_LB_TARGET_PHYSICAL_APIC_ID_MASK, sizeof(*mmr));
102 apicid_mask.v = *mmr;
103 early_iounmap(mmr, sizeof(*mmr));
104 uv_apicid_hibits = apicid_mask.s.bit_enables & UV_APICID_HIBIT_MASK; 106 uv_apicid_hibits = apicid_mask.s.bit_enables & UV_APICID_HIBIT_MASK;
105} 107}
106 108
107static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id) 109static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
108{ 110{
109 int nodeid; 111 int pnodeid;
110 112
111 if (!strcmp(oem_id, "SGI")) { 113 if (!strcmp(oem_id, "SGI")) {
112 nodeid = early_get_nodeid(); 114 pnodeid = early_get_pnodeid();
113 early_get_apic_pnode_shift(); 115 early_get_apic_pnode_shift();
114 x86_platform.is_untracked_pat_range = uv_is_untracked_pat_range; 116 x86_platform.is_untracked_pat_range = uv_is_untracked_pat_range;
115 x86_platform.nmi_init = uv_nmi_init; 117 x86_platform.nmi_init = uv_nmi_init;
@@ -118,8 +120,8 @@ static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
118 else if (!strcmp(oem_table_id, "UVX")) 120 else if (!strcmp(oem_table_id, "UVX"))
119 uv_system_type = UV_X2APIC; 121 uv_system_type = UV_X2APIC;
120 else if (!strcmp(oem_table_id, "UVH")) { 122 else if (!strcmp(oem_table_id, "UVH")) {
121 __get_cpu_var(x2apic_extra_bits) = 123 __this_cpu_write(x2apic_extra_bits,
122 nodeid << (uvh_apicid.s.pnode_shift - 1); 124 pnodeid << uvh_apicid.s.pnode_shift);
123 uv_system_type = UV_NON_UNIQUE_APIC; 125 uv_system_type = UV_NON_UNIQUE_APIC;
124 uv_set_apicid_hibit(); 126 uv_set_apicid_hibit();
125 return 1; 127 return 1;
@@ -284,7 +286,7 @@ static unsigned int x2apic_get_apic_id(unsigned long x)
284 unsigned int id; 286 unsigned int id;
285 287
286 WARN_ON(preemptible() && num_online_cpus() > 1); 288 WARN_ON(preemptible() && num_online_cpus() > 1);
287 id = x | __get_cpu_var(x2apic_extra_bits); 289 id = x | __this_cpu_read(x2apic_extra_bits);
288 290
289 return id; 291 return id;
290} 292}
@@ -376,7 +378,7 @@ struct apic __refdata apic_x2apic_uv_x = {
376 378
377static __cpuinit void set_x2apic_extra_bits(int pnode) 379static __cpuinit void set_x2apic_extra_bits(int pnode)
378{ 380{
379 __get_cpu_var(x2apic_extra_bits) = (pnode << 6); 381 __this_cpu_write(x2apic_extra_bits, pnode << uvh_apicid.s.pnode_shift);
380} 382}
381 383
382/* 384/*
@@ -639,7 +641,7 @@ void __cpuinit uv_cpu_init(void)
639 */ 641 */
640int uv_handle_nmi(struct notifier_block *self, unsigned long reason, void *data) 642int uv_handle_nmi(struct notifier_block *self, unsigned long reason, void *data)
641{ 643{
642 if (reason != DIE_NMI_IPI) 644 if (reason != DIE_NMIUNKNOWN)
643 return NOTIFY_OK; 645 return NOTIFY_OK;
644 646
645 if (in_crash_kexec) 647 if (in_crash_kexec)
@@ -682,27 +684,32 @@ void uv_nmi_init(void)
682void __init uv_system_init(void) 684void __init uv_system_init(void)
683{ 685{
684 union uvh_rh_gam_config_mmr_u m_n_config; 686 union uvh_rh_gam_config_mmr_u m_n_config;
687 union uvh_rh_gam_mmioh_overlay_config_mmr_u mmioh;
685 union uvh_node_id_u node_id; 688 union uvh_node_id_u node_id;
686 unsigned long gnode_upper, lowmem_redir_base, lowmem_redir_size; 689 unsigned long gnode_upper, lowmem_redir_base, lowmem_redir_size;
687 int bytes, nid, cpu, lcpu, pnode, blade, i, j, m_val, n_val; 690 int bytes, nid, cpu, lcpu, pnode, blade, i, j, m_val, n_val, n_io;
688 int gnode_extra, max_pnode = 0; 691 int gnode_extra, max_pnode = 0;
689 unsigned long mmr_base, present, paddr; 692 unsigned long mmr_base, present, paddr;
690 unsigned short pnode_mask; 693 unsigned short pnode_mask, pnode_io_mask;
691 694
692 map_low_mmrs(); 695 map_low_mmrs();
693 696
694 m_n_config.v = uv_read_local_mmr(UVH_RH_GAM_CONFIG_MMR ); 697 m_n_config.v = uv_read_local_mmr(UVH_RH_GAM_CONFIG_MMR );
695 m_val = m_n_config.s.m_skt; 698 m_val = m_n_config.s.m_skt;
696 n_val = m_n_config.s.n_skt; 699 n_val = m_n_config.s.n_skt;
700 mmioh.v = uv_read_local_mmr(UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR);
701 n_io = mmioh.s.n_io;
697 mmr_base = 702 mmr_base =
698 uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR) & 703 uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR) &
699 ~UV_MMR_ENABLE; 704 ~UV_MMR_ENABLE;
700 pnode_mask = (1 << n_val) - 1; 705 pnode_mask = (1 << n_val) - 1;
706 pnode_io_mask = (1 << n_io) - 1;
707
701 node_id.v = uv_read_local_mmr(UVH_NODE_ID); 708 node_id.v = uv_read_local_mmr(UVH_NODE_ID);
702 gnode_extra = (node_id.s.node_id & ~((1 << n_val) - 1)) >> 1; 709 gnode_extra = (node_id.s.node_id & ~((1 << n_val) - 1)) >> 1;
703 gnode_upper = ((unsigned long)gnode_extra << m_val); 710 gnode_upper = ((unsigned long)gnode_extra << m_val);
704 printk(KERN_DEBUG "UV: N %d, M %d, gnode_upper 0x%lx, gnode_extra 0x%x\n", 711 printk(KERN_INFO "UV: N %d, M %d, N_IO: %d, gnode_upper 0x%lx, gnode_extra 0x%x, pnode_mask 0x%x, pnode_io_mask 0x%x\n",
705 n_val, m_val, gnode_upper, gnode_extra); 712 n_val, m_val, n_io, gnode_upper, gnode_extra, pnode_mask, pnode_io_mask);
706 713
707 printk(KERN_DEBUG "UV: global MMR base 0x%lx\n", mmr_base); 714 printk(KERN_DEBUG "UV: global MMR base 0x%lx\n", mmr_base);
708 715
@@ -735,7 +742,7 @@ void __init uv_system_init(void)
735 for (j = 0; j < 64; j++) { 742 for (j = 0; j < 64; j++) {
736 if (!test_bit(j, &present)) 743 if (!test_bit(j, &present))
737 continue; 744 continue;
738 pnode = (i * 64 + j); 745 pnode = (i * 64 + j) & pnode_mask;
739 uv_blade_info[blade].pnode = pnode; 746 uv_blade_info[blade].pnode = pnode;
740 uv_blade_info[blade].nr_possible_cpus = 0; 747 uv_blade_info[blade].nr_possible_cpus = 0;
741 uv_blade_info[blade].nr_online_cpus = 0; 748 uv_blade_info[blade].nr_online_cpus = 0;
@@ -756,6 +763,7 @@ void __init uv_system_init(void)
756 /* 763 /*
757 * apic_pnode_shift must be set before calling uv_apicid_to_pnode(); 764 * apic_pnode_shift must be set before calling uv_apicid_to_pnode();
758 */ 765 */
766 uv_cpu_hub_info(cpu)->pnode_mask = pnode_mask;
759 uv_cpu_hub_info(cpu)->apic_pnode_shift = uvh_apicid.s.pnode_shift; 767 uv_cpu_hub_info(cpu)->apic_pnode_shift = uvh_apicid.s.pnode_shift;
760 pnode = uv_apicid_to_pnode(apicid); 768 pnode = uv_apicid_to_pnode(apicid);
761 blade = boot_pnode_to_blade(pnode); 769 blade = boot_pnode_to_blade(pnode);
@@ -772,7 +780,6 @@ void __init uv_system_init(void)
772 uv_cpu_hub_info(cpu)->numa_blade_id = blade; 780 uv_cpu_hub_info(cpu)->numa_blade_id = blade;
773 uv_cpu_hub_info(cpu)->blade_processor_id = lcpu; 781 uv_cpu_hub_info(cpu)->blade_processor_id = lcpu;
774 uv_cpu_hub_info(cpu)->pnode = pnode; 782 uv_cpu_hub_info(cpu)->pnode = pnode;
775 uv_cpu_hub_info(cpu)->pnode_mask = pnode_mask;
776 uv_cpu_hub_info(cpu)->gpa_mask = (1UL << (m_val + n_val)) - 1; 783 uv_cpu_hub_info(cpu)->gpa_mask = (1UL << (m_val + n_val)) - 1;
777 uv_cpu_hub_info(cpu)->gnode_upper = gnode_upper; 784 uv_cpu_hub_info(cpu)->gnode_upper = gnode_upper;
778 uv_cpu_hub_info(cpu)->gnode_extra = gnode_extra; 785 uv_cpu_hub_info(cpu)->gnode_extra = gnode_extra;
@@ -796,7 +803,7 @@ void __init uv_system_init(void)
796 803
797 map_gru_high(max_pnode); 804 map_gru_high(max_pnode);
798 map_mmr_high(max_pnode); 805 map_mmr_high(max_pnode);
799 map_mmioh_high(max_pnode); 806 map_mmioh_high(max_pnode & pnode_io_mask);
800 807
801 uv_cpu_init(); 808 uv_cpu_init();
802 uv_scir_register_cpu_notifier(); 809 uv_scir_register_cpu_notifier();
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 9e093f8fe78c..7c7bedb83c5a 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -668,7 +668,7 @@ EXPORT_SYMBOL_GPL(amd_erratum_383);
668 668
669bool cpu_has_amd_erratum(const int *erratum) 669bool cpu_has_amd_erratum(const int *erratum)
670{ 670{
671 struct cpuinfo_x86 *cpu = &current_cpu_data; 671 struct cpuinfo_x86 *cpu = __this_cpu_ptr(&cpu_info);
672 int osvw_id = *erratum++; 672 int osvw_id = *erratum++;
673 u32 range; 673 u32 range;
674 u32 ms; 674 u32 ms;
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 4b68bda30938..1d59834396bd 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -894,7 +894,6 @@ void __init identify_boot_cpu(void)
894#else 894#else
895 vgetcpu_set_mode(); 895 vgetcpu_set_mode();
896#endif 896#endif
897 init_hw_perf_events();
898} 897}
899 898
900void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c) 899void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c)
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
index 491977baf6c0..35c7e65e59be 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
@@ -521,7 +521,7 @@ static void check_supported_cpu(void *_rc)
521 521
522 *rc = -ENODEV; 522 *rc = -ENODEV;
523 523
524 if (current_cpu_data.x86_vendor != X86_VENDOR_AMD) 524 if (__this_cpu_read(cpu_info.x86_vendor) != X86_VENDOR_AMD)
525 return; 525 return;
526 526
527 eax = cpuid_eax(CPUID_PROCESSOR_SIGNATURE); 527 eax = cpuid_eax(CPUID_PROCESSOR_SIGNATURE);
@@ -1377,7 +1377,7 @@ static int __devexit powernowk8_cpu_exit(struct cpufreq_policy *pol)
1377static void query_values_on_cpu(void *_err) 1377static void query_values_on_cpu(void *_err)
1378{ 1378{
1379 int *err = _err; 1379 int *err = _err;
1380 struct powernow_k8_data *data = __get_cpu_var(powernow_data); 1380 struct powernow_k8_data *data = __this_cpu_read(powernow_data);
1381 1381
1382 *err = query_current_values_with_pending_wait(data); 1382 *err = query_current_values_with_pending_wait(data);
1383} 1383}
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 17ad03366211..7283e98deaae 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -149,8 +149,7 @@ union _cpuid4_leaf_ecx {
149}; 149};
150 150
151struct amd_l3_cache { 151struct amd_l3_cache {
152 struct pci_dev *dev; 152 struct amd_northbridge *nb;
153 bool can_disable;
154 unsigned indices; 153 unsigned indices;
155 u8 subcaches[4]; 154 u8 subcaches[4];
156}; 155};
@@ -266,7 +265,7 @@ amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
266 line_size = l2.line_size; 265 line_size = l2.line_size;
267 lines_per_tag = l2.lines_per_tag; 266 lines_per_tag = l2.lines_per_tag;
268 /* cpu_data has errata corrections for K7 applied */ 267 /* cpu_data has errata corrections for K7 applied */
269 size_in_kb = current_cpu_data.x86_cache_size; 268 size_in_kb = __this_cpu_read(cpu_info.x86_cache_size);
270 break; 269 break;
271 case 3: 270 case 3:
272 if (!l3.val) 271 if (!l3.val)
@@ -288,7 +287,7 @@ amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
288 eax->split.type = types[leaf]; 287 eax->split.type = types[leaf];
289 eax->split.level = levels[leaf]; 288 eax->split.level = levels[leaf];
290 eax->split.num_threads_sharing = 0; 289 eax->split.num_threads_sharing = 0;
291 eax->split.num_cores_on_die = current_cpu_data.x86_max_cores - 1; 290 eax->split.num_cores_on_die = __this_cpu_read(cpu_info.x86_max_cores) - 1;
292 291
293 292
294 if (assoc == 0xffff) 293 if (assoc == 0xffff)
@@ -311,14 +310,12 @@ struct _cache_attr {
311/* 310/*
312 * L3 cache descriptors 311 * L3 cache descriptors
313 */ 312 */
314static struct amd_l3_cache **__cpuinitdata l3_caches;
315
316static void __cpuinit amd_calc_l3_indices(struct amd_l3_cache *l3) 313static void __cpuinit amd_calc_l3_indices(struct amd_l3_cache *l3)
317{ 314{
318 unsigned int sc0, sc1, sc2, sc3; 315 unsigned int sc0, sc1, sc2, sc3;
319 u32 val = 0; 316 u32 val = 0;
320 317
321 pci_read_config_dword(l3->dev, 0x1C4, &val); 318 pci_read_config_dword(l3->nb->misc, 0x1C4, &val);
322 319
323 /* calculate subcache sizes */ 320 /* calculate subcache sizes */
324 l3->subcaches[0] = sc0 = !(val & BIT(0)); 321 l3->subcaches[0] = sc0 = !(val & BIT(0));
@@ -330,47 +327,14 @@ static void __cpuinit amd_calc_l3_indices(struct amd_l3_cache *l3)
330 l3->indices = (max(max3(sc0, sc1, sc2), sc3) << 10) - 1; 327 l3->indices = (max(max3(sc0, sc1, sc2), sc3) << 10) - 1;
331} 328}
332 329
333static struct amd_l3_cache * __cpuinit amd_init_l3_cache(int node) 330static void __cpuinit amd_init_l3_cache(struct _cpuid4_info_regs *this_leaf,
334{ 331 int index)
335 struct amd_l3_cache *l3;
336 struct pci_dev *dev = node_to_k8_nb_misc(node);
337
338 l3 = kzalloc(sizeof(struct amd_l3_cache), GFP_ATOMIC);
339 if (!l3) {
340 printk(KERN_WARNING "Error allocating L3 struct\n");
341 return NULL;
342 }
343
344 l3->dev = dev;
345
346 amd_calc_l3_indices(l3);
347
348 return l3;
349}
350
351static void __cpuinit amd_check_l3_disable(struct _cpuid4_info_regs *this_leaf,
352 int index)
353{ 332{
333 static struct amd_l3_cache *__cpuinitdata l3_caches;
354 int node; 334 int node;
355 335
356 if (boot_cpu_data.x86 != 0x10) 336 /* only for L3, and not in virtualized environments */
357 return; 337 if (index < 3 || amd_nb_num() == 0)
358
359 if (index < 3)
360 return;
361
362 /* see errata #382 and #388 */
363 if (boot_cpu_data.x86_model < 0x8)
364 return;
365
366 if ((boot_cpu_data.x86_model == 0x8 ||
367 boot_cpu_data.x86_model == 0x9)
368 &&
369 boot_cpu_data.x86_mask < 0x1)
370 return;
371
372 /* not in virtualized environments */
373 if (k8_northbridges.num == 0)
374 return; 338 return;
375 339
376 /* 340 /*
@@ -378,7 +342,7 @@ static void __cpuinit amd_check_l3_disable(struct _cpuid4_info_regs *this_leaf,
378 * never freed but this is done only on shutdown so it doesn't matter. 342 * never freed but this is done only on shutdown so it doesn't matter.
379 */ 343 */
380 if (!l3_caches) { 344 if (!l3_caches) {
381 int size = k8_northbridges.num * sizeof(struct amd_l3_cache *); 345 int size = amd_nb_num() * sizeof(struct amd_l3_cache);
382 346
383 l3_caches = kzalloc(size, GFP_ATOMIC); 347 l3_caches = kzalloc(size, GFP_ATOMIC);
384 if (!l3_caches) 348 if (!l3_caches)
@@ -387,14 +351,12 @@ static void __cpuinit amd_check_l3_disable(struct _cpuid4_info_regs *this_leaf,
387 351
388 node = amd_get_nb_id(smp_processor_id()); 352 node = amd_get_nb_id(smp_processor_id());
389 353
390 if (!l3_caches[node]) { 354 if (!l3_caches[node].nb) {
391 l3_caches[node] = amd_init_l3_cache(node); 355 l3_caches[node].nb = node_to_amd_nb(node);
392 l3_caches[node]->can_disable = true; 356 amd_calc_l3_indices(&l3_caches[node]);
393 } 357 }
394 358
395 WARN_ON(!l3_caches[node]); 359 this_leaf->l3 = &l3_caches[node];
396
397 this_leaf->l3 = l3_caches[node];
398} 360}
399 361
400/* 362/*
@@ -408,7 +370,7 @@ int amd_get_l3_disable_slot(struct amd_l3_cache *l3, unsigned slot)
408{ 370{
409 unsigned int reg = 0; 371 unsigned int reg = 0;
410 372
411 pci_read_config_dword(l3->dev, 0x1BC + slot * 4, &reg); 373 pci_read_config_dword(l3->nb->misc, 0x1BC + slot * 4, &reg);
412 374
413 /* check whether this slot is activated already */ 375 /* check whether this slot is activated already */
414 if (reg & (3UL << 30)) 376 if (reg & (3UL << 30))
@@ -422,7 +384,8 @@ static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf,
422{ 384{
423 int index; 385 int index;
424 386
425 if (!this_leaf->l3 || !this_leaf->l3->can_disable) 387 if (!this_leaf->l3 ||
388 !amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
426 return -EINVAL; 389 return -EINVAL;
427 390
428 index = amd_get_l3_disable_slot(this_leaf->l3, slot); 391 index = amd_get_l3_disable_slot(this_leaf->l3, slot);
@@ -457,7 +420,7 @@ static void amd_l3_disable_index(struct amd_l3_cache *l3, int cpu,
457 if (!l3->subcaches[i]) 420 if (!l3->subcaches[i])
458 continue; 421 continue;
459 422
460 pci_write_config_dword(l3->dev, 0x1BC + slot * 4, reg); 423 pci_write_config_dword(l3->nb->misc, 0x1BC + slot * 4, reg);
461 424
462 /* 425 /*
463 * We need to WBINVD on a core on the node containing the L3 426 * We need to WBINVD on a core on the node containing the L3
@@ -467,7 +430,7 @@ static void amd_l3_disable_index(struct amd_l3_cache *l3, int cpu,
467 wbinvd_on_cpu(cpu); 430 wbinvd_on_cpu(cpu);
468 431
469 reg |= BIT(31); 432 reg |= BIT(31);
470 pci_write_config_dword(l3->dev, 0x1BC + slot * 4, reg); 433 pci_write_config_dword(l3->nb->misc, 0x1BC + slot * 4, reg);
471 } 434 }
472} 435}
473 436
@@ -524,7 +487,8 @@ static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf,
524 if (!capable(CAP_SYS_ADMIN)) 487 if (!capable(CAP_SYS_ADMIN))
525 return -EPERM; 488 return -EPERM;
526 489
527 if (!this_leaf->l3 || !this_leaf->l3->can_disable) 490 if (!this_leaf->l3 ||
491 !amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
528 return -EINVAL; 492 return -EINVAL;
529 493
530 cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map)); 494 cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map));
@@ -545,7 +509,7 @@ static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf,
545#define STORE_CACHE_DISABLE(slot) \ 509#define STORE_CACHE_DISABLE(slot) \
546static ssize_t \ 510static ssize_t \
547store_cache_disable_##slot(struct _cpuid4_info *this_leaf, \ 511store_cache_disable_##slot(struct _cpuid4_info *this_leaf, \
548 const char *buf, size_t count) \ 512 const char *buf, size_t count) \
549{ \ 513{ \
550 return store_cache_disable(this_leaf, buf, count, slot); \ 514 return store_cache_disable(this_leaf, buf, count, slot); \
551} 515}
@@ -558,10 +522,7 @@ static struct _cache_attr cache_disable_1 = __ATTR(cache_disable_1, 0644,
558 show_cache_disable_1, store_cache_disable_1); 522 show_cache_disable_1, store_cache_disable_1);
559 523
560#else /* CONFIG_AMD_NB */ 524#else /* CONFIG_AMD_NB */
561static void __cpuinit 525#define amd_init_l3_cache(x, y)
562amd_check_l3_disable(struct _cpuid4_info_regs *this_leaf, int index)
563{
564};
565#endif /* CONFIG_AMD_NB */ 526#endif /* CONFIG_AMD_NB */
566 527
567static int 528static int
@@ -575,7 +536,7 @@ __cpuinit cpuid4_cache_lookup_regs(int index,
575 536
576 if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) { 537 if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
577 amd_cpuid4(index, &eax, &ebx, &ecx); 538 amd_cpuid4(index, &eax, &ebx, &ecx);
578 amd_check_l3_disable(this_leaf, index); 539 amd_init_l3_cache(this_leaf, index);
579 } else { 540 } else {
580 cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx); 541 cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx);
581 } 542 }
@@ -983,30 +944,48 @@ define_one_ro(size);
983define_one_ro(shared_cpu_map); 944define_one_ro(shared_cpu_map);
984define_one_ro(shared_cpu_list); 945define_one_ro(shared_cpu_list);
985 946
986#define DEFAULT_SYSFS_CACHE_ATTRS \
987 &type.attr, \
988 &level.attr, \
989 &coherency_line_size.attr, \
990 &physical_line_partition.attr, \
991 &ways_of_associativity.attr, \
992 &number_of_sets.attr, \
993 &size.attr, \
994 &shared_cpu_map.attr, \
995 &shared_cpu_list.attr
996
997static struct attribute *default_attrs[] = { 947static struct attribute *default_attrs[] = {
998 DEFAULT_SYSFS_CACHE_ATTRS, 948 &type.attr,
949 &level.attr,
950 &coherency_line_size.attr,
951 &physical_line_partition.attr,
952 &ways_of_associativity.attr,
953 &number_of_sets.attr,
954 &size.attr,
955 &shared_cpu_map.attr,
956 &shared_cpu_list.attr,
999 NULL 957 NULL
1000}; 958};
1001 959
1002static struct attribute *default_l3_attrs[] = {
1003 DEFAULT_SYSFS_CACHE_ATTRS,
1004#ifdef CONFIG_AMD_NB 960#ifdef CONFIG_AMD_NB
1005 &cache_disable_0.attr, 961static struct attribute ** __cpuinit amd_l3_attrs(void)
1006 &cache_disable_1.attr, 962{
963 static struct attribute **attrs;
964 int n;
965
966 if (attrs)
967 return attrs;
968
969 n = sizeof (default_attrs) / sizeof (struct attribute *);
970
971 if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
972 n += 2;
973
974 attrs = kzalloc(n * sizeof (struct attribute *), GFP_KERNEL);
975 if (attrs == NULL)
976 return attrs = default_attrs;
977
978 for (n = 0; default_attrs[n]; n++)
979 attrs[n] = default_attrs[n];
980
981 if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE)) {
982 attrs[n++] = &cache_disable_0.attr;
983 attrs[n++] = &cache_disable_1.attr;
984 }
985
986 return attrs;
987}
1007#endif 988#endif
1008 NULL
1009};
1010 989
1011static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf) 990static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
1012{ 991{
@@ -1117,11 +1096,11 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev)
1117 1096
1118 this_leaf = CPUID4_INFO_IDX(cpu, i); 1097 this_leaf = CPUID4_INFO_IDX(cpu, i);
1119 1098
1120 if (this_leaf->l3 && this_leaf->l3->can_disable) 1099 ktype_cache.default_attrs = default_attrs;
1121 ktype_cache.default_attrs = default_l3_attrs; 1100#ifdef CONFIG_AMD_NB
1122 else 1101 if (this_leaf->l3)
1123 ktype_cache.default_attrs = default_attrs; 1102 ktype_cache.default_attrs = amd_l3_attrs();
1124 1103#endif
1125 retval = kobject_init_and_add(&(this_object->kobj), 1104 retval = kobject_init_and_add(&(this_object->kobj),
1126 &ktype_cache, 1105 &ktype_cache,
1127 per_cpu(ici_cache_kobject, cpu), 1106 per_cpu(ici_cache_kobject, cpu),
diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c
index e7dbde7bfedb..a77971979564 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-inject.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c
@@ -25,6 +25,7 @@
25#include <linux/gfp.h> 25#include <linux/gfp.h>
26#include <asm/mce.h> 26#include <asm/mce.h>
27#include <asm/apic.h> 27#include <asm/apic.h>
28#include <asm/nmi.h>
28 29
29/* Update fake mce registers on current CPU. */ 30/* Update fake mce registers on current CPU. */
30static void inject_mce(struct mce *m) 31static void inject_mce(struct mce *m)
@@ -83,7 +84,7 @@ static int mce_raise_notify(struct notifier_block *self,
83 struct die_args *args = (struct die_args *)data; 84 struct die_args *args = (struct die_args *)data;
84 int cpu = smp_processor_id(); 85 int cpu = smp_processor_id();
85 struct mce *m = &__get_cpu_var(injectm); 86 struct mce *m = &__get_cpu_var(injectm);
86 if (val != DIE_NMI_IPI || !cpumask_test_cpu(cpu, mce_inject_cpumask)) 87 if (val != DIE_NMI || !cpumask_test_cpu(cpu, mce_inject_cpumask))
87 return NOTIFY_DONE; 88 return NOTIFY_DONE;
88 cpumask_clear_cpu(cpu, mce_inject_cpumask); 89 cpumask_clear_cpu(cpu, mce_inject_cpumask);
89 if (m->inject_flags & MCJ_EXCEPTION) 90 if (m->inject_flags & MCJ_EXCEPTION)
@@ -95,7 +96,7 @@ static int mce_raise_notify(struct notifier_block *self,
95 96
96static struct notifier_block mce_raise_nb = { 97static struct notifier_block mce_raise_nb = {
97 .notifier_call = mce_raise_notify, 98 .notifier_call = mce_raise_notify,
98 .priority = 1000, 99 .priority = NMI_LOCAL_NORMAL_PRIOR,
99}; 100};
100 101
101/* Inject mce on current CPU */ 102/* Inject mce on current CPU */
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 7a35b72d7c03..d916183b7f9c 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -326,7 +326,7 @@ static void mce_panic(char *msg, struct mce *final, char *exp)
326 326
327static int msr_to_offset(u32 msr) 327static int msr_to_offset(u32 msr)
328{ 328{
329 unsigned bank = __get_cpu_var(injectm.bank); 329 unsigned bank = __this_cpu_read(injectm.bank);
330 330
331 if (msr == rip_msr) 331 if (msr == rip_msr)
332 return offsetof(struct mce, ip); 332 return offsetof(struct mce, ip);
@@ -346,7 +346,7 @@ static u64 mce_rdmsrl(u32 msr)
346{ 346{
347 u64 v; 347 u64 v;
348 348
349 if (__get_cpu_var(injectm).finished) { 349 if (__this_cpu_read(injectm.finished)) {
350 int offset = msr_to_offset(msr); 350 int offset = msr_to_offset(msr);
351 351
352 if (offset < 0) 352 if (offset < 0)
@@ -369,7 +369,7 @@ static u64 mce_rdmsrl(u32 msr)
369 369
370static void mce_wrmsrl(u32 msr, u64 v) 370static void mce_wrmsrl(u32 msr, u64 v)
371{ 371{
372 if (__get_cpu_var(injectm).finished) { 372 if (__this_cpu_read(injectm.finished)) {
373 int offset = msr_to_offset(msr); 373 int offset = msr_to_offset(msr);
374 374
375 if (offset >= 0) 375 if (offset >= 0)
@@ -1159,7 +1159,7 @@ static void mce_start_timer(unsigned long data)
1159 1159
1160 WARN_ON(smp_processor_id() != data); 1160 WARN_ON(smp_processor_id() != data);
1161 1161
1162 if (mce_available(&current_cpu_data)) { 1162 if (mce_available(__this_cpu_ptr(&cpu_info))) {
1163 machine_check_poll(MCP_TIMESTAMP, 1163 machine_check_poll(MCP_TIMESTAMP,
1164 &__get_cpu_var(mce_poll_banks)); 1164 &__get_cpu_var(mce_poll_banks));
1165 } 1165 }
@@ -1767,7 +1767,7 @@ static int mce_shutdown(struct sys_device *dev)
1767static int mce_resume(struct sys_device *dev) 1767static int mce_resume(struct sys_device *dev)
1768{ 1768{
1769 __mcheck_cpu_init_generic(); 1769 __mcheck_cpu_init_generic();
1770 __mcheck_cpu_init_vendor(&current_cpu_data); 1770 __mcheck_cpu_init_vendor(__this_cpu_ptr(&cpu_info));
1771 1771
1772 return 0; 1772 return 0;
1773} 1773}
@@ -1775,7 +1775,7 @@ static int mce_resume(struct sys_device *dev)
1775static void mce_cpu_restart(void *data) 1775static void mce_cpu_restart(void *data)
1776{ 1776{
1777 del_timer_sync(&__get_cpu_var(mce_timer)); 1777 del_timer_sync(&__get_cpu_var(mce_timer));
1778 if (!mce_available(&current_cpu_data)) 1778 if (!mce_available(__this_cpu_ptr(&cpu_info)))
1779 return; 1779 return;
1780 __mcheck_cpu_init_generic(); 1780 __mcheck_cpu_init_generic();
1781 __mcheck_cpu_init_timer(); 1781 __mcheck_cpu_init_timer();
@@ -1790,7 +1790,7 @@ static void mce_restart(void)
1790/* Toggle features for corrected errors */ 1790/* Toggle features for corrected errors */
1791static void mce_disable_ce(void *all) 1791static void mce_disable_ce(void *all)
1792{ 1792{
1793 if (!mce_available(&current_cpu_data)) 1793 if (!mce_available(__this_cpu_ptr(&cpu_info)))
1794 return; 1794 return;
1795 if (all) 1795 if (all)
1796 del_timer_sync(&__get_cpu_var(mce_timer)); 1796 del_timer_sync(&__get_cpu_var(mce_timer));
@@ -1799,7 +1799,7 @@ static void mce_disable_ce(void *all)
1799 1799
1800static void mce_enable_ce(void *all) 1800static void mce_enable_ce(void *all)
1801{ 1801{
1802 if (!mce_available(&current_cpu_data)) 1802 if (!mce_available(__this_cpu_ptr(&cpu_info)))
1803 return; 1803 return;
1804 cmci_reenable(); 1804 cmci_reenable();
1805 cmci_recheck(); 1805 cmci_recheck();
@@ -2022,7 +2022,7 @@ static void __cpuinit mce_disable_cpu(void *h)
2022 unsigned long action = *(unsigned long *)h; 2022 unsigned long action = *(unsigned long *)h;
2023 int i; 2023 int i;
2024 2024
2025 if (!mce_available(&current_cpu_data)) 2025 if (!mce_available(__this_cpu_ptr(&cpu_info)))
2026 return; 2026 return;
2027 2027
2028 if (!(action & CPU_TASKS_FROZEN)) 2028 if (!(action & CPU_TASKS_FROZEN))
@@ -2040,7 +2040,7 @@ static void __cpuinit mce_reenable_cpu(void *h)
2040 unsigned long action = *(unsigned long *)h; 2040 unsigned long action = *(unsigned long *)h;
2041 int i; 2041 int i;
2042 2042
2043 if (!mce_available(&current_cpu_data)) 2043 if (!mce_available(__this_cpu_ptr(&cpu_info)))
2044 return; 2044 return;
2045 2045
2046 if (!(action & CPU_TASKS_FROZEN)) 2046 if (!(action & CPU_TASKS_FROZEN))
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index 80c482382d5c..5bf2fac52aca 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -31,8 +31,6 @@
31#include <asm/mce.h> 31#include <asm/mce.h>
32#include <asm/msr.h> 32#include <asm/msr.h>
33 33
34#define PFX "mce_threshold: "
35#define VERSION "version 1.1.1"
36#define NR_BANKS 6 34#define NR_BANKS 6
37#define NR_BLOCKS 9 35#define NR_BLOCKS 9
38#define THRESHOLD_MAX 0xFFF 36#define THRESHOLD_MAX 0xFFF
@@ -59,12 +57,6 @@ struct threshold_block {
59 struct list_head miscj; 57 struct list_head miscj;
60}; 58};
61 59
62/* defaults used early on boot */
63static struct threshold_block threshold_defaults = {
64 .interrupt_enable = 0,
65 .threshold_limit = THRESHOLD_MAX,
66};
67
68struct threshold_bank { 60struct threshold_bank {
69 struct kobject *kobj; 61 struct kobject *kobj;
70 struct threshold_block *blocks; 62 struct threshold_block *blocks;
@@ -89,50 +81,101 @@ static void amd_threshold_interrupt(void);
89struct thresh_restart { 81struct thresh_restart {
90 struct threshold_block *b; 82 struct threshold_block *b;
91 int reset; 83 int reset;
84 int set_lvt_off;
85 int lvt_off;
92 u16 old_limit; 86 u16 old_limit;
93}; 87};
94 88
89static int lvt_off_valid(struct threshold_block *b, int apic, u32 lo, u32 hi)
90{
91 int msr = (hi & MASK_LVTOFF_HI) >> 20;
92
93 if (apic < 0) {
94 pr_err(FW_BUG "cpu %d, failed to setup threshold interrupt "
95 "for bank %d, block %d (MSR%08X=0x%x%08x)\n", b->cpu,
96 b->bank, b->block, b->address, hi, lo);
97 return 0;
98 }
99
100 if (apic != msr) {
101 pr_err(FW_BUG "cpu %d, invalid threshold interrupt offset %d "
102 "for bank %d, block %d (MSR%08X=0x%x%08x)\n",
103 b->cpu, apic, b->bank, b->block, b->address, hi, lo);
104 return 0;
105 }
106
107 return 1;
108};
109
95/* must be called with correct cpu affinity */ 110/* must be called with correct cpu affinity */
96/* Called via smp_call_function_single() */ 111/* Called via smp_call_function_single() */
97static void threshold_restart_bank(void *_tr) 112static void threshold_restart_bank(void *_tr)
98{ 113{
99 struct thresh_restart *tr = _tr; 114 struct thresh_restart *tr = _tr;
100 u32 mci_misc_hi, mci_misc_lo; 115 u32 hi, lo;
101 116
102 rdmsr(tr->b->address, mci_misc_lo, mci_misc_hi); 117 rdmsr(tr->b->address, lo, hi);
103 118
104 if (tr->b->threshold_limit < (mci_misc_hi & THRESHOLD_MAX)) 119 if (tr->b->threshold_limit < (hi & THRESHOLD_MAX))
105 tr->reset = 1; /* limit cannot be lower than err count */ 120 tr->reset = 1; /* limit cannot be lower than err count */
106 121
107 if (tr->reset) { /* reset err count and overflow bit */ 122 if (tr->reset) { /* reset err count and overflow bit */
108 mci_misc_hi = 123 hi =
109 (mci_misc_hi & ~(MASK_ERR_COUNT_HI | MASK_OVERFLOW_HI)) | 124 (hi & ~(MASK_ERR_COUNT_HI | MASK_OVERFLOW_HI)) |
110 (THRESHOLD_MAX - tr->b->threshold_limit); 125 (THRESHOLD_MAX - tr->b->threshold_limit);
111 } else if (tr->old_limit) { /* change limit w/o reset */ 126 } else if (tr->old_limit) { /* change limit w/o reset */
112 int new_count = (mci_misc_hi & THRESHOLD_MAX) + 127 int new_count = (hi & THRESHOLD_MAX) +
113 (tr->old_limit - tr->b->threshold_limit); 128 (tr->old_limit - tr->b->threshold_limit);
114 129
115 mci_misc_hi = (mci_misc_hi & ~MASK_ERR_COUNT_HI) | 130 hi = (hi & ~MASK_ERR_COUNT_HI) |
116 (new_count & THRESHOLD_MAX); 131 (new_count & THRESHOLD_MAX);
117 } 132 }
118 133
134 if (tr->set_lvt_off) {
135 if (lvt_off_valid(tr->b, tr->lvt_off, lo, hi)) {
136 /* set new lvt offset */
137 hi &= ~MASK_LVTOFF_HI;
138 hi |= tr->lvt_off << 20;
139 }
140 }
141
119 tr->b->interrupt_enable ? 142 tr->b->interrupt_enable ?
120 (mci_misc_hi = (mci_misc_hi & ~MASK_INT_TYPE_HI) | INT_TYPE_APIC) : 143 (hi = (hi & ~MASK_INT_TYPE_HI) | INT_TYPE_APIC) :
121 (mci_misc_hi &= ~MASK_INT_TYPE_HI); 144 (hi &= ~MASK_INT_TYPE_HI);
122 145
123 mci_misc_hi |= MASK_COUNT_EN_HI; 146 hi |= MASK_COUNT_EN_HI;
124 wrmsr(tr->b->address, mci_misc_lo, mci_misc_hi); 147 wrmsr(tr->b->address, lo, hi);
148}
149
150static void mce_threshold_block_init(struct threshold_block *b, int offset)
151{
152 struct thresh_restart tr = {
153 .b = b,
154 .set_lvt_off = 1,
155 .lvt_off = offset,
156 };
157
158 b->threshold_limit = THRESHOLD_MAX;
159 threshold_restart_bank(&tr);
160};
161
162static int setup_APIC_mce(int reserved, int new)
163{
164 if (reserved < 0 && !setup_APIC_eilvt(new, THRESHOLD_APIC_VECTOR,
165 APIC_EILVT_MSG_FIX, 0))
166 return new;
167
168 return reserved;
125} 169}
126 170
127/* cpu init entry point, called from mce.c with preempt off */ 171/* cpu init entry point, called from mce.c with preempt off */
128void mce_amd_feature_init(struct cpuinfo_x86 *c) 172void mce_amd_feature_init(struct cpuinfo_x86 *c)
129{ 173{
174 struct threshold_block b;
130 unsigned int cpu = smp_processor_id(); 175 unsigned int cpu = smp_processor_id();
131 u32 low = 0, high = 0, address = 0; 176 u32 low = 0, high = 0, address = 0;
132 unsigned int bank, block; 177 unsigned int bank, block;
133 struct thresh_restart tr; 178 int offset = -1;
134 int lvt_off = -1;
135 u8 offset;
136 179
137 for (bank = 0; bank < NR_BANKS; ++bank) { 180 for (bank = 0; bank < NR_BANKS; ++bank) {
138 for (block = 0; block < NR_BLOCKS; ++block) { 181 for (block = 0; block < NR_BLOCKS; ++block) {
@@ -163,39 +206,16 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
163 if (shared_bank[bank] && c->cpu_core_id) 206 if (shared_bank[bank] && c->cpu_core_id)
164 break; 207 break;
165#endif 208#endif
166 offset = (high & MASK_LVTOFF_HI) >> 20; 209 offset = setup_APIC_mce(offset,
167 if (lvt_off < 0) { 210 (high & MASK_LVTOFF_HI) >> 20);
168 if (setup_APIC_eilvt(offset,
169 THRESHOLD_APIC_VECTOR,
170 APIC_EILVT_MSG_FIX, 0)) {
171 pr_err(FW_BUG "cpu %d, failed to "
172 "setup threshold interrupt "
173 "for bank %d, block %d "
174 "(MSR%08X=0x%x%08x)",
175 smp_processor_id(), bank, block,
176 address, high, low);
177 continue;
178 }
179 lvt_off = offset;
180 } else if (lvt_off != offset) {
181 pr_err(FW_BUG "cpu %d, invalid threshold "
182 "interrupt offset %d for bank %d,"
183 "block %d (MSR%08X=0x%x%08x)",
184 smp_processor_id(), lvt_off, bank,
185 block, address, high, low);
186 continue;
187 }
188
189 high &= ~MASK_LVTOFF_HI;
190 high |= lvt_off << 20;
191 wrmsr(address, low, high);
192 211
193 threshold_defaults.address = address; 212 memset(&b, 0, sizeof(b));
194 tr.b = &threshold_defaults; 213 b.cpu = cpu;
195 tr.reset = 0; 214 b.bank = bank;
196 tr.old_limit = 0; 215 b.block = block;
197 threshold_restart_bank(&tr); 216 b.address = address;
198 217
218 mce_threshold_block_init(&b, offset);
199 mce_threshold_vector = amd_threshold_interrupt; 219 mce_threshold_vector = amd_threshold_interrupt;
200 } 220 }
201 } 221 }
@@ -298,9 +318,8 @@ store_interrupt_enable(struct threshold_block *b, const char *buf, size_t size)
298 318
299 b->interrupt_enable = !!new; 319 b->interrupt_enable = !!new;
300 320
321 memset(&tr, 0, sizeof(tr));
301 tr.b = b; 322 tr.b = b;
302 tr.reset = 0;
303 tr.old_limit = 0;
304 323
305 smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1); 324 smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1);
306 325
@@ -321,10 +340,10 @@ store_threshold_limit(struct threshold_block *b, const char *buf, size_t size)
321 if (new < 1) 340 if (new < 1)
322 new = 1; 341 new = 1;
323 342
343 memset(&tr, 0, sizeof(tr));
324 tr.old_limit = b->threshold_limit; 344 tr.old_limit = b->threshold_limit;
325 b->threshold_limit = new; 345 b->threshold_limit = new;
326 tr.b = b; 346 tr.b = b;
327 tr.reset = 0;
328 347
329 smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1); 348 smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1);
330 349
@@ -603,9 +622,9 @@ static __cpuinit int threshold_create_device(unsigned int cpu)
603 continue; 622 continue;
604 err = threshold_create_bank(cpu, bank); 623 err = threshold_create_bank(cpu, bank);
605 if (err) 624 if (err)
606 goto out; 625 return err;
607 } 626 }
608out: 627
609 return err; 628 return err;
610} 629}
611 630
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c
index 6fcd0936194f..8694ef56459d 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c
@@ -130,7 +130,7 @@ void cmci_recheck(void)
130 unsigned long flags; 130 unsigned long flags;
131 int banks; 131 int banks;
132 132
133 if (!mce_available(&current_cpu_data) || !cmci_supported(&banks)) 133 if (!mce_available(__this_cpu_ptr(&cpu_info)) || !cmci_supported(&banks))
134 return; 134 return;
135 local_irq_save(flags); 135 local_irq_save(flags);
136 machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned)); 136 machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned));
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index 4b683267eca5..e12246ff5aa6 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -53,8 +53,13 @@ struct thermal_state {
53 struct _thermal_state core_power_limit; 53 struct _thermal_state core_power_limit;
54 struct _thermal_state package_throttle; 54 struct _thermal_state package_throttle;
55 struct _thermal_state package_power_limit; 55 struct _thermal_state package_power_limit;
56 struct _thermal_state core_thresh0;
57 struct _thermal_state core_thresh1;
56}; 58};
57 59
60/* Callback to handle core threshold interrupts */
61int (*platform_thermal_notify)(__u64 msr_val);
62
58static DEFINE_PER_CPU(struct thermal_state, thermal_state); 63static DEFINE_PER_CPU(struct thermal_state, thermal_state);
59 64
60static atomic_t therm_throt_en = ATOMIC_INIT(0); 65static atomic_t therm_throt_en = ATOMIC_INIT(0);
@@ -200,6 +205,22 @@ static int therm_throt_process(bool new_event, int event, int level)
200 return 0; 205 return 0;
201} 206}
202 207
208static int thresh_event_valid(int event)
209{
210 struct _thermal_state *state;
211 unsigned int this_cpu = smp_processor_id();
212 struct thermal_state *pstate = &per_cpu(thermal_state, this_cpu);
213 u64 now = get_jiffies_64();
214
215 state = (event == 0) ? &pstate->core_thresh0 : &pstate->core_thresh1;
216
217 if (time_before64(now, state->next_check))
218 return 0;
219
220 state->next_check = now + CHECK_INTERVAL;
221 return 1;
222}
223
203#ifdef CONFIG_SYSFS 224#ifdef CONFIG_SYSFS
204/* Add/Remove thermal_throttle interface for CPU device: */ 225/* Add/Remove thermal_throttle interface for CPU device: */
205static __cpuinit int thermal_throttle_add_dev(struct sys_device *sys_dev, 226static __cpuinit int thermal_throttle_add_dev(struct sys_device *sys_dev,
@@ -313,6 +334,22 @@ device_initcall(thermal_throttle_init_device);
313#define PACKAGE_THROTTLED ((__u64)2 << 62) 334#define PACKAGE_THROTTLED ((__u64)2 << 62)
314#define PACKAGE_POWER_LIMIT ((__u64)3 << 62) 335#define PACKAGE_POWER_LIMIT ((__u64)3 << 62)
315 336
337static void notify_thresholds(__u64 msr_val)
338{
339 /* check whether the interrupt handler is defined;
340 * otherwise simply return
341 */
342 if (!platform_thermal_notify)
343 return;
344
345 /* lower threshold reached */
346 if ((msr_val & THERM_LOG_THRESHOLD0) && thresh_event_valid(0))
347 platform_thermal_notify(msr_val);
348 /* higher threshold reached */
349 if ((msr_val & THERM_LOG_THRESHOLD1) && thresh_event_valid(1))
350 platform_thermal_notify(msr_val);
351}
352
316/* Thermal transition interrupt handler */ 353/* Thermal transition interrupt handler */
317static void intel_thermal_interrupt(void) 354static void intel_thermal_interrupt(void)
318{ 355{
@@ -321,6 +358,9 @@ static void intel_thermal_interrupt(void)
321 358
322 rdmsrl(MSR_IA32_THERM_STATUS, msr_val); 359 rdmsrl(MSR_IA32_THERM_STATUS, msr_val);
323 360
361 /* Check for violation of core thermal thresholds*/
362 notify_thresholds(msr_val);
363
324 if (therm_throt_process(msr_val & THERM_STATUS_PROCHOT, 364 if (therm_throt_process(msr_val & THERM_STATUS_PROCHOT,
325 THERMAL_THROTTLING_EVENT, 365 THERMAL_THROTTLING_EVENT,
326 CORE_LEVEL) != 0) 366 CORE_LEVEL) != 0)
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 6d75b9145b13..9d977a2ea693 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -330,9 +330,6 @@ static bool reserve_pmc_hardware(void)
330{ 330{
331 int i; 331 int i;
332 332
333 if (nmi_watchdog == NMI_LOCAL_APIC)
334 disable_lapic_nmi_watchdog();
335
336 for (i = 0; i < x86_pmu.num_counters; i++) { 333 for (i = 0; i < x86_pmu.num_counters; i++) {
337 if (!reserve_perfctr_nmi(x86_pmu.perfctr + i)) 334 if (!reserve_perfctr_nmi(x86_pmu.perfctr + i))
338 goto perfctr_fail; 335 goto perfctr_fail;
@@ -355,9 +352,6 @@ perfctr_fail:
355 for (i--; i >= 0; i--) 352 for (i--; i >= 0; i--)
356 release_perfctr_nmi(x86_pmu.perfctr + i); 353 release_perfctr_nmi(x86_pmu.perfctr + i);
357 354
358 if (nmi_watchdog == NMI_LOCAL_APIC)
359 enable_lapic_nmi_watchdog();
360
361 return false; 355 return false;
362} 356}
363 357
@@ -369,9 +363,6 @@ static void release_pmc_hardware(void)
369 release_perfctr_nmi(x86_pmu.perfctr + i); 363 release_perfctr_nmi(x86_pmu.perfctr + i);
370 release_evntsel_nmi(x86_pmu.eventsel + i); 364 release_evntsel_nmi(x86_pmu.eventsel + i);
371 } 365 }
372
373 if (nmi_watchdog == NMI_LOCAL_APIC)
374 enable_lapic_nmi_watchdog();
375} 366}
376 367
377#else 368#else
@@ -384,15 +375,53 @@ static void release_pmc_hardware(void) {}
384static bool check_hw_exists(void) 375static bool check_hw_exists(void)
385{ 376{
386 u64 val, val_new = 0; 377 u64 val, val_new = 0;
387 int ret = 0; 378 int i, reg, ret = 0;
379
380 /*
381 * Check to see if the BIOS enabled any of the counters, if so
382 * complain and bail.
383 */
384 for (i = 0; i < x86_pmu.num_counters; i++) {
385 reg = x86_pmu.eventsel + i;
386 ret = rdmsrl_safe(reg, &val);
387 if (ret)
388 goto msr_fail;
389 if (val & ARCH_PERFMON_EVENTSEL_ENABLE)
390 goto bios_fail;
391 }
388 392
393 if (x86_pmu.num_counters_fixed) {
394 reg = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
395 ret = rdmsrl_safe(reg, &val);
396 if (ret)
397 goto msr_fail;
398 for (i = 0; i < x86_pmu.num_counters_fixed; i++) {
399 if (val & (0x03 << i*4))
400 goto bios_fail;
401 }
402 }
403
404 /*
405 * Now write a value and read it back to see if it matches,
406 * this is needed to detect certain hardware emulators (qemu/kvm)
407 * that don't trap on the MSR access and always return 0s.
408 */
389 val = 0xabcdUL; 409 val = 0xabcdUL;
390 ret |= checking_wrmsrl(x86_pmu.perfctr, val); 410 ret = checking_wrmsrl(x86_pmu.perfctr, val);
391 ret |= rdmsrl_safe(x86_pmu.perfctr, &val_new); 411 ret |= rdmsrl_safe(x86_pmu.perfctr, &val_new);
392 if (ret || val != val_new) 412 if (ret || val != val_new)
393 return false; 413 goto msr_fail;
394 414
395 return true; 415 return true;
416
417bios_fail:
418 printk(KERN_CONT "Broken BIOS detected, using software events only.\n");
419 printk(KERN_ERR FW_BUG "the BIOS has corrupted hw-PMU resources (MSR %x is %Lx)\n", reg, val);
420 return false;
421
422msr_fail:
423 printk(KERN_CONT "Broken PMU hardware detected, using software events only.\n");
424 return false;
396} 425}
397 426
398static void reserve_ds_buffers(void); 427static void reserve_ds_buffers(void);
@@ -451,7 +480,7 @@ static int x86_setup_perfctr(struct perf_event *event)
451 struct hw_perf_event *hwc = &event->hw; 480 struct hw_perf_event *hwc = &event->hw;
452 u64 config; 481 u64 config;
453 482
454 if (!hwc->sample_period) { 483 if (!is_sampling_event(event)) {
455 hwc->sample_period = x86_pmu.max_period; 484 hwc->sample_period = x86_pmu.max_period;
456 hwc->last_period = hwc->sample_period; 485 hwc->last_period = hwc->sample_period;
457 local64_set(&hwc->period_left, hwc->sample_period); 486 local64_set(&hwc->period_left, hwc->sample_period);
@@ -968,8 +997,7 @@ x86_perf_event_set_period(struct perf_event *event)
968 997
969static void x86_pmu_enable_event(struct perf_event *event) 998static void x86_pmu_enable_event(struct perf_event *event)
970{ 999{
971 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 1000 if (__this_cpu_read(cpu_hw_events.enabled))
972 if (cpuc->enabled)
973 __x86_pmu_enable_event(&event->hw, 1001 __x86_pmu_enable_event(&event->hw,
974 ARCH_PERFMON_EVENTSEL_ENABLE); 1002 ARCH_PERFMON_EVENTSEL_ENABLE);
975} 1003}
@@ -1239,11 +1267,10 @@ perf_event_nmi_handler(struct notifier_block *self,
1239 1267
1240 switch (cmd) { 1268 switch (cmd) {
1241 case DIE_NMI: 1269 case DIE_NMI:
1242 case DIE_NMI_IPI:
1243 break; 1270 break;
1244 case DIE_NMIUNKNOWN: 1271 case DIE_NMIUNKNOWN:
1245 this_nmi = percpu_read(irq_stat.__nmi_count); 1272 this_nmi = percpu_read(irq_stat.__nmi_count);
1246 if (this_nmi != __get_cpu_var(pmu_nmi).marked) 1273 if (this_nmi != __this_cpu_read(pmu_nmi.marked))
1247 /* let the kernel handle the unknown nmi */ 1274 /* let the kernel handle the unknown nmi */
1248 return NOTIFY_DONE; 1275 return NOTIFY_DONE;
1249 /* 1276 /*
@@ -1267,8 +1294,8 @@ perf_event_nmi_handler(struct notifier_block *self,
1267 this_nmi = percpu_read(irq_stat.__nmi_count); 1294 this_nmi = percpu_read(irq_stat.__nmi_count);
1268 if ((handled > 1) || 1295 if ((handled > 1) ||
1269 /* the next nmi could be a back-to-back nmi */ 1296 /* the next nmi could be a back-to-back nmi */
1270 ((__get_cpu_var(pmu_nmi).marked == this_nmi) && 1297 ((__this_cpu_read(pmu_nmi.marked) == this_nmi) &&
1271 (__get_cpu_var(pmu_nmi).handled > 1))) { 1298 (__this_cpu_read(pmu_nmi.handled) > 1))) {
1272 /* 1299 /*
1273 * We could have two subsequent back-to-back nmis: The 1300 * We could have two subsequent back-to-back nmis: The
1274 * first handles more than one counter, the 2nd 1301 * first handles more than one counter, the 2nd
@@ -1279,8 +1306,8 @@ perf_event_nmi_handler(struct notifier_block *self,
1279 * handling more than one counter. We will mark the 1306 * handling more than one counter. We will mark the
1280 * next (3rd) and then drop it if unhandled. 1307 * next (3rd) and then drop it if unhandled.
1281 */ 1308 */
1282 __get_cpu_var(pmu_nmi).marked = this_nmi + 1; 1309 __this_cpu_write(pmu_nmi.marked, this_nmi + 1);
1283 __get_cpu_var(pmu_nmi).handled = handled; 1310 __this_cpu_write(pmu_nmi.handled, handled);
1284 } 1311 }
1285 1312
1286 return NOTIFY_STOP; 1313 return NOTIFY_STOP;
@@ -1289,7 +1316,7 @@ perf_event_nmi_handler(struct notifier_block *self,
1289static __read_mostly struct notifier_block perf_event_nmi_notifier = { 1316static __read_mostly struct notifier_block perf_event_nmi_notifier = {
1290 .notifier_call = perf_event_nmi_handler, 1317 .notifier_call = perf_event_nmi_handler,
1291 .next = NULL, 1318 .next = NULL,
1292 .priority = 1 1319 .priority = NMI_LOCAL_LOW_PRIOR,
1293}; 1320};
1294 1321
1295static struct event_constraint unconstrained; 1322static struct event_constraint unconstrained;
@@ -1362,7 +1389,7 @@ static void __init pmu_check_apic(void)
1362 pr_info("no hardware sampling interrupt available.\n"); 1389 pr_info("no hardware sampling interrupt available.\n");
1363} 1390}
1364 1391
1365void __init init_hw_perf_events(void) 1392int __init init_hw_perf_events(void)
1366{ 1393{
1367 struct event_constraint *c; 1394 struct event_constraint *c;
1368 int err; 1395 int err;
@@ -1377,20 +1404,18 @@ void __init init_hw_perf_events(void)
1377 err = amd_pmu_init(); 1404 err = amd_pmu_init();
1378 break; 1405 break;
1379 default: 1406 default:
1380 return; 1407 return 0;
1381 } 1408 }
1382 if (err != 0) { 1409 if (err != 0) {
1383 pr_cont("no PMU driver, software events only.\n"); 1410 pr_cont("no PMU driver, software events only.\n");
1384 return; 1411 return 0;
1385 } 1412 }
1386 1413
1387 pmu_check_apic(); 1414 pmu_check_apic();
1388 1415
1389 /* sanity check that the hardware exists or is emulated */ 1416 /* sanity check that the hardware exists or is emulated */
1390 if (!check_hw_exists()) { 1417 if (!check_hw_exists())
1391 pr_cont("Broken PMU hardware detected, software events only.\n"); 1418 return 0;
1392 return;
1393 }
1394 1419
1395 pr_cont("%s PMU driver.\n", x86_pmu.name); 1420 pr_cont("%s PMU driver.\n", x86_pmu.name);
1396 1421
@@ -1438,9 +1463,12 @@ void __init init_hw_perf_events(void)
1438 pr_info("... fixed-purpose events: %d\n", x86_pmu.num_counters_fixed); 1463 pr_info("... fixed-purpose events: %d\n", x86_pmu.num_counters_fixed);
1439 pr_info("... event mask: %016Lx\n", x86_pmu.intel_ctrl); 1464 pr_info("... event mask: %016Lx\n", x86_pmu.intel_ctrl);
1440 1465
1441 perf_pmu_register(&pmu); 1466 perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW);
1442 perf_cpu_notifier(x86_pmu_notifier); 1467 perf_cpu_notifier(x86_pmu_notifier);
1468
1469 return 0;
1443} 1470}
1471early_initcall(init_hw_perf_events);
1444 1472
1445static inline void x86_pmu_read(struct perf_event *event) 1473static inline void x86_pmu_read(struct perf_event *event)
1446{ 1474{
@@ -1454,11 +1482,9 @@ static inline void x86_pmu_read(struct perf_event *event)
1454 */ 1482 */
1455static void x86_pmu_start_txn(struct pmu *pmu) 1483static void x86_pmu_start_txn(struct pmu *pmu)
1456{ 1484{
1457 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1458
1459 perf_pmu_disable(pmu); 1485 perf_pmu_disable(pmu);
1460 cpuc->group_flag |= PERF_EVENT_TXN; 1486 __this_cpu_or(cpu_hw_events.group_flag, PERF_EVENT_TXN);
1461 cpuc->n_txn = 0; 1487 __this_cpu_write(cpu_hw_events.n_txn, 0);
1462} 1488}
1463 1489
1464/* 1490/*
@@ -1468,14 +1494,12 @@ static void x86_pmu_start_txn(struct pmu *pmu)
1468 */ 1494 */
1469static void x86_pmu_cancel_txn(struct pmu *pmu) 1495static void x86_pmu_cancel_txn(struct pmu *pmu)
1470{ 1496{
1471 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 1497 __this_cpu_and(cpu_hw_events.group_flag, ~PERF_EVENT_TXN);
1472
1473 cpuc->group_flag &= ~PERF_EVENT_TXN;
1474 /* 1498 /*
1475 * Truncate the collected events. 1499 * Truncate the collected events.
1476 */ 1500 */
1477 cpuc->n_added -= cpuc->n_txn; 1501 __this_cpu_sub(cpu_hw_events.n_added, __this_cpu_read(cpu_hw_events.n_txn));
1478 cpuc->n_events -= cpuc->n_txn; 1502 __this_cpu_sub(cpu_hw_events.n_events, __this_cpu_read(cpu_hw_events.n_txn));
1479 perf_pmu_enable(pmu); 1503 perf_pmu_enable(pmu);
1480} 1504}
1481 1505
@@ -1686,7 +1710,7 @@ perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
1686 1710
1687 perf_callchain_store(entry, regs->ip); 1711 perf_callchain_store(entry, regs->ip);
1688 1712
1689 dump_trace(NULL, regs, NULL, regs->bp, &backtrace_ops, entry); 1713 dump_trace(NULL, regs, NULL, &backtrace_ops, entry);
1690} 1714}
1691 1715
1692#ifdef CONFIG_COMPAT 1716#ifdef CONFIG_COMPAT
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index e421b8cd6944..67e2202a6039 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -1,7 +1,5 @@
1#ifdef CONFIG_CPU_SUP_AMD 1#ifdef CONFIG_CPU_SUP_AMD
2 2
3static DEFINE_RAW_SPINLOCK(amd_nb_lock);
4
5static __initconst const u64 amd_hw_cache_event_ids 3static __initconst const u64 amd_hw_cache_event_ids
6 [PERF_COUNT_HW_CACHE_MAX] 4 [PERF_COUNT_HW_CACHE_MAX]
7 [PERF_COUNT_HW_CACHE_OP_MAX] 5 [PERF_COUNT_HW_CACHE_OP_MAX]
@@ -275,7 +273,7 @@ done:
275 return &emptyconstraint; 273 return &emptyconstraint;
276} 274}
277 275
278static struct amd_nb *amd_alloc_nb(int cpu, int nb_id) 276static struct amd_nb *amd_alloc_nb(int cpu)
279{ 277{
280 struct amd_nb *nb; 278 struct amd_nb *nb;
281 int i; 279 int i;
@@ -285,7 +283,7 @@ static struct amd_nb *amd_alloc_nb(int cpu, int nb_id)
285 if (!nb) 283 if (!nb)
286 return NULL; 284 return NULL;
287 285
288 nb->nb_id = nb_id; 286 nb->nb_id = -1;
289 287
290 /* 288 /*
291 * initialize all possible NB constraints 289 * initialize all possible NB constraints
@@ -306,7 +304,7 @@ static int amd_pmu_cpu_prepare(int cpu)
306 if (boot_cpu_data.x86_max_cores < 2) 304 if (boot_cpu_data.x86_max_cores < 2)
307 return NOTIFY_OK; 305 return NOTIFY_OK;
308 306
309 cpuc->amd_nb = amd_alloc_nb(cpu, -1); 307 cpuc->amd_nb = amd_alloc_nb(cpu);
310 if (!cpuc->amd_nb) 308 if (!cpuc->amd_nb)
311 return NOTIFY_BAD; 309 return NOTIFY_BAD;
312 310
@@ -325,8 +323,6 @@ static void amd_pmu_cpu_starting(int cpu)
325 nb_id = amd_get_nb_id(cpu); 323 nb_id = amd_get_nb_id(cpu);
326 WARN_ON_ONCE(nb_id == BAD_APICID); 324 WARN_ON_ONCE(nb_id == BAD_APICID);
327 325
328 raw_spin_lock(&amd_nb_lock);
329
330 for_each_online_cpu(i) { 326 for_each_online_cpu(i) {
331 nb = per_cpu(cpu_hw_events, i).amd_nb; 327 nb = per_cpu(cpu_hw_events, i).amd_nb;
332 if (WARN_ON_ONCE(!nb)) 328 if (WARN_ON_ONCE(!nb))
@@ -341,8 +337,6 @@ static void amd_pmu_cpu_starting(int cpu)
341 337
342 cpuc->amd_nb->nb_id = nb_id; 338 cpuc->amd_nb->nb_id = nb_id;
343 cpuc->amd_nb->refcnt++; 339 cpuc->amd_nb->refcnt++;
344
345 raw_spin_unlock(&amd_nb_lock);
346} 340}
347 341
348static void amd_pmu_cpu_dead(int cpu) 342static void amd_pmu_cpu_dead(int cpu)
@@ -354,8 +348,6 @@ static void amd_pmu_cpu_dead(int cpu)
354 348
355 cpuhw = &per_cpu(cpu_hw_events, cpu); 349 cpuhw = &per_cpu(cpu_hw_events, cpu);
356 350
357 raw_spin_lock(&amd_nb_lock);
358
359 if (cpuhw->amd_nb) { 351 if (cpuhw->amd_nb) {
360 struct amd_nb *nb = cpuhw->amd_nb; 352 struct amd_nb *nb = cpuhw->amd_nb;
361 353
@@ -364,8 +356,6 @@ static void amd_pmu_cpu_dead(int cpu)
364 356
365 cpuhw->amd_nb = NULL; 357 cpuhw->amd_nb = NULL;
366 } 358 }
367
368 raw_spin_unlock(&amd_nb_lock);
369} 359}
370 360
371static __initconst const struct x86_pmu amd_pmu = { 361static __initconst const struct x86_pmu amd_pmu = {
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index c8f5c088cad1..008835c1d79c 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -649,7 +649,7 @@ static void intel_pmu_enable_event(struct perf_event *event)
649 struct hw_perf_event *hwc = &event->hw; 649 struct hw_perf_event *hwc = &event->hw;
650 650
651 if (unlikely(hwc->idx == X86_PMC_IDX_FIXED_BTS)) { 651 if (unlikely(hwc->idx == X86_PMC_IDX_FIXED_BTS)) {
652 if (!__get_cpu_var(cpu_hw_events).enabled) 652 if (!__this_cpu_read(cpu_hw_events.enabled))
653 return; 653 return;
654 654
655 intel_pmu_enable_bts(hwc->config); 655 intel_pmu_enable_bts(hwc->config);
@@ -679,7 +679,7 @@ static int intel_pmu_save_and_restart(struct perf_event *event)
679 679
680static void intel_pmu_reset(void) 680static void intel_pmu_reset(void)
681{ 681{
682 struct debug_store *ds = __get_cpu_var(cpu_hw_events).ds; 682 struct debug_store *ds = __this_cpu_read(cpu_hw_events.ds);
683 unsigned long flags; 683 unsigned long flags;
684 int idx; 684 int idx;
685 685
@@ -816,6 +816,32 @@ static int intel_pmu_hw_config(struct perf_event *event)
816 if (ret) 816 if (ret)
817 return ret; 817 return ret;
818 818
819 if (event->attr.precise_ip &&
820 (event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) {
821 /*
822 * Use an alternative encoding for CPU_CLK_UNHALTED.THREAD_P
823 * (0x003c) so that we can use it with PEBS.
824 *
825 * The regular CPU_CLK_UNHALTED.THREAD_P event (0x003c) isn't
826 * PEBS capable. However we can use INST_RETIRED.ANY_P
827 * (0x00c0), which is a PEBS capable event, to get the same
828 * count.
829 *
830 * INST_RETIRED.ANY_P counts the number of cycles that retires
831 * CNTMASK instructions. By setting CNTMASK to a value (16)
832 * larger than the maximum number of instructions that can be
833 * retired per cycle (4) and then inverting the condition, we
834 * count all cycles that retire 16 or less instructions, which
835 * is every cycle.
836 *
837 * Thereby we gain a PEBS capable cycle counter.
838 */
839 u64 alt_config = 0x108000c0; /* INST_RETIRED.TOTAL_CYCLES */
840
841 alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK);
842 event->hw.config = alt_config;
843 }
844
819 if (event->attr.type != PERF_TYPE_RAW) 845 if (event->attr.type != PERF_TYPE_RAW)
820 return 0; 846 return 0;
821 847
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c
index 81400b93e694..e56b9bfbabd1 100644
--- a/arch/x86/kernel/cpu/perf_event_p4.c
+++ b/arch/x86/kernel/cpu/perf_event_p4.c
@@ -753,19 +753,21 @@ out:
753 753
754static inline int p4_pmu_clear_cccr_ovf(struct hw_perf_event *hwc) 754static inline int p4_pmu_clear_cccr_ovf(struct hw_perf_event *hwc)
755{ 755{
756 int overflow = 0; 756 u64 v;
757 u32 low, high;
758 757
759 rdmsr(hwc->config_base + hwc->idx, low, high); 758 /* an official way for overflow indication */
760 759 rdmsrl(hwc->config_base + hwc->idx, v);
761 /* we need to check high bit for unflagged overflows */ 760 if (v & P4_CCCR_OVF) {
762 if ((low & P4_CCCR_OVF) || !(high & (1 << 31))) { 761 wrmsrl(hwc->config_base + hwc->idx, v & ~P4_CCCR_OVF);
763 overflow = 1; 762 return 1;
764 (void)checking_wrmsrl(hwc->config_base + hwc->idx,
765 ((u64)low) & ~P4_CCCR_OVF);
766 } 763 }
767 764
768 return overflow; 765 /* it might be unflagged overflow */
766 rdmsrl(hwc->event_base + hwc->idx, v);
767 if (!(v & ARCH_P4_CNTRVAL_MASK))
768 return 1;
769
770 return 0;
769} 771}
770 772
771static void p4_pmu_disable_pebs(void) 773static void p4_pmu_disable_pebs(void)
@@ -1152,9 +1154,9 @@ static __initconst const struct x86_pmu p4_pmu = {
1152 */ 1154 */
1153 .num_counters = ARCH_P4_MAX_CCCR, 1155 .num_counters = ARCH_P4_MAX_CCCR,
1154 .apic = 1, 1156 .apic = 1,
1155 .cntval_bits = 40, 1157 .cntval_bits = ARCH_P4_CNTRVAL_BITS,
1156 .cntval_mask = (1ULL << 40) - 1, 1158 .cntval_mask = ARCH_P4_CNTRVAL_MASK,
1157 .max_period = (1ULL << 39) - 1, 1159 .max_period = (1ULL << (ARCH_P4_CNTRVAL_BITS - 1)) - 1,
1158 .hw_config = p4_hw_config, 1160 .hw_config = p4_hw_config,
1159 .schedule_events = p4_pmu_schedule_events, 1161 .schedule_events = p4_pmu_schedule_events,
1160 /* 1162 /*
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c
index d9f4ff8fcd69..d5a236615501 100644
--- a/arch/x86/kernel/cpu/perfctr-watchdog.c
+++ b/arch/x86/kernel/cpu/perfctr-watchdog.c
@@ -16,32 +16,12 @@
16#include <linux/kernel.h> 16#include <linux/kernel.h>
17#include <linux/bitops.h> 17#include <linux/bitops.h>
18#include <linux/smp.h> 18#include <linux/smp.h>
19#include <linux/nmi.h> 19#include <asm/nmi.h>
20#include <linux/kprobes.h> 20#include <linux/kprobes.h>
21 21
22#include <asm/apic.h> 22#include <asm/apic.h>
23#include <asm/perf_event.h> 23#include <asm/perf_event.h>
24 24
25struct nmi_watchdog_ctlblk {
26 unsigned int cccr_msr;
27 unsigned int perfctr_msr; /* the MSR to reset in NMI handler */
28 unsigned int evntsel_msr; /* the MSR to select the events to handle */
29};
30
31/* Interface defining a CPU specific perfctr watchdog */
32struct wd_ops {
33 int (*reserve)(void);
34 void (*unreserve)(void);
35 int (*setup)(unsigned nmi_hz);
36 void (*rearm)(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz);
37 void (*stop)(void);
38 unsigned perfctr;
39 unsigned evntsel;
40 u64 checkbit;
41};
42
43static const struct wd_ops *wd_ops;
44
45/* 25/*
46 * this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and it's 26 * this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and it's
47 * offset from MSR_P4_BSU_ESCR0. 27 * offset from MSR_P4_BSU_ESCR0.
@@ -60,8 +40,6 @@ static const struct wd_ops *wd_ops;
60static DECLARE_BITMAP(perfctr_nmi_owner, NMI_MAX_COUNTER_BITS); 40static DECLARE_BITMAP(perfctr_nmi_owner, NMI_MAX_COUNTER_BITS);
61static DECLARE_BITMAP(evntsel_nmi_owner, NMI_MAX_COUNTER_BITS); 41static DECLARE_BITMAP(evntsel_nmi_owner, NMI_MAX_COUNTER_BITS);
62 42
63static DEFINE_PER_CPU(struct nmi_watchdog_ctlblk, nmi_watchdog_ctlblk);
64
65/* converts an msr to an appropriate reservation bit */ 43/* converts an msr to an appropriate reservation bit */
66static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr) 44static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr)
67{ 45{
@@ -172,623 +150,3 @@ void release_evntsel_nmi(unsigned int msr)
172 clear_bit(counter, evntsel_nmi_owner); 150 clear_bit(counter, evntsel_nmi_owner);
173} 151}
174EXPORT_SYMBOL(release_evntsel_nmi); 152EXPORT_SYMBOL(release_evntsel_nmi);
175
176void disable_lapic_nmi_watchdog(void)
177{
178 BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);
179
180 if (atomic_read(&nmi_active) <= 0)
181 return;
182
183 on_each_cpu(stop_apic_nmi_watchdog, NULL, 1);
184
185 if (wd_ops)
186 wd_ops->unreserve();
187
188 BUG_ON(atomic_read(&nmi_active) != 0);
189}
190
191void enable_lapic_nmi_watchdog(void)
192{
193 BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);
194
195 /* are we already enabled */
196 if (atomic_read(&nmi_active) != 0)
197 return;
198
199 /* are we lapic aware */
200 if (!wd_ops)
201 return;
202 if (!wd_ops->reserve()) {
203 printk(KERN_ERR "NMI watchdog: cannot reserve perfctrs\n");
204 return;
205 }
206
207 on_each_cpu(setup_apic_nmi_watchdog, NULL, 1);
208 touch_nmi_watchdog();
209}
210
211/*
212 * Activate the NMI watchdog via the local APIC.
213 */
214
215static unsigned int adjust_for_32bit_ctr(unsigned int hz)
216{
217 u64 counter_val;
218 unsigned int retval = hz;
219
220 /*
221 * On Intel CPUs with P6/ARCH_PERFMON only 32 bits in the counter
222 * are writable, with higher bits sign extending from bit 31.
223 * So, we can only program the counter with 31 bit values and
224 * 32nd bit should be 1, for 33.. to be 1.
225 * Find the appropriate nmi_hz
226 */
227 counter_val = (u64)cpu_khz * 1000;
228 do_div(counter_val, retval);
229 if (counter_val > 0x7fffffffULL) {
230 u64 count = (u64)cpu_khz * 1000;
231 do_div(count, 0x7fffffffUL);
232 retval = count + 1;
233 }
234 return retval;
235}
236
237static void write_watchdog_counter(unsigned int perfctr_msr,
238 const char *descr, unsigned nmi_hz)
239{
240 u64 count = (u64)cpu_khz * 1000;
241
242 do_div(count, nmi_hz);
243 if (descr)
244 pr_debug("setting %s to -0x%08Lx\n", descr, count);
245 wrmsrl(perfctr_msr, 0 - count);
246}
247
248static void write_watchdog_counter32(unsigned int perfctr_msr,
249 const char *descr, unsigned nmi_hz)
250{
251 u64 count = (u64)cpu_khz * 1000;
252
253 do_div(count, nmi_hz);
254 if (descr)
255 pr_debug("setting %s to -0x%08Lx\n", descr, count);
256 wrmsr(perfctr_msr, (u32)(-count), 0);
257}
258
259/*
260 * AMD K7/K8/Family10h/Family11h support.
261 * AMD keeps this interface nicely stable so there is not much variety
262 */
263#define K7_EVNTSEL_ENABLE (1 << 22)
264#define K7_EVNTSEL_INT (1 << 20)
265#define K7_EVNTSEL_OS (1 << 17)
266#define K7_EVNTSEL_USR (1 << 16)
267#define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING 0x76
268#define K7_NMI_EVENT K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING
269
270static int setup_k7_watchdog(unsigned nmi_hz)
271{
272 unsigned int perfctr_msr, evntsel_msr;
273 unsigned int evntsel;
274 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
275
276 perfctr_msr = wd_ops->perfctr;
277 evntsel_msr = wd_ops->evntsel;
278
279 wrmsrl(perfctr_msr, 0UL);
280
281 evntsel = K7_EVNTSEL_INT
282 | K7_EVNTSEL_OS
283 | K7_EVNTSEL_USR
284 | K7_NMI_EVENT;
285
286 /* setup the timer */
287 wrmsr(evntsel_msr, evntsel, 0);
288 write_watchdog_counter(perfctr_msr, "K7_PERFCTR0", nmi_hz);
289
290 /* initialize the wd struct before enabling */
291 wd->perfctr_msr = perfctr_msr;
292 wd->evntsel_msr = evntsel_msr;
293 wd->cccr_msr = 0; /* unused */
294
295 /* ok, everything is initialized, announce that we're set */
296 cpu_nmi_set_wd_enabled();
297
298 apic_write(APIC_LVTPC, APIC_DM_NMI);
299 evntsel |= K7_EVNTSEL_ENABLE;
300 wrmsr(evntsel_msr, evntsel, 0);
301
302 return 1;
303}
304
305static void single_msr_stop_watchdog(void)
306{
307 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
308
309 wrmsr(wd->evntsel_msr, 0, 0);
310}
311
312static int single_msr_reserve(void)
313{
314 if (!reserve_perfctr_nmi(wd_ops->perfctr))
315 return 0;
316
317 if (!reserve_evntsel_nmi(wd_ops->evntsel)) {
318 release_perfctr_nmi(wd_ops->perfctr);
319 return 0;
320 }
321 return 1;
322}
323
324static void single_msr_unreserve(void)
325{
326 release_evntsel_nmi(wd_ops->evntsel);
327 release_perfctr_nmi(wd_ops->perfctr);
328}
329
330static void __kprobes
331single_msr_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
332{
333 /* start the cycle over again */
334 write_watchdog_counter(wd->perfctr_msr, NULL, nmi_hz);
335}
336
337static const struct wd_ops k7_wd_ops = {
338 .reserve = single_msr_reserve,
339 .unreserve = single_msr_unreserve,
340 .setup = setup_k7_watchdog,
341 .rearm = single_msr_rearm,
342 .stop = single_msr_stop_watchdog,
343 .perfctr = MSR_K7_PERFCTR0,
344 .evntsel = MSR_K7_EVNTSEL0,
345 .checkbit = 1ULL << 47,
346};
347
348/*
349 * Intel Model 6 (PPro+,P2,P3,P-M,Core1)
350 */
351#define P6_EVNTSEL0_ENABLE (1 << 22)
352#define P6_EVNTSEL_INT (1 << 20)
353#define P6_EVNTSEL_OS (1 << 17)
354#define P6_EVNTSEL_USR (1 << 16)
355#define P6_EVENT_CPU_CLOCKS_NOT_HALTED 0x79
356#define P6_NMI_EVENT P6_EVENT_CPU_CLOCKS_NOT_HALTED
357
358static int setup_p6_watchdog(unsigned nmi_hz)
359{
360 unsigned int perfctr_msr, evntsel_msr;
361 unsigned int evntsel;
362 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
363
364 perfctr_msr = wd_ops->perfctr;
365 evntsel_msr = wd_ops->evntsel;
366
367 /* KVM doesn't implement this MSR */
368 if (wrmsr_safe(perfctr_msr, 0, 0) < 0)
369 return 0;
370
371 evntsel = P6_EVNTSEL_INT
372 | P6_EVNTSEL_OS
373 | P6_EVNTSEL_USR
374 | P6_NMI_EVENT;
375
376 /* setup the timer */
377 wrmsr(evntsel_msr, evntsel, 0);
378 nmi_hz = adjust_for_32bit_ctr(nmi_hz);
379 write_watchdog_counter32(perfctr_msr, "P6_PERFCTR0", nmi_hz);
380
381 /* initialize the wd struct before enabling */
382 wd->perfctr_msr = perfctr_msr;
383 wd->evntsel_msr = evntsel_msr;
384 wd->cccr_msr = 0; /* unused */
385
386 /* ok, everything is initialized, announce that we're set */
387 cpu_nmi_set_wd_enabled();
388
389 apic_write(APIC_LVTPC, APIC_DM_NMI);
390 evntsel |= P6_EVNTSEL0_ENABLE;
391 wrmsr(evntsel_msr, evntsel, 0);
392
393 return 1;
394}
395
396static void __kprobes p6_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
397{
398 /*
399 * P6 based Pentium M need to re-unmask
400 * the apic vector but it doesn't hurt
401 * other P6 variant.
402 * ArchPerfom/Core Duo also needs this
403 */
404 apic_write(APIC_LVTPC, APIC_DM_NMI);
405
406 /* P6/ARCH_PERFMON has 32 bit counter write */
407 write_watchdog_counter32(wd->perfctr_msr, NULL, nmi_hz);
408}
409
410static const struct wd_ops p6_wd_ops = {
411 .reserve = single_msr_reserve,
412 .unreserve = single_msr_unreserve,
413 .setup = setup_p6_watchdog,
414 .rearm = p6_rearm,
415 .stop = single_msr_stop_watchdog,
416 .perfctr = MSR_P6_PERFCTR0,
417 .evntsel = MSR_P6_EVNTSEL0,
418 .checkbit = 1ULL << 39,
419};
420
421/*
422 * Intel P4 performance counters.
423 * By far the most complicated of all.
424 */
425#define MSR_P4_MISC_ENABLE_PERF_AVAIL (1 << 7)
426#define P4_ESCR_EVENT_SELECT(N) ((N) << 25)
427#define P4_ESCR_OS (1 << 3)
428#define P4_ESCR_USR (1 << 2)
429#define P4_CCCR_OVF_PMI0 (1 << 26)
430#define P4_CCCR_OVF_PMI1 (1 << 27)
431#define P4_CCCR_THRESHOLD(N) ((N) << 20)
432#define P4_CCCR_COMPLEMENT (1 << 19)
433#define P4_CCCR_COMPARE (1 << 18)
434#define P4_CCCR_REQUIRED (3 << 16)
435#define P4_CCCR_ESCR_SELECT(N) ((N) << 13)
436#define P4_CCCR_ENABLE (1 << 12)
437#define P4_CCCR_OVF (1 << 31)
438
439#define P4_CONTROLS 18
440static unsigned int p4_controls[18] = {
441 MSR_P4_BPU_CCCR0,
442 MSR_P4_BPU_CCCR1,
443 MSR_P4_BPU_CCCR2,
444 MSR_P4_BPU_CCCR3,
445 MSR_P4_MS_CCCR0,
446 MSR_P4_MS_CCCR1,
447 MSR_P4_MS_CCCR2,
448 MSR_P4_MS_CCCR3,
449 MSR_P4_FLAME_CCCR0,
450 MSR_P4_FLAME_CCCR1,
451 MSR_P4_FLAME_CCCR2,
452 MSR_P4_FLAME_CCCR3,
453 MSR_P4_IQ_CCCR0,
454 MSR_P4_IQ_CCCR1,
455 MSR_P4_IQ_CCCR2,
456 MSR_P4_IQ_CCCR3,
457 MSR_P4_IQ_CCCR4,
458 MSR_P4_IQ_CCCR5,
459};
460/*
461 * Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter
462 * CRU_ESCR0 (with any non-null event selector) through a complemented
463 * max threshold. [IA32-Vol3, Section 14.9.9]
464 */
465static int setup_p4_watchdog(unsigned nmi_hz)
466{
467 unsigned int perfctr_msr, evntsel_msr, cccr_msr;
468 unsigned int evntsel, cccr_val;
469 unsigned int misc_enable, dummy;
470 unsigned int ht_num;
471 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
472
473 rdmsr(MSR_IA32_MISC_ENABLE, misc_enable, dummy);
474 if (!(misc_enable & MSR_P4_MISC_ENABLE_PERF_AVAIL))
475 return 0;
476
477#ifdef CONFIG_SMP
478 /* detect which hyperthread we are on */
479 if (smp_num_siblings == 2) {
480 unsigned int ebx, apicid;
481
482 ebx = cpuid_ebx(1);
483 apicid = (ebx >> 24) & 0xff;
484 ht_num = apicid & 1;
485 } else
486#endif
487 ht_num = 0;
488
489 /*
490 * performance counters are shared resources
491 * assign each hyperthread its own set
492 * (re-use the ESCR0 register, seems safe
493 * and keeps the cccr_val the same)
494 */
495 if (!ht_num) {
496 /* logical cpu 0 */
497 perfctr_msr = MSR_P4_IQ_PERFCTR0;
498 evntsel_msr = MSR_P4_CRU_ESCR0;
499 cccr_msr = MSR_P4_IQ_CCCR0;
500 cccr_val = P4_CCCR_OVF_PMI0 | P4_CCCR_ESCR_SELECT(4);
501
502 /*
503 * If we're on the kdump kernel or other situation, we may
504 * still have other performance counter registers set to
505 * interrupt and they'll keep interrupting forever because
506 * of the P4_CCCR_OVF quirk. So we need to ACK all the
507 * pending interrupts and disable all the registers here,
508 * before reenabling the NMI delivery. Refer to p4_rearm()
509 * about the P4_CCCR_OVF quirk.
510 */
511 if (reset_devices) {
512 unsigned int low, high;
513 int i;
514
515 for (i = 0; i < P4_CONTROLS; i++) {
516 rdmsr(p4_controls[i], low, high);
517 low &= ~(P4_CCCR_ENABLE | P4_CCCR_OVF);
518 wrmsr(p4_controls[i], low, high);
519 }
520 }
521 } else {
522 /* logical cpu 1 */
523 perfctr_msr = MSR_P4_IQ_PERFCTR1;
524 evntsel_msr = MSR_P4_CRU_ESCR0;
525 cccr_msr = MSR_P4_IQ_CCCR1;
526
527 /* Pentium 4 D processors don't support P4_CCCR_OVF_PMI1 */
528 if (boot_cpu_data.x86_model == 4 && boot_cpu_data.x86_mask == 4)
529 cccr_val = P4_CCCR_OVF_PMI0;
530 else
531 cccr_val = P4_CCCR_OVF_PMI1;
532 cccr_val |= P4_CCCR_ESCR_SELECT(4);
533 }
534
535 evntsel = P4_ESCR_EVENT_SELECT(0x3F)
536 | P4_ESCR_OS
537 | P4_ESCR_USR;
538
539 cccr_val |= P4_CCCR_THRESHOLD(15)
540 | P4_CCCR_COMPLEMENT
541 | P4_CCCR_COMPARE
542 | P4_CCCR_REQUIRED;
543
544 wrmsr(evntsel_msr, evntsel, 0);
545 wrmsr(cccr_msr, cccr_val, 0);
546 write_watchdog_counter(perfctr_msr, "P4_IQ_COUNTER0", nmi_hz);
547
548 wd->perfctr_msr = perfctr_msr;
549 wd->evntsel_msr = evntsel_msr;
550 wd->cccr_msr = cccr_msr;
551
552 /* ok, everything is initialized, announce that we're set */
553 cpu_nmi_set_wd_enabled();
554
555 apic_write(APIC_LVTPC, APIC_DM_NMI);
556 cccr_val |= P4_CCCR_ENABLE;
557 wrmsr(cccr_msr, cccr_val, 0);
558 return 1;
559}
560
561static void stop_p4_watchdog(void)
562{
563 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
564 wrmsr(wd->cccr_msr, 0, 0);
565 wrmsr(wd->evntsel_msr, 0, 0);
566}
567
568static int p4_reserve(void)
569{
570 if (!reserve_perfctr_nmi(MSR_P4_IQ_PERFCTR0))
571 return 0;
572#ifdef CONFIG_SMP
573 if (smp_num_siblings > 1 && !reserve_perfctr_nmi(MSR_P4_IQ_PERFCTR1))
574 goto fail1;
575#endif
576 if (!reserve_evntsel_nmi(MSR_P4_CRU_ESCR0))
577 goto fail2;
578 /* RED-PEN why is ESCR1 not reserved here? */
579 return 1;
580 fail2:
581#ifdef CONFIG_SMP
582 if (smp_num_siblings > 1)
583 release_perfctr_nmi(MSR_P4_IQ_PERFCTR1);
584 fail1:
585#endif
586 release_perfctr_nmi(MSR_P4_IQ_PERFCTR0);
587 return 0;
588}
589
590static void p4_unreserve(void)
591{
592#ifdef CONFIG_SMP
593 if (smp_num_siblings > 1)
594 release_perfctr_nmi(MSR_P4_IQ_PERFCTR1);
595#endif
596 release_evntsel_nmi(MSR_P4_CRU_ESCR0);
597 release_perfctr_nmi(MSR_P4_IQ_PERFCTR0);
598}
599
600static void __kprobes p4_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
601{
602 unsigned dummy;
603 /*
604 * P4 quirks:
605 * - An overflown perfctr will assert its interrupt
606 * until the OVF flag in its CCCR is cleared.
607 * - LVTPC is masked on interrupt and must be
608 * unmasked by the LVTPC handler.
609 */
610 rdmsrl(wd->cccr_msr, dummy);
611 dummy &= ~P4_CCCR_OVF;
612 wrmsrl(wd->cccr_msr, dummy);
613 apic_write(APIC_LVTPC, APIC_DM_NMI);
614 /* start the cycle over again */
615 write_watchdog_counter(wd->perfctr_msr, NULL, nmi_hz);
616}
617
618static const struct wd_ops p4_wd_ops = {
619 .reserve = p4_reserve,
620 .unreserve = p4_unreserve,
621 .setup = setup_p4_watchdog,
622 .rearm = p4_rearm,
623 .stop = stop_p4_watchdog,
624 /* RED-PEN this is wrong for the other sibling */
625 .perfctr = MSR_P4_BPU_PERFCTR0,
626 .evntsel = MSR_P4_BSU_ESCR0,
627 .checkbit = 1ULL << 39,
628};
629
630/*
631 * Watchdog using the Intel architected PerfMon.
632 * Used for Core2 and hopefully all future Intel CPUs.
633 */
634#define ARCH_PERFMON_NMI_EVENT_SEL ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL
635#define ARCH_PERFMON_NMI_EVENT_UMASK ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK
636
637static struct wd_ops intel_arch_wd_ops;
638
639static int setup_intel_arch_watchdog(unsigned nmi_hz)
640{
641 unsigned int ebx;
642 union cpuid10_eax eax;
643 unsigned int unused;
644 unsigned int perfctr_msr, evntsel_msr;
645 unsigned int evntsel;
646 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
647
648 /*
649 * Check whether the Architectural PerfMon supports
650 * Unhalted Core Cycles Event or not.
651 * NOTE: Corresponding bit = 0 in ebx indicates event present.
652 */
653 cpuid(10, &(eax.full), &ebx, &unused, &unused);
654 if ((eax.split.mask_length <
655 (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) ||
656 (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
657 return 0;
658
659 perfctr_msr = wd_ops->perfctr;
660 evntsel_msr = wd_ops->evntsel;
661
662 wrmsrl(perfctr_msr, 0UL);
663
664 evntsel = ARCH_PERFMON_EVENTSEL_INT
665 | ARCH_PERFMON_EVENTSEL_OS
666 | ARCH_PERFMON_EVENTSEL_USR
667 | ARCH_PERFMON_NMI_EVENT_SEL
668 | ARCH_PERFMON_NMI_EVENT_UMASK;
669
670 /* setup the timer */
671 wrmsr(evntsel_msr, evntsel, 0);
672 nmi_hz = adjust_for_32bit_ctr(nmi_hz);
673 write_watchdog_counter32(perfctr_msr, "INTEL_ARCH_PERFCTR0", nmi_hz);
674
675 wd->perfctr_msr = perfctr_msr;
676 wd->evntsel_msr = evntsel_msr;
677 wd->cccr_msr = 0; /* unused */
678
679 /* ok, everything is initialized, announce that we're set */
680 cpu_nmi_set_wd_enabled();
681
682 apic_write(APIC_LVTPC, APIC_DM_NMI);
683 evntsel |= ARCH_PERFMON_EVENTSEL_ENABLE;
684 wrmsr(evntsel_msr, evntsel, 0);
685 intel_arch_wd_ops.checkbit = 1ULL << (eax.split.bit_width - 1);
686 return 1;
687}
688
689static struct wd_ops intel_arch_wd_ops __read_mostly = {
690 .reserve = single_msr_reserve,
691 .unreserve = single_msr_unreserve,
692 .setup = setup_intel_arch_watchdog,
693 .rearm = p6_rearm,
694 .stop = single_msr_stop_watchdog,
695 .perfctr = MSR_ARCH_PERFMON_PERFCTR1,
696 .evntsel = MSR_ARCH_PERFMON_EVENTSEL1,
697};
698
699static void probe_nmi_watchdog(void)
700{
701 switch (boot_cpu_data.x86_vendor) {
702 case X86_VENDOR_AMD:
703 if (boot_cpu_data.x86 == 6 ||
704 (boot_cpu_data.x86 >= 0xf && boot_cpu_data.x86 <= 0x15))
705 wd_ops = &k7_wd_ops;
706 return;
707 case X86_VENDOR_INTEL:
708 /* Work around where perfctr1 doesn't have a working enable
709 * bit as described in the following errata:
710 * AE49 Core Duo and Intel Core Solo 65 nm
711 * AN49 Intel Pentium Dual-Core
712 * AF49 Dual-Core Intel Xeon Processor LV
713 */
714 if ((boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 14) ||
715 ((boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 15 &&
716 boot_cpu_data.x86_mask == 4))) {
717 intel_arch_wd_ops.perfctr = MSR_ARCH_PERFMON_PERFCTR0;
718 intel_arch_wd_ops.evntsel = MSR_ARCH_PERFMON_EVENTSEL0;
719 }
720 if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
721 wd_ops = &intel_arch_wd_ops;
722 break;
723 }
724 switch (boot_cpu_data.x86) {
725 case 6:
726 if (boot_cpu_data.x86_model > 13)
727 return;
728
729 wd_ops = &p6_wd_ops;
730 break;
731 case 15:
732 wd_ops = &p4_wd_ops;
733 break;
734 default:
735 return;
736 }
737 break;
738 }
739}
740
741/* Interface to nmi.c */
742
743int lapic_watchdog_init(unsigned nmi_hz)
744{
745 if (!wd_ops) {
746 probe_nmi_watchdog();
747 if (!wd_ops) {
748 printk(KERN_INFO "NMI watchdog: CPU not supported\n");
749 return -1;
750 }
751
752 if (!wd_ops->reserve()) {
753 printk(KERN_ERR
754 "NMI watchdog: cannot reserve perfctrs\n");
755 return -1;
756 }
757 }
758
759 if (!(wd_ops->setup(nmi_hz))) {
760 printk(KERN_ERR "Cannot setup NMI watchdog on CPU %d\n",
761 raw_smp_processor_id());
762 return -1;
763 }
764
765 return 0;
766}
767
768void lapic_watchdog_stop(void)
769{
770 if (wd_ops)
771 wd_ops->stop();
772}
773
774unsigned lapic_adjust_nmi_hz(unsigned hz)
775{
776 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
777 if (wd->perfctr_msr == MSR_P6_PERFCTR0 ||
778 wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR1)
779 hz = adjust_for_32bit_ctr(hz);
780 return hz;
781}
782
783int __kprobes lapic_wd_event(unsigned nmi_hz)
784{
785 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
786 u64 ctr;
787
788 rdmsrl(wd->perfctr_msr, ctr);
789 if (ctr & wd_ops->checkbit) /* perfctr still running? */
790 return 0;
791
792 wd_ops->rearm(wd, nmi_hz);
793 return 1;
794}
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index 6e8752c1bd52..d6fb146c0d8b 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -175,21 +175,21 @@ static const struct stacktrace_ops print_trace_ops = {
175 175
176void 176void
177show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, 177show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
178 unsigned long *stack, unsigned long bp, char *log_lvl) 178 unsigned long *stack, char *log_lvl)
179{ 179{
180 printk("%sCall Trace:\n", log_lvl); 180 printk("%sCall Trace:\n", log_lvl);
181 dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl); 181 dump_trace(task, regs, stack, &print_trace_ops, log_lvl);
182} 182}
183 183
184void show_trace(struct task_struct *task, struct pt_regs *regs, 184void show_trace(struct task_struct *task, struct pt_regs *regs,
185 unsigned long *stack, unsigned long bp) 185 unsigned long *stack)
186{ 186{
187 show_trace_log_lvl(task, regs, stack, bp, ""); 187 show_trace_log_lvl(task, regs, stack, "");
188} 188}
189 189
190void show_stack(struct task_struct *task, unsigned long *sp) 190void show_stack(struct task_struct *task, unsigned long *sp)
191{ 191{
192 show_stack_log_lvl(task, NULL, sp, 0, ""); 192 show_stack_log_lvl(task, NULL, sp, "");
193} 193}
194 194
195/* 195/*
@@ -197,20 +197,14 @@ void show_stack(struct task_struct *task, unsigned long *sp)
197 */ 197 */
198void dump_stack(void) 198void dump_stack(void)
199{ 199{
200 unsigned long bp = 0;
201 unsigned long stack; 200 unsigned long stack;
202 201
203#ifdef CONFIG_FRAME_POINTER
204 if (!bp)
205 get_bp(bp);
206#endif
207
208 printk("Pid: %d, comm: %.20s %s %s %.*s\n", 202 printk("Pid: %d, comm: %.20s %s %s %.*s\n",
209 current->pid, current->comm, print_tainted(), 203 current->pid, current->comm, print_tainted(),
210 init_utsname()->release, 204 init_utsname()->release,
211 (int)strcspn(init_utsname()->version, " "), 205 (int)strcspn(init_utsname()->version, " "),
212 init_utsname()->version); 206 init_utsname()->version);
213 show_trace(NULL, NULL, &stack, bp); 207 show_trace(NULL, NULL, &stack);
214} 208}
215EXPORT_SYMBOL(dump_stack); 209EXPORT_SYMBOL(dump_stack);
216 210
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c
index 1bc7f75a5bda..74cc1eda384b 100644
--- a/arch/x86/kernel/dumpstack_32.c
+++ b/arch/x86/kernel/dumpstack_32.c
@@ -17,11 +17,12 @@
17#include <asm/stacktrace.h> 17#include <asm/stacktrace.h>
18 18
19 19
20void dump_trace(struct task_struct *task, struct pt_regs *regs, 20void dump_trace(struct task_struct *task,
21 unsigned long *stack, unsigned long bp, 21 struct pt_regs *regs, unsigned long *stack,
22 const struct stacktrace_ops *ops, void *data) 22 const struct stacktrace_ops *ops, void *data)
23{ 23{
24 int graph = 0; 24 int graph = 0;
25 unsigned long bp;
25 26
26 if (!task) 27 if (!task)
27 task = current; 28 task = current;
@@ -34,18 +35,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
34 stack = (unsigned long *)task->thread.sp; 35 stack = (unsigned long *)task->thread.sp;
35 } 36 }
36 37
37#ifdef CONFIG_FRAME_POINTER 38 bp = stack_frame(task, regs);
38 if (!bp) {
39 if (task == current) {
40 /* Grab bp right from our regs */
41 get_bp(bp);
42 } else {
43 /* bp is the last reg pushed by switch_to */
44 bp = *(unsigned long *) task->thread.sp;
45 }
46 }
47#endif
48
49 for (;;) { 39 for (;;) {
50 struct thread_info *context; 40 struct thread_info *context;
51 41
@@ -65,7 +55,7 @@ EXPORT_SYMBOL(dump_trace);
65 55
66void 56void
67show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, 57show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
68 unsigned long *sp, unsigned long bp, char *log_lvl) 58 unsigned long *sp, char *log_lvl)
69{ 59{
70 unsigned long *stack; 60 unsigned long *stack;
71 int i; 61 int i;
@@ -87,7 +77,7 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
87 touch_nmi_watchdog(); 77 touch_nmi_watchdog();
88 } 78 }
89 printk(KERN_CONT "\n"); 79 printk(KERN_CONT "\n");
90 show_trace_log_lvl(task, regs, sp, bp, log_lvl); 80 show_trace_log_lvl(task, regs, sp, log_lvl);
91} 81}
92 82
93 83
@@ -112,8 +102,7 @@ void show_registers(struct pt_regs *regs)
112 u8 *ip; 102 u8 *ip;
113 103
114 printk(KERN_EMERG "Stack:\n"); 104 printk(KERN_EMERG "Stack:\n");
115 show_stack_log_lvl(NULL, regs, &regs->sp, 105 show_stack_log_lvl(NULL, regs, &regs->sp, KERN_EMERG);
116 0, KERN_EMERG);
117 106
118 printk(KERN_EMERG "Code: "); 107 printk(KERN_EMERG "Code: ");
119 108
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index 6a340485249a..64101335de19 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -139,8 +139,8 @@ fixup_bp_irq_link(unsigned long bp, unsigned long *stack,
139 * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack 139 * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack
140 */ 140 */
141 141
142void dump_trace(struct task_struct *task, struct pt_regs *regs, 142void dump_trace(struct task_struct *task,
143 unsigned long *stack, unsigned long bp, 143 struct pt_regs *regs, unsigned long *stack,
144 const struct stacktrace_ops *ops, void *data) 144 const struct stacktrace_ops *ops, void *data)
145{ 145{
146 const unsigned cpu = get_cpu(); 146 const unsigned cpu = get_cpu();
@@ -149,6 +149,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
149 unsigned used = 0; 149 unsigned used = 0;
150 struct thread_info *tinfo; 150 struct thread_info *tinfo;
151 int graph = 0; 151 int graph = 0;
152 unsigned long bp;
152 153
153 if (!task) 154 if (!task)
154 task = current; 155 task = current;
@@ -160,18 +161,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
160 stack = (unsigned long *)task->thread.sp; 161 stack = (unsigned long *)task->thread.sp;
161 } 162 }
162 163
163#ifdef CONFIG_FRAME_POINTER 164 bp = stack_frame(task, regs);
164 if (!bp) {
165 if (task == current) {
166 /* Grab bp right from our regs */
167 get_bp(bp);
168 } else {
169 /* bp is the last reg pushed by switch_to */
170 bp = *(unsigned long *) task->thread.sp;
171 }
172 }
173#endif
174
175 /* 165 /*
176 * Print function call entries in all stacks, starting at the 166 * Print function call entries in all stacks, starting at the
177 * current stack address. If the stacks consist of nested 167 * current stack address. If the stacks consist of nested
@@ -235,7 +225,7 @@ EXPORT_SYMBOL(dump_trace);
235 225
236void 226void
237show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, 227show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
238 unsigned long *sp, unsigned long bp, char *log_lvl) 228 unsigned long *sp, char *log_lvl)
239{ 229{
240 unsigned long *irq_stack_end; 230 unsigned long *irq_stack_end;
241 unsigned long *irq_stack; 231 unsigned long *irq_stack;
@@ -279,7 +269,7 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
279 preempt_enable(); 269 preempt_enable();
280 270
281 printk(KERN_CONT "\n"); 271 printk(KERN_CONT "\n");
282 show_trace_log_lvl(task, regs, sp, bp, log_lvl); 272 show_trace_log_lvl(task, regs, sp, log_lvl);
283} 273}
284 274
285void show_registers(struct pt_regs *regs) 275void show_registers(struct pt_regs *regs)
@@ -308,7 +298,7 @@ void show_registers(struct pt_regs *regs)
308 298
309 printk(KERN_EMERG "Stack:\n"); 299 printk(KERN_EMERG "Stack:\n");
310 show_stack_log_lvl(NULL, regs, (unsigned long *)sp, 300 show_stack_log_lvl(NULL, regs, (unsigned long *)sp,
311 regs->bp, KERN_EMERG); 301 KERN_EMERG);
312 302
313 printk(KERN_EMERG "Code: "); 303 printk(KERN_EMERG "Code: ");
314 304
diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c
index 4572f25f9325..cd28a350f7f9 100644
--- a/arch/x86/kernel/early_printk.c
+++ b/arch/x86/kernel/early_printk.c
@@ -240,7 +240,7 @@ static int __init setup_early_printk(char *buf)
240 if (!strncmp(buf, "xen", 3)) 240 if (!strncmp(buf, "xen", 3))
241 early_console_register(&xenboot_console, keep); 241 early_console_register(&xenboot_console, keep);
242#endif 242#endif
243#ifdef CONFIG_X86_MRST_EARLY_PRINTK 243#ifdef CONFIG_EARLY_PRINTK_MRST
244 if (!strncmp(buf, "mrst", 4)) { 244 if (!strncmp(buf, "mrst", 4)) {
245 mrst_early_console_init(); 245 mrst_early_console_init();
246 early_console_register(&early_mrst_console, keep); 246 early_console_register(&early_mrst_console, keep);
@@ -250,7 +250,6 @@ static int __init setup_early_printk(char *buf)
250 hsu_early_console_init(); 250 hsu_early_console_init();
251 early_console_register(&early_hsu_console, keep); 251 early_console_register(&early_hsu_console, keep);
252 } 252 }
253
254#endif 253#endif
255 buf++; 254 buf++;
256 } 255 }
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index e3ba417e8697..d3b895f375d3 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -299,17 +299,21 @@ ENDPROC(native_usergs_sysret64)
299ENTRY(save_args) 299ENTRY(save_args)
300 XCPT_FRAME 300 XCPT_FRAME
301 cld 301 cld
302 movq_cfi rdi, RDI+16-ARGOFFSET 302 /*
303 movq_cfi rsi, RSI+16-ARGOFFSET 303 * start from rbp in pt_regs and jump over
304 movq_cfi rdx, RDX+16-ARGOFFSET 304 * return address.
305 movq_cfi rcx, RCX+16-ARGOFFSET 305 */
306 movq_cfi rax, RAX+16-ARGOFFSET 306 movq_cfi rdi, RDI+8-RBP
307 movq_cfi r8, R8+16-ARGOFFSET 307 movq_cfi rsi, RSI+8-RBP
308 movq_cfi r9, R9+16-ARGOFFSET 308 movq_cfi rdx, RDX+8-RBP
309 movq_cfi r10, R10+16-ARGOFFSET 309 movq_cfi rcx, RCX+8-RBP
310 movq_cfi r11, R11+16-ARGOFFSET 310 movq_cfi rax, RAX+8-RBP
311 311 movq_cfi r8, R8+8-RBP
312 leaq -ARGOFFSET+16(%rsp),%rdi /* arg1 for handler */ 312 movq_cfi r9, R9+8-RBP
313 movq_cfi r10, R10+8-RBP
314 movq_cfi r11, R11+8-RBP
315
316 leaq -RBP+8(%rsp),%rdi /* arg1 for handler */
313 movq_cfi rbp, 8 /* push %rbp */ 317 movq_cfi rbp, 8 /* push %rbp */
314 leaq 8(%rsp), %rbp /* mov %rsp, %ebp */ 318 leaq 8(%rsp), %rbp /* mov %rsp, %ebp */
315 testl $3, CS(%rdi) 319 testl $3, CS(%rdi)
@@ -782,8 +786,9 @@ END(interrupt)
782 786
783/* 0(%rsp): ~(interrupt number) */ 787/* 0(%rsp): ~(interrupt number) */
784 .macro interrupt func 788 .macro interrupt func
785 subq $ORIG_RAX-ARGOFFSET+8, %rsp 789 /* reserve pt_regs for scratch regs and rbp */
786 CFI_ADJUST_CFA_OFFSET ORIG_RAX-ARGOFFSET+8 790 subq $ORIG_RAX-RBP, %rsp
791 CFI_ADJUST_CFA_OFFSET ORIG_RAX-RBP
787 call save_args 792 call save_args
788 PARTIAL_FRAME 0 793 PARTIAL_FRAME 0
789 call \func 794 call \func
@@ -808,9 +813,14 @@ ret_from_intr:
808 TRACE_IRQS_OFF 813 TRACE_IRQS_OFF
809 decl PER_CPU_VAR(irq_count) 814 decl PER_CPU_VAR(irq_count)
810 leaveq 815 leaveq
816
811 CFI_RESTORE rbp 817 CFI_RESTORE rbp
812 CFI_DEF_CFA_REGISTER rsp 818 CFI_DEF_CFA_REGISTER rsp
813 CFI_ADJUST_CFA_OFFSET -8 819 CFI_ADJUST_CFA_OFFSET -8
820
821 /* we did not save rbx, restore only from ARGOFFSET */
822 addq $8, %rsp
823 CFI_ADJUST_CFA_OFFSET -8
814exit_intr: 824exit_intr:
815 GET_THREAD_INFO(%rcx) 825 GET_THREAD_INFO(%rcx)
816 testl $3,CS-ARGOFFSET(%rsp) 826 testl $3,CS-ARGOFFSET(%rsp)
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 3afb33f14d2d..382eb2936d4d 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -19,6 +19,7 @@
19#include <linux/sched.h> 19#include <linux/sched.h>
20#include <linux/init.h> 20#include <linux/init.h>
21#include <linux/list.h> 21#include <linux/list.h>
22#include <linux/module.h>
22 23
23#include <trace/syscall.h> 24#include <trace/syscall.h>
24 25
@@ -49,6 +50,7 @@ static DEFINE_PER_CPU(int, save_modifying_code);
49int ftrace_arch_code_modify_prepare(void) 50int ftrace_arch_code_modify_prepare(void)
50{ 51{
51 set_kernel_text_rw(); 52 set_kernel_text_rw();
53 set_all_modules_text_rw();
52 modifying_code = 1; 54 modifying_code = 1;
53 return 0; 55 return 0;
54} 56}
@@ -56,6 +58,7 @@ int ftrace_arch_code_modify_prepare(void)
56int ftrace_arch_code_modify_post_process(void) 58int ftrace_arch_code_modify_post_process(void)
57{ 59{
58 modifying_code = 0; 60 modifying_code = 0;
61 set_all_modules_text_ro();
59 set_kernel_text_ro(); 62 set_kernel_text_ro();
60 return 0; 63 return 0;
61} 64}
@@ -167,9 +170,9 @@ static void ftrace_mod_code(void)
167 170
168void ftrace_nmi_enter(void) 171void ftrace_nmi_enter(void)
169{ 172{
170 __get_cpu_var(save_modifying_code) = modifying_code; 173 __this_cpu_write(save_modifying_code, modifying_code);
171 174
172 if (!__get_cpu_var(save_modifying_code)) 175 if (!__this_cpu_read(save_modifying_code))
173 return; 176 return;
174 177
175 if (atomic_inc_return(&nmi_running) & MOD_CODE_WRITE_FLAG) { 178 if (atomic_inc_return(&nmi_running) & MOD_CODE_WRITE_FLAG) {
@@ -183,7 +186,7 @@ void ftrace_nmi_enter(void)
183 186
184void ftrace_nmi_exit(void) 187void ftrace_nmi_exit(void)
185{ 188{
186 if (!__get_cpu_var(save_modifying_code)) 189 if (!__this_cpu_read(save_modifying_code))
187 return; 190 return;
188 191
189 /* Finish all executions before clearing nmi_running */ 192 /* Finish all executions before clearing nmi_running */
diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c
index 763310165fa0..7f138b3c3c52 100644
--- a/arch/x86/kernel/head32.c
+++ b/arch/x86/kernel/head32.c
@@ -61,6 +61,9 @@ void __init i386_start_kernel(void)
61 case X86_SUBARCH_MRST: 61 case X86_SUBARCH_MRST:
62 x86_mrst_early_setup(); 62 x86_mrst_early_setup();
63 break; 63 break;
64 case X86_SUBARCH_CE4100:
65 x86_ce4100_early_setup();
66 break;
64 default: 67 default:
65 i386_default_early_setup(); 68 i386_default_early_setup();
66 break; 69 break;
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index c0dbd9ac24f0..9f54b209c378 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -139,39 +139,6 @@ ENTRY(startup_32)
139 movl %eax, pa(olpc_ofw_pgd) 139 movl %eax, pa(olpc_ofw_pgd)
140#endif 140#endif
141 141
142#ifdef CONFIG_PARAVIRT
143 /* This is can only trip for a broken bootloader... */
144 cmpw $0x207, pa(boot_params + BP_version)
145 jb default_entry
146
147 /* Paravirt-compatible boot parameters. Look to see what architecture
148 we're booting under. */
149 movl pa(boot_params + BP_hardware_subarch), %eax
150 cmpl $num_subarch_entries, %eax
151 jae bad_subarch
152
153 movl pa(subarch_entries)(,%eax,4), %eax
154 subl $__PAGE_OFFSET, %eax
155 jmp *%eax
156
157bad_subarch:
158WEAK(lguest_entry)
159WEAK(xen_entry)
160 /* Unknown implementation; there's really
161 nothing we can do at this point. */
162 ud2a
163
164 __INITDATA
165
166subarch_entries:
167 .long default_entry /* normal x86/PC */
168 .long lguest_entry /* lguest hypervisor */
169 .long xen_entry /* Xen hypervisor */
170 .long default_entry /* Moorestown MID */
171num_subarch_entries = (. - subarch_entries) / 4
172.previous
173#endif /* CONFIG_PARAVIRT */
174
175/* 142/*
176 * Initialize page tables. This creates a PDE and a set of page 143 * Initialize page tables. This creates a PDE and a set of page
177 * tables, which are located immediately beyond __brk_base. The variable 144 * tables, which are located immediately beyond __brk_base. The variable
@@ -181,7 +148,6 @@ num_subarch_entries = (. - subarch_entries) / 4
181 * 148 *
182 * Note that the stack is not yet set up! 149 * Note that the stack is not yet set up!
183 */ 150 */
184default_entry:
185#ifdef CONFIG_X86_PAE 151#ifdef CONFIG_X86_PAE
186 152
187 /* 153 /*
@@ -261,7 +227,42 @@ page_pde_offset = (__PAGE_OFFSET >> 20);
261 movl $pa(initial_pg_fixmap)+PDE_IDENT_ATTR,%eax 227 movl $pa(initial_pg_fixmap)+PDE_IDENT_ATTR,%eax
262 movl %eax,pa(initial_page_table+0xffc) 228 movl %eax,pa(initial_page_table+0xffc)
263#endif 229#endif
264 jmp 3f 230
231#ifdef CONFIG_PARAVIRT
232 /* This is can only trip for a broken bootloader... */
233 cmpw $0x207, pa(boot_params + BP_version)
234 jb default_entry
235
236 /* Paravirt-compatible boot parameters. Look to see what architecture
237 we're booting under. */
238 movl pa(boot_params + BP_hardware_subarch), %eax
239 cmpl $num_subarch_entries, %eax
240 jae bad_subarch
241
242 movl pa(subarch_entries)(,%eax,4), %eax
243 subl $__PAGE_OFFSET, %eax
244 jmp *%eax
245
246bad_subarch:
247WEAK(lguest_entry)
248WEAK(xen_entry)
249 /* Unknown implementation; there's really
250 nothing we can do at this point. */
251 ud2a
252
253 __INITDATA
254
255subarch_entries:
256 .long default_entry /* normal x86/PC */
257 .long lguest_entry /* lguest hypervisor */
258 .long xen_entry /* Xen hypervisor */
259 .long default_entry /* Moorestown MID */
260num_subarch_entries = (. - subarch_entries) / 4
261.previous
262#else
263 jmp default_entry
264#endif /* CONFIG_PARAVIRT */
265
265/* 266/*
266 * Non-boot CPU entry point; entered from trampoline.S 267 * Non-boot CPU entry point; entered from trampoline.S
267 * We can't lgdt here, because lgdt itself uses a data segment, but 268 * We can't lgdt here, because lgdt itself uses a data segment, but
@@ -282,7 +283,7 @@ ENTRY(startup_32_smp)
282 movl %eax,%fs 283 movl %eax,%fs
283 movl %eax,%gs 284 movl %eax,%gs
284#endif /* CONFIG_SMP */ 285#endif /* CONFIG_SMP */
2853: 286default_entry:
286 287
287/* 288/*
288 * New page tables may be in 4Mbyte page mode and may 289 * New page tables may be in 4Mbyte page mode and may
@@ -316,6 +317,10 @@ ENTRY(startup_32_smp)
316 subl $0x80000001, %eax 317 subl $0x80000001, %eax
317 cmpl $(0x8000ffff-0x80000001), %eax 318 cmpl $(0x8000ffff-0x80000001), %eax
318 ja 6f 319 ja 6f
320
321 /* Clear bogus XD_DISABLE bits */
322 call verify_cpu
323
319 mov $0x80000001, %eax 324 mov $0x80000001, %eax
320 cpuid 325 cpuid
321 /* Execute Disable bit supported? */ 326 /* Execute Disable bit supported? */
@@ -611,6 +616,8 @@ ignore_int:
611#endif 616#endif
612 iret 617 iret
613 618
619#include "verify_cpu.S"
620
614 __REFDATA 621 __REFDATA
615.align 4 622.align 4
616ENTRY(initial_code) 623ENTRY(initial_code)
@@ -622,13 +629,13 @@ ENTRY(initial_code)
622__PAGE_ALIGNED_BSS 629__PAGE_ALIGNED_BSS
623 .align PAGE_SIZE_asm 630 .align PAGE_SIZE_asm
624#ifdef CONFIG_X86_PAE 631#ifdef CONFIG_X86_PAE
625ENTRY(initial_pg_pmd) 632initial_pg_pmd:
626 .fill 1024*KPMDS,4,0 633 .fill 1024*KPMDS,4,0
627#else 634#else
628ENTRY(initial_page_table) 635ENTRY(initial_page_table)
629 .fill 1024,4,0 636 .fill 1024,4,0
630#endif 637#endif
631ENTRY(initial_pg_fixmap) 638initial_pg_fixmap:
632 .fill 1024,4,0 639 .fill 1024,4,0
633ENTRY(empty_zero_page) 640ENTRY(empty_zero_page)
634 .fill 4096,1,0 641 .fill 4096,1,0
diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c
index 42c594254507..02f07634d265 100644
--- a/arch/x86/kernel/hw_breakpoint.c
+++ b/arch/x86/kernel/hw_breakpoint.c
@@ -122,7 +122,7 @@ int arch_install_hw_breakpoint(struct perf_event *bp)
122 return -EBUSY; 122 return -EBUSY;
123 123
124 set_debugreg(info->address, i); 124 set_debugreg(info->address, i);
125 __get_cpu_var(cpu_debugreg[i]) = info->address; 125 __this_cpu_write(cpu_debugreg[i], info->address);
126 126
127 dr7 = &__get_cpu_var(cpu_dr7); 127 dr7 = &__get_cpu_var(cpu_dr7);
128 *dr7 |= encode_dr7(i, info->len, info->type); 128 *dr7 |= encode_dr7(i, info->len, info->type);
@@ -397,12 +397,12 @@ void flush_ptrace_hw_breakpoint(struct task_struct *tsk)
397 397
398void hw_breakpoint_restore(void) 398void hw_breakpoint_restore(void)
399{ 399{
400 set_debugreg(__get_cpu_var(cpu_debugreg[0]), 0); 400 set_debugreg(__this_cpu_read(cpu_debugreg[0]), 0);
401 set_debugreg(__get_cpu_var(cpu_debugreg[1]), 1); 401 set_debugreg(__this_cpu_read(cpu_debugreg[1]), 1);
402 set_debugreg(__get_cpu_var(cpu_debugreg[2]), 2); 402 set_debugreg(__this_cpu_read(cpu_debugreg[2]), 2);
403 set_debugreg(__get_cpu_var(cpu_debugreg[3]), 3); 403 set_debugreg(__this_cpu_read(cpu_debugreg[3]), 3);
404 set_debugreg(current->thread.debugreg6, 6); 404 set_debugreg(current->thread.debugreg6, 6);
405 set_debugreg(__get_cpu_var(cpu_dr7), 7); 405 set_debugreg(__this_cpu_read(cpu_dr7), 7);
406} 406}
407EXPORT_SYMBOL_GPL(hw_breakpoint_restore); 407EXPORT_SYMBOL_GPL(hw_breakpoint_restore);
408 408
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 83ec0175f986..3a43caa3beb7 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -234,7 +234,7 @@ unsigned int __irq_entry do_IRQ(struct pt_regs *regs)
234 exit_idle(); 234 exit_idle();
235 irq_enter(); 235 irq_enter();
236 236
237 irq = __get_cpu_var(vector_irq)[vector]; 237 irq = __this_cpu_read(vector_irq[vector]);
238 238
239 if (!handle_irq(irq, regs)) { 239 if (!handle_irq(irq, regs)) {
240 ack_APIC_irq(); 240 ack_APIC_irq();
@@ -350,12 +350,12 @@ void fixup_irqs(void)
350 for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) { 350 for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) {
351 unsigned int irr; 351 unsigned int irr;
352 352
353 if (__get_cpu_var(vector_irq)[vector] < 0) 353 if (__this_cpu_read(vector_irq[vector]) < 0)
354 continue; 354 continue;
355 355
356 irr = apic_read(APIC_IRR + (vector / 32 * 0x10)); 356 irr = apic_read(APIC_IRR + (vector / 32 * 0x10));
357 if (irr & (1 << (vector % 32))) { 357 if (irr & (1 << (vector % 32))) {
358 irq = __get_cpu_var(vector_irq)[vector]; 358 irq = __this_cpu_read(vector_irq[vector]);
359 359
360 data = irq_get_irq_data(irq); 360 data = irq_get_irq_data(irq);
361 raw_spin_lock(&desc->lock); 361 raw_spin_lock(&desc->lock);
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index 96656f207751..48ff6dcffa02 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -79,7 +79,7 @@ execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq)
79 u32 *isp, arg1, arg2; 79 u32 *isp, arg1, arg2;
80 80
81 curctx = (union irq_ctx *) current_thread_info(); 81 curctx = (union irq_ctx *) current_thread_info();
82 irqctx = __get_cpu_var(hardirq_ctx); 82 irqctx = __this_cpu_read(hardirq_ctx);
83 83
84 /* 84 /*
85 * this is where we switch to the IRQ stack. However, if we are 85 * this is where we switch to the IRQ stack. However, if we are
@@ -166,7 +166,7 @@ asmlinkage void do_softirq(void)
166 166
167 if (local_softirq_pending()) { 167 if (local_softirq_pending()) {
168 curctx = current_thread_info(); 168 curctx = current_thread_info();
169 irqctx = __get_cpu_var(softirq_ctx); 169 irqctx = __this_cpu_read(softirq_ctx);
170 irqctx->tinfo.task = curctx->task; 170 irqctx->tinfo.task = curctx->task;
171 irqctx->tinfo.previous_esp = current_stack_pointer; 171 irqctx->tinfo.previous_esp = current_stack_pointer;
172 172
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index cd21b654dec6..a4130005028a 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -48,6 +48,7 @@
48#include <asm/apicdef.h> 48#include <asm/apicdef.h>
49#include <asm/system.h> 49#include <asm/system.h>
50#include <asm/apic.h> 50#include <asm/apic.h>
51#include <asm/nmi.h>
51 52
52struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] = 53struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] =
53{ 54{
@@ -525,10 +526,6 @@ static int __kgdb_notify(struct die_args *args, unsigned long cmd)
525 } 526 }
526 return NOTIFY_DONE; 527 return NOTIFY_DONE;
527 528
528 case DIE_NMI_IPI:
529 /* Just ignore, we will handle the roundup on DIE_NMI. */
530 return NOTIFY_DONE;
531
532 case DIE_NMIUNKNOWN: 529 case DIE_NMIUNKNOWN:
533 if (was_in_debug_nmi[raw_smp_processor_id()]) { 530 if (was_in_debug_nmi[raw_smp_processor_id()]) {
534 was_in_debug_nmi[raw_smp_processor_id()] = 0; 531 was_in_debug_nmi[raw_smp_processor_id()] = 0;
@@ -606,7 +603,7 @@ static struct notifier_block kgdb_notifier = {
606 /* 603 /*
607 * Lowest-prio notifier priority, we want to be notified last: 604 * Lowest-prio notifier priority, we want to be notified last:
608 */ 605 */
609 .priority = -INT_MAX, 606 .priority = NMI_LOCAL_LOW_PRIOR,
610}; 607};
611 608
612/** 609/**
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index 1cbd54c0df99..d91c477b3f62 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -403,7 +403,7 @@ static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb)
403 403
404static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb) 404static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb)
405{ 405{
406 __get_cpu_var(current_kprobe) = kcb->prev_kprobe.kp; 406 __this_cpu_write(current_kprobe, kcb->prev_kprobe.kp);
407 kcb->kprobe_status = kcb->prev_kprobe.status; 407 kcb->kprobe_status = kcb->prev_kprobe.status;
408 kcb->kprobe_old_flags = kcb->prev_kprobe.old_flags; 408 kcb->kprobe_old_flags = kcb->prev_kprobe.old_flags;
409 kcb->kprobe_saved_flags = kcb->prev_kprobe.saved_flags; 409 kcb->kprobe_saved_flags = kcb->prev_kprobe.saved_flags;
@@ -412,7 +412,7 @@ static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb)
412static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs, 412static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs,
413 struct kprobe_ctlblk *kcb) 413 struct kprobe_ctlblk *kcb)
414{ 414{
415 __get_cpu_var(current_kprobe) = p; 415 __this_cpu_write(current_kprobe, p);
416 kcb->kprobe_saved_flags = kcb->kprobe_old_flags 416 kcb->kprobe_saved_flags = kcb->kprobe_old_flags
417 = (regs->flags & (X86_EFLAGS_TF | X86_EFLAGS_IF)); 417 = (regs->flags & (X86_EFLAGS_TF | X86_EFLAGS_IF));
418 if (is_IF_modifier(p->ainsn.insn)) 418 if (is_IF_modifier(p->ainsn.insn))
@@ -586,7 +586,7 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
586 preempt_enable_no_resched(); 586 preempt_enable_no_resched();
587 return 1; 587 return 1;
588 } else if (kprobe_running()) { 588 } else if (kprobe_running()) {
589 p = __get_cpu_var(current_kprobe); 589 p = __this_cpu_read(current_kprobe);
590 if (p->break_handler && p->break_handler(p, regs)) { 590 if (p->break_handler && p->break_handler(p, regs)) {
591 setup_singlestep(p, regs, kcb, 0); 591 setup_singlestep(p, regs, kcb, 0);
592 return 1; 592 return 1;
@@ -759,11 +759,11 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs)
759 759
760 orig_ret_address = (unsigned long)ri->ret_addr; 760 orig_ret_address = (unsigned long)ri->ret_addr;
761 if (ri->rp && ri->rp->handler) { 761 if (ri->rp && ri->rp->handler) {
762 __get_cpu_var(current_kprobe) = &ri->rp->kp; 762 __this_cpu_write(current_kprobe, &ri->rp->kp);
763 get_kprobe_ctlblk()->kprobe_status = KPROBE_HIT_ACTIVE; 763 get_kprobe_ctlblk()->kprobe_status = KPROBE_HIT_ACTIVE;
764 ri->ret_addr = correct_ret_addr; 764 ri->ret_addr = correct_ret_addr;
765 ri->rp->handler(ri, regs); 765 ri->rp->handler(ri, regs);
766 __get_cpu_var(current_kprobe) = NULL; 766 __this_cpu_write(current_kprobe, NULL);
767 } 767 }
768 768
769 recycle_rp_inst(ri, &empty_rp); 769 recycle_rp_inst(ri, &empty_rp);
@@ -1184,6 +1184,10 @@ static void __kprobes optimized_callback(struct optimized_kprobe *op,
1184{ 1184{
1185 struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); 1185 struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
1186 1186
1187 /* This is possible if op is under delayed unoptimizing */
1188 if (kprobe_disabled(&op->kp))
1189 return;
1190
1187 preempt_disable(); 1191 preempt_disable();
1188 if (kprobe_running()) { 1192 if (kprobe_running()) {
1189 kprobes_inc_nmissed_count(&op->kp); 1193 kprobes_inc_nmissed_count(&op->kp);
@@ -1198,10 +1202,10 @@ static void __kprobes optimized_callback(struct optimized_kprobe *op,
1198 regs->ip = (unsigned long)op->kp.addr + INT3_SIZE; 1202 regs->ip = (unsigned long)op->kp.addr + INT3_SIZE;
1199 regs->orig_ax = ~0UL; 1203 regs->orig_ax = ~0UL;
1200 1204
1201 __get_cpu_var(current_kprobe) = &op->kp; 1205 __this_cpu_write(current_kprobe, &op->kp);
1202 kcb->kprobe_status = KPROBE_HIT_ACTIVE; 1206 kcb->kprobe_status = KPROBE_HIT_ACTIVE;
1203 opt_pre_handler(&op->kp, regs); 1207 opt_pre_handler(&op->kp, regs);
1204 __get_cpu_var(current_kprobe) = NULL; 1208 __this_cpu_write(current_kprobe, NULL);
1205 } 1209 }
1206 preempt_enable_no_resched(); 1210 preempt_enable_no_resched();
1207} 1211}
@@ -1401,10 +1405,16 @@ int __kprobes arch_prepare_optimized_kprobe(struct optimized_kprobe *op)
1401 return 0; 1405 return 0;
1402} 1406}
1403 1407
1404/* Replace a breakpoint (int3) with a relative jump. */ 1408#define MAX_OPTIMIZE_PROBES 256
1405int __kprobes arch_optimize_kprobe(struct optimized_kprobe *op) 1409static struct text_poke_param *jump_poke_params;
1410static struct jump_poke_buffer {
1411 u8 buf[RELATIVEJUMP_SIZE];
1412} *jump_poke_bufs;
1413
1414static void __kprobes setup_optimize_kprobe(struct text_poke_param *tprm,
1415 u8 *insn_buf,
1416 struct optimized_kprobe *op)
1406{ 1417{
1407 unsigned char jmp_code[RELATIVEJUMP_SIZE];
1408 s32 rel = (s32)((long)op->optinsn.insn - 1418 s32 rel = (s32)((long)op->optinsn.insn -
1409 ((long)op->kp.addr + RELATIVEJUMP_SIZE)); 1419 ((long)op->kp.addr + RELATIVEJUMP_SIZE));
1410 1420
@@ -1412,16 +1422,79 @@ int __kprobes arch_optimize_kprobe(struct optimized_kprobe *op)
1412 memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_SIZE, 1422 memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_SIZE,
1413 RELATIVE_ADDR_SIZE); 1423 RELATIVE_ADDR_SIZE);
1414 1424
1415 jmp_code[0] = RELATIVEJUMP_OPCODE; 1425 insn_buf[0] = RELATIVEJUMP_OPCODE;
1416 *(s32 *)(&jmp_code[1]) = rel; 1426 *(s32 *)(&insn_buf[1]) = rel;
1427
1428 tprm->addr = op->kp.addr;
1429 tprm->opcode = insn_buf;
1430 tprm->len = RELATIVEJUMP_SIZE;
1431}
1432
1433/*
1434 * Replace breakpoints (int3) with relative jumps.
1435 * Caller must call with locking kprobe_mutex and text_mutex.
1436 */
1437void __kprobes arch_optimize_kprobes(struct list_head *oplist)
1438{
1439 struct optimized_kprobe *op, *tmp;
1440 int c = 0;
1441
1442 list_for_each_entry_safe(op, tmp, oplist, list) {
1443 WARN_ON(kprobe_disabled(&op->kp));
1444 /* Setup param */
1445 setup_optimize_kprobe(&jump_poke_params[c],
1446 jump_poke_bufs[c].buf, op);
1447 list_del_init(&op->list);
1448 if (++c >= MAX_OPTIMIZE_PROBES)
1449 break;
1450 }
1417 1451
1418 /* 1452 /*
1419 * text_poke_smp doesn't support NMI/MCE code modifying. 1453 * text_poke_smp doesn't support NMI/MCE code modifying.
1420 * However, since kprobes itself also doesn't support NMI/MCE 1454 * However, since kprobes itself also doesn't support NMI/MCE
1421 * code probing, it's not a problem. 1455 * code probing, it's not a problem.
1422 */ 1456 */
1423 text_poke_smp(op->kp.addr, jmp_code, RELATIVEJUMP_SIZE); 1457 text_poke_smp_batch(jump_poke_params, c);
1424 return 0; 1458}
1459
1460static void __kprobes setup_unoptimize_kprobe(struct text_poke_param *tprm,
1461 u8 *insn_buf,
1462 struct optimized_kprobe *op)
1463{
1464 /* Set int3 to first byte for kprobes */
1465 insn_buf[0] = BREAKPOINT_INSTRUCTION;
1466 memcpy(insn_buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
1467
1468 tprm->addr = op->kp.addr;
1469 tprm->opcode = insn_buf;
1470 tprm->len = RELATIVEJUMP_SIZE;
1471}
1472
1473/*
1474 * Recover original instructions and breakpoints from relative jumps.
1475 * Caller must call with locking kprobe_mutex.
1476 */
1477extern void arch_unoptimize_kprobes(struct list_head *oplist,
1478 struct list_head *done_list)
1479{
1480 struct optimized_kprobe *op, *tmp;
1481 int c = 0;
1482
1483 list_for_each_entry_safe(op, tmp, oplist, list) {
1484 /* Setup param */
1485 setup_unoptimize_kprobe(&jump_poke_params[c],
1486 jump_poke_bufs[c].buf, op);
1487 list_move(&op->list, done_list);
1488 if (++c >= MAX_OPTIMIZE_PROBES)
1489 break;
1490 }
1491
1492 /*
1493 * text_poke_smp doesn't support NMI/MCE code modifying.
1494 * However, since kprobes itself also doesn't support NMI/MCE
1495 * code probing, it's not a problem.
1496 */
1497 text_poke_smp_batch(jump_poke_params, c);
1425} 1498}
1426 1499
1427/* Replace a relative jump with a breakpoint (int3). */ 1500/* Replace a relative jump with a breakpoint (int3). */
@@ -1453,11 +1526,35 @@ static int __kprobes setup_detour_execution(struct kprobe *p,
1453 } 1526 }
1454 return 0; 1527 return 0;
1455} 1528}
1529
1530static int __kprobes init_poke_params(void)
1531{
1532 /* Allocate code buffer and parameter array */
1533 jump_poke_bufs = kmalloc(sizeof(struct jump_poke_buffer) *
1534 MAX_OPTIMIZE_PROBES, GFP_KERNEL);
1535 if (!jump_poke_bufs)
1536 return -ENOMEM;
1537
1538 jump_poke_params = kmalloc(sizeof(struct text_poke_param) *
1539 MAX_OPTIMIZE_PROBES, GFP_KERNEL);
1540 if (!jump_poke_params) {
1541 kfree(jump_poke_bufs);
1542 jump_poke_bufs = NULL;
1543 return -ENOMEM;
1544 }
1545
1546 return 0;
1547}
1548#else /* !CONFIG_OPTPROBES */
1549static int __kprobes init_poke_params(void)
1550{
1551 return 0;
1552}
1456#endif 1553#endif
1457 1554
1458int __init arch_init_kprobes(void) 1555int __init arch_init_kprobes(void)
1459{ 1556{
1460 return 0; 1557 return init_poke_params();
1461} 1558}
1462 1559
1463int __kprobes arch_trampoline_kprobe(struct kprobe *p) 1560int __kprobes arch_trampoline_kprobe(struct kprobe *p)
diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c
index ce0cb4721c9a..0fe6d1a66c38 100644
--- a/arch/x86/kernel/microcode_amd.c
+++ b/arch/x86/kernel/microcode_amd.c
@@ -155,12 +155,6 @@ static int apply_microcode_amd(int cpu)
155 return 0; 155 return 0;
156} 156}
157 157
158static int get_ucode_data(void *to, const u8 *from, size_t n)
159{
160 memcpy(to, from, n);
161 return 0;
162}
163
164static void * 158static void *
165get_next_ucode(const u8 *buf, unsigned int size, unsigned int *mc_size) 159get_next_ucode(const u8 *buf, unsigned int size, unsigned int *mc_size)
166{ 160{
@@ -168,8 +162,7 @@ get_next_ucode(const u8 *buf, unsigned int size, unsigned int *mc_size)
168 u8 section_hdr[UCODE_CONTAINER_SECTION_HDR]; 162 u8 section_hdr[UCODE_CONTAINER_SECTION_HDR];
169 void *mc; 163 void *mc;
170 164
171 if (get_ucode_data(section_hdr, buf, UCODE_CONTAINER_SECTION_HDR)) 165 get_ucode_data(section_hdr, buf, UCODE_CONTAINER_SECTION_HDR);
172 return NULL;
173 166
174 if (section_hdr[0] != UCODE_UCODE_TYPE) { 167 if (section_hdr[0] != UCODE_UCODE_TYPE) {
175 pr_err("error: invalid type field in container file section header\n"); 168 pr_err("error: invalid type field in container file section header\n");
@@ -183,16 +176,13 @@ get_next_ucode(const u8 *buf, unsigned int size, unsigned int *mc_size)
183 return NULL; 176 return NULL;
184 } 177 }
185 178
186 mc = vmalloc(UCODE_MAX_SIZE); 179 mc = vzalloc(UCODE_MAX_SIZE);
187 if (mc) { 180 if (!mc)
188 memset(mc, 0, UCODE_MAX_SIZE); 181 return NULL;
189 if (get_ucode_data(mc, buf + UCODE_CONTAINER_SECTION_HDR, 182
190 total_size)) { 183 get_ucode_data(mc, buf + UCODE_CONTAINER_SECTION_HDR, total_size);
191 vfree(mc); 184 *mc_size = total_size + UCODE_CONTAINER_SECTION_HDR;
192 mc = NULL; 185
193 } else
194 *mc_size = total_size + UCODE_CONTAINER_SECTION_HDR;
195 }
196 return mc; 186 return mc;
197} 187}
198 188
@@ -202,8 +192,7 @@ static int install_equiv_cpu_table(const u8 *buf)
202 unsigned int *buf_pos = (unsigned int *)container_hdr; 192 unsigned int *buf_pos = (unsigned int *)container_hdr;
203 unsigned long size; 193 unsigned long size;
204 194
205 if (get_ucode_data(&container_hdr, buf, UCODE_CONTAINER_HEADER_SIZE)) 195 get_ucode_data(&container_hdr, buf, UCODE_CONTAINER_HEADER_SIZE);
206 return 0;
207 196
208 size = buf_pos[2]; 197 size = buf_pos[2];
209 198
@@ -219,10 +208,7 @@ static int install_equiv_cpu_table(const u8 *buf)
219 } 208 }
220 209
221 buf += UCODE_CONTAINER_HEADER_SIZE; 210 buf += UCODE_CONTAINER_HEADER_SIZE;
222 if (get_ucode_data(equiv_cpu_table, buf, size)) { 211 get_ucode_data(equiv_cpu_table, buf, size);
223 vfree(equiv_cpu_table);
224 return 0;
225 }
226 212
227 return size + UCODE_CONTAINER_HEADER_SIZE; /* add header length */ 213 return size + UCODE_CONTAINER_HEADER_SIZE; /* add header length */
228} 214}
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index 9af64d9c4b67..01b0f6d06451 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -118,21 +118,8 @@ static void __init MP_bus_info(struct mpc_bus *m)
118 118
119static void __init MP_ioapic_info(struct mpc_ioapic *m) 119static void __init MP_ioapic_info(struct mpc_ioapic *m)
120{ 120{
121 if (!(m->flags & MPC_APIC_USABLE)) 121 if (m->flags & MPC_APIC_USABLE)
122 return; 122 mp_register_ioapic(m->apicid, m->apicaddr, gsi_top);
123
124 printk(KERN_INFO "I/O APIC #%d Version %d at 0x%X.\n",
125 m->apicid, m->apicver, m->apicaddr);
126
127 mp_register_ioapic(m->apicid, m->apicaddr, gsi_top);
128}
129
130static void print_MP_intsrc_info(struct mpc_intsrc *m)
131{
132 apic_printk(APIC_VERBOSE, "Int: type %d, pol %d, trig %d, bus %02x,"
133 " IRQ %02x, APIC ID %x, APIC INT %02x\n",
134 m->irqtype, m->irqflag & 3, (m->irqflag >> 2) & 3, m->srcbus,
135 m->srcbusirq, m->dstapic, m->dstirq);
136} 123}
137 124
138static void __init print_mp_irq_info(struct mpc_intsrc *mp_irq) 125static void __init print_mp_irq_info(struct mpc_intsrc *mp_irq)
@@ -144,73 +131,11 @@ static void __init print_mp_irq_info(struct mpc_intsrc *mp_irq)
144 mp_irq->srcbusirq, mp_irq->dstapic, mp_irq->dstirq); 131 mp_irq->srcbusirq, mp_irq->dstapic, mp_irq->dstirq);
145} 132}
146 133
147static void __init assign_to_mp_irq(struct mpc_intsrc *m,
148 struct mpc_intsrc *mp_irq)
149{
150 mp_irq->dstapic = m->dstapic;
151 mp_irq->type = m->type;
152 mp_irq->irqtype = m->irqtype;
153 mp_irq->irqflag = m->irqflag;
154 mp_irq->srcbus = m->srcbus;
155 mp_irq->srcbusirq = m->srcbusirq;
156 mp_irq->dstirq = m->dstirq;
157}
158
159static void __init assign_to_mpc_intsrc(struct mpc_intsrc *mp_irq,
160 struct mpc_intsrc *m)
161{
162 m->dstapic = mp_irq->dstapic;
163 m->type = mp_irq->type;
164 m->irqtype = mp_irq->irqtype;
165 m->irqflag = mp_irq->irqflag;
166 m->srcbus = mp_irq->srcbus;
167 m->srcbusirq = mp_irq->srcbusirq;
168 m->dstirq = mp_irq->dstirq;
169}
170
171static int __init mp_irq_mpc_intsrc_cmp(struct mpc_intsrc *mp_irq,
172 struct mpc_intsrc *m)
173{
174 if (mp_irq->dstapic != m->dstapic)
175 return 1;
176 if (mp_irq->type != m->type)
177 return 2;
178 if (mp_irq->irqtype != m->irqtype)
179 return 3;
180 if (mp_irq->irqflag != m->irqflag)
181 return 4;
182 if (mp_irq->srcbus != m->srcbus)
183 return 5;
184 if (mp_irq->srcbusirq != m->srcbusirq)
185 return 6;
186 if (mp_irq->dstirq != m->dstirq)
187 return 7;
188
189 return 0;
190}
191
192static void __init MP_intsrc_info(struct mpc_intsrc *m)
193{
194 int i;
195
196 print_MP_intsrc_info(m);
197
198 for (i = 0; i < mp_irq_entries; i++) {
199 if (!mp_irq_mpc_intsrc_cmp(&mp_irqs[i], m))
200 return;
201 }
202
203 assign_to_mp_irq(m, &mp_irqs[mp_irq_entries]);
204 if (++mp_irq_entries == MAX_IRQ_SOURCES)
205 panic("Max # of irq sources exceeded!!\n");
206}
207#else /* CONFIG_X86_IO_APIC */ 134#else /* CONFIG_X86_IO_APIC */
208static inline void __init MP_bus_info(struct mpc_bus *m) {} 135static inline void __init MP_bus_info(struct mpc_bus *m) {}
209static inline void __init MP_ioapic_info(struct mpc_ioapic *m) {} 136static inline void __init MP_ioapic_info(struct mpc_ioapic *m) {}
210static inline void __init MP_intsrc_info(struct mpc_intsrc *m) {}
211#endif /* CONFIG_X86_IO_APIC */ 137#endif /* CONFIG_X86_IO_APIC */
212 138
213
214static void __init MP_lintsrc_info(struct mpc_lintsrc *m) 139static void __init MP_lintsrc_info(struct mpc_lintsrc *m)
215{ 140{
216 apic_printk(APIC_VERBOSE, "Lint: type %d, pol %d, trig %d, bus %02x," 141 apic_printk(APIC_VERBOSE, "Lint: type %d, pol %d, trig %d, bus %02x,"
@@ -222,7 +147,6 @@ static void __init MP_lintsrc_info(struct mpc_lintsrc *m)
222/* 147/*
223 * Read/parse the MPC 148 * Read/parse the MPC
224 */ 149 */
225
226static int __init smp_check_mpc(struct mpc_table *mpc, char *oem, char *str) 150static int __init smp_check_mpc(struct mpc_table *mpc, char *oem, char *str)
227{ 151{
228 152
@@ -275,18 +199,6 @@ static void __init smp_dump_mptable(struct mpc_table *mpc, unsigned char *mpt)
275 199
276void __init default_smp_read_mpc_oem(struct mpc_table *mpc) { } 200void __init default_smp_read_mpc_oem(struct mpc_table *mpc) { }
277 201
278static void __init smp_register_lapic_address(unsigned long address)
279{
280 mp_lapic_addr = address;
281
282 set_fixmap_nocache(FIX_APIC_BASE, address);
283 if (boot_cpu_physical_apicid == -1U) {
284 boot_cpu_physical_apicid = read_apic_id();
285 apic_version[boot_cpu_physical_apicid] =
286 GET_APIC_VERSION(apic_read(APIC_LVR));
287 }
288}
289
290static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early) 202static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early)
291{ 203{
292 char str[16]; 204 char str[16];
@@ -301,17 +213,13 @@ static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early)
301#ifdef CONFIG_X86_32 213#ifdef CONFIG_X86_32
302 generic_mps_oem_check(mpc, oem, str); 214 generic_mps_oem_check(mpc, oem, str);
303#endif 215#endif
304 /* save the local APIC address, it might be non-default */ 216 /* Initialize the lapic mapping */
305 if (!acpi_lapic) 217 if (!acpi_lapic)
306 mp_lapic_addr = mpc->lapic; 218 register_lapic_address(mpc->lapic);
307 219
308 if (early) 220 if (early)
309 return 1; 221 return 1;
310 222
311 /* Initialize the lapic mapping */
312 if (!acpi_lapic)
313 smp_register_lapic_address(mpc->lapic);
314
315 if (mpc->oemptr) 223 if (mpc->oemptr)
316 x86_init.mpparse.smp_read_mpc_oem(mpc); 224 x86_init.mpparse.smp_read_mpc_oem(mpc);
317 225
@@ -337,7 +245,7 @@ static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early)
337 skip_entry(&mpt, &count, sizeof(struct mpc_ioapic)); 245 skip_entry(&mpt, &count, sizeof(struct mpc_ioapic));
338 break; 246 break;
339 case MP_INTSRC: 247 case MP_INTSRC:
340 MP_intsrc_info((struct mpc_intsrc *)mpt); 248 mp_save_irq((struct mpc_intsrc *)mpt);
341 skip_entry(&mpt, &count, sizeof(struct mpc_intsrc)); 249 skip_entry(&mpt, &count, sizeof(struct mpc_intsrc));
342 break; 250 break;
343 case MP_LINTSRC: 251 case MP_LINTSRC:
@@ -429,13 +337,13 @@ static void __init construct_default_ioirq_mptable(int mpc_default_type)
429 337
430 intsrc.srcbusirq = i; 338 intsrc.srcbusirq = i;
431 intsrc.dstirq = i ? i : 2; /* IRQ0 to INTIN2 */ 339 intsrc.dstirq = i ? i : 2; /* IRQ0 to INTIN2 */
432 MP_intsrc_info(&intsrc); 340 mp_save_irq(&intsrc);
433 } 341 }
434 342
435 intsrc.irqtype = mp_ExtINT; 343 intsrc.irqtype = mp_ExtINT;
436 intsrc.srcbusirq = 0; 344 intsrc.srcbusirq = 0;
437 intsrc.dstirq = 0; /* 8259A to INTIN0 */ 345 intsrc.dstirq = 0; /* 8259A to INTIN0 */
438 MP_intsrc_info(&intsrc); 346 mp_save_irq(&intsrc);
439} 347}
440 348
441 349
@@ -784,11 +692,11 @@ static void __init check_irq_src(struct mpc_intsrc *m, int *nr_m_spare)
784 int i; 692 int i;
785 693
786 apic_printk(APIC_VERBOSE, "OLD "); 694 apic_printk(APIC_VERBOSE, "OLD ");
787 print_MP_intsrc_info(m); 695 print_mp_irq_info(m);
788 696
789 i = get_MP_intsrc_index(m); 697 i = get_MP_intsrc_index(m);
790 if (i > 0) { 698 if (i > 0) {
791 assign_to_mpc_intsrc(&mp_irqs[i], m); 699 memcpy(m, &mp_irqs[i], sizeof(*m));
792 apic_printk(APIC_VERBOSE, "NEW "); 700 apic_printk(APIC_VERBOSE, "NEW ");
793 print_mp_irq_info(&mp_irqs[i]); 701 print_mp_irq_info(&mp_irqs[i]);
794 return; 702 return;
@@ -875,14 +783,14 @@ static int __init replace_intsrc_all(struct mpc_table *mpc,
875 if (nr_m_spare > 0) { 783 if (nr_m_spare > 0) {
876 apic_printk(APIC_VERBOSE, "*NEW* found\n"); 784 apic_printk(APIC_VERBOSE, "*NEW* found\n");
877 nr_m_spare--; 785 nr_m_spare--;
878 assign_to_mpc_intsrc(&mp_irqs[i], m_spare[nr_m_spare]); 786 memcpy(m_spare[nr_m_spare], &mp_irqs[i], sizeof(mp_irqs[i]));
879 m_spare[nr_m_spare] = NULL; 787 m_spare[nr_m_spare] = NULL;
880 } else { 788 } else {
881 struct mpc_intsrc *m = (struct mpc_intsrc *)mpt; 789 struct mpc_intsrc *m = (struct mpc_intsrc *)mpt;
882 count += sizeof(struct mpc_intsrc); 790 count += sizeof(struct mpc_intsrc);
883 if (check_slot(mpc_new_phys, mpc_new_length, count) < 0) 791 if (check_slot(mpc_new_phys, mpc_new_length, count) < 0)
884 goto out; 792 goto out;
885 assign_to_mpc_intsrc(&mp_irqs[i], m); 793 memcpy(m, &mp_irqs[i], sizeof(*m));
886 mpc->length = count; 794 mpc->length = count;
887 mpt += sizeof(struct mpc_intsrc); 795 mpt += sizeof(struct mpc_intsrc);
888 } 796 }
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index ba0f0ca9f280..c01ffa5b9b87 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c
@@ -143,7 +143,7 @@ static void flush_gart(void)
143 143
144 spin_lock_irqsave(&iommu_bitmap_lock, flags); 144 spin_lock_irqsave(&iommu_bitmap_lock, flags);
145 if (need_flush) { 145 if (need_flush) {
146 k8_flush_garts(); 146 amd_flush_garts();
147 need_flush = false; 147 need_flush = false;
148 } 148 }
149 spin_unlock_irqrestore(&iommu_bitmap_lock, flags); 149 spin_unlock_irqrestore(&iommu_bitmap_lock, flags);
@@ -561,17 +561,17 @@ static void enable_gart_translations(void)
561{ 561{
562 int i; 562 int i;
563 563
564 if (!k8_northbridges.gart_supported) 564 if (!amd_nb_has_feature(AMD_NB_GART))
565 return; 565 return;
566 566
567 for (i = 0; i < k8_northbridges.num; i++) { 567 for (i = 0; i < amd_nb_num(); i++) {
568 struct pci_dev *dev = k8_northbridges.nb_misc[i]; 568 struct pci_dev *dev = node_to_amd_nb(i)->misc;
569 569
570 enable_gart_translation(dev, __pa(agp_gatt_table)); 570 enable_gart_translation(dev, __pa(agp_gatt_table));
571 } 571 }
572 572
573 /* Flush the GART-TLB to remove stale entries */ 573 /* Flush the GART-TLB to remove stale entries */
574 k8_flush_garts(); 574 amd_flush_garts();
575} 575}
576 576
577/* 577/*
@@ -596,13 +596,13 @@ static void gart_fixup_northbridges(struct sys_device *dev)
596 if (!fix_up_north_bridges) 596 if (!fix_up_north_bridges)
597 return; 597 return;
598 598
599 if (!k8_northbridges.gart_supported) 599 if (!amd_nb_has_feature(AMD_NB_GART))
600 return; 600 return;
601 601
602 pr_info("PCI-DMA: Restoring GART aperture settings\n"); 602 pr_info("PCI-DMA: Restoring GART aperture settings\n");
603 603
604 for (i = 0; i < k8_northbridges.num; i++) { 604 for (i = 0; i < amd_nb_num(); i++) {
605 struct pci_dev *dev = k8_northbridges.nb_misc[i]; 605 struct pci_dev *dev = node_to_amd_nb(i)->misc;
606 606
607 /* 607 /*
608 * Don't enable translations just yet. That is the next 608 * Don't enable translations just yet. That is the next
@@ -644,7 +644,7 @@ static struct sys_device device_gart = {
644 * Private Northbridge GATT initialization in case we cannot use the 644 * Private Northbridge GATT initialization in case we cannot use the
645 * AGP driver for some reason. 645 * AGP driver for some reason.
646 */ 646 */
647static __init int init_k8_gatt(struct agp_kern_info *info) 647static __init int init_amd_gatt(struct agp_kern_info *info)
648{ 648{
649 unsigned aper_size, gatt_size, new_aper_size; 649 unsigned aper_size, gatt_size, new_aper_size;
650 unsigned aper_base, new_aper_base; 650 unsigned aper_base, new_aper_base;
@@ -656,8 +656,8 @@ static __init int init_k8_gatt(struct agp_kern_info *info)
656 656
657 aper_size = aper_base = info->aper_size = 0; 657 aper_size = aper_base = info->aper_size = 0;
658 dev = NULL; 658 dev = NULL;
659 for (i = 0; i < k8_northbridges.num; i++) { 659 for (i = 0; i < amd_nb_num(); i++) {
660 dev = k8_northbridges.nb_misc[i]; 660 dev = node_to_amd_nb(i)->misc;
661 new_aper_base = read_aperture(dev, &new_aper_size); 661 new_aper_base = read_aperture(dev, &new_aper_size);
662 if (!new_aper_base) 662 if (!new_aper_base)
663 goto nommu; 663 goto nommu;
@@ -725,13 +725,13 @@ static void gart_iommu_shutdown(void)
725 if (!no_agp) 725 if (!no_agp)
726 return; 726 return;
727 727
728 if (!k8_northbridges.gart_supported) 728 if (!amd_nb_has_feature(AMD_NB_GART))
729 return; 729 return;
730 730
731 for (i = 0; i < k8_northbridges.num; i++) { 731 for (i = 0; i < amd_nb_num(); i++) {
732 u32 ctl; 732 u32 ctl;
733 733
734 dev = k8_northbridges.nb_misc[i]; 734 dev = node_to_amd_nb(i)->misc;
735 pci_read_config_dword(dev, AMD64_GARTAPERTURECTL, &ctl); 735 pci_read_config_dword(dev, AMD64_GARTAPERTURECTL, &ctl);
736 736
737 ctl &= ~GARTEN; 737 ctl &= ~GARTEN;
@@ -749,14 +749,14 @@ int __init gart_iommu_init(void)
749 unsigned long scratch; 749 unsigned long scratch;
750 long i; 750 long i;
751 751
752 if (!k8_northbridges.gart_supported) 752 if (!amd_nb_has_feature(AMD_NB_GART))
753 return 0; 753 return 0;
754 754
755#ifndef CONFIG_AGP_AMD64 755#ifndef CONFIG_AGP_AMD64
756 no_agp = 1; 756 no_agp = 1;
757#else 757#else
758 /* Makefile puts PCI initialization via subsys_initcall first. */ 758 /* Makefile puts PCI initialization via subsys_initcall first. */
759 /* Add other K8 AGP bridge drivers here */ 759 /* Add other AMD AGP bridge drivers here */
760 no_agp = no_agp || 760 no_agp = no_agp ||
761 (agp_amd64_init() < 0) || 761 (agp_amd64_init() < 0) ||
762 (agp_copy_info(agp_bridge, &info) < 0); 762 (agp_copy_info(agp_bridge, &info) < 0);
@@ -765,7 +765,7 @@ int __init gart_iommu_init(void)
765 if (no_iommu || 765 if (no_iommu ||
766 (!force_iommu && max_pfn <= MAX_DMA32_PFN) || 766 (!force_iommu && max_pfn <= MAX_DMA32_PFN) ||
767 !gart_iommu_aperture || 767 !gart_iommu_aperture ||
768 (no_agp && init_k8_gatt(&info) < 0)) { 768 (no_agp && init_amd_gatt(&info) < 0)) {
769 if (max_pfn > MAX_DMA32_PFN) { 769 if (max_pfn > MAX_DMA32_PFN) {
770 pr_warning("More than 4GB of memory but GART IOMMU not available.\n"); 770 pr_warning("More than 4GB of memory but GART IOMMU not available.\n");
771 pr_warning("falling back to iommu=soft.\n"); 771 pr_warning("falling back to iommu=soft.\n");
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index b6472153e45b..7c23a0cd3eb9 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -86,8 +86,7 @@ void exit_thread(void)
86void show_regs(struct pt_regs *regs) 86void show_regs(struct pt_regs *regs)
87{ 87{
88 show_registers(regs); 88 show_registers(regs);
89 show_trace(NULL, regs, (unsigned long *)kernel_stack_pointer(regs), 89 show_trace(NULL, regs, (unsigned long *)kernel_stack_pointer(regs));
90 regs->bp);
91} 90}
92 91
93void show_regs_common(void) 92void show_regs_common(void)
@@ -369,6 +368,7 @@ void default_idle(void)
369{ 368{
370 if (hlt_use_halt()) { 369 if (hlt_use_halt()) {
371 trace_power_start(POWER_CSTATE, 1, smp_processor_id()); 370 trace_power_start(POWER_CSTATE, 1, smp_processor_id());
371 trace_cpu_idle(1, smp_processor_id());
372 current_thread_info()->status &= ~TS_POLLING; 372 current_thread_info()->status &= ~TS_POLLING;
373 /* 373 /*
374 * TS_POLLING-cleared state must be visible before we 374 * TS_POLLING-cleared state must be visible before we
@@ -439,8 +439,9 @@ EXPORT_SYMBOL_GPL(cpu_idle_wait);
439void mwait_idle_with_hints(unsigned long ax, unsigned long cx) 439void mwait_idle_with_hints(unsigned long ax, unsigned long cx)
440{ 440{
441 trace_power_start(POWER_CSTATE, (ax>>4)+1, smp_processor_id()); 441 trace_power_start(POWER_CSTATE, (ax>>4)+1, smp_processor_id());
442 trace_cpu_idle((ax>>4)+1, smp_processor_id());
442 if (!need_resched()) { 443 if (!need_resched()) {
443 if (cpu_has(&current_cpu_data, X86_FEATURE_CLFLUSH_MONITOR)) 444 if (cpu_has(__this_cpu_ptr(&cpu_info), X86_FEATURE_CLFLUSH_MONITOR))
444 clflush((void *)&current_thread_info()->flags); 445 clflush((void *)&current_thread_info()->flags);
445 446
446 __monitor((void *)&current_thread_info()->flags, 0, 0); 447 __monitor((void *)&current_thread_info()->flags, 0, 0);
@@ -455,7 +456,8 @@ static void mwait_idle(void)
455{ 456{
456 if (!need_resched()) { 457 if (!need_resched()) {
457 trace_power_start(POWER_CSTATE, 1, smp_processor_id()); 458 trace_power_start(POWER_CSTATE, 1, smp_processor_id());
458 if (cpu_has(&current_cpu_data, X86_FEATURE_CLFLUSH_MONITOR)) 459 trace_cpu_idle(1, smp_processor_id());
460 if (cpu_has(__this_cpu_ptr(&cpu_info), X86_FEATURE_CLFLUSH_MONITOR))
459 clflush((void *)&current_thread_info()->flags); 461 clflush((void *)&current_thread_info()->flags);
460 462
461 __monitor((void *)&current_thread_info()->flags, 0, 0); 463 __monitor((void *)&current_thread_info()->flags, 0, 0);
@@ -476,10 +478,12 @@ static void mwait_idle(void)
476static void poll_idle(void) 478static void poll_idle(void)
477{ 479{
478 trace_power_start(POWER_CSTATE, 0, smp_processor_id()); 480 trace_power_start(POWER_CSTATE, 0, smp_processor_id());
481 trace_cpu_idle(0, smp_processor_id());
479 local_irq_enable(); 482 local_irq_enable();
480 while (!need_resched()) 483 while (!need_resched())
481 cpu_relax(); 484 cpu_relax();
482 trace_power_end(0); 485 trace_power_end(smp_processor_id());
486 trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id());
483} 487}
484 488
485/* 489/*
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 96586c3cbbbf..4b9befa0e347 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -113,8 +113,8 @@ void cpu_idle(void)
113 stop_critical_timings(); 113 stop_critical_timings();
114 pm_idle(); 114 pm_idle();
115 start_critical_timings(); 115 start_critical_timings();
116
117 trace_power_end(smp_processor_id()); 116 trace_power_end(smp_processor_id());
117 trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id());
118 } 118 }
119 tick_nohz_restart_sched_tick(); 119 tick_nohz_restart_sched_tick();
120 preempt_enable_no_resched(); 120 preempt_enable_no_resched();
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index b3d7a3a04f38..4c818a738396 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -142,6 +142,8 @@ void cpu_idle(void)
142 start_critical_timings(); 142 start_critical_timings();
143 143
144 trace_power_end(smp_processor_id()); 144 trace_power_end(smp_processor_id());
145 trace_cpu_idle(PWR_EVENT_EXIT,
146 smp_processor_id());
145 147
146 /* In many cases the interrupt that ended idle 148 /* In many cases the interrupt that ended idle
147 has already called exit_idle. But some idle 149 has already called exit_idle. But some idle
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index c495aa8d4815..fc7aae1e2bc7 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -18,6 +18,7 @@
18#include <asm/pci_x86.h> 18#include <asm/pci_x86.h>
19#include <asm/virtext.h> 19#include <asm/virtext.h>
20#include <asm/cpu.h> 20#include <asm/cpu.h>
21#include <asm/nmi.h>
21 22
22#ifdef CONFIG_X86_32 23#ifdef CONFIG_X86_32
23# include <linux/ctype.h> 24# include <linux/ctype.h>
@@ -747,7 +748,7 @@ static int crash_nmi_callback(struct notifier_block *self,
747{ 748{
748 int cpu; 749 int cpu;
749 750
750 if (val != DIE_NMI_IPI) 751 if (val != DIE_NMI)
751 return NOTIFY_OK; 752 return NOTIFY_OK;
752 753
753 cpu = raw_smp_processor_id(); 754 cpu = raw_smp_processor_id();
@@ -778,6 +779,8 @@ static void smp_send_nmi_allbutself(void)
778 779
779static struct notifier_block crash_nmi_nb = { 780static struct notifier_block crash_nmi_nb = {
780 .notifier_call = crash_nmi_callback, 781 .notifier_call = crash_nmi_callback,
782 /* we want to be the first one called */
783 .priority = NMI_LOCAL_HIGH_PRIOR+1,
781}; 784};
782 785
783/* Halt all other CPUs, calling the specified function on each of them 786/* Halt all other CPUs, calling the specified function on each of them
diff --git a/arch/x86/kernel/reboot_fixups_32.c b/arch/x86/kernel/reboot_fixups_32.c
index fda313ebbb03..c8e41e90f59c 100644
--- a/arch/x86/kernel/reboot_fixups_32.c
+++ b/arch/x86/kernel/reboot_fixups_32.c
@@ -43,17 +43,33 @@ static void rdc321x_reset(struct pci_dev *dev)
43 outb(1, 0x92); 43 outb(1, 0x92);
44} 44}
45 45
46static void ce4100_reset(struct pci_dev *dev)
47{
48 int i;
49
50 for (i = 0; i < 10; i++) {
51 outb(0x2, 0xcf9);
52 udelay(50);
53 }
54}
55
46struct device_fixup { 56struct device_fixup {
47 unsigned int vendor; 57 unsigned int vendor;
48 unsigned int device; 58 unsigned int device;
49 void (*reboot_fixup)(struct pci_dev *); 59 void (*reboot_fixup)(struct pci_dev *);
50}; 60};
51 61
62/*
63 * PCI ids solely used for fixups_table go here
64 */
65#define PCI_DEVICE_ID_INTEL_CE4100 0x0708
66
52static const struct device_fixup fixups_table[] = { 67static const struct device_fixup fixups_table[] = {
53{ PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5530_LEGACY, cs5530a_warm_reset }, 68{ PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5530_LEGACY, cs5530a_warm_reset },
54{ PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CS5536_ISA, cs5536_warm_reset }, 69{ PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CS5536_ISA, cs5536_warm_reset },
55{ PCI_VENDOR_ID_NS, PCI_DEVICE_ID_NS_SC1100_BRIDGE, cs5530a_warm_reset }, 70{ PCI_VENDOR_ID_NS, PCI_DEVICE_ID_NS_SC1100_BRIDGE, cs5530a_warm_reset },
56{ PCI_VENDOR_ID_RDC, PCI_DEVICE_ID_RDC_R6030, rdc321x_reset }, 71{ PCI_VENDOR_ID_RDC, PCI_DEVICE_ID_RDC_R6030, rdc321x_reset },
72{ PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CE4100, ce4100_reset },
57}; 73};
58 74
59/* 75/*
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index a0f52af256a0..d3cfe26c0252 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -705,7 +705,7 @@ static u64 __init get_max_mapped(void)
705void __init setup_arch(char **cmdline_p) 705void __init setup_arch(char **cmdline_p)
706{ 706{
707 int acpi = 0; 707 int acpi = 0;
708 int k8 = 0; 708 int amd = 0;
709 unsigned long flags; 709 unsigned long flags;
710 710
711#ifdef CONFIG_X86_32 711#ifdef CONFIG_X86_32
@@ -991,12 +991,12 @@ void __init setup_arch(char **cmdline_p)
991 acpi = acpi_numa_init(); 991 acpi = acpi_numa_init();
992#endif 992#endif
993 993
994#ifdef CONFIG_K8_NUMA 994#ifdef CONFIG_AMD_NUMA
995 if (!acpi) 995 if (!acpi)
996 k8 = !k8_numa_init(0, max_pfn); 996 amd = !amd_numa_init(0, max_pfn);
997#endif 997#endif
998 998
999 initmem_init(0, max_pfn, acpi, k8); 999 initmem_init(0, max_pfn, acpi, amd);
1000 memblock_find_dma_reserve(); 1000 memblock_find_dma_reserve();
1001 dma32_reserve_bootmem(); 1001 dma32_reserve_bootmem();
1002 1002
@@ -1045,10 +1045,7 @@ void __init setup_arch(char **cmdline_p)
1045#endif 1045#endif
1046 1046
1047 init_apic_mappings(); 1047 init_apic_mappings();
1048 ioapic_init_mappings(); 1048 ioapic_and_gsi_init();
1049
1050 /* need to wait for io_apic is mapped */
1051 probe_nr_irqs_gsi();
1052 1049
1053 kvm_guest_init(); 1050 kvm_guest_init();
1054 1051
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 083e99d1b7df..763df77343dd 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -97,12 +97,12 @@ static DEFINE_PER_CPU(struct task_struct *, idle_thread_array);
97 */ 97 */
98static DEFINE_MUTEX(x86_cpu_hotplug_driver_mutex); 98static DEFINE_MUTEX(x86_cpu_hotplug_driver_mutex);
99 99
100void cpu_hotplug_driver_lock() 100void cpu_hotplug_driver_lock(void)
101{ 101{
102 mutex_lock(&x86_cpu_hotplug_driver_mutex); 102 mutex_lock(&x86_cpu_hotplug_driver_mutex);
103} 103}
104 104
105void cpu_hotplug_driver_unlock() 105void cpu_hotplug_driver_unlock(void)
106{ 106{
107 mutex_unlock(&x86_cpu_hotplug_driver_mutex); 107 mutex_unlock(&x86_cpu_hotplug_driver_mutex);
108} 108}
@@ -281,6 +281,13 @@ static void __cpuinit smp_callin(void)
281 */ 281 */
282 smp_store_cpu_info(cpuid); 282 smp_store_cpu_info(cpuid);
283 283
284 /*
285 * This must be done before setting cpu_online_mask
286 * or calling notify_cpu_starting.
287 */
288 set_cpu_sibling_map(raw_smp_processor_id());
289 wmb();
290
284 notify_cpu_starting(cpuid); 291 notify_cpu_starting(cpuid);
285 292
286 /* 293 /*
@@ -316,16 +323,6 @@ notrace static void __cpuinit start_secondary(void *unused)
316 */ 323 */
317 check_tsc_sync_target(); 324 check_tsc_sync_target();
318 325
319 if (nmi_watchdog == NMI_IO_APIC) {
320 legacy_pic->mask(0);
321 enable_NMI_through_LVT0();
322 legacy_pic->unmask(0);
323 }
324
325 /* This must be done before setting cpu_online_mask */
326 set_cpu_sibling_map(raw_smp_processor_id());
327 wmb();
328
329 /* 326 /*
330 * We need to hold call_lock, so there is no inconsistency 327 * We need to hold call_lock, so there is no inconsistency
331 * between the time smp_call_function() determines number of 328 * between the time smp_call_function() determines number of
@@ -430,7 +427,7 @@ void __cpuinit set_cpu_sibling_map(int cpu)
430 427
431 cpumask_set_cpu(cpu, c->llc_shared_map); 428 cpumask_set_cpu(cpu, c->llc_shared_map);
432 429
433 if (current_cpu_data.x86_max_cores == 1) { 430 if (__this_cpu_read(cpu_info.x86_max_cores) == 1) {
434 cpumask_copy(cpu_core_mask(cpu), cpu_sibling_mask(cpu)); 431 cpumask_copy(cpu_core_mask(cpu), cpu_sibling_mask(cpu));
435 c->booted_cores = 1; 432 c->booted_cores = 1;
436 return; 433 return;
@@ -1061,8 +1058,6 @@ static int __init smp_sanity_check(unsigned max_cpus)
1061 printk(KERN_INFO "SMP mode deactivated.\n"); 1058 printk(KERN_INFO "SMP mode deactivated.\n");
1062 smpboot_clear_io_apic(); 1059 smpboot_clear_io_apic();
1063 1060
1064 localise_nmi_watchdog();
1065
1066 connect_bsp_APIC(); 1061 connect_bsp_APIC();
1067 setup_local_APIC(); 1062 setup_local_APIC();
1068 end_local_APIC_setup(); 1063 end_local_APIC_setup();
@@ -1094,7 +1089,7 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
1094 1089
1095 preempt_disable(); 1090 preempt_disable();
1096 smp_cpu_index_default(); 1091 smp_cpu_index_default();
1097 current_cpu_data = boot_cpu_data; 1092 memcpy(__this_cpu_ptr(&cpu_info), &boot_cpu_data, sizeof(cpu_info));
1098 cpumask_copy(cpu_callin_mask, cpumask_of(0)); 1093 cpumask_copy(cpu_callin_mask, cpumask_of(0));
1099 mb(); 1094 mb();
1100 /* 1095 /*
@@ -1166,6 +1161,20 @@ out:
1166 preempt_enable(); 1161 preempt_enable();
1167} 1162}
1168 1163
1164void arch_disable_nonboot_cpus_begin(void)
1165{
1166 /*
1167 * Avoid the smp alternatives switch during the disable_nonboot_cpus().
1168 * In the suspend path, we will be back in the SMP mode shortly anyways.
1169 */
1170 skip_smp_alternatives = true;
1171}
1172
1173void arch_disable_nonboot_cpus_end(void)
1174{
1175 skip_smp_alternatives = false;
1176}
1177
1169void arch_enable_nonboot_cpus_begin(void) 1178void arch_enable_nonboot_cpus_begin(void)
1170{ 1179{
1171 set_mtrr_aps_delayed_init(); 1180 set_mtrr_aps_delayed_init();
@@ -1196,7 +1205,6 @@ void __init native_smp_cpus_done(unsigned int max_cpus)
1196#ifdef CONFIG_X86_IO_APIC 1205#ifdef CONFIG_X86_IO_APIC
1197 setup_ioapic_dest(); 1206 setup_ioapic_dest();
1198#endif 1207#endif
1199 check_nmi_watchdog();
1200 mtrr_aps_init(); 1208 mtrr_aps_init();
1201} 1209}
1202 1210
@@ -1341,8 +1349,6 @@ int native_cpu_disable(void)
1341 if (cpu == 0) 1349 if (cpu == 0)
1342 return -EBUSY; 1350 return -EBUSY;
1343 1351
1344 if (nmi_watchdog == NMI_LOCAL_APIC)
1345 stop_apic_nmi_watchdog(NULL);
1346 clear_local_APIC(); 1352 clear_local_APIC();
1347 1353
1348 cpu_disable_common(); 1354 cpu_disable_common();
@@ -1377,7 +1383,7 @@ void play_dead_common(void)
1377 1383
1378 mb(); 1384 mb();
1379 /* Ack it */ 1385 /* Ack it */
1380 __get_cpu_var(cpu_state) = CPU_DEAD; 1386 __this_cpu_write(cpu_state, CPU_DEAD);
1381 1387
1382 /* 1388 /*
1383 * With physical CPU hotplug, we should halt the cpu 1389 * With physical CPU hotplug, we should halt the cpu
@@ -1397,11 +1403,11 @@ static inline void mwait_play_dead(void)
1397 int i; 1403 int i;
1398 void *mwait_ptr; 1404 void *mwait_ptr;
1399 1405
1400 if (!cpu_has(&current_cpu_data, X86_FEATURE_MWAIT)) 1406 if (!cpu_has(__this_cpu_ptr(&cpu_info), X86_FEATURE_MWAIT))
1401 return; 1407 return;
1402 if (!cpu_has(&current_cpu_data, X86_FEATURE_CLFLSH)) 1408 if (!cpu_has(__this_cpu_ptr(&cpu_info), X86_FEATURE_CLFLSH))
1403 return; 1409 return;
1404 if (current_cpu_data.cpuid_level < CPUID_MWAIT_LEAF) 1410 if (__this_cpu_read(cpu_info.cpuid_level) < CPUID_MWAIT_LEAF)
1405 return; 1411 return;
1406 1412
1407 eax = CPUID_MWAIT_LEAF; 1413 eax = CPUID_MWAIT_LEAF;
@@ -1452,7 +1458,7 @@ static inline void mwait_play_dead(void)
1452 1458
1453static inline void hlt_play_dead(void) 1459static inline void hlt_play_dead(void)
1454{ 1460{
1455 if (current_cpu_data.x86 >= 4) 1461 if (__this_cpu_read(cpu_info.x86) >= 4)
1456 wbinvd(); 1462 wbinvd();
1457 1463
1458 while (1) { 1464 while (1) {
diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c
index b53c525368a7..938c8e10a19a 100644
--- a/arch/x86/kernel/stacktrace.c
+++ b/arch/x86/kernel/stacktrace.c
@@ -73,22 +73,22 @@ static const struct stacktrace_ops save_stack_ops_nosched = {
73 */ 73 */
74void save_stack_trace(struct stack_trace *trace) 74void save_stack_trace(struct stack_trace *trace)
75{ 75{
76 dump_trace(current, NULL, NULL, 0, &save_stack_ops, trace); 76 dump_trace(current, NULL, NULL, &save_stack_ops, trace);
77 if (trace->nr_entries < trace->max_entries) 77 if (trace->nr_entries < trace->max_entries)
78 trace->entries[trace->nr_entries++] = ULONG_MAX; 78 trace->entries[trace->nr_entries++] = ULONG_MAX;
79} 79}
80EXPORT_SYMBOL_GPL(save_stack_trace); 80EXPORT_SYMBOL_GPL(save_stack_trace);
81 81
82void save_stack_trace_bp(struct stack_trace *trace, unsigned long bp) 82void save_stack_trace_regs(struct stack_trace *trace, struct pt_regs *regs)
83{ 83{
84 dump_trace(current, NULL, NULL, bp, &save_stack_ops, trace); 84 dump_trace(current, regs, NULL, &save_stack_ops, trace);
85 if (trace->nr_entries < trace->max_entries) 85 if (trace->nr_entries < trace->max_entries)
86 trace->entries[trace->nr_entries++] = ULONG_MAX; 86 trace->entries[trace->nr_entries++] = ULONG_MAX;
87} 87}
88 88
89void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) 89void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
90{ 90{
91 dump_trace(tsk, NULL, NULL, 0, &save_stack_ops_nosched, trace); 91 dump_trace(tsk, NULL, NULL, &save_stack_ops_nosched, trace);
92 if (trace->nr_entries < trace->max_entries) 92 if (trace->nr_entries < trace->max_entries)
93 trace->entries[trace->nr_entries++] = ULONG_MAX; 93 trace->entries[trace->nr_entries++] = ULONG_MAX;
94} 94}
diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c
index fb5cc5e14cfa..25a28a245937 100644
--- a/arch/x86/kernel/time.c
+++ b/arch/x86/kernel/time.c
@@ -22,10 +22,6 @@
22#include <asm/hpet.h> 22#include <asm/hpet.h>
23#include <asm/time.h> 23#include <asm/time.h>
24 24
25#if defined(CONFIG_X86_32) && defined(CONFIG_X86_IO_APIC)
26int timer_ack;
27#endif
28
29#ifdef CONFIG_X86_64 25#ifdef CONFIG_X86_64
30volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES; 26volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES;
31#endif 27#endif
@@ -63,20 +59,6 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id)
63 /* Keep nmi watchdog up to date */ 59 /* Keep nmi watchdog up to date */
64 inc_irq_stat(irq0_irqs); 60 inc_irq_stat(irq0_irqs);
65 61
66 /* Optimized out for !IO_APIC and x86_64 */
67 if (timer_ack) {
68 /*
69 * Subtle, when I/O APICs are used we have to ack timer IRQ
70 * manually to deassert NMI lines for the watchdog if run
71 * on an 82489DX-based system.
72 */
73 raw_spin_lock(&i8259A_lock);
74 outb(0x0c, PIC_MASTER_OCW3);
75 /* Ack the IRQ; AEOI will end it automatically. */
76 inb(PIC_MASTER_POLL);
77 raw_spin_unlock(&i8259A_lock);
78 }
79
80 global_clock_event->event_handler(global_clock_event); 62 global_clock_event->event_handler(global_clock_event);
81 63
82 /* MCA bus quirk: Acknowledge irq0 by setting bit 7 in port 0x61 */ 64 /* MCA bus quirk: Acknowledge irq0 by setting bit 7 in port 0x61 */
diff --git a/arch/x86/kernel/trampoline_64.S b/arch/x86/kernel/trampoline_64.S
index 3af2dff58b21..075d130efcf9 100644
--- a/arch/x86/kernel/trampoline_64.S
+++ b/arch/x86/kernel/trampoline_64.S
@@ -127,7 +127,7 @@ startup_64:
127no_longmode: 127no_longmode:
128 hlt 128 hlt
129 jmp no_longmode 129 jmp no_longmode
130#include "verify_cpu_64.S" 130#include "verify_cpu.S"
131 131
132 # Careful these need to be in the same 64K segment as the above; 132 # Careful these need to be in the same 64K segment as the above;
133tidt: 133tidt:
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index cb838ca42c96..b9b67166f9de 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -83,6 +83,13 @@ EXPORT_SYMBOL_GPL(used_vectors);
83 83
84static int ignore_nmis; 84static int ignore_nmis;
85 85
86int unknown_nmi_panic;
87/*
88 * Prevent NMI reason port (0x61) being accessed simultaneously, can
89 * only be used in NMI handler.
90 */
91static DEFINE_RAW_SPINLOCK(nmi_reason_lock);
92
86static inline void conditional_sti(struct pt_regs *regs) 93static inline void conditional_sti(struct pt_regs *regs)
87{ 94{
88 if (regs->flags & X86_EFLAGS_IF) 95 if (regs->flags & X86_EFLAGS_IF)
@@ -300,16 +307,23 @@ gp_in_kernel:
300 die("general protection fault", regs, error_code); 307 die("general protection fault", regs, error_code);
301} 308}
302 309
303static notrace __kprobes void 310static int __init setup_unknown_nmi_panic(char *str)
304mem_parity_error(unsigned char reason, struct pt_regs *regs)
305{ 311{
306 printk(KERN_EMERG 312 unknown_nmi_panic = 1;
307 "Uhhuh. NMI received for unknown reason %02x on CPU %d.\n", 313 return 1;
308 reason, smp_processor_id()); 314}
315__setup("unknown_nmi_panic", setup_unknown_nmi_panic);
309 316
310 printk(KERN_EMERG 317static notrace __kprobes void
311 "You have some hardware problem, likely on the PCI bus.\n"); 318pci_serr_error(unsigned char reason, struct pt_regs *regs)
319{
320 pr_emerg("NMI: PCI system error (SERR) for reason %02x on CPU %d.\n",
321 reason, smp_processor_id());
312 322
323 /*
324 * On some machines, PCI SERR line is used to report memory
325 * errors. EDAC makes use of it.
326 */
313#if defined(CONFIG_EDAC) 327#if defined(CONFIG_EDAC)
314 if (edac_handler_set()) { 328 if (edac_handler_set()) {
315 edac_atomic_assert_error(); 329 edac_atomic_assert_error();
@@ -320,11 +334,11 @@ mem_parity_error(unsigned char reason, struct pt_regs *regs)
320 if (panic_on_unrecovered_nmi) 334 if (panic_on_unrecovered_nmi)
321 panic("NMI: Not continuing"); 335 panic("NMI: Not continuing");
322 336
323 printk(KERN_EMERG "Dazed and confused, but trying to continue\n"); 337 pr_emerg("Dazed and confused, but trying to continue\n");
324 338
325 /* Clear and disable the memory parity error line. */ 339 /* Clear and disable the PCI SERR error line. */
326 reason = (reason & 0xf) | 4; 340 reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_SERR;
327 outb(reason, 0x61); 341 outb(reason, NMI_REASON_PORT);
328} 342}
329 343
330static notrace __kprobes void 344static notrace __kprobes void
@@ -332,22 +346,26 @@ io_check_error(unsigned char reason, struct pt_regs *regs)
332{ 346{
333 unsigned long i; 347 unsigned long i;
334 348
335 printk(KERN_EMERG "NMI: IOCK error (debug interrupt?)\n"); 349 pr_emerg(
350 "NMI: IOCK error (debug interrupt?) for reason %02x on CPU %d.\n",
351 reason, smp_processor_id());
336 show_registers(regs); 352 show_registers(regs);
337 353
338 if (panic_on_io_nmi) 354 if (panic_on_io_nmi)
339 panic("NMI IOCK error: Not continuing"); 355 panic("NMI IOCK error: Not continuing");
340 356
341 /* Re-enable the IOCK line, wait for a few seconds */ 357 /* Re-enable the IOCK line, wait for a few seconds */
342 reason = (reason & 0xf) | 8; 358 reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_IOCHK;
343 outb(reason, 0x61); 359 outb(reason, NMI_REASON_PORT);
344 360
345 i = 2000; 361 i = 20000;
346 while (--i) 362 while (--i) {
347 udelay(1000); 363 touch_nmi_watchdog();
364 udelay(100);
365 }
348 366
349 reason &= ~8; 367 reason &= ~NMI_REASON_CLEAR_IOCHK;
350 outb(reason, 0x61); 368 outb(reason, NMI_REASON_PORT);
351} 369}
352 370
353static notrace __kprobes void 371static notrace __kprobes void
@@ -366,69 +384,50 @@ unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
366 return; 384 return;
367 } 385 }
368#endif 386#endif
369 printk(KERN_EMERG 387 pr_emerg("Uhhuh. NMI received for unknown reason %02x on CPU %d.\n",
370 "Uhhuh. NMI received for unknown reason %02x on CPU %d.\n", 388 reason, smp_processor_id());
371 reason, smp_processor_id());
372 389
373 printk(KERN_EMERG "Do you have a strange power saving mode enabled?\n"); 390 pr_emerg("Do you have a strange power saving mode enabled?\n");
374 if (panic_on_unrecovered_nmi) 391 if (unknown_nmi_panic || panic_on_unrecovered_nmi)
375 panic("NMI: Not continuing"); 392 panic("NMI: Not continuing");
376 393
377 printk(KERN_EMERG "Dazed and confused, but trying to continue\n"); 394 pr_emerg("Dazed and confused, but trying to continue\n");
378} 395}
379 396
380static notrace __kprobes void default_do_nmi(struct pt_regs *regs) 397static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
381{ 398{
382 unsigned char reason = 0; 399 unsigned char reason = 0;
383 int cpu;
384 400
385 cpu = smp_processor_id(); 401 /*
386 402 * CPU-specific NMI must be processed before non-CPU-specific
387 /* Only the BSP gets external NMIs from the system. */ 403 * NMI, otherwise we may lose it, because the CPU-specific
388 if (!cpu) 404 * NMI can not be detected/processed on other CPUs.
389 reason = get_nmi_reason(); 405 */
390 406 if (notify_die(DIE_NMI, "nmi", regs, 0, 2, SIGINT) == NOTIFY_STOP)
391 if (!(reason & 0xc0)) { 407 return;
392 if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 2, SIGINT)
393 == NOTIFY_STOP)
394 return;
395 408
396#ifdef CONFIG_X86_LOCAL_APIC 409 /* Non-CPU-specific NMI: NMI sources can be processed on any CPU */
397 if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) 410 raw_spin_lock(&nmi_reason_lock);
398 == NOTIFY_STOP) 411 reason = get_nmi_reason();
399 return;
400 412
401#ifndef CONFIG_LOCKUP_DETECTOR 413 if (reason & NMI_REASON_MASK) {
414 if (reason & NMI_REASON_SERR)
415 pci_serr_error(reason, regs);
416 else if (reason & NMI_REASON_IOCHK)
417 io_check_error(reason, regs);
418#ifdef CONFIG_X86_32
402 /* 419 /*
403 * Ok, so this is none of the documented NMI sources, 420 * Reassert NMI in case it became active
404 * so it must be the NMI watchdog. 421 * meanwhile as it's edge-triggered:
405 */ 422 */
406 if (nmi_watchdog_tick(regs, reason)) 423 reassert_nmi();
407 return;
408 if (!do_nmi_callback(regs, cpu))
409#endif /* !CONFIG_LOCKUP_DETECTOR */
410 unknown_nmi_error(reason, regs);
411#else
412 unknown_nmi_error(reason, regs);
413#endif 424#endif
414 425 raw_spin_unlock(&nmi_reason_lock);
415 return; 426 return;
416 } 427 }
417 if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP) 428 raw_spin_unlock(&nmi_reason_lock);
418 return;
419 429
420 /* AK: following checks seem to be broken on modern chipsets. FIXME */ 430 unknown_nmi_error(reason, regs);
421 if (reason & 0x80)
422 mem_parity_error(reason, regs);
423 if (reason & 0x40)
424 io_check_error(reason, regs);
425#ifdef CONFIG_X86_32
426 /*
427 * Reassert NMI in case it became active meanwhile
428 * as it's edge-triggered:
429 */
430 reassert_nmi();
431#endif
432} 431}
433 432
434dotraplinkage notrace __kprobes void 433dotraplinkage notrace __kprobes void
@@ -446,14 +445,12 @@ do_nmi(struct pt_regs *regs, long error_code)
446 445
447void stop_nmi(void) 446void stop_nmi(void)
448{ 447{
449 acpi_nmi_disable();
450 ignore_nmis++; 448 ignore_nmis++;
451} 449}
452 450
453void restart_nmi(void) 451void restart_nmi(void)
454{ 452{
455 ignore_nmis--; 453 ignore_nmis--;
456 acpi_nmi_enable();
457} 454}
458 455
459/* May run on IST stack. */ 456/* May run on IST stack. */
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 0c40d8b72416..823f79a17ad1 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -659,7 +659,7 @@ void restore_sched_clock_state(void)
659 659
660 local_irq_save(flags); 660 local_irq_save(flags);
661 661
662 __get_cpu_var(cyc2ns_offset) = 0; 662 __this_cpu_write(cyc2ns_offset, 0);
663 offset = cyc2ns_suspend - sched_clock(); 663 offset = cyc2ns_suspend - sched_clock();
664 664
665 for_each_possible_cpu(cpu) 665 for_each_possible_cpu(cpu)
@@ -872,6 +872,9 @@ __cpuinit int unsynchronized_tsc(void)
872 872
873 if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) 873 if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
874 return 0; 874 return 0;
875
876 if (tsc_clocksource_reliable)
877 return 0;
875 /* 878 /*
876 * Intel systems are normally all synchronized. 879 * Intel systems are normally all synchronized.
877 * Exceptions must mark TSC as unstable: 880 * Exceptions must mark TSC as unstable:
@@ -879,14 +882,92 @@ __cpuinit int unsynchronized_tsc(void)
879 if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) { 882 if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) {
880 /* assume multi socket systems are not synchronized: */ 883 /* assume multi socket systems are not synchronized: */
881 if (num_possible_cpus() > 1) 884 if (num_possible_cpus() > 1)
882 tsc_unstable = 1; 885 return 1;
883 } 886 }
884 887
885 return tsc_unstable; 888 return 0;
889}
890
891
892static void tsc_refine_calibration_work(struct work_struct *work);
893static DECLARE_DELAYED_WORK(tsc_irqwork, tsc_refine_calibration_work);
894/**
895 * tsc_refine_calibration_work - Further refine tsc freq calibration
896 * @work - ignored.
897 *
898 * This function uses delayed work over a period of a
899 * second to further refine the TSC freq value. Since this is
900 * timer based, instead of loop based, we don't block the boot
901 * process while this longer calibration is done.
902 *
903 * If there are any calibration anomalies (too many SMIs, etc),
904 * or the refined calibration is off by 1% of the fast early
905 * calibration, we throw out the new calibration and use the
906 * early calibration.
907 */
908static void tsc_refine_calibration_work(struct work_struct *work)
909{
910 static u64 tsc_start = -1, ref_start;
911 static int hpet;
912 u64 tsc_stop, ref_stop, delta;
913 unsigned long freq;
914
915 /* Don't bother refining TSC on unstable systems */
916 if (check_tsc_unstable())
917 goto out;
918
919 /*
920 * Since the work is started early in boot, we may be
921 * delayed the first time we expire. So set the workqueue
922 * again once we know timers are working.
923 */
924 if (tsc_start == -1) {
925 /*
926 * Only set hpet once, to avoid mixing hardware
927 * if the hpet becomes enabled later.
928 */
929 hpet = is_hpet_enabled();
930 schedule_delayed_work(&tsc_irqwork, HZ);
931 tsc_start = tsc_read_refs(&ref_start, hpet);
932 return;
933 }
934
935 tsc_stop = tsc_read_refs(&ref_stop, hpet);
936
937 /* hpet or pmtimer available ? */
938 if (!hpet && !ref_start && !ref_stop)
939 goto out;
940
941 /* Check, whether the sampling was disturbed by an SMI */
942 if (tsc_start == ULLONG_MAX || tsc_stop == ULLONG_MAX)
943 goto out;
944
945 delta = tsc_stop - tsc_start;
946 delta *= 1000000LL;
947 if (hpet)
948 freq = calc_hpet_ref(delta, ref_start, ref_stop);
949 else
950 freq = calc_pmtimer_ref(delta, ref_start, ref_stop);
951
952 /* Make sure we're within 1% */
953 if (abs(tsc_khz - freq) > tsc_khz/100)
954 goto out;
955
956 tsc_khz = freq;
957 printk(KERN_INFO "Refined TSC clocksource calibration: "
958 "%lu.%03lu MHz.\n", (unsigned long)tsc_khz / 1000,
959 (unsigned long)tsc_khz % 1000);
960
961out:
962 clocksource_register_khz(&clocksource_tsc, tsc_khz);
886} 963}
887 964
888static void __init init_tsc_clocksource(void) 965
966static int __init init_tsc_clocksource(void)
889{ 967{
968 if (!cpu_has_tsc || tsc_disabled > 0 || !tsc_khz)
969 return 0;
970
890 if (tsc_clocksource_reliable) 971 if (tsc_clocksource_reliable)
891 clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY; 972 clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY;
892 /* lower the rating if we already know its unstable: */ 973 /* lower the rating if we already know its unstable: */
@@ -894,8 +975,14 @@ static void __init init_tsc_clocksource(void)
894 clocksource_tsc.rating = 0; 975 clocksource_tsc.rating = 0;
895 clocksource_tsc.flags &= ~CLOCK_SOURCE_IS_CONTINUOUS; 976 clocksource_tsc.flags &= ~CLOCK_SOURCE_IS_CONTINUOUS;
896 } 977 }
897 clocksource_register_khz(&clocksource_tsc, tsc_khz); 978 schedule_delayed_work(&tsc_irqwork, 0);
979 return 0;
898} 980}
981/*
982 * We use device_initcall here, to ensure we run after the hpet
983 * is fully initialized, which may occur at fs_initcall time.
984 */
985device_initcall(init_tsc_clocksource);
899 986
900void __init tsc_init(void) 987void __init tsc_init(void)
901{ 988{
@@ -949,6 +1036,5 @@ void __init tsc_init(void)
949 mark_tsc_unstable("TSCs unsynchronized"); 1036 mark_tsc_unstable("TSCs unsynchronized");
950 1037
951 check_system_tsc_reliable(); 1038 check_system_tsc_reliable();
952 init_tsc_clocksource();
953} 1039}
954 1040
diff --git a/arch/x86/kernel/verify_cpu_64.S b/arch/x86/kernel/verify_cpu.S
index 56a8c2a867d9..0edefc19a113 100644
--- a/arch/x86/kernel/verify_cpu_64.S
+++ b/arch/x86/kernel/verify_cpu.S
@@ -7,6 +7,7 @@
7 * Copyright (c) 2007 Andi Kleen (ak@suse.de) 7 * Copyright (c) 2007 Andi Kleen (ak@suse.de)
8 * Copyright (c) 2007 Eric Biederman (ebiederm@xmission.com) 8 * Copyright (c) 2007 Eric Biederman (ebiederm@xmission.com)
9 * Copyright (c) 2007 Vivek Goyal (vgoyal@in.ibm.com) 9 * Copyright (c) 2007 Vivek Goyal (vgoyal@in.ibm.com)
10 * Copyright (c) 2010 Kees Cook (kees.cook@canonical.com)
10 * 11 *
11 * This source code is licensed under the GNU General Public License, 12 * This source code is licensed under the GNU General Public License,
12 * Version 2. See the file COPYING for more details. 13 * Version 2. See the file COPYING for more details.
@@ -14,18 +15,17 @@
14 * This is a common code for verification whether CPU supports 15 * This is a common code for verification whether CPU supports
15 * long mode and SSE or not. It is not called directly instead this 16 * long mode and SSE or not. It is not called directly instead this
16 * file is included at various places and compiled in that context. 17 * file is included at various places and compiled in that context.
17 * Following are the current usage. 18 * This file is expected to run in 32bit code. Currently:
18 * 19 *
19 * This file is included by both 16bit and 32bit code. 20 * arch/x86/boot/compressed/head_64.S: Boot cpu verification
21 * arch/x86/kernel/trampoline_64.S: secondary processor verification
22 * arch/x86/kernel/head_32.S: processor startup
20 * 23 *
21 * arch/x86_64/boot/setup.S : Boot cpu verification (16bit) 24 * verify_cpu, returns the status of longmode and SSE in register %eax.
22 * arch/x86_64/boot/compressed/head.S: Boot cpu verification (32bit)
23 * arch/x86_64/kernel/trampoline.S: secondary processor verfication (16bit)
24 * arch/x86_64/kernel/acpi/wakeup.S:Verfication at resume (16bit)
25 *
26 * verify_cpu, returns the status of cpu check in register %eax.
27 * 0: Success 1: Failure 25 * 0: Success 1: Failure
28 * 26 *
27 * On Intel, the XD_DISABLE flag will be cleared as a side-effect.
28 *
29 * The caller needs to check for the error code and take the action 29 * The caller needs to check for the error code and take the action
30 * appropriately. Either display a message or halt. 30 * appropriately. Either display a message or halt.
31 */ 31 */
@@ -62,8 +62,41 @@ verify_cpu:
62 cmpl $0x444d4163,%ecx 62 cmpl $0x444d4163,%ecx
63 jnz verify_cpu_noamd 63 jnz verify_cpu_noamd
64 mov $1,%di # cpu is from AMD 64 mov $1,%di # cpu is from AMD
65 jmp verify_cpu_check
65 66
66verify_cpu_noamd: 67verify_cpu_noamd:
68 cmpl $0x756e6547,%ebx # GenuineIntel?
69 jnz verify_cpu_check
70 cmpl $0x49656e69,%edx
71 jnz verify_cpu_check
72 cmpl $0x6c65746e,%ecx
73 jnz verify_cpu_check
74
75 # only call IA32_MISC_ENABLE when:
76 # family > 6 || (family == 6 && model >= 0xd)
77 movl $0x1, %eax # check CPU family and model
78 cpuid
79 movl %eax, %ecx
80
81 andl $0x0ff00f00, %eax # mask family and extended family
82 shrl $8, %eax
83 cmpl $6, %eax
84 ja verify_cpu_clear_xd # family > 6, ok
85 jb verify_cpu_check # family < 6, skip
86
87 andl $0x000f00f0, %ecx # mask model and extended model
88 shrl $4, %ecx
89 cmpl $0xd, %ecx
90 jb verify_cpu_check # family == 6, model < 0xd, skip
91
92verify_cpu_clear_xd:
93 movl $MSR_IA32_MISC_ENABLE, %ecx
94 rdmsr
95 btrl $2, %edx # clear MSR_IA32_MISC_ENABLE_XD_DISABLE
96 jnc verify_cpu_check # only write MSR if bit was changed
97 wrmsr
98
99verify_cpu_check:
67 movl $0x1,%eax # Does the cpu have what it takes 100 movl $0x1,%eax # Does the cpu have what it takes
68 cpuid 101 cpuid
69 andl $REQUIRED_MASK0,%edx 102 andl $REQUIRED_MASK0,%edx
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index e03530aebfd0..bf4700755184 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -69,7 +69,7 @@ jiffies_64 = jiffies;
69 69
70PHDRS { 70PHDRS {
71 text PT_LOAD FLAGS(5); /* R_E */ 71 text PT_LOAD FLAGS(5); /* R_E */
72 data PT_LOAD FLAGS(7); /* RWE */ 72 data PT_LOAD FLAGS(6); /* RW_ */
73#ifdef CONFIG_X86_64 73#ifdef CONFIG_X86_64
74 user PT_LOAD FLAGS(5); /* R_E */ 74 user PT_LOAD FLAGS(5); /* R_E */
75#ifdef CONFIG_SMP 75#ifdef CONFIG_SMP
@@ -116,6 +116,10 @@ SECTIONS
116 116
117 EXCEPTION_TABLE(16) :text = 0x9090 117 EXCEPTION_TABLE(16) :text = 0x9090
118 118
119#if defined(CONFIG_DEBUG_RODATA)
120 /* .text should occupy whole number of pages */
121 . = ALIGN(PAGE_SIZE);
122#endif
119 X64_ALIGN_DEBUG_RODATA_BEGIN 123 X64_ALIGN_DEBUG_RODATA_BEGIN
120 RO_DATA(PAGE_SIZE) 124 RO_DATA(PAGE_SIZE)
121 X64_ALIGN_DEBUG_RODATA_END 125 X64_ALIGN_DEBUG_RODATA_END
@@ -335,7 +339,7 @@ SECTIONS
335 __bss_start = .; 339 __bss_start = .;
336 *(.bss..page_aligned) 340 *(.bss..page_aligned)
337 *(.bss) 341 *(.bss)
338 . = ALIGN(4); 342 . = ALIGN(PAGE_SIZE);
339 __bss_stop = .; 343 __bss_stop = .;
340 } 344 }
341 345
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index b989e1f1e5d3..46a368cb651e 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -976,7 +976,7 @@ static inline u64 nsec_to_cycles(u64 nsec)
976 if (kvm_tsc_changes_freq()) 976 if (kvm_tsc_changes_freq())
977 printk_once(KERN_WARNING 977 printk_once(KERN_WARNING
978 "kvm: unreliable cycle conversion on adjustable rate TSC\n"); 978 "kvm: unreliable cycle conversion on adjustable rate TSC\n");
979 ret = nsec * __get_cpu_var(cpu_tsc_khz); 979 ret = nsec * __this_cpu_read(cpu_tsc_khz);
980 do_div(ret, USEC_PER_SEC); 980 do_div(ret, USEC_PER_SEC);
981 return ret; 981 return ret;
982} 982}
@@ -1061,7 +1061,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
1061 local_irq_save(flags); 1061 local_irq_save(flags);
1062 kvm_get_msr(v, MSR_IA32_TSC, &tsc_timestamp); 1062 kvm_get_msr(v, MSR_IA32_TSC, &tsc_timestamp);
1063 kernel_ns = get_kernel_ns(); 1063 kernel_ns = get_kernel_ns();
1064 this_tsc_khz = __get_cpu_var(cpu_tsc_khz); 1064 this_tsc_khz = __this_cpu_read(cpu_tsc_khz);
1065 1065
1066 if (unlikely(this_tsc_khz == 0)) { 1066 if (unlikely(this_tsc_khz == 0)) {
1067 local_irq_restore(flags); 1067 local_irq_restore(flags);
@@ -4427,7 +4427,7 @@ EXPORT_SYMBOL_GPL(kvm_fast_pio_out);
4427 4427
4428static void tsc_bad(void *info) 4428static void tsc_bad(void *info)
4429{ 4429{
4430 __get_cpu_var(cpu_tsc_khz) = 0; 4430 __this_cpu_write(cpu_tsc_khz, 0);
4431} 4431}
4432 4432
4433static void tsc_khz_changed(void *data) 4433static void tsc_khz_changed(void *data)
@@ -4441,7 +4441,7 @@ static void tsc_khz_changed(void *data)
4441 khz = cpufreq_quick_get(raw_smp_processor_id()); 4441 khz = cpufreq_quick_get(raw_smp_processor_id());
4442 if (!khz) 4442 if (!khz)
4443 khz = tsc_khz; 4443 khz = tsc_khz;
4444 __get_cpu_var(cpu_tsc_khz) = khz; 4444 __this_cpu_write(cpu_tsc_khz, khz);
4445} 4445}
4446 4446
4447static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long val, 4447static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
diff --git a/arch/x86/lguest/i386_head.S b/arch/x86/lguest/i386_head.S
index e7d5382ef263..4f420c2f2d55 100644
--- a/arch/x86/lguest/i386_head.S
+++ b/arch/x86/lguest/i386_head.S
@@ -4,7 +4,6 @@
4#include <asm/asm-offsets.h> 4#include <asm/asm-offsets.h>
5#include <asm/thread_info.h> 5#include <asm/thread_info.h>
6#include <asm/processor-flags.h> 6#include <asm/processor-flags.h>
7#include <asm/pgtable.h>
8 7
9/*G:020 8/*G:020
10 * Our story starts with the kernel booting into startup_32 in 9 * Our story starts with the kernel booting into startup_32 in
@@ -38,113 +37,9 @@ ENTRY(lguest_entry)
38 /* Set up the initial stack so we can run C code. */ 37 /* Set up the initial stack so we can run C code. */
39 movl $(init_thread_union+THREAD_SIZE),%esp 38 movl $(init_thread_union+THREAD_SIZE),%esp
40 39
41 call init_pagetables
42
43 /* Jumps are relative: we're running __PAGE_OFFSET too low. */ 40 /* Jumps are relative: we're running __PAGE_OFFSET too low. */
44 jmp lguest_init+__PAGE_OFFSET 41 jmp lguest_init+__PAGE_OFFSET
45 42
46/*
47 * Initialize page tables. This creates a PDE and a set of page
48 * tables, which are located immediately beyond __brk_base. The variable
49 * _brk_end is set up to point to the first "safe" location.
50 * Mappings are created both at virtual address 0 (identity mapping)
51 * and PAGE_OFFSET for up to _end.
52 *
53 * FIXME: This code is taken verbatim from arch/x86/kernel/head_32.S: they
54 * don't have a stack at this point, so we can't just use call and ret.
55 */
56init_pagetables:
57#if PTRS_PER_PMD > 1
58#define PAGE_TABLE_SIZE(pages) (((pages) / PTRS_PER_PMD) + PTRS_PER_PGD)
59#else
60#define PAGE_TABLE_SIZE(pages) ((pages) / PTRS_PER_PGD)
61#endif
62#define pa(X) ((X) - __PAGE_OFFSET)
63
64/* Enough space to fit pagetables for the low memory linear map */
65MAPPING_BEYOND_END = \
66 PAGE_TABLE_SIZE(((1<<32) - __PAGE_OFFSET) >> PAGE_SHIFT) << PAGE_SHIFT
67#ifdef CONFIG_X86_PAE
68
69 /*
70 * In PAE mode initial_page_table is statically defined to contain
71 * enough entries to cover the VMSPLIT option (that is the top 1, 2 or 3
72 * entries). The identity mapping is handled by pointing two PGD entries
73 * to the first kernel PMD.
74 *
75 * Note the upper half of each PMD or PTE are always zero at this stage.
76 */
77
78#define KPMDS (((-__PAGE_OFFSET) >> 30) & 3) /* Number of kernel PMDs */
79
80 xorl %ebx,%ebx /* %ebx is kept at zero */
81
82 movl $pa(__brk_base), %edi
83 movl $pa(initial_pg_pmd), %edx
84 movl $PTE_IDENT_ATTR, %eax
8510:
86 leal PDE_IDENT_ATTR(%edi),%ecx /* Create PMD entry */
87 movl %ecx,(%edx) /* Store PMD entry */
88 /* Upper half already zero */
89 addl $8,%edx
90 movl $512,%ecx
9111:
92 stosl
93 xchgl %eax,%ebx
94 stosl
95 xchgl %eax,%ebx
96 addl $0x1000,%eax
97 loop 11b
98
99 /*
100 * End condition: we must map up to the end + MAPPING_BEYOND_END.
101 */
102 movl $pa(_end) + MAPPING_BEYOND_END + PTE_IDENT_ATTR, %ebp
103 cmpl %ebp,%eax
104 jb 10b
1051:
106 addl $__PAGE_OFFSET, %edi
107 movl %edi, pa(_brk_end)
108 shrl $12, %eax
109 movl %eax, pa(max_pfn_mapped)
110
111 /* Do early initialization of the fixmap area */
112 movl $pa(initial_pg_fixmap)+PDE_IDENT_ATTR,%eax
113 movl %eax,pa(initial_pg_pmd+0x1000*KPMDS-8)
114#else /* Not PAE */
115
116page_pde_offset = (__PAGE_OFFSET >> 20);
117
118 movl $pa(__brk_base), %edi
119 movl $pa(initial_page_table), %edx
120 movl $PTE_IDENT_ATTR, %eax
12110:
122 leal PDE_IDENT_ATTR(%edi),%ecx /* Create PDE entry */
123 movl %ecx,(%edx) /* Store identity PDE entry */
124 movl %ecx,page_pde_offset(%edx) /* Store kernel PDE entry */
125 addl $4,%edx
126 movl $1024, %ecx
12711:
128 stosl
129 addl $0x1000,%eax
130 loop 11b
131 /*
132 * End condition: we must map up to the end + MAPPING_BEYOND_END.
133 */
134 movl $pa(_end) + MAPPING_BEYOND_END + PTE_IDENT_ATTR, %ebp
135 cmpl %ebp,%eax
136 jb 10b
137 addl $__PAGE_OFFSET, %edi
138 movl %edi, pa(_brk_end)
139 shrl $12, %eax
140 movl %eax, pa(max_pfn_mapped)
141
142 /* Do early initialization of the fixmap area */
143 movl $pa(initial_pg_fixmap)+PDE_IDENT_ATTR,%eax
144 movl %eax,pa(initial_page_table+0xffc)
145#endif
146 ret
147
148/*G:055 43/*G:055
149 * We create a macro which puts the assembler code between lgstart_ and lgend_ 44 * We create a macro which puts the assembler code between lgstart_ and lgend_
150 * markers. These templates are put in the .text section: they can't be 45 * markers. These templates are put in the .text section: they can't be
diff --git a/arch/x86/lib/delay.c b/arch/x86/lib/delay.c
index ff485d361182..fc45ba887d05 100644
--- a/arch/x86/lib/delay.c
+++ b/arch/x86/lib/delay.c
@@ -121,7 +121,7 @@ inline void __const_udelay(unsigned long xloops)
121 asm("mull %%edx" 121 asm("mull %%edx"
122 :"=d" (xloops), "=&a" (d0) 122 :"=d" (xloops), "=&a" (d0)
123 :"1" (xloops), "0" 123 :"1" (xloops), "0"
124 (cpu_data(raw_smp_processor_id()).loops_per_jiffy * (HZ/4))); 124 (this_cpu_read(cpu_info.loops_per_jiffy) * (HZ/4)));
125 125
126 __delay(++xloops); 126 __delay(++xloops);
127} 127}
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 55543397a8a7..09df2f9a3d69 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -23,7 +23,7 @@ mmiotrace-y := kmmio.o pf_in.o mmio-mod.o
23obj-$(CONFIG_MMIOTRACE_TEST) += testmmiotrace.o 23obj-$(CONFIG_MMIOTRACE_TEST) += testmmiotrace.o
24 24
25obj-$(CONFIG_NUMA) += numa.o numa_$(BITS).o 25obj-$(CONFIG_NUMA) += numa.o numa_$(BITS).o
26obj-$(CONFIG_K8_NUMA) += k8topology_64.o 26obj-$(CONFIG_AMD_NUMA) += amdtopology_64.o
27obj-$(CONFIG_ACPI_NUMA) += srat_$(BITS).o 27obj-$(CONFIG_ACPI_NUMA) += srat_$(BITS).o
28 28
29obj-$(CONFIG_HAVE_MEMBLOCK) += memblock.o 29obj-$(CONFIG_HAVE_MEMBLOCK) += memblock.o
diff --git a/arch/x86/mm/k8topology_64.c b/arch/x86/mm/amdtopology_64.c
index 804a3b6c6e14..f21962c435ed 100644
--- a/arch/x86/mm/k8topology_64.c
+++ b/arch/x86/mm/amdtopology_64.c
@@ -1,8 +1,8 @@
1/* 1/*
2 * AMD K8 NUMA support. 2 * AMD NUMA support.
3 * Discover the memory map and associated nodes. 3 * Discover the memory map and associated nodes.
4 * 4 *
5 * This version reads it directly from the K8 northbridge. 5 * This version reads it directly from the AMD northbridge.
6 * 6 *
7 * Copyright 2002,2003 Andi Kleen, SuSE Labs. 7 * Copyright 2002,2003 Andi Kleen, SuSE Labs.
8 */ 8 */
@@ -27,6 +27,7 @@
27#include <asm/amd_nb.h> 27#include <asm/amd_nb.h>
28 28
29static struct bootnode __initdata nodes[8]; 29static struct bootnode __initdata nodes[8];
30static unsigned char __initdata nodeids[8];
30static nodemask_t __initdata nodes_parsed = NODE_MASK_NONE; 31static nodemask_t __initdata nodes_parsed = NODE_MASK_NONE;
31 32
32static __init int find_northbridge(void) 33static __init int find_northbridge(void)
@@ -57,7 +58,7 @@ static __init void early_get_boot_cpu_id(void)
57{ 58{
58 /* 59 /*
59 * need to get the APIC ID of the BSP so can use that to 60 * need to get the APIC ID of the BSP so can use that to
60 * create apicid_to_node in k8_scan_nodes() 61 * create apicid_to_node in amd_scan_nodes()
61 */ 62 */
62#ifdef CONFIG_X86_MPPARSE 63#ifdef CONFIG_X86_MPPARSE
63 /* 64 /*
@@ -66,23 +67,9 @@ static __init void early_get_boot_cpu_id(void)
66 if (smp_found_config) 67 if (smp_found_config)
67 early_get_smp_config(); 68 early_get_smp_config();
68#endif 69#endif
69 early_init_lapic_mapping();
70} 70}
71 71
72int __init k8_get_nodes(struct bootnode *physnodes) 72int __init amd_numa_init(unsigned long start_pfn, unsigned long end_pfn)
73{
74 int i;
75 int ret = 0;
76
77 for_each_node_mask(i, nodes_parsed) {
78 physnodes[ret].start = nodes[i].start;
79 physnodes[ret].end = nodes[i].end;
80 ret++;
81 }
82 return ret;
83}
84
85int __init k8_numa_init(unsigned long start_pfn, unsigned long end_pfn)
86{ 73{
87 unsigned long start = PFN_PHYS(start_pfn); 74 unsigned long start = PFN_PHYS(start_pfn);
88 unsigned long end = PFN_PHYS(end_pfn); 75 unsigned long end = PFN_PHYS(end_pfn);
@@ -114,7 +101,7 @@ int __init k8_numa_init(unsigned long start_pfn, unsigned long end_pfn)
114 base = read_pci_config(0, nb, 1, 0x40 + i*8); 101 base = read_pci_config(0, nb, 1, 0x40 + i*8);
115 limit = read_pci_config(0, nb, 1, 0x44 + i*8); 102 limit = read_pci_config(0, nb, 1, 0x44 + i*8);
116 103
117 nodeid = limit & 7; 104 nodeids[i] = nodeid = limit & 7;
118 if ((base & 3) == 0) { 105 if ((base & 3) == 0) {
119 if (i < numnodes) 106 if (i < numnodes)
120 pr_info("Skipping disabled node %d\n", i); 107 pr_info("Skipping disabled node %d\n", i);
@@ -194,7 +181,77 @@ int __init k8_numa_init(unsigned long start_pfn, unsigned long end_pfn)
194 return 0; 181 return 0;
195} 182}
196 183
197int __init k8_scan_nodes(void) 184#ifdef CONFIG_NUMA_EMU
185static s16 fake_apicid_to_node[MAX_LOCAL_APIC] __initdata = {
186 [0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
187};
188
189void __init amd_get_nodes(struct bootnode *physnodes)
190{
191 int i;
192
193 for_each_node_mask(i, nodes_parsed) {
194 physnodes[i].start = nodes[i].start;
195 physnodes[i].end = nodes[i].end;
196 }
197}
198
199static int __init find_node_by_addr(unsigned long addr)
200{
201 int ret = NUMA_NO_NODE;
202 int i;
203
204 for (i = 0; i < 8; i++)
205 if (addr >= nodes[i].start && addr < nodes[i].end) {
206 ret = i;
207 break;
208 }
209 return ret;
210}
211
212/*
213 * For NUMA emulation, fake proximity domain (_PXM) to node id mappings must be
214 * setup to represent the physical topology but reflect the emulated
215 * environment. For each emulated node, the real node which it appears on is
216 * found and a fake pxm to nid mapping is created which mirrors the actual
217 * locality. node_distance() then represents the correct distances between
218 * emulated nodes by using the fake acpi mappings to pxms.
219 */
220void __init amd_fake_nodes(const struct bootnode *nodes, int nr_nodes)
221{
222 unsigned int bits;
223 unsigned int cores;
224 unsigned int apicid_base = 0;
225 int i;
226
227 bits = boot_cpu_data.x86_coreid_bits;
228 cores = 1 << bits;
229 early_get_boot_cpu_id();
230 if (boot_cpu_physical_apicid > 0)
231 apicid_base = boot_cpu_physical_apicid;
232
233 for (i = 0; i < nr_nodes; i++) {
234 int index;
235 int nid;
236 int j;
237
238 nid = find_node_by_addr(nodes[i].start);
239 if (nid == NUMA_NO_NODE)
240 continue;
241
242 index = nodeids[nid] << bits;
243 if (fake_apicid_to_node[index + apicid_base] == NUMA_NO_NODE)
244 for (j = apicid_base; j < cores + apicid_base; j++)
245 fake_apicid_to_node[index + j] = i;
246#ifdef CONFIG_ACPI_NUMA
247 __acpi_map_pxm_to_node(nid, i);
248#endif
249 }
250 memcpy(apicid_to_node, fake_apicid_to_node, sizeof(apicid_to_node));
251}
252#endif /* CONFIG_NUMA_EMU */
253
254int __init amd_scan_nodes(void)
198{ 255{
199 unsigned int bits; 256 unsigned int bits;
200 unsigned int cores; 257 unsigned int cores;
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index c0e28a13de7d..947f42abe820 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -364,8 +364,9 @@ void free_init_pages(char *what, unsigned long begin, unsigned long end)
364 /* 364 /*
365 * We just marked the kernel text read only above, now that 365 * We just marked the kernel text read only above, now that
366 * we are going to free part of that, we need to make that 366 * we are going to free part of that, we need to make that
367 * writeable first. 367 * writeable and non-executable first.
368 */ 368 */
369 set_memory_nx(begin, (end - begin) >> PAGE_SHIFT);
369 set_memory_rw(begin, (end - begin) >> PAGE_SHIFT); 370 set_memory_rw(begin, (end - begin) >> PAGE_SHIFT);
370 371
371 printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10); 372 printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10);
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 0e969f9f401b..f89b5bb4e93f 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -226,7 +226,7 @@ page_table_range_init(unsigned long start, unsigned long end, pgd_t *pgd_base)
226 226
227static inline int is_kernel_text(unsigned long addr) 227static inline int is_kernel_text(unsigned long addr)
228{ 228{
229 if (addr >= PAGE_OFFSET && addr <= (unsigned long)__init_end) 229 if (addr >= (unsigned long)_text && addr <= (unsigned long)__init_end)
230 return 1; 230 return 1;
231 return 0; 231 return 0;
232} 232}
@@ -912,6 +912,23 @@ void set_kernel_text_ro(void)
912 set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT); 912 set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT);
913} 913}
914 914
915static void mark_nxdata_nx(void)
916{
917 /*
918 * When this called, init has already been executed and released,
919 * so everything past _etext sould be NX.
920 */
921 unsigned long start = PFN_ALIGN(_etext);
922 /*
923 * This comes from is_kernel_text upper limit. Also HPAGE where used:
924 */
925 unsigned long size = (((unsigned long)__init_end + HPAGE_SIZE) & HPAGE_MASK) - start;
926
927 if (__supported_pte_mask & _PAGE_NX)
928 printk(KERN_INFO "NX-protecting the kernel data: %luk\n", size >> 10);
929 set_pages_nx(virt_to_page(start), size >> PAGE_SHIFT);
930}
931
915void mark_rodata_ro(void) 932void mark_rodata_ro(void)
916{ 933{
917 unsigned long start = PFN_ALIGN(_text); 934 unsigned long start = PFN_ALIGN(_text);
@@ -946,6 +963,7 @@ void mark_rodata_ro(void)
946 printk(KERN_INFO "Testing CPA: write protecting again\n"); 963 printk(KERN_INFO "Testing CPA: write protecting again\n");
947 set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT); 964 set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT);
948#endif 965#endif
966 mark_nxdata_nx();
949} 967}
950#endif 968#endif
951 969
diff --git a/arch/x86/mm/kmemcheck/error.c b/arch/x86/mm/kmemcheck/error.c
index af3b6c8a436f..704a37cedddb 100644
--- a/arch/x86/mm/kmemcheck/error.c
+++ b/arch/x86/mm/kmemcheck/error.c
@@ -185,7 +185,7 @@ void kmemcheck_error_save(enum kmemcheck_shadow state,
185 e->trace.entries = e->trace_entries; 185 e->trace.entries = e->trace_entries;
186 e->trace.max_entries = ARRAY_SIZE(e->trace_entries); 186 e->trace.max_entries = ARRAY_SIZE(e->trace_entries);
187 e->trace.skip = 0; 187 e->trace.skip = 0;
188 save_stack_trace_bp(&e->trace, regs->bp); 188 save_stack_trace_regs(&e->trace, regs);
189 189
190 /* Round address down to nearest 16 bytes */ 190 /* Round address down to nearest 16 bytes */
191 shadow_copy = kmemcheck_shadow_lookup(address 191 shadow_copy = kmemcheck_shadow_lookup(address
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 7ffc9b727efd..1e72102e80c9 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -260,30 +260,30 @@ void __init numa_init_array(void)
260#ifdef CONFIG_NUMA_EMU 260#ifdef CONFIG_NUMA_EMU
261/* Numa emulation */ 261/* Numa emulation */
262static struct bootnode nodes[MAX_NUMNODES] __initdata; 262static struct bootnode nodes[MAX_NUMNODES] __initdata;
263static struct bootnode physnodes[MAX_NUMNODES] __initdata; 263static struct bootnode physnodes[MAX_NUMNODES] __cpuinitdata;
264static char *cmdline __initdata; 264static char *cmdline __initdata;
265 265
266static int __init setup_physnodes(unsigned long start, unsigned long end, 266static int __init setup_physnodes(unsigned long start, unsigned long end,
267 int acpi, int k8) 267 int acpi, int amd)
268{ 268{
269 int nr_nodes = 0;
270 int ret = 0; 269 int ret = 0;
271 int i; 270 int i;
272 271
272 memset(physnodes, 0, sizeof(physnodes));
273#ifdef CONFIG_ACPI_NUMA 273#ifdef CONFIG_ACPI_NUMA
274 if (acpi) 274 if (acpi)
275 nr_nodes = acpi_get_nodes(physnodes); 275 acpi_get_nodes(physnodes, start, end);
276#endif 276#endif
277#ifdef CONFIG_K8_NUMA 277#ifdef CONFIG_AMD_NUMA
278 if (k8) 278 if (amd)
279 nr_nodes = k8_get_nodes(physnodes); 279 amd_get_nodes(physnodes);
280#endif 280#endif
281 /* 281 /*
282 * Basic sanity checking on the physical node map: there may be errors 282 * Basic sanity checking on the physical node map: there may be errors
283 * if the SRAT or K8 incorrectly reported the topology or the mem= 283 * if the SRAT or AMD code incorrectly reported the topology or the mem=
284 * kernel parameter is used. 284 * kernel parameter is used.
285 */ 285 */
286 for (i = 0; i < nr_nodes; i++) { 286 for (i = 0; i < MAX_NUMNODES; i++) {
287 if (physnodes[i].start == physnodes[i].end) 287 if (physnodes[i].start == physnodes[i].end)
288 continue; 288 continue;
289 if (physnodes[i].start > end) { 289 if (physnodes[i].start > end) {
@@ -298,17 +298,6 @@ static int __init setup_physnodes(unsigned long start, unsigned long end,
298 physnodes[i].start = start; 298 physnodes[i].start = start;
299 if (physnodes[i].end > end) 299 if (physnodes[i].end > end)
300 physnodes[i].end = end; 300 physnodes[i].end = end;
301 }
302
303 /*
304 * Remove all nodes that have no memory or were truncated because of the
305 * limited address range.
306 */
307 for (i = 0; i < nr_nodes; i++) {
308 if (physnodes[i].start == physnodes[i].end)
309 continue;
310 physnodes[ret].start = physnodes[i].start;
311 physnodes[ret].end = physnodes[i].end;
312 ret++; 301 ret++;
313 } 302 }
314 303
@@ -324,6 +313,24 @@ static int __init setup_physnodes(unsigned long start, unsigned long end,
324 return ret; 313 return ret;
325} 314}
326 315
316static void __init fake_physnodes(int acpi, int amd, int nr_nodes)
317{
318 int i;
319
320 BUG_ON(acpi && amd);
321#ifdef CONFIG_ACPI_NUMA
322 if (acpi)
323 acpi_fake_nodes(nodes, nr_nodes);
324#endif
325#ifdef CONFIG_AMD_NUMA
326 if (amd)
327 amd_fake_nodes(nodes, nr_nodes);
328#endif
329 if (!acpi && !amd)
330 for (i = 0; i < nr_cpu_ids; i++)
331 numa_set_node(i, 0);
332}
333
327/* 334/*
328 * Setups up nid to range from addr to addr + size. If the end 335 * Setups up nid to range from addr to addr + size. If the end
329 * boundary is greater than max_addr, then max_addr is used instead. 336 * boundary is greater than max_addr, then max_addr is used instead.
@@ -352,8 +359,7 @@ static int __init setup_node_range(int nid, u64 *addr, u64 size, u64 max_addr)
352 * Sets up nr_nodes fake nodes interleaved over physical nodes ranging from addr 359 * Sets up nr_nodes fake nodes interleaved over physical nodes ranging from addr
353 * to max_addr. The return value is the number of nodes allocated. 360 * to max_addr. The return value is the number of nodes allocated.
354 */ 361 */
355static int __init split_nodes_interleave(u64 addr, u64 max_addr, 362static int __init split_nodes_interleave(u64 addr, u64 max_addr, int nr_nodes)
356 int nr_phys_nodes, int nr_nodes)
357{ 363{
358 nodemask_t physnode_mask = NODE_MASK_NONE; 364 nodemask_t physnode_mask = NODE_MASK_NONE;
359 u64 size; 365 u64 size;
@@ -384,7 +390,7 @@ static int __init split_nodes_interleave(u64 addr, u64 max_addr,
384 return -1; 390 return -1;
385 } 391 }
386 392
387 for (i = 0; i < nr_phys_nodes; i++) 393 for (i = 0; i < MAX_NUMNODES; i++)
388 if (physnodes[i].start != physnodes[i].end) 394 if (physnodes[i].start != physnodes[i].end)
389 node_set(i, physnode_mask); 395 node_set(i, physnode_mask);
390 396
@@ -549,15 +555,13 @@ static int __init split_nodes_size_interleave(u64 addr, u64 max_addr, u64 size)
549 * numa=fake command-line option. 555 * numa=fake command-line option.
550 */ 556 */
551static int __init numa_emulation(unsigned long start_pfn, 557static int __init numa_emulation(unsigned long start_pfn,
552 unsigned long last_pfn, int acpi, int k8) 558 unsigned long last_pfn, int acpi, int amd)
553{ 559{
554 u64 addr = start_pfn << PAGE_SHIFT; 560 u64 addr = start_pfn << PAGE_SHIFT;
555 u64 max_addr = last_pfn << PAGE_SHIFT; 561 u64 max_addr = last_pfn << PAGE_SHIFT;
556 int num_phys_nodes;
557 int num_nodes; 562 int num_nodes;
558 int i; 563 int i;
559 564
560 num_phys_nodes = setup_physnodes(addr, max_addr, acpi, k8);
561 /* 565 /*
562 * If the numa=fake command-line contains a 'M' or 'G', it represents 566 * If the numa=fake command-line contains a 'M' or 'G', it represents
563 * the fixed node size. Otherwise, if it is just a single number N, 567 * the fixed node size. Otherwise, if it is just a single number N,
@@ -572,7 +576,7 @@ static int __init numa_emulation(unsigned long start_pfn,
572 unsigned long n; 576 unsigned long n;
573 577
574 n = simple_strtoul(cmdline, NULL, 0); 578 n = simple_strtoul(cmdline, NULL, 0);
575 num_nodes = split_nodes_interleave(addr, max_addr, num_phys_nodes, n); 579 num_nodes = split_nodes_interleave(addr, max_addr, n);
576 } 580 }
577 581
578 if (num_nodes < 0) 582 if (num_nodes < 0)
@@ -595,14 +599,15 @@ static int __init numa_emulation(unsigned long start_pfn,
595 nodes[i].end >> PAGE_SHIFT); 599 nodes[i].end >> PAGE_SHIFT);
596 setup_node_bootmem(i, nodes[i].start, nodes[i].end); 600 setup_node_bootmem(i, nodes[i].start, nodes[i].end);
597 } 601 }
598 acpi_fake_nodes(nodes, num_nodes); 602 setup_physnodes(addr, max_addr, acpi, amd);
603 fake_physnodes(acpi, amd, num_nodes);
599 numa_init_array(); 604 numa_init_array();
600 return 0; 605 return 0;
601} 606}
602#endif /* CONFIG_NUMA_EMU */ 607#endif /* CONFIG_NUMA_EMU */
603 608
604void __init initmem_init(unsigned long start_pfn, unsigned long last_pfn, 609void __init initmem_init(unsigned long start_pfn, unsigned long last_pfn,
605 int acpi, int k8) 610 int acpi, int amd)
606{ 611{
607 int i; 612 int i;
608 613
@@ -610,8 +615,12 @@ void __init initmem_init(unsigned long start_pfn, unsigned long last_pfn,
610 nodes_clear(node_online_map); 615 nodes_clear(node_online_map);
611 616
612#ifdef CONFIG_NUMA_EMU 617#ifdef CONFIG_NUMA_EMU
613 if (cmdline && !numa_emulation(start_pfn, last_pfn, acpi, k8)) 618 setup_physnodes(start_pfn << PAGE_SHIFT, last_pfn << PAGE_SHIFT,
619 acpi, amd);
620 if (cmdline && !numa_emulation(start_pfn, last_pfn, acpi, amd))
614 return; 621 return;
622 setup_physnodes(start_pfn << PAGE_SHIFT, last_pfn << PAGE_SHIFT,
623 acpi, amd);
615 nodes_clear(node_possible_map); 624 nodes_clear(node_possible_map);
616 nodes_clear(node_online_map); 625 nodes_clear(node_online_map);
617#endif 626#endif
@@ -624,8 +633,8 @@ void __init initmem_init(unsigned long start_pfn, unsigned long last_pfn,
624 nodes_clear(node_online_map); 633 nodes_clear(node_online_map);
625#endif 634#endif
626 635
627#ifdef CONFIG_K8_NUMA 636#ifdef CONFIG_AMD_NUMA
628 if (!numa_off && k8 && !k8_scan_nodes()) 637 if (!numa_off && amd && !amd_scan_nodes())
629 return; 638 return;
630 nodes_clear(node_possible_map); 639 nodes_clear(node_possible_map);
631 nodes_clear(node_online_map); 640 nodes_clear(node_online_map);
@@ -767,6 +776,7 @@ void __cpuinit numa_clear_node(int cpu)
767 776
768#ifndef CONFIG_DEBUG_PER_CPU_MAPS 777#ifndef CONFIG_DEBUG_PER_CPU_MAPS
769 778
779#ifndef CONFIG_NUMA_EMU
770void __cpuinit numa_add_cpu(int cpu) 780void __cpuinit numa_add_cpu(int cpu)
771{ 781{
772 cpumask_set_cpu(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]); 782 cpumask_set_cpu(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]);
@@ -776,34 +786,115 @@ void __cpuinit numa_remove_cpu(int cpu)
776{ 786{
777 cpumask_clear_cpu(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]); 787 cpumask_clear_cpu(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]);
778} 788}
789#else
790void __cpuinit numa_add_cpu(int cpu)
791{
792 unsigned long addr;
793 u16 apicid;
794 int physnid;
795 int nid = NUMA_NO_NODE;
796
797 apicid = early_per_cpu(x86_cpu_to_apicid, cpu);
798 if (apicid != BAD_APICID)
799 nid = apicid_to_node[apicid];
800 if (nid == NUMA_NO_NODE)
801 nid = early_cpu_to_node(cpu);
802 BUG_ON(nid == NUMA_NO_NODE || !node_online(nid));
803
804 /*
805 * Use the starting address of the emulated node to find which physical
806 * node it is allocated on.
807 */
808 addr = node_start_pfn(nid) << PAGE_SHIFT;
809 for (physnid = 0; physnid < MAX_NUMNODES; physnid++)
810 if (addr >= physnodes[physnid].start &&
811 addr < physnodes[physnid].end)
812 break;
813
814 /*
815 * Map the cpu to each emulated node that is allocated on the physical
816 * node of the cpu's apic id.
817 */
818 for_each_online_node(nid) {
819 addr = node_start_pfn(nid) << PAGE_SHIFT;
820 if (addr >= physnodes[physnid].start &&
821 addr < physnodes[physnid].end)
822 cpumask_set_cpu(cpu, node_to_cpumask_map[nid]);
823 }
824}
825
826void __cpuinit numa_remove_cpu(int cpu)
827{
828 int i;
829
830 for_each_online_node(i)
831 cpumask_clear_cpu(cpu, node_to_cpumask_map[i]);
832}
833#endif /* !CONFIG_NUMA_EMU */
779 834
780#else /* CONFIG_DEBUG_PER_CPU_MAPS */ 835#else /* CONFIG_DEBUG_PER_CPU_MAPS */
836static struct cpumask __cpuinit *debug_cpumask_set_cpu(int cpu, int enable)
837{
838 int node = early_cpu_to_node(cpu);
839 struct cpumask *mask;
840 char buf[64];
841
842 mask = node_to_cpumask_map[node];
843 if (!mask) {
844 pr_err("node_to_cpumask_map[%i] NULL\n", node);
845 dump_stack();
846 return NULL;
847 }
848
849 cpulist_scnprintf(buf, sizeof(buf), mask);
850 printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n",
851 enable ? "numa_add_cpu" : "numa_remove_cpu",
852 cpu, node, buf);
853 return mask;
854}
781 855
782/* 856/*
783 * --------- debug versions of the numa functions --------- 857 * --------- debug versions of the numa functions ---------
784 */ 858 */
859#ifndef CONFIG_NUMA_EMU
785static void __cpuinit numa_set_cpumask(int cpu, int enable) 860static void __cpuinit numa_set_cpumask(int cpu, int enable)
786{ 861{
787 int node = early_cpu_to_node(cpu);
788 struct cpumask *mask; 862 struct cpumask *mask;
789 char buf[64];
790 863
791 mask = node_to_cpumask_map[node]; 864 mask = debug_cpumask_set_cpu(cpu, enable);
792 if (mask == NULL) { 865 if (!mask)
793 printk(KERN_ERR "node_to_cpumask_map[%i] NULL\n", node);
794 dump_stack();
795 return; 866 return;
796 }
797 867
798 if (enable) 868 if (enable)
799 cpumask_set_cpu(cpu, mask); 869 cpumask_set_cpu(cpu, mask);
800 else 870 else
801 cpumask_clear_cpu(cpu, mask); 871 cpumask_clear_cpu(cpu, mask);
872}
873#else
874static void __cpuinit numa_set_cpumask(int cpu, int enable)
875{
876 int node = early_cpu_to_node(cpu);
877 struct cpumask *mask;
878 int i;
802 879
803 cpulist_scnprintf(buf, sizeof(buf), mask); 880 for_each_online_node(i) {
804 printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n", 881 unsigned long addr;
805 enable ? "numa_add_cpu" : "numa_remove_cpu", cpu, node, buf); 882
883 addr = node_start_pfn(i) << PAGE_SHIFT;
884 if (addr < physnodes[node].start ||
885 addr >= physnodes[node].end)
886 continue;
887 mask = debug_cpumask_set_cpu(cpu, enable);
888 if (!mask)
889 return;
890
891 if (enable)
892 cpumask_set_cpu(cpu, mask);
893 else
894 cpumask_clear_cpu(cpu, mask);
895 }
806} 896}
897#endif /* CONFIG_NUMA_EMU */
807 898
808void __cpuinit numa_add_cpu(int cpu) 899void __cpuinit numa_add_cpu(int cpu)
809{ 900{
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 532e7933d606..8b830ca14ac4 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -13,6 +13,7 @@
13#include <linux/pfn.h> 13#include <linux/pfn.h>
14#include <linux/percpu.h> 14#include <linux/percpu.h>
15#include <linux/gfp.h> 15#include <linux/gfp.h>
16#include <linux/pci.h>
16 17
17#include <asm/e820.h> 18#include <asm/e820.h>
18#include <asm/processor.h> 19#include <asm/processor.h>
@@ -255,13 +256,16 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address,
255 unsigned long pfn) 256 unsigned long pfn)
256{ 257{
257 pgprot_t forbidden = __pgprot(0); 258 pgprot_t forbidden = __pgprot(0);
259 pgprot_t required = __pgprot(0);
258 260
259 /* 261 /*
260 * The BIOS area between 640k and 1Mb needs to be executable for 262 * The BIOS area between 640k and 1Mb needs to be executable for
261 * PCI BIOS based config access (CONFIG_PCI_GOBIOS) support. 263 * PCI BIOS based config access (CONFIG_PCI_GOBIOS) support.
262 */ 264 */
263 if (within(pfn, BIOS_BEGIN >> PAGE_SHIFT, BIOS_END >> PAGE_SHIFT)) 265#ifdef CONFIG_PCI_BIOS
266 if (pcibios_enabled && within(pfn, BIOS_BEGIN >> PAGE_SHIFT, BIOS_END >> PAGE_SHIFT))
264 pgprot_val(forbidden) |= _PAGE_NX; 267 pgprot_val(forbidden) |= _PAGE_NX;
268#endif
265 269
266 /* 270 /*
267 * The kernel text needs to be executable for obvious reasons 271 * The kernel text needs to be executable for obvious reasons
@@ -278,6 +282,12 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address,
278 if (within(pfn, __pa((unsigned long)__start_rodata) >> PAGE_SHIFT, 282 if (within(pfn, __pa((unsigned long)__start_rodata) >> PAGE_SHIFT,
279 __pa((unsigned long)__end_rodata) >> PAGE_SHIFT)) 283 __pa((unsigned long)__end_rodata) >> PAGE_SHIFT))
280 pgprot_val(forbidden) |= _PAGE_RW; 284 pgprot_val(forbidden) |= _PAGE_RW;
285 /*
286 * .data and .bss should always be writable.
287 */
288 if (within(address, (unsigned long)_sdata, (unsigned long)_edata) ||
289 within(address, (unsigned long)__bss_start, (unsigned long)__bss_stop))
290 pgprot_val(required) |= _PAGE_RW;
281 291
282#if defined(CONFIG_X86_64) && defined(CONFIG_DEBUG_RODATA) 292#if defined(CONFIG_X86_64) && defined(CONFIG_DEBUG_RODATA)
283 /* 293 /*
@@ -317,6 +327,7 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address,
317#endif 327#endif
318 328
319 prot = __pgprot(pgprot_val(prot) & ~pgprot_val(forbidden)); 329 prot = __pgprot(pgprot_val(prot) & ~pgprot_val(forbidden));
330 prot = __pgprot(pgprot_val(prot) | pgprot_val(required));
320 331
321 return prot; 332 return prot;
322} 333}
@@ -393,7 +404,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
393{ 404{
394 unsigned long nextpage_addr, numpages, pmask, psize, flags, addr, pfn; 405 unsigned long nextpage_addr, numpages, pmask, psize, flags, addr, pfn;
395 pte_t new_pte, old_pte, *tmp; 406 pte_t new_pte, old_pte, *tmp;
396 pgprot_t old_prot, new_prot; 407 pgprot_t old_prot, new_prot, req_prot;
397 int i, do_split = 1; 408 int i, do_split = 1;
398 unsigned int level; 409 unsigned int level;
399 410
@@ -438,10 +449,10 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
438 * We are safe now. Check whether the new pgprot is the same: 449 * We are safe now. Check whether the new pgprot is the same:
439 */ 450 */
440 old_pte = *kpte; 451 old_pte = *kpte;
441 old_prot = new_prot = pte_pgprot(old_pte); 452 old_prot = new_prot = req_prot = pte_pgprot(old_pte);
442 453
443 pgprot_val(new_prot) &= ~pgprot_val(cpa->mask_clr); 454 pgprot_val(req_prot) &= ~pgprot_val(cpa->mask_clr);
444 pgprot_val(new_prot) |= pgprot_val(cpa->mask_set); 455 pgprot_val(req_prot) |= pgprot_val(cpa->mask_set);
445 456
446 /* 457 /*
447 * old_pte points to the large page base address. So we need 458 * old_pte points to the large page base address. So we need
@@ -450,17 +461,17 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
450 pfn = pte_pfn(old_pte) + ((address & (psize - 1)) >> PAGE_SHIFT); 461 pfn = pte_pfn(old_pte) + ((address & (psize - 1)) >> PAGE_SHIFT);
451 cpa->pfn = pfn; 462 cpa->pfn = pfn;
452 463
453 new_prot = static_protections(new_prot, address, pfn); 464 new_prot = static_protections(req_prot, address, pfn);
454 465
455 /* 466 /*
456 * We need to check the full range, whether 467 * We need to check the full range, whether
457 * static_protection() requires a different pgprot for one of 468 * static_protection() requires a different pgprot for one of
458 * the pages in the range we try to preserve: 469 * the pages in the range we try to preserve:
459 */ 470 */
460 addr = address + PAGE_SIZE; 471 addr = address & pmask;
461 pfn++; 472 pfn = pte_pfn(old_pte);
462 for (i = 1; i < cpa->numpages; i++, addr += PAGE_SIZE, pfn++) { 473 for (i = 0; i < (psize >> PAGE_SHIFT); i++, addr += PAGE_SIZE, pfn++) {
463 pgprot_t chk_prot = static_protections(new_prot, addr, pfn); 474 pgprot_t chk_prot = static_protections(req_prot, addr, pfn);
464 475
465 if (pgprot_val(chk_prot) != pgprot_val(new_prot)) 476 if (pgprot_val(chk_prot) != pgprot_val(new_prot))
466 goto out_unlock; 477 goto out_unlock;
@@ -483,7 +494,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
483 * that we limited the number of possible pages already to 494 * that we limited the number of possible pages already to
484 * the number of pages in the large page. 495 * the number of pages in the large page.
485 */ 496 */
486 if (address == (nextpage_addr - psize) && cpa->numpages == numpages) { 497 if (address == (address & pmask) && cpa->numpages == (psize >> PAGE_SHIFT)) {
487 /* 498 /*
488 * The address is aligned and the number of pages 499 * The address is aligned and the number of pages
489 * covers the full page. 500 * covers the full page.
diff --git a/arch/x86/mm/setup_nx.c b/arch/x86/mm/setup_nx.c
index a3250aa34086..410531d3c292 100644
--- a/arch/x86/mm/setup_nx.c
+++ b/arch/x86/mm/setup_nx.c
@@ -41,7 +41,7 @@ void __init x86_report_nx(void)
41{ 41{
42 if (!cpu_has_nx) { 42 if (!cpu_has_nx) {
43 printk(KERN_NOTICE "Notice: NX (Execute Disable) protection " 43 printk(KERN_NOTICE "Notice: NX (Execute Disable) protection "
44 "missing in CPU or disabled in BIOS!\n"); 44 "missing in CPU!\n");
45 } else { 45 } else {
46#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) 46#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
47 if (disable_nx) { 47 if (disable_nx) {
diff --git a/arch/x86/mm/srat_32.c b/arch/x86/mm/srat_32.c
index a17dffd136c1..f16434568a51 100644
--- a/arch/x86/mm/srat_32.c
+++ b/arch/x86/mm/srat_32.c
@@ -92,6 +92,7 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *cpu_affinity)
92 /* mark this node as "seen" in node bitmap */ 92 /* mark this node as "seen" in node bitmap */
93 BMAP_SET(pxm_bitmap, cpu_affinity->proximity_domain_lo); 93 BMAP_SET(pxm_bitmap, cpu_affinity->proximity_domain_lo);
94 94
95 /* don't need to check apic_id here, because it is always 8 bits */
95 apicid_to_pxm[cpu_affinity->apic_id] = cpu_affinity->proximity_domain_lo; 96 apicid_to_pxm[cpu_affinity->apic_id] = cpu_affinity->proximity_domain_lo;
96 97
97 printk(KERN_DEBUG "CPU %02x in proximity domain %02x\n", 98 printk(KERN_DEBUG "CPU %02x in proximity domain %02x\n",
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c
index a35cb9d8b060..603d285d1daa 100644
--- a/arch/x86/mm/srat_64.c
+++ b/arch/x86/mm/srat_64.c
@@ -134,6 +134,10 @@ acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa)
134 } 134 }
135 135
136 apic_id = pa->apic_id; 136 apic_id = pa->apic_id;
137 if (apic_id >= MAX_LOCAL_APIC) {
138 printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%04x -> Node %u skipped apicid that is too big\n", pxm, apic_id, node);
139 return;
140 }
137 apicid_to_node[apic_id] = node; 141 apicid_to_node[apic_id] = node;
138 node_set(node, cpu_nodes_parsed); 142 node_set(node, cpu_nodes_parsed);
139 acpi_numa = 1; 143 acpi_numa = 1;
@@ -168,6 +172,12 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa)
168 apic_id = (pa->apic_id << 8) | pa->local_sapic_eid; 172 apic_id = (pa->apic_id << 8) | pa->local_sapic_eid;
169 else 173 else
170 apic_id = pa->apic_id; 174 apic_id = pa->apic_id;
175
176 if (apic_id >= MAX_LOCAL_APIC) {
177 printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%02x -> Node %u skipped apicid that is too big\n", pxm, apic_id, node);
178 return;
179 }
180
171 apicid_to_node[apic_id] = node; 181 apicid_to_node[apic_id] = node;
172 node_set(node, cpu_nodes_parsed); 182 node_set(node, cpu_nodes_parsed);
173 acpi_numa = 1; 183 acpi_numa = 1;
@@ -339,18 +349,19 @@ static int __init nodes_cover_memory(const struct bootnode *nodes)
339 349
340void __init acpi_numa_arch_fixup(void) {} 350void __init acpi_numa_arch_fixup(void) {}
341 351
342int __init acpi_get_nodes(struct bootnode *physnodes) 352#ifdef CONFIG_NUMA_EMU
353void __init acpi_get_nodes(struct bootnode *physnodes, unsigned long start,
354 unsigned long end)
343{ 355{
344 int i; 356 int i;
345 int ret = 0;
346 357
347 for_each_node_mask(i, nodes_parsed) { 358 for_each_node_mask(i, nodes_parsed) {
348 physnodes[ret].start = nodes[i].start; 359 cutoff_node(i, start, end);
349 physnodes[ret].end = nodes[i].end; 360 physnodes[i].start = nodes[i].start;
350 ret++; 361 physnodes[i].end = nodes[i].end;
351 } 362 }
352 return ret;
353} 363}
364#endif /* CONFIG_NUMA_EMU */
354 365
355/* Use the information discovered above to actually set up the nodes. */ 366/* Use the information discovered above to actually set up the nodes. */
356int __init acpi_scan_nodes(unsigned long start, unsigned long end) 367int __init acpi_scan_nodes(unsigned long start, unsigned long end)
@@ -495,8 +506,6 @@ void __init acpi_fake_nodes(const struct bootnode *fake_nodes, int num_nodes)
495{ 506{
496 int i, j; 507 int i, j;
497 508
498 printk(KERN_INFO "Faking PXM affinity for fake nodes on real "
499 "topology.\n");
500 for (i = 0; i < num_nodes; i++) { 509 for (i = 0; i < num_nodes; i++) {
501 int nid, pxm; 510 int nid, pxm;
502 511
@@ -516,6 +525,17 @@ void __init acpi_fake_nodes(const struct bootnode *fake_nodes, int num_nodes)
516 fake_apicid_to_node[j] == NUMA_NO_NODE) 525 fake_apicid_to_node[j] == NUMA_NO_NODE)
517 fake_apicid_to_node[j] = i; 526 fake_apicid_to_node[j] = i;
518 } 527 }
528
529 /*
530 * If there are apicid-to-node mappings for physical nodes that do not
531 * have a corresponding emulated node, it should default to a guaranteed
532 * value.
533 */
534 for (i = 0; i < MAX_LOCAL_APIC; i++)
535 if (apicid_to_node[i] != NUMA_NO_NODE &&
536 fake_apicid_to_node[i] == NUMA_NO_NODE)
537 fake_apicid_to_node[i] = 0;
538
519 for (i = 0; i < num_nodes; i++) 539 for (i = 0; i < num_nodes; i++)
520 __acpi_map_pxm_to_node(fake_node_to_pxm_map[i], i); 540 __acpi_map_pxm_to_node(fake_node_to_pxm_map[i], i);
521 memcpy(apicid_to_node, fake_apicid_to_node, sizeof(apicid_to_node)); 541 memcpy(apicid_to_node, fake_apicid_to_node, sizeof(apicid_to_node));
diff --git a/arch/x86/oprofile/backtrace.c b/arch/x86/oprofile/backtrace.c
index 2d49d4e19a36..72cbec14d783 100644
--- a/arch/x86/oprofile/backtrace.c
+++ b/arch/x86/oprofile/backtrace.c
@@ -126,7 +126,7 @@ x86_backtrace(struct pt_regs * const regs, unsigned int depth)
126 if (!user_mode_vm(regs)) { 126 if (!user_mode_vm(regs)) {
127 unsigned long stack = kernel_stack_pointer(regs); 127 unsigned long stack = kernel_stack_pointer(regs);
128 if (depth) 128 if (depth)
129 dump_trace(NULL, regs, (unsigned long *)stack, 0, 129 dump_trace(NULL, regs, (unsigned long *)stack,
130 &backtrace_ops, &depth); 130 &backtrace_ops, &depth);
131 return; 131 return;
132 } 132 }
diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c
index 4e8baad36d37..e2b7b0c06cdf 100644
--- a/arch/x86/oprofile/nmi_int.c
+++ b/arch/x86/oprofile/nmi_int.c
@@ -65,7 +65,6 @@ static int profile_exceptions_notify(struct notifier_block *self,
65 65
66 switch (val) { 66 switch (val) {
67 case DIE_NMI: 67 case DIE_NMI:
68 case DIE_NMI_IPI:
69 if (ctr_running) 68 if (ctr_running)
70 model->check_ctrs(args->regs, &__get_cpu_var(cpu_msrs)); 69 model->check_ctrs(args->regs, &__get_cpu_var(cpu_msrs));
71 else if (!nmi_enabled) 70 else if (!nmi_enabled)
@@ -143,7 +142,7 @@ static inline int has_mux(void)
143 142
144inline int op_x86_phys_to_virt(int phys) 143inline int op_x86_phys_to_virt(int phys)
145{ 144{
146 return __get_cpu_var(switch_index) + phys; 145 return __this_cpu_read(switch_index) + phys;
147} 146}
148 147
149inline int op_x86_virt_to_phys(int virt) 148inline int op_x86_virt_to_phys(int virt)
@@ -361,7 +360,7 @@ static void nmi_cpu_setup(void *dummy)
361static struct notifier_block profile_exceptions_nb = { 360static struct notifier_block profile_exceptions_nb = {
362 .notifier_call = profile_exceptions_notify, 361 .notifier_call = profile_exceptions_notify,
363 .next = NULL, 362 .next = NULL,
364 .priority = 2 363 .priority = NMI_LOCAL_LOW_PRIOR,
365}; 364};
366 365
367static void nmi_cpu_restore_registers(struct op_msrs *msrs) 366static void nmi_cpu_restore_registers(struct op_msrs *msrs)
@@ -732,6 +731,9 @@ int __init op_nmi_init(struct oprofile_operations *ops)
732 case 0x14: 731 case 0x14:
733 cpu_type = "x86-64/family14h"; 732 cpu_type = "x86-64/family14h";
734 break; 733 break;
734 case 0x15:
735 cpu_type = "x86-64/family15h";
736 break;
735 default: 737 default:
736 return -ENODEV; 738 return -ENODEV;
737 } 739 }
diff --git a/arch/x86/oprofile/nmi_timer_int.c b/arch/x86/oprofile/nmi_timer_int.c
index e3ecb71b5790..720bf5a53c51 100644
--- a/arch/x86/oprofile/nmi_timer_int.c
+++ b/arch/x86/oprofile/nmi_timer_int.c
@@ -38,7 +38,7 @@ static int profile_timer_exceptions_notify(struct notifier_block *self,
38static struct notifier_block profile_timer_exceptions_nb = { 38static struct notifier_block profile_timer_exceptions_nb = {
39 .notifier_call = profile_timer_exceptions_notify, 39 .notifier_call = profile_timer_exceptions_notify,
40 .next = NULL, 40 .next = NULL,
41 .priority = 0 41 .priority = NMI_LOW_PRIOR,
42}; 42};
43 43
44static int timer_start(void) 44static int timer_start(void)
@@ -58,9 +58,6 @@ static void timer_stop(void)
58 58
59int __init op_nmi_timer_init(struct oprofile_operations *ops) 59int __init op_nmi_timer_init(struct oprofile_operations *ops)
60{ 60{
61 if ((nmi_watchdog != NMI_IO_APIC) || (atomic_read(&nmi_active) <= 0))
62 return -ENODEV;
63
64 ops->start = timer_start; 61 ops->start = timer_start;
65 ops->stop = timer_stop; 62 ops->stop = timer_stop;
66 ops->cpu_type = "timer"; 63 ops->cpu_type = "timer";
diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c
index 7d90d47655ba..c3b8e24f2b16 100644
--- a/arch/x86/oprofile/op_model_amd.c
+++ b/arch/x86/oprofile/op_model_amd.c
@@ -29,11 +29,12 @@
29#include "op_x86_model.h" 29#include "op_x86_model.h"
30#include "op_counter.h" 30#include "op_counter.h"
31 31
32#define NUM_COUNTERS 4 32#define NUM_COUNTERS 4
33#define NUM_COUNTERS_F15H 6
33#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX 34#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX
34#define NUM_VIRT_COUNTERS 32 35#define NUM_VIRT_COUNTERS 32
35#else 36#else
36#define NUM_VIRT_COUNTERS NUM_COUNTERS 37#define NUM_VIRT_COUNTERS 0
37#endif 38#endif
38 39
39#define OP_EVENT_MASK 0x0FFF 40#define OP_EVENT_MASK 0x0FFF
@@ -41,7 +42,8 @@
41 42
42#define MSR_AMD_EVENTSEL_RESERVED ((0xFFFFFCF0ULL<<32)|(1ULL<<21)) 43#define MSR_AMD_EVENTSEL_RESERVED ((0xFFFFFCF0ULL<<32)|(1ULL<<21))
43 44
44static unsigned long reset_value[NUM_VIRT_COUNTERS]; 45static int num_counters;
46static unsigned long reset_value[OP_MAX_COUNTER];
45 47
46#define IBS_FETCH_SIZE 6 48#define IBS_FETCH_SIZE 6
47#define IBS_OP_SIZE 12 49#define IBS_OP_SIZE 12
@@ -387,7 +389,7 @@ static void op_mux_switch_ctrl(struct op_x86_model_spec const *model,
387 int i; 389 int i;
388 390
389 /* enable active counters */ 391 /* enable active counters */
390 for (i = 0; i < NUM_COUNTERS; ++i) { 392 for (i = 0; i < num_counters; ++i) {
391 int virt = op_x86_phys_to_virt(i); 393 int virt = op_x86_phys_to_virt(i);
392 if (!reset_value[virt]) 394 if (!reset_value[virt])
393 continue; 395 continue;
@@ -406,7 +408,7 @@ static void op_amd_shutdown(struct op_msrs const * const msrs)
406{ 408{
407 int i; 409 int i;
408 410
409 for (i = 0; i < NUM_COUNTERS; ++i) { 411 for (i = 0; i < num_counters; ++i) {
410 if (!msrs->counters[i].addr) 412 if (!msrs->counters[i].addr)
411 continue; 413 continue;
412 release_perfctr_nmi(MSR_K7_PERFCTR0 + i); 414 release_perfctr_nmi(MSR_K7_PERFCTR0 + i);
@@ -418,7 +420,7 @@ static int op_amd_fill_in_addresses(struct op_msrs * const msrs)
418{ 420{
419 int i; 421 int i;
420 422
421 for (i = 0; i < NUM_COUNTERS; i++) { 423 for (i = 0; i < num_counters; i++) {
422 if (!reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i)) 424 if (!reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i))
423 goto fail; 425 goto fail;
424 if (!reserve_evntsel_nmi(MSR_K7_EVNTSEL0 + i)) { 426 if (!reserve_evntsel_nmi(MSR_K7_EVNTSEL0 + i)) {
@@ -426,8 +428,13 @@ static int op_amd_fill_in_addresses(struct op_msrs * const msrs)
426 goto fail; 428 goto fail;
427 } 429 }
428 /* both registers must be reserved */ 430 /* both registers must be reserved */
429 msrs->counters[i].addr = MSR_K7_PERFCTR0 + i; 431 if (num_counters == NUM_COUNTERS_F15H) {
430 msrs->controls[i].addr = MSR_K7_EVNTSEL0 + i; 432 msrs->counters[i].addr = MSR_F15H_PERF_CTR + (i << 1);
433 msrs->controls[i].addr = MSR_F15H_PERF_CTL + (i << 1);
434 } else {
435 msrs->controls[i].addr = MSR_K7_EVNTSEL0 + i;
436 msrs->counters[i].addr = MSR_K7_PERFCTR0 + i;
437 }
431 continue; 438 continue;
432 fail: 439 fail:
433 if (!counter_config[i].enabled) 440 if (!counter_config[i].enabled)
@@ -447,7 +454,7 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model,
447 int i; 454 int i;
448 455
449 /* setup reset_value */ 456 /* setup reset_value */
450 for (i = 0; i < NUM_VIRT_COUNTERS; ++i) { 457 for (i = 0; i < OP_MAX_COUNTER; ++i) {
451 if (counter_config[i].enabled 458 if (counter_config[i].enabled
452 && msrs->counters[op_x86_virt_to_phys(i)].addr) 459 && msrs->counters[op_x86_virt_to_phys(i)].addr)
453 reset_value[i] = counter_config[i].count; 460 reset_value[i] = counter_config[i].count;
@@ -456,7 +463,7 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model,
456 } 463 }
457 464
458 /* clear all counters */ 465 /* clear all counters */
459 for (i = 0; i < NUM_COUNTERS; ++i) { 466 for (i = 0; i < num_counters; ++i) {
460 if (!msrs->controls[i].addr) 467 if (!msrs->controls[i].addr)
461 continue; 468 continue;
462 rdmsrl(msrs->controls[i].addr, val); 469 rdmsrl(msrs->controls[i].addr, val);
@@ -472,7 +479,7 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model,
472 } 479 }
473 480
474 /* enable active counters */ 481 /* enable active counters */
475 for (i = 0; i < NUM_COUNTERS; ++i) { 482 for (i = 0; i < num_counters; ++i) {
476 int virt = op_x86_phys_to_virt(i); 483 int virt = op_x86_phys_to_virt(i);
477 if (!reset_value[virt]) 484 if (!reset_value[virt])
478 continue; 485 continue;
@@ -503,7 +510,7 @@ static int op_amd_check_ctrs(struct pt_regs * const regs,
503 u64 val; 510 u64 val;
504 int i; 511 int i;
505 512
506 for (i = 0; i < NUM_COUNTERS; ++i) { 513 for (i = 0; i < num_counters; ++i) {
507 int virt = op_x86_phys_to_virt(i); 514 int virt = op_x86_phys_to_virt(i);
508 if (!reset_value[virt]) 515 if (!reset_value[virt])
509 continue; 516 continue;
@@ -526,7 +533,7 @@ static void op_amd_start(struct op_msrs const * const msrs)
526 u64 val; 533 u64 val;
527 int i; 534 int i;
528 535
529 for (i = 0; i < NUM_COUNTERS; ++i) { 536 for (i = 0; i < num_counters; ++i) {
530 if (!reset_value[op_x86_phys_to_virt(i)]) 537 if (!reset_value[op_x86_phys_to_virt(i)])
531 continue; 538 continue;
532 rdmsrl(msrs->controls[i].addr, val); 539 rdmsrl(msrs->controls[i].addr, val);
@@ -546,7 +553,7 @@ static void op_amd_stop(struct op_msrs const * const msrs)
546 * Subtle: stop on all counters to avoid race with setting our 553 * Subtle: stop on all counters to avoid race with setting our
547 * pm callback 554 * pm callback
548 */ 555 */
549 for (i = 0; i < NUM_COUNTERS; ++i) { 556 for (i = 0; i < num_counters; ++i) {
550 if (!reset_value[op_x86_phys_to_virt(i)]) 557 if (!reset_value[op_x86_phys_to_virt(i)])
551 continue; 558 continue;
552 rdmsrl(msrs->controls[i].addr, val); 559 rdmsrl(msrs->controls[i].addr, val);
@@ -603,6 +610,7 @@ static int force_ibs_eilvt_setup(void)
603 ret = setup_ibs_ctl(i); 610 ret = setup_ibs_ctl(i);
604 if (ret) 611 if (ret)
605 return ret; 612 return ret;
613 pr_err(FW_BUG "using offset %d for IBS interrupts\n", i);
606 return 0; 614 return 0;
607 } 615 }
608 616
@@ -706,18 +714,29 @@ static int setup_ibs_files(struct super_block *sb, struct dentry *root)
706 return 0; 714 return 0;
707} 715}
708 716
717struct op_x86_model_spec op_amd_spec;
718
709static int op_amd_init(struct oprofile_operations *ops) 719static int op_amd_init(struct oprofile_operations *ops)
710{ 720{
711 init_ibs(); 721 init_ibs();
712 create_arch_files = ops->create_files; 722 create_arch_files = ops->create_files;
713 ops->create_files = setup_ibs_files; 723 ops->create_files = setup_ibs_files;
724
725 if (boot_cpu_data.x86 == 0x15) {
726 num_counters = NUM_COUNTERS_F15H;
727 } else {
728 num_counters = NUM_COUNTERS;
729 }
730
731 op_amd_spec.num_counters = num_counters;
732 op_amd_spec.num_controls = num_counters;
733 op_amd_spec.num_virt_counters = max(num_counters, NUM_VIRT_COUNTERS);
734
714 return 0; 735 return 0;
715} 736}
716 737
717struct op_x86_model_spec op_amd_spec = { 738struct op_x86_model_spec op_amd_spec = {
718 .num_counters = NUM_COUNTERS, 739 /* num_counters/num_controls filled in at runtime */
719 .num_controls = NUM_COUNTERS,
720 .num_virt_counters = NUM_VIRT_COUNTERS,
721 .reserved = MSR_AMD_EVENTSEL_RESERVED, 740 .reserved = MSR_AMD_EVENTSEL_RESERVED,
722 .event_mask = OP_EVENT_MASK, 741 .event_mask = OP_EVENT_MASK,
723 .init = op_amd_init, 742 .init = op_amd_init,
diff --git a/arch/x86/oprofile/op_model_p4.c b/arch/x86/oprofile/op_model_p4.c
index 182558dd5515..9fadec074142 100644
--- a/arch/x86/oprofile/op_model_p4.c
+++ b/arch/x86/oprofile/op_model_p4.c
@@ -11,7 +11,7 @@
11#include <linux/oprofile.h> 11#include <linux/oprofile.h>
12#include <linux/smp.h> 12#include <linux/smp.h>
13#include <linux/ptrace.h> 13#include <linux/ptrace.h>
14#include <linux/nmi.h> 14#include <asm/nmi.h>
15#include <asm/msr.h> 15#include <asm/msr.h>
16#include <asm/fixmap.h> 16#include <asm/fixmap.h>
17#include <asm/apic.h> 17#include <asm/apic.h>
diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c
index d769cda54082..94b745045e45 100644
--- a/arch/x86/oprofile/op_model_ppro.c
+++ b/arch/x86/oprofile/op_model_ppro.c
@@ -95,8 +95,8 @@ static void ppro_setup_ctrs(struct op_x86_model_spec const *model,
95 * counter width: 95 * counter width:
96 */ 96 */
97 if (!(eax.split.version_id == 0 && 97 if (!(eax.split.version_id == 0 &&
98 current_cpu_data.x86 == 6 && 98 __this_cpu_read(cpu_info.x86) == 6 &&
99 current_cpu_data.x86_model == 15)) { 99 __this_cpu_read(cpu_info.x86_model) == 15)) {
100 100
101 if (counter_width < eax.split.bit_width) 101 if (counter_width < eax.split.bit_width)
102 counter_width = eax.split.bit_width; 102 counter_width = eax.split.bit_width;
@@ -235,8 +235,8 @@ static void arch_perfmon_setup_counters(void)
235 eax.full = cpuid_eax(0xa); 235 eax.full = cpuid_eax(0xa);
236 236
237 /* Workaround for BIOS bugs in 6/15. Taken from perfmon2 */ 237 /* Workaround for BIOS bugs in 6/15. Taken from perfmon2 */
238 if (eax.split.version_id == 0 && current_cpu_data.x86 == 6 && 238 if (eax.split.version_id == 0 && __this_cpu_read(cpu_info.x86) == 6 &&
239 current_cpu_data.x86_model == 15) { 239 __this_cpu_read(cpu_info.x86_model) == 15) {
240 eax.split.version_id = 2; 240 eax.split.version_id = 2;
241 eax.split.num_counters = 2; 241 eax.split.num_counters = 2;
242 eax.split.bit_width = 40; 242 eax.split.bit_width = 40;
diff --git a/arch/x86/pci/Makefile b/arch/x86/pci/Makefile
index effd96e33f16..6b8759f7634e 100644
--- a/arch/x86/pci/Makefile
+++ b/arch/x86/pci/Makefile
@@ -7,6 +7,7 @@ obj-$(CONFIG_PCI_OLPC) += olpc.o
7obj-$(CONFIG_PCI_XEN) += xen.o 7obj-$(CONFIG_PCI_XEN) += xen.o
8 8
9obj-y += fixup.o 9obj-y += fixup.o
10obj-$(CONFIG_X86_INTEL_CE) += ce4100.o
10obj-$(CONFIG_ACPI) += acpi.o 11obj-$(CONFIG_ACPI) += acpi.o
11obj-y += legacy.o irq.o 12obj-y += legacy.o irq.o
12 13
diff --git a/arch/x86/pci/amd_bus.c b/arch/x86/pci/amd_bus.c
index fc1e8fe07e5c..e27dffbbb1a7 100644
--- a/arch/x86/pci/amd_bus.c
+++ b/arch/x86/pci/amd_bus.c
@@ -4,6 +4,7 @@
4#include <linux/cpu.h> 4#include <linux/cpu.h>
5#include <linux/range.h> 5#include <linux/range.h>
6 6
7#include <asm/amd_nb.h>
7#include <asm/pci_x86.h> 8#include <asm/pci_x86.h>
8 9
9#include <asm/pci-direct.h> 10#include <asm/pci-direct.h>
@@ -378,6 +379,34 @@ static struct notifier_block __cpuinitdata amd_cpu_notifier = {
378 .notifier_call = amd_cpu_notify, 379 .notifier_call = amd_cpu_notify,
379}; 380};
380 381
382static void __init pci_enable_pci_io_ecs(void)
383{
384#ifdef CONFIG_AMD_NB
385 unsigned int i, n;
386
387 for (n = i = 0; !n && amd_nb_bus_dev_ranges[i].dev_limit; ++i) {
388 u8 bus = amd_nb_bus_dev_ranges[i].bus;
389 u8 slot = amd_nb_bus_dev_ranges[i].dev_base;
390 u8 limit = amd_nb_bus_dev_ranges[i].dev_limit;
391
392 for (; slot < limit; ++slot) {
393 u32 val = read_pci_config(bus, slot, 3, 0);
394
395 if (!early_is_amd_nb(val))
396 continue;
397
398 val = read_pci_config(bus, slot, 3, 0x8c);
399 if (!(val & (ENABLE_CF8_EXT_CFG >> 32))) {
400 val |= ENABLE_CF8_EXT_CFG >> 32;
401 write_pci_config(bus, slot, 3, 0x8c, val);
402 }
403 ++n;
404 }
405 }
406 pr_info("Extended Config Space enabled on %u nodes\n", n);
407#endif
408}
409
381static int __init pci_io_ecs_init(void) 410static int __init pci_io_ecs_init(void)
382{ 411{
383 int cpu; 412 int cpu;
@@ -386,6 +415,10 @@ static int __init pci_io_ecs_init(void)
386 if (boot_cpu_data.x86 < 0x10) 415 if (boot_cpu_data.x86 < 0x10)
387 return 0; 416 return 0;
388 417
418 /* Try the PCI method first. */
419 if (early_pci_allowed())
420 pci_enable_pci_io_ecs();
421
389 register_cpu_notifier(&amd_cpu_notifier); 422 register_cpu_notifier(&amd_cpu_notifier);
390 for_each_online_cpu(cpu) 423 for_each_online_cpu(cpu)
391 amd_cpu_notify(&amd_cpu_notifier, (unsigned long)CPU_ONLINE, 424 amd_cpu_notify(&amd_cpu_notifier, (unsigned long)CPU_ONLINE,
diff --git a/arch/x86/pci/ce4100.c b/arch/x86/pci/ce4100.c
new file mode 100644
index 000000000000..85b68ef5e809
--- /dev/null
+++ b/arch/x86/pci/ce4100.c
@@ -0,0 +1,315 @@
1/*
2 * GPL LICENSE SUMMARY
3 *
4 * Copyright(c) 2010 Intel Corporation. All rights reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of version 2 of the GNU General Public License as
8 * published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
18 * The full GNU General Public License is included in this distribution
19 * in the file called LICENSE.GPL.
20 *
21 * Contact Information:
22 * Intel Corporation
23 * 2200 Mission College Blvd.
24 * Santa Clara, CA 97052
25 *
26 * This provides access methods for PCI registers that mis-behave on
27 * the CE4100. Each register can be assigned a private init, read and
28 * write routine. The exception to this is the bridge device. The
29 * bridge device is the only device on bus zero (0) that requires any
30 * fixup so it is a special case ATM
31 */
32
33#include <linux/kernel.h>
34#include <linux/pci.h>
35#include <linux/init.h>
36
37#include <asm/pci_x86.h>
38
39struct sim_reg {
40 u32 value;
41 u32 mask;
42};
43
44struct sim_dev_reg {
45 int dev_func;
46 int reg;
47 void (*init)(struct sim_dev_reg *reg);
48 void (*read)(struct sim_dev_reg *reg, u32 *value);
49 void (*write)(struct sim_dev_reg *reg, u32 value);
50 struct sim_reg sim_reg;
51};
52
53struct sim_reg_op {
54 void (*init)(struct sim_dev_reg *reg);
55 void (*read)(struct sim_dev_reg *reg, u32 value);
56 void (*write)(struct sim_dev_reg *reg, u32 value);
57};
58
#define MB (1024 * 1024)
#define KB (1024)
/*
 * Mask of the bits above a power-of-two @size.  The argument is
 * parenthesized so that expressions such as (1 << n) expand correctly.
 */
#define SIZE_TO_MASK(size) (~((size) - 1))

/*
 * Positional initializer for one struct sim_dev_reg table entry.  The
 * expansion ends with a comma, so table entries are written without
 * separators.
 */
#define DEFINE_REG(device, func, offset, size, init_op, read_op, write_op)\
{ PCI_DEVFN(device, func), offset, init_op, read_op, write_op,\
	{0, SIZE_TO_MASK(size)} },
66
67static void reg_init(struct sim_dev_reg *reg)
68{
69 pci_direct_conf1.read(0, 1, reg->dev_func, reg->reg, 4,
70 &reg->sim_reg.value);
71}
72
73static void reg_read(struct sim_dev_reg *reg, u32 *value)
74{
75 unsigned long flags;
76
77 raw_spin_lock_irqsave(&pci_config_lock, flags);
78 *value = reg->sim_reg.value;
79 raw_spin_unlock_irqrestore(&pci_config_lock, flags);
80}
81
82static void reg_write(struct sim_dev_reg *reg, u32 value)
83{
84 unsigned long flags;
85
86 raw_spin_lock_irqsave(&pci_config_lock, flags);
87 reg->sim_reg.value = (value & reg->sim_reg.mask) |
88 (reg->sim_reg.value & ~reg->sim_reg.mask);
89 raw_spin_unlock_irqrestore(&pci_config_lock, flags);
90}
91
92static void sata_reg_init(struct sim_dev_reg *reg)
93{
94 pci_direct_conf1.read(0, 1, PCI_DEVFN(14, 0), 0x10, 4,
95 &reg->sim_reg.value);
96 reg->sim_reg.value += 0x400;
97}
98
99static void ehci_reg_read(struct sim_dev_reg *reg, u32 *value)
100{
101 reg_read(reg, value);
102 if (*value != reg->sim_reg.mask)
103 *value |= 0x100;
104}
105
106void sata_revid_init(struct sim_dev_reg *reg)
107{
108 reg->sim_reg.value = 0x01060100;
109 reg->sim_reg.mask = 0;
110}
111
112static void sata_revid_read(struct sim_dev_reg *reg, u32 *value)
113{
114 reg_read(reg, value);
115}
116
117static struct sim_dev_reg bus1_fixups[] = {
118 DEFINE_REG(2, 0, 0x10, (16*MB), reg_init, reg_read, reg_write)
119 DEFINE_REG(2, 0, 0x14, (256), reg_init, reg_read, reg_write)
120 DEFINE_REG(2, 1, 0x10, (64*KB), reg_init, reg_read, reg_write)
121 DEFINE_REG(3, 0, 0x10, (64*KB), reg_init, reg_read, reg_write)
122 DEFINE_REG(4, 0, 0x10, (128*KB), reg_init, reg_read, reg_write)
123 DEFINE_REG(4, 1, 0x10, (128*KB), reg_init, reg_read, reg_write)
124 DEFINE_REG(6, 0, 0x10, (512*KB), reg_init, reg_read, reg_write)
125 DEFINE_REG(6, 1, 0x10, (512*KB), reg_init, reg_read, reg_write)
126 DEFINE_REG(6, 2, 0x10, (64*KB), reg_init, reg_read, reg_write)
127 DEFINE_REG(8, 0, 0x10, (1*MB), reg_init, reg_read, reg_write)
128 DEFINE_REG(8, 1, 0x10, (64*KB), reg_init, reg_read, reg_write)
129 DEFINE_REG(8, 2, 0x10, (64*KB), reg_init, reg_read, reg_write)
130 DEFINE_REG(9, 0, 0x10 , (1*MB), reg_init, reg_read, reg_write)
131 DEFINE_REG(9, 0, 0x14, (64*KB), reg_init, reg_read, reg_write)
132 DEFINE_REG(10, 0, 0x10, (256), reg_init, reg_read, reg_write)
133 DEFINE_REG(10, 0, 0x14, (256*MB), reg_init, reg_read, reg_write)
134 DEFINE_REG(11, 0, 0x10, (256), reg_init, reg_read, reg_write)
135 DEFINE_REG(11, 0, 0x14, (256), reg_init, reg_read, reg_write)
136 DEFINE_REG(11, 1, 0x10, (256), reg_init, reg_read, reg_write)
137 DEFINE_REG(11, 2, 0x10, (256), reg_init, reg_read, reg_write)
138 DEFINE_REG(11, 2, 0x14, (256), reg_init, reg_read, reg_write)
139 DEFINE_REG(11, 2, 0x18, (256), reg_init, reg_read, reg_write)
140 DEFINE_REG(11, 3, 0x10, (256), reg_init, reg_read, reg_write)
141 DEFINE_REG(11, 3, 0x14, (256), reg_init, reg_read, reg_write)
142 DEFINE_REG(11, 4, 0x10, (256), reg_init, reg_read, reg_write)
143 DEFINE_REG(11, 5, 0x10, (64*KB), reg_init, reg_read, reg_write)
144 DEFINE_REG(11, 6, 0x10, (256), reg_init, reg_read, reg_write)
145 DEFINE_REG(11, 7, 0x10, (64*KB), reg_init, reg_read, reg_write)
146 DEFINE_REG(12, 0, 0x10, (128*KB), reg_init, reg_read, reg_write)
147 DEFINE_REG(12, 0, 0x14, (256), reg_init, reg_read, reg_write)
148 DEFINE_REG(12, 1, 0x10, (1024), reg_init, reg_read, reg_write)
149 DEFINE_REG(13, 0, 0x10, (32*KB), reg_init, ehci_reg_read, reg_write)
150 DEFINE_REG(13, 1, 0x10, (32*KB), reg_init, ehci_reg_read, reg_write)
151 DEFINE_REG(14, 0, 0x8, 0, sata_revid_init, sata_revid_read, 0)
152 DEFINE_REG(14, 0, 0x10, 0, reg_init, reg_read, reg_write)
153 DEFINE_REG(14, 0, 0x14, 0, reg_init, reg_read, reg_write)
154 DEFINE_REG(14, 0, 0x18, 0, reg_init, reg_read, reg_write)
155 DEFINE_REG(14, 0, 0x1C, 0, reg_init, reg_read, reg_write)
156 DEFINE_REG(14, 0, 0x20, 0, reg_init, reg_read, reg_write)
157 DEFINE_REG(14, 0, 0x24, (0x200), sata_reg_init, reg_read, reg_write)
158 DEFINE_REG(15, 0, 0x10, (64*KB), reg_init, reg_read, reg_write)
159 DEFINE_REG(15, 0, 0x14, (64*KB), reg_init, reg_read, reg_write)
160 DEFINE_REG(16, 0, 0x10, (64*KB), reg_init, reg_read, reg_write)
161 DEFINE_REG(16, 0, 0x14, (64*MB), reg_init, reg_read, reg_write)
162 DEFINE_REG(16, 0, 0x18, (64*MB), reg_init, reg_read, reg_write)
163 DEFINE_REG(17, 0, 0x10, (128*KB), reg_init, reg_read, reg_write)
164 DEFINE_REG(18, 0, 0x10, (1*KB), reg_init, reg_read, reg_write)
165};
166
167static void __init init_sim_regs(void)
168{
169 int i;
170
171 for (i = 0; i < ARRAY_SIZE(bus1_fixups); i++) {
172 if (bus1_fixups[i].init)
173 bus1_fixups[i].init(&bus1_fixups[i]);
174 }
175}
176
177static inline void extract_bytes(u32 *value, int reg, int len)
178{
179 uint32_t mask;
180
181 *value >>= ((reg & 3) * 8);
182 mask = 0xFFFFFFFF >> ((4 - len) * 8);
183 *value &= mask;
184}
185
186int bridge_read(unsigned int devfn, int reg, int len, u32 *value)
187{
188 u32 av_bridge_base, av_bridge_limit;
189 int retval = 0;
190
191 switch (reg) {
192 /* Make BARs appear to not request any memory. */
193 case PCI_BASE_ADDRESS_0:
194 case PCI_BASE_ADDRESS_0 + 1:
195 case PCI_BASE_ADDRESS_0 + 2:
196 case PCI_BASE_ADDRESS_0 + 3:
197 *value = 0;
198 break;
199
200 /* Since subordinate bus number register is hardwired
201 * to zero and read only, so do the simulation.
202 */
203 case PCI_PRIMARY_BUS:
204 if (len == 4)
205 *value = 0x00010100;
206 break;
207
208 case PCI_SUBORDINATE_BUS:
209 *value = 1;
210 break;
211
212 case PCI_MEMORY_BASE:
213 case PCI_MEMORY_LIMIT:
214 /* Get the A/V bridge base address. */
215 pci_direct_conf1.read(0, 0, devfn,
216 PCI_BASE_ADDRESS_0, 4, &av_bridge_base);
217
218 av_bridge_limit = av_bridge_base + (512*MB - 1);
219 av_bridge_limit >>= 16;
220 av_bridge_limit &= 0xFFF0;
221
222 av_bridge_base >>= 16;
223 av_bridge_base &= 0xFFF0;
224
225 if (reg == PCI_MEMORY_LIMIT)
226 *value = av_bridge_limit;
227 else if (len == 2)
228 *value = av_bridge_base;
229 else
230 *value = (av_bridge_limit << 16) | av_bridge_base;
231 break;
232 /* Make prefetchable memory limit smaller than prefetchable
233 * memory base, so not claim prefetchable memory space.
234 */
235 case PCI_PREF_MEMORY_BASE:
236 *value = 0xFFF0;
237 break;
238 case PCI_PREF_MEMORY_LIMIT:
239 *value = 0x0;
240 break;
241 /* Make IO limit smaller than IO base, so not claim IO space. */
242 case PCI_IO_BASE:
243 *value = 0xF0;
244 break;
245 case PCI_IO_LIMIT:
246 *value = 0;
247 break;
248 default:
249 retval = 1;
250 }
251 return retval;
252}
253
254static int ce4100_conf_read(unsigned int seg, unsigned int bus,
255 unsigned int devfn, int reg, int len, u32 *value)
256{
257 int i, retval = 1;
258
259 if (bus == 1) {
260 for (i = 0; i < ARRAY_SIZE(bus1_fixups); i++) {
261 if (bus1_fixups[i].dev_func == devfn &&
262 bus1_fixups[i].reg == (reg & ~3) &&
263 bus1_fixups[i].read) {
264 bus1_fixups[i].read(&(bus1_fixups[i]),
265 value);
266 extract_bytes(value, reg, len);
267 return 0;
268 }
269 }
270 }
271
272 if (bus == 0 && (PCI_DEVFN(1, 0) == devfn) &&
273 !bridge_read(devfn, reg, len, value))
274 return 0;
275
276 return pci_direct_conf1.read(seg, bus, devfn, reg, len, value);
277}
278
279static int ce4100_conf_write(unsigned int seg, unsigned int bus,
280 unsigned int devfn, int reg, int len, u32 value)
281{
282 int i;
283
284 if (bus == 1) {
285 for (i = 0; i < ARRAY_SIZE(bus1_fixups); i++) {
286 if (bus1_fixups[i].dev_func == devfn &&
287 bus1_fixups[i].reg == (reg & ~3) &&
288 bus1_fixups[i].write) {
289 bus1_fixups[i].write(&(bus1_fixups[i]),
290 value);
291 return 0;
292 }
293 }
294 }
295
296 /* Discard writes to A/V bridge BAR. */
297 if (bus == 0 && PCI_DEVFN(1, 0) == devfn &&
298 ((reg & ~3) == PCI_BASE_ADDRESS_0))
299 return 0;
300
301 return pci_direct_conf1.write(seg, bus, devfn, reg, len, value);
302}
303
304struct pci_raw_ops ce4100_pci_conf = {
305 .read = ce4100_conf_read,
306 .write = ce4100_conf_write,
307};
308
309static int __init ce4100_pci_init(void)
310{
311 init_sim_regs();
312 raw_pci_ops = &ce4100_pci_conf;
313 return 0;
314}
315subsys_initcall(ce4100_pci_init);
diff --git a/arch/x86/pci/pcbios.c b/arch/x86/pci/pcbios.c
index 2492d165096a..a5f7d0d63de0 100644
--- a/arch/x86/pci/pcbios.c
+++ b/arch/x86/pci/pcbios.c
@@ -9,6 +9,7 @@
9#include <linux/uaccess.h> 9#include <linux/uaccess.h>
10#include <asm/pci_x86.h> 10#include <asm/pci_x86.h>
11#include <asm/pci-functions.h> 11#include <asm/pci-functions.h>
12#include <asm/cacheflush.h>
12 13
13/* BIOS32 signature: "_32_" */ 14/* BIOS32 signature: "_32_" */
14#define BIOS32_SIGNATURE (('_' << 0) + ('3' << 8) + ('2' << 16) + ('_' << 24)) 15#define BIOS32_SIGNATURE (('_' << 0) + ('3' << 8) + ('2' << 16) + ('_' << 24))
@@ -25,6 +26,27 @@
25#define PCIBIOS_HW_TYPE1_SPEC 0x10 26#define PCIBIOS_HW_TYPE1_SPEC 0x10
26#define PCIBIOS_HW_TYPE2_SPEC 0x20 27#define PCIBIOS_HW_TYPE2_SPEC 0x20
27 28
29int pcibios_enabled;
30
31/* According to the BIOS specification at:
32 * http://members.datafast.net.au/dft0802/specs/bios21.pdf, we could
33 * restrict the x zone to some pages and make it ro. But this may be
34 * broken on some bios, complex to handle with static_protections.
35 * We could make the 0xe0000-0x100000 range rox, but this can break
36 * some ISA mapping.
37 *
38 * So we let's an rw and x hole when pcibios is used. This shouldn't
39 * happen for modern system with mmconfig, and if you don't want it
40 * you could disable pcibios...
41 */
42static inline void set_bios_x(void)
43{
44 pcibios_enabled = 1;
45 set_memory_x(PAGE_OFFSET + BIOS_BEGIN, (BIOS_END - BIOS_BEGIN) >> PAGE_SHIFT);
46 if (__supported_pte_mask & _PAGE_NX)
47 printk(KERN_INFO "PCI : PCI BIOS aera is rw and x. Use pci=nobios if you want it NX.\n");
48}
49
28/* 50/*
29 * This is the standard structure used to identify the entry point 51 * This is the standard structure used to identify the entry point
30 * to the BIOS32 Service Directory, as documented in 52 * to the BIOS32 Service Directory, as documented in
@@ -332,6 +354,7 @@ static struct pci_raw_ops * __devinit pci_find_bios(void)
332 DBG("PCI: BIOS32 Service Directory entry at 0x%lx\n", 354 DBG("PCI: BIOS32 Service Directory entry at 0x%lx\n",
333 bios32_entry); 355 bios32_entry);
334 bios32_indirect.address = bios32_entry + PAGE_OFFSET; 356 bios32_indirect.address = bios32_entry + PAGE_OFFSET;
357 set_bios_x();
335 if (check_pcibios()) 358 if (check_pcibios())
336 return &pci_bios_access; 359 return &pci_bios_access;
337 } 360 }
diff --git a/arch/x86/platform/Makefile b/arch/x86/platform/Makefile
index 7bf70b812fa2..021eee91c056 100644
--- a/arch/x86/platform/Makefile
+++ b/arch/x86/platform/Makefile
@@ -1,5 +1,7 @@
1# Platform specific code goes here 1# Platform specific code goes here
2obj-y += ce4100/
2obj-y += efi/ 3obj-y += efi/
4obj-y += iris/
3obj-y += mrst/ 5obj-y += mrst/
4obj-y += olpc/ 6obj-y += olpc/
5obj-y += scx200/ 7obj-y += scx200/
diff --git a/arch/x86/platform/ce4100/Makefile b/arch/x86/platform/ce4100/Makefile
new file mode 100644
index 000000000000..91fc92971d94
--- /dev/null
+++ b/arch/x86/platform/ce4100/Makefile
@@ -0,0 +1 @@
obj-$(CONFIG_X86_INTEL_CE) += ce4100.o
diff --git a/arch/x86/platform/ce4100/ce4100.c b/arch/x86/platform/ce4100/ce4100.c
new file mode 100644
index 000000000000..d2c0d51a7178
--- /dev/null
+++ b/arch/x86/platform/ce4100/ce4100.c
@@ -0,0 +1,132 @@
1/*
2 * Intel CE4100 platform specific setup code
3 *
4 * (C) Copyright 2010 Intel Corporation
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; version 2
9 * of the License.
10 */
11#include <linux/init.h>
12#include <linux/kernel.h>
13#include <linux/irq.h>
14#include <linux/module.h>
15#include <linux/serial_reg.h>
16#include <linux/serial_8250.h>
17
18#include <asm/setup.h>
19#include <asm/io.h>
20
/*
 * x86_platform.i8042_detect hook for the CE4100: unconditionally
 * returns 0.
 */
static int ce4100_i8042_detect(void)
{
	return 0;
}
25
26static void __init sdv_find_smp_config(void)
27{
28}
29
30#ifdef CONFIG_SERIAL_8250
31
32
33static unsigned int mem_serial_in(struct uart_port *p, int offset)
34{
35 offset = offset << p->regshift;
36 return readl(p->membase + offset);
37}
38
39/*
40 * The UART Tx interrupts are not set under some conditions and therefore serial
41 * transmission hangs. This is a silicon issue and has not been root caused. The
42 * workaround for this silicon issue checks UART_LSR_THRE bit and UART_LSR_TEMT
43 * bit of LSR register in interrupt handler to see whether at least one of these
44 * two bits is set, if so then process the transmit request. If this workaround
45 * is not applied, then the serial transmission may hang. This workaround is for
46 * errata number 9 in Errata - B step.
47*/
48
49static unsigned int ce4100_mem_serial_in(struct uart_port *p, int offset)
50{
51 unsigned int ret, ier, lsr;
52
53 if (offset == UART_IIR) {
54 offset = offset << p->regshift;
55 ret = readl(p->membase + offset);
56 if (ret & UART_IIR_NO_INT) {
57 /* see if the TX interrupt should have really set */
58 ier = mem_serial_in(p, UART_IER);
59 /* see if the UART's XMIT interrupt is enabled */
60 if (ier & UART_IER_THRI) {
61 lsr = mem_serial_in(p, UART_LSR);
62 /* now check to see if the UART should be
63 generating an interrupt (but isn't) */
64 if (lsr & (UART_LSR_THRE | UART_LSR_TEMT))
65 ret &= ~UART_IIR_NO_INT;
66 }
67 }
68 } else
69 ret = mem_serial_in(p, offset);
70 return ret;
71}
72
73static void ce4100_mem_serial_out(struct uart_port *p, int offset, int value)
74{
75 offset = offset << p->regshift;
76 writel(value, p->membase + offset);
77}
78
79static void ce4100_serial_fixup(int port, struct uart_port *up,
80 unsigned short *capabilites)
81{
82#ifdef CONFIG_EARLY_PRINTK
83 /*
84 * Over ride the legacy port configuration that comes from
85 * asm/serial.h. Using the ioport driver then switching to the
86 * PCI memmaped driver hangs the IOAPIC
87 */
88 if (up->iotype != UPIO_MEM32) {
89 up->uartclk = 14745600;
90 up->mapbase = 0xdffe0200;
91 set_fixmap_nocache(FIX_EARLYCON_MEM_BASE,
92 up->mapbase & PAGE_MASK);
93 up->membase =
94 (void __iomem *)__fix_to_virt(FIX_EARLYCON_MEM_BASE);
95 up->membase += up->mapbase & ~PAGE_MASK;
96 up->iotype = UPIO_MEM32;
97 up->regshift = 2;
98 }
99#endif
100 up->iobase = 0;
101 up->serial_in = ce4100_mem_serial_in;
102 up->serial_out = ce4100_mem_serial_out;
103
104 *capabilites |= (1 << 12);
105}
106
107static __init void sdv_serial_fixup(void)
108{
109 serial8250_set_isa_configurator(ce4100_serial_fixup);
110}
111
112#else
113static inline void sdv_serial_fixup(void);
114#endif
115
116static void __init sdv_arch_setup(void)
117{
118 sdv_serial_fixup();
119}
120
121/*
122 * CE4100 specific x86_init function overrides and early setup
123 * calls.
124 */
125void __init x86_ce4100_early_setup(void)
126{
127 x86_init.oem.arch_setup = sdv_arch_setup;
128 x86_platform.i8042_detect = ce4100_i8042_detect;
129 x86_init.resources.probe_roms = x86_init_noop;
130 x86_init.mpparse.get_smp_config = x86_init_uint_noop;
131 x86_init.mpparse.find_smp_config = sdv_find_smp_config;
132}
diff --git a/arch/x86/platform/iris/Makefile b/arch/x86/platform/iris/Makefile
new file mode 100644
index 000000000000..db921983a102
--- /dev/null
+++ b/arch/x86/platform/iris/Makefile
@@ -0,0 +1 @@
obj-$(CONFIG_X86_32_IRIS) += iris.o
diff --git a/arch/x86/platform/iris/iris.c b/arch/x86/platform/iris/iris.c
new file mode 100644
index 000000000000..1ba7f5ed8c9b
--- /dev/null
+++ b/arch/x86/platform/iris/iris.c
@@ -0,0 +1,91 @@
1/*
2 * Eurobraille/Iris power off support.
3 *
4 * Eurobraille's Iris machine is a PC with no APM or ACPI support.
5 * It is shutdown by a special I/O sequence which this module provides.
6 *
7 * Copyright (C) Shérab <Sebastien.Hinderer@ens-lyon.org>
8 *
9 * This program is free software ; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation ; either version 2 of the License, or
12 * (at your option) any later version.
13 *
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY ; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
18 *
19 * You should have received a copy of the GNU General Public License
20 * along with the program ; if not, write to the Free Software
21 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
22 */
23
24#include <linux/moduleparam.h>
25#include <linux/module.h>
26#include <linux/kernel.h>
27#include <linux/errno.h>
28#include <linux/delay.h>
29#include <linux/init.h>
30#include <linux/pm.h>
31#include <asm/io.h>
32
/* I/O ports and byte values used by the Iris power-off sequence */
#define IRIS_GIO_BASE		0x340
#define IRIS_GIO_INPUT		IRIS_GIO_BASE
#define IRIS_GIO_OUTPUT		(IRIS_GIO_BASE + 1)
#define IRIS_GIO_PULSE		0x80 /* First byte to send */
#define IRIS_GIO_REST		0x00 /* Second byte to send */
#define IRIS_GIO_NODEV		0xff /* Likely not an Iris */
39
40MODULE_LICENSE("GPL");
41MODULE_AUTHOR("Sébastien Hinderer <Sebastien.Hinderer@ens-lyon.org>");
42MODULE_DESCRIPTION("A power_off handler for Iris devices from EuroBraille");
43MODULE_SUPPORTED_DEVICE("Eurobraille/Iris");
44
45static int force;
46
47module_param(force, bool, 0);
48MODULE_PARM_DESC(force, "Set to one to force poweroff handler installation.");
49
50static void (*old_pm_power_off)(void);
51
52static void iris_power_off(void)
53{
54 outb(IRIS_GIO_PULSE, IRIS_GIO_OUTPUT);
55 msleep(850);
56 outb(IRIS_GIO_REST, IRIS_GIO_OUTPUT);
57}
58
59/*
60 * Before installing the power_off handler, try to make sure the OS is
61 * running on an Iris. Since Iris does not support DMI, this is done
62 * by reading its input port and seeing whether the read value is
63 * meaningful.
64 */
65static int iris_init(void)
66{
67 unsigned char status;
68 if (force != 1) {
69 printk(KERN_ERR "The force parameter has not been set to 1 so the Iris poweroff handler will not be installed.\n");
70 return -ENODEV;
71 }
72 status = inb(IRIS_GIO_INPUT);
73 if (status == IRIS_GIO_NODEV) {
74 printk(KERN_ERR "This machine does not seem to be an Iris. Power_off handler not installed.\n");
75 return -ENODEV;
76 }
77 old_pm_power_off = pm_power_off;
78 pm_power_off = &iris_power_off;
79 printk(KERN_INFO "Iris power_off handler installed.\n");
80
81 return 0;
82}
83
84static void iris_exit(void)
85{
86 pm_power_off = old_pm_power_off;
87 printk(KERN_INFO "Iris power_off handler uninstalled.\n");
88}
89
90module_init(iris_init);
91module_exit(iris_exit);
diff --git a/arch/x86/platform/mrst/Makefile b/arch/x86/platform/mrst/Makefile
index efbbc552fa95..f61ccdd49341 100644
--- a/arch/x86/platform/mrst/Makefile
+++ b/arch/x86/platform/mrst/Makefile
@@ -1 +1,3 @@
1obj-$(CONFIG_X86_MRST) += mrst.o 1obj-$(CONFIG_X86_MRST) += mrst.o
2obj-$(CONFIG_X86_MRST) += vrtc.o
3obj-$(CONFIG_EARLY_PRINTK_MRST) += early_printk_mrst.o
diff --git a/arch/x86/kernel/early_printk_mrst.c b/arch/x86/platform/mrst/early_printk_mrst.c
index 65df603622b2..65df603622b2 100644
--- a/arch/x86/kernel/early_printk_mrst.c
+++ b/arch/x86/platform/mrst/early_printk_mrst.c
diff --git a/arch/x86/platform/mrst/mrst.c b/arch/x86/platform/mrst/mrst.c
index 79ae68154e87..ea6529e93c6f 100644
--- a/arch/x86/platform/mrst/mrst.c
+++ b/arch/x86/platform/mrst/mrst.c
@@ -9,9 +9,19 @@
9 * as published by the Free Software Foundation; version 2 9 * as published by the Free Software Foundation; version 2
10 * of the License. 10 * of the License.
11 */ 11 */
12
13#define pr_fmt(fmt) "mrst: " fmt
14
12#include <linux/init.h> 15#include <linux/init.h>
13#include <linux/kernel.h> 16#include <linux/kernel.h>
14#include <linux/sfi.h> 17#include <linux/sfi.h>
18#include <linux/intel_pmic_gpio.h>
19#include <linux/spi/spi.h>
20#include <linux/i2c.h>
21#include <linux/i2c/pca953x.h>
22#include <linux/gpio_keys.h>
23#include <linux/input.h>
24#include <linux/platform_device.h>
15#include <linux/irq.h> 25#include <linux/irq.h>
16#include <linux/module.h> 26#include <linux/module.h>
17 27
@@ -23,7 +33,9 @@
23#include <asm/mrst.h> 33#include <asm/mrst.h>
24#include <asm/io.h> 34#include <asm/io.h>
25#include <asm/i8259.h> 35#include <asm/i8259.h>
36#include <asm/intel_scu_ipc.h>
26#include <asm/apb_timer.h> 37#include <asm/apb_timer.h>
38#include <asm/reboot.h>
27 39
28/* 40/*
29 * the clockevent devices on Moorestown/Medfield can be APBT or LAPIC clock, 41 * the clockevent devices on Moorestown/Medfield can be APBT or LAPIC clock,
@@ -59,32 +71,6 @@ struct sfi_rtc_table_entry sfi_mrtc_array[SFI_MRTC_MAX];
59EXPORT_SYMBOL_GPL(sfi_mrtc_array); 71EXPORT_SYMBOL_GPL(sfi_mrtc_array);
60int sfi_mrtc_num; 72int sfi_mrtc_num;
61 73
62static inline void assign_to_mp_irq(struct mpc_intsrc *m,
63 struct mpc_intsrc *mp_irq)
64{
65 memcpy(mp_irq, m, sizeof(struct mpc_intsrc));
66}
67
68static inline int mp_irq_cmp(struct mpc_intsrc *mp_irq,
69 struct mpc_intsrc *m)
70{
71 return memcmp(mp_irq, m, sizeof(struct mpc_intsrc));
72}
73
74static void save_mp_irq(struct mpc_intsrc *m)
75{
76 int i;
77
78 for (i = 0; i < mp_irq_entries; i++) {
79 if (!mp_irq_cmp(&mp_irqs[i], m))
80 return;
81 }
82
83 assign_to_mp_irq(m, &mp_irqs[mp_irq_entries]);
84 if (++mp_irq_entries == MAX_IRQ_SOURCES)
85 panic("Max # of irq sources exceeded!!\n");
86}
87
88/* parse all the mtimer info to a static mtimer array */ 74/* parse all the mtimer info to a static mtimer array */
89static int __init sfi_parse_mtmr(struct sfi_table_header *table) 75static int __init sfi_parse_mtmr(struct sfi_table_header *table)
90{ 76{
@@ -102,10 +88,10 @@ static int __init sfi_parse_mtmr(struct sfi_table_header *table)
102 memcpy(sfi_mtimer_array, pentry, totallen); 88 memcpy(sfi_mtimer_array, pentry, totallen);
103 } 89 }
104 90
105 printk(KERN_INFO "SFI: MTIMER info (num = %d):\n", sfi_mtimer_num); 91 pr_debug("SFI MTIMER info (num = %d):\n", sfi_mtimer_num);
106 pentry = sfi_mtimer_array; 92 pentry = sfi_mtimer_array;
107 for (totallen = 0; totallen < sfi_mtimer_num; totallen++, pentry++) { 93 for (totallen = 0; totallen < sfi_mtimer_num; totallen++, pentry++) {
108 printk(KERN_INFO "timer[%d]: paddr = 0x%08x, freq = %dHz," 94 pr_debug("timer[%d]: paddr = 0x%08x, freq = %dHz,"
109 " irq = %d\n", totallen, (u32)pentry->phys_addr, 95 " irq = %d\n", totallen, (u32)pentry->phys_addr,
110 pentry->freq_hz, pentry->irq); 96 pentry->freq_hz, pentry->irq);
111 if (!pentry->irq) 97 if (!pentry->irq)
@@ -118,7 +104,7 @@ static int __init sfi_parse_mtmr(struct sfi_table_header *table)
118 mp_irq.srcbusirq = pentry->irq; /* IRQ */ 104 mp_irq.srcbusirq = pentry->irq; /* IRQ */
119 mp_irq.dstapic = MP_APIC_ALL; 105 mp_irq.dstapic = MP_APIC_ALL;
120 mp_irq.dstirq = pentry->irq; 106 mp_irq.dstirq = pentry->irq;
121 save_mp_irq(&mp_irq); 107 mp_save_irq(&mp_irq);
122 } 108 }
123 109
124 return 0; 110 return 0;
@@ -176,19 +162,19 @@ int __init sfi_parse_mrtc(struct sfi_table_header *table)
176 memcpy(sfi_mrtc_array, pentry, totallen); 162 memcpy(sfi_mrtc_array, pentry, totallen);
177 } 163 }
178 164
179 printk(KERN_INFO "SFI: RTC info (num = %d):\n", sfi_mrtc_num); 165 pr_debug("SFI RTC info (num = %d):\n", sfi_mrtc_num);
180 pentry = sfi_mrtc_array; 166 pentry = sfi_mrtc_array;
181 for (totallen = 0; totallen < sfi_mrtc_num; totallen++, pentry++) { 167 for (totallen = 0; totallen < sfi_mrtc_num; totallen++, pentry++) {
182 printk(KERN_INFO "RTC[%d]: paddr = 0x%08x, irq = %d\n", 168 pr_debug("RTC[%d]: paddr = 0x%08x, irq = %d\n",
183 totallen, (u32)pentry->phys_addr, pentry->irq); 169 totallen, (u32)pentry->phys_addr, pentry->irq);
184 mp_irq.type = MP_IOAPIC; 170 mp_irq.type = MP_IOAPIC;
185 mp_irq.irqtype = mp_INT; 171 mp_irq.irqtype = mp_INT;
186 mp_irq.irqflag = 0; 172 mp_irq.irqflag = 0xf; /* level trigger and active low */
187 mp_irq.srcbus = 0; 173 mp_irq.srcbus = 0;
188 mp_irq.srcbusirq = pentry->irq; /* IRQ */ 174 mp_irq.srcbusirq = pentry->irq; /* IRQ */
189 mp_irq.dstapic = MP_APIC_ALL; 175 mp_irq.dstapic = MP_APIC_ALL;
190 mp_irq.dstirq = pentry->irq; 176 mp_irq.dstirq = pentry->irq;
191 save_mp_irq(&mp_irq); 177 mp_save_irq(&mp_irq);
192 } 178 }
193 return 0; 179 return 0;
194} 180}
@@ -209,6 +195,7 @@ static unsigned long __init mrst_calibrate_tsc(void)
209 195
210void __init mrst_time_init(void) 196void __init mrst_time_init(void)
211{ 197{
198 sfi_table_parse(SFI_SIG_MTMR, NULL, NULL, sfi_parse_mtmr);
212 switch (mrst_timer_options) { 199 switch (mrst_timer_options) {
213 case MRST_TIMER_APBT_ONLY: 200 case MRST_TIMER_APBT_ONLY:
214 break; 201 break;
@@ -224,16 +211,10 @@ void __init mrst_time_init(void)
224 return; 211 return;
225 } 212 }
226 /* we need at least one APB timer */ 213 /* we need at least one APB timer */
227 sfi_table_parse(SFI_SIG_MTMR, NULL, NULL, sfi_parse_mtmr);
228 pre_init_apic_IRQ0(); 214 pre_init_apic_IRQ0();
229 apbt_time_init(); 215 apbt_time_init();
230} 216}
231 217
232void __init mrst_rtc_init(void)
233{
234 sfi_table_parse(SFI_SIG_MRTC, NULL, NULL, sfi_parse_mrtc);
235}
236
237void __cpuinit mrst_arch_setup(void) 218void __cpuinit mrst_arch_setup(void)
238{ 219{
239 if (boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 0x27) 220 if (boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 0x27)
@@ -256,6 +237,17 @@ static int mrst_i8042_detect(void)
256 return 0; 237 return 0;
257} 238}
258 239
240/* Reboot and power off are handled by the SCU on a MID device */
241static void mrst_power_off(void)
242{
243 intel_scu_ipc_simple_command(0xf1, 1);
244}
245
246static void mrst_reboot(void)
247{
248 intel_scu_ipc_simple_command(0xf1, 0);
249}
250
259/* 251/*
260 * Moorestown specific x86_init function overrides and early setup 252 * Moorestown specific x86_init function overrides and early setup
261 * calls. 253 * calls.
@@ -281,6 +273,10 @@ void __init x86_mrst_early_setup(void)
281 273
282 legacy_pic = &null_legacy_pic; 274 legacy_pic = &null_legacy_pic;
283 275
276 /* Moorestown specific power_off/restart method */
277 pm_power_off = mrst_power_off;
278 machine_ops.emergency_restart = mrst_reboot;
279
284 /* Avoid searching for BIOS MP tables */ 280 /* Avoid searching for BIOS MP tables */
285 x86_init.mpparse.find_smp_config = x86_init_noop; 281 x86_init.mpparse.find_smp_config = x86_init_noop;
286 x86_init.mpparse.get_smp_config = x86_init_uint_noop; 282 x86_init.mpparse.get_smp_config = x86_init_uint_noop;
@@ -309,3 +305,505 @@ static inline int __init setup_x86_mrst_timer(char *arg)
309 return 0; 305 return 0;
310} 306}
311__setup("x86_mrst_timer=", setup_x86_mrst_timer); 307__setup("x86_mrst_timer=", setup_x86_mrst_timer);
308
309/*
 310 * Parse the GPIO table first, since the DEVS table will need this table
311 * to map the pin name to the actual pin.
312 */
313static struct sfi_gpio_table_entry *gpio_table;
314static int gpio_num_entry;
315
316static int __init sfi_parse_gpio(struct sfi_table_header *table)
317{
318 struct sfi_table_simple *sb;
319 struct sfi_gpio_table_entry *pentry;
320 int num, i;
321
322 if (gpio_table)
323 return 0;
324 sb = (struct sfi_table_simple *)table;
325 num = SFI_GET_NUM_ENTRIES(sb, struct sfi_gpio_table_entry);
326 pentry = (struct sfi_gpio_table_entry *)sb->pentry;
327
328 gpio_table = (struct sfi_gpio_table_entry *)
329 kmalloc(num * sizeof(*pentry), GFP_KERNEL);
330 if (!gpio_table)
331 return -1;
332 memcpy(gpio_table, pentry, num * sizeof(*pentry));
333 gpio_num_entry = num;
334
335 pr_debug("GPIO pin info:\n");
336 for (i = 0; i < num; i++, pentry++)
337 pr_debug("info[%2d]: controller = %16.16s, pin_name = %16.16s,"
338 " pin = %d\n", i,
339 pentry->controller_name,
340 pentry->pin_name,
341 pentry->pin_no);
342 return 0;
343}
344
345static int get_gpio_by_name(const char *name)
346{
347 struct sfi_gpio_table_entry *pentry = gpio_table;
348 int i;
349
350 if (!pentry)
351 return -1;
352 for (i = 0; i < gpio_num_entry; i++, pentry++) {
353 if (!strncmp(name, pentry->pin_name, SFI_NAME_LEN))
354 return pentry->pin_no;
355 }
356 return -1;
357}
358
359/*
 360 * This defines the array of device platform data that IAFW would export
 361 * through the SFI "DEVS" table; we use name and type to match the device and
 362 * its platform data.
363 */
364struct devs_id {
365 char name[SFI_NAME_LEN + 1];
366 u8 type;
367 u8 delay;
368 void *(*get_platform_data)(void *info);
369};
370
371/* the offset for the mapping of global gpio pin to irq */
372#define MRST_IRQ_OFFSET 0x100
373
374static void __init *pmic_gpio_platform_data(void *info)
375{
376 static struct intel_pmic_gpio_platform_data pmic_gpio_pdata;
377 int gpio_base = get_gpio_by_name("pmic_gpio_base");
378
379 if (gpio_base == -1)
380 gpio_base = 64;
381 pmic_gpio_pdata.gpio_base = gpio_base;
382 pmic_gpio_pdata.irq_base = gpio_base + MRST_IRQ_OFFSET;
383 pmic_gpio_pdata.gpiointr = 0xffffeff8;
384
385 return &pmic_gpio_pdata;
386}
387
388static void __init *max3111_platform_data(void *info)
389{
390 struct spi_board_info *spi_info = info;
391 int intr = get_gpio_by_name("max3111_int");
392
393 if (intr == -1)
394 return NULL;
395 spi_info->irq = intr + MRST_IRQ_OFFSET;
396 return NULL;
397}
398
399/* we have multiple max7315 on the board ... */
400#define MAX7315_NUM 2
401static void __init *max7315_platform_data(void *info)
402{
403 static struct pca953x_platform_data max7315_pdata[MAX7315_NUM];
404 static int nr;
405 struct pca953x_platform_data *max7315 = &max7315_pdata[nr];
406 struct i2c_board_info *i2c_info = info;
407 int gpio_base, intr;
408 char base_pin_name[SFI_NAME_LEN + 1];
409 char intr_pin_name[SFI_NAME_LEN + 1];
410
411 if (nr == MAX7315_NUM) {
412 pr_err("too many max7315s, we only support %d\n",
413 MAX7315_NUM);
414 return NULL;
415 }
 416 /* we have several max7315 on the board, we only need to load several
417 * instances of the same pca953x driver to cover them
418 */
419 strcpy(i2c_info->type, "max7315");
420 if (nr++) {
421 sprintf(base_pin_name, "max7315_%d_base", nr);
422 sprintf(intr_pin_name, "max7315_%d_int", nr);
423 } else {
424 strcpy(base_pin_name, "max7315_base");
425 strcpy(intr_pin_name, "max7315_int");
426 }
427
428 gpio_base = get_gpio_by_name(base_pin_name);
429 intr = get_gpio_by_name(intr_pin_name);
430
431 if (gpio_base == -1)
432 return NULL;
433 max7315->gpio_base = gpio_base;
434 if (intr != -1) {
435 i2c_info->irq = intr + MRST_IRQ_OFFSET;
436 max7315->irq_base = gpio_base + MRST_IRQ_OFFSET;
437 } else {
438 i2c_info->irq = -1;
439 max7315->irq_base = -1;
440 }
441 return max7315;
442}
443
444static void __init *emc1403_platform_data(void *info)
445{
446 static short intr2nd_pdata;
447 struct i2c_board_info *i2c_info = info;
448 int intr = get_gpio_by_name("thermal_int");
449 int intr2nd = get_gpio_by_name("thermal_alert");
450
451 if (intr == -1 || intr2nd == -1)
452 return NULL;
453
454 i2c_info->irq = intr + MRST_IRQ_OFFSET;
455 intr2nd_pdata = intr2nd + MRST_IRQ_OFFSET;
456
457 return &intr2nd_pdata;
458}
459
460static void __init *lis331dl_platform_data(void *info)
461{
462 static short intr2nd_pdata;
463 struct i2c_board_info *i2c_info = info;
464 int intr = get_gpio_by_name("accel_int");
465 int intr2nd = get_gpio_by_name("accel_2");
466
467 if (intr == -1 || intr2nd == -1)
468 return NULL;
469
470 i2c_info->irq = intr + MRST_IRQ_OFFSET;
471 intr2nd_pdata = intr2nd + MRST_IRQ_OFFSET;
472
473 return &intr2nd_pdata;
474}
475
476static void __init *no_platform_data(void *info)
477{
478 return NULL;
479}
480
481static const struct devs_id __initconst device_ids[] = {
482 {"pmic_gpio", SFI_DEV_TYPE_SPI, 1, &pmic_gpio_platform_data},
483 {"spi_max3111", SFI_DEV_TYPE_SPI, 0, &max3111_platform_data},
484 {"i2c_max7315", SFI_DEV_TYPE_I2C, 1, &max7315_platform_data},
485 {"i2c_max7315_2", SFI_DEV_TYPE_I2C, 1, &max7315_platform_data},
486 {"emc1403", SFI_DEV_TYPE_I2C, 1, &emc1403_platform_data},
487 {"i2c_accel", SFI_DEV_TYPE_I2C, 0, &lis331dl_platform_data},
488 {"pmic_audio", SFI_DEV_TYPE_IPC, 1, &no_platform_data},
489 {"msic_audio", SFI_DEV_TYPE_IPC, 1, &no_platform_data},
490 {},
491};
492
493#define MAX_IPCDEVS 24
494static struct platform_device *ipc_devs[MAX_IPCDEVS];
495static int ipc_next_dev;
496
497#define MAX_SCU_SPI 24
498static struct spi_board_info *spi_devs[MAX_SCU_SPI];
499static int spi_next_dev;
500
501#define MAX_SCU_I2C 24
502static struct i2c_board_info *i2c_devs[MAX_SCU_I2C];
503static int i2c_bus[MAX_SCU_I2C];
504static int i2c_next_dev;
505
506static void __init intel_scu_device_register(struct platform_device *pdev)
507{
508 if(ipc_next_dev == MAX_IPCDEVS)
509 pr_err("too many SCU IPC devices");
510 else
511 ipc_devs[ipc_next_dev++] = pdev;
512}
513
514static void __init intel_scu_spi_device_register(struct spi_board_info *sdev)
515{
516 struct spi_board_info *new_dev;
517
518 if (spi_next_dev == MAX_SCU_SPI) {
519 pr_err("too many SCU SPI devices");
520 return;
521 }
522
523 new_dev = kzalloc(sizeof(*sdev), GFP_KERNEL);
524 if (!new_dev) {
525 pr_err("failed to alloc mem for delayed spi dev %s\n",
526 sdev->modalias);
527 return;
528 }
529 memcpy(new_dev, sdev, sizeof(*sdev));
530
531 spi_devs[spi_next_dev++] = new_dev;
532}
533
534static void __init intel_scu_i2c_device_register(int bus,
535 struct i2c_board_info *idev)
536{
537 struct i2c_board_info *new_dev;
538
539 if (i2c_next_dev == MAX_SCU_I2C) {
540 pr_err("too many SCU I2C devices");
541 return;
542 }
543
544 new_dev = kzalloc(sizeof(*idev), GFP_KERNEL);
545 if (!new_dev) {
546 pr_err("failed to alloc mem for delayed i2c dev %s\n",
547 idev->type);
548 return;
549 }
550 memcpy(new_dev, idev, sizeof(*idev));
551
552 i2c_bus[i2c_next_dev] = bus;
553 i2c_devs[i2c_next_dev++] = new_dev;
554}
555
556/* Called by IPC driver */
557void intel_scu_devices_create(void)
558{
559 int i;
560
561 for (i = 0; i < ipc_next_dev; i++)
562 platform_device_add(ipc_devs[i]);
563
564 for (i = 0; i < spi_next_dev; i++)
565 spi_register_board_info(spi_devs[i], 1);
566
567 for (i = 0; i < i2c_next_dev; i++) {
568 struct i2c_adapter *adapter;
569 struct i2c_client *client;
570
571 adapter = i2c_get_adapter(i2c_bus[i]);
572 if (adapter) {
573 client = i2c_new_device(adapter, i2c_devs[i]);
574 if (!client)
575 pr_err("can't create i2c device %s\n",
576 i2c_devs[i]->type);
577 } else
578 i2c_register_board_info(i2c_bus[i], i2c_devs[i], 1);
579 }
580}
581EXPORT_SYMBOL_GPL(intel_scu_devices_create);
582
583/* Called by IPC driver */
584void intel_scu_devices_destroy(void)
585{
586 int i;
587
588 for (i = 0; i < ipc_next_dev; i++)
589 platform_device_del(ipc_devs[i]);
590}
591EXPORT_SYMBOL_GPL(intel_scu_devices_destroy);
592
593static void __init install_irq_resource(struct platform_device *pdev, int irq)
594{
595 /* Single threaded */
596 static struct resource __initdata res = {
597 .name = "IRQ",
598 .flags = IORESOURCE_IRQ,
599 };
600 res.start = irq;
601 platform_device_add_resources(pdev, &res, 1);
602}
603
604static void __init sfi_handle_ipc_dev(struct platform_device *pdev)
605{
606 const struct devs_id *dev = device_ids;
607 void *pdata = NULL;
608
609 while (dev->name[0]) {
610 if (dev->type == SFI_DEV_TYPE_IPC &&
611 !strncmp(dev->name, pdev->name, SFI_NAME_LEN)) {
612 pdata = dev->get_platform_data(pdev);
613 break;
614 }
615 dev++;
616 }
617 pdev->dev.platform_data = pdata;
618 intel_scu_device_register(pdev);
619}
620
621static void __init sfi_handle_spi_dev(struct spi_board_info *spi_info)
622{
623 const struct devs_id *dev = device_ids;
624 void *pdata = NULL;
625
626 while (dev->name[0]) {
627 if (dev->type == SFI_DEV_TYPE_SPI &&
628 !strncmp(dev->name, spi_info->modalias, SFI_NAME_LEN)) {
629 pdata = dev->get_platform_data(spi_info);
630 break;
631 }
632 dev++;
633 }
634 spi_info->platform_data = pdata;
635 if (dev->delay)
636 intel_scu_spi_device_register(spi_info);
637 else
638 spi_register_board_info(spi_info, 1);
639}
640
641static void __init sfi_handle_i2c_dev(int bus, struct i2c_board_info *i2c_info)
642{
643 const struct devs_id *dev = device_ids;
644 void *pdata = NULL;
645
646 while (dev->name[0]) {
647 if (dev->type == SFI_DEV_TYPE_I2C &&
648 !strncmp(dev->name, i2c_info->type, SFI_NAME_LEN)) {
649 pdata = dev->get_platform_data(i2c_info);
650 break;
651 }
652 dev++;
653 }
654 i2c_info->platform_data = pdata;
655
656 if (dev->delay)
657 intel_scu_i2c_device_register(bus, i2c_info);
658 else
659 i2c_register_board_info(bus, i2c_info, 1);
660 }
661
662
663static int __init sfi_parse_devs(struct sfi_table_header *table)
664{
665 struct sfi_table_simple *sb;
666 struct sfi_device_table_entry *pentry;
667 struct spi_board_info spi_info;
668 struct i2c_board_info i2c_info;
669 struct platform_device *pdev;
670 int num, i, bus;
671 int ioapic;
672 struct io_apic_irq_attr irq_attr;
673
674 sb = (struct sfi_table_simple *)table;
675 num = SFI_GET_NUM_ENTRIES(sb, struct sfi_device_table_entry);
676 pentry = (struct sfi_device_table_entry *)sb->pentry;
677
678 for (i = 0; i < num; i++, pentry++) {
679 if (pentry->irq != (u8)0xff) { /* native RTE case */
680 /* these SPI2 devices are not exposed to system as PCI
681 * devices, but they have separate RTE entry in IOAPIC
682 * so we have to enable them one by one here
683 */
684 ioapic = mp_find_ioapic(pentry->irq);
685 irq_attr.ioapic = ioapic;
686 irq_attr.ioapic_pin = pentry->irq;
687 irq_attr.trigger = 1;
688 irq_attr.polarity = 1;
689 io_apic_set_pci_routing(NULL, pentry->irq, &irq_attr);
690 }
691 switch (pentry->type) {
692 case SFI_DEV_TYPE_IPC:
693 /* ID as IRQ is a hack that will go away */
694 pdev = platform_device_alloc(pentry->name, pentry->irq);
695 if (pdev == NULL) {
696 pr_err("out of memory for SFI platform device '%s'.\n",
697 pentry->name);
698 continue;
699 }
700 install_irq_resource(pdev, pentry->irq);
701 pr_debug("info[%2d]: IPC bus, name = %16.16s, "
702 "irq = 0x%2x\n", i, pentry->name, pentry->irq);
703 sfi_handle_ipc_dev(pdev);
704 break;
705 case SFI_DEV_TYPE_SPI:
706 memset(&spi_info, 0, sizeof(spi_info));
707 strncpy(spi_info.modalias, pentry->name, SFI_NAME_LEN);
708 spi_info.irq = pentry->irq;
709 spi_info.bus_num = pentry->host_num;
710 spi_info.chip_select = pentry->addr;
711 spi_info.max_speed_hz = pentry->max_freq;
712 pr_debug("info[%2d]: SPI bus = %d, name = %16.16s, "
713 "irq = 0x%2x, max_freq = %d, cs = %d\n", i,
714 spi_info.bus_num,
715 spi_info.modalias,
716 spi_info.irq,
717 spi_info.max_speed_hz,
718 spi_info.chip_select);
719 sfi_handle_spi_dev(&spi_info);
720 break;
721 case SFI_DEV_TYPE_I2C:
722 memset(&i2c_info, 0, sizeof(i2c_info));
723 bus = pentry->host_num;
724 strncpy(i2c_info.type, pentry->name, SFI_NAME_LEN);
725 i2c_info.irq = pentry->irq;
726 i2c_info.addr = pentry->addr;
727 pr_debug("info[%2d]: I2C bus = %d, name = %16.16s, "
728 "irq = 0x%2x, addr = 0x%x\n", i, bus,
729 i2c_info.type,
730 i2c_info.irq,
731 i2c_info.addr);
732 sfi_handle_i2c_dev(bus, &i2c_info);
733 break;
734 case SFI_DEV_TYPE_UART:
735 case SFI_DEV_TYPE_HSI:
736 default:
737 ;
738 }
739 }
740 return 0;
741}
742
743static int __init mrst_platform_init(void)
744{
745 sfi_table_parse(SFI_SIG_GPIO, NULL, NULL, sfi_parse_gpio);
746 sfi_table_parse(SFI_SIG_DEVS, NULL, NULL, sfi_parse_devs);
747 return 0;
748}
749arch_initcall(mrst_platform_init);
750
751/*
 752 * We search for these buttons in the SFI GPIO table (by name)
 753 * and register them dynamically. Please add all possible
 754 * buttons here; entries with no matching GPIO are dropped.
755 */
756static struct gpio_keys_button gpio_button[] = {
757 {KEY_POWER, -1, 1, "power_btn", EV_KEY, 0, 3000},
758 {KEY_PROG1, -1, 1, "prog_btn1", EV_KEY, 0, 20},
759 {KEY_PROG2, -1, 1, "prog_btn2", EV_KEY, 0, 20},
760 {SW_LID, -1, 1, "lid_switch", EV_SW, 0, 20},
761 {KEY_VOLUMEUP, -1, 1, "vol_up", EV_KEY, 0, 20},
762 {KEY_VOLUMEDOWN, -1, 1, "vol_down", EV_KEY, 0, 20},
763 {KEY_CAMERA, -1, 1, "camera_full", EV_KEY, 0, 20},
764 {KEY_CAMERA_FOCUS, -1, 1, "camera_half", EV_KEY, 0, 20},
765 {SW_KEYPAD_SLIDE, -1, 1, "MagSw1", EV_SW, 0, 20},
766 {SW_KEYPAD_SLIDE, -1, 1, "MagSw2", EV_SW, 0, 20},
767};
768
769static struct gpio_keys_platform_data mrst_gpio_keys = {
770 .buttons = gpio_button,
771 .rep = 1,
772 .nbuttons = -1, /* will fill it after search */
773};
774
775static struct platform_device pb_device = {
776 .name = "gpio-keys",
777 .id = -1,
778 .dev = {
779 .platform_data = &mrst_gpio_keys,
780 },
781};
782
783/*
 784 * Drop the non-existent buttons, then register the gpio button
 785 * device if any buttons remain
786 */
787static int __init pb_keys_init(void)
788{
789 struct gpio_keys_button *gb = gpio_button;
790 int i, num, good = 0;
791
792 num = sizeof(gpio_button) / sizeof(struct gpio_keys_button);
793 for (i = 0; i < num; i++) {
794 gb[i].gpio = get_gpio_by_name(gb[i].desc);
795 if (gb[i].gpio == -1)
796 continue;
797
798 if (i != good)
799 gb[good] = gb[i];
800 good++;
801 }
802
803 if (good) {
804 mrst_gpio_keys.nbuttons = good;
805 return platform_device_register(&pb_device);
806 }
807 return 0;
808}
809late_initcall(pb_keys_init);
diff --git a/arch/x86/platform/mrst/vrtc.c b/arch/x86/platform/mrst/vrtc.c
new file mode 100644
index 000000000000..32cd7edd71a0
--- /dev/null
+++ b/arch/x86/platform/mrst/vrtc.c
@@ -0,0 +1,165 @@
1/*
2 * vrtc.c: Driver for virtual RTC device on Intel MID platform
3 *
4 * (C) Copyright 2009 Intel Corporation
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; version 2
9 * of the License.
10 *
11 * Note:
12 * VRTC is emulated by system controller firmware, the real HW
13 * RTC is located in the PMIC device. SCU FW shadows PMIC RTC
14 * in a memory mapped IO space that is visible to the host IA
15 * processor.
16 *
17 * This driver is based on RTC CMOS driver.
18 */
19
20#include <linux/kernel.h>
21#include <linux/init.h>
22#include <linux/sfi.h>
23#include <linux/platform_device.h>
24
25#include <asm/mrst.h>
26#include <asm/mrst-vrtc.h>
27#include <asm/time.h>
28#include <asm/fixmap.h>
29
30static unsigned char __iomem *vrtc_virt_base;
31
32unsigned char vrtc_cmos_read(unsigned char reg)
33{
34 unsigned char retval;
35
36 /* vRTC's registers range from 0x0 to 0xD */
37 if (reg > 0xd || !vrtc_virt_base)
38 return 0xff;
39
40 lock_cmos_prefix(reg);
41 retval = __raw_readb(vrtc_virt_base + (reg << 2));
42 lock_cmos_suffix(reg);
43 return retval;
44}
45EXPORT_SYMBOL_GPL(vrtc_cmos_read);
46
47void vrtc_cmos_write(unsigned char val, unsigned char reg)
48{
49 if (reg > 0xd || !vrtc_virt_base)
50 return;
51
52 lock_cmos_prefix(reg);
53 __raw_writeb(val, vrtc_virt_base + (reg << 2));
54 lock_cmos_suffix(reg);
55}
56EXPORT_SYMBOL_GPL(vrtc_cmos_write);
57
58unsigned long vrtc_get_time(void)
59{
60 u8 sec, min, hour, mday, mon;
61 u32 year;
62
63 while ((vrtc_cmos_read(RTC_FREQ_SELECT) & RTC_UIP))
64 cpu_relax();
65
66 sec = vrtc_cmos_read(RTC_SECONDS);
67 min = vrtc_cmos_read(RTC_MINUTES);
68 hour = vrtc_cmos_read(RTC_HOURS);
69 mday = vrtc_cmos_read(RTC_DAY_OF_MONTH);
70 mon = vrtc_cmos_read(RTC_MONTH);
71 year = vrtc_cmos_read(RTC_YEAR);
72
73 /* vRTC YEAR reg contains the offset to 1960 */
74 year += 1960;
75
76 printk(KERN_INFO "vRTC: sec: %d min: %d hour: %d day: %d "
77 "mon: %d year: %d\n", sec, min, hour, mday, mon, year);
78
79 return mktime(year, mon, mday, hour, min, sec);
80}
81
82/* Only care about the minutes and seconds */
83int vrtc_set_mmss(unsigned long nowtime)
84{
85 int real_sec, real_min;
86 int vrtc_min;
87
88 vrtc_min = vrtc_cmos_read(RTC_MINUTES);
89
90 real_sec = nowtime % 60;
91 real_min = nowtime / 60;
92 if (((abs(real_min - vrtc_min) + 15)/30) & 1)
93 real_min += 30;
94 real_min %= 60;
95
96 vrtc_cmos_write(real_sec, RTC_SECONDS);
97 vrtc_cmos_write(real_min, RTC_MINUTES);
98 return 0;
99}
100
101void __init mrst_rtc_init(void)
102{
103 unsigned long rtc_paddr;
104 void __iomem *virt_base;
105
106 sfi_table_parse(SFI_SIG_MRTC, NULL, NULL, sfi_parse_mrtc);
107 if (!sfi_mrtc_num)
108 return;
109
110 rtc_paddr = sfi_mrtc_array[0].phys_addr;
111
112 /* vRTC's register address may not be page aligned */
113 set_fixmap_nocache(FIX_LNW_VRTC, rtc_paddr);
114
115 virt_base = (void __iomem *)__fix_to_virt(FIX_LNW_VRTC);
116 virt_base += rtc_paddr & ~PAGE_MASK;
117 vrtc_virt_base = virt_base;
118
119 x86_platform.get_wallclock = vrtc_get_time;
120 x86_platform.set_wallclock = vrtc_set_mmss;
121}
122
123/*
124 * The Moorestown platform has a memory mapped virtual RTC device that emulates
125 * the programming interface of the RTC.
126 */
127
128static struct resource vrtc_resources[] = {
129 [0] = {
130 .flags = IORESOURCE_MEM,
131 },
132 [1] = {
133 .flags = IORESOURCE_IRQ,
134 }
135};
136
137static struct platform_device vrtc_device = {
138 .name = "rtc_mrst",
139 .id = -1,
140 .resource = vrtc_resources,
141 .num_resources = ARRAY_SIZE(vrtc_resources),
142};
143
144/* Register the RTC device if appropriate */
145static int __init mrst_device_create(void)
146{
147 /* No Moorestown, no device */
148 if (!mrst_identify_cpu())
149 return -ENODEV;
 150 /* No vRTC entry from SFI, no device */
151 if (!sfi_mrtc_num)
152 return -ENODEV;
153
154 /* iomem resource */
155 vrtc_resources[0].start = sfi_mrtc_array[0].phys_addr;
156 vrtc_resources[0].end = sfi_mrtc_array[0].phys_addr +
157 MRST_VRTC_MAP_SZ;
158 /* irq resource */
159 vrtc_resources[1].start = sfi_mrtc_array[0].irq;
160 vrtc_resources[1].end = sfi_mrtc_array[0].irq;
161
162 return platform_device_register(&vrtc_device);
163}
164
165module_init(mrst_device_create);
diff --git a/arch/x86/platform/sfi/sfi.c b/arch/x86/platform/sfi/sfi.c
index dd4c281ffe57..7785b72ecc3a 100644
--- a/arch/x86/platform/sfi/sfi.c
+++ b/arch/x86/platform/sfi/sfi.c
@@ -34,23 +34,12 @@
34#ifdef CONFIG_X86_LOCAL_APIC 34#ifdef CONFIG_X86_LOCAL_APIC
35static unsigned long sfi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE; 35static unsigned long sfi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE;
36 36
37static void __init mp_sfi_register_lapic_address(unsigned long address)
38{
39 mp_lapic_addr = address;
40
41 set_fixmap_nocache(FIX_APIC_BASE, mp_lapic_addr);
42 if (boot_cpu_physical_apicid == -1U)
43 boot_cpu_physical_apicid = read_apic_id();
44
45 pr_info("Boot CPU = %d\n", boot_cpu_physical_apicid);
46}
47
48/* All CPUs enumerated by SFI must be present and enabled */ 37/* All CPUs enumerated by SFI must be present and enabled */
49static void __cpuinit mp_sfi_register_lapic(u8 id) 38static void __cpuinit mp_sfi_register_lapic(u8 id)
50{ 39{
51 if (MAX_APICS - id <= 0) { 40 if (MAX_LOCAL_APIC - id <= 0) {
52 pr_warning("Processor #%d invalid (max %d)\n", 41 pr_warning("Processor #%d invalid (max %d)\n",
53 id, MAX_APICS); 42 id, MAX_LOCAL_APIC);
54 return; 43 return;
55 } 44 }
56 45
@@ -110,7 +99,7 @@ static int __init sfi_parse_ioapic(struct sfi_table_header *table)
110int __init sfi_platform_init(void) 99int __init sfi_platform_init(void)
111{ 100{
112#ifdef CONFIG_X86_LOCAL_APIC 101#ifdef CONFIG_X86_LOCAL_APIC
113 mp_sfi_register_lapic_address(sfi_lapic_addr); 102 register_lapic_address(sfi_lapic_addr);
114 sfi_table_parse(SFI_SIG_CPUS, NULL, NULL, sfi_parse_cpus); 103 sfi_table_parse(SFI_SIG_CPUS, NULL, NULL, sfi_parse_cpus);
115#endif 104#endif
116#ifdef CONFIG_X86_IO_APIC 105#ifdef CONFIG_X86_IO_APIC
diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c
index ba9caa808a9c..df58e9cad96a 100644
--- a/arch/x86/platform/uv/tlb_uv.c
+++ b/arch/x86/platform/uv/tlb_uv.c
@@ -1341,7 +1341,7 @@ uv_activation_descriptor_init(int node, int pnode)
1341 1341
1342 /* 1342 /*
1343 * each bau_desc is 64 bytes; there are 8 (UV_ITEMS_PER_DESCRIPTOR) 1343 * each bau_desc is 64 bytes; there are 8 (UV_ITEMS_PER_DESCRIPTOR)
1344 * per cpu; and up to 32 (UV_ADP_SIZE) cpu's per uvhub 1344 * per cpu; and one per cpu on the uvhub (UV_ADP_SIZE)
1345 */ 1345 */
1346 bau_desc = kmalloc_node(sizeof(struct bau_desc) * UV_ADP_SIZE 1346 bau_desc = kmalloc_node(sizeof(struct bau_desc) * UV_ADP_SIZE
1347 * UV_ITEMS_PER_DESCRIPTOR, GFP_KERNEL, node); 1347 * UV_ITEMS_PER_DESCRIPTOR, GFP_KERNEL, node);
@@ -1490,7 +1490,7 @@ calculate_destination_timeout(void)
1490/* 1490/*
1491 * initialize the bau_control structure for each cpu 1491 * initialize the bau_control structure for each cpu
1492 */ 1492 */
1493static void __init uv_init_per_cpu(int nuvhubs) 1493static int __init uv_init_per_cpu(int nuvhubs)
1494{ 1494{
1495 int i; 1495 int i;
1496 int cpu; 1496 int cpu;
@@ -1507,7 +1507,7 @@ static void __init uv_init_per_cpu(int nuvhubs)
1507 struct bau_control *smaster = NULL; 1507 struct bau_control *smaster = NULL;
1508 struct socket_desc { 1508 struct socket_desc {
1509 short num_cpus; 1509 short num_cpus;
1510 short cpu_number[16]; 1510 short cpu_number[MAX_CPUS_PER_SOCKET];
1511 }; 1511 };
1512 struct uvhub_desc { 1512 struct uvhub_desc {
1513 unsigned short socket_mask; 1513 unsigned short socket_mask;
@@ -1540,6 +1540,10 @@ static void __init uv_init_per_cpu(int nuvhubs)
1540 sdp = &bdp->socket[socket]; 1540 sdp = &bdp->socket[socket];
1541 sdp->cpu_number[sdp->num_cpus] = cpu; 1541 sdp->cpu_number[sdp->num_cpus] = cpu;
1542 sdp->num_cpus++; 1542 sdp->num_cpus++;
1543 if (sdp->num_cpus > MAX_CPUS_PER_SOCKET) {
1544 printk(KERN_EMERG "%d cpus per socket invalid\n", sdp->num_cpus);
1545 return 1;
1546 }
1543 } 1547 }
1544 for (uvhub = 0; uvhub < nuvhubs; uvhub++) { 1548 for (uvhub = 0; uvhub < nuvhubs; uvhub++) {
1545 if (!(*(uvhub_mask + (uvhub/8)) & (1 << (uvhub%8)))) 1549 if (!(*(uvhub_mask + (uvhub/8)) & (1 << (uvhub%8))))
@@ -1570,6 +1574,12 @@ static void __init uv_init_per_cpu(int nuvhubs)
1570 bcp->uvhub_master = hmaster; 1574 bcp->uvhub_master = hmaster;
1571 bcp->uvhub_cpu = uv_cpu_hub_info(cpu)-> 1575 bcp->uvhub_cpu = uv_cpu_hub_info(cpu)->
1572 blade_processor_id; 1576 blade_processor_id;
1577 if (bcp->uvhub_cpu >= MAX_CPUS_PER_UVHUB) {
1578 printk(KERN_EMERG
1579 "%d cpus per uvhub invalid\n",
1580 bcp->uvhub_cpu);
1581 return 1;
1582 }
1573 } 1583 }
1574nextsocket: 1584nextsocket:
1575 socket++; 1585 socket++;
@@ -1595,6 +1605,7 @@ nextsocket:
1595 bcp->congested_reps = congested_reps; 1605 bcp->congested_reps = congested_reps;
1596 bcp->congested_period = congested_period; 1606 bcp->congested_period = congested_period;
1597 } 1607 }
1608 return 0;
1598} 1609}
1599 1610
1600/* 1611/*
@@ -1625,7 +1636,10 @@ static int __init uv_bau_init(void)
1625 spin_lock_init(&disable_lock); 1636 spin_lock_init(&disable_lock);
1626 congested_cycles = microsec_2_cycles(congested_response_us); 1637 congested_cycles = microsec_2_cycles(congested_response_us);
1627 1638
1628 uv_init_per_cpu(nuvhubs); 1639 if (uv_init_per_cpu(nuvhubs)) {
1640 nobau = 1;
1641 return 0;
1642 }
1629 1643
1630 uv_partition_base_pnode = 0x7fffffff; 1644 uv_partition_base_pnode = 0x7fffffff;
1631 for (uvhub = 0; uvhub < nuvhubs; uvhub++) 1645 for (uvhub = 0; uvhub < nuvhubs; uvhub++)
diff --git a/arch/x86/platform/visws/visws_quirks.c b/arch/x86/platform/visws/visws_quirks.c
index 3371bd053b89..632037671746 100644
--- a/arch/x86/platform/visws/visws_quirks.c
+++ b/arch/x86/platform/visws/visws_quirks.c
@@ -171,7 +171,7 @@ static void __init MP_processor_info(struct mpc_cpu *m)
171 ver = m->apicver; 171 ver = m->apicver;
172 if ((ver >= 0x14 && m->apicid >= 0xff) || m->apicid >= 0xf) { 172 if ((ver >= 0x14 && m->apicid >= 0xff) || m->apicid >= 0xf) {
173 printk(KERN_ERR "Processor #%d INVALID. (Max ID: %d).\n", 173 printk(KERN_ERR "Processor #%d INVALID. (Max ID: %d).\n",
174 m->apicid, MAX_APICS); 174 m->apicid, MAX_LOCAL_APIC);
175 return; 175 return;
176 } 176 }
177 177
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 44dcad43989d..7e8d3bc80af6 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -574,8 +574,8 @@ static void xen_write_idt_entry(gate_desc *dt, int entrynum, const gate_desc *g)
574 574
575 preempt_disable(); 575 preempt_disable();
576 576
577 start = __get_cpu_var(idt_desc).address; 577 start = __this_cpu_read(idt_desc.address);
578 end = start + __get_cpu_var(idt_desc).size + 1; 578 end = start + __this_cpu_read(idt_desc.size) + 1;
579 579
580 xen_mc_flush(); 580 xen_mc_flush();
581 581
@@ -1174,6 +1174,15 @@ asmlinkage void __init xen_start_kernel(void)
1174 1174
1175 xen_smp_init(); 1175 xen_smp_init();
1176 1176
1177#ifdef CONFIG_ACPI_NUMA
1178 /*
 1179 * The pages we get from Xen are not related to machine pages, so
1180 * any NUMA information the kernel tries to get from ACPI will
1181 * be meaningless. Prevent it from trying.
1182 */
1183 acpi_numa = -1;
1184#endif
1185
1177 pgd = (pgd_t *)xen_start_info->pt_base; 1186 pgd = (pgd_t *)xen_start_info->pt_base;
1178 1187
1179 if (!xen_initial_domain()) 1188 if (!xen_initial_domain())
@@ -1256,25 +1265,6 @@ asmlinkage void __init xen_start_kernel(void)
1256#endif 1265#endif
1257} 1266}
1258 1267
1259static uint32_t xen_cpuid_base(void)
1260{
1261 uint32_t base, eax, ebx, ecx, edx;
1262 char signature[13];
1263
1264 for (base = 0x40000000; base < 0x40010000; base += 0x100) {
1265 cpuid(base, &eax, &ebx, &ecx, &edx);
1266 *(uint32_t *)(signature + 0) = ebx;
1267 *(uint32_t *)(signature + 4) = ecx;
1268 *(uint32_t *)(signature + 8) = edx;
1269 signature[12] = 0;
1270
1271 if (!strcmp("XenVMMXenVMM", signature) && ((eax - base) >= 2))
1272 return base;
1273 }
1274
1275 return 0;
1276}
1277
1278static int init_hvm_pv_info(int *major, int *minor) 1268static int init_hvm_pv_info(int *major, int *minor)
1279{ 1269{
1280 uint32_t eax, ebx, ecx, edx, pages, msr, base; 1270 uint32_t eax, ebx, ecx, edx, pages, msr, base;
@@ -1384,6 +1374,18 @@ static bool __init xen_hvm_platform(void)
1384 return true; 1374 return true;
1385} 1375}
1386 1376
1377bool xen_hvm_need_lapic(void)
1378{
1379 if (xen_pv_domain())
1380 return false;
1381 if (!xen_hvm_domain())
1382 return false;
1383 if (xen_feature(XENFEAT_hvm_pirqs) && xen_have_vector_callback)
1384 return false;
1385 return true;
1386}
1387EXPORT_SYMBOL_GPL(xen_hvm_need_lapic);
1388
1387const __refconst struct hypervisor_x86 x86_hyper_xen_hvm = { 1389const __refconst struct hypervisor_x86 x86_hyper_xen_hvm = {
1388 .name = "Xen HVM", 1390 .name = "Xen HVM",
1389 .detect = xen_hvm_platform, 1391 .detect = xen_hvm_platform,
diff --git a/arch/x86/xen/multicalls.h b/arch/x86/xen/multicalls.h
index 9e565da5d1f7..4ec8035e3216 100644
--- a/arch/x86/xen/multicalls.h
+++ b/arch/x86/xen/multicalls.h
@@ -22,7 +22,7 @@ static inline void xen_mc_batch(void)
22 unsigned long flags; 22 unsigned long flags;
23 /* need to disable interrupts until this entry is complete */ 23 /* need to disable interrupts until this entry is complete */
24 local_irq_save(flags); 24 local_irq_save(flags);
25 __get_cpu_var(xen_mc_irq_flags) = flags; 25 __this_cpu_write(xen_mc_irq_flags, flags);
26} 26}
27 27
28static inline struct multicall_space xen_mc_entry(size_t args) 28static inline struct multicall_space xen_mc_entry(size_t args)
diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c
index 23e061b9327b..cc9b1e182fcf 100644
--- a/arch/x86/xen/spinlock.c
+++ b/arch/x86/xen/spinlock.c
@@ -159,8 +159,8 @@ static inline struct xen_spinlock *spinning_lock(struct xen_spinlock *xl)
159{ 159{
160 struct xen_spinlock *prev; 160 struct xen_spinlock *prev;
161 161
162 prev = __get_cpu_var(lock_spinners); 162 prev = __this_cpu_read(lock_spinners);
163 __get_cpu_var(lock_spinners) = xl; 163 __this_cpu_write(lock_spinners, xl);
164 164
165 wmb(); /* set lock of interest before count */ 165 wmb(); /* set lock of interest before count */
166 166
@@ -179,14 +179,14 @@ static inline void unspinning_lock(struct xen_spinlock *xl, struct xen_spinlock
179 asm(LOCK_PREFIX " decw %0" 179 asm(LOCK_PREFIX " decw %0"
180 : "+m" (xl->spinners) : : "memory"); 180 : "+m" (xl->spinners) : : "memory");
181 wmb(); /* decrement count before restoring lock */ 181 wmb(); /* decrement count before restoring lock */
182 __get_cpu_var(lock_spinners) = prev; 182 __this_cpu_write(lock_spinners, prev);
183} 183}
184 184
185static noinline int xen_spin_lock_slow(struct arch_spinlock *lock, bool irq_enable) 185static noinline int xen_spin_lock_slow(struct arch_spinlock *lock, bool irq_enable)
186{ 186{
187 struct xen_spinlock *xl = (struct xen_spinlock *)lock; 187 struct xen_spinlock *xl = (struct xen_spinlock *)lock;
188 struct xen_spinlock *prev; 188 struct xen_spinlock *prev;
189 int irq = __get_cpu_var(lock_kicker_irq); 189 int irq = __this_cpu_read(lock_kicker_irq);
190 int ret; 190 int ret;
191 u64 start; 191 u64 start;
192 192
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index 5da5e53fb94c..067759e3d6a5 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -135,24 +135,24 @@ static void do_stolen_accounting(void)
135 135
136 /* Add the appropriate number of ticks of stolen time, 136 /* Add the appropriate number of ticks of stolen time,
137 including any left-overs from last time. */ 137 including any left-overs from last time. */
138 stolen = runnable + offline + __get_cpu_var(xen_residual_stolen); 138 stolen = runnable + offline + __this_cpu_read(xen_residual_stolen);
139 139
140 if (stolen < 0) 140 if (stolen < 0)
141 stolen = 0; 141 stolen = 0;
142 142
143 ticks = iter_div_u64_rem(stolen, NS_PER_TICK, &stolen); 143 ticks = iter_div_u64_rem(stolen, NS_PER_TICK, &stolen);
144 __get_cpu_var(xen_residual_stolen) = stolen; 144 __this_cpu_write(xen_residual_stolen, stolen);
145 account_steal_ticks(ticks); 145 account_steal_ticks(ticks);
146 146
147 /* Add the appropriate number of ticks of blocked time, 147 /* Add the appropriate number of ticks of blocked time,
148 including any left-overs from last time. */ 148 including any left-overs from last time. */
149 blocked += __get_cpu_var(xen_residual_blocked); 149 blocked += __this_cpu_read(xen_residual_blocked);
150 150
151 if (blocked < 0) 151 if (blocked < 0)
152 blocked = 0; 152 blocked = 0;
153 153
154 ticks = iter_div_u64_rem(blocked, NS_PER_TICK, &blocked); 154 ticks = iter_div_u64_rem(blocked, NS_PER_TICK, &blocked);
155 __get_cpu_var(xen_residual_blocked) = blocked; 155 __this_cpu_write(xen_residual_blocked, blocked);
156 account_idle_ticks(ticks); 156 account_idle_ticks(ticks);
157} 157}
158 158