Diffstat (limited to 'arch/x86')
-rw-r--r--  arch/x86/Kconfig | 65
-rw-r--r--  arch/x86/Makefile | 8
-rw-r--r--  arch/x86/ia32/ia32_aout.c | 22
-rw-r--r--  arch/x86/include/asm/alternative.h | 11
-rw-r--r--  arch/x86/include/asm/amd_iommu.h | 2
-rw-r--r--  arch/x86/include/asm/amd_iommu_proto.h | 2
-rw-r--r--  arch/x86/include/asm/amd_iommu_types.h | 23
-rw-r--r--  arch/x86/include/asm/amd_nb.h (renamed from arch/x86/include/asm/k8.h) | 21
-rw-r--r--  arch/x86/include/asm/apb_timer.h | 1
-rw-r--r--  arch/x86/include/asm/cpu.h | 1
-rw-r--r--  arch/x86/include/asm/cpufeature.h | 13
-rw-r--r--  arch/x86/include/asm/dwarf2.h | 20
-rw-r--r--  arch/x86/include/asm/entry_arch.h | 4
-rw-r--r--  arch/x86/include/asm/gart.h | 15
-rw-r--r--  arch/x86/include/asm/hardirq.h | 2
-rw-r--r--  arch/x86/include/asm/hw_irq.h | 2
-rw-r--r--  arch/x86/include/asm/i387.h | 185
-rw-r--r--  arch/x86/include/asm/io.h | 1
-rw-r--r--  arch/x86/include/asm/irq_vectors.h | 4
-rw-r--r--  arch/x86/include/asm/jump_label.h | 37
-rw-r--r--  arch/x86/include/asm/kvm_host.h | 24
-rw-r--r--  arch/x86/include/asm/mwait.h | 15
-rw-r--r--  arch/x86/include/asm/page_types.h | 2
-rw-r--r--  arch/x86/include/asm/perf_event_p4.h | 52
-rw-r--r--  arch/x86/include/asm/pgtable.h | 4
-rw-r--r--  arch/x86/include/asm/pgtable_64.h | 2
-rw-r--r--  arch/x86/include/asm/processor.h | 29
-rw-r--r--  arch/x86/include/asm/setup.h | 5
-rw-r--r--  arch/x86/kernel/Makefile | 6
-rw-r--r--  arch/x86/kernel/acpi/cstate.c | 11
-rw-r--r--  arch/x86/kernel/alternative.c | 71
-rw-r--r--  arch/x86/kernel/amd_iommu.c | 2
-rw-r--r--  arch/x86/kernel/amd_iommu_init.c | 124
-rw-r--r--  arch/x86/kernel/amd_nb.c (renamed from arch/x86/kernel/k8.c) | 56
-rw-r--r--  arch/x86/kernel/apb_timer.c | 6
-rw-r--r--  arch/x86/kernel/aperture_64.c | 20
-rw-r--r--  arch/x86/kernel/apic/io_apic.c | 12
-rw-r--r--  arch/x86/kernel/cpu/amd.c | 77
-rw-r--r--  arch/x86/kernel/cpu/common.c | 24
-rw-r--r--  arch/x86/kernel/cpu/cpu.h | 1
-rw-r--r--  arch/x86/kernel/cpu/intel.c | 2
-rw-r--r--  arch/x86/kernel/cpu/intel_cacheinfo.c | 14
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce_amd.c | 9
-rw-r--r--  arch/x86/kernel/cpu/mcheck/therm_throt.c | 5
-rw-r--r--  arch/x86/kernel/cpu/mtrr/cleanup.c | 2
-rw-r--r--  arch/x86/kernel/cpu/perf_event.c | 280
-rw-r--r--  arch/x86/kernel/cpu/perf_event_amd.c | 4
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel.c | 8
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel_ds.c | 13
-rw-r--r--  arch/x86/kernel/cpu/perf_event_p4.c | 292
-rw-r--r--  arch/x86/kernel/cpu/perfctr-watchdog.c | 9
-rw-r--r--  arch/x86/kernel/cpu/scattered.c | 6
-rw-r--r--  arch/x86/kernel/crash_dump_64.c | 3
-rw-r--r--  arch/x86/kernel/early-quirks.c | 2
-rw-r--r--  arch/x86/kernel/entry_32.S | 310
-rw-r--r--  arch/x86/kernel/entry_64.S | 114
-rw-r--r--  arch/x86/kernel/ftrace.c | 63
-rw-r--r--  arch/x86/kernel/i387.c | 58
-rw-r--r--  arch/x86/kernel/irq.c | 8
-rw-r--r--  arch/x86/kernel/irq_work.c | 30
-rw-r--r--  arch/x86/kernel/irqinit.c | 6
-rw-r--r--  arch/x86/kernel/jump_label.c | 50
-rw-r--r--  arch/x86/kernel/kprobes.c | 14
-rw-r--r--  arch/x86/kernel/machine_kexec_64.c | 4
-rw-r--r--  arch/x86/kernel/module.c | 3
-rw-r--r--  arch/x86/kernel/pci-gart_64.c | 31
-rw-r--r--  arch/x86/kernel/pmtimer_64.c | 69
-rw-r--r--  arch/x86/kernel/process_64.c | 2
-rw-r--r--  arch/x86/kernel/reboot.c | 2
-rw-r--r--  arch/x86/kernel/setup.c | 115
-rw-r--r--  arch/x86/kernel/setup_percpu.c | 2
-rw-r--r--  arch/x86/kernel/smpboot.c | 108
-rw-r--r--  arch/x86/kernel/sys_i386_32.c | 4
-rw-r--r--  arch/x86/kernel/traps.c | 35
-rw-r--r--  arch/x86/kernel/tsc.c | 66
-rw-r--r--  arch/x86/kvm/lapic.c | 3
-rw-r--r--  arch/x86/kvm/svm.c | 17
-rw-r--r--  arch/x86/kvm/vmx.c | 24
-rw-r--r--  arch/x86/kvm/x86.c | 7
-rw-r--r--  arch/x86/lib/memcpy_32.c | 199
-rw-r--r--  arch/x86/lib/memcpy_64.S | 158
-rw-r--r--  arch/x86/lib/memmove_64.c | 189
-rw-r--r--  arch/x86/mm/fault.c | 47
-rw-r--r--  arch/x86/mm/init_32.c | 4
-rw-r--r--  arch/x86/mm/init_64.c | 49
-rw-r--r--  arch/x86/mm/k8topology_64.c | 8
-rw-r--r--  arch/x86/mm/kmemcheck/kmemcheck.c | 2
-rw-r--r--  arch/x86/mm/kmemcheck/opcode.c | 2
-rw-r--r--  arch/x86/mm/numa_64.c | 2
-rw-r--r--  arch/x86/mm/pgtable.c | 20
-rw-r--r--  arch/x86/mm/srat_64.c | 8
-rw-r--r--  arch/x86/mm/tlb.c | 48
-rw-r--r--  arch/x86/oprofile/backtrace.c | 70
-rw-r--r--  arch/x86/oprofile/nmi_int.c | 9
94 files changed, 2180 insertions, 1411 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index cea0cd9a316f..8e9c4d4772fb 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -25,6 +25,7 @@ config X86
 	select HAVE_IDE
 	select HAVE_OPROFILE
 	select HAVE_PERF_EVENTS if (!M386 && !M486)
+	select HAVE_IRQ_WORK
 	select HAVE_IOREMAP_PROT
 	select HAVE_KPROBES
 	select ARCH_WANT_OPTIONAL_GPIOLIB
@@ -33,6 +34,7 @@ config X86
 	select HAVE_KRETPROBES
 	select HAVE_OPTPROBES
 	select HAVE_FTRACE_MCOUNT_RECORD
+	select HAVE_C_RECORDMCOUNT
 	select HAVE_DYNAMIC_FTRACE
 	select HAVE_FUNCTION_TRACER
 	select HAVE_FUNCTION_GRAPH_TRACER
@@ -59,6 +61,8 @@ config X86
 	select ANON_INODES
 	select HAVE_ARCH_KMEMCHECK
 	select HAVE_USER_RETURN_NOTIFIER
+	select HAVE_ARCH_JUMP_LABEL
+	select HAVE_TEXT_POKE_SMP
 
 config INSTRUCTION_DECODER
 	def_bool (KPROBES || PERF_EVENTS)
@@ -670,7 +674,7 @@ config GART_IOMMU
670 bool "GART IOMMU support" if EMBEDDED 674 bool "GART IOMMU support" if EMBEDDED
671 default y 675 default y
672 select SWIOTLB 676 select SWIOTLB
673 depends on X86_64 && PCI && K8_NB 677 depends on X86_64 && PCI && AMD_NB
674 ---help--- 678 ---help---
675 Support for full DMA access of devices with 32bit memory access only 679 Support for full DMA access of devices with 32bit memory access only
676 on systems with more than 3GB. This is usually needed for USB, 680 on systems with more than 3GB. This is usually needed for USB,
@@ -795,6 +799,17 @@ config SCHED_MC
 	  making when dealing with multi-core CPU chips at a cost of slightly
 	  increased overhead in some places. If unsure say N here.
 
+config IRQ_TIME_ACCOUNTING
+	bool "Fine granularity task level IRQ time accounting"
+	default n
+	---help---
+	  Select this option to enable fine granularity task irq time
+	  accounting. This is done by reading a timestamp on each
+	  transitions between softirq and hardirq state, so there can be a
+	  small performance impact.
+
+	  If in doubt, say N here.
+
 source "kernel/Kconfig.preempt"
 
 config X86_UP_APIC
@@ -1148,6 +1163,9 @@ config X86_PAE
 config ARCH_PHYS_ADDR_T_64BIT
 	def_bool X86_64 || X86_PAE
 
+config ARCH_DMA_ADDR_T_64BIT
+	def_bool X86_64 || HIGHMEM64G
+
 config DIRECT_GBPAGES
 	bool "Enable 1GB pages for kernel pagetables" if EMBEDDED
 	default y
@@ -1326,25 +1344,34 @@ config X86_BOOTPARAM_MEMORY_CORRUPTION_CHECK
 	  Set whether the default state of memory_corruption_check is
 	  on or off.
 
-config X86_RESERVE_LOW_64K
-	bool "Reserve low 64K of RAM on AMI/Phoenix BIOSen"
-	default y
+config X86_RESERVE_LOW
+	int "Amount of low memory, in kilobytes, to reserve for the BIOS"
+	default 64
+	range 4 640
 	---help---
-	  Reserve the first 64K of physical RAM on BIOSes that are known
-	  to potentially corrupt that memory range. A numbers of BIOSes are
-	  known to utilize this area during suspend/resume, so it must not
-	  be used by the kernel.
+	  Specify the amount of low memory to reserve for the BIOS.
 
-	  Set this to N if you are absolutely sure that you trust the BIOS
-	  to get all its memory reservations and usages right.
+	  The first page contains BIOS data structures that the kernel
+	  must not use, so that page must always be reserved.
 
-	  If you have doubts about the BIOS (e.g. suspend/resume does not
-	  work or there's kernel crashes after certain hardware hotplug
-	  events) and it's not AMI or Phoenix, then you might want to enable
-	  X86_CHECK_BIOS_CORRUPTION=y to allow the kernel to check typical
-	  corruption patterns.
+	  By default we reserve the first 64K of physical RAM, as a
+	  number of BIOSes are known to corrupt that memory range
+	  during events such as suspend/resume or monitor cable
+	  insertion, so it must not be used by the kernel.
 
-	  Say Y if unsure.
+	  You can set this to 4 if you are absolutely sure that you
+	  trust the BIOS to get all its memory reservations and usages
+	  right. If you know your BIOS have problems beyond the
+	  default 64K area, you can set this to 640 to avoid using the
+	  entire low memory range.
+
+	  If you have doubts about the BIOS (e.g. suspend/resume does
+	  not work or there's kernel crashes after certain hardware
+	  hotplug events) then you might want to enable
+	  X86_CHECK_BIOS_CORRUPTION=y to allow the kernel to check
+	  typical corruption patterns.
+
+	  Leave this to the default value of 64 if you are unsure.
 
 config MATH_EMULATION
 	bool
@@ -2076,7 +2103,7 @@ config OLPC_OPENFIRMWARE
 
 endif # X86_32
 
-config K8_NB
+config AMD_NB
 	def_bool y
 	depends on CPU_SUP_AMD && PCI
 
@@ -2125,6 +2152,10 @@ config HAVE_ATOMIC_IOMAP
 	def_bool y
 	depends on X86_32
 
+config HAVE_TEXT_POKE_SMP
+	bool
+	select STOP_MACHINE if SMP
+
 source "net/Kconfig"
 
 source "drivers/Kconfig"
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index e8c8881351b3..b02e509072a7 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -96,8 +96,12 @@ cfi := $(call as-instr,.cfi_startproc\n.cfi_rel_offset $(sp-y)$(comma)0\n.cfi_en
 # is .cfi_signal_frame supported too?
 cfi-sigframe := $(call as-instr,.cfi_startproc\n.cfi_signal_frame\n.cfi_endproc,-DCONFIG_AS_CFI_SIGNAL_FRAME=1)
 cfi-sections := $(call as-instr,.cfi_sections .debug_frame,-DCONFIG_AS_CFI_SECTIONS=1)
-KBUILD_AFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections)
-KBUILD_CFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections)
+
+# does binutils support specific instructions?
+asinstr := $(call as-instr,fxsaveq (%rax),-DCONFIG_AS_FXSAVEQ=1)
+
+KBUILD_AFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) $(asinstr)
+KBUILD_CFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) $(asinstr)
 
 LDFLAGS := -m elf_$(UTS_MACHINE)
 
diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c
index 0350311906ae..2d93bdbc9ac0 100644
--- a/arch/x86/ia32/ia32_aout.c
+++ b/arch/x86/ia32/ia32_aout.c
@@ -34,7 +34,7 @@
 #include <asm/ia32.h>
 
 #undef WARN_OLD
-#undef CORE_DUMP /* probably broken */
+#undef CORE_DUMP /* definitely broken */
 
 static int load_aout_binary(struct linux_binprm *, struct pt_regs *regs);
 static int load_aout_library(struct file *);
@@ -131,21 +131,15 @@ static void set_brk(unsigned long start, unsigned long end)
  * macros to write out all the necessary info.
  */
 
-static int dump_write(struct file *file, const void *addr, int nr)
-{
-	return file->f_op->write(file, addr, nr, &file->f_pos) == nr;
-}
+#include <linux/coredump.h>
 
 #define DUMP_WRITE(addr, nr)			     \
 	if (!dump_write(file, (void *)(addr), (nr))) \
 		goto end_coredump;
 
 #define DUMP_SEEK(offset)		\
-	if (file->f_op->llseek) {	\
-		if (file->f_op->llseek(file, (offset), 0) != (offset)) \
-			goto end_coredump; \
-	} else				\
-		file->f_pos = (offset)
+	if (!dump_seek(file, offset))	\
+		goto end_coredump;
 
 #define START_DATA()	(u.u_tsize << PAGE_SHIFT)
 #define START_STACK(u)	(u.start_stack)
@@ -217,12 +211,6 @@ static int aout_core_dump(long signr, struct pt_regs *regs, struct file *file,
 		dump_size = dump.u_ssize << PAGE_SHIFT;
 		DUMP_WRITE(dump_start, dump_size);
 	}
-	/*
-	 * Finally dump the task struct. Not be used by gdb, but
-	 * could be useful
-	 */
-	set_fs(KERNEL_DS);
-	DUMP_WRITE(current, sizeof(*current));
 end_coredump:
 	set_fs(fs);
 	return has_dumped;
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index bc6abb7bc7ee..76561d20ea2f 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -4,6 +4,7 @@
 #include <linux/types.h>
 #include <linux/stddef.h>
 #include <linux/stringify.h>
+#include <linux/jump_label.h>
 #include <asm/asm.h>
 
 /*
@@ -160,6 +161,8 @@ static inline void apply_paravirt(struct paravirt_patch_site *start,
 #define __parainstructions_end	NULL
 #endif
 
+extern void *text_poke_early(void *addr, const void *opcode, size_t len);
+
 /*
  * Clear and restore the kernel write-protection flag on the local CPU.
  * Allows the kernel to edit read-only pages.
@@ -180,4 +183,12 @@ static inline void apply_paravirt(struct paravirt_patch_site *start,
 extern void *text_poke(void *addr, const void *opcode, size_t len);
 extern void *text_poke_smp(void *addr, const void *opcode, size_t len);
 
+#if defined(CONFIG_DYNAMIC_FTRACE) || defined(HAVE_JUMP_LABEL)
+#define IDEAL_NOP_SIZE_5 5
+extern unsigned char ideal_nop5[IDEAL_NOP_SIZE_5];
+extern void arch_init_ideal_nop5(void);
+#else
+static inline void arch_init_ideal_nop5(void) {}
+#endif
+
 #endif /* _ASM_X86_ALTERNATIVE_H */
diff --git a/arch/x86/include/asm/amd_iommu.h b/arch/x86/include/asm/amd_iommu.h
index 5af2982133b5..f16a2caca1e0 100644
--- a/arch/x86/include/asm/amd_iommu.h
+++ b/arch/x86/include/asm/amd_iommu.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2007-2009 Advanced Micro Devices, Inc.
+ * Copyright (C) 2007-2010 Advanced Micro Devices, Inc.
  * Author: Joerg Roedel <joerg.roedel@amd.com>
  *         Leo Duran <leo.duran@amd.com>
  *
diff --git a/arch/x86/include/asm/amd_iommu_proto.h b/arch/x86/include/asm/amd_iommu_proto.h
index cb030374b90a..916bc8111a01 100644
--- a/arch/x86/include/asm/amd_iommu_proto.h
+++ b/arch/x86/include/asm/amd_iommu_proto.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2009 Advanced Micro Devices, Inc.
+ * Copyright (C) 2009-2010 Advanced Micro Devices, Inc.
  * Author: Joerg Roedel <joerg.roedel@amd.com>
  *
  * This program is free software; you can redistribute it and/or modify it
diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h
index 08616180deaf..e3509fc303bf 100644
--- a/arch/x86/include/asm/amd_iommu_types.h
+++ b/arch/x86/include/asm/amd_iommu_types.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2007-2009 Advanced Micro Devices, Inc.
+ * Copyright (C) 2007-2010 Advanced Micro Devices, Inc.
  * Author: Joerg Roedel <joerg.roedel@amd.com>
  *         Leo Duran <leo.duran@amd.com>
  *
@@ -416,13 +416,22 @@ struct amd_iommu {
 	struct dma_ops_domain *default_dom;
 
 	/*
-	 * This array is required to work around a potential BIOS bug.
-	 * The BIOS may miss to restore parts of the PCI configuration
-	 * space when the system resumes from S3. The result is that the
-	 * IOMMU does not execute commands anymore which leads to system
-	 * failure.
+	 * We can't rely on the BIOS to restore all values on reinit, so we
+	 * need to stash them
 	 */
-	u32 cache_cfg[4];
+
+	/* The iommu BAR */
+	u32 stored_addr_lo;
+	u32 stored_addr_hi;
+
+	/*
+	 * Each iommu has 6 l1s, each of which is documented as having 0x12
+	 * registers
+	 */
+	u32 stored_l1[6][0x12];
+
+	/* The l2 indirect registers */
+	u32 stored_l2[0x83];
 };
 
 /*
diff --git a/arch/x86/include/asm/k8.h b/arch/x86/include/asm/amd_nb.h
index af00bd1d2089..c8517f81b21e 100644
--- a/arch/x86/include/asm/k8.h
+++ b/arch/x86/include/asm/amd_nb.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_X86_K8_H
-#define _ASM_X86_K8_H
+#ifndef _ASM_X86_AMD_NB_H
+#define _ASM_X86_AMD_NB_H
 
 #include <linux/pci.h>
 
@@ -7,24 +7,27 @@ extern struct pci_device_id k8_nb_ids[];
 struct bootnode;
 
 extern int early_is_k8_nb(u32 value);
-extern struct pci_dev **k8_northbridges;
-extern int num_k8_northbridges;
 extern int cache_k8_northbridges(void);
 extern void k8_flush_garts(void);
 extern int k8_get_nodes(struct bootnode *nodes);
 extern int k8_numa_init(unsigned long start_pfn, unsigned long end_pfn);
 extern int k8_scan_nodes(void);
 
-#ifdef CONFIG_K8_NB
-extern int num_k8_northbridges;
+struct k8_northbridge_info {
+	u16 num;
+	u8 gart_supported;
+	struct pci_dev **nb_misc;
+};
+extern struct k8_northbridge_info k8_northbridges;
+
+#ifdef CONFIG_AMD_NB
 
 static inline struct pci_dev *node_to_k8_nb_misc(int node)
 {
-	return (node < num_k8_northbridges) ? k8_northbridges[node] : NULL;
+	return (node < k8_northbridges.num) ? k8_northbridges.nb_misc[node] : NULL;
 }
 
 #else
-#define num_k8_northbridges 0
 
 static inline struct pci_dev *node_to_k8_nb_misc(int node)
 {
@@ -33,4 +36,4 @@ static inline struct pci_dev *node_to_k8_nb_misc(int node)
 #endif
 
 
-#endif /* _ASM_X86_K8_H */
+#endif /* _ASM_X86_AMD_NB_H */
diff --git a/arch/x86/include/asm/apb_timer.h b/arch/x86/include/asm/apb_timer.h
index a69b1ac9eaf8..2fefa501d3ba 100644
--- a/arch/x86/include/asm/apb_timer.h
+++ b/arch/x86/include/asm/apb_timer.h
@@ -54,7 +54,6 @@ extern struct clock_event_device *global_clock_event;
 extern unsigned long apbt_quick_calibrate(void);
 extern int arch_setup_apbt_irqs(int irq, int trigger, int mask, int cpu);
 extern void apbt_setup_secondary_clock(void);
-extern unsigned int boot_cpu_id;
 
 extern struct sfi_timer_table_entry *sfi_get_mtmr(int hint);
 extern void sfi_free_mtmr(struct sfi_timer_table_entry *mtmr);
diff --git a/arch/x86/include/asm/cpu.h b/arch/x86/include/asm/cpu.h
index b185091bf19c..4fab24de26b1 100644
--- a/arch/x86/include/asm/cpu.h
+++ b/arch/x86/include/asm/cpu.h
@@ -32,6 +32,5 @@ extern void arch_unregister_cpu(int);
 
 DECLARE_PER_CPU(int, cpu_state);
 
-extern unsigned int boot_cpu_id;
 
 #endif /* _ASM_X86_CPU_H */
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 3f76523589af..220e2ea08e80 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -152,10 +152,14 @@
 #define X86_FEATURE_3DNOWPREFETCH (6*32+ 8) /* 3DNow prefetch instructions */
 #define X86_FEATURE_OSVW	(6*32+ 9) /* OS Visible Workaround */
 #define X86_FEATURE_IBS		(6*32+10) /* Instruction Based Sampling */
-#define X86_FEATURE_SSE5	(6*32+11) /* SSE-5 */
+#define X86_FEATURE_XOP		(6*32+11) /* extended AVX instructions */
 #define X86_FEATURE_SKINIT	(6*32+12) /* SKINIT/STGI instructions */
 #define X86_FEATURE_WDT		(6*32+13) /* Watchdog timer */
+#define X86_FEATURE_LWP		(6*32+15) /* Light Weight Profiling */
+#define X86_FEATURE_FMA4	(6*32+16) /* 4 operands MAC instructions */
 #define X86_FEATURE_NODEID_MSR	(6*32+19) /* NodeId MSR */
+#define X86_FEATURE_TBM		(6*32+21) /* trailing bit manipulations */
+#define X86_FEATURE_TOPOEXT	(6*32+22) /* topology extensions CPUID leafs */
 
 /*
  * Auxiliary flags: Linux defined - For features scattered in various
@@ -180,6 +184,13 @@
 #define X86_FEATURE_LBRV	(8*32+ 6) /* AMD LBR Virtualization support */
 #define X86_FEATURE_SVML	(8*32+ 7) /* "svm_lock" AMD SVM locking MSR */
 #define X86_FEATURE_NRIPS	(8*32+ 8) /* "nrip_save" AMD SVM next_rip save */
+#define X86_FEATURE_TSCRATEMSR	(8*32+ 9) /* "tsc_scale" AMD TSC scaling support */
+#define X86_FEATURE_VMCBCLEAN	(8*32+10) /* "vmcb_clean" AMD VMCB clean bits support */
+#define X86_FEATURE_FLUSHBYASID	(8*32+11) /* AMD flush-by-ASID support */
+#define X86_FEATURE_DECODEASSISTS (8*32+12) /* AMD Decode Assists support */
+#define X86_FEATURE_PAUSEFILTER	(8*32+13) /* AMD filtered pause intercept */
+#define X86_FEATURE_PFTHRESHOLD	(8*32+14) /* AMD pause filter threshold */
+
 
 /* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */
 #define X86_FEATURE_FSGSBASE	(9*32+ 0) /* {RD/WR}{FS/GS}BASE instructions*/
diff --git a/arch/x86/include/asm/dwarf2.h b/arch/x86/include/asm/dwarf2.h
index 733f7e91e7a9..326099199318 100644
--- a/arch/x86/include/asm/dwarf2.h
+++ b/arch/x86/include/asm/dwarf2.h
@@ -89,6 +89,16 @@
 	CFI_ADJUST_CFA_OFFSET -8
 	.endm
 
+	.macro pushfq_cfi
+	pushfq
+	CFI_ADJUST_CFA_OFFSET 8
+	.endm
+
+	.macro popfq_cfi
+	popfq
+	CFI_ADJUST_CFA_OFFSET -8
+	.endm
+
 	.macro movq_cfi reg offset=0
 	movq %\reg, \offset(%rsp)
 	CFI_REL_OFFSET \reg, \offset
@@ -109,6 +119,16 @@
 	CFI_ADJUST_CFA_OFFSET -4
 	.endm
 
+	.macro pushfl_cfi
+	pushfl
+	CFI_ADJUST_CFA_OFFSET 4
+	.endm
+
+	.macro popfl_cfi
+	popfl
+	CFI_ADJUST_CFA_OFFSET -4
+	.endm
+
 	.macro movl_cfi reg offset=0
 	movl %\reg, \offset(%esp)
 	CFI_REL_OFFSET \reg, \offset
diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h
index 8e8ec663a98f..b8e96a18676b 100644
--- a/arch/x86/include/asm/entry_arch.h
+++ b/arch/x86/include/asm/entry_arch.h
@@ -49,8 +49,8 @@ BUILD_INTERRUPT(apic_timer_interrupt,LOCAL_TIMER_VECTOR)
 BUILD_INTERRUPT(error_interrupt,ERROR_APIC_VECTOR)
 BUILD_INTERRUPT(spurious_interrupt,SPURIOUS_APIC_VECTOR)
 
-#ifdef CONFIG_PERF_EVENTS
-BUILD_INTERRUPT(perf_pending_interrupt, LOCAL_PENDING_VECTOR)
+#ifdef CONFIG_IRQ_WORK
+BUILD_INTERRUPT(irq_work_interrupt, IRQ_WORK_VECTOR)
 #endif
 
 #ifdef CONFIG_X86_THERMAL_VECTOR
diff --git a/arch/x86/include/asm/gart.h b/arch/x86/include/asm/gart.h
index 4ac5b0f33fc1..bf357f9b25f0 100644
--- a/arch/x86/include/asm/gart.h
+++ b/arch/x86/include/asm/gart.h
@@ -17,6 +17,7 @@ extern int fix_aperture;
 #define GARTEN		(1<<0)
 #define DISGARTCPU	(1<<4)
 #define DISGARTIO	(1<<5)
+#define DISTLBWALKPRB	(1<<6)
 
 /* GART cache control register bits. */
 #define INVGART		(1<<0)
@@ -27,7 +28,6 @@ extern int fix_aperture;
 #define AMD64_GARTAPERTUREBASE	0x94
 #define AMD64_GARTTABLEBASE	0x98
 #define AMD64_GARTCACHECTL	0x9c
-#define AMD64_GARTEN		(1<<0)
 
 #ifdef CONFIG_GART_IOMMU
 extern int gart_iommu_aperture;
@@ -57,6 +57,19 @@ static inline void gart_iommu_hole_init(void)
 
 extern int agp_amd64_init(void);
 
+static inline void gart_set_size_and_enable(struct pci_dev *dev, u32 order)
+{
+	u32 ctl;
+
+	/*
+	 * Don't enable translation but enable GART IO and CPU accesses.
+	 * Also, set DISTLBWALKPRB since GART tables memory is UC.
+	 */
+	ctl = DISTLBWALKPRB | order << 1;
+
+	pci_write_config_dword(dev, AMD64_GARTAPERTURECTL, ctl);
+}
+
 static inline void enable_gart_translation(struct pci_dev *dev, u64 addr)
 {
 	u32 tmp, ctl;
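
A usage sketch, not part of this diff (the function and variable names are
illustrative): the new gart_set_size_and_enable() sizes the aperture with
translation still off, and only afterwards is translation switched on via the
existing enable_gart_translation() with the table's physical address.

	static void example_gart_bringup(struct pci_dev *nb_dev, u32 aper_order,
					 u64 gart_table_phys)
	{
		/* program the aperture size; GARTEN stays clear here */
		gart_set_size_and_enable(nb_dev, aper_order);

		/* then point the GART at its table and enable translation */
		enable_gart_translation(nb_dev, gart_table_phys);
	}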
diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h
index aeab29aee617..55e4de613f0e 100644
--- a/arch/x86/include/asm/hardirq.h
+++ b/arch/x86/include/asm/hardirq.h
@@ -14,7 +14,7 @@ typedef struct {
 #endif
 	unsigned int x86_platform_ipis;	/* arch dependent */
 	unsigned int apic_perf_irqs;
-	unsigned int apic_pending_irqs;
+	unsigned int apic_irq_work_irqs;
 #ifdef CONFIG_SMP
 	unsigned int irq_resched_count;
 	unsigned int irq_call_count;
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index 46c0fe05f230..3a54a1ca1a02 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -29,7 +29,7 @@
 extern void apic_timer_interrupt(void);
 extern void x86_platform_ipi(void);
 extern void error_interrupt(void);
-extern void perf_pending_interrupt(void);
+extern void irq_work_interrupt(void);
 
 extern void spurious_interrupt(void);
 extern void thermal_interrupt(void);
diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h
index a73a8d5a5e69..4aa2bb3b242a 100644
--- a/arch/x86/include/asm/i387.h
+++ b/arch/x86/include/asm/i387.h
@@ -55,6 +55,12 @@ extern int save_i387_xstate_ia32(void __user *buf);
 extern int restore_i387_xstate_ia32(void __user *buf);
 #endif
 
+#ifdef CONFIG_MATH_EMULATION
+extern void finit_soft_fpu(struct i387_soft_struct *soft);
+#else
+static inline void finit_soft_fpu(struct i387_soft_struct *soft) {}
+#endif
+
 #define X87_FSW_ES		(1 << 7)	/* Exception Summary */
 
 static __always_inline __pure bool use_xsaveopt(void)
@@ -67,6 +73,11 @@ static __always_inline __pure bool use_xsave(void)
 	return static_cpu_has(X86_FEATURE_XSAVE);
 }
 
+static __always_inline __pure bool use_fxsr(void)
+{
+	return static_cpu_has(X86_FEATURE_FXSR);
+}
+
 extern void __sanitize_i387_state(struct task_struct *);
 
 static inline void sanitize_i387_state(struct task_struct *tsk)
@@ -77,19 +88,11 @@ static inline void sanitize_i387_state(struct task_struct *tsk)
 }
 
 #ifdef CONFIG_X86_64
-
-/* Ignore delayed exceptions from user space */
-static inline void tolerant_fwait(void)
-{
-	asm volatile("1: fwait\n"
-		     "2:\n"
-		     _ASM_EXTABLE(1b, 2b));
-}
-
 static inline int fxrstor_checking(struct i387_fxsave_struct *fx)
 {
 	int err;
 
+	/* See comment in fxsave() below. */
 	asm volatile("1:  rex64/fxrstor (%[fx])\n\t"
 		     "2:\n"
 		     ".section .fixup,\"ax\"\n"
@@ -98,44 +101,10 @@ static inline int fxrstor_checking(struct i387_fxsave_struct *fx)
98 ".previous\n" 101 ".previous\n"
99 _ASM_EXTABLE(1b, 3b) 102 _ASM_EXTABLE(1b, 3b)
100 : [err] "=r" (err) 103 : [err] "=r" (err)
101#if 0 /* See comment in fxsave() below. */ 104 : [fx] "R" (fx), "m" (*fx), "0" (0));
102 : [fx] "r" (fx), "m" (*fx), "0" (0));
103#else
104 : [fx] "cdaSDb" (fx), "m" (*fx), "0" (0));
105#endif
106 return err; 105 return err;
107} 106}
108 107
109/* AMD CPUs don't save/restore FDP/FIP/FOP unless an exception
110 is pending. Clear the x87 state here by setting it to fixed
111 values. The kernel data segment can be sometimes 0 and sometimes
112 new user value. Both should be ok.
113 Use the PDA as safe address because it should be already in L1. */
114static inline void fpu_clear(struct fpu *fpu)
115{
116 struct xsave_struct *xstate = &fpu->state->xsave;
117 struct i387_fxsave_struct *fx = &fpu->state->fxsave;
118
119 /*
120 * xsave header may indicate the init state of the FP.
121 */
122 if (use_xsave() &&
123 !(xstate->xsave_hdr.xstate_bv & XSTATE_FP))
124 return;
125
126 if (unlikely(fx->swd & X87_FSW_ES))
127 asm volatile("fnclex");
128 alternative_input(ASM_NOP8 ASM_NOP2,
129 " emms\n" /* clear stack tags */
130 " fildl %%gs:0", /* load to clear state */
131 X86_FEATURE_FXSAVE_LEAK);
132}
133
134static inline void clear_fpu_state(struct task_struct *tsk)
135{
136 fpu_clear(&tsk->thread.fpu);
137}
138
139static inline int fxsave_user(struct i387_fxsave_struct __user *fx) 108static inline int fxsave_user(struct i387_fxsave_struct __user *fx)
140{ 109{
141 int err; 110 int err;
@@ -149,6 +118,7 @@ static inline int fxsave_user(struct i387_fxsave_struct __user *fx)
 	if (unlikely(err))
 		return -EFAULT;
 
+	/* See comment in fxsave() below. */
 	asm volatile("1:  rex64/fxsave (%[fx])\n\t"
 		     "2:\n"
 		     ".section .fixup,\"ax\"\n"
@@ -157,11 +127,7 @@ static inline int fxsave_user(struct i387_fxsave_struct __user *fx)
157 ".previous\n" 127 ".previous\n"
158 _ASM_EXTABLE(1b, 3b) 128 _ASM_EXTABLE(1b, 3b)
159 : [err] "=r" (err), "=m" (*fx) 129 : [err] "=r" (err), "=m" (*fx)
160#if 0 /* See comment in fxsave() below. */ 130 : [fx] "R" (fx), "0" (0));
161 : [fx] "r" (fx), "0" (0));
162#else
163 : [fx] "cdaSDb" (fx), "0" (0));
164#endif
165 if (unlikely(err) && 131 if (unlikely(err) &&
166 __clear_user(fx, sizeof(struct i387_fxsave_struct))) 132 __clear_user(fx, sizeof(struct i387_fxsave_struct)))
167 err = -EFAULT; 133 err = -EFAULT;
@@ -175,56 +141,29 @@ static inline void fpu_fxsave(struct fpu *fpu)
 	   uses any extended registers for addressing, a second REX prefix
 	   will be generated (to the assembler, rex64 followed by semicolon
 	   is a separate instruction), and hence the 64-bitness is lost. */
-#if 0
+
+#ifdef CONFIG_AS_FXSAVEQ
 	/* Using "fxsaveq %0" would be the ideal choice, but is only supported
 	   starting with gas 2.16. */
 	__asm__ __volatile__("fxsaveq %0"
 			     : "=m" (fpu->state->fxsave));
-#elif 0
+#else
 	/* Using, as a workaround, the properly prefixed form below isn't
 	   accepted by any binutils version so far released, complaining that
 	   the same type of prefix is used twice if an extended register is
-	   needed for addressing (fix submitted to mainline 2005-11-21). */
-	__asm__ __volatile__("rex64/fxsave %0"
-			     : "=m" (fpu->state->fxsave));
-#else
-	/* This, however, we can work around by forcing the compiler to select
+	   needed for addressing (fix submitted to mainline 2005-11-21).
+	asm volatile("rex64/fxsave %0"
+		     : "=m" (fpu->state->fxsave));
+	   This, however, we can work around by forcing the compiler to select
 	   an addressing mode that doesn't require extended registers. */
-	__asm__ __volatile__("rex64/fxsave (%1)"
-			     : "=m" (fpu->state->fxsave)
-			     : "cdaSDb" (&fpu->state->fxsave));
+	asm volatile("rex64/fxsave (%[fx])"
+		     : "=m" (fpu->state->fxsave)
+		     : [fx] "R" (&fpu->state->fxsave));
 #endif
 }
 
-static inline void fpu_save_init(struct fpu *fpu)
-{
-	if (use_xsave())
-		fpu_xsave(fpu);
-	else
-		fpu_fxsave(fpu);
-
-	fpu_clear(fpu);
-}
-
-static inline void __save_init_fpu(struct task_struct *tsk)
-{
-	fpu_save_init(&tsk->thread.fpu);
-	task_thread_info(tsk)->status &= ~TS_USEDFPU;
-}
-
 #else  /* CONFIG_X86_32 */
 
-#ifdef CONFIG_MATH_EMULATION
-extern void finit_soft_fpu(struct i387_soft_struct *soft);
-#else
-static inline void finit_soft_fpu(struct i387_soft_struct *soft) {}
-#endif
-
-static inline void tolerant_fwait(void)
-{
-	asm volatile("fnclex ; fwait");
-}
-
 /* perform fxrstor iff the processor has extended states, otherwise frstor */
 static inline int fxrstor_checking(struct i387_fxsave_struct *fx)
 {
@@ -241,6 +180,14 @@ static inline int fxrstor_checking(struct i387_fxsave_struct *fx)
 	return 0;
 }
 
+static inline void fpu_fxsave(struct fpu *fpu)
+{
+	asm volatile("fxsave %[fx]"
+		     : [fx] "=m" (fpu->state->fxsave));
+}
+
+#endif	/* CONFIG_X86_64 */
+
 /* We need a safe address that is cheap to find and that is already
    in L1 during context switch. The best choices are unfortunately
    different for UP and SMP */
@@ -256,47 +203,33 @@ static inline int fxrstor_checking(struct i387_fxsave_struct *fx)
 static inline void fpu_save_init(struct fpu *fpu)
 {
 	if (use_xsave()) {
-		struct xsave_struct *xstate = &fpu->state->xsave;
-		struct i387_fxsave_struct *fx = &fpu->state->fxsave;
-
 		fpu_xsave(fpu);
 
 		/*
 		 * xsave header may indicate the init state of the FP.
 		 */
-		if (!(xstate->xsave_hdr.xstate_bv & XSTATE_FP))
-			goto end;
-
-		if (unlikely(fx->swd & X87_FSW_ES))
-			asm volatile("fnclex");
-
-		/*
-		 * we can do a simple return here or be paranoid :)
-		 */
-		goto clear_state;
+		if (!(fpu->state->xsave.xsave_hdr.xstate_bv & XSTATE_FP))
+			return;
+	} else if (use_fxsr()) {
+		fpu_fxsave(fpu);
+	} else {
+		asm volatile("fsave %[fx]; fwait"
+			     : [fx] "=m" (fpu->state->fsave));
+		return;
 	}
 
-	/* Use more nops than strictly needed in case the compiler
-	   varies code */
-	alternative_input(
-		"fnsave %[fx] ;fwait;" GENERIC_NOP8 GENERIC_NOP4,
-		"fxsave %[fx]\n"
-		"bt $7,%[fsw] ; jnc 1f ; fnclex\n1:",
-		X86_FEATURE_FXSR,
-		[fx] "m" (fpu->state->fxsave),
-		[fsw] "m" (fpu->state->fxsave.swd) : "memory");
-clear_state:
+	if (unlikely(fpu->state->fxsave.swd & X87_FSW_ES))
+		asm volatile("fnclex");
+
 	/* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception
 	   is pending.  Clear the x87 state here by setting it to fixed
 	   values. safe_address is a random variable that should be in L1 */
 	alternative_input(
-		GENERIC_NOP8 GENERIC_NOP2,
+		ASM_NOP8 ASM_NOP2,
 		"emms\n\t"		/* clear stack tags */
-		"fildl %[addr]",	/* set F?P to defined value */
+		"fildl %P[addr]",	/* set F?P to defined value */
 		X86_FEATURE_FXSAVE_LEAK,
 		[addr] "m" (safe_address));
-end:
-	;
 }
 
 static inline void __save_init_fpu(struct task_struct *tsk)
@@ -305,9 +238,6 @@ static inline void __save_init_fpu(struct task_struct *tsk)
 	task_thread_info(tsk)->status &= ~TS_USEDFPU;
 }
 
-
-#endif	/* CONFIG_X86_64 */
-
 static inline int fpu_fxrstor_checking(struct fpu *fpu)
 {
 	return fxrstor_checking(&fpu->state->fxsave);
@@ -344,7 +274,10 @@ static inline void __unlazy_fpu(struct task_struct *tsk)
 static inline void __clear_fpu(struct task_struct *tsk)
 {
 	if (task_thread_info(tsk)->status & TS_USEDFPU) {
-		tolerant_fwait();
+		/* Ignore delayed exceptions from user space */
+		asm volatile("1: fwait\n"
+			     "2:\n"
+			     _ASM_EXTABLE(1b, 2b));
 		task_thread_info(tsk)->status &= ~TS_USEDFPU;
 		stts();
 	}
@@ -405,19 +338,6 @@ static inline void irq_ts_restore(int TS_state)
 	stts();
 }
 
-#ifdef CONFIG_X86_64
-
-static inline void save_init_fpu(struct task_struct *tsk)
-{
-	__save_init_fpu(tsk);
-	stts();
-}
-
-#define unlazy_fpu	__unlazy_fpu
-#define clear_fpu	__clear_fpu
-
-#else  /* CONFIG_X86_32 */
-
 /*
  * These disable preemption on their own and are safe
  */
@@ -443,8 +363,6 @@ static inline void clear_fpu(struct task_struct *tsk)
 	preempt_enable();
 }
 
-#endif	/* CONFIG_X86_64 */
-
 /*
  * i387 state interaction
  */
@@ -508,7 +426,4 @@ extern void fpu_finit(struct fpu *fpu);
 
 #endif	/* __ASSEMBLY__ */
 
-#define PSHUFB_XMM5_XMM0 .byte 0x66, 0x0f, 0x38, 0x00, 0xc5
-#define PSHUFB_XMM5_XMM6 .byte 0x66, 0x0f, 0x38, 0x00, 0xf5
-
 #endif /* _ASM_X86_I387_H */
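
A side note on the "R" constraint now used for the fxsave/fxrstor operands
(an illustrative sketch, not code from this diff): "R" limits gcc to the
legacy registers (rax, rbx, rcx, rdx, rsi, rdi, rbp, rsp), so addressing the
operand never needs a REX prefix of its own that could collide with the
explicit rex64 prefix.

	static inline void fxsave_via_legacy_reg(struct i387_fxsave_struct *fx)
	{
		/* "R" guarantees a legacy base register, e.g. (%rax), never (%r8) */
		asm volatile("rex64/fxsave (%[fx])"
			     : "=m" (*fx)
			     : [fx] "R" (fx));
	}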
diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
index 30a3e9776123..6a45ec41ec26 100644
--- a/arch/x86/include/asm/io.h
+++ b/arch/x86/include/asm/io.h
@@ -206,6 +206,7 @@ static inline void __iomem *ioremap(resource_size_t offset, unsigned long size)
 
 extern void iounmap(volatile void __iomem *addr);
 
+extern void set_iounmap_nonlazy(void);
 
 #ifdef __KERNEL__
 
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index e2ca30092557..6af0894dafb4 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -114,9 +114,9 @@
 #define X86_PLATFORM_IPI_VECTOR		0xed
 
 /*
- * Performance monitoring pending work vector:
+ * IRQ work vector:
  */
-#define LOCAL_PENDING_VECTOR		0xec
+#define IRQ_WORK_VECTOR			0xec
 
 #define UV_BAU_MESSAGE			0xea
 
diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h
new file mode 100644
index 000000000000..f52d42e80585
--- /dev/null
+++ b/arch/x86/include/asm/jump_label.h
@@ -0,0 +1,37 @@
1#ifndef _ASM_X86_JUMP_LABEL_H
2#define _ASM_X86_JUMP_LABEL_H
3
4#ifdef __KERNEL__
5
6#include <linux/types.h>
7#include <asm/nops.h>
8
9#define JUMP_LABEL_NOP_SIZE 5
10
11# define JUMP_LABEL_INITIAL_NOP ".byte 0xe9 \n\t .long 0\n\t"
12
13# define JUMP_LABEL(key, label) \
14 do { \
15 asm goto("1:" \
16 JUMP_LABEL_INITIAL_NOP \
17 ".pushsection __jump_table, \"a\" \n\t"\
18 _ASM_PTR "1b, %l[" #label "], %c0 \n\t" \
19 ".popsection \n\t" \
20 : : "i" (key) : : label); \
21 } while (0)
22
23#endif /* __KERNEL__ */
24
25#ifdef CONFIG_X86_64
26typedef u64 jump_label_t;
27#else
28typedef u32 jump_label_t;
29#endif
30
31struct jump_entry {
32 jump_label_t code;
33 jump_label_t target;
34 jump_label_t key;
35};
36
37#endif
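
A usage sketch of the JUMP_LABEL() macro above (assumed caller; the key and
slow-path function are hypothetical): the initial 5-byte jump recorded in
__jump_table is later patched to ideal_nop5 while the key is disabled, so
the fast path costs a single no-op.

	#include <asm/jump_label.h>

	extern void do_trace_slowpath(void);	/* hypothetical slow path */
	static int trace_key;			/* hypothetical enable key */

	static inline void example_event(void)
	{
		JUMP_LABEL(&trace_key, do_trace);
		return;				/* fast path */
	do_trace:
		do_trace_slowpath();		/* reached once the jump is patched in */
	}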
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 502e53f999cf..c52e2eb40a1e 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -652,20 +652,6 @@ static inline struct kvm_mmu_page *page_header(hpa_t shadow_page)
652 return (struct kvm_mmu_page *)page_private(page); 652 return (struct kvm_mmu_page *)page_private(page);
653} 653}
654 654
655static inline u16 kvm_read_fs(void)
656{
657 u16 seg;
658 asm("mov %%fs, %0" : "=g"(seg));
659 return seg;
660}
661
662static inline u16 kvm_read_gs(void)
663{
664 u16 seg;
665 asm("mov %%gs, %0" : "=g"(seg));
666 return seg;
667}
668
669static inline u16 kvm_read_ldt(void) 655static inline u16 kvm_read_ldt(void)
670{ 656{
671 u16 ldt; 657 u16 ldt;
@@ -673,16 +659,6 @@ static inline u16 kvm_read_ldt(void)
 	return ldt;
 }
 
-static inline void kvm_load_fs(u16 sel)
-{
-	asm("mov %0, %%fs" : : "rm"(sel));
-}
-
-static inline void kvm_load_gs(u16 sel)
-{
-	asm("mov %0, %%gs" : : "rm"(sel));
-}
-
 static inline void kvm_load_ldt(u16 sel)
 {
 	asm("lldt %0" : : "rm"(sel));
diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h
new file mode 100644
index 000000000000..bcdff997668c
--- /dev/null
+++ b/arch/x86/include/asm/mwait.h
@@ -0,0 +1,15 @@
1#ifndef _ASM_X86_MWAIT_H
2#define _ASM_X86_MWAIT_H
3
4#define MWAIT_SUBSTATE_MASK 0xf
5#define MWAIT_CSTATE_MASK 0xf
6#define MWAIT_SUBSTATE_SIZE 4
7#define MWAIT_MAX_NUM_CSTATES 8
8
9#define CPUID_MWAIT_LEAF 5
10#define CPUID5_ECX_EXTENSIONS_SUPPORTED 0x1
11#define CPUID5_ECX_INTERRUPT_BREAK 0x2
12
13#define MWAIT_ECX_INTERRUPT_BREAK 0x1
14
15#endif /* _ASM_X86_MWAIT_H */
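
For context, a sketch of how these constants get consumed (assumed code, in
the style of acpi_processor_ffh_cstate_probe_cpu(); cpuid() comes from
<asm/processor.h>): CPUID leaf 5 packs one 4-bit count of supported MWAIT
sub-states per C-state into EDX.

	#include <asm/processor.h>
	#include <asm/mwait.h>

	static unsigned int mwait_substates_for(unsigned int cstate)
	{
		unsigned int eax, ebx, ecx, edx;

		cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &edx);

		/* shift down to this C-state's 4-bit sub-state field */
		return (edx >> (cstate * MWAIT_SUBSTATE_SIZE)) & MWAIT_SUBSTATE_MASK;
	}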
diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h
index a667f24c7254..1df66211fd1b 100644
--- a/arch/x86/include/asm/page_types.h
+++ b/arch/x86/include/asm/page_types.h
@@ -8,7 +8,7 @@
 #define PAGE_SIZE	(_AC(1,UL) << PAGE_SHIFT)
 #define PAGE_MASK	(~(PAGE_SIZE-1))
 
-#define __PHYSICAL_MASK		((phys_addr_t)(1ULL << __PHYSICAL_MASK_SHIFT) - 1)
+#define __PHYSICAL_MASK		((phys_addr_t)((1ULL << __PHYSICAL_MASK_SHIFT) - 1))
 #define __VIRTUAL_MASK		((1UL << __VIRTUAL_MASK_SHIFT) - 1)
 
 /* Cast PAGE_MASK to a signed type so that it is sign-extended if
diff --git a/arch/x86/include/asm/perf_event_p4.h b/arch/x86/include/asm/perf_event_p4.h
index def500776b16..a70cd216be5d 100644
--- a/arch/x86/include/asm/perf_event_p4.h
+++ b/arch/x86/include/asm/perf_event_p4.h
@@ -36,19 +36,6 @@
 #define P4_ESCR_EMASK(v)	((v) << P4_ESCR_EVENTMASK_SHIFT)
 #define P4_ESCR_TAG(v)		((v) << P4_ESCR_TAG_SHIFT)
 
-/* Non HT mask */
-#define P4_ESCR_MASK			\
-	(P4_ESCR_EVENT_MASK	|	\
-	P4_ESCR_EVENTMASK_MASK	|	\
-	P4_ESCR_TAG_MASK	|	\
-	P4_ESCR_TAG_ENABLE	|	\
-	P4_ESCR_T0_OS		|	\
-	P4_ESCR_T0_USR)
-
-/* HT mask */
-#define P4_ESCR_MASK_HT			\
-	(P4_ESCR_MASK | P4_ESCR_T1_OS | P4_ESCR_T1_USR)
-
 #define P4_CCCR_OVF			0x80000000U
 #define P4_CCCR_CASCADE			0x40000000U
 #define P4_CCCR_OVF_PMI_T0		0x04000000U
@@ -70,23 +57,6 @@
 #define P4_CCCR_THRESHOLD(v)		((v) << P4_CCCR_THRESHOLD_SHIFT)
 #define P4_CCCR_ESEL(v)			((v) << P4_CCCR_ESCR_SELECT_SHIFT)
 
-/* Non HT mask */
-#define P4_CCCR_MASK				\
-	(P4_CCCR_OVF			|	\
-	P4_CCCR_CASCADE			|	\
-	P4_CCCR_OVF_PMI_T0		|	\
-	P4_CCCR_FORCE_OVF		|	\
-	P4_CCCR_EDGE			|	\
-	P4_CCCR_THRESHOLD_MASK		|	\
-	P4_CCCR_COMPLEMENT		|	\
-	P4_CCCR_COMPARE			|	\
-	P4_CCCR_ESCR_SELECT_MASK	|	\
-	P4_CCCR_ENABLE)
-
-/* HT mask */
-#define P4_CCCR_MASK_HT				\
-	(P4_CCCR_MASK | P4_CCCR_OVF_PMI_T1 | P4_CCCR_THREAD_ANY)
-
 #define P4_GEN_ESCR_EMASK(class, name, bit)	\
 	class##__##name = ((1 << bit) << P4_ESCR_EVENTMASK_SHIFT)
 #define P4_ESCR_EMASK_BIT(class, name)	class##__##name
@@ -127,6 +97,28 @@
 #define P4_CONFIG_HT_SHIFT		63
 #define P4_CONFIG_HT			(1ULL << P4_CONFIG_HT_SHIFT)
 
+/*
+ * The bits we allow to pass for RAW events
+ */
+#define P4_CONFIG_MASK_ESCR		\
+	P4_ESCR_EVENT_MASK	|	\
+	P4_ESCR_EVENTMASK_MASK	|	\
+	P4_ESCR_TAG_MASK	|	\
+	P4_ESCR_TAG_ENABLE
+
+#define P4_CONFIG_MASK_CCCR		\
+	P4_CCCR_EDGE		|	\
+	P4_CCCR_THRESHOLD_MASK	|	\
+	P4_CCCR_COMPLEMENT	|	\
+	P4_CCCR_COMPARE		|	\
+	P4_CCCR_THREAD_ANY	|	\
+	P4_CCCR_RESERVED
+
+/* some dangerous bits are reserved for kernel internals */
+#define P4_CONFIG_MASK					\
+	(p4_config_pack_escr(P4_CONFIG_MASK_ESCR))	| \
+	(p4_config_pack_cccr(P4_CONFIG_MASK_CCCR))
+
 static inline bool p4_is_event_cascaded(u64 config)
 {
 	u32 cccr = p4_config_unpack_cccr(config);
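
A sketch of the intended use of P4_CONFIG_MASK (assumed, not from this diff):
raw event configs from user space get masked down to the ESCR/CCCR bits
listed above. Note the macro body is an unparenthesized OR, so it is safest
evaluated into a variable first.

	static u64 p4_clear_unallowed_bits(u64 raw_config)
	{
		u64 mask = P4_CONFIG_MASK;	/* evaluate the macro once */

		return raw_config & mask;
	}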
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index a34c785c5a63..ada823a13c7c 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -28,6 +28,8 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];
 extern spinlock_t pgd_lock;
 extern struct list_head pgd_list;
 
+extern struct mm_struct *pgd_page_get_mm(struct page *page);
+
 #ifdef CONFIG_PARAVIRT
 #include <asm/paravirt.h>
 #else  /* !CONFIG_PARAVIRT */
@@ -603,6 +605,8 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm,
 	pte_update(mm, addr, ptep);
 }
 
+#define flush_tlb_fix_spurious_fault(vma, address)
+
 /*
  * clone_pgd_range(pgd_t *dst, pgd_t *src, int count);
  *
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index 076052cd62be..f96ac9bedf75 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -102,6 +102,8 @@ static inline void native_pgd_clear(pgd_t *pgd)
 	native_set_pgd(pgd, native_make_pgd(0));
 }
 
+extern void sync_global_pgds(unsigned long start, unsigned long end);
+
 /*
  * Conversion functions: convert a page and protection to a page entry,
  * and a page entry and page directory to the page they refer to.
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 325b7bdbebaa..cae9c3cb95cf 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -110,6 +110,8 @@ struct cpuinfo_x86 {
 	u16			phys_proc_id;
 	/* Core id: */
 	u16			cpu_core_id;
+	/* Compute unit id */
+	u8			compute_unit_id;
 	/* Index into per_cpu list: */
 	u16			cpu_index;
 #endif
@@ -602,7 +604,7 @@ extern unsigned long mmu_cr4_features;
 
 static inline void set_in_cr4(unsigned long mask)
 {
-	unsigned cr4;
+	unsigned long cr4;
 
 	mmu_cr4_features |= mask;
 	cr4 = read_cr4();
@@ -612,7 +614,7 @@ static inline void set_in_cr4(unsigned long mask)
 
 static inline void clear_in_cr4(unsigned long mask)
 {
-	unsigned cr4;
+	unsigned long cr4;
 
 	mmu_cr4_features &= ~mask;
 	cr4 = read_cr4();
@@ -764,29 +766,6 @@ extern unsigned long idle_halt;
 extern unsigned long idle_nomwait;
 extern bool c1e_detected;
 
-/*
- * on systems with caches, caches must be flashed as the absolute
- * last instruction before going into a suspended halt.  Otherwise,
- * dirty data can linger in the cache and become stale on resume,
- * leading to strange errors.
- *
- * perform a variety of operations to guarantee that the compiler
- * will not reorder instructions.  wbinvd itself is serializing
- * so the processor will not reorder.
- *
- * Systems without cache can just go into halt.
- */
-static inline void wbinvd_halt(void)
-{
-	mb();
-	/* check for clflush to determine if wbinvd is legal */
-	if (cpu_has_clflush)
-		asm volatile("cli; wbinvd; 1: hlt; jmp 1b" : : : "memory");
-	else
-		while (1)
-			halt();
-}
-
 extern void enable_sep_cpu(void);
 extern int sysenter_setup(void);
 
diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h
index ef292c792d74..d6763b139a84 100644
--- a/arch/x86/include/asm/setup.h
+++ b/arch/x86/include/asm/setup.h
@@ -93,6 +93,11 @@ void *extend_brk(size_t size, size_t align);
93 : : "i" (sz)); \ 93 : : "i" (sz)); \
94 } 94 }
95 95
96/* Helper for reserving space for arrays of things */
97#define RESERVE_BRK_ARRAY(type, name, entries) \
98 type *name; \
99 RESERVE_BRK(name, sizeof(type) * entries)
100
96#ifdef __i386__ 101#ifdef __i386__
97 102
98void __init i386_start_kernel(void); 103void __init i386_start_kernel(void);
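
A hypothetical usage sketch of RESERVE_BRK_ARRAY() (the array name and size
are made up): the macro only declares the pointer and sizes the brk
reservation; the memory itself is still handed out by extend_brk() during
early boot.

	RESERVE_BRK_ARRAY(unsigned long, example_slots, 16);

	void __init example_setup(void)
	{
		example_slots = extend_brk(16 * sizeof(*example_slots),
					   sizeof(*example_slots));
	}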
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 3cd01d04613d..4370154f4122 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -34,7 +34,8 @@ GCOV_PROFILE_paravirt.o := n
 obj-y			:= process_$(BITS).o signal.o entry_$(BITS).o
 obj-y			+= traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o
 obj-y			+= time.o ioport.o ldt.o dumpstack.o
-obj-y			+= setup.o x86_init.o i8259.o irqinit.o
+obj-y			+= setup.o x86_init.o i8259.o irqinit.o jump_label.o
+obj-$(CONFIG_IRQ_WORK)	+= irq_work.o
 obj-$(CONFIG_X86_VISWS)	+= visws_quirks.o
 obj-$(CONFIG_X86_32)	+= probe_roms_32.o
 obj-$(CONFIG_X86_32)	+= sys_i386_32.o i386_ksyms_32.o
@@ -90,7 +91,7 @@ obj-$(CONFIG_EARLY_PRINTK_MRST) += early_printk_mrst.o
 obj-$(CONFIG_HPET_TIMER)	+= hpet.o
 obj-$(CONFIG_APB_TIMER)		+= apb_timer.o
 
-obj-$(CONFIG_K8_NB)		+= k8.o
+obj-$(CONFIG_AMD_NB)		+= amd_nb.o
 obj-$(CONFIG_DEBUG_RODATA_TEST)	+= test_rodata.o
 obj-$(CONFIG_DEBUG_NX_TEST)	+= test_nx.o
 
@@ -123,7 +124,6 @@ obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o
 # 64 bit specific files
 ifeq ($(CONFIG_X86_64),y)
 	obj-$(CONFIG_X86_UV)		+= tlb_uv.o bios_uv.o uv_irq.o uv_sysfs.o uv_time.o
-	obj-$(CONFIG_X86_PM_TIMER)	+= pmtimer_64.o
 	obj-$(CONFIG_AUDIT)		+= audit_64.o
 
 	obj-$(CONFIG_GART_IOMMU)	+= pci-gart_64.o aperture_64.o
diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c
index fb16f17e59be..5812404a0d4c 100644
--- a/arch/x86/kernel/acpi/cstate.c
+++ b/arch/x86/kernel/acpi/cstate.c
@@ -13,6 +13,7 @@
 
 #include <acpi/processor.h>
 #include <asm/acpi.h>
+#include <asm/mwait.h>
 
 /*
  * Initialize bm_flags based on the CPU cache properties
@@ -65,16 +66,6 @@ static struct cstate_entry __percpu *cpu_cstate_entry; /* per CPU ptr */
65 66
66static short mwait_supported[ACPI_PROCESSOR_MAX_POWER]; 67static short mwait_supported[ACPI_PROCESSOR_MAX_POWER];
67 68
68#define MWAIT_SUBSTATE_MASK (0xf)
69#define MWAIT_CSTATE_MASK (0xf)
70#define MWAIT_SUBSTATE_SIZE (4)
71
72#define CPUID_MWAIT_LEAF (5)
73#define CPUID5_ECX_EXTENSIONS_SUPPORTED (0x1)
74#define CPUID5_ECX_INTERRUPT_BREAK (0x2)
75
76#define MWAIT_ECX_INTERRUPT_BREAK (0x1)
77
78#define NATIVE_CSTATE_BEYOND_HALT (2) 69#define NATIVE_CSTATE_BEYOND_HALT (2)
79 70
80static long acpi_processor_ffh_cstate_probe_cpu(void *_cx) 71static long acpi_processor_ffh_cstate_probe_cpu(void *_cx)
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index f65ab8b014c4..a36bb90aef53 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -195,7 +195,7 @@ static void __init_or_module add_nops(void *insns, unsigned int len)
195 195
196extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; 196extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
197extern s32 __smp_locks[], __smp_locks_end[]; 197extern s32 __smp_locks[], __smp_locks_end[];
198static void *text_poke_early(void *addr, const void *opcode, size_t len); 198void *text_poke_early(void *addr, const void *opcode, size_t len);
199 199
200/* Replace instructions with better alternatives for this CPU type. 200/* Replace instructions with better alternatives for this CPU type.
201 This runs before SMP is initialized to avoid SMP problems with 201 This runs before SMP is initialized to avoid SMP problems with
@@ -522,7 +522,7 @@ void __init alternative_instructions(void)
522 * instructions. And on the local CPU you need to be protected against NMI or MCE 522 * instructions. And on the local CPU you need to be protected against NMI or MCE
523 * handlers seeing an inconsistent instruction while you patch. 523 * handlers seeing an inconsistent instruction while you patch.
524 */ 524 */
525static void *__init_or_module text_poke_early(void *addr, const void *opcode, 525void *__init_or_module text_poke_early(void *addr, const void *opcode,
526 size_t len) 526 size_t len)
527{ 527{
528 unsigned long flags; 528 unsigned long flags;
@@ -637,7 +637,72 @@ void *__kprobes text_poke_smp(void *addr, const void *opcode, size_t len)
637 tpp.len = len; 637 tpp.len = len;
638 atomic_set(&stop_machine_first, 1); 638 atomic_set(&stop_machine_first, 1);
639 wrote_text = 0; 639 wrote_text = 0;
640 stop_machine(stop_machine_text_poke, (void *)&tpp, NULL); 640 /* Use __stop_machine() because the caller already got online_cpus. */
641 __stop_machine(stop_machine_text_poke, (void *)&tpp, NULL);
641 return addr; 642 return addr;
642} 643}
643 644
645#if defined(CONFIG_DYNAMIC_FTRACE) || defined(HAVE_JUMP_LABEL)
646
647unsigned char ideal_nop5[IDEAL_NOP_SIZE_5];
648
649void __init arch_init_ideal_nop5(void)
650{
651 extern const unsigned char ftrace_test_p6nop[];
652 extern const unsigned char ftrace_test_nop5[];
653 extern const unsigned char ftrace_test_jmp[];
654 int faulted = 0;
655
656 /*
657 * There is no single nop that works well on every x86 CPU.
658 * We will default to using P6_NOP5, but first we test to
659 * make sure that this nop actually works on the current
660 * CPU. If it faults, we fall back to a less efficient
661 * 5-byte nop. If that also fails, we just use a jmp as
662 * our nop. This isn't the most efficient nop, but we
663 * cannot use a multi-part nop, since we would then risk
664 * being preempted in the middle of that nop, and if
665 * tracing were enabled at that point, it might cause
666 * a system crash.
667 *
668 * TODO: check the cpuid to determine the best nop.
669 */
670 asm volatile (
671 "ftrace_test_jmp:"
672 "jmp ftrace_test_p6nop\n"
673 "nop\n"
674 "nop\n"
675 "nop\n" /* 2 byte jmp + 3 bytes */
676 "ftrace_test_p6nop:"
677 P6_NOP5
678 "jmp 1f\n"
679 "ftrace_test_nop5:"
680 ".byte 0x66,0x66,0x66,0x66,0x90\n"
681 "1:"
682 ".section .fixup, \"ax\"\n"
683 "2: movl $1, %0\n"
684 " jmp ftrace_test_nop5\n"
685 "3: movl $2, %0\n"
686 " jmp 1b\n"
687 ".previous\n"
688 _ASM_EXTABLE(ftrace_test_p6nop, 2b)
689 _ASM_EXTABLE(ftrace_test_nop5, 3b)
690 : "=r"(faulted) : "0" (faulted));
691
692 switch (faulted) {
693 case 0:
694 pr_info("converting mcount calls to 0f 1f 44 00 00\n");
695 memcpy(ideal_nop5, ftrace_test_p6nop, IDEAL_NOP_SIZE_5);
696 break;
697 case 1:
698 pr_info("converting mcount calls to 66 66 66 66 90\n");
699 memcpy(ideal_nop5, ftrace_test_nop5, IDEAL_NOP_SIZE_5);
700 break;
701 case 2:
702 pr_info("converting mcount calls to jmp . + 5\n");
703 memcpy(ideal_nop5, ftrace_test_jmp, IDEAL_NOP_SIZE_5);
704 break;
705 }
706
707}
708#endif
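Once arch_init_ideal_nop5() has picked a nop, later patching code can copy it over a 5-byte call or jmp site. A minimal, hypothetical consumer, assuming only the text_poke_early() prototype that this same patch un-statics:

/* Hypothetical sketch: neutralize a 5-byte patch site at early boot
 * by overwriting it with the nop chosen above. */
static void __init nop_out_site_sketch(void *addr)
{
        text_poke_early(addr, ideal_nop5, IDEAL_NOP_SIZE_5);
}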
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 679b6450382b..d2fdb0826df2 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (C) 2007-2009 Advanced Micro Devices, Inc. 2 * Copyright (C) 2007-2010 Advanced Micro Devices, Inc.
3 * Author: Joerg Roedel <joerg.roedel@amd.com> 3 * Author: Joerg Roedel <joerg.roedel@amd.com>
4 * Leo Duran <leo.duran@amd.com> 4 * Leo Duran <leo.duran@amd.com>
5 * 5 *
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index 5a170cbbbed8..3cb482e123de 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (C) 2007-2009 Advanced Micro Devices, Inc. 2 * Copyright (C) 2007-2010 Advanced Micro Devices, Inc.
3 * Author: Joerg Roedel <joerg.roedel@amd.com> 3 * Author: Joerg Roedel <joerg.roedel@amd.com>
4 * Leo Duran <leo.duran@amd.com> 4 * Leo Duran <leo.duran@amd.com>
5 * 5 *
@@ -194,6 +194,39 @@ static inline unsigned long tbl_size(int entry_size)
194 return 1UL << shift; 194 return 1UL << shift;
195} 195}
196 196
197/* Access to l1 and l2 indexed register spaces */
198
199static u32 iommu_read_l1(struct amd_iommu *iommu, u16 l1, u8 address)
200{
201 u32 val;
202
203 pci_write_config_dword(iommu->dev, 0xf8, (address | l1 << 16));
204 pci_read_config_dword(iommu->dev, 0xfc, &val);
205 return val;
206}
207
208static void iommu_write_l1(struct amd_iommu *iommu, u16 l1, u8 address, u32 val)
209{
210 pci_write_config_dword(iommu->dev, 0xf8, (address | l1 << 16 | 1 << 31));
211 pci_write_config_dword(iommu->dev, 0xfc, val);
212 pci_write_config_dword(iommu->dev, 0xf8, (address | l1 << 16));
213}
214
215static u32 iommu_read_l2(struct amd_iommu *iommu, u8 address)
216{
217 u32 val;
218
219 pci_write_config_dword(iommu->dev, 0xf0, address);
220 pci_read_config_dword(iommu->dev, 0xf4, &val);
221 return val;
222}
223
224static void iommu_write_l2(struct amd_iommu *iommu, u8 address, u32 val)
225{
226 pci_write_config_dword(iommu->dev, 0xf0, (address | 1 << 8));
227 pci_write_config_dword(iommu->dev, 0xf4, val);
228}
229
197/**************************************************************************** 230/****************************************************************************
198 * 231 *
199 * AMD IOMMU MMIO register space handling functions 232 * AMD IOMMU MMIO register space handling functions
@@ -619,6 +652,7 @@ static void __init init_iommu_from_pci(struct amd_iommu *iommu)
619{ 652{
620 int cap_ptr = iommu->cap_ptr; 653 int cap_ptr = iommu->cap_ptr;
621 u32 range, misc; 654 u32 range, misc;
655 int i, j;
622 656
623 pci_read_config_dword(iommu->dev, cap_ptr + MMIO_CAP_HDR_OFFSET, 657 pci_read_config_dword(iommu->dev, cap_ptr + MMIO_CAP_HDR_OFFSET,
624 &iommu->cap); 658 &iommu->cap);
@@ -633,12 +667,29 @@ static void __init init_iommu_from_pci(struct amd_iommu *iommu)
633 MMIO_GET_LD(range)); 667 MMIO_GET_LD(range));
634 iommu->evt_msi_num = MMIO_MSI_NUM(misc); 668 iommu->evt_msi_num = MMIO_MSI_NUM(misc);
635 669
636 if (is_rd890_iommu(iommu->dev)) { 670 if (!is_rd890_iommu(iommu->dev))
637 pci_read_config_dword(iommu->dev, 0xf0, &iommu->cache_cfg[0]); 671 return;
638 pci_read_config_dword(iommu->dev, 0xf4, &iommu->cache_cfg[1]); 672
639 pci_read_config_dword(iommu->dev, 0xf8, &iommu->cache_cfg[2]); 673 /*
640 pci_read_config_dword(iommu->dev, 0xfc, &iommu->cache_cfg[3]); 674 * Some rd890 systems may not be fully reconfigured by the BIOS, so
641 } 675 * it's necessary for us to store this information so it can be
676 * reprogrammed on resume
677 */
678
679 pci_read_config_dword(iommu->dev, iommu->cap_ptr + 4,
680 &iommu->stored_addr_lo);
681 pci_read_config_dword(iommu->dev, iommu->cap_ptr + 8,
682 &iommu->stored_addr_hi);
683
684 /* Low bit locks writes to configuration space */
685 iommu->stored_addr_lo &= ~1;
686
687 for (i = 0; i < 6; i++)
688 for (j = 0; j < 0x12; j++)
689 iommu->stored_l1[i][j] = iommu_read_l1(iommu, i, j);
690
691 for (i = 0; i < 0x83; i++)
692 iommu->stored_l2[i] = iommu_read_l2(iommu, i);
642} 693}
643 694
644/* 695/*
@@ -1127,14 +1178,53 @@ static void iommu_init_flags(struct amd_iommu *iommu)
1127 iommu_feature_enable(iommu, CONTROL_COHERENT_EN); 1178 iommu_feature_enable(iommu, CONTROL_COHERENT_EN);
1128} 1179}
1129 1180
1130static void iommu_apply_quirks(struct amd_iommu *iommu) 1181static void iommu_apply_resume_quirks(struct amd_iommu *iommu)
1131{ 1182{
1132 if (is_rd890_iommu(iommu->dev)) { 1183 int i, j;
1133 pci_write_config_dword(iommu->dev, 0xf0, iommu->cache_cfg[0]); 1184 u32 ioc_feature_control;
1134 pci_write_config_dword(iommu->dev, 0xf4, iommu->cache_cfg[1]); 1185 struct pci_dev *pdev = NULL;
1135 pci_write_config_dword(iommu->dev, 0xf8, iommu->cache_cfg[2]); 1186
1136 pci_write_config_dword(iommu->dev, 0xfc, iommu->cache_cfg[3]); 1187 /* RD890 BIOSes may not have completely reconfigured the iommu */
1137 } 1188 if (!is_rd890_iommu(iommu->dev))
1189 return;
1190
1191 /*
1192 * First, we need to ensure that the iommu is enabled. This is
1193 * controlled by a register in the northbridge
1194 */
1195 pdev = pci_get_bus_and_slot(iommu->dev->bus->number, PCI_DEVFN(0, 0));
1196
1197 if (!pdev)
1198 return;
1199
1200 /* Select Northbridge indirect register 0x75 and enable writing */
1201 pci_write_config_dword(pdev, 0x60, 0x75 | (1 << 7));
1202 pci_read_config_dword(pdev, 0x64, &ioc_feature_control);
1203
1204 /* Enable the iommu */
1205 if (!(ioc_feature_control & 0x1))
1206 pci_write_config_dword(pdev, 0x64, ioc_feature_control | 1);
1207
1208 pci_dev_put(pdev);
1209
1210 /* Restore the iommu BAR */
1211 pci_write_config_dword(iommu->dev, iommu->cap_ptr + 4,
1212 iommu->stored_addr_lo);
1213 pci_write_config_dword(iommu->dev, iommu->cap_ptr + 8,
1214 iommu->stored_addr_hi);
1215
1216 /* Restore the l1 indirect regs for each of the 6 l1s */
1217 for (i = 0; i < 6; i++)
1218 for (j = 0; j < 0x12; j++)
1219 iommu_write_l1(iommu, i, j, iommu->stored_l1[i][j]);
1220
1221 /* Restore the l2 indirect regs */
1222 for (i = 0; i < 0x83; i++)
1223 iommu_write_l2(iommu, i, iommu->stored_l2[i]);
1224
1225 /* Lock PCI setup registers */
1226 pci_write_config_dword(iommu->dev, iommu->cap_ptr + 4,
1227 iommu->stored_addr_lo | 1);
1138} 1228}
1139 1229
1140/* 1230/*
@@ -1147,7 +1237,6 @@ static void enable_iommus(void)
1147 1237
1148 for_each_iommu(iommu) { 1238 for_each_iommu(iommu) {
1149 iommu_disable(iommu); 1239 iommu_disable(iommu);
1150 iommu_apply_quirks(iommu);
1151 iommu_init_flags(iommu); 1240 iommu_init_flags(iommu);
1152 iommu_set_device_table(iommu); 1241 iommu_set_device_table(iommu);
1153 iommu_enable_command_buffer(iommu); 1242 iommu_enable_command_buffer(iommu);
@@ -1173,6 +1262,11 @@ static void disable_iommus(void)
1173 1262
1174static int amd_iommu_resume(struct sys_device *dev) 1263static int amd_iommu_resume(struct sys_device *dev)
1175{ 1264{
1265 struct amd_iommu *iommu;
1266
1267 for_each_iommu(iommu)
1268 iommu_apply_resume_quirks(iommu);
1269
1176 /* re-load the hardware */ 1270 /* re-load the hardware */
1177 enable_iommus(); 1271 enable_iommus();
1178 1272
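The l1/l2 accessors added above follow a classic index/data convention: one config-space dword selects the indirect register (with a write-enable bit for stores) and an adjacent dword carries the payload. A generic, hedged sketch of the read side; the register offsets are parameters here rather than the 0xf0/0xf8 constants from the hunk:

static u32 indirect_read_sketch(struct pci_dev *dev, int index_reg,
                                int data_reg, u32 address)
{
        u32 val;

        pci_write_config_dword(dev, index_reg, address);
        pci_read_config_dword(dev, data_reg, &val);
        return val;
}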
diff --git a/arch/x86/kernel/k8.c b/arch/x86/kernel/amd_nb.c
index 0f7bc20cfcde..8f6463d8ed0d 100644
--- a/arch/x86/kernel/k8.c
+++ b/arch/x86/kernel/amd_nb.c
@@ -8,21 +8,19 @@
8#include <linux/errno.h> 8#include <linux/errno.h>
9#include <linux/module.h> 9#include <linux/module.h>
10#include <linux/spinlock.h> 10#include <linux/spinlock.h>
11#include <asm/k8.h> 11#include <asm/amd_nb.h>
12
13int num_k8_northbridges;
14EXPORT_SYMBOL(num_k8_northbridges);
15 12
16static u32 *flush_words; 13static u32 *flush_words;
17 14
18struct pci_device_id k8_nb_ids[] = { 15struct pci_device_id k8_nb_ids[] = {
19 { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_MISC) }, 16 { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_MISC) },
20 { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC) }, 17 { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC) },
18 { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_MISC) },
21 {} 19 {}
22}; 20};
23EXPORT_SYMBOL(k8_nb_ids); 21EXPORT_SYMBOL(k8_nb_ids);
24 22
25struct pci_dev **k8_northbridges; 23struct k8_northbridge_info k8_northbridges;
26EXPORT_SYMBOL(k8_northbridges); 24EXPORT_SYMBOL(k8_northbridges);
27 25
28static struct pci_dev *next_k8_northbridge(struct pci_dev *dev) 26static struct pci_dev *next_k8_northbridge(struct pci_dev *dev)
@@ -40,36 +38,45 @@ int cache_k8_northbridges(void)
40 int i; 38 int i;
41 struct pci_dev *dev; 39 struct pci_dev *dev;
42 40
43 if (num_k8_northbridges) 41 if (k8_northbridges.num)
44 return 0; 42 return 0;
45 43
46 dev = NULL; 44 dev = NULL;
47 while ((dev = next_k8_northbridge(dev)) != NULL) 45 while ((dev = next_k8_northbridge(dev)) != NULL)
48 num_k8_northbridges++; 46 k8_northbridges.num++;
47
48 /* some CPU families (e.g. family 0x11) do not support GART */
49 if (boot_cpu_data.x86 == 0xf || boot_cpu_data.x86 == 0x10 ||
50 boot_cpu_data.x86 == 0x15)
51 k8_northbridges.gart_supported = 1;
49 52
50 k8_northbridges = kmalloc((num_k8_northbridges + 1) * sizeof(void *), 53 k8_northbridges.nb_misc = kmalloc((k8_northbridges.num + 1) *
51 GFP_KERNEL); 54 sizeof(void *), GFP_KERNEL);
52 if (!k8_northbridges) 55 if (!k8_northbridges.nb_misc)
53 return -ENOMEM; 56 return -ENOMEM;
54 57
55 if (!num_k8_northbridges) { 58 if (!k8_northbridges.num) {
56 k8_northbridges[0] = NULL; 59 k8_northbridges.nb_misc[0] = NULL;
57 return 0; 60 return 0;
58 } 61 }
59 62
60 flush_words = kmalloc(num_k8_northbridges * sizeof(u32), GFP_KERNEL); 63 if (k8_northbridges.gart_supported) {
61 if (!flush_words) { 64 flush_words = kmalloc(k8_northbridges.num * sizeof(u32),
62 kfree(k8_northbridges); 65 GFP_KERNEL);
63 return -ENOMEM; 66 if (!flush_words) {
67 kfree(k8_northbridges.nb_misc);
68 return -ENOMEM;
69 }
64 } 70 }
65 71
66 dev = NULL; 72 dev = NULL;
67 i = 0; 73 i = 0;
68 while ((dev = next_k8_northbridge(dev)) != NULL) { 74 while ((dev = next_k8_northbridge(dev)) != NULL) {
69 k8_northbridges[i] = dev; 75 k8_northbridges.nb_misc[i] = dev;
70 pci_read_config_dword(dev, 0x9c, &flush_words[i++]); 76 if (k8_northbridges.gart_supported)
77 pci_read_config_dword(dev, 0x9c, &flush_words[i++]);
71 } 78 }
72 k8_northbridges[i] = NULL; 79 k8_northbridges.nb_misc[i] = NULL;
73 return 0; 80 return 0;
74} 81}
75EXPORT_SYMBOL_GPL(cache_k8_northbridges); 82EXPORT_SYMBOL_GPL(cache_k8_northbridges);
@@ -93,22 +100,25 @@ void k8_flush_garts(void)
93 unsigned long flags; 100 unsigned long flags;
94 static DEFINE_SPINLOCK(gart_lock); 101 static DEFINE_SPINLOCK(gart_lock);
95 102
103 if (!k8_northbridges.gart_supported)
104 return;
105
96 /* Avoid races between AGP and IOMMU. In theory it's not needed 106 /* Avoid races between AGP and IOMMU. In theory it's not needed
97 but I'm not sure if the hardware won't lose flush requests 107 but I'm not sure if the hardware won't lose flush requests
98 when another is pending. This whole thing is so expensive anyway 108 when another is pending. This whole thing is so expensive anyway
99 that it doesn't matter to serialize more. -AK */ 109 that it doesn't matter to serialize more. -AK */
100 spin_lock_irqsave(&gart_lock, flags); 110 spin_lock_irqsave(&gart_lock, flags);
101 flushed = 0; 111 flushed = 0;
102 for (i = 0; i < num_k8_northbridges; i++) { 112 for (i = 0; i < k8_northbridges.num; i++) {
103 pci_write_config_dword(k8_northbridges[i], 0x9c, 113 pci_write_config_dword(k8_northbridges.nb_misc[i], 0x9c,
104 flush_words[i]|1); 114 flush_words[i]|1);
105 flushed++; 115 flushed++;
106 } 116 }
107 for (i = 0; i < num_k8_northbridges; i++) { 117 for (i = 0; i < k8_northbridges.num; i++) {
108 u32 w; 118 u32 w;
109 /* Make sure the hardware actually executed the flush */ 119 /* Make sure the hardware actually executed the flush */
110 for (;;) { 120 for (;;) {
111 pci_read_config_dword(k8_northbridges[i], 121 pci_read_config_dword(k8_northbridges.nb_misc[i],
112 0x9c, &w); 122 0x9c, &w);
113 if (!(w & 1)) 123 if (!(w & 1))
114 break; 124 break;
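After the rename, callers index the nb_misc array inside the k8_northbridges struct instead of a bare pointer array. A hedged walk over the cached devices, relying on the NULL terminator that cache_k8_northbridges() still writes:

static void walk_northbridges_sketch(void)
{
        int i;

        for (i = 0; i < k8_northbridges.num; i++)
                dev_info(&k8_northbridges.nb_misc[i]->dev,
                         "cached AMD northbridge %d\n", i);
}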
diff --git a/arch/x86/kernel/apb_timer.c b/arch/x86/kernel/apb_timer.c
index 8dd77800ff5d..6fe2b5cb4f3c 100644
--- a/arch/x86/kernel/apb_timer.c
+++ b/arch/x86/kernel/apb_timer.c
@@ -343,7 +343,7 @@ void apbt_setup_secondary_clock(void)
343 343
344 /* Don't register boot CPU clockevent */ 344 /* Don't register boot CPU clockevent */
345 cpu = smp_processor_id(); 345 cpu = smp_processor_id();
346 if (cpu == boot_cpu_id) 346 if (!cpu)
347 return; 347 return;
348 /* 348 /*
349 * We need to calculate the scaled math multiplication factor for 349 * We need to calculate the scaled math multiplication factor for
@@ -398,7 +398,7 @@ static int apbt_cpuhp_notify(struct notifier_block *n,
398 } 398 }
399 break; 399 break;
400 default: 400 default:
401 pr_debug(KERN_INFO "APBT notified %lu, no action\n", action); 401 pr_debug("APBT notified %lu, no action\n", action);
402 } 402 }
403 return NOTIFY_OK; 403 return NOTIFY_OK;
404} 404}
@@ -552,7 +552,7 @@ bad_count:
552 pr_debug("APB CS going back %lx:%lx:%lx ", 552 pr_debug("APB CS going back %lx:%lx:%lx ",
553 t2, last_read, t2 - last_read); 553 t2, last_read, t2 - last_read);
554bad_count_x3: 554bad_count_x3:
555 pr_debug(KERN_INFO "tripple check enforced\n"); 555 pr_debug("triple check enforced\n");
556 t0 = apbt_readl(phy_cs_timer_id, 556 t0 = apbt_readl(phy_cs_timer_id,
557 APBTMR_N_CURRENT_VALUE); 557 APBTMR_N_CURRENT_VALUE);
558 udelay(1); 558 udelay(1);
diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c
index a2e0caf26e17..377f5db3b8b4 100644
--- a/arch/x86/kernel/aperture_64.c
+++ b/arch/x86/kernel/aperture_64.c
@@ -27,7 +27,7 @@
27#include <asm/gart.h> 27#include <asm/gart.h>
28#include <asm/pci-direct.h> 28#include <asm/pci-direct.h>
29#include <asm/dma.h> 29#include <asm/dma.h>
30#include <asm/k8.h> 30#include <asm/amd_nb.h>
31#include <asm/x86_init.h> 31#include <asm/x86_init.h>
32 32
33int gart_iommu_aperture; 33int gart_iommu_aperture;
@@ -307,7 +307,7 @@ void __init early_gart_iommu_check(void)
307 continue; 307 continue;
308 308
309 ctl = read_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL); 309 ctl = read_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL);
310 aper_enabled = ctl & AMD64_GARTEN; 310 aper_enabled = ctl & GARTEN;
311 aper_order = (ctl >> 1) & 7; 311 aper_order = (ctl >> 1) & 7;
312 aper_size = (32 * 1024 * 1024) << aper_order; 312 aper_size = (32 * 1024 * 1024) << aper_order;
313 aper_base = read_pci_config(bus, slot, 3, AMD64_GARTAPERTUREBASE) & 0x7fff; 313 aper_base = read_pci_config(bus, slot, 3, AMD64_GARTAPERTUREBASE) & 0x7fff;
@@ -362,7 +362,7 @@ void __init early_gart_iommu_check(void)
362 continue; 362 continue;
363 363
364 ctl = read_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL); 364 ctl = read_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL);
365 ctl &= ~AMD64_GARTEN; 365 ctl &= ~GARTEN;
366 write_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL, ctl); 366 write_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL, ctl);
367 } 367 }
368 } 368 }
@@ -505,8 +505,13 @@ out:
505 505
506 /* Fix up the north bridges */ 506 /* Fix up the north bridges */
507 for (i = 0; i < ARRAY_SIZE(bus_dev_ranges); i++) { 507 for (i = 0; i < ARRAY_SIZE(bus_dev_ranges); i++) {
508 int bus; 508 int bus, dev_base, dev_limit;
509 int dev_base, dev_limit; 509
510 /*
511 * Don't enable translation yet, but enable GART IO and CPU
512 * accesses and set DISTLBWALKPRB since GART table memory is UC.
513 */
514 u32 ctl = DISTLBWALKPRB | aper_order << 1;
510 515
511 bus = bus_dev_ranges[i].bus; 516 bus = bus_dev_ranges[i].bus;
512 dev_base = bus_dev_ranges[i].dev_base; 517 dev_base = bus_dev_ranges[i].dev_base;
@@ -515,10 +520,7 @@ out:
515 if (!early_is_k8_nb(read_pci_config(bus, slot, 3, 0x00))) 520 if (!early_is_k8_nb(read_pci_config(bus, slot, 3, 0x00)))
516 continue; 521 continue;
517 522
518 /* Don't enable translation yet. That is done later. 523 write_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL, ctl);
519 Assume this BIOS didn't initialise the GART so
520 just overwrite all previous bits */
521 write_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL, aper_order << 1);
522 write_pci_config(bus, slot, 3, AMD64_GARTAPERTUREBASE, aper_alloc >> 25); 524 write_pci_config(bus, slot, 3, AMD64_GARTAPERTUREBASE, aper_alloc >> 25);
523 } 525 }
524 } 526 }
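The control word now written to AMD64_GARTAPERTURECTL deliberately leaves GARTEN clear: only the DISTLBWALKPRB workaround bit and the aperture-order field are set, and translation is switched on later. A one-line hedged recap of that composition:

static u32 make_gart_ctl_sketch(u32 aper_order)
{
        /* GARTEN intentionally absent; translation is enabled later */
        return DISTLBWALKPRB | (aper_order << 1);
}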
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 5c5b8f3dddb5..f1e78940d908 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -162,7 +162,7 @@ int __init arch_early_irq_init(void)
162 162
163 cfg = irq_cfgx; 163 cfg = irq_cfgx;
164 count = ARRAY_SIZE(irq_cfgx); 164 count = ARRAY_SIZE(irq_cfgx);
165 node= cpu_to_node(boot_cpu_id); 165 node = cpu_to_node(0);
166 166
167 for (i = 0; i < count; i++) { 167 for (i = 0; i < count; i++) {
168 desc = irq_to_desc(i); 168 desc = irq_to_desc(i);
@@ -1488,7 +1488,7 @@ static void __init setup_IO_APIC_irqs(void)
1488 int notcon = 0; 1488 int notcon = 0;
1489 struct irq_desc *desc; 1489 struct irq_desc *desc;
1490 struct irq_cfg *cfg; 1490 struct irq_cfg *cfg;
1491 int node = cpu_to_node(boot_cpu_id); 1491 int node = cpu_to_node(0);
1492 1492
1493 apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n"); 1493 apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
1494 1494
@@ -1553,7 +1553,7 @@ static void __init setup_IO_APIC_irqs(void)
1553void setup_IO_APIC_irq_extra(u32 gsi) 1553void setup_IO_APIC_irq_extra(u32 gsi)
1554{ 1554{
1555 int apic_id = 0, pin, idx, irq; 1555 int apic_id = 0, pin, idx, irq;
1556 int node = cpu_to_node(boot_cpu_id); 1556 int node = cpu_to_node(0);
1557 struct irq_desc *desc; 1557 struct irq_desc *desc;
1558 struct irq_cfg *cfg; 1558 struct irq_cfg *cfg;
1559 1559
@@ -2932,7 +2932,7 @@ static inline void __init check_timer(void)
2932{ 2932{
2933 struct irq_desc *desc = irq_to_desc(0); 2933 struct irq_desc *desc = irq_to_desc(0);
2934 struct irq_cfg *cfg = desc->chip_data; 2934 struct irq_cfg *cfg = desc->chip_data;
2935 int node = cpu_to_node(boot_cpu_id); 2935 int node = cpu_to_node(0);
2936 int apic1, pin1, apic2, pin2; 2936 int apic1, pin1, apic2, pin2;
2937 unsigned long flags; 2937 unsigned long flags;
2938 int no_pin1 = 0; 2938 int no_pin1 = 0;
@@ -3286,7 +3286,7 @@ unsigned int create_irq_nr(unsigned int irq_want, int node)
3286 3286
3287int create_irq(void) 3287int create_irq(void)
3288{ 3288{
3289 int node = cpu_to_node(boot_cpu_id); 3289 int node = cpu_to_node(0);
3290 unsigned int irq_want; 3290 unsigned int irq_want;
3291 int irq; 3291 int irq;
3292 3292
@@ -3908,7 +3908,7 @@ static int __io_apic_set_pci_routing(struct device *dev, int irq,
3908 if (dev) 3908 if (dev)
3909 node = dev_to_node(dev); 3909 node = dev_to_node(dev);
3910 else 3910 else
3911 node = cpu_to_node(boot_cpu_id); 3911 node = cpu_to_node(0);
3912 3912
3913 desc = irq_to_desc_alloc_node(irq, node); 3913 desc = irq_to_desc_alloc_node(irq, node);
3914 if (!desc) { 3914 if (!desc) {
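All of the cpu_to_node(boot_cpu_id) call sites above collapse to cpu_to_node(0) because the boot processor is always logical CPU 0. Hedged one-liner for the pattern:

static int boot_cpu_node_sketch(void)
{
        return cpu_to_node(0);  /* the boot CPU is always logical CPU 0 */
}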
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index ba5f62f45f01..9e093f8fe78c 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -148,7 +148,7 @@ static void __cpuinit amd_k7_smp_check(struct cpuinfo_x86 *c)
148{ 148{
149#ifdef CONFIG_SMP 149#ifdef CONFIG_SMP
150 /* called from identify_secondary_cpu()? */ 150 /* called from identify_secondary_cpu()? */
151 if (c->cpu_index == boot_cpu_id) 151 if (!c->cpu_index)
152 return; 152 return;
153 153
154 /* 154 /*
@@ -253,37 +253,51 @@ static int __cpuinit nearby_node(int apicid)
253#endif 253#endif
254 254
255/* 255/*
256 * Fixup core topology information for AMD multi-node processors. 256 * Fixup core topology information for
257 * Assumption: Number of cores in each internal node is the same. 257 * (1) AMD multi-node processors
258 * Assumption: Number of cores in each internal node is the same.
259 * (2) AMD processors supporting compute units
258 */ 260 */
259#ifdef CONFIG_X86_HT 261#ifdef CONFIG_X86_HT
260static void __cpuinit amd_fixup_dcm(struct cpuinfo_x86 *c) 262static void __cpuinit amd_get_topology(struct cpuinfo_x86 *c)
261{ 263{
262 unsigned long long value; 264 u32 nodes;
263 u32 nodes, cores_per_node; 265 u8 node_id;
264 int cpu = smp_processor_id(); 266 int cpu = smp_processor_id();
265 267
266 if (!cpu_has(c, X86_FEATURE_NODEID_MSR)) 268 /* get information required for multi-node processors */
267 return; 269 if (cpu_has(c, X86_FEATURE_TOPOEXT)) {
270 u32 eax, ebx, ecx, edx;
268 271
269 /* fixup topology information only once for a core */ 272 cpuid(0x8000001e, &eax, &ebx, &ecx, &edx);
270 if (cpu_has(c, X86_FEATURE_AMD_DCM)) 273 nodes = ((ecx >> 8) & 7) + 1;
271 return; 274 node_id = ecx & 7;
272 275
273 rdmsrl(MSR_FAM10H_NODE_ID, value); 276 /* get compute unit information */
277 smp_num_siblings = ((ebx >> 8) & 3) + 1;
278 c->compute_unit_id = ebx & 0xff;
279 } else if (cpu_has(c, X86_FEATURE_NODEID_MSR)) {
280 u64 value;
274 281
275 nodes = ((value >> 3) & 7) + 1; 282 rdmsrl(MSR_FAM10H_NODE_ID, value);
276 if (nodes == 1) 283 nodes = ((value >> 3) & 7) + 1;
284 node_id = value & 7;
285 } else
277 return; 286 return;
278 287
279 set_cpu_cap(c, X86_FEATURE_AMD_DCM); 288 /* fixup multi-node processor information */
280 cores_per_node = c->x86_max_cores / nodes; 289 if (nodes > 1) {
290 u32 cores_per_node;
291
292 set_cpu_cap(c, X86_FEATURE_AMD_DCM);
293 cores_per_node = c->x86_max_cores / nodes;
281 294
282 /* store NodeID, use llc_shared_map to store sibling info */ 295 /* store NodeID, use llc_shared_map to store sibling info */
283 per_cpu(cpu_llc_id, cpu) = value & 7; 296 per_cpu(cpu_llc_id, cpu) = node_id;
284 297
285 /* fixup core id to be in range from 0 to (cores_per_node - 1) */ 298 /* core id to be in range from 0 to (cores_per_node - 1) */
286 c->cpu_core_id = c->cpu_core_id % cores_per_node; 299 c->cpu_core_id = c->cpu_core_id % cores_per_node;
300 }
287} 301}
288#endif 302#endif
289 303
@@ -304,9 +318,7 @@ static void __cpuinit amd_detect_cmp(struct cpuinfo_x86 *c)
304 c->phys_proc_id = c->initial_apicid >> bits; 318 c->phys_proc_id = c->initial_apicid >> bits;
305 /* use socket ID also for last level cache */ 319 /* use socket ID also for last level cache */
306 per_cpu(cpu_llc_id, cpu) = c->phys_proc_id; 320 per_cpu(cpu_llc_id, cpu) = c->phys_proc_id;
307 /* fixup topology information on multi-node processors */ 321 amd_get_topology(c);
308 if ((c->x86 == 0x10) && (c->x86_model == 9))
309 amd_fixup_dcm(c);
310#endif 322#endif
311} 323}
312 324
@@ -412,6 +424,23 @@ static void __cpuinit early_init_amd(struct cpuinfo_x86 *c)
412 set_cpu_cap(c, X86_FEATURE_EXTD_APICID); 424 set_cpu_cap(c, X86_FEATURE_EXTD_APICID);
413 } 425 }
414#endif 426#endif
427
428 /* We need to do the following only once */
429 if (c != &boot_cpu_data)
430 return;
431
432 if (cpu_has(c, X86_FEATURE_CONSTANT_TSC)) {
433
434 if (c->x86 > 0x10 ||
435 (c->x86 == 0x10 && c->x86_model >= 0x2)) {
436 u64 val;
437
438 rdmsrl(MSR_K7_HWCR, val);
439 if (!(val & BIT(24)))
440 printk(KERN_WARNING FW_BUG "TSC doesn't count "
441 "with P0 frequency!\n");
442 }
443 }
415} 444}
416 445
417static void __cpuinit init_amd(struct cpuinfo_x86 *c) 446static void __cpuinit init_amd(struct cpuinfo_x86 *c)
@@ -523,7 +552,7 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
523#endif 552#endif
524 553
525 if (c->extended_cpuid_level >= 0x80000006) { 554 if (c->extended_cpuid_level >= 0x80000006) {
526 if ((c->x86 >= 0x0f) && (cpuid_edx(0x80000006) & 0xf000)) 555 if (cpuid_edx(0x80000006) & 0xf000)
527 num_cache_leaves = 4; 556 num_cache_leaves = 4;
528 else 557 else
529 num_cache_leaves = 3; 558 num_cache_leaves = 3;
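The TOPOEXT branch in amd_get_topology() packs node and compute-unit information into CPUID leaf 0x8000001e. A hedged decoder matching the field widths the hunk uses (widths taken from the hunk itself, not re-verified against AMD documentation):

static void decode_topoext_sketch(void)
{
        u32 eax, ebx, ecx, edx;

        cpuid(0x8000001e, &eax, &ebx, &ecx, &edx);
        pr_info("node %u of %u, compute unit %u, %u cores/unit\n",
                ecx & 7,                        /* node_id */
                ((ecx >> 8) & 7) + 1,           /* nodes per processor */
                ebx & 0xff,                     /* compute_unit_id */
                ((ebx >> 8) & 3) + 1);          /* cores per compute unit */
}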
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index f2f9ac7da25c..4b68bda30938 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -665,7 +665,7 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
665 this_cpu->c_early_init(c); 665 this_cpu->c_early_init(c);
666 666
667#ifdef CONFIG_SMP 667#ifdef CONFIG_SMP
668 c->cpu_index = boot_cpu_id; 668 c->cpu_index = 0;
669#endif 669#endif
670 filter_cpuid_features(c, false); 670 filter_cpuid_features(c, false);
671} 671}
@@ -704,16 +704,21 @@ void __init early_cpu_init(void)
704} 704}
705 705
706/* 706/*
707 * The NOPL instruction is supposed to exist on all CPUs with 707 * The NOPL instruction is supposed to exist on all CPUs of family >= 6;
708 * family >= 6; unfortunately, that's not true in practice because 708 * unfortunately, that's not true in practice because of early VIA
709 * of early VIA chips and (more importantly) broken virtualizers that 709 * chips and (more importantly) broken virtualizers that are not easy
710 * are not easy to detect. In the latter case it doesn't even *fail* 710 * to detect. In the latter case it doesn't even *fail* reliably, so
711 * reliably, so probing for it doesn't even work. Disable it completely 711 * probing for it doesn't even work. Disable it completely on 32-bit
712 * unless we can find a reliable way to detect all the broken cases. 712 * unless we can find a reliable way to detect all the broken cases.
713 * Enable it explicitly on 64-bit for non-constant inputs of cpu_has().
713 */ 714 */
714static void __cpuinit detect_nopl(struct cpuinfo_x86 *c) 715static void __cpuinit detect_nopl(struct cpuinfo_x86 *c)
715{ 716{
717#ifdef CONFIG_X86_32
716 clear_cpu_cap(c, X86_FEATURE_NOPL); 718 clear_cpu_cap(c, X86_FEATURE_NOPL);
719#else
720 set_cpu_cap(c, X86_FEATURE_NOPL);
721#endif
717} 722}
718 723
719static void __cpuinit generic_identify(struct cpuinfo_x86 *c) 724static void __cpuinit generic_identify(struct cpuinfo_x86 *c)
@@ -1264,13 +1269,6 @@ void __cpuinit cpu_init(void)
1264 clear_all_debug_regs(); 1269 clear_all_debug_regs();
1265 dbg_restore_debug_regs(); 1270 dbg_restore_debug_regs();
1266 1271
1267 /*
1268 * Force FPU initialization:
1269 */
1270 current_thread_info()->status = 0;
1271 clear_used_math();
1272 mxcsr_feature_mask_init();
1273
1274 fpu_init(); 1272 fpu_init();
1275 xsave_init(); 1273 xsave_init();
1276} 1274}
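What the now-unconditional X86_FEATURE_NOPL on 64-bit buys: the alternatives code may assume the multi-byte 0x0f 0x1f nop encodings. For reference, the 3-byte form the flag vouches for (purely illustrative):

static inline void nopl_sketch(void)
{
        asm volatile(".byte 0x0f, 0x1f, 0x00"); /* nopl (%rax) */
}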
diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h
index f668bb1f7d43..e765633f210e 100644
--- a/arch/x86/kernel/cpu/cpu.h
+++ b/arch/x86/kernel/cpu/cpu.h
@@ -32,6 +32,7 @@ struct cpu_dev {
32extern const struct cpu_dev *const __x86_cpu_dev_start[], 32extern const struct cpu_dev *const __x86_cpu_dev_start[],
33 *const __x86_cpu_dev_end[]; 33 *const __x86_cpu_dev_end[];
34 34
35extern void get_cpu_cap(struct cpuinfo_x86 *c);
35extern void cpu_detect_cache_sizes(struct cpuinfo_x86 *c); 36extern void cpu_detect_cache_sizes(struct cpuinfo_x86 *c);
36extern void get_cpu_cap(struct cpuinfo_x86 *c); 37extern void get_cpu_cap(struct cpuinfo_x86 *c);
37 38
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index b4389441efbb..695f17731e23 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -170,7 +170,7 @@ static void __cpuinit intel_smp_check(struct cpuinfo_x86 *c)
170{ 170{
171#ifdef CONFIG_SMP 171#ifdef CONFIG_SMP
172 /* called from identify_secondary_cpu()? */ 172 /* called from identify_secondary_cpu()? */
173 if (c->cpu_index == boot_cpu_id) 173 if (!c->cpu_index)
174 return; 174 return;
175 175
176 /* 176 /*
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 898c2f4eab88..12cd823c8d03 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -17,7 +17,7 @@
17 17
18#include <asm/processor.h> 18#include <asm/processor.h>
19#include <linux/smp.h> 19#include <linux/smp.h>
20#include <asm/k8.h> 20#include <asm/amd_nb.h>
21#include <asm/smp.h> 21#include <asm/smp.h>
22 22
23#define LVL_1_INST 1 23#define LVL_1_INST 1
@@ -306,7 +306,7 @@ struct _cache_attr {
306 ssize_t (*store)(struct _cpuid4_info *, const char *, size_t count); 306 ssize_t (*store)(struct _cpuid4_info *, const char *, size_t count);
307}; 307};
308 308
309#ifdef CONFIG_CPU_SUP_AMD 309#ifdef CONFIG_AMD_NB
310 310
311/* 311/*
312 * L3 cache descriptors 312 * L3 cache descriptors
@@ -369,7 +369,7 @@ static void __cpuinit amd_check_l3_disable(struct _cpuid4_info_regs *this_leaf,
369 return; 369 return;
370 370
371 /* not in virtualized environments */ 371 /* not in virtualized environments */
372 if (num_k8_northbridges == 0) 372 if (k8_northbridges.num == 0)
373 return; 373 return;
374 374
375 /* 375 /*
@@ -377,7 +377,7 @@ static void __cpuinit amd_check_l3_disable(struct _cpuid4_info_regs *this_leaf,
377 * never freed but this is done only on shutdown so it doesn't matter. 377 * never freed but this is done only on shutdown so it doesn't matter.
378 */ 378 */
379 if (!l3_caches) { 379 if (!l3_caches) {
380 int size = num_k8_northbridges * sizeof(struct amd_l3_cache *); 380 int size = k8_northbridges.num * sizeof(struct amd_l3_cache *);
381 381
382 l3_caches = kzalloc(size, GFP_ATOMIC); 382 l3_caches = kzalloc(size, GFP_ATOMIC);
383 if (!l3_caches) 383 if (!l3_caches)
@@ -556,12 +556,12 @@ static struct _cache_attr cache_disable_0 = __ATTR(cache_disable_0, 0644,
556static struct _cache_attr cache_disable_1 = __ATTR(cache_disable_1, 0644, 556static struct _cache_attr cache_disable_1 = __ATTR(cache_disable_1, 0644,
557 show_cache_disable_1, store_cache_disable_1); 557 show_cache_disable_1, store_cache_disable_1);
558 558
559#else /* CONFIG_CPU_SUP_AMD */ 559#else /* CONFIG_AMD_NB */
560static void __cpuinit 560static void __cpuinit
561amd_check_l3_disable(struct _cpuid4_info_regs *this_leaf, int index) 561amd_check_l3_disable(struct _cpuid4_info_regs *this_leaf, int index)
562{ 562{
563}; 563};
564#endif /* CONFIG_CPU_SUP_AMD */ 564#endif /* CONFIG_AMD_NB */
565 565
566static int 566static int
567__cpuinit cpuid4_cache_lookup_regs(int index, 567__cpuinit cpuid4_cache_lookup_regs(int index,
@@ -1000,7 +1000,7 @@ static struct attribute *default_attrs[] = {
1000 1000
1001static struct attribute *default_l3_attrs[] = { 1001static struct attribute *default_l3_attrs[] = {
1002 DEFAULT_SYSFS_CACHE_ATTRS, 1002 DEFAULT_SYSFS_CACHE_ATTRS,
1003#ifdef CONFIG_CPU_SUP_AMD 1003#ifdef CONFIG_AMD_NB
1004 &cache_disable_0.attr, 1004 &cache_disable_0.attr,
1005 &cache_disable_1.attr, 1005 &cache_disable_1.attr,
1006#endif 1006#endif
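The CONFIG_AMD_NB gating above still depends on the runtime check that at least one northbridge is visible, which is what rules out most virtualized environments. Hedged one-liner for that guard:

static bool l3_cache_disable_supported_sketch(void)
{
        /* no visible NB (e.g. under a hypervisor): no L3 disable attrs */
        return k8_northbridges.num != 0;
}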
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index 5e975298fa81..39aaee5c1ab2 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -141,6 +141,7 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
141 address = (low & MASK_BLKPTR_LO) >> 21; 141 address = (low & MASK_BLKPTR_LO) >> 21;
142 if (!address) 142 if (!address)
143 break; 143 break;
144
144 address += MCG_XBLK_ADDR; 145 address += MCG_XBLK_ADDR;
145 } else 146 } else
146 ++address; 147 ++address;
@@ -148,12 +149,8 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
148 if (rdmsr_safe(address, &low, &high)) 149 if (rdmsr_safe(address, &low, &high))
149 break; 150 break;
150 151
151 if (!(high & MASK_VALID_HI)) { 152 if (!(high & MASK_VALID_HI))
152 if (block) 153 continue;
153 continue;
154 else
155 break;
156 }
157 154
158 if (!(high & MASK_CNTP_HI) || 155 if (!(high & MASK_CNTP_HI) ||
159 (high & MASK_LOCKED_HI)) 156 (high & MASK_LOCKED_HI))
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index d9368eeda309..4b683267eca5 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -216,7 +216,7 @@ static __cpuinit int thermal_throttle_add_dev(struct sys_device *sys_dev,
216 err = sysfs_add_file_to_group(&sys_dev->kobj, 216 err = sysfs_add_file_to_group(&sys_dev->kobj,
217 &attr_core_power_limit_count.attr, 217 &attr_core_power_limit_count.attr,
218 thermal_attr_group.name); 218 thermal_attr_group.name);
219 if (cpu_has(c, X86_FEATURE_PTS)) 219 if (cpu_has(c, X86_FEATURE_PTS)) {
220 err = sysfs_add_file_to_group(&sys_dev->kobj, 220 err = sysfs_add_file_to_group(&sys_dev->kobj,
221 &attr_package_throttle_count.attr, 221 &attr_package_throttle_count.attr,
222 thermal_attr_group.name); 222 thermal_attr_group.name);
@@ -224,6 +224,7 @@ static __cpuinit int thermal_throttle_add_dev(struct sys_device *sys_dev,
224 err = sysfs_add_file_to_group(&sys_dev->kobj, 224 err = sysfs_add_file_to_group(&sys_dev->kobj,
225 &attr_package_power_limit_count.attr, 225 &attr_package_power_limit_count.attr,
226 thermal_attr_group.name); 226 thermal_attr_group.name);
227 }
227 228
228 return err; 229 return err;
229} 230}
@@ -349,7 +350,7 @@ static void intel_thermal_interrupt(void)
349 350
350static void unexpected_thermal_interrupt(void) 351static void unexpected_thermal_interrupt(void)
351{ 352{
352 printk(KERN_ERR "CPU%d: Unexpected LVT TMR interrupt!\n", 353 printk(KERN_ERR "CPU%d: Unexpected LVT thermal interrupt!\n",
353 smp_processor_id()); 354 smp_processor_id());
354 add_taint(TAINT_MACHINE_CHECK); 355 add_taint(TAINT_MACHINE_CHECK);
355} 356}
diff --git a/arch/x86/kernel/cpu/mtrr/cleanup.c b/arch/x86/kernel/cpu/mtrr/cleanup.c
index c5f59d071425..ac140c7be396 100644
--- a/arch/x86/kernel/cpu/mtrr/cleanup.c
+++ b/arch/x86/kernel/cpu/mtrr/cleanup.c
@@ -827,7 +827,7 @@ int __init amd_special_default_mtrr(void)
827 827
828 if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) 828 if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
829 return 0; 829 return 0;
830 if (boot_cpu_data.x86 < 0xf || boot_cpu_data.x86 > 0x11) 830 if (boot_cpu_data.x86 < 0xf)
831 return 0; 831 return 0;
832 /* In case some hypervisor doesn't pass SYSCFG through: */ 832 /* In case some hypervisor doesn't pass SYSCFG through: */
833 if (rdmsr_safe(MSR_K8_SYSCFG, &l, &h) < 0) 833 if (rdmsr_safe(MSR_K8_SYSCFG, &l, &h) < 0)
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 03a5b0385ad6..fe73c1844a9a 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -531,7 +531,7 @@ static int x86_pmu_hw_config(struct perf_event *event)
531/* 531/*
532 * Setup the hardware configuration for a given attr_type 532 * Setup the hardware configuration for a given attr_type
533 */ 533 */
534static int __hw_perf_event_init(struct perf_event *event) 534static int __x86_pmu_event_init(struct perf_event *event)
535{ 535{
536 int err; 536 int err;
537 537
@@ -584,7 +584,7 @@ static void x86_pmu_disable_all(void)
584 } 584 }
585} 585}
586 586
587void hw_perf_disable(void) 587static void x86_pmu_disable(struct pmu *pmu)
588{ 588{
589 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 589 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
590 590
@@ -619,7 +619,7 @@ static void x86_pmu_enable_all(int added)
619 } 619 }
620} 620}
621 621
622static const struct pmu pmu; 622static struct pmu pmu;
623 623
624static inline int is_x86_event(struct perf_event *event) 624static inline int is_x86_event(struct perf_event *event)
625{ 625{
@@ -801,10 +801,10 @@ static inline int match_prev_assignment(struct hw_perf_event *hwc,
801 hwc->last_tag == cpuc->tags[i]; 801 hwc->last_tag == cpuc->tags[i];
802} 802}
803 803
804static int x86_pmu_start(struct perf_event *event); 804static void x86_pmu_start(struct perf_event *event, int flags);
805static void x86_pmu_stop(struct perf_event *event); 805static void x86_pmu_stop(struct perf_event *event, int flags);
806 806
807void hw_perf_enable(void) 807static void x86_pmu_enable(struct pmu *pmu)
808{ 808{
809 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 809 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
810 struct perf_event *event; 810 struct perf_event *event;
@@ -840,7 +840,14 @@ void hw_perf_enable(void)
840 match_prev_assignment(hwc, cpuc, i)) 840 match_prev_assignment(hwc, cpuc, i))
841 continue; 841 continue;
842 842
843 x86_pmu_stop(event); 843 /*
844 * Ensure we don't accidentally enable a stopped
845 * counter simply because we rescheduled.
846 */
847 if (hwc->state & PERF_HES_STOPPED)
848 hwc->state |= PERF_HES_ARCH;
849
850 x86_pmu_stop(event, PERF_EF_UPDATE);
844 } 851 }
845 852
846 for (i = 0; i < cpuc->n_events; i++) { 853 for (i = 0; i < cpuc->n_events; i++) {
@@ -852,7 +859,10 @@ void hw_perf_enable(void)
852 else if (i < n_running) 859 else if (i < n_running)
853 continue; 860 continue;
854 861
855 x86_pmu_start(event); 862 if (hwc->state & PERF_HES_ARCH)
863 continue;
864
865 x86_pmu_start(event, PERF_EF_RELOAD);
856 } 866 }
857 cpuc->n_added = 0; 867 cpuc->n_added = 0;
858 perf_events_lapic_init(); 868 perf_events_lapic_init();
@@ -953,15 +963,12 @@ static void x86_pmu_enable_event(struct perf_event *event)
953} 963}
954 964
955/* 965/*
956 * activate a single event 966 * Add a single event to the PMU.
957 * 967 *
958 * The event is added to the group of enabled events 968 * The event is added to the group of enabled events
959 * but only if it can be scheduled with existing events. 969 * but only if it can be scheduled with existing events.
960 *
961 * Called with PMU disabled. If successful and return value 1,
962 * then guaranteed to call perf_enable() and hw_perf_enable()
963 */ 970 */
964static int x86_pmu_enable(struct perf_event *event) 971static int x86_pmu_add(struct perf_event *event, int flags)
965{ 972{
966 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 973 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
967 struct hw_perf_event *hwc; 974 struct hw_perf_event *hwc;
@@ -970,58 +977,67 @@ static int x86_pmu_enable(struct perf_event *event)
970 977
971 hwc = &event->hw; 978 hwc = &event->hw;
972 979
980 perf_pmu_disable(event->pmu);
973 n0 = cpuc->n_events; 981 n0 = cpuc->n_events;
974 n = collect_events(cpuc, event, false); 982 ret = n = collect_events(cpuc, event, false);
975 if (n < 0) 983 if (ret < 0)
976 return n; 984 goto out;
985
986 hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
987 if (!(flags & PERF_EF_START))
988 hwc->state |= PERF_HES_ARCH;
977 989
978 /* 990 /*
979 * If group events scheduling transaction was started, 991 * If group events scheduling transaction was started,
981 * skip the schedulability test here, it will be performed 993 * skip the schedulability test here, it will be performed
981 * at commit time(->commit_txn) as a whole 993 * at commit time (->commit_txn) as a whole
982 */ 994 */
983 if (cpuc->group_flag & PERF_EVENT_TXN) 995 if (cpuc->group_flag & PERF_EVENT_TXN)
984 goto out; 996 goto done_collect;
985 997
986 ret = x86_pmu.schedule_events(cpuc, n, assign); 998 ret = x86_pmu.schedule_events(cpuc, n, assign);
987 if (ret) 999 if (ret)
988 return ret; 1000 goto out;
989 /* 1001 /*
990 * copy new assignment, now we know it is possible 1002 * copy new assignment, now we know it is possible
991 * will be used by hw_perf_enable() 1003 * will be used by hw_perf_enable()
992 */ 1004 */
993 memcpy(cpuc->assign, assign, n*sizeof(int)); 1005 memcpy(cpuc->assign, assign, n*sizeof(int));
994 1006
995out: 1007done_collect:
996 cpuc->n_events = n; 1008 cpuc->n_events = n;
997 cpuc->n_added += n - n0; 1009 cpuc->n_added += n - n0;
998 cpuc->n_txn += n - n0; 1010 cpuc->n_txn += n - n0;
999 1011
1000 return 0; 1012 ret = 0;
1013out:
1014 perf_pmu_enable(event->pmu);
1015 return ret;
1001} 1016}
1002 1017
1003static int x86_pmu_start(struct perf_event *event) 1018static void x86_pmu_start(struct perf_event *event, int flags)
1004{ 1019{
1005 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 1020 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1006 int idx = event->hw.idx; 1021 int idx = event->hw.idx;
1007 1022
1008 if (idx == -1) 1023 if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
1009 return -EAGAIN; 1024 return;
1025
1026 if (WARN_ON_ONCE(idx == -1))
1027 return;
1028
1029 if (flags & PERF_EF_RELOAD) {
1030 WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
1031 x86_perf_event_set_period(event);
1032 }
1033
1034 event->hw.state = 0;
1010 1035
1011 x86_perf_event_set_period(event);
1012 cpuc->events[idx] = event; 1036 cpuc->events[idx] = event;
1013 __set_bit(idx, cpuc->active_mask); 1037 __set_bit(idx, cpuc->active_mask);
1014 __set_bit(idx, cpuc->running); 1038 __set_bit(idx, cpuc->running);
1015 x86_pmu.enable(event); 1039 x86_pmu.enable(event);
1016 perf_event_update_userpage(event); 1040 perf_event_update_userpage(event);
1017
1018 return 0;
1019}
1020
1021static void x86_pmu_unthrottle(struct perf_event *event)
1022{
1023 int ret = x86_pmu_start(event);
1024 WARN_ON_ONCE(ret);
1025} 1041}
1026 1042
1027void perf_event_print_debug(void) 1043void perf_event_print_debug(void)
@@ -1078,27 +1094,29 @@ void perf_event_print_debug(void)
1078 local_irq_restore(flags); 1094 local_irq_restore(flags);
1079} 1095}
1080 1096
1081static void x86_pmu_stop(struct perf_event *event) 1097static void x86_pmu_stop(struct perf_event *event, int flags)
1082{ 1098{
1083 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 1099 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1084 struct hw_perf_event *hwc = &event->hw; 1100 struct hw_perf_event *hwc = &event->hw;
1085 int idx = hwc->idx;
1086 1101
1087 if (!__test_and_clear_bit(idx, cpuc->active_mask)) 1102 if (__test_and_clear_bit(hwc->idx, cpuc->active_mask)) {
1088 return; 1103 x86_pmu.disable(event);
1089 1104 cpuc->events[hwc->idx] = NULL;
1090 x86_pmu.disable(event); 1105 WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
1091 1106 hwc->state |= PERF_HES_STOPPED;
1092 /* 1107 }
1093 * Drain the remaining delta count out of a event
1094 * that we are disabling:
1095 */
1096 x86_perf_event_update(event);
1097 1108
1098 cpuc->events[idx] = NULL; 1109 if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
1110 /*
1111 * Drain the remaining delta count out of an event
1112 * that we are disabling:
1113 */
1114 x86_perf_event_update(event);
1115 hwc->state |= PERF_HES_UPTODATE;
1116 }
1099} 1117}
1100 1118
1101static void x86_pmu_disable(struct perf_event *event) 1119static void x86_pmu_del(struct perf_event *event, int flags)
1102{ 1120{
1103 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 1121 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1104 int i; 1122 int i;
@@ -1111,7 +1129,7 @@ static void x86_pmu_disable(struct perf_event *event)
1111 if (cpuc->group_flag & PERF_EVENT_TXN) 1129 if (cpuc->group_flag & PERF_EVENT_TXN)
1112 return; 1130 return;
1113 1131
1114 x86_pmu_stop(event); 1132 x86_pmu_stop(event, PERF_EF_UPDATE);
1115 1133
1116 for (i = 0; i < cpuc->n_events; i++) { 1134 for (i = 0; i < cpuc->n_events; i++) {
1117 if (event == cpuc->event_list[i]) { 1135 if (event == cpuc->event_list[i]) {
@@ -1134,7 +1152,6 @@ static int x86_pmu_handle_irq(struct pt_regs *regs)
1134 struct perf_sample_data data; 1152 struct perf_sample_data data;
1135 struct cpu_hw_events *cpuc; 1153 struct cpu_hw_events *cpuc;
1136 struct perf_event *event; 1154 struct perf_event *event;
1137 struct hw_perf_event *hwc;
1138 int idx, handled = 0; 1155 int idx, handled = 0;
1139 u64 val; 1156 u64 val;
1140 1157
@@ -1155,7 +1172,6 @@ static int x86_pmu_handle_irq(struct pt_regs *regs)
1155 } 1172 }
1156 1173
1157 event = cpuc->events[idx]; 1174 event = cpuc->events[idx];
1158 hwc = &event->hw;
1159 1175
1160 val = x86_perf_event_update(event); 1176 val = x86_perf_event_update(event);
1161 if (val & (1ULL << (x86_pmu.cntval_bits - 1))) 1177 if (val & (1ULL << (x86_pmu.cntval_bits - 1)))
@@ -1171,7 +1187,7 @@ static int x86_pmu_handle_irq(struct pt_regs *regs)
1171 continue; 1187 continue;
1172 1188
1173 if (perf_event_overflow(event, 1, &data, regs)) 1189 if (perf_event_overflow(event, 1, &data, regs))
1174 x86_pmu_stop(event); 1190 x86_pmu_stop(event, 0);
1175 } 1191 }
1176 1192
1177 if (handled) 1193 if (handled)
@@ -1180,25 +1196,6 @@ static int x86_pmu_handle_irq(struct pt_regs *regs)
1180 return handled; 1196 return handled;
1181} 1197}
1182 1198
1183void smp_perf_pending_interrupt(struct pt_regs *regs)
1184{
1185 irq_enter();
1186 ack_APIC_irq();
1187 inc_irq_stat(apic_pending_irqs);
1188 perf_event_do_pending();
1189 irq_exit();
1190}
1191
1192void set_perf_event_pending(void)
1193{
1194#ifdef CONFIG_X86_LOCAL_APIC
1195 if (!x86_pmu.apic || !x86_pmu_initialized())
1196 return;
1197
1198 apic->send_IPI_self(LOCAL_PENDING_VECTOR);
1199#endif
1200}
1201
1202void perf_events_lapic_init(void) 1199void perf_events_lapic_init(void)
1203{ 1200{
1204 if (!x86_pmu.apic || !x86_pmu_initialized()) 1201 if (!x86_pmu.apic || !x86_pmu_initialized())
@@ -1388,7 +1385,6 @@ void __init init_hw_perf_events(void)
1388 x86_pmu.num_counters = X86_PMC_MAX_GENERIC; 1385 x86_pmu.num_counters = X86_PMC_MAX_GENERIC;
1389 } 1386 }
1390 x86_pmu.intel_ctrl = (1 << x86_pmu.num_counters) - 1; 1387 x86_pmu.intel_ctrl = (1 << x86_pmu.num_counters) - 1;
1391 perf_max_events = x86_pmu.num_counters;
1392 1388
1393 if (x86_pmu.num_counters_fixed > X86_PMC_MAX_FIXED) { 1389 if (x86_pmu.num_counters_fixed > X86_PMC_MAX_FIXED) {
1394 WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!", 1390 WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!",
@@ -1424,6 +1420,7 @@ void __init init_hw_perf_events(void)
1424 pr_info("... fixed-purpose events: %d\n", x86_pmu.num_counters_fixed); 1420 pr_info("... fixed-purpose events: %d\n", x86_pmu.num_counters_fixed);
1425 pr_info("... event mask: %016Lx\n", x86_pmu.intel_ctrl); 1421 pr_info("... event mask: %016Lx\n", x86_pmu.intel_ctrl);
1426 1422
1423 perf_pmu_register(&pmu);
1427 perf_cpu_notifier(x86_pmu_notifier); 1424 perf_cpu_notifier(x86_pmu_notifier);
1428} 1425}
1429 1426
@@ -1437,10 +1434,11 @@ static inline void x86_pmu_read(struct perf_event *event)
1437 * Set the flag to make pmu::enable() not perform the 1434 * Set the flag to make pmu::enable() not perform the
1438 * schedulability test, it will be performed at commit time 1435 * schedulability test, it will be performed at commit time
1439 */ 1436 */
1440static void x86_pmu_start_txn(const struct pmu *pmu) 1437static void x86_pmu_start_txn(struct pmu *pmu)
1441{ 1438{
1442 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 1439 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1443 1440
1441 perf_pmu_disable(pmu);
1444 cpuc->group_flag |= PERF_EVENT_TXN; 1442 cpuc->group_flag |= PERF_EVENT_TXN;
1445 cpuc->n_txn = 0; 1443 cpuc->n_txn = 0;
1446} 1444}
@@ -1450,7 +1448,7 @@ static void x86_pmu_start_txn(const struct pmu *pmu)
1450 * Clear the flag and pmu::enable() will perform the 1448 * Clear the flag and pmu::enable() will perform the
1451 * schedulability test. 1449 * schedulability test.
1452 */ 1450 */
1453static void x86_pmu_cancel_txn(const struct pmu *pmu) 1451static void x86_pmu_cancel_txn(struct pmu *pmu)
1454{ 1452{
1455 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 1453 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1456 1454
@@ -1460,6 +1458,7 @@ static void x86_pmu_cancel_txn(const struct pmu *pmu)
1460 */ 1458 */
1461 cpuc->n_added -= cpuc->n_txn; 1459 cpuc->n_added -= cpuc->n_txn;
1462 cpuc->n_events -= cpuc->n_txn; 1460 cpuc->n_events -= cpuc->n_txn;
1461 perf_pmu_enable(pmu);
1463} 1462}
1464 1463
1465/* 1464/*
@@ -1467,7 +1466,7 @@ static void x86_pmu_cancel_txn(const struct pmu *pmu)
1467 * Perform the group schedulability test as a whole 1466 * Perform the group schedulability test as a whole
1468 * Return 0 if success 1467 * Return 0 if success
1469 */ 1468 */
1470static int x86_pmu_commit_txn(const struct pmu *pmu) 1469static int x86_pmu_commit_txn(struct pmu *pmu)
1471{ 1470{
1472 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 1471 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1473 int assign[X86_PMC_IDX_MAX]; 1472 int assign[X86_PMC_IDX_MAX];
@@ -1489,22 +1488,10 @@ static int x86_pmu_commit_txn(const struct pmu *pmu)
1489 memcpy(cpuc->assign, assign, n*sizeof(int)); 1488 memcpy(cpuc->assign, assign, n*sizeof(int));
1490 1489
1491 cpuc->group_flag &= ~PERF_EVENT_TXN; 1490 cpuc->group_flag &= ~PERF_EVENT_TXN;
1492 1491 perf_pmu_enable(pmu);
1493 return 0; 1492 return 0;
1494} 1493}
1495 1494
1496static const struct pmu pmu = {
1497 .enable = x86_pmu_enable,
1498 .disable = x86_pmu_disable,
1499 .start = x86_pmu_start,
1500 .stop = x86_pmu_stop,
1501 .read = x86_pmu_read,
1502 .unthrottle = x86_pmu_unthrottle,
1503 .start_txn = x86_pmu_start_txn,
1504 .cancel_txn = x86_pmu_cancel_txn,
1505 .commit_txn = x86_pmu_commit_txn,
1506};
1507
1508/* 1495/*
1509 * validate that we can schedule this event 1496 * validate that we can schedule this event
1510 */ 1497 */
@@ -1579,12 +1566,22 @@ out:
1579 return ret; 1566 return ret;
1580} 1567}
1581 1568
1582const struct pmu *hw_perf_event_init(struct perf_event *event) 1569int x86_pmu_event_init(struct perf_event *event)
1583{ 1570{
1584 const struct pmu *tmp; 1571 struct pmu *tmp;
1585 int err; 1572 int err;
1586 1573
1587 err = __hw_perf_event_init(event); 1574 switch (event->attr.type) {
1575 case PERF_TYPE_RAW:
1576 case PERF_TYPE_HARDWARE:
1577 case PERF_TYPE_HW_CACHE:
1578 break;
1579
1580 default:
1581 return -ENOENT;
1582 }
1583
1584 err = __x86_pmu_event_init(event);
1588 if (!err) { 1585 if (!err) {
1589 /* 1586 /*
1590 * we temporarily connect event to its pmu 1587 * we temporarily connect event to its pmu
@@ -1604,26 +1601,31 @@ const struct pmu *hw_perf_event_init(struct perf_event *event)
1604 if (err) { 1601 if (err) {
1605 if (event->destroy) 1602 if (event->destroy)
1606 event->destroy(event); 1603 event->destroy(event);
1607 return ERR_PTR(err);
1608 } 1604 }
1609 1605
1610 return &pmu; 1606 return err;
1611} 1607}
1612 1608
1613/* 1609static struct pmu pmu = {
1614 * callchain support 1610 .pmu_enable = x86_pmu_enable,
1615 */ 1611 .pmu_disable = x86_pmu_disable,
1616 1612
1617static inline 1613 .event_init = x86_pmu_event_init,
1618void callchain_store(struct perf_callchain_entry *entry, u64 ip)
1619{
1620 if (entry->nr < PERF_MAX_STACK_DEPTH)
1621 entry->ip[entry->nr++] = ip;
1622}
1623 1614
1624static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry); 1615 .add = x86_pmu_add,
1625static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_nmi_entry); 1616 .del = x86_pmu_del,
1617 .start = x86_pmu_start,
1618 .stop = x86_pmu_stop,
1619 .read = x86_pmu_read,
1626 1620
1621 .start_txn = x86_pmu_start_txn,
1622 .cancel_txn = x86_pmu_cancel_txn,
1623 .commit_txn = x86_pmu_commit_txn,
1624};
1625
1626/*
1627 * callchain support
1628 */
1627 1629
1628static void 1630static void
1629backtrace_warning_symbol(void *data, char *msg, unsigned long symbol) 1631backtrace_warning_symbol(void *data, char *msg, unsigned long symbol)
@@ -1645,7 +1647,7 @@ static void backtrace_address(void *data, unsigned long addr, int reliable)
1645{ 1647{
1646 struct perf_callchain_entry *entry = data; 1648 struct perf_callchain_entry *entry = data;
1647 1649
1648 callchain_store(entry, addr); 1650 perf_callchain_store(entry, addr);
1649} 1651}
1650 1652
1651static const struct stacktrace_ops backtrace_ops = { 1653static const struct stacktrace_ops backtrace_ops = {
@@ -1656,11 +1658,15 @@ static const struct stacktrace_ops backtrace_ops = {
1656 .walk_stack = print_context_stack_bp, 1658 .walk_stack = print_context_stack_bp,
1657}; 1659};
1658 1660
1659static void 1661void
1660perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry) 1662perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
1661{ 1663{
1662 callchain_store(entry, PERF_CONTEXT_KERNEL); 1664 if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
1663 callchain_store(entry, regs->ip); 1665 /* TODO: We don't support guest os callchain now */
1666 return;
1667 }
1668
1669 perf_callchain_store(entry, regs->ip);
1664 1670
1665 dump_trace(NULL, regs, NULL, regs->bp, &backtrace_ops, entry); 1671 dump_trace(NULL, regs, NULL, regs->bp, &backtrace_ops, entry);
1666} 1672}
@@ -1689,7 +1695,7 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry)
1689 if (fp < compat_ptr(regs->sp)) 1695 if (fp < compat_ptr(regs->sp))
1690 break; 1696 break;
1691 1697
1692 callchain_store(entry, frame.return_address); 1698 perf_callchain_store(entry, frame.return_address);
1693 fp = compat_ptr(frame.next_frame); 1699 fp = compat_ptr(frame.next_frame);
1694 } 1700 }
1695 return 1; 1701 return 1;
@@ -1702,19 +1708,20 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry)
1702} 1708}
1703#endif 1709#endif
1704 1710
1705static void 1711void
1706perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry) 1712perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
1707{ 1713{
1708 struct stack_frame frame; 1714 struct stack_frame frame;
1709 const void __user *fp; 1715 const void __user *fp;
1710 1716
1711 if (!user_mode(regs)) 1717 if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
1712 regs = task_pt_regs(current); 1718 /* TODO: We don't support guest OS callchains yet */
1719 return;
1720 }
1713 1721
1714 fp = (void __user *)regs->bp; 1722 fp = (void __user *)regs->bp;
1715 1723
1716 callchain_store(entry, PERF_CONTEXT_USER); 1724 perf_callchain_store(entry, regs->ip);
1717 callchain_store(entry, regs->ip);
1718 1725
1719 if (perf_callchain_user32(regs, entry)) 1726 if (perf_callchain_user32(regs, entry))
1720 return; 1727 return;
@@ -1731,52 +1738,11 @@ perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry)
1731 if ((unsigned long)fp < regs->sp) 1738 if ((unsigned long)fp < regs->sp)
1732 break; 1739 break;
1733 1740
1734 callchain_store(entry, frame.return_address); 1741 perf_callchain_store(entry, frame.return_address);
1735 fp = frame.next_frame; 1742 fp = frame.next_frame;
1736 } 1743 }
1737} 1744}
1738 1745
1739static void
1740perf_do_callchain(struct pt_regs *regs, struct perf_callchain_entry *entry)
1741{
1742 int is_user;
1743
1744 if (!regs)
1745 return;
1746
1747 is_user = user_mode(regs);
1748
1749 if (is_user && current->state != TASK_RUNNING)
1750 return;
1751
1752 if (!is_user)
1753 perf_callchain_kernel(regs, entry);
1754
1755 if (current->mm)
1756 perf_callchain_user(regs, entry);
1757}
1758
1759struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
1760{
1761 struct perf_callchain_entry *entry;
1762
1763 if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
1764 /* TODO: We don't support guest os callchain now */
1765 return NULL;
1766 }
1767
1768 if (in_nmi())
1769 entry = &__get_cpu_var(pmc_nmi_entry);
1770 else
1771 entry = &__get_cpu_var(pmc_irq_entry);
1772
1773 entry->nr = 0;
1774
1775 perf_do_callchain(regs, entry);
1776
1777 return entry;
1778}
1779
1780unsigned long perf_instruction_pointer(struct pt_regs *regs) 1746unsigned long perf_instruction_pointer(struct pt_regs *regs)
1781{ 1747{
1782 unsigned long ip; 1748 unsigned long ip;
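
Note: the perf_event.c hunks above move x86 onto the multi-PMU core API: hw_perf_event_init() becomes x86_pmu_event_init(), which returns -ENOENT for attr types it does not own so the core can offer the event to another PMU, and the add/del/start/stop/read and transaction callbacks are published through a static struct pmu instead of a returned pointer. The open-coded callchain_store() is dropped in favour of the generic perf_callchain_store(); a minimal sketch of that helper, mirroring the local copy removed above (the real definition lives in the core perf headers, not in this file):

	static inline void
	perf_callchain_store(struct perf_callchain_entry *entry, u64 ip)
	{
		if (entry->nr < PERF_MAX_STACK_DEPTH)
			entry->ip[entry->nr++] = ip;
	}

The per-cpu pmc_irq_entry/pmc_nmi_entry buffers and perf_callchain() itself migrate into the core for the same reason, and the PERF_CONTEXT_KERNEL/PERF_CONTEXT_USER markers are presumably stored there before these arch callbacks run, which is why their stores disappear here without a local replacement.
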
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index c2897b7b4a3b..46d58448c3af 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -52,7 +52,7 @@ static __initconst const u64 amd_hw_cache_event_ids
52 [ C(DTLB) ] = { 52 [ C(DTLB) ] = {
53 [ C(OP_READ) ] = { 53 [ C(OP_READ) ] = {
54 [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */ 54 [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */
55 [ C(RESULT_MISS) ] = 0x0046, /* L1 DTLB and L2 DLTB Miss */ 55 [ C(RESULT_MISS) ] = 0x0746, /* L1_DTLB_AND_L2_DLTB_MISS.ALL */
56 }, 56 },
57 [ C(OP_WRITE) ] = { 57 [ C(OP_WRITE) ] = {
58 [ C(RESULT_ACCESS) ] = 0, 58 [ C(RESULT_ACCESS) ] = 0,
@@ -66,7 +66,7 @@ static __initconst const u64 amd_hw_cache_event_ids
66 [ C(ITLB) ] = { 66 [ C(ITLB) ] = {
67 [ C(OP_READ) ] = { 67 [ C(OP_READ) ] = {
68 [ C(RESULT_ACCESS) ] = 0x0080, /* Instruction fetches */ 68 [ C(RESULT_ACCESS) ] = 0x0080, /* Instruction fetches */
69 [ C(RESULT_MISS) ] = 0x0085, /* Instr. fetch ITLB misses */ 69 [ C(RESULT_MISS) ] = 0x0385, /* L1_ITLB_AND_L2_ITLB_MISS.ALL */
70 }, 70 },
71 [ C(OP_WRITE) ] = { 71 [ C(OP_WRITE) ] = {
72 [ C(RESULT_ACCESS) ] = -1, 72 [ C(RESULT_ACCESS) ] = -1,
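
The two amd_hw_cache_event_ids hunks widen the DTLB and ITLB miss events by setting their unit-mask bits. Assuming the standard AMD PERFEVTSEL layout (event select in bits 7:0, unit mask in bits 15:8), the new codes decompose as sketched below with a hypothetical helper, for illustration only:

	#define AMD_RAW_EVENT(event, umask)	(((umask) << 8) | (event))

	/* 0x0746 == AMD_RAW_EVENT(0x46, 0x07): L1_DTLB_AND_L2_DLTB_MISS, all sub-events */
	/* 0x0385 == AMD_RAW_EVENT(0x85, 0x03): L1_ITLB_AND_L2_ITLB_MISS, all sub-events */

The old codes (0x0046, 0x0085) carried an empty unit mask and so missed the explicit sub-event selection these events require.
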
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index ee05c90012d2..c8f5c088cad1 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -713,18 +713,18 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
713 struct cpu_hw_events *cpuc; 713 struct cpu_hw_events *cpuc;
714 int bit, loops; 714 int bit, loops;
715 u64 status; 715 u64 status;
716 int handled = 0; 716 int handled;
717 717
718 perf_sample_data_init(&data, 0); 718 perf_sample_data_init(&data, 0);
719 719
720 cpuc = &__get_cpu_var(cpu_hw_events); 720 cpuc = &__get_cpu_var(cpu_hw_events);
721 721
722 intel_pmu_disable_all(); 722 intel_pmu_disable_all();
723 intel_pmu_drain_bts_buffer(); 723 handled = intel_pmu_drain_bts_buffer();
724 status = intel_pmu_get_status(); 724 status = intel_pmu_get_status();
725 if (!status) { 725 if (!status) {
726 intel_pmu_enable_all(0); 726 intel_pmu_enable_all(0);
727 return 0; 727 return handled;
728 } 728 }
729 729
730 loops = 0; 730 loops = 0;
@@ -763,7 +763,7 @@ again:
763 data.period = event->hw.last_period; 763 data.period = event->hw.last_period;
764 764
765 if (perf_event_overflow(event, 1, &data, regs)) 765 if (perf_event_overflow(event, 1, &data, regs))
766 x86_pmu_stop(event); 766 x86_pmu_stop(event, 0);
767 } 767 }
768 768
769 /* 769 /*
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 18018d1311cd..4977f9c400e5 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -214,7 +214,7 @@ static void intel_pmu_disable_bts(void)
214 update_debugctlmsr(debugctlmsr); 214 update_debugctlmsr(debugctlmsr);
215} 215}
216 216
217static void intel_pmu_drain_bts_buffer(void) 217static int intel_pmu_drain_bts_buffer(void)
218{ 218{
219 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 219 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
220 struct debug_store *ds = cpuc->ds; 220 struct debug_store *ds = cpuc->ds;
@@ -231,16 +231,16 @@ static void intel_pmu_drain_bts_buffer(void)
231 struct pt_regs regs; 231 struct pt_regs regs;
232 232
233 if (!event) 233 if (!event)
234 return; 234 return 0;
235 235
236 if (!ds) 236 if (!ds)
237 return; 237 return 0;
238 238
239 at = (struct bts_record *)(unsigned long)ds->bts_buffer_base; 239 at = (struct bts_record *)(unsigned long)ds->bts_buffer_base;
240 top = (struct bts_record *)(unsigned long)ds->bts_index; 240 top = (struct bts_record *)(unsigned long)ds->bts_index;
241 241
242 if (top <= at) 242 if (top <= at)
243 return; 243 return 0;
244 244
245 ds->bts_index = ds->bts_buffer_base; 245 ds->bts_index = ds->bts_buffer_base;
246 246
@@ -256,7 +256,7 @@ static void intel_pmu_drain_bts_buffer(void)
256 perf_prepare_sample(&header, &data, event, &regs); 256 perf_prepare_sample(&header, &data, event, &regs);
257 257
258 if (perf_output_begin(&handle, event, header.size * (top - at), 1, 1)) 258 if (perf_output_begin(&handle, event, header.size * (top - at), 1, 1))
259 return; 259 return 1;
260 260
261 for (; at < top; at++) { 261 for (; at < top; at++) {
262 data.ip = at->from; 262 data.ip = at->from;
@@ -270,6 +270,7 @@ static void intel_pmu_drain_bts_buffer(void)
270 /* There's new data available. */ 270 /* There's new data available. */
271 event->hw.interrupts++; 271 event->hw.interrupts++;
272 event->pending_kill = POLL_IN; 272 event->pending_kill = POLL_IN;
273 return 1;
273} 274}
274 275
275/* 276/*
@@ -491,7 +492,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
491 regs.flags &= ~PERF_EFLAGS_EXACT; 492 regs.flags &= ~PERF_EFLAGS_EXACT;
492 493
493 if (perf_event_overflow(event, 1, &data, &regs)) 494 if (perf_event_overflow(event, 1, &data, &regs))
494 x86_pmu_stop(event); 495 x86_pmu_stop(event, 0);
495} 496}
496 497
497static void intel_pmu_drain_pebs_core(struct pt_regs *iregs) 498static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)
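
Taken together, the perf_event_intel.c and perf_event_intel_ds.c hunks make BTS-only PMIs count as handled: intel_pmu_drain_bts_buffer() now returns 1 when it consumed records (or hit the output path), and intel_pmu_handle_irq() seeds its handled count from that return value, so an NMI whose only cause was a full BTS buffer is no longer reported as unknown. The resulting control flow, condensed from the hunks above:

	handled = intel_pmu_drain_bts_buffer();
	status = intel_pmu_get_status();
	if (!status) {
		intel_pmu_enable_all(0);
		return handled;		/* 1 if BTS data was drained, else 0 */
	}
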
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c
index 249015173992..81400b93e694 100644
--- a/arch/x86/kernel/cpu/perf_event_p4.c
+++ b/arch/x86/kernel/cpu/perf_event_p4.c
@@ -18,6 +18,8 @@
18struct p4_event_bind { 18struct p4_event_bind {
19 unsigned int opcode; /* Event code and ESCR selector */ 19 unsigned int opcode; /* Event code and ESCR selector */
20 unsigned int escr_msr[2]; /* ESCR MSR for this event */ 20 unsigned int escr_msr[2]; /* ESCR MSR for this event */
21 unsigned int escr_emask; /* valid ESCR EventMask bits */
22 unsigned int shared; /* event is shared across threads */
21 char cntr[2][P4_CNTR_LIMIT]; /* counter index (offset), -1 on absence */ 23 char cntr[2][P4_CNTR_LIMIT]; /* counter index (offset), -1 on absence */
22}; 24};
23 25
@@ -66,231 +68,435 @@ static struct p4_event_bind p4_event_bind_map[] = {
66 [P4_EVENT_TC_DELIVER_MODE] = { 68 [P4_EVENT_TC_DELIVER_MODE] = {
67 .opcode = P4_OPCODE(P4_EVENT_TC_DELIVER_MODE), 69 .opcode = P4_OPCODE(P4_EVENT_TC_DELIVER_MODE),
68 .escr_msr = { MSR_P4_TC_ESCR0, MSR_P4_TC_ESCR1 }, 70 .escr_msr = { MSR_P4_TC_ESCR0, MSR_P4_TC_ESCR1 },
71 .escr_emask =
72 P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, DD) |
73 P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, DB) |
74 P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, DI) |
75 P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, BD) |
76 P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, BB) |
77 P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, BI) |
78 P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, ID),
79 .shared = 1,
69 .cntr = { {4, 5, -1}, {6, 7, -1} }, 80 .cntr = { {4, 5, -1}, {6, 7, -1} },
70 }, 81 },
71 [P4_EVENT_BPU_FETCH_REQUEST] = { 82 [P4_EVENT_BPU_FETCH_REQUEST] = {
72 .opcode = P4_OPCODE(P4_EVENT_BPU_FETCH_REQUEST), 83 .opcode = P4_OPCODE(P4_EVENT_BPU_FETCH_REQUEST),
73 .escr_msr = { MSR_P4_BPU_ESCR0, MSR_P4_BPU_ESCR1 }, 84 .escr_msr = { MSR_P4_BPU_ESCR0, MSR_P4_BPU_ESCR1 },
85 .escr_emask =
86 P4_ESCR_EMASK_BIT(P4_EVENT_BPU_FETCH_REQUEST, TCMISS),
74 .cntr = { {0, -1, -1}, {2, -1, -1} }, 87 .cntr = { {0, -1, -1}, {2, -1, -1} },
75 }, 88 },
76 [P4_EVENT_ITLB_REFERENCE] = { 89 [P4_EVENT_ITLB_REFERENCE] = {
77 .opcode = P4_OPCODE(P4_EVENT_ITLB_REFERENCE), 90 .opcode = P4_OPCODE(P4_EVENT_ITLB_REFERENCE),
78 .escr_msr = { MSR_P4_ITLB_ESCR0, MSR_P4_ITLB_ESCR1 }, 91 .escr_msr = { MSR_P4_ITLB_ESCR0, MSR_P4_ITLB_ESCR1 },
92 .escr_emask =
93 P4_ESCR_EMASK_BIT(P4_EVENT_ITLB_REFERENCE, HIT) |
94 P4_ESCR_EMASK_BIT(P4_EVENT_ITLB_REFERENCE, MISS) |
95 P4_ESCR_EMASK_BIT(P4_EVENT_ITLB_REFERENCE, HIT_UK),
79 .cntr = { {0, -1, -1}, {2, -1, -1} }, 96 .cntr = { {0, -1, -1}, {2, -1, -1} },
80 }, 97 },
81 [P4_EVENT_MEMORY_CANCEL] = { 98 [P4_EVENT_MEMORY_CANCEL] = {
82 .opcode = P4_OPCODE(P4_EVENT_MEMORY_CANCEL), 99 .opcode = P4_OPCODE(P4_EVENT_MEMORY_CANCEL),
83 .escr_msr = { MSR_P4_DAC_ESCR0, MSR_P4_DAC_ESCR1 }, 100 .escr_msr = { MSR_P4_DAC_ESCR0, MSR_P4_DAC_ESCR1 },
101 .escr_emask =
102 P4_ESCR_EMASK_BIT(P4_EVENT_MEMORY_CANCEL, ST_RB_FULL) |
103 P4_ESCR_EMASK_BIT(P4_EVENT_MEMORY_CANCEL, 64K_CONF),
84 .cntr = { {8, 9, -1}, {10, 11, -1} }, 104 .cntr = { {8, 9, -1}, {10, 11, -1} },
85 }, 105 },
86 [P4_EVENT_MEMORY_COMPLETE] = { 106 [P4_EVENT_MEMORY_COMPLETE] = {
87 .opcode = P4_OPCODE(P4_EVENT_MEMORY_COMPLETE), 107 .opcode = P4_OPCODE(P4_EVENT_MEMORY_COMPLETE),
88 .escr_msr = { MSR_P4_SAAT_ESCR0 , MSR_P4_SAAT_ESCR1 }, 108 .escr_msr = { MSR_P4_SAAT_ESCR0 , MSR_P4_SAAT_ESCR1 },
109 .escr_emask =
110 P4_ESCR_EMASK_BIT(P4_EVENT_MEMORY_COMPLETE, LSC) |
111 P4_ESCR_EMASK_BIT(P4_EVENT_MEMORY_COMPLETE, SSC),
89 .cntr = { {8, 9, -1}, {10, 11, -1} }, 112 .cntr = { {8, 9, -1}, {10, 11, -1} },
90 }, 113 },
91 [P4_EVENT_LOAD_PORT_REPLAY] = { 114 [P4_EVENT_LOAD_PORT_REPLAY] = {
92 .opcode = P4_OPCODE(P4_EVENT_LOAD_PORT_REPLAY), 115 .opcode = P4_OPCODE(P4_EVENT_LOAD_PORT_REPLAY),
93 .escr_msr = { MSR_P4_SAAT_ESCR0, MSR_P4_SAAT_ESCR1 }, 116 .escr_msr = { MSR_P4_SAAT_ESCR0, MSR_P4_SAAT_ESCR1 },
117 .escr_emask =
118 P4_ESCR_EMASK_BIT(P4_EVENT_LOAD_PORT_REPLAY, SPLIT_LD),
94 .cntr = { {8, 9, -1}, {10, 11, -1} }, 119 .cntr = { {8, 9, -1}, {10, 11, -1} },
95 }, 120 },
96 [P4_EVENT_STORE_PORT_REPLAY] = { 121 [P4_EVENT_STORE_PORT_REPLAY] = {
97 .opcode = P4_OPCODE(P4_EVENT_STORE_PORT_REPLAY), 122 .opcode = P4_OPCODE(P4_EVENT_STORE_PORT_REPLAY),
98 .escr_msr = { MSR_P4_SAAT_ESCR0 , MSR_P4_SAAT_ESCR1 }, 123 .escr_msr = { MSR_P4_SAAT_ESCR0 , MSR_P4_SAAT_ESCR1 },
124 .escr_emask =
125 P4_ESCR_EMASK_BIT(P4_EVENT_STORE_PORT_REPLAY, SPLIT_ST),
99 .cntr = { {8, 9, -1}, {10, 11, -1} }, 126 .cntr = { {8, 9, -1}, {10, 11, -1} },
100 }, 127 },
101 [P4_EVENT_MOB_LOAD_REPLAY] = { 128 [P4_EVENT_MOB_LOAD_REPLAY] = {
102 .opcode = P4_OPCODE(P4_EVENT_MOB_LOAD_REPLAY), 129 .opcode = P4_OPCODE(P4_EVENT_MOB_LOAD_REPLAY),
103 .escr_msr = { MSR_P4_MOB_ESCR0, MSR_P4_MOB_ESCR1 }, 130 .escr_msr = { MSR_P4_MOB_ESCR0, MSR_P4_MOB_ESCR1 },
131 .escr_emask =
132 P4_ESCR_EMASK_BIT(P4_EVENT_MOB_LOAD_REPLAY, NO_STA) |
133 P4_ESCR_EMASK_BIT(P4_EVENT_MOB_LOAD_REPLAY, NO_STD) |
134 P4_ESCR_EMASK_BIT(P4_EVENT_MOB_LOAD_REPLAY, PARTIAL_DATA) |
135 P4_ESCR_EMASK_BIT(P4_EVENT_MOB_LOAD_REPLAY, UNALGN_ADDR),
104 .cntr = { {0, -1, -1}, {2, -1, -1} }, 136 .cntr = { {0, -1, -1}, {2, -1, -1} },
105 }, 137 },
106 [P4_EVENT_PAGE_WALK_TYPE] = { 138 [P4_EVENT_PAGE_WALK_TYPE] = {
107 .opcode = P4_OPCODE(P4_EVENT_PAGE_WALK_TYPE), 139 .opcode = P4_OPCODE(P4_EVENT_PAGE_WALK_TYPE),
108 .escr_msr = { MSR_P4_PMH_ESCR0, MSR_P4_PMH_ESCR1 }, 140 .escr_msr = { MSR_P4_PMH_ESCR0, MSR_P4_PMH_ESCR1 },
141 .escr_emask =
142 P4_ESCR_EMASK_BIT(P4_EVENT_PAGE_WALK_TYPE, DTMISS) |
143 P4_ESCR_EMASK_BIT(P4_EVENT_PAGE_WALK_TYPE, ITMISS),
144 .shared = 1,
109 .cntr = { {0, -1, -1}, {2, -1, -1} }, 145 .cntr = { {0, -1, -1}, {2, -1, -1} },
110 }, 146 },
111 [P4_EVENT_BSQ_CACHE_REFERENCE] = { 147 [P4_EVENT_BSQ_CACHE_REFERENCE] = {
112 .opcode = P4_OPCODE(P4_EVENT_BSQ_CACHE_REFERENCE), 148 .opcode = P4_OPCODE(P4_EVENT_BSQ_CACHE_REFERENCE),
113 .escr_msr = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR1 }, 149 .escr_msr = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR1 },
150 .escr_emask =
151 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITS) |
152 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITE) |
153 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITM) |
154 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITS) |
155 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITE) |
156 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITM) |
157 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_MISS) |
158 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_MISS) |
159 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, WR_2ndL_MISS),
114 .cntr = { {0, -1, -1}, {2, -1, -1} }, 160 .cntr = { {0, -1, -1}, {2, -1, -1} },
115 }, 161 },
116 [P4_EVENT_IOQ_ALLOCATION] = { 162 [P4_EVENT_IOQ_ALLOCATION] = {
117 .opcode = P4_OPCODE(P4_EVENT_IOQ_ALLOCATION), 163 .opcode = P4_OPCODE(P4_EVENT_IOQ_ALLOCATION),
118 .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, 164 .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
165 .escr_emask =
166 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, DEFAULT) |
167 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, ALL_READ) |
168 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, ALL_WRITE) |
169 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_UC) |
170 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_WC) |
171 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_WT) |
172 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_WP) |
173 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_WB) |
174 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, OWN) |
175 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, OTHER) |
176 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, PREFETCH),
119 .cntr = { {0, -1, -1}, {2, -1, -1} }, 177 .cntr = { {0, -1, -1}, {2, -1, -1} },
120 }, 178 },
121 [P4_EVENT_IOQ_ACTIVE_ENTRIES] = { /* shared ESCR */ 179 [P4_EVENT_IOQ_ACTIVE_ENTRIES] = { /* shared ESCR */
122 .opcode = P4_OPCODE(P4_EVENT_IOQ_ACTIVE_ENTRIES), 180 .opcode = P4_OPCODE(P4_EVENT_IOQ_ACTIVE_ENTRIES),
123 .escr_msr = { MSR_P4_FSB_ESCR1, MSR_P4_FSB_ESCR1 }, 181 .escr_msr = { MSR_P4_FSB_ESCR1, MSR_P4_FSB_ESCR1 },
182 .escr_emask =
183 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, DEFAULT) |
184 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, ALL_READ) |
185 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, ALL_WRITE) |
186 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_UC) |
187 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WC) |
188 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WT) |
189 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WP) |
190 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WB) |
191 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, OWN) |
192 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, OTHER) |
193 P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, PREFETCH),
124 .cntr = { {2, -1, -1}, {3, -1, -1} }, 194 .cntr = { {2, -1, -1}, {3, -1, -1} },
125 }, 195 },
126 [P4_EVENT_FSB_DATA_ACTIVITY] = { 196 [P4_EVENT_FSB_DATA_ACTIVITY] = {
127 .opcode = P4_OPCODE(P4_EVENT_FSB_DATA_ACTIVITY), 197 .opcode = P4_OPCODE(P4_EVENT_FSB_DATA_ACTIVITY),
128 .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, 198 .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
199 .escr_emask =
200 P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_DRV) |
201 P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_OWN) |
202 P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_OTHER) |
203 P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DBSY_DRV) |
204 P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DBSY_OWN) |
205 P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DBSY_OTHER),
206 .shared = 1,
129 .cntr = { {0, -1, -1}, {2, -1, -1} }, 207 .cntr = { {0, -1, -1}, {2, -1, -1} },
130 }, 208 },
131 [P4_EVENT_BSQ_ALLOCATION] = { /* shared ESCR, broken CCCR1 */ 209 [P4_EVENT_BSQ_ALLOCATION] = { /* shared ESCR, broken CCCR1 */
132 .opcode = P4_OPCODE(P4_EVENT_BSQ_ALLOCATION), 210 .opcode = P4_OPCODE(P4_EVENT_BSQ_ALLOCATION),
133 .escr_msr = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR0 }, 211 .escr_msr = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR0 },
212 .escr_emask =
213 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_TYPE0) |
214 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_TYPE1) |
215 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_LEN0) |
216 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_LEN1) |
217 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_IO_TYPE) |
218 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_LOCK_TYPE) |
219 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_CACHE_TYPE) |
220 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_SPLIT_TYPE) |
221 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_DEM_TYPE) |
222 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_ORD_TYPE) |
223 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, MEM_TYPE0) |
224 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, MEM_TYPE1) |
225 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, MEM_TYPE2),
134 .cntr = { {0, -1, -1}, {1, -1, -1} }, 226 .cntr = { {0, -1, -1}, {1, -1, -1} },
135 }, 227 },
136 [P4_EVENT_BSQ_ACTIVE_ENTRIES] = { /* shared ESCR */ 228 [P4_EVENT_BSQ_ACTIVE_ENTRIES] = { /* shared ESCR */
137 .opcode = P4_OPCODE(P4_EVENT_BSQ_ACTIVE_ENTRIES), 229 .opcode = P4_OPCODE(P4_EVENT_BSQ_ACTIVE_ENTRIES),
138 .escr_msr = { MSR_P4_BSU_ESCR1 , MSR_P4_BSU_ESCR1 }, 230 .escr_msr = { MSR_P4_BSU_ESCR1 , MSR_P4_BSU_ESCR1 },
231 .escr_emask =
232 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_TYPE0) |
233 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_TYPE1) |
234 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_LEN0) |
235 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_LEN1) |
236 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_IO_TYPE) |
237 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_LOCK_TYPE) |
238 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_CACHE_TYPE) |
239 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_SPLIT_TYPE) |
240 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_DEM_TYPE) |
241 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_ORD_TYPE) |
242 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, MEM_TYPE0) |
243 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, MEM_TYPE1) |
244 P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, MEM_TYPE2),
139 .cntr = { {2, -1, -1}, {3, -1, -1} }, 245 .cntr = { {2, -1, -1}, {3, -1, -1} },
140 }, 246 },
141 [P4_EVENT_SSE_INPUT_ASSIST] = { 247 [P4_EVENT_SSE_INPUT_ASSIST] = {
142 .opcode = P4_OPCODE(P4_EVENT_SSE_INPUT_ASSIST), 248 .opcode = P4_OPCODE(P4_EVENT_SSE_INPUT_ASSIST),
143 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, 249 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
250 .escr_emask =
251 P4_ESCR_EMASK_BIT(P4_EVENT_SSE_INPUT_ASSIST, ALL),
252 .shared = 1,
144 .cntr = { {8, 9, -1}, {10, 11, -1} }, 253 .cntr = { {8, 9, -1}, {10, 11, -1} },
145 }, 254 },
146 [P4_EVENT_PACKED_SP_UOP] = { 255 [P4_EVENT_PACKED_SP_UOP] = {
147 .opcode = P4_OPCODE(P4_EVENT_PACKED_SP_UOP), 256 .opcode = P4_OPCODE(P4_EVENT_PACKED_SP_UOP),
148 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, 257 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
258 .escr_emask =
259 P4_ESCR_EMASK_BIT(P4_EVENT_PACKED_SP_UOP, ALL),
260 .shared = 1,
149 .cntr = { {8, 9, -1}, {10, 11, -1} }, 261 .cntr = { {8, 9, -1}, {10, 11, -1} },
150 }, 262 },
151 [P4_EVENT_PACKED_DP_UOP] = { 263 [P4_EVENT_PACKED_DP_UOP] = {
152 .opcode = P4_OPCODE(P4_EVENT_PACKED_DP_UOP), 264 .opcode = P4_OPCODE(P4_EVENT_PACKED_DP_UOP),
153 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, 265 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
266 .escr_emask =
267 P4_ESCR_EMASK_BIT(P4_EVENT_PACKED_DP_UOP, ALL),
268 .shared = 1,
154 .cntr = { {8, 9, -1}, {10, 11, -1} }, 269 .cntr = { {8, 9, -1}, {10, 11, -1} },
155 }, 270 },
156 [P4_EVENT_SCALAR_SP_UOP] = { 271 [P4_EVENT_SCALAR_SP_UOP] = {
157 .opcode = P4_OPCODE(P4_EVENT_SCALAR_SP_UOP), 272 .opcode = P4_OPCODE(P4_EVENT_SCALAR_SP_UOP),
158 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, 273 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
274 .escr_emask =
275 P4_ESCR_EMASK_BIT(P4_EVENT_SCALAR_SP_UOP, ALL),
276 .shared = 1,
159 .cntr = { {8, 9, -1}, {10, 11, -1} }, 277 .cntr = { {8, 9, -1}, {10, 11, -1} },
160 }, 278 },
161 [P4_EVENT_SCALAR_DP_UOP] = { 279 [P4_EVENT_SCALAR_DP_UOP] = {
162 .opcode = P4_OPCODE(P4_EVENT_SCALAR_DP_UOP), 280 .opcode = P4_OPCODE(P4_EVENT_SCALAR_DP_UOP),
163 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, 281 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
282 .escr_emask =
283 P4_ESCR_EMASK_BIT(P4_EVENT_SCALAR_DP_UOP, ALL),
284 .shared = 1,
164 .cntr = { {8, 9, -1}, {10, 11, -1} }, 285 .cntr = { {8, 9, -1}, {10, 11, -1} },
165 }, 286 },
166 [P4_EVENT_64BIT_MMX_UOP] = { 287 [P4_EVENT_64BIT_MMX_UOP] = {
167 .opcode = P4_OPCODE(P4_EVENT_64BIT_MMX_UOP), 288 .opcode = P4_OPCODE(P4_EVENT_64BIT_MMX_UOP),
168 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, 289 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
290 .escr_emask =
291 P4_ESCR_EMASK_BIT(P4_EVENT_64BIT_MMX_UOP, ALL),
292 .shared = 1,
169 .cntr = { {8, 9, -1}, {10, 11, -1} }, 293 .cntr = { {8, 9, -1}, {10, 11, -1} },
170 }, 294 },
171 [P4_EVENT_128BIT_MMX_UOP] = { 295 [P4_EVENT_128BIT_MMX_UOP] = {
172 .opcode = P4_OPCODE(P4_EVENT_128BIT_MMX_UOP), 296 .opcode = P4_OPCODE(P4_EVENT_128BIT_MMX_UOP),
173 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, 297 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
298 .escr_emask =
299 P4_ESCR_EMASK_BIT(P4_EVENT_128BIT_MMX_UOP, ALL),
300 .shared = 1,
174 .cntr = { {8, 9, -1}, {10, 11, -1} }, 301 .cntr = { {8, 9, -1}, {10, 11, -1} },
175 }, 302 },
176 [P4_EVENT_X87_FP_UOP] = { 303 [P4_EVENT_X87_FP_UOP] = {
177 .opcode = P4_OPCODE(P4_EVENT_X87_FP_UOP), 304 .opcode = P4_OPCODE(P4_EVENT_X87_FP_UOP),
178 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, 305 .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
306 .escr_emask =
307 P4_ESCR_EMASK_BIT(P4_EVENT_X87_FP_UOP, ALL),
308 .shared = 1,
179 .cntr = { {8, 9, -1}, {10, 11, -1} }, 309 .cntr = { {8, 9, -1}, {10, 11, -1} },
180 }, 310 },
181 [P4_EVENT_TC_MISC] = { 311 [P4_EVENT_TC_MISC] = {
182 .opcode = P4_OPCODE(P4_EVENT_TC_MISC), 312 .opcode = P4_OPCODE(P4_EVENT_TC_MISC),
183 .escr_msr = { MSR_P4_TC_ESCR0, MSR_P4_TC_ESCR1 }, 313 .escr_msr = { MSR_P4_TC_ESCR0, MSR_P4_TC_ESCR1 },
314 .escr_emask =
315 P4_ESCR_EMASK_BIT(P4_EVENT_TC_MISC, FLUSH),
184 .cntr = { {4, 5, -1}, {6, 7, -1} }, 316 .cntr = { {4, 5, -1}, {6, 7, -1} },
185 }, 317 },
186 [P4_EVENT_GLOBAL_POWER_EVENTS] = { 318 [P4_EVENT_GLOBAL_POWER_EVENTS] = {
187 .opcode = P4_OPCODE(P4_EVENT_GLOBAL_POWER_EVENTS), 319 .opcode = P4_OPCODE(P4_EVENT_GLOBAL_POWER_EVENTS),
188 .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, 320 .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
321 .escr_emask =
322 P4_ESCR_EMASK_BIT(P4_EVENT_GLOBAL_POWER_EVENTS, RUNNING),
189 .cntr = { {0, -1, -1}, {2, -1, -1} }, 323 .cntr = { {0, -1, -1}, {2, -1, -1} },
190 }, 324 },
191 [P4_EVENT_TC_MS_XFER] = { 325 [P4_EVENT_TC_MS_XFER] = {
192 .opcode = P4_OPCODE(P4_EVENT_TC_MS_XFER), 326 .opcode = P4_OPCODE(P4_EVENT_TC_MS_XFER),
193 .escr_msr = { MSR_P4_MS_ESCR0, MSR_P4_MS_ESCR1 }, 327 .escr_msr = { MSR_P4_MS_ESCR0, MSR_P4_MS_ESCR1 },
328 .escr_emask =
329 P4_ESCR_EMASK_BIT(P4_EVENT_TC_MS_XFER, CISC),
194 .cntr = { {4, 5, -1}, {6, 7, -1} }, 330 .cntr = { {4, 5, -1}, {6, 7, -1} },
195 }, 331 },
196 [P4_EVENT_UOP_QUEUE_WRITES] = { 332 [P4_EVENT_UOP_QUEUE_WRITES] = {
197 .opcode = P4_OPCODE(P4_EVENT_UOP_QUEUE_WRITES), 333 .opcode = P4_OPCODE(P4_EVENT_UOP_QUEUE_WRITES),
198 .escr_msr = { MSR_P4_MS_ESCR0, MSR_P4_MS_ESCR1 }, 334 .escr_msr = { MSR_P4_MS_ESCR0, MSR_P4_MS_ESCR1 },
335 .escr_emask =
336 P4_ESCR_EMASK_BIT(P4_EVENT_UOP_QUEUE_WRITES, FROM_TC_BUILD) |
337 P4_ESCR_EMASK_BIT(P4_EVENT_UOP_QUEUE_WRITES, FROM_TC_DELIVER) |
338 P4_ESCR_EMASK_BIT(P4_EVENT_UOP_QUEUE_WRITES, FROM_ROM),
199 .cntr = { {4, 5, -1}, {6, 7, -1} }, 339 .cntr = { {4, 5, -1}, {6, 7, -1} },
200 }, 340 },
201 [P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE] = { 341 [P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE] = {
202 .opcode = P4_OPCODE(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE), 342 .opcode = P4_OPCODE(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE),
203 .escr_msr = { MSR_P4_TBPU_ESCR0 , MSR_P4_TBPU_ESCR0 }, 343 .escr_msr = { MSR_P4_TBPU_ESCR0 , MSR_P4_TBPU_ESCR0 },
344 .escr_emask =
345 P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, CONDITIONAL) |
346 P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, CALL) |
347 P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, RETURN) |
348 P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, INDIRECT),
204 .cntr = { {4, 5, -1}, {6, 7, -1} }, 349 .cntr = { {4, 5, -1}, {6, 7, -1} },
205 }, 350 },
206 [P4_EVENT_RETIRED_BRANCH_TYPE] = { 351 [P4_EVENT_RETIRED_BRANCH_TYPE] = {
207 .opcode = P4_OPCODE(P4_EVENT_RETIRED_BRANCH_TYPE), 352 .opcode = P4_OPCODE(P4_EVENT_RETIRED_BRANCH_TYPE),
208 .escr_msr = { MSR_P4_TBPU_ESCR0 , MSR_P4_TBPU_ESCR1 }, 353 .escr_msr = { MSR_P4_TBPU_ESCR0 , MSR_P4_TBPU_ESCR1 },
354 .escr_emask =
355 P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, CONDITIONAL) |
356 P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, CALL) |
357 P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, RETURN) |
358 P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, INDIRECT),
209 .cntr = { {4, 5, -1}, {6, 7, -1} }, 359 .cntr = { {4, 5, -1}, {6, 7, -1} },
210 }, 360 },
211 [P4_EVENT_RESOURCE_STALL] = { 361 [P4_EVENT_RESOURCE_STALL] = {
212 .opcode = P4_OPCODE(P4_EVENT_RESOURCE_STALL), 362 .opcode = P4_OPCODE(P4_EVENT_RESOURCE_STALL),
213 .escr_msr = { MSR_P4_ALF_ESCR0, MSR_P4_ALF_ESCR1 }, 363 .escr_msr = { MSR_P4_ALF_ESCR0, MSR_P4_ALF_ESCR1 },
364 .escr_emask =
365 P4_ESCR_EMASK_BIT(P4_EVENT_RESOURCE_STALL, SBFULL),
214 .cntr = { {12, 13, 16}, {14, 15, 17} }, 366 .cntr = { {12, 13, 16}, {14, 15, 17} },
215 }, 367 },
216 [P4_EVENT_WC_BUFFER] = { 368 [P4_EVENT_WC_BUFFER] = {
217 .opcode = P4_OPCODE(P4_EVENT_WC_BUFFER), 369 .opcode = P4_OPCODE(P4_EVENT_WC_BUFFER),
218 .escr_msr = { MSR_P4_DAC_ESCR0, MSR_P4_DAC_ESCR1 }, 370 .escr_msr = { MSR_P4_DAC_ESCR0, MSR_P4_DAC_ESCR1 },
371 .escr_emask =
372 P4_ESCR_EMASK_BIT(P4_EVENT_WC_BUFFER, WCB_EVICTS) |
373 P4_ESCR_EMASK_BIT(P4_EVENT_WC_BUFFER, WCB_FULL_EVICTS),
374 .shared = 1,
219 .cntr = { {8, 9, -1}, {10, 11, -1} }, 375 .cntr = { {8, 9, -1}, {10, 11, -1} },
220 }, 376 },
221 [P4_EVENT_B2B_CYCLES] = { 377 [P4_EVENT_B2B_CYCLES] = {
222 .opcode = P4_OPCODE(P4_EVENT_B2B_CYCLES), 378 .opcode = P4_OPCODE(P4_EVENT_B2B_CYCLES),
223 .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, 379 .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
380 .escr_emask = 0,
224 .cntr = { {0, -1, -1}, {2, -1, -1} }, 381 .cntr = { {0, -1, -1}, {2, -1, -1} },
225 }, 382 },
226 [P4_EVENT_BNR] = { 383 [P4_EVENT_BNR] = {
227 .opcode = P4_OPCODE(P4_EVENT_BNR), 384 .opcode = P4_OPCODE(P4_EVENT_BNR),
228 .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, 385 .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
386 .escr_emask = 0,
229 .cntr = { {0, -1, -1}, {2, -1, -1} }, 387 .cntr = { {0, -1, -1}, {2, -1, -1} },
230 }, 388 },
231 [P4_EVENT_SNOOP] = { 389 [P4_EVENT_SNOOP] = {
232 .opcode = P4_OPCODE(P4_EVENT_SNOOP), 390 .opcode = P4_OPCODE(P4_EVENT_SNOOP),
233 .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, 391 .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
392 .escr_emask = 0,
234 .cntr = { {0, -1, -1}, {2, -1, -1} }, 393 .cntr = { {0, -1, -1}, {2, -1, -1} },
235 }, 394 },
236 [P4_EVENT_RESPONSE] = { 395 [P4_EVENT_RESPONSE] = {
237 .opcode = P4_OPCODE(P4_EVENT_RESPONSE), 396 .opcode = P4_OPCODE(P4_EVENT_RESPONSE),
238 .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, 397 .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
398 .escr_emask = 0,
239 .cntr = { {0, -1, -1}, {2, -1, -1} }, 399 .cntr = { {0, -1, -1}, {2, -1, -1} },
240 }, 400 },
241 [P4_EVENT_FRONT_END_EVENT] = { 401 [P4_EVENT_FRONT_END_EVENT] = {
242 .opcode = P4_OPCODE(P4_EVENT_FRONT_END_EVENT), 402 .opcode = P4_OPCODE(P4_EVENT_FRONT_END_EVENT),
243 .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, 403 .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
404 .escr_emask =
405 P4_ESCR_EMASK_BIT(P4_EVENT_FRONT_END_EVENT, NBOGUS) |
406 P4_ESCR_EMASK_BIT(P4_EVENT_FRONT_END_EVENT, BOGUS),
244 .cntr = { {12, 13, 16}, {14, 15, 17} }, 407 .cntr = { {12, 13, 16}, {14, 15, 17} },
245 }, 408 },
246 [P4_EVENT_EXECUTION_EVENT] = { 409 [P4_EVENT_EXECUTION_EVENT] = {
247 .opcode = P4_OPCODE(P4_EVENT_EXECUTION_EVENT), 410 .opcode = P4_OPCODE(P4_EVENT_EXECUTION_EVENT),
248 .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, 411 .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
412 .escr_emask =
413 P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS0) |
414 P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS1) |
415 P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS2) |
416 P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS3) |
417 P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS0) |
418 P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS1) |
419 P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS2) |
420 P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS3),
249 .cntr = { {12, 13, 16}, {14, 15, 17} }, 421 .cntr = { {12, 13, 16}, {14, 15, 17} },
250 }, 422 },
251 [P4_EVENT_REPLAY_EVENT] = { 423 [P4_EVENT_REPLAY_EVENT] = {
252 .opcode = P4_OPCODE(P4_EVENT_REPLAY_EVENT), 424 .opcode = P4_OPCODE(P4_EVENT_REPLAY_EVENT),
253 .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, 425 .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
426 .escr_emask =
427 P4_ESCR_EMASK_BIT(P4_EVENT_REPLAY_EVENT, NBOGUS) |
428 P4_ESCR_EMASK_BIT(P4_EVENT_REPLAY_EVENT, BOGUS),
254 .cntr = { {12, 13, 16}, {14, 15, 17} }, 429 .cntr = { {12, 13, 16}, {14, 15, 17} },
255 }, 430 },
256 [P4_EVENT_INSTR_RETIRED] = { 431 [P4_EVENT_INSTR_RETIRED] = {
257 .opcode = P4_OPCODE(P4_EVENT_INSTR_RETIRED), 432 .opcode = P4_OPCODE(P4_EVENT_INSTR_RETIRED),
258 .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 }, 433 .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 },
434 .escr_emask =
435 P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, NBOGUSNTAG) |
436 P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, NBOGUSTAG) |
437 P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, BOGUSNTAG) |
438 P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, BOGUSTAG),
259 .cntr = { {12, 13, 16}, {14, 15, 17} }, 439 .cntr = { {12, 13, 16}, {14, 15, 17} },
260 }, 440 },
261 [P4_EVENT_UOPS_RETIRED] = { 441 [P4_EVENT_UOPS_RETIRED] = {
262 .opcode = P4_OPCODE(P4_EVENT_UOPS_RETIRED), 442 .opcode = P4_OPCODE(P4_EVENT_UOPS_RETIRED),
263 .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 }, 443 .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 },
444 .escr_emask =
445 P4_ESCR_EMASK_BIT(P4_EVENT_UOPS_RETIRED, NBOGUS) |
446 P4_ESCR_EMASK_BIT(P4_EVENT_UOPS_RETIRED, BOGUS),
264 .cntr = { {12, 13, 16}, {14, 15, 17} }, 447 .cntr = { {12, 13, 16}, {14, 15, 17} },
265 }, 448 },
266 [P4_EVENT_UOP_TYPE] = { 449 [P4_EVENT_UOP_TYPE] = {
267 .opcode = P4_OPCODE(P4_EVENT_UOP_TYPE), 450 .opcode = P4_OPCODE(P4_EVENT_UOP_TYPE),
268 .escr_msr = { MSR_P4_RAT_ESCR0, MSR_P4_RAT_ESCR1 }, 451 .escr_msr = { MSR_P4_RAT_ESCR0, MSR_P4_RAT_ESCR1 },
452 .escr_emask =
453 P4_ESCR_EMASK_BIT(P4_EVENT_UOP_TYPE, TAGLOADS) |
454 P4_ESCR_EMASK_BIT(P4_EVENT_UOP_TYPE, TAGSTORES),
269 .cntr = { {12, 13, 16}, {14, 15, 17} }, 455 .cntr = { {12, 13, 16}, {14, 15, 17} },
270 }, 456 },
271 [P4_EVENT_BRANCH_RETIRED] = { 457 [P4_EVENT_BRANCH_RETIRED] = {
272 .opcode = P4_OPCODE(P4_EVENT_BRANCH_RETIRED), 458 .opcode = P4_OPCODE(P4_EVENT_BRANCH_RETIRED),
273 .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, 459 .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
460 .escr_emask =
461 P4_ESCR_EMASK_BIT(P4_EVENT_BRANCH_RETIRED, MMNP) |
462 P4_ESCR_EMASK_BIT(P4_EVENT_BRANCH_RETIRED, MMNM) |
463 P4_ESCR_EMASK_BIT(P4_EVENT_BRANCH_RETIRED, MMTP) |
464 P4_ESCR_EMASK_BIT(P4_EVENT_BRANCH_RETIRED, MMTM),
274 .cntr = { {12, 13, 16}, {14, 15, 17} }, 465 .cntr = { {12, 13, 16}, {14, 15, 17} },
275 }, 466 },
276 [P4_EVENT_MISPRED_BRANCH_RETIRED] = { 467 [P4_EVENT_MISPRED_BRANCH_RETIRED] = {
277 .opcode = P4_OPCODE(P4_EVENT_MISPRED_BRANCH_RETIRED), 468 .opcode = P4_OPCODE(P4_EVENT_MISPRED_BRANCH_RETIRED),
278 .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 }, 469 .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 },
470 .escr_emask =
471 P4_ESCR_EMASK_BIT(P4_EVENT_MISPRED_BRANCH_RETIRED, NBOGUS),
279 .cntr = { {12, 13, 16}, {14, 15, 17} }, 472 .cntr = { {12, 13, 16}, {14, 15, 17} },
280 }, 473 },
281 [P4_EVENT_X87_ASSIST] = { 474 [P4_EVENT_X87_ASSIST] = {
282 .opcode = P4_OPCODE(P4_EVENT_X87_ASSIST), 475 .opcode = P4_OPCODE(P4_EVENT_X87_ASSIST),
283 .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, 476 .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
477 .escr_emask =
478 P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, FPSU) |
479 P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, FPSO) |
480 P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, POAO) |
481 P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, POAU) |
482 P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, PREA),
284 .cntr = { {12, 13, 16}, {14, 15, 17} }, 483 .cntr = { {12, 13, 16}, {14, 15, 17} },
285 }, 484 },
286 [P4_EVENT_MACHINE_CLEAR] = { 485 [P4_EVENT_MACHINE_CLEAR] = {
287 .opcode = P4_OPCODE(P4_EVENT_MACHINE_CLEAR), 486 .opcode = P4_OPCODE(P4_EVENT_MACHINE_CLEAR),
288 .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, 487 .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
488 .escr_emask =
489 P4_ESCR_EMASK_BIT(P4_EVENT_MACHINE_CLEAR, CLEAR) |
490 P4_ESCR_EMASK_BIT(P4_EVENT_MACHINE_CLEAR, MOCLEAR) |
491 P4_ESCR_EMASK_BIT(P4_EVENT_MACHINE_CLEAR, SMCLEAR),
289 .cntr = { {12, 13, 16}, {14, 15, 17} }, 492 .cntr = { {12, 13, 16}, {14, 15, 17} },
290 }, 493 },
291 [P4_EVENT_INSTR_COMPLETED] = { 494 [P4_EVENT_INSTR_COMPLETED] = {
292 .opcode = P4_OPCODE(P4_EVENT_INSTR_COMPLETED), 495 .opcode = P4_OPCODE(P4_EVENT_INSTR_COMPLETED),
293 .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 }, 496 .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 },
497 .escr_emask =
498 P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_COMPLETED, NBOGUS) |
499 P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_COMPLETED, BOGUS),
294 .cntr = { {12, 13, 16}, {14, 15, 17} }, 500 .cntr = { {12, 13, 16}, {14, 15, 17} },
295 }, 501 },
296}; 502};
@@ -428,29 +634,73 @@ static u64 p4_pmu_event_map(int hw_event)
428 return config; 634 return config;
429} 635}
430 636
637/* check cpu model specifics */
638static bool p4_event_match_cpu_model(unsigned int event_idx)
639{
640 /* INSTR_COMPLETED event only exists for models 3, 4 and 6 (Prescott) */
641 if (event_idx == P4_EVENT_INSTR_COMPLETED) {
642 if (boot_cpu_data.x86_model != 3 &&
643 boot_cpu_data.x86_model != 4 &&
644 boot_cpu_data.x86_model != 6)
645 return false;
646 }
647
648 /*
649 * For reference:
650 * - IQ_ESCR0 and IQ_ESCR1 exist only on models 1 and 2
651 */
652
653 return true;
654}
655
431static int p4_validate_raw_event(struct perf_event *event) 656static int p4_validate_raw_event(struct perf_event *event)
432{ 657{
433 unsigned int v; 658 unsigned int v, emask;
434 659
435 /* user data may have an out-of-bounds event index */ 660 /* User data may have an out-of-bounds event index */
436 v = p4_config_unpack_event(event->attr.config); 661 v = p4_config_unpack_event(event->attr.config);
437 if (v >= ARRAY_SIZE(p4_event_bind_map)) { 662 if (v >= ARRAY_SIZE(p4_event_bind_map))
438 pr_warning("P4 PMU: Unknown event code: %d\n", v); 663 return -EINVAL;
664
665 /* It may be unsupported: */
666 if (!p4_event_match_cpu_model(v))
439 return -EINVAL; 667 return -EINVAL;
668
669 /*
670 * NOTE: P4_CCCR_THREAD_ANY does not have the same meaning as
671 * in Architectural Performance Monitoring: it selects not
672 * _which_ logical cpu to count on but rather _when_ to count,
673 * i.e. it depends on logical cpu state -- count the event if one
674 * cpu is active, none, both or any -- so we simply let the user
675 * pass any desired value.
676 *
677 * In turn we always set the Tx_OS/Tx_USR bits bound to the logical
678 * cpu without propagating them to the other cpu.
679 */
680
681 /*
682 * if an event is shared across the logical threads,
683 * the user needs special permissions to use it
684 */
685 if (p4_event_bind_map[v].shared) {
686 if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
687 return -EACCES;
440 } 688 }
441 689
690 /* ESCR EventMask bits may be invalid */
691 emask = p4_config_unpack_escr(event->attr.config) & P4_ESCR_EVENTMASK_MASK;
692 if (emask & ~p4_event_bind_map[v].escr_emask)
693 return -EINVAL;
694
442 /* 695 /*
443 * it may have some screwed PEBS bits 696 * it may have some invalid PEBS bits
444 */ 697 */
445 if (p4_config_pebs_has(event->attr.config, P4_PEBS_CONFIG_ENABLE)) { 698 if (p4_config_pebs_has(event->attr.config, P4_PEBS_CONFIG_ENABLE))
446 pr_warning("P4 PMU: PEBS are not supported yet\n");
447 return -EINVAL; 699 return -EINVAL;
448 } 700
449 v = p4_config_unpack_metric(event->attr.config); 701 v = p4_config_unpack_metric(event->attr.config);
450 if (v >= ARRAY_SIZE(p4_pebs_bind_map)) { 702 if (v >= ARRAY_SIZE(p4_pebs_bind_map))
451 pr_warning("P4 PMU: Unknown metric code: %d\n", v);
452 return -EINVAL; 703 return -EINVAL;
453 }
454 704
455 return 0; 705 return 0;
456} 706}
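
The validation now works from a whitelist: each p4_event_bind entry records in escr_emask exactly the ESCR EventMask bits documented for that event, and p4_validate_raw_event() refuses a raw config carrying anything else; events marked shared (one ESCR serving both hyperthreads) additionally require CAP_SYS_ADMIN when perf_paranoid_cpu() is in effect. The core of the mask check, condensed from the hunk above:

	emask = p4_config_unpack_escr(event->attr.config) & P4_ESCR_EVENTMASK_MASK;
	if (emask & ~p4_event_bind_map[v].escr_emask)
		return -EINVAL;	/* EventMask bit undefined for this event */
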
@@ -478,27 +728,21 @@ static int p4_hw_config(struct perf_event *event)
478 728
479 if (event->attr.type == PERF_TYPE_RAW) { 729 if (event->attr.type == PERF_TYPE_RAW) {
480 730
731 /*
732 * Clear bits we reserve to be managed by the kernel itself
733 * and never allow to be set from user space
734 */
735 event->attr.config &= P4_CONFIG_MASK;
736
481 rc = p4_validate_raw_event(event); 737 rc = p4_validate_raw_event(event);
482 if (rc) 738 if (rc)
483 goto out; 739 goto out;
484 740
485 /* 741 /*
486 * We don't control raw events so it's up to the caller
487 * to pass sane values (and we don't count the thread number
488 * on HT machine but allow HT-compatible specifics to be
489 * passed on)
490 *
491 * Note that for RAW events we allow user to use P4_CCCR_RESERVED 742 * Note that for RAW events we allow user to use P4_CCCR_RESERVED
492 * bits since we keep additional info here (for cache events, etc.) 743 * bits since we keep additional info here (for cache events, etc.)
493 *
494 * XXX: HT wide things should check perf_paranoid_cpu() &&
495 * CAP_SYS_ADMIN
496 */ 744 */
497 event->hw.config |= event->attr.config & 745 event->hw.config |= event->attr.config;
498 (p4_config_pack_escr(P4_ESCR_MASK_HT) |
499 p4_config_pack_cccr(P4_CCCR_MASK_HT | P4_CCCR_RESERVED));
500
501 event->hw.config &= ~P4_CCCR_FORCE_OVF;
502 } 746 }
503 747
504 rc = x86_setup_perfctr(event); 748 rc = x86_setup_perfctr(event);
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c
index fb329e9f8494..d9f4ff8fcd69 100644
--- a/arch/x86/kernel/cpu/perfctr-watchdog.c
+++ b/arch/x86/kernel/cpu/perfctr-watchdog.c
@@ -700,11 +700,10 @@ static void probe_nmi_watchdog(void)
700{ 700{
701 switch (boot_cpu_data.x86_vendor) { 701 switch (boot_cpu_data.x86_vendor) {
702 case X86_VENDOR_AMD: 702 case X86_VENDOR_AMD:
703 if (boot_cpu_data.x86 != 6 && boot_cpu_data.x86 != 15 && 703 if (boot_cpu_data.x86 == 6 ||
704 boot_cpu_data.x86 != 16 && boot_cpu_data.x86 != 17) 704 (boot_cpu_data.x86 >= 0xf && boot_cpu_data.x86 <= 0x15))
705 return; 705 wd_ops = &k7_wd_ops;
706 wd_ops = &k7_wd_ops; 706 return;
707 break;
708 case X86_VENDOR_INTEL: 707 case X86_VENDOR_INTEL:
709 /* Work around where perfctr1 doesn't have a working enable 708 /* Work around where perfctr1 doesn't have a working enable
710 * bit as described in the following errata: 709 * bit as described in the following errata:
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index d49079515122..c7f64e6f537a 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -44,6 +44,12 @@ void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c)
44 { X86_FEATURE_LBRV, CR_EDX, 1, 0x8000000a, 0 }, 44 { X86_FEATURE_LBRV, CR_EDX, 1, 0x8000000a, 0 },
45 { X86_FEATURE_SVML, CR_EDX, 2, 0x8000000a, 0 }, 45 { X86_FEATURE_SVML, CR_EDX, 2, 0x8000000a, 0 },
46 { X86_FEATURE_NRIPS, CR_EDX, 3, 0x8000000a, 0 }, 46 { X86_FEATURE_NRIPS, CR_EDX, 3, 0x8000000a, 0 },
47 { X86_FEATURE_TSCRATEMSR, CR_EDX, 4, 0x8000000a, 0 },
48 { X86_FEATURE_VMCBCLEAN, CR_EDX, 5, 0x8000000a, 0 },
49 { X86_FEATURE_FLUSHBYASID, CR_EDX, 6, 0x8000000a, 0 },
50 { X86_FEATURE_DECODEASSISTS, CR_EDX, 7, 0x8000000a, 0 },
51 { X86_FEATURE_PAUSEFILTER, CR_EDX, 10, 0x8000000a, 0 },
52 { X86_FEATURE_PFTHRESHOLD, CR_EDX, 12, 0x8000000a, 0 },
47 { 0, 0, 0, 0, 0 } 53 { 0, 0, 0, 0, 0 }
48 }; 54 };
49 55
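
Each row of the cpuid_bits table in scattered.c is { feature flag, output register, bit, CPUID leaf, sub-leaf }, so all six new SVM feature flags come from leaf 0x8000000a EDX. A sketch of how init_scattered_cpuid_features() consumes one such row (names condensed; the real loop first checks that the leaf is implemented):

	u32 regs[4];

	cpuid_count(0x8000000a, 0, &regs[CR_EAX], &regs[CR_EBX],
				   &regs[CR_ECX], &regs[CR_EDX]);
	if (regs[CR_EDX] & (1 << 10))		/* bit 10 of EDX */
		set_cpu_cap(c, X86_FEATURE_PAUSEFILTER);
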
diff --git a/arch/x86/kernel/crash_dump_64.c b/arch/x86/kernel/crash_dump_64.c
index 045b36cada65..994828899e09 100644
--- a/arch/x86/kernel/crash_dump_64.c
+++ b/arch/x86/kernel/crash_dump_64.c
@@ -34,7 +34,7 @@ ssize_t copy_oldmem_page(unsigned long pfn, char *buf,
34 if (!csize) 34 if (!csize)
35 return 0; 35 return 0;
36 36
37 vaddr = ioremap(pfn << PAGE_SHIFT, PAGE_SIZE); 37 vaddr = ioremap_cache(pfn << PAGE_SHIFT, PAGE_SIZE);
38 if (!vaddr) 38 if (!vaddr)
39 return -ENOMEM; 39 return -ENOMEM;
40 40
@@ -46,6 +46,7 @@ ssize_t copy_oldmem_page(unsigned long pfn, char *buf,
46 } else 46 } else
47 memcpy(buf, vaddr + offset, csize); 47 memcpy(buf, vaddr + offset, csize);
48 48
49 set_iounmap_nonlazy();
49 iounmap(vaddr); 50 iounmap(vaddr);
50 return csize; 51 return csize;
51} 52}
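
Both crash_dump_64.c changes speed up /proc/vmcore: the old kernel's pages are plain RAM, so mapping them write-back with ioremap_cache() makes the memcpy() far cheaper than through an uncached ioremap(), and set_iounmap_nonlazy() asks vmalloc to flush the next iounmap() eagerly instead of batching it lazily, since copy_oldmem_page() maps and unmaps one page per call. The resulting pattern:

	vaddr = ioremap_cache(pfn << PAGE_SHIFT, PAGE_SIZE);	/* cacheable RAM */
	if (!vaddr)
		return -ENOMEM;
	memcpy(buf, vaddr + offset, csize);
	set_iounmap_nonlazy();		/* free this vmap area synchronously */
	iounmap(vaddr);
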
diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c
index ebdb85cf2686..76b8cd953dee 100644
--- a/arch/x86/kernel/early-quirks.c
+++ b/arch/x86/kernel/early-quirks.c
@@ -97,7 +97,6 @@ static void __init nvidia_bugs(int num, int slot, int func)
97} 97}
98 98
99#if defined(CONFIG_ACPI) && defined(CONFIG_X86_IO_APIC) 99#if defined(CONFIG_ACPI) && defined(CONFIG_X86_IO_APIC)
100#if defined(CONFIG_ACPI) && defined(CONFIG_X86_IO_APIC)
101static u32 __init ati_ixp4x0_rev(int num, int slot, int func) 100static u32 __init ati_ixp4x0_rev(int num, int slot, int func)
102{ 101{
103 u32 d; 102 u32 d;
@@ -115,7 +114,6 @@ static u32 __init ati_ixp4x0_rev(int num, int slot, int func)
115 d &= 0xff; 114 d &= 0xff;
116 return d; 115 return d;
117} 116}
118#endif
119 117
120static void __init ati_bugs(int num, int slot, int func) 118static void __init ati_bugs(int num, int slot, int func)
121{ 119{
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 227d00920d2f..9fb188d7bc76 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -115,8 +115,7 @@
115 115
116 /* unfortunately push/pop can't be a no-op */ 116 /* unfortunately push/pop can't be a no-op */
117.macro PUSH_GS 117.macro PUSH_GS
118 pushl $0 118 pushl_cfi $0
119 CFI_ADJUST_CFA_OFFSET 4
120.endm 119.endm
121.macro POP_GS pop=0 120.macro POP_GS pop=0
122 addl $(4 + \pop), %esp 121 addl $(4 + \pop), %esp
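
The remaining entry_32.S hunks are a mechanical conversion: every pushl/popl/pushfl paired with a manual CFI_ADJUST_CFA_OFFSET becomes a single *_cfi macro. A sketch of the helper these conversions rely on, presumably provided by arch/x86/include/asm/dwarf2.h alongside the other CFI macros (the same header would also carry popl_cfi, pushfl_cfi, popfl_cfi and the 64-bit pushq/popq variants):

	.macro pushl_cfi reg
		pushl \reg		/* the real push */
		CFI_ADJUST_CFA_OFFSET 4	/* keep the DWARF CFA in sync */
	.endm

When CFI annotations are disabled the CFI_* macros expand to nothing, so the generated code should be identical either way; the win is purely in source brevity and in not forgetting the offset adjustment.
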
@@ -140,14 +139,12 @@
140#else /* CONFIG_X86_32_LAZY_GS */ 139#else /* CONFIG_X86_32_LAZY_GS */
141 140
142.macro PUSH_GS 141.macro PUSH_GS
143 pushl %gs 142 pushl_cfi %gs
144 CFI_ADJUST_CFA_OFFSET 4
145 /*CFI_REL_OFFSET gs, 0*/ 143 /*CFI_REL_OFFSET gs, 0*/
146.endm 144.endm
147 145
148.macro POP_GS pop=0 146.macro POP_GS pop=0
14998: popl %gs 14798: popl_cfi %gs
150 CFI_ADJUST_CFA_OFFSET -4
151 /*CFI_RESTORE gs*/ 148 /*CFI_RESTORE gs*/
152 .if \pop <> 0 149 .if \pop <> 0
153 add $\pop, %esp 150 add $\pop, %esp
@@ -195,35 +192,25 @@
195.macro SAVE_ALL 192.macro SAVE_ALL
196 cld 193 cld
197 PUSH_GS 194 PUSH_GS
198 pushl %fs 195 pushl_cfi %fs
199 CFI_ADJUST_CFA_OFFSET 4
200 /*CFI_REL_OFFSET fs, 0;*/ 196 /*CFI_REL_OFFSET fs, 0;*/
201 pushl %es 197 pushl_cfi %es
202 CFI_ADJUST_CFA_OFFSET 4
203 /*CFI_REL_OFFSET es, 0;*/ 198 /*CFI_REL_OFFSET es, 0;*/
204 pushl %ds 199 pushl_cfi %ds
205 CFI_ADJUST_CFA_OFFSET 4
206 /*CFI_REL_OFFSET ds, 0;*/ 200 /*CFI_REL_OFFSET ds, 0;*/
207 pushl %eax 201 pushl_cfi %eax
208 CFI_ADJUST_CFA_OFFSET 4
209 CFI_REL_OFFSET eax, 0 202 CFI_REL_OFFSET eax, 0
210 pushl %ebp 203 pushl_cfi %ebp
211 CFI_ADJUST_CFA_OFFSET 4
212 CFI_REL_OFFSET ebp, 0 204 CFI_REL_OFFSET ebp, 0
213 pushl %edi 205 pushl_cfi %edi
214 CFI_ADJUST_CFA_OFFSET 4
215 CFI_REL_OFFSET edi, 0 206 CFI_REL_OFFSET edi, 0
216 pushl %esi 207 pushl_cfi %esi
217 CFI_ADJUST_CFA_OFFSET 4
218 CFI_REL_OFFSET esi, 0 208 CFI_REL_OFFSET esi, 0
219 pushl %edx 209 pushl_cfi %edx
220 CFI_ADJUST_CFA_OFFSET 4
221 CFI_REL_OFFSET edx, 0 210 CFI_REL_OFFSET edx, 0
222 pushl %ecx 211 pushl_cfi %ecx
223 CFI_ADJUST_CFA_OFFSET 4
224 CFI_REL_OFFSET ecx, 0 212 CFI_REL_OFFSET ecx, 0
225 pushl %ebx 213 pushl_cfi %ebx
226 CFI_ADJUST_CFA_OFFSET 4
227 CFI_REL_OFFSET ebx, 0 214 CFI_REL_OFFSET ebx, 0
228 movl $(__USER_DS), %edx 215 movl $(__USER_DS), %edx
229 movl %edx, %ds 216 movl %edx, %ds
@@ -234,39 +221,29 @@
234.endm 221.endm
235 222
236.macro RESTORE_INT_REGS 223.macro RESTORE_INT_REGS
237 popl %ebx 224 popl_cfi %ebx
238 CFI_ADJUST_CFA_OFFSET -4
239 CFI_RESTORE ebx 225 CFI_RESTORE ebx
240 popl %ecx 226 popl_cfi %ecx
241 CFI_ADJUST_CFA_OFFSET -4
242 CFI_RESTORE ecx 227 CFI_RESTORE ecx
243 popl %edx 228 popl_cfi %edx
244 CFI_ADJUST_CFA_OFFSET -4
245 CFI_RESTORE edx 229 CFI_RESTORE edx
246 popl %esi 230 popl_cfi %esi
247 CFI_ADJUST_CFA_OFFSET -4
248 CFI_RESTORE esi 231 CFI_RESTORE esi
249 popl %edi 232 popl_cfi %edi
250 CFI_ADJUST_CFA_OFFSET -4
251 CFI_RESTORE edi 233 CFI_RESTORE edi
252 popl %ebp 234 popl_cfi %ebp
253 CFI_ADJUST_CFA_OFFSET -4
254 CFI_RESTORE ebp 235 CFI_RESTORE ebp
255 popl %eax 236 popl_cfi %eax
256 CFI_ADJUST_CFA_OFFSET -4
257 CFI_RESTORE eax 237 CFI_RESTORE eax
258.endm 238.endm
259 239
260.macro RESTORE_REGS pop=0 240.macro RESTORE_REGS pop=0
261 RESTORE_INT_REGS 241 RESTORE_INT_REGS
2621: popl %ds 2421: popl_cfi %ds
263 CFI_ADJUST_CFA_OFFSET -4
264 /*CFI_RESTORE ds;*/ 243 /*CFI_RESTORE ds;*/
2652: popl %es 2442: popl_cfi %es
266 CFI_ADJUST_CFA_OFFSET -4
267 /*CFI_RESTORE es;*/ 245 /*CFI_RESTORE es;*/
2683: popl %fs 2463: popl_cfi %fs
269 CFI_ADJUST_CFA_OFFSET -4
270 /*CFI_RESTORE fs;*/ 247 /*CFI_RESTORE fs;*/
271 POP_GS \pop 248 POP_GS \pop
272.pushsection .fixup, "ax" 249.pushsection .fixup, "ax"
@@ -320,16 +297,12 @@
320 297
321ENTRY(ret_from_fork) 298ENTRY(ret_from_fork)
322 CFI_STARTPROC 299 CFI_STARTPROC
323 pushl %eax 300 pushl_cfi %eax
324 CFI_ADJUST_CFA_OFFSET 4
325 call schedule_tail 301 call schedule_tail
326 GET_THREAD_INFO(%ebp) 302 GET_THREAD_INFO(%ebp)
327 popl %eax 303 popl_cfi %eax
328 CFI_ADJUST_CFA_OFFSET -4 304 pushl_cfi $0x0202 # Reset kernel eflags
329 pushl $0x0202 # Reset kernel eflags 305 popfl_cfi
330 CFI_ADJUST_CFA_OFFSET 4
331 popfl
332 CFI_ADJUST_CFA_OFFSET -4
333 jmp syscall_exit 306 jmp syscall_exit
334 CFI_ENDPROC 307 CFI_ENDPROC
335END(ret_from_fork) 308END(ret_from_fork)
@@ -409,29 +382,23 @@ sysenter_past_esp:
409 * enough kernel state to call TRACE_IRQS_OFF can be called - but 382 * enough kernel state to call TRACE_IRQS_OFF can be called - but
410 * we immediately enable interrupts at that point anyway. 383 * we immediately enable interrupts at that point anyway.
411 */ 384 */
412 pushl $(__USER_DS) 385 pushl_cfi $(__USER_DS)
413 CFI_ADJUST_CFA_OFFSET 4
414 /*CFI_REL_OFFSET ss, 0*/ 386 /*CFI_REL_OFFSET ss, 0*/
415 pushl %ebp 387 pushl_cfi %ebp
416 CFI_ADJUST_CFA_OFFSET 4
417 CFI_REL_OFFSET esp, 0 388 CFI_REL_OFFSET esp, 0
418 pushfl 389 pushfl_cfi
419 orl $X86_EFLAGS_IF, (%esp) 390 orl $X86_EFLAGS_IF, (%esp)
420 CFI_ADJUST_CFA_OFFSET 4 391 pushl_cfi $(__USER_CS)
421 pushl $(__USER_CS)
422 CFI_ADJUST_CFA_OFFSET 4
423 /*CFI_REL_OFFSET cs, 0*/ 392 /*CFI_REL_OFFSET cs, 0*/
424 /* 393 /*
425 * Push current_thread_info()->sysenter_return to the stack. 394 * Push current_thread_info()->sysenter_return to the stack.
426 * A tiny bit of offset fixup is necessary - 4*4 means the 4 words 395 * A tiny bit of offset fixup is necessary - 4*4 means the 4 words
427 * pushed above; +8 corresponds to copy_thread's esp0 setting. 396 * pushed above; +8 corresponds to copy_thread's esp0 setting.
428 */ 397 */
429 pushl (TI_sysenter_return-THREAD_SIZE+8+4*4)(%esp) 398 pushl_cfi (TI_sysenter_return-THREAD_SIZE+8+4*4)(%esp)
430 CFI_ADJUST_CFA_OFFSET 4
431 CFI_REL_OFFSET eip, 0 399 CFI_REL_OFFSET eip, 0
432 400
433 pushl %eax 401 pushl_cfi %eax
434 CFI_ADJUST_CFA_OFFSET 4
435 SAVE_ALL 402 SAVE_ALL
436 ENABLE_INTERRUPTS(CLBR_NONE) 403 ENABLE_INTERRUPTS(CLBR_NONE)
437 404
@@ -486,8 +453,7 @@ sysenter_audit:
486 movl %eax,%edx /* 2nd arg: syscall number */ 453 movl %eax,%edx /* 2nd arg: syscall number */
487 movl $AUDIT_ARCH_I386,%eax /* 1st arg: audit arch */ 454 movl $AUDIT_ARCH_I386,%eax /* 1st arg: audit arch */
488 call audit_syscall_entry 455 call audit_syscall_entry
489 pushl %ebx 456 pushl_cfi %ebx
490 CFI_ADJUST_CFA_OFFSET 4
491 movl PT_EAX(%esp),%eax /* reload syscall number */ 457 movl PT_EAX(%esp),%eax /* reload syscall number */
492 jmp sysenter_do_call 458 jmp sysenter_do_call
493 459
@@ -529,8 +495,7 @@ ENDPROC(ia32_sysenter_target)
529 # system call handler stub 495 # system call handler stub
530ENTRY(system_call) 496ENTRY(system_call)
531 RING0_INT_FRAME # can't unwind into user space anyway 497 RING0_INT_FRAME # can't unwind into user space anyway
532 pushl %eax # save orig_eax 498 pushl_cfi %eax # save orig_eax
533 CFI_ADJUST_CFA_OFFSET 4
534 SAVE_ALL 499 SAVE_ALL
535 GET_THREAD_INFO(%ebp) 500 GET_THREAD_INFO(%ebp)
536 # system call tracing in operation / emulation 501 # system call tracing in operation / emulation
@@ -566,7 +531,6 @@ restore_all_notrace:
566 je ldt_ss # returning to user-space with LDT SS 531 je ldt_ss # returning to user-space with LDT SS
567restore_nocheck: 532restore_nocheck:
568 RESTORE_REGS 4 # skip orig_eax/error_code 533 RESTORE_REGS 4 # skip orig_eax/error_code
569 CFI_ADJUST_CFA_OFFSET -4
570irq_return: 534irq_return:
571 INTERRUPT_RETURN 535 INTERRUPT_RETURN
572.section .fixup,"ax" 536.section .fixup,"ax"
@@ -619,10 +583,8 @@ ldt_ss:
619 shr $16, %edx 583 shr $16, %edx
620 mov %dl, GDT_ESPFIX_SS + 4 /* bits 16..23 */ 584 mov %dl, GDT_ESPFIX_SS + 4 /* bits 16..23 */
621 mov %dh, GDT_ESPFIX_SS + 7 /* bits 24..31 */ 585 mov %dh, GDT_ESPFIX_SS + 7 /* bits 24..31 */
622 pushl $__ESPFIX_SS 586 pushl_cfi $__ESPFIX_SS
623 CFI_ADJUST_CFA_OFFSET 4 587 pushl_cfi %eax /* new kernel esp */
624 push %eax /* new kernel esp */
625 CFI_ADJUST_CFA_OFFSET 4
626 /* Disable interrupts, but do not irqtrace this section: we 588 /* Disable interrupts, but do not irqtrace this section: we
627 * will soon execute iret and the tracer was already set to 589 * will soon execute iret and the tracer was already set to
628 * the irqstate after the iret */ 590 * the irqstate after the iret */
@@ -666,11 +628,9 @@ work_notifysig: # deal with pending signals and
666 628
667 ALIGN 629 ALIGN
668work_notifysig_v86: 630work_notifysig_v86:
669 pushl %ecx # save ti_flags for do_notify_resume 631 pushl_cfi %ecx # save ti_flags for do_notify_resume
670 CFI_ADJUST_CFA_OFFSET 4
671 call save_v86_state # %eax contains pt_regs pointer 632 call save_v86_state # %eax contains pt_regs pointer
672 popl %ecx 633 popl_cfi %ecx
673 CFI_ADJUST_CFA_OFFSET -4
674 movl %eax, %esp 634 movl %eax, %esp
675#else 635#else
676 movl %esp, %eax 636 movl %esp, %eax
@@ -750,14 +710,18 @@ ptregs_##name: \
750#define PTREGSCALL3(name) \ 710#define PTREGSCALL3(name) \
751 ALIGN; \ 711 ALIGN; \
752ptregs_##name: \ 712ptregs_##name: \
713 CFI_STARTPROC; \
753 leal 4(%esp),%eax; \ 714 leal 4(%esp),%eax; \
754 pushl %eax; \ 715 pushl_cfi %eax; \
755 movl PT_EDX(%eax),%ecx; \ 716 movl PT_EDX(%eax),%ecx; \
756 movl PT_ECX(%eax),%edx; \ 717 movl PT_ECX(%eax),%edx; \
757 movl PT_EBX(%eax),%eax; \ 718 movl PT_EBX(%eax),%eax; \
758 call sys_##name; \ 719 call sys_##name; \
759 addl $4,%esp; \ 720 addl $4,%esp; \
760 ret 721 CFI_ADJUST_CFA_OFFSET -4; \
722 ret; \
723 CFI_ENDPROC; \
724ENDPROC(ptregs_##name)
761 725
762PTREGSCALL1(iopl) 726PTREGSCALL1(iopl)
763PTREGSCALL0(fork) 727PTREGSCALL0(fork)
@@ -772,15 +736,19 @@ PTREGSCALL1(vm86old)
772/* Clone is an oddball. The 4th arg is in %edi */ 736/* Clone is an oddball. The 4th arg is in %edi */
773 ALIGN; 737 ALIGN;
774ptregs_clone: 738ptregs_clone:
739 CFI_STARTPROC
775 leal 4(%esp),%eax 740 leal 4(%esp),%eax
776 pushl %eax 741 pushl_cfi %eax
777 pushl PT_EDI(%eax) 742 pushl_cfi PT_EDI(%eax)
778 movl PT_EDX(%eax),%ecx 743 movl PT_EDX(%eax),%ecx
779 movl PT_ECX(%eax),%edx 744 movl PT_ECX(%eax),%edx
780 movl PT_EBX(%eax),%eax 745 movl PT_EBX(%eax),%eax
781 call sys_clone 746 call sys_clone
782 addl $8,%esp 747 addl $8,%esp
748 CFI_ADJUST_CFA_OFFSET -8
783 ret 749 ret
750 CFI_ENDPROC
751ENDPROC(ptregs_clone)
784 752
785.macro FIXUP_ESPFIX_STACK 753.macro FIXUP_ESPFIX_STACK
786/* 754/*
@@ -795,10 +763,8 @@ ptregs_clone:
795 mov GDT_ESPFIX_SS + 7, %ah /* bits 24..31 */ 763 mov GDT_ESPFIX_SS + 7, %ah /* bits 24..31 */
796 shl $16, %eax 764 shl $16, %eax
797 addl %esp, %eax /* the adjusted stack pointer */ 765 addl %esp, %eax /* the adjusted stack pointer */
798 pushl $__KERNEL_DS 766 pushl_cfi $__KERNEL_DS
799 CFI_ADJUST_CFA_OFFSET 4 767 pushl_cfi %eax
800 pushl %eax
801 CFI_ADJUST_CFA_OFFSET 4
802 lss (%esp), %esp /* switch to the normal stack segment */ 768 lss (%esp), %esp /* switch to the normal stack segment */
803 CFI_ADJUST_CFA_OFFSET -8 769 CFI_ADJUST_CFA_OFFSET -8
804.endm 770.endm
@@ -835,8 +801,7 @@ vector=FIRST_EXTERNAL_VECTOR
835 .if vector <> FIRST_EXTERNAL_VECTOR 801 .if vector <> FIRST_EXTERNAL_VECTOR
836 CFI_ADJUST_CFA_OFFSET -4 802 CFI_ADJUST_CFA_OFFSET -4
837 .endif 803 .endif
8381: pushl $(~vector+0x80) /* Note: always in signed byte range */ 8041: pushl_cfi $(~vector+0x80) /* Note: always in signed byte range */
839 CFI_ADJUST_CFA_OFFSET 4
840 .if ((vector-FIRST_EXTERNAL_VECTOR)%7) <> 6 805 .if ((vector-FIRST_EXTERNAL_VECTOR)%7) <> 6
841 jmp 2f 806 jmp 2f
842 .endif 807 .endif
@@ -876,8 +841,7 @@ ENDPROC(common_interrupt)
876#define BUILD_INTERRUPT3(name, nr, fn) \ 841#define BUILD_INTERRUPT3(name, nr, fn) \
877ENTRY(name) \ 842ENTRY(name) \
878 RING0_INT_FRAME; \ 843 RING0_INT_FRAME; \
879 pushl $~(nr); \ 844 pushl_cfi $~(nr); \
880 CFI_ADJUST_CFA_OFFSET 4; \
881 SAVE_ALL; \ 845 SAVE_ALL; \
882 TRACE_IRQS_OFF \ 846 TRACE_IRQS_OFF \
883 movl %esp,%eax; \ 847 movl %esp,%eax; \
@@ -893,21 +857,18 @@ ENDPROC(name)
893 857
894ENTRY(coprocessor_error) 858ENTRY(coprocessor_error)
895 RING0_INT_FRAME 859 RING0_INT_FRAME
896 pushl $0 860 pushl_cfi $0
897 CFI_ADJUST_CFA_OFFSET 4 861 pushl_cfi $do_coprocessor_error
898 pushl $do_coprocessor_error
899 CFI_ADJUST_CFA_OFFSET 4
900 jmp error_code 862 jmp error_code
901 CFI_ENDPROC 863 CFI_ENDPROC
902END(coprocessor_error) 864END(coprocessor_error)
903 865
904ENTRY(simd_coprocessor_error) 866ENTRY(simd_coprocessor_error)
905 RING0_INT_FRAME 867 RING0_INT_FRAME
906 pushl $0 868 pushl_cfi $0
907 CFI_ADJUST_CFA_OFFSET 4
908#ifdef CONFIG_X86_INVD_BUG 869#ifdef CONFIG_X86_INVD_BUG
909 /* AMD 486 bug: invd from userspace calls exception 19 instead of #GP */ 870 /* AMD 486 bug: invd from userspace calls exception 19 instead of #GP */
910661: pushl $do_general_protection 871661: pushl_cfi $do_general_protection
911662: 872662:
912.section .altinstructions,"a" 873.section .altinstructions,"a"
913 .balign 4 874 .balign 4
@@ -922,19 +883,16 @@ ENTRY(simd_coprocessor_error)
922664: 883664:
923.previous 884.previous
924#else 885#else
925 pushl $do_simd_coprocessor_error 886 pushl_cfi $do_simd_coprocessor_error
926#endif 887#endif
927 CFI_ADJUST_CFA_OFFSET 4
928 jmp error_code 888 jmp error_code
929 CFI_ENDPROC 889 CFI_ENDPROC
930END(simd_coprocessor_error) 890END(simd_coprocessor_error)
931 891
932ENTRY(device_not_available) 892ENTRY(device_not_available)
933 RING0_INT_FRAME 893 RING0_INT_FRAME
934 pushl $-1 # mark this as an int 894 pushl_cfi $-1 # mark this as an int
935 CFI_ADJUST_CFA_OFFSET 4 895 pushl_cfi $do_device_not_available
936 pushl $do_device_not_available
937 CFI_ADJUST_CFA_OFFSET 4
938 jmp error_code 896 jmp error_code
939 CFI_ENDPROC 897 CFI_ENDPROC
940END(device_not_available) 898END(device_not_available)
@@ -956,82 +914,68 @@ END(native_irq_enable_sysexit)
956 914
957ENTRY(overflow) 915ENTRY(overflow)
958 RING0_INT_FRAME 916 RING0_INT_FRAME
959 pushl $0 917 pushl_cfi $0
960 CFI_ADJUST_CFA_OFFSET 4 918 pushl_cfi $do_overflow
961 pushl $do_overflow
962 CFI_ADJUST_CFA_OFFSET 4
963 jmp error_code 919 jmp error_code
964 CFI_ENDPROC 920 CFI_ENDPROC
965END(overflow) 921END(overflow)
966 922
967ENTRY(bounds) 923ENTRY(bounds)
968 RING0_INT_FRAME 924 RING0_INT_FRAME
969 pushl $0 925 pushl_cfi $0
970 CFI_ADJUST_CFA_OFFSET 4 926 pushl_cfi $do_bounds
971 pushl $do_bounds
972 CFI_ADJUST_CFA_OFFSET 4
973 jmp error_code 927 jmp error_code
974 CFI_ENDPROC 928 CFI_ENDPROC
975END(bounds) 929END(bounds)
976 930
977ENTRY(invalid_op) 931ENTRY(invalid_op)
978 RING0_INT_FRAME 932 RING0_INT_FRAME
979 pushl $0 933 pushl_cfi $0
980 CFI_ADJUST_CFA_OFFSET 4 934 pushl_cfi $do_invalid_op
981 pushl $do_invalid_op
982 CFI_ADJUST_CFA_OFFSET 4
983 jmp error_code 935 jmp error_code
984 CFI_ENDPROC 936 CFI_ENDPROC
985END(invalid_op) 937END(invalid_op)
986 938
987ENTRY(coprocessor_segment_overrun) 939ENTRY(coprocessor_segment_overrun)
988 RING0_INT_FRAME 940 RING0_INT_FRAME
989 pushl $0 941 pushl_cfi $0
990 CFI_ADJUST_CFA_OFFSET 4 942 pushl_cfi $do_coprocessor_segment_overrun
991 pushl $do_coprocessor_segment_overrun
992 CFI_ADJUST_CFA_OFFSET 4
993 jmp error_code 943 jmp error_code
994 CFI_ENDPROC 944 CFI_ENDPROC
995END(coprocessor_segment_overrun) 945END(coprocessor_segment_overrun)
996 946
997ENTRY(invalid_TSS) 947ENTRY(invalid_TSS)
998 RING0_EC_FRAME 948 RING0_EC_FRAME
999 pushl $do_invalid_TSS 949 pushl_cfi $do_invalid_TSS
1000 CFI_ADJUST_CFA_OFFSET 4
1001 jmp error_code 950 jmp error_code
1002 CFI_ENDPROC 951 CFI_ENDPROC
1003END(invalid_TSS) 952END(invalid_TSS)
1004 953
1005ENTRY(segment_not_present) 954ENTRY(segment_not_present)
1006 RING0_EC_FRAME 955 RING0_EC_FRAME
1007 pushl $do_segment_not_present 956 pushl_cfi $do_segment_not_present
1008 CFI_ADJUST_CFA_OFFSET 4
1009 jmp error_code 957 jmp error_code
1010 CFI_ENDPROC 958 CFI_ENDPROC
1011END(segment_not_present) 959END(segment_not_present)
1012 960
1013ENTRY(stack_segment) 961ENTRY(stack_segment)
1014 RING0_EC_FRAME 962 RING0_EC_FRAME
1015 pushl $do_stack_segment 963 pushl_cfi $do_stack_segment
1016 CFI_ADJUST_CFA_OFFSET 4
1017 jmp error_code 964 jmp error_code
1018 CFI_ENDPROC 965 CFI_ENDPROC
1019END(stack_segment) 966END(stack_segment)
1020 967
1021ENTRY(alignment_check) 968ENTRY(alignment_check)
1022 RING0_EC_FRAME 969 RING0_EC_FRAME
1023 pushl $do_alignment_check 970 pushl_cfi $do_alignment_check
1024 CFI_ADJUST_CFA_OFFSET 4
1025 jmp error_code 971 jmp error_code
1026 CFI_ENDPROC 972 CFI_ENDPROC
1027END(alignment_check) 973END(alignment_check)
1028 974
1029ENTRY(divide_error) 975ENTRY(divide_error)
1030 RING0_INT_FRAME 976 RING0_INT_FRAME
1031 pushl $0 # no error code 977 pushl_cfi $0 # no error code
1032 CFI_ADJUST_CFA_OFFSET 4 978 pushl_cfi $do_divide_error
1033 pushl $do_divide_error
1034 CFI_ADJUST_CFA_OFFSET 4
1035 jmp error_code 979 jmp error_code
1036 CFI_ENDPROC 980 CFI_ENDPROC
1037END(divide_error) 981END(divide_error)
@@ -1039,10 +983,8 @@ END(divide_error)
1039#ifdef CONFIG_X86_MCE 983#ifdef CONFIG_X86_MCE
1040ENTRY(machine_check) 984ENTRY(machine_check)
1041 RING0_INT_FRAME 985 RING0_INT_FRAME
1042 pushl $0 986 pushl_cfi $0
1043 CFI_ADJUST_CFA_OFFSET 4 987 pushl_cfi machine_check_vector
1044 pushl machine_check_vector
1045 CFI_ADJUST_CFA_OFFSET 4
1046 jmp error_code 988 jmp error_code
1047 CFI_ENDPROC 989 CFI_ENDPROC
1048END(machine_check) 990END(machine_check)
@@ -1050,10 +992,8 @@ END(machine_check)
1050 992
1051ENTRY(spurious_interrupt_bug) 993ENTRY(spurious_interrupt_bug)
1052 RING0_INT_FRAME 994 RING0_INT_FRAME
1053 pushl $0 995 pushl_cfi $0
1054 CFI_ADJUST_CFA_OFFSET 4 996 pushl_cfi $do_spurious_interrupt_bug
1055 pushl $do_spurious_interrupt_bug
1056 CFI_ADJUST_CFA_OFFSET 4
1057 jmp error_code 997 jmp error_code
1058 CFI_ENDPROC 998 CFI_ENDPROC
1059END(spurious_interrupt_bug) 999END(spurious_interrupt_bug)
@@ -1084,8 +1024,7 @@ ENTRY(xen_sysenter_target)
1084 1024
1085ENTRY(xen_hypervisor_callback) 1025ENTRY(xen_hypervisor_callback)
1086 CFI_STARTPROC 1026 CFI_STARTPROC
1087 pushl $0 1027 pushl_cfi $0
1088 CFI_ADJUST_CFA_OFFSET 4
1089 SAVE_ALL 1028 SAVE_ALL
1090 TRACE_IRQS_OFF 1029 TRACE_IRQS_OFF
1091 1030
@@ -1121,23 +1060,20 @@ ENDPROC(xen_hypervisor_callback)
1121# We distinguish between categories by maintaining a status value in EAX. 1060# We distinguish between categories by maintaining a status value in EAX.
1122ENTRY(xen_failsafe_callback) 1061ENTRY(xen_failsafe_callback)
1123 CFI_STARTPROC 1062 CFI_STARTPROC
1124 pushl %eax 1063 pushl_cfi %eax
1125 CFI_ADJUST_CFA_OFFSET 4
1126 movl $1,%eax 1064 movl $1,%eax
11271: mov 4(%esp),%ds 10651: mov 4(%esp),%ds
11282: mov 8(%esp),%es 10662: mov 8(%esp),%es
11293: mov 12(%esp),%fs 10673: mov 12(%esp),%fs
11304: mov 16(%esp),%gs 10684: mov 16(%esp),%gs
1131 testl %eax,%eax 1069 testl %eax,%eax
1132 popl %eax 1070 popl_cfi %eax
1133 CFI_ADJUST_CFA_OFFSET -4
1134 lea 16(%esp),%esp 1071 lea 16(%esp),%esp
1135 CFI_ADJUST_CFA_OFFSET -16 1072 CFI_ADJUST_CFA_OFFSET -16
1136 jz 5f 1073 jz 5f
1137 addl $16,%esp 1074 addl $16,%esp
1138 jmp iret_exc # EAX != 0 => Category 2 (Bad IRET) 1075 jmp iret_exc # EAX != 0 => Category 2 (Bad IRET)
11395: pushl $0 # EAX == 0 => Category 1 (Bad segment) 10765: pushl_cfi $0 # EAX == 0 => Category 1 (Bad segment)
1140 CFI_ADJUST_CFA_OFFSET 4
1141 SAVE_ALL 1077 SAVE_ALL
1142 jmp ret_from_exception 1078 jmp ret_from_exception
1143 CFI_ENDPROC 1079 CFI_ENDPROC
@@ -1287,40 +1223,29 @@ syscall_table_size=(.-sys_call_table)
1287 1223
1288ENTRY(page_fault) 1224ENTRY(page_fault)
1289 RING0_EC_FRAME 1225 RING0_EC_FRAME
1290 pushl $do_page_fault 1226 pushl_cfi $do_page_fault
1291 CFI_ADJUST_CFA_OFFSET 4
1292 ALIGN 1227 ALIGN
1293error_code: 1228error_code:
1294 /* the function address is in %gs's slot on the stack */ 1229 /* the function address is in %gs's slot on the stack */
1295 pushl %fs 1230 pushl_cfi %fs
1296 CFI_ADJUST_CFA_OFFSET 4
1297 /*CFI_REL_OFFSET fs, 0*/ 1231 /*CFI_REL_OFFSET fs, 0*/
1298 pushl %es 1232 pushl_cfi %es
1299 CFI_ADJUST_CFA_OFFSET 4
1300 /*CFI_REL_OFFSET es, 0*/ 1233 /*CFI_REL_OFFSET es, 0*/
1301 pushl %ds 1234 pushl_cfi %ds
1302 CFI_ADJUST_CFA_OFFSET 4
1303 /*CFI_REL_OFFSET ds, 0*/ 1235 /*CFI_REL_OFFSET ds, 0*/
1304 pushl %eax 1236 pushl_cfi %eax
1305 CFI_ADJUST_CFA_OFFSET 4
1306 CFI_REL_OFFSET eax, 0 1237 CFI_REL_OFFSET eax, 0
1307 pushl %ebp 1238 pushl_cfi %ebp
1308 CFI_ADJUST_CFA_OFFSET 4
1309 CFI_REL_OFFSET ebp, 0 1239 CFI_REL_OFFSET ebp, 0
1310 pushl %edi 1240 pushl_cfi %edi
1311 CFI_ADJUST_CFA_OFFSET 4
1312 CFI_REL_OFFSET edi, 0 1241 CFI_REL_OFFSET edi, 0
1313 pushl %esi 1242 pushl_cfi %esi
1314 CFI_ADJUST_CFA_OFFSET 4
1315 CFI_REL_OFFSET esi, 0 1243 CFI_REL_OFFSET esi, 0
1316 pushl %edx 1244 pushl_cfi %edx
1317 CFI_ADJUST_CFA_OFFSET 4
1318 CFI_REL_OFFSET edx, 0 1245 CFI_REL_OFFSET edx, 0
1319 pushl %ecx 1246 pushl_cfi %ecx
1320 CFI_ADJUST_CFA_OFFSET 4
1321 CFI_REL_OFFSET ecx, 0 1247 CFI_REL_OFFSET ecx, 0
1322 pushl %ebx 1248 pushl_cfi %ebx
1323 CFI_ADJUST_CFA_OFFSET 4
1324 CFI_REL_OFFSET ebx, 0 1249 CFI_REL_OFFSET ebx, 0
1325 cld 1250 cld
1326 movl $(__KERNEL_PERCPU), %ecx 1251 movl $(__KERNEL_PERCPU), %ecx
@@ -1362,12 +1287,9 @@ END(page_fault)
1362 movl TSS_sysenter_sp0 + \offset(%esp), %esp 1287 movl TSS_sysenter_sp0 + \offset(%esp), %esp
1363 CFI_DEF_CFA esp, 0 1288 CFI_DEF_CFA esp, 0
1364 CFI_UNDEFINED eip 1289 CFI_UNDEFINED eip
1365 pushfl 1290 pushfl_cfi
1366 CFI_ADJUST_CFA_OFFSET 4 1291 pushl_cfi $__KERNEL_CS
1367 pushl $__KERNEL_CS 1292 pushl_cfi $sysenter_past_esp
1368 CFI_ADJUST_CFA_OFFSET 4
1369 pushl $sysenter_past_esp
1370 CFI_ADJUST_CFA_OFFSET 4
1371 CFI_REL_OFFSET eip, 0 1293 CFI_REL_OFFSET eip, 0
1372.endm 1294.endm
1373 1295
@@ -1377,8 +1299,7 @@ ENTRY(debug)
1377 jne debug_stack_correct 1299 jne debug_stack_correct
1378 FIX_STACK 12, debug_stack_correct, debug_esp_fix_insn 1300 FIX_STACK 12, debug_stack_correct, debug_esp_fix_insn
1379debug_stack_correct: 1301debug_stack_correct:
1380 pushl $-1 # mark this as an int 1302 pushl_cfi $-1 # mark this as an int
1381 CFI_ADJUST_CFA_OFFSET 4
1382 SAVE_ALL 1303 SAVE_ALL
1383 TRACE_IRQS_OFF 1304 TRACE_IRQS_OFF
1384 xorl %edx,%edx # error code 0 1305 xorl %edx,%edx # error code 0
@@ -1398,32 +1319,27 @@ END(debug)
1398 */ 1319 */
1399ENTRY(nmi) 1320ENTRY(nmi)
1400 RING0_INT_FRAME 1321 RING0_INT_FRAME
1401 pushl %eax 1322 pushl_cfi %eax
1402 CFI_ADJUST_CFA_OFFSET 4
1403 movl %ss, %eax 1323 movl %ss, %eax
1404 cmpw $__ESPFIX_SS, %ax 1324 cmpw $__ESPFIX_SS, %ax
1405 popl %eax 1325 popl_cfi %eax
1406 CFI_ADJUST_CFA_OFFSET -4
1407 je nmi_espfix_stack 1326 je nmi_espfix_stack
1408 cmpl $ia32_sysenter_target,(%esp) 1327 cmpl $ia32_sysenter_target,(%esp)
1409 je nmi_stack_fixup 1328 je nmi_stack_fixup
1410 pushl %eax 1329 pushl_cfi %eax
1411 CFI_ADJUST_CFA_OFFSET 4
1412 movl %esp,%eax 1330 movl %esp,%eax
1413 /* Do not access memory above the end of our stack page, 1331 /* Do not access memory above the end of our stack page,
1414 * it might not exist. 1332 * it might not exist.
1415 */ 1333 */
1416 andl $(THREAD_SIZE-1),%eax 1334 andl $(THREAD_SIZE-1),%eax
1417 cmpl $(THREAD_SIZE-20),%eax 1335 cmpl $(THREAD_SIZE-20),%eax
1418 popl %eax 1336 popl_cfi %eax
1419 CFI_ADJUST_CFA_OFFSET -4
1420 jae nmi_stack_correct 1337 jae nmi_stack_correct
1421 cmpl $ia32_sysenter_target,12(%esp) 1338 cmpl $ia32_sysenter_target,12(%esp)
1422 je nmi_debug_stack_check 1339 je nmi_debug_stack_check
1423nmi_stack_correct: 1340nmi_stack_correct:
1424 /* We have a RING0_INT_FRAME here */ 1341 /* We have a RING0_INT_FRAME here */
1425 pushl %eax 1342 pushl_cfi %eax
1426 CFI_ADJUST_CFA_OFFSET 4
1427 SAVE_ALL 1343 SAVE_ALL
1428 xorl %edx,%edx # zero error code 1344 xorl %edx,%edx # zero error code
1429 movl %esp,%eax # pt_regs pointer 1345 movl %esp,%eax # pt_regs pointer
@@ -1452,18 +1368,14 @@ nmi_espfix_stack:
1452 * 1368 *
1453 * create the pointer to lss back 1369 * create the pointer to lss back
1454 */ 1370 */
1455 pushl %ss 1371 pushl_cfi %ss
1456 CFI_ADJUST_CFA_OFFSET 4 1372 pushl_cfi %esp
1457 pushl %esp
1458 CFI_ADJUST_CFA_OFFSET 4
1459 addl $4, (%esp) 1373 addl $4, (%esp)
1460 /* copy the iret frame of 12 bytes */ 1374 /* copy the iret frame of 12 bytes */
1461 .rept 3 1375 .rept 3
1462 pushl 16(%esp) 1376 pushl_cfi 16(%esp)
1463 CFI_ADJUST_CFA_OFFSET 4
1464 .endr 1377 .endr
1465 pushl %eax 1378 pushl_cfi %eax
1466 CFI_ADJUST_CFA_OFFSET 4
1467 SAVE_ALL 1379 SAVE_ALL
1468 FIXUP_ESPFIX_STACK # %eax == %esp 1380 FIXUP_ESPFIX_STACK # %eax == %esp
1469 xorl %edx,%edx # zero error code 1381 xorl %edx,%edx # zero error code
@@ -1477,8 +1389,7 @@ END(nmi)
1477 1389
1478ENTRY(int3) 1390ENTRY(int3)
1479 RING0_INT_FRAME 1391 RING0_INT_FRAME
1480 pushl $-1 # mark this as an int 1392 pushl_cfi $-1 # mark this as an int
1481 CFI_ADJUST_CFA_OFFSET 4
1482 SAVE_ALL 1393 SAVE_ALL
1483 TRACE_IRQS_OFF 1394 TRACE_IRQS_OFF
1484 xorl %edx,%edx # zero error code 1395 xorl %edx,%edx # zero error code
@@ -1490,8 +1401,7 @@ END(int3)
1490 1401
1491ENTRY(general_protection) 1402ENTRY(general_protection)
1492 RING0_EC_FRAME 1403 RING0_EC_FRAME
1493 pushl $do_general_protection 1404 pushl_cfi $do_general_protection
1494 CFI_ADJUST_CFA_OFFSET 4
1495 jmp error_code 1405 jmp error_code
1496 CFI_ENDPROC 1406 CFI_ENDPROC
1497END(general_protection) 1407END(general_protection)
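
The whole entry_32.S conversion above folds each "pushl/popl plus CFI_ADJUST_CFA_OFFSET" pair into a single macro. For reference, a plausible sketch of those wrappers as this series adds them to asm/dwarf2.h (the diffstat shows that header growing by 20 lines; exact bodies assumed, not shown in this hunk):

        .macro pushl_cfi reg
        pushl \reg
        CFI_ADJUST_CFA_OFFSET 4
        .endm

        .macro popl_cfi reg
        popl \reg
        CFI_ADJUST_CFA_OFFSET -4
        .endm

Keeping the stack operation and its DWARF bookkeeping on one line makes it much harder for the two to drift apart, which is the point of the change.
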
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 17be5ec7cbba..a7ae7fd1010f 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -213,23 +213,17 @@ ENDPROC(native_usergs_sysret64)
213 .macro FAKE_STACK_FRAME child_rip 213 .macro FAKE_STACK_FRAME child_rip
214 /* push in order ss, rsp, eflags, cs, rip */ 214 /* push in order ss, rsp, eflags, cs, rip */
215 xorl %eax, %eax 215 xorl %eax, %eax
216 pushq $__KERNEL_DS /* ss */ 216 pushq_cfi $__KERNEL_DS /* ss */
217 CFI_ADJUST_CFA_OFFSET 8
218 /*CFI_REL_OFFSET ss,0*/ 217 /*CFI_REL_OFFSET ss,0*/
219 pushq %rax /* rsp */ 218 pushq_cfi %rax /* rsp */
220 CFI_ADJUST_CFA_OFFSET 8
221 CFI_REL_OFFSET rsp,0 219 CFI_REL_OFFSET rsp,0
222 pushq $X86_EFLAGS_IF /* eflags - interrupts on */ 220 pushq_cfi $X86_EFLAGS_IF /* eflags - interrupts on */
223 CFI_ADJUST_CFA_OFFSET 8
224 /*CFI_REL_OFFSET rflags,0*/ 221 /*CFI_REL_OFFSET rflags,0*/
225 pushq $__KERNEL_CS /* cs */ 222 pushq_cfi $__KERNEL_CS /* cs */
226 CFI_ADJUST_CFA_OFFSET 8
227 /*CFI_REL_OFFSET cs,0*/ 223 /*CFI_REL_OFFSET cs,0*/
228 pushq \child_rip /* rip */ 224 pushq_cfi \child_rip /* rip */
229 CFI_ADJUST_CFA_OFFSET 8
230 CFI_REL_OFFSET rip,0 225 CFI_REL_OFFSET rip,0
231 pushq %rax /* orig rax */ 226 pushq_cfi %rax /* orig rax */
232 CFI_ADJUST_CFA_OFFSET 8
233 .endm 227 .endm
234 228
235 .macro UNFAKE_STACK_FRAME 229 .macro UNFAKE_STACK_FRAME
@@ -398,10 +392,8 @@ ENTRY(ret_from_fork)
398 392
399 LOCK ; btr $TIF_FORK,TI_flags(%r8) 393 LOCK ; btr $TIF_FORK,TI_flags(%r8)
400 394
401 push kernel_eflags(%rip) 395 pushq_cfi kernel_eflags(%rip)
402 CFI_ADJUST_CFA_OFFSET 8 396 popfq_cfi # reset kernel eflags
403 popf # reset kernel eflags
404 CFI_ADJUST_CFA_OFFSET -8
405 397
406 call schedule_tail # rdi: 'prev' task parameter 398 call schedule_tail # rdi: 'prev' task parameter
407 399
@@ -521,11 +513,9 @@ sysret_careful:
521 jnc sysret_signal 513 jnc sysret_signal
522 TRACE_IRQS_ON 514 TRACE_IRQS_ON
523 ENABLE_INTERRUPTS(CLBR_NONE) 515 ENABLE_INTERRUPTS(CLBR_NONE)
524 pushq %rdi 516 pushq_cfi %rdi
525 CFI_ADJUST_CFA_OFFSET 8
526 call schedule 517 call schedule
527 popq %rdi 518 popq_cfi %rdi
528 CFI_ADJUST_CFA_OFFSET -8
529 jmp sysret_check 519 jmp sysret_check
530 520
531 /* Handle a signal */ 521 /* Handle a signal */
@@ -634,11 +624,9 @@ int_careful:
634 jnc int_very_careful 624 jnc int_very_careful
635 TRACE_IRQS_ON 625 TRACE_IRQS_ON
636 ENABLE_INTERRUPTS(CLBR_NONE) 626 ENABLE_INTERRUPTS(CLBR_NONE)
637 pushq %rdi 627 pushq_cfi %rdi
638 CFI_ADJUST_CFA_OFFSET 8
639 call schedule 628 call schedule
640 popq %rdi 629 popq_cfi %rdi
641 CFI_ADJUST_CFA_OFFSET -8
642 DISABLE_INTERRUPTS(CLBR_NONE) 630 DISABLE_INTERRUPTS(CLBR_NONE)
643 TRACE_IRQS_OFF 631 TRACE_IRQS_OFF
644 jmp int_with_check 632 jmp int_with_check
@@ -652,12 +640,10 @@ int_check_syscall_exit_work:
652 /* Check for syscall exit trace */ 640 /* Check for syscall exit trace */
653 testl $_TIF_WORK_SYSCALL_EXIT,%edx 641 testl $_TIF_WORK_SYSCALL_EXIT,%edx
654 jz int_signal 642 jz int_signal
655 pushq %rdi 643 pushq_cfi %rdi
656 CFI_ADJUST_CFA_OFFSET 8
657 leaq 8(%rsp),%rdi # &ptregs -> arg1 644 leaq 8(%rsp),%rdi # &ptregs -> arg1
658 call syscall_trace_leave 645 call syscall_trace_leave
659 popq %rdi 646 popq_cfi %rdi
660 CFI_ADJUST_CFA_OFFSET -8
661 andl $~(_TIF_WORK_SYSCALL_EXIT|_TIF_SYSCALL_EMU),%edi 647 andl $~(_TIF_WORK_SYSCALL_EXIT|_TIF_SYSCALL_EMU),%edi
662 jmp int_restore_rest 648 jmp int_restore_rest
663 649
@@ -714,9 +700,8 @@ END(ptregscall_common)
714 700
715ENTRY(stub_execve) 701ENTRY(stub_execve)
716 CFI_STARTPROC 702 CFI_STARTPROC
717 popq %r11 703 addq $8, %rsp
718 CFI_ADJUST_CFA_OFFSET -8 704 PARTIAL_FRAME 0
719 CFI_REGISTER rip, r11
720 SAVE_REST 705 SAVE_REST
721 FIXUP_TOP_OF_STACK %r11 706 FIXUP_TOP_OF_STACK %r11
722 movq %rsp, %rcx 707 movq %rsp, %rcx
@@ -735,7 +720,7 @@ END(stub_execve)
735ENTRY(stub_rt_sigreturn) 720ENTRY(stub_rt_sigreturn)
736 CFI_STARTPROC 721 CFI_STARTPROC
737 addq $8, %rsp 722 addq $8, %rsp
738 CFI_ADJUST_CFA_OFFSET -8 723 PARTIAL_FRAME 0
739 SAVE_REST 724 SAVE_REST
740 movq %rsp,%rdi 725 movq %rsp,%rdi
741 FIXUP_TOP_OF_STACK %r11 726 FIXUP_TOP_OF_STACK %r11
@@ -766,8 +751,7 @@ vector=FIRST_EXTERNAL_VECTOR
766 .if vector <> FIRST_EXTERNAL_VECTOR 751 .if vector <> FIRST_EXTERNAL_VECTOR
767 CFI_ADJUST_CFA_OFFSET -8 752 CFI_ADJUST_CFA_OFFSET -8
768 .endif 753 .endif
7691: pushq $(~vector+0x80) /* Note: always in signed byte range */ 7541: pushq_cfi $(~vector+0x80) /* Note: always in signed byte range */
770 CFI_ADJUST_CFA_OFFSET 8
771 .if ((vector-FIRST_EXTERNAL_VECTOR)%7) <> 6 755 .if ((vector-FIRST_EXTERNAL_VECTOR)%7) <> 6
772 jmp 2f 756 jmp 2f
773 .endif 757 .endif
@@ -796,8 +780,8 @@ END(interrupt)
796 780
797/* 0(%rsp): ~(interrupt number) */ 781/* 0(%rsp): ~(interrupt number) */
798 .macro interrupt func 782 .macro interrupt func
799 subq $10*8, %rsp 783 subq $ORIG_RAX-ARGOFFSET+8, %rsp
800 CFI_ADJUST_CFA_OFFSET 10*8 784 CFI_ADJUST_CFA_OFFSET ORIG_RAX-ARGOFFSET+8
801 call save_args 785 call save_args
802 PARTIAL_FRAME 0 786 PARTIAL_FRAME 0
803 call \func 787 call \func
@@ -822,6 +806,7 @@ ret_from_intr:
822 TRACE_IRQS_OFF 806 TRACE_IRQS_OFF
823 decl PER_CPU_VAR(irq_count) 807 decl PER_CPU_VAR(irq_count)
824 leaveq 808 leaveq
809 CFI_RESTORE rbp
825 CFI_DEF_CFA_REGISTER rsp 810 CFI_DEF_CFA_REGISTER rsp
826 CFI_ADJUST_CFA_OFFSET -8 811 CFI_ADJUST_CFA_OFFSET -8
827exit_intr: 812exit_intr:
@@ -903,11 +888,9 @@ retint_careful:
903 jnc retint_signal 888 jnc retint_signal
904 TRACE_IRQS_ON 889 TRACE_IRQS_ON
905 ENABLE_INTERRUPTS(CLBR_NONE) 890 ENABLE_INTERRUPTS(CLBR_NONE)
906 pushq %rdi 891 pushq_cfi %rdi
907 CFI_ADJUST_CFA_OFFSET 8
908 call schedule 892 call schedule
909 popq %rdi 893 popq_cfi %rdi
910 CFI_ADJUST_CFA_OFFSET -8
911 GET_THREAD_INFO(%rcx) 894 GET_THREAD_INFO(%rcx)
912 DISABLE_INTERRUPTS(CLBR_NONE) 895 DISABLE_INTERRUPTS(CLBR_NONE)
913 TRACE_IRQS_OFF 896 TRACE_IRQS_OFF
@@ -956,8 +939,7 @@ END(common_interrupt)
956.macro apicinterrupt num sym do_sym 939.macro apicinterrupt num sym do_sym
957ENTRY(\sym) 940ENTRY(\sym)
958 INTR_FRAME 941 INTR_FRAME
959 pushq $~(\num) 942 pushq_cfi $~(\num)
960 CFI_ADJUST_CFA_OFFSET 8
961 interrupt \do_sym 943 interrupt \do_sym
962 jmp ret_from_intr 944 jmp ret_from_intr
963 CFI_ENDPROC 945 CFI_ENDPROC
@@ -1023,9 +1005,9 @@ apicinterrupt ERROR_APIC_VECTOR \
1023apicinterrupt SPURIOUS_APIC_VECTOR \ 1005apicinterrupt SPURIOUS_APIC_VECTOR \
1024 spurious_interrupt smp_spurious_interrupt 1006 spurious_interrupt smp_spurious_interrupt
1025 1007
1026#ifdef CONFIG_PERF_EVENTS 1008#ifdef CONFIG_IRQ_WORK
1027apicinterrupt LOCAL_PENDING_VECTOR \ 1009apicinterrupt IRQ_WORK_VECTOR \
1028 perf_pending_interrupt smp_perf_pending_interrupt 1010 irq_work_interrupt smp_irq_work_interrupt
1029#endif 1011#endif
1030 1012
1031/* 1013/*
@@ -1036,8 +1018,8 @@ ENTRY(\sym)
1036 INTR_FRAME 1018 INTR_FRAME
1037 PARAVIRT_ADJUST_EXCEPTION_FRAME 1019 PARAVIRT_ADJUST_EXCEPTION_FRAME
1038 pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */ 1020 pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */
1039 subq $15*8,%rsp 1021 subq $ORIG_RAX-R15, %rsp
1040 CFI_ADJUST_CFA_OFFSET 15*8 1022 CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15
1041 call error_entry 1023 call error_entry
1042 DEFAULT_FRAME 0 1024 DEFAULT_FRAME 0
1043 movq %rsp,%rdi /* pt_regs pointer */ 1025 movq %rsp,%rdi /* pt_regs pointer */
@@ -1052,9 +1034,9 @@ END(\sym)
1052ENTRY(\sym) 1034ENTRY(\sym)
1053 INTR_FRAME 1035 INTR_FRAME
1054 PARAVIRT_ADJUST_EXCEPTION_FRAME 1036 PARAVIRT_ADJUST_EXCEPTION_FRAME
1055 pushq $-1 /* ORIG_RAX: no syscall to restart */ 1037 pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */
1056 CFI_ADJUST_CFA_OFFSET 8 1038 subq $ORIG_RAX-R15, %rsp
1057 subq $15*8, %rsp 1039 CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15
1058 call save_paranoid 1040 call save_paranoid
1059 TRACE_IRQS_OFF 1041 TRACE_IRQS_OFF
1060 movq %rsp,%rdi /* pt_regs pointer */ 1042 movq %rsp,%rdi /* pt_regs pointer */
@@ -1070,9 +1052,9 @@ END(\sym)
1070ENTRY(\sym) 1052ENTRY(\sym)
1071 INTR_FRAME 1053 INTR_FRAME
1072 PARAVIRT_ADJUST_EXCEPTION_FRAME 1054 PARAVIRT_ADJUST_EXCEPTION_FRAME
1073 pushq $-1 /* ORIG_RAX: no syscall to restart */ 1055 pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */
1074 CFI_ADJUST_CFA_OFFSET 8 1056 subq $ORIG_RAX-R15, %rsp
1075 subq $15*8, %rsp 1057 CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15
1076 call save_paranoid 1058 call save_paranoid
1077 TRACE_IRQS_OFF 1059 TRACE_IRQS_OFF
1078 movq %rsp,%rdi /* pt_regs pointer */ 1060 movq %rsp,%rdi /* pt_regs pointer */
@@ -1089,8 +1071,8 @@ END(\sym)
1089ENTRY(\sym) 1071ENTRY(\sym)
1090 XCPT_FRAME 1072 XCPT_FRAME
1091 PARAVIRT_ADJUST_EXCEPTION_FRAME 1073 PARAVIRT_ADJUST_EXCEPTION_FRAME
1092 subq $15*8,%rsp 1074 subq $ORIG_RAX-R15, %rsp
1093 CFI_ADJUST_CFA_OFFSET 15*8 1075 CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15
1094 call error_entry 1076 call error_entry
1095 DEFAULT_FRAME 0 1077 DEFAULT_FRAME 0
1096 movq %rsp,%rdi /* pt_regs pointer */ 1078 movq %rsp,%rdi /* pt_regs pointer */
@@ -1107,8 +1089,8 @@ END(\sym)
1107ENTRY(\sym) 1089ENTRY(\sym)
1108 XCPT_FRAME 1090 XCPT_FRAME
1109 PARAVIRT_ADJUST_EXCEPTION_FRAME 1091 PARAVIRT_ADJUST_EXCEPTION_FRAME
1110 subq $15*8,%rsp 1092 subq $ORIG_RAX-R15, %rsp
1111 CFI_ADJUST_CFA_OFFSET 15*8 1093 CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15
1112 call save_paranoid 1094 call save_paranoid
1113 DEFAULT_FRAME 0 1095 DEFAULT_FRAME 0
1114 TRACE_IRQS_OFF 1096 TRACE_IRQS_OFF
@@ -1139,16 +1121,14 @@ zeroentry simd_coprocessor_error do_simd_coprocessor_error
1139 /* edi: new selector */ 1121 /* edi: new selector */
1140ENTRY(native_load_gs_index) 1122ENTRY(native_load_gs_index)
1141 CFI_STARTPROC 1123 CFI_STARTPROC
1142 pushf 1124 pushfq_cfi
1143 CFI_ADJUST_CFA_OFFSET 8
1144 DISABLE_INTERRUPTS(CLBR_ANY & ~CLBR_RDI) 1125 DISABLE_INTERRUPTS(CLBR_ANY & ~CLBR_RDI)
1145 SWAPGS 1126 SWAPGS
1146gs_change: 1127gs_change:
1147 movl %edi,%gs 1128 movl %edi,%gs
11482: mfence /* workaround */ 11292: mfence /* workaround */
1149 SWAPGS 1130 SWAPGS
1150 popf 1131 popfq_cfi
1151 CFI_ADJUST_CFA_OFFSET -8
1152 ret 1132 ret
1153 CFI_ENDPROC 1133 CFI_ENDPROC
1154END(native_load_gs_index) 1134END(native_load_gs_index)
@@ -1215,8 +1195,7 @@ END(kernel_execve)
1215/* Call softirq on interrupt stack. Interrupts are off. */ 1195/* Call softirq on interrupt stack. Interrupts are off. */
1216ENTRY(call_softirq) 1196ENTRY(call_softirq)
1217 CFI_STARTPROC 1197 CFI_STARTPROC
1218 push %rbp 1198 pushq_cfi %rbp
1219 CFI_ADJUST_CFA_OFFSET 8
1220 CFI_REL_OFFSET rbp,0 1199 CFI_REL_OFFSET rbp,0
1221 mov %rsp,%rbp 1200 mov %rsp,%rbp
1222 CFI_DEF_CFA_REGISTER rbp 1201 CFI_DEF_CFA_REGISTER rbp
@@ -1225,6 +1204,7 @@ ENTRY(call_softirq)
1225 push %rbp # backlink for old unwinder 1204 push %rbp # backlink for old unwinder
1226 call __do_softirq 1205 call __do_softirq
1227 leaveq 1206 leaveq
1207 CFI_RESTORE rbp
1228 CFI_DEF_CFA_REGISTER rsp 1208 CFI_DEF_CFA_REGISTER rsp
1229 CFI_ADJUST_CFA_OFFSET -8 1209 CFI_ADJUST_CFA_OFFSET -8
1230 decl PER_CPU_VAR(irq_count) 1210 decl PER_CPU_VAR(irq_count)
@@ -1368,7 +1348,7 @@ paranoidzeroentry machine_check *machine_check_vector(%rip)
1368 1348
1369 /* ebx: no swapgs flag */ 1349 /* ebx: no swapgs flag */
1370ENTRY(paranoid_exit) 1350ENTRY(paranoid_exit)
1371 INTR_FRAME 1351 DEFAULT_FRAME
1372 DISABLE_INTERRUPTS(CLBR_NONE) 1352 DISABLE_INTERRUPTS(CLBR_NONE)
1373 TRACE_IRQS_OFF 1353 TRACE_IRQS_OFF
1374 testl %ebx,%ebx /* swapgs needed? */ 1354 testl %ebx,%ebx /* swapgs needed? */
@@ -1445,7 +1425,6 @@ error_swapgs:
1445error_sti: 1425error_sti:
1446 TRACE_IRQS_OFF 1426 TRACE_IRQS_OFF
1447 ret 1427 ret
1448 CFI_ENDPROC
1449 1428
1450/* 1429/*
1451 * There are two places in the kernel that can potentially fault with 1430 * There are two places in the kernel that can potentially fault with
@@ -1470,6 +1449,7 @@ bstep_iret:
1470 /* Fix truncated RIP */ 1449 /* Fix truncated RIP */
1471 movq %rcx,RIP+8(%rsp) 1450 movq %rcx,RIP+8(%rsp)
1472 jmp error_swapgs 1451 jmp error_swapgs
1452 CFI_ENDPROC
1473END(error_entry) 1453END(error_entry)
1474 1454
1475 1455
@@ -1498,8 +1478,8 @@ ENTRY(nmi)
1498 INTR_FRAME 1478 INTR_FRAME
1499 PARAVIRT_ADJUST_EXCEPTION_FRAME 1479 PARAVIRT_ADJUST_EXCEPTION_FRAME
1500 pushq_cfi $-1 1480 pushq_cfi $-1
1501 subq $15*8, %rsp 1481 subq $ORIG_RAX-R15, %rsp
1502 CFI_ADJUST_CFA_OFFSET 15*8 1482 CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15
1503 call save_paranoid 1483 call save_paranoid
1504 DEFAULT_FRAME 0 1484 DEFAULT_FRAME 0
1505 /* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */ 1485 /* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */
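
The magic constants replaced in entry_64.S encode the pt_regs layout: with R15 at offset 0 and ORIG_RAX fifteen slots above it, ORIG_RAX-R15 is exactly the old 15*8, and ORIG_RAX-ARGOFFSET+8 (ARGOFFSET being the R11 slot) is the old 10*8. A minimal userspace check of that arithmetic against a stand-in layout (field order copied from the kernel's x86-64 pt_regs; the struct here is illustrative, not the kernel's definition):

    #include <stddef.h>
    #include <stdio.h>

    /* Stand-in mirroring the x86-64 pt_regs field order. */
    struct pt_regs {
            unsigned long r15, r14, r13, r12, bp, bx;
            unsigned long r11, r10, r9, r8, ax, cx, dx, si, di;
            unsigned long orig_ax, ip, cs, flags, sp, ss;
    };

    int main(void)
    {
            size_t ORIG_RAX  = offsetof(struct pt_regs, orig_ax);
            size_t R15       = offsetof(struct pt_regs, r15);
            size_t ARGOFFSET = offsetof(struct pt_regs, r11);

            printf("%zu %zu\n", ORIG_RAX - R15,           /* 120 == 15*8 */
                                ORIG_RAX - ARGOFFSET + 8); /* 80 == 10*8 */
            return 0;
    }

The replacement is byte-for-byte identical code; it just makes the stack adjustments self-documenting.
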
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index cd37469b54ee..3afb33f14d2d 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -257,14 +257,9 @@ do_ftrace_mod_code(unsigned long ip, void *new_code)
257 return mod_code_status; 257 return mod_code_status;
258} 258}
259 259
260
261
262
263static unsigned char ftrace_nop[MCOUNT_INSN_SIZE];
264
265static unsigned char *ftrace_nop_replace(void) 260static unsigned char *ftrace_nop_replace(void)
266{ 261{
267 return ftrace_nop; 262 return ideal_nop5;
268} 263}
269 264
270static int 265static int
@@ -338,62 +333,6 @@ int ftrace_update_ftrace_func(ftrace_func_t func)
338 333
339int __init ftrace_dyn_arch_init(void *data) 334int __init ftrace_dyn_arch_init(void *data)
340{ 335{
341 extern const unsigned char ftrace_test_p6nop[];
342 extern const unsigned char ftrace_test_nop5[];
343 extern const unsigned char ftrace_test_jmp[];
344 int faulted = 0;
345
346 /*
347 * There is no good nop for all x86 archs.
348 * We will default to using the P6_NOP5, but first we
349 * will test to make sure that the nop will actually
350 * work on this CPU. If it faults, we will then
 351	 * go to a less efficient 5-byte nop. If that fails
 352	 * we then just use a jmp as our nop. This isn't the most
 353	 * efficient nop, but we cannot use a multi-part nop
354 * since we would then risk being preempted in the middle
355 * of that nop, and if we enabled tracing then, it might
356 * cause a system crash.
357 *
358 * TODO: check the cpuid to determine the best nop.
359 */
360 asm volatile (
361 "ftrace_test_jmp:"
362 "jmp ftrace_test_p6nop\n"
363 "nop\n"
364 "nop\n"
365 "nop\n" /* 2 byte jmp + 3 bytes */
366 "ftrace_test_p6nop:"
367 P6_NOP5
368 "jmp 1f\n"
369 "ftrace_test_nop5:"
370 ".byte 0x66,0x66,0x66,0x66,0x90\n"
371 "1:"
372 ".section .fixup, \"ax\"\n"
373 "2: movl $1, %0\n"
374 " jmp ftrace_test_nop5\n"
375 "3: movl $2, %0\n"
376 " jmp 1b\n"
377 ".previous\n"
378 _ASM_EXTABLE(ftrace_test_p6nop, 2b)
379 _ASM_EXTABLE(ftrace_test_nop5, 3b)
380 : "=r"(faulted) : "0" (faulted));
381
382 switch (faulted) {
383 case 0:
384 pr_info("converting mcount calls to 0f 1f 44 00 00\n");
385 memcpy(ftrace_nop, ftrace_test_p6nop, MCOUNT_INSN_SIZE);
386 break;
387 case 1:
388 pr_info("converting mcount calls to 66 66 66 66 90\n");
389 memcpy(ftrace_nop, ftrace_test_nop5, MCOUNT_INSN_SIZE);
390 break;
391 case 2:
392 pr_info("converting mcount calls to jmp . + 5\n");
393 memcpy(ftrace_nop, ftrace_test_jmp, MCOUNT_INSN_SIZE);
394 break;
395 }
396
 397	/* The return code is returned via data */ 336	/* The return code is returned via data */
398 *(unsigned long *)data = 0; 337 *(unsigned long *)data = 0;
399 338
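
The boot-time probing removed here chose between three 5-byte nop encodings; per the setup.c hunk further down, that selection now happens once in arch_init_ideal_nop5() and is exported as ideal_nop5 for both ftrace and jump labels. The candidate byte patterns, taken from the removed pr_info() lines and inline asm (array names ours; the jmp decoding is ours as well):

    /* Candidate 5-byte nops the removed probe chose between: */
    static const unsigned char p6_nop5[]  = { 0x0f, 0x1f, 0x44, 0x00, 0x00 }; /* nopl 0x0(%eax,%eax,1) */
    static const unsigned char alt_nop5[] = { 0x66, 0x66, 0x66, 0x66, 0x90 }; /* prefixed 1-byte nop */
    static const unsigned char jmp_nop5[] = { 0xe9, 0x00, 0x00, 0x00, 0x00 }; /* jmp . + 5 */

All three must be a single instruction: a multi-part nop could be preempted halfway through while being patched, as the removed comment explains.
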
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c
index a46cb3522c0c..58bb239a2fd7 100644
--- a/arch/x86/kernel/i387.c
+++ b/arch/x86/kernel/i387.c
@@ -68,19 +68,22 @@ static void __cpuinit init_thread_xstate(void)
68 */ 68 */
69 69
70 if (!HAVE_HWFP) { 70 if (!HAVE_HWFP) {
71 /*
72 * Disable xsave as we do not support it if i387
73 * emulation is enabled.
74 */
75 setup_clear_cpu_cap(X86_FEATURE_XSAVE);
76 setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT);
71 xstate_size = sizeof(struct i387_soft_struct); 77 xstate_size = sizeof(struct i387_soft_struct);
72 return; 78 return;
73 } 79 }
74 80
75 if (cpu_has_fxsr) 81 if (cpu_has_fxsr)
76 xstate_size = sizeof(struct i387_fxsave_struct); 82 xstate_size = sizeof(struct i387_fxsave_struct);
77#ifdef CONFIG_X86_32
78 else 83 else
79 xstate_size = sizeof(struct i387_fsave_struct); 84 xstate_size = sizeof(struct i387_fsave_struct);
80#endif
81} 85}
82 86
83#ifdef CONFIG_X86_64
84/* 87/*
85 * Called at bootup to set up the initial FPU state that is later cloned 88 * Called at bootup to set up the initial FPU state that is later cloned
86 * into all processes. 89 * into all processes.
@@ -88,12 +91,21 @@ static void __cpuinit init_thread_xstate(void)
88 91
89void __cpuinit fpu_init(void) 92void __cpuinit fpu_init(void)
90{ 93{
91 unsigned long oldcr0 = read_cr0(); 94 unsigned long cr0;
92 95 unsigned long cr4_mask = 0;
93 set_in_cr4(X86_CR4_OSFXSR);
94 set_in_cr4(X86_CR4_OSXMMEXCPT);
95 96
96 write_cr0(oldcr0 & ~(X86_CR0_TS|X86_CR0_EM)); /* clear TS and EM */ 97 if (cpu_has_fxsr)
98 cr4_mask |= X86_CR4_OSFXSR;
99 if (cpu_has_xmm)
100 cr4_mask |= X86_CR4_OSXMMEXCPT;
101 if (cr4_mask)
102 set_in_cr4(cr4_mask);
103
104 cr0 = read_cr0();
105 cr0 &= ~(X86_CR0_TS|X86_CR0_EM); /* clear TS and EM */
106 if (!HAVE_HWFP)
107 cr0 |= X86_CR0_EM;
108 write_cr0(cr0);
97 109
98 if (!smp_processor_id()) 110 if (!smp_processor_id())
99 init_thread_xstate(); 111 init_thread_xstate();
@@ -104,24 +116,12 @@ void __cpuinit fpu_init(void)
104 clear_used_math(); 116 clear_used_math();
105} 117}
106 118
107#else /* CONFIG_X86_64 */
108
109void __cpuinit fpu_init(void)
110{
111 if (!smp_processor_id())
112 init_thread_xstate();
113}
114
115#endif /* CONFIG_X86_32 */
116
117void fpu_finit(struct fpu *fpu) 119void fpu_finit(struct fpu *fpu)
118{ 120{
119#ifdef CONFIG_X86_32
120 if (!HAVE_HWFP) { 121 if (!HAVE_HWFP) {
121 finit_soft_fpu(&fpu->state->soft); 122 finit_soft_fpu(&fpu->state->soft);
122 return; 123 return;
123 } 124 }
124#endif
125 125
126 if (cpu_has_fxsr) { 126 if (cpu_has_fxsr) {
127 struct i387_fxsave_struct *fx = &fpu->state->fxsave; 127 struct i387_fxsave_struct *fx = &fpu->state->fxsave;
@@ -386,19 +386,17 @@ convert_from_fxsr(struct user_i387_ia32_struct *env, struct task_struct *tsk)
386#ifdef CONFIG_X86_64 386#ifdef CONFIG_X86_64
387 env->fip = fxsave->rip; 387 env->fip = fxsave->rip;
388 env->foo = fxsave->rdp; 388 env->foo = fxsave->rdp;
389 /*
390 * should be actually ds/cs at fpu exception time, but
391 * that information is not available in 64bit mode.
392 */
393 env->fcs = task_pt_regs(tsk)->cs;
389 if (tsk == current) { 394 if (tsk == current) {
390 /* 395 savesegment(ds, env->fos);
391 * should be actually ds/cs at fpu exception time, but
392 * that information is not available in 64bit mode.
393 */
394 asm("mov %%ds, %[fos]" : [fos] "=r" (env->fos));
395 asm("mov %%cs, %[fcs]" : [fcs] "=r" (env->fcs));
396 } else { 396 } else {
397 struct pt_regs *regs = task_pt_regs(tsk); 397 env->fos = tsk->thread.ds;
398
399 env->fos = 0xffff0000 | tsk->thread.ds;
400 env->fcs = regs->cs;
401 } 398 }
399 env->fos |= 0xffff0000;
402#else 400#else
403 env->fip = fxsave->fip; 401 env->fip = fxsave->fip;
404 env->fcs = (u16) fxsave->fcs | ((u32) fxsave->fop << 16); 402 env->fcs = (u16) fxsave->fcs | ((u32) fxsave->fop << 16);
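
The unified fpu_init()/fpu_finit()/convert_from_fxsr() paths above lean on two helpers not shown in the hunks. Sketches as we believe the x86 code defines them (HAVE_HWFP in the i387 code, savesegment() in asm/segment.h; treat both as assumptions):

    #ifdef CONFIG_MATH_EMULATION
    # define HAVE_HWFP      (boot_cpu_data.hard_math)
    #else
    # define HAVE_HWFP      1       /* hardware FPU assumed present */
    #endif

    /* Read a segment register into a variable without open-coded asm: */
    #define savesegment(seg, value) \
            asm("mov %%" #seg ",%0" : "=r" (value) : : "memory")

With HAVE_HWFP evaluating to a constant 1 on 64-bit configurations, the compiler drops the soft-FPU branches there, which is why the #ifdef CONFIG_X86_32 guards could go.
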
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 91fd0c70a18a..44edb03fc9ec 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -67,10 +67,10 @@ static int show_other_interrupts(struct seq_file *p, int prec)
67 for_each_online_cpu(j) 67 for_each_online_cpu(j)
68 seq_printf(p, "%10u ", irq_stats(j)->apic_perf_irqs); 68 seq_printf(p, "%10u ", irq_stats(j)->apic_perf_irqs);
69 seq_printf(p, " Performance monitoring interrupts\n"); 69 seq_printf(p, " Performance monitoring interrupts\n");
70 seq_printf(p, "%*s: ", prec, "PND"); 70 seq_printf(p, "%*s: ", prec, "IWI");
71 for_each_online_cpu(j) 71 for_each_online_cpu(j)
72 seq_printf(p, "%10u ", irq_stats(j)->apic_pending_irqs); 72 seq_printf(p, "%10u ", irq_stats(j)->apic_irq_work_irqs);
73 seq_printf(p, " Performance pending work\n"); 73 seq_printf(p, " IRQ work interrupts\n");
74#endif 74#endif
75 if (x86_platform_ipi_callback) { 75 if (x86_platform_ipi_callback) {
76 seq_printf(p, "%*s: ", prec, "PLT"); 76 seq_printf(p, "%*s: ", prec, "PLT");
@@ -185,7 +185,7 @@ u64 arch_irq_stat_cpu(unsigned int cpu)
185 sum += irq_stats(cpu)->apic_timer_irqs; 185 sum += irq_stats(cpu)->apic_timer_irqs;
186 sum += irq_stats(cpu)->irq_spurious_count; 186 sum += irq_stats(cpu)->irq_spurious_count;
187 sum += irq_stats(cpu)->apic_perf_irqs; 187 sum += irq_stats(cpu)->apic_perf_irqs;
188 sum += irq_stats(cpu)->apic_pending_irqs; 188 sum += irq_stats(cpu)->apic_irq_work_irqs;
189#endif 189#endif
190 if (x86_platform_ipi_callback) 190 if (x86_platform_ipi_callback)
191 sum += irq_stats(cpu)->x86_platform_ipis; 191 sum += irq_stats(cpu)->x86_platform_ipis;
diff --git a/arch/x86/kernel/irq_work.c b/arch/x86/kernel/irq_work.c
new file mode 100644
index 000000000000..ca8f703a1e70
--- /dev/null
+++ b/arch/x86/kernel/irq_work.c
@@ -0,0 +1,30 @@
1/*
2 * x86 specific code for irq_work
3 *
4 * Copyright (C) 2010 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
5 */
6
7#include <linux/kernel.h>
8#include <linux/irq_work.h>
9#include <linux/hardirq.h>
10#include <asm/apic.h>
11
12void smp_irq_work_interrupt(struct pt_regs *regs)
13{
14 irq_enter();
15 ack_APIC_irq();
16 inc_irq_stat(apic_irq_work_irqs);
17 irq_work_run();
18 irq_exit();
19}
20
21void arch_irq_work_raise(void)
22{
23#ifdef CONFIG_X86_LOCAL_APIC
24 if (!cpu_has_apic)
25 return;
26
27 apic->send_IPI_self(IRQ_WORK_VECTOR);
28 apic_wait_icr_idle();
29#endif
30}
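
For context, a minimal caller-side sketch of the generic API this new arch backend serves. The init_irq_work()/irq_work_queue() names are assumed from the <linux/irq_work.h> introduced by this series, so treat them as assumptions rather than a verified interface:

    #include <linux/irq_work.h>

    static void my_cb(struct irq_work *work)
    {
            /* Runs from smp_irq_work_interrupt(), i.e. hard-IRQ context,
             * shortly after the work is queued. */
    }

    static struct irq_work my_work;

    static void example(void)
    {
            init_irq_work(&my_work, my_cb);  /* assumed initializer */
            irq_work_queue(&my_work);        /* ends up in arch_irq_work_raise(),
                                                raising IRQ_WORK_VECTOR above */
    }

This is what replaces the perf-specific "pending" vector: any NMI-context user can now defer work into a real interrupt.
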
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
index 990ae7cfc578..713969b9266b 100644
--- a/arch/x86/kernel/irqinit.c
+++ b/arch/x86/kernel/irqinit.c
@@ -224,9 +224,9 @@ static void __init apic_intr_init(void)
224 alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); 224 alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
225 alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt); 225 alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt);
226 226
227 /* Performance monitoring interrupts: */ 227 /* IRQ work interrupts: */
228# ifdef CONFIG_PERF_EVENTS 228# ifdef CONFIG_IRQ_WORK
229 alloc_intr_gate(LOCAL_PENDING_VECTOR, perf_pending_interrupt); 229 alloc_intr_gate(IRQ_WORK_VECTOR, irq_work_interrupt);
230# endif 230# endif
231 231
232#endif 232#endif
diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c
new file mode 100644
index 000000000000..961b6b30ba90
--- /dev/null
+++ b/arch/x86/kernel/jump_label.c
@@ -0,0 +1,50 @@
1/*
2 * jump label x86 support
3 *
4 * Copyright (C) 2009 Jason Baron <jbaron@redhat.com>
5 *
6 */
7#include <linux/jump_label.h>
8#include <linux/memory.h>
9#include <linux/uaccess.h>
10#include <linux/module.h>
11#include <linux/list.h>
12#include <linux/jhash.h>
13#include <linux/cpu.h>
14#include <asm/kprobes.h>
15#include <asm/alternative.h>
16
17#ifdef HAVE_JUMP_LABEL
18
19union jump_code_union {
20 char code[JUMP_LABEL_NOP_SIZE];
21 struct {
22 char jump;
23 int offset;
24 } __attribute__((packed));
25};
26
27void arch_jump_label_transform(struct jump_entry *entry,
28 enum jump_label_type type)
29{
30 union jump_code_union code;
31
32 if (type == JUMP_LABEL_ENABLE) {
33 code.jump = 0xe9;
34 code.offset = entry->target -
35 (entry->code + JUMP_LABEL_NOP_SIZE);
36 } else
37 memcpy(&code, ideal_nop5, JUMP_LABEL_NOP_SIZE);
38 get_online_cpus();
39 mutex_lock(&text_mutex);
40 text_poke_smp((void *)entry->code, &code, JUMP_LABEL_NOP_SIZE);
41 mutex_unlock(&text_mutex);
42 put_online_cpus();
43}
44
45void arch_jump_label_text_poke_early(jump_label_t addr)
46{
47 text_poke_early((void *)addr, ideal_nop5, JUMP_LABEL_NOP_SIZE);
48}
49
50#endif
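
The JUMP_LABEL_ENABLE branch above assembles a `jmp rel32` by hand: opcode 0xe9 followed by a displacement measured from the end of the 5-byte instruction, which is why the offset computation subtracts entry->code + JUMP_LABEL_NOP_SIZE. A standalone illustration of the same encoding (illustrative helper, not kernel code):

    #include <stdint.h>
    #include <string.h>

    static void make_jmp5(unsigned char buf[5], uintptr_t site, uintptr_t target)
    {
            int32_t rel = (int32_t)(target - (site + 5)); /* rel32 is end-relative */

            buf[0] = 0xe9;                  /* jmp rel32 opcode */
            memcpy(&buf[1], &rel, 4);       /* little-endian displacement */
    }

Disabling a label is the reverse: the 5 bytes are overwritten with ideal_nop5, the same nop ftrace now uses.
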
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index 770ebfb349e9..1cbd54c0df99 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -230,9 +230,6 @@ static int recover_probed_instruction(kprobe_opcode_t *buf, unsigned long addr)
230 return 0; 230 return 0;
231} 231}
232 232
233/* Dummy buffers for kallsyms_lookup */
234static char __dummy_buf[KSYM_NAME_LEN];
235
236/* Check if paddr is at an instruction boundary */ 233/* Check if paddr is at an instruction boundary */
237static int __kprobes can_probe(unsigned long paddr) 234static int __kprobes can_probe(unsigned long paddr)
238{ 235{
@@ -241,7 +238,7 @@ static int __kprobes can_probe(unsigned long paddr)
241 struct insn insn; 238 struct insn insn;
242 kprobe_opcode_t buf[MAX_INSN_SIZE]; 239 kprobe_opcode_t buf[MAX_INSN_SIZE];
243 240
244 if (!kallsyms_lookup(paddr, NULL, &offset, NULL, __dummy_buf)) 241 if (!kallsyms_lookup_size_offset(paddr, NULL, &offset))
245 return 0; 242 return 0;
246 243
247 /* Decode instructions */ 244 /* Decode instructions */
@@ -1129,7 +1126,7 @@ static void __kprobes synthesize_set_arg1(kprobe_opcode_t *addr,
1129 *(unsigned long *)addr = val; 1126 *(unsigned long *)addr = val;
1130} 1127}
1131 1128
1132void __kprobes kprobes_optinsn_template_holder(void) 1129static void __used __kprobes kprobes_optinsn_template_holder(void)
1133{ 1130{
1134 asm volatile ( 1131 asm volatile (
1135 ".global optprobe_template_entry\n" 1132 ".global optprobe_template_entry\n"
@@ -1221,7 +1218,8 @@ static int __kprobes copy_optimized_instructions(u8 *dest, u8 *src)
1221 } 1218 }
1222 /* Check whether the address range is reserved */ 1219 /* Check whether the address range is reserved */
1223 if (ftrace_text_reserved(src, src + len - 1) || 1220 if (ftrace_text_reserved(src, src + len - 1) ||
1224 alternatives_text_reserved(src, src + len - 1)) 1221 alternatives_text_reserved(src, src + len - 1) ||
1222 jump_label_text_reserved(src, src + len - 1))
1225 return -EBUSY; 1223 return -EBUSY;
1226 1224
1227 return len; 1225 return len;
@@ -1269,11 +1267,9 @@ static int __kprobes can_optimize(unsigned long paddr)
1269 unsigned long addr, size = 0, offset = 0; 1267 unsigned long addr, size = 0, offset = 0;
1270 struct insn insn; 1268 struct insn insn;
1271 kprobe_opcode_t buf[MAX_INSN_SIZE]; 1269 kprobe_opcode_t buf[MAX_INSN_SIZE];
1272 /* Dummy buffers for lookup_symbol_attrs */
1273 static char __dummy_buf[KSYM_NAME_LEN];
1274 1270
1275 /* Lookup symbol including addr */ 1271 /* Lookup symbol including addr */
1276 if (!kallsyms_lookup(paddr, &size, &offset, NULL, __dummy_buf)) 1272 if (!kallsyms_lookup_size_offset(paddr, &size, &offset))
1277 return 0; 1273 return 0;
1278 1274
1279 /* Check there is enough space for a relative jump. */ 1275 /* Check there is enough space for a relative jump. */
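
kallsyms_lookup_size_offset() is what lets both call sites drop their KSYM_NAME_LEN dummy buffers: it resolves only the size and offset, never the name. Its signature as we understand it from kernel/kallsyms.c (returns nonzero on success; treat as an assumption):

    int kallsyms_lookup_size_offset(unsigned long addr,
                                    unsigned long *symbolsize,
                                    unsigned long *offset);
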
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index 035c8c529181..b3ea9db39db6 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -36,7 +36,7 @@ static int init_one_level2_page(struct kimage *image, pgd_t *pgd,
36 if (!page) 36 if (!page)
37 goto out; 37 goto out;
38 pud = (pud_t *)page_address(page); 38 pud = (pud_t *)page_address(page);
39 memset(pud, 0, PAGE_SIZE); 39 clear_page(pud);
40 set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE)); 40 set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE));
41 } 41 }
42 pud = pud_offset(pgd, addr); 42 pud = pud_offset(pgd, addr);
@@ -45,7 +45,7 @@ static int init_one_level2_page(struct kimage *image, pgd_t *pgd,
45 if (!page) 45 if (!page)
46 goto out; 46 goto out;
47 pmd = (pmd_t *)page_address(page); 47 pmd = (pmd_t *)page_address(page);
48 memset(pmd, 0, PAGE_SIZE); 48 clear_page(pmd);
49 set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE)); 49 set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE));
50 } 50 }
51 pmd = pmd_offset(pud, addr); 51 pmd = pmd_offset(pud, addr);
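
clear_page() is the arch-tuned way to zero exactly one page; semantically it matches the memset() it replaces. A sketch of the generic equivalent (the x86-64 version is hand-written assembly, so this is illustrative only):

    static inline void clear_page(void *page)
    {
            memset(page, 0, PAGE_SIZE);     /* x86-64 substitutes optimized asm */
    }
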
diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c
index 1c355c550960..8f2956091735 100644
--- a/arch/x86/kernel/module.c
+++ b/arch/x86/kernel/module.c
@@ -239,6 +239,9 @@ int module_finalize(const Elf_Ehdr *hdr,
239 apply_paravirt(pseg, pseg + para->sh_size); 239 apply_paravirt(pseg, pseg + para->sh_size);
240 } 240 }
241 241
242 /* make jump label nops */
243 jump_label_apply_nops(me);
244
242 return 0; 245 return 0;
243} 246}
244 247
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index 0f7f130caa67..c562207b1b3d 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c
@@ -39,7 +39,7 @@
39#include <asm/cacheflush.h> 39#include <asm/cacheflush.h>
40#include <asm/swiotlb.h> 40#include <asm/swiotlb.h>
41#include <asm/dma.h> 41#include <asm/dma.h>
42#include <asm/k8.h> 42#include <asm/amd_nb.h>
43#include <asm/x86_init.h> 43#include <asm/x86_init.h>
44 44
45static unsigned long iommu_bus_base; /* GART remapping area (physical) */ 45static unsigned long iommu_bus_base; /* GART remapping area (physical) */
@@ -560,8 +560,11 @@ static void enable_gart_translations(void)
560{ 560{
561 int i; 561 int i;
562 562
563 for (i = 0; i < num_k8_northbridges; i++) { 563 if (!k8_northbridges.gart_supported)
564 struct pci_dev *dev = k8_northbridges[i]; 564 return;
565
566 for (i = 0; i < k8_northbridges.num; i++) {
567 struct pci_dev *dev = k8_northbridges.nb_misc[i];
565 568
566 enable_gart_translation(dev, __pa(agp_gatt_table)); 569 enable_gart_translation(dev, __pa(agp_gatt_table));
567 } 570 }
@@ -592,16 +595,19 @@ static void gart_fixup_northbridges(struct sys_device *dev)
592 if (!fix_up_north_bridges) 595 if (!fix_up_north_bridges)
593 return; 596 return;
594 597
598 if (!k8_northbridges.gart_supported)
599 return;
600
595 pr_info("PCI-DMA: Restoring GART aperture settings\n"); 601 pr_info("PCI-DMA: Restoring GART aperture settings\n");
596 602
597 for (i = 0; i < num_k8_northbridges; i++) { 603 for (i = 0; i < k8_northbridges.num; i++) {
598 struct pci_dev *dev = k8_northbridges[i]; 604 struct pci_dev *dev = k8_northbridges.nb_misc[i];
599 605
600 /* 606 /*
601 * Don't enable translations just yet. That is the next 607 * Don't enable translations just yet. That is the next
602 * step. Restore the pre-suspend aperture settings. 608 * step. Restore the pre-suspend aperture settings.
603 */ 609 */
604 pci_write_config_dword(dev, AMD64_GARTAPERTURECTL, aperture_order << 1); 610 gart_set_size_and_enable(dev, aperture_order);
605 pci_write_config_dword(dev, AMD64_GARTAPERTUREBASE, aperture_alloc >> 25); 611 pci_write_config_dword(dev, AMD64_GARTAPERTUREBASE, aperture_alloc >> 25);
606 } 612 }
607} 613}
@@ -649,8 +655,8 @@ static __init int init_k8_gatt(struct agp_kern_info *info)
649 655
650 aper_size = aper_base = info->aper_size = 0; 656 aper_size = aper_base = info->aper_size = 0;
651 dev = NULL; 657 dev = NULL;
652 for (i = 0; i < num_k8_northbridges; i++) { 658 for (i = 0; i < k8_northbridges.num; i++) {
653 dev = k8_northbridges[i]; 659 dev = k8_northbridges.nb_misc[i];
654 new_aper_base = read_aperture(dev, &new_aper_size); 660 new_aper_base = read_aperture(dev, &new_aper_size);
655 if (!new_aper_base) 661 if (!new_aper_base)
656 goto nommu; 662 goto nommu;
@@ -718,10 +724,13 @@ static void gart_iommu_shutdown(void)
718 if (!no_agp) 724 if (!no_agp)
719 return; 725 return;
720 726
721 for (i = 0; i < num_k8_northbridges; i++) { 727 if (!k8_northbridges.gart_supported)
728 return;
729
730 for (i = 0; i < k8_northbridges.num; i++) {
722 u32 ctl; 731 u32 ctl;
723 732
724 dev = k8_northbridges[i]; 733 dev = k8_northbridges.nb_misc[i];
725 pci_read_config_dword(dev, AMD64_GARTAPERTURECTL, &ctl); 734 pci_read_config_dword(dev, AMD64_GARTAPERTURECTL, &ctl);
726 735
727 ctl &= ~GARTEN; 736 ctl &= ~GARTEN;
@@ -739,7 +748,7 @@ int __init gart_iommu_init(void)
739 unsigned long scratch; 748 unsigned long scratch;
740 long i; 749 long i;
741 750
742 if (num_k8_northbridges == 0) 751 if (!k8_northbridges.gart_supported)
743 return 0; 752 return 0;
744 753
745#ifndef CONFIG_AGP_AMD64 754#ifndef CONFIG_AGP_AMD64
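
The GART loops above now index a descriptor instead of a bare array of PCI devices, and gate on gart_supported instead of a nonzero count. A sketch of the container as we assume <asm/amd_nb.h> declares it after the k8.h rename (field names taken from the usage here, types assumed):

    struct k8_northbridge_info {
            u16             num;            /* number of northbridges found */
            u8              gart_supported; /* GART present and usable */
            struct pci_dev  **nb_misc;      /* misc-function PCI devices */
    };
    extern struct k8_northbridge_info k8_northbridges;

Decoupling "northbridges exist" from "a GART exists" matters on newer AMD parts, which have northbridge devices but no GART.
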
diff --git a/arch/x86/kernel/pmtimer_64.c b/arch/x86/kernel/pmtimer_64.c
deleted file mode 100644
index b112406f1996..000000000000
--- a/arch/x86/kernel/pmtimer_64.c
+++ /dev/null
@@ -1,69 +0,0 @@
1/* Ported over from i386 by AK, original copyright was:
2 *
3 * (C) Dominik Brodowski <linux@brodo.de> 2003
4 *
5 * Driver to use the Power Management Timer (PMTMR) available in some
6 * southbridges as primary timing source for the Linux kernel.
7 *
8 * Based on parts of linux/drivers/acpi/hardware/hwtimer.c, timer_pit.c,
9 * timer_hpet.c, and on Arjan van de Ven's implementation for 2.4.
10 *
11 * This file is licensed under the GPL v2.
12 *
13 * Dropped all the hardware bug workarounds for now. Hopefully they
14 * are not needed on 64bit chipsets.
15 */
16
17#include <linux/jiffies.h>
18#include <linux/kernel.h>
19#include <linux/time.h>
20#include <linux/init.h>
21#include <linux/cpumask.h>
22#include <linux/acpi_pmtmr.h>
23
24#include <asm/io.h>
25#include <asm/proto.h>
26#include <asm/msr.h>
27#include <asm/vsyscall.h>
28
29static inline u32 cyc2us(u32 cycles)
30{
 31	/* The Power Management Timer runs at 3.579545 ticks per microsecond.
32 * 1 / PM_TIMER_FREQUENCY == 0.27936511 =~ 286/1024 [error: 0.024%]
33 *
34 * Even with HZ = 100, delta is at maximum 35796 ticks, so it can
35 * easily be multiplied with 286 (=0x11E) without having to fear
36 * u32 overflows.
37 */
38 cycles *= 286;
39 return (cycles >> 10);
40}
41
42static unsigned pmtimer_wait_tick(void)
43{
44 u32 a, b;
45 for (a = b = inl(pmtmr_ioport) & ACPI_PM_MASK;
46 a == b;
47 b = inl(pmtmr_ioport) & ACPI_PM_MASK)
48 cpu_relax();
49 return b;
50}
51
52/* note: wait time is rounded up to one tick */
53void pmtimer_wait(unsigned us)
54{
55 u32 a, b;
56 a = pmtimer_wait_tick();
57 do {
58 b = inl(pmtmr_ioport);
59 cpu_relax();
60 } while (cyc2us(b - a) < us);
61}
62
63static int __init nopmtimer_setup(char *s)
64{
65 pmtmr_ioport = 0;
66 return 1;
67}
68
69__setup("nopmtimer", nopmtimer_setup);
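
The 286/1024 approximation in the deleted cyc2us() is easy to verify: 1/3.579545 ≈ 0.2793651 µs per tick versus 286/1024 = 0.2792969, about 0.024% low, matching the stated error bound. A quick standalone check:

    #include <stdio.h>

    int main(void)
    {
            double exact  = 1.0 / 3.579545; /* us per PM-timer tick */
            double approx = 286.0 / 1024.0;

            printf("%.4f%%\n", 100.0 * (exact - approx) / exact); /* ~0.0244% */
            return 0;
    }
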
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 3d9ea531ddd1..b3d7a3a04f38 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -424,7 +424,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
424 load_TLS(next, cpu); 424 load_TLS(next, cpu);
425 425
426 /* Must be after DS reload */ 426 /* Must be after DS reload */
427 unlazy_fpu(prev_p); 427 __unlazy_fpu(prev_p);
428 428
429 /* Make sure cpu is ready for new context */ 429 /* Make sure cpu is ready for new context */
430 if (preload_fpu) 430 if (preload_fpu)
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index e3af342fe83a..7a4cf14223ba 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -84,7 +84,7 @@ static int __init reboot_setup(char *str)
84 } 84 }
85 /* we will leave sorting out the final value 85 /* we will leave sorting out the final value
86 when we are ready to reboot, since we might not 86 when we are ready to reboot, since we might not
87 have set up boot_cpu_id or smp_num_cpu */ 87 have detected BSP APIC ID or smp_num_cpu */
88 break; 88 break;
89#endif /* CONFIG_SMP */ 89#endif /* CONFIG_SMP */
90 90
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index c3a4fbb2b996..9e5cdaf73b48 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -107,11 +107,12 @@
107#include <asm/percpu.h> 107#include <asm/percpu.h>
108#include <asm/topology.h> 108#include <asm/topology.h>
109#include <asm/apicdef.h> 109#include <asm/apicdef.h>
110#include <asm/k8.h> 110#include <asm/amd_nb.h>
111#ifdef CONFIG_X86_64 111#ifdef CONFIG_X86_64
112#include <asm/numa_64.h> 112#include <asm/numa_64.h>
113#endif 113#endif
114#include <asm/mce.h> 114#include <asm/mce.h>
115#include <asm/alternative.h>
115 116
116/* 117/*
117 * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries. 118 * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries.
@@ -125,7 +126,6 @@ unsigned long max_pfn_mapped;
125RESERVE_BRK(dmi_alloc, 65536); 126RESERVE_BRK(dmi_alloc, 65536);
126#endif 127#endif
127 128
128unsigned int boot_cpu_id __read_mostly;
129 129
130static __initdata unsigned long _brk_start = (unsigned long)__brk_base; 130static __initdata unsigned long _brk_start = (unsigned long)__brk_base;
131unsigned long _brk_end = (unsigned long)__brk_base; 131unsigned long _brk_end = (unsigned long)__brk_base;
@@ -618,79 +618,7 @@ static __init void reserve_ibft_region(void)
618 reserve_early_overlap_ok(addr, addr + size, "ibft"); 618 reserve_early_overlap_ok(addr, addr + size, "ibft");
619} 619}
620 620
621#ifdef CONFIG_X86_RESERVE_LOW_64K 621static unsigned reserve_low = CONFIG_X86_RESERVE_LOW << 10;
622static int __init dmi_low_memory_corruption(const struct dmi_system_id *d)
623{
624 printk(KERN_NOTICE
625 "%s detected: BIOS may corrupt low RAM, working around it.\n",
626 d->ident);
627
628 e820_update_range(0, 0x10000, E820_RAM, E820_RESERVED);
629 sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
630
631 return 0;
632}
633#endif
634
635/* List of systems that have known low memory corruption BIOS problems */
636static struct dmi_system_id __initdata bad_bios_dmi_table[] = {
637#ifdef CONFIG_X86_RESERVE_LOW_64K
638 {
639 .callback = dmi_low_memory_corruption,
640 .ident = "AMI BIOS",
641 .matches = {
642 DMI_MATCH(DMI_BIOS_VENDOR, "American Megatrends Inc."),
643 },
644 },
645 {
646 .callback = dmi_low_memory_corruption,
647 .ident = "Phoenix BIOS",
648 .matches = {
649 DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies"),
650 },
651 },
652 {
653 .callback = dmi_low_memory_corruption,
654 .ident = "Phoenix/MSC BIOS",
655 .matches = {
656 DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix/MSC"),
657 },
658 },
659 /*
660 * AMI BIOS with low memory corruption was found on Intel DG45ID and
661 * DG45FC boards.
662 * It has a different DMI_BIOS_VENDOR = "Intel Corp.", for now we will
 663	 * match only DMI_BOARD_NAME and see if there are more bad products
664 * with this vendor.
665 */
666 {
667 .callback = dmi_low_memory_corruption,
668 .ident = "AMI BIOS",
669 .matches = {
670 DMI_MATCH(DMI_BOARD_NAME, "DG45ID"),
671 },
672 },
673 {
674 .callback = dmi_low_memory_corruption,
675 .ident = "AMI BIOS",
676 .matches = {
677 DMI_MATCH(DMI_BOARD_NAME, "DG45FC"),
678 },
679 },
680 /*
681 * The Dell Inspiron Mini 1012 has DMI_BIOS_VENDOR = "Dell Inc.", so
682 * match on the product name.
683 */
684 {
685 .callback = dmi_low_memory_corruption,
686 .ident = "Phoenix BIOS",
687 .matches = {
688 DMI_MATCH(DMI_PRODUCT_NAME, "Inspiron 1012"),
689 },
690 },
691#endif
692 {}
693};
694 622
695static void __init trim_bios_range(void) 623static void __init trim_bios_range(void)
696{ 624{
@@ -698,8 +626,14 @@ static void __init trim_bios_range(void)
698 * A special case is the first 4Kb of memory; 626 * A special case is the first 4Kb of memory;
699 * This is a BIOS owned area, not kernel ram, but generally 627 * This is a BIOS owned area, not kernel ram, but generally
700 * not listed as such in the E820 table. 628 * not listed as such in the E820 table.
629 *
630 * This typically reserves additional memory (64KiB by default)
631 * since some BIOSes are known to corrupt low memory. See the
632 * Kconfig help text for X86_RESERVE_LOW.
701 */ 633 */
702 e820_update_range(0, PAGE_SIZE, E820_RAM, E820_RESERVED); 634 e820_update_range(0, ALIGN(reserve_low, PAGE_SIZE),
635 E820_RAM, E820_RESERVED);
636
703 /* 637 /*
704 * special case: Some BIOSen report the PC BIOS 638 * special case: Some BIOSen report the PC BIOS
705 * area (640->1Mb) as ram even though it is not. 639 * area (640->1Mb) as ram even though it is not.
@@ -709,6 +643,28 @@ static void __init trim_bios_range(void)
709 sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); 643 sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
710} 644}
711 645
646static int __init parse_reservelow(char *p)
647{
648 unsigned long long size;
649
650 if (!p)
651 return -EINVAL;
652
653 size = memparse(p, &p);
654
655 if (size < 4096)
656 size = 4096;
657
658 if (size > 640*1024)
659 size = 640*1024;
660
661 reserve_low = size;
662
663 return 0;
664}
665
666early_param("reservelow", parse_reservelow);
667
712/* 668/*
713 * Determine if we were loaded by an EFI loader. If so, then we have also been 669 * Determine if we were loaded by an EFI loader. If so, then we have also been
714 * passed the efi memmap, systab, etc., so we should use these data structures 670 * passed the efi memmap, systab, etc., so we should use these data structures
@@ -726,6 +682,7 @@ void __init setup_arch(char **cmdline_p)
726{ 682{
727 int acpi = 0; 683 int acpi = 0;
728 int k8 = 0; 684 int k8 = 0;
685 unsigned long flags;
729 686
730#ifdef CONFIG_X86_32 687#ifdef CONFIG_X86_32
731 memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data)); 688 memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data));
@@ -863,8 +820,6 @@ void __init setup_arch(char **cmdline_p)
863 820
864 dmi_scan_machine(); 821 dmi_scan_machine();
865 822
866 dmi_check_system(bad_bios_dmi_table);
867
868 /* 823 /*
869 * VMware detection requires dmi to be available, so this 824 * VMware detection requires dmi to be available, so this
870 * needs to be done after dmi_scan_machine, for the BP. 825 * needs to be done after dmi_scan_machine, for the BP.
@@ -1071,6 +1026,10 @@ void __init setup_arch(char **cmdline_p)
1071 x86_init.oem.banner(); 1026 x86_init.oem.banner();
1072 1027
1073 mcheck_init(); 1028 mcheck_init();
1029
1030 local_irq_save(flags);
1031 arch_init_ideal_nop5();
1032 local_irq_restore(flags);
1074} 1033}
1075 1034
1076#ifdef CONFIG_X86_32 1035#ifdef CONFIG_X86_32
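
With the DMI blacklist gone, setup.c now always reserves a low-memory window sized by CONFIG_X86_RESERVE_LOW, and parse_reservelow() clamps any boot-time override to the 4 KiB–640 KiB range. Since memparse() accepts k/m/g suffixes, an override looks like this on the kernel command line (illustrative value):

    reservelow=64k
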
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index a60df9ae6454..2335c15c93a4 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -253,7 +253,7 @@ void __init setup_per_cpu_areas(void)
253 * Up to this point, the boot CPU has been using .init.data 253 * Up to this point, the boot CPU has been using .init.data
254 * area. Reload any changed state for the boot CPU. 254 * area. Reload any changed state for the boot CPU.
255 */ 255 */
256 if (cpu == boot_cpu_id) 256 if (!cpu)
257 switch_to_new_gdt(cpu); 257 switch_to_new_gdt(cpu);
258 } 258 }
259 259
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 8b3bfc4dd708..0116552c950d 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -62,6 +62,7 @@
62#include <asm/pgtable.h> 62#include <asm/pgtable.h>
63#include <asm/tlbflush.h> 63#include <asm/tlbflush.h>
64#include <asm/mtrr.h> 64#include <asm/mtrr.h>
65#include <asm/mwait.h>
65#include <asm/vmi.h> 66#include <asm/vmi.h>
66#include <asm/apic.h> 67#include <asm/apic.h>
67#include <asm/setup.h> 68#include <asm/setup.h>
@@ -397,6 +398,19 @@ void __cpuinit smp_store_cpu_info(int id)
397 identify_secondary_cpu(c); 398 identify_secondary_cpu(c);
398} 399}
399 400
401static void __cpuinit link_thread_siblings(int cpu1, int cpu2)
402{
403 struct cpuinfo_x86 *c1 = &cpu_data(cpu1);
404 struct cpuinfo_x86 *c2 = &cpu_data(cpu2);
405
406 cpumask_set_cpu(cpu1, cpu_sibling_mask(cpu2));
407 cpumask_set_cpu(cpu2, cpu_sibling_mask(cpu1));
408 cpumask_set_cpu(cpu1, cpu_core_mask(cpu2));
409 cpumask_set_cpu(cpu2, cpu_core_mask(cpu1));
410 cpumask_set_cpu(cpu1, c2->llc_shared_map);
411 cpumask_set_cpu(cpu2, c1->llc_shared_map);
412}
413
400 414
401void __cpuinit set_cpu_sibling_map(int cpu) 415void __cpuinit set_cpu_sibling_map(int cpu)
402{ 416{
@@ -409,14 +423,13 @@ void __cpuinit set_cpu_sibling_map(int cpu)
409 for_each_cpu(i, cpu_sibling_setup_mask) { 423 for_each_cpu(i, cpu_sibling_setup_mask) {
410 struct cpuinfo_x86 *o = &cpu_data(i); 424 struct cpuinfo_x86 *o = &cpu_data(i);
411 425
412 if (c->phys_proc_id == o->phys_proc_id && 426 if (cpu_has(c, X86_FEATURE_TOPOEXT)) {
413 c->cpu_core_id == o->cpu_core_id) { 427 if (c->phys_proc_id == o->phys_proc_id &&
414 cpumask_set_cpu(i, cpu_sibling_mask(cpu)); 428 c->compute_unit_id == o->compute_unit_id)
415 cpumask_set_cpu(cpu, cpu_sibling_mask(i)); 429 link_thread_siblings(cpu, i);
416 cpumask_set_cpu(i, cpu_core_mask(cpu)); 430 } else if (c->phys_proc_id == o->phys_proc_id &&
417 cpumask_set_cpu(cpu, cpu_core_mask(i)); 431 c->cpu_core_id == o->cpu_core_id) {
418 cpumask_set_cpu(i, c->llc_shared_map); 432 link_thread_siblings(cpu, i);
419 cpumask_set_cpu(cpu, o->llc_shared_map);
420 } 433 }
421 } 434 }
422 } else { 435 } else {
@@ -1383,11 +1396,88 @@ void play_dead_common(void)
1383 local_irq_disable(); 1396 local_irq_disable();
1384} 1397}
1385 1398
1399/*
1400 * We need to flush the caches before going to sleep, lest we have
1401 * dirty data in our caches when we come back up.
1402 */
1403static inline void mwait_play_dead(void)
1404{
1405 unsigned int eax, ebx, ecx, edx;
1406 unsigned int highest_cstate = 0;
1407 unsigned int highest_subcstate = 0;
1408 int i;
1409 void *mwait_ptr;
1410
1411 if (!cpu_has(&current_cpu_data, X86_FEATURE_MWAIT))
1412 return;
1413 if (!cpu_has(&current_cpu_data, X86_FEATURE_CLFLSH))
1414 return;
1415 if (current_cpu_data.cpuid_level < CPUID_MWAIT_LEAF)
1416 return;
1417
1418 eax = CPUID_MWAIT_LEAF;
1419 ecx = 0;
1420 native_cpuid(&eax, &ebx, &ecx, &edx);
1421
1422 /*
1423 * eax will be 0 if EDX enumeration is not valid.
1424 * It is initialized below to the (cstate, sub_cstate) value when EDX is valid.
1425 */
1426 if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED)) {
1427 eax = 0;
1428 } else {
1429 edx >>= MWAIT_SUBSTATE_SIZE;
1430 for (i = 0; i < 7 && edx; i++, edx >>= MWAIT_SUBSTATE_SIZE) {
1431 if (edx & MWAIT_SUBSTATE_MASK) {
1432 highest_cstate = i;
1433 highest_subcstate = edx & MWAIT_SUBSTATE_MASK;
1434 }
1435 }
1436 eax = (highest_cstate << MWAIT_SUBSTATE_SIZE) |
1437 (highest_subcstate - 1);
1438 }
1439
1440 /*
1441 * This should be a memory location in a cache line which is
1442 * unlikely to be touched by other processors. The actual
1443 * content is immaterial, as it is never modified in any way.
1444 */
1445 mwait_ptr = &current_thread_info()->flags;
1446
1447 wbinvd();
1448
1449 while (1) {
1450 /*
1451 * The CLFLUSH is a workaround for erratum AAI65 for
1452 * the Xeon 7400 series. It's not clear it is actually
1453 * needed, but it should be harmless in either case.
1454 * The WBINVD is insufficient due to the spurious-wakeup
1455 * case where we return around the loop.
1456 */
1457 clflush(mwait_ptr);
1458 __monitor(mwait_ptr, 0, 0);
1459 mb();
1460 __mwait(eax, 0);
1461 }
1462}
1463
1464static inline void hlt_play_dead(void)
1465{
1466 if (current_cpu_data.x86 >= 4)
1467 wbinvd();
1468
1469 while (1) {
1470 native_halt();
1471 }
1472}
1473
1386void native_play_dead(void) 1474void native_play_dead(void)
1387{ 1475{
1388 play_dead_common(); 1476 play_dead_common();
1389 tboot_shutdown(TB_SHUTDOWN_WFS); 1477 tboot_shutdown(TB_SHUTDOWN_WFS);
1390 wbinvd_halt(); 1478
1479 mwait_play_dead(); /* Only returns on failure */
1480 hlt_play_dead();
1391} 1481}
1392 1482
1393#else /* ... !CONFIG_HOTPLUG_CPU */ 1483#else /* ... !CONFIG_HOTPLUG_CPU */
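The mwait_play_dead() hunk above derives the deepest sleep hint from CPUID leaf 5, the MWAIT leaf. A minimal userspace sketch of the same decoding, assuming GCC's <cpuid.h> and constant values mirroring <asm/mwait.h>:

/*
 * Hedged userspace sketch: decode CPUID leaf 5 the way mwait_play_dead()
 * does to compute the deepest MWAIT hint. Constants mirror <asm/mwait.h>;
 * build with gcc on x86.
 */
#include <stdio.h>
#include <cpuid.h>

#define CPUID_MWAIT_LEAF		5
#define CPUID5_ECX_EXTENSIONS_SUPPORTED	0x1
#define MWAIT_SUBSTATE_SIZE		4
#define MWAIT_SUBSTATE_MASK		0xf

int main(void)
{
	unsigned int eax, ebx, ecx, edx;
	unsigned int highest_cstate = 0, highest_subcstate = 0;
	int i;

	if (!__get_cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &edx))
		return 1;

	if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED)) {
		eax = 0;	/* EDX enumeration is not valid */
	} else {
		edx >>= MWAIT_SUBSTATE_SIZE;	/* skip the C0 nibble */
		for (i = 0; i < 7 && edx; i++, edx >>= MWAIT_SUBSTATE_SIZE) {
			if (edx & MWAIT_SUBSTATE_MASK) {
				highest_cstate = i;
				highest_subcstate = edx & MWAIT_SUBSTATE_MASK;
			}
		}
		eax = (highest_cstate << MWAIT_SUBSTATE_SIZE) |
		      (highest_subcstate - 1);
	}
	printf("MWAIT hint for the deepest C-state: 0x%x\n", eax);
	return 0;
}

The kernel passes this hint to __mwait() in the offline loop; the CLFLUSH before each MONITOR is the Xeon 7400 erratum workaround noted in the comment above.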
diff --git a/arch/x86/kernel/sys_i386_32.c b/arch/x86/kernel/sys_i386_32.c
index d5e06624e34a..0b0cb5fede19 100644
--- a/arch/x86/kernel/sys_i386_32.c
+++ b/arch/x86/kernel/sys_i386_32.c
@@ -33,8 +33,8 @@ int kernel_execve(const char *filename,
33 const char *const envp[]) 33 const char *const envp[])
34{ 34{
35 long __res; 35 long __res;
36 asm volatile ("push %%ebx ; movl %2,%%ebx ; int $0x80 ; pop %%ebx" 36 asm volatile ("int $0x80"
37 : "=a" (__res) 37 : "=a" (__res)
38 : "0" (__NR_execve), "ri" (filename), "c" (argv), "d" (envp) : "memory"); 38 : "0" (__NR_execve), "b" (filename), "c" (argv), "d" (envp) : "memory");
39 return __res; 39 return __res;
40} 40}
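The sys_i386_32.c hunk drops the manual push/pop of %ebx in favor of GCC's "b" register constraint. The save/restore dance likely dated from i386 PIC builds, where %ebx is reserved as the GOT pointer; the kernel is not built PIC, so the constraint is safe. A hedged 32-bit sketch of the same convention, using write(2) instead of execve(2):

/*
 * Hedged sketch (32-bit only, build with -m32): the same int $0x80
 * convention with the "b" constraint, invoking write(2) for illustration.
 */
#include <unistd.h>
#include <sys/syscall.h>

static long sys_write_i386(int fd, const void *buf, unsigned long len)
{
	long res;

	asm volatile ("int $0x80"
		      : "=a" (res)
		      : "0" (SYS_write), "b" (fd), "c" (buf), "d" (len)
		      : "memory");
	return res;
}

int main(void)
{
	sys_write_i386(1, "hello\n", 6);
	return 0;
}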
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 60788dee0f8a..d43968503dd2 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -776,21 +776,10 @@ asmlinkage void math_state_restore(void)
776} 776}
777EXPORT_SYMBOL_GPL(math_state_restore); 777EXPORT_SYMBOL_GPL(math_state_restore);
778 778
779#ifndef CONFIG_MATH_EMULATION
780void math_emulate(struct math_emu_info *info)
781{
782 printk(KERN_EMERG
783 "math-emulation not enabled and no coprocessor found.\n");
784 printk(KERN_EMERG "killing %s.\n", current->comm);
785 force_sig(SIGFPE, current);
786 schedule();
787}
788#endif /* CONFIG_MATH_EMULATION */
789
790dotraplinkage void __kprobes 779dotraplinkage void __kprobes
791do_device_not_available(struct pt_regs *regs, long error_code) 780do_device_not_available(struct pt_regs *regs, long error_code)
792{ 781{
793#ifdef CONFIG_X86_32 782#ifdef CONFIG_MATH_EMULATION
794 if (read_cr0() & X86_CR0_EM) { 783 if (read_cr0() & X86_CR0_EM) {
795 struct math_emu_info info = { }; 784 struct math_emu_info info = { };
796 785
@@ -798,12 +787,12 @@ do_device_not_available(struct pt_regs *regs, long error_code)
798 787
799 info.regs = regs; 788 info.regs = regs;
800 math_emulate(&info); 789 math_emulate(&info);
801 } else { 790 return;
802 math_state_restore(); /* interrupts still off */
803 conditional_sti(regs);
804 } 791 }
805#else 792#endif
806 math_state_restore(); 793 math_state_restore(); /* interrupts still off */
794#ifdef CONFIG_X86_32
795 conditional_sti(regs);
807#endif 796#endif
808} 797}
809 798
@@ -881,18 +870,6 @@ void __init trap_init(void)
881#endif 870#endif
882 871
883#ifdef CONFIG_X86_32 872#ifdef CONFIG_X86_32
884 if (cpu_has_fxsr) {
885 printk(KERN_INFO "Enabling fast FPU save and restore... ");
886 set_in_cr4(X86_CR4_OSFXSR);
887 printk("done.\n");
888 }
889 if (cpu_has_xmm) {
890 printk(KERN_INFO
891 "Enabling unmasked SIMD FPU exception support... ");
892 set_in_cr4(X86_CR4_OSXMMEXCPT);
893 printk("done.\n");
894 }
895
896 set_system_trap_gate(SYSCALL_VECTOR, &system_call); 873 set_system_trap_gate(SYSCALL_VECTOR, &system_call);
897 set_bit(SYSCALL_VECTOR, used_vectors); 874 set_bit(SYSCALL_VECTOR, used_vectors);
898#endif 875#endif
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 26a863a9c2a8..0c40d8b72416 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -104,10 +104,14 @@ int __init notsc_setup(char *str)
104 104
105__setup("notsc", notsc_setup); 105__setup("notsc", notsc_setup);
106 106
107static int no_sched_irq_time;
108
107static int __init tsc_setup(char *str) 109static int __init tsc_setup(char *str)
108{ 110{
109 if (!strcmp(str, "reliable")) 111 if (!strcmp(str, "reliable"))
110 tsc_clocksource_reliable = 1; 112 tsc_clocksource_reliable = 1;
113 if (!strncmp(str, "noirqtime", 9))
114 no_sched_irq_time = 1;
111 return 1; 115 return 1;
112} 116}
113 117
@@ -801,6 +805,7 @@ void mark_tsc_unstable(char *reason)
801 if (!tsc_unstable) { 805 if (!tsc_unstable) {
802 tsc_unstable = 1; 806 tsc_unstable = 1;
803 sched_clock_stable = 0; 807 sched_clock_stable = 0;
808 disable_sched_clock_irqtime();
804 printk(KERN_INFO "Marking TSC unstable due to %s\n", reason); 809 printk(KERN_INFO "Marking TSC unstable due to %s\n", reason);
805 /* Change only the rating, when not registered */ 810 /* Change only the rating, when not registered */
806 if (clocksource_tsc.mult) 811 if (clocksource_tsc.mult)
@@ -892,60 +897,6 @@ static void __init init_tsc_clocksource(void)
892 clocksource_register_khz(&clocksource_tsc, tsc_khz); 897 clocksource_register_khz(&clocksource_tsc, tsc_khz);
893} 898}
894 899
895#ifdef CONFIG_X86_64
896/*
897 * calibrate_cpu is used on systems with fixed rate TSCs to determine
898 * processor frequency
899 */
900#define TICK_COUNT 100000000
901static unsigned long __init calibrate_cpu(void)
902{
903 int tsc_start, tsc_now;
904 int i, no_ctr_free;
905 unsigned long evntsel3 = 0, pmc3 = 0, pmc_now = 0;
906 unsigned long flags;
907
908 for (i = 0; i < 4; i++)
909 if (avail_to_resrv_perfctr_nmi_bit(i))
910 break;
911 no_ctr_free = (i == 4);
912 if (no_ctr_free) {
913 WARN(1, KERN_WARNING "Warning: AMD perfctrs busy ... "
914 "cpu_khz value may be incorrect.\n");
915 i = 3;
916 rdmsrl(MSR_K7_EVNTSEL3, evntsel3);
917 wrmsrl(MSR_K7_EVNTSEL3, 0);
918 rdmsrl(MSR_K7_PERFCTR3, pmc3);
919 } else {
920 reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i);
921 reserve_evntsel_nmi(MSR_K7_EVNTSEL0 + i);
922 }
923 local_irq_save(flags);
924 /* start measuring cycles, incrementing from 0 */
925 wrmsrl(MSR_K7_PERFCTR0 + i, 0);
926 wrmsrl(MSR_K7_EVNTSEL0 + i, 1 << 22 | 3 << 16 | 0x76);
927 rdtscl(tsc_start);
928 do {
929 rdmsrl(MSR_K7_PERFCTR0 + i, pmc_now);
930 tsc_now = get_cycles();
931 } while ((tsc_now - tsc_start) < TICK_COUNT);
932
933 local_irq_restore(flags);
934 if (no_ctr_free) {
935 wrmsrl(MSR_K7_EVNTSEL3, 0);
936 wrmsrl(MSR_K7_PERFCTR3, pmc3);
937 wrmsrl(MSR_K7_EVNTSEL3, evntsel3);
938 } else {
939 release_perfctr_nmi(MSR_K7_PERFCTR0 + i);
940 release_evntsel_nmi(MSR_K7_EVNTSEL0 + i);
941 }
942
943 return pmc_now * tsc_khz / (tsc_now - tsc_start);
944}
945#else
946static inline unsigned long calibrate_cpu(void) { return cpu_khz; }
947#endif
948
949void __init tsc_init(void) 900void __init tsc_init(void)
950{ 901{
951 u64 lpj; 902 u64 lpj;
@@ -964,10 +915,6 @@ void __init tsc_init(void)
964 return; 915 return;
965 } 916 }
966 917
967 if (cpu_has(&boot_cpu_data, X86_FEATURE_CONSTANT_TSC) &&
968 (boot_cpu_data.x86_vendor == X86_VENDOR_AMD))
969 cpu_khz = calibrate_cpu();
970
971 printk("Detected %lu.%03lu MHz processor.\n", 918 printk("Detected %lu.%03lu MHz processor.\n",
972 (unsigned long)cpu_khz / 1000, 919 (unsigned long)cpu_khz / 1000,
973 (unsigned long)cpu_khz % 1000); 920 (unsigned long)cpu_khz % 1000);
@@ -987,6 +934,9 @@ void __init tsc_init(void)
987 /* now allow native_sched_clock() to use rdtsc */ 934 /* now allow native_sched_clock() to use rdtsc */
988 tsc_disabled = 0; 935 tsc_disabled = 0;
989 936
937 if (!no_sched_irq_time)
938 enable_sched_clock_irqtime();
939
990 lpj = ((u64)tsc_khz * 1000); 940 lpj = ((u64)tsc_khz * 1000);
991 do_div(lpj, HZ); 941 do_div(lpj, HZ);
992 lpj_fine = lpj; 942 lpj_fine = lpj;
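The tsc.c hunks add a "tsc=noirqtime" boot flag gating enable_sched_clock_irqtime(), have mark_tsc_unstable() disable IRQ-time accounting as well, and drop the AMD perfctr-based calibrate_cpu() path. The flag parsing, mirrored in userspace (a sketch; the real code runs off the "tsc=" boot parameter):

/*
 * Hedged userspace mirror of the new tsc_setup() flag handling; the
 * kernel parses these strings from the "tsc=" boot parameter.
 */
#include <stdio.h>
#include <string.h>

static int tsc_reliable, no_sched_irq_time;

static void parse_tsc_opt(const char *str)
{
	if (!strcmp(str, "reliable"))
		tsc_reliable = 1;
	if (!strncmp(str, "noirqtime", 9))
		no_sched_irq_time = 1;
}

int main(void)
{
	parse_tsc_opt("noirqtime");
	printf("reliable=%d noirqtime=%d\n", tsc_reliable, no_sched_irq_time);
	return 0;
}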
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 77d8c0f4817d..22b06f7660f4 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1056,14 +1056,13 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu)
1056 1056
1057 vcpu->arch.apic = apic; 1057 vcpu->arch.apic = apic;
1058 1058
1059 apic->regs_page = alloc_page(GFP_KERNEL); 1059 apic->regs_page = alloc_page(GFP_KERNEL|__GFP_ZERO);
1060 if (apic->regs_page == NULL) { 1060 if (apic->regs_page == NULL) {
1061 printk(KERN_ERR "malloc apic regs error for vcpu %x\n", 1061 printk(KERN_ERR "malloc apic regs error for vcpu %x\n",
1062 vcpu->vcpu_id); 1062 vcpu->vcpu_id);
1063 goto nomem_free_apic; 1063 goto nomem_free_apic;
1064 } 1064 }
1065 apic->regs = page_address(apic->regs_page); 1065 apic->regs = page_address(apic->regs_page);
1066 memset(apic->regs, 0, PAGE_SIZE);
1067 apic->vcpu = vcpu; 1066 apic->vcpu = vcpu;
1068 1067
1069 hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC, 1068 hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC,
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index bc5b9b8d4a33..8a3f9f64f86f 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -766,7 +766,6 @@ static void init_vmcb(struct vcpu_svm *svm)
766 766
767 control->iopm_base_pa = iopm_base; 767 control->iopm_base_pa = iopm_base;
768 control->msrpm_base_pa = __pa(svm->msrpm); 768 control->msrpm_base_pa = __pa(svm->msrpm);
769 control->tsc_offset = 0;
770 control->int_ctl = V_INTR_MASKING_MASK; 769 control->int_ctl = V_INTR_MASKING_MASK;
771 770
772 init_seg(&save->es); 771 init_seg(&save->es);
@@ -902,6 +901,7 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
902 svm->vmcb_pa = page_to_pfn(page) << PAGE_SHIFT; 901 svm->vmcb_pa = page_to_pfn(page) << PAGE_SHIFT;
903 svm->asid_generation = 0; 902 svm->asid_generation = 0;
904 init_vmcb(svm); 903 init_vmcb(svm);
904 svm->vmcb->control.tsc_offset = 0-native_read_tsc();
905 905
906 err = fx_init(&svm->vcpu); 906 err = fx_init(&svm->vcpu);
907 if (err) 907 if (err)
@@ -3163,8 +3163,8 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
3163 sync_lapic_to_cr8(vcpu); 3163 sync_lapic_to_cr8(vcpu);
3164 3164
3165 save_host_msrs(vcpu); 3165 save_host_msrs(vcpu);
3166 fs_selector = kvm_read_fs(); 3166 savesegment(fs, fs_selector);
3167 gs_selector = kvm_read_gs(); 3167 savesegment(gs, gs_selector);
3168 ldt_selector = kvm_read_ldt(); 3168 ldt_selector = kvm_read_ldt();
3169 svm->vmcb->save.cr2 = vcpu->arch.cr2; 3169 svm->vmcb->save.cr2 = vcpu->arch.cr2;
3170 /* required for live migration with NPT */ 3170 /* required for live migration with NPT */
@@ -3251,10 +3251,15 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
3251 vcpu->arch.regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp; 3251 vcpu->arch.regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp;
3252 vcpu->arch.regs[VCPU_REGS_RIP] = svm->vmcb->save.rip; 3252 vcpu->arch.regs[VCPU_REGS_RIP] = svm->vmcb->save.rip;
3253 3253
3254 kvm_load_fs(fs_selector);
3255 kvm_load_gs(gs_selector);
3256 kvm_load_ldt(ldt_selector);
3257 load_host_msrs(vcpu); 3254 load_host_msrs(vcpu);
3255 loadsegment(fs, fs_selector);
3256#ifdef CONFIG_X86_64
3257 load_gs_index(gs_selector);
3258 wrmsrl(MSR_KERNEL_GS_BASE, current->thread.gs);
3259#else
3260 loadsegment(gs, gs_selector);
3261#endif
3262 kvm_load_ldt(ldt_selector);
3258 3263
3259 reload_tss(vcpu); 3264 reload_tss(vcpu);
3260 3265
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 49b25eee25ac..7bddfab12013 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -803,7 +803,7 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu)
803 */ 803 */
804 vmx->host_state.ldt_sel = kvm_read_ldt(); 804 vmx->host_state.ldt_sel = kvm_read_ldt();
805 vmx->host_state.gs_ldt_reload_needed = vmx->host_state.ldt_sel; 805 vmx->host_state.gs_ldt_reload_needed = vmx->host_state.ldt_sel;
806 vmx->host_state.fs_sel = kvm_read_fs(); 806 savesegment(fs, vmx->host_state.fs_sel);
807 if (!(vmx->host_state.fs_sel & 7)) { 807 if (!(vmx->host_state.fs_sel & 7)) {
808 vmcs_write16(HOST_FS_SELECTOR, vmx->host_state.fs_sel); 808 vmcs_write16(HOST_FS_SELECTOR, vmx->host_state.fs_sel);
809 vmx->host_state.fs_reload_needed = 0; 809 vmx->host_state.fs_reload_needed = 0;
@@ -811,7 +811,7 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu)
811 vmcs_write16(HOST_FS_SELECTOR, 0); 811 vmcs_write16(HOST_FS_SELECTOR, 0);
812 vmx->host_state.fs_reload_needed = 1; 812 vmx->host_state.fs_reload_needed = 1;
813 } 813 }
814 vmx->host_state.gs_sel = kvm_read_gs(); 814 savesegment(gs, vmx->host_state.gs_sel);
815 if (!(vmx->host_state.gs_sel & 7)) 815 if (!(vmx->host_state.gs_sel & 7))
816 vmcs_write16(HOST_GS_SELECTOR, vmx->host_state.gs_sel); 816 vmcs_write16(HOST_GS_SELECTOR, vmx->host_state.gs_sel);
817 else { 817 else {
@@ -841,27 +841,21 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu)
841 841
842static void __vmx_load_host_state(struct vcpu_vmx *vmx) 842static void __vmx_load_host_state(struct vcpu_vmx *vmx)
843{ 843{
844 unsigned long flags;
845
846 if (!vmx->host_state.loaded) 844 if (!vmx->host_state.loaded)
847 return; 845 return;
848 846
849 ++vmx->vcpu.stat.host_state_reload; 847 ++vmx->vcpu.stat.host_state_reload;
850 vmx->host_state.loaded = 0; 848 vmx->host_state.loaded = 0;
851 if (vmx->host_state.fs_reload_needed) 849 if (vmx->host_state.fs_reload_needed)
852 kvm_load_fs(vmx->host_state.fs_sel); 850 loadsegment(fs, vmx->host_state.fs_sel);
853 if (vmx->host_state.gs_ldt_reload_needed) { 851 if (vmx->host_state.gs_ldt_reload_needed) {
854 kvm_load_ldt(vmx->host_state.ldt_sel); 852 kvm_load_ldt(vmx->host_state.ldt_sel);
855 /*
856 * If we have to reload gs, we must take care to
857 * preserve our gs base.
858 */
859 local_irq_save(flags);
860 kvm_load_gs(vmx->host_state.gs_sel);
861#ifdef CONFIG_X86_64 853#ifdef CONFIG_X86_64
862 wrmsrl(MSR_GS_BASE, vmcs_readl(HOST_GS_BASE)); 854 load_gs_index(vmx->host_state.gs_sel);
855 wrmsrl(MSR_KERNEL_GS_BASE, current->thread.gs);
856#else
857 loadsegment(gs, vmx->host_state.gs_sel);
863#endif 858#endif
864 local_irq_restore(flags);
865 } 859 }
866 reload_tss(); 860 reload_tss();
867#ifdef CONFIG_X86_64 861#ifdef CONFIG_X86_64
@@ -2589,8 +2583,8 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
2589 vmcs_write16(HOST_CS_SELECTOR, __KERNEL_CS); /* 22.2.4 */ 2583 vmcs_write16(HOST_CS_SELECTOR, __KERNEL_CS); /* 22.2.4 */
2590 vmcs_write16(HOST_DS_SELECTOR, __KERNEL_DS); /* 22.2.4 */ 2584 vmcs_write16(HOST_DS_SELECTOR, __KERNEL_DS); /* 22.2.4 */
2591 vmcs_write16(HOST_ES_SELECTOR, __KERNEL_DS); /* 22.2.4 */ 2585 vmcs_write16(HOST_ES_SELECTOR, __KERNEL_DS); /* 22.2.4 */
2592 vmcs_write16(HOST_FS_SELECTOR, kvm_read_fs()); /* 22.2.4 */ 2586 vmcs_write16(HOST_FS_SELECTOR, 0); /* 22.2.4 */
2593 vmcs_write16(HOST_GS_SELECTOR, kvm_read_gs()); /* 22.2.4 */ 2587 vmcs_write16(HOST_GS_SELECTOR, 0); /* 22.2.4 */
2594 vmcs_write16(HOST_SS_SELECTOR, __KERNEL_DS); /* 22.2.4 */ 2588 vmcs_write16(HOST_SS_SELECTOR, __KERNEL_DS); /* 22.2.4 */
2595#ifdef CONFIG_X86_64 2589#ifdef CONFIG_X86_64
2596 rdmsrl(MSR_FS_BASE, a); 2590 rdmsrl(MSR_FS_BASE, a);
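The svm.c and vmx.c hunks replace the kvm_read_fs()/kvm_load_gs() helpers with the generic savesegment()/loadsegment() macros and handle the 64-bit %gs case explicitly: load_gs_index() is required there, followed by a manual restore of MSR_KERNEL_GS_BASE. The shared pattern, pulled out as a sketch (kernel context assumed; loadsegment, load_gs_index, and wrmsrl are kernel helpers, not userspace API):

static void reload_host_gs(u16 gs_sel)
{
#ifdef CONFIG_X86_64
	/*
	 * Writing a selector into %gs wipes the hidden GS base, so the
	 * kernel GS base MSR is rewritten from current->thread.gs.
	 */
	load_gs_index(gs_sel);
	wrmsrl(MSR_KERNEL_GS_BASE, current->thread.gs);
#else
	loadsegment(gs, gs_sel);
#endif
}

This also lets __vmx_load_host_state() drop its local_irq_save() bracket, as the diff above shows.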
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 3a09c625d526..6c2ecf0a806d 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1991,13 +1991,14 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
1991 0 /* Reserved */ | F(CX16) | 0 /* xTPR Update, PDCM */ | 1991 0 /* Reserved */ | F(CX16) | 0 /* xTPR Update, PDCM */ |
1992 0 /* Reserved, DCA */ | F(XMM4_1) | 1992 0 /* Reserved, DCA */ | F(XMM4_1) |
1993 F(XMM4_2) | F(X2APIC) | F(MOVBE) | F(POPCNT) | 1993 F(XMM4_2) | F(X2APIC) | F(MOVBE) | F(POPCNT) |
1994 0 /* Reserved, AES */ | F(XSAVE) | 0 /* OSXSAVE */ | F(AVX); 1994 0 /* Reserved*/ | F(AES) | F(XSAVE) | 0 /* OSXSAVE */ | F(AVX) |
1995 F(F16C);
1995 /* cpuid 0x80000001.ecx */ 1996 /* cpuid 0x80000001.ecx */
1996 const u32 kvm_supported_word6_x86_features = 1997 const u32 kvm_supported_word6_x86_features =
1997 F(LAHF_LM) | F(CMP_LEGACY) | F(SVM) | 0 /* ExtApicSpace */ | 1998 F(LAHF_LM) | F(CMP_LEGACY) | F(SVM) | 0 /* ExtApicSpace */ |
1998 F(CR8_LEGACY) | F(ABM) | F(SSE4A) | F(MISALIGNSSE) | 1999 F(CR8_LEGACY) | F(ABM) | F(SSE4A) | F(MISALIGNSSE) |
1999 F(3DNOWPREFETCH) | 0 /* OSVW */ | 0 /* IBS */ | F(SSE5) | 2000 F(3DNOWPREFETCH) | 0 /* OSVW */ | 0 /* IBS */ | F(XOP) |
2000 0 /* SKINIT */ | 0 /* WDT */; 2001 0 /* SKINIT, WDT, LWP */ | F(FMA4) | F(TBM);
2001 2002
2002 /* all calls to cpuid_count() should be made on the same cpu */ 2003 /* all calls to cpuid_count() should be made on the same cpu */
2003 get_cpu(); 2004 get_cpu();
diff --git a/arch/x86/lib/memcpy_32.c b/arch/x86/lib/memcpy_32.c
index 5415a9d06f53..b908a59eccf5 100644
--- a/arch/x86/lib/memcpy_32.c
+++ b/arch/x86/lib/memcpy_32.c
@@ -22,22 +22,187 @@ EXPORT_SYMBOL(memset);
22 22
23void *memmove(void *dest, const void *src, size_t n) 23void *memmove(void *dest, const void *src, size_t n)
24{ 24{
25 int d0, d1, d2; 25 int d0,d1,d2,d3,d4,d5;
26 26 char *ret = dest;
27 if (dest < src) { 27
28 memcpy(dest, src, n); 28 __asm__ __volatile__(
 29 } else { 29 /* Handle 16 bytes or more in a loop */
30 __asm__ __volatile__( 30 "cmp $0x10, %0\n\t"
31 "std\n\t" 31 "jb 1f\n\t"
32 "rep\n\t" 32
33 "movsb\n\t" 33 /* Decide forward/backward copy mode */
34 "cld" 34 "cmp %2, %1\n\t"
35 : "=&c" (d0), "=&S" (d1), "=&D" (d2) 35 "jb 2f\n\t"
36 :"0" (n), 36
37 "1" (n-1+src), 37 /*
38 "2" (n-1+dest) 38 * movs instruction have many startup latency
39 :"memory"); 39 * so we handle small size by general register.
40 } 40 */
41 return dest; 41 "cmp $680, %0\n\t"
42 "jb 3f\n\t"
43 /*
 44 * movs instructions are only good for the aligned case.
45 */
46 "mov %1, %3\n\t"
47 "xor %2, %3\n\t"
48 "and $0xff, %3\n\t"
49 "jz 4f\n\t"
50 "3:\n\t"
51 "sub $0x10, %0\n\t"
52
53 /*
 54 * We gobble 16 bytes forward in each loop.
55 */
56 "3:\n\t"
57 "sub $0x10, %0\n\t"
58 "mov 0*4(%1), %3\n\t"
59 "mov 1*4(%1), %4\n\t"
60 "mov %3, 0*4(%2)\n\t"
61 "mov %4, 1*4(%2)\n\t"
62 "mov 2*4(%1), %3\n\t"
63 "mov 3*4(%1), %4\n\t"
64 "mov %3, 2*4(%2)\n\t"
65 "mov %4, 3*4(%2)\n\t"
66 "lea 0x10(%1), %1\n\t"
67 "lea 0x10(%2), %2\n\t"
68 "jae 3b\n\t"
69 "add $0x10, %0\n\t"
70 "jmp 1f\n\t"
71
72 /*
73 * Handle data forward by movs.
74 */
75 ".p2align 4\n\t"
76 "4:\n\t"
77 "mov -4(%1, %0), %3\n\t"
78 "lea -4(%2, %0), %4\n\t"
79 "shr $2, %0\n\t"
80 "rep movsl\n\t"
81 "mov %3, (%4)\n\t"
82 "jmp 11f\n\t"
83 /*
84 * Handle data backward by movs.
85 */
86 ".p2align 4\n\t"
87 "6:\n\t"
88 "mov (%1), %3\n\t"
89 "mov %2, %4\n\t"
90 "lea -4(%1, %0), %1\n\t"
91 "lea -4(%2, %0), %2\n\t"
92 "shr $2, %0\n\t"
93 "std\n\t"
94 "rep movsl\n\t"
95 "mov %3,(%4)\n\t"
96 "cld\n\t"
97 "jmp 11f\n\t"
98
99 /*
100 * Start to prepare for backward copy.
101 */
102 ".p2align 4\n\t"
103 "2:\n\t"
104 "cmp $680, %0\n\t"
105 "jb 5f\n\t"
106 "mov %1, %3\n\t"
107 "xor %2, %3\n\t"
108 "and $0xff, %3\n\t"
109 "jz 6b\n\t"
110
111 /*
112 * Calculate copy position to tail.
113 */
114 "5:\n\t"
115 "add %0, %1\n\t"
116 "add %0, %2\n\t"
117 "sub $0x10, %0\n\t"
118
119 /*
 120 * We gobble 16 bytes backward in each loop.
121 */
122 "7:\n\t"
123 "sub $0x10, %0\n\t"
124
125 "mov -1*4(%1), %3\n\t"
126 "mov -2*4(%1), %4\n\t"
127 "mov %3, -1*4(%2)\n\t"
128 "mov %4, -2*4(%2)\n\t"
129 "mov -3*4(%1), %3\n\t"
130 "mov -4*4(%1), %4\n\t"
131 "mov %3, -3*4(%2)\n\t"
132 "mov %4, -4*4(%2)\n\t"
133 "lea -0x10(%1), %1\n\t"
134 "lea -0x10(%2), %2\n\t"
135 "jae 7b\n\t"
136 /*
137 * Calculate copy position to head.
138 */
139 "add $0x10, %0\n\t"
140 "sub %0, %1\n\t"
141 "sub %0, %2\n\t"
142
143 /*
144 * Move data from 8 bytes to 15 bytes.
145 */
146 ".p2align 4\n\t"
147 "1:\n\t"
148 "cmp $8, %0\n\t"
149 "jb 8f\n\t"
150 "mov 0*4(%1), %3\n\t"
151 "mov 1*4(%1), %4\n\t"
152 "mov -2*4(%1, %0), %5\n\t"
153 "mov -1*4(%1, %0), %1\n\t"
154
155 "mov %3, 0*4(%2)\n\t"
156 "mov %4, 1*4(%2)\n\t"
157 "mov %5, -2*4(%2, %0)\n\t"
158 "mov %1, -1*4(%2, %0)\n\t"
159 "jmp 11f\n\t"
160
161 /*
162 * Move data from 4 bytes to 7 bytes.
163 */
164 ".p2align 4\n\t"
165 "8:\n\t"
166 "cmp $4, %0\n\t"
167 "jb 9f\n\t"
168 "mov 0*4(%1), %3\n\t"
169 "mov -1*4(%1, %0), %4\n\t"
170 "mov %3, 0*4(%2)\n\t"
171 "mov %4, -1*4(%2, %0)\n\t"
172 "jmp 11f\n\t"
173
174 /*
175 * Move data from 2 bytes to 3 bytes.
176 */
177 ".p2align 4\n\t"
178 "9:\n\t"
179 "cmp $2, %0\n\t"
180 "jb 10f\n\t"
181 "movw 0*2(%1), %%dx\n\t"
182 "movw -1*2(%1, %0), %%bx\n\t"
183 "movw %%dx, 0*2(%2)\n\t"
184 "movw %%bx, -1*2(%2, %0)\n\t"
185 "jmp 11f\n\t"
186
187 /*
188 * Move data for 1 byte.
189 */
190 ".p2align 4\n\t"
191 "10:\n\t"
192 "cmp $1, %0\n\t"
193 "jb 11f\n\t"
194 "movb (%1), %%cl\n\t"
195 "movb %%cl, (%2)\n\t"
196 ".p2align 4\n\t"
197 "11:"
198 : "=&c" (d0), "=&S" (d1), "=&D" (d2),
199 "=r" (d3),"=r" (d4), "=r"(d5)
200 :"0" (n),
201 "1" (src),
202 "2" (dest)
203 :"memory");
204
205 return ret;
206
42} 207}
43EXPORT_SYMBOL(memmove); 208EXPORT_SYMBOL(memmove);
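The new 32-bit memmove picks its copy direction up front: forward when the destination lies below the source, backward otherwise, then mops up sub-16-byte tails in registers. A plain-C mirror of that strategy (a sketch only; the asm above additionally switches to rep movsl for large copies whose source and destination agree in their low 8 address bits):

/*
 * Hedged plain-C mirror of the strategy the new 32-bit memmove asm uses:
 * choose direction by pointer order, move word-sized blocks, then the tail.
 */
#include <stdint.h>
#include <stddef.h>
#include <stdio.h>
#include <string.h>

static void *memmove_sketch(void *dest, const void *src, size_t n)
{
	unsigned char *d = dest;
	const unsigned char *s = src;

	if (d < s) {			/* forward copy is overlap-safe */
		while (n >= 4) {
			uint32_t w;
			memcpy(&w, s, 4);  /* memcpy avoids alignment traps */
			memcpy(d, &w, 4);
			s += 4; d += 4; n -= 4;
		}
		while (n--)
			*d++ = *s++;
	} else {			/* backward copy for dest > src */
		d += n; s += n;
		while (n >= 4) {
			uint32_t w;
			s -= 4; d -= 4; n -= 4;
			memcpy(&w, s, 4);
			memcpy(d, &w, 4);
		}
		while (n--)
			*--d = *--s;
	}
	return dest;
}

int main(void)
{
	char buf[16] = "abcdefghij";

	memmove_sketch(buf + 2, buf, 10);	/* overlapping ranges */
	puts(buf + 2);				/* prints "abcdefghij" */
	return 0;
}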
diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S
index bcbcd1e0f7d5..75ef61e35e38 100644
--- a/arch/x86/lib/memcpy_64.S
+++ b/arch/x86/lib/memcpy_64.S
@@ -40,84 +40,132 @@
40ENTRY(__memcpy) 40ENTRY(__memcpy)
41ENTRY(memcpy) 41ENTRY(memcpy)
42 CFI_STARTPROC 42 CFI_STARTPROC
43 movq %rdi, %rax
43 44
44 /* 45 /*
45 * Put the number of full 64-byte blocks into %ecx. 46 * Use 32bit CMP here to avoid long NOP padding.
46 * Tail portion is handled at the end:
47 */ 47 */
48 movq %rdi, %rax 48 cmp $0x20, %edx
49 movl %edx, %ecx 49 jb .Lhandle_tail
50 shrl $6, %ecx
51 jz .Lhandle_tail
52 50
53 .p2align 4
54.Lloop_64:
55 /* 51 /*
 56 * We decrement the loop index here - and the zero-flag is 52 * We check whether a memory false dependence could occur,
 57 * checked at the end of the loop (instructions inbetween do 53 * then jump to the corresponding copy mode.
58 * not change the zero flag):
59 */ 54 */
60 decl %ecx 55 cmp %dil, %sil
56 jl .Lcopy_backward
57 subl $0x20, %edx
58.Lcopy_forward_loop:
59 subq $0x20, %rdx
61 60
62 /* 61 /*
63 * Move in blocks of 4x16 bytes: 62 * Move in blocks of 4x8 bytes:
64 */ 63 */
65 movq 0*8(%rsi), %r11 64 movq 0*8(%rsi), %r8
66 movq 1*8(%rsi), %r8 65 movq 1*8(%rsi), %r9
67 movq %r11, 0*8(%rdi) 66 movq 2*8(%rsi), %r10
68 movq %r8, 1*8(%rdi) 67 movq 3*8(%rsi), %r11
69 68 leaq 4*8(%rsi), %rsi
70 movq 2*8(%rsi), %r9 69
71 movq 3*8(%rsi), %r10 70 movq %r8, 0*8(%rdi)
72 movq %r9, 2*8(%rdi) 71 movq %r9, 1*8(%rdi)
73 movq %r10, 3*8(%rdi) 72 movq %r10, 2*8(%rdi)
74 73 movq %r11, 3*8(%rdi)
75 movq 4*8(%rsi), %r11 74 leaq 4*8(%rdi), %rdi
76 movq 5*8(%rsi), %r8 75 jae .Lcopy_forward_loop
77 movq %r11, 4*8(%rdi) 76 addq $0x20, %rdx
78 movq %r8, 5*8(%rdi) 77 jmp .Lhandle_tail
79 78
80 movq 6*8(%rsi), %r9 79.Lcopy_backward:
81 movq 7*8(%rsi), %r10 80 /*
82 movq %r9, 6*8(%rdi) 81 * Calculate copy position to tail.
83 movq %r10, 7*8(%rdi) 82 */
84 83 addq %rdx, %rsi
85 leaq 64(%rsi), %rsi 84 addq %rdx, %rdi
86 leaq 64(%rdi), %rdi 85 subq $0x20, %rdx
87 86 /*
88 jnz .Lloop_64 87 * At most 3 ALU operations in one cycle,
88 * so append NOPS in the same 16bytes trunk.
89 */
90 .p2align 4
91.Lcopy_backward_loop:
92 subq $0x20, %rdx
93 movq -1*8(%rsi), %r8
94 movq -2*8(%rsi), %r9
95 movq -3*8(%rsi), %r10
96 movq -4*8(%rsi), %r11
97 leaq -4*8(%rsi), %rsi
98 movq %r8, -1*8(%rdi)
99 movq %r9, -2*8(%rdi)
100 movq %r10, -3*8(%rdi)
101 movq %r11, -4*8(%rdi)
102 leaq -4*8(%rdi), %rdi
103 jae .Lcopy_backward_loop
89 104
105 /*
106 * Calculate copy position to head.
107 */
108 addq $0x20, %rdx
109 subq %rdx, %rsi
110 subq %rdx, %rdi
90.Lhandle_tail: 111.Lhandle_tail:
91 movl %edx, %ecx 112 cmpq $16, %rdx
92 andl $63, %ecx 113 jb .Lless_16bytes
93 shrl $3, %ecx
94 jz .Lhandle_7
95 114
115 /*
116 * Move data from 16 bytes to 31 bytes.
117 */
118 movq 0*8(%rsi), %r8
119 movq 1*8(%rsi), %r9
120 movq -2*8(%rsi, %rdx), %r10
121 movq -1*8(%rsi, %rdx), %r11
122 movq %r8, 0*8(%rdi)
123 movq %r9, 1*8(%rdi)
124 movq %r10, -2*8(%rdi, %rdx)
125 movq %r11, -1*8(%rdi, %rdx)
126 retq
96 .p2align 4 127 .p2align 4
97.Lloop_8: 128.Lless_16bytes:
98 decl %ecx 129 cmpq $8, %rdx
99 movq (%rsi), %r8 130 jb .Lless_8bytes
100 movq %r8, (%rdi) 131 /*
101 leaq 8(%rdi), %rdi 132 * Move data from 8 bytes to 15 bytes.
102 leaq 8(%rsi), %rsi 133 */
103 jnz .Lloop_8 134 movq 0*8(%rsi), %r8
104 135 movq -1*8(%rsi, %rdx), %r9
105.Lhandle_7: 136 movq %r8, 0*8(%rdi)
106 movl %edx, %ecx 137 movq %r9, -1*8(%rdi, %rdx)
107 andl $7, %ecx 138 retq
108 jz .Lend 139 .p2align 4
140.Lless_8bytes:
141 cmpq $4, %rdx
142 jb .Lless_3bytes
109 143
144 /*
145 * Move data from 4 bytes to 7 bytes.
146 */
147 movl (%rsi), %ecx
148 movl -4(%rsi, %rdx), %r8d
149 movl %ecx, (%rdi)
150 movl %r8d, -4(%rdi, %rdx)
151 retq
110 .p2align 4 152 .p2align 4
153.Lless_3bytes:
154 cmpl $0, %edx
155 je .Lend
156 /*
157 * Move data from 1 bytes to 3 bytes.
158 */
111.Lloop_1: 159.Lloop_1:
112 movb (%rsi), %r8b 160 movb (%rsi), %r8b
113 movb %r8b, (%rdi) 161 movb %r8b, (%rdi)
114 incq %rdi 162 incq %rdi
115 incq %rsi 163 incq %rsi
116 decl %ecx 164 decl %edx
117 jnz .Lloop_1 165 jnz .Lloop_1
118 166
119.Lend: 167.Lend:
120 ret 168 retq
121 CFI_ENDPROC 169 CFI_ENDPROC
122ENDPROC(memcpy) 170ENDPROC(memcpy)
123ENDPROC(__memcpy) 171ENDPROC(__memcpy)
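The rewritten memcpy has separate branches for the sub-4, sub-8, sub-16, and 16-to-31-byte tails plus 32-byte forward and backward loops; the backward loop avoids a store-forwarding false dependence when the buffers alias in their low address byte. A hedged harness sketch that exercises each tail branch by copying every length from 0 to 64 and checking for both corruption and overrun:

/*
 * Hedged test-harness sketch for a memcpy implementation; lengths 0..64
 * cover all the tail branches and both block loops above.
 */
#include <stdio.h>
#include <string.h>

int main(void)
{
	unsigned char src[96], dst[96];
	size_t n, i;

	for (i = 0; i < sizeof(src); i++)
		src[i] = (unsigned char)i;

	for (n = 0; n <= 64; n++) {
		memset(dst, 0xee, sizeof(dst));
		memcpy(dst, src, n);
		for (i = 0; i < n; i++) {
			if (dst[i] != src[i]) {
				printf("mismatch at len %zu\n", n);
				return 1;
			}
		}
		if (dst[n] != 0xee) {	/* overrun check */
			printf("overrun at len %zu\n", n);
			return 1;
		}
	}
	puts("all lengths ok");
	return 0;
}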
diff --git a/arch/x86/lib/memmove_64.c b/arch/x86/lib/memmove_64.c
index 0a33909bf122..6d0f0ec41b34 100644
--- a/arch/x86/lib/memmove_64.c
+++ b/arch/x86/lib/memmove_64.c
@@ -8,14 +8,185 @@
8#undef memmove 8#undef memmove
9void *memmove(void *dest, const void *src, size_t count) 9void *memmove(void *dest, const void *src, size_t count)
10{ 10{
11 if (dest < src) { 11 unsigned long d0,d1,d2,d3,d4,d5,d6,d7;
12 return memcpy(dest, src, count); 12 char *ret;
13 } else { 13
14 char *p = dest + count; 14 __asm__ __volatile__(
 15 const char *s = src + count; 15 /* Handle 32 bytes or more in a loop */
16 while (count--) 16 "mov %2, %3\n\t"
17 *--p = *--s; 17 "cmp $0x20, %0\n\t"
18 } 18 "jb 1f\n\t"
19 return dest; 19
20 /* Decide forward/backward copy mode */
21 "cmp %2, %1\n\t"
22 "jb 2f\n\t"
23
24 /*
 25 * movsq instructions have a high startup latency,
 26 * so we handle small sizes with general registers.
27 */
28 "cmp $680, %0\n\t"
29 "jb 3f\n\t"
30 /*
 31 * movsq instructions are only good for the aligned case.
32 */
33 "cmpb %%dil, %%sil\n\t"
34 "je 4f\n\t"
35 "3:\n\t"
36 "sub $0x20, %0\n\t"
37 /*
 38 * We gobble 32 bytes forward in each loop.
39 */
40 "5:\n\t"
41 "sub $0x20, %0\n\t"
42 "movq 0*8(%1), %4\n\t"
43 "movq 1*8(%1), %5\n\t"
44 "movq 2*8(%1), %6\n\t"
45 "movq 3*8(%1), %7\n\t"
46 "leaq 4*8(%1), %1\n\t"
47
48 "movq %4, 0*8(%2)\n\t"
49 "movq %5, 1*8(%2)\n\t"
50 "movq %6, 2*8(%2)\n\t"
51 "movq %7, 3*8(%2)\n\t"
52 "leaq 4*8(%2), %2\n\t"
53 "jae 5b\n\t"
54 "addq $0x20, %0\n\t"
55 "jmp 1f\n\t"
56 /*
57 * Handle data forward by movsq.
58 */
59 ".p2align 4\n\t"
60 "4:\n\t"
61 "movq %0, %8\n\t"
62 "movq -8(%1, %0), %4\n\t"
63 "lea -8(%2, %0), %5\n\t"
64 "shrq $3, %8\n\t"
65 "rep movsq\n\t"
66 "movq %4, (%5)\n\t"
67 "jmp 13f\n\t"
68 /*
69 * Handle data backward by movsq.
70 */
71 ".p2align 4\n\t"
72 "7:\n\t"
73 "movq %0, %8\n\t"
74 "movq (%1), %4\n\t"
75 "movq %2, %5\n\t"
76 "leaq -8(%1, %0), %1\n\t"
77 "leaq -8(%2, %0), %2\n\t"
78 "shrq $3, %8\n\t"
79 "std\n\t"
80 "rep movsq\n\t"
81 "cld\n\t"
82 "movq %4, (%5)\n\t"
83 "jmp 13f\n\t"
84
85 /*
86 * Start to prepare for backward copy.
87 */
88 ".p2align 4\n\t"
89 "2:\n\t"
90 "cmp $680, %0\n\t"
91 "jb 6f \n\t"
92 "cmp %%dil, %%sil\n\t"
93 "je 7b \n\t"
94 "6:\n\t"
95 /*
96 * Calculate copy position to tail.
97 */
98 "addq %0, %1\n\t"
99 "addq %0, %2\n\t"
100 "subq $0x20, %0\n\t"
101 /*
 102 * We gobble 32 bytes backward in each loop.
103 */
104 "8:\n\t"
105 "subq $0x20, %0\n\t"
106 "movq -1*8(%1), %4\n\t"
107 "movq -2*8(%1), %5\n\t"
108 "movq -3*8(%1), %6\n\t"
109 "movq -4*8(%1), %7\n\t"
110 "leaq -4*8(%1), %1\n\t"
111
112 "movq %4, -1*8(%2)\n\t"
113 "movq %5, -2*8(%2)\n\t"
114 "movq %6, -3*8(%2)\n\t"
115 "movq %7, -4*8(%2)\n\t"
116 "leaq -4*8(%2), %2\n\t"
117 "jae 8b\n\t"
118 /*
119 * Calculate copy position to head.
120 */
121 "addq $0x20, %0\n\t"
122 "subq %0, %1\n\t"
123 "subq %0, %2\n\t"
124 "1:\n\t"
125 "cmpq $16, %0\n\t"
126 "jb 9f\n\t"
127 /*
128 * Move data from 16 bytes to 31 bytes.
129 */
130 "movq 0*8(%1), %4\n\t"
131 "movq 1*8(%1), %5\n\t"
132 "movq -2*8(%1, %0), %6\n\t"
133 "movq -1*8(%1, %0), %7\n\t"
134 "movq %4, 0*8(%2)\n\t"
135 "movq %5, 1*8(%2)\n\t"
136 "movq %6, -2*8(%2, %0)\n\t"
137 "movq %7, -1*8(%2, %0)\n\t"
138 "jmp 13f\n\t"
139 ".p2align 4\n\t"
140 "9:\n\t"
141 "cmpq $8, %0\n\t"
142 "jb 10f\n\t"
143 /*
144 * Move data from 8 bytes to 15 bytes.
145 */
146 "movq 0*8(%1), %4\n\t"
147 "movq -1*8(%1, %0), %5\n\t"
148 "movq %4, 0*8(%2)\n\t"
149 "movq %5, -1*8(%2, %0)\n\t"
150 "jmp 13f\n\t"
151 "10:\n\t"
152 "cmpq $4, %0\n\t"
153 "jb 11f\n\t"
154 /*
155 * Move data from 4 bytes to 7 bytes.
156 */
157 "movl (%1), %4d\n\t"
158 "movl -4(%1, %0), %5d\n\t"
159 "movl %4d, (%2)\n\t"
160 "movl %5d, -4(%2, %0)\n\t"
161 "jmp 13f\n\t"
162 "11:\n\t"
163 "cmp $2, %0\n\t"
164 "jb 12f\n\t"
165 /*
166 * Move data from 2 bytes to 3 bytes.
167 */
168 "movw (%1), %4w\n\t"
169 "movw -2(%1, %0), %5w\n\t"
170 "movw %4w, (%2)\n\t"
171 "movw %5w, -2(%2, %0)\n\t"
172 "jmp 13f\n\t"
173 "12:\n\t"
174 "cmp $1, %0\n\t"
175 "jb 13f\n\t"
176 /*
177 * Move data for 1 byte.
178 */
179 "movb (%1), %4b\n\t"
180 "movb %4b, (%2)\n\t"
181 "13:\n\t"
182 : "=&d" (d0), "=&S" (d1), "=&D" (d2), "=&a" (ret) ,
183 "=r"(d3), "=r"(d4), "=r"(d5), "=r"(d6), "=&c" (d7)
184 :"0" (count),
185 "1" (src),
186 "2" (dest)
187 :"memory");
188
189 return ret;
190
20} 191}
21EXPORT_SYMBOL(memmove); 192EXPORT_SYMBOL(memmove);
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 4c4508e8a204..79b0b372d2d0 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -229,7 +229,16 @@ void vmalloc_sync_all(void)
229 229
230 spin_lock_irqsave(&pgd_lock, flags); 230 spin_lock_irqsave(&pgd_lock, flags);
231 list_for_each_entry(page, &pgd_list, lru) { 231 list_for_each_entry(page, &pgd_list, lru) {
232 if (!vmalloc_sync_one(page_address(page), address)) 232 spinlock_t *pgt_lock;
233 pmd_t *ret;
234
235 pgt_lock = &pgd_page_get_mm(page)->page_table_lock;
236
237 spin_lock(pgt_lock);
238 ret = vmalloc_sync_one(page_address(page), address);
239 spin_unlock(pgt_lock);
240
241 if (!ret)
233 break; 242 break;
234 } 243 }
235 spin_unlock_irqrestore(&pgd_lock, flags); 244 spin_unlock_irqrestore(&pgd_lock, flags);
@@ -251,6 +260,8 @@ static noinline __kprobes int vmalloc_fault(unsigned long address)
251 if (!(address >= VMALLOC_START && address < VMALLOC_END)) 260 if (!(address >= VMALLOC_START && address < VMALLOC_END))
252 return -1; 261 return -1;
253 262
263 WARN_ON_ONCE(in_nmi());
264
254 /* 265 /*
255 * Synchronize this task's top level page-table 266 * Synchronize this task's top level page-table
256 * with the 'reference' page table. 267 * with the 'reference' page table.
@@ -326,29 +337,7 @@ out:
326 337
327void vmalloc_sync_all(void) 338void vmalloc_sync_all(void)
328{ 339{
329 unsigned long address; 340 sync_global_pgds(VMALLOC_START & PGDIR_MASK, VMALLOC_END);
330
331 for (address = VMALLOC_START & PGDIR_MASK; address <= VMALLOC_END;
332 address += PGDIR_SIZE) {
333
334 const pgd_t *pgd_ref = pgd_offset_k(address);
335 unsigned long flags;
336 struct page *page;
337
338 if (pgd_none(*pgd_ref))
339 continue;
340
341 spin_lock_irqsave(&pgd_lock, flags);
342 list_for_each_entry(page, &pgd_list, lru) {
343 pgd_t *pgd;
344 pgd = (pgd_t *)page_address(page) + pgd_index(address);
345 if (pgd_none(*pgd))
346 set_pgd(pgd, *pgd_ref);
347 else
348 BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref));
349 }
350 spin_unlock_irqrestore(&pgd_lock, flags);
351 }
352} 341}
353 342
354/* 343/*
@@ -369,6 +358,8 @@ static noinline __kprobes int vmalloc_fault(unsigned long address)
369 if (!(address >= VMALLOC_START && address < VMALLOC_END)) 358 if (!(address >= VMALLOC_START && address < VMALLOC_END))
370 return -1; 359 return -1;
371 360
361 WARN_ON_ONCE(in_nmi());
362
372 /* 363 /*
373 * Copy kernel mappings over when needed. This can also 364 * Copy kernel mappings over when needed. This can also
374 * happen within a race in page table update. In the later 365 * happen within a race in page table update. In the later
@@ -894,8 +885,14 @@ spurious_fault(unsigned long error_code, unsigned long address)
894 if (pmd_large(*pmd)) 885 if (pmd_large(*pmd))
895 return spurious_fault_check(error_code, (pte_t *) pmd); 886 return spurious_fault_check(error_code, (pte_t *) pmd);
896 887
888 /*
889 * Note: don't use pte_present() here, since it returns true
890 * if the _PAGE_PROTNONE bit is set. However, this aliases the
891 * _PAGE_GLOBAL bit, which for kernel pages give false positives
892 * when CONFIG_DEBUG_PAGEALLOC is used.
893 */
897 pte = pte_offset_kernel(pmd, address); 894 pte = pte_offset_kernel(pmd, address);
898 if (!pte_present(*pte)) 895 if (!(pte_flags(*pte) & _PAGE_PRESENT))
899 return 0; 896 return 0;
900 897
901 ret = spurious_fault_check(error_code, pte); 898 ret = spurious_fault_check(error_code, pte);
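The spurious_fault() hunk documents a subtle trap: pte_present() also accepts _PAGE_PROTNONE, which shares a bit position with _PAGE_GLOBAL on kernel mappings and so yields false positives under CONFIG_DEBUG_PAGEALLOC. The raw flag test, isolated as a sketch (kernel context assumed):

/* Hedged sketch: test the present bit alone, as the new code does. */
static int pte_really_present(pte_t pte)
{
	/* pte_present() would also return true for _PAGE_PROTNONE */
	return pte_flags(pte) & _PAGE_PRESENT;
}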
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index bca79091b9d6..558f2d332076 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -67,7 +67,7 @@ static __init void *alloc_low_page(void)
67 panic("alloc_low_page: ran out of memory"); 67 panic("alloc_low_page: ran out of memory");
68 68
69 adr = __va(pfn * PAGE_SIZE); 69 adr = __va(pfn * PAGE_SIZE);
70 memset(adr, 0, PAGE_SIZE); 70 clear_page(adr);
71 return adr; 71 return adr;
72} 72}
73 73
@@ -558,7 +558,7 @@ char swsusp_pg_dir[PAGE_SIZE]
558 558
559static inline void save_pg_dir(void) 559static inline void save_pg_dir(void)
560{ 560{
561 memcpy(swsusp_pg_dir, swapper_pg_dir, PAGE_SIZE); 561 copy_page(swsusp_pg_dir, swapper_pg_dir);
562} 562}
563#else /* !CONFIG_ACPI_SLEEP */ 563#else /* !CONFIG_ACPI_SLEEP */
564static inline void save_pg_dir(void) 564static inline void save_pg_dir(void)
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 9a6674689a20..c55f900fbf89 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -98,6 +98,43 @@ static int __init nonx32_setup(char *str)
98__setup("noexec32=", nonx32_setup); 98__setup("noexec32=", nonx32_setup);
99 99
100/* 100/*
 101 * When memory is added or removed, make sure every process's MM has
 102 * suitable PGD entries in its local PGD-level page.
103 */
104void sync_global_pgds(unsigned long start, unsigned long end)
105{
106 unsigned long address;
107
108 for (address = start; address <= end; address += PGDIR_SIZE) {
109 const pgd_t *pgd_ref = pgd_offset_k(address);
110 unsigned long flags;
111 struct page *page;
112
113 if (pgd_none(*pgd_ref))
114 continue;
115
116 spin_lock_irqsave(&pgd_lock, flags);
117 list_for_each_entry(page, &pgd_list, lru) {
118 pgd_t *pgd;
119 spinlock_t *pgt_lock;
120
121 pgd = (pgd_t *)page_address(page) + pgd_index(address);
122 pgt_lock = &pgd_page_get_mm(page)->page_table_lock;
123 spin_lock(pgt_lock);
124
125 if (pgd_none(*pgd))
126 set_pgd(pgd, *pgd_ref);
127 else
128 BUG_ON(pgd_page_vaddr(*pgd)
129 != pgd_page_vaddr(*pgd_ref));
130
131 spin_unlock(pgt_lock);
132 }
133 spin_unlock_irqrestore(&pgd_lock, flags);
134 }
135}
136
137/*
101 * NOTE: This function is marked __ref because it calls __init function 138 * NOTE: This function is marked __ref because it calls __init function
102 * (alloc_bootmem_pages). It's safe to do it ONLY when after_bootmem == 0. 139 * (alloc_bootmem_pages). It's safe to do it ONLY when after_bootmem == 0.
103 */ 140 */
@@ -293,7 +330,7 @@ static __ref void *alloc_low_page(unsigned long *phys)
293 panic("alloc_low_page: ran out of memory"); 330 panic("alloc_low_page: ran out of memory");
294 331
295 adr = early_memremap(pfn * PAGE_SIZE, PAGE_SIZE); 332 adr = early_memremap(pfn * PAGE_SIZE, PAGE_SIZE);
296 memset(adr, 0, PAGE_SIZE); 333 clear_page(adr);
297 *phys = pfn * PAGE_SIZE; 334 *phys = pfn * PAGE_SIZE;
298 return adr; 335 return adr;
299} 336}
@@ -534,11 +571,13 @@ kernel_physical_mapping_init(unsigned long start,
534 unsigned long end, 571 unsigned long end,
535 unsigned long page_size_mask) 572 unsigned long page_size_mask)
536{ 573{
537 574 bool pgd_changed = false;
538 unsigned long next, last_map_addr = end; 575 unsigned long next, last_map_addr = end;
576 unsigned long addr;
539 577
540 start = (unsigned long)__va(start); 578 start = (unsigned long)__va(start);
541 end = (unsigned long)__va(end); 579 end = (unsigned long)__va(end);
580 addr = start;
542 581
543 for (; start < end; start = next) { 582 for (; start < end; start = next) {
544 pgd_t *pgd = pgd_offset_k(start); 583 pgd_t *pgd = pgd_offset_k(start);
@@ -563,7 +602,12 @@ kernel_physical_mapping_init(unsigned long start,
563 spin_lock(&init_mm.page_table_lock); 602 spin_lock(&init_mm.page_table_lock);
564 pgd_populate(&init_mm, pgd, __va(pud_phys)); 603 pgd_populate(&init_mm, pgd, __va(pud_phys));
565 spin_unlock(&init_mm.page_table_lock); 604 spin_unlock(&init_mm.page_table_lock);
605 pgd_changed = true;
566 } 606 }
607
608 if (pgd_changed)
609 sync_global_pgds(addr, end);
610
567 __flush_tlb_all(); 611 __flush_tlb_all();
568 612
569 return last_map_addr; 613 return last_map_addr;
@@ -1003,6 +1047,7 @@ vmemmap_populate(struct page *start_page, unsigned long size, int node)
1003 } 1047 }
1004 1048
1005 } 1049 }
1050 sync_global_pgds((unsigned long)start_page, end);
1006 return 0; 1051 return 0;
1007} 1052}
1008 1053
diff --git a/arch/x86/mm/k8topology_64.c b/arch/x86/mm/k8topology_64.c
index 970ed579d4e4..52d54bfc1ebb 100644
--- a/arch/x86/mm/k8topology_64.c
+++ b/arch/x86/mm/k8topology_64.c
@@ -22,7 +22,7 @@
22#include <asm/numa.h> 22#include <asm/numa.h>
23#include <asm/mpspec.h> 23#include <asm/mpspec.h>
24#include <asm/apic.h> 24#include <asm/apic.h>
25#include <asm/k8.h> 25#include <asm/amd_nb.h>
26 26
27static struct bootnode __initdata nodes[8]; 27static struct bootnode __initdata nodes[8];
28static nodemask_t __initdata nodes_parsed = NODE_MASK_NONE; 28static nodemask_t __initdata nodes_parsed = NODE_MASK_NONE;
@@ -54,8 +54,8 @@ static __init int find_northbridge(void)
54static __init void early_get_boot_cpu_id(void) 54static __init void early_get_boot_cpu_id(void)
55{ 55{
56 /* 56 /*
 57 * need to get boot_cpu_id so can use that to create apicid_to_node 57 * need to get the APIC ID of the BSP so we can use it to
58 * in k8_scan_nodes() 58 * create apicid_to_node in k8_scan_nodes()
59 */ 59 */
60#ifdef CONFIG_X86_MPPARSE 60#ifdef CONFIG_X86_MPPARSE
61 /* 61 /*
@@ -212,7 +212,7 @@ int __init k8_scan_nodes(void)
212 bits = boot_cpu_data.x86_coreid_bits; 212 bits = boot_cpu_data.x86_coreid_bits;
213 cores = (1<<bits); 213 cores = (1<<bits);
214 apicid_base = 0; 214 apicid_base = 0;
215 /* need to get boot_cpu_id early for system with apicid lifting */ 215 /* get the APIC ID of the BSP early for systems with apicid lifting */
216 early_get_boot_cpu_id(); 216 early_get_boot_cpu_id();
217 if (boot_cpu_physical_apicid > 0) { 217 if (boot_cpu_physical_apicid > 0) {
218 pr_info("BSP APIC ID: %02x\n", boot_cpu_physical_apicid); 218 pr_info("BSP APIC ID: %02x\n", boot_cpu_physical_apicid);
diff --git a/arch/x86/mm/kmemcheck/kmemcheck.c b/arch/x86/mm/kmemcheck/kmemcheck.c
index b3b531a4f8e5..d87dd6d042d6 100644
--- a/arch/x86/mm/kmemcheck/kmemcheck.c
+++ b/arch/x86/mm/kmemcheck/kmemcheck.c
@@ -631,6 +631,8 @@ bool kmemcheck_fault(struct pt_regs *regs, unsigned long address,
631 if (!pte) 631 if (!pte)
632 return false; 632 return false;
633 633
634 WARN_ON_ONCE(in_nmi());
635
634 if (error_code & 2) 636 if (error_code & 2)
635 kmemcheck_access(regs, address, KMEMCHECK_WRITE); 637 kmemcheck_access(regs, address, KMEMCHECK_WRITE);
636 else 638 else
diff --git a/arch/x86/mm/kmemcheck/opcode.c b/arch/x86/mm/kmemcheck/opcode.c
index 63c19e27aa6f..324aa3f07237 100644
--- a/arch/x86/mm/kmemcheck/opcode.c
+++ b/arch/x86/mm/kmemcheck/opcode.c
@@ -9,7 +9,7 @@ static bool opcode_is_prefix(uint8_t b)
9 b == 0xf0 || b == 0xf2 || b == 0xf3 9 b == 0xf0 || b == 0xf2 || b == 0xf3
10 /* Group 2 */ 10 /* Group 2 */
11 || b == 0x2e || b == 0x36 || b == 0x3e || b == 0x26 11 || b == 0x2e || b == 0x36 || b == 0x3e || b == 0x26
12 || b == 0x64 || b == 0x65 || b == 0x2e || b == 0x3e 12 || b == 0x64 || b == 0x65
13 /* Group 3 */ 13 /* Group 3 */
14 || b == 0x66 14 || b == 0x66
15 /* Group 4 */ 15 /* Group 4 */
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index a7bcc23ef96c..4962f1aeda6f 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -18,7 +18,7 @@
18#include <asm/dma.h> 18#include <asm/dma.h>
19#include <asm/numa.h> 19#include <asm/numa.h>
20#include <asm/acpi.h> 20#include <asm/acpi.h>
21#include <asm/k8.h> 21#include <asm/amd_nb.h>
22 22
23struct pglist_data *node_data[MAX_NUMNODES] __read_mostly; 23struct pglist_data *node_data[MAX_NUMNODES] __read_mostly;
24EXPORT_SYMBOL(node_data); 24EXPORT_SYMBOL(node_data);
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 5c4ee422590e..c70e57dbb491 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -87,7 +87,19 @@ static inline void pgd_list_del(pgd_t *pgd)
87#define UNSHARED_PTRS_PER_PGD \ 87#define UNSHARED_PTRS_PER_PGD \
88 (SHARED_KERNEL_PMD ? KERNEL_PGD_BOUNDARY : PTRS_PER_PGD) 88 (SHARED_KERNEL_PMD ? KERNEL_PGD_BOUNDARY : PTRS_PER_PGD)
89 89
90static void pgd_ctor(pgd_t *pgd) 90
91static void pgd_set_mm(pgd_t *pgd, struct mm_struct *mm)
92{
93 BUILD_BUG_ON(sizeof(virt_to_page(pgd)->index) < sizeof(mm));
94 virt_to_page(pgd)->index = (pgoff_t)mm;
95}
96
97struct mm_struct *pgd_page_get_mm(struct page *page)
98{
99 return (struct mm_struct *)page->index;
100}
101
102static void pgd_ctor(struct mm_struct *mm, pgd_t *pgd)
91{ 103{
92 /* If the pgd points to a shared pagetable level (either the 104 /* If the pgd points to a shared pagetable level (either the
93 ptes in non-PAE, or shared PMD in PAE), then just copy the 105 ptes in non-PAE, or shared PMD in PAE), then just copy the
@@ -105,8 +117,10 @@ static void pgd_ctor(pgd_t *pgd)
105 } 117 }
106 118
107 /* list required to sync kernel mapping updates */ 119 /* list required to sync kernel mapping updates */
108 if (!SHARED_KERNEL_PMD) 120 if (!SHARED_KERNEL_PMD) {
121 pgd_set_mm(pgd, mm);
109 pgd_list_add(pgd); 122 pgd_list_add(pgd);
123 }
110} 124}
111 125
112static void pgd_dtor(pgd_t *pgd) 126static void pgd_dtor(pgd_t *pgd)
@@ -272,7 +286,7 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
272 */ 286 */
273 spin_lock_irqsave(&pgd_lock, flags); 287 spin_lock_irqsave(&pgd_lock, flags);
274 288
275 pgd_ctor(pgd); 289 pgd_ctor(mm, pgd);
276 pgd_prepopulate_pmd(mm, pgd, pmds); 290 pgd_prepopulate_pmd(mm, pgd, pmds);
277 291
278 spin_unlock_irqrestore(&pgd_lock, flags); 292 spin_unlock_irqrestore(&pgd_lock, flags);
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c
index f9897f7a9ef1..9c0d0d399c30 100644
--- a/arch/x86/mm/srat_64.c
+++ b/arch/x86/mm/srat_64.c
@@ -420,9 +420,11 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end)
420 return -1; 420 return -1;
421 } 421 }
422 422
423 for_each_node_mask(i, nodes_parsed) 423 for (i = 0; i < num_node_memblks; i++)
424 e820_register_active_regions(i, nodes[i].start >> PAGE_SHIFT, 424 e820_register_active_regions(memblk_nodeid[i],
425 nodes[i].end >> PAGE_SHIFT); 425 node_memblk_range[i].start >> PAGE_SHIFT,
426 node_memblk_range[i].end >> PAGE_SHIFT);
427
426 /* for out of order entries in SRAT */ 428 /* for out of order entries in SRAT */
427 sort_node_map(); 429 sort_node_map();
428 if (!nodes_cover_memory(nodes)) { 430 if (!nodes_cover_memory(nodes)) {
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index c03f14ab6667..49358481c733 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -5,6 +5,7 @@
5#include <linux/smp.h> 5#include <linux/smp.h>
6#include <linux/interrupt.h> 6#include <linux/interrupt.h>
7#include <linux/module.h> 7#include <linux/module.h>
8#include <linux/cpu.h>
8 9
9#include <asm/tlbflush.h> 10#include <asm/tlbflush.h>
10#include <asm/mmu_context.h> 11#include <asm/mmu_context.h>
@@ -52,6 +53,8 @@ union smp_flush_state {
52 want false sharing in the per cpu data segment. */ 53 want false sharing in the per cpu data segment. */
53static union smp_flush_state flush_state[NUM_INVALIDATE_TLB_VECTORS]; 54static union smp_flush_state flush_state[NUM_INVALIDATE_TLB_VECTORS];
54 55
56static DEFINE_PER_CPU_READ_MOSTLY(int, tlb_vector_offset);
57
55/* 58/*
56 * We cannot call mmdrop() because we are in interrupt context, 59 * We cannot call mmdrop() because we are in interrupt context,
57 * instead update mm->cpu_vm_mask. 60 * instead update mm->cpu_vm_mask.
@@ -173,7 +176,7 @@ static void flush_tlb_others_ipi(const struct cpumask *cpumask,
173 union smp_flush_state *f; 176 union smp_flush_state *f;
174 177
175 /* Caller has disabled preemption */ 178 /* Caller has disabled preemption */
176 sender = smp_processor_id() % NUM_INVALIDATE_TLB_VECTORS; 179 sender = this_cpu_read(tlb_vector_offset);
177 f = &flush_state[sender]; 180 f = &flush_state[sender];
178 181
179 /* 182 /*
@@ -218,6 +221,47 @@ void native_flush_tlb_others(const struct cpumask *cpumask,
218 flush_tlb_others_ipi(cpumask, mm, va); 221 flush_tlb_others_ipi(cpumask, mm, va);
219} 222}
220 223
224static void __cpuinit calculate_tlb_offset(void)
225{
226 int cpu, node, nr_node_vecs;
227 /*
228 * we are changing tlb_vector_offset for each CPU in runtime, but this
229 * will not cause inconsistency, as the write is atomic under X86. we
230 * might see more lock contentions in a short time, but after all CPU's
231 * tlb_vector_offset are changed, everything should go normal
232 *
233 * Note: if NUM_INVALIDATE_TLB_VECTORS % nr_online_nodes !=0, we might
234 * waste some vectors.
235 **/
236 if (nr_online_nodes > NUM_INVALIDATE_TLB_VECTORS)
237 nr_node_vecs = 1;
238 else
239 nr_node_vecs = NUM_INVALIDATE_TLB_VECTORS/nr_online_nodes;
240
241 for_each_online_node(node) {
242 int node_offset = (node % NUM_INVALIDATE_TLB_VECTORS) *
243 nr_node_vecs;
244 int cpu_offset = 0;
245 for_each_cpu(cpu, cpumask_of_node(node)) {
246 per_cpu(tlb_vector_offset, cpu) = node_offset +
247 cpu_offset;
248 cpu_offset++;
249 cpu_offset = cpu_offset % nr_node_vecs;
250 }
251 }
252}
253
254static int tlb_cpuhp_notify(struct notifier_block *n,
255 unsigned long action, void *hcpu)
256{
257 switch (action & 0xf) {
258 case CPU_ONLINE:
259 case CPU_DEAD:
260 calculate_tlb_offset();
261 }
262 return NOTIFY_OK;
263}
264
221static int __cpuinit init_smp_flush(void) 265static int __cpuinit init_smp_flush(void)
222{ 266{
223 int i; 267 int i;
@@ -225,6 +269,8 @@ static int __cpuinit init_smp_flush(void)
225 for (i = 0; i < ARRAY_SIZE(flush_state); i++) 269 for (i = 0; i < ARRAY_SIZE(flush_state); i++)
226 raw_spin_lock_init(&flush_state[i].tlbstate_lock); 270 raw_spin_lock_init(&flush_state[i].tlbstate_lock);
227 271
272 calculate_tlb_offset();
273 hotcpu_notifier(tlb_cpuhp_notify, 0);
228 return 0; 274 return 0;
229} 275}
230core_initcall(init_smp_flush); 276core_initcall(init_smp_flush);
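calculate_tlb_offset() spreads the invalidate vectors across NUMA nodes and round-robins each node's CPUs over its share, so TLB-flush senders on different nodes stop contending for the same flush_state locks. A userspace mirror of the arithmetic (the node and CPU counts are invented for illustration):

/*
 * Hedged userspace mirror of calculate_tlb_offset(): give each node a
 * share of the vectors, then round-robin its CPUs within that share.
 */
#include <stdio.h>

#define NUM_VECS	8
#define NR_NODES	2
#define CPUS_PER_NODE	4

int main(void)
{
	int nr_node_vecs = NR_NODES > NUM_VECS ? 1 : NUM_VECS / NR_NODES;
	int node, cpu;

	for (node = 0; node < NR_NODES; node++) {
		int node_offset = (node % NUM_VECS) * nr_node_vecs;
		int cpu_offset = 0;

		for (cpu = 0; cpu < CPUS_PER_NODE; cpu++) {
			printf("node %d cpu %d -> vector slot %d\n",
			       node, cpu, node_offset + cpu_offset);
			cpu_offset = (cpu_offset + 1) % nr_node_vecs;
		}
	}
	return 0;
}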
diff --git a/arch/x86/oprofile/backtrace.c b/arch/x86/oprofile/backtrace.c
index 3855096c59b8..2d49d4e19a36 100644
--- a/arch/x86/oprofile/backtrace.c
+++ b/arch/x86/oprofile/backtrace.c
@@ -14,6 +14,7 @@
14#include <asm/ptrace.h> 14#include <asm/ptrace.h>
15#include <asm/uaccess.h> 15#include <asm/uaccess.h>
16#include <asm/stacktrace.h> 16#include <asm/stacktrace.h>
17#include <linux/compat.h>
17 18
18static void backtrace_warning_symbol(void *data, char *msg, 19static void backtrace_warning_symbol(void *data, char *msg,
19 unsigned long symbol) 20 unsigned long symbol)
@@ -48,14 +49,12 @@ static struct stacktrace_ops backtrace_ops = {
48 .walk_stack = print_context_stack, 49 .walk_stack = print_context_stack,
49}; 50};
50 51
51struct frame_head { 52#ifdef CONFIG_COMPAT
52 struct frame_head *bp; 53static struct stack_frame_ia32 *
53 unsigned long ret; 54dump_user_backtrace_32(struct stack_frame_ia32 *head)
54} __attribute__((packed));
55
56static struct frame_head *dump_user_backtrace(struct frame_head *head)
57{ 55{
58 struct frame_head bufhead[2]; 56 struct stack_frame_ia32 bufhead[2];
57 struct stack_frame_ia32 *fp;
59 58
 60 /* Also check accessibility of one struct frame_head beyond */ 59 /* Also check accessibility of one struct stack_frame_ia32 beyond */
61 if (!access_ok(VERIFY_READ, head, sizeof(bufhead))) 60 if (!access_ok(VERIFY_READ, head, sizeof(bufhead)))
@@ -63,20 +62,66 @@ static struct frame_head *dump_user_backtrace(struct frame_head *head)
63 if (__copy_from_user_inatomic(bufhead, head, sizeof(bufhead))) 62 if (__copy_from_user_inatomic(bufhead, head, sizeof(bufhead)))
64 return NULL; 63 return NULL;
65 64
66 oprofile_add_trace(bufhead[0].ret); 65 fp = (struct stack_frame_ia32 *) compat_ptr(bufhead[0].next_frame);
66
67 oprofile_add_trace(bufhead[0].return_address);
68
69 /* frame pointers should strictly progress back up the stack
70 * (towards higher addresses) */
71 if (head >= fp)
72 return NULL;
73
74 return fp;
75}
76
77static inline int
78x86_backtrace_32(struct pt_regs * const regs, unsigned int depth)
79{
80 struct stack_frame_ia32 *head;
81
82 /* User process is 32-bit */
83 if (!current || !test_thread_flag(TIF_IA32))
84 return 0;
85
86 head = (struct stack_frame_ia32 *) regs->bp;
87 while (depth-- && head)
88 head = dump_user_backtrace_32(head);
89
90 return 1;
91}
92
93#else
94static inline int
95x86_backtrace_32(struct pt_regs * const regs, unsigned int depth)
96{
97 return 0;
98}
99#endif /* CONFIG_COMPAT */
100
101static struct stack_frame *dump_user_backtrace(struct stack_frame *head)
102{
103 struct stack_frame bufhead[2];
104
105 /* Also check accessibility of one struct stack_frame beyond */
106 if (!access_ok(VERIFY_READ, head, sizeof(bufhead)))
107 return NULL;
108 if (__copy_from_user_inatomic(bufhead, head, sizeof(bufhead)))
109 return NULL;
110
111 oprofile_add_trace(bufhead[0].return_address);
67 112
68 /* frame pointers should strictly progress back up the stack 113 /* frame pointers should strictly progress back up the stack
69 * (towards higher addresses) */ 114 * (towards higher addresses) */
70 if (head >= bufhead[0].bp) 115 if (head >= bufhead[0].next_frame)
71 return NULL; 116 return NULL;
72 117
73 return bufhead[0].bp; 118 return bufhead[0].next_frame;
74} 119}
75 120
76void 121void
77x86_backtrace(struct pt_regs * const regs, unsigned int depth) 122x86_backtrace(struct pt_regs * const regs, unsigned int depth)
78{ 123{
79 struct frame_head *head = (struct frame_head *)frame_pointer(regs); 124 struct stack_frame *head = (struct stack_frame *)frame_pointer(regs);
80 125
81 if (!user_mode_vm(regs)) { 126 if (!user_mode_vm(regs)) {
82 unsigned long stack = kernel_stack_pointer(regs); 127 unsigned long stack = kernel_stack_pointer(regs);
@@ -86,6 +131,9 @@ x86_backtrace(struct pt_regs * const regs, unsigned int depth)
86 return; 131 return;
87 } 132 }
88 133
134 if (x86_backtrace_32(regs, depth))
135 return;
136
89 while (depth-- && head) 137 while (depth-- && head)
90 head = dump_user_backtrace(head); 138 head = dump_user_backtrace(head);
91} 139}
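The oprofile rework replaces the private frame_head with the generic struct stack_frame and adds a compat path that chases 32-bit frames through compat_ptr(). The core of either path is a frame-pointer walk with a monotonicity check; a self-contained userspace illustration with synthetic frames (the layout is assumed for demonstration):

/*
 * Hedged sketch of the frame-pointer walk the profiler performs:
 * follow saved frame links while they move up the stack.
 */
#include <stdio.h>

struct stack_frame {
	struct stack_frame *next_frame;
	unsigned long return_address;
};

static void walk(struct stack_frame *head, unsigned int depth)
{
	while (depth-- && head) {
		struct stack_frame *fp = head->next_frame;

		printf("ret %#lx\n", head->return_address);
		/* frames must strictly progress towards higher addresses */
		if (head >= fp)
			break;
		head = fp;
	}
}

int main(void)
{
	struct stack_frame frames[3] = {
		{ &frames[1], 0x1000 },
		{ &frames[2], 0x2000 },
		{ NULL,       0x3000 },
	};

	walk(&frames[0], 8);
	return 0;
}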
diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c
index f1575c9a2572..bd1489c3ce09 100644
--- a/arch/x86/oprofile/nmi_int.c
+++ b/arch/x86/oprofile/nmi_int.c
@@ -695,9 +695,6 @@ static int __init ppro_init(char **cpu_type)
695 return 1; 695 return 1;
696} 696}
697 697
698/* in order to get sysfs right */
699static int using_nmi;
700
701int __init op_nmi_init(struct oprofile_operations *ops) 698int __init op_nmi_init(struct oprofile_operations *ops)
702{ 699{
703 __u8 vendor = boot_cpu_data.x86_vendor; 700 __u8 vendor = boot_cpu_data.x86_vendor;
@@ -705,8 +702,6 @@ int __init op_nmi_init(struct oprofile_operations *ops)
705 char *cpu_type = NULL; 702 char *cpu_type = NULL;
706 int ret = 0; 703 int ret = 0;
707 704
708 using_nmi = 0;
709
710 if (!cpu_has_apic) 705 if (!cpu_has_apic)
711 return -ENODEV; 706 return -ENODEV;
712 707
@@ -790,13 +785,11 @@ int __init op_nmi_init(struct oprofile_operations *ops)
790 if (ret) 785 if (ret)
791 return ret; 786 return ret;
792 787
793 using_nmi = 1;
794 printk(KERN_INFO "oprofile: using NMI interrupt.\n"); 788 printk(KERN_INFO "oprofile: using NMI interrupt.\n");
795 return 0; 789 return 0;
796} 790}
797 791
798void op_nmi_exit(void) 792void op_nmi_exit(void)
799{ 793{
800 if (using_nmi) 794 exit_sysfs();
801 exit_sysfs();
802} 795}