Diffstat (limited to 'arch/x86')
-rw-r--r--  arch/x86/Kconfig | 7
-rw-r--r--  arch/x86/Kconfig.cpu | 2
-rw-r--r--  arch/x86/Makefile | 2
-rw-r--r--  arch/x86/boot/compressed/misc.c | 15
-rw-r--r--  arch/x86/boot/mkcpustr.c | 2
-rw-r--r--  arch/x86/boot/video-vga.c | 9
-rw-r--r--  arch/x86/boot/video.c | 7
-rw-r--r--  arch/x86/ia32/ia32_aout.c | 13
-rw-r--r--  arch/x86/include/asm/alternative.h | 12
-rw-r--r--  arch/x86/include/asm/amd_iommu_proto.h | 1
-rw-r--r--  arch/x86/include/asm/atomic.h | 299
-rw-r--r--  arch/x86/include/asm/atomic64_32.h | 160
-rw-r--r--  arch/x86/include/asm/atomic64_64.h | 224
-rw-r--r--  arch/x86/include/asm/atomic_32.h | 415
-rw-r--r--  arch/x86/include/asm/atomic_64.h | 485
-rw-r--r--  arch/x86/include/asm/cpu_debug.h | 127
-rw-r--r--  arch/x86/include/asm/cpufeature.h | 4
-rw-r--r--  arch/x86/include/asm/debugreg.h | 3
-rw-r--r--  arch/x86/include/asm/elf.h | 15
-rw-r--r--  arch/x86/include/asm/fb.h | 4
-rw-r--r--  arch/x86/include/asm/fixmap.h | 16
-rw-r--r--  arch/x86/include/asm/hpet.h | 1
-rw-r--r--  arch/x86/include/asm/i387.h | 12
-rw-r--r--  arch/x86/include/asm/io.h | 155
-rw-r--r--  arch/x86/include/asm/io_32.h | 196
-rw-r--r--  arch/x86/include/asm/io_64.h | 181
-rw-r--r--  arch/x86/include/asm/microcode.h | 2
-rw-r--r--  arch/x86/include/asm/mmzone_64.h | 6
-rw-r--r--  arch/x86/include/asm/nmi.h | 1
-rw-r--r--  arch/x86/include/asm/numa_64.h | 5
-rw-r--r--  arch/x86/include/asm/numaq.h | 4
-rw-r--r--  arch/x86/include/asm/page_types.h | 1
-rw-r--r--  arch/x86/include/asm/pci_x86.h | 1
-rw-r--r--  arch/x86/include/asm/perf_event.h | 16
-rw-r--r--  arch/x86/include/asm/pgalloc.h | 5
-rw-r--r--  arch/x86/include/asm/pgtable_32.h | 2
-rw-r--r--  arch/x86/include/asm/pgtable_64.h | 2
-rw-r--r--  arch/x86/include/asm/processor.h | 2
-rw-r--r--  arch/x86/include/asm/ptrace.h | 4
-rw-r--r--  arch/x86/include/asm/rwsem.h | 81
-rw-r--r--  arch/x86/include/asm/smp.h | 9
-rw-r--r--  arch/x86/include/asm/stacktrace.h | 2
-rw-r--r--  arch/x86/include/asm/syscall.h | 2
-rw-r--r--  arch/x86/include/asm/system.h | 4
-rw-r--r--  arch/x86/include/asm/thread_info.h | 2
-rw-r--r--  arch/x86/include/asm/uaccess_64.h | 21
-rw-r--r--  arch/x86/include/asm/user.h | 58
-rw-r--r--  arch/x86/include/asm/uv/bios.h | 11
-rw-r--r--  arch/x86/include/asm/uv/uv.h | 1
-rw-r--r--  arch/x86/include/asm/uv/uv_hub.h | 3
-rw-r--r--  arch/x86/include/asm/x86_init.h | 2
-rw-r--r--  arch/x86/include/asm/xsave.h | 2
-rw-r--r--  arch/x86/kernel/acpi/boot.c | 34
-rw-r--r--  arch/x86/kernel/alternative.c | 22
-rw-r--r--  arch/x86/kernel/amd_iommu.c | 23
-rw-r--r--  arch/x86/kernel/amd_iommu_init.c | 3
-rw-r--r--  arch/x86/kernel/apic/apic.c | 19
-rw-r--r--  arch/x86/kernel/apic/io_apic.c | 2
-rw-r--r--  arch/x86/kernel/apic/numaq_32.c | 2
-rw-r--r--  arch/x86/kernel/apic/probe_32.c | 29
-rw-r--r--  arch/x86/kernel/apic/probe_64.c | 2
-rw-r--r--  arch/x86/kernel/apic/x2apic_uv_x.c | 89
-rw-r--r--  arch/x86/kernel/apm_32.c | 4
-rw-r--r--  arch/x86/kernel/bios_uv.c | 39
-rw-r--r--  arch/x86/kernel/cpu/Makefile | 2
-rw-r--r--  arch/x86/kernel/cpu/addon_cpuid_features.c | 4
-rw-r--r--  arch/x86/kernel/cpu/cpu_debug.c | 688
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/powernow-k8.c | 3
-rw-r--r--  arch/x86/kernel/cpu/intel_cacheinfo.c | 334
-rw-r--r--  arch/x86/kernel/cpu/mtrr/Makefile | 2
-rw-r--r--  arch/x86/kernel/cpu/mtrr/amd.c | 2
-rw-r--r--  arch/x86/kernel/cpu/mtrr/centaur.c | 2
-rw-r--r--  arch/x86/kernel/cpu/mtrr/cyrix.c | 2
-rw-r--r--  arch/x86/kernel/cpu/mtrr/generic.c | 10
-rw-r--r--  arch/x86/kernel/cpu/mtrr/main.c | 6
-rw-r--r--  arch/x86/kernel/cpu/mtrr/mtrr.h | 6
-rw-r--r--  arch/x86/kernel/cpu/mtrr/state.c | 94
-rw-r--r--  arch/x86/kernel/cpu/perf_event.c | 1854
-rw-r--r--  arch/x86/kernel/cpu/perf_event_amd.c | 416
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel.c | 971
-rw-r--r--  arch/x86/kernel/cpu/perf_event_p6.c | 157
-rw-r--r--  arch/x86/kernel/cpu/perfctr-watchdog.c | 11
-rw-r--r--  arch/x86/kernel/cpuid.c | 2
-rw-r--r--  arch/x86/kernel/dumpstack_32.c | 5
-rw-r--r--  arch/x86/kernel/dumpstack_64.c | 6
-rw-r--r--  arch/x86/kernel/e820.c | 8
-rw-r--r--  arch/x86/kernel/efi.c | 2
-rw-r--r--  arch/x86/kernel/ftrace.c | 36
-rw-r--r--  arch/x86/kernel/hpet.c | 10
-rw-r--r--  arch/x86/kernel/hw_breakpoint.c | 40
-rw-r--r--  arch/x86/kernel/i387.c | 71
-rw-r--r--  arch/x86/kernel/kgdb.c | 222
-rw-r--r--  arch/x86/kernel/kprobes.c | 5
-rw-r--r--  arch/x86/kernel/microcode_amd.c | 44
-rw-r--r--  arch/x86/kernel/microcode_core.c | 6
-rw-r--r--  arch/x86/kernel/microcode_intel.c | 2
-rw-r--r--  arch/x86/kernel/mpparse.c | 7
-rw-r--r--  arch/x86/kernel/msr.c | 2
-rw-r--r--  arch/x86/kernel/process.c | 19
-rw-r--r--  arch/x86/kernel/process_32.c | 6
-rw-r--r--  arch/x86/kernel/process_64.c | 18
-rw-r--r--  arch/x86/kernel/ptrace.c | 65
-rw-r--r--  arch/x86/kernel/quirks.c | 13
-rw-r--r--  arch/x86/kernel/reboot.c | 9
-rw-r--r--  arch/x86/kernel/setup.c | 35
-rw-r--r--  arch/x86/kernel/smpboot.c | 3
-rw-r--r--  arch/x86/kernel/traps.c | 3
-rw-r--r--  arch/x86/kernel/tsc.c | 4
-rw-r--r--  arch/x86/kernel/uv_sysfs.c | 6
-rw-r--r--  arch/x86/kernel/uv_time.c | 13
-rw-r--r--  arch/x86/kernel/x8664_ksyms_64.c | 3
-rw-r--r--  arch/x86/kernel/x86_init.c | 3
-rw-r--r--  arch/x86/kernel/xsave.c | 1
-rw-r--r--  arch/x86/kvm/i8254.c | 3
-rw-r--r--  arch/x86/kvm/lapic.c | 11
-rw-r--r--  arch/x86/kvm/mmu.c | 6
-rw-r--r--  arch/x86/kvm/paging_tmpl.h | 4
-rw-r--r--  arch/x86/kvm/x86.c | 13
-rw-r--r--  arch/x86/lib/Makefile | 5
-rw-r--r--  arch/x86/lib/cache-smp.c | 19
-rw-r--r--  arch/x86/lib/copy_user_64.S | 6
-rw-r--r--  arch/x86/lib/io_64.c | 25
-rw-r--r--  arch/x86/lib/memcpy_64.S | 23
-rw-r--r--  arch/x86/lib/memset_64.S | 18
-rw-r--r--  arch/x86/lib/rwsem_64.S | 81
-rw-r--r--  arch/x86/mm/gup.c | 2
-rw-r--r--  arch/x86/mm/init.c | 7
-rw-r--r--  arch/x86/mm/init_32.c | 8
-rw-r--r--  arch/x86/mm/init_64.c | 19
-rw-r--r--  arch/x86/mm/ioremap.c | 41
-rw-r--r--  arch/x86/mm/kmemcheck/kmemcheck.c | 2
-rw-r--r--  arch/x86/mm/kmemcheck/shadow.c | 16
-rw-r--r--  arch/x86/mm/kmemcheck/shadow.h | 2
-rw-r--r--  arch/x86/mm/mmap.c | 4
-rw-r--r--  arch/x86/mm/numa_64.c | 235
-rw-r--r--  arch/x86/mm/pgtable.c | 31
-rw-r--r--  arch/x86/mm/srat_64.c | 4
-rw-r--r--  arch/x86/mm/tlb.c | 8
-rw-r--r--  arch/x86/oprofile/nmi_int.c | 20
-rw-r--r--  arch/x86/oprofile/op_model_amd.c | 244
-rw-r--r--  arch/x86/oprofile/op_model_p4.c | 6
-rw-r--r--  arch/x86/oprofile/op_model_ppro.c | 17
-rw-r--r--  arch/x86/oprofile/op_x86_model.h | 20
-rw-r--r--  arch/x86/pci/Makefile | 2
-rw-r--r--  arch/x86/pci/acpi.c | 82
-rw-r--r--  arch/x86/pci/bus_numa.c | 3
-rw-r--r--  arch/x86/pci/bus_numa.h | 3
-rw-r--r--  arch/x86/pci/common.c | 3
-rw-r--r--  arch/x86/pci/i386.c | 14
-rw-r--r--  arch/x86/pci/intel_bus.c | 94
-rw-r--r--  arch/x86/pci/irq.c | 2
-rw-r--r--  arch/x86/pci/mmconfig-shared.c | 17
-rw-r--r--  arch/x86/pci/numaq_32.c | 6
-rw-r--r--  arch/x86/tools/test_get_len.c | 4
154 files changed, 4632 insertions, 4615 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index cbcbfdee3ee0..0896008f7509 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -45,6 +45,7 @@ config X86
 	select HAVE_GENERIC_DMA_COHERENT if X86_32
 	select HAVE_EFFICIENT_UNALIGNED_ACCESS
 	select USER_STACKTRACE_SUPPORT
+	select HAVE_REGS_AND_STACK_ACCESS_API
 	select HAVE_DMA_API_DEBUG
 	select HAVE_KERNEL_GZIP
 	select HAVE_KERNEL_BZIP2
@@ -989,12 +990,6 @@ config X86_CPUID
 	  with major 203 and minors 0 to 31 for /dev/cpu/0/cpuid to
 	  /dev/cpu/31/cpuid.
 
-config X86_CPU_DEBUG
-	tristate "/sys/kernel/debug/x86/cpu/* - CPU Debug support"
-	---help---
-	  If you select this option, this will provide various x86 CPUs
-	  information through debugfs.
-
 choice
 	prompt "High Memory Support"
 	default HIGHMEM4G if !X86_NUMAQ
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index f20ddf84a893..a19829374e6a 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -319,7 +319,7 @@ config X86_L1_CACHE_SHIFT
 
 config X86_XADD
 	def_bool y
-	depends on X86_32 && !M386
+	depends on X86_64 || !M386
 
 config X86_PPRO_FENCE
 	bool "PentiumPro memory ordering errata workaround"
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 78b32be55e9e..0a43dc515e4c 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -135,9 +135,7 @@ drivers-$(CONFIG_OPROFILE) += arch/x86/oprofile/
 # suspend and hibernation support
 drivers-$(CONFIG_PM) += arch/x86/power/
 
-ifeq ($(CONFIG_X86_32),y)
 drivers-$(CONFIG_FB) += arch/x86/video/
-endif
 
 ####
 # boot loader support. Several targets are kept for legacy purposes
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index 3b22fe8ab91b..51e240779a44 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -19,11 +19,6 @@
 #define _ASM_X86_DESC_H 1
 #endif
 
-#ifdef CONFIG_X86_64
-#define _LINUX_STRING_H_ 1
-#define __LINUX_BITMAP_H 1
-#endif
-
 #include <linux/linkage.h>
 #include <linux/screen_info.h>
 #include <linux/elf.h>
@@ -131,8 +126,8 @@ static void error(char *m);
 static struct boot_params *real_mode;		/* Pointer to real-mode data */
 static int quiet;
 
-static void *memset(void *s, int c, unsigned n);
-void *memcpy(void *dest, const void *src, unsigned n);
+void *memset(void *s, int c, size_t n);
+void *memcpy(void *dest, const void *src, size_t n);
 
 static void __putstr(int, const char *);
 #define putstr(__x)  __putstr(0, __x)
@@ -185,11 +180,9 @@ static void __putstr(int error, const char *s)
 		return;
 #endif
 
-#ifdef CONFIG_X86_32
 	if (real_mode->screen_info.orig_video_mode == 0 &&
 	    lines == 0 && cols == 0)
 		return;
-#endif
 
 	x = real_mode->screen_info.orig_x;
 	y = real_mode->screen_info.orig_y;
@@ -223,7 +216,7 @@ static void __putstr(int error, const char *s)
 	outb(0xff & (pos >> 1), vidport+1);
 }
 
-static void *memset(void *s, int c, unsigned n)
+void *memset(void *s, int c, size_t n)
 {
 	int i;
 	char *ss = s;
@@ -233,7 +226,7 @@ static void *memset(void *s, int c, unsigned n)
 	return s;
 }
 
-void *memcpy(void *dest, const void *src, unsigned n)
+void *memcpy(void *dest, const void *src, size_t n)
 {
 	int i;
 	const char *s = src;
diff --git a/arch/x86/boot/mkcpustr.c b/arch/x86/boot/mkcpustr.c
index 8ef60f20b371..919257f526f2 100644
--- a/arch/x86/boot/mkcpustr.c
+++ b/arch/x86/boot/mkcpustr.c
@@ -22,7 +22,7 @@ int main(void)
 	int i, j;
 	const char *str;
 
-	printf("static const char x86_cap_strs[] = \n");
+	printf("static const char x86_cap_strs[] =\n");
 
 	for (i = 0; i < NCAPINTS; i++) {
 		for (j = 0; j < 32; j++) {
diff --git a/arch/x86/boot/video-vga.c b/arch/x86/boot/video-vga.c
index 819caa1f2008..ed7aeff786b2 100644
--- a/arch/x86/boot/video-vga.c
+++ b/arch/x86/boot/video-vga.c
@@ -42,22 +42,15 @@ static u8 vga_set_basic_mode(void)
 {
 	struct biosregs ireg, oreg;
 	u16 ax;
-	u8 rows;
 	u8 mode;
 
 	initregs(&ireg);
 
+	/* Query current mode */
 	ax = 0x0f00;
 	intcall(0x10, &ireg, &oreg);
 	mode = oreg.al;
 
-	set_fs(0);
-	rows = rdfs8(0x484);	/* rows minus one */
-
-	if ((oreg.ax == 0x5003 || oreg.ax == 0x5007) &&
-	    (rows == 0 || rows == 24))
-		return mode;
-
 	if (mode != 3 && mode != 7)
 		mode = 3;
 
diff --git a/arch/x86/boot/video.c b/arch/x86/boot/video.c
index f767164cd5df..43eda284d27f 100644
--- a/arch/x86/boot/video.c
+++ b/arch/x86/boot/video.c
@@ -298,11 +298,18 @@ static void restore_screen(void)
 	}
 
 	/* Restore cursor position */
+	if (saved.curx >= xs)
+		saved.curx = xs-1;
+	if (saved.cury >= ys)
+		saved.cury = ys-1;
+
 	initregs(&ireg);
 	ireg.ah = 0x02;		/* Set cursor position */
 	ireg.dh = saved.cury;
 	ireg.dl = saved.curx;
 	intcall(0x10, &ireg, NULL);
+
+	store_cursor_position();
 }
 
 void set_video(void)
diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c
index 2a4d073d2cf1..9046e4af66ce 100644
--- a/arch/x86/ia32/ia32_aout.c
+++ b/arch/x86/ia32/ia32_aout.c
@@ -297,7 +297,7 @@ static int load_aout_binary(struct linux_binprm *bprm, struct pt_regs *regs)
 	 * size limits imposed on them by creating programs with large
 	 * arrays in the data or bss.
 	 */
-	rlim = current->signal->rlim[RLIMIT_DATA].rlim_cur;
+	rlim = rlimit(RLIMIT_DATA);
 	if (rlim >= RLIM_INFINITY)
 		rlim = ~0;
 	if (ex.a_data + ex.a_bss > rlim)
@@ -308,14 +308,15 @@ static int load_aout_binary(struct linux_binprm *bprm, struct pt_regs *regs)
 	if (retval)
 		return retval;
 
-	regs->cs = __USER32_CS;
-	regs->r8 = regs->r9 = regs->r10 = regs->r11 = regs->r12 =
-		regs->r13 = regs->r14 = regs->r15 = 0;
-
 	/* OK, This is the point of no return */
 	set_personality(PER_LINUX);
 	set_thread_flag(TIF_IA32);
-	clear_thread_flag(TIF_ABI_PENDING);
+
+	setup_new_exec(bprm);
+
+	regs->cs = __USER32_CS;
+	regs->r8 = regs->r9 = regs->r10 = regs->r11 = regs->r12 =
+		regs->r13 = regs->r14 = regs->r15 = 0;
 
 	current->mm->end_code = ex.a_text +
 		(current->mm->start_code = N_TXTADDR(ex));
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index 69b74a7b877f..f1e253ceba4b 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -65,12 +65,17 @@ extern void alternatives_smp_module_add(struct module *mod, char *name,
 					void *text, void *text_end);
 extern void alternatives_smp_module_del(struct module *mod);
 extern void alternatives_smp_switch(int smp);
+extern int alternatives_text_reserved(void *start, void *end);
 #else
 static inline void alternatives_smp_module_add(struct module *mod, char *name,
 					       void *locks, void *locks_end,
 					       void *text, void *text_end) {}
 static inline void alternatives_smp_module_del(struct module *mod) {}
 static inline void alternatives_smp_switch(int smp) {}
+static inline int alternatives_text_reserved(void *start, void *end)
+{
+	return 0;
+}
 #endif	/* CONFIG_SMP */
 
 /* alternative assembly primitive: */
@@ -125,11 +130,16 @@ static inline void alternatives_smp_switch(int smp) {}
 	asm volatile (ALTERNATIVE(oldinstr, newinstr, feature)		\
 		: output : "i" (0), ## input)
 
+/* Like alternative_io, but for replacing a direct call with another one. */
+#define alternative_call(oldfunc, newfunc, feature, output, input...)	\
+	asm volatile (ALTERNATIVE("call %P[old]", "call %P[new]", feature) \
+		: output : [old] "i" (oldfunc), [new] "i" (newfunc), ## input)
+
 /*
  * use this macro(s) if you need more than one output parameter
  * in alternative_io
  */
-#define ASM_OUTPUT2(a, b) a, b
+#define ASM_OUTPUT2(a...) a
 
 struct paravirt_patch_site;
 #ifdef CONFIG_PARAVIRT
diff --git a/arch/x86/include/asm/amd_iommu_proto.h b/arch/x86/include/asm/amd_iommu_proto.h
index 4d817f9e6e77..d2544f1d705d 100644
--- a/arch/x86/include/asm/amd_iommu_proto.h
+++ b/arch/x86/include/asm/amd_iommu_proto.h
@@ -31,6 +31,7 @@ extern void amd_iommu_reset_cmd_buffer(struct amd_iommu *iommu);
 extern int amd_iommu_init_devices(void);
 extern void amd_iommu_uninit_devices(void);
 extern void amd_iommu_init_notifier(void);
+extern void amd_iommu_init_api(void);
 #ifndef CONFIG_AMD_IOMMU_STATS
 
 static inline void amd_iommu_stats_init(void) { }
diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h
index 4e1b8873c474..8f8217b9bdac 100644
--- a/arch/x86/include/asm/atomic.h
+++ b/arch/x86/include/asm/atomic.h
@@ -1,5 +1,300 @@
1#ifndef _ASM_X86_ATOMIC_H
2#define _ASM_X86_ATOMIC_H
3
4#include <linux/compiler.h>
5#include <linux/types.h>
6#include <asm/processor.h>
7#include <asm/alternative.h>
8#include <asm/cmpxchg.h>
9
10/*
11 * Atomic operations that C can't guarantee us. Useful for
12 * resource counting etc..
13 */
14
15#define ATOMIC_INIT(i) { (i) }
16
17/**
18 * atomic_read - read atomic variable
19 * @v: pointer of type atomic_t
20 *
21 * Atomically reads the value of @v.
22 */
23static inline int atomic_read(const atomic_t *v)
24{
25 return v->counter;
26}
27
28/**
29 * atomic_set - set atomic variable
30 * @v: pointer of type atomic_t
31 * @i: required value
32 *
33 * Atomically sets the value of @v to @i.
34 */
35static inline void atomic_set(atomic_t *v, int i)
36{
37 v->counter = i;
38}
39
40/**
41 * atomic_add - add integer to atomic variable
42 * @i: integer value to add
43 * @v: pointer of type atomic_t
44 *
45 * Atomically adds @i to @v.
46 */
47static inline void atomic_add(int i, atomic_t *v)
48{
49 asm volatile(LOCK_PREFIX "addl %1,%0"
50 : "+m" (v->counter)
51 : "ir" (i));
52}
53
54/**
55 * atomic_sub - subtract integer from atomic variable
56 * @i: integer value to subtract
57 * @v: pointer of type atomic_t
58 *
59 * Atomically subtracts @i from @v.
60 */
61static inline void atomic_sub(int i, atomic_t *v)
62{
63 asm volatile(LOCK_PREFIX "subl %1,%0"
64 : "+m" (v->counter)
65 : "ir" (i));
66}
67
68/**
69 * atomic_sub_and_test - subtract value from variable and test result
70 * @i: integer value to subtract
71 * @v: pointer of type atomic_t
72 *
73 * Atomically subtracts @i from @v and returns
74 * true if the result is zero, or false for all
75 * other cases.
76 */
77static inline int atomic_sub_and_test(int i, atomic_t *v)
78{
79 unsigned char c;
80
81 asm volatile(LOCK_PREFIX "subl %2,%0; sete %1"
82 : "+m" (v->counter), "=qm" (c)
83 : "ir" (i) : "memory");
84 return c;
85}
86
87/**
88 * atomic_inc - increment atomic variable
89 * @v: pointer of type atomic_t
90 *
91 * Atomically increments @v by 1.
92 */
93static inline void atomic_inc(atomic_t *v)
94{
95 asm volatile(LOCK_PREFIX "incl %0"
96 : "+m" (v->counter));
97}
98
99/**
100 * atomic_dec - decrement atomic variable
101 * @v: pointer of type atomic_t
102 *
103 * Atomically decrements @v by 1.
104 */
105static inline void atomic_dec(atomic_t *v)
106{
107 asm volatile(LOCK_PREFIX "decl %0"
108 : "+m" (v->counter));
109}
110
111/**
112 * atomic_dec_and_test - decrement and test
113 * @v: pointer of type atomic_t
114 *
115 * Atomically decrements @v by 1 and
116 * returns true if the result is 0, or false for all other
117 * cases.
118 */
119static inline int atomic_dec_and_test(atomic_t *v)
120{
121 unsigned char c;
122
123 asm volatile(LOCK_PREFIX "decl %0; sete %1"
124 : "+m" (v->counter), "=qm" (c)
125 : : "memory");
126 return c != 0;
127}
128
129/**
130 * atomic_inc_and_test - increment and test
131 * @v: pointer of type atomic_t
132 *
133 * Atomically increments @v by 1
134 * and returns true if the result is zero, or false for all
135 * other cases.
136 */
137static inline int atomic_inc_and_test(atomic_t *v)
138{
139 unsigned char c;
140
141 asm volatile(LOCK_PREFIX "incl %0; sete %1"
142 : "+m" (v->counter), "=qm" (c)
143 : : "memory");
144 return c != 0;
145}
146
147/**
148 * atomic_add_negative - add and test if negative
149 * @i: integer value to add
150 * @v: pointer of type atomic_t
151 *
152 * Atomically adds @i to @v and returns true
153 * if the result is negative, or false when
154 * result is greater than or equal to zero.
155 */
156static inline int atomic_add_negative(int i, atomic_t *v)
157{
158 unsigned char c;
159
160 asm volatile(LOCK_PREFIX "addl %2,%0; sets %1"
161 : "+m" (v->counter), "=qm" (c)
162 : "ir" (i) : "memory");
163 return c;
164}
165
166/**
167 * atomic_add_return - add integer and return
168 * @i: integer value to add
169 * @v: pointer of type atomic_t
170 *
171 * Atomically adds @i to @v and returns @i + @v
172 */
173static inline int atomic_add_return(int i, atomic_t *v)
174{
175 int __i;
176#ifdef CONFIG_M386
177 unsigned long flags;
178 if (unlikely(boot_cpu_data.x86 <= 3))
179 goto no_xadd;
180#endif
181 /* Modern 486+ processor */
182 __i = i;
183 asm volatile(LOCK_PREFIX "xaddl %0, %1"
184 : "+r" (i), "+m" (v->counter)
185 : : "memory");
186 return i + __i;
187
188#ifdef CONFIG_M386
189no_xadd: /* Legacy 386 processor */
190 raw_local_irq_save(flags);
191 __i = atomic_read(v);
192 atomic_set(v, i + __i);
193 raw_local_irq_restore(flags);
194 return i + __i;
195#endif
196}
197
198/**
199 * atomic_sub_return - subtract integer and return
200 * @v: pointer of type atomic_t
201 * @i: integer value to subtract
202 *
203 * Atomically subtracts @i from @v and returns @v - @i
204 */
205static inline int atomic_sub_return(int i, atomic_t *v)
206{
207 return atomic_add_return(-i, v);
208}
209
210#define atomic_inc_return(v) (atomic_add_return(1, v))
211#define atomic_dec_return(v) (atomic_sub_return(1, v))
212
213static inline int atomic_cmpxchg(atomic_t *v, int old, int new)
214{
215 return cmpxchg(&v->counter, old, new);
216}
217
218static inline int atomic_xchg(atomic_t *v, int new)
219{
220 return xchg(&v->counter, new);
221}
222
223/**
224 * atomic_add_unless - add unless the number is already a given value
225 * @v: pointer of type atomic_t
226 * @a: the amount to add to v...
227 * @u: ...unless v is equal to u.
228 *
229 * Atomically adds @a to @v, so long as @v was not already @u.
230 * Returns non-zero if @v was not @u, and zero otherwise.
231 */
232static inline int atomic_add_unless(atomic_t *v, int a, int u)
233{
234 int c, old;
235 c = atomic_read(v);
236 for (;;) {
237 if (unlikely(c == (u)))
238 break;
239 old = atomic_cmpxchg((v), c, c + (a));
240 if (likely(old == c))
241 break;
242 c = old;
243 }
244 return c != (u);
245}
246
247#define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0)
248
249/**
250 * atomic_inc_short - increment of a short integer
251 * @v: pointer to type int
252 *
253 * Atomically adds 1 to @v
254 * Returns the new value of @u
255 */
256static inline short int atomic_inc_short(short int *v)
257{
258 asm(LOCK_PREFIX "addw $1, %0" : "+m" (*v));
259 return *v;
260}
261
262#ifdef CONFIG_X86_64
263/**
264 * atomic_or_long - OR of two long integers
265 * @v1: pointer to type unsigned long
266 * @v2: pointer to type unsigned long
267 *
268 * Atomically ORs @v1 and @v2
269 * Returns the result of the OR
270 */
271static inline void atomic_or_long(unsigned long *v1, unsigned long v2)
272{
273 asm(LOCK_PREFIX "orq %1, %0" : "+m" (*v1) : "r" (v2));
274}
275#endif
276
277/* These are x86-specific, used by some header files */
278#define atomic_clear_mask(mask, addr) \
279 asm volatile(LOCK_PREFIX "andl %0,%1" \
280 : : "r" (~(mask)), "m" (*(addr)) : "memory")
281
282#define atomic_set_mask(mask, addr) \
283 asm volatile(LOCK_PREFIX "orl %0,%1" \
284 : : "r" ((unsigned)(mask)), "m" (*(addr)) \
285 : "memory")
286
287/* Atomic operations are already serializing on x86 */
288#define smp_mb__before_atomic_dec() barrier()
289#define smp_mb__after_atomic_dec() barrier()
290#define smp_mb__before_atomic_inc() barrier()
291#define smp_mb__after_atomic_inc() barrier()
292
-#ifdef CONFIG_X86_32
-# include "atomic_32.h"
-#else
-# include "atomic_64.h"
-#endif
+#ifdef CONFIG_X86_32
+# include "atomic64_32.h"
+#else
+# include "atomic64_64.h"
+#endif
+
+#include <asm-generic/atomic-long.h>
+#endif /* _ASM_X86_ATOMIC_H */
diff --git a/arch/x86/include/asm/atomic64_32.h b/arch/x86/include/asm/atomic64_32.h
new file mode 100644
index 000000000000..03027bf28de5
--- /dev/null
+++ b/arch/x86/include/asm/atomic64_32.h
@@ -0,0 +1,160 @@
1#ifndef _ASM_X86_ATOMIC64_32_H
2#define _ASM_X86_ATOMIC64_32_H
3
4#include <linux/compiler.h>
5#include <linux/types.h>
6#include <asm/processor.h>
7//#include <asm/cmpxchg.h>
8
9/* An 64bit atomic type */
10
11typedef struct {
12 u64 __aligned(8) counter;
13} atomic64_t;
14
15#define ATOMIC64_INIT(val) { (val) }
16
17extern u64 atomic64_cmpxchg(atomic64_t *ptr, u64 old_val, u64 new_val);
18
19/**
20 * atomic64_xchg - xchg atomic64 variable
21 * @ptr: pointer to type atomic64_t
22 * @new_val: value to assign
23 *
24 * Atomically xchgs the value of @ptr to @new_val and returns
25 * the old value.
26 */
27extern u64 atomic64_xchg(atomic64_t *ptr, u64 new_val);
28
29/**
30 * atomic64_set - set atomic64 variable
31 * @ptr: pointer to type atomic64_t
32 * @new_val: value to assign
33 *
34 * Atomically sets the value of @ptr to @new_val.
35 */
36extern void atomic64_set(atomic64_t *ptr, u64 new_val);
37
38/**
39 * atomic64_read - read atomic64 variable
40 * @ptr: pointer to type atomic64_t
41 *
42 * Atomically reads the value of @ptr and returns it.
43 */
44static inline u64 atomic64_read(atomic64_t *ptr)
45{
46 u64 res;
47
48 /*
49 * Note, we inline this atomic64_t primitive because
50 * it only clobbers EAX/EDX and leaves the others
51 * untouched. We also (somewhat subtly) rely on the
52 * fact that cmpxchg8b returns the current 64-bit value
53 * of the memory location we are touching:
54 */
55 asm volatile(
56 "mov %%ebx, %%eax\n\t"
57 "mov %%ecx, %%edx\n\t"
58 LOCK_PREFIX "cmpxchg8b %1\n"
59 : "=&A" (res)
60 : "m" (*ptr)
61 );
62
63 return res;
64}
65
66extern u64 atomic64_read(atomic64_t *ptr);
67
68/**
69 * atomic64_add_return - add and return
70 * @delta: integer value to add
71 * @ptr: pointer to type atomic64_t
72 *
73 * Atomically adds @delta to @ptr and returns @delta + *@ptr
74 */
75extern u64 atomic64_add_return(u64 delta, atomic64_t *ptr);
76
77/*
78 * Other variants with different arithmetic operators:
79 */
80extern u64 atomic64_sub_return(u64 delta, atomic64_t *ptr);
81extern u64 atomic64_inc_return(atomic64_t *ptr);
82extern u64 atomic64_dec_return(atomic64_t *ptr);
83
84/**
85 * atomic64_add - add integer to atomic64 variable
86 * @delta: integer value to add
87 * @ptr: pointer to type atomic64_t
88 *
89 * Atomically adds @delta to @ptr.
90 */
91extern void atomic64_add(u64 delta, atomic64_t *ptr);
92
93/**
94 * atomic64_sub - subtract the atomic64 variable
95 * @delta: integer value to subtract
96 * @ptr: pointer to type atomic64_t
97 *
98 * Atomically subtracts @delta from @ptr.
99 */
100extern void atomic64_sub(u64 delta, atomic64_t *ptr);
101
102/**
103 * atomic64_sub_and_test - subtract value from variable and test result
104 * @delta: integer value to subtract
105 * @ptr: pointer to type atomic64_t
106 *
107 * Atomically subtracts @delta from @ptr and returns
108 * true if the result is zero, or false for all
109 * other cases.
110 */
111extern int atomic64_sub_and_test(u64 delta, atomic64_t *ptr);
112
113/**
114 * atomic64_inc - increment atomic64 variable
115 * @ptr: pointer to type atomic64_t
116 *
117 * Atomically increments @ptr by 1.
118 */
119extern void atomic64_inc(atomic64_t *ptr);
120
121/**
122 * atomic64_dec - decrement atomic64 variable
123 * @ptr: pointer to type atomic64_t
124 *
125 * Atomically decrements @ptr by 1.
126 */
127extern void atomic64_dec(atomic64_t *ptr);
128
129/**
130 * atomic64_dec_and_test - decrement and test
131 * @ptr: pointer to type atomic64_t
132 *
133 * Atomically decrements @ptr by 1 and
134 * returns true if the result is 0, or false for all other
135 * cases.
136 */
137extern int atomic64_dec_and_test(atomic64_t *ptr);
138
139/**
140 * atomic64_inc_and_test - increment and test
141 * @ptr: pointer to type atomic64_t
142 *
143 * Atomically increments @ptr by 1
144 * and returns true if the result is zero, or false for all
145 * other cases.
146 */
147extern int atomic64_inc_and_test(atomic64_t *ptr);
148
149/**
150 * atomic64_add_negative - add and test if negative
151 * @delta: integer value to add
152 * @ptr: pointer to type atomic64_t
153 *
154 * Atomically adds @delta to @ptr and returns true
155 * if the result is negative, or false when
156 * result is greater than or equal to zero.
157 */
158extern int atomic64_add_negative(u64 delta, atomic64_t *ptr);
159
160#endif /* _ASM_X86_ATOMIC64_32_H */
diff --git a/arch/x86/include/asm/atomic64_64.h b/arch/x86/include/asm/atomic64_64.h
new file mode 100644
index 000000000000..51c5b4056929
--- /dev/null
+++ b/arch/x86/include/asm/atomic64_64.h
@@ -0,0 +1,224 @@
1#ifndef _ASM_X86_ATOMIC64_64_H
2#define _ASM_X86_ATOMIC64_64_H
3
4#include <linux/types.h>
5#include <asm/alternative.h>
6#include <asm/cmpxchg.h>
7
8/* The 64-bit atomic type */
9
10#define ATOMIC64_INIT(i) { (i) }
11
12/**
13 * atomic64_read - read atomic64 variable
14 * @v: pointer of type atomic64_t
15 *
16 * Atomically reads the value of @v.
17 * Doesn't imply a read memory barrier.
18 */
19static inline long atomic64_read(const atomic64_t *v)
20{
21 return v->counter;
22}
23
24/**
25 * atomic64_set - set atomic64 variable
26 * @v: pointer to type atomic64_t
27 * @i: required value
28 *
29 * Atomically sets the value of @v to @i.
30 */
31static inline void atomic64_set(atomic64_t *v, long i)
32{
33 v->counter = i;
34}
35
36/**
37 * atomic64_add - add integer to atomic64 variable
38 * @i: integer value to add
39 * @v: pointer to type atomic64_t
40 *
41 * Atomically adds @i to @v.
42 */
43static inline void atomic64_add(long i, atomic64_t *v)
44{
45 asm volatile(LOCK_PREFIX "addq %1,%0"
46 : "=m" (v->counter)
47 : "er" (i), "m" (v->counter));
48}
49
50/**
51 * atomic64_sub - subtract the atomic64 variable
52 * @i: integer value to subtract
53 * @v: pointer to type atomic64_t
54 *
55 * Atomically subtracts @i from @v.
56 */
57static inline void atomic64_sub(long i, atomic64_t *v)
58{
59 asm volatile(LOCK_PREFIX "subq %1,%0"
60 : "=m" (v->counter)
61 : "er" (i), "m" (v->counter));
62}
63
64/**
65 * atomic64_sub_and_test - subtract value from variable and test result
66 * @i: integer value to subtract
67 * @v: pointer to type atomic64_t
68 *
69 * Atomically subtracts @i from @v and returns
70 * true if the result is zero, or false for all
71 * other cases.
72 */
73static inline int atomic64_sub_and_test(long i, atomic64_t *v)
74{
75 unsigned char c;
76
77 asm volatile(LOCK_PREFIX "subq %2,%0; sete %1"
78 : "=m" (v->counter), "=qm" (c)
79 : "er" (i), "m" (v->counter) : "memory");
80 return c;
81}
82
83/**
84 * atomic64_inc - increment atomic64 variable
85 * @v: pointer to type atomic64_t
86 *
87 * Atomically increments @v by 1.
88 */
89static inline void atomic64_inc(atomic64_t *v)
90{
91 asm volatile(LOCK_PREFIX "incq %0"
92 : "=m" (v->counter)
93 : "m" (v->counter));
94}
95
96/**
97 * atomic64_dec - decrement atomic64 variable
98 * @v: pointer to type atomic64_t
99 *
100 * Atomically decrements @v by 1.
101 */
102static inline void atomic64_dec(atomic64_t *v)
103{
104 asm volatile(LOCK_PREFIX "decq %0"
105 : "=m" (v->counter)
106 : "m" (v->counter));
107}
108
109/**
110 * atomic64_dec_and_test - decrement and test
111 * @v: pointer to type atomic64_t
112 *
113 * Atomically decrements @v by 1 and
114 * returns true if the result is 0, or false for all other
115 * cases.
116 */
117static inline int atomic64_dec_and_test(atomic64_t *v)
118{
119 unsigned char c;
120
121 asm volatile(LOCK_PREFIX "decq %0; sete %1"
122 : "=m" (v->counter), "=qm" (c)
123 : "m" (v->counter) : "memory");
124 return c != 0;
125}
126
127/**
128 * atomic64_inc_and_test - increment and test
129 * @v: pointer to type atomic64_t
130 *
131 * Atomically increments @v by 1
132 * and returns true if the result is zero, or false for all
133 * other cases.
134 */
135static inline int atomic64_inc_and_test(atomic64_t *v)
136{
137 unsigned char c;
138
139 asm volatile(LOCK_PREFIX "incq %0; sete %1"
140 : "=m" (v->counter), "=qm" (c)
141 : "m" (v->counter) : "memory");
142 return c != 0;
143}
144
145/**
146 * atomic64_add_negative - add and test if negative
147 * @i: integer value to add
148 * @v: pointer to type atomic64_t
149 *
150 * Atomically adds @i to @v and returns true
151 * if the result is negative, or false when
152 * result is greater than or equal to zero.
153 */
154static inline int atomic64_add_negative(long i, atomic64_t *v)
155{
156 unsigned char c;
157
158 asm volatile(LOCK_PREFIX "addq %2,%0; sets %1"
159 : "=m" (v->counter), "=qm" (c)
160 : "er" (i), "m" (v->counter) : "memory");
161 return c;
162}
163
164/**
165 * atomic64_add_return - add and return
166 * @i: integer value to add
167 * @v: pointer to type atomic64_t
168 *
169 * Atomically adds @i to @v and returns @i + @v
170 */
171static inline long atomic64_add_return(long i, atomic64_t *v)
172{
173 long __i = i;
174 asm volatile(LOCK_PREFIX "xaddq %0, %1;"
175 : "+r" (i), "+m" (v->counter)
176 : : "memory");
177 return i + __i;
178}
179
180static inline long atomic64_sub_return(long i, atomic64_t *v)
181{
182 return atomic64_add_return(-i, v);
183}
184
185#define atomic64_inc_return(v) (atomic64_add_return(1, (v)))
186#define atomic64_dec_return(v) (atomic64_sub_return(1, (v)))
187
188static inline long atomic64_cmpxchg(atomic64_t *v, long old, long new)
189{
190 return cmpxchg(&v->counter, old, new);
191}
192
193static inline long atomic64_xchg(atomic64_t *v, long new)
194{
195 return xchg(&v->counter, new);
196}
197
198/**
199 * atomic64_add_unless - add unless the number is a given value
200 * @v: pointer of type atomic64_t
201 * @a: the amount to add to v...
202 * @u: ...unless v is equal to u.
203 *
204 * Atomically adds @a to @v, so long as it was not @u.
205 * Returns non-zero if @v was not @u, and zero otherwise.
206 */
207static inline int atomic64_add_unless(atomic64_t *v, long a, long u)
208{
209 long c, old;
210 c = atomic64_read(v);
211 for (;;) {
212 if (unlikely(c == (u)))
213 break;
214 old = atomic64_cmpxchg((v), c, c + (a));
215 if (likely(old == c))
216 break;
217 c = old;
218 }
219 return c != (u);
220}
221
222#define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0)
223
224#endif /* _ASM_X86_ATOMIC64_64_H */
diff --git a/arch/x86/include/asm/atomic_32.h b/arch/x86/include/asm/atomic_32.h
deleted file mode 100644
index dc5a667ff791..000000000000
--- a/arch/x86/include/asm/atomic_32.h
+++ /dev/null
@@ -1,415 +0,0 @@
1#ifndef _ASM_X86_ATOMIC_32_H
2#define _ASM_X86_ATOMIC_32_H
3
4#include <linux/compiler.h>
5#include <linux/types.h>
6#include <asm/processor.h>
7#include <asm/cmpxchg.h>
8
9/*
10 * Atomic operations that C can't guarantee us. Useful for
11 * resource counting etc..
12 */
13
14#define ATOMIC_INIT(i) { (i) }
15
16/**
17 * atomic_read - read atomic variable
18 * @v: pointer of type atomic_t
19 *
20 * Atomically reads the value of @v.
21 */
22static inline int atomic_read(const atomic_t *v)
23{
24 return v->counter;
25}
26
27/**
28 * atomic_set - set atomic variable
29 * @v: pointer of type atomic_t
30 * @i: required value
31 *
32 * Atomically sets the value of @v to @i.
33 */
34static inline void atomic_set(atomic_t *v, int i)
35{
36 v->counter = i;
37}
38
39/**
40 * atomic_add - add integer to atomic variable
41 * @i: integer value to add
42 * @v: pointer of type atomic_t
43 *
44 * Atomically adds @i to @v.
45 */
46static inline void atomic_add(int i, atomic_t *v)
47{
48 asm volatile(LOCK_PREFIX "addl %1,%0"
49 : "+m" (v->counter)
50 : "ir" (i));
51}
52
53/**
54 * atomic_sub - subtract integer from atomic variable
55 * @i: integer value to subtract
56 * @v: pointer of type atomic_t
57 *
58 * Atomically subtracts @i from @v.
59 */
60static inline void atomic_sub(int i, atomic_t *v)
61{
62 asm volatile(LOCK_PREFIX "subl %1,%0"
63 : "+m" (v->counter)
64 : "ir" (i));
65}
66
67/**
68 * atomic_sub_and_test - subtract value from variable and test result
69 * @i: integer value to subtract
70 * @v: pointer of type atomic_t
71 *
72 * Atomically subtracts @i from @v and returns
73 * true if the result is zero, or false for all
74 * other cases.
75 */
76static inline int atomic_sub_and_test(int i, atomic_t *v)
77{
78 unsigned char c;
79
80 asm volatile(LOCK_PREFIX "subl %2,%0; sete %1"
81 : "+m" (v->counter), "=qm" (c)
82 : "ir" (i) : "memory");
83 return c;
84}
85
86/**
87 * atomic_inc - increment atomic variable
88 * @v: pointer of type atomic_t
89 *
90 * Atomically increments @v by 1.
91 */
92static inline void atomic_inc(atomic_t *v)
93{
94 asm volatile(LOCK_PREFIX "incl %0"
95 : "+m" (v->counter));
96}
97
98/**
99 * atomic_dec - decrement atomic variable
100 * @v: pointer of type atomic_t
101 *
102 * Atomically decrements @v by 1.
103 */
104static inline void atomic_dec(atomic_t *v)
105{
106 asm volatile(LOCK_PREFIX "decl %0"
107 : "+m" (v->counter));
108}
109
110/**
111 * atomic_dec_and_test - decrement and test
112 * @v: pointer of type atomic_t
113 *
114 * Atomically decrements @v by 1 and
115 * returns true if the result is 0, or false for all other
116 * cases.
117 */
118static inline int atomic_dec_and_test(atomic_t *v)
119{
120 unsigned char c;
121
122 asm volatile(LOCK_PREFIX "decl %0; sete %1"
123 : "+m" (v->counter), "=qm" (c)
124 : : "memory");
125 return c != 0;
126}
127
128/**
129 * atomic_inc_and_test - increment and test
130 * @v: pointer of type atomic_t
131 *
132 * Atomically increments @v by 1
133 * and returns true if the result is zero, or false for all
134 * other cases.
135 */
136static inline int atomic_inc_and_test(atomic_t *v)
137{
138 unsigned char c;
139
140 asm volatile(LOCK_PREFIX "incl %0; sete %1"
141 : "+m" (v->counter), "=qm" (c)
142 : : "memory");
143 return c != 0;
144}
145
146/**
147 * atomic_add_negative - add and test if negative
148 * @v: pointer of type atomic_t
149 * @i: integer value to add
150 *
151 * Atomically adds @i to @v and returns true
152 * if the result is negative, or false when
153 * result is greater than or equal to zero.
154 */
155static inline int atomic_add_negative(int i, atomic_t *v)
156{
157 unsigned char c;
158
159 asm volatile(LOCK_PREFIX "addl %2,%0; sets %1"
160 : "+m" (v->counter), "=qm" (c)
161 : "ir" (i) : "memory");
162 return c;
163}
164
165/**
166 * atomic_add_return - add integer and return
167 * @v: pointer of type atomic_t
168 * @i: integer value to add
169 *
170 * Atomically adds @i to @v and returns @i + @v
171 */
172static inline int atomic_add_return(int i, atomic_t *v)
173{
174 int __i;
175#ifdef CONFIG_M386
176 unsigned long flags;
177 if (unlikely(boot_cpu_data.x86 <= 3))
178 goto no_xadd;
179#endif
180 /* Modern 486+ processor */
181 __i = i;
182 asm volatile(LOCK_PREFIX "xaddl %0, %1"
183 : "+r" (i), "+m" (v->counter)
184 : : "memory");
185 return i + __i;
186
187#ifdef CONFIG_M386
188no_xadd: /* Legacy 386 processor */
189 local_irq_save(flags);
190 __i = atomic_read(v);
191 atomic_set(v, i + __i);
192 local_irq_restore(flags);
193 return i + __i;
194#endif
195}
196
197/**
198 * atomic_sub_return - subtract integer and return
199 * @v: pointer of type atomic_t
200 * @i: integer value to subtract
201 *
202 * Atomically subtracts @i from @v and returns @v - @i
203 */
204static inline int atomic_sub_return(int i, atomic_t *v)
205{
206 return atomic_add_return(-i, v);
207}
208
209static inline int atomic_cmpxchg(atomic_t *v, int old, int new)
210{
211 return cmpxchg(&v->counter, old, new);
212}
213
214static inline int atomic_xchg(atomic_t *v, int new)
215{
216 return xchg(&v->counter, new);
217}
218
219/**
220 * atomic_add_unless - add unless the number is already a given value
221 * @v: pointer of type atomic_t
222 * @a: the amount to add to v...
223 * @u: ...unless v is equal to u.
224 *
225 * Atomically adds @a to @v, so long as @v was not already @u.
226 * Returns non-zero if @v was not @u, and zero otherwise.
227 */
228static inline int atomic_add_unless(atomic_t *v, int a, int u)
229{
230 int c, old;
231 c = atomic_read(v);
232 for (;;) {
233 if (unlikely(c == (u)))
234 break;
235 old = atomic_cmpxchg((v), c, c + (a));
236 if (likely(old == c))
237 break;
238 c = old;
239 }
240 return c != (u);
241}
242
243#define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0)
244
245#define atomic_inc_return(v) (atomic_add_return(1, v))
246#define atomic_dec_return(v) (atomic_sub_return(1, v))
247
248/* These are x86-specific, used by some header files */
249#define atomic_clear_mask(mask, addr) \
250 asm volatile(LOCK_PREFIX "andl %0,%1" \
251 : : "r" (~(mask)), "m" (*(addr)) : "memory")
252
253#define atomic_set_mask(mask, addr) \
254 asm volatile(LOCK_PREFIX "orl %0,%1" \
255 : : "r" (mask), "m" (*(addr)) : "memory")
256
257/* Atomic operations are already serializing on x86 */
258#define smp_mb__before_atomic_dec() barrier()
259#define smp_mb__after_atomic_dec() barrier()
260#define smp_mb__before_atomic_inc() barrier()
261#define smp_mb__after_atomic_inc() barrier()
262
263/* An 64bit atomic type */
264
265typedef struct {
266 u64 __aligned(8) counter;
267} atomic64_t;
268
269#define ATOMIC64_INIT(val) { (val) }
270
271extern u64 atomic64_cmpxchg(atomic64_t *ptr, u64 old_val, u64 new_val);
272
273/**
274 * atomic64_xchg - xchg atomic64 variable
275 * @ptr: pointer to type atomic64_t
276 * @new_val: value to assign
277 *
278 * Atomically xchgs the value of @ptr to @new_val and returns
279 * the old value.
280 */
281extern u64 atomic64_xchg(atomic64_t *ptr, u64 new_val);
282
283/**
284 * atomic64_set - set atomic64 variable
285 * @ptr: pointer to type atomic64_t
286 * @new_val: value to assign
287 *
288 * Atomically sets the value of @ptr to @new_val.
289 */
290extern void atomic64_set(atomic64_t *ptr, u64 new_val);
291
292/**
293 * atomic64_read - read atomic64 variable
294 * @ptr: pointer to type atomic64_t
295 *
296 * Atomically reads the value of @ptr and returns it.
297 */
298static inline u64 atomic64_read(atomic64_t *ptr)
299{
300 u64 res;
301
302 /*
303 * Note, we inline this atomic64_t primitive because
304 * it only clobbers EAX/EDX and leaves the others
305 * untouched. We also (somewhat subtly) rely on the
306 * fact that cmpxchg8b returns the current 64-bit value
307 * of the memory location we are touching:
308 */
309 asm volatile(
310 "mov %%ebx, %%eax\n\t"
311 "mov %%ecx, %%edx\n\t"
312 LOCK_PREFIX "cmpxchg8b %1\n"
313 : "=&A" (res)
314 : "m" (*ptr)
315 );
316
317 return res;
318}
319
320extern u64 atomic64_read(atomic64_t *ptr);
321
322/**
323 * atomic64_add_return - add and return
324 * @delta: integer value to add
325 * @ptr: pointer to type atomic64_t
326 *
327 * Atomically adds @delta to @ptr and returns @delta + *@ptr
328 */
329extern u64 atomic64_add_return(u64 delta, atomic64_t *ptr);
330
331/*
332 * Other variants with different arithmetic operators:
333 */
334extern u64 atomic64_sub_return(u64 delta, atomic64_t *ptr);
335extern u64 atomic64_inc_return(atomic64_t *ptr);
336extern u64 atomic64_dec_return(atomic64_t *ptr);
337
338/**
339 * atomic64_add - add integer to atomic64 variable
340 * @delta: integer value to add
341 * @ptr: pointer to type atomic64_t
342 *
343 * Atomically adds @delta to @ptr.
344 */
345extern void atomic64_add(u64 delta, atomic64_t *ptr);
346
347/**
348 * atomic64_sub - subtract the atomic64 variable
349 * @delta: integer value to subtract
350 * @ptr: pointer to type atomic64_t
351 *
352 * Atomically subtracts @delta from @ptr.
353 */
354extern void atomic64_sub(u64 delta, atomic64_t *ptr);
355
356/**
357 * atomic64_sub_and_test - subtract value from variable and test result
358 * @delta: integer value to subtract
359 * @ptr: pointer to type atomic64_t
360 *
361 * Atomically subtracts @delta from @ptr and returns
362 * true if the result is zero, or false for all
363 * other cases.
364 */
365extern int atomic64_sub_and_test(u64 delta, atomic64_t *ptr);
366
367/**
368 * atomic64_inc - increment atomic64 variable
369 * @ptr: pointer to type atomic64_t
370 *
371 * Atomically increments @ptr by 1.
372 */
373extern void atomic64_inc(atomic64_t *ptr);
374
375/**
376 * atomic64_dec - decrement atomic64 variable
377 * @ptr: pointer to type atomic64_t
378 *
379 * Atomically decrements @ptr by 1.
380 */
381extern void atomic64_dec(atomic64_t *ptr);
382
383/**
384 * atomic64_dec_and_test - decrement and test
385 * @ptr: pointer to type atomic64_t
386 *
387 * Atomically decrements @ptr by 1 and
388 * returns true if the result is 0, or false for all other
389 * cases.
390 */
391extern int atomic64_dec_and_test(atomic64_t *ptr);
392
393/**
394 * atomic64_inc_and_test - increment and test
395 * @ptr: pointer to type atomic64_t
396 *
397 * Atomically increments @ptr by 1
398 * and returns true if the result is zero, or false for all
399 * other cases.
400 */
401extern int atomic64_inc_and_test(atomic64_t *ptr);
402
403/**
404 * atomic64_add_negative - add and test if negative
405 * @delta: integer value to add
406 * @ptr: pointer to type atomic64_t
407 *
408 * Atomically adds @delta to @ptr and returns true
409 * if the result is negative, or false when
410 * result is greater than or equal to zero.
411 */
412extern int atomic64_add_negative(u64 delta, atomic64_t *ptr);
413
414#include <asm-generic/atomic-long.h>
415#endif /* _ASM_X86_ATOMIC_32_H */
diff --git a/arch/x86/include/asm/atomic_64.h b/arch/x86/include/asm/atomic_64.h
deleted file mode 100644
index d605dc268e79..000000000000
--- a/arch/x86/include/asm/atomic_64.h
+++ /dev/null
@@ -1,485 +0,0 @@
1#ifndef _ASM_X86_ATOMIC_64_H
2#define _ASM_X86_ATOMIC_64_H
3
4#include <linux/types.h>
5#include <asm/alternative.h>
6#include <asm/cmpxchg.h>
7
8/*
9 * Atomic operations that C can't guarantee us. Useful for
10 * resource counting etc..
11 */
12
13#define ATOMIC_INIT(i) { (i) }
14
15/**
16 * atomic_read - read atomic variable
17 * @v: pointer of type atomic_t
18 *
19 * Atomically reads the value of @v.
20 */
21static inline int atomic_read(const atomic_t *v)
22{
23 return v->counter;
24}
25
26/**
27 * atomic_set - set atomic variable
28 * @v: pointer of type atomic_t
29 * @i: required value
30 *
31 * Atomically sets the value of @v to @i.
32 */
33static inline void atomic_set(atomic_t *v, int i)
34{
35 v->counter = i;
36}
37
38/**
39 * atomic_add - add integer to atomic variable
40 * @i: integer value to add
41 * @v: pointer of type atomic_t
42 *
43 * Atomically adds @i to @v.
44 */
45static inline void atomic_add(int i, atomic_t *v)
46{
47 asm volatile(LOCK_PREFIX "addl %1,%0"
48 : "=m" (v->counter)
49 : "ir" (i), "m" (v->counter));
50}
51
52/**
53 * atomic_sub - subtract the atomic variable
54 * @i: integer value to subtract
55 * @v: pointer of type atomic_t
56 *
57 * Atomically subtracts @i from @v.
58 */
59static inline void atomic_sub(int i, atomic_t *v)
60{
61 asm volatile(LOCK_PREFIX "subl %1,%0"
62 : "=m" (v->counter)
63 : "ir" (i), "m" (v->counter));
64}
65
66/**
67 * atomic_sub_and_test - subtract value from variable and test result
68 * @i: integer value to subtract
69 * @v: pointer of type atomic_t
70 *
71 * Atomically subtracts @i from @v and returns
72 * true if the result is zero, or false for all
73 * other cases.
74 */
75static inline int atomic_sub_and_test(int i, atomic_t *v)
76{
77 unsigned char c;
78
79 asm volatile(LOCK_PREFIX "subl %2,%0; sete %1"
80 : "=m" (v->counter), "=qm" (c)
81 : "ir" (i), "m" (v->counter) : "memory");
82 return c;
83}
84
85/**
86 * atomic_inc - increment atomic variable
87 * @v: pointer of type atomic_t
88 *
89 * Atomically increments @v by 1.
90 */
91static inline void atomic_inc(atomic_t *v)
92{
93 asm volatile(LOCK_PREFIX "incl %0"
94 : "=m" (v->counter)
95 : "m" (v->counter));
96}
97
98/**
99 * atomic_dec - decrement atomic variable
100 * @v: pointer of type atomic_t
101 *
102 * Atomically decrements @v by 1.
103 */
104static inline void atomic_dec(atomic_t *v)
105{
106 asm volatile(LOCK_PREFIX "decl %0"
107 : "=m" (v->counter)
108 : "m" (v->counter));
109}
110
111/**
112 * atomic_dec_and_test - decrement and test
113 * @v: pointer of type atomic_t
114 *
115 * Atomically decrements @v by 1 and
116 * returns true if the result is 0, or false for all other
117 * cases.
118 */
119static inline int atomic_dec_and_test(atomic_t *v)
120{
121 unsigned char c;
122
123 asm volatile(LOCK_PREFIX "decl %0; sete %1"
124 : "=m" (v->counter), "=qm" (c)
125 : "m" (v->counter) : "memory");
126 return c != 0;
127}
128
129/**
130 * atomic_inc_and_test - increment and test
131 * @v: pointer of type atomic_t
132 *
133 * Atomically increments @v by 1
134 * and returns true if the result is zero, or false for all
135 * other cases.
136 */
137static inline int atomic_inc_and_test(atomic_t *v)
138{
139 unsigned char c;
140
141 asm volatile(LOCK_PREFIX "incl %0; sete %1"
142 : "=m" (v->counter), "=qm" (c)
143 : "m" (v->counter) : "memory");
144 return c != 0;
145}
146
147/**
148 * atomic_add_negative - add and test if negative
149 * @i: integer value to add
150 * @v: pointer of type atomic_t
151 *
152 * Atomically adds @i to @v and returns true
153 * if the result is negative, or false when
154 * result is greater than or equal to zero.
155 */
156static inline int atomic_add_negative(int i, atomic_t *v)
157{
158 unsigned char c;
159
160 asm volatile(LOCK_PREFIX "addl %2,%0; sets %1"
161 : "=m" (v->counter), "=qm" (c)
162 : "ir" (i), "m" (v->counter) : "memory");
163 return c;
164}
165
166/**
167 * atomic_add_return - add and return
168 * @i: integer value to add
169 * @v: pointer of type atomic_t
170 *
171 * Atomically adds @i to @v and returns @i + @v
172 */
173static inline int atomic_add_return(int i, atomic_t *v)
174{
175 int __i = i;
176 asm volatile(LOCK_PREFIX "xaddl %0, %1"
177 : "+r" (i), "+m" (v->counter)
178 : : "memory");
179 return i + __i;
180}
181
182static inline int atomic_sub_return(int i, atomic_t *v)
183{
184 return atomic_add_return(-i, v);
185}
186
187#define atomic_inc_return(v) (atomic_add_return(1, v))
188#define atomic_dec_return(v) (atomic_sub_return(1, v))
189
190/* The 64-bit atomic type */
191
192#define ATOMIC64_INIT(i) { (i) }
193
194/**
195 * atomic64_read - read atomic64 variable
196 * @v: pointer of type atomic64_t
197 *
198 * Atomically reads the value of @v.
199 * Doesn't imply a read memory barrier.
200 */
201static inline long atomic64_read(const atomic64_t *v)
202{
203 return v->counter;
204}
205
206/**
207 * atomic64_set - set atomic64 variable
208 * @v: pointer to type atomic64_t
209 * @i: required value
210 *
211 * Atomically sets the value of @v to @i.
212 */
213static inline void atomic64_set(atomic64_t *v, long i)
214{
215 v->counter = i;
216}
217
218/**
219 * atomic64_add - add integer to atomic64 variable
220 * @i: integer value to add
221 * @v: pointer to type atomic64_t
222 *
223 * Atomically adds @i to @v.
224 */
225static inline void atomic64_add(long i, atomic64_t *v)
226{
227 asm volatile(LOCK_PREFIX "addq %1,%0"
228 : "=m" (v->counter)
229 : "er" (i), "m" (v->counter));
230}
231
232/**
233 * atomic64_sub - subtract the atomic64 variable
234 * @i: integer value to subtract
235 * @v: pointer to type atomic64_t
236 *
237 * Atomically subtracts @i from @v.
238 */
239static inline void atomic64_sub(long i, atomic64_t *v)
240{
241 asm volatile(LOCK_PREFIX "subq %1,%0"
242 : "=m" (v->counter)
243 : "er" (i), "m" (v->counter));
244}
245
246/**
247 * atomic64_sub_and_test - subtract value from variable and test result
248 * @i: integer value to subtract
249 * @v: pointer to type atomic64_t
250 *
251 * Atomically subtracts @i from @v and returns
252 * true if the result is zero, or false for all
253 * other cases.
254 */
255static inline int atomic64_sub_and_test(long i, atomic64_t *v)
256{
257 unsigned char c;
258
259 asm volatile(LOCK_PREFIX "subq %2,%0; sete %1"
260 : "=m" (v->counter), "=qm" (c)
261 : "er" (i), "m" (v->counter) : "memory");
262 return c;
263}
264
265/**
266 * atomic64_inc - increment atomic64 variable
267 * @v: pointer to type atomic64_t
268 *
269 * Atomically increments @v by 1.
270 */
271static inline void atomic64_inc(atomic64_t *v)
272{
273 asm volatile(LOCK_PREFIX "incq %0"
274 : "=m" (v->counter)
275 : "m" (v->counter));
276}
277
278/**
279 * atomic64_dec - decrement atomic64 variable
280 * @v: pointer to type atomic64_t
281 *
282 * Atomically decrements @v by 1.
283 */
284static inline void atomic64_dec(atomic64_t *v)
285{
286 asm volatile(LOCK_PREFIX "decq %0"
287 : "=m" (v->counter)
288 : "m" (v->counter));
289}
290
291/**
292 * atomic64_dec_and_test - decrement and test
293 * @v: pointer to type atomic64_t
294 *
295 * Atomically decrements @v by 1 and
296 * returns true if the result is 0, or false for all other
297 * cases.
298 */
299static inline int atomic64_dec_and_test(atomic64_t *v)
300{
301 unsigned char c;
302
303 asm volatile(LOCK_PREFIX "decq %0; sete %1"
304 : "=m" (v->counter), "=qm" (c)
305 : "m" (v->counter) : "memory");
306 return c != 0;
307}
308
309/**
310 * atomic64_inc_and_test - increment and test
311 * @v: pointer to type atomic64_t
312 *
313 * Atomically increments @v by 1
314 * and returns true if the result is zero, or false for all
315 * other cases.
316 */
317static inline int atomic64_inc_and_test(atomic64_t *v)
318{
319 unsigned char c;
320
321 asm volatile(LOCK_PREFIX "incq %0; sete %1"
322 : "=m" (v->counter), "=qm" (c)
323 : "m" (v->counter) : "memory");
324 return c != 0;
325}
326
327/**
328 * atomic64_add_negative - add and test if negative
329 * @i: integer value to add
330 * @v: pointer to type atomic64_t
331 *
332 * Atomically adds @i to @v and returns true
333 * if the result is negative, or false when
334 * result is greater than or equal to zero.
335 */
336static inline int atomic64_add_negative(long i, atomic64_t *v)
337{
338 unsigned char c;
339
340 asm volatile(LOCK_PREFIX "addq %2,%0; sets %1"
341 : "=m" (v->counter), "=qm" (c)
342 : "er" (i), "m" (v->counter) : "memory");
343 return c;
344}
345
346/**
347 * atomic64_add_return - add and return
348 * @i: integer value to add
349 * @v: pointer to type atomic64_t
350 *
351 * Atomically adds @i to @v and returns @i + @v
352 */
353static inline long atomic64_add_return(long i, atomic64_t *v)
354{
355 long __i = i;
356 asm volatile(LOCK_PREFIX "xaddq %0, %1;"
357 : "+r" (i), "+m" (v->counter)
358 : : "memory");
359 return i + __i;
360}
361
362static inline long atomic64_sub_return(long i, atomic64_t *v)
363{
364 return atomic64_add_return(-i, v);
365}
366
367#define atomic64_inc_return(v) (atomic64_add_return(1, (v)))
368#define atomic64_dec_return(v) (atomic64_sub_return(1, (v)))
369
370static inline long atomic64_cmpxchg(atomic64_t *v, long old, long new)
371{
372 return cmpxchg(&v->counter, old, new);
373}
374
375static inline long atomic64_xchg(atomic64_t *v, long new)
376{
377 return xchg(&v->counter, new);
378}
379
380static inline long atomic_cmpxchg(atomic_t *v, int old, int new)
381{
382 return cmpxchg(&v->counter, old, new);
383}
384
385static inline long atomic_xchg(atomic_t *v, int new)
386{
387 return xchg(&v->counter, new);
388}
389
390/**
391 * atomic_add_unless - add unless the number is a given value
392 * @v: pointer of type atomic_t
393 * @a: the amount to add to v...
394 * @u: ...unless v is equal to u.
395 *
396 * Atomically adds @a to @v, so long as it was not @u.
397 * Returns non-zero if @v was not @u, and zero otherwise.
398 */
399static inline int atomic_add_unless(atomic_t *v, int a, int u)
400{
401 int c, old;
402 c = atomic_read(v);
403 for (;;) {
404 if (unlikely(c == (u)))
405 break;
406 old = atomic_cmpxchg((v), c, c + (a));
407 if (likely(old == c))
408 break;
409 c = old;
410 }
411 return c != (u);
412}
413
414#define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0)
415
416/**
417 * atomic64_add_unless - add unless the number is a given value
418 * @v: pointer of type atomic64_t
419 * @a: the amount to add to v...
420 * @u: ...unless v is equal to u.
421 *
422 * Atomically adds @a to @v, so long as it was not @u.
423 * Returns non-zero if @v was not @u, and zero otherwise.
424 */
425static inline int atomic64_add_unless(atomic64_t *v, long a, long u)
426{
427 long c, old;
428 c = atomic64_read(v);
429 for (;;) {
430 if (unlikely(c == (u)))
431 break;
432 old = atomic64_cmpxchg((v), c, c + (a));
433 if (likely(old == c))
434 break;
435 c = old;
436 }
437 return c != (u);
438}
439
440/**
441 * atomic_inc_short - increment of a short integer
442 * @v: pointer to type short int
443 *
444 * Atomically adds 1 to @v
445 * Returns the new value of @v
446 */
447static inline short int atomic_inc_short(short int *v)
448{
449 asm(LOCK_PREFIX "addw $1, %0" : "+m" (*v));
450 return *v;
451}
452
453/**
454 * atomic_or_long - OR of two long integers
455 * @v1: pointer to type unsigned long
456 * @v2: unsigned long value to OR into *@v1
457 *
458 * Atomically ORs @v2 into the value pointed to by @v1;
459 * the result is stored in *@v1, nothing is returned
460 */
461static inline void atomic_or_long(unsigned long *v1, unsigned long v2)
462{
463 asm(LOCK_PREFIX "orq %1, %0" : "+m" (*v1) : "r" (v2));
464}
465
466#define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0)
467
468/* These are x86-specific, used by some header files */
469#define atomic_clear_mask(mask, addr) \
470 asm volatile(LOCK_PREFIX "andl %0,%1" \
471 : : "r" (~(mask)), "m" (*(addr)) : "memory")
472
473#define atomic_set_mask(mask, addr) \
474 asm volatile(LOCK_PREFIX "orl %0,%1" \
475 : : "r" ((unsigned)(mask)), "m" (*(addr)) \
476 : "memory")
477
478/* Atomic operations are already serializing on x86 */
479#define smp_mb__before_atomic_dec() barrier()
480#define smp_mb__after_atomic_dec() barrier()
481#define smp_mb__before_atomic_inc() barrier()
482#define smp_mb__after_atomic_inc() barrier()
483
484#include <asm-generic/atomic-long.h>
485#endif /* _ASM_X86_ATOMIC_64_H */
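A minimal usage sketch of the atomic64_add_unless()/atomic64_inc_not_zero() pair defined above, assuming a hypothetical refcounted object (the struct and helper names are illustrative, not from this header):

struct obj {
	atomic64_t refcount;
};

/* take a reference only if the count has not already dropped to zero */
static inline bool obj_tryget(struct obj *o)
{
	return atomic64_inc_not_zero(&o->refcount) != 0;
}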
diff --git a/arch/x86/include/asm/cpu_debug.h b/arch/x86/include/asm/cpu_debug.h
deleted file mode 100644
index d96c1ee3a95c..000000000000
--- a/arch/x86/include/asm/cpu_debug.h
+++ /dev/null
@@ -1,127 +0,0 @@
1#ifndef _ASM_X86_CPU_DEBUG_H
2#define _ASM_X86_CPU_DEBUG_H
3
4/*
5 * CPU x86 architecture debug
6 *
7 * Copyright(C) 2009 Jaswinder Singh Rajput
8 */
9
10/* Register flags */
11enum cpu_debug_bit {
12/* Model Specific Registers (MSRs) */
13 CPU_MC_BIT, /* Machine Check */
14 CPU_MONITOR_BIT, /* Monitor */
15 CPU_TIME_BIT, /* Time */
16 CPU_PMC_BIT, /* Performance Monitor */
17 CPU_PLATFORM_BIT, /* Platform */
18 CPU_APIC_BIT, /* APIC */
19 CPU_POWERON_BIT, /* Power-on */
20 CPU_CONTROL_BIT, /* Control */
21 CPU_FEATURES_BIT, /* Features control */
22 CPU_LBRANCH_BIT, /* Last Branch */
23 CPU_BIOS_BIT, /* BIOS */
24 CPU_FREQ_BIT, /* Frequency */
25 CPU_MTTR_BIT, /* MTRR */
26 CPU_PERF_BIT, /* Performance */
27 CPU_CACHE_BIT, /* Cache */
28 CPU_SYSENTER_BIT, /* Sysenter */
29 CPU_THERM_BIT, /* Thermal */
30 CPU_MISC_BIT, /* Miscellaneous */
31 CPU_DEBUG_BIT, /* Debug */
32 CPU_PAT_BIT, /* PAT */
33 CPU_VMX_BIT, /* VMX */
34 CPU_CALL_BIT, /* System Call */
35 CPU_BASE_BIT, /* BASE Address */
36 CPU_VER_BIT, /* Version ID */
37 CPU_CONF_BIT, /* Configuration */
38 CPU_SMM_BIT, /* System mgmt mode */
39 CPU_SVM_BIT, /*Secure Virtual Machine*/
40 CPU_OSVM_BIT, /* OS-Visible Workaround*/
41/* Standard Registers */
42 CPU_TSS_BIT, /* Task Stack Segment */
43 CPU_CR_BIT, /* Control Registers */
44 CPU_DT_BIT, /* Descriptor Table */
45/* End of Registers flags */
46 CPU_REG_ALL_BIT, /* Select all Registers */
47};
48
49#define CPU_REG_ALL (~0) /* Select all Registers */
50
51#define CPU_MC (1 << CPU_MC_BIT)
52#define CPU_MONITOR (1 << CPU_MONITOR_BIT)
53#define CPU_TIME (1 << CPU_TIME_BIT)
54#define CPU_PMC (1 << CPU_PMC_BIT)
55#define CPU_PLATFORM (1 << CPU_PLATFORM_BIT)
56#define CPU_APIC (1 << CPU_APIC_BIT)
57#define CPU_POWERON (1 << CPU_POWERON_BIT)
58#define CPU_CONTROL (1 << CPU_CONTROL_BIT)
59#define CPU_FEATURES (1 << CPU_FEATURES_BIT)
60#define CPU_LBRANCH (1 << CPU_LBRANCH_BIT)
61#define CPU_BIOS (1 << CPU_BIOS_BIT)
62#define CPU_FREQ (1 << CPU_FREQ_BIT)
63#define CPU_MTRR (1 << CPU_MTTR_BIT)
64#define CPU_PERF (1 << CPU_PERF_BIT)
65#define CPU_CACHE (1 << CPU_CACHE_BIT)
66#define CPU_SYSENTER (1 << CPU_SYSENTER_BIT)
67#define CPU_THERM (1 << CPU_THERM_BIT)
68#define CPU_MISC (1 << CPU_MISC_BIT)
69#define CPU_DEBUG (1 << CPU_DEBUG_BIT)
70#define CPU_PAT (1 << CPU_PAT_BIT)
71#define CPU_VMX (1 << CPU_VMX_BIT)
72#define CPU_CALL (1 << CPU_CALL_BIT)
73#define CPU_BASE (1 << CPU_BASE_BIT)
74#define CPU_VER (1 << CPU_VER_BIT)
75#define CPU_CONF (1 << CPU_CONF_BIT)
76#define CPU_SMM (1 << CPU_SMM_BIT)
77#define CPU_SVM (1 << CPU_SVM_BIT)
78#define CPU_OSVM (1 << CPU_OSVM_BIT)
79#define CPU_TSS (1 << CPU_TSS_BIT)
80#define CPU_CR (1 << CPU_CR_BIT)
81#define CPU_DT (1 << CPU_DT_BIT)
82
83/* Register file flags */
84enum cpu_file_bit {
85 CPU_INDEX_BIT, /* index */
86 CPU_VALUE_BIT, /* value */
87};
88
89#define CPU_FILE_VALUE (1 << CPU_VALUE_BIT)
90
91#define MAX_CPU_FILES 512
92
93struct cpu_private {
94 unsigned cpu;
95 unsigned type;
96 unsigned reg;
97 unsigned file;
98};
99
100struct cpu_debug_base {
101 char *name; /* Register name */
102 unsigned flag; /* Register flag */
103 unsigned write; /* Register write flag */
104};
105
106/*
107 * Currently it looks similar to cpu_debug_base but once we add more files
108 * cpu_file_base will go in a different direction
109 */
110struct cpu_file_base {
111 char *name; /* Register file name */
112 unsigned flag; /* Register file flag */
113 unsigned write; /* Register write flag */
114};
115
116struct cpu_cpuX_base {
117 struct dentry *dentry; /* Register dentry */
118 int init; /* Register index file */
119};
120
121struct cpu_debug_range {
122 unsigned min; /* Register range min */
123 unsigned max; /* Register range max */
124 unsigned flag; /* Supported flags */
125};
126
127#endif /* _ASM_X86_CPU_DEBUG_H */
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 637e1ec963c3..0cd82d068613 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -168,6 +168,10 @@
168 168#define X86_FEATURE_FLEXPRIORITY (8*32+ 2) /* Intel FlexPriority */
169 169#define X86_FEATURE_EPT (8*32+ 3) /* Intel Extended Page Table */
170 170#define X86_FEATURE_VPID (8*32+ 4) /* Intel Virtual Processor ID */
171#define X86_FEATURE_NPT (8*32+5) /* AMD Nested Page Table support */
172#define X86_FEATURE_LBRV (8*32+6) /* AMD LBR Virtualization support */
173#define X86_FEATURE_SVML (8*32+7) /* "svm_lock" AMD SVM locking MSR */
174#define X86_FEATURE_NRIPS (8*32+8) /* "nrip_save" AMD SVM next_rip save */
171 175
172 176#if defined(__KERNEL__) && !defined(__ASSEMBLY__)
173 177
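For orientation (not part of the patch): an encoding such as (8*32+5) means capability word 8, bit 5, and the new bits are tested with the existing helpers. A sketch:

if (boot_cpu_has(X86_FEATURE_NPT))
	printk(KERN_INFO "AMD nested paging (NPT) supported\n");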
diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h
index 8240f76b531e..b81002f23614 100644
--- a/arch/x86/include/asm/debugreg.h
+++ b/arch/x86/include/asm/debugreg.h
@@ -14,6 +14,9 @@
14 14 which debugging register was responsible for the trap. The other bits
15 15 are either reserved or not of interest to us. */
16 16
17/* Define reserved bits in DR6 which are always set to 1 */
18#define DR6_RESERVED (0xFFFF0FF0)
19
17 20#define DR_TRAP0 (0x1) /* db0 */
18 21#define DR_TRAP1 (0x2) /* db1 */
19 22#define DR_TRAP2 (0x4) /* db2 */
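A sketch of the intended use of DR6_RESERVED (an assumption about the caller, not part of this hunk): when the debug-exception handler reads DR6 with the existing get_debugreg() helper, the always-set reserved bits are masked off so only the status bits (DR_TRAP0..DR_TRAP3 and friends) remain:

unsigned long dr6;

get_debugreg(dr6, 6);		/* read %dr6 */
dr6 &= ~DR6_RESERVED;		/* drop the reserved always-1 bits */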
diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h
index b4501ee223ad..f2ad2163109d 100644
--- a/arch/x86/include/asm/elf.h
+++ b/arch/x86/include/asm/elf.h
@@ -170,10 +170,7 @@ static inline void elf_common_init(struct thread_struct *t,
170} 170}
171 171
172#define ELF_PLAT_INIT(_r, load_addr) \ 172#define ELF_PLAT_INIT(_r, load_addr) \
173do { \ 173 elf_common_init(&current->thread, _r, 0)
174 elf_common_init(&current->thread, _r, 0); \
175 clear_thread_flag(TIF_IA32); \
176} while (0)
177 174
178#define COMPAT_ELF_PLAT_INIT(regs, load_addr) \ 175#define COMPAT_ELF_PLAT_INIT(regs, load_addr) \
179 elf_common_init(&current->thread, regs, __USER_DS) 176 elf_common_init(&current->thread, regs, __USER_DS)
@@ -181,14 +178,8 @@ do { \
181void start_thread_ia32(struct pt_regs *regs, u32 new_ip, u32 new_sp); 178void start_thread_ia32(struct pt_regs *regs, u32 new_ip, u32 new_sp);
182#define compat_start_thread start_thread_ia32 179#define compat_start_thread start_thread_ia32
183 180
184#define COMPAT_SET_PERSONALITY(ex) \ 181void set_personality_ia32(void);
185do { \ 182#define COMPAT_SET_PERSONALITY(ex) set_personality_ia32()
186 if (test_thread_flag(TIF_IA32)) \
187 clear_thread_flag(TIF_ABI_PENDING); \
188 else \
189 set_thread_flag(TIF_ABI_PENDING); \
190 current->personality |= force_personality32; \
191} while (0)
192 183
193#define COMPAT_ELF_PLATFORM ("i686") 184#define COMPAT_ELF_PLATFORM ("i686")
194 185
diff --git a/arch/x86/include/asm/fb.h b/arch/x86/include/asm/fb.h
index 53018464aea6..2519d0679d99 100644
--- a/arch/x86/include/asm/fb.h
+++ b/arch/x86/include/asm/fb.h
@@ -12,10 +12,6 @@ static inline void fb_pgprotect(struct file *file, struct vm_area_struct *vma,
12 pgprot_val(vma->vm_page_prot) |= _PAGE_PCD; 12 pgprot_val(vma->vm_page_prot) |= _PAGE_PCD;
13} 13}
14 14
15#ifdef CONFIG_X86_32
16extern int fb_is_primary_device(struct fb_info *info); 15extern int fb_is_primary_device(struct fb_info *info);
17#else
18static inline int fb_is_primary_device(struct fb_info *info) { return 0; }
19#endif
20 16
21#endif /* _ASM_X86_FB_H */ 17#endif /* _ASM_X86_FB_H */
diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
index 14f9890eb495..635f03bb4995 100644
--- a/arch/x86/include/asm/fixmap.h
+++ b/arch/x86/include/asm/fixmap.h
@@ -118,14 +118,20 @@ enum fixed_addresses {
118 * 256 temporary boot-time mappings, used by early_ioremap(), 118 * 256 temporary boot-time mappings, used by early_ioremap(),
119 * before ioremap() is functional. 119 * before ioremap() is functional.
120 * 120 *
121 * We round it up to the next 256 pages boundary so that we 121 * If necessary we round it up to the next 256 pages boundary so
122 * can have a single pgd entry and a single pte table: 122 * that we can have a single pgd entry and a single pte table:
123 */ 123 */
124#define NR_FIX_BTMAPS 64 124#define NR_FIX_BTMAPS 64
125#define FIX_BTMAPS_SLOTS 4 125#define FIX_BTMAPS_SLOTS 4
126 FIX_BTMAP_END = __end_of_permanent_fixed_addresses + 256 - 126#define TOTAL_FIX_BTMAPS (NR_FIX_BTMAPS * FIX_BTMAPS_SLOTS)
127 (__end_of_permanent_fixed_addresses & 255), 127 FIX_BTMAP_END =
128 FIX_BTMAP_BEGIN = FIX_BTMAP_END + NR_FIX_BTMAPS*FIX_BTMAPS_SLOTS - 1, 128 (__end_of_permanent_fixed_addresses ^
129 (__end_of_permanent_fixed_addresses + TOTAL_FIX_BTMAPS - 1)) &
130 -PTRS_PER_PTE
131 ? __end_of_permanent_fixed_addresses + TOTAL_FIX_BTMAPS -
132 (__end_of_permanent_fixed_addresses & (TOTAL_FIX_BTMAPS - 1))
133 : __end_of_permanent_fixed_addresses,
134 FIX_BTMAP_BEGIN = FIX_BTMAP_END + TOTAL_FIX_BTMAPS - 1,
129 135#ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT
130 136 FIX_OHCI1394_BASE,
131 137#endif
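A worked example of the new FIX_BTMAP_END expression, assuming PTRS_PER_PTE == 512 and TOTAL_FIX_BTMAPS == 256 (the numbers are illustrative):

  __end_of_permanent_fixed_addresses == 300:
      300 ^ (300 + 255) == 300 ^ 555 has a bit set at or above bit 9,
      so the slots would straddle two pte tables; round up:
      300 + 256 - (300 & 255) == 512, and the btmaps occupy 512..767.

  __end_of_permanent_fixed_addresses == 100:
      100 ^ 355 stays below bit 9, no pte-table boundary is crossed,
      so FIX_BTMAP_END == 100 and no fixmap slots are wasted.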
diff --git a/arch/x86/include/asm/hpet.h b/arch/x86/include/asm/hpet.h
index 5d89fd2a3690..1d5c08a1bdfd 100644
--- a/arch/x86/include/asm/hpet.h
+++ b/arch/x86/include/asm/hpet.h
@@ -67,6 +67,7 @@ extern unsigned long hpet_address;
67 67extern unsigned long force_hpet_address;
68 68extern u8 hpet_blockid;
69 69extern int hpet_force_user;
70extern u8 hpet_msi_disable;
70 71extern int is_hpet_enabled(void);
71 72extern int hpet_enable(void);
72 73extern void hpet_disable(void);
diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h
index ebfb8a9e11f7..da2930924501 100644
--- a/arch/x86/include/asm/i387.h
+++ b/arch/x86/include/asm/i387.h
@@ -33,8 +33,16 @@ extern void init_thread_xstate(void);
33extern int dump_fpu(struct pt_regs *, struct user_i387_struct *); 33extern int dump_fpu(struct pt_regs *, struct user_i387_struct *);
34 34
35extern user_regset_active_fn fpregs_active, xfpregs_active; 35extern user_regset_active_fn fpregs_active, xfpregs_active;
36extern user_regset_get_fn fpregs_get, xfpregs_get, fpregs_soft_get; 36extern user_regset_get_fn fpregs_get, xfpregs_get, fpregs_soft_get,
37extern user_regset_set_fn fpregs_set, xfpregs_set, fpregs_soft_set; 37 xstateregs_get;
38extern user_regset_set_fn fpregs_set, xfpregs_set, fpregs_soft_set,
39 xstateregs_set;
40
41/*
42 * xstateregs_active == fpregs_active. Please refer to the comment
43 * at the definition of fpregs_active.
44 */
45#define xstateregs_active fpregs_active
38 46
39extern struct _fpx_sw_bytes fx_sw_reserved; 47extern struct _fpx_sw_bytes fx_sw_reserved;
40#ifdef CONFIG_IA32_EMULATION 48#ifdef CONFIG_IA32_EMULATION
diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
index 73739322b6d0..a1dcfa3ab17d 100644
--- a/arch/x86/include/asm/io.h
+++ b/arch/x86/include/asm/io.h
@@ -1,8 +1,42 @@
1#ifndef _ASM_X86_IO_H 1#ifndef _ASM_X86_IO_H
2#define _ASM_X86_IO_H 2#define _ASM_X86_IO_H
3 3
4/*
5 * This file contains the definitions for the x86 IO instructions
6 * inb/inw/inl/outb/outw/outl and the "string versions" of the same
7 * (insb/insw/insl/outsb/outsw/outsl). You can also use "pausing"
8 * versions of the single-IO instructions (inb_p/inw_p/..).
9 *
10 * This file is not meant to be obfuscating: it's just complicated
11 * to (a) handle it all in a way that makes gcc able to optimize it
12 * as well as possible and (b) trying to avoid writing the same thing
13 * over and over again with slight variations and possibly making a
14 * mistake somewhere.
15 */
16
17/*
18 * Thanks to James van Artsdalen for a better timing-fix than
19 * the two short jumps: using outb's to a nonexistent port seems
20 * to guarantee better timings even on fast machines.
21 *
22 * On the other hand, I'd like to be sure of a non-existent port:
23 * I feel a bit unsafe about using 0x80 (should be safe, though)
24 *
25 * Linus
26 */
27
28 /*
29 * Bit simplified and optimized by Jan Hubicka
30 * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999.
31 *
32 * isa_memset_io, isa_memcpy_fromio, isa_memcpy_toio added,
33 * isa_read[wl] and isa_write[wl] fixed
34 * - Arnaldo Carvalho de Melo <acme@conectiva.com.br>
35 */
36
4#define ARCH_HAS_IOREMAP_WC 37#define ARCH_HAS_IOREMAP_WC
5 38
39#include <linux/string.h>
6#include <linux/compiler.h> 40#include <linux/compiler.h>
7#include <asm-generic/int-ll64.h> 41#include <asm-generic/int-ll64.h>
8#include <asm/page.h> 42#include <asm/page.h>
@@ -173,11 +207,126 @@ static inline void __iomem *ioremap(resource_size_t offset, unsigned long size)
173extern void iounmap(volatile void __iomem *addr); 207extern void iounmap(volatile void __iomem *addr);
174 208
175 209
176#ifdef CONFIG_X86_32 210#ifdef __KERNEL__
177# include "io_32.h" 211
212#include <asm-generic/iomap.h>
213
214#include <linux/vmalloc.h>
215
216/*
217 * Convert a virtual cached pointer to an uncached pointer
218 */
219#define xlate_dev_kmem_ptr(p) p
220
221static inline void
222memset_io(volatile void __iomem *addr, unsigned char val, size_t count)
223{
224 memset((void __force *)addr, val, count);
225}
226
227static inline void
228memcpy_fromio(void *dst, const volatile void __iomem *src, size_t count)
229{
230 memcpy(dst, (const void __force *)src, count);
231}
232
233static inline void
234memcpy_toio(volatile void __iomem *dst, const void *src, size_t count)
235{
236 memcpy((void __force *)dst, src, count);
237}
238
239/*
240 * ISA space is 'always mapped' on a typical x86 system, no need to
241 * explicitly ioremap() it. The fact that the ISA IO space is mapped
242 * to PAGE_OFFSET is pure coincidence - it does not mean ISA values
243 * are physical addresses. The following constant pointer can be
244 * used as the IO-area pointer (it can be iounmapped as well, so the
245 * analogy with PCI is quite large):
246 */
247#define __ISA_IO_base ((char __iomem *)(PAGE_OFFSET))
248
249/*
250 * Cache management
251 *
252 * This is needed for two cases
253 * 1. Out of order aware processors
254 * 2. Accidentally out of order processors (PPro errata #51)
255 */
256
257static inline void flush_write_buffers(void)
258{
259#if defined(CONFIG_X86_OOSTORE) || defined(CONFIG_X86_PPRO_FENCE)
260 asm volatile("lock; addl $0,0(%%esp)": : :"memory");
261#endif
262}
263
264#endif /* __KERNEL__ */
265
266extern void native_io_delay(void);
267
268extern int io_delay_type;
269extern void io_delay_init(void);
270
271#if defined(CONFIG_PARAVIRT)
272#include <asm/paravirt.h>
178#else 273#else
179# include "io_64.h" 274
275static inline void slow_down_io(void)
276{
277 native_io_delay();
278#ifdef REALLY_SLOW_IO
279 native_io_delay();
280 native_io_delay();
281 native_io_delay();
180#endif 282#endif
283}
284
285#endif
286
287#define BUILDIO(bwl, bw, type) \
288static inline void out##bwl(unsigned type value, int port) \
289{ \
290 asm volatile("out" #bwl " %" #bw "0, %w1" \
291 : : "a"(value), "Nd"(port)); \
292} \
293 \
294static inline unsigned type in##bwl(int port) \
295{ \
296 unsigned type value; \
297 asm volatile("in" #bwl " %w1, %" #bw "0" \
298 : "=a"(value) : "Nd"(port)); \
299 return value; \
300} \
301 \
302static inline void out##bwl##_p(unsigned type value, int port) \
303{ \
304 out##bwl(value, port); \
305 slow_down_io(); \
306} \
307 \
308static inline unsigned type in##bwl##_p(int port) \
309{ \
310 unsigned type value = in##bwl(port); \
311 slow_down_io(); \
312 return value; \
313} \
314 \
315static inline void outs##bwl(int port, const void *addr, unsigned long count) \
316{ \
317 asm volatile("rep; outs" #bwl \
318 : "+S"(addr), "+c"(count) : "d"(port)); \
319} \
320 \
321static inline void ins##bwl(int port, void *addr, unsigned long count) \
322{ \
323 asm volatile("rep; ins" #bwl \
324 : "+D"(addr), "+c"(count) : "d"(port)); \
325}
326
327BUILDIO(b, b, char)
328BUILDIO(w, w, short)
329BUILDIO(l, , int)
181 330
182 331extern void *xlate_dev_mem_ptr(unsigned long phys);
183 332extern void unxlate_dev_mem_ptr(unsigned long phys, void *addr);
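For readability (not part of the patch), BUILDIO(b, b, char) above expands to the familiar byte-sized port accessors, roughly:

static inline void outb(unsigned char value, int port)
{
	asm volatile("outb %b0, %w1" : : "a"(value), "Nd"(port));
}

static inline unsigned char inb(int port)
{
	unsigned char value;
	asm volatile("inb %w1, %b0" : "=a"(value) : "Nd"(port));
	return value;
}

/* plus outb_p()/inb_p() adding slow_down_io(), and rep-string outsb()/insb() */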
diff --git a/arch/x86/include/asm/io_32.h b/arch/x86/include/asm/io_32.h
deleted file mode 100644
index a299900f5920..000000000000
--- a/arch/x86/include/asm/io_32.h
+++ /dev/null
@@ -1,196 +0,0 @@
1#ifndef _ASM_X86_IO_32_H
2#define _ASM_X86_IO_32_H
3
4#include <linux/string.h>
5#include <linux/compiler.h>
6
7/*
8 * This file contains the definitions for the x86 IO instructions
9 * inb/inw/inl/outb/outw/outl and the "string versions" of the same
10 * (insb/insw/insl/outsb/outsw/outsl). You can also use "pausing"
11 * versions of the single-IO instructions (inb_p/inw_p/..).
12 *
13 * This file is not meant to be obfuscating: it's just complicated
14 * to (a) handle it all in a way that makes gcc able to optimize it
15 * as well as possible and (b) trying to avoid writing the same thing
16 * over and over again with slight variations and possibly making a
17 * mistake somewhere.
18 */
19
20/*
21 * Thanks to James van Artsdalen for a better timing-fix than
22 * the two short jumps: using outb's to a nonexistent port seems
23 * to guarantee better timings even on fast machines.
24 *
25 * On the other hand, I'd like to be sure of a non-existent port:
26 * I feel a bit unsafe about using 0x80 (should be safe, though)
27 *
28 * Linus
29 */
30
31 /*
32 * Bit simplified and optimized by Jan Hubicka
33 * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999.
34 *
35 * isa_memset_io, isa_memcpy_fromio, isa_memcpy_toio added,
36 * isa_read[wl] and isa_write[wl] fixed
37 * - Arnaldo Carvalho de Melo <acme@conectiva.com.br>
38 */
39
40#define XQUAD_PORTIO_BASE 0xfe400000
41#define XQUAD_PORTIO_QUAD 0x40000 /* 256k per quad. */
42
43#ifdef __KERNEL__
44
45#include <asm-generic/iomap.h>
46
47#include <linux/vmalloc.h>
48
49/*
50 * Convert a virtual cached pointer to an uncached pointer
51 */
52#define xlate_dev_kmem_ptr(p) p
53
54static inline void
55memset_io(volatile void __iomem *addr, unsigned char val, int count)
56{
57 memset((void __force *)addr, val, count);
58}
59
60static inline void
61memcpy_fromio(void *dst, const volatile void __iomem *src, int count)
62{
63 __memcpy(dst, (const void __force *)src, count);
64}
65
66static inline void
67memcpy_toio(volatile void __iomem *dst, const void *src, int count)
68{
69 __memcpy((void __force *)dst, src, count);
70}
71
72/*
73 * ISA space is 'always mapped' on a typical x86 system, no need to
74 * explicitly ioremap() it. The fact that the ISA IO space is mapped
75 * to PAGE_OFFSET is pure coincidence - it does not mean ISA values
76 * are physical addresses. The following constant pointer can be
77 * used as the IO-area pointer (it can be iounmapped as well, so the
78 * analogy with PCI is quite large):
79 */
80#define __ISA_IO_base ((char __iomem *)(PAGE_OFFSET))
81
82/*
83 * Cache management
84 *
85 * This is needed for two cases
86 * 1. Out of order aware processors
87 * 2. Accidentally out of order processors (PPro errata #51)
88 */
89
90#if defined(CONFIG_X86_OOSTORE) || defined(CONFIG_X86_PPRO_FENCE)
91
92static inline void flush_write_buffers(void)
93{
94 asm volatile("lock; addl $0,0(%%esp)": : :"memory");
95}
96
97#else
98
99#define flush_write_buffers() do { } while (0)
100
101#endif
102
103#endif /* __KERNEL__ */
104
105extern void native_io_delay(void);
106
107extern int io_delay_type;
108extern void io_delay_init(void);
109
110#if defined(CONFIG_PARAVIRT)
111#include <asm/paravirt.h>
112#else
113
114static inline void slow_down_io(void)
115{
116 native_io_delay();
117#ifdef REALLY_SLOW_IO
118 native_io_delay();
119 native_io_delay();
120 native_io_delay();
121#endif
122}
123
124#endif
125
126#define __BUILDIO(bwl, bw, type) \
127static inline void out##bwl(unsigned type value, int port) \
128{ \
129 out##bwl##_local(value, port); \
130} \
131 \
132static inline unsigned type in##bwl(int port) \
133{ \
134 return in##bwl##_local(port); \
135}
136
137#define BUILDIO(bwl, bw, type) \
138static inline void out##bwl##_local(unsigned type value, int port) \
139{ \
140 asm volatile("out" #bwl " %" #bw "0, %w1" \
141 : : "a"(value), "Nd"(port)); \
142} \
143 \
144static inline unsigned type in##bwl##_local(int port) \
145{ \
146 unsigned type value; \
147 asm volatile("in" #bwl " %w1, %" #bw "0" \
148 : "=a"(value) : "Nd"(port)); \
149 return value; \
150} \
151 \
152static inline void out##bwl##_local_p(unsigned type value, int port) \
153{ \
154 out##bwl##_local(value, port); \
155 slow_down_io(); \
156} \
157 \
158static inline unsigned type in##bwl##_local_p(int port) \
159{ \
160 unsigned type value = in##bwl##_local(port); \
161 slow_down_io(); \
162 return value; \
163} \
164 \
165__BUILDIO(bwl, bw, type) \
166 \
167static inline void out##bwl##_p(unsigned type value, int port) \
168{ \
169 out##bwl(value, port); \
170 slow_down_io(); \
171} \
172 \
173static inline unsigned type in##bwl##_p(int port) \
174{ \
175 unsigned type value = in##bwl(port); \
176 slow_down_io(); \
177 return value; \
178} \
179 \
180static inline void outs##bwl(int port, const void *addr, unsigned long count) \
181{ \
182 asm volatile("rep; outs" #bwl \
183 : "+S"(addr), "+c"(count) : "d"(port)); \
184} \
185 \
186static inline void ins##bwl(int port, void *addr, unsigned long count) \
187{ \
188 asm volatile("rep; ins" #bwl \
189 : "+D"(addr), "+c"(count) : "d"(port)); \
190}
191
192BUILDIO(b, b, char)
193BUILDIO(w, w, short)
194BUILDIO(l, , int)
195
196#endif /* _ASM_X86_IO_32_H */
diff --git a/arch/x86/include/asm/io_64.h b/arch/x86/include/asm/io_64.h
deleted file mode 100644
index 244067893af4..000000000000
--- a/arch/x86/include/asm/io_64.h
+++ /dev/null
@@ -1,181 +0,0 @@
1#ifndef _ASM_X86_IO_64_H
2#define _ASM_X86_IO_64_H
3
4
5/*
6 * This file contains the definitions for the x86 IO instructions
7 * inb/inw/inl/outb/outw/outl and the "string versions" of the same
8 * (insb/insw/insl/outsb/outsw/outsl). You can also use "pausing"
9 * versions of the single-IO instructions (inb_p/inw_p/..).
10 *
11 * This file is not meant to be obfuscating: it's just complicated
12 * to (a) handle it all in a way that makes gcc able to optimize it
13 * as well as possible and (b) trying to avoid writing the same thing
14 * over and over again with slight variations and possibly making a
15 * mistake somewhere.
16 */
17
18/*
19 * Thanks to James van Artsdalen for a better timing-fix than
20 * the two short jumps: using outb's to a nonexistent port seems
21 * to guarantee better timings even on fast machines.
22 *
23 * On the other hand, I'd like to be sure of a non-existent port:
24 * I feel a bit unsafe about using 0x80 (should be safe, though)
25 *
26 * Linus
27 */
28
29 /*
30 * Bit simplified and optimized by Jan Hubicka
31 * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999.
32 *
33 * isa_memset_io, isa_memcpy_fromio, isa_memcpy_toio added,
34 * isa_read[wl] and isa_write[wl] fixed
35 * - Arnaldo Carvalho de Melo <acme@conectiva.com.br>
36 */
37
38extern void native_io_delay(void);
39
40extern int io_delay_type;
41extern void io_delay_init(void);
42
43#if defined(CONFIG_PARAVIRT)
44#include <asm/paravirt.h>
45#else
46
47static inline void slow_down_io(void)
48{
49 native_io_delay();
50#ifdef REALLY_SLOW_IO
51 native_io_delay();
52 native_io_delay();
53 native_io_delay();
54#endif
55}
56#endif
57
58/*
59 * Talk about misusing macros..
60 */
61#define __OUT1(s, x) \
62static inline void out##s(unsigned x value, unsigned short port) {
63
64#define __OUT2(s, s1, s2) \
65asm volatile ("out" #s " %" s1 "0,%" s2 "1"
66
67#ifndef REALLY_SLOW_IO
68#define REALLY_SLOW_IO
69#define UNSET_REALLY_SLOW_IO
70#endif
71
72#define __OUT(s, s1, x) \
73 __OUT1(s, x) __OUT2(s, s1, "w") : : "a" (value), "Nd" (port)); \
74 } \
75 __OUT1(s##_p, x) __OUT2(s, s1, "w") : : "a" (value), "Nd" (port)); \
76 slow_down_io(); \
77}
78
79#define __IN1(s) \
80static inline RETURN_TYPE in##s(unsigned short port) \
81{ \
82 RETURN_TYPE _v;
83
84#define __IN2(s, s1, s2) \
85 asm volatile ("in" #s " %" s2 "1,%" s1 "0"
86
87#define __IN(s, s1, i...) \
88 __IN1(s) __IN2(s, s1, "w") : "=a" (_v) : "Nd" (port), ##i); \
89 return _v; \
90 } \
91 __IN1(s##_p) __IN2(s, s1, "w") : "=a" (_v) : "Nd" (port), ##i); \
92 slow_down_io(); \
93 return _v; }
94
95#ifdef UNSET_REALLY_SLOW_IO
96#undef REALLY_SLOW_IO
97#endif
98
99#define __INS(s) \
100static inline void ins##s(unsigned short port, void *addr, \
101 unsigned long count) \
102{ \
103 asm volatile ("rep ; ins" #s \
104 : "=D" (addr), "=c" (count) \
105 : "d" (port), "0" (addr), "1" (count)); \
106}
107
108#define __OUTS(s) \
109static inline void outs##s(unsigned short port, const void *addr, \
110 unsigned long count) \
111{ \
112 asm volatile ("rep ; outs" #s \
113 : "=S" (addr), "=c" (count) \
114 : "d" (port), "0" (addr), "1" (count)); \
115}
116
117#define RETURN_TYPE unsigned char
118__IN(b, "")
119#undef RETURN_TYPE
120#define RETURN_TYPE unsigned short
121__IN(w, "")
122#undef RETURN_TYPE
123#define RETURN_TYPE unsigned int
124__IN(l, "")
125#undef RETURN_TYPE
126
127__OUT(b, "b", char)
128__OUT(w, "w", short)
129__OUT(l, , int)
130
131__INS(b)
132__INS(w)
133__INS(l)
134
135__OUTS(b)
136__OUTS(w)
137__OUTS(l)
138
139#if defined(__KERNEL__) && defined(__x86_64__)
140
141#include <linux/vmalloc.h>
142
143#include <asm-generic/iomap.h>
144
145void __memcpy_fromio(void *, unsigned long, unsigned);
146void __memcpy_toio(unsigned long, const void *, unsigned);
147
148static inline void memcpy_fromio(void *to, const volatile void __iomem *from,
149 unsigned len)
150{
151 __memcpy_fromio(to, (unsigned long)from, len);
152}
153
154static inline void memcpy_toio(volatile void __iomem *to, const void *from,
155 unsigned len)
156{
157 __memcpy_toio((unsigned long)to, from, len);
158}
159
160void memset_io(volatile void __iomem *a, int b, size_t c);
161
162/*
163 * ISA space is 'always mapped' on a typical x86 system, no need to
164 * explicitly ioremap() it. The fact that the ISA IO space is mapped
165 * to PAGE_OFFSET is pure coincidence - it does not mean ISA values
166 * are physical addresses. The following constant pointer can be
167 * used as the IO-area pointer (it can be iounmapped as well, so the
168 * analogy with PCI is quite large):
169 */
170#define __ISA_IO_base ((char __iomem *)(PAGE_OFFSET))
171
172#define flush_write_buffers()
173
174/*
175 * Convert a virtual cached pointer to an uncached pointer
176 */
177#define xlate_dev_kmem_ptr(p) p
178
179#endif /* __KERNEL__ */
180
181#endif /* _ASM_X86_IO_64_H */
diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h
index c24ca9a56458..ef51b501e22a 100644
--- a/arch/x86/include/asm/microcode.h
+++ b/arch/x86/include/asm/microcode.h
@@ -12,8 +12,6 @@ struct device;
12 12enum ucode_state { UCODE_ERROR, UCODE_OK, UCODE_NFOUND };
13 13
14 14struct microcode_ops {
15 void (*init)(struct device *device);
16 void (*fini)(void);
17 15 enum ucode_state (*request_microcode_user) (int cpu,
18 16 const void __user *buf, size_t size);
19 17
diff --git a/arch/x86/include/asm/mmzone_64.h b/arch/x86/include/asm/mmzone_64.h
index a29f48c2a322..288b96f815a6 100644
--- a/arch/x86/include/asm/mmzone_64.h
+++ b/arch/x86/include/asm/mmzone_64.h
@@ -39,11 +39,5 @@ static inline __attribute__((pure)) int phys_to_nid(unsigned long addr)
39#define node_start_pfn(nid) (NODE_DATA(nid)->node_start_pfn) 39#define node_start_pfn(nid) (NODE_DATA(nid)->node_start_pfn)
40#define node_end_pfn(nid) (NODE_DATA(nid)->node_start_pfn + \ 40#define node_end_pfn(nid) (NODE_DATA(nid)->node_start_pfn + \
41 NODE_DATA(nid)->node_spanned_pages) 41 NODE_DATA(nid)->node_spanned_pages)
42
43#ifdef CONFIG_NUMA_EMU
44#define FAKE_NODE_MIN_SIZE (64 * 1024 * 1024)
45#define FAKE_NODE_MIN_HASH_MASK (~(FAKE_NODE_MIN_SIZE - 1UL))
46#endif
47
48#endif 42#endif
49#endif /* _ASM_X86_MMZONE_64_H */ 43#endif /* _ASM_X86_MMZONE_64_H */
diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h
index 139d4c1a33a7..93da9c3f3341 100644
--- a/arch/x86/include/asm/nmi.h
+++ b/arch/x86/include/asm/nmi.h
@@ -19,7 +19,6 @@ extern void die_nmi(char *str, struct pt_regs *regs, int do_panic);
19 19extern int check_nmi_watchdog(void);
20 20extern int nmi_watchdog_enabled;
21 21extern int avail_to_resrv_perfctr_nmi_bit(unsigned int);
22extern int avail_to_resrv_perfctr_nmi(unsigned int);
23 22extern int reserve_perfctr_nmi(unsigned int);
24 23extern void release_perfctr_nmi(unsigned int);
25 24extern int reserve_evntsel_nmi(unsigned int);
diff --git a/arch/x86/include/asm/numa_64.h b/arch/x86/include/asm/numa_64.h
index c4ae822e415f..823e070e7c26 100644
--- a/arch/x86/include/asm/numa_64.h
+++ b/arch/x86/include/asm/numa_64.h
@@ -36,6 +36,11 @@ extern void __cpuinit numa_set_node(int cpu, int node);
36extern void __cpuinit numa_clear_node(int cpu); 36extern void __cpuinit numa_clear_node(int cpu);
37extern void __cpuinit numa_add_cpu(int cpu); 37extern void __cpuinit numa_add_cpu(int cpu);
38extern void __cpuinit numa_remove_cpu(int cpu); 38extern void __cpuinit numa_remove_cpu(int cpu);
39
40#ifdef CONFIG_NUMA_EMU
41#define FAKE_NODE_MIN_SIZE ((u64)64 << 20)
42#define FAKE_NODE_MIN_HASH_MASK (~(FAKE_NODE_MIN_SIZE - 1UL))
43#endif /* CONFIG_NUMA_EMU */
39#else 44#else
40static inline void init_cpu_to_node(void) { } 45static inline void init_cpu_to_node(void) { }
41static inline void numa_set_node(int cpu, int node) { } 46static inline void numa_set_node(int cpu, int node) { }
diff --git a/arch/x86/include/asm/numaq.h b/arch/x86/include/asm/numaq.h
index 9f0a5f5d29ec..13370b95ea94 100644
--- a/arch/x86/include/asm/numaq.h
+++ b/arch/x86/include/asm/numaq.h
@@ -33,6 +33,10 @@ extern int get_memcfg_numaq(void);
33 33
34 34extern void *xquad_portio;
35 35
36#define XQUAD_PORTIO_BASE 0xfe400000
37#define XQUAD_PORTIO_QUAD 0x40000 /* 256k per quad. */
38#define XQUAD_PORT_ADDR(port, quad) (xquad_portio + (XQUAD_PORTIO_QUAD*quad) + port)
39
36 40/*
37 41 * SYS_CFG_DATA_PRIV_ADDR, struct eachquadmem, and struct sys_cfg_data are the
38 42 */
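Illustrative arithmetic only: with XQUAD_PORTIO_QUAD == 0x40000 (256k of I/O space per quad), port 0x70 on quad 2 resolves to

  XQUAD_PORT_ADDR(0x70, 2) == xquad_portio + 2 * 0x40000 + 0x70
                           == xquad_portio + 0x80070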
diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h
index 642fe34b36a2..a667f24c7254 100644
--- a/arch/x86/include/asm/page_types.h
+++ b/arch/x86/include/asm/page_types.h
@@ -40,7 +40,6 @@
40 40
41#ifndef __ASSEMBLY__ 41#ifndef __ASSEMBLY__
42 42
43extern int page_is_ram(unsigned long pagenr);
44extern int devmem_is_allowed(unsigned long pagenr); 43extern int devmem_is_allowed(unsigned long pagenr);
45 44
46extern unsigned long max_low_pfn_mapped; 45extern unsigned long max_low_pfn_mapped;
diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h
index b4bf9a942ed0..05b58ccb2e82 100644
--- a/arch/x86/include/asm/pci_x86.h
+++ b/arch/x86/include/asm/pci_x86.h
@@ -29,6 +29,7 @@
29#define PCI_CHECK_ENABLE_AMD_MMCONF 0x20000 29#define PCI_CHECK_ENABLE_AMD_MMCONF 0x20000
30#define PCI_HAS_IO_ECS 0x40000 30#define PCI_HAS_IO_ECS 0x40000
31#define PCI_NOASSIGN_ROMS 0x80000 31#define PCI_NOASSIGN_ROMS 0x80000
32#define PCI_ROOT_NO_CRS 0x100000
32 33
33extern unsigned int pci_probe; 34extern unsigned int pci_probe;
34extern unsigned long pirq_table_addr; 35extern unsigned long pirq_table_addr;
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 1380367dabd9..befd172c82ad 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -27,7 +27,14 @@
27/* 27/*
28 * Includes eventsel and unit mask as well: 28 * Includes eventsel and unit mask as well:
29 */ 29 */
30#define ARCH_PERFMON_EVENT_MASK 0xffff 30
31
32#define INTEL_ARCH_EVTSEL_MASK 0x000000FFULL
33#define INTEL_ARCH_UNIT_MASK 0x0000FF00ULL
34#define INTEL_ARCH_EDGE_MASK 0x00040000ULL
35#define INTEL_ARCH_INV_MASK 0x00800000ULL
36#define INTEL_ARCH_CNT_MASK 0xFF000000ULL
37#define INTEL_ARCH_EVENT_MASK (INTEL_ARCH_UNIT_MASK|INTEL_ARCH_EVTSEL_MASK)
31 38
32/* 39/*
33 * filter mask to validate fixed counter events. 40 * filter mask to validate fixed counter events.
@@ -38,7 +45,12 @@
38 * The other filters are supported by fixed counters. 45 * The other filters are supported by fixed counters.
39 * The any-thread option is supported starting with v3. 46 * The any-thread option is supported starting with v3.
40 */ 47 */
41#define ARCH_PERFMON_EVENT_FILTER_MASK 0xff840000 48#define INTEL_ARCH_FIXED_MASK \
49 (INTEL_ARCH_CNT_MASK| \
50 INTEL_ARCH_INV_MASK| \
51 INTEL_ARCH_EDGE_MASK|\
52 INTEL_ARCH_UNIT_MASK|\
53 INTEL_ARCH_EVTSEL_MASK)
42 54
43 55#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL 0x3c
44 56#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK (0x00 << 8)
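A sketch of how the new masks decompose an event configuration (illustrative, not from the patch), using the architectural "unhalted core cycles" event defined just above:

u64 config = ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK |
	     ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL;		/* 0x003c */
u64 event  = config & INTEL_ARCH_EVENT_MASK;	/* event select + unit mask */
u64 fixed  = config & INTEL_ARCH_FIXED_MASK;	/* bits compared when mapping
						   the event to a fixed counter */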
diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h
index 0e8c2a0fd922..271de94c3810 100644
--- a/arch/x86/include/asm/pgalloc.h
+++ b/arch/x86/include/asm/pgalloc.h
@@ -23,6 +23,11 @@ static inline void paravirt_release_pud(unsigned long pfn) {}
23#endif 23#endif
24 24
25/* 25/*
26 * Flags to use when allocating a user page table page.
27 */
28extern gfp_t __userpte_alloc_gfp;
29
30/*
26 * Allocate and free page tables. 31 * Allocate and free page tables.
27 */ 32 */
28extern pgd_t *pgd_alloc(struct mm_struct *); 33extern pgd_t *pgd_alloc(struct mm_struct *);
diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h
index 01fd9461d323..a28668396508 100644
--- a/arch/x86/include/asm/pgtable_32.h
+++ b/arch/x86/include/asm/pgtable_32.h
@@ -80,7 +80,7 @@ do { \
80 * The i386 doesn't have any external MMU info: the kernel page 80 * The i386 doesn't have any external MMU info: the kernel page
81 * tables contain all the necessary information. 81 * tables contain all the necessary information.
82 */ 82 */
83#define update_mmu_cache(vma, address, pte) do { } while (0) 83#define update_mmu_cache(vma, address, ptep) do { } while (0)
84 84
85#endif /* !__ASSEMBLY__ */ 85#endif /* !__ASSEMBLY__ */
86 86
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index c57a30117149..181be528c612 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -129,7 +129,7 @@ static inline int pgd_large(pgd_t pgd) { return 0; }
129#define pte_unmap(pte) /* NOP */ 129#define pte_unmap(pte) /* NOP */
130#define pte_unmap_nested(pte) /* NOP */ 130#define pte_unmap_nested(pte) /* NOP */
131 131
132#define update_mmu_cache(vma, address, pte) do { } while (0) 132#define update_mmu_cache(vma, address, ptep) do { } while (0)
133 133
134/* Encode and de-code a swap entry */ 134/* Encode and de-code a swap entry */
135#if _PAGE_BIT_FILE < _PAGE_BIT_PROTNONE 135#if _PAGE_BIT_FILE < _PAGE_BIT_PROTNONE
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index fc801bab1b3b..b753ea59703a 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -450,6 +450,8 @@ struct thread_struct {
450 struct perf_event *ptrace_bps[HBP_NUM]; 450 struct perf_event *ptrace_bps[HBP_NUM];
451 /* Debug status used for traps, single steps, etc... */ 451 /* Debug status used for traps, single steps, etc... */
452 unsigned long debugreg6; 452 unsigned long debugreg6;
453 /* Keep track of the exact dr7 value set by the user */
454 unsigned long ptrace_dr7;
453 /* Fault info: */ 455 /* Fault info: */
454 unsigned long cr2; 456 unsigned long cr2;
455 unsigned long trap_no; 457 unsigned long trap_no;
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
index 9d369f680321..20102808b191 100644
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -274,10 +274,6 @@ static inline unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs,
274 return 0; 274 return 0;
275} 275}
276 276
277/* Get Nth argument at function call */
278extern unsigned long regs_get_argument_nth(struct pt_regs *regs,
279 unsigned int n);
280
281/* 277/*
282 * These are defined as per linux/ptrace.h, which see. 278 * These are defined as per linux/ptrace.h, which see.
283 */ 279 */
diff --git a/arch/x86/include/asm/rwsem.h b/arch/x86/include/asm/rwsem.h
index ca7517d33776..606ede126972 100644
--- a/arch/x86/include/asm/rwsem.h
+++ b/arch/x86/include/asm/rwsem.h
@@ -41,6 +41,7 @@
41#include <linux/list.h> 41#include <linux/list.h>
42#include <linux/spinlock.h> 42#include <linux/spinlock.h>
43#include <linux/lockdep.h> 43#include <linux/lockdep.h>
44#include <asm/asm.h>
44 45
45struct rwsem_waiter; 46struct rwsem_waiter;
46 47
@@ -55,17 +56,28 @@ extern asmregparm struct rw_semaphore *
55 56
56/* 57/*
57 * the semaphore definition 58 * the semaphore definition
59 *
60 * The bias values and the counter type limits the number of
61 * potential readers/writers to 32767 for 32 bits and 2147483647
62 * for 64 bits.
58 */ 63 */
59 64
60#define RWSEM_UNLOCKED_VALUE 0x00000000 65#ifdef CONFIG_X86_64
61#define RWSEM_ACTIVE_BIAS 0x00000001 66# define RWSEM_ACTIVE_MASK 0xffffffffL
62#define RWSEM_ACTIVE_MASK 0x0000ffff 67#else
63#define RWSEM_WAITING_BIAS (-0x00010000) 68# define RWSEM_ACTIVE_MASK 0x0000ffffL
69#endif
70
71#define RWSEM_UNLOCKED_VALUE 0x00000000L
72#define RWSEM_ACTIVE_BIAS 0x00000001L
73#define RWSEM_WAITING_BIAS (-RWSEM_ACTIVE_MASK-1)
64#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS 74#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS
65#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS) 75#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
66 76
77typedef signed long rwsem_count_t;
78
67struct rw_semaphore { 79struct rw_semaphore {
68 signed long count; 80 rwsem_count_t count;
69 spinlock_t wait_lock; 81 spinlock_t wait_lock;
70 struct list_head wait_list; 82 struct list_head wait_list;
71#ifdef CONFIG_DEBUG_LOCK_ALLOC 83#ifdef CONFIG_DEBUG_LOCK_ALLOC
@@ -105,7 +117,7 @@ do { \
105static inline void __down_read(struct rw_semaphore *sem) 117static inline void __down_read(struct rw_semaphore *sem)
106{ 118{
107 asm volatile("# beginning down_read\n\t" 119 asm volatile("# beginning down_read\n\t"
108 LOCK_PREFIX " incl (%%eax)\n\t" 120 LOCK_PREFIX _ASM_INC "(%1)\n\t"
109 /* adds 0x00000001, returns the old value */ 121 /* adds 0x00000001, returns the old value */
110 " jns 1f\n" 122 " jns 1f\n"
111 " call call_rwsem_down_read_failed\n" 123 " call call_rwsem_down_read_failed\n"
@@ -121,14 +133,14 @@ static inline void __down_read(struct rw_semaphore *sem)
121 */ 133 */
122static inline int __down_read_trylock(struct rw_semaphore *sem) 134static inline int __down_read_trylock(struct rw_semaphore *sem)
123{ 135{
124 __s32 result, tmp; 136 rwsem_count_t result, tmp;
125 asm volatile("# beginning __down_read_trylock\n\t" 137 asm volatile("# beginning __down_read_trylock\n\t"
126 " movl %0,%1\n\t" 138 " mov %0,%1\n\t"
127 "1:\n\t" 139 "1:\n\t"
128 " movl %1,%2\n\t" 140 " mov %1,%2\n\t"
129 " addl %3,%2\n\t" 141 " add %3,%2\n\t"
130 " jle 2f\n\t" 142 " jle 2f\n\t"
131 LOCK_PREFIX " cmpxchgl %2,%0\n\t" 143 LOCK_PREFIX " cmpxchg %2,%0\n\t"
132 " jnz 1b\n\t" 144 " jnz 1b\n\t"
133 "2:\n\t" 145 "2:\n\t"
134 "# ending __down_read_trylock\n\t" 146 "# ending __down_read_trylock\n\t"
@@ -143,13 +155,13 @@ static inline int __down_read_trylock(struct rw_semaphore *sem)
143 */ 155 */
144static inline void __down_write_nested(struct rw_semaphore *sem, int subclass) 156static inline void __down_write_nested(struct rw_semaphore *sem, int subclass)
145{ 157{
146 int tmp; 158 rwsem_count_t tmp;
147 159
148 tmp = RWSEM_ACTIVE_WRITE_BIAS; 160 tmp = RWSEM_ACTIVE_WRITE_BIAS;
149 asm volatile("# beginning down_write\n\t" 161 asm volatile("# beginning down_write\n\t"
150 LOCK_PREFIX " xadd %%edx,(%%eax)\n\t" 162 LOCK_PREFIX " xadd %1,(%2)\n\t"
151 /* subtract 0x0000ffff, returns the old value */ 163 /* subtract 0x0000ffff, returns the old value */
152 " testl %%edx,%%edx\n\t" 164 " test %1,%1\n\t"
153 /* was the count 0 before? */ 165 /* was the count 0 before? */
154 " jz 1f\n" 166 " jz 1f\n"
155 " call call_rwsem_down_write_failed\n" 167 " call call_rwsem_down_write_failed\n"
@@ -170,9 +182,9 @@ static inline void __down_write(struct rw_semaphore *sem)
170 */ 182 */
171static inline int __down_write_trylock(struct rw_semaphore *sem) 183static inline int __down_write_trylock(struct rw_semaphore *sem)
172{ 184{
173 signed long ret = cmpxchg(&sem->count, 185 rwsem_count_t ret = cmpxchg(&sem->count,
174 RWSEM_UNLOCKED_VALUE, 186 RWSEM_UNLOCKED_VALUE,
175 RWSEM_ACTIVE_WRITE_BIAS); 187 RWSEM_ACTIVE_WRITE_BIAS);
176 if (ret == RWSEM_UNLOCKED_VALUE) 188 if (ret == RWSEM_UNLOCKED_VALUE)
177 return 1; 189 return 1;
178 return 0; 190 return 0;
@@ -183,9 +195,9 @@ static inline int __down_write_trylock(struct rw_semaphore *sem)
183 */ 195 */
184static inline void __up_read(struct rw_semaphore *sem) 196static inline void __up_read(struct rw_semaphore *sem)
185{ 197{
186 __s32 tmp = -RWSEM_ACTIVE_READ_BIAS; 198 rwsem_count_t tmp = -RWSEM_ACTIVE_READ_BIAS;
187 asm volatile("# beginning __up_read\n\t" 199 asm volatile("# beginning __up_read\n\t"
188 LOCK_PREFIX " xadd %%edx,(%%eax)\n\t" 200 LOCK_PREFIX " xadd %1,(%2)\n\t"
189 /* subtracts 1, returns the old value */ 201 /* subtracts 1, returns the old value */
190 " jns 1f\n\t" 202 " jns 1f\n\t"
191 " call call_rwsem_wake\n" 203 " call call_rwsem_wake\n"
@@ -201,18 +213,18 @@ static inline void __up_read(struct rw_semaphore *sem)
201 */ 213 */
202static inline void __up_write(struct rw_semaphore *sem) 214static inline void __up_write(struct rw_semaphore *sem)
203{ 215{
216 rwsem_count_t tmp;
204 asm volatile("# beginning __up_write\n\t" 217 asm volatile("# beginning __up_write\n\t"
205 " movl %2,%%edx\n\t" 218 LOCK_PREFIX " xadd %1,(%2)\n\t"
206 LOCK_PREFIX " xaddl %%edx,(%%eax)\n\t"
207 /* tries to transition 219 /* tries to transition
208 0xffff0001 -> 0x00000000 */ 220 0xffff0001 -> 0x00000000 */
209 " jz 1f\n" 221 " jz 1f\n"
210 " call call_rwsem_wake\n" 222 " call call_rwsem_wake\n"
211 "1:\n\t" 223 "1:\n\t"
212 "# ending __up_write\n" 224 "# ending __up_write\n"
213 : "+m" (sem->count) 225 : "+m" (sem->count), "=d" (tmp)
214 : "a" (sem), "i" (-RWSEM_ACTIVE_WRITE_BIAS) 226 : "a" (sem), "1" (-RWSEM_ACTIVE_WRITE_BIAS)
215 : "memory", "cc", "edx"); 227 : "memory", "cc");
216} 228}
217 229
218/* 230/*
@@ -221,33 +233,38 @@ static inline void __up_write(struct rw_semaphore *sem)
221static inline void __downgrade_write(struct rw_semaphore *sem) 233static inline void __downgrade_write(struct rw_semaphore *sem)
222{ 234{
223 asm volatile("# beginning __downgrade_write\n\t" 235 asm volatile("# beginning __downgrade_write\n\t"
224 LOCK_PREFIX " addl %2,(%%eax)\n\t" 236 LOCK_PREFIX _ASM_ADD "%2,(%1)\n\t"
225 /* transitions 0xZZZZ0001 -> 0xYYYY0001 */ 237 /*
238 * transitions 0xZZZZ0001 -> 0xYYYY0001 (i386)
239 * 0xZZZZZZZZ00000001 -> 0xYYYYYYYY00000001 (x86_64)
240 */
226 " jns 1f\n\t" 241 " jns 1f\n\t"
227 " call call_rwsem_downgrade_wake\n" 242 " call call_rwsem_downgrade_wake\n"
228 "1:\n\t" 243 "1:\n\t"
229 "# ending __downgrade_write\n" 244 "# ending __downgrade_write\n"
230 : "+m" (sem->count) 245 : "+m" (sem->count)
231 : "a" (sem), "i" (-RWSEM_WAITING_BIAS) 246 : "a" (sem), "er" (-RWSEM_WAITING_BIAS)
232 : "memory", "cc"); 247 : "memory", "cc");
233} 248}
234 249
235/* 250/*
236 * implement atomic add functionality 251 * implement atomic add functionality
237 */ 252 */
238static inline void rwsem_atomic_add(int delta, struct rw_semaphore *sem) 253static inline void rwsem_atomic_add(rwsem_count_t delta,
254 struct rw_semaphore *sem)
239{ 255{
240 asm volatile(LOCK_PREFIX "addl %1,%0" 256 asm volatile(LOCK_PREFIX _ASM_ADD "%1,%0"
241 : "+m" (sem->count) 257 : "+m" (sem->count)
242 : "ir" (delta)); 258 : "er" (delta));
243} 259}
244 260
245/* 261/*
246 * implement exchange and add functionality 262 * implement exchange and add functionality
247 */ 263 */
248static inline int rwsem_atomic_update(int delta, struct rw_semaphore *sem) 264static inline rwsem_count_t rwsem_atomic_update(rwsem_count_t delta,
265 struct rw_semaphore *sem)
249{ 266{
250 int tmp = delta; 267 rwsem_count_t tmp = delta;
251 268
252 269 asm volatile(LOCK_PREFIX "xadd %0,%1"
253 270 : "+r" (tmp), "+m" (sem->count)
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index 1e796782cd7b..4cfc90824068 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -135,6 +135,8 @@ int native_cpu_disable(void);
135void native_cpu_die(unsigned int cpu); 135void native_cpu_die(unsigned int cpu);
136void native_play_dead(void); 136void native_play_dead(void);
137void play_dead_common(void); 137void play_dead_common(void);
138void wbinvd_on_cpu(int cpu);
139int wbinvd_on_all_cpus(void);
138 140
139void native_send_call_func_ipi(const struct cpumask *mask); 141void native_send_call_func_ipi(const struct cpumask *mask);
140void native_send_call_func_single_ipi(int cpu); 142void native_send_call_func_single_ipi(int cpu);
@@ -147,6 +149,13 @@ static inline int num_booting_cpus(void)
147{ 149{
148 return cpumask_weight(cpu_callout_mask); 150 return cpumask_weight(cpu_callout_mask);
149} 151}
152#else /* !CONFIG_SMP */
153#define wbinvd_on_cpu(cpu) wbinvd()
154static inline int wbinvd_on_all_cpus(void)
155{
156 wbinvd();
157 return 0;
158}
150#endif /* CONFIG_SMP */ 159#endif /* CONFIG_SMP */
151 160
152extern unsigned disabled_cpus __cpuinitdata; 161extern unsigned disabled_cpus __cpuinitdata;
diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h
index 35e89122a42f..4dab78edbad9 100644
--- a/arch/x86/include/asm/stacktrace.h
+++ b/arch/x86/include/asm/stacktrace.h
@@ -3,8 +3,6 @@
3 3
4extern int kstack_depth_to_print; 4extern int kstack_depth_to_print;
5 5
6int x86_is_stack_id(int id, char *name);
7
8struct thread_info; 6struct thread_info;
9struct stacktrace_ops; 7struct stacktrace_ops;
10 8
diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h
index 8d33bc5462d1..c4a348f7bd43 100644
--- a/arch/x86/include/asm/syscall.h
+++ b/arch/x86/include/asm/syscall.h
@@ -16,6 +16,8 @@
16#include <linux/sched.h> 16#include <linux/sched.h>
17#include <linux/err.h> 17#include <linux/err.h>
18 18
19extern const unsigned long sys_call_table[];
20
19/* 21/*
20 * Only the low 32 bits of orig_ax are meaningful, so we return int. 22 * Only the low 32 bits of orig_ax are meaningful, so we return int.
21 * This importantly ignores the high bits on 64-bit, so comparisons 23 * This importantly ignores the high bits on 64-bit, so comparisons
diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h
index ecb544e65382..e04740f7a0bb 100644
--- a/arch/x86/include/asm/system.h
+++ b/arch/x86/include/asm/system.h
@@ -11,9 +11,9 @@
11#include <linux/irqflags.h> 11#include <linux/irqflags.h>
12 12
13/* entries in ARCH_DLINFO: */ 13/* entries in ARCH_DLINFO: */
14#ifdef CONFIG_IA32_EMULATION 14#if defined(CONFIG_IA32_EMULATION) || !defined(CONFIG_X86_64)
15# define AT_VECTOR_SIZE_ARCH 2 15# define AT_VECTOR_SIZE_ARCH 2
16#else 16#else /* else it's non-compat x86-64 */
17# define AT_VECTOR_SIZE_ARCH 1 17# define AT_VECTOR_SIZE_ARCH 1
18#endif 18#endif
19 19
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 375c917c37d2..e0d28901e969 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -87,7 +87,6 @@ struct thread_info {
87#define TIF_NOTSC 16 /* TSC is not accessible in userland */ 87#define TIF_NOTSC 16 /* TSC is not accessible in userland */
88#define TIF_IA32 17 /* 32bit process */ 88#define TIF_IA32 17 /* 32bit process */
89#define TIF_FORK 18 /* ret_from_fork */ 89#define TIF_FORK 18 /* ret_from_fork */
90#define TIF_ABI_PENDING 19
91#define TIF_MEMDIE 20 90#define TIF_MEMDIE 20
92#define TIF_DEBUG 21 /* uses debug registers */ 91#define TIF_DEBUG 21 /* uses debug registers */
93#define TIF_IO_BITMAP 22 /* uses I/O bitmap */ 92#define TIF_IO_BITMAP 22 /* uses I/O bitmap */
@@ -112,7 +111,6 @@ struct thread_info {
112#define _TIF_NOTSC (1 << TIF_NOTSC) 111#define _TIF_NOTSC (1 << TIF_NOTSC)
113#define _TIF_IA32 (1 << TIF_IA32) 112#define _TIF_IA32 (1 << TIF_IA32)
114#define _TIF_FORK (1 << TIF_FORK) 113#define _TIF_FORK (1 << TIF_FORK)
115#define _TIF_ABI_PENDING (1 << TIF_ABI_PENDING)
116#define _TIF_DEBUG (1 << TIF_DEBUG) 114#define _TIF_DEBUG (1 << TIF_DEBUG)
117#define _TIF_IO_BITMAP (1 << TIF_IO_BITMAP) 115#define _TIF_IO_BITMAP (1 << TIF_IO_BITMAP)
118#define _TIF_FREEZE (1 << TIF_FREEZE) 116#define _TIF_FREEZE (1 << TIF_FREEZE)
diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h
index 535e421498f6..316708d5af92 100644
--- a/arch/x86/include/asm/uaccess_64.h
+++ b/arch/x86/include/asm/uaccess_64.h
@@ -8,6 +8,8 @@
8#include <linux/errno.h> 8#include <linux/errno.h>
9#include <linux/prefetch.h> 9#include <linux/prefetch.h>
10#include <linux/lockdep.h> 10#include <linux/lockdep.h>
11#include <asm/alternative.h>
12#include <asm/cpufeature.h>
11#include <asm/page.h> 13#include <asm/page.h>
12 14
13/* 15/*
@@ -16,7 +18,24 @@
16 18
17/* Handles exceptions in both to and from, but doesn't do access_ok */ 19/* Handles exceptions in both to and from, but doesn't do access_ok */
18__must_check unsigned long 20__must_check unsigned long
19copy_user_generic(void *to, const void *from, unsigned len); 21copy_user_generic_string(void *to, const void *from, unsigned len);
22__must_check unsigned long
23copy_user_generic_unrolled(void *to, const void *from, unsigned len);
24
25static __always_inline __must_check unsigned long
26copy_user_generic(void *to, const void *from, unsigned len)
27{
28 unsigned ret;
29
30 alternative_call(copy_user_generic_unrolled,
31 copy_user_generic_string,
32 X86_FEATURE_REP_GOOD,
33 ASM_OUTPUT2("=a" (ret), "=D" (to), "=S" (from),
34 "=d" (len)),
35 "1" (to), "2" (from), "3" (len)
36 : "memory", "rcx", "r8", "r9", "r10", "r11");
37 return ret;
38}
20 39
21 40__must_check unsigned long
22 41_copy_to_user(void __user *to, const void *from, unsigned len);
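A sketch of what the alternative_call() above buys (the wrapper name here is hypothetical, not from the patch): the call target is patched once at boot, so CPUs with X86_FEATURE_REP_GOOD run the rep-string variant with no per-call branch, and callers stay unchanged:

static inline unsigned long
example_copy(void *dst, const void *src, unsigned len)
{
	/* like the generic helpers, returns the number of bytes NOT copied */
	return copy_user_generic(dst, src, len);
}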
diff --git a/arch/x86/include/asm/user.h b/arch/x86/include/asm/user.h
index 999873b22e7f..24532c7da3d6 100644
--- a/arch/x86/include/asm/user.h
+++ b/arch/x86/include/asm/user.h
@@ -1,5 +1,63 @@
1#ifndef _ASM_X86_USER_H
2#define _ASM_X86_USER_H
3
1 4#ifdef CONFIG_X86_32
2 5# include "user_32.h"
3 6#else
4 7# include "user_64.h"
5 8#endif
9
10#include <asm/types.h>
11
12struct user_ymmh_regs {
13 /* 16 * 16 bytes for each YMMH-reg */
14 __u32 ymmh_space[64];
15};
16
17struct user_xsave_hdr {
18 __u64 xstate_bv;
19 __u64 reserved1[2];
20 __u64 reserved2[5];
21};
22
23/*
24 * The structure layout of user_xstateregs, used for exporting the
25 * extended register state through ptrace and core-dump (NT_X86_XSTATE note)
26 * interfaces will be same as the memory layout of xsave used by the processor
27 * (except for the bytes 464..511, which can be used by the software) and hence
28 * the size of this structure varies depending on the features supported by the
29 * processor and OS. The size of the structure that users need to use can be
30 * obtained by doing:
31 * cpuid_count(0xd, 0, &eax, &ptrace_xstateregs_struct_size, &ecx, &edx);
32 * i.e., cpuid.(eax=0xd,ecx=0).ebx will be the size that user (debuggers, etc.)
33 * need to use.
34 *
35 * For now, only the first 8 bytes of the software usable bytes[464..471] will
36 * be used and will be set to OS enabled xstate mask (which is same as the
37 * 64bit mask returned by the xgetbv's xCR0). Users (analyzing core dump
38 * remotely, etc.) can use this mask as well as the mask saved in the
39 * xstate_hdr bytes and interpret what states the processor/OS supports
40 * and what states are in modified/initialized conditions for the
41 * particular process/thread.
42 *
43 * Also when the user modifies certain state FP/SSE/etc through the
44 * ptrace interface, they must ensure that the xsave_hdr.xstate_bv
45 * bytes[512..519] of the memory layout are updated correspondingly.
46 * i.e., for example when FP state is modified to a non-init state,
47 * xsave_hdr.xstate_bv's bit 0 must be set to '1', when SSE is modified to
48 * non-init state, xsave_hdr.xstate_bv's bit 1 must to be set to '1', etc.
49 */
50#define USER_XSTATE_FX_SW_WORDS 6
51#define USER_XSTATE_XCR0_WORD 0
52
53struct user_xstateregs {
54 struct {
55 __u64 fpx_space[58];
56 __u64 xstate_fx_sw[USER_XSTATE_FX_SW_WORDS];
57 } i387;
58 struct user_xsave_hdr xsave_hdr;
59 struct user_ymmh_regs ymmh;
60 /* further processor state extensions go here */
61};
62
63#endif /* _ASM_X86_USER_H */
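A sketch of the sizing query described in the comment above (cpuid_count() is the existing kernel helper; the variable names are illustrative):

unsigned int eax, xstateregs_size, ecx, edx;

cpuid_count(0xd, 0, &eax, &xstateregs_size, &ecx, &edx);
/* cpuid.(eax=0xd,ecx=0).ebx: bytes a debugger must allocate for the
   NT_X86_XSTATE regset on this processor/OS combination */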
diff --git a/arch/x86/include/asm/uv/bios.h b/arch/x86/include/asm/uv/bios.h
index 2751f3075d8b..71605c7d5c5c 100644
--- a/arch/x86/include/asm/uv/bios.h
+++ b/arch/x86/include/asm/uv/bios.h
@@ -18,8 +18,8 @@
18 * along with this program; if not, write to the Free Software 18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 * 20 *
21 * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved. 21 * Copyright (c) 2008-2009 Silicon Graphics, Inc. All Rights Reserved.
22 * Copyright (c) Russ Anderson 22 * Copyright (c) Russ Anderson <rja@sgi.com>
23 */ 23 */
24 24
25#include <linux/rtc.h> 25#include <linux/rtc.h>
@@ -36,7 +36,8 @@ enum uv_bios_cmd {
36 UV_BIOS_WATCHLIST_ALLOC, 36 UV_BIOS_WATCHLIST_ALLOC,
37 UV_BIOS_WATCHLIST_FREE, 37 UV_BIOS_WATCHLIST_FREE,
38 UV_BIOS_MEMPROTECT, 38 UV_BIOS_MEMPROTECT,
39 UV_BIOS_GET_PARTITION_ADDR 39 UV_BIOS_GET_PARTITION_ADDR,
40 UV_BIOS_SET_LEGACY_VGA_TARGET
40}; 41};
41 42
42/* 43/*
@@ -89,13 +90,14 @@ extern s64 uv_bios_call(enum uv_bios_cmd, u64, u64, u64, u64, u64);
89extern s64 uv_bios_call_irqsave(enum uv_bios_cmd, u64, u64, u64, u64, u64); 90extern s64 uv_bios_call_irqsave(enum uv_bios_cmd, u64, u64, u64, u64, u64);
90extern s64 uv_bios_call_reentrant(enum uv_bios_cmd, u64, u64, u64, u64, u64); 91extern s64 uv_bios_call_reentrant(enum uv_bios_cmd, u64, u64, u64, u64, u64);
91 92
92extern s64 uv_bios_get_sn_info(int, int *, long *, long *, long *); 93extern s64 uv_bios_get_sn_info(int, int *, long *, long *, long *, long *);
93extern s64 uv_bios_freq_base(u64, u64 *); 94extern s64 uv_bios_freq_base(u64, u64 *);
94extern int uv_bios_mq_watchlist_alloc(unsigned long, unsigned int, 95extern int uv_bios_mq_watchlist_alloc(unsigned long, unsigned int,
95 unsigned long *); 96 unsigned long *);
96extern int uv_bios_mq_watchlist_free(int, int); 97extern int uv_bios_mq_watchlist_free(int, int);
97extern s64 uv_bios_change_memprotect(u64, u64, enum uv_memprotect); 98extern s64 uv_bios_change_memprotect(u64, u64, enum uv_memprotect);
98extern s64 uv_bios_reserved_page_pa(u64, u64 *, u64 *, u64 *); 99extern s64 uv_bios_reserved_page_pa(u64, u64 *, u64 *, u64 *);
100extern int uv_bios_set_legacy_vga_target(bool decode, int domain, int bus);
99 101
100extern void uv_bios_init(void); 102extern void uv_bios_init(void);
101 103
@@ -104,6 +106,7 @@ extern int uv_type;
104extern long sn_partition_id; 106extern long sn_partition_id;
105extern long sn_coherency_id; 107extern long sn_coherency_id;
106extern long sn_region_size; 108extern long sn_region_size;
109extern long system_serial_number;
107#define partition_coherence_id() (sn_coherency_id) 110#define partition_coherence_id() (sn_coherency_id)
108 111
109extern struct kobject *sgi_uv_kobj; /* /sys/firmware/sgi_uv */ 112extern struct kobject *sgi_uv_kobj; /* /sys/firmware/sgi_uv */
diff --git a/arch/x86/include/asm/uv/uv.h b/arch/x86/include/asm/uv/uv.h
index c0a01b5d985b..3bb9491b7659 100644
--- a/arch/x86/include/asm/uv/uv.h
+++ b/arch/x86/include/asm/uv/uv.h
@@ -11,6 +11,7 @@ struct mm_struct;
11extern enum uv_system_type get_uv_system_type(void); 11extern enum uv_system_type get_uv_system_type(void);
12extern int is_uv_system(void); 12extern int is_uv_system(void);
13extern void uv_cpu_init(void); 13extern void uv_cpu_init(void);
14extern void uv_nmi_init(void);
14extern void uv_system_init(void); 15extern void uv_system_init(void);
15extern const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask, 16extern const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
16 struct mm_struct *mm, 17 struct mm_struct *mm,
diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h
index 40be813fefb1..14cc74ba5d23 100644
--- a/arch/x86/include/asm/uv/uv_hub.h
+++ b/arch/x86/include/asm/uv/uv_hub.h
@@ -329,7 +329,8 @@ static inline unsigned long uv_read_global_mmr64(int pnode, unsigned long offset
329 */ 329 */
330static inline unsigned long uv_global_gru_mmr_address(int pnode, unsigned long offset) 330static inline unsigned long uv_global_gru_mmr_address(int pnode, unsigned long offset)
331{ 331{
332 return UV_GLOBAL_GRU_MMR_BASE | offset | (pnode << uv_hub_info->m_val); 332 return UV_GLOBAL_GRU_MMR_BASE | offset |
333 ((unsigned long)pnode << uv_hub_info->m_val);
333} 334}
334 335
335static inline void uv_write_global_mmr8(int pnode, unsigned long offset, unsigned char val) 336static inline void uv_write_global_mmr8(int pnode, unsigned long offset, unsigned char val)
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
index ea0e8ea15e15..60cc35269083 100644
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -126,6 +126,7 @@ struct x86_cpuinit_ops {
126 * @get_wallclock: get time from HW clock like RTC etc. 126 * @get_wallclock: get time from HW clock like RTC etc.
127 * @set_wallclock: set time back to HW clock 127 * @set_wallclock: set time back to HW clock
128 * @is_untracked_pat_range exclude from PAT logic 128 * @is_untracked_pat_range exclude from PAT logic
129 * @nmi_init enable NMI on cpus
129 */ 130 */
130struct x86_platform_ops { 131struct x86_platform_ops {
131 unsigned long (*calibrate_tsc)(void); 132 unsigned long (*calibrate_tsc)(void);
@@ -133,6 +134,7 @@ struct x86_platform_ops {
133 int (*set_wallclock)(unsigned long nowtime); 134 int (*set_wallclock)(unsigned long nowtime);
134 void (*iommu_shutdown)(void); 135 void (*iommu_shutdown)(void);
135 bool (*is_untracked_pat_range)(u64 start, u64 end); 136 bool (*is_untracked_pat_range)(u64 start, u64 end);
137 void (*nmi_init)(void);
136}; 138};
137 139
138extern struct x86_init_ops x86_init; 140extern struct x86_init_ops x86_init;
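The new nmi_init member follows the usual x86_platform_ops pattern: the core provides a default and a platform replaces the function pointer early in boot, exactly as the UV code further down does with x86_platform.nmi_init = uv_nmi_init. An illustrative sketch of that override (the names here are hypothetical, not kernel symbols):

    /* Hypothetical platform hook: unmask or route NMIs as the platform needs. */
    static void example_nmi_init(void)
    {
        /* platform-specific NMI setup */
    }

    static void __init example_platform_setup(void)
    {
        x86_platform.nmi_init = example_nmi_init;
    }
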
diff --git a/arch/x86/include/asm/xsave.h b/arch/x86/include/asm/xsave.h
index 727acc152344..ddc04ccad03b 100644
--- a/arch/x86/include/asm/xsave.h
+++ b/arch/x86/include/asm/xsave.h
@@ -27,9 +27,11 @@
27extern unsigned int xstate_size; 27extern unsigned int xstate_size;
28extern u64 pcntxt_mask; 28extern u64 pcntxt_mask;
29extern struct xsave_struct *init_xstate_buf; 29extern struct xsave_struct *init_xstate_buf;
30extern u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS];
30 31
31extern void xsave_cntxt_init(void); 32extern void xsave_cntxt_init(void);
32extern void xsave_init(void); 33extern void xsave_init(void);
34extern void update_regset_xstate_info(unsigned int size, u64 xstate_mask);
33extern int init_fpu(struct task_struct *child); 35extern int init_fpu(struct task_struct *child);
34extern int check_for_xstate(struct i387_fxsave_struct __user *buf, 36extern int check_for_xstate(struct i387_fxsave_struct __user *buf,
35 void __user *fpstate, 37 void __user *fpstate,
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 036d28adf59d..f95703098f8d 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -49,6 +49,7 @@ EXPORT_SYMBOL(acpi_disabled);
49 49
50#ifdef CONFIG_X86_64 50#ifdef CONFIG_X86_64
51# include <asm/proto.h> 51# include <asm/proto.h>
52# include <asm/numa_64.h>
52#endif /* X86 */ 53#endif /* X86 */
53 54
54#define BAD_MADT_ENTRY(entry, end) ( \ 55#define BAD_MADT_ENTRY(entry, end) ( \
@@ -482,6 +483,25 @@ int acpi_register_gsi(struct device *dev, u32 gsi, int trigger, int polarity)
482 */ 483 */
483#ifdef CONFIG_ACPI_HOTPLUG_CPU 484#ifdef CONFIG_ACPI_HOTPLUG_CPU
484 485
486static void acpi_map_cpu2node(acpi_handle handle, int cpu, int physid)
487{
488#ifdef CONFIG_ACPI_NUMA
489 int nid;
490
491 nid = acpi_get_node(handle);
492 if (nid == -1 || !node_online(nid))
493 return;
494#ifdef CONFIG_X86_64
495 apicid_to_node[physid] = nid;
496 numa_set_node(cpu, nid);
497#else /* CONFIG_X86_32 */
498 apicid_2_node[physid] = nid;
499 cpu_to_node_map[cpu] = nid;
500#endif
501
502#endif
503}
504
485static int __cpuinit _acpi_map_lsapic(acpi_handle handle, int *pcpu) 505static int __cpuinit _acpi_map_lsapic(acpi_handle handle, int *pcpu)
486{ 506{
487 struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; 507 struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
@@ -540,6 +560,7 @@ static int __cpuinit _acpi_map_lsapic(acpi_handle handle, int *pcpu)
540 } 560 }
541 561
542 cpu = cpumask_first(new_map); 562 cpu = cpumask_first(new_map);
563 acpi_map_cpu2node(handle, cpu, physid);
543 564
544 *pcpu = cpu; 565 *pcpu = cpu;
545 retval = 0; 566 retval = 0;
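The acpi_map_cpu2node() call added above wires a hot-added CPU into the NUMA topology: acpi_get_node() resolves the processor object's proximity domain to a node, and the per-arch apicid/cpu-to-node maps are filled so that locality queries for the new CPU return the right node. A hedged, kernel-context sketch of the effect from a consumer's point of view (the helper is illustrative):

    /* Illustrative: once the mapping exists, node-local allocation for the new CPU works. */
    static void *alloc_near_cpu(int cpu, size_t size)
    {
        return kmalloc_node(size, GFP_KERNEL, cpu_to_node(cpu));
    }
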
@@ -1185,9 +1206,6 @@ static void __init acpi_process_madt(void)
1185 if (!error) { 1206 if (!error) {
1186 acpi_lapic = 1; 1207 acpi_lapic = 1;
1187 1208
1188#ifdef CONFIG_X86_BIGSMP
1189 generic_bigsmp_probe();
1190#endif
1191 /* 1209 /*
1192 * Parse MADT IO-APIC entries 1210 * Parse MADT IO-APIC entries
1193 */ 1211 */
@@ -1197,8 +1215,6 @@ static void __init acpi_process_madt(void)
1197 acpi_ioapic = 1; 1215 acpi_ioapic = 1;
1198 1216
1199 smp_found_config = 1; 1217 smp_found_config = 1;
1200 if (apic->setup_apic_routing)
1201 apic->setup_apic_routing();
1202 } 1218 }
1203 } 1219 }
1204 if (error == -EINVAL) { 1220 if (error == -EINVAL) {
@@ -1349,14 +1365,6 @@ static struct dmi_system_id __initdata acpi_dmi_table[] = {
1349 }, 1365 },
1350 { 1366 {
1351 .callback = force_acpi_ht, 1367 .callback = force_acpi_ht,
1352 .ident = "ASUS P2B-DS",
1353 .matches = {
1354 DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."),
1355 DMI_MATCH(DMI_BOARD_NAME, "P2B-DS"),
1356 },
1357 },
1358 {
1359 .callback = force_acpi_ht,
1360 .ident = "ASUS CUR-DLS", 1368 .ident = "ASUS CUR-DLS",
1361 .matches = { 1369 .matches = {
1362 DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."), 1370 DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."),
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index de7353c0ce9c..e6ea0342c8f8 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -205,7 +205,7 @@ void __init_or_module apply_alternatives(struct alt_instr *start,
205 struct alt_instr *end) 205 struct alt_instr *end)
206{ 206{
207 struct alt_instr *a; 207 struct alt_instr *a;
208 char insnbuf[MAX_PATCH_LEN]; 208 u8 insnbuf[MAX_PATCH_LEN];
209 209
210 DPRINTK("%s: alt table %p -> %p\n", __func__, start, end); 210 DPRINTK("%s: alt table %p -> %p\n", __func__, start, end);
211 for (a = start; a < end; a++) { 211 for (a = start; a < end; a++) {
@@ -223,6 +223,8 @@ void __init_or_module apply_alternatives(struct alt_instr *start,
223 } 223 }
224#endif 224#endif
225 memcpy(insnbuf, a->replacement, a->replacementlen); 225 memcpy(insnbuf, a->replacement, a->replacementlen);
226 if (*insnbuf == 0xe8 && a->replacementlen == 5)
227 *(s32 *)(insnbuf + 1) += a->replacement - a->instr;
226 add_nops(insnbuf + a->replacementlen, 228 add_nops(insnbuf + a->replacementlen,
227 a->instrlen - a->replacementlen); 229 a->instrlen - a->replacementlen);
228 text_poke_early(instr, insnbuf, a->instrlen); 230 text_poke_early(instr, insnbuf, a->instrlen);
@@ -390,6 +392,24 @@ void alternatives_smp_switch(int smp)
390 mutex_unlock(&smp_alt); 392 mutex_unlock(&smp_alt);
391} 393}
392 394
395/* Return 1 if the address range is reserved for smp-alternatives */
396int alternatives_text_reserved(void *start, void *end)
397{
398 struct smp_alt_module *mod;
399 u8 **ptr;
400 u8 *text_start = start;
401 u8 *text_end = end;
402
403 list_for_each_entry(mod, &smp_alt_modules, next) {
404 if (mod->text > text_end || mod->text_end < text_start)
405 continue;
406 for (ptr = mod->locks; ptr < mod->locks_end; ptr++)
407 if (text_start <= *ptr && text_end >= *ptr)
408 return 1;
409 }
410
411 return 0;
412}
393#endif 413#endif
394 414
395#ifdef CONFIG_PARAVIRT 415#ifdef CONFIG_PARAVIRT
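Two things are worth spelling out in the alternative.c hunks above. First, the new 0xe8 check exists because a near call is rel32-encoded: a call assembled at the replacement's own address in .altinstr_replacement would, once memcpy'd to the original instruction site, point to the wrong target, so its displacement is rebased by (replacement - instr). Second, alternatives_text_reserved() lets other patching machinery (such as kprobes) ask whether an address range overlaps an SMP-alternative lock-prefix site. A small sketch of the rel32 rebasing, with hypothetical addresses and not taken from the kernel:

    #include <stdint.h>

    /* target = replacement + 5 + disp; after the copy the call runs at 'instr',
     * so the new displacement is disp + (replacement - instr). */
    static int32_t rebase_rel32(int32_t disp, uintptr_t replacement, uintptr_t instr)
    {
        return disp + (int32_t)(replacement - instr);
    }
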
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 23824fef789c..adb0ba025702 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -980,7 +980,7 @@ static int alloc_new_range(struct dma_ops_domain *dma_dom,
980{ 980{
981 int index = dma_dom->aperture_size >> APERTURE_RANGE_SHIFT; 981 int index = dma_dom->aperture_size >> APERTURE_RANGE_SHIFT;
982 struct amd_iommu *iommu; 982 struct amd_iommu *iommu;
983 int i; 983 unsigned long i;
984 984
985#ifdef CONFIG_IOMMU_STRESS 985#ifdef CONFIG_IOMMU_STRESS
986 populate = false; 986 populate = false;
@@ -1489,11 +1489,14 @@ static void __detach_device(struct device *dev)
1489{ 1489{
1490 struct iommu_dev_data *dev_data = get_dev_data(dev); 1490 struct iommu_dev_data *dev_data = get_dev_data(dev);
1491 struct iommu_dev_data *alias_data; 1491 struct iommu_dev_data *alias_data;
1492 struct protection_domain *domain;
1492 unsigned long flags; 1493 unsigned long flags;
1493 1494
1494 BUG_ON(!dev_data->domain); 1495 BUG_ON(!dev_data->domain);
1495 1496
1496 spin_lock_irqsave(&dev_data->domain->lock, flags); 1497 domain = dev_data->domain;
1498
1499 spin_lock_irqsave(&domain->lock, flags);
1497 1500
1498 if (dev_data->alias != dev) { 1501 if (dev_data->alias != dev) {
1499 alias_data = get_dev_data(dev_data->alias); 1502 alias_data = get_dev_data(dev_data->alias);
@@ -1504,13 +1507,15 @@ static void __detach_device(struct device *dev)
1504 if (atomic_dec_and_test(&dev_data->bind)) 1507 if (atomic_dec_and_test(&dev_data->bind))
1505 do_detach(dev); 1508 do_detach(dev);
1506 1509
1507 spin_unlock_irqrestore(&dev_data->domain->lock, flags); 1510 spin_unlock_irqrestore(&domain->lock, flags);
1508 1511
1509 /* 1512 /*
1510 * If we run in passthrough mode the device must be assigned to the 1513 * If we run in passthrough mode the device must be assigned to the
1511 * passthrough domain if it is detached from any other domain 1514 * passthrough domain if it is detached from any other domain.
1515 * Make sure we can deassign from the pt_domain itself.
1512 */ 1516 */
1513 if (iommu_pass_through && dev_data->domain == NULL) 1517 if (iommu_pass_through &&
1518 (dev_data->domain == NULL && domain != pt_domain))
1514 __attach_device(dev, pt_domain); 1519 __attach_device(dev, pt_domain);
1515} 1520}
1516 1521
@@ -2218,6 +2223,12 @@ static struct dma_map_ops amd_iommu_dma_ops = {
2218/* 2223/*
2219 * The function which clues the AMD IOMMU driver into dma_ops. 2224 * The function which clues the AMD IOMMU driver into dma_ops.
2220 */ 2225 */
2226
2227void __init amd_iommu_init_api(void)
2228{
2229 register_iommu(&amd_iommu_ops);
2230}
2231
2221int __init amd_iommu_init_dma_ops(void) 2232int __init amd_iommu_init_dma_ops(void)
2222{ 2233{
2223 struct amd_iommu *iommu; 2234 struct amd_iommu *iommu;
@@ -2253,8 +2264,6 @@ int __init amd_iommu_init_dma_ops(void)
2253 /* Make the driver finally visible to the drivers */ 2264 /* Make the driver finally visible to the drivers */
2254 dma_ops = &amd_iommu_dma_ops; 2265 dma_ops = &amd_iommu_dma_ops;
2255 2266
2256 register_iommu(&amd_iommu_ops);
2257
2258 amd_iommu_stats_init(); 2267 amd_iommu_stats_init();
2259 2268
2260 return 0; 2269 return 0;
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index fb490ce7dd55..9dc91b431470 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -1292,9 +1292,12 @@ static int __init amd_iommu_init(void)
1292 ret = amd_iommu_init_passthrough(); 1292 ret = amd_iommu_init_passthrough();
1293 else 1293 else
1294 ret = amd_iommu_init_dma_ops(); 1294 ret = amd_iommu_init_dma_ops();
1295
1295 if (ret) 1296 if (ret)
1296 goto free; 1297 goto free;
1297 1298
1299 amd_iommu_init_api();
1300
1298 amd_iommu_init_notifier(); 1301 amd_iommu_init_notifier();
1299 1302
1300 enable_iommus(); 1303 enable_iommus();
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 3987e4408f75..6e29b2a77aa8 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -581,7 +581,7 @@ calibrate_by_pmtimer(long deltapm, long *delta, long *deltatsc)
581 res = (((u64)(*deltatsc)) * pm_100ms); 581 res = (((u64)(*deltatsc)) * pm_100ms);
582 do_div(res, deltapm); 582 do_div(res, deltapm);
583 apic_printk(APIC_VERBOSE, "TSC delta adjusted to " 583 apic_printk(APIC_VERBOSE, "TSC delta adjusted to "
584 "PM-Timer: %lu (%ld) \n", 584 "PM-Timer: %lu (%ld)\n",
585 (unsigned long)res, *deltatsc); 585 (unsigned long)res, *deltatsc);
586 *deltatsc = (long)res; 586 *deltatsc = (long)res;
587 } 587 }
@@ -1641,9 +1641,7 @@ int __init APIC_init_uniprocessor(void)
1641#endif 1641#endif
1642 1642
1643 enable_IR_x2apic(); 1643 enable_IR_x2apic();
1644#ifdef CONFIG_X86_64
1645 default_setup_apic_routing(); 1644 default_setup_apic_routing();
1646#endif
1647 1645
1648 verify_local_APIC(); 1646 verify_local_APIC();
1649 connect_bsp_APIC(); 1647 connect_bsp_APIC();
@@ -1891,21 +1889,6 @@ void __cpuinit generic_processor_info(int apicid, int version)
1891 if (apicid > max_physical_apicid) 1889 if (apicid > max_physical_apicid)
1892 max_physical_apicid = apicid; 1890 max_physical_apicid = apicid;
1893 1891
1894#ifdef CONFIG_X86_32
1895 if (num_processors > 8) {
1896 switch (boot_cpu_data.x86_vendor) {
1897 case X86_VENDOR_INTEL:
1898 if (!APIC_XAPIC(version)) {
1899 def_to_bigsmp = 0;
1900 break;
1901 }
1902 /* If P4 and above fall through */
1903 case X86_VENDOR_AMD:
1904 def_to_bigsmp = 1;
1905 }
1906 }
1907#endif
1908
1909#if defined(CONFIG_SMP) || defined(CONFIG_X86_64) 1892#if defined(CONFIG_SMP) || defined(CONFIG_X86_64)
1910 early_per_cpu(x86_cpu_to_apicid, cpu) = apicid; 1893 early_per_cpu(x86_cpu_to_apicid, cpu) = apicid;
1911 early_per_cpu(x86_bios_cpu_apicid, cpu) = apicid; 1894 early_per_cpu(x86_bios_cpu_apicid, cpu) = apicid;
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 53243ca7816d..6bdd2c7ead75 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -1647,7 +1647,7 @@ __apicdebuginit(void) print_IO_APIC(void)
1647 printk(KERN_DEBUG ".... IRQ redirection table:\n"); 1647 printk(KERN_DEBUG ".... IRQ redirection table:\n");
1648 1648
1649 printk(KERN_DEBUG " NR Dst Mask Trig IRR Pol" 1649 printk(KERN_DEBUG " NR Dst Mask Trig IRR Pol"
1650 " Stat Dmod Deli Vect: \n"); 1650 " Stat Dmod Deli Vect:\n");
1651 1651
1652 for (i = 0; i <= reg_01.bits.entries; i++) { 1652 for (i = 0; i <= reg_01.bits.entries; i++) {
1653 struct IO_APIC_route_entry entry; 1653 struct IO_APIC_route_entry entry;
diff --git a/arch/x86/kernel/apic/numaq_32.c b/arch/x86/kernel/apic/numaq_32.c
index 98c4665f251c..47dd856708e5 100644
--- a/arch/x86/kernel/apic/numaq_32.c
+++ b/arch/x86/kernel/apic/numaq_32.c
@@ -225,7 +225,7 @@ static void __init smp_read_mpc_oem(struct mpc_table *mpc)
225 225
226 mpc_record = 0; 226 mpc_record = 0;
227 printk(KERN_INFO 227 printk(KERN_INFO
228 "Found an OEM MPC table at %8p - parsing it ... \n", oemtable); 228 "Found an OEM MPC table at %8p - parsing it...\n", oemtable);
229 229
230 if (memcmp(oemtable->signature, MPC_OEM_SIGNATURE, 4)) { 230 if (memcmp(oemtable->signature, MPC_OEM_SIGNATURE, 4)) {
231 printk(KERN_WARNING 231 printk(KERN_WARNING
diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c
index 1a6559f6768c..99d2fe016084 100644
--- a/arch/x86/kernel/apic/probe_32.c
+++ b/arch/x86/kernel/apic/probe_32.c
@@ -52,7 +52,32 @@ static int __init print_ipi_mode(void)
52} 52}
53late_initcall(print_ipi_mode); 53late_initcall(print_ipi_mode);
54 54
55void default_setup_apic_routing(void) 55void __init default_setup_apic_routing(void)
56{
57 int version = apic_version[boot_cpu_physical_apicid];
58
59 if (num_possible_cpus() > 8) {
60 switch (boot_cpu_data.x86_vendor) {
61 case X86_VENDOR_INTEL:
62 if (!APIC_XAPIC(version)) {
63 def_to_bigsmp = 0;
64 break;
65 }
66 /* If P4 and above fall through */
67 case X86_VENDOR_AMD:
68 def_to_bigsmp = 1;
69 }
70 }
71
72#ifdef CONFIG_X86_BIGSMP
73 generic_bigsmp_probe();
74#endif
75
76 if (apic->setup_apic_routing)
77 apic->setup_apic_routing();
78}
79
80static void setup_apic_flat_routing(void)
56{ 81{
57#ifdef CONFIG_X86_IO_APIC 82#ifdef CONFIG_X86_IO_APIC
58 printk(KERN_INFO 83 printk(KERN_INFO
@@ -103,7 +128,7 @@ struct apic apic_default = {
103 .init_apic_ldr = default_init_apic_ldr, 128 .init_apic_ldr = default_init_apic_ldr,
104 129
105 .ioapic_phys_id_map = default_ioapic_phys_id_map, 130 .ioapic_phys_id_map = default_ioapic_phys_id_map,
106 .setup_apic_routing = default_setup_apic_routing, 131 .setup_apic_routing = setup_apic_flat_routing,
107 .multi_timer_check = NULL, 132 .multi_timer_check = NULL,
108 .apicid_to_node = default_apicid_to_node, 133 .apicid_to_node = default_apicid_to_node,
109 .cpu_to_logical_apicid = default_cpu_to_logical_apicid, 134 .cpu_to_logical_apicid = default_cpu_to_logical_apicid,
diff --git a/arch/x86/kernel/apic/probe_64.c b/arch/x86/kernel/apic/probe_64.c
index 450fe2064a14..83e9be4778e2 100644
--- a/arch/x86/kernel/apic/probe_64.c
+++ b/arch/x86/kernel/apic/probe_64.c
@@ -67,7 +67,7 @@ void __init default_setup_apic_routing(void)
67 } 67 }
68#endif 68#endif
69 69
70 if (apic == &apic_flat && num_processors > 8) 70 if (apic == &apic_flat && num_possible_cpus() > 8)
71 apic = &apic_physflat; 71 apic = &apic_physflat;
72 72
73 printk(KERN_INFO "Setting APIC routing to %s\n", apic->name); 73 printk(KERN_INFO "Setting APIC routing to %s\n", apic->name);
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 21db3cbea7dc..3740c8a4eae7 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -5,7 +5,7 @@
5 * 5 *
6 * SGI UV APIC functions (note: not an Intel compatible APIC) 6 * SGI UV APIC functions (note: not an Intel compatible APIC)
7 * 7 *
8 * Copyright (C) 2007-2008 Silicon Graphics, Inc. All rights reserved. 8 * Copyright (C) 2007-2009 Silicon Graphics, Inc. All rights reserved.
9 */ 9 */
10#include <linux/cpumask.h> 10#include <linux/cpumask.h>
11#include <linux/hardirq.h> 11#include <linux/hardirq.h>
@@ -20,6 +20,8 @@
20#include <linux/cpu.h> 20#include <linux/cpu.h>
21#include <linux/init.h> 21#include <linux/init.h>
22#include <linux/io.h> 22#include <linux/io.h>
23#include <linux/pci.h>
24#include <linux/kdebug.h>
23 25
24#include <asm/uv/uv_mmrs.h> 26#include <asm/uv/uv_mmrs.h>
25#include <asm/uv/uv_hub.h> 27#include <asm/uv/uv_hub.h>
@@ -34,10 +36,13 @@
34 36
35DEFINE_PER_CPU(int, x2apic_extra_bits); 37DEFINE_PER_CPU(int, x2apic_extra_bits);
36 38
39#define PR_DEVEL(fmt, args...) pr_devel("%s: " fmt, __func__, args)
40
37static enum uv_system_type uv_system_type; 41static enum uv_system_type uv_system_type;
38static u64 gru_start_paddr, gru_end_paddr; 42static u64 gru_start_paddr, gru_end_paddr;
39int uv_min_hub_revision_id; 43int uv_min_hub_revision_id;
40EXPORT_SYMBOL_GPL(uv_min_hub_revision_id); 44EXPORT_SYMBOL_GPL(uv_min_hub_revision_id);
45static DEFINE_SPINLOCK(uv_nmi_lock);
41 46
42static inline bool is_GRU_range(u64 start, u64 end) 47static inline bool is_GRU_range(u64 start, u64 end)
43{ 48{
@@ -71,6 +76,7 @@ static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
71 if (!strcmp(oem_id, "SGI")) { 76 if (!strcmp(oem_id, "SGI")) {
72 nodeid = early_get_nodeid(); 77 nodeid = early_get_nodeid();
73 x86_platform.is_untracked_pat_range = uv_is_untracked_pat_range; 78 x86_platform.is_untracked_pat_range = uv_is_untracked_pat_range;
79 x86_platform.nmi_init = uv_nmi_init;
74 if (!strcmp(oem_table_id, "UVL")) 80 if (!strcmp(oem_table_id, "UVL"))
75 uv_system_type = UV_LEGACY_APIC; 81 uv_system_type = UV_LEGACY_APIC;
76 else if (!strcmp(oem_table_id, "UVX")) 82 else if (!strcmp(oem_table_id, "UVX"))
@@ -482,7 +488,7 @@ static void uv_heartbeat(unsigned long ignored)
482 488
483static void __cpuinit uv_heartbeat_enable(int cpu) 489static void __cpuinit uv_heartbeat_enable(int cpu)
484{ 490{
485 if (!uv_cpu_hub_info(cpu)->scir.enabled) { 491 while (!uv_cpu_hub_info(cpu)->scir.enabled) {
486 struct timer_list *timer = &uv_cpu_hub_info(cpu)->scir.timer; 492 struct timer_list *timer = &uv_cpu_hub_info(cpu)->scir.timer;
487 493
488 uv_set_cpu_scir_bits(cpu, SCIR_CPU_HEARTBEAT|SCIR_CPU_ACTIVITY); 494 uv_set_cpu_scir_bits(cpu, SCIR_CPU_HEARTBEAT|SCIR_CPU_ACTIVITY);
@@ -490,11 +496,10 @@ static void __cpuinit uv_heartbeat_enable(int cpu)
490 timer->expires = jiffies + SCIR_CPU_HB_INTERVAL; 496 timer->expires = jiffies + SCIR_CPU_HB_INTERVAL;
491 add_timer_on(timer, cpu); 497 add_timer_on(timer, cpu);
492 uv_cpu_hub_info(cpu)->scir.enabled = 1; 498 uv_cpu_hub_info(cpu)->scir.enabled = 1;
493 }
494 499
495 /* check boot cpu */ 500 /* also ensure that boot cpu is enabled */
496 if (!uv_cpu_hub_info(0)->scir.enabled) 501 cpu = 0;
497 uv_heartbeat_enable(0); 502 }
498} 503}
499 504
500#ifdef CONFIG_HOTPLUG_CPU 505#ifdef CONFIG_HOTPLUG_CPU
@@ -553,6 +558,30 @@ late_initcall(uv_init_heartbeat);
553 558
554#endif /* !CONFIG_HOTPLUG_CPU */ 559#endif /* !CONFIG_HOTPLUG_CPU */
555 560
561/* Direct Legacy VGA I/O traffic to designated IOH */
562int uv_set_vga_state(struct pci_dev *pdev, bool decode,
563 unsigned int command_bits, bool change_bridge)
564{
565 int domain, bus, rc;
566
567 PR_DEVEL("devfn %x decode %d cmd %x chg_brdg %d\n",
568 pdev->devfn, decode, command_bits, change_bridge);
569
570 if (!change_bridge)
571 return 0;
572
573 if ((command_bits & PCI_COMMAND_IO) == 0)
574 return 0;
575
576 domain = pci_domain_nr(pdev->bus);
577 bus = pdev->bus->number;
578
579 rc = uv_bios_set_legacy_vga_target(decode, domain, bus);
580 PR_DEVEL("vga decode %d %x:%x, rc: %d\n", decode, domain, bus, rc);
581
582 return rc;
583}
584
556/* 585/*
557 * Called on each cpu to initialize the per_cpu UV data area. 586 * Called on each cpu to initialize the per_cpu UV data area.
558 * FIXME: hotplug not supported yet 587 * FIXME: hotplug not supported yet
@@ -569,6 +598,46 @@ void __cpuinit uv_cpu_init(void)
569 set_x2apic_extra_bits(uv_hub_info->pnode); 598 set_x2apic_extra_bits(uv_hub_info->pnode);
570} 599}
571 600
601/*
602 * When NMI is received, print a stack trace.
603 */
604int uv_handle_nmi(struct notifier_block *self, unsigned long reason, void *data)
605{
606 if (reason != DIE_NMI_IPI)
607 return NOTIFY_OK;
608 /*
609 * Use a lock so only one cpu prints at a time
610 * to prevent intermixed output.
611 */
612 spin_lock(&uv_nmi_lock);
613 pr_info("NMI stack dump cpu %u:\n", smp_processor_id());
614 dump_stack();
615 spin_unlock(&uv_nmi_lock);
616
617 return NOTIFY_STOP;
618}
619
620static struct notifier_block uv_dump_stack_nmi_nb = {
621 .notifier_call = uv_handle_nmi
622};
623
624void uv_register_nmi_notifier(void)
625{
626 if (register_die_notifier(&uv_dump_stack_nmi_nb))
627 printk(KERN_WARNING "UV NMI handler failed to register\n");
628}
629
630void uv_nmi_init(void)
631{
632 unsigned int value;
633
634 /*
635 * Unmask NMI on all cpus
636 */
637 value = apic_read(APIC_LVT1) | APIC_DM_NMI;
638 value &= ~APIC_LVT_MASKED;
639 apic_write(APIC_LVT1, value);
640}
572 641
573void __init uv_system_init(void) 642void __init uv_system_init(void)
574{ 643{
@@ -634,8 +703,8 @@ void __init uv_system_init(void)
634 } 703 }
635 704
636 uv_bios_init(); 705 uv_bios_init();
637 uv_bios_get_sn_info(0, &uv_type, &sn_partition_id, 706 uv_bios_get_sn_info(0, &uv_type, &sn_partition_id, &sn_coherency_id,
638 &sn_coherency_id, &sn_region_size); 707 &sn_region_size, &system_serial_number);
639 uv_rtc_init(); 708 uv_rtc_init();
640 709
641 for_each_present_cpu(cpu) { 710 for_each_present_cpu(cpu) {
@@ -690,5 +759,9 @@ void __init uv_system_init(void)
690 759
691 uv_cpu_init(); 760 uv_cpu_init();
692 uv_scir_register_cpu_notifier(); 761 uv_scir_register_cpu_notifier();
762 uv_register_nmi_notifier();
693 proc_mkdir("sgi_uv", NULL); 763 proc_mkdir("sgi_uv", NULL);
764
765 /* register Legacy VGA I/O redirection handler */
766 pci_register_set_vga_state(uv_set_vga_state);
694} 767}
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index b5b6b23bce53..031aa887b0eb 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -1992,8 +1992,8 @@ static int __init apm_is_horked_d850md(const struct dmi_system_id *d)
1992 apm_info.disabled = 1; 1992 apm_info.disabled = 1;
1993 printk(KERN_INFO "%s machine detected. " 1993 printk(KERN_INFO "%s machine detected. "
1994 "Disabling APM.\n", d->ident); 1994 "Disabling APM.\n", d->ident);
1995 printk(KERN_INFO "This bug is fixed in bios P15 which is available for \n"); 1995 printk(KERN_INFO "This bug is fixed in bios P15 which is available for\n");
1996 printk(KERN_INFO "download from support.intel.com \n"); 1996 printk(KERN_INFO "download from support.intel.com\n");
1997 } 1997 }
1998 return 0; 1998 return 0;
1999} 1999}
diff --git a/arch/x86/kernel/bios_uv.c b/arch/x86/kernel/bios_uv.c
index b0206a211b09..8bc57baaa9ad 100644
--- a/arch/x86/kernel/bios_uv.c
+++ b/arch/x86/kernel/bios_uv.c
@@ -15,8 +15,8 @@
15 * along with this program; if not, write to the Free Software 15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 * 17 *
18 * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved. 18 * Copyright (c) 2008-2009 Silicon Graphics, Inc. All Rights Reserved.
19 * Copyright (c) Russ Anderson 19 * Copyright (c) Russ Anderson <rja@sgi.com>
20 */ 20 */
21 21
22#include <linux/efi.h> 22#include <linux/efi.h>
@@ -30,6 +30,7 @@ static struct uv_systab uv_systab;
30s64 uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, u64 a4, u64 a5) 30s64 uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, u64 a4, u64 a5)
31{ 31{
32 struct uv_systab *tab = &uv_systab; 32 struct uv_systab *tab = &uv_systab;
33 s64 ret;
33 34
34 if (!tab->function) 35 if (!tab->function)
35 /* 36 /*
@@ -37,9 +38,11 @@ s64 uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, u64 a4, u64 a5)
37 */ 38 */
38 return BIOS_STATUS_UNIMPLEMENTED; 39 return BIOS_STATUS_UNIMPLEMENTED;
39 40
40 return efi_call6((void *)__va(tab->function), 41 ret = efi_call6((void *)__va(tab->function), (u64)which,
41 (u64)which, a1, a2, a3, a4, a5); 42 a1, a2, a3, a4, a5);
43 return ret;
42} 44}
45EXPORT_SYMBOL_GPL(uv_bios_call);
43 46
44s64 uv_bios_call_irqsave(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, 47s64 uv_bios_call_irqsave(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3,
45 u64 a4, u64 a5) 48 u64 a4, u64 a5)
@@ -73,11 +76,14 @@ long sn_coherency_id;
73EXPORT_SYMBOL_GPL(sn_coherency_id); 76EXPORT_SYMBOL_GPL(sn_coherency_id);
74long sn_region_size; 77long sn_region_size;
75EXPORT_SYMBOL_GPL(sn_region_size); 78EXPORT_SYMBOL_GPL(sn_region_size);
79long system_serial_number;
80EXPORT_SYMBOL_GPL(system_serial_number);
76int uv_type; 81int uv_type;
82EXPORT_SYMBOL_GPL(uv_type);
77 83
78 84
79s64 uv_bios_get_sn_info(int fc, int *uvtype, long *partid, long *coher, 85s64 uv_bios_get_sn_info(int fc, int *uvtype, long *partid, long *coher,
80 long *region) 86 long *region, long *ssn)
81{ 87{
82 s64 ret; 88 s64 ret;
83 u64 v0, v1; 89 u64 v0, v1;
@@ -97,8 +103,11 @@ s64 uv_bios_get_sn_info(int fc, int *uvtype, long *partid, long *coher,
97 *coher = part.coherence_id; 103 *coher = part.coherence_id;
98 if (region) 104 if (region)
99 *region = part.region_size; 105 *region = part.region_size;
106 if (ssn)
107 *ssn = v1;
100 return ret; 108 return ret;
101} 109}
110EXPORT_SYMBOL_GPL(uv_bios_get_sn_info);
102 111
103int 112int
104uv_bios_mq_watchlist_alloc(unsigned long addr, unsigned int mq_size, 113uv_bios_mq_watchlist_alloc(unsigned long addr, unsigned int mq_size,
@@ -154,6 +163,25 @@ s64 uv_bios_freq_base(u64 clock_type, u64 *ticks_per_second)
154} 163}
155EXPORT_SYMBOL_GPL(uv_bios_freq_base); 164EXPORT_SYMBOL_GPL(uv_bios_freq_base);
156 165
166/*
167 * uv_bios_set_legacy_vga_target - Set Legacy VGA I/O Target
168 * @decode: true to enable target, false to disable target
169 * @domain: PCI domain number
170 * @bus: PCI bus number
171 *
172 * Returns:
173 * 0: Success
174 * -EINVAL: Invalid domain or bus number
175 * -ENOSYS: Capability not available
176 * -EBUSY: Legacy VGA I/O cannot be retargeted at this time
177 */
178int uv_bios_set_legacy_vga_target(bool decode, int domain, int bus)
179{
180 return uv_bios_call(UV_BIOS_SET_LEGACY_VGA_TARGET,
181 (u64)decode, (u64)domain, (u64)bus, 0, 0);
182}
183EXPORT_SYMBOL_GPL(uv_bios_set_legacy_vga_target);
184
157 185
158#ifdef CONFIG_EFI 186#ifdef CONFIG_EFI
159void uv_bios_init(void) 187void uv_bios_init(void)
@@ -185,4 +213,3 @@ void uv_bios_init(void)
185 213
186void uv_bios_init(void) { } 214void uv_bios_init(void) { }
187#endif 215#endif
188
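The kernel-doc above spells out the return contract of uv_bios_set_legacy_vga_target(). A hedged sketch of a caller that follows it (the function and its message are illustrative, not kernel code):

    /* Illustrative caller honouring the documented return values. */
    static int retarget_legacy_vga(int domain, int bus)
    {
        int rc = uv_bios_set_legacy_vga_target(true, domain, bus);

        if (rc == -ENOSYS)
            return 0;   /* capability not present: nothing to do */
        if (rc)
            printk(KERN_WARNING "VGA retarget to %x:%x failed: %d\n",
                   domain, bus, rc);
        return rc;
    }
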
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index 1d2cb383410e..c202b62f3671 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -19,8 +19,6 @@ obj-y += vmware.o hypervisor.o sched.o
19obj-$(CONFIG_X86_32) += bugs.o cmpxchg.o 19obj-$(CONFIG_X86_32) += bugs.o cmpxchg.o
20obj-$(CONFIG_X86_64) += bugs_64.o 20obj-$(CONFIG_X86_64) += bugs_64.o
21 21
22obj-$(CONFIG_X86_CPU_DEBUG) += cpu_debug.o
23
24obj-$(CONFIG_CPU_SUP_INTEL) += intel.o 22obj-$(CONFIG_CPU_SUP_INTEL) += intel.o
25obj-$(CONFIG_CPU_SUP_AMD) += amd.o 23obj-$(CONFIG_CPU_SUP_AMD) += amd.o
26obj-$(CONFIG_CPU_SUP_CYRIX_32) += cyrix.o 24obj-$(CONFIG_CPU_SUP_CYRIX_32) += cyrix.o
diff --git a/arch/x86/kernel/cpu/addon_cpuid_features.c b/arch/x86/kernel/cpu/addon_cpuid_features.c
index 468489b57aae..97ad79cdf688 100644
--- a/arch/x86/kernel/cpu/addon_cpuid_features.c
+++ b/arch/x86/kernel/cpu/addon_cpuid_features.c
@@ -32,6 +32,10 @@ void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c)
32 static const struct cpuid_bit __cpuinitconst cpuid_bits[] = { 32 static const struct cpuid_bit __cpuinitconst cpuid_bits[] = {
33 { X86_FEATURE_IDA, CR_EAX, 1, 0x00000006 }, 33 { X86_FEATURE_IDA, CR_EAX, 1, 0x00000006 },
34 { X86_FEATURE_ARAT, CR_EAX, 2, 0x00000006 }, 34 { X86_FEATURE_ARAT, CR_EAX, 2, 0x00000006 },
35 { X86_FEATURE_NPT, CR_EDX, 0, 0x8000000a },
36 { X86_FEATURE_LBRV, CR_EDX, 1, 0x8000000a },
37 { X86_FEATURE_SVML, CR_EDX, 2, 0x8000000a },
38 { X86_FEATURE_NRIPS, CR_EDX, 3, 0x8000000a },
35 { 0, 0, 0, 0 } 39 { 0, 0, 0, 0 }
36 }; 40 };
37 41
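The four new entries map SVM sub-features into the scattered cpufeature word: each one names the EDX bit of CPUID leaf 0x8000000a that init_scattered_cpuid_features() should probe (bit 0 NPT, bit 1 LBRV, bit 2 SVML, bit 3 NRIPS). A hedged sketch of the same probe done directly, illustrative rather than kernel code, and assuming the extended leaf is present:

    #include <cpuid.h>
    #include <stdbool.h>

    /* Reads CPUID 0x8000000a and reports whether nested paging (NPT) is available. */
    static bool cpu_has_svm_npt(void)
    {
        unsigned int eax, ebx, ecx, edx;

        __cpuid(0x8000000a, eax, ebx, ecx, edx);
        return edx & (1u << 0);     /* bit 0 = NPT */
    }
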
diff --git a/arch/x86/kernel/cpu/cpu_debug.c b/arch/x86/kernel/cpu/cpu_debug.c
deleted file mode 100644
index b368cd862997..000000000000
--- a/arch/x86/kernel/cpu/cpu_debug.c
+++ /dev/null
@@ -1,688 +0,0 @@
1/*
2 * CPU x86 architecture debug code
3 *
4 * Copyright(C) 2009 Jaswinder Singh Rajput
5 *
6 * For licencing details see kernel-base/COPYING
7 */
8
9#include <linux/interrupt.h>
10#include <linux/compiler.h>
11#include <linux/seq_file.h>
12#include <linux/debugfs.h>
13#include <linux/kprobes.h>
14#include <linux/uaccess.h>
15#include <linux/kernel.h>
16#include <linux/module.h>
17#include <linux/percpu.h>
18#include <linux/signal.h>
19#include <linux/errno.h>
20#include <linux/sched.h>
21#include <linux/types.h>
22#include <linux/init.h>
23#include <linux/slab.h>
24#include <linux/smp.h>
25
26#include <asm/cpu_debug.h>
27#include <asm/paravirt.h>
28#include <asm/system.h>
29#include <asm/traps.h>
30#include <asm/apic.h>
31#include <asm/desc.h>
32
33static DEFINE_PER_CPU(struct cpu_cpuX_base [CPU_REG_ALL_BIT], cpud_arr);
34static DEFINE_PER_CPU(struct cpu_private * [MAX_CPU_FILES], cpud_priv_arr);
35static DEFINE_PER_CPU(int, cpud_priv_count);
36
37static DEFINE_MUTEX(cpu_debug_lock);
38
39static struct dentry *cpu_debugfs_dir;
40
41static struct cpu_debug_base cpu_base[] = {
42 { "mc", CPU_MC, 0 },
43 { "monitor", CPU_MONITOR, 0 },
44 { "time", CPU_TIME, 0 },
45 { "pmc", CPU_PMC, 1 },
46 { "platform", CPU_PLATFORM, 0 },
47 { "apic", CPU_APIC, 0 },
48 { "poweron", CPU_POWERON, 0 },
49 { "control", CPU_CONTROL, 0 },
50 { "features", CPU_FEATURES, 0 },
51 { "lastbranch", CPU_LBRANCH, 0 },
52 { "bios", CPU_BIOS, 0 },
53 { "freq", CPU_FREQ, 0 },
54 { "mtrr", CPU_MTRR, 0 },
55 { "perf", CPU_PERF, 0 },
56 { "cache", CPU_CACHE, 0 },
57 { "sysenter", CPU_SYSENTER, 0 },
58 { "therm", CPU_THERM, 0 },
59 { "misc", CPU_MISC, 0 },
60 { "debug", CPU_DEBUG, 0 },
61 { "pat", CPU_PAT, 0 },
62 { "vmx", CPU_VMX, 0 },
63 { "call", CPU_CALL, 0 },
64 { "base", CPU_BASE, 0 },
65 { "ver", CPU_VER, 0 },
66 { "conf", CPU_CONF, 0 },
67 { "smm", CPU_SMM, 0 },
68 { "svm", CPU_SVM, 0 },
69 { "osvm", CPU_OSVM, 0 },
70 { "tss", CPU_TSS, 0 },
71 { "cr", CPU_CR, 0 },
72 { "dt", CPU_DT, 0 },
73 { "registers", CPU_REG_ALL, 0 },
74};
75
76static struct cpu_file_base cpu_file[] = {
77 { "index", CPU_REG_ALL, 0 },
78 { "value", CPU_REG_ALL, 1 },
79};
80
81/* CPU Registers Range */
82static struct cpu_debug_range cpu_reg_range[] = {
83 { 0x00000000, 0x00000001, CPU_MC, },
84 { 0x00000006, 0x00000007, CPU_MONITOR, },
85 { 0x00000010, 0x00000010, CPU_TIME, },
86 { 0x00000011, 0x00000013, CPU_PMC, },
87 { 0x00000017, 0x00000017, CPU_PLATFORM, },
88 { 0x0000001B, 0x0000001B, CPU_APIC, },
89 { 0x0000002A, 0x0000002B, CPU_POWERON, },
90 { 0x0000002C, 0x0000002C, CPU_FREQ, },
91 { 0x0000003A, 0x0000003A, CPU_CONTROL, },
92 { 0x00000040, 0x00000047, CPU_LBRANCH, },
93 { 0x00000060, 0x00000067, CPU_LBRANCH, },
94 { 0x00000079, 0x00000079, CPU_BIOS, },
95 { 0x00000088, 0x0000008A, CPU_CACHE, },
96 { 0x0000008B, 0x0000008B, CPU_BIOS, },
97 { 0x0000009B, 0x0000009B, CPU_MONITOR, },
98 { 0x000000C1, 0x000000C4, CPU_PMC, },
99 { 0x000000CD, 0x000000CD, CPU_FREQ, },
100 { 0x000000E7, 0x000000E8, CPU_PERF, },
101 { 0x000000FE, 0x000000FE, CPU_MTRR, },
102
103 { 0x00000116, 0x0000011E, CPU_CACHE, },
104 { 0x00000174, 0x00000176, CPU_SYSENTER, },
105 { 0x00000179, 0x0000017B, CPU_MC, },
106 { 0x00000186, 0x00000189, CPU_PMC, },
107 { 0x00000198, 0x00000199, CPU_PERF, },
108 { 0x0000019A, 0x0000019A, CPU_TIME, },
109 { 0x0000019B, 0x0000019D, CPU_THERM, },
110 { 0x000001A0, 0x000001A0, CPU_MISC, },
111 { 0x000001C9, 0x000001C9, CPU_LBRANCH, },
112 { 0x000001D7, 0x000001D8, CPU_LBRANCH, },
113 { 0x000001D9, 0x000001D9, CPU_DEBUG, },
114 { 0x000001DA, 0x000001E0, CPU_LBRANCH, },
115
116 { 0x00000200, 0x0000020F, CPU_MTRR, },
117 { 0x00000250, 0x00000250, CPU_MTRR, },
118 { 0x00000258, 0x00000259, CPU_MTRR, },
119 { 0x00000268, 0x0000026F, CPU_MTRR, },
120 { 0x00000277, 0x00000277, CPU_PAT, },
121 { 0x000002FF, 0x000002FF, CPU_MTRR, },
122
123 { 0x00000300, 0x00000311, CPU_PMC, },
124 { 0x00000345, 0x00000345, CPU_PMC, },
125 { 0x00000360, 0x00000371, CPU_PMC, },
126 { 0x0000038D, 0x00000390, CPU_PMC, },
127 { 0x000003A0, 0x000003BE, CPU_PMC, },
128 { 0x000003C0, 0x000003CD, CPU_PMC, },
129 { 0x000003E0, 0x000003E1, CPU_PMC, },
130 { 0x000003F0, 0x000003F2, CPU_PMC, },
131
132 { 0x00000400, 0x00000417, CPU_MC, },
133 { 0x00000480, 0x0000048B, CPU_VMX, },
134
135 { 0x00000600, 0x00000600, CPU_DEBUG, },
136 { 0x00000680, 0x0000068F, CPU_LBRANCH, },
137 { 0x000006C0, 0x000006CF, CPU_LBRANCH, },
138
139 { 0x000107CC, 0x000107D3, CPU_PMC, },
140
141 { 0xC0000080, 0xC0000080, CPU_FEATURES, },
142 { 0xC0000081, 0xC0000084, CPU_CALL, },
143 { 0xC0000100, 0xC0000102, CPU_BASE, },
144 { 0xC0000103, 0xC0000103, CPU_TIME, },
145
146 { 0xC0010000, 0xC0010007, CPU_PMC, },
147 { 0xC0010010, 0xC0010010, CPU_CONF, },
148 { 0xC0010015, 0xC0010015, CPU_CONF, },
149 { 0xC0010016, 0xC001001A, CPU_MTRR, },
150 { 0xC001001D, 0xC001001D, CPU_MTRR, },
151 { 0xC001001F, 0xC001001F, CPU_CONF, },
152 { 0xC0010030, 0xC0010035, CPU_BIOS, },
153 { 0xC0010044, 0xC0010048, CPU_MC, },
154 { 0xC0010050, 0xC0010056, CPU_SMM, },
155 { 0xC0010058, 0xC0010058, CPU_CONF, },
156 { 0xC0010060, 0xC0010060, CPU_CACHE, },
157 { 0xC0010061, 0xC0010068, CPU_SMM, },
158 { 0xC0010069, 0xC001006B, CPU_SMM, },
159 { 0xC0010070, 0xC0010071, CPU_SMM, },
160 { 0xC0010111, 0xC0010113, CPU_SMM, },
161 { 0xC0010114, 0xC0010118, CPU_SVM, },
162 { 0xC0010140, 0xC0010141, CPU_OSVM, },
163 { 0xC0011022, 0xC0011023, CPU_CONF, },
164};
165
166static int is_typeflag_valid(unsigned cpu, unsigned flag)
167{
168 int i;
169
170 /* Standard Registers should be always valid */
171 if (flag >= CPU_TSS)
172 return 1;
173
174 for (i = 0; i < ARRAY_SIZE(cpu_reg_range); i++) {
175 if (cpu_reg_range[i].flag == flag)
176 return 1;
177 }
178
179 /* Invalid */
180 return 0;
181}
182
183static unsigned get_cpu_range(unsigned cpu, unsigned *min, unsigned *max,
184 int index, unsigned flag)
185{
186 if (cpu_reg_range[index].flag == flag) {
187 *min = cpu_reg_range[index].min;
188 *max = cpu_reg_range[index].max;
189 } else
190 *max = 0;
191
192 return *max;
193}
194
195/* This function can also be called with seq = NULL for printk */
196static void print_cpu_data(struct seq_file *seq, unsigned type,
197 u32 low, u32 high)
198{
199 struct cpu_private *priv;
200 u64 val = high;
201
202 if (seq) {
203 priv = seq->private;
204 if (priv->file) {
205 val = (val << 32) | low;
206 seq_printf(seq, "0x%llx\n", val);
207 } else
208 seq_printf(seq, " %08x: %08x_%08x\n",
209 type, high, low);
210 } else
211 printk(KERN_INFO " %08x: %08x_%08x\n", type, high, low);
212}
213
214/* This function can also be called with seq = NULL for printk */
215static void print_msr(struct seq_file *seq, unsigned cpu, unsigned flag)
216{
217 unsigned msr, msr_min, msr_max;
218 struct cpu_private *priv;
219 u32 low, high;
220 int i;
221
222 if (seq) {
223 priv = seq->private;
224 if (priv->file) {
225 if (!rdmsr_safe_on_cpu(priv->cpu, priv->reg,
226 &low, &high))
227 print_cpu_data(seq, priv->reg, low, high);
228 return;
229 }
230 }
231
232 for (i = 0; i < ARRAY_SIZE(cpu_reg_range); i++) {
233 if (!get_cpu_range(cpu, &msr_min, &msr_max, i, flag))
234 continue;
235
236 for (msr = msr_min; msr <= msr_max; msr++) {
237 if (rdmsr_safe_on_cpu(cpu, msr, &low, &high))
238 continue;
239 print_cpu_data(seq, msr, low, high);
240 }
241 }
242}
243
244static void print_tss(void *arg)
245{
246 struct pt_regs *regs = task_pt_regs(current);
247 struct seq_file *seq = arg;
248 unsigned int seg;
249
250 seq_printf(seq, " RAX\t: %016lx\n", regs->ax);
251 seq_printf(seq, " RBX\t: %016lx\n", regs->bx);
252 seq_printf(seq, " RCX\t: %016lx\n", regs->cx);
253 seq_printf(seq, " RDX\t: %016lx\n", regs->dx);
254
255 seq_printf(seq, " RSI\t: %016lx\n", regs->si);
256 seq_printf(seq, " RDI\t: %016lx\n", regs->di);
257 seq_printf(seq, " RBP\t: %016lx\n", regs->bp);
258 seq_printf(seq, " ESP\t: %016lx\n", regs->sp);
259
260#ifdef CONFIG_X86_64
261 seq_printf(seq, " R08\t: %016lx\n", regs->r8);
262 seq_printf(seq, " R09\t: %016lx\n", regs->r9);
263 seq_printf(seq, " R10\t: %016lx\n", regs->r10);
264 seq_printf(seq, " R11\t: %016lx\n", regs->r11);
265 seq_printf(seq, " R12\t: %016lx\n", regs->r12);
266 seq_printf(seq, " R13\t: %016lx\n", regs->r13);
267 seq_printf(seq, " R14\t: %016lx\n", regs->r14);
268 seq_printf(seq, " R15\t: %016lx\n", regs->r15);
269#endif
270
271 asm("movl %%cs,%0" : "=r" (seg));
272 seq_printf(seq, " CS\t: %04x\n", seg);
273 asm("movl %%ds,%0" : "=r" (seg));
274 seq_printf(seq, " DS\t: %04x\n", seg);
275 seq_printf(seq, " SS\t: %04lx\n", regs->ss & 0xffff);
276 asm("movl %%es,%0" : "=r" (seg));
277 seq_printf(seq, " ES\t: %04x\n", seg);
278 asm("movl %%fs,%0" : "=r" (seg));
279 seq_printf(seq, " FS\t: %04x\n", seg);
280 asm("movl %%gs,%0" : "=r" (seg));
281 seq_printf(seq, " GS\t: %04x\n", seg);
282
283 seq_printf(seq, " EFLAGS\t: %016lx\n", regs->flags);
284
285 seq_printf(seq, " EIP\t: %016lx\n", regs->ip);
286}
287
288static void print_cr(void *arg)
289{
290 struct seq_file *seq = arg;
291
292 seq_printf(seq, " cr0\t: %016lx\n", read_cr0());
293 seq_printf(seq, " cr2\t: %016lx\n", read_cr2());
294 seq_printf(seq, " cr3\t: %016lx\n", read_cr3());
295 seq_printf(seq, " cr4\t: %016lx\n", read_cr4_safe());
296#ifdef CONFIG_X86_64
297 seq_printf(seq, " cr8\t: %016lx\n", read_cr8());
298#endif
299}
300
301static void print_desc_ptr(char *str, struct seq_file *seq, struct desc_ptr dt)
302{
303 seq_printf(seq, " %s\t: %016llx\n", str, (u64)(dt.address | dt.size));
304}
305
306static void print_dt(void *seq)
307{
308 struct desc_ptr dt;
309 unsigned long ldt;
310
311 /* IDT */
312 store_idt((struct desc_ptr *)&dt);
313 print_desc_ptr("IDT", seq, dt);
314
315 /* GDT */
316 store_gdt((struct desc_ptr *)&dt);
317 print_desc_ptr("GDT", seq, dt);
318
319 /* LDT */
320 store_ldt(ldt);
321 seq_printf(seq, " LDT\t: %016lx\n", ldt);
322
323 /* TR */
324 store_tr(ldt);
325 seq_printf(seq, " TR\t: %016lx\n", ldt);
326}
327
328static void print_dr(void *arg)
329{
330 struct seq_file *seq = arg;
331 unsigned long dr;
332 int i;
333
334 for (i = 0; i < 8; i++) {
335 /* Ignore db4, db5 */
336 if ((i == 4) || (i == 5))
337 continue;
338 get_debugreg(dr, i);
339 seq_printf(seq, " dr%d\t: %016lx\n", i, dr);
340 }
341
342 seq_printf(seq, "\n MSR\t:\n");
343}
344
345static void print_apic(void *arg)
346{
347 struct seq_file *seq = arg;
348
349#ifdef CONFIG_X86_LOCAL_APIC
350 seq_printf(seq, " LAPIC\t:\n");
351 seq_printf(seq, " ID\t\t: %08x\n", apic_read(APIC_ID) >> 24);
352 seq_printf(seq, " LVR\t\t: %08x\n", apic_read(APIC_LVR));
353 seq_printf(seq, " TASKPRI\t: %08x\n", apic_read(APIC_TASKPRI));
354 seq_printf(seq, " ARBPRI\t\t: %08x\n", apic_read(APIC_ARBPRI));
355 seq_printf(seq, " PROCPRI\t: %08x\n", apic_read(APIC_PROCPRI));
356 seq_printf(seq, " LDR\t\t: %08x\n", apic_read(APIC_LDR));
357 seq_printf(seq, " DFR\t\t: %08x\n", apic_read(APIC_DFR));
358 seq_printf(seq, " SPIV\t\t: %08x\n", apic_read(APIC_SPIV));
359 seq_printf(seq, " ISR\t\t: %08x\n", apic_read(APIC_ISR));
360 seq_printf(seq, " ESR\t\t: %08x\n", apic_read(APIC_ESR));
361 seq_printf(seq, " ICR\t\t: %08x\n", apic_read(APIC_ICR));
362 seq_printf(seq, " ICR2\t\t: %08x\n", apic_read(APIC_ICR2));
363 seq_printf(seq, " LVTT\t\t: %08x\n", apic_read(APIC_LVTT));
364 seq_printf(seq, " LVTTHMR\t: %08x\n", apic_read(APIC_LVTTHMR));
365 seq_printf(seq, " LVTPC\t\t: %08x\n", apic_read(APIC_LVTPC));
366 seq_printf(seq, " LVT0\t\t: %08x\n", apic_read(APIC_LVT0));
367 seq_printf(seq, " LVT1\t\t: %08x\n", apic_read(APIC_LVT1));
368 seq_printf(seq, " LVTERR\t\t: %08x\n", apic_read(APIC_LVTERR));
369 seq_printf(seq, " TMICT\t\t: %08x\n", apic_read(APIC_TMICT));
370 seq_printf(seq, " TMCCT\t\t: %08x\n", apic_read(APIC_TMCCT));
371 seq_printf(seq, " TDCR\t\t: %08x\n", apic_read(APIC_TDCR));
372 if (boot_cpu_has(X86_FEATURE_EXTAPIC)) {
373 unsigned int i, v, maxeilvt;
374
375 v = apic_read(APIC_EFEAT);
376 maxeilvt = (v >> 16) & 0xff;
377 seq_printf(seq, " EFEAT\t\t: %08x\n", v);
378 seq_printf(seq, " ECTRL\t\t: %08x\n", apic_read(APIC_ECTRL));
379
380 for (i = 0; i < maxeilvt; i++) {
381 v = apic_read(APIC_EILVTn(i));
382 seq_printf(seq, " EILVT%d\t\t: %08x\n", i, v);
383 }
384 }
385#endif /* CONFIG_X86_LOCAL_APIC */
386 seq_printf(seq, "\n MSR\t:\n");
387}
388
389static int cpu_seq_show(struct seq_file *seq, void *v)
390{
391 struct cpu_private *priv = seq->private;
392
393 if (priv == NULL)
394 return -EINVAL;
395
396 switch (cpu_base[priv->type].flag) {
397 case CPU_TSS:
398 smp_call_function_single(priv->cpu, print_tss, seq, 1);
399 break;
400 case CPU_CR:
401 smp_call_function_single(priv->cpu, print_cr, seq, 1);
402 break;
403 case CPU_DT:
404 smp_call_function_single(priv->cpu, print_dt, seq, 1);
405 break;
406 case CPU_DEBUG:
407 if (priv->file == CPU_INDEX_BIT)
408 smp_call_function_single(priv->cpu, print_dr, seq, 1);
409 print_msr(seq, priv->cpu, cpu_base[priv->type].flag);
410 break;
411 case CPU_APIC:
412 if (priv->file == CPU_INDEX_BIT)
413 smp_call_function_single(priv->cpu, print_apic, seq, 1);
414 print_msr(seq, priv->cpu, cpu_base[priv->type].flag);
415 break;
416
417 default:
418 print_msr(seq, priv->cpu, cpu_base[priv->type].flag);
419 break;
420 }
421 seq_printf(seq, "\n");
422
423 return 0;
424}
425
426static void *cpu_seq_start(struct seq_file *seq, loff_t *pos)
427{
428 if (*pos == 0) /* One time is enough ;-) */
429 return seq;
430
431 return NULL;
432}
433
434static void *cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos)
435{
436 (*pos)++;
437
438 return cpu_seq_start(seq, pos);
439}
440
441static void cpu_seq_stop(struct seq_file *seq, void *v)
442{
443}
444
445static const struct seq_operations cpu_seq_ops = {
446 .start = cpu_seq_start,
447 .next = cpu_seq_next,
448 .stop = cpu_seq_stop,
449 .show = cpu_seq_show,
450};
451
452static int cpu_seq_open(struct inode *inode, struct file *file)
453{
454 struct cpu_private *priv = inode->i_private;
455 struct seq_file *seq;
456 int err;
457
458 err = seq_open(file, &cpu_seq_ops);
459 if (!err) {
460 seq = file->private_data;
461 seq->private = priv;
462 }
463
464 return err;
465}
466
467static int write_msr(struct cpu_private *priv, u64 val)
468{
469 u32 low, high;
470
471 high = (val >> 32) & 0xffffffff;
472 low = val & 0xffffffff;
473
474 if (!wrmsr_safe_on_cpu(priv->cpu, priv->reg, low, high))
475 return 0;
476
477 return -EPERM;
478}
479
480static int write_cpu_register(struct cpu_private *priv, const char *buf)
481{
482 int ret = -EPERM;
483 u64 val;
484
485 ret = strict_strtoull(buf, 0, &val);
486 if (ret < 0)
487 return ret;
488
489 /* Supporting only MSRs */
490 if (priv->type < CPU_TSS_BIT)
491 return write_msr(priv, val);
492
493 return ret;
494}
495
496static ssize_t cpu_write(struct file *file, const char __user *ubuf,
497 size_t count, loff_t *off)
498{
499 struct seq_file *seq = file->private_data;
500 struct cpu_private *priv = seq->private;
501 char buf[19];
502
503 if ((priv == NULL) || (count >= sizeof(buf)))
504 return -EINVAL;
505
506 if (copy_from_user(&buf, ubuf, count))
507 return -EFAULT;
508
509 buf[count] = 0;
510
511 if ((cpu_base[priv->type].write) && (cpu_file[priv->file].write))
512 if (!write_cpu_register(priv, buf))
513 return count;
514
515 return -EACCES;
516}
517
518static const struct file_operations cpu_fops = {
519 .owner = THIS_MODULE,
520 .open = cpu_seq_open,
521 .read = seq_read,
522 .write = cpu_write,
523 .llseek = seq_lseek,
524 .release = seq_release,
525};
526
527static int cpu_create_file(unsigned cpu, unsigned type, unsigned reg,
528 unsigned file, struct dentry *dentry)
529{
530 struct cpu_private *priv = NULL;
531
532 /* Already intialized */
533 if (file == CPU_INDEX_BIT)
534 if (per_cpu(cpud_arr[type].init, cpu))
535 return 0;
536
537 priv = kzalloc(sizeof(*priv), GFP_KERNEL);
538 if (priv == NULL)
539 return -ENOMEM;
540
541 priv->cpu = cpu;
542 priv->type = type;
543 priv->reg = reg;
544 priv->file = file;
545 mutex_lock(&cpu_debug_lock);
546 per_cpu(cpud_priv_arr[type], cpu) = priv;
547 per_cpu(cpud_priv_count, cpu)++;
548 mutex_unlock(&cpu_debug_lock);
549
550 if (file)
551 debugfs_create_file(cpu_file[file].name, S_IRUGO,
552 dentry, (void *)priv, &cpu_fops);
553 else {
554 debugfs_create_file(cpu_base[type].name, S_IRUGO,
555 per_cpu(cpud_arr[type].dentry, cpu),
556 (void *)priv, &cpu_fops);
557 mutex_lock(&cpu_debug_lock);
558 per_cpu(cpud_arr[type].init, cpu) = 1;
559 mutex_unlock(&cpu_debug_lock);
560 }
561
562 return 0;
563}
564
565static int cpu_init_regfiles(unsigned cpu, unsigned int type, unsigned reg,
566 struct dentry *dentry)
567{
568 unsigned file;
569 int err = 0;
570
571 for (file = 0; file < ARRAY_SIZE(cpu_file); file++) {
572 err = cpu_create_file(cpu, type, reg, file, dentry);
573 if (err)
574 return err;
575 }
576
577 return err;
578}
579
580static int cpu_init_msr(unsigned cpu, unsigned type, struct dentry *dentry)
581{
582 struct dentry *cpu_dentry = NULL;
583 unsigned reg, reg_min, reg_max;
584 int i, err = 0;
585 char reg_dir[12];
586 u32 low, high;
587
588 for (i = 0; i < ARRAY_SIZE(cpu_reg_range); i++) {
589 if (!get_cpu_range(cpu, &reg_min, &reg_max, i,
590 cpu_base[type].flag))
591 continue;
592
593 for (reg = reg_min; reg <= reg_max; reg++) {
594 if (rdmsr_safe_on_cpu(cpu, reg, &low, &high))
595 continue;
596
597 sprintf(reg_dir, "0x%x", reg);
598 cpu_dentry = debugfs_create_dir(reg_dir, dentry);
599 err = cpu_init_regfiles(cpu, type, reg, cpu_dentry);
600 if (err)
601 return err;
602 }
603 }
604
605 return err;
606}
607
608static int cpu_init_allreg(unsigned cpu, struct dentry *dentry)
609{
610 struct dentry *cpu_dentry = NULL;
611 unsigned type;
612 int err = 0;
613
614 for (type = 0; type < ARRAY_SIZE(cpu_base) - 1; type++) {
615 if (!is_typeflag_valid(cpu, cpu_base[type].flag))
616 continue;
617 cpu_dentry = debugfs_create_dir(cpu_base[type].name, dentry);
618 per_cpu(cpud_arr[type].dentry, cpu) = cpu_dentry;
619
620 if (type < CPU_TSS_BIT)
621 err = cpu_init_msr(cpu, type, cpu_dentry);
622 else
623 err = cpu_create_file(cpu, type, 0, CPU_INDEX_BIT,
624 cpu_dentry);
625 if (err)
626 return err;
627 }
628
629 return err;
630}
631
632static int cpu_init_cpu(void)
633{
634 struct dentry *cpu_dentry = NULL;
635 struct cpuinfo_x86 *cpui;
636 char cpu_dir[12];
637 unsigned cpu;
638 int err = 0;
639
640 for (cpu = 0; cpu < nr_cpu_ids; cpu++) {
641 cpui = &cpu_data(cpu);
642 if (!cpu_has(cpui, X86_FEATURE_MSR))
643 continue;
644
645 sprintf(cpu_dir, "cpu%d", cpu);
646 cpu_dentry = debugfs_create_dir(cpu_dir, cpu_debugfs_dir);
647 err = cpu_init_allreg(cpu, cpu_dentry);
648
649 pr_info("cpu%d(%d) debug files %d\n",
650 cpu, nr_cpu_ids, per_cpu(cpud_priv_count, cpu));
651 if (per_cpu(cpud_priv_count, cpu) > MAX_CPU_FILES) {
652 pr_err("Register files count %d exceeds limit %d\n",
653 per_cpu(cpud_priv_count, cpu), MAX_CPU_FILES);
654 per_cpu(cpud_priv_count, cpu) = MAX_CPU_FILES;
655 err = -ENFILE;
656 }
657 if (err)
658 return err;
659 }
660
661 return err;
662}
663
664static int __init cpu_debug_init(void)
665{
666 cpu_debugfs_dir = debugfs_create_dir("cpu", arch_debugfs_dir);
667
668 return cpu_init_cpu();
669}
670
671static void __exit cpu_debug_exit(void)
672{
673 int i, cpu;
674
675 if (cpu_debugfs_dir)
676 debugfs_remove_recursive(cpu_debugfs_dir);
677
678 for (cpu = 0; cpu < nr_cpu_ids; cpu++)
679 for (i = 0; i < per_cpu(cpud_priv_count, cpu); i++)
680 kfree(per_cpu(cpud_priv_arr[i], cpu));
681}
682
683module_init(cpu_debug_init);
684module_exit(cpu_debug_exit);
685
686MODULE_AUTHOR("Jaswinder Singh Rajput");
687MODULE_DESCRIPTION("CPU Debug module");
688MODULE_LICENSE("GPL");
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
index 55d42bc443e8..d360b56e9825 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
@@ -1356,6 +1356,7 @@ static int __devexit powernowk8_cpu_exit(struct cpufreq_policy *pol)
1356 1356
1357 kfree(data->powernow_table); 1357 kfree(data->powernow_table);
1358 kfree(data); 1358 kfree(data);
1359 per_cpu(powernow_data, pol->cpu) = NULL;
1359 1360
1360 return 0; 1361 return 0;
1361} 1362}
@@ -1375,7 +1376,7 @@ static unsigned int powernowk8_get(unsigned int cpu)
1375 int err; 1376 int err;
1376 1377
1377 if (!data) 1378 if (!data)
1378 return -EINVAL; 1379 return 0;
1379 1380
1380 smp_call_function_single(cpu, query_values_on_cpu, &err, true); 1381 smp_call_function_single(cpu, query_values_on_cpu, &err, true);
1381 if (err) 1382 if (err)
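The powernowk8_get() change above deserves a note: a cpufreq driver's ->get() callback returns an unsigned frequency in kHz, so returning -EINVAL would be read back as a huge bogus frequency; 0 is the conventional "unknown" answer, and clearing per_cpu(powernow_data) on exit makes the !data path actually reachable once the driver detaches from a CPU. A small illustrative consumer (hypothetical helper, not kernel code):

    /* Hypothetical consumer of a ->get()-style result. */
    static void report_freq(unsigned int cpu, unsigned int khz)
    {
        if (!khz)
            printk(KERN_DEBUG "cpu%u: frequency unknown\n", cpu);
        else
            printk(KERN_DEBUG "cpu%u: %u kHz\n", cpu, khz);
    }
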
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index fc6c8ef92dcc..eddb1bdd1b8f 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -18,6 +18,7 @@
18#include <asm/processor.h> 18#include <asm/processor.h>
19#include <linux/smp.h> 19#include <linux/smp.h>
20#include <asm/k8.h> 20#include <asm/k8.h>
21#include <asm/smp.h>
21 22
22#define LVL_1_INST 1 23#define LVL_1_INST 1
23#define LVL_1_DATA 2 24#define LVL_1_DATA 2
@@ -31,6 +32,8 @@ struct _cache_table {
31 short size; 32 short size;
32}; 33};
33 34
35#define MB(x) ((x) * 1024)
36
34/* All the cache descriptor types we care about (no TLB or 37/* All the cache descriptor types we care about (no TLB or
35 trace cache entries) */ 38 trace cache entries) */
36 39
@@ -44,9 +47,9 @@ static const struct _cache_table __cpuinitconst cache_table[] =
44 { 0x0d, LVL_1_DATA, 16 }, /* 4-way set assoc, 64 byte line size */ 47 { 0x0d, LVL_1_DATA, 16 }, /* 4-way set assoc, 64 byte line size */
45 { 0x21, LVL_2, 256 }, /* 8-way set assoc, 64 byte line size */ 48 { 0x21, LVL_2, 256 }, /* 8-way set assoc, 64 byte line size */
46 { 0x22, LVL_3, 512 }, /* 4-way set assoc, sectored cache, 64 byte line size */ 49 { 0x22, LVL_3, 512 }, /* 4-way set assoc, sectored cache, 64 byte line size */
47 { 0x23, LVL_3, 1024 }, /* 8-way set assoc, sectored cache, 64 byte line size */ 50 { 0x23, LVL_3, MB(1) }, /* 8-way set assoc, sectored cache, 64 byte line size */
48 { 0x25, LVL_3, 2048 }, /* 8-way set assoc, sectored cache, 64 byte line size */ 51 { 0x25, LVL_3, MB(2) }, /* 8-way set assoc, sectored cache, 64 byte line size */
49 { 0x29, LVL_3, 4096 }, /* 8-way set assoc, sectored cache, 64 byte line size */ 52 { 0x29, LVL_3, MB(4) }, /* 8-way set assoc, sectored cache, 64 byte line size */
50 { 0x2c, LVL_1_DATA, 32 }, /* 8-way set assoc, 64 byte line size */ 53 { 0x2c, LVL_1_DATA, 32 }, /* 8-way set assoc, 64 byte line size */
51 { 0x30, LVL_1_INST, 32 }, /* 8-way set assoc, 64 byte line size */ 54 { 0x30, LVL_1_INST, 32 }, /* 8-way set assoc, 64 byte line size */
52 { 0x39, LVL_2, 128 }, /* 4-way set assoc, sectored cache, 64 byte line size */ 55 { 0x39, LVL_2, 128 }, /* 4-way set assoc, sectored cache, 64 byte line size */
@@ -59,16 +62,16 @@ static const struct _cache_table __cpuinitconst cache_table[] =
59 { 0x41, LVL_2, 128 }, /* 4-way set assoc, 32 byte line size */ 62 { 0x41, LVL_2, 128 }, /* 4-way set assoc, 32 byte line size */
60 { 0x42, LVL_2, 256 }, /* 4-way set assoc, 32 byte line size */ 63 { 0x42, LVL_2, 256 }, /* 4-way set assoc, 32 byte line size */
61 { 0x43, LVL_2, 512 }, /* 4-way set assoc, 32 byte line size */ 64 { 0x43, LVL_2, 512 }, /* 4-way set assoc, 32 byte line size */
62 { 0x44, LVL_2, 1024 }, /* 4-way set assoc, 32 byte line size */ 65 { 0x44, LVL_2, MB(1) }, /* 4-way set assoc, 32 byte line size */
63 { 0x45, LVL_2, 2048 }, /* 4-way set assoc, 32 byte line size */ 66 { 0x45, LVL_2, MB(2) }, /* 4-way set assoc, 32 byte line size */
64 { 0x46, LVL_3, 4096 }, /* 4-way set assoc, 64 byte line size */ 67 { 0x46, LVL_3, MB(4) }, /* 4-way set assoc, 64 byte line size */
65 { 0x47, LVL_3, 8192 }, /* 8-way set assoc, 64 byte line size */ 68 { 0x47, LVL_3, MB(8) }, /* 8-way set assoc, 64 byte line size */
66 { 0x49, LVL_3, 4096 }, /* 16-way set assoc, 64 byte line size */ 69 { 0x49, LVL_3, MB(4) }, /* 16-way set assoc, 64 byte line size */
67 { 0x4a, LVL_3, 6144 }, /* 12-way set assoc, 64 byte line size */ 70 { 0x4a, LVL_3, MB(6) }, /* 12-way set assoc, 64 byte line size */
68 { 0x4b, LVL_3, 8192 }, /* 16-way set assoc, 64 byte line size */ 71 { 0x4b, LVL_3, MB(8) }, /* 16-way set assoc, 64 byte line size */
69 { 0x4c, LVL_3, 12288 }, /* 12-way set assoc, 64 byte line size */ 72 { 0x4c, LVL_3, MB(12) }, /* 12-way set assoc, 64 byte line size */
70 { 0x4d, LVL_3, 16384 }, /* 16-way set assoc, 64 byte line size */ 73 { 0x4d, LVL_3, MB(16) }, /* 16-way set assoc, 64 byte line size */
71 { 0x4e, LVL_2, 6144 }, /* 24-way set assoc, 64 byte line size */ 74 { 0x4e, LVL_2, MB(6) }, /* 24-way set assoc, 64 byte line size */
72 { 0x60, LVL_1_DATA, 16 }, /* 8-way set assoc, sectored cache, 64 byte line size */ 75 { 0x60, LVL_1_DATA, 16 }, /* 8-way set assoc, sectored cache, 64 byte line size */
73 { 0x66, LVL_1_DATA, 8 }, /* 4-way set assoc, sectored cache, 64 byte line size */ 76 { 0x66, LVL_1_DATA, 8 }, /* 4-way set assoc, sectored cache, 64 byte line size */
74 { 0x67, LVL_1_DATA, 16 }, /* 4-way set assoc, sectored cache, 64 byte line size */ 77 { 0x67, LVL_1_DATA, 16 }, /* 4-way set assoc, sectored cache, 64 byte line size */
@@ -77,34 +80,34 @@ static const struct _cache_table __cpuinitconst cache_table[] =
77 { 0x71, LVL_TRACE, 16 }, /* 8-way set assoc */ 80 { 0x71, LVL_TRACE, 16 }, /* 8-way set assoc */
78 { 0x72, LVL_TRACE, 32 }, /* 8-way set assoc */ 81 { 0x72, LVL_TRACE, 32 }, /* 8-way set assoc */
79 { 0x73, LVL_TRACE, 64 }, /* 8-way set assoc */ 82 { 0x73, LVL_TRACE, 64 }, /* 8-way set assoc */
80 { 0x78, LVL_2, 1024 }, /* 4-way set assoc, 64 byte line size */ 83 { 0x78, LVL_2, MB(1) }, /* 4-way set assoc, 64 byte line size */
81 { 0x79, LVL_2, 128 }, /* 8-way set assoc, sectored cache, 64 byte line size */ 84 { 0x79, LVL_2, 128 }, /* 8-way set assoc, sectored cache, 64 byte line size */
82 { 0x7a, LVL_2, 256 }, /* 8-way set assoc, sectored cache, 64 byte line size */ 85 { 0x7a, LVL_2, 256 }, /* 8-way set assoc, sectored cache, 64 byte line size */
83 { 0x7b, LVL_2, 512 }, /* 8-way set assoc, sectored cache, 64 byte line size */ 86 { 0x7b, LVL_2, 512 }, /* 8-way set assoc, sectored cache, 64 byte line size */
84 { 0x7c, LVL_2, 1024 }, /* 8-way set assoc, sectored cache, 64 byte line size */ 87 { 0x7c, LVL_2, MB(1) }, /* 8-way set assoc, sectored cache, 64 byte line size */
85 { 0x7d, LVL_2, 2048 }, /* 8-way set assoc, 64 byte line size */ 88 { 0x7d, LVL_2, MB(2) }, /* 8-way set assoc, 64 byte line size */
86 { 0x7f, LVL_2, 512 }, /* 2-way set assoc, 64 byte line size */ 89 { 0x7f, LVL_2, 512 }, /* 2-way set assoc, 64 byte line size */
87 { 0x82, LVL_2, 256 }, /* 8-way set assoc, 32 byte line size */ 90 { 0x82, LVL_2, 256 }, /* 8-way set assoc, 32 byte line size */
88 { 0x83, LVL_2, 512 }, /* 8-way set assoc, 32 byte line size */ 91 { 0x83, LVL_2, 512 }, /* 8-way set assoc, 32 byte line size */
89 { 0x84, LVL_2, 1024 }, /* 8-way set assoc, 32 byte line size */ 92 { 0x84, LVL_2, MB(1) }, /* 8-way set assoc, 32 byte line size */
90 { 0x85, LVL_2, 2048 }, /* 8-way set assoc, 32 byte line size */ 93 { 0x85, LVL_2, MB(2) }, /* 8-way set assoc, 32 byte line size */
91 { 0x86, LVL_2, 512 }, /* 4-way set assoc, 64 byte line size */ 94 { 0x86, LVL_2, 512 }, /* 4-way set assoc, 64 byte line size */
92 { 0x87, LVL_2, 1024 }, /* 8-way set assoc, 64 byte line size */ 95 { 0x87, LVL_2, MB(1) }, /* 8-way set assoc, 64 byte line size */
93 { 0xd0, LVL_3, 512 }, /* 4-way set assoc, 64 byte line size */ 96 { 0xd0, LVL_3, 512 }, /* 4-way set assoc, 64 byte line size */
94 { 0xd1, LVL_3, 1024 }, /* 4-way set assoc, 64 byte line size */ 97 { 0xd1, LVL_3, MB(1) }, /* 4-way set assoc, 64 byte line size */
95 { 0xd2, LVL_3, 2048 }, /* 4-way set assoc, 64 byte line size */ 98 { 0xd2, LVL_3, MB(2) }, /* 4-way set assoc, 64 byte line size */
96 { 0xd6, LVL_3, 1024 }, /* 8-way set assoc, 64 byte line size */ 99 { 0xd6, LVL_3, MB(1) }, /* 8-way set assoc, 64 byte line size */
97 { 0xd7, LVL_3, 2048 }, /* 8-way set assoc, 64 byte line size */ 100 { 0xd7, LVL_3, MB(2) }, /* 8-way set assoc, 64 byte line size */
98 { 0xd8, LVL_3, 4096 }, /* 12-way set assoc, 64 byte line size */ 101 { 0xd8, LVL_3, MB(4) }, /* 12-way set assoc, 64 byte line size */
99 { 0xdc, LVL_3, 2048 }, /* 12-way set assoc, 64 byte line size */ 102 { 0xdc, LVL_3, MB(2) }, /* 12-way set assoc, 64 byte line size */
100 { 0xdd, LVL_3, 4096 }, /* 12-way set assoc, 64 byte line size */ 103 { 0xdd, LVL_3, MB(4) }, /* 12-way set assoc, 64 byte line size */
101 { 0xde, LVL_3, 8192 }, /* 12-way set assoc, 64 byte line size */ 104 { 0xde, LVL_3, MB(8) }, /* 12-way set assoc, 64 byte line size */
102 { 0xe2, LVL_3, 2048 }, /* 16-way set assoc, 64 byte line size */ 105 { 0xe2, LVL_3, MB(2) }, /* 16-way set assoc, 64 byte line size */
103 { 0xe3, LVL_3, 4096 }, /* 16-way set assoc, 64 byte line size */ 106 { 0xe3, LVL_3, MB(4) }, /* 16-way set assoc, 64 byte line size */
104 { 0xe4, LVL_3, 8192 }, /* 16-way set assoc, 64 byte line size */ 107 { 0xe4, LVL_3, MB(8) }, /* 16-way set assoc, 64 byte line size */
105 { 0xea, LVL_3, 12288 }, /* 24-way set assoc, 64 byte line size */ 108 { 0xea, LVL_3, MB(12) }, /* 24-way set assoc, 64 byte line size */
106 { 0xeb, LVL_3, 18432 }, /* 24-way set assoc, 64 byte line size */ 109 { 0xeb, LVL_3, MB(18) }, /* 24-way set assoc, 64 byte line size */
107 { 0xec, LVL_3, 24576 }, /* 24-way set assoc, 64 byte line size */ 110 { 0xec, LVL_3, MB(24) }, /* 24-way set assoc, 64 byte line size */
108 { 0x00, 0, 0} 111 { 0x00, 0, 0}
109}; 112};
110 113
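
The cache_table sizes above are stored in kilobytes, so the MB() helper introduced in this patch simply scales megabytes into the table's KB unit: MB(1) expands to 1024 and MB(12) to 12288, exactly the literals they replace. A tiny check of that assumption:

    #include <assert.h>

    #define MB(x)	((x) * 1024)	/* cache_table entries are in KB */

    int main(void)
    {
    	assert(MB(1)  == 1024);		/* descriptor 0x23: 1 MB L3 */
    	assert(MB(12) == 12288);	/* descriptor 0x4c: 12 MB L3 */
    	assert(MB(18) == 18432);	/* descriptor 0xeb: 18 MB L3 */
    	return 0;
    }
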
@@ -150,7 +153,8 @@ struct _cpuid4_info {
 	union _cpuid4_leaf_ebx ebx;
 	union _cpuid4_leaf_ecx ecx;
 	unsigned long size;
-	unsigned long can_disable;
+	bool can_disable;
+	unsigned int l3_indices;
 	DECLARE_BITMAP(shared_cpu_map, NR_CPUS);
 };
 
@@ -160,7 +164,8 @@ struct _cpuid4_info_regs {
 	union _cpuid4_leaf_ebx ebx;
 	union _cpuid4_leaf_ecx ecx;
 	unsigned long size;
-	unsigned long can_disable;
+	bool can_disable;
+	unsigned int l3_indices;
 };
 
 unsigned short			num_cache_leaves;
@@ -290,6 +295,36 @@ amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
290 (ebx->split.ways_of_associativity + 1) - 1; 295 (ebx->split.ways_of_associativity + 1) - 1;
291} 296}
292 297
298struct _cache_attr {
299 struct attribute attr;
300 ssize_t (*show)(struct _cpuid4_info *, char *);
301 ssize_t (*store)(struct _cpuid4_info *, const char *, size_t count);
302};
303
304#ifdef CONFIG_CPU_SUP_AMD
305static unsigned int __cpuinit amd_calc_l3_indices(void)
306{
307 /*
308 * We're called over smp_call_function_single() and therefore
309 * are on the correct cpu.
310 */
311 int cpu = smp_processor_id();
312 int node = cpu_to_node(cpu);
313 struct pci_dev *dev = node_to_k8_nb_misc(node);
314 unsigned int sc0, sc1, sc2, sc3;
315 u32 val = 0;
316
317 pci_read_config_dword(dev, 0x1C4, &val);
318
319 /* calculate subcache sizes */
320 sc0 = !(val & BIT(0));
321 sc1 = !(val & BIT(4));
322 sc2 = !(val & BIT(8)) + !(val & BIT(9));
323 sc3 = !(val & BIT(12)) + !(val & BIT(13));
324
325 return (max(max(max(sc0, sc1), sc2), sc3) << 10) - 1;
326}
327
293static void __cpuinit 328static void __cpuinit
294amd_check_l3_disable(int index, struct _cpuid4_info_regs *this_leaf) 329amd_check_l3_disable(int index, struct _cpuid4_info_regs *this_leaf)
295{ 330{
@@ -299,12 +334,103 @@ amd_check_l3_disable(int index, struct _cpuid4_info_regs *this_leaf)
 	if (boot_cpu_data.x86 == 0x11)
 		return;
 
-	/* see erratum #382 */
-	if ((boot_cpu_data.x86 == 0x10) && (boot_cpu_data.x86_model < 0x8))
+	/* see errata #382 and #388 */
+	if ((boot_cpu_data.x86 == 0x10) &&
+	    ((boot_cpu_data.x86_model < 0x8) ||
+	     (boot_cpu_data.x86_mask < 0x1)))
 		return;
 
-	this_leaf->can_disable = 1;
+	this_leaf->can_disable = true;
+	this_leaf->l3_indices = amd_calc_l3_indices();
+}
346
347static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf,
348 unsigned int index)
349{
350 int cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map));
351 int node = amd_get_nb_id(cpu);
352 struct pci_dev *dev = node_to_k8_nb_misc(node);
353 unsigned int reg = 0;
354
355 if (!this_leaf->can_disable)
356 return -EINVAL;
357
358 if (!dev)
359 return -EINVAL;
360
361 pci_read_config_dword(dev, 0x1BC + index * 4, &reg);
362 return sprintf(buf, "0x%08x\n", reg);
363}
364
365#define SHOW_CACHE_DISABLE(index) \
366static ssize_t \
367show_cache_disable_##index(struct _cpuid4_info *this_leaf, char *buf) \
368{ \
369 return show_cache_disable(this_leaf, buf, index); \
307} 370}
371SHOW_CACHE_DISABLE(0)
372SHOW_CACHE_DISABLE(1)
373
374static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf,
375 const char *buf, size_t count, unsigned int index)
376{
377 int cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map));
378 int node = amd_get_nb_id(cpu);
379 struct pci_dev *dev = node_to_k8_nb_misc(node);
380 unsigned long val = 0;
381
382#define SUBCACHE_MASK (3UL << 20)
383#define SUBCACHE_INDEX 0xfff
384
385 if (!this_leaf->can_disable)
386 return -EINVAL;
387
388 if (!capable(CAP_SYS_ADMIN))
389 return -EPERM;
390
391 if (!dev)
392 return -EINVAL;
393
394 if (strict_strtoul(buf, 10, &val) < 0)
395 return -EINVAL;
396
397 /* do not allow writes outside of allowed bits */
398 if ((val & ~(SUBCACHE_MASK | SUBCACHE_INDEX)) ||
399 ((val & SUBCACHE_INDEX) > this_leaf->l3_indices))
400 return -EINVAL;
401
402 val |= BIT(30);
403 pci_write_config_dword(dev, 0x1BC + index * 4, val);
404 /*
405 * We need to WBINVD on a core on the node containing the L3 cache which
406 * indices we disable therefore a simple wbinvd() is not sufficient.
407 */
408 wbinvd_on_cpu(cpu);
409 pci_write_config_dword(dev, 0x1BC + index * 4, val | BIT(31));
410 return count;
411}
412
413#define STORE_CACHE_DISABLE(index) \
414static ssize_t \
415store_cache_disable_##index(struct _cpuid4_info *this_leaf, \
416 const char *buf, size_t count) \
417{ \
418 return store_cache_disable(this_leaf, buf, count, index); \
419}
420STORE_CACHE_DISABLE(0)
421STORE_CACHE_DISABLE(1)
422
423static struct _cache_attr cache_disable_0 = __ATTR(cache_disable_0, 0644,
424 show_cache_disable_0, store_cache_disable_0);
425static struct _cache_attr cache_disable_1 = __ATTR(cache_disable_1, 0644,
426 show_cache_disable_1, store_cache_disable_1);
427
428#else /* CONFIG_CPU_SUP_AMD */
429static void __cpuinit
430amd_check_l3_disable(int index, struct _cpuid4_info_regs *this_leaf)
431{
432};
433#endif /* CONFIG_CPU_SUP_AMD */
308 434
309static int 435static int
310__cpuinit cpuid4_cache_lookup_regs(int index, 436__cpuinit cpuid4_cache_lookup_regs(int index,
@@ -711,82 +837,6 @@ static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf)
711#define to_object(k) container_of(k, struct _index_kobject, kobj) 837#define to_object(k) container_of(k, struct _index_kobject, kobj)
712#define to_attr(a) container_of(a, struct _cache_attr, attr) 838#define to_attr(a) container_of(a, struct _cache_attr, attr)
713 839
714static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf,
715 unsigned int index)
716{
717 int cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map));
718 int node = cpu_to_node(cpu);
719 struct pci_dev *dev = node_to_k8_nb_misc(node);
720 unsigned int reg = 0;
721
722 if (!this_leaf->can_disable)
723 return -EINVAL;
724
725 if (!dev)
726 return -EINVAL;
727
728 pci_read_config_dword(dev, 0x1BC + index * 4, &reg);
729 return sprintf(buf, "%x\n", reg);
730}
731
732#define SHOW_CACHE_DISABLE(index) \
733static ssize_t \
734show_cache_disable_##index(struct _cpuid4_info *this_leaf, char *buf) \
735{ \
736 return show_cache_disable(this_leaf, buf, index); \
737}
738SHOW_CACHE_DISABLE(0)
739SHOW_CACHE_DISABLE(1)
740
741static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf,
742 const char *buf, size_t count, unsigned int index)
743{
744 int cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map));
745 int node = cpu_to_node(cpu);
746 struct pci_dev *dev = node_to_k8_nb_misc(node);
747 unsigned long val = 0;
748 unsigned int scrubber = 0;
749
750 if (!this_leaf->can_disable)
751 return -EINVAL;
752
753 if (!capable(CAP_SYS_ADMIN))
754 return -EPERM;
755
756 if (!dev)
757 return -EINVAL;
758
759 if (strict_strtoul(buf, 10, &val) < 0)
760 return -EINVAL;
761
762 val |= 0xc0000000;
763
764 pci_read_config_dword(dev, 0x58, &scrubber);
765 scrubber &= ~0x1f000000;
766 pci_write_config_dword(dev, 0x58, scrubber);
767
768 pci_write_config_dword(dev, 0x1BC + index * 4, val & ~0x40000000);
769 wbinvd();
770 pci_write_config_dword(dev, 0x1BC + index * 4, val);
771 return count;
772}
773
774#define STORE_CACHE_DISABLE(index) \
775static ssize_t \
776store_cache_disable_##index(struct _cpuid4_info *this_leaf, \
777 const char *buf, size_t count) \
778{ \
779 return store_cache_disable(this_leaf, buf, count, index); \
780}
781STORE_CACHE_DISABLE(0)
782STORE_CACHE_DISABLE(1)
783
784struct _cache_attr {
785 struct attribute attr;
786 ssize_t (*show)(struct _cpuid4_info *, char *);
787 ssize_t (*store)(struct _cpuid4_info *, const char *, size_t count);
788};
789
790#define define_one_ro(_name) \ 840#define define_one_ro(_name) \
791static struct _cache_attr _name = \ 841static struct _cache_attr _name = \
792 __ATTR(_name, 0444, show_##_name, NULL) 842 __ATTR(_name, 0444, show_##_name, NULL)
@@ -801,23 +851,28 @@ define_one_ro(size);
 define_one_ro(shared_cpu_map);
 define_one_ro(shared_cpu_list);
 
-static struct _cache_attr cache_disable_0 = __ATTR(cache_disable_0, 0644,
-		show_cache_disable_0, store_cache_disable_0);
-static struct _cache_attr cache_disable_1 = __ATTR(cache_disable_1, 0644,
-		show_cache_disable_1, store_cache_disable_1);
+#define DEFAULT_SYSFS_CACHE_ATTRS	\
+	&type.attr,			\
+	&level.attr,			\
+	&coherency_line_size.attr,	\
+	&physical_line_partition.attr,	\
+	&ways_of_associativity.attr,	\
+	&number_of_sets.attr,		\
+	&size.attr,			\
+	&shared_cpu_map.attr,		\
+	&shared_cpu_list.attr
 
 static struct attribute *default_attrs[] = {
-	&type.attr,
-	&level.attr,
-	&coherency_line_size.attr,
-	&physical_line_partition.attr,
-	&ways_of_associativity.attr,
-	&number_of_sets.attr,
-	&size.attr,
-	&shared_cpu_map.attr,
-	&shared_cpu_list.attr,
+	DEFAULT_SYSFS_CACHE_ATTRS,
+	NULL
+};
+
+static struct attribute *default_l3_attrs[] = {
+	DEFAULT_SYSFS_CACHE_ATTRS,
+#ifdef CONFIG_CPU_SUP_AMD
 	&cache_disable_0.attr,
 	&cache_disable_1.attr,
+#endif
 	NULL
 };
 
@@ -908,6 +963,7 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev)
 	unsigned int cpu = sys_dev->id;
 	unsigned long i, j;
 	struct _index_kobject *this_object;
+	struct _cpuid4_info   *this_leaf;
 	int retval;
 
 	retval = cpuid4_cache_sysfs_init(cpu);
@@ -926,6 +982,14 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev)
 		this_object = INDEX_KOBJECT_PTR(cpu, i);
 		this_object->cpu = cpu;
 		this_object->index = i;
+
+		this_leaf = CPUID4_INFO_IDX(cpu, i);
+
+		if (this_leaf->can_disable)
+			ktype_cache.default_attrs = default_l3_attrs;
+		else
+			ktype_cache.default_attrs = default_attrs;
+
 		retval = kobject_init_and_add(&(this_object->kobj),
 					      &ktype_cache,
 					      per_cpu(ici_cache_kobject, cpu),
diff --git a/arch/x86/kernel/cpu/mtrr/Makefile b/arch/x86/kernel/cpu/mtrr/Makefile
index f4361b56f8e9..ad9e5ed81181 100644
--- a/arch/x86/kernel/cpu/mtrr/Makefile
+++ b/arch/x86/kernel/cpu/mtrr/Makefile
@@ -1,3 +1,3 @@
-obj-y		:= main.o if.o generic.o state.o cleanup.o
+obj-y		:= main.o if.o generic.o cleanup.o
 obj-$(CONFIG_X86_32) += amd.o cyrix.o centaur.o
 
diff --git a/arch/x86/kernel/cpu/mtrr/amd.c b/arch/x86/kernel/cpu/mtrr/amd.c
index 33af14110dfd..92ba9cd31c9a 100644
--- a/arch/x86/kernel/cpu/mtrr/amd.c
+++ b/arch/x86/kernel/cpu/mtrr/amd.c
@@ -108,7 +108,7 @@ amd_validate_add_page(unsigned long base, unsigned long size, unsigned int type)
 	return 0;
 }
 
-static struct mtrr_ops amd_mtrr_ops = {
+static const struct mtrr_ops amd_mtrr_ops = {
 	.vendor            = X86_VENDOR_AMD,
 	.set               = amd_set_mtrr,
 	.get               = amd_get_mtrr,
diff --git a/arch/x86/kernel/cpu/mtrr/centaur.c b/arch/x86/kernel/cpu/mtrr/centaur.c
index de89f14eff3a..316fe3e60a97 100644
--- a/arch/x86/kernel/cpu/mtrr/centaur.c
+++ b/arch/x86/kernel/cpu/mtrr/centaur.c
@@ -110,7 +110,7 @@ centaur_validate_add_page(unsigned long base, unsigned long size, unsigned int t
 	return 0;
 }
 
-static struct mtrr_ops centaur_mtrr_ops = {
+static const struct mtrr_ops centaur_mtrr_ops = {
 	.vendor            = X86_VENDOR_CENTAUR,
 	.set               = centaur_set_mcr,
 	.get               = centaur_get_mcr,
diff --git a/arch/x86/kernel/cpu/mtrr/cyrix.c b/arch/x86/kernel/cpu/mtrr/cyrix.c
index 228d982ce09c..68a3343e5798 100644
--- a/arch/x86/kernel/cpu/mtrr/cyrix.c
+++ b/arch/x86/kernel/cpu/mtrr/cyrix.c
@@ -265,7 +265,7 @@ static void cyrix_set_all(void)
 	post_set();
 }
 
-static struct mtrr_ops cyrix_mtrr_ops = {
+static const struct mtrr_ops cyrix_mtrr_ops = {
 	.vendor            = X86_VENDOR_CYRIX,
 	.set_all	   = cyrix_set_all,
 	.set               = cyrix_set_arr,
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index 55da0c5f68dd..9aa5dc76ff4a 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -464,7 +464,7 @@ static void generic_get_mtrr(unsigned int reg, unsigned long *base,
 			tmp |= ~((1<<(hi - 1)) - 1);
 
 			if (tmp != mask_lo) {
-				WARN_ONCE(1, KERN_INFO "mtrr: your BIOS has set up an incorrect mask, fixing it up.\n");
+				printk(KERN_WARNING "mtrr: your BIOS has configured an incorrect mask, fixing it.\n");
 				mask_lo = tmp;
 			}
 		}
@@ -570,7 +570,7 @@ static unsigned long set_mtrr_state(void)
 
 
 static unsigned long cr4;
-static DEFINE_SPINLOCK(set_atomicity_lock);
+static DEFINE_RAW_SPINLOCK(set_atomicity_lock);
 
 /*
  * Since we are disabling the cache don't allow any interrupts,
@@ -590,7 +590,7 @@ static void prepare_set(void) __acquires(set_atomicity_lock)
 	 * changes to the way the kernel boots
 	 */
 
-	spin_lock(&set_atomicity_lock);
+	raw_spin_lock(&set_atomicity_lock);
 
 	/* Enter the no-fill (CD=1, NW=0) cache mode and flush caches. */
 	cr0 = read_cr0() | X86_CR0_CD;
@@ -627,7 +627,7 @@ static void post_set(void) __releases(set_atomicity_lock)
 	/* Restore value of CR4 */
 	if (cpu_has_pge)
 		write_cr4(cr4);
-	spin_unlock(&set_atomicity_lock);
+	raw_spin_unlock(&set_atomicity_lock);
 }
 
 static void generic_set_all(void)
@@ -752,7 +752,7 @@ int positive_have_wrcomb(void)
 /*
  * Generic structure...
  */
-struct mtrr_ops generic_mtrr_ops = {
+const struct mtrr_ops generic_mtrr_ops = {
 	.use_intel_if      = 1,
 	.set_all           = generic_set_all,
 	.get               = generic_get_mtrr,
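
The spinlock change above matters because prepare_set()/post_set() run with interrupts disabled and the caches in no-fill mode, so the lock guarding that window must always be a true spinning lock; a raw spinlock keeps that property on configurations where ordinary spinlocks may sleep. The resulting pattern, reduced to a sketch of what the diff establishes:

    static DEFINE_RAW_SPINLOCK(set_atomicity_lock);

    static void prepare_set(void) __acquires(set_atomicity_lock)
    {
    	raw_spin_lock(&set_atomicity_lock);
    	/* ... enter no-fill cache mode, clear PGE, flush TLBs ... */
    }

    static void post_set(void) __releases(set_atomicity_lock)
    {
    	/* ... restore MTRRdefType, caches and CR4 ... */
    	raw_spin_unlock(&set_atomicity_lock);
    }
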
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index 84e83de54575..fe4622e8c837 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -60,14 +60,14 @@ static DEFINE_MUTEX(mtrr_mutex);
 u64 size_or_mask, size_and_mask;
 static bool mtrr_aps_delayed_init;
 
-static struct mtrr_ops *mtrr_ops[X86_VENDOR_NUM];
+static const struct mtrr_ops *mtrr_ops[X86_VENDOR_NUM];
 
-struct mtrr_ops *mtrr_if;
+const struct mtrr_ops *mtrr_if;
 
 static void set_mtrr(unsigned int reg, unsigned long base,
 		     unsigned long size, mtrr_type type);
 
-void set_mtrr_ops(struct mtrr_ops *ops)
+void set_mtrr_ops(const struct mtrr_ops *ops)
 {
 	if (ops->vendor && ops->vendor < X86_VENDOR_NUM)
 		mtrr_ops[ops->vendor] = ops;
diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.h b/arch/x86/kernel/cpu/mtrr/mtrr.h
index a501dee9a87a..df5e41f31a27 100644
--- a/arch/x86/kernel/cpu/mtrr/mtrr.h
+++ b/arch/x86/kernel/cpu/mtrr/mtrr.h
@@ -32,7 +32,7 @@ extern int generic_get_free_region(unsigned long base, unsigned long size,
 extern int generic_validate_add_page(unsigned long base, unsigned long size,
 				     unsigned int type);
 
-extern struct mtrr_ops generic_mtrr_ops;
+extern const struct mtrr_ops generic_mtrr_ops;
 
 extern int positive_have_wrcomb(void);
 
@@ -53,10 +53,10 @@ void fill_mtrr_var_range(unsigned int index,
 		      u32 base_lo, u32 base_hi, u32 mask_lo, u32 mask_hi);
 void get_mtrr_state(void);
 
-extern void set_mtrr_ops(struct mtrr_ops *ops);
+extern void set_mtrr_ops(const struct mtrr_ops *ops);
 
 extern u64 size_or_mask, size_and_mask;
-extern struct mtrr_ops *mtrr_if;
+extern const struct mtrr_ops *mtrr_if;
 
 #define is_cpu(vnd)	(mtrr_if && mtrr_if->vendor == X86_VENDOR_##vnd)
 #define use_intel()	(mtrr_if && mtrr_if->use_intel_if == 1)
diff --git a/arch/x86/kernel/cpu/mtrr/state.c b/arch/x86/kernel/cpu/mtrr/state.c
deleted file mode 100644
index dfc80b4e6b0d..000000000000
--- a/arch/x86/kernel/cpu/mtrr/state.c
+++ /dev/null
@@ -1,94 +0,0 @@
1#include <linux/init.h>
2#include <linux/io.h>
3#include <linux/mm.h>
4
5#include <asm/processor-cyrix.h>
6#include <asm/processor-flags.h>
7#include <asm/mtrr.h>
8#include <asm/msr.h>
9
10#include "mtrr.h"
11
12/* Put the processor into a state where MTRRs can be safely set */
13void set_mtrr_prepare_save(struct set_mtrr_context *ctxt)
14{
15 unsigned int cr0;
16
17 /* Disable interrupts locally */
18 local_irq_save(ctxt->flags);
19
20 if (use_intel() || is_cpu(CYRIX)) {
21
22 /* Save value of CR4 and clear Page Global Enable (bit 7) */
23 if (cpu_has_pge) {
24 ctxt->cr4val = read_cr4();
25 write_cr4(ctxt->cr4val & ~X86_CR4_PGE);
26 }
27
28 /*
29 * Disable and flush caches. Note that wbinvd flushes the TLBs
30 * as a side-effect
31 */
32 cr0 = read_cr0() | X86_CR0_CD;
33 wbinvd();
34 write_cr0(cr0);
35 wbinvd();
36
37 if (use_intel()) {
38 /* Save MTRR state */
39 rdmsr(MSR_MTRRdefType, ctxt->deftype_lo, ctxt->deftype_hi);
40 } else {
41 /*
42 * Cyrix ARRs -
43 * everything else were excluded at the top
44 */
45 ctxt->ccr3 = getCx86(CX86_CCR3);
46 }
47 }
48}
49
50void set_mtrr_cache_disable(struct set_mtrr_context *ctxt)
51{
52 if (use_intel()) {
53 /* Disable MTRRs, and set the default type to uncached */
54 mtrr_wrmsr(MSR_MTRRdefType, ctxt->deftype_lo & 0xf300UL,
55 ctxt->deftype_hi);
56 } else {
57 if (is_cpu(CYRIX)) {
58 /* Cyrix ARRs - everything else were excluded at the top */
59 setCx86(CX86_CCR3, (ctxt->ccr3 & 0x0f) | 0x10);
60 }
61 }
62}
63
64/* Restore the processor after a set_mtrr_prepare */
65void set_mtrr_done(struct set_mtrr_context *ctxt)
66{
67 if (use_intel() || is_cpu(CYRIX)) {
68
69 /* Flush caches and TLBs */
70 wbinvd();
71
72 /* Restore MTRRdefType */
73 if (use_intel()) {
74 /* Intel (P6) standard MTRRs */
75 mtrr_wrmsr(MSR_MTRRdefType, ctxt->deftype_lo,
76 ctxt->deftype_hi);
77 } else {
78 /*
79 * Cyrix ARRs -
80 * everything else was excluded at the top
81 */
82 setCx86(CX86_CCR3, ctxt->ccr3);
83 }
84
85 /* Enable caches */
86 write_cr0(read_cr0() & 0xbfffffff);
87
88 /* Restore value of CR4 */
89 if (cpu_has_pge)
90 write_cr4(ctxt->cr4val);
91 }
92 /* Re-enable interrupts locally (if enabled previously) */
93 local_irq_restore(ctxt->flags);
94}
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 8c1c07073ccc..641ccb9dddbc 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -7,6 +7,7 @@
  *  Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter
  *  Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
  *  Copyright (C) 2009 Intel Corporation, <markus.t.metzger@intel.com>
+ *  Copyright (C) 2009 Google, Inc., Stephane Eranian
  *
  *  For licencing details see kernel-base/COPYING
  */
@@ -22,6 +23,7 @@
 #include <linux/uaccess.h>
 #include <linux/highmem.h>
 #include <linux/cpu.h>
+#include <linux/bitops.h>
 
 #include <asm/apic.h>
 #include <asm/stacktrace.h>
@@ -68,26 +70,59 @@ struct debug_store {
 	u64	pebs_event_reset[MAX_PEBS_EVENTS];
 };
 
+struct event_constraint {
+	union {
+		unsigned long	idxmsk[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+		u64		idxmsk64[1];
+	};
+	int	code;
+	int	cmask;
+	int	weight;
+};
+
+struct amd_nb {
+	int nb_id;  /* NorthBridge id */
+	int refcnt; /* reference count */
+	struct perf_event *owners[X86_PMC_IDX_MAX];
+	struct event_constraint event_constraints[X86_PMC_IDX_MAX];
+};
+
 struct cpu_hw_events {
-	struct perf_event	*events[X86_PMC_IDX_MAX];
-	unsigned long		used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+	struct perf_event	*events[X86_PMC_IDX_MAX]; /* in counter order */
 	unsigned long		active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
 	unsigned long		interrupts;
 	int			enabled;
 	struct debug_store	*ds;
-};
 
-struct event_constraint {
-	unsigned long	idxmsk[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
-	int	code;
+	int			n_events;
+	int			n_added;
+	int			assign[X86_PMC_IDX_MAX]; /* event to counter assignment */
+	u64			tags[X86_PMC_IDX_MAX];
+	struct perf_event	*event_list[X86_PMC_IDX_MAX]; /* in enabled order */
+	struct amd_nb		*amd_nb;
 };
 
-#define EVENT_CONSTRAINT(c, m) { .code = (c), .idxmsk[0] = (m) }
-#define EVENT_CONSTRAINT_END  { .code = 0, .idxmsk[0] = 0 }
+#define __EVENT_CONSTRAINT(c, n, m, w) {\
+	{ .idxmsk64[0] = (n) },		\
+	.code = (c),			\
+	.cmask = (m),			\
+	.weight = (w),			\
+}
+
+#define EVENT_CONSTRAINT(c, n, m)	\
+	__EVENT_CONSTRAINT(c, n, m, HWEIGHT(n))
+
+#define INTEL_EVENT_CONSTRAINT(c, n)	\
+	EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVTSEL_MASK)
+
+#define FIXED_EVENT_CONSTRAINT(c, n)	\
+	EVENT_CONSTRAINT(c, n, INTEL_ARCH_FIXED_MASK)
+
+#define EVENT_CONSTRAINT_END		\
+	EVENT_CONSTRAINT(0, 0, 0)
 
 #define for_each_event_constraint(e, c)	\
-	for ((e) = (c); (e)->idxmsk[0]; (e)++)
+	for ((e) = (c); (e)->cmask; (e)++)
 
 /*
  * struct x86_pmu - generic x86 pmu
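
In the reworked constraint scheme above, a constraint's weight is the number of counters the event may occupy (HWEIGHT of its index mask); the scheduler introduced later in this patch places the lowest-weight, most constrained events first. A small illustration of that relationship, using a simplified stand-in for the kernel structure (not the kernel code itself):

    #include <stdio.h>
    #include <stdint.h>

    /* Simplified stand-in for struct event_constraint. */
    struct constraint {
    	uint64_t idxmsk;	/* which counters the event may use */
    	int	 code;
    	int	 weight;	/* popcount of idxmsk */
    };

    #define CONSTRAINT(c, n) { .idxmsk = (n), .code = (c), \
    			   .weight = __builtin_popcountll(n) }

    int main(void)
    {
    	/* illustrative values: one event pinned to counter 0,
    	 * one allowed on counters 0-1 */
    	struct constraint tight = CONSTRAINT(0xc1, 0x1);
    	struct constraint loose = CONSTRAINT(0x12, 0x3);

    	printf("tight weight=%d, loose weight=%d\n",
    	       tight.weight, loose.weight);	/* 1 and 2 */
    	return 0;
    }
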
@@ -114,8 +149,14 @@ struct x86_pmu {
 	u64		intel_ctrl;
 	void		(*enable_bts)(u64 config);
 	void		(*disable_bts)(void);
-	int		(*get_event_idx)(struct cpu_hw_events *cpuc,
-					 struct hw_perf_event *hwc);
+
+	struct event_constraint *
+			(*get_event_constraints)(struct cpu_hw_events *cpuc,
+						 struct perf_event *event);
+
+	void		(*put_event_constraints)(struct cpu_hw_events *cpuc,
+						 struct perf_event *event);
+	struct event_constraint *event_constraints;
 };
 
 static struct x86_pmu x86_pmu __read_mostly;
@@ -124,111 +165,8 @@ static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = {
 	.enabled = 1,
 };
 
-static const struct event_constraint *event_constraints;
+static int x86_perf_event_set_period(struct perf_event *event,
+			     struct hw_perf_event *hwc, int idx);
 
129/*
130 * Not sure about some of these
131 */
132static const u64 p6_perfmon_event_map[] =
133{
134 [PERF_COUNT_HW_CPU_CYCLES] = 0x0079,
135 [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
136 [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0f2e,
137 [PERF_COUNT_HW_CACHE_MISSES] = 0x012e,
138 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4,
139 [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5,
140 [PERF_COUNT_HW_BUS_CYCLES] = 0x0062,
141};
142
143static u64 p6_pmu_event_map(int hw_event)
144{
145 return p6_perfmon_event_map[hw_event];
146}
147
148/*
149 * Event setting that is specified not to count anything.
150 * We use this to effectively disable a counter.
151 *
152 * L2_RQSTS with 0 MESI unit mask.
153 */
154#define P6_NOP_EVENT 0x0000002EULL
155
156static u64 p6_pmu_raw_event(u64 hw_event)
157{
158#define P6_EVNTSEL_EVENT_MASK 0x000000FFULL
159#define P6_EVNTSEL_UNIT_MASK 0x0000FF00ULL
160#define P6_EVNTSEL_EDGE_MASK 0x00040000ULL
161#define P6_EVNTSEL_INV_MASK 0x00800000ULL
162#define P6_EVNTSEL_REG_MASK 0xFF000000ULL
163
164#define P6_EVNTSEL_MASK \
165 (P6_EVNTSEL_EVENT_MASK | \
166 P6_EVNTSEL_UNIT_MASK | \
167 P6_EVNTSEL_EDGE_MASK | \
168 P6_EVNTSEL_INV_MASK | \
169 P6_EVNTSEL_REG_MASK)
170
171 return hw_event & P6_EVNTSEL_MASK;
172}
173
174static const struct event_constraint intel_p6_event_constraints[] =
175{
176 EVENT_CONSTRAINT(0xc1, 0x1), /* FLOPS */
177 EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */
178 EVENT_CONSTRAINT(0x11, 0x1), /* FP_ASSIST */
179 EVENT_CONSTRAINT(0x12, 0x2), /* MUL */
180 EVENT_CONSTRAINT(0x13, 0x2), /* DIV */
181 EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */
182 EVENT_CONSTRAINT_END
183};
184
185/*
186 * Intel PerfMon v3. Used on Core2 and later.
187 */
188static const u64 intel_perfmon_event_map[] =
189{
190 [PERF_COUNT_HW_CPU_CYCLES] = 0x003c,
191 [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
192 [PERF_COUNT_HW_CACHE_REFERENCES] = 0x4f2e,
193 [PERF_COUNT_HW_CACHE_MISSES] = 0x412e,
194 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4,
195 [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5,
196 [PERF_COUNT_HW_BUS_CYCLES] = 0x013c,
197};
198
199static const struct event_constraint intel_core_event_constraints[] =
200{
201 EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */
202 EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */
203 EVENT_CONSTRAINT(0x12, 0x2), /* MUL */
204 EVENT_CONSTRAINT(0x13, 0x2), /* DIV */
205 EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */
206 EVENT_CONSTRAINT(0x18, 0x1), /* IDLE_DURING_DIV */
207 EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */
208 EVENT_CONSTRAINT(0xa1, 0x1), /* RS_UOPS_DISPATCH_CYCLES */
209 EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED */
210 EVENT_CONSTRAINT_END
211};
212
213static const struct event_constraint intel_nehalem_event_constraints[] =
214{
215 EVENT_CONSTRAINT(0x40, 0x3), /* L1D_CACHE_LD */
216 EVENT_CONSTRAINT(0x41, 0x3), /* L1D_CACHE_ST */
217 EVENT_CONSTRAINT(0x42, 0x3), /* L1D_CACHE_LOCK */
218 EVENT_CONSTRAINT(0x43, 0x3), /* L1D_ALL_REF */
219 EVENT_CONSTRAINT(0x4e, 0x3), /* L1D_PREFETCH */
220 EVENT_CONSTRAINT(0x4c, 0x3), /* LOAD_HIT_PRE */
221 EVENT_CONSTRAINT(0x51, 0x3), /* L1D */
222 EVENT_CONSTRAINT(0x52, 0x3), /* L1D_CACHE_PREFETCH_LOCK_FB_HIT */
223 EVENT_CONSTRAINT(0x53, 0x3), /* L1D_CACHE_LOCK_FB_HIT */
224 EVENT_CONSTRAINT(0xc5, 0x3), /* CACHE_LOCK_CYCLES */
225 EVENT_CONSTRAINT_END
226};
227
228static u64 intel_pmu_event_map(int hw_event)
229{
230 return intel_perfmon_event_map[hw_event];
231}
232 170
233/* 171/*
234 * Generalized hw caching related hw_event table, filled 172 * Generalized hw caching related hw_event table, filled
@@ -245,424 +183,6 @@ static u64 __read_mostly hw_cache_event_ids
245 [PERF_COUNT_HW_CACHE_OP_MAX] 183 [PERF_COUNT_HW_CACHE_OP_MAX]
246 [PERF_COUNT_HW_CACHE_RESULT_MAX]; 184 [PERF_COUNT_HW_CACHE_RESULT_MAX];
247 185
248static __initconst u64 nehalem_hw_cache_event_ids
249 [PERF_COUNT_HW_CACHE_MAX]
250 [PERF_COUNT_HW_CACHE_OP_MAX]
251 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
252{
253 [ C(L1D) ] = {
254 [ C(OP_READ) ] = {
255 [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI */
256 [ C(RESULT_MISS) ] = 0x0140, /* L1D_CACHE_LD.I_STATE */
257 },
258 [ C(OP_WRITE) ] = {
259 [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI */
260 [ C(RESULT_MISS) ] = 0x0141, /* L1D_CACHE_ST.I_STATE */
261 },
262 [ C(OP_PREFETCH) ] = {
263 [ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS */
264 [ C(RESULT_MISS) ] = 0x024e, /* L1D_PREFETCH.MISS */
265 },
266 },
267 [ C(L1I ) ] = {
268 [ C(OP_READ) ] = {
269 [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */
270 [ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */
271 },
272 [ C(OP_WRITE) ] = {
273 [ C(RESULT_ACCESS) ] = -1,
274 [ C(RESULT_MISS) ] = -1,
275 },
276 [ C(OP_PREFETCH) ] = {
277 [ C(RESULT_ACCESS) ] = 0x0,
278 [ C(RESULT_MISS) ] = 0x0,
279 },
280 },
281 [ C(LL ) ] = {
282 [ C(OP_READ) ] = {
283 [ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS */
284 [ C(RESULT_MISS) ] = 0x0224, /* L2_RQSTS.LD_MISS */
285 },
286 [ C(OP_WRITE) ] = {
287 [ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS */
288 [ C(RESULT_MISS) ] = 0x0824, /* L2_RQSTS.RFO_MISS */
289 },
290 [ C(OP_PREFETCH) ] = {
291 [ C(RESULT_ACCESS) ] = 0x4f2e, /* LLC Reference */
292 [ C(RESULT_MISS) ] = 0x412e, /* LLC Misses */
293 },
294 },
295 [ C(DTLB) ] = {
296 [ C(OP_READ) ] = {
297 [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI (alias) */
298 [ C(RESULT_MISS) ] = 0x0108, /* DTLB_LOAD_MISSES.ANY */
299 },
300 [ C(OP_WRITE) ] = {
301 [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI (alias) */
302 [ C(RESULT_MISS) ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS */
303 },
304 [ C(OP_PREFETCH) ] = {
305 [ C(RESULT_ACCESS) ] = 0x0,
306 [ C(RESULT_MISS) ] = 0x0,
307 },
308 },
309 [ C(ITLB) ] = {
310 [ C(OP_READ) ] = {
311 [ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P */
312 [ C(RESULT_MISS) ] = 0x20c8, /* ITLB_MISS_RETIRED */
313 },
314 [ C(OP_WRITE) ] = {
315 [ C(RESULT_ACCESS) ] = -1,
316 [ C(RESULT_MISS) ] = -1,
317 },
318 [ C(OP_PREFETCH) ] = {
319 [ C(RESULT_ACCESS) ] = -1,
320 [ C(RESULT_MISS) ] = -1,
321 },
322 },
323 [ C(BPU ) ] = {
324 [ C(OP_READ) ] = {
325 [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
326 [ C(RESULT_MISS) ] = 0x03e8, /* BPU_CLEARS.ANY */
327 },
328 [ C(OP_WRITE) ] = {
329 [ C(RESULT_ACCESS) ] = -1,
330 [ C(RESULT_MISS) ] = -1,
331 },
332 [ C(OP_PREFETCH) ] = {
333 [ C(RESULT_ACCESS) ] = -1,
334 [ C(RESULT_MISS) ] = -1,
335 },
336 },
337};
338
339static __initconst u64 core2_hw_cache_event_ids
340 [PERF_COUNT_HW_CACHE_MAX]
341 [PERF_COUNT_HW_CACHE_OP_MAX]
342 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
343{
344 [ C(L1D) ] = {
345 [ C(OP_READ) ] = {
346 [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI */
347 [ C(RESULT_MISS) ] = 0x0140, /* L1D_CACHE_LD.I_STATE */
348 },
349 [ C(OP_WRITE) ] = {
350 [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI */
351 [ C(RESULT_MISS) ] = 0x0141, /* L1D_CACHE_ST.I_STATE */
352 },
353 [ C(OP_PREFETCH) ] = {
354 [ C(RESULT_ACCESS) ] = 0x104e, /* L1D_PREFETCH.REQUESTS */
355 [ C(RESULT_MISS) ] = 0,
356 },
357 },
358 [ C(L1I ) ] = {
359 [ C(OP_READ) ] = {
360 [ C(RESULT_ACCESS) ] = 0x0080, /* L1I.READS */
361 [ C(RESULT_MISS) ] = 0x0081, /* L1I.MISSES */
362 },
363 [ C(OP_WRITE) ] = {
364 [ C(RESULT_ACCESS) ] = -1,
365 [ C(RESULT_MISS) ] = -1,
366 },
367 [ C(OP_PREFETCH) ] = {
368 [ C(RESULT_ACCESS) ] = 0,
369 [ C(RESULT_MISS) ] = 0,
370 },
371 },
372 [ C(LL ) ] = {
373 [ C(OP_READ) ] = {
374 [ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI */
375 [ C(RESULT_MISS) ] = 0x4129, /* L2_LD.ISTATE */
376 },
377 [ C(OP_WRITE) ] = {
378 [ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI */
379 [ C(RESULT_MISS) ] = 0x412A, /* L2_ST.ISTATE */
380 },
381 [ C(OP_PREFETCH) ] = {
382 [ C(RESULT_ACCESS) ] = 0,
383 [ C(RESULT_MISS) ] = 0,
384 },
385 },
386 [ C(DTLB) ] = {
387 [ C(OP_READ) ] = {
388 [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI (alias) */
389 [ C(RESULT_MISS) ] = 0x0208, /* DTLB_MISSES.MISS_LD */
390 },
391 [ C(OP_WRITE) ] = {
392 [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI (alias) */
393 [ C(RESULT_MISS) ] = 0x0808, /* DTLB_MISSES.MISS_ST */
394 },
395 [ C(OP_PREFETCH) ] = {
396 [ C(RESULT_ACCESS) ] = 0,
397 [ C(RESULT_MISS) ] = 0,
398 },
399 },
400 [ C(ITLB) ] = {
401 [ C(OP_READ) ] = {
402 [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */
403 [ C(RESULT_MISS) ] = 0x1282, /* ITLBMISSES */
404 },
405 [ C(OP_WRITE) ] = {
406 [ C(RESULT_ACCESS) ] = -1,
407 [ C(RESULT_MISS) ] = -1,
408 },
409 [ C(OP_PREFETCH) ] = {
410 [ C(RESULT_ACCESS) ] = -1,
411 [ C(RESULT_MISS) ] = -1,
412 },
413 },
414 [ C(BPU ) ] = {
415 [ C(OP_READ) ] = {
416 [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */
417 [ C(RESULT_MISS) ] = 0x00c5, /* BP_INST_RETIRED.MISPRED */
418 },
419 [ C(OP_WRITE) ] = {
420 [ C(RESULT_ACCESS) ] = -1,
421 [ C(RESULT_MISS) ] = -1,
422 },
423 [ C(OP_PREFETCH) ] = {
424 [ C(RESULT_ACCESS) ] = -1,
425 [ C(RESULT_MISS) ] = -1,
426 },
427 },
428};
429
430static __initconst u64 atom_hw_cache_event_ids
431 [PERF_COUNT_HW_CACHE_MAX]
432 [PERF_COUNT_HW_CACHE_OP_MAX]
433 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
434{
435 [ C(L1D) ] = {
436 [ C(OP_READ) ] = {
437 [ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE.LD */
438 [ C(RESULT_MISS) ] = 0,
439 },
440 [ C(OP_WRITE) ] = {
441 [ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE.ST */
442 [ C(RESULT_MISS) ] = 0,
443 },
444 [ C(OP_PREFETCH) ] = {
445 [ C(RESULT_ACCESS) ] = 0x0,
446 [ C(RESULT_MISS) ] = 0,
447 },
448 },
449 [ C(L1I ) ] = {
450 [ C(OP_READ) ] = {
451 [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */
452 [ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */
453 },
454 [ C(OP_WRITE) ] = {
455 [ C(RESULT_ACCESS) ] = -1,
456 [ C(RESULT_MISS) ] = -1,
457 },
458 [ C(OP_PREFETCH) ] = {
459 [ C(RESULT_ACCESS) ] = 0,
460 [ C(RESULT_MISS) ] = 0,
461 },
462 },
463 [ C(LL ) ] = {
464 [ C(OP_READ) ] = {
465 [ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI */
466 [ C(RESULT_MISS) ] = 0x4129, /* L2_LD.ISTATE */
467 },
468 [ C(OP_WRITE) ] = {
469 [ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI */
470 [ C(RESULT_MISS) ] = 0x412A, /* L2_ST.ISTATE */
471 },
472 [ C(OP_PREFETCH) ] = {
473 [ C(RESULT_ACCESS) ] = 0,
474 [ C(RESULT_MISS) ] = 0,
475 },
476 },
477 [ C(DTLB) ] = {
478 [ C(OP_READ) ] = {
479 [ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE_LD.MESI (alias) */
480 [ C(RESULT_MISS) ] = 0x0508, /* DTLB_MISSES.MISS_LD */
481 },
482 [ C(OP_WRITE) ] = {
483 [ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE_ST.MESI (alias) */
484 [ C(RESULT_MISS) ] = 0x0608, /* DTLB_MISSES.MISS_ST */
485 },
486 [ C(OP_PREFETCH) ] = {
487 [ C(RESULT_ACCESS) ] = 0,
488 [ C(RESULT_MISS) ] = 0,
489 },
490 },
491 [ C(ITLB) ] = {
492 [ C(OP_READ) ] = {
493 [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */
494 [ C(RESULT_MISS) ] = 0x0282, /* ITLB.MISSES */
495 },
496 [ C(OP_WRITE) ] = {
497 [ C(RESULT_ACCESS) ] = -1,
498 [ C(RESULT_MISS) ] = -1,
499 },
500 [ C(OP_PREFETCH) ] = {
501 [ C(RESULT_ACCESS) ] = -1,
502 [ C(RESULT_MISS) ] = -1,
503 },
504 },
505 [ C(BPU ) ] = {
506 [ C(OP_READ) ] = {
507 [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */
508 [ C(RESULT_MISS) ] = 0x00c5, /* BP_INST_RETIRED.MISPRED */
509 },
510 [ C(OP_WRITE) ] = {
511 [ C(RESULT_ACCESS) ] = -1,
512 [ C(RESULT_MISS) ] = -1,
513 },
514 [ C(OP_PREFETCH) ] = {
515 [ C(RESULT_ACCESS) ] = -1,
516 [ C(RESULT_MISS) ] = -1,
517 },
518 },
519};
520
521static u64 intel_pmu_raw_event(u64 hw_event)
522{
523#define CORE_EVNTSEL_EVENT_MASK 0x000000FFULL
524#define CORE_EVNTSEL_UNIT_MASK 0x0000FF00ULL
525#define CORE_EVNTSEL_EDGE_MASK 0x00040000ULL
526#define CORE_EVNTSEL_INV_MASK 0x00800000ULL
527#define CORE_EVNTSEL_REG_MASK 0xFF000000ULL
528
529#define CORE_EVNTSEL_MASK \
530 (CORE_EVNTSEL_EVENT_MASK | \
531 CORE_EVNTSEL_UNIT_MASK | \
532 CORE_EVNTSEL_EDGE_MASK | \
533 CORE_EVNTSEL_INV_MASK | \
534 CORE_EVNTSEL_REG_MASK)
535
536 return hw_event & CORE_EVNTSEL_MASK;
537}
538
539static __initconst u64 amd_hw_cache_event_ids
540 [PERF_COUNT_HW_CACHE_MAX]
541 [PERF_COUNT_HW_CACHE_OP_MAX]
542 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
543{
544 [ C(L1D) ] = {
545 [ C(OP_READ) ] = {
546 [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */
547 [ C(RESULT_MISS) ] = 0x0041, /* Data Cache Misses */
548 },
549 [ C(OP_WRITE) ] = {
550 [ C(RESULT_ACCESS) ] = 0x0142, /* Data Cache Refills :system */
551 [ C(RESULT_MISS) ] = 0,
552 },
553 [ C(OP_PREFETCH) ] = {
554 [ C(RESULT_ACCESS) ] = 0x0267, /* Data Prefetcher :attempts */
555 [ C(RESULT_MISS) ] = 0x0167, /* Data Prefetcher :cancelled */
556 },
557 },
558 [ C(L1I ) ] = {
559 [ C(OP_READ) ] = {
560 [ C(RESULT_ACCESS) ] = 0x0080, /* Instruction cache fetches */
561 [ C(RESULT_MISS) ] = 0x0081, /* Instruction cache misses */
562 },
563 [ C(OP_WRITE) ] = {
564 [ C(RESULT_ACCESS) ] = -1,
565 [ C(RESULT_MISS) ] = -1,
566 },
567 [ C(OP_PREFETCH) ] = {
568 [ C(RESULT_ACCESS) ] = 0x014B, /* Prefetch Instructions :Load */
569 [ C(RESULT_MISS) ] = 0,
570 },
571 },
572 [ C(LL ) ] = {
573 [ C(OP_READ) ] = {
574 [ C(RESULT_ACCESS) ] = 0x037D, /* Requests to L2 Cache :IC+DC */
575 [ C(RESULT_MISS) ] = 0x037E, /* L2 Cache Misses : IC+DC */
576 },
577 [ C(OP_WRITE) ] = {
578 [ C(RESULT_ACCESS) ] = 0x017F, /* L2 Fill/Writeback */
579 [ C(RESULT_MISS) ] = 0,
580 },
581 [ C(OP_PREFETCH) ] = {
582 [ C(RESULT_ACCESS) ] = 0,
583 [ C(RESULT_MISS) ] = 0,
584 },
585 },
586 [ C(DTLB) ] = {
587 [ C(OP_READ) ] = {
588 [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */
589 [ C(RESULT_MISS) ] = 0x0046, /* L1 DTLB and L2 DLTB Miss */
590 },
591 [ C(OP_WRITE) ] = {
592 [ C(RESULT_ACCESS) ] = 0,
593 [ C(RESULT_MISS) ] = 0,
594 },
595 [ C(OP_PREFETCH) ] = {
596 [ C(RESULT_ACCESS) ] = 0,
597 [ C(RESULT_MISS) ] = 0,
598 },
599 },
600 [ C(ITLB) ] = {
601 [ C(OP_READ) ] = {
602 [ C(RESULT_ACCESS) ] = 0x0080, /* Instruction fecthes */
603 [ C(RESULT_MISS) ] = 0x0085, /* Instr. fetch ITLB misses */
604 },
605 [ C(OP_WRITE) ] = {
606 [ C(RESULT_ACCESS) ] = -1,
607 [ C(RESULT_MISS) ] = -1,
608 },
609 [ C(OP_PREFETCH) ] = {
610 [ C(RESULT_ACCESS) ] = -1,
611 [ C(RESULT_MISS) ] = -1,
612 },
613 },
614 [ C(BPU ) ] = {
615 [ C(OP_READ) ] = {
616 [ C(RESULT_ACCESS) ] = 0x00c2, /* Retired Branch Instr. */
617 [ C(RESULT_MISS) ] = 0x00c3, /* Retired Mispredicted BI */
618 },
619 [ C(OP_WRITE) ] = {
620 [ C(RESULT_ACCESS) ] = -1,
621 [ C(RESULT_MISS) ] = -1,
622 },
623 [ C(OP_PREFETCH) ] = {
624 [ C(RESULT_ACCESS) ] = -1,
625 [ C(RESULT_MISS) ] = -1,
626 },
627 },
628};
629
630/*
631 * AMD Performance Monitor K7 and later.
632 */
633static const u64 amd_perfmon_event_map[] =
634{
635 [PERF_COUNT_HW_CPU_CYCLES] = 0x0076,
636 [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
637 [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0080,
638 [PERF_COUNT_HW_CACHE_MISSES] = 0x0081,
639 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4,
640 [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5,
641};
642
643static u64 amd_pmu_event_map(int hw_event)
644{
645 return amd_perfmon_event_map[hw_event];
646}
647
648static u64 amd_pmu_raw_event(u64 hw_event)
649{
650#define K7_EVNTSEL_EVENT_MASK 0x7000000FFULL
651#define K7_EVNTSEL_UNIT_MASK 0x00000FF00ULL
652#define K7_EVNTSEL_EDGE_MASK 0x000040000ULL
653#define K7_EVNTSEL_INV_MASK 0x000800000ULL
654#define K7_EVNTSEL_REG_MASK 0x0FF000000ULL
655
656#define K7_EVNTSEL_MASK \
657 (K7_EVNTSEL_EVENT_MASK | \
658 K7_EVNTSEL_UNIT_MASK | \
659 K7_EVNTSEL_EDGE_MASK | \
660 K7_EVNTSEL_INV_MASK | \
661 K7_EVNTSEL_REG_MASK)
662
663 return hw_event & K7_EVNTSEL_MASK;
664}
665
666/* 186/*
667 * Propagate event elapsed time into the generic event. 187 * Propagate event elapsed time into the generic event.
668 * Can only be executed on the CPU where the event is active. 188 * Can only be executed on the CPU where the event is active.
@@ -914,42 +434,6 @@ set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event_attr *attr)
914 return 0; 434 return 0;
915} 435}
916 436
917static void intel_pmu_enable_bts(u64 config)
918{
919 unsigned long debugctlmsr;
920
921 debugctlmsr = get_debugctlmsr();
922
923 debugctlmsr |= X86_DEBUGCTL_TR;
924 debugctlmsr |= X86_DEBUGCTL_BTS;
925 debugctlmsr |= X86_DEBUGCTL_BTINT;
926
927 if (!(config & ARCH_PERFMON_EVENTSEL_OS))
928 debugctlmsr |= X86_DEBUGCTL_BTS_OFF_OS;
929
930 if (!(config & ARCH_PERFMON_EVENTSEL_USR))
931 debugctlmsr |= X86_DEBUGCTL_BTS_OFF_USR;
932
933 update_debugctlmsr(debugctlmsr);
934}
935
936static void intel_pmu_disable_bts(void)
937{
938 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
939 unsigned long debugctlmsr;
940
941 if (!cpuc->ds)
942 return;
943
944 debugctlmsr = get_debugctlmsr();
945
946 debugctlmsr &=
947 ~(X86_DEBUGCTL_TR | X86_DEBUGCTL_BTS | X86_DEBUGCTL_BTINT |
948 X86_DEBUGCTL_BTS_OFF_OS | X86_DEBUGCTL_BTS_OFF_USR);
949
950 update_debugctlmsr(debugctlmsr);
951}
952
953/* 437/*
954 * Setup the hardware configuration for a given attr_type 438 * Setup the hardware configuration for a given attr_type
955 */ 439 */
@@ -988,6 +472,8 @@ static int __hw_perf_event_init(struct perf_event *event)
 	hwc->config = ARCH_PERFMON_EVENTSEL_INT;
 
 	hwc->idx = -1;
+	hwc->last_cpu = -1;
+	hwc->last_tag = ~0ULL;
 
 	/*
 	 * Count user and OS events unless requested not to.
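
The last_cpu/last_tag fields initialized above feed the fast path of the scheduling rework in the hunk below: hw_perf_enable() only skips reprogramming a counter when the event's previous index, CPU and tag all still match, and x86_schedule_events() otherwise assigns counters starting with the most constrained (lowest weight) events. A toy model of that weight-ordered assignment, under simplified assumptions (fixed counter count, no fixed-purpose counters):

    #include <stdio.h>
    #include <stdint.h>

    #define NUM_COUNTERS	4

    struct ev { uint32_t idxmsk; int weight; int assigned; };

    /* Place lowest-weight (most constrained) events first, as in
     * x86_schedule_events(); returns -1 where the kernel returns -ENOSPC. */
    static int schedule(struct ev *evs, int n)
    {
    	uint32_t used = 0;
    	int w, i, j, left = n;

    	for (w = 1; left && w <= NUM_COUNTERS; w++) {
    		for (i = 0; left && i < n; i++) {
    			if (evs[i].weight != w)
    				continue;
    			for (j = 0; j < NUM_COUNTERS; j++)
    				if ((evs[i].idxmsk & (1U << j)) && !(used & (1U << j)))
    					break;
    			if (j == NUM_COUNTERS)
    				return -1;
    			used |= 1U << j;
    			evs[i].assigned = j;
    			left--;
    		}
    	}
    	return left ? -1 : 0;
    }

    int main(void)
    {
    	struct ev evs[] = {
    		{ 0xf, 4, -1 },	/* unconstrained */
    		{ 0x1, 1, -1 },	/* must use counter 0 */
    	};

    	if (!schedule(evs, 2))	/* prints "assigned: 1 and 0" */
    		printf("assigned: %d and %d\n", evs[0].assigned, evs[1].assigned);
    	return 0;
    }
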
@@ -1056,216 +542,323 @@ static int __hw_perf_event_init(struct perf_event *event)
1056 return 0; 542 return 0;
1057} 543}
1058 544
1059static void p6_pmu_disable_all(void) 545static void x86_pmu_disable_all(void)
1060{ 546{
1061 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 547 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1062 u64 val; 548 int idx;
1063
1064 if (!cpuc->enabled)
1065 return;
1066 549
1067 cpuc->enabled = 0; 550 for (idx = 0; idx < x86_pmu.num_events; idx++) {
1068 barrier(); 551 u64 val;
1069 552
1070 /* p6 only has one enable register */ 553 if (!test_bit(idx, cpuc->active_mask))
1071 rdmsrl(MSR_P6_EVNTSEL0, val); 554 continue;
1072 val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE; 555 rdmsrl(x86_pmu.eventsel + idx, val);
1073 wrmsrl(MSR_P6_EVNTSEL0, val); 556 if (!(val & ARCH_PERFMON_EVENTSEL0_ENABLE))
557 continue;
558 val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE;
559 wrmsrl(x86_pmu.eventsel + idx, val);
560 }
1074} 561}
1075 562
1076static void intel_pmu_disable_all(void) 563void hw_perf_disable(void)
1077{ 564{
1078 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 565 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1079 566
567 if (!x86_pmu_initialized())
568 return;
569
1080 if (!cpuc->enabled) 570 if (!cpuc->enabled)
1081 return; 571 return;
1082 572
573 cpuc->n_added = 0;
1083 cpuc->enabled = 0; 574 cpuc->enabled = 0;
1084 barrier(); 575 barrier();
1085 576
1086 wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0); 577 x86_pmu.disable_all();
1087
1088 if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask))
1089 intel_pmu_disable_bts();
1090} 578}
1091 579
1092static void amd_pmu_disable_all(void) 580static void x86_pmu_enable_all(void)
1093{ 581{
1094 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 582 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1095 int idx; 583 int idx;
1096 584
1097 if (!cpuc->enabled)
1098 return;
1099
1100 cpuc->enabled = 0;
1101 /*
1102 * ensure we write the disable before we start disabling the
1103 * events proper, so that amd_pmu_enable_event() does the
1104 * right thing.
1105 */
1106 barrier();
1107
1108 for (idx = 0; idx < x86_pmu.num_events; idx++) { 585 for (idx = 0; idx < x86_pmu.num_events; idx++) {
586 struct perf_event *event = cpuc->events[idx];
1109 u64 val; 587 u64 val;
1110 588
1111 if (!test_bit(idx, cpuc->active_mask)) 589 if (!test_bit(idx, cpuc->active_mask))
1112 continue; 590 continue;
1113 rdmsrl(MSR_K7_EVNTSEL0 + idx, val); 591
1114 if (!(val & ARCH_PERFMON_EVENTSEL0_ENABLE)) 592 val = event->hw.config;
1115 continue; 593 val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
1116 val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE; 594 wrmsrl(x86_pmu.eventsel + idx, val);
1117 wrmsrl(MSR_K7_EVNTSEL0 + idx, val);
1118 } 595 }
1119} 596}
1120 597
1121void hw_perf_disable(void) 598static const struct pmu pmu;
599
600static inline int is_x86_event(struct perf_event *event)
1122{ 601{
1123 if (!x86_pmu_initialized()) 602 return event->pmu == &pmu;
1124 return;
1125 return x86_pmu.disable_all();
1126} 603}
1127 604
1128static void p6_pmu_enable_all(void) 605static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
1129{ 606{
1130 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 607 struct event_constraint *c, *constraints[X86_PMC_IDX_MAX];
1131 unsigned long val; 608 unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
609 int i, j, w, wmax, num = 0;
610 struct hw_perf_event *hwc;
1132 611
1133 if (cpuc->enabled) 612 bitmap_zero(used_mask, X86_PMC_IDX_MAX);
1134 return;
1135 613
1136 cpuc->enabled = 1; 614 for (i = 0; i < n; i++) {
1137 barrier(); 615 constraints[i] =
616 x86_pmu.get_event_constraints(cpuc, cpuc->event_list[i]);
617 }
1138 618
1139 /* p6 only has one enable register */ 619 /*
1140 rdmsrl(MSR_P6_EVNTSEL0, val); 620 * fastpath, try to reuse previous register
1141 val |= ARCH_PERFMON_EVENTSEL0_ENABLE; 621 */
1142 wrmsrl(MSR_P6_EVNTSEL0, val); 622 for (i = 0; i < n; i++) {
1143} 623 hwc = &cpuc->event_list[i]->hw;
624 c = constraints[i];
1144 625
1145static void intel_pmu_enable_all(void) 626 /* never assigned */
1146{ 627 if (hwc->idx == -1)
1147 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 628 break;
1148 629
1149 if (cpuc->enabled) 630 /* constraint still honored */
1150 return; 631 if (!test_bit(hwc->idx, c->idxmsk))
632 break;
1151 633
1152 cpuc->enabled = 1; 634 /* not already used */
1153 barrier(); 635 if (test_bit(hwc->idx, used_mask))
636 break;
637
638 set_bit(hwc->idx, used_mask);
639 if (assign)
640 assign[i] = hwc->idx;
641 }
642 if (i == n)
643 goto done;
644
645 /*
646 * begin slow path
647 */
648
649 bitmap_zero(used_mask, X86_PMC_IDX_MAX);
650
651 /*
652 * weight = number of possible counters
653 *
654 * 1 = most constrained, only works on one counter
655 * wmax = least constrained, works on any counter
656 *
657 * assign events to counters starting with most
658 * constrained events.
659 */
660 wmax = x86_pmu.num_events;
661
662 /*
663 * when fixed event counters are present,
664 * wmax is incremented by 1 to account
665 * for one more choice
666 */
667 if (x86_pmu.num_events_fixed)
668 wmax++;
1154 669
1155 wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl); 670 for (w = 1, num = n; num && w <= wmax; w++) {
671 /* for each event */
672 for (i = 0; num && i < n; i++) {
673 c = constraints[i];
674 hwc = &cpuc->event_list[i]->hw;
1156 675
1157 if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) { 676 if (c->weight != w)
1158 struct perf_event *event = 677 continue;
1159 cpuc->events[X86_PMC_IDX_FIXED_BTS];
1160 678
1161 if (WARN_ON_ONCE(!event)) 679 for_each_bit(j, c->idxmsk, X86_PMC_IDX_MAX) {
1162 return; 680 if (!test_bit(j, used_mask))
681 break;
682 }
683
684 if (j == X86_PMC_IDX_MAX)
685 break;
686
687 set_bit(j, used_mask);
1163 688
1164 intel_pmu_enable_bts(event->hw.config); 689 if (assign)
690 assign[i] = j;
691 num--;
692 }
693 }
694done:
695 /*
696 * scheduling failed or is just a simulation,
697 * free resources if necessary
698 */
699 if (!assign || num) {
700 for (i = 0; i < n; i++) {
701 if (x86_pmu.put_event_constraints)
702 x86_pmu.put_event_constraints(cpuc, cpuc->event_list[i]);
703 }
1165 } 704 }
705 return num ? -ENOSPC : 0;
1166} 706}
1167 707
1168static void amd_pmu_enable_all(void) 708/*
709 * dogrp: true if must collect siblings events (group)
710 * returns total number of events and error code
711 */
712static int collect_events(struct cpu_hw_events *cpuc, struct perf_event *leader, bool dogrp)
1169{ 713{
1170 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 714 struct perf_event *event;
1171 int idx; 715 int n, max_count;
1172 716
1173 if (cpuc->enabled) 717 max_count = x86_pmu.num_events + x86_pmu.num_events_fixed;
1174 return;
1175 718
1176 cpuc->enabled = 1; 719 /* current number of events already accepted */
1177 barrier(); 720 n = cpuc->n_events;
1178 721
1179 for (idx = 0; idx < x86_pmu.num_events; idx++) { 722 if (is_x86_event(leader)) {
1180 struct perf_event *event = cpuc->events[idx]; 723 if (n >= max_count)
1181 u64 val; 724 return -ENOSPC;
725 cpuc->event_list[n] = leader;
726 n++;
727 }
728 if (!dogrp)
729 return n;
1182 730
1183 if (!test_bit(idx, cpuc->active_mask)) 731 list_for_each_entry(event, &leader->sibling_list, group_entry) {
732 if (!is_x86_event(event) ||
733 event->state <= PERF_EVENT_STATE_OFF)
1184 continue; 734 continue;
1185 735
1186 val = event->hw.config; 736 if (n >= max_count)
1187 val |= ARCH_PERFMON_EVENTSEL0_ENABLE; 737 return -ENOSPC;
1188 wrmsrl(MSR_K7_EVNTSEL0 + idx, val);
1189 }
1190}
1191 738
1192void hw_perf_enable(void) 739 cpuc->event_list[n] = event;
1193{ 740 n++;
1194 if (!x86_pmu_initialized()) 741 }
1195 return; 742 return n;
1196 x86_pmu.enable_all();
1197} 743}
1198 744
1199static inline u64 intel_pmu_get_status(void) 745static inline void x86_assign_hw_event(struct perf_event *event,
746 struct cpu_hw_events *cpuc, int i)
1200{ 747{
1201 u64 status; 748 struct hw_perf_event *hwc = &event->hw;
1202 749
1203 rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status); 750 hwc->idx = cpuc->assign[i];
751 hwc->last_cpu = smp_processor_id();
752 hwc->last_tag = ++cpuc->tags[i];
1204 753
1205 return status; 754 if (hwc->idx == X86_PMC_IDX_FIXED_BTS) {
755 hwc->config_base = 0;
756 hwc->event_base = 0;
757 } else if (hwc->idx >= X86_PMC_IDX_FIXED) {
758 hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
759 /*
760 * We set it so that event_base + idx in wrmsr/rdmsr maps to
761 * MSR_ARCH_PERFMON_FIXED_CTR0 ... CTR2:
762 */
763 hwc->event_base =
764 MSR_ARCH_PERFMON_FIXED_CTR0 - X86_PMC_IDX_FIXED;
765 } else {
766 hwc->config_base = x86_pmu.eventsel;
767 hwc->event_base = x86_pmu.perfctr;
768 }
1206} 769}
1207 770
1208static inline void intel_pmu_ack_status(u64 ack) 771static inline int match_prev_assignment(struct hw_perf_event *hwc,
772 struct cpu_hw_events *cpuc,
773 int i)
1209{ 774{
1210 wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack); 775 return hwc->idx == cpuc->assign[i] &&
776 hwc->last_cpu == smp_processor_id() &&
777 hwc->last_tag == cpuc->tags[i];
1211} 778}
1212 779
1213static inline void x86_pmu_enable_event(struct hw_perf_event *hwc, int idx) 780static void x86_pmu_stop(struct perf_event *event);
1214{
1215 (void)checking_wrmsrl(hwc->config_base + idx,
1216 hwc->config | ARCH_PERFMON_EVENTSEL0_ENABLE);
1217}
1218 781
1219static inline void x86_pmu_disable_event(struct hw_perf_event *hwc, int idx) 782void hw_perf_enable(void)
1220{ 783{
1221 (void)checking_wrmsrl(hwc->config_base + idx, hwc->config); 784 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1222} 785 struct perf_event *event;
786 struct hw_perf_event *hwc;
787 int i;
1223 788
1224static inline void 789 if (!x86_pmu_initialized())
1225intel_pmu_disable_fixed(struct hw_perf_event *hwc, int __idx) 790 return;
1226{
1227 int idx = __idx - X86_PMC_IDX_FIXED;
1228 u64 ctrl_val, mask;
1229 791
1230 mask = 0xfULL << (idx * 4); 792 if (cpuc->enabled)
793 return;
1231 794
1232 rdmsrl(hwc->config_base, ctrl_val); 795 if (cpuc->n_added) {
1233 ctrl_val &= ~mask; 796 /*
1234 (void)checking_wrmsrl(hwc->config_base, ctrl_val); 797 * apply assignment obtained either from
1235} 798 * hw_perf_group_sched_in() or x86_pmu_enable()
799 *
800 * step1: save events moving to new counters
801 * step2: reprogram moved events into new counters
802 */
803 for (i = 0; i < cpuc->n_events; i++) {
1236 804
1237static inline void 805 event = cpuc->event_list[i];
1238p6_pmu_disable_event(struct hw_perf_event *hwc, int idx) 806 hwc = &event->hw;
1239{
1240 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1241 u64 val = P6_NOP_EVENT;
1242 807
1243 if (cpuc->enabled) 808 /*
1244 val |= ARCH_PERFMON_EVENTSEL0_ENABLE; 809 * we can avoid reprogramming counter if:
810 * - assigned same counter as last time
811 * - running on same CPU as last time
812 * - no other event has used the counter since
813 */
814 if (hwc->idx == -1 ||
815 match_prev_assignment(hwc, cpuc, i))
816 continue;
1245 817
1246 (void)checking_wrmsrl(hwc->config_base + idx, val); 818 x86_pmu_stop(event);
1247}
1248 819
1249static inline void 820 hwc->idx = -1;
1250intel_pmu_disable_event(struct hw_perf_event *hwc, int idx) 821 }
1251{
1252 if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) {
1253 intel_pmu_disable_bts();
1254 return;
1255 }
1256 822
1257 if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { 823 for (i = 0; i < cpuc->n_events; i++) {
1258 intel_pmu_disable_fixed(hwc, idx); 824
1259 return; 825 event = cpuc->event_list[i];
826 hwc = &event->hw;
827
828 if (hwc->idx == -1) {
829 x86_assign_hw_event(event, cpuc, i);
830 x86_perf_event_set_period(event, hwc, hwc->idx);
831 }
832 /*
833 * need to mark as active because x86_pmu_disable()
834 * clear active_mask and events[] yet it preserves
835 * idx
836 */
837 set_bit(hwc->idx, cpuc->active_mask);
838 cpuc->events[hwc->idx] = event;
839
840 x86_pmu.enable(hwc, hwc->idx);
841 perf_event_update_userpage(event);
842 }
843 cpuc->n_added = 0;
844 perf_events_lapic_init();
1260 } 845 }
1261 846
1262 x86_pmu_disable_event(hwc, idx); 847 cpuc->enabled = 1;
848 barrier();
849
850 x86_pmu.enable_all();
851}
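
hw_perf_enable() above deliberately reprograms as little as possible: an event keeps its counter when it is still on the same counter index, on the same CPU, and no other event has used that slot since (the last_cpu/last_tag bookkeeping). A minimal user-space sketch of that check, with a hypothetical struct standing in for the fields the patch adds to hw_perf_event:

#include <stdio.h>

/*
 * Illustrative only: idx/last_cpu/last_tag mirror the bookkeeping the
 * patch adds to struct hw_perf_event.
 */
struct hw_state {
	int idx;			/* counter currently programmed */
	int last_cpu;			/* CPU it was programmed on */
	unsigned long long last_tag;	/* generation tag of that slot */
};

static int needs_reprogram(const struct hw_state *hw, int new_idx,
			   int this_cpu, unsigned long long slot_tag)
{
	/* reuse the live counter only if nothing changed underneath us */
	return !(hw->idx == new_idx &&
		 hw->last_cpu == this_cpu &&
		 hw->last_tag == slot_tag);
}

int main(void)
{
	struct hw_state hw = { .idx = 2, .last_cpu = 0, .last_tag = 7 };

	printf("same slot:  %d\n", needs_reprogram(&hw, 2, 0, 7)); /* 0 */
	printf("moved slot: %d\n", needs_reprogram(&hw, 3, 0, 7)); /* 1 */
	return 0;
}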
852
853static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc, int idx)
854{
855 (void)checking_wrmsrl(hwc->config_base + idx,
856 hwc->config | ARCH_PERFMON_EVENTSEL0_ENABLE);
1263} 857}
1264 858
1265static inline void 859static inline void x86_pmu_disable_event(struct hw_perf_event *hwc, int idx)
1266amd_pmu_disable_event(struct hw_perf_event *hwc, int idx)
1267{ 860{
1268 x86_pmu_disable_event(hwc, idx); 861 (void)checking_wrmsrl(hwc->config_base + idx, hwc->config);
1269} 862}
1270 863
1271static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left); 864static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left);
@@ -1326,220 +919,60 @@ x86_perf_event_set_period(struct perf_event *event,
1326 return ret; 919 return ret;
1327} 920}
1328 921
1329static inline void 922static void x86_pmu_enable_event(struct hw_perf_event *hwc, int idx)
1330intel_pmu_enable_fixed(struct hw_perf_event *hwc, int __idx)
1331{
1332 int idx = __idx - X86_PMC_IDX_FIXED;
1333 u64 ctrl_val, bits, mask;
1334 int err;
1335
1336 /*
1337 * Enable IRQ generation (0x8),
1338 * and enable ring-3 counting (0x2) and ring-0 counting (0x1)
1339 * if requested:
1340 */
1341 bits = 0x8ULL;
1342 if (hwc->config & ARCH_PERFMON_EVENTSEL_USR)
1343 bits |= 0x2;
1344 if (hwc->config & ARCH_PERFMON_EVENTSEL_OS)
1345 bits |= 0x1;
1346
1347 /*
1348 * ANY bit is supported in v3 and up
1349 */
1350 if (x86_pmu.version > 2 && hwc->config & ARCH_PERFMON_EVENTSEL_ANY)
1351 bits |= 0x4;
1352
1353 bits <<= (idx * 4);
1354 mask = 0xfULL << (idx * 4);
1355
1356 rdmsrl(hwc->config_base, ctrl_val);
1357 ctrl_val &= ~mask;
1358 ctrl_val |= bits;
1359 err = checking_wrmsrl(hwc->config_base, ctrl_val);
1360}
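
For reference, the fixed-counter control MSR manipulated by intel_pmu_enable_fixed()/intel_pmu_disable_fixed() above packs one 4-bit field per fixed counter: bit 0 enables ring-0 counting, bit 1 ring-3 counting, bit 2 the ANY-thread qualifier (architectural perfmon v3 and later), and bit 3 PMI generation on overflow. A small sketch of the read-modify-write on that nibble; the real rdmsrl/wrmsrl are privileged, so the MSR is just a local variable here:

#include <stdio.h>

#define FIXED_CTRL_OS	0x1ULL	/* count in ring 0 */
#define FIXED_CTRL_USR	0x2ULL	/* count in ring 3 */
#define FIXED_CTRL_ANY	0x4ULL	/* any-thread (arch perfmon v3+) */
#define FIXED_CTRL_PMI	0x8ULL	/* raise PMI on overflow */

/*
 * Each fixed counter idx owns bits [idx*4 .. idx*4+3] of
 * MSR_ARCH_PERFMON_FIXED_CTR_CTRL; clear the nibble, then install the
 * requested control bits.
 */
static unsigned long long
set_fixed_ctrl(unsigned long long ctrl, int idx, int usr, int os, int any)
{
	unsigned long long bits = FIXED_CTRL_PMI;
	unsigned long long mask = 0xfULL << (idx * 4);

	if (usr)
		bits |= FIXED_CTRL_USR;
	if (os)
		bits |= FIXED_CTRL_OS;
	if (any)
		bits |= FIXED_CTRL_ANY;

	ctrl &= ~mask;			/* clear this counter's nibble */
	ctrl |= bits << (idx * 4);	/* install the new control bits */
	return ctrl;
}

int main(void)
{
	unsigned long long ctrl = 0;

	/* fixed counter 1 (core cycles), user+kernel, PMI enabled */
	ctrl = set_fixed_ctrl(ctrl, 1, 1, 1, 0);
	printf("FIXED_CTR_CTRL = %#llx\n", ctrl);	/* prints 0xb0 */
	return 0;
}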
1361
1362static void p6_pmu_enable_event(struct hw_perf_event *hwc, int idx)
1363{ 923{
1364 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 924 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1365 u64 val;
1366
1367 val = hwc->config;
1368 if (cpuc->enabled) 925 if (cpuc->enabled)
1369 val |= ARCH_PERFMON_EVENTSEL0_ENABLE; 926 __x86_pmu_enable_event(hwc, idx);
1370
1371 (void)checking_wrmsrl(hwc->config_base + idx, val);
1372} 927}
1373 928
1374 929/*
1375static void intel_pmu_enable_event(struct hw_perf_event *hwc, int idx) 930 * activate a single event
1376{ 931 *
1377 if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) { 932 * The event is added to the group of enabled events
1378 if (!__get_cpu_var(cpu_hw_events).enabled) 933 * but only if it can be scehduled with existing events.
1379			return;						 933	 * but only if it can be scheduled with existing events.
1380 935 * Called with PMU disabled. If successful and return value 1,
1381 intel_pmu_enable_bts(hwc->config); 936 * then guaranteed to call perf_enable() and hw_perf_enable()
1382 return; 937 */
1383 } 938static int x86_pmu_enable(struct perf_event *event)
1384
1385 if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
1386 intel_pmu_enable_fixed(hwc, idx);
1387 return;
1388 }
1389
1390 x86_pmu_enable_event(hwc, idx);
1391}
1392
1393static void amd_pmu_enable_event(struct hw_perf_event *hwc, int idx)
1394{ 939{
1395 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 940 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
941 struct hw_perf_event *hwc;
942 int assign[X86_PMC_IDX_MAX];
943 int n, n0, ret;
1396 944
1397 if (cpuc->enabled) 945 hwc = &event->hw;
1398 x86_pmu_enable_event(hwc, idx);
1399}
1400
1401static int fixed_mode_idx(struct hw_perf_event *hwc)
1402{
1403 unsigned int hw_event;
1404
1405 hw_event = hwc->config & ARCH_PERFMON_EVENT_MASK;
1406
1407 if (unlikely((hw_event ==
1408 x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS)) &&
1409 (hwc->sample_period == 1)))
1410 return X86_PMC_IDX_FIXED_BTS;
1411 946
1412 if (!x86_pmu.num_events_fixed) 947 n0 = cpuc->n_events;
1413 return -1; 948 n = collect_events(cpuc, event, false);
949 if (n < 0)
950 return n;
1414 951
952 ret = x86_schedule_events(cpuc, n, assign);
953 if (ret)
954 return ret;
1415 /* 955 /*
1416 * fixed counters do not take all possible filters 956 * copy new assignment, now we know it is possible
957 * will be used by hw_perf_enable()
1417 */ 958 */
1418 if (hwc->config & ARCH_PERFMON_EVENT_FILTER_MASK) 959 memcpy(cpuc->assign, assign, n*sizeof(int));
1419 return -1;
1420 960
1421 if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_INSTRUCTIONS))) 961 cpuc->n_events = n;
1422 return X86_PMC_IDX_FIXED_INSTRUCTIONS; 962 cpuc->n_added = n - n0;
1423 if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_CPU_CYCLES)))
1424 return X86_PMC_IDX_FIXED_CPU_CYCLES;
1425 if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_BUS_CYCLES)))
1426 return X86_PMC_IDX_FIXED_BUS_CYCLES;
1427 963
1428 return -1; 964 return 0;
1429}
1430
1431/*
1432 * generic counter allocator: get next free counter
1433 */
1434static int
1435gen_get_event_idx(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc)
1436{
1437 int idx;
1438
1439 idx = find_first_zero_bit(cpuc->used_mask, x86_pmu.num_events);
1440 return idx == x86_pmu.num_events ? -1 : idx;
1441}
1442
1443/*
1444 * intel-specific counter allocator: check event constraints
1445 */
1446static int
1447intel_get_event_idx(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc)
1448{
1449 const struct event_constraint *event_constraint;
1450 int i, code;
1451
1452 if (!event_constraints)
1453 goto skip;
1454
1455 code = hwc->config & CORE_EVNTSEL_EVENT_MASK;
1456
1457 for_each_event_constraint(event_constraint, event_constraints) {
1458 if (code == event_constraint->code) {
1459 for_each_bit(i, event_constraint->idxmsk, X86_PMC_IDX_MAX) {
1460 if (!test_and_set_bit(i, cpuc->used_mask))
1461 return i;
1462 }
1463 return -1;
1464 }
1465 }
1466skip:
1467 return gen_get_event_idx(cpuc, hwc);
1468}
1469
1470static int
1471x86_schedule_event(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc)
1472{
1473 int idx;
1474
1475 idx = fixed_mode_idx(hwc);
1476 if (idx == X86_PMC_IDX_FIXED_BTS) {
1477 /* BTS is already occupied. */
1478 if (test_and_set_bit(idx, cpuc->used_mask))
1479 return -EAGAIN;
1480
1481 hwc->config_base = 0;
1482 hwc->event_base = 0;
1483 hwc->idx = idx;
1484 } else if (idx >= 0) {
1485 /*
1486 * Try to get the fixed event, if that is already taken
1487 * then try to get a generic event:
1488 */
1489 if (test_and_set_bit(idx, cpuc->used_mask))
1490 goto try_generic;
1491
1492 hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
1493 /*
1494 * We set it so that event_base + idx in wrmsr/rdmsr maps to
1495 * MSR_ARCH_PERFMON_FIXED_CTR0 ... CTR2:
1496 */
1497 hwc->event_base =
1498 MSR_ARCH_PERFMON_FIXED_CTR0 - X86_PMC_IDX_FIXED;
1499 hwc->idx = idx;
1500 } else {
1501 idx = hwc->idx;
1502 /* Try to get the previous generic event again */
1503 if (idx == -1 || test_and_set_bit(idx, cpuc->used_mask)) {
1504try_generic:
1505 idx = x86_pmu.get_event_idx(cpuc, hwc);
1506 if (idx == -1)
1507 return -EAGAIN;
1508
1509 set_bit(idx, cpuc->used_mask);
1510 hwc->idx = idx;
1511 }
1512 hwc->config_base = x86_pmu.eventsel;
1513 hwc->event_base = x86_pmu.perfctr;
1514 }
1515
1516 return idx;
1517} 965}
1518 966
1519/* 967static int x86_pmu_start(struct perf_event *event)
1520 * Find a PMC slot for the freshly enabled / scheduled in event:
1521 */
1522static int x86_pmu_enable(struct perf_event *event)
1523{ 968{
1524 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1525 struct hw_perf_event *hwc = &event->hw; 969 struct hw_perf_event *hwc = &event->hw;
1526 int idx;
1527
1528 idx = x86_schedule_event(cpuc, hwc);
1529 if (idx < 0)
1530 return idx;
1531 970
1532 perf_events_lapic_init(); 971 if (hwc->idx == -1)
1533 972 return -EAGAIN;
1534 x86_pmu.disable(hwc, idx);
1535 973
1536 cpuc->events[idx] = event; 974 x86_perf_event_set_period(event, hwc, hwc->idx);
1537 set_bit(idx, cpuc->active_mask); 975 x86_pmu.enable(hwc, hwc->idx);
1538
1539 x86_perf_event_set_period(event, hwc, idx);
1540 x86_pmu.enable(hwc, idx);
1541
1542 perf_event_update_userpage(event);
1543 976
1544 return 0; 977 return 0;
1545} 978}
@@ -1583,7 +1016,7 @@ void perf_event_print_debug(void)
1583 pr_info("CPU#%d: overflow: %016llx\n", cpu, overflow); 1016 pr_info("CPU#%d: overflow: %016llx\n", cpu, overflow);
1584 pr_info("CPU#%d: fixed: %016llx\n", cpu, fixed); 1017 pr_info("CPU#%d: fixed: %016llx\n", cpu, fixed);
1585 } 1018 }
1586 pr_info("CPU#%d: used: %016llx\n", cpu, *(u64 *)cpuc->used_mask); 1019 pr_info("CPU#%d: active: %016llx\n", cpu, *(u64 *)cpuc->active_mask);
1587 1020
1588 for (idx = 0; idx < x86_pmu.num_events; idx++) { 1021 for (idx = 0; idx < x86_pmu.num_events; idx++) {
1589 rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl); 1022 rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl);
@@ -1607,67 +1040,7 @@ void perf_event_print_debug(void)
1607 local_irq_restore(flags); 1040 local_irq_restore(flags);
1608} 1041}
1609 1042
1610static void intel_pmu_drain_bts_buffer(struct cpu_hw_events *cpuc) 1043static void x86_pmu_stop(struct perf_event *event)
1611{
1612 struct debug_store *ds = cpuc->ds;
1613 struct bts_record {
1614 u64 from;
1615 u64 to;
1616 u64 flags;
1617 };
1618 struct perf_event *event = cpuc->events[X86_PMC_IDX_FIXED_BTS];
1619 struct bts_record *at, *top;
1620 struct perf_output_handle handle;
1621 struct perf_event_header header;
1622 struct perf_sample_data data;
1623 struct pt_regs regs;
1624
1625 if (!event)
1626 return;
1627
1628 if (!ds)
1629 return;
1630
1631 at = (struct bts_record *)(unsigned long)ds->bts_buffer_base;
1632 top = (struct bts_record *)(unsigned long)ds->bts_index;
1633
1634 if (top <= at)
1635 return;
1636
1637 ds->bts_index = ds->bts_buffer_base;
1638
1639
1640 data.period = event->hw.last_period;
1641 data.addr = 0;
1642 data.raw = NULL;
1643 regs.ip = 0;
1644
1645 /*
1646 * Prepare a generic sample, i.e. fill in the invariant fields.
1647 * We will overwrite the from and to address before we output
1648 * the sample.
1649 */
1650 perf_prepare_sample(&header, &data, event, &regs);
1651
1652 if (perf_output_begin(&handle, event,
1653 header.size * (top - at), 1, 1))
1654 return;
1655
1656 for (; at < top; at++) {
1657 data.ip = at->from;
1658 data.addr = at->to;
1659
1660 perf_output_sample(&handle, &header, &data, event);
1661 }
1662
1663 perf_output_end(&handle);
1664
1665 /* There's new data available. */
1666 event->hw.interrupts++;
1667 event->pending_kill = POLL_IN;
1668}
1669
1670static void x86_pmu_disable(struct perf_event *event)
1671{ 1044{
1672 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 1045 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1673 struct hw_perf_event *hwc = &event->hw; 1046 struct hw_perf_event *hwc = &event->hw;
@@ -1681,183 +1054,38 @@ static void x86_pmu_disable(struct perf_event *event)
1681 x86_pmu.disable(hwc, idx); 1054 x86_pmu.disable(hwc, idx);
1682 1055
1683 /* 1056 /*
1684 * Make sure the cleared pointer becomes visible before we
1685 * (potentially) free the event:
1686 */
1687 barrier();
1688
1689 /*
1690 * Drain the remaining delta count out of a event 1057 * Drain the remaining delta count out of a event
1691 * that we are disabling: 1058 * that we are disabling:
1692 */ 1059 */
1693 x86_perf_event_update(event, hwc, idx); 1060 x86_perf_event_update(event, hwc, idx);
1694 1061
1695 /* Drain the remaining BTS records. */
1696 if (unlikely(idx == X86_PMC_IDX_FIXED_BTS))
1697 intel_pmu_drain_bts_buffer(cpuc);
1698
1699 cpuc->events[idx] = NULL; 1062 cpuc->events[idx] = NULL;
1700 clear_bit(idx, cpuc->used_mask);
1701
1702 perf_event_update_userpage(event);
1703}
1704
1705/*
1706 * Save and restart an expired event. Called by NMI contexts,
1707 * so it has to be careful about preempting normal event ops:
1708 */
1709static int intel_pmu_save_and_restart(struct perf_event *event)
1710{
1711 struct hw_perf_event *hwc = &event->hw;
1712 int idx = hwc->idx;
1713 int ret;
1714
1715 x86_perf_event_update(event, hwc, idx);
1716 ret = x86_perf_event_set_period(event, hwc, idx);
1717
1718 if (event->state == PERF_EVENT_STATE_ACTIVE)
1719 intel_pmu_enable_event(hwc, idx);
1720
1721 return ret;
1722} 1063}
1723 1064
1724static void intel_pmu_reset(void) 1065static void x86_pmu_disable(struct perf_event *event)
1725{
1726 struct debug_store *ds = __get_cpu_var(cpu_hw_events).ds;
1727 unsigned long flags;
1728 int idx;
1729
1730 if (!x86_pmu.num_events)
1731 return;
1732
1733 local_irq_save(flags);
1734
1735 printk("clearing PMU state on CPU#%d\n", smp_processor_id());
1736
1737 for (idx = 0; idx < x86_pmu.num_events; idx++) {
1738 checking_wrmsrl(x86_pmu.eventsel + idx, 0ull);
1739 checking_wrmsrl(x86_pmu.perfctr + idx, 0ull);
1740 }
1741 for (idx = 0; idx < x86_pmu.num_events_fixed; idx++) {
1742 checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull);
1743 }
1744 if (ds)
1745 ds->bts_index = ds->bts_buffer_base;
1746
1747 local_irq_restore(flags);
1748}
1749
1750static int p6_pmu_handle_irq(struct pt_regs *regs)
1751{
1752 struct perf_sample_data data;
1753 struct cpu_hw_events *cpuc;
1754 struct perf_event *event;
1755 struct hw_perf_event *hwc;
1756 int idx, handled = 0;
1757 u64 val;
1758
1759 data.addr = 0;
1760 data.raw = NULL;
1761
1762 cpuc = &__get_cpu_var(cpu_hw_events);
1763
1764 for (idx = 0; idx < x86_pmu.num_events; idx++) {
1765 if (!test_bit(idx, cpuc->active_mask))
1766 continue;
1767
1768 event = cpuc->events[idx];
1769 hwc = &event->hw;
1770
1771 val = x86_perf_event_update(event, hwc, idx);
1772 if (val & (1ULL << (x86_pmu.event_bits - 1)))
1773 continue;
1774
1775 /*
1776 * event overflow
1777 */
1778 handled = 1;
1779 data.period = event->hw.last_period;
1780
1781 if (!x86_perf_event_set_period(event, hwc, idx))
1782 continue;
1783
1784 if (perf_event_overflow(event, 1, &data, regs))
1785 p6_pmu_disable_event(hwc, idx);
1786 }
1787
1788 if (handled)
1789 inc_irq_stat(apic_perf_irqs);
1790
1791 return handled;
1792}
1793
1794/*
1795 * This handler is triggered by the local APIC, so the APIC IRQ handling
1796 * rules apply:
1797 */
1798static int intel_pmu_handle_irq(struct pt_regs *regs)
1799{ 1066{
1800 struct perf_sample_data data; 1067 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1801 struct cpu_hw_events *cpuc; 1068 int i;
1802 int bit, loops;
1803 u64 ack, status;
1804
1805 data.addr = 0;
1806 data.raw = NULL;
1807
1808 cpuc = &__get_cpu_var(cpu_hw_events);
1809
1810 perf_disable();
1811 intel_pmu_drain_bts_buffer(cpuc);
1812 status = intel_pmu_get_status();
1813 if (!status) {
1814 perf_enable();
1815 return 0;
1816 }
1817
1818 loops = 0;
1819again:
1820 if (++loops > 100) {
1821 WARN_ONCE(1, "perfevents: irq loop stuck!\n");
1822 perf_event_print_debug();
1823 intel_pmu_reset();
1824 perf_enable();
1825 return 1;
1826 }
1827 1069
1828 inc_irq_stat(apic_perf_irqs); 1070 x86_pmu_stop(event);
1829 ack = status;
1830 for_each_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
1831 struct perf_event *event = cpuc->events[bit];
1832 1071
1833 clear_bit(bit, (unsigned long *) &status); 1072 for (i = 0; i < cpuc->n_events; i++) {
1834 if (!test_bit(bit, cpuc->active_mask)) 1073 if (event == cpuc->event_list[i]) {
1835 continue;
1836 1074
1837 if (!intel_pmu_save_and_restart(event)) 1075 if (x86_pmu.put_event_constraints)
1838 continue; 1076 x86_pmu.put_event_constraints(cpuc, event);
1839 1077
1840 data.period = event->hw.last_period; 1078 while (++i < cpuc->n_events)
1079 cpuc->event_list[i-1] = cpuc->event_list[i];
1841 1080
1842 if (perf_event_overflow(event, 1, &data, regs)) 1081 --cpuc->n_events;
1843 intel_pmu_disable_event(&event->hw, bit); 1082 break;
1083 }
1844 } 1084 }
1845 1085 perf_event_update_userpage(event);
1846 intel_pmu_ack_status(ack);
1847
1848 /*
1849 * Repeat if there is more work to be done:
1850 */
1851 status = intel_pmu_get_status();
1852 if (status)
1853 goto again;
1854
1855 perf_enable();
1856
1857 return 1;
1858} 1086}
1859 1087
1860static int amd_pmu_handle_irq(struct pt_regs *regs) 1088static int x86_pmu_handle_irq(struct pt_regs *regs)
1861{ 1089{
1862 struct perf_sample_data data; 1090 struct perf_sample_data data;
1863 struct cpu_hw_events *cpuc; 1091 struct cpu_hw_events *cpuc;
@@ -1892,7 +1120,7 @@ static int amd_pmu_handle_irq(struct pt_regs *regs)
1892 continue; 1120 continue;
1893 1121
1894 if (perf_event_overflow(event, 1, &data, regs)) 1122 if (perf_event_overflow(event, 1, &data, regs))
1895 amd_pmu_disable_event(hwc, idx); 1123 x86_pmu.disable(hwc, idx);
1896 } 1124 }
1897 1125
1898 if (handled) 1126 if (handled)
@@ -1975,194 +1203,137 @@ static __read_mostly struct notifier_block perf_event_nmi_notifier = {
1975 .priority = 1 1203 .priority = 1
1976}; 1204};
1977 1205
1978static __initconst struct x86_pmu p6_pmu = { 1206static struct event_constraint unconstrained;
1979 .name = "p6", 1207static struct event_constraint emptyconstraint;
1980 .handle_irq = p6_pmu_handle_irq,
1981 .disable_all = p6_pmu_disable_all,
1982 .enable_all = p6_pmu_enable_all,
1983 .enable = p6_pmu_enable_event,
1984 .disable = p6_pmu_disable_event,
1985 .eventsel = MSR_P6_EVNTSEL0,
1986 .perfctr = MSR_P6_PERFCTR0,
1987 .event_map = p6_pmu_event_map,
1988 .raw_event = p6_pmu_raw_event,
1989 .max_events = ARRAY_SIZE(p6_perfmon_event_map),
1990 .apic = 1,
1991 .max_period = (1ULL << 31) - 1,
1992 .version = 0,
1993 .num_events = 2,
1994 /*
1995 * Events have 40 bits implemented. However they are designed such
1996 * that bits [32-39] are sign extensions of bit 31. As such the
1997 * effective width of a event for P6-like PMU is 32 bits only.
1998 *
1999 * See IA-32 Intel Architecture Software developer manual Vol 3B
2000 */
2001 .event_bits = 32,
2002 .event_mask = (1ULL << 32) - 1,
2003 .get_event_idx = intel_get_event_idx,
2004};
2005 1208
2006static __initconst struct x86_pmu intel_pmu = { 1209static struct event_constraint *
2007 .name = "Intel", 1210x86_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
2008 .handle_irq = intel_pmu_handle_irq, 1211{
2009 .disable_all = intel_pmu_disable_all, 1212 struct event_constraint *c;
2010 .enable_all = intel_pmu_enable_all,
2011 .enable = intel_pmu_enable_event,
2012 .disable = intel_pmu_disable_event,
2013 .eventsel = MSR_ARCH_PERFMON_EVENTSEL0,
2014 .perfctr = MSR_ARCH_PERFMON_PERFCTR0,
2015 .event_map = intel_pmu_event_map,
2016 .raw_event = intel_pmu_raw_event,
2017 .max_events = ARRAY_SIZE(intel_perfmon_event_map),
2018 .apic = 1,
2019 /*
2020 * Intel PMCs cannot be accessed sanely above 32 bit width,
2021 * so we install an artificial 1<<31 period regardless of
2022 * the generic event period:
2023 */
2024 .max_period = (1ULL << 31) - 1,
2025 .enable_bts = intel_pmu_enable_bts,
2026 .disable_bts = intel_pmu_disable_bts,
2027 .get_event_idx = intel_get_event_idx,
2028};
2029 1213
2030static __initconst struct x86_pmu amd_pmu = { 1214 if (x86_pmu.event_constraints) {
2031 .name = "AMD", 1215 for_each_event_constraint(c, x86_pmu.event_constraints) {
2032 .handle_irq = amd_pmu_handle_irq, 1216 if ((event->hw.config & c->cmask) == c->code)
2033 .disable_all = amd_pmu_disable_all, 1217 return c;
2034 .enable_all = amd_pmu_enable_all, 1218 }
2035 .enable = amd_pmu_enable_event, 1219 }
2036 .disable = amd_pmu_disable_event, 1220
2037 .eventsel = MSR_K7_EVNTSEL0, 1221 return &unconstrained;
2038 .perfctr = MSR_K7_PERFCTR0, 1222}
2039 .event_map = amd_pmu_event_map,
2040 .raw_event = amd_pmu_raw_event,
2041 .max_events = ARRAY_SIZE(amd_perfmon_event_map),
2042 .num_events = 4,
2043 .event_bits = 48,
2044 .event_mask = (1ULL << 48) - 1,
2045 .apic = 1,
2046 /* use highest bit to detect overflow */
2047 .max_period = (1ULL << 47) - 1,
2048 .get_event_idx = gen_get_event_idx,
2049};
2050 1223
2051static __init int p6_pmu_init(void) 1224static int x86_event_sched_in(struct perf_event *event,
1225 struct perf_cpu_context *cpuctx)
2052{ 1226{
2053 switch (boot_cpu_data.x86_model) { 1227 int ret = 0;
2054 case 1:
2055 case 3: /* Pentium Pro */
2056 case 5:
2057 case 6: /* Pentium II */
2058 case 7:
2059 case 8:
2060 case 11: /* Pentium III */
2061 event_constraints = intel_p6_event_constraints;
2062 break;
2063 case 9:
2064 case 13:
2065 /* Pentium M */
2066 event_constraints = intel_p6_event_constraints;
2067 break;
2068 default:
2069 pr_cont("unsupported p6 CPU model %d ",
2070 boot_cpu_data.x86_model);
2071 return -ENODEV;
2072 }
2073 1228
2074 x86_pmu = p6_pmu; 1229 event->state = PERF_EVENT_STATE_ACTIVE;
1230 event->oncpu = smp_processor_id();
1231 event->tstamp_running += event->ctx->time - event->tstamp_stopped;
2075 1232
2076 return 0; 1233 if (!is_x86_event(event))
1234 ret = event->pmu->enable(event);
1235
1236 if (!ret && !is_software_event(event))
1237 cpuctx->active_oncpu++;
1238
1239 if (!ret && event->attr.exclusive)
1240 cpuctx->exclusive = 1;
1241
1242 return ret;
2077} 1243}
2078 1244
2079static __init int intel_pmu_init(void) 1245static void x86_event_sched_out(struct perf_event *event,
1246 struct perf_cpu_context *cpuctx)
2080{ 1247{
2081 union cpuid10_edx edx; 1248 event->state = PERF_EVENT_STATE_INACTIVE;
2082 union cpuid10_eax eax; 1249 event->oncpu = -1;
2083 unsigned int unused;
2084 unsigned int ebx;
2085 int version;
2086
2087 if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
2088 /* check for P6 processor family */
2089 if (boot_cpu_data.x86 == 6) {
2090 return p6_pmu_init();
2091 } else {
2092 return -ENODEV;
2093 }
2094 }
2095 1250
2096 /* 1251 if (!is_x86_event(event))
2097 * Check whether the Architectural PerfMon supports 1252 event->pmu->disable(event);
2098 * Branch Misses Retired hw_event or not.
2099 */
2100 cpuid(10, &eax.full, &ebx, &unused, &edx.full);
2101 if (eax.split.mask_length <= ARCH_PERFMON_BRANCH_MISSES_RETIRED)
2102 return -ENODEV;
2103 1253
2104 version = eax.split.version_id; 1254 event->tstamp_running -= event->ctx->time - event->tstamp_stopped;
2105 if (version < 2)
2106 return -ENODEV;
2107 1255
2108 x86_pmu = intel_pmu; 1256 if (!is_software_event(event))
2109 x86_pmu.version = version; 1257 cpuctx->active_oncpu--;
2110 x86_pmu.num_events = eax.split.num_events;
2111 x86_pmu.event_bits = eax.split.bit_width;
2112 x86_pmu.event_mask = (1ULL << eax.split.bit_width) - 1;
2113 1258
2114 /* 1259 if (event->attr.exclusive || !cpuctx->active_oncpu)
2115 * Quirk: v2 perfmon does not report fixed-purpose events, so 1260 cpuctx->exclusive = 0;
2116 * assume at least 3 events: 1261}
2117 */
2118 x86_pmu.num_events_fixed = max((int)edx.split.num_events_fixed, 3);
2119 1262
1263/*
1264 * Called to enable a whole group of events.
1265 * Returns 1 if the group was enabled, or -EAGAIN if it could not be.
1266 * Assumes the caller has disabled interrupts and has
1267 * frozen the PMU with hw_perf_save_disable.
1268 *
1269 * called with PMU disabled. If successful and return value 1,
1270 * then guaranteed to call perf_enable() and hw_perf_enable()
1271 */
1272int hw_perf_group_sched_in(struct perf_event *leader,
1273 struct perf_cpu_context *cpuctx,
1274 struct perf_event_context *ctx)
1275{
1276 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1277 struct perf_event *sub;
1278 int assign[X86_PMC_IDX_MAX];
1279 int n0, n1, ret;
1280
1281 /* n0 = total number of events */
1282 n0 = collect_events(cpuc, leader, true);
1283 if (n0 < 0)
1284 return n0;
1285
1286 ret = x86_schedule_events(cpuc, n0, assign);
1287 if (ret)
1288 return ret;
1289
1290 ret = x86_event_sched_in(leader, cpuctx);
1291 if (ret)
1292 return ret;
1293
1294 n1 = 1;
1295 list_for_each_entry(sub, &leader->sibling_list, group_entry) {
1296 if (sub->state > PERF_EVENT_STATE_OFF) {
1297 ret = x86_event_sched_in(sub, cpuctx);
1298 if (ret)
1299 goto undo;
1300 ++n1;
1301 }
1302 }
2120 /* 1303 /*
2121 * Install the hw-cache-events table: 1304 * copy new assignment, now we know it is possible
1305 * will be used by hw_perf_enable()
2122 */ 1306 */
2123 switch (boot_cpu_data.x86_model) { 1307 memcpy(cpuc->assign, assign, n0*sizeof(int));
2124 case 15: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */
2125 case 22: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */
2126 case 23: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */
2127 case 29: /* six-core 45 nm xeon "Dunnington" */
2128 memcpy(hw_cache_event_ids, core2_hw_cache_event_ids,
2129 sizeof(hw_cache_event_ids));
2130
2131 pr_cont("Core2 events, ");
2132 event_constraints = intel_core_event_constraints;
2133 break;
2134 default:
2135 case 26:
2136 memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids,
2137 sizeof(hw_cache_event_ids));
2138 1308
2139 event_constraints = intel_nehalem_event_constraints; 1309 cpuc->n_events = n0;
2140 pr_cont("Nehalem/Corei7 events, "); 1310 cpuc->n_added = n1;
2141 break; 1311 ctx->nr_active += n1;
2142 case 28:
2143 memcpy(hw_cache_event_ids, atom_hw_cache_event_ids,
2144 sizeof(hw_cache_event_ids));
2145 1312
2146 pr_cont("Atom events, "); 1313 /*
2147 break; 1314 * 1 means successful and events are active
1315 * This is not quite true because we defer
1316 * actual activation until hw_perf_enable() but
							 1317	 * this way we ensure caller won't try to enable
1318 * individual events
1319 */
1320 return 1;
1321undo:
1322 x86_event_sched_out(leader, cpuctx);
1323 n0 = 1;
1324 list_for_each_entry(sub, &leader->sibling_list, group_entry) {
1325 if (sub->state == PERF_EVENT_STATE_ACTIVE) {
1326 x86_event_sched_out(sub, cpuctx);
1327 if (++n0 == n1)
1328 break;
1329 }
2148 } 1330 }
2149 return 0; 1331 return ret;
2150} 1332}
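
hw_perf_group_sched_in() above is transactional: the whole group is first validated with x86_schedule_events(), then the leader and each sibling are activated one by one, and any failure rolls back the members that were already activated. A condensed sketch of that all-or-nothing pattern; try_activate()/deactivate() are stand-ins for x86_event_sched_in()/x86_event_sched_out(), not kernel functions:

#include <stdio.h>

/* pretend event 3 cannot be activated */
static int try_activate(int ev)
{
	return ev != 3 ? 0 : -1;
}

static void deactivate(int ev)
{
	printf("rolled back event %d\n", ev);
}

/* all-or-nothing activation: on any failure, undo what was done */
static int group_sched_in(const int *events, int n)
{
	int i, ret;

	for (i = 0; i < n; i++) {
		ret = try_activate(events[i]);
		if (ret)
			goto undo;
	}
	return 0;
undo:
	while (--i >= 0)
		deactivate(events[i]);
	return ret;
}

int main(void)
{
	int group[] = { 0, 1, 2, 3 };

	if (group_sched_in(group, 4))
		printf("group rejected as a whole\n");
	return 0;
}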
2151 1333
2152static __init int amd_pmu_init(void) 1334#include "perf_event_amd.c"
2153{ 1335#include "perf_event_p6.c"
2154 /* Performance-monitoring supported from K7 and later: */ 1336#include "perf_event_intel.c"
2155 if (boot_cpu_data.x86 < 6)
2156 return -ENODEV;
2157
2158 x86_pmu = amd_pmu;
2159
2160 /* Events are common for all AMDs */
2161 memcpy(hw_cache_event_ids, amd_hw_cache_event_ids,
2162 sizeof(hw_cache_event_ids));
2163
2164 return 0;
2165}
2166 1337
2167static void __init pmu_check_apic(void) 1338static void __init pmu_check_apic(void)
2168{ 1339{
@@ -2220,6 +1391,10 @@ void __init init_hw_perf_events(void)
2220 perf_events_lapic_init(); 1391 perf_events_lapic_init();
2221 register_die_notifier(&perf_event_nmi_notifier); 1392 register_die_notifier(&perf_event_nmi_notifier);
2222 1393
1394 unconstrained = (struct event_constraint)
1395 __EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_events) - 1,
1396 0, x86_pmu.num_events);
1397
2223 pr_info("... version: %d\n", x86_pmu.version); 1398 pr_info("... version: %d\n", x86_pmu.version);
2224 pr_info("... bit width: %d\n", x86_pmu.event_bits); 1399 pr_info("... bit width: %d\n", x86_pmu.event_bits);
2225 pr_info("... generic registers: %d\n", x86_pmu.num_events); 1400 pr_info("... generic registers: %d\n", x86_pmu.num_events);
@@ -2237,50 +1412,79 @@ static inline void x86_pmu_read(struct perf_event *event)
2237static const struct pmu pmu = { 1412static const struct pmu pmu = {
2238 .enable = x86_pmu_enable, 1413 .enable = x86_pmu_enable,
2239 .disable = x86_pmu_disable, 1414 .disable = x86_pmu_disable,
1415 .start = x86_pmu_start,
1416 .stop = x86_pmu_stop,
2240 .read = x86_pmu_read, 1417 .read = x86_pmu_read,
2241 .unthrottle = x86_pmu_unthrottle, 1418 .unthrottle = x86_pmu_unthrottle,
2242}; 1419};
2243 1420
2244static int 1421/*
2245validate_event(struct cpu_hw_events *cpuc, struct perf_event *event) 1422 * validate a single event group
2246{ 1423 *
2247 struct hw_perf_event fake_event = event->hw; 1424 * validation include:
2248 1425 * - check events are compatible which each other
2249 if (event->pmu && event->pmu != &pmu) 1426 * - events do not compete for the same counter
2250 return 0; 1427 * - number of events <= number of counters
2251 1428 *
2252 return x86_schedule_event(cpuc, &fake_event) >= 0; 1429 * validation ensures the group can be loaded onto the
2253} 1430 * PMU if it was the only group available.
2254 1431 */
2255static int validate_group(struct perf_event *event) 1432static int validate_group(struct perf_event *event)
2256{ 1433{
2257 struct perf_event *sibling, *leader = event->group_leader; 1434 struct perf_event *leader = event->group_leader;
2258 struct cpu_hw_events fake_pmu; 1435 struct cpu_hw_events *fake_cpuc;
1436 int ret, n;
2259 1437
2260 memset(&fake_pmu, 0, sizeof(fake_pmu)); 1438 ret = -ENOMEM;
1439 fake_cpuc = kmalloc(sizeof(*fake_cpuc), GFP_KERNEL | __GFP_ZERO);
1440 if (!fake_cpuc)
1441 goto out;
1442
1443 /*
1444 * the event is not yet connected with its
1445 * siblings therefore we must first collect
1446 * existing siblings, then add the new event
1447 * before we can simulate the scheduling
1448 */
1449 ret = -ENOSPC;
1450 n = collect_events(fake_cpuc, leader, true);
1451 if (n < 0)
1452 goto out_free;
2261 1453
2262 if (!validate_event(&fake_pmu, leader)) 1454 fake_cpuc->n_events = n;
2263 return -ENOSPC; 1455 n = collect_events(fake_cpuc, event, false);
1456 if (n < 0)
1457 goto out_free;
2264 1458
2265 list_for_each_entry(sibling, &leader->sibling_list, group_entry) { 1459 fake_cpuc->n_events = n;
2266 if (!validate_event(&fake_pmu, sibling))
2267 return -ENOSPC;
2268 }
2269 1460
2270 if (!validate_event(&fake_pmu, event)) 1461 ret = x86_schedule_events(fake_cpuc, n, NULL);
2271 return -ENOSPC;
2272 1462
2273 return 0; 1463out_free:
1464 kfree(fake_cpuc);
1465out:
1466 return ret;
2274} 1467}
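
validate_group() above never touches the hardware: it builds a throwaway cpu_hw_events, collects the existing siblings plus the candidate event into it, and runs the scheduler with a NULL assignment array purely as a feasibility test. A minimal sketch of that dry-run pattern, with a fake scheduler that simply enforces a counter limit; struct sched_state and schedule_ok() are illustrative only:

#include <stdlib.h>
#include <string.h>
#include <stdio.h>

#define MAX_EVENTS 8

/* scratch scheduling state, loosely mirroring cpu_hw_events */
struct sched_state {
	int n_events;
	int event_list[MAX_EVENTS];
};

/* stand-in for x86_schedule_events(..., NULL): feasibility only */
static int schedule_ok(const struct sched_state *s)
{
	return s->n_events <= 4 ? 0 : -1;	/* pretend we have 4 counters */
}

/*
 * Validate a newcomer against already-accepted events without touching
 * the live state: copy into a throwaway structure, add the new event,
 * and ask the scheduler whether the result is still feasible.
 */
static int validate_event(const struct sched_state *live, int new_event)
{
	struct sched_state *fake;
	int ret;

	fake = calloc(1, sizeof(*fake));
	if (!fake)
		return -1;

	memcpy(fake, live, sizeof(*fake));
	if (fake->n_events >= MAX_EVENTS) {
		free(fake);
		return -1;
	}
	fake->event_list[fake->n_events++] = new_event;

	ret = schedule_ok(fake);
	free(fake);
	return ret;
}

int main(void)
{
	struct sched_state live = { .n_events = 4,
				    .event_list = { 10, 11, 12, 13 } };

	printf("adding event 14: %s\n",
	       validate_event(&live, 14) ? "rejected" : "accepted");
	return 0;
}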
2275 1468
2276const struct pmu *hw_perf_event_init(struct perf_event *event) 1469const struct pmu *hw_perf_event_init(struct perf_event *event)
2277{ 1470{
1471 const struct pmu *tmp;
2278 int err; 1472 int err;
2279 1473
2280 err = __hw_perf_event_init(event); 1474 err = __hw_perf_event_init(event);
2281 if (!err) { 1475 if (!err) {
1476 /*
1477 * we temporarily connect event to its pmu
1478 * such that validate_group() can classify
1479 * it as an x86 event using is_x86_event()
1480 */
1481 tmp = event->pmu;
1482 event->pmu = &pmu;
1483
2282 if (event->group_leader != event) 1484 if (event->group_leader != event)
2283 err = validate_group(event); 1485 err = validate_group(event);
1486
1487 event->pmu = tmp;
2284 } 1488 }
2285 if (err) { 1489 if (err) {
2286 if (event->destroy) 1490 if (event->destroy)
@@ -2304,7 +1508,6 @@ void callchain_store(struct perf_callchain_entry *entry, u64 ip)
2304 1508
2305static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry); 1509static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry);
2306static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_nmi_entry); 1510static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_nmi_entry);
2307static DEFINE_PER_CPU(int, in_ignored_frame);
2308 1511
2309 1512
2310static void 1513static void
@@ -2320,10 +1523,6 @@ static void backtrace_warning(void *data, char *msg)
2320 1523
2321static int backtrace_stack(void *data, char *name) 1524static int backtrace_stack(void *data, char *name)
2322{ 1525{
2323 per_cpu(in_ignored_frame, smp_processor_id()) =
2324 x86_is_stack_id(NMI_STACK, name) ||
2325 x86_is_stack_id(DEBUG_STACK, name);
2326
2327 return 0; 1526 return 0;
2328} 1527}
2329 1528
@@ -2331,9 +1530,6 @@ static void backtrace_address(void *data, unsigned long addr, int reliable)
2331{ 1530{
2332 struct perf_callchain_entry *entry = data; 1531 struct perf_callchain_entry *entry = data;
2333 1532
2334 if (per_cpu(in_ignored_frame, smp_processor_id()))
2335 return;
2336
2337 if (reliable) 1533 if (reliable)
2338 callchain_store(entry, addr); 1534 callchain_store(entry, addr);
2339} 1535}
@@ -2440,9 +1636,6 @@ perf_do_callchain(struct pt_regs *regs, struct perf_callchain_entry *entry)
2440 1636
2441 is_user = user_mode(regs); 1637 is_user = user_mode(regs);
2442 1638
2443 if (!current || current->pid == 0)
2444 return;
2445
2446 if (is_user && current->state != TASK_RUNNING) 1639 if (is_user && current->state != TASK_RUNNING)
2447 return; 1640 return;
2448 1641
@@ -2472,4 +1665,25 @@ struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
2472void hw_perf_event_setup_online(int cpu) 1665void hw_perf_event_setup_online(int cpu)
2473{ 1666{
2474 init_debug_store_on_cpu(cpu); 1667 init_debug_store_on_cpu(cpu);
1668
1669 switch (boot_cpu_data.x86_vendor) {
1670 case X86_VENDOR_AMD:
1671 amd_pmu_cpu_online(cpu);
1672 break;
1673 default:
1674 return;
1675 }
1676}
1677
1678void hw_perf_event_setup_offline(int cpu)
1679{
1680 init_debug_store_on_cpu(cpu);
1681
1682 switch (boot_cpu_data.x86_vendor) {
1683 case X86_VENDOR_AMD:
1684 amd_pmu_cpu_offline(cpu);
1685 break;
1686 default:
1687 return;
1688 }
2475} 1689}
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
new file mode 100644
index 000000000000..8f3dbfda3c4f
--- /dev/null
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -0,0 +1,416 @@
1#ifdef CONFIG_CPU_SUP_AMD
2
3static DEFINE_RAW_SPINLOCK(amd_nb_lock);
4
5static __initconst u64 amd_hw_cache_event_ids
6 [PERF_COUNT_HW_CACHE_MAX]
7 [PERF_COUNT_HW_CACHE_OP_MAX]
8 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
9{
10 [ C(L1D) ] = {
11 [ C(OP_READ) ] = {
12 [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */
13 [ C(RESULT_MISS) ] = 0x0041, /* Data Cache Misses */
14 },
15 [ C(OP_WRITE) ] = {
16 [ C(RESULT_ACCESS) ] = 0x0142, /* Data Cache Refills :system */
17 [ C(RESULT_MISS) ] = 0,
18 },
19 [ C(OP_PREFETCH) ] = {
20 [ C(RESULT_ACCESS) ] = 0x0267, /* Data Prefetcher :attempts */
21 [ C(RESULT_MISS) ] = 0x0167, /* Data Prefetcher :cancelled */
22 },
23 },
24 [ C(L1I ) ] = {
25 [ C(OP_READ) ] = {
26 [ C(RESULT_ACCESS) ] = 0x0080, /* Instruction cache fetches */
27 [ C(RESULT_MISS) ] = 0x0081, /* Instruction cache misses */
28 },
29 [ C(OP_WRITE) ] = {
30 [ C(RESULT_ACCESS) ] = -1,
31 [ C(RESULT_MISS) ] = -1,
32 },
33 [ C(OP_PREFETCH) ] = {
34 [ C(RESULT_ACCESS) ] = 0x014B, /* Prefetch Instructions :Load */
35 [ C(RESULT_MISS) ] = 0,
36 },
37 },
38 [ C(LL ) ] = {
39 [ C(OP_READ) ] = {
40 [ C(RESULT_ACCESS) ] = 0x037D, /* Requests to L2 Cache :IC+DC */
41 [ C(RESULT_MISS) ] = 0x037E, /* L2 Cache Misses : IC+DC */
42 },
43 [ C(OP_WRITE) ] = {
44 [ C(RESULT_ACCESS) ] = 0x017F, /* L2 Fill/Writeback */
45 [ C(RESULT_MISS) ] = 0,
46 },
47 [ C(OP_PREFETCH) ] = {
48 [ C(RESULT_ACCESS) ] = 0,
49 [ C(RESULT_MISS) ] = 0,
50 },
51 },
52 [ C(DTLB) ] = {
53 [ C(OP_READ) ] = {
54 [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */
55 [ C(RESULT_MISS) ] = 0x0046, /* L1 DTLB and L2 DLTB Miss */
56 },
57 [ C(OP_WRITE) ] = {
58 [ C(RESULT_ACCESS) ] = 0,
59 [ C(RESULT_MISS) ] = 0,
60 },
61 [ C(OP_PREFETCH) ] = {
62 [ C(RESULT_ACCESS) ] = 0,
63 [ C(RESULT_MISS) ] = 0,
64 },
65 },
66 [ C(ITLB) ] = {
67 [ C(OP_READ) ] = {
 68		[ C(RESULT_ACCESS) ] = 0x0080, /* Instruction fetches */
69 [ C(RESULT_MISS) ] = 0x0085, /* Instr. fetch ITLB misses */
70 },
71 [ C(OP_WRITE) ] = {
72 [ C(RESULT_ACCESS) ] = -1,
73 [ C(RESULT_MISS) ] = -1,
74 },
75 [ C(OP_PREFETCH) ] = {
76 [ C(RESULT_ACCESS) ] = -1,
77 [ C(RESULT_MISS) ] = -1,
78 },
79 },
80 [ C(BPU ) ] = {
81 [ C(OP_READ) ] = {
82 [ C(RESULT_ACCESS) ] = 0x00c2, /* Retired Branch Instr. */
83 [ C(RESULT_MISS) ] = 0x00c3, /* Retired Mispredicted BI */
84 },
85 [ C(OP_WRITE) ] = {
86 [ C(RESULT_ACCESS) ] = -1,
87 [ C(RESULT_MISS) ] = -1,
88 },
89 [ C(OP_PREFETCH) ] = {
90 [ C(RESULT_ACCESS) ] = -1,
91 [ C(RESULT_MISS) ] = -1,
92 },
93 },
94};
95
96/*
97 * AMD Performance Monitor K7 and later.
98 */
99static const u64 amd_perfmon_event_map[] =
100{
101 [PERF_COUNT_HW_CPU_CYCLES] = 0x0076,
102 [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
103 [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0080,
104 [PERF_COUNT_HW_CACHE_MISSES] = 0x0081,
105 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4,
106 [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5,
107};
108
109static u64 amd_pmu_event_map(int hw_event)
110{
111 return amd_perfmon_event_map[hw_event];
112}
113
114static u64 amd_pmu_raw_event(u64 hw_event)
115{
116#define K7_EVNTSEL_EVENT_MASK 0xF000000FFULL
117#define K7_EVNTSEL_UNIT_MASK 0x00000FF00ULL
118#define K7_EVNTSEL_EDGE_MASK 0x000040000ULL
119#define K7_EVNTSEL_INV_MASK 0x000800000ULL
120#define K7_EVNTSEL_REG_MASK 0x0FF000000ULL
121
122#define K7_EVNTSEL_MASK \
123 (K7_EVNTSEL_EVENT_MASK | \
124 K7_EVNTSEL_UNIT_MASK | \
125 K7_EVNTSEL_EDGE_MASK | \
126 K7_EVNTSEL_INV_MASK | \
127 K7_EVNTSEL_REG_MASK)
128
129 return hw_event & K7_EVNTSEL_MASK;
130}
131
132/*
133 * AMD64 events are detected based on their event codes.
134 */
135static inline int amd_is_nb_event(struct hw_perf_event *hwc)
136{
137 return (hwc->config & 0xe0) == 0xe0;
138}
139
140static void amd_put_event_constraints(struct cpu_hw_events *cpuc,
141 struct perf_event *event)
142{
143 struct hw_perf_event *hwc = &event->hw;
144 struct amd_nb *nb = cpuc->amd_nb;
145 int i;
146
147 /*
148 * only care about NB events
149 */
150 if (!(nb && amd_is_nb_event(hwc)))
151 return;
152
153 /*
154 * need to scan whole list because event may not have
155 * been assigned during scheduling
156 *
157 * no race condition possible because event can only
158 * be removed on one CPU at a time AND PMU is disabled
159 * when we come here
160 */
161 for (i = 0; i < x86_pmu.num_events; i++) {
162 if (nb->owners[i] == event) {
163 cmpxchg(nb->owners+i, event, NULL);
164 break;
165 }
166 }
167}
168
169 /*
170 * AMD64 NorthBridge events need special treatment because
171 * counter access needs to be synchronized across all cores
172 * of a package. Refer to BKDG section 3.12
173 *
174 * NB events are events measuring L3 cache, Hypertransport
175	 * traffic. They are identified by an event code >= 0xe0.
176	 * They measure events on the NorthBridge which is shared
177 * by all cores on a package. NB events are counted on a
178 * shared set of counters. When a NB event is programmed
179 * in a counter, the data actually comes from a shared
180 * counter. Thus, access to those counters needs to be
181 * synchronized.
182 *
183 * We implement the synchronization such that no two cores
184 * can be measuring NB events using the same counters. Thus,
185 * we maintain a per-NB allocation table. The available slot
186 * is propagated using the event_constraint structure.
187 *
188 * We provide only one choice for each NB event based on
189 * the fact that only NB events have restrictions. Consequently,
190 * if a counter is available, there is a guarantee the NB event
191 * will be assigned to it. If no slot is available, an empty
192 * constraint is returned and scheduling will eventually fail
193 * for this event.
194 *
195	 * Note that all cores attached to the same NB compete for the same
196	 * counters to host NB events; this is why we use atomic ops. Some
197 * multi-chip CPUs may have more than one NB.
198 *
199 * Given that resources are allocated (cmpxchg), they must be
200 * eventually freed for others to use. This is accomplished by
201 * calling amd_put_event_constraints().
202 *
203 * Non NB events are not impacted by this restriction.
204 */
205static struct event_constraint *
206amd_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
207{
208 struct hw_perf_event *hwc = &event->hw;
209 struct amd_nb *nb = cpuc->amd_nb;
210 struct perf_event *old = NULL;
211 int max = x86_pmu.num_events;
212 int i, j, k = -1;
213
214 /*
215 * if not NB event or no NB, then no constraints
216 */
217 if (!(nb && amd_is_nb_event(hwc)))
218 return &unconstrained;
219
220 /*
221 * detect if already present, if so reuse
222 *
223 * cannot merge with actual allocation
224 * because of possible holes
225 *
226 * event can already be present yet not assigned (in hwc->idx)
227 * because of successive calls to x86_schedule_events() from
228 * hw_perf_group_sched_in() without hw_perf_enable()
229 */
230 for (i = 0; i < max; i++) {
231 /*
232 * keep track of first free slot
233 */
234 if (k == -1 && !nb->owners[i])
235 k = i;
236
237 /* already present, reuse */
238 if (nb->owners[i] == event)
239 goto done;
240 }
241 /*
242 * not present, so grab a new slot
243 * starting either at:
244 */
245 if (hwc->idx != -1) {
246 /* previous assignment */
247 i = hwc->idx;
248 } else if (k != -1) {
249 /* start from free slot found */
250 i = k;
251 } else {
252 /*
253 * event not found, no slot found in
254 * first pass, try again from the
255 * beginning
256 */
257 i = 0;
258 }
259 j = i;
260 do {
261 old = cmpxchg(nb->owners+i, NULL, event);
262 if (!old)
263 break;
264 if (++i == max)
265 i = 0;
266 } while (i != j);
267done:
268 if (!old)
269 return &nb->event_constraints[i];
270
271 return &emptyconstraint;
272}
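
The per-northbridge allocation described in the comment block above boils down to a lock-free claim of a slot in the shared owners[] array: a core installs its event pointer with cmpxchg() and releases it later from amd_put_event_constraints(). Below is a stand-alone sketch of that claim/release cycle using C11 atomics in place of the kernel's cmpxchg(); the int "event" handle and the function names are illustrative only:

#include <stdatomic.h>
#include <stdio.h>

#define NB_COUNTERS 4

/* shared per-northbridge ownership table, one slot per counter */
static _Atomic(int) owners[NB_COUNTERS];	/* 0 == free */

/* try to claim a counter for this event; returns counter index or -1 */
static int nb_claim(int event)
{
	for (int i = 0; i < NB_COUNTERS; i++) {
		int expected = 0;

		/* atomically install ourselves only if the slot is free */
		if (atomic_compare_exchange_strong(&owners[i], &expected,
						   event))
			return i;
	}
	return -1;	/* all shared counters busy: empty constraint */
}

/* release the slot so other cores can use it again */
static void nb_release(int event)
{
	for (int i = 0; i < NB_COUNTERS; i++) {
		int expected = event;

		if (atomic_compare_exchange_strong(&owners[i], &expected, 0))
			break;
	}
}

int main(void)
{
	int idx = nb_claim(42);

	printf("event 42 claimed counter %d\n", idx);
	nb_release(42);
	return 0;
}

Because the claim is a compare-and-swap on a shared array, two cores racing for the same slot cannot both succeed, which is exactly the guarantee the comment above asks for.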
273
274static __initconst struct x86_pmu amd_pmu = {
275 .name = "AMD",
276 .handle_irq = x86_pmu_handle_irq,
277 .disable_all = x86_pmu_disable_all,
278 .enable_all = x86_pmu_enable_all,
279 .enable = x86_pmu_enable_event,
280 .disable = x86_pmu_disable_event,
281 .eventsel = MSR_K7_EVNTSEL0,
282 .perfctr = MSR_K7_PERFCTR0,
283 .event_map = amd_pmu_event_map,
284 .raw_event = amd_pmu_raw_event,
285 .max_events = ARRAY_SIZE(amd_perfmon_event_map),
286 .num_events = 4,
287 .event_bits = 48,
288 .event_mask = (1ULL << 48) - 1,
289 .apic = 1,
290 /* use highest bit to detect overflow */
291 .max_period = (1ULL << 47) - 1,
292 .get_event_constraints = amd_get_event_constraints,
293 .put_event_constraints = amd_put_event_constraints
294};
295
296static struct amd_nb *amd_alloc_nb(int cpu, int nb_id)
297{
298 struct amd_nb *nb;
299 int i;
300
301 nb = kmalloc(sizeof(struct amd_nb), GFP_KERNEL);
302 if (!nb)
303 return NULL;
304
305 memset(nb, 0, sizeof(*nb));
306 nb->nb_id = nb_id;
307
308 /*
309 * initialize all possible NB constraints
310 */
311 for (i = 0; i < x86_pmu.num_events; i++) {
312 set_bit(i, nb->event_constraints[i].idxmsk);
313 nb->event_constraints[i].weight = 1;
314 }
315 return nb;
316}
317
318static void amd_pmu_cpu_online(int cpu)
319{
320 struct cpu_hw_events *cpu1, *cpu2;
321 struct amd_nb *nb = NULL;
322 int i, nb_id;
323
324 if (boot_cpu_data.x86_max_cores < 2)
325 return;
326
327 /*
328 * function may be called too early in the
329 * boot process, in which case nb_id is bogus
330 */
331 nb_id = amd_get_nb_id(cpu);
332 if (nb_id == BAD_APICID)
333 return;
334
335 cpu1 = &per_cpu(cpu_hw_events, cpu);
336 cpu1->amd_nb = NULL;
337
338 raw_spin_lock(&amd_nb_lock);
339
340 for_each_online_cpu(i) {
341 cpu2 = &per_cpu(cpu_hw_events, i);
342 nb = cpu2->amd_nb;
343 if (!nb)
344 continue;
345 if (nb->nb_id == nb_id)
346 goto found;
347 }
348
349 nb = amd_alloc_nb(cpu, nb_id);
350 if (!nb) {
351 pr_err("perf_events: failed NB allocation for CPU%d\n", cpu);
352 raw_spin_unlock(&amd_nb_lock);
353 return;
354 }
355found:
356 nb->refcnt++;
357 cpu1->amd_nb = nb;
358
359 raw_spin_unlock(&amd_nb_lock);
360}
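
amd_pmu_cpu_online() above shares one amd_nb structure between all CPUs that report the same northbridge id: it scans the CPUs already online, reuses a matching structure with an incremented refcount, and only allocates when the northbridge is seen for the first time. A simplified, single-threaded sketch of that find-or-allocate pattern (the kernel version runs under amd_nb_lock, which is omitted here; all names are hypothetical):

#include <stdio.h>
#include <stdlib.h>

#define MAX_CPUS 8

/* a shared per-node structure, loosely mirroring struct amd_nb */
struct node_res {
	int node_id;
	int refcnt;
};

static struct node_res *per_cpu_res[MAX_CPUS];	/* one pointer per CPU */

/*
 * Find-or-allocate: reuse another CPU's structure when it belongs to
 * the same node, otherwise allocate a fresh one and start its refcount.
 */
static struct node_res *get_node_res(int cpu, int node_id)
{
	struct node_res *res = NULL;

	for (int i = 0; i < MAX_CPUS; i++) {
		if (per_cpu_res[i] && per_cpu_res[i]->node_id == node_id) {
			res = per_cpu_res[i];
			break;
		}
	}

	if (!res) {
		res = calloc(1, sizeof(*res));
		if (!res)
			return NULL;
		res->node_id = node_id;
	}

	res->refcnt++;
	per_cpu_res[cpu] = res;
	return res;
}

int main(void)
{
	/* CPUs 0 and 1 sit on node 3 and end up sharing one structure */
	get_node_res(0, 3);
	get_node_res(1, 3);
	printf("shared: %s, refcnt=%d\n",
	       per_cpu_res[0] == per_cpu_res[1] ? "yes" : "no",
	       per_cpu_res[0]->refcnt);
	return 0;
}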
361
362static void amd_pmu_cpu_offline(int cpu)
363{
364 struct cpu_hw_events *cpuhw;
365
366 if (boot_cpu_data.x86_max_cores < 2)
367 return;
368
369 cpuhw = &per_cpu(cpu_hw_events, cpu);
370
371 raw_spin_lock(&amd_nb_lock);
372
373 if (--cpuhw->amd_nb->refcnt == 0)
374 kfree(cpuhw->amd_nb);
375
376 cpuhw->amd_nb = NULL;
377
378 raw_spin_unlock(&amd_nb_lock);
379}
380
381static __init int amd_pmu_init(void)
382{
383 /* Performance-monitoring supported from K7 and later: */
384 if (boot_cpu_data.x86 < 6)
385 return -ENODEV;
386
387 x86_pmu = amd_pmu;
388
389 /* Events are common for all AMDs */
390 memcpy(hw_cache_event_ids, amd_hw_cache_event_ids,
391 sizeof(hw_cache_event_ids));
392
393 /*
394 * explicitly initialize the boot cpu, other cpus will get
395 * the cpu hotplug callbacks from smp_init()
396 */
397 amd_pmu_cpu_online(smp_processor_id());
398 return 0;
399}
400
401#else /* CONFIG_CPU_SUP_AMD */
402
403static int amd_pmu_init(void)
404{
405 return 0;
406}
407
408static void amd_pmu_cpu_online(int cpu)
409{
410}
411
412static void amd_pmu_cpu_offline(int cpu)
413{
414}
415
416#endif
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
new file mode 100644
index 000000000000..cf6590cf4a5f
--- /dev/null
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -0,0 +1,971 @@
1#ifdef CONFIG_CPU_SUP_INTEL
2
3/*
4 * Intel PerfMon v3. Used on Core2 and later.
5 */
6static const u64 intel_perfmon_event_map[] =
7{
8 [PERF_COUNT_HW_CPU_CYCLES] = 0x003c,
9 [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
10 [PERF_COUNT_HW_CACHE_REFERENCES] = 0x4f2e,
11 [PERF_COUNT_HW_CACHE_MISSES] = 0x412e,
12 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4,
13 [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5,
14 [PERF_COUNT_HW_BUS_CYCLES] = 0x013c,
15};
16
17static struct event_constraint intel_core_event_constraints[] =
18{
19 INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */
20 INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */
21 INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */
22 INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */
23 INTEL_EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */
24 INTEL_EVENT_CONSTRAINT(0xc1, 0x1), /* FP_COMP_INSTR_RET */
25 EVENT_CONSTRAINT_END
26};
27
28static struct event_constraint intel_core2_event_constraints[] =
29{
30 FIXED_EVENT_CONSTRAINT(0xc0, (0x3|(1ULL<<32))), /* INSTRUCTIONS_RETIRED */
31 FIXED_EVENT_CONSTRAINT(0x3c, (0x3|(1ULL<<33))), /* UNHALTED_CORE_CYCLES */
32 INTEL_EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */
33 INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */
34 INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */
35 INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */
36 INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */
37 INTEL_EVENT_CONSTRAINT(0x18, 0x1), /* IDLE_DURING_DIV */
38 INTEL_EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */
39 INTEL_EVENT_CONSTRAINT(0xa1, 0x1), /* RS_UOPS_DISPATCH_CYCLES */
40 INTEL_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED */
41 EVENT_CONSTRAINT_END
42};
43
44static struct event_constraint intel_nehalem_event_constraints[] =
45{
46 FIXED_EVENT_CONSTRAINT(0xc0, (0xf|(1ULL<<32))), /* INSTRUCTIONS_RETIRED */
47 FIXED_EVENT_CONSTRAINT(0x3c, (0xf|(1ULL<<33))), /* UNHALTED_CORE_CYCLES */
48 INTEL_EVENT_CONSTRAINT(0x40, 0x3), /* L1D_CACHE_LD */
49 INTEL_EVENT_CONSTRAINT(0x41, 0x3), /* L1D_CACHE_ST */
50 INTEL_EVENT_CONSTRAINT(0x42, 0x3), /* L1D_CACHE_LOCK */
51 INTEL_EVENT_CONSTRAINT(0x43, 0x3), /* L1D_ALL_REF */
52 INTEL_EVENT_CONSTRAINT(0x48, 0x3), /* L1D_PEND_MISS */
53 INTEL_EVENT_CONSTRAINT(0x4e, 0x3), /* L1D_PREFETCH */
54 INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */
55 INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */
56 EVENT_CONSTRAINT_END
57};
58
59static struct event_constraint intel_westmere_event_constraints[] =
60{
61 FIXED_EVENT_CONSTRAINT(0xc0, (0xf|(1ULL<<32))), /* INSTRUCTIONS_RETIRED */
62 FIXED_EVENT_CONSTRAINT(0x3c, (0xf|(1ULL<<33))), /* UNHALTED_CORE_CYCLES */
63 INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */
64 INTEL_EVENT_CONSTRAINT(0x60, 0x1), /* OFFCORE_REQUESTS_OUTSTANDING */
65 INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */
66 EVENT_CONSTRAINT_END
67};
68
69static struct event_constraint intel_gen_event_constraints[] =
70{
71 FIXED_EVENT_CONSTRAINT(0xc0, (0x3|(1ULL<<32))), /* INSTRUCTIONS_RETIRED */
72 FIXED_EVENT_CONSTRAINT(0x3c, (0x3|(1ULL<<33))), /* UNHALTED_CORE_CYCLES */
73 EVENT_CONSTRAINT_END
74};
75
76static u64 intel_pmu_event_map(int hw_event)
77{
78 return intel_perfmon_event_map[hw_event];
79}
80
81static __initconst u64 westmere_hw_cache_event_ids
82 [PERF_COUNT_HW_CACHE_MAX]
83 [PERF_COUNT_HW_CACHE_OP_MAX]
84 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
85{
86 [ C(L1D) ] = {
87 [ C(OP_READ) ] = {
88 [ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS */
89 [ C(RESULT_MISS) ] = 0x0151, /* L1D.REPL */
90 },
91 [ C(OP_WRITE) ] = {
 92		[ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETIRED.STORES */
93 [ C(RESULT_MISS) ] = 0x0251, /* L1D.M_REPL */
94 },
95 [ C(OP_PREFETCH) ] = {
96 [ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS */
97 [ C(RESULT_MISS) ] = 0x024e, /* L1D_PREFETCH.MISS */
98 },
99 },
100 [ C(L1I ) ] = {
101 [ C(OP_READ) ] = {
102 [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */
103 [ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */
104 },
105 [ C(OP_WRITE) ] = {
106 [ C(RESULT_ACCESS) ] = -1,
107 [ C(RESULT_MISS) ] = -1,
108 },
109 [ C(OP_PREFETCH) ] = {
110 [ C(RESULT_ACCESS) ] = 0x0,
111 [ C(RESULT_MISS) ] = 0x0,
112 },
113 },
114 [ C(LL ) ] = {
115 [ C(OP_READ) ] = {
116 [ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS */
117 [ C(RESULT_MISS) ] = 0x0224, /* L2_RQSTS.LD_MISS */
118 },
119 [ C(OP_WRITE) ] = {
120 [ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS */
121 [ C(RESULT_MISS) ] = 0x0824, /* L2_RQSTS.RFO_MISS */
122 },
123 [ C(OP_PREFETCH) ] = {
124 [ C(RESULT_ACCESS) ] = 0x4f2e, /* LLC Reference */
125 [ C(RESULT_MISS) ] = 0x412e, /* LLC Misses */
126 },
127 },
128 [ C(DTLB) ] = {
129 [ C(OP_READ) ] = {
130 [ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS */
131 [ C(RESULT_MISS) ] = 0x0108, /* DTLB_LOAD_MISSES.ANY */
132 },
133 [ C(OP_WRITE) ] = {
134		[ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETIRED.STORES */
135 [ C(RESULT_MISS) ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS */
136 },
137 [ C(OP_PREFETCH) ] = {
138 [ C(RESULT_ACCESS) ] = 0x0,
139 [ C(RESULT_MISS) ] = 0x0,
140 },
141 },
142 [ C(ITLB) ] = {
143 [ C(OP_READ) ] = {
144 [ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P */
145 [ C(RESULT_MISS) ] = 0x0185, /* ITLB_MISSES.ANY */
146 },
147 [ C(OP_WRITE) ] = {
148 [ C(RESULT_ACCESS) ] = -1,
149 [ C(RESULT_MISS) ] = -1,
150 },
151 [ C(OP_PREFETCH) ] = {
152 [ C(RESULT_ACCESS) ] = -1,
153 [ C(RESULT_MISS) ] = -1,
154 },
155 },
156 [ C(BPU ) ] = {
157 [ C(OP_READ) ] = {
158 [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
159 [ C(RESULT_MISS) ] = 0x03e8, /* BPU_CLEARS.ANY */
160 },
161 [ C(OP_WRITE) ] = {
162 [ C(RESULT_ACCESS) ] = -1,
163 [ C(RESULT_MISS) ] = -1,
164 },
165 [ C(OP_PREFETCH) ] = {
166 [ C(RESULT_ACCESS) ] = -1,
167 [ C(RESULT_MISS) ] = -1,
168 },
169 },
170};
171
172static __initconst u64 nehalem_hw_cache_event_ids
173 [PERF_COUNT_HW_CACHE_MAX]
174 [PERF_COUNT_HW_CACHE_OP_MAX]
175 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
176{
177 [ C(L1D) ] = {
178 [ C(OP_READ) ] = {
179 [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI */
180 [ C(RESULT_MISS) ] = 0x0140, /* L1D_CACHE_LD.I_STATE */
181 },
182 [ C(OP_WRITE) ] = {
183 [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI */
184 [ C(RESULT_MISS) ] = 0x0141, /* L1D_CACHE_ST.I_STATE */
185 },
186 [ C(OP_PREFETCH) ] = {
187 [ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS */
188 [ C(RESULT_MISS) ] = 0x024e, /* L1D_PREFETCH.MISS */
189 },
190 },
191 [ C(L1I ) ] = {
192 [ C(OP_READ) ] = {
193 [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */
194 [ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */
195 },
196 [ C(OP_WRITE) ] = {
197 [ C(RESULT_ACCESS) ] = -1,
198 [ C(RESULT_MISS) ] = -1,
199 },
200 [ C(OP_PREFETCH) ] = {
201 [ C(RESULT_ACCESS) ] = 0x0,
202 [ C(RESULT_MISS) ] = 0x0,
203 },
204 },
205 [ C(LL ) ] = {
206 [ C(OP_READ) ] = {
207 [ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS */
208 [ C(RESULT_MISS) ] = 0x0224, /* L2_RQSTS.LD_MISS */
209 },
210 [ C(OP_WRITE) ] = {
211 [ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS */
212 [ C(RESULT_MISS) ] = 0x0824, /* L2_RQSTS.RFO_MISS */
213 },
214 [ C(OP_PREFETCH) ] = {
215 [ C(RESULT_ACCESS) ] = 0x4f2e, /* LLC Reference */
216 [ C(RESULT_MISS) ] = 0x412e, /* LLC Misses */
217 },
218 },
219 [ C(DTLB) ] = {
220 [ C(OP_READ) ] = {
221 [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI (alias) */
222 [ C(RESULT_MISS) ] = 0x0108, /* DTLB_LOAD_MISSES.ANY */
223 },
224 [ C(OP_WRITE) ] = {
225 [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI (alias) */
226 [ C(RESULT_MISS) ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS */
227 },
228 [ C(OP_PREFETCH) ] = {
229 [ C(RESULT_ACCESS) ] = 0x0,
230 [ C(RESULT_MISS) ] = 0x0,
231 },
232 },
233 [ C(ITLB) ] = {
234 [ C(OP_READ) ] = {
235 [ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P */
236 [ C(RESULT_MISS) ] = 0x20c8, /* ITLB_MISS_RETIRED */
237 },
238 [ C(OP_WRITE) ] = {
239 [ C(RESULT_ACCESS) ] = -1,
240 [ C(RESULT_MISS) ] = -1,
241 },
242 [ C(OP_PREFETCH) ] = {
243 [ C(RESULT_ACCESS) ] = -1,
244 [ C(RESULT_MISS) ] = -1,
245 },
246 },
247 [ C(BPU ) ] = {
248 [ C(OP_READ) ] = {
249 [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
250 [ C(RESULT_MISS) ] = 0x03e8, /* BPU_CLEARS.ANY */
251 },
252 [ C(OP_WRITE) ] = {
253 [ C(RESULT_ACCESS) ] = -1,
254 [ C(RESULT_MISS) ] = -1,
255 },
256 [ C(OP_PREFETCH) ] = {
257 [ C(RESULT_ACCESS) ] = -1,
258 [ C(RESULT_MISS) ] = -1,
259 },
260 },
261};
262
263static __initconst u64 core2_hw_cache_event_ids
264 [PERF_COUNT_HW_CACHE_MAX]
265 [PERF_COUNT_HW_CACHE_OP_MAX]
266 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
267{
268 [ C(L1D) ] = {
269 [ C(OP_READ) ] = {
270 [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI */
271 [ C(RESULT_MISS) ] = 0x0140, /* L1D_CACHE_LD.I_STATE */
272 },
273 [ C(OP_WRITE) ] = {
274 [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI */
275 [ C(RESULT_MISS) ] = 0x0141, /* L1D_CACHE_ST.I_STATE */
276 },
277 [ C(OP_PREFETCH) ] = {
278 [ C(RESULT_ACCESS) ] = 0x104e, /* L1D_PREFETCH.REQUESTS */
279 [ C(RESULT_MISS) ] = 0,
280 },
281 },
282 [ C(L1I ) ] = {
283 [ C(OP_READ) ] = {
284 [ C(RESULT_ACCESS) ] = 0x0080, /* L1I.READS */
285 [ C(RESULT_MISS) ] = 0x0081, /* L1I.MISSES */
286 },
287 [ C(OP_WRITE) ] = {
288 [ C(RESULT_ACCESS) ] = -1,
289 [ C(RESULT_MISS) ] = -1,
290 },
291 [ C(OP_PREFETCH) ] = {
292 [ C(RESULT_ACCESS) ] = 0,
293 [ C(RESULT_MISS) ] = 0,
294 },
295 },
296 [ C(LL ) ] = {
297 [ C(OP_READ) ] = {
298 [ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI */
299 [ C(RESULT_MISS) ] = 0x4129, /* L2_LD.ISTATE */
300 },
301 [ C(OP_WRITE) ] = {
302 [ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI */
303 [ C(RESULT_MISS) ] = 0x412A, /* L2_ST.ISTATE */
304 },
305 [ C(OP_PREFETCH) ] = {
306 [ C(RESULT_ACCESS) ] = 0,
307 [ C(RESULT_MISS) ] = 0,
308 },
309 },
310 [ C(DTLB) ] = {
311 [ C(OP_READ) ] = {
312 [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI (alias) */
313 [ C(RESULT_MISS) ] = 0x0208, /* DTLB_MISSES.MISS_LD */
314 },
315 [ C(OP_WRITE) ] = {
316 [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI (alias) */
317 [ C(RESULT_MISS) ] = 0x0808, /* DTLB_MISSES.MISS_ST */
318 },
319 [ C(OP_PREFETCH) ] = {
320 [ C(RESULT_ACCESS) ] = 0,
321 [ C(RESULT_MISS) ] = 0,
322 },
323 },
324 [ C(ITLB) ] = {
325 [ C(OP_READ) ] = {
326 [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */
327 [ C(RESULT_MISS) ] = 0x1282, /* ITLBMISSES */
328 },
329 [ C(OP_WRITE) ] = {
330 [ C(RESULT_ACCESS) ] = -1,
331 [ C(RESULT_MISS) ] = -1,
332 },
333 [ C(OP_PREFETCH) ] = {
334 [ C(RESULT_ACCESS) ] = -1,
335 [ C(RESULT_MISS) ] = -1,
336 },
337 },
338 [ C(BPU ) ] = {
339 [ C(OP_READ) ] = {
340 [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */
341 [ C(RESULT_MISS) ] = 0x00c5, /* BP_INST_RETIRED.MISPRED */
342 },
343 [ C(OP_WRITE) ] = {
344 [ C(RESULT_ACCESS) ] = -1,
345 [ C(RESULT_MISS) ] = -1,
346 },
347 [ C(OP_PREFETCH) ] = {
348 [ C(RESULT_ACCESS) ] = -1,
349 [ C(RESULT_MISS) ] = -1,
350 },
351 },
352};
353
354static __initconst u64 atom_hw_cache_event_ids
355 [PERF_COUNT_HW_CACHE_MAX]
356 [PERF_COUNT_HW_CACHE_OP_MAX]
357 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
358{
359 [ C(L1D) ] = {
360 [ C(OP_READ) ] = {
361 [ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE.LD */
362 [ C(RESULT_MISS) ] = 0,
363 },
364 [ C(OP_WRITE) ] = {
365 [ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE.ST */
366 [ C(RESULT_MISS) ] = 0,
367 },
368 [ C(OP_PREFETCH) ] = {
369 [ C(RESULT_ACCESS) ] = 0x0,
370 [ C(RESULT_MISS) ] = 0,
371 },
372 },
373 [ C(L1I ) ] = {
374 [ C(OP_READ) ] = {
375 [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */
376 [ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */
377 },
378 [ C(OP_WRITE) ] = {
379 [ C(RESULT_ACCESS) ] = -1,
380 [ C(RESULT_MISS) ] = -1,
381 },
382 [ C(OP_PREFETCH) ] = {
383 [ C(RESULT_ACCESS) ] = 0,
384 [ C(RESULT_MISS) ] = 0,
385 },
386 },
387 [ C(LL ) ] = {
388 [ C(OP_READ) ] = {
389 [ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI */
390 [ C(RESULT_MISS) ] = 0x4129, /* L2_LD.ISTATE */
391 },
392 [ C(OP_WRITE) ] = {
393 [ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI */
394 [ C(RESULT_MISS) ] = 0x412A, /* L2_ST.ISTATE */
395 },
396 [ C(OP_PREFETCH) ] = {
397 [ C(RESULT_ACCESS) ] = 0,
398 [ C(RESULT_MISS) ] = 0,
399 },
400 },
401 [ C(DTLB) ] = {
402 [ C(OP_READ) ] = {
403 [ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE_LD.MESI (alias) */
404 [ C(RESULT_MISS) ] = 0x0508, /* DTLB_MISSES.MISS_LD */
405 },
406 [ C(OP_WRITE) ] = {
407 [ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE_ST.MESI (alias) */
408 [ C(RESULT_MISS) ] = 0x0608, /* DTLB_MISSES.MISS_ST */
409 },
410 [ C(OP_PREFETCH) ] = {
411 [ C(RESULT_ACCESS) ] = 0,
412 [ C(RESULT_MISS) ] = 0,
413 },
414 },
415 [ C(ITLB) ] = {
416 [ C(OP_READ) ] = {
417 [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */
418 [ C(RESULT_MISS) ] = 0x0282, /* ITLB.MISSES */
419 },
420 [ C(OP_WRITE) ] = {
421 [ C(RESULT_ACCESS) ] = -1,
422 [ C(RESULT_MISS) ] = -1,
423 },
424 [ C(OP_PREFETCH) ] = {
425 [ C(RESULT_ACCESS) ] = -1,
426 [ C(RESULT_MISS) ] = -1,
427 },
428 },
429 [ C(BPU ) ] = {
430 [ C(OP_READ) ] = {
431 [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */
432 [ C(RESULT_MISS) ] = 0x00c5, /* BP_INST_RETIRED.MISPRED */
433 },
434 [ C(OP_WRITE) ] = {
435 [ C(RESULT_ACCESS) ] = -1,
436 [ C(RESULT_MISS) ] = -1,
437 },
438 [ C(OP_PREFETCH) ] = {
439 [ C(RESULT_ACCESS) ] = -1,
440 [ C(RESULT_MISS) ] = -1,
441 },
442 },
443};
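
A hedged sketch, not part of the patch: these tables are indexed by the
generic perf hw-cache triplet (cache type, access op, access result), and a
user-visible config value splits into those three indices as below. The bit
layout assumed here is the generic cache-event encoding; the sample value is
illustrative only.

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t config = 0 | (0 << 8) | (1 << 16); /* L1D | OP_READ | RESULT_MISS */
	unsigned int type   =  config        & 0xff;
	unsigned int op     = (config >>  8) & 0xff;
	unsigned int result = (config >> 16) & 0xff;

	printf("type=%u op=%u result=%u\n", type, op, result);
	return 0;
}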
444
445static u64 intel_pmu_raw_event(u64 hw_event)
446{
447#define CORE_EVNTSEL_EVENT_MASK 0x000000FFULL
448#define CORE_EVNTSEL_UNIT_MASK 0x0000FF00ULL
449#define CORE_EVNTSEL_EDGE_MASK 0x00040000ULL
450#define CORE_EVNTSEL_INV_MASK 0x00800000ULL
451#define CORE_EVNTSEL_REG_MASK 0xFF000000ULL
452
453#define CORE_EVNTSEL_MASK \
454 (INTEL_ARCH_EVTSEL_MASK | \
455 INTEL_ARCH_UNIT_MASK | \
456 INTEL_ARCH_EDGE_MASK | \
457 INTEL_ARCH_INV_MASK | \
458 INTEL_ARCH_CNT_MASK)
459
460 return hw_event & CORE_EVNTSEL_MASK;
461}
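
A hedged sketch, not from the patch: the masks above keep only the
architectural event-select fields of a raw config. Decoding a hypothetical
value with the same field positions (event 0-7, unit mask 8-15, edge 18,
invert 23, counter mask 24-31):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t cfg = 0x004101c4ULL;	/* hypothetical raw config */

	printf("event %#llx\n", (unsigned long long)( cfg        & 0xffULL));
	printf("umask %#llx\n", (unsigned long long)((cfg >>  8) & 0xffULL));
	printf("edge  %llu\n",  (unsigned long long)((cfg >> 18) & 0x1ULL));
	printf("inv   %llu\n",  (unsigned long long)((cfg >> 23) & 0x1ULL));
	printf("cmask %#llx\n", (unsigned long long)((cfg >> 24) & 0xffULL));
	return 0;
}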
462
463static void intel_pmu_enable_bts(u64 config)
464{
465 unsigned long debugctlmsr;
466
467 debugctlmsr = get_debugctlmsr();
468
469 debugctlmsr |= X86_DEBUGCTL_TR;
470 debugctlmsr |= X86_DEBUGCTL_BTS;
471 debugctlmsr |= X86_DEBUGCTL_BTINT;
472
473 if (!(config & ARCH_PERFMON_EVENTSEL_OS))
474 debugctlmsr |= X86_DEBUGCTL_BTS_OFF_OS;
475
476 if (!(config & ARCH_PERFMON_EVENTSEL_USR))
477 debugctlmsr |= X86_DEBUGCTL_BTS_OFF_USR;
478
479 update_debugctlmsr(debugctlmsr);
480}
481
482static void intel_pmu_disable_bts(void)
483{
484 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
485 unsigned long debugctlmsr;
486
487 if (!cpuc->ds)
488 return;
489
490 debugctlmsr = get_debugctlmsr();
491
492 debugctlmsr &=
493 ~(X86_DEBUGCTL_TR | X86_DEBUGCTL_BTS | X86_DEBUGCTL_BTINT |
494 X86_DEBUGCTL_BTS_OFF_OS | X86_DEBUGCTL_BTS_OFF_USR);
495
496 update_debugctlmsr(debugctlmsr);
497}
498
499static void intel_pmu_disable_all(void)
500{
501 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
502
503 wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
504
505 if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask))
506 intel_pmu_disable_bts();
507}
508
509static void intel_pmu_enable_all(void)
510{
511 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
512
513 wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl);
514
515 if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) {
516 struct perf_event *event =
517 cpuc->events[X86_PMC_IDX_FIXED_BTS];
518
519 if (WARN_ON_ONCE(!event))
520 return;
521
522 intel_pmu_enable_bts(event->hw.config);
523 }
524}
525
526static inline u64 intel_pmu_get_status(void)
527{
528 u64 status;
529
530 rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
531
532 return status;
533}
534
535static inline void intel_pmu_ack_status(u64 ack)
536{
537 wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack);
538}
539
540static inline void
541intel_pmu_disable_fixed(struct hw_perf_event *hwc, int __idx)
542{
543 int idx = __idx - X86_PMC_IDX_FIXED;
544 u64 ctrl_val, mask;
545
546 mask = 0xfULL << (idx * 4);
547
548 rdmsrl(hwc->config_base, ctrl_val);
549 ctrl_val &= ~mask;
550 (void)checking_wrmsrl(hwc->config_base, ctrl_val);
551}
552
553static void intel_pmu_drain_bts_buffer(void)
554{
555 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
556 struct debug_store *ds = cpuc->ds;
557 struct bts_record {
558 u64 from;
559 u64 to;
560 u64 flags;
561 };
562 struct perf_event *event = cpuc->events[X86_PMC_IDX_FIXED_BTS];
563 struct bts_record *at, *top;
564 struct perf_output_handle handle;
565 struct perf_event_header header;
566 struct perf_sample_data data;
567 struct pt_regs regs;
568
569 if (!event)
570 return;
571
572 if (!ds)
573 return;
574
575 at = (struct bts_record *)(unsigned long)ds->bts_buffer_base;
576 top = (struct bts_record *)(unsigned long)ds->bts_index;
577
578 if (top <= at)
579 return;
580
581 ds->bts_index = ds->bts_buffer_base;
582
583
584 data.period = event->hw.last_period;
585 data.addr = 0;
586 data.raw = NULL;
587 regs.ip = 0;
588
589 /*
590 * Prepare a generic sample, i.e. fill in the invariant fields.
591 * We will overwrite the from and to address before we output
592 * the sample.
593 */
594 perf_prepare_sample(&header, &data, event, &regs);
595
596 if (perf_output_begin(&handle, event,
597 header.size * (top - at), 1, 1))
598 return;
599
600 for (; at < top; at++) {
601 data.ip = at->from;
602 data.addr = at->to;
603
604 perf_output_sample(&handle, &header, &data, event);
605 }
606
607 perf_output_end(&handle);
608
609 /* There's new data available. */
610 event->hw.interrupts++;
611 event->pending_kill = POLL_IN;
612}
613
614static inline void
615intel_pmu_disable_event(struct hw_perf_event *hwc, int idx)
616{
617 if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) {
618 intel_pmu_disable_bts();
619 intel_pmu_drain_bts_buffer();
620 return;
621 }
622
623 if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
624 intel_pmu_disable_fixed(hwc, idx);
625 return;
626 }
627
628 x86_pmu_disable_event(hwc, idx);
629}
630
631static inline void
632intel_pmu_enable_fixed(struct hw_perf_event *hwc, int __idx)
633{
634 int idx = __idx - X86_PMC_IDX_FIXED;
635 u64 ctrl_val, bits, mask;
636 int err;
637
638 /*
639 * Enable IRQ generation (0x8),
640 * and enable ring-3 counting (0x2) and ring-0 counting (0x1)
641 * if requested:
642 */
643 bits = 0x8ULL;
644 if (hwc->config & ARCH_PERFMON_EVENTSEL_USR)
645 bits |= 0x2;
646 if (hwc->config & ARCH_PERFMON_EVENTSEL_OS)
647 bits |= 0x1;
648
649 /*
650 * ANY bit is supported in v3 and up
651 */
652 if (x86_pmu.version > 2 && hwc->config & ARCH_PERFMON_EVENTSEL_ANY)
653 bits |= 0x4;
654
655 bits <<= (idx * 4);
656 mask = 0xfULL << (idx * 4);
657
658 rdmsrl(hwc->config_base, ctrl_val);
659 ctrl_val &= ~mask;
660 ctrl_val |= bits;
661 err = checking_wrmsrl(hwc->config_base, ctrl_val);
662}
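
A small sketch, assuming each fixed counter owns one 4-bit field in the
control MSR as the shifts above imply; the index and starting value here are
made up for illustration:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	int idx = 1;				/* hypothetical fixed counter 1 */
	uint64_t bits = 0x8 | 0x2 | 0x1;	/* PMI + ring-3 + ring-0 */
	uint64_t mask = 0xfULL << (idx * 4);
	uint64_t ctrl = 0x3;			/* pretend counter 0 is already enabled */

	ctrl = (ctrl & ~mask) | (bits << (idx * 4));
	printf("FIXED_CTR_CTRL = %#llx\n", (unsigned long long)ctrl); /* 0xb3 */
	return 0;
}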
663
664static void intel_pmu_enable_event(struct hw_perf_event *hwc, int idx)
665{
666 if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) {
667 if (!__get_cpu_var(cpu_hw_events).enabled)
668 return;
669
670 intel_pmu_enable_bts(hwc->config);
671 return;
672 }
673
674 if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
675 intel_pmu_enable_fixed(hwc, idx);
676 return;
677 }
678
679 __x86_pmu_enable_event(hwc, idx);
680}
681
682/*
683 * Save and restart an expired event. Called by NMI contexts,
684 * so it has to be careful about preempting normal event ops:
685 */
686static int intel_pmu_save_and_restart(struct perf_event *event)
687{
688 struct hw_perf_event *hwc = &event->hw;
689 int idx = hwc->idx;
690 int ret;
691
692 x86_perf_event_update(event, hwc, idx);
693 ret = x86_perf_event_set_period(event, hwc, idx);
694
695 return ret;
696}
697
698static void intel_pmu_reset(void)
699{
700 struct debug_store *ds = __get_cpu_var(cpu_hw_events).ds;
701 unsigned long flags;
702 int idx;
703
704 if (!x86_pmu.num_events)
705 return;
706
707 local_irq_save(flags);
708
709 printk("clearing PMU state on CPU#%d\n", smp_processor_id());
710
711 for (idx = 0; idx < x86_pmu.num_events; idx++) {
712 checking_wrmsrl(x86_pmu.eventsel + idx, 0ull);
713 checking_wrmsrl(x86_pmu.perfctr + idx, 0ull);
714 }
715 for (idx = 0; idx < x86_pmu.num_events_fixed; idx++) {
716 checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull);
717 }
718 if (ds)
719 ds->bts_index = ds->bts_buffer_base;
720
721 local_irq_restore(flags);
722}
723
724/*
725 * This handler is triggered by the local APIC, so the APIC IRQ handling
726 * rules apply:
727 */
728static int intel_pmu_handle_irq(struct pt_regs *regs)
729{
730 struct perf_sample_data data;
731 struct cpu_hw_events *cpuc;
732 int bit, loops;
733 u64 ack, status;
734
735 data.addr = 0;
736 data.raw = NULL;
737
738 cpuc = &__get_cpu_var(cpu_hw_events);
739
740 perf_disable();
741 intel_pmu_drain_bts_buffer();
742 status = intel_pmu_get_status();
743 if (!status) {
744 perf_enable();
745 return 0;
746 }
747
748 loops = 0;
749again:
750 if (++loops > 100) {
751 WARN_ONCE(1, "perfevents: irq loop stuck!\n");
752 perf_event_print_debug();
753 intel_pmu_reset();
754 perf_enable();
755 return 1;
756 }
757
758 inc_irq_stat(apic_perf_irqs);
759 ack = status;
760 for_each_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
761 struct perf_event *event = cpuc->events[bit];
762
763 clear_bit(bit, (unsigned long *) &status);
764 if (!test_bit(bit, cpuc->active_mask))
765 continue;
766
767 if (!intel_pmu_save_and_restart(event))
768 continue;
769
770 data.period = event->hw.last_period;
771
772 if (perf_event_overflow(event, 1, &data, regs))
773 intel_pmu_disable_event(&event->hw, bit);
774 }
775
776 intel_pmu_ack_status(ack);
777
778 /*
779 * Repeat if there is more work to be done:
780 */
781 status = intel_pmu_get_status();
782 if (status)
783 goto again;
784
785 perf_enable();
786
787 return 1;
788}
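
The handler above walks the global overflow status word bit by bit and
services each set counter. A toy version of that loop, assuming the usual
layout with general-purpose counters from bit 0 and fixed counters from
bit 32; the status value is an example, not taken from hardware:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t status = (1ULL << 0) | (1ULL << 33);	/* example overflow bits */
	int bit;

	for (bit = 0; bit < 64; bit++)
		if (status & (1ULL << bit))
			printf("counter index %d overflowed\n", bit);
	return 0;
}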
789
790static struct event_constraint bts_constraint =
791 EVENT_CONSTRAINT(0, 1ULL << X86_PMC_IDX_FIXED_BTS, 0);
792
793static struct event_constraint *
794intel_special_constraints(struct perf_event *event)
795{
796 unsigned int hw_event;
797
798 hw_event = event->hw.config & INTEL_ARCH_EVENT_MASK;
799
800 if (unlikely((hw_event ==
801 x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS)) &&
802 (event->hw.sample_period == 1))) {
803
804 return &bts_constraint;
805 }
806 return NULL;
807}
808
809static struct event_constraint *
810intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
811{
812 struct event_constraint *c;
813
814 c = intel_special_constraints(event);
815 if (c)
816 return c;
817
818 return x86_get_event_constraints(cpuc, event);
819}
820
821static __initconst struct x86_pmu core_pmu = {
822 .name = "core",
823 .handle_irq = x86_pmu_handle_irq,
824 .disable_all = x86_pmu_disable_all,
825 .enable_all = x86_pmu_enable_all,
826 .enable = x86_pmu_enable_event,
827 .disable = x86_pmu_disable_event,
828 .eventsel = MSR_ARCH_PERFMON_EVENTSEL0,
829 .perfctr = MSR_ARCH_PERFMON_PERFCTR0,
830 .event_map = intel_pmu_event_map,
831 .raw_event = intel_pmu_raw_event,
832 .max_events = ARRAY_SIZE(intel_perfmon_event_map),
833 .apic = 1,
834 /*
835 * Intel PMCs cannot be accessed sanely above 32 bit width,
836 * so we install an artificial 1<<31 period regardless of
837 * the generic event period:
838 */
839 .max_period = (1ULL << 31) - 1,
840 .get_event_constraints = intel_get_event_constraints,
841 .event_constraints = intel_core_event_constraints,
842};
843
844static __initconst struct x86_pmu intel_pmu = {
845 .name = "Intel",
846 .handle_irq = intel_pmu_handle_irq,
847 .disable_all = intel_pmu_disable_all,
848 .enable_all = intel_pmu_enable_all,
849 .enable = intel_pmu_enable_event,
850 .disable = intel_pmu_disable_event,
851 .eventsel = MSR_ARCH_PERFMON_EVENTSEL0,
852 .perfctr = MSR_ARCH_PERFMON_PERFCTR0,
853 .event_map = intel_pmu_event_map,
854 .raw_event = intel_pmu_raw_event,
855 .max_events = ARRAY_SIZE(intel_perfmon_event_map),
856 .apic = 1,
857 /*
858 * Intel PMCs cannot be accessed sanely above 32 bit width,
859 * so we install an artificial 1<<31 period regardless of
860 * the generic event period:
861 */
862 .max_period = (1ULL << 31) - 1,
863 .enable_bts = intel_pmu_enable_bts,
864 .disable_bts = intel_pmu_disable_bts,
865 .get_event_constraints = intel_get_event_constraints
866};
867
868static __init int intel_pmu_init(void)
869{
870 union cpuid10_edx edx;
871 union cpuid10_eax eax;
872 unsigned int unused;
873 unsigned int ebx;
874 int version;
875
876 if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
877 /* check for P6 processor family */
878 if (boot_cpu_data.x86 == 6) {
879 return p6_pmu_init();
880 } else {
881 return -ENODEV;
882 }
883 }
884
885 /*
886 * Check whether the Architectural PerfMon supports
887 * Branch Misses Retired hw_event or not.
888 */
889 cpuid(10, &eax.full, &ebx, &unused, &edx.full);
890 if (eax.split.mask_length <= ARCH_PERFMON_BRANCH_MISSES_RETIRED)
891 return -ENODEV;
892
893 version = eax.split.version_id;
894 if (version < 2)
895 x86_pmu = core_pmu;
896 else
897 x86_pmu = intel_pmu;
898
899 x86_pmu.version = version;
900 x86_pmu.num_events = eax.split.num_events;
901 x86_pmu.event_bits = eax.split.bit_width;
902 x86_pmu.event_mask = (1ULL << eax.split.bit_width) - 1;
903
904 /*
905 * Quirk: v2 perfmon does not report fixed-purpose events, so
906 * assume at least 3 events:
907 */
908 if (version > 1)
909 x86_pmu.num_events_fixed = max((int)edx.split.num_events_fixed, 3);
910
911 /*
912 * Install the hw-cache-events table:
913 */
914 switch (boot_cpu_data.x86_model) {
915 case 14: /* 65 nm core solo/duo, "Yonah" */
916 pr_cont("Core events, ");
917 break;
918
919 case 15: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */
920 case 22: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */
921 case 23: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */
922 case 29: /* six-core 45 nm xeon "Dunnington" */
923 memcpy(hw_cache_event_ids, core2_hw_cache_event_ids,
924 sizeof(hw_cache_event_ids));
925
926 x86_pmu.event_constraints = intel_core2_event_constraints;
927 pr_cont("Core2 events, ");
928 break;
929
930 case 26: /* 45 nm nehalem, "Bloomfield" */
931 case 30: /* 45 nm nehalem, "Lynnfield" */
932 memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids,
933 sizeof(hw_cache_event_ids));
934
935 x86_pmu.event_constraints = intel_nehalem_event_constraints;
936 pr_cont("Nehalem/Corei7 events, ");
937 break;
938 case 28:
939 memcpy(hw_cache_event_ids, atom_hw_cache_event_ids,
940 sizeof(hw_cache_event_ids));
941
942 x86_pmu.event_constraints = intel_gen_event_constraints;
943 pr_cont("Atom events, ");
944 break;
945
946 case 37: /* 32 nm nehalem, "Clarkdale" */
947 case 44: /* 32 nm nehalem, "Gulftown" */
948 memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids,
949 sizeof(hw_cache_event_ids));
950
951 x86_pmu.event_constraints = intel_westmere_event_constraints;
952 pr_cont("Westmere events, ");
953 break;
954 default:
955 /*
956 * default constraints for v2 and up
957 */
958 x86_pmu.event_constraints = intel_gen_event_constraints;
959 pr_cont("generic architected perfmon, ");
960 }
961 return 0;
962}
963
964#else /* CONFIG_CPU_SUP_INTEL */
965
966static int intel_pmu_init(void)
967{
968 return 0;
969}
970
971#endif /* CONFIG_CPU_SUP_INTEL */
diff --git a/arch/x86/kernel/cpu/perf_event_p6.c b/arch/x86/kernel/cpu/perf_event_p6.c
new file mode 100644
index 000000000000..1ca5ba078afd
--- /dev/null
+++ b/arch/x86/kernel/cpu/perf_event_p6.c
@@ -0,0 +1,157 @@
1#ifdef CONFIG_CPU_SUP_INTEL
2
3/*
4 * Not sure about some of these
5 */
6static const u64 p6_perfmon_event_map[] =
7{
8 [PERF_COUNT_HW_CPU_CYCLES] = 0x0079,
9 [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
10 [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0f2e,
11 [PERF_COUNT_HW_CACHE_MISSES] = 0x012e,
12 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4,
13 [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5,
14 [PERF_COUNT_HW_BUS_CYCLES] = 0x0062,
15};
16
17static u64 p6_pmu_event_map(int hw_event)
18{
19 return p6_perfmon_event_map[hw_event];
20}
21
22/*
23 * Event setting that is specified not to count anything.
24 * We use this to effectively disable a counter.
25 *
26 * L2_RQSTS with 0 MESI unit mask.
27 */
28#define P6_NOP_EVENT 0x0000002EULL
29
30static u64 p6_pmu_raw_event(u64 hw_event)
31{
32#define P6_EVNTSEL_EVENT_MASK 0x000000FFULL
33#define P6_EVNTSEL_UNIT_MASK 0x0000FF00ULL
34#define P6_EVNTSEL_EDGE_MASK 0x00040000ULL
35#define P6_EVNTSEL_INV_MASK 0x00800000ULL
36#define P6_EVNTSEL_REG_MASK 0xFF000000ULL
37
38#define P6_EVNTSEL_MASK \
39 (P6_EVNTSEL_EVENT_MASK | \
40 P6_EVNTSEL_UNIT_MASK | \
41 P6_EVNTSEL_EDGE_MASK | \
42 P6_EVNTSEL_INV_MASK | \
43 P6_EVNTSEL_REG_MASK)
44
45 return hw_event & P6_EVNTSEL_MASK;
46}
47
48static struct event_constraint p6_event_constraints[] =
49{
50 INTEL_EVENT_CONSTRAINT(0xc1, 0x1), /* FLOPS */
51 INTEL_EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */
52 INTEL_EVENT_CONSTRAINT(0x11, 0x1), /* FP_ASSIST */
53 INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */
54 INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */
55 INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */
56 EVENT_CONSTRAINT_END
57};
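
Each constraint above pairs an event code with a counter bitmask. A hedged
sketch of consulting such a mask, assuming bit n set means counter n may
host the event; the values are examples:

#include <stdio.h>

int main(void)
{
	unsigned long cmask = 0x2;	/* e.g. the MUL/DIV entries above */
	int idx;

	for (idx = 0; idx < 2; idx++)
		printf("counter %d: %s\n", idx,
		       (cmask & (1UL << idx)) ? "allowed" : "not allowed");
	return 0;
}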
58
59static void p6_pmu_disable_all(void)
60{
61 u64 val;
62
63 /* p6 only has one enable register */
64 rdmsrl(MSR_P6_EVNTSEL0, val);
65 val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE;
66 wrmsrl(MSR_P6_EVNTSEL0, val);
67}
68
69static void p6_pmu_enable_all(void)
70{
71 unsigned long val;
72
73 /* p6 only has one enable register */
74 rdmsrl(MSR_P6_EVNTSEL0, val);
75 val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
76 wrmsrl(MSR_P6_EVNTSEL0, val);
77}
78
79static inline void
80p6_pmu_disable_event(struct hw_perf_event *hwc, int idx)
81{
82 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
83 u64 val = P6_NOP_EVENT;
84
85 if (cpuc->enabled)
86 val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
87
88 (void)checking_wrmsrl(hwc->config_base + idx, val);
89}
90
91static void p6_pmu_enable_event(struct hw_perf_event *hwc, int idx)
92{
93 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
94 u64 val;
95
96 val = hwc->config;
97 if (cpuc->enabled)
98 val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
99
100 (void)checking_wrmsrl(hwc->config_base + idx, val);
101}
102
103static __initconst struct x86_pmu p6_pmu = {
104 .name = "p6",
105 .handle_irq = x86_pmu_handle_irq,
106 .disable_all = p6_pmu_disable_all,
107 .enable_all = p6_pmu_enable_all,
108 .enable = p6_pmu_enable_event,
109 .disable = p6_pmu_disable_event,
110 .eventsel = MSR_P6_EVNTSEL0,
111 .perfctr = MSR_P6_PERFCTR0,
112 .event_map = p6_pmu_event_map,
113 .raw_event = p6_pmu_raw_event,
114 .max_events = ARRAY_SIZE(p6_perfmon_event_map),
115 .apic = 1,
116 .max_period = (1ULL << 31) - 1,
117 .version = 0,
118 .num_events = 2,
119 /*
120 * Events have 40 bits implemented. However they are designed such
121 * that bits [32-39] are sign extensions of bit 31. As such the
122 * effective width of an event for a P6-like PMU is 32 bits only.
123 *
124 * See IA-32 Intel Architecture Software developer manual Vol 3B
125 */
126 .event_bits = 32,
127 .event_mask = (1ULL << 32) - 1,
128 .get_event_constraints = x86_get_event_constraints,
129 .event_constraints = p6_event_constraints,
130};
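
A toy model of the sign-extension behaviour described in the comment above:
with only 32 effective bits, a value whose bit 31 is set shows up mirrored
into bits 32-39. This is an editorial illustration, not real MSR access:

#include <stdio.h>
#include <stdint.h>

static uint64_t p6_counter_model(uint32_t written)
{
	uint64_t v = written;

	if (v & (1ULL << 31))		/* bit 31 propagates into bits 32-39 */
		v |= 0xffULL << 32;
	return v;
}

int main(void)
{
	printf("%#llx\n", (unsigned long long)p6_counter_model(0x80000000u));
	return 0;
}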
131
132static __init int p6_pmu_init(void)
133{
134 switch (boot_cpu_data.x86_model) {
135 case 1:
136 case 3: /* Pentium Pro */
137 case 5:
138 case 6: /* Pentium II */
139 case 7:
140 case 8:
141 case 11: /* Pentium III */
142 case 9:
143 case 13:
144 /* Pentium M */
145 break;
146 default:
147 pr_cont("unsupported p6 CPU model %d ",
148 boot_cpu_data.x86_model);
149 return -ENODEV;
150 }
151
152 x86_pmu = p6_pmu;
153
154 return 0;
155}
156
157#endif /* CONFIG_CPU_SUP_INTEL */
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c
index 898df9719afb..74f4e85a5727 100644
--- a/arch/x86/kernel/cpu/perfctr-watchdog.c
+++ b/arch/x86/kernel/cpu/perfctr-watchdog.c
@@ -115,17 +115,6 @@ int avail_to_resrv_perfctr_nmi_bit(unsigned int counter)
115 115
116 return !test_bit(counter, perfctr_nmi_owner); 116 return !test_bit(counter, perfctr_nmi_owner);
117} 117}
118
119/* checks the an msr for availability */
120int avail_to_resrv_perfctr_nmi(unsigned int msr)
121{
122 unsigned int counter;
123
124 counter = nmi_perfctr_msr_to_bit(msr);
125 BUG_ON(counter > NMI_MAX_COUNTER_BITS);
126
127 return !test_bit(counter, perfctr_nmi_owner);
128}
129EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit); 118EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit);
130 119
131int reserve_perfctr_nmi(unsigned int msr) 120int reserve_perfctr_nmi(unsigned int msr)
diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c
index cb27fd6136c9..83e5e628de73 100644
--- a/arch/x86/kernel/cpuid.c
+++ b/arch/x86/kernel/cpuid.c
@@ -229,7 +229,7 @@ static void __exit cpuid_exit(void)
229 for_each_online_cpu(cpu) 229 for_each_online_cpu(cpu)
230 cpuid_device_destroy(cpu); 230 cpuid_device_destroy(cpu);
231 class_destroy(cpuid_class); 231 class_destroy(cpuid_class);
232 unregister_chrdev(CPUID_MAJOR, "cpu/cpuid"); 232 __unregister_chrdev(CPUID_MAJOR, 0, NR_CPUS, "cpu/cpuid");
233 unregister_hotcpu_notifier(&cpuid_class_cpu_notifier); 233 unregister_hotcpu_notifier(&cpuid_class_cpu_notifier);
234} 234}
235 235
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c
index ae775ca47b25..11540a189d93 100644
--- a/arch/x86/kernel/dumpstack_32.c
+++ b/arch/x86/kernel/dumpstack_32.c
@@ -18,11 +18,6 @@
18 18
19#include "dumpstack.h" 19#include "dumpstack.h"
20 20
21/* Just a stub for now */
22int x86_is_stack_id(int id, char *name)
23{
24 return 0;
25}
26 21
27void dump_trace(struct task_struct *task, struct pt_regs *regs, 22void dump_trace(struct task_struct *task, struct pt_regs *regs,
28 unsigned long *stack, unsigned long bp, 23 unsigned long *stack, unsigned long bp,
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index 0ad9597073f5..dce99abb4496 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -33,11 +33,6 @@ static char x86_stack_ids[][8] = {
33#endif 33#endif
34}; 34};
35 35
36int x86_is_stack_id(int id, char *name)
37{
38 return x86_stack_ids[id - 1] == name;
39}
40
41static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, 36static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
42 unsigned *usedp, char **idp) 37 unsigned *usedp, char **idp)
43{ 38{
@@ -291,6 +286,7 @@ void show_registers(struct pt_regs *regs)
291 286
292 sp = regs->sp; 287 sp = regs->sp;
293 printk("CPU %d ", cpu); 288 printk("CPU %d ", cpu);
289 print_modules();
294 __show_regs(regs, 1); 290 __show_regs(regs, 1);
295 printk("Process %s (pid: %d, threadinfo %p, task %p)\n", 291 printk("Process %s (pid: %d, threadinfo %p, task %p)\n",
296 cur->comm, cur->pid, task_thread_info(cur), cur); 292 cur->comm, cur->pid, task_thread_info(cur), cur);
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index a1a7876cadcb..a966b753e496 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -517,11 +517,19 @@ u64 __init e820_remove_range(u64 start, u64 size, unsigned old_type,
517 int checktype) 517 int checktype)
518{ 518{
519 int i; 519 int i;
520 u64 end;
520 u64 real_removed_size = 0; 521 u64 real_removed_size = 0;
521 522
522 if (size > (ULLONG_MAX - start)) 523 if (size > (ULLONG_MAX - start))
523 size = ULLONG_MAX - start; 524 size = ULLONG_MAX - start;
524 525
526 end = start + size;
527 printk(KERN_DEBUG "e820 remove range: %016Lx - %016Lx ",
528 (unsigned long long) start,
529 (unsigned long long) end);
530 e820_print_type(old_type);
531 printk(KERN_CONT "\n");
532
525 for (i = 0; i < e820.nr_map; i++) { 533 for (i = 0; i < e820.nr_map; i++) {
526 struct e820entry *ei = &e820.map[i]; 534 struct e820entry *ei = &e820.map[i];
527 u64 final_start, final_end; 535 u64 final_start, final_end;
diff --git a/arch/x86/kernel/efi.c b/arch/x86/kernel/efi.c
index cdcfb122f256..c2fa9b8b497e 100644
--- a/arch/x86/kernel/efi.c
+++ b/arch/x86/kernel/efi.c
@@ -362,7 +362,7 @@ void __init efi_init(void)
362 printk(KERN_ERR PFX "Could not map the firmware vendor!\n"); 362 printk(KERN_ERR PFX "Could not map the firmware vendor!\n");
363 early_iounmap(tmp, 2); 363 early_iounmap(tmp, 2);
364 364
365 printk(KERN_INFO "EFI v%u.%.02u by %s \n", 365 printk(KERN_INFO "EFI v%u.%.02u by %s\n",
366 efi.systab->hdr.revision >> 16, 366 efi.systab->hdr.revision >> 16,
367 efi.systab->hdr.revision & 0xffff, vendor); 367 efi.systab->hdr.revision & 0xffff, vendor);
368 368
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 309689245431..cd37469b54ee 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -30,14 +30,32 @@
30 30
31#ifdef CONFIG_DYNAMIC_FTRACE 31#ifdef CONFIG_DYNAMIC_FTRACE
32 32
33/*
34 * modifying_code is set to notify NMIs that they need to use
35 * memory barriers when entering or exiting. But we don't want
36 * to burden NMIs with unnecessary memory barriers when code
37 * modification is not being done (which is most of the time).
38 *
39 * A mutex is already held when ftrace_arch_code_modify_prepare
40 * and post_process are called. No locks need to be taken here.
41 *
42 * Stop machine will make sure currently running NMIs are done
43 * and new NMIs will see the updated variable before we need
44 * to worry about NMIs doing memory barriers.
45 */
46static int modifying_code __read_mostly;
47static DEFINE_PER_CPU(int, save_modifying_code);
48
33int ftrace_arch_code_modify_prepare(void) 49int ftrace_arch_code_modify_prepare(void)
34{ 50{
35 set_kernel_text_rw(); 51 set_kernel_text_rw();
52 modifying_code = 1;
36 return 0; 53 return 0;
37} 54}
38 55
39int ftrace_arch_code_modify_post_process(void) 56int ftrace_arch_code_modify_post_process(void)
40{ 57{
58 modifying_code = 0;
41 set_kernel_text_ro(); 59 set_kernel_text_ro();
42 return 0; 60 return 0;
43} 61}
@@ -149,6 +167,11 @@ static void ftrace_mod_code(void)
149 167
150void ftrace_nmi_enter(void) 168void ftrace_nmi_enter(void)
151{ 169{
170 __get_cpu_var(save_modifying_code) = modifying_code;
171
172 if (!__get_cpu_var(save_modifying_code))
173 return;
174
152 if (atomic_inc_return(&nmi_running) & MOD_CODE_WRITE_FLAG) { 175 if (atomic_inc_return(&nmi_running) & MOD_CODE_WRITE_FLAG) {
153 smp_rmb(); 176 smp_rmb();
154 ftrace_mod_code(); 177 ftrace_mod_code();
@@ -160,6 +183,9 @@ void ftrace_nmi_enter(void)
160 183
161void ftrace_nmi_exit(void) 184void ftrace_nmi_exit(void)
162{ 185{
186 if (!__get_cpu_var(save_modifying_code))
187 return;
188
163 /* Finish all executions before clearing nmi_running */ 189 /* Finish all executions before clearing nmi_running */
164 smp_mb(); 190 smp_mb();
165 atomic_dec(&nmi_running); 191 atomic_dec(&nmi_running);
@@ -484,13 +510,3 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
484 } 510 }
485} 511}
486#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ 512#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
487
488#ifdef CONFIG_FTRACE_SYSCALLS
489
490extern unsigned long *sys_call_table;
491
492unsigned long __init arch_syscall_addr(int nr)
493{
494 return (unsigned long)(&sys_call_table)[nr];
495}
496#endif
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index ba6e65884603..ee4fa1bfcb33 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -34,6 +34,8 @@
34 */ 34 */
35unsigned long hpet_address; 35unsigned long hpet_address;
36u8 hpet_blockid; /* OS timer block num */ 36u8 hpet_blockid; /* OS timer block num */
37u8 hpet_msi_disable;
38
37#ifdef CONFIG_PCI_MSI 39#ifdef CONFIG_PCI_MSI
38static unsigned long hpet_num_timers; 40static unsigned long hpet_num_timers;
39#endif 41#endif
@@ -264,7 +266,7 @@ static void hpet_resume_device(void)
264 force_hpet_resume(); 266 force_hpet_resume();
265} 267}
266 268
267static void hpet_resume_counter(void) 269static void hpet_resume_counter(struct clocksource *cs)
268{ 270{
269 hpet_resume_device(); 271 hpet_resume_device();
270 hpet_restart_counter(); 272 hpet_restart_counter();
@@ -596,6 +598,9 @@ static void hpet_msi_capability_lookup(unsigned int start_timer)
596 unsigned int num_timers_used = 0; 598 unsigned int num_timers_used = 0;
597 int i; 599 int i;
598 600
601 if (hpet_msi_disable)
602 return;
603
599 if (boot_cpu_has(X86_FEATURE_ARAT)) 604 if (boot_cpu_has(X86_FEATURE_ARAT))
600 return; 605 return;
601 id = hpet_readl(HPET_ID); 606 id = hpet_readl(HPET_ID);
@@ -928,6 +933,9 @@ static __init int hpet_late_init(void)
928 hpet_reserve_platform_timers(hpet_readl(HPET_ID)); 933 hpet_reserve_platform_timers(hpet_readl(HPET_ID));
929 hpet_print_config(); 934 hpet_print_config();
930 935
936 if (hpet_msi_disable)
937 return 0;
938
931 if (boot_cpu_has(X86_FEATURE_ARAT)) 939 if (boot_cpu_has(X86_FEATURE_ARAT))
932 return 0; 940 return 0;
933 941
diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c
index 05d5fec64a94..dca2802c666f 100644
--- a/arch/x86/kernel/hw_breakpoint.c
+++ b/arch/x86/kernel/hw_breakpoint.c
@@ -212,25 +212,6 @@ static int arch_check_va_in_kernelspace(unsigned long va, u8 hbp_len)
212 return (va >= TASK_SIZE) && ((va + len - 1) >= TASK_SIZE); 212 return (va >= TASK_SIZE) && ((va + len - 1) >= TASK_SIZE);
213} 213}
214 214
215/*
216 * Store a breakpoint's encoded address, length, and type.
217 */
218static int arch_store_info(struct perf_event *bp)
219{
220 struct arch_hw_breakpoint *info = counter_arch_bp(bp);
221 /*
222 * For kernel-addresses, either the address or symbol name can be
223 * specified.
224 */
225 if (info->name)
226 info->address = (unsigned long)
227 kallsyms_lookup_name(info->name);
228 if (info->address)
229 return 0;
230
231 return -EINVAL;
232}
233
234int arch_bp_generic_fields(int x86_len, int x86_type, 215int arch_bp_generic_fields(int x86_len, int x86_type,
235 int *gen_len, int *gen_type) 216 int *gen_len, int *gen_type)
236{ 217{
@@ -362,10 +343,13 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp,
362 return ret; 343 return ret;
363 } 344 }
364 345
365 ret = arch_store_info(bp); 346 /*
366 347 * For kernel-addresses, either the address or symbol name can be
367 if (ret < 0) 348 * specified.
368 return ret; 349 */
350 if (info->name)
351 info->address = (unsigned long)
352 kallsyms_lookup_name(info->name);
369 /* 353 /*
370 * Check that the low-order bits of the address are appropriate 354 * Check that the low-order bits of the address are appropriate
371 * for the alignment implied by len. 355 * for the alignment implied by len.
@@ -502,8 +486,6 @@ static int __kprobes hw_breakpoint_handler(struct die_args *args)
502 rcu_read_lock(); 486 rcu_read_lock();
503 487
504 bp = per_cpu(bp_per_reg[i], cpu); 488 bp = per_cpu(bp_per_reg[i], cpu);
505 if (bp)
506 rc = NOTIFY_DONE;
507 /* 489 /*
508 * Reset the 'i'th TRAP bit in dr6 to denote completion of 490 * Reset the 'i'th TRAP bit in dr6 to denote completion of
509 * exception handling 491 * exception handling
@@ -522,7 +504,13 @@ static int __kprobes hw_breakpoint_handler(struct die_args *args)
522 504
523 rcu_read_unlock(); 505 rcu_read_unlock();
524 } 506 }
525 if (dr6 & (~DR_TRAP_BITS)) 507 /*
508 * Further processing in do_debug() is needed for a) user-space
509 * breakpoints (to generate signals) and b) when the system has
510 * taken exception due to multiple causes
511 */
512 if ((current->thread.debugreg6 & DR_TRAP_BITS) ||
513 (dr6 & (~DR_TRAP_BITS)))
526 rc = NOTIFY_DONE; 514 rc = NOTIFY_DONE;
527 515
528 set_debugreg(dr7, 7); 516 set_debugreg(dr7, 7);
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c
index f2f8540a7f3d..c01a2b846d47 100644
--- a/arch/x86/kernel/i387.c
+++ b/arch/x86/kernel/i387.c
@@ -164,6 +164,11 @@ int init_fpu(struct task_struct *tsk)
164 return 0; 164 return 0;
165} 165}
166 166
167/*
168 * The xstateregs_active() routine is the same as the fpregs_active() routine,
169 * as the "regset->n" for the xstate regset will be updated based on the feature
170 * capabilities supported by the xsave.
171 */
167int fpregs_active(struct task_struct *target, const struct user_regset *regset) 172int fpregs_active(struct task_struct *target, const struct user_regset *regset)
168{ 173{
169 return tsk_used_math(target) ? regset->n : 0; 174 return tsk_used_math(target) ? regset->n : 0;
@@ -204,8 +209,6 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset,
204 if (ret) 209 if (ret)
205 return ret; 210 return ret;
206 211
207 set_stopped_child_used_math(target);
208
209 ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, 212 ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
210 &target->thread.xstate->fxsave, 0, -1); 213 &target->thread.xstate->fxsave, 0, -1);
211 214
@@ -224,6 +227,68 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset,
224 return ret; 227 return ret;
225} 228}
226 229
230int xstateregs_get(struct task_struct *target, const struct user_regset *regset,
231 unsigned int pos, unsigned int count,
232 void *kbuf, void __user *ubuf)
233{
234 int ret;
235
236 if (!cpu_has_xsave)
237 return -ENODEV;
238
239 ret = init_fpu(target);
240 if (ret)
241 return ret;
242
243 /*
244 * Copy the 48 bytes defined by the software first into the xstate
245 * memory layout in the thread struct, so that we can copy the entire
246 * xstateregs to the user using one user_regset_copyout().
247 */
248 memcpy(&target->thread.xstate->fxsave.sw_reserved,
249 xstate_fx_sw_bytes, sizeof(xstate_fx_sw_bytes));
250
251 /*
252 * Copy the xstate memory layout.
253 */
254 ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
255 &target->thread.xstate->xsave, 0, -1);
256 return ret;
257}
258
259int xstateregs_set(struct task_struct *target, const struct user_regset *regset,
260 unsigned int pos, unsigned int count,
261 const void *kbuf, const void __user *ubuf)
262{
263 int ret;
264 struct xsave_hdr_struct *xsave_hdr;
265
266 if (!cpu_has_xsave)
267 return -ENODEV;
268
269 ret = init_fpu(target);
270 if (ret)
271 return ret;
272
273 ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
274 &target->thread.xstate->xsave, 0, -1);
275
276 /*
277 * mxcsr reserved bits must be masked to zero for security reasons.
278 */
279 target->thread.xstate->fxsave.mxcsr &= mxcsr_feature_mask;
280
281 xsave_hdr = &target->thread.xstate->xsave.xsave_hdr;
282
283 xsave_hdr->xstate_bv &= pcntxt_mask;
284 /*
285 * These bits must be zero.
286 */
287 xsave_hdr->reserved1[0] = xsave_hdr->reserved1[1] = 0;
288
289 return ret;
290}
291
227#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION 292#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
228 293
229/* 294/*
@@ -404,8 +469,6 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset,
404 if (ret) 469 if (ret)
405 return ret; 470 return ret;
406 471
407 set_stopped_child_used_math(target);
408
409 if (!HAVE_HWFP) 472 if (!HAVE_HWFP)
410 return fpregs_soft_set(target, regset, pos, count, kbuf, ubuf); 473 return fpregs_soft_set(target, regset, pos, count, kbuf, ubuf);
411 474
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index dd74fe7273b1..bfba6019d762 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -42,6 +42,7 @@
42#include <linux/init.h> 42#include <linux/init.h>
43#include <linux/smp.h> 43#include <linux/smp.h>
44#include <linux/nmi.h> 44#include <linux/nmi.h>
45#include <linux/hw_breakpoint.h>
45 46
46#include <asm/debugreg.h> 47#include <asm/debugreg.h>
47#include <asm/apicdef.h> 48#include <asm/apicdef.h>
@@ -204,40 +205,81 @@ void gdb_regs_to_pt_regs(unsigned long *gdb_regs, struct pt_regs *regs)
204 205
205static struct hw_breakpoint { 206static struct hw_breakpoint {
206 unsigned enabled; 207 unsigned enabled;
207 unsigned type;
208 unsigned len;
209 unsigned long addr; 208 unsigned long addr;
209 int len;
210 int type;
211 struct perf_event **pev;
210} breakinfo[4]; 212} breakinfo[4];
211 213
212static void kgdb_correct_hw_break(void) 214static void kgdb_correct_hw_break(void)
213{ 215{
214 unsigned long dr7;
215 int correctit = 0;
216 int breakbit;
217 int breakno; 216 int breakno;
218 217
219 get_debugreg(dr7, 7);
220 for (breakno = 0; breakno < 4; breakno++) { 218 for (breakno = 0; breakno < 4; breakno++) {
221 breakbit = 2 << (breakno << 1); 219 struct perf_event *bp;
222 if (!(dr7 & breakbit) && breakinfo[breakno].enabled) { 220 struct arch_hw_breakpoint *info;
223 correctit = 1; 221 int val;
224 dr7 |= breakbit; 222 int cpu = raw_smp_processor_id();
225 dr7 &= ~(0xf0000 << (breakno << 2)); 223 if (!breakinfo[breakno].enabled)
226 dr7 |= ((breakinfo[breakno].len << 2) | 224 continue;
227 breakinfo[breakno].type) << 225 bp = *per_cpu_ptr(breakinfo[breakno].pev, cpu);
228 ((breakno << 2) + 16); 226 info = counter_arch_bp(bp);
229 set_debugreg(breakinfo[breakno].addr, breakno); 227 if (bp->attr.disabled != 1)
230 228 continue;
231 } else { 229 bp->attr.bp_addr = breakinfo[breakno].addr;
232 if ((dr7 & breakbit) && !breakinfo[breakno].enabled) { 230 bp->attr.bp_len = breakinfo[breakno].len;
233 correctit = 1; 231 bp->attr.bp_type = breakinfo[breakno].type;
234 dr7 &= ~breakbit; 232 info->address = breakinfo[breakno].addr;
235 dr7 &= ~(0xf0000 << (breakno << 2)); 233 info->len = breakinfo[breakno].len;
236 } 234 info->type = breakinfo[breakno].type;
237 } 235 val = arch_install_hw_breakpoint(bp);
236 if (!val)
237 bp->attr.disabled = 0;
238 }
239 hw_breakpoint_restore();
240}
241
242static int hw_break_reserve_slot(int breakno)
243{
244 int cpu;
245 int cnt = 0;
246 struct perf_event **pevent;
247
248 for_each_online_cpu(cpu) {
249 cnt++;
250 pevent = per_cpu_ptr(breakinfo[breakno].pev, cpu);
251 if (dbg_reserve_bp_slot(*pevent))
252 goto fail;
253 }
254
255 return 0;
256
257fail:
258 for_each_online_cpu(cpu) {
259 cnt--;
260 if (!cnt)
261 break;
262 pevent = per_cpu_ptr(breakinfo[breakno].pev, cpu);
263 dbg_release_bp_slot(*pevent);
238 } 264 }
239 if (correctit) 265 return -1;
240 set_debugreg(dr7, 7); 266}
267
268static int hw_break_release_slot(int breakno)
269{
270 struct perf_event **pevent;
271 int cpu;
272
273 for_each_online_cpu(cpu) {
274 pevent = per_cpu_ptr(breakinfo[breakno].pev, cpu);
275 if (dbg_release_bp_slot(*pevent))
276 /*
277 * The debugger is responsible for handling the retry on
278 * remove failure.
279 */
280 return -1;
281 }
282 return 0;
241} 283}
242 284
243static int 285static int
@@ -251,6 +293,10 @@ kgdb_remove_hw_break(unsigned long addr, int len, enum kgdb_bptype bptype)
251 if (i == 4) 293 if (i == 4)
252 return -1; 294 return -1;
253 295
296 if (hw_break_release_slot(i)) {
297 printk(KERN_ERR "Cannot remove hw breakpoint at %lx\n", addr);
298 return -1;
299 }
254 breakinfo[i].enabled = 0; 300 breakinfo[i].enabled = 0;
255 301
256 return 0; 302 return 0;
@@ -259,15 +305,23 @@ kgdb_remove_hw_break(unsigned long addr, int len, enum kgdb_bptype bptype)
259static void kgdb_remove_all_hw_break(void) 305static void kgdb_remove_all_hw_break(void)
260{ 306{
261 int i; 307 int i;
308 int cpu = raw_smp_processor_id();
309 struct perf_event *bp;
262 310
263 for (i = 0; i < 4; i++) 311 for (i = 0; i < 4; i++) {
264 memset(&breakinfo[i], 0, sizeof(struct hw_breakpoint)); 312 if (!breakinfo[i].enabled)
313 continue;
314 bp = *per_cpu_ptr(breakinfo[i].pev, cpu);
315 if (bp->attr.disabled == 1)
316 continue;
317 arch_uninstall_hw_breakpoint(bp);
318 bp->attr.disabled = 1;
319 }
265} 320}
266 321
267static int 322static int
268kgdb_set_hw_break(unsigned long addr, int len, enum kgdb_bptype bptype) 323kgdb_set_hw_break(unsigned long addr, int len, enum kgdb_bptype bptype)
269{ 324{
270 unsigned type;
271 int i; 325 int i;
272 326
273 for (i = 0; i < 4; i++) 327 for (i = 0; i < 4; i++)
@@ -278,27 +332,42 @@ kgdb_set_hw_break(unsigned long addr, int len, enum kgdb_bptype bptype)
278 332
279 switch (bptype) { 333 switch (bptype) {
280 case BP_HARDWARE_BREAKPOINT: 334 case BP_HARDWARE_BREAKPOINT:
281 type = 0; 335 len = 1;
282 len = 1; 336 breakinfo[i].type = X86_BREAKPOINT_EXECUTE;
283 break; 337 break;
284 case BP_WRITE_WATCHPOINT: 338 case BP_WRITE_WATCHPOINT:
285 type = 1; 339 breakinfo[i].type = X86_BREAKPOINT_WRITE;
286 break; 340 break;
287 case BP_ACCESS_WATCHPOINT: 341 case BP_ACCESS_WATCHPOINT:
288 type = 3; 342 breakinfo[i].type = X86_BREAKPOINT_RW;
289 break; 343 break;
290 default: 344 default:
291 return -1; 345 return -1;
292 } 346 }
293 347 switch (len) {
294 if (len == 1 || len == 2 || len == 4) 348 case 1:
295 breakinfo[i].len = len - 1; 349 breakinfo[i].len = X86_BREAKPOINT_LEN_1;
296 else 350 break;
351 case 2:
352 breakinfo[i].len = X86_BREAKPOINT_LEN_2;
353 break;
354 case 4:
355 breakinfo[i].len = X86_BREAKPOINT_LEN_4;
356 break;
357#ifdef CONFIG_X86_64
358 case 8:
359 breakinfo[i].len = X86_BREAKPOINT_LEN_8;
360 break;
361#endif
362 default:
297 return -1; 363 return -1;
298 364 }
299 breakinfo[i].enabled = 1;
300 breakinfo[i].addr = addr; 365 breakinfo[i].addr = addr;
301 breakinfo[i].type = type; 366 if (hw_break_reserve_slot(i)) {
367 breakinfo[i].addr = 0;
368 return -1;
369 }
370 breakinfo[i].enabled = 1;
302 371
303 return 0; 372 return 0;
304} 373}
@@ -313,8 +382,21 @@ kgdb_set_hw_break(unsigned long addr, int len, enum kgdb_bptype bptype)
313 */ 382 */
314void kgdb_disable_hw_debug(struct pt_regs *regs) 383void kgdb_disable_hw_debug(struct pt_regs *regs)
315{ 384{
385 int i;
386 int cpu = raw_smp_processor_id();
387 struct perf_event *bp;
388
316 /* Disable hardware debugging while we are in kgdb: */ 389 /* Disable hardware debugging while we are in kgdb: */
317 set_debugreg(0UL, 7); 390 set_debugreg(0UL, 7);
391 for (i = 0; i < 4; i++) {
392 if (!breakinfo[i].enabled)
393 continue;
394 bp = *per_cpu_ptr(breakinfo[i].pev, cpu);
395 if (bp->attr.disabled == 1)
396 continue;
397 arch_uninstall_hw_breakpoint(bp);
398 bp->attr.disabled = 1;
399 }
318} 400}
319 401
320/** 402/**
@@ -378,7 +460,6 @@ int kgdb_arch_handle_exception(int e_vector, int signo, int err_code,
378 struct pt_regs *linux_regs) 460 struct pt_regs *linux_regs)
379{ 461{
380 unsigned long addr; 462 unsigned long addr;
381 unsigned long dr6;
382 char *ptr; 463 char *ptr;
383 int newPC; 464 int newPC;
384 465
@@ -404,20 +485,6 @@ int kgdb_arch_handle_exception(int e_vector, int signo, int err_code,
404 raw_smp_processor_id()); 485 raw_smp_processor_id());
405 } 486 }
406 487
407 get_debugreg(dr6, 6);
408 if (!(dr6 & 0x4000)) {
409 int breakno;
410
411 for (breakno = 0; breakno < 4; breakno++) {
412 if (dr6 & (1 << breakno) &&
413 breakinfo[breakno].type == 0) {
414 /* Set restore flag: */
415 linux_regs->flags |= X86_EFLAGS_RF;
416 break;
417 }
418 }
419 }
420 set_debugreg(0UL, 6);
421 kgdb_correct_hw_break(); 488 kgdb_correct_hw_break();
422 489
423 return 0; 490 return 0;
@@ -485,8 +552,7 @@ static int __kgdb_notify(struct die_args *args, unsigned long cmd)
485 break; 552 break;
486 553
487 case DIE_DEBUG: 554 case DIE_DEBUG:
488 if (atomic_read(&kgdb_cpu_doing_single_step) == 555 if (atomic_read(&kgdb_cpu_doing_single_step) != -1) {
489 raw_smp_processor_id()) {
490 if (user_mode(regs)) 556 if (user_mode(regs))
491 return single_step_cont(regs, args); 557 return single_step_cont(regs, args);
492 break; 558 break;
@@ -539,7 +605,42 @@ static struct notifier_block kgdb_notifier = {
539 */ 605 */
540int kgdb_arch_init(void) 606int kgdb_arch_init(void)
541{ 607{
542 return register_die_notifier(&kgdb_notifier); 608 int i, cpu;
609 int ret;
610 struct perf_event_attr attr;
611 struct perf_event **pevent;
612
613 ret = register_die_notifier(&kgdb_notifier);
614 if (ret != 0)
615 return ret;
616 /*
617 * Pre-allocate the hw breakpoint structures in the non-atomic
618 * portion of kgdb because this operation requires mutexes to
619 * complete.
620 */
621 attr.bp_addr = (unsigned long)kgdb_arch_init;
622 attr.type = PERF_TYPE_BREAKPOINT;
623 attr.bp_len = HW_BREAKPOINT_LEN_1;
624 attr.bp_type = HW_BREAKPOINT_W;
625 attr.disabled = 1;
626 for (i = 0; i < 4; i++) {
627 breakinfo[i].pev = register_wide_hw_breakpoint(&attr, NULL);
628 if (IS_ERR(breakinfo[i].pev)) {
629 printk(KERN_ERR "kgdb: Could not allocate hw breakpoints\n");
630 breakinfo[i].pev = NULL;
631 kgdb_arch_exit();
632 return -1;
633 }
634 for_each_online_cpu(cpu) {
635 pevent = per_cpu_ptr(breakinfo[i].pev, cpu);
636 pevent[0]->hw.sample_period = 1;
637 if (pevent[0]->destroy != NULL) {
638 pevent[0]->destroy = NULL;
639 release_bp_slot(*pevent);
640 }
641 }
642 }
643 return ret;
543} 644}
544 645
545/** 646/**
@@ -550,6 +651,13 @@ int kgdb_arch_init(void)
550 */ 651 */
551void kgdb_arch_exit(void) 652void kgdb_arch_exit(void)
552{ 653{
654 int i;
655 for (i = 0; i < 4; i++) {
656 if (breakinfo[i].pev) {
657 unregister_wide_hw_breakpoint(breakinfo[i].pev);
658 breakinfo[i].pev = NULL;
659 }
660 }
553 unregister_die_notifier(&kgdb_notifier); 661 unregister_die_notifier(&kgdb_notifier);
554} 662}
555 663
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index 5b8c7505b3bc..5de9f4a9c3fd 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -337,6 +337,9 @@ static void __kprobes arch_copy_kprobe(struct kprobe *p)
337 337
338int __kprobes arch_prepare_kprobe(struct kprobe *p) 338int __kprobes arch_prepare_kprobe(struct kprobe *p)
339{ 339{
340 if (alternatives_text_reserved(p->addr, p->addr))
341 return -EINVAL;
342
340 if (!can_probe((unsigned long)p->addr)) 343 if (!can_probe((unsigned long)p->addr))
341 return -EILSEQ; 344 return -EILSEQ;
342 /* insn: must be on special executable page on x86. */ 345 /* insn: must be on special executable page on x86. */
@@ -429,7 +432,7 @@ void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
429static void __kprobes setup_singlestep(struct kprobe *p, struct pt_regs *regs, 432static void __kprobes setup_singlestep(struct kprobe *p, struct pt_regs *regs,
430 struct kprobe_ctlblk *kcb) 433 struct kprobe_ctlblk *kcb)
431{ 434{
432#if !defined(CONFIG_PREEMPT) || defined(CONFIG_FREEZER) 435#if !defined(CONFIG_PREEMPT)
433 if (p->ainsn.boostable == 1 && !p->post_handler) { 436 if (p->ainsn.boostable == 1 && !p->post_handler) {
434 /* Boost up -- we can execute copied instructions directly */ 437 /* Boost up -- we can execute copied instructions directly */
435 reset_current_kprobe(); 438 reset_current_kprobe();
diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c
index 37542b67c57e..e1af7c055c7d 100644
--- a/arch/x86/kernel/microcode_amd.c
+++ b/arch/x86/kernel/microcode_amd.c
@@ -36,9 +36,6 @@ MODULE_LICENSE("GPL v2");
36#define UCODE_EQUIV_CPU_TABLE_TYPE 0x00000000 36#define UCODE_EQUIV_CPU_TABLE_TYPE 0x00000000
37#define UCODE_UCODE_TYPE 0x00000001 37#define UCODE_UCODE_TYPE 0x00000001
38 38
39const struct firmware *firmware;
40static int supported_cpu;
41
42struct equiv_cpu_entry { 39struct equiv_cpu_entry {
43 u32 installed_cpu; 40 u32 installed_cpu;
44 u32 fixed_errata_mask; 41 u32 fixed_errata_mask;
@@ -77,12 +74,15 @@ static struct equiv_cpu_entry *equiv_cpu_table;
77 74
78static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig) 75static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig)
79{ 76{
77 struct cpuinfo_x86 *c = &cpu_data(cpu);
80 u32 dummy; 78 u32 dummy;
81 79
82 if (!supported_cpu)
83 return -1;
84
85 memset(csig, 0, sizeof(*csig)); 80 memset(csig, 0, sizeof(*csig));
81 if (c->x86_vendor != X86_VENDOR_AMD || c->x86 < 0x10) {
82 pr_warning("microcode: CPU%d: AMD CPU family 0x%x not "
83 "supported\n", cpu, c->x86);
84 return -1;
85 }
86 rdmsr(MSR_AMD64_PATCH_LEVEL, csig->rev, dummy); 86 rdmsr(MSR_AMD64_PATCH_LEVEL, csig->rev, dummy);
87 pr_info("CPU%d: patch_level=0x%x\n", cpu, csig->rev); 87 pr_info("CPU%d: patch_level=0x%x\n", cpu, csig->rev);
88 return 0; 88 return 0;
@@ -294,10 +294,14 @@ generic_load_microcode(int cpu, const u8 *data, size_t size)
294 294
295static enum ucode_state request_microcode_fw(int cpu, struct device *device) 295static enum ucode_state request_microcode_fw(int cpu, struct device *device)
296{ 296{
297 const char *fw_name = "amd-ucode/microcode_amd.bin";
298 const struct firmware *firmware;
297 enum ucode_state ret; 299 enum ucode_state ret;
298 300
299 if (firmware == NULL) 301 if (request_firmware(&firmware, fw_name, device)) {
302 printk(KERN_ERR "microcode: failed to load file %s\n", fw_name);
300 return UCODE_NFOUND; 303 return UCODE_NFOUND;
304 }
301 305
302 if (*(u32 *)firmware->data != UCODE_MAGIC) { 306 if (*(u32 *)firmware->data != UCODE_MAGIC) {
303 pr_err("invalid UCODE_MAGIC (0x%08x)\n", 307 pr_err("invalid UCODE_MAGIC (0x%08x)\n",
@@ -307,6 +311,8 @@ static enum ucode_state request_microcode_fw(int cpu, struct device *device)
307 311
308 ret = generic_load_microcode(cpu, firmware->data, firmware->size); 312 ret = generic_load_microcode(cpu, firmware->data, firmware->size);
309 313
314 release_firmware(firmware);
315
310 return ret; 316 return ret;
311} 317}
312 318
@@ -325,31 +331,7 @@ static void microcode_fini_cpu_amd(int cpu)
325 uci->mc = NULL; 331 uci->mc = NULL;
326} 332}
327 333
328void init_microcode_amd(struct device *device)
329{
330 const char *fw_name = "amd-ucode/microcode_amd.bin";
331 struct cpuinfo_x86 *c = &boot_cpu_data;
332
333 WARN_ON(c->x86_vendor != X86_VENDOR_AMD);
334
335 if (c->x86 < 0x10) {
336 pr_warning("AMD CPU family 0x%x not supported\n", c->x86);
337 return;
338 }
339 supported_cpu = 1;
340
341 if (request_firmware(&firmware, fw_name, device))
342 pr_err("failed to load file %s\n", fw_name);
343}
344
345void fini_microcode_amd(void)
346{
347 release_firmware(firmware);
348}
349
350static struct microcode_ops microcode_amd_ops = { 334static struct microcode_ops microcode_amd_ops = {
351 .init = init_microcode_amd,
352 .fini = fini_microcode_amd,
353 .request_microcode_user = request_microcode_user, 335 .request_microcode_user = request_microcode_user,
354 .request_microcode_fw = request_microcode_fw, 336 .request_microcode_fw = request_microcode_fw,
355 .collect_cpu_info = collect_cpu_info_amd, 337 .collect_cpu_info = collect_cpu_info_amd,
diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c
index 0c8632433090..cceb5bc3c3c2 100644
--- a/arch/x86/kernel/microcode_core.c
+++ b/arch/x86/kernel/microcode_core.c
@@ -521,9 +521,6 @@ static int __init microcode_init(void)
521 return PTR_ERR(microcode_pdev); 521 return PTR_ERR(microcode_pdev);
522 } 522 }
523 523
524 if (microcode_ops->init)
525 microcode_ops->init(&microcode_pdev->dev);
526
527 get_online_cpus(); 524 get_online_cpus();
528 mutex_lock(&microcode_mutex); 525 mutex_lock(&microcode_mutex);
529 526
@@ -566,9 +563,6 @@ static void __exit microcode_exit(void)
566 563
567 platform_device_unregister(microcode_pdev); 564 platform_device_unregister(microcode_pdev);
568 565
569 if (microcode_ops->fini)
570 microcode_ops->fini();
571
572 microcode_ops = NULL; 566 microcode_ops = NULL;
573 567
574 pr_info("Microcode Update Driver: v" MICROCODE_VERSION " removed.\n"); 568 pr_info("Microcode Update Driver: v" MICROCODE_VERSION " removed.\n");
diff --git a/arch/x86/kernel/microcode_intel.c b/arch/x86/kernel/microcode_intel.c
index ebd193e476ca..85a343e28937 100644
--- a/arch/x86/kernel/microcode_intel.c
+++ b/arch/x86/kernel/microcode_intel.c
@@ -328,7 +328,7 @@ static int apply_microcode(int cpu)
328 cpu_num, mc_intel->hdr.rev); 328 cpu_num, mc_intel->hdr.rev);
329 return -1; 329 return -1;
330 } 330 }
331 pr_info("CPU%d updated to revision 0x%x, date = %04x-%02x-%02x \n", 331 pr_info("CPU%d updated to revision 0x%x, date = %04x-%02x-%02x\n",
332 cpu_num, val[1], 332 cpu_num, val[1],
333 mc_intel->hdr.date & 0xffff, 333 mc_intel->hdr.date & 0xffff,
334 mc_intel->hdr.date >> 24, 334 mc_intel->hdr.date >> 24,
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index 40b54ceb68b5..a2c1edd2d3ac 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -359,13 +359,6 @@ static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early)
359 x86_init.mpparse.mpc_record(1); 359 x86_init.mpparse.mpc_record(1);
360 } 360 }
361 361
362#ifdef CONFIG_X86_BIGSMP
363 generic_bigsmp_probe();
364#endif
365
366 if (apic->setup_apic_routing)
367 apic->setup_apic_routing();
368
369 if (!num_processors) 362 if (!num_processors)
370 printk(KERN_ERR "MPTABLE: no processors registered!\n"); 363 printk(KERN_ERR "MPTABLE: no processors registered!\n");
371 return num_processors; 364 return num_processors;
diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c
index 4bd93c9b2b27..206735ac8cbd 100644
--- a/arch/x86/kernel/msr.c
+++ b/arch/x86/kernel/msr.c
@@ -285,7 +285,7 @@ static void __exit msr_exit(void)
285 for_each_online_cpu(cpu) 285 for_each_online_cpu(cpu)
286 msr_device_destroy(cpu); 286 msr_device_destroy(cpu);
287 class_destroy(msr_class); 287 class_destroy(msr_class);
288 unregister_chrdev(MSR_MAJOR, "cpu/msr"); 288 __unregister_chrdev(MSR_MAJOR, 0, NR_CPUS, "cpu/msr");
289 unregister_hotcpu_notifier(&msr_class_cpu_notifier); 289 unregister_hotcpu_notifier(&msr_class_cpu_notifier);
290} 290}
291 291
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 02c3ee013ccd..02d678065d7d 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -92,6 +92,13 @@ void exit_thread(void)
92 } 92 }
93} 93}
94 94
95void show_regs(struct pt_regs *regs)
96{
97 show_registers(regs);
98 show_trace(NULL, regs, (unsigned long *)kernel_stack_pointer(regs),
99 regs->bp);
100}
101
95void show_regs_common(void) 102void show_regs_common(void)
96{ 103{
97 const char *board, *product; 104 const char *board, *product;
@@ -115,18 +122,6 @@ void flush_thread(void)
115{ 122{
116 struct task_struct *tsk = current; 123 struct task_struct *tsk = current;
117 124
118#ifdef CONFIG_X86_64
119 if (test_tsk_thread_flag(tsk, TIF_ABI_PENDING)) {
120 clear_tsk_thread_flag(tsk, TIF_ABI_PENDING);
121 if (test_tsk_thread_flag(tsk, TIF_IA32)) {
122 clear_tsk_thread_flag(tsk, TIF_IA32);
123 } else {
124 set_tsk_thread_flag(tsk, TIF_IA32);
125 current_thread_info()->status |= TS_COMPAT;
126 }
127 }
128#endif
129
130 flush_ptrace_hw_breakpoint(tsk); 125 flush_ptrace_hw_breakpoint(tsk);
131 memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); 126 memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
132 /* 127 /*
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 37ad1e046aae..f6c62667e30c 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -174,12 +174,6 @@ void __show_regs(struct pt_regs *regs, int all)
174 d6, d7); 174 d6, d7);
175} 175}
176 176
177void show_regs(struct pt_regs *regs)
178{
179 show_registers(regs);
180 show_trace(NULL, regs, &regs->sp, regs->bp);
181}
182
183void release_thread(struct task_struct *dead_task) 177void release_thread(struct task_struct *dead_task)
184{ 178{
185 BUG_ON(dead_task->mm); 179 BUG_ON(dead_task->mm);
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index f9e033150cdf..dc9690b4c4cc 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -211,12 +211,6 @@ void __show_regs(struct pt_regs *regs, int all)
211 printk(KERN_DEFAULT "DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7); 211 printk(KERN_DEFAULT "DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7);
212} 212}
213 213
214void show_regs(struct pt_regs *regs)
215{
216 show_registers(regs);
217 show_trace(NULL, regs, (void *)(regs + 1), regs->bp);
218}
219
220void release_thread(struct task_struct *dead_task) 214void release_thread(struct task_struct *dead_task)
221{ 215{
222 if (dead_task->mm) { 216 if (dead_task->mm) {
@@ -521,6 +515,18 @@ void set_personality_64bit(void)
521 current->personality &= ~READ_IMPLIES_EXEC; 515 current->personality &= ~READ_IMPLIES_EXEC;
522} 516}
523 517
518void set_personality_ia32(void)
519{
520 /* inherit personality from parent */
521
522 /* Make sure to be in 32bit mode */
523 set_thread_flag(TIF_IA32);
524 current->personality |= force_personality32;
525
526 /* Prepare the first "return" to user space */
527 current_thread_info()->status |= TS_COMPAT;
528}
529
524unsigned long get_wchan(struct task_struct *p) 530unsigned long get_wchan(struct task_struct *p)
525{ 531{
526 unsigned long stack; 532 unsigned long stack;
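With the TIF_ABI_PENDING block gone from flush_thread() above, the 32/64-bit decision is made when the binary is loaded, via set_personality_ia32() or set_personality_64bit(). A hedged sketch of that call site; the wrapper name here is made up for illustration and is not the real binfmt hook:

        /* sketch: bitness chosen at exec time instead of via a pending flag */
        static void example_set_personality(int ia32_binary)
        {
                if (ia32_binary)
                        set_personality_ia32();  /* TIF_IA32 + TS_COMPAT, as above */
                else
                        set_personality_64bit();
        }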
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 017d937639fe..2d96aab82a48 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -48,6 +48,7 @@ enum x86_regset {
48 REGSET_FP, 48 REGSET_FP,
49 REGSET_XFP, 49 REGSET_XFP,
50 REGSET_IOPERM64 = REGSET_XFP, 50 REGSET_IOPERM64 = REGSET_XFP,
51 REGSET_XSTATE,
51 REGSET_TLS, 52 REGSET_TLS,
52 REGSET_IOPERM32, 53 REGSET_IOPERM32,
53}; 54};
@@ -140,30 +141,6 @@ static const int arg_offs_table[] = {
140#endif 141#endif
141}; 142};
142 143
143/**
144 * regs_get_argument_nth() - get Nth argument at function call
145 * @regs: pt_regs which contains registers at function entry.
146 * @n: argument number.
147 *
148 * regs_get_argument_nth() returns @n th argument of a function call.
149 * Since usually the kernel stack will be changed right after function entry,
150 * you must use this at function entry. If the @n th entry is NOT in the
151 * kernel stack or pt_regs, this returns 0.
152 */
153unsigned long regs_get_argument_nth(struct pt_regs *regs, unsigned int n)
154{
155 if (n < ARRAY_SIZE(arg_offs_table))
156 return *(unsigned long *)((char *)regs + arg_offs_table[n]);
157 else {
158 /*
159 * The typical case: arg n is on the stack.
160 * (Note: stack[0] = return address, so skip it)
161 */
162 n -= ARRAY_SIZE(arg_offs_table);
163 return regs_get_kernel_stack_nth(regs, 1 + n);
164 }
165}
166
167/* 144/*
168 * does not yet catch signals sent when the child dies. 145 * does not yet catch signals sent when the child dies.
169 * in exit.c or in signal.c. 146 * in exit.c or in signal.c.
@@ -702,7 +679,7 @@ static unsigned long ptrace_get_debugreg(struct task_struct *tsk, int n)
702 } else if (n == 6) { 679 } else if (n == 6) {
703 val = thread->debugreg6; 680 val = thread->debugreg6;
704 } else if (n == 7) { 681 } else if (n == 7) {
705 val = ptrace_get_dr7(thread->ptrace_bps); 682 val = thread->ptrace_dr7;
706 } 683 }
707 return val; 684 return val;
708} 685}
@@ -778,8 +755,11 @@ int ptrace_set_debugreg(struct task_struct *tsk, int n, unsigned long val)
778 return rc; 755 return rc;
779 } 756 }
780 /* All that's left is DR7 */ 757 /* All that's left is DR7 */
781 if (n == 7) 758 if (n == 7) {
782 rc = ptrace_write_dr7(tsk, val); 759 rc = ptrace_write_dr7(tsk, val);
760 if (!rc)
761 thread->ptrace_dr7 = val;
762 }
783 763
784ret_path: 764ret_path:
785 return rc; 765 return rc;
@@ -1584,7 +1564,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
1584 1564
1585#ifdef CONFIG_X86_64 1565#ifdef CONFIG_X86_64
1586 1566
1587static const struct user_regset x86_64_regsets[] = { 1567static struct user_regset x86_64_regsets[] __read_mostly = {
1588 [REGSET_GENERAL] = { 1568 [REGSET_GENERAL] = {
1589 .core_note_type = NT_PRSTATUS, 1569 .core_note_type = NT_PRSTATUS,
1590 .n = sizeof(struct user_regs_struct) / sizeof(long), 1570 .n = sizeof(struct user_regs_struct) / sizeof(long),
@@ -1597,6 +1577,12 @@ static const struct user_regset x86_64_regsets[] = {
1597 .size = sizeof(long), .align = sizeof(long), 1577 .size = sizeof(long), .align = sizeof(long),
1598 .active = xfpregs_active, .get = xfpregs_get, .set = xfpregs_set 1578 .active = xfpregs_active, .get = xfpregs_get, .set = xfpregs_set
1599 }, 1579 },
1580 [REGSET_XSTATE] = {
1581 .core_note_type = NT_X86_XSTATE,
1582 .size = sizeof(u64), .align = sizeof(u64),
1583 .active = xstateregs_active, .get = xstateregs_get,
1584 .set = xstateregs_set
1585 },
1600 [REGSET_IOPERM64] = { 1586 [REGSET_IOPERM64] = {
1601 .core_note_type = NT_386_IOPERM, 1587 .core_note_type = NT_386_IOPERM,
1602 .n = IO_BITMAP_LONGS, 1588 .n = IO_BITMAP_LONGS,
@@ -1622,7 +1608,7 @@ static const struct user_regset_view user_x86_64_view = {
1622#endif /* CONFIG_X86_64 */ 1608#endif /* CONFIG_X86_64 */
1623 1609
1624#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION 1610#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
1625static const struct user_regset x86_32_regsets[] = { 1611static struct user_regset x86_32_regsets[] __read_mostly = {
1626 [REGSET_GENERAL] = { 1612 [REGSET_GENERAL] = {
1627 .core_note_type = NT_PRSTATUS, 1613 .core_note_type = NT_PRSTATUS,
1628 .n = sizeof(struct user_regs_struct32) / sizeof(u32), 1614 .n = sizeof(struct user_regs_struct32) / sizeof(u32),
@@ -1641,6 +1627,12 @@ static const struct user_regset x86_32_regsets[] = {
1641 .size = sizeof(u32), .align = sizeof(u32), 1627 .size = sizeof(u32), .align = sizeof(u32),
1642 .active = xfpregs_active, .get = xfpregs_get, .set = xfpregs_set 1628 .active = xfpregs_active, .get = xfpregs_get, .set = xfpregs_set
1643 }, 1629 },
1630 [REGSET_XSTATE] = {
1631 .core_note_type = NT_X86_XSTATE,
1632 .size = sizeof(u64), .align = sizeof(u64),
1633 .active = xstateregs_active, .get = xstateregs_get,
1634 .set = xstateregs_set
1635 },
1644 [REGSET_TLS] = { 1636 [REGSET_TLS] = {
1645 .core_note_type = NT_386_TLS, 1637 .core_note_type = NT_386_TLS,
1646 .n = GDT_ENTRY_TLS_ENTRIES, .bias = GDT_ENTRY_TLS_MIN, 1638 .n = GDT_ENTRY_TLS_ENTRIES, .bias = GDT_ENTRY_TLS_MIN,
@@ -1663,6 +1655,23 @@ static const struct user_regset_view user_x86_32_view = {
1663}; 1655};
1664#endif 1656#endif
1665 1657
1658/*
1659 * This represents bytes 464..511 in the memory layout exported through
1660 * the REGSET_XSTATE interface.
1661 */
1662u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS];
1663
1664void update_regset_xstate_info(unsigned int size, u64 xstate_mask)
1665{
1666#ifdef CONFIG_X86_64
1667 x86_64_regsets[REGSET_XSTATE].n = size / sizeof(u64);
1668#endif
1669#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
1670 x86_32_regsets[REGSET_XSTATE].n = size / sizeof(u64);
1671#endif
1672 xstate_fx_sw_bytes[USER_XSTATE_XCR0_WORD] = xstate_mask;
1673}
1674
1666const struct user_regset_view *task_user_regset_view(struct task_struct *task) 1675const struct user_regset_view *task_user_regset_view(struct task_struct *task)
1667{ 1676{
1668#ifdef CONFIG_IA32_EMULATION 1677#ifdef CONFIG_IA32_EMULATION
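The REGSET_XSTATE entries above are added with .n left unset; update_regset_xstate_info() fills in the word count once xsave_cntxt_init() (in the xsave.c hunk further down) has read the xstate size from CPUID. A standalone sketch of that sizing arithmetic, with example numbers that are assumptions, not values from the patch:

        #include <stdio.h>
        #include <stdint.h>

        int main(void)
        {
                unsigned int xstate_size = 832;     /* assumed CPUID.0xD value  */
                uint64_t xstate_mask = 0x7;         /* assumed: x87 | SSE | AVX */

                /* the regset exports u64 words, so .n = size / sizeof(u64) */
                printf("REGSET_XSTATE .n = %u\n",
                       xstate_size / (unsigned int)sizeof(uint64_t));
                /* the first software-reserved word (bytes 464..511 of the
                 * layout, per the comment above) carries the feature mask */
                printf("xcr0 word = %#llx\n", (unsigned long long)xstate_mask);
                return 0;
        }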
diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c
index 18093d7498f0..12e9feaa2f7a 100644
--- a/arch/x86/kernel/quirks.c
+++ b/arch/x86/kernel/quirks.c
@@ -491,6 +491,19 @@ void force_hpet_resume(void)
491 break; 491 break;
492 } 492 }
493} 493}
494
495/*
496 * HPET MSI on some boards (ATI SB700/SB800) has side effect on
497 * floppy DMA. Disable HPET MSI on such platforms.
498 */
499static void force_disable_hpet_msi(struct pci_dev *unused)
500{
501 hpet_msi_disable = 1;
502}
503
504DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_SBX00_SMBUS,
505 force_disable_hpet_msi);
506
494#endif 507#endif
495 508
496#if defined(CONFIG_PCI) && defined(CONFIG_NUMA) 509#if defined(CONFIG_PCI) && defined(CONFIG_NUMA)
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 1545bc0c9845..704bddcdf64d 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -203,6 +203,15 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = {
203 DMI_MATCH(DMI_BOARD_NAME, "0T656F"), 203 DMI_MATCH(DMI_BOARD_NAME, "0T656F"),
204 }, 204 },
205 }, 205 },
206 { /* Handle problems with rebooting on Dell OptiPlex 760 with 0G919G*/
207 .callback = set_bios_reboot,
208 .ident = "Dell OptiPlex 760",
209 .matches = {
210 DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
211 DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 760"),
212 DMI_MATCH(DMI_BOARD_NAME, "0G919G"),
213 },
214 },
206 { /* Handle problems with rebooting on Dell 2400's */ 215 { /* Handle problems with rebooting on Dell 2400's */
207 .callback = set_bios_reboot, 216 .callback = set_bios_reboot,
208 .ident = "Dell PowerEdge 2400", 217 .ident = "Dell PowerEdge 2400",
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index f7b8b9894b22..cb42109a55b4 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -121,7 +121,9 @@
121unsigned long max_low_pfn_mapped; 121unsigned long max_low_pfn_mapped;
122unsigned long max_pfn_mapped; 122unsigned long max_pfn_mapped;
123 123
124#ifdef CONFIG_DMI
124RESERVE_BRK(dmi_alloc, 65536); 125RESERVE_BRK(dmi_alloc, 65536);
126#endif
125 127
126unsigned int boot_cpu_id __read_mostly; 128unsigned int boot_cpu_id __read_mostly;
127 129
@@ -642,23 +644,48 @@ static struct dmi_system_id __initdata bad_bios_dmi_table[] = {
642 DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix/MSC"), 644 DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix/MSC"),
643 }, 645 },
644 }, 646 },
645 {
646 /* 647 /*
647 * AMI BIOS with low memory corruption was found on Intel DG45ID board. 648 * AMI BIOS with low memory corruption was found on Intel DG45ID and
648 * It hase different DMI_BIOS_VENDOR = "Intel Corp.", for now we will 649 * DG45FC boards.
650 * It has a different DMI_BIOS_VENDOR = "Intel Corp.", for now we will
649 * match only DMI_BOARD_NAME and see if there are more bad products 651 * match only DMI_BOARD_NAME and see if there are more bad products
650 * with this vendor. 652 * with this vendor.
651 */ 653 */
654 {
652 .callback = dmi_low_memory_corruption, 655 .callback = dmi_low_memory_corruption,
653 .ident = "AMI BIOS", 656 .ident = "AMI BIOS",
654 .matches = { 657 .matches = {
655 DMI_MATCH(DMI_BOARD_NAME, "DG45ID"), 658 DMI_MATCH(DMI_BOARD_NAME, "DG45ID"),
656 }, 659 },
657 }, 660 },
661 {
662 .callback = dmi_low_memory_corruption,
663 .ident = "AMI BIOS",
664 .matches = {
665 DMI_MATCH(DMI_BOARD_NAME, "DG45FC"),
666 },
667 },
658#endif 668#endif
659 {} 669 {}
660}; 670};
661 671
672static void __init trim_bios_range(void)
673{
674 /*
675 * A special case is the first 4Kb of memory;
676 * This is a BIOS owned area, not kernel ram, but generally
677 * not listed as such in the E820 table.
678 */
679 e820_update_range(0, PAGE_SIZE, E820_RAM, E820_RESERVED);
680 /*
681 * special case: Some BIOSen report the PC BIOS
682 * area (640->1Mb) as ram even though it is not.
683 * take them out.
684 */
685 e820_remove_range(BIOS_BEGIN, BIOS_END - BIOS_BEGIN, E820_RAM, 1);
686 sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
687}
688
662/* 689/*
663 * Determine if we were loaded by an EFI loader. If so, then we have also been 690 * Determine if we were loaded by an EFI loader. If so, then we have also been
664 * passed the efi memmap, systab, etc., so we should use these data structures 691 * passed the efi memmap, systab, etc., so we should use these data structures
@@ -822,7 +849,7 @@ void __init setup_arch(char **cmdline_p)
822 insert_resource(&iomem_resource, &data_resource); 849 insert_resource(&iomem_resource, &data_resource);
823 insert_resource(&iomem_resource, &bss_resource); 850 insert_resource(&iomem_resource, &bss_resource);
824 851
825 852 trim_bios_range();
826#ifdef CONFIG_X86_32 853#ifdef CONFIG_X86_32
827 if (ppro_with_ram_bug()) { 854 if (ppro_with_ram_bug()) {
828 e820_update_range(0x70000000ULL, 0x40000ULL, E820_RAM, 855 e820_update_range(0x70000000ULL, 0x40000ULL, E820_RAM,
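trim_bios_range() folds into the E820 map the two special cases that page_is_ram() used to hard-code (the copy deleted from ioremap.c further down). With the map trimmed, a RAM check reduces to a plain range walk; a sketch with illustrative names, not the kernel's generic helper:

        /* sketch: "is this page RAM?" once the quirky ranges live in the map */
        struct range_sketch { unsigned long long start, end; int is_ram; };

        static int page_is_ram_sketch(const struct range_sketch *map, int n,
                                      unsigned long pfn)
        {
                unsigned long long addr = (unsigned long long)pfn << 12;
                int i;

                for (i = 0; i < n; i++)
                        if (map[i].is_ram && addr >= map[i].start && addr < map[i].end)
                                return 1;
                return 0;
        }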
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 678d0b8c26f3..9b4401115ea1 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -320,6 +320,7 @@ notrace static void __cpuinit start_secondary(void *unused)
320 unlock_vector_lock(); 320 unlock_vector_lock();
321 ipi_call_unlock(); 321 ipi_call_unlock();
322 per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE; 322 per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE;
323 x86_platform.nmi_init();
323 324
324 /* enable local interrupts */ 325 /* enable local interrupts */
325 local_irq_enable(); 326 local_irq_enable();
@@ -1083,9 +1084,7 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
1083 set_cpu_sibling_map(0); 1084 set_cpu_sibling_map(0);
1084 1085
1085 enable_IR_x2apic(); 1086 enable_IR_x2apic();
1086#ifdef CONFIG_X86_64
1087 default_setup_apic_routing(); 1087 default_setup_apic_routing();
1088#endif
1089 1088
1090 if (smp_sanity_check(max_cpus) < 0) { 1089 if (smp_sanity_check(max_cpus) < 0) {
1091 printk(KERN_INFO "SMP disabled\n"); 1090 printk(KERN_INFO "SMP disabled\n");
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 33399176512a..1168e4454188 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -534,6 +534,9 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
534 534
535 get_debugreg(dr6, 6); 535 get_debugreg(dr6, 6);
536 536
537 /* Filter out all the reserved bits which are preset to 1 */
538 dr6 &= ~DR6_RESERVED;
539
537 /* Catch kmemcheck conditions first of all! */ 540 /* Catch kmemcheck conditions first of all! */
538 if ((dr6 & DR_STEP) && kmemcheck_trap(regs)) 541 if ((dr6 & DR_STEP) && kmemcheck_trap(regs))
539 return; 542 return;
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 597683aa5ba0..208a857c679f 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -740,7 +740,7 @@ static cycle_t __vsyscall_fn vread_tsc(void)
740} 740}
741#endif 741#endif
742 742
743static void resume_tsc(void) 743static void resume_tsc(struct clocksource *cs)
744{ 744{
745 clocksource_tsc.cycle_last = 0; 745 clocksource_tsc.cycle_last = 0;
746} 746}
@@ -806,7 +806,7 @@ static void __init check_system_tsc_reliable(void)
806 unsigned long res_low, res_high; 806 unsigned long res_low, res_high;
807 807
808 rdmsr_safe(MSR_GEODE_BUSCONT_CONF0, &res_low, &res_high); 808 rdmsr_safe(MSR_GEODE_BUSCONT_CONF0, &res_low, &res_high);
809 /* Geode_LX - the OLPC CPU has a possibly a very reliable TSC */ 809 /* Geode_LX - the OLPC CPU has a very reliable TSC */
810 if (res_low & RTSC_SUSP) 810 if (res_low & RTSC_SUSP)
811 tsc_clocksource_reliable = 1; 811 tsc_clocksource_reliable = 1;
812#endif 812#endif
diff --git a/arch/x86/kernel/uv_sysfs.c b/arch/x86/kernel/uv_sysfs.c
index 36afb98675a4..309c70fb7759 100644
--- a/arch/x86/kernel/uv_sysfs.c
+++ b/arch/x86/kernel/uv_sysfs.c
@@ -54,19 +54,19 @@ static int __init sgi_uv_sysfs_init(void)
54 if (!sgi_uv_kobj) 54 if (!sgi_uv_kobj)
55 sgi_uv_kobj = kobject_create_and_add("sgi_uv", firmware_kobj); 55 sgi_uv_kobj = kobject_create_and_add("sgi_uv", firmware_kobj);
56 if (!sgi_uv_kobj) { 56 if (!sgi_uv_kobj) {
57 printk(KERN_WARNING "kobject_create_and_add sgi_uv failed \n"); 57 printk(KERN_WARNING "kobject_create_and_add sgi_uv failed\n");
58 return -EINVAL; 58 return -EINVAL;
59 } 59 }
60 60
61 ret = sysfs_create_file(sgi_uv_kobj, &partition_id_attr.attr); 61 ret = sysfs_create_file(sgi_uv_kobj, &partition_id_attr.attr);
62 if (ret) { 62 if (ret) {
63 printk(KERN_WARNING "sysfs_create_file partition_id failed \n"); 63 printk(KERN_WARNING "sysfs_create_file partition_id failed\n");
64 return ret; 64 return ret;
65 } 65 }
66 66
67 ret = sysfs_create_file(sgi_uv_kobj, &coherence_id_attr.attr); 67 ret = sysfs_create_file(sgi_uv_kobj, &coherence_id_attr.attr);
68 if (ret) { 68 if (ret) {
69 printk(KERN_WARNING "sysfs_create_file coherence_id failed \n"); 69 printk(KERN_WARNING "sysfs_create_file coherence_id failed\n");
70 return ret; 70 return ret;
71 } 71 }
72 72
diff --git a/arch/x86/kernel/uv_time.c b/arch/x86/kernel/uv_time.c
index 3c84aa001c11..2b75ef638dbc 100644
--- a/arch/x86/kernel/uv_time.c
+++ b/arch/x86/kernel/uv_time.c
@@ -282,10 +282,21 @@ static int uv_rtc_unset_timer(int cpu, int force)
282 282
283/* 283/*
284 * Read the RTC. 284 * Read the RTC.
285 *
286 * Starting with HUB rev 2.0, the UV RTC register is replicated across all
287 * cachelines of its own page. This allows faster simultaneous reads
288 * from a given socket.
285 */ 289 */
286static cycle_t uv_read_rtc(struct clocksource *cs) 290static cycle_t uv_read_rtc(struct clocksource *cs)
287{ 291{
288 return (cycle_t)uv_read_local_mmr(UVH_RTC); 292 unsigned long offset;
293
294 if (uv_get_min_hub_revision_id() == 1)
295 offset = 0;
296 else
297 offset = (uv_blade_processor_id() * L1_CACHE_BYTES) % PAGE_SIZE;
298
299 return (cycle_t)uv_read_local_mmr(UVH_RTC | offset);
289} 300}
290 301
291/* 302/*
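From hub revision 2.0 the RTC register is replicated on every cacheline of its page, so each CPU on a blade reads a private copy and the reads no longer serialise on a single cacheline. A quick standalone check of the offset arithmetic used above; the cacheline and page sizes are assumed values, and OR-ing the offset into UVH_RTC presumes the register's low page bits are zero:

        #include <stdio.h>

        int main(void)
        {
                unsigned long l1_cache_bytes = 64;      /* assumed */
                unsigned long page_size = 4096;         /* assumed */
                int cpu;

                for (cpu = 0; cpu < 4; cpu++)
                        printf("blade cpu %d -> offset 0x%lx\n",
                               cpu, (cpu * l1_cache_bytes) % page_size);
                return 0;
        }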
diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c
index 619f7f88b8cc..693920b22496 100644
--- a/arch/x86/kernel/x8664_ksyms_64.c
+++ b/arch/x86/kernel/x8664_ksyms_64.c
@@ -26,7 +26,8 @@ EXPORT_SYMBOL(__put_user_2);
26EXPORT_SYMBOL(__put_user_4); 26EXPORT_SYMBOL(__put_user_4);
27EXPORT_SYMBOL(__put_user_8); 27EXPORT_SYMBOL(__put_user_8);
28 28
29EXPORT_SYMBOL(copy_user_generic); 29EXPORT_SYMBOL(copy_user_generic_string);
30EXPORT_SYMBOL(copy_user_generic_unrolled);
30EXPORT_SYMBOL(__copy_user_nocache); 31EXPORT_SYMBOL(__copy_user_nocache);
31EXPORT_SYMBOL(_copy_from_user); 32EXPORT_SYMBOL(_copy_from_user);
32EXPORT_SYMBOL(_copy_to_user); 33EXPORT_SYMBOL(_copy_to_user);
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index ccd179dec36e..ee5746c94628 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -76,10 +76,13 @@ struct x86_cpuinit_ops x86_cpuinit __cpuinitdata = {
76 .setup_percpu_clockev = setup_secondary_APIC_clock, 76 .setup_percpu_clockev = setup_secondary_APIC_clock,
77}; 77};
78 78
79static void default_nmi_init(void) { };
80
79struct x86_platform_ops x86_platform = { 81struct x86_platform_ops x86_platform = {
80 .calibrate_tsc = native_calibrate_tsc, 82 .calibrate_tsc = native_calibrate_tsc,
81 .get_wallclock = mach_get_cmos_time, 83 .get_wallclock = mach_get_cmos_time,
82 .set_wallclock = mach_set_rtc_mmss, 84 .set_wallclock = mach_set_rtc_mmss,
83 .iommu_shutdown = iommu_shutdown_noop, 85 .iommu_shutdown = iommu_shutdown_noop,
84 .is_untracked_pat_range = is_ISA_range, 86 .is_untracked_pat_range = is_ISA_range,
87 .nmi_init = default_nmi_init
85}; 88};
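The new x86_platform.nmi_init hook defaults to a no-op and is invoked from start_secondary() (smpboot.c hunk above), so platform code can install per-CPU NMI setup without an #ifdef in common code. A hedged sketch of an override; the function names are placeholders, not necessarily what a real platform installs:

        /* sketch: platform code replacing the no-op default during setup */
        static void example_nmi_init(void)
        {
                /* e.g. unmask the local APIC NMI LVT entries on this CPU */
        }

        static void __init example_platform_setup(void)
        {
                x86_platform.nmi_init = example_nmi_init;
        }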
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c
index c5ee17e8c6d9..782c3a362ec6 100644
--- a/arch/x86/kernel/xsave.c
+++ b/arch/x86/kernel/xsave.c
@@ -337,6 +337,7 @@ void __ref xsave_cntxt_init(void)
337 cpuid_count(0xd, 0, &eax, &ebx, &ecx, &edx); 337 cpuid_count(0xd, 0, &eax, &ebx, &ecx, &edx);
338 xstate_size = ebx; 338 xstate_size = ebx;
339 339
340 update_regset_xstate_info(xstate_size, pcntxt_mask);
340 prepare_fx_sw_frame(); 341 prepare_fx_sw_frame();
341 342
342 setup_xstate_init(); 343 setup_xstate_init();
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index 296aba49472a..15578f180e59 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -467,6 +467,9 @@ static int pit_ioport_read(struct kvm_io_device *this,
467 return -EOPNOTSUPP; 467 return -EOPNOTSUPP;
468 468
469 addr &= KVM_PIT_CHANNEL_MASK; 469 addr &= KVM_PIT_CHANNEL_MASK;
470 if (addr == 3)
471 return 0;
472
470 s = &pit_state->channels[addr]; 473 s = &pit_state->channels[addr];
471 474
472 mutex_lock(&pit_state->lock); 475 mutex_lock(&pit_state->lock);
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 3063a0c4858b..ba8c045da782 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -373,6 +373,12 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
373 if (unlikely(!apic_enabled(apic))) 373 if (unlikely(!apic_enabled(apic)))
374 break; 374 break;
375 375
376 if (trig_mode) {
377 apic_debug("level trig mode for vector %d", vector);
378 apic_set_vector(vector, apic->regs + APIC_TMR);
379 } else
380 apic_clear_vector(vector, apic->regs + APIC_TMR);
381
376 result = !apic_test_and_set_irr(vector, apic); 382 result = !apic_test_and_set_irr(vector, apic);
377 trace_kvm_apic_accept_irq(vcpu->vcpu_id, delivery_mode, 383 trace_kvm_apic_accept_irq(vcpu->vcpu_id, delivery_mode,
378 trig_mode, vector, !result); 384 trig_mode, vector, !result);
@@ -383,11 +389,6 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
383 break; 389 break;
384 } 390 }
385 391
386 if (trig_mode) {
387 apic_debug("level trig mode for vector %d", vector);
388 apic_set_vector(vector, apic->regs + APIC_TMR);
389 } else
390 apic_clear_vector(vector, apic->regs + APIC_TMR);
391 kvm_vcpu_kick(vcpu); 392 kvm_vcpu_kick(vcpu);
392 break; 393 break;
393 394
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 4c3e5b2314cb..89a49fb46a27 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -477,7 +477,7 @@ static int host_mapping_level(struct kvm *kvm, gfn_t gfn)
477 477
478 addr = gfn_to_hva(kvm, gfn); 478 addr = gfn_to_hva(kvm, gfn);
479 if (kvm_is_error_hva(addr)) 479 if (kvm_is_error_hva(addr))
480 return page_size; 480 return PT_PAGE_TABLE_LEVEL;
481 481
482 down_read(&current->mm->mmap_sem); 482 down_read(&current->mm->mmap_sem);
483 vma = find_vma(current->mm, addr); 483 vma = find_vma(current->mm, addr);
@@ -515,11 +515,9 @@ static int mapping_level(struct kvm_vcpu *vcpu, gfn_t large_gfn)
515 if (host_level == PT_PAGE_TABLE_LEVEL) 515 if (host_level == PT_PAGE_TABLE_LEVEL)
516 return host_level; 516 return host_level;
517 517
518 for (level = PT_DIRECTORY_LEVEL; level <= host_level; ++level) { 518 for (level = PT_DIRECTORY_LEVEL; level <= host_level; ++level)
519
520 if (has_wrprotected_page(vcpu->kvm, large_gfn, level)) 519 if (has_wrprotected_page(vcpu->kvm, large_gfn, level))
521 break; 520 break;
522 }
523 521
524 return level - 1; 522 return level - 1;
525} 523}
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 58a0f1e88596..ede2131a9225 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -150,7 +150,9 @@ walk:
150 walker->table_gfn[walker->level - 1] = table_gfn; 150 walker->table_gfn[walker->level - 1] = table_gfn;
151 walker->pte_gpa[walker->level - 1] = pte_gpa; 151 walker->pte_gpa[walker->level - 1] = pte_gpa;
152 152
153 kvm_read_guest(vcpu->kvm, pte_gpa, &pte, sizeof(pte)); 153 if (kvm_read_guest(vcpu->kvm, pte_gpa, &pte, sizeof(pte)))
154 goto not_present;
155
154 trace_kvm_mmu_paging_element(pte, walker->level); 156 trace_kvm_mmu_paging_element(pte, walker->level);
155 157
156 if (!is_present_gpte(pte)) 158 if (!is_present_gpte(pte))
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 6651dbf58675..a1e1bc9d412d 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -670,7 +670,7 @@ static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock)
670{ 670{
671 static int version; 671 static int version;
672 struct pvclock_wall_clock wc; 672 struct pvclock_wall_clock wc;
673 struct timespec now, sys, boot; 673 struct timespec boot;
674 674
675 if (!wall_clock) 675 if (!wall_clock)
676 return; 676 return;
@@ -685,9 +685,7 @@ static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock)
685 * wall clock specified here. guest system time equals host 685 * wall clock specified here. guest system time equals host
686 * system time for us, thus we must fill in host boot time here. 686 * system time for us, thus we must fill in host boot time here.
687 */ 687 */
688 now = current_kernel_time(); 688 getboottime(&boot);
689 ktime_get_ts(&sys);
690 boot = ns_to_timespec(timespec_to_ns(&now) - timespec_to_ns(&sys));
691 689
692 wc.sec = boot.tv_sec; 690 wc.sec = boot.tv_sec;
693 wc.nsec = boot.tv_nsec; 691 wc.nsec = boot.tv_nsec;
@@ -762,6 +760,7 @@ static void kvm_write_guest_time(struct kvm_vcpu *v)
762 local_irq_save(flags); 760 local_irq_save(flags);
763 kvm_get_msr(v, MSR_IA32_TSC, &vcpu->hv_clock.tsc_timestamp); 761 kvm_get_msr(v, MSR_IA32_TSC, &vcpu->hv_clock.tsc_timestamp);
764 ktime_get_ts(&ts); 762 ktime_get_ts(&ts);
763 monotonic_to_bootbased(&ts);
765 local_irq_restore(flags); 764 local_irq_restore(flags);
766 765
767 /* With all the info we got, fill in the values */ 766 /* With all the info we got, fill in the values */
@@ -5072,12 +5071,13 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
5072 GFP_KERNEL); 5071 GFP_KERNEL);
5073 if (!vcpu->arch.mce_banks) { 5072 if (!vcpu->arch.mce_banks) {
5074 r = -ENOMEM; 5073 r = -ENOMEM;
5075 goto fail_mmu_destroy; 5074 goto fail_free_lapic;
5076 } 5075 }
5077 vcpu->arch.mcg_cap = KVM_MAX_MCE_BANKS; 5076 vcpu->arch.mcg_cap = KVM_MAX_MCE_BANKS;
5078 5077
5079 return 0; 5078 return 0;
5080 5079fail_free_lapic:
5080 kvm_free_lapic(vcpu);
5081fail_mmu_destroy: 5081fail_mmu_destroy:
5082 kvm_mmu_destroy(vcpu); 5082 kvm_mmu_destroy(vcpu);
5083fail_free_pio_data: 5083fail_free_pio_data:
@@ -5088,6 +5088,7 @@ fail:
5088 5088
5089void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) 5089void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
5090{ 5090{
5091 kfree(vcpu->arch.mce_banks);
5091 kvm_free_lapic(vcpu); 5092 kvm_free_lapic(vcpu);
5092 down_read(&vcpu->kvm->slots_lock); 5093 down_read(&vcpu->kvm->slots_lock);
5093 kvm_mmu_destroy(vcpu); 5094 kvm_mmu_destroy(vcpu);
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index cffd754f3039..419386c24b82 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -14,7 +14,7 @@ $(obj)/inat.o: $(obj)/inat-tables.c
14 14
15clean-files := inat-tables.c 15clean-files := inat-tables.c
16 16
17obj-$(CONFIG_SMP) += msr-smp.o 17obj-$(CONFIG_SMP) += msr-smp.o cache-smp.o
18 18
19lib-y := delay.o 19lib-y := delay.o
20lib-y += thunk_$(BITS).o 20lib-y += thunk_$(BITS).o
@@ -34,9 +34,10 @@ ifneq ($(CONFIG_X86_CMPXCHG64),y)
34endif 34endif
35 lib-$(CONFIG_X86_USE_3DNOW) += mmx_32.o 35 lib-$(CONFIG_X86_USE_3DNOW) += mmx_32.o
36else 36else
37 obj-y += io_64.o iomap_copy_64.o 37 obj-y += iomap_copy_64.o
38 lib-y += csum-partial_64.o csum-copy_64.o csum-wrappers_64.o 38 lib-y += csum-partial_64.o csum-copy_64.o csum-wrappers_64.o
39 lib-y += thunk_64.o clear_page_64.o copy_page_64.o 39 lib-y += thunk_64.o clear_page_64.o copy_page_64.o
40 lib-y += memmove_64.o memset_64.o 40 lib-y += memmove_64.o memset_64.o
41 lib-y += copy_user_64.o rwlock_64.o copy_user_nocache_64.o 41 lib-y += copy_user_64.o rwlock_64.o copy_user_nocache_64.o
42 lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem_64.o
42endif 43endif
diff --git a/arch/x86/lib/cache-smp.c b/arch/x86/lib/cache-smp.c
new file mode 100644
index 000000000000..a3c668875038
--- /dev/null
+++ b/arch/x86/lib/cache-smp.c
@@ -0,0 +1,19 @@
1#include <linux/smp.h>
2#include <linux/module.h>
3
4static void __wbinvd(void *dummy)
5{
6 wbinvd();
7}
8
9void wbinvd_on_cpu(int cpu)
10{
11 smp_call_function_single(cpu, __wbinvd, NULL, 1);
12}
13EXPORT_SYMBOL(wbinvd_on_cpu);
14
15int wbinvd_on_all_cpus(void)
16{
17 return on_each_cpu(__wbinvd, NULL, 1);
18}
19EXPORT_SYMBOL(wbinvd_on_all_cpus);
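cache-smp.c gives callers a wbinvd that runs on a chosen CPU (or all of them) via an IPI, instead of each caller open-coding smp_call_function_single(). A minimal usage sketch; the CPU number is arbitrary and the return value is whatever on_each_cpu() reports:

        /* sketch: flush caches before handing memory to a non-coherent agent */
        static int example_flush_caches(void)
        {
                wbinvd_on_cpu(0);               /* one specific CPU            */
                return wbinvd_on_all_cpus();    /* or every online CPU via IPI */
        }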
diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S
index cf889d4e076a..71100c98e337 100644
--- a/arch/x86/lib/copy_user_64.S
+++ b/arch/x86/lib/copy_user_64.S
@@ -90,12 +90,6 @@ ENTRY(_copy_from_user)
90 CFI_ENDPROC 90 CFI_ENDPROC
91ENDPROC(_copy_from_user) 91ENDPROC(_copy_from_user)
92 92
93ENTRY(copy_user_generic)
94 CFI_STARTPROC
95 ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string
96 CFI_ENDPROC
97ENDPROC(copy_user_generic)
98
99 .section .fixup,"ax" 93 .section .fixup,"ax"
100 /* must zero dest */ 94 /* must zero dest */
101ENTRY(bad_from_user) 95ENTRY(bad_from_user)
diff --git a/arch/x86/lib/io_64.c b/arch/x86/lib/io_64.c
deleted file mode 100644
index 3f1eb59b5f08..000000000000
--- a/arch/x86/lib/io_64.c
+++ /dev/null
@@ -1,25 +0,0 @@
1#include <linux/string.h>
2#include <linux/module.h>
3#include <asm/io.h>
4
5void __memcpy_toio(unsigned long dst, const void *src, unsigned len)
6{
7 __inline_memcpy((void *)dst, src, len);
8}
9EXPORT_SYMBOL(__memcpy_toio);
10
11void __memcpy_fromio(void *dst, unsigned long src, unsigned len)
12{
13 __inline_memcpy(dst, (const void *)src, len);
14}
15EXPORT_SYMBOL(__memcpy_fromio);
16
17void memset_io(volatile void __iomem *a, int b, size_t c)
18{
19 /*
20 * TODO: memset can mangle the IO patterns quite a bit.
21 * perhaps it would be better to use a dumb one:
22 */
23 memset((void *)a, b, c);
24}
25EXPORT_SYMBOL(memset_io);
diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S
index ad5441ed1b57..f82e884928af 100644
--- a/arch/x86/lib/memcpy_64.S
+++ b/arch/x86/lib/memcpy_64.S
@@ -20,12 +20,11 @@
20/* 20/*
21 * memcpy_c() - fast string ops (REP MOVSQ) based variant. 21 * memcpy_c() - fast string ops (REP MOVSQ) based variant.
22 * 22 *
23 * Calls to this get patched into the kernel image via the 23 * This gets patched over the unrolled variant (below) via the
24 * alternative instructions framework: 24 * alternative instructions framework:
25 */ 25 */
26 ALIGN 26 .section .altinstr_replacement, "ax", @progbits
27memcpy_c: 27.Lmemcpy_c:
28 CFI_STARTPROC
29 movq %rdi, %rax 28 movq %rdi, %rax
30 29
31 movl %edx, %ecx 30 movl %edx, %ecx
@@ -35,8 +34,8 @@ memcpy_c:
35 movl %edx, %ecx 34 movl %edx, %ecx
36 rep movsb 35 rep movsb
37 ret 36 ret
38 CFI_ENDPROC 37.Lmemcpy_e:
39ENDPROC(memcpy_c) 38 .previous
40 39
41ENTRY(__memcpy) 40ENTRY(__memcpy)
42ENTRY(memcpy) 41ENTRY(memcpy)
@@ -128,16 +127,10 @@ ENDPROC(__memcpy)
128 * It is also a lot simpler. Use this when possible: 127 * It is also a lot simpler. Use this when possible:
129 */ 128 */
130 129
131 .section .altinstr_replacement, "ax"
1321: .byte 0xeb /* jmp <disp8> */
133 .byte (memcpy_c - memcpy) - (2f - 1b) /* offset */
1342:
135 .previous
136
137 .section .altinstructions, "a" 130 .section .altinstructions, "a"
138 .align 8 131 .align 8
139 .quad memcpy 132 .quad memcpy
140 .quad 1b 133 .quad .Lmemcpy_c
141 .byte X86_FEATURE_REP_GOOD 134 .byte X86_FEATURE_REP_GOOD
142 135
143 /* 136 /*
@@ -145,6 +138,6 @@ ENDPROC(__memcpy)
145 * so it is silly to overwrite itself with nops - reboot is the 138 * so it is silly to overwrite itself with nops - reboot is the
146 * only outcome... 139 * only outcome...
147 */ 140 */
148 .byte 2b - 1b 141 .byte .Lmemcpy_e - .Lmemcpy_c
149 .byte 2b - 1b 142 .byte .Lmemcpy_e - .Lmemcpy_c
150 .previous 143 .previous
diff --git a/arch/x86/lib/memset_64.S b/arch/x86/lib/memset_64.S
index 2c5948116bd2..e88d3b81644a 100644
--- a/arch/x86/lib/memset_64.S
+++ b/arch/x86/lib/memset_64.S
@@ -12,9 +12,8 @@
12 * 12 *
13 * rax original destination 13 * rax original destination
14 */ 14 */
15 ALIGN 15 .section .altinstr_replacement, "ax", @progbits
16memset_c: 16.Lmemset_c:
17 CFI_STARTPROC
18 movq %rdi,%r9 17 movq %rdi,%r9
19 movl %edx,%r8d 18 movl %edx,%r8d
20 andl $7,%r8d 19 andl $7,%r8d
@@ -29,8 +28,8 @@ memset_c:
29 rep stosb 28 rep stosb
30 movq %r9,%rax 29 movq %r9,%rax
31 ret 30 ret
32 CFI_ENDPROC 31.Lmemset_e:
33ENDPROC(memset_c) 32 .previous
34 33
35ENTRY(memset) 34ENTRY(memset)
36ENTRY(__memset) 35ENTRY(__memset)
@@ -118,16 +117,11 @@ ENDPROC(__memset)
118 117
119#include <asm/cpufeature.h> 118#include <asm/cpufeature.h>
120 119
121 .section .altinstr_replacement,"ax"
1221: .byte 0xeb /* jmp <disp8> */
123 .byte (memset_c - memset) - (2f - 1b) /* offset */
1242:
125 .previous
126 .section .altinstructions,"a" 120 .section .altinstructions,"a"
127 .align 8 121 .align 8
128 .quad memset 122 .quad memset
129 .quad 1b 123 .quad .Lmemset_c
130 .byte X86_FEATURE_REP_GOOD 124 .byte X86_FEATURE_REP_GOOD
131 .byte .Lfinal - memset 125 .byte .Lfinal - memset
132 .byte 2b - 1b 126 .byte .Lmemset_e - .Lmemset_c
133 .previous 127 .previous
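Both memcpy_64.S and memset_64.S now put the REP-string variant straight into .altinstr_replacement and let the alternatives framework copy it over the unrolled code, rather than patching in a two-byte jmp. A C-shaped sketch of what one .altinstructions record above encodes; the field names are illustrative, the authoritative layout is struct alt_instr in <asm/alternative.h>:

        /* sketch of one record emitted into .altinstructions above */
        struct alt_instr_sketch {
                unsigned long instr;            /* .quad memcpy: site to patch      */
                unsigned long replacement;      /* .quad .Lmemcpy_c: bytes to copy  */
                unsigned char cpuid;            /* .byte X86_FEATURE_REP_GOOD       */
                unsigned char instrlen;         /* length of the patched-over code  */
                unsigned char replacementlen;   /* .byte .Lmemcpy_e - .Lmemcpy_c    */
        };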
diff --git a/arch/x86/lib/rwsem_64.S b/arch/x86/lib/rwsem_64.S
new file mode 100644
index 000000000000..15acecf0d7aa
--- /dev/null
+++ b/arch/x86/lib/rwsem_64.S
@@ -0,0 +1,81 @@
1/*
2 * x86-64 rwsem wrappers
3 *
4 * This interfaces the inline asm code to the slow-path
5 * C routines. We need to save the call-clobbered regs
6 * that the asm does not mark as clobbered, and move the
7 * argument from %rax to %rdi.
8 *
9 * NOTE! We don't need to save %rax, because the functions
10 * will always return the semaphore pointer in %rax (which
11 * is also the input argument to these helpers)
12 *
13 * The following can clobber %rdx because the asm clobbers it:
14 * call_rwsem_down_write_failed
15 * call_rwsem_wake
16 * but %rdi, %rsi, %rcx, %r8-r11 always need saving.
17 */
18
19#include <linux/linkage.h>
20#include <asm/rwlock.h>
21#include <asm/alternative-asm.h>
22#include <asm/frame.h>
23#include <asm/dwarf2.h>
24
25#define save_common_regs \
26 pushq %rdi; \
27 pushq %rsi; \
28 pushq %rcx; \
29 pushq %r8; \
30 pushq %r9; \
31 pushq %r10; \
32 pushq %r11
33
34#define restore_common_regs \
35 popq %r11; \
36 popq %r10; \
37 popq %r9; \
38 popq %r8; \
39 popq %rcx; \
40 popq %rsi; \
41 popq %rdi
42
43/* Fix up special calling conventions */
44ENTRY(call_rwsem_down_read_failed)
45 save_common_regs
46 pushq %rdx
47 movq %rax,%rdi
48 call rwsem_down_read_failed
49 popq %rdx
50 restore_common_regs
51 ret
52 ENDPROC(call_rwsem_down_read_failed)
53
54ENTRY(call_rwsem_down_write_failed)
55 save_common_regs
56 movq %rax,%rdi
57 call rwsem_down_write_failed
58 restore_common_regs
59 ret
60 ENDPROC(call_rwsem_down_write_failed)
61
62ENTRY(call_rwsem_wake)
63 decw %dx /* do nothing if still outstanding active readers */
64 jnz 1f
65 save_common_regs
66 movq %rax,%rdi
67 call rwsem_wake
68 restore_common_regs
691: ret
70 ENDPROC(call_rwsem_wake)
71
72/* Fix up special calling conventions */
73ENTRY(call_rwsem_downgrade_wake)
74 save_common_regs
75 pushq %rdx
76 movq %rax,%rdi
77 call rwsem_downgrade_wake
78 popq %rdx
79 restore_common_regs
80 ret
81 ENDPROC(call_rwsem_downgrade_wake)
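These thunks exist because the rwsem fast path is inline asm that leaves the semaphore pointer in %rax and does not mark the usual call-clobbered registers as clobbered; the slow-path C functions follow the normal ABI, so the wrappers save those registers and move the argument into %rdi. A hedged sketch of the fast-path side, assuming kernel context (LOCK_PREFIX, struct rw_semaphore); the real version lives in arch/x86/include/asm/rwsem.h and differs in detail:

        /* sketch: contended down_read() falling through to the wrapper above */
        static inline void down_read_sketch(struct rw_semaphore *sem)
        {
                asm volatile(LOCK_PREFIX "incq (%%rax)\n\t" /* take a read ref    */
                             "jns 1f\n\t"                   /* >= 0: uncontended  */
                             "call call_rwsem_down_read_failed\n\t"
                             "1:"
                             : : "a" (sem)                  /* pointer in %rax    */
                             : "memory", "cc");
        }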
diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c
index 71da1bca13cb..738e6593799d 100644
--- a/arch/x86/mm/gup.c
+++ b/arch/x86/mm/gup.c
@@ -18,7 +18,7 @@ static inline pte_t gup_get_pte(pte_t *ptep)
18#else 18#else
19 /* 19 /*
20 * With get_user_pages_fast, we walk down the pagetables without taking 20 * With get_user_pages_fast, we walk down the pagetables without taking
21 * any locks. For this we would like to load the pointers atoimcally, 21 * any locks. For this we would like to load the pointers atomically,
22 * but that is not possible (without expensive cmpxchg8b) on PAE. What 22 * but that is not possible (without expensive cmpxchg8b) on PAE. What
23 * we do have is the guarantee that a pte will only either go from not 23 * we do have is the guarantee that a pte will only either go from not
24 * present to present, or present to not present or both -- it will not 24 * present to present, or present to not present or both -- it will not
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index d406c5239019..e71c5cbc8f35 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -266,16 +266,9 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
266 if (!after_bootmem) 266 if (!after_bootmem)
267 find_early_table_space(end, use_pse, use_gbpages); 267 find_early_table_space(end, use_pse, use_gbpages);
268 268
269#ifdef CONFIG_X86_32
270 for (i = 0; i < nr_range; i++)
271 kernel_physical_mapping_init(mr[i].start, mr[i].end,
272 mr[i].page_size_mask);
273 ret = end;
274#else /* CONFIG_X86_64 */
275 for (i = 0; i < nr_range; i++) 269 for (i = 0; i < nr_range; i++)
276 ret = kernel_physical_mapping_init(mr[i].start, mr[i].end, 270 ret = kernel_physical_mapping_init(mr[i].start, mr[i].end,
277 mr[i].page_size_mask); 271 mr[i].page_size_mask);
278#endif
279 272
280#ifdef CONFIG_X86_32 273#ifdef CONFIG_X86_32
281 early_ioremap_page_table_range_init(); 274 early_ioremap_page_table_range_init();
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 9a0c258a86be..2226f2c70ea3 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -241,6 +241,7 @@ kernel_physical_mapping_init(unsigned long start,
241 unsigned long page_size_mask) 241 unsigned long page_size_mask)
242{ 242{
243 int use_pse = page_size_mask == (1<<PG_LEVEL_2M); 243 int use_pse = page_size_mask == (1<<PG_LEVEL_2M);
244 unsigned long last_map_addr = end;
244 unsigned long start_pfn, end_pfn; 245 unsigned long start_pfn, end_pfn;
245 pgd_t *pgd_base = swapper_pg_dir; 246 pgd_t *pgd_base = swapper_pg_dir;
246 int pgd_idx, pmd_idx, pte_ofs; 247 int pgd_idx, pmd_idx, pte_ofs;
@@ -341,9 +342,10 @@ repeat:
341 prot = PAGE_KERNEL_EXEC; 342 prot = PAGE_KERNEL_EXEC;
342 343
343 pages_4k++; 344 pages_4k++;
344 if (mapping_iter == 1) 345 if (mapping_iter == 1) {
345 set_pte(pte, pfn_pte(pfn, init_prot)); 346 set_pte(pte, pfn_pte(pfn, init_prot));
346 else 347 last_map_addr = (pfn << PAGE_SHIFT) + PAGE_SIZE;
348 } else
347 set_pte(pte, pfn_pte(pfn, prot)); 349 set_pte(pte, pfn_pte(pfn, prot));
348 } 350 }
349 } 351 }
@@ -368,7 +370,7 @@ repeat:
368 mapping_iter = 2; 370 mapping_iter = 2;
369 goto repeat; 371 goto repeat;
370 } 372 }
371 return 0; 373 return last_map_addr;
372} 374}
373 375
374pte_t *kmap_pte; 376pte_t *kmap_pte;
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 5198b9bb34ef..69ddfbd91135 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -49,6 +49,7 @@
49#include <asm/numa.h> 49#include <asm/numa.h>
50#include <asm/cacheflush.h> 50#include <asm/cacheflush.h>
51#include <asm/init.h> 51#include <asm/init.h>
52#include <linux/bootmem.h>
52 53
53static unsigned long dma_reserve __initdata; 54static unsigned long dma_reserve __initdata;
54 55
@@ -616,6 +617,21 @@ void __init paging_init(void)
616 */ 617 */
617#ifdef CONFIG_MEMORY_HOTPLUG 618#ifdef CONFIG_MEMORY_HOTPLUG
618/* 619/*
620 * After memory hotplug the variables max_pfn, max_low_pfn and high_memory need
621 * updating.
622 */
623static void update_end_of_memory_vars(u64 start, u64 size)
624{
625 unsigned long end_pfn = PFN_UP(start + size);
626
627 if (end_pfn > max_pfn) {
628 max_pfn = end_pfn;
629 max_low_pfn = end_pfn;
630 high_memory = (void *)__va(max_pfn * PAGE_SIZE - 1) + 1;
631 }
632}
633
634/*
619 * Memory is added always to NORMAL zone. This means you will never get 635 * Memory is added always to NORMAL zone. This means you will never get
620 * additional DMA/DMA32 memory. 636 * additional DMA/DMA32 memory.
621 */ 637 */
@@ -634,6 +650,9 @@ int arch_add_memory(int nid, u64 start, u64 size)
634 ret = __add_pages(nid, zone, start_pfn, nr_pages); 650 ret = __add_pages(nid, zone, start_pfn, nr_pages);
635 WARN_ON_ONCE(ret); 651 WARN_ON_ONCE(ret);
636 652
653 /* update max_pfn, max_low_pfn and high_memory */
654 update_end_of_memory_vars(start, size);
655
637 return ret; 656 return ret;
638} 657}
639EXPORT_SYMBOL_GPL(arch_add_memory); 658EXPORT_SYMBOL_GPL(arch_add_memory);
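Without the helper above, memory hot-added beyond the previous end of RAM left max_pfn, max_low_pfn and high_memory stale. A quick standalone check of the PFN_UP() rounding it relies on, using an assumed 4 KiB page size and an example hot-add range:

        #include <stdio.h>

        int main(void)
        {
                unsigned long long start = 0x100000000ULL;  /* hot-add at 4 GiB */
                unsigned long long size  = 0x40000000ULL;   /* 1 GiB            */
                unsigned int page_shift  = 12;              /* assumed 4 KiB    */

                /* PFN_UP(): round the end address up to a whole page frame */
                unsigned long long end_pfn =
                        (start + size + (1ULL << page_shift) - 1) >> page_shift;

                printf("new end_pfn = %#llx\n", end_pfn);   /* 0x140000 */
                return 0;
        }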
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index c246d259822d..5eb1ba74a3a9 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -24,43 +24,6 @@
24 24
25#include "physaddr.h" 25#include "physaddr.h"
26 26
27int page_is_ram(unsigned long pagenr)
28{
29 resource_size_t addr, end;
30 int i;
31
32 /*
33 * A special case is the first 4Kb of memory;
34 * This is a BIOS owned area, not kernel ram, but generally
35 * not listed as such in the E820 table.
36 */
37 if (pagenr == 0)
38 return 0;
39
40 /*
41 * Second special case: Some BIOSen report the PC BIOS
42 * area (640->1Mb) as ram even though it is not.
43 */
44 if (pagenr >= (BIOS_BEGIN >> PAGE_SHIFT) &&
45 pagenr < (BIOS_END >> PAGE_SHIFT))
46 return 0;
47
48 for (i = 0; i < e820.nr_map; i++) {
49 /*
50 * Not usable memory:
51 */
52 if (e820.map[i].type != E820_RAM)
53 continue;
54 addr = (e820.map[i].addr + PAGE_SIZE-1) >> PAGE_SHIFT;
55 end = (e820.map[i].addr + e820.map[i].size) >> PAGE_SHIFT;
56
57
58 if ((pagenr >= addr) && (pagenr < end))
59 return 1;
60 }
61 return 0;
62}
63
64/* 27/*
65 * Fix up the linear direct mapping of the kernel to avoid cache attribute 28 * Fix up the linear direct mapping of the kernel to avoid cache attribute
66 * conflicts. 29 * conflicts.
@@ -422,6 +385,10 @@ void __init early_ioremap_init(void)
422 * The boot-ioremap range spans multiple pmds, for which 385 * The boot-ioremap range spans multiple pmds, for which
423 * we are not prepared: 386 * we are not prepared:
424 */ 387 */
388#define __FIXADDR_TOP (-PAGE_SIZE)
389 BUILD_BUG_ON((__fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT)
390 != (__fix_to_virt(FIX_BTMAP_END) >> PMD_SHIFT));
391#undef __FIXADDR_TOP
425 if (pmd != early_ioremap_pmd(fix_to_virt(FIX_BTMAP_END))) { 392 if (pmd != early_ioremap_pmd(fix_to_virt(FIX_BTMAP_END))) {
426 WARN_ON(1); 393 WARN_ON(1);
427 printk(KERN_WARNING "pmd %p != %p\n", 394 printk(KERN_WARNING "pmd %p != %p\n",
diff --git a/arch/x86/mm/kmemcheck/kmemcheck.c b/arch/x86/mm/kmemcheck/kmemcheck.c
index 8cc183344140..b3b531a4f8e5 100644
--- a/arch/x86/mm/kmemcheck/kmemcheck.c
+++ b/arch/x86/mm/kmemcheck/kmemcheck.c
@@ -337,7 +337,7 @@ bool kmemcheck_is_obj_initialized(unsigned long addr, size_t size)
337 if (!shadow) 337 if (!shadow)
338 return true; 338 return true;
339 339
340 status = kmemcheck_shadow_test(shadow, size); 340 status = kmemcheck_shadow_test_all(shadow, size);
341 341
342 return status == KMEMCHECK_SHADOW_INITIALIZED; 342 return status == KMEMCHECK_SHADOW_INITIALIZED;
343} 343}
diff --git a/arch/x86/mm/kmemcheck/shadow.c b/arch/x86/mm/kmemcheck/shadow.c
index 3f66b82076a3..aec124214d97 100644
--- a/arch/x86/mm/kmemcheck/shadow.c
+++ b/arch/x86/mm/kmemcheck/shadow.c
@@ -125,12 +125,12 @@ void kmemcheck_mark_initialized_pages(struct page *p, unsigned int n)
125 125
126enum kmemcheck_shadow kmemcheck_shadow_test(void *shadow, unsigned int size) 126enum kmemcheck_shadow kmemcheck_shadow_test(void *shadow, unsigned int size)
127{ 127{
128#ifdef CONFIG_KMEMCHECK_PARTIAL_OK
128 uint8_t *x; 129 uint8_t *x;
129 unsigned int i; 130 unsigned int i;
130 131
131 x = shadow; 132 x = shadow;
132 133
133#ifdef CONFIG_KMEMCHECK_PARTIAL_OK
134 /* 134 /*
135 * Make sure _some_ bytes are initialized. Gcc frequently generates 135 * Make sure _some_ bytes are initialized. Gcc frequently generates
136 * code to access neighboring bytes. 136 * code to access neighboring bytes.
@@ -139,13 +139,25 @@ enum kmemcheck_shadow kmemcheck_shadow_test(void *shadow, unsigned int size)
139 if (x[i] == KMEMCHECK_SHADOW_INITIALIZED) 139 if (x[i] == KMEMCHECK_SHADOW_INITIALIZED)
140 return x[i]; 140 return x[i];
141 } 141 }
142
143 return x[0];
142#else 144#else
145 return kmemcheck_shadow_test_all(shadow, size);
146#endif
147}
148
149enum kmemcheck_shadow kmemcheck_shadow_test_all(void *shadow, unsigned int size)
150{
151 uint8_t *x;
152 unsigned int i;
153
154 x = shadow;
155
143 /* All bytes must be initialized. */ 156 /* All bytes must be initialized. */
144 for (i = 0; i < size; ++i) { 157 for (i = 0; i < size; ++i) {
145 if (x[i] != KMEMCHECK_SHADOW_INITIALIZED) 158 if (x[i] != KMEMCHECK_SHADOW_INITIALIZED)
146 return x[i]; 159 return x[i];
147 } 160 }
148#endif
149 161
150 return x[0]; 162 return x[0];
151} 163}
diff --git a/arch/x86/mm/kmemcheck/shadow.h b/arch/x86/mm/kmemcheck/shadow.h
index af46d9ab9d86..ff0b2f70fbcb 100644
--- a/arch/x86/mm/kmemcheck/shadow.h
+++ b/arch/x86/mm/kmemcheck/shadow.h
@@ -11,6 +11,8 @@ enum kmemcheck_shadow {
11void *kmemcheck_shadow_lookup(unsigned long address); 11void *kmemcheck_shadow_lookup(unsigned long address);
12 12
13enum kmemcheck_shadow kmemcheck_shadow_test(void *shadow, unsigned int size); 13enum kmemcheck_shadow kmemcheck_shadow_test(void *shadow, unsigned int size);
14enum kmemcheck_shadow kmemcheck_shadow_test_all(void *shadow,
15 unsigned int size);
14void kmemcheck_shadow_set(void *shadow, unsigned int size); 16void kmemcheck_shadow_set(void *shadow, unsigned int size);
15 17
16#endif 18#endif
diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c
index c8191defc38a..1dab5194fd9d 100644
--- a/arch/x86/mm/mmap.c
+++ b/arch/x86/mm/mmap.c
@@ -71,7 +71,7 @@ static int mmap_is_legacy(void)
71 if (current->personality & ADDR_COMPAT_LAYOUT) 71 if (current->personality & ADDR_COMPAT_LAYOUT)
72 return 1; 72 return 1;
73 73
74 if (current->signal->rlim[RLIMIT_STACK].rlim_cur == RLIM_INFINITY) 74 if (rlimit(RLIMIT_STACK) == RLIM_INFINITY)
75 return 1; 75 return 1;
76 76
77 return sysctl_legacy_va_layout; 77 return sysctl_legacy_va_layout;
@@ -96,7 +96,7 @@ static unsigned long mmap_rnd(void)
96 96
97static unsigned long mmap_base(void) 97static unsigned long mmap_base(void)
98{ 98{
99 unsigned long gap = current->signal->rlim[RLIMIT_STACK].rlim_cur; 99 unsigned long gap = rlimit(RLIMIT_STACK);
100 100
101 if (gap < MIN_GAP) 101 if (gap < MIN_GAP)
102 gap = MIN_GAP; 102 gap = MIN_GAP;
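rlimit() is a small accessor over current's resource limits and replaces the open-coded signal->rlim[RLIMIT_STACK].rlim_cur chain in both hunks above. A hedged sketch of what such an accessor boils down to; this is an illustration, not the exact helper from <linux/sched.h>:

        /* sketch: accessor equivalent to the expression it replaces */
        static inline unsigned long rlimit_sketch(unsigned int limit)
        {
                return current->signal->rlim[limit].rlim_cur;
        }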
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 83bbc70d11bb..3307ea8bd43a 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -427,7 +427,7 @@ static int __init split_nodes_interleave(u64 addr, u64 max_addr,
427 * Calculate the number of big nodes that can be allocated as a result 427 * Calculate the number of big nodes that can be allocated as a result
428 * of consolidating the remainder. 428 * of consolidating the remainder.
429 */ 429 */
430 big = ((size & ~FAKE_NODE_MIN_HASH_MASK) & nr_nodes) / 430 big = ((size & ~FAKE_NODE_MIN_HASH_MASK) * nr_nodes) /
431 FAKE_NODE_MIN_SIZE; 431 FAKE_NODE_MIN_SIZE;
432 432
433 size &= FAKE_NODE_MIN_HASH_MASK; 433 size &= FAKE_NODE_MIN_HASH_MASK;
@@ -502,77 +502,99 @@ static int __init split_nodes_interleave(u64 addr, u64 max_addr,
502} 502}
503 503
504/* 504/*
505 * Splits num_nodes nodes up equally starting at node_start. The return value 505 * Returns the end address of a node so that there is at least `size' amount of
506 * is the number of nodes split up and addr is adjusted to be at the end of the 506 * non-reserved memory or `max_addr' is reached.
507 * last node allocated.
508 */ 507 */
509static int __init split_nodes_equally(u64 *addr, u64 max_addr, int node_start, 508static u64 __init find_end_of_node(u64 start, u64 max_addr, u64 size)
510 int num_nodes)
511{ 509{
512 unsigned int big; 510 u64 end = start + size;
513 u64 size;
514 int i;
515
516 if (num_nodes <= 0)
517 return -1;
518 if (num_nodes > MAX_NUMNODES)
519 num_nodes = MAX_NUMNODES;
520 size = (max_addr - *addr - e820_hole_size(*addr, max_addr)) /
521 num_nodes;
522 /*
523 * Calculate the number of big nodes that can be allocated as a result
524 * of consolidating the leftovers.
525 */
526 big = ((size & ~FAKE_NODE_MIN_HASH_MASK) * num_nodes) /
527 FAKE_NODE_MIN_SIZE;
528
529 /* Round down to nearest FAKE_NODE_MIN_SIZE. */
530 size &= FAKE_NODE_MIN_HASH_MASK;
531 if (!size) {
532 printk(KERN_ERR "Not enough memory for each node. "
533 "NUMA emulation disabled.\n");
534 return -1;
535 }
536
537 for (i = node_start; i < num_nodes + node_start; i++) {
538 u64 end = *addr + size;
539 511
540 if (i < big) 512 while (end - start - e820_hole_size(start, end) < size) {
541 end += FAKE_NODE_MIN_SIZE; 513 end += FAKE_NODE_MIN_SIZE;
542 /* 514 if (end > max_addr) {
543 * The final node can have the remaining system RAM. Other
544 * nodes receive roughly the same amount of available pages.
545 */
546 if (i == num_nodes + node_start - 1)
547 end = max_addr; 515 end = max_addr;
548 else
549 while (end - *addr - e820_hole_size(*addr, end) <
550 size) {
551 end += FAKE_NODE_MIN_SIZE;
552 if (end > max_addr) {
553 end = max_addr;
554 break;
555 }
556 }
557 if (setup_node_range(i, addr, end - *addr, max_addr) < 0)
558 break; 516 break;
517 }
559 } 518 }
560 return i - node_start + 1; 519 return end;
561} 520}
562 521
563/* 522/*
564 * Splits the remaining system RAM into chunks of size. The remaining memory is 523 * Sets up fake nodes of `size' interleaved over physical nodes ranging from
565 * always assigned to a final node and can be asymmetric. Returns the number of 524 * `addr' to `max_addr'. The return value is the number of nodes allocated.
566 * nodes split.
567 */ 525 */
568static int __init split_nodes_by_size(u64 *addr, u64 max_addr, int node_start, 526static int __init split_nodes_size_interleave(u64 addr, u64 max_addr, u64 size)
569 u64 size)
570{ 527{
571 int i = node_start; 528 nodemask_t physnode_mask = NODE_MASK_NONE;
572 size = (size << 20) & FAKE_NODE_MIN_HASH_MASK; 529 u64 min_size;
573 while (!setup_node_range(i++, addr, size, max_addr)) 530 int ret = 0;
574 ; 531 int i;
575 return i - node_start; 532
533 if (!size)
534 return -1;
535 /*
536 * The limit on emulated nodes is MAX_NUMNODES, so the size per node is
537 * increased accordingly if the requested size is too small. This
538 * creates a uniform distribution of node sizes across the entire
539 * machine (but not necessarily over physical nodes).
540 */
541 min_size = (max_addr - addr - e820_hole_size(addr, max_addr)) /
542 MAX_NUMNODES;
543 min_size = max(min_size, FAKE_NODE_MIN_SIZE);
544 if ((min_size & FAKE_NODE_MIN_HASH_MASK) < min_size)
545 min_size = (min_size + FAKE_NODE_MIN_SIZE) &
546 FAKE_NODE_MIN_HASH_MASK;
547 if (size < min_size) {
548 pr_err("Fake node size %LuMB too small, increasing to %LuMB\n",
549 size >> 20, min_size >> 20);
550 size = min_size;
551 }
552 size &= FAKE_NODE_MIN_HASH_MASK;
553
554 for (i = 0; i < MAX_NUMNODES; i++)
555 if (physnodes[i].start != physnodes[i].end)
556 node_set(i, physnode_mask);
557 /*
558 * Fill physical nodes with fake nodes of size until there is no memory
559 * left on any of them.
560 */
561 while (nodes_weight(physnode_mask)) {
562 for_each_node_mask(i, physnode_mask) {
563 u64 dma32_end = MAX_DMA32_PFN << PAGE_SHIFT;
564 u64 end;
565
566 end = find_end_of_node(physnodes[i].start,
567 physnodes[i].end, size);
568 /*
569 * If there won't be at least FAKE_NODE_MIN_SIZE of
570 * non-reserved memory in ZONE_DMA32 for the next node,
571 * this one must extend to the boundary.
572 */
573 if (end < dma32_end && dma32_end - end -
574 e820_hole_size(end, dma32_end) < FAKE_NODE_MIN_SIZE)
575 end = dma32_end;
576
577 /*
578 * If there won't be enough non-reserved memory for the
579 * next node, this one must extend to the end of the
580 * physical node.
581 */
582 if (physnodes[i].end - end -
583 e820_hole_size(end, physnodes[i].end) < size)
584 end = physnodes[i].end;
585
586 /*
587 * Setup the fake node that will be allocated as bootmem
588 * later. If setup_node_range() returns non-zero, there
589 * is no more memory available on this physical node.
590 */
591 if (setup_node_range(ret++, &physnodes[i].start,
592 end - physnodes[i].start,
593 physnodes[i].end) < 0)
594 node_clear(i, physnode_mask);
595 }
596 }
597 return ret;
576} 598}
577 599
578/* 600/*
@@ -582,87 +604,32 @@ static int __init split_nodes_by_size(u64 *addr, u64 max_addr, int node_start,
582static int __init numa_emulation(unsigned long start_pfn, 604static int __init numa_emulation(unsigned long start_pfn,
583 unsigned long last_pfn, int acpi, int k8) 605 unsigned long last_pfn, int acpi, int k8)
584{ 606{
585 u64 size, addr = start_pfn << PAGE_SHIFT; 607 u64 addr = start_pfn << PAGE_SHIFT;
586 u64 max_addr = last_pfn << PAGE_SHIFT; 608 u64 max_addr = last_pfn << PAGE_SHIFT;
587 int num_nodes = 0, num = 0, coeff_flag, coeff = -1, i;
588 int num_phys_nodes; 609 int num_phys_nodes;
610 int num_nodes;
611 int i;
589 612
590 num_phys_nodes = setup_physnodes(addr, max_addr, acpi, k8); 613 num_phys_nodes = setup_physnodes(addr, max_addr, acpi, k8);
591 /* 614 /*
592 * If the numa=fake command-line is just a single number N, split the 615 * If the numa=fake command-line contains a 'M' or 'G', it represents
593 * system RAM into N fake nodes. 616 * the fixed node size. Otherwise, if it is just a single number N,
617 * split the system RAM into N fake nodes.
594 */ 618 */
595 if (!strchr(cmdline, '*') && !strchr(cmdline, ',')) { 619 if (strchr(cmdline, 'M') || strchr(cmdline, 'G')) {
596 long n = simple_strtol(cmdline, NULL, 0); 620 u64 size;
597
598 num_nodes = split_nodes_interleave(addr, max_addr,
599 num_phys_nodes, n);
600 if (num_nodes < 0)
601 return num_nodes;
602 goto out;
603 }
604 621
605 /* Parse the command line. */ 622 size = memparse(cmdline, &cmdline);
606 for (coeff_flag = 0; ; cmdline++) { 623 num_nodes = split_nodes_size_interleave(addr, max_addr, size);
607 if (*cmdline && isdigit(*cmdline)) { 624 } else {
608 num = num * 10 + *cmdline - '0'; 625 unsigned long n;
609 continue; 626
610 } 627 n = simple_strtoul(cmdline, NULL, 0);
611 if (*cmdline == '*') { 628 num_nodes = split_nodes_interleave(addr, max_addr, num_phys_nodes, n);
612 if (num > 0)
613 coeff = num;
614 coeff_flag = 1;
615 }
616 if (!*cmdline || *cmdline == ',') {
617 if (!coeff_flag)
618 coeff = 1;
619 /*
620 * Round down to the nearest FAKE_NODE_MIN_SIZE.
621 * Command-line coefficients are in megabytes.
622 */
623 size = ((u64)num << 20) & FAKE_NODE_MIN_HASH_MASK;
624 if (size)
625 for (i = 0; i < coeff; i++, num_nodes++)
626 if (setup_node_range(num_nodes, &addr,
627 size, max_addr) < 0)
628 goto done;
629 if (!*cmdline)
630 break;
631 coeff_flag = 0;
632 coeff = -1;
633 }
634 num = 0;
635 }
636done:
637 if (!num_nodes)
638 return -1;
639 /* Fill remainder of system RAM, if appropriate. */
640 if (addr < max_addr) {
641 if (coeff_flag && coeff < 0) {
642 /* Split remaining nodes into num-sized chunks */
643 num_nodes += split_nodes_by_size(&addr, max_addr,
644 num_nodes, num);
645 goto out;
646 }
647 switch (*(cmdline - 1)) {
648 case '*':
649 /* Split remaining nodes into coeff chunks */
650 if (coeff <= 0)
651 break;
652 num_nodes += split_nodes_equally(&addr, max_addr,
653 num_nodes, coeff);
654 break;
655 case ',':
656 /* Do not allocate remaining system RAM */
657 break;
658 default:
659 /* Give one final node */
660 setup_node_range(num_nodes, &addr, max_addr - addr,
661 max_addr);
662 num_nodes++;
663 }
664 } 629 }
665out: 630
631 if (num_nodes < 0)
632 return num_nodes;
666 memnode_shift = compute_hash_shift(nodes, num_nodes, NULL); 633 memnode_shift = compute_hash_shift(nodes, num_nodes, NULL);
667 if (memnode_shift < 0) { 634 if (memnode_shift < 0) {
668 memnode_shift = 0; 635 memnode_shift = 0;
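The rewritten numa=fake parser accepts either a plain node count ("numa=fake=8") or a fixed node size with an M or G suffix ("numa=fake=128M"), replacing the old '*' and ',' grammar. A standalone sketch of that dispatch, with strtoull standing in for the kernel's memparse()/simple_strtoul():

        #include <stdio.h>
        #include <stdlib.h>
        #include <string.h>

        int main(void)
        {
                const char *cmdline = "128M";   /* example numa=fake= argument */
                char *end;
                unsigned long long val = strtoull(cmdline, &end, 0);

                if (strchr(cmdline, 'M') || strchr(cmdline, 'G')) {
                        if (*end == 'M')
                                val <<= 20;
                        else if (*end == 'G')
                                val <<= 30;
                        printf("fixed fake-node size: %llu bytes\n", val);
                } else {
                        printf("interleave RAM into %llu fake nodes\n", val);
                }
                return 0;
        }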
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index ed34f5e35999..c9ba9deafe83 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -6,6 +6,14 @@
6 6
7#define PGALLOC_GFP GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO 7#define PGALLOC_GFP GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO
8 8
9#ifdef CONFIG_HIGHPTE
10#define PGALLOC_USER_GFP __GFP_HIGHMEM
11#else
12#define PGALLOC_USER_GFP 0
13#endif
14
15gfp_t __userpte_alloc_gfp = PGALLOC_GFP | PGALLOC_USER_GFP;
16
9pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) 17pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
10{ 18{
11 return (pte_t *)__get_free_page(PGALLOC_GFP); 19 return (pte_t *)__get_free_page(PGALLOC_GFP);
@@ -15,16 +23,29 @@ pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
15{ 23{
16 struct page *pte; 24 struct page *pte;
17 25
18#ifdef CONFIG_HIGHPTE 26 pte = alloc_pages(__userpte_alloc_gfp, 0);
19 pte = alloc_pages(PGALLOC_GFP | __GFP_HIGHMEM, 0);
20#else
21 pte = alloc_pages(PGALLOC_GFP, 0);
22#endif
23 if (pte) 27 if (pte)
24 pgtable_page_ctor(pte); 28 pgtable_page_ctor(pte);
25 return pte; 29 return pte;
26} 30}
27 31
32static int __init setup_userpte(char *arg)
33{
34 if (!arg)
35 return -EINVAL;
36
37 /*
38 * "userpte=nohigh" disables allocation of user pagetables in
39 * high memory.
40 */
41 if (strcmp(arg, "nohigh") == 0)
42 __userpte_alloc_gfp &= ~__GFP_HIGHMEM;
43 else
44 return -EINVAL;
45 return 0;
46}
47early_param("userpte", setup_userpte);
48
28void ___pte_free_tlb(struct mmu_gather *tlb, struct page *pte) 49void ___pte_free_tlb(struct mmu_gather *tlb, struct page *pte)
29{ 50{
30 pgtable_page_dtor(pte); 51 pgtable_page_dtor(pte);
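With CONFIG_HIGHPTE, user page tables default to highmem; the new early parameter lets that be disabled at boot instead of requiring a rebuild. Usage is simply

        userpte=nohigh

on the kernel command line, which clears __GFP_HIGHMEM from __userpte_alloc_gfp before the first user page table is allocated.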
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c
index a27124185fc1..28c68762648f 100644
--- a/arch/x86/mm/srat_64.c
+++ b/arch/x86/mm/srat_64.c
@@ -229,9 +229,11 @@ update_nodes_add(int node, unsigned long start, unsigned long end)
229 printk(KERN_ERR "SRAT: Hotplug zone not continuous. Partly ignored\n"); 229 printk(KERN_ERR "SRAT: Hotplug zone not continuous. Partly ignored\n");
230 } 230 }
231 231
232 if (changed) 232 if (changed) {
233 node_set(node, cpu_nodes_parsed);
233 printk(KERN_INFO "SRAT: hot plug zone found %Lx - %Lx\n", 234 printk(KERN_INFO "SRAT: hot plug zone found %Lx - %Lx\n",
234 nd->start, nd->end); 235 nd->start, nd->end);
236 }
235} 237}
236 238
237/* Callback for parsing of the Proximity Domain <-> Memory Area mappings */ 239/* Callback for parsing of the Proximity Domain <-> Memory Area mappings */
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 65b58e4b0b8b..426f3a1a64d3 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -41,7 +41,7 @@ union smp_flush_state {
41 struct { 41 struct {
42 struct mm_struct *flush_mm; 42 struct mm_struct *flush_mm;
43 unsigned long flush_va; 43 unsigned long flush_va;
44 spinlock_t tlbstate_lock; 44 raw_spinlock_t tlbstate_lock;
45 DECLARE_BITMAP(flush_cpumask, NR_CPUS); 45 DECLARE_BITMAP(flush_cpumask, NR_CPUS);
46 }; 46 };
47 char pad[INTERNODE_CACHE_BYTES]; 47 char pad[INTERNODE_CACHE_BYTES];
@@ -181,7 +181,7 @@ static void flush_tlb_others_ipi(const struct cpumask *cpumask,
181 * num_online_cpus() <= NUM_INVALIDATE_TLB_VECTORS, but it is 181 * num_online_cpus() <= NUM_INVALIDATE_TLB_VECTORS, but it is
182 * probably not worth checking this for a cache-hot lock. 182 * probably not worth checking this for a cache-hot lock.
183 */ 183 */
184 spin_lock(&f->tlbstate_lock); 184 raw_spin_lock(&f->tlbstate_lock);
185 185
186 f->flush_mm = mm; 186 f->flush_mm = mm;
187 f->flush_va = va; 187 f->flush_va = va;
@@ -199,7 +199,7 @@ static void flush_tlb_others_ipi(const struct cpumask *cpumask,
199 199
200 f->flush_mm = NULL; 200 f->flush_mm = NULL;
201 f->flush_va = 0; 201 f->flush_va = 0;
202 spin_unlock(&f->tlbstate_lock); 202 raw_spin_unlock(&f->tlbstate_lock);
203} 203}
204 204
205void native_flush_tlb_others(const struct cpumask *cpumask, 205void native_flush_tlb_others(const struct cpumask *cpumask,
@@ -223,7 +223,7 @@ static int __cpuinit init_smp_flush(void)
223 int i; 223 int i;
224 224
225 for (i = 0; i < ARRAY_SIZE(flush_state); i++) 225 for (i = 0; i < ARRAY_SIZE(flush_state); i++)
226 spin_lock_init(&flush_state[i].tlbstate_lock); 226 raw_spin_lock_init(&flush_state[i].tlbstate_lock);
227 227
228 return 0; 228 return 0;
229} 229}
diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c
index cb88b1a0bd5f..2c505ee71014 100644
--- a/arch/x86/oprofile/nmi_int.c
+++ b/arch/x86/oprofile/nmi_int.c
@@ -159,7 +159,7 @@ static int nmi_setup_mux(void)
159 159
160 for_each_possible_cpu(i) { 160 for_each_possible_cpu(i) {
161 per_cpu(cpu_msrs, i).multiplex = 161 per_cpu(cpu_msrs, i).multiplex =
162 kmalloc(multiplex_size, GFP_KERNEL); 162 kzalloc(multiplex_size, GFP_KERNEL);
163 if (!per_cpu(cpu_msrs, i).multiplex) 163 if (!per_cpu(cpu_msrs, i).multiplex)
164 return 0; 164 return 0;
165 } 165 }
@@ -179,7 +179,6 @@ static void nmi_cpu_setup_mux(int cpu, struct op_msrs const * const msrs)
179 if (counter_config[i].enabled) { 179 if (counter_config[i].enabled) {
180 multiplex[i].saved = -(u64)counter_config[i].count; 180 multiplex[i].saved = -(u64)counter_config[i].count;
181 } else { 181 } else {
182 multiplex[i].addr = 0;
183 multiplex[i].saved = 0; 182 multiplex[i].saved = 0;
184 } 183 }
185 } 184 }
@@ -189,25 +188,27 @@ static void nmi_cpu_setup_mux(int cpu, struct op_msrs const * const msrs)
189 188
190static void nmi_cpu_save_mpx_registers(struct op_msrs *msrs) 189static void nmi_cpu_save_mpx_registers(struct op_msrs *msrs)
191{ 190{
191 struct op_msr *counters = msrs->counters;
192 struct op_msr *multiplex = msrs->multiplex; 192 struct op_msr *multiplex = msrs->multiplex;
193 int i; 193 int i;
194 194
195 for (i = 0; i < model->num_counters; ++i) { 195 for (i = 0; i < model->num_counters; ++i) {
196 int virt = op_x86_phys_to_virt(i); 196 int virt = op_x86_phys_to_virt(i);
197 if (multiplex[virt].addr) 197 if (counters[i].addr)
198 rdmsrl(multiplex[virt].addr, multiplex[virt].saved); 198 rdmsrl(counters[i].addr, multiplex[virt].saved);
199 } 199 }
200} 200}
201 201
202static void nmi_cpu_restore_mpx_registers(struct op_msrs *msrs) 202static void nmi_cpu_restore_mpx_registers(struct op_msrs *msrs)
203{ 203{
204 struct op_msr *counters = msrs->counters;
204 struct op_msr *multiplex = msrs->multiplex; 205 struct op_msr *multiplex = msrs->multiplex;
205 int i; 206 int i;
206 207
207 for (i = 0; i < model->num_counters; ++i) { 208 for (i = 0; i < model->num_counters; ++i) {
208 int virt = op_x86_phys_to_virt(i); 209 int virt = op_x86_phys_to_virt(i);
209 if (multiplex[virt].addr) 210 if (counters[i].addr)
210 wrmsrl(multiplex[virt].addr, multiplex[virt].saved); 211 wrmsrl(counters[i].addr, multiplex[virt].saved);
211 } 212 }
212} 213}
213 214
@@ -222,7 +223,7 @@ static void nmi_cpu_switch(void *dummy)
222 223
223 /* move to next set */ 224 /* move to next set */
224 si += model->num_counters; 225 si += model->num_counters;
225 if ((si > model->num_virt_counters) || (counter_config[si].count == 0)) 226 if ((si >= model->num_virt_counters) || (counter_config[si].count == 0))
226 per_cpu(switch_index, cpu) = 0; 227 per_cpu(switch_index, cpu) = 0;
227 else 228 else
228 per_cpu(switch_index, cpu) = si; 229 per_cpu(switch_index, cpu) = si;
@@ -303,11 +304,11 @@ static int allocate_msrs(void)
303 304
304 int i; 305 int i;
305 for_each_possible_cpu(i) { 306 for_each_possible_cpu(i) {
306 per_cpu(cpu_msrs, i).counters = kmalloc(counters_size, 307 per_cpu(cpu_msrs, i).counters = kzalloc(counters_size,
307 GFP_KERNEL); 308 GFP_KERNEL);
308 if (!per_cpu(cpu_msrs, i).counters) 309 if (!per_cpu(cpu_msrs, i).counters)
309 return 0; 310 return 0;
310 per_cpu(cpu_msrs, i).controls = kmalloc(controls_size, 311 per_cpu(cpu_msrs, i).controls = kzalloc(controls_size,
311 GFP_KERNEL); 312 GFP_KERNEL);
312 if (!per_cpu(cpu_msrs, i).controls) 313 if (!per_cpu(cpu_msrs, i).controls)
313 return 0; 314 return 0;
@@ -598,6 +599,7 @@ static int __init ppro_init(char **cpu_type)
598 case 15: case 23: 599 case 15: case 23:
599 *cpu_type = "i386/core_2"; 600 *cpu_type = "i386/core_2";
600 break; 601 break;
602 case 0x2e:
601 case 26: 603 case 26:
602 spec = &op_arch_perfmon_spec; 604 spec = &op_arch_perfmon_spec;
603 *cpu_type = "i386/core_i7"; 605 *cpu_type = "i386/core_i7";
diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c
index 39686c29f03a..6a58256dce9f 100644
--- a/arch/x86/oprofile/op_model_amd.c
+++ b/arch/x86/oprofile/op_model_amd.c
@@ -22,6 +22,9 @@
22#include <asm/ptrace.h> 22#include <asm/ptrace.h>
23#include <asm/msr.h> 23#include <asm/msr.h>
24#include <asm/nmi.h> 24#include <asm/nmi.h>
25#include <asm/apic.h>
26#include <asm/processor.h>
27#include <asm/cpufeature.h>
25 28
26#include "op_x86_model.h" 29#include "op_x86_model.h"
27#include "op_counter.h" 30#include "op_counter.h"
@@ -43,15 +46,13 @@
43 46
44static unsigned long reset_value[NUM_VIRT_COUNTERS]; 47static unsigned long reset_value[NUM_VIRT_COUNTERS];
45 48
46#ifdef CONFIG_OPROFILE_IBS
47
48/* IbsFetchCtl bits/masks */ 49/* IbsFetchCtl bits/masks */
49#define IBS_FETCH_RAND_EN (1ULL<<57) 50#define IBS_FETCH_RAND_EN (1ULL<<57)
50#define IBS_FETCH_VAL (1ULL<<49) 51#define IBS_FETCH_VAL (1ULL<<49)
51#define IBS_FETCH_ENABLE (1ULL<<48) 52#define IBS_FETCH_ENABLE (1ULL<<48)
52#define IBS_FETCH_CNT_MASK 0xFFFF0000ULL 53#define IBS_FETCH_CNT_MASK 0xFFFF0000ULL
53 54
54/*IbsOpCtl bits */ 55/* IbsOpCtl bits */
55#define IBS_OP_CNT_CTL (1ULL<<19) 56#define IBS_OP_CNT_CTL (1ULL<<19)
56#define IBS_OP_VAL (1ULL<<18) 57#define IBS_OP_VAL (1ULL<<18)
57#define IBS_OP_ENABLE (1ULL<<17) 58#define IBS_OP_ENABLE (1ULL<<17)
@@ -59,7 +60,7 @@ static unsigned long reset_value[NUM_VIRT_COUNTERS];
59#define IBS_FETCH_SIZE 6 60#define IBS_FETCH_SIZE 6
60#define IBS_OP_SIZE 12 61#define IBS_OP_SIZE 12
61 62
62static int has_ibs; /* AMD Family10h and later */ 63static u32 ibs_caps;
63 64
64struct op_ibs_config { 65struct op_ibs_config {
65 unsigned long op_enabled; 66 unsigned long op_enabled;
@@ -71,24 +72,52 @@ struct op_ibs_config {
71}; 72};
72 73
73static struct op_ibs_config ibs_config; 74static struct op_ibs_config ibs_config;
75static u64 ibs_op_ctl;
74 76
75#endif 77/*
78 * IBS cpuid feature detection
79 */
76 80
77#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX 81#define IBS_CPUID_FEATURES 0x8000001b
82
83/*
84 * Same bit mask as for IBS cpuid feature flags (Fn8000_001B_EAX), but
85 * bit 0 is used to indicate the existence of IBS.
86 */
87#define IBS_CAPS_AVAIL (1LL<<0)
88#define IBS_CAPS_RDWROPCNT (1LL<<3)
89#define IBS_CAPS_OPCNT (1LL<<4)
90
91/*
92 * IBS randomization macros
93 */
94#define IBS_RANDOM_BITS 12
95#define IBS_RANDOM_MASK ((1ULL << IBS_RANDOM_BITS) - 1)
96#define IBS_RANDOM_MAXCNT_OFFSET (1ULL << (IBS_RANDOM_BITS - 5))
78 97
79static void op_mux_fill_in_addresses(struct op_msrs * const msrs) 98static u32 get_ibs_caps(void)
80{ 99{
81 int i; 100 u32 ibs_caps;
101 unsigned int max_level;
82 102
83 for (i = 0; i < NUM_VIRT_COUNTERS; i++) { 103 if (!boot_cpu_has(X86_FEATURE_IBS))
84 int hw_counter = op_x86_virt_to_phys(i); 104 return 0;
85 if (reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i)) 105
86 msrs->multiplex[i].addr = MSR_K7_PERFCTR0 + hw_counter; 106 /* check IBS cpuid feature flags */
87 else 107 max_level = cpuid_eax(0x80000000);
88 msrs->multiplex[i].addr = 0; 108 if (max_level < IBS_CPUID_FEATURES)
89 } 109 return IBS_CAPS_AVAIL;
110
111 ibs_caps = cpuid_eax(IBS_CPUID_FEATURES);
112 if (!(ibs_caps & IBS_CAPS_AVAIL))
113 /* cpuid flags not valid */
114 return IBS_CAPS_AVAIL;
115
116 return ibs_caps;
90} 117}
91 118
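As an aside, the same capability word can be read from user space; a rough sketch assuming GCC/Clang's <cpuid.h> (the X86_FEATURE_IBS pre-check the kernel does on CPUID 0x80000001 is omitted here):

#include <stdio.h>
#include <cpuid.h>

#define IBS_CAPS_AVAIL      (1u << 0)
#define IBS_CAPS_RDWROPCNT  (1u << 3)
#define IBS_CAPS_OPCNT      (1u << 4)

int main(void)
{
	unsigned int eax = 0, ebx, ecx, edx;

	/* __get_cpuid() fails if the extended leaf is not implemented */
	if (!__get_cpuid(0x8000001b, &eax, &ebx, &ecx, &edx) ||
	    !(eax & IBS_CAPS_AVAIL)) {
		puts("no valid IBS capability leaf");
		return 0;
	}
	printf("ibs_caps=%#x opcnt=%d rdwropcnt=%d\n", eax,
	       !!(eax & IBS_CAPS_OPCNT), !!(eax & IBS_CAPS_RDWROPCNT));
	return 0;
}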
119#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX
120
92static void op_mux_switch_ctrl(struct op_x86_model_spec const *model, 121static void op_mux_switch_ctrl(struct op_x86_model_spec const *model,
93 struct op_msrs const * const msrs) 122 struct op_msrs const * const msrs)
94{ 123{
@@ -98,7 +127,7 @@ static void op_mux_switch_ctrl(struct op_x86_model_spec const *model,
98 /* enable active counters */ 127 /* enable active counters */
99 for (i = 0; i < NUM_COUNTERS; ++i) { 128 for (i = 0; i < NUM_COUNTERS; ++i) {
100 int virt = op_x86_phys_to_virt(i); 129 int virt = op_x86_phys_to_virt(i);
101 if (!counter_config[virt].enabled) 130 if (!reset_value[virt])
102 continue; 131 continue;
103 rdmsrl(msrs->controls[i].addr, val); 132 rdmsrl(msrs->controls[i].addr, val);
104 val &= model->reserved; 133 val &= model->reserved;
@@ -107,10 +136,6 @@ static void op_mux_switch_ctrl(struct op_x86_model_spec const *model,
107 } 136 }
108} 137}
109 138
110#else
111
112static inline void op_mux_fill_in_addresses(struct op_msrs * const msrs) { }
113
114#endif 139#endif
115 140
116/* functions for op_amd_spec */ 141/* functions for op_amd_spec */
@@ -122,18 +147,12 @@ static void op_amd_fill_in_addresses(struct op_msrs * const msrs)
122 for (i = 0; i < NUM_COUNTERS; i++) { 147 for (i = 0; i < NUM_COUNTERS; i++) {
123 if (reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i)) 148 if (reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i))
124 msrs->counters[i].addr = MSR_K7_PERFCTR0 + i; 149 msrs->counters[i].addr = MSR_K7_PERFCTR0 + i;
125 else
126 msrs->counters[i].addr = 0;
127 } 150 }
128 151
129 for (i = 0; i < NUM_CONTROLS; i++) { 152 for (i = 0; i < NUM_CONTROLS; i++) {
130 if (reserve_evntsel_nmi(MSR_K7_EVNTSEL0 + i)) 153 if (reserve_evntsel_nmi(MSR_K7_EVNTSEL0 + i))
131 msrs->controls[i].addr = MSR_K7_EVNTSEL0 + i; 154 msrs->controls[i].addr = MSR_K7_EVNTSEL0 + i;
132 else
133 msrs->controls[i].addr = 0;
134 } 155 }
135
136 op_mux_fill_in_addresses(msrs);
137} 156}
138 157
139static void op_amd_setup_ctrs(struct op_x86_model_spec const *model, 158static void op_amd_setup_ctrs(struct op_x86_model_spec const *model,
@@ -144,7 +163,8 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model,
144 163
145 /* setup reset_value */ 164 /* setup reset_value */
146 for (i = 0; i < NUM_VIRT_COUNTERS; ++i) { 165 for (i = 0; i < NUM_VIRT_COUNTERS; ++i) {
147 if (counter_config[i].enabled) 166 if (counter_config[i].enabled
167 && msrs->counters[op_x86_virt_to_phys(i)].addr)
148 reset_value[i] = counter_config[i].count; 168 reset_value[i] = counter_config[i].count;
149 else 169 else
150 reset_value[i] = 0; 170 reset_value[i] = 0;
@@ -152,9 +172,18 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model,
152 172
153 /* clear all counters */ 173 /* clear all counters */
154 for (i = 0; i < NUM_CONTROLS; ++i) { 174 for (i = 0; i < NUM_CONTROLS; ++i) {
155 if (unlikely(!msrs->controls[i].addr)) 175 if (unlikely(!msrs->controls[i].addr)) {
176 if (counter_config[i].enabled && !smp_processor_id())
177 /*
178 * counter is reserved, this is on all
179 * cpus, so report only for cpu #0
180 */
181 op_x86_warn_reserved(i);
156 continue; 182 continue;
183 }
157 rdmsrl(msrs->controls[i].addr, val); 184 rdmsrl(msrs->controls[i].addr, val);
185 if (val & ARCH_PERFMON_EVENTSEL0_ENABLE)
186 op_x86_warn_in_use(i);
158 val &= model->reserved; 187 val &= model->reserved;
159 wrmsrl(msrs->controls[i].addr, val); 188 wrmsrl(msrs->controls[i].addr, val);
160 } 189 }
@@ -169,9 +198,7 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model,
169 /* enable active counters */ 198 /* enable active counters */
170 for (i = 0; i < NUM_COUNTERS; ++i) { 199 for (i = 0; i < NUM_COUNTERS; ++i) {
171 int virt = op_x86_phys_to_virt(i); 200 int virt = op_x86_phys_to_virt(i);
172 if (!counter_config[virt].enabled) 201 if (!reset_value[virt])
173 continue;
174 if (!msrs->counters[i].addr)
175 continue; 202 continue;
176 203
177 /* setup counter registers */ 204 /* setup counter registers */
@@ -185,7 +212,60 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model,
185 } 212 }
186} 213}
187 214
188#ifdef CONFIG_OPROFILE_IBS 215/*
216 * 16-bit Linear Feedback Shift Register (LFSR)
217 *
 218 * Feedback polynomial = X^16 + X^14 + X^13 + X^11 + 1
 219 *
220 */
221static unsigned int lfsr_random(void)
222{
223 static unsigned int lfsr_value = 0xF00D;
224 unsigned int bit;
225
226 /* Compute next bit to shift in */
227 bit = ((lfsr_value >> 0) ^
228 (lfsr_value >> 2) ^
229 (lfsr_value >> 3) ^
230 (lfsr_value >> 5)) & 0x0001;
231
232 /* Advance to next register value */
233 lfsr_value = (lfsr_value >> 1) | (bit << 15);
234
235 return lfsr_value;
236}
237
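For anyone wanting to convince themselves that the shift amounts 0, 2, 3 and 5 really correspond to the x^16 + x^14 + x^13 + x^11 + 1 taps and give a maximal-length sequence, a standalone sketch re-implementing the step function outside the kernel:

#include <stdio.h>

static unsigned int lfsr_step(unsigned int v)
{
	unsigned int bit = ((v >> 0) ^ (v >> 2) ^ (v >> 3) ^ (v >> 5)) & 1;

	return ((v >> 1) | (bit << 15)) & 0xffff;
}

int main(void)
{
	unsigned int v = 0xF00D, period = 0;

	do {
		v = lfsr_step(v);
		period++;
	} while (v != 0xF00D);

	/* a maximal 16-bit LFSR visits all 65535 nonzero states */
	printf("period = %u\n", period);
	return 0;
}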
238/*
239 * IBS software randomization
240 *
241 * The IBS periodic op counter is randomized in software. The lower 12
242 * bits of the 20 bit counter are randomized. IbsOpCurCnt is
243 * initialized with a 12 bit random value.
244 */
245static inline u64 op_amd_randomize_ibs_op(u64 val)
246{
247 unsigned int random = lfsr_random();
248
249 if (!(ibs_caps & IBS_CAPS_RDWROPCNT))
250 /*
251 * Work around if the hw can not write to IbsOpCurCnt
252 *
253 * Randomize the lower 8 bits of the 16 bit
254 * IbsOpMaxCnt [15:0] value in the range of -128 to
255 * +127 by adding/subtracting an offset to the
256 * maximum count (IbsOpMaxCnt).
257 *
258 * To avoid over or underflows and protect upper bits
259 * starting at bit 16, the initial value for
260 * IbsOpMaxCnt must fit in the range from 0x0081 to
261 * 0xff80.
262 */
263 val += (s8)(random >> 4);
264 else
265 val |= (u64)(random & IBS_RANDOM_MASK) << 32;
266
267 return val;
268}
189 269
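To see what the two randomization paths produce, a small sketch with made-up inputs (the constants mirror the patch, the sample values are arbitrary):

#include <stdio.h>
#include <stdint.h>

#define IBS_RANDOM_MASK ((1ull << 12) - 1)

int main(void)
{
	uint64_t max_cnt = 0x4000;	/* example IbsOpMaxCnt value */
	unsigned int random = 0x0ACE;	/* pretend lfsr_random() output */

	/* no IbsOpCurCnt support: nudge IbsOpMaxCnt by a signed 8-bit offset */
	uint64_t no_curcnt = max_cnt + (int8_t)(random >> 4);

	/* IbsOpCurCnt writable: seed its low 12 bits (bits 32 and up) */
	uint64_t with_curcnt = max_cnt |
			       ((uint64_t)(random & IBS_RANDOM_MASK) << 32);

	printf("no IbsOpCurCnt:   %#llx\n", (unsigned long long)no_curcnt);
	printf("with IbsOpCurCnt: %#llx\n", (unsigned long long)with_curcnt);
	return 0;
}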
190static inline void 270static inline void
191op_amd_handle_ibs(struct pt_regs * const regs, 271op_amd_handle_ibs(struct pt_regs * const regs,
@@ -194,7 +274,7 @@ op_amd_handle_ibs(struct pt_regs * const regs,
194 u64 val, ctl; 274 u64 val, ctl;
195 struct op_entry entry; 275 struct op_entry entry;
196 276
197 if (!has_ibs) 277 if (!ibs_caps)
198 return; 278 return;
199 279
200 if (ibs_config.fetch_enabled) { 280 if (ibs_config.fetch_enabled) {
@@ -236,8 +316,7 @@ op_amd_handle_ibs(struct pt_regs * const regs,
236 oprofile_write_commit(&entry); 316 oprofile_write_commit(&entry);
237 317
238 /* reenable the IRQ */ 318 /* reenable the IRQ */
239 ctl &= ~IBS_OP_VAL & 0xFFFFFFFF; 319 ctl = op_amd_randomize_ibs_op(ibs_op_ctl);
240 ctl |= IBS_OP_ENABLE;
241 wrmsrl(MSR_AMD64_IBSOPCTL, ctl); 320 wrmsrl(MSR_AMD64_IBSOPCTL, ctl);
242 } 321 }
243 } 322 }
@@ -246,41 +325,57 @@ op_amd_handle_ibs(struct pt_regs * const regs,
246static inline void op_amd_start_ibs(void) 325static inline void op_amd_start_ibs(void)
247{ 326{
248 u64 val; 327 u64 val;
249 if (has_ibs && ibs_config.fetch_enabled) { 328
329 if (!ibs_caps)
330 return;
331
332 if (ibs_config.fetch_enabled) {
250 val = (ibs_config.max_cnt_fetch >> 4) & 0xFFFF; 333 val = (ibs_config.max_cnt_fetch >> 4) & 0xFFFF;
251 val |= ibs_config.rand_en ? IBS_FETCH_RAND_EN : 0; 334 val |= ibs_config.rand_en ? IBS_FETCH_RAND_EN : 0;
252 val |= IBS_FETCH_ENABLE; 335 val |= IBS_FETCH_ENABLE;
253 wrmsrl(MSR_AMD64_IBSFETCHCTL, val); 336 wrmsrl(MSR_AMD64_IBSFETCHCTL, val);
254 } 337 }
255 338
256 if (has_ibs && ibs_config.op_enabled) { 339 if (ibs_config.op_enabled) {
257 val = (ibs_config.max_cnt_op >> 4) & 0xFFFF; 340 ibs_op_ctl = ibs_config.max_cnt_op >> 4;
258 val |= ibs_config.dispatched_ops ? IBS_OP_CNT_CTL : 0; 341 if (!(ibs_caps & IBS_CAPS_RDWROPCNT)) {
259 val |= IBS_OP_ENABLE; 342 /*
343 * IbsOpCurCnt not supported. See
344 * op_amd_randomize_ibs_op() for details.
345 */
346 ibs_op_ctl = clamp(ibs_op_ctl, 0x0081ULL, 0xFF80ULL);
347 } else {
348 /*
349 * The start value is randomized with a
350 * positive offset, we need to compensate it
351 * with the half of the randomized range. Also
352 * avoid underflows.
353 */
354 ibs_op_ctl = min(ibs_op_ctl + IBS_RANDOM_MAXCNT_OFFSET,
355 0xFFFFULL);
356 }
357 if (ibs_caps & IBS_CAPS_OPCNT && ibs_config.dispatched_ops)
358 ibs_op_ctl |= IBS_OP_CNT_CTL;
359 ibs_op_ctl |= IBS_OP_ENABLE;
360 val = op_amd_randomize_ibs_op(ibs_op_ctl);
260 wrmsrl(MSR_AMD64_IBSOPCTL, val); 361 wrmsrl(MSR_AMD64_IBSOPCTL, val);
261 } 362 }
262} 363}
263 364
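The IBS_RANDOM_MAXCNT_OFFSET choice can be sanity-checked with a few lines of arithmetic; this is only one plausible reading of the "half of the randomized range" comment above:

#include <stdio.h>

int main(void)
{
	unsigned int random_range_ops = 1u << 12;	/* IbsOpCurCnt is seeded with 12 random bits */
	unsigned int maxcnt_unit_ops  = 16;		/* IbsOpMaxCnt counts in steps of 16 ops */
	unsigned int offset           = 1u << (12 - 5);	/* IBS_RANDOM_MAXCNT_OFFSET = 128 */

	printf("offset of %u maxcnt units = %u ops = half of the %u-op random range\n",
	       offset, offset * maxcnt_unit_ops, random_range_ops);
	return 0;
}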
264static void op_amd_stop_ibs(void) 365static void op_amd_stop_ibs(void)
265{ 366{
266 if (has_ibs && ibs_config.fetch_enabled) 367 if (!ibs_caps)
368 return;
369
370 if (ibs_config.fetch_enabled)
267 /* clear max count and enable */ 371 /* clear max count and enable */
268 wrmsrl(MSR_AMD64_IBSFETCHCTL, 0); 372 wrmsrl(MSR_AMD64_IBSFETCHCTL, 0);
269 373
270 if (has_ibs && ibs_config.op_enabled) 374 if (ibs_config.op_enabled)
271 /* clear max count and enable */ 375 /* clear max count and enable */
272 wrmsrl(MSR_AMD64_IBSOPCTL, 0); 376 wrmsrl(MSR_AMD64_IBSOPCTL, 0);
273} 377}
274 378
275#else
276
277static inline void op_amd_handle_ibs(struct pt_regs * const regs,
278 struct op_msrs const * const msrs) { }
279static inline void op_amd_start_ibs(void) { }
280static inline void op_amd_stop_ibs(void) { }
281
282#endif
283
284static int op_amd_check_ctrs(struct pt_regs * const regs, 379static int op_amd_check_ctrs(struct pt_regs * const regs,
285 struct op_msrs const * const msrs) 380 struct op_msrs const * const msrs)
286{ 381{
@@ -355,8 +450,6 @@ static void op_amd_shutdown(struct op_msrs const * const msrs)
355 } 450 }
356} 451}
357 452
358#ifdef CONFIG_OPROFILE_IBS
359
360static u8 ibs_eilvt_off; 453static u8 ibs_eilvt_off;
361 454
362static inline void apic_init_ibs_nmi_per_cpu(void *arg) 455static inline void apic_init_ibs_nmi_per_cpu(void *arg)
@@ -405,45 +498,36 @@ static int init_ibs_nmi(void)
405 return 1; 498 return 1;
406 } 499 }
407 500
408#ifdef CONFIG_NUMA
409 /* Sanity check */
410 /* Works only for 64bit with proper numa implementation. */
411 if (nodes != num_possible_nodes()) {
412 printk(KERN_DEBUG "Failed to setup CPU node(s) for IBS, "
413 "found: %d, expected %d",
414 nodes, num_possible_nodes());
415 return 1;
416 }
417#endif
418 return 0; 501 return 0;
419} 502}
420 503
421/* uninitialize the APIC for the IBS interrupts if needed */ 504/* uninitialize the APIC for the IBS interrupts if needed */
422static void clear_ibs_nmi(void) 505static void clear_ibs_nmi(void)
423{ 506{
424 if (has_ibs) 507 if (ibs_caps)
425 on_each_cpu(apic_clear_ibs_nmi_per_cpu, NULL, 1); 508 on_each_cpu(apic_clear_ibs_nmi_per_cpu, NULL, 1);
426} 509}
427 510
428/* initialize the APIC for the IBS interrupts if available */ 511/* initialize the APIC for the IBS interrupts if available */
429static void ibs_init(void) 512static void ibs_init(void)
430{ 513{
431 has_ibs = boot_cpu_has(X86_FEATURE_IBS); 514 ibs_caps = get_ibs_caps();
432 515
433 if (!has_ibs) 516 if (!ibs_caps)
434 return; 517 return;
435 518
436 if (init_ibs_nmi()) { 519 if (init_ibs_nmi()) {
437 has_ibs = 0; 520 ibs_caps = 0;
438 return; 521 return;
439 } 522 }
440 523
441 printk(KERN_INFO "oprofile: AMD IBS detected\n"); 524 printk(KERN_INFO "oprofile: AMD IBS detected (0x%08x)\n",
525 (unsigned)ibs_caps);
442} 526}
443 527
444static void ibs_exit(void) 528static void ibs_exit(void)
445{ 529{
446 if (!has_ibs) 530 if (!ibs_caps)
447 return; 531 return;
448 532
449 clear_ibs_nmi(); 533 clear_ibs_nmi();
@@ -463,7 +547,7 @@ static int setup_ibs_files(struct super_block *sb, struct dentry *root)
463 if (ret) 547 if (ret)
464 return ret; 548 return ret;
465 549
466 if (!has_ibs) 550 if (!ibs_caps)
467 return ret; 551 return ret;
468 552
469 /* model specific files */ 553 /* model specific files */
@@ -473,7 +557,7 @@ static int setup_ibs_files(struct super_block *sb, struct dentry *root)
473 ibs_config.fetch_enabled = 0; 557 ibs_config.fetch_enabled = 0;
474 ibs_config.max_cnt_op = 250000; 558 ibs_config.max_cnt_op = 250000;
475 ibs_config.op_enabled = 0; 559 ibs_config.op_enabled = 0;
476 ibs_config.dispatched_ops = 1; 560 ibs_config.dispatched_ops = 0;
477 561
478 dir = oprofilefs_mkdir(sb, root, "ibs_fetch"); 562 dir = oprofilefs_mkdir(sb, root, "ibs_fetch");
479 oprofilefs_create_ulong(sb, dir, "enable", 563 oprofilefs_create_ulong(sb, dir, "enable",
@@ -488,8 +572,9 @@ static int setup_ibs_files(struct super_block *sb, struct dentry *root)
488 &ibs_config.op_enabled); 572 &ibs_config.op_enabled);
489 oprofilefs_create_ulong(sb, dir, "max_count", 573 oprofilefs_create_ulong(sb, dir, "max_count",
490 &ibs_config.max_cnt_op); 574 &ibs_config.max_cnt_op);
491 oprofilefs_create_ulong(sb, dir, "dispatched_ops", 575 if (ibs_caps & IBS_CAPS_OPCNT)
492 &ibs_config.dispatched_ops); 576 oprofilefs_create_ulong(sb, dir, "dispatched_ops",
577 &ibs_config.dispatched_ops);
493 578
494 return 0; 579 return 0;
495} 580}
@@ -507,19 +592,6 @@ static void op_amd_exit(void)
507 ibs_exit(); 592 ibs_exit();
508} 593}
509 594
510#else
511
512/* no IBS support */
513
514static int op_amd_init(struct oprofile_operations *ops)
515{
516 return 0;
517}
518
519static void op_amd_exit(void) {}
520
521#endif /* CONFIG_OPROFILE_IBS */
522
523struct op_x86_model_spec op_amd_spec = { 595struct op_x86_model_spec op_amd_spec = {
524 .num_counters = NUM_COUNTERS, 596 .num_counters = NUM_COUNTERS,
525 .num_controls = NUM_CONTROLS, 597 .num_controls = NUM_CONTROLS,
diff --git a/arch/x86/oprofile/op_model_p4.c b/arch/x86/oprofile/op_model_p4.c
index ac6b354becdf..e6a160a4684a 100644
--- a/arch/x86/oprofile/op_model_p4.c
+++ b/arch/x86/oprofile/op_model_p4.c
@@ -394,12 +394,6 @@ static void p4_fill_in_addresses(struct op_msrs * const msrs)
394 setup_num_counters(); 394 setup_num_counters();
395 stag = get_stagger(); 395 stag = get_stagger();
396 396
397 /* initialize some registers */
398 for (i = 0; i < num_counters; ++i)
399 msrs->counters[i].addr = 0;
400 for (i = 0; i < num_controls; ++i)
401 msrs->controls[i].addr = 0;
402
403 /* the counter & cccr registers we pay attention to */ 397 /* the counter & cccr registers we pay attention to */
404 for (i = 0; i < num_counters; ++i) { 398 for (i = 0; i < num_counters; ++i) {
405 addr = p4_counters[VIRT_CTR(stag, i)].counter_address; 399 addr = p4_counters[VIRT_CTR(stag, i)].counter_address;
diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c
index 8eb05878554c..5d1727ba409e 100644
--- a/arch/x86/oprofile/op_model_ppro.c
+++ b/arch/x86/oprofile/op_model_ppro.c
@@ -37,15 +37,11 @@ static void ppro_fill_in_addresses(struct op_msrs * const msrs)
37 for (i = 0; i < num_counters; i++) { 37 for (i = 0; i < num_counters; i++) {
38 if (reserve_perfctr_nmi(MSR_P6_PERFCTR0 + i)) 38 if (reserve_perfctr_nmi(MSR_P6_PERFCTR0 + i))
39 msrs->counters[i].addr = MSR_P6_PERFCTR0 + i; 39 msrs->counters[i].addr = MSR_P6_PERFCTR0 + i;
40 else
41 msrs->counters[i].addr = 0;
42 } 40 }
43 41
44 for (i = 0; i < num_counters; i++) { 42 for (i = 0; i < num_counters; i++) {
45 if (reserve_evntsel_nmi(MSR_P6_EVNTSEL0 + i)) 43 if (reserve_evntsel_nmi(MSR_P6_EVNTSEL0 + i))
46 msrs->controls[i].addr = MSR_P6_EVNTSEL0 + i; 44 msrs->controls[i].addr = MSR_P6_EVNTSEL0 + i;
47 else
48 msrs->controls[i].addr = 0;
49 } 45 }
50} 46}
51 47
@@ -57,7 +53,7 @@ static void ppro_setup_ctrs(struct op_x86_model_spec const *model,
57 int i; 53 int i;
58 54
59 if (!reset_value) { 55 if (!reset_value) {
60 reset_value = kmalloc(sizeof(reset_value[0]) * num_counters, 56 reset_value = kzalloc(sizeof(reset_value[0]) * num_counters,
61 GFP_ATOMIC); 57 GFP_ATOMIC);
62 if (!reset_value) 58 if (!reset_value)
63 return; 59 return;
@@ -82,9 +78,18 @@ static void ppro_setup_ctrs(struct op_x86_model_spec const *model,
82 78
83 /* clear all counters */ 79 /* clear all counters */
84 for (i = 0; i < num_counters; ++i) { 80 for (i = 0; i < num_counters; ++i) {
85 if (unlikely(!msrs->controls[i].addr)) 81 if (unlikely(!msrs->controls[i].addr)) {
82 if (counter_config[i].enabled && !smp_processor_id())
83 /*
84 * counter is reserved, this is on all
85 * cpus, so report only for cpu #0
86 */
87 op_x86_warn_reserved(i);
86 continue; 88 continue;
89 }
87 rdmsrl(msrs->controls[i].addr, val); 90 rdmsrl(msrs->controls[i].addr, val);
91 if (val & ARCH_PERFMON_EVENTSEL0_ENABLE)
92 op_x86_warn_in_use(i);
88 val &= model->reserved; 93 val &= model->reserved;
89 wrmsrl(msrs->controls[i].addr, val); 94 wrmsrl(msrs->controls[i].addr, val);
90 } 95 }
diff --git a/arch/x86/oprofile/op_x86_model.h b/arch/x86/oprofile/op_x86_model.h
index 7b8e75d16081..ff82a755edd4 100644
--- a/arch/x86/oprofile/op_x86_model.h
+++ b/arch/x86/oprofile/op_x86_model.h
@@ -57,6 +57,26 @@ struct op_x86_model_spec {
57 57
58struct op_counter_config; 58struct op_counter_config;
59 59
60static inline void op_x86_warn_in_use(int counter)
61{
62 /*
63 * The warning indicates an already running counter. If
64 * oprofile doesn't collect data, then try using a different
65 * performance counter on your platform to monitor the desired
66 * event. Delete counter #%d from the desired event by editing
67 * the /usr/share/oprofile/%s/<cpu>/events file. If the event
68 * cannot be monitored by any other counter, contact your
69 * hardware or BIOS vendor.
70 */
71 pr_warning("oprofile: counter #%d on cpu #%d may already be used\n",
72 counter, smp_processor_id());
73}
74
75static inline void op_x86_warn_reserved(int counter)
76{
77 pr_warning("oprofile: counter #%d is already reserved\n", counter);
78}
79
60extern u64 op_x86_get_ctrl(struct op_x86_model_spec const *model, 80extern u64 op_x86_get_ctrl(struct op_x86_model_spec const *model,
61 struct op_counter_config *counter_config); 81 struct op_counter_config *counter_config);
62extern int op_x86_phys_to_virt(int phys); 82extern int op_x86_phys_to_virt(int phys);
diff --git a/arch/x86/pci/Makefile b/arch/x86/pci/Makefile
index 564b008a51c7..39fba37f702f 100644
--- a/arch/x86/pci/Makefile
+++ b/arch/x86/pci/Makefile
@@ -15,7 +15,7 @@ obj-$(CONFIG_X86_NUMAQ) += numaq_32.o
15 15
16obj-y += common.o early.o 16obj-y += common.o early.o
17obj-y += amd_bus.o 17obj-y += amd_bus.o
18obj-$(CONFIG_X86_64) += bus_numa.o intel_bus.o 18obj-$(CONFIG_X86_64) += bus_numa.o
19 19
20ifeq ($(CONFIG_PCI_DEBUG),y) 20ifeq ($(CONFIG_PCI_DEBUG),y)
21EXTRA_CFLAGS += -DDEBUG 21EXTRA_CFLAGS += -DDEBUG
diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c
index 959e548a7039..5f11ff6f5389 100644
--- a/arch/x86/pci/acpi.c
+++ b/arch/x86/pci/acpi.c
@@ -15,6 +15,51 @@ struct pci_root_info {
15 int busnum; 15 int busnum;
16}; 16};
17 17
18static bool pci_use_crs = true;
19
20static int __init set_use_crs(const struct dmi_system_id *id)
21{
22 pci_use_crs = true;
23 return 0;
24}
25
26static const struct dmi_system_id pci_use_crs_table[] __initconst = {
27 /* http://bugzilla.kernel.org/show_bug.cgi?id=14183 */
28 {
29 .callback = set_use_crs,
30 .ident = "IBM System x3800",
31 .matches = {
32 DMI_MATCH(DMI_SYS_VENDOR, "IBM"),
33 DMI_MATCH(DMI_PRODUCT_NAME, "x3800"),
34 },
35 },
36 {}
37};
38
39void __init pci_acpi_crs_quirks(void)
40{
41 int year;
42
43 if (dmi_get_date(DMI_BIOS_DATE, &year, NULL, NULL) && year < 2008)
44 pci_use_crs = false;
45
46 dmi_check_system(pci_use_crs_table);
47
48 /*
49 * If the user specifies "pci=use_crs" or "pci=nocrs" explicitly, that
50 * takes precedence over anything we figured out above.
51 */
52 if (pci_probe & PCI_ROOT_NO_CRS)
53 pci_use_crs = false;
54 else if (pci_probe & PCI_USE__CRS)
55 pci_use_crs = true;
56
57 printk(KERN_INFO "PCI: %s host bridge windows from ACPI; "
58 "if necessary, use \"pci=%s\" and report a bug\n",
59 pci_use_crs ? "Using" : "Ignoring",
60 pci_use_crs ? "nocrs" : "use_crs");
61}
62
18static acpi_status 63static acpi_status
19resource_to_addr(struct acpi_resource *resource, 64resource_to_addr(struct acpi_resource *resource,
20 struct acpi_resource_address64 *addr) 65 struct acpi_resource_address64 *addr)
@@ -45,20 +90,6 @@ count_resource(struct acpi_resource *acpi_res, void *data)
45 return AE_OK; 90 return AE_OK;
46} 91}
47 92
48static int
49bus_has_transparent_bridge(struct pci_bus *bus)
50{
51 struct pci_dev *dev;
52
53 list_for_each_entry(dev, &bus->devices, bus_list) {
54 u16 class = dev->class >> 8;
55
56 if (class == PCI_CLASS_BRIDGE_PCI && dev->transparent)
57 return true;
58 }
59 return false;
60}
61
62static void 93static void
63align_resource(struct acpi_device *bridge, struct resource *res) 94align_resource(struct acpi_device *bridge, struct resource *res)
64{ 95{
@@ -92,12 +123,8 @@ setup_resource(struct acpi_resource *acpi_res, void *data)
92 acpi_status status; 123 acpi_status status;
93 unsigned long flags; 124 unsigned long flags;
94 struct resource *root; 125 struct resource *root;
95 int max_root_bus_resources = PCI_BUS_NUM_RESOURCES;
96 u64 start, end; 126 u64 start, end;
97 127
98 if (bus_has_transparent_bridge(info->bus))
99 max_root_bus_resources -= 3;
100
101 status = resource_to_addr(acpi_res, &addr); 128 status = resource_to_addr(acpi_res, &addr);
102 if (!ACPI_SUCCESS(status)) 129 if (!ACPI_SUCCESS(status))
103 return AE_OK; 130 return AE_OK;
@@ -115,15 +142,6 @@ setup_resource(struct acpi_resource *acpi_res, void *data)
115 142
116 start = addr.minimum + addr.translation_offset; 143 start = addr.minimum + addr.translation_offset;
117 end = start + addr.address_length - 1; 144 end = start + addr.address_length - 1;
118 if (info->res_num >= max_root_bus_resources) {
119 if (pci_probe & PCI_USE__CRS)
120 printk(KERN_WARNING "PCI: Failed to allocate "
121 "0x%lx-0x%lx from %s for %s due to _CRS "
122 "returning more than %d resource descriptors\n",
123 (unsigned long) start, (unsigned long) end,
124 root->name, info->name, max_root_bus_resources);
125 return AE_OK;
126 }
127 145
128 res = &info->res[info->res_num]; 146 res = &info->res[info->res_num];
129 res->name = info->name; 147 res->name = info->name;
@@ -133,7 +151,7 @@ setup_resource(struct acpi_resource *acpi_res, void *data)
133 res->child = NULL; 151 res->child = NULL;
134 align_resource(info->bridge, res); 152 align_resource(info->bridge, res);
135 153
136 if (!(pci_probe & PCI_USE__CRS)) { 154 if (!pci_use_crs) {
137 dev_printk(KERN_DEBUG, &info->bridge->dev, 155 dev_printk(KERN_DEBUG, &info->bridge->dev,
138 "host bridge window %pR (ignored)\n", res); 156 "host bridge window %pR (ignored)\n", res);
139 return AE_OK; 157 return AE_OK;
@@ -143,7 +161,7 @@ setup_resource(struct acpi_resource *acpi_res, void *data)
143 dev_err(&info->bridge->dev, 161 dev_err(&info->bridge->dev,
144 "can't allocate host bridge window %pR\n", res); 162 "can't allocate host bridge window %pR\n", res);
145 } else { 163 } else {
146 info->bus->resource[info->res_num] = res; 164 pci_bus_add_resource(info->bus, res, 0);
147 info->res_num++; 165 info->res_num++;
148 if (addr.translation_offset) 166 if (addr.translation_offset)
149 dev_info(&info->bridge->dev, "host bridge window %pR " 167 dev_info(&info->bridge->dev, "host bridge window %pR "
@@ -164,10 +182,8 @@ get_current_resources(struct acpi_device *device, int busnum,
164 struct pci_root_info info; 182 struct pci_root_info info;
165 size_t size; 183 size_t size;
166 184
167 if (!(pci_probe & PCI_USE__CRS)) 185 if (pci_use_crs)
168 dev_info(&device->dev, 186 pci_bus_remove_resources(bus);
169 "ignoring host bridge windows from ACPI; "
170 "boot with \"pci=use_crs\" to use them\n");
171 187
172 info.bridge = device; 188 info.bridge = device;
173 info.bus = bus; 189 info.bus = bus;
diff --git a/arch/x86/pci/bus_numa.c b/arch/x86/pci/bus_numa.c
index f939d603adfa..12d54ff3654d 100644
--- a/arch/x86/pci/bus_numa.c
+++ b/arch/x86/pci/bus_numa.c
@@ -36,13 +36,14 @@ void x86_pci_root_bus_res_quirks(struct pci_bus *b)
36 printk(KERN_DEBUG "PCI: peer root bus %02x res updated from pci conf\n", 36 printk(KERN_DEBUG "PCI: peer root bus %02x res updated from pci conf\n",
37 b->number); 37 b->number);
38 38
39 pci_bus_remove_resources(b);
39 info = &pci_root_info[i]; 40 info = &pci_root_info[i];
40 for (j = 0; j < info->res_num; j++) { 41 for (j = 0; j < info->res_num; j++) {
41 struct resource *res; 42 struct resource *res;
42 struct resource *root; 43 struct resource *root;
43 44
44 res = &info->res[j]; 45 res = &info->res[j];
45 b->resource[j] = res; 46 pci_bus_add_resource(b, res, 0);
46 if (res->flags & IORESOURCE_IO) 47 if (res->flags & IORESOURCE_IO)
47 root = &ioport_resource; 48 root = &ioport_resource;
48 else 49 else
diff --git a/arch/x86/pci/bus_numa.h b/arch/x86/pci/bus_numa.h
index adbc23fe82ac..731b64ee8d84 100644
--- a/arch/x86/pci/bus_numa.h
+++ b/arch/x86/pci/bus_numa.h
@@ -2,8 +2,7 @@
2 2
3/* 3/*
4 * sub bus (transparent) will use entres from 3 to store extra from 4 * sub bus (transparent) will use entres from 3 to store extra from
5 * root, so need to make sure we have enough slot there, Should we 5 * root, so need to make sure we have enough slot there.
6 * increase PCI_BUS_NUM_RESOURCES?
7 */ 6 */
8#define RES_NUM 16 7#define RES_NUM 16
9struct pci_root_info { 8struct pci_root_info {
diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c
index d2552c68e94d..3736176acaab 100644
--- a/arch/x86/pci/common.c
+++ b/arch/x86/pci/common.c
@@ -520,6 +520,9 @@ char * __devinit pcibios_setup(char *str)
520 } else if (!strcmp(str, "use_crs")) { 520 } else if (!strcmp(str, "use_crs")) {
521 pci_probe |= PCI_USE__CRS; 521 pci_probe |= PCI_USE__CRS;
522 return NULL; 522 return NULL;
523 } else if (!strcmp(str, "nocrs")) {
524 pci_probe |= PCI_ROOT_NO_CRS;
525 return NULL;
523 } else if (!strcmp(str, "earlydump")) { 526 } else if (!strcmp(str, "earlydump")) {
524 pci_early_dump_regs = 1; 527 pci_early_dump_regs = 1;
525 return NULL; 528 return NULL;
diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c
index 5dc9e8c63fcd..5a8fbf8d4cac 100644
--- a/arch/x86/pci/i386.c
+++ b/arch/x86/pci/i386.c
@@ -60,22 +60,20 @@ skip_isa_ioresource_align(struct pci_dev *dev) {
60 * but we want to try to avoid allocating at 0x2900-0x2bff 60 * but we want to try to avoid allocating at 0x2900-0x2bff
61 * which might have be mirrored at 0x0100-0x03ff.. 61 * which might have be mirrored at 0x0100-0x03ff..
62 */ 62 */
63void 63resource_size_t
64pcibios_align_resource(void *data, struct resource *res, 64pcibios_align_resource(void *data, const struct resource *res,
65 resource_size_t size, resource_size_t align) 65 resource_size_t size, resource_size_t align)
66{ 66{
67 struct pci_dev *dev = data; 67 struct pci_dev *dev = data;
68 resource_size_t start = res->start;
68 69
69 if (res->flags & IORESOURCE_IO) { 70 if (res->flags & IORESOURCE_IO) {
70 resource_size_t start = res->start;
71
72 if (skip_isa_ioresource_align(dev)) 71 if (skip_isa_ioresource_align(dev))
73 return; 72 return start;
74 if (start & 0x300) { 73 if (start & 0x300)
75 start = (start + 0x3ff) & ~0x3ff; 74 start = (start + 0x3ff) & ~0x3ff;
76 res->start = start;
77 }
78 } 75 }
76 return start;
79} 77}
80EXPORT_SYMBOL(pcibios_align_resource); 78EXPORT_SYMBOL(pcibios_align_resource);
81 79
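A worked example of the rounding above, as a standalone sketch: an I/O start address inside the 0x100-0x3ff alias window gets bumped to the next 0x400 boundary.

#include <stdio.h>

int main(void)
{
	unsigned long start = 0x2950;

	if (start & 0x300)			/* would alias 0x0100-0x03ff */
		start = (start + 0x3ff) & ~0x3ffUL;

	printf("aligned start = %#lx\n", start);	/* prints 0x2c00 */
	return 0;
}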
diff --git a/arch/x86/pci/intel_bus.c b/arch/x86/pci/intel_bus.c
deleted file mode 100644
index f81a2fa8fe25..000000000000
--- a/arch/x86/pci/intel_bus.c
+++ /dev/null
@@ -1,94 +0,0 @@
1/*
2 * to read io range from IOH pci conf, need to do it after mmconfig is there
3 */
4
5#include <linux/delay.h>
6#include <linux/dmi.h>
7#include <linux/pci.h>
8#include <linux/init.h>
9#include <asm/pci_x86.h>
10
11#include "bus_numa.h"
12
13static inline void print_ioh_resources(struct pci_root_info *info)
14{
15 int res_num;
16 int busnum;
17 int i;
18
19 printk(KERN_DEBUG "IOH bus: [%02x, %02x]\n",
20 info->bus_min, info->bus_max);
21 res_num = info->res_num;
22 busnum = info->bus_min;
23 for (i = 0; i < res_num; i++) {
24 struct resource *res;
25
26 res = &info->res[i];
27 printk(KERN_DEBUG "IOH bus: %02x index %x %s: [%llx, %llx]\n",
28 busnum, i,
29 (res->flags & IORESOURCE_IO) ? "io port" :
30 "mmio",
31 res->start, res->end);
32 }
33}
34
35#define IOH_LIO 0x108
36#define IOH_LMMIOL 0x10c
37#define IOH_LMMIOH 0x110
38#define IOH_LMMIOH_BASEU 0x114
39#define IOH_LMMIOH_LIMITU 0x118
40#define IOH_LCFGBUS 0x11c
41
42static void __devinit pci_root_bus_res(struct pci_dev *dev)
43{
44 u16 word;
45 u32 dword;
46 struct pci_root_info *info;
47 u16 io_base, io_end;
48 u32 mmiol_base, mmiol_end;
49 u64 mmioh_base, mmioh_end;
50 int bus_base, bus_end;
51
52 /* some sys doesn't get mmconf enabled */
53 if (dev->cfg_size < 0x120)
54 return;
55
56 if (pci_root_num >= PCI_ROOT_NR) {
57 printk(KERN_DEBUG "intel_bus.c: PCI_ROOT_NR is too small\n");
58 return;
59 }
60
61 info = &pci_root_info[pci_root_num];
62 pci_root_num++;
63
64 pci_read_config_word(dev, IOH_LCFGBUS, &word);
65 bus_base = (word & 0xff);
66 bus_end = (word & 0xff00) >> 8;
67 sprintf(info->name, "PCI Bus #%02x", bus_base);
68 info->bus_min = bus_base;
69 info->bus_max = bus_end;
70
71 pci_read_config_word(dev, IOH_LIO, &word);
72 io_base = (word & 0xf0) << (12 - 4);
73 io_end = (word & 0xf000) | 0xfff;
74 update_res(info, io_base, io_end, IORESOURCE_IO, 0);
75
76 pci_read_config_dword(dev, IOH_LMMIOL, &dword);
77 mmiol_base = (dword & 0xff00) << (24 - 8);
78 mmiol_end = (dword & 0xff000000) | 0xffffff;
79 update_res(info, mmiol_base, mmiol_end, IORESOURCE_MEM, 0);
80
81 pci_read_config_dword(dev, IOH_LMMIOH, &dword);
82 mmioh_base = ((u64)(dword & 0xfc00)) << (26 - 10);
83 mmioh_end = ((u64)(dword & 0xfc000000) | 0x3ffffff);
84 pci_read_config_dword(dev, IOH_LMMIOH_BASEU, &dword);
85 mmioh_base |= ((u64)(dword & 0x7ffff)) << 32;
86 pci_read_config_dword(dev, IOH_LMMIOH_LIMITU, &dword);
87 mmioh_end |= ((u64)(dword & 0x7ffff)) << 32;
88 update_res(info, mmioh_base, mmioh_end, IORESOURCE_MEM, 0);
89
90 print_ioh_resources(info);
91}
92
93/* intel IOH */
94DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x342e, pci_root_bus_res);
diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c
index 0696d506c4ad..b02f6d8ac922 100644
--- a/arch/x86/pci/irq.c
+++ b/arch/x86/pci/irq.c
@@ -590,6 +590,8 @@ static __init int intel_router_probe(struct irq_router *r, struct pci_dev *route
590 case PCI_DEVICE_ID_INTEL_ICH10_1: 590 case PCI_DEVICE_ID_INTEL_ICH10_1:
591 case PCI_DEVICE_ID_INTEL_ICH10_2: 591 case PCI_DEVICE_ID_INTEL_ICH10_2:
592 case PCI_DEVICE_ID_INTEL_ICH10_3: 592 case PCI_DEVICE_ID_INTEL_ICH10_3:
593 case PCI_DEVICE_ID_INTEL_CPT_LPC1:
594 case PCI_DEVICE_ID_INTEL_CPT_LPC2:
593 r->name = "PIIX/ICH"; 595 r->name = "PIIX/ICH";
594 r->get = pirq_piix_get; 596 r->get = pirq_piix_get;
595 r->set = pirq_piix_set; 597 r->set = pirq_piix_set;
diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c
index b19d1e54201e..8f3f9a50b1e0 100644
--- a/arch/x86/pci/mmconfig-shared.c
+++ b/arch/x86/pci/mmconfig-shared.c
@@ -303,22 +303,17 @@ static void __init pci_mmcfg_check_end_bus_number(void)
303{ 303{
304 struct pci_mmcfg_region *cfg, *cfgx; 304 struct pci_mmcfg_region *cfg, *cfgx;
305 305
306 /* last one*/ 306 /* Fixup overlaps */
307 cfg = list_entry(pci_mmcfg_list.prev, typeof(*cfg), list);
308 if (cfg)
309 if (cfg->end_bus < cfg->start_bus)
310 cfg->end_bus = 255;
311
312 if (list_is_singular(&pci_mmcfg_list))
313 return;
314
315 /* don't overlap please */
316 list_for_each_entry(cfg, &pci_mmcfg_list, list) { 307 list_for_each_entry(cfg, &pci_mmcfg_list, list) {
317 if (cfg->end_bus < cfg->start_bus) 308 if (cfg->end_bus < cfg->start_bus)
318 cfg->end_bus = 255; 309 cfg->end_bus = 255;
319 310
311 /* Don't access the list head ! */
312 if (cfg->list.next == &pci_mmcfg_list)
313 break;
314
320 cfgx = list_entry(cfg->list.next, typeof(*cfg), list); 315 cfgx = list_entry(cfg->list.next, typeof(*cfg), list);
321 if (cfg != cfgx && cfg->end_bus >= cfgx->start_bus) 316 if (cfg->end_bus >= cfgx->start_bus)
322 cfg->end_bus = cfgx->start_bus - 1; 317 cfg->end_bus = cfgx->start_bus - 1;
323 } 318 }
324} 319}
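The list walk above is easier to follow as an array; a simplified sketch for illustration only (the real code iterates a linked list and must not dereference the list head):

#include <stdio.h>

struct region { int start_bus, end_bus; };

static void fixup(struct region *r, int n)
{
	int i;

	for (i = 0; i < n; i++) {
		if (r[i].end_bus < r[i].start_bus)
			r[i].end_bus = 255;
		/* trim against the next region, if there is one */
		if (i + 1 < n && r[i].end_bus >= r[i + 1].start_bus)
			r[i].end_bus = r[i + 1].start_bus - 1;
	}
}

int main(void)
{
	struct region r[] = { { 0x00, 0x00 }, { 0x40, 0x3f }, { 0x80, 0xff } };
	int i;

	fixup(r, 3);
	for (i = 0; i < 3; i++)
		printf("[bus %02x-%02x]\n", r[i].start_bus, r[i].end_bus);
	return 0;
}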
diff --git a/arch/x86/pci/numaq_32.c b/arch/x86/pci/numaq_32.c
index 8eb295e116f6..8884a1c1ada6 100644
--- a/arch/x86/pci/numaq_32.c
+++ b/arch/x86/pci/numaq_32.c
@@ -8,9 +8,7 @@
8#include <asm/apic.h> 8#include <asm/apic.h>
9#include <asm/mpspec.h> 9#include <asm/mpspec.h>
10#include <asm/pci_x86.h> 10#include <asm/pci_x86.h>
11 11#include <asm/numaq.h>
12#define XQUAD_PORTIO_BASE 0xfe400000
13#define XQUAD_PORTIO_QUAD 0x40000 /* 256k per quad. */
14 12
15#define BUS2QUAD(global) (mp_bus_id_to_node[global]) 13#define BUS2QUAD(global) (mp_bus_id_to_node[global])
16 14
@@ -18,8 +16,6 @@
18 16
19#define QUADLOCAL2BUS(quad,local) (quad_local_to_mp_bus_id[quad][local]) 17#define QUADLOCAL2BUS(quad,local) (quad_local_to_mp_bus_id[quad][local])
20 18
21#define XQUAD_PORT_ADDR(port, quad) (xquad_portio + (XQUAD_PORTIO_QUAD*quad) + port)
22
23#define PCI_CONF1_MQ_ADDRESS(bus, devfn, reg) \ 19#define PCI_CONF1_MQ_ADDRESS(bus, devfn, reg) \
24 (0x80000000 | (BUS2LOCAL(bus) << 16) | (devfn << 8) | (reg & ~3)) 20 (0x80000000 | (BUS2LOCAL(bus) << 16) | (devfn << 8) | (reg & ~3))
25 21
diff --git a/arch/x86/tools/test_get_len.c b/arch/x86/tools/test_get_len.c
index bee8d6ac2691..13403fc95a96 100644
--- a/arch/x86/tools/test_get_len.c
+++ b/arch/x86/tools/test_get_len.c
@@ -43,7 +43,7 @@ static int x86_64;
43static void usage(void) 43static void usage(void)
44{ 44{
45 fprintf(stderr, "Usage: objdump -d a.out | awk -f distill.awk |" 45 fprintf(stderr, "Usage: objdump -d a.out | awk -f distill.awk |"
46 " %s [-y|-n] [-v] \n", prog); 46 " %s [-y|-n] [-v]\n", prog);
47 fprintf(stderr, "\t-y 64bit mode\n"); 47 fprintf(stderr, "\t-y 64bit mode\n");
48 fprintf(stderr, "\t-n 32bit mode\n"); 48 fprintf(stderr, "\t-n 32bit mode\n");
49 fprintf(stderr, "\t-v verbose mode\n"); 49 fprintf(stderr, "\t-v verbose mode\n");
@@ -69,7 +69,7 @@ static void dump_field(FILE *fp, const char *name, const char *indent,
69 69
70static void dump_insn(FILE *fp, struct insn *insn) 70static void dump_insn(FILE *fp, struct insn *insn)
71{ 71{
72 fprintf(fp, "Instruction = { \n"); 72 fprintf(fp, "Instruction = {\n");
73 dump_field(fp, "prefixes", "\t", &insn->prefixes); 73 dump_field(fp, "prefixes", "\t", &insn->prefixes);
74 dump_field(fp, "rex_prefix", "\t", &insn->rex_prefix); 74 dump_field(fp, "rex_prefix", "\t", &insn->rex_prefix);
75 dump_field(fp, "vex_prefix", "\t", &insn->vex_prefix); 75 dump_field(fp, "vex_prefix", "\t", &insn->vex_prefix);