author    Ingo Molnar <mingo@kernel.org>  2012-07-25 15:40:40 -0400
committer Ingo Molnar <mingo@kernel.org>  2012-07-25 15:40:40 -0400
commit    d431adfbc9b7de651f3164c6b7ffcad75805d7e4 (patch)
tree      29bce222c81a3a392e51c11e2188659aa6d1bded /arch/x86
parent    d6250a3f12edb3a86db9598ffeca3de8b4a219e9 (diff)
parent    e2b34e311be3a57c9abcb927e37a57e38913714c (diff)
Merge branch 'linus' into x86/urgent
Merge in Linus's tree to avoid a conflict.

Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'arch/x86')
-rw-r--r--  arch/x86/Makefile | 3
-rw-r--r--  arch/x86/include/asm/alternative.h | 74
-rw-r--r--  arch/x86/include/asm/apic.h | 61
-rw-r--r--  arch/x86/include/asm/emergency-restart.h | 2
-rw-r--r--  arch/x86/include/asm/floppy.h | 2
-rw-r--r--  arch/x86/include/asm/kvm_host.h | 4
-rw-r--r--  arch/x86/include/asm/msr.h | 46
-rw-r--r--  arch/x86/include/asm/nmi.h | 20
-rw-r--r--  arch/x86/include/asm/paravirt.h | 41
-rw-r--r--  arch/x86/include/asm/paravirt_types.h | 2
-rw-r--r--  arch/x86/include/asm/pci_x86.h | 8
-rw-r--r--  arch/x86/include/asm/perf_event.h | 22
-rw-r--r--  arch/x86/include/asm/pgtable-2level.h | 4
-rw-r--r--  arch/x86/include/asm/pgtable-3level.h | 6
-rw-r--r--  arch/x86/include/asm/pgtable_64.h | 8
-rw-r--r--  arch/x86/include/asm/realmode.h | 3
-rw-r--r--  arch/x86/include/asm/reboot.h | 4
-rw-r--r--  arch/x86/include/asm/smp.h | 5
-rw-r--r--  arch/x86/include/asm/uaccess_64.h | 11
-rw-r--r--  arch/x86/include/asm/uprobes.h | 2
-rw-r--r--  arch/x86/include/asm/uv/uv_bau.h | 28
-rw-r--r--  arch/x86/include/asm/x2apic.h | 18
-rw-r--r--  arch/x86/include/asm/x86_init.h | 4
-rw-r--r--  arch/x86/kernel/alternative.c | 19
-rw-r--r--  arch/x86/kernel/amd_nb.c | 10
-rw-r--r--  arch/x86/kernel/apic/apic.c | 19
-rw-r--r--  arch/x86/kernel/apic/apic_flat_64.c | 76
-rw-r--r--  arch/x86/kernel/apic/apic_noop.c | 9
-rw-r--r--  arch/x86/kernel/apic/apic_numachip.c | 50
-rw-r--r--  arch/x86/kernel/apic/bigsmp_32.c | 48
-rw-r--r--  arch/x86/kernel/apic/es7000_32.c | 51
-rw-r--r--  arch/x86/kernel/apic/io_apic.c | 350
-rw-r--r--  arch/x86/kernel/apic/numaq_32.c | 30
-rw-r--r--  arch/x86/kernel/apic/probe_32.c | 23
-rw-r--r--  arch/x86/kernel/apic/probe_64.c | 11
-rw-r--r--  arch/x86/kernel/apic/summit_32.c | 68
-rw-r--r--  arch/x86/kernel/apic/x2apic_cluster.c | 82
-rw-r--r--  arch/x86/kernel/apic/x2apic_phys.c | 39
-rw-r--r--  arch/x86/kernel/apic/x2apic_uv_x.c | 45
-rw-r--r--  arch/x86/kernel/apm_32.c | 29
-rw-r--r--  arch/x86/kernel/cpu/Makefile | 4
-rw-r--r--  arch/x86/kernel/cpu/amd.c | 39
-rw-r--r--  arch/x86/kernel/cpu/bugs.c | 20
-rw-r--r--  arch/x86/kernel/cpu/common.c | 2
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce.c | 22
-rw-r--r--  arch/x86/kernel/cpu/mtrr/cleanup.c | 6
-rw-r--r--  arch/x86/kernel/cpu/mtrr/generic.c | 6
-rw-r--r--  arch/x86/kernel/cpu/perf_event.c | 111
-rw-r--r--  arch/x86/kernel/cpu/perf_event.h | 26
-rw-r--r--  arch/x86/kernel/cpu/perf_event_amd.c | 103
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel.c | 134
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel_ds.c | 12
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel_uncore.c | 1850
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel_uncore.h | 424
-rw-r--r--  arch/x86/kernel/cpu/perf_event_p4.c | 16
-rw-r--r--  arch/x86/kernel/cpu/perf_event_p6.c | 4
-rw-r--r--  arch/x86/kernel/dumpstack.c | 5
-rw-r--r--  arch/x86/kernel/dumpstack_32.c | 25
-rw-r--r--  arch/x86/kernel/dumpstack_64.c | 21
-rw-r--r--  arch/x86/kernel/entry_64.S | 20
-rw-r--r--  arch/x86/kernel/irq.c | 4
-rw-r--r--  arch/x86/kernel/microcode_core.c | 66
-rw-r--r--  arch/x86/kernel/module.c | 32
-rw-r--r--  arch/x86/kernel/nmi.c | 47
-rw-r--r--  arch/x86/kernel/nmi_selftest.c | 7
-rw-r--r--  arch/x86/kernel/paravirt.c | 2
-rw-r--r--  arch/x86/kernel/pci-calgary_64.c | 34
-rw-r--r--  arch/x86/kernel/process.c | 34
-rw-r--r--  arch/x86/kernel/process_64.c | 12
-rw-r--r--  arch/x86/kernel/reboot.c | 74
-rw-r--r--  arch/x86/kernel/setup.c | 2
-rw-r--r--  arch/x86/kernel/signal.c | 5
-rw-r--r--  arch/x86/kernel/smpboot.c | 106
-rw-r--r--  arch/x86/kernel/traps.c | 19
-rw-r--r--  arch/x86/kernel/tsc.c | 50
-rw-r--r--  arch/x86/kernel/uprobes.c | 3
-rw-r--r--  arch/x86/kernel/vm86_32.c | 6
-rw-r--r--  arch/x86/kernel/vsmp_64.c | 44
-rw-r--r--  arch/x86/kernel/vsyscall_64.c | 56
-rw-r--r--  arch/x86/kernel/x8664_ksyms_64.c | 1
-rw-r--r--  arch/x86/kernel/x86_init.c | 2
-rw-r--r--  arch/x86/kernel/xsave.c | 12
-rw-r--r--  arch/x86/kvm/pmu.c | 22
-rw-r--r--  arch/x86/kvm/trace.h | 12
-rw-r--r--  arch/x86/lib/msr-reg-export.c | 4
-rw-r--r--  arch/x86/lib/msr-reg.S | 10
-rw-r--r--  arch/x86/mm/init.c | 2
-rw-r--r--  arch/x86/oprofile/op_model_amd.c | 4
-rw-r--r--  arch/x86/platform/uv/tlb_uv.c | 453
-rw-r--r--  arch/x86/platform/uv/uv_irq.c | 9
-rw-r--r--  arch/x86/realmode/rm/Makefile | 2
-rw-r--r--  arch/x86/realmode/rm/header.S | 4
-rw-r--r--  arch/x86/realmode/rm/reboot.S (renamed from arch/x86/realmode/rm/reboot_32.S) | 30
-rw-r--r--  arch/x86/vdso/vdso32-setup.c | 6
-rw-r--r--  arch/x86/xen/enlighten.c | 2
-rw-r--r--  arch/x86/xen/smp.c | 2
96 files changed, 3848 insertions(+), 1519 deletions(-)
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 1f2521434554..b0c5276861ec 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -49,6 +49,9 @@ else
 KBUILD_AFLAGS += -m64
 KBUILD_CFLAGS += -m64
 
+# Use -mpreferred-stack-boundary=3 if supported.
+KBUILD_CFLAGS += $(call cc-option,-mno-sse -mpreferred-stack-boundary=3)
+
 # FIXME - should be integrated in Makefile.cpu (Makefile_32.cpu)
 cflags-$(CONFIG_MK8) += $(call cc-option,-march=k8)
 cflags-$(CONFIG_MPSC) += $(call cc-option,-march=nocona)
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index 49331bedc158..70780689599a 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -75,23 +75,54 @@ static inline int alternatives_text_reserved(void *start, void *end)
 }
 #endif /* CONFIG_SMP */
 
+#define OLDINSTR(oldinstr) "661:\n\t" oldinstr "\n662:\n"
+
+#define b_replacement(number) "663"#number
+#define e_replacement(number) "664"#number
+
+#define alt_slen "662b-661b"
+#define alt_rlen(number) e_replacement(number)"f-"b_replacement(number)"f"
+
+#define ALTINSTR_ENTRY(feature, number) \
+	" .long 661b - .\n" /* label */ \
+	" .long " b_replacement(number)"f - .\n" /* new instruction */ \
+	" .word " __stringify(feature) "\n" /* feature bit */ \
+	" .byte " alt_slen "\n" /* source len */ \
+	" .byte " alt_rlen(number) "\n" /* replacement len */
+
+#define DISCARD_ENTRY(number) /* rlen <= slen */ \
+	" .byte 0xff + (" alt_rlen(number) ") - (" alt_slen ")\n"
+
+#define ALTINSTR_REPLACEMENT(newinstr, feature, number) /* replacement */ \
+	b_replacement(number)":\n\t" newinstr "\n" e_replacement(number) ":\n\t"
+
 /* alternative assembly primitive: */
 #define ALTERNATIVE(oldinstr, newinstr, feature) \
-	\
-	"661:\n\t" oldinstr "\n662:\n" \
-	".section .altinstructions,\"a\"\n" \
-	" .long 661b - .\n" /* label */ \
-	" .long 663f - .\n" /* new instruction */ \
-	" .word " __stringify(feature) "\n" /* feature bit */ \
-	" .byte 662b-661b\n" /* sourcelen */ \
-	" .byte 664f-663f\n" /* replacementlen */ \
-	".previous\n" \
-	".section .discard,\"aw\",@progbits\n" \
-	" .byte 0xff + (664f-663f) - (662b-661b)\n" /* rlen <= slen */ \
-	".previous\n" \
-	".section .altinstr_replacement, \"ax\"\n" \
-	"663:\n\t" newinstr "\n664:\n" /* replacement */ \
-	".previous"
+	OLDINSTR(oldinstr) \
+	".section .altinstructions,\"a\"\n" \
+	ALTINSTR_ENTRY(feature, 1) \
+	".previous\n" \
+	".section .discard,\"aw\",@progbits\n" \
+	DISCARD_ENTRY(1) \
+	".previous\n" \
+	".section .altinstr_replacement, \"ax\"\n" \
+	ALTINSTR_REPLACEMENT(newinstr, feature, 1) \
+	".previous"
+
+#define ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2)\
+	OLDINSTR(oldinstr) \
+	".section .altinstructions,\"a\"\n" \
+	ALTINSTR_ENTRY(feature1, 1) \
+	ALTINSTR_ENTRY(feature2, 2) \
+	".previous\n" \
+	".section .discard,\"aw\",@progbits\n" \
+	DISCARD_ENTRY(1) \
+	DISCARD_ENTRY(2) \
+	".previous\n" \
+	".section .altinstr_replacement, \"ax\"\n" \
+	ALTINSTR_REPLACEMENT(newinstr1, feature1, 1) \
+	ALTINSTR_REPLACEMENT(newinstr2, feature2, 2) \
+	".previous"
 
 /*
  * This must be included *after* the definition of ALTERNATIVE due to
@@ -140,6 +171,19 @@ static inline int alternatives_text_reserved(void *start, void *end)
 	: output : [old] "i" (oldfunc), [new] "i" (newfunc), ## input)
 
 /*
+ * Like alternative_call, but there are two features and respective functions.
+ * If CPU has feature2, function2 is used.
+ * Otherwise, if CPU has feature1, function1 is used.
+ * Otherwise, old function is used.
+ */
+#define alternative_call_2(oldfunc, newfunc1, feature1, newfunc2, feature2, \
+			   output, input...) \
+	asm volatile (ALTERNATIVE_2("call %P[old]", "call %P[new1]", feature1,\
+		"call %P[new2]", feature2) \
+		: output : [old] "i" (oldfunc), [new1] "i" (newfunc1), \
+		  [new2] "i" (newfunc2), ## input)
+
+/*
  * use this macro(s) if you need more than one output parameter
  * in alternative_io
  */
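Taken together: ALTERNATIVE_2() emits one original instruction sequence plus two candidate replacements, and the patcher applies the feature2 replacement when that CPU bit is set, else the feature1 one, else leaves the original; alternative_call_2() wraps this for function calls. A minimal call-site sketch, assuming hypothetical my_copy_* helpers that follow the same register protocol as the copy_user_generic() conversion further down in this diff:

	/* Sketch only: the my_copy_* routines are hypothetical stand-ins. */
	extern unsigned my_copy_unrolled(void *to, const void *from, unsigned len);
	extern unsigned my_copy_string(void *to, const void *from, unsigned len);
	extern unsigned my_copy_erms(void *to, const void *from, unsigned len);

	static __always_inline unsigned my_copy(void *to, const void *from, unsigned len)
	{
		unsigned ret;

		/* Patched at boot: ERMS beats REP_GOOD beats the unpatched call. */
		alternative_call_2(my_copy_unrolled,
				   my_copy_string, X86_FEATURE_REP_GOOD,
				   my_copy_erms, X86_FEATURE_ERMS,
				   ASM_OUTPUT2("=a" (ret), "=D" (to), "=S" (from),
					       "=d" (len)),
				   "1" (to), "2" (from), "3" (len)
				   : "memory", "rcx", "r8", "r9", "r10", "r11");
		return ret;
	}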
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index eaff4790ed96..88093c1d44fd 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -306,7 +306,8 @@ struct apic {
 	unsigned long (*check_apicid_used)(physid_mask_t *map, int apicid);
 	unsigned long (*check_apicid_present)(int apicid);
 
-	void (*vector_allocation_domain)(int cpu, struct cpumask *retmask);
+	void (*vector_allocation_domain)(int cpu, struct cpumask *retmask,
+					 const struct cpumask *mask);
 	void (*init_apic_ldr)(void);
 
 	void (*ioapic_phys_id_map)(physid_mask_t *phys_map, physid_mask_t *retmap);
@@ -331,9 +332,9 @@ struct apic {
 	unsigned long (*set_apic_id)(unsigned int id);
 	unsigned long apic_id_mask;
 
-	unsigned int (*cpu_mask_to_apicid)(const struct cpumask *cpumask);
-	unsigned int (*cpu_mask_to_apicid_and)(const struct cpumask *cpumask,
-					       const struct cpumask *andmask);
+	int (*cpu_mask_to_apicid_and)(const struct cpumask *cpumask,
+				      const struct cpumask *andmask,
+				      unsigned int *apicid);
 
 	/* ipi */
 	void (*send_IPI_mask)(const struct cpumask *mask, int vector);
@@ -537,6 +538,11 @@ static inline const struct cpumask *default_target_cpus(void)
 #endif
 }
 
+static inline const struct cpumask *online_target_cpus(void)
+{
+	return cpu_online_mask;
+}
+
 DECLARE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid);
 
 
@@ -586,21 +592,50 @@ static inline int default_phys_pkg_id(int cpuid_apic, int index_msb)
 
 #endif
 
-static inline unsigned int
-default_cpu_mask_to_apicid(const struct cpumask *cpumask)
+static inline int
+flat_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
+			    const struct cpumask *andmask,
+			    unsigned int *apicid)
 {
-	return cpumask_bits(cpumask)[0] & APIC_ALL_CPUS;
+	unsigned long cpu_mask = cpumask_bits(cpumask)[0] &
+				 cpumask_bits(andmask)[0] &
+				 cpumask_bits(cpu_online_mask)[0] &
+				 APIC_ALL_CPUS;
+
+	if (likely(cpu_mask)) {
+		*apicid = (unsigned int)cpu_mask;
+		return 0;
+	} else {
+		return -EINVAL;
+	}
 }
 
-static inline unsigned int
+extern int
 default_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
-			       const struct cpumask *andmask)
+			       const struct cpumask *andmask,
+			       unsigned int *apicid);
+
+static inline void
+flat_vector_allocation_domain(int cpu, struct cpumask *retmask,
+			      const struct cpumask *mask)
 {
-	unsigned long mask1 = cpumask_bits(cpumask)[0];
-	unsigned long mask2 = cpumask_bits(andmask)[0];
-	unsigned long mask3 = cpumask_bits(cpu_online_mask)[0];
+	/* Careful. Some cpus do not strictly honor the set of cpus
+	 * specified in the interrupt destination when using lowest
+	 * priority interrupt delivery mode.
+	 *
+	 * In particular there was a hyperthreading cpu observed to
+	 * deliver interrupts to the wrong hyperthread when only one
+	 * hyperthread was specified in the interrupt desitination.
+	 */
+	cpumask_clear(retmask);
+	cpumask_bits(retmask)[0] = APIC_ALL_CPUS;
+}
 
-	return (unsigned int)(mask1 & mask2 & mask3);
+static inline void
+default_vector_allocation_domain(int cpu, struct cpumask *retmask,
+				 const struct cpumask *mask)
+{
+	cpumask_copy(retmask, cpumask_of(cpu));
 }
 
 static inline unsigned long default_check_apicid_used(physid_mask_t *map, int apicid)
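The cpu_mask_to_apicid_and() callback now reports failure through its int return value and hands the destination back via an out-parameter, instead of returning a BAD_APICID sentinel. A condensed caller sketch under the new contract (not a verbatim hunk from this merge; 'mask' is whatever affinity mask is being programmed):

	static int sketch_route_irq(const struct cpumask *mask)
	{
		unsigned int dest;
		int err;

		/* 0 on success; -EINVAL when mask, target_cpus() and the
		 * online map have an empty intersection. */
		err = apic->cpu_mask_to_apicid_and(mask, apic->target_cpus(), &dest);
		if (err)
			return err;

		/* ... program 'dest' into the routing entry ... */
		return 0;
	}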
diff --git a/arch/x86/include/asm/emergency-restart.h b/arch/x86/include/asm/emergency-restart.h
index cc70c1c78ca4..75ce3f47d204 100644
--- a/arch/x86/include/asm/emergency-restart.h
+++ b/arch/x86/include/asm/emergency-restart.h
@@ -4,9 +4,7 @@
 enum reboot_type {
 	BOOT_TRIPLE = 't',
 	BOOT_KBD = 'k',
-#ifdef CONFIG_X86_32
 	BOOT_BIOS = 'b',
-#endif
 	BOOT_ACPI = 'a',
 	BOOT_EFI = 'e',
 	BOOT_CF9 = 'p',
diff --git a/arch/x86/include/asm/floppy.h b/arch/x86/include/asm/floppy.h
index dbe82a5c5eac..d3d74698dce9 100644
--- a/arch/x86/include/asm/floppy.h
+++ b/arch/x86/include/asm/floppy.h
@@ -99,7 +99,7 @@ static irqreturn_t floppy_hardint(int irq, void *dev_id)
 	virtual_dma_residue += virtual_dma_count;
 	virtual_dma_count = 0;
 #ifdef TRACE_FLPY_INT
-	printk("count=%x, residue=%x calls=%d bytes=%d dma_wait=%d\n",
+	printk(KERN_DEBUG "count=%x, residue=%x calls=%d bytes=%d dma_wait=%d\n",
 	       virtual_dma_count, virtual_dma_residue, calls, bytes,
 	       dma_wait);
 	calls = 0;
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index db7c1f2709a2..2da88c0cda14 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -313,8 +313,8 @@ struct kvm_pmu {
 	u64 counter_bitmask[2];
 	u64 global_ctrl_mask;
 	u8 version;
-	struct kvm_pmc gp_counters[X86_PMC_MAX_GENERIC];
-	struct kvm_pmc fixed_counters[X86_PMC_MAX_FIXED];
+	struct kvm_pmc gp_counters[INTEL_PMC_MAX_GENERIC];
+	struct kvm_pmc fixed_counters[INTEL_PMC_MAX_FIXED];
 	struct irq_work irq_work;
 	u64 reprogram_pmi;
 };
diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h
index 084ef95274cd..813ed103f45e 100644
--- a/arch/x86/include/asm/msr.h
+++ b/arch/x86/include/asm/msr.h
@@ -115,8 +115,8 @@ notrace static inline int native_write_msr_safe(unsigned int msr,
 
 extern unsigned long long native_read_tsc(void);
 
-extern int native_rdmsr_safe_regs(u32 regs[8]);
-extern int native_wrmsr_safe_regs(u32 regs[8]);
+extern int rdmsr_safe_regs(u32 regs[8]);
+extern int wrmsr_safe_regs(u32 regs[8]);
 
 static __always_inline unsigned long long __native_read_tsc(void)
 {
@@ -187,43 +187,6 @@ static inline int rdmsrl_safe(unsigned msr, unsigned long long *p)
 	return err;
 }
 
-static inline int rdmsrl_amd_safe(unsigned msr, unsigned long long *p)
-{
-	u32 gprs[8] = { 0 };
-	int err;
-
-	gprs[1] = msr;
-	gprs[7] = 0x9c5a203a;
-
-	err = native_rdmsr_safe_regs(gprs);
-
-	*p = gprs[0] | ((u64)gprs[2] << 32);
-
-	return err;
-}
-
-static inline int wrmsrl_amd_safe(unsigned msr, unsigned long long val)
-{
-	u32 gprs[8] = { 0 };
-
-	gprs[0] = (u32)val;
-	gprs[1] = msr;
-	gprs[2] = val >> 32;
-	gprs[7] = 0x9c5a203a;
-
-	return native_wrmsr_safe_regs(gprs);
-}
-
-static inline int rdmsr_safe_regs(u32 regs[8])
-{
-	return native_rdmsr_safe_regs(regs);
-}
-
-static inline int wrmsr_safe_regs(u32 regs[8])
-{
-	return native_wrmsr_safe_regs(regs);
-}
-
 #define rdtscl(low) \
 	((low) = (u32)__native_read_tsc())
 
@@ -237,6 +200,8 @@ do { \
 	(high) = (u32)(_l >> 32); \
 } while (0)
 
+#define rdpmcl(counter, val) ((val) = native_read_pmc(counter))
+
 #define rdtscp(low, high, aux) \
 do { \
 	unsigned long long _val = native_read_tscp(&(aux)); \
@@ -248,8 +213,7 @@ do { \
 
 #endif /* !CONFIG_PARAVIRT */
 
-
-#define checking_wrmsrl(msr, val) wrmsr_safe((msr), (u32)(val), \
+#define wrmsrl_safe(msr, val) wrmsr_safe((msr), (u32)(val), \
 					     (u32)((val) >> 32))
 
 #define write_tsc(val1, val2) wrmsr(MSR_IA32_TSC, (val1), (val2))
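checking_wrmsrl() becomes wrmsrl_safe(), pairing naturally with rdmsrl_safe(); semantics are unchanged (it still expands to wrmsr_safe(), returning 0 on success and nonzero when the access faults). A small usage sketch, with 'msr' standing in for whichever MSR a caller probes:

	static int sketch_probe_msr(unsigned int msr)
	{
		unsigned long long val;

		if (rdmsrl_safe(msr, &val))
			return -EIO;		/* read faulted */

		return wrmsrl_safe(msr, val);	/* write back; 0 on success */
	}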
diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h
index dc580c42851c..c0fa356e90de 100644
--- a/arch/x86/include/asm/nmi.h
+++ b/arch/x86/include/asm/nmi.h
@@ -44,28 +44,14 @@ struct nmiaction {
 	const char *name;
 };
 
-#define register_nmi_handler(t, fn, fg, n)		\
+#define register_nmi_handler(t, fn, fg, n, init...)	\
 ({							\
-	static struct nmiaction fn##_na = {		\
+	static struct nmiaction init fn##_na = {	\
 		.handler = (fn),			\
 		.name = (n),				\
 		.flags = (fg),				\
 	};						\
 	__register_nmi_handler((t), &fn##_na);		\
-})
-
-/*
- * For special handlers that register/unregister in the
- * init section only. This should be considered rare.
- */
-#define register_nmi_handler_initonly(t, fn, fg, n)	\
-({							\
-	static struct nmiaction fn##_na __initdata = {	\
-		.handler = (fn),			\
-		.name = (n),				\
-		.flags = (fg),				\
-	};						\
-	__register_nmi_handler((t), &fn##_na);		\
 })
 
 int __register_nmi_handler(unsigned int, struct nmiaction *);
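The new variadic init... slot is pasted into the static struct nmiaction definition as a section attribute, which is how the deleted register_nmi_handler_initonly() is now expressed. A sketch of an init-only registration (hypothetical names):

	static int __init sketch_nmi(unsigned int cmd, struct pt_regs *regs)
	{
		return NMI_HANDLED;
	}

	static int __init sketch_nmi_setup(void)
	{
		/* Passing __initdata places the nmiaction in .init.data,
		 * matching the old _initonly variant. */
		register_nmi_handler(NMI_LOCAL, sketch_nmi, 0, "sketch", __initdata);
		return 0;
	}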
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 6cbbabf52707..0b47ddb6f00b 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -128,21 +128,11 @@ static inline u64 paravirt_read_msr(unsigned msr, int *err)
 	return PVOP_CALL2(u64, pv_cpu_ops.read_msr, msr, err);
 }
 
-static inline int paravirt_rdmsr_regs(u32 *regs)
-{
-	return PVOP_CALL1(int, pv_cpu_ops.rdmsr_regs, regs);
-}
-
 static inline int paravirt_write_msr(unsigned msr, unsigned low, unsigned high)
 {
 	return PVOP_CALL3(int, pv_cpu_ops.write_msr, msr, low, high);
 }
 
-static inline int paravirt_wrmsr_regs(u32 *regs)
-{
-	return PVOP_CALL1(int, pv_cpu_ops.wrmsr_regs, regs);
-}
-
 /* These should all do BUG_ON(_err), but our headers are too tangled. */
 #define rdmsr(msr, val1, val2) \
 do { \
@@ -176,9 +166,6 @@ do { \
 	_err; \
 })
 
-#define rdmsr_safe_regs(regs) paravirt_rdmsr_regs(regs)
-#define wrmsr_safe_regs(regs) paravirt_wrmsr_regs(regs)
-
 static inline int rdmsrl_safe(unsigned msr, unsigned long long *p)
 {
 	int err;
@@ -186,32 +173,6 @@ static inline int rdmsrl_safe(unsigned msr, unsigned long long *p)
 	*p = paravirt_read_msr(msr, &err);
 	return err;
 }
-static inline int rdmsrl_amd_safe(unsigned msr, unsigned long long *p)
-{
-	u32 gprs[8] = { 0 };
-	int err;
-
-	gprs[1] = msr;
-	gprs[7] = 0x9c5a203a;
-
-	err = paravirt_rdmsr_regs(gprs);
-
-	*p = gprs[0] | ((u64)gprs[2] << 32);
-
-	return err;
-}
-
-static inline int wrmsrl_amd_safe(unsigned msr, unsigned long long val)
-{
-	u32 gprs[8] = { 0 };
-
-	gprs[0] = (u32)val;
-	gprs[1] = msr;
-	gprs[2] = val >> 32;
-	gprs[7] = 0x9c5a203a;
-
-	return paravirt_wrmsr_regs(gprs);
-}
 
 static inline u64 paravirt_read_tsc(void)
 {
@@ -252,6 +213,8 @@ do { \
 	high = _l >> 32; \
 } while (0)
 
+#define rdpmcl(counter, val) ((val) = paravirt_read_pmc(counter))
+
 static inline unsigned long long paravirt_rdtscp(unsigned int *aux)
 {
 	return PVOP_CALL1(u64, pv_cpu_ops.read_tscp, aux);
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 8e8b9a4987ee..8613cbb7ba41 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -153,9 +153,7 @@ struct pv_cpu_ops {
 	/* MSR, PMC and TSR operations.
 	   err = 0/-EFAULT. wrmsr returns 0/-EFAULT. */
 	u64 (*read_msr)(unsigned int msr, int *err);
-	int (*rdmsr_regs)(u32 *regs);
 	int (*write_msr)(unsigned int msr, unsigned low, unsigned high);
-	int (*wrmsr_regs)(u32 *regs);
 
 	u64 (*read_tsc)(void);
 	u64 (*read_pmc)(int counter);
diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h
index b3a531746026..5ad24a89b19b 100644
--- a/arch/x86/include/asm/pci_x86.h
+++ b/arch/x86/include/asm/pci_x86.h
@@ -7,9 +7,13 @@
 #undef DEBUG
 
 #ifdef DEBUG
-#define DBG(x...) printk(x)
+#define DBG(fmt, ...) printk(fmt, ##__VA_ARGS__)
 #else
-#define DBG(x...)
+#define DBG(fmt, ...) \
+do { \
+	if (0) \
+		printk(fmt, ##__VA_ARGS__); \
+} while (0)
 #endif
 
 #define PCI_PROBE_BIOS 0x0001
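The point of the do { if (0) printk(...); } while (0) expansion is that a !DEBUG build still type-checks the format string and its arguments before the dead branch is optimized away; the old empty expansion let broken DBG() calls compile silently. A standalone user-space illustration of the same idiom (plain C, not kernel code):

	#include <stdio.h>

	#ifdef DEBUG
	#define DBG(fmt, ...) printf(fmt, ##__VA_ARGS__)
	#else
	#define DBG(fmt, ...)				\
	do {						\
		if (0)					\
			printf(fmt, ##__VA_ARGS__);	\
	} while (0)
	#endif

	int main(void)
	{
		int irq = 11;

		/* Even without -DDEBUG, a mismatch such as DBG("%s\n", irq)
		 * now draws a format-string warning; nothing is printed. */
		DBG("routing irq %d\n", irq);
		return 0;
	}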
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 588f52ea810e..c78f14a0df00 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -5,11 +5,10 @@
  * Performance event hw details:
  */
 
-#define X86_PMC_MAX_GENERIC 32
-#define X86_PMC_MAX_FIXED 3
+#define INTEL_PMC_MAX_GENERIC 32
+#define INTEL_PMC_MAX_FIXED 3
+#define INTEL_PMC_IDX_FIXED 32
 
-#define X86_PMC_IDX_GENERIC 0
-#define X86_PMC_IDX_FIXED 32
 #define X86_PMC_IDX_MAX 64
 
 #define MSR_ARCH_PERFMON_PERFCTR0 0xc1
@@ -48,8 +47,7 @@
 	(X86_RAW_EVENT_MASK | \
 	 AMD64_EVENTSEL_EVENT)
 #define AMD64_NUM_COUNTERS 4
-#define AMD64_NUM_COUNTERS_F15H 6
-#define AMD64_NUM_COUNTERS_MAX AMD64_NUM_COUNTERS_F15H
+#define AMD64_NUM_COUNTERS_CORE 6
 
 #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL 0x3c
 #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK (0x00 << 8)
@@ -121,16 +119,16 @@ struct x86_pmu_capability {
 
 /* Instr_Retired.Any: */
 #define MSR_ARCH_PERFMON_FIXED_CTR0 0x309
-#define X86_PMC_IDX_FIXED_INSTRUCTIONS (X86_PMC_IDX_FIXED + 0)
+#define INTEL_PMC_IDX_FIXED_INSTRUCTIONS (INTEL_PMC_IDX_FIXED + 0)
 
 /* CPU_CLK_Unhalted.Core: */
 #define MSR_ARCH_PERFMON_FIXED_CTR1 0x30a
-#define X86_PMC_IDX_FIXED_CPU_CYCLES (X86_PMC_IDX_FIXED + 1)
+#define INTEL_PMC_IDX_FIXED_CPU_CYCLES (INTEL_PMC_IDX_FIXED + 1)
 
 /* CPU_CLK_Unhalted.Ref: */
 #define MSR_ARCH_PERFMON_FIXED_CTR2 0x30b
-#define X86_PMC_IDX_FIXED_REF_CYCLES (X86_PMC_IDX_FIXED + 2)
-#define X86_PMC_MSK_FIXED_REF_CYCLES (1ULL << X86_PMC_IDX_FIXED_REF_CYCLES)
+#define INTEL_PMC_IDX_FIXED_REF_CYCLES (INTEL_PMC_IDX_FIXED + 2)
+#define INTEL_PMC_MSK_FIXED_REF_CYCLES (1ULL << INTEL_PMC_IDX_FIXED_REF_CYCLES)
 
 /*
  * We model BTS tracing as another fixed-mode PMC.
@@ -139,7 +137,7 @@ struct x86_pmu_capability {
  * values are used by actual fixed events and higher values are used
  * to indicate other overflow conditions in the PERF_GLOBAL_STATUS msr.
  */
-#define X86_PMC_IDX_FIXED_BTS (X86_PMC_IDX_FIXED + 16)
+#define INTEL_PMC_IDX_FIXED_BTS (INTEL_PMC_IDX_FIXED + 16)
 
 /*
  * IBS cpuid feature detection
@@ -234,6 +232,7 @@ struct perf_guest_switch_msr {
 
 extern struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr);
 extern void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap);
+extern void perf_check_microcode(void);
 #else
 static inline perf_guest_switch_msr *perf_guest_get_msrs(int *nr)
 {
@@ -247,6 +246,7 @@ static inline void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap)
 }
 
 static inline void perf_events_lapic_init(void) { }
+static inline void perf_check_microcode(void) { }
 #endif
 
 #if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD)
diff --git a/arch/x86/include/asm/pgtable-2level.h b/arch/x86/include/asm/pgtable-2level.h
index 98391db840c6..f2b489cf1602 100644
--- a/arch/x86/include/asm/pgtable-2level.h
+++ b/arch/x86/include/asm/pgtable-2level.h
@@ -2,9 +2,9 @@
 #define _ASM_X86_PGTABLE_2LEVEL_H
 
 #define pte_ERROR(e) \
-	printk("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, (e).pte_low)
+	pr_err("%s:%d: bad pte %08lx\n", __FILE__, __LINE__, (e).pte_low)
 #define pgd_ERROR(e) \
-	printk("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e))
+	pr_err("%s:%d: bad pgd %08lx\n", __FILE__, __LINE__, pgd_val(e))
 
 /*
  * Certain architectures need to do special things when PTEs
diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h
index cb00ccc7d571..4cc9f2b7cdc3 100644
--- a/arch/x86/include/asm/pgtable-3level.h
+++ b/arch/x86/include/asm/pgtable-3level.h
@@ -9,13 +9,13 @@
  */
 
 #define pte_ERROR(e) \
-	printk("%s:%d: bad pte %p(%08lx%08lx).\n", \
+	pr_err("%s:%d: bad pte %p(%08lx%08lx)\n", \
 	       __FILE__, __LINE__, &(e), (e).pte_high, (e).pte_low)
 #define pmd_ERROR(e) \
-	printk("%s:%d: bad pmd %p(%016Lx).\n", \
+	pr_err("%s:%d: bad pmd %p(%016Lx)\n", \
 	       __FILE__, __LINE__, &(e), pmd_val(e))
 #define pgd_ERROR(e) \
-	printk("%s:%d: bad pgd %p(%016Lx).\n", \
+	pr_err("%s:%d: bad pgd %p(%016Lx)\n", \
 	       __FILE__, __LINE__, &(e), pgd_val(e))
 
 /* Rules for using set_pte: the pte being assigned *must* be
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index 975f709e09ae..8251be02301e 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -26,16 +26,16 @@ extern pgd_t init_level4_pgt[];
 extern void paging_init(void);
 
 #define pte_ERROR(e) \
-	printk("%s:%d: bad pte %p(%016lx).\n", \
+	pr_err("%s:%d: bad pte %p(%016lx)\n", \
 	       __FILE__, __LINE__, &(e), pte_val(e))
 #define pmd_ERROR(e) \
-	printk("%s:%d: bad pmd %p(%016lx).\n", \
+	pr_err("%s:%d: bad pmd %p(%016lx)\n", \
 	       __FILE__, __LINE__, &(e), pmd_val(e))
 #define pud_ERROR(e) \
-	printk("%s:%d: bad pud %p(%016lx).\n", \
+	pr_err("%s:%d: bad pud %p(%016lx)\n", \
 	       __FILE__, __LINE__, &(e), pud_val(e))
 #define pgd_ERROR(e) \
-	printk("%s:%d: bad pgd %p(%016lx).\n", \
+	pr_err("%s:%d: bad pgd %p(%016lx)\n", \
 	       __FILE__, __LINE__, &(e), pgd_val(e))
 
 struct mm_struct;
diff --git a/arch/x86/include/asm/realmode.h b/arch/x86/include/asm/realmode.h
index fce3f4ae5bd6..fe1ec5bcd846 100644
--- a/arch/x86/include/asm/realmode.h
+++ b/arch/x86/include/asm/realmode.h
@@ -21,8 +21,9 @@ struct real_mode_header {
 	u32 wakeup_header;
 #endif
 	/* APM/BIOS reboot */
-#ifdef CONFIG_X86_32
 	u32 machine_real_restart_asm;
+#ifdef CONFIG_X86_64
+	u32 machine_real_restart_seg;
 #endif
 };
 
diff --git a/arch/x86/include/asm/reboot.h b/arch/x86/include/asm/reboot.h
index 92f297069e87..a82c4f1b4d83 100644
--- a/arch/x86/include/asm/reboot.h
+++ b/arch/x86/include/asm/reboot.h
@@ -18,8 +18,8 @@ extern struct machine_ops machine_ops;
 
 void native_machine_crash_shutdown(struct pt_regs *regs);
 void native_machine_shutdown(void);
-void machine_real_restart(unsigned int type);
-/* These must match dispatch_table in reboot_32.S */
+void __noreturn machine_real_restart(unsigned int type);
+/* These must match dispatch in arch/x86/realmore/rm/reboot.S */
 #define MRR_BIOS 0
 #define MRR_APM 1
 
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index f48394513c37..2ffa95dc2333 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -169,11 +169,6 @@ void x86_idle_thread_init(unsigned int cpu, struct task_struct *idle);
 void smp_store_cpu_info(int id);
 #define cpu_physical_id(cpu) per_cpu(x86_cpu_to_apicid, cpu)
 
-/* We don't mark CPUs online until __cpu_up(), so we need another measure */
-static inline int num_booting_cpus(void)
-{
-	return cpumask_weight(cpu_callout_mask);
-}
-
 #else /* !CONFIG_SMP */
 #define wbinvd_on_cpu(cpu) wbinvd()
 static inline int wbinvd_on_all_cpus(void)
diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h
index 8e796fbbf9c6..d8def8b3dba0 100644
--- a/arch/x86/include/asm/uaccess_64.h
+++ b/arch/x86/include/asm/uaccess_64.h
@@ -17,6 +17,8 @@
 
 /* Handles exceptions in both to and from, but doesn't do access_ok */
 __must_check unsigned long
+copy_user_enhanced_fast_string(void *to, const void *from, unsigned len);
+__must_check unsigned long
 copy_user_generic_string(void *to, const void *from, unsigned len);
 __must_check unsigned long
 copy_user_generic_unrolled(void *to, const void *from, unsigned len);
@@ -26,9 +28,16 @@ copy_user_generic(void *to, const void *from, unsigned len)
 {
 	unsigned ret;
 
-	alternative_call(copy_user_generic_unrolled,
+	/*
+	 * If CPU has ERMS feature, use copy_user_enhanced_fast_string.
+	 * Otherwise, if CPU has rep_good feature, use copy_user_generic_string.
+	 * Otherwise, use copy_user_generic_unrolled.
+	 */
+	alternative_call_2(copy_user_generic_unrolled,
 			 copy_user_generic_string,
 			 X86_FEATURE_REP_GOOD,
+			 copy_user_enhanced_fast_string,
+			 X86_FEATURE_ERMS,
 			 ASM_OUTPUT2("=a" (ret), "=D" (to), "=S" (from),
 				     "=d" (len)),
 			 "1" (to), "2" (from), "3" (len)
diff --git a/arch/x86/include/asm/uprobes.h b/arch/x86/include/asm/uprobes.h
index 1e9bed14f7ae..f3971bbcd1de 100644
--- a/arch/x86/include/asm/uprobes.h
+++ b/arch/x86/include/asm/uprobes.h
@@ -48,7 +48,7 @@ struct arch_uprobe_task {
 #endif
 };
 
-extern int arch_uprobe_analyze_insn(struct arch_uprobe *aup, struct mm_struct *mm);
+extern int arch_uprobe_analyze_insn(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long addr);
 extern int arch_uprobe_pre_xol(struct arch_uprobe *aup, struct pt_regs *regs);
 extern int arch_uprobe_post_xol(struct arch_uprobe *aup, struct pt_regs *regs);
 extern bool arch_uprobe_xol_was_trapped(struct task_struct *tsk);
diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h
index 6149b476d9df..a06983cdc125 100644
--- a/arch/x86/include/asm/uv/uv_bau.h
+++ b/arch/x86/include/asm/uv/uv_bau.h
@@ -140,6 +140,9 @@
 #define IPI_RESET_LIMIT 1
 /* after this # consecutive successes, bump up the throttle if it was lowered */
 #define COMPLETE_THRESHOLD 5
+/* after this # of giveups (fall back to kernel IPI's) disable the use of
+   the BAU for a period of time */
+#define GIVEUP_LIMIT 100
 
 #define UV_LB_SUBNODEID 0x10
 
@@ -166,7 +169,6 @@
 #define FLUSH_RETRY_TIMEOUT 2
 #define FLUSH_GIVEUP 3
 #define FLUSH_COMPLETE 4
-#define FLUSH_RETRY_BUSYBUG 5
 
 /*
  * tuning the action when the numalink network is extremely delayed
@@ -175,7 +177,7 @@
    microseconds */
 #define CONGESTED_REPS 10 /* long delays averaged over
 			     this many broadcasts */
-#define CONGESTED_PERIOD 30 /* time for the bau to be
+#define DISABLED_PERIOD 10 /* time for the bau to be
 			      disabled, in seconds */
 /* see msg_type: */
 #define MSG_NOOP 0
@@ -520,6 +522,12 @@ struct ptc_stats {
 	unsigned long s_uv2_wars; /* uv2 workaround, perm. busy */
 	unsigned long s_uv2_wars_hw; /* uv2 workaround, hiwater */
 	unsigned long s_uv2_war_waits; /* uv2 workaround, long waits */
+	unsigned long s_overipilimit; /* over the ipi reset limit */
+	unsigned long s_giveuplimit; /* disables, over giveup limit*/
+	unsigned long s_enters; /* entries to the driver */
+	unsigned long s_ipifordisabled; /* fall back to IPI; disabled */
+	unsigned long s_plugged; /* plugged by h/w bug*/
+	unsigned long s_congested; /* giveup on long wait */
 	/* destination statistics */
 	unsigned long d_alltlb; /* times all tlb's on this
 				   cpu were flushed */
@@ -586,8 +594,8 @@ struct bau_control {
 	int timeout_tries;
 	int ipi_attempts;
 	int conseccompletes;
-	int baudisabled;
-	int set_bau_off;
+	short nobau;
+	short baudisabled;
 	short cpu;
 	short osnode;
 	short uvhub_cpu;
@@ -596,14 +604,16 @@ struct bau_control {
 	short cpus_in_socket;
 	short cpus_in_uvhub;
 	short partition_base_pnode;
-	short using_desc; /* an index, like uvhub_cpu */
-	unsigned int inuse_map;
+	short busy; /* all were busy (war) */
 	unsigned short message_number;
 	unsigned short uvhub_quiesce;
 	short socket_acknowledge_count[DEST_Q_SIZE];
 	cycles_t send_message;
+	cycles_t period_end;
+	cycles_t period_time;
 	spinlock_t uvhub_lock;
 	spinlock_t queue_lock;
+	spinlock_t disable_lock;
 	/* tunables */
 	int max_concurr;
 	int max_concurr_const;
@@ -614,9 +624,9 @@ struct bau_control {
 	int complete_threshold;
 	int cong_response_us;
 	int cong_reps;
-	int cong_period;
-	unsigned long clocks_per_100_usec;
-	cycles_t period_time;
+	cycles_t disabled_period;
+	int period_giveups;
+	int giveup_limit;
 	long period_requests;
 	struct hub_and_pnode *thp;
 };
diff --git a/arch/x86/include/asm/x2apic.h b/arch/x86/include/asm/x2apic.h
index 92e54abf89e0..f90f0a587c66 100644
--- a/arch/x86/include/asm/x2apic.h
+++ b/arch/x86/include/asm/x2apic.h
@@ -9,15 +9,6 @@
 #include <asm/ipi.h>
 #include <linux/cpumask.h>
 
-/*
- * Need to use more than cpu 0, because we need more vectors
- * when MSI-X are used.
- */
-static const struct cpumask *x2apic_target_cpus(void)
-{
-	return cpu_online_mask;
-}
-
 static int x2apic_apic_id_valid(int apicid)
 {
 	return 1;
@@ -28,15 +19,6 @@ static int x2apic_apic_id_registered(void)
 	return 1;
 }
 
-/*
- * For now each logical cpu is in its own vector allocation domain.
- */
-static void x2apic_vector_allocation_domain(int cpu, struct cpumask *retmask)
-{
-	cpumask_clear(retmask);
-	cpumask_set_cpu(cpu, retmask);
-}
-
 static void
 __x2apic_send_IPI_dest(unsigned int apicid, int vector, unsigned int dest)
 {
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
index c090af10ac7d..38155f667144 100644
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -156,7 +156,6 @@ struct x86_cpuinit_ops {
 /**
  * struct x86_platform_ops - platform specific runtime functions
  * @calibrate_tsc: calibrate TSC
- * @wallclock_init: init the wallclock device
  * @get_wallclock: get time from HW clock like RTC etc.
  * @set_wallclock: set time back to HW clock
  * @is_untracked_pat_range exclude from PAT logic
@@ -164,10 +163,10 @@ struct x86_cpuinit_ops {
  * @i8042_detect pre-detect if i8042 controller exists
  * @save_sched_clock_state: save state for sched_clock() on suspend
  * @restore_sched_clock_state: restore state for sched_clock() on resume
+ * @apic_post_init: adjust apic if neeeded
  */
 struct x86_platform_ops {
 	unsigned long (*calibrate_tsc)(void);
-	void (*wallclock_init)(void);
 	unsigned long (*get_wallclock)(void);
 	int (*set_wallclock)(unsigned long nowtime);
 	void (*iommu_shutdown)(void);
@@ -177,6 +176,7 @@ struct x86_platform_ops {
 	int (*i8042_detect)(void);
 	void (*save_sched_clock_state)(void);
 	void (*restore_sched_clock_state)(void);
+	void (*apic_post_init)(void);
 };
 
 struct pci_dev;
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 73ef56c5a8b3..afb7ff79a29f 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -1,3 +1,5 @@
+#define pr_fmt(fmt) "SMP alternatives: " fmt
+
 #include <linux/module.h>
 #include <linux/sched.h>
 #include <linux/mutex.h>
@@ -63,8 +65,11 @@ static int __init setup_noreplace_paravirt(char *str)
 __setup("noreplace-paravirt", setup_noreplace_paravirt);
 #endif
 
-#define DPRINTK(fmt, args...) if (debug_alternative) \
-	printk(KERN_DEBUG fmt, args)
+#define DPRINTK(fmt, ...) \
+do { \
+	if (debug_alternative) \
+		printk(KERN_DEBUG fmt, ##__VA_ARGS__); \
+} while (0)
 
 /*
  * Each GENERIC_NOPX is of X bytes, and defined as an array of bytes
@@ -428,7 +433,7 @@ void alternatives_smp_switch(int smp)
 	 * If this still occurs then you should see a hang
 	 * or crash shortly after this line:
 	 */
-	printk("lockdep: fixing up alternatives.\n");
+	pr_info("lockdep: fixing up alternatives\n");
 #endif
 
 	if (noreplace_smp || smp_alt_once || skip_smp_alternatives)
@@ -444,14 +449,14 @@ void alternatives_smp_switch(int smp)
 	if (smp == smp_mode) {
 		/* nothing */
 	} else if (smp) {
-		printk(KERN_INFO "SMP alternatives: switching to SMP code\n");
+		pr_info("switching to SMP code\n");
 		clear_cpu_cap(&boot_cpu_data, X86_FEATURE_UP);
 		clear_cpu_cap(&cpu_data(0), X86_FEATURE_UP);
 		list_for_each_entry(mod, &smp_alt_modules, next)
 			alternatives_smp_lock(mod->locks, mod->locks_end,
 					      mod->text, mod->text_end);
 	} else {
-		printk(KERN_INFO "SMP alternatives: switching to UP code\n");
+		pr_info("switching to UP code\n");
 		set_cpu_cap(&boot_cpu_data, X86_FEATURE_UP);
 		set_cpu_cap(&cpu_data(0), X86_FEATURE_UP);
 		list_for_each_entry(mod, &smp_alt_modules, next)
@@ -546,7 +551,7 @@ void __init alternative_instructions(void)
 #ifdef CONFIG_SMP
 	if (smp_alt_once) {
 		if (1 == num_possible_cpus()) {
-			printk(KERN_INFO "SMP alternatives: switching to UP code\n");
+			pr_info("switching to UP code\n");
 			set_cpu_cap(&boot_cpu_data, X86_FEATURE_UP);
 			set_cpu_cap(&cpu_data(0), X86_FEATURE_UP);
 
@@ -664,7 +669,7 @@ static int __kprobes stop_machine_text_poke(void *data)
 	struct text_poke_param *p;
 	int i;
 
-	if (atomic_dec_and_test(&stop_machine_first)) {
+	if (atomic_xchg(&stop_machine_first, 0)) {
 		for (i = 0; i < tpp->nparams; i++) {
 			p = &tpp->params[i];
 			text_poke(p->addr, p->opcode, p->len);
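The atomic_xchg() form makes the leader election explicit: every CPU swaps the flag for 0, and only the one that reads back the initial 1 performs the poke; unlike the decrement, a repeated rendezvous cannot drive the counter negative. A standalone sketch of the idiom with C11 atomics (illustration only, not kernel code):

	#include <stdatomic.h>

	static atomic_int first = 1;

	/* Called by every CPU/thread at the rendezvous; exactly one
	 * caller observes the stored 1 and runs the work. */
	void do_once(void (*work)(void))
	{
		if (atomic_exchange(&first, 0))
			work();
	}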
diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c
index be16854591cc..f29f6dd6bc08 100644
--- a/arch/x86/kernel/amd_nb.c
+++ b/arch/x86/kernel/amd_nb.c
@@ -2,6 +2,9 @@
  * Shared support code for AMD K8 northbridges and derivates.
  * Copyright 2006 Andi Kleen, SUSE Labs. Subject to GPLv2.
  */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/types.h>
 #include <linux/slab.h>
 #include <linux/init.h>
@@ -258,7 +261,7 @@ void amd_flush_garts(void)
 	}
 	spin_unlock_irqrestore(&gart_lock, flags);
 	if (!flushed)
-		printk("nothing to flush?\n");
+		pr_notice("nothing to flush?\n");
 }
 EXPORT_SYMBOL_GPL(amd_flush_garts);
 
@@ -269,11 +272,10 @@ static __init int init_amd_nbs(void)
 	err = amd_cache_northbridges();
 
 	if (err < 0)
-		printk(KERN_NOTICE "AMD NB: Cannot enumerate AMD northbridges.\n");
+		pr_notice("Cannot enumerate AMD northbridges\n");
 
 	if (amd_cache_gart() < 0)
-		printk(KERN_NOTICE "AMD NB: Cannot initialize GART flush words, "
-		       "GART support disabled.\n");
+		pr_notice("Cannot initialize GART flush words, GART support disabled\n");
 
 	return err;
 }
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 39a222e094af..c421512ca5eb 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -2123,6 +2123,25 @@ void default_init_apic_ldr(void)
 	apic_write(APIC_LDR, val);
 }
 
+int default_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
+				   const struct cpumask *andmask,
+				   unsigned int *apicid)
+{
+	unsigned int cpu;
+
+	for_each_cpu_and(cpu, cpumask, andmask) {
+		if (cpumask_test_cpu(cpu, cpu_online_mask))
+			break;
+	}
+
+	if (likely(cpu < nr_cpu_ids)) {
+		*apicid = per_cpu(x86_cpu_to_apicid, cpu);
+		return 0;
+	}
+
+	return -EINVAL;
+}
+
 /*
  * Power management
  */
diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c
index 0e881c46e8c8..00c77cf78e9e 100644
--- a/arch/x86/kernel/apic/apic_flat_64.c
+++ b/arch/x86/kernel/apic/apic_flat_64.c
@@ -36,25 +36,6 @@ static int flat_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
 	return 1;
 }
 
-static const struct cpumask *flat_target_cpus(void)
-{
-	return cpu_online_mask;
-}
-
-static void flat_vector_allocation_domain(int cpu, struct cpumask *retmask)
-{
-	/* Careful. Some cpus do not strictly honor the set of cpus
-	 * specified in the interrupt destination when using lowest
-	 * priority interrupt delivery mode.
-	 *
-	 * In particular there was a hyperthreading cpu observed to
-	 * deliver interrupts to the wrong hyperthread when only one
-	 * hyperthread was specified in the interrupt desitination.
-	 */
-	cpumask_clear(retmask);
-	cpumask_bits(retmask)[0] = APIC_ALL_CPUS;
-}
-
 /*
  * Set up the logical destination ID.
  *
@@ -92,7 +73,7 @@ static void flat_send_IPI_mask(const struct cpumask *cpumask, int vector)
 }
 
 static void
- flat_send_IPI_mask_allbutself(const struct cpumask *cpumask, int vector)
+flat_send_IPI_mask_allbutself(const struct cpumask *cpumask, int vector)
 {
 	unsigned long mask = cpumask_bits(cpumask)[0];
 	int cpu = smp_processor_id();
@@ -186,7 +167,7 @@ static struct apic apic_flat = {
 	.irq_delivery_mode = dest_LowestPrio,
 	.irq_dest_mode = 1, /* logical */
 
-	.target_cpus = flat_target_cpus,
+	.target_cpus = online_target_cpus,
 	.disable_esr = 0,
 	.dest_logical = APIC_DEST_LOGICAL,
 	.check_apicid_used = NULL,
@@ -210,8 +191,7 @@ static struct apic apic_flat = {
 	.set_apic_id = set_apic_id,
 	.apic_id_mask = 0xFFu << 24,
 
-	.cpu_mask_to_apicid = default_cpu_mask_to_apicid,
-	.cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and,
+	.cpu_mask_to_apicid_and = flat_cpu_mask_to_apicid_and,
 
 	.send_IPI_mask = flat_send_IPI_mask,
 	.send_IPI_mask_allbutself = flat_send_IPI_mask_allbutself,
@@ -262,17 +242,6 @@ static int physflat_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
 	return 0;
 }
 
-static const struct cpumask *physflat_target_cpus(void)
-{
-	return cpu_online_mask;
-}
-
-static void physflat_vector_allocation_domain(int cpu, struct cpumask *retmask)
-{
-	cpumask_clear(retmask);
-	cpumask_set_cpu(cpu, retmask);
-}
-
 static void physflat_send_IPI_mask(const struct cpumask *cpumask, int vector)
 {
 	default_send_IPI_mask_sequence_phys(cpumask, vector);
@@ -294,38 +263,6 @@ static void physflat_send_IPI_all(int vector)
 	physflat_send_IPI_mask(cpu_online_mask, vector);
 }
 
-static unsigned int physflat_cpu_mask_to_apicid(const struct cpumask *cpumask)
-{
-	int cpu;
-
-	/*
-	 * We're using fixed IRQ delivery, can only return one phys APIC ID.
-	 * May as well be the first.
-	 */
-	cpu = cpumask_first(cpumask);
-	if ((unsigned)cpu < nr_cpu_ids)
-		return per_cpu(x86_cpu_to_apicid, cpu);
-	else
-		return BAD_APICID;
-}
-
-static unsigned int
-physflat_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
-				const struct cpumask *andmask)
-{
-	int cpu;
-
-	/*
-	 * We're using fixed IRQ delivery, can only return one phys APIC ID.
-	 * May as well be the first.
-	 */
-	for_each_cpu_and(cpu, cpumask, andmask) {
-		if (cpumask_test_cpu(cpu, cpu_online_mask))
-			break;
-	}
-	return per_cpu(x86_cpu_to_apicid, cpu);
-}
-
 static int physflat_probe(void)
 {
 	if (apic == &apic_physflat || num_possible_cpus() > 8)
@@ -345,13 +282,13 @@ static struct apic apic_physflat = {
 	.irq_delivery_mode = dest_Fixed,
 	.irq_dest_mode = 0, /* physical */
 
-	.target_cpus = physflat_target_cpus,
+	.target_cpus = online_target_cpus,
 	.disable_esr = 0,
 	.dest_logical = 0,
 	.check_apicid_used = NULL,
 	.check_apicid_present = NULL,
 
-	.vector_allocation_domain = physflat_vector_allocation_domain,
+	.vector_allocation_domain = default_vector_allocation_domain,
 	/* not needed, but shouldn't hurt: */
 	.init_apic_ldr = flat_init_apic_ldr,
 
@@ -370,8 +307,7 @@ static struct apic apic_physflat = {
 	.set_apic_id = set_apic_id,
 	.apic_id_mask = 0xFFu << 24,
 
-	.cpu_mask_to_apicid = physflat_cpu_mask_to_apicid,
-	.cpu_mask_to_apicid_and = physflat_cpu_mask_to_apicid_and,
+	.cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and,
 
 	.send_IPI_mask = physflat_send_IPI_mask,
 	.send_IPI_mask_allbutself = physflat_send_IPI_mask_allbutself,
diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c
index a6e4c6e06c08..e145f28b4099 100644
--- a/arch/x86/kernel/apic/apic_noop.c
+++ b/arch/x86/kernel/apic/apic_noop.c
@@ -100,12 +100,12 @@ static unsigned long noop_check_apicid_present(int bit)
 	return physid_isset(bit, phys_cpu_present_map);
 }
 
-static void noop_vector_allocation_domain(int cpu, struct cpumask *retmask)
+static void noop_vector_allocation_domain(int cpu, struct cpumask *retmask,
+					  const struct cpumask *mask)
 {
 	if (cpu != 0)
 		pr_warning("APIC: Vector allocated for non-BSP cpu\n");
-	cpumask_clear(retmask);
-	cpumask_set_cpu(cpu, retmask);
+	cpumask_copy(retmask, cpumask_of(cpu));
 }
 
111static u32 noop_apic_read(u32 reg) 111static u32 noop_apic_read(u32 reg)
@@ -159,8 +159,7 @@ struct apic apic_noop = {
159 .set_apic_id = NULL, 159 .set_apic_id = NULL,
160 .apic_id_mask = 0x0F << 24, 160 .apic_id_mask = 0x0F << 24,
161 161
162 .cpu_mask_to_apicid = default_cpu_mask_to_apicid, 162 .cpu_mask_to_apicid_and = flat_cpu_mask_to_apicid_and,
163 .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and,
164 163
165 .send_IPI_mask = noop_send_IPI_mask, 164 .send_IPI_mask = noop_send_IPI_mask,
166 .send_IPI_mask_allbutself = noop_send_IPI_mask_allbutself, 165 .send_IPI_mask_allbutself = noop_send_IPI_mask_allbutself,
diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c
index 6ec6d5d297c3..bc552cff2578 100644
--- a/arch/x86/kernel/apic/apic_numachip.c
+++ b/arch/x86/kernel/apic/apic_numachip.c
@@ -72,17 +72,6 @@ static int numachip_phys_pkg_id(int initial_apic_id, int index_msb)
         return initial_apic_id >> index_msb;
 }
 
-static const struct cpumask *numachip_target_cpus(void)
-{
-        return cpu_online_mask;
-}
-
-static void numachip_vector_allocation_domain(int cpu, struct cpumask *retmask)
-{
-        cpumask_clear(retmask);
-        cpumask_set_cpu(cpu, retmask);
-}
-
 static int __cpuinit numachip_wakeup_secondary(int phys_apicid, unsigned long start_rip)
 {
         union numachip_csr_g3_ext_irq_gen int_gen;
@@ -157,38 +146,6 @@ static void numachip_send_IPI_self(int vector)
         __default_send_IPI_shortcut(APIC_DEST_SELF, vector, APIC_DEST_PHYSICAL);
 }
 
-static unsigned int numachip_cpu_mask_to_apicid(const struct cpumask *cpumask)
-{
-        int cpu;
-
-        /*
-         * We're using fixed IRQ delivery, can only return one phys APIC ID.
-         * May as well be the first.
-         */
-        cpu = cpumask_first(cpumask);
-        if (likely((unsigned)cpu < nr_cpu_ids))
-                return per_cpu(x86_cpu_to_apicid, cpu);
-
-        return BAD_APICID;
-}
-
-static unsigned int
-numachip_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
-                                const struct cpumask *andmask)
-{
-        int cpu;
-
-        /*
-         * We're using fixed IRQ delivery, can only return one phys APIC ID.
-         * May as well be the first.
-         */
-        for_each_cpu_and(cpu, cpumask, andmask) {
-                if (cpumask_test_cpu(cpu, cpu_online_mask))
-                        break;
-        }
-        return per_cpu(x86_cpu_to_apicid, cpu);
-}
-
 static int __init numachip_probe(void)
 {
         return apic == &apic_numachip;
@@ -253,13 +210,13 @@ static struct apic apic_numachip __refconst = {
         .irq_delivery_mode = dest_Fixed,
         .irq_dest_mode = 0, /* physical */
 
-        .target_cpus = numachip_target_cpus,
+        .target_cpus = online_target_cpus,
         .disable_esr = 0,
         .dest_logical = 0,
         .check_apicid_used = NULL,
         .check_apicid_present = NULL,
 
-        .vector_allocation_domain = numachip_vector_allocation_domain,
+        .vector_allocation_domain = default_vector_allocation_domain,
         .init_apic_ldr = flat_init_apic_ldr,
 
         .ioapic_phys_id_map = NULL,
@@ -277,8 +234,7 @@ static struct apic apic_numachip __refconst = {
         .set_apic_id = set_apic_id,
         .apic_id_mask = 0xffU << 24,
 
-        .cpu_mask_to_apicid = numachip_cpu_mask_to_apicid,
-        .cpu_mask_to_apicid_and = numachip_cpu_mask_to_apicid_and,
+        .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and,
 
         .send_IPI_mask = numachip_send_IPI_mask,
         .send_IPI_mask_allbutself = numachip_send_IPI_mask_allbutself,
diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c
index 31fbdbfbf960..d50e3640d5ae 100644
--- a/arch/x86/kernel/apic/bigsmp_32.c
+++ b/arch/x86/kernel/apic/bigsmp_32.c
@@ -26,15 +26,6 @@ static int bigsmp_apic_id_registered(void)
         return 1;
 }
 
-static const struct cpumask *bigsmp_target_cpus(void)
-{
-#ifdef CONFIG_SMP
-        return cpu_online_mask;
-#else
-        return cpumask_of(0);
-#endif
-}
-
 static unsigned long bigsmp_check_apicid_used(physid_mask_t *map, int apicid)
 {
         return 0;
@@ -105,32 +96,6 @@ static int bigsmp_check_phys_apicid_present(int phys_apicid)
         return 1;
 }
 
-/* As we are using single CPU as destination, pick only one CPU here */
-static unsigned int bigsmp_cpu_mask_to_apicid(const struct cpumask *cpumask)
-{
-        int cpu = cpumask_first(cpumask);
-
-        if (cpu < nr_cpu_ids)
-                return cpu_physical_id(cpu);
-        return BAD_APICID;
-}
-
-static unsigned int bigsmp_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
-                                                  const struct cpumask *andmask)
-{
-        int cpu;
-
-        /*
-         * We're using fixed IRQ delivery, can only return one phys APIC ID.
-         * May as well be the first.
-         */
-        for_each_cpu_and(cpu, cpumask, andmask) {
-                if (cpumask_test_cpu(cpu, cpu_online_mask))
-                        return cpu_physical_id(cpu);
-        }
-        return BAD_APICID;
-}
-
 static int bigsmp_phys_pkg_id(int cpuid_apic, int index_msb)
 {
         return cpuid_apic >> index_msb;
@@ -177,12 +142,6 @@ static const struct dmi_system_id bigsmp_dmi_table[] = {
         { } /* NULL entry stops DMI scanning */
 };
 
-static void bigsmp_vector_allocation_domain(int cpu, struct cpumask *retmask)
-{
-        cpumask_clear(retmask);
-        cpumask_set_cpu(cpu, retmask);
-}
-
 static int probe_bigsmp(void)
 {
         if (def_to_bigsmp)
@@ -205,13 +164,13 @@ static struct apic apic_bigsmp = {
         /* phys delivery to target CPU: */
         .irq_dest_mode = 0,
 
-        .target_cpus = bigsmp_target_cpus,
+        .target_cpus = default_target_cpus,
         .disable_esr = 1,
         .dest_logical = 0,
         .check_apicid_used = bigsmp_check_apicid_used,
         .check_apicid_present = bigsmp_check_apicid_present,
 
-        .vector_allocation_domain = bigsmp_vector_allocation_domain,
+        .vector_allocation_domain = default_vector_allocation_domain,
         .init_apic_ldr = bigsmp_init_apic_ldr,
 
         .ioapic_phys_id_map = bigsmp_ioapic_phys_id_map,
@@ -229,8 +188,7 @@ static struct apic apic_bigsmp = {
         .set_apic_id = NULL,
         .apic_id_mask = 0xFF << 24,
 
-        .cpu_mask_to_apicid = bigsmp_cpu_mask_to_apicid,
-        .cpu_mask_to_apicid_and = bigsmp_cpu_mask_to_apicid_and,
+        .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and,
 
         .send_IPI_mask = bigsmp_send_IPI_mask,
         .send_IPI_mask_allbutself = NULL,
diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c
index db4ab1be3c79..0874799a98c6 100644
--- a/arch/x86/kernel/apic/es7000_32.c
+++ b/arch/x86/kernel/apic/es7000_32.c
@@ -394,21 +394,6 @@ static void es7000_enable_apic_mode(void)
         WARN(1, "Command failed, status = %x\n", mip_status);
 }
 
-static void es7000_vector_allocation_domain(int cpu, struct cpumask *retmask)
-{
-        /* Careful. Some cpus do not strictly honor the set of cpus
-         * specified in the interrupt destination when using lowest
-         * priority interrupt delivery mode.
-         *
-         * In particular there was a hyperthreading cpu observed to
-         * deliver interrupts to the wrong hyperthread when only one
-         * hyperthread was specified in the interrupt destination.
-         */
-        cpumask_clear(retmask);
-        cpumask_bits(retmask)[0] = APIC_ALL_CPUS;
-}
-
-
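These per-driver copies collapse into the shared flat_vector_allocation_domain(), which claims every CPU in APIC cluster 0 so that a vector allocated for one CPU is reserved on all of them — the defensive answer to the lowest-priority-delivery quirk described in the removed comment. A standalone C sketch of the policy (APIC_ALL_CPUS modeled as 0xff, masks as plain bits):

/*
 * Standalone sketch of the "flat" allocation-domain policy: the whole
 * of cluster 0 is returned regardless of which cpu asked, so lowest
 * priority delivery cannot stray outside the reserved set.
 */
#include <stdio.h>

#define APIC_ALL_CPUS 0xffu

static void flat_vector_allocation_domain(int cpu, unsigned *retmask)
{
        (void)cpu;              /* same domain whichever cpu requests */
        *retmask = APIC_ALL_CPUS;
}

int main(void)
{
        unsigned domain;

        flat_vector_allocation_domain(3, &domain);
        printf("domain = %#x\n", domain);       /* -> 0xff */
        return 0;
}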
 static void es7000_wait_for_init_deassert(atomic_t *deassert)
 {
         while (!atomic_read(deassert))
@@ -540,45 +525,49 @@ static int es7000_check_phys_apicid_present(int cpu_physical_apicid)
         return 1;
 }
 
-static unsigned int es7000_cpu_mask_to_apicid(const struct cpumask *cpumask)
+static inline int
+es7000_cpu_mask_to_apicid(const struct cpumask *cpumask, unsigned int *dest_id)
 {
         unsigned int round = 0;
-        int cpu, uninitialized_var(apicid);
+        unsigned int cpu, uninitialized_var(apicid);
 
         /*
          * The cpus in the mask must all be on the apic cluster.
          */
-        for_each_cpu(cpu, cpumask) {
+        for_each_cpu_and(cpu, cpumask, cpu_online_mask) {
                 int new_apicid = early_per_cpu(x86_cpu_to_logical_apicid, cpu);
 
                 if (round && APIC_CLUSTER(apicid) != APIC_CLUSTER(new_apicid)) {
                         WARN(1, "Not a valid mask!");
 
-                        return BAD_APICID;
+                        return -EINVAL;
                 }
-                apicid = new_apicid;
+                apicid |= new_apicid;
                 round++;
         }
-        return apicid;
+        if (!round)
+                return -EINVAL;
+        *dest_id = apicid;
+        return 0;
 }
 
-static unsigned int
+static int
 es7000_cpu_mask_to_apicid_and(const struct cpumask *inmask,
-                              const struct cpumask *andmask)
+                              const struct cpumask *andmask,
+                              unsigned int *apicid)
 {
-        int apicid = early_per_cpu(x86_cpu_to_logical_apicid, 0);
         cpumask_var_t cpumask;
+        *apicid = early_per_cpu(x86_cpu_to_logical_apicid, 0);
 
         if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC))
-                return apicid;
+                return 0;
 
         cpumask_and(cpumask, inmask, andmask);
-        cpumask_and(cpumask, cpumask, cpu_online_mask);
-        apicid = es7000_cpu_mask_to_apicid(cpumask);
+        es7000_cpu_mask_to_apicid(cpumask, apicid);
 
         free_cpumask_var(cpumask);
 
-        return apicid;
+        return 0;
 }
 
 static int es7000_phys_pkg_id(int cpuid_apic, int index_msb)
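The reworked es7000 helper (and its summit_32.c twin later in this diff) ORs the logical APIC IDs of all online CPUs in the mask, but only within a single APIC cluster, and now reports failure as -EINVAL through the new out-parameter. A standalone C sketch of that rule, with an invented apicid table:

/*
 * Standalone sketch of the cluster-constrained OR rule: logical ids are
 * combined only if every requested cpu shares the same cluster (high
 * nibble here); -1 stands in for the kernel's -EINVAL.
 */
#include <stdio.h>

#define APIC_CLUSTER(x) ((x) & 0xf0)

static int mask_to_apicid(const unsigned char *apicid_of, int ncpus,
                          unsigned cpumask, unsigned *dest_id)
{
        unsigned apicid = 0;
        int cpu, round = 0;

        for (cpu = 0; cpu < ncpus; cpu++) {
                unsigned new_apicid;

                if (!(cpumask & (1u << cpu)))
                        continue;
                new_apicid = apicid_of[cpu];
                if (round && APIC_CLUSTER(apicid) != APIC_CLUSTER(new_apicid))
                        return -1;      /* mixed clusters: invalid mask */
                apicid |= new_apicid;
                round++;
        }
        if (!round)
                return -1;              /* empty mask */
        *dest_id = apicid;
        return 0;
}

int main(void)
{
        /* cpus 0-3 all in cluster 0x10: ids 0x11, 0x12, 0x14, 0x18 */
        unsigned char apicid_of[] = { 0x11, 0x12, 0x14, 0x18 };
        unsigned dest;

        if (!mask_to_apicid(apicid_of, 4, 0x7, &dest))
                printf("dest = %#x\n", dest);   /* 0x11|0x12|0x14 = 0x17 */
        return 0;
}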
@@ -638,7 +627,7 @@ static struct apic __refdata apic_es7000_cluster = {
         .check_apicid_used = es7000_check_apicid_used,
         .check_apicid_present = es7000_check_apicid_present,
 
-        .vector_allocation_domain = es7000_vector_allocation_domain,
+        .vector_allocation_domain = flat_vector_allocation_domain,
         .init_apic_ldr = es7000_init_apic_ldr_cluster,
 
         .ioapic_phys_id_map = es7000_ioapic_phys_id_map,
@@ -656,7 +645,6 @@ static struct apic __refdata apic_es7000_cluster = {
         .set_apic_id = NULL,
         .apic_id_mask = 0xFF << 24,
 
-        .cpu_mask_to_apicid = es7000_cpu_mask_to_apicid,
         .cpu_mask_to_apicid_and = es7000_cpu_mask_to_apicid_and,
 
         .send_IPI_mask = es7000_send_IPI_mask,
@@ -705,7 +693,7 @@ static struct apic __refdata apic_es7000 = {
         .check_apicid_used = es7000_check_apicid_used,
         .check_apicid_present = es7000_check_apicid_present,
 
-        .vector_allocation_domain = es7000_vector_allocation_domain,
+        .vector_allocation_domain = flat_vector_allocation_domain,
         .init_apic_ldr = es7000_init_apic_ldr,
 
         .ioapic_phys_id_map = es7000_ioapic_phys_id_map,
@@ -723,7 +711,6 @@ static struct apic __refdata apic_es7000 = {
         .set_apic_id = NULL,
         .apic_id_mask = 0xFF << 24,
 
-        .cpu_mask_to_apicid = es7000_cpu_mask_to_apicid,
         .cpu_mask_to_apicid_and = es7000_cpu_mask_to_apicid_and,
 
         .send_IPI_mask = es7000_send_IPI_mask,
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 5f0ff597437c..406eee784684 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -448,8 +448,8 @@ static int __add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pi
 
         entry = alloc_irq_pin_list(node);
         if (!entry) {
-                printk(KERN_ERR "can not alloc irq_pin_list (%d,%d,%d)\n",
+                pr_err("can not alloc irq_pin_list (%d,%d,%d)\n",
                        node, apic, pin);
                 return -ENOMEM;
         }
         entry->apic = apic;
@@ -661,7 +661,7 @@ static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
         ioapic_mask_entry(apic, pin);
         entry = ioapic_read_entry(apic, pin);
         if (entry.irr)
-                printk(KERN_ERR "Unable to reset IRR for apic: %d, pin :%d\n",
+                pr_err("Unable to reset IRR for apic: %d, pin :%d\n",
                        mpc_ioapic_id(apic), pin);
 }
 
@@ -895,7 +895,7 @@ static int irq_polarity(int idx)
                 }
                 case 2: /* reserved */
                 {
-                        printk(KERN_WARNING "broken BIOS!!\n");
+                        pr_warn("broken BIOS!!\n");
                         polarity = 1;
                         break;
                 }
@@ -906,7 +906,7 @@ static int irq_polarity(int idx)
                 }
                 default: /* invalid */
                 {
-                        printk(KERN_WARNING "broken BIOS!!\n");
+                        pr_warn("broken BIOS!!\n");
                         polarity = 1;
                         break;
                 }
@@ -948,7 +948,7 @@ static int irq_trigger(int idx)
                 }
                 default:
                 {
-                        printk(KERN_WARNING "broken BIOS!!\n");
+                        pr_warn("broken BIOS!!\n");
                         trigger = 1;
                         break;
                 }
@@ -962,7 +962,7 @@ static int irq_trigger(int idx)
                 }
                 case 2: /* reserved */
                 {
-                        printk(KERN_WARNING "broken BIOS!!\n");
+                        pr_warn("broken BIOS!!\n");
                         trigger = 1;
                         break;
                 }
@@ -973,7 +973,7 @@ static int irq_trigger(int idx)
                 }
                 default: /* invalid */
                 {
-                        printk(KERN_WARNING "broken BIOS!!\n");
+                        pr_warn("broken BIOS!!\n");
                         trigger = 0;
                         break;
                 }
@@ -991,7 +991,7 @@ static int pin_2_irq(int idx, int apic, int pin)
          * Debugging check, we are in big trouble if this message pops up!
          */
         if (mp_irqs[idx].dstirq != pin)
-                printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n");
+                pr_err("broken BIOS or MPTABLE parser, ayiee!!\n");
 
         if (test_bit(bus, mp_bus_not_pci)) {
                 irq = mp_irqs[idx].srcbusirq;
@@ -1112,8 +1112,7 @@ __assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
          * 0x80, because int 0x80 is hm, kind of importantish. ;)
          */
         static int current_vector = FIRST_EXTERNAL_VECTOR + VECTOR_OFFSET_START;
-        static int current_offset = VECTOR_OFFSET_START % 8;
-        unsigned int old_vector;
+        static int current_offset = VECTOR_OFFSET_START % 16;
         int cpu, err;
         cpumask_var_t tmp_mask;
 
@@ -1123,35 +1122,45 @@ __assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
         if (!alloc_cpumask_var(&tmp_mask, GFP_ATOMIC))
                 return -ENOMEM;
 
-        old_vector = cfg->vector;
-        if (old_vector) {
-                cpumask_and(tmp_mask, mask, cpu_online_mask);
-                cpumask_and(tmp_mask, cfg->domain, tmp_mask);
-                if (!cpumask_empty(tmp_mask)) {
-                        free_cpumask_var(tmp_mask);
-                        return 0;
-                }
-        }
-
         /* Only try and allocate irqs on cpus that are present */
         err = -ENOSPC;
-        for_each_cpu_and(cpu, mask, cpu_online_mask) {
-                int new_cpu;
-                int vector, offset;
+        cpumask_clear(cfg->old_domain);
+        cpu = cpumask_first_and(mask, cpu_online_mask);
+        while (cpu < nr_cpu_ids) {
+                int new_cpu, vector, offset;
 
-                apic->vector_allocation_domain(cpu, tmp_mask);
+                apic->vector_allocation_domain(cpu, tmp_mask, mask);
+
+                if (cpumask_subset(tmp_mask, cfg->domain)) {
+                        err = 0;
+                        if (cpumask_equal(tmp_mask, cfg->domain))
+                                break;
+                        /*
+                         * New cpumask using the vector is a proper subset of
+                         * the current in use mask. So cleanup the vector
+                         * allocation for the members that are not used anymore.
+                         */
+                        cpumask_andnot(cfg->old_domain, cfg->domain, tmp_mask);
+                        cfg->move_in_progress = 1;
+                        cpumask_and(cfg->domain, cfg->domain, tmp_mask);
+                        break;
+                }
 
                 vector = current_vector;
                 offset = current_offset;
 next:
-                vector += 8;
+                vector += 16;
                 if (vector >= first_system_vector) {
-                        /* If out of vectors on large boxen, must share them. */
-                        offset = (offset + 1) % 8;
+                        offset = (offset + 1) % 16;
                         vector = FIRST_EXTERNAL_VECTOR + offset;
                 }
-                if (unlikely(current_vector == vector))
+
+                if (unlikely(current_vector == vector)) {
+                        cpumask_or(cfg->old_domain, cfg->old_domain, tmp_mask);
+                        cpumask_andnot(tmp_mask, mask, cfg->old_domain);
+                        cpu = cpumask_first_and(tmp_mask, cpu_online_mask);
                         continue;
+                }
 
                 if (test_bit(vector, used_vectors))
                         goto next;
@@ -1162,7 +1171,7 @@ next:
                 /* Found one! */
                 current_vector = vector;
                 current_offset = offset;
-                if (old_vector) {
+                if (cfg->vector) {
                         cfg->move_in_progress = 1;
                         cpumask_copy(cfg->old_domain, cfg->domain);
                 }
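The stride change from 8 to 16 means each probe lands in a distinct 16-vector priority band; on wrap, the offset rotates so the next pass tries the following slot inside each band. A standalone C model of that search loop (the 0x20/0xef bounds are assumed example values for FIRST_EXTERNAL_VECTOR and first_system_vector):

/*
 * Standalone model of the rotating vector search: advance by 16, wrap
 * to the next offset inside each band, and give up once we come back
 * around to the starting vector.
 */
#include <stdbool.h>
#include <stdio.h>

#define FIRST_VECTOR 0x20
#define FIRST_SYSTEM_VECTOR 0xef

static bool used[256];

static int find_vector(int *current_vector, int *current_offset)
{
        int vector = *current_vector, offset = *current_offset;

        for (;;) {
                vector += 16;
                if (vector >= FIRST_SYSTEM_VECTOR) {
                        offset = (offset + 1) % 16;
                        vector = FIRST_VECTOR + offset;
                }
                if (vector == *current_vector)
                        return -1;      /* wrapped: nothing free */
                if (used[vector])
                        continue;
                used[vector] = true;
                *current_vector = vector;
                *current_offset = offset;
                return vector;
        }
}

int main(void)
{
        int cv = FIRST_VECTOR, co = 0;

        printf("first:  0x%x\n", find_vector(&cv, &co));        /* 0x30 */
        printf("second: 0x%x\n", find_vector(&cv, &co));        /* 0x40 */
        return 0;
}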
@@ -1346,18 +1355,18 @@ static void setup_ioapic_irq(unsigned int irq, struct irq_cfg *cfg,
 
         if (!IO_APIC_IRQ(irq))
                 return;
-        /*
-         * For legacy irqs, cfg->domain starts with cpu 0 for legacy
-         * controllers like 8259. Now that IO-APIC can handle this irq, update
-         * the cfg->domain.
-         */
-        if (irq < legacy_pic->nr_legacy_irqs && cpumask_test_cpu(0, cfg->domain))
-                apic->vector_allocation_domain(0, cfg->domain);
 
         if (assign_irq_vector(irq, cfg, apic->target_cpus()))
                 return;
 
-        dest = apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus());
+        if (apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus(),
+                                         &dest)) {
+                pr_warn("Failed to obtain apicid for ioapic %d, pin %d\n",
+                        mpc_ioapic_id(attr->ioapic), attr->ioapic_pin);
+                __clear_irq_vector(irq, cfg);
+
+                return;
+        }
 
         apic_printk(APIC_VERBOSE,KERN_DEBUG
                     "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> "
@@ -1366,7 +1375,7 @@ static void setup_ioapic_irq(unsigned int irq, struct irq_cfg *cfg,
                     cfg->vector, irq, attr->trigger, attr->polarity, dest);
 
         if (setup_ioapic_entry(irq, &entry, dest, cfg->vector, attr)) {
                 pr_warn("Failed to setup ioapic entry for ioapic %d, pin %d\n",
                         mpc_ioapic_id(attr->ioapic), attr->ioapic_pin);
                 __clear_irq_vector(irq, cfg);
 
@@ -1469,9 +1478,10 @@ void setup_IO_APIC_irq_extra(u32 gsi)
  * Set up the timer pin, possibly with the 8259A-master behind.
  */
 static void __init setup_timer_IRQ0_pin(unsigned int ioapic_idx,
                                         unsigned int pin, int vector)
 {
         struct IO_APIC_route_entry entry;
+        unsigned int dest;
 
         if (irq_remapping_enabled)
                 return;
@@ -1482,9 +1492,13 @@ static void __init setup_timer_IRQ0_pin(unsigned int ioapic_idx,
          * We use logical delivery to get the timer IRQ
          * to the first CPU.
          */
+        if (unlikely(apic->cpu_mask_to_apicid_and(apic->target_cpus(),
+                                                  apic->target_cpus(), &dest)))
+                dest = BAD_APICID;
+
         entry.dest_mode = apic->irq_dest_mode;
         entry.mask = 0;                 /* don't mask IRQ for edge */
-        entry.dest = apic->cpu_mask_to_apicid(apic->target_cpus());
+        entry.dest = dest;
         entry.delivery_mode = apic->irq_delivery_mode;
         entry.polarity = 0;
         entry.trigger = 0;
@@ -1521,7 +1535,6 @@ __apicdebuginit(void) print_IO_APIC(int ioapic_idx)
         reg_03.raw = io_apic_read(ioapic_idx, 3);
         raw_spin_unlock_irqrestore(&ioapic_lock, flags);
 
-        printk("\n");
         printk(KERN_DEBUG "IO APIC #%d......\n", mpc_ioapic_id(ioapic_idx));
         printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw);
         printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID);
@@ -1578,7 +1591,7 @@ __apicdebuginit(void) print_IO_APIC(int ioapic_idx)
                         i,
                         ir_entry->index
                 );
-                printk("%1d %1d %1d %1d %1d "
+                pr_cont("%1d %1d %1d %1d %1d "
                         "%1d %1d %X %02X\n",
                         ir_entry->format,
                         ir_entry->mask,
@@ -1598,7 +1611,7 @@ __apicdebuginit(void) print_IO_APIC(int ioapic_idx)
                         i,
                         entry.dest
                 );
-                printk("%1d %1d %1d %1d %1d "
+                pr_cont("%1d %1d %1d %1d %1d "
                         "%1d %1d %02X\n",
                         entry.mask,
                         entry.trigger,
@@ -1651,8 +1664,8 @@ __apicdebuginit(void) print_IO_APICs(void)
                         continue;
                 printk(KERN_DEBUG "IRQ%d ", irq);
                 for_each_irq_pin(entry, cfg->irq_2_pin)
-                        printk("-> %d:%d", entry->apic, entry->pin);
-                printk("\n");
+                        pr_cont("-> %d:%d", entry->apic, entry->pin);
+                pr_cont("\n");
         }
 
         printk(KERN_INFO ".................................... done.\n");
@@ -1665,9 +1678,9 @@ __apicdebuginit(void) print_APIC_field(int base)
         printk(KERN_DEBUG);
 
         for (i = 0; i < 8; i++)
-                printk(KERN_CONT "%08x", apic_read(base + i*0x10));
+                pr_cont("%08x", apic_read(base + i*0x10));
 
-        printk(KERN_CONT "\n");
+        pr_cont("\n");
 }
 
 __apicdebuginit(void) print_local_APIC(void *dummy)
@@ -1769,7 +1782,7 @@ __apicdebuginit(void) print_local_APIC(void *dummy)
                         printk(KERN_DEBUG "... APIC EILVT%d: %08x\n", i, v);
                 }
         }
-        printk("\n");
+        pr_cont("\n");
 }
 
 __apicdebuginit(void) print_local_APICs(int maxcpu)
@@ -2065,7 +2078,7 @@ void __init setup_ioapic_ids_from_mpc_nocheck(void)
                 reg_00.raw = io_apic_read(ioapic_idx, 0);
                 raw_spin_unlock_irqrestore(&ioapic_lock, flags);
                 if (reg_00.bits.ID != mpc_ioapic_id(ioapic_idx))
-                        printk("could not set ID!\n");
+                        pr_cont("could not set ID!\n");
                 else
                         apic_printk(APIC_VERBOSE, " ok.\n");
         }
@@ -2210,71 +2223,6 @@ void send_cleanup_vector(struct irq_cfg *cfg)
         cfg->move_in_progress = 0;
 }
 
-static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq_cfg *cfg)
-{
-        int apic, pin;
-        struct irq_pin_list *entry;
-        u8 vector = cfg->vector;
-
-        for_each_irq_pin(entry, cfg->irq_2_pin) {
-                unsigned int reg;
-
-                apic = entry->apic;
-                pin = entry->pin;
-                /*
-                 * With interrupt-remapping, destination information comes
-                 * from interrupt-remapping table entry.
-                 */
-                if (!irq_remapped(cfg))
-                        io_apic_write(apic, 0x11 + pin*2, dest);
-                reg = io_apic_read(apic, 0x10 + pin*2);
-                reg &= ~IO_APIC_REDIR_VECTOR_MASK;
-                reg |= vector;
-                io_apic_modify(apic, 0x10 + pin*2, reg);
-        }
-}
-
-/*
- * Either sets data->affinity to a valid value, and returns
- * ->cpu_mask_to_apicid of that in dest_id, or returns -1 and
- * leaves data->affinity untouched.
- */
-int __ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask,
-                          unsigned int *dest_id)
-{
-        struct irq_cfg *cfg = data->chip_data;
-
-        if (!cpumask_intersects(mask, cpu_online_mask))
-                return -1;
-
-        if (assign_irq_vector(data->irq, data->chip_data, mask))
-                return -1;
-
-        cpumask_copy(data->affinity, mask);
-
-        *dest_id = apic->cpu_mask_to_apicid_and(mask, cfg->domain);
-        return 0;
-}
-
-static int
-ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask,
-                    bool force)
-{
-        unsigned int dest, irq = data->irq;
-        unsigned long flags;
-        int ret;
-
-        raw_spin_lock_irqsave(&ioapic_lock, flags);
-        ret = __ioapic_set_affinity(data, mask, &dest);
-        if (!ret) {
-                /* Only the high 8 bits are valid. */
-                dest = SET_APIC_LOGICAL_ID(dest);
-                __target_IO_APIC_irq(irq, dest, data->chip_data);
-        }
-        raw_spin_unlock_irqrestore(&ioapic_lock, flags);
-        return ret;
-}
-
 asmlinkage void smp_irq_move_cleanup_interrupt(void)
 {
         unsigned vector, me;
@@ -2362,6 +2310,87 @@ void irq_force_complete_move(int irq)
 static inline void irq_complete_move(struct irq_cfg *cfg) { }
 #endif
 
+static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq_cfg *cfg)
+{
+        int apic, pin;
+        struct irq_pin_list *entry;
+        u8 vector = cfg->vector;
+
+        for_each_irq_pin(entry, cfg->irq_2_pin) {
+                unsigned int reg;
+
+                apic = entry->apic;
+                pin = entry->pin;
+                /*
+                 * With interrupt-remapping, destination information comes
+                 * from interrupt-remapping table entry.
+                 */
+                if (!irq_remapped(cfg))
+                        io_apic_write(apic, 0x11 + pin*2, dest);
+                reg = io_apic_read(apic, 0x10 + pin*2);
+                reg &= ~IO_APIC_REDIR_VECTOR_MASK;
+                reg |= vector;
+                io_apic_modify(apic, 0x10 + pin*2, reg);
+        }
+}
+
+/*
+ * Either sets data->affinity to a valid value, and returns
+ * ->cpu_mask_to_apicid of that in dest_id, or returns -1 and
+ * leaves data->affinity untouched.
+ */
+int __ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask,
+                          unsigned int *dest_id)
+{
+        struct irq_cfg *cfg = data->chip_data;
+        unsigned int irq = data->irq;
+        int err;
+
+        if (!config_enabled(CONFIG_SMP))
+                return -1;
+
+        if (!cpumask_intersects(mask, cpu_online_mask))
+                return -EINVAL;
+
+        err = assign_irq_vector(irq, cfg, mask);
+        if (err)
+                return err;
+
+        err = apic->cpu_mask_to_apicid_and(mask, cfg->domain, dest_id);
+        if (err) {
+                if (assign_irq_vector(irq, cfg, data->affinity))
+                        pr_err("Failed to recover vector for irq %d\n", irq);
+                return err;
+        }
+
+        cpumask_copy(data->affinity, mask);
+
+        return 0;
+}
+
+static int
+ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask,
+                    bool force)
+{
+        unsigned int dest, irq = data->irq;
+        unsigned long flags;
+        int ret;
+
+        if (!config_enabled(CONFIG_SMP))
+                return -1;
+
+        raw_spin_lock_irqsave(&ioapic_lock, flags);
+        ret = __ioapic_set_affinity(data, mask, &dest);
+        if (!ret) {
+                /* Only the high 8 bits are valid. */
+                dest = SET_APIC_LOGICAL_ID(dest);
+                __target_IO_APIC_irq(irq, dest, data->chip_data);
+                ret = IRQ_SET_MASK_OK_NOCOPY;
+        }
+        raw_spin_unlock_irqrestore(&ioapic_lock, flags);
+        return ret;
+}
+
 static void ack_apic_edge(struct irq_data *data)
 {
         irq_complete_move(data->chip_data);
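The re-added __ioapic_set_affinity() now propagates errors from cpu_mask_to_apicid_and() and tries to restore the previous vector assignment before failing. A standalone C sketch of that commit-or-roll-back contract, with simplified stand-ins for the vector and apicid helpers:

/*
 * Standalone sketch of the recovery contract: commit the new mask only
 * after both the vector assignment and the apicid lookup succeed; on
 * lookup failure, re-assert the previous affinity.
 */
#include <stdio.h>

struct irq_state {
        unsigned affinity;      /* committed cpu mask */
};

/* stand-ins: lookup succeeds only for masks below 0x10 */
static int assign_vector(unsigned mask) { return mask ? 0 : -1; }
static int mask_to_apicid(unsigned mask, unsigned *dest)
{
        if (mask >= 0x10)
                return -1;
        *dest = mask;           /* pretend logical ids equal the mask */
        return 0;
}

static int set_affinity(struct irq_state *s, unsigned mask, unsigned *dest)
{
        int err = assign_vector(mask);

        if (err)
                return err;
        err = mask_to_apicid(mask, dest);
        if (err) {
                /* roll back to the previous routing */
                if (assign_vector(s->affinity))
                        fprintf(stderr, "failed to recover vector\n");
                return err;
        }
        s->affinity = mask;     /* commit only on full success */
        return 0;
}

int main(void)
{
        struct irq_state s = { .affinity = 0x1 };
        unsigned dest;

        printf("ok?  %d\n", set_affinity(&s, 0x3, &dest));      /* 0 */
        printf("bad? %d\n", set_affinity(&s, 0x30, &dest));     /* -1 */
        printf("affinity = %#x\n", s.affinity);                 /* stays 0x3 */
        return 0;
}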
@@ -2541,9 +2570,7 @@ static void irq_remap_modify_chip_defaults(struct irq_chip *chip)
         chip->irq_ack = ir_ack_apic_edge;
         chip->irq_eoi = ir_ack_apic_level;
 
-#ifdef CONFIG_SMP
         chip->irq_set_affinity = set_remapped_irq_affinity;
-#endif
 }
 #endif /* CONFIG_IRQ_REMAP */
 
@@ -2554,9 +2581,7 @@ static struct irq_chip ioapic_chip __read_mostly = {
         .irq_unmask = unmask_ioapic_irq,
         .irq_ack = ack_apic_edge,
         .irq_eoi = ack_apic_level,
-#ifdef CONFIG_SMP
         .irq_set_affinity = ioapic_set_affinity,
-#endif
         .irq_retrigger = ioapic_retrigger_irq,
 };
 
@@ -3038,7 +3063,10 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq,
         if (err)
                 return err;
 
-        dest = apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus());
+        err = apic->cpu_mask_to_apicid_and(cfg->domain,
+                                           apic->target_cpus(), &dest);
+        if (err)
+                return err;
 
         if (irq_remapped(cfg)) {
                 compose_remapped_msi_msg(pdev, irq, dest, msg, hpet_id);
@@ -3072,7 +3100,6 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq,
         return err;
 }
 
-#ifdef CONFIG_SMP
 static int
 msi_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force)
 {
@@ -3092,9 +3119,8 @@ msi_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force)
 
         __write_msi_msg(data->msi_desc, &msg);
 
-        return 0;
+        return IRQ_SET_MASK_OK_NOCOPY;
 }
-#endif /* CONFIG_SMP */
 
 /*
  * IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices,
@@ -3105,9 +3131,7 @@ static struct irq_chip msi_chip = {
         .irq_unmask = unmask_msi_irq,
         .irq_mask = mask_msi_irq,
         .irq_ack = ack_apic_edge,
-#ifdef CONFIG_SMP
         .irq_set_affinity = msi_set_affinity,
-#endif
         .irq_retrigger = ioapic_retrigger_irq,
 };
 
@@ -3192,7 +3216,6 @@ void native_teardown_msi_irq(unsigned int irq)
 }
 
 #ifdef CONFIG_DMAR_TABLE
-#ifdef CONFIG_SMP
 static int
 dmar_msi_set_affinity(struct irq_data *data, const struct cpumask *mask,
                       bool force)
@@ -3214,19 +3237,15 @@ dmar_msi_set_affinity(struct irq_data *data, const struct cpumask *mask,
 
         dmar_msi_write(irq, &msg);
 
-        return 0;
+        return IRQ_SET_MASK_OK_NOCOPY;
 }
 
-#endif /* CONFIG_SMP */
-
 static struct irq_chip dmar_msi_type = {
         .name = "DMAR_MSI",
         .irq_unmask = dmar_msi_unmask,
         .irq_mask = dmar_msi_mask,
         .irq_ack = ack_apic_edge,
-#ifdef CONFIG_SMP
         .irq_set_affinity = dmar_msi_set_affinity,
-#endif
         .irq_retrigger = ioapic_retrigger_irq,
 };
 
@@ -3247,7 +3266,6 @@ int arch_setup_dmar_msi(unsigned int irq)
 
 #ifdef CONFIG_HPET_TIMER
 
-#ifdef CONFIG_SMP
 static int hpet_msi_set_affinity(struct irq_data *data,
                                  const struct cpumask *mask, bool force)
 {
@@ -3267,19 +3285,15 @@ static int hpet_msi_set_affinity(struct irq_data *data,
 
         hpet_msi_write(data->handler_data, &msg);
 
-        return 0;
+        return IRQ_SET_MASK_OK_NOCOPY;
 }
 
-#endif /* CONFIG_SMP */
-
 static struct irq_chip hpet_msi_type = {
         .name = "HPET_MSI",
         .irq_unmask = hpet_msi_unmask,
         .irq_mask = hpet_msi_mask,
         .irq_ack = ack_apic_edge,
-#ifdef CONFIG_SMP
         .irq_set_affinity = hpet_msi_set_affinity,
-#endif
         .irq_retrigger = ioapic_retrigger_irq,
 };
 
@@ -3314,8 +3328,6 @@ int arch_setup_hpet_msi(unsigned int irq, unsigned int id)
  */
 #ifdef CONFIG_HT_IRQ
 
-#ifdef CONFIG_SMP
-
 static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector)
 {
         struct ht_irq_msg msg;
@@ -3340,25 +3352,23 @@ ht_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force)
                 return -1;
 
         target_ht_irq(data->irq, dest, cfg->vector);
-        return 0;
+        return IRQ_SET_MASK_OK_NOCOPY;
 }
 
-#endif
-
 static struct irq_chip ht_irq_chip = {
         .name = "PCI-HT",
         .irq_mask = mask_ht_irq,
         .irq_unmask = unmask_ht_irq,
         .irq_ack = ack_apic_edge,
-#ifdef CONFIG_SMP
         .irq_set_affinity = ht_set_affinity,
-#endif
         .irq_retrigger = ioapic_retrigger_irq,
 };
 
 int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
 {
         struct irq_cfg *cfg;
+        struct ht_irq_msg msg;
+        unsigned dest;
         int err;
 
         if (disable_apic)
@@ -3366,36 +3376,37 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
 
         cfg = irq_cfg(irq);
         err = assign_irq_vector(irq, cfg, apic->target_cpus());
-        if (!err) {
-                struct ht_irq_msg msg;
-                unsigned dest;
-
-                dest = apic->cpu_mask_to_apicid_and(cfg->domain,
-                                                    apic->target_cpus());
-
-                msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest);
-
-                msg.address_lo =
-                        HT_IRQ_LOW_BASE |
-                        HT_IRQ_LOW_DEST_ID(dest) |
-                        HT_IRQ_LOW_VECTOR(cfg->vector) |
-                        ((apic->irq_dest_mode == 0) ?
-                                HT_IRQ_LOW_DM_PHYSICAL :
-                                HT_IRQ_LOW_DM_LOGICAL) |
-                        HT_IRQ_LOW_RQEOI_EDGE |
-                        ((apic->irq_delivery_mode != dest_LowestPrio) ?
-                                HT_IRQ_LOW_MT_FIXED :
-                                HT_IRQ_LOW_MT_ARBITRATED) |
-                        HT_IRQ_LOW_IRQ_MASKED;
-
-                write_ht_irq_msg(irq, &msg);
-
-                irq_set_chip_and_handler_name(irq, &ht_irq_chip,
-                                              handle_edge_irq, "edge");
-
-                dev_printk(KERN_DEBUG, &dev->dev, "irq %d for HT\n", irq);
-        }
-        return err;
+        if (err)
+                return err;
+
+        err = apic->cpu_mask_to_apicid_and(cfg->domain,
+                                           apic->target_cpus(), &dest);
+        if (err)
+                return err;
+
+        msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest);
+
+        msg.address_lo =
+                HT_IRQ_LOW_BASE |
+                HT_IRQ_LOW_DEST_ID(dest) |
+                HT_IRQ_LOW_VECTOR(cfg->vector) |
+                ((apic->irq_dest_mode == 0) ?
+                        HT_IRQ_LOW_DM_PHYSICAL :
+                        HT_IRQ_LOW_DM_LOGICAL) |
+                HT_IRQ_LOW_RQEOI_EDGE |
+                ((apic->irq_delivery_mode != dest_LowestPrio) ?
+                        HT_IRQ_LOW_MT_FIXED :
+                        HT_IRQ_LOW_MT_ARBITRATED) |
+                HT_IRQ_LOW_IRQ_MASKED;
+
+        write_ht_irq_msg(irq, &msg);
+
+        irq_set_chip_and_handler_name(irq, &ht_irq_chip,
+                                      handle_edge_irq, "edge");
+
+        dev_printk(KERN_DEBUG, &dev->dev, "irq %d for HT\n", irq);
+
+        return 0;
 }
 #endif /* CONFIG_HT_IRQ */
 
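The straightened-out arch_setup_ht_irq() builds the HT low address word by OR-ing destination, vector, delivery-mode and mask fields. A standalone C sketch of that composition; the field encodings below are illustrative placeholders, not the real <linux/htirq.h> values:

/*
 * Standalone sketch of composing an HT irq low-address word. The shifts
 * and base value are assumed stand-ins; only the OR-together structure
 * mirrors the kernel code above.
 */
#include <stdio.h>

#define HT_LOW_BASE             0xf8000000u     /* assumed placeholder */
#define HT_LOW_DEST_ID(d)       (((d) & 0xffu) << 8)
#define HT_LOW_VECTOR(v)        (((v) & 0xffu) << 16)
#define HT_LOW_DM_PHYSICAL      (0u << 1)
#define HT_LOW_DM_LOGICAL       (1u << 1)
#define HT_LOW_RQEOI_EDGE       (1u << 2)
#define HT_LOW_MT_FIXED         (0u << 4)
#define HT_LOW_MT_ARBITRATED    (1u << 4)
#define HT_LOW_IRQ_MASKED       (1u << 0)

int main(void)
{
        unsigned dest = 0x05, vector = 0x31;
        int physical = 1, lowest_prio = 0;

        unsigned address_lo =
                HT_LOW_BASE |
                HT_LOW_DEST_ID(dest) |
                HT_LOW_VECTOR(vector) |
                (physical ? HT_LOW_DM_PHYSICAL : HT_LOW_DM_LOGICAL) |
                HT_LOW_RQEOI_EDGE |
                (!lowest_prio ? HT_LOW_MT_FIXED : HT_LOW_MT_ARBITRATED) |
                HT_LOW_IRQ_MASKED;      /* starts masked until unmasked */

        printf("address_lo = %#010x\n", address_lo);
        return 0;
}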
@@ -3563,7 +3574,8 @@ static int __init io_apic_get_unique_id(int ioapic, int apic_id)
 
                 /* Sanity check */
                 if (reg_00.bits.ID != apic_id) {
-                        printk("IOAPIC[%d]: Unable to change apic_id!\n", ioapic);
+                        pr_err("IOAPIC[%d]: Unable to change apic_id!\n",
+                               ioapic);
                         return -1;
                 }
         }
diff --git a/arch/x86/kernel/apic/numaq_32.c b/arch/x86/kernel/apic/numaq_32.c
index f00a68cca37a..d661ee95cabf 100644
--- a/arch/x86/kernel/apic/numaq_32.c
+++ b/arch/x86/kernel/apic/numaq_32.c
@@ -406,16 +406,13 @@ static inline int numaq_check_phys_apicid_present(int phys_apicid)
  * We use physical apicids here, not logical, so just return the default
  * physical broadcast to stop people from breaking us
  */
-static unsigned int numaq_cpu_mask_to_apicid(const struct cpumask *cpumask)
-{
-        return 0x0F;
-}
-
-static inline unsigned int
+static int
 numaq_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
-                             const struct cpumask *andmask)
+                             const struct cpumask *andmask,
+                             unsigned int *apicid)
 {
-        return 0x0F;
+        *apicid = 0x0F;
+        return 0;
 }
 
 /* No NUMA-Q box has a HT CPU, but it can't hurt to use the default code. */
@@ -441,20 +438,6 @@ static int probe_numaq(void)
         return found_numaq;
 }
 
-static void numaq_vector_allocation_domain(int cpu, struct cpumask *retmask)
-{
-        /* Careful. Some cpus do not strictly honor the set of cpus
-         * specified in the interrupt destination when using lowest
-         * priority interrupt delivery mode.
-         *
-         * In particular there was a hyperthreading cpu observed to
-         * deliver interrupts to the wrong hyperthread when only one
-         * hyperthread was specified in the interrupt destination.
-         */
-        cpumask_clear(retmask);
-        cpumask_bits(retmask)[0] = APIC_ALL_CPUS;
-}
-
 static void numaq_setup_portio_remap(void)
 {
         int num_quads = num_online_nodes();
@@ -491,7 +474,7 @@ static struct apic __refdata apic_numaq = {
         .check_apicid_used = numaq_check_apicid_used,
         .check_apicid_present = numaq_check_apicid_present,
 
-        .vector_allocation_domain = numaq_vector_allocation_domain,
+        .vector_allocation_domain = flat_vector_allocation_domain,
         .init_apic_ldr = numaq_init_apic_ldr,
 
         .ioapic_phys_id_map = numaq_ioapic_phys_id_map,
@@ -509,7 +492,6 @@ static struct apic __refdata apic_numaq = {
         .set_apic_id = NULL,
         .apic_id_mask = 0x0F << 24,
 
-        .cpu_mask_to_apicid = numaq_cpu_mask_to_apicid,
         .cpu_mask_to_apicid_and = numaq_cpu_mask_to_apicid_and,
 
         .send_IPI_mask = numaq_send_IPI_mask,
diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c
index 1b291da09e60..eb35ef9ee63f 100644
--- a/arch/x86/kernel/apic/probe_32.c
+++ b/arch/x86/kernel/apic/probe_32.c
@@ -66,21 +66,6 @@ static void setup_apic_flat_routing(void)
 #endif
 }
 
-static void default_vector_allocation_domain(int cpu, struct cpumask *retmask)
-{
-        /*
-         * Careful. Some cpus do not strictly honor the set of cpus
-         * specified in the interrupt destination when using lowest
-         * priority interrupt delivery mode.
-         *
-         * In particular there was a hyperthreading cpu observed to
-         * deliver interrupts to the wrong hyperthread when only one
-         * hyperthread was specified in the interrupt destination.
-         */
-        cpumask_clear(retmask);
-        cpumask_bits(retmask)[0] = APIC_ALL_CPUS;
-}
-
 /* should be called last. */
 static int probe_default(void)
 {
@@ -105,7 +90,7 @@ static struct apic apic_default = {
         .check_apicid_used = default_check_apicid_used,
         .check_apicid_present = default_check_apicid_present,
 
-        .vector_allocation_domain = default_vector_allocation_domain,
+        .vector_allocation_domain = flat_vector_allocation_domain,
         .init_apic_ldr = default_init_apic_ldr,
 
         .ioapic_phys_id_map = default_ioapic_phys_id_map,
@@ -123,8 +108,7 @@ static struct apic apic_default = {
         .set_apic_id = NULL,
         .apic_id_mask = 0x0F << 24,
 
-        .cpu_mask_to_apicid = default_cpu_mask_to_apicid,
-        .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and,
+        .cpu_mask_to_apicid_and = flat_cpu_mask_to_apicid_and,
 
         .send_IPI_mask = default_send_IPI_mask_logical,
         .send_IPI_mask_allbutself = default_send_IPI_mask_allbutself_logical,
@@ -208,6 +192,9 @@ void __init default_setup_apic_routing(void)
 
         if (apic->setup_apic_routing)
                 apic->setup_apic_routing();
+
+        if (x86_platform.apic_post_init)
+                x86_platform.apic_post_init();
 }
 
 void __init generic_apic_probe(void)
diff --git a/arch/x86/kernel/apic/probe_64.c b/arch/x86/kernel/apic/probe_64.c
index 3fe986698929..1793dba7a741 100644
--- a/arch/x86/kernel/apic/probe_64.c
+++ b/arch/x86/kernel/apic/probe_64.c
@@ -23,11 +23,6 @@
 #include <asm/ipi.h>
 #include <asm/setup.h>
 
-static int apicid_phys_pkg_id(int initial_apic_id, int index_msb)
-{
-        return hard_smp_processor_id() >> index_msb;
-}
-
 /*
  * Check the APIC IDs in bios_cpu_apicid and choose the APIC mode.
 */
@@ -48,10 +43,8 @@ void __init default_setup_apic_routing(void)
                 }
         }
 
-        if (is_vsmp_box()) {
-                /* need to update phys_pkg_id */
-                apic->phys_pkg_id = apicid_phys_pkg_id;
-        }
+        if (x86_platform.apic_post_init)
+                x86_platform.apic_post_init();
 }
 
 /* Same for both flat and physical. */
diff --git a/arch/x86/kernel/apic/summit_32.c b/arch/x86/kernel/apic/summit_32.c
index 659897c00755..77c95c0e1bf7 100644
--- a/arch/x86/kernel/apic/summit_32.c
+++ b/arch/x86/kernel/apic/summit_32.c
@@ -26,6 +26,8 @@
  *
  */
 
+#define pr_fmt(fmt) "summit: %s: " fmt, __func__
+
 #include <linux/mm.h>
 #include <linux/init.h>
 #include <asm/io.h>
@@ -235,8 +237,8 @@ static int summit_apic_id_registered(void)
 
 static void summit_setup_apic_routing(void)
 {
-        printk("Enabling APIC mode: Summit. Using %d I/O APICs\n",
+        pr_info("Enabling APIC mode: Summit. Using %d I/O APICs\n",
                 nr_ioapics);
 }
 
 static int summit_cpu_present_to_apicid(int mps_cpu)
@@ -263,43 +265,48 @@ static int summit_check_phys_apicid_present(int physical_apicid)
         return 1;
 }
 
-static unsigned int summit_cpu_mask_to_apicid(const struct cpumask *cpumask)
+static inline int
+summit_cpu_mask_to_apicid(const struct cpumask *cpumask, unsigned int *dest_id)
 {
         unsigned int round = 0;
-        int cpu, apicid = 0;
+        unsigned int cpu, apicid = 0;
 
         /*
          * The cpus in the mask must all be on the apic cluster.
         */
-        for_each_cpu(cpu, cpumask) {
+        for_each_cpu_and(cpu, cpumask, cpu_online_mask) {
                 int new_apicid = early_per_cpu(x86_cpu_to_logical_apicid, cpu);
 
                 if (round && APIC_CLUSTER(apicid) != APIC_CLUSTER(new_apicid)) {
-                        printk("%s: Not a valid mask!\n", __func__);
-                        return BAD_APICID;
+                        pr_err("Not a valid mask!\n");
+                        return -EINVAL;
                 }
                 apicid |= new_apicid;
                 round++;
         }
-        return apicid;
+        if (!round)
+                return -EINVAL;
+        *dest_id = apicid;
+        return 0;
 }
 
-static unsigned int summit_cpu_mask_to_apicid_and(const struct cpumask *inmask,
-                                                  const struct cpumask *andmask)
+static int
+summit_cpu_mask_to_apicid_and(const struct cpumask *inmask,
+                              const struct cpumask *andmask,
+                              unsigned int *apicid)
 {
-        int apicid = early_per_cpu(x86_cpu_to_logical_apicid, 0);
         cpumask_var_t cpumask;
+        *apicid = early_per_cpu(x86_cpu_to_logical_apicid, 0);
 
         if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC))
-                return apicid;
+                return 0;
 
         cpumask_and(cpumask, inmask, andmask);
-        cpumask_and(cpumask, cpumask, cpu_online_mask);
-        apicid = summit_cpu_mask_to_apicid(cpumask);
+        summit_cpu_mask_to_apicid(cpumask, apicid);
 
         free_cpumask_var(cpumask);
 
-        return apicid;
+        return 0;
 }
 
 /*
@@ -320,20 +327,6 @@ static int probe_summit(void)
         return 0;
 }
 
-static void summit_vector_allocation_domain(int cpu, struct cpumask *retmask)
-{
-        /* Careful. Some cpus do not strictly honor the set of cpus
-         * specified in the interrupt destination when using lowest
-         * priority interrupt delivery mode.
-         *
-         * In particular there was a hyperthreading cpu observed to
-         * deliver interrupts to the wrong hyperthread when only one
-         * hyperthread was specified in the interrupt destination.
-         */
-        cpumask_clear(retmask);
-        cpumask_bits(retmask)[0] = APIC_ALL_CPUS;
-}
-
 #ifdef CONFIG_X86_SUMMIT_NUMA
 static struct rio_table_hdr *rio_table_hdr;
 static struct scal_detail *scal_devs[MAX_NUMNODES];
@@ -355,7 +348,7 @@ static int setup_pci_node_map_for_wpeg(int wpeg_num, int last_bus)
                 }
         }
         if (i == rio_table_hdr->num_rio_dev) {
-                printk(KERN_ERR "%s: Couldn't find owner Cyclone for Winnipeg!\n", __func__);
+                pr_err("Couldn't find owner Cyclone for Winnipeg!\n");
                 return last_bus;
         }
 
@@ -366,7 +359,7 @@ static int setup_pci_node_map_for_wpeg(int wpeg_num, int last_bus)
                 }
         }
         if (i == rio_table_hdr->num_scal_dev) {
-                printk(KERN_ERR "%s: Couldn't find owner Twister for Cyclone!\n", __func__);
+                pr_err("Couldn't find owner Twister for Cyclone!\n");
                 return last_bus;
         }
 
@@ -396,7 +389,7 @@ static int setup_pci_node_map_for_wpeg(int wpeg_num, int last_bus)
                 num_buses = 9;
                 break;
         default:
-                printk(KERN_INFO "%s: Unsupported Winnipeg type!\n", __func__);
+                pr_info("Unsupported Winnipeg type!\n");
                 return last_bus;
         }
 
@@ -411,13 +404,15 @@ static int build_detail_arrays(void)
         int i, scal_detail_size, rio_detail_size;
 
         if (rio_table_hdr->num_scal_dev > MAX_NUMNODES) {
-                printk(KERN_WARNING "%s: MAX_NUMNODES too low! Defined as %d, but system has %d nodes.\n", __func__, MAX_NUMNODES, rio_table_hdr->num_scal_dev);
+                pr_warn("MAX_NUMNODES too low! Defined as %d, but system has %d nodes\n",
+                        MAX_NUMNODES, rio_table_hdr->num_scal_dev);
                 return 0;
         }
 
         switch (rio_table_hdr->version) {
         default:
-                printk(KERN_WARNING "%s: Invalid Rio Grande Table Version: %d\n", __func__, rio_table_hdr->version);
+                pr_warn("Invalid Rio Grande Table Version: %d\n",
+                        rio_table_hdr->version);
                 return 0;
         case 2:
                 scal_detail_size = 11;
@@ -462,7 +457,7 @@ void setup_summit(void)
                 offset = *((unsigned short *)(ptr + offset));
         }
         if (!rio_table_hdr) {
-                printk(KERN_ERR "%s: Unable to locate Rio Grande Table in EBDA - bailing!\n", __func__);
+                pr_err("Unable to locate Rio Grande Table in EBDA - bailing!\n");
                 return;
         }
 
@@ -509,7 +504,7 @@ static struct apic apic_summit = {
         .check_apicid_used = summit_check_apicid_used,
         .check_apicid_present = summit_check_apicid_present,
 
-        .vector_allocation_domain = summit_vector_allocation_domain,
+        .vector_allocation_domain = flat_vector_allocation_domain,
         .init_apic_ldr = summit_init_apic_ldr,
 
         .ioapic_phys_id_map = summit_ioapic_phys_id_map,
@@ -527,7 +522,6 @@ static struct apic apic_summit = {
         .set_apic_id = NULL,
         .apic_id_mask = 0xFF << 24,
 
-        .cpu_mask_to_apicid = summit_cpu_mask_to_apicid,
         .cpu_mask_to_apicid_and = summit_cpu_mask_to_apicid_and,
 
         .send_IPI_mask = summit_send_IPI_mask,
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c
index ff35cff0e1a7..c88baa4ff0e5 100644
--- a/arch/x86/kernel/apic/x2apic_cluster.c
+++ b/arch/x86/kernel/apic/x2apic_cluster.c
@@ -81,7 +81,7 @@ static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector)
 }
 
 static void
- x2apic_send_IPI_mask_allbutself(const struct cpumask *mask, int vector)
+x2apic_send_IPI_mask_allbutself(const struct cpumask *mask, int vector)
 {
         __x2apic_send_IPI_mask(mask, vector, APIC_DEST_ALLBUT);
 }
@@ -96,36 +96,37 @@ static void x2apic_send_IPI_all(int vector)
         __x2apic_send_IPI_mask(cpu_online_mask, vector, APIC_DEST_ALLINC);
 }
 
-static unsigned int x2apic_cpu_mask_to_apicid(const struct cpumask *cpumask)
+static int
+x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
+                              const struct cpumask *andmask,
+                              unsigned int *apicid)
 {
-        /*
-         * We're using fixed IRQ delivery, can only return one logical APIC ID.
-         * May as well be the first.
-         */
-        int cpu = cpumask_first(cpumask);
+        u32 dest = 0;
+        u16 cluster;
+        int i;
 
-        if ((unsigned)cpu < nr_cpu_ids)
-                return per_cpu(x86_cpu_to_logical_apicid, cpu);
-        else
-                return BAD_APICID;
-}
+        for_each_cpu_and(i, cpumask, andmask) {
+                if (!cpumask_test_cpu(i, cpu_online_mask))
+                        continue;
+                dest = per_cpu(x86_cpu_to_logical_apicid, i);
+                cluster = x2apic_cluster(i);
+                break;
+        }
 
-static unsigned int
-x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
-                              const struct cpumask *andmask)
-{
-        int cpu;
+        if (!dest)
+                return -EINVAL;
 
-        /*
-         * We're using fixed IRQ delivery, can only return one logical APIC ID.
-         * May as well be the first.
-         */
-        for_each_cpu_and(cpu, cpumask, andmask) {
-                if (cpumask_test_cpu(cpu, cpu_online_mask))
-                        break;
+        for_each_cpu_and(i, cpumask, andmask) {
+                if (!cpumask_test_cpu(i, cpu_online_mask))
+                        continue;
+                if (cluster != x2apic_cluster(i))
+                        continue;
+                dest |= per_cpu(x86_cpu_to_logical_apicid, i);
         }
 
-        return per_cpu(x86_cpu_to_logical_apicid, cpu);
+        *apicid = dest;
+
+        return 0;
 }
 
 static void init_x2apic_ldr(void)
@@ -208,6 +209,32 @@ static int x2apic_cluster_probe(void)
208 return 0; 209 return 0;
209} 210}
210 211
212static const struct cpumask *x2apic_cluster_target_cpus(void)
213{
214 return cpu_all_mask;
215}
216
217/*
218 * Each x2apic cluster is an allocation domain.
219 */
220static void cluster_vector_allocation_domain(int cpu, struct cpumask *retmask,
221 const struct cpumask *mask)
222{
223 /*
224 * To minimize vector pressure, the default cases of boot and device
225 * bringup will use a single cpu for the interrupt destination.
226 *
227 * On explicit migration requests coming from irqbalance etc.,
228 * interrupts will be routed to the members of the x2apic cluster
229 * (cluster-id derived from the first cpu in the mask) that are
230 * specified in the mask.
231 */
232 if (mask == x2apic_cluster_target_cpus())
233 cpumask_copy(retmask, cpumask_of(cpu));
234 else
235 cpumask_and(retmask, mask, per_cpu(cpus_in_cluster, cpu));
236}
237
211static struct apic apic_x2apic_cluster = { 238static struct apic apic_x2apic_cluster = {
212 239
213 .name = "cluster x2apic", 240 .name = "cluster x2apic",
@@ -219,13 +246,13 @@ static struct apic apic_x2apic_cluster = {
219 .irq_delivery_mode = dest_LowestPrio, 246 .irq_delivery_mode = dest_LowestPrio,
220 .irq_dest_mode = 1, /* logical */ 247 .irq_dest_mode = 1, /* logical */
221 248
222 .target_cpus = x2apic_target_cpus, 249 .target_cpus = x2apic_cluster_target_cpus,
223 .disable_esr = 0, 250 .disable_esr = 0,
224 .dest_logical = APIC_DEST_LOGICAL, 251 .dest_logical = APIC_DEST_LOGICAL,
225 .check_apicid_used = NULL, 252 .check_apicid_used = NULL,
226 .check_apicid_present = NULL, 253 .check_apicid_present = NULL,
227 254
228 .vector_allocation_domain = x2apic_vector_allocation_domain, 255 .vector_allocation_domain = cluster_vector_allocation_domain,
229 .init_apic_ldr = init_x2apic_ldr, 256 .init_apic_ldr = init_x2apic_ldr,
230 257
231 .ioapic_phys_id_map = NULL, 258 .ioapic_phys_id_map = NULL,
@@ -243,7 +270,6 @@ static struct apic apic_x2apic_cluster = {
243 .set_apic_id = x2apic_set_apic_id, 270 .set_apic_id = x2apic_set_apic_id,
244 .apic_id_mask = 0xFFFFFFFFu, 271 .apic_id_mask = 0xFFFFFFFFu,
245 272
246 .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid,
247 .cpu_mask_to_apicid_and = x2apic_cpu_mask_to_apicid_and, 273 .cpu_mask_to_apicid_and = x2apic_cpu_mask_to_apicid_and,
248 274
249 .send_IPI_mask = x2apic_send_IPI_mask, 275 .send_IPI_mask = x2apic_send_IPI_mask,
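
A note on the x2apic_cpu_mask_to_apicid_and() rewrite above: in cluster mode the 32-bit logical APIC ID carries the cluster number in bits 31:16 and a one-hot CPU position in bits 15:0, which is why the IDs of same-cluster CPUs can simply be OR-ed into one multi-CPU destination. A minimal user-space sketch of that composition (the cluster layout and helper name are illustrative, not kernel code):

#include <stdio.h>

/* cluster number in bits 31:16, one-hot CPU position in bits 15:0 */
static unsigned int logical_id(unsigned int cluster, unsigned int pos)
{
        return (cluster << 16) | (1u << pos);
}

int main(void)
{
        /* pretend CPUs 0..2 share cluster 1 at positions 0..2 */
        unsigned int dest = logical_id(1, 0) | logical_id(1, 1) |
                            logical_id(1, 2);

        printf("dest = 0x%08x\n", dest);        /* 0x00010007 */
        return 0;
}
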
diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c
index c17e982db275..e03a1e180e81 100644
--- a/arch/x86/kernel/apic/x2apic_phys.c
+++ b/arch/x86/kernel/apic/x2apic_phys.c
@@ -76,38 +76,6 @@ static void x2apic_send_IPI_all(int vector)
76 __x2apic_send_IPI_mask(cpu_online_mask, vector, APIC_DEST_ALLINC); 76 __x2apic_send_IPI_mask(cpu_online_mask, vector, APIC_DEST_ALLINC);
77} 77}
78 78
79static unsigned int x2apic_cpu_mask_to_apicid(const struct cpumask *cpumask)
80{
81 /*
82 * We're using fixed IRQ delivery, can only return one phys APIC ID.
83 * May as well be the first.
84 */
85 int cpu = cpumask_first(cpumask);
86
87 if ((unsigned)cpu < nr_cpu_ids)
88 return per_cpu(x86_cpu_to_apicid, cpu);
89 else
90 return BAD_APICID;
91}
92
93static unsigned int
94x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
95 const struct cpumask *andmask)
96{
97 int cpu;
98
99 /*
100 * We're using fixed IRQ delivery, can only return one phys APIC ID.
101 * May as well be the first.
102 */
103 for_each_cpu_and(cpu, cpumask, andmask) {
104 if (cpumask_test_cpu(cpu, cpu_online_mask))
105 break;
106 }
107
108 return per_cpu(x86_cpu_to_apicid, cpu);
109}
110
111static void init_x2apic_ldr(void) 79static void init_x2apic_ldr(void)
112{ 80{
113} 81}
@@ -131,13 +99,13 @@ static struct apic apic_x2apic_phys = {
131 .irq_delivery_mode = dest_Fixed, 99 .irq_delivery_mode = dest_Fixed,
132 .irq_dest_mode = 0, /* physical */ 100 .irq_dest_mode = 0, /* physical */
133 101
134 .target_cpus = x2apic_target_cpus, 102 .target_cpus = online_target_cpus,
135 .disable_esr = 0, 103 .disable_esr = 0,
136 .dest_logical = 0, 104 .dest_logical = 0,
137 .check_apicid_used = NULL, 105 .check_apicid_used = NULL,
138 .check_apicid_present = NULL, 106 .check_apicid_present = NULL,
139 107
140 .vector_allocation_domain = x2apic_vector_allocation_domain, 108 .vector_allocation_domain = default_vector_allocation_domain,
141 .init_apic_ldr = init_x2apic_ldr, 109 .init_apic_ldr = init_x2apic_ldr,
142 110
143 .ioapic_phys_id_map = NULL, 111 .ioapic_phys_id_map = NULL,
@@ -155,8 +123,7 @@ static struct apic apic_x2apic_phys = {
155 .set_apic_id = x2apic_set_apic_id, 123 .set_apic_id = x2apic_set_apic_id,
156 .apic_id_mask = 0xFFFFFFFFu, 124 .apic_id_mask = 0xFFFFFFFFu,
157 125
158 .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid, 126 .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and,
159 .cpu_mask_to_apicid_and = x2apic_cpu_mask_to_apicid_and,
160 127
161 .send_IPI_mask = x2apic_send_IPI_mask, 128 .send_IPI_mask = x2apic_send_IPI_mask,
162 .send_IPI_mask_allbutself = x2apic_send_IPI_mask_allbutself, 129 .send_IPI_mask_allbutself = x2apic_send_IPI_mask_allbutself,
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index c6d03f7a4401..8cfade9510a4 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -185,17 +185,6 @@ EXPORT_SYMBOL_GPL(uv_possible_blades);
185unsigned long sn_rtc_cycles_per_second; 185unsigned long sn_rtc_cycles_per_second;
186EXPORT_SYMBOL(sn_rtc_cycles_per_second); 186EXPORT_SYMBOL(sn_rtc_cycles_per_second);
187 187
188static const struct cpumask *uv_target_cpus(void)
189{
190 return cpu_online_mask;
191}
192
193static void uv_vector_allocation_domain(int cpu, struct cpumask *retmask)
194{
195 cpumask_clear(retmask);
196 cpumask_set_cpu(cpu, retmask);
197}
198
199static int __cpuinit uv_wakeup_secondary(int phys_apicid, unsigned long start_rip) 188static int __cpuinit uv_wakeup_secondary(int phys_apicid, unsigned long start_rip)
200{ 189{
201#ifdef CONFIG_SMP 190#ifdef CONFIG_SMP
@@ -280,25 +269,12 @@ static void uv_init_apic_ldr(void)
280{ 269{
281} 270}
282 271
283static unsigned int uv_cpu_mask_to_apicid(const struct cpumask *cpumask) 272static int
284{
285 /*
286 * We're using fixed IRQ delivery, can only return one phys APIC ID.
287 * May as well be the first.
288 */
289 int cpu = cpumask_first(cpumask);
290
291 if ((unsigned)cpu < nr_cpu_ids)
292 return per_cpu(x86_cpu_to_apicid, cpu) | uv_apicid_hibits;
293 else
294 return BAD_APICID;
295}
296
297static unsigned int
298uv_cpu_mask_to_apicid_and(const struct cpumask *cpumask, 273uv_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
299 const struct cpumask *andmask) 274 const struct cpumask *andmask,
275 unsigned int *apicid)
300{ 276{
301 int cpu; 277 unsigned int cpu;
302 278
303 /* 279 /*
304 * We're using fixed IRQ delivery, can only return one phys APIC ID. 280 * We're using fixed IRQ delivery, can only return one phys APIC ID.
@@ -308,7 +284,13 @@ uv_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
308 if (cpumask_test_cpu(cpu, cpu_online_mask)) 284 if (cpumask_test_cpu(cpu, cpu_online_mask))
309 break; 285 break;
310 } 286 }
311 return per_cpu(x86_cpu_to_apicid, cpu) | uv_apicid_hibits; 287
288 if (likely(cpu < nr_cpu_ids)) {
289 *apicid = per_cpu(x86_cpu_to_apicid, cpu) | uv_apicid_hibits;
290 return 0;
291 }
292
293 return -EINVAL;
312} 294}
313 295
314static unsigned int x2apic_get_apic_id(unsigned long x) 296static unsigned int x2apic_get_apic_id(unsigned long x)
@@ -362,13 +344,13 @@ static struct apic __refdata apic_x2apic_uv_x = {
362 .irq_delivery_mode = dest_Fixed, 344 .irq_delivery_mode = dest_Fixed,
363 .irq_dest_mode = 0, /* physical */ 345 .irq_dest_mode = 0, /* physical */
364 346
365 .target_cpus = uv_target_cpus, 347 .target_cpus = online_target_cpus,
366 .disable_esr = 0, 348 .disable_esr = 0,
367 .dest_logical = APIC_DEST_LOGICAL, 349 .dest_logical = APIC_DEST_LOGICAL,
368 .check_apicid_used = NULL, 350 .check_apicid_used = NULL,
369 .check_apicid_present = NULL, 351 .check_apicid_present = NULL,
370 352
371 .vector_allocation_domain = uv_vector_allocation_domain, 353 .vector_allocation_domain = default_vector_allocation_domain,
372 .init_apic_ldr = uv_init_apic_ldr, 354 .init_apic_ldr = uv_init_apic_ldr,
373 355
374 .ioapic_phys_id_map = NULL, 356 .ioapic_phys_id_map = NULL,
@@ -386,7 +368,6 @@ static struct apic __refdata apic_x2apic_uv_x = {
386 .set_apic_id = set_apic_id, 368 .set_apic_id = set_apic_id,
387 .apic_id_mask = 0xFFFFFFFFu, 369 .apic_id_mask = 0xFFFFFFFFu,
388 370
389 .cpu_mask_to_apicid = uv_cpu_mask_to_apicid,
390 .cpu_mask_to_apicid_and = uv_cpu_mask_to_apicid_and, 371 .cpu_mask_to_apicid_and = uv_cpu_mask_to_apicid_and,
391 372
392 .send_IPI_mask = uv_send_IPI_mask, 373 .send_IPI_mask = uv_send_IPI_mask,
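
The summit, x2apic and UV hunks all make the same interface change: cpu_mask_to_apicid_and() now returns 0 or -EINVAL and hands the APIC ID back through a pointer, rather than returning the BAD_APICID sentinel in-band. A self-contained sketch of the two conventions (the helpers and the mask-to-ID mapping are invented for illustration):

#include <stdio.h>
#include <errno.h>

#define BAD_APICID 0xFFFFu

/* old style: failure signalled by a magic return value */
static unsigned int old_mask_to_apicid(unsigned long mask)
{
        return mask ? (unsigned int)__builtin_ctzl(mask) : BAD_APICID;
}

/* new style: error code returned, result via out-parameter */
static int new_mask_to_apicid(unsigned long mask, unsigned int *apicid)
{
        if (!mask)
                return -EINVAL;
        *apicid = (unsigned int)__builtin_ctzl(mask);
        return 0;
}

int main(void)
{
        unsigned int id;

        if (!new_mask_to_apicid(0x8, &id))
                printf("new style: apicid %u\n", id);   /* 3 */
        if (old_mask_to_apicid(0) == BAD_APICID)
                printf("old style: BAD_APICID sentinel\n");
        return 0;
}
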
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index 07b0c0db466c..d65464e43503 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -201,6 +201,8 @@
201 * http://www.microsoft.com/whdc/archive/amp_12.mspx] 201 * http://www.microsoft.com/whdc/archive/amp_12.mspx]
202 */ 202 */
203 203
204#define pr_fmt(fmt) "apm: " fmt
205
204#include <linux/module.h> 206#include <linux/module.h>
205 207
206#include <linux/poll.h> 208#include <linux/poll.h>
@@ -485,11 +487,11 @@ static void apm_error(char *str, int err)
485 if (error_table[i].key == err) 487 if (error_table[i].key == err)
486 break; 488 break;
487 if (i < ERROR_COUNT) 489 if (i < ERROR_COUNT)
488 printk(KERN_NOTICE "apm: %s: %s\n", str, error_table[i].msg); 490 pr_notice("%s: %s\n", str, error_table[i].msg);
489 else if (err < 0) 491 else if (err < 0)
490 printk(KERN_NOTICE "apm: %s: linux error code %i\n", str, err); 492 pr_notice("%s: linux error code %i\n", str, err);
491 else 493 else
492 printk(KERN_NOTICE "apm: %s: unknown error code %#2.2x\n", 494 pr_notice("%s: unknown error code %#2.2x\n",
493 str, err); 495 str, err);
494} 496}
495 497
@@ -1184,7 +1186,7 @@ static void queue_event(apm_event_t event, struct apm_user *sender)
1184 static int notified; 1186 static int notified;
1185 1187
1186 if (notified++ == 0) 1188 if (notified++ == 0)
1187 printk(KERN_ERR "apm: an event queue overflowed\n"); 1189 pr_err("an event queue overflowed\n");
1188 if (++as->event_tail >= APM_MAX_EVENTS) 1190 if (++as->event_tail >= APM_MAX_EVENTS)
1189 as->event_tail = 0; 1191 as->event_tail = 0;
1190 } 1192 }
@@ -1447,7 +1449,7 @@ static void apm_mainloop(void)
1447static int check_apm_user(struct apm_user *as, const char *func) 1449static int check_apm_user(struct apm_user *as, const char *func)
1448{ 1450{
1449 if (as == NULL || as->magic != APM_BIOS_MAGIC) { 1451 if (as == NULL || as->magic != APM_BIOS_MAGIC) {
1450 printk(KERN_ERR "apm: %s passed bad filp\n", func); 1452 pr_err("%s passed bad filp\n", func);
1451 return 1; 1453 return 1;
1452 } 1454 }
1453 return 0; 1455 return 0;
@@ -1586,7 +1588,7 @@ static int do_release(struct inode *inode, struct file *filp)
1586 as1 = as1->next) 1588 as1 = as1->next)
1587 ; 1589 ;
1588 if (as1 == NULL) 1590 if (as1 == NULL)
1589 printk(KERN_ERR "apm: filp not in user list\n"); 1591 pr_err("filp not in user list\n");
1590 else 1592 else
1591 as1->next = as->next; 1593 as1->next = as->next;
1592 } 1594 }
@@ -1600,11 +1602,9 @@ static int do_open(struct inode *inode, struct file *filp)
1600 struct apm_user *as; 1602 struct apm_user *as;
1601 1603
1602 as = kmalloc(sizeof(*as), GFP_KERNEL); 1604 as = kmalloc(sizeof(*as), GFP_KERNEL);
1603 if (as == NULL) { 1605 if (as == NULL)
1604 printk(KERN_ERR "apm: cannot allocate struct of size %d bytes\n",
1605 sizeof(*as));
1606 return -ENOMEM; 1606 return -ENOMEM;
1607 } 1607
1608 as->magic = APM_BIOS_MAGIC; 1608 as->magic = APM_BIOS_MAGIC;
1609 as->event_tail = as->event_head = 0; 1609 as->event_tail = as->event_head = 0;
1610 as->suspends_pending = as->standbys_pending = 0; 1610 as->suspends_pending = as->standbys_pending = 0;
@@ -2313,16 +2313,16 @@ static int __init apm_init(void)
2313 } 2313 }
2314 2314
2315 if (apm_info.disabled) { 2315 if (apm_info.disabled) {
2316 printk(KERN_NOTICE "apm: disabled on user request.\n"); 2316 pr_notice("disabled on user request.\n");
2317 return -ENODEV; 2317 return -ENODEV;
2318 } 2318 }
2319 if ((num_online_cpus() > 1) && !power_off && !smp) { 2319 if ((num_online_cpus() > 1) && !power_off && !smp) {
2320 printk(KERN_NOTICE "apm: disabled - APM is not SMP safe.\n"); 2320 pr_notice("disabled - APM is not SMP safe.\n");
2321 apm_info.disabled = 1; 2321 apm_info.disabled = 1;
2322 return -ENODEV; 2322 return -ENODEV;
2323 } 2323 }
2324 if (!acpi_disabled) { 2324 if (!acpi_disabled) {
2325 printk(KERN_NOTICE "apm: overridden by ACPI.\n"); 2325 pr_notice("overridden by ACPI.\n");
2326 apm_info.disabled = 1; 2326 apm_info.disabled = 1;
2327 return -ENODEV; 2327 return -ENODEV;
2328 } 2328 }
@@ -2356,8 +2356,7 @@ static int __init apm_init(void)
2356 2356
2357 kapmd_task = kthread_create(apm, NULL, "kapmd"); 2357 kapmd_task = kthread_create(apm, NULL, "kapmd");
2358 if (IS_ERR(kapmd_task)) { 2358 if (IS_ERR(kapmd_task)) {
2359 printk(KERN_ERR "apm: disabled - Unable to start kernel " 2359 pr_err("disabled - Unable to start kernel thread\n");
2360 "thread.\n");
2361 err = PTR_ERR(kapmd_task); 2360 err = PTR_ERR(kapmd_task);
2362 kapmd_task = NULL; 2361 kapmd_task = NULL;
2363 remove_proc_entry("apm", NULL); 2362 remove_proc_entry("apm", NULL);
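
The apm_32.c conversion leans on the kernel's pr_fmt() hook: pr_err(), pr_notice() and friends paste pr_fmt(fmt) in front of the format string at preprocessing time, which is why the #define must appear before the first include. A simplified stand-alone sketch of the mechanism (the real macros live in include/linux/printk.h):

#include <stdio.h>

#define pr_fmt(fmt) "apm: " fmt
#define pr_err(fmt, ...) fprintf(stderr, pr_fmt(fmt), ##__VA_ARGS__)

int main(void)
{
        pr_err("%s passed bad filp\n", "do_ioctl");
        /* prints: apm: do_ioctl passed bad filp */
        return 0;
}
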
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index 6ab6aa2fdfdd..bac4c3804cc7 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -32,7 +32,9 @@ obj-$(CONFIG_PERF_EVENTS) += perf_event.o
32 32
33ifdef CONFIG_PERF_EVENTS 33ifdef CONFIG_PERF_EVENTS
34obj-$(CONFIG_CPU_SUP_AMD) += perf_event_amd.o 34obj-$(CONFIG_CPU_SUP_AMD) += perf_event_amd.o
35obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_p6.o perf_event_p4.o perf_event_intel_lbr.o perf_event_intel_ds.o perf_event_intel.o 35obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_p6.o perf_event_p4.o
36obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_lbr.o perf_event_intel_ds.o perf_event_intel.o
37obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_uncore.o
36endif 38endif
37 39
38obj-$(CONFIG_X86_MCE) += mcheck/ 40obj-$(CONFIG_X86_MCE) += mcheck/
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 146bb6218eec..9d92e19039f0 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -19,6 +19,39 @@
19 19
20#include "cpu.h" 20#include "cpu.h"
21 21
22static inline int rdmsrl_amd_safe(unsigned msr, unsigned long long *p)
23{
24 struct cpuinfo_x86 *c = &cpu_data(smp_processor_id());
25 u32 gprs[8] = { 0 };
26 int err;
27
28 WARN_ONCE((c->x86 != 0xf), "%s should only be used on K8!\n", __func__);
29
30 gprs[1] = msr;
31 gprs[7] = 0x9c5a203a;
32
33 err = rdmsr_safe_regs(gprs);
34
35 *p = gprs[0] | ((u64)gprs[2] << 32);
36
37 return err;
38}
39
40static inline int wrmsrl_amd_safe(unsigned msr, unsigned long long val)
41{
42 struct cpuinfo_x86 *c = &cpu_data(smp_processor_id());
43 u32 gprs[8] = { 0 };
44
45 WARN_ONCE((c->x86 != 0xf), "%s should only be used on K8!\n", __func__);
46
47 gprs[0] = (u32)val;
48 gprs[1] = msr;
49 gprs[2] = val >> 32;
50 gprs[7] = 0x9c5a203a;
51
52 return wrmsr_safe_regs(gprs);
53}
54
22#ifdef CONFIG_X86_32 55#ifdef CONFIG_X86_32
23/* 56/*
24 * B step AMD K6 before B 9730xxxx have hardware bugs that can cause 57 * B step AMD K6 before B 9730xxxx have hardware bugs that can cause
@@ -586,9 +619,9 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
586 !cpu_has(c, X86_FEATURE_TOPOEXT)) { 619 !cpu_has(c, X86_FEATURE_TOPOEXT)) {
587 u64 val; 620 u64 val;
588 621
589 if (!rdmsrl_amd_safe(0xc0011005, &val)) { 622 if (!rdmsrl_safe(0xc0011005, &val)) {
590 val |= 1ULL << 54; 623 val |= 1ULL << 54;
591 wrmsrl_amd_safe(0xc0011005, val); 624 wrmsrl_safe(0xc0011005, val);
592 rdmsrl(0xc0011005, val); 625 rdmsrl(0xc0011005, val);
593 if (val & (1ULL << 54)) { 626 if (val & (1ULL << 54)) {
594 set_cpu_cap(c, X86_FEATURE_TOPOEXT); 627 set_cpu_cap(c, X86_FEATURE_TOPOEXT);
@@ -679,7 +712,7 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
679 err = rdmsrl_safe(MSR_AMD64_MCx_MASK(4), &mask); 712 err = rdmsrl_safe(MSR_AMD64_MCx_MASK(4), &mask);
680 if (err == 0) { 713 if (err == 0) {
681 mask |= (1 << 10); 714 mask |= (1 << 10);
682 checking_wrmsrl(MSR_AMD64_MCx_MASK(4), mask); 715 wrmsrl_safe(MSR_AMD64_MCx_MASK(4), mask);
683 } 716 }
684 } 717 }
685 718
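
The rdmsrl_amd_safe()/wrmsrl_amd_safe() helpers moved into amd.c above sit on top of rdmsr_safe_regs()/wrmsr_safe_regs(), whose u32 gprs[8] argument maps onto the x86 GPRs: gprs[0] is eax, gprs[1] is ecx (the MSR number), gprs[2] is edx, and gprs[7] is edi, which carries the 0x9c5a203a pass-code these K8 northbridge MSRs require. A user-space sketch of reassembling the 64-bit result the way the helper's final line does (the returned halves are invented):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
        uint32_t gprs[8] = { 0 };
        uint64_t val;

        /* pretend the rdmsr returned edx:eax = 0x00000001:0x00c0ffee */
        gprs[0] = 0x00c0ffee;   /* eax: low 32 bits  */
        gprs[2] = 0x00000001;   /* edx: high 32 bits */

        val = gprs[0] | ((uint64_t)gprs[2] << 32);
        printf("msr value = 0x%016llx\n", (unsigned long long)val);
        return 0;
}
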
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 46674fbb62ba..c97bb7b5a9f8 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -55,8 +55,8 @@ static void __init check_fpu(void)
55 55
56 if (!boot_cpu_data.hard_math) { 56 if (!boot_cpu_data.hard_math) {
57#ifndef CONFIG_MATH_EMULATION 57#ifndef CONFIG_MATH_EMULATION
58 printk(KERN_EMERG "No coprocessor found and no math emulation present.\n"); 58 pr_emerg("No coprocessor found and no math emulation present\n");
59 printk(KERN_EMERG "Giving up.\n"); 59 pr_emerg("Giving up\n");
60 for (;;) ; 60 for (;;) ;
61#endif 61#endif
62 return; 62 return;
@@ -86,7 +86,7 @@ static void __init check_fpu(void)
86 86
87 boot_cpu_data.fdiv_bug = fdiv_bug; 87 boot_cpu_data.fdiv_bug = fdiv_bug;
88 if (boot_cpu_data.fdiv_bug) 88 if (boot_cpu_data.fdiv_bug)
89 printk(KERN_WARNING "Hmm, FPU with FDIV bug.\n"); 89 pr_warn("Hmm, FPU with FDIV bug\n");
90} 90}
91 91
92static void __init check_hlt(void) 92static void __init check_hlt(void)
@@ -94,16 +94,16 @@ static void __init check_hlt(void)
94 if (boot_cpu_data.x86 >= 5 || paravirt_enabled()) 94 if (boot_cpu_data.x86 >= 5 || paravirt_enabled())
95 return; 95 return;
96 96
97 printk(KERN_INFO "Checking 'hlt' instruction... "); 97 pr_info("Checking 'hlt' instruction... ");
98 if (!boot_cpu_data.hlt_works_ok) { 98 if (!boot_cpu_data.hlt_works_ok) {
99 printk("disabled\n"); 99 pr_cont("disabled\n");
100 return; 100 return;
101 } 101 }
102 halt(); 102 halt();
103 halt(); 103 halt();
104 halt(); 104 halt();
105 halt(); 105 halt();
106 printk(KERN_CONT "OK.\n"); 106 pr_cont("OK\n");
107} 107}
108 108
109/* 109/*
@@ -116,7 +116,7 @@ static void __init check_popad(void)
116#ifndef CONFIG_X86_POPAD_OK 116#ifndef CONFIG_X86_POPAD_OK
117 int res, inp = (int) &res; 117 int res, inp = (int) &res;
118 118
119 printk(KERN_INFO "Checking for popad bug... "); 119 pr_info("Checking for popad bug... ");
120 __asm__ __volatile__( 120 __asm__ __volatile__(
121 "movl $12345678,%%eax; movl $0,%%edi; pusha; popa; movl (%%edx,%%edi),%%ecx " 121 "movl $12345678,%%eax; movl $0,%%edi; pusha; popa; movl (%%edx,%%edi),%%ecx "
122 : "=&a" (res) 122 : "=&a" (res)
@@ -127,9 +127,9 @@ static void __init check_popad(void)
127 * CPU hard. Too bad. 127 * CPU hard. Too bad.
128 */ 128 */
129 if (res != 12345678) 129 if (res != 12345678)
130 printk(KERN_CONT "Buggy.\n"); 130 pr_cont("Buggy\n");
131 else 131 else
132 printk(KERN_CONT "OK.\n"); 132 pr_cont("OK\n");
133#endif 133#endif
134} 134}
135 135
@@ -161,7 +161,7 @@ void __init check_bugs(void)
161{ 161{
162 identify_boot_cpu(); 162 identify_boot_cpu();
163#ifndef CONFIG_SMP 163#ifndef CONFIG_SMP
164 printk(KERN_INFO "CPU: "); 164 pr_info("CPU: ");
165 print_cpu_info(&boot_cpu_data); 165 print_cpu_info(&boot_cpu_data);
166#endif 166#endif
167 check_config(); 167 check_config();
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 6b9333b429ba..5bbc082c47ad 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -947,7 +947,7 @@ static void __cpuinit __print_cpu_msr(void)
947 index_max = msr_range_array[i].max; 947 index_max = msr_range_array[i].max;
948 948
949 for (index = index_min; index < index_max; index++) { 949 for (index = index_min; index < index_max; index++) {
950 if (rdmsrl_amd_safe(index, &val)) 950 if (rdmsrl_safe(index, &val))
951 continue; 951 continue;
952 printk(KERN_INFO " MSR%08x: %016llx\n", index, val); 952 printk(KERN_INFO " MSR%08x: %016llx\n", index, val);
953 } 953 }
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index da27c5d2168a..5a5a5dc1ff15 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -7,6 +7,9 @@
7 * Copyright 2008 Intel Corporation 7 * Copyright 2008 Intel Corporation
8 * Author: Andi Kleen 8 * Author: Andi Kleen
9 */ 9 */
10
11#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
12
10#include <linux/thread_info.h> 13#include <linux/thread_info.h>
11#include <linux/capability.h> 14#include <linux/capability.h>
12#include <linux/miscdevice.h> 15#include <linux/miscdevice.h>
@@ -210,7 +213,7 @@ static void drain_mcelog_buffer(void)
210 cpu_relax(); 213 cpu_relax();
211 214
212 if (!m->finished && retries >= 4) { 215 if (!m->finished && retries >= 4) {
213 pr_err("MCE: skipping error being logged currently!\n"); 216 pr_err("skipping error being logged currently!\n");
214 break; 217 break;
215 } 218 }
216 } 219 }
@@ -1167,8 +1170,9 @@ int memory_failure(unsigned long pfn, int vector, int flags)
1167{ 1170{
1168 /* mce_severity() should not hand us an ACTION_REQUIRED error */ 1171 /* mce_severity() should not hand us an ACTION_REQUIRED error */
1169 BUG_ON(flags & MF_ACTION_REQUIRED); 1172 BUG_ON(flags & MF_ACTION_REQUIRED);
1170 printk(KERN_ERR "Uncorrected memory error in page 0x%lx ignored\n" 1173 pr_err("Uncorrected memory error in page 0x%lx ignored\n"
1171 "Rebuild kernel with CONFIG_MEMORY_FAILURE=y for smarter handling\n", pfn); 1174 "Rebuild kernel with CONFIG_MEMORY_FAILURE=y for smarter handling\n",
1175 pfn);
1172 1176
1173 return 0; 1177 return 0;
1174} 1178}
@@ -1358,11 +1362,10 @@ static int __cpuinit __mcheck_cpu_cap_init(void)
1358 1362
1359 b = cap & MCG_BANKCNT_MASK; 1363 b = cap & MCG_BANKCNT_MASK;
1360 if (!banks) 1364 if (!banks)
1361 printk(KERN_INFO "mce: CPU supports %d MCE banks\n", b); 1365 pr_info("CPU supports %d MCE banks\n", b);
1362 1366
1363 if (b > MAX_NR_BANKS) { 1367 if (b > MAX_NR_BANKS) {
1364 printk(KERN_WARNING 1368 pr_warn("Using only %u machine check banks out of %u\n",
1365 "MCE: Using only %u machine check banks out of %u\n",
1366 MAX_NR_BANKS, b); 1369 MAX_NR_BANKS, b);
1367 b = MAX_NR_BANKS; 1370 b = MAX_NR_BANKS;
1368 } 1371 }
@@ -1419,7 +1422,7 @@ static void __mcheck_cpu_init_generic(void)
1419static int __cpuinit __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c) 1422static int __cpuinit __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
1420{ 1423{
1421 if (c->x86_vendor == X86_VENDOR_UNKNOWN) { 1424 if (c->x86_vendor == X86_VENDOR_UNKNOWN) {
1422 pr_info("MCE: unknown CPU type - not enabling MCE support.\n"); 1425 pr_info("unknown CPU type - not enabling MCE support\n");
1423 return -EOPNOTSUPP; 1426 return -EOPNOTSUPP;
1424 } 1427 }
1425 1428
@@ -1574,7 +1577,7 @@ static void __mcheck_cpu_init_timer(void)
1574/* Handle unconfigured int18 (should never happen) */ 1577/* Handle unconfigured int18 (should never happen) */
1575static void unexpected_machine_check(struct pt_regs *regs, long error_code) 1578static void unexpected_machine_check(struct pt_regs *regs, long error_code)
1576{ 1579{
1577 printk(KERN_ERR "CPU#%d: Unexpected int18 (Machine Check).\n", 1580 pr_err("CPU#%d: Unexpected int18 (Machine Check)\n",
1578 smp_processor_id()); 1581 smp_processor_id());
1579} 1582}
1580 1583
@@ -1893,8 +1896,7 @@ static int __init mcheck_enable(char *str)
1893 get_option(&str, &monarch_timeout); 1896 get_option(&str, &monarch_timeout);
1894 } 1897 }
1895 } else { 1898 } else {
1896 printk(KERN_INFO "mce argument %s ignored. Please use /sys\n", 1899 pr_info("mce argument %s ignored. Please use /sys\n", str);
1897 str);
1898 return 0; 1900 return 0;
1899 } 1901 }
1900 return 1; 1902 return 1;
diff --git a/arch/x86/kernel/cpu/mtrr/cleanup.c b/arch/x86/kernel/cpu/mtrr/cleanup.c
index bdda2e6c673b..35ffda5d0727 100644
--- a/arch/x86/kernel/cpu/mtrr/cleanup.c
+++ b/arch/x86/kernel/cpu/mtrr/cleanup.c
@@ -258,11 +258,11 @@ range_to_mtrr(unsigned int reg, unsigned long range_startk,
258 258
259 /* Compute the maximum size with which we can make a range: */ 259 /* Compute the maximum size with which we can make a range: */
260 if (range_startk) 260 if (range_startk)
261 max_align = ffs(range_startk) - 1; 261 max_align = __ffs(range_startk);
262 else 262 else
263 max_align = 32; 263 max_align = BITS_PER_LONG - 1;
264 264
265 align = fls(range_sizek) - 1; 265 align = __fls(range_sizek);
266 if (align > max_align) 266 if (align > max_align)
267 align = max_align; 267 align = max_align;
268 268
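
The cleanup.c hunk swaps the 1-based ffs()/fls() helpers (which return 0 for "no bit set") for the 0-based __ffs()/__fls() (undefined for 0), dropping the "- 1" adjustments; the zero case is now handled up front with BITS_PER_LONG - 1. A user-space sketch of the two conventions via the GCC builtins they compile down to:

#include <stdio.h>

int main(void)
{
        unsigned long x = 0x90;         /* bits 4 and 7 set */

        printf("ffs()-1 = %d\n", __builtin_ffsl(x) - 1);  /* 4 */
        printf("__ffs() = %d\n", __builtin_ctzl(x));      /* 4, same bit */
        printf("__fls() = %d\n",
               (int)(8 * sizeof(x) - 1) - __builtin_clzl(x)); /* 7 */
        return 0;
}
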
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index 75772ae6c65f..e9fe907cd249 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -361,11 +361,7 @@ static void __init print_mtrr_state(void)
361 } 361 }
362 pr_debug("MTRR variable ranges %sabled:\n", 362 pr_debug("MTRR variable ranges %sabled:\n",
363 mtrr_state.enabled & 2 ? "en" : "dis"); 363 mtrr_state.enabled & 2 ? "en" : "dis");
364 if (size_or_mask & 0xffffffffUL) 364 high_width = (__ffs64(size_or_mask) - (32 - PAGE_SHIFT) + 3) / 4;
365 high_width = ffs(size_or_mask & 0xffffffffUL) - 1;
366 else
367 high_width = ffs(size_or_mask>>32) + 32 - 1;
368 high_width = (high_width - (32 - PAGE_SHIFT) + 3) / 4;
369 365
370 for (i = 0; i < num_var_ranges; ++i) { 366 for (i = 0; i < num_var_ranges; ++i) {
371 if (mtrr_state.var_ranges[i].mask_lo & (1 << 11)) 367 if (mtrr_state.var_ranges[i].mask_lo & (1 << 11))
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index c4706cf9c011..29557aa06dda 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -35,17 +35,6 @@
35 35
36#include "perf_event.h" 36#include "perf_event.h"
37 37
38#if 0
39#undef wrmsrl
40#define wrmsrl(msr, val) \
41do { \
42 trace_printk("wrmsrl(%lx, %lx)\n", (unsigned long)(msr),\
43 (unsigned long)(val)); \
44 native_write_msr((msr), (u32)((u64)(val)), \
45 (u32)((u64)(val) >> 32)); \
46} while (0)
47#endif
48
49struct x86_pmu x86_pmu __read_mostly; 38struct x86_pmu x86_pmu __read_mostly;
50 39
51DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { 40DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = {
@@ -74,7 +63,7 @@ u64 x86_perf_event_update(struct perf_event *event)
74 int idx = hwc->idx; 63 int idx = hwc->idx;
75 s64 delta; 64 s64 delta;
76 65
77 if (idx == X86_PMC_IDX_FIXED_BTS) 66 if (idx == INTEL_PMC_IDX_FIXED_BTS)
78 return 0; 67 return 0;
79 68
80 /* 69 /*
@@ -86,7 +75,7 @@ u64 x86_perf_event_update(struct perf_event *event)
86 */ 75 */
87again: 76again:
88 prev_raw_count = local64_read(&hwc->prev_count); 77 prev_raw_count = local64_read(&hwc->prev_count);
89 rdmsrl(hwc->event_base, new_raw_count); 78 rdpmcl(hwc->event_base_rdpmc, new_raw_count);
90 79
91 if (local64_cmpxchg(&hwc->prev_count, prev_raw_count, 80 if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
92 new_raw_count) != prev_raw_count) 81 new_raw_count) != prev_raw_count)
@@ -189,7 +178,7 @@ static void release_pmc_hardware(void) {}
189 178
190static bool check_hw_exists(void) 179static bool check_hw_exists(void)
191{ 180{
192 u64 val, val_new = 0; 181 u64 val, val_new = ~0;
193 int i, reg, ret = 0; 182 int i, reg, ret = 0;
194 183
195 /* 184 /*
@@ -222,8 +211,9 @@ static bool check_hw_exists(void)
222 * that don't trap on the MSR access and always return 0s. 211 * that don't trap on the MSR access and always return 0s.
223 */ 212 */
224 val = 0xabcdUL; 213 val = 0xabcdUL;
225 ret = checking_wrmsrl(x86_pmu_event_addr(0), val); 214 reg = x86_pmu_event_addr(0);
226 ret |= rdmsrl_safe(x86_pmu_event_addr(0), &val_new); 215 ret = wrmsrl_safe(reg, val);
216 ret |= rdmsrl_safe(reg, &val_new);
227 if (ret || val != val_new) 217 if (ret || val != val_new)
228 goto msr_fail; 218 goto msr_fail;
229 219
@@ -240,6 +230,7 @@ bios_fail:
240 230
241msr_fail: 231msr_fail:
242 printk(KERN_CONT "Broken PMU hardware detected, using software events only.\n"); 232 printk(KERN_CONT "Broken PMU hardware detected, using software events only.\n");
233 printk(KERN_ERR "Failed to access perfctr msr (MSR %x is %Lx)\n", reg, val_new);
243 234
244 return false; 235 return false;
245} 236}
@@ -388,7 +379,7 @@ int x86_pmu_hw_config(struct perf_event *event)
388 int precise = 0; 379 int precise = 0;
389 380
390 /* Support for constant skid */ 381 /* Support for constant skid */
391 if (x86_pmu.pebs_active) { 382 if (x86_pmu.pebs_active && !x86_pmu.pebs_broken) {
392 precise++; 383 precise++;
393 384
394 /* Support for IP fixup */ 385 /* Support for IP fixup */
@@ -637,8 +628,8 @@ static bool __perf_sched_find_counter(struct perf_sched *sched)
637 c = sched->constraints[sched->state.event]; 628 c = sched->constraints[sched->state.event];
638 629
639 /* Prefer fixed purpose counters */ 630 /* Prefer fixed purpose counters */
640 if (x86_pmu.num_counters_fixed) { 631 if (c->idxmsk64 & (~0ULL << INTEL_PMC_IDX_FIXED)) {
641 idx = X86_PMC_IDX_FIXED; 632 idx = INTEL_PMC_IDX_FIXED;
642 for_each_set_bit_from(idx, c->idxmsk, X86_PMC_IDX_MAX) { 633 for_each_set_bit_from(idx, c->idxmsk, X86_PMC_IDX_MAX) {
643 if (!__test_and_set_bit(idx, sched->state.used)) 634 if (!__test_and_set_bit(idx, sched->state.used))
644 goto done; 635 goto done;
@@ -646,7 +637,7 @@ static bool __perf_sched_find_counter(struct perf_sched *sched)
646 } 637 }
647 /* Grab the first unused counter starting with idx */ 638 /* Grab the first unused counter starting with idx */
648 idx = sched->state.counter; 639 idx = sched->state.counter;
649 for_each_set_bit_from(idx, c->idxmsk, X86_PMC_IDX_FIXED) { 640 for_each_set_bit_from(idx, c->idxmsk, INTEL_PMC_IDX_FIXED) {
650 if (!__test_and_set_bit(idx, sched->state.used)) 641 if (!__test_and_set_bit(idx, sched->state.used))
651 goto done; 642 goto done;
652 } 643 }
@@ -704,8 +695,8 @@ static bool perf_sched_next_event(struct perf_sched *sched)
704/* 695/*
705 * Assign a counter for each event. 696 * Assign a counter for each event.
706 */ 697 */
707static int perf_assign_events(struct event_constraint **constraints, int n, 698int perf_assign_events(struct event_constraint **constraints, int n,
708 int wmin, int wmax, int *assign) 699 int wmin, int wmax, int *assign)
709{ 700{
710 struct perf_sched sched; 701 struct perf_sched sched;
711 702
@@ -824,15 +815,17 @@ static inline void x86_assign_hw_event(struct perf_event *event,
824 hwc->last_cpu = smp_processor_id(); 815 hwc->last_cpu = smp_processor_id();
825 hwc->last_tag = ++cpuc->tags[i]; 816 hwc->last_tag = ++cpuc->tags[i];
826 817
827 if (hwc->idx == X86_PMC_IDX_FIXED_BTS) { 818 if (hwc->idx == INTEL_PMC_IDX_FIXED_BTS) {
828 hwc->config_base = 0; 819 hwc->config_base = 0;
829 hwc->event_base = 0; 820 hwc->event_base = 0;
830 } else if (hwc->idx >= X86_PMC_IDX_FIXED) { 821 } else if (hwc->idx >= INTEL_PMC_IDX_FIXED) {
831 hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL; 822 hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
832 hwc->event_base = MSR_ARCH_PERFMON_FIXED_CTR0 + (hwc->idx - X86_PMC_IDX_FIXED); 823 hwc->event_base = MSR_ARCH_PERFMON_FIXED_CTR0 + (hwc->idx - INTEL_PMC_IDX_FIXED);
824 hwc->event_base_rdpmc = (hwc->idx - INTEL_PMC_IDX_FIXED) | 1<<30;
833 } else { 825 } else {
834 hwc->config_base = x86_pmu_config_addr(hwc->idx); 826 hwc->config_base = x86_pmu_config_addr(hwc->idx);
835 hwc->event_base = x86_pmu_event_addr(hwc->idx); 827 hwc->event_base = x86_pmu_event_addr(hwc->idx);
828 hwc->event_base_rdpmc = hwc->idx;
836 } 829 }
837} 830}
838 831
@@ -930,7 +923,7 @@ int x86_perf_event_set_period(struct perf_event *event)
930 s64 period = hwc->sample_period; 923 s64 period = hwc->sample_period;
931 int ret = 0, idx = hwc->idx; 924 int ret = 0, idx = hwc->idx;
932 925
933 if (idx == X86_PMC_IDX_FIXED_BTS) 926 if (idx == INTEL_PMC_IDX_FIXED_BTS)
934 return 0; 927 return 0;
935 928
936 /* 929 /*
@@ -1316,7 +1309,6 @@ static struct attribute_group x86_pmu_format_group = {
1316static int __init init_hw_perf_events(void) 1309static int __init init_hw_perf_events(void)
1317{ 1310{
1318 struct x86_pmu_quirk *quirk; 1311 struct x86_pmu_quirk *quirk;
1319 struct event_constraint *c;
1320 int err; 1312 int err;
1321 1313
1322 pr_info("Performance Events: "); 1314 pr_info("Performance Events: ");
@@ -1347,21 +1339,8 @@ static int __init init_hw_perf_events(void)
1347 for (quirk = x86_pmu.quirks; quirk; quirk = quirk->next) 1339 for (quirk = x86_pmu.quirks; quirk; quirk = quirk->next)
1348 quirk->func(); 1340 quirk->func();
1349 1341
1350 if (x86_pmu.num_counters > X86_PMC_MAX_GENERIC) { 1342 if (!x86_pmu.intel_ctrl)
1351 WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!", 1343 x86_pmu.intel_ctrl = (1 << x86_pmu.num_counters) - 1;
1352 x86_pmu.num_counters, X86_PMC_MAX_GENERIC);
1353 x86_pmu.num_counters = X86_PMC_MAX_GENERIC;
1354 }
1355 x86_pmu.intel_ctrl = (1 << x86_pmu.num_counters) - 1;
1356
1357 if (x86_pmu.num_counters_fixed > X86_PMC_MAX_FIXED) {
1358 WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!",
1359 x86_pmu.num_counters_fixed, X86_PMC_MAX_FIXED);
1360 x86_pmu.num_counters_fixed = X86_PMC_MAX_FIXED;
1361 }
1362
1363 x86_pmu.intel_ctrl |=
1364 ((1LL << x86_pmu.num_counters_fixed)-1) << X86_PMC_IDX_FIXED;
1365 1344
1366 perf_events_lapic_init(); 1345 perf_events_lapic_init();
1367 register_nmi_handler(NMI_LOCAL, perf_event_nmi_handler, 0, "PMI"); 1346 register_nmi_handler(NMI_LOCAL, perf_event_nmi_handler, 0, "PMI");
@@ -1370,22 +1349,6 @@ static int __init init_hw_perf_events(void)
1370 __EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_counters) - 1, 1349 __EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_counters) - 1,
1371 0, x86_pmu.num_counters, 0); 1350 0, x86_pmu.num_counters, 0);
1372 1351
1373 if (x86_pmu.event_constraints) {
1374 /*
1375 * event on fixed counter2 (REF_CYCLES) only works on this
1376 * counter, so do not extend mask to generic counters
1377 */
1378 for_each_event_constraint(c, x86_pmu.event_constraints) {
1379 if (c->cmask != X86_RAW_EVENT_MASK
1380 || c->idxmsk64 == X86_PMC_MSK_FIXED_REF_CYCLES) {
1381 continue;
1382 }
1383
1384 c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1;
1385 c->weight += x86_pmu.num_counters;
1386 }
1387 }
1388
1389 x86_pmu.attr_rdpmc = 1; /* enable userspace RDPMC usage by default */ 1352 x86_pmu.attr_rdpmc = 1; /* enable userspace RDPMC usage by default */
1390 x86_pmu_format_group.attrs = x86_pmu.format_attrs; 1353 x86_pmu_format_group.attrs = x86_pmu.format_attrs;
1391 1354
@@ -1620,8 +1583,8 @@ static int x86_pmu_event_idx(struct perf_event *event)
1620 if (!x86_pmu.attr_rdpmc) 1583 if (!x86_pmu.attr_rdpmc)
1621 return 0; 1584 return 0;
1622 1585
1623 if (x86_pmu.num_counters_fixed && idx >= X86_PMC_IDX_FIXED) { 1586 if (x86_pmu.num_counters_fixed && idx >= INTEL_PMC_IDX_FIXED) {
1624 idx -= X86_PMC_IDX_FIXED; 1587 idx -= INTEL_PMC_IDX_FIXED;
1625 idx |= 1 << 30; 1588 idx |= 1 << 30;
1626 } 1589 }
1627 1590
@@ -1649,7 +1612,12 @@ static ssize_t set_attr_rdpmc(struct device *cdev,
1649 struct device_attribute *attr, 1612 struct device_attribute *attr,
1650 const char *buf, size_t count) 1613 const char *buf, size_t count)
1651{ 1614{
1652 unsigned long val = simple_strtoul(buf, NULL, 0); 1615 unsigned long val;
1616 ssize_t ret;
1617
1618 ret = kstrtoul(buf, 0, &val);
1619 if (ret)
1620 return ret;
1653 1621
1654 if (!!val != !!x86_pmu.attr_rdpmc) { 1622 if (!!val != !!x86_pmu.attr_rdpmc) {
1655 x86_pmu.attr_rdpmc = !!val; 1623 x86_pmu.attr_rdpmc = !!val;
@@ -1682,13 +1650,20 @@ static void x86_pmu_flush_branch_stack(void)
1682 x86_pmu.flush_branch_stack(); 1650 x86_pmu.flush_branch_stack();
1683} 1651}
1684 1652
1653void perf_check_microcode(void)
1654{
1655 if (x86_pmu.check_microcode)
1656 x86_pmu.check_microcode();
1657}
1658EXPORT_SYMBOL_GPL(perf_check_microcode);
1659
1685static struct pmu pmu = { 1660static struct pmu pmu = {
1686 .pmu_enable = x86_pmu_enable, 1661 .pmu_enable = x86_pmu_enable,
1687 .pmu_disable = x86_pmu_disable, 1662 .pmu_disable = x86_pmu_disable,
1688 1663
1689 .attr_groups = x86_pmu_attr_groups, 1664 .attr_groups = x86_pmu_attr_groups,
1690 1665
1691 .event_init = x86_pmu_event_init, 1666 .event_init = x86_pmu_event_init,
1692 1667
1693 .add = x86_pmu_add, 1668 .add = x86_pmu_add,
1694 .del = x86_pmu_del, 1669 .del = x86_pmu_del,
@@ -1696,11 +1671,11 @@ static struct pmu pmu = {
1696 .stop = x86_pmu_stop, 1671 .stop = x86_pmu_stop,
1697 .read = x86_pmu_read, 1672 .read = x86_pmu_read,
1698 1673
1699 .start_txn = x86_pmu_start_txn, 1674 .start_txn = x86_pmu_start_txn,
1700 .cancel_txn = x86_pmu_cancel_txn, 1675 .cancel_txn = x86_pmu_cancel_txn,
1701 .commit_txn = x86_pmu_commit_txn, 1676 .commit_txn = x86_pmu_commit_txn,
1702 1677
1703 .event_idx = x86_pmu_event_idx, 1678 .event_idx = x86_pmu_event_idx,
1704 .flush_branch_stack = x86_pmu_flush_branch_stack, 1679 .flush_branch_stack = x86_pmu_flush_branch_stack,
1705}; 1680};
1706 1681
@@ -1863,7 +1838,7 @@ unsigned long perf_misc_flags(struct pt_regs *regs)
1863 else 1838 else
1864 misc |= PERF_RECORD_MISC_GUEST_KERNEL; 1839 misc |= PERF_RECORD_MISC_GUEST_KERNEL;
1865 } else { 1840 } else {
1866 if (user_mode(regs)) 1841 if (!kernel_ip(regs->ip))
1867 misc |= PERF_RECORD_MISC_USER; 1842 misc |= PERF_RECORD_MISC_USER;
1868 else 1843 else
1869 misc |= PERF_RECORD_MISC_KERNEL; 1844 misc |= PERF_RECORD_MISC_KERNEL;
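
The new hwc->event_base_rdpmc field caches the ECX value for the RDPMC instruction, so x86_perf_event_update() can read counters with rdpmcl() instead of an MSR read: generic counters use their plain index, fixed counters use (index - INTEL_PMC_IDX_FIXED) with bit 30 set. A stand-alone sketch of the encoding (the counter indices are examples):

#include <stdio.h>

#define INTEL_PMC_IDX_FIXED 32

static unsigned int event_base_rdpmc(int idx)
{
        if (idx >= INTEL_PMC_IDX_FIXED)
                return (unsigned int)(idx - INTEL_PMC_IDX_FIXED) | (1u << 30);
        return (unsigned int)idx;
}

int main(void)
{
        printf("generic counter 2 -> ecx 0x%x\n", event_base_rdpmc(2));
        printf("fixed counter 1   -> ecx 0x%x\n", event_base_rdpmc(33));
        /* 0x2 and 0x40000001 respectively */
        return 0;
}
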
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index 7241e2fc3c17..a15df4be151f 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -14,6 +14,18 @@
14 14
15#include <linux/perf_event.h> 15#include <linux/perf_event.h>
16 16
17#if 0
18#undef wrmsrl
19#define wrmsrl(msr, val) \
20do { \
21 unsigned int _msr = (msr); \
22 u64 _val = (val); \
23 trace_printk("wrmsrl(%x, %Lx)\n", (unsigned int)(_msr), \
24 (unsigned long long)(_val)); \
25 native_write_msr((_msr), (u32)(_val), (u32)(_val >> 32)); \
26} while (0)
27#endif
28
17/* 29/*
18 * | NHM/WSM | SNB | 30 * | NHM/WSM | SNB |
19 * register ------------------------------- 31 * register -------------------------------
@@ -57,7 +69,7 @@ struct amd_nb {
57}; 69};
58 70
59/* The maximal number of PEBS events: */ 71/* The maximal number of PEBS events: */
60#define MAX_PEBS_EVENTS 4 72#define MAX_PEBS_EVENTS 8
61 73
62/* 74/*
63 * A debug store configuration. 75 * A debug store configuration.
@@ -349,6 +361,8 @@ struct x86_pmu {
349 void (*cpu_starting)(int cpu); 361 void (*cpu_starting)(int cpu);
350 void (*cpu_dying)(int cpu); 362 void (*cpu_dying)(int cpu);
351 void (*cpu_dead)(int cpu); 363 void (*cpu_dead)(int cpu);
364
365 void (*check_microcode)(void);
352 void (*flush_branch_stack)(void); 366 void (*flush_branch_stack)(void);
353 367
354 /* 368 /*
@@ -360,12 +374,16 @@ struct x86_pmu {
360 /* 374 /*
361 * Intel DebugStore bits 375 * Intel DebugStore bits
362 */ 376 */
363 int bts, pebs; 377 int bts :1,
364 int bts_active, pebs_active; 378 bts_active :1,
379 pebs :1,
380 pebs_active :1,
381 pebs_broken :1;
365 int pebs_record_size; 382 int pebs_record_size;
366 void (*drain_pebs)(struct pt_regs *regs); 383 void (*drain_pebs)(struct pt_regs *regs);
367 struct event_constraint *pebs_constraints; 384 struct event_constraint *pebs_constraints;
368 void (*pebs_aliases)(struct perf_event *event); 385 void (*pebs_aliases)(struct perf_event *event);
386 int max_pebs_events;
369 387
370 /* 388 /*
371 * Intel LBR 389 * Intel LBR
@@ -468,6 +486,8 @@ static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc,
468 486
469void x86_pmu_enable_all(int added); 487void x86_pmu_enable_all(int added);
470 488
489int perf_assign_events(struct event_constraint **constraints, int n,
490 int wmin, int wmax, int *assign);
471int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign); 491int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign);
472 492
473void x86_pmu_stop(struct perf_event *event, int flags); 493void x86_pmu_stop(struct perf_event *event, int flags);
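
One point of the debug wrmsrl() wrapper now kept in this header: by copying msr and val into the _msr/_val temporaries it evaluates its arguments exactly once, unlike the removed #if 0 version in perf_event.c, which expanded (msr) and (val) several times. A stand-alone illustration of that hazard, using the same GCC statement-expression idiom (the macro and function names are invented):

#include <stdio.h>

static int calls;

static int next(void)
{
        return ++calls;         /* side effect we can count */
}

#define DOUBLE_BAD(v)   ((v) + (v))                     /* evaluates v twice */
#define DOUBLE_GOOD(v)  ({ int _v = (v); _v + _v; })    /* evaluates v once  */

int main(void)
{
        int a, b;

        calls = 0;
        a = DOUBLE_BAD(next());
        printf("bad:  a=%d, next() called %d times\n", a, calls);  /* 2 calls */

        calls = 0;
        b = DOUBLE_GOOD(next());
        printf("good: b=%d, next() called %d time\n", b, calls);   /* 1 call */
        return 0;
}
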
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index 11a4eb9131d5..4528ae7b6ec4 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -366,7 +366,7 @@ static void amd_pmu_cpu_starting(int cpu)
366 366
367 cpuc->perf_ctr_virt_mask = AMD_PERFMON_EVENTSEL_HOSTONLY; 367 cpuc->perf_ctr_virt_mask = AMD_PERFMON_EVENTSEL_HOSTONLY;
368 368
369 if (boot_cpu_data.x86_max_cores < 2 || boot_cpu_data.x86 == 0x15) 369 if (boot_cpu_data.x86_max_cores < 2)
370 return; 370 return;
371 371
372 nb_id = amd_get_nb_id(cpu); 372 nb_id = amd_get_nb_id(cpu);
@@ -422,35 +422,6 @@ static struct attribute *amd_format_attr[] = {
422 NULL, 422 NULL,
423}; 423};
424 424
425static __initconst const struct x86_pmu amd_pmu = {
426 .name = "AMD",
427 .handle_irq = x86_pmu_handle_irq,
428 .disable_all = x86_pmu_disable_all,
429 .enable_all = x86_pmu_enable_all,
430 .enable = x86_pmu_enable_event,
431 .disable = x86_pmu_disable_event,
432 .hw_config = amd_pmu_hw_config,
433 .schedule_events = x86_schedule_events,
434 .eventsel = MSR_K7_EVNTSEL0,
435 .perfctr = MSR_K7_PERFCTR0,
436 .event_map = amd_pmu_event_map,
437 .max_events = ARRAY_SIZE(amd_perfmon_event_map),
438 .num_counters = AMD64_NUM_COUNTERS,
439 .cntval_bits = 48,
440 .cntval_mask = (1ULL << 48) - 1,
441 .apic = 1,
442 /* use highest bit to detect overflow */
443 .max_period = (1ULL << 47) - 1,
444 .get_event_constraints = amd_get_event_constraints,
445 .put_event_constraints = amd_put_event_constraints,
446
447 .format_attrs = amd_format_attr,
448
449 .cpu_prepare = amd_pmu_cpu_prepare,
450 .cpu_starting = amd_pmu_cpu_starting,
451 .cpu_dead = amd_pmu_cpu_dead,
452};
453
454/* AMD Family 15h */ 425/* AMD Family 15h */
455 426
456#define AMD_EVENT_TYPE_MASK 0x000000F0ULL 427#define AMD_EVENT_TYPE_MASK 0x000000F0ULL
@@ -597,8 +568,8 @@ amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, struct perf_event *ev
597 } 568 }
598} 569}
599 570
600static __initconst const struct x86_pmu amd_pmu_f15h = { 571static __initconst const struct x86_pmu amd_pmu = {
601 .name = "AMD Family 15h", 572 .name = "AMD",
602 .handle_irq = x86_pmu_handle_irq, 573 .handle_irq = x86_pmu_handle_irq,
603 .disable_all = x86_pmu_disable_all, 574 .disable_all = x86_pmu_disable_all,
604 .enable_all = x86_pmu_enable_all, 575 .enable_all = x86_pmu_enable_all,
@@ -606,50 +577,68 @@ static __initconst const struct x86_pmu amd_pmu_f15h = {
606 .disable = x86_pmu_disable_event, 577 .disable = x86_pmu_disable_event,
607 .hw_config = amd_pmu_hw_config, 578 .hw_config = amd_pmu_hw_config,
608 .schedule_events = x86_schedule_events, 579 .schedule_events = x86_schedule_events,
609 .eventsel = MSR_F15H_PERF_CTL, 580 .eventsel = MSR_K7_EVNTSEL0,
610 .perfctr = MSR_F15H_PERF_CTR, 581 .perfctr = MSR_K7_PERFCTR0,
611 .event_map = amd_pmu_event_map, 582 .event_map = amd_pmu_event_map,
612 .max_events = ARRAY_SIZE(amd_perfmon_event_map), 583 .max_events = ARRAY_SIZE(amd_perfmon_event_map),
613 .num_counters = AMD64_NUM_COUNTERS_F15H, 584 .num_counters = AMD64_NUM_COUNTERS,
614 .cntval_bits = 48, 585 .cntval_bits = 48,
615 .cntval_mask = (1ULL << 48) - 1, 586 .cntval_mask = (1ULL << 48) - 1,
616 .apic = 1, 587 .apic = 1,
617 /* use highest bit to detect overflow */ 588 /* use highest bit to detect overflow */
618 .max_period = (1ULL << 47) - 1, 589 .max_period = (1ULL << 47) - 1,
619 .get_event_constraints = amd_get_event_constraints_f15h, 590 .get_event_constraints = amd_get_event_constraints,
620 /* northbridge counters not yet implemented: */
621#if 0
622 .put_event_constraints = amd_put_event_constraints, 591 .put_event_constraints = amd_put_event_constraints,
623 592
593 .format_attrs = amd_format_attr,
594
624 .cpu_prepare = amd_pmu_cpu_prepare, 595 .cpu_prepare = amd_pmu_cpu_prepare,
625 .cpu_dead = amd_pmu_cpu_dead,
626#endif
627 .cpu_starting = amd_pmu_cpu_starting, 596 .cpu_starting = amd_pmu_cpu_starting,
628 .format_attrs = amd_format_attr, 597 .cpu_dead = amd_pmu_cpu_dead,
629}; 598};
630 599
600static int setup_event_constraints(void)
601{
602 if (boot_cpu_data.x86 >= 0x15)
603 x86_pmu.get_event_constraints = amd_get_event_constraints_f15h;
604 return 0;
605}
606
607static int setup_perfctr_core(void)
608{
609 if (!cpu_has_perfctr_core) {
610 WARN(x86_pmu.get_event_constraints == amd_get_event_constraints_f15h,
611 KERN_ERR "Odd, counter constraints enabled but no core perfctrs detected!");
612 return -ENODEV;
613 }
614
615 WARN(x86_pmu.get_event_constraints == amd_get_event_constraints,
616 KERN_ERR "hw perf events core counters need constraints handler!");
617
618 /*
619 * If core performance counter extensions exists, we must use
620 * MSR_F15H_PERF_CTL/MSR_F15H_PERF_CTR msrs. See also
621 * x86_pmu_addr_offset().
622 */
623 x86_pmu.eventsel = MSR_F15H_PERF_CTL;
624 x86_pmu.perfctr = MSR_F15H_PERF_CTR;
625 x86_pmu.num_counters = AMD64_NUM_COUNTERS_CORE;
626
627 printk(KERN_INFO "perf: AMD core performance counters detected\n");
628
629 return 0;
630}
631
631__init int amd_pmu_init(void) 632__init int amd_pmu_init(void)
632{ 633{
633 /* Performance-monitoring supported from K7 and later: */ 634 /* Performance-monitoring supported from K7 and later: */
634 if (boot_cpu_data.x86 < 6) 635 if (boot_cpu_data.x86 < 6)
635 return -ENODEV; 636 return -ENODEV;
636 637
637 /* 638 x86_pmu = amd_pmu;
638 * If core performance counter extensions exist, it must be 639
639 * family 15h, otherwise fail. See x86_pmu_addr_offset(). 640 setup_event_constraints();
640 */ 641 setup_perfctr_core();
641 switch (boot_cpu_data.x86) {
642 case 0x15:
643 if (!cpu_has_perfctr_core)
644 return -ENODEV;
645 x86_pmu = amd_pmu_f15h;
646 break;
647 default:
648 if (cpu_has_perfctr_core)
649 return -ENODEV;
650 x86_pmu = amd_pmu;
651 break;
652 }
653 642
654 /* Events are common for all AMDs */ 643 /* Events are common for all AMDs */
655 memcpy(hw_cache_event_ids, amd_hw_cache_event_ids, 644 memcpy(hw_cache_event_ids, amd_hw_cache_event_ids,
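
The amd.c rework above replaces the per-family x86_pmu switch with one baseline struct plus runtime fixups (setup_event_constraints(), setup_perfctr_core()), so a family 15h part without the core-perfctr extension degrades gracefully instead of failing. A minimal sketch of the pattern, assuming the counter counts behind AMD64_NUM_COUNTERS (4) and AMD64_NUM_COUNTERS_CORE (6):

#include <stdio.h>
#include <stdbool.h>

struct pmu_desc {
        const char *eventsel;   /* event-select MSR block (name only here) */
        int num_counters;
};

/* baseline every AMD family gets, as with the unified amd_pmu */
static struct pmu_desc pmu = { "MSR_K7_EVNTSEL0", 4 };

static void setup_perfctr_core(bool has_perfctr_core)
{
        if (!has_perfctr_core)
                return;
        /* core perfctr extension present: switch MSRs, grow counter count */
        pmu.eventsel = "MSR_F15H_PERF_CTL";
        pmu.num_counters = 6;
}

int main(void)
{
        setup_perfctr_core(true);
        printf("%s, %d counters\n", pmu.eventsel, pmu.num_counters);
        return 0;
}
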
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 187c294bc658..7a8b9d0abcaa 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -5,6 +5,8 @@
5 * among events on a single PMU. 5 * among events on a single PMU.
6 */ 6 */
7 7
8#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
9
8#include <linux/stddef.h> 10#include <linux/stddef.h>
9#include <linux/types.h> 11#include <linux/types.h>
10#include <linux/init.h> 12#include <linux/init.h>
@@ -21,14 +23,14 @@
21 */ 23 */
22static u64 intel_perfmon_event_map[PERF_COUNT_HW_MAX] __read_mostly = 24static u64 intel_perfmon_event_map[PERF_COUNT_HW_MAX] __read_mostly =
23{ 25{
24 [PERF_COUNT_HW_CPU_CYCLES] = 0x003c, 26 [PERF_COUNT_HW_CPU_CYCLES] = 0x003c,
25 [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, 27 [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
26 [PERF_COUNT_HW_CACHE_REFERENCES] = 0x4f2e, 28 [PERF_COUNT_HW_CACHE_REFERENCES] = 0x4f2e,
27 [PERF_COUNT_HW_CACHE_MISSES] = 0x412e, 29 [PERF_COUNT_HW_CACHE_MISSES] = 0x412e,
28 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4, 30 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4,
29 [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, 31 [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5,
30 [PERF_COUNT_HW_BUS_CYCLES] = 0x013c, 32 [PERF_COUNT_HW_BUS_CYCLES] = 0x013c,
31 [PERF_COUNT_HW_REF_CPU_CYCLES] = 0x0300, /* pseudo-encoding */ 33 [PERF_COUNT_HW_REF_CPU_CYCLES] = 0x0300, /* pseudo-encoding */
32}; 34};
33 35
34static struct event_constraint intel_core_event_constraints[] __read_mostly = 36static struct event_constraint intel_core_event_constraints[] __read_mostly =
@@ -747,7 +749,7 @@ static void intel_pmu_disable_all(void)
747 749
748 wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0); 750 wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
749 751
750 if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) 752 if (test_bit(INTEL_PMC_IDX_FIXED_BTS, cpuc->active_mask))
751 intel_pmu_disable_bts(); 753 intel_pmu_disable_bts();
752 754
753 intel_pmu_pebs_disable_all(); 755 intel_pmu_pebs_disable_all();
@@ -763,9 +765,9 @@ static void intel_pmu_enable_all(int added)
763 wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 765 wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL,
764 x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_guest_mask); 766 x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_guest_mask);
765 767
766 if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) { 768 if (test_bit(INTEL_PMC_IDX_FIXED_BTS, cpuc->active_mask)) {
767 struct perf_event *event = 769 struct perf_event *event =
768 cpuc->events[X86_PMC_IDX_FIXED_BTS]; 770 cpuc->events[INTEL_PMC_IDX_FIXED_BTS];
769 771
770 if (WARN_ON_ONCE(!event)) 772 if (WARN_ON_ONCE(!event))
771 return; 773 return;
@@ -871,7 +873,7 @@ static inline void intel_pmu_ack_status(u64 ack)
871 873
872static void intel_pmu_disable_fixed(struct hw_perf_event *hwc) 874static void intel_pmu_disable_fixed(struct hw_perf_event *hwc)
873{ 875{
874 int idx = hwc->idx - X86_PMC_IDX_FIXED; 876 int idx = hwc->idx - INTEL_PMC_IDX_FIXED;
875 u64 ctrl_val, mask; 877 u64 ctrl_val, mask;
876 878
877 mask = 0xfULL << (idx * 4); 879 mask = 0xfULL << (idx * 4);
@@ -886,7 +888,7 @@ static void intel_pmu_disable_event(struct perf_event *event)
886 struct hw_perf_event *hwc = &event->hw; 888 struct hw_perf_event *hwc = &event->hw;
887 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 889 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
888 890
889 if (unlikely(hwc->idx == X86_PMC_IDX_FIXED_BTS)) { 891 if (unlikely(hwc->idx == INTEL_PMC_IDX_FIXED_BTS)) {
890 intel_pmu_disable_bts(); 892 intel_pmu_disable_bts();
891 intel_pmu_drain_bts_buffer(); 893 intel_pmu_drain_bts_buffer();
892 return; 894 return;
@@ -915,7 +917,7 @@ static void intel_pmu_disable_event(struct perf_event *event)
915 917
916static void intel_pmu_enable_fixed(struct hw_perf_event *hwc) 918static void intel_pmu_enable_fixed(struct hw_perf_event *hwc)
917{ 919{
918 int idx = hwc->idx - X86_PMC_IDX_FIXED; 920 int idx = hwc->idx - INTEL_PMC_IDX_FIXED;
919 u64 ctrl_val, bits, mask; 921 u64 ctrl_val, bits, mask;
920 922
921 /* 923 /*
@@ -949,7 +951,7 @@ static void intel_pmu_enable_event(struct perf_event *event)
949 struct hw_perf_event *hwc = &event->hw; 951 struct hw_perf_event *hwc = &event->hw;
950 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 952 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
951 953
952 if (unlikely(hwc->idx == X86_PMC_IDX_FIXED_BTS)) { 954 if (unlikely(hwc->idx == INTEL_PMC_IDX_FIXED_BTS)) {
953 if (!__this_cpu_read(cpu_hw_events.enabled)) 955 if (!__this_cpu_read(cpu_hw_events.enabled))
954 return; 956 return;
955 957
@@ -1000,14 +1002,14 @@ static void intel_pmu_reset(void)
1000 1002
1001 local_irq_save(flags); 1003 local_irq_save(flags);
1002 1004
1003 printk("clearing PMU state on CPU#%d\n", smp_processor_id()); 1005 pr_info("clearing PMU state on CPU#%d\n", smp_processor_id());
1004 1006
1005 for (idx = 0; idx < x86_pmu.num_counters; idx++) { 1007 for (idx = 0; idx < x86_pmu.num_counters; idx++) {
1006 checking_wrmsrl(x86_pmu_config_addr(idx), 0ull); 1008 wrmsrl_safe(x86_pmu_config_addr(idx), 0ull);
1007 checking_wrmsrl(x86_pmu_event_addr(idx), 0ull); 1009 wrmsrl_safe(x86_pmu_event_addr(idx), 0ull);
1008 } 1010 }
1009 for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) 1011 for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++)
1010 checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull); 1012 wrmsrl_safe(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull);
1011 1013
1012 if (ds) 1014 if (ds)
1013 ds->bts_index = ds->bts_buffer_base; 1015 ds->bts_index = ds->bts_buffer_base;
@@ -1707,16 +1709,61 @@ static __init void intel_clovertown_quirk(void)
1707 * But taken together it might just make sense to not enable PEBS on 1709 * But taken together it might just make sense to not enable PEBS on
1708 * these chips. 1710 * these chips.
1709 */ 1711 */
1710 printk(KERN_WARNING "PEBS disabled due to CPU errata.\n"); 1712 pr_warn("PEBS disabled due to CPU errata\n");
1711 x86_pmu.pebs = 0; 1713 x86_pmu.pebs = 0;
1712 x86_pmu.pebs_constraints = NULL; 1714 x86_pmu.pebs_constraints = NULL;
1713} 1715}
1714 1716
1717static int intel_snb_pebs_broken(int cpu)
1718{
1719 u32 rev = UINT_MAX; /* default to broken for unknown models */
1720
1721 switch (cpu_data(cpu).x86_model) {
1722 case 42: /* SNB */
1723 rev = 0x28;
1724 break;
1725
1726 case 45: /* SNB-EP */
1727 switch (cpu_data(cpu).x86_mask) {
1728 case 6: rev = 0x618; break;
1729 case 7: rev = 0x70c; break;
1730 }
1731 }
1732
1733 return (cpu_data(cpu).microcode < rev);
1734}
1735
1736static void intel_snb_check_microcode(void)
1737{
1738 int pebs_broken = 0;
1739 int cpu;
1740
1741 get_online_cpus();
1742 for_each_online_cpu(cpu) {
1743 if ((pebs_broken = intel_snb_pebs_broken(cpu)))
1744 break;
1745 }
1746 put_online_cpus();
1747
1748 if (pebs_broken == x86_pmu.pebs_broken)
1749 return;
1750
1751 /*
1752 * Serialized by the microcode lock.
1753 */
1754 if (x86_pmu.pebs_broken) {
1755 pr_info("PEBS enabled due to microcode update\n");
1756 x86_pmu.pebs_broken = 0;
1757 } else {
1758 pr_info("PEBS disabled due to CPU errata, please upgrade microcode\n");
1759 x86_pmu.pebs_broken = 1;
1760 }
1761}
1762
1715static __init void intel_sandybridge_quirk(void) 1763static __init void intel_sandybridge_quirk(void)
1716{ 1764{
1717 printk(KERN_WARNING "PEBS disabled due to CPU errata.\n"); 1765 x86_pmu.check_microcode = intel_snb_check_microcode;
1718 x86_pmu.pebs = 0; 1766 intel_snb_check_microcode();
1719 x86_pmu.pebs_constraints = NULL;
1720} 1767}
1721 1768
1722static const struct { int id; char *name; } intel_arch_events_map[] __initconst = { 1769static const struct { int id; char *name; } intel_arch_events_map[] __initconst = {
@@ -1736,8 +1783,8 @@ static __init void intel_arch_events_quirk(void)
1736 /* disable events reported as not present by cpuid */ 1783 /* disable events reported as not present by cpuid */
1737 for_each_set_bit(bit, x86_pmu.events_mask, ARRAY_SIZE(intel_arch_events_map)) { 1784 for_each_set_bit(bit, x86_pmu.events_mask, ARRAY_SIZE(intel_arch_events_map)) {
1738 intel_perfmon_event_map[intel_arch_events_map[bit].id] = 0; 1785 intel_perfmon_event_map[intel_arch_events_map[bit].id] = 0;
1739 printk(KERN_WARNING "CPUID marked event: \'%s\' unavailable\n", 1786 pr_warn("CPUID marked event: \'%s\' unavailable\n",
1740 intel_arch_events_map[bit].name); 1787 intel_arch_events_map[bit].name);
1741 } 1788 }
1742} 1789}
1743 1790
@@ -1756,7 +1803,7 @@ static __init void intel_nehalem_quirk(void)
1756 intel_perfmon_event_map[PERF_COUNT_HW_BRANCH_MISSES] = 0x7f89; 1803 intel_perfmon_event_map[PERF_COUNT_HW_BRANCH_MISSES] = 0x7f89;
1757 ebx.split.no_branch_misses_retired = 0; 1804 ebx.split.no_branch_misses_retired = 0;
1758 x86_pmu.events_maskl = ebx.full; 1805 x86_pmu.events_maskl = ebx.full;
1759 printk(KERN_INFO "CPU erratum AAJ80 worked around\n"); 1806 pr_info("CPU erratum AAJ80 worked around\n");
1760 } 1807 }
1761} 1808}
1762 1809
@@ -1765,6 +1812,7 @@ __init int intel_pmu_init(void)
1765 union cpuid10_edx edx; 1812 union cpuid10_edx edx;
1766 union cpuid10_eax eax; 1813 union cpuid10_eax eax;
1767 union cpuid10_ebx ebx; 1814 union cpuid10_ebx ebx;
1815 struct event_constraint *c;
1768 unsigned int unused; 1816 unsigned int unused;
1769 int version; 1817 int version;
1770 1818
@@ -1800,6 +1848,8 @@ __init int intel_pmu_init(void)
1800 x86_pmu.events_maskl = ebx.full; 1848 x86_pmu.events_maskl = ebx.full;
1801 x86_pmu.events_mask_len = eax.split.mask_length; 1849 x86_pmu.events_mask_len = eax.split.mask_length;
1802 1850
1851 x86_pmu.max_pebs_events = min_t(unsigned, MAX_PEBS_EVENTS, x86_pmu.num_counters);
1852
1803 /* 1853 /*
1804 * Quirk: v2 perfmon does not report fixed-purpose events, so 1854 * Quirk: v2 perfmon does not report fixed-purpose events, so
1805 * assume at least 3 events: 1855 * assume at least 3 events:
@@ -1951,5 +2001,37 @@ __init int intel_pmu_init(void)
 		}
 	}
 
+	if (x86_pmu.num_counters > INTEL_PMC_MAX_GENERIC) {
+		WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!",
+		     x86_pmu.num_counters, INTEL_PMC_MAX_GENERIC);
+		x86_pmu.num_counters = INTEL_PMC_MAX_GENERIC;
+	}
+	x86_pmu.intel_ctrl = (1 << x86_pmu.num_counters) - 1;
+
+	if (x86_pmu.num_counters_fixed > INTEL_PMC_MAX_FIXED) {
+		WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!",
+		     x86_pmu.num_counters_fixed, INTEL_PMC_MAX_FIXED);
+		x86_pmu.num_counters_fixed = INTEL_PMC_MAX_FIXED;
+	}
+
+	x86_pmu.intel_ctrl |=
+		((1LL << x86_pmu.num_counters_fixed)-1) << INTEL_PMC_IDX_FIXED;
+
+	if (x86_pmu.event_constraints) {
+		/*
+		 * event on fixed counter2 (REF_CYCLES) only works on this
+		 * counter, so do not extend mask to generic counters
+		 */
+		for_each_event_constraint(c, x86_pmu.event_constraints) {
+			if (c->cmask != X86_RAW_EVENT_MASK
+			    || c->idxmsk64 == INTEL_PMC_MSK_FIXED_REF_CYCLES) {
+				continue;
+			}
+
+			c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1;
+			c->weight += x86_pmu.num_counters;
+		}
+	}
+
 	return 0;
 }
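The constraint loop just above widens each plain event constraint so its events may also be scheduled on the generic counters, while REF_CYCLES stays pinned to its fixed counter. A minimal userspace sketch of the mask arithmetic; the counter count and the fixed-counter base index 32 are assumptions for illustration, not values taken from this patch:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	int num_counters = 4;			/* assumed generic counter count */
	uint64_t idxmsk = 1ULL << 32;		/* one fixed counter, base index 32 assumed */

	/* same operation as c->idxmsk64 |= (1ULL << num_counters) - 1 above */
	idxmsk |= (1ULL << num_counters) - 1;
	printf("constraint mask: %#llx\n", (unsigned long long)idxmsk);	/* 0x10000000f */
	return 0;
}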
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 35e2192df9f4..629ae0b7ad90 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -248,7 +248,7 @@ void reserve_ds_buffers(void)
  */
 
 struct event_constraint bts_constraint =
-	EVENT_CONSTRAINT(0, 1ULL << X86_PMC_IDX_FIXED_BTS, 0);
+	EVENT_CONSTRAINT(0, 1ULL << INTEL_PMC_IDX_FIXED_BTS, 0);
 
 void intel_pmu_enable_bts(u64 config)
 {
@@ -295,7 +295,7 @@ int intel_pmu_drain_bts_buffer(void)
 		u64	to;
 		u64	flags;
 	};
-	struct perf_event *event = cpuc->events[X86_PMC_IDX_FIXED_BTS];
+	struct perf_event *event = cpuc->events[INTEL_PMC_IDX_FIXED_BTS];
 	struct bts_record *at, *top;
 	struct perf_output_handle handle;
 	struct perf_event_header header;
@@ -620,7 +620,7 @@ static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)
 	 * Should not happen, we program the threshold at 1 and do not
 	 * set a reset value.
 	 */
-	WARN_ON_ONCE(n > 1);
+	WARN_ONCE(n > 1, "bad leftover pebs %d\n", n);
 	at += n - 1;
 
 	__intel_pmu_pebs_event(event, iregs, at);
@@ -651,10 +651,10 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
 	 * Should not happen, we program the threshold at 1 and do not
 	 * set a reset value.
 	 */
-	WARN_ON_ONCE(n > MAX_PEBS_EVENTS);
+	WARN_ONCE(n > x86_pmu.max_pebs_events, "Unexpected number of pebs records %d\n", n);
 
 	for ( ; at < top; at++) {
-		for_each_set_bit(bit, (unsigned long *)&at->status, MAX_PEBS_EVENTS) {
+		for_each_set_bit(bit, (unsigned long *)&at->status, x86_pmu.max_pebs_events) {
 			event = cpuc->events[bit];
 			if (!test_bit(bit, cpuc->active_mask))
 				continue;
@@ -670,7 +670,7 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
 				break;
 		}
 
-		if (!event || bit >= MAX_PEBS_EVENTS)
+		if (!event || bit >= x86_pmu.max_pebs_events)
 			continue;
 
 		__intel_pmu_pebs_event(event, iregs, at);
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
new file mode 100644
index 000000000000..19faffc60886
--- /dev/null
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
@@ -0,0 +1,1850 @@
1#include "perf_event_intel_uncore.h"
2
3static struct intel_uncore_type *empty_uncore[] = { NULL, };
4static struct intel_uncore_type **msr_uncores = empty_uncore;
5static struct intel_uncore_type **pci_uncores = empty_uncore;
6/* pci bus to socket mapping */
7static int pcibus_to_physid[256] = { [0 ... 255] = -1, };
8
9static DEFINE_RAW_SPINLOCK(uncore_box_lock);
10
11/* mask of cpus that collect uncore events */
12static cpumask_t uncore_cpu_mask;
13
14/* constraint for the fixed counter */
15static struct event_constraint constraint_fixed =
16 EVENT_CONSTRAINT(~0ULL, 1 << UNCORE_PMC_IDX_FIXED, ~0ULL);
17static struct event_constraint constraint_empty =
18 EVENT_CONSTRAINT(0, 0, 0);
19
20DEFINE_UNCORE_FORMAT_ATTR(event, event, "config:0-7");
21DEFINE_UNCORE_FORMAT_ATTR(umask, umask, "config:8-15");
22DEFINE_UNCORE_FORMAT_ATTR(edge, edge, "config:18");
23DEFINE_UNCORE_FORMAT_ATTR(tid_en, tid_en, "config:19");
24DEFINE_UNCORE_FORMAT_ATTR(inv, inv, "config:23");
25DEFINE_UNCORE_FORMAT_ATTR(cmask5, cmask, "config:24-28");
26DEFINE_UNCORE_FORMAT_ATTR(cmask8, cmask, "config:24-31");
27DEFINE_UNCORE_FORMAT_ATTR(thresh8, thresh, "config:24-31");
28DEFINE_UNCORE_FORMAT_ATTR(thresh5, thresh, "config:24-28");
29DEFINE_UNCORE_FORMAT_ATTR(occ_sel, occ_sel, "config:14-15");
30DEFINE_UNCORE_FORMAT_ATTR(occ_invert, occ_invert, "config:30");
31DEFINE_UNCORE_FORMAT_ATTR(occ_edge, occ_edge, "config:14-51");
32DEFINE_UNCORE_FORMAT_ATTR(filter_tid, filter_tid, "config1:0-4");
33DEFINE_UNCORE_FORMAT_ATTR(filter_nid, filter_nid, "config1:10-17");
34DEFINE_UNCORE_FORMAT_ATTR(filter_state, filter_state, "config1:18-22");
35DEFINE_UNCORE_FORMAT_ATTR(filter_opc, filter_opc, "config1:23-31");
36DEFINE_UNCORE_FORMAT_ATTR(filter_brand0, filter_brand0, "config1:0-7");
37DEFINE_UNCORE_FORMAT_ATTR(filter_brand1, filter_brand1, "config1:8-15");
38DEFINE_UNCORE_FORMAT_ATTR(filter_brand2, filter_brand2, "config1:16-23");
39DEFINE_UNCORE_FORMAT_ATTR(filter_brand3, filter_brand3, "config1:24-31");
40
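These DEFINE_UNCORE_FORMAT_ATTR() entries publish each field's bit range through the "format" attribute groups defined further down, so tools can assemble a raw config value without hard-coding the encoding. A sketch of composing a config from the config:0-7 and config:8-15 ranges above; the specific event and umask values are made up:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t event = 0x37;			/* goes in config:0-7 */
	uint64_t umask = 0x01;			/* goes in config:8-15 */
	uint64_t config = (event << 0) | (umask << 8);

	printf("attr.config = %#llx\n", (unsigned long long)config);	/* 0x137 */
	return 0;
}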
41/* Sandy Bridge-EP uncore support */
42static struct intel_uncore_type snbep_uncore_cbox;
43static struct intel_uncore_type snbep_uncore_pcu;
44
45static void snbep_uncore_pci_disable_box(struct intel_uncore_box *box)
46{
47 struct pci_dev *pdev = box->pci_dev;
48 int box_ctl = uncore_pci_box_ctl(box);
49 u32 config;
50
51 pci_read_config_dword(pdev, box_ctl, &config);
52 config |= SNBEP_PMON_BOX_CTL_FRZ;
53 pci_write_config_dword(pdev, box_ctl, config);
54}
55
56static void snbep_uncore_pci_enable_box(struct intel_uncore_box *box)
57{
58 struct pci_dev *pdev = box->pci_dev;
59 int box_ctl = uncore_pci_box_ctl(box);
60 u32 config;
61
62 pci_read_config_dword(pdev, box_ctl, &config);
63 config &= ~SNBEP_PMON_BOX_CTL_FRZ;
64 pci_write_config_dword(pdev, box_ctl, config);
65}
66
67static void snbep_uncore_pci_enable_event(struct intel_uncore_box *box,
68 struct perf_event *event)
69{
70 struct pci_dev *pdev = box->pci_dev;
71 struct hw_perf_event *hwc = &event->hw;
72
73 pci_write_config_dword(pdev, hwc->config_base, hwc->config |
74 SNBEP_PMON_CTL_EN);
75}
76
77static void snbep_uncore_pci_disable_event(struct intel_uncore_box *box,
78 struct perf_event *event)
79{
80 struct pci_dev *pdev = box->pci_dev;
81 struct hw_perf_event *hwc = &event->hw;
82
83 pci_write_config_dword(pdev, hwc->config_base, hwc->config);
84}
85
86static u64 snbep_uncore_pci_read_counter(struct intel_uncore_box *box,
87 struct perf_event *event)
88{
89 struct pci_dev *pdev = box->pci_dev;
90 struct hw_perf_event *hwc = &event->hw;
91 u64 count;
92
93 pci_read_config_dword(pdev, hwc->event_base, (u32 *)&count);
94 pci_read_config_dword(pdev, hwc->event_base + 4, (u32 *)&count + 1);
95 return count;
96}
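The counter registers are wider than one PCI config dword, so the read above fetches two 32-bit halves into the low and high words of count; the (u32 *)&count + 1 arithmetic relies on x86 being little-endian. The same assembly written without the pointer cast, as a standalone sketch:

#include <stdio.h>
#include <stdint.h>

static uint64_t assemble_counter(uint32_t lo, uint32_t hi)
{
	/* equivalent to the two pci_read_config_dword() stores above */
	return ((uint64_t)hi << 32) | lo;
}

int main(void)
{
	printf("%#llx\n", (unsigned long long)assemble_counter(0x89abcdef, 0x1234));
	return 0;
}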
97
98static void snbep_uncore_pci_init_box(struct intel_uncore_box *box)
99{
100 struct pci_dev *pdev = box->pci_dev;
101 pci_write_config_dword(pdev, SNBEP_PCI_PMON_BOX_CTL,
102 SNBEP_PMON_BOX_CTL_INT);
103}
104
105static void snbep_uncore_msr_disable_box(struct intel_uncore_box *box)
106{
107 u64 config;
108 unsigned msr;
109
110 msr = uncore_msr_box_ctl(box);
111 if (msr) {
112 rdmsrl(msr, config);
113 config |= SNBEP_PMON_BOX_CTL_FRZ;
114 wrmsrl(msr, config);
115 return;
116 }
117}
118
119static void snbep_uncore_msr_enable_box(struct intel_uncore_box *box)
120{
121 u64 config;
122 unsigned msr;
123
124 msr = uncore_msr_box_ctl(box);
125 if (msr) {
126 rdmsrl(msr, config);
127 config &= ~SNBEP_PMON_BOX_CTL_FRZ;
128 wrmsrl(msr, config);
129 return;
130 }
131}
132
133static void snbep_uncore_msr_enable_event(struct intel_uncore_box *box,
134 struct perf_event *event)
135{
136 struct hw_perf_event *hwc = &event->hw;
137 struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
138
139 if (reg1->idx != EXTRA_REG_NONE)
140 wrmsrl(reg1->reg, reg1->config);
141
142 wrmsrl(hwc->config_base, hwc->config | SNBEP_PMON_CTL_EN);
143}
144
145static void snbep_uncore_msr_disable_event(struct intel_uncore_box *box,
146 struct perf_event *event)
147{
148 struct hw_perf_event *hwc = &event->hw;
149
150 wrmsrl(hwc->config_base, hwc->config);
151}
152
153static u64 snbep_uncore_msr_read_counter(struct intel_uncore_box *box,
154 struct perf_event *event)
155{
156 struct hw_perf_event *hwc = &event->hw;
157 u64 count;
158
159 rdmsrl(hwc->event_base, count);
160 return count;
161}
162
163static void snbep_uncore_msr_init_box(struct intel_uncore_box *box)
164{
165 unsigned msr = uncore_msr_box_ctl(box);
166 if (msr)
167 wrmsrl(msr, SNBEP_PMON_BOX_CTL_INT);
168}
169
170static struct event_constraint *
171snbep_uncore_get_constraint(struct intel_uncore_box *box,
172 struct perf_event *event)
173{
174 struct intel_uncore_extra_reg *er;
175 struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
176 unsigned long flags;
177 bool ok = false;
178
179 if (reg1->idx == EXTRA_REG_NONE || (box->phys_id >= 0 && reg1->alloc))
180 return NULL;
181
182 er = &box->shared_regs[reg1->idx];
183 raw_spin_lock_irqsave(&er->lock, flags);
184 if (!atomic_read(&er->ref) || er->config1 == reg1->config) {
185 atomic_inc(&er->ref);
186 er->config1 = reg1->config;
187 ok = true;
188 }
189 raw_spin_unlock_irqrestore(&er->lock, flags);
190
191 if (ok) {
192 if (box->phys_id >= 0)
193 reg1->alloc = 1;
194 return NULL;
195 }
196 return &constraint_empty;
197}
198
199static void snbep_uncore_put_constraint(struct intel_uncore_box *box,
200 struct perf_event *event)
201{
202 struct intel_uncore_extra_reg *er;
203 struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
204
205 if (box->phys_id < 0 || !reg1->alloc)
206 return;
207
208 er = &box->shared_regs[reg1->idx];
209 atomic_dec(&er->ref);
210 reg1->alloc = 0;
211}
212
213static int snbep_uncore_hw_config(struct intel_uncore_box *box,
214 struct perf_event *event)
215{
216 struct hw_perf_event *hwc = &event->hw;
217 struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
218
219 if (box->pmu->type == &snbep_uncore_cbox) {
220 reg1->reg = SNBEP_C0_MSR_PMON_BOX_FILTER +
221 SNBEP_CBO_MSR_OFFSET * box->pmu->pmu_idx;
222 reg1->config = event->attr.config1 &
223 SNBEP_CB0_MSR_PMON_BOX_FILTER_MASK;
224 } else if (box->pmu->type == &snbep_uncore_pcu) {
225 reg1->reg = SNBEP_PCU_MSR_PMON_BOX_FILTER;
226 reg1->config = event->attr.config1 &
227 SNBEP_PCU_MSR_PMON_BOX_FILTER_MASK;
228 } else {
229 return 0;
230 }
231 reg1->idx = 0;
232 return 0;
233}
234
235static struct attribute *snbep_uncore_formats_attr[] = {
236 &format_attr_event.attr,
237 &format_attr_umask.attr,
238 &format_attr_edge.attr,
239 &format_attr_inv.attr,
240 &format_attr_thresh8.attr,
241 NULL,
242};
243
244static struct attribute *snbep_uncore_ubox_formats_attr[] = {
245 &format_attr_event.attr,
246 &format_attr_umask.attr,
247 &format_attr_edge.attr,
248 &format_attr_inv.attr,
249 &format_attr_thresh5.attr,
250 NULL,
251};
252
253static struct attribute *snbep_uncore_cbox_formats_attr[] = {
254 &format_attr_event.attr,
255 &format_attr_umask.attr,
256 &format_attr_edge.attr,
257 &format_attr_tid_en.attr,
258 &format_attr_inv.attr,
259 &format_attr_thresh8.attr,
260 &format_attr_filter_tid.attr,
261 &format_attr_filter_nid.attr,
262 &format_attr_filter_state.attr,
263 &format_attr_filter_opc.attr,
264 NULL,
265};
266
267static struct attribute *snbep_uncore_pcu_formats_attr[] = {
268 &format_attr_event.attr,
269 &format_attr_occ_sel.attr,
270 &format_attr_edge.attr,
271 &format_attr_inv.attr,
272 &format_attr_thresh5.attr,
273 &format_attr_occ_invert.attr,
274 &format_attr_occ_edge.attr,
275 &format_attr_filter_brand0.attr,
276 &format_attr_filter_brand1.attr,
277 &format_attr_filter_brand2.attr,
278 &format_attr_filter_brand3.attr,
279 NULL,
280};
281
282static struct uncore_event_desc snbep_uncore_imc_events[] = {
283 INTEL_UNCORE_EVENT_DESC(clockticks, "event=0xff,umask=0x00"),
284 INTEL_UNCORE_EVENT_DESC(cas_count_read, "event=0x04,umask=0x03"),
285 INTEL_UNCORE_EVENT_DESC(cas_count_write, "event=0x04,umask=0x0c"),
286 { /* end: all zeroes */ },
287};
288
289static struct uncore_event_desc snbep_uncore_qpi_events[] = {
290 INTEL_UNCORE_EVENT_DESC(clockticks, "event=0x14"),
291 INTEL_UNCORE_EVENT_DESC(txl_flits_active, "event=0x00,umask=0x06"),
292 INTEL_UNCORE_EVENT_DESC(drs_data, "event=0x02,umask=0x08"),
293 INTEL_UNCORE_EVENT_DESC(ncb_data, "event=0x03,umask=0x04"),
294 { /* end: all zeroes */ },
295};
296
297static struct attribute_group snbep_uncore_format_group = {
298 .name = "format",
299 .attrs = snbep_uncore_formats_attr,
300};
301
302static struct attribute_group snbep_uncore_ubox_format_group = {
303 .name = "format",
304 .attrs = snbep_uncore_ubox_formats_attr,
305};
306
307static struct attribute_group snbep_uncore_cbox_format_group = {
308 .name = "format",
309 .attrs = snbep_uncore_cbox_formats_attr,
310};
311
312static struct attribute_group snbep_uncore_pcu_format_group = {
313 .name = "format",
314 .attrs = snbep_uncore_pcu_formats_attr,
315};
316
317static struct intel_uncore_ops snbep_uncore_msr_ops = {
318 .init_box = snbep_uncore_msr_init_box,
319 .disable_box = snbep_uncore_msr_disable_box,
320 .enable_box = snbep_uncore_msr_enable_box,
321 .disable_event = snbep_uncore_msr_disable_event,
322 .enable_event = snbep_uncore_msr_enable_event,
323 .read_counter = snbep_uncore_msr_read_counter,
324 .get_constraint = snbep_uncore_get_constraint,
325 .put_constraint = snbep_uncore_put_constraint,
326 .hw_config = snbep_uncore_hw_config,
327};
328
329static struct intel_uncore_ops snbep_uncore_pci_ops = {
330 .init_box = snbep_uncore_pci_init_box,
331 .disable_box = snbep_uncore_pci_disable_box,
332 .enable_box = snbep_uncore_pci_enable_box,
333 .disable_event = snbep_uncore_pci_disable_event,
334 .enable_event = snbep_uncore_pci_enable_event,
335 .read_counter = snbep_uncore_pci_read_counter,
336};
337
338static struct event_constraint snbep_uncore_cbox_constraints[] = {
339 UNCORE_EVENT_CONSTRAINT(0x01, 0x1),
340 UNCORE_EVENT_CONSTRAINT(0x02, 0x3),
341 UNCORE_EVENT_CONSTRAINT(0x04, 0x3),
342 UNCORE_EVENT_CONSTRAINT(0x05, 0x3),
343 UNCORE_EVENT_CONSTRAINT(0x07, 0x3),
344 UNCORE_EVENT_CONSTRAINT(0x11, 0x1),
345 UNCORE_EVENT_CONSTRAINT(0x12, 0x3),
346 UNCORE_EVENT_CONSTRAINT(0x13, 0x3),
347 UNCORE_EVENT_CONSTRAINT(0x1b, 0xc),
348 UNCORE_EVENT_CONSTRAINT(0x1c, 0xc),
349 UNCORE_EVENT_CONSTRAINT(0x1d, 0xc),
350 UNCORE_EVENT_CONSTRAINT(0x1e, 0xc),
351 EVENT_CONSTRAINT_OVERLAP(0x1f, 0xe, 0xff),
352 UNCORE_EVENT_CONSTRAINT(0x21, 0x3),
353 UNCORE_EVENT_CONSTRAINT(0x23, 0x3),
354 UNCORE_EVENT_CONSTRAINT(0x31, 0x3),
355 UNCORE_EVENT_CONSTRAINT(0x32, 0x3),
356 UNCORE_EVENT_CONSTRAINT(0x33, 0x3),
357 UNCORE_EVENT_CONSTRAINT(0x34, 0x3),
358 UNCORE_EVENT_CONSTRAINT(0x35, 0x3),
359 UNCORE_EVENT_CONSTRAINT(0x36, 0x1),
360 UNCORE_EVENT_CONSTRAINT(0x37, 0x3),
361 UNCORE_EVENT_CONSTRAINT(0x38, 0x3),
362 UNCORE_EVENT_CONSTRAINT(0x39, 0x3),
363 UNCORE_EVENT_CONSTRAINT(0x3b, 0x1),
364 EVENT_CONSTRAINT_END
365};
366
367static struct event_constraint snbep_uncore_r2pcie_constraints[] = {
368 UNCORE_EVENT_CONSTRAINT(0x10, 0x3),
369 UNCORE_EVENT_CONSTRAINT(0x11, 0x3),
370 UNCORE_EVENT_CONSTRAINT(0x12, 0x1),
371 UNCORE_EVENT_CONSTRAINT(0x23, 0x3),
372 UNCORE_EVENT_CONSTRAINT(0x24, 0x3),
373 UNCORE_EVENT_CONSTRAINT(0x25, 0x3),
374 UNCORE_EVENT_CONSTRAINT(0x26, 0x3),
375 UNCORE_EVENT_CONSTRAINT(0x32, 0x3),
376 UNCORE_EVENT_CONSTRAINT(0x33, 0x3),
377 UNCORE_EVENT_CONSTRAINT(0x34, 0x3),
378 EVENT_CONSTRAINT_END
379};
380
381static struct event_constraint snbep_uncore_r3qpi_constraints[] = {
382 UNCORE_EVENT_CONSTRAINT(0x10, 0x3),
383 UNCORE_EVENT_CONSTRAINT(0x11, 0x3),
384 UNCORE_EVENT_CONSTRAINT(0x12, 0x3),
385 UNCORE_EVENT_CONSTRAINT(0x13, 0x1),
386 UNCORE_EVENT_CONSTRAINT(0x20, 0x3),
387 UNCORE_EVENT_CONSTRAINT(0x21, 0x3),
388 UNCORE_EVENT_CONSTRAINT(0x22, 0x3),
389 UNCORE_EVENT_CONSTRAINT(0x23, 0x3),
390 UNCORE_EVENT_CONSTRAINT(0x24, 0x3),
391 UNCORE_EVENT_CONSTRAINT(0x25, 0x3),
392 UNCORE_EVENT_CONSTRAINT(0x26, 0x3),
393 UNCORE_EVENT_CONSTRAINT(0x30, 0x3),
394 UNCORE_EVENT_CONSTRAINT(0x31, 0x3),
395 UNCORE_EVENT_CONSTRAINT(0x32, 0x3),
396 UNCORE_EVENT_CONSTRAINT(0x33, 0x3),
397 UNCORE_EVENT_CONSTRAINT(0x34, 0x3),
398 UNCORE_EVENT_CONSTRAINT(0x36, 0x3),
399 UNCORE_EVENT_CONSTRAINT(0x37, 0x3),
400 EVENT_CONSTRAINT_END
401};
402
403static struct intel_uncore_type snbep_uncore_ubox = {
404 .name = "ubox",
405 .num_counters = 2,
406 .num_boxes = 1,
407 .perf_ctr_bits = 44,
408 .fixed_ctr_bits = 48,
409 .perf_ctr = SNBEP_U_MSR_PMON_CTR0,
410 .event_ctl = SNBEP_U_MSR_PMON_CTL0,
411 .event_mask = SNBEP_U_MSR_PMON_RAW_EVENT_MASK,
412 .fixed_ctr = SNBEP_U_MSR_PMON_UCLK_FIXED_CTR,
413 .fixed_ctl = SNBEP_U_MSR_PMON_UCLK_FIXED_CTL,
414 .ops = &snbep_uncore_msr_ops,
415 .format_group = &snbep_uncore_ubox_format_group,
416};
417
418static struct intel_uncore_type snbep_uncore_cbox = {
419 .name = "cbox",
420 .num_counters = 4,
421 .num_boxes = 8,
422 .perf_ctr_bits = 44,
423 .event_ctl = SNBEP_C0_MSR_PMON_CTL0,
424 .perf_ctr = SNBEP_C0_MSR_PMON_CTR0,
425 .event_mask = SNBEP_CBO_MSR_PMON_RAW_EVENT_MASK,
426 .box_ctl = SNBEP_C0_MSR_PMON_BOX_CTL,
427 .msr_offset = SNBEP_CBO_MSR_OFFSET,
428 .num_shared_regs = 1,
429 .constraints = snbep_uncore_cbox_constraints,
430 .ops = &snbep_uncore_msr_ops,
431 .format_group = &snbep_uncore_cbox_format_group,
432};
433
434static struct intel_uncore_type snbep_uncore_pcu = {
435 .name = "pcu",
436 .num_counters = 4,
437 .num_boxes = 1,
438 .perf_ctr_bits = 48,
439 .perf_ctr = SNBEP_PCU_MSR_PMON_CTR0,
440 .event_ctl = SNBEP_PCU_MSR_PMON_CTL0,
441 .event_mask = SNBEP_PCU_MSR_PMON_RAW_EVENT_MASK,
442 .box_ctl = SNBEP_PCU_MSR_PMON_BOX_CTL,
443 .num_shared_regs = 1,
444 .ops = &snbep_uncore_msr_ops,
445 .format_group = &snbep_uncore_pcu_format_group,
446};
447
448static struct intel_uncore_type *snbep_msr_uncores[] = {
449 &snbep_uncore_ubox,
450 &snbep_uncore_cbox,
451 &snbep_uncore_pcu,
452 NULL,
453};
454
455#define SNBEP_UNCORE_PCI_COMMON_INIT() \
456 .perf_ctr = SNBEP_PCI_PMON_CTR0, \
457 .event_ctl = SNBEP_PCI_PMON_CTL0, \
458 .event_mask = SNBEP_PMON_RAW_EVENT_MASK, \
459 .box_ctl = SNBEP_PCI_PMON_BOX_CTL, \
460 .ops = &snbep_uncore_pci_ops, \
461 .format_group = &snbep_uncore_format_group
462
463static struct intel_uncore_type snbep_uncore_ha = {
464 .name = "ha",
465 .num_counters = 4,
466 .num_boxes = 1,
467 .perf_ctr_bits = 48,
468 SNBEP_UNCORE_PCI_COMMON_INIT(),
469};
470
471static struct intel_uncore_type snbep_uncore_imc = {
472 .name = "imc",
473 .num_counters = 4,
474 .num_boxes = 4,
475 .perf_ctr_bits = 48,
476 .fixed_ctr_bits = 48,
477 .fixed_ctr = SNBEP_MC_CHy_PCI_PMON_FIXED_CTR,
478 .fixed_ctl = SNBEP_MC_CHy_PCI_PMON_FIXED_CTL,
479 .event_descs = snbep_uncore_imc_events,
480 SNBEP_UNCORE_PCI_COMMON_INIT(),
481};
482
483static struct intel_uncore_type snbep_uncore_qpi = {
484 .name = "qpi",
485 .num_counters = 4,
486 .num_boxes = 2,
487 .perf_ctr_bits = 48,
488 .event_descs = snbep_uncore_qpi_events,
489 SNBEP_UNCORE_PCI_COMMON_INIT(),
490};
491
492
493static struct intel_uncore_type snbep_uncore_r2pcie = {
494 .name = "r2pcie",
495 .num_counters = 4,
496 .num_boxes = 1,
497 .perf_ctr_bits = 44,
498 .constraints = snbep_uncore_r2pcie_constraints,
499 SNBEP_UNCORE_PCI_COMMON_INIT(),
500};
501
502static struct intel_uncore_type snbep_uncore_r3qpi = {
503 .name = "r3qpi",
504 .num_counters = 3,
505 .num_boxes = 2,
506 .perf_ctr_bits = 44,
507 .constraints = snbep_uncore_r3qpi_constraints,
508 SNBEP_UNCORE_PCI_COMMON_INIT(),
509};
510
511static struct intel_uncore_type *snbep_pci_uncores[] = {
512 &snbep_uncore_ha,
513 &snbep_uncore_imc,
514 &snbep_uncore_qpi,
515 &snbep_uncore_r2pcie,
516 &snbep_uncore_r3qpi,
517 NULL,
518};
519
520static DEFINE_PCI_DEVICE_TABLE(snbep_uncore_pci_ids) = {
521 { /* Home Agent */
522 PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_HA),
523 .driver_data = (unsigned long)&snbep_uncore_ha,
524 },
525 { /* MC Channel 0 */
526 PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_IMC0),
527 .driver_data = (unsigned long)&snbep_uncore_imc,
528 },
529 { /* MC Channel 1 */
530 PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_IMC1),
531 .driver_data = (unsigned long)&snbep_uncore_imc,
532 },
533 { /* MC Channel 2 */
534 PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_IMC2),
535 .driver_data = (unsigned long)&snbep_uncore_imc,
536 },
537 { /* MC Channel 3 */
538 PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_IMC3),
539 .driver_data = (unsigned long)&snbep_uncore_imc,
540 },
541 { /* QPI Port 0 */
542 PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_QPI0),
543 .driver_data = (unsigned long)&snbep_uncore_qpi,
544 },
545 { /* QPI Port 1 */
546 PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_QPI1),
547 .driver_data = (unsigned long)&snbep_uncore_qpi,
548 },
549 { /* P2PCIe */
550 PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_R2PCIE),
551 .driver_data = (unsigned long)&snbep_uncore_r2pcie,
552 },
553 { /* R3QPI Link 0 */
554 PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_R3QPI0),
555 .driver_data = (unsigned long)&snbep_uncore_r3qpi,
556 },
557 { /* R3QPI Link 1 */
558 PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_R3QPI1),
559 .driver_data = (unsigned long)&snbep_uncore_r3qpi,
560 },
561 { /* end: all zeroes */ }
562};
563
564static struct pci_driver snbep_uncore_pci_driver = {
565 .name = "snbep_uncore",
566 .id_table = snbep_uncore_pci_ids,
567};
568
569/*
570 * build pci bus to socket mapping
571 */
572static void snbep_pci2phy_map_init(void)
573{
574 struct pci_dev *ubox_dev = NULL;
575 int i, bus, nodeid;
576 u32 config;
577
578 while (1) {
579 /* find the UBOX device */
580 ubox_dev = pci_get_device(PCI_VENDOR_ID_INTEL,
581 PCI_DEVICE_ID_INTEL_JAKETOWN_UBOX,
582 ubox_dev);
583 if (!ubox_dev)
584 break;
585 bus = ubox_dev->bus->number;
586 /* get the Node ID of the local register */
587 pci_read_config_dword(ubox_dev, 0x40, &config);
588 nodeid = config;
589 /* get the Node ID mapping */
590 pci_read_config_dword(ubox_dev, 0x54, &config);
591 /*
592		 * each 3-bit field in the Node ID mapping register maps
593		 * to a particular node.
594 */
595 for (i = 0; i < 8; i++) {
596 if (nodeid == ((config >> (3 * i)) & 0x7)) {
597 pcibus_to_physid[bus] = i;
598 break;
599 }
600 }
601	}
602 return;
603}
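The mapping register read at offset 0x54 packs eight 3-bit node ids, one per slot; the loop finds which slot holds the local node id read from offset 0x40, and that slot number becomes the physical package id for the bus. A userspace sketch of the same decode, with a made-up register value:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint32_t nodeid = 2;		/* low bits of the dword at 0x40 */
	uint32_t mapping = 0x76543210;	/* dword at 0x54 (sample value) */
	int i;

	for (i = 0; i < 8; i++) {
		if (((mapping >> (3 * i)) & 0x7) == nodeid) {
			printf("bus belongs to physical package %d\n", i);
			break;
		}
	}
	return 0;
}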
604/* end of Sandy Bridge-EP uncore support */
605
606
607/* Sandy Bridge uncore support */
608static void snb_uncore_msr_enable_event(struct intel_uncore_box *box,
609 struct perf_event *event)
610{
611 struct hw_perf_event *hwc = &event->hw;
612
613 if (hwc->idx < UNCORE_PMC_IDX_FIXED)
614 wrmsrl(hwc->config_base, hwc->config | SNB_UNC_CTL_EN);
615 else
616 wrmsrl(hwc->config_base, SNB_UNC_CTL_EN);
617}
618
619static void snb_uncore_msr_disable_event(struct intel_uncore_box *box,
620 struct perf_event *event)
621{
622 wrmsrl(event->hw.config_base, 0);
623}
624
625static u64 snb_uncore_msr_read_counter(struct intel_uncore_box *box,
626 struct perf_event *event)
627{
628 u64 count;
629 rdmsrl(event->hw.event_base, count);
630 return count;
631}
632
633static void snb_uncore_msr_init_box(struct intel_uncore_box *box)
634{
635 if (box->pmu->pmu_idx == 0) {
636 wrmsrl(SNB_UNC_PERF_GLOBAL_CTL,
637 SNB_UNC_GLOBAL_CTL_EN | SNB_UNC_GLOBAL_CTL_CORE_ALL);
638 }
639}
640
641static struct attribute *snb_uncore_formats_attr[] = {
642 &format_attr_event.attr,
643 &format_attr_umask.attr,
644 &format_attr_edge.attr,
645 &format_attr_inv.attr,
646 &format_attr_cmask5.attr,
647 NULL,
648};
649
650static struct attribute_group snb_uncore_format_group = {
651 .name = "format",
652 .attrs = snb_uncore_formats_attr,
653};
654
655static struct intel_uncore_ops snb_uncore_msr_ops = {
656 .init_box = snb_uncore_msr_init_box,
657 .disable_event = snb_uncore_msr_disable_event,
658 .enable_event = snb_uncore_msr_enable_event,
659 .read_counter = snb_uncore_msr_read_counter,
660};
661
662static struct event_constraint snb_uncore_cbox_constraints[] = {
663 UNCORE_EVENT_CONSTRAINT(0x80, 0x1),
664 UNCORE_EVENT_CONSTRAINT(0x83, 0x1),
665 EVENT_CONSTRAINT_END
666};
667
668static struct intel_uncore_type snb_uncore_cbox = {
669 .name = "cbox",
670 .num_counters = 2,
671 .num_boxes = 4,
672 .perf_ctr_bits = 44,
673 .fixed_ctr_bits = 48,
674 .perf_ctr = SNB_UNC_CBO_0_PER_CTR0,
675 .event_ctl = SNB_UNC_CBO_0_PERFEVTSEL0,
676 .fixed_ctr = SNB_UNC_FIXED_CTR,
677 .fixed_ctl = SNB_UNC_FIXED_CTR_CTRL,
678 .single_fixed = 1,
679 .event_mask = SNB_UNC_RAW_EVENT_MASK,
680 .msr_offset = SNB_UNC_CBO_MSR_OFFSET,
681 .constraints = snb_uncore_cbox_constraints,
682 .ops = &snb_uncore_msr_ops,
683 .format_group = &snb_uncore_format_group,
684};
685
686static struct intel_uncore_type *snb_msr_uncores[] = {
687 &snb_uncore_cbox,
688 NULL,
689};
690/* end of Sandy Bridge uncore support */
691
692/* Nehalem uncore support */
693static void nhm_uncore_msr_disable_box(struct intel_uncore_box *box)
694{
695 wrmsrl(NHM_UNC_PERF_GLOBAL_CTL, 0);
696}
697
698static void nhm_uncore_msr_enable_box(struct intel_uncore_box *box)
699{
700 wrmsrl(NHM_UNC_PERF_GLOBAL_CTL,
701 NHM_UNC_GLOBAL_CTL_EN_PC_ALL | NHM_UNC_GLOBAL_CTL_EN_FC);
702}
703
704static void nhm_uncore_msr_enable_event(struct intel_uncore_box *box,
705 struct perf_event *event)
706{
707 struct hw_perf_event *hwc = &event->hw;
708
709 if (hwc->idx < UNCORE_PMC_IDX_FIXED)
710 wrmsrl(hwc->config_base, hwc->config | SNB_UNC_CTL_EN);
711 else
712 wrmsrl(hwc->config_base, NHM_UNC_FIXED_CTR_CTL_EN);
713}
714
715static struct attribute *nhm_uncore_formats_attr[] = {
716 &format_attr_event.attr,
717 &format_attr_umask.attr,
718 &format_attr_edge.attr,
719 &format_attr_inv.attr,
720 &format_attr_cmask8.attr,
721 NULL,
722};
723
724static struct attribute_group nhm_uncore_format_group = {
725 .name = "format",
726 .attrs = nhm_uncore_formats_attr,
727};
728
729static struct uncore_event_desc nhm_uncore_events[] = {
730 INTEL_UNCORE_EVENT_DESC(clockticks, "event=0xff,umask=0x00"),
731 INTEL_UNCORE_EVENT_DESC(qmc_writes_full_any, "event=0x2f,umask=0x0f"),
732 INTEL_UNCORE_EVENT_DESC(qmc_normal_reads_any, "event=0x2c,umask=0x0f"),
733 INTEL_UNCORE_EVENT_DESC(qhl_request_ioh_reads, "event=0x20,umask=0x01"),
734 INTEL_UNCORE_EVENT_DESC(qhl_request_ioh_writes, "event=0x20,umask=0x02"),
735 INTEL_UNCORE_EVENT_DESC(qhl_request_remote_reads, "event=0x20,umask=0x04"),
736 INTEL_UNCORE_EVENT_DESC(qhl_request_remote_writes, "event=0x20,umask=0x08"),
737 INTEL_UNCORE_EVENT_DESC(qhl_request_local_reads, "event=0x20,umask=0x10"),
738 INTEL_UNCORE_EVENT_DESC(qhl_request_local_writes, "event=0x20,umask=0x20"),
739 { /* end: all zeroes */ },
740};
741
742static struct intel_uncore_ops nhm_uncore_msr_ops = {
743 .disable_box = nhm_uncore_msr_disable_box,
744 .enable_box = nhm_uncore_msr_enable_box,
745 .disable_event = snb_uncore_msr_disable_event,
746 .enable_event = nhm_uncore_msr_enable_event,
747 .read_counter = snb_uncore_msr_read_counter,
748};
749
750static struct intel_uncore_type nhm_uncore = {
751 .name = "",
752 .num_counters = 8,
753 .num_boxes = 1,
754 .perf_ctr_bits = 48,
755 .fixed_ctr_bits = 48,
756 .event_ctl = NHM_UNC_PERFEVTSEL0,
757 .perf_ctr = NHM_UNC_UNCORE_PMC0,
758 .fixed_ctr = NHM_UNC_FIXED_CTR,
759 .fixed_ctl = NHM_UNC_FIXED_CTR_CTRL,
760 .event_mask = NHM_UNC_RAW_EVENT_MASK,
761 .event_descs = nhm_uncore_events,
762 .ops = &nhm_uncore_msr_ops,
763 .format_group = &nhm_uncore_format_group,
764};
765
766static struct intel_uncore_type *nhm_msr_uncores[] = {
767 &nhm_uncore,
768 NULL,
769};
770/* end of Nehalem uncore support */
771
772static void uncore_assign_hw_event(struct intel_uncore_box *box,
773 struct perf_event *event, int idx)
774{
775 struct hw_perf_event *hwc = &event->hw;
776
777 hwc->idx = idx;
778 hwc->last_tag = ++box->tags[idx];
779
780 if (hwc->idx == UNCORE_PMC_IDX_FIXED) {
781 hwc->event_base = uncore_fixed_ctr(box);
782 hwc->config_base = uncore_fixed_ctl(box);
783 return;
784 }
785
786 hwc->config_base = uncore_event_ctl(box, hwc->idx);
787 hwc->event_base = uncore_perf_ctr(box, hwc->idx);
788}
789
790static void uncore_perf_event_update(struct intel_uncore_box *box,
791 struct perf_event *event)
792{
793 u64 prev_count, new_count, delta;
794 int shift;
795
796 if (event->hw.idx >= UNCORE_PMC_IDX_FIXED)
797 shift = 64 - uncore_fixed_ctr_bits(box);
798 else
799 shift = 64 - uncore_perf_ctr_bits(box);
800
801 /* the hrtimer might modify the previous event value */
802again:
803 prev_count = local64_read(&event->hw.prev_count);
804 new_count = uncore_read_counter(box, event);
805 if (local64_xchg(&event->hw.prev_count, new_count) != prev_count)
806 goto again;
807
808 delta = (new_count << shift) - (prev_count << shift);
809 delta >>= shift;
810
811 local64_add(delta, &event->count);
812}
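The shift pair above truncates both samples to the real counter width (44 or 48 bits), so a delta taken across a counter wraparound still comes out correct. A standalone demonstration for a 44-bit counter; the sample values are arbitrary:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	int shift = 64 - 44;			/* 44-bit counter */
	uint64_t prev = (1ULL << 44) - 10;	/* just before the wrap */
	uint64_t now = 5;			/* just after the wrap */
	uint64_t delta = (now << shift) - (prev << shift);

	delta >>= shift;
	printf("delta = %llu\n", (unsigned long long)delta);	/* prints 15 */
	return 0;
}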
813
814/*
815 * The overflow interrupt is unavailable for SandyBridge-EP and broken
816 * on SandyBridge, so we use an hrtimer to poll the counters
817 * periodically, before they can wrap.
818 */
819static enum hrtimer_restart uncore_pmu_hrtimer(struct hrtimer *hrtimer)
820{
821 struct intel_uncore_box *box;
822 unsigned long flags;
823 int bit;
824
825 box = container_of(hrtimer, struct intel_uncore_box, hrtimer);
826 if (!box->n_active || box->cpu != smp_processor_id())
827 return HRTIMER_NORESTART;
828 /*
829	 * disable local interrupts to prevent uncore_pmu_event_start/stop
830	 * from interrupting the update process
831 */
832 local_irq_save(flags);
833
834 for_each_set_bit(bit, box->active_mask, UNCORE_PMC_IDX_MAX)
835 uncore_perf_event_update(box, box->events[bit]);
836
837 local_irq_restore(flags);
838
839 hrtimer_forward_now(hrtimer, ns_to_ktime(UNCORE_PMU_HRTIMER_INTERVAL));
840 return HRTIMER_RESTART;
841}
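For scale (back-of-envelope reasoning, not a bound taken from this patch): with the 60 * NSEC_PER_SEC interval defined in the header below, even a 44-bit counter incrementing at 4 GHz takes about 2^44 / 4e9, roughly 4400 seconds, to wrap, so a 60-second poll reads each counter dozens of times per wraparound.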
842
843static void uncore_pmu_start_hrtimer(struct intel_uncore_box *box)
844{
845 __hrtimer_start_range_ns(&box->hrtimer,
846 ns_to_ktime(UNCORE_PMU_HRTIMER_INTERVAL), 0,
847 HRTIMER_MODE_REL_PINNED, 0);
848}
849
850static void uncore_pmu_cancel_hrtimer(struct intel_uncore_box *box)
851{
852 hrtimer_cancel(&box->hrtimer);
853}
854
855static void uncore_pmu_init_hrtimer(struct intel_uncore_box *box)
856{
857 hrtimer_init(&box->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
858 box->hrtimer.function = uncore_pmu_hrtimer;
859}
860
861struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type,
862 int cpu)
863{
864 struct intel_uncore_box *box;
865 int i, size;
866
867 size = sizeof(*box) + type->num_shared_regs *
868 sizeof(struct intel_uncore_extra_reg);
869
870 box = kmalloc_node(size, GFP_KERNEL | __GFP_ZERO, cpu_to_node(cpu));
871 if (!box)
872 return NULL;
873
874 for (i = 0; i < type->num_shared_regs; i++)
875 raw_spin_lock_init(&box->shared_regs[i].lock);
876
877 uncore_pmu_init_hrtimer(box);
878 atomic_set(&box->refcnt, 1);
879 box->cpu = -1;
880 box->phys_id = -1;
881
882 return box;
883}
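uncore_alloc_box() sizes one allocation to hold the box plus num_shared_regs trailing extra-reg slots, the usual flexible-array pattern. A generic userspace sketch of the same sizing; the types here are stand-ins, not the kernel structs:

#include <stdlib.h>

struct extra_reg { long config; };
struct box {
	int n;
	struct extra_reg regs[];	/* flexible array member */
};

static struct box *alloc_box(int n)
{
	/* one zeroed allocation covering the header plus n trailing slots */
	struct box *b = calloc(1, sizeof(*b) + n * sizeof(struct extra_reg));

	if (b)
		b->n = n;
	return b;
}

int main(void)
{
	struct box *b = alloc_box(1);

	free(b);
	return 0;
}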
884
885static struct intel_uncore_box *
886uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu)
887{
888	struct intel_uncore_box *box;
889
890 box = *per_cpu_ptr(pmu->box, cpu);
891 if (box)
892 return box;
893
894 raw_spin_lock(&uncore_box_lock);
895 list_for_each_entry(box, &pmu->box_list, list) {
896 if (box->phys_id == topology_physical_package_id(cpu)) {
897 atomic_inc(&box->refcnt);
898 *per_cpu_ptr(pmu->box, cpu) = box;
899 break;
900 }
901 }
902 raw_spin_unlock(&uncore_box_lock);
903
904 return *per_cpu_ptr(pmu->box, cpu);
905}
906
907static struct intel_uncore_pmu *uncore_event_to_pmu(struct perf_event *event)
908{
909 return container_of(event->pmu, struct intel_uncore_pmu, pmu);
910}
911
912static struct intel_uncore_box *uncore_event_to_box(struct perf_event *event)
913{
914 /*
915	 * perf core schedules events on the basis of cpu; uncore events are
916	 * collected by one of the cpus inside a physical package.
917 */
918 return uncore_pmu_to_box(uncore_event_to_pmu(event),
919 smp_processor_id());
920}
921
922static int uncore_collect_events(struct intel_uncore_box *box,
923 struct perf_event *leader, bool dogrp)
924{
925 struct perf_event *event;
926 int n, max_count;
927
928 max_count = box->pmu->type->num_counters;
929 if (box->pmu->type->fixed_ctl)
930 max_count++;
931
932 if (box->n_events >= max_count)
933 return -EINVAL;
934
935 n = box->n_events;
936 box->event_list[n] = leader;
937 n++;
938 if (!dogrp)
939 return n;
940
941 list_for_each_entry(event, &leader->sibling_list, group_entry) {
942 if (event->state <= PERF_EVENT_STATE_OFF)
943 continue;
944
945 if (n >= max_count)
946 return -EINVAL;
947
948 box->event_list[n] = event;
949 n++;
950 }
951 return n;
952}
953
954static struct event_constraint *
955uncore_get_event_constraint(struct intel_uncore_box *box,
956 struct perf_event *event)
957{
958 struct intel_uncore_type *type = box->pmu->type;
959 struct event_constraint *c;
960
961 if (type->ops->get_constraint) {
962 c = type->ops->get_constraint(box, event);
963 if (c)
964 return c;
965 }
966
967 if (event->hw.config == ~0ULL)
968 return &constraint_fixed;
969
970 if (type->constraints) {
971 for_each_event_constraint(c, type->constraints) {
972 if ((event->hw.config & c->cmask) == c->code)
973 return c;
974 }
975 }
976
977 return &type->unconstrainted;
978}
979
980static void uncore_put_event_constraint(struct intel_uncore_box *box,
981 struct perf_event *event)
982{
983 if (box->pmu->type->ops->put_constraint)
984 box->pmu->type->ops->put_constraint(box, event);
985}
986
987static int uncore_assign_events(struct intel_uncore_box *box,
988 int assign[], int n)
989{
990 unsigned long used_mask[BITS_TO_LONGS(UNCORE_PMC_IDX_MAX)];
991 struct event_constraint *c, *constraints[UNCORE_PMC_IDX_MAX];
992 int i, wmin, wmax, ret = 0;
993 struct hw_perf_event *hwc;
994
995 bitmap_zero(used_mask, UNCORE_PMC_IDX_MAX);
996
997 for (i = 0, wmin = UNCORE_PMC_IDX_MAX, wmax = 0; i < n; i++) {
998 c = uncore_get_event_constraint(box, box->event_list[i]);
999 constraints[i] = c;
1000 wmin = min(wmin, c->weight);
1001 wmax = max(wmax, c->weight);
1002 }
1003
1004 /* fastpath, try to reuse previous register */
1005 for (i = 0; i < n; i++) {
1006 hwc = &box->event_list[i]->hw;
1007 c = constraints[i];
1008
1009 /* never assigned */
1010 if (hwc->idx == -1)
1011 break;
1012
1013 /* constraint still honored */
1014 if (!test_bit(hwc->idx, c->idxmsk))
1015 break;
1016
1017 /* not already used */
1018 if (test_bit(hwc->idx, used_mask))
1019 break;
1020
1021 __set_bit(hwc->idx, used_mask);
1022 if (assign)
1023 assign[i] = hwc->idx;
1024 }
1025 /* slow path */
1026 if (i != n)
1027 ret = perf_assign_events(constraints, n, wmin, wmax, assign);
1028
1029 if (!assign || ret) {
1030 for (i = 0; i < n; i++)
1031 uncore_put_event_constraint(box, box->event_list[i]);
1032 }
1033 return ret ? -EINVAL : 0;
1034}
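When the fastpath fails, perf_assign_events() places events in order of constraint weight, most constrained first, so a narrowly constrained event is not starved by a flexible one. A simplified standalone sketch of that idea, not the kernel algorithm verbatim:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	/* allowed-counter masks per event; weight = number of set bits */
	uint64_t idxmsk[3] = { 0x1, 0x3, 0x6 };
	int assign[3] = { -1, -1, -1 };
	uint64_t used = 0;
	int w, e, idx;

	for (w = 1; w <= 2; w++)		/* lowest weight first */
		for (e = 0; e < 3; e++) {
			if (assign[e] != -1 ||
			    __builtin_popcountll(idxmsk[e]) != w)
				continue;
			for (idx = 0; idx < 64; idx++)
				if (((idxmsk[e] >> idx) & 1) &&
				    !((used >> idx) & 1)) {
					used |= 1ULL << idx;
					assign[e] = idx;
					break;
				}
		}

	for (e = 0; e < 3; e++)
		printf("event %d -> counter %d\n", e, assign[e]);
	return 0;
}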
1035
1036static void uncore_pmu_event_start(struct perf_event *event, int flags)
1037{
1038 struct intel_uncore_box *box = uncore_event_to_box(event);
1039 int idx = event->hw.idx;
1040
1041 if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
1042 return;
1043
1044 if (WARN_ON_ONCE(idx == -1 || idx >= UNCORE_PMC_IDX_MAX))
1045 return;
1046
1047 event->hw.state = 0;
1048 box->events[idx] = event;
1049 box->n_active++;
1050 __set_bit(idx, box->active_mask);
1051
1052 local64_set(&event->hw.prev_count, uncore_read_counter(box, event));
1053 uncore_enable_event(box, event);
1054
1055 if (box->n_active == 1) {
1056 uncore_enable_box(box);
1057 uncore_pmu_start_hrtimer(box);
1058 }
1059}
1060
1061static void uncore_pmu_event_stop(struct perf_event *event, int flags)
1062{
1063 struct intel_uncore_box *box = uncore_event_to_box(event);
1064 struct hw_perf_event *hwc = &event->hw;
1065
1066 if (__test_and_clear_bit(hwc->idx, box->active_mask)) {
1067 uncore_disable_event(box, event);
1068 box->n_active--;
1069 box->events[hwc->idx] = NULL;
1070 WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
1071 hwc->state |= PERF_HES_STOPPED;
1072
1073 if (box->n_active == 0) {
1074 uncore_disable_box(box);
1075 uncore_pmu_cancel_hrtimer(box);
1076 }
1077 }
1078
1079 if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
1080 /*
1081		 * Drain the remaining delta count out of an event
1082 * that we are disabling:
1083 */
1084 uncore_perf_event_update(box, event);
1085 hwc->state |= PERF_HES_UPTODATE;
1086 }
1087}
1088
1089static int uncore_pmu_event_add(struct perf_event *event, int flags)
1090{
1091 struct intel_uncore_box *box = uncore_event_to_box(event);
1092 struct hw_perf_event *hwc = &event->hw;
1093 int assign[UNCORE_PMC_IDX_MAX];
1094 int i, n, ret;
1095
1096 if (!box)
1097 return -ENODEV;
1098
1099 ret = n = uncore_collect_events(box, event, false);
1100 if (ret < 0)
1101 return ret;
1102
1103 hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
1104 if (!(flags & PERF_EF_START))
1105 hwc->state |= PERF_HES_ARCH;
1106
1107 ret = uncore_assign_events(box, assign, n);
1108 if (ret)
1109 return ret;
1110
1111 /* save events moving to new counters */
1112 for (i = 0; i < box->n_events; i++) {
1113 event = box->event_list[i];
1114 hwc = &event->hw;
1115
1116 if (hwc->idx == assign[i] &&
1117 hwc->last_tag == box->tags[assign[i]])
1118 continue;
1119 /*
1120 * Ensure we don't accidentally enable a stopped
1121 * counter simply because we rescheduled.
1122 */
1123 if (hwc->state & PERF_HES_STOPPED)
1124 hwc->state |= PERF_HES_ARCH;
1125
1126 uncore_pmu_event_stop(event, PERF_EF_UPDATE);
1127 }
1128
1129 /* reprogram moved events into new counters */
1130 for (i = 0; i < n; i++) {
1131 event = box->event_list[i];
1132 hwc = &event->hw;
1133
1134 if (hwc->idx != assign[i] ||
1135 hwc->last_tag != box->tags[assign[i]])
1136 uncore_assign_hw_event(box, event, assign[i]);
1137 else if (i < box->n_events)
1138 continue;
1139
1140 if (hwc->state & PERF_HES_ARCH)
1141 continue;
1142
1143 uncore_pmu_event_start(event, 0);
1144 }
1145 box->n_events = n;
1146
1147 return 0;
1148}
1149
1150static void uncore_pmu_event_del(struct perf_event *event, int flags)
1151{
1152 struct intel_uncore_box *box = uncore_event_to_box(event);
1153 int i;
1154
1155 uncore_pmu_event_stop(event, PERF_EF_UPDATE);
1156
1157 for (i = 0; i < box->n_events; i++) {
1158 if (event == box->event_list[i]) {
1159 uncore_put_event_constraint(box, event);
1160
1161 while (++i < box->n_events)
1162 box->event_list[i - 1] = box->event_list[i];
1163
1164 --box->n_events;
1165 break;
1166 }
1167 }
1168
1169 event->hw.idx = -1;
1170 event->hw.last_tag = ~0ULL;
1171}
1172
1173static void uncore_pmu_event_read(struct perf_event *event)
1174{
1175 struct intel_uncore_box *box = uncore_event_to_box(event);
1176 uncore_perf_event_update(box, event);
1177}
1178
1179/*
1180 * validation ensures the group can be loaded onto the
1181 * PMU if it was the only group available.
1182 */
1183static int uncore_validate_group(struct intel_uncore_pmu *pmu,
1184 struct perf_event *event)
1185{
1186 struct perf_event *leader = event->group_leader;
1187 struct intel_uncore_box *fake_box;
1188 int ret = -EINVAL, n;
1189
1190 fake_box = uncore_alloc_box(pmu->type, smp_processor_id());
1191 if (!fake_box)
1192 return -ENOMEM;
1193
1194 fake_box->pmu = pmu;
1195 /*
1196	 * the event is not yet connected with its
1197	 * siblings; therefore we must first collect
1198 * existing siblings, then add the new event
1199 * before we can simulate the scheduling
1200 */
1201 n = uncore_collect_events(fake_box, leader, true);
1202 if (n < 0)
1203 goto out;
1204
1205 fake_box->n_events = n;
1206 n = uncore_collect_events(fake_box, event, false);
1207 if (n < 0)
1208 goto out;
1209
1210 fake_box->n_events = n;
1211
1212 ret = uncore_assign_events(fake_box, NULL, n);
1213out:
1214 kfree(fake_box);
1215 return ret;
1216}
1217
1218int uncore_pmu_event_init(struct perf_event *event)
1219{
1220 struct intel_uncore_pmu *pmu;
1221 struct intel_uncore_box *box;
1222 struct hw_perf_event *hwc = &event->hw;
1223 int ret;
1224
1225 if (event->attr.type != event->pmu->type)
1226 return -ENOENT;
1227
1228 pmu = uncore_event_to_pmu(event);
1229 /* no device found for this pmu */
1230 if (pmu->func_id < 0)
1231 return -ENOENT;
1232
1233 /*
1234	 * The uncore PMU measures at all privilege levels all the time,
1235	 * so it doesn't make sense to specify any exclude bits.
1236 */
1237 if (event->attr.exclude_user || event->attr.exclude_kernel ||
1238 event->attr.exclude_hv || event->attr.exclude_idle)
1239 return -EINVAL;
1240
1241 /* Sampling not supported yet */
1242 if (hwc->sample_period)
1243 return -EINVAL;
1244
1245 /*
1246 * Place all uncore events for a particular physical package
1247 * onto a single cpu
1248 */
1249 if (event->cpu < 0)
1250 return -EINVAL;
1251 box = uncore_pmu_to_box(pmu, event->cpu);
1252 if (!box || box->cpu < 0)
1253 return -EINVAL;
1254 event->cpu = box->cpu;
1255
1256 event->hw.idx = -1;
1257 event->hw.last_tag = ~0ULL;
1258 event->hw.extra_reg.idx = EXTRA_REG_NONE;
1259
1260 if (event->attr.config == UNCORE_FIXED_EVENT) {
1261 /* no fixed counter */
1262 if (!pmu->type->fixed_ctl)
1263 return -EINVAL;
1264 /*
1265 * if there is only one fixed counter, only the first pmu
1266 * can access the fixed counter
1267 */
1268 if (pmu->type->single_fixed && pmu->pmu_idx > 0)
1269 return -EINVAL;
1270 hwc->config = ~0ULL;
1271 } else {
1272 hwc->config = event->attr.config & pmu->type->event_mask;
1273 if (pmu->type->ops->hw_config) {
1274 ret = pmu->type->ops->hw_config(box, event);
1275 if (ret)
1276 return ret;
1277 }
1278 }
1279
1280 if (event->group_leader != event)
1281 ret = uncore_validate_group(pmu, event);
1282 else
1283 ret = 0;
1284
1285 return ret;
1286}
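From userspace, the checks above mean an uncore event must be opened system-wide on one cpu (pid == -1, cpu >= 0), without exclude_* bits and without a sampling period. A sketch using the raw perf_event_open() syscall; the PMU name uncore_cbox_0 and the config value are examples, not taken from this patch:

#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>

int main(void)
{
	struct perf_event_attr attr;
	uint64_t count;
	FILE *f;
	int fd, type;

	f = fopen("/sys/bus/event_source/devices/uncore_cbox_0/type", "r");
	if (!f || fscanf(f, "%d", &type) != 1)
		return 1;
	fclose(f);

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = type;		/* dynamic PMU type from sysfs */
	attr.config = 0x0137;		/* example: umask=0x01, event=0x37 */

	fd = syscall(__NR_perf_event_open, &attr, -1 /* pid */,
		     0 /* cpu */, -1 /* group */, 0);
	if (fd < 0)
		return 1;

	sleep(1);
	if (read(fd, &count, sizeof(count)) == sizeof(count))
		printf("count: %llu\n", (unsigned long long)count);
	close(fd);
	return 0;
}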
1287
1288static int __init uncore_pmu_register(struct intel_uncore_pmu *pmu)
1289{
1290 int ret;
1291
1292 pmu->pmu = (struct pmu) {
1293 .attr_groups = pmu->type->attr_groups,
1294 .task_ctx_nr = perf_invalid_context,
1295 .event_init = uncore_pmu_event_init,
1296 .add = uncore_pmu_event_add,
1297 .del = uncore_pmu_event_del,
1298 .start = uncore_pmu_event_start,
1299 .stop = uncore_pmu_event_stop,
1300 .read = uncore_pmu_event_read,
1301 };
1302
1303 if (pmu->type->num_boxes == 1) {
1304 if (strlen(pmu->type->name) > 0)
1305 sprintf(pmu->name, "uncore_%s", pmu->type->name);
1306 else
1307 sprintf(pmu->name, "uncore");
1308 } else {
1309 sprintf(pmu->name, "uncore_%s_%d", pmu->type->name,
1310 pmu->pmu_idx);
1311 }
1312
1313 ret = perf_pmu_register(&pmu->pmu, pmu->name, -1);
1314 return ret;
1315}
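Given the tables above, Sandy Bridge's cbox instances register as uncore_cbox_0 through uncore_cbox_3 (one per box), while Nehalem's single box with an empty type name registers as plain "uncore".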
1316
1317static void __init uncore_type_exit(struct intel_uncore_type *type)
1318{
1319 int i;
1320
1321 for (i = 0; i < type->num_boxes; i++)
1322 free_percpu(type->pmus[i].box);
1323 kfree(type->pmus);
1324 type->pmus = NULL;
1325 kfree(type->attr_groups[1]);
1326 type->attr_groups[1] = NULL;
1327}
1328
1329static void uncore_types_exit(struct intel_uncore_type **types)
1330{
1331 int i;
1332 for (i = 0; types[i]; i++)
1333 uncore_type_exit(types[i]);
1334}
1335
1336static int __init uncore_type_init(struct intel_uncore_type *type)
1337{
1338 struct intel_uncore_pmu *pmus;
1339 struct attribute_group *events_group;
1340 struct attribute **attrs;
1341 int i, j;
1342
1343 pmus = kzalloc(sizeof(*pmus) * type->num_boxes, GFP_KERNEL);
1344 if (!pmus)
1345 return -ENOMEM;
1346
1347 type->unconstrainted = (struct event_constraint)
1348 __EVENT_CONSTRAINT(0, (1ULL << type->num_counters) - 1,
1349 0, type->num_counters, 0);
1350
1351 for (i = 0; i < type->num_boxes; i++) {
1352 pmus[i].func_id = -1;
1353 pmus[i].pmu_idx = i;
1354 pmus[i].type = type;
1355 INIT_LIST_HEAD(&pmus[i].box_list);
1356 pmus[i].box = alloc_percpu(struct intel_uncore_box *);
1357 if (!pmus[i].box)
1358 goto fail;
1359 }
1360
1361 if (type->event_descs) {
1362 i = 0;
1363 while (type->event_descs[i].attr.attr.name)
1364 i++;
1365
1366 events_group = kzalloc(sizeof(struct attribute *) * (i + 1) +
1367 sizeof(*events_group), GFP_KERNEL);
1368 if (!events_group)
1369 goto fail;
1370
1371 attrs = (struct attribute **)(events_group + 1);
1372 events_group->name = "events";
1373 events_group->attrs = attrs;
1374
1375 for (j = 0; j < i; j++)
1376 attrs[j] = &type->event_descs[j].attr.attr;
1377
1378 type->attr_groups[1] = events_group;
1379 }
1380
1381 type->pmus = pmus;
1382 return 0;
1383fail:
1384 uncore_type_exit(type);
1385 return -ENOMEM;
1386}
1387
1388static int __init uncore_types_init(struct intel_uncore_type **types)
1389{
1390 int i, ret;
1391
1392 for (i = 0; types[i]; i++) {
1393 ret = uncore_type_init(types[i]);
1394 if (ret)
1395 goto fail;
1396 }
1397 return 0;
1398fail:
1399 while (--i >= 0)
1400 uncore_type_exit(types[i]);
1401 return ret;
1402}
1403
1404static struct pci_driver *uncore_pci_driver;
1405static bool pcidrv_registered;
1406
1407/*
1408 * add a pci uncore device
1409 */
1410static int __devinit uncore_pci_add(struct intel_uncore_type *type,
1411 struct pci_dev *pdev)
1412{
1413 struct intel_uncore_pmu *pmu;
1414 struct intel_uncore_box *box;
1415 int i, phys_id;
1416
1417 phys_id = pcibus_to_physid[pdev->bus->number];
1418 if (phys_id < 0)
1419 return -ENODEV;
1420
1421 box = uncore_alloc_box(type, 0);
1422 if (!box)
1423 return -ENOMEM;
1424
1425 /*
1426	 * for a performance monitoring unit with multiple boxes,
1427 * each box has a different function id.
1428 */
1429 for (i = 0; i < type->num_boxes; i++) {
1430 pmu = &type->pmus[i];
1431 if (pmu->func_id == pdev->devfn)
1432 break;
1433 if (pmu->func_id < 0) {
1434 pmu->func_id = pdev->devfn;
1435 break;
1436 }
1437 pmu = NULL;
1438 }
1439
1440 if (!pmu) {
1441 kfree(box);
1442 return -EINVAL;
1443 }
1444
1445 box->phys_id = phys_id;
1446 box->pci_dev = pdev;
1447 box->pmu = pmu;
1448 uncore_box_init(box);
1449 pci_set_drvdata(pdev, box);
1450
1451 raw_spin_lock(&uncore_box_lock);
1452 list_add_tail(&box->list, &pmu->box_list);
1453 raw_spin_unlock(&uncore_box_lock);
1454
1455 return 0;
1456}
1457
1458static void uncore_pci_remove(struct pci_dev *pdev)
1459{
1460 struct intel_uncore_box *box = pci_get_drvdata(pdev);
1461 struct intel_uncore_pmu *pmu = box->pmu;
1462 int cpu, phys_id = pcibus_to_physid[pdev->bus->number];
1463
1464 if (WARN_ON_ONCE(phys_id != box->phys_id))
1465 return;
1466
1467 raw_spin_lock(&uncore_box_lock);
1468 list_del(&box->list);
1469 raw_spin_unlock(&uncore_box_lock);
1470
1471 for_each_possible_cpu(cpu) {
1472 if (*per_cpu_ptr(pmu->box, cpu) == box) {
1473 *per_cpu_ptr(pmu->box, cpu) = NULL;
1474 atomic_dec(&box->refcnt);
1475 }
1476 }
1477
1478 WARN_ON_ONCE(atomic_read(&box->refcnt) != 1);
1479 kfree(box);
1480}
1481
1482static int __devinit uncore_pci_probe(struct pci_dev *pdev,
1483 const struct pci_device_id *id)
1484{
1485 struct intel_uncore_type *type;
1486
1487 type = (struct intel_uncore_type *)id->driver_data;
1488 return uncore_pci_add(type, pdev);
1489}
1490
1491static int __init uncore_pci_init(void)
1492{
1493 int ret;
1494
1495 switch (boot_cpu_data.x86_model) {
1496 case 45: /* Sandy Bridge-EP */
1497 pci_uncores = snbep_pci_uncores;
1498 uncore_pci_driver = &snbep_uncore_pci_driver;
1499 snbep_pci2phy_map_init();
1500 break;
1501 default:
1502 return 0;
1503 }
1504
1505 ret = uncore_types_init(pci_uncores);
1506 if (ret)
1507 return ret;
1508
1509 uncore_pci_driver->probe = uncore_pci_probe;
1510 uncore_pci_driver->remove = uncore_pci_remove;
1511
1512 ret = pci_register_driver(uncore_pci_driver);
1513 if (ret == 0)
1514 pcidrv_registered = true;
1515 else
1516 uncore_types_exit(pci_uncores);
1517
1518 return ret;
1519}
1520
1521static void __init uncore_pci_exit(void)
1522{
1523 if (pcidrv_registered) {
1524 pcidrv_registered = false;
1525 pci_unregister_driver(uncore_pci_driver);
1526 uncore_types_exit(pci_uncores);
1527 }
1528}
1529
1530static void __cpuinit uncore_cpu_dying(int cpu)
1531{
1532 struct intel_uncore_type *type;
1533 struct intel_uncore_pmu *pmu;
1534 struct intel_uncore_box *box;
1535 int i, j;
1536
1537 for (i = 0; msr_uncores[i]; i++) {
1538 type = msr_uncores[i];
1539 for (j = 0; j < type->num_boxes; j++) {
1540 pmu = &type->pmus[j];
1541 box = *per_cpu_ptr(pmu->box, cpu);
1542 *per_cpu_ptr(pmu->box, cpu) = NULL;
1543 if (box && atomic_dec_and_test(&box->refcnt))
1544 kfree(box);
1545 }
1546 }
1547}
1548
1549static int __cpuinit uncore_cpu_starting(int cpu)
1550{
1551 struct intel_uncore_type *type;
1552 struct intel_uncore_pmu *pmu;
1553 struct intel_uncore_box *box, *exist;
1554 int i, j, k, phys_id;
1555
1556 phys_id = topology_physical_package_id(cpu);
1557
1558 for (i = 0; msr_uncores[i]; i++) {
1559 type = msr_uncores[i];
1560 for (j = 0; j < type->num_boxes; j++) {
1561 pmu = &type->pmus[j];
1562 box = *per_cpu_ptr(pmu->box, cpu);
1563 /* called by uncore_cpu_init? */
1564 if (box && box->phys_id >= 0) {
1565 uncore_box_init(box);
1566 continue;
1567 }
1568
1569 for_each_online_cpu(k) {
1570 exist = *per_cpu_ptr(pmu->box, k);
1571 if (exist && exist->phys_id == phys_id) {
1572 atomic_inc(&exist->refcnt);
1573 *per_cpu_ptr(pmu->box, cpu) = exist;
1574 kfree(box);
1575 box = NULL;
1576 break;
1577 }
1578 }
1579
1580 if (box) {
1581 box->phys_id = phys_id;
1582 uncore_box_init(box);
1583 }
1584 }
1585 }
1586 return 0;
1587}
1588
1589static int __cpuinit uncore_cpu_prepare(int cpu, int phys_id)
1590{
1591 struct intel_uncore_type *type;
1592 struct intel_uncore_pmu *pmu;
1593 struct intel_uncore_box *box;
1594 int i, j;
1595
1596 for (i = 0; msr_uncores[i]; i++) {
1597 type = msr_uncores[i];
1598 for (j = 0; j < type->num_boxes; j++) {
1599 pmu = &type->pmus[j];
1600 if (pmu->func_id < 0)
1601 pmu->func_id = j;
1602
1603 box = uncore_alloc_box(type, cpu);
1604 if (!box)
1605 return -ENOMEM;
1606
1607 box->pmu = pmu;
1608 box->phys_id = phys_id;
1609 *per_cpu_ptr(pmu->box, cpu) = box;
1610 }
1611 }
1612 return 0;
1613}
1614
1615static void __cpuinit uncore_change_context(struct intel_uncore_type **uncores,
1616 int old_cpu, int new_cpu)
1617{
1618 struct intel_uncore_type *type;
1619 struct intel_uncore_pmu *pmu;
1620 struct intel_uncore_box *box;
1621 int i, j;
1622
1623 for (i = 0; uncores[i]; i++) {
1624 type = uncores[i];
1625 for (j = 0; j < type->num_boxes; j++) {
1626 pmu = &type->pmus[j];
1627 if (old_cpu < 0)
1628 box = uncore_pmu_to_box(pmu, new_cpu);
1629 else
1630 box = uncore_pmu_to_box(pmu, old_cpu);
1631 if (!box)
1632 continue;
1633
1634 if (old_cpu < 0) {
1635 WARN_ON_ONCE(box->cpu != -1);
1636 box->cpu = new_cpu;
1637 continue;
1638 }
1639
1640 WARN_ON_ONCE(box->cpu != old_cpu);
1641 if (new_cpu >= 0) {
1642 uncore_pmu_cancel_hrtimer(box);
1643 perf_pmu_migrate_context(&pmu->pmu,
1644 old_cpu, new_cpu);
1645 box->cpu = new_cpu;
1646 } else {
1647 box->cpu = -1;
1648 }
1649 }
1650 }
1651}
1652
1653static void __cpuinit uncore_event_exit_cpu(int cpu)
1654{
1655 int i, phys_id, target;
1656
1657 /* if exiting cpu is used for collecting uncore events */
1658 if (!cpumask_test_and_clear_cpu(cpu, &uncore_cpu_mask))
1659 return;
1660
1661 /* find a new cpu to collect uncore events */
1662 phys_id = topology_physical_package_id(cpu);
1663 target = -1;
1664 for_each_online_cpu(i) {
1665 if (i == cpu)
1666 continue;
1667 if (phys_id == topology_physical_package_id(i)) {
1668 target = i;
1669 break;
1670 }
1671 }
1672
1673 /* migrate uncore events to the new cpu */
1674 if (target >= 0)
1675 cpumask_set_cpu(target, &uncore_cpu_mask);
1676
1677 uncore_change_context(msr_uncores, cpu, target);
1678 uncore_change_context(pci_uncores, cpu, target);
1679}
1680
1681static void __cpuinit uncore_event_init_cpu(int cpu)
1682{
1683 int i, phys_id;
1684
1685 phys_id = topology_physical_package_id(cpu);
1686 for_each_cpu(i, &uncore_cpu_mask) {
1687 if (phys_id == topology_physical_package_id(i))
1688 return;
1689 }
1690
1691 cpumask_set_cpu(cpu, &uncore_cpu_mask);
1692
1693 uncore_change_context(msr_uncores, -1, cpu);
1694 uncore_change_context(pci_uncores, -1, cpu);
1695}
1696
1697static int __cpuinit uncore_cpu_notifier(struct notifier_block *self,
1698 unsigned long action, void *hcpu)
1699{
1700 unsigned int cpu = (long)hcpu;
1701
1702 /* allocate/free data structure for uncore box */
1703 switch (action & ~CPU_TASKS_FROZEN) {
1704 case CPU_UP_PREPARE:
1705 uncore_cpu_prepare(cpu, -1);
1706 break;
1707 case CPU_STARTING:
1708 uncore_cpu_starting(cpu);
1709 break;
1710 case CPU_UP_CANCELED:
1711 case CPU_DYING:
1712 uncore_cpu_dying(cpu);
1713 break;
1714 default:
1715 break;
1716 }
1717
1718 /* select the cpu that collects uncore events */
1719 switch (action & ~CPU_TASKS_FROZEN) {
1720 case CPU_DOWN_FAILED:
1721 case CPU_STARTING:
1722 uncore_event_init_cpu(cpu);
1723 break;
1724 case CPU_DOWN_PREPARE:
1725 uncore_event_exit_cpu(cpu);
1726 break;
1727 default:
1728 break;
1729 }
1730
1731 return NOTIFY_OK;
1732}
1733
1734static struct notifier_block uncore_cpu_nb __cpuinitdata = {
1735 .notifier_call = uncore_cpu_notifier,
1736 /*
1737 * to migrate uncore events, our notifier should be executed
1738 * before perf core's notifier.
1739 */
1740 .priority = CPU_PRI_PERF + 1,
1741};
1742
1743static void __init uncore_cpu_setup(void *dummy)
1744{
1745 uncore_cpu_starting(smp_processor_id());
1746}
1747
1748static int __init uncore_cpu_init(void)
1749{
1750 int ret, cpu, max_cores;
1751
1752 max_cores = boot_cpu_data.x86_max_cores;
1753 switch (boot_cpu_data.x86_model) {
1754 case 26: /* Nehalem */
1755 case 30:
1756 case 37: /* Westmere */
1757 case 44:
1758 msr_uncores = nhm_msr_uncores;
1759 break;
1760 case 42: /* Sandy Bridge */
1761 if (snb_uncore_cbox.num_boxes > max_cores)
1762 snb_uncore_cbox.num_boxes = max_cores;
1763 msr_uncores = snb_msr_uncores;
1764 break;
1765	case 45: /* Sandy Bridge-EP */
1766 if (snbep_uncore_cbox.num_boxes > max_cores)
1767 snbep_uncore_cbox.num_boxes = max_cores;
1768 msr_uncores = snbep_msr_uncores;
1769 break;
1770 default:
1771 return 0;
1772 }
1773
1774 ret = uncore_types_init(msr_uncores);
1775 if (ret)
1776 return ret;
1777
1778 get_online_cpus();
1779
1780 for_each_online_cpu(cpu) {
1781 int i, phys_id = topology_physical_package_id(cpu);
1782
1783 for_each_cpu(i, &uncore_cpu_mask) {
1784 if (phys_id == topology_physical_package_id(i)) {
1785 phys_id = -1;
1786 break;
1787 }
1788 }
1789 if (phys_id < 0)
1790 continue;
1791
1792 uncore_cpu_prepare(cpu, phys_id);
1793 uncore_event_init_cpu(cpu);
1794 }
1795 on_each_cpu(uncore_cpu_setup, NULL, 1);
1796
1797 register_cpu_notifier(&uncore_cpu_nb);
1798
1799 put_online_cpus();
1800
1801 return 0;
1802}
1803
1804static int __init uncore_pmus_register(void)
1805{
1806 struct intel_uncore_pmu *pmu;
1807 struct intel_uncore_type *type;
1808 int i, j;
1809
1810 for (i = 0; msr_uncores[i]; i++) {
1811 type = msr_uncores[i];
1812 for (j = 0; j < type->num_boxes; j++) {
1813 pmu = &type->pmus[j];
1814 uncore_pmu_register(pmu);
1815 }
1816 }
1817
1818 for (i = 0; pci_uncores[i]; i++) {
1819 type = pci_uncores[i];
1820 for (j = 0; j < type->num_boxes; j++) {
1821 pmu = &type->pmus[j];
1822 uncore_pmu_register(pmu);
1823 }
1824 }
1825
1826 return 0;
1827}
1828
1829static int __init intel_uncore_init(void)
1830{
1831 int ret;
1832
1833 if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
1834 return -ENODEV;
1835
1836 ret = uncore_pci_init();
1837 if (ret)
1838 goto fail;
1839 ret = uncore_cpu_init();
1840 if (ret) {
1841 uncore_pci_exit();
1842 goto fail;
1843 }
1844
1845 uncore_pmus_register();
1846 return 0;
1847fail:
1848 return ret;
1849}
1850device_initcall(intel_uncore_init);
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.h b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
new file mode 100644
index 000000000000..b13e9ea81def
--- /dev/null
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
@@ -0,0 +1,424 @@
1#include <linux/module.h>
2#include <linux/slab.h>
3#include <linux/pci.h>
4#include <linux/perf_event.h>
5#include "perf_event.h"
6
7#define UNCORE_PMU_NAME_LEN 32
8#define UNCORE_BOX_HASH_SIZE 8
9
10#define UNCORE_PMU_HRTIMER_INTERVAL (60 * NSEC_PER_SEC)
11
12#define UNCORE_FIXED_EVENT 0xff
13#define UNCORE_PMC_IDX_MAX_GENERIC 8
14#define UNCORE_PMC_IDX_FIXED UNCORE_PMC_IDX_MAX_GENERIC
15#define UNCORE_PMC_IDX_MAX (UNCORE_PMC_IDX_FIXED + 1)
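So generic uncore counters occupy indices 0 through 7, the fixed counter sits at index 8, and per-box arrays are sized UNCORE_PMC_IDX_MAX = 9.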
16
17#define UNCORE_EVENT_CONSTRAINT(c, n) EVENT_CONSTRAINT(c, n, 0xff)
18
19/* SNB event control */
20#define SNB_UNC_CTL_EV_SEL_MASK 0x000000ff
21#define SNB_UNC_CTL_UMASK_MASK 0x0000ff00
22#define SNB_UNC_CTL_EDGE_DET (1 << 18)
23#define SNB_UNC_CTL_EN (1 << 22)
24#define SNB_UNC_CTL_INVERT (1 << 23)
25#define SNB_UNC_CTL_CMASK_MASK 0x1f000000
26#define NHM_UNC_CTL_CMASK_MASK 0xff000000
27#define NHM_UNC_FIXED_CTR_CTL_EN (1 << 0)
28
29#define SNB_UNC_RAW_EVENT_MASK (SNB_UNC_CTL_EV_SEL_MASK | \
30 SNB_UNC_CTL_UMASK_MASK | \
31 SNB_UNC_CTL_EDGE_DET | \
32 SNB_UNC_CTL_INVERT | \
33 SNB_UNC_CTL_CMASK_MASK)
34
35#define NHM_UNC_RAW_EVENT_MASK (SNB_UNC_CTL_EV_SEL_MASK | \
36 SNB_UNC_CTL_UMASK_MASK | \
37 SNB_UNC_CTL_EDGE_DET | \
38 SNB_UNC_CTL_INVERT | \
39 NHM_UNC_CTL_CMASK_MASK)
40
41/* SNB global control register */
42#define SNB_UNC_PERF_GLOBAL_CTL 0x391
43#define SNB_UNC_FIXED_CTR_CTRL 0x394
44#define SNB_UNC_FIXED_CTR 0x395
45
46/* SNB uncore global control */
47#define SNB_UNC_GLOBAL_CTL_CORE_ALL ((1 << 4) - 1)
48#define SNB_UNC_GLOBAL_CTL_EN (1 << 29)
49
50/* SNB Cbo register */
51#define SNB_UNC_CBO_0_PERFEVTSEL0 0x700
52#define SNB_UNC_CBO_0_PER_CTR0 0x706
53#define SNB_UNC_CBO_MSR_OFFSET 0x10
54
55/* NHM global control register */
56#define NHM_UNC_PERF_GLOBAL_CTL 0x391
57#define NHM_UNC_FIXED_CTR 0x394
58#define NHM_UNC_FIXED_CTR_CTRL 0x395
59
60/* NHM uncore global control */
61#define NHM_UNC_GLOBAL_CTL_EN_PC_ALL ((1ULL << 8) - 1)
62#define NHM_UNC_GLOBAL_CTL_EN_FC (1ULL << 32)
63
64/* NHM uncore register */
65#define NHM_UNC_PERFEVTSEL0 0x3c0
66#define NHM_UNC_UNCORE_PMC0 0x3b0
67
68/* SNB-EP Box level control */
69#define SNBEP_PMON_BOX_CTL_RST_CTRL (1 << 0)
70#define SNBEP_PMON_BOX_CTL_RST_CTRS (1 << 1)
71#define SNBEP_PMON_BOX_CTL_FRZ (1 << 8)
72#define SNBEP_PMON_BOX_CTL_FRZ_EN (1 << 16)
73#define SNBEP_PMON_BOX_CTL_INT (SNBEP_PMON_BOX_CTL_RST_CTRL | \
74 SNBEP_PMON_BOX_CTL_RST_CTRS | \
75 SNBEP_PMON_BOX_CTL_FRZ_EN)
76/* SNB-EP event control */
77#define SNBEP_PMON_CTL_EV_SEL_MASK 0x000000ff
78#define SNBEP_PMON_CTL_UMASK_MASK 0x0000ff00
79#define SNBEP_PMON_CTL_RST (1 << 17)
80#define SNBEP_PMON_CTL_EDGE_DET (1 << 18)
81#define SNBEP_PMON_CTL_EV_SEL_EXT (1 << 21) /* only for QPI */
82#define SNBEP_PMON_CTL_EN (1 << 22)
83#define SNBEP_PMON_CTL_INVERT (1 << 23)
84#define SNBEP_PMON_CTL_TRESH_MASK 0xff000000
85#define SNBEP_PMON_RAW_EVENT_MASK (SNBEP_PMON_CTL_EV_SEL_MASK | \
86 SNBEP_PMON_CTL_UMASK_MASK | \
87 SNBEP_PMON_CTL_EDGE_DET | \
88 SNBEP_PMON_CTL_INVERT | \
89 SNBEP_PMON_CTL_TRESH_MASK)
90
91/* SNB-EP Ubox event control */
92#define SNBEP_U_MSR_PMON_CTL_TRESH_MASK 0x1f000000
93#define SNBEP_U_MSR_PMON_RAW_EVENT_MASK \
94 (SNBEP_PMON_CTL_EV_SEL_MASK | \
95 SNBEP_PMON_CTL_UMASK_MASK | \
96 SNBEP_PMON_CTL_EDGE_DET | \
97 SNBEP_PMON_CTL_INVERT | \
98 SNBEP_U_MSR_PMON_CTL_TRESH_MASK)
99
100#define SNBEP_CBO_PMON_CTL_TID_EN (1 << 19)
101#define SNBEP_CBO_MSR_PMON_RAW_EVENT_MASK (SNBEP_PMON_RAW_EVENT_MASK | \
102 SNBEP_CBO_PMON_CTL_TID_EN)
103
104/* SNB-EP PCU event control */
105#define SNBEP_PCU_MSR_PMON_CTL_OCC_SEL_MASK 0x0000c000
106#define SNBEP_PCU_MSR_PMON_CTL_TRESH_MASK 0x1f000000
107#define SNBEP_PCU_MSR_PMON_CTL_OCC_INVERT (1 << 30)
108#define SNBEP_PCU_MSR_PMON_CTL_OCC_EDGE_DET (1 << 31)
109#define SNBEP_PCU_MSR_PMON_RAW_EVENT_MASK \
110 (SNBEP_PMON_CTL_EV_SEL_MASK | \
111 SNBEP_PCU_MSR_PMON_CTL_OCC_SEL_MASK | \
112 SNBEP_PMON_CTL_EDGE_DET | \
113 SNBEP_PMON_CTL_INVERT | \
114 SNBEP_PCU_MSR_PMON_CTL_TRESH_MASK | \
115 SNBEP_PCU_MSR_PMON_CTL_OCC_INVERT | \
116 SNBEP_PCU_MSR_PMON_CTL_OCC_EDGE_DET)
117
118/* SNB-EP pci control register */
119#define SNBEP_PCI_PMON_BOX_CTL 0xf4
120#define SNBEP_PCI_PMON_CTL0 0xd8
121/* SNB-EP pci counter register */
122#define SNBEP_PCI_PMON_CTR0 0xa0
123
124/* SNB-EP home agent register */
125#define SNBEP_HA_PCI_PMON_BOX_ADDRMATCH0 0x40
126#define SNBEP_HA_PCI_PMON_BOX_ADDRMATCH1 0x44
127#define SNBEP_HA_PCI_PMON_BOX_OPCODEMATCH 0x48
128/* SNB-EP memory controller register */
129#define SNBEP_MC_CHy_PCI_PMON_FIXED_CTL 0xf0
130#define SNBEP_MC_CHy_PCI_PMON_FIXED_CTR 0xd0
131/* SNB-EP QPI register */
132#define SNBEP_Q_Py_PCI_PMON_PKT_MATCH0 0x228
133#define SNBEP_Q_Py_PCI_PMON_PKT_MATCH1 0x22c
134#define SNBEP_Q_Py_PCI_PMON_PKT_MASK0 0x238
135#define SNBEP_Q_Py_PCI_PMON_PKT_MASK1 0x23c
136
137/* SNB-EP Ubox register */
138#define SNBEP_U_MSR_PMON_CTR0 0xc16
139#define SNBEP_U_MSR_PMON_CTL0 0xc10
140
141#define SNBEP_U_MSR_PMON_UCLK_FIXED_CTL 0xc08
142#define SNBEP_U_MSR_PMON_UCLK_FIXED_CTR 0xc09
143
144/* SNB-EP Cbo register */
145#define SNBEP_C0_MSR_PMON_CTR0 0xd16
146#define SNBEP_C0_MSR_PMON_CTL0 0xd10
147#define SNBEP_C0_MSR_PMON_BOX_CTL 0xd04
148#define SNBEP_C0_MSR_PMON_BOX_FILTER 0xd14
149#define SNBEP_CB0_MSR_PMON_BOX_FILTER_MASK 0xfffffc1f
150#define SNBEP_CBO_MSR_OFFSET 0x20
151
152/* SNB-EP PCU register */
153#define SNBEP_PCU_MSR_PMON_CTR0 0xc36
154#define SNBEP_PCU_MSR_PMON_CTL0 0xc30
155#define SNBEP_PCU_MSR_PMON_BOX_CTL 0xc24
156#define SNBEP_PCU_MSR_PMON_BOX_FILTER 0xc34
157#define SNBEP_PCU_MSR_PMON_BOX_FILTER_MASK 0xffffffff
158#define SNBEP_PCU_MSR_CORE_C3_CTR 0x3fc
159#define SNBEP_PCU_MSR_CORE_C6_CTR 0x3fd
160
161struct intel_uncore_ops;
162struct intel_uncore_pmu;
163struct intel_uncore_box;
164struct uncore_event_desc;
165
166struct intel_uncore_type {
167 const char *name;
168 int num_counters;
169 int num_boxes;
170 int perf_ctr_bits;
171 int fixed_ctr_bits;
172 unsigned perf_ctr;
173 unsigned event_ctl;
174 unsigned event_mask;
175 unsigned fixed_ctr;
176 unsigned fixed_ctl;
177 unsigned box_ctl;
178 unsigned msr_offset;
179 unsigned num_shared_regs:8;
180 unsigned single_fixed:1;
181 struct event_constraint unconstrainted;
182 struct event_constraint *constraints;
183 struct intel_uncore_pmu *pmus;
184 struct intel_uncore_ops *ops;
185 struct uncore_event_desc *event_descs;
186 const struct attribute_group *attr_groups[3];
187};
188
189#define format_group attr_groups[0]
190
191struct intel_uncore_ops {
192 void (*init_box)(struct intel_uncore_box *);
193 void (*disable_box)(struct intel_uncore_box *);
194 void (*enable_box)(struct intel_uncore_box *);
195 void (*disable_event)(struct intel_uncore_box *, struct perf_event *);
196 void (*enable_event)(struct intel_uncore_box *, struct perf_event *);
197 u64 (*read_counter)(struct intel_uncore_box *, struct perf_event *);
198 int (*hw_config)(struct intel_uncore_box *, struct perf_event *);
199 struct event_constraint *(*get_constraint)(struct intel_uncore_box *,
200 struct perf_event *);
201 void (*put_constraint)(struct intel_uncore_box *, struct perf_event *);
202};
203
204struct intel_uncore_pmu {
205 struct pmu pmu;
206 char name[UNCORE_PMU_NAME_LEN];
207 int pmu_idx;
208 int func_id;
209 struct intel_uncore_type *type;
210 struct intel_uncore_box ** __percpu box;
211 struct list_head box_list;
212};
213
214struct intel_uncore_extra_reg {
215 raw_spinlock_t lock;
216 u64 config1;
217 atomic_t ref;
218};
219
220struct intel_uncore_box {
221 int phys_id;
222 int n_active; /* number of active events */
223 int n_events;
224 int cpu; /* cpu to collect events */
225 unsigned long flags;
226 atomic_t refcnt;
227 struct perf_event *events[UNCORE_PMC_IDX_MAX];
228 struct perf_event *event_list[UNCORE_PMC_IDX_MAX];
229 unsigned long active_mask[BITS_TO_LONGS(UNCORE_PMC_IDX_MAX)];
230 u64 tags[UNCORE_PMC_IDX_MAX];
231 struct pci_dev *pci_dev;
232 struct intel_uncore_pmu *pmu;
233 struct hrtimer hrtimer;
234 struct list_head list;
235 struct intel_uncore_extra_reg shared_regs[0];
236};
237
238#define UNCORE_BOX_FLAG_INITIATED 0
239
240struct uncore_event_desc {
241 struct kobj_attribute attr;
242 const char *config;
243};
244
245#define INTEL_UNCORE_EVENT_DESC(_name, _config) \
246{ \
247 .attr = __ATTR(_name, 0444, uncore_event_show, NULL), \
248 .config = _config, \
249}
250
251#define DEFINE_UNCORE_FORMAT_ATTR(_var, _name, _format) \
252static ssize_t __uncore_##_var##_show(struct kobject *kobj, \
253 struct kobj_attribute *attr, \
254 char *page) \
255{ \
256 BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE); \
257 return sprintf(page, _format "\n"); \
258} \
259static struct kobj_attribute format_attr_##_var = \
260 __ATTR(_name, 0444, __uncore_##_var##_show, NULL)
261
262
263static ssize_t uncore_event_show(struct kobject *kobj,
264 struct kobj_attribute *attr, char *buf)
265{
266 struct uncore_event_desc *event =
267 container_of(attr, struct uncore_event_desc, attr);
268 return sprintf(buf, "%s", event->config);
269}
270
271static inline unsigned uncore_pci_box_ctl(struct intel_uncore_box *box)
272{
273 return box->pmu->type->box_ctl;
274}
275
276static inline unsigned uncore_pci_fixed_ctl(struct intel_uncore_box *box)
277{
278 return box->pmu->type->fixed_ctl;
279}
280
281static inline unsigned uncore_pci_fixed_ctr(struct intel_uncore_box *box)
282{
283 return box->pmu->type->fixed_ctr;
284}
285
286static inline
287unsigned uncore_pci_event_ctl(struct intel_uncore_box *box, int idx)
288{
289 return idx * 4 + box->pmu->type->event_ctl;
290}
291
292static inline
293unsigned uncore_pci_perf_ctr(struct intel_uncore_box *box, int idx)
294{
295 return idx * 8 + box->pmu->type->perf_ctr;
296}
297
298static inline
299unsigned uncore_msr_box_ctl(struct intel_uncore_box *box)
300{
301 if (!box->pmu->type->box_ctl)
302 return 0;
303 return box->pmu->type->box_ctl +
304 box->pmu->type->msr_offset * box->pmu->pmu_idx;
305}
306
307static inline
308unsigned uncore_msr_fixed_ctl(struct intel_uncore_box *box)
309{
310 if (!box->pmu->type->fixed_ctl)
311 return 0;
312 return box->pmu->type->fixed_ctl +
313 box->pmu->type->msr_offset * box->pmu->pmu_idx;
314}
315
316static inline
317unsigned uncore_msr_fixed_ctr(struct intel_uncore_box *box)
318{
319 return box->pmu->type->fixed_ctr +
320 box->pmu->type->msr_offset * box->pmu->pmu_idx;
321}
322
323static inline
324unsigned uncore_msr_event_ctl(struct intel_uncore_box *box, int idx)
325{
326 return idx + box->pmu->type->event_ctl +
327 box->pmu->type->msr_offset * box->pmu->pmu_idx;
328}
329
330static inline
331unsigned uncore_msr_perf_ctr(struct intel_uncore_box *box, int idx)
332{
333 return idx + box->pmu->type->perf_ctr +
334 box->pmu->type->msr_offset * box->pmu->pmu_idx;
335}
336
337static inline
338unsigned uncore_fixed_ctl(struct intel_uncore_box *box)
339{
340 if (box->pci_dev)
341 return uncore_pci_fixed_ctl(box);
342 else
343 return uncore_msr_fixed_ctl(box);
344}
345
346static inline
347unsigned uncore_fixed_ctr(struct intel_uncore_box *box)
348{
349 if (box->pci_dev)
350 return uncore_pci_fixed_ctr(box);
351 else
352 return uncore_msr_fixed_ctr(box);
353}
354
355static inline
356unsigned uncore_event_ctl(struct intel_uncore_box *box, int idx)
357{
358 if (box->pci_dev)
359 return uncore_pci_event_ctl(box, idx);
360 else
361 return uncore_msr_event_ctl(box, idx);
362}
363
364static inline
365unsigned uncore_perf_ctr(struct intel_uncore_box *box, int idx)
366{
367 if (box->pci_dev)
368 return uncore_pci_perf_ctr(box, idx);
369 else
370 return uncore_msr_perf_ctr(box, idx);
371}
372
373static inline int uncore_perf_ctr_bits(struct intel_uncore_box *box)
374{
375 return box->pmu->type->perf_ctr_bits;
376}
377
378static inline int uncore_fixed_ctr_bits(struct intel_uncore_box *box)
379{
380 return box->pmu->type->fixed_ctr_bits;
381}
382
383static inline int uncore_num_counters(struct intel_uncore_box *box)
384{
385 return box->pmu->type->num_counters;
386}
387
388static inline void uncore_disable_box(struct intel_uncore_box *box)
389{
390 if (box->pmu->type->ops->disable_box)
391 box->pmu->type->ops->disable_box(box);
392}
393
394static inline void uncore_enable_box(struct intel_uncore_box *box)
395{
396 if (box->pmu->type->ops->enable_box)
397 box->pmu->type->ops->enable_box(box);
398}
399
400static inline void uncore_disable_event(struct intel_uncore_box *box,
401 struct perf_event *event)
402{
403 box->pmu->type->ops->disable_event(box, event);
404}
405
406static inline void uncore_enable_event(struct intel_uncore_box *box,
407 struct perf_event *event)
408{
409 box->pmu->type->ops->enable_event(box, event);
410}
411
412static inline u64 uncore_read_counter(struct intel_uncore_box *box,
413 struct perf_event *event)
414{
415 return box->pmu->type->ops->read_counter(box, event);
416}
417
418static inline void uncore_box_init(struct intel_uncore_box *box)
419{
420 if (!test_and_set_bit(UNCORE_BOX_FLAG_INITIATED, &box->flags)) {
421 if (box->pmu->type->ops->init_box)
422 box->pmu->type->ops->init_box(box);
423 }
424}
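
The SNB_UNC_CTL_* macros near the top of this header define the bit layout of an uncore event-control register: event select in bits 0-7, unit mask in bits 8-15, plus edge/invert/enable bits. A small standalone sketch of packing a config value with that layout follows; the event and umask values are placeholders, not a documented event.

/* Illustrative packing of an SNB uncore control value using the
 * field layout defined above. Values are placeholders. */
#include <stdio.h>
#include <stdint.h>

#define SNB_UNC_CTL_EV_SEL_MASK	0x000000ff
#define SNB_UNC_CTL_UMASK_MASK	0x0000ff00
#define SNB_UNC_CTL_EDGE_DET	(1 << 18)
#define SNB_UNC_CTL_EN		(1 << 22)

static uint32_t snb_unc_config(uint8_t event, uint8_t umask, int edge)
{
	uint32_t cfg = 0;

	cfg |= event & SNB_UNC_CTL_EV_SEL_MASK;			/* bits 0-7 */
	cfg |= ((uint32_t)umask << 8) & SNB_UNC_CTL_UMASK_MASK;	/* bits 8-15 */
	if (edge)
		cfg |= SNB_UNC_CTL_EDGE_DET;
	return cfg | SNB_UNC_CTL_EN;				/* enable, bit 22 */
}

int main(void)
{
	printf("config = 0x%08x\n", snb_unc_config(0x11, 0x01, 0));
	return 0;
}
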
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c
index 47124a73dd73..92c7e39a079f 100644
--- a/arch/x86/kernel/cpu/perf_event_p4.c
+++ b/arch/x86/kernel/cpu/perf_event_p4.c
@@ -895,8 +895,8 @@ static void p4_pmu_disable_pebs(void)
895 * So at moment let leave metrics turned on forever -- it's 895 * So at moment let leave metrics turned on forever -- it's
896 * ok for now but need to be revisited! 896 * ok for now but need to be revisited!
897 * 897 *
898 * (void)checking_wrmsrl(MSR_IA32_PEBS_ENABLE, (u64)0); 898 * (void)wrmsrl_safe(MSR_IA32_PEBS_ENABLE, (u64)0);
899 * (void)checking_wrmsrl(MSR_P4_PEBS_MATRIX_VERT, (u64)0); 899 * (void)wrmsrl_safe(MSR_P4_PEBS_MATRIX_VERT, (u64)0);
900 */ 900 */
901} 901}
902 902
@@ -909,7 +909,7 @@ static inline void p4_pmu_disable_event(struct perf_event *event)
909 * state we need to clear P4_CCCR_OVF, otherwise interrupt get 909 * state we need to clear P4_CCCR_OVF, otherwise interrupt get
910 * asserted again and again 910 * asserted again and again
911 */ 911 */
912 (void)checking_wrmsrl(hwc->config_base, 912 (void)wrmsrl_safe(hwc->config_base,
913 (u64)(p4_config_unpack_cccr(hwc->config)) & 913 (u64)(p4_config_unpack_cccr(hwc->config)) &
914 ~P4_CCCR_ENABLE & ~P4_CCCR_OVF & ~P4_CCCR_RESERVED); 914 ~P4_CCCR_ENABLE & ~P4_CCCR_OVF & ~P4_CCCR_RESERVED);
915} 915}
@@ -943,8 +943,8 @@ static void p4_pmu_enable_pebs(u64 config)
943 943
944 bind = &p4_pebs_bind_map[idx]; 944 bind = &p4_pebs_bind_map[idx];
945 945
946 (void)checking_wrmsrl(MSR_IA32_PEBS_ENABLE, (u64)bind->metric_pebs); 946 (void)wrmsrl_safe(MSR_IA32_PEBS_ENABLE, (u64)bind->metric_pebs);
947 (void)checking_wrmsrl(MSR_P4_PEBS_MATRIX_VERT, (u64)bind->metric_vert); 947 (void)wrmsrl_safe(MSR_P4_PEBS_MATRIX_VERT, (u64)bind->metric_vert);
948} 948}
949 949
950static void p4_pmu_enable_event(struct perf_event *event) 950static void p4_pmu_enable_event(struct perf_event *event)
@@ -978,8 +978,8 @@ static void p4_pmu_enable_event(struct perf_event *event)
978 */ 978 */
979 p4_pmu_enable_pebs(hwc->config); 979 p4_pmu_enable_pebs(hwc->config);
980 980
981 (void)checking_wrmsrl(escr_addr, escr_conf); 981 (void)wrmsrl_safe(escr_addr, escr_conf);
982 (void)checking_wrmsrl(hwc->config_base, 982 (void)wrmsrl_safe(hwc->config_base,
983 (cccr & ~P4_CCCR_RESERVED) | P4_CCCR_ENABLE); 983 (cccr & ~P4_CCCR_RESERVED) | P4_CCCR_ENABLE);
984} 984}
985 985
@@ -1325,7 +1325,7 @@ __init int p4_pmu_init(void)
1325 unsigned int low, high; 1325 unsigned int low, high;
1326 1326
1327 /* If we get stripped -- indexing fails */ 1327 /* If we get stripped -- indexing fails */
1328 BUILD_BUG_ON(ARCH_P4_MAX_CCCR > X86_PMC_MAX_GENERIC); 1328 BUILD_BUG_ON(ARCH_P4_MAX_CCCR > INTEL_PMC_MAX_GENERIC);
1329 1329
1330 rdmsr(MSR_IA32_MISC_ENABLE, low, high); 1330 rdmsr(MSR_IA32_MISC_ENABLE, low, high);
1331 if (!(low & (1 << 7))) { 1331 if (!(low & (1 << 7))) {
diff --git a/arch/x86/kernel/cpu/perf_event_p6.c b/arch/x86/kernel/cpu/perf_event_p6.c
index 32bcfc7dd230..e4dd0f7a0453 100644
--- a/arch/x86/kernel/cpu/perf_event_p6.c
+++ b/arch/x86/kernel/cpu/perf_event_p6.c
@@ -71,7 +71,7 @@ p6_pmu_disable_event(struct perf_event *event)
71 if (cpuc->enabled) 71 if (cpuc->enabled)
72 val |= ARCH_PERFMON_EVENTSEL_ENABLE; 72 val |= ARCH_PERFMON_EVENTSEL_ENABLE;
73 73
74 (void)checking_wrmsrl(hwc->config_base, val); 74 (void)wrmsrl_safe(hwc->config_base, val);
75} 75}
76 76
77static void p6_pmu_enable_event(struct perf_event *event) 77static void p6_pmu_enable_event(struct perf_event *event)
@@ -84,7 +84,7 @@ static void p6_pmu_enable_event(struct perf_event *event)
84 if (cpuc->enabled) 84 if (cpuc->enabled)
85 val |= ARCH_PERFMON_EVENTSEL_ENABLE; 85 val |= ARCH_PERFMON_EVENTSEL_ENABLE;
86 86
87 (void)checking_wrmsrl(hwc->config_base, val); 87 (void)wrmsrl_safe(hwc->config_base, val);
88} 88}
89 89
90PMU_FORMAT_ATTR(event, "config:0-7" ); 90PMU_FORMAT_ATTR(event, "config:0-7" );
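
The p4 and p6 hunks above are a mechanical rename of checking_wrmsrl() to wrmsrl_safe(); the semantics stay the same: attempt the MSR write and return an error code instead of faulting if the MSR does not accept it. A rough userspace model of that calling pattern, with an invented stand-in MSR table:

/* Rough model of the wrmsrl_safe() pattern: report failure rather
 * than crash. The fake MSR table is invented for illustration. */
#include <stdio.h>
#include <stdint.h>
#include <errno.h>

static uint64_t fake_msrs[4];

static int wrmsrl_safe_model(unsigned idx, uint64_t val)
{
	if (idx >= 4)
		return -EIO;		/* would have been a #GP fault */
	fake_msrs[idx] = val;
	return 0;
}

int main(void)
{
	/* callers that ignore failure cast the result away, as above */
	(void)wrmsrl_safe_model(2, 0x1234);
	if (wrmsrl_safe_model(9, 0) < 0)
		printf("write to unknown MSR rejected, not fatal\n");
	return 0;
}
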
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index 571246d81edf..ae42418bc50f 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -27,8 +27,8 @@ static int die_counter;
27 27
28void printk_address(unsigned long address, int reliable) 28void printk_address(unsigned long address, int reliable)
29{ 29{
30 printk(" [<%p>] %s%pB\n", (void *) address, 30 pr_cont(" [<%p>] %s%pB\n",
31 reliable ? "" : "? ", (void *) address); 31 (void *)address, reliable ? "" : "? ", (void *)address);
32} 32}
33 33
34#ifdef CONFIG_FUNCTION_GRAPH_TRACER 34#ifdef CONFIG_FUNCTION_GRAPH_TRACER
@@ -271,6 +271,7 @@ int __kprobes __die(const char *str, struct pt_regs *regs, long err)
271 current->thread.trap_nr, SIGSEGV) == NOTIFY_STOP) 271 current->thread.trap_nr, SIGSEGV) == NOTIFY_STOP)
272 return 1; 272 return 1;
273 273
274 print_modules();
274 show_regs(regs); 275 show_regs(regs);
275#ifdef CONFIG_X86_32 276#ifdef CONFIG_X86_32
276 if (user_mode_vm(regs)) { 277 if (user_mode_vm(regs)) {
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c
index e0b1d783daab..1038a417ea53 100644
--- a/arch/x86/kernel/dumpstack_32.c
+++ b/arch/x86/kernel/dumpstack_32.c
@@ -73,11 +73,11 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
73 if (kstack_end(stack)) 73 if (kstack_end(stack))
74 break; 74 break;
75 if (i && ((i % STACKSLOTS_PER_LINE) == 0)) 75 if (i && ((i % STACKSLOTS_PER_LINE) == 0))
76 printk(KERN_CONT "\n"); 76 pr_cont("\n");
77 printk(KERN_CONT " %08lx", *stack++); 77 pr_cont(" %08lx", *stack++);
78 touch_nmi_watchdog(); 78 touch_nmi_watchdog();
79 } 79 }
80 printk(KERN_CONT "\n"); 80 pr_cont("\n");
81 show_trace_log_lvl(task, regs, sp, bp, log_lvl); 81 show_trace_log_lvl(task, regs, sp, bp, log_lvl);
82} 82}
83 83
@@ -86,12 +86,11 @@ void show_regs(struct pt_regs *regs)
86{ 86{
87 int i; 87 int i;
88 88
89 print_modules();
90 __show_regs(regs, !user_mode_vm(regs)); 89 __show_regs(regs, !user_mode_vm(regs));
91 90
92 printk(KERN_EMERG "Process %.*s (pid: %d, ti=%p task=%p task.ti=%p)\n", 91 pr_emerg("Process %.*s (pid: %d, ti=%p task=%p task.ti=%p)\n",
93 TASK_COMM_LEN, current->comm, task_pid_nr(current), 92 TASK_COMM_LEN, current->comm, task_pid_nr(current),
94 current_thread_info(), current, task_thread_info(current)); 93 current_thread_info(), current, task_thread_info(current));
95 /* 94 /*
96 * When in-kernel, we also print out the stack and code at the 95 * When in-kernel, we also print out the stack and code at the
97 * time of the fault.. 96 * time of the fault..
@@ -102,10 +101,10 @@ void show_regs(struct pt_regs *regs)
102 unsigned char c; 101 unsigned char c;
103 u8 *ip; 102 u8 *ip;
104 103
105 printk(KERN_EMERG "Stack:\n"); 104 pr_emerg("Stack:\n");
106 show_stack_log_lvl(NULL, regs, &regs->sp, 0, KERN_EMERG); 105 show_stack_log_lvl(NULL, regs, &regs->sp, 0, KERN_EMERG);
107 106
108 printk(KERN_EMERG "Code: "); 107 pr_emerg("Code:");
109 108
110 ip = (u8 *)regs->ip - code_prologue; 109 ip = (u8 *)regs->ip - code_prologue;
111 if (ip < (u8 *)PAGE_OFFSET || probe_kernel_address(ip, c)) { 110 if (ip < (u8 *)PAGE_OFFSET || probe_kernel_address(ip, c)) {
@@ -116,16 +115,16 @@ void show_regs(struct pt_regs *regs)
116 for (i = 0; i < code_len; i++, ip++) { 115 for (i = 0; i < code_len; i++, ip++) {
117 if (ip < (u8 *)PAGE_OFFSET || 116 if (ip < (u8 *)PAGE_OFFSET ||
118 probe_kernel_address(ip, c)) { 117 probe_kernel_address(ip, c)) {
119 printk(KERN_CONT " Bad EIP value."); 118 pr_cont(" Bad EIP value.");
120 break; 119 break;
121 } 120 }
122 if (ip == (u8 *)regs->ip) 121 if (ip == (u8 *)regs->ip)
123 printk(KERN_CONT "<%02x> ", c); 122 pr_cont(" <%02x>", c);
124 else 123 else
125 printk(KERN_CONT "%02x ", c); 124 pr_cont(" %02x", c);
126 } 125 }
127 } 126 }
128 printk(KERN_CONT "\n"); 127 pr_cont("\n");
129} 128}
130 129
131int is_valid_bugaddr(unsigned long ip) 130int is_valid_bugaddr(unsigned long ip)
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index 791b76122aa8..b653675d5288 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -228,20 +228,20 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
228 if (stack >= irq_stack && stack <= irq_stack_end) { 228 if (stack >= irq_stack && stack <= irq_stack_end) {
229 if (stack == irq_stack_end) { 229 if (stack == irq_stack_end) {
230 stack = (unsigned long *) (irq_stack_end[-1]); 230 stack = (unsigned long *) (irq_stack_end[-1]);
231 printk(KERN_CONT " <EOI> "); 231 pr_cont(" <EOI> ");
232 } 232 }
233 } else { 233 } else {
234 if (((long) stack & (THREAD_SIZE-1)) == 0) 234 if (((long) stack & (THREAD_SIZE-1)) == 0)
235 break; 235 break;
236 } 236 }
237 if (i && ((i % STACKSLOTS_PER_LINE) == 0)) 237 if (i && ((i % STACKSLOTS_PER_LINE) == 0))
238 printk(KERN_CONT "\n"); 238 pr_cont("\n");
239 printk(KERN_CONT " %016lx", *stack++); 239 pr_cont(" %016lx", *stack++);
240 touch_nmi_watchdog(); 240 touch_nmi_watchdog();
241 } 241 }
242 preempt_enable(); 242 preempt_enable();
243 243
244 printk(KERN_CONT "\n"); 244 pr_cont("\n");
245 show_trace_log_lvl(task, regs, sp, bp, log_lvl); 245 show_trace_log_lvl(task, regs, sp, bp, log_lvl);
246} 246}
247 247
@@ -254,10 +254,9 @@ void show_regs(struct pt_regs *regs)
254 254
255 sp = regs->sp; 255 sp = regs->sp;
256 printk("CPU %d ", cpu); 256 printk("CPU %d ", cpu);
257 print_modules();
258 __show_regs(regs, 1); 257 __show_regs(regs, 1);
259 printk("Process %s (pid: %d, threadinfo %p, task %p)\n", 258 printk(KERN_DEFAULT "Process %s (pid: %d, threadinfo %p, task %p)\n",
260 cur->comm, cur->pid, task_thread_info(cur), cur); 259 cur->comm, cur->pid, task_thread_info(cur), cur);
261 260
262 /* 261 /*
263 * When in-kernel, we also print out the stack and code at the 262 * When in-kernel, we also print out the stack and code at the
@@ -284,16 +283,16 @@ void show_regs(struct pt_regs *regs)
284 for (i = 0; i < code_len; i++, ip++) { 283 for (i = 0; i < code_len; i++, ip++) {
285 if (ip < (u8 *)PAGE_OFFSET || 284 if (ip < (u8 *)PAGE_OFFSET ||
286 probe_kernel_address(ip, c)) { 285 probe_kernel_address(ip, c)) {
287 printk(KERN_CONT " Bad RIP value."); 286 pr_cont(" Bad RIP value.");
288 break; 287 break;
289 } 288 }
290 if (ip == (u8 *)regs->ip) 289 if (ip == (u8 *)regs->ip)
291 printk(KERN_CONT "<%02x> ", c); 290 pr_cont("<%02x> ", c);
292 else 291 else
293 printk(KERN_CONT "%02x ", c); 292 pr_cont("%02x ", c);
294 } 293 }
295 } 294 }
296 printk(KERN_CONT "\n"); 295 pr_cont("\n");
297} 296}
298 297
299int is_valid_bugaddr(unsigned long ip) 298int is_valid_bugaddr(unsigned long ip)
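
The dumpstack hunks replace printk(KERN_CONT ...) with the shorter pr_cont() helper, which continues the previously started log line. A minimal module sketch of that idiom, assuming a standard kernel module build environment:

/* Minimal sketch of the pr_cont() idiom used above: start a line at
 * an explicit level, then append level-less fragments to it. */
#include <linux/module.h>
#include <linux/printk.h>

static int __init pr_cont_demo_init(void)
{
	int i;

	pr_emerg("Code:");
	for (i = 0; i < 4; i++)
		pr_cont(" %02x", 0x90);	/* append to the same line */
	pr_cont("\n");
	return 0;
}

static void __exit pr_cont_demo_exit(void) { }

module_init(pr_cont_demo_init);
module_exit(pr_cont_demo_exit);
MODULE_LICENSE("GPL");
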
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 7d65133b51be..111f6bbd8b38 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -1758,10 +1758,30 @@ end_repeat_nmi:
1758 */ 1758 */
1759 call save_paranoid 1759 call save_paranoid
1760 DEFAULT_FRAME 0 1760 DEFAULT_FRAME 0
1761
1762 /*
1763 * Save off the CR2 register. If we take a page fault in the NMI then
1764 * it could corrupt the CR2 value. If the NMI preempts a page fault
1765 * handler before it was able to read the CR2 register, and then the
1766 * NMI itself takes a page fault, the page fault that was preempted
1767 * will read the information from the NMI page fault and not the
1768 * origin fault. Save it off and restore it if it changes.
1769 * Use the r12 callee-saved register.
1770 */
1771 movq %cr2, %r12
1772
1761 /* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */ 1773 /* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */
1762 movq %rsp,%rdi 1774 movq %rsp,%rdi
1763 movq $-1,%rsi 1775 movq $-1,%rsi
1764 call do_nmi 1776 call do_nmi
1777
1778 /* Did the NMI take a page fault? Restore cr2 if it did */
1779 movq %cr2, %rcx
1780 cmpq %rcx, %r12
1781 je 1f
1782 movq %r12, %cr2
17831:
1784
1765 testl %ebx,%ebx /* swapgs needed? */ 1785 testl %ebx,%ebx /* swapgs needed? */
1766 jnz nmi_restore 1786 jnz nmi_restore
1767nmi_swapgs: 1787nmi_swapgs:
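
The entry_64.S hunk above snapshots CR2 into a callee-saved register before do_nmi and restores it only if the NMI body changed it, so a page fault taken inside the NMI cannot clobber the CR2 of a preempted page-fault handler. A userspace model of the save/compare/restore shape, with everything (the fake cr2 variable and handlers) purely illustrative:

/* Userspace model of the CR2 guard above: a fault inside the NMI
 * body clobbers a shared "cr2"; the caller restores its snapshot. */
#include <stdio.h>

static unsigned long cr2 = 0x1000;	/* stand-in for %cr2 */

static void do_nmi_model(void)
{
	cr2 = 0xdeadbeef;		/* NMI body took a page fault */
}

int main(void)
{
	unsigned long saved = cr2;	/* movq %cr2, %r12 */

	do_nmi_model();

	if (cr2 != saved)		/* cmpq %rcx, %r12 */
		cr2 = saved;		/* movq %r12, %cr2 */

	printf("cr2 restored to %#lx\n", cr2);
	return 0;
}
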
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 3dafc6003b7c..1f5f1d5d2a02 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -294,9 +294,9 @@ void fixup_irqs(void)
294 raw_spin_unlock(&desc->lock); 294 raw_spin_unlock(&desc->lock);
295 295
296 if (break_affinity && set_affinity) 296 if (break_affinity && set_affinity)
297 printk("Broke affinity for irq %i\n", irq); 297 pr_notice("Broke affinity for irq %i\n", irq);
298 else if (!set_affinity) 298 else if (!set_affinity)
299 printk("Cannot set affinity for irq %i\n", irq); 299 pr_notice("Cannot set affinity for irq %i\n", irq);
300 } 300 }
301 301
302 /* 302 /*
diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c
index fbdfc6917180..4873e62db6a1 100644
--- a/arch/x86/kernel/microcode_core.c
+++ b/arch/x86/kernel/microcode_core.c
@@ -87,6 +87,7 @@
87#include <asm/microcode.h> 87#include <asm/microcode.h>
88#include <asm/processor.h> 88#include <asm/processor.h>
89#include <asm/cpu_device_id.h> 89#include <asm/cpu_device_id.h>
90#include <asm/perf_event.h>
90 91
91MODULE_DESCRIPTION("Microcode Update Driver"); 92MODULE_DESCRIPTION("Microcode Update Driver");
92MODULE_AUTHOR("Tigran Aivazian <tigran@aivazian.fsnet.co.uk>"); 93MODULE_AUTHOR("Tigran Aivazian <tigran@aivazian.fsnet.co.uk>");
@@ -277,7 +278,6 @@ static int reload_for_cpu(int cpu)
277 struct ucode_cpu_info *uci = ucode_cpu_info + cpu; 278 struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
278 int err = 0; 279 int err = 0;
279 280
280 mutex_lock(&microcode_mutex);
281 if (uci->valid) { 281 if (uci->valid) {
282 enum ucode_state ustate; 282 enum ucode_state ustate;
283 283
@@ -288,7 +288,6 @@ static int reload_for_cpu(int cpu)
288 if (ustate == UCODE_ERROR) 288 if (ustate == UCODE_ERROR)
289 err = -EINVAL; 289 err = -EINVAL;
290 } 290 }
291 mutex_unlock(&microcode_mutex);
292 291
293 return err; 292 return err;
294} 293}
@@ -298,19 +297,31 @@ static ssize_t reload_store(struct device *dev,
298 const char *buf, size_t size) 297 const char *buf, size_t size)
299{ 298{
300 unsigned long val; 299 unsigned long val;
301 int cpu = dev->id; 300 int cpu;
302 ssize_t ret = 0; 301 ssize_t ret = 0, tmp_ret;
303 302
304 ret = kstrtoul(buf, 0, &val); 303 ret = kstrtoul(buf, 0, &val);
305 if (ret) 304 if (ret)
306 return ret; 305 return ret;
307 306
308 if (val == 1) { 307 if (val != 1)
309 get_online_cpus(); 308 return size;
310 if (cpu_online(cpu)) 309
311 ret = reload_for_cpu(cpu); 310 get_online_cpus();
312 put_online_cpus(); 311 mutex_lock(&microcode_mutex);
312 for_each_online_cpu(cpu) {
313 tmp_ret = reload_for_cpu(cpu);
314 if (tmp_ret != 0)
315 pr_warn("Error reloading microcode on CPU %d\n", cpu);
316
317 /* save retval of the first encountered reload error */
318 if (!ret)
319 ret = tmp_ret;
313 } 320 }
321 if (!ret)
322 perf_check_microcode();
323 mutex_unlock(&microcode_mutex);
324 put_online_cpus();
314 325
315 if (!ret) 326 if (!ret)
316 ret = size; 327 ret = size;
@@ -339,7 +350,6 @@ static DEVICE_ATTR(version, 0400, version_show, NULL);
339static DEVICE_ATTR(processor_flags, 0400, pf_show, NULL); 350static DEVICE_ATTR(processor_flags, 0400, pf_show, NULL);
340 351
341static struct attribute *mc_default_attrs[] = { 352static struct attribute *mc_default_attrs[] = {
342 &dev_attr_reload.attr,
343 &dev_attr_version.attr, 353 &dev_attr_version.attr,
344 &dev_attr_processor_flags.attr, 354 &dev_attr_processor_flags.attr,
345 NULL 355 NULL
@@ -504,7 +514,7 @@ static struct notifier_block __refdata mc_cpu_notifier = {
504 514
505#ifdef MODULE 515#ifdef MODULE
506/* Autoload on Intel and AMD systems */ 516/* Autoload on Intel and AMD systems */
507static const struct x86_cpu_id microcode_id[] = { 517static const struct x86_cpu_id __initconst microcode_id[] = {
508#ifdef CONFIG_MICROCODE_INTEL 518#ifdef CONFIG_MICROCODE_INTEL
509 { X86_VENDOR_INTEL, X86_FAMILY_ANY, X86_MODEL_ANY, }, 519 { X86_VENDOR_INTEL, X86_FAMILY_ANY, X86_MODEL_ANY, },
510#endif 520#endif
@@ -516,6 +526,16 @@ static const struct x86_cpu_id microcode_id[] = {
516MODULE_DEVICE_TABLE(x86cpu, microcode_id); 526MODULE_DEVICE_TABLE(x86cpu, microcode_id);
517#endif 527#endif
518 528
529static struct attribute *cpu_root_microcode_attrs[] = {
530 &dev_attr_reload.attr,
531 NULL
532};
533
534static struct attribute_group cpu_root_microcode_group = {
535 .name = "microcode",
536 .attrs = cpu_root_microcode_attrs,
537};
538
519static int __init microcode_init(void) 539static int __init microcode_init(void)
520{ 540{
521 struct cpuinfo_x86 *c = &cpu_data(0); 541 struct cpuinfo_x86 *c = &cpu_data(0);
@@ -540,16 +560,25 @@ static int __init microcode_init(void)
540 mutex_lock(&microcode_mutex); 560 mutex_lock(&microcode_mutex);
541 561
542 error = subsys_interface_register(&mc_cpu_interface); 562 error = subsys_interface_register(&mc_cpu_interface);
543 563 if (!error)
564 perf_check_microcode();
544 mutex_unlock(&microcode_mutex); 565 mutex_unlock(&microcode_mutex);
545 put_online_cpus(); 566 put_online_cpus();
546 567
547 if (error) 568 if (error)
548 goto out_pdev; 569 goto out_pdev;
549 570
571 error = sysfs_create_group(&cpu_subsys.dev_root->kobj,
572 &cpu_root_microcode_group);
573
574 if (error) {
575 pr_err("Error creating microcode group!\n");
576 goto out_driver;
577 }
578
550 error = microcode_dev_init(); 579 error = microcode_dev_init();
551 if (error) 580 if (error)
552 goto out_driver; 581 goto out_ucode_group;
553 582
554 register_syscore_ops(&mc_syscore_ops); 583 register_syscore_ops(&mc_syscore_ops);
555 register_hotcpu_notifier(&mc_cpu_notifier); 584 register_hotcpu_notifier(&mc_cpu_notifier);
@@ -559,7 +588,11 @@ static int __init microcode_init(void)
559 588
560 return 0; 589 return 0;
561 590
562out_driver: 591 out_ucode_group:
592 sysfs_remove_group(&cpu_subsys.dev_root->kobj,
593 &cpu_root_microcode_group);
594
595 out_driver:
563 get_online_cpus(); 596 get_online_cpus();
564 mutex_lock(&microcode_mutex); 597 mutex_lock(&microcode_mutex);
565 598
@@ -568,7 +601,7 @@ out_driver:
568 mutex_unlock(&microcode_mutex); 601 mutex_unlock(&microcode_mutex);
569 put_online_cpus(); 602 put_online_cpus();
570 603
571out_pdev: 604 out_pdev:
572 platform_device_unregister(microcode_pdev); 605 platform_device_unregister(microcode_pdev);
573 return error; 606 return error;
574 607
@@ -584,6 +617,9 @@ static void __exit microcode_exit(void)
584 unregister_hotcpu_notifier(&mc_cpu_notifier); 617 unregister_hotcpu_notifier(&mc_cpu_notifier);
585 unregister_syscore_ops(&mc_syscore_ops); 618 unregister_syscore_ops(&mc_syscore_ops);
586 619
620 sysfs_remove_group(&cpu_subsys.dev_root->kobj,
621 &cpu_root_microcode_group);
622
587 get_online_cpus(); 623 get_online_cpus();
588 mutex_lock(&microcode_mutex); 624 mutex_lock(&microcode_mutex);
589 625
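
The microcode hunks move the reload knob out of the per-CPU attribute list into a named attribute group ("microcode") registered on the cpu subsystem root. A minimal hedged sketch of the same attribute_group mechanics follows, parented under /sys/kernel/demo rather than the cpu subsystem to keep it self-contained; all names are demo placeholders.

/* Minimal sketch of the attribute_group registration used above,
 * parented under /sys/kernel/demo instead of the cpu subsystem. */
#include <linux/module.h>
#include <linux/kobject.h>
#include <linux/sysfs.h>

static ssize_t reload_store(struct kobject *kobj, struct kobj_attribute *attr,
			    const char *buf, size_t count)
{
	pr_info("demo reload triggered\n");
	return count;
}

static struct kobj_attribute reload_attr = __ATTR(reload, 0200, NULL, reload_store);

static struct attribute *demo_attrs[] = {
	&reload_attr.attr,
	NULL
};

static struct attribute_group demo_group = {
	.name	= "microcode_demo",
	.attrs	= demo_attrs,
};

static struct kobject *demo_kobj;

static int __init demo_init(void)
{
	demo_kobj = kobject_create_and_add("demo", kernel_kobj);
	if (!demo_kobj)
		return -ENOMEM;
	return sysfs_create_group(demo_kobj, &demo_group);
}

static void __exit demo_exit(void)
{
	sysfs_remove_group(demo_kobj, &demo_group);
	kobject_put(demo_kobj);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");
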
diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c
index f21fd94ac897..202494d2ec6e 100644
--- a/arch/x86/kernel/module.c
+++ b/arch/x86/kernel/module.c
@@ -15,6 +15,9 @@
15 along with this program; if not, write to the Free Software 15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 16 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17*/ 17*/
18
19#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
20
18#include <linux/moduleloader.h> 21#include <linux/moduleloader.h>
19#include <linux/elf.h> 22#include <linux/elf.h>
20#include <linux/vmalloc.h> 23#include <linux/vmalloc.h>
@@ -30,9 +33,14 @@
30#include <asm/pgtable.h> 33#include <asm/pgtable.h>
31 34
32#if 0 35#if 0
33#define DEBUGP printk 36#define DEBUGP(fmt, ...) \
37 printk(KERN_DEBUG fmt, ##__VA_ARGS__)
34#else 38#else
35#define DEBUGP(fmt...) 39#define DEBUGP(fmt, ...) \
40do { \
41 if (0) \
42 printk(KERN_DEBUG fmt, ##__VA_ARGS__); \
43} while (0)
36#endif 44#endif
37 45
38void *module_alloc(unsigned long size) 46void *module_alloc(unsigned long size)
@@ -56,8 +64,8 @@ int apply_relocate(Elf32_Shdr *sechdrs,
56 Elf32_Sym *sym; 64 Elf32_Sym *sym;
57 uint32_t *location; 65 uint32_t *location;
58 66
59 DEBUGP("Applying relocate section %u to %u\n", relsec, 67 DEBUGP("Applying relocate section %u to %u\n",
60 sechdrs[relsec].sh_info); 68 relsec, sechdrs[relsec].sh_info);
61 for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rel); i++) { 69 for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rel); i++) {
62 /* This is where to make the change */ 70 /* This is where to make the change */
63 location = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr 71 location = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr
@@ -77,7 +85,7 @@ int apply_relocate(Elf32_Shdr *sechdrs,
77 *location += sym->st_value - (uint32_t)location; 85 *location += sym->st_value - (uint32_t)location;
78 break; 86 break;
79 default: 87 default:
80 printk(KERN_ERR "module %s: Unknown relocation: %u\n", 88 pr_err("%s: Unknown relocation: %u\n",
81 me->name, ELF32_R_TYPE(rel[i].r_info)); 89 me->name, ELF32_R_TYPE(rel[i].r_info));
82 return -ENOEXEC; 90 return -ENOEXEC;
83 } 91 }
@@ -97,8 +105,8 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
97 void *loc; 105 void *loc;
98 u64 val; 106 u64 val;
99 107
100 DEBUGP("Applying relocate section %u to %u\n", relsec, 108 DEBUGP("Applying relocate section %u to %u\n",
101 sechdrs[relsec].sh_info); 109 relsec, sechdrs[relsec].sh_info);
102 for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rel); i++) { 110 for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rel); i++) {
103 /* This is where to make the change */ 111 /* This is where to make the change */
104 loc = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr 112 loc = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr
@@ -110,8 +118,8 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
110 + ELF64_R_SYM(rel[i].r_info); 118 + ELF64_R_SYM(rel[i].r_info);
111 119
112 DEBUGP("type %d st_value %Lx r_addend %Lx loc %Lx\n", 120 DEBUGP("type %d st_value %Lx r_addend %Lx loc %Lx\n",
113 (int)ELF64_R_TYPE(rel[i].r_info), 121 (int)ELF64_R_TYPE(rel[i].r_info),
114 sym->st_value, rel[i].r_addend, (u64)loc); 122 sym->st_value, rel[i].r_addend, (u64)loc);
115 123
116 val = sym->st_value + rel[i].r_addend; 124 val = sym->st_value + rel[i].r_addend;
117 125
@@ -140,7 +148,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
140#endif 148#endif
141 break; 149 break;
142 default: 150 default:
143 printk(KERN_ERR "module %s: Unknown rela relocation: %llu\n", 151 pr_err("%s: Unknown rela relocation: %llu\n",
144 me->name, ELF64_R_TYPE(rel[i].r_info)); 152 me->name, ELF64_R_TYPE(rel[i].r_info));
145 return -ENOEXEC; 153 return -ENOEXEC;
146 } 154 }
@@ -148,9 +156,9 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
148 return 0; 156 return 0;
149 157
150overflow: 158overflow:
151 printk(KERN_ERR "overflow in relocation type %d val %Lx\n", 159 pr_err("overflow in relocation type %d val %Lx\n",
152 (int)ELF64_R_TYPE(rel[i].r_info), val); 160 (int)ELF64_R_TYPE(rel[i].r_info), val);
153 printk(KERN_ERR "`%s' likely not compiled with -mcmodel=kernel\n", 161 pr_err("`%s' likely not compiled with -mcmodel=kernel\n",
154 me->name); 162 me->name);
155 return -ENOEXEC; 163 return -ENOEXEC;
156} 164}
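
The DEBUGP rewrite above trades the old empty variadic macro for a "do { if (0) printk(...); } while (0)" form: the call still compiles to nothing, but the compiler keeps type-checking the format string and arguments. A userspace illustration of the same trick:

/* The "if (0)" form compiles away but still type-checks arguments,
 * unlike the old empty "#define DEBUGP(fmt...)". */
#include <stdio.h>

#define DEBUGP(fmt, ...)			\
do {						\
	if (0)					\
		printf(fmt, ##__VA_ARGS__);	\
} while (0)

int main(void)
{
	/* emits no output, but a format/argument mismatch here would
	 * still be caught at compile time */
	DEBUGP("Applying relocate section %u to %u\n", 1u, 2u);
	return 0;
}
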
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index a0b2f84457be..f84f5c57de35 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -365,8 +365,9 @@ static __kprobes void default_do_nmi(struct pt_regs *regs)
365#ifdef CONFIG_X86_32 365#ifdef CONFIG_X86_32
366/* 366/*
367 * For i386, NMIs use the same stack as the kernel, and we can 367 * For i386, NMIs use the same stack as the kernel, and we can
368 * add a workaround to the iret problem in C. Simply have 3 states 368 * add a workaround to the iret problem in C (preventing nested
369 * the NMI can be in. 369 * NMIs if an NMI takes a trap). Simply have 3 states the NMI
370 * can be in:
370 * 371 *
371 * 1) not running 372 * 1) not running
372 * 2) executing 373 * 2) executing
@@ -383,32 +384,50 @@ static __kprobes void default_do_nmi(struct pt_regs *regs)
383 * If an NMI hits a breakpoint that executes an iret, another 384 * If an NMI hits a breakpoint that executes an iret, another
384 * NMI can preempt it. We do not want to allow this new NMI 385 * NMI can preempt it. We do not want to allow this new NMI
385 * to run, but we want to execute it when the first one finishes. 386 * to run, but we want to execute it when the first one finishes.
386 * We set the state to "latched", and the first NMI will perform 387 * We set the state to "latched", and the exit of the first NMI will
387 * an cmpxchg on the state, and if it doesn't successfully 388 * perform a dec_return, if the result is zero (NOT_RUNNING), then
388 * reset the state to "not running" it will restart the next 389 * it will simply exit the NMI handler. If not, the dec_return
389 * NMI. 390 * would have set the state to NMI_EXECUTING (what we want it to
391 * be when we are running). In this case, we simply jump back
392 * to rerun the NMI handler again, and restart the 'latched' NMI.
393 *
394 * No trap (breakpoint or page fault) should be hit before nmi_restart,
395 * thus there is no race between the first check of state for NOT_RUNNING
396 * and setting it to NMI_EXECUTING. The HW will prevent nested NMIs
397 * at this point.
398 *
399 * In case the NMI takes a page fault, we need to save off the CR2
400 * because the NMI could have preempted another page fault and corrupt
401 * the CR2 that is about to be read. As nested NMIs must be restarted
402 * and they can not take breakpoints or page faults, the update of the
403 * CR2 must be done before converting the nmi state back to NOT_RUNNING.
404 * Otherwise, there would be a race of another nested NMI coming in
405 * after setting state to NOT_RUNNING but before updating the nmi_cr2.
390 */ 406 */
391enum nmi_states { 407enum nmi_states {
392 NMI_NOT_RUNNING, 408 NMI_NOT_RUNNING = 0,
393 NMI_EXECUTING, 409 NMI_EXECUTING,
394 NMI_LATCHED, 410 NMI_LATCHED,
395}; 411};
396static DEFINE_PER_CPU(enum nmi_states, nmi_state); 412static DEFINE_PER_CPU(enum nmi_states, nmi_state);
413static DEFINE_PER_CPU(unsigned long, nmi_cr2);
397 414
398#define nmi_nesting_preprocess(regs) \ 415#define nmi_nesting_preprocess(regs) \
399 do { \ 416 do { \
400 if (__get_cpu_var(nmi_state) != NMI_NOT_RUNNING) { \ 417 if (this_cpu_read(nmi_state) != NMI_NOT_RUNNING) { \
401 __get_cpu_var(nmi_state) = NMI_LATCHED; \ 418 this_cpu_write(nmi_state, NMI_LATCHED); \
402 return; \ 419 return; \
403 } \ 420 } \
404 nmi_restart: \ 421 this_cpu_write(nmi_state, NMI_EXECUTING); \
405 __get_cpu_var(nmi_state) = NMI_EXECUTING; \ 422 this_cpu_write(nmi_cr2, read_cr2()); \
406 } while (0) 423 } while (0); \
424 nmi_restart:
407 425
408#define nmi_nesting_postprocess() \ 426#define nmi_nesting_postprocess() \
409 do { \ 427 do { \
410 if (cmpxchg(&__get_cpu_var(nmi_state), \ 428 if (unlikely(this_cpu_read(nmi_cr2) != read_cr2())) \
411 NMI_EXECUTING, NMI_NOT_RUNNING) != NMI_EXECUTING) \ 429 write_cr2(this_cpu_read(nmi_cr2)); \
430 if (this_cpu_dec_return(nmi_state)) \
412 goto nmi_restart; \ 431 goto nmi_restart; \
413 } while (0) 432 } while (0)
414#else /* x86_64 */ 433#else /* x86_64 */
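
The new i386 nesting logic above relies on the enum values being ordered 0/1/2 so that a decrement from LATCHED lands on EXECUTING and forces a rerun, while a decrement from EXECUTING lands on NOT_RUNNING and exits. A single-threaded userspace model of that state machine (the recursion stands in for a hardware-nested NMI; illustrative only):

/* Model of the nested-NMI state machine: 0 = NOT_RUNNING,
 * 1 = EXECUTING, 2 = LATCHED. A nested NMI latches; the outer
 * handler's decrement then reruns the body instead of exiting. */
#include <stdio.h>

enum { NMI_NOT_RUNNING = 0, NMI_EXECUTING, NMI_LATCHED };

static int nmi_state;

static void nmi_handler(int nested_arrives)
{
	if (nmi_state != NMI_NOT_RUNNING) {
		nmi_state = NMI_LATCHED;	/* defer: outer copy reruns */
		return;
	}
	nmi_state = NMI_EXECUTING;
restart:
	printf("handling NMI\n");
	if (nested_arrives) {
		nmi_handler(0);			/* a second NMI lands here */
		nested_arrives = 0;
	}
	if (--nmi_state)			/* dec_return != 0: latched */
		goto restart;
}

int main(void)
{
	nmi_handler(1);				/* runs the body twice */
	return 0;
}
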
diff --git a/arch/x86/kernel/nmi_selftest.c b/arch/x86/kernel/nmi_selftest.c
index 149b8d9c6ad4..6d9582ec0324 100644
--- a/arch/x86/kernel/nmi_selftest.c
+++ b/arch/x86/kernel/nmi_selftest.c
@@ -42,7 +42,8 @@ static int __init nmi_unk_cb(unsigned int val, struct pt_regs *regs)
42static void __init init_nmi_testsuite(void) 42static void __init init_nmi_testsuite(void)
43{ 43{
44 /* trap all the unknown NMIs we may generate */ 44 /* trap all the unknown NMIs we may generate */
45 register_nmi_handler_initonly(NMI_UNKNOWN, nmi_unk_cb, 0, "nmi_selftest_unk"); 45 register_nmi_handler(NMI_UNKNOWN, nmi_unk_cb, 0, "nmi_selftest_unk",
46 __initdata);
46} 47}
47 48
48static void __init cleanup_nmi_testsuite(void) 49static void __init cleanup_nmi_testsuite(void)
@@ -64,8 +65,8 @@ static void __init test_nmi_ipi(struct cpumask *mask)
64{ 65{
65 unsigned long timeout; 66 unsigned long timeout;
66 67
67 if (register_nmi_handler_initonly(NMI_LOCAL, test_nmi_ipi_callback, 68 if (register_nmi_handler(NMI_LOCAL, test_nmi_ipi_callback,
68 NMI_FLAG_FIRST, "nmi_selftest")) { 69 NMI_FLAG_FIRST, "nmi_selftest", __initdata)) {
69 nmi_fail = FAILURE; 70 nmi_fail = FAILURE;
70 return; 71 return;
71 } 72 }
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 9ce885996fd7..17fff18a1031 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -352,9 +352,7 @@ struct pv_cpu_ops pv_cpu_ops = {
352#endif 352#endif
353 .wbinvd = native_wbinvd, 353 .wbinvd = native_wbinvd,
354 .read_msr = native_read_msr_safe, 354 .read_msr = native_read_msr_safe,
355 .rdmsr_regs = native_rdmsr_safe_regs,
356 .write_msr = native_write_msr_safe, 355 .write_msr = native_write_msr_safe,
357 .wrmsr_regs = native_wrmsr_safe_regs,
358 .read_tsc = native_read_tsc, 356 .read_tsc = native_read_tsc,
359 .read_pmc = native_read_pmc, 357 .read_pmc = native_read_pmc,
360 .read_tscp = native_read_tscp, 358 .read_tscp = native_read_tscp,
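
The paravirt hunk simply drops the rdmsr_regs/wrmsr_regs entries from the pv_cpu_ops function-pointer table now that those hooks are gone. For context, a generic C sketch of the ops-table pattern that table uses, with native defaults; names and values here are illustrative, not the kernel's:

/* Generic sketch of a struct-of-function-pointers ops table filled
 * with "native" defaults. All names are illustrative. */
#include <stdio.h>
#include <stdint.h>

struct cpu_ops {
	uint64_t (*read_msr)(unsigned idx, int *err);
	int	 (*write_msr)(unsigned idx, uint64_t val);
};

static uint64_t native_read_msr_model(unsigned idx, int *err)
{
	*err = 0;
	return 0;			/* placeholder value */
}

static int native_write_msr_model(unsigned idx, uint64_t val)
{
	return 0;			/* success */
}

static struct cpu_ops pv_ops = {
	.read_msr	= native_read_msr_model,
	.write_msr	= native_write_msr_model,
};

int main(void)
{
	int err;
	uint64_t v = pv_ops.read_msr(0x1b, &err);

	printf("msr=0x%llx err=%d write=%d\n",
	       (unsigned long long)v, err, pv_ops.write_msr(0x1b, v));
	return 0;
}
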
diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c
index b72838bae64a..299d49302e7d 100644
--- a/arch/x86/kernel/pci-calgary_64.c
+++ b/arch/x86/kernel/pci-calgary_64.c
@@ -22,6 +22,8 @@
22 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 22 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 */ 23 */
24 24
25#define pr_fmt(fmt) "Calgary: " fmt
26
25#include <linux/kernel.h> 27#include <linux/kernel.h>
26#include <linux/init.h> 28#include <linux/init.h>
27#include <linux/types.h> 29#include <linux/types.h>
@@ -245,7 +247,7 @@ static unsigned long iommu_range_alloc(struct device *dev,
245 offset = iommu_area_alloc(tbl->it_map, tbl->it_size, 0, 247 offset = iommu_area_alloc(tbl->it_map, tbl->it_size, 0,
246 npages, 0, boundary_size, 0); 248 npages, 0, boundary_size, 0);
247 if (offset == ~0UL) { 249 if (offset == ~0UL) {
248 printk(KERN_WARNING "Calgary: IOMMU full.\n"); 250 pr_warn("IOMMU full\n");
249 spin_unlock_irqrestore(&tbl->it_lock, flags); 251 spin_unlock_irqrestore(&tbl->it_lock, flags);
250 if (panic_on_overflow) 252 if (panic_on_overflow)
251 panic("Calgary: fix the allocator.\n"); 253 panic("Calgary: fix the allocator.\n");
@@ -271,8 +273,8 @@ static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl,
271 entry = iommu_range_alloc(dev, tbl, npages); 273 entry = iommu_range_alloc(dev, tbl, npages);
272 274
273 if (unlikely(entry == DMA_ERROR_CODE)) { 275 if (unlikely(entry == DMA_ERROR_CODE)) {
274 printk(KERN_WARNING "Calgary: failed to allocate %u pages in " 276 pr_warn("failed to allocate %u pages in iommu %p\n",
275 "iommu %p\n", npages, tbl); 277 npages, tbl);
276 return DMA_ERROR_CODE; 278 return DMA_ERROR_CODE;
277 } 279 }
278 280
@@ -561,8 +563,7 @@ static void calgary_tce_cache_blast(struct iommu_table *tbl)
561 i++; 563 i++;
562 } while ((val & 0xff) != 0xff && i < 100); 564 } while ((val & 0xff) != 0xff && i < 100);
563 if (i == 100) 565 if (i == 100)
564 printk(KERN_WARNING "Calgary: PCI bus not quiesced, " 566 pr_warn("PCI bus not quiesced, continuing anyway\n");
565 "continuing anyway\n");
566 567
567 /* invalidate TCE cache */ 568 /* invalidate TCE cache */
568 target = calgary_reg(bbar, tar_offset(tbl->it_busno)); 569 target = calgary_reg(bbar, tar_offset(tbl->it_busno));
@@ -604,8 +605,7 @@ begin:
604 i++; 605 i++;
605 } while ((val64 & 0xff) != 0xff && i < 100); 606 } while ((val64 & 0xff) != 0xff && i < 100);
606 if (i == 100) 607 if (i == 100)
607 printk(KERN_WARNING "CalIOC2: PCI bus not quiesced, " 608 pr_warn("CalIOC2: PCI bus not quiesced, continuing anyway\n");
608 "continuing anyway\n");
609 609
610 /* 3. poll Page Migration DEBUG for SoftStopFault */ 610 /* 3. poll Page Migration DEBUG for SoftStopFault */
611 target = calgary_reg(bbar, phb_offset(bus) | PHB_PAGE_MIG_DEBUG); 611 target = calgary_reg(bbar, phb_offset(bus) | PHB_PAGE_MIG_DEBUG);
@@ -617,8 +617,7 @@ begin:
617 if (++count < 100) 617 if (++count < 100)
618 goto begin; 618 goto begin;
619 else { 619 else {
620 printk(KERN_WARNING "CalIOC2: too many SoftStopFaults, " 620 pr_warn("CalIOC2: too many SoftStopFaults, aborting TCE cache flush sequence!\n");
621 "aborting TCE cache flush sequence!\n");
622 return; /* pray for the best */ 621 return; /* pray for the best */
623 } 622 }
624 } 623 }
@@ -840,8 +839,8 @@ static void calgary_dump_error_regs(struct iommu_table *tbl)
840 plssr = be32_to_cpu(readl(target)); 839 plssr = be32_to_cpu(readl(target));
841 840
842 /* If no error, the agent ID in the CSR is not valid */ 841 /* If no error, the agent ID in the CSR is not valid */
843 printk(KERN_EMERG "Calgary: DMA error on Calgary PHB 0x%x, " 842 pr_emerg("DMA error on Calgary PHB 0x%x, 0x%08x@CSR 0x%08x@PLSSR\n",
844 "0x%08x@CSR 0x%08x@PLSSR\n", tbl->it_busno, csr, plssr); 843 tbl->it_busno, csr, plssr);
845} 844}
846 845
847static void calioc2_dump_error_regs(struct iommu_table *tbl) 846static void calioc2_dump_error_regs(struct iommu_table *tbl)
@@ -867,22 +866,21 @@ static void calioc2_dump_error_regs(struct iommu_table *tbl)
867 target = calgary_reg(bbar, phboff | 0x800); 866 target = calgary_reg(bbar, phboff | 0x800);
868 mck = be32_to_cpu(readl(target)); 867 mck = be32_to_cpu(readl(target));
869 868
870 printk(KERN_EMERG "Calgary: DMA error on CalIOC2 PHB 0x%x\n", 869 pr_emerg("DMA error on CalIOC2 PHB 0x%x\n", tbl->it_busno);
871 tbl->it_busno);
872 870
873 printk(KERN_EMERG "Calgary: 0x%08x@CSR 0x%08x@PLSSR 0x%08x@CSMR 0x%08x@MCK\n", 871 pr_emerg("0x%08x@CSR 0x%08x@PLSSR 0x%08x@CSMR 0x%08x@MCK\n",
874 csr, plssr, csmr, mck); 872 csr, plssr, csmr, mck);
875 873
876 /* dump rest of error regs */ 874 /* dump rest of error regs */
877 printk(KERN_EMERG "Calgary: "); 875 pr_emerg("");
878 for (i = 0; i < ARRAY_SIZE(errregs); i++) { 876 for (i = 0; i < ARRAY_SIZE(errregs); i++) {
879 /* err regs are at 0x810 - 0x870 */ 877 /* err regs are at 0x810 - 0x870 */
880 erroff = (0x810 + (i * 0x10)); 878 erroff = (0x810 + (i * 0x10));
881 target = calgary_reg(bbar, phboff | erroff); 879 target = calgary_reg(bbar, phboff | erroff);
882 errregs[i] = be32_to_cpu(readl(target)); 880 errregs[i] = be32_to_cpu(readl(target));
883 printk("0x%08x@0x%lx ", errregs[i], erroff); 881 pr_cont("0x%08x@0x%lx ", errregs[i], erroff);
884 } 882 }
885 printk("\n"); 883 pr_cont("\n");
886 884
887 /* root complex status */ 885 /* root complex status */
888 target = calgary_reg(bbar, phboff | PHB_ROOT_COMPLEX_STATUS); 886 target = calgary_reg(bbar, phboff | PHB_ROOT_COMPLEX_STATUS);
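
The calgary conversion leans on the pr_fmt() hook: defining it before any include makes every pr_*() call in the file carry the "Calgary: " prefix, so the literal prefix can be dropped from each message. A minimal sketch of that idiom, assuming a standard module build environment:

/* Minimal sketch of the pr_fmt() idiom adopted above: define it
 * before the includes and every pr_*() call site gets the prefix. */
#define pr_fmt(fmt) "Calgary: " fmt

#include <linux/module.h>
#include <linux/printk.h>

static int __init pr_fmt_demo_init(void)
{
	pr_warn("IOMMU full\n");	/* logs as "Calgary: IOMMU full" */
	return 0;
}

static void __exit pr_fmt_demo_exit(void) { }

module_init(pr_fmt_demo_init);
module_exit(pr_fmt_demo_exit);
MODULE_LICENSE("GPL");
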
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 735279e54e59..ef6a8456f719 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -1,3 +1,5 @@
1#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
2
1#include <linux/errno.h> 3#include <linux/errno.h>
2#include <linux/kernel.h> 4#include <linux/kernel.h>
3#include <linux/mm.h> 5#include <linux/mm.h>
@@ -145,16 +147,14 @@ void show_regs_common(void)
145 /* Board Name is optional */ 147 /* Board Name is optional */
146 board = dmi_get_system_info(DMI_BOARD_NAME); 148 board = dmi_get_system_info(DMI_BOARD_NAME);
147 149
148 printk(KERN_CONT "\n"); 150 printk(KERN_DEFAULT "Pid: %d, comm: %.20s %s %s %.*s %s %s%s%s\n",
149 printk(KERN_DEFAULT "Pid: %d, comm: %.20s %s %s %.*s", 151 current->pid, current->comm, print_tainted(),
150 current->pid, current->comm, print_tainted(), 152 init_utsname()->release,
151 init_utsname()->release, 153 (int)strcspn(init_utsname()->version, " "),
152 (int)strcspn(init_utsname()->version, " "), 154 init_utsname()->version,
153 init_utsname()->version); 155 vendor, product,
154 printk(KERN_CONT " %s %s", vendor, product); 156 board ? "/" : "",
155 if (board) 157 board ? board : "");
156 printk(KERN_CONT "/%s", board);
157 printk(KERN_CONT "\n");
158} 158}
159 159
160void flush_thread(void) 160void flush_thread(void)
@@ -645,7 +645,7 @@ static void amd_e400_idle(void)
645 amd_e400_c1e_detected = true; 645 amd_e400_c1e_detected = true;
646 if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC)) 646 if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
647 mark_tsc_unstable("TSC halt in AMD C1E"); 647 mark_tsc_unstable("TSC halt in AMD C1E");
648 printk(KERN_INFO "System has AMD C1E enabled\n"); 648 pr_info("System has AMD C1E enabled\n");
649 } 649 }
650 } 650 }
651 651
@@ -659,8 +659,7 @@ static void amd_e400_idle(void)
659 */ 659 */
660 clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_FORCE, 660 clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_FORCE,
661 &cpu); 661 &cpu);
662 printk(KERN_INFO "Switch to broadcast mode on CPU%d\n", 662 pr_info("Switch to broadcast mode on CPU%d\n", cpu);
663 cpu);
664 } 663 }
665 clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &cpu); 664 clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &cpu);
666 665
@@ -681,8 +680,7 @@ void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
681{ 680{
682#ifdef CONFIG_SMP 681#ifdef CONFIG_SMP
683 if (pm_idle == poll_idle && smp_num_siblings > 1) { 682 if (pm_idle == poll_idle && smp_num_siblings > 1) {
684 printk_once(KERN_WARNING "WARNING: polling idle and HT enabled," 683 pr_warn_once("WARNING: polling idle and HT enabled, performance may degrade\n");
685 " performance may degrade.\n");
686 } 684 }
687#endif 685#endif
688 if (pm_idle) 686 if (pm_idle)
@@ -692,11 +690,11 @@ void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
692 /* 690 /*
693 * One CPU supports mwait => All CPUs supports mwait 691 * One CPU supports mwait => All CPUs supports mwait
694 */ 692 */
695 printk(KERN_INFO "using mwait in idle threads.\n"); 693 pr_info("using mwait in idle threads\n");
696 pm_idle = mwait_idle; 694 pm_idle = mwait_idle;
697 } else if (cpu_has_amd_erratum(amd_erratum_400)) { 695 } else if (cpu_has_amd_erratum(amd_erratum_400)) {
698 /* E400: APIC timer interrupt does not wake up CPU from C1e */ 696 /* E400: APIC timer interrupt does not wake up CPU from C1e */
699 printk(KERN_INFO "using AMD E400 aware idle routine\n"); 697 pr_info("using AMD E400 aware idle routine\n");
700 pm_idle = amd_e400_idle; 698 pm_idle = amd_e400_idle;
701 } else 699 } else
702 pm_idle = default_idle; 700 pm_idle = default_idle;
@@ -715,7 +713,7 @@ static int __init idle_setup(char *str)
715 return -EINVAL; 713 return -EINVAL;
716 714
717 if (!strcmp(str, "poll")) { 715 if (!strcmp(str, "poll")) {
718 printk("using polling idle threads.\n"); 716 pr_info("using polling idle threads\n");
719 pm_idle = poll_idle; 717 pm_idle = poll_idle;
720 boot_option_idle_override = IDLE_POLL; 718 boot_option_idle_override = IDLE_POLL;
721 } else if (!strcmp(str, "mwait")) { 719 } else if (!strcmp(str, "mwait")) {
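
The show_regs_common() hunk above collapses a chain of KERN_CONT continuations into one format string, folding the optional board name in with ternaries. A userspace sketch of that consolidation, with placeholder strings standing in for the DMI data:

/* Sketch of folding optional fields into one format string, as in
 * show_regs_common() above. Strings are placeholders. */
#include <stdio.h>

int main(void)
{
	const char *vendor = "ExampleVendor";
	const char *product = "ExampleProduct";
	const char *board = NULL;		/* board name is optional */

	printf("Pid: %d, comm: %.20s %s %s%s%s\n",
	       1234, "demo", vendor, product,
	       board ? "/" : "", board ? board : "");
	return 0;
}
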
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 61cdf7fdf099..0a980c9d7cb8 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -117,10 +117,10 @@ void release_thread(struct task_struct *dead_task)
117{ 117{
118 if (dead_task->mm) { 118 if (dead_task->mm) {
119 if (dead_task->mm->context.size) { 119 if (dead_task->mm->context.size) {
120 printk("WARNING: dead process %8s still has LDT? <%p/%d>\n", 120 pr_warn("WARNING: dead process %8s still has LDT? <%p/%d>\n",
121 dead_task->comm, 121 dead_task->comm,
122 dead_task->mm->context.ldt, 122 dead_task->mm->context.ldt,
123 dead_task->mm->context.size); 123 dead_task->mm->context.size);
124 BUG(); 124 BUG();
125 } 125 }
126 } 126 }
@@ -466,7 +466,7 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
466 task->thread.gs = addr; 466 task->thread.gs = addr;
467 if (doit) { 467 if (doit) {
468 load_gs_index(0); 468 load_gs_index(0);
469 ret = checking_wrmsrl(MSR_KERNEL_GS_BASE, addr); 469 ret = wrmsrl_safe(MSR_KERNEL_GS_BASE, addr);
470 } 470 }
471 } 471 }
472 put_cpu(); 472 put_cpu();
@@ -494,7 +494,7 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
494 /* set the selector to 0 to not confuse 494 /* set the selector to 0 to not confuse
495 __switch_to */ 495 __switch_to */
496 loadsegment(fs, 0); 496 loadsegment(fs, 0);
497 ret = checking_wrmsrl(MSR_FS_BASE, addr); 497 ret = wrmsrl_safe(MSR_FS_BASE, addr);
498 } 498 }
499 } 499 }
500 put_cpu(); 500 put_cpu();
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 5de92f1abd76..52190a938b4a 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -1,3 +1,5 @@
1#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
2
1#include <linux/module.h> 3#include <linux/module.h>
2#include <linux/reboot.h> 4#include <linux/reboot.h>
3#include <linux/init.h> 5#include <linux/init.h>
@@ -20,14 +22,12 @@
20#include <asm/virtext.h> 22#include <asm/virtext.h>
21#include <asm/cpu.h> 23#include <asm/cpu.h>
22#include <asm/nmi.h> 24#include <asm/nmi.h>
25#include <asm/smp.h>
23 26
24#ifdef CONFIG_X86_32 27#include <linux/ctype.h>
25# include <linux/ctype.h> 28#include <linux/mc146818rtc.h>
26# include <linux/mc146818rtc.h> 29#include <asm/realmode.h>
27# include <asm/realmode.h> 30#include <asm/x86_init.h>
28#else
29# include <asm/x86_init.h>
30#endif
31 31
32/* 32/*
33 * Power off function, if any 33 * Power off function, if any
@@ -49,7 +49,7 @@ int reboot_force;
49 */ 49 */
50static int reboot_default = 1; 50static int reboot_default = 1;
51 51
52#if defined(CONFIG_X86_32) && defined(CONFIG_SMP) 52#ifdef CONFIG_SMP
53static int reboot_cpu = -1; 53static int reboot_cpu = -1;
54#endif 54#endif
55 55
@@ -67,8 +67,8 @@ bool port_cf9_safe = false;
67 * reboot=b[ios] | s[mp] | t[riple] | k[bd] | e[fi] [, [w]arm | [c]old] | p[ci] 67 * reboot=b[ios] | s[mp] | t[riple] | k[bd] | e[fi] [, [w]arm | [c]old] | p[ci]
68 * warm Don't set the cold reboot flag 68 * warm Don't set the cold reboot flag
69 * cold Set the cold reboot flag 69 * cold Set the cold reboot flag
70 * bios Reboot by jumping through the BIOS (only for X86_32) 70 * bios Reboot by jumping through the BIOS
71 * smp Reboot by executing reset on BSP or other CPU (only for X86_32) 71 * smp Reboot by executing reset on BSP or other CPU
72 * triple Force a triple fault (init) 72 * triple Force a triple fault (init)
73 * kbd Use the keyboard controller. cold reset (default) 73 * kbd Use the keyboard controller. cold reset (default)
74 * acpi Use the RESET_REG in the FADT 74 * acpi Use the RESET_REG in the FADT
@@ -95,7 +95,6 @@ static int __init reboot_setup(char *str)
95 reboot_mode = 0; 95 reboot_mode = 0;
96 break; 96 break;
97 97
98#ifdef CONFIG_X86_32
99#ifdef CONFIG_SMP 98#ifdef CONFIG_SMP
100 case 's': 99 case 's':
101 if (isdigit(*(str+1))) { 100 if (isdigit(*(str+1))) {
@@ -112,7 +111,6 @@ static int __init reboot_setup(char *str)
112#endif /* CONFIG_SMP */ 111#endif /* CONFIG_SMP */
113 112
114 case 'b': 113 case 'b':
115#endif
116 case 'a': 114 case 'a':
117 case 'k': 115 case 'k':
118 case 't': 116 case 't':
@@ -138,7 +136,6 @@ static int __init reboot_setup(char *str)
138__setup("reboot=", reboot_setup); 136__setup("reboot=", reboot_setup);
139 137
140 138
141#ifdef CONFIG_X86_32
142/* 139/*
143 * Reboot options and system auto-detection code provided by 140 * Reboot options and system auto-detection code provided by
144 * Dell Inc. so their systems "just work". :-) 141 * Dell Inc. so their systems "just work". :-)
@@ -152,16 +149,14 @@ static int __init set_bios_reboot(const struct dmi_system_id *d)
152{ 149{
153 if (reboot_type != BOOT_BIOS) { 150 if (reboot_type != BOOT_BIOS) {
154 reboot_type = BOOT_BIOS; 151 reboot_type = BOOT_BIOS;
155 printk(KERN_INFO "%s series board detected. Selecting BIOS-method for reboots.\n", d->ident); 152 pr_info("%s series board detected. Selecting %s-method for reboots.\n",
153 "BIOS", d->ident);
156 } 154 }
157 return 0; 155 return 0;
158} 156}
159 157
160void machine_real_restart(unsigned int type) 158void __noreturn machine_real_restart(unsigned int type)
161{ 159{
162 void (*restart_lowmem)(unsigned int) = (void (*)(unsigned int))
163 real_mode_header->machine_real_restart_asm;
164
165 local_irq_disable(); 160 local_irq_disable();
166 161
167 /* 162 /*
@@ -181,25 +176,28 @@ void machine_real_restart(unsigned int type)
181 /* 176 /*
182 * Switch back to the initial page table. 177 * Switch back to the initial page table.
183 */ 178 */
179#ifdef CONFIG_X86_32
184 load_cr3(initial_page_table); 180 load_cr3(initial_page_table);
185 181#else
186 /* 182 write_cr3(real_mode_header->trampoline_pgd);
187 * Write 0x1234 to absolute memory location 0x472. The BIOS reads 183#endif
188 * this on booting to tell it to "Bypass memory test (also warm
189 * boot)". This seems like a fairly standard thing that gets set by
190 * REBOOT.COM programs, and the previous reset routine did this
191 * too. */
192 *((unsigned short *)0x472) = reboot_mode;
193 184
194 /* Jump to the identity-mapped low memory code */ 185 /* Jump to the identity-mapped low memory code */
195 restart_lowmem(type); 186#ifdef CONFIG_X86_32
187 asm volatile("jmpl *%0" : :
188 "rm" (real_mode_header->machine_real_restart_asm),
189 "a" (type));
190#else
191 asm volatile("ljmpl *%0" : :
192 "m" (real_mode_header->machine_real_restart_asm),
193 "D" (type));
194#endif
195 unreachable();
196} 196}
197#ifdef CONFIG_APM_MODULE 197#ifdef CONFIG_APM_MODULE
198EXPORT_SYMBOL(machine_real_restart); 198EXPORT_SYMBOL(machine_real_restart);
199#endif 199#endif
200 200
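For contrast with the inline asm above: the deleted lines reached the same trampoline through a C function pointer. A minimal sketch of that pre-patch shape (taken from the removed code):

    /*
     * Pre-patch: an indirect call through a function pointer. The patch
     * spells the transfer out in asm so the argument register is explicit
     * ("a"/EAX on 32-bit, "D"/EDI on 64-bit) and so the compiler knows,
     * via unreachable(), that control never comes back.
     */
    void (*restart_lowmem)(unsigned int) = (void (*)(unsigned int))
            real_mode_header->machine_real_restart_asm;
    restart_lowmem(type);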
201#endif /* CONFIG_X86_32 */
202
203/* 201/*
204 * Some Apple MacBooks and MacBookPros need reboot=p to be able to reboot 202 * Some Apple MacBooks and MacBookPros need reboot=p to be able to reboot
205 */ 203 */
@@ -207,8 +205,8 @@ static int __init set_pci_reboot(const struct dmi_system_id *d)
207{ 205{
208 if (reboot_type != BOOT_CF9) { 206 if (reboot_type != BOOT_CF9) {
209 reboot_type = BOOT_CF9; 207 reboot_type = BOOT_CF9;
210 printk(KERN_INFO "%s series board detected. " 208 pr_info("%s series board detected. Selecting %s-method for reboots.\n",
211 "Selecting PCI-method for reboots.\n", d->ident); 209 "PCI", d->ident);
212 } 210 }
213 return 0; 211 return 0;
214} 212}
@@ -217,17 +215,16 @@ static int __init set_kbd_reboot(const struct dmi_system_id *d)
217{ 215{
218 if (reboot_type != BOOT_KBD) { 216 if (reboot_type != BOOT_KBD) {
219 reboot_type = BOOT_KBD; 217 reboot_type = BOOT_KBD;
220 printk(KERN_INFO "%s series board detected. Selecting KBD-method for reboot.\n", d->ident); 218 pr_info("%s series board detected. Selecting %s-method for reboot.\n",
219 "KBD", d->ident);
221 } 220 }
222 return 0; 221 return 0;
223} 222}
224 223
225/* 224/*
226 * This is a single dmi_table handling all reboot quirks. Note that 225 * This is a single dmi_table handling all reboot quirks.
227 * REBOOT_BIOS is only available for 32bit
228 */ 226 */
229static struct dmi_system_id __initdata reboot_dmi_table[] = { 227static struct dmi_system_id __initdata reboot_dmi_table[] = {
230#ifdef CONFIG_X86_32
231 { /* Handle problems with rebooting on Dell E520's */ 228 { /* Handle problems with rebooting on Dell E520's */
232 .callback = set_bios_reboot, 229 .callback = set_bios_reboot,
233 .ident = "Dell E520", 230 .ident = "Dell E520",
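The hunk cuts the first table entry short; for reference, a complete quirk entry has this shape (the two DMI_MATCH strings are assumed for illustration and are not shown in this diff):

    {       /* Handle problems with rebooting on Dell E520's */
            .callback = set_bios_reboot,
            .ident = "Dell E520",
            .matches = {
                    DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
                    DMI_MATCH(DMI_PRODUCT_NAME, "Dell DM061"),
            },
    },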
@@ -377,7 +374,6 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = {
377 DMI_MATCH(DMI_BOARD_NAME, "P4S800"), 374 DMI_MATCH(DMI_BOARD_NAME, "P4S800"),
378 }, 375 },
379 }, 376 },
380#endif /* CONFIG_X86_32 */
381 377
382 { /* Handle reboot issue on Acer Aspire one */ 378 { /* Handle reboot issue on Acer Aspire one */
383 .callback = set_kbd_reboot, 379 .callback = set_kbd_reboot,
@@ -584,13 +580,11 @@ static void native_machine_emergency_restart(void)
584 reboot_type = BOOT_KBD; 580 reboot_type = BOOT_KBD;
585 break; 581 break;
586 582
587#ifdef CONFIG_X86_32
588 case BOOT_BIOS: 583 case BOOT_BIOS:
589 machine_real_restart(MRR_BIOS); 584 machine_real_restart(MRR_BIOS);
590 585
591 reboot_type = BOOT_KBD; 586 reboot_type = BOOT_KBD;
592 break; 587 break;
593#endif
594 588
595 case BOOT_ACPI: 589 case BOOT_ACPI:
596 acpi_reboot(); 590 acpi_reboot();
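Note the pattern in this switch: each method that can fail silently rewrites reboot_type to BOOT_KBD so the enclosing loop retries with the keyboard controller. Schematically (a sketch of the surrounding function, not new code):

    for (;;) {
            switch (reboot_type) {
            case BOOT_ACPI:
                    acpi_reboot();
                    /* reached only if the ACPI reset did nothing */
                    reboot_type = BOOT_KBD;
                    break;
            /* ... every other method degrades to BOOT_KBD the same way */
            }
    }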
@@ -632,12 +626,10 @@ void native_machine_shutdown(void)
632 /* The boot cpu is always logical cpu 0 */ 626 /* The boot cpu is always logical cpu 0 */
633 int reboot_cpu_id = 0; 627 int reboot_cpu_id = 0;
634 628
635#ifdef CONFIG_X86_32
636 /* See if a command line override has been given */ 629 /* See if a command line override has been given */
637 if ((reboot_cpu != -1) && (reboot_cpu < nr_cpu_ids) && 630 if ((reboot_cpu != -1) && (reboot_cpu < nr_cpu_ids) &&
638 cpu_online(reboot_cpu)) 631 cpu_online(reboot_cpu))
639 reboot_cpu_id = reboot_cpu; 632 reboot_cpu_id = reboot_cpu;
640#endif
641 633
642 /* Make certain the cpu I'm about to reboot on is online */ 634 /* Make certain the cpu I'm about to reboot on is online */
643 if (!cpu_online(reboot_cpu_id)) 635 if (!cpu_online(reboot_cpu_id))
@@ -678,7 +670,7 @@ static void __machine_emergency_restart(int emergency)
678 670
679static void native_machine_restart(char *__unused) 671static void native_machine_restart(char *__unused)
680{ 672{
681 printk("machine restart\n"); 673 pr_notice("machine restart\n");
682 674
683 if (!reboot_force) 675 if (!reboot_force)
684 machine_shutdown(); 676 machine_shutdown();
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 16be6dc14db1..f4b9b80e1b95 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -1031,8 +1031,6 @@ void __init setup_arch(char **cmdline_p)
1031 1031
1032 x86_init.timers.wallclock_init(); 1032 x86_init.timers.wallclock_init();
1033 1033
1034 x86_platform.wallclock_init();
1035
1036 mcheck_init(); 1034 mcheck_init();
1037 1035
1038 arch_init_ideal_nops(); 1036 arch_init_ideal_nops();
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 21af737053aa..b280908a376e 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -6,6 +6,9 @@
6 * 2000-06-20 Pentium III FXSR, SSE support by Gareth Hughes 6 * 2000-06-20 Pentium III FXSR, SSE support by Gareth Hughes
7 * 2000-2002 x86-64 support by Andi Kleen 7 * 2000-2002 x86-64 support by Andi Kleen
8 */ 8 */
9
10#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
11
9#include <linux/sched.h> 12#include <linux/sched.h>
10#include <linux/mm.h> 13#include <linux/mm.h>
11#include <linux/smp.h> 14#include <linux/smp.h>
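This pr_fmt define (repeated in several files below) is what makes the bare pr_info()/pr_err() conversions safe: the pr_<level> macros wrap the format string in pr_fmt() before handing it to printk(). Roughly (a sketch of the mechanism from <linux/printk.h>, not this file's code):

    #define pr_info(fmt, ...) \
            printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__)

    /* so, with the define above in effect: */
    pr_info("bad frame\n");    /* emits something like "signal: bad frame" */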
@@ -814,7 +817,7 @@ void signal_fault(struct pt_regs *regs, void __user *frame, char *where)
814 me->comm, me->pid, where, frame, 817 me->comm, me->pid, where, frame,
815 regs->ip, regs->sp, regs->orig_ax); 818 regs->ip, regs->sp, regs->orig_ax);
816 print_vma_addr(" in ", regs->ip); 819 print_vma_addr(" in ", regs->ip);
817 printk(KERN_CONT "\n"); 820 pr_cont("\n");
818 } 821 }
819 822
820 force_sig(SIGSEGV, me); 823 force_sig(SIGSEGV, me);
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 7bd8a0823654..c1a310fb8309 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1,4 +1,4 @@
1/* 1 /*
2 * x86 SMP booting functions 2 * x86 SMP booting functions
3 * 3 *
4 * (c) 1995 Alan Cox, Building #3 <alan@lxorguk.ukuu.org.uk> 4 * (c) 1995 Alan Cox, Building #3 <alan@lxorguk.ukuu.org.uk>
@@ -39,6 +39,8 @@
39 * Glauber Costa : i386 and x86_64 integration 39 * Glauber Costa : i386 and x86_64 integration
40 */ 40 */
41 41
42#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
43
42#include <linux/init.h> 44#include <linux/init.h>
43#include <linux/smp.h> 45#include <linux/smp.h>
44#include <linux/module.h> 46#include <linux/module.h>
@@ -184,7 +186,7 @@ static void __cpuinit smp_callin(void)
184 * boards) 186 * boards)
185 */ 187 */
186 188
187 pr_debug("CALLIN, before setup_local_APIC().\n"); 189 pr_debug("CALLIN, before setup_local_APIC()\n");
188 if (apic->smp_callin_clear_local_apic) 190 if (apic->smp_callin_clear_local_apic)
189 apic->smp_callin_clear_local_apic(); 191 apic->smp_callin_clear_local_apic();
190 setup_local_APIC(); 192 setup_local_APIC();
@@ -255,22 +257,13 @@ notrace static void __cpuinit start_secondary(void *unused)
255 check_tsc_sync_target(); 257 check_tsc_sync_target();
256 258
257 /* 259 /*
258 * We need to hold call_lock, so there is no inconsistency
259 * between the time smp_call_function() determines number of
260 * IPI recipients, and the time when the determination is made
261 * for which cpus receive the IPI. Holding this
262 * lock helps us to not include this cpu in a currently in progress
263 * smp_call_function().
264 *
265 * We need to hold vector_lock so that the set of online cpus 260 * We need to hold vector_lock so that the set of online cpus
266 * does not change while we are assigning vectors to cpus. Holding 261 * does not change while we are assigning vectors to cpus. Holding
267 * this lock ensures we don't half assign or remove an irq from a cpu. 262 * this lock ensures we don't half assign or remove an irq from a cpu.
268 */ 263 */
269 ipi_call_lock();
270 lock_vector_lock(); 264 lock_vector_lock();
271 set_cpu_online(smp_processor_id(), true); 265 set_cpu_online(smp_processor_id(), true);
272 unlock_vector_lock(); 266 unlock_vector_lock();
273 ipi_call_unlock();
274 per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE; 267 per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE;
275 x86_platform.nmi_init(); 268 x86_platform.nmi_init();
276 269
@@ -432,17 +425,16 @@ static void impress_friends(void)
432 /* 425 /*
433 * Allow the user to impress friends. 426 * Allow the user to impress friends.
434 */ 427 */
435 pr_debug("Before bogomips.\n"); 428 pr_debug("Before bogomips\n");
436 for_each_possible_cpu(cpu) 429 for_each_possible_cpu(cpu)
437 if (cpumask_test_cpu(cpu, cpu_callout_mask)) 430 if (cpumask_test_cpu(cpu, cpu_callout_mask))
438 bogosum += cpu_data(cpu).loops_per_jiffy; 431 bogosum += cpu_data(cpu).loops_per_jiffy;
439 printk(KERN_INFO 432 pr_info("Total of %d processors activated (%lu.%02lu BogoMIPS)\n",
440 "Total of %d processors activated (%lu.%02lu BogoMIPS).\n",
441 num_online_cpus(), 433 num_online_cpus(),
442 bogosum/(500000/HZ), 434 bogosum/(500000/HZ),
443 (bogosum/(5000/HZ))%100); 435 (bogosum/(5000/HZ))%100);
444 436
445 pr_debug("Before bogocount - setting activated=1.\n"); 437 pr_debug("Before bogocount - setting activated=1\n");
446} 438}
447 439
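The BogoMIPS arithmetic above is easier to trust with numbers plugged in (all values assumed for illustration):

    /*
     * One BogoMIPS is 500,000 delay-loop iterations per second, so with
     * HZ = 1000 and a summed loops_per_jiffy of 4,000,000:
     *   bogosum / (500000 / HZ)        = 4000000 / 500 = 8000
     *   (bogosum / (5000 / HZ)) % 100  = 800000 % 100  = 0
     * printing "Total of N processors activated (8000.00 BogoMIPS)".
     */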
448void __inquire_remote_apic(int apicid) 440void __inquire_remote_apic(int apicid)
@@ -452,18 +444,17 @@ void __inquire_remote_apic(int apicid)
452 int timeout; 444 int timeout;
453 u32 status; 445 u32 status;
454 446
455 printk(KERN_INFO "Inquiring remote APIC 0x%x...\n", apicid); 447 pr_info("Inquiring remote APIC 0x%x...\n", apicid);
456 448
457 for (i = 0; i < ARRAY_SIZE(regs); i++) { 449 for (i = 0; i < ARRAY_SIZE(regs); i++) {
458 printk(KERN_INFO "... APIC 0x%x %s: ", apicid, names[i]); 450 pr_info("... APIC 0x%x %s: ", apicid, names[i]);
459 451
460 /* 452 /*
461 * Wait for idle. 453 * Wait for idle.
462 */ 454 */
463 status = safe_apic_wait_icr_idle(); 455 status = safe_apic_wait_icr_idle();
464 if (status) 456 if (status)
465 printk(KERN_CONT 457 pr_cont("a previous APIC delivery may have failed\n");
466 "a previous APIC delivery may have failed\n");
467 458
468 apic_icr_write(APIC_DM_REMRD | regs[i], apicid); 459 apic_icr_write(APIC_DM_REMRD | regs[i], apicid);
469 460
@@ -476,10 +467,10 @@ void __inquire_remote_apic(int apicid)
476 switch (status) { 467 switch (status) {
477 case APIC_ICR_RR_VALID: 468 case APIC_ICR_RR_VALID:
478 status = apic_read(APIC_RRR); 469 status = apic_read(APIC_RRR);
479 printk(KERN_CONT "%08x\n", status); 470 pr_cont("%08x\n", status);
480 break; 471 break;
481 default: 472 default:
482 printk(KERN_CONT "failed\n"); 473 pr_cont("failed\n");
483 } 474 }
484 } 475 }
485} 476}
@@ -513,12 +504,12 @@ wakeup_secondary_cpu_via_nmi(int logical_apicid, unsigned long start_eip)
513 apic_write(APIC_ESR, 0); 504 apic_write(APIC_ESR, 0);
514 accept_status = (apic_read(APIC_ESR) & 0xEF); 505 accept_status = (apic_read(APIC_ESR) & 0xEF);
515 } 506 }
516 pr_debug("NMI sent.\n"); 507 pr_debug("NMI sent\n");
517 508
518 if (send_status) 509 if (send_status)
519 printk(KERN_ERR "APIC never delivered???\n"); 510 pr_err("APIC never delivered???\n");
520 if (accept_status) 511 if (accept_status)
521 printk(KERN_ERR "APIC delivery error (%lx).\n", accept_status); 512 pr_err("APIC delivery error (%lx)\n", accept_status);
522 513
523 return (send_status | accept_status); 514 return (send_status | accept_status);
524} 515}
@@ -540,7 +531,7 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip)
540 apic_read(APIC_ESR); 531 apic_read(APIC_ESR);
541 } 532 }
542 533
543 pr_debug("Asserting INIT.\n"); 534 pr_debug("Asserting INIT\n");
544 535
545 /* 536 /*
546 * Turn INIT on target chip 537 * Turn INIT on target chip
@@ -556,7 +547,7 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip)
556 547
557 mdelay(10); 548 mdelay(10);
558 549
559 pr_debug("Deasserting INIT.\n"); 550 pr_debug("Deasserting INIT\n");
560 551
561 /* Target chip */ 552 /* Target chip */
562 /* Send IPI */ 553 /* Send IPI */
@@ -589,14 +580,14 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip)
589 /* 580 /*
590 * Run STARTUP IPI loop. 581 * Run STARTUP IPI loop.
591 */ 582 */
592 pr_debug("#startup loops: %d.\n", num_starts); 583 pr_debug("#startup loops: %d\n", num_starts);
593 584
594 for (j = 1; j <= num_starts; j++) { 585 for (j = 1; j <= num_starts; j++) {
595 pr_debug("Sending STARTUP #%d.\n", j); 586 pr_debug("Sending STARTUP #%d\n", j);
596 if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ 587 if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */
597 apic_write(APIC_ESR, 0); 588 apic_write(APIC_ESR, 0);
598 apic_read(APIC_ESR); 589 apic_read(APIC_ESR);
599 pr_debug("After apic_write.\n"); 590 pr_debug("After apic_write\n");
600 591
601 /* 592 /*
602 * STARTUP IPI 593 * STARTUP IPI
@@ -613,7 +604,7 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip)
613 */ 604 */
614 udelay(300); 605 udelay(300);
615 606
616 pr_debug("Startup point 1.\n"); 607 pr_debug("Startup point 1\n");
617 608
618 pr_debug("Waiting for send to finish...\n"); 609 pr_debug("Waiting for send to finish...\n");
619 send_status = safe_apic_wait_icr_idle(); 610 send_status = safe_apic_wait_icr_idle();
@@ -628,12 +619,12 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip)
628 if (send_status || accept_status) 619 if (send_status || accept_status)
629 break; 620 break;
630 } 621 }
631 pr_debug("After Startup.\n"); 622 pr_debug("After Startup\n");
632 623
633 if (send_status) 624 if (send_status)
634 printk(KERN_ERR "APIC never delivered???\n"); 625 pr_err("APIC never delivered???\n");
635 if (accept_status) 626 if (accept_status)
636 printk(KERN_ERR "APIC delivery error (%lx).\n", accept_status); 627 pr_err("APIC delivery error (%lx)\n", accept_status);
637 628
638 return (send_status | accept_status); 629 return (send_status | accept_status);
639} 630}
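For orientation, the function above is the classic INIT-SIPI-SIPI wakeup. A compressed sketch of the sequence (constants as used elsewhere in this file; the STARTUP vector is the physical page number of the trampoline):

    apic_icr_write(APIC_INT_LEVELTRIG | APIC_INT_ASSERT | APIC_DM_INIT,
                   phys_apicid);                    /* assert INIT */
    mdelay(10);
    apic_icr_write(APIC_INT_LEVELTRIG | APIC_DM_INIT, phys_apicid);
                                                    /* deassert INIT */
    for (j = 1; j <= num_starts; j++) {
            apic_icr_write(APIC_DM_STARTUP | (start_eip >> 12),
                           phys_apicid);            /* STARTUP IPI */
            udelay(300);
    }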
@@ -647,11 +638,11 @@ static void __cpuinit announce_cpu(int cpu, int apicid)
647 if (system_state == SYSTEM_BOOTING) { 638 if (system_state == SYSTEM_BOOTING) {
648 if (node != current_node) { 639 if (node != current_node) {
649 if (current_node > (-1)) 640 if (current_node > (-1))
650 pr_cont(" Ok.\n"); 641 pr_cont(" OK\n");
651 current_node = node; 642 current_node = node;
652 pr_info("Booting Node %3d, Processors ", node); 643 pr_info("Booting Node %3d, Processors ", node);
653 } 644 }
654 pr_cont(" #%d%s", cpu, cpu == (nr_cpu_ids - 1) ? " Ok.\n" : ""); 645 pr_cont(" #%d%s", cpu, cpu == (nr_cpu_ids - 1) ? " OK\n" : "");
655 return; 646 return;
656 } else 647 } else
657 pr_info("Booting Node %d Processor %d APIC 0x%x\n", 648 pr_info("Booting Node %d Processor %d APIC 0x%x\n",
@@ -731,9 +722,9 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
731 /* 722 /*
732 * allow APs to start initializing. 723 * allow APs to start initializing.
733 */ 724 */
734 pr_debug("Before Callout %d.\n", cpu); 725 pr_debug("Before Callout %d\n", cpu);
735 cpumask_set_cpu(cpu, cpu_callout_mask); 726 cpumask_set_cpu(cpu, cpu_callout_mask);
736 pr_debug("After Callout %d.\n", cpu); 727 pr_debug("After Callout %d\n", cpu);
737 728
738 /* 729 /*
739 * Wait 5s total for a response 730 * Wait 5s total for a response
@@ -761,7 +752,7 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
761 pr_err("CPU%d: Stuck ??\n", cpu); 752 pr_err("CPU%d: Stuck ??\n", cpu);
762 else 753 else
763 /* trampoline code not run */ 754 /* trampoline code not run */
764 pr_err("CPU%d: Not responding.\n", cpu); 755 pr_err("CPU%d: Not responding\n", cpu);
765 if (apic->inquire_remote_apic) 756 if (apic->inquire_remote_apic)
766 apic->inquire_remote_apic(apicid); 757 apic->inquire_remote_apic(apicid);
767 } 758 }
@@ -806,7 +797,7 @@ int __cpuinit native_cpu_up(unsigned int cpu, struct task_struct *tidle)
806 if (apicid == BAD_APICID || apicid == boot_cpu_physical_apicid || 797 if (apicid == BAD_APICID || apicid == boot_cpu_physical_apicid ||
807 !physid_isset(apicid, phys_cpu_present_map) || 798 !physid_isset(apicid, phys_cpu_present_map) ||
808 !apic->apic_id_valid(apicid)) { 799 !apic->apic_id_valid(apicid)) {
809 printk(KERN_ERR "%s: bad cpu %d\n", __func__, cpu); 800 pr_err("%s: bad cpu %d\n", __func__, cpu);
810 return -EINVAL; 801 return -EINVAL;
811 } 802 }
812 803
@@ -887,9 +878,8 @@ static int __init smp_sanity_check(unsigned max_cpus)
887 unsigned int cpu; 878 unsigned int cpu;
888 unsigned nr; 879 unsigned nr;
889 880
890 printk(KERN_WARNING 881 pr_warn("More than 8 CPUs detected - skipping them\n"
891 "More than 8 CPUs detected - skipping them.\n" 882 "Use CONFIG_X86_BIGSMP\n");
892 "Use CONFIG_X86_BIGSMP.\n");
893 883
894 nr = 0; 884 nr = 0;
895 for_each_present_cpu(cpu) { 885 for_each_present_cpu(cpu) {
@@ -910,8 +900,7 @@ static int __init smp_sanity_check(unsigned max_cpus)
910#endif 900#endif
911 901
912 if (!physid_isset(hard_smp_processor_id(), phys_cpu_present_map)) { 902 if (!physid_isset(hard_smp_processor_id(), phys_cpu_present_map)) {
913 printk(KERN_WARNING 903 pr_warn("weird, boot CPU (#%d) not listed by the BIOS\n",
914 "weird, boot CPU (#%d) not listed by the BIOS.\n",
915 hard_smp_processor_id()); 904 hard_smp_processor_id());
916 905
917 physid_set(hard_smp_processor_id(), phys_cpu_present_map); 906 physid_set(hard_smp_processor_id(), phys_cpu_present_map);
@@ -923,11 +912,10 @@ static int __init smp_sanity_check(unsigned max_cpus)
923 */ 912 */
924 if (!smp_found_config && !acpi_lapic) { 913 if (!smp_found_config && !acpi_lapic) {
925 preempt_enable(); 914 preempt_enable();
926 printk(KERN_NOTICE "SMP motherboard not detected.\n"); 915 pr_notice("SMP motherboard not detected\n");
927 disable_smp(); 916 disable_smp();
928 if (APIC_init_uniprocessor()) 917 if (APIC_init_uniprocessor())
929 printk(KERN_NOTICE "Local APIC not detected." 918 pr_notice("Local APIC not detected. Using dummy APIC emulation.\n");
930 " Using dummy APIC emulation.\n");
931 return -1; 919 return -1;
932 } 920 }
933 921
@@ -936,9 +924,8 @@ static int __init smp_sanity_check(unsigned max_cpus)
936 * CPU too, but we do it for the sake of robustness anyway. 924 * CPU too, but we do it for the sake of robustness anyway.
937 */ 925 */
938 if (!apic->check_phys_apicid_present(boot_cpu_physical_apicid)) { 926 if (!apic->check_phys_apicid_present(boot_cpu_physical_apicid)) {
939 printk(KERN_NOTICE 927 pr_notice("weird, boot CPU (#%d) not listed by the BIOS\n",
940 "weird, boot CPU (#%d) not listed by the BIOS.\n", 928 boot_cpu_physical_apicid);
941 boot_cpu_physical_apicid);
942 physid_set(hard_smp_processor_id(), phys_cpu_present_map); 929 physid_set(hard_smp_processor_id(), phys_cpu_present_map);
943 } 930 }
944 preempt_enable(); 931 preempt_enable();
@@ -951,8 +938,7 @@ static int __init smp_sanity_check(unsigned max_cpus)
951 if (!disable_apic) { 938 if (!disable_apic) {
952 pr_err("BIOS bug, local APIC #%d not detected!...\n", 939 pr_err("BIOS bug, local APIC #%d not detected!...\n",
953 boot_cpu_physical_apicid); 940 boot_cpu_physical_apicid);
954 pr_err("... forcing use of dummy APIC emulation." 941 pr_err("... forcing use of dummy APIC emulation (tell your hw vendor)\n");
955 "(tell your hw vendor)\n");
956 } 942 }
957 smpboot_clear_io_apic(); 943 smpboot_clear_io_apic();
958 disable_ioapic_support(); 944 disable_ioapic_support();
@@ -965,7 +951,7 @@ static int __init smp_sanity_check(unsigned max_cpus)
965 * If SMP should be disabled, then really disable it! 951 * If SMP should be disabled, then really disable it!
966 */ 952 */
967 if (!max_cpus) { 953 if (!max_cpus) {
968 printk(KERN_INFO "SMP mode deactivated.\n"); 954 pr_info("SMP mode deactivated\n");
969 smpboot_clear_io_apic(); 955 smpboot_clear_io_apic();
970 956
971 connect_bsp_APIC(); 957 connect_bsp_APIC();
@@ -1017,7 +1003,7 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
1017 1003
1018 1004
1019 if (smp_sanity_check(max_cpus) < 0) { 1005 if (smp_sanity_check(max_cpus) < 0) {
1020 printk(KERN_INFO "SMP disabled\n"); 1006 pr_info("SMP disabled\n");
1021 disable_smp(); 1007 disable_smp();
1022 goto out; 1008 goto out;
1023 } 1009 }
@@ -1055,7 +1041,7 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
1055 * Set up local APIC timer on boot CPU. 1041 * Set up local APIC timer on boot CPU.
1056 */ 1042 */
1057 1043
1058 printk(KERN_INFO "CPU%d: ", 0); 1044 pr_info("CPU%d: ", 0);
1059 print_cpu_info(&cpu_data(0)); 1045 print_cpu_info(&cpu_data(0));
1060 x86_init.timers.setup_percpu_clockev(); 1046 x86_init.timers.setup_percpu_clockev();
1061 1047
@@ -1105,7 +1091,7 @@ void __init native_smp_prepare_boot_cpu(void)
1105 1091
1106void __init native_smp_cpus_done(unsigned int max_cpus) 1092void __init native_smp_cpus_done(unsigned int max_cpus)
1107{ 1093{
1108 pr_debug("Boot done.\n"); 1094 pr_debug("Boot done\n");
1109 1095
1110 nmi_selftest(); 1096 nmi_selftest();
1111 impress_friends(); 1097 impress_friends();
@@ -1166,8 +1152,7 @@ __init void prefill_possible_map(void)
1166 1152
1167 /* nr_cpu_ids could be reduced via nr_cpus= */ 1153 /* nr_cpu_ids could be reduced via nr_cpus= */
1168 if (possible > nr_cpu_ids) { 1154 if (possible > nr_cpu_ids) {
1169 printk(KERN_WARNING 1155 pr_warn("%d Processors exceeds NR_CPUS limit of %d\n",
1170 "%d Processors exceeds NR_CPUS limit of %d\n",
1171 possible, nr_cpu_ids); 1156 possible, nr_cpu_ids);
1172 possible = nr_cpu_ids; 1157 possible = nr_cpu_ids;
1173 } 1158 }
@@ -1176,13 +1161,12 @@ __init void prefill_possible_map(void)
1176 if (!setup_max_cpus) 1161 if (!setup_max_cpus)
1177#endif 1162#endif
1178 if (possible > i) { 1163 if (possible > i) {
1179 printk(KERN_WARNING 1164 pr_warn("%d Processors exceeds max_cpus limit of %u\n",
1180 "%d Processors exceeds max_cpus limit of %u\n",
1181 possible, setup_max_cpus); 1165 possible, setup_max_cpus);
1182 possible = i; 1166 possible = i;
1183 } 1167 }
1184 1168
1185 printk(KERN_INFO "SMP: Allowing %d CPUs, %d hotplug CPUs\n", 1169 pr_info("Allowing %d CPUs, %d hotplug CPUs\n",
1186 possible, max_t(int, possible - num_processors, 0)); 1170 possible, max_t(int, possible - num_processors, 0));
1187 1171
1188 for (i = 0; i < possible; i++) 1172 for (i = 0; i < possible; i++)
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 05b31d92f69c..b481341c9369 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -9,6 +9,9 @@
9/* 9/*
10 * Handle hardware traps and faults. 10 * Handle hardware traps and faults.
11 */ 11 */
12
13#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
14
12#include <linux/interrupt.h> 15#include <linux/interrupt.h>
13#include <linux/kallsyms.h> 16#include <linux/kallsyms.h>
14#include <linux/spinlock.h> 17#include <linux/spinlock.h>
@@ -143,12 +146,11 @@ trap_signal:
143#ifdef CONFIG_X86_64 146#ifdef CONFIG_X86_64
144 if (show_unhandled_signals && unhandled_signal(tsk, signr) && 147 if (show_unhandled_signals && unhandled_signal(tsk, signr) &&
145 printk_ratelimit()) { 148 printk_ratelimit()) {
146 printk(KERN_INFO 149 pr_info("%s[%d] trap %s ip:%lx sp:%lx error:%lx",
147 "%s[%d] trap %s ip:%lx sp:%lx error:%lx", 150 tsk->comm, tsk->pid, str,
148 tsk->comm, tsk->pid, str, 151 regs->ip, regs->sp, error_code);
149 regs->ip, regs->sp, error_code);
150 print_vma_addr(" in ", regs->ip); 152 print_vma_addr(" in ", regs->ip);
151 printk("\n"); 153 pr_cont("\n");
152 } 154 }
153#endif 155#endif
154 156
@@ -269,12 +271,11 @@ do_general_protection(struct pt_regs *regs, long error_code)
269 271
270 if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) && 272 if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) &&
271 printk_ratelimit()) { 273 printk_ratelimit()) {
272 printk(KERN_INFO 274 pr_info("%s[%d] general protection ip:%lx sp:%lx error:%lx",
273 "%s[%d] general protection ip:%lx sp:%lx error:%lx",
274 tsk->comm, task_pid_nr(tsk), 275 tsk->comm, task_pid_nr(tsk),
275 regs->ip, regs->sp, error_code); 276 regs->ip, regs->sp, error_code);
276 print_vma_addr(" in ", regs->ip); 277 print_vma_addr(" in ", regs->ip);
277 printk("\n"); 278 pr_cont("\n");
278 } 279 }
279 280
280 force_sig(SIGSEGV, tsk); 281 force_sig(SIGSEGV, tsk);
@@ -570,7 +571,7 @@ do_spurious_interrupt_bug(struct pt_regs *regs, long error_code)
570 conditional_sti(regs); 571 conditional_sti(regs);
571#if 0 572#if 0
572 /* No need to warn about this any longer. */ 573 /* No need to warn about this any longer. */
573 printk(KERN_INFO "Ignoring P6 Local APIC Spurious Interrupt Bug...\n"); 574 pr_info("Ignoring P6 Local APIC Spurious Interrupt Bug...\n");
574#endif 575#endif
575} 576}
576 577
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index fc0a147e3727..cfa5d4f7ca56 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -1,3 +1,5 @@
1#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
2
1#include <linux/kernel.h> 3#include <linux/kernel.h>
2#include <linux/sched.h> 4#include <linux/sched.h>
3#include <linux/init.h> 5#include <linux/init.h>
@@ -84,8 +86,7 @@ EXPORT_SYMBOL_GPL(check_tsc_unstable);
84#ifdef CONFIG_X86_TSC 86#ifdef CONFIG_X86_TSC
85int __init notsc_setup(char *str) 87int __init notsc_setup(char *str)
86{ 88{
87 printk(KERN_WARNING "notsc: Kernel compiled with CONFIG_X86_TSC, " 89 pr_warn("Kernel compiled with CONFIG_X86_TSC, cannot disable TSC completely\n");
88 "cannot disable TSC completely.\n");
89 tsc_disabled = 1; 90 tsc_disabled = 1;
90 return 1; 91 return 1;
91} 92}
@@ -373,7 +374,7 @@ static unsigned long quick_pit_calibrate(void)
373 goto success; 374 goto success;
374 } 375 }
375 } 376 }
376 printk("Fast TSC calibration failed\n"); 377 pr_err("Fast TSC calibration failed\n");
377 return 0; 378 return 0;
378 379
379success: 380success:
@@ -392,7 +393,7 @@ success:
392 */ 393 */
393 delta *= PIT_TICK_RATE; 394 delta *= PIT_TICK_RATE;
394 do_div(delta, i*256*1000); 395 do_div(delta, i*256*1000);
395 printk("Fast TSC calibration using PIT\n"); 396 pr_info("Fast TSC calibration using PIT\n");
396 return delta; 397 return delta;
397} 398}
398 399
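The two lines above convert raw cycle counts to kHz; with assumed numbers:

    /*
     * PIT_TICK_RATE is 1193182 Hz. Say the loop ran i = 50 iterations
     * (i*256 = 12800 PIT ticks, about 10.7 ms) while the TSC advanced
     * delta = 25,000,000 cycles:
     *   delta * PIT_TICK_RATE / (i * 256 * 1000)
     *     = 25000000 * 1193182 / 12800000 ~= 2330434 kHz,
     * i.e. a roughly 2.33 GHz TSC.
     */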
@@ -487,9 +488,8 @@ unsigned long native_calibrate_tsc(void)
487 * use the reference value, as it is more precise. 488 * use the reference value, as it is more precise.
488 */ 489 */
489 if (delta >= 90 && delta <= 110) { 490 if (delta >= 90 && delta <= 110) {
490 printk(KERN_INFO 491 pr_info("PIT calibration matches %s. %d loops\n",
491 "TSC: PIT calibration matches %s. %d loops\n", 492 hpet ? "HPET" : "PMTIMER", i + 1);
492 hpet ? "HPET" : "PMTIMER", i + 1);
493 return tsc_ref_min; 493 return tsc_ref_min;
494 } 494 }
495 495
@@ -511,38 +511,36 @@ unsigned long native_calibrate_tsc(void)
511 */ 511 */
512 if (tsc_pit_min == ULONG_MAX) { 512 if (tsc_pit_min == ULONG_MAX) {
513 /* PIT gave no useful value */ 513 /* PIT gave no useful value */
514 printk(KERN_WARNING "TSC: Unable to calibrate against PIT\n"); 514 pr_warn("Unable to calibrate against PIT\n");
515 515
516 /* We don't have an alternative source, disable TSC */ 516 /* We don't have an alternative source, disable TSC */
517 if (!hpet && !ref1 && !ref2) { 517 if (!hpet && !ref1 && !ref2) {
518 printk("TSC: No reference (HPET/PMTIMER) available\n"); 518 pr_notice("No reference (HPET/PMTIMER) available\n");
519 return 0; 519 return 0;
520 } 520 }
521 521
522 /* The alternative source failed as well, disable TSC */ 522 /* The alternative source failed as well, disable TSC */
523 if (tsc_ref_min == ULONG_MAX) { 523 if (tsc_ref_min == ULONG_MAX) {
524 printk(KERN_WARNING "TSC: HPET/PMTIMER calibration " 524 pr_warn("HPET/PMTIMER calibration failed\n");
525 "failed.\n");
526 return 0; 525 return 0;
527 } 526 }
528 527
529 /* Use the alternative source */ 528 /* Use the alternative source */
530 printk(KERN_INFO "TSC: using %s reference calibration\n", 529 pr_info("using %s reference calibration\n",
531 hpet ? "HPET" : "PMTIMER"); 530 hpet ? "HPET" : "PMTIMER");
532 531
533 return tsc_ref_min; 532 return tsc_ref_min;
534 } 533 }
535 534
536 /* We don't have an alternative source, use the PIT calibration value */ 535 /* We don't have an alternative source, use the PIT calibration value */
537 if (!hpet && !ref1 && !ref2) { 536 if (!hpet && !ref1 && !ref2) {
538 printk(KERN_INFO "TSC: Using PIT calibration value\n"); 537 pr_info("Using PIT calibration value\n");
539 return tsc_pit_min; 538 return tsc_pit_min;
540 } 539 }
541 540
542 /* The alternative source failed, use the PIT calibration value */ 541 /* The alternative source failed, use the PIT calibration value */
543 if (tsc_ref_min == ULONG_MAX) { 542 if (tsc_ref_min == ULONG_MAX) {
544 printk(KERN_WARNING "TSC: HPET/PMTIMER calibration failed. " 543 pr_warn("HPET/PMTIMER calibration failed. Using PIT calibration.\n");
545 "Using PIT calibration\n");
546 return tsc_pit_min; 544 return tsc_pit_min;
547 } 545 }
548 546
@@ -551,9 +549,9 @@ unsigned long native_calibrate_tsc(void)
551 * the PIT value as we know that there are PMTIMERs around 549 * the PIT value as we know that there are PMTIMERs around
552 * running at double speed. At least we let the user know: 550 * running at double speed. At least we let the user know:
553 */ 551 */
554 printk(KERN_WARNING "TSC: PIT calibration deviates from %s: %lu %lu.\n", 552 pr_warn("PIT calibration deviates from %s: %lu %lu\n",
555 hpet ? "HPET" : "PMTIMER", tsc_pit_min, tsc_ref_min); 553 hpet ? "HPET" : "PMTIMER", tsc_pit_min, tsc_ref_min);
556 printk(KERN_INFO "TSC: Using PIT calibration value\n"); 554 pr_info("Using PIT calibration value\n");
557 return tsc_pit_min; 555 return tsc_pit_min;
558} 556}
559 557
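Condensed, the fall-through logic of native_calibrate_tsc() reads:

    /*
     * PIT ok, reference ok, within 10%  -> use the reference (more precise)
     * PIT failed, no reference          -> give up, TSC stays disabled
     * PIT failed, reference ok          -> use the reference
     * PIT ok, no or failed reference    -> use the PIT value
     * both ok but divergent             -> warn, prefer the PIT value
     */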
@@ -785,7 +783,7 @@ void mark_tsc_unstable(char *reason)
785 tsc_unstable = 1; 783 tsc_unstable = 1;
786 sched_clock_stable = 0; 784 sched_clock_stable = 0;
787 disable_sched_clock_irqtime(); 785 disable_sched_clock_irqtime();
788 printk(KERN_INFO "Marking TSC unstable due to %s\n", reason); 786 pr_info("Marking TSC unstable due to %s\n", reason);
789 /* Change only the rating, when not registered */ 787 /* Change only the rating, when not registered */
790 if (clocksource_tsc.mult) 788 if (clocksource_tsc.mult)
791 clocksource_mark_unstable(&clocksource_tsc); 789 clocksource_mark_unstable(&clocksource_tsc);
@@ -912,9 +910,9 @@ static void tsc_refine_calibration_work(struct work_struct *work)
912 goto out; 910 goto out;
913 911
914 tsc_khz = freq; 912 tsc_khz = freq;
915 printk(KERN_INFO "Refined TSC clocksource calibration: " 913 pr_info("Refined TSC clocksource calibration: %lu.%03lu MHz\n",
916 "%lu.%03lu MHz.\n", (unsigned long)tsc_khz / 1000, 914 (unsigned long)tsc_khz / 1000,
917 (unsigned long)tsc_khz % 1000); 915 (unsigned long)tsc_khz % 1000);
918 916
919out: 917out:
920 clocksource_register_khz(&clocksource_tsc, tsc_khz); 918 clocksource_register_khz(&clocksource_tsc, tsc_khz);
@@ -970,9 +968,9 @@ void __init tsc_init(void)
970 return; 968 return;
971 } 969 }
972 970
973 printk("Detected %lu.%03lu MHz processor.\n", 971 pr_info("Detected %lu.%03lu MHz processor\n",
974 (unsigned long)cpu_khz / 1000, 972 (unsigned long)cpu_khz / 1000,
975 (unsigned long)cpu_khz % 1000); 973 (unsigned long)cpu_khz % 1000);
976 974
977 /* 975 /*
978 * Secondary CPUs do not run through tsc_init(), so set up 976 * Secondary CPUs do not run through tsc_init(), so set up
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index dc4e910a7d96..36fd42091fa7 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -409,9 +409,10 @@ static int validate_insn_bits(struct arch_uprobe *auprobe, struct mm_struct *mm,
409 * arch_uprobe_analyze_insn - instruction analysis including validity and fixups. 409 * arch_uprobe_analyze_insn - instruction analysis including validity and fixups.
410 * @mm: the probed address space. 410 * @mm: the probed address space.
411 * @arch_uprobe: the probepoint information. 411 * @arch_uprobe: the probepoint information.
412 * @addr: virtual address at which to install the probepoint
412 * Return 0 on success or a -ve number on error. 413 * Return 0 on success or a -ve number on error.
413 */ 414 */
414int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm) 415int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long addr)
415{ 416{
416 int ret; 417 int ret;
417 struct insn insn; 418 struct insn insn;
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
index 255f58ae71e8..54abcc0baf23 100644
--- a/arch/x86/kernel/vm86_32.c
+++ b/arch/x86/kernel/vm86_32.c
@@ -28,6 +28,8 @@
28 * 28 *
29 */ 29 */
30 30
31#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
32
31#include <linux/capability.h> 33#include <linux/capability.h>
32#include <linux/errno.h> 34#include <linux/errno.h>
33#include <linux/interrupt.h> 35#include <linux/interrupt.h>
@@ -137,14 +139,14 @@ struct pt_regs *save_v86_state(struct kernel_vm86_regs *regs)
137 local_irq_enable(); 139 local_irq_enable();
138 140
139 if (!current->thread.vm86_info) { 141 if (!current->thread.vm86_info) {
140 printk("no vm86_info: BAD\n"); 142 pr_alert("no vm86_info: BAD\n");
141 do_exit(SIGSEGV); 143 do_exit(SIGSEGV);
142 } 144 }
143 set_flags(regs->pt.flags, VEFLAGS, X86_EFLAGS_VIF | current->thread.v86mask); 145 set_flags(regs->pt.flags, VEFLAGS, X86_EFLAGS_VIF | current->thread.v86mask);
144 tmp = copy_vm86_regs_to_user(&current->thread.vm86_info->regs, regs); 146 tmp = copy_vm86_regs_to_user(&current->thread.vm86_info->regs, regs);
145 tmp += put_user(current->thread.screen_bitmap, &current->thread.vm86_info->screen_bitmap); 147 tmp += put_user(current->thread.screen_bitmap, &current->thread.vm86_info->screen_bitmap);
146 if (tmp) { 148 if (tmp) {
147 printk("vm86: could not access userspace vm86_info\n"); 149 pr_alert("could not access userspace vm86_info\n");
148 do_exit(SIGSEGV); 150 do_exit(SIGSEGV);
149 } 151 }
150 152
diff --git a/arch/x86/kernel/vsmp_64.c b/arch/x86/kernel/vsmp_64.c
index 8eeb55a551b4..992f890283e9 100644
--- a/arch/x86/kernel/vsmp_64.c
+++ b/arch/x86/kernel/vsmp_64.c
@@ -16,6 +16,7 @@
16#include <linux/pci_ids.h> 16#include <linux/pci_ids.h>
17#include <linux/pci_regs.h> 17#include <linux/pci_regs.h>
18#include <linux/smp.h> 18#include <linux/smp.h>
19#include <linux/irq.h>
19 20
20#include <asm/apic.h> 21#include <asm/apic.h>
21#include <asm/pci-direct.h> 22#include <asm/pci-direct.h>
@@ -95,6 +96,18 @@ static void __init set_vsmp_pv_ops(void)
95 ctl = readl(address + 4); 96 ctl = readl(address + 4);
96 printk(KERN_INFO "vSMP CTL: capabilities:0x%08x control:0x%08x\n", 97 printk(KERN_INFO "vSMP CTL: capabilities:0x%08x control:0x%08x\n",
97 cap, ctl); 98 cap, ctl);
99
100 /* If possible, let the vSMP foundation route the interrupt optimally */
101#ifdef CONFIG_SMP
102 if (cap & ctl & BIT(8)) {
103 ctl &= ~BIT(8);
104#ifdef CONFIG_PROC_FS
105 /* Don't let users change irq affinity via procfs */
106 no_irq_affinity = 1;
107#endif
108 }
109#endif
110
98 if (cap & ctl & (1 << 4)) { 111 if (cap & ctl & (1 << 4)) {
99 /* Setup irq ops and turn on vSMP IRQ fastpath handling */ 112 /* Setup irq ops and turn on vSMP IRQ fastpath handling */
100 pv_irq_ops.irq_disable = PV_CALLEE_SAVE(vsmp_irq_disable); 113 pv_irq_ops.irq_disable = PV_CALLEE_SAVE(vsmp_irq_disable);
@@ -102,12 +115,11 @@ static void __init set_vsmp_pv_ops(void)
102 pv_irq_ops.save_fl = PV_CALLEE_SAVE(vsmp_save_fl); 115 pv_irq_ops.save_fl = PV_CALLEE_SAVE(vsmp_save_fl);
103 pv_irq_ops.restore_fl = PV_CALLEE_SAVE(vsmp_restore_fl); 116 pv_irq_ops.restore_fl = PV_CALLEE_SAVE(vsmp_restore_fl);
104 pv_init_ops.patch = vsmp_patch; 117 pv_init_ops.patch = vsmp_patch;
105
106 ctl &= ~(1 << 4); 118 ctl &= ~(1 << 4);
107 writel(ctl, address + 4);
108 ctl = readl(address + 4);
109 printk(KERN_INFO "vSMP CTL: control set to:0x%08x\n", ctl);
110 } 119 }
120 writel(ctl, address + 4);
121 ctl = readl(address + 4);
122 pr_info("vSMP CTL: control set to:0x%08x\n", ctl);
111 123
112 early_iounmap(address, 8); 124 early_iounmap(address, 8);
113} 125}
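The capability/control idiom used in both hunks above deserves a note; the bit semantics here are inferred from the code, not stated by the patch:

    /*
     * cap bit set -> the vSMP foundation implements the feature;
     * ctl bit set -> the feature is currently disabled.
     * So (cap & ctl & BIT(n)) means "present but switched off", and
     * clearing the bit in ctl (written back once, below) requests it on.
     */
    if (cap & ctl & BIT(8)) {       /* IRQ routing, assumed meaning */
            ctl &= ~BIT(8);
    }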
@@ -187,12 +199,36 @@ static void __init vsmp_cap_cpus(void)
187#endif 199#endif
188} 200}
189 201
202static int apicid_phys_pkg_id(int initial_apic_id, int index_msb)
203{
204 return hard_smp_processor_id() >> index_msb;
205}
206
207/*
208 * In vSMP, all cpus should be capable of handling interrupts, regardless of
209 * the APIC used.
210 */
211static void fill_vector_allocation_domain(int cpu, struct cpumask *retmask,
212 const struct cpumask *mask)
213{
214 cpumask_setall(retmask);
215}
216
217static void vsmp_apic_post_init(void)
218{
219 /* need to update phys_pkg_id */
220 apic->phys_pkg_id = apicid_phys_pkg_id;
221 apic->vector_allocation_domain = fill_vector_allocation_domain;
222}
223
190void __init vsmp_init(void) 224void __init vsmp_init(void)
191{ 225{
192 detect_vsmp_box(); 226 detect_vsmp_box();
193 if (!is_vsmp_box()) 227 if (!is_vsmp_box())
194 return; 228 return;
195 229
230 x86_platform.apic_post_init = vsmp_apic_post_init;
231
196 vsmp_cap_cpus(); 232 vsmp_cap_cpus();
197 233
198 set_vsmp_pv_ops(); 234 set_vsmp_pv_ops();
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index 7515cf0e1805..8d141b309046 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -18,6 +18,8 @@
18 * use the vDSO. 18 * use the vDSO.
19 */ 19 */
20 20
21#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
22
21#include <linux/time.h> 23#include <linux/time.h>
22#include <linux/init.h> 24#include <linux/init.h>
23#include <linux/kernel.h> 25#include <linux/kernel.h>
@@ -111,18 +113,13 @@ void update_vsyscall(struct timespec *wall_time, struct timespec *wtm,
111static void warn_bad_vsyscall(const char *level, struct pt_regs *regs, 113static void warn_bad_vsyscall(const char *level, struct pt_regs *regs,
112 const char *message) 114 const char *message)
113{ 115{
114 static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL, DEFAULT_RATELIMIT_BURST); 116 if (!show_unhandled_signals)
115 struct task_struct *tsk;
116
117 if (!show_unhandled_signals || !__ratelimit(&rs))
118 return; 117 return;
119 118
120 tsk = current; 119 pr_notice_ratelimited("%s%s[%d] %s ip:%lx cs:%lx sp:%lx ax:%lx si:%lx di:%lx\n",
121 120 level, current->comm, task_pid_nr(current),
122 printk("%s%s[%d] %s ip:%lx cs:%lx sp:%lx ax:%lx si:%lx di:%lx\n", 121 message, regs->ip, regs->cs,
123 level, tsk->comm, task_pid_nr(tsk), 122 regs->sp, regs->ax, regs->si, regs->di);
124 message, regs->ip, regs->cs,
125 regs->sp, regs->ax, regs->si, regs->di);
126} 123}
127 124
128static int addr_to_vsyscall_nr(unsigned long addr) 125static int addr_to_vsyscall_nr(unsigned long addr)
@@ -139,6 +136,19 @@ static int addr_to_vsyscall_nr(unsigned long addr)
139 return nr; 136 return nr;
140} 137}
141 138
139#ifdef CONFIG_SECCOMP
140static int vsyscall_seccomp(struct task_struct *tsk, int syscall_nr)
141{
142 if (!seccomp_mode(&tsk->seccomp))
143 return 0;
144 task_pt_regs(tsk)->orig_ax = syscall_nr;
145 task_pt_regs(tsk)->ax = syscall_nr;
146 return __secure_computing(syscall_nr);
147}
148#else
149#define vsyscall_seccomp(_tsk, _nr) 0
150#endif
151
142static bool write_ok_or_segv(unsigned long ptr, size_t size) 152static bool write_ok_or_segv(unsigned long ptr, size_t size)
143{ 153{
144 /* 154 /*
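The helper follows __secure_computing()'s convention: 0 lets the emulated call proceed, nonzero means the filter killed, trapped, or errno'd it. The orig_ax/ax stores exist so the filter (and any attached tracer) sees a plausible syscall number. Usage matches the cases below:

    skip = vsyscall_seccomp(tsk, __NR_gettimeofday);
    if (skip)
            break;  /* regs->ax already carries the filter's verdict */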
@@ -174,6 +184,7 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
174 int vsyscall_nr; 184 int vsyscall_nr;
175 int prev_sig_on_uaccess_error; 185 int prev_sig_on_uaccess_error;
176 long ret; 186 long ret;
187 int skip;
177 188
178 /* 189 /*
179 * No point in checking CS -- the only way to get here is a user mode 190 * No point in checking CS -- the only way to get here is a user mode
@@ -205,9 +216,6 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
205 } 216 }
206 217
207 tsk = current; 218 tsk = current;
208 if (seccomp_mode(&tsk->seccomp))
209 do_exit(SIGKILL);
210
211 /* 219 /*
212 * With a real vsyscall, page faults cause SIGSEGV. We want to 220 * With a real vsyscall, page faults cause SIGSEGV. We want to
213 * preserve that behavior to make writing exploits harder. 221 * preserve that behavior to make writing exploits harder.
@@ -222,8 +230,13 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
222 * address 0". 230 * address 0".
223 */ 231 */
224 ret = -EFAULT; 232 ret = -EFAULT;
233 skip = 0;
225 switch (vsyscall_nr) { 234 switch (vsyscall_nr) {
226 case 0: 235 case 0:
236 skip = vsyscall_seccomp(tsk, __NR_gettimeofday);
237 if (skip)
238 break;
239
227 if (!write_ok_or_segv(regs->di, sizeof(struct timeval)) || 240 if (!write_ok_or_segv(regs->di, sizeof(struct timeval)) ||
228 !write_ok_or_segv(regs->si, sizeof(struct timezone))) 241 !write_ok_or_segv(regs->si, sizeof(struct timezone)))
229 break; 242 break;
@@ -234,6 +247,10 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
234 break; 247 break;
235 248
236 case 1: 249 case 1:
250 skip = vsyscall_seccomp(tsk, __NR_time);
251 if (skip)
252 break;
253
237 if (!write_ok_or_segv(regs->di, sizeof(time_t))) 254 if (!write_ok_or_segv(regs->di, sizeof(time_t)))
238 break; 255 break;
239 256
@@ -241,6 +258,10 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
241 break; 258 break;
242 259
243 case 2: 260 case 2:
261 skip = vsyscall_seccomp(tsk, __NR_getcpu);
262 if (skip)
263 break;
264
244 if (!write_ok_or_segv(regs->di, sizeof(unsigned)) || 265 if (!write_ok_or_segv(regs->di, sizeof(unsigned)) ||
245 !write_ok_or_segv(regs->si, sizeof(unsigned))) 266 !write_ok_or_segv(regs->si, sizeof(unsigned)))
246 break; 267 break;
@@ -253,6 +274,12 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
253 274
254 current_thread_info()->sig_on_uaccess_error = prev_sig_on_uaccess_error; 275 current_thread_info()->sig_on_uaccess_error = prev_sig_on_uaccess_error;
255 276
277 if (skip) {
278 if ((long)regs->ax <= 0L) /* seccomp errno emulation */
279 goto do_ret;
280 goto done; /* seccomp trace/trap */
281 }
282
256 if (ret == -EFAULT) { 283 if (ret == -EFAULT) {
257 /* Bad news -- userspace fed a bad pointer to a vsyscall. */ 284 /* Bad news -- userspace fed a bad pointer to a vsyscall. */
258 warn_bad_vsyscall(KERN_INFO, regs, 285 warn_bad_vsyscall(KERN_INFO, regs,
@@ -271,10 +298,11 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
271 298
272 regs->ax = ret; 299 regs->ax = ret;
273 300
301do_ret:
274 /* Emulate a ret instruction. */ 302 /* Emulate a ret instruction. */
275 regs->ip = caller; 303 regs->ip = caller;
276 regs->sp += 8; 304 regs->sp += 8;
277 305done:
278 return true; 306 return true;
279 307
280sigsegv: 308sigsegv:
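The two labels added above encode the two seccomp outcomes; annotated (restating the hunk, with the interpretation spelled out):

    if (skip) {
            if ((long)regs->ax <= 0L)
                    goto do_ret;    /* errno-style: still fake the RET */
            goto done;              /* trap/trace: a handler owns regs now,
                                       so leave ip/sp untouched */
    }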
diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c
index 9796c2f3d074..6020f6f5927c 100644
--- a/arch/x86/kernel/x8664_ksyms_64.c
+++ b/arch/x86/kernel/x8664_ksyms_64.c
@@ -28,6 +28,7 @@ EXPORT_SYMBOL(__put_user_8);
28 28
29EXPORT_SYMBOL(copy_user_generic_string); 29EXPORT_SYMBOL(copy_user_generic_string);
30EXPORT_SYMBOL(copy_user_generic_unrolled); 30EXPORT_SYMBOL(copy_user_generic_unrolled);
31EXPORT_SYMBOL(copy_user_enhanced_fast_string);
31EXPORT_SYMBOL(__copy_user_nocache); 32EXPORT_SYMBOL(__copy_user_nocache);
32EXPORT_SYMBOL(_copy_from_user); 33EXPORT_SYMBOL(_copy_from_user);
33EXPORT_SYMBOL(_copy_to_user); 34EXPORT_SYMBOL(_copy_to_user);
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index 35c5e543f550..9f3167e891ef 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -29,7 +29,6 @@ void __init x86_init_uint_noop(unsigned int unused) { }
29void __init x86_init_pgd_noop(pgd_t *unused) { } 29void __init x86_init_pgd_noop(pgd_t *unused) { }
30int __init iommu_init_noop(void) { return 0; } 30int __init iommu_init_noop(void) { return 0; }
31void iommu_shutdown_noop(void) { } 31void iommu_shutdown_noop(void) { }
32void wallclock_init_noop(void) { }
33 32
34/* 33/*
35 * The platform setup functions are preset with the default functions 34 * The platform setup functions are preset with the default functions
@@ -101,7 +100,6 @@ static int default_i8042_detect(void) { return 1; };
101 100
102struct x86_platform_ops x86_platform = { 101struct x86_platform_ops x86_platform = {
103 .calibrate_tsc = native_calibrate_tsc, 102 .calibrate_tsc = native_calibrate_tsc,
104 .wallclock_init = wallclock_init_noop,
105 .get_wallclock = mach_get_cmos_time, 103 .get_wallclock = mach_get_cmos_time,
106 .set_wallclock = mach_set_rtc_mmss, 104 .set_wallclock = mach_set_rtc_mmss,
107 .iommu_shutdown = iommu_shutdown_noop, 105 .iommu_shutdown = iommu_shutdown_noop,
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c
index bd18149b2b0f..3d3e20709119 100644
--- a/arch/x86/kernel/xsave.c
+++ b/arch/x86/kernel/xsave.c
@@ -3,6 +3,9 @@
3 * 3 *
4 * Author: Suresh Siddha <suresh.b.siddha@intel.com> 4 * Author: Suresh Siddha <suresh.b.siddha@intel.com>
5 */ 5 */
6
7#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
8
6#include <linux/bootmem.h> 9#include <linux/bootmem.h>
7#include <linux/compat.h> 10#include <linux/compat.h>
8#include <asm/i387.h> 11#include <asm/i387.h>
@@ -162,7 +165,7 @@ int save_i387_xstate(void __user *buf)
162 BUG_ON(sig_xstate_size < xstate_size); 165 BUG_ON(sig_xstate_size < xstate_size);
163 166
164 if ((unsigned long)buf % 64) 167 if ((unsigned long)buf % 64)
165 printk("save_i387_xstate: bad fpstate %p\n", buf); 168 pr_err("%s: bad fpstate %p\n", __func__, buf);
166 169
167 if (!used_math()) 170 if (!used_math())
168 return 0; 171 return 0;
@@ -422,7 +425,7 @@ static void __init xstate_enable_boot_cpu(void)
422 pcntxt_mask = eax + ((u64)edx << 32); 425 pcntxt_mask = eax + ((u64)edx << 32);
423 426
424 if ((pcntxt_mask & XSTATE_FPSSE) != XSTATE_FPSSE) { 427 if ((pcntxt_mask & XSTATE_FPSSE) != XSTATE_FPSSE) {
425 printk(KERN_ERR "FP/SSE not shown under xsave features 0x%llx\n", 428 pr_err("FP/SSE not shown under xsave features 0x%llx\n",
426 pcntxt_mask); 429 pcntxt_mask);
427 BUG(); 430 BUG();
428 } 431 }
@@ -445,9 +448,8 @@ static void __init xstate_enable_boot_cpu(void)
445 448
446 setup_xstate_init(); 449 setup_xstate_init();
447 450
448 printk(KERN_INFO "xsave/xrstor: enabled xstate_bv 0x%llx, " 451 pr_info("enabled xstate_bv 0x%llx, cntxt size 0x%x\n",
449 "cntxt size 0x%x\n", 452 pcntxt_mask, xstate_size);
450 pcntxt_mask, xstate_size);
451} 453}
452 454
453/* 455/*
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index 2e88438ffd83..9b7ec1150ab0 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -80,10 +80,10 @@ static inline struct kvm_pmc *get_fixed_pmc_idx(struct kvm_pmu *pmu, int idx)
80 80
81static struct kvm_pmc *global_idx_to_pmc(struct kvm_pmu *pmu, int idx) 81static struct kvm_pmc *global_idx_to_pmc(struct kvm_pmu *pmu, int idx)
82{ 82{
83 if (idx < X86_PMC_IDX_FIXED) 83 if (idx < INTEL_PMC_IDX_FIXED)
84 return get_gp_pmc(pmu, MSR_P6_EVNTSEL0 + idx, MSR_P6_EVNTSEL0); 84 return get_gp_pmc(pmu, MSR_P6_EVNTSEL0 + idx, MSR_P6_EVNTSEL0);
85 else 85 else
86 return get_fixed_pmc_idx(pmu, idx - X86_PMC_IDX_FIXED); 86 return get_fixed_pmc_idx(pmu, idx - INTEL_PMC_IDX_FIXED);
87} 87}
88 88
89void kvm_deliver_pmi(struct kvm_vcpu *vcpu) 89void kvm_deliver_pmi(struct kvm_vcpu *vcpu)
@@ -291,7 +291,7 @@ static void reprogram_idx(struct kvm_pmu *pmu, int idx)
291 if (pmc_is_gp(pmc)) 291 if (pmc_is_gp(pmc))
292 reprogram_gp_counter(pmc, pmc->eventsel); 292 reprogram_gp_counter(pmc, pmc->eventsel);
293 else { 293 else {
294 int fidx = idx - X86_PMC_IDX_FIXED; 294 int fidx = idx - INTEL_PMC_IDX_FIXED;
295 reprogram_fixed_counter(pmc, 295 reprogram_fixed_counter(pmc,
296 fixed_en_pmi(pmu->fixed_ctr_ctrl, fidx), fidx); 296 fixed_en_pmi(pmu->fixed_ctr_ctrl, fidx), fidx);
297 } 297 }
@@ -452,7 +452,7 @@ void kvm_pmu_cpuid_update(struct kvm_vcpu *vcpu)
452 return; 452 return;
453 453
454 pmu->nr_arch_gp_counters = min((int)(entry->eax >> 8) & 0xff, 454 pmu->nr_arch_gp_counters = min((int)(entry->eax >> 8) & 0xff,
455 X86_PMC_MAX_GENERIC); 455 INTEL_PMC_MAX_GENERIC);
456 pmu->counter_bitmask[KVM_PMC_GP] = 456 pmu->counter_bitmask[KVM_PMC_GP] =
457 ((u64)1 << ((entry->eax >> 16) & 0xff)) - 1; 457 ((u64)1 << ((entry->eax >> 16) & 0xff)) - 1;
458 bitmap_len = (entry->eax >> 24) & 0xff; 458 bitmap_len = (entry->eax >> 24) & 0xff;
@@ -462,13 +462,13 @@ void kvm_pmu_cpuid_update(struct kvm_vcpu *vcpu)
462 pmu->nr_arch_fixed_counters = 0; 462 pmu->nr_arch_fixed_counters = 0;
463 } else { 463 } else {
464 pmu->nr_arch_fixed_counters = min((int)(entry->edx & 0x1f), 464 pmu->nr_arch_fixed_counters = min((int)(entry->edx & 0x1f),
465 X86_PMC_MAX_FIXED); 465 INTEL_PMC_MAX_FIXED);
466 pmu->counter_bitmask[KVM_PMC_FIXED] = 466 pmu->counter_bitmask[KVM_PMC_FIXED] =
467 ((u64)1 << ((entry->edx >> 5) & 0xff)) - 1; 467 ((u64)1 << ((entry->edx >> 5) & 0xff)) - 1;
468 } 468 }
469 469
470 pmu->global_ctrl = ((1 << pmu->nr_arch_gp_counters) - 1) | 470 pmu->global_ctrl = ((1 << pmu->nr_arch_gp_counters) - 1) |
471 (((1ull << pmu->nr_arch_fixed_counters) - 1) << X86_PMC_IDX_FIXED); 471 (((1ull << pmu->nr_arch_fixed_counters) - 1) << INTEL_PMC_IDX_FIXED);
472 pmu->global_ctrl_mask = ~pmu->global_ctrl; 472 pmu->global_ctrl_mask = ~pmu->global_ctrl;
473} 473}
474 474
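The renamed constants keep the architectural bit layout. A worked example of the global_ctrl computation above (counter counts assumed; INTEL_PMC_IDX_FIXED is 32):

    /*
     * With 4 GP and 3 fixed counters:
     *   (1 << 4) - 1                 = 0x000000000000000f
     *   ((1ull << 3) - 1) << 32      = 0x0000000700000000
     *   global_ctrl                  = 0x000000070000000f
     * i.e. GP enables in bits 0-3, fixed-counter enables in bits 32-34.
     */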
@@ -478,15 +478,15 @@ void kvm_pmu_init(struct kvm_vcpu *vcpu)
478 struct kvm_pmu *pmu = &vcpu->arch.pmu; 478 struct kvm_pmu *pmu = &vcpu->arch.pmu;
479 479
480 memset(pmu, 0, sizeof(*pmu)); 480 memset(pmu, 0, sizeof(*pmu));
481 for (i = 0; i < X86_PMC_MAX_GENERIC; i++) { 481 for (i = 0; i < INTEL_PMC_MAX_GENERIC; i++) {
482 pmu->gp_counters[i].type = KVM_PMC_GP; 482 pmu->gp_counters[i].type = KVM_PMC_GP;
483 pmu->gp_counters[i].vcpu = vcpu; 483 pmu->gp_counters[i].vcpu = vcpu;
484 pmu->gp_counters[i].idx = i; 484 pmu->gp_counters[i].idx = i;
485 } 485 }
486 for (i = 0; i < X86_PMC_MAX_FIXED; i++) { 486 for (i = 0; i < INTEL_PMC_MAX_FIXED; i++) {
487 pmu->fixed_counters[i].type = KVM_PMC_FIXED; 487 pmu->fixed_counters[i].type = KVM_PMC_FIXED;
488 pmu->fixed_counters[i].vcpu = vcpu; 488 pmu->fixed_counters[i].vcpu = vcpu;
489 pmu->fixed_counters[i].idx = i + X86_PMC_IDX_FIXED; 489 pmu->fixed_counters[i].idx = i + INTEL_PMC_IDX_FIXED;
490 } 490 }
491 init_irq_work(&pmu->irq_work, trigger_pmi); 491 init_irq_work(&pmu->irq_work, trigger_pmi);
492 kvm_pmu_cpuid_update(vcpu); 492 kvm_pmu_cpuid_update(vcpu);
@@ -498,13 +498,13 @@ void kvm_pmu_reset(struct kvm_vcpu *vcpu)
498 int i; 498 int i;
499 499
500 irq_work_sync(&pmu->irq_work); 500 irq_work_sync(&pmu->irq_work);
501 for (i = 0; i < X86_PMC_MAX_GENERIC; i++) { 501 for (i = 0; i < INTEL_PMC_MAX_GENERIC; i++) {
502 struct kvm_pmc *pmc = &pmu->gp_counters[i]; 502 struct kvm_pmc *pmc = &pmu->gp_counters[i];
503 stop_counter(pmc); 503 stop_counter(pmc);
504 pmc->counter = pmc->eventsel = 0; 504 pmc->counter = pmc->eventsel = 0;
505 } 505 }
506 506
507 for (i = 0; i < X86_PMC_MAX_FIXED; i++) 507 for (i = 0; i < INTEL_PMC_MAX_FIXED; i++)
508 stop_counter(&pmu->fixed_counters[i]); 508 stop_counter(&pmu->fixed_counters[i]);
509 509
510 pmu->fixed_ctr_ctrl = pmu->global_ctrl = pmu->global_status = 510 pmu->fixed_ctr_ctrl = pmu->global_ctrl = pmu->global_status =
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index 911d2641f14c..62d02e3c3ed6 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -710,16 +710,6 @@ TRACE_EVENT(kvm_skinit,
710 __entry->rip, __entry->slb) 710 __entry->rip, __entry->slb)
711); 711);
712 712
713#define __print_insn(insn, ilen) ({ \
714 int i; \
715 const char *ret = p->buffer + p->len; \
716 \
717 for (i = 0; i < ilen; ++i) \
718 trace_seq_printf(p, " %02x", insn[i]); \
719 trace_seq_printf(p, "%c", 0); \
720 ret; \
721 })
722
723#define KVM_EMUL_INSN_F_CR0_PE (1 << 0) 713#define KVM_EMUL_INSN_F_CR0_PE (1 << 0)
724#define KVM_EMUL_INSN_F_EFL_VM (1 << 1) 714#define KVM_EMUL_INSN_F_EFL_VM (1 << 1)
725#define KVM_EMUL_INSN_F_CS_D (1 << 2) 715#define KVM_EMUL_INSN_F_CS_D (1 << 2)
@@ -786,7 +776,7 @@ TRACE_EVENT(kvm_emulate_insn,
786 776
787 TP_printk("%x:%llx:%s (%s)%s", 777 TP_printk("%x:%llx:%s (%s)%s",
788 __entry->csbase, __entry->rip, 778 __entry->csbase, __entry->rip,
789 __print_insn(__entry->insn, __entry->len), 779 __print_hex(__entry->insn, __entry->len),
790 __print_symbolic(__entry->flags, 780 __print_symbolic(__entry->flags,
791 kvm_trace_symbol_emul_flags), 781 kvm_trace_symbol_emul_flags),
792 __entry->failed ? " failed" : "" 782 __entry->failed ? " failed" : ""
diff --git a/arch/x86/lib/msr-reg-export.c b/arch/x86/lib/msr-reg-export.c
index a311cc59b65d..8d6ef78b5d01 100644
--- a/arch/x86/lib/msr-reg-export.c
+++ b/arch/x86/lib/msr-reg-export.c
@@ -1,5 +1,5 @@
1#include <linux/module.h> 1#include <linux/module.h>
2#include <asm/msr.h> 2#include <asm/msr.h>
3 3
4EXPORT_SYMBOL(native_rdmsr_safe_regs); 4EXPORT_SYMBOL(rdmsr_safe_regs);
5EXPORT_SYMBOL(native_wrmsr_safe_regs); 5EXPORT_SYMBOL(wrmsr_safe_regs);
diff --git a/arch/x86/lib/msr-reg.S b/arch/x86/lib/msr-reg.S
index 69fa10623f21..f6d13eefad10 100644
--- a/arch/x86/lib/msr-reg.S
+++ b/arch/x86/lib/msr-reg.S
@@ -6,13 +6,13 @@
6 6
7#ifdef CONFIG_X86_64 7#ifdef CONFIG_X86_64
8/* 8/*
9 * int native_{rdmsr,wrmsr}_safe_regs(u32 gprs[8]); 9 * int {rdmsr,wrmsr}_safe_regs(u32 gprs[8]);
10 * 10 *
11 * reg layout: u32 gprs[eax, ecx, edx, ebx, esp, ebp, esi, edi] 11 * reg layout: u32 gprs[eax, ecx, edx, ebx, esp, ebp, esi, edi]
12 * 12 *
13 */ 13 */
14.macro op_safe_regs op 14.macro op_safe_regs op
15ENTRY(native_\op\()_safe_regs) 15ENTRY(\op\()_safe_regs)
16 CFI_STARTPROC 16 CFI_STARTPROC
17 pushq_cfi %rbx 17 pushq_cfi %rbx
18 pushq_cfi %rbp 18 pushq_cfi %rbp
@@ -45,13 +45,13 @@ ENTRY(native_\op\()_safe_regs)
45 45
46 _ASM_EXTABLE(1b, 3b) 46 _ASM_EXTABLE(1b, 3b)
47 CFI_ENDPROC 47 CFI_ENDPROC
48ENDPROC(native_\op\()_safe_regs) 48ENDPROC(\op\()_safe_regs)
49.endm 49.endm
50 50
51#else /* X86_32 */ 51#else /* X86_32 */
52 52
53.macro op_safe_regs op 53.macro op_safe_regs op
54ENTRY(native_\op\()_safe_regs) 54ENTRY(\op\()_safe_regs)
55 CFI_STARTPROC 55 CFI_STARTPROC
56 pushl_cfi %ebx 56 pushl_cfi %ebx
57 pushl_cfi %ebp 57 pushl_cfi %ebp
@@ -92,7 +92,7 @@ ENTRY(native_\op\()_safe_regs)
92 92
93 _ASM_EXTABLE(1b, 3b) 93 _ASM_EXTABLE(1b, 3b)
94 CFI_ENDPROC 94 CFI_ENDPROC
95ENDPROC(native_\op\()_safe_regs) 95ENDPROC(\op\()_safe_regs)
96.endm 96.endm
97 97
98#endif 98#endif
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index bc4e9d84157f..e0e6990723e9 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -385,7 +385,7 @@ void free_initmem(void)
385} 385}
386 386
387#ifdef CONFIG_BLK_DEV_INITRD 387#ifdef CONFIG_BLK_DEV_INITRD
388void free_initrd_mem(unsigned long start, unsigned long end) 388void __init free_initrd_mem(unsigned long start, unsigned long end)
389{ 389{
390 /* 390 /*
391 * end may not be aligned, and we cannot align it, 391 * end may not be aligned, and we cannot align it,
diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c
index 303f08637826..b2b94438ff05 100644
--- a/arch/x86/oprofile/op_model_amd.c
+++ b/arch/x86/oprofile/op_model_amd.c
@@ -312,7 +312,7 @@ static int op_amd_fill_in_addresses(struct op_msrs * const msrs)
312 goto fail; 312 goto fail;
313 } 313 }
314 /* both registers must be reserved */ 314 /* both registers must be reserved */
315 if (num_counters == AMD64_NUM_COUNTERS_F15H) { 315 if (num_counters == AMD64_NUM_COUNTERS_CORE) {
316 msrs->counters[i].addr = MSR_F15H_PERF_CTR + (i << 1); 316 msrs->counters[i].addr = MSR_F15H_PERF_CTR + (i << 1);
317 msrs->controls[i].addr = MSR_F15H_PERF_CTL + (i << 1); 317 msrs->controls[i].addr = MSR_F15H_PERF_CTL + (i << 1);
318 } else { 318 } else {
@@ -514,7 +514,7 @@ static int op_amd_init(struct oprofile_operations *ops)
514 ops->create_files = setup_ibs_files; 514 ops->create_files = setup_ibs_files;
515 515
516 if (boot_cpu_data.x86 == 0x15) { 516 if (boot_cpu_data.x86 == 0x15) {
517 num_counters = AMD64_NUM_COUNTERS_F15H; 517 num_counters = AMD64_NUM_COUNTERS_CORE;
518 } else { 518 } else {
519 num_counters = AMD64_NUM_COUNTERS; 519 num_counters = AMD64_NUM_COUNTERS;
520 } 520 }
diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c
index 59880afa851f..71b5d5a07d7b 100644
--- a/arch/x86/platform/uv/tlb_uv.c
+++ b/arch/x86/platform/uv/tlb_uv.c
@@ -1,7 +1,7 @@
 /*
  * SGI UltraViolet TLB flush routines.
  *
- * (c) 2008-2011 Cliff Wickman <cpw@sgi.com>, SGI.
+ * (c) 2008-2012 Cliff Wickman <cpw@sgi.com>, SGI.
  *
  * This code is released under the GNU General Public License version 2 or
  * later.
@@ -38,8 +38,7 @@ static int timeout_base_ns[] = {
 
 static int timeout_us;
 static int nobau;
-static int baudisabled;
-static spinlock_t disable_lock;
+static int nobau_perm;
 static cycles_t congested_cycles;
 
 /* tunables: */
@@ -47,12 +46,13 @@ static int max_concurr = MAX_BAU_CONCURRENT;
 static int max_concurr_const = MAX_BAU_CONCURRENT;
 static int plugged_delay = PLUGGED_DELAY;
 static int plugsb4reset = PLUGSB4RESET;
+static int giveup_limit = GIVEUP_LIMIT;
 static int timeoutsb4reset = TIMEOUTSB4RESET;
 static int ipi_reset_limit = IPI_RESET_LIMIT;
 static int complete_threshold = COMPLETE_THRESHOLD;
 static int congested_respns_us = CONGESTED_RESPONSE_US;
 static int congested_reps = CONGESTED_REPS;
-static int congested_period = CONGESTED_PERIOD;
+static int disabled_period = DISABLED_PERIOD;
 
 static struct tunables tunables[] = {
 	{&max_concurr, MAX_BAU_CONCURRENT}, /* must be [0] */
@@ -63,7 +63,8 @@ static struct tunables tunables[] = {
 	{&complete_threshold, COMPLETE_THRESHOLD},
 	{&congested_respns_us, CONGESTED_RESPONSE_US},
 	{&congested_reps, CONGESTED_REPS},
-	{&congested_period, CONGESTED_PERIOD}
+	{&disabled_period, DISABLED_PERIOD},
+	{&giveup_limit, GIVEUP_LIMIT}
 };
 
 static struct dentry *tunables_dir;
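The tunables[] table above pairs each tunable's address with its default, so the whole set can be written or reset by index. A minimal user-space sketch of the same pattern, using hypothetical stand-in names rather than the kernel's:

#include <stdio.h>

/* hypothetical stand-ins for the kernel's tunable variables */
static int max_concurr = 2;
static int disabled_period = 8;
static int giveup_limit = 2;

struct tunable {
	int *ptr;	/* where the live value is kept */
	int deflt;	/* value to restore on reset */
};

static struct tunable tunables[] = {
	{&max_concurr, 2},	/* must be [0], as in the kernel table */
	{&disabled_period, 8},
	{&giveup_limit, 2},
};

int main(void)
{
	unsigned int i;

	*tunables[1].ptr = 30;	/* set one tunable by index */

	/* reset everything to defaults, as a write handler could */
	for (i = 0; i < sizeof(tunables) / sizeof(tunables[0]); i++)
		*tunables[i].ptr = tunables[i].deflt;

	printf("%d %d %d\n", max_concurr, disabled_period, giveup_limit);
	return 0;
}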
@@ -120,6 +121,40 @@ static DEFINE_PER_CPU(struct ptc_stats, ptcstats);
 static DEFINE_PER_CPU(struct bau_control, bau_control);
 static DEFINE_PER_CPU(cpumask_var_t, uv_flush_tlb_mask);
 
+static void
+set_bau_on(void)
+{
+	int cpu;
+	struct bau_control *bcp;
+
+	if (nobau_perm) {
+		pr_info("BAU not initialized; cannot be turned on\n");
+		return;
+	}
+	nobau = 0;
+	for_each_present_cpu(cpu) {
+		bcp = &per_cpu(bau_control, cpu);
+		bcp->nobau = 0;
+	}
+	pr_info("BAU turned on\n");
+	return;
+}
+
+static void
+set_bau_off(void)
+{
+	int cpu;
+	struct bau_control *bcp;
+
+	nobau = 1;
+	for_each_present_cpu(cpu) {
+		bcp = &per_cpu(bau_control, cpu);
+		bcp->nobau = 1;
+	}
+	pr_info("BAU turned off\n");
+	return;
+}
+
 /*
  * Determine the first node on a uvhub. 'Nodes' are used for kernel
  * memory allocation.
@@ -278,7 +313,7 @@ static void bau_process_message(struct msg_desc *mdp, struct bau_control *bcp,
 	 * Both sockets dump their completed count total into
 	 * the message's count.
 	 */
-	smaster->socket_acknowledge_count[mdp->msg_slot] = 0;
+	*sp = 0;
 	asp = (struct atomic_short *)&msg->acknowledge_count;
 	msg_ack_count = atom_asr(socket_ack_count, asp);
 
@@ -491,16 +526,15 @@ static int uv1_wait_completion(struct bau_desc *bau_desc,
 }
 
 /*
- * UV2 has an extra bit of status in the ACTIVATION_STATUS_2 register.
+ * UV2 could have an extra bit of status in the ACTIVATION_STATUS_2 register.
+ * But not currently used.
  */
 static unsigned long uv2_read_status(unsigned long offset, int rshft, int desc)
 {
 	unsigned long descriptor_status;
-	unsigned long descriptor_status2;
 
-	descriptor_status = ((read_lmmr(offset) >> rshft) & UV_ACT_STATUS_MASK);
-	descriptor_status2 = (read_mmr_uv2_status() >> desc) & 0x1UL;
-	descriptor_status = (descriptor_status << 1) | descriptor_status2;
+	descriptor_status =
+		((read_lmmr(offset) >> rshft) & UV_ACT_STATUS_MASK) << 1;
 	return descriptor_status;
 }
 
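uv2_read_status() above isolates one descriptor's 2-bit field from the activation-status MMR and shifts it left one bit, since the software status codes carry a third bit that the hardware no longer supplies. A standalone sketch of that extraction, with assumed field sizes (the real constants live in uv_bau.h):

#include <stdio.h>
#include <stdint.h>

#define ACT_STATUS_SIZE	2	/* assumed: bits per descriptor */
#define ACT_STATUS_MASK	0x3UL	/* assumed: 2-bit field mask */

/* Pull descriptor 'desc's status out of a 64-bit status MMR value
 * and widen it by one bit, as the UV2 path now does. */
static unsigned long read_status(uint64_t mmr, int desc)
{
	int rshft = desc * ACT_STATUS_SIZE;

	return ((mmr >> rshft) & ACT_STATUS_MASK) << 1;
}

int main(void)
{
	uint64_t mmr = 0x2UL << 6;	/* descriptor 3 is BUSY (0x2) */

	printf("status = %lx\n", read_status(mmr, 3));	/* prints 4 */
	return 0;
}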
@@ -531,87 +565,11 @@ int normal_busy(struct bau_control *bcp)
  */
 int handle_uv2_busy(struct bau_control *bcp)
 {
-	int busy_one = bcp->using_desc;
-	int normal = bcp->uvhub_cpu;
-	int selected = -1;
-	int i;
-	unsigned long descriptor_status;
-	unsigned long status;
-	int mmr_offset;
-	struct bau_desc *bau_desc_old;
-	struct bau_desc *bau_desc_new;
-	struct bau_control *hmaster = bcp->uvhub_master;
 	struct ptc_stats *stat = bcp->statp;
-	cycles_t ttm;
 
 	stat->s_uv2_wars++;
-	spin_lock(&hmaster->uvhub_lock);
-	/* try for the original first */
-	if (busy_one != normal) {
-		if (!normal_busy(bcp))
-			selected = normal;
-	}
-	if (selected < 0) {
-		/* can't use the normal, select an alternate */
-		mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_1;
-		descriptor_status = read_lmmr(mmr_offset);
-
-		/* scan available descriptors 32-63 */
-		for (i = 0; i < UV_CPUS_PER_AS; i++) {
-			if ((hmaster->inuse_map & (1 << i)) == 0) {
-				status = ((descriptor_status >>
-						(i * UV_ACT_STATUS_SIZE)) &
-						UV_ACT_STATUS_MASK) << 1;
-				if (status != UV2H_DESC_BUSY) {
-					selected = i + UV_CPUS_PER_AS;
-					break;
-				}
-			}
-		}
-	}
-
-	if (busy_one != normal)
-		/* mark the busy alternate as not in-use */
-		hmaster->inuse_map &= ~(1 << (busy_one - UV_CPUS_PER_AS));
-
-	if (selected >= 0) {
-		/* switch to the selected descriptor */
-		if (selected != normal) {
-			/* set the selected alternate as in-use */
-			hmaster->inuse_map |=
-					(1 << (selected - UV_CPUS_PER_AS));
-			if (selected > stat->s_uv2_wars_hw)
-				stat->s_uv2_wars_hw = selected;
-		}
-		bau_desc_old = bcp->descriptor_base;
-		bau_desc_old += (ITEMS_PER_DESC * busy_one);
-		bcp->using_desc = selected;
-		bau_desc_new = bcp->descriptor_base;
-		bau_desc_new += (ITEMS_PER_DESC * selected);
-		*bau_desc_new = *bau_desc_old;
-	} else {
-		/*
-		 * All are busy. Wait for the normal one for this cpu to
-		 * free up.
-		 */
-		stat->s_uv2_war_waits++;
-		spin_unlock(&hmaster->uvhub_lock);
-		ttm = get_cycles();
-		do {
-			cpu_relax();
-		} while (normal_busy(bcp));
-		spin_lock(&hmaster->uvhub_lock);
-		/* switch to the original descriptor */
-		bcp->using_desc = normal;
-		bau_desc_old = bcp->descriptor_base;
-		bau_desc_old += (ITEMS_PER_DESC * bcp->using_desc);
-		bcp->using_desc = (ITEMS_PER_DESC * normal);
-		bau_desc_new = bcp->descriptor_base;
-		bau_desc_new += (ITEMS_PER_DESC * normal);
-		*bau_desc_new = *bau_desc_old; /* copy the entire descriptor */
-	}
-	spin_unlock(&hmaster->uvhub_lock);
-	return FLUSH_RETRY_BUSYBUG;
+	bcp->busy = 1;
+	return FLUSH_GIVEUP;
 }
 
 static int uv2_wait_completion(struct bau_desc *bau_desc,
@@ -620,7 +578,7 @@ static int uv2_wait_completion(struct bau_desc *bau_desc,
 {
 	unsigned long descriptor_stat;
 	cycles_t ttm;
-	int desc = bcp->using_desc;
+	int desc = bcp->uvhub_cpu;
 	long busy_reps = 0;
 	struct ptc_stats *stat = bcp->statp;
 
@@ -628,24 +586,38 @@ static int uv2_wait_completion(struct bau_desc *bau_desc,
 
 	/* spin on the status MMR, waiting for it to go idle */
 	while (descriptor_stat != UV2H_DESC_IDLE) {
-		/*
-		 * Our software ack messages may be blocked because
-		 * there are no swack resources available.  As long
-		 * as none of them has timed out hardware will NACK
-		 * our message and its state will stay IDLE.
-		 */
-		if ((descriptor_stat == UV2H_DESC_SOURCE_TIMEOUT) ||
-		    (descriptor_stat == UV2H_DESC_DEST_PUT_ERR)) {
+		if ((descriptor_stat == UV2H_DESC_SOURCE_TIMEOUT)) {
+			/*
+			 * A h/w bug on the destination side may
+			 * have prevented the message being marked
+			 * pending, thus it doesn't get replied to
+			 * and gets continually nacked until it times
+			 * out with a SOURCE_TIMEOUT.
+			 */
 			stat->s_stimeout++;
 			return FLUSH_GIVEUP;
-		} else if (descriptor_stat == UV2H_DESC_DEST_STRONG_NACK) {
-			stat->s_strongnacks++;
-			bcp->conseccompletes = 0;
-			return FLUSH_GIVEUP;
 		} else if (descriptor_stat == UV2H_DESC_DEST_TIMEOUT) {
+			ttm = get_cycles();
+
+			/*
+			 * Our retries may be blocked by all destination
+			 * swack resources being consumed, and a timeout
+			 * pending.  In that case hardware returns the
+			 * ERROR that looks like a destination timeout.
+			 * Without using the extended status we have to
+			 * deduce from the short time that this was a
+			 * strong nack.
+			 */
+			if (cycles_2_us(ttm - bcp->send_message) < timeout_us) {
+				bcp->conseccompletes = 0;
+				stat->s_plugged++;
+				/* FLUSH_RETRY_PLUGGED causes hang on boot */
+				return FLUSH_GIVEUP;
+			}
 			stat->s_dtimeout++;
 			bcp->conseccompletes = 0;
-			return FLUSH_RETRY_TIMEOUT;
+			/* FLUSH_RETRY_TIMEOUT causes hang on boot */
+			return FLUSH_GIVEUP;
 		} else {
 			busy_reps++;
 			if (busy_reps > 1000000) {
@@ -653,9 +625,8 @@ static int uv2_wait_completion(struct bau_desc *bau_desc,
 				busy_reps = 0;
 				ttm = get_cycles();
 				if ((ttm - bcp->send_message) >
-						(bcp->clocks_per_100_usec)) {
+						bcp->timeout_interval)
 					return handle_uv2_busy(bcp);
-				}
 			}
 			/*
 			 * descriptor_stat is still BUSY
@@ -679,7 +650,7 @@ static int wait_completion(struct bau_desc *bau_desc,
 {
 	int right_shift;
 	unsigned long mmr_offset;
-	int desc = bcp->using_desc;
+	int desc = bcp->uvhub_cpu;
 
 	if (desc < UV_CPUS_PER_AS) {
 		mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_0;
@@ -758,33 +729,31 @@ static void destination_timeout(struct bau_desc *bau_desc,
 }
 
 /*
- * Completions are taking a very long time due to a congested numalink
- * network.
+ * Stop all cpus on a uvhub from using the BAU for a period of time.
+ * This is reversed by check_enable.
  */
-static void disable_for_congestion(struct bau_control *bcp,
-					struct ptc_stats *stat)
+static void disable_for_period(struct bau_control *bcp, struct ptc_stats *stat)
 {
-	/* let only one cpu do this disabling */
-	spin_lock(&disable_lock);
-
-	if (!baudisabled && bcp->period_requests &&
-	    ((bcp->period_time / bcp->period_requests) > congested_cycles)) {
-		int tcpu;
-		struct bau_control *tbcp;
-		/* it becomes this cpu's job to turn on the use of the
-		   BAU again */
-		baudisabled = 1;
-		bcp->set_bau_off = 1;
-		bcp->set_bau_on_time = get_cycles();
-		bcp->set_bau_on_time += sec_2_cycles(bcp->cong_period);
+	int tcpu;
+	struct bau_control *tbcp;
+	struct bau_control *hmaster;
+	cycles_t tm1;
+
+	hmaster = bcp->uvhub_master;
+	spin_lock(&hmaster->disable_lock);
+	if (!bcp->baudisabled) {
 		stat->s_bau_disabled++;
+		tm1 = get_cycles();
 		for_each_present_cpu(tcpu) {
 			tbcp = &per_cpu(bau_control, tcpu);
-			tbcp->baudisabled = 1;
+			if (tbcp->uvhub_master == hmaster) {
+				tbcp->baudisabled = 1;
+				tbcp->set_bau_on_time =
+						tm1 + bcp->disabled_period;
+			}
 		}
 	}
-
-	spin_unlock(&disable_lock);
+	spin_unlock(&hmaster->disable_lock);
 }
 
 static void count_max_concurr(int stat, struct bau_control *bcp,
@@ -815,16 +784,30 @@ static void record_send_stats(cycles_t time1, cycles_t time2,
 			bcp->period_requests++;
 			bcp->period_time += elapsed;
 			if ((elapsed > congested_cycles) &&
-			    (bcp->period_requests > bcp->cong_reps))
-				disable_for_congestion(bcp, stat);
+			    (bcp->period_requests > bcp->cong_reps) &&
+			    ((bcp->period_time / bcp->period_requests) >
+							congested_cycles)) {
+				stat->s_congested++;
+				disable_for_period(bcp, stat);
+			}
 		}
 	} else
 		stat->s_requestor--;
 
 	if (completion_status == FLUSH_COMPLETE && try > 1)
 		stat->s_retriesok++;
-	else if (completion_status == FLUSH_GIVEUP)
+	else if (completion_status == FLUSH_GIVEUP) {
 		stat->s_giveup++;
+		if (get_cycles() > bcp->period_end)
+			bcp->period_giveups = 0;
+		bcp->period_giveups++;
+		if (bcp->period_giveups == 1)
+			bcp->period_end = get_cycles() + bcp->disabled_period;
+		if (bcp->period_giveups > bcp->giveup_limit) {
+			disable_for_period(bcp, stat);
+			stat->s_giveuplimit++;
+		}
+	}
 }
 
 /*
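record_send_stats() now counts give-ups within a sliding window: the first give-up opens a period, and crossing giveup_limit inside that period disables the BAU. A minimal sketch of that windowed trip counter, using seconds instead of cycles and hypothetical names:

#include <stdio.h>
#include <time.h>

#define GIVEUP_LIMIT	3	/* assumed trip threshold */
#define PERIOD_SECS	8	/* assumed window length */

static long period_giveups;
static time_t period_end;

/* Returns 1 when give-ups exceed the limit within one window. */
static int note_giveup(void)
{
	time_t now = time(NULL);

	if (now > period_end)
		period_giveups = 0;	/* window expired; start over */
	period_giveups++;
	if (period_giveups == 1)
		period_end = now + PERIOD_SECS;	/* open a new window */
	return period_giveups > GIVEUP_LIMIT;
}

int main(void)
{
	int i;

	for (i = 0; i < 5; i++)
		if (note_giveup())
			printf("giveup %d trips the limit\n", i + 1);
	return 0;
}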
@@ -868,7 +851,8 @@ static void handle_cmplt(int completion_status, struct bau_desc *bau_desc,
  * Returns 1 if it gives up entirely and the original cpu mask is to be
  * returned to the kernel.
  */
-int uv_flush_send_and_wait(struct cpumask *flush_mask, struct bau_control *bcp)
+int uv_flush_send_and_wait(struct cpumask *flush_mask, struct bau_control *bcp,
+				struct bau_desc *bau_desc)
 {
 	int seq_number = 0;
 	int completion_stat = 0;
@@ -881,24 +865,23 @@ int uv_flush_send_and_wait(struct cpumask *flush_mask, struct bau_control *bcp)
 	struct bau_control *hmaster = bcp->uvhub_master;
 	struct uv1_bau_msg_header *uv1_hdr = NULL;
 	struct uv2_bau_msg_header *uv2_hdr = NULL;
-	struct bau_desc *bau_desc;
 
-	if (bcp->uvhub_version == 1)
+	if (bcp->uvhub_version == 1) {
+		uv1 = 1;
 		uv1_throttle(hmaster, stat);
+	}
 
 	while (hmaster->uvhub_quiesce)
 		cpu_relax();
 
 	time1 = get_cycles();
+	if (uv1)
+		uv1_hdr = &bau_desc->header.uv1_hdr;
+	else
+		uv2_hdr = &bau_desc->header.uv2_hdr;
+
 	do {
-		bau_desc = bcp->descriptor_base;
-		bau_desc += (ITEMS_PER_DESC * bcp->using_desc);
-		if (bcp->uvhub_version == 1) {
-			uv1 = 1;
-			uv1_hdr = &bau_desc->header.uv1_hdr;
-		} else
-			uv2_hdr = &bau_desc->header.uv2_hdr;
-		if ((try == 0) || (completion_stat == FLUSH_RETRY_BUSYBUG)) {
+		if (try == 0) {
 			if (uv1)
 				uv1_hdr->msg_type = MSG_REGULAR;
 			else
@@ -916,25 +899,24 @@ int uv_flush_send_and_wait(struct cpumask *flush_mask, struct bau_control *bcp)
 			uv1_hdr->sequence = seq_number;
 		else
 			uv2_hdr->sequence = seq_number;
-		index = (1UL << AS_PUSH_SHIFT) | bcp->using_desc;
+		index = (1UL << AS_PUSH_SHIFT) | bcp->uvhub_cpu;
 		bcp->send_message = get_cycles();
 
 		write_mmr_activation(index);
 
 		try++;
 		completion_stat = wait_completion(bau_desc, bcp, try);
-		/* UV2: wait_completion() may change the bcp->using_desc */
 
 		handle_cmplt(completion_stat, bau_desc, bcp, hmaster, stat);
 
 		if (bcp->ipi_attempts >= bcp->ipi_reset_limit) {
 			bcp->ipi_attempts = 0;
+			stat->s_overipilimit++;
 			completion_stat = FLUSH_GIVEUP;
 			break;
 		}
 		cpu_relax();
 	} while ((completion_stat == FLUSH_RETRY_PLUGGED) ||
-		 (completion_stat == FLUSH_RETRY_BUSYBUG) ||
 		 (completion_stat == FLUSH_RETRY_TIMEOUT));
 
 	time2 = get_cycles();
@@ -955,28 +937,33 @@ int uv_flush_send_and_wait(struct cpumask *flush_mask, struct bau_control *bcp)
 }
 
 /*
- * The BAU is disabled. When the disabled time period has expired, the cpu
- * that disabled it must re-enable it.
- * Return 0 if it is re-enabled for all cpus.
+ * The BAU is disabled for this uvhub. When the disabled time period has
+ * expired re-enable it.
+ * Return 0 if it is re-enabled for all cpus on this uvhub.
  */
 static int check_enable(struct bau_control *bcp, struct ptc_stats *stat)
 {
 	int tcpu;
 	struct bau_control *tbcp;
+	struct bau_control *hmaster;
 
-	if (bcp->set_bau_off) {
-		if (get_cycles() >= bcp->set_bau_on_time) {
-			stat->s_bau_reenabled++;
-			baudisabled = 0;
-			for_each_present_cpu(tcpu) {
-				tbcp = &per_cpu(bau_control, tcpu);
+	hmaster = bcp->uvhub_master;
+	spin_lock(&hmaster->disable_lock);
+	if (bcp->baudisabled && (get_cycles() >= bcp->set_bau_on_time)) {
+		stat->s_bau_reenabled++;
+		for_each_present_cpu(tcpu) {
+			tbcp = &per_cpu(bau_control, tcpu);
+			if (tbcp->uvhub_master == hmaster) {
 				tbcp->baudisabled = 0;
 				tbcp->period_requests = 0;
 				tbcp->period_time = 0;
+				tbcp->period_giveups = 0;
 			}
-			return 0;
 		}
+		spin_unlock(&hmaster->disable_lock);
+		return 0;
 	}
+	spin_unlock(&hmaster->disable_lock);
 	return -1;
 }
 
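check_enable() and disable_for_period() now scope the on/off state to one uvhub: every flag flip walks the present cpus and touches only those whose uvhub_master matches, under that master's disable_lock. A sketch of the grouping test, with a hypothetical cpu table standing in for the per-cpu data:

#include <stdio.h>

struct ctl {
	int master;	/* id of this cpu's hub master */
	int disabled;
};

/* hypothetical 4-cpu system: cpus 0-1 on hub 0, cpus 2-3 on hub 1 */
static struct ctl cpus[4] = { {0, 0}, {0, 0}, {1, 0}, {1, 0} };

/* Flip 'disabled' only for cpus sharing the caller's hub master;
 * the kernel does this under hmaster->disable_lock. */
static void set_hub_disabled(int master, int val)
{
	int cpu;

	for (cpu = 0; cpu < 4; cpu++)
		if (cpus[cpu].master == master)
			cpus[cpu].disabled = val;
}

int main(void)
{
	set_hub_disabled(1, 1);	/* disable hub 1 only */
	printf("%d %d %d %d\n", cpus[0].disabled, cpus[1].disabled,
	       cpus[2].disabled, cpus[3].disabled);	/* 0 0 1 1 */
	return 0;
}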
@@ -1078,18 +1065,32 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
 	struct cpumask *flush_mask;
 	struct ptc_stats *stat;
 	struct bau_control *bcp;
-
-	/* kernel was booted 'nobau' */
-	if (nobau)
-		return cpumask;
+	unsigned long descriptor_status;
+	unsigned long status;
 
 	bcp = &per_cpu(bau_control, cpu);
 	stat = bcp->statp;
+	stat->s_enters++;
+
+	if (bcp->nobau)
+		return cpumask;
+
+	if (bcp->busy) {
+		descriptor_status =
+			read_lmmr(UVH_LB_BAU_SB_ACTIVATION_STATUS_0);
+		status = ((descriptor_status >> (bcp->uvhub_cpu *
+				UV_ACT_STATUS_SIZE)) & UV_ACT_STATUS_MASK) << 1;
+		if (status == UV2H_DESC_BUSY)
+			return cpumask;
+		bcp->busy = 0;
+	}
 
 	/* bau was disabled due to slow response */
 	if (bcp->baudisabled) {
-		if (check_enable(bcp, stat))
+		if (check_enable(bcp, stat)) {
+			stat->s_ipifordisabled++;
 			return cpumask;
+		}
 	}
 
 	/*
@@ -1105,7 +1106,7 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
 	stat->s_ntargself++;
 
 	bau_desc = bcp->descriptor_base;
-	bau_desc += (ITEMS_PER_DESC * bcp->using_desc);
+	bau_desc += (ITEMS_PER_DESC * bcp->uvhub_cpu);
 	bau_uvhubs_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE);
 	if (set_distrib_bits(flush_mask, bcp, bau_desc, &locals, &remotes))
 		return NULL;
@@ -1118,25 +1119,27 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
 	 * uv_flush_send_and_wait returns 0 if all cpu's were messaged,
 	 * or 1 if it gave up and the original cpumask should be returned.
 	 */
-	if (!uv_flush_send_and_wait(flush_mask, bcp))
+	if (!uv_flush_send_and_wait(flush_mask, bcp, bau_desc))
 		return NULL;
 	else
 		return cpumask;
 }
 
 /*
- * Search the message queue for any 'other' message with the same software
- * acknowledge resource bit vector.
+ * Search the message queue for any 'other' unprocessed message with the
+ * same software acknowledge resource bit vector as the 'msg' message.
  */
 struct bau_pq_entry *find_another_by_swack(struct bau_pq_entry *msg,
-			struct bau_control *bcp, unsigned char swack_vec)
+					   struct bau_control *bcp)
 {
 	struct bau_pq_entry *msg_next = msg + 1;
+	unsigned char swack_vec = msg->swack_vec;
 
 	if (msg_next > bcp->queue_last)
 		msg_next = bcp->queue_first;
-	while ((msg_next->swack_vec != 0) && (msg_next != msg)) {
-		if (msg_next->swack_vec == swack_vec)
+	while (msg_next != msg) {
+		if ((msg_next->canceled == 0) && (msg_next->replied_to == 0) &&
+		    (msg_next->swack_vec == swack_vec))
 			return msg_next;
 		msg_next++;
 		if (msg_next > bcp->queue_last)
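find_another_by_swack() now walks the whole circular payload queue, skipping canceled and replied-to entries, instead of stopping at the first empty swack_vec. A self-contained sketch of that wrap-around scan over an array-backed ring (the kernel version uses queue_first/queue_last pointers instead of array bounds):

#include <stdio.h>

struct entry {
	int canceled;
	int replied_to;
	unsigned char swack_vec;
};

#define QLEN 6

/* Scan forward from msg+1, wrapping at the end, for another live
 * entry with the same swack_vec; NULL if none before reaching msg. */
static struct entry *find_another(struct entry *q, struct entry *msg)
{
	struct entry *next = msg + 1;

	if (next >= q + QLEN)
		next = q;
	while (next != msg) {
		if (!next->canceled && !next->replied_to &&
		    next->swack_vec == msg->swack_vec)
			return next;
		if (++next >= q + QLEN)
			next = q;
	}
	return NULL;
}

int main(void)
{
	struct entry q[QLEN] = {
		{0, 0, 0x4}, {1, 0, 0x2}, {0, 0, 0x2},
		{0, 0, 0x1}, {0, 1, 0x2}, {0, 0, 0x8},
	};

	/* from q[4], the only live 0x2 match is q[2] */
	printf("match at index %ld\n", find_another(q, &q[4]) - q);
	return 0;
}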
@@ -1165,32 +1168,30 @@ void process_uv2_message(struct msg_desc *mdp, struct bau_control *bcp)
 		 * This message was assigned a swack resource, but no
 		 * reserved acknowlegment is pending.
 		 * The bug has prevented this message from setting the MMR.
-		 * And no other message has used the same sw_ack resource.
-		 * Do the requested shootdown but do not reply to the msg.
-		 * (the 0 means make no acknowledge)
 		 */
-		bau_process_message(mdp, bcp, 0);
-		return;
-	}
-
-	/*
-	 * Some message has set the MMR 'pending' bit; it might have been
-	 * another message.  Look for that message.
-	 */
-	other_msg = find_another_by_swack(msg, bcp, msg->swack_vec);
-	if (other_msg) {
-		/* There is another.  Do not ack the current one. */
-		bau_process_message(mdp, bcp, 0);
 		/*
-		 * Let the natural processing of that message acknowledge
-		 * it. Don't get the processing of sw_ack's out of order.
+		 * Some message has set the MMR 'pending' bit; it might have
+		 * been another message.  Look for that message.
 		 */
-		return;
+		other_msg = find_another_by_swack(msg, bcp);
+		if (other_msg) {
+			/*
+			 * There is another.  Process this one but do not
+			 * ack it.
+			 */
+			bau_process_message(mdp, bcp, 0);
+			/*
+			 * Let the natural processing of that other message
+			 * acknowledge it. Don't get the processing of sw_ack's
+			 * out of order.
+			 */
+			return;
+		}
 	}
 
 	/*
-	 * There is no other message using this sw_ack, so it is safe to
-	 * acknowledge it.
+	 * Either the MMR shows this one pending a reply or there is no
+	 * other message using this sw_ack, so it is safe to acknowledge it.
 	 */
 	bau_process_message(mdp, bcp, 1);
 
@@ -1295,7 +1296,8 @@ static void __init enable_timeouts(void)
 		 */
 		mmr_image |= (1L << SOFTACK_MSHIFT);
 		if (is_uv2_hub()) {
-			mmr_image |= (1L << UV2_EXT_SHFT);
+			/* hw bug workaround; do not use extended status */
+			mmr_image &= ~(1L << UV2_EXT_SHFT);
 		}
 		write_mmr_misc_control(pnode, mmr_image);
 	}
@@ -1338,29 +1340,34 @@ static inline unsigned long long usec_2_cycles(unsigned long microsec)
 static int ptc_seq_show(struct seq_file *file, void *data)
 {
 	struct ptc_stats *stat;
+	struct bau_control *bcp;
 	int cpu;
 
 	cpu = *(loff_t *)data;
 	if (!cpu) {
 		seq_printf(file,
-			"# cpu sent stime self locals remotes ncpus localhub ");
+		 "# cpu bauoff sent stime self locals remotes ncpus localhub ");
 		seq_printf(file,
 			"remotehub numuvhubs numuvhubs16 numuvhubs8 ");
 		seq_printf(file,
-			"numuvhubs4 numuvhubs2 numuvhubs1 dto snacks retries rok ");
+			"numuvhubs4 numuvhubs2 numuvhubs1 dto snacks retries ");
+		seq_printf(file,
+			"rok resetp resett giveup sto bz throt disable ");
 		seq_printf(file,
-			"resetp resett giveup sto bz throt swack recv rtime ");
+			"enable wars warshw warwaits enters ipidis plugged ");
 		seq_printf(file,
-			"all one mult none retry canc nocan reset rcan ");
+			"ipiover glim cong swack recv rtime all one mult ");
 		seq_printf(file,
-			"disable enable wars warshw warwaits\n");
+			"none retry canc nocan reset rcan\n");
 	}
 	if (cpu < num_possible_cpus() && cpu_online(cpu)) {
-		stat = &per_cpu(ptcstats, cpu);
+		bcp = &per_cpu(bau_control, cpu);
+		stat = bcp->statp;
 		/* source side statistics */
 		seq_printf(file,
-			"cpu %d %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld ",
-			cpu, stat->s_requestor, cycles_2_us(stat->s_time),
+			"cpu %d %d %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld ",
+			cpu, bcp->nobau, stat->s_requestor,
+			cycles_2_us(stat->s_time),
 			stat->s_ntargself, stat->s_ntarglocals,
 			stat->s_ntargremotes, stat->s_ntargcpu,
 			stat->s_ntarglocaluvhub, stat->s_ntargremoteuvhub,
@@ -1374,20 +1381,23 @@ static int ptc_seq_show(struct seq_file *file, void *data)
 			stat->s_resets_plug, stat->s_resets_timeout,
 			stat->s_giveup, stat->s_stimeout,
 			stat->s_busy, stat->s_throttles);
+		seq_printf(file, "%ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld ",
+			stat->s_bau_disabled, stat->s_bau_reenabled,
+			stat->s_uv2_wars, stat->s_uv2_wars_hw,
+			stat->s_uv2_war_waits, stat->s_enters,
+			stat->s_ipifordisabled, stat->s_plugged,
+			stat->s_overipilimit, stat->s_giveuplimit,
+			stat->s_congested);
 
 		/* destination side statistics */
 		seq_printf(file,
-			"%lx %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld ",
+			"%lx %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld\n",
 			read_gmmr_sw_ack(uv_cpu_to_pnode(cpu)),
 			stat->d_requestee, cycles_2_us(stat->d_time),
 			stat->d_alltlb, stat->d_onetlb, stat->d_multmsg,
 			stat->d_nomsg, stat->d_retries, stat->d_canceled,
 			stat->d_nocanceled, stat->d_resets,
 			stat->d_rcanceled);
-		seq_printf(file, "%ld %ld %ld %ld %ld\n",
-			stat->s_bau_disabled, stat->s_bau_reenabled,
-			stat->s_uv2_wars, stat->s_uv2_wars_hw,
-			stat->s_uv2_war_waits);
 	}
 	return 0;
 }
@@ -1401,13 +1411,14 @@ static ssize_t tunables_read(struct file *file, char __user *userbuf,
 	char *buf;
 	int ret;
 
-	buf = kasprintf(GFP_KERNEL, "%s %s %s\n%d %d %d %d %d %d %d %d %d\n",
-		"max_concur plugged_delay plugsb4reset",
-		"timeoutsb4reset ipi_reset_limit complete_threshold",
-		"congested_response_us congested_reps congested_period",
+	buf = kasprintf(GFP_KERNEL, "%s %s %s\n%d %d %d %d %d %d %d %d %d %d\n",
+		"max_concur plugged_delay plugsb4reset timeoutsb4reset",
+		"ipi_reset_limit complete_threshold congested_response_us",
+		"congested_reps disabled_period giveup_limit",
 		max_concurr, plugged_delay, plugsb4reset,
 		timeoutsb4reset, ipi_reset_limit, complete_threshold,
-		congested_respns_us, congested_reps, congested_period);
+		congested_respns_us, congested_reps, disabled_period,
+		giveup_limit);
 
 	if (!buf)
 		return -ENOMEM;
@@ -1438,6 +1449,14 @@ static ssize_t ptc_proc_write(struct file *file, const char __user *user,
 		return -EFAULT;
 	optstr[count - 1] = '\0';
 
+	if (!strcmp(optstr, "on")) {
+		set_bau_on();
+		return count;
+	} else if (!strcmp(optstr, "off")) {
+		set_bau_off();
+		return count;
+	}
+
 	if (strict_strtol(optstr, 10, &input_arg) < 0) {
 		printk(KERN_DEBUG "%s is invalid\n", optstr);
 		return -EINVAL;
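ptc_proc_write() now special-cases the strings "on" and "off" before falling back to numeric parsing, so the BAU can be toggled at runtime by writing to the ptc_statistics proc file. A user-space sketch of that dispatch shape, with hypothetical handlers standing in for set_bau_on()/set_bau_off():

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static void set_on(void)  { puts("BAU turned on"); }
static void set_off(void) { puts("BAU turned off"); }

/* Mirror the write handler: keywords first, then a number. */
static int handle_write(const char *optstr)
{
	long arg;
	char *end;

	if (!strcmp(optstr, "on")) {
		set_on();
		return 0;
	} else if (!strcmp(optstr, "off")) {
		set_off();
		return 0;
	}
	arg = strtol(optstr, &end, 10);
	if (*end != '\0') {
		fprintf(stderr, "%s is invalid\n", optstr);
		return -1;
	}
	printf("numeric option %ld accepted\n", arg);
	return 0;
}

int main(void)
{
	handle_write("off");
	handle_write("1");
	return 0;
}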
@@ -1570,7 +1589,8 @@ static ssize_t tunables_write(struct file *file, const char __user *user,
 		bcp->complete_threshold = complete_threshold;
 		bcp->cong_response_us = congested_respns_us;
 		bcp->cong_reps = congested_reps;
-		bcp->cong_period = congested_period;
+		bcp->disabled_period = sec_2_cycles(disabled_period);
+		bcp->giveup_limit = giveup_limit;
 	}
 	return count;
 }
@@ -1699,6 +1719,10 @@ static void activation_descriptor_init(int node, int pnode, int base_pnode)
 		 * fairness chaining multilevel count replied_to
 		 */
 	} else {
+		/*
+		 * BIOS uses legacy mode, but UV2 hardware always
+		 * uses native mode for selective broadcasts.
+		 */
 		uv2_hdr = &bd2->header.uv2_hdr;
 		uv2_hdr->swack_flag = 1;
 		uv2_hdr->base_dest_nasid =
@@ -1811,8 +1835,8 @@ static int calculate_destination_timeout(void)
 		index = (mmr_image >> BAU_URGENCY_7_SHIFT) & BAU_URGENCY_7_MASK;
 		mmr_image = uv_read_local_mmr(UVH_TRANSACTION_TIMEOUT);
 		mult2 = (mmr_image >> BAU_TRANS_SHIFT) & BAU_TRANS_MASK;
-		base = timeout_base_ns[index];
-		ts_ns = base * mult1 * mult2;
+		ts_ns = timeout_base_ns[index];
+		ts_ns *= (mult1 * mult2);
 		ret = ts_ns / 1000;
 	} else {
 		/* 4 bits  0/1 for 10/80us base, 3 bits of multiplier */
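calculate_destination_timeout() scales a base period from a lookup table by two hardware multipliers, then converts ns to us; the rewrite above only folds the temporary away. The arithmetic, worked in a sketch with assumed register-field values (not the hardware's real ones):

#include <stdio.h>

/* assumed base periods in ns, indexed by the urgency field */
static unsigned long timeout_base_ns[] = {
	0, 20, 100, 500, 2000, 10000, 50000, 200000,
};

int main(void)
{
	int index = 2;		/* assumed urgency field value */
	int mult1 = 10;		/* assumed first multiplier */
	int mult2 = 16;		/* assumed transaction multiplier */
	unsigned long long ts_ns;

	ts_ns = timeout_base_ns[index];
	ts_ns *= (mult1 * mult2);
	printf("timeout = %llu us\n", ts_ns / 1000);	/* 100*160/1000 = 16 */
	return 0;
}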
@@ -1836,6 +1860,8 @@ static void __init init_per_cpu_tunables(void)
 	for_each_present_cpu(cpu) {
 		bcp = &per_cpu(bau_control, cpu);
 		bcp->baudisabled = 0;
+		if (nobau)
+			bcp->nobau = 1;
 		bcp->statp = &per_cpu(ptcstats, cpu);
 		/* time interval to catch a hardware stay-busy bug */
 		bcp->timeout_interval = usec_2_cycles(2*timeout_us);
@@ -1848,10 +1874,11 @@ static void __init init_per_cpu_tunables(void)
 		bcp->complete_threshold = complete_threshold;
 		bcp->cong_response_us = congested_respns_us;
 		bcp->cong_reps = congested_reps;
-		bcp->cong_period = congested_period;
-		bcp->clocks_per_100_usec = usec_2_cycles(100);
+		bcp->disabled_period = sec_2_cycles(disabled_period);
+		bcp->giveup_limit = giveup_limit;
 		spin_lock_init(&bcp->queue_lock);
 		spin_lock_init(&bcp->uvhub_lock);
+		spin_lock_init(&bcp->disable_lock);
 	}
 }
 
@@ -1972,7 +1999,6 @@ static int scan_sock(struct socket_desc *sdp, struct uvhub_desc *bdp,
 		}
 		bcp->uvhub_master = *hmasterp;
 		bcp->uvhub_cpu = uv_cpu_hub_info(cpu)->blade_processor_id;
-		bcp->using_desc = bcp->uvhub_cpu;
 		if (bcp->uvhub_cpu >= MAX_CPUS_PER_UVHUB) {
 			printk(KERN_EMERG "%d cpus per uvhub invalid\n",
 				bcp->uvhub_cpu);
@@ -2069,16 +2095,12 @@ static int __init uv_bau_init(void)
 	if (!is_uv_system())
 		return 0;
 
-	if (nobau)
-		return 0;
-
 	for_each_possible_cpu(cur_cpu) {
 		mask = &per_cpu(uv_flush_tlb_mask, cur_cpu);
 		zalloc_cpumask_var_node(mask, GFP_KERNEL, cpu_to_node(cur_cpu));
 	}
 
 	nuvhubs = uv_num_possible_blades();
-	spin_lock_init(&disable_lock);
 	congested_cycles = usec_2_cycles(congested_respns_us);
 
 	uv_base_pnode = 0x7fffffff;
@@ -2091,7 +2113,8 @@ static int __init uv_bau_init(void)
 	enable_timeouts();
 
 	if (init_per_cpu(nuvhubs, uv_base_pnode)) {
-		nobau = 1;
+		set_bau_off();
+		nobau_perm = 1;
 		return 0;
 	}
 
diff --git a/arch/x86/platform/uv/uv_irq.c b/arch/x86/platform/uv/uv_irq.c
index f25c2765a5c9..acf7752da952 100644
--- a/arch/x86/platform/uv/uv_irq.c
+++ b/arch/x86/platform/uv/uv_irq.c
@@ -135,6 +135,7 @@ arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
 	unsigned long mmr_value;
 	struct uv_IO_APIC_route_entry *entry;
 	int mmr_pnode, err;
+	unsigned int dest;
 
 	BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) !=
 			sizeof(unsigned long));
@@ -143,6 +144,10 @@ arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
 	if (err != 0)
 		return err;
 
+	err = apic->cpu_mask_to_apicid_and(eligible_cpu, eligible_cpu, &dest);
+	if (err != 0)
+		return err;
+
 	if (limit == UV_AFFINITY_CPU)
 		irq_set_status_flags(irq, IRQ_NO_BALANCING);
 	else
@@ -159,7 +164,7 @@ arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
 	entry->polarity = 0;
 	entry->trigger = 0;
 	entry->mask = 0;
-	entry->dest = apic->cpu_mask_to_apicid(eligible_cpu);
+	entry->dest = dest;
 
 	mmr_pnode = uv_blade_to_pnode(mmr_blade);
 	uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
@@ -222,7 +227,7 @@ uv_set_irq_affinity(struct irq_data *data, const struct cpumask *mask,
 	if (cfg->move_in_progress)
 		send_cleanup_vector(cfg);
 
-	return 0;
+	return IRQ_SET_MASK_OK_NOCOPY;
 }
 
 /*
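The uv_irq.c change follows the apic interface rework visible in the hunk: cpu_mask_to_apicid_and() now reports failure through its return value and hands the APIC id back via an out-parameter, so the caller must check for an error before the id is used. The shape of that calling convention, sketched with a hypothetical lookup:

#include <stdio.h>

/* hypothetical id table; -1 marks an invalid cpu */
static int apicid_of[] = { 0x10, 0x12, -1, 0x16 };

/* error code in the return value, result via out-parameter */
static int cpu_to_id(int cpu, unsigned int *dest)
{
	if (cpu < 0 || cpu >= 4 || apicid_of[cpu] < 0)
		return -22;	/* -EINVAL */
	*dest = apicid_of[cpu];
	return 0;
}

int main(void)
{
	unsigned int dest;
	int err;

	err = cpu_to_id(1, &dest);
	if (err != 0)
		return err;	/* propagate, as arch_enable_uv_irq() does */
	printf("dest = %#x\n", dest);
	return 0;
}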
diff --git a/arch/x86/realmode/rm/Makefile b/arch/x86/realmode/rm/Makefile
index 5b84a2d30888..b2d534cab25f 100644
--- a/arch/x86/realmode/rm/Makefile
+++ b/arch/x86/realmode/rm/Makefile
@@ -22,7 +22,7 @@ wakeup-objs	+= video-bios.o
 realmode-y			+= header.o
 realmode-y			+= trampoline_$(BITS).o
 realmode-y			+= stack.o
-realmode-$(CONFIG_X86_32)	+= reboot_32.o
+realmode-y			+= reboot.o
 realmode-$(CONFIG_ACPI_SLEEP)	+= $(wakeup-objs)
 
 targets	+= $(realmode-y)
diff --git a/arch/x86/realmode/rm/header.S b/arch/x86/realmode/rm/header.S
index fadf48378ada..a28221d94e69 100644
--- a/arch/x86/realmode/rm/header.S
+++ b/arch/x86/realmode/rm/header.S
@@ -6,6 +6,7 @@
 
 #include <linux/linkage.h>
 #include <asm/page_types.h>
+#include <asm/segment.h>
 
 #include "realmode.h"
 
@@ -28,8 +29,9 @@ GLOBAL(real_mode_header)
 	.long	pa_wakeup_header
 #endif
 	/* APM/BIOS reboot */
-#ifdef CONFIG_X86_32
 	.long	pa_machine_real_restart_asm
+#ifdef CONFIG_X86_64
+	.long	__KERNEL32_CS
 #endif
 END(real_mode_header)
 
diff --git a/arch/x86/realmode/rm/reboot_32.S b/arch/x86/realmode/rm/reboot.S
index 114044876b3d..f932ea61d1c8 100644
--- a/arch/x86/realmode/rm/reboot_32.S
+++ b/arch/x86/realmode/rm/reboot.S
@@ -2,6 +2,8 @@
 #include <linux/init.h>
 #include <asm/segment.h>
 #include <asm/page_types.h>
+#include <asm/processor-flags.h>
+#include <asm/msr-index.h>
 #include "realmode.h"
 
 /*
@@ -12,13 +14,35 @@
  * doesn't work with at least one type of 486 motherboard.  It is easy
  * to stop this code working; hence the copious comments.
  *
- * This code is called with the restart type (0 = BIOS, 1 = APM) in %eax.
+ * This code is called with the restart type (0 = BIOS, 1 = APM) in
+ * the primary argument register (%eax for 32 bit, %edi for 64 bit).
  */
 	.section ".text32", "ax"
 	.code32
-
-	.balign 16
 ENTRY(machine_real_restart_asm)
+
+#ifdef CONFIG_X86_64
+	/* Switch to trampoline GDT as it is guaranteed < 4 GiB */
+	movl	$__KERNEL_DS, %eax
+	movl	%eax, %ds
+	lgdtl	pa_tr_gdt
+
+	/* Disable paging to drop us out of long mode */
+	movl	%cr0, %eax
+	andl	$~X86_CR0_PG, %eax
+	movl	%eax, %cr0
+	ljmpl	$__KERNEL32_CS, $pa_machine_real_restart_paging_off
+
+GLOBAL(machine_real_restart_paging_off)
+	xorl	%eax, %eax
+	xorl	%edx, %edx
+	movl	$MSR_EFER, %ecx
+	wrmsr
+
+	movl	%edi, %eax
+
+#endif /* CONFIG_X86_64 */
+
 	/* Set up the IDT for real mode. */
 	lidtl	pa_machine_real_restart_idt
 
diff --git a/arch/x86/vdso/vdso32-setup.c b/arch/x86/vdso/vdso32-setup.c
index 66e6d9359826..0faad646f5fd 100644
--- a/arch/x86/vdso/vdso32-setup.c
+++ b/arch/x86/vdso/vdso32-setup.c
@@ -205,9 +205,9 @@ void syscall32_cpu_init(void)
 {
 	/* Load these always in case some future AMD CPU supports
 	   SYSENTER from compat mode too. */
-	checking_wrmsrl(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS);
-	checking_wrmsrl(MSR_IA32_SYSENTER_ESP, 0ULL);
-	checking_wrmsrl(MSR_IA32_SYSENTER_EIP, (u64)ia32_sysenter_target);
+	wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS);
+	wrmsrl_safe(MSR_IA32_SYSENTER_ESP, 0ULL);
+	wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)ia32_sysenter_target);
 
 	wrmsrl(MSR_CSTAR, ia32_cstar_target);
 }
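The vdso change is part of a rename of the exception-safe MSR helpers: checking_wrmsrl() becomes wrmsrl_safe(), matching the *_safe convention, and still returns nonzero if the write faults. A sketch of how a caller can treat that return value, with a user-space stub standing in for the MSR write:

#include <stdio.h>

/* stub with the same contract as the kernel helper:
 * 0 on success, nonzero if the MSR write would fault */
static int wrmsrl_safe_stub(unsigned int msr, unsigned long long val)
{
	return msr == 0xdead ? -5 : 0;	/* pretend 0xdead faults */
}

int main(void)
{
	if (wrmsrl_safe_stub(0x174 /* MSR_IA32_SYSENTER_CS */, 0x10))
		puts("write faulted; CPU lacks this MSR");
	else
		puts("write ok");
	return 0;
}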
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index ff962d4b821e..ed7d54985d0c 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1124,9 +1124,7 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = {
 	.wbinvd = native_wbinvd,
 
 	.read_msr = native_read_msr_safe,
-	.rdmsr_regs = native_rdmsr_safe_regs,
 	.write_msr = xen_write_msr_safe,
-	.wrmsr_regs = native_wrmsr_safe_regs,
 
 	.read_tsc = native_read_tsc,
 	.read_pmc = native_read_pmc,
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index afb250d22a6b..f58dca7a6e52 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -80,9 +80,7 @@ static void __cpuinit cpu_bringup(void)
 
 	notify_cpu_starting(cpu);
 
-	ipi_call_lock();
 	set_cpu_online(cpu, true);
-	ipi_call_unlock();
 
 	this_cpu_write(cpu_state, CPU_ONLINE);
 