Diffstat (limited to 'arch/x86/kernel')
 arch/x86/kernel/Makefile                   |    1
 arch/x86/kernel/acpi/boot.c                |    2
 arch/x86/kernel/apic/apic.c                |   12
 arch/x86/kernel/apic/apic_flat_64.c        |    2
 arch/x86/kernel/apic/apic_noop.c           |    1
 arch/x86/kernel/apic/apic_numachip.c       |    1
 arch/x86/kernel/apic/bigsmp_32.c           |    1
 arch/x86/kernel/apic/es7000_32.c           |    2
 arch/x86/kernel/apic/io_apic.c             |  109
 arch/x86/kernel/apic/numaq_32.c            |    1
 arch/x86/kernel/apic/probe_32.c            |    1
 arch/x86/kernel/apic/summit_32.c           |    1
 arch/x86/kernel/apic/x2apic_cluster.c      |    1
 arch/x86/kernel/apic/x2apic_phys.c         |    1
 arch/x86/kernel/apic/x2apic_uv_x.c         |    1
 arch/x86/kernel/check.c                    |   20
 arch/x86/kernel/cpu/common.c               |    2
 arch/x86/kernel/cpu/intel_cacheinfo.c      |    4
 arch/x86/kernel/cpu/match.c                |    2
 arch/x86/kernel/cpu/mcheck/mce.c           |   41
 arch/x86/kernel/cpu/mcheck/mce_amd.c       |   65
 arch/x86/kernel/cpu/perf_event.c           |    7
 arch/x86/kernel/cpu/perf_event_amd.c       |   22
 arch/x86/kernel/cpu/perf_event_amd_ibs.c   |  570
 arch/x86/kernel/cpu/perf_event_intel.c     |    4
 arch/x86/kernel/cpu/perf_event_intel_ds.c  |    6
 arch/x86/kernel/cpu/perf_event_p4.c        |    6
 arch/x86/kernel/dumpstack.c                |   23
 arch/x86/kernel/dumpstack_32.c             |    2
 arch/x86/kernel/dumpstack_64.c             |    2
 arch/x86/kernel/entry_32.S                 |   47
 arch/x86/kernel/entry_64.S                 |   16
 arch/x86/kernel/ftrace.c                   |  500
 arch/x86/kernel/head_32.S                  |  223
 arch/x86/kernel/head_64.S                  |   80
 arch/x86/kernel/i387.c                     |    2
 arch/x86/kernel/kprobes.c                  |    4
 arch/x86/kernel/mca_32.c                   |  476
 arch/x86/kernel/microcode_core.c           |    9
 arch/x86/kernel/mpparse.c                  |   11
 arch/x86/kernel/nmi.c                      |   24
 arch/x86/kernel/nmi_selftest.c             |   13
 arch/x86/kernel/paravirt.c                 |   12
 arch/x86/kernel/pci-calgary_64.c           |    8
 arch/x86/kernel/process.c                  |   39
 arch/x86/kernel/process_32.c               |   11
 arch/x86/kernel/process_64.c               |   19
 arch/x86/kernel/ptrace.c                   |    7
 arch/x86/kernel/reboot.c                   |  277
 arch/x86/kernel/setup.c                    |   18
 arch/x86/kernel/smp.c                      |  100
 arch/x86/kernel/smpboot.c                  |  108
 arch/x86/kernel/test_rodata.c              |   10
 arch/x86/kernel/time.c                     |    6
 arch/x86/kernel/traps.c                    |   12
 arch/x86/kernel/vsmp_64.c                  |   40
 arch/x86/kernel/x86_init.c                 |    8
 arch/x86/kernel/xsave.c                    |    2
 58 files changed, 1695 insertions(+), 1300 deletions(-)
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 56ebd1f9844..bb8529275aa 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -49,7 +49,6 @@ obj-y += cpu/
 obj-y				+= acpi/
 obj-y				+= reboot.o
 obj-$(CONFIG_X86_32)		+= reboot_32.o
-obj-$(CONFIG_MCA)		+= mca_32.o
 obj-$(CONFIG_X86_MSR)		+= msr.o
 obj-$(CONFIG_X86_CPUID)	+= cpuid.o
 obj-$(CONFIG_PCI)		+= early-quirks.o
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 7c439fe4941..8afb6931981 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -990,7 +990,7 @@ void __init mp_config_acpi_legacy_irqs(void)
 	int i;
 	struct mpc_intsrc mp_irq;
 
-#if defined (CONFIG_MCA) || defined (CONFIG_EISA)
+#ifdef CONFIG_EISA
 	/*
 	 * Fabricate the legacy ISA bus (bus #31).
 	 */
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 3722179a49d..39a222e094a 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -1326,11 +1326,13 @@ void __cpuinit setup_local_APIC(void)
 				acked);
 			break;
 		}
-		if (cpu_has_tsc) {
-			rdtscll(ntsc);
-			max_loops = (cpu_khz << 10) - (ntsc - tsc);
-		} else
-			max_loops--;
+		if (queued) {
+			if (cpu_has_tsc) {
+				rdtscll(ntsc);
+				max_loops = (cpu_khz << 10) - (ntsc - tsc);
+			} else
+				max_loops--;
+		}
 	} while (queued && max_loops > 0);
 	WARN_ON(max_loops <= 0);
 
diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c
index 359b6899a36..0e881c46e8c 100644
--- a/arch/x86/kernel/apic/apic_flat_64.c
+++ b/arch/x86/kernel/apic/apic_flat_64.c
@@ -227,6 +227,7 @@ static struct apic apic_flat = {
 
 	.read				= native_apic_mem_read,
 	.write				= native_apic_mem_write,
+	.eoi_write			= native_apic_mem_write,
 	.icr_read			= native_apic_icr_read,
 	.icr_write			= native_apic_icr_write,
 	.wait_icr_idle			= native_apic_wait_icr_idle,
@@ -386,6 +387,7 @@ static struct apic apic_physflat = {
 
 	.read				= native_apic_mem_read,
 	.write				= native_apic_mem_write,
+	.eoi_write			= native_apic_mem_write,
 	.icr_read			= native_apic_icr_read,
 	.icr_write			= native_apic_icr_write,
 	.wait_icr_idle			= native_apic_wait_icr_idle,
diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c
index 634ae6cdd5c..a6e4c6e06c0 100644
--- a/arch/x86/kernel/apic/apic_noop.c
+++ b/arch/x86/kernel/apic/apic_noop.c
@@ -181,6 +181,7 @@ struct apic apic_noop = {
 
 	.read				= noop_apic_read,
 	.write				= noop_apic_write,
+	.eoi_write			= noop_apic_write,
 	.icr_read			= noop_apic_icr_read,
 	.icr_write			= noop_apic_icr_write,
 	.wait_icr_idle			= noop_apic_wait_icr_idle,
diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c
index 23e75422e01..6ec6d5d297c 100644
--- a/arch/x86/kernel/apic/apic_numachip.c
+++ b/arch/x86/kernel/apic/apic_numachip.c
@@ -295,6 +295,7 @@ static struct apic apic_numachip __refconst = {
 
 	.read				= native_apic_mem_read,
 	.write				= native_apic_mem_write,
+	.eoi_write			= native_apic_mem_write,
 	.icr_read			= native_apic_icr_read,
 	.icr_write			= native_apic_icr_write,
 	.wait_icr_idle			= native_apic_wait_icr_idle,
diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c
index 0cdec7065af..31fbdbfbf96 100644
--- a/arch/x86/kernel/apic/bigsmp_32.c
+++ b/arch/x86/kernel/apic/bigsmp_32.c
@@ -248,6 +248,7 @@ static struct apic apic_bigsmp = {
 
 	.read				= native_apic_mem_read,
 	.write				= native_apic_mem_write,
+	.eoi_write			= native_apic_mem_write,
 	.icr_read			= native_apic_icr_read,
 	.icr_write			= native_apic_icr_write,
 	.wait_icr_idle			= native_apic_wait_icr_idle,
diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c
index e42d1d3b913..db4ab1be3c7 100644
--- a/arch/x86/kernel/apic/es7000_32.c
+++ b/arch/x86/kernel/apic/es7000_32.c
@@ -678,6 +678,7 @@ static struct apic __refdata apic_es7000_cluster = {
 
 	.read				= native_apic_mem_read,
 	.write				= native_apic_mem_write,
+	.eoi_write			= native_apic_mem_write,
 	.icr_read			= native_apic_icr_read,
 	.icr_write			= native_apic_icr_write,
 	.wait_icr_idle			= native_apic_wait_icr_idle,
@@ -742,6 +743,7 @@ static struct apic __refdata apic_es7000 = {
 
 	.read				= native_apic_mem_read,
 	.write				= native_apic_mem_write,
+	.eoi_write			= native_apic_mem_write,
 	.icr_read			= native_apic_icr_read,
 	.icr_write			= native_apic_icr_write,
 	.wait_icr_idle			= native_apic_wait_icr_idle,
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index ef0648cd708..ac96561d1a9 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -68,24 +68,6 @@
 #define for_each_irq_pin(entry, head) \
 	for (entry = head; entry; entry = entry->next)
 
-static void __init __ioapic_init_mappings(void);
-
-static unsigned int __io_apic_read(unsigned int apic, unsigned int reg);
-static void __io_apic_write(unsigned int apic, unsigned int reg, unsigned int val);
-static void __io_apic_modify(unsigned int apic, unsigned int reg, unsigned int val);
-
-static struct io_apic_ops io_apic_ops = {
-	.init	= __ioapic_init_mappings,
-	.read	= __io_apic_read,
-	.write	= __io_apic_write,
-	.modify	= __io_apic_modify,
-};
-
-void __init set_io_apic_ops(const struct io_apic_ops *ops)
-{
-	io_apic_ops = *ops;
-}
-
 #ifdef CONFIG_IRQ_REMAP
 static void irq_remap_modify_chip_defaults(struct irq_chip *chip);
 static inline bool irq_remapped(struct irq_cfg *cfg)
@@ -158,7 +140,7 @@ int mp_irq_entries;
 /* GSI interrupts */
 static int nr_irqs_gsi = NR_IRQS_LEGACY;
 
-#if defined (CONFIG_MCA) || defined (CONFIG_EISA)
+#ifdef CONFIG_EISA
 int mp_bus_id_to_type[MAX_MP_BUSSES];
 #endif
 
@@ -329,21 +311,6 @@ static void free_irq_at(unsigned int at, struct irq_cfg *cfg)
 	irq_free_desc(at);
 }
 
-static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg)
-{
-	return io_apic_ops.read(apic, reg);
-}
-
-static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned int value)
-{
-	io_apic_ops.write(apic, reg, value);
-}
-
-static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value)
-{
-	io_apic_ops.modify(apic, reg, value);
-}
-
 
 struct io_apic {
 	unsigned int index;
@@ -365,14 +332,14 @@ static inline void io_apic_eoi(unsigned int apic, unsigned int vector)
 	writel(vector, &io_apic->eoi);
 }
 
-static unsigned int __io_apic_read(unsigned int apic, unsigned int reg)
+unsigned int native_io_apic_read(unsigned int apic, unsigned int reg)
 {
 	struct io_apic __iomem *io_apic = io_apic_base(apic);
 	writel(reg, &io_apic->index);
 	return readl(&io_apic->data);
 }
 
-static void __io_apic_write(unsigned int apic, unsigned int reg, unsigned int value)
+void native_io_apic_write(unsigned int apic, unsigned int reg, unsigned int value)
 {
 	struct io_apic __iomem *io_apic = io_apic_base(apic);
 
@@ -386,7 +353,7 @@ static void __io_apic_write(unsigned int apic, unsigned int reg, unsigned int va
  *
  * Older SiS APIC requires we rewrite the index register
  */
-static void __io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value)
+void native_io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value)
 {
 	struct io_apic __iomem *io_apic = io_apic_base(apic);
 
@@ -395,29 +362,6 @@ static void __io_apic_modify(unsigned int apic, unsigned int reg, unsigned int v
 	writel(value, &io_apic->data);
 }
 
-static bool io_apic_level_ack_pending(struct irq_cfg *cfg)
-{
-	struct irq_pin_list *entry;
-	unsigned long flags;
-
-	raw_spin_lock_irqsave(&ioapic_lock, flags);
-	for_each_irq_pin(entry, cfg->irq_2_pin) {
-		unsigned int reg;
-		int pin;
-
-		pin = entry->pin;
-		reg = io_apic_read(entry->apic, 0x10 + pin*2);
-		/* Is the remote IRR bit set? */
-		if (reg & IO_APIC_REDIR_REMOTE_IRR) {
-			raw_spin_unlock_irqrestore(&ioapic_lock, flags);
-			return true;
-		}
-	}
-	raw_spin_unlock_irqrestore(&ioapic_lock, flags);
-
-	return false;
-}
-
 union entry_union {
 	struct { u32 w1, w2; };
 	struct IO_APIC_route_entry entry;
@@ -891,7 +835,7 @@ static int __init find_isa_irq_apic(int irq, int type)
 	return -1;
 }
 
-#if defined(CONFIG_EISA) || defined(CONFIG_MCA)
+#ifdef CONFIG_EISA
 /*
  * EISA Edge/Level control register, ELCR
  */
@@ -928,12 +872,6 @@ static int EISA_ELCR(unsigned int irq)
 #define default_PCI_trigger(idx)	(1)
 #define default_PCI_polarity(idx)	(1)
 
-/* MCA interrupts are always polarity zero level triggered,
- * when listed as conforming in the MP table. */
-
-#define default_MCA_trigger(idx)	(1)
-#define default_MCA_polarity(idx)	default_ISA_polarity(idx)
-
 static int irq_polarity(int idx)
 {
 	int bus = mp_irqs[idx].srcbus;
@@ -991,7 +929,7 @@ static int irq_trigger(int idx)
 		trigger = default_ISA_trigger(idx);
 	else
 		trigger = default_PCI_trigger(idx);
-#if defined(CONFIG_EISA) || defined(CONFIG_MCA)
+#ifdef CONFIG_EISA
 	switch (mp_bus_id_to_type[bus]) {
 		case MP_BUS_ISA: /* ISA pin */
 		{
@@ -1008,11 +946,6 @@ static int irq_trigger(int idx)
 			/* set before the switch */
 			break;
 		}
-		case MP_BUS_MCA: /* MCA pin */
-		{
-			trigger = default_MCA_trigger(idx);
-			break;
-		}
 		default:
 		{
 			printk(KERN_WARNING "broken BIOS!!\n");
@@ -2439,6 +2372,29 @@ static void ack_apic_edge(struct irq_data *data)
 atomic_t irq_mis_count;
 
 #ifdef CONFIG_GENERIC_PENDING_IRQ
+static bool io_apic_level_ack_pending(struct irq_cfg *cfg)
+{
+	struct irq_pin_list *entry;
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&ioapic_lock, flags);
+	for_each_irq_pin(entry, cfg->irq_2_pin) {
+		unsigned int reg;
+		int pin;
+
+		pin = entry->pin;
+		reg = io_apic_read(entry->apic, 0x10 + pin*2);
+		/* Is the remote IRR bit set? */
+		if (reg & IO_APIC_REDIR_REMOTE_IRR) {
+			raw_spin_unlock_irqrestore(&ioapic_lock, flags);
+			return true;
+		}
+	}
+	raw_spin_unlock_irqrestore(&ioapic_lock, flags);
+
+	return false;
+}
+
 static inline bool ioapic_irqd_mask(struct irq_data *data, struct irq_cfg *cfg)
 {
 	/* If we are moving the irq we need to mask it */
@@ -3756,12 +3712,7 @@ static struct resource * __init ioapic_setup_resources(int nr_ioapics)
 	return res;
 }
 
-void __init ioapic_and_gsi_init(void)
-{
-	io_apic_ops.init();
-}
-
-static void __init __ioapic_init_mappings(void)
+void __init native_io_apic_init_mappings(void)
 {
 	unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0;
 	struct resource *ioapic_res;
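
The io_apic.c hunks above drop the file-local io_apic_ops indirection and export the accessors as native_io_apic_read/write/modify and native_io_apic_init_mappings, so any override point moves out of this file. A minimal user-space sketch of the underlying pattern, with hypothetical names that are not the kernel's actual interface:

#include <stdio.h>

struct ioapic_ops {
	unsigned int (*read)(unsigned int apic, unsigned int reg);
};

static unsigned int native_read(unsigned int apic, unsigned int reg)
{
	printf("native MMIO read: apic %u reg 0x%x\n", apic, reg);
	return 0;
}

static unsigned int paravirt_read(unsigned int apic, unsigned int reg)
{
	printf("hypercall read: apic %u reg 0x%x\n", apic, reg);
	return 0;
}

static struct ioapic_ops ops = { .read = native_read };

int main(void)
{
	ops.read(0, 0x10);		/* bare-metal default */
	ops.read = paravirt_read;	/* platform code overrides at boot */
	ops.read(0, 0x10);
	return 0;
}

An ops table like this gives a paravirtualized platform one documented place to hook register access at boot, while the native defaults keep bare metal on direct MMIO.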
diff --git a/arch/x86/kernel/apic/numaq_32.c b/arch/x86/kernel/apic/numaq_32.c
index 00d2422ca7c..f00a68cca37 100644
--- a/arch/x86/kernel/apic/numaq_32.c
+++ b/arch/x86/kernel/apic/numaq_32.c
@@ -530,6 +530,7 @@ static struct apic __refdata apic_numaq = {
 
 	.read				= native_apic_mem_read,
 	.write				= native_apic_mem_write,
+	.eoi_write			= native_apic_mem_write,
 	.icr_read			= native_apic_icr_read,
 	.icr_write			= native_apic_icr_write,
 	.wait_icr_idle			= native_apic_wait_icr_idle,
diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c
index ff2c1b9aac4..1b291da09e6 100644
--- a/arch/x86/kernel/apic/probe_32.c
+++ b/arch/x86/kernel/apic/probe_32.c
@@ -142,6 +142,7 @@ static struct apic apic_default = {
 
 	.read				= native_apic_mem_read,
 	.write				= native_apic_mem_write,
+	.eoi_write			= native_apic_mem_write,
 	.icr_read			= native_apic_icr_read,
 	.icr_write			= native_apic_icr_write,
 	.wait_icr_idle			= native_apic_wait_icr_idle,
diff --git a/arch/x86/kernel/apic/summit_32.c b/arch/x86/kernel/apic/summit_32.c
index fea000b27f0..659897c0075 100644
--- a/arch/x86/kernel/apic/summit_32.c
+++ b/arch/x86/kernel/apic/summit_32.c
@@ -546,6 +546,7 @@ static struct apic apic_summit = {
 
 	.read				= native_apic_mem_read,
 	.write				= native_apic_mem_write,
+	.eoi_write			= native_apic_mem_write,
 	.icr_read			= native_apic_icr_read,
 	.icr_write			= native_apic_icr_write,
 	.wait_icr_idle			= native_apic_wait_icr_idle,
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c
index 48f3103b3c9..ff35cff0e1a 100644
--- a/arch/x86/kernel/apic/x2apic_cluster.c
+++ b/arch/x86/kernel/apic/x2apic_cluster.c
@@ -260,6 +260,7 @@ static struct apic apic_x2apic_cluster = {
 
 	.read				= native_apic_msr_read,
 	.write				= native_apic_msr_write,
+	.eoi_write			= native_apic_msr_eoi_write,
 	.icr_read			= native_x2apic_icr_read,
 	.icr_write			= native_x2apic_icr_write,
 	.wait_icr_idle			= native_x2apic_wait_icr_idle,
diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c
index 991e315f422..c17e982db27 100644
--- a/arch/x86/kernel/apic/x2apic_phys.c
+++ b/arch/x86/kernel/apic/x2apic_phys.c
@@ -172,6 +172,7 @@ static struct apic apic_x2apic_phys = {
 
 	.read				= native_apic_msr_read,
 	.write				= native_apic_msr_write,
+	.eoi_write			= native_apic_msr_eoi_write,
 	.icr_read			= native_x2apic_icr_read,
 	.icr_write			= native_x2apic_icr_write,
 	.wait_icr_idle			= native_x2apic_wait_icr_idle,
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 87bfa69e216..c6d03f7a440 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -404,6 +404,7 @@ static struct apic __refdata apic_x2apic_uv_x = {
 
 	.read				= native_apic_msr_read,
 	.write				= native_apic_msr_write,
+	.eoi_write			= native_apic_msr_eoi_write,
 	.icr_read			= native_x2apic_icr_read,
 	.icr_write			= native_x2apic_icr_write,
 	.wait_icr_idle			= native_x2apic_wait_icr_idle,
diff --git a/arch/x86/kernel/check.c b/arch/x86/kernel/check.c
index 5da1269e8dd..e2dbcb7dabd 100644
--- a/arch/x86/kernel/check.c
+++ b/arch/x86/kernel/check.c
@@ -27,21 +27,29 @@ static int num_scan_areas;
 
 static __init int set_corruption_check(char *arg)
 {
-	char *end;
+	ssize_t ret;
+	unsigned long val;
 
-	memory_corruption_check = simple_strtol(arg, &end, 10);
+	ret = kstrtoul(arg, 10, &val);
+	if (ret)
+		return ret;
 
-	return (*end == 0) ? 0 : -EINVAL;
+	memory_corruption_check = val;
+	return 0;
 }
 early_param("memory_corruption_check", set_corruption_check);
 
 static __init int set_corruption_check_period(char *arg)
 {
-	char *end;
+	ssize_t ret;
+	unsigned long val;
 
-	corruption_check_period = simple_strtoul(arg, &end, 10);
+	ret = kstrtoul(arg, 10, &val);
+	if (ret)
+		return ret;
 
-	return (*end == 0) ? 0 : -EINVAL;
+	corruption_check_period = val;
+	return 0;
 }
 early_param("memory_corruption_check_period", set_corruption_check_period);
 
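
The check.c conversion above replaces simple_strtol()/simple_strtoul(), which silently ignore trailing garbage unless the caller inspects the end pointer, with kstrtoul(), which rejects malformed input outright. A small user-space sketch of the same contract, using strtoul() plus explicit checks to mimic what kstrtoul() does internally (parse_ulong is a hypothetical helper, not kernel code):

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

/* Sketch: parse-or-fail semantics like kstrtoul(); "12x" and ""
 * return an error instead of silently yielding 12 or 0. */
static int parse_ulong(const char *s, int base, unsigned long *val)
{
	char *end;

	errno = 0;
	*val = strtoul(s, &end, base);
	if (errno || end == s || *end != '\0')
		return -EINVAL;
	return 0;
}

int main(void)
{
	unsigned long v;

	printf("\"64\"  -> %d\n", parse_ulong("64", 10, &v));	/* 0, v == 64 */
	printf("\"12x\" -> %d\n", parse_ulong("12x", 10, &v));	/* -EINVAL */
	return 0;
}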
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index cf79302198a..82f29e70d05 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1185,7 +1185,7 @@ void __cpuinit cpu_init(void)
 	oist = &per_cpu(orig_ist, cpu);
 
 #ifdef CONFIG_NUMA
-	if (cpu != 0 && percpu_read(numa_node) == 0 &&
+	if (cpu != 0 && this_cpu_read(numa_node) == 0 &&
 	    early_cpu_to_node(cpu) != NUMA_NO_NODE)
 		set_numa_node(early_cpu_to_node(cpu));
 #endif
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index b8f3653dddb..9a7c90d80bc 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -615,14 +615,14 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
 			new_l2 = this_leaf.size/1024;
 			num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
 			index_msb = get_count_order(num_threads_sharing);
-			l2_id = c->apicid >> index_msb;
+			l2_id = c->apicid & ~((1 << index_msb) - 1);
 			break;
 		case 3:
 			new_l3 = this_leaf.size/1024;
 			num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
 			index_msb = get_count_order(
 					num_threads_sharing);
-			l3_id = c->apicid >> index_msb;
+			l3_id = c->apicid & ~((1 << index_msb) - 1);
 			break;
 		default:
 			break;
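
The intel_cacheinfo.c change above still yields one identifier per group of threads that share a cache, but keeps that identifier in APIC-ID space by masking the low index_msb bits instead of shifting them away. A runnable sketch with a made-up topology:

#include <stdio.h>

int main(void)
{
	unsigned int index_msb = 2;	/* get_count_order(4 threads sharing) */
	unsigned int apicid;

	/* APIC IDs 8..11 share one L2 in this made-up topology */
	for (apicid = 8; apicid < 12; apicid++)
		printf("apicid %2u: shifted id %u, masked id %u\n",
		       apicid,
		       apicid >> index_msb,
		       apicid & ~((1u << index_msb) - 1));
	return 0;
}

Both forms are constant across the sharing group (2 vs. 8 here); the masked form simply stays comparable with other identifiers derived from the full APIC ID.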
diff --git a/arch/x86/kernel/cpu/match.c b/arch/x86/kernel/cpu/match.c
index 5502b289341..36565373af8 100644
--- a/arch/x86/kernel/cpu/match.c
+++ b/arch/x86/kernel/cpu/match.c
@@ -23,7 +23,7 @@
  * %X86_MODEL_ANY, %X86_FEATURE_ANY or 0 (except for vendor)
  *
  * Arrays used to match for this should also be declared using
- * MODULE_DEVICE_TABLE(x86_cpu, ...)
+ * MODULE_DEVICE_TABLE(x86cpu, ...)
  *
  * This always matches against the boot cpu, assuming models and features are
  * consistent over all CPUs.
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 11c9166c333..2afcbd253e1 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -583,7 +583,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
 	struct mce m;
 	int i;
 
-	percpu_inc(mce_poll_count);
+	this_cpu_inc(mce_poll_count);
 
 	mce_gather_info(&m, NULL);
 
@@ -1017,7 +1017,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
 
 	atomic_inc(&mce_entry);
 
-	percpu_inc(mce_exception_count);
+	this_cpu_inc(mce_exception_count);
 
 	if (!banks)
 		goto out;
@@ -1431,6 +1431,43 @@ static int __cpuinit __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
 	 */
 	if (c->x86 == 6 && banks > 0)
 		mce_banks[0].ctl = 0;
+
+	/*
+	 * Turn off MC4_MISC thresholding banks on those models since
+	 * they're not supported there.
+	 */
+	if (c->x86 == 0x15 &&
+	    (c->x86_model >= 0x10 && c->x86_model <= 0x1f)) {
+		int i;
+		u64 val, hwcr;
+		bool need_toggle;
+		u32 msrs[] = {
+			0x00000413, /* MC4_MISC0 */
+			0xc0000408, /* MC4_MISC1 */
+		};
+
+		rdmsrl(MSR_K7_HWCR, hwcr);
+
+		/* McStatusWrEn has to be set */
+		need_toggle = !(hwcr & BIT(18));
+
+		if (need_toggle)
+			wrmsrl(MSR_K7_HWCR, hwcr | BIT(18));
+
+		for (i = 0; i < ARRAY_SIZE(msrs); i++) {
+			rdmsrl(msrs[i], val);
+
+			/* CntP bit set? */
+			if (val & BIT(62)) {
+				val &= ~BIT(62);
+				wrmsrl(msrs[i], val);
+			}
+		}
+
+		/* restore old settings */
+		if (need_toggle)
+			wrmsrl(MSR_K7_HWCR, hwcr);
+	}
 	}
 
 	if (c->x86_vendor == X86_VENDOR_INTEL) {
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index 99b57179f91..f4873a64f46 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -51,6 +51,7 @@ struct threshold_block {
 	unsigned int		cpu;
 	u32			address;
 	u16			interrupt_enable;
+	bool			interrupt_capable;
 	u16			threshold_limit;
 	struct kobject		kobj;
 	struct list_head	miscj;
@@ -83,6 +84,21 @@ struct thresh_restart {
 	u16			old_limit;
 };
 
+static bool lvt_interrupt_supported(unsigned int bank, u32 msr_high_bits)
+{
+	/*
+	 * bank 4 supports APIC LVT interrupts implicitly since forever.
+	 */
+	if (bank == 4)
+		return true;
+
+	/*
+	 * IntP: interrupt present; if this bit is set, the thresholding
+	 * bank can generate APIC LVT interrupts
+	 */
+	return msr_high_bits & BIT(28);
+}
+
 static int lvt_off_valid(struct threshold_block *b, int apic, u32 lo, u32 hi)
 {
 	int msr = (hi & MASK_LVTOFF_HI) >> 20;
@@ -104,8 +120,10 @@ static int lvt_off_valid(struct threshold_block *b, int apic, u32 lo, u32 hi)
 	return 1;
 };
 
-/* must be called with correct cpu affinity */
-/* Called via smp_call_function_single() */
+/*
+ * Called via smp_call_function_single(), must be called with correct
+ * cpu affinity.
+ */
 static void threshold_restart_bank(void *_tr)
 {
 	struct thresh_restart *tr = _tr;
@@ -128,6 +146,12 @@ static void threshold_restart_bank(void *_tr)
 			(new_count & THRESHOLD_MAX);
 	}
 
+	/* clear IntType */
+	hi &= ~MASK_INT_TYPE_HI;
+
+	if (!tr->b->interrupt_capable)
+		goto done;
+
 	if (tr->set_lvt_off) {
 		if (lvt_off_valid(tr->b, tr->lvt_off, lo, hi)) {
 			/* set new lvt offset */
@@ -136,9 +160,10 @@ static void threshold_restart_bank(void *_tr)
 		}
 	}
 
-	tr->b->interrupt_enable ?
-	    (hi = (hi & ~MASK_INT_TYPE_HI) | INT_TYPE_APIC) :
-	    (hi &= ~MASK_INT_TYPE_HI);
+	if (tr->b->interrupt_enable)
+		hi |= INT_TYPE_APIC;
+
+ done:
 
 	hi |= MASK_COUNT_EN_HI;
 	wrmsr(tr->b->address, lo, hi);
@@ -202,14 +227,17 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
 		if (shared_bank[bank] && c->cpu_core_id)
 			break;
 
-		offset = setup_APIC_mce(offset,
-					(high & MASK_LVTOFF_HI) >> 20);
-
 		memset(&b, 0, sizeof(b));
 		b.cpu			= cpu;
 		b.bank			= bank;
 		b.block			= block;
 		b.address		= address;
+		b.interrupt_capable	= lvt_interrupt_supported(bank, high);
+
+		if (b.interrupt_capable) {
+			int new = (high & MASK_LVTOFF_HI) >> 20;
+			offset = setup_APIC_mce(offset, new);
+		}
 
 		mce_threshold_block_init(&b, offset);
 		mce_threshold_vector = amd_threshold_interrupt;
@@ -309,6 +337,9 @@ store_interrupt_enable(struct threshold_block *b, const char *buf, size_t size)
 	struct thresh_restart tr;
 	unsigned long new;
 
+	if (!b->interrupt_capable)
+		return -EINVAL;
+
 	if (strict_strtoul(buf, 0, &new) < 0)
 		return -EINVAL;
 
@@ -390,10 +421,10 @@ RW_ATTR(threshold_limit);
 RW_ATTR(error_count);
 
 static struct attribute *default_attrs[] = {
-	&interrupt_enable.attr,
 	&threshold_limit.attr,
 	&error_count.attr,
-	NULL
+	NULL,	/* possibly interrupt_enable if supported, see below */
+	NULL,
 };
 
 #define to_block(k) container_of(k, struct threshold_block, kobj)
@@ -467,8 +498,14 @@ static __cpuinit int allocate_threshold_blocks(unsigned int cpu,
 	b->cpu			= cpu;
 	b->address		= address;
 	b->interrupt_enable	= 0;
+	b->interrupt_capable	= lvt_interrupt_supported(bank, high);
 	b->threshold_limit	= THRESHOLD_MAX;
 
+	if (b->interrupt_capable)
+		threshold_ktype.default_attrs[2] = &interrupt_enable.attr;
+	else
+		threshold_ktype.default_attrs[2] = NULL;
+
 	INIT_LIST_HEAD(&b->miscj);
 
 	if (per_cpu(threshold_banks, cpu)[bank]->blocks) {
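
The default_attrs[] change above reserves a slot that allocate_threshold_blocks() fills only when the bank is interrupt-capable; the second trailing NULL keeps the array terminated when the slot is in use, since consumers walk it until the first NULL. A hypothetical user-space analog of that optional-slot pattern:

#include <stdio.h>

static const char *attrs[] = {
	"threshold_limit",
	"error_count",
	NULL,	/* optionally "interrupt_enable" */
	NULL,
};

int main(void)
{
	int interrupt_capable = 1;	/* pretend the bank supports LVT */
	const char **a;

	if (interrupt_capable)
		attrs[2] = "interrupt_enable";

	/* iteration stops at the first NULL either way */
	for (a = attrs; *a; a++)
		printf("visible attribute: %s\n", *a);
	return 0;
}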
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index bb8e03407e1..e049d6da018 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -484,9 +484,6 @@ static int __x86_pmu_event_init(struct perf_event *event)
 
 	/* mark unused */
 	event->hw.extra_reg.idx = EXTRA_REG_NONE;
-
-	/* mark not used */
-	event->hw.extra_reg.idx = EXTRA_REG_NONE;
 	event->hw.branch_reg.idx = EXTRA_REG_NONE;
 
 	return x86_pmu.hw_config(event);
@@ -1186,8 +1183,6 @@ int x86_pmu_handle_irq(struct pt_regs *regs)
 	int idx, handled = 0;
 	u64 val;
 
-	perf_sample_data_init(&data, 0);
-
 	cpuc = &__get_cpu_var(cpu_hw_events);
 
 	/*
@@ -1222,7 +1217,7 @@ int x86_pmu_handle_irq(struct pt_regs *regs)
 		 * event overflow
 		 */
 		handled++;
-		data.period = event->hw.last_period;
+		perf_sample_data_init(&data, 0, event->hw.last_period);
 
 		if (!x86_perf_event_set_period(event))
 			continue;
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index 95e7fe1c5f0..11a4eb9131d 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -134,8 +134,13 @@ static u64 amd_pmu_event_map(int hw_event)
 
 static int amd_pmu_hw_config(struct perf_event *event)
 {
-	int ret = x86_pmu_hw_config(event);
+	int ret;
 
+	/* pass precise event sampling to ibs: */
+	if (event->attr.precise_ip && get_ibs_caps())
+		return -ENOENT;
+
+	ret = x86_pmu_hw_config(event);
 	if (ret)
 		return ret;
 
@@ -205,10 +210,8 @@ static void amd_put_event_constraints(struct cpu_hw_events *cpuc,
 	 * when we come here
 	 */
 	for (i = 0; i < x86_pmu.num_counters; i++) {
-		if (nb->owners[i] == event) {
-			cmpxchg(nb->owners+i, event, NULL);
+		if (cmpxchg(nb->owners + i, event, NULL) == event)
 			break;
-		}
 	}
 }
 
@@ -493,6 +496,7 @@ static __initconst const struct x86_pmu amd_pmu = {
  * 0x023	DE	PERF_CTL[2:0]
  * 0x02D	LS	PERF_CTL[3]
  * 0x02E	LS	PERF_CTL[3,0]
+ * 0x031	LS	PERF_CTL[2:0] (**)
  * 0x043	CU	PERF_CTL[2:0]
  * 0x045	CU	PERF_CTL[2:0]
  * 0x046	CU	PERF_CTL[2:0]
@@ -506,10 +510,12 @@ static __initconst const struct x86_pmu amd_pmu = {
  * 0x0DD	LS	PERF_CTL[5:0]
  * 0x0DE	LS	PERF_CTL[5:0]
  * 0x0DF	LS	PERF_CTL[5:0]
+ * 0x1C0	EX	PERF_CTL[5:3]
  * 0x1D6	EX	PERF_CTL[5:0]
  * 0x1D8	EX	PERF_CTL[5:0]
  *
  * (*)  depending on the umask all FPU counters may be used
+ * (**) only one unitmask enabled at a time
  */
 
 static struct event_constraint amd_f15_PMC0  = EVENT_CONSTRAINT(0, 0x01, 0);
@@ -559,6 +565,12 @@ amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, struct perf_event *ev
 		return &amd_f15_PMC3;
 	case 0x02E:
 		return &amd_f15_PMC30;
+	case 0x031:
+		if (hweight_long(hwc->config & ARCH_PERFMON_EVENTSEL_UMASK) <= 1)
+			return &amd_f15_PMC20;
+		return &emptyconstraint;
+	case 0x1C0:
+		return &amd_f15_PMC53;
 	default:
 		return &amd_f15_PMC50;
 	}
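
For event 0x031 the new constraint code above schedules the event only when at most one unitmask bit is set, falling back to the empty constraint otherwise. A sketch of that check with made-up configs, using the GCC popcount builtin in place of hweight_long():

#include <stdio.h>

#define EVENTSEL_UMASK	0x0000ff00UL	/* umask field, bits 8-15 of the config */

int main(void)
{
	unsigned long one_umask  = 0x0131;	/* event 0x31, umask 0x01 */
	unsigned long two_umasks = 0x0331;	/* event 0x31, umask 0x03 */

	printf("one umask:  %s\n",
	       __builtin_popcountl(one_umask & EVENTSEL_UMASK) <= 1
	       ? "schedulable" : "empty constraint");
	printf("two umasks: %s\n",
	       __builtin_popcountl(two_umasks & EVENTSEL_UMASK) <= 1
	       ? "schedulable" : "empty constraint");
	return 0;
}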
diff --git a/arch/x86/kernel/cpu/perf_event_amd_ibs.c b/arch/x86/kernel/cpu/perf_event_amd_ibs.c
index 3b8a2d30d14..da9bcdcd985 100644
--- a/arch/x86/kernel/cpu/perf_event_amd_ibs.c
+++ b/arch/x86/kernel/cpu/perf_event_amd_ibs.c
@@ -9,6 +9,7 @@
 #include <linux/perf_event.h>
 #include <linux/module.h>
 #include <linux/pci.h>
+#include <linux/ptrace.h>
 
 #include <asm/apic.h>
 
@@ -16,36 +17,591 @@ static u32 ibs_caps;
 
 #if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD)
 
-static struct pmu perf_ibs;
+#include <linux/kprobes.h>
+#include <linux/hardirq.h>
+
+#include <asm/nmi.h>
+
+#define IBS_FETCH_CONFIG_MASK	(IBS_FETCH_RAND_EN | IBS_FETCH_MAX_CNT)
+#define IBS_OP_CONFIG_MASK	IBS_OP_MAX_CNT
+
+enum ibs_states {
+	IBS_ENABLED	= 0,
+	IBS_STARTED	= 1,
+	IBS_STOPPING	= 2,
+
+	IBS_MAX_STATES,
+};
+
+struct cpu_perf_ibs {
+	struct perf_event	*event;
+	unsigned long		state[BITS_TO_LONGS(IBS_MAX_STATES)];
+};
+
+struct perf_ibs {
+	struct pmu			pmu;
+	unsigned int			msr;
+	u64				config_mask;
+	u64				cnt_mask;
+	u64				enable_mask;
+	u64				valid_mask;
+	u64				max_period;
+	unsigned long			offset_mask[1];
+	int				offset_max;
+	struct cpu_perf_ibs __percpu	*pcpu;
+	u64				(*get_count)(u64 config);
+};
+
+struct perf_ibs_data {
+	u32		size;
+	union {
+		u32	data[0];	/* data buffer starts here */
+		u32	caps;
+	};
+	u64		regs[MSR_AMD64_IBS_REG_COUNT_MAX];
+};
+
+static int
+perf_event_set_period(struct hw_perf_event *hwc, u64 min, u64 max, u64 *hw_period)
+{
+	s64 left = local64_read(&hwc->period_left);
+	s64 period = hwc->sample_period;
+	int overflow = 0;
+
+	/*
+	 * If we are way outside a reasonable range then just skip forward:
+	 */
+	if (unlikely(left <= -period)) {
+		left = period;
+		local64_set(&hwc->period_left, left);
+		hwc->last_period = period;
+		overflow = 1;
+	}
+
+	if (unlikely(left < (s64)min)) {
+		left += period;
+		local64_set(&hwc->period_left, left);
+		hwc->last_period = period;
+		overflow = 1;
+	}
+
+	/*
+	 * If the hw period that triggers the sw overflow is too short
+	 * we might hit the irq handler. This biases the results.
+	 * Thus we shorten the next-to-last period and set the last
+	 * period to the max period.
+	 */
+	if (left > max) {
+		left -= max;
+		if (left > max)
+			left = max;
+		else if (left < min)
+			left = min;
+	}
+
+	*hw_period = (u64)left;
+
+	return overflow;
+}
+
+static int
+perf_event_try_update(struct perf_event *event, u64 new_raw_count, int width)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	int shift = 64 - width;
+	u64 prev_raw_count;
+	u64 delta;
+
+	/*
+	 * Careful: an NMI might modify the previous event value.
+	 *
+	 * Our tactic to handle this is to first atomically read and
+	 * exchange a new raw count - then add that new-prev delta
+	 * count to the generic event atomically:
+	 */
+	prev_raw_count = local64_read(&hwc->prev_count);
+	if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
+			    new_raw_count) != prev_raw_count)
+		return 0;
+
+	/*
+	 * Now we have the new raw value and have updated the prev
+	 * timestamp already. We can now calculate the elapsed delta
+	 * (event-)time and add that to the generic event.
+	 *
+	 * Careful, not all hw sign-extends above the physical width
+	 * of the count.
+	 */
+	delta = (new_raw_count << shift) - (prev_raw_count << shift);
+	delta >>= shift;
+
+	local64_add(delta, &event->count);
+	local64_sub(delta, &hwc->period_left);
+
+	return 1;
+}
+
+static struct perf_ibs perf_ibs_fetch;
+static struct perf_ibs perf_ibs_op;
+
+static struct perf_ibs *get_ibs_pmu(int type)
+{
+	if (perf_ibs_fetch.pmu.type == type)
+		return &perf_ibs_fetch;
+	if (perf_ibs_op.pmu.type == type)
+		return &perf_ibs_op;
+	return NULL;
+}
+
+/*
+ * Use IBS for precise event sampling:
+ *
+ *  perf record -a -e cpu-cycles:p ...    # use ibs op counting cycle count
+ *  perf record -a -e r076:p ...          # same as -e cpu-cycles:p
+ *  perf record -a -e r0C1:p ...          # use ibs op counting micro-ops
+ *
+ * IbsOpCntCtl (bit 19) of IBS Execution Control Register (IbsOpCtl,
+ * MSRC001_1033) is used to select either cycle or micro-ops counting
+ * mode.
+ *
+ * The rip of IBS samples has skid 0. Thus, IBS supports precise
+ * levels 1 and 2 and the PERF_EFLAGS_EXACT is set. In rare cases the
+ * rip is invalid when IBS was not able to record the rip correctly.
+ * We clear PERF_EFLAGS_EXACT and take the rip from pt_regs then.
+ *
+ */
+static int perf_ibs_precise_event(struct perf_event *event, u64 *config)
+{
+	switch (event->attr.precise_ip) {
+	case 0:
+		return -ENOENT;
+	case 1:
+	case 2:
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	switch (event->attr.type) {
+	case PERF_TYPE_HARDWARE:
+		switch (event->attr.config) {
+		case PERF_COUNT_HW_CPU_CYCLES:
+			*config = 0;
+			return 0;
+		}
+		break;
+	case PERF_TYPE_RAW:
+		switch (event->attr.config) {
+		case 0x0076:
+			*config = 0;
+			return 0;
+		case 0x00C1:
+			*config = IBS_OP_CNT_CTL;
+			return 0;
+		}
+		break;
+	default:
+		return -ENOENT;
+	}
+
+	return -EOPNOTSUPP;
+}
 
 static int perf_ibs_init(struct perf_event *event)
 {
-	if (perf_ibs.type != event->attr.type)
+	struct hw_perf_event *hwc = &event->hw;
+	struct perf_ibs *perf_ibs;
+	u64 max_cnt, config;
+	int ret;
+
+	perf_ibs = get_ibs_pmu(event->attr.type);
+	if (perf_ibs) {
+		config = event->attr.config;
+	} else {
+		perf_ibs = &perf_ibs_op;
+		ret = perf_ibs_precise_event(event, &config);
+		if (ret)
+			return ret;
+	}
+
+	if (event->pmu != &perf_ibs->pmu)
 		return -ENOENT;
+
+	if (config & ~perf_ibs->config_mask)
+		return -EINVAL;
+
+	if (hwc->sample_period) {
+		if (config & perf_ibs->cnt_mask)
+			/* raw max_cnt may not be set */
+			return -EINVAL;
+		if (!event->attr.sample_freq && hwc->sample_period & 0x0f)
+			/*
+			 * lower 4 bits cannot be set in ibs max cnt,
+			 * but allow it in case we adjust the
+			 * sample period to set a frequency.
+			 */
+			return -EINVAL;
+		hwc->sample_period &= ~0x0FULL;
+		if (!hwc->sample_period)
+			hwc->sample_period = 0x10;
+	} else {
+		max_cnt = config & perf_ibs->cnt_mask;
+		config &= ~perf_ibs->cnt_mask;
+		event->attr.sample_period = max_cnt << 4;
+		hwc->sample_period = event->attr.sample_period;
+	}
+
+	if (!hwc->sample_period)
+		return -EINVAL;
+
+	/*
+	 * If we modify hwc->sample_period, we also need to update
+	 * hwc->last_period and hwc->period_left.
+	 */
+	hwc->last_period = hwc->sample_period;
+	local64_set(&hwc->period_left, hwc->sample_period);
+
+	hwc->config_base = perf_ibs->msr;
+	hwc->config = config;
+
 	return 0;
 }
 
+static int perf_ibs_set_period(struct perf_ibs *perf_ibs,
+			       struct hw_perf_event *hwc, u64 *period)
+{
+	int overflow;
+
+	/* ignore lower 4 bits in min count: */
+	overflow = perf_event_set_period(hwc, 1<<4, perf_ibs->max_period, period);
+	local64_set(&hwc->prev_count, 0);
+
+	return overflow;
+}
+
+static u64 get_ibs_fetch_count(u64 config)
+{
+	return (config & IBS_FETCH_CNT) >> 12;
+}
+
+static u64 get_ibs_op_count(u64 config)
+{
+	u64 count = 0;
+
+	if (config & IBS_OP_VAL)
+		count += (config & IBS_OP_MAX_CNT) << 4; /* cnt rolled over */
+
+	if (ibs_caps & IBS_CAPS_RDWROPCNT)
+		count += (config & IBS_OP_CUR_CNT) >> 32;
+
+	return count;
+}
+
+static void
+perf_ibs_event_update(struct perf_ibs *perf_ibs, struct perf_event *event,
+		      u64 *config)
+{
+	u64 count = perf_ibs->get_count(*config);
+
+	/*
+	 * Set width to 64 since we do not overflow on max width but
+	 * instead on max count. In perf_ibs_set_period() we clear
+	 * prev count manually on overflow.
+	 */
+	while (!perf_event_try_update(event, count, 64)) {
+		rdmsrl(event->hw.config_base, *config);
+		count = perf_ibs->get_count(*config);
+	}
+}
+
+static inline void perf_ibs_enable_event(struct perf_ibs *perf_ibs,
+					 struct hw_perf_event *hwc, u64 config)
+{
+	wrmsrl(hwc->config_base, hwc->config | config | perf_ibs->enable_mask);
+}
+
+/*
+ * Erratum #420 Instruction-Based Sampling Engine May Generate
+ * Interrupt that Cannot Be Cleared:
+ *
+ * Must clear counter mask first, then clear the enable bit. See
+ * Revision Guide for AMD Family 10h Processors, Publication #41322.
+ */
+static inline void perf_ibs_disable_event(struct perf_ibs *perf_ibs,
+					  struct hw_perf_event *hwc, u64 config)
+{
+	config &= ~perf_ibs->cnt_mask;
+	wrmsrl(hwc->config_base, config);
+	config &= ~perf_ibs->enable_mask;
+	wrmsrl(hwc->config_base, config);
+}
+
+/*
+ * We cannot restore the ibs pmu state, so we always need to update
+ * the event while stopping it and then reset the state when starting
+ * again. Thus, ignore PERF_EF_RELOAD and PERF_EF_UPDATE flags in
+ * perf_ibs_start()/perf_ibs_stop() and instead always do it.
+ */
+static void perf_ibs_start(struct perf_event *event, int flags)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
+	struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
+	u64 period;
+
+	if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED)))
+		return;
+
+	WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
+	hwc->state = 0;
+
+	perf_ibs_set_period(perf_ibs, hwc, &period);
+	set_bit(IBS_STARTED, pcpu->state);
+	perf_ibs_enable_event(perf_ibs, hwc, period >> 4);
+
+	perf_event_update_userpage(event);
+}
+
+static void perf_ibs_stop(struct perf_event *event, int flags)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
+	struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
+	u64 config;
+	int stopping;
+
+	stopping = test_and_clear_bit(IBS_STARTED, pcpu->state);
+
+	if (!stopping && (hwc->state & PERF_HES_UPTODATE))
+		return;
+
+	rdmsrl(hwc->config_base, config);
+
+	if (stopping) {
+		set_bit(IBS_STOPPING, pcpu->state);
+		perf_ibs_disable_event(perf_ibs, hwc, config);
+		WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
+		hwc->state |= PERF_HES_STOPPED;
+	}
+
+	if (hwc->state & PERF_HES_UPTODATE)
+		return;
+
+	/*
+	 * Clear valid bit to not count rollovers on update, rollovers
+	 * are only updated in the irq handler.
+	 */
+	config &= ~perf_ibs->valid_mask;
+
+	perf_ibs_event_update(perf_ibs, event, &config);
+	hwc->state |= PERF_HES_UPTODATE;
+}
+
 static int perf_ibs_add(struct perf_event *event, int flags)
 {
+	struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
+	struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
+
+	if (test_and_set_bit(IBS_ENABLED, pcpu->state))
+		return -ENOSPC;
+
+	event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
+
+	pcpu->event = event;
+
+	if (flags & PERF_EF_START)
+		perf_ibs_start(event, PERF_EF_RELOAD);
+
 	return 0;
 }
 
 static void perf_ibs_del(struct perf_event *event, int flags)
 {
+	struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
+	struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
+
+	if (!test_and_clear_bit(IBS_ENABLED, pcpu->state))
+		return;
+
+	perf_ibs_stop(event, PERF_EF_UPDATE);
+
+	pcpu->event = NULL;
+
+	perf_event_update_userpage(event);
 }
 
-static struct pmu perf_ibs = {
-	.event_init	= perf_ibs_init,
-	.add		= perf_ibs_add,
-	.del		= perf_ibs_del,
+static void perf_ibs_read(struct perf_event *event) { }
+
+static struct perf_ibs perf_ibs_fetch = {
+	.pmu = {
+		.task_ctx_nr	= perf_invalid_context,
+
+		.event_init	= perf_ibs_init,
+		.add		= perf_ibs_add,
+		.del		= perf_ibs_del,
+		.start		= perf_ibs_start,
+		.stop		= perf_ibs_stop,
+		.read		= perf_ibs_read,
+	},
+	.msr			= MSR_AMD64_IBSFETCHCTL,
+	.config_mask		= IBS_FETCH_CONFIG_MASK,
+	.cnt_mask		= IBS_FETCH_MAX_CNT,
+	.enable_mask		= IBS_FETCH_ENABLE,
+	.valid_mask		= IBS_FETCH_VAL,
+	.max_period		= IBS_FETCH_MAX_CNT << 4,
+	.offset_mask		= { MSR_AMD64_IBSFETCH_REG_MASK },
+	.offset_max		= MSR_AMD64_IBSFETCH_REG_COUNT,
+
+	.get_count		= get_ibs_fetch_count,
 };
 
+static struct perf_ibs perf_ibs_op = {
+	.pmu = {
+		.task_ctx_nr	= perf_invalid_context,
+
+		.event_init	= perf_ibs_init,
+		.add		= perf_ibs_add,
+		.del		= perf_ibs_del,
+		.start		= perf_ibs_start,
+		.stop		= perf_ibs_stop,
+		.read		= perf_ibs_read,
+	},
+	.msr			= MSR_AMD64_IBSOPCTL,
+	.config_mask		= IBS_OP_CONFIG_MASK,
+	.cnt_mask		= IBS_OP_MAX_CNT,
+	.enable_mask		= IBS_OP_ENABLE,
+	.valid_mask		= IBS_OP_VAL,
+	.max_period		= IBS_OP_MAX_CNT << 4,
+	.offset_mask		= { MSR_AMD64_IBSOP_REG_MASK },
+	.offset_max		= MSR_AMD64_IBSOP_REG_COUNT,
+
+	.get_count		= get_ibs_op_count,
+};
+
+static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs)
+{
+	struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
+	struct perf_event *event = pcpu->event;
+	struct hw_perf_event *hwc = &event->hw;
+	struct perf_sample_data data;
+	struct perf_raw_record raw;
+	struct pt_regs regs;
+	struct perf_ibs_data ibs_data;
+	int offset, size, check_rip, offset_max, throttle = 0;
+	unsigned int msr;
+	u64 *buf, *config, period;
+
+	if (!test_bit(IBS_STARTED, pcpu->state)) {
+		/*
+		 * Catch spurious interrupts after stopping IBS: After
+		 * disabling IBS there could still be incoming NMIs
+		 * with samples that even have the valid bit cleared.
+		 * Mark all these NMIs as handled.
+		 */
+		return test_and_clear_bit(IBS_STOPPING, pcpu->state) ? 1 : 0;
+	}
+
+	msr = hwc->config_base;
+	buf = ibs_data.regs;
+	rdmsrl(msr, *buf);
+	if (!(*buf++ & perf_ibs->valid_mask))
+		return 0;
+
+	config = &ibs_data.regs[0];
+	perf_ibs_event_update(perf_ibs, event, config);
+	perf_sample_data_init(&data, 0, hwc->last_period);
+	if (!perf_ibs_set_period(perf_ibs, hwc, &period))
+		goto out;	/* no sw counter overflow */
+
+	ibs_data.caps = ibs_caps;
+	size = 1;
+	offset = 1;
+	check_rip = (perf_ibs == &perf_ibs_op && (ibs_caps & IBS_CAPS_RIPINVALIDCHK));
+	if (event->attr.sample_type & PERF_SAMPLE_RAW)
+		offset_max = perf_ibs->offset_max;
+	else if (check_rip)
+		offset_max = 2;
+	else
+		offset_max = 1;
+	do {
+		rdmsrl(msr + offset, *buf++);
+		size++;
+		offset = find_next_bit(perf_ibs->offset_mask,
+				       perf_ibs->offset_max,
+				       offset + 1);
+	} while (offset < offset_max);
+	ibs_data.size = sizeof(u64) * size;
+
+	regs = *iregs;
+	if (check_rip && (ibs_data.regs[2] & IBS_RIP_INVALID)) {
+		regs.flags &= ~PERF_EFLAGS_EXACT;
+	} else {
+		instruction_pointer_set(&regs, ibs_data.regs[1]);
+		regs.flags |= PERF_EFLAGS_EXACT;
+	}
+
+	if (event->attr.sample_type & PERF_SAMPLE_RAW) {
+		raw.size = sizeof(u32) + ibs_data.size;
+		raw.data = ibs_data.data;
+		data.raw = &raw;
+	}
+
+	throttle = perf_event_overflow(event, &data, &regs);
+out:
+	if (throttle)
+		perf_ibs_disable_event(perf_ibs, hwc, *config);
+	else
+		perf_ibs_enable_event(perf_ibs, hwc, period >> 4);
+
+	perf_event_update_userpage(event);
+
+	return 1;
+}
+
+static int __kprobes
+perf_ibs_nmi_handler(unsigned int cmd, struct pt_regs *regs)
+{
+	int handled = 0;
+
+	handled += perf_ibs_handle_irq(&perf_ibs_fetch, regs);
+	handled += perf_ibs_handle_irq(&perf_ibs_op, regs);
+
+	if (handled)
+		inc_irq_stat(apic_perf_irqs);
+
+	return handled;
+}
+
+static __init int perf_ibs_pmu_init(struct perf_ibs *perf_ibs, char *name)
+{
+	struct cpu_perf_ibs __percpu *pcpu;
+	int ret;
+
+	pcpu = alloc_percpu(struct cpu_perf_ibs);
+	if (!pcpu)
+		return -ENOMEM;
+
+	perf_ibs->pcpu = pcpu;
+
+	ret = perf_pmu_register(&perf_ibs->pmu, name, -1);
+	if (ret) {
+		perf_ibs->pcpu = NULL;
+		free_percpu(pcpu);
+	}
+
+	return ret;
+}
+
 static __init int perf_event_ibs_init(void)
 {
 	if (!ibs_caps)
 		return -ENODEV;	/* ibs not supported by the cpu */
 
-	perf_pmu_register(&perf_ibs, "ibs", -1);
+	perf_ibs_pmu_init(&perf_ibs_fetch, "ibs_fetch");
+	if (ibs_caps & IBS_CAPS_OPCNT)
+		perf_ibs_op.config_mask |= IBS_OP_CNT_CTL;
+	perf_ibs_pmu_init(&perf_ibs_op, "ibs_op");
+	register_nmi_handler(NMI_LOCAL, perf_ibs_nmi_handler, 0, "perf_ibs");
 	printk(KERN_INFO "perf: AMD IBS detected (0x%08x)\n", ibs_caps);
 
 	return 0;
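
perf_event_try_update() in the IBS diff above uses the usual x86 perf trick for counters narrower than 64 bits: shift both raw values up to the top of the register before subtracting, so wraparound cancels out. A runnable sketch with an assumed 48-bit counter (the values are made up):

#include <stdio.h>
#include <stdint.h>

/* Sketch of the width handling: for a counter only `width` bits wide,
 * shifting up by (64 - width) before subtracting and shifting back
 * gives the right delta even when new < prev due to wraparound. */
static uint64_t counter_delta(uint64_t prev, uint64_t new, int width)
{
	int shift = 64 - width;
	uint64_t delta = (new << shift) - (prev << shift);
	return delta >> shift;
}

int main(void)
{
	/* 48-bit counter wrapped from 0xffffffffffff through 0 to 0x5 */
	printf("delta = %llu\n",	/* prints 6 */
	       (unsigned long long)counter_delta(0xffffffffffffULL, 0x5ULL, 48));
	return 0;
}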
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 26b3e2fef10..166546ec6ae 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1027,8 +1027,6 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
 	u64 status;
 	int handled;
 
-	perf_sample_data_init(&data, 0);
-
 	cpuc = &__get_cpu_var(cpu_hw_events);
 
 	/*
@@ -1082,7 +1080,7 @@ again:
 		if (!intel_pmu_save_and_restart(event))
 			continue;
 
-		data.period = event->hw.last_period;
+		perf_sample_data_init(&data, 0, event->hw.last_period);
 
 		if (has_branch_stack(event))
 			data.br_stack = &cpuc->lbr_stack;
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 7f64df19e7d..5a3edc27f6e 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -316,8 +316,7 @@ int intel_pmu_drain_bts_buffer(void)
 
 	ds->bts_index = ds->bts_buffer_base;
 
-	perf_sample_data_init(&data, 0);
-	data.period = event->hw.last_period;
+	perf_sample_data_init(&data, 0, event->hw.last_period);
 	regs.ip     = 0;
 
 	/*
@@ -564,8 +563,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
 	if (!intel_pmu_save_and_restart(event))
 		return;
 
-	perf_sample_data_init(&data, 0);
-	data.period = event->hw.last_period;
+	perf_sample_data_init(&data, 0, event->hw.last_period);
 
 	/*
 	 * We use the interrupt regs as a base because the PEBS record
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c
index a2dfacfd710..47124a73dd7 100644
--- a/arch/x86/kernel/cpu/perf_event_p4.c
+++ b/arch/x86/kernel/cpu/perf_event_p4.c
@@ -1005,8 +1005,6 @@ static int p4_pmu_handle_irq(struct pt_regs *regs)
 	int idx, handled = 0;
 	u64 val;
 
-	perf_sample_data_init(&data, 0);
-
 	cpuc = &__get_cpu_var(cpu_hw_events);
 
 	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
@@ -1034,10 +1032,12 @@ static int p4_pmu_handle_irq(struct pt_regs *regs)
 		handled += overflow;
 
 		/* event overflow for sure */
-		data.period = event->hw.last_period;
+		perf_sample_data_init(&data, 0, hwc->last_period);
 
 		if (!x86_perf_event_set_period(event))
 			continue;
+
+
 		if (perf_event_overflow(event, &data, regs))
 			x86_pmu_stop(event, 0);
 	}
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index 1b81839b6c8..571246d81ed 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -271,7 +271,7 @@ int __kprobes __die(const char *str, struct pt_regs *regs, long err)
271 current->thread.trap_nr, SIGSEGV) == NOTIFY_STOP) 271 current->thread.trap_nr, SIGSEGV) == NOTIFY_STOP)
272 return 1; 272 return 1;
273 273
274 show_registers(regs); 274 show_regs(regs);
275#ifdef CONFIG_X86_32 275#ifdef CONFIG_X86_32
276 if (user_mode_vm(regs)) { 276 if (user_mode_vm(regs)) {
277 sp = regs->sp; 277 sp = regs->sp;
@@ -311,16 +311,33 @@ void die(const char *str, struct pt_regs *regs, long err)
311 311
312static int __init kstack_setup(char *s) 312static int __init kstack_setup(char *s)
313{ 313{
314 ssize_t ret;
315 unsigned long val;
316
314 if (!s) 317 if (!s)
315 return -EINVAL; 318 return -EINVAL;
316 kstack_depth_to_print = simple_strtoul(s, NULL, 0); 319
320 ret = kstrtoul(s, 0, &val);
321 if (ret)
322 return ret;
323 kstack_depth_to_print = val;
317 return 0; 324 return 0;
318} 325}
319early_param("kstack", kstack_setup); 326early_param("kstack", kstack_setup);
320 327
321static int __init code_bytes_setup(char *s) 328static int __init code_bytes_setup(char *s)
322{ 329{
323 code_bytes = simple_strtoul(s, NULL, 0); 330 ssize_t ret;
331 unsigned long val;
332
333 if (!s)
334 return -EINVAL;
335
336 ret = kstrtoul(s, 0, &val);
337 if (ret)
338 return ret;
339
340 code_bytes = val;
324 if (code_bytes > 8192) 341 if (code_bytes > 8192)
325 code_bytes = 8192; 342 code_bytes = 8192;
326 343
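
Both setup helpers now use the same kstrtoul() idiom: unlike simple_strtoul(), which parses what it can and gives no error indication, kstrtoul() rejects malformed input with a negative errno. A minimal sketch of the pattern for any early_param handler (the parameter and variable names are illustrative):

	static unsigned long example_knob;

	static int __init example_setup(char *s)
	{
		unsigned long val;
		int ret;

		if (!s)
			return -EINVAL;

		ret = kstrtoul(s, 0, &val);	/* base 0 auto-detects 0x/0 prefixes */
		if (ret)
			return ret;		/* e.g. -EINVAL or -ERANGE */

		example_knob = val;
		return 0;
	}
	early_param("example", example_setup);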
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c
index 88ec9129271..e0b1d783daa 100644
--- a/arch/x86/kernel/dumpstack_32.c
+++ b/arch/x86/kernel/dumpstack_32.c
@@ -82,7 +82,7 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
82} 82}
83 83
84 84
85void show_registers(struct pt_regs *regs) 85void show_regs(struct pt_regs *regs)
86{ 86{
87 int i; 87 int i;
88 88
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index 17107bd6e1f..791b76122aa 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -245,7 +245,7 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
245 show_trace_log_lvl(task, regs, sp, bp, log_lvl); 245 show_trace_log_lvl(task, regs, sp, bp, log_lvl);
246} 246}
247 247
248void show_registers(struct pt_regs *regs) 248void show_regs(struct pt_regs *regs)
249{ 249{
250 int i; 250 int i;
251 unsigned long sp; 251 unsigned long sp;
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 7b784f4ef1e..01ccf9b7147 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -56,6 +56,7 @@
56#include <asm/irq_vectors.h> 56#include <asm/irq_vectors.h>
57#include <asm/cpufeature.h> 57#include <asm/cpufeature.h>
58#include <asm/alternative-asm.h> 58#include <asm/alternative-asm.h>
59#include <asm/asm.h>
59 60
60/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */ 61/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */
61#include <linux/elf-em.h> 62#include <linux/elf-em.h>
@@ -151,10 +152,8 @@
151.pushsection .fixup, "ax" 152.pushsection .fixup, "ax"
15299: movl $0, (%esp) 15399: movl $0, (%esp)
153 jmp 98b 154 jmp 98b
154.section __ex_table, "a"
155 .align 4
156 .long 98b, 99b
157.popsection 155.popsection
156 _ASM_EXTABLE(98b,99b)
158.endm 157.endm
159 158
160.macro PTGS_TO_GS 159.macro PTGS_TO_GS
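
Each of these entry_32.S hunks swaps an open-coded __ex_table fragment for the _ASM_EXTABLE() macro from the newly included <asm/asm.h>, which keeps the section name, alignment, and pointer width in one place. A hedged reconstruction of the assembly-side definition (the exact header contents are assumed, not shown in this patch):

	#ifdef __ASSEMBLY__
	# define _ASM_EXTABLE(from, to)			\
		.pushsection "__ex_table", "a" ;	\
		_ASM_ALIGN ;				\
		_ASM_PTR from , to ;			\
		.popsection
	#endif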
@@ -164,10 +163,8 @@
164.pushsection .fixup, "ax" 163.pushsection .fixup, "ax"
16599: movl $0, PT_GS(%esp) 16499: movl $0, PT_GS(%esp)
166 jmp 98b 165 jmp 98b
167.section __ex_table, "a"
168 .align 4
169 .long 98b, 99b
170.popsection 166.popsection
167 _ASM_EXTABLE(98b,99b)
171.endm 168.endm
172 169
173.macro GS_TO_REG reg 170.macro GS_TO_REG reg
@@ -249,12 +246,10 @@
249 jmp 2b 246 jmp 2b
2506: movl $0, (%esp) 2476: movl $0, (%esp)
251 jmp 3b 248 jmp 3b
252.section __ex_table, "a"
253 .align 4
254 .long 1b, 4b
255 .long 2b, 5b
256 .long 3b, 6b
257.popsection 249.popsection
250 _ASM_EXTABLE(1b,4b)
251 _ASM_EXTABLE(2b,5b)
252 _ASM_EXTABLE(3b,6b)
258 POP_GS_EX 253 POP_GS_EX
259.endm 254.endm
260 255
@@ -415,10 +410,7 @@ sysenter_past_esp:
415 jae syscall_fault 410 jae syscall_fault
4161: movl (%ebp),%ebp 4111: movl (%ebp),%ebp
417 movl %ebp,PT_EBP(%esp) 412 movl %ebp,PT_EBP(%esp)
418.section __ex_table,"a" 413 _ASM_EXTABLE(1b,syscall_fault)
419 .align 4
420 .long 1b,syscall_fault
421.previous
422 414
423 GET_THREAD_INFO(%ebp) 415 GET_THREAD_INFO(%ebp)
424 416
@@ -485,10 +477,8 @@ sysexit_audit:
485.pushsection .fixup,"ax" 477.pushsection .fixup,"ax"
4862: movl $0,PT_FS(%esp) 4782: movl $0,PT_FS(%esp)
487 jmp 1b 479 jmp 1b
488.section __ex_table,"a"
489 .align 4
490 .long 1b,2b
491.popsection 480.popsection
481 _ASM_EXTABLE(1b,2b)
492 PTGS_TO_GS_EX 482 PTGS_TO_GS_EX
493ENDPROC(ia32_sysenter_target) 483ENDPROC(ia32_sysenter_target)
494 484
@@ -543,10 +533,7 @@ ENTRY(iret_exc)
543 pushl $do_iret_error 533 pushl $do_iret_error
544 jmp error_code 534 jmp error_code
545.previous 535.previous
546.section __ex_table,"a" 536 _ASM_EXTABLE(irq_return,iret_exc)
547 .align 4
548 .long irq_return,iret_exc
549.previous
550 537
551 CFI_RESTORE_STATE 538 CFI_RESTORE_STATE
552ldt_ss: 539ldt_ss:
@@ -901,10 +888,7 @@ END(device_not_available)
901#ifdef CONFIG_PARAVIRT 888#ifdef CONFIG_PARAVIRT
902ENTRY(native_iret) 889ENTRY(native_iret)
903 iret 890 iret
904.section __ex_table,"a" 891 _ASM_EXTABLE(native_iret, iret_exc)
905 .align 4
906 .long native_iret, iret_exc
907.previous
908END(native_iret) 892END(native_iret)
909 893
910ENTRY(native_irq_enable_sysexit) 894ENTRY(native_irq_enable_sysexit)
@@ -1093,13 +1077,10 @@ ENTRY(xen_failsafe_callback)
1093 movl %eax,16(%esp) 1077 movl %eax,16(%esp)
1094 jmp 4b 1078 jmp 4b
1095.previous 1079.previous
1096.section __ex_table,"a" 1080 _ASM_EXTABLE(1b,6b)
1097 .align 4 1081 _ASM_EXTABLE(2b,7b)
1098 .long 1b,6b 1082 _ASM_EXTABLE(3b,8b)
1099 .long 2b,7b 1083 _ASM_EXTABLE(4b,9b)
1100 .long 3b,8b
1101 .long 4b,9b
1102.previous
1103ENDPROC(xen_failsafe_callback) 1084ENDPROC(xen_failsafe_callback)
1104 1085
1105BUILD_INTERRUPT3(xen_hvm_callback_vector, XEN_HVM_EVTCHN_CALLBACK, 1086BUILD_INTERRUPT3(xen_hvm_callback_vector, XEN_HVM_EVTCHN_CALLBACK,
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index cdc79b5cfcd..320852d0202 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -55,6 +55,7 @@
55#include <asm/paravirt.h> 55#include <asm/paravirt.h>
56#include <asm/ftrace.h> 56#include <asm/ftrace.h>
57#include <asm/percpu.h> 57#include <asm/percpu.h>
58#include <asm/asm.h>
58#include <linux/err.h> 59#include <linux/err.h>
59 60
60/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */ 61/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */
@@ -900,18 +901,12 @@ restore_args:
900 901
901irq_return: 902irq_return:
902 INTERRUPT_RETURN 903 INTERRUPT_RETURN
903 904 _ASM_EXTABLE(irq_return, bad_iret)
904 .section __ex_table, "a"
905 .quad irq_return, bad_iret
906 .previous
907 905
908#ifdef CONFIG_PARAVIRT 906#ifdef CONFIG_PARAVIRT
909ENTRY(native_iret) 907ENTRY(native_iret)
910 iretq 908 iretq
911 909 _ASM_EXTABLE(native_iret, bad_iret)
912 .section __ex_table,"a"
913 .quad native_iret, bad_iret
914 .previous
915#endif 910#endif
916 911
917 .section .fixup,"ax" 912 .section .fixup,"ax"
@@ -1181,10 +1176,7 @@ gs_change:
1181 CFI_ENDPROC 1176 CFI_ENDPROC
1182END(native_load_gs_index) 1177END(native_load_gs_index)
1183 1178
1184 .section __ex_table,"a" 1179 _ASM_EXTABLE(gs_change,bad_gs)
1185 .align 8
1186 .quad gs_change,bad_gs
1187 .previous
1188 .section .fixup,"ax" 1180 .section .fixup,"ax"
1189 /* running with kernelgs */ 1181 /* running with kernelgs */
1190bad_gs: 1182bad_gs:
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index c9a281f272f..32ff36596ab 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -24,40 +24,21 @@
24#include <trace/syscall.h> 24#include <trace/syscall.h>
25 25
26#include <asm/cacheflush.h> 26#include <asm/cacheflush.h>
27#include <asm/kprobes.h>
27#include <asm/ftrace.h> 28#include <asm/ftrace.h>
28#include <asm/nops.h> 29#include <asm/nops.h>
29#include <asm/nmi.h>
30
31 30
32#ifdef CONFIG_DYNAMIC_FTRACE 31#ifdef CONFIG_DYNAMIC_FTRACE
33 32
34/*
35 * modifying_code is set to notify NMIs that they need to use
36 * memory barriers when entering or exiting. But we don't want
37 * to burden NMIs with unnecessary memory barriers when code
38 * modification is not being done (which is most of the time).
39 *
40 * A mutex is already held when ftrace_arch_code_modify_prepare
41 * and post_process are called. No locks need to be taken here.
42 *
43 * Stop machine will make sure currently running NMIs are done
44 * and new NMIs will see the updated variable before we need
45 * to worry about NMIs doing memory barriers.
46 */
47static int modifying_code __read_mostly;
48static DEFINE_PER_CPU(int, save_modifying_code);
49
50int ftrace_arch_code_modify_prepare(void) 33int ftrace_arch_code_modify_prepare(void)
51{ 34{
52 set_kernel_text_rw(); 35 set_kernel_text_rw();
53 set_all_modules_text_rw(); 36 set_all_modules_text_rw();
54 modifying_code = 1;
55 return 0; 37 return 0;
56} 38}
57 39
58int ftrace_arch_code_modify_post_process(void) 40int ftrace_arch_code_modify_post_process(void)
59{ 41{
60 modifying_code = 0;
61 set_all_modules_text_ro(); 42 set_all_modules_text_ro();
62 set_kernel_text_ro(); 43 set_kernel_text_ro();
63 return 0; 44 return 0;
@@ -90,134 +71,6 @@ static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
90 return calc.code; 71 return calc.code;
91} 72}
92 73
93/*
94 * Modifying code must take extra care. On an SMP machine, if
95 * the code being modified is also being executed on another CPU
96 * that CPU will have undefined results and possibly take a GPF.
97 * We use kstop_machine to stop other CPUs from executing code.
98 * But this does not stop NMIs from happening. We still need
99 * to protect against that. We separate out the modification of
100 * the code to take care of this.
101 *
102 * Two buffers are added: An IP buffer and a "code" buffer.
103 *
104 * 1) Put the instruction pointer into the IP buffer
105 * and the new code into the "code" buffer.
106 * 2) Wait for any running NMIs to finish and set a flag that says
107 * we are modifying code, it is done in an atomic operation.
108 * 3) Write the code
109 * 4) clear the flag.
110 * 5) Wait for any running NMIs to finish.
111 *
112 * If an NMI is executed, the first thing it does is to call
113 * "ftrace_nmi_enter". This will check if the flag is set to write
114 * and if it is, it will write what is in the IP and "code" buffers.
115 *
116 * The trick is, it does not matter if everyone is writing the same
117 * content to the code location. Also, if a CPU is executing code
118 * it is OK to write to that code location if the contents being written
119 * are the same as what exists.
120 */
121
122#define MOD_CODE_WRITE_FLAG (1 << 31) /* set when NMI should do the write */
123static atomic_t nmi_running = ATOMIC_INIT(0);
124static int mod_code_status; /* holds return value of text write */
125static void *mod_code_ip; /* holds the IP to write to */
126static const void *mod_code_newcode; /* holds the text to write to the IP */
127
128static unsigned nmi_wait_count;
129static atomic_t nmi_update_count = ATOMIC_INIT(0);
130
131int ftrace_arch_read_dyn_info(char *buf, int size)
132{
133 int r;
134
135 r = snprintf(buf, size, "%u %u",
136 nmi_wait_count,
137 atomic_read(&nmi_update_count));
138 return r;
139}
140
141static void clear_mod_flag(void)
142{
143 int old = atomic_read(&nmi_running);
144
145 for (;;) {
146 int new = old & ~MOD_CODE_WRITE_FLAG;
147
148 if (old == new)
149 break;
150
151 old = atomic_cmpxchg(&nmi_running, old, new);
152 }
153}
154
155static void ftrace_mod_code(void)
156{
157 /*
158 * Yes, more than one CPU process can be writing to mod_code_status.
159 * (and the code itself)
160 * But if one were to fail, then they all should, and if one were
161 * to succeed, then they all should.
162 */
163 mod_code_status = probe_kernel_write(mod_code_ip, mod_code_newcode,
164 MCOUNT_INSN_SIZE);
165
166 /* if we fail, then kill any new writers */
167 if (mod_code_status)
168 clear_mod_flag();
169}
170
171void ftrace_nmi_enter(void)
172{
173 __this_cpu_write(save_modifying_code, modifying_code);
174
175 if (!__this_cpu_read(save_modifying_code))
176 return;
177
178 if (atomic_inc_return(&nmi_running) & MOD_CODE_WRITE_FLAG) {
179 smp_rmb();
180 ftrace_mod_code();
181 atomic_inc(&nmi_update_count);
182 }
183 /* Must have previous changes seen before executions */
184 smp_mb();
185}
186
187void ftrace_nmi_exit(void)
188{
189 if (!__this_cpu_read(save_modifying_code))
190 return;
191
192 /* Finish all executions before clearing nmi_running */
193 smp_mb();
194 atomic_dec(&nmi_running);
195}
196
197static void wait_for_nmi_and_set_mod_flag(void)
198{
199 if (!atomic_cmpxchg(&nmi_running, 0, MOD_CODE_WRITE_FLAG))
200 return;
201
202 do {
203 cpu_relax();
204 } while (atomic_cmpxchg(&nmi_running, 0, MOD_CODE_WRITE_FLAG));
205
206 nmi_wait_count++;
207}
208
209static void wait_for_nmi(void)
210{
211 if (!atomic_read(&nmi_running))
212 return;
213
214 do {
215 cpu_relax();
216 } while (atomic_read(&nmi_running));
217
218 nmi_wait_count++;
219}
220
221static inline int 74static inline int
222within(unsigned long addr, unsigned long start, unsigned long end) 75within(unsigned long addr, unsigned long start, unsigned long end)
223{ 76{
@@ -238,26 +91,7 @@ do_ftrace_mod_code(unsigned long ip, const void *new_code)
238 if (within(ip, (unsigned long)_text, (unsigned long)_etext)) 91 if (within(ip, (unsigned long)_text, (unsigned long)_etext))
239 ip = (unsigned long)__va(__pa(ip)); 92 ip = (unsigned long)__va(__pa(ip));
240 93
241 mod_code_ip = (void *)ip; 94 return probe_kernel_write((void *)ip, new_code, MCOUNT_INSN_SIZE);
242 mod_code_newcode = new_code;
243
244 /* The buffers need to be visible before we let NMIs write them */
245 smp_mb();
246
247 wait_for_nmi_and_set_mod_flag();
248
249 /* Make sure all running NMIs have finished before we write the code */
250 smp_mb();
251
252 ftrace_mod_code();
253
254 /* Make sure the write happens before clearing the bit */
255 smp_mb();
256
257 clear_mod_flag();
258 wait_for_nmi();
259
260 return mod_code_status;
261} 95}
262 96
263static const unsigned char *ftrace_nop_replace(void) 97static const unsigned char *ftrace_nop_replace(void)
@@ -334,6 +168,336 @@ int ftrace_update_ftrace_func(ftrace_func_t func)
334 return ret; 168 return ret;
335} 169}
336 170
171int modifying_ftrace_code __read_mostly;
172
173/*
174 * A breakpoint was added to the code address we are about to
175 * modify, and this is the handler that will just skip over it.
176 * We are either changing a nop into a trace call, or a trace
177 * call to a nop. While the change is taking place, we treat
178 * it just like it was a nop.
179 */
180int ftrace_int3_handler(struct pt_regs *regs)
181{
182 if (WARN_ON_ONCE(!regs))
183 return 0;
184
185 if (!ftrace_location(regs->ip - 1))
186 return 0;
187
188 regs->ip += MCOUNT_INSN_SIZE - 1;
189
190 return 1;
191}
192
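	/*
	 * modifying_ftrace_code is the flag the breakpoint trap consults
	 * before normal int3 processing. The consumer side is not part of
	 * this hunk; a plausible sketch of the hook in do_int3() (the exact
	 * placement in traps.c is an assumption):
	 *
	 * dotraplinkage void __kprobes do_int3(struct pt_regs *regs, long error_code)
	 * {
	 *	/* ftrace may be live-patching call sites; let it swallow its int3 */
	 *	if (unlikely(modifying_ftrace_code) && ftrace_int3_handler(regs))
	 *		return;
	 *
	 *	/* ... normal breakpoint handling follows ... */
	 * }
	 */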
193static int ftrace_write(unsigned long ip, const char *val, int size)
194{
195 /*
196 * On x86_64, kernel text mappings are mapped read-only with
197 * CONFIG_DEBUG_RODATA. So we use the kernel identity mapping instead
198 * of the kernel text mapping to modify the kernel text.
199 *
200 * For 32bit kernels, these mappings are same and we can use
201 * kernel identity mapping to modify code.
202 */
203 if (within(ip, (unsigned long)_text, (unsigned long)_etext))
204 ip = (unsigned long)__va(__pa(ip));
205
206 return probe_kernel_write((void *)ip, val, size);
207}
208
209static int add_break(unsigned long ip, const char *old)
210{
211 unsigned char replaced[MCOUNT_INSN_SIZE];
212 unsigned char brk = BREAKPOINT_INSTRUCTION;
213
214 if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE))
215 return -EFAULT;
216
217 /* Make sure it is what we expect it to be */
218 if (memcmp(replaced, old, MCOUNT_INSN_SIZE) != 0)
219 return -EINVAL;
220
221 if (ftrace_write(ip, &brk, 1))
222 return -EPERM;
223
224 return 0;
225}
226
227static int add_brk_on_call(struct dyn_ftrace *rec, unsigned long addr)
228{
229 unsigned const char *old;
230 unsigned long ip = rec->ip;
231
232 old = ftrace_call_replace(ip, addr);
233
234 return add_break(rec->ip, old);
235}
236
237
238static int add_brk_on_nop(struct dyn_ftrace *rec)
239{
240 unsigned const char *old;
241
242 old = ftrace_nop_replace();
243
244 return add_break(rec->ip, old);
245}
246
247static int add_breakpoints(struct dyn_ftrace *rec, int enable)
248{
249 unsigned long ftrace_addr;
250 int ret;
251
252 ret = ftrace_test_record(rec, enable);
253
254 ftrace_addr = (unsigned long)FTRACE_ADDR;
255
256 switch (ret) {
257 case FTRACE_UPDATE_IGNORE:
258 return 0;
259
260 case FTRACE_UPDATE_MAKE_CALL:
261 /* converting nop to call */
262 return add_brk_on_nop(rec);
263
264 case FTRACE_UPDATE_MAKE_NOP:
265 /* converting a call to a nop */
266 return add_brk_on_call(rec, ftrace_addr);
267 }
268 return 0;
269}
270
271/*
272 * On error, we need to remove breakpoints. This needs to
273 * be done carefully. If the address does not currently have a
274 * breakpoint, we know we are done. Otherwise, we look at the
275 * remaining 4 bytes of the instruction. If it matches a nop
276 * we replace the breakpoint with the nop. Otherwise we replace
277 * it with the call instruction.
278 */
279static int remove_breakpoint(struct dyn_ftrace *rec)
280{
281 unsigned char ins[MCOUNT_INSN_SIZE];
282 unsigned char brk = BREAKPOINT_INSTRUCTION;
283 const unsigned char *nop;
284 unsigned long ftrace_addr;
285 unsigned long ip = rec->ip;
286
287 /* If we fail the read, just give up */
288 if (probe_kernel_read(ins, (void *)ip, MCOUNT_INSN_SIZE))
289 return -EFAULT;
290
291 /* If this does not have a breakpoint, we are done */
292 if (ins[0] != brk)
293 return -1;
294
295 nop = ftrace_nop_replace();
296
297 /*
298 * If the last 4 bytes of the instruction do not match
299 * a nop, then we assume that this is a call to ftrace_addr.
300 */
301 if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) != 0) {
302 /*
303 * For extra paranoia, we check if the breakpoint is on
304 * a call that would actually jump to the ftrace_addr.
305 * If not, don't touch the breakpoint; we might just create
306 * a disaster.
307 */
308 ftrace_addr = (unsigned long)FTRACE_ADDR;
309 nop = ftrace_call_replace(ip, ftrace_addr);
310
311 if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) != 0)
312 return -EINVAL;
313 }
314
315 return probe_kernel_write((void *)ip, &nop[0], 1);
316}
317
318static int add_update_code(unsigned long ip, unsigned const char *new)
319{
320 /* skip breakpoint */
321 ip++;
322 new++;
323 if (ftrace_write(ip, new, MCOUNT_INSN_SIZE - 1))
324 return -EPERM;
325 return 0;
326}
327
328static int add_update_call(struct dyn_ftrace *rec, unsigned long addr)
329{
330 unsigned long ip = rec->ip;
331 unsigned const char *new;
332
333 new = ftrace_call_replace(ip, addr);
334 return add_update_code(ip, new);
335}
336
337static int add_update_nop(struct dyn_ftrace *rec)
338{
339 unsigned long ip = rec->ip;
340 unsigned const char *new;
341
342 new = ftrace_nop_replace();
343 return add_update_code(ip, new);
344}
345
346static int add_update(struct dyn_ftrace *rec, int enable)
347{
348 unsigned long ftrace_addr;
349 int ret;
350
351 ret = ftrace_test_record(rec, enable);
352
353 ftrace_addr = (unsigned long)FTRACE_ADDR;
354
355 switch (ret) {
356 case FTRACE_UPDATE_IGNORE:
357 return 0;
358
359 case FTRACE_UPDATE_MAKE_CALL:
360 /* converting nop to call */
361 return add_update_call(rec, ftrace_addr);
362
363 case FTRACE_UPDATE_MAKE_NOP:
364 /* converting a call to a nop */
365 return add_update_nop(rec);
366 }
367
368 return 0;
369}
370
371static int finish_update_call(struct dyn_ftrace *rec, unsigned long addr)
372{
373 unsigned long ip = rec->ip;
374 unsigned const char *new;
375
376 new = ftrace_call_replace(ip, addr);
377
378 if (ftrace_write(ip, new, 1))
379 return -EPERM;
380
381 return 0;
382}
383
384static int finish_update_nop(struct dyn_ftrace *rec)
385{
386 unsigned long ip = rec->ip;
387 unsigned const char *new;
388
389 new = ftrace_nop_replace();
390
391 if (ftrace_write(ip, new, 1))
392 return -EPERM;
393 return 0;
394}
395
396static int finish_update(struct dyn_ftrace *rec, int enable)
397{
398 unsigned long ftrace_addr;
399 int ret;
400
401 ret = ftrace_update_record(rec, enable);
402
403 ftrace_addr = (unsigned long)FTRACE_ADDR;
404
405 switch (ret) {
406 case FTRACE_UPDATE_IGNORE:
407 return 0;
408
409 case FTRACE_UPDATE_MAKE_CALL:
410 /* converting nop to call */
411 return finish_update_call(rec, ftrace_addr);
412
413 case FTRACE_UPDATE_MAKE_NOP:
414 /* converting a call to a nop */
415 return finish_update_nop(rec);
416 }
417
418 return 0;
419}
420
421static void do_sync_core(void *data)
422{
423 sync_core();
424}
425
426static void run_sync(void)
427{
428 int enable_irqs = irqs_disabled();
429
430 /* We may be called with interrupts disabled (on bootup). */
431 if (enable_irqs)
432 local_irq_enable();
433 on_each_cpu(do_sync_core, NULL, 1);
434 if (enable_irqs)
435 local_irq_disable();
436}
437
438void ftrace_replace_code(int enable)
439{
440 struct ftrace_rec_iter *iter;
441 struct dyn_ftrace *rec;
442 const char *report = "adding breakpoints";
443 int count = 0;
444 int ret;
445
446 for_ftrace_rec_iter(iter) {
447 rec = ftrace_rec_iter_record(iter);
448
449 ret = add_breakpoints(rec, enable);
450 if (ret)
451 goto remove_breakpoints;
452 count++;
453 }
454
455 run_sync();
456
457 report = "updating code";
458
459 for_ftrace_rec_iter(iter) {
460 rec = ftrace_rec_iter_record(iter);
461
462 ret = add_update(rec, enable);
463 if (ret)
464 goto remove_breakpoints;
465 }
466
467 run_sync();
468
469 report = "removing breakpoints";
470
471 for_ftrace_rec_iter(iter) {
472 rec = ftrace_rec_iter_record(iter);
473
474 ret = finish_update(rec, enable);
475 if (ret)
476 goto remove_breakpoints;
477 }
478
479 run_sync();
480
481 return;
482
483 remove_breakpoints:
484 ftrace_bug(ret, rec ? rec->ip : 0);
485 printk(KERN_WARNING "Failed on %s (%d):\n", report, count);
486 for_ftrace_rec_iter(iter) {
487 rec = ftrace_rec_iter_record(iter);
488 remove_breakpoint(rec);
489 }
490}
491
492void arch_ftrace_update_code(int command)
493{
494 modifying_ftrace_code++;
495
496 ftrace_modify_all_code(command);
497
498 modifying_ftrace_code--;
499}
500
337int __init ftrace_dyn_arch_init(void *data) 501int __init ftrace_dyn_arch_init(void *data)
338{ 502{
339 /* The return code is returned via data */ 503
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index ce0be7cd085..463c9797ca6 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -21,6 +21,7 @@
21#include <asm/msr-index.h> 21#include <asm/msr-index.h>
22#include <asm/cpufeature.h> 22#include <asm/cpufeature.h>
23#include <asm/percpu.h> 23#include <asm/percpu.h>
24#include <asm/nops.h>
24 25
25/* Physical address */ 26/* Physical address */
26#define pa(X) ((X) - __PAGE_OFFSET) 27#define pa(X) ((X) - __PAGE_OFFSET)
@@ -363,28 +364,23 @@ default_entry:
363 pushl $0 364 pushl $0
364 popfl 365 popfl
365 366
366#ifdef CONFIG_SMP
367 cmpb $0, ready
368 jnz checkCPUtype
369#endif /* CONFIG_SMP */
370
371/* 367/*
372 * start system 32-bit setup. We need to re-do some of the things done 368 * start system 32-bit setup. We need to re-do some of the things done
373 * in 16-bit mode for the "real" operations. 369 * in 16-bit mode for the "real" operations.
374 */ 370 */
375 call setup_idt 371 movl setup_once_ref,%eax
376 372 andl %eax,%eax
377checkCPUtype: 373 jz 1f # Did we do this already?
378 374 call *%eax
379 movl $-1,X86_CPUID # -1 for no CPUID initially 3751:
380 376
381/* check if it is 486 or 386. */ 377/* check if it is 486 or 386. */
382/* 378/*
383 * XXX - this does a lot of unnecessary setup. Alignment checks don't 379 * XXX - this does a lot of unnecessary setup. Alignment checks don't
384 * apply at our cpl of 0 and the stack ought to be aligned already, and 380 * apply at our cpl of 0 and the stack ought to be aligned already, and
385 * we don't need to preserve eflags. 381 * we don't need to preserve eflags.
386 */ 382 */
387 383 movl $-1,X86_CPUID # -1 for no CPUID initially
388 movb $3,X86 # at least 386 384 movb $3,X86 # at least 386
389 pushfl # push EFLAGS 385 pushfl # push EFLAGS
390 popl %eax # get EFLAGS 386 popl %eax # get EFLAGS
@@ -450,21 +446,6 @@ is386: movl $2,%ecx # set MP
450 movl $(__KERNEL_PERCPU), %eax 446 movl $(__KERNEL_PERCPU), %eax
451 movl %eax,%fs # set this cpu's percpu 447 movl %eax,%fs # set this cpu's percpu
452 448
453#ifdef CONFIG_CC_STACKPROTECTOR
454 /*
455 * The linker can't handle this by relocation. Manually set
456 * base address in stack canary segment descriptor.
457 */
458 cmpb $0,ready
459 jne 1f
460 movl $gdt_page,%eax
461 movl $stack_canary,%ecx
462 movw %cx, 8 * GDT_ENTRY_STACK_CANARY + 2(%eax)
463 shrl $16, %ecx
464 movb %cl, 8 * GDT_ENTRY_STACK_CANARY + 4(%eax)
465 movb %ch, 8 * GDT_ENTRY_STACK_CANARY + 7(%eax)
4661:
467#endif
468 movl $(__KERNEL_STACK_CANARY),%eax 449 movl $(__KERNEL_STACK_CANARY),%eax
469 movl %eax,%gs 450 movl %eax,%gs
470 451
@@ -473,7 +454,6 @@ is386: movl $2,%ecx # set MP
473 454
474 cld # gcc2 wants the direction flag cleared at all times 455 cld # gcc2 wants the direction flag cleared at all times
475 pushl $0 # fake return address for unwinder 456 pushl $0 # fake return address for unwinder
476 movb $1, ready
477 jmp *(initial_code) 457 jmp *(initial_code)
478 458
479/* 459/*
@@ -495,81 +475,122 @@ check_x87:
495 .byte 0xDB,0xE4 /* fsetpm for 287, ignored by 387 */ 475 .byte 0xDB,0xE4 /* fsetpm for 287, ignored by 387 */
496 ret 476 ret
497 477
478
479#include "verify_cpu.S"
480
498/* 481/*
499 * setup_idt 482 * setup_once
500 * 483 *
501 * sets up a idt with 256 entries pointing to 484 * The setup work we only want to run on the BSP.
502 * ignore_int, interrupt gates. It doesn't actually load
503 * idt - that can be done only after paging has been enabled
504 * and the kernel moved to PAGE_OFFSET. Interrupts
505 * are enabled elsewhere, when we can be relatively
506 * sure everything is ok.
507 * 485 *
508 * Warning: %esi is live across this function. 486 * Warning: %esi is live across this function.
509 */ 487 */
510setup_idt: 488__INIT
511 lea ignore_int,%edx 489setup_once:
512 movl $(__KERNEL_CS << 16),%eax 490 /*
513 movw %dx,%ax /* selector = 0x0010 = cs */ 491 * Set up a idt with 256 entries pointing to ignore_int,
514 movw $0x8E00,%dx /* interrupt gate - dpl=0, present */ 492 * interrupt gates. It doesn't actually load idt - that needs
493 * to be done on each CPU. Interrupts are enabled elsewhere,
494 * when we can be relatively sure everything is ok.
495 */
515 496
516 lea idt_table,%edi 497 movl $idt_table,%edi
517 mov $256,%ecx 498 movl $early_idt_handlers,%eax
518rp_sidt: 499 movl $NUM_EXCEPTION_VECTORS,%ecx
5001:
519 movl %eax,(%edi) 501 movl %eax,(%edi)
520 movl %edx,4(%edi) 502 movl %eax,4(%edi)
503 /* interrupt gate, dpl=0, present */
504 movl $(0x8E000000 + __KERNEL_CS),2(%edi)
505 addl $9,%eax
521 addl $8,%edi 506 addl $8,%edi
522 dec %ecx 507 loop 1b
523 jne rp_sidt
524 508
525.macro set_early_handler handler,trapno 509 movl $256 - NUM_EXCEPTION_VECTORS,%ecx
526 lea \handler,%edx 510 movl $ignore_int,%edx
527 movl $(__KERNEL_CS << 16),%eax 511 movl $(__KERNEL_CS << 16),%eax
528 movw %dx,%ax 512 movw %dx,%ax /* selector = 0x0010 = cs */
529 movw $0x8E00,%dx /* interrupt gate - dpl=0, present */ 513 movw $0x8E00,%dx /* interrupt gate - dpl=0, present */
530 lea idt_table,%edi 5142:
531 movl %eax,8*\trapno(%edi) 515 movl %eax,(%edi)
532 movl %edx,8*\trapno+4(%edi) 516 movl %edx,4(%edi)
533.endm 517 addl $8,%edi
518 loop 2b
534 519
535 set_early_handler handler=early_divide_err,trapno=0 520#ifdef CONFIG_CC_STACKPROTECTOR
536 set_early_handler handler=early_illegal_opcode,trapno=6 521 /*
537 set_early_handler handler=early_protection_fault,trapno=13 522 * Configure the stack canary. The linker can't handle this by
538 set_early_handler handler=early_page_fault,trapno=14 523 * relocation. Manually set base address in stack canary
524 * segment descriptor.
525 */
526 movl $gdt_page,%eax
527 movl $stack_canary,%ecx
528 movw %cx, 8 * GDT_ENTRY_STACK_CANARY + 2(%eax)
529 shrl $16, %ecx
530 movb %cl, 8 * GDT_ENTRY_STACK_CANARY + 4(%eax)
531 movb %ch, 8 * GDT_ENTRY_STACK_CANARY + 7(%eax)
532#endif
539 533
534 andl $0,setup_once_ref /* Once is enough, thanks */
540 ret 535 ret
541 536
542early_divide_err: 537ENTRY(early_idt_handlers)
543 xor %edx,%edx 538 # 36(%esp) %eflags
544 pushl $0 /* fake errcode */ 539 # 32(%esp) %cs
545 jmp early_fault 540 # 28(%esp) %eip
541 # 24(%esp) error code
542 i = 0
543 .rept NUM_EXCEPTION_VECTORS
544 .if (EXCEPTION_ERRCODE_MASK >> i) & 1
545 ASM_NOP2
546 .else
547 pushl $0 # Dummy error code, to make stack frame uniform
548 .endif
549 pushl $i # 20(%esp) Vector number
550 jmp early_idt_handler
551 i = i + 1
552 .endr
553ENDPROC(early_idt_handlers)
554
555 /* This is global to keep gas from relaxing the jumps */
556ENTRY(early_idt_handler)
557 cld
558 cmpl $2,%ss:early_recursion_flag
559 je hlt_loop
560 incl %ss:early_recursion_flag
546 561
547early_illegal_opcode: 562 push %eax # 16(%esp)
548 movl $6,%edx 563 push %ecx # 12(%esp)
549 pushl $0 /* fake errcode */ 564 push %edx # 8(%esp)
550 jmp early_fault 565 push %ds # 4(%esp)
566 push %es # 0(%esp)
567 movl $(__KERNEL_DS),%eax
568 movl %eax,%ds
569 movl %eax,%es
551 570
552early_protection_fault: 571 cmpl $(__KERNEL_CS),32(%esp)
553 movl $13,%edx 572 jne 10f
554 jmp early_fault
555 573
556early_page_fault: 574 leal 28(%esp),%eax # Pointer to %eip
557 movl $14,%edx 575 call early_fixup_exception
558 jmp early_fault 576 andl %eax,%eax
577 jnz ex_entry /* found an exception entry */
559 578
560early_fault: 57910:
561 cld
562#ifdef CONFIG_PRINTK 580#ifdef CONFIG_PRINTK
563 pusha 581 xorl %eax,%eax
564 movl $(__KERNEL_DS),%eax 582 movw %ax,2(%esp) /* clean up the segment values on some cpus */
565 movl %eax,%ds 583 movw %ax,6(%esp)
566 movl %eax,%es 584 movw %ax,34(%esp)
567 cmpl $2,early_recursion_flag 585 leal 40(%esp),%eax
568 je hlt_loop 586 pushl %eax /* %esp before the exception */
569 incl early_recursion_flag 587 pushl %ebx
588 pushl %ebp
589 pushl %esi
590 pushl %edi
570 movl %cr2,%eax 591 movl %cr2,%eax
571 pushl %eax 592 pushl %eax
572 pushl %edx /* trapno */ 593 pushl (20+6*4)(%esp) /* trapno */
573 pushl $fault_msg 594 pushl $fault_msg
574 call printk 595 call printk
575#endif 596#endif
@@ -578,6 +599,17 @@ hlt_loop:
578 hlt 599 hlt
579 jmp hlt_loop 600 jmp hlt_loop
580 601
602ex_entry:
603 pop %es
604 pop %ds
605 pop %edx
606 pop %ecx
607 pop %eax
608 addl $8,%esp /* drop vector number and error code */
609 decl %ss:early_recursion_flag
610 iret
611ENDPROC(early_idt_handler)
612
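When the faulting %cs is the kernel's, the handler passes a pointer to the saved %eip to early_fixup_exception() and resumes at ex_entry if a fixup was found. That helper is added elsewhere in this series; a hedged C sketch of its likely shape, assuming the generic search_exception_tables() lookup:

	int __init early_fixup_exception(unsigned long *ip)
	{
		const struct exception_table_entry *fixup;

		fixup = search_exception_tables(*ip);
		if (!fixup)
			return 0;

		*ip = fixup->fixup;	/* resume at the fixup landing pad */
		return 1;
	}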
581/* This is the default interrupt "handler" :-) */ 613/* This is the default interrupt "handler" :-) */
582 ALIGN 614 ALIGN
583ignore_int: 615ignore_int:
@@ -611,13 +643,18 @@ ignore_int:
611 popl %eax 643 popl %eax
612#endif 644#endif
613 iret 645 iret
646ENDPROC(ignore_int)
647__INITDATA
648 .align 4
649early_recursion_flag:
650 .long 0
614 651
615#include "verify_cpu.S" 652__REFDATA
616 653 .align 4
617 __REFDATA
618.align 4
619ENTRY(initial_code) 654ENTRY(initial_code)
620 .long i386_start_kernel 655 .long i386_start_kernel
656ENTRY(setup_once_ref)
657 .long setup_once
621 658
622/* 659/*
623 * BSS section 660 * BSS section
@@ -670,22 +707,19 @@ ENTRY(initial_page_table)
670ENTRY(stack_start) 707ENTRY(stack_start)
671 .long init_thread_union+THREAD_SIZE 708 .long init_thread_union+THREAD_SIZE
672 709
673early_recursion_flag: 710__INITRODATA
674 .long 0
675
676ready: .byte 0
677
678int_msg: 711int_msg:
679 .asciz "Unknown interrupt or fault at: %p %p %p\n" 712 .asciz "Unknown interrupt or fault at: %p %p %p\n"
680 713
681fault_msg: 714fault_msg:
682/* fault info: */ 715/* fault info: */
683 .ascii "BUG: Int %d: CR2 %p\n" 716 .ascii "BUG: Int %d: CR2 %p\n"
684/* pusha regs: */ 717/* regs pushed in early_idt_handler: */
685 .ascii " EDI %p ESI %p EBP %p ESP %p\n" 718 .ascii " EDI %p ESI %p EBP %p EBX %p\n"
686 .ascii " EBX %p EDX %p ECX %p EAX %p\n" 719 .ascii " ESP %p ES %p DS %p\n"
720 .ascii " EDX %p ECX %p EAX %p\n"
687/* fault frame: */ 721/* fault frame: */
688 .ascii " err %p EIP %p CS %p flg %p\n" 722 .ascii " vec %p err %p EIP %p CS %p flg %p\n"
689 .ascii "Stack: %p %p %p %p %p %p %p %p\n" 723 .ascii "Stack: %p %p %p %p %p %p %p %p\n"
690 .ascii " %p %p %p %p %p %p %p %p\n" 724 .ascii " %p %p %p %p %p %p %p %p\n"
691 .asciz " %p %p %p %p %p %p %p %p\n" 725 .asciz " %p %p %p %p %p %p %p %p\n"
@@ -699,6 +733,7 @@ fault_msg:
699 * segment size, and 32-bit linear address value: 733 * segment size, and 32-bit linear address value:
700 */ 734 */
701 735
736 .data
702.globl boot_gdt_descr 737.globl boot_gdt_descr
703.globl idt_descr 738.globl idt_descr
704 739
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 40f4eb3766d..7a40f244732 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -19,12 +19,15 @@
19#include <asm/cache.h> 19#include <asm/cache.h>
20#include <asm/processor-flags.h> 20#include <asm/processor-flags.h>
21#include <asm/percpu.h> 21#include <asm/percpu.h>
22#include <asm/nops.h>
22 23
23#ifdef CONFIG_PARAVIRT 24#ifdef CONFIG_PARAVIRT
24#include <asm/asm-offsets.h> 25#include <asm/asm-offsets.h>
25#include <asm/paravirt.h> 26#include <asm/paravirt.h>
27#define GET_CR2_INTO(reg) GET_CR2_INTO_RAX ; movq %rax, reg
26#else 28#else
27#define GET_CR2_INTO_RCX movq %cr2, %rcx 29#define GET_CR2_INTO(reg) movq %cr2, reg
30#define INTERRUPT_RETURN iretq
28#endif 31#endif
29 32
30/* we are not able to switch in one step to the final KERNEL ADDRESS SPACE 33/* we are not able to switch in one step to the final KERNEL ADDRESS SPACE
@@ -270,36 +273,56 @@ bad_address:
270 jmp bad_address 273 jmp bad_address
271 274
272 .section ".init.text","ax" 275 .section ".init.text","ax"
273#ifdef CONFIG_EARLY_PRINTK
274 .globl early_idt_handlers 276 .globl early_idt_handlers
275early_idt_handlers: 277early_idt_handlers:
278 # 104(%rsp) %rflags
279 # 96(%rsp) %cs
280 # 88(%rsp) %rip
281 # 80(%rsp) error code
276 i = 0 282 i = 0
277 .rept NUM_EXCEPTION_VECTORS 283 .rept NUM_EXCEPTION_VECTORS
278 movl $i, %esi 284 .if (EXCEPTION_ERRCODE_MASK >> i) & 1
285 ASM_NOP2
286 .else
287 pushq $0 # Dummy error code, to make stack frame uniform
288 .endif
289 pushq $i # 72(%rsp) Vector number
279 jmp early_idt_handler 290 jmp early_idt_handler
280 i = i + 1 291 i = i + 1
281 .endr 292 .endr
282#endif
283 293
284ENTRY(early_idt_handler) 294ENTRY(early_idt_handler)
285#ifdef CONFIG_EARLY_PRINTK 295 cld
296
286 cmpl $2,early_recursion_flag(%rip) 297 cmpl $2,early_recursion_flag(%rip)
287 jz 1f 298 jz 1f
288 incl early_recursion_flag(%rip) 299 incl early_recursion_flag(%rip)
289 GET_CR2_INTO_RCX 300
290 movq %rcx,%r9 301 pushq %rax # 64(%rsp)
291 xorl %r8d,%r8d # zero for error code 302 pushq %rcx # 56(%rsp)
292 movl %esi,%ecx # get vector number 303 pushq %rdx # 48(%rsp)
293 # Test %ecx against mask of vectors that push error code. 304 pushq %rsi # 40(%rsp)
294 cmpl $31,%ecx 305 pushq %rdi # 32(%rsp)
295 ja 0f 306 pushq %r8 # 24(%rsp)
296 movl $1,%eax 307 pushq %r9 # 16(%rsp)
297 salq %cl,%rax 308 pushq %r10 # 8(%rsp)
298 testl $0x27d00,%eax 309 pushq %r11 # 0(%rsp)
299 je 0f 310
300 popq %r8 # get error code 311 cmpl $__KERNEL_CS,96(%rsp)
3010: movq 0(%rsp),%rcx # get ip 312 jne 10f
302 movq 8(%rsp),%rdx # get cs 313
314 leaq 88(%rsp),%rdi # Pointer to %rip
315 call early_fixup_exception
316 andl %eax,%eax
317 jnz 20f # Found an exception entry
318
31910:
320#ifdef CONFIG_EARLY_PRINTK
321 GET_CR2_INTO(%r9) # can clobber any volatile register if pv
322 movl 80(%rsp),%r8d # error code
323 movl 72(%rsp),%esi # vector number
324 movl 96(%rsp),%edx # %cs
325 movq 88(%rsp),%rcx # %rip
303 xorl %eax,%eax 326 xorl %eax,%eax
304 leaq early_idt_msg(%rip),%rdi 327 leaq early_idt_msg(%rip),%rdi
305 call early_printk 328 call early_printk
@@ -308,17 +331,32 @@ ENTRY(early_idt_handler)
308 call dump_stack 331 call dump_stack
309#ifdef CONFIG_KALLSYMS 332#ifdef CONFIG_KALLSYMS
310 leaq early_idt_ripmsg(%rip),%rdi 333 leaq early_idt_ripmsg(%rip),%rdi
311 movq 0(%rsp),%rsi # get rip again 334 movq 40(%rsp),%rsi # %rip again
312 call __print_symbol 335 call __print_symbol
313#endif 336#endif
314#endif /* EARLY_PRINTK */ 337#endif /* EARLY_PRINTK */
3151: hlt 3381: hlt
316 jmp 1b 339 jmp 1b
317 340
318#ifdef CONFIG_EARLY_PRINTK 34120: # Exception table entry found
342 popq %r11
343 popq %r10
344 popq %r9
345 popq %r8
346 popq %rdi
347 popq %rsi
348 popq %rdx
349 popq %rcx
350 popq %rax
351 addq $16,%rsp # drop vector number and error code
352 decl early_recursion_flag(%rip)
353 INTERRUPT_RETURN
354
355 .balign 4
319early_recursion_flag: 356early_recursion_flag:
320 .long 0 357 .long 0
321 358
359#ifdef CONFIG_EARLY_PRINTK
322early_idt_msg: 360early_idt_msg:
323 .asciz "PANIC: early exception %02lx rip %lx:%lx error %lx cr2 %lx\n" 361 .asciz "PANIC: early exception %02lx rip %lx:%lx error %lx cr2 %lx\n"
324early_idt_ripmsg: 362early_idt_ripmsg:
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c
index 2d6e6498c17..f250431fb50 100644
--- a/arch/x86/kernel/i387.c
+++ b/arch/x86/kernel/i387.c
@@ -88,7 +88,7 @@ void kernel_fpu_begin(void)
88 __thread_clear_has_fpu(me); 88 __thread_clear_has_fpu(me);
89 /* We do 'stts()' in kernel_fpu_end() */ 89 /* We do 'stts()' in kernel_fpu_end() */
90 } else { 90 } else {
91 percpu_write(fpu_owner_task, NULL); 91 this_cpu_write(fpu_owner_task, NULL);
92 clts(); 92 clts();
93 } 93 }
94} 94}
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index e213fc8408d..e2f751efb7b 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -1037,9 +1037,9 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
1037 "current sp %p does not match saved sp %p\n", 1037 "current sp %p does not match saved sp %p\n",
1038 stack_addr(regs), kcb->jprobe_saved_sp); 1038 stack_addr(regs), kcb->jprobe_saved_sp);
1039 printk(KERN_ERR "Saved registers for jprobe %p\n", jp); 1039 printk(KERN_ERR "Saved registers for jprobe %p\n", jp);
1040 show_registers(saved_regs); 1040 show_regs(saved_regs);
1041 printk(KERN_ERR "Current registers\n"); 1041 printk(KERN_ERR "Current registers\n");
1042 show_registers(regs); 1042 show_regs(regs);
1043 BUG(); 1043 BUG();
1044 } 1044 }
1045 *regs = kcb->jprobe_saved_regs; 1045 *regs = kcb->jprobe_saved_regs;
diff --git a/arch/x86/kernel/mca_32.c b/arch/x86/kernel/mca_32.c
deleted file mode 100644
index 7eb1e2b9782..00000000000
--- a/arch/x86/kernel/mca_32.c
+++ /dev/null
@@ -1,476 +0,0 @@
1/*
2 * Written by Martin Kolinek, February 1996
3 *
4 * Changes:
5 *
6 * Chris Beauregard July 28th, 1996
7 * - Fixed up integrated SCSI detection
8 *
9 * Chris Beauregard August 3rd, 1996
10 * - Made mca_info local
11 * - Made integrated registers accessible through standard function calls
12 * - Added name field
13 * - More sanity checking
14 *
15 * Chris Beauregard August 9th, 1996
16 * - Rewrote /proc/mca
17 *
18 * Chris Beauregard January 7th, 1997
19 * - Added basic NMI-processing
20 * - Added more information to mca_info structure
21 *
22 * David Weinehall October 12th, 1998
23 * - Made a lot of cleaning up in the source
24 * - Added use of save_flags / restore_flags
25 * - Added the 'driver_loaded' flag in MCA_adapter
26 * - Added an alternative implementation of ZP Gu's mca_find_unused_adapter
27 *
28 * David Weinehall March 24th, 1999
29 * - Fixed the output of 'Driver Installed' in /proc/mca/pos
30 * - Made the Integrated Video & SCSI show up even if they have id 0000
31 *
32 * Alexander Viro November 9th, 1999
33 * - Switched to regular procfs methods
34 *
35 * Alfred Arnold & David Weinehall August 23rd, 2000
36 * - Added support for Planar POS-registers
37 */
38
39#include <linux/module.h>
40#include <linux/types.h>
41#include <linux/errno.h>
42#include <linux/kernel.h>
43#include <linux/mca.h>
44#include <linux/kprobes.h>
45#include <linux/slab.h>
46#include <asm/io.h>
47#include <linux/proc_fs.h>
48#include <linux/mman.h>
49#include <linux/mm.h>
50#include <linux/pagemap.h>
51#include <linux/ioport.h>
52#include <asm/uaccess.h>
53#include <linux/init.h>
54
55static unsigned char which_scsi;
56
57int MCA_bus;
58EXPORT_SYMBOL(MCA_bus);
59
60/*
61 * Motherboard register spinlock. Untested on SMP at the moment, but
62 * are there any MCA SMP boxes?
63 *
64 * Yes - Alan
65 */
66static DEFINE_SPINLOCK(mca_lock);
67
68/* Build the status info for the adapter */
69
70static void mca_configure_adapter_status(struct mca_device *mca_dev)
71{
72 mca_dev->status = MCA_ADAPTER_NONE;
73
74 mca_dev->pos_id = mca_dev->pos[0]
75 + (mca_dev->pos[1] << 8);
76
77 if (!mca_dev->pos_id && mca_dev->slot < MCA_MAX_SLOT_NR) {
78
79 /*
80 * id = 0x0000 usually indicates hardware failure,
81 * however, ZP Gu <zpg@castle.net> reports that his 9556
82 * has 0x0000 as id and everything still works. There
83 * also seems to be an adapter with id = 0x0000; the
84 * NCR Parallel Bus Memory Card. Until this is confirmed,
85 * however, this code will stay.
86 */
87
88 mca_dev->status = MCA_ADAPTER_ERROR;
89
90 return;
91 } else if (mca_dev->pos_id != 0xffff) {
92
93 /*
94 * 0xffff usually indicates that there's no adapter,
95 * however, some integrated adapters may have 0xffff as
96 * their id and still be valid. Examples are on-board
97 * VGA of the 55sx, the integrated SCSI of the 56 & 57,
98 * and possibly also the 95 ULTIMEDIA.
99 */
100
101 mca_dev->status = MCA_ADAPTER_NORMAL;
102 }
103
104 if ((mca_dev->pos_id == 0xffff ||
105 mca_dev->pos_id == 0x0000) && mca_dev->slot >= MCA_MAX_SLOT_NR) {
106 int j;
107
108 for (j = 2; j < 8; j++) {
109 if (mca_dev->pos[j] != 0xff) {
110 mca_dev->status = MCA_ADAPTER_NORMAL;
111 break;
112 }
113 }
114 }
115
116 if (!(mca_dev->pos[2] & MCA_ENABLED)) {
117
118 /* enabled bit is in POS 2 */
119
120 mca_dev->status = MCA_ADAPTER_DISABLED;
121 }
122} /* mca_configure_adapter_status */
123
124/*--------------------------------------------------------------------*/
125
126static struct resource mca_standard_resources[] = {
127 { .start = 0x60, .end = 0x60, .name = "system control port B (MCA)" },
128 { .start = 0x90, .end = 0x90, .name = "arbitration (MCA)" },
129 { .start = 0x91, .end = 0x91, .name = "card Select Feedback (MCA)" },
130 { .start = 0x92, .end = 0x92, .name = "system Control port A (MCA)" },
131 { .start = 0x94, .end = 0x94, .name = "system board setup (MCA)" },
132 { .start = 0x96, .end = 0x97, .name = "POS (MCA)" },
133 { .start = 0x100, .end = 0x107, .name = "POS (MCA)" }
134};
135
136#define MCA_STANDARD_RESOURCES ARRAY_SIZE(mca_standard_resources)
137
138/*
139 * mca_read_and_store_pos - read the POS registers into a memory buffer
140 * @pos: a char pointer to 8 bytes, contains the POS register value on
141 * successful return
142 *
143 * Returns 1 if a card actually exists (i.e. the pos isn't
144 * all 0xff) or 0 otherwise
145 */
146static int mca_read_and_store_pos(unsigned char *pos)
147{
148 int j;
149 int found = 0;
150
151 for (j = 0; j < 8; j++) {
152 pos[j] = inb_p(MCA_POS_REG(j));
153 if (pos[j] != 0xff) {
154 /* 0xff all across means no device. 0x00 means
155 * something's broken, but a device is
156 * probably there. However, if you get 0x00
157 * from a motherboard register it won't matter
158 * what we find. For the record, on the
159 * 57SLC, the integrated SCSI adapter has
160 * 0xffff for the adapter ID, but nonzero for
161 * other registers. */
162
163 found = 1;
164 }
165 }
166 return found;
167}
168
169static unsigned char mca_pc_read_pos(struct mca_device *mca_dev, int reg)
170{
171 unsigned char byte;
172 unsigned long flags;
173
174 if (reg < 0 || reg >= 8)
175 return 0;
176
177 spin_lock_irqsave(&mca_lock, flags);
178 if (mca_dev->pos_register) {
179 /* Disable adapter setup, enable motherboard setup */
180
181 outb_p(0, MCA_ADAPTER_SETUP_REG);
182 outb_p(mca_dev->pos_register, MCA_MOTHERBOARD_SETUP_REG);
183
184 byte = inb_p(MCA_POS_REG(reg));
185 outb_p(0xff, MCA_MOTHERBOARD_SETUP_REG);
186 } else {
187
188 /* Make sure motherboard setup is off */
189
190 outb_p(0xff, MCA_MOTHERBOARD_SETUP_REG);
191
192 /* Read the appropriate register */
193
194 outb_p(0x8|(mca_dev->slot & 0xf), MCA_ADAPTER_SETUP_REG);
195 byte = inb_p(MCA_POS_REG(reg));
196 outb_p(0, MCA_ADAPTER_SETUP_REG);
197 }
198 spin_unlock_irqrestore(&mca_lock, flags);
199
200 mca_dev->pos[reg] = byte;
201
202 return byte;
203}
204
205static void mca_pc_write_pos(struct mca_device *mca_dev, int reg,
206 unsigned char byte)
207{
208 unsigned long flags;
209
210 if (reg < 0 || reg >= 8)
211 return;
212
213 spin_lock_irqsave(&mca_lock, flags);
214
215 /* Make sure motherboard setup is off */
216
217 outb_p(0xff, MCA_MOTHERBOARD_SETUP_REG);
218
219 /* Read in the appropriate register */
220
221 outb_p(0x8|(mca_dev->slot&0xf), MCA_ADAPTER_SETUP_REG);
222 outb_p(byte, MCA_POS_REG(reg));
223 outb_p(0, MCA_ADAPTER_SETUP_REG);
224
225 spin_unlock_irqrestore(&mca_lock, flags);
226
227 /* Update the global register list, while we have the byte */
228
229 mca_dev->pos[reg] = byte;
230
231}
232
233/* for the primary MCA bus, we have identity transforms */
234static int mca_dummy_transform_irq(struct mca_device *mca_dev, int irq)
235{
236 return irq;
237}
238
239static int mca_dummy_transform_ioport(struct mca_device *mca_dev, int port)
240{
241 return port;
242}
243
244static void *mca_dummy_transform_memory(struct mca_device *mca_dev, void *mem)
245{
246 return mem;
247}
248
249
250static int __init mca_init(void)
251{
252 unsigned int i, j;
253 struct mca_device *mca_dev;
254 unsigned char pos[8];
255 short mca_builtin_scsi_ports[] = {0xf7, 0xfd, 0x00};
256 struct mca_bus *bus;
257
258 /*
259 * WARNING: Be careful when making changes here. Putting an adapter
260 * and the motherboard simultaneously into setup mode may result in
261 * damage to chips (according to The Indispensable PC Hardware Book
262 * by Hans-Peter Messmer). Also, we disable system interrupts (so
263 * that we are not disturbed in the middle of this).
264 */
265
266 /* Make sure the MCA bus is present */
267
268 if (mca_system_init()) {
269 printk(KERN_ERR "MCA bus system initialisation failed\n");
270 return -ENODEV;
271 }
272
273 if (!MCA_bus)
274 return -ENODEV;
275
276 printk(KERN_INFO "Micro Channel bus detected.\n");
277
278 /* All MCA systems have at least a primary bus */
279 bus = mca_attach_bus(MCA_PRIMARY_BUS);
280 if (!bus)
281 goto out_nomem;
282 bus->default_dma_mask = 0xffffffffLL;
283 bus->f.mca_write_pos = mca_pc_write_pos;
284 bus->f.mca_read_pos = mca_pc_read_pos;
285 bus->f.mca_transform_irq = mca_dummy_transform_irq;
286 bus->f.mca_transform_ioport = mca_dummy_transform_ioport;
287 bus->f.mca_transform_memory = mca_dummy_transform_memory;
288
289 /* get the motherboard device */
290 mca_dev = kzalloc(sizeof(struct mca_device), GFP_KERNEL);
291 if (unlikely(!mca_dev))
292 goto out_nomem;
293
294 /*
295 * We do not expect many MCA interrupts during initialization,
296 * but let us be safe:
297 */
298 spin_lock_irq(&mca_lock);
299
300 /* Make sure adapter setup is off */
301
302 outb_p(0, MCA_ADAPTER_SETUP_REG);
303
304 /* Read motherboard POS registers */
305
306 mca_dev->pos_register = 0x7f;
307 outb_p(mca_dev->pos_register, MCA_MOTHERBOARD_SETUP_REG);
308 mca_dev->name[0] = 0;
309 mca_read_and_store_pos(mca_dev->pos);
310 mca_configure_adapter_status(mca_dev);
311 /* fake POS and slot for a motherboard */
312 mca_dev->pos_id = MCA_MOTHERBOARD_POS;
313 mca_dev->slot = MCA_MOTHERBOARD;
314 mca_register_device(MCA_PRIMARY_BUS, mca_dev);
315
316 mca_dev = kzalloc(sizeof(struct mca_device), GFP_ATOMIC);
317 if (unlikely(!mca_dev))
318 goto out_unlock_nomem;
319
320 /* Put motherboard into video setup mode, read integrated video
321 * POS registers, and turn motherboard setup off.
322 */
323
324 mca_dev->pos_register = 0xdf;
325 outb_p(mca_dev->pos_register, MCA_MOTHERBOARD_SETUP_REG);
326 mca_dev->name[0] = 0;
327 mca_read_and_store_pos(mca_dev->pos);
328 mca_configure_adapter_status(mca_dev);
329 /* fake POS and slot for the integrated video */
330 mca_dev->pos_id = MCA_INTEGVIDEO_POS;
331 mca_dev->slot = MCA_INTEGVIDEO;
332 mca_register_device(MCA_PRIMARY_BUS, mca_dev);
333
334 /*
335 * Put motherboard into scsi setup mode, read integrated scsi
336 * POS registers, and turn motherboard setup off.
337 *
338 * It seems there are two possible SCSI registers. Martin says that
339 * for the 56,57, 0xf7 is the one, but fails on the 76.
340 * Alfredo (apena@vnet.ibm.com) says
341 * 0xfd works on his machine. We'll try both of them. I figure it's
342 * a good bet that only one could be valid at a time. This could
343 * screw up though if one is used for something else on the other
344 * machine.
345 */
346
347 for (i = 0; (which_scsi = mca_builtin_scsi_ports[i]) != 0; i++) {
348 outb_p(which_scsi, MCA_MOTHERBOARD_SETUP_REG);
349 if (mca_read_and_store_pos(pos))
350 break;
351 }
352 if (which_scsi) {
353 /* found a scsi card */
354 mca_dev = kzalloc(sizeof(struct mca_device), GFP_ATOMIC);
355 if (unlikely(!mca_dev))
356 goto out_unlock_nomem;
357
358 for (j = 0; j < 8; j++)
359 mca_dev->pos[j] = pos[j];
360
361 mca_configure_adapter_status(mca_dev);
362 /* fake POS and slot for integrated SCSI controller */
363 mca_dev->pos_id = MCA_INTEGSCSI_POS;
364 mca_dev->slot = MCA_INTEGSCSI;
365 mca_dev->pos_register = which_scsi;
366 mca_register_device(MCA_PRIMARY_BUS, mca_dev);
367 }
368
369 /* Turn off motherboard setup */
370
371 outb_p(0xff, MCA_MOTHERBOARD_SETUP_REG);
372
373 /*
374 * Now loop over MCA slots: put each adapter into setup mode, and
375 * read its POS registers. Then put adapter setup off.
376 */
377
378 for (i = 0; i < MCA_MAX_SLOT_NR; i++) {
379 outb_p(0x8|(i&0xf), MCA_ADAPTER_SETUP_REG);
380 if (!mca_read_and_store_pos(pos))
381 continue;
382
383 mca_dev = kzalloc(sizeof(struct mca_device), GFP_ATOMIC);
384 if (unlikely(!mca_dev))
385 goto out_unlock_nomem;
386
387 for (j = 0; j < 8; j++)
388 mca_dev->pos[j] = pos[j];
389
390 mca_dev->driver_loaded = 0;
391 mca_dev->slot = i;
392 mca_dev->pos_register = 0;
393 mca_configure_adapter_status(mca_dev);
394 mca_register_device(MCA_PRIMARY_BUS, mca_dev);
395 }
396 outb_p(0, MCA_ADAPTER_SETUP_REG);
397
398 /* Enable interrupts and return memory start */
399 spin_unlock_irq(&mca_lock);
400
401 for (i = 0; i < MCA_STANDARD_RESOURCES; i++)
402 request_resource(&ioport_resource, mca_standard_resources + i);
403
404 mca_do_proc_init();
405
406 return 0;
407
408 out_unlock_nomem:
409 spin_unlock_irq(&mca_lock);
410 out_nomem:
411 printk(KERN_EMERG "Failed memory allocation in MCA setup!\n");
412 return -ENOMEM;
413}
414
415subsys_initcall(mca_init);
416
417/*--------------------------------------------------------------------*/
418
419static __kprobes void
420mca_handle_nmi_device(struct mca_device *mca_dev, int check_flag)
421{
422 int slot = mca_dev->slot;
423
424 if (slot == MCA_INTEGSCSI) {
425 printk(KERN_CRIT "NMI: caused by MCA integrated SCSI adapter (%s)\n",
426 mca_dev->name);
427 } else if (slot == MCA_INTEGVIDEO) {
428 printk(KERN_CRIT "NMI: caused by MCA integrated video adapter (%s)\n",
429 mca_dev->name);
430 } else if (slot == MCA_MOTHERBOARD) {
431 printk(KERN_CRIT "NMI: caused by motherboard (%s)\n",
432 mca_dev->name);
433 }
434
435 /* More info available in POS 6 and 7? */
436
437 if (check_flag) {
438 unsigned char pos6, pos7;
439
440 pos6 = mca_device_read_pos(mca_dev, 6);
441 pos7 = mca_device_read_pos(mca_dev, 7);
442
443 printk(KERN_CRIT "NMI: POS 6 = 0x%x, POS 7 = 0x%x\n", pos6, pos7);
444 }
445
446} /* mca_handle_nmi_slot */
447
448/*--------------------------------------------------------------------*/
449
450static int __kprobes mca_handle_nmi_callback(struct device *dev, void *data)
451{
452 struct mca_device *mca_dev = to_mca_device(dev);
453 unsigned char pos5;
454
455 pos5 = mca_device_read_pos(mca_dev, 5);
456
457 if (!(pos5 & 0x80)) {
458 /*
459 * Bit 7 of POS 5 is reset when this adapter has a hardware
460 * error. Bit 6 is reset if there's error information
461 * available in POS 6 and 7.
462 */
463 mca_handle_nmi_device(mca_dev, !(pos5 & 0x40));
464 return 1;
465 }
466 return 0;
467}
468
469void __kprobes mca_handle_nmi(void)
470{
471 /*
472 * First try - scan the various adapters and see if a specific
473 * adapter was responsible for the error.
474 */
475 bus_for_each_dev(&mca_bus_type, NULL, NULL, mca_handle_nmi_callback);
476}
diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c
index c9bda6d6035..fbdfc691718 100644
--- a/arch/x86/kernel/microcode_core.c
+++ b/arch/x86/kernel/microcode_core.c
@@ -299,12 +299,11 @@ static ssize_t reload_store(struct device *dev,
299{ 299{
300 unsigned long val; 300 unsigned long val;
301 int cpu = dev->id; 301 int cpu = dev->id;
302 int ret = 0; 302 ssize_t ret = 0;
303 char *end;
304 303
305 val = simple_strtoul(buf, &end, 0); 304 ret = kstrtoul(buf, 0, &val);
306 if (end == buf) 305 if (ret)
307 return -EINVAL; 306 return ret;
308 307
309 if (val == 1) { 308 if (val == 1) {
310 get_online_cpus(); 309 get_online_cpus();
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index ca470e4c92d..b02d4dd6b8a 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -97,7 +97,7 @@ static void __init MP_bus_info(struct mpc_bus *m)
97 97
98 set_bit(m->busid, mp_bus_not_pci); 98 set_bit(m->busid, mp_bus_not_pci);
99 if (strncmp(str, BUSTYPE_ISA, sizeof(BUSTYPE_ISA) - 1) == 0) { 99 if (strncmp(str, BUSTYPE_ISA, sizeof(BUSTYPE_ISA) - 1) == 0) {
100#if defined(CONFIG_EISA) || defined(CONFIG_MCA) 100#ifdef CONFIG_EISA
101 mp_bus_id_to_type[m->busid] = MP_BUS_ISA; 101 mp_bus_id_to_type[m->busid] = MP_BUS_ISA;
102#endif 102#endif
103 } else if (strncmp(str, BUSTYPE_PCI, sizeof(BUSTYPE_PCI) - 1) == 0) { 103 } else if (strncmp(str, BUSTYPE_PCI, sizeof(BUSTYPE_PCI) - 1) == 0) {
@@ -105,12 +105,10 @@ static void __init MP_bus_info(struct mpc_bus *m)
105 x86_init.mpparse.mpc_oem_pci_bus(m); 105 x86_init.mpparse.mpc_oem_pci_bus(m);
106 106
107 clear_bit(m->busid, mp_bus_not_pci); 107 clear_bit(m->busid, mp_bus_not_pci);
108#if defined(CONFIG_EISA) || defined(CONFIG_MCA) 108#ifdef CONFIG_EISA
109 mp_bus_id_to_type[m->busid] = MP_BUS_PCI; 109 mp_bus_id_to_type[m->busid] = MP_BUS_PCI;
110 } else if (strncmp(str, BUSTYPE_EISA, sizeof(BUSTYPE_EISA) - 1) == 0) { 110 } else if (strncmp(str, BUSTYPE_EISA, sizeof(BUSTYPE_EISA) - 1) == 0) {
111 mp_bus_id_to_type[m->busid] = MP_BUS_EISA; 111 mp_bus_id_to_type[m->busid] = MP_BUS_EISA;
112 } else if (strncmp(str, BUSTYPE_MCA, sizeof(BUSTYPE_MCA) - 1) == 0) {
113 mp_bus_id_to_type[m->busid] = MP_BUS_MCA;
114#endif 112#endif
115 } else 113 } else
116 printk(KERN_WARNING "Unknown bustype %s - ignoring\n", str); 114 printk(KERN_WARNING "Unknown bustype %s - ignoring\n", str);
@@ -368,9 +366,6 @@ static void __init construct_ioapic_table(int mpc_default_type)
368 case 3: 366 case 3:
369 memcpy(bus.bustype, "EISA ", 6); 367 memcpy(bus.bustype, "EISA ", 6);
370 break; 368 break;
371 case 4:
372 case 7:
373 memcpy(bus.bustype, "MCA ", 6);
374 } 369 }
375 MP_bus_info(&bus); 370 MP_bus_info(&bus);
376 if (mpc_default_type > 4) { 371 if (mpc_default_type > 4) {
@@ -623,7 +618,7 @@ void __init default_find_smp_config(void)
623 return; 618 return;
624 /* 619 /*
625 * If it is an SMP machine we should know now, unless the 620 * If it is an SMP machine we should know now, unless the
626 * configuration is in an EISA/MCA bus machine with an 621 * configuration is in an EISA bus machine with an
627 * extended bios data area. 622 * extended bios data area.
628 * 623 *
629 * there is a real-mode segmented pointer pointing to the 624 * there is a real-mode segmented pointer pointing to the
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index 585be4bd71a..90875279ef3 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -19,8 +19,6 @@
19#include <linux/slab.h> 19#include <linux/slab.h>
20#include <linux/export.h> 20#include <linux/export.h>
21 21
22#include <linux/mca.h>
23
24#if defined(CONFIG_EDAC) 22#if defined(CONFIG_EDAC)
25#include <linux/edac.h> 23#include <linux/edac.h>
26#endif 24#endif
@@ -84,7 +82,7 @@ __setup("unknown_nmi_panic", setup_unknown_nmi_panic);
84 82
85#define nmi_to_desc(type) (&nmi_desc[type]) 83#define nmi_to_desc(type) (&nmi_desc[type])
86 84
87static int notrace __kprobes nmi_handle(unsigned int type, struct pt_regs *regs, bool b2b) 85static int __kprobes nmi_handle(unsigned int type, struct pt_regs *regs, bool b2b)
88{ 86{
89 struct nmi_desc *desc = nmi_to_desc(type); 87 struct nmi_desc *desc = nmi_to_desc(type);
90 struct nmiaction *a; 88 struct nmiaction *a;
@@ -166,7 +164,7 @@ void unregister_nmi_handler(unsigned int type, const char *name)
166} 164}
167EXPORT_SYMBOL_GPL(unregister_nmi_handler); 165EXPORT_SYMBOL_GPL(unregister_nmi_handler);
168 166
169static notrace __kprobes void 167static __kprobes void
170pci_serr_error(unsigned char reason, struct pt_regs *regs) 168pci_serr_error(unsigned char reason, struct pt_regs *regs)
171{ 169{
172 /* check to see if anyone registered against these types of errors */ 170 /* check to see if anyone registered against these types of errors */
@@ -197,7 +195,7 @@ pci_serr_error(unsigned char reason, struct pt_regs *regs)
197 outb(reason, NMI_REASON_PORT); 195 outb(reason, NMI_REASON_PORT);
198} 196}
199 197
200static notrace __kprobes void 198static __kprobes void
201io_check_error(unsigned char reason, struct pt_regs *regs) 199io_check_error(unsigned char reason, struct pt_regs *regs)
202{ 200{
203 unsigned long i; 201 unsigned long i;
@@ -209,7 +207,7 @@ io_check_error(unsigned char reason, struct pt_regs *regs)
209 pr_emerg( 207 pr_emerg(
210 "NMI: IOCK error (debug interrupt?) for reason %02x on CPU %d.\n", 208 "NMI: IOCK error (debug interrupt?) for reason %02x on CPU %d.\n",
211 reason, smp_processor_id()); 209 reason, smp_processor_id());
212 show_registers(regs); 210 show_regs(regs);
213 211
214 if (panic_on_io_nmi) 212 if (panic_on_io_nmi)
215 panic("NMI IOCK error: Not continuing"); 213 panic("NMI IOCK error: Not continuing");
@@ -228,7 +226,7 @@ io_check_error(unsigned char reason, struct pt_regs *regs)
228 outb(reason, NMI_REASON_PORT); 226 outb(reason, NMI_REASON_PORT);
229} 227}
230 228
231static notrace __kprobes void 229static __kprobes void
232unknown_nmi_error(unsigned char reason, struct pt_regs *regs) 230unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
233{ 231{
234 int handled; 232 int handled;
@@ -247,16 +245,6 @@ unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
247 245
248 __this_cpu_add(nmi_stats.unknown, 1); 246 __this_cpu_add(nmi_stats.unknown, 1);
249 247
250#ifdef CONFIG_MCA
251 /*
252 * Might actually be able to figure out what the guilty party
253 * is:
254 */
255 if (MCA_bus) {
256 mca_handle_nmi();
257 return;
258 }
259#endif
260 pr_emerg("Uhhuh. NMI received for unknown reason %02x on CPU %d.\n", 248 pr_emerg("Uhhuh. NMI received for unknown reason %02x on CPU %d.\n",
261 reason, smp_processor_id()); 249 reason, smp_processor_id());
262 250
@@ -270,7 +258,7 @@ unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
270static DEFINE_PER_CPU(bool, swallow_nmi); 258static DEFINE_PER_CPU(bool, swallow_nmi);
271static DEFINE_PER_CPU(unsigned long, last_nmi_rip); 259static DEFINE_PER_CPU(unsigned long, last_nmi_rip);
272 260
273static notrace __kprobes void default_do_nmi(struct pt_regs *regs) 261static __kprobes void default_do_nmi(struct pt_regs *regs)
274{ 262{
275 unsigned char reason = 0; 263 unsigned char reason = 0;
276 int handled; 264 int handled;
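Two things happen in this nmi.c hunk: show_registers() becomes show_regs() (the register-dump paths are unified elsewhere in this series), and the notrace annotations on the NMI paths are dropped — with the breakpoint-based ftrace patching added in this series (see the do_int3() hunk in traps.c further down), these paths no longer need to be kept out of the tracer. For reference, the registration API these handlers hang off of, in the form this file exports (my_nmi_handler and the source check are hypothetical):

	#include <asm/nmi.h>

	static int my_nmi_handler(unsigned int type, struct pt_regs *regs)
	{
		if (!my_device_raised_nmi())	/* hypothetical source check */
			return NMI_DONE;	/* not ours; try the next handler */
		/* ... quiesce the NMI source ... */
		return NMI_HANDLED;
	}

	err = register_nmi_handler(NMI_UNKNOWN, my_nmi_handler, 0, "mydev");
	...
	unregister_nmi_handler(NMI_UNKNOWN, "mydev");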
diff --git a/arch/x86/kernel/nmi_selftest.c b/arch/x86/kernel/nmi_selftest.c
index 2c39dcd510f..e31bf8d5c4d 100644
--- a/arch/x86/kernel/nmi_selftest.c
+++ b/arch/x86/kernel/nmi_selftest.c
@@ -13,6 +13,7 @@
13#include <linux/cpumask.h> 13#include <linux/cpumask.h>
14#include <linux/delay.h> 14#include <linux/delay.h>
15#include <linux/init.h> 15#include <linux/init.h>
16#include <linux/percpu.h>
16 17
17#include <asm/apic.h> 18#include <asm/apic.h>
18#include <asm/nmi.h> 19#include <asm/nmi.h>
@@ -117,15 +118,15 @@ static void __init dotest(void (*testcase_fn)(void), int expected)
117 unexpected_testcase_failures++; 118 unexpected_testcase_failures++;
118 119
119 if (nmi_fail == FAILURE) 120 if (nmi_fail == FAILURE)
120 printk("FAILED |"); 121 printk(KERN_CONT "FAILED |");
121 else if (nmi_fail == TIMEOUT) 122 else if (nmi_fail == TIMEOUT)
122 printk("TIMEOUT|"); 123 printk(KERN_CONT "TIMEOUT|");
123 else 124 else
124 printk("ERROR |"); 125 printk(KERN_CONT "ERROR |");
125 dump_stack(); 126 dump_stack();
126 } else { 127 } else {
127 testcase_successes++; 128 testcase_successes++;
128 printk(" ok |"); 129 printk(KERN_CONT " ok |");
129 } 130 }
130 testcase_total++; 131 testcase_total++;
131 132
@@ -150,10 +151,10 @@ void __init nmi_selftest(void)
150 151
151 print_testname("remote IPI"); 152 print_testname("remote IPI");
152 dotest(remote_ipi, SUCCESS); 153 dotest(remote_ipi, SUCCESS);
153 printk("\n"); 154 printk(KERN_CONT "\n");
154 print_testname("local IPI"); 155 print_testname("local IPI");
155 dotest(local_ipi, SUCCESS); 156 dotest(local_ipi, SUCCESS);
156 printk("\n"); 157 printk(KERN_CONT "\n");
157 158
158 cleanup_nmi_testsuite(); 159 cleanup_nmi_testsuite();
159 160
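The printk() changes here are a correctness fix: a bare printk() without a level is no longer a reliable line continuation, so fragments that extend the current line must say so explicitly. The idiom:

	pr_info("remote IPI:");		/* starts a new line at KERN_INFO */
	printk(KERN_CONT " ok |");	/* appends to that line */
	printk(KERN_CONT "\n");		/* terminates it */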
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index ab137605e69..9ce885996fd 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -241,16 +241,16 @@ static DEFINE_PER_CPU(enum paravirt_lazy_mode, paravirt_lazy_mode) = PARAVIRT_LA
241 241
242static inline void enter_lazy(enum paravirt_lazy_mode mode) 242static inline void enter_lazy(enum paravirt_lazy_mode mode)
243{ 243{
244 BUG_ON(percpu_read(paravirt_lazy_mode) != PARAVIRT_LAZY_NONE); 244 BUG_ON(this_cpu_read(paravirt_lazy_mode) != PARAVIRT_LAZY_NONE);
245 245
246 percpu_write(paravirt_lazy_mode, mode); 246 this_cpu_write(paravirt_lazy_mode, mode);
247} 247}
248 248
249static void leave_lazy(enum paravirt_lazy_mode mode) 249static void leave_lazy(enum paravirt_lazy_mode mode)
250{ 250{
251 BUG_ON(percpu_read(paravirt_lazy_mode) != mode); 251 BUG_ON(this_cpu_read(paravirt_lazy_mode) != mode);
252 252
253 percpu_write(paravirt_lazy_mode, PARAVIRT_LAZY_NONE); 253 this_cpu_write(paravirt_lazy_mode, PARAVIRT_LAZY_NONE);
254} 254}
255 255
256void paravirt_enter_lazy_mmu(void) 256void paravirt_enter_lazy_mmu(void)
@@ -267,7 +267,7 @@ void paravirt_start_context_switch(struct task_struct *prev)
267{ 267{
268 BUG_ON(preemptible()); 268 BUG_ON(preemptible());
269 269
270 if (percpu_read(paravirt_lazy_mode) == PARAVIRT_LAZY_MMU) { 270 if (this_cpu_read(paravirt_lazy_mode) == PARAVIRT_LAZY_MMU) {
271 arch_leave_lazy_mmu_mode(); 271 arch_leave_lazy_mmu_mode();
272 set_ti_thread_flag(task_thread_info(prev), TIF_LAZY_MMU_UPDATES); 272 set_ti_thread_flag(task_thread_info(prev), TIF_LAZY_MMU_UPDATES);
273 } 273 }
@@ -289,7 +289,7 @@ enum paravirt_lazy_mode paravirt_get_lazy_mode(void)
289 if (in_interrupt()) 289 if (in_interrupt())
290 return PARAVIRT_LAZY_NONE; 290 return PARAVIRT_LAZY_NONE;
291 291
292 return percpu_read(paravirt_lazy_mode); 292 return this_cpu_read(paravirt_lazy_mode);
293} 293}
294 294
295void arch_flush_lazy_mmu_mode(void) 295void arch_flush_lazy_mmu_mode(void)
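percpu_read()/percpu_write() were x86-private accessors; this series moves callers to the generic this_cpu_*() API, which on x86 still compiles down to a single %gs-relative instruction, so no preempt_disable()/enable() bracketing is needed. A self-contained sketch (my_counter is an invented variable):

	#include <linux/percpu.h>

	static DEFINE_PER_CPU(int, my_counter);

	static void bump(void)
	{
		this_cpu_add(my_counter, 1);
		if (this_cpu_read(my_counter) > 10)
			this_cpu_write(my_counter, 0);
	}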
diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c
index d0b2fb9ccbb..b72838bae64 100644
--- a/arch/x86/kernel/pci-calgary_64.c
+++ b/arch/x86/kernel/pci-calgary_64.c
@@ -1480,8 +1480,9 @@ cleanup:
1480static int __init calgary_parse_options(char *p) 1480static int __init calgary_parse_options(char *p)
1481{ 1481{
1482 unsigned int bridge; 1482 unsigned int bridge;
1483 unsigned long val;
1483 size_t len; 1484 size_t len;
1484 char* endp; 1485 ssize_t ret;
1485 1486
1486 while (*p) { 1487 while (*p) {
1487 if (!strncmp(p, "64k", 3)) 1488 if (!strncmp(p, "64k", 3))
@@ -1512,10 +1513,11 @@ static int __init calgary_parse_options(char *p)
1512 ++p; 1513 ++p;
1513 if (*p == '\0') 1514 if (*p == '\0')
1514 break; 1515 break;
1515 bridge = simple_strtoul(p, &endp, 0); 1516 ret = kstrtoul(p, 0, &val);
1516 if (p == endp) 1517 if (ret)
1517 break; 1518 break;
1518 1519
1520 bridge = val;
1519 if (bridge < MAX_PHB_BUS_NUM) { 1521 if (bridge < MAX_PHB_BUS_NUM) {
1520 printk(KERN_INFO "Calgary: disabling " 1522 printk(KERN_INFO "Calgary: disabling "
1521 "translation for PHB %#x\n", bridge); 1523 "translation for PHB %#x\n", bridge);
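One behavioral difference worth double-checking in conversions like this: kstrtoul() insists on parsing the whole string (only a trailing newline is tolerated), whereas simple_strtoul() consumed a numeric prefix and reported where it stopped:

	unsigned long v;

	kstrtoul("42", 0, &v);		/* returns 0, v == 42 */
	kstrtoul("42\n", 0, &v);	/* returns 0, newline tolerated */
	kstrtoul("42,64k", 0, &v);	/* returns -EINVAL */

Since calgary_parse_options() walks a comma-separated option string, a "disable=<num>" followed by further options would now make kstrtoul() fail and break out of the loop where the old code parsed the leading number — worth verifying against the full parser.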
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index e8173154800..735279e54e5 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -56,10 +56,16 @@ EXPORT_SYMBOL_GPL(idle_notifier_unregister);
56struct kmem_cache *task_xstate_cachep; 56struct kmem_cache *task_xstate_cachep;
57EXPORT_SYMBOL_GPL(task_xstate_cachep); 57EXPORT_SYMBOL_GPL(task_xstate_cachep);
58 58
59/*
60 * this gets called so that we can store lazy state into memory and copy the
61 * current task into the new thread.
62 */
59int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) 63int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
60{ 64{
61 int ret; 65 int ret;
62 66
67 unlazy_fpu(src);
68
63 *dst = *src; 69 *dst = *src;
64 if (fpu_allocated(&src->thread.fpu)) { 70 if (fpu_allocated(&src->thread.fpu)) {
65 memset(&dst->thread.fpu, 0, sizeof(dst->thread.fpu)); 71 memset(&dst->thread.fpu, 0, sizeof(dst->thread.fpu));
@@ -89,6 +95,16 @@ void arch_task_cache_init(void)
89 SLAB_PANIC | SLAB_NOTRACK, NULL); 95 SLAB_PANIC | SLAB_NOTRACK, NULL);
90} 96}
91 97
98static inline void drop_fpu(struct task_struct *tsk)
99{
100 /*
101 * Forget coprocessor state..
102 */
103 tsk->fpu_counter = 0;
104 clear_fpu(tsk);
105 clear_used_math();
106}
107
92/* 108/*
93 * Free current thread data structures etc.. 109 * Free current thread data structures etc..
94 */ 110 */
@@ -111,12 +127,8 @@ void exit_thread(void)
111 put_cpu(); 127 put_cpu();
112 kfree(bp); 128 kfree(bp);
113 } 129 }
114}
115 130
116void show_regs(struct pt_regs *regs) 131 drop_fpu(me);
117{
118 show_registers(regs);
119 show_trace(NULL, regs, (unsigned long *)kernel_stack_pointer(regs), 0);
120} 132}
121 133
122void show_regs_common(void) 134void show_regs_common(void)
@@ -151,12 +163,7 @@ void flush_thread(void)
151 163
152 flush_ptrace_hw_breakpoint(tsk); 164 flush_ptrace_hw_breakpoint(tsk);
153 memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); 165 memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
154 /* 166 drop_fpu(tsk);
155 * Forget coprocessor state..
156 */
157 tsk->fpu_counter = 0;
158 clear_fpu(tsk);
159 clear_used_math();
160} 167}
161 168
162static void hard_disable_TSC(void) 169static void hard_disable_TSC(void)
@@ -385,7 +392,7 @@ static inline void play_dead(void)
385#ifdef CONFIG_X86_64 392#ifdef CONFIG_X86_64
386void enter_idle(void) 393void enter_idle(void)
387{ 394{
388 percpu_write(is_idle, 1); 395 this_cpu_write(is_idle, 1);
389 atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL); 396 atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL);
390} 397}
391 398
@@ -582,9 +589,17 @@ int mwait_usable(const struct cpuinfo_x86 *c)
582{ 589{
583 u32 eax, ebx, ecx, edx; 590 u32 eax, ebx, ecx, edx;
584 591
592 /* Use mwait if idle=mwait boot option is given */
585 if (boot_option_idle_override == IDLE_FORCE_MWAIT) 593 if (boot_option_idle_override == IDLE_FORCE_MWAIT)
586 return 1; 594 return 1;
587 595
596 /*
597 * Any idle= boot option other than idle=mwait means that we must not
598 * use mwait. Eg: idle=halt or idle=poll or idle=nomwait
599 */
600 if (boot_option_idle_override != IDLE_NO_OVERRIDE)
601 return 0;
602
588 if (c->cpuid_level < MWAIT_INFO) 603 if (c->cpuid_level < MWAIT_INFO)
589 return 0; 604 return 0;
590 605
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index ae6847303e2..516fa186121 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -126,15 +126,6 @@ void release_thread(struct task_struct *dead_task)
126 release_vm86_irqs(dead_task); 126 release_vm86_irqs(dead_task);
127} 127}
128 128
129/*
130 * This gets called before we allocate a new thread and copy
131 * the current task into it.
132 */
133void prepare_to_copy(struct task_struct *tsk)
134{
135 unlazy_fpu(tsk);
136}
137
138int copy_thread(unsigned long clone_flags, unsigned long sp, 129int copy_thread(unsigned long clone_flags, unsigned long sp,
139 unsigned long unused, 130 unsigned long unused,
140 struct task_struct *p, struct pt_regs *regs) 131 struct task_struct *p, struct pt_regs *regs)
@@ -302,7 +293,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
302 293
303 switch_fpu_finish(next_p, fpu); 294 switch_fpu_finish(next_p, fpu);
304 295
305 percpu_write(current_task, next_p); 296 this_cpu_write(current_task, next_p);
306 297
307 return prev_p; 298 return prev_p;
308} 299}
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 43d8b48b23e..61cdf7fdf09 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -145,15 +145,6 @@ static inline u32 read_32bit_tls(struct task_struct *t, int tls)
145 return get_desc_base(&t->thread.tls_array[tls]); 145 return get_desc_base(&t->thread.tls_array[tls]);
146} 146}
147 147
148/*
149 * This gets called before we allocate a new thread and copy
150 * the current task into it.
151 */
152void prepare_to_copy(struct task_struct *tsk)
153{
154 unlazy_fpu(tsk);
155}
156
157int copy_thread(unsigned long clone_flags, unsigned long sp, 148int copy_thread(unsigned long clone_flags, unsigned long sp,
158 unsigned long unused, 149 unsigned long unused,
159 struct task_struct *p, struct pt_regs *regs) 150 struct task_struct *p, struct pt_regs *regs)
@@ -237,7 +228,7 @@ start_thread_common(struct pt_regs *regs, unsigned long new_ip,
237 current->thread.usersp = new_sp; 228 current->thread.usersp = new_sp;
238 regs->ip = new_ip; 229 regs->ip = new_ip;
239 regs->sp = new_sp; 230 regs->sp = new_sp;
240 percpu_write(old_rsp, new_sp); 231 this_cpu_write(old_rsp, new_sp);
241 regs->cs = _cs; 232 regs->cs = _cs;
242 regs->ss = _ss; 233 regs->ss = _ss;
243 regs->flags = X86_EFLAGS_IF; 234 regs->flags = X86_EFLAGS_IF;
@@ -359,11 +350,11 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
359 /* 350 /*
360 * Switch the PDA and FPU contexts. 351 * Switch the PDA and FPU contexts.
361 */ 352 */
362 prev->usersp = percpu_read(old_rsp); 353 prev->usersp = this_cpu_read(old_rsp);
363 percpu_write(old_rsp, next->usersp); 354 this_cpu_write(old_rsp, next->usersp);
364 percpu_write(current_task, next_p); 355 this_cpu_write(current_task, next_p);
365 356
366 percpu_write(kernel_stack, 357 this_cpu_write(kernel_stack,
367 (unsigned long)task_stack_page(next_p) + 358 (unsigned long)task_stack_page(next_p) +
368 THREAD_SIZE - KERNEL_STACK_OFFSET); 359 THREAD_SIZE - KERNEL_STACK_OFFSET);
369 360
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 685845cf16e..13b1990c7c5 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -1480,7 +1480,11 @@ long syscall_trace_enter(struct pt_regs *regs)
1480 regs->flags |= X86_EFLAGS_TF; 1480 regs->flags |= X86_EFLAGS_TF;
1481 1481
1482 /* do the secure computing check first */ 1482 /* do the secure computing check first */
1483 secure_computing(regs->orig_ax); 1483 if (secure_computing(regs->orig_ax)) {
1484 /* seccomp failures shouldn't expose any additional code. */
1485 ret = -1L;
1486 goto out;
1487 }
1484 1488
1485 if (unlikely(test_thread_flag(TIF_SYSCALL_EMU))) 1489 if (unlikely(test_thread_flag(TIF_SYSCALL_EMU)))
1486 ret = -1L; 1490 ret = -1L;
@@ -1505,6 +1509,7 @@ long syscall_trace_enter(struct pt_regs *regs)
1505 regs->dx, regs->r10); 1509 regs->dx, regs->r10);
1506#endif 1510#endif
1507 1511
1512out:
1508 return ret ?: regs->orig_ax; 1513 return ret ?: regs->orig_ax;
1509} 1514}
1510 1515
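secure_computing() now returns a verdict instead of unconditionally killing the task, which is what the seccomp-filter work needs: a filter can deny a syscall without terminating the process. On denial, syscall_trace_enter() returns -1, and because the entry code uses the function's return value as the syscall number, the syscall is simply skipped:

	out:
		return ret ?: regs->orig_ax;	/* -1 => invalid nr, nothing runs */

(GCC's a ?: b is shorthand for a ? a : b, so any nonzero ret wins.)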
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index d840e69a853..77215c23fba 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -39,7 +39,8 @@ static int reboot_mode;
39enum reboot_type reboot_type = BOOT_ACPI; 39enum reboot_type reboot_type = BOOT_ACPI;
40int reboot_force; 40int reboot_force;
41 41
42/* This variable is used privately to keep track of whether or not 42/*
43 * This variable is used privately to keep track of whether or not
43 * reboot_type is still set to its default value (i.e., reboot= hasn't 44 * reboot_type is still set to its default value (i.e., reboot= hasn't
44 * been set on the command line). This is needed so that we can 45 * been set on the command line). This is needed so that we can
45 * suppress DMI scanning for reboot quirks. Without it, it's 46 * suppress DMI scanning for reboot quirks. Without it, it's
@@ -51,7 +52,8 @@ static int reboot_default = 1;
51static int reboot_cpu = -1; 52static int reboot_cpu = -1;
52#endif 53#endif
53 54
54/* This is set if we need to go through the 'emergency' path. 55/*
56 * This is set if we need to go through the 'emergency' path.
55 * When machine_emergency_restart() is called, we may be on 57 * When machine_emergency_restart() is called, we may be on
56 * an inconsistent state and won't be able to do a clean cleanup 58 * an inconsistent state and won't be able to do a clean cleanup
57 */ 59 */
@@ -60,22 +62,24 @@ static int reboot_emergency;
60/* This is set by the PCI code if either type 1 or type 2 PCI is detected */ 62/* This is set by the PCI code if either type 1 or type 2 PCI is detected */
61bool port_cf9_safe = false; 63bool port_cf9_safe = false;
62 64
63/* reboot=b[ios] | s[mp] | t[riple] | k[bd] | e[fi] [, [w]arm | [c]old] | p[ci] 65/*
64 warm Don't set the cold reboot flag 66 * reboot=b[ios] | s[mp] | t[riple] | k[bd] | e[fi] [, [w]arm | [c]old] | p[ci]
65 cold Set the cold reboot flag 67 * warm Don't set the cold reboot flag
66 bios Reboot by jumping through the BIOS (only for X86_32) 68 * cold Set the cold reboot flag
67 smp Reboot by executing reset on BSP or other CPU (only for X86_32) 69 * bios Reboot by jumping through the BIOS (only for X86_32)
68 triple Force a triple fault (init) 70 * smp Reboot by executing reset on BSP or other CPU (only for X86_32)
69 kbd Use the keyboard controller. cold reset (default) 71 * triple Force a triple fault (init)
70 acpi Use the RESET_REG in the FADT 72 * kbd Use the keyboard controller. cold reset (default)
71 efi Use efi reset_system runtime service 73 * acpi Use the RESET_REG in the FADT
72 pci Use the so-called "PCI reset register", CF9 74 * efi Use efi reset_system runtime service
73 force Avoid anything that could hang. 75 * pci Use the so-called "PCI reset register", CF9
76 * force Avoid anything that could hang.
74 */ 77 */
75static int __init reboot_setup(char *str) 78static int __init reboot_setup(char *str)
76{ 79{
77 for (;;) { 80 for (;;) {
78 /* Having anything passed on the command line via 81 /*
82 * Having anything passed on the command line via
79 * reboot= will cause us to disable DMI checking 83 * reboot= will cause us to disable DMI checking
80 * below. 84 * below.
81 */ 85 */
@@ -98,9 +102,11 @@ static int __init reboot_setup(char *str)
98 if (isdigit(*(str+2))) 102 if (isdigit(*(str+2)))
99 reboot_cpu = reboot_cpu*10 + (int)(*(str+2) - '0'); 103 reboot_cpu = reboot_cpu*10 + (int)(*(str+2) - '0');
100 } 104 }
101 /* we will leave sorting out the final value 105 /*
102 when we are ready to reboot, since we might not 106 * We will leave sorting out the final value
103 have detected BSP APIC ID or smp_num_cpu */ 107 * when we are ready to reboot, since we might not
108 * have detected BSP APIC ID or smp_num_cpu
109 */
104 break; 110 break;
105#endif /* CONFIG_SMP */ 111#endif /* CONFIG_SMP */
106 112
@@ -150,6 +156,82 @@ static int __init set_bios_reboot(const struct dmi_system_id *d)
150 return 0; 156 return 0;
151} 157}
152 158
159extern const unsigned char machine_real_restart_asm[];
160extern const u64 machine_real_restart_gdt[3];
161
162void machine_real_restart(unsigned int type)
163{
164 void *restart_va;
165 unsigned long restart_pa;
166 void (*restart_lowmem)(unsigned int);
167 u64 *lowmem_gdt;
168
169 local_irq_disable();
170
171 /*
172 * Write zero to CMOS register number 0x0f, which the BIOS POST
173 * routine will recognize as telling it to do a proper reboot. (Well
174 * that's what this book in front of me says -- it may only apply to
175 * the Phoenix BIOS though, it's not clear). At the same time,
176 * disable NMIs by setting the top bit in the CMOS address register,
177 * as we're about to do peculiar things to the CPU. I'm not sure if
178 * `outb_p' is needed instead of just `outb'. Use it to be on the
179 * safe side. (Yes, CMOS_WRITE does outb_p's. - Paul G.)
180 */
181 spin_lock(&rtc_lock);
182 CMOS_WRITE(0x00, 0x8f);
183 spin_unlock(&rtc_lock);
184
185 /*
186 * Switch back to the initial page table.
187 */
188 load_cr3(initial_page_table);
189
190 /*
191 * Write 0x1234 to absolute memory location 0x472. The BIOS reads
192 * this on booting to tell it to "Bypass memory test (also warm
193 * boot)". This seems like a fairly standard thing that gets set by
194 * REBOOT.COM programs, and the previous reset routine did this
195 * too. */
196 *((unsigned short *)0x472) = reboot_mode;
197
198 /* Patch the GDT in the low memory trampoline */
199 lowmem_gdt = TRAMPOLINE_SYM(machine_real_restart_gdt);
200
201 restart_va = TRAMPOLINE_SYM(machine_real_restart_asm);
202 restart_pa = virt_to_phys(restart_va);
203 restart_lowmem = (void (*)(unsigned int))restart_pa;
204
205 /* GDT[0]: GDT self-pointer */
206 lowmem_gdt[0] =
207 (u64)(sizeof(machine_real_restart_gdt) - 1) +
208 ((u64)virt_to_phys(lowmem_gdt) << 16);
209 /* GDT[1]: 64K real mode code segment */
210 lowmem_gdt[1] =
211 GDT_ENTRY(0x009b, restart_pa, 0xffff);
212
213 /* Jump to the identity-mapped low memory code */
214 restart_lowmem(type);
215}
216#ifdef CONFIG_APM_MODULE
217EXPORT_SYMBOL(machine_real_restart);
218#endif
219
220#endif /* CONFIG_X86_32 */
221
222/*
223 * Some Apple MacBook and MacBookPro's needs reboot=p to be able to reboot
224 */
225static int __init set_pci_reboot(const struct dmi_system_id *d)
226{
227 if (reboot_type != BOOT_CF9) {
228 reboot_type = BOOT_CF9;
229 printk(KERN_INFO "%s series board detected. "
230 "Selecting PCI-method for reboots.\n", d->ident);
231 }
232 return 0;
233}
234
153static int __init set_kbd_reboot(const struct dmi_system_id *d) 235static int __init set_kbd_reboot(const struct dmi_system_id *d)
154{ 236{
155 if (reboot_type != BOOT_KBD) { 237 if (reboot_type != BOOT_KBD) {
@@ -159,7 +241,12 @@ static int __init set_kbd_reboot(const struct dmi_system_id *d)
159 return 0; 241 return 0;
160} 242}
161 243
244/*
245 * This is a single dmi_table handling all reboot quirks. Note that
246 * REBOOT_BIOS is only available for 32bit
247 */
162static struct dmi_system_id __initdata reboot_dmi_table[] = { 248static struct dmi_system_id __initdata reboot_dmi_table[] = {
249#ifdef CONFIG_X86_32
163 { /* Handle problems with rebooting on Dell E520's */ 250 { /* Handle problems with rebooting on Dell E520's */
164 .callback = set_bios_reboot, 251 .callback = set_bios_reboot,
165 .ident = "Dell E520", 252 .ident = "Dell E520",
@@ -184,7 +271,7 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = {
184 DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge 300/"), 271 DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge 300/"),
185 }, 272 },
186 }, 273 },
187 { /* Handle problems with rebooting on Dell Optiplex 745's SFF*/ 274 { /* Handle problems with rebooting on Dell Optiplex 745's SFF */
188 .callback = set_bios_reboot, 275 .callback = set_bios_reboot,
189 .ident = "Dell OptiPlex 745", 276 .ident = "Dell OptiPlex 745",
190 .matches = { 277 .matches = {
@@ -192,7 +279,7 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = {
192 DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 745"), 279 DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 745"),
193 }, 280 },
194 }, 281 },
195 { /* Handle problems with rebooting on Dell Optiplex 745's DFF*/ 282 { /* Handle problems with rebooting on Dell Optiplex 745's DFF */
196 .callback = set_bios_reboot, 283 .callback = set_bios_reboot,
197 .ident = "Dell OptiPlex 745", 284 .ident = "Dell OptiPlex 745",
198 .matches = { 285 .matches = {
@@ -201,7 +288,7 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = {
201 DMI_MATCH(DMI_BOARD_NAME, "0MM599"), 288 DMI_MATCH(DMI_BOARD_NAME, "0MM599"),
202 }, 289 },
203 }, 290 },
204 { /* Handle problems with rebooting on Dell Optiplex 745 with 0KW626 */ 291 { /* Handle problems with rebooting on Dell Optiplex 745 with 0KW626 */
205 .callback = set_bios_reboot, 292 .callback = set_bios_reboot,
206 .ident = "Dell OptiPlex 745", 293 .ident = "Dell OptiPlex 745",
207 .matches = { 294 .matches = {
@@ -210,7 +297,7 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = {
210 DMI_MATCH(DMI_BOARD_NAME, "0KW626"), 297 DMI_MATCH(DMI_BOARD_NAME, "0KW626"),
211 }, 298 },
212 }, 299 },
213 { /* Handle problems with rebooting on Dell Optiplex 330 with 0KP561 */ 300 { /* Handle problems with rebooting on Dell Optiplex 330 with 0KP561 */
214 .callback = set_bios_reboot, 301 .callback = set_bios_reboot,
215 .ident = "Dell OptiPlex 330", 302 .ident = "Dell OptiPlex 330",
216 .matches = { 303 .matches = {
@@ -219,7 +306,7 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = {
219 DMI_MATCH(DMI_BOARD_NAME, "0KP561"), 306 DMI_MATCH(DMI_BOARD_NAME, "0KP561"),
220 }, 307 },
221 }, 308 },
222 { /* Handle problems with rebooting on Dell Optiplex 360 with 0T656F */ 309 { /* Handle problems with rebooting on Dell Optiplex 360 with 0T656F */
223 .callback = set_bios_reboot, 310 .callback = set_bios_reboot,
224 .ident = "Dell OptiPlex 360", 311 .ident = "Dell OptiPlex 360",
225 .matches = { 312 .matches = {
@@ -228,7 +315,7 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = {
228 DMI_MATCH(DMI_BOARD_NAME, "0T656F"), 315 DMI_MATCH(DMI_BOARD_NAME, "0T656F"),
229 }, 316 },
230 }, 317 },
231 { /* Handle problems with rebooting on Dell OptiPlex 760 with 0G919G*/ 318 { /* Handle problems with rebooting on Dell OptiPlex 760 with 0G919G */
232 .callback = set_bios_reboot, 319 .callback = set_bios_reboot,
233 .ident = "Dell OptiPlex 760", 320 .ident = "Dell OptiPlex 760",
234 .matches = { 321 .matches = {
@@ -301,7 +388,7 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = {
301 DMI_MATCH(DMI_PRODUCT_NAME, "SBC-FITPC2"), 388 DMI_MATCH(DMI_PRODUCT_NAME, "SBC-FITPC2"),
302 }, 389 },
303 }, 390 },
304 { /* Handle problems with rebooting on ASUS P4S800 */ 391 { /* Handle problems with rebooting on ASUS P4S800 */
305 .callback = set_bios_reboot, 392 .callback = set_bios_reboot,
306 .ident = "ASUS P4S800", 393 .ident = "ASUS P4S800",
307 .matches = { 394 .matches = {
@@ -309,7 +396,9 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = {
309 DMI_MATCH(DMI_BOARD_NAME, "P4S800"), 396 DMI_MATCH(DMI_BOARD_NAME, "P4S800"),
310 }, 397 },
311 }, 398 },
312 { /* Handle reboot issue on Acer Aspire one */ 399#endif /* CONFIG_X86_32 */
400
401 { /* Handle reboot issue on Acer Aspire one */
313 .callback = set_kbd_reboot, 402 .callback = set_kbd_reboot,
314 .ident = "Acer Aspire One A110", 403 .ident = "Acer Aspire One A110",
315 .matches = { 404 .matches = {
@@ -317,96 +406,6 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = {
317 DMI_MATCH(DMI_PRODUCT_NAME, "AOA110"), 406 DMI_MATCH(DMI_PRODUCT_NAME, "AOA110"),
318 }, 407 },
319 }, 408 },
320 { }
321};
322
323static int __init reboot_init(void)
324{
325 /* Only do the DMI check if reboot_type hasn't been overridden
326 * on the command line
327 */
328 if (reboot_default) {
329 dmi_check_system(reboot_dmi_table);
330 }
331 return 0;
332}
333core_initcall(reboot_init);
334
335extern const unsigned char machine_real_restart_asm[];
336extern const u64 machine_real_restart_gdt[3];
337
338void machine_real_restart(unsigned int type)
339{
340 void *restart_va;
341 unsigned long restart_pa;
342 void (*restart_lowmem)(unsigned int);
343 u64 *lowmem_gdt;
344
345 local_irq_disable();
346
347 /* Write zero to CMOS register number 0x0f, which the BIOS POST
348 routine will recognize as telling it to do a proper reboot. (Well
349 that's what this book in front of me says -- it may only apply to
350 the Phoenix BIOS though, it's not clear). At the same time,
351 disable NMIs by setting the top bit in the CMOS address register,
352 as we're about to do peculiar things to the CPU. I'm not sure if
353 `outb_p' is needed instead of just `outb'. Use it to be on the
354 safe side. (Yes, CMOS_WRITE does outb_p's. - Paul G.)
355 */
356 spin_lock(&rtc_lock);
357 CMOS_WRITE(0x00, 0x8f);
358 spin_unlock(&rtc_lock);
359
360 /*
361 * Switch back to the initial page table.
362 */
363 load_cr3(initial_page_table);
364
365 /* Write 0x1234 to absolute memory location 0x472. The BIOS reads
366 this on booting to tell it to "Bypass memory test (also warm
367 boot)". This seems like a fairly standard thing that gets set by
368 REBOOT.COM programs, and the previous reset routine did this
369 too. */
370 *((unsigned short *)0x472) = reboot_mode;
371
372 /* Patch the GDT in the low memory trampoline */
373 lowmem_gdt = TRAMPOLINE_SYM(machine_real_restart_gdt);
374
375 restart_va = TRAMPOLINE_SYM(machine_real_restart_asm);
376 restart_pa = virt_to_phys(restart_va);
377 restart_lowmem = (void (*)(unsigned int))restart_pa;
378
379 /* GDT[0]: GDT self-pointer */
380 lowmem_gdt[0] =
381 (u64)(sizeof(machine_real_restart_gdt) - 1) +
382 ((u64)virt_to_phys(lowmem_gdt) << 16);
383 /* GDT[1]: 64K real mode code segment */
384 lowmem_gdt[1] =
385 GDT_ENTRY(0x009b, restart_pa, 0xffff);
386
387 /* Jump to the identity-mapped low memory code */
388 restart_lowmem(type);
389}
390#ifdef CONFIG_APM_MODULE
391EXPORT_SYMBOL(machine_real_restart);
392#endif
393
394#endif /* CONFIG_X86_32 */
395
396/*
397 * Some Apple MacBook and MacBookPro's needs reboot=p to be able to reboot
398 */
399static int __init set_pci_reboot(const struct dmi_system_id *d)
400{
401 if (reboot_type != BOOT_CF9) {
402 reboot_type = BOOT_CF9;
403 printk(KERN_INFO "%s series board detected. "
404 "Selecting PCI-method for reboots.\n", d->ident);
405 }
406 return 0;
407}
408
409static struct dmi_system_id __initdata pci_reboot_dmi_table[] = {
410 { /* Handle problems with rebooting on Apple MacBook5 */ 409 { /* Handle problems with rebooting on Apple MacBook5 */
411 .callback = set_pci_reboot, 410 .callback = set_pci_reboot,
412 .ident = "Apple MacBook5", 411 .ident = "Apple MacBook5",
@@ -474,17 +473,17 @@ static struct dmi_system_id __initdata pci_reboot_dmi_table[] = {
474 { } 473 { }
475}; 474};
476 475
477static int __init pci_reboot_init(void) 476static int __init reboot_init(void)
478{ 477{
479 /* Only do the DMI check if reboot_type hasn't been overridden 478 /*
479 * Only do the DMI check if reboot_type hasn't been overridden
480 * on the command line 480 * on the command line
481 */ 481 */
482 if (reboot_default) { 482 if (reboot_default)
483 dmi_check_system(pci_reboot_dmi_table); 483 dmi_check_system(reboot_dmi_table);
484 }
485 return 0; 484 return 0;
486} 485}
487core_initcall(pci_reboot_init); 486core_initcall(reboot_init);
488 487
489static inline void kb_wait(void) 488static inline void kb_wait(void)
490{ 489{
@@ -502,14 +501,14 @@ static void vmxoff_nmi(int cpu, struct pt_regs *regs)
502 cpu_emergency_vmxoff(); 501 cpu_emergency_vmxoff();
503} 502}
504 503
505/* Use NMIs as IPIs to tell all CPUs to disable virtualization 504/* Use NMIs as IPIs to tell all CPUs to disable virtualization */
506 */
507static void emergency_vmx_disable_all(void) 505static void emergency_vmx_disable_all(void)
508{ 506{
509 /* Just make sure we won't change CPUs while doing this */ 507 /* Just make sure we won't change CPUs while doing this */
510 local_irq_disable(); 508 local_irq_disable();
511 509
512 /* We need to disable VMX on all CPUs before rebooting, otherwise 510 /*
511 * We need to disable VMX on all CPUs before rebooting, otherwise
513 * we risk hanging up the machine, because the CPU ignore INIT 512 * we risk hanging up the machine, because the CPU ignore INIT
514 * signals when VMX is enabled. 513 * signals when VMX is enabled.
515 * 514 *
@@ -528,8 +527,7 @@ static void emergency_vmx_disable_all(void)
528 * is still enabling VMX. 527 * is still enabling VMX.
529 */ 528 */
530 if (cpu_has_vmx() && cpu_vmx_enabled()) { 529 if (cpu_has_vmx() && cpu_vmx_enabled()) {
531 /* Disable VMX on this CPU. 530 /* Disable VMX on this CPU. */
532 */
533 cpu_vmxoff(); 531 cpu_vmxoff();
534 532
535 /* Halt and disable VMX on the other CPUs */ 533 /* Halt and disable VMX on the other CPUs */
@@ -574,12 +572,12 @@ static void native_machine_emergency_restart(void)
574 /* Could also try the reset bit in the Hammer NB */ 572 /* Could also try the reset bit in the Hammer NB */
575 switch (reboot_type) { 573 switch (reboot_type) {
576 case BOOT_KBD: 574 case BOOT_KBD:
577 mach_reboot_fixups(); /* for board specific fixups */ 575 mach_reboot_fixups(); /* For board specific fixups */
578 576
579 for (i = 0; i < 10; i++) { 577 for (i = 0; i < 10; i++) {
580 kb_wait(); 578 kb_wait();
581 udelay(50); 579 udelay(50);
582 outb(0xfe, 0x64); /* pulse reset low */ 580 outb(0xfe, 0x64); /* Pulse reset low */
583 udelay(50); 581 udelay(50);
584 } 582 }
585 if (attempt == 0 && orig_reboot_type == BOOT_ACPI) { 583 if (attempt == 0 && orig_reboot_type == BOOT_ACPI) {
@@ -621,7 +619,7 @@ static void native_machine_emergency_restart(void)
621 619
622 case BOOT_CF9: 620 case BOOT_CF9:
623 port_cf9_safe = true; 621 port_cf9_safe = true;
624 /* fall through */ 622 /* Fall through */
625 623
626 case BOOT_CF9_COND: 624 case BOOT_CF9_COND:
627 if (port_cf9_safe) { 625 if (port_cf9_safe) {
@@ -659,7 +657,8 @@ void native_machine_shutdown(void)
659 /* Make certain I only run on the appropriate processor */ 657 /* Make certain I only run on the appropriate processor */
660 set_cpus_allowed_ptr(current, cpumask_of(reboot_cpu_id)); 658 set_cpus_allowed_ptr(current, cpumask_of(reboot_cpu_id));
661 659
662 /* O.K Now that I'm on the appropriate processor, 660 /*
661 * O.K Now that I'm on the appropriate processor,
663 * stop all of the others. 662 * stop all of the others.
664 */ 663 */
665 stop_other_cpus(); 664 stop_other_cpus();
@@ -697,12 +696,11 @@ static void native_machine_restart(char *__unused)
697 696
698static void native_machine_halt(void) 697static void native_machine_halt(void)
699{ 698{
700 /* stop other cpus and apics */ 699 /* Stop other cpus and apics */
701 machine_shutdown(); 700 machine_shutdown();
702 701
703 tboot_shutdown(TB_SHUTDOWN_HALT); 702 tboot_shutdown(TB_SHUTDOWN_HALT);
704 703
705 /* stop this cpu */
706 stop_this_cpu(NULL); 704 stop_this_cpu(NULL);
707} 705}
708 706
@@ -713,7 +711,7 @@ static void native_machine_power_off(void)
713 machine_shutdown(); 711 machine_shutdown();
714 pm_power_off(); 712 pm_power_off();
715 } 713 }
716 /* a fallback in case there is no PM info available */ 714 /* A fallback in case there is no PM info available */
717 tboot_shutdown(TB_SHUTDOWN_HALT); 715 tboot_shutdown(TB_SHUTDOWN_HALT);
718} 716}
719 717
@@ -775,7 +773,8 @@ static int crash_nmi_callback(unsigned int val, struct pt_regs *regs)
775 773
776 cpu = raw_smp_processor_id(); 774 cpu = raw_smp_processor_id();
777 775
778 /* Don't do anything if this handler is invoked on crashing cpu. 776 /*
777 * Don't do anything if this handler is invoked on crashing cpu.
779 * Otherwise, system will completely hang. Crashing cpu can get 778 * Otherwise, system will completely hang. Crashing cpu can get
780 * an NMI if system was initially booted with nmi_watchdog parameter. 779 * an NMI if system was initially booted with nmi_watchdog parameter.
781 */ 780 */
@@ -799,7 +798,8 @@ static void smp_send_nmi_allbutself(void)
799 apic->send_IPI_allbutself(NMI_VECTOR); 798 apic->send_IPI_allbutself(NMI_VECTOR);
800} 799}
801 800
802/* Halt all other CPUs, calling the specified function on each of them 801/*
802 * Halt all other CPUs, calling the specified function on each of them
803 * 803 *
804 * This function can be used to halt all other CPUs on crash 804 * This function can be used to halt all other CPUs on crash
805 * or emergency reboot time. The function passed as parameter 805 * or emergency reboot time. The function passed as parameter
@@ -810,7 +810,7 @@ void nmi_shootdown_cpus(nmi_shootdown_cb callback)
810 unsigned long msecs; 810 unsigned long msecs;
811 local_irq_disable(); 811 local_irq_disable();
812 812
813 /* Make a note of crashing cpu. Will be used in NMI callback.*/ 813 /* Make a note of crashing cpu. Will be used in NMI callback. */
814 crashing_cpu = safe_smp_processor_id(); 814 crashing_cpu = safe_smp_processor_id();
815 815
816 shootdown_callback = callback; 816 shootdown_callback = callback;
@@ -819,8 +819,9 @@ void nmi_shootdown_cpus(nmi_shootdown_cb callback)
819 /* Would it be better to replace the trap vector here? */ 819 /* Would it be better to replace the trap vector here? */
820 if (register_nmi_handler(NMI_LOCAL, crash_nmi_callback, 820 if (register_nmi_handler(NMI_LOCAL, crash_nmi_callback,
821 NMI_FLAG_FIRST, "crash")) 821 NMI_FLAG_FIRST, "crash"))
822 return; /* return what? */ 822 return; /* Return what? */
823 /* Ensure the new callback function is set before sending 823 /*
824 * Ensure the new callback function is set before sending
824 * out the NMI 825 * out the NMI
825 */ 826 */
826 wmb(); 827 wmb();
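The net effect in reboot.c: the old pci_reboot_dmi_table[] is folded into a single reboot_dmi_table[] (with the BIOS-reboot entries compiled only on X86_32, since BOOT_BIOS is 32-bit only), machine_real_restart() moves up the file with only comment-style cleanups, and the block comments are reflowed to kernel style throughout. Adding a quirk is now one entry in the one table; a template (vendor/board strings invented):

	{ /* Handle problems with rebooting on Some Board 123 */
		.callback = set_pci_reboot,	/* or set_kbd_reboot; set_bios_reboot is X86_32-only */
		.ident = "Some Board 123",
		.matches = {
			DMI_MATCH(DMI_SYS_VENDOR, "Some Vendor"),
			DMI_MATCH(DMI_PRODUCT_NAME, "Board 123"),
		},
	},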
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 1a290156205..366c688d619 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -34,7 +34,6 @@
34#include <linux/memblock.h> 34#include <linux/memblock.h>
35#include <linux/seq_file.h> 35#include <linux/seq_file.h>
36#include <linux/console.h> 36#include <linux/console.h>
37#include <linux/mca.h>
38#include <linux/root_dev.h> 37#include <linux/root_dev.h>
39#include <linux/highmem.h> 38#include <linux/highmem.h>
40#include <linux/module.h> 39#include <linux/module.h>
@@ -179,12 +178,6 @@ struct cpuinfo_x86 new_cpu_data __cpuinitdata = {0, 0, 0, 0, -1, 1, 0, 0, -1};
179/* common cpu data for all cpus */ 178/* common cpu data for all cpus */
180struct cpuinfo_x86 boot_cpu_data __read_mostly = {0, 0, 0, 0, -1, 1, 0, 0, -1}; 179struct cpuinfo_x86 boot_cpu_data __read_mostly = {0, 0, 0, 0, -1, 1, 0, 0, -1};
181EXPORT_SYMBOL(boot_cpu_data); 180EXPORT_SYMBOL(boot_cpu_data);
182static void set_mca_bus(int x)
183{
184#ifdef CONFIG_MCA
185 MCA_bus = x;
186#endif
187}
188 181
189unsigned int def_to_bigsmp; 182unsigned int def_to_bigsmp;
190 183
@@ -393,10 +386,9 @@ static void __init reserve_initrd(void)
393 initrd_start = 0; 386 initrd_start = 0;
394 387
395 if (ramdisk_size >= (end_of_lowmem>>1)) { 388 if (ramdisk_size >= (end_of_lowmem>>1)) {
396 memblock_free(ramdisk_image, ramdisk_end - ramdisk_image); 389 panic("initrd too large to handle, "
397 printk(KERN_ERR "initrd too large to handle, " 390 "disabling initrd (%lld needed, %lld available)\n",
398 "disabling initrd\n"); 391 ramdisk_size, end_of_lowmem>>1);
399 return;
400 } 392 }
401 393
402 printk(KERN_INFO "RAMDISK: %08llx - %08llx\n", ramdisk_image, 394 printk(KERN_INFO "RAMDISK: %08llx - %08llx\n", ramdisk_image,
@@ -717,7 +709,6 @@ void __init setup_arch(char **cmdline_p)
717 apm_info.bios = boot_params.apm_bios_info; 709 apm_info.bios = boot_params.apm_bios_info;
718 ist_info = boot_params.ist_info; 710 ist_info = boot_params.ist_info;
719 if (boot_params.sys_desc_table.length != 0) { 711 if (boot_params.sys_desc_table.length != 0) {
720 set_mca_bus(boot_params.sys_desc_table.table[3] & 0x2);
721 machine_id = boot_params.sys_desc_table.table[0]; 712 machine_id = boot_params.sys_desc_table.table[0];
722 machine_submodel_id = boot_params.sys_desc_table.table[1]; 713 machine_submodel_id = boot_params.sys_desc_table.table[1];
723 BIOS_revision = boot_params.sys_desc_table.table[2]; 714 BIOS_revision = boot_params.sys_desc_table.table[2];
@@ -1012,7 +1003,8 @@ void __init setup_arch(char **cmdline_p)
1012 init_cpu_to_node(); 1003 init_cpu_to_node();
1013 1004
1014 init_apic_mappings(); 1005 init_apic_mappings();
1015 ioapic_and_gsi_init(); 1006 if (x86_io_apic_ops.init)
1007 x86_io_apic_ops.init();
1016 1008
1017 kvm_guest_init(); 1009 kvm_guest_init();
1018 1010
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index 66c74f481ca..48d2b7ded42 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -109,6 +109,9 @@
109 * about nothing of note with C stepping upwards. 109 * about nothing of note with C stepping upwards.
110 */ 110 */
111 111
112static atomic_t stopping_cpu = ATOMIC_INIT(-1);
113static bool smp_no_nmi_ipi = false;
114
112/* 115/*
113 * this function sends a 'reschedule' IPI to another CPU. 116 * this function sends a 'reschedule' IPI to another CPU.
114 * it goes straight through and wastes no time serializing 117 * it goes straight through and wastes no time serializing
@@ -149,8 +152,6 @@ void native_send_call_func_ipi(const struct cpumask *mask)
149 free_cpumask_var(allbutself); 152 free_cpumask_var(allbutself);
150} 153}
151 154
152static atomic_t stopping_cpu = ATOMIC_INIT(-1);
153
154static int smp_stop_nmi_callback(unsigned int val, struct pt_regs *regs) 155static int smp_stop_nmi_callback(unsigned int val, struct pt_regs *regs)
155{ 156{
156 /* We are registered on stopping cpu too, avoid spurious NMI */ 157 /* We are registered on stopping cpu too, avoid spurious NMI */
@@ -162,7 +163,19 @@ static int smp_stop_nmi_callback(unsigned int val, struct pt_regs *regs)
162 return NMI_HANDLED; 163 return NMI_HANDLED;
163} 164}
164 165
165static void native_nmi_stop_other_cpus(int wait) 166/*
167 * this function calls the 'stop' function on all other CPUs in the system.
168 */
169
170asmlinkage void smp_reboot_interrupt(void)
171{
172 ack_APIC_irq();
173 irq_enter();
174 stop_this_cpu(NULL);
175 irq_exit();
176}
177
178static void native_stop_other_cpus(int wait)
166{ 179{
167 unsigned long flags; 180 unsigned long flags;
168 unsigned long timeout; 181 unsigned long timeout;
@@ -174,20 +187,25 @@ static void native_nmi_stop_other_cpus(int wait)
174 * Use an own vector here because smp_call_function 187 * Use an own vector here because smp_call_function
175 * does lots of things not suitable in a panic situation. 188 * does lots of things not suitable in a panic situation.
176 */ 189 */
190
191 /*
192 * We start by using the REBOOT_VECTOR irq.
193 * The irq is treated as a sync point to allow critical
194 * regions of code on other cpus to release their spin locks
195 * and re-enable irqs. Jumping straight to an NMI might
196 * accidentally cause deadlocks with further shutdown/panic
197 * code. By syncing, we give the cpus up to one second to
198 * finish their work before we force them off with the NMI.
199 */
177 if (num_online_cpus() > 1) { 200 if (num_online_cpus() > 1) {
178 /* did someone beat us here? */ 201 /* did someone beat us here? */
179 if (atomic_cmpxchg(&stopping_cpu, -1, safe_smp_processor_id()) != -1) 202 if (atomic_cmpxchg(&stopping_cpu, -1, safe_smp_processor_id()) != -1)
180 return; 203 return;
181 204
182 if (register_nmi_handler(NMI_LOCAL, smp_stop_nmi_callback, 205 /* sync above data before sending IRQ */
183 NMI_FLAG_FIRST, "smp_stop"))
184 /* Note: we ignore failures here */
185 return;
186
187 /* sync above data before sending NMI */
188 wmb(); 206 wmb();
189 207
190 apic->send_IPI_allbutself(NMI_VECTOR); 208 apic->send_IPI_allbutself(REBOOT_VECTOR);
191 209
192 /* 210 /*
193 * Don't wait longer than a second if the caller 211 * Don't wait longer than a second if the caller
@@ -197,63 +215,37 @@ static void native_nmi_stop_other_cpus(int wait)
197 while (num_online_cpus() > 1 && (wait || timeout--)) 215 while (num_online_cpus() > 1 && (wait || timeout--))
198 udelay(1); 216 udelay(1);
199 } 217 }
218
219 /* if the REBOOT_VECTOR didn't work, try with the NMI */
220 if ((num_online_cpus() > 1) && (!smp_no_nmi_ipi)) {
221 if (register_nmi_handler(NMI_LOCAL, smp_stop_nmi_callback,
222 NMI_FLAG_FIRST, "smp_stop"))
223 /* Note: we ignore failures here */
224 /* Hope the REBOOT_IRQ is good enough */
225 goto finish;
200 226
201 local_irq_save(flags); 227 /* sync above data before sending IRQ */
202 disable_local_APIC(); 228 wmb();
203 local_irq_restore(flags);
204}
205
206/*
207 * this function calls the 'stop' function on all other CPUs in the system.
208 */
209
210asmlinkage void smp_reboot_interrupt(void)
211{
212 ack_APIC_irq();
213 irq_enter();
214 stop_this_cpu(NULL);
215 irq_exit();
216}
217
218static void native_irq_stop_other_cpus(int wait)
219{
220 unsigned long flags;
221 unsigned long timeout;
222 229
223 if (reboot_force) 230 pr_emerg("Shutting down cpus with NMI\n");
224 return;
225 231
226 /* 232 apic->send_IPI_allbutself(NMI_VECTOR);
227 * Use an own vector here because smp_call_function
228 * does lots of things not suitable in a panic situation.
229 * On most systems we could also use an NMI here,
230 * but there are a few systems around where NMI
231 * is problematic so stay with an non NMI for now
232 * (this implies we cannot stop CPUs spinning with irq off
233 * currently)
234 */
235 if (num_online_cpus() > 1) {
236 apic->send_IPI_allbutself(REBOOT_VECTOR);
237 233
238 /* 234 /*
239 * Don't wait longer than a second if the caller 235 * Don't wait longer than a 10 ms if the caller
240 * didn't ask us to wait. 236 * didn't ask us to wait.
241 */ 237 */
242 timeout = USEC_PER_SEC; 238 timeout = USEC_PER_MSEC * 10;
243 while (num_online_cpus() > 1 && (wait || timeout--)) 239 while (num_online_cpus() > 1 && (wait || timeout--))
244 udelay(1); 240 udelay(1);
245 } 241 }
246 242
243finish:
247 local_irq_save(flags); 244 local_irq_save(flags);
248 disable_local_APIC(); 245 disable_local_APIC();
249 local_irq_restore(flags); 246 local_irq_restore(flags);
250} 247}
251 248
252static void native_smp_disable_nmi_ipi(void)
253{
254 smp_ops.stop_other_cpus = native_irq_stop_other_cpus;
255}
256
257/* 249/*
258 * Reschedule call back. 250 * Reschedule call back.
259 */ 251 */
@@ -287,8 +279,8 @@ void smp_call_function_single_interrupt(struct pt_regs *regs)
287 279
288static int __init nonmi_ipi_setup(char *str) 280static int __init nonmi_ipi_setup(char *str)
289{ 281{
290 native_smp_disable_nmi_ipi(); 282 smp_no_nmi_ipi = true;
291 return 1; 283 return 1;
292} 284}
293 285
294__setup("nonmi_ipi", nonmi_ipi_setup); 286__setup("nonmi_ipi", nonmi_ipi_setup);
@@ -298,7 +290,7 @@ struct smp_ops smp_ops = {
298 .smp_prepare_cpus = native_smp_prepare_cpus, 290 .smp_prepare_cpus = native_smp_prepare_cpus,
299 .smp_cpus_done = native_smp_cpus_done, 291 .smp_cpus_done = native_smp_cpus_done,
300 292
301 .stop_other_cpus = native_nmi_stop_other_cpus, 293 .stop_other_cpus = native_stop_other_cpus,
302 .smp_send_reschedule = native_smp_send_reschedule, 294 .smp_send_reschedule = native_smp_send_reschedule,
303 295
304 .cpu_up = native_cpu_up, 296 .cpu_up = native_cpu_up,
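The two stop_other_cpus() variants are merged into one escalating sequence: send REBOOT_VECTOR first and give the other CPUs up to a second to release spinlocks and re-enable interrupts, then, for any CPU still online, fall back to an NMI with a 10 ms wait — unless that fallback is disabled:

	nonmi_ipi	# kernel command-line parameter: never escalate to NMI

This replaces the old scheme in which the nonmi_ipi parameter swapped in a wholly different smp_ops.stop_other_cpus implementation.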
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 3acaf51dfdd..433529e29be 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -299,59 +299,90 @@ void __cpuinit smp_store_cpu_info(int id)
299 identify_secondary_cpu(c); 299 identify_secondary_cpu(c);
300} 300}
301 301
302static void __cpuinit link_thread_siblings(int cpu1, int cpu2) 302static bool __cpuinit
303topology_sane(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o, const char *name)
303{ 304{
304 cpumask_set_cpu(cpu1, cpu_sibling_mask(cpu2)); 305 int cpu1 = c->cpu_index, cpu2 = o->cpu_index;
305 cpumask_set_cpu(cpu2, cpu_sibling_mask(cpu1)); 306
306 cpumask_set_cpu(cpu1, cpu_core_mask(cpu2)); 307 return !WARN_ONCE(cpu_to_node(cpu1) != cpu_to_node(cpu2),
307 cpumask_set_cpu(cpu2, cpu_core_mask(cpu1)); 308 "sched: CPU #%d's %s-sibling CPU #%d is not on the same node! "
308 cpumask_set_cpu(cpu1, cpu_llc_shared_mask(cpu2)); 309 "[node: %d != %d]. Ignoring dependency.\n",
309 cpumask_set_cpu(cpu2, cpu_llc_shared_mask(cpu1)); 310 cpu1, name, cpu2, cpu_to_node(cpu1), cpu_to_node(cpu2));
310} 311}
311 312
313#define link_mask(_m, c1, c2) \
314do { \
315 cpumask_set_cpu((c1), cpu_##_m##_mask(c2)); \
316 cpumask_set_cpu((c2), cpu_##_m##_mask(c1)); \
317} while (0)
318
319static bool __cpuinit match_smt(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
320{
321 if (cpu_has(c, X86_FEATURE_TOPOEXT)) {
322 int cpu1 = c->cpu_index, cpu2 = o->cpu_index;
323
324 if (c->phys_proc_id == o->phys_proc_id &&
325 per_cpu(cpu_llc_id, cpu1) == per_cpu(cpu_llc_id, cpu2) &&
326 c->compute_unit_id == o->compute_unit_id)
327 return topology_sane(c, o, "smt");
328
329 } else if (c->phys_proc_id == o->phys_proc_id &&
330 c->cpu_core_id == o->cpu_core_id) {
331 return topology_sane(c, o, "smt");
332 }
333
334 return false;
335}
336
337static bool __cpuinit match_llc(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
338{
339 int cpu1 = c->cpu_index, cpu2 = o->cpu_index;
340
341 if (per_cpu(cpu_llc_id, cpu1) != BAD_APICID &&
342 per_cpu(cpu_llc_id, cpu1) == per_cpu(cpu_llc_id, cpu2))
343 return topology_sane(c, o, "llc");
344
345 return false;
346}
347
348static bool __cpuinit match_mc(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
349{
350 if (c->phys_proc_id == o->phys_proc_id)
351 return topology_sane(c, o, "mc");
352
353 return false;
354}
312 355
313void __cpuinit set_cpu_sibling_map(int cpu) 356void __cpuinit set_cpu_sibling_map(int cpu)
314{ 357{
315 int i; 358 bool has_mc = boot_cpu_data.x86_max_cores > 1;
359 bool has_smt = smp_num_siblings > 1;
316 struct cpuinfo_x86 *c = &cpu_data(cpu); 360 struct cpuinfo_x86 *c = &cpu_data(cpu);
361 struct cpuinfo_x86 *o;
362 int i;
317 363
318 cpumask_set_cpu(cpu, cpu_sibling_setup_mask); 364 cpumask_set_cpu(cpu, cpu_sibling_setup_mask);
319 365
320 if (smp_num_siblings > 1) { 366 if (!has_smt && !has_mc) {
321 for_each_cpu(i, cpu_sibling_setup_mask) {
322 struct cpuinfo_x86 *o = &cpu_data(i);
323
324 if (cpu_has(c, X86_FEATURE_TOPOEXT)) {
325 if (c->phys_proc_id == o->phys_proc_id &&
326 per_cpu(cpu_llc_id, cpu) == per_cpu(cpu_llc_id, i) &&
327 c->compute_unit_id == o->compute_unit_id)
328 link_thread_siblings(cpu, i);
329 } else if (c->phys_proc_id == o->phys_proc_id &&
330 c->cpu_core_id == o->cpu_core_id) {
331 link_thread_siblings(cpu, i);
332 }
333 }
334 } else {
335 cpumask_set_cpu(cpu, cpu_sibling_mask(cpu)); 367 cpumask_set_cpu(cpu, cpu_sibling_mask(cpu));
336 } 368 cpumask_set_cpu(cpu, cpu_llc_shared_mask(cpu));
337 369 cpumask_set_cpu(cpu, cpu_core_mask(cpu));
338 cpumask_set_cpu(cpu, cpu_llc_shared_mask(cpu));
339
340 if (__this_cpu_read(cpu_info.x86_max_cores) == 1) {
341 cpumask_copy(cpu_core_mask(cpu), cpu_sibling_mask(cpu));
342 c->booted_cores = 1; 370 c->booted_cores = 1;
343 return; 371 return;
344 } 372 }
345 373
346 for_each_cpu(i, cpu_sibling_setup_mask) { 374 for_each_cpu(i, cpu_sibling_setup_mask) {
347 if (per_cpu(cpu_llc_id, cpu) != BAD_APICID && 375 o = &cpu_data(i);
348 per_cpu(cpu_llc_id, cpu) == per_cpu(cpu_llc_id, i)) { 376
349 cpumask_set_cpu(i, cpu_llc_shared_mask(cpu)); 377 if ((i == cpu) || (has_smt && match_smt(c, o)))
350 cpumask_set_cpu(cpu, cpu_llc_shared_mask(i)); 378 link_mask(sibling, cpu, i);
351 } 379
352 if (c->phys_proc_id == cpu_data(i).phys_proc_id) { 380 if ((i == cpu) || (has_mc && match_llc(c, o)))
353 cpumask_set_cpu(i, cpu_core_mask(cpu)); 381 link_mask(llc_shared, cpu, i);
354 cpumask_set_cpu(cpu, cpu_core_mask(i)); 382
383 if ((i == cpu) || (has_mc && match_mc(c, o))) {
384 link_mask(core, cpu, i);
385
355 /* 386 /*
356 * Does this new cpu bringup a new core? 387 * Does this new cpu bringup a new core?
357 */ 388 */
@@ -382,8 +413,7 @@ const struct cpumask *cpu_coregroup_mask(int cpu)
382 * For perf, we return last level cache shared map. 413 * For perf, we return last level cache shared map.
383 * And for power savings, we return cpu_core_map 414 * And for power savings, we return cpu_core_map
384 */ 415 */
385 if ((sched_mc_power_savings || sched_smt_power_savings) && 416 if (!(cpu_has(c, X86_FEATURE_AMD_DCM)))
386 !(cpu_has(c, X86_FEATURE_AMD_DCM)))
387 return cpu_core_mask(cpu); 417 return cpu_core_mask(cpu);
388 else 418 else
389 return cpu_llc_shared_mask(cpu); 419 return cpu_llc_shared_mask(cpu);
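The refactor replaces the hand-rolled sibling wiring with one helper macro plus three match predicates. link_mask(sibling, cpu, i), for instance, expands (via the cpu_##_m##_mask token pasting) to exactly what link_thread_siblings() used to spell out per mask:

	cpumask_set_cpu(cpu, cpu_sibling_mask(i));
	cpumask_set_cpu(i, cpu_sibling_mask(cpu));

topology_sane() gates every link: it returns true only when both CPUs sit on the same NUMA node, and WARN_ONCE()s, then refuses the link, when firmware reports siblings across nodes. The cpu_coregroup_mask() hunk also drops the sched_mc_power_savings/sched_smt_power_savings test, which went away with the scheduler's power-savings knobs.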
diff --git a/arch/x86/kernel/test_rodata.c b/arch/x86/kernel/test_rodata.c
index c29e235792a..b79133abda4 100644
--- a/arch/x86/kernel/test_rodata.c
+++ b/arch/x86/kernel/test_rodata.c
@@ -12,6 +12,7 @@
12#include <linux/module.h> 12#include <linux/module.h>
13#include <asm/cacheflush.h> 13#include <asm/cacheflush.h>
14#include <asm/sections.h> 14#include <asm/sections.h>
15#include <asm/asm.h>
15 16
16int rodata_test(void) 17int rodata_test(void)
17{ 18{
@@ -42,14 +43,7 @@ int rodata_test(void)
42 ".section .fixup,\"ax\"\n" 43 ".section .fixup,\"ax\"\n"
43 "2: jmp 1b\n" 44 "2: jmp 1b\n"
44 ".previous\n" 45 ".previous\n"
45 ".section __ex_table,\"a\"\n" 46 _ASM_EXTABLE(0b,2b)
46 " .align 16\n"
47#ifdef CONFIG_X86_32
48 " .long 0b,2b\n"
49#else
50 " .quad 0b,2b\n"
51#endif
52 ".previous"
53 : [rslt] "=r" (result) 47 : [rslt] "=r" (result)
54 : [rodata_test] "r" (&rodata_test_data), [zero] "r" (0UL) 48 : [rodata_test] "r" (&rodata_test_data), [zero] "r" (0UL)
55 ); 49 );
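The open-coded __ex_table entry — with its own 32/64-bit #ifdef — becomes a single _ASM_EXTABLE(from, to) from <asm/asm.h>, so every user picks up whatever entry format the kernel currently emits. The general pattern, as a self-contained sketch (my_read and its operands are illustrative, not from this file):

	#include <linux/errno.h>
	#include <asm/asm.h>

	static inline int my_read(unsigned long *dst, const unsigned long *src)
	{
		int err = 0;

		asm volatile("1:	mov %2, %1\n"
			     "2:\n"
			     ".section .fixup,\"ax\"\n"
			     "3:	mov %3, %0\n"
			     "	jmp 2b\n"
			     ".previous\n"
			     _ASM_EXTABLE(1b, 3b)
			     : "+r" (err), "=r" (*dst)
			     : "m" (*src), "i" (-EFAULT));
		return err;
	}

If the mov at 1: faults, the exception table sends control to 3:, which loads -EFAULT into err and resumes at 2:.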
diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c
index c6eba2b4267..24d3c91e981 100644
--- a/arch/x86/kernel/time.c
+++ b/arch/x86/kernel/time.c
@@ -14,7 +14,6 @@
14#include <linux/i8253.h> 14#include <linux/i8253.h>
15#include <linux/time.h> 15#include <linux/time.h>
16#include <linux/export.h> 16#include <linux/export.h>
17#include <linux/mca.h>
18 17
19#include <asm/vsyscall.h> 18#include <asm/vsyscall.h>
20#include <asm/x86_init.h> 19#include <asm/x86_init.h>
@@ -58,11 +57,6 @@ EXPORT_SYMBOL(profile_pc);
58static irqreturn_t timer_interrupt(int irq, void *dev_id) 57static irqreturn_t timer_interrupt(int irq, void *dev_id)
59{ 58{
60 global_clock_event->event_handler(global_clock_event); 59 global_clock_event->event_handler(global_clock_event);
61
62 /* MCA bus quirk: Acknowledge irq0 by setting bit 7 in port 0x61 */
63 if (MCA_bus)
64 outb_p(inb_p(0x61)| 0x80, 0x61);
65
66 return IRQ_HANDLED; 60 return IRQ_HANDLED;
67} 61}
68 62
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index ff9281f1602..ff08457a025 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -37,10 +37,6 @@
37#include <linux/eisa.h> 37#include <linux/eisa.h>
38#endif 38#endif
39 39
40#ifdef CONFIG_MCA
41#include <linux/mca.h>
42#endif
43
44#if defined(CONFIG_EDAC) 40#if defined(CONFIG_EDAC)
45#include <linux/edac.h> 41#include <linux/edac.h>
46#endif 42#endif
@@ -50,6 +46,7 @@
50#include <asm/processor.h> 46#include <asm/processor.h>
51#include <asm/debugreg.h> 47#include <asm/debugreg.h>
52#include <linux/atomic.h> 48#include <linux/atomic.h>
49#include <asm/ftrace.h>
53#include <asm/traps.h> 50#include <asm/traps.h>
54#include <asm/desc.h> 51#include <asm/desc.h>
55#include <asm/i387.h> 52#include <asm/i387.h>
@@ -303,8 +300,13 @@ gp_in_kernel:
303} 300}
304 301
305/* May run on IST stack. */ 302/* May run on IST stack. */
306dotraplinkage void __kprobes do_int3(struct pt_regs *regs, long error_code) 303dotraplinkage void __kprobes notrace do_int3(struct pt_regs *regs, long error_code)
307{ 304{
305#ifdef CONFIG_DYNAMIC_FTRACE
306 /* ftrace must be first, everything else may cause a recursive crash */
307 if (unlikely(modifying_ftrace_code) && ftrace_int3_handler(regs))
308 return;
309#endif
308#ifdef CONFIG_KGDB_LOW_LEVEL_TRAP 310#ifdef CONFIG_KGDB_LOW_LEVEL_TRAP
309 if (kgdb_ll_trap(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP, 311 if (kgdb_ll_trap(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP,
310 SIGTRAP) == NOTIFY_STOP) 312 SIGTRAP) == NOTIFY_STOP)
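This is the hook for the large ftrace.c rework in this series: code patching now uses breakpoints instead of stop_machine(), so do_int3() must divert ftrace's own int3s before running anything traceable — hence the notrace on do_int3() and the check coming first. A rough summary of the scheme (steps paraphrased, not the patch verbatim):

	/*
	 * 1. bump modifying_ftrace_code; write int3 over the first
	 *    opcode byte of the call site; sync all CPUs
	 * 2. write the new tail of the instruction; sync
	 * 3. write the final first byte; sync; drop the counter
	 *
	 * A CPU executing the site meanwhile traps into do_int3();
	 * ftrace_int3_handler() recognizes the address and moves
	 * regs->ip past the instruction instead of reporting a
	 * breakpoint.
	 */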
diff --git a/arch/x86/kernel/vsmp_64.c b/arch/x86/kernel/vsmp_64.c
index a1d804bcd48..8eeb55a551b 100644
--- a/arch/x86/kernel/vsmp_64.c
+++ b/arch/x86/kernel/vsmp_64.c
@@ -15,6 +15,7 @@
15#include <linux/init.h> 15#include <linux/init.h>
16#include <linux/pci_ids.h> 16#include <linux/pci_ids.h>
17#include <linux/pci_regs.h> 17#include <linux/pci_regs.h>
18#include <linux/smp.h>
18 19
19#include <asm/apic.h> 20#include <asm/apic.h>
20#include <asm/pci-direct.h> 21#include <asm/pci-direct.h>
@@ -22,6 +23,8 @@
22#include <asm/paravirt.h> 23#include <asm/paravirt.h>
23#include <asm/setup.h> 24#include <asm/setup.h>
24 25
26#define TOPOLOGY_REGISTER_OFFSET 0x10
27
25#if defined CONFIG_PCI && defined CONFIG_PARAVIRT 28#if defined CONFIG_PCI && defined CONFIG_PARAVIRT
26/* 29/*
27 * Interrupt control on vSMPowered systems: 30 * Interrupt control on vSMPowered systems:
@@ -149,12 +152,49 @@ int is_vsmp_box(void)
149 return 0; 152 return 0;
150} 153}
151#endif 154#endif
155
156static void __init vsmp_cap_cpus(void)
157{
158#if !defined(CONFIG_X86_VSMP) && defined(CONFIG_SMP)
159 void __iomem *address;
160 unsigned int cfg, topology, node_shift, maxcpus;
161
162 /*
163 * CONFIG_X86_VSMP is not configured, so limit the number CPUs to the
164 * ones present in the first board, unless explicitly overridden by
165 * setup_max_cpus
166 */
167 if (setup_max_cpus != NR_CPUS)
168 return;
169
170 /* Read the vSMP Foundation topology register */
171 cfg = read_pci_config(0, 0x1f, 0, PCI_BASE_ADDRESS_0);
172 address = early_ioremap(cfg + TOPOLOGY_REGISTER_OFFSET, 4);
173 if (WARN_ON(!address))
174 return;
175
176 topology = readl(address);
177 node_shift = (topology >> 16) & 0x7;
178 if (!node_shift)
179 /* The value 0 should be decoded as 8 */
180 node_shift = 8;
181 maxcpus = (topology & ((1 << node_shift) - 1)) + 1;
182
183 pr_info("vSMP CTL: Capping CPUs to %d (CONFIG_X86_VSMP is unset)\n",
184 maxcpus);
185 setup_max_cpus = maxcpus;
186 early_iounmap(address, 4);
187#endif
188}
189
152void __init vsmp_init(void) 190void __init vsmp_init(void)
153{ 191{
154 detect_vsmp_box(); 192 detect_vsmp_box();
155 if (!is_vsmp_box()) 193 if (!is_vsmp_box())
156 return; 194 return;
157 195
196 vsmp_cap_cpus();
197
158 set_vsmp_pv_ops(); 198 set_vsmp_pv_ops();
159 return; 199 return;
160} 200}
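The topology register decode, as a worked example (register value invented): bits 18:16 hold node_shift, with 0 decoding as 8, and the low node_shift bits hold CPUs-per-board minus one.

	topology   = 0x00030007;
	node_shift = (topology >> 16) & 0x7;			/* = 3 */
	maxcpus    = (topology & ((1 << node_shift) - 1)) + 1;	/* = 7 + 1 = 8 */

So with CONFIG_X86_VSMP unset and no maxcpus= override on the command line, boot would be capped at 8 CPUs for this value.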
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index 9cf71d0b2d3..35c5e543f55 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -18,6 +18,7 @@
18#include <asm/e820.h> 18#include <asm/e820.h>
19#include <asm/time.h> 19#include <asm/time.h>
20#include <asm/irq.h> 20#include <asm/irq.h>
21#include <asm/io_apic.h>
21#include <asm/pat.h> 22#include <asm/pat.h>
22#include <asm/tsc.h> 23#include <asm/tsc.h>
23#include <asm/iommu.h> 24#include <asm/iommu.h>
@@ -119,3 +120,10 @@ struct x86_msi_ops x86_msi = {
119 .teardown_msi_irqs = default_teardown_msi_irqs, 120 .teardown_msi_irqs = default_teardown_msi_irqs,
120 .restore_msi_irqs = default_restore_msi_irqs, 121 .restore_msi_irqs = default_restore_msi_irqs,
121}; 122};
123
124struct x86_io_apic_ops x86_io_apic_ops = {
125 .init = native_io_apic_init_mappings,
126 .read = native_io_apic_read,
127 .write = native_io_apic_write,
128 .modify = native_io_apic_modify,
129};
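The new x86_io_apic_ops struct gives IO-APIC access the same indirection the other x86_init hooks already have, which is what lets setup.c above guard the init with "if (x86_io_apic_ops.init)". A hypothetical platform override (names invented; the real consumers land elsewhere in this series):

	static unsigned int myplat_io_apic_read(unsigned int apic,
						unsigned int reg)
	{
		/* e.g. ask a hypervisor instead of touching MMIO */
		return 0;
	}

	static void __init myplat_platform_setup(void)
	{
		x86_io_apic_ops.read = myplat_io_apic_read;
	}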
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c
index e62728e30b0..bd18149b2b0 100644
--- a/arch/x86/kernel/xsave.c
+++ b/arch/x86/kernel/xsave.c
@@ -48,8 +48,6 @@ void __sanitize_i387_state(struct task_struct *tsk)
48 if (!fx) 48 if (!fx)
49 return; 49 return;
50 50
51 BUG_ON(__thread_has_fpu(tsk));
52
53 xstate_bv = tsk->thread.fpu.state->xsave.xsave_hdr.xstate_bv; 51 xstate_bv = tsk->thread.fpu.state->xsave.xsave_hdr.xstate_bv;
54 52
55 /* 53 /*