diff options
Diffstat (limited to 'arch/x86/kernel')
58 files changed, 1695 insertions, 1300 deletions
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 56ebd1f9844..bb8529275aa 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile | |||
@@ -49,7 +49,6 @@ obj-y += cpu/ | |||
49 | obj-y += acpi/ | 49 | obj-y += acpi/ |
50 | obj-y += reboot.o | 50 | obj-y += reboot.o |
51 | obj-$(CONFIG_X86_32) += reboot_32.o | 51 | obj-$(CONFIG_X86_32) += reboot_32.o |
52 | obj-$(CONFIG_MCA) += mca_32.o | ||
53 | obj-$(CONFIG_X86_MSR) += msr.o | 52 | obj-$(CONFIG_X86_MSR) += msr.o |
54 | obj-$(CONFIG_X86_CPUID) += cpuid.o | 53 | obj-$(CONFIG_X86_CPUID) += cpuid.o |
55 | obj-$(CONFIG_PCI) += early-quirks.o | 54 | obj-$(CONFIG_PCI) += early-quirks.o |
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 7c439fe4941..8afb6931981 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c | |||
@@ -990,7 +990,7 @@ void __init mp_config_acpi_legacy_irqs(void) | |||
990 | int i; | 990 | int i; |
991 | struct mpc_intsrc mp_irq; | 991 | struct mpc_intsrc mp_irq; |
992 | 992 | ||
993 | #if defined (CONFIG_MCA) || defined (CONFIG_EISA) | 993 | #ifdef CONFIG_EISA |
994 | /* | 994 | /* |
995 | * Fabricate the legacy ISA bus (bus #31). | 995 | * Fabricate the legacy ISA bus (bus #31). |
996 | */ | 996 | */ |
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 3722179a49d..39a222e094a 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c | |||
@@ -1326,11 +1326,13 @@ void __cpuinit setup_local_APIC(void) | |||
1326 | acked); | 1326 | acked); |
1327 | break; | 1327 | break; |
1328 | } | 1328 | } |
1329 | if (cpu_has_tsc) { | 1329 | if (queued) { |
1330 | rdtscll(ntsc); | 1330 | if (cpu_has_tsc) { |
1331 | max_loops = (cpu_khz << 10) - (ntsc - tsc); | 1331 | rdtscll(ntsc); |
1332 | } else | 1332 | max_loops = (cpu_khz << 10) - (ntsc - tsc); |
1333 | max_loops--; | 1333 | } else |
1334 | max_loops--; | ||
1335 | } | ||
1334 | } while (queued && max_loops > 0); | 1336 | } while (queued && max_loops > 0); |
1335 | WARN_ON(max_loops <= 0); | 1337 | WARN_ON(max_loops <= 0); |
1336 | 1338 | ||
diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c index 359b6899a36..0e881c46e8c 100644 --- a/arch/x86/kernel/apic/apic_flat_64.c +++ b/arch/x86/kernel/apic/apic_flat_64.c | |||
@@ -227,6 +227,7 @@ static struct apic apic_flat = { | |||
227 | 227 | ||
228 | .read = native_apic_mem_read, | 228 | .read = native_apic_mem_read, |
229 | .write = native_apic_mem_write, | 229 | .write = native_apic_mem_write, |
230 | .eoi_write = native_apic_mem_write, | ||
230 | .icr_read = native_apic_icr_read, | 231 | .icr_read = native_apic_icr_read, |
231 | .icr_write = native_apic_icr_write, | 232 | .icr_write = native_apic_icr_write, |
232 | .wait_icr_idle = native_apic_wait_icr_idle, | 233 | .wait_icr_idle = native_apic_wait_icr_idle, |
@@ -386,6 +387,7 @@ static struct apic apic_physflat = { | |||
386 | 387 | ||
387 | .read = native_apic_mem_read, | 388 | .read = native_apic_mem_read, |
388 | .write = native_apic_mem_write, | 389 | .write = native_apic_mem_write, |
390 | .eoi_write = native_apic_mem_write, | ||
389 | .icr_read = native_apic_icr_read, | 391 | .icr_read = native_apic_icr_read, |
390 | .icr_write = native_apic_icr_write, | 392 | .icr_write = native_apic_icr_write, |
391 | .wait_icr_idle = native_apic_wait_icr_idle, | 393 | .wait_icr_idle = native_apic_wait_icr_idle, |
diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c index 634ae6cdd5c..a6e4c6e06c0 100644 --- a/arch/x86/kernel/apic/apic_noop.c +++ b/arch/x86/kernel/apic/apic_noop.c | |||
@@ -181,6 +181,7 @@ struct apic apic_noop = { | |||
181 | 181 | ||
182 | .read = noop_apic_read, | 182 | .read = noop_apic_read, |
183 | .write = noop_apic_write, | 183 | .write = noop_apic_write, |
184 | .eoi_write = noop_apic_write, | ||
184 | .icr_read = noop_apic_icr_read, | 185 | .icr_read = noop_apic_icr_read, |
185 | .icr_write = noop_apic_icr_write, | 186 | .icr_write = noop_apic_icr_write, |
186 | .wait_icr_idle = noop_apic_wait_icr_idle, | 187 | .wait_icr_idle = noop_apic_wait_icr_idle, |
diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c index 23e75422e01..6ec6d5d297c 100644 --- a/arch/x86/kernel/apic/apic_numachip.c +++ b/arch/x86/kernel/apic/apic_numachip.c | |||
@@ -295,6 +295,7 @@ static struct apic apic_numachip __refconst = { | |||
295 | 295 | ||
296 | .read = native_apic_mem_read, | 296 | .read = native_apic_mem_read, |
297 | .write = native_apic_mem_write, | 297 | .write = native_apic_mem_write, |
298 | .eoi_write = native_apic_mem_write, | ||
298 | .icr_read = native_apic_icr_read, | 299 | .icr_read = native_apic_icr_read, |
299 | .icr_write = native_apic_icr_write, | 300 | .icr_write = native_apic_icr_write, |
300 | .wait_icr_idle = native_apic_wait_icr_idle, | 301 | .wait_icr_idle = native_apic_wait_icr_idle, |
diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c index 0cdec7065af..31fbdbfbf96 100644 --- a/arch/x86/kernel/apic/bigsmp_32.c +++ b/arch/x86/kernel/apic/bigsmp_32.c | |||
@@ -248,6 +248,7 @@ static struct apic apic_bigsmp = { | |||
248 | 248 | ||
249 | .read = native_apic_mem_read, | 249 | .read = native_apic_mem_read, |
250 | .write = native_apic_mem_write, | 250 | .write = native_apic_mem_write, |
251 | .eoi_write = native_apic_mem_write, | ||
251 | .icr_read = native_apic_icr_read, | 252 | .icr_read = native_apic_icr_read, |
252 | .icr_write = native_apic_icr_write, | 253 | .icr_write = native_apic_icr_write, |
253 | .wait_icr_idle = native_apic_wait_icr_idle, | 254 | .wait_icr_idle = native_apic_wait_icr_idle, |
diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c index e42d1d3b913..db4ab1be3c7 100644 --- a/arch/x86/kernel/apic/es7000_32.c +++ b/arch/x86/kernel/apic/es7000_32.c | |||
@@ -678,6 +678,7 @@ static struct apic __refdata apic_es7000_cluster = { | |||
678 | 678 | ||
679 | .read = native_apic_mem_read, | 679 | .read = native_apic_mem_read, |
680 | .write = native_apic_mem_write, | 680 | .write = native_apic_mem_write, |
681 | .eoi_write = native_apic_mem_write, | ||
681 | .icr_read = native_apic_icr_read, | 682 | .icr_read = native_apic_icr_read, |
682 | .icr_write = native_apic_icr_write, | 683 | .icr_write = native_apic_icr_write, |
683 | .wait_icr_idle = native_apic_wait_icr_idle, | 684 | .wait_icr_idle = native_apic_wait_icr_idle, |
@@ -742,6 +743,7 @@ static struct apic __refdata apic_es7000 = { | |||
742 | 743 | ||
743 | .read = native_apic_mem_read, | 744 | .read = native_apic_mem_read, |
744 | .write = native_apic_mem_write, | 745 | .write = native_apic_mem_write, |
746 | .eoi_write = native_apic_mem_write, | ||
745 | .icr_read = native_apic_icr_read, | 747 | .icr_read = native_apic_icr_read, |
746 | .icr_write = native_apic_icr_write, | 748 | .icr_write = native_apic_icr_write, |
747 | .wait_icr_idle = native_apic_wait_icr_idle, | 749 | .wait_icr_idle = native_apic_wait_icr_idle, |
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index ef0648cd708..ac96561d1a9 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c | |||
@@ -68,24 +68,6 @@ | |||
68 | #define for_each_irq_pin(entry, head) \ | 68 | #define for_each_irq_pin(entry, head) \ |
69 | for (entry = head; entry; entry = entry->next) | 69 | for (entry = head; entry; entry = entry->next) |
70 | 70 | ||
71 | static void __init __ioapic_init_mappings(void); | ||
72 | |||
73 | static unsigned int __io_apic_read (unsigned int apic, unsigned int reg); | ||
74 | static void __io_apic_write (unsigned int apic, unsigned int reg, unsigned int val); | ||
75 | static void __io_apic_modify(unsigned int apic, unsigned int reg, unsigned int val); | ||
76 | |||
77 | static struct io_apic_ops io_apic_ops = { | ||
78 | .init = __ioapic_init_mappings, | ||
79 | .read = __io_apic_read, | ||
80 | .write = __io_apic_write, | ||
81 | .modify = __io_apic_modify, | ||
82 | }; | ||
83 | |||
84 | void __init set_io_apic_ops(const struct io_apic_ops *ops) | ||
85 | { | ||
86 | io_apic_ops = *ops; | ||
87 | } | ||
88 | |||
89 | #ifdef CONFIG_IRQ_REMAP | 71 | #ifdef CONFIG_IRQ_REMAP |
90 | static void irq_remap_modify_chip_defaults(struct irq_chip *chip); | 72 | static void irq_remap_modify_chip_defaults(struct irq_chip *chip); |
91 | static inline bool irq_remapped(struct irq_cfg *cfg) | 73 | static inline bool irq_remapped(struct irq_cfg *cfg) |
@@ -158,7 +140,7 @@ int mp_irq_entries; | |||
158 | /* GSI interrupts */ | 140 | /* GSI interrupts */ |
159 | static int nr_irqs_gsi = NR_IRQS_LEGACY; | 141 | static int nr_irqs_gsi = NR_IRQS_LEGACY; |
160 | 142 | ||
161 | #if defined (CONFIG_MCA) || defined (CONFIG_EISA) | 143 | #ifdef CONFIG_EISA |
162 | int mp_bus_id_to_type[MAX_MP_BUSSES]; | 144 | int mp_bus_id_to_type[MAX_MP_BUSSES]; |
163 | #endif | 145 | #endif |
164 | 146 | ||
@@ -329,21 +311,6 @@ static void free_irq_at(unsigned int at, struct irq_cfg *cfg) | |||
329 | irq_free_desc(at); | 311 | irq_free_desc(at); |
330 | } | 312 | } |
331 | 313 | ||
332 | static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg) | ||
333 | { | ||
334 | return io_apic_ops.read(apic, reg); | ||
335 | } | ||
336 | |||
337 | static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned int value) | ||
338 | { | ||
339 | io_apic_ops.write(apic, reg, value); | ||
340 | } | ||
341 | |||
342 | static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value) | ||
343 | { | ||
344 | io_apic_ops.modify(apic, reg, value); | ||
345 | } | ||
346 | |||
347 | 314 | ||
348 | struct io_apic { | 315 | struct io_apic { |
349 | unsigned int index; | 316 | unsigned int index; |
@@ -365,14 +332,14 @@ static inline void io_apic_eoi(unsigned int apic, unsigned int vector) | |||
365 | writel(vector, &io_apic->eoi); | 332 | writel(vector, &io_apic->eoi); |
366 | } | 333 | } |
367 | 334 | ||
368 | static unsigned int __io_apic_read(unsigned int apic, unsigned int reg) | 335 | unsigned int native_io_apic_read(unsigned int apic, unsigned int reg) |
369 | { | 336 | { |
370 | struct io_apic __iomem *io_apic = io_apic_base(apic); | 337 | struct io_apic __iomem *io_apic = io_apic_base(apic); |
371 | writel(reg, &io_apic->index); | 338 | writel(reg, &io_apic->index); |
372 | return readl(&io_apic->data); | 339 | return readl(&io_apic->data); |
373 | } | 340 | } |
374 | 341 | ||
375 | static void __io_apic_write(unsigned int apic, unsigned int reg, unsigned int value) | 342 | void native_io_apic_write(unsigned int apic, unsigned int reg, unsigned int value) |
376 | { | 343 | { |
377 | struct io_apic __iomem *io_apic = io_apic_base(apic); | 344 | struct io_apic __iomem *io_apic = io_apic_base(apic); |
378 | 345 | ||
@@ -386,7 +353,7 @@ static void __io_apic_write(unsigned int apic, unsigned int reg, unsigned int va | |||
386 | * | 353 | * |
387 | * Older SiS APIC requires we rewrite the index register | 354 | * Older SiS APIC requires we rewrite the index register |
388 | */ | 355 | */ |
389 | static void __io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value) | 356 | void native_io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value) |
390 | { | 357 | { |
391 | struct io_apic __iomem *io_apic = io_apic_base(apic); | 358 | struct io_apic __iomem *io_apic = io_apic_base(apic); |
392 | 359 | ||
@@ -395,29 +362,6 @@ static void __io_apic_modify(unsigned int apic, unsigned int reg, unsigned int v | |||
395 | writel(value, &io_apic->data); | 362 | writel(value, &io_apic->data); |
396 | } | 363 | } |
397 | 364 | ||
398 | static bool io_apic_level_ack_pending(struct irq_cfg *cfg) | ||
399 | { | ||
400 | struct irq_pin_list *entry; | ||
401 | unsigned long flags; | ||
402 | |||
403 | raw_spin_lock_irqsave(&ioapic_lock, flags); | ||
404 | for_each_irq_pin(entry, cfg->irq_2_pin) { | ||
405 | unsigned int reg; | ||
406 | int pin; | ||
407 | |||
408 | pin = entry->pin; | ||
409 | reg = io_apic_read(entry->apic, 0x10 + pin*2); | ||
410 | /* Is the remote IRR bit set? */ | ||
411 | if (reg & IO_APIC_REDIR_REMOTE_IRR) { | ||
412 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); | ||
413 | return true; | ||
414 | } | ||
415 | } | ||
416 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); | ||
417 | |||
418 | return false; | ||
419 | } | ||
420 | |||
421 | union entry_union { | 365 | union entry_union { |
422 | struct { u32 w1, w2; }; | 366 | struct { u32 w1, w2; }; |
423 | struct IO_APIC_route_entry entry; | 367 | struct IO_APIC_route_entry entry; |
@@ -891,7 +835,7 @@ static int __init find_isa_irq_apic(int irq, int type) | |||
891 | return -1; | 835 | return -1; |
892 | } | 836 | } |
893 | 837 | ||
894 | #if defined(CONFIG_EISA) || defined(CONFIG_MCA) | 838 | #ifdef CONFIG_EISA |
895 | /* | 839 | /* |
896 | * EISA Edge/Level control register, ELCR | 840 | * EISA Edge/Level control register, ELCR |
897 | */ | 841 | */ |
@@ -928,12 +872,6 @@ static int EISA_ELCR(unsigned int irq) | |||
928 | #define default_PCI_trigger(idx) (1) | 872 | #define default_PCI_trigger(idx) (1) |
929 | #define default_PCI_polarity(idx) (1) | 873 | #define default_PCI_polarity(idx) (1) |
930 | 874 | ||
931 | /* MCA interrupts are always polarity zero level triggered, | ||
932 | * when listed as conforming in the MP table. */ | ||
933 | |||
934 | #define default_MCA_trigger(idx) (1) | ||
935 | #define default_MCA_polarity(idx) default_ISA_polarity(idx) | ||
936 | |||
937 | static int irq_polarity(int idx) | 875 | static int irq_polarity(int idx) |
938 | { | 876 | { |
939 | int bus = mp_irqs[idx].srcbus; | 877 | int bus = mp_irqs[idx].srcbus; |
@@ -991,7 +929,7 @@ static int irq_trigger(int idx) | |||
991 | trigger = default_ISA_trigger(idx); | 929 | trigger = default_ISA_trigger(idx); |
992 | else | 930 | else |
993 | trigger = default_PCI_trigger(idx); | 931 | trigger = default_PCI_trigger(idx); |
994 | #if defined(CONFIG_EISA) || defined(CONFIG_MCA) | 932 | #ifdef CONFIG_EISA |
995 | switch (mp_bus_id_to_type[bus]) { | 933 | switch (mp_bus_id_to_type[bus]) { |
996 | case MP_BUS_ISA: /* ISA pin */ | 934 | case MP_BUS_ISA: /* ISA pin */ |
997 | { | 935 | { |
@@ -1008,11 +946,6 @@ static int irq_trigger(int idx) | |||
1008 | /* set before the switch */ | 946 | /* set before the switch */ |
1009 | break; | 947 | break; |
1010 | } | 948 | } |
1011 | case MP_BUS_MCA: /* MCA pin */ | ||
1012 | { | ||
1013 | trigger = default_MCA_trigger(idx); | ||
1014 | break; | ||
1015 | } | ||
1016 | default: | 949 | default: |
1017 | { | 950 | { |
1018 | printk(KERN_WARNING "broken BIOS!!\n"); | 951 | printk(KERN_WARNING "broken BIOS!!\n"); |
@@ -2439,6 +2372,29 @@ static void ack_apic_edge(struct irq_data *data) | |||
2439 | atomic_t irq_mis_count; | 2372 | atomic_t irq_mis_count; |
2440 | 2373 | ||
2441 | #ifdef CONFIG_GENERIC_PENDING_IRQ | 2374 | #ifdef CONFIG_GENERIC_PENDING_IRQ |
2375 | static bool io_apic_level_ack_pending(struct irq_cfg *cfg) | ||
2376 | { | ||
2377 | struct irq_pin_list *entry; | ||
2378 | unsigned long flags; | ||
2379 | |||
2380 | raw_spin_lock_irqsave(&ioapic_lock, flags); | ||
2381 | for_each_irq_pin(entry, cfg->irq_2_pin) { | ||
2382 | unsigned int reg; | ||
2383 | int pin; | ||
2384 | |||
2385 | pin = entry->pin; | ||
2386 | reg = io_apic_read(entry->apic, 0x10 + pin*2); | ||
2387 | /* Is the remote IRR bit set? */ | ||
2388 | if (reg & IO_APIC_REDIR_REMOTE_IRR) { | ||
2389 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); | ||
2390 | return true; | ||
2391 | } | ||
2392 | } | ||
2393 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); | ||
2394 | |||
2395 | return false; | ||
2396 | } | ||
2397 | |||
2442 | static inline bool ioapic_irqd_mask(struct irq_data *data, struct irq_cfg *cfg) | 2398 | static inline bool ioapic_irqd_mask(struct irq_data *data, struct irq_cfg *cfg) |
2443 | { | 2399 | { |
2444 | /* If we are moving the irq we need to mask it */ | 2400 | /* If we are moving the irq we need to mask it */ |
@@ -3756,12 +3712,7 @@ static struct resource * __init ioapic_setup_resources(int nr_ioapics) | |||
3756 | return res; | 3712 | return res; |
3757 | } | 3713 | } |
3758 | 3714 | ||
3759 | void __init ioapic_and_gsi_init(void) | 3715 | void __init native_io_apic_init_mappings(void) |
3760 | { | ||
3761 | io_apic_ops.init(); | ||
3762 | } | ||
3763 | |||
3764 | static void __init __ioapic_init_mappings(void) | ||
3765 | { | 3716 | { |
3766 | unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0; | 3717 | unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0; |
3767 | struct resource *ioapic_res; | 3718 | struct resource *ioapic_res; |
diff --git a/arch/x86/kernel/apic/numaq_32.c b/arch/x86/kernel/apic/numaq_32.c index 00d2422ca7c..f00a68cca37 100644 --- a/arch/x86/kernel/apic/numaq_32.c +++ b/arch/x86/kernel/apic/numaq_32.c | |||
@@ -530,6 +530,7 @@ static struct apic __refdata apic_numaq = { | |||
530 | 530 | ||
531 | .read = native_apic_mem_read, | 531 | .read = native_apic_mem_read, |
532 | .write = native_apic_mem_write, | 532 | .write = native_apic_mem_write, |
533 | .eoi_write = native_apic_mem_write, | ||
533 | .icr_read = native_apic_icr_read, | 534 | .icr_read = native_apic_icr_read, |
534 | .icr_write = native_apic_icr_write, | 535 | .icr_write = native_apic_icr_write, |
535 | .wait_icr_idle = native_apic_wait_icr_idle, | 536 | .wait_icr_idle = native_apic_wait_icr_idle, |
diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c index ff2c1b9aac4..1b291da09e6 100644 --- a/arch/x86/kernel/apic/probe_32.c +++ b/arch/x86/kernel/apic/probe_32.c | |||
@@ -142,6 +142,7 @@ static struct apic apic_default = { | |||
142 | 142 | ||
143 | .read = native_apic_mem_read, | 143 | .read = native_apic_mem_read, |
144 | .write = native_apic_mem_write, | 144 | .write = native_apic_mem_write, |
145 | .eoi_write = native_apic_mem_write, | ||
145 | .icr_read = native_apic_icr_read, | 146 | .icr_read = native_apic_icr_read, |
146 | .icr_write = native_apic_icr_write, | 147 | .icr_write = native_apic_icr_write, |
147 | .wait_icr_idle = native_apic_wait_icr_idle, | 148 | .wait_icr_idle = native_apic_wait_icr_idle, |
diff --git a/arch/x86/kernel/apic/summit_32.c b/arch/x86/kernel/apic/summit_32.c index fea000b27f0..659897c0075 100644 --- a/arch/x86/kernel/apic/summit_32.c +++ b/arch/x86/kernel/apic/summit_32.c | |||
@@ -546,6 +546,7 @@ static struct apic apic_summit = { | |||
546 | 546 | ||
547 | .read = native_apic_mem_read, | 547 | .read = native_apic_mem_read, |
548 | .write = native_apic_mem_write, | 548 | .write = native_apic_mem_write, |
549 | .eoi_write = native_apic_mem_write, | ||
549 | .icr_read = native_apic_icr_read, | 550 | .icr_read = native_apic_icr_read, |
550 | .icr_write = native_apic_icr_write, | 551 | .icr_write = native_apic_icr_write, |
551 | .wait_icr_idle = native_apic_wait_icr_idle, | 552 | .wait_icr_idle = native_apic_wait_icr_idle, |
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c index 48f3103b3c9..ff35cff0e1a 100644 --- a/arch/x86/kernel/apic/x2apic_cluster.c +++ b/arch/x86/kernel/apic/x2apic_cluster.c | |||
@@ -260,6 +260,7 @@ static struct apic apic_x2apic_cluster = { | |||
260 | 260 | ||
261 | .read = native_apic_msr_read, | 261 | .read = native_apic_msr_read, |
262 | .write = native_apic_msr_write, | 262 | .write = native_apic_msr_write, |
263 | .eoi_write = native_apic_msr_eoi_write, | ||
263 | .icr_read = native_x2apic_icr_read, | 264 | .icr_read = native_x2apic_icr_read, |
264 | .icr_write = native_x2apic_icr_write, | 265 | .icr_write = native_x2apic_icr_write, |
265 | .wait_icr_idle = native_x2apic_wait_icr_idle, | 266 | .wait_icr_idle = native_x2apic_wait_icr_idle, |
diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c index 991e315f422..c17e982db27 100644 --- a/arch/x86/kernel/apic/x2apic_phys.c +++ b/arch/x86/kernel/apic/x2apic_phys.c | |||
@@ -172,6 +172,7 @@ static struct apic apic_x2apic_phys = { | |||
172 | 172 | ||
173 | .read = native_apic_msr_read, | 173 | .read = native_apic_msr_read, |
174 | .write = native_apic_msr_write, | 174 | .write = native_apic_msr_write, |
175 | .eoi_write = native_apic_msr_eoi_write, | ||
175 | .icr_read = native_x2apic_icr_read, | 176 | .icr_read = native_x2apic_icr_read, |
176 | .icr_write = native_x2apic_icr_write, | 177 | .icr_write = native_x2apic_icr_write, |
177 | .wait_icr_idle = native_x2apic_wait_icr_idle, | 178 | .wait_icr_idle = native_x2apic_wait_icr_idle, |
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 87bfa69e216..c6d03f7a440 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c | |||
@@ -404,6 +404,7 @@ static struct apic __refdata apic_x2apic_uv_x = { | |||
404 | 404 | ||
405 | .read = native_apic_msr_read, | 405 | .read = native_apic_msr_read, |
406 | .write = native_apic_msr_write, | 406 | .write = native_apic_msr_write, |
407 | .eoi_write = native_apic_msr_eoi_write, | ||
407 | .icr_read = native_x2apic_icr_read, | 408 | .icr_read = native_x2apic_icr_read, |
408 | .icr_write = native_x2apic_icr_write, | 409 | .icr_write = native_x2apic_icr_write, |
409 | .wait_icr_idle = native_x2apic_wait_icr_idle, | 410 | .wait_icr_idle = native_x2apic_wait_icr_idle, |
diff --git a/arch/x86/kernel/check.c b/arch/x86/kernel/check.c index 5da1269e8dd..e2dbcb7dabd 100644 --- a/arch/x86/kernel/check.c +++ b/arch/x86/kernel/check.c | |||
@@ -27,21 +27,29 @@ static int num_scan_areas; | |||
27 | 27 | ||
28 | static __init int set_corruption_check(char *arg) | 28 | static __init int set_corruption_check(char *arg) |
29 | { | 29 | { |
30 | char *end; | 30 | ssize_t ret; |
31 | unsigned long val; | ||
31 | 32 | ||
32 | memory_corruption_check = simple_strtol(arg, &end, 10); | 33 | ret = kstrtoul(arg, 10, &val); |
34 | if (ret) | ||
35 | return ret; | ||
33 | 36 | ||
34 | return (*end == 0) ? 0 : -EINVAL; | 37 | memory_corruption_check = val; |
38 | return 0; | ||
35 | } | 39 | } |
36 | early_param("memory_corruption_check", set_corruption_check); | 40 | early_param("memory_corruption_check", set_corruption_check); |
37 | 41 | ||
38 | static __init int set_corruption_check_period(char *arg) | 42 | static __init int set_corruption_check_period(char *arg) |
39 | { | 43 | { |
40 | char *end; | 44 | ssize_t ret; |
45 | unsigned long val; | ||
41 | 46 | ||
42 | corruption_check_period = simple_strtoul(arg, &end, 10); | 47 | ret = kstrtoul(arg, 10, &val); |
48 | if (ret) | ||
49 | return ret; | ||
43 | 50 | ||
44 | return (*end == 0) ? 0 : -EINVAL; | 51 | corruption_check_period = val; |
52 | return 0; | ||
45 | } | 53 | } |
46 | early_param("memory_corruption_check_period", set_corruption_check_period); | 54 | early_param("memory_corruption_check_period", set_corruption_check_period); |
47 | 55 | ||
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index cf79302198a..82f29e70d05 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c | |||
@@ -1185,7 +1185,7 @@ void __cpuinit cpu_init(void) | |||
1185 | oist = &per_cpu(orig_ist, cpu); | 1185 | oist = &per_cpu(orig_ist, cpu); |
1186 | 1186 | ||
1187 | #ifdef CONFIG_NUMA | 1187 | #ifdef CONFIG_NUMA |
1188 | if (cpu != 0 && percpu_read(numa_node) == 0 && | 1188 | if (cpu != 0 && this_cpu_read(numa_node) == 0 && |
1189 | early_cpu_to_node(cpu) != NUMA_NO_NODE) | 1189 | early_cpu_to_node(cpu) != NUMA_NO_NODE) |
1190 | set_numa_node(early_cpu_to_node(cpu)); | 1190 | set_numa_node(early_cpu_to_node(cpu)); |
1191 | #endif | 1191 | #endif |
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index b8f3653dddb..9a7c90d80bc 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c | |||
@@ -615,14 +615,14 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c) | |||
615 | new_l2 = this_leaf.size/1024; | 615 | new_l2 = this_leaf.size/1024; |
616 | num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing; | 616 | num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing; |
617 | index_msb = get_count_order(num_threads_sharing); | 617 | index_msb = get_count_order(num_threads_sharing); |
618 | l2_id = c->apicid >> index_msb; | 618 | l2_id = c->apicid & ~((1 << index_msb) - 1); |
619 | break; | 619 | break; |
620 | case 3: | 620 | case 3: |
621 | new_l3 = this_leaf.size/1024; | 621 | new_l3 = this_leaf.size/1024; |
622 | num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing; | 622 | num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing; |
623 | index_msb = get_count_order( | 623 | index_msb = get_count_order( |
624 | num_threads_sharing); | 624 | num_threads_sharing); |
625 | l3_id = c->apicid >> index_msb; | 625 | l3_id = c->apicid & ~((1 << index_msb) - 1); |
626 | break; | 626 | break; |
627 | default: | 627 | default: |
628 | break; | 628 | break; |
diff --git a/arch/x86/kernel/cpu/match.c b/arch/x86/kernel/cpu/match.c index 5502b289341..36565373af8 100644 --- a/arch/x86/kernel/cpu/match.c +++ b/arch/x86/kernel/cpu/match.c | |||
@@ -23,7 +23,7 @@ | |||
23 | * %X86_MODEL_ANY, %X86_FEATURE_ANY or 0 (except for vendor) | 23 | * %X86_MODEL_ANY, %X86_FEATURE_ANY or 0 (except for vendor) |
24 | * | 24 | * |
25 | * Arrays used to match for this should also be declared using | 25 | * Arrays used to match for this should also be declared using |
26 | * MODULE_DEVICE_TABLE(x86_cpu, ...) | 26 | * MODULE_DEVICE_TABLE(x86cpu, ...) |
27 | * | 27 | * |
28 | * This always matches against the boot cpu, assuming models and features are | 28 | * This always matches against the boot cpu, assuming models and features are |
29 | * consistent over all CPUs. | 29 | * consistent over all CPUs. |
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 11c9166c333..2afcbd253e1 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c | |||
@@ -583,7 +583,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b) | |||
583 | struct mce m; | 583 | struct mce m; |
584 | int i; | 584 | int i; |
585 | 585 | ||
586 | percpu_inc(mce_poll_count); | 586 | this_cpu_inc(mce_poll_count); |
587 | 587 | ||
588 | mce_gather_info(&m, NULL); | 588 | mce_gather_info(&m, NULL); |
589 | 589 | ||
@@ -1017,7 +1017,7 @@ void do_machine_check(struct pt_regs *regs, long error_code) | |||
1017 | 1017 | ||
1018 | atomic_inc(&mce_entry); | 1018 | atomic_inc(&mce_entry); |
1019 | 1019 | ||
1020 | percpu_inc(mce_exception_count); | 1020 | this_cpu_inc(mce_exception_count); |
1021 | 1021 | ||
1022 | if (!banks) | 1022 | if (!banks) |
1023 | goto out; | 1023 | goto out; |
@@ -1431,6 +1431,43 @@ static int __cpuinit __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c) | |||
1431 | */ | 1431 | */ |
1432 | if (c->x86 == 6 && banks > 0) | 1432 | if (c->x86 == 6 && banks > 0) |
1433 | mce_banks[0].ctl = 0; | 1433 | mce_banks[0].ctl = 0; |
1434 | |||
1435 | /* | ||
1436 | * Turn off MC4_MISC thresholding banks on those models since | ||
1437 | * they're not supported there. | ||
1438 | */ | ||
1439 | if (c->x86 == 0x15 && | ||
1440 | (c->x86_model >= 0x10 && c->x86_model <= 0x1f)) { | ||
1441 | int i; | ||
1442 | u64 val, hwcr; | ||
1443 | bool need_toggle; | ||
1444 | u32 msrs[] = { | ||
1445 | 0x00000413, /* MC4_MISC0 */ | ||
1446 | 0xc0000408, /* MC4_MISC1 */ | ||
1447 | }; | ||
1448 | |||
1449 | rdmsrl(MSR_K7_HWCR, hwcr); | ||
1450 | |||
1451 | /* McStatusWrEn has to be set */ | ||
1452 | need_toggle = !(hwcr & BIT(18)); | ||
1453 | |||
1454 | if (need_toggle) | ||
1455 | wrmsrl(MSR_K7_HWCR, hwcr | BIT(18)); | ||
1456 | |||
1457 | for (i = 0; i < ARRAY_SIZE(msrs); i++) { | ||
1458 | rdmsrl(msrs[i], val); | ||
1459 | |||
1460 | /* CntP bit set? */ | ||
1461 | if (val & BIT(62)) { | ||
1462 | val &= ~BIT(62); | ||
1463 | wrmsrl(msrs[i], val); | ||
1464 | } | ||
1465 | } | ||
1466 | |||
1467 | /* restore old settings */ | ||
1468 | if (need_toggle) | ||
1469 | wrmsrl(MSR_K7_HWCR, hwcr); | ||
1470 | } | ||
1434 | } | 1471 | } |
1435 | 1472 | ||
1436 | if (c->x86_vendor == X86_VENDOR_INTEL) { | 1473 | if (c->x86_vendor == X86_VENDOR_INTEL) { |
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index 99b57179f91..f4873a64f46 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c | |||
@@ -51,6 +51,7 @@ struct threshold_block { | |||
51 | unsigned int cpu; | 51 | unsigned int cpu; |
52 | u32 address; | 52 | u32 address; |
53 | u16 interrupt_enable; | 53 | u16 interrupt_enable; |
54 | bool interrupt_capable; | ||
54 | u16 threshold_limit; | 55 | u16 threshold_limit; |
55 | struct kobject kobj; | 56 | struct kobject kobj; |
56 | struct list_head miscj; | 57 | struct list_head miscj; |
@@ -83,6 +84,21 @@ struct thresh_restart { | |||
83 | u16 old_limit; | 84 | u16 old_limit; |
84 | }; | 85 | }; |
85 | 86 | ||
87 | static bool lvt_interrupt_supported(unsigned int bank, u32 msr_high_bits) | ||
88 | { | ||
89 | /* | ||
90 | * bank 4 supports APIC LVT interrupts implicitly since forever. | ||
91 | */ | ||
92 | if (bank == 4) | ||
93 | return true; | ||
94 | |||
95 | /* | ||
96 | * IntP: interrupt present; if this bit is set, the thresholding | ||
97 | * bank can generate APIC LVT interrupts | ||
98 | */ | ||
99 | return msr_high_bits & BIT(28); | ||
100 | } | ||
101 | |||
86 | static int lvt_off_valid(struct threshold_block *b, int apic, u32 lo, u32 hi) | 102 | static int lvt_off_valid(struct threshold_block *b, int apic, u32 lo, u32 hi) |
87 | { | 103 | { |
88 | int msr = (hi & MASK_LVTOFF_HI) >> 20; | 104 | int msr = (hi & MASK_LVTOFF_HI) >> 20; |
@@ -104,8 +120,10 @@ static int lvt_off_valid(struct threshold_block *b, int apic, u32 lo, u32 hi) | |||
104 | return 1; | 120 | return 1; |
105 | }; | 121 | }; |
106 | 122 | ||
107 | /* must be called with correct cpu affinity */ | 123 | /* |
108 | /* Called via smp_call_function_single() */ | 124 | * Called via smp_call_function_single(), must be called with correct |
125 | * cpu affinity. | ||
126 | */ | ||
109 | static void threshold_restart_bank(void *_tr) | 127 | static void threshold_restart_bank(void *_tr) |
110 | { | 128 | { |
111 | struct thresh_restart *tr = _tr; | 129 | struct thresh_restart *tr = _tr; |
@@ -128,6 +146,12 @@ static void threshold_restart_bank(void *_tr) | |||
128 | (new_count & THRESHOLD_MAX); | 146 | (new_count & THRESHOLD_MAX); |
129 | } | 147 | } |
130 | 148 | ||
149 | /* clear IntType */ | ||
150 | hi &= ~MASK_INT_TYPE_HI; | ||
151 | |||
152 | if (!tr->b->interrupt_capable) | ||
153 | goto done; | ||
154 | |||
131 | if (tr->set_lvt_off) { | 155 | if (tr->set_lvt_off) { |
132 | if (lvt_off_valid(tr->b, tr->lvt_off, lo, hi)) { | 156 | if (lvt_off_valid(tr->b, tr->lvt_off, lo, hi)) { |
133 | /* set new lvt offset */ | 157 | /* set new lvt offset */ |
@@ -136,9 +160,10 @@ static void threshold_restart_bank(void *_tr) | |||
136 | } | 160 | } |
137 | } | 161 | } |
138 | 162 | ||
139 | tr->b->interrupt_enable ? | 163 | if (tr->b->interrupt_enable) |
140 | (hi = (hi & ~MASK_INT_TYPE_HI) | INT_TYPE_APIC) : | 164 | hi |= INT_TYPE_APIC; |
141 | (hi &= ~MASK_INT_TYPE_HI); | 165 | |
166 | done: | ||
142 | 167 | ||
143 | hi |= MASK_COUNT_EN_HI; | 168 | hi |= MASK_COUNT_EN_HI; |
144 | wrmsr(tr->b->address, lo, hi); | 169 | wrmsr(tr->b->address, lo, hi); |
@@ -202,14 +227,17 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c) | |||
202 | if (shared_bank[bank] && c->cpu_core_id) | 227 | if (shared_bank[bank] && c->cpu_core_id) |
203 | break; | 228 | break; |
204 | 229 | ||
205 | offset = setup_APIC_mce(offset, | ||
206 | (high & MASK_LVTOFF_HI) >> 20); | ||
207 | |||
208 | memset(&b, 0, sizeof(b)); | 230 | memset(&b, 0, sizeof(b)); |
209 | b.cpu = cpu; | 231 | b.cpu = cpu; |
210 | b.bank = bank; | 232 | b.bank = bank; |
211 | b.block = block; | 233 | b.block = block; |
212 | b.address = address; | 234 | b.address = address; |
235 | b.interrupt_capable = lvt_interrupt_supported(bank, high); | ||
236 | |||
237 | if (b.interrupt_capable) { | ||
238 | int new = (high & MASK_LVTOFF_HI) >> 20; | ||
239 | offset = setup_APIC_mce(offset, new); | ||
240 | } | ||
213 | 241 | ||
214 | mce_threshold_block_init(&b, offset); | 242 | mce_threshold_block_init(&b, offset); |
215 | mce_threshold_vector = amd_threshold_interrupt; | 243 | mce_threshold_vector = amd_threshold_interrupt; |
@@ -309,6 +337,9 @@ store_interrupt_enable(struct threshold_block *b, const char *buf, size_t size) | |||
309 | struct thresh_restart tr; | 337 | struct thresh_restart tr; |
310 | unsigned long new; | 338 | unsigned long new; |
311 | 339 | ||
340 | if (!b->interrupt_capable) | ||
341 | return -EINVAL; | ||
342 | |||
312 | if (strict_strtoul(buf, 0, &new) < 0) | 343 | if (strict_strtoul(buf, 0, &new) < 0) |
313 | return -EINVAL; | 344 | return -EINVAL; |
314 | 345 | ||
@@ -390,10 +421,10 @@ RW_ATTR(threshold_limit); | |||
390 | RW_ATTR(error_count); | 421 | RW_ATTR(error_count); |
391 | 422 | ||
392 | static struct attribute *default_attrs[] = { | 423 | static struct attribute *default_attrs[] = { |
393 | &interrupt_enable.attr, | ||
394 | &threshold_limit.attr, | 424 | &threshold_limit.attr, |
395 | &error_count.attr, | 425 | &error_count.attr, |
396 | NULL | 426 | NULL, /* possibly interrupt_enable if supported, see below */ |
427 | NULL, | ||
397 | }; | 428 | }; |
398 | 429 | ||
399 | #define to_block(k) container_of(k, struct threshold_block, kobj) | 430 | #define to_block(k) container_of(k, struct threshold_block, kobj) |
@@ -467,8 +498,14 @@ static __cpuinit int allocate_threshold_blocks(unsigned int cpu, | |||
467 | b->cpu = cpu; | 498 | b->cpu = cpu; |
468 | b->address = address; | 499 | b->address = address; |
469 | b->interrupt_enable = 0; | 500 | b->interrupt_enable = 0; |
501 | b->interrupt_capable = lvt_interrupt_supported(bank, high); | ||
470 | b->threshold_limit = THRESHOLD_MAX; | 502 | b->threshold_limit = THRESHOLD_MAX; |
471 | 503 | ||
504 | if (b->interrupt_capable) | ||
505 | threshold_ktype.default_attrs[2] = &interrupt_enable.attr; | ||
506 | else | ||
507 | threshold_ktype.default_attrs[2] = NULL; | ||
508 | |||
472 | INIT_LIST_HEAD(&b->miscj); | 509 | INIT_LIST_HEAD(&b->miscj); |
473 | 510 | ||
474 | if (per_cpu(threshold_banks, cpu)[bank]->blocks) { | 511 | if (per_cpu(threshold_banks, cpu)[bank]->blocks) { |
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index bb8e03407e1..e049d6da018 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c | |||
@@ -484,9 +484,6 @@ static int __x86_pmu_event_init(struct perf_event *event) | |||
484 | 484 | ||
485 | /* mark unused */ | 485 | /* mark unused */ |
486 | event->hw.extra_reg.idx = EXTRA_REG_NONE; | 486 | event->hw.extra_reg.idx = EXTRA_REG_NONE; |
487 | |||
488 | /* mark not used */ | ||
489 | event->hw.extra_reg.idx = EXTRA_REG_NONE; | ||
490 | event->hw.branch_reg.idx = EXTRA_REG_NONE; | 487 | event->hw.branch_reg.idx = EXTRA_REG_NONE; |
491 | 488 | ||
492 | return x86_pmu.hw_config(event); | 489 | return x86_pmu.hw_config(event); |
@@ -1186,8 +1183,6 @@ int x86_pmu_handle_irq(struct pt_regs *regs) | |||
1186 | int idx, handled = 0; | 1183 | int idx, handled = 0; |
1187 | u64 val; | 1184 | u64 val; |
1188 | 1185 | ||
1189 | perf_sample_data_init(&data, 0); | ||
1190 | |||
1191 | cpuc = &__get_cpu_var(cpu_hw_events); | 1186 | cpuc = &__get_cpu_var(cpu_hw_events); |
1192 | 1187 | ||
1193 | /* | 1188 | /* |
@@ -1222,7 +1217,7 @@ int x86_pmu_handle_irq(struct pt_regs *regs) | |||
1222 | * event overflow | 1217 | * event overflow |
1223 | */ | 1218 | */ |
1224 | handled++; | 1219 | handled++; |
1225 | data.period = event->hw.last_period; | 1220 | perf_sample_data_init(&data, 0, event->hw.last_period); |
1226 | 1221 | ||
1227 | if (!x86_perf_event_set_period(event)) | 1222 | if (!x86_perf_event_set_period(event)) |
1228 | continue; | 1223 | continue; |
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c index 95e7fe1c5f0..11a4eb9131d 100644 --- a/arch/x86/kernel/cpu/perf_event_amd.c +++ b/arch/x86/kernel/cpu/perf_event_amd.c | |||
@@ -134,8 +134,13 @@ static u64 amd_pmu_event_map(int hw_event) | |||
134 | 134 | ||
135 | static int amd_pmu_hw_config(struct perf_event *event) | 135 | static int amd_pmu_hw_config(struct perf_event *event) |
136 | { | 136 | { |
137 | int ret = x86_pmu_hw_config(event); | 137 | int ret; |
138 | 138 | ||
139 | /* pass precise event sampling to ibs: */ | ||
140 | if (event->attr.precise_ip && get_ibs_caps()) | ||
141 | return -ENOENT; | ||
142 | |||
143 | ret = x86_pmu_hw_config(event); | ||
139 | if (ret) | 144 | if (ret) |
140 | return ret; | 145 | return ret; |
141 | 146 | ||
@@ -205,10 +210,8 @@ static void amd_put_event_constraints(struct cpu_hw_events *cpuc, | |||
205 | * when we come here | 210 | * when we come here |
206 | */ | 211 | */ |
207 | for (i = 0; i < x86_pmu.num_counters; i++) { | 212 | for (i = 0; i < x86_pmu.num_counters; i++) { |
208 | if (nb->owners[i] == event) { | 213 | if (cmpxchg(nb->owners + i, event, NULL) == event) |
209 | cmpxchg(nb->owners+i, event, NULL); | ||
210 | break; | 214 | break; |
211 | } | ||
212 | } | 215 | } |
213 | } | 216 | } |
214 | 217 | ||
@@ -493,6 +496,7 @@ static __initconst const struct x86_pmu amd_pmu = { | |||
493 | * 0x023 DE PERF_CTL[2:0] | 496 | * 0x023 DE PERF_CTL[2:0] |
494 | * 0x02D LS PERF_CTL[3] | 497 | * 0x02D LS PERF_CTL[3] |
495 | * 0x02E LS PERF_CTL[3,0] | 498 | * 0x02E LS PERF_CTL[3,0] |
499 | * 0x031 LS PERF_CTL[2:0] (**) | ||
496 | * 0x043 CU PERF_CTL[2:0] | 500 | * 0x043 CU PERF_CTL[2:0] |
497 | * 0x045 CU PERF_CTL[2:0] | 501 | * 0x045 CU PERF_CTL[2:0] |
498 | * 0x046 CU PERF_CTL[2:0] | 502 | * 0x046 CU PERF_CTL[2:0] |
@@ -506,10 +510,12 @@ static __initconst const struct x86_pmu amd_pmu = { | |||
506 | * 0x0DD LS PERF_CTL[5:0] | 510 | * 0x0DD LS PERF_CTL[5:0] |
507 | * 0x0DE LS PERF_CTL[5:0] | 511 | * 0x0DE LS PERF_CTL[5:0] |
508 | * 0x0DF LS PERF_CTL[5:0] | 512 | * 0x0DF LS PERF_CTL[5:0] |
513 | * 0x1C0 EX PERF_CTL[5:3] | ||
509 | * 0x1D6 EX PERF_CTL[5:0] | 514 | * 0x1D6 EX PERF_CTL[5:0] |
510 | * 0x1D8 EX PERF_CTL[5:0] | 515 | * 0x1D8 EX PERF_CTL[5:0] |
511 | * | 516 | * |
512 | * (*) depending on the umask all FPU counters may be used | 517 | * (*) depending on the umask all FPU counters may be used |
518 | * (**) only one unitmask enabled at a time | ||
513 | */ | 519 | */ |
514 | 520 | ||
515 | static struct event_constraint amd_f15_PMC0 = EVENT_CONSTRAINT(0, 0x01, 0); | 521 | static struct event_constraint amd_f15_PMC0 = EVENT_CONSTRAINT(0, 0x01, 0); |
@@ -559,6 +565,12 @@ amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, struct perf_event *ev | |||
559 | return &amd_f15_PMC3; | 565 | return &amd_f15_PMC3; |
560 | case 0x02E: | 566 | case 0x02E: |
561 | return &amd_f15_PMC30; | 567 | return &amd_f15_PMC30; |
568 | case 0x031: | ||
569 | if (hweight_long(hwc->config & ARCH_PERFMON_EVENTSEL_UMASK) <= 1) | ||
570 | return &amd_f15_PMC20; | ||
571 | return &emptyconstraint; | ||
572 | case 0x1C0: | ||
573 | return &amd_f15_PMC53; | ||
562 | default: | 574 | default: |
563 | return &amd_f15_PMC50; | 575 | return &amd_f15_PMC50; |
564 | } | 576 | } |
diff --git a/arch/x86/kernel/cpu/perf_event_amd_ibs.c b/arch/x86/kernel/cpu/perf_event_amd_ibs.c index 3b8a2d30d14..da9bcdcd985 100644 --- a/arch/x86/kernel/cpu/perf_event_amd_ibs.c +++ b/arch/x86/kernel/cpu/perf_event_amd_ibs.c | |||
@@ -9,6 +9,7 @@ | |||
9 | #include <linux/perf_event.h> | 9 | #include <linux/perf_event.h> |
10 | #include <linux/module.h> | 10 | #include <linux/module.h> |
11 | #include <linux/pci.h> | 11 | #include <linux/pci.h> |
12 | #include <linux/ptrace.h> | ||
12 | 13 | ||
13 | #include <asm/apic.h> | 14 | #include <asm/apic.h> |
14 | 15 | ||
@@ -16,36 +17,591 @@ static u32 ibs_caps; | |||
16 | 17 | ||
17 | #if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD) | 18 | #if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD) |
18 | 19 | ||
19 | static struct pmu perf_ibs; | 20 | #include <linux/kprobes.h> |
21 | #include <linux/hardirq.h> | ||
22 | |||
23 | #include <asm/nmi.h> | ||
24 | |||
25 | #define IBS_FETCH_CONFIG_MASK (IBS_FETCH_RAND_EN | IBS_FETCH_MAX_CNT) | ||
26 | #define IBS_OP_CONFIG_MASK IBS_OP_MAX_CNT | ||
27 | |||
28 | enum ibs_states { | ||
29 | IBS_ENABLED = 0, | ||
30 | IBS_STARTED = 1, | ||
31 | IBS_STOPPING = 2, | ||
32 | |||
33 | IBS_MAX_STATES, | ||
34 | }; | ||
35 | |||
36 | struct cpu_perf_ibs { | ||
37 | struct perf_event *event; | ||
38 | unsigned long state[BITS_TO_LONGS(IBS_MAX_STATES)]; | ||
39 | }; | ||
40 | |||
41 | struct perf_ibs { | ||
42 | struct pmu pmu; | ||
43 | unsigned int msr; | ||
44 | u64 config_mask; | ||
45 | u64 cnt_mask; | ||
46 | u64 enable_mask; | ||
47 | u64 valid_mask; | ||
48 | u64 max_period; | ||
49 | unsigned long offset_mask[1]; | ||
50 | int offset_max; | ||
51 | struct cpu_perf_ibs __percpu *pcpu; | ||
52 | u64 (*get_count)(u64 config); | ||
53 | }; | ||
54 | |||
55 | struct perf_ibs_data { | ||
56 | u32 size; | ||
57 | union { | ||
58 | u32 data[0]; /* data buffer starts here */ | ||
59 | u32 caps; | ||
60 | }; | ||
61 | u64 regs[MSR_AMD64_IBS_REG_COUNT_MAX]; | ||
62 | }; | ||
63 | |||
64 | static int | ||
65 | perf_event_set_period(struct hw_perf_event *hwc, u64 min, u64 max, u64 *hw_period) | ||
66 | { | ||
67 | s64 left = local64_read(&hwc->period_left); | ||
68 | s64 period = hwc->sample_period; | ||
69 | int overflow = 0; | ||
70 | |||
71 | /* | ||
72 | * If we are way outside a reasonable range then just skip forward: | ||
73 | */ | ||
74 | if (unlikely(left <= -period)) { | ||
75 | left = period; | ||
76 | local64_set(&hwc->period_left, left); | ||
77 | hwc->last_period = period; | ||
78 | overflow = 1; | ||
79 | } | ||
80 | |||
81 | if (unlikely(left < (s64)min)) { | ||
82 | left += period; | ||
83 | local64_set(&hwc->period_left, left); | ||
84 | hwc->last_period = period; | ||
85 | overflow = 1; | ||
86 | } | ||
87 | |||
88 | /* | ||
89 | * If the hw period that triggers the sw overflow is too short | ||
90 | * we might hit the irq handler. This biases the results. | ||
91 | * Thus we shorten the next-to-last period and set the last | ||
92 | * period to the max period. | ||
93 | */ | ||
94 | if (left > max) { | ||
95 | left -= max; | ||
96 | if (left > max) | ||
97 | left = max; | ||
98 | else if (left < min) | ||
99 | left = min; | ||
100 | } | ||
101 | |||
102 | *hw_period = (u64)left; | ||
103 | |||
104 | return overflow; | ||
105 | } | ||
106 | |||
107 | static int | ||
108 | perf_event_try_update(struct perf_event *event, u64 new_raw_count, int width) | ||
109 | { | ||
110 | struct hw_perf_event *hwc = &event->hw; | ||
111 | int shift = 64 - width; | ||
112 | u64 prev_raw_count; | ||
113 | u64 delta; | ||
114 | |||
115 | /* | ||
116 | * Careful: an NMI might modify the previous event value. | ||
117 | * | ||
118 | * Our tactic to handle this is to first atomically read and | ||
119 | * exchange a new raw count - then add that new-prev delta | ||
120 | * count to the generic event atomically: | ||
121 | */ | ||
122 | prev_raw_count = local64_read(&hwc->prev_count); | ||
123 | if (local64_cmpxchg(&hwc->prev_count, prev_raw_count, | ||
124 | new_raw_count) != prev_raw_count) | ||
125 | return 0; | ||
126 | |||
127 | /* | ||
128 | * Now we have the new raw value and have updated the prev | ||
129 | * timestamp already. We can now calculate the elapsed delta | ||
130 | * (event-)time and add that to the generic event. | ||
131 | * | ||
132 | * Careful, not all hw sign-extends above the physical width | ||
133 | * of the count. | ||
134 | */ | ||
135 | delta = (new_raw_count << shift) - (prev_raw_count << shift); | ||
136 | delta >>= shift; | ||
137 | |||
138 | local64_add(delta, &event->count); | ||
139 | local64_sub(delta, &hwc->period_left); | ||
140 | |||
141 | return 1; | ||
142 | } | ||
143 | |||
144 | static struct perf_ibs perf_ibs_fetch; | ||
145 | static struct perf_ibs perf_ibs_op; | ||
146 | |||
147 | static struct perf_ibs *get_ibs_pmu(int type) | ||
148 | { | ||
149 | if (perf_ibs_fetch.pmu.type == type) | ||
150 | return &perf_ibs_fetch; | ||
151 | if (perf_ibs_op.pmu.type == type) | ||
152 | return &perf_ibs_op; | ||
153 | return NULL; | ||
154 | } | ||
155 | |||
156 | /* | ||
157 | * Use IBS for precise event sampling: | ||
158 | * | ||
159 | * perf record -a -e cpu-cycles:p ... # use ibs op counting cycle count | ||
160 | * perf record -a -e r076:p ... # same as -e cpu-cycles:p | ||
161 | * perf record -a -e r0C1:p ... # use ibs op counting micro-ops | ||
162 | * | ||
163 | * IbsOpCntCtl (bit 19) of IBS Execution Control Register (IbsOpCtl, | ||
164 | * MSRC001_1033) is used to select either cycle or micro-ops counting | ||
165 | * mode. | ||
166 | * | ||
167 | * The rip of IBS samples has skid 0. Thus, IBS supports precise | ||
168 | * levels 1 and 2 and the PERF_EFLAGS_EXACT is set. In rare cases the | ||
169 | * rip is invalid when IBS was not able to record the rip correctly. | ||
170 | * We clear PERF_EFLAGS_EXACT and take the rip from pt_regs then. | ||
171 | * | ||
172 | */ | ||
173 | static int perf_ibs_precise_event(struct perf_event *event, u64 *config) | ||
174 | { | ||
175 | switch (event->attr.precise_ip) { | ||
176 | case 0: | ||
177 | return -ENOENT; | ||
178 | case 1: | ||
179 | case 2: | ||
180 | break; | ||
181 | default: | ||
182 | return -EOPNOTSUPP; | ||
183 | } | ||
184 | |||
185 | switch (event->attr.type) { | ||
186 | case PERF_TYPE_HARDWARE: | ||
187 | switch (event->attr.config) { | ||
188 | case PERF_COUNT_HW_CPU_CYCLES: | ||
189 | *config = 0; | ||
190 | return 0; | ||
191 | } | ||
192 | break; | ||
193 | case PERF_TYPE_RAW: | ||
194 | switch (event->attr.config) { | ||
195 | case 0x0076: | ||
196 | *config = 0; | ||
197 | return 0; | ||
198 | case 0x00C1: | ||
199 | *config = IBS_OP_CNT_CTL; | ||
200 | return 0; | ||
201 | } | ||
202 | break; | ||
203 | default: | ||
204 | return -ENOENT; | ||
205 | } | ||
206 | |||
207 | return -EOPNOTSUPP; | ||
208 | } | ||
20 | 209 | ||
21 | static int perf_ibs_init(struct perf_event *event) | 210 | static int perf_ibs_init(struct perf_event *event) |
22 | { | 211 | { |
23 | if (perf_ibs.type != event->attr.type) | 212 | struct hw_perf_event *hwc = &event->hw; |
213 | struct perf_ibs *perf_ibs; | ||
214 | u64 max_cnt, config; | ||
215 | int ret; | ||
216 | |||
217 | perf_ibs = get_ibs_pmu(event->attr.type); | ||
218 | if (perf_ibs) { | ||
219 | config = event->attr.config; | ||
220 | } else { | ||
221 | perf_ibs = &perf_ibs_op; | ||
222 | ret = perf_ibs_precise_event(event, &config); | ||
223 | if (ret) | ||
224 | return ret; | ||
225 | } | ||
226 | |||
227 | if (event->pmu != &perf_ibs->pmu) | ||
24 | return -ENOENT; | 228 | return -ENOENT; |
229 | |||
230 | if (config & ~perf_ibs->config_mask) | ||
231 | return -EINVAL; | ||
232 | |||
233 | if (hwc->sample_period) { | ||
234 | if (config & perf_ibs->cnt_mask) | ||
235 | /* raw max_cnt may not be set */ | ||
236 | return -EINVAL; | ||
237 | if (!event->attr.sample_freq && hwc->sample_period & 0x0f) | ||
238 | /* | ||
239 | * lower 4 bits can not be set in ibs max cnt, | ||
240 | * but allowing it in case we adjust the | ||
241 | * sample period to set a frequency. | ||
242 | */ | ||
243 | return -EINVAL; | ||
244 | hwc->sample_period &= ~0x0FULL; | ||
245 | if (!hwc->sample_period) | ||
246 | hwc->sample_period = 0x10; | ||
247 | } else { | ||
248 | max_cnt = config & perf_ibs->cnt_mask; | ||
249 | config &= ~perf_ibs->cnt_mask; | ||
250 | event->attr.sample_period = max_cnt << 4; | ||
251 | hwc->sample_period = event->attr.sample_period; | ||
252 | } | ||
253 | |||
254 | if (!hwc->sample_period) | ||
255 | return -EINVAL; | ||
256 | |||
257 | /* | ||
258 | * If we modify hwc->sample_period, we also need to update | ||
259 | * hwc->last_period and hwc->period_left. | ||
260 | */ | ||
261 | hwc->last_period = hwc->sample_period; | ||
262 | local64_set(&hwc->period_left, hwc->sample_period); | ||
263 | |||
264 | hwc->config_base = perf_ibs->msr; | ||
265 | hwc->config = config; | ||
266 | |||
25 | return 0; | 267 | return 0; |
26 | } | 268 | } |
27 | 269 | ||
270 | static int perf_ibs_set_period(struct perf_ibs *perf_ibs, | ||
271 | struct hw_perf_event *hwc, u64 *period) | ||
272 | { | ||
273 | int overflow; | ||
274 | |||
275 | /* ignore lower 4 bits in min count: */ | ||
276 | overflow = perf_event_set_period(hwc, 1<<4, perf_ibs->max_period, period); | ||
277 | local64_set(&hwc->prev_count, 0); | ||
278 | |||
279 | return overflow; | ||
280 | } | ||
281 | |||
282 | static u64 get_ibs_fetch_count(u64 config) | ||
283 | { | ||
284 | return (config & IBS_FETCH_CNT) >> 12; | ||
285 | } | ||
286 | |||
287 | static u64 get_ibs_op_count(u64 config) | ||
288 | { | ||
289 | u64 count = 0; | ||
290 | |||
291 | if (config & IBS_OP_VAL) | ||
292 | count += (config & IBS_OP_MAX_CNT) << 4; /* cnt rolled over */ | ||
293 | |||
294 | if (ibs_caps & IBS_CAPS_RDWROPCNT) | ||
295 | count += (config & IBS_OP_CUR_CNT) >> 32; | ||
296 | |||
297 | return count; | ||
298 | } | ||
299 | |||
300 | static void | ||
301 | perf_ibs_event_update(struct perf_ibs *perf_ibs, struct perf_event *event, | ||
302 | u64 *config) | ||
303 | { | ||
304 | u64 count = perf_ibs->get_count(*config); | ||
305 | |||
306 | /* | ||
307 | * Set width to 64 since we do not overflow on max width but | ||
308 | * instead on max count. In perf_ibs_set_period() we clear | ||
309 | * prev count manually on overflow. | ||
310 | */ | ||
311 | while (!perf_event_try_update(event, count, 64)) { | ||
312 | rdmsrl(event->hw.config_base, *config); | ||
313 | count = perf_ibs->get_count(*config); | ||
314 | } | ||
315 | } | ||
316 | |||
317 | static inline void perf_ibs_enable_event(struct perf_ibs *perf_ibs, | ||
318 | struct hw_perf_event *hwc, u64 config) | ||
319 | { | ||
320 | wrmsrl(hwc->config_base, hwc->config | config | perf_ibs->enable_mask); | ||
321 | } | ||
322 | |||
323 | /* | ||
324 | * Erratum #420 Instruction-Based Sampling Engine May Generate | ||
325 | * Interrupt that Cannot Be Cleared: | ||
326 | * | ||
327 | * Must clear counter mask first, then clear the enable bit. See | ||
328 | * Revision Guide for AMD Family 10h Processors, Publication #41322. | ||
329 | */ | ||
330 | static inline void perf_ibs_disable_event(struct perf_ibs *perf_ibs, | ||
331 | struct hw_perf_event *hwc, u64 config) | ||
332 | { | ||
333 | config &= ~perf_ibs->cnt_mask; | ||
334 | wrmsrl(hwc->config_base, config); | ||
335 | config &= ~perf_ibs->enable_mask; | ||
336 | wrmsrl(hwc->config_base, config); | ||
337 | } | ||
338 | |||
339 | /* | ||
340 | * We cannot restore the ibs pmu state, so we always needs to update | ||
341 | * the event while stopping it and then reset the state when starting | ||
342 | * again. Thus, ignoring PERF_EF_RELOAD and PERF_EF_UPDATE flags in | ||
343 | * perf_ibs_start()/perf_ibs_stop() and instead always do it. | ||
344 | */ | ||
345 | static void perf_ibs_start(struct perf_event *event, int flags) | ||
346 | { | ||
347 | struct hw_perf_event *hwc = &event->hw; | ||
348 | struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu); | ||
349 | struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu); | ||
350 | u64 period; | ||
351 | |||
352 | if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED))) | ||
353 | return; | ||
354 | |||
355 | WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE)); | ||
356 | hwc->state = 0; | ||
357 | |||
358 | perf_ibs_set_period(perf_ibs, hwc, &period); | ||
359 | set_bit(IBS_STARTED, pcpu->state); | ||
360 | perf_ibs_enable_event(perf_ibs, hwc, period >> 4); | ||
361 | |||
362 | perf_event_update_userpage(event); | ||
363 | } | ||
364 | |||
365 | static void perf_ibs_stop(struct perf_event *event, int flags) | ||
366 | { | ||
367 | struct hw_perf_event *hwc = &event->hw; | ||
368 | struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu); | ||
369 | struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu); | ||
370 | u64 config; | ||
371 | int stopping; | ||
372 | |||
373 | stopping = test_and_clear_bit(IBS_STARTED, pcpu->state); | ||
374 | |||
375 | if (!stopping && (hwc->state & PERF_HES_UPTODATE)) | ||
376 | return; | ||
377 | |||
378 | rdmsrl(hwc->config_base, config); | ||
379 | |||
380 | if (stopping) { | ||
381 | set_bit(IBS_STOPPING, pcpu->state); | ||
382 | perf_ibs_disable_event(perf_ibs, hwc, config); | ||
383 | WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED); | ||
384 | hwc->state |= PERF_HES_STOPPED; | ||
385 | } | ||
386 | |||
387 | if (hwc->state & PERF_HES_UPTODATE) | ||
388 | return; | ||
389 | |||
390 | /* | ||
391 | * Clear valid bit to not count rollovers on update, rollovers | ||
392 | * are only updated in the irq handler. | ||
393 | */ | ||
394 | config &= ~perf_ibs->valid_mask; | ||
395 | |||
396 | perf_ibs_event_update(perf_ibs, event, &config); | ||
397 | hwc->state |= PERF_HES_UPTODATE; | ||
398 | } | ||
399 | |||
28 | static int perf_ibs_add(struct perf_event *event, int flags) | 400 | static int perf_ibs_add(struct perf_event *event, int flags) |
29 | { | 401 | { |
402 | struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu); | ||
403 | struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu); | ||
404 | |||
405 | if (test_and_set_bit(IBS_ENABLED, pcpu->state)) | ||
406 | return -ENOSPC; | ||
407 | |||
408 | event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED; | ||
409 | |||
410 | pcpu->event = event; | ||
411 | |||
412 | if (flags & PERF_EF_START) | ||
413 | perf_ibs_start(event, PERF_EF_RELOAD); | ||
414 | |||
30 | return 0; | 415 | return 0; |
31 | } | 416 | } |
32 | 417 | ||
33 | static void perf_ibs_del(struct perf_event *event, int flags) | 418 | static void perf_ibs_del(struct perf_event *event, int flags) |
34 | { | 419 | { |
420 | struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu); | ||
421 | struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu); | ||
422 | |||
423 | if (!test_and_clear_bit(IBS_ENABLED, pcpu->state)) | ||
424 | return; | ||
425 | |||
426 | perf_ibs_stop(event, PERF_EF_UPDATE); | ||
427 | |||
428 | pcpu->event = NULL; | ||
429 | |||
430 | perf_event_update_userpage(event); | ||
35 | } | 431 | } |
36 | 432 | ||
37 | static struct pmu perf_ibs = { | 433 | static void perf_ibs_read(struct perf_event *event) { } |
38 | .event_init= perf_ibs_init, | 434 | |
39 | .add= perf_ibs_add, | 435 | static struct perf_ibs perf_ibs_fetch = { |
40 | .del= perf_ibs_del, | 436 | .pmu = { |
437 | .task_ctx_nr = perf_invalid_context, | ||
438 | |||
439 | .event_init = perf_ibs_init, | ||
440 | .add = perf_ibs_add, | ||
441 | .del = perf_ibs_del, | ||
442 | .start = perf_ibs_start, | ||
443 | .stop = perf_ibs_stop, | ||
444 | .read = perf_ibs_read, | ||
445 | }, | ||
446 | .msr = MSR_AMD64_IBSFETCHCTL, | ||
447 | .config_mask = IBS_FETCH_CONFIG_MASK, | ||
448 | .cnt_mask = IBS_FETCH_MAX_CNT, | ||
449 | .enable_mask = IBS_FETCH_ENABLE, | ||
450 | .valid_mask = IBS_FETCH_VAL, | ||
451 | .max_period = IBS_FETCH_MAX_CNT << 4, | ||
452 | .offset_mask = { MSR_AMD64_IBSFETCH_REG_MASK }, | ||
453 | .offset_max = MSR_AMD64_IBSFETCH_REG_COUNT, | ||
454 | |||
455 | .get_count = get_ibs_fetch_count, | ||
41 | }; | 456 | }; |
42 | 457 | ||
458 | static struct perf_ibs perf_ibs_op = { | ||
459 | .pmu = { | ||
460 | .task_ctx_nr = perf_invalid_context, | ||
461 | |||
462 | .event_init = perf_ibs_init, | ||
463 | .add = perf_ibs_add, | ||
464 | .del = perf_ibs_del, | ||
465 | .start = perf_ibs_start, | ||
466 | .stop = perf_ibs_stop, | ||
467 | .read = perf_ibs_read, | ||
468 | }, | ||
469 | .msr = MSR_AMD64_IBSOPCTL, | ||
470 | .config_mask = IBS_OP_CONFIG_MASK, | ||
471 | .cnt_mask = IBS_OP_MAX_CNT, | ||
472 | .enable_mask = IBS_OP_ENABLE, | ||
473 | .valid_mask = IBS_OP_VAL, | ||
474 | .max_period = IBS_OP_MAX_CNT << 4, | ||
475 | .offset_mask = { MSR_AMD64_IBSOP_REG_MASK }, | ||
476 | .offset_max = MSR_AMD64_IBSOP_REG_COUNT, | ||
477 | |||
478 | .get_count = get_ibs_op_count, | ||
479 | }; | ||
480 | |||
481 | static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs) | ||
482 | { | ||
483 | struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu); | ||
484 | struct perf_event *event = pcpu->event; | ||
485 | struct hw_perf_event *hwc = &event->hw; | ||
486 | struct perf_sample_data data; | ||
487 | struct perf_raw_record raw; | ||
488 | struct pt_regs regs; | ||
489 | struct perf_ibs_data ibs_data; | ||
490 | int offset, size, check_rip, offset_max, throttle = 0; | ||
491 | unsigned int msr; | ||
492 | u64 *buf, *config, period; | ||
493 | |||
494 | if (!test_bit(IBS_STARTED, pcpu->state)) { | ||
495 | /* | ||
496 | * Catch spurious interrupts after stopping IBS: After | ||
497 | * disabling IBS there could be still incomming NMIs | ||
498 | * with samples that even have the valid bit cleared. | ||
499 | * Mark all this NMIs as handled. | ||
500 | */ | ||
501 | return test_and_clear_bit(IBS_STOPPING, pcpu->state) ? 1 : 0; | ||
502 | } | ||
503 | |||
504 | msr = hwc->config_base; | ||
505 | buf = ibs_data.regs; | ||
506 | rdmsrl(msr, *buf); | ||
507 | if (!(*buf++ & perf_ibs->valid_mask)) | ||
508 | return 0; | ||
509 | |||
510 | config = &ibs_data.regs[0]; | ||
511 | perf_ibs_event_update(perf_ibs, event, config); | ||
512 | perf_sample_data_init(&data, 0, hwc->last_period); | ||
513 | if (!perf_ibs_set_period(perf_ibs, hwc, &period)) | ||
514 | goto out; /* no sw counter overflow */ | ||
515 | |||
516 | ibs_data.caps = ibs_caps; | ||
517 | size = 1; | ||
518 | offset = 1; | ||
519 | check_rip = (perf_ibs == &perf_ibs_op && (ibs_caps & IBS_CAPS_RIPINVALIDCHK)); | ||
520 | if (event->attr.sample_type & PERF_SAMPLE_RAW) | ||
521 | offset_max = perf_ibs->offset_max; | ||
522 | else if (check_rip) | ||
523 | offset_max = 2; | ||
524 | else | ||
525 | offset_max = 1; | ||
526 | do { | ||
527 | rdmsrl(msr + offset, *buf++); | ||
528 | size++; | ||
529 | offset = find_next_bit(perf_ibs->offset_mask, | ||
530 | perf_ibs->offset_max, | ||
531 | offset + 1); | ||
532 | } while (offset < offset_max); | ||
533 | ibs_data.size = sizeof(u64) * size; | ||
534 | |||
535 | regs = *iregs; | ||
536 | if (check_rip && (ibs_data.regs[2] & IBS_RIP_INVALID)) { | ||
537 | regs.flags &= ~PERF_EFLAGS_EXACT; | ||
538 | } else { | ||
539 | instruction_pointer_set(&regs, ibs_data.regs[1]); | ||
540 | regs.flags |= PERF_EFLAGS_EXACT; | ||
541 | } | ||
542 | |||
543 | if (event->attr.sample_type & PERF_SAMPLE_RAW) { | ||
544 | raw.size = sizeof(u32) + ibs_data.size; | ||
545 | raw.data = ibs_data.data; | ||
546 | data.raw = &raw; | ||
547 | } | ||
548 | |||
549 | throttle = perf_event_overflow(event, &data, &regs); | ||
550 | out: | ||
551 | if (throttle) | ||
552 | perf_ibs_disable_event(perf_ibs, hwc, *config); | ||
553 | else | ||
554 | perf_ibs_enable_event(perf_ibs, hwc, period >> 4); | ||
555 | |||
556 | perf_event_update_userpage(event); | ||
557 | |||
558 | return 1; | ||
559 | } | ||
560 | |||
561 | static int __kprobes | ||
562 | perf_ibs_nmi_handler(unsigned int cmd, struct pt_regs *regs) | ||
563 | { | ||
564 | int handled = 0; | ||
565 | |||
566 | handled += perf_ibs_handle_irq(&perf_ibs_fetch, regs); | ||
567 | handled += perf_ibs_handle_irq(&perf_ibs_op, regs); | ||
568 | |||
569 | if (handled) | ||
570 | inc_irq_stat(apic_perf_irqs); | ||
571 | |||
572 | return handled; | ||
573 | } | ||
574 | |||
575 | static __init int perf_ibs_pmu_init(struct perf_ibs *perf_ibs, char *name) | ||
576 | { | ||
577 | struct cpu_perf_ibs __percpu *pcpu; | ||
578 | int ret; | ||
579 | |||
580 | pcpu = alloc_percpu(struct cpu_perf_ibs); | ||
581 | if (!pcpu) | ||
582 | return -ENOMEM; | ||
583 | |||
584 | perf_ibs->pcpu = pcpu; | ||
585 | |||
586 | ret = perf_pmu_register(&perf_ibs->pmu, name, -1); | ||
587 | if (ret) { | ||
588 | perf_ibs->pcpu = NULL; | ||
589 | free_percpu(pcpu); | ||
590 | } | ||
591 | |||
592 | return ret; | ||
593 | } | ||
594 | |||
43 | static __init int perf_event_ibs_init(void) | 595 | static __init int perf_event_ibs_init(void) |
44 | { | 596 | { |
45 | if (!ibs_caps) | 597 | if (!ibs_caps) |
46 | return -ENODEV; /* ibs not supported by the cpu */ | 598 | return -ENODEV; /* ibs not supported by the cpu */ |
47 | 599 | ||
48 | perf_pmu_register(&perf_ibs, "ibs", -1); | 600 | perf_ibs_pmu_init(&perf_ibs_fetch, "ibs_fetch"); |
601 | if (ibs_caps & IBS_CAPS_OPCNT) | ||
602 | perf_ibs_op.config_mask |= IBS_OP_CNT_CTL; | ||
603 | perf_ibs_pmu_init(&perf_ibs_op, "ibs_op"); | ||
604 | register_nmi_handler(NMI_LOCAL, perf_ibs_nmi_handler, 0, "perf_ibs"); | ||
49 | printk(KERN_INFO "perf: AMD IBS detected (0x%08x)\n", ibs_caps); | 605 | printk(KERN_INFO "perf: AMD IBS detected (0x%08x)\n", ibs_caps); |
50 | 606 | ||
51 | return 0; | 607 | return 0; |
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 26b3e2fef10..166546ec6ae 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c | |||
@@ -1027,8 +1027,6 @@ static int intel_pmu_handle_irq(struct pt_regs *regs) | |||
1027 | u64 status; | 1027 | u64 status; |
1028 | int handled; | 1028 | int handled; |
1029 | 1029 | ||
1030 | perf_sample_data_init(&data, 0); | ||
1031 | |||
1032 | cpuc = &__get_cpu_var(cpu_hw_events); | 1030 | cpuc = &__get_cpu_var(cpu_hw_events); |
1033 | 1031 | ||
1034 | /* | 1032 | /* |
@@ -1082,7 +1080,7 @@ again: | |||
1082 | if (!intel_pmu_save_and_restart(event)) | 1080 | if (!intel_pmu_save_and_restart(event)) |
1083 | continue; | 1081 | continue; |
1084 | 1082 | ||
1085 | data.period = event->hw.last_period; | 1083 | perf_sample_data_init(&data, 0, event->hw.last_period); |
1086 | 1084 | ||
1087 | if (has_branch_stack(event)) | 1085 | if (has_branch_stack(event)) |
1088 | data.br_stack = &cpuc->lbr_stack; | 1086 | data.br_stack = &cpuc->lbr_stack; |
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index 7f64df19e7d..5a3edc27f6e 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c | |||
@@ -316,8 +316,7 @@ int intel_pmu_drain_bts_buffer(void) | |||
316 | 316 | ||
317 | ds->bts_index = ds->bts_buffer_base; | 317 | ds->bts_index = ds->bts_buffer_base; |
318 | 318 | ||
319 | perf_sample_data_init(&data, 0); | 319 | perf_sample_data_init(&data, 0, event->hw.last_period); |
320 | data.period = event->hw.last_period; | ||
321 | regs.ip = 0; | 320 | regs.ip = 0; |
322 | 321 | ||
323 | /* | 322 | /* |
@@ -564,8 +563,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event, | |||
564 | if (!intel_pmu_save_and_restart(event)) | 563 | if (!intel_pmu_save_and_restart(event)) |
565 | return; | 564 | return; |
566 | 565 | ||
567 | perf_sample_data_init(&data, 0); | 566 | perf_sample_data_init(&data, 0, event->hw.last_period); |
568 | data.period = event->hw.last_period; | ||
569 | 567 | ||
570 | /* | 568 | /* |
571 | * We use the interrupt regs as a base because the PEBS record | 569 | * We use the interrupt regs as a base because the PEBS record |
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c index a2dfacfd710..47124a73dd7 100644 --- a/arch/x86/kernel/cpu/perf_event_p4.c +++ b/arch/x86/kernel/cpu/perf_event_p4.c | |||
@@ -1005,8 +1005,6 @@ static int p4_pmu_handle_irq(struct pt_regs *regs) | |||
1005 | int idx, handled = 0; | 1005 | int idx, handled = 0; |
1006 | u64 val; | 1006 | u64 val; |
1007 | 1007 | ||
1008 | perf_sample_data_init(&data, 0); | ||
1009 | |||
1010 | cpuc = &__get_cpu_var(cpu_hw_events); | 1008 | cpuc = &__get_cpu_var(cpu_hw_events); |
1011 | 1009 | ||
1012 | for (idx = 0; idx < x86_pmu.num_counters; idx++) { | 1010 | for (idx = 0; idx < x86_pmu.num_counters; idx++) { |
@@ -1034,10 +1032,12 @@ static int p4_pmu_handle_irq(struct pt_regs *regs) | |||
1034 | handled += overflow; | 1032 | handled += overflow; |
1035 | 1033 | ||
1036 | /* event overflow for sure */ | 1034 | /* event overflow for sure */ |
1037 | data.period = event->hw.last_period; | 1035 | perf_sample_data_init(&data, 0, hwc->last_period); |
1038 | 1036 | ||
1039 | if (!x86_perf_event_set_period(event)) | 1037 | if (!x86_perf_event_set_period(event)) |
1040 | continue; | 1038 | continue; |
1039 | |||
1040 | |||
1041 | if (perf_event_overflow(event, &data, regs)) | 1041 | if (perf_event_overflow(event, &data, regs)) |
1042 | x86_pmu_stop(event, 0); | 1042 | x86_pmu_stop(event, 0); |
1043 | } | 1043 | } |
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index 1b81839b6c8..571246d81ed 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c | |||
@@ -271,7 +271,7 @@ int __kprobes __die(const char *str, struct pt_regs *regs, long err) | |||
271 | current->thread.trap_nr, SIGSEGV) == NOTIFY_STOP) | 271 | current->thread.trap_nr, SIGSEGV) == NOTIFY_STOP) |
272 | return 1; | 272 | return 1; |
273 | 273 | ||
274 | show_registers(regs); | 274 | show_regs(regs); |
275 | #ifdef CONFIG_X86_32 | 275 | #ifdef CONFIG_X86_32 |
276 | if (user_mode_vm(regs)) { | 276 | if (user_mode_vm(regs)) { |
277 | sp = regs->sp; | 277 | sp = regs->sp; |
@@ -311,16 +311,33 @@ void die(const char *str, struct pt_regs *regs, long err) | |||
311 | 311 | ||
312 | static int __init kstack_setup(char *s) | 312 | static int __init kstack_setup(char *s) |
313 | { | 313 | { |
314 | ssize_t ret; | ||
315 | unsigned long val; | ||
316 | |||
314 | if (!s) | 317 | if (!s) |
315 | return -EINVAL; | 318 | return -EINVAL; |
316 | kstack_depth_to_print = simple_strtoul(s, NULL, 0); | 319 | |
320 | ret = kstrtoul(s, 0, &val); | ||
321 | if (ret) | ||
322 | return ret; | ||
323 | kstack_depth_to_print = val; | ||
317 | return 0; | 324 | return 0; |
318 | } | 325 | } |
319 | early_param("kstack", kstack_setup); | 326 | early_param("kstack", kstack_setup); |
320 | 327 | ||
321 | static int __init code_bytes_setup(char *s) | 328 | static int __init code_bytes_setup(char *s) |
322 | { | 329 | { |
323 | code_bytes = simple_strtoul(s, NULL, 0); | 330 | ssize_t ret; |
331 | unsigned long val; | ||
332 | |||
333 | if (!s) | ||
334 | return -EINVAL; | ||
335 | |||
336 | ret = kstrtoul(s, 0, &val); | ||
337 | if (ret) | ||
338 | return ret; | ||
339 | |||
340 | code_bytes = val; | ||
324 | if (code_bytes > 8192) | 341 | if (code_bytes > 8192) |
325 | code_bytes = 8192; | 342 | code_bytes = 8192; |
326 | 343 | ||
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c index 88ec9129271..e0b1d783daa 100644 --- a/arch/x86/kernel/dumpstack_32.c +++ b/arch/x86/kernel/dumpstack_32.c | |||
@@ -82,7 +82,7 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, | |||
82 | } | 82 | } |
83 | 83 | ||
84 | 84 | ||
85 | void show_registers(struct pt_regs *regs) | 85 | void show_regs(struct pt_regs *regs) |
86 | { | 86 | { |
87 | int i; | 87 | int i; |
88 | 88 | ||
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index 17107bd6e1f..791b76122aa 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c | |||
@@ -245,7 +245,7 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, | |||
245 | show_trace_log_lvl(task, regs, sp, bp, log_lvl); | 245 | show_trace_log_lvl(task, regs, sp, bp, log_lvl); |
246 | } | 246 | } |
247 | 247 | ||
248 | void show_registers(struct pt_regs *regs) | 248 | void show_regs(struct pt_regs *regs) |
249 | { | 249 | { |
250 | int i; | 250 | int i; |
251 | unsigned long sp; | 251 | unsigned long sp; |
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 7b784f4ef1e..01ccf9b7147 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S | |||
@@ -56,6 +56,7 @@ | |||
56 | #include <asm/irq_vectors.h> | 56 | #include <asm/irq_vectors.h> |
57 | #include <asm/cpufeature.h> | 57 | #include <asm/cpufeature.h> |
58 | #include <asm/alternative-asm.h> | 58 | #include <asm/alternative-asm.h> |
59 | #include <asm/asm.h> | ||
59 | 60 | ||
60 | /* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */ | 61 | /* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */ |
61 | #include <linux/elf-em.h> | 62 | #include <linux/elf-em.h> |
@@ -151,10 +152,8 @@ | |||
151 | .pushsection .fixup, "ax" | 152 | .pushsection .fixup, "ax" |
152 | 99: movl $0, (%esp) | 153 | 99: movl $0, (%esp) |
153 | jmp 98b | 154 | jmp 98b |
154 | .section __ex_table, "a" | ||
155 | .align 4 | ||
156 | .long 98b, 99b | ||
157 | .popsection | 155 | .popsection |
156 | _ASM_EXTABLE(98b,99b) | ||
158 | .endm | 157 | .endm |
159 | 158 | ||
160 | .macro PTGS_TO_GS | 159 | .macro PTGS_TO_GS |
@@ -164,10 +163,8 @@ | |||
164 | .pushsection .fixup, "ax" | 163 | .pushsection .fixup, "ax" |
165 | 99: movl $0, PT_GS(%esp) | 164 | 99: movl $0, PT_GS(%esp) |
166 | jmp 98b | 165 | jmp 98b |
167 | .section __ex_table, "a" | ||
168 | .align 4 | ||
169 | .long 98b, 99b | ||
170 | .popsection | 166 | .popsection |
167 | _ASM_EXTABLE(98b,99b) | ||
171 | .endm | 168 | .endm |
172 | 169 | ||
173 | .macro GS_TO_REG reg | 170 | .macro GS_TO_REG reg |
@@ -249,12 +246,10 @@ | |||
249 | jmp 2b | 246 | jmp 2b |
250 | 6: movl $0, (%esp) | 247 | 6: movl $0, (%esp) |
251 | jmp 3b | 248 | jmp 3b |
252 | .section __ex_table, "a" | ||
253 | .align 4 | ||
254 | .long 1b, 4b | ||
255 | .long 2b, 5b | ||
256 | .long 3b, 6b | ||
257 | .popsection | 249 | .popsection |
250 | _ASM_EXTABLE(1b,4b) | ||
251 | _ASM_EXTABLE(2b,5b) | ||
252 | _ASM_EXTABLE(3b,6b) | ||
258 | POP_GS_EX | 253 | POP_GS_EX |
259 | .endm | 254 | .endm |
260 | 255 | ||
@@ -415,10 +410,7 @@ sysenter_past_esp: | |||
415 | jae syscall_fault | 410 | jae syscall_fault |
416 | 1: movl (%ebp),%ebp | 411 | 1: movl (%ebp),%ebp |
417 | movl %ebp,PT_EBP(%esp) | 412 | movl %ebp,PT_EBP(%esp) |
418 | .section __ex_table,"a" | 413 | _ASM_EXTABLE(1b,syscall_fault) |
419 | .align 4 | ||
420 | .long 1b,syscall_fault | ||
421 | .previous | ||
422 | 414 | ||
423 | GET_THREAD_INFO(%ebp) | 415 | GET_THREAD_INFO(%ebp) |
424 | 416 | ||
@@ -485,10 +477,8 @@ sysexit_audit: | |||
485 | .pushsection .fixup,"ax" | 477 | .pushsection .fixup,"ax" |
486 | 2: movl $0,PT_FS(%esp) | 478 | 2: movl $0,PT_FS(%esp) |
487 | jmp 1b | 479 | jmp 1b |
488 | .section __ex_table,"a" | ||
489 | .align 4 | ||
490 | .long 1b,2b | ||
491 | .popsection | 480 | .popsection |
481 | _ASM_EXTABLE(1b,2b) | ||
492 | PTGS_TO_GS_EX | 482 | PTGS_TO_GS_EX |
493 | ENDPROC(ia32_sysenter_target) | 483 | ENDPROC(ia32_sysenter_target) |
494 | 484 | ||
@@ -543,10 +533,7 @@ ENTRY(iret_exc) | |||
543 | pushl $do_iret_error | 533 | pushl $do_iret_error |
544 | jmp error_code | 534 | jmp error_code |
545 | .previous | 535 | .previous |
546 | .section __ex_table,"a" | 536 | _ASM_EXTABLE(irq_return,iret_exc) |
547 | .align 4 | ||
548 | .long irq_return,iret_exc | ||
549 | .previous | ||
550 | 537 | ||
551 | CFI_RESTORE_STATE | 538 | CFI_RESTORE_STATE |
552 | ldt_ss: | 539 | ldt_ss: |
@@ -901,10 +888,7 @@ END(device_not_available) | |||
901 | #ifdef CONFIG_PARAVIRT | 888 | #ifdef CONFIG_PARAVIRT |
902 | ENTRY(native_iret) | 889 | ENTRY(native_iret) |
903 | iret | 890 | iret |
904 | .section __ex_table,"a" | 891 | _ASM_EXTABLE(native_iret, iret_exc) |
905 | .align 4 | ||
906 | .long native_iret, iret_exc | ||
907 | .previous | ||
908 | END(native_iret) | 892 | END(native_iret) |
909 | 893 | ||
910 | ENTRY(native_irq_enable_sysexit) | 894 | ENTRY(native_irq_enable_sysexit) |
@@ -1093,13 +1077,10 @@ ENTRY(xen_failsafe_callback) | |||
1093 | movl %eax,16(%esp) | 1077 | movl %eax,16(%esp) |
1094 | jmp 4b | 1078 | jmp 4b |
1095 | .previous | 1079 | .previous |
1096 | .section __ex_table,"a" | 1080 | _ASM_EXTABLE(1b,6b) |
1097 | .align 4 | 1081 | _ASM_EXTABLE(2b,7b) |
1098 | .long 1b,6b | 1082 | _ASM_EXTABLE(3b,8b) |
1099 | .long 2b,7b | 1083 | _ASM_EXTABLE(4b,9b) |
1100 | .long 3b,8b | ||
1101 | .long 4b,9b | ||
1102 | .previous | ||
1103 | ENDPROC(xen_failsafe_callback) | 1084 | ENDPROC(xen_failsafe_callback) |
1104 | 1085 | ||
1105 | BUILD_INTERRUPT3(xen_hvm_callback_vector, XEN_HVM_EVTCHN_CALLBACK, | 1086 | BUILD_INTERRUPT3(xen_hvm_callback_vector, XEN_HVM_EVTCHN_CALLBACK, |
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index cdc79b5cfcd..320852d0202 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S | |||
@@ -55,6 +55,7 @@ | |||
55 | #include <asm/paravirt.h> | 55 | #include <asm/paravirt.h> |
56 | #include <asm/ftrace.h> | 56 | #include <asm/ftrace.h> |
57 | #include <asm/percpu.h> | 57 | #include <asm/percpu.h> |
58 | #include <asm/asm.h> | ||
58 | #include <linux/err.h> | 59 | #include <linux/err.h> |
59 | 60 | ||
60 | /* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */ | 61 | /* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */ |
@@ -900,18 +901,12 @@ restore_args: | |||
900 | 901 | ||
901 | irq_return: | 902 | irq_return: |
902 | INTERRUPT_RETURN | 903 | INTERRUPT_RETURN |
903 | 904 | _ASM_EXTABLE(irq_return, bad_iret) | |
904 | .section __ex_table, "a" | ||
905 | .quad irq_return, bad_iret | ||
906 | .previous | ||
907 | 905 | ||
908 | #ifdef CONFIG_PARAVIRT | 906 | #ifdef CONFIG_PARAVIRT |
909 | ENTRY(native_iret) | 907 | ENTRY(native_iret) |
910 | iretq | 908 | iretq |
911 | 909 | _ASM_EXTABLE(native_iret, bad_iret) | |
912 | .section __ex_table,"a" | ||
913 | .quad native_iret, bad_iret | ||
914 | .previous | ||
915 | #endif | 910 | #endif |
916 | 911 | ||
917 | .section .fixup,"ax" | 912 | .section .fixup,"ax" |
@@ -1181,10 +1176,7 @@ gs_change: | |||
1181 | CFI_ENDPROC | 1176 | CFI_ENDPROC |
1182 | END(native_load_gs_index) | 1177 | END(native_load_gs_index) |
1183 | 1178 | ||
1184 | .section __ex_table,"a" | 1179 | _ASM_EXTABLE(gs_change,bad_gs) |
1185 | .align 8 | ||
1186 | .quad gs_change,bad_gs | ||
1187 | .previous | ||
1188 | .section .fixup,"ax" | 1180 | .section .fixup,"ax" |
1189 | /* running with kernelgs */ | 1181 | /* running with kernelgs */ |
1190 | bad_gs: | 1182 | bad_gs: |
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index c9a281f272f..32ff36596ab 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c | |||
@@ -24,40 +24,21 @@ | |||
24 | #include <trace/syscall.h> | 24 | #include <trace/syscall.h> |
25 | 25 | ||
26 | #include <asm/cacheflush.h> | 26 | #include <asm/cacheflush.h> |
27 | #include <asm/kprobes.h> | ||
27 | #include <asm/ftrace.h> | 28 | #include <asm/ftrace.h> |
28 | #include <asm/nops.h> | 29 | #include <asm/nops.h> |
29 | #include <asm/nmi.h> | ||
30 | |||
31 | 30 | ||
32 | #ifdef CONFIG_DYNAMIC_FTRACE | 31 | #ifdef CONFIG_DYNAMIC_FTRACE |
33 | 32 | ||
34 | /* | ||
35 | * modifying_code is set to notify NMIs that they need to use | ||
36 | * memory barriers when entering or exiting. But we don't want | ||
37 | * to burden NMIs with unnecessary memory barriers when code | ||
38 | * modification is not being done (which is most of the time). | ||
39 | * | ||
40 | * A mutex is already held when ftrace_arch_code_modify_prepare | ||
41 | * and post_process are called. No locks need to be taken here. | ||
42 | * | ||
43 | * Stop machine will make sure currently running NMIs are done | ||
44 | * and new NMIs will see the updated variable before we need | ||
45 | * to worry about NMIs doing memory barriers. | ||
46 | */ | ||
47 | static int modifying_code __read_mostly; | ||
48 | static DEFINE_PER_CPU(int, save_modifying_code); | ||
49 | |||
50 | int ftrace_arch_code_modify_prepare(void) | 33 | int ftrace_arch_code_modify_prepare(void) |
51 | { | 34 | { |
52 | set_kernel_text_rw(); | 35 | set_kernel_text_rw(); |
53 | set_all_modules_text_rw(); | 36 | set_all_modules_text_rw(); |
54 | modifying_code = 1; | ||
55 | return 0; | 37 | return 0; |
56 | } | 38 | } |
57 | 39 | ||
58 | int ftrace_arch_code_modify_post_process(void) | 40 | int ftrace_arch_code_modify_post_process(void) |
59 | { | 41 | { |
60 | modifying_code = 0; | ||
61 | set_all_modules_text_ro(); | 42 | set_all_modules_text_ro(); |
62 | set_kernel_text_ro(); | 43 | set_kernel_text_ro(); |
63 | return 0; | 44 | return 0; |
@@ -90,134 +71,6 @@ static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr) | |||
90 | return calc.code; | 71 | return calc.code; |
91 | } | 72 | } |
92 | 73 | ||
93 | /* | ||
94 | * Modifying code must take extra care. On an SMP machine, if | ||
95 | * the code being modified is also being executed on another CPU | ||
96 | * that CPU will have undefined results and possibly take a GPF. | ||
97 | * We use kstop_machine to stop other CPUS from exectuing code. | ||
98 | * But this does not stop NMIs from happening. We still need | ||
99 | * to protect against that. We separate out the modification of | ||
100 | * the code to take care of this. | ||
101 | * | ||
102 | * Two buffers are added: An IP buffer and a "code" buffer. | ||
103 | * | ||
104 | * 1) Put the instruction pointer into the IP buffer | ||
105 | * and the new code into the "code" buffer. | ||
106 | * 2) Wait for any running NMIs to finish and set a flag that says | ||
107 | * we are modifying code, it is done in an atomic operation. | ||
108 | * 3) Write the code | ||
109 | * 4) clear the flag. | ||
110 | * 5) Wait for any running NMIs to finish. | ||
111 | * | ||
112 | * If an NMI is executed, the first thing it does is to call | ||
113 | * "ftrace_nmi_enter". This will check if the flag is set to write | ||
114 | * and if it is, it will write what is in the IP and "code" buffers. | ||
115 | * | ||
116 | * The trick is, it does not matter if everyone is writing the same | ||
117 | * content to the code location. Also, if a CPU is executing code | ||
118 | * it is OK to write to that code location if the contents being written | ||
119 | * are the same as what exists. | ||
120 | */ | ||
121 | |||
122 | #define MOD_CODE_WRITE_FLAG (1 << 31) /* set when NMI should do the write */ | ||
123 | static atomic_t nmi_running = ATOMIC_INIT(0); | ||
124 | static int mod_code_status; /* holds return value of text write */ | ||
125 | static void *mod_code_ip; /* holds the IP to write to */ | ||
126 | static const void *mod_code_newcode; /* holds the text to write to the IP */ | ||
127 | |||
128 | static unsigned nmi_wait_count; | ||
129 | static atomic_t nmi_update_count = ATOMIC_INIT(0); | ||
130 | |||
131 | int ftrace_arch_read_dyn_info(char *buf, int size) | ||
132 | { | ||
133 | int r; | ||
134 | |||
135 | r = snprintf(buf, size, "%u %u", | ||
136 | nmi_wait_count, | ||
137 | atomic_read(&nmi_update_count)); | ||
138 | return r; | ||
139 | } | ||
140 | |||
141 | static void clear_mod_flag(void) | ||
142 | { | ||
143 | int old = atomic_read(&nmi_running); | ||
144 | |||
145 | for (;;) { | ||
146 | int new = old & ~MOD_CODE_WRITE_FLAG; | ||
147 | |||
148 | if (old == new) | ||
149 | break; | ||
150 | |||
151 | old = atomic_cmpxchg(&nmi_running, old, new); | ||
152 | } | ||
153 | } | ||
154 | |||
155 | static void ftrace_mod_code(void) | ||
156 | { | ||
157 | /* | ||
158 | * Yes, more than one CPU process can be writing to mod_code_status. | ||
159 | * (and the code itself) | ||
160 | * But if one were to fail, then they all should, and if one were | ||
161 | * to succeed, then they all should. | ||
162 | */ | ||
163 | mod_code_status = probe_kernel_write(mod_code_ip, mod_code_newcode, | ||
164 | MCOUNT_INSN_SIZE); | ||
165 | |||
166 | /* if we fail, then kill any new writers */ | ||
167 | if (mod_code_status) | ||
168 | clear_mod_flag(); | ||
169 | } | ||
170 | |||
171 | void ftrace_nmi_enter(void) | ||
172 | { | ||
173 | __this_cpu_write(save_modifying_code, modifying_code); | ||
174 | |||
175 | if (!__this_cpu_read(save_modifying_code)) | ||
176 | return; | ||
177 | |||
178 | if (atomic_inc_return(&nmi_running) & MOD_CODE_WRITE_FLAG) { | ||
179 | smp_rmb(); | ||
180 | ftrace_mod_code(); | ||
181 | atomic_inc(&nmi_update_count); | ||
182 | } | ||
183 | /* Must have previous changes seen before executions */ | ||
184 | smp_mb(); | ||
185 | } | ||
186 | |||
187 | void ftrace_nmi_exit(void) | ||
188 | { | ||
189 | if (!__this_cpu_read(save_modifying_code)) | ||
190 | return; | ||
191 | |||
192 | /* Finish all executions before clearing nmi_running */ | ||
193 | smp_mb(); | ||
194 | atomic_dec(&nmi_running); | ||
195 | } | ||
196 | |||
197 | static void wait_for_nmi_and_set_mod_flag(void) | ||
198 | { | ||
199 | if (!atomic_cmpxchg(&nmi_running, 0, MOD_CODE_WRITE_FLAG)) | ||
200 | return; | ||
201 | |||
202 | do { | ||
203 | cpu_relax(); | ||
204 | } while (atomic_cmpxchg(&nmi_running, 0, MOD_CODE_WRITE_FLAG)); | ||
205 | |||
206 | nmi_wait_count++; | ||
207 | } | ||
208 | |||
209 | static void wait_for_nmi(void) | ||
210 | { | ||
211 | if (!atomic_read(&nmi_running)) | ||
212 | return; | ||
213 | |||
214 | do { | ||
215 | cpu_relax(); | ||
216 | } while (atomic_read(&nmi_running)); | ||
217 | |||
218 | nmi_wait_count++; | ||
219 | } | ||
220 | |||
221 | static inline int | 74 | static inline int |
222 | within(unsigned long addr, unsigned long start, unsigned long end) | 75 | within(unsigned long addr, unsigned long start, unsigned long end) |
223 | { | 76 | { |
@@ -238,26 +91,7 @@ do_ftrace_mod_code(unsigned long ip, const void *new_code) | |||
238 | if (within(ip, (unsigned long)_text, (unsigned long)_etext)) | 91 | if (within(ip, (unsigned long)_text, (unsigned long)_etext)) |
239 | ip = (unsigned long)__va(__pa(ip)); | 92 | ip = (unsigned long)__va(__pa(ip)); |
240 | 93 | ||
241 | mod_code_ip = (void *)ip; | 94 | return probe_kernel_write((void *)ip, new_code, MCOUNT_INSN_SIZE); |
242 | mod_code_newcode = new_code; | ||
243 | |||
244 | /* The buffers need to be visible before we let NMIs write them */ | ||
245 | smp_mb(); | ||
246 | |||
247 | wait_for_nmi_and_set_mod_flag(); | ||
248 | |||
249 | /* Make sure all running NMIs have finished before we write the code */ | ||
250 | smp_mb(); | ||
251 | |||
252 | ftrace_mod_code(); | ||
253 | |||
254 | /* Make sure the write happens before clearing the bit */ | ||
255 | smp_mb(); | ||
256 | |||
257 | clear_mod_flag(); | ||
258 | wait_for_nmi(); | ||
259 | |||
260 | return mod_code_status; | ||
261 | } | 95 | } |
262 | 96 | ||
263 | static const unsigned char *ftrace_nop_replace(void) | 97 | static const unsigned char *ftrace_nop_replace(void) |
@@ -334,6 +168,336 @@ int ftrace_update_ftrace_func(ftrace_func_t func) | |||
334 | return ret; | 168 | return ret; |
335 | } | 169 | } |
336 | 170 | ||
171 | int modifying_ftrace_code __read_mostly; | ||
172 | |||
173 | /* | ||
174 | * A breakpoint was added to the code address we are about to | ||
175 | * modify, and this is the handler that will just skip over it. | ||
176 | * We are either changing a nop into a trace call, or a trace | ||
177 | * call to a nop. While the change is taking place, we treat | ||
178 | * it just like it was a nop. | ||
179 | */ | ||
180 | int ftrace_int3_handler(struct pt_regs *regs) | ||
181 | { | ||
182 | if (WARN_ON_ONCE(!regs)) | ||
183 | return 0; | ||
184 | |||
185 | if (!ftrace_location(regs->ip - 1)) | ||
186 | return 0; | ||
187 | |||
188 | regs->ip += MCOUNT_INSN_SIZE - 1; | ||
189 | |||
190 | return 1; | ||
191 | } | ||
192 | |||
193 | static int ftrace_write(unsigned long ip, const char *val, int size) | ||
194 | { | ||
195 | /* | ||
196 | * On x86_64, kernel text mappings are mapped read-only with | ||
197 | * CONFIG_DEBUG_RODATA. So we use the kernel identity mapping instead | ||
198 | * of the kernel text mapping to modify the kernel text. | ||
199 | * | ||
200 | * For 32bit kernels, these mappings are same and we can use | ||
201 | * kernel identity mapping to modify code. | ||
202 | */ | ||
203 | if (within(ip, (unsigned long)_text, (unsigned long)_etext)) | ||
204 | ip = (unsigned long)__va(__pa(ip)); | ||
205 | |||
206 | return probe_kernel_write((void *)ip, val, size); | ||
207 | } | ||
208 | |||
209 | static int add_break(unsigned long ip, const char *old) | ||
210 | { | ||
211 | unsigned char replaced[MCOUNT_INSN_SIZE]; | ||
212 | unsigned char brk = BREAKPOINT_INSTRUCTION; | ||
213 | |||
214 | if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE)) | ||
215 | return -EFAULT; | ||
216 | |||
217 | /* Make sure it is what we expect it to be */ | ||
218 | if (memcmp(replaced, old, MCOUNT_INSN_SIZE) != 0) | ||
219 | return -EINVAL; | ||
220 | |||
221 | if (ftrace_write(ip, &brk, 1)) | ||
222 | return -EPERM; | ||
223 | |||
224 | return 0; | ||
225 | } | ||
226 | |||
227 | static int add_brk_on_call(struct dyn_ftrace *rec, unsigned long addr) | ||
228 | { | ||
229 | unsigned const char *old; | ||
230 | unsigned long ip = rec->ip; | ||
231 | |||
232 | old = ftrace_call_replace(ip, addr); | ||
233 | |||
234 | return add_break(rec->ip, old); | ||
235 | } | ||
236 | |||
237 | |||
238 | static int add_brk_on_nop(struct dyn_ftrace *rec) | ||
239 | { | ||
240 | unsigned const char *old; | ||
241 | |||
242 | old = ftrace_nop_replace(); | ||
243 | |||
244 | return add_break(rec->ip, old); | ||
245 | } | ||
246 | |||
247 | static int add_breakpoints(struct dyn_ftrace *rec, int enable) | ||
248 | { | ||
249 | unsigned long ftrace_addr; | ||
250 | int ret; | ||
251 | |||
252 | ret = ftrace_test_record(rec, enable); | ||
253 | |||
254 | ftrace_addr = (unsigned long)FTRACE_ADDR; | ||
255 | |||
256 | switch (ret) { | ||
257 | case FTRACE_UPDATE_IGNORE: | ||
258 | return 0; | ||
259 | |||
260 | case FTRACE_UPDATE_MAKE_CALL: | ||
261 | /* converting nop to call */ | ||
262 | return add_brk_on_nop(rec); | ||
263 | |||
264 | case FTRACE_UPDATE_MAKE_NOP: | ||
265 | /* converting a call to a nop */ | ||
266 | return add_brk_on_call(rec, ftrace_addr); | ||
267 | } | ||
268 | return 0; | ||
269 | } | ||
270 | |||
271 | /* | ||
272 | * On error, we need to remove breakpoints. This needs to | ||
273 | * be done carefully. If the address does not currently have a | ||
274 | * breakpoint, we know we are done. Otherwise, we look at the | ||
275 | * remaining 4 bytes of the instruction. If it matches a nop | ||
276 | * we replace the breakpoint with the nop. Otherwise we replace | ||
277 | * it with the call instruction. | ||
278 | */ | ||
279 | static int remove_breakpoint(struct dyn_ftrace *rec) | ||
280 | { | ||
281 | unsigned char ins[MCOUNT_INSN_SIZE]; | ||
282 | unsigned char brk = BREAKPOINT_INSTRUCTION; | ||
283 | const unsigned char *nop; | ||
284 | unsigned long ftrace_addr; | ||
285 | unsigned long ip = rec->ip; | ||
286 | |||
287 | /* If we fail the read, just give up */ | ||
288 | if (probe_kernel_read(ins, (void *)ip, MCOUNT_INSN_SIZE)) | ||
289 | return -EFAULT; | ||
290 | |||
291 | /* If this does not have a breakpoint, we are done */ | ||
292 | if (ins[0] != brk) | ||
293 | return -1; | ||
294 | |||
295 | nop = ftrace_nop_replace(); | ||
296 | |||
297 | /* | ||
298 | * If the last 4 bytes of the instruction do not match | ||
299 | * a nop, then we assume that this is a call to ftrace_addr. | ||
300 | */ | ||
301 | if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) != 0) { | ||
302 | /* | ||
303 | * For extra paranoia, we check if the breakpoint is on | ||
304 | * a call that would actually jump to the ftrace_addr. | ||
305 | * If not, don't touch the breakpoint, we may just create | ||
306 | * a disaster. | ||
307 | */ | ||
308 | ftrace_addr = (unsigned long)FTRACE_ADDR; | ||
309 | nop = ftrace_call_replace(ip, ftrace_addr); | ||
310 | |||
311 | if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) != 0) | ||
312 | return -EINVAL; | ||
313 | } | ||
314 | |||
315 | return probe_kernel_write((void *)ip, &nop[0], 1); | ||
316 | } | ||
317 | |||
318 | static int add_update_code(unsigned long ip, unsigned const char *new) | ||
319 | { | ||
320 | /* skip breakpoint */ | ||
321 | ip++; | ||
322 | new++; | ||
323 | if (ftrace_write(ip, new, MCOUNT_INSN_SIZE - 1)) | ||
324 | return -EPERM; | ||
325 | return 0; | ||
326 | } | ||
327 | |||
328 | static int add_update_call(struct dyn_ftrace *rec, unsigned long addr) | ||
329 | { | ||
330 | unsigned long ip = rec->ip; | ||
331 | unsigned const char *new; | ||
332 | |||
333 | new = ftrace_call_replace(ip, addr); | ||
334 | return add_update_code(ip, new); | ||
335 | } | ||
336 | |||
337 | static int add_update_nop(struct dyn_ftrace *rec) | ||
338 | { | ||
339 | unsigned long ip = rec->ip; | ||
340 | unsigned const char *new; | ||
341 | |||
342 | new = ftrace_nop_replace(); | ||
343 | return add_update_code(ip, new); | ||
344 | } | ||
345 | |||
346 | static int add_update(struct dyn_ftrace *rec, int enable) | ||
347 | { | ||
348 | unsigned long ftrace_addr; | ||
349 | int ret; | ||
350 | |||
351 | ret = ftrace_test_record(rec, enable); | ||
352 | |||
353 | ftrace_addr = (unsigned long)FTRACE_ADDR; | ||
354 | |||
355 | switch (ret) { | ||
356 | case FTRACE_UPDATE_IGNORE: | ||
357 | return 0; | ||
358 | |||
359 | case FTRACE_UPDATE_MAKE_CALL: | ||
360 | /* converting nop to call */ | ||
361 | return add_update_call(rec, ftrace_addr); | ||
362 | |||
363 | case FTRACE_UPDATE_MAKE_NOP: | ||
364 | /* converting a call to a nop */ | ||
365 | return add_update_nop(rec); | ||
366 | } | ||
367 | |||
368 | return 0; | ||
369 | } | ||
370 | |||
371 | static int finish_update_call(struct dyn_ftrace *rec, unsigned long addr) | ||
372 | { | ||
373 | unsigned long ip = rec->ip; | ||
374 | unsigned const char *new; | ||
375 | |||
376 | new = ftrace_call_replace(ip, addr); | ||
377 | |||
378 | if (ftrace_write(ip, new, 1)) | ||
379 | return -EPERM; | ||
380 | |||
381 | return 0; | ||
382 | } | ||
383 | |||
384 | static int finish_update_nop(struct dyn_ftrace *rec) | ||
385 | { | ||
386 | unsigned long ip = rec->ip; | ||
387 | unsigned const char *new; | ||
388 | |||
389 | new = ftrace_nop_replace(); | ||
390 | |||
391 | if (ftrace_write(ip, new, 1)) | ||
392 | return -EPERM; | ||
393 | return 0; | ||
394 | } | ||
395 | |||
396 | static int finish_update(struct dyn_ftrace *rec, int enable) | ||
397 | { | ||
398 | unsigned long ftrace_addr; | ||
399 | int ret; | ||
400 | |||
401 | ret = ftrace_update_record(rec, enable); | ||
402 | |||
403 | ftrace_addr = (unsigned long)FTRACE_ADDR; | ||
404 | |||
405 | switch (ret) { | ||
406 | case FTRACE_UPDATE_IGNORE: | ||
407 | return 0; | ||
408 | |||
409 | case FTRACE_UPDATE_MAKE_CALL: | ||
410 | /* converting nop to call */ | ||
411 | return finish_update_call(rec, ftrace_addr); | ||
412 | |||
413 | case FTRACE_UPDATE_MAKE_NOP: | ||
414 | /* converting a call to a nop */ | ||
415 | return finish_update_nop(rec); | ||
416 | } | ||
417 | |||
418 | return 0; | ||
419 | } | ||
420 | |||
421 | static void do_sync_core(void *data) | ||
422 | { | ||
423 | sync_core(); | ||
424 | } | ||
425 | |||
426 | static void run_sync(void) | ||
427 | { | ||
428 | int enable_irqs = irqs_disabled(); | ||
429 | |||
430 | /* We may be called with interrupts disabled (on bootup). */ | ||
431 | if (enable_irqs) | ||
432 | local_irq_enable(); | ||
433 | on_each_cpu(do_sync_core, NULL, 1); | ||
434 | if (enable_irqs) | ||
435 | local_irq_disable(); | ||
436 | } | ||
437 | |||
438 | void ftrace_replace_code(int enable) | ||
439 | { | ||
440 | struct ftrace_rec_iter *iter; | ||
441 | struct dyn_ftrace *rec; | ||
442 | const char *report = "adding breakpoints"; | ||
443 | int count = 0; | ||
444 | int ret; | ||
445 | |||
446 | for_ftrace_rec_iter(iter) { | ||
447 | rec = ftrace_rec_iter_record(iter); | ||
448 | |||
449 | ret = add_breakpoints(rec, enable); | ||
450 | if (ret) | ||
451 | goto remove_breakpoints; | ||
452 | count++; | ||
453 | } | ||
454 | |||
455 | run_sync(); | ||
456 | |||
457 | report = "updating code"; | ||
458 | |||
459 | for_ftrace_rec_iter(iter) { | ||
460 | rec = ftrace_rec_iter_record(iter); | ||
461 | |||
462 | ret = add_update(rec, enable); | ||
463 | if (ret) | ||
464 | goto remove_breakpoints; | ||
465 | } | ||
466 | |||
467 | run_sync(); | ||
468 | |||
469 | report = "removing breakpoints"; | ||
470 | |||
471 | for_ftrace_rec_iter(iter) { | ||
472 | rec = ftrace_rec_iter_record(iter); | ||
473 | |||
474 | ret = finish_update(rec, enable); | ||
475 | if (ret) | ||
476 | goto remove_breakpoints; | ||
477 | } | ||
478 | |||
479 | run_sync(); | ||
480 | |||
481 | return; | ||
482 | |||
483 | remove_breakpoints: | ||
484 | ftrace_bug(ret, rec ? rec->ip : 0); | ||
485 | printk(KERN_WARNING "Failed on %s (%d):\n", report, count); | ||
486 | for_ftrace_rec_iter(iter) { | ||
487 | rec = ftrace_rec_iter_record(iter); | ||
488 | remove_breakpoint(rec); | ||
489 | } | ||
490 | } | ||
491 | |||
492 | void arch_ftrace_update_code(int command) | ||
493 | { | ||
494 | modifying_ftrace_code++; | ||
495 | |||
496 | ftrace_modify_all_code(command); | ||
497 | |||
498 | modifying_ftrace_code--; | ||
499 | } | ||
500 | |||
337 | int __init ftrace_dyn_arch_init(void *data) | 501 | int __init ftrace_dyn_arch_init(void *data) |
338 | { | 502 | { |
339 | /* The return code is returned via data */ | 503 | /* The return code is returned via data */ |
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index ce0be7cd085..463c9797ca6 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S | |||
@@ -21,6 +21,7 @@ | |||
21 | #include <asm/msr-index.h> | 21 | #include <asm/msr-index.h> |
22 | #include <asm/cpufeature.h> | 22 | #include <asm/cpufeature.h> |
23 | #include <asm/percpu.h> | 23 | #include <asm/percpu.h> |
24 | #include <asm/nops.h> | ||
24 | 25 | ||
25 | /* Physical address */ | 26 | /* Physical address */ |
26 | #define pa(X) ((X) - __PAGE_OFFSET) | 27 | #define pa(X) ((X) - __PAGE_OFFSET) |
@@ -363,28 +364,23 @@ default_entry: | |||
363 | pushl $0 | 364 | pushl $0 |
364 | popfl | 365 | popfl |
365 | 366 | ||
366 | #ifdef CONFIG_SMP | ||
367 | cmpb $0, ready | ||
368 | jnz checkCPUtype | ||
369 | #endif /* CONFIG_SMP */ | ||
370 | |||
371 | /* | 367 | /* |
372 | * start system 32-bit setup. We need to re-do some of the things done | 368 | * start system 32-bit setup. We need to re-do some of the things done |
373 | * in 16-bit mode for the "real" operations. | 369 | * in 16-bit mode for the "real" operations. |
374 | */ | 370 | */ |
375 | call setup_idt | 371 | movl setup_once_ref,%eax |
376 | 372 | andl %eax,%eax | |
377 | checkCPUtype: | 373 | jz 1f # Did we do this already? |
378 | 374 | call *%eax | |
379 | movl $-1,X86_CPUID # -1 for no CPUID initially | 375 | 1: |
380 | 376 | ||
381 | /* check if it is 486 or 386. */ | 377 | /* check if it is 486 or 386. */ |
382 | /* | 378 | /* |
383 | * XXX - this does a lot of unnecessary setup. Alignment checks don't | 379 | * XXX - this does a lot of unnecessary setup. Alignment checks don't |
384 | * apply at our cpl of 0 and the stack ought to be aligned already, and | 380 | * apply at our cpl of 0 and the stack ought to be aligned already, and |
385 | * we don't need to preserve eflags. | 381 | * we don't need to preserve eflags. |
386 | */ | 382 | */ |
387 | 383 | movl $-1,X86_CPUID # -1 for no CPUID initially | |
388 | movb $3,X86 # at least 386 | 384 | movb $3,X86 # at least 386 |
389 | pushfl # push EFLAGS | 385 | pushfl # push EFLAGS |
390 | popl %eax # get EFLAGS | 386 | popl %eax # get EFLAGS |
@@ -450,21 +446,6 @@ is386: movl $2,%ecx # set MP | |||
450 | movl $(__KERNEL_PERCPU), %eax | 446 | movl $(__KERNEL_PERCPU), %eax |
451 | movl %eax,%fs # set this cpu's percpu | 447 | movl %eax,%fs # set this cpu's percpu |
452 | 448 | ||
453 | #ifdef CONFIG_CC_STACKPROTECTOR | ||
454 | /* | ||
455 | * The linker can't handle this by relocation. Manually set | ||
456 | * base address in stack canary segment descriptor. | ||
457 | */ | ||
458 | cmpb $0,ready | ||
459 | jne 1f | ||
460 | movl $gdt_page,%eax | ||
461 | movl $stack_canary,%ecx | ||
462 | movw %cx, 8 * GDT_ENTRY_STACK_CANARY + 2(%eax) | ||
463 | shrl $16, %ecx | ||
464 | movb %cl, 8 * GDT_ENTRY_STACK_CANARY + 4(%eax) | ||
465 | movb %ch, 8 * GDT_ENTRY_STACK_CANARY + 7(%eax) | ||
466 | 1: | ||
467 | #endif | ||
468 | movl $(__KERNEL_STACK_CANARY),%eax | 449 | movl $(__KERNEL_STACK_CANARY),%eax |
469 | movl %eax,%gs | 450 | movl %eax,%gs |
470 | 451 | ||
@@ -473,7 +454,6 @@ is386: movl $2,%ecx # set MP | |||
473 | 454 | ||
474 | cld # gcc2 wants the direction flag cleared at all times | 455 | cld # gcc2 wants the direction flag cleared at all times |
475 | pushl $0 # fake return address for unwinder | 456 | pushl $0 # fake return address for unwinder |
476 | movb $1, ready | ||
477 | jmp *(initial_code) | 457 | jmp *(initial_code) |
478 | 458 | ||
479 | /* | 459 | /* |
@@ -495,81 +475,122 @@ check_x87: | |||
495 | .byte 0xDB,0xE4 /* fsetpm for 287, ignored by 387 */ | 475 | .byte 0xDB,0xE4 /* fsetpm for 287, ignored by 387 */ |
496 | ret | 476 | ret |
497 | 477 | ||
478 | |||
479 | #include "verify_cpu.S" | ||
480 | |||
498 | /* | 481 | /* |
499 | * setup_idt | 482 | * setup_once |
500 | * | 483 | * |
501 | * sets up a idt with 256 entries pointing to | 484 | * The setup work we only want to run on the BSP. |
502 | * ignore_int, interrupt gates. It doesn't actually load | ||
503 | * idt - that can be done only after paging has been enabled | ||
504 | * and the kernel moved to PAGE_OFFSET. Interrupts | ||
505 | * are enabled elsewhere, when we can be relatively | ||
506 | * sure everything is ok. | ||
507 | * | 485 | * |
508 | * Warning: %esi is live across this function. | 486 | * Warning: %esi is live across this function. |
509 | */ | 487 | */ |
510 | setup_idt: | 488 | __INIT |
511 | lea ignore_int,%edx | 489 | setup_once: |
512 | movl $(__KERNEL_CS << 16),%eax | 490 | /* |
513 | movw %dx,%ax /* selector = 0x0010 = cs */ | 491 | * Set up a idt with 256 entries pointing to ignore_int, |
514 | movw $0x8E00,%dx /* interrupt gate - dpl=0, present */ | 492 | * interrupt gates. It doesn't actually load idt - that needs |
493 | * to be done on each CPU. Interrupts are enabled elsewhere, | ||
494 | * when we can be relatively sure everything is ok. | ||
495 | */ | ||
515 | 496 | ||
516 | lea idt_table,%edi | 497 | movl $idt_table,%edi |
517 | mov $256,%ecx | 498 | movl $early_idt_handlers,%eax |
518 | rp_sidt: | 499 | movl $NUM_EXCEPTION_VECTORS,%ecx |
500 | 1: | ||
519 | movl %eax,(%edi) | 501 | movl %eax,(%edi) |
520 | movl %edx,4(%edi) | 502 | movl %eax,4(%edi) |
503 | /* interrupt gate, dpl=0, present */ | ||
504 | movl $(0x8E000000 + __KERNEL_CS),2(%edi) | ||
505 | addl $9,%eax | ||
521 | addl $8,%edi | 506 | addl $8,%edi |
522 | dec %ecx | 507 | loop 1b |
523 | jne rp_sidt | ||
524 | 508 | ||
525 | .macro set_early_handler handler,trapno | 509 | movl $256 - NUM_EXCEPTION_VECTORS,%ecx |
526 | lea \handler,%edx | 510 | movl $ignore_int,%edx |
527 | movl $(__KERNEL_CS << 16),%eax | 511 | movl $(__KERNEL_CS << 16),%eax |
528 | movw %dx,%ax | 512 | movw %dx,%ax /* selector = 0x0010 = cs */ |
529 | movw $0x8E00,%dx /* interrupt gate - dpl=0, present */ | 513 | movw $0x8E00,%dx /* interrupt gate - dpl=0, present */ |
530 | lea idt_table,%edi | 514 | 2: |
531 | movl %eax,8*\trapno(%edi) | 515 | movl %eax,(%edi) |
532 | movl %edx,8*\trapno+4(%edi) | 516 | movl %edx,4(%edi) |
533 | .endm | 517 | addl $8,%edi |
518 | loop 2b | ||
534 | 519 | ||
535 | set_early_handler handler=early_divide_err,trapno=0 | 520 | #ifdef CONFIG_CC_STACKPROTECTOR |
536 | set_early_handler handler=early_illegal_opcode,trapno=6 | 521 | /* |
537 | set_early_handler handler=early_protection_fault,trapno=13 | 522 | * Configure the stack canary. The linker can't handle this by |
538 | set_early_handler handler=early_page_fault,trapno=14 | 523 | * relocation. Manually set base address in stack canary |
524 | * segment descriptor. | ||
525 | */ | ||
526 | movl $gdt_page,%eax | ||
527 | movl $stack_canary,%ecx | ||
528 | movw %cx, 8 * GDT_ENTRY_STACK_CANARY + 2(%eax) | ||
529 | shrl $16, %ecx | ||
530 | movb %cl, 8 * GDT_ENTRY_STACK_CANARY + 4(%eax) | ||
531 | movb %ch, 8 * GDT_ENTRY_STACK_CANARY + 7(%eax) | ||
532 | #endif | ||
539 | 533 | ||
534 | andl $0,setup_once_ref /* Once is enough, thanks */ | ||
540 | ret | 535 | ret |
541 | 536 | ||
542 | early_divide_err: | 537 | ENTRY(early_idt_handlers) |
543 | xor %edx,%edx | 538 | # 36(%esp) %eflags |
544 | pushl $0 /* fake errcode */ | 539 | # 32(%esp) %cs |
545 | jmp early_fault | 540 | # 28(%esp) %eip |
541 | # 24(%rsp) error code | ||
542 | i = 0 | ||
543 | .rept NUM_EXCEPTION_VECTORS | ||
544 | .if (EXCEPTION_ERRCODE_MASK >> i) & 1 | ||
545 | ASM_NOP2 | ||
546 | .else | ||
547 | pushl $0 # Dummy error code, to make stack frame uniform | ||
548 | .endif | ||
549 | pushl $i # 20(%esp) Vector number | ||
550 | jmp early_idt_handler | ||
551 | i = i + 1 | ||
552 | .endr | ||
553 | ENDPROC(early_idt_handlers) | ||
554 | |||
555 | /* This is global to keep gas from relaxing the jumps */ | ||
556 | ENTRY(early_idt_handler) | ||
557 | cld | ||
558 | cmpl $2,%ss:early_recursion_flag | ||
559 | je hlt_loop | ||
560 | incl %ss:early_recursion_flag | ||
546 | 561 | ||
547 | early_illegal_opcode: | 562 | push %eax # 16(%esp) |
548 | movl $6,%edx | 563 | push %ecx # 12(%esp) |
549 | pushl $0 /* fake errcode */ | 564 | push %edx # 8(%esp) |
550 | jmp early_fault | 565 | push %ds # 4(%esp) |
566 | push %es # 0(%esp) | ||
567 | movl $(__KERNEL_DS),%eax | ||
568 | movl %eax,%ds | ||
569 | movl %eax,%es | ||
551 | 570 | ||
552 | early_protection_fault: | 571 | cmpl $(__KERNEL_CS),32(%esp) |
553 | movl $13,%edx | 572 | jne 10f |
554 | jmp early_fault | ||
555 | 573 | ||
556 | early_page_fault: | 574 | leal 28(%esp),%eax # Pointer to %eip |
557 | movl $14,%edx | 575 | call early_fixup_exception |
558 | jmp early_fault | 576 | andl %eax,%eax |
577 | jnz ex_entry /* found an exception entry */ | ||
559 | 578 | ||
560 | early_fault: | 579 | 10: |
561 | cld | ||
562 | #ifdef CONFIG_PRINTK | 580 | #ifdef CONFIG_PRINTK |
563 | pusha | 581 | xorl %eax,%eax |
564 | movl $(__KERNEL_DS),%eax | 582 | movw %ax,2(%esp) /* clean up the segment values on some cpus */ |
565 | movl %eax,%ds | 583 | movw %ax,6(%esp) |
566 | movl %eax,%es | 584 | movw %ax,34(%esp) |
567 | cmpl $2,early_recursion_flag | 585 | leal 40(%esp),%eax |
568 | je hlt_loop | 586 | pushl %eax /* %esp before the exception */ |
569 | incl early_recursion_flag | 587 | pushl %ebx |
588 | pushl %ebp | ||
589 | pushl %esi | ||
590 | pushl %edi | ||
570 | movl %cr2,%eax | 591 | movl %cr2,%eax |
571 | pushl %eax | 592 | pushl %eax |
572 | pushl %edx /* trapno */ | 593 | pushl (20+6*4)(%esp) /* trapno */ |
573 | pushl $fault_msg | 594 | pushl $fault_msg |
574 | call printk | 595 | call printk |
575 | #endif | 596 | #endif |
@@ -578,6 +599,17 @@ hlt_loop: | |||
578 | hlt | 599 | hlt |
579 | jmp hlt_loop | 600 | jmp hlt_loop |
580 | 601 | ||
602 | ex_entry: | ||
603 | pop %es | ||
604 | pop %ds | ||
605 | pop %edx | ||
606 | pop %ecx | ||
607 | pop %eax | ||
608 | addl $8,%esp /* drop vector number and error code */ | ||
609 | decl %ss:early_recursion_flag | ||
610 | iret | ||
611 | ENDPROC(early_idt_handler) | ||
612 | |||
581 | /* This is the default interrupt "handler" :-) */ | 613 | /* This is the default interrupt "handler" :-) */ |
582 | ALIGN | 614 | ALIGN |
583 | ignore_int: | 615 | ignore_int: |
@@ -611,13 +643,18 @@ ignore_int: | |||
611 | popl %eax | 643 | popl %eax |
612 | #endif | 644 | #endif |
613 | iret | 645 | iret |
646 | ENDPROC(ignore_int) | ||
647 | __INITDATA | ||
648 | .align 4 | ||
649 | early_recursion_flag: | ||
650 | .long 0 | ||
614 | 651 | ||
615 | #include "verify_cpu.S" | 652 | __REFDATA |
616 | 653 | .align 4 | |
617 | __REFDATA | ||
618 | .align 4 | ||
619 | ENTRY(initial_code) | 654 | ENTRY(initial_code) |
620 | .long i386_start_kernel | 655 | .long i386_start_kernel |
656 | ENTRY(setup_once_ref) | ||
657 | .long setup_once | ||
621 | 658 | ||
622 | /* | 659 | /* |
623 | * BSS section | 660 | * BSS section |
@@ -670,22 +707,19 @@ ENTRY(initial_page_table) | |||
670 | ENTRY(stack_start) | 707 | ENTRY(stack_start) |
671 | .long init_thread_union+THREAD_SIZE | 708 | .long init_thread_union+THREAD_SIZE |
672 | 709 | ||
673 | early_recursion_flag: | 710 | __INITRODATA |
674 | .long 0 | ||
675 | |||
676 | ready: .byte 0 | ||
677 | |||
678 | int_msg: | 711 | int_msg: |
679 | .asciz "Unknown interrupt or fault at: %p %p %p\n" | 712 | .asciz "Unknown interrupt or fault at: %p %p %p\n" |
680 | 713 | ||
681 | fault_msg: | 714 | fault_msg: |
682 | /* fault info: */ | 715 | /* fault info: */ |
683 | .ascii "BUG: Int %d: CR2 %p\n" | 716 | .ascii "BUG: Int %d: CR2 %p\n" |
684 | /* pusha regs: */ | 717 | /* regs pushed in early_idt_handler: */ |
685 | .ascii " EDI %p ESI %p EBP %p ESP %p\n" | 718 | .ascii " EDI %p ESI %p EBP %p EBX %p\n" |
686 | .ascii " EBX %p EDX %p ECX %p EAX %p\n" | 719 | .ascii " ESP %p ES %p DS %p\n" |
720 | .ascii " EDX %p ECX %p EAX %p\n" | ||
687 | /* fault frame: */ | 721 | /* fault frame: */ |
688 | .ascii " err %p EIP %p CS %p flg %p\n" | 722 | .ascii " vec %p err %p EIP %p CS %p flg %p\n" |
689 | .ascii "Stack: %p %p %p %p %p %p %p %p\n" | 723 | .ascii "Stack: %p %p %p %p %p %p %p %p\n" |
690 | .ascii " %p %p %p %p %p %p %p %p\n" | 724 | .ascii " %p %p %p %p %p %p %p %p\n" |
691 | .asciz " %p %p %p %p %p %p %p %p\n" | 725 | .asciz " %p %p %p %p %p %p %p %p\n" |
@@ -699,6 +733,7 @@ fault_msg: | |||
699 | * segment size, and 32-bit linear address value: | 733 | * segment size, and 32-bit linear address value: |
700 | */ | 734 | */ |
701 | 735 | ||
736 | .data | ||
702 | .globl boot_gdt_descr | 737 | .globl boot_gdt_descr |
703 | .globl idt_descr | 738 | .globl idt_descr |
704 | 739 | ||
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 40f4eb3766d..7a40f244732 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S | |||
@@ -19,12 +19,15 @@ | |||
19 | #include <asm/cache.h> | 19 | #include <asm/cache.h> |
20 | #include <asm/processor-flags.h> | 20 | #include <asm/processor-flags.h> |
21 | #include <asm/percpu.h> | 21 | #include <asm/percpu.h> |
22 | #include <asm/nops.h> | ||
22 | 23 | ||
23 | #ifdef CONFIG_PARAVIRT | 24 | #ifdef CONFIG_PARAVIRT |
24 | #include <asm/asm-offsets.h> | 25 | #include <asm/asm-offsets.h> |
25 | #include <asm/paravirt.h> | 26 | #include <asm/paravirt.h> |
27 | #define GET_CR2_INTO(reg) GET_CR2_INTO_RAX ; movq %rax, reg | ||
26 | #else | 28 | #else |
27 | #define GET_CR2_INTO_RCX movq %cr2, %rcx | 29 | #define GET_CR2_INTO(reg) movq %cr2, reg |
30 | #define INTERRUPT_RETURN iretq | ||
28 | #endif | 31 | #endif |
29 | 32 | ||
30 | /* we are not able to switch in one step to the final KERNEL ADDRESS SPACE | 33 | /* we are not able to switch in one step to the final KERNEL ADDRESS SPACE |
@@ -270,36 +273,56 @@ bad_address: | |||
270 | jmp bad_address | 273 | jmp bad_address |
271 | 274 | ||
272 | .section ".init.text","ax" | 275 | .section ".init.text","ax" |
273 | #ifdef CONFIG_EARLY_PRINTK | ||
274 | .globl early_idt_handlers | 276 | .globl early_idt_handlers |
275 | early_idt_handlers: | 277 | early_idt_handlers: |
278 | # 104(%rsp) %rflags | ||
279 | # 96(%rsp) %cs | ||
280 | # 88(%rsp) %rip | ||
281 | # 80(%rsp) error code | ||
276 | i = 0 | 282 | i = 0 |
277 | .rept NUM_EXCEPTION_VECTORS | 283 | .rept NUM_EXCEPTION_VECTORS |
278 | movl $i, %esi | 284 | .if (EXCEPTION_ERRCODE_MASK >> i) & 1 |
285 | ASM_NOP2 | ||
286 | .else | ||
287 | pushq $0 # Dummy error code, to make stack frame uniform | ||
288 | .endif | ||
289 | pushq $i # 72(%rsp) Vector number | ||
279 | jmp early_idt_handler | 290 | jmp early_idt_handler |
280 | i = i + 1 | 291 | i = i + 1 |
281 | .endr | 292 | .endr |
282 | #endif | ||
283 | 293 | ||
284 | ENTRY(early_idt_handler) | 294 | ENTRY(early_idt_handler) |
285 | #ifdef CONFIG_EARLY_PRINTK | 295 | cld |
296 | |||
286 | cmpl $2,early_recursion_flag(%rip) | 297 | cmpl $2,early_recursion_flag(%rip) |
287 | jz 1f | 298 | jz 1f |
288 | incl early_recursion_flag(%rip) | 299 | incl early_recursion_flag(%rip) |
289 | GET_CR2_INTO_RCX | 300 | |
290 | movq %rcx,%r9 | 301 | pushq %rax # 64(%rsp) |
291 | xorl %r8d,%r8d # zero for error code | 302 | pushq %rcx # 56(%rsp) |
292 | movl %esi,%ecx # get vector number | 303 | pushq %rdx # 48(%rsp) |
293 | # Test %ecx against mask of vectors that push error code. | 304 | pushq %rsi # 40(%rsp) |
294 | cmpl $31,%ecx | 305 | pushq %rdi # 32(%rsp) |
295 | ja 0f | 306 | pushq %r8 # 24(%rsp) |
296 | movl $1,%eax | 307 | pushq %r9 # 16(%rsp) |
297 | salq %cl,%rax | 308 | pushq %r10 # 8(%rsp) |
298 | testl $0x27d00,%eax | 309 | pushq %r11 # 0(%rsp) |
299 | je 0f | 310 | |
300 | popq %r8 # get error code | 311 | cmpl $__KERNEL_CS,96(%rsp) |
301 | 0: movq 0(%rsp),%rcx # get ip | 312 | jne 10f |
302 | movq 8(%rsp),%rdx # get cs | 313 | |
314 | leaq 88(%rsp),%rdi # Pointer to %rip | ||
315 | call early_fixup_exception | ||
316 | andl %eax,%eax | ||
317 | jnz 20f # Found an exception entry | ||
318 | |||
319 | 10: | ||
320 | #ifdef CONFIG_EARLY_PRINTK | ||
321 | GET_CR2_INTO(%r9) # can clobber any volatile register if pv | ||
322 | movl 80(%rsp),%r8d # error code | ||
323 | movl 72(%rsp),%esi # vector number | ||
324 | movl 96(%rsp),%edx # %cs | ||
325 | movq 88(%rsp),%rcx # %rip | ||
303 | xorl %eax,%eax | 326 | xorl %eax,%eax |
304 | leaq early_idt_msg(%rip),%rdi | 327 | leaq early_idt_msg(%rip),%rdi |
305 | call early_printk | 328 | call early_printk |
@@ -308,17 +331,32 @@ ENTRY(early_idt_handler) | |||
308 | call dump_stack | 331 | call dump_stack |
309 | #ifdef CONFIG_KALLSYMS | 332 | #ifdef CONFIG_KALLSYMS |
310 | leaq early_idt_ripmsg(%rip),%rdi | 333 | leaq early_idt_ripmsg(%rip),%rdi |
311 | movq 0(%rsp),%rsi # get rip again | 334 | movq 40(%rsp),%rsi # %rip again |
312 | call __print_symbol | 335 | call __print_symbol |
313 | #endif | 336 | #endif |
314 | #endif /* EARLY_PRINTK */ | 337 | #endif /* EARLY_PRINTK */ |
315 | 1: hlt | 338 | 1: hlt |
316 | jmp 1b | 339 | jmp 1b |
317 | 340 | ||
318 | #ifdef CONFIG_EARLY_PRINTK | 341 | 20: # Exception table entry found |
342 | popq %r11 | ||
343 | popq %r10 | ||
344 | popq %r9 | ||
345 | popq %r8 | ||
346 | popq %rdi | ||
347 | popq %rsi | ||
348 | popq %rdx | ||
349 | popq %rcx | ||
350 | popq %rax | ||
351 | addq $16,%rsp # drop vector number and error code | ||
352 | decl early_recursion_flag(%rip) | ||
353 | INTERRUPT_RETURN | ||
354 | |||
355 | .balign 4 | ||
319 | early_recursion_flag: | 356 | early_recursion_flag: |
320 | .long 0 | 357 | .long 0 |
321 | 358 | ||
359 | #ifdef CONFIG_EARLY_PRINTK | ||
322 | early_idt_msg: | 360 | early_idt_msg: |
323 | .asciz "PANIC: early exception %02lx rip %lx:%lx error %lx cr2 %lx\n" | 361 | .asciz "PANIC: early exception %02lx rip %lx:%lx error %lx cr2 %lx\n" |
324 | early_idt_ripmsg: | 362 | early_idt_ripmsg: |
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index 2d6e6498c17..f250431fb50 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c | |||
@@ -88,7 +88,7 @@ void kernel_fpu_begin(void) | |||
88 | __thread_clear_has_fpu(me); | 88 | __thread_clear_has_fpu(me); |
89 | /* We do 'stts()' in kernel_fpu_end() */ | 89 | /* We do 'stts()' in kernel_fpu_end() */ |
90 | } else { | 90 | } else { |
91 | percpu_write(fpu_owner_task, NULL); | 91 | this_cpu_write(fpu_owner_task, NULL); |
92 | clts(); | 92 | clts(); |
93 | } | 93 | } |
94 | } | 94 | } |
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index e213fc8408d..e2f751efb7b 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c | |||
@@ -1037,9 +1037,9 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) | |||
1037 | "current sp %p does not match saved sp %p\n", | 1037 | "current sp %p does not match saved sp %p\n", |
1038 | stack_addr(regs), kcb->jprobe_saved_sp); | 1038 | stack_addr(regs), kcb->jprobe_saved_sp); |
1039 | printk(KERN_ERR "Saved registers for jprobe %p\n", jp); | 1039 | printk(KERN_ERR "Saved registers for jprobe %p\n", jp); |
1040 | show_registers(saved_regs); | 1040 | show_regs(saved_regs); |
1041 | printk(KERN_ERR "Current registers\n"); | 1041 | printk(KERN_ERR "Current registers\n"); |
1042 | show_registers(regs); | 1042 | show_regs(regs); |
1043 | BUG(); | 1043 | BUG(); |
1044 | } | 1044 | } |
1045 | *regs = kcb->jprobe_saved_regs; | 1045 | *regs = kcb->jprobe_saved_regs; |
diff --git a/arch/x86/kernel/mca_32.c b/arch/x86/kernel/mca_32.c deleted file mode 100644 index 7eb1e2b9782..00000000000 --- a/arch/x86/kernel/mca_32.c +++ /dev/null | |||
@@ -1,476 +0,0 @@ | |||
1 | /* | ||
2 | * Written by Martin Kolinek, February 1996 | ||
3 | * | ||
4 | * Changes: | ||
5 | * | ||
6 | * Chris Beauregard July 28th, 1996 | ||
7 | * - Fixed up integrated SCSI detection | ||
8 | * | ||
9 | * Chris Beauregard August 3rd, 1996 | ||
10 | * - Made mca_info local | ||
11 | * - Made integrated registers accessible through standard function calls | ||
12 | * - Added name field | ||
13 | * - More sanity checking | ||
14 | * | ||
15 | * Chris Beauregard August 9th, 1996 | ||
16 | * - Rewrote /proc/mca | ||
17 | * | ||
18 | * Chris Beauregard January 7th, 1997 | ||
19 | * - Added basic NMI-processing | ||
20 | * - Added more information to mca_info structure | ||
21 | * | ||
22 | * David Weinehall October 12th, 1998 | ||
23 | * - Made a lot of cleaning up in the source | ||
24 | * - Added use of save_flags / restore_flags | ||
25 | * - Added the 'driver_loaded' flag in MCA_adapter | ||
26 | * - Added an alternative implemention of ZP Gu's mca_find_unused_adapter | ||
27 | * | ||
28 | * David Weinehall March 24th, 1999 | ||
29 | * - Fixed the output of 'Driver Installed' in /proc/mca/pos | ||
30 | * - Made the Integrated Video & SCSI show up even if they have id 0000 | ||
31 | * | ||
32 | * Alexander Viro November 9th, 1999 | ||
33 | * - Switched to regular procfs methods | ||
34 | * | ||
35 | * Alfred Arnold & David Weinehall August 23rd, 2000 | ||
36 | * - Added support for Planar POS-registers | ||
37 | */ | ||
38 | |||
39 | #include <linux/module.h> | ||
40 | #include <linux/types.h> | ||
41 | #include <linux/errno.h> | ||
42 | #include <linux/kernel.h> | ||
43 | #include <linux/mca.h> | ||
44 | #include <linux/kprobes.h> | ||
45 | #include <linux/slab.h> | ||
46 | #include <asm/io.h> | ||
47 | #include <linux/proc_fs.h> | ||
48 | #include <linux/mman.h> | ||
49 | #include <linux/mm.h> | ||
50 | #include <linux/pagemap.h> | ||
51 | #include <linux/ioport.h> | ||
52 | #include <asm/uaccess.h> | ||
53 | #include <linux/init.h> | ||
54 | |||
55 | static unsigned char which_scsi; | ||
56 | |||
57 | int MCA_bus; | ||
58 | EXPORT_SYMBOL(MCA_bus); | ||
59 | |||
60 | /* | ||
61 | * Motherboard register spinlock. Untested on SMP at the moment, but | ||
62 | * are there any MCA SMP boxes? | ||
63 | * | ||
64 | * Yes - Alan | ||
65 | */ | ||
66 | static DEFINE_SPINLOCK(mca_lock); | ||
67 | |||
68 | /* Build the status info for the adapter */ | ||
69 | |||
70 | static void mca_configure_adapter_status(struct mca_device *mca_dev) | ||
71 | { | ||
72 | mca_dev->status = MCA_ADAPTER_NONE; | ||
73 | |||
74 | mca_dev->pos_id = mca_dev->pos[0] | ||
75 | + (mca_dev->pos[1] << 8); | ||
76 | |||
77 | if (!mca_dev->pos_id && mca_dev->slot < MCA_MAX_SLOT_NR) { | ||
78 | |||
79 | /* | ||
80 | * id = 0x0000 usually indicates hardware failure, | ||
81 | * however, ZP Gu (zpg@castle.net> reports that his 9556 | ||
82 | * has 0x0000 as id and everything still works. There | ||
83 | * also seem to be an adapter with id = 0x0000; the | ||
84 | * NCR Parallel Bus Memory Card. Until this is confirmed, | ||
85 | * however, this code will stay. | ||
86 | */ | ||
87 | |||
88 | mca_dev->status = MCA_ADAPTER_ERROR; | ||
89 | |||
90 | return; | ||
91 | } else if (mca_dev->pos_id != 0xffff) { | ||
92 | |||
93 | /* | ||
94 | * 0xffff usually indicates that there's no adapter, | ||
95 | * however, some integrated adapters may have 0xffff as | ||
96 | * their id and still be valid. Examples are on-board | ||
97 | * VGA of the 55sx, the integrated SCSI of the 56 & 57, | ||
98 | * and possibly also the 95 ULTIMEDIA. | ||
99 | */ | ||
100 | |||
101 | mca_dev->status = MCA_ADAPTER_NORMAL; | ||
102 | } | ||
103 | |||
104 | if ((mca_dev->pos_id == 0xffff || | ||
105 | mca_dev->pos_id == 0x0000) && mca_dev->slot >= MCA_MAX_SLOT_NR) { | ||
106 | int j; | ||
107 | |||
108 | for (j = 2; j < 8; j++) { | ||
109 | if (mca_dev->pos[j] != 0xff) { | ||
110 | mca_dev->status = MCA_ADAPTER_NORMAL; | ||
111 | break; | ||
112 | } | ||
113 | } | ||
114 | } | ||
115 | |||
116 | if (!(mca_dev->pos[2] & MCA_ENABLED)) { | ||
117 | |||
118 | /* enabled bit is in POS 2 */ | ||
119 | |||
120 | mca_dev->status = MCA_ADAPTER_DISABLED; | ||
121 | } | ||
122 | } /* mca_configure_adapter_status */ | ||
123 | |||
124 | /*--------------------------------------------------------------------*/ | ||
125 | |||
126 | static struct resource mca_standard_resources[] = { | ||
127 | { .start = 0x60, .end = 0x60, .name = "system control port B (MCA)" }, | ||
128 | { .start = 0x90, .end = 0x90, .name = "arbitration (MCA)" }, | ||
129 | { .start = 0x91, .end = 0x91, .name = "card Select Feedback (MCA)" }, | ||
130 | { .start = 0x92, .end = 0x92, .name = "system Control port A (MCA)" }, | ||
131 | { .start = 0x94, .end = 0x94, .name = "system board setup (MCA)" }, | ||
132 | { .start = 0x96, .end = 0x97, .name = "POS (MCA)" }, | ||
133 | { .start = 0x100, .end = 0x107, .name = "POS (MCA)" } | ||
134 | }; | ||
135 | |||
136 | #define MCA_STANDARD_RESOURCES ARRAY_SIZE(mca_standard_resources) | ||
137 | |||
138 | /* | ||
139 | * mca_read_and_store_pos - read the POS registers into a memory buffer | ||
140 | * @pos: a char pointer to 8 bytes, contains the POS register value on | ||
141 | * successful return | ||
142 | * | ||
143 | * Returns 1 if a card actually exists (i.e. the pos isn't | ||
144 | * all 0xff) or 0 otherwise | ||
145 | */ | ||
146 | static int mca_read_and_store_pos(unsigned char *pos) | ||
147 | { | ||
148 | int j; | ||
149 | int found = 0; | ||
150 | |||
151 | for (j = 0; j < 8; j++) { | ||
152 | pos[j] = inb_p(MCA_POS_REG(j)); | ||
153 | if (pos[j] != 0xff) { | ||
154 | /* 0xff all across means no device. 0x00 means | ||
155 | * something's broken, but a device is | ||
156 | * probably there. However, if you get 0x00 | ||
157 | * from a motherboard register it won't matter | ||
158 | * what we find. For the record, on the | ||
159 | * 57SLC, the integrated SCSI adapter has | ||
160 | * 0xffff for the adapter ID, but nonzero for | ||
161 | * other registers. */ | ||
162 | |||
163 | found = 1; | ||
164 | } | ||
165 | } | ||
166 | return found; | ||
167 | } | ||
168 | |||
169 | static unsigned char mca_pc_read_pos(struct mca_device *mca_dev, int reg) | ||
170 | { | ||
171 | unsigned char byte; | ||
172 | unsigned long flags; | ||
173 | |||
174 | if (reg < 0 || reg >= 8) | ||
175 | return 0; | ||
176 | |||
177 | spin_lock_irqsave(&mca_lock, flags); | ||
178 | if (mca_dev->pos_register) { | ||
179 | /* Disable adapter setup, enable motherboard setup */ | ||
180 | |||
181 | outb_p(0, MCA_ADAPTER_SETUP_REG); | ||
182 | outb_p(mca_dev->pos_register, MCA_MOTHERBOARD_SETUP_REG); | ||
183 | |||
184 | byte = inb_p(MCA_POS_REG(reg)); | ||
185 | outb_p(0xff, MCA_MOTHERBOARD_SETUP_REG); | ||
186 | } else { | ||
187 | |||
188 | /* Make sure motherboard setup is off */ | ||
189 | |||
190 | outb_p(0xff, MCA_MOTHERBOARD_SETUP_REG); | ||
191 | |||
192 | /* Read the appropriate register */ | ||
193 | |||
194 | outb_p(0x8|(mca_dev->slot & 0xf), MCA_ADAPTER_SETUP_REG); | ||
195 | byte = inb_p(MCA_POS_REG(reg)); | ||
196 | outb_p(0, MCA_ADAPTER_SETUP_REG); | ||
197 | } | ||
198 | spin_unlock_irqrestore(&mca_lock, flags); | ||
199 | |||
200 | mca_dev->pos[reg] = byte; | ||
201 | |||
202 | return byte; | ||
203 | } | ||
204 | |||
205 | static void mca_pc_write_pos(struct mca_device *mca_dev, int reg, | ||
206 | unsigned char byte) | ||
207 | { | ||
208 | unsigned long flags; | ||
209 | |||
210 | if (reg < 0 || reg >= 8) | ||
211 | return; | ||
212 | |||
213 | spin_lock_irqsave(&mca_lock, flags); | ||
214 | |||
215 | /* Make sure motherboard setup is off */ | ||
216 | |||
217 | outb_p(0xff, MCA_MOTHERBOARD_SETUP_REG); | ||
218 | |||
219 | /* Read in the appropriate register */ | ||
220 | |||
221 | outb_p(0x8|(mca_dev->slot&0xf), MCA_ADAPTER_SETUP_REG); | ||
222 | outb_p(byte, MCA_POS_REG(reg)); | ||
223 | outb_p(0, MCA_ADAPTER_SETUP_REG); | ||
224 | |||
225 | spin_unlock_irqrestore(&mca_lock, flags); | ||
226 | |||
227 | /* Update the global register list, while we have the byte */ | ||
228 | |||
229 | mca_dev->pos[reg] = byte; | ||
230 | |||
231 | } | ||
232 | |||
233 | /* for the primary MCA bus, we have identity transforms */ | ||
234 | static int mca_dummy_transform_irq(struct mca_device *mca_dev, int irq) | ||
235 | { | ||
236 | return irq; | ||
237 | } | ||
238 | |||
239 | static int mca_dummy_transform_ioport(struct mca_device *mca_dev, int port) | ||
240 | { | ||
241 | return port; | ||
242 | } | ||
243 | |||
244 | static void *mca_dummy_transform_memory(struct mca_device *mca_dev, void *mem) | ||
245 | { | ||
246 | return mem; | ||
247 | } | ||
248 | |||
249 | |||
250 | static int __init mca_init(void) | ||
251 | { | ||
252 | unsigned int i, j; | ||
253 | struct mca_device *mca_dev; | ||
254 | unsigned char pos[8]; | ||
255 | short mca_builtin_scsi_ports[] = {0xf7, 0xfd, 0x00}; | ||
256 | struct mca_bus *bus; | ||
257 | |||
258 | /* | ||
259 | * WARNING: Be careful when making changes here. Putting an adapter | ||
260 | * and the motherboard simultaneously into setup mode may result in | ||
261 | * damage to chips (according to The Indispensable PC Hardware Book | ||
262 | * by Hans-Peter Messmer). Also, we disable system interrupts (so | ||
263 | * that we are not disturbed in the middle of this). | ||
264 | */ | ||
265 | |||
266 | /* Make sure the MCA bus is present */ | ||
267 | |||
268 | if (mca_system_init()) { | ||
269 | printk(KERN_ERR "MCA bus system initialisation failed\n"); | ||
270 | return -ENODEV; | ||
271 | } | ||
272 | |||
273 | if (!MCA_bus) | ||
274 | return -ENODEV; | ||
275 | |||
276 | printk(KERN_INFO "Micro Channel bus detected.\n"); | ||
277 | |||
278 | /* All MCA systems have at least a primary bus */ | ||
279 | bus = mca_attach_bus(MCA_PRIMARY_BUS); | ||
280 | if (!bus) | ||
281 | goto out_nomem; | ||
282 | bus->default_dma_mask = 0xffffffffLL; | ||
283 | bus->f.mca_write_pos = mca_pc_write_pos; | ||
284 | bus->f.mca_read_pos = mca_pc_read_pos; | ||
285 | bus->f.mca_transform_irq = mca_dummy_transform_irq; | ||
286 | bus->f.mca_transform_ioport = mca_dummy_transform_ioport; | ||
287 | bus->f.mca_transform_memory = mca_dummy_transform_memory; | ||
288 | |||
289 | /* get the motherboard device */ | ||
290 | mca_dev = kzalloc(sizeof(struct mca_device), GFP_KERNEL); | ||
291 | if (unlikely(!mca_dev)) | ||
292 | goto out_nomem; | ||
293 | |||
294 | /* | ||
295 | * We do not expect many MCA interrupts during initialization, | ||
296 | * but let us be safe: | ||
297 | */ | ||
298 | spin_lock_irq(&mca_lock); | ||
299 | |||
300 | /* Make sure adapter setup is off */ | ||
301 | |||
302 | outb_p(0, MCA_ADAPTER_SETUP_REG); | ||
303 | |||
304 | /* Read motherboard POS registers */ | ||
305 | |||
306 | mca_dev->pos_register = 0x7f; | ||
307 | outb_p(mca_dev->pos_register, MCA_MOTHERBOARD_SETUP_REG); | ||
308 | mca_dev->name[0] = 0; | ||
309 | mca_read_and_store_pos(mca_dev->pos); | ||
310 | mca_configure_adapter_status(mca_dev); | ||
311 | /* fake POS and slot for a motherboard */ | ||
312 | mca_dev->pos_id = MCA_MOTHERBOARD_POS; | ||
313 | mca_dev->slot = MCA_MOTHERBOARD; | ||
314 | mca_register_device(MCA_PRIMARY_BUS, mca_dev); | ||
315 | |||
316 | mca_dev = kzalloc(sizeof(struct mca_device), GFP_ATOMIC); | ||
317 | if (unlikely(!mca_dev)) | ||
318 | goto out_unlock_nomem; | ||
319 | |||
320 | /* Put motherboard into video setup mode, read integrated video | ||
321 | * POS registers, and turn motherboard setup off. | ||
322 | */ | ||
323 | |||
324 | mca_dev->pos_register = 0xdf; | ||
325 | outb_p(mca_dev->pos_register, MCA_MOTHERBOARD_SETUP_REG); | ||
326 | mca_dev->name[0] = 0; | ||
327 | mca_read_and_store_pos(mca_dev->pos); | ||
328 | mca_configure_adapter_status(mca_dev); | ||
329 | /* fake POS and slot for the integrated video */ | ||
330 | mca_dev->pos_id = MCA_INTEGVIDEO_POS; | ||
331 | mca_dev->slot = MCA_INTEGVIDEO; | ||
332 | mca_register_device(MCA_PRIMARY_BUS, mca_dev); | ||
333 | |||
334 | /* | ||
335 | * Put motherboard into scsi setup mode, read integrated scsi | ||
336 | * POS registers, and turn motherboard setup off. | ||
337 | * | ||
338 | * It seems there are two possible SCSI registers. Martin says that | ||
339 | * for the 56,57, 0xf7 is the one, but fails on the 76. | ||
340 | * Alfredo (apena@vnet.ibm.com) says | ||
341 | * 0xfd works on his machine. We'll try both of them. I figure it's | ||
342 | * a good bet that only one could be valid at a time. This could | ||
343 | * screw up though if one is used for something else on the other | ||
344 | * machine. | ||
345 | */ | ||
346 | |||
347 | for (i = 0; (which_scsi = mca_builtin_scsi_ports[i]) != 0; i++) { | ||
348 | outb_p(which_scsi, MCA_MOTHERBOARD_SETUP_REG); | ||
349 | if (mca_read_and_store_pos(pos)) | ||
350 | break; | ||
351 | } | ||
352 | if (which_scsi) { | ||
353 | /* found a scsi card */ | ||
354 | mca_dev = kzalloc(sizeof(struct mca_device), GFP_ATOMIC); | ||
355 | if (unlikely(!mca_dev)) | ||
356 | goto out_unlock_nomem; | ||
357 | |||
358 | for (j = 0; j < 8; j++) | ||
359 | mca_dev->pos[j] = pos[j]; | ||
360 | |||
361 | mca_configure_adapter_status(mca_dev); | ||
362 | /* fake POS and slot for integrated SCSI controller */ | ||
363 | mca_dev->pos_id = MCA_INTEGSCSI_POS; | ||
364 | mca_dev->slot = MCA_INTEGSCSI; | ||
365 | mca_dev->pos_register = which_scsi; | ||
366 | mca_register_device(MCA_PRIMARY_BUS, mca_dev); | ||
367 | } | ||
368 | |||
369 | /* Turn off motherboard setup */ | ||
370 | |||
371 | outb_p(0xff, MCA_MOTHERBOARD_SETUP_REG); | ||
372 | |||
373 | /* | ||
374 | * Now loop over MCA slots: put each adapter into setup mode, and | ||
375 | * read its POS registers. Then put adapter setup off. | ||
376 | */ | ||
377 | |||
378 | for (i = 0; i < MCA_MAX_SLOT_NR; i++) { | ||
379 | outb_p(0x8|(i&0xf), MCA_ADAPTER_SETUP_REG); | ||
380 | if (!mca_read_and_store_pos(pos)) | ||
381 | continue; | ||
382 | |||
383 | mca_dev = kzalloc(sizeof(struct mca_device), GFP_ATOMIC); | ||
384 | if (unlikely(!mca_dev)) | ||
385 | goto out_unlock_nomem; | ||
386 | |||
387 | for (j = 0; j < 8; j++) | ||
388 | mca_dev->pos[j] = pos[j]; | ||
389 | |||
390 | mca_dev->driver_loaded = 0; | ||
391 | mca_dev->slot = i; | ||
392 | mca_dev->pos_register = 0; | ||
393 | mca_configure_adapter_status(mca_dev); | ||
394 | mca_register_device(MCA_PRIMARY_BUS, mca_dev); | ||
395 | } | ||
396 | outb_p(0, MCA_ADAPTER_SETUP_REG); | ||
397 | |||
398 | /* Enable interrupts and return memory start */ | ||
399 | spin_unlock_irq(&mca_lock); | ||
400 | |||
401 | for (i = 0; i < MCA_STANDARD_RESOURCES; i++) | ||
402 | request_resource(&ioport_resource, mca_standard_resources + i); | ||
403 | |||
404 | mca_do_proc_init(); | ||
405 | |||
406 | return 0; | ||
407 | |||
408 | out_unlock_nomem: | ||
409 | spin_unlock_irq(&mca_lock); | ||
410 | out_nomem: | ||
411 | printk(KERN_EMERG "Failed memory allocation in MCA setup!\n"); | ||
412 | return -ENOMEM; | ||
413 | } | ||
414 | |||
415 | subsys_initcall(mca_init); | ||
416 | |||
417 | /*--------------------------------------------------------------------*/ | ||
418 | |||
419 | static __kprobes void | ||
420 | mca_handle_nmi_device(struct mca_device *mca_dev, int check_flag) | ||
421 | { | ||
422 | int slot = mca_dev->slot; | ||
423 | |||
424 | if (slot == MCA_INTEGSCSI) { | ||
425 | printk(KERN_CRIT "NMI: caused by MCA integrated SCSI adapter (%s)\n", | ||
426 | mca_dev->name); | ||
427 | } else if (slot == MCA_INTEGVIDEO) { | ||
428 | printk(KERN_CRIT "NMI: caused by MCA integrated video adapter (%s)\n", | ||
429 | mca_dev->name); | ||
430 | } else if (slot == MCA_MOTHERBOARD) { | ||
431 | printk(KERN_CRIT "NMI: caused by motherboard (%s)\n", | ||
432 | mca_dev->name); | ||
433 | } | ||
434 | |||
435 | /* More info available in POS 6 and 7? */ | ||
436 | |||
437 | if (check_flag) { | ||
438 | unsigned char pos6, pos7; | ||
439 | |||
440 | pos6 = mca_device_read_pos(mca_dev, 6); | ||
441 | pos7 = mca_device_read_pos(mca_dev, 7); | ||
442 | |||
443 | printk(KERN_CRIT "NMI: POS 6 = 0x%x, POS 7 = 0x%x\n", pos6, pos7); | ||
444 | } | ||
445 | |||
446 | } /* mca_handle_nmi_slot */ | ||
447 | |||
448 | /*--------------------------------------------------------------------*/ | ||
449 | |||
450 | static int __kprobes mca_handle_nmi_callback(struct device *dev, void *data) | ||
451 | { | ||
452 | struct mca_device *mca_dev = to_mca_device(dev); | ||
453 | unsigned char pos5; | ||
454 | |||
455 | pos5 = mca_device_read_pos(mca_dev, 5); | ||
456 | |||
457 | if (!(pos5 & 0x80)) { | ||
458 | /* | ||
459 | * Bit 7 of POS 5 is reset when this adapter has a hardware | ||
460 | * error. Bit 7 it reset if there's error information | ||
461 | * available in POS 6 and 7. | ||
462 | */ | ||
463 | mca_handle_nmi_device(mca_dev, !(pos5 & 0x40)); | ||
464 | return 1; | ||
465 | } | ||
466 | return 0; | ||
467 | } | ||
468 | |||
469 | void __kprobes mca_handle_nmi(void) | ||
470 | { | ||
471 | /* | ||
472 | * First try - scan the various adapters and see if a specific | ||
473 | * adapter was responsible for the error. | ||
474 | */ | ||
475 | bus_for_each_dev(&mca_bus_type, NULL, NULL, mca_handle_nmi_callback); | ||
476 | } | ||
diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c index c9bda6d6035..fbdfc691718 100644 --- a/arch/x86/kernel/microcode_core.c +++ b/arch/x86/kernel/microcode_core.c | |||
@@ -299,12 +299,11 @@ static ssize_t reload_store(struct device *dev, | |||
299 | { | 299 | { |
300 | unsigned long val; | 300 | unsigned long val; |
301 | int cpu = dev->id; | 301 | int cpu = dev->id; |
302 | int ret = 0; | 302 | ssize_t ret = 0; |
303 | char *end; | ||
304 | 303 | ||
305 | val = simple_strtoul(buf, &end, 0); | 304 | ret = kstrtoul(buf, 0, &val); |
306 | if (end == buf) | 305 | if (ret) |
307 | return -EINVAL; | 306 | return ret; |
308 | 307 | ||
309 | if (val == 1) { | 308 | if (val == 1) { |
310 | get_online_cpus(); | 309 | get_online_cpus(); |
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index ca470e4c92d..b02d4dd6b8a 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c | |||
@@ -97,7 +97,7 @@ static void __init MP_bus_info(struct mpc_bus *m) | |||
97 | 97 | ||
98 | set_bit(m->busid, mp_bus_not_pci); | 98 | set_bit(m->busid, mp_bus_not_pci); |
99 | if (strncmp(str, BUSTYPE_ISA, sizeof(BUSTYPE_ISA) - 1) == 0) { | 99 | if (strncmp(str, BUSTYPE_ISA, sizeof(BUSTYPE_ISA) - 1) == 0) { |
100 | #if defined(CONFIG_EISA) || defined(CONFIG_MCA) | 100 | #ifdef CONFIG_EISA |
101 | mp_bus_id_to_type[m->busid] = MP_BUS_ISA; | 101 | mp_bus_id_to_type[m->busid] = MP_BUS_ISA; |
102 | #endif | 102 | #endif |
103 | } else if (strncmp(str, BUSTYPE_PCI, sizeof(BUSTYPE_PCI) - 1) == 0) { | 103 | } else if (strncmp(str, BUSTYPE_PCI, sizeof(BUSTYPE_PCI) - 1) == 0) { |
@@ -105,12 +105,10 @@ static void __init MP_bus_info(struct mpc_bus *m) | |||
105 | x86_init.mpparse.mpc_oem_pci_bus(m); | 105 | x86_init.mpparse.mpc_oem_pci_bus(m); |
106 | 106 | ||
107 | clear_bit(m->busid, mp_bus_not_pci); | 107 | clear_bit(m->busid, mp_bus_not_pci); |
108 | #if defined(CONFIG_EISA) || defined(CONFIG_MCA) | 108 | #ifdef CONFIG_EISA |
109 | mp_bus_id_to_type[m->busid] = MP_BUS_PCI; | 109 | mp_bus_id_to_type[m->busid] = MP_BUS_PCI; |
110 | } else if (strncmp(str, BUSTYPE_EISA, sizeof(BUSTYPE_EISA) - 1) == 0) { | 110 | } else if (strncmp(str, BUSTYPE_EISA, sizeof(BUSTYPE_EISA) - 1) == 0) { |
111 | mp_bus_id_to_type[m->busid] = MP_BUS_EISA; | 111 | mp_bus_id_to_type[m->busid] = MP_BUS_EISA; |
112 | } else if (strncmp(str, BUSTYPE_MCA, sizeof(BUSTYPE_MCA) - 1) == 0) { | ||
113 | mp_bus_id_to_type[m->busid] = MP_BUS_MCA; | ||
114 | #endif | 112 | #endif |
115 | } else | 113 | } else |
116 | printk(KERN_WARNING "Unknown bustype %s - ignoring\n", str); | 114 | printk(KERN_WARNING "Unknown bustype %s - ignoring\n", str); |
@@ -368,9 +366,6 @@ static void __init construct_ioapic_table(int mpc_default_type) | |||
368 | case 3: | 366 | case 3: |
369 | memcpy(bus.bustype, "EISA ", 6); | 367 | memcpy(bus.bustype, "EISA ", 6); |
370 | break; | 368 | break; |
371 | case 4: | ||
372 | case 7: | ||
373 | memcpy(bus.bustype, "MCA ", 6); | ||
374 | } | 369 | } |
375 | MP_bus_info(&bus); | 370 | MP_bus_info(&bus); |
376 | if (mpc_default_type > 4) { | 371 | if (mpc_default_type > 4) { |
@@ -623,7 +618,7 @@ void __init default_find_smp_config(void) | |||
623 | return; | 618 | return; |
624 | /* | 619 | /* |
625 | * If it is an SMP machine we should know now, unless the | 620 | * If it is an SMP machine we should know now, unless the |
626 | * configuration is in an EISA/MCA bus machine with an | 621 | * configuration is in an EISA bus machine with an |
627 | * extended bios data area. | 622 | * extended bios data area. |
628 | * | 623 | * |
629 | * there is a real-mode segmented pointer pointing to the | 624 | * there is a real-mode segmented pointer pointing to the |
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index 585be4bd71a..90875279ef3 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c | |||
@@ -19,8 +19,6 @@ | |||
19 | #include <linux/slab.h> | 19 | #include <linux/slab.h> |
20 | #include <linux/export.h> | 20 | #include <linux/export.h> |
21 | 21 | ||
22 | #include <linux/mca.h> | ||
23 | |||
24 | #if defined(CONFIG_EDAC) | 22 | #if defined(CONFIG_EDAC) |
25 | #include <linux/edac.h> | 23 | #include <linux/edac.h> |
26 | #endif | 24 | #endif |
@@ -84,7 +82,7 @@ __setup("unknown_nmi_panic", setup_unknown_nmi_panic); | |||
84 | 82 | ||
85 | #define nmi_to_desc(type) (&nmi_desc[type]) | 83 | #define nmi_to_desc(type) (&nmi_desc[type]) |
86 | 84 | ||
87 | static int notrace __kprobes nmi_handle(unsigned int type, struct pt_regs *regs, bool b2b) | 85 | static int __kprobes nmi_handle(unsigned int type, struct pt_regs *regs, bool b2b) |
88 | { | 86 | { |
89 | struct nmi_desc *desc = nmi_to_desc(type); | 87 | struct nmi_desc *desc = nmi_to_desc(type); |
90 | struct nmiaction *a; | 88 | struct nmiaction *a; |
@@ -166,7 +164,7 @@ void unregister_nmi_handler(unsigned int type, const char *name) | |||
166 | } | 164 | } |
167 | EXPORT_SYMBOL_GPL(unregister_nmi_handler); | 165 | EXPORT_SYMBOL_GPL(unregister_nmi_handler); |
168 | 166 | ||
169 | static notrace __kprobes void | 167 | static __kprobes void |
170 | pci_serr_error(unsigned char reason, struct pt_regs *regs) | 168 | pci_serr_error(unsigned char reason, struct pt_regs *regs) |
171 | { | 169 | { |
172 | /* check to see if anyone registered against these types of errors */ | 170 | /* check to see if anyone registered against these types of errors */ |
@@ -197,7 +195,7 @@ pci_serr_error(unsigned char reason, struct pt_regs *regs) | |||
197 | outb(reason, NMI_REASON_PORT); | 195 | outb(reason, NMI_REASON_PORT); |
198 | } | 196 | } |
199 | 197 | ||
200 | static notrace __kprobes void | 198 | static __kprobes void |
201 | io_check_error(unsigned char reason, struct pt_regs *regs) | 199 | io_check_error(unsigned char reason, struct pt_regs *regs) |
202 | { | 200 | { |
203 | unsigned long i; | 201 | unsigned long i; |
@@ -209,7 +207,7 @@ io_check_error(unsigned char reason, struct pt_regs *regs) | |||
209 | pr_emerg( | 207 | pr_emerg( |
210 | "NMI: IOCK error (debug interrupt?) for reason %02x on CPU %d.\n", | 208 | "NMI: IOCK error (debug interrupt?) for reason %02x on CPU %d.\n", |
211 | reason, smp_processor_id()); | 209 | reason, smp_processor_id()); |
212 | show_registers(regs); | 210 | show_regs(regs); |
213 | 211 | ||
214 | if (panic_on_io_nmi) | 212 | if (panic_on_io_nmi) |
215 | panic("NMI IOCK error: Not continuing"); | 213 | panic("NMI IOCK error: Not continuing"); |
@@ -228,7 +226,7 @@ io_check_error(unsigned char reason, struct pt_regs *regs) | |||
228 | outb(reason, NMI_REASON_PORT); | 226 | outb(reason, NMI_REASON_PORT); |
229 | } | 227 | } |
230 | 228 | ||
231 | static notrace __kprobes void | 229 | static __kprobes void |
232 | unknown_nmi_error(unsigned char reason, struct pt_regs *regs) | 230 | unknown_nmi_error(unsigned char reason, struct pt_regs *regs) |
233 | { | 231 | { |
234 | int handled; | 232 | int handled; |
@@ -247,16 +245,6 @@ unknown_nmi_error(unsigned char reason, struct pt_regs *regs) | |||
247 | 245 | ||
248 | __this_cpu_add(nmi_stats.unknown, 1); | 246 | __this_cpu_add(nmi_stats.unknown, 1); |
249 | 247 | ||
250 | #ifdef CONFIG_MCA | ||
251 | /* | ||
252 | * Might actually be able to figure out what the guilty party | ||
253 | * is: | ||
254 | */ | ||
255 | if (MCA_bus) { | ||
256 | mca_handle_nmi(); | ||
257 | return; | ||
258 | } | ||
259 | #endif | ||
260 | pr_emerg("Uhhuh. NMI received for unknown reason %02x on CPU %d.\n", | 248 | pr_emerg("Uhhuh. NMI received for unknown reason %02x on CPU %d.\n", |
261 | reason, smp_processor_id()); | 249 | reason, smp_processor_id()); |
262 | 250 | ||
@@ -270,7 +258,7 @@ unknown_nmi_error(unsigned char reason, struct pt_regs *regs) | |||
270 | static DEFINE_PER_CPU(bool, swallow_nmi); | 258 | static DEFINE_PER_CPU(bool, swallow_nmi); |
271 | static DEFINE_PER_CPU(unsigned long, last_nmi_rip); | 259 | static DEFINE_PER_CPU(unsigned long, last_nmi_rip); |
272 | 260 | ||
273 | static notrace __kprobes void default_do_nmi(struct pt_regs *regs) | 261 | static __kprobes void default_do_nmi(struct pt_regs *regs) |
274 | { | 262 | { |
275 | unsigned char reason = 0; | 263 | unsigned char reason = 0; |
276 | int handled; | 264 | int handled; |
diff --git a/arch/x86/kernel/nmi_selftest.c b/arch/x86/kernel/nmi_selftest.c index 2c39dcd510f..e31bf8d5c4d 100644 --- a/arch/x86/kernel/nmi_selftest.c +++ b/arch/x86/kernel/nmi_selftest.c | |||
@@ -13,6 +13,7 @@ | |||
13 | #include <linux/cpumask.h> | 13 | #include <linux/cpumask.h> |
14 | #include <linux/delay.h> | 14 | #include <linux/delay.h> |
15 | #include <linux/init.h> | 15 | #include <linux/init.h> |
16 | #include <linux/percpu.h> | ||
16 | 17 | ||
17 | #include <asm/apic.h> | 18 | #include <asm/apic.h> |
18 | #include <asm/nmi.h> | 19 | #include <asm/nmi.h> |
@@ -117,15 +118,15 @@ static void __init dotest(void (*testcase_fn)(void), int expected) | |||
117 | unexpected_testcase_failures++; | 118 | unexpected_testcase_failures++; |
118 | 119 | ||
119 | if (nmi_fail == FAILURE) | 120 | if (nmi_fail == FAILURE) |
120 | printk("FAILED |"); | 121 | printk(KERN_CONT "FAILED |"); |
121 | else if (nmi_fail == TIMEOUT) | 122 | else if (nmi_fail == TIMEOUT) |
122 | printk("TIMEOUT|"); | 123 | printk(KERN_CONT "TIMEOUT|"); |
123 | else | 124 | else |
124 | printk("ERROR |"); | 125 | printk(KERN_CONT "ERROR |"); |
125 | dump_stack(); | 126 | dump_stack(); |
126 | } else { | 127 | } else { |
127 | testcase_successes++; | 128 | testcase_successes++; |
128 | printk(" ok |"); | 129 | printk(KERN_CONT " ok |"); |
129 | } | 130 | } |
130 | testcase_total++; | 131 | testcase_total++; |
131 | 132 | ||
@@ -150,10 +151,10 @@ void __init nmi_selftest(void) | |||
150 | 151 | ||
151 | print_testname("remote IPI"); | 152 | print_testname("remote IPI"); |
152 | dotest(remote_ipi, SUCCESS); | 153 | dotest(remote_ipi, SUCCESS); |
153 | printk("\n"); | 154 | printk(KERN_CONT "\n"); |
154 | print_testname("local IPI"); | 155 | print_testname("local IPI"); |
155 | dotest(local_ipi, SUCCESS); | 156 | dotest(local_ipi, SUCCESS); |
156 | printk("\n"); | 157 | printk(KERN_CONT "\n"); |
157 | 158 | ||
158 | cleanup_nmi_testsuite(); | 159 | cleanup_nmi_testsuite(); |
159 | 160 | ||
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index ab137605e69..9ce885996fd 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c | |||
@@ -241,16 +241,16 @@ static DEFINE_PER_CPU(enum paravirt_lazy_mode, paravirt_lazy_mode) = PARAVIRT_LA | |||
241 | 241 | ||
242 | static inline void enter_lazy(enum paravirt_lazy_mode mode) | 242 | static inline void enter_lazy(enum paravirt_lazy_mode mode) |
243 | { | 243 | { |
244 | BUG_ON(percpu_read(paravirt_lazy_mode) != PARAVIRT_LAZY_NONE); | 244 | BUG_ON(this_cpu_read(paravirt_lazy_mode) != PARAVIRT_LAZY_NONE); |
245 | 245 | ||
246 | percpu_write(paravirt_lazy_mode, mode); | 246 | this_cpu_write(paravirt_lazy_mode, mode); |
247 | } | 247 | } |
248 | 248 | ||
249 | static void leave_lazy(enum paravirt_lazy_mode mode) | 249 | static void leave_lazy(enum paravirt_lazy_mode mode) |
250 | { | 250 | { |
251 | BUG_ON(percpu_read(paravirt_lazy_mode) != mode); | 251 | BUG_ON(this_cpu_read(paravirt_lazy_mode) != mode); |
252 | 252 | ||
253 | percpu_write(paravirt_lazy_mode, PARAVIRT_LAZY_NONE); | 253 | this_cpu_write(paravirt_lazy_mode, PARAVIRT_LAZY_NONE); |
254 | } | 254 | } |
255 | 255 | ||
256 | void paravirt_enter_lazy_mmu(void) | 256 | void paravirt_enter_lazy_mmu(void) |
@@ -267,7 +267,7 @@ void paravirt_start_context_switch(struct task_struct *prev) | |||
267 | { | 267 | { |
268 | BUG_ON(preemptible()); | 268 | BUG_ON(preemptible()); |
269 | 269 | ||
270 | if (percpu_read(paravirt_lazy_mode) == PARAVIRT_LAZY_MMU) { | 270 | if (this_cpu_read(paravirt_lazy_mode) == PARAVIRT_LAZY_MMU) { |
271 | arch_leave_lazy_mmu_mode(); | 271 | arch_leave_lazy_mmu_mode(); |
272 | set_ti_thread_flag(task_thread_info(prev), TIF_LAZY_MMU_UPDATES); | 272 | set_ti_thread_flag(task_thread_info(prev), TIF_LAZY_MMU_UPDATES); |
273 | } | 273 | } |
@@ -289,7 +289,7 @@ enum paravirt_lazy_mode paravirt_get_lazy_mode(void) | |||
289 | if (in_interrupt()) | 289 | if (in_interrupt()) |
290 | return PARAVIRT_LAZY_NONE; | 290 | return PARAVIRT_LAZY_NONE; |
291 | 291 | ||
292 | return percpu_read(paravirt_lazy_mode); | 292 | return this_cpu_read(paravirt_lazy_mode); |
293 | } | 293 | } |
294 | 294 | ||
295 | void arch_flush_lazy_mmu_mode(void) | 295 | void arch_flush_lazy_mmu_mode(void) |
diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c index d0b2fb9ccbb..b72838bae64 100644 --- a/arch/x86/kernel/pci-calgary_64.c +++ b/arch/x86/kernel/pci-calgary_64.c | |||
@@ -1480,8 +1480,9 @@ cleanup: | |||
1480 | static int __init calgary_parse_options(char *p) | 1480 | static int __init calgary_parse_options(char *p) |
1481 | { | 1481 | { |
1482 | unsigned int bridge; | 1482 | unsigned int bridge; |
1483 | unsigned long val; | ||
1483 | size_t len; | 1484 | size_t len; |
1484 | char* endp; | 1485 | ssize_t ret; |
1485 | 1486 | ||
1486 | while (*p) { | 1487 | while (*p) { |
1487 | if (!strncmp(p, "64k", 3)) | 1488 | if (!strncmp(p, "64k", 3)) |
@@ -1512,10 +1513,11 @@ static int __init calgary_parse_options(char *p) | |||
1512 | ++p; | 1513 | ++p; |
1513 | if (*p == '\0') | 1514 | if (*p == '\0') |
1514 | break; | 1515 | break; |
1515 | bridge = simple_strtoul(p, &endp, 0); | 1516 | ret = kstrtoul(p, 0, &val); |
1516 | if (p == endp) | 1517 | if (ret) |
1517 | break; | 1518 | break; |
1518 | 1519 | ||
1520 | bridge = val; | ||
1519 | if (bridge < MAX_PHB_BUS_NUM) { | 1521 | if (bridge < MAX_PHB_BUS_NUM) { |
1520 | printk(KERN_INFO "Calgary: disabling " | 1522 | printk(KERN_INFO "Calgary: disabling " |
1521 | "translation for PHB %#x\n", bridge); | 1523 | "translation for PHB %#x\n", bridge); |
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index e8173154800..735279e54e5 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c | |||
@@ -56,10 +56,16 @@ EXPORT_SYMBOL_GPL(idle_notifier_unregister); | |||
56 | struct kmem_cache *task_xstate_cachep; | 56 | struct kmem_cache *task_xstate_cachep; |
57 | EXPORT_SYMBOL_GPL(task_xstate_cachep); | 57 | EXPORT_SYMBOL_GPL(task_xstate_cachep); |
58 | 58 | ||
59 | /* | ||
60 | * this gets called so that we can store lazy state into memory and copy the | ||
61 | * current task into the new thread. | ||
62 | */ | ||
59 | int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) | 63 | int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) |
60 | { | 64 | { |
61 | int ret; | 65 | int ret; |
62 | 66 | ||
67 | unlazy_fpu(src); | ||
68 | |||
63 | *dst = *src; | 69 | *dst = *src; |
64 | if (fpu_allocated(&src->thread.fpu)) { | 70 | if (fpu_allocated(&src->thread.fpu)) { |
65 | memset(&dst->thread.fpu, 0, sizeof(dst->thread.fpu)); | 71 | memset(&dst->thread.fpu, 0, sizeof(dst->thread.fpu)); |
@@ -89,6 +95,16 @@ void arch_task_cache_init(void) | |||
89 | SLAB_PANIC | SLAB_NOTRACK, NULL); | 95 | SLAB_PANIC | SLAB_NOTRACK, NULL); |
90 | } | 96 | } |
91 | 97 | ||
98 | static inline void drop_fpu(struct task_struct *tsk) | ||
99 | { | ||
100 | /* | ||
101 | * Forget coprocessor state.. | ||
102 | */ | ||
103 | tsk->fpu_counter = 0; | ||
104 | clear_fpu(tsk); | ||
105 | clear_used_math(); | ||
106 | } | ||
107 | |||
92 | /* | 108 | /* |
93 | * Free current thread data structures etc.. | 109 | * Free current thread data structures etc.. |
94 | */ | 110 | */ |
@@ -111,12 +127,8 @@ void exit_thread(void) | |||
111 | put_cpu(); | 127 | put_cpu(); |
112 | kfree(bp); | 128 | kfree(bp); |
113 | } | 129 | } |
114 | } | ||
115 | 130 | ||
116 | void show_regs(struct pt_regs *regs) | 131 | drop_fpu(me); |
117 | { | ||
118 | show_registers(regs); | ||
119 | show_trace(NULL, regs, (unsigned long *)kernel_stack_pointer(regs), 0); | ||
120 | } | 132 | } |
121 | 133 | ||
122 | void show_regs_common(void) | 134 | void show_regs_common(void) |
@@ -151,12 +163,7 @@ void flush_thread(void) | |||
151 | 163 | ||
152 | flush_ptrace_hw_breakpoint(tsk); | 164 | flush_ptrace_hw_breakpoint(tsk); |
153 | memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); | 165 | memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); |
154 | /* | 166 | drop_fpu(tsk); |
155 | * Forget coprocessor state.. | ||
156 | */ | ||
157 | tsk->fpu_counter = 0; | ||
158 | clear_fpu(tsk); | ||
159 | clear_used_math(); | ||
160 | } | 167 | } |
161 | 168 | ||
162 | static void hard_disable_TSC(void) | 169 | static void hard_disable_TSC(void) |
@@ -385,7 +392,7 @@ static inline void play_dead(void) | |||
385 | #ifdef CONFIG_X86_64 | 392 | #ifdef CONFIG_X86_64 |
386 | void enter_idle(void) | 393 | void enter_idle(void) |
387 | { | 394 | { |
388 | percpu_write(is_idle, 1); | 395 | this_cpu_write(is_idle, 1); |
389 | atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL); | 396 | atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL); |
390 | } | 397 | } |
391 | 398 | ||
@@ -582,9 +589,17 @@ int mwait_usable(const struct cpuinfo_x86 *c) | |||
582 | { | 589 | { |
583 | u32 eax, ebx, ecx, edx; | 590 | u32 eax, ebx, ecx, edx; |
584 | 591 | ||
592 | /* Use mwait if idle=mwait boot option is given */ | ||
585 | if (boot_option_idle_override == IDLE_FORCE_MWAIT) | 593 | if (boot_option_idle_override == IDLE_FORCE_MWAIT) |
586 | return 1; | 594 | return 1; |
587 | 595 | ||
596 | /* | ||
597 | * Any idle= boot option other than idle=mwait means that we must not | ||
598 | * use mwait. Eg: idle=halt or idle=poll or idle=nomwait | ||
599 | */ | ||
600 | if (boot_option_idle_override != IDLE_NO_OVERRIDE) | ||
601 | return 0; | ||
602 | |||
588 | if (c->cpuid_level < MWAIT_INFO) | 603 | if (c->cpuid_level < MWAIT_INFO) |
589 | return 0; | 604 | return 0; |
590 | 605 | ||
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index ae6847303e2..516fa186121 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c | |||
@@ -126,15 +126,6 @@ void release_thread(struct task_struct *dead_task) | |||
126 | release_vm86_irqs(dead_task); | 126 | release_vm86_irqs(dead_task); |
127 | } | 127 | } |
128 | 128 | ||
129 | /* | ||
130 | * This gets called before we allocate a new thread and copy | ||
131 | * the current task into it. | ||
132 | */ | ||
133 | void prepare_to_copy(struct task_struct *tsk) | ||
134 | { | ||
135 | unlazy_fpu(tsk); | ||
136 | } | ||
137 | |||
138 | int copy_thread(unsigned long clone_flags, unsigned long sp, | 129 | int copy_thread(unsigned long clone_flags, unsigned long sp, |
139 | unsigned long unused, | 130 | unsigned long unused, |
140 | struct task_struct *p, struct pt_regs *regs) | 131 | struct task_struct *p, struct pt_regs *regs) |
@@ -302,7 +293,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | |||
302 | 293 | ||
303 | switch_fpu_finish(next_p, fpu); | 294 | switch_fpu_finish(next_p, fpu); |
304 | 295 | ||
305 | percpu_write(current_task, next_p); | 296 | this_cpu_write(current_task, next_p); |
306 | 297 | ||
307 | return prev_p; | 298 | return prev_p; |
308 | } | 299 | } |
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 43d8b48b23e..61cdf7fdf09 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c | |||
@@ -145,15 +145,6 @@ static inline u32 read_32bit_tls(struct task_struct *t, int tls) | |||
145 | return get_desc_base(&t->thread.tls_array[tls]); | 145 | return get_desc_base(&t->thread.tls_array[tls]); |
146 | } | 146 | } |
147 | 147 | ||
148 | /* | ||
149 | * This gets called before we allocate a new thread and copy | ||
150 | * the current task into it. | ||
151 | */ | ||
152 | void prepare_to_copy(struct task_struct *tsk) | ||
153 | { | ||
154 | unlazy_fpu(tsk); | ||
155 | } | ||
156 | |||
157 | int copy_thread(unsigned long clone_flags, unsigned long sp, | 148 | int copy_thread(unsigned long clone_flags, unsigned long sp, |
158 | unsigned long unused, | 149 | unsigned long unused, |
159 | struct task_struct *p, struct pt_regs *regs) | 150 | struct task_struct *p, struct pt_regs *regs) |
@@ -237,7 +228,7 @@ start_thread_common(struct pt_regs *regs, unsigned long new_ip, | |||
237 | current->thread.usersp = new_sp; | 228 | current->thread.usersp = new_sp; |
238 | regs->ip = new_ip; | 229 | regs->ip = new_ip; |
239 | regs->sp = new_sp; | 230 | regs->sp = new_sp; |
240 | percpu_write(old_rsp, new_sp); | 231 | this_cpu_write(old_rsp, new_sp); |
241 | regs->cs = _cs; | 232 | regs->cs = _cs; |
242 | regs->ss = _ss; | 233 | regs->ss = _ss; |
243 | regs->flags = X86_EFLAGS_IF; | 234 | regs->flags = X86_EFLAGS_IF; |
@@ -359,11 +350,11 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | |||
359 | /* | 350 | /* |
360 | * Switch the PDA and FPU contexts. | 351 | * Switch the PDA and FPU contexts. |
361 | */ | 352 | */ |
362 | prev->usersp = percpu_read(old_rsp); | 353 | prev->usersp = this_cpu_read(old_rsp); |
363 | percpu_write(old_rsp, next->usersp); | 354 | this_cpu_write(old_rsp, next->usersp); |
364 | percpu_write(current_task, next_p); | 355 | this_cpu_write(current_task, next_p); |
365 | 356 | ||
366 | percpu_write(kernel_stack, | 357 | this_cpu_write(kernel_stack, |
367 | (unsigned long)task_stack_page(next_p) + | 358 | (unsigned long)task_stack_page(next_p) + |
368 | THREAD_SIZE - KERNEL_STACK_OFFSET); | 359 | THREAD_SIZE - KERNEL_STACK_OFFSET); |
369 | 360 | ||
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 685845cf16e..13b1990c7c5 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c | |||
@@ -1480,7 +1480,11 @@ long syscall_trace_enter(struct pt_regs *regs) | |||
1480 | regs->flags |= X86_EFLAGS_TF; | 1480 | regs->flags |= X86_EFLAGS_TF; |
1481 | 1481 | ||
1482 | /* do the secure computing check first */ | 1482 | /* do the secure computing check first */ |
1483 | secure_computing(regs->orig_ax); | 1483 | if (secure_computing(regs->orig_ax)) { |
1484 | /* seccomp failures shouldn't expose any additional code. */ | ||
1485 | ret = -1L; | ||
1486 | goto out; | ||
1487 | } | ||
1484 | 1488 | ||
1485 | if (unlikely(test_thread_flag(TIF_SYSCALL_EMU))) | 1489 | if (unlikely(test_thread_flag(TIF_SYSCALL_EMU))) |
1486 | ret = -1L; | 1490 | ret = -1L; |
@@ -1505,6 +1509,7 @@ long syscall_trace_enter(struct pt_regs *regs) | |||
1505 | regs->dx, regs->r10); | 1509 | regs->dx, regs->r10); |
1506 | #endif | 1510 | #endif |
1507 | 1511 | ||
1512 | out: | ||
1508 | return ret ?: regs->orig_ax; | 1513 | return ret ?: regs->orig_ax; |
1509 | } | 1514 | } |
1510 | 1515 | ||
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index d840e69a853..77215c23fba 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c | |||
@@ -39,7 +39,8 @@ static int reboot_mode; | |||
39 | enum reboot_type reboot_type = BOOT_ACPI; | 39 | enum reboot_type reboot_type = BOOT_ACPI; |
40 | int reboot_force; | 40 | int reboot_force; |
41 | 41 | ||
42 | /* This variable is used privately to keep track of whether or not | 42 | /* |
43 | * This variable is used privately to keep track of whether or not | ||
43 | * reboot_type is still set to its default value (i.e., reboot= hasn't | 44 | * reboot_type is still set to its default value (i.e., reboot= hasn't |
44 | * been set on the command line). This is needed so that we can | 45 | * been set on the command line). This is needed so that we can |
45 | * suppress DMI scanning for reboot quirks. Without it, it's | 46 | * suppress DMI scanning for reboot quirks. Without it, it's |
@@ -51,7 +52,8 @@ static int reboot_default = 1; | |||
51 | static int reboot_cpu = -1; | 52 | static int reboot_cpu = -1; |
52 | #endif | 53 | #endif |
53 | 54 | ||
54 | /* This is set if we need to go through the 'emergency' path. | 55 | /* |
56 | * This is set if we need to go through the 'emergency' path. | ||
55 | * When machine_emergency_restart() is called, we may be on | 57 | * When machine_emergency_restart() is called, we may be on |
56 | * an inconsistent state and won't be able to do a clean cleanup | 58 | * an inconsistent state and won't be able to do a clean cleanup |
57 | */ | 59 | */ |
@@ -60,22 +62,24 @@ static int reboot_emergency; | |||
60 | /* This is set by the PCI code if either type 1 or type 2 PCI is detected */ | 62 | /* This is set by the PCI code if either type 1 or type 2 PCI is detected */ |
61 | bool port_cf9_safe = false; | 63 | bool port_cf9_safe = false; |
62 | 64 | ||
63 | /* reboot=b[ios] | s[mp] | t[riple] | k[bd] | e[fi] [, [w]arm | [c]old] | p[ci] | 65 | /* |
64 | warm Don't set the cold reboot flag | 66 | * reboot=b[ios] | s[mp] | t[riple] | k[bd] | e[fi] [, [w]arm | [c]old] | p[ci] |
65 | cold Set the cold reboot flag | 67 | * warm Don't set the cold reboot flag |
66 | bios Reboot by jumping through the BIOS (only for X86_32) | 68 | * cold Set the cold reboot flag |
67 | smp Reboot by executing reset on BSP or other CPU (only for X86_32) | 69 | * bios Reboot by jumping through the BIOS (only for X86_32) |
68 | triple Force a triple fault (init) | 70 | * smp Reboot by executing reset on BSP or other CPU (only for X86_32) |
69 | kbd Use the keyboard controller. cold reset (default) | 71 | * triple Force a triple fault (init) |
70 | acpi Use the RESET_REG in the FADT | 72 | * kbd Use the keyboard controller. cold reset (default) |
71 | efi Use efi reset_system runtime service | 73 | * acpi Use the RESET_REG in the FADT |
72 | pci Use the so-called "PCI reset register", CF9 | 74 | * efi Use efi reset_system runtime service |
73 | force Avoid anything that could hang. | 75 | * pci Use the so-called "PCI reset register", CF9 |
76 | * force Avoid anything that could hang. | ||
74 | */ | 77 | */ |
75 | static int __init reboot_setup(char *str) | 78 | static int __init reboot_setup(char *str) |
76 | { | 79 | { |
77 | for (;;) { | 80 | for (;;) { |
78 | /* Having anything passed on the command line via | 81 | /* |
82 | * Having anything passed on the command line via | ||
79 | * reboot= will cause us to disable DMI checking | 83 | * reboot= will cause us to disable DMI checking |
80 | * below. | 84 | * below. |
81 | */ | 85 | */ |
@@ -98,9 +102,11 @@ static int __init reboot_setup(char *str) | |||
98 | if (isdigit(*(str+2))) | 102 | if (isdigit(*(str+2))) |
99 | reboot_cpu = reboot_cpu*10 + (int)(*(str+2) - '0'); | 103 | reboot_cpu = reboot_cpu*10 + (int)(*(str+2) - '0'); |
100 | } | 104 | } |
101 | /* we will leave sorting out the final value | 105 | /* |
102 | when we are ready to reboot, since we might not | 106 | * We will leave sorting out the final value |
103 | have detected BSP APIC ID or smp_num_cpu */ | 107 | * when we are ready to reboot, since we might not |
108 | * have detected BSP APIC ID or smp_num_cpu | ||
109 | */ | ||
104 | break; | 110 | break; |
105 | #endif /* CONFIG_SMP */ | 111 | #endif /* CONFIG_SMP */ |
106 | 112 | ||
@@ -150,6 +156,82 @@ static int __init set_bios_reboot(const struct dmi_system_id *d) | |||
150 | return 0; | 156 | return 0; |
151 | } | 157 | } |
152 | 158 | ||
159 | extern const unsigned char machine_real_restart_asm[]; | ||
160 | extern const u64 machine_real_restart_gdt[3]; | ||
161 | |||
162 | void machine_real_restart(unsigned int type) | ||
163 | { | ||
164 | void *restart_va; | ||
165 | unsigned long restart_pa; | ||
166 | void (*restart_lowmem)(unsigned int); | ||
167 | u64 *lowmem_gdt; | ||
168 | |||
169 | local_irq_disable(); | ||
170 | |||
171 | /* | ||
172 | * Write zero to CMOS register number 0x0f, which the BIOS POST | ||
173 | * routine will recognize as telling it to do a proper reboot. (Well | ||
174 | * that's what this book in front of me says -- it may only apply to | ||
175 | * the Phoenix BIOS though, it's not clear). At the same time, | ||
176 | * disable NMIs by setting the top bit in the CMOS address register, | ||
177 | * as we're about to do peculiar things to the CPU. I'm not sure if | ||
178 | * `outb_p' is needed instead of just `outb'. Use it to be on the | ||
179 | * safe side. (Yes, CMOS_WRITE does outb_p's. - Paul G.) | ||
180 | */ | ||
181 | spin_lock(&rtc_lock); | ||
182 | CMOS_WRITE(0x00, 0x8f); | ||
183 | spin_unlock(&rtc_lock); | ||
184 | |||
185 | /* | ||
186 | * Switch back to the initial page table. | ||
187 | */ | ||
188 | load_cr3(initial_page_table); | ||
189 | |||
190 | /* | ||
191 | * Write 0x1234 to absolute memory location 0x472. The BIOS reads | ||
192 | * this on booting to tell it to "Bypass memory test (also warm | ||
193 | * boot)". This seems like a fairly standard thing that gets set by | ||
194 | * REBOOT.COM programs, and the previous reset routine did this | ||
195 | * too. */ | ||
196 | *((unsigned short *)0x472) = reboot_mode; | ||
197 | |||
198 | /* Patch the GDT in the low memory trampoline */ | ||
199 | lowmem_gdt = TRAMPOLINE_SYM(machine_real_restart_gdt); | ||
200 | |||
201 | restart_va = TRAMPOLINE_SYM(machine_real_restart_asm); | ||
202 | restart_pa = virt_to_phys(restart_va); | ||
203 | restart_lowmem = (void (*)(unsigned int))restart_pa; | ||
204 | |||
205 | /* GDT[0]: GDT self-pointer */ | ||
206 | lowmem_gdt[0] = | ||
207 | (u64)(sizeof(machine_real_restart_gdt) - 1) + | ||
208 | ((u64)virt_to_phys(lowmem_gdt) << 16); | ||
209 | /* GDT[1]: 64K real mode code segment */ | ||
210 | lowmem_gdt[1] = | ||
211 | GDT_ENTRY(0x009b, restart_pa, 0xffff); | ||
212 | |||
213 | /* Jump to the identity-mapped low memory code */ | ||
214 | restart_lowmem(type); | ||
215 | } | ||
216 | #ifdef CONFIG_APM_MODULE | ||
217 | EXPORT_SYMBOL(machine_real_restart); | ||
218 | #endif | ||
219 | |||
220 | #endif /* CONFIG_X86_32 */ | ||
221 | |||
222 | /* | ||
223 | * Some Apple MacBook and MacBookPro's needs reboot=p to be able to reboot | ||
224 | */ | ||
225 | static int __init set_pci_reboot(const struct dmi_system_id *d) | ||
226 | { | ||
227 | if (reboot_type != BOOT_CF9) { | ||
228 | reboot_type = BOOT_CF9; | ||
229 | printk(KERN_INFO "%s series board detected. " | ||
230 | "Selecting PCI-method for reboots.\n", d->ident); | ||
231 | } | ||
232 | return 0; | ||
233 | } | ||
234 | |||
153 | static int __init set_kbd_reboot(const struct dmi_system_id *d) | 235 | static int __init set_kbd_reboot(const struct dmi_system_id *d) |
154 | { | 236 | { |
155 | if (reboot_type != BOOT_KBD) { | 237 | if (reboot_type != BOOT_KBD) { |
@@ -159,7 +241,12 @@ static int __init set_kbd_reboot(const struct dmi_system_id *d) | |||
159 | return 0; | 241 | return 0; |
160 | } | 242 | } |
161 | 243 | ||
244 | /* | ||
245 | * This is a single dmi_table handling all reboot quirks. Note that | ||
246 | * REBOOT_BIOS is only available for 32bit | ||
247 | */ | ||
162 | static struct dmi_system_id __initdata reboot_dmi_table[] = { | 248 | static struct dmi_system_id __initdata reboot_dmi_table[] = { |
249 | #ifdef CONFIG_X86_32 | ||
163 | { /* Handle problems with rebooting on Dell E520's */ | 250 | { /* Handle problems with rebooting on Dell E520's */ |
164 | .callback = set_bios_reboot, | 251 | .callback = set_bios_reboot, |
165 | .ident = "Dell E520", | 252 | .ident = "Dell E520", |
@@ -184,7 +271,7 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = { | |||
184 | DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge 300/"), | 271 | DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge 300/"), |
185 | }, | 272 | }, |
186 | }, | 273 | }, |
187 | { /* Handle problems with rebooting on Dell Optiplex 745's SFF*/ | 274 | { /* Handle problems with rebooting on Dell Optiplex 745's SFF */ |
188 | .callback = set_bios_reboot, | 275 | .callback = set_bios_reboot, |
189 | .ident = "Dell OptiPlex 745", | 276 | .ident = "Dell OptiPlex 745", |
190 | .matches = { | 277 | .matches = { |
@@ -192,7 +279,7 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = { | |||
192 | DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 745"), | 279 | DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 745"), |
193 | }, | 280 | }, |
194 | }, | 281 | }, |
195 | { /* Handle problems with rebooting on Dell Optiplex 745's DFF*/ | 282 | { /* Handle problems with rebooting on Dell Optiplex 745's DFF */ |
196 | .callback = set_bios_reboot, | 283 | .callback = set_bios_reboot, |
197 | .ident = "Dell OptiPlex 745", | 284 | .ident = "Dell OptiPlex 745", |
198 | .matches = { | 285 | .matches = { |
@@ -201,7 +288,7 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = { | |||
201 | DMI_MATCH(DMI_BOARD_NAME, "0MM599"), | 288 | DMI_MATCH(DMI_BOARD_NAME, "0MM599"), |
202 | }, | 289 | }, |
203 | }, | 290 | }, |
204 | { /* Handle problems with rebooting on Dell Optiplex 745 with 0KW626 */ | 291 | { /* Handle problems with rebooting on Dell Optiplex 745 with 0KW626 */ |
205 | .callback = set_bios_reboot, | 292 | .callback = set_bios_reboot, |
206 | .ident = "Dell OptiPlex 745", | 293 | .ident = "Dell OptiPlex 745", |
207 | .matches = { | 294 | .matches = { |
@@ -210,7 +297,7 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = { | |||
210 | DMI_MATCH(DMI_BOARD_NAME, "0KW626"), | 297 | DMI_MATCH(DMI_BOARD_NAME, "0KW626"), |
211 | }, | 298 | }, |
212 | }, | 299 | }, |
213 | { /* Handle problems with rebooting on Dell Optiplex 330 with 0KP561 */ | 300 | { /* Handle problems with rebooting on Dell Optiplex 330 with 0KP561 */ |
214 | .callback = set_bios_reboot, | 301 | .callback = set_bios_reboot, |
215 | .ident = "Dell OptiPlex 330", | 302 | .ident = "Dell OptiPlex 330", |
216 | .matches = { | 303 | .matches = { |
@@ -219,7 +306,7 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = { | |||
219 | DMI_MATCH(DMI_BOARD_NAME, "0KP561"), | 306 | DMI_MATCH(DMI_BOARD_NAME, "0KP561"), |
220 | }, | 307 | }, |
221 | }, | 308 | }, |
222 | { /* Handle problems with rebooting on Dell Optiplex 360 with 0T656F */ | 309 | { /* Handle problems with rebooting on Dell Optiplex 360 with 0T656F */ |
223 | .callback = set_bios_reboot, | 310 | .callback = set_bios_reboot, |
224 | .ident = "Dell OptiPlex 360", | 311 | .ident = "Dell OptiPlex 360", |
225 | .matches = { | 312 | .matches = { |
@@ -228,7 +315,7 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = { | |||
228 | DMI_MATCH(DMI_BOARD_NAME, "0T656F"), | 315 | DMI_MATCH(DMI_BOARD_NAME, "0T656F"), |
229 | }, | 316 | }, |
230 | }, | 317 | }, |
231 | { /* Handle problems with rebooting on Dell OptiPlex 760 with 0G919G*/ | 318 | { /* Handle problems with rebooting on Dell OptiPlex 760 with 0G919G */ |
232 | .callback = set_bios_reboot, | 319 | .callback = set_bios_reboot, |
233 | .ident = "Dell OptiPlex 760", | 320 | .ident = "Dell OptiPlex 760", |
234 | .matches = { | 321 | .matches = { |
@@ -301,7 +388,7 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = { | |||
301 | DMI_MATCH(DMI_PRODUCT_NAME, "SBC-FITPC2"), | 388 | DMI_MATCH(DMI_PRODUCT_NAME, "SBC-FITPC2"), |
302 | }, | 389 | }, |
303 | }, | 390 | }, |
304 | { /* Handle problems with rebooting on ASUS P4S800 */ | 391 | { /* Handle problems with rebooting on ASUS P4S800 */ |
305 | .callback = set_bios_reboot, | 392 | .callback = set_bios_reboot, |
306 | .ident = "ASUS P4S800", | 393 | .ident = "ASUS P4S800", |
307 | .matches = { | 394 | .matches = { |
@@ -309,7 +396,9 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = { | |||
309 | DMI_MATCH(DMI_BOARD_NAME, "P4S800"), | 396 | DMI_MATCH(DMI_BOARD_NAME, "P4S800"), |
310 | }, | 397 | }, |
311 | }, | 398 | }, |
312 | { /* Handle reboot issue on Acer Aspire one */ | 399 | #endif /* CONFIG_X86_32 */ |
400 | |||
401 | { /* Handle reboot issue on Acer Aspire one */ | ||
313 | .callback = set_kbd_reboot, | 402 | .callback = set_kbd_reboot, |
314 | .ident = "Acer Aspire One A110", | 403 | .ident = "Acer Aspire One A110", |
315 | .matches = { | 404 | .matches = { |
@@ -317,96 +406,6 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = { | |||
317 | DMI_MATCH(DMI_PRODUCT_NAME, "AOA110"), | 406 | DMI_MATCH(DMI_PRODUCT_NAME, "AOA110"), |
318 | }, | 407 | }, |
319 | }, | 408 | }, |
320 | { } | ||
321 | }; | ||
322 | |||
323 | static int __init reboot_init(void) | ||
324 | { | ||
325 | /* Only do the DMI check if reboot_type hasn't been overridden | ||
326 | * on the command line | ||
327 | */ | ||
328 | if (reboot_default) { | ||
329 | dmi_check_system(reboot_dmi_table); | ||
330 | } | ||
331 | return 0; | ||
332 | } | ||
333 | core_initcall(reboot_init); | ||
334 | |||
335 | extern const unsigned char machine_real_restart_asm[]; | ||
336 | extern const u64 machine_real_restart_gdt[3]; | ||
337 | |||
338 | void machine_real_restart(unsigned int type) | ||
339 | { | ||
340 | void *restart_va; | ||
341 | unsigned long restart_pa; | ||
342 | void (*restart_lowmem)(unsigned int); | ||
343 | u64 *lowmem_gdt; | ||
344 | |||
345 | local_irq_disable(); | ||
346 | |||
347 | /* Write zero to CMOS register number 0x0f, which the BIOS POST | ||
348 | routine will recognize as telling it to do a proper reboot. (Well | ||
349 | that's what this book in front of me says -- it may only apply to | ||
350 | the Phoenix BIOS though, it's not clear). At the same time, | ||
351 | disable NMIs by setting the top bit in the CMOS address register, | ||
352 | as we're about to do peculiar things to the CPU. I'm not sure if | ||
353 | `outb_p' is needed instead of just `outb'. Use it to be on the | ||
354 | safe side. (Yes, CMOS_WRITE does outb_p's. - Paul G.) | ||
355 | */ | ||
356 | spin_lock(&rtc_lock); | ||
357 | CMOS_WRITE(0x00, 0x8f); | ||
358 | spin_unlock(&rtc_lock); | ||
359 | |||
360 | /* | ||
361 | * Switch back to the initial page table. | ||
362 | */ | ||
363 | load_cr3(initial_page_table); | ||
364 | |||
365 | /* Write 0x1234 to absolute memory location 0x472. The BIOS reads | ||
366 | this on booting to tell it to "Bypass memory test (also warm | ||
367 | boot)". This seems like a fairly standard thing that gets set by | ||
368 | REBOOT.COM programs, and the previous reset routine did this | ||
369 | too. */ | ||
370 | *((unsigned short *)0x472) = reboot_mode; | ||
371 | |||
372 | /* Patch the GDT in the low memory trampoline */ | ||
373 | lowmem_gdt = TRAMPOLINE_SYM(machine_real_restart_gdt); | ||
374 | |||
375 | restart_va = TRAMPOLINE_SYM(machine_real_restart_asm); | ||
376 | restart_pa = virt_to_phys(restart_va); | ||
377 | restart_lowmem = (void (*)(unsigned int))restart_pa; | ||
378 | |||
379 | /* GDT[0]: GDT self-pointer */ | ||
380 | lowmem_gdt[0] = | ||
381 | (u64)(sizeof(machine_real_restart_gdt) - 1) + | ||
382 | ((u64)virt_to_phys(lowmem_gdt) << 16); | ||
383 | /* GDT[1]: 64K real mode code segment */ | ||
384 | lowmem_gdt[1] = | ||
385 | GDT_ENTRY(0x009b, restart_pa, 0xffff); | ||
386 | |||
387 | /* Jump to the identity-mapped low memory code */ | ||
388 | restart_lowmem(type); | ||
389 | } | ||
390 | #ifdef CONFIG_APM_MODULE | ||
391 | EXPORT_SYMBOL(machine_real_restart); | ||
392 | #endif | ||
393 | |||
394 | #endif /* CONFIG_X86_32 */ | ||
395 | |||
396 | /* | ||
397 | * Some Apple MacBook and MacBookPro's needs reboot=p to be able to reboot | ||
398 | */ | ||
399 | static int __init set_pci_reboot(const struct dmi_system_id *d) | ||
400 | { | ||
401 | if (reboot_type != BOOT_CF9) { | ||
402 | reboot_type = BOOT_CF9; | ||
403 | printk(KERN_INFO "%s series board detected. " | ||
404 | "Selecting PCI-method for reboots.\n", d->ident); | ||
405 | } | ||
406 | return 0; | ||
407 | } | ||
408 | |||
409 | static struct dmi_system_id __initdata pci_reboot_dmi_table[] = { | ||
410 | { /* Handle problems with rebooting on Apple MacBook5 */ | 409 | { /* Handle problems with rebooting on Apple MacBook5 */ |
411 | .callback = set_pci_reboot, | 410 | .callback = set_pci_reboot, |
412 | .ident = "Apple MacBook5", | 411 | .ident = "Apple MacBook5", |
@@ -474,17 +473,17 @@ static struct dmi_system_id __initdata pci_reboot_dmi_table[] = { | |||
474 | { } | 473 | { } |
475 | }; | 474 | }; |
476 | 475 | ||
477 | static int __init pci_reboot_init(void) | 476 | static int __init reboot_init(void) |
478 | { | 477 | { |
479 | /* Only do the DMI check if reboot_type hasn't been overridden | 478 | /* |
479 | * Only do the DMI check if reboot_type hasn't been overridden | ||
480 | * on the command line | 480 | * on the command line |
481 | */ | 481 | */ |
482 | if (reboot_default) { | 482 | if (reboot_default) |
483 | dmi_check_system(pci_reboot_dmi_table); | 483 | dmi_check_system(reboot_dmi_table); |
484 | } | ||
485 | return 0; | 484 | return 0; |
486 | } | 485 | } |
487 | core_initcall(pci_reboot_init); | 486 | core_initcall(reboot_init); |
488 | 487 | ||
489 | static inline void kb_wait(void) | 488 | static inline void kb_wait(void) |
490 | { | 489 | { |
@@ -502,14 +501,14 @@ static void vmxoff_nmi(int cpu, struct pt_regs *regs) | |||
502 | cpu_emergency_vmxoff(); | 501 | cpu_emergency_vmxoff(); |
503 | } | 502 | } |
504 | 503 | ||
505 | /* Use NMIs as IPIs to tell all CPUs to disable virtualization | 504 | /* Use NMIs as IPIs to tell all CPUs to disable virtualization */ |
506 | */ | ||
507 | static void emergency_vmx_disable_all(void) | 505 | static void emergency_vmx_disable_all(void) |
508 | { | 506 | { |
509 | /* Just make sure we won't change CPUs while doing this */ | 507 | /* Just make sure we won't change CPUs while doing this */ |
510 | local_irq_disable(); | 508 | local_irq_disable(); |
511 | 509 | ||
512 | /* We need to disable VMX on all CPUs before rebooting, otherwise | 510 | /* |
511 | * We need to disable VMX on all CPUs before rebooting, otherwise | ||
513 | * we risk hanging up the machine, because the CPU ignore INIT | 512 | * we risk hanging up the machine, because the CPU ignore INIT |
514 | * signals when VMX is enabled. | 513 | * signals when VMX is enabled. |
515 | * | 514 | * |
@@ -528,8 +527,7 @@ static void emergency_vmx_disable_all(void) | |||
528 | * is still enabling VMX. | 527 | * is still enabling VMX. |
529 | */ | 528 | */ |
530 | if (cpu_has_vmx() && cpu_vmx_enabled()) { | 529 | if (cpu_has_vmx() && cpu_vmx_enabled()) { |
531 | /* Disable VMX on this CPU. | 530 | /* Disable VMX on this CPU. */ |
532 | */ | ||
533 | cpu_vmxoff(); | 531 | cpu_vmxoff(); |
534 | 532 | ||
535 | /* Halt and disable VMX on the other CPUs */ | 533 | /* Halt and disable VMX on the other CPUs */ |
@@ -574,12 +572,12 @@ static void native_machine_emergency_restart(void) | |||
574 | /* Could also try the reset bit in the Hammer NB */ | 572 | /* Could also try the reset bit in the Hammer NB */ |
575 | switch (reboot_type) { | 573 | switch (reboot_type) { |
576 | case BOOT_KBD: | 574 | case BOOT_KBD: |
577 | mach_reboot_fixups(); /* for board specific fixups */ | 575 | mach_reboot_fixups(); /* For board specific fixups */ |
578 | 576 | ||
579 | for (i = 0; i < 10; i++) { | 577 | for (i = 0; i < 10; i++) { |
580 | kb_wait(); | 578 | kb_wait(); |
581 | udelay(50); | 579 | udelay(50); |
582 | outb(0xfe, 0x64); /* pulse reset low */ | 580 | outb(0xfe, 0x64); /* Pulse reset low */ |
583 | udelay(50); | 581 | udelay(50); |
584 | } | 582 | } |
585 | if (attempt == 0 && orig_reboot_type == BOOT_ACPI) { | 583 | if (attempt == 0 && orig_reboot_type == BOOT_ACPI) { |
@@ -621,7 +619,7 @@ static void native_machine_emergency_restart(void) | |||
621 | 619 | ||
622 | case BOOT_CF9: | 620 | case BOOT_CF9: |
623 | port_cf9_safe = true; | 621 | port_cf9_safe = true; |
624 | /* fall through */ | 622 | /* Fall through */ |
625 | 623 | ||
626 | case BOOT_CF9_COND: | 624 | case BOOT_CF9_COND: |
627 | if (port_cf9_safe) { | 625 | if (port_cf9_safe) { |
@@ -659,7 +657,8 @@ void native_machine_shutdown(void) | |||
659 | /* Make certain I only run on the appropriate processor */ | 657 | /* Make certain I only run on the appropriate processor */ |
660 | set_cpus_allowed_ptr(current, cpumask_of(reboot_cpu_id)); | 658 | set_cpus_allowed_ptr(current, cpumask_of(reboot_cpu_id)); |
661 | 659 | ||
662 | /* O.K Now that I'm on the appropriate processor, | 660 | /* |
661 | * O.K Now that I'm on the appropriate processor, | ||
663 | * stop all of the others. | 662 | * stop all of the others. |
664 | */ | 663 | */ |
665 | stop_other_cpus(); | 664 | stop_other_cpus(); |
@@ -697,12 +696,11 @@ static void native_machine_restart(char *__unused) | |||
697 | 696 | ||
698 | static void native_machine_halt(void) | 697 | static void native_machine_halt(void) |
699 | { | 698 | { |
700 | /* stop other cpus and apics */ | 699 | /* Stop other cpus and apics */ |
701 | machine_shutdown(); | 700 | machine_shutdown(); |
702 | 701 | ||
703 | tboot_shutdown(TB_SHUTDOWN_HALT); | 702 | tboot_shutdown(TB_SHUTDOWN_HALT); |
704 | 703 | ||
705 | /* stop this cpu */ | ||
706 | stop_this_cpu(NULL); | 704 | stop_this_cpu(NULL); |
707 | } | 705 | } |
708 | 706 | ||
@@ -713,7 +711,7 @@ static void native_machine_power_off(void) | |||
713 | machine_shutdown(); | 711 | machine_shutdown(); |
714 | pm_power_off(); | 712 | pm_power_off(); |
715 | } | 713 | } |
716 | /* a fallback in case there is no PM info available */ | 714 | /* A fallback in case there is no PM info available */ |
717 | tboot_shutdown(TB_SHUTDOWN_HALT); | 715 | tboot_shutdown(TB_SHUTDOWN_HALT); |
718 | } | 716 | } |
719 | 717 | ||
@@ -775,7 +773,8 @@ static int crash_nmi_callback(unsigned int val, struct pt_regs *regs) | |||
775 | 773 | ||
776 | cpu = raw_smp_processor_id(); | 774 | cpu = raw_smp_processor_id(); |
777 | 775 | ||
778 | /* Don't do anything if this handler is invoked on crashing cpu. | 776 | /* |
777 | * Don't do anything if this handler is invoked on crashing cpu. | ||
779 | * Otherwise, system will completely hang. Crashing cpu can get | 778 | * Otherwise, system will completely hang. Crashing cpu can get |
780 | * an NMI if system was initially booted with nmi_watchdog parameter. | 779 | * an NMI if system was initially booted with nmi_watchdog parameter. |
781 | */ | 780 | */ |
@@ -799,7 +798,8 @@ static void smp_send_nmi_allbutself(void) | |||
799 | apic->send_IPI_allbutself(NMI_VECTOR); | 798 | apic->send_IPI_allbutself(NMI_VECTOR); |
800 | } | 799 | } |
801 | 800 | ||
802 | /* Halt all other CPUs, calling the specified function on each of them | 801 | /* |
802 | * Halt all other CPUs, calling the specified function on each of them | ||
803 | * | 803 | * |
804 | * This function can be used to halt all other CPUs on crash | 804 | * This function can be used to halt all other CPUs on crash |
805 | * or emergency reboot time. The function passed as parameter | 805 | * or emergency reboot time. The function passed as parameter |
@@ -810,7 +810,7 @@ void nmi_shootdown_cpus(nmi_shootdown_cb callback) | |||
810 | unsigned long msecs; | 810 | unsigned long msecs; |
811 | local_irq_disable(); | 811 | local_irq_disable(); |
812 | 812 | ||
813 | /* Make a note of crashing cpu. Will be used in NMI callback.*/ | 813 | /* Make a note of crashing cpu. Will be used in NMI callback. */ |
814 | crashing_cpu = safe_smp_processor_id(); | 814 | crashing_cpu = safe_smp_processor_id(); |
815 | 815 | ||
816 | shootdown_callback = callback; | 816 | shootdown_callback = callback; |
@@ -819,8 +819,9 @@ void nmi_shootdown_cpus(nmi_shootdown_cb callback) | |||
819 | /* Would it be better to replace the trap vector here? */ | 819 | /* Would it be better to replace the trap vector here? */ |
820 | if (register_nmi_handler(NMI_LOCAL, crash_nmi_callback, | 820 | if (register_nmi_handler(NMI_LOCAL, crash_nmi_callback, |
821 | NMI_FLAG_FIRST, "crash")) | 821 | NMI_FLAG_FIRST, "crash")) |
822 | return; /* return what? */ | 822 | return; /* Return what? */ |
823 | /* Ensure the new callback function is set before sending | 823 | /* |
824 | * Ensure the new callback function is set before sending | ||
824 | * out the NMI | 825 | * out the NMI |
825 | */ | 826 | */ |
826 | wmb(); | 827 | wmb(); |
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 1a290156205..366c688d619 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c | |||
@@ -34,7 +34,6 @@ | |||
34 | #include <linux/memblock.h> | 34 | #include <linux/memblock.h> |
35 | #include <linux/seq_file.h> | 35 | #include <linux/seq_file.h> |
36 | #include <linux/console.h> | 36 | #include <linux/console.h> |
37 | #include <linux/mca.h> | ||
38 | #include <linux/root_dev.h> | 37 | #include <linux/root_dev.h> |
39 | #include <linux/highmem.h> | 38 | #include <linux/highmem.h> |
40 | #include <linux/module.h> | 39 | #include <linux/module.h> |
@@ -179,12 +178,6 @@ struct cpuinfo_x86 new_cpu_data __cpuinitdata = {0, 0, 0, 0, -1, 1, 0, 0, -1}; | |||
179 | /* common cpu data for all cpus */ | 178 | /* common cpu data for all cpus */ |
180 | struct cpuinfo_x86 boot_cpu_data __read_mostly = {0, 0, 0, 0, -1, 1, 0, 0, -1}; | 179 | struct cpuinfo_x86 boot_cpu_data __read_mostly = {0, 0, 0, 0, -1, 1, 0, 0, -1}; |
181 | EXPORT_SYMBOL(boot_cpu_data); | 180 | EXPORT_SYMBOL(boot_cpu_data); |
182 | static void set_mca_bus(int x) | ||
183 | { | ||
184 | #ifdef CONFIG_MCA | ||
185 | MCA_bus = x; | ||
186 | #endif | ||
187 | } | ||
188 | 181 | ||
189 | unsigned int def_to_bigsmp; | 182 | unsigned int def_to_bigsmp; |
190 | 183 | ||
@@ -393,10 +386,9 @@ static void __init reserve_initrd(void) | |||
393 | initrd_start = 0; | 386 | initrd_start = 0; |
394 | 387 | ||
395 | if (ramdisk_size >= (end_of_lowmem>>1)) { | 388 | if (ramdisk_size >= (end_of_lowmem>>1)) { |
396 | memblock_free(ramdisk_image, ramdisk_end - ramdisk_image); | 389 | panic("initrd too large to handle, " |
397 | printk(KERN_ERR "initrd too large to handle, " | 390 | "disabling initrd (%lld needed, %lld available)\n", |
398 | "disabling initrd\n"); | 391 | ramdisk_size, end_of_lowmem>>1); |
399 | return; | ||
400 | } | 392 | } |
401 | 393 | ||
402 | printk(KERN_INFO "RAMDISK: %08llx - %08llx\n", ramdisk_image, | 394 | printk(KERN_INFO "RAMDISK: %08llx - %08llx\n", ramdisk_image, |
@@ -717,7 +709,6 @@ void __init setup_arch(char **cmdline_p) | |||
717 | apm_info.bios = boot_params.apm_bios_info; | 709 | apm_info.bios = boot_params.apm_bios_info; |
718 | ist_info = boot_params.ist_info; | 710 | ist_info = boot_params.ist_info; |
719 | if (boot_params.sys_desc_table.length != 0) { | 711 | if (boot_params.sys_desc_table.length != 0) { |
720 | set_mca_bus(boot_params.sys_desc_table.table[3] & 0x2); | ||
721 | machine_id = boot_params.sys_desc_table.table[0]; | 712 | machine_id = boot_params.sys_desc_table.table[0]; |
722 | machine_submodel_id = boot_params.sys_desc_table.table[1]; | 713 | machine_submodel_id = boot_params.sys_desc_table.table[1]; |
723 | BIOS_revision = boot_params.sys_desc_table.table[2]; | 714 | BIOS_revision = boot_params.sys_desc_table.table[2]; |
@@ -1012,7 +1003,8 @@ void __init setup_arch(char **cmdline_p) | |||
1012 | init_cpu_to_node(); | 1003 | init_cpu_to_node(); |
1013 | 1004 | ||
1014 | init_apic_mappings(); | 1005 | init_apic_mappings(); |
1015 | ioapic_and_gsi_init(); | 1006 | if (x86_io_apic_ops.init) |
1007 | x86_io_apic_ops.init(); | ||
1016 | 1008 | ||
1017 | kvm_guest_init(); | 1009 | kvm_guest_init(); |
1018 | 1010 | ||
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c index 66c74f481ca..48d2b7ded42 100644 --- a/arch/x86/kernel/smp.c +++ b/arch/x86/kernel/smp.c | |||
@@ -109,6 +109,9 @@ | |||
109 | * about nothing of note with C stepping upwards. | 109 | * about nothing of note with C stepping upwards. |
110 | */ | 110 | */ |
111 | 111 | ||
112 | static atomic_t stopping_cpu = ATOMIC_INIT(-1); | ||
113 | static bool smp_no_nmi_ipi = false; | ||
114 | |||
112 | /* | 115 | /* |
113 | * this function sends a 'reschedule' IPI to another CPU. | 116 | * this function sends a 'reschedule' IPI to another CPU. |
114 | * it goes straight through and wastes no time serializing | 117 | * it goes straight through and wastes no time serializing |
@@ -149,8 +152,6 @@ void native_send_call_func_ipi(const struct cpumask *mask) | |||
149 | free_cpumask_var(allbutself); | 152 | free_cpumask_var(allbutself); |
150 | } | 153 | } |
151 | 154 | ||
152 | static atomic_t stopping_cpu = ATOMIC_INIT(-1); | ||
153 | |||
154 | static int smp_stop_nmi_callback(unsigned int val, struct pt_regs *regs) | 155 | static int smp_stop_nmi_callback(unsigned int val, struct pt_regs *regs) |
155 | { | 156 | { |
156 | /* We are registered on stopping cpu too, avoid spurious NMI */ | 157 | /* We are registered on stopping cpu too, avoid spurious NMI */ |
@@ -162,7 +163,19 @@ static int smp_stop_nmi_callback(unsigned int val, struct pt_regs *regs) | |||
162 | return NMI_HANDLED; | 163 | return NMI_HANDLED; |
163 | } | 164 | } |
164 | 165 | ||
165 | static void native_nmi_stop_other_cpus(int wait) | 166 | /* |
167 | * this function calls the 'stop' function on all other CPUs in the system. | ||
168 | */ | ||
169 | |||
170 | asmlinkage void smp_reboot_interrupt(void) | ||
171 | { | ||
172 | ack_APIC_irq(); | ||
173 | irq_enter(); | ||
174 | stop_this_cpu(NULL); | ||
175 | irq_exit(); | ||
176 | } | ||
177 | |||
178 | static void native_stop_other_cpus(int wait) | ||
166 | { | 179 | { |
167 | unsigned long flags; | 180 | unsigned long flags; |
168 | unsigned long timeout; | 181 | unsigned long timeout; |
@@ -174,20 +187,25 @@ static void native_nmi_stop_other_cpus(int wait) | |||
174 | * Use an own vector here because smp_call_function | 187 | * Use an own vector here because smp_call_function |
175 | * does lots of things not suitable in a panic situation. | 188 | * does lots of things not suitable in a panic situation. |
176 | */ | 189 | */ |
190 | |||
191 | /* | ||
192 | * We start by using the REBOOT_VECTOR irq. | ||
193 | * The irq is treated as a sync point to allow critical | ||
194 | * regions of code on other cpus to release their spin locks | ||
195 | * and re-enable irqs. Jumping straight to an NMI might | ||
196 | * accidentally cause deadlocks with further shutdown/panic | ||
197 | * code. By syncing, we give the cpus up to one second to | ||
198 | * finish their work before we force them off with the NMI. | ||
199 | */ | ||
177 | if (num_online_cpus() > 1) { | 200 | if (num_online_cpus() > 1) { |
178 | /* did someone beat us here? */ | 201 | /* did someone beat us here? */ |
179 | if (atomic_cmpxchg(&stopping_cpu, -1, safe_smp_processor_id()) != -1) | 202 | if (atomic_cmpxchg(&stopping_cpu, -1, safe_smp_processor_id()) != -1) |
180 | return; | 203 | return; |
181 | 204 | ||
182 | if (register_nmi_handler(NMI_LOCAL, smp_stop_nmi_callback, | 205 | /* sync above data before sending IRQ */ |
183 | NMI_FLAG_FIRST, "smp_stop")) | ||
184 | /* Note: we ignore failures here */ | ||
185 | return; | ||
186 | |||
187 | /* sync above data before sending NMI */ | ||
188 | wmb(); | 206 | wmb(); |
189 | 207 | ||
190 | apic->send_IPI_allbutself(NMI_VECTOR); | 208 | apic->send_IPI_allbutself(REBOOT_VECTOR); |
191 | 209 | ||
192 | /* | 210 | /* |
193 | * Don't wait longer than a second if the caller | 211 | * Don't wait longer than a second if the caller |
@@ -197,63 +215,37 @@ static void native_nmi_stop_other_cpus(int wait) | |||
197 | while (num_online_cpus() > 1 && (wait || timeout--)) | 215 | while (num_online_cpus() > 1 && (wait || timeout--)) |
198 | udelay(1); | 216 | udelay(1); |
199 | } | 217 | } |
218 | |||
219 | /* if the REBOOT_VECTOR didn't work, try with the NMI */ | ||
220 | if ((num_online_cpus() > 1) && (!smp_no_nmi_ipi)) { | ||
221 | if (register_nmi_handler(NMI_LOCAL, smp_stop_nmi_callback, | ||
222 | NMI_FLAG_FIRST, "smp_stop")) | ||
223 | /* Note: we ignore failures here */ | ||
224 | /* Hope the REBOOT_IRQ is good enough */ | ||
225 | goto finish; | ||
200 | 226 | ||
201 | local_irq_save(flags); | 227 | /* sync above data before sending IRQ */ |
202 | disable_local_APIC(); | 228 | wmb(); |
203 | local_irq_restore(flags); | ||
204 | } | ||
205 | |||
206 | /* | ||
207 | * this function calls the 'stop' function on all other CPUs in the system. | ||
208 | */ | ||
209 | |||
210 | asmlinkage void smp_reboot_interrupt(void) | ||
211 | { | ||
212 | ack_APIC_irq(); | ||
213 | irq_enter(); | ||
214 | stop_this_cpu(NULL); | ||
215 | irq_exit(); | ||
216 | } | ||
217 | |||
218 | static void native_irq_stop_other_cpus(int wait) | ||
219 | { | ||
220 | unsigned long flags; | ||
221 | unsigned long timeout; | ||
222 | 229 | ||
223 | if (reboot_force) | 230 | pr_emerg("Shutting down cpus with NMI\n"); |
224 | return; | ||
225 | 231 | ||
226 | /* | 232 | apic->send_IPI_allbutself(NMI_VECTOR); |
227 | * Use an own vector here because smp_call_function | ||
228 | * does lots of things not suitable in a panic situation. | ||
229 | * On most systems we could also use an NMI here, | ||
230 | * but there are a few systems around where NMI | ||
231 | * is problematic so stay with an non NMI for now | ||
232 | * (this implies we cannot stop CPUs spinning with irq off | ||
233 | * currently) | ||
234 | */ | ||
235 | if (num_online_cpus() > 1) { | ||
236 | apic->send_IPI_allbutself(REBOOT_VECTOR); | ||
237 | 233 | ||
238 | /* | 234 | /* |
239 | * Don't wait longer than a second if the caller | 235 | * Don't wait longer than a 10 ms if the caller |
240 | * didn't ask us to wait. | 236 | * didn't ask us to wait. |
241 | */ | 237 | */ |
242 | timeout = USEC_PER_SEC; | 238 | timeout = USEC_PER_MSEC * 10; |
243 | while (num_online_cpus() > 1 && (wait || timeout--)) | 239 | while (num_online_cpus() > 1 && (wait || timeout--)) |
244 | udelay(1); | 240 | udelay(1); |
245 | } | 241 | } |
246 | 242 | ||
243 | finish: | ||
247 | local_irq_save(flags); | 244 | local_irq_save(flags); |
248 | disable_local_APIC(); | 245 | disable_local_APIC(); |
249 | local_irq_restore(flags); | 246 | local_irq_restore(flags); |
250 | } | 247 | } |
251 | 248 | ||
252 | static void native_smp_disable_nmi_ipi(void) | ||
253 | { | ||
254 | smp_ops.stop_other_cpus = native_irq_stop_other_cpus; | ||
255 | } | ||
256 | |||
257 | /* | 249 | /* |
258 | * Reschedule call back. | 250 | * Reschedule call back. |
259 | */ | 251 | */ |
@@ -287,8 +279,8 @@ void smp_call_function_single_interrupt(struct pt_regs *regs) | |||
287 | 279 | ||
288 | static int __init nonmi_ipi_setup(char *str) | 280 | static int __init nonmi_ipi_setup(char *str) |
289 | { | 281 | { |
290 | native_smp_disable_nmi_ipi(); | 282 | smp_no_nmi_ipi = true; |
291 | return 1; | 283 | return 1; |
292 | } | 284 | } |
293 | 285 | ||
294 | __setup("nonmi_ipi", nonmi_ipi_setup); | 286 | __setup("nonmi_ipi", nonmi_ipi_setup); |
@@ -298,7 +290,7 @@ struct smp_ops smp_ops = { | |||
298 | .smp_prepare_cpus = native_smp_prepare_cpus, | 290 | .smp_prepare_cpus = native_smp_prepare_cpus, |
299 | .smp_cpus_done = native_smp_cpus_done, | 291 | .smp_cpus_done = native_smp_cpus_done, |
300 | 292 | ||
301 | .stop_other_cpus = native_nmi_stop_other_cpus, | 293 | .stop_other_cpus = native_stop_other_cpus, |
302 | .smp_send_reschedule = native_smp_send_reschedule, | 294 | .smp_send_reschedule = native_smp_send_reschedule, |
303 | 295 | ||
304 | .cpu_up = native_cpu_up, | 296 | .cpu_up = native_cpu_up, |
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 3acaf51dfdd..433529e29be 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c | |||
@@ -299,59 +299,90 @@ void __cpuinit smp_store_cpu_info(int id) | |||
299 | identify_secondary_cpu(c); | 299 | identify_secondary_cpu(c); |
300 | } | 300 | } |
301 | 301 | ||
302 | static void __cpuinit link_thread_siblings(int cpu1, int cpu2) | 302 | static bool __cpuinit |
303 | topology_sane(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o, const char *name) | ||
303 | { | 304 | { |
304 | cpumask_set_cpu(cpu1, cpu_sibling_mask(cpu2)); | 305 | int cpu1 = c->cpu_index, cpu2 = o->cpu_index; |
305 | cpumask_set_cpu(cpu2, cpu_sibling_mask(cpu1)); | 306 | |
306 | cpumask_set_cpu(cpu1, cpu_core_mask(cpu2)); | 307 | return !WARN_ONCE(cpu_to_node(cpu1) != cpu_to_node(cpu2), |
307 | cpumask_set_cpu(cpu2, cpu_core_mask(cpu1)); | 308 | "sched: CPU #%d's %s-sibling CPU #%d is not on the same node! " |
308 | cpumask_set_cpu(cpu1, cpu_llc_shared_mask(cpu2)); | 309 | "[node: %d != %d]. Ignoring dependency.\n", |
309 | cpumask_set_cpu(cpu2, cpu_llc_shared_mask(cpu1)); | 310 | cpu1, name, cpu2, cpu_to_node(cpu1), cpu_to_node(cpu2)); |
310 | } | 311 | } |
311 | 312 | ||
313 | #define link_mask(_m, c1, c2) \ | ||
314 | do { \ | ||
315 | cpumask_set_cpu((c1), cpu_##_m##_mask(c2)); \ | ||
316 | cpumask_set_cpu((c2), cpu_##_m##_mask(c1)); \ | ||
317 | } while (0) | ||
318 | |||
319 | static bool __cpuinit match_smt(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o) | ||
320 | { | ||
321 | if (cpu_has(c, X86_FEATURE_TOPOEXT)) { | ||
322 | int cpu1 = c->cpu_index, cpu2 = o->cpu_index; | ||
323 | |||
324 | if (c->phys_proc_id == o->phys_proc_id && | ||
325 | per_cpu(cpu_llc_id, cpu1) == per_cpu(cpu_llc_id, cpu2) && | ||
326 | c->compute_unit_id == o->compute_unit_id) | ||
327 | return topology_sane(c, o, "smt"); | ||
328 | |||
329 | } else if (c->phys_proc_id == o->phys_proc_id && | ||
330 | c->cpu_core_id == o->cpu_core_id) { | ||
331 | return topology_sane(c, o, "smt"); | ||
332 | } | ||
333 | |||
334 | return false; | ||
335 | } | ||
336 | |||
337 | static bool __cpuinit match_llc(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o) | ||
338 | { | ||
339 | int cpu1 = c->cpu_index, cpu2 = o->cpu_index; | ||
340 | |||
341 | if (per_cpu(cpu_llc_id, cpu1) != BAD_APICID && | ||
342 | per_cpu(cpu_llc_id, cpu1) == per_cpu(cpu_llc_id, cpu2)) | ||
343 | return topology_sane(c, o, "llc"); | ||
344 | |||
345 | return false; | ||
346 | } | ||
347 | |||
348 | static bool __cpuinit match_mc(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o) | ||
349 | { | ||
350 | if (c->phys_proc_id == o->phys_proc_id) | ||
351 | return topology_sane(c, o, "mc"); | ||
352 | |||
353 | return false; | ||
354 | } | ||
312 | 355 | ||
313 | void __cpuinit set_cpu_sibling_map(int cpu) | 356 | void __cpuinit set_cpu_sibling_map(int cpu) |
314 | { | 357 | { |
315 | int i; | 358 | bool has_mc = boot_cpu_data.x86_max_cores > 1; |
359 | bool has_smt = smp_num_siblings > 1; | ||
316 | struct cpuinfo_x86 *c = &cpu_data(cpu); | 360 | struct cpuinfo_x86 *c = &cpu_data(cpu); |
361 | struct cpuinfo_x86 *o; | ||
362 | int i; | ||
317 | 363 | ||
318 | cpumask_set_cpu(cpu, cpu_sibling_setup_mask); | 364 | cpumask_set_cpu(cpu, cpu_sibling_setup_mask); |
319 | 365 | ||
320 | if (smp_num_siblings > 1) { | 366 | if (!has_smt && !has_mc) { |
321 | for_each_cpu(i, cpu_sibling_setup_mask) { | ||
322 | struct cpuinfo_x86 *o = &cpu_data(i); | ||
323 | |||
324 | if (cpu_has(c, X86_FEATURE_TOPOEXT)) { | ||
325 | if (c->phys_proc_id == o->phys_proc_id && | ||
326 | per_cpu(cpu_llc_id, cpu) == per_cpu(cpu_llc_id, i) && | ||
327 | c->compute_unit_id == o->compute_unit_id) | ||
328 | link_thread_siblings(cpu, i); | ||
329 | } else if (c->phys_proc_id == o->phys_proc_id && | ||
330 | c->cpu_core_id == o->cpu_core_id) { | ||
331 | link_thread_siblings(cpu, i); | ||
332 | } | ||
333 | } | ||
334 | } else { | ||
335 | cpumask_set_cpu(cpu, cpu_sibling_mask(cpu)); | 367 | cpumask_set_cpu(cpu, cpu_sibling_mask(cpu)); |
336 | } | 368 | cpumask_set_cpu(cpu, cpu_llc_shared_mask(cpu)); |
337 | 369 | cpumask_set_cpu(cpu, cpu_core_mask(cpu)); | |
338 | cpumask_set_cpu(cpu, cpu_llc_shared_mask(cpu)); | ||
339 | |||
340 | if (__this_cpu_read(cpu_info.x86_max_cores) == 1) { | ||
341 | cpumask_copy(cpu_core_mask(cpu), cpu_sibling_mask(cpu)); | ||
342 | c->booted_cores = 1; | 370 | c->booted_cores = 1; |
343 | return; | 371 | return; |
344 | } | 372 | } |
345 | 373 | ||
346 | for_each_cpu(i, cpu_sibling_setup_mask) { | 374 | for_each_cpu(i, cpu_sibling_setup_mask) { |
347 | if (per_cpu(cpu_llc_id, cpu) != BAD_APICID && | 375 | o = &cpu_data(i); |
348 | per_cpu(cpu_llc_id, cpu) == per_cpu(cpu_llc_id, i)) { | 376 | |
349 | cpumask_set_cpu(i, cpu_llc_shared_mask(cpu)); | 377 | if ((i == cpu) || (has_smt && match_smt(c, o))) |
350 | cpumask_set_cpu(cpu, cpu_llc_shared_mask(i)); | 378 | link_mask(sibling, cpu, i); |
351 | } | 379 | |
352 | if (c->phys_proc_id == cpu_data(i).phys_proc_id) { | 380 | if ((i == cpu) || (has_mc && match_llc(c, o))) |
353 | cpumask_set_cpu(i, cpu_core_mask(cpu)); | 381 | link_mask(llc_shared, cpu, i); |
354 | cpumask_set_cpu(cpu, cpu_core_mask(i)); | 382 | |
383 | if ((i == cpu) || (has_mc && match_mc(c, o))) { | ||
384 | link_mask(core, cpu, i); | ||
385 | |||
355 | /* | 386 | /* |
356 | * Does this new cpu bringup a new core? | 387 | * Does this new cpu bringup a new core? |
357 | */ | 388 | */ |
@@ -382,8 +413,7 @@ const struct cpumask *cpu_coregroup_mask(int cpu) | |||
382 | * For perf, we return last level cache shared map. | 413 | * For perf, we return last level cache shared map. |
383 | * And for power savings, we return cpu_core_map | 414 | * And for power savings, we return cpu_core_map |
384 | */ | 415 | */ |
385 | if ((sched_mc_power_savings || sched_smt_power_savings) && | 416 | if (!(cpu_has(c, X86_FEATURE_AMD_DCM))) |
386 | !(cpu_has(c, X86_FEATURE_AMD_DCM))) | ||
387 | return cpu_core_mask(cpu); | 417 | return cpu_core_mask(cpu); |
388 | else | 418 | else |
389 | return cpu_llc_shared_mask(cpu); | 419 | return cpu_llc_shared_mask(cpu); |
diff --git a/arch/x86/kernel/test_rodata.c b/arch/x86/kernel/test_rodata.c index c29e235792a..b79133abda4 100644 --- a/arch/x86/kernel/test_rodata.c +++ b/arch/x86/kernel/test_rodata.c | |||
@@ -12,6 +12,7 @@ | |||
12 | #include <linux/module.h> | 12 | #include <linux/module.h> |
13 | #include <asm/cacheflush.h> | 13 | #include <asm/cacheflush.h> |
14 | #include <asm/sections.h> | 14 | #include <asm/sections.h> |
15 | #include <asm/asm.h> | ||
15 | 16 | ||
16 | int rodata_test(void) | 17 | int rodata_test(void) |
17 | { | 18 | { |
@@ -42,14 +43,7 @@ int rodata_test(void) | |||
42 | ".section .fixup,\"ax\"\n" | 43 | ".section .fixup,\"ax\"\n" |
43 | "2: jmp 1b\n" | 44 | "2: jmp 1b\n" |
44 | ".previous\n" | 45 | ".previous\n" |
45 | ".section __ex_table,\"a\"\n" | 46 | _ASM_EXTABLE(0b,2b) |
46 | " .align 16\n" | ||
47 | #ifdef CONFIG_X86_32 | ||
48 | " .long 0b,2b\n" | ||
49 | #else | ||
50 | " .quad 0b,2b\n" | ||
51 | #endif | ||
52 | ".previous" | ||
53 | : [rslt] "=r" (result) | 47 | : [rslt] "=r" (result) |
54 | : [rodata_test] "r" (&rodata_test_data), [zero] "r" (0UL) | 48 | : [rodata_test] "r" (&rodata_test_data), [zero] "r" (0UL) |
55 | ); | 49 | ); |
diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c index c6eba2b4267..24d3c91e981 100644 --- a/arch/x86/kernel/time.c +++ b/arch/x86/kernel/time.c | |||
@@ -14,7 +14,6 @@ | |||
14 | #include <linux/i8253.h> | 14 | #include <linux/i8253.h> |
15 | #include <linux/time.h> | 15 | #include <linux/time.h> |
16 | #include <linux/export.h> | 16 | #include <linux/export.h> |
17 | #include <linux/mca.h> | ||
18 | 17 | ||
19 | #include <asm/vsyscall.h> | 18 | #include <asm/vsyscall.h> |
20 | #include <asm/x86_init.h> | 19 | #include <asm/x86_init.h> |
@@ -58,11 +57,6 @@ EXPORT_SYMBOL(profile_pc); | |||
58 | static irqreturn_t timer_interrupt(int irq, void *dev_id) | 57 | static irqreturn_t timer_interrupt(int irq, void *dev_id) |
59 | { | 58 | { |
60 | global_clock_event->event_handler(global_clock_event); | 59 | global_clock_event->event_handler(global_clock_event); |
61 | |||
62 | /* MCA bus quirk: Acknowledge irq0 by setting bit 7 in port 0x61 */ | ||
63 | if (MCA_bus) | ||
64 | outb_p(inb_p(0x61)| 0x80, 0x61); | ||
65 | |||
66 | return IRQ_HANDLED; | 60 | return IRQ_HANDLED; |
67 | } | 61 | } |
68 | 62 | ||
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index ff9281f1602..ff08457a025 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c | |||
@@ -37,10 +37,6 @@ | |||
37 | #include <linux/eisa.h> | 37 | #include <linux/eisa.h> |
38 | #endif | 38 | #endif |
39 | 39 | ||
40 | #ifdef CONFIG_MCA | ||
41 | #include <linux/mca.h> | ||
42 | #endif | ||
43 | |||
44 | #if defined(CONFIG_EDAC) | 40 | #if defined(CONFIG_EDAC) |
45 | #include <linux/edac.h> | 41 | #include <linux/edac.h> |
46 | #endif | 42 | #endif |
@@ -50,6 +46,7 @@ | |||
50 | #include <asm/processor.h> | 46 | #include <asm/processor.h> |
51 | #include <asm/debugreg.h> | 47 | #include <asm/debugreg.h> |
52 | #include <linux/atomic.h> | 48 | #include <linux/atomic.h> |
49 | #include <asm/ftrace.h> | ||
53 | #include <asm/traps.h> | 50 | #include <asm/traps.h> |
54 | #include <asm/desc.h> | 51 | #include <asm/desc.h> |
55 | #include <asm/i387.h> | 52 | #include <asm/i387.h> |
@@ -303,8 +300,13 @@ gp_in_kernel: | |||
303 | } | 300 | } |
304 | 301 | ||
305 | /* May run on IST stack. */ | 302 | /* May run on IST stack. */ |
306 | dotraplinkage void __kprobes do_int3(struct pt_regs *regs, long error_code) | 303 | dotraplinkage void __kprobes notrace do_int3(struct pt_regs *regs, long error_code) |
307 | { | 304 | { |
305 | #ifdef CONFIG_DYNAMIC_FTRACE | ||
306 | /* ftrace must be first, everything else may cause a recursive crash */ | ||
307 | if (unlikely(modifying_ftrace_code) && ftrace_int3_handler(regs)) | ||
308 | return; | ||
309 | #endif | ||
308 | #ifdef CONFIG_KGDB_LOW_LEVEL_TRAP | 310 | #ifdef CONFIG_KGDB_LOW_LEVEL_TRAP |
309 | if (kgdb_ll_trap(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP, | 311 | if (kgdb_ll_trap(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP, |
310 | SIGTRAP) == NOTIFY_STOP) | 312 | SIGTRAP) == NOTIFY_STOP) |
diff --git a/arch/x86/kernel/vsmp_64.c b/arch/x86/kernel/vsmp_64.c index a1d804bcd48..8eeb55a551b 100644 --- a/arch/x86/kernel/vsmp_64.c +++ b/arch/x86/kernel/vsmp_64.c | |||
@@ -15,6 +15,7 @@ | |||
15 | #include <linux/init.h> | 15 | #include <linux/init.h> |
16 | #include <linux/pci_ids.h> | 16 | #include <linux/pci_ids.h> |
17 | #include <linux/pci_regs.h> | 17 | #include <linux/pci_regs.h> |
18 | #include <linux/smp.h> | ||
18 | 19 | ||
19 | #include <asm/apic.h> | 20 | #include <asm/apic.h> |
20 | #include <asm/pci-direct.h> | 21 | #include <asm/pci-direct.h> |
@@ -22,6 +23,8 @@ | |||
22 | #include <asm/paravirt.h> | 23 | #include <asm/paravirt.h> |
23 | #include <asm/setup.h> | 24 | #include <asm/setup.h> |
24 | 25 | ||
26 | #define TOPOLOGY_REGISTER_OFFSET 0x10 | ||
27 | |||
25 | #if defined CONFIG_PCI && defined CONFIG_PARAVIRT | 28 | #if defined CONFIG_PCI && defined CONFIG_PARAVIRT |
26 | /* | 29 | /* |
27 | * Interrupt control on vSMPowered systems: | 30 | * Interrupt control on vSMPowered systems: |
@@ -149,12 +152,49 @@ int is_vsmp_box(void) | |||
149 | return 0; | 152 | return 0; |
150 | } | 153 | } |
151 | #endif | 154 | #endif |
155 | |||
156 | static void __init vsmp_cap_cpus(void) | ||
157 | { | ||
158 | #if !defined(CONFIG_X86_VSMP) && defined(CONFIG_SMP) | ||
159 | void __iomem *address; | ||
160 | unsigned int cfg, topology, node_shift, maxcpus; | ||
161 | |||
162 | /* | ||
163 | * CONFIG_X86_VSMP is not configured, so limit the number CPUs to the | ||
164 | * ones present in the first board, unless explicitly overridden by | ||
165 | * setup_max_cpus | ||
166 | */ | ||
167 | if (setup_max_cpus != NR_CPUS) | ||
168 | return; | ||
169 | |||
170 | /* Read the vSMP Foundation topology register */ | ||
171 | cfg = read_pci_config(0, 0x1f, 0, PCI_BASE_ADDRESS_0); | ||
172 | address = early_ioremap(cfg + TOPOLOGY_REGISTER_OFFSET, 4); | ||
173 | if (WARN_ON(!address)) | ||
174 | return; | ||
175 | |||
176 | topology = readl(address); | ||
177 | node_shift = (topology >> 16) & 0x7; | ||
178 | if (!node_shift) | ||
179 | /* The value 0 should be decoded as 8 */ | ||
180 | node_shift = 8; | ||
181 | maxcpus = (topology & ((1 << node_shift) - 1)) + 1; | ||
182 | |||
183 | pr_info("vSMP CTL: Capping CPUs to %d (CONFIG_X86_VSMP is unset)\n", | ||
184 | maxcpus); | ||
185 | setup_max_cpus = maxcpus; | ||
186 | early_iounmap(address, 4); | ||
187 | #endif | ||
188 | } | ||
189 | |||
152 | void __init vsmp_init(void) | 190 | void __init vsmp_init(void) |
153 | { | 191 | { |
154 | detect_vsmp_box(); | 192 | detect_vsmp_box(); |
155 | if (!is_vsmp_box()) | 193 | if (!is_vsmp_box()) |
156 | return; | 194 | return; |
157 | 195 | ||
196 | vsmp_cap_cpus(); | ||
197 | |||
158 | set_vsmp_pv_ops(); | 198 | set_vsmp_pv_ops(); |
159 | return; | 199 | return; |
160 | } | 200 | } |
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index 9cf71d0b2d3..35c5e543f55 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c | |||
@@ -18,6 +18,7 @@ | |||
18 | #include <asm/e820.h> | 18 | #include <asm/e820.h> |
19 | #include <asm/time.h> | 19 | #include <asm/time.h> |
20 | #include <asm/irq.h> | 20 | #include <asm/irq.h> |
21 | #include <asm/io_apic.h> | ||
21 | #include <asm/pat.h> | 22 | #include <asm/pat.h> |
22 | #include <asm/tsc.h> | 23 | #include <asm/tsc.h> |
23 | #include <asm/iommu.h> | 24 | #include <asm/iommu.h> |
@@ -119,3 +120,10 @@ struct x86_msi_ops x86_msi = { | |||
119 | .teardown_msi_irqs = default_teardown_msi_irqs, | 120 | .teardown_msi_irqs = default_teardown_msi_irqs, |
120 | .restore_msi_irqs = default_restore_msi_irqs, | 121 | .restore_msi_irqs = default_restore_msi_irqs, |
121 | }; | 122 | }; |
123 | |||
124 | struct x86_io_apic_ops x86_io_apic_ops = { | ||
125 | .init = native_io_apic_init_mappings, | ||
126 | .read = native_io_apic_read, | ||
127 | .write = native_io_apic_write, | ||
128 | .modify = native_io_apic_modify, | ||
129 | }; | ||
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c index e62728e30b0..bd18149b2b0 100644 --- a/arch/x86/kernel/xsave.c +++ b/arch/x86/kernel/xsave.c | |||
@@ -48,8 +48,6 @@ void __sanitize_i387_state(struct task_struct *tsk) | |||
48 | if (!fx) | 48 | if (!fx) |
49 | return; | 49 | return; |
50 | 50 | ||
51 | BUG_ON(__thread_has_fpu(tsk)); | ||
52 | |||
53 | xstate_bv = tsk->thread.fpu.state->xsave.xsave_hdr.xstate_bv; | 51 | xstate_bv = tsk->thread.fpu.state->xsave.xsave_hdr.xstate_bv; |
54 | 52 | ||
55 | /* | 53 | /* |