Diffstat (limited to 'arch/x86/kernel')
77 files changed, 2522 insertions(+), 1496 deletions(-)
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 04105574c8e9..8baca3c4871c 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
| @@ -17,22 +17,9 @@ CFLAGS_REMOVE_ftrace.o = -pg | |||
| 17 | CFLAGS_REMOVE_early_printk.o = -pg | 17 | CFLAGS_REMOVE_early_printk.o = -pg |
| 18 | endif | 18 | endif |
| 19 | 19 | ||
| 20 | # | ||
| 21 | # vsyscalls (which work on the user stack) should have | ||
| 22 | # no stack-protector checks: | ||
| 23 | # | ||
| 24 | nostackp := $(call cc-option, -fno-stack-protector) | ||
| 25 | CFLAGS_vsyscall_64.o := $(PROFILING) -g0 $(nostackp) | ||
| 26 | CFLAGS_hpet.o := $(nostackp) | ||
| 27 | CFLAGS_paravirt.o := $(nostackp) | ||
| 28 | GCOV_PROFILE_vsyscall_64.o := n | ||
| 29 | GCOV_PROFILE_hpet.o := n | ||
| 30 | GCOV_PROFILE_tsc.o := n | ||
| 31 | GCOV_PROFILE_paravirt.o := n | ||
| 32 | |||
| 33 | obj-y := process_$(BITS).o signal.o entry_$(BITS).o | 20 | obj-y := process_$(BITS).o signal.o entry_$(BITS).o |
| 34 | obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o | 21 | obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o |
| 35 | obj-y += time.o ioport.o ldt.o dumpstack.o | 22 | obj-y += time.o ioport.o ldt.o dumpstack.o nmi.o |
| 36 | obj-y += setup.o x86_init.o i8259.o irqinit.o jump_label.o | 23 | obj-y += setup.o x86_init.o i8259.o irqinit.o jump_label.o |
| 37 | obj-$(CONFIG_IRQ_WORK) += irq_work.o | 24 | obj-$(CONFIG_IRQ_WORK) += irq_work.o |
| 38 | obj-y += probe_roms.o | 25 | obj-y += probe_roms.o |
diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c
index 5812404a0d4c..f50e7fb2a201 100644
--- a/arch/x86/kernel/acpi/cstate.c
+++ b/arch/x86/kernel/acpi/cstate.c
| @@ -149,6 +149,29 @@ int acpi_processor_ffh_cstate_probe(unsigned int cpu, | |||
| 149 | } | 149 | } |
| 150 | EXPORT_SYMBOL_GPL(acpi_processor_ffh_cstate_probe); | 150 | EXPORT_SYMBOL_GPL(acpi_processor_ffh_cstate_probe); |
| 151 | 151 | ||
| 152 | /* | ||
| 153 | * This uses new MONITOR/MWAIT instructions on P4 processors with PNI, | ||
| 154 | * which can obviate IPI to trigger checking of need_resched. | ||
| 155 | * We execute MONITOR against need_resched and enter optimized wait state | ||
| 156 | * through MWAIT. Whenever someone changes need_resched, we would be woken | ||
| 157 | * up from MWAIT (without an IPI). | ||
| 158 | * | ||
| 159 | * New with Core Duo processors, MWAIT can take some hints based on CPU | ||
| 160 | * capability. | ||
| 161 | */ | ||
| 162 | void mwait_idle_with_hints(unsigned long ax, unsigned long cx) | ||
| 163 | { | ||
| 164 | if (!need_resched()) { | ||
| 165 | if (this_cpu_has(X86_FEATURE_CLFLUSH_MONITOR)) | ||
| 166 | clflush((void *)&current_thread_info()->flags); | ||
| 167 | |||
| 168 | __monitor((void *)&current_thread_info()->flags, 0, 0); | ||
| 169 | smp_mb(); | ||
| 170 | if (!need_resched()) | ||
| 171 | __mwait(ax, cx); | ||
| 172 | } | ||
| 173 | } | ||
| 174 | |||
| 152 | void acpi_processor_ffh_cstate_enter(struct acpi_processor_cx *cx) | 175 | void acpi_processor_ffh_cstate_enter(struct acpi_processor_cx *cx) |
| 153 | { | 176 | { |
| 154 | unsigned int cpu = smp_processor_id(); | 177 | unsigned int cpu = smp_processor_id(); |
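
The EAX/ECX values passed to mwait_idle_with_hints() follow the MWAIT hint encoding: EAX bits 7:4 name the target C-state (as cstate-1) and bits 3:0 a sub-state, while ECX bit 0 asks the CPU to treat masked interrupts as break events. A minimal caller sketch, where the MWAIT_HINT macro and the C2 request are illustrative assumptions rather than part of this patch:

    /* Illustrative hint encoding; not part of this hunk. */
    #define MWAIT_HINT(cstate, substate) ((((cstate) - 1) << 4) | (substate))
    #define MWAIT_ECX_INTERRUPT_BREAK    0x1

    static void enter_c2_example(void)
    {
            /* Request C2 (hint 0x10); wake even with interrupts masked. */
            mwait_idle_with_hints(MWAIT_HINT(2, 0), MWAIT_ECX_INTERRUPT_BREAK);
    }
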
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index c63822816249..1f84794f0759 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
| @@ -738,5 +738,5 @@ void __kprobes text_poke_smp_batch(struct text_poke_param *params, int n) | |||
| 738 | 738 | ||
| 739 | atomic_set(&stop_machine_first, 1); | 739 | atomic_set(&stop_machine_first, 1); |
| 740 | wrote_text = 0; | 740 | wrote_text = 0; |
| 741 | __stop_machine(stop_machine_text_poke, (void *)&tpp, NULL); | 741 | __stop_machine(stop_machine_text_poke, (void *)&tpp, cpu_online_mask); |
| 742 | } | 742 | } |
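
The third argument of __stop_machine() selects the CPUs that actually execute the callback: with NULL only one CPU runs it while the others merely spin with interrupts disabled, whereas cpu_online_mask makes every online CPU run stop_machine_text_poke, so each one takes part in the poke protocol (waiting for the write, then serializing its own instruction stream) rather than just being quiesced. A sketch of the contract, assuming the signature from linux/stop_machine.h:

    /* fn() runs on every CPU in cpus; cpus == NULL means just one CPU
     * runs fn() while the remaining online CPUs spin, irqs disabled. */
    int __stop_machine(int (*fn)(void *), void *data,
                       const struct cpumask *cpus);
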
diff --git a/arch/x86/kernel/amd_gart_64.c b/arch/x86/kernel/amd_gart_64.c
index b117efd24f71..b1e7c7f7a0af 100644
--- a/arch/x86/kernel/amd_gart_64.c
+++ b/arch/x86/kernel/amd_gart_64.c
| @@ -5,7 +5,7 @@ | |||
| 5 | * This allows to use PCI devices that only support 32bit addresses on systems | 5 | * This allows to use PCI devices that only support 32bit addresses on systems |
| 6 | * with more than 4GB. | 6 | * with more than 4GB. |
| 7 | * | 7 | * |
| 8 | * See Documentation/PCI/PCI-DMA-mapping.txt for the interface specification. | 8 | * See Documentation/DMA-API-HOWTO.txt for the interface specification. |
| 9 | * | 9 | * |
| 10 | * Copyright 2002 Andi Kleen, SuSE Labs. | 10 | * Copyright 2002 Andi Kleen, SuSE Labs. |
| 11 | * Subject to the GNU General Public License v2 only. | 11 | * Subject to the GNU General Public License v2 only. |
| @@ -30,7 +30,7 @@ | |||
| 30 | #include <linux/syscore_ops.h> | 30 | #include <linux/syscore_ops.h> |
| 31 | #include <linux/io.h> | 31 | #include <linux/io.h> |
| 32 | #include <linux/gfp.h> | 32 | #include <linux/gfp.h> |
| 33 | #include <asm/atomic.h> | 33 | #include <linux/atomic.h> |
| 34 | #include <asm/mtrr.h> | 34 | #include <asm/mtrr.h> |
| 35 | #include <asm/pgtable.h> | 35 | #include <asm/pgtable.h> |
| 36 | #include <asm/proto.h> | 36 | #include <asm/proto.h> |
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index b24be38c8cf8..f98d84caf94c 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
| @@ -38,7 +38,7 @@ | |||
| 38 | #include <asm/perf_event.h> | 38 | #include <asm/perf_event.h> |
| 39 | #include <asm/x86_init.h> | 39 | #include <asm/x86_init.h> |
| 40 | #include <asm/pgalloc.h> | 40 | #include <asm/pgalloc.h> |
| 41 | #include <asm/atomic.h> | 41 | #include <linux/atomic.h> |
| 42 | #include <asm/mpspec.h> | 42 | #include <asm/mpspec.h> |
| 43 | #include <asm/i8259.h> | 43 | #include <asm/i8259.h> |
| 44 | #include <asm/proto.h> | 44 | #include <asm/proto.h> |
| @@ -186,7 +186,7 @@ static struct resource lapic_resource = { | |||
| 186 | .flags = IORESOURCE_MEM | IORESOURCE_BUSY, | 186 | .flags = IORESOURCE_MEM | IORESOURCE_BUSY, |
| 187 | }; | 187 | }; |
| 188 | 188 | ||
| 189 | static unsigned int calibration_result; | 189 | unsigned int lapic_timer_frequency = 0; |
| 190 | 190 | ||
| 191 | static void apic_pm_activate(void); | 191 | static void apic_pm_activate(void); |
| 192 | 192 | ||
| @@ -454,7 +454,7 @@ static void lapic_timer_setup(enum clock_event_mode mode, | |||
| 454 | switch (mode) { | 454 | switch (mode) { |
| 455 | case CLOCK_EVT_MODE_PERIODIC: | 455 | case CLOCK_EVT_MODE_PERIODIC: |
| 456 | case CLOCK_EVT_MODE_ONESHOT: | 456 | case CLOCK_EVT_MODE_ONESHOT: |
| 457 | __setup_APIC_LVTT(calibration_result, | 457 | __setup_APIC_LVTT(lapic_timer_frequency, |
| 458 | mode != CLOCK_EVT_MODE_PERIODIC, 1); | 458 | mode != CLOCK_EVT_MODE_PERIODIC, 1); |
| 459 | break; | 459 | break; |
| 460 | case CLOCK_EVT_MODE_UNUSED: | 460 | case CLOCK_EVT_MODE_UNUSED: |
| @@ -638,6 +638,25 @@ static int __init calibrate_APIC_clock(void) | |||
| 638 | long delta, deltatsc; | 638 | long delta, deltatsc; |
| 639 | int pm_referenced = 0; | 639 | int pm_referenced = 0; |
| 640 | 640 | ||
| 641 | /** | ||
| 642 | * check if lapic timer has already been calibrated by platform | ||
| 643 | * specific routine, such as tsc calibration code. if so, we just fill | ||
| 644 | * in the clockevent structure and return. | ||
| 645 | */ | ||
| 646 | |||
| 647 | if (lapic_timer_frequency) { | ||
| 648 | apic_printk(APIC_VERBOSE, "lapic timer already calibrated %d\n", | ||
| 649 | lapic_timer_frequency); | ||
| 650 | lapic_clockevent.mult = div_sc(lapic_timer_frequency/APIC_DIVISOR, | ||
| 651 | TICK_NSEC, lapic_clockevent.shift); | ||
| 652 | lapic_clockevent.max_delta_ns = | ||
| 653 | clockevent_delta2ns(0x7FFFFF, &lapic_clockevent); | ||
| 654 | lapic_clockevent.min_delta_ns = | ||
| 655 | clockevent_delta2ns(0xF, &lapic_clockevent); | ||
| 656 | lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY; | ||
| 657 | return 0; | ||
| 658 | } | ||
| 659 | |||
| 641 | local_irq_disable(); | 660 | local_irq_disable(); |
| 642 | 661 | ||
| 643 | /* Replace the global interrupt handler */ | 662 | /* Replace the global interrupt handler */ |
| @@ -679,12 +698,12 @@ static int __init calibrate_APIC_clock(void) | |||
| 679 | lapic_clockevent.min_delta_ns = | 698 | lapic_clockevent.min_delta_ns = |
| 680 | clockevent_delta2ns(0xF, &lapic_clockevent); | 699 | clockevent_delta2ns(0xF, &lapic_clockevent); |
| 681 | 700 | ||
| 682 | calibration_result = (delta * APIC_DIVISOR) / LAPIC_CAL_LOOPS; | 701 | lapic_timer_frequency = (delta * APIC_DIVISOR) / LAPIC_CAL_LOOPS; |
| 683 | 702 | ||
| 684 | apic_printk(APIC_VERBOSE, "..... delta %ld\n", delta); | 703 | apic_printk(APIC_VERBOSE, "..... delta %ld\n", delta); |
| 685 | apic_printk(APIC_VERBOSE, "..... mult: %u\n", lapic_clockevent.mult); | 704 | apic_printk(APIC_VERBOSE, "..... mult: %u\n", lapic_clockevent.mult); |
| 686 | apic_printk(APIC_VERBOSE, "..... calibration result: %u\n", | 705 | apic_printk(APIC_VERBOSE, "..... calibration result: %u\n", |
| 687 | calibration_result); | 706 | lapic_timer_frequency); |
| 688 | 707 | ||
| 689 | if (cpu_has_tsc) { | 708 | if (cpu_has_tsc) { |
| 690 | apic_printk(APIC_VERBOSE, "..... CPU clock speed is " | 709 | apic_printk(APIC_VERBOSE, "..... CPU clock speed is " |
| @@ -695,13 +714,13 @@ static int __init calibrate_APIC_clock(void) | |||
| 695 | 714 | ||
| 696 | apic_printk(APIC_VERBOSE, "..... host bus clock speed is " | 715 | apic_printk(APIC_VERBOSE, "..... host bus clock speed is " |
| 697 | "%u.%04u MHz.\n", | 716 | "%u.%04u MHz.\n", |
| 698 | calibration_result / (1000000 / HZ), | 717 | lapic_timer_frequency / (1000000 / HZ), |
| 699 | calibration_result % (1000000 / HZ)); | 718 | lapic_timer_frequency % (1000000 / HZ)); |
| 700 | 719 | ||
| 701 | /* | 720 | /* |
| 702 | * Do a sanity check on the APIC calibration result | 721 | * Do a sanity check on the APIC calibration result |
| 703 | */ | 722 | */ |
| 704 | if (calibration_result < (1000000 / HZ)) { | 723 | if (lapic_timer_frequency < (1000000 / HZ)) { |
| 705 | local_irq_enable(); | 724 | local_irq_enable(); |
| 706 | pr_warning("APIC frequency too slow, disabling apic timer\n"); | 725 | pr_warning("APIC frequency too slow, disabling apic timer\n"); |
| 707 | return -1; | 726 | return -1; |
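
The mult/shift arithmetic in the pre-calibrated path above is the usual clockevents fixed-point conversion: div_sc(ticks, nsec, shift) computes (ticks << shift) / nsec. A worked example with assumed figures (HZ = 1000, APIC_DIVISOR = 16, a 200 MHz bus clock, and shift = 32):

    /* Assumed figures, for illustration only.
     * lapic_timer_frequency = undivided APIC ticks per jiffy
     *                       = 200000000 / 1000 = 200000
     * Divided ticks per jiffy: 200000 / 16 = 12500, over TICK_NSEC = 1000000 ns
     * mult = (12500ULL << 32) / 1000000 = 53687091
     *        (0.0125 ticks per nanosecond in 32.32 fixed point)
     * max_delta_ns = clockevent_delta2ns(0x7FFFFF, ...)
     *              = 0x7FFFFF ticks / 0.0125 ticks-per-ns = ~671 ms
     *        of maximum programmable sleep.
     */
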
| @@ -1437,27 +1456,21 @@ void enable_x2apic(void) | |||
| 1437 | 1456 | ||
| 1438 | int __init enable_IR(void) | 1457 | int __init enable_IR(void) |
| 1439 | { | 1458 | { |
| 1440 | #ifdef CONFIG_INTR_REMAP | 1459 | #ifdef CONFIG_IRQ_REMAP |
| 1441 | if (!intr_remapping_supported()) { | 1460 | if (!intr_remapping_supported()) { |
| 1442 | pr_debug("intr-remapping not supported\n"); | 1461 | pr_debug("intr-remapping not supported\n"); |
| 1443 | return 0; | 1462 | return -1; |
| 1444 | } | 1463 | } |
| 1445 | 1464 | ||
| 1446 | if (!x2apic_preenabled && skip_ioapic_setup) { | 1465 | if (!x2apic_preenabled && skip_ioapic_setup) { |
| 1447 | pr_info("Skipped enabling intr-remap because of skipping " | 1466 | pr_info("Skipped enabling intr-remap because of skipping " |
| 1448 | "io-apic setup\n"); | 1467 | "io-apic setup\n"); |
| 1449 | return 0; | 1468 | return -1; |
| 1450 | } | 1469 | } |
| 1451 | 1470 | ||
| 1452 | if (enable_intr_remapping(x2apic_supported())) | 1471 | return enable_intr_remapping(); |
| 1453 | return 0; | ||
| 1454 | |||
| 1455 | pr_info("Enabled Interrupt-remapping\n"); | ||
| 1456 | |||
| 1457 | return 1; | ||
| 1458 | |||
| 1459 | #endif | 1472 | #endif |
| 1460 | return 0; | 1473 | return -1; |
| 1461 | } | 1474 | } |
| 1462 | 1475 | ||
| 1463 | void __init enable_IR_x2apic(void) | 1476 | void __init enable_IR_x2apic(void) |
| @@ -1481,11 +1494,11 @@ void __init enable_IR_x2apic(void) | |||
| 1481 | mask_ioapic_entries(); | 1494 | mask_ioapic_entries(); |
| 1482 | 1495 | ||
| 1483 | if (dmar_table_init_ret) | 1496 | if (dmar_table_init_ret) |
| 1484 | ret = 0; | 1497 | ret = -1; |
| 1485 | else | 1498 | else |
| 1486 | ret = enable_IR(); | 1499 | ret = enable_IR(); |
| 1487 | 1500 | ||
| 1488 | if (!ret) { | 1501 | if (ret < 0) { |
| 1489 | /* IR is required if there is APIC ID > 255 even when running | 1502 | /* IR is required if there is APIC ID > 255 even when running |
| 1490 | * under KVM | 1503 | * under KVM |
| 1491 | */ | 1504 | */ |
| @@ -1499,6 +1512,9 @@ void __init enable_IR_x2apic(void) | |||
| 1499 | x2apic_force_phys(); | 1512 | x2apic_force_phys(); |
| 1500 | } | 1513 | } |
| 1501 | 1514 | ||
| 1515 | if (ret == IRQ_REMAP_XAPIC_MODE) | ||
| 1516 | goto nox2apic; | ||
| 1517 | |||
| 1502 | x2apic_enabled = 1; | 1518 | x2apic_enabled = 1; |
| 1503 | 1519 | ||
| 1504 | if (x2apic_supported() && !x2apic_mode) { | 1520 | if (x2apic_supported() && !x2apic_mode) { |
| @@ -1508,19 +1524,21 @@ void __init enable_IR_x2apic(void) | |||
| 1508 | } | 1524 | } |
| 1509 | 1525 | ||
| 1510 | nox2apic: | 1526 | nox2apic: |
| 1511 | if (!ret) /* IR enabling failed */ | 1527 | if (ret < 0) /* IR enabling failed */ |
| 1512 | restore_ioapic_entries(); | 1528 | restore_ioapic_entries(); |
| 1513 | legacy_pic->restore_mask(); | 1529 | legacy_pic->restore_mask(); |
| 1514 | local_irq_restore(flags); | 1530 | local_irq_restore(flags); |
| 1515 | 1531 | ||
| 1516 | out: | 1532 | out: |
| 1517 | if (x2apic_enabled) | 1533 | if (x2apic_enabled || !x2apic_supported()) |
| 1518 | return; | 1534 | return; |
| 1519 | 1535 | ||
| 1520 | if (x2apic_preenabled) | 1536 | if (x2apic_preenabled) |
| 1521 | panic("x2apic: enabled by BIOS but kernel init failed."); | 1537 | panic("x2apic: enabled by BIOS but kernel init failed."); |
| 1522 | else if (cpu_has_x2apic) | 1538 | else if (ret == IRQ_REMAP_XAPIC_MODE) |
| 1523 | pr_info("Not enabling x2apic, Intr-remapping init failed.\n"); | 1539 | pr_info("x2apic not enabled, IRQ remapping is in xapic mode\n"); |
| 1540 | else if (ret < 0) | ||
| 1541 | pr_info("x2apic not enabled, IRQ remapping init failed\n"); | ||
| 1524 | } | 1542 | } |
| 1525 | 1543 | ||
| 1526 | #ifdef CONFIG_X86_64 | 1544 | #ifdef CONFIG_X86_64 |
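
Note the changed contract: enable_IR() used to return a 0/1 boolean, and now returns a tri-state result that enable_IR_x2apic() branches on. A sketch of the convention, where the enum values are assumptions (they are defined outside this hunk; only the names appear in this diff):

    /* Assumed definitions, mirroring how the call sites above test ret. */
    enum {
            IRQ_REMAP_XAPIC_MODE,   /* IR is up, but x2apic must stay off */
            IRQ_REMAP_X2APIC_MODE,  /* IR is up in x2apic-capable mode    */
    };

    /* enable_IR() as the callers above consume it:
     *   ret < 0                      IR init failed; restore and fall back
     *   ret == IRQ_REMAP_XAPIC_MODE  keep IR, skip x2apic (goto nox2apic)
     *   otherwise                    proceed to enable x2apic
     */
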
diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c
index efd737e827f4..521bead01137 100644
--- a/arch/x86/kernel/apic/bigsmp_32.c
+++ b/arch/x86/kernel/apic/bigsmp_32.c
| @@ -255,12 +255,24 @@ static struct apic apic_bigsmp = { | |||
| 255 | .x86_32_early_logical_apicid = bigsmp_early_logical_apicid, | 255 | .x86_32_early_logical_apicid = bigsmp_early_logical_apicid, |
| 256 | }; | 256 | }; |
| 257 | 257 | ||
| 258 | struct apic * __init generic_bigsmp_probe(void) | 258 | void __init generic_bigsmp_probe(void) |
| 259 | { | 259 | { |
| 260 | if (probe_bigsmp()) | 260 | unsigned int cpu; |
| 261 | return &apic_bigsmp; | ||
| 262 | 261 | ||
| 263 | return NULL; | 262 | if (!probe_bigsmp()) |
| 263 | return; | ||
| 264 | |||
| 265 | apic = &apic_bigsmp; | ||
| 266 | |||
| 267 | for_each_possible_cpu(cpu) { | ||
| 268 | if (early_per_cpu(x86_cpu_to_logical_apicid, | ||
| 269 | cpu) == BAD_APICID) | ||
| 270 | continue; | ||
| 271 | early_per_cpu(x86_cpu_to_logical_apicid, cpu) = | ||
| 272 | bigsmp_early_logical_apicid(cpu); | ||
| 273 | } | ||
| 274 | |||
| 275 | pr_info("Overriding APIC driver with %s\n", apic_bigsmp.name); | ||
| 264 | } | 276 | } |
| 265 | 277 | ||
| 266 | apic_driver(apic_bigsmp); | 278 | apic_driver(apic_bigsmp); |
diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c
index 9536b3fe43f8..5d513bc47b6b 100644
--- a/arch/x86/kernel/apic/es7000_32.c
+++ b/arch/x86/kernel/apic/es7000_32.c
| @@ -48,7 +48,7 @@ | |||
| 48 | #include <linux/io.h> | 48 | #include <linux/io.h> |
| 49 | 49 | ||
| 50 | #include <asm/apicdef.h> | 50 | #include <asm/apicdef.h> |
| 51 | #include <asm/atomic.h> | 51 | #include <linux/atomic.h> |
| 52 | #include <asm/fixmap.h> | 52 | #include <asm/fixmap.h> |
| 53 | #include <asm/mpspec.h> | 53 | #include <asm/mpspec.h> |
| 54 | #include <asm/setup.h> | 54 | #include <asm/setup.h> |
diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c
index d5e57db0f7be..31cb9ae992b7 100644
--- a/arch/x86/kernel/apic/hw_nmi.c
+++ b/arch/x86/kernel/apic/hw_nmi.c
| @@ -60,22 +60,10 @@ void arch_trigger_all_cpu_backtrace(void) | |||
| 60 | } | 60 | } |
| 61 | 61 | ||
| 62 | static int __kprobes | 62 | static int __kprobes |
| 63 | arch_trigger_all_cpu_backtrace_handler(struct notifier_block *self, | 63 | arch_trigger_all_cpu_backtrace_handler(unsigned int cmd, struct pt_regs *regs) |
| 64 | unsigned long cmd, void *__args) | ||
| 65 | { | 64 | { |
| 66 | struct die_args *args = __args; | ||
| 67 | struct pt_regs *regs; | ||
| 68 | int cpu; | 65 | int cpu; |
| 69 | 66 | ||
| 70 | switch (cmd) { | ||
| 71 | case DIE_NMI: | ||
| 72 | break; | ||
| 73 | |||
| 74 | default: | ||
| 75 | return NOTIFY_DONE; | ||
| 76 | } | ||
| 77 | |||
| 78 | regs = args->regs; | ||
| 79 | cpu = smp_processor_id(); | 67 | cpu = smp_processor_id(); |
| 80 | 68 | ||
| 81 | if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) { | 69 | if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) { |
| @@ -86,21 +74,16 @@ arch_trigger_all_cpu_backtrace_handler(struct notifier_block *self, | |||
| 86 | show_regs(regs); | 74 | show_regs(regs); |
| 87 | arch_spin_unlock(&lock); | 75 | arch_spin_unlock(&lock); |
| 88 | cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask)); | 76 | cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask)); |
| 89 | return NOTIFY_STOP; | 77 | return NMI_HANDLED; |
| 90 | } | 78 | } |
| 91 | 79 | ||
| 92 | return NOTIFY_DONE; | 80 | return NMI_DONE; |
| 93 | } | 81 | } |
| 94 | 82 | ||
| 95 | static __read_mostly struct notifier_block backtrace_notifier = { | ||
| 96 | .notifier_call = arch_trigger_all_cpu_backtrace_handler, | ||
| 97 | .next = NULL, | ||
| 98 | .priority = NMI_LOCAL_LOW_PRIOR, | ||
| 99 | }; | ||
| 100 | |||
| 101 | static int __init register_trigger_all_cpu_backtrace(void) | 83 | static int __init register_trigger_all_cpu_backtrace(void) |
| 102 | { | 84 | { |
| 103 | register_die_notifier(&backtrace_notifier); | 85 | register_nmi_handler(NMI_LOCAL, arch_trigger_all_cpu_backtrace_handler, |
| 86 | 0, "arch_bt"); | ||
| 104 | return 0; | 87 | return 0; |
| 105 | } | 88 | } |
| 106 | early_initcall(register_trigger_all_cpu_backtrace); | 89 | early_initcall(register_trigger_all_cpu_backtrace); |
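
This hunk shows the pattern of the new NMI infrastructure this series introduces (nmi.o was added to the Makefile above): a handler takes (cmd, regs) directly, returns NMI_HANDLED when it consumed the NMI or NMI_DONE to let the chain continue, and registers through register_nmi_handler() instead of a die notifier. A minimal sketch in the new style, with all names hypothetical:

    /* Hypothetical handler using the API shown above. */
    static int my_nmi_handler(unsigned int cmd, struct pt_regs *regs)
    {
            if (!my_event_pending())        /* hypothetical predicate */
                    return NMI_DONE;        /* not ours; keep the chain going */

            my_handle_event(regs);          /* hypothetical work */
            return NMI_HANDLED;             /* consumed; stop processing */
    }

    static int __init my_nmi_init(void)
    {
            /* type NMI_LOCAL, no flags, name shown in diagnostics */
            register_nmi_handler(NMI_LOCAL, my_nmi_handler, 0, "my_handler");
            return 0;
    }
    early_initcall(my_nmi_init);
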
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 8eb863e27ea6..6d939d7847e2 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
| @@ -92,21 +92,21 @@ static struct ioapic { | |||
| 92 | DECLARE_BITMAP(pin_programmed, MP_MAX_IOAPIC_PIN + 1); | 92 | DECLARE_BITMAP(pin_programmed, MP_MAX_IOAPIC_PIN + 1); |
| 93 | } ioapics[MAX_IO_APICS]; | 93 | } ioapics[MAX_IO_APICS]; |
| 94 | 94 | ||
| 95 | #define mpc_ioapic_ver(id) ioapics[id].mp_config.apicver | 95 | #define mpc_ioapic_ver(ioapic_idx) ioapics[ioapic_idx].mp_config.apicver |
| 96 | 96 | ||
| 97 | int mpc_ioapic_id(int id) | 97 | int mpc_ioapic_id(int ioapic_idx) |
| 98 | { | 98 | { |
| 99 | return ioapics[id].mp_config.apicid; | 99 | return ioapics[ioapic_idx].mp_config.apicid; |
| 100 | } | 100 | } |
| 101 | 101 | ||
| 102 | unsigned int mpc_ioapic_addr(int id) | 102 | unsigned int mpc_ioapic_addr(int ioapic_idx) |
| 103 | { | 103 | { |
| 104 | return ioapics[id].mp_config.apicaddr; | 104 | return ioapics[ioapic_idx].mp_config.apicaddr; |
| 105 | } | 105 | } |
| 106 | 106 | ||
| 107 | struct mp_ioapic_gsi *mp_ioapic_gsi_routing(int id) | 107 | struct mp_ioapic_gsi *mp_ioapic_gsi_routing(int ioapic_idx) |
| 108 | { | 108 | { |
| 109 | return &ioapics[id].gsi_config; | 109 | return &ioapics[ioapic_idx].gsi_config; |
| 110 | } | 110 | } |
| 111 | 111 | ||
| 112 | int nr_ioapics; | 112 | int nr_ioapics; |
| @@ -186,21 +186,15 @@ static struct irq_pin_list *alloc_irq_pin_list(int node) | |||
| 186 | 186 | ||
| 187 | 187 | ||
| 188 | /* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */ | 188 | /* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */ |
| 189 | #ifdef CONFIG_SPARSE_IRQ | ||
| 190 | static struct irq_cfg irq_cfgx[NR_IRQS_LEGACY]; | 189 | static struct irq_cfg irq_cfgx[NR_IRQS_LEGACY]; |
| 191 | #else | ||
| 192 | static struct irq_cfg irq_cfgx[NR_IRQS]; | ||
| 193 | #endif | ||
| 194 | 190 | ||
| 195 | int __init arch_early_irq_init(void) | 191 | int __init arch_early_irq_init(void) |
| 196 | { | 192 | { |
| 197 | struct irq_cfg *cfg; | 193 | struct irq_cfg *cfg; |
| 198 | int count, node, i; | 194 | int count, node, i; |
| 199 | 195 | ||
| 200 | if (!legacy_pic->nr_legacy_irqs) { | 196 | if (!legacy_pic->nr_legacy_irqs) |
| 201 | nr_irqs_gsi = 0; | ||
| 202 | io_apic_irqs = ~0UL; | 197 | io_apic_irqs = ~0UL; |
| 203 | } | ||
| 204 | 198 | ||
| 205 | for (i = 0; i < nr_ioapics; i++) { | 199 | for (i = 0; i < nr_ioapics; i++) { |
| 206 | ioapics[i].saved_registers = | 200 | ioapics[i].saved_registers = |
| @@ -234,7 +228,6 @@ int __init arch_early_irq_init(void) | |||
| 234 | return 0; | 228 | return 0; |
| 235 | } | 229 | } |
| 236 | 230 | ||
| 237 | #ifdef CONFIG_SPARSE_IRQ | ||
| 238 | static struct irq_cfg *irq_cfg(unsigned int irq) | 231 | static struct irq_cfg *irq_cfg(unsigned int irq) |
| 239 | { | 232 | { |
| 240 | return irq_get_chip_data(irq); | 233 | return irq_get_chip_data(irq); |
| @@ -269,22 +262,6 @@ static void free_irq_cfg(unsigned int at, struct irq_cfg *cfg) | |||
| 269 | kfree(cfg); | 262 | kfree(cfg); |
| 270 | } | 263 | } |
| 271 | 264 | ||
| 272 | #else | ||
| 273 | |||
| 274 | struct irq_cfg *irq_cfg(unsigned int irq) | ||
| 275 | { | ||
| 276 | return irq < nr_irqs ? irq_cfgx + irq : NULL; | ||
| 277 | } | ||
| 278 | |||
| 279 | static struct irq_cfg *alloc_irq_cfg(unsigned int irq, int node) | ||
| 280 | { | ||
| 281 | return irq_cfgx + irq; | ||
| 282 | } | ||
| 283 | |||
| 284 | static inline void free_irq_cfg(unsigned int at, struct irq_cfg *cfg) { } | ||
| 285 | |||
| 286 | #endif | ||
| 287 | |||
| 288 | static struct irq_cfg *alloc_irq_and_cfg_at(unsigned int at, int node) | 265 | static struct irq_cfg *alloc_irq_and_cfg_at(unsigned int at, int node) |
| 289 | { | 266 | { |
| 290 | int res = irq_alloc_desc_at(at, node); | 267 | int res = irq_alloc_desc_at(at, node); |
| @@ -394,13 +371,21 @@ union entry_union { | |||
| 394 | struct IO_APIC_route_entry entry; | 371 | struct IO_APIC_route_entry entry; |
| 395 | }; | 372 | }; |
| 396 | 373 | ||
| 374 | static struct IO_APIC_route_entry __ioapic_read_entry(int apic, int pin) | ||
| 375 | { | ||
| 376 | union entry_union eu; | ||
| 377 | |||
| 378 | eu.w1 = io_apic_read(apic, 0x10 + 2 * pin); | ||
| 379 | eu.w2 = io_apic_read(apic, 0x11 + 2 * pin); | ||
| 380 | return eu.entry; | ||
| 381 | } | ||
| 382 | |||
| 397 | static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin) | 383 | static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin) |
| 398 | { | 384 | { |
| 399 | union entry_union eu; | 385 | union entry_union eu; |
| 400 | unsigned long flags; | 386 | unsigned long flags; |
| 401 | raw_spin_lock_irqsave(&ioapic_lock, flags); | 387 | raw_spin_lock_irqsave(&ioapic_lock, flags); |
| 402 | eu.w1 = io_apic_read(apic, 0x10 + 2 * pin); | 388 | eu.entry = __ioapic_read_entry(apic, pin); |
| 403 | eu.w2 = io_apic_read(apic, 0x11 + 2 * pin); | ||
| 404 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); | 389 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); |
| 405 | return eu.entry; | 390 | return eu.entry; |
| 406 | } | 391 | } |
| @@ -529,18 +514,6 @@ static void io_apic_modify_irq(struct irq_cfg *cfg, | |||
| 529 | __io_apic_modify_irq(entry, mask_and, mask_or, final); | 514 | __io_apic_modify_irq(entry, mask_and, mask_or, final); |
| 530 | } | 515 | } |
| 531 | 516 | ||
| 532 | static void __mask_and_edge_IO_APIC_irq(struct irq_pin_list *entry) | ||
| 533 | { | ||
| 534 | __io_apic_modify_irq(entry, ~IO_APIC_REDIR_LEVEL_TRIGGER, | ||
| 535 | IO_APIC_REDIR_MASKED, NULL); | ||
| 536 | } | ||
| 537 | |||
| 538 | static void __unmask_and_level_IO_APIC_irq(struct irq_pin_list *entry) | ||
| 539 | { | ||
| 540 | __io_apic_modify_irq(entry, ~IO_APIC_REDIR_MASKED, | ||
| 541 | IO_APIC_REDIR_LEVEL_TRIGGER, NULL); | ||
| 542 | } | ||
| 543 | |||
| 544 | static void io_apic_sync(struct irq_pin_list *entry) | 517 | static void io_apic_sync(struct irq_pin_list *entry) |
| 545 | { | 518 | { |
| 546 | /* | 519 | /* |
| @@ -585,6 +558,66 @@ static void unmask_ioapic_irq(struct irq_data *data) | |||
| 585 | unmask_ioapic(data->chip_data); | 558 | unmask_ioapic(data->chip_data); |
| 586 | } | 559 | } |
| 587 | 560 | ||
| 561 | /* | ||
| 562 | * IO-APIC versions below 0x20 don't support EOI register. | ||
| 563 | * For the record, here is the information about various versions: | ||
| 564 | * 0Xh 82489DX | ||
| 565 | * 1Xh I/OAPIC or I/O(x)APIC which are not PCI 2.2 Compliant | ||
| 566 | * 2Xh I/O(x)APIC which is PCI 2.2 Compliant | ||
| 567 | * 30h-FFh Reserved | ||
| 568 | * | ||
| 569 | * Some of the Intel ICH Specs (ICH2 to ICH5) documents the io-apic | ||
| 570 | * version as 0x2. This is an error with documentation and these ICH chips | ||
| 571 | * use io-apic's of version 0x20. | ||
| 572 | * | ||
| 573 | * For IO-APIC's with EOI register, we use that to do an explicit EOI. | ||
| 574 | * Otherwise, we simulate the EOI message manually by changing the trigger | ||
| 575 | * mode to edge and then back to level, with RTE being masked during this. | ||
| 576 | */ | ||
| 577 | static void __eoi_ioapic_pin(int apic, int pin, int vector, struct irq_cfg *cfg) | ||
| 578 | { | ||
| 579 | if (mpc_ioapic_ver(apic) >= 0x20) { | ||
| 580 | /* | ||
| 581 | * Intr-remapping uses pin number as the virtual vector | ||
| 582 | * in the RTE. Actual vector is programmed in | ||
| 583 | * intr-remapping table entry. Hence for the io-apic | ||
| 584 | * EOI we use the pin number. | ||
| 585 | */ | ||
| 586 | if (cfg && irq_remapped(cfg)) | ||
| 587 | io_apic_eoi(apic, pin); | ||
| 588 | else | ||
| 589 | io_apic_eoi(apic, vector); | ||
| 590 | } else { | ||
| 591 | struct IO_APIC_route_entry entry, entry1; | ||
| 592 | |||
| 593 | entry = entry1 = __ioapic_read_entry(apic, pin); | ||
| 594 | |||
| 595 | /* | ||
| 596 | * Mask the entry and change the trigger mode to edge. | ||
| 597 | */ | ||
| 598 | entry1.mask = 1; | ||
| 599 | entry1.trigger = IOAPIC_EDGE; | ||
| 600 | |||
| 601 | __ioapic_write_entry(apic, pin, entry1); | ||
| 602 | |||
| 603 | /* | ||
| 604 | * Restore the previous level triggered entry. | ||
| 605 | */ | ||
| 606 | __ioapic_write_entry(apic, pin, entry); | ||
| 607 | } | ||
| 608 | } | ||
| 609 | |||
| 610 | static void eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg) | ||
| 611 | { | ||
| 612 | struct irq_pin_list *entry; | ||
| 613 | unsigned long flags; | ||
| 614 | |||
| 615 | raw_spin_lock_irqsave(&ioapic_lock, flags); | ||
| 616 | for_each_irq_pin(entry, cfg->irq_2_pin) | ||
| 617 | __eoi_ioapic_pin(entry->apic, entry->pin, cfg->vector, cfg); | ||
| 618 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); | ||
| 619 | } | ||
| 620 | |||
| 588 | static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin) | 621 | static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin) |
| 589 | { | 622 | { |
| 590 | struct IO_APIC_route_entry entry; | 623 | struct IO_APIC_route_entry entry; |
| @@ -593,10 +626,44 @@ static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin) | |||
| 593 | entry = ioapic_read_entry(apic, pin); | 626 | entry = ioapic_read_entry(apic, pin); |
| 594 | if (entry.delivery_mode == dest_SMI) | 627 | if (entry.delivery_mode == dest_SMI) |
| 595 | return; | 628 | return; |
| 629 | |||
| 630 | /* | ||
| 631 | * Make sure the entry is masked and re-read the contents to check | ||
| 632 | * if it is a level triggered pin and if the remote-IRR is set. | ||
| 633 | */ | ||
| 634 | if (!entry.mask) { | ||
| 635 | entry.mask = 1; | ||
| 636 | ioapic_write_entry(apic, pin, entry); | ||
| 637 | entry = ioapic_read_entry(apic, pin); | ||
| 638 | } | ||
| 639 | |||
| 640 | if (entry.irr) { | ||
| 641 | unsigned long flags; | ||
| 642 | |||
| 643 | /* | ||
| 644 | * Make sure the trigger mode is set to level. Explicit EOI | ||
| 645 | * doesn't clear the remote-IRR if the trigger mode is not | ||
| 646 | * set to level. | ||
| 647 | */ | ||
| 648 | if (!entry.trigger) { | ||
| 649 | entry.trigger = IOAPIC_LEVEL; | ||
| 650 | ioapic_write_entry(apic, pin, entry); | ||
| 651 | } | ||
| 652 | |||
| 653 | raw_spin_lock_irqsave(&ioapic_lock, flags); | ||
| 654 | __eoi_ioapic_pin(apic, pin, entry.vector, NULL); | ||
| 655 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); | ||
| 656 | } | ||
| 657 | |||
| 596 | /* | 658 | /* |
| 597 | * Disable it in the IO-APIC irq-routing table: | 659 | * Clear the rest of the bits in the IO-APIC RTE except for the mask |
| 660 | * bit. | ||
| 598 | */ | 661 | */ |
| 599 | ioapic_mask_entry(apic, pin); | 662 | ioapic_mask_entry(apic, pin); |
| 663 | entry = ioapic_read_entry(apic, pin); | ||
| 664 | if (entry.irr) | ||
| 665 | printk(KERN_ERR "Unable to reset IRR for apic: %d, pin :%d\n", | ||
| 666 | mpc_ioapic_id(apic), pin); | ||
| 600 | } | 667 | } |
| 601 | 668 | ||
| 602 | static void clear_IO_APIC (void) | 669 | static void clear_IO_APIC (void) |
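
The manual-EOI and clear_IO_APIC_pin() logic above works on individual RTE bitfields (mask, trigger, irr, vector). For orientation, a sketch of the 64-bit redirection table entry those fields come from, as declared in asm/io_apic.h (reproduced from memory, so treat the exact layout as an assumption):

    struct IO_APIC_route_entry {
            __u32   vector          :  8,
                    delivery_mode   :  3,   /* 000: FIXED, 001: lowest prio */
                    dest_mode       :  1,   /* 0: physical, 1: logical */
                    delivery_status :  1,
                    polarity        :  1,
                    irr             :  1,   /* remote-IRR: level irq in service */
                    trigger         :  1,   /* 0: edge, 1: level */
                    mask            :  1,   /* 1: interrupt masked */
                    __reserved_2    : 15;
            __u32   __reserved_3    : 24,
                    dest            :  8;
    } __attribute__ ((packed));
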
| @@ -712,13 +779,13 @@ int restore_ioapic_entries(void) | |||
| 712 | /* | 779 | /* |
| 713 | * Find the IRQ entry number of a certain pin. | 780 | * Find the IRQ entry number of a certain pin. |
| 714 | */ | 781 | */ |
| 715 | static int find_irq_entry(int apic, int pin, int type) | 782 | static int find_irq_entry(int ioapic_idx, int pin, int type) |
| 716 | { | 783 | { |
| 717 | int i; | 784 | int i; |
| 718 | 785 | ||
| 719 | for (i = 0; i < mp_irq_entries; i++) | 786 | for (i = 0; i < mp_irq_entries; i++) |
| 720 | if (mp_irqs[i].irqtype == type && | 787 | if (mp_irqs[i].irqtype == type && |
| 721 | (mp_irqs[i].dstapic == mpc_ioapic_id(apic) || | 788 | (mp_irqs[i].dstapic == mpc_ioapic_id(ioapic_idx) || |
| 722 | mp_irqs[i].dstapic == MP_APIC_ALL) && | 789 | mp_irqs[i].dstapic == MP_APIC_ALL) && |
| 723 | mp_irqs[i].dstirq == pin) | 790 | mp_irqs[i].dstirq == pin) |
| 724 | return i; | 791 | return i; |
| @@ -757,12 +824,13 @@ static int __init find_isa_irq_apic(int irq, int type) | |||
| 757 | (mp_irqs[i].srcbusirq == irq)) | 824 | (mp_irqs[i].srcbusirq == irq)) |
| 758 | break; | 825 | break; |
| 759 | } | 826 | } |
| 827 | |||
| 760 | if (i < mp_irq_entries) { | 828 | if (i < mp_irq_entries) { |
| 761 | int apic; | 829 | int ioapic_idx; |
| 762 | for(apic = 0; apic < nr_ioapics; apic++) { | 830 | |
| 763 | if (mpc_ioapic_id(apic) == mp_irqs[i].dstapic) | 831 | for (ioapic_idx = 0; ioapic_idx < nr_ioapics; ioapic_idx++) |
| 764 | return apic; | 832 | if (mpc_ioapic_id(ioapic_idx) == mp_irqs[i].dstapic) |
| 765 | } | 833 | return ioapic_idx; |
| 766 | } | 834 | } |
| 767 | 835 | ||
| 768 | return -1; | 836 | return -1; |
| @@ -977,7 +1045,7 @@ static int pin_2_irq(int idx, int apic, int pin) | |||
| 977 | int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin, | 1045 | int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin, |
| 978 | struct io_apic_irq_attr *irq_attr) | 1046 | struct io_apic_irq_attr *irq_attr) |
| 979 | { | 1047 | { |
| 980 | int apic, i, best_guess = -1; | 1048 | int ioapic_idx, i, best_guess = -1; |
| 981 | 1049 | ||
| 982 | apic_printk(APIC_DEBUG, | 1050 | apic_printk(APIC_DEBUG, |
| 983 | "querying PCI -> IRQ mapping bus:%d, slot:%d, pin:%d.\n", | 1051 | "querying PCI -> IRQ mapping bus:%d, slot:%d, pin:%d.\n", |
| @@ -990,8 +1058,8 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin, | |||
| 990 | for (i = 0; i < mp_irq_entries; i++) { | 1058 | for (i = 0; i < mp_irq_entries; i++) { |
| 991 | int lbus = mp_irqs[i].srcbus; | 1059 | int lbus = mp_irqs[i].srcbus; |
| 992 | 1060 | ||
| 993 | for (apic = 0; apic < nr_ioapics; apic++) | 1061 | for (ioapic_idx = 0; ioapic_idx < nr_ioapics; ioapic_idx++) |
| 994 | if (mpc_ioapic_id(apic) == mp_irqs[i].dstapic || | 1062 | if (mpc_ioapic_id(ioapic_idx) == mp_irqs[i].dstapic || |
| 995 | mp_irqs[i].dstapic == MP_APIC_ALL) | 1063 | mp_irqs[i].dstapic == MP_APIC_ALL) |
| 996 | break; | 1064 | break; |
| 997 | 1065 | ||
| @@ -999,13 +1067,13 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin, | |||
| 999 | !mp_irqs[i].irqtype && | 1067 | !mp_irqs[i].irqtype && |
| 1000 | (bus == lbus) && | 1068 | (bus == lbus) && |
| 1001 | (slot == ((mp_irqs[i].srcbusirq >> 2) & 0x1f))) { | 1069 | (slot == ((mp_irqs[i].srcbusirq >> 2) & 0x1f))) { |
| 1002 | int irq = pin_2_irq(i, apic, mp_irqs[i].dstirq); | 1070 | int irq = pin_2_irq(i, ioapic_idx, mp_irqs[i].dstirq); |
| 1003 | 1071 | ||
| 1004 | if (!(apic || IO_APIC_IRQ(irq))) | 1072 | if (!(ioapic_idx || IO_APIC_IRQ(irq))) |
| 1005 | continue; | 1073 | continue; |
| 1006 | 1074 | ||
| 1007 | if (pin == (mp_irqs[i].srcbusirq & 3)) { | 1075 | if (pin == (mp_irqs[i].srcbusirq & 3)) { |
| 1008 | set_io_apic_irq_attr(irq_attr, apic, | 1076 | set_io_apic_irq_attr(irq_attr, ioapic_idx, |
| 1009 | mp_irqs[i].dstirq, | 1077 | mp_irqs[i].dstirq, |
| 1010 | irq_trigger(i), | 1078 | irq_trigger(i), |
| 1011 | irq_polarity(i)); | 1079 | irq_polarity(i)); |
| @@ -1016,7 +1084,7 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin, | |||
| 1016 | * best-guess fuzzy result for broken mptables. | 1084 | * best-guess fuzzy result for broken mptables. |
| 1017 | */ | 1085 | */ |
| 1018 | if (best_guess < 0) { | 1086 | if (best_guess < 0) { |
| 1019 | set_io_apic_irq_attr(irq_attr, apic, | 1087 | set_io_apic_irq_attr(irq_attr, ioapic_idx, |
| 1020 | mp_irqs[i].dstirq, | 1088 | mp_irqs[i].dstirq, |
| 1021 | irq_trigger(i), | 1089 | irq_trigger(i), |
| 1022 | irq_polarity(i)); | 1090 | irq_polarity(i)); |
| @@ -1202,7 +1270,6 @@ void __setup_vector_irq(int cpu) | |||
| 1202 | } | 1270 | } |
| 1203 | 1271 | ||
| 1204 | static struct irq_chip ioapic_chip; | 1272 | static struct irq_chip ioapic_chip; |
| 1205 | static struct irq_chip ir_ioapic_chip; | ||
| 1206 | 1273 | ||
| 1207 | #ifdef CONFIG_X86_32 | 1274 | #ifdef CONFIG_X86_32 |
| 1208 | static inline int IO_APIC_irq_trigger(int irq) | 1275 | static inline int IO_APIC_irq_trigger(int irq) |
| @@ -1246,7 +1313,7 @@ static void ioapic_register_intr(unsigned int irq, struct irq_cfg *cfg, | |||
| 1246 | 1313 | ||
| 1247 | if (irq_remapped(cfg)) { | 1314 | if (irq_remapped(cfg)) { |
| 1248 | irq_set_status_flags(irq, IRQ_MOVE_PCNTXT); | 1315 | irq_set_status_flags(irq, IRQ_MOVE_PCNTXT); |
| 1249 | chip = &ir_ioapic_chip; | 1316 | irq_remap_modify_chip_defaults(chip); |
| 1250 | fasteoi = trigger != 0; | 1317 | fasteoi = trigger != 0; |
| 1251 | } | 1318 | } |
| 1252 | 1319 | ||
| @@ -1255,77 +1322,100 @@ static void ioapic_register_intr(unsigned int irq, struct irq_cfg *cfg, | |||
| 1255 | fasteoi ? "fasteoi" : "edge"); | 1322 | fasteoi ? "fasteoi" : "edge"); |
| 1256 | } | 1323 | } |
| 1257 | 1324 | ||
| 1258 | static int setup_ioapic_entry(int apic_id, int irq, | 1325 | |
| 1259 | struct IO_APIC_route_entry *entry, | 1326 | static int setup_ir_ioapic_entry(int irq, |
| 1260 | unsigned int destination, int trigger, | 1327 | struct IR_IO_APIC_route_entry *entry, |
| 1261 | int polarity, int vector, int pin) | 1328 | unsigned int destination, int vector, |
| 1329 | struct io_apic_irq_attr *attr) | ||
| 1262 | { | 1330 | { |
| 1263 | /* | 1331 | int index; |
| 1264 | * add it to the IO-APIC irq-routing table: | 1332 | struct irte irte; |
| 1265 | */ | 1333 | int ioapic_id = mpc_ioapic_id(attr->ioapic); |
| 1266 | memset(entry,0,sizeof(*entry)); | 1334 | struct intel_iommu *iommu = map_ioapic_to_ir(ioapic_id); |
| 1267 | 1335 | ||
| 1268 | if (intr_remapping_enabled) { | 1336 | if (!iommu) { |
| 1269 | struct intel_iommu *iommu = map_ioapic_to_ir(apic_id); | 1337 | pr_warn("No mapping iommu for ioapic %d\n", ioapic_id); |
| 1270 | struct irte irte; | 1338 | return -ENODEV; |
| 1271 | struct IR_IO_APIC_route_entry *ir_entry = | 1339 | } |
| 1272 | (struct IR_IO_APIC_route_entry *) entry; | ||
| 1273 | int index; | ||
| 1274 | 1340 | ||
| 1275 | if (!iommu) | 1341 | index = alloc_irte(iommu, irq, 1); |
| 1276 | panic("No mapping iommu for ioapic %d\n", apic_id); | 1342 | if (index < 0) { |
| 1343 | pr_warn("Failed to allocate IRTE for ioapic %d\n", ioapic_id); | ||
| 1344 | return -ENOMEM; | ||
| 1345 | } | ||
| 1277 | 1346 | ||
| 1278 | index = alloc_irte(iommu, irq, 1); | 1347 | prepare_irte(&irte, vector, destination); |
| 1279 | if (index < 0) | ||
| 1280 | panic("Failed to allocate IRTE for ioapic %d\n", apic_id); | ||
| 1281 | 1348 | ||
| 1282 | prepare_irte(&irte, vector, destination); | 1349 | /* Set source-id of interrupt request */ |
| 1350 | set_ioapic_sid(&irte, ioapic_id); | ||
| 1283 | 1351 | ||
| 1284 | /* Set source-id of interrupt request */ | 1352 | modify_irte(irq, &irte); |
| 1285 | set_ioapic_sid(&irte, apic_id); | ||
| 1286 | 1353 | ||
| 1287 | modify_irte(irq, &irte); | 1354 | apic_printk(APIC_VERBOSE, KERN_DEBUG "IOAPIC[%d]: " |
| 1355 | "Set IRTE entry (P:%d FPD:%d Dst_Mode:%d " | ||
| 1356 | "Redir_hint:%d Trig_Mode:%d Dlvry_Mode:%X " | ||
| 1357 | "Avail:%X Vector:%02X Dest:%08X " | ||
| 1358 | "SID:%04X SQ:%X SVT:%X)\n", | ||
| 1359 | attr->ioapic, irte.present, irte.fpd, irte.dst_mode, | ||
| 1360 | irte.redir_hint, irte.trigger_mode, irte.dlvry_mode, | ||
| 1361 | irte.avail, irte.vector, irte.dest_id, | ||
| 1362 | irte.sid, irte.sq, irte.svt); | ||
| 1363 | |||
| 1364 | memset(entry, 0, sizeof(*entry)); | ||
| 1365 | |||
| 1366 | entry->index2 = (index >> 15) & 0x1; | ||
| 1367 | entry->zero = 0; | ||
| 1368 | entry->format = 1; | ||
| 1369 | entry->index = (index & 0x7fff); | ||
| 1370 | /* | ||
| 1371 | * IO-APIC RTE will be configured with virtual vector. | ||
| 1372 | * irq handler will do the explicit EOI to the io-apic. | ||
| 1373 | */ | ||
| 1374 | entry->vector = attr->ioapic_pin; | ||
| 1375 | entry->mask = 0; /* enable IRQ */ | ||
| 1376 | entry->trigger = attr->trigger; | ||
| 1377 | entry->polarity = attr->polarity; | ||
| 1288 | 1378 | ||
| 1289 | ir_entry->index2 = (index >> 15) & 0x1; | 1379 | /* Mask level triggered irqs. |
| 1290 | ir_entry->zero = 0; | 1380 | * Use IRQ_DELAYED_DISABLE for edge triggered irqs. |
| 1291 | ir_entry->format = 1; | 1381 | */ |
| 1292 | ir_entry->index = (index & 0x7fff); | 1382 | if (attr->trigger) |
| 1293 | /* | 1383 | entry->mask = 1; |
| 1294 | * IO-APIC RTE will be configured with virtual vector. | ||
| 1295 | * irq handler will do the explicit EOI to the io-apic. | ||
| 1296 | */ | ||
| 1297 | ir_entry->vector = pin; | ||
| 1298 | |||
| 1299 | apic_printk(APIC_VERBOSE, KERN_DEBUG "IOAPIC[%d]: " | ||
| 1300 | "Set IRTE entry (P:%d FPD:%d Dst_Mode:%d " | ||
| 1301 | "Redir_hint:%d Trig_Mode:%d Dlvry_Mode:%X " | ||
| 1302 | "Avail:%X Vector:%02X Dest:%08X " | ||
| 1303 | "SID:%04X SQ:%X SVT:%X)\n", | ||
| 1304 | apic_id, irte.present, irte.fpd, irte.dst_mode, | ||
| 1305 | irte.redir_hint, irte.trigger_mode, irte.dlvry_mode, | ||
| 1306 | irte.avail, irte.vector, irte.dest_id, | ||
| 1307 | irte.sid, irte.sq, irte.svt); | ||
| 1308 | } else { | ||
| 1309 | entry->delivery_mode = apic->irq_delivery_mode; | ||
| 1310 | entry->dest_mode = apic->irq_dest_mode; | ||
| 1311 | entry->dest = destination; | ||
| 1312 | entry->vector = vector; | ||
| 1313 | } | ||
| 1314 | 1384 | ||
| 1315 | entry->mask = 0; /* enable IRQ */ | 1385 | return 0; |
| 1316 | entry->trigger = trigger; | 1386 | } |
| 1317 | entry->polarity = polarity; | ||
| 1318 | 1387 | ||
| 1319 | /* Mask level triggered irqs. | 1388 | static int setup_ioapic_entry(int irq, struct IO_APIC_route_entry *entry, |
| 1389 | unsigned int destination, int vector, | ||
| 1390 | struct io_apic_irq_attr *attr) | ||
| 1391 | { | ||
| 1392 | if (intr_remapping_enabled) | ||
| 1393 | return setup_ir_ioapic_entry(irq, | ||
| 1394 | (struct IR_IO_APIC_route_entry *)entry, | ||
| 1395 | destination, vector, attr); | ||
| 1396 | |||
| 1397 | memset(entry, 0, sizeof(*entry)); | ||
| 1398 | |||
| 1399 | entry->delivery_mode = apic->irq_delivery_mode; | ||
| 1400 | entry->dest_mode = apic->irq_dest_mode; | ||
| 1401 | entry->dest = destination; | ||
| 1402 | entry->vector = vector; | ||
| 1403 | entry->mask = 0; /* enable IRQ */ | ||
| 1404 | entry->trigger = attr->trigger; | ||
| 1405 | entry->polarity = attr->polarity; | ||
| 1406 | |||
| 1407 | /* | ||
| 1408 | * Mask level triggered irqs. | ||
| 1320 | * Use IRQ_DELAYED_DISABLE for edge triggered irqs. | 1409 | * Use IRQ_DELAYED_DISABLE for edge triggered irqs. |
| 1321 | */ | 1410 | */ |
| 1322 | if (trigger) | 1411 | if (attr->trigger) |
| 1323 | entry->mask = 1; | 1412 | entry->mask = 1; |
| 1413 | |||
| 1324 | return 0; | 1414 | return 0; |
| 1325 | } | 1415 | } |
| 1326 | 1416 | ||
| 1327 | static void setup_ioapic_irq(int apic_id, int pin, unsigned int irq, | 1417 | static void setup_ioapic_irq(unsigned int irq, struct irq_cfg *cfg, |
| 1328 | struct irq_cfg *cfg, int trigger, int polarity) | 1418 | struct io_apic_irq_attr *attr) |
| 1329 | { | 1419 | { |
| 1330 | struct IO_APIC_route_entry entry; | 1420 | struct IO_APIC_route_entry entry; |
| 1331 | unsigned int dest; | 1421 | unsigned int dest; |
| @@ -1348,49 +1438,48 @@ static void setup_ioapic_irq(int apic_id, int pin, unsigned int irq, | |||
| 1348 | apic_printk(APIC_VERBOSE,KERN_DEBUG | 1438 | apic_printk(APIC_VERBOSE,KERN_DEBUG |
| 1349 | "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> " | 1439 | "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> " |
| 1350 | "IRQ %d Mode:%i Active:%i Dest:%d)\n", | 1440 | "IRQ %d Mode:%i Active:%i Dest:%d)\n", |
| 1351 | apic_id, mpc_ioapic_id(apic_id), pin, cfg->vector, | 1441 | attr->ioapic, mpc_ioapic_id(attr->ioapic), attr->ioapic_pin, |
| 1352 | irq, trigger, polarity, dest); | 1442 | cfg->vector, irq, attr->trigger, attr->polarity, dest); |
| 1353 | |||
| 1354 | 1443 | ||
| 1355 | if (setup_ioapic_entry(mpc_ioapic_id(apic_id), irq, &entry, | 1444 | if (setup_ioapic_entry(irq, &entry, dest, cfg->vector, attr)) { |
| 1356 | dest, trigger, polarity, cfg->vector, pin)) { | 1445 | pr_warn("Failed to setup ioapic entry for ioapic %d, pin %d\n", |
| 1357 | printk("Failed to setup ioapic entry for ioapic %d, pin %d\n", | 1446 | mpc_ioapic_id(attr->ioapic), attr->ioapic_pin); |
| 1358 | mpc_ioapic_id(apic_id), pin); | ||
| 1359 | __clear_irq_vector(irq, cfg); | 1447 | __clear_irq_vector(irq, cfg); |
| 1448 | |||
| 1360 | return; | 1449 | return; |
| 1361 | } | 1450 | } |
| 1362 | 1451 | ||
| 1363 | ioapic_register_intr(irq, cfg, trigger); | 1452 | ioapic_register_intr(irq, cfg, attr->trigger); |
| 1364 | if (irq < legacy_pic->nr_legacy_irqs) | 1453 | if (irq < legacy_pic->nr_legacy_irqs) |
| 1365 | legacy_pic->mask(irq); | 1454 | legacy_pic->mask(irq); |
| 1366 | 1455 | ||
| 1367 | ioapic_write_entry(apic_id, pin, entry); | 1456 | ioapic_write_entry(attr->ioapic, attr->ioapic_pin, entry); |
| 1368 | } | 1457 | } |
| 1369 | 1458 | ||
| 1370 | static bool __init io_apic_pin_not_connected(int idx, int apic_id, int pin) | 1459 | static bool __init io_apic_pin_not_connected(int idx, int ioapic_idx, int pin) |
| 1371 | { | 1460 | { |
| 1372 | if (idx != -1) | 1461 | if (idx != -1) |
| 1373 | return false; | 1462 | return false; |
| 1374 | 1463 | ||
| 1375 | apic_printk(APIC_VERBOSE, KERN_DEBUG " apic %d pin %d not connected\n", | 1464 | apic_printk(APIC_VERBOSE, KERN_DEBUG " apic %d pin %d not connected\n", |
| 1376 | mpc_ioapic_id(apic_id), pin); | 1465 | mpc_ioapic_id(ioapic_idx), pin); |
| 1377 | return true; | 1466 | return true; |
| 1378 | } | 1467 | } |
| 1379 | 1468 | ||
| 1380 | static void __init __io_apic_setup_irqs(unsigned int apic_id) | 1469 | static void __init __io_apic_setup_irqs(unsigned int ioapic_idx) |
| 1381 | { | 1470 | { |
| 1382 | int idx, node = cpu_to_node(0); | 1471 | int idx, node = cpu_to_node(0); |
| 1383 | struct io_apic_irq_attr attr; | 1472 | struct io_apic_irq_attr attr; |
| 1384 | unsigned int pin, irq; | 1473 | unsigned int pin, irq; |
| 1385 | 1474 | ||
| 1386 | for (pin = 0; pin < ioapics[apic_id].nr_registers; pin++) { | 1475 | for (pin = 0; pin < ioapics[ioapic_idx].nr_registers; pin++) { |
| 1387 | idx = find_irq_entry(apic_id, pin, mp_INT); | 1476 | idx = find_irq_entry(ioapic_idx, pin, mp_INT); |
| 1388 | if (io_apic_pin_not_connected(idx, apic_id, pin)) | 1477 | if (io_apic_pin_not_connected(idx, ioapic_idx, pin)) |
| 1389 | continue; | 1478 | continue; |
| 1390 | 1479 | ||
| 1391 | irq = pin_2_irq(idx, apic_id, pin); | 1480 | irq = pin_2_irq(idx, ioapic_idx, pin); |
| 1392 | 1481 | ||
| 1393 | if ((apic_id > 0) && (irq > 16)) | 1482 | if ((ioapic_idx > 0) && (irq > 16)) |
| 1394 | continue; | 1483 | continue; |
| 1395 | 1484 | ||
| 1396 | /* | 1485 | /* |
| @@ -1398,10 +1487,10 @@ static void __init __io_apic_setup_irqs(unsigned int apic_id) | |||
| 1398 | * installed and if it returns 1: | 1487 | * installed and if it returns 1: |
| 1399 | */ | 1488 | */ |
| 1400 | if (apic->multi_timer_check && | 1489 | if (apic->multi_timer_check && |
| 1401 | apic->multi_timer_check(apic_id, irq)) | 1490 | apic->multi_timer_check(ioapic_idx, irq)) |
| 1402 | continue; | 1491 | continue; |
| 1403 | 1492 | ||
| 1404 | set_io_apic_irq_attr(&attr, apic_id, pin, irq_trigger(idx), | 1493 | set_io_apic_irq_attr(&attr, ioapic_idx, pin, irq_trigger(idx), |
| 1405 | irq_polarity(idx)); | 1494 | irq_polarity(idx)); |
| 1406 | 1495 | ||
| 1407 | io_apic_setup_irq_pin(irq, node, &attr); | 1496 | io_apic_setup_irq_pin(irq, node, &attr); |
| @@ -1410,12 +1499,12 @@ static void __init __io_apic_setup_irqs(unsigned int apic_id) | |||
| 1410 | 1499 | ||
| 1411 | static void __init setup_IO_APIC_irqs(void) | 1500 | static void __init setup_IO_APIC_irqs(void) |
| 1412 | { | 1501 | { |
| 1413 | unsigned int apic_id; | 1502 | unsigned int ioapic_idx; |
| 1414 | 1503 | ||
| 1415 | apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n"); | 1504 | apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n"); |
| 1416 | 1505 | ||
| 1417 | for (apic_id = 0; apic_id < nr_ioapics; apic_id++) | 1506 | for (ioapic_idx = 0; ioapic_idx < nr_ioapics; ioapic_idx++) |
| 1418 | __io_apic_setup_irqs(apic_id); | 1507 | __io_apic_setup_irqs(ioapic_idx); |
| 1419 | } | 1508 | } |
| 1420 | 1509 | ||
| 1421 | /* | 1510 | /* |
| @@ -1425,28 +1514,28 @@ static void __init setup_IO_APIC_irqs(void) | |||
| 1425 | */ | 1514 | */ |
| 1426 | void setup_IO_APIC_irq_extra(u32 gsi) | 1515 | void setup_IO_APIC_irq_extra(u32 gsi) |
| 1427 | { | 1516 | { |
| 1428 | int apic_id = 0, pin, idx, irq, node = cpu_to_node(0); | 1517 | int ioapic_idx = 0, pin, idx, irq, node = cpu_to_node(0); |
| 1429 | struct io_apic_irq_attr attr; | 1518 | struct io_apic_irq_attr attr; |
| 1430 | 1519 | ||
| 1431 | /* | 1520 | /* |
| 1432 | * Convert 'gsi' to 'ioapic.pin'. | 1521 | * Convert 'gsi' to 'ioapic.pin'. |
| 1433 | */ | 1522 | */ |
| 1434 | apic_id = mp_find_ioapic(gsi); | 1523 | ioapic_idx = mp_find_ioapic(gsi); |
| 1435 | if (apic_id < 0) | 1524 | if (ioapic_idx < 0) |
| 1436 | return; | 1525 | return; |
| 1437 | 1526 | ||
| 1438 | pin = mp_find_ioapic_pin(apic_id, gsi); | 1527 | pin = mp_find_ioapic_pin(ioapic_idx, gsi); |
| 1439 | idx = find_irq_entry(apic_id, pin, mp_INT); | 1528 | idx = find_irq_entry(ioapic_idx, pin, mp_INT); |
| 1440 | if (idx == -1) | 1529 | if (idx == -1) |
| 1441 | return; | 1530 | return; |
| 1442 | 1531 | ||
| 1443 | irq = pin_2_irq(idx, apic_id, pin); | 1532 | irq = pin_2_irq(idx, ioapic_idx, pin); |
| 1444 | 1533 | ||
| 1445 | /* Only handle the non legacy irqs on secondary ioapics */ | 1534 | /* Only handle the non legacy irqs on secondary ioapics */ |
| 1446 | if (apic_id == 0 || irq < NR_IRQS_LEGACY) | 1535 | if (ioapic_idx == 0 || irq < NR_IRQS_LEGACY) |
| 1447 | return; | 1536 | return; |
| 1448 | 1537 | ||
| 1449 | set_io_apic_irq_attr(&attr, apic_id, pin, irq_trigger(idx), | 1538 | set_io_apic_irq_attr(&attr, ioapic_idx, pin, irq_trigger(idx), |
| 1450 | irq_polarity(idx)); | 1539 | irq_polarity(idx)); |
| 1451 | 1540 | ||
| 1452 | io_apic_setup_irq_pin_once(irq, node, &attr); | 1541 | io_apic_setup_irq_pin_once(irq, node, &attr); |
| @@ -1455,8 +1544,8 @@ void setup_IO_APIC_irq_extra(u32 gsi) | |||
| 1455 | /* | 1544 | /* |
| 1456 | * Set up the timer pin, possibly with the 8259A-master behind. | 1545 | * Set up the timer pin, possibly with the 8259A-master behind. |
| 1457 | */ | 1546 | */ |
| 1458 | static void __init setup_timer_IRQ0_pin(unsigned int apic_id, unsigned int pin, | 1547 | static void __init setup_timer_IRQ0_pin(unsigned int ioapic_idx, |
| 1459 | int vector) | 1548 | unsigned int pin, int vector) |
| 1460 | { | 1549 | { |
| 1461 | struct IO_APIC_route_entry entry; | 1550 | struct IO_APIC_route_entry entry; |
| 1462 | 1551 | ||
| @@ -1487,45 +1576,29 @@ static void __init setup_timer_IRQ0_pin(unsigned int apic_id, unsigned int pin, | |||
| 1487 | /* | 1576 | /* |
| 1488 | * Add it to the IO-APIC irq-routing table: | 1577 | * Add it to the IO-APIC irq-routing table: |
| 1489 | */ | 1578 | */ |
| 1490 | ioapic_write_entry(apic_id, pin, entry); | 1579 | ioapic_write_entry(ioapic_idx, pin, entry); |
| 1491 | } | 1580 | } |
| 1492 | 1581 | ||
| 1493 | 1582 | __apicdebuginit(void) print_IO_APIC(int ioapic_idx) | |
| 1494 | __apicdebuginit(void) print_IO_APIC(void) | ||
| 1495 | { | 1583 | { |
| 1496 | int apic, i; | 1584 | int i; |
| 1497 | union IO_APIC_reg_00 reg_00; | 1585 | union IO_APIC_reg_00 reg_00; |
| 1498 | union IO_APIC_reg_01 reg_01; | 1586 | union IO_APIC_reg_01 reg_01; |
| 1499 | union IO_APIC_reg_02 reg_02; | 1587 | union IO_APIC_reg_02 reg_02; |
| 1500 | union IO_APIC_reg_03 reg_03; | 1588 | union IO_APIC_reg_03 reg_03; |
| 1501 | unsigned long flags; | 1589 | unsigned long flags; |
| 1502 | struct irq_cfg *cfg; | ||
| 1503 | unsigned int irq; | ||
| 1504 | |||
| 1505 | printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries); | ||
| 1506 | for (i = 0; i < nr_ioapics; i++) | ||
| 1507 | printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n", | ||
| 1508 | mpc_ioapic_id(i), ioapics[i].nr_registers); | ||
| 1509 | |||
| 1510 | /* | ||
| 1511 | * We are a bit conservative about what we expect. We have to | ||
| 1512 | * know about every hardware change ASAP. | ||
| 1513 | */ | ||
| 1514 | printk(KERN_INFO "testing the IO APIC.......................\n"); | ||
| 1515 | |||
| 1516 | for (apic = 0; apic < nr_ioapics; apic++) { | ||
| 1517 | 1590 | ||
| 1518 | raw_spin_lock_irqsave(&ioapic_lock, flags); | 1591 | raw_spin_lock_irqsave(&ioapic_lock, flags); |
| 1519 | reg_00.raw = io_apic_read(apic, 0); | 1592 | reg_00.raw = io_apic_read(ioapic_idx, 0); |
| 1520 | reg_01.raw = io_apic_read(apic, 1); | 1593 | reg_01.raw = io_apic_read(ioapic_idx, 1); |
| 1521 | if (reg_01.bits.version >= 0x10) | 1594 | if (reg_01.bits.version >= 0x10) |
| 1522 | reg_02.raw = io_apic_read(apic, 2); | 1595 | reg_02.raw = io_apic_read(ioapic_idx, 2); |
| 1523 | if (reg_01.bits.version >= 0x20) | 1596 | if (reg_01.bits.version >= 0x20) |
| 1524 | reg_03.raw = io_apic_read(apic, 3); | 1597 | reg_03.raw = io_apic_read(ioapic_idx, 3); |
| 1525 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); | 1598 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); |
| 1526 | 1599 | ||
| 1527 | printk("\n"); | 1600 | printk("\n"); |
| 1528 | printk(KERN_DEBUG "IO APIC #%d......\n", mpc_ioapic_id(apic)); | 1601 | printk(KERN_DEBUG "IO APIC #%d......\n", mpc_ioapic_id(ioapic_idx)); |
| 1529 | printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw); | 1602 | printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw); |
| 1530 | printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID); | 1603 | printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID); |
| 1531 | printk(KERN_DEBUG "....... : Delivery Type: %X\n", reg_00.bits.delivery_type); | 1604 | printk(KERN_DEBUG "....... : Delivery Type: %X\n", reg_00.bits.delivery_type); |
| @@ -1575,7 +1648,7 @@ __apicdebuginit(void) print_IO_APIC(void) | |||
| 1575 | struct IO_APIC_route_entry entry; | 1648 | struct IO_APIC_route_entry entry; |
| 1576 | struct IR_IO_APIC_route_entry *ir_entry; | 1649 | struct IR_IO_APIC_route_entry *ir_entry; |
| 1577 | 1650 | ||
| 1578 | entry = ioapic_read_entry(apic, i); | 1651 | entry = ioapic_read_entry(ioapic_idx, i); |
| 1579 | ir_entry = (struct IR_IO_APIC_route_entry *) &entry; | 1652 | ir_entry = (struct IR_IO_APIC_route_entry *) &entry; |
| 1580 | printk(KERN_DEBUG " %02x %04X ", | 1653 | printk(KERN_DEBUG " %02x %04X ", |
| 1581 | i, | 1654 | i, |
| @@ -1596,7 +1669,7 @@ __apicdebuginit(void) print_IO_APIC(void) | |||
| 1596 | } else { | 1669 | } else { |
| 1597 | struct IO_APIC_route_entry entry; | 1670 | struct IO_APIC_route_entry entry; |
| 1598 | 1671 | ||
| 1599 | entry = ioapic_read_entry(apic, i); | 1672 | entry = ioapic_read_entry(ioapic_idx, i); |
| 1600 | printk(KERN_DEBUG " %02x %02X ", | 1673 | printk(KERN_DEBUG " %02x %02X ", |
| 1601 | i, | 1674 | i, |
| 1602 | entry.dest | 1675 | entry.dest |
| @@ -1614,12 +1687,38 @@ __apicdebuginit(void) print_IO_APIC(void) | |||
| 1614 | ); | 1687 | ); |
| 1615 | } | 1688 | } |
| 1616 | } | 1689 | } |
| 1617 | } | 1690 | } |
| 1691 | |||
| 1692 | __apicdebuginit(void) print_IO_APICs(void) | ||
| 1693 | { | ||
| 1694 | int ioapic_idx; | ||
| 1695 | struct irq_cfg *cfg; | ||
| 1696 | unsigned int irq; | ||
| 1697 | struct irq_chip *chip; | ||
| 1698 | |||
| 1699 | printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries); | ||
| 1700 | for (ioapic_idx = 0; ioapic_idx < nr_ioapics; ioapic_idx++) | ||
| 1701 | printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n", | ||
| 1702 | mpc_ioapic_id(ioapic_idx), | ||
| 1703 | ioapics[ioapic_idx].nr_registers); | ||
| 1704 | |||
| 1705 | /* | ||
| 1706 | * We are a bit conservative about what we expect. We have to | ||
| 1707 | * know about every hardware change ASAP. | ||
| 1708 | */ | ||
| 1709 | printk(KERN_INFO "testing the IO APIC.......................\n"); | ||
| 1710 | |||
| 1711 | for (ioapic_idx = 0; ioapic_idx < nr_ioapics; ioapic_idx++) | ||
| 1712 | print_IO_APIC(ioapic_idx); | ||
| 1618 | 1713 | ||
| 1619 | printk(KERN_DEBUG "IRQ to pin mappings:\n"); | 1714 | printk(KERN_DEBUG "IRQ to pin mappings:\n"); |
| 1620 | for_each_active_irq(irq) { | 1715 | for_each_active_irq(irq) { |
| 1621 | struct irq_pin_list *entry; | 1716 | struct irq_pin_list *entry; |
| 1622 | 1717 | ||
| 1718 | chip = irq_get_chip(irq); | ||
| 1719 | if (chip != &ioapic_chip) | ||
| 1720 | continue; | ||
| 1721 | |||
| 1623 | cfg = irq_get_chip_data(irq); | 1722 | cfg = irq_get_chip_data(irq); |
| 1624 | if (!cfg) | 1723 | if (!cfg) |
| 1625 | continue; | 1724 | continue; |
| @@ -1633,8 +1732,6 @@ __apicdebuginit(void) print_IO_APIC(void) | |||
| 1633 | } | 1732 | } |
| 1634 | 1733 | ||
| 1635 | printk(KERN_INFO ".................................... done.\n"); | 1734 | printk(KERN_INFO ".................................... done.\n"); |
| 1636 | |||
| 1637 | return; | ||
| 1638 | } | 1735 | } |
| 1639 | 1736 | ||
| 1640 | __apicdebuginit(void) print_APIC_field(int base) | 1737 | __apicdebuginit(void) print_APIC_field(int base) |
| @@ -1828,7 +1925,7 @@ __apicdebuginit(int) print_ICs(void) | |||
| 1828 | return 0; | 1925 | return 0; |
| 1829 | 1926 | ||
| 1830 | print_local_APICs(show_lapic); | 1927 | print_local_APICs(show_lapic); |
| 1831 | print_IO_APIC(); | 1928 | print_IO_APICs(); |
| 1832 | 1929 | ||
| 1833 | return 0; | 1930 | return 0; |
| 1834 | } | 1931 | } |
| @@ -1953,7 +2050,7 @@ void __init setup_ioapic_ids_from_mpc_nocheck(void) | |||
| 1953 | { | 2050 | { |
| 1954 | union IO_APIC_reg_00 reg_00; | 2051 | union IO_APIC_reg_00 reg_00; |
| 1955 | physid_mask_t phys_id_present_map; | 2052 | physid_mask_t phys_id_present_map; |
| 1956 | int apic_id; | 2053 | int ioapic_idx; |
| 1957 | int i; | 2054 | int i; |
| 1958 | unsigned char old_id; | 2055 | unsigned char old_id; |
| 1959 | unsigned long flags; | 2056 | unsigned long flags; |
| @@ -1967,21 +2064,20 @@ void __init setup_ioapic_ids_from_mpc_nocheck(void) | |||
| 1967 | /* | 2064 | /* |
| 1968 | * Set the IOAPIC ID to the value stored in the MPC table. | 2065 | * Set the IOAPIC ID to the value stored in the MPC table. |
| 1969 | */ | 2066 | */ |
| 1970 | for (apic_id = 0; apic_id < nr_ioapics; apic_id++) { | 2067 | for (ioapic_idx = 0; ioapic_idx < nr_ioapics; ioapic_idx++) { |
| 1971 | |||
| 1972 | /* Read the register 0 value */ | 2068 | /* Read the register 0 value */ |
| 1973 | raw_spin_lock_irqsave(&ioapic_lock, flags); | 2069 | raw_spin_lock_irqsave(&ioapic_lock, flags); |
| 1974 | reg_00.raw = io_apic_read(apic_id, 0); | 2070 | reg_00.raw = io_apic_read(ioapic_idx, 0); |
| 1975 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); | 2071 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); |
| 1976 | 2072 | ||
| 1977 | old_id = mpc_ioapic_id(apic_id); | 2073 | old_id = mpc_ioapic_id(ioapic_idx); |
| 1978 | 2074 | ||
| 1979 | if (mpc_ioapic_id(apic_id) >= get_physical_broadcast()) { | 2075 | if (mpc_ioapic_id(ioapic_idx) >= get_physical_broadcast()) { |
| 1980 | printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n", | 2076 | printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n", |
| 1981 | apic_id, mpc_ioapic_id(apic_id)); | 2077 | ioapic_idx, mpc_ioapic_id(ioapic_idx)); |
| 1982 | printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", | 2078 | printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", |
| 1983 | reg_00.bits.ID); | 2079 | reg_00.bits.ID); |
| 1984 | ioapics[apic_id].mp_config.apicid = reg_00.bits.ID; | 2080 | ioapics[ioapic_idx].mp_config.apicid = reg_00.bits.ID; |
| 1985 | } | 2081 | } |
| 1986 | 2082 | ||
| 1987 | /* | 2083 | /* |
| @@ -1990,9 +2086,9 @@ void __init setup_ioapic_ids_from_mpc_nocheck(void) | |||
| 1990 | * 'stuck on smp_invalidate_needed IPI wait' messages. | 2086 | * 'stuck on smp_invalidate_needed IPI wait' messages. |
| 1991 | */ | 2087 | */ |
| 1992 | if (apic->check_apicid_used(&phys_id_present_map, | 2088 | if (apic->check_apicid_used(&phys_id_present_map, |
| 1993 | mpc_ioapic_id(apic_id))) { | 2089 | mpc_ioapic_id(ioapic_idx))) { |
| 1994 | printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n", | 2090 | printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n", |
| 1995 | apic_id, mpc_ioapic_id(apic_id)); | 2091 | ioapic_idx, mpc_ioapic_id(ioapic_idx)); |
| 1996 | for (i = 0; i < get_physical_broadcast(); i++) | 2092 | for (i = 0; i < get_physical_broadcast(); i++) |
| 1997 | if (!physid_isset(i, phys_id_present_map)) | 2093 | if (!physid_isset(i, phys_id_present_map)) |
| 1998 | break; | 2094 | break; |
| @@ -2001,14 +2097,14 @@ void __init setup_ioapic_ids_from_mpc_nocheck(void) | |||
| 2001 | printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", | 2097 | printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", |
| 2002 | i); | 2098 | i); |
| 2003 | physid_set(i, phys_id_present_map); | 2099 | physid_set(i, phys_id_present_map); |
| 2004 | ioapics[apic_id].mp_config.apicid = i; | 2100 | ioapics[ioapic_idx].mp_config.apicid = i; |
| 2005 | } else { | 2101 | } else { |
| 2006 | physid_mask_t tmp; | 2102 | physid_mask_t tmp; |
| 2007 | apic->apicid_to_cpu_present(mpc_ioapic_id(apic_id), | 2103 | apic->apicid_to_cpu_present(mpc_ioapic_id(ioapic_idx), |
| 2008 | &tmp); | 2104 | &tmp); |
| 2009 | apic_printk(APIC_VERBOSE, "Setting %d in the " | 2105 | apic_printk(APIC_VERBOSE, "Setting %d in the " |
| 2010 | "phys_id_present_map\n", | 2106 | "phys_id_present_map\n", |
| 2011 | mpc_ioapic_id(apic_id)); | 2107 | mpc_ioapic_id(ioapic_idx)); |
| 2012 | physids_or(phys_id_present_map, phys_id_present_map, tmp); | 2108 | physids_or(phys_id_present_map, phys_id_present_map, tmp); |
| 2013 | } | 2109 | } |
| 2014 | 2110 | ||
| @@ -2016,35 +2112,35 @@ void __init setup_ioapic_ids_from_mpc_nocheck(void) | |||
| 2016 | * We need to adjust the IRQ routing table | 2112 | * We need to adjust the IRQ routing table |
| 2017 | * if the ID changed. | 2113 | * if the ID changed. |
| 2018 | */ | 2114 | */ |
| 2019 | if (old_id != mpc_ioapic_id(apic_id)) | 2115 | if (old_id != mpc_ioapic_id(ioapic_idx)) |
| 2020 | for (i = 0; i < mp_irq_entries; i++) | 2116 | for (i = 0; i < mp_irq_entries; i++) |
| 2021 | if (mp_irqs[i].dstapic == old_id) | 2117 | if (mp_irqs[i].dstapic == old_id) |
| 2022 | mp_irqs[i].dstapic | 2118 | mp_irqs[i].dstapic |
| 2023 | = mpc_ioapic_id(apic_id); | 2119 | = mpc_ioapic_id(ioapic_idx); |
| 2024 | 2120 | ||
| 2025 | /* | 2121 | /* |
| 2026 | * Update the ID register according to the right value | 2122 | * Update the ID register according to the right value |
| 2027 | * from the MPC table if they are different. | 2123 | * from the MPC table if they are different. |
| 2028 | */ | 2124 | */ |
| 2029 | if (mpc_ioapic_id(apic_id) == reg_00.bits.ID) | 2125 | if (mpc_ioapic_id(ioapic_idx) == reg_00.bits.ID) |
| 2030 | continue; | 2126 | continue; |
| 2031 | 2127 | ||
| 2032 | apic_printk(APIC_VERBOSE, KERN_INFO | 2128 | apic_printk(APIC_VERBOSE, KERN_INFO |
| 2033 | "...changing IO-APIC physical APIC ID to %d ...", | 2129 | "...changing IO-APIC physical APIC ID to %d ...", |
| 2034 | mpc_ioapic_id(apic_id)); | 2130 | mpc_ioapic_id(ioapic_idx)); |
| 2035 | 2131 | ||
| 2036 | reg_00.bits.ID = mpc_ioapic_id(apic_id); | 2132 | reg_00.bits.ID = mpc_ioapic_id(ioapic_idx); |
| 2037 | raw_spin_lock_irqsave(&ioapic_lock, flags); | 2133 | raw_spin_lock_irqsave(&ioapic_lock, flags); |
| 2038 | io_apic_write(apic_id, 0, reg_00.raw); | 2134 | io_apic_write(ioapic_idx, 0, reg_00.raw); |
| 2039 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); | 2135 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); |
| 2040 | 2136 | ||
| 2041 | /* | 2137 | /* |
| 2042 | * Sanity check | 2138 | * Sanity check |
| 2043 | */ | 2139 | */ |
| 2044 | raw_spin_lock_irqsave(&ioapic_lock, flags); | 2140 | raw_spin_lock_irqsave(&ioapic_lock, flags); |
| 2045 | reg_00.raw = io_apic_read(apic_id, 0); | 2141 | reg_00.raw = io_apic_read(ioapic_idx, 0); |
| 2046 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); | 2142 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); |
| 2047 | if (reg_00.bits.ID != mpc_ioapic_id(apic_id)) | 2143 | if (reg_00.bits.ID != mpc_ioapic_id(ioapic_idx)) |
| 2048 | printk("could not set ID!\n"); | 2144 | printk("could not set ID!\n"); |
| 2049 | else | 2145 | else |
| 2050 | apic_printk(APIC_VERBOSE, " ok.\n"); | 2146 | apic_printk(APIC_VERBOSE, " ok.\n"); |
| @@ -2255,7 +2351,7 @@ ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask, | |||
| 2255 | return ret; | 2351 | return ret; |
| 2256 | } | 2352 | } |
| 2257 | 2353 | ||
| 2258 | #ifdef CONFIG_INTR_REMAP | 2354 | #ifdef CONFIG_IRQ_REMAP |
| 2259 | 2355 | ||
| 2260 | /* | 2356 | /* |
| 2261 | * Migrate the IO-APIC irq in the presence of intr-remapping. | 2357 | * Migrate the IO-APIC irq in the presence of intr-remapping. |
| @@ -2267,6 +2363,9 @@ ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask, | |||
| 2267 | * updated vector information), by using a virtual vector (io-apic pin number). | 2363 | * updated vector information), by using a virtual vector (io-apic pin number). |
| 2268 | * Real vector that is used for interrupting cpu will be coming from | 2364 | * Real vector that is used for interrupting cpu will be coming from |
| 2269 | * the interrupt-remapping table entry. | 2365 | * the interrupt-remapping table entry. |
| 2366 | * | ||
| 2367 | * As the migration is a simple atomic update of IRTE, the same mechanism | ||
| 2367 | * is used to migrate MSI irqs in the presence of interrupt-remapping. | ||
| 2270 | */ | 2369 | */ |
| 2271 | static int | 2370 | static int |
| 2272 | ir_ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask, | 2371 | ir_ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask, |
| @@ -2291,10 +2390,16 @@ ir_ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask, | |||
| 2291 | irte.dest_id = IRTE_DEST(dest); | 2390 | irte.dest_id = IRTE_DEST(dest); |
| 2292 | 2391 | ||
| 2293 | /* | 2392 | /* |
| 2294 | * Modified the IRTE and flushes the Interrupt entry cache. | 2393 | * Atomically updates the IRTE with the new destination and vector,
| 2394 | * and flushes the interrupt entry cache. | ||
| 2295 | */ | 2395 | */ |
| 2296 | modify_irte(irq, &irte); | 2396 | modify_irte(irq, &irte); |
| 2297 | 2397 | ||
| 2398 | /* | ||
| 2399 | * After this point, all the interrupts will start arriving | ||
| 2400 | * at the new destination. So, time to clean up the previous | ||
| 2401 | * vector allocation. | ||
| 2402 | */ | ||
| 2298 | if (cfg->move_in_progress) | 2403 | if (cfg->move_in_progress) |
| 2299 | send_cleanup_vector(cfg); | 2404 | send_cleanup_vector(cfg); |
| 2300 | 2405 | ||
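The whole remapped-affinity path is short enough to read in one place. The following is a minimal sketch condensed from the io-apic variant above and the (now removed) MSI variant later in this patch; it is an illustration, not a literal copy of either:

static int ir_set_affinity_sketch(struct irq_data *data,
                                  const struct cpumask *mask, bool force)
{
        struct irq_cfg *cfg = data->chip_data;
        unsigned int dest;
        struct irte irte;

        if (get_irte(data->irq, &irte))
                return -1;

        if (__ioapic_set_affinity(data, mask, &dest))
                return -1;

        irte.vector = cfg->vector;
        irte.dest_id = IRTE_DEST(dest);

        /* one atomic IRTE update redirects the interrupt ... */
        modify_irte(data->irq, &irte);

        /* ... after which the old vector can be reclaimed */
        if (cfg->move_in_progress)
                send_cleanup_vector(cfg);

        return 0;
}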
| @@ -2407,48 +2512,6 @@ static void ack_apic_edge(struct irq_data *data) | |||
| 2407 | 2512 | ||
| 2408 | atomic_t irq_mis_count; | 2513 | atomic_t irq_mis_count; |
| 2409 | 2514 | ||
| 2410 | /* | ||
| 2411 | * IO-APIC versions below 0x20 don't support EOI register. | ||
| 2412 | * For the record, here is the information about various versions: | ||
| 2413 | * 0Xh 82489DX | ||
| 2414 | * 1Xh I/OAPIC or I/O(x)APIC which are not PCI 2.2 Compliant | ||
| 2415 | * 2Xh I/O(x)APIC which is PCI 2.2 Compliant | ||
| 2416 | * 30h-FFh Reserved | ||
| 2417 | * | ||
| 2418 | * Some of the Intel ICH Specs (ICH2 to ICH5) documents the io-apic | ||
| 2419 | * version as 0x2. This is an error with documentation and these ICH chips | ||
| 2420 | * use io-apic's of version 0x20. | ||
| 2421 | * | ||
| 2422 | * For IO-APIC's with EOI register, we use that to do an explicit EOI. | ||
| 2423 | * Otherwise, we simulate the EOI message manually by changing the trigger | ||
| 2424 | * mode to edge and then back to level, with RTE being masked during this. | ||
| 2425 | */ | ||
| 2426 | static void eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg) | ||
| 2427 | { | ||
| 2428 | struct irq_pin_list *entry; | ||
| 2429 | unsigned long flags; | ||
| 2430 | |||
| 2431 | raw_spin_lock_irqsave(&ioapic_lock, flags); | ||
| 2432 | for_each_irq_pin(entry, cfg->irq_2_pin) { | ||
| 2433 | if (mpc_ioapic_ver(entry->apic) >= 0x20) { | ||
| 2434 | /* | ||
| 2435 | * Intr-remapping uses pin number as the virtual vector | ||
| 2436 | * in the RTE. Actual vector is programmed in | ||
| 2437 | * intr-remapping table entry. Hence for the io-apic | ||
| 2438 | * EOI we use the pin number. | ||
| 2439 | */ | ||
| 2440 | if (irq_remapped(cfg)) | ||
| 2441 | io_apic_eoi(entry->apic, entry->pin); | ||
| 2442 | else | ||
| 2443 | io_apic_eoi(entry->apic, cfg->vector); | ||
| 2444 | } else { | ||
| 2445 | __mask_and_edge_IO_APIC_irq(entry); | ||
| 2446 | __unmask_and_level_IO_APIC_irq(entry); | ||
| 2447 | } | ||
| 2448 | } | ||
| 2449 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); | ||
| 2450 | } | ||
| 2451 | |||
| 2452 | static void ack_apic_level(struct irq_data *data) | 2515 | static void ack_apic_level(struct irq_data *data) |
| 2453 | { | 2516 | { |
| 2454 | struct irq_cfg *cfg = data->chip_data; | 2517 | struct irq_cfg *cfg = data->chip_data; |
| @@ -2552,7 +2615,7 @@ static void ack_apic_level(struct irq_data *data) | |||
| 2552 | } | 2615 | } |
| 2553 | } | 2616 | } |
| 2554 | 2617 | ||
| 2555 | #ifdef CONFIG_INTR_REMAP | 2618 | #ifdef CONFIG_IRQ_REMAP |
| 2556 | static void ir_ack_apic_edge(struct irq_data *data) | 2619 | static void ir_ack_apic_edge(struct irq_data *data) |
| 2557 | { | 2620 | { |
| 2558 | ack_APIC_irq(); | 2621 | ack_APIC_irq(); |
| @@ -2563,7 +2626,23 @@ static void ir_ack_apic_level(struct irq_data *data) | |||
| 2563 | ack_APIC_irq(); | 2626 | ack_APIC_irq(); |
| 2564 | eoi_ioapic_irq(data->irq, data->chip_data); | 2627 | eoi_ioapic_irq(data->irq, data->chip_data); |
| 2565 | } | 2628 | } |
| 2566 | #endif /* CONFIG_INTR_REMAP */ | 2629 | |
| 2630 | static void ir_print_prefix(struct irq_data *data, struct seq_file *p) | ||
| 2631 | { | ||
| 2632 | seq_printf(p, " IR-%s", data->chip->name); | ||
| 2633 | } | ||
| 2634 | |||
| 2635 | static void irq_remap_modify_chip_defaults(struct irq_chip *chip) | ||
| 2636 | { | ||
| 2637 | chip->irq_print_chip = ir_print_prefix; | ||
| 2638 | chip->irq_ack = ir_ack_apic_edge; | ||
| 2639 | chip->irq_eoi = ir_ack_apic_level; | ||
| 2640 | |||
| 2641 | #ifdef CONFIG_SMP | ||
| 2642 | chip->irq_set_affinity = ir_ioapic_set_affinity; | ||
| 2643 | #endif | ||
| 2644 | } | ||
| 2645 | #endif /* CONFIG_IRQ_REMAP */ | ||
| 2567 | 2646 | ||
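With the dedicated IR-* irq_chip variants removed further down, the remapped case is handled by retrofitting the regular chip at setup time instead. A sketch of the usage pattern, mirroring the MSI and HPET call sites later in this patch:

/* Sketch: adopting remapped defaults on the regular chip. */
static void setup_remapped_chip_sketch(unsigned int irq,
                                       struct irq_chip *chip)
{
        /* chip is the regular variant, e.g. &msi_chip or &hpet_msi_type */
        if (irq_remapped(irq_get_chip_data(irq))) {
                irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
                irq_remap_modify_chip_defaults(chip);
        }
        irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "edge");
}

Note that the helper mutates the chip structure in place; presumably this is safe because interrupt remapping is decided once, system-wide, so the plain and remapped flavours of a chip never coexist.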
| 2568 | static struct irq_chip ioapic_chip __read_mostly = { | 2647 | static struct irq_chip ioapic_chip __read_mostly = { |
| 2569 | .name = "IO-APIC", | 2648 | .name = "IO-APIC", |
| @@ -2578,21 +2657,6 @@ static struct irq_chip ioapic_chip __read_mostly = { | |||
| 2578 | .irq_retrigger = ioapic_retrigger_irq, | 2657 | .irq_retrigger = ioapic_retrigger_irq, |
| 2579 | }; | 2658 | }; |
| 2580 | 2659 | ||
| 2581 | static struct irq_chip ir_ioapic_chip __read_mostly = { | ||
| 2582 | .name = "IR-IO-APIC", | ||
| 2583 | .irq_startup = startup_ioapic_irq, | ||
| 2584 | .irq_mask = mask_ioapic_irq, | ||
| 2585 | .irq_unmask = unmask_ioapic_irq, | ||
| 2586 | #ifdef CONFIG_INTR_REMAP | ||
| 2587 | .irq_ack = ir_ack_apic_edge, | ||
| 2588 | .irq_eoi = ir_ack_apic_level, | ||
| 2589 | #ifdef CONFIG_SMP | ||
| 2590 | .irq_set_affinity = ir_ioapic_set_affinity, | ||
| 2591 | #endif | ||
| 2592 | #endif | ||
| 2593 | .irq_retrigger = ioapic_retrigger_irq, | ||
| 2594 | }; | ||
| 2595 | |||
| 2596 | static inline void init_IO_APIC_traps(void) | 2660 | static inline void init_IO_APIC_traps(void) |
| 2597 | { | 2661 | { |
| 2598 | struct irq_cfg *cfg; | 2662 | struct irq_cfg *cfg; |
| @@ -2944,27 +3008,26 @@ static int __init io_apic_bug_finalize(void) | |||
| 2944 | 3008 | ||
| 2945 | late_initcall(io_apic_bug_finalize); | 3009 | late_initcall(io_apic_bug_finalize); |
| 2946 | 3010 | ||
| 2947 | static void resume_ioapic_id(int ioapic_id) | 3011 | static void resume_ioapic_id(int ioapic_idx) |
| 2948 | { | 3012 | { |
| 2949 | unsigned long flags; | 3013 | unsigned long flags; |
| 2950 | union IO_APIC_reg_00 reg_00; | 3014 | union IO_APIC_reg_00 reg_00; |
| 2951 | 3015 | ||
| 2952 | |||
| 2953 | raw_spin_lock_irqsave(&ioapic_lock, flags); | 3016 | raw_spin_lock_irqsave(&ioapic_lock, flags); |
| 2954 | reg_00.raw = io_apic_read(ioapic_id, 0); | 3017 | reg_00.raw = io_apic_read(ioapic_idx, 0); |
| 2955 | if (reg_00.bits.ID != mpc_ioapic_id(ioapic_id)) { | 3018 | if (reg_00.bits.ID != mpc_ioapic_id(ioapic_idx)) { |
| 2956 | reg_00.bits.ID = mpc_ioapic_id(ioapic_id); | 3019 | reg_00.bits.ID = mpc_ioapic_id(ioapic_idx); |
| 2957 | io_apic_write(ioapic_id, 0, reg_00.raw); | 3020 | io_apic_write(ioapic_idx, 0, reg_00.raw); |
| 2958 | } | 3021 | } |
| 2959 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); | 3022 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); |
| 2960 | } | 3023 | } |
| 2961 | 3024 | ||
| 2962 | static void ioapic_resume(void) | 3025 | static void ioapic_resume(void) |
| 2963 | { | 3026 | { |
| 2964 | int ioapic_id; | 3027 | int ioapic_idx; |
| 2965 | 3028 | ||
| 2966 | for (ioapic_id = nr_ioapics - 1; ioapic_id >= 0; ioapic_id--) | 3029 | for (ioapic_idx = nr_ioapics - 1; ioapic_idx >= 0; ioapic_idx--) |
| 2967 | resume_ioapic_id(ioapic_id); | 3030 | resume_ioapic_id(ioapic_idx); |
| 2968 | 3031 | ||
| 2969 | restore_ioapic_entries(); | 3032 | restore_ioapic_entries(); |
| 2970 | } | 3033 | } |
| @@ -3144,45 +3207,6 @@ msi_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force) | |||
| 3144 | 3207 | ||
| 3145 | return 0; | 3208 | return 0; |
| 3146 | } | 3209 | } |
| 3147 | #ifdef CONFIG_INTR_REMAP | ||
| 3148 | /* | ||
| 3149 | * Migrate the MSI irq to another cpumask. This migration is | ||
| 3150 | * done in the process context using interrupt-remapping hardware. | ||
| 3151 | */ | ||
| 3152 | static int | ||
| 3153 | ir_msi_set_affinity(struct irq_data *data, const struct cpumask *mask, | ||
| 3154 | bool force) | ||
| 3155 | { | ||
| 3156 | struct irq_cfg *cfg = data->chip_data; | ||
| 3157 | unsigned int dest, irq = data->irq; | ||
| 3158 | struct irte irte; | ||
| 3159 | |||
| 3160 | if (get_irte(irq, &irte)) | ||
| 3161 | return -1; | ||
| 3162 | |||
| 3163 | if (__ioapic_set_affinity(data, mask, &dest)) | ||
| 3164 | return -1; | ||
| 3165 | |||
| 3166 | irte.vector = cfg->vector; | ||
| 3167 | irte.dest_id = IRTE_DEST(dest); | ||
| 3168 | |||
| 3169 | /* | ||
| 3170 | * atomically update the IRTE with the new destination and vector. | ||
| 3171 | */ | ||
| 3172 | modify_irte(irq, &irte); | ||
| 3173 | |||
| 3174 | /* | ||
| 3175 | * After this point, all the interrupts will start arriving | ||
| 3176 | * at the new destination. So, time to cleanup the previous | ||
| 3177 | * vector allocation. | ||
| 3178 | */ | ||
| 3179 | if (cfg->move_in_progress) | ||
| 3180 | send_cleanup_vector(cfg); | ||
| 3181 | |||
| 3182 | return 0; | ||
| 3183 | } | ||
| 3184 | |||
| 3185 | #endif | ||
| 3186 | #endif /* CONFIG_SMP */ | 3210 | #endif /* CONFIG_SMP */ |
| 3187 | 3211 | ||
| 3188 | /* | 3212 | /* |
| @@ -3200,19 +3224,6 @@ static struct irq_chip msi_chip = { | |||
| 3200 | .irq_retrigger = ioapic_retrigger_irq, | 3224 | .irq_retrigger = ioapic_retrigger_irq, |
| 3201 | }; | 3225 | }; |
| 3202 | 3226 | ||
| 3203 | static struct irq_chip msi_ir_chip = { | ||
| 3204 | .name = "IR-PCI-MSI", | ||
| 3205 | .irq_unmask = unmask_msi_irq, | ||
| 3206 | .irq_mask = mask_msi_irq, | ||
| 3207 | #ifdef CONFIG_INTR_REMAP | ||
| 3208 | .irq_ack = ir_ack_apic_edge, | ||
| 3209 | #ifdef CONFIG_SMP | ||
| 3210 | .irq_set_affinity = ir_msi_set_affinity, | ||
| 3211 | #endif | ||
| 3212 | #endif | ||
| 3213 | .irq_retrigger = ioapic_retrigger_irq, | ||
| 3214 | }; | ||
| 3215 | |||
| 3216 | /* | 3227 | /* |
| 3217 | * Map the PCI dev to the corresponding remapping hardware unit | 3228 | * Map the PCI dev to the corresponding remapping hardware unit |
| 3218 | * and allocate 'nvec' consecutive interrupt-remapping table entries | 3229 | * and allocate 'nvec' consecutive interrupt-remapping table entries |
| @@ -3255,7 +3266,7 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq) | |||
| 3255 | 3266 | ||
| 3256 | if (irq_remapped(irq_get_chip_data(irq))) { | 3267 | if (irq_remapped(irq_get_chip_data(irq))) { |
| 3257 | irq_set_status_flags(irq, IRQ_MOVE_PCNTXT); | 3268 | irq_set_status_flags(irq, IRQ_MOVE_PCNTXT); |
| 3258 | chip = &msi_ir_chip; | 3269 | irq_remap_modify_chip_defaults(chip); |
| 3259 | } | 3270 | } |
| 3260 | 3271 | ||
| 3261 | irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "edge"); | 3272 | irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "edge"); |
| @@ -3328,7 +3339,7 @@ void native_teardown_msi_irq(unsigned int irq) | |||
| 3328 | destroy_irq(irq); | 3339 | destroy_irq(irq); |
| 3329 | } | 3340 | } |
| 3330 | 3341 | ||
| 3331 | #if defined (CONFIG_DMAR) || defined (CONFIG_INTR_REMAP) | 3342 | #ifdef CONFIG_DMAR_TABLE |
| 3332 | #ifdef CONFIG_SMP | 3343 | #ifdef CONFIG_SMP |
| 3333 | static int | 3344 | static int |
| 3334 | dmar_msi_set_affinity(struct irq_data *data, const struct cpumask *mask, | 3345 | dmar_msi_set_affinity(struct irq_data *data, const struct cpumask *mask, |
| @@ -3409,19 +3420,6 @@ static int hpet_msi_set_affinity(struct irq_data *data, | |||
| 3409 | 3420 | ||
| 3410 | #endif /* CONFIG_SMP */ | 3421 | #endif /* CONFIG_SMP */ |
| 3411 | 3422 | ||
| 3412 | static struct irq_chip ir_hpet_msi_type = { | ||
| 3413 | .name = "IR-HPET_MSI", | ||
| 3414 | .irq_unmask = hpet_msi_unmask, | ||
| 3415 | .irq_mask = hpet_msi_mask, | ||
| 3416 | #ifdef CONFIG_INTR_REMAP | ||
| 3417 | .irq_ack = ir_ack_apic_edge, | ||
| 3418 | #ifdef CONFIG_SMP | ||
| 3419 | .irq_set_affinity = ir_msi_set_affinity, | ||
| 3420 | #endif | ||
| 3421 | #endif | ||
| 3422 | .irq_retrigger = ioapic_retrigger_irq, | ||
| 3423 | }; | ||
| 3424 | |||
| 3425 | static struct irq_chip hpet_msi_type = { | 3423 | static struct irq_chip hpet_msi_type = { |
| 3426 | .name = "HPET_MSI", | 3424 | .name = "HPET_MSI", |
| 3427 | .irq_unmask = hpet_msi_unmask, | 3425 | .irq_unmask = hpet_msi_unmask, |
| @@ -3458,7 +3456,7 @@ int arch_setup_hpet_msi(unsigned int irq, unsigned int id) | |||
| 3458 | hpet_msi_write(irq_get_handler_data(irq), &msg); | 3456 | hpet_msi_write(irq_get_handler_data(irq), &msg); |
| 3459 | irq_set_status_flags(irq, IRQ_MOVE_PCNTXT); | 3457 | irq_set_status_flags(irq, IRQ_MOVE_PCNTXT); |
| 3460 | if (irq_remapped(irq_get_chip_data(irq))) | 3458 | if (irq_remapped(irq_get_chip_data(irq))) |
| 3461 | chip = &ir_hpet_msi_type; | 3459 | irq_remap_modify_chip_defaults(chip); |
| 3462 | 3460 | ||
| 3463 | irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "edge"); | 3461 | irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "edge"); |
| 3464 | return 0; | 3462 | return 0; |
| @@ -3566,26 +3564,25 @@ io_apic_setup_irq_pin(unsigned int irq, int node, struct io_apic_irq_attr *attr) | |||
| 3566 | return -EINVAL; | 3564 | return -EINVAL; |
| 3567 | ret = __add_pin_to_irq_node(cfg, node, attr->ioapic, attr->ioapic_pin); | 3565 | ret = __add_pin_to_irq_node(cfg, node, attr->ioapic, attr->ioapic_pin); |
| 3568 | if (!ret) | 3566 | if (!ret) |
| 3569 | setup_ioapic_irq(attr->ioapic, attr->ioapic_pin, irq, cfg, | 3567 | setup_ioapic_irq(irq, cfg, attr); |
| 3570 | attr->trigger, attr->polarity); | ||
| 3571 | return ret; | 3568 | return ret; |
| 3572 | } | 3569 | } |
| 3573 | 3570 | ||
| 3574 | int io_apic_setup_irq_pin_once(unsigned int irq, int node, | 3571 | int io_apic_setup_irq_pin_once(unsigned int irq, int node, |
| 3575 | struct io_apic_irq_attr *attr) | 3572 | struct io_apic_irq_attr *attr) |
| 3576 | { | 3573 | { |
| 3577 | unsigned int id = attr->ioapic, pin = attr->ioapic_pin; | 3574 | unsigned int ioapic_idx = attr->ioapic, pin = attr->ioapic_pin; |
| 3578 | int ret; | 3575 | int ret; |
| 3579 | 3576 | ||
| 3580 | /* Avoid redundant programming */ | 3577 | /* Avoid redundant programming */ |
| 3581 | if (test_bit(pin, ioapics[id].pin_programmed)) { | 3578 | if (test_bit(pin, ioapics[ioapic_idx].pin_programmed)) { |
| 3582 | pr_debug("Pin %d-%d already programmed\n", | 3579 | pr_debug("Pin %d-%d already programmed\n", |
| 3583 | mpc_ioapic_id(id), pin); | 3580 | mpc_ioapic_id(ioapic_idx), pin); |
| 3584 | return 0; | 3581 | return 0; |
| 3585 | } | 3582 | } |
| 3586 | ret = io_apic_setup_irq_pin(irq, node, attr); | 3583 | ret = io_apic_setup_irq_pin(irq, node, attr); |
| 3587 | if (!ret) | 3584 | if (!ret) |
| 3588 | set_bit(pin, ioapics[id].pin_programmed); | 3585 | set_bit(pin, ioapics[ioapic_idx].pin_programmed); |
| 3589 | return ret; | 3586 | return ret; |
| 3590 | } | 3587 | } |
| 3591 | 3588 | ||
| @@ -3621,7 +3618,6 @@ int get_nr_irqs_gsi(void) | |||
| 3621 | return nr_irqs_gsi; | 3618 | return nr_irqs_gsi; |
| 3622 | } | 3619 | } |
| 3623 | 3620 | ||
| 3624 | #ifdef CONFIG_SPARSE_IRQ | ||
| 3625 | int __init arch_probe_nr_irqs(void) | 3621 | int __init arch_probe_nr_irqs(void) |
| 3626 | { | 3622 | { |
| 3627 | int nr; | 3623 | int nr; |
| @@ -3641,7 +3637,6 @@ int __init arch_probe_nr_irqs(void) | |||
| 3641 | 3637 | ||
| 3642 | return NR_IRQS_LEGACY; | 3638 | return NR_IRQS_LEGACY; |
| 3643 | } | 3639 | } |
| 3644 | #endif | ||
| 3645 | 3640 | ||
| 3646 | int io_apic_set_pci_routing(struct device *dev, int irq, | 3641 | int io_apic_set_pci_routing(struct device *dev, int irq, |
| 3647 | struct io_apic_irq_attr *irq_attr) | 3642 | struct io_apic_irq_attr *irq_attr) |
diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c index b5254ad044ab..0787bb3412f4 100644 --- a/arch/x86/kernel/apic/probe_32.c +++ b/arch/x86/kernel/apic/probe_32.c | |||
| @@ -200,14 +200,8 @@ void __init default_setup_apic_routing(void) | |||
| 200 | * - we find more than 8 CPUs in acpi LAPIC listing with xAPIC support | 200 | * - we find more than 8 CPUs in acpi LAPIC listing with xAPIC support |
| 201 | */ | 201 | */ |
| 202 | 202 | ||
| 203 | if (!cmdline_apic && apic == &apic_default) { | 203 | if (!cmdline_apic && apic == &apic_default) |
| 204 | struct apic *bigsmp = generic_bigsmp_probe(); | 204 | generic_bigsmp_probe(); |
| 205 | if (bigsmp) { | ||
| 206 | apic = bigsmp; | ||
| 207 | printk(KERN_INFO "Overriding APIC driver with %s\n", | ||
| 208 | apic->name); | ||
| 209 | } | ||
| 210 | } | ||
| 211 | #endif | 205 | #endif |
| 212 | 206 | ||
| 213 | if (apic->setup_apic_routing) | 207 | if (apic->setup_apic_routing) |
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index adc66c3a1fef..62ae3001ae02 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c | |||
| @@ -207,7 +207,6 @@ static int __cpuinit uv_wakeup_secondary(int phys_apicid, unsigned long start_ri | |||
| 207 | ((start_rip << UVH_IPI_INT_VECTOR_SHFT) >> 12) | | 207 | ((start_rip << UVH_IPI_INT_VECTOR_SHFT) >> 12) | |
| 208 | APIC_DM_INIT; | 208 | APIC_DM_INIT; |
| 209 | uv_write_global_mmr64(pnode, UVH_IPI_INT, val); | 209 | uv_write_global_mmr64(pnode, UVH_IPI_INT, val); |
| 210 | mdelay(10); | ||
| 211 | 210 | ||
| 212 | val = (1UL << UVH_IPI_INT_SEND_SHFT) | | 211 | val = (1UL << UVH_IPI_INT_SEND_SHFT) | |
| 213 | (phys_apicid << UVH_IPI_INT_APIC_ID_SHFT) | | 212 | (phys_apicid << UVH_IPI_INT_APIC_ID_SHFT) | |
| @@ -673,18 +672,11 @@ void __cpuinit uv_cpu_init(void) | |||
| 673 | /* | 672 | /* |
| 674 | * When NMI is received, print a stack trace. | 673 | * When NMI is received, print a stack trace. |
| 675 | */ | 674 | */ |
| 676 | int uv_handle_nmi(struct notifier_block *self, unsigned long reason, void *data) | 675 | int uv_handle_nmi(unsigned int reason, struct pt_regs *regs) |
| 677 | { | 676 | { |
| 678 | unsigned long real_uv_nmi; | 677 | unsigned long real_uv_nmi; |
| 679 | int bid; | 678 | int bid; |
| 680 | 679 | ||
| 681 | if (reason != DIE_NMIUNKNOWN) | ||
| 682 | return NOTIFY_OK; | ||
| 683 | |||
| 684 | if (in_crash_kexec) | ||
| 685 | /* do nothing if entering the crash kernel */ | ||
| 686 | return NOTIFY_OK; | ||
| 687 | |||
| 688 | /* | 680 | /* |
| 689 | * Each blade has an MMR that indicates when an NMI has been sent | 681 | * Each blade has an MMR that indicates when an NMI has been sent |
| 690 | * to cpus on the blade. If an NMI is detected, atomically | 682 | * to cpus on the blade. If an NMI is detected, atomically |
| @@ -705,7 +697,7 @@ int uv_handle_nmi(struct notifier_block *self, unsigned long reason, void *data) | |||
| 705 | } | 697 | } |
| 706 | 698 | ||
| 707 | if (likely(__get_cpu_var(cpu_last_nmi_count) == uv_blade_info[bid].nmi_count)) | 699 | if (likely(__get_cpu_var(cpu_last_nmi_count) == uv_blade_info[bid].nmi_count)) |
| 708 | return NOTIFY_DONE; | 700 | return NMI_DONE; |
| 709 | 701 | ||
| 710 | __get_cpu_var(cpu_last_nmi_count) = uv_blade_info[bid].nmi_count; | 702 | __get_cpu_var(cpu_last_nmi_count) = uv_blade_info[bid].nmi_count; |
| 711 | 703 | ||
| @@ -718,17 +710,12 @@ int uv_handle_nmi(struct notifier_block *self, unsigned long reason, void *data) | |||
| 718 | dump_stack(); | 710 | dump_stack(); |
| 719 | spin_unlock(&uv_nmi_lock); | 711 | spin_unlock(&uv_nmi_lock); |
| 720 | 712 | ||
| 721 | return NOTIFY_STOP; | 713 | return NMI_HANDLED; |
| 722 | } | 714 | } |
| 723 | 715 | ||
| 724 | static struct notifier_block uv_dump_stack_nmi_nb = { | ||
| 725 | .notifier_call = uv_handle_nmi, | ||
| 726 | .priority = NMI_LOCAL_LOW_PRIOR - 1, | ||
| 727 | }; | ||
| 728 | |||
| 729 | void uv_register_nmi_notifier(void) | 716 | void uv_register_nmi_notifier(void) |
| 730 | { | 717 | { |
| 731 | if (register_die_notifier(&uv_dump_stack_nmi_nb)) | 718 | if (register_nmi_handler(NMI_UNKNOWN, uv_handle_nmi, 0, "uv")) |
| 732 | printk(KERN_WARNING "UV NMI handler failed to register\n"); | 719 | printk(KERN_WARNING "UV NMI handler failed to register\n"); |
| 733 | } | 720 | } |
| 734 | 721 | ||
| @@ -833,6 +820,10 @@ void __init uv_system_init(void) | |||
| 833 | uv_cpu_hub_info(cpu)->apic_pnode_shift = uvh_apicid.s.pnode_shift; | 820 | uv_cpu_hub_info(cpu)->apic_pnode_shift = uvh_apicid.s.pnode_shift; |
| 834 | uv_cpu_hub_info(cpu)->hub_revision = uv_hub_info->hub_revision; | 821 | uv_cpu_hub_info(cpu)->hub_revision = uv_hub_info->hub_revision; |
| 835 | 822 | ||
| 823 | uv_cpu_hub_info(cpu)->m_shift = 64 - m_val; | ||
| 824 | uv_cpu_hub_info(cpu)->n_lshift = is_uv2_1_hub() ? | ||
| 825 | (m_val == 40 ? 40 : 39) : m_val; | ||
| 826 | |||
| 836 | pnode = uv_apicid_to_pnode(apicid); | 827 | pnode = uv_apicid_to_pnode(apicid); |
| 837 | blade = boot_pnode_to_blade(pnode); | 828 | blade = boot_pnode_to_blade(pnode); |
| 838 | lcpu = uv_blade_info[blade].nr_possible_cpus; | 829 | lcpu = uv_blade_info[blade].nr_possible_cpus; |
| @@ -863,8 +854,7 @@ void __init uv_system_init(void) | |||
| 863 | if (uv_node_to_blade[nid] >= 0) | 854 | if (uv_node_to_blade[nid] >= 0) |
| 864 | continue; | 855 | continue; |
| 865 | paddr = node_start_pfn(nid) << PAGE_SHIFT; | 856 | paddr = node_start_pfn(nid) << PAGE_SHIFT; |
| 866 | paddr = uv_soc_phys_ram_to_gpa(paddr); | 857 | pnode = uv_gpa_to_pnode(uv_soc_phys_ram_to_gpa(paddr)); |
| 867 | pnode = (paddr >> m_val) & pnode_mask; | ||
| 868 | blade = boot_pnode_to_blade(pnode); | 858 | blade = boot_pnode_to_blade(pnode); |
| 869 | uv_node_to_blade[nid] = blade; | 859 | uv_node_to_blade[nid] = blade; |
| 870 | } | 860 | } |
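The removed shift-and-mask hints at what uv_gpa_to_pnode() encapsulates; the helper's real body is not part of this diff, so the following one-liner is only an inference from the deleted line:

/* Hypothetical reconstruction, from "pnode = (paddr >> m_val) & pnode_mask" */
static inline int uv_gpa_to_pnode_sketch(unsigned long gpa,
                                         int m_val, unsigned long pnode_mask)
{
        return (gpa >> m_val) & pnode_mask;
}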
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index 0371c484bb8a..a46bd383953c 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c | |||
| @@ -249,8 +249,6 @@ extern int (*console_blank_hook)(int); | |||
| 249 | #define APM_MINOR_DEV 134 | 249 | #define APM_MINOR_DEV 134 |
| 250 | 250 | ||
| 251 | /* | 251 | /* |
| 252 | * See Documentation/Config.help for the configuration options. | ||
| 253 | * | ||
| 254 | * Various options can be changed at boot time as follows: | 252 | * Various options can be changed at boot time as follows: |
| 255 | * (We allow underscores for compatibility with the modules code) | 253 | * (We allow underscores for compatibility with the modules code) |
| 256 | * apm=on/off enable/disable APM | 254 | * apm=on/off enable/disable APM |
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 6042981d0309..25f24dccdcfa 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile | |||
| @@ -15,6 +15,7 @@ CFLAGS_common.o := $(nostackp) | |||
| 15 | obj-y := intel_cacheinfo.o scattered.o topology.o | 15 | obj-y := intel_cacheinfo.o scattered.o topology.o |
| 16 | obj-y += proc.o capflags.o powerflags.o common.o | 16 | obj-y += proc.o capflags.o powerflags.o common.o |
| 17 | obj-y += vmware.o hypervisor.o sched.o mshyperv.o | 17 | obj-y += vmware.o hypervisor.o sched.o mshyperv.o |
| 18 | obj-y += rdrand.o | ||
| 18 | 19 | ||
| 19 | obj-$(CONFIG_X86_32) += bugs.o | 20 | obj-$(CONFIG_X86_32) += bugs.o |
| 20 | obj-$(CONFIG_X86_64) += bugs_64.o | 21 | obj-$(CONFIG_X86_64) += bugs_64.o |
| @@ -28,10 +29,15 @@ obj-$(CONFIG_CPU_SUP_UMC_32) += umc.o | |||
| 28 | 29 | ||
| 29 | obj-$(CONFIG_PERF_EVENTS) += perf_event.o | 30 | obj-$(CONFIG_PERF_EVENTS) += perf_event.o |
| 30 | 31 | ||
| 32 | ifdef CONFIG_PERF_EVENTS | ||
| 33 | obj-$(CONFIG_CPU_SUP_AMD) += perf_event_amd.o | ||
| 34 | obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_p6.o perf_event_p4.o perf_event_intel_lbr.o perf_event_intel_ds.o perf_event_intel.o | ||
| 35 | endif | ||
| 36 | |||
| 31 | obj-$(CONFIG_X86_MCE) += mcheck/ | 37 | obj-$(CONFIG_X86_MCE) += mcheck/ |
| 32 | obj-$(CONFIG_MTRR) += mtrr/ | 38 | obj-$(CONFIG_MTRR) += mtrr/ |
| 33 | 39 | ||
| 34 | obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o | 40 | obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o perf_event_amd_ibs.o |
| 35 | 41 | ||
| 36 | quiet_cmd_mkcapflags = MKCAP $@ | 42 | quiet_cmd_mkcapflags = MKCAP $@ |
| 37 | cmd_mkcapflags = $(PERL) $(srctree)/$(src)/mkcapflags.pl $< $@ | 43 | cmd_mkcapflags = $(PERL) $(srctree)/$(src)/mkcapflags.pl $< $@ |
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index b13ed393dfce..c7e46cb35327 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c | |||
| @@ -1,5 +1,7 @@ | |||
| 1 | #include <linux/export.h> | ||
| 1 | #include <linux/init.h> | 2 | #include <linux/init.h> |
| 2 | #include <linux/bitops.h> | 3 | #include <linux/bitops.h> |
| 4 | #include <linux/elf.h> | ||
| 3 | #include <linux/mm.h> | 5 | #include <linux/mm.h> |
| 4 | 6 | ||
| 5 | #include <linux/io.h> | 7 | #include <linux/io.h> |
| @@ -410,8 +412,38 @@ static void __cpuinit early_init_amd_mc(struct cpuinfo_x86 *c) | |||
| 410 | #endif | 412 | #endif |
| 411 | } | 413 | } |
| 412 | 414 | ||
| 415 | static void __cpuinit bsp_init_amd(struct cpuinfo_x86 *c) | ||
| 416 | { | ||
| 417 | if (cpu_has(c, X86_FEATURE_CONSTANT_TSC)) { | ||
| 418 | |||
| 419 | if (c->x86 > 0x10 || | ||
| 420 | (c->x86 == 0x10 && c->x86_model >= 0x2)) { | ||
| 421 | u64 val; | ||
| 422 | |||
| 423 | rdmsrl(MSR_K7_HWCR, val); | ||
| 424 | if (!(val & BIT(24))) | ||
| 425 | printk(KERN_WARNING FW_BUG "TSC doesn't count " | ||
| 426 | "with P0 frequency!\n"); | ||
| 427 | } | ||
| 428 | } | ||
| 429 | |||
| 430 | if (c->x86 == 0x15) { | ||
| 431 | unsigned long upperbit; | ||
| 432 | u32 cpuid, assoc; | ||
| 433 | |||
| 434 | cpuid = cpuid_edx(0x80000005); | ||
| 435 | assoc = cpuid >> 16 & 0xff; | ||
| 436 | upperbit = ((cpuid >> 24) << 10) / assoc; | ||
| 437 | |||
| 438 | va_align.mask = (upperbit - 1) & PAGE_MASK; | ||
| 439 | va_align.flags = ALIGN_VA_32 | ALIGN_VA_64; | ||
| 440 | } | ||
| 441 | } | ||
| 442 | |||
| 413 | static void __cpuinit early_init_amd(struct cpuinfo_x86 *c) | 443 | static void __cpuinit early_init_amd(struct cpuinfo_x86 *c) |
| 414 | { | 444 | { |
| 445 | u32 dummy; | ||
| 446 | |||
| 415 | early_init_amd_mc(c); | 447 | early_init_amd_mc(c); |
| 416 | 448 | ||
| 417 | /* | 449 | /* |
| @@ -442,22 +474,7 @@ static void __cpuinit early_init_amd(struct cpuinfo_x86 *c) | |||
| 442 | } | 474 | } |
| 443 | #endif | 475 | #endif |
| 444 | 476 | ||
| 445 | /* We need to do the following only once */ | 477 | rdmsr_safe(MSR_AMD64_PATCH_LEVEL, &c->microcode, &dummy); |
| 446 | if (c != &boot_cpu_data) | ||
| 447 | return; | ||
| 448 | |||
| 449 | if (cpu_has(c, X86_FEATURE_CONSTANT_TSC)) { | ||
| 450 | |||
| 451 | if (c->x86 > 0x10 || | ||
| 452 | (c->x86 == 0x10 && c->x86_model >= 0x2)) { | ||
| 453 | u64 val; | ||
| 454 | |||
| 455 | rdmsrl(MSR_K7_HWCR, val); | ||
| 456 | if (!(val & BIT(24))) | ||
| 457 | printk(KERN_WARNING FW_BUG "TSC doesn't count " | ||
| 458 | "with P0 frequency!\n"); | ||
| 459 | } | ||
| 460 | } | ||
| 461 | } | 478 | } |
| 462 | 479 | ||
| 463 | static void __cpuinit init_amd(struct cpuinfo_x86 *c) | 480 | static void __cpuinit init_amd(struct cpuinfo_x86 *c) |
| @@ -679,6 +696,7 @@ static const struct cpu_dev __cpuinitconst amd_cpu_dev = { | |||
| 679 | .c_size_cache = amd_size_cache, | 696 | .c_size_cache = amd_size_cache, |
| 680 | #endif | 697 | #endif |
| 681 | .c_early_init = early_init_amd, | 698 | .c_early_init = early_init_amd, |
| 699 | .c_bsp_init = bsp_init_amd, | ||
| 682 | .c_init = init_amd, | 700 | .c_init = init_amd, |
| 683 | .c_x86_vendor = X86_VENDOR_AMD, | 701 | .c_x86_vendor = X86_VENDOR_AMD, |
| 684 | }; | 702 | }; |
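The family-0x15 block in bsp_init_amd() above derives an mmap alignment mask from CPUID leaf 0x80000005. Assuming EDX there describes a 64K, 2-way L1 instruction cache (a plausible Bulldozer configuration, not a value taken from this diff), the arithmetic works out as follows:

/* Worked example of the va_align computation (values hypothetical). */
static unsigned long amd_va_align_mask_example(void)
{
        u32 cpuid = (64 << 24) | (2 << 16);     /* 64K L1I, 2-way */
        u32 assoc = (cpuid >> 16) & 0xff;       /* 2 */
        unsigned long upperbit = ((cpuid >> 24) << 10) / assoc; /* 0x8000 */

        /* (0x8000 - 1) & PAGE_MASK == 0x7000 with 4K pages, i.e. mmap
         * addresses get aligned to the 32K cache way size shared by the
         * two cores of a module. */
        return (upperbit - 1) & PAGE_MASK;
}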
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 22a073d7fbff..aa003b13a831 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c | |||
| @@ -15,13 +15,14 @@ | |||
| 15 | #include <asm/stackprotector.h> | 15 | #include <asm/stackprotector.h> |
| 16 | #include <asm/perf_event.h> | 16 | #include <asm/perf_event.h> |
| 17 | #include <asm/mmu_context.h> | 17 | #include <asm/mmu_context.h> |
| 18 | #include <asm/archrandom.h> | ||
| 18 | #include <asm/hypervisor.h> | 19 | #include <asm/hypervisor.h> |
| 19 | #include <asm/processor.h> | 20 | #include <asm/processor.h> |
| 20 | #include <asm/sections.h> | 21 | #include <asm/sections.h> |
| 21 | #include <linux/topology.h> | 22 | #include <linux/topology.h> |
| 22 | #include <linux/cpumask.h> | 23 | #include <linux/cpumask.h> |
| 23 | #include <asm/pgtable.h> | 24 | #include <asm/pgtable.h> |
| 24 | #include <asm/atomic.h> | 25 | #include <linux/atomic.h> |
| 25 | #include <asm/proto.h> | 26 | #include <asm/proto.h> |
| 26 | #include <asm/setup.h> | 27 | #include <asm/setup.h> |
| 27 | #include <asm/apic.h> | 28 | #include <asm/apic.h> |
| @@ -681,6 +682,9 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) | |||
| 681 | filter_cpuid_features(c, false); | 682 | filter_cpuid_features(c, false); |
| 682 | 683 | ||
| 683 | setup_smep(c); | 684 | setup_smep(c); |
| 685 | |||
| 686 | if (this_cpu->c_bsp_init) | ||
| 687 | this_cpu->c_bsp_init(c); | ||
| 684 | } | 688 | } |
| 685 | 689 | ||
| 686 | void __init early_cpu_init(void) | 690 | void __init early_cpu_init(void) |
| @@ -857,6 +861,7 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) | |||
| 857 | #endif | 861 | #endif |
| 858 | 862 | ||
| 859 | init_hypervisor(c); | 863 | init_hypervisor(c); |
| 864 | x86_init_rdrand(c); | ||
| 860 | 865 | ||
| 861 | /* | 866 | /* |
| 862 | * Clear/Set all flags overridden by options, need to do it | 867 | * Clear/Set all flags overridden by options, need to do it |
diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h index e765633f210e..1b22dcc51af4 100644 --- a/arch/x86/kernel/cpu/cpu.h +++ b/arch/x86/kernel/cpu/cpu.h | |||
| @@ -18,6 +18,7 @@ struct cpu_dev { | |||
| 18 | struct cpu_model_info c_models[4]; | 18 | struct cpu_model_info c_models[4]; |
| 19 | 19 | ||
| 20 | void (*c_early_init)(struct cpuinfo_x86 *); | 20 | void (*c_early_init)(struct cpuinfo_x86 *); |
| 21 | void (*c_bsp_init)(struct cpuinfo_x86 *); | ||
| 21 | void (*c_init)(struct cpuinfo_x86 *); | 22 | void (*c_init)(struct cpuinfo_x86 *); |
| 22 | void (*c_identify)(struct cpuinfo_x86 *); | 23 | void (*c_identify)(struct cpuinfo_x86 *); |
| 23 | unsigned int (*c_size_cache)(struct cpuinfo_x86 *, unsigned int); | 24 | unsigned int (*c_size_cache)(struct cpuinfo_x86 *, unsigned int); |
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index ed6086eedf1d..523131213f08 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c | |||
| @@ -47,6 +47,15 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c) | |||
| 47 | (c->x86 == 0x6 && c->x86_model >= 0x0e)) | 47 | (c->x86 == 0x6 && c->x86_model >= 0x0e)) |
| 48 | set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); | 48 | set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); |
| 49 | 49 | ||
| 50 | if (c->x86 >= 6 && !cpu_has(c, X86_FEATURE_IA64)) { | ||
| 51 | unsigned lower_word; | ||
| 52 | |||
| 53 | wrmsr(MSR_IA32_UCODE_REV, 0, 0); | ||
| 54 | /* Required by the SDM */ | ||
| 55 | sync_core(); | ||
| 56 | rdmsr(MSR_IA32_UCODE_REV, lower_word, c->microcode); | ||
| 57 | } | ||
| 58 | |||
| 50 | /* | 59 | /* |
| 51 | * Atom erratum AAE44/AAF40/AAG38/AAH41: | 60 | * Atom erratum AAE44/AAF40/AAG38/AAH41: |
| 52 | * | 61 | * |
| @@ -55,17 +64,10 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c) | |||
| 55 | * need the microcode to have already been loaded... so if it is | 64 | * need the microcode to have already been loaded... so if it is |
| 56 | * not, recommend a BIOS update and disable large pages. | 65 | * not, recommend a BIOS update and disable large pages. |
| 57 | */ | 66 | */ |
| 58 | if (c->x86 == 6 && c->x86_model == 0x1c && c->x86_mask <= 2) { | 67 | if (c->x86 == 6 && c->x86_model == 0x1c && c->x86_mask <= 2 && |
| 59 | u32 ucode, junk; | 68 | c->microcode < 0x20e) { |
| 60 | 69 | printk(KERN_WARNING "Atom PSE erratum detected, BIOS microcode update recommended\n"); | |
| 61 | wrmsr(MSR_IA32_UCODE_REV, 0, 0); | 70 | clear_cpu_cap(c, X86_FEATURE_PSE); |
| 62 | sync_core(); | ||
| 63 | rdmsr(MSR_IA32_UCODE_REV, junk, ucode); | ||
| 64 | |||
| 65 | if (ucode < 0x20e) { | ||
| 66 | printk(KERN_WARNING "Atom PSE erratum detected, BIOS microcode update recommended\n"); | ||
| 67 | clear_cpu_cap(c, X86_FEATURE_PSE); | ||
| 68 | } | ||
| 69 | } | 71 | } |
| 70 | 72 | ||
| 71 | #ifdef CONFIG_X86_64 | 73 | #ifdef CONFIG_X86_64 |
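The Atom PSE quirk above now consults c->microcode, cached by the new early-init block, instead of re-reading the MSR at the check site. Pulled together, the read-then-check flow looks roughly like this (a condensed sketch of the two hunks, not new behaviour):

/* Condensed sketch: cache the microcode revision, then gate the quirk. */
static void early_init_intel_sketch(struct cpuinfo_x86 *c)
{
        if (c->x86 >= 6 && !cpu_has(c, X86_FEATURE_IA64)) {
                unsigned lower_word;

                wrmsr(MSR_IA32_UCODE_REV, 0, 0);        /* clear the MSR */
                sync_core();                            /* required by the SDM */
                rdmsr(MSR_IA32_UCODE_REV, lower_word, c->microcode);
        }

        /* Atom PSE erratum: only old microcode is affected */
        if (c->x86 == 6 && c->x86_model == 0x1c && c->x86_mask <= 2 &&
            c->microcode < 0x20e)
                clear_cpu_cap(c, X86_FEATURE_PSE);
}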
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index c105c533ed94..a3b0811693c9 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c | |||
| @@ -151,28 +151,17 @@ union _cpuid4_leaf_ecx { | |||
| 151 | u32 full; | 151 | u32 full; |
| 152 | }; | 152 | }; |
| 153 | 153 | ||
| 154 | struct amd_l3_cache { | 154 | struct _cpuid4_info_regs { |
| 155 | struct amd_northbridge *nb; | ||
| 156 | unsigned indices; | ||
| 157 | u8 subcaches[4]; | ||
| 158 | }; | ||
| 159 | |||
| 160 | struct _cpuid4_info { | ||
| 161 | union _cpuid4_leaf_eax eax; | 155 | union _cpuid4_leaf_eax eax; |
| 162 | union _cpuid4_leaf_ebx ebx; | 156 | union _cpuid4_leaf_ebx ebx; |
| 163 | union _cpuid4_leaf_ecx ecx; | 157 | union _cpuid4_leaf_ecx ecx; |
| 164 | unsigned long size; | 158 | unsigned long size; |
| 165 | struct amd_l3_cache *l3; | 159 | struct amd_northbridge *nb; |
| 166 | DECLARE_BITMAP(shared_cpu_map, NR_CPUS); | ||
| 167 | }; | 160 | }; |
| 168 | 161 | ||
| 169 | /* subset of above _cpuid4_info w/o shared_cpu_map */ | 162 | struct _cpuid4_info { |
| 170 | struct _cpuid4_info_regs { | 163 | struct _cpuid4_info_regs base; |
| 171 | union _cpuid4_leaf_eax eax; | 164 | DECLARE_BITMAP(shared_cpu_map, NR_CPUS); |
| 172 | union _cpuid4_leaf_ebx ebx; | ||
| 173 | union _cpuid4_leaf_ecx ecx; | ||
| 174 | unsigned long size; | ||
| 175 | struct amd_l3_cache *l3; | ||
| 176 | }; | 165 | }; |
| 177 | 166 | ||
| 178 | unsigned short num_cache_leaves; | 167 | unsigned short num_cache_leaves; |
| @@ -314,16 +303,23 @@ struct _cache_attr { | |||
| 314 | /* | 303 | /* |
| 315 | * L3 cache descriptors | 304 | * L3 cache descriptors |
| 316 | */ | 305 | */ |
| 317 | static void __cpuinit amd_calc_l3_indices(struct amd_l3_cache *l3) | 306 | static void __cpuinit amd_calc_l3_indices(struct amd_northbridge *nb) |
| 318 | { | 307 | { |
| 308 | struct amd_l3_cache *l3 = &nb->l3_cache; | ||
| 319 | unsigned int sc0, sc1, sc2, sc3; | 309 | unsigned int sc0, sc1, sc2, sc3; |
| 320 | u32 val = 0; | 310 | u32 val = 0; |
| 321 | 311 | ||
| 322 | pci_read_config_dword(l3->nb->misc, 0x1C4, &val); | 312 | pci_read_config_dword(nb->misc, 0x1C4, &val); |
| 323 | 313 | ||
| 324 | /* calculate subcache sizes */ | 314 | /* calculate subcache sizes */ |
| 325 | l3->subcaches[0] = sc0 = !(val & BIT(0)); | 315 | l3->subcaches[0] = sc0 = !(val & BIT(0)); |
| 326 | l3->subcaches[1] = sc1 = !(val & BIT(4)); | 316 | l3->subcaches[1] = sc1 = !(val & BIT(4)); |
| 317 | |||
| 318 | if (boot_cpu_data.x86 == 0x15) { | ||
| 319 | l3->subcaches[0] = sc0 += !(val & BIT(1)); | ||
| 320 | l3->subcaches[1] = sc1 += !(val & BIT(5)); | ||
| 321 | } | ||
| 322 | |||
| 327 | l3->subcaches[2] = sc2 = !(val & BIT(8)) + !(val & BIT(9)); | 323 | l3->subcaches[2] = sc2 = !(val & BIT(8)) + !(val & BIT(9)); |
| 328 | l3->subcaches[3] = sc3 = !(val & BIT(12)) + !(val & BIT(13)); | 324 | l3->subcaches[3] = sc3 = !(val & BIT(12)) + !(val & BIT(13)); |
| 329 | 325 | ||
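Each bit read from PCI config offset 0x1C4 marks a disabled L3 slice; family 0x15 splits subcaches 0 and 1 into two slices each, hence the extra increments. A worked decode for a hypothetical register value:

/* Worked example with a made-up register value: val = 0x210
 * (BIT(4) and BIT(9) set, i.e. two disable bits programmed). */
static void l3_subcache_decode_example(void)
{
        u32 val = 0x210;
        unsigned int sc0 = !(val & BIT(0));                     /* 1 */
        unsigned int sc1 = !(val & BIT(4));                     /* 0 */
        unsigned int sc2 = !(val & BIT(8)) + !(val & BIT(9));   /* 1 */
        unsigned int sc3 = !(val & BIT(12)) + !(val & BIT(13)); /* 2 */

        /* on family 0x15, sc0/sc1 each gain a second slice bit:
         *      sc0 += !(val & BIT(1));  sc1 += !(val & BIT(5)); */
        (void)sc0; (void)sc1; (void)sc2; (void)sc3;
}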
| @@ -333,33 +329,16 @@ static void __cpuinit amd_calc_l3_indices(struct amd_l3_cache *l3) | |||
| 333 | static void __cpuinit amd_init_l3_cache(struct _cpuid4_info_regs *this_leaf, | 329 | static void __cpuinit amd_init_l3_cache(struct _cpuid4_info_regs *this_leaf, |
| 334 | int index) | 330 | int index) |
| 335 | { | 331 | { |
| 336 | static struct amd_l3_cache *__cpuinitdata l3_caches; | ||
| 337 | int node; | 332 | int node; |
| 338 | 333 | ||
| 339 | /* only for L3, and not in virtualized environments */ | 334 | /* only for L3, and not in virtualized environments */ |
| 340 | if (index < 3 || amd_nb_num() == 0) | 335 | if (index < 3) |
| 341 | return; | 336 | return; |
| 342 | 337 | ||
| 343 | /* | ||
| 344 | * Strictly speaking, the amount in @size below is leaked since it is | ||
| 345 | * never freed but this is done only on shutdown so it doesn't matter. | ||
| 346 | */ | ||
| 347 | if (!l3_caches) { | ||
| 348 | int size = amd_nb_num() * sizeof(struct amd_l3_cache); | ||
| 349 | |||
| 350 | l3_caches = kzalloc(size, GFP_ATOMIC); | ||
| 351 | if (!l3_caches) | ||
| 352 | return; | ||
| 353 | } | ||
| 354 | |||
| 355 | node = amd_get_nb_id(smp_processor_id()); | 338 | node = amd_get_nb_id(smp_processor_id()); |
| 356 | 339 | this_leaf->nb = node_to_amd_nb(node); | |
| 357 | if (!l3_caches[node].nb) { | 340 | if (this_leaf->nb && !this_leaf->nb->l3_cache.indices) |
| 358 | l3_caches[node].nb = node_to_amd_nb(node); | 341 | amd_calc_l3_indices(this_leaf->nb); |
| 359 | amd_calc_l3_indices(&l3_caches[node]); | ||
| 360 | } | ||
| 361 | |||
| 362 | this_leaf->l3 = &l3_caches[node]; | ||
| 363 | } | 342 | } |
| 364 | 343 | ||
| 365 | /* | 344 | /* |
| @@ -369,11 +348,11 @@ static void __cpuinit amd_init_l3_cache(struct _cpuid4_info_regs *this_leaf, | |||
| 369 | * | 348 | * |
| 370 | * @returns: the disabled index if used or negative value if slot free. | 349 | * @returns: the disabled index if used or negative value if slot free. |
| 371 | */ | 350 | */ |
| 372 | int amd_get_l3_disable_slot(struct amd_l3_cache *l3, unsigned slot) | 351 | int amd_get_l3_disable_slot(struct amd_northbridge *nb, unsigned slot) |
| 373 | { | 352 | { |
| 374 | unsigned int reg = 0; | 353 | unsigned int reg = 0; |
| 375 | 354 | ||
| 376 | pci_read_config_dword(l3->nb->misc, 0x1BC + slot * 4, ®); | 355 | pci_read_config_dword(nb->misc, 0x1BC + slot * 4, ®); |
| 377 | 356 | ||
| 378 | /* check whether this slot is activated already */ | 357 | /* check whether this slot is activated already */ |
| 379 | if (reg & (3UL << 30)) | 358 | if (reg & (3UL << 30)) |
| @@ -387,11 +366,10 @@ static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf, | |||
| 387 | { | 366 | { |
| 388 | int index; | 367 | int index; |
| 389 | 368 | ||
| 390 | if (!this_leaf->l3 || | 369 | if (!this_leaf->base.nb || !amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE)) |
| 391 | !amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE)) | ||
| 392 | return -EINVAL; | 370 | return -EINVAL; |
| 393 | 371 | ||
| 394 | index = amd_get_l3_disable_slot(this_leaf->l3, slot); | 372 | index = amd_get_l3_disable_slot(this_leaf->base.nb, slot); |
| 395 | if (index >= 0) | 373 | if (index >= 0) |
| 396 | return sprintf(buf, "%d\n", index); | 374 | return sprintf(buf, "%d\n", index); |
| 397 | 375 | ||
| @@ -408,7 +386,7 @@ show_cache_disable_##slot(struct _cpuid4_info *this_leaf, char *buf, \ | |||
| 408 | SHOW_CACHE_DISABLE(0) | 386 | SHOW_CACHE_DISABLE(0) |
| 409 | SHOW_CACHE_DISABLE(1) | 387 | SHOW_CACHE_DISABLE(1) |
| 410 | 388 | ||
| 411 | static void amd_l3_disable_index(struct amd_l3_cache *l3, int cpu, | 389 | static void amd_l3_disable_index(struct amd_northbridge *nb, int cpu, |
| 412 | unsigned slot, unsigned long idx) | 390 | unsigned slot, unsigned long idx) |
| 413 | { | 391 | { |
| 414 | int i; | 392 | int i; |
| @@ -421,10 +399,10 @@ static void amd_l3_disable_index(struct amd_l3_cache *l3, int cpu, | |||
| 421 | for (i = 0; i < 4; i++) { | 399 | for (i = 0; i < 4; i++) { |
| 422 | u32 reg = idx | (i << 20); | 400 | u32 reg = idx | (i << 20); |
| 423 | 401 | ||
| 424 | if (!l3->subcaches[i]) | 402 | if (!nb->l3_cache.subcaches[i]) |
| 425 | continue; | 403 | continue; |
| 426 | 404 | ||
| 427 | pci_write_config_dword(l3->nb->misc, 0x1BC + slot * 4, reg); | 405 | pci_write_config_dword(nb->misc, 0x1BC + slot * 4, reg); |
| 428 | 406 | ||
| 429 | /* | 407 | /* |
| 430 | * We need to WBINVD on a core on the node containing the L3 | 408 | * We need to WBINVD on a core on the node containing the L3 |
| @@ -434,7 +412,7 @@ static void amd_l3_disable_index(struct amd_l3_cache *l3, int cpu, | |||
| 434 | wbinvd_on_cpu(cpu); | 412 | wbinvd_on_cpu(cpu); |
| 435 | 413 | ||
| 436 | reg |= BIT(31); | 414 | reg |= BIT(31); |
| 437 | pci_write_config_dword(l3->nb->misc, 0x1BC + slot * 4, reg); | 415 | pci_write_config_dword(nb->misc, 0x1BC + slot * 4, reg); |
| 438 | } | 416 | } |
| 439 | } | 417 | } |
| 440 | 418 | ||
| @@ -448,24 +426,24 @@ static void amd_l3_disable_index(struct amd_l3_cache *l3, int cpu, | |||
| 448 | * | 426 | * |
| 449 | * @return: 0 on success, error status on failure | 427 | * @return: 0 on success, error status on failure |
| 450 | */ | 428 | */ |
| 451 | int amd_set_l3_disable_slot(struct amd_l3_cache *l3, int cpu, unsigned slot, | 429 | int amd_set_l3_disable_slot(struct amd_northbridge *nb, int cpu, unsigned slot, |
| 452 | unsigned long index) | 430 | unsigned long index) |
| 453 | { | 431 | { |
| 454 | int ret = 0; | 432 | int ret = 0; |
| 455 | 433 | ||
| 456 | /* check if @slot is already used or the index is already disabled */ | 434 | /* check if @slot is already used or the index is already disabled */ |
| 457 | ret = amd_get_l3_disable_slot(l3, slot); | 435 | ret = amd_get_l3_disable_slot(nb, slot); |
| 458 | if (ret >= 0) | 436 | if (ret >= 0) |
| 459 | return -EINVAL; | 437 | return -EINVAL; |
| 460 | 438 | ||
| 461 | if (index > l3->indices) | 439 | if (index > nb->l3_cache.indices) |
| 462 | return -EINVAL; | 440 | return -EINVAL; |
| 463 | 441 | ||
| 464 | /* check whether the other slot has disabled the same index already */ | 442 | /* check whether the other slot has disabled the same index already */ |
| 465 | if (index == amd_get_l3_disable_slot(l3, !slot)) | 443 | if (index == amd_get_l3_disable_slot(nb, !slot)) |
| 466 | return -EINVAL; | 444 | return -EINVAL; |
| 467 | 445 | ||
| 468 | amd_l3_disable_index(l3, cpu, slot, index); | 446 | amd_l3_disable_index(nb, cpu, slot, index); |
| 469 | 447 | ||
| 470 | return 0; | 448 | return 0; |
| 471 | } | 449 | } |
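For completeness, the sysfs store path below is the in-tree consumer of this API; programmatic use would look something like this sketch (the cpu and index values are made up):

static int l3_disable_example(void)
{
        int cpu = 3;    /* hypothetical */
        struct amd_northbridge *nb = node_to_amd_nb(amd_get_nb_id(cpu));
        int err;

        if (!nb || !amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
                return -EINVAL;

        /* park cache index 42 in disable slot 0 */
        err = amd_set_l3_disable_slot(nb, cpu, 0, 42);
        if (err)
                pr_warn("could not disable L3 index: %d\n", err);
        return err;
}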
| @@ -480,8 +458,7 @@ static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf, | |||
| 480 | if (!capable(CAP_SYS_ADMIN)) | 458 | if (!capable(CAP_SYS_ADMIN)) |
| 481 | return -EPERM; | 459 | return -EPERM; |
| 482 | 460 | ||
| 483 | if (!this_leaf->l3 || | 461 | if (!this_leaf->base.nb || !amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE)) |
| 484 | !amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE)) | ||
| 485 | return -EINVAL; | 462 | return -EINVAL; |
| 486 | 463 | ||
| 487 | cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map)); | 464 | cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map)); |
| @@ -489,7 +466,7 @@ static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf, | |||
| 489 | if (strict_strtoul(buf, 10, &val) < 0) | 466 | if (strict_strtoul(buf, 10, &val) < 0) |
| 490 | return -EINVAL; | 467 | return -EINVAL; |
| 491 | 468 | ||
| 492 | err = amd_set_l3_disable_slot(this_leaf->l3, cpu, slot, val); | 469 | err = amd_set_l3_disable_slot(this_leaf->base.nb, cpu, slot, val); |
| 493 | if (err) { | 470 | if (err) { |
| 494 | if (err == -EEXIST) | 471 | if (err == -EEXIST) |
| 495 | printk(KERN_WARNING "L3 disable slot %d in use!\n", | 472 | printk(KERN_WARNING "L3 disable slot %d in use!\n", |
| @@ -518,7 +495,7 @@ static struct _cache_attr cache_disable_1 = __ATTR(cache_disable_1, 0644, | |||
| 518 | static ssize_t | 495 | static ssize_t |
| 519 | show_subcaches(struct _cpuid4_info *this_leaf, char *buf, unsigned int cpu) | 496 | show_subcaches(struct _cpuid4_info *this_leaf, char *buf, unsigned int cpu) |
| 520 | { | 497 | { |
| 521 | if (!this_leaf->l3 || !amd_nb_has_feature(AMD_NB_L3_PARTITIONING)) | 498 | if (!this_leaf->base.nb || !amd_nb_has_feature(AMD_NB_L3_PARTITIONING)) |
| 522 | return -EINVAL; | 499 | return -EINVAL; |
| 523 | 500 | ||
| 524 | return sprintf(buf, "%x\n", amd_get_subcaches(cpu)); | 501 | return sprintf(buf, "%x\n", amd_get_subcaches(cpu)); |
| @@ -533,7 +510,7 @@ store_subcaches(struct _cpuid4_info *this_leaf, const char *buf, size_t count, | |||
| 533 | if (!capable(CAP_SYS_ADMIN)) | 510 | if (!capable(CAP_SYS_ADMIN)) |
| 534 | return -EPERM; | 511 | return -EPERM; |
| 535 | 512 | ||
| 536 | if (!this_leaf->l3 || !amd_nb_has_feature(AMD_NB_L3_PARTITIONING)) | 513 | if (!this_leaf->base.nb || !amd_nb_has_feature(AMD_NB_L3_PARTITIONING)) |
| 537 | return -EINVAL; | 514 | return -EINVAL; |
| 538 | 515 | ||
| 539 | if (strict_strtoul(buf, 16, &val) < 0) | 516 | if (strict_strtoul(buf, 16, &val) < 0) |
| @@ -769,7 +746,7 @@ static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index) | |||
| 769 | return; | 746 | return; |
| 770 | } | 747 | } |
| 771 | this_leaf = CPUID4_INFO_IDX(cpu, index); | 748 | this_leaf = CPUID4_INFO_IDX(cpu, index); |
| 772 | num_threads_sharing = 1 + this_leaf->eax.split.num_threads_sharing; | 749 | num_threads_sharing = 1 + this_leaf->base.eax.split.num_threads_sharing; |
| 773 | 750 | ||
| 774 | if (num_threads_sharing == 1) | 751 | if (num_threads_sharing == 1) |
| 775 | cpumask_set_cpu(cpu, to_cpumask(this_leaf->shared_cpu_map)); | 752 | cpumask_set_cpu(cpu, to_cpumask(this_leaf->shared_cpu_map)); |
| @@ -820,29 +797,19 @@ static void __cpuinit free_cache_attributes(unsigned int cpu) | |||
| 820 | for (i = 0; i < num_cache_leaves; i++) | 797 | for (i = 0; i < num_cache_leaves; i++) |
| 821 | cache_remove_shared_cpu_map(cpu, i); | 798 | cache_remove_shared_cpu_map(cpu, i); |
| 822 | 799 | ||
| 823 | kfree(per_cpu(ici_cpuid4_info, cpu)->l3); | ||
| 824 | kfree(per_cpu(ici_cpuid4_info, cpu)); | 800 | kfree(per_cpu(ici_cpuid4_info, cpu)); |
| 825 | per_cpu(ici_cpuid4_info, cpu) = NULL; | 801 | per_cpu(ici_cpuid4_info, cpu) = NULL; |
| 826 | } | 802 | } |
| 827 | 803 | ||
| 828 | static int | ||
| 829 | __cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf) | ||
| 830 | { | ||
| 831 | struct _cpuid4_info_regs *leaf_regs = | ||
| 832 | (struct _cpuid4_info_regs *)this_leaf; | ||
| 833 | |||
| 834 | return cpuid4_cache_lookup_regs(index, leaf_regs); | ||
| 835 | } | ||
| 836 | |||
| 837 | static void __cpuinit get_cpu_leaves(void *_retval) | 804 | static void __cpuinit get_cpu_leaves(void *_retval) |
| 838 | { | 805 | { |
| 839 | int j, *retval = _retval, cpu = smp_processor_id(); | 806 | int j, *retval = _retval, cpu = smp_processor_id(); |
| 840 | 807 | ||
| 841 | /* Do cpuid and store the results */ | 808 | /* Do cpuid and store the results */ |
| 842 | for (j = 0; j < num_cache_leaves; j++) { | 809 | for (j = 0; j < num_cache_leaves; j++) { |
| 843 | struct _cpuid4_info *this_leaf; | 810 | struct _cpuid4_info *this_leaf = CPUID4_INFO_IDX(cpu, j); |
| 844 | this_leaf = CPUID4_INFO_IDX(cpu, j); | 811 | |
| 845 | *retval = cpuid4_cache_lookup(j, this_leaf); | 812 | *retval = cpuid4_cache_lookup_regs(j, &this_leaf->base); |
| 846 | if (unlikely(*retval < 0)) { | 813 | if (unlikely(*retval < 0)) { |
| 847 | int i; | 814 | int i; |
| 848 | 815 | ||
| @@ -900,16 +867,16 @@ static ssize_t show_##file_name(struct _cpuid4_info *this_leaf, char *buf, \ | |||
| 900 | return sprintf(buf, "%lu\n", (unsigned long)this_leaf->object + val); \ | 867 | return sprintf(buf, "%lu\n", (unsigned long)this_leaf->object + val); \ |
| 901 | } | 868 | } |
| 902 | 869 | ||
| 903 | show_one_plus(level, eax.split.level, 0); | 870 | show_one_plus(level, base.eax.split.level, 0); |
| 904 | show_one_plus(coherency_line_size, ebx.split.coherency_line_size, 1); | 871 | show_one_plus(coherency_line_size, base.ebx.split.coherency_line_size, 1); |
| 905 | show_one_plus(physical_line_partition, ebx.split.physical_line_partition, 1); | 872 | show_one_plus(physical_line_partition, base.ebx.split.physical_line_partition, 1); |
| 906 | show_one_plus(ways_of_associativity, ebx.split.ways_of_associativity, 1); | 873 | show_one_plus(ways_of_associativity, base.ebx.split.ways_of_associativity, 1); |
| 907 | show_one_plus(number_of_sets, ecx.split.number_of_sets, 1); | 874 | show_one_plus(number_of_sets, base.ecx.split.number_of_sets, 1); |
| 908 | 875 | ||
| 909 | static ssize_t show_size(struct _cpuid4_info *this_leaf, char *buf, | 876 | static ssize_t show_size(struct _cpuid4_info *this_leaf, char *buf, |
| 910 | unsigned int cpu) | 877 | unsigned int cpu) |
| 911 | { | 878 | { |
| 912 | return sprintf(buf, "%luK\n", this_leaf->size / 1024); | 879 | return sprintf(buf, "%luK\n", this_leaf->base.size / 1024); |
| 913 | } | 880 | } |
| 914 | 881 | ||
| 915 | static ssize_t show_shared_cpu_map_func(struct _cpuid4_info *this_leaf, | 882 | static ssize_t show_shared_cpu_map_func(struct _cpuid4_info *this_leaf, |
| @@ -946,7 +913,7 @@ static inline ssize_t show_shared_cpu_list(struct _cpuid4_info *leaf, char *buf, | |||
| 946 | static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf, | 913 | static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf, |
| 947 | unsigned int cpu) | 914 | unsigned int cpu) |
| 948 | { | 915 | { |
| 949 | switch (this_leaf->eax.split.type) { | 916 | switch (this_leaf->base.eax.split.type) { |
| 950 | case CACHE_TYPE_DATA: | 917 | case CACHE_TYPE_DATA: |
| 951 | return sprintf(buf, "Data\n"); | 918 | return sprintf(buf, "Data\n"); |
| 952 | case CACHE_TYPE_INST: | 919 | case CACHE_TYPE_INST: |
| @@ -1135,7 +1102,7 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev) | |||
| 1135 | 1102 | ||
| 1136 | ktype_cache.default_attrs = default_attrs; | 1103 | ktype_cache.default_attrs = default_attrs; |
| 1137 | #ifdef CONFIG_AMD_NB | 1104 | #ifdef CONFIG_AMD_NB |
| 1138 | if (this_leaf->l3) | 1105 | if (this_leaf->base.nb) |
| 1139 | ktype_cache.default_attrs = amd_l3_attrs(); | 1106 | ktype_cache.default_attrs = amd_l3_attrs(); |
| 1140 | #endif | 1107 | #endif |
| 1141 | retval = kobject_init_and_add(&(this_object->kobj), | 1108 | retval = kobject_init_and_add(&(this_object->kobj), |
diff --git a/arch/x86/kernel/cpu/mcheck/mce-apei.c b/arch/x86/kernel/cpu/mcheck/mce-apei.c index 83930deec3c6..507ea58688e2 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-apei.c +++ b/arch/x86/kernel/cpu/mcheck/mce-apei.c | |||
| @@ -28,6 +28,7 @@ | |||
| 28 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | 28 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
| 29 | */ | 29 | */ |
| 30 | 30 | ||
| 31 | #include <linux/export.h> | ||
| 31 | #include <linux/kernel.h> | 32 | #include <linux/kernel.h> |
| 32 | #include <linux/acpi.h> | 33 | #include <linux/acpi.h> |
| 33 | #include <linux/cper.h> | 34 | #include <linux/cper.h> |
diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c index 0ed633c5048b..319882ef848d 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-inject.c +++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c | |||
| @@ -78,27 +78,20 @@ static void raise_exception(struct mce *m, struct pt_regs *pregs) | |||
| 78 | 78 | ||
| 79 | static cpumask_var_t mce_inject_cpumask; | 79 | static cpumask_var_t mce_inject_cpumask; |
| 80 | 80 | ||
| 81 | static int mce_raise_notify(struct notifier_block *self, | 81 | static int mce_raise_notify(unsigned int cmd, struct pt_regs *regs) |
| 82 | unsigned long val, void *data) | ||
| 83 | { | 82 | { |
| 84 | struct die_args *args = (struct die_args *)data; | ||
| 85 | int cpu = smp_processor_id(); | 83 | int cpu = smp_processor_id(); |
| 86 | struct mce *m = &__get_cpu_var(injectm); | 84 | struct mce *m = &__get_cpu_var(injectm); |
| 87 | if (val != DIE_NMI || !cpumask_test_cpu(cpu, mce_inject_cpumask)) | 85 | if (!cpumask_test_cpu(cpu, mce_inject_cpumask)) |
| 88 | return NOTIFY_DONE; | 86 | return NMI_DONE; |
| 89 | cpumask_clear_cpu(cpu, mce_inject_cpumask); | 87 | cpumask_clear_cpu(cpu, mce_inject_cpumask); |
| 90 | if (m->inject_flags & MCJ_EXCEPTION) | 88 | if (m->inject_flags & MCJ_EXCEPTION) |
| 91 | raise_exception(m, args->regs); | 89 | raise_exception(m, regs); |
| 92 | else if (m->status) | 90 | else if (m->status) |
| 93 | raise_poll(m); | 91 | raise_poll(m); |
| 94 | return NOTIFY_STOP; | 92 | return NMI_HANDLED; |
| 95 | } | 93 | } |
| 96 | 94 | ||
| 97 | static struct notifier_block mce_raise_nb = { | ||
| 98 | .notifier_call = mce_raise_notify, | ||
| 99 | .priority = NMI_LOCAL_NORMAL_PRIOR, | ||
| 100 | }; | ||
| 101 | |||
| 102 | /* Inject mce on current CPU */ | 95 | /* Inject mce on current CPU */ |
| 103 | static int raise_local(void) | 96 | static int raise_local(void) |
| 104 | { | 97 | { |
| @@ -215,8 +208,9 @@ static int inject_init(void) | |||
| 215 | if (!alloc_cpumask_var(&mce_inject_cpumask, GFP_KERNEL)) | 208 | if (!alloc_cpumask_var(&mce_inject_cpumask, GFP_KERNEL)) |
| 216 | return -ENOMEM; | 209 | return -ENOMEM; |
| 217 | printk(KERN_INFO "Machine check injector initialized\n"); | 210 | printk(KERN_INFO "Machine check injector initialized\n"); |
| 218 | mce_chrdev_ops.write = mce_write; | 211 | register_mce_write_callback(mce_write); |
| 219 | register_die_notifier(&mce_raise_nb); | 212 | register_nmi_handler(NMI_LOCAL, mce_raise_notify, 0, |
| 213 | "mce_notify"); | ||
| 220 | return 0; | 214 | return 0; |
| 221 | } | 215 | } |
| 222 | 216 | ||
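This hunk is one instance of a pattern repeated across the series: NMI consumers drop the die-notifier indirection (struct die_args, NOTIFY_DONE/NOTIFY_STOP) and register directly with the new NMI infrastructure, returning NMI_DONE or NMI_HANDLED. A minimal sketch of the new-style handler, mirroring the registration above; the handler name and the per-CPU predicate are illustrative, not from this patch:

#include <asm/nmi.h>

static int my_nmi_handler(unsigned int cmd, struct pt_regs *regs)
{
	if (!this_cpu_expects_nmi())	/* hypothetical per-CPU check */
		return NMI_DONE;	/* not ours; try the next handler */
	/* ... consume the event using regs ... */
	return NMI_HANDLED;
}

static int __init my_nmi_init(void)
{
	return register_nmi_handler(NMI_LOCAL, my_nmi_handler, 0, "my_nmi");
}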
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 08363b042122..2af127d4c3d1 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c | |||
| @@ -36,8 +36,8 @@ | |||
| 36 | #include <linux/fs.h> | 36 | #include <linux/fs.h> |
| 37 | #include <linux/mm.h> | 37 | #include <linux/mm.h> |
| 38 | #include <linux/debugfs.h> | 38 | #include <linux/debugfs.h> |
| 39 | #include <linux/edac_mce.h> | ||
| 40 | #include <linux/irq_work.h> | 39 | #include <linux/irq_work.h> |
| 40 | #include <linux/export.h> | ||
| 41 | 41 | ||
| 42 | #include <asm/processor.h> | 42 | #include <asm/processor.h> |
| 43 | #include <asm/mce.h> | 43 | #include <asm/mce.h> |
| @@ -144,23 +144,20 @@ static struct mce_log mcelog = { | |||
| 144 | void mce_log(struct mce *mce) | 144 | void mce_log(struct mce *mce) |
| 145 | { | 145 | { |
| 146 | unsigned next, entry; | 146 | unsigned next, entry; |
| 147 | int ret = 0; | ||
| 147 | 148 | ||
| 148 | /* Emit the trace record: */ | 149 | /* Emit the trace record: */ |
| 149 | trace_mce_record(mce); | 150 | trace_mce_record(mce); |
| 150 | 151 | ||
| 152 | ret = atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, mce); | ||
| 153 | if (ret == NOTIFY_STOP) | ||
| 154 | return; | ||
| 155 | |||
| 151 | mce->finished = 0; | 156 | mce->finished = 0; |
| 152 | wmb(); | 157 | wmb(); |
| 153 | for (;;) { | 158 | for (;;) { |
| 154 | entry = rcu_dereference_check_mce(mcelog.next); | 159 | entry = rcu_dereference_check_mce(mcelog.next); |
| 155 | for (;;) { | 160 | for (;;) { |
| 156 | /* | ||
| 157 | * If edac_mce is enabled, it will check the error type | ||
| 158 | * and will process it, if it is a known error. | ||
| 159 | * Otherwise, the error will be sent through mcelog | ||
| 160 | * interface | ||
| 161 | */ | ||
| 162 | if (edac_mce_parse(mce)) | ||
| 163 | return; | ||
| 164 | 161 | ||
| 165 | /* | 162 | /* |
| 166 | * When the buffer fills up discard new entries. | 163 | * When the buffer fills up discard new entries. |
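With the edac_mce hook deleted, decode consumers instead sit on x86_mce_decoder_chain, which mce_log() now calls before queuing the record: a NOTIFY_STOP return swallows the error, anything else falls through to /dev/mcelog. A hedged sketch of a chain client (the decode predicate and the registration step are assumptions, not shown in this diff):

static int my_mce_decoder(struct notifier_block *nb, unsigned long val,
			  void *data)
{
	struct mce *m = data;

	if (my_driver_decodes(m))	/* hypothetical: error fully handled */
		return NOTIFY_STOP;	/* mce_log() returns early */

	return NOTIFY_DONE;		/* let the mcelog buffer see it */
}

static struct notifier_block my_mce_decoder_nb = {
	.notifier_call	= my_mce_decoder,
};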
| @@ -217,8 +214,13 @@ static void print_mce(struct mce *m) | |||
| 217 | pr_cont("MISC %llx ", m->misc); | 214 | pr_cont("MISC %llx ", m->misc); |
| 218 | 215 | ||
| 219 | pr_cont("\n"); | 216 | pr_cont("\n"); |
| 220 | pr_emerg(HW_ERR "PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x\n", | 217 | /* |
| 221 | m->cpuvendor, m->cpuid, m->time, m->socketid, m->apicid); | 218 | * Note this output is parsed by external tools and old fields |
| 219 | * should not be changed. | ||
| 220 | */ | ||
| 221 | pr_emerg(HW_ERR "PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x microcode %x\n", | ||
| 222 | m->cpuvendor, m->cpuid, m->time, m->socketid, m->apicid, | ||
| 223 | cpu_data(m->extcpu).microcode); | ||
| 222 | 224 | ||
| 223 | /* | 225 | /* |
| 224 | * Print out human-readable details about the MCE error, | 226 | * Print out human-readable details about the MCE error, |
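Because external tools parse this line, the new microcode field is strictly appended after the existing ones. Assuming HW_ERR carries its usual "[Hardware Error]: " prefix, the emitted record looks like this (all values illustrative):

	[Hardware Error]: PROCESSOR 0:206a7 TIME 1312345678 SOCKET 0 APIC 0 microcode 17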
| @@ -551,10 +553,8 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b) | |||
| 551 | * Don't get the IP here because it's unlikely to | 553 | * Don't get the IP here because it's unlikely to |
| 552 | * have anything to do with the actual error location. | 554 | * have anything to do with the actual error location. |
| 553 | */ | 555 | */ |
| 554 | if (!(flags & MCP_DONTLOG) && !mce_dont_log_ce) { | 556 | if (!(flags & MCP_DONTLOG) && !mce_dont_log_ce) |
| 555 | mce_log(&m); | 557 | mce_log(&m); |
| 556 | atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, &m); | ||
| 557 | } | ||
| 558 | 558 | ||
| 559 | /* | 559 | /* |
| 560 | * Clear state for this bank. | 560 | * Clear state for this bank. |
| @@ -908,9 +908,6 @@ void do_machine_check(struct pt_regs *regs, long error_code) | |||
| 908 | 908 | ||
| 909 | percpu_inc(mce_exception_count); | 909 | percpu_inc(mce_exception_count); |
| 910 | 910 | ||
| 911 | if (notify_die(DIE_NMI, "machine check", regs, error_code, | ||
| 912 | 18, SIGKILL) == NOTIFY_STOP) | ||
| 913 | goto out; | ||
| 914 | if (!banks) | 911 | if (!banks) |
| 915 | goto out; | 912 | goto out; |
| 916 | 913 | ||
| @@ -1140,6 +1137,15 @@ static void mce_start_timer(unsigned long data) | |||
| 1140 | add_timer_on(t, smp_processor_id()); | 1137 | add_timer_on(t, smp_processor_id()); |
| 1141 | } | 1138 | } |
| 1142 | 1139 | ||
| 1140 | /* Must not be called in IRQ context where del_timer_sync() can deadlock */ | ||
| 1141 | static void mce_timer_delete_all(void) | ||
| 1142 | { | ||
| 1143 | int cpu; | ||
| 1144 | |||
| 1145 | for_each_online_cpu(cpu) | ||
| 1146 | del_timer_sync(&per_cpu(mce_timer, cpu)); | ||
| 1147 | } | ||
| 1148 | |||
| 1143 | static void mce_do_trigger(struct work_struct *work) | 1149 | static void mce_do_trigger(struct work_struct *work) |
| 1144 | { | 1150 | { |
| 1145 | call_usermodehelper(mce_helper, mce_helper_argv, NULL, UMH_NO_WAIT); | 1151 | call_usermodehelper(mce_helper, mce_helper_argv, NULL, UMH_NO_WAIT); |
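mce_timer_delete_all() exists because the old code deleted the per-CPU timers from inside on_each_cpu() callbacks, i.e. in IRQ context, where del_timer_sync() can deadlock: it spins waiting for a timer handler that may never resume on the CPU now stuck in the interrupt. The callers are reshuffled accordingly, so deletion happens once, from process context, before the cross-CPU work; as the mce_restart() hunk further down shows:

static void mce_restart(void)
{
	mce_timer_delete_all();			/* process context: safe */
	on_each_cpu(mce_cpu_restart, NULL, 1);	/* no timer work in the IPI */
}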
| @@ -1628,16 +1634,35 @@ static long mce_chrdev_ioctl(struct file *f, unsigned int cmd, | |||
| 1628 | } | 1634 | } |
| 1629 | } | 1635 | } |
| 1630 | 1636 | ||
| 1631 | /* Modified in mce-inject.c, so not static or const */ | 1637 | static ssize_t (*mce_write)(struct file *filp, const char __user *ubuf, |
| 1632 | struct file_operations mce_chrdev_ops = { | 1638 | size_t usize, loff_t *off); |
| 1639 | |||
| 1640 | void register_mce_write_callback(ssize_t (*fn)(struct file *filp, | ||
| 1641 | const char __user *ubuf, | ||
| 1642 | size_t usize, loff_t *off)) | ||
| 1643 | { | ||
| 1644 | mce_write = fn; | ||
| 1645 | } | ||
| 1646 | EXPORT_SYMBOL_GPL(register_mce_write_callback); | ||
| 1647 | |||
| 1648 | ssize_t mce_chrdev_write(struct file *filp, const char __user *ubuf, | ||
| 1649 | size_t usize, loff_t *off) | ||
| 1650 | { | ||
| 1651 | if (mce_write) | ||
| 1652 | return mce_write(filp, ubuf, usize, off); | ||
| 1653 | else | ||
| 1654 | return -EINVAL; | ||
| 1655 | } | ||
| 1656 | |||
| 1657 | static const struct file_operations mce_chrdev_ops = { | ||
| 1633 | .open = mce_chrdev_open, | 1658 | .open = mce_chrdev_open, |
| 1634 | .release = mce_chrdev_release, | 1659 | .release = mce_chrdev_release, |
| 1635 | .read = mce_chrdev_read, | 1660 | .read = mce_chrdev_read, |
| 1661 | .write = mce_chrdev_write, | ||
| 1636 | .poll = mce_chrdev_poll, | 1662 | .poll = mce_chrdev_poll, |
| 1637 | .unlocked_ioctl = mce_chrdev_ioctl, | 1663 | .unlocked_ioctl = mce_chrdev_ioctl, |
| 1638 | .llseek = no_llseek, | 1664 | .llseek = no_llseek, |
| 1639 | }; | 1665 | }; |
| 1640 | EXPORT_SYMBOL_GPL(mce_chrdev_ops); | ||
| 1641 | 1666 | ||
| 1642 | static struct miscdevice mce_chrdev_device = { | 1667 | static struct miscdevice mce_chrdev_device = { |
| 1643 | MISC_MCELOG_MINOR, | 1668 | MISC_MCELOG_MINOR, |
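The write path is now decoupled: mce_chrdev_ops becomes static const, and the injector supplies its write method through register_mce_write_callback() instead of patching the previously exported file_operations. A minimal client sketch, matching how mce-inject uses it above (the function names here are hypothetical):

static ssize_t my_mce_write(struct file *filp, const char __user *ubuf,
			    size_t usize, loff_t *off)
{
	/* ... copy a struct mce from userspace and inject it ... */
	return usize;
}

static int __init my_injector_init(void)
{
	register_mce_write_callback(my_mce_write);	/* no ops patching */
	return 0;
}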
| @@ -1750,7 +1775,6 @@ static struct syscore_ops mce_syscore_ops = { | |||
| 1750 | 1775 | ||
| 1751 | static void mce_cpu_restart(void *data) | 1776 | static void mce_cpu_restart(void *data) |
| 1752 | { | 1777 | { |
| 1753 | del_timer_sync(&__get_cpu_var(mce_timer)); | ||
| 1754 | if (!mce_available(__this_cpu_ptr(&cpu_info))) | 1778 | if (!mce_available(__this_cpu_ptr(&cpu_info))) |
| 1755 | return; | 1779 | return; |
| 1756 | __mcheck_cpu_init_generic(); | 1780 | __mcheck_cpu_init_generic(); |
| @@ -1760,16 +1784,15 @@ static void mce_cpu_restart(void *data) | |||
| 1760 | /* Reinit MCEs after user configuration changes */ | 1784 | /* Reinit MCEs after user configuration changes */ |
| 1761 | static void mce_restart(void) | 1785 | static void mce_restart(void) |
| 1762 | { | 1786 | { |
| 1787 | mce_timer_delete_all(); | ||
| 1763 | on_each_cpu(mce_cpu_restart, NULL, 1); | 1788 | on_each_cpu(mce_cpu_restart, NULL, 1); |
| 1764 | } | 1789 | } |
| 1765 | 1790 | ||
| 1766 | /* Toggle features for corrected errors */ | 1791 | /* Toggle features for corrected errors */ |
| 1767 | static void mce_disable_ce(void *all) | 1792 | static void mce_disable_cmci(void *data) |
| 1768 | { | 1793 | { |
| 1769 | if (!mce_available(__this_cpu_ptr(&cpu_info))) | 1794 | if (!mce_available(__this_cpu_ptr(&cpu_info))) |
| 1770 | return; | 1795 | return; |
| 1771 | if (all) | ||
| 1772 | del_timer_sync(&__get_cpu_var(mce_timer)); | ||
| 1773 | cmci_clear(); | 1796 | cmci_clear(); |
| 1774 | } | 1797 | } |
| 1775 | 1798 | ||
| @@ -1852,7 +1875,8 @@ static ssize_t set_ignore_ce(struct sys_device *s, | |||
| 1852 | if (mce_ignore_ce ^ !!new) { | 1875 | if (mce_ignore_ce ^ !!new) { |
| 1853 | if (new) { | 1876 | if (new) { |
| 1854 | /* disable ce features */ | 1877 | /* disable ce features */ |
| 1855 | on_each_cpu(mce_disable_ce, (void *)1, 1); | 1878 | mce_timer_delete_all(); |
| 1879 | on_each_cpu(mce_disable_cmci, NULL, 1); | ||
| 1856 | mce_ignore_ce = 1; | 1880 | mce_ignore_ce = 1; |
| 1857 | } else { | 1881 | } else { |
| 1858 | /* enable ce features */ | 1882 | /* enable ce features */ |
| @@ -1875,7 +1899,7 @@ static ssize_t set_cmci_disabled(struct sys_device *s, | |||
| 1875 | if (mce_cmci_disabled ^ !!new) { | 1899 | if (mce_cmci_disabled ^ !!new) { |
| 1876 | if (new) { | 1900 | if (new) { |
| 1877 | /* disable cmci */ | 1901 | /* disable cmci */ |
| 1878 | on_each_cpu(mce_disable_ce, NULL, 1); | 1902 | on_each_cpu(mce_disable_cmci, NULL, 1); |
| 1879 | mce_cmci_disabled = 1; | 1903 | mce_cmci_disabled = 1; |
| 1880 | } else { | 1904 | } else { |
| 1881 | /* enable cmci */ | 1905 | /* enable cmci */ |
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c index 8694ef56459d..38e49bc95ffc 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_intel.c +++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c | |||
| @@ -28,7 +28,7 @@ static DEFINE_PER_CPU(mce_banks_t, mce_banks_owned); | |||
| 28 | * cmci_discover_lock protects against parallel discovery attempts | 28 | * cmci_discover_lock protects against parallel discovery attempts |
| 29 | * which could race against each other. | 29 | * which could race against each other. |
| 30 | */ | 30 | */ |
| 31 | static DEFINE_SPINLOCK(cmci_discover_lock); | 31 | static DEFINE_RAW_SPINLOCK(cmci_discover_lock); |
| 32 | 32 | ||
| 33 | #define CMCI_THRESHOLD 1 | 33 | #define CMCI_THRESHOLD 1 |
| 34 | 34 | ||
| @@ -85,7 +85,7 @@ static void cmci_discover(int banks, int boot) | |||
| 85 | int hdr = 0; | 85 | int hdr = 0; |
| 86 | int i; | 86 | int i; |
| 87 | 87 | ||
| 88 | spin_lock_irqsave(&cmci_discover_lock, flags); | 88 | raw_spin_lock_irqsave(&cmci_discover_lock, flags); |
| 89 | for (i = 0; i < banks; i++) { | 89 | for (i = 0; i < banks; i++) { |
| 90 | u64 val; | 90 | u64 val; |
| 91 | 91 | ||
| @@ -116,7 +116,7 @@ static void cmci_discover(int banks, int boot) | |||
| 116 | WARN_ON(!test_bit(i, __get_cpu_var(mce_poll_banks))); | 116 | WARN_ON(!test_bit(i, __get_cpu_var(mce_poll_banks))); |
| 117 | } | 117 | } |
| 118 | } | 118 | } |
| 119 | spin_unlock_irqrestore(&cmci_discover_lock, flags); | 119 | raw_spin_unlock_irqrestore(&cmci_discover_lock, flags); |
| 120 | if (hdr) | 120 | if (hdr) |
| 121 | printk(KERN_CONT "\n"); | 121 | printk(KERN_CONT "\n"); |
| 122 | } | 122 | } |
| @@ -150,7 +150,7 @@ void cmci_clear(void) | |||
| 150 | 150 | ||
| 151 | if (!cmci_supported(&banks)) | 151 | if (!cmci_supported(&banks)) |
| 152 | return; | 152 | return; |
| 153 | spin_lock_irqsave(&cmci_discover_lock, flags); | 153 | raw_spin_lock_irqsave(&cmci_discover_lock, flags); |
| 154 | for (i = 0; i < banks; i++) { | 154 | for (i = 0; i < banks; i++) { |
| 155 | if (!test_bit(i, __get_cpu_var(mce_banks_owned))) | 155 | if (!test_bit(i, __get_cpu_var(mce_banks_owned))) |
| 156 | continue; | 156 | continue; |
| @@ -160,7 +160,7 @@ void cmci_clear(void) | |||
| 160 | wrmsrl(MSR_IA32_MCx_CTL2(i), val); | 160 | wrmsrl(MSR_IA32_MCx_CTL2(i), val); |
| 161 | __clear_bit(i, __get_cpu_var(mce_banks_owned)); | 161 | __clear_bit(i, __get_cpu_var(mce_banks_owned)); |
| 162 | } | 162 | } |
| 163 | spin_unlock_irqrestore(&cmci_discover_lock, flags); | 163 | raw_spin_unlock_irqrestore(&cmci_discover_lock, flags); |
| 164 | } | 164 | } |
| 165 | 165 | ||
| 166 | /* | 166 | /* |
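The switch of cmci_discover_lock from spinlock_t to raw_spinlock_t is presumably an RT-readiness change (an assumption; the diff itself carries no rationale): under PREEMPT_RT a spinlock_t becomes a sleeping lock, which is forbidden in these paths since they run with interrupts disabled while poking the MCA bank MSRs, whereas raw_spinlock_t always keeps the non-sleeping, IRQ-safe semantics used here:

	raw_spin_lock_irqsave(&cmci_discover_lock, flags);
	/* rdmsrl()/wrmsrl() on the per-bank CMCI control MSRs */
	raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);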
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index 27c625178bf1..787e06c84ea6 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c | |||
| @@ -18,6 +18,7 @@ | |||
| 18 | #include <linux/jiffies.h> | 18 | #include <linux/jiffies.h> |
| 19 | #include <linux/kernel.h> | 19 | #include <linux/kernel.h> |
| 20 | #include <linux/percpu.h> | 20 | #include <linux/percpu.h> |
| 21 | #include <linux/export.h> | ||
| 21 | #include <linux/sysdev.h> | 22 | #include <linux/sysdev.h> |
| 22 | #include <linux/types.h> | 23 | #include <linux/types.h> |
| 23 | #include <linux/init.h> | 24 | #include <linux/init.h> |
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index d944bf6c50e9..0a630dd4b620 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c | |||
| @@ -11,6 +11,8 @@ | |||
| 11 | */ | 11 | */ |
| 12 | 12 | ||
| 13 | #include <linux/types.h> | 13 | #include <linux/types.h> |
| 14 | #include <linux/time.h> | ||
| 15 | #include <linux/clocksource.h> | ||
| 14 | #include <linux/module.h> | 16 | #include <linux/module.h> |
| 15 | #include <asm/processor.h> | 17 | #include <asm/processor.h> |
| 16 | #include <asm/hypervisor.h> | 18 | #include <asm/hypervisor.h> |
| @@ -36,6 +38,25 @@ static bool __init ms_hyperv_platform(void) | |||
| 36 | !memcmp("Microsoft Hv", hyp_signature, 12); | 38 | !memcmp("Microsoft Hv", hyp_signature, 12); |
| 37 | } | 39 | } |
| 38 | 40 | ||
| 41 | static cycle_t read_hv_clock(struct clocksource *arg) | ||
| 42 | { | ||
| 43 | cycle_t current_tick; | ||
| 44 | /* | ||
| 45 | * Read the partition counter to get the current tick count. This count | ||
| 46 | * is set to 0 when the partition is created and is incremented in | ||
| 47 | * 100 nanosecond units. | ||
| 48 | */ | ||
| 49 | rdmsrl(HV_X64_MSR_TIME_REF_COUNT, current_tick); | ||
| 50 | return current_tick; | ||
| 51 | } | ||
| 52 | |||
| 53 | static struct clocksource hyperv_cs = { | ||
| 54 | .name = "hyperv_clocksource", | ||
| 55 | .rating = 400, /* use this when running on Hyper-V */ | ||
| 56 | .read = read_hv_clock, | ||
| 57 | .mask = CLOCKSOURCE_MASK(64), | ||
| 58 | }; | ||
| 59 | |||
| 39 | static void __init ms_hyperv_init_platform(void) | 60 | static void __init ms_hyperv_init_platform(void) |
| 40 | { | 61 | { |
| 41 | /* | 62 | /* |
| @@ -46,6 +67,8 @@ static void __init ms_hyperv_init_platform(void) | |||
| 46 | 67 | ||
| 47 | printk(KERN_INFO "HyperV: features 0x%x, hints 0x%x\n", | 68 | printk(KERN_INFO "HyperV: features 0x%x, hints 0x%x\n", |
| 48 | ms_hyperv.features, ms_hyperv.hints); | 69 | ms_hyperv.features, ms_hyperv.hints); |
| 70 | |||
| 71 | clocksource_register_hz(&hyperv_cs, NSEC_PER_SEC/100); | ||
| 49 | } | 72 | } |
| 50 | 73 | ||
| 51 | const __refconst struct hypervisor_x86 x86_hyper_ms_hyperv = { | 74 | const __refconst struct hypervisor_x86 x86_hyper_ms_hyperv = { |
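The frequency handed to clocksource_register_hz() follows directly from the 100 ns tick described in read_hv_clock():

	/* one tick per 100 ns  =>  1,000,000,000 / 100 = 10,000,000 Hz */
	clocksource_register_hz(&hyperv_cs, NSEC_PER_SEC / 100);	/* 10 MHz */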
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index 08119a37e53c..6b96110bb0c3 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c | |||
| @@ -149,7 +149,6 @@ struct set_mtrr_data { | |||
| 149 | */ | 149 | */ |
| 150 | static int mtrr_rendezvous_handler(void *info) | 150 | static int mtrr_rendezvous_handler(void *info) |
| 151 | { | 151 | { |
| 152 | #ifdef CONFIG_SMP | ||
| 153 | struct set_mtrr_data *data = info; | 152 | struct set_mtrr_data *data = info; |
| 154 | 153 | ||
| 155 | /* | 154 | /* |
| @@ -171,7 +170,6 @@ static int mtrr_rendezvous_handler(void *info) | |||
| 171 | } else if (mtrr_aps_delayed_init || !cpu_online(smp_processor_id())) { | 170 | } else if (mtrr_aps_delayed_init || !cpu_online(smp_processor_id())) { |
| 172 | mtrr_if->set_all(); | 171 | mtrr_if->set_all(); |
| 173 | } | 172 | } |
| 174 | #endif | ||
| 175 | return 0; | 173 | return 0; |
| 176 | } | 174 | } |
| 177 | 175 | ||
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 4ee3abf20ed6..640891014b2a 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c | |||
| @@ -32,6 +32,8 @@ | |||
| 32 | #include <asm/smp.h> | 32 | #include <asm/smp.h> |
| 33 | #include <asm/alternative.h> | 33 | #include <asm/alternative.h> |
| 34 | 34 | ||
| 35 | #include "perf_event.h" | ||
| 36 | |||
| 35 | #if 0 | 37 | #if 0 |
| 36 | #undef wrmsrl | 38 | #undef wrmsrl |
| 37 | #define wrmsrl(msr, val) \ | 39 | #define wrmsrl(msr, val) \ |
| @@ -43,283 +45,17 @@ do { \ | |||
| 43 | } while (0) | 45 | } while (0) |
| 44 | #endif | 46 | #endif |
| 45 | 47 | ||
| 46 | /* | 48 | struct x86_pmu x86_pmu __read_mostly; |
| 47 | * | NHM/WSM | SNB | | ||
| 48 | * register ------------------------------- | ||
| 49 | * | HT | no HT | HT | no HT | | ||
| 50 | *----------------------------------------- | ||
| 51 | * offcore | core | core | cpu | core | | ||
| 52 | * lbr_sel | core | core | cpu | core | | ||
| 53 | * ld_lat | cpu | core | cpu | core | | ||
| 54 | *----------------------------------------- | ||
| 55 | * | ||
| 56 | * Given that there is a small number of shared regs, | ||
| 57 | * we can pre-allocate their slot in the per-cpu | ||
| 58 | * per-core reg tables. | ||
| 59 | */ | ||
| 60 | enum extra_reg_type { | ||
| 61 | EXTRA_REG_NONE = -1, /* not used */ | ||
| 62 | |||
| 63 | EXTRA_REG_RSP_0 = 0, /* offcore_response_0 */ | ||
| 64 | EXTRA_REG_RSP_1 = 1, /* offcore_response_1 */ | ||
| 65 | |||
| 66 | EXTRA_REG_MAX /* number of entries needed */ | ||
| 67 | }; | ||
| 68 | |||
| 69 | struct event_constraint { | ||
| 70 | union { | ||
| 71 | unsigned long idxmsk[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; | ||
| 72 | u64 idxmsk64; | ||
| 73 | }; | ||
| 74 | u64 code; | ||
| 75 | u64 cmask; | ||
| 76 | int weight; | ||
| 77 | }; | ||
| 78 | |||
| 79 | struct amd_nb { | ||
| 80 | int nb_id; /* NorthBridge id */ | ||
| 81 | int refcnt; /* reference count */ | ||
| 82 | struct perf_event *owners[X86_PMC_IDX_MAX]; | ||
| 83 | struct event_constraint event_constraints[X86_PMC_IDX_MAX]; | ||
| 84 | }; | ||
| 85 | |||
| 86 | struct intel_percore; | ||
| 87 | |||
| 88 | #define MAX_LBR_ENTRIES 16 | ||
| 89 | |||
| 90 | struct cpu_hw_events { | ||
| 91 | /* | ||
| 92 | * Generic x86 PMC bits | ||
| 93 | */ | ||
| 94 | struct perf_event *events[X86_PMC_IDX_MAX]; /* in counter order */ | ||
| 95 | unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; | ||
| 96 | unsigned long running[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; | ||
| 97 | int enabled; | ||
| 98 | |||
| 99 | int n_events; | ||
| 100 | int n_added; | ||
| 101 | int n_txn; | ||
| 102 | int assign[X86_PMC_IDX_MAX]; /* event to counter assignment */ | ||
| 103 | u64 tags[X86_PMC_IDX_MAX]; | ||
| 104 | struct perf_event *event_list[X86_PMC_IDX_MAX]; /* in enabled order */ | ||
| 105 | |||
| 106 | unsigned int group_flag; | ||
| 107 | |||
| 108 | /* | ||
| 109 | * Intel DebugStore bits | ||
| 110 | */ | ||
| 111 | struct debug_store *ds; | ||
| 112 | u64 pebs_enabled; | ||
| 113 | |||
| 114 | /* | ||
| 115 | * Intel LBR bits | ||
| 116 | */ | ||
| 117 | int lbr_users; | ||
| 118 | void *lbr_context; | ||
| 119 | struct perf_branch_stack lbr_stack; | ||
| 120 | struct perf_branch_entry lbr_entries[MAX_LBR_ENTRIES]; | ||
| 121 | |||
| 122 | /* | ||
| 123 | * manage shared (per-core, per-cpu) registers | ||
| 124 | * used on Intel NHM/WSM/SNB | ||
| 125 | */ | ||
| 126 | struct intel_shared_regs *shared_regs; | ||
| 127 | |||
| 128 | /* | ||
| 129 | * AMD specific bits | ||
| 130 | */ | ||
| 131 | struct amd_nb *amd_nb; | ||
| 132 | }; | ||
| 133 | |||
| 134 | #define __EVENT_CONSTRAINT(c, n, m, w) {\ | ||
| 135 | { .idxmsk64 = (n) }, \ | ||
| 136 | .code = (c), \ | ||
| 137 | .cmask = (m), \ | ||
| 138 | .weight = (w), \ | ||
| 139 | } | ||
| 140 | |||
| 141 | #define EVENT_CONSTRAINT(c, n, m) \ | ||
| 142 | __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n)) | ||
| 143 | |||
| 144 | /* | ||
| 145 | * Constraint on the Event code. | ||
| 146 | */ | ||
| 147 | #define INTEL_EVENT_CONSTRAINT(c, n) \ | ||
| 148 | EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT) | ||
| 149 | |||
| 150 | /* | ||
| 151 | * Constraint on the Event code + UMask + fixed-mask | ||
| 152 | * | ||
| 153 | * filter mask to validate fixed counter events. | ||
| 154 | * the following filters disqualify for fixed counters: | ||
| 155 | * - inv | ||
| 156 | * - edge | ||
| 157 | * - cnt-mask | ||
| 158 | * The other filters are supported by fixed counters. | ||
| 159 | * The any-thread option is supported starting with v3. | ||
| 160 | */ | ||
| 161 | #define FIXED_EVENT_CONSTRAINT(c, n) \ | ||
| 162 | EVENT_CONSTRAINT(c, (1ULL << (32+n)), X86_RAW_EVENT_MASK) | ||
| 163 | |||
| 164 | /* | ||
| 165 | * Constraint on the Event code + UMask | ||
| 166 | */ | ||
| 167 | #define INTEL_UEVENT_CONSTRAINT(c, n) \ | ||
| 168 | EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK) | ||
| 169 | 49 | ||
| 170 | #define EVENT_CONSTRAINT_END \ | 50 | DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { |
| 171 | EVENT_CONSTRAINT(0, 0, 0) | ||
| 172 | |||
| 173 | #define for_each_event_constraint(e, c) \ | ||
| 174 | for ((e) = (c); (e)->weight; (e)++) | ||
| 175 | |||
| 176 | /* | ||
| 177 | * Per register state. | ||
| 178 | */ | ||
| 179 | struct er_account { | ||
| 180 | raw_spinlock_t lock; /* per-core: protect structure */ | ||
| 181 | u64 config; /* extra MSR config */ | ||
| 182 | u64 reg; /* extra MSR number */ | ||
| 183 | atomic_t ref; /* reference count */ | ||
| 184 | }; | ||
| 185 | |||
| 186 | /* | ||
| 187 | * Extra registers for specific events. | ||
| 188 | * | ||
| 189 | * Some events need large masks and require external MSRs. | ||
| 190 | * Those extra MSRs end up being shared for all events on | ||
| 191 | * a PMU and sometimes between PMU of sibling HT threads. | ||
| 192 | * In either case, the kernel needs to handle conflicting | ||
| 193 | * accesses to those extra, shared, regs. The data structure | ||
| 194 | * to manage those registers is stored in cpu_hw_event. | ||
| 195 | */ | ||
| 196 | struct extra_reg { | ||
| 197 | unsigned int event; | ||
| 198 | unsigned int msr; | ||
| 199 | u64 config_mask; | ||
| 200 | u64 valid_mask; | ||
| 201 | int idx; /* per_xxx->regs[] reg index */ | ||
| 202 | }; | ||
| 203 | |||
| 204 | #define EVENT_EXTRA_REG(e, ms, m, vm, i) { \ | ||
| 205 | .event = (e), \ | ||
| 206 | .msr = (ms), \ | ||
| 207 | .config_mask = (m), \ | ||
| 208 | .valid_mask = (vm), \ | ||
| 209 | .idx = EXTRA_REG_##i \ | ||
| 210 | } | ||
| 211 | |||
| 212 | #define INTEL_EVENT_EXTRA_REG(event, msr, vm, idx) \ | ||
| 213 | EVENT_EXTRA_REG(event, msr, ARCH_PERFMON_EVENTSEL_EVENT, vm, idx) | ||
| 214 | |||
| 215 | #define EVENT_EXTRA_END EVENT_EXTRA_REG(0, 0, 0, 0, RSP_0) | ||
| 216 | |||
| 217 | union perf_capabilities { | ||
| 218 | struct { | ||
| 219 | u64 lbr_format : 6; | ||
| 220 | u64 pebs_trap : 1; | ||
| 221 | u64 pebs_arch_reg : 1; | ||
| 222 | u64 pebs_format : 4; | ||
| 223 | u64 smm_freeze : 1; | ||
| 224 | }; | ||
| 225 | u64 capabilities; | ||
| 226 | }; | ||
| 227 | |||
| 228 | /* | ||
| 229 | * struct x86_pmu - generic x86 pmu | ||
| 230 | */ | ||
| 231 | struct x86_pmu { | ||
| 232 | /* | ||
| 233 | * Generic x86 PMC bits | ||
| 234 | */ | ||
| 235 | const char *name; | ||
| 236 | int version; | ||
| 237 | int (*handle_irq)(struct pt_regs *); | ||
| 238 | void (*disable_all)(void); | ||
| 239 | void (*enable_all)(int added); | ||
| 240 | void (*enable)(struct perf_event *); | ||
| 241 | void (*disable)(struct perf_event *); | ||
| 242 | int (*hw_config)(struct perf_event *event); | ||
| 243 | int (*schedule_events)(struct cpu_hw_events *cpuc, int n, int *assign); | ||
| 244 | unsigned eventsel; | ||
| 245 | unsigned perfctr; | ||
| 246 | u64 (*event_map)(int); | ||
| 247 | int max_events; | ||
| 248 | int num_counters; | ||
| 249 | int num_counters_fixed; | ||
| 250 | int cntval_bits; | ||
| 251 | u64 cntval_mask; | ||
| 252 | int apic; | ||
| 253 | u64 max_period; | ||
| 254 | struct event_constraint * | ||
| 255 | (*get_event_constraints)(struct cpu_hw_events *cpuc, | ||
| 256 | struct perf_event *event); | ||
| 257 | |||
| 258 | void (*put_event_constraints)(struct cpu_hw_events *cpuc, | ||
| 259 | struct perf_event *event); | ||
| 260 | struct event_constraint *event_constraints; | ||
| 261 | void (*quirks)(void); | ||
| 262 | int perfctr_second_write; | ||
| 263 | |||
| 264 | int (*cpu_prepare)(int cpu); | ||
| 265 | void (*cpu_starting)(int cpu); | ||
| 266 | void (*cpu_dying)(int cpu); | ||
| 267 | void (*cpu_dead)(int cpu); | ||
| 268 | |||
| 269 | /* | ||
| 270 | * Intel Arch Perfmon v2+ | ||
| 271 | */ | ||
| 272 | u64 intel_ctrl; | ||
| 273 | union perf_capabilities intel_cap; | ||
| 274 | |||
| 275 | /* | ||
| 276 | * Intel DebugStore bits | ||
| 277 | */ | ||
| 278 | int bts, pebs; | ||
| 279 | int bts_active, pebs_active; | ||
| 280 | int pebs_record_size; | ||
| 281 | void (*drain_pebs)(struct pt_regs *regs); | ||
| 282 | struct event_constraint *pebs_constraints; | ||
| 283 | |||
| 284 | /* | ||
| 285 | * Intel LBR | ||
| 286 | */ | ||
| 287 | unsigned long lbr_tos, lbr_from, lbr_to; /* MSR base regs */ | ||
| 288 | int lbr_nr; /* hardware stack size */ | ||
| 289 | |||
| 290 | /* | ||
| 291 | * Extra registers for events | ||
| 292 | */ | ||
| 293 | struct extra_reg *extra_regs; | ||
| 294 | unsigned int er_flags; | ||
| 295 | }; | ||
| 296 | |||
| 297 | #define ERF_NO_HT_SHARING 1 | ||
| 298 | #define ERF_HAS_RSP_1 2 | ||
| 299 | |||
| 300 | static struct x86_pmu x86_pmu __read_mostly; | ||
| 301 | |||
| 302 | static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { | ||
| 303 | .enabled = 1, | 51 | .enabled = 1, |
| 304 | }; | 52 | }; |
| 305 | 53 | ||
| 306 | static int x86_perf_event_set_period(struct perf_event *event); | 54 | u64 __read_mostly hw_cache_event_ids |
| 307 | |||
| 308 | /* | ||
| 309 | * Generalized hw caching related hw_event table, filled | ||
| 310 | * in on a per model basis. A value of 0 means | ||
| 311 | * 'not supported', -1 means 'hw_event makes no sense on | ||
| 312 | * this CPU', any other value means the raw hw_event | ||
| 313 | * ID. | ||
| 314 | */ | ||
| 315 | |||
| 316 | #define C(x) PERF_COUNT_HW_CACHE_##x | ||
| 317 | |||
| 318 | static u64 __read_mostly hw_cache_event_ids | ||
| 319 | [PERF_COUNT_HW_CACHE_MAX] | 55 | [PERF_COUNT_HW_CACHE_MAX] |
| 320 | [PERF_COUNT_HW_CACHE_OP_MAX] | 56 | [PERF_COUNT_HW_CACHE_OP_MAX] |
| 321 | [PERF_COUNT_HW_CACHE_RESULT_MAX]; | 57 | [PERF_COUNT_HW_CACHE_RESULT_MAX]; |
| 322 | static u64 __read_mostly hw_cache_extra_regs | 58 | u64 __read_mostly hw_cache_extra_regs |
| 323 | [PERF_COUNT_HW_CACHE_MAX] | 59 | [PERF_COUNT_HW_CACHE_MAX] |
| 324 | [PERF_COUNT_HW_CACHE_OP_MAX] | 60 | [PERF_COUNT_HW_CACHE_OP_MAX] |
| 325 | [PERF_COUNT_HW_CACHE_RESULT_MAX]; | 61 | [PERF_COUNT_HW_CACHE_RESULT_MAX]; |
| @@ -329,8 +65,7 @@ static u64 __read_mostly hw_cache_extra_regs | |||
| 329 | * Can only be executed on the CPU where the event is active. | 65 | * Can only be executed on the CPU where the event is active. |
| 330 | * Returns the delta events processed. | 66 | * Returns the delta events processed. |
| 331 | */ | 67 | */ |
| 332 | static u64 | 68 | u64 x86_perf_event_update(struct perf_event *event) |
| 333 | x86_perf_event_update(struct perf_event *event) | ||
| 334 | { | 69 | { |
| 335 | struct hw_perf_event *hwc = &event->hw; | 70 | struct hw_perf_event *hwc = &event->hw; |
| 336 | int shift = 64 - x86_pmu.cntval_bits; | 71 | int shift = 64 - x86_pmu.cntval_bits; |
| @@ -373,30 +108,6 @@ again: | |||
| 373 | return new_raw_count; | 108 | return new_raw_count; |
| 374 | } | 109 | } |
| 375 | 110 | ||
| 376 | static inline int x86_pmu_addr_offset(int index) | ||
| 377 | { | ||
| 378 | int offset; | ||
| 379 | |||
| 380 | /* offset = X86_FEATURE_PERFCTR_CORE ? index << 1 : index */ | ||
| 381 | alternative_io(ASM_NOP2, | ||
| 382 | "shll $1, %%eax", | ||
| 383 | X86_FEATURE_PERFCTR_CORE, | ||
| 384 | "=a" (offset), | ||
| 385 | "a" (index)); | ||
| 386 | |||
| 387 | return offset; | ||
| 388 | } | ||
| 389 | |||
| 390 | static inline unsigned int x86_pmu_config_addr(int index) | ||
| 391 | { | ||
| 392 | return x86_pmu.eventsel + x86_pmu_addr_offset(index); | ||
| 393 | } | ||
| 394 | |||
| 395 | static inline unsigned int x86_pmu_event_addr(int index) | ||
| 396 | { | ||
| 397 | return x86_pmu.perfctr + x86_pmu_addr_offset(index); | ||
| 398 | } | ||
| 399 | |||
| 400 | /* | 111 | /* |
| 401 | * Find and validate any extra registers to set up. | 112 | * Find and validate any extra registers to set up. |
| 402 | */ | 113 | */ |
| @@ -532,9 +243,6 @@ msr_fail: | |||
| 532 | return false; | 243 | return false; |
| 533 | } | 244 | } |
| 534 | 245 | ||
| 535 | static void reserve_ds_buffers(void); | ||
| 536 | static void release_ds_buffers(void); | ||
| 537 | |||
| 538 | static void hw_perf_event_destroy(struct perf_event *event) | 246 | static void hw_perf_event_destroy(struct perf_event *event) |
| 539 | { | 247 | { |
| 540 | if (atomic_dec_and_mutex_lock(&active_events, &pmc_reserve_mutex)) { | 248 | if (atomic_dec_and_mutex_lock(&active_events, &pmc_reserve_mutex)) { |
| @@ -583,7 +291,7 @@ set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event *event) | |||
| 583 | return x86_pmu_extra_regs(val, event); | 291 | return x86_pmu_extra_regs(val, event); |
| 584 | } | 292 | } |
| 585 | 293 | ||
| 586 | static int x86_setup_perfctr(struct perf_event *event) | 294 | int x86_setup_perfctr(struct perf_event *event) |
| 587 | { | 295 | { |
| 588 | struct perf_event_attr *attr = &event->attr; | 296 | struct perf_event_attr *attr = &event->attr; |
| 589 | struct hw_perf_event *hwc = &event->hw; | 297 | struct hw_perf_event *hwc = &event->hw; |
| @@ -647,7 +355,7 @@ static int x86_setup_perfctr(struct perf_event *event) | |||
| 647 | return 0; | 355 | return 0; |
| 648 | } | 356 | } |
| 649 | 357 | ||
| 650 | static int x86_pmu_hw_config(struct perf_event *event) | 358 | int x86_pmu_hw_config(struct perf_event *event) |
| 651 | { | 359 | { |
| 652 | if (event->attr.precise_ip) { | 360 | if (event->attr.precise_ip) { |
| 653 | int precise = 0; | 361 | int precise = 0; |
| @@ -723,7 +431,7 @@ static int __x86_pmu_event_init(struct perf_event *event) | |||
| 723 | return x86_pmu.hw_config(event); | 431 | return x86_pmu.hw_config(event); |
| 724 | } | 432 | } |
| 725 | 433 | ||
| 726 | static void x86_pmu_disable_all(void) | 434 | void x86_pmu_disable_all(void) |
| 727 | { | 435 | { |
| 728 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 436 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
| 729 | int idx; | 437 | int idx; |
| @@ -758,15 +466,7 @@ static void x86_pmu_disable(struct pmu *pmu) | |||
| 758 | x86_pmu.disable_all(); | 466 | x86_pmu.disable_all(); |
| 759 | } | 467 | } |
| 760 | 468 | ||
| 761 | static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc, | 469 | void x86_pmu_enable_all(int added) |
| 762 | u64 enable_mask) | ||
| 763 | { | ||
| 764 | if (hwc->extra_reg.reg) | ||
| 765 | wrmsrl(hwc->extra_reg.reg, hwc->extra_reg.config); | ||
| 766 | wrmsrl(hwc->config_base, hwc->config | enable_mask); | ||
| 767 | } | ||
| 768 | |||
| 769 | static void x86_pmu_enable_all(int added) | ||
| 770 | { | 470 | { |
| 771 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 471 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
| 772 | int idx; | 472 | int idx; |
| @@ -788,7 +488,7 @@ static inline int is_x86_event(struct perf_event *event) | |||
| 788 | return event->pmu == &pmu; | 488 | return event->pmu == &pmu; |
| 789 | } | 489 | } |
| 790 | 490 | ||
| 791 | static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) | 491 | int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) |
| 792 | { | 492 | { |
| 793 | struct event_constraint *c, *constraints[X86_PMC_IDX_MAX]; | 493 | struct event_constraint *c, *constraints[X86_PMC_IDX_MAX]; |
| 794 | unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; | 494 | unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; |
| @@ -959,7 +659,6 @@ static inline int match_prev_assignment(struct hw_perf_event *hwc, | |||
| 959 | } | 659 | } |
| 960 | 660 | ||
| 961 | static void x86_pmu_start(struct perf_event *event, int flags); | 661 | static void x86_pmu_start(struct perf_event *event, int flags); |
| 962 | static void x86_pmu_stop(struct perf_event *event, int flags); | ||
| 963 | 662 | ||
| 964 | static void x86_pmu_enable(struct pmu *pmu) | 663 | static void x86_pmu_enable(struct pmu *pmu) |
| 965 | { | 664 | { |
| @@ -1031,21 +730,13 @@ static void x86_pmu_enable(struct pmu *pmu) | |||
| 1031 | x86_pmu.enable_all(added); | 730 | x86_pmu.enable_all(added); |
| 1032 | } | 731 | } |
| 1033 | 732 | ||
| 1034 | static inline void x86_pmu_disable_event(struct perf_event *event) | ||
| 1035 | { | ||
| 1036 | struct hw_perf_event *hwc = &event->hw; | ||
| 1037 | |||
| 1038 | wrmsrl(hwc->config_base, hwc->config); | ||
| 1039 | } | ||
| 1040 | |||
| 1041 | static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left); | 733 | static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left); |
| 1042 | 734 | ||
| 1043 | /* | 735 | /* |
| 1044 | * Set the next IRQ period, based on the hwc->period_left value. | 736 | * Set the next IRQ period, based on the hwc->period_left value. |
| 1045 | * To be called with the event disabled in hw: | 737 | * To be called with the event disabled in hw: |
| 1046 | */ | 738 | */ |
| 1047 | static int | 739 | int x86_perf_event_set_period(struct perf_event *event) |
| 1048 | x86_perf_event_set_period(struct perf_event *event) | ||
| 1049 | { | 740 | { |
| 1050 | struct hw_perf_event *hwc = &event->hw; | 741 | struct hw_perf_event *hwc = &event->hw; |
| 1051 | s64 left = local64_read(&hwc->period_left); | 742 | s64 left = local64_read(&hwc->period_left); |
| @@ -1105,7 +796,7 @@ x86_perf_event_set_period(struct perf_event *event) | |||
| 1105 | return ret; | 796 | return ret; |
| 1106 | } | 797 | } |
| 1107 | 798 | ||
| 1108 | static void x86_pmu_enable_event(struct perf_event *event) | 799 | void x86_pmu_enable_event(struct perf_event *event) |
| 1109 | { | 800 | { |
| 1110 | if (__this_cpu_read(cpu_hw_events.enabled)) | 801 | if (__this_cpu_read(cpu_hw_events.enabled)) |
| 1111 | __x86_pmu_enable_event(&event->hw, | 802 | __x86_pmu_enable_event(&event->hw, |
| @@ -1244,7 +935,7 @@ void perf_event_print_debug(void) | |||
| 1244 | local_irq_restore(flags); | 935 | local_irq_restore(flags); |
| 1245 | } | 936 | } |
| 1246 | 937 | ||
| 1247 | static void x86_pmu_stop(struct perf_event *event, int flags) | 938 | void x86_pmu_stop(struct perf_event *event, int flags) |
| 1248 | { | 939 | { |
| 1249 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 940 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
| 1250 | struct hw_perf_event *hwc = &event->hw; | 941 | struct hw_perf_event *hwc = &event->hw; |
| @@ -1297,7 +988,7 @@ static void x86_pmu_del(struct perf_event *event, int flags) | |||
| 1297 | perf_event_update_userpage(event); | 988 | perf_event_update_userpage(event); |
| 1298 | } | 989 | } |
| 1299 | 990 | ||
| 1300 | static int x86_pmu_handle_irq(struct pt_regs *regs) | 991 | int x86_pmu_handle_irq(struct pt_regs *regs) |
| 1301 | { | 992 | { |
| 1302 | struct perf_sample_data data; | 993 | struct perf_sample_data data; |
| 1303 | struct cpu_hw_events *cpuc; | 994 | struct cpu_hw_events *cpuc; |
| @@ -1367,109 +1058,28 @@ void perf_events_lapic_init(void) | |||
| 1367 | apic_write(APIC_LVTPC, APIC_DM_NMI); | 1058 | apic_write(APIC_LVTPC, APIC_DM_NMI); |
| 1368 | } | 1059 | } |
| 1369 | 1060 | ||
| 1370 | struct pmu_nmi_state { | ||
| 1371 | unsigned int marked; | ||
| 1372 | int handled; | ||
| 1373 | }; | ||
| 1374 | |||
| 1375 | static DEFINE_PER_CPU(struct pmu_nmi_state, pmu_nmi); | ||
| 1376 | |||
| 1377 | static int __kprobes | 1061 | static int __kprobes |
| 1378 | perf_event_nmi_handler(struct notifier_block *self, | 1062 | perf_event_nmi_handler(unsigned int cmd, struct pt_regs *regs) |
| 1379 | unsigned long cmd, void *__args) | ||
| 1380 | { | 1063 | { |
| 1381 | struct die_args *args = __args; | ||
| 1382 | unsigned int this_nmi; | ||
| 1383 | int handled; | ||
| 1384 | |||
| 1385 | if (!atomic_read(&active_events)) | 1064 | if (!atomic_read(&active_events)) |
| 1386 | return NOTIFY_DONE; | 1065 | return NMI_DONE; |
| 1387 | |||
| 1388 | switch (cmd) { | ||
| 1389 | case DIE_NMI: | ||
| 1390 | break; | ||
| 1391 | case DIE_NMIUNKNOWN: | ||
| 1392 | this_nmi = percpu_read(irq_stat.__nmi_count); | ||
| 1393 | if (this_nmi != __this_cpu_read(pmu_nmi.marked)) | ||
| 1394 | /* let the kernel handle the unknown nmi */ | ||
| 1395 | return NOTIFY_DONE; | ||
| 1396 | /* | ||
| 1397 | * This one is a PMU back-to-back nmi. Two events | ||
| 1398 | * trigger 'simultaneously' raising two back-to-back | ||
| 1399 | * NMIs. If the first NMI handles both, the latter | ||
| 1400 | * will be empty and daze the CPU. So, we drop it to | ||
| 1401 | * avoid false-positive 'unknown nmi' messages. | ||
| 1402 | */ | ||
| 1403 | return NOTIFY_STOP; | ||
| 1404 | default: | ||
| 1405 | return NOTIFY_DONE; | ||
| 1406 | } | ||
| 1407 | |||
| 1408 | handled = x86_pmu.handle_irq(args->regs); | ||
| 1409 | if (!handled) | ||
| 1410 | return NOTIFY_DONE; | ||
| 1411 | |||
| 1412 | this_nmi = percpu_read(irq_stat.__nmi_count); | ||
| 1413 | if ((handled > 1) || | ||
| 1414 | /* the next nmi could be a back-to-back nmi */ | ||
| 1415 | ((__this_cpu_read(pmu_nmi.marked) == this_nmi) && | ||
| 1416 | (__this_cpu_read(pmu_nmi.handled) > 1))) { | ||
| 1417 | /* | ||
| 1418 | * We could have two subsequent back-to-back nmis: The | ||
| 1419 | * first handles more than one counter, the 2nd | ||
| 1420 | * handles only one counter and the 3rd handles no | ||
| 1421 | * counter. | ||
| 1422 | * | ||
| 1423 | * This is the 2nd nmi because the previous was | ||
| 1424 | * handling more than one counter. We will mark the | ||
| 1425 | * next (3rd) and then drop it if unhandled. | ||
| 1426 | */ | ||
| 1427 | __this_cpu_write(pmu_nmi.marked, this_nmi + 1); | ||
| 1428 | __this_cpu_write(pmu_nmi.handled, handled); | ||
| 1429 | } | ||
| 1430 | 1066 | ||
| 1431 | return NOTIFY_STOP; | 1067 | return x86_pmu.handle_irq(regs); |
| 1432 | } | 1068 | } |
| 1433 | 1069 | ||
| 1434 | static __read_mostly struct notifier_block perf_event_nmi_notifier = { | 1070 | struct event_constraint emptyconstraint; |
| 1435 | .notifier_call = perf_event_nmi_handler, | 1071 | struct event_constraint unconstrained; |
| 1436 | .next = NULL, | ||
| 1437 | .priority = NMI_LOCAL_LOW_PRIOR, | ||
| 1438 | }; | ||
| 1439 | |||
| 1440 | static struct event_constraint unconstrained; | ||
| 1441 | static struct event_constraint emptyconstraint; | ||
| 1442 | |||
| 1443 | static struct event_constraint * | ||
| 1444 | x86_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) | ||
| 1445 | { | ||
| 1446 | struct event_constraint *c; | ||
| 1447 | |||
| 1448 | if (x86_pmu.event_constraints) { | ||
| 1449 | for_each_event_constraint(c, x86_pmu.event_constraints) { | ||
| 1450 | if ((event->hw.config & c->cmask) == c->code) | ||
| 1451 | return c; | ||
| 1452 | } | ||
| 1453 | } | ||
| 1454 | |||
| 1455 | return &unconstrained; | ||
| 1456 | } | ||
| 1457 | |||
| 1458 | #include "perf_event_amd.c" | ||
| 1459 | #include "perf_event_p6.c" | ||
| 1460 | #include "perf_event_p4.c" | ||
| 1461 | #include "perf_event_intel_lbr.c" | ||
| 1462 | #include "perf_event_intel_ds.c" | ||
| 1463 | #include "perf_event_intel.c" | ||
| 1464 | 1072 | ||
| 1465 | static int __cpuinit | 1073 | static int __cpuinit |
| 1466 | x86_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu) | 1074 | x86_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu) |
| 1467 | { | 1075 | { |
| 1468 | unsigned int cpu = (long)hcpu; | 1076 | unsigned int cpu = (long)hcpu; |
| 1077 | struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); | ||
| 1469 | int ret = NOTIFY_OK; | 1078 | int ret = NOTIFY_OK; |
| 1470 | 1079 | ||
| 1471 | switch (action & ~CPU_TASKS_FROZEN) { | 1080 | switch (action & ~CPU_TASKS_FROZEN) { |
| 1472 | case CPU_UP_PREPARE: | 1081 | case CPU_UP_PREPARE: |
| 1082 | cpuc->kfree_on_online = NULL; | ||
| 1473 | if (x86_pmu.cpu_prepare) | 1083 | if (x86_pmu.cpu_prepare) |
| 1474 | ret = x86_pmu.cpu_prepare(cpu); | 1084 | ret = x86_pmu.cpu_prepare(cpu); |
| 1475 | break; | 1085 | break; |
| @@ -1479,6 +1089,10 @@ x86_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu) | |||
| 1479 | x86_pmu.cpu_starting(cpu); | 1089 | x86_pmu.cpu_starting(cpu); |
| 1480 | break; | 1090 | break; |
| 1481 | 1091 | ||
| 1092 | case CPU_ONLINE: | ||
| 1093 | kfree(cpuc->kfree_on_online); | ||
| 1094 | break; | ||
| 1095 | |||
| 1482 | case CPU_DYING: | 1096 | case CPU_DYING: |
| 1483 | if (x86_pmu.cpu_dying) | 1097 | if (x86_pmu.cpu_dying) |
| 1484 | x86_pmu.cpu_dying(cpu); | 1098 | x86_pmu.cpu_dying(cpu); |
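kfree_on_online is a small hotplug hand-off: CPU_STARTING callbacks run on the incoming CPU with interrupts disabled, where kfree() is problematic (on RT it can sleep), so a model-specific cpu_starting() hook parks the stale per-core buffer in cpuc->kfree_on_online and the notifier releases it once CPU_ONLINE runs in a safe context. A condensed sketch of just that life cycle, mirroring but not copying the handler above:

static int example_pmu_notifier(struct notifier_block *self,
				unsigned long action, void *hcpu)
{
	struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, (long)hcpu);

	switch (action & ~CPU_TASKS_FROZEN) {
	case CPU_UP_PREPARE:
		cpuc->kfree_on_online = NULL;	/* nothing parked yet */
		break;
	case CPU_ONLINE:
		kfree(cpuc->kfree_on_online);	/* deferred release, safe here */
		cpuc->kfree_on_online = NULL;
		break;
	}
	return NOTIFY_OK;
}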
| @@ -1557,7 +1171,7 @@ static int __init init_hw_perf_events(void) | |||
| 1557 | ((1LL << x86_pmu.num_counters_fixed)-1) << X86_PMC_IDX_FIXED; | 1171 | ((1LL << x86_pmu.num_counters_fixed)-1) << X86_PMC_IDX_FIXED; |
| 1558 | 1172 | ||
| 1559 | perf_events_lapic_init(); | 1173 | perf_events_lapic_init(); |
| 1560 | register_die_notifier(&perf_event_nmi_notifier); | 1174 | register_nmi_handler(NMI_LOCAL, perf_event_nmi_handler, 0, "PMI"); |
| 1561 | 1175 | ||
| 1562 | unconstrained = (struct event_constraint) | 1176 | unconstrained = (struct event_constraint) |
| 1563 | __EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_counters) - 1, | 1177 | __EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_counters) - 1, |
| @@ -1900,6 +1514,9 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs) | |||
| 1900 | 1514 | ||
| 1901 | perf_callchain_store(entry, regs->ip); | 1515 | perf_callchain_store(entry, regs->ip); |
| 1902 | 1516 | ||
| 1517 | if (!current->mm) | ||
| 1518 | return; | ||
| 1519 | |||
| 1903 | if (perf_callchain_user32(regs, entry)) | 1520 | if (perf_callchain_user32(regs, entry)) |
| 1904 | return; | 1521 | return; |
| 1905 | 1522 | ||
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h new file mode 100644 index 000000000000..b9698d40ac4b --- /dev/null +++ b/arch/x86/kernel/cpu/perf_event.h | |||
| @@ -0,0 +1,505 @@ | |||
| 1 | /* | ||
| 2 | * Performance events x86 architecture header | ||
| 3 | * | ||
| 4 | * Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de> | ||
| 5 | * Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar | ||
| 6 | * Copyright (C) 2009 Jaswinder Singh Rajput | ||
| 7 | * Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter | ||
| 8 | * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com> | ||
| 9 | * Copyright (C) 2009 Intel Corporation, <markus.t.metzger@intel.com> | ||
| 10 | * Copyright (C) 2009 Google, Inc., Stephane Eranian | ||
| 11 | * | ||
| 12 | * For licencing details see kernel-base/COPYING | ||
| 13 | */ | ||
| 14 | |||
| 15 | #include <linux/perf_event.h> | ||
| 16 | |||
| 17 | /* | ||
| 18 | * | NHM/WSM | SNB | | ||
| 19 | * register ------------------------------- | ||
| 20 | * | HT | no HT | HT | no HT | | ||
| 21 | *----------------------------------------- | ||
| 22 | * offcore | core | core | cpu | core | | ||
| 23 | * lbr_sel | core | core | cpu | core | | ||
| 24 | * ld_lat | cpu | core | cpu | core | | ||
| 25 | *----------------------------------------- | ||
| 26 | * | ||
| 27 | * Given that there is a small number of shared regs, | ||
| 28 | * we can pre-allocate their slot in the per-cpu | ||
| 29 | * per-core reg tables. | ||
| 30 | */ | ||
| 31 | enum extra_reg_type { | ||
| 32 | EXTRA_REG_NONE = -1, /* not used */ | ||
| 33 | |||
| 34 | EXTRA_REG_RSP_0 = 0, /* offcore_response_0 */ | ||
| 35 | EXTRA_REG_RSP_1 = 1, /* offcore_response_1 */ | ||
| 36 | |||
| 37 | EXTRA_REG_MAX /* number of entries needed */ | ||
| 38 | }; | ||
| 39 | |||
| 40 | struct event_constraint { | ||
| 41 | union { | ||
| 42 | unsigned long idxmsk[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; | ||
| 43 | u64 idxmsk64; | ||
| 44 | }; | ||
| 45 | u64 code; | ||
| 46 | u64 cmask; | ||
| 47 | int weight; | ||
| 48 | }; | ||
| 49 | |||
| 50 | struct amd_nb { | ||
| 51 | int nb_id; /* NorthBridge id */ | ||
| 52 | int refcnt; /* reference count */ | ||
| 53 | struct perf_event *owners[X86_PMC_IDX_MAX]; | ||
| 54 | struct event_constraint event_constraints[X86_PMC_IDX_MAX]; | ||
| 55 | }; | ||
| 56 | |||
| 57 | /* The maximal number of PEBS events: */ | ||
| 58 | #define MAX_PEBS_EVENTS 4 | ||
| 59 | |||
| 60 | /* | ||
| 61 | * A debug store configuration. | ||
| 62 | * | ||
| 63 | * We only support architectures that use 64bit fields. | ||
| 64 | */ | ||
| 65 | struct debug_store { | ||
| 66 | u64 bts_buffer_base; | ||
| 67 | u64 bts_index; | ||
| 68 | u64 bts_absolute_maximum; | ||
| 69 | u64 bts_interrupt_threshold; | ||
| 70 | u64 pebs_buffer_base; | ||
| 71 | u64 pebs_index; | ||
| 72 | u64 pebs_absolute_maximum; | ||
| 73 | u64 pebs_interrupt_threshold; | ||
| 74 | u64 pebs_event_reset[MAX_PEBS_EVENTS]; | ||
| 75 | }; | ||
| 76 | |||
| 77 | /* | ||
| 78 | * Per register state. | ||
| 79 | */ | ||
| 80 | struct er_account { | ||
| 81 | raw_spinlock_t lock; /* per-core: protect structure */ | ||
| 82 | u64 config; /* extra MSR config */ | ||
| 83 | u64 reg; /* extra MSR number */ | ||
| 84 | atomic_t ref; /* reference count */ | ||
| 85 | }; | ||
| 86 | |||
| 87 | /* | ||
| 88 | * Per core/cpu state | ||
| 89 | * | ||
| 90 | * Used to coordinate shared registers between HT threads or | ||
| 91 | * among events on a single PMU. | ||
| 92 | */ | ||
| 93 | struct intel_shared_regs { | ||
| 94 | struct er_account regs[EXTRA_REG_MAX]; | ||
| 95 | int refcnt; /* per-core: #HT threads */ | ||
| 96 | unsigned core_id; /* per-core: core id */ | ||
| 97 | }; | ||
| 98 | |||
| 99 | #define MAX_LBR_ENTRIES 16 | ||
| 100 | |||
| 101 | struct cpu_hw_events { | ||
| 102 | /* | ||
| 103 | * Generic x86 PMC bits | ||
| 104 | */ | ||
| 105 | struct perf_event *events[X86_PMC_IDX_MAX]; /* in counter order */ | ||
| 106 | unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; | ||
| 107 | unsigned long running[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; | ||
| 108 | int enabled; | ||
| 109 | |||
| 110 | int n_events; | ||
| 111 | int n_added; | ||
| 112 | int n_txn; | ||
| 113 | int assign[X86_PMC_IDX_MAX]; /* event to counter assignment */ | ||
| 114 | u64 tags[X86_PMC_IDX_MAX]; | ||
| 115 | struct perf_event *event_list[X86_PMC_IDX_MAX]; /* in enabled order */ | ||
| 116 | |||
| 117 | unsigned int group_flag; | ||
| 118 | |||
| 119 | /* | ||
| 120 | * Intel DebugStore bits | ||
| 121 | */ | ||
| 122 | struct debug_store *ds; | ||
| 123 | u64 pebs_enabled; | ||
| 124 | |||
| 125 | /* | ||
| 126 | * Intel LBR bits | ||
| 127 | */ | ||
| 128 | int lbr_users; | ||
| 129 | void *lbr_context; | ||
| 130 | struct perf_branch_stack lbr_stack; | ||
| 131 | struct perf_branch_entry lbr_entries[MAX_LBR_ENTRIES]; | ||
| 132 | |||
| 133 | /* | ||
| 134 | * Intel host/guest exclude bits | ||
| 135 | */ | ||
| 136 | u64 intel_ctrl_guest_mask; | ||
| 137 | u64 intel_ctrl_host_mask; | ||
| 138 | struct perf_guest_switch_msr guest_switch_msrs[X86_PMC_IDX_MAX]; | ||
| 139 | |||
| 140 | /* | ||
| 141 | * manage shared (per-core, per-cpu) registers | ||
| 142 | * used on Intel NHM/WSM/SNB | ||
| 143 | */ | ||
| 144 | struct intel_shared_regs *shared_regs; | ||
| 145 | |||
| 146 | /* | ||
| 147 | * AMD specific bits | ||
| 148 | */ | ||
| 149 | struct amd_nb *amd_nb; | ||
| 150 | |||
| 151 | void *kfree_on_online; | ||
| 152 | }; | ||
| 153 | |||
| 154 | #define __EVENT_CONSTRAINT(c, n, m, w) {\ | ||
| 155 | { .idxmsk64 = (n) }, \ | ||
| 156 | .code = (c), \ | ||
| 157 | .cmask = (m), \ | ||
| 158 | .weight = (w), \ | ||
| 159 | } | ||
| 160 | |||
| 161 | #define EVENT_CONSTRAINT(c, n, m) \ | ||
| 162 | __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n)) | ||
| 163 | |||
| 164 | /* | ||
| 165 | * Constraint on the Event code. | ||
| 166 | */ | ||
| 167 | #define INTEL_EVENT_CONSTRAINT(c, n) \ | ||
| 168 | EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT) | ||
| 169 | |||
| 170 | /* | ||
| 171 | * Constraint on the Event code + UMask + fixed-mask | ||
| 172 | * | ||
| 173 | * filter mask to validate fixed counter events. | ||
| 174 | * the following filters disqualify for fixed counters: | ||
| 175 | * - inv | ||
| 176 | * - edge | ||
| 177 | * - cnt-mask | ||
| 178 | * The other filters are supported by fixed counters. | ||
| 179 | * The any-thread option is supported starting with v3. | ||
| 180 | */ | ||
| 181 | #define FIXED_EVENT_CONSTRAINT(c, n) \ | ||
| 182 | EVENT_CONSTRAINT(c, (1ULL << (32+n)), X86_RAW_EVENT_MASK) | ||
| 183 | |||
| 184 | /* | ||
| 185 | * Constraint on the Event code + UMask | ||
| 186 | */ | ||
| 187 | #define INTEL_UEVENT_CONSTRAINT(c, n) \ | ||
| 188 | EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK) | ||
| 189 | |||
| 190 | #define EVENT_CONSTRAINT_END \ | ||
| 191 | EVENT_CONSTRAINT(0, 0, 0) | ||
| 192 | |||
| 193 | #define for_each_event_constraint(e, c) \ | ||
| 194 | for ((e) = (c); (e)->weight; (e)++) | ||
| 195 | |||
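These constraint macros are what the per-model tables in the split-out perf_event_*.c files are built from, and the scheduler walks them with for_each_event_constraint(). A hedged usage sketch, with made-up event codes and counter masks, matching what the (removed) generic x86_get_event_constraints() did:

static struct event_constraint example_constraints[] = {
	INTEL_EVENT_CONSTRAINT(0x48, 0x4),	/* event 0x48: counter 2 only */
	FIXED_EVENT_CONSTRAINT(0x00c0, 0),	/* pin to fixed counter 0 */
	EVENT_CONSTRAINT_END
};

static struct event_constraint *
example_get_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
{
	struct event_constraint *c;

	for_each_event_constraint(c, example_constraints) {
		if ((event->hw.config & c->cmask) == c->code)
			return c;	/* constraint applies to this event */
	}
	return &unconstrained;		/* no table entry: any counter */
}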
| 196 | /* | ||
| 197 | * Extra registers for specific events. | ||
| 198 | * | ||
| 199 | * Some events need large masks and require external MSRs. | ||
| 200 | * Those extra MSRs end up being shared for all events on | ||
| 201 | * a PMU and sometimes between PMU of sibling HT threads. | ||
| 202 | * In either case, the kernel needs to handle conflicting | ||
| 203 | * accesses to those extra, shared, regs. The data structure | ||
| 204 | * to manage those registers is stored in cpu_hw_event. | ||
| 205 | */ | ||
| 206 | struct extra_reg { | ||
| 207 | unsigned int event; | ||
| 208 | unsigned int msr; | ||
| 209 | u64 config_mask; | ||
| 210 | u64 valid_mask; | ||
| 211 | int idx; /* per_xxx->regs[] reg index */ | ||
| 212 | }; | ||
| 213 | |||
| 214 | #define EVENT_EXTRA_REG(e, ms, m, vm, i) { \ | ||
| 215 | .event = (e), \ | ||
| 216 | .msr = (ms), \ | ||
| 217 | .config_mask = (m), \ | ||
| 218 | .valid_mask = (vm), \ | ||
| 219 | .idx = EXTRA_REG_##i \ | ||
| 220 | } | ||
| 221 | |||
| 222 | #define INTEL_EVENT_EXTRA_REG(event, msr, vm, idx) \ | ||
| 223 | EVENT_EXTRA_REG(event, msr, ARCH_PERFMON_EVENTSEL_EVENT, vm, idx) | ||
| 224 | |||
| 225 | #define EVENT_EXTRA_END EVENT_EXTRA_REG(0, 0, 0, 0, RSP_0) | ||
| 226 | |||
| 227 | union perf_capabilities { | ||
| 228 | struct { | ||
| 229 | u64 lbr_format:6; | ||
| 230 | u64 pebs_trap:1; | ||
| 231 | u64 pebs_arch_reg:1; | ||
| 232 | u64 pebs_format:4; | ||
| 233 | u64 smm_freeze:1; | ||
| 234 | }; | ||
| 235 | u64 capabilities; | ||
| 236 | }; | ||
| 237 | |||
| 238 | /* | ||
| 239 | * struct x86_pmu - generic x86 pmu | ||
| 240 | */ | ||
| 241 | struct x86_pmu { | ||
| 242 | /* | ||
| 243 | * Generic x86 PMC bits | ||
| 244 | */ | ||
| 245 | const char *name; | ||
| 246 | int version; | ||
| 247 | int (*handle_irq)(struct pt_regs *); | ||
| 248 | void (*disable_all)(void); | ||
| 249 | void (*enable_all)(int added); | ||
| 250 | void (*enable)(struct perf_event *); | ||
| 251 | void (*disable)(struct perf_event *); | ||
| 252 | int (*hw_config)(struct perf_event *event); | ||
| 253 | int (*schedule_events)(struct cpu_hw_events *cpuc, int n, int *assign); | ||
| 254 | unsigned eventsel; | ||
| 255 | unsigned perfctr; | ||
| 256 | u64 (*event_map)(int); | ||
| 257 | int max_events; | ||
| 258 | int num_counters; | ||
| 259 | int num_counters_fixed; | ||
| 260 | int cntval_bits; | ||
| 261 | u64 cntval_mask; | ||
| 262 | int apic; | ||
| 263 | u64 max_period; | ||
| 264 | struct event_constraint * | ||
| 265 | (*get_event_constraints)(struct cpu_hw_events *cpuc, | ||
| 266 | struct perf_event *event); | ||
| 267 | |||
| 268 | void (*put_event_constraints)(struct cpu_hw_events *cpuc, | ||
| 269 | struct perf_event *event); | ||
| 270 | struct event_constraint *event_constraints; | ||
| 271 | void (*quirks)(void); | ||
| 272 | int perfctr_second_write; | ||
| 273 | |||
| 274 | int (*cpu_prepare)(int cpu); | ||
| 275 | void (*cpu_starting)(int cpu); | ||
| 276 | void (*cpu_dying)(int cpu); | ||
| 277 | void (*cpu_dead)(int cpu); | ||
| 278 | |||
| 279 | /* | ||
| 280 | * Intel Arch Perfmon v2+ | ||
| 281 | */ | ||
| 282 | u64 intel_ctrl; | ||
| 283 | union perf_capabilities intel_cap; | ||
| 284 | |||
| 285 | /* | ||
| 286 | * Intel DebugStore bits | ||
| 287 | */ | ||
| 288 | int bts, pebs; | ||
| 289 | int bts_active, pebs_active; | ||
| 290 | int pebs_record_size; | ||
| 291 | void (*drain_pebs)(struct pt_regs *regs); | ||
| 292 | struct event_constraint *pebs_constraints; | ||
| 293 | |||
| 294 | /* | ||
| 295 | * Intel LBR | ||
| 296 | */ | ||
| 297 | unsigned long lbr_tos, lbr_from, lbr_to; /* MSR base regs */ | ||
| 298 | int lbr_nr; /* hardware stack size */ | ||
| 299 | |||
| 300 | /* | ||
| 301 | * Extra registers for events | ||
| 302 | */ | ||
| 303 | struct extra_reg *extra_regs; | ||
| 304 | unsigned int er_flags; | ||
| 305 | |||
| 306 | /* | ||
| 307 | * Intel host/guest support (KVM) | ||
| 308 | */ | ||
| 309 | struct perf_guest_switch_msr *(*guest_get_msrs)(int *nr); | ||
| 310 | }; | ||
| 311 | |||
| 312 | #define ERF_NO_HT_SHARING 1 | ||
| 313 | #define ERF_HAS_RSP_1 2 | ||
| 314 | |||
| 315 | extern struct x86_pmu x86_pmu __read_mostly; | ||
| 316 | |||
| 317 | DECLARE_PER_CPU(struct cpu_hw_events, cpu_hw_events); | ||
| 318 | |||
| 319 | int x86_perf_event_set_period(struct perf_event *event); | ||
| 320 | |||
| 321 | /* | ||
| 322 | * Generalized hw caching related hw_event table, filled | ||
| 323 | * in on a per-model basis. A value of 0 means | ||
| 324 | * 'not supported', -1 means 'hw_event makes no sense on | ||
| 325 | * this CPU', and any other value is the raw hw_event | ||
| 326 | * ID. | ||
| 327 | */ | ||
| 328 | |||
| 329 | #define C(x) PERF_COUNT_HW_CACHE_##x | ||
| 330 | |||
| 331 | extern u64 __read_mostly hw_cache_event_ids | ||
| 332 | [PERF_COUNT_HW_CACHE_MAX] | ||
| 333 | [PERF_COUNT_HW_CACHE_OP_MAX] | ||
| 334 | [PERF_COUNT_HW_CACHE_RESULT_MAX]; | ||
| 335 | extern u64 __read_mostly hw_cache_extra_regs | ||
| 336 | [PERF_COUNT_HW_CACHE_MAX] | ||
| 337 | [PERF_COUNT_HW_CACHE_OP_MAX] | ||
| 338 | [PERF_COUNT_HW_CACHE_RESULT_MAX]; | ||
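Per the comment above, a lookup distinguishes three cases. Roughly, mirroring the generic event-setup path (error handling simplified; hwc stands for the event's hw_perf_event):

	u64 config = hw_cache_event_ids[C(LL)][C(OP_READ)][C(RESULT_MISS)];

	if (config == 0)
		return -ENOENT;		/* not supported on this model */
	if (config == -1ULL)
		return -EINVAL;		/* combination makes no sense here */
	hwc->config |= config;		/* any other value: raw hw_event ID */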
| 339 | |||
| 340 | u64 x86_perf_event_update(struct perf_event *event); | ||
| 341 | |||
| 342 | static inline int x86_pmu_addr_offset(int index) | ||
| 343 | { | ||
| 344 | int offset; | ||
| 345 | |||
| 346 | /* offset = X86_FEATURE_PERFCTR_CORE ? index << 1 : index */ | ||
| 347 | alternative_io(ASM_NOP2, | ||
| 348 | "shll $1, %%eax", | ||
| 349 | X86_FEATURE_PERFCTR_CORE, | ||
| 350 | "=a" (offset), | ||
| 351 | "a" (index)); | ||
| 352 | |||
| 353 | return offset; | ||
| 354 | } | ||
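The alternative_io() above patches the two-byte NOP into shll $1, %eax at boot on CPUs with PERFCTR_CORE, so the hot path pays no conditional branch. A plain-C equivalent, for readability only (the _slow name is ours):

	static inline int x86_pmu_addr_offset_slow(int index)
	{
		/* fam15h core counters interleave eventsel/perfctr MSR pairs */
		if (boot_cpu_has(X86_FEATURE_PERFCTR_CORE))
			return index << 1;
		return index;
	}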
| 355 | |||
| 356 | static inline unsigned int x86_pmu_config_addr(int index) | ||
| 357 | { | ||
| 358 | return x86_pmu.eventsel + x86_pmu_addr_offset(index); | ||
| 359 | } | ||
| 360 | |||
| 361 | static inline unsigned int x86_pmu_event_addr(int index) | ||
| 362 | { | ||
| 363 | return x86_pmu.perfctr + x86_pmu_addr_offset(index); | ||
| 364 | } | ||
| 365 | |||
| 366 | int x86_setup_perfctr(struct perf_event *event); | ||
| 367 | |||
| 368 | int x86_pmu_hw_config(struct perf_event *event); | ||
| 369 | |||
| 370 | void x86_pmu_disable_all(void); | ||
| 371 | |||
| 372 | static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc, | ||
| 373 | u64 enable_mask) | ||
| 374 | { | ||
| 375 | if (hwc->extra_reg.reg) | ||
| 376 | wrmsrl(hwc->extra_reg.reg, hwc->extra_reg.config); | ||
| 377 | wrmsrl(hwc->config_base, hwc->config | enable_mask); | ||
| 378 | } | ||
| 379 | |||
| 380 | void x86_pmu_enable_all(int added); | ||
| 381 | |||
| 382 | int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign); | ||
| 383 | |||
| 384 | void x86_pmu_stop(struct perf_event *event, int flags); | ||
| 385 | |||
| 386 | static inline void x86_pmu_disable_event(struct perf_event *event) | ||
| 387 | { | ||
| 388 | struct hw_perf_event *hwc = &event->hw; | ||
| 389 | |||
| 390 | wrmsrl(hwc->config_base, hwc->config); | ||
| 391 | } | ||
| 392 | |||
| 393 | void x86_pmu_enable_event(struct perf_event *event); | ||
| 394 | |||
| 395 | int x86_pmu_handle_irq(struct pt_regs *regs); | ||
| 396 | |||
| 397 | extern struct event_constraint emptyconstraint; | ||
| 398 | |||
| 399 | extern struct event_constraint unconstrained; | ||
| 400 | |||
| 401 | #ifdef CONFIG_CPU_SUP_AMD | ||
| 402 | |||
| 403 | int amd_pmu_init(void); | ||
| 404 | |||
| 405 | #else /* CONFIG_CPU_SUP_AMD */ | ||
| 406 | |||
| 407 | static inline int amd_pmu_init(void) | ||
| 408 | { | ||
| 409 | return 0; | ||
| 410 | } | ||
| 411 | |||
| 412 | #endif /* CONFIG_CPU_SUP_AMD */ | ||
| 413 | |||
| 414 | #ifdef CONFIG_CPU_SUP_INTEL | ||
| 415 | |||
| 416 | int intel_pmu_save_and_restart(struct perf_event *event); | ||
| 417 | |||
| 418 | struct event_constraint * | ||
| 419 | x86_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event); | ||
| 420 | |||
| 421 | struct intel_shared_regs *allocate_shared_regs(int cpu); | ||
| 422 | |||
| 423 | int intel_pmu_init(void); | ||
| 424 | |||
| 425 | void init_debug_store_on_cpu(int cpu); | ||
| 426 | |||
| 427 | void fini_debug_store_on_cpu(int cpu); | ||
| 428 | |||
| 429 | void release_ds_buffers(void); | ||
| 430 | |||
| 431 | void reserve_ds_buffers(void); | ||
| 432 | |||
| 433 | extern struct event_constraint bts_constraint; | ||
| 434 | |||
| 435 | void intel_pmu_enable_bts(u64 config); | ||
| 436 | |||
| 437 | void intel_pmu_disable_bts(void); | ||
| 438 | |||
| 439 | int intel_pmu_drain_bts_buffer(void); | ||
| 440 | |||
| 441 | extern struct event_constraint intel_core2_pebs_event_constraints[]; | ||
| 442 | |||
| 443 | extern struct event_constraint intel_atom_pebs_event_constraints[]; | ||
| 444 | |||
| 445 | extern struct event_constraint intel_nehalem_pebs_event_constraints[]; | ||
| 446 | |||
| 447 | extern struct event_constraint intel_westmere_pebs_event_constraints[]; | ||
| 448 | |||
| 449 | extern struct event_constraint intel_snb_pebs_event_constraints[]; | ||
| 450 | |||
| 451 | struct event_constraint *intel_pebs_constraints(struct perf_event *event); | ||
| 452 | |||
| 453 | void intel_pmu_pebs_enable(struct perf_event *event); | ||
| 454 | |||
| 455 | void intel_pmu_pebs_disable(struct perf_event *event); | ||
| 456 | |||
| 457 | void intel_pmu_pebs_enable_all(void); | ||
| 458 | |||
| 459 | void intel_pmu_pebs_disable_all(void); | ||
| 460 | |||
| 461 | void intel_ds_init(void); | ||
| 462 | |||
| 463 | void intel_pmu_lbr_reset(void); | ||
| 464 | |||
| 465 | void intel_pmu_lbr_enable(struct perf_event *event); | ||
| 466 | |||
| 467 | void intel_pmu_lbr_disable(struct perf_event *event); | ||
| 468 | |||
| 469 | void intel_pmu_lbr_enable_all(void); | ||
| 470 | |||
| 471 | void intel_pmu_lbr_disable_all(void); | ||
| 472 | |||
| 473 | void intel_pmu_lbr_read(void); | ||
| 474 | |||
| 475 | void intel_pmu_lbr_init_core(void); | ||
| 476 | |||
| 477 | void intel_pmu_lbr_init_nhm(void); | ||
| 478 | |||
| 479 | void intel_pmu_lbr_init_atom(void); | ||
| 480 | |||
| 481 | int p4_pmu_init(void); | ||
| 482 | |||
| 483 | int p6_pmu_init(void); | ||
| 484 | |||
| 485 | #else /* CONFIG_CPU_SUP_INTEL */ | ||
| 486 | |||
| 487 | static inline void reserve_ds_buffers(void) | ||
| 488 | { | ||
| 489 | } | ||
| 490 | |||
| 491 | static inline void release_ds_buffers(void) | ||
| 492 | { | ||
| 493 | } | ||
| 494 | |||
| 495 | static inline int intel_pmu_init(void) | ||
| 496 | { | ||
| 497 | return 0; | ||
| 498 | } | ||
| 499 | |||
| 500 | static inline struct intel_shared_regs *allocate_shared_regs(int cpu) | ||
| 501 | { | ||
| 502 | return NULL; | ||
| 503 | } | ||
| 504 | |||
| 505 | #endif /* CONFIG_CPU_SUP_INTEL */ | ||
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c index 941caa2e449b..aeefd45697a2 100644 --- a/arch/x86/kernel/cpu/perf_event_amd.c +++ b/arch/x86/kernel/cpu/perf_event_amd.c | |||
| @@ -1,4 +1,10 @@ | |||
| 1 | #ifdef CONFIG_CPU_SUP_AMD | 1 | #include <linux/perf_event.h> |
| 2 | #include <linux/types.h> | ||
| 3 | #include <linux/init.h> | ||
| 4 | #include <linux/slab.h> | ||
| 5 | #include <asm/apicdef.h> | ||
| 6 | |||
| 7 | #include "perf_event.h" | ||
| 2 | 8 | ||
| 3 | static __initconst const u64 amd_hw_cache_event_ids | 9 | static __initconst const u64 amd_hw_cache_event_ids |
| 4 | [PERF_COUNT_HW_CACHE_MAX] | 10 | [PERF_COUNT_HW_CACHE_MAX] |
| @@ -132,6 +138,19 @@ static int amd_pmu_hw_config(struct perf_event *event) | |||
| 132 | if (ret) | 138 | if (ret) |
| 133 | return ret; | 139 | return ret; |
| 134 | 140 | ||
| 141 | if (event->attr.exclude_host && event->attr.exclude_guest) | ||
| 142 | /* | ||
| 143 | * When HO == GO == 1 the hardware treats that as GO == HO == 0 | ||
| 144 | * and will count in both modes. We don't want to count in that | ||
| 145 | * case, so we emulate no-counting by setting US = OS = 0. | ||
| 146 | */ | ||
| 147 | event->hw.config &= ~(ARCH_PERFMON_EVENTSEL_USR | | ||
| 148 | ARCH_PERFMON_EVENTSEL_OS); | ||
| 149 | else if (event->attr.exclude_host) | ||
| 150 | event->hw.config |= AMD_PERFMON_EVENTSEL_GUESTONLY; | ||
| 151 | else if (event->attr.exclude_guest) | ||
| 152 | event->hw.config |= AMD_PERFMON_EVENTSEL_HOSTONLY; | ||
| 153 | |||
| 135 | if (event->attr.type != PERF_TYPE_RAW) | 154 | if (event->attr.type != PERF_TYPE_RAW) |
| 136 | return 0; | 155 | return 0; |
| 137 | 156 | ||
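The added branch reduces to a small truth table over the two exclusion attributes (illustrative summary, not part of the patch):

	exclude_guest  exclude_host  effect on event->hw.config
	      0             0        unchanged: count in host and guest
	      0             1        set AMD_PERFMON_EVENTSEL_GUESTONLY
	      1             0        set AMD_PERFMON_EVENTSEL_HOSTONLY
	      1             1        clear USR and OS: count nothing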
| @@ -350,7 +369,7 @@ static void amd_pmu_cpu_starting(int cpu) | |||
| 350 | continue; | 369 | continue; |
| 351 | 370 | ||
| 352 | if (nb->nb_id == nb_id) { | 371 | if (nb->nb_id == nb_id) { |
| 353 | kfree(cpuc->amd_nb); | 372 | cpuc->kfree_on_online = cpuc->amd_nb; |
| 354 | cpuc->amd_nb = nb; | 373 | cpuc->amd_nb = nb; |
| 355 | break; | 374 | break; |
| 356 | } | 375 | } |
| @@ -392,7 +411,7 @@ static __initconst const struct x86_pmu amd_pmu = { | |||
| 392 | .perfctr = MSR_K7_PERFCTR0, | 411 | .perfctr = MSR_K7_PERFCTR0, |
| 393 | .event_map = amd_pmu_event_map, | 412 | .event_map = amd_pmu_event_map, |
| 394 | .max_events = ARRAY_SIZE(amd_perfmon_event_map), | 413 | .max_events = ARRAY_SIZE(amd_perfmon_event_map), |
| 395 | .num_counters = 4, | 414 | .num_counters = AMD64_NUM_COUNTERS, |
| 396 | .cntval_bits = 48, | 415 | .cntval_bits = 48, |
| 397 | .cntval_mask = (1ULL << 48) - 1, | 416 | .cntval_mask = (1ULL << 48) - 1, |
| 398 | .apic = 1, | 417 | .apic = 1, |
| @@ -556,7 +575,7 @@ static __initconst const struct x86_pmu amd_pmu_f15h = { | |||
| 556 | .perfctr = MSR_F15H_PERF_CTR, | 575 | .perfctr = MSR_F15H_PERF_CTR, |
| 557 | .event_map = amd_pmu_event_map, | 576 | .event_map = amd_pmu_event_map, |
| 558 | .max_events = ARRAY_SIZE(amd_perfmon_event_map), | 577 | .max_events = ARRAY_SIZE(amd_perfmon_event_map), |
| 559 | .num_counters = 6, | 578 | .num_counters = AMD64_NUM_COUNTERS_F15H, |
| 560 | .cntval_bits = 48, | 579 | .cntval_bits = 48, |
| 561 | .cntval_mask = (1ULL << 48) - 1, | 580 | .cntval_mask = (1ULL << 48) - 1, |
| 562 | .apic = 1, | 581 | .apic = 1, |
| @@ -573,7 +592,7 @@ static __initconst const struct x86_pmu amd_pmu_f15h = { | |||
| 573 | #endif | 592 | #endif |
| 574 | }; | 593 | }; |
| 575 | 594 | ||
| 576 | static __init int amd_pmu_init(void) | 595 | __init int amd_pmu_init(void) |
| 577 | { | 596 | { |
| 578 | /* Performance-monitoring supported from K7 and later: */ | 597 | /* Performance-monitoring supported from K7 and later: */ |
| 579 | if (boot_cpu_data.x86 < 6) | 598 | if (boot_cpu_data.x86 < 6) |
| @@ -602,12 +621,3 @@ static __init int amd_pmu_init(void) | |||
| 602 | 621 | ||
| 603 | return 0; | 622 | return 0; |
| 604 | } | 623 | } |
| 605 | |||
| 606 | #else /* CONFIG_CPU_SUP_AMD */ | ||
| 607 | |||
| 608 | static int amd_pmu_init(void) | ||
| 609 | { | ||
| 610 | return 0; | ||
| 611 | } | ||
| 612 | |||
| 613 | #endif | ||
diff --git a/arch/x86/kernel/cpu/perf_event_amd_ibs.c b/arch/x86/kernel/cpu/perf_event_amd_ibs.c new file mode 100644 index 000000000000..ab6343d21825 --- /dev/null +++ b/arch/x86/kernel/cpu/perf_event_amd_ibs.c | |||
| @@ -0,0 +1,294 @@ | |||
| 1 | /* | ||
| 2 | * Performance events - AMD IBS | ||
| 3 | * | ||
| 4 | * Copyright (C) 2011 Advanced Micro Devices, Inc., Robert Richter | ||
| 5 | * | ||
| 6 | * For licencing details see kernel-base/COPYING | ||
| 7 | */ | ||
| 8 | |||
| 9 | #include <linux/perf_event.h> | ||
| 10 | #include <linux/module.h> | ||
| 11 | #include <linux/pci.h> | ||
| 12 | |||
| 13 | #include <asm/apic.h> | ||
| 14 | |||
| 15 | static u32 ibs_caps; | ||
| 16 | |||
| 17 | #if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD) | ||
| 18 | |||
| 19 | static struct pmu perf_ibs; | ||
| 20 | |||
| 21 | static int perf_ibs_init(struct perf_event *event) | ||
| 22 | { | ||
| 23 | if (perf_ibs.type != event->attr.type) | ||
| 24 | return -ENOENT; | ||
| 25 | return 0; | ||
| 26 | } | ||
| 27 | |||
| 28 | static int perf_ibs_add(struct perf_event *event, int flags) | ||
| 29 | { | ||
| 30 | return 0; | ||
| 31 | } | ||
| 32 | |||
| 33 | static void perf_ibs_del(struct perf_event *event, int flags) | ||
| 34 | { | ||
| 35 | } | ||
| 36 | |||
| 37 | static struct pmu perf_ibs = { | ||
| 38 | .event_init = perf_ibs_init, | ||
| 39 | .add = perf_ibs_add, | ||
| 40 | .del = perf_ibs_del, | ||
| 41 | }; | ||
| 42 | |||
| 43 | static __init int perf_event_ibs_init(void) | ||
| 44 | { | ||
| 45 | if (!ibs_caps) | ||
| 46 | return -ENODEV; /* ibs not supported by the cpu */ | ||
| 47 | |||
| 48 | perf_pmu_register(&perf_ibs, "ibs", -1); | ||
| 49 | printk(KERN_INFO "perf: AMD IBS detected (0x%08x)\n", ibs_caps); | ||
| 50 | |||
| 51 | return 0; | ||
| 52 | } | ||
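Once registered, the PMU gets a dynamic type id, published via sysfs. A user-space sketch for opening an event against it (read_int is a hypothetical helper; the sysfs path follows the dynamic-PMU convention):

	struct perf_event_attr attr = { };
	int fd;

	attr.type = read_int("/sys/bus/event_source/devices/ibs/type");
	attr.size = sizeof(attr);
	fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);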
| 53 | |||
| 54 | #else /* defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD) */ | ||
| 55 | |||
| 56 | static __init int perf_event_ibs_init(void) { return 0; } | ||
| 57 | |||
| 58 | #endif | ||
| 59 | |||
| 60 | /* IBS - apic initialization, for perf and oprofile */ | ||
| 61 | |||
| 62 | static __init u32 __get_ibs_caps(void) | ||
| 63 | { | ||
| 64 | u32 caps; | ||
| 65 | unsigned int max_level; | ||
| 66 | |||
| 67 | if (!boot_cpu_has(X86_FEATURE_IBS)) | ||
| 68 | return 0; | ||
| 69 | |||
| 70 | /* check IBS cpuid feature flags */ | ||
| 71 | max_level = cpuid_eax(0x80000000); | ||
| 72 | if (max_level < IBS_CPUID_FEATURES) | ||
| 73 | return IBS_CAPS_DEFAULT; | ||
| 74 | |||
| 75 | caps = cpuid_eax(IBS_CPUID_FEATURES); | ||
| 76 | if (!(caps & IBS_CAPS_AVAIL)) | ||
| 77 | /* cpuid flags not valid */ | ||
| 78 | return IBS_CAPS_DEFAULT; | ||
| 79 | |||
| 80 | return caps; | ||
| 81 | } | ||
| 82 | |||
| 83 | u32 get_ibs_caps(void) | ||
| 84 | { | ||
| 85 | return ibs_caps; | ||
| 86 | } | ||
| 87 | |||
| 88 | EXPORT_SYMBOL(get_ibs_caps); | ||
| 89 | |||
| 90 | static inline int get_eilvt(int offset) | ||
| 91 | { | ||
| 92 | return !setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_NMI, 1); | ||
| 93 | } | ||
| 94 | |||
| 95 | static inline int put_eilvt(int offset) | ||
| 96 | { | ||
| 97 | return !setup_APIC_eilvt(offset, 0, 0, 1); | ||
| 98 | } | ||
| 99 | |||
| 100 | /* | ||
| 101 | * Check and reserve APIC extended interrupt LVT offset for IBS if available. | ||
| 102 | */ | ||
| 103 | static inline int ibs_eilvt_valid(void) | ||
| 104 | { | ||
| 105 | int offset; | ||
| 106 | u64 val; | ||
| 107 | int valid = 0; | ||
| 108 | |||
| 109 | preempt_disable(); | ||
| 110 | |||
| 111 | rdmsrl(MSR_AMD64_IBSCTL, val); | ||
| 112 | offset = val & IBSCTL_LVT_OFFSET_MASK; | ||
| 113 | |||
| 114 | if (!(val & IBSCTL_LVT_OFFSET_VALID)) { | ||
| 115 | pr_err(FW_BUG "cpu %d, invalid IBS interrupt offset %d (MSR%08X=0x%016llx)\n", | ||
| 116 | smp_processor_id(), offset, MSR_AMD64_IBSCTL, val); | ||
| 117 | goto out; | ||
| 118 | } | ||
| 119 | |||
| 120 | if (!get_eilvt(offset)) { | ||
| 121 | pr_err(FW_BUG "cpu %d, IBS interrupt offset %d not available (MSR%08X=0x%016llx)\n", | ||
| 122 | smp_processor_id(), offset, MSR_AMD64_IBSCTL, val); | ||
| 123 | goto out; | ||
| 124 | } | ||
| 125 | |||
| 126 | valid = 1; | ||
| 127 | out: | ||
| 128 | preempt_enable(); | ||
| 129 | |||
| 130 | return valid; | ||
| 131 | } | ||
| 132 | |||
| 133 | static int setup_ibs_ctl(int ibs_eilvt_off) | ||
| 134 | { | ||
| 135 | struct pci_dev *cpu_cfg; | ||
| 136 | int nodes; | ||
| 137 | u32 value = 0; | ||
| 138 | |||
| 139 | nodes = 0; | ||
| 140 | cpu_cfg = NULL; | ||
| 141 | do { | ||
| 142 | cpu_cfg = pci_get_device(PCI_VENDOR_ID_AMD, | ||
| 143 | PCI_DEVICE_ID_AMD_10H_NB_MISC, | ||
| 144 | cpu_cfg); | ||
| 145 | if (!cpu_cfg) | ||
| 146 | break; | ||
| 147 | ++nodes; | ||
| 148 | pci_write_config_dword(cpu_cfg, IBSCTL, ibs_eilvt_off | ||
| 149 | | IBSCTL_LVT_OFFSET_VALID); | ||
| 150 | pci_read_config_dword(cpu_cfg, IBSCTL, &value); | ||
| 151 | if (value != (ibs_eilvt_off | IBSCTL_LVT_OFFSET_VALID)) { | ||
| 152 | pci_dev_put(cpu_cfg); | ||
| 153 | printk(KERN_DEBUG "Failed to setup IBS LVT offset, " | ||
| 154 | "IBSCTL = 0x%08x\n", value); | ||
| 155 | return -EINVAL; | ||
| 156 | } | ||
| 157 | } while (1); | ||
| 158 | |||
| 159 | if (!nodes) { | ||
| 160 | printk(KERN_DEBUG "No CPU node configured for IBS\n"); | ||
| 161 | return -ENODEV; | ||
| 162 | } | ||
| 163 | |||
| 164 | return 0; | ||
| 165 | } | ||
| 166 | |||
| 167 | /* | ||
| 168 | * This runs only on the current cpu. We try to find an LVT offset and | ||
| 169 | * set up the local APIC. For this we must disable preemption. On | ||
| 170 | * success we initialize all nodes with this offset. This then updates | ||
| 171 | * the offset in the per-node IBS_CTL MSR. The per-core APIC setup of | ||
| 172 | * the IBS interrupt vector is handled by perf_ibs_cpu_notifier, which | ||
| 173 | * uses the new offset. | ||
| 174 | */ | ||
| 175 | static int force_ibs_eilvt_setup(void) | ||
| 176 | { | ||
| 177 | int offset; | ||
| 178 | int ret; | ||
| 179 | |||
| 180 | preempt_disable(); | ||
| 181 | /* find the next free available EILVT entry, skip offset 0 */ | ||
| 182 | for (offset = 1; offset < APIC_EILVT_NR_MAX; offset++) { | ||
| 183 | if (get_eilvt(offset)) | ||
| 184 | break; | ||
| 185 | } | ||
| 186 | preempt_enable(); | ||
| 187 | |||
| 188 | if (offset == APIC_EILVT_NR_MAX) { | ||
| 189 | printk(KERN_DEBUG "No EILVT entry available\n"); | ||
| 190 | return -EBUSY; | ||
| 191 | } | ||
| 192 | |||
| 193 | ret = setup_ibs_ctl(offset); | ||
| 194 | if (ret) | ||
| 195 | goto out; | ||
| 196 | |||
| 197 | if (!ibs_eilvt_valid()) { | ||
| 198 | ret = -EFAULT; | ||
| 199 | goto out; | ||
| 200 | } | ||
| 201 | |||
| 202 | pr_err(FW_BUG "using offset %d for IBS interrupts\n", offset); | ||
| 203 | pr_err(FW_BUG "workaround enabled for IBS LVT offset\n"); | ||
| 204 | |||
| 205 | return 0; | ||
| 206 | out: | ||
| 207 | preempt_disable(); | ||
| 208 | put_eilvt(offset); | ||
| 209 | preempt_enable(); | ||
| 210 | return ret; | ||
| 211 | } | ||
| 212 | |||
| 213 | static inline int get_ibs_lvt_offset(void) | ||
| 214 | { | ||
| 215 | u64 val; | ||
| 216 | |||
| 217 | rdmsrl(MSR_AMD64_IBSCTL, val); | ||
| 218 | if (!(val & IBSCTL_LVT_OFFSET_VALID)) | ||
| 219 | return -EINVAL; | ||
| 220 | |||
| 221 | return val & IBSCTL_LVT_OFFSET_MASK; | ||
| 222 | } | ||
| 223 | |||
| 224 | static void setup_APIC_ibs(void *dummy) | ||
| 225 | { | ||
| 226 | int offset; | ||
| 227 | |||
| 228 | offset = get_ibs_lvt_offset(); | ||
| 229 | if (offset < 0) | ||
| 230 | goto failed; | ||
| 231 | |||
| 232 | if (!setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_NMI, 0)) | ||
| 233 | return; | ||
| 234 | failed: | ||
| 235 | pr_warn("perf: IBS APIC setup failed on cpu #%d\n", | ||
| 236 | smp_processor_id()); | ||
| 237 | } | ||
| 238 | |||
| 239 | static void clear_APIC_ibs(void *dummy) | ||
| 240 | { | ||
| 241 | int offset; | ||
| 242 | |||
| 243 | offset = get_ibs_lvt_offset(); | ||
| 244 | if (offset >= 0) | ||
| 245 | setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_FIX, 1); | ||
| 246 | } | ||
| 247 | |||
| 248 | static int __cpuinit | ||
| 249 | perf_ibs_cpu_notifier(struct notifier_block *self, unsigned long action, void *hcpu) | ||
| 250 | { | ||
| 251 | switch (action & ~CPU_TASKS_FROZEN) { | ||
| 252 | case CPU_STARTING: | ||
| 253 | setup_APIC_ibs(NULL); | ||
| 254 | break; | ||
| 255 | case CPU_DYING: | ||
| 256 | clear_APIC_ibs(NULL); | ||
| 257 | break; | ||
| 258 | default: | ||
| 259 | break; | ||
| 260 | } | ||
| 261 | |||
| 262 | return NOTIFY_OK; | ||
| 263 | } | ||
| 264 | |||
| 265 | static __init int amd_ibs_init(void) | ||
| 266 | { | ||
| 267 | u32 caps; | ||
| 268 | int ret; | ||
| 269 | |||
| 270 | caps = __get_ibs_caps(); | ||
| 271 | if (!caps) | ||
| 272 | return -ENODEV; /* ibs not supported by the cpu */ | ||
| 273 | |||
| 274 | if (!ibs_eilvt_valid()) { | ||
| 275 | ret = force_ibs_eilvt_setup(); | ||
| 276 | if (ret) { | ||
| 277 | pr_err("Failed to setup IBS, %d\n", ret); | ||
| 278 | return ret; | ||
| 279 | } | ||
| 280 | } | ||
| 281 | |||
| 282 | get_online_cpus(); | ||
| 283 | ibs_caps = caps; | ||
| 284 | /* make ibs_caps visible to other cpus: */ | ||
| 285 | smp_mb(); | ||
| 286 | perf_cpu_notifier(perf_ibs_cpu_notifier); | ||
| 287 | smp_call_function(setup_APIC_ibs, NULL, 1); | ||
| 288 | put_online_cpus(); | ||
| 289 | |||
| 290 | return perf_event_ibs_init(); | ||
| 291 | } | ||
| 292 | |||
| 293 | /* Since we need the PCI subsystem to initialize IBS, we can't do this earlier: */ | ||
| 294 | device_initcall(amd_ibs_init); | ||
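Consumers such as oprofile gate their IBS paths on this capability word; a sketch, where IBS_CAPS_OPCNT is one of the feature bits defined alongside IBS_CAPS_AVAIL and ctl is an op-sampling control word being assembled:

	u32 caps = get_ibs_caps();

	if (!caps)
		return -ENODEV;		/* no IBS on this CPU */
	if (caps & IBS_CAPS_OPCNT)
		ctl |= IBS_OP_CNT_CTL;	/* count dispatched ops, not cycles */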
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 45fbb8f7f549..2be5ebe99872 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c | |||
| @@ -1,16 +1,20 @@ | |||
| 1 | #ifdef CONFIG_CPU_SUP_INTEL | ||
| 2 | |||
| 3 | /* | 1 | /* |
| 4 | * Per core/cpu state | 2 | * Per core/cpu state |
| 5 | * | 3 | * |
| 6 | * Used to coordinate shared registers between HT threads or | 4 | * Used to coordinate shared registers between HT threads or |
| 7 | * among events on a single PMU. | 5 | * among events on a single PMU. |
| 8 | */ | 6 | */ |
| 9 | struct intel_shared_regs { | 7 | |
| 10 | struct er_account regs[EXTRA_REG_MAX]; | 8 | #include <linux/stddef.h> |
| 11 | int refcnt; /* per-core: #HT threads */ | 9 | #include <linux/types.h> |
| 12 | unsigned core_id; /* per-core: core id */ | 10 | #include <linux/init.h> |
| 13 | }; | 11 | #include <linux/slab.h> |
| 12 | #include <linux/export.h> | ||
| 13 | |||
| 14 | #include <asm/hardirq.h> | ||
| 15 | #include <asm/apic.h> | ||
| 16 | |||
| 17 | #include "perf_event.h" | ||
| 14 | 18 | ||
| 15 | /* | 19 | /* |
| 16 | * Intel PerfMon, used on Core and later. | 20 | * Intel PerfMon, used on Core and later. |
| @@ -746,7 +750,8 @@ static void intel_pmu_enable_all(int added) | |||
| 746 | 750 | ||
| 747 | intel_pmu_pebs_enable_all(); | 751 | intel_pmu_pebs_enable_all(); |
| 748 | intel_pmu_lbr_enable_all(); | 752 | intel_pmu_lbr_enable_all(); |
| 749 | wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl); | 753 | wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, |
| 754 | x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_guest_mask); | ||
| 750 | 755 | ||
| 751 | if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) { | 756 | if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) { |
| 752 | struct perf_event *event = | 757 | struct perf_event *event = |
| @@ -869,6 +874,7 @@ static void intel_pmu_disable_fixed(struct hw_perf_event *hwc) | |||
| 869 | static void intel_pmu_disable_event(struct perf_event *event) | 874 | static void intel_pmu_disable_event(struct perf_event *event) |
| 870 | { | 875 | { |
| 871 | struct hw_perf_event *hwc = &event->hw; | 876 | struct hw_perf_event *hwc = &event->hw; |
| 877 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
| 872 | 878 | ||
| 873 | if (unlikely(hwc->idx == X86_PMC_IDX_FIXED_BTS)) { | 879 | if (unlikely(hwc->idx == X86_PMC_IDX_FIXED_BTS)) { |
| 874 | intel_pmu_disable_bts(); | 880 | intel_pmu_disable_bts(); |
| @@ -876,6 +882,9 @@ static void intel_pmu_disable_event(struct perf_event *event) | |||
| 876 | return; | 882 | return; |
| 877 | } | 883 | } |
| 878 | 884 | ||
| 885 | cpuc->intel_ctrl_guest_mask &= ~(1ull << hwc->idx); | ||
| 886 | cpuc->intel_ctrl_host_mask &= ~(1ull << hwc->idx); | ||
| 887 | |||
| 879 | if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { | 888 | if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { |
| 880 | intel_pmu_disable_fixed(hwc); | 889 | intel_pmu_disable_fixed(hwc); |
| 881 | return; | 890 | return; |
| @@ -921,6 +930,7 @@ static void intel_pmu_enable_fixed(struct hw_perf_event *hwc) | |||
| 921 | static void intel_pmu_enable_event(struct perf_event *event) | 930 | static void intel_pmu_enable_event(struct perf_event *event) |
| 922 | { | 931 | { |
| 923 | struct hw_perf_event *hwc = &event->hw; | 932 | struct hw_perf_event *hwc = &event->hw; |
| 933 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
| 924 | 934 | ||
| 925 | if (unlikely(hwc->idx == X86_PMC_IDX_FIXED_BTS)) { | 935 | if (unlikely(hwc->idx == X86_PMC_IDX_FIXED_BTS)) { |
| 926 | if (!__this_cpu_read(cpu_hw_events.enabled)) | 936 | if (!__this_cpu_read(cpu_hw_events.enabled)) |
| @@ -930,6 +940,11 @@ static void intel_pmu_enable_event(struct perf_event *event) | |||
| 930 | return; | 940 | return; |
| 931 | } | 941 | } |
| 932 | 942 | ||
| 943 | if (event->attr.exclude_host) | ||
| 944 | cpuc->intel_ctrl_guest_mask |= (1ull << hwc->idx); | ||
| 945 | if (event->attr.exclude_guest) | ||
| 946 | cpuc->intel_ctrl_host_mask |= (1ull << hwc->idx); | ||
| 947 | |||
| 933 | if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { | 948 | if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { |
| 934 | intel_pmu_enable_fixed(hwc); | 949 | intel_pmu_enable_fixed(hwc); |
| 935 | return; | 950 | return; |
| @@ -945,7 +960,7 @@ static void intel_pmu_enable_event(struct perf_event *event) | |||
| 945 | * Save and restart an expired event. Called by NMI contexts, | 960 | * Save and restart an expired event. Called by NMI contexts, |
| 946 | * so it has to be careful about preempting normal event ops: | 961 | * so it has to be careful about preempting normal event ops: |
| 947 | */ | 962 | */ |
| 948 | static int intel_pmu_save_and_restart(struct perf_event *event) | 963 | int intel_pmu_save_and_restart(struct perf_event *event) |
| 949 | { | 964 | { |
| 950 | x86_perf_event_update(event); | 965 | x86_perf_event_update(event); |
| 951 | return x86_perf_event_set_period(event); | 966 | return x86_perf_event_set_period(event); |
| @@ -1197,6 +1212,21 @@ intel_shared_regs_constraints(struct cpu_hw_events *cpuc, | |||
| 1197 | return c; | 1212 | return c; |
| 1198 | } | 1213 | } |
| 1199 | 1214 | ||
| 1215 | struct event_constraint * | ||
| 1216 | x86_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) | ||
| 1217 | { | ||
| 1218 | struct event_constraint *c; | ||
| 1219 | |||
| 1220 | if (x86_pmu.event_constraints) { | ||
| 1221 | for_each_event_constraint(c, x86_pmu.event_constraints) { | ||
| 1222 | if ((event->hw.config & c->cmask) == c->code) | ||
| 1223 | return c; | ||
| 1224 | } | ||
| 1225 | } | ||
| 1226 | |||
| 1227 | return &unconstrained; | ||
| 1228 | } | ||
| 1229 | |||
| 1200 | static struct event_constraint * | 1230 | static struct event_constraint * |
| 1201 | intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) | 1231 | intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) |
| 1202 | { | 1232 | { |
| @@ -1284,12 +1314,84 @@ static int intel_pmu_hw_config(struct perf_event *event) | |||
| 1284 | return 0; | 1314 | return 0; |
| 1285 | } | 1315 | } |
| 1286 | 1316 | ||
| 1317 | struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr) | ||
| 1318 | { | ||
| 1319 | if (x86_pmu.guest_get_msrs) | ||
| 1320 | return x86_pmu.guest_get_msrs(nr); | ||
| 1321 | *nr = 0; | ||
| 1322 | return NULL; | ||
| 1323 | } | ||
| 1324 | EXPORT_SYMBOL_GPL(perf_guest_get_msrs); | ||
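This hook is what makes the host/guest split take effect: around VM-entry KVM loads the guest values and restores the host values on exit. Roughly, after the VMX usage (add_atomic_switch_msr stands for KVM's MSR auto-switch helper):

	int i, nr;
	struct perf_guest_switch_msr *msrs = perf_guest_get_msrs(&nr);

	for (i = 0; i < nr; i++)
		add_atomic_switch_msr(vmx, msrs[i].msr,
				      msrs[i].guest, msrs[i].host);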
| 1325 | |||
| 1326 | static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr) | ||
| 1327 | { | ||
| 1328 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
| 1329 | struct perf_guest_switch_msr *arr = cpuc->guest_switch_msrs; | ||
| 1330 | |||
| 1331 | arr[0].msr = MSR_CORE_PERF_GLOBAL_CTRL; | ||
| 1332 | arr[0].host = x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_guest_mask; | ||
| 1333 | arr[0].guest = x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_host_mask; | ||
| 1334 | |||
| 1335 | *nr = 1; | ||
| 1336 | return arr; | ||
| 1337 | } | ||
| 1338 | |||
| 1339 | static struct perf_guest_switch_msr *core_guest_get_msrs(int *nr) | ||
| 1340 | { | ||
| 1341 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
| 1342 | struct perf_guest_switch_msr *arr = cpuc->guest_switch_msrs; | ||
| 1343 | int idx; | ||
| 1344 | |||
| 1345 | for (idx = 0; idx < x86_pmu.num_counters; idx++) { | ||
| 1346 | struct perf_event *event = cpuc->events[idx]; | ||
| 1347 | |||
| 1348 | arr[idx].msr = x86_pmu_config_addr(idx); | ||
| 1349 | arr[idx].host = arr[idx].guest = 0; | ||
| 1350 | |||
| 1351 | if (!test_bit(idx, cpuc->active_mask)) | ||
| 1352 | continue; | ||
| 1353 | |||
| 1354 | arr[idx].host = arr[idx].guest = | ||
| 1355 | event->hw.config | ARCH_PERFMON_EVENTSEL_ENABLE; | ||
| 1356 | |||
| 1357 | if (event->attr.exclude_host) | ||
| 1358 | arr[idx].host &= ~ARCH_PERFMON_EVENTSEL_ENABLE; | ||
| 1359 | else if (event->attr.exclude_guest) | ||
| 1360 | arr[idx].guest &= ~ARCH_PERFMON_EVENTSEL_ENABLE; | ||
| 1361 | } | ||
| 1362 | |||
| 1363 | *nr = x86_pmu.num_counters; | ||
| 1364 | return arr; | ||
| 1365 | } | ||
| 1366 | |||
| 1367 | static void core_pmu_enable_event(struct perf_event *event) | ||
| 1368 | { | ||
| 1369 | if (!event->attr.exclude_host) | ||
| 1370 | x86_pmu_enable_event(event); | ||
| 1371 | } | ||
| 1372 | |||
| 1373 | static void core_pmu_enable_all(int added) | ||
| 1374 | { | ||
| 1375 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
| 1376 | int idx; | ||
| 1377 | |||
| 1378 | for (idx = 0; idx < x86_pmu.num_counters; idx++) { | ||
| 1379 | struct hw_perf_event *hwc = &cpuc->events[idx]->hw; | ||
| 1380 | |||
| 1381 | if (!test_bit(idx, cpuc->active_mask) || | ||
| 1382 | cpuc->events[idx]->attr.exclude_host) | ||
| 1383 | continue; | ||
| 1384 | |||
| 1385 | __x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE); | ||
| 1386 | } | ||
| 1387 | } | ||
| 1388 | |||
| 1287 | static __initconst const struct x86_pmu core_pmu = { | 1389 | static __initconst const struct x86_pmu core_pmu = { |
| 1288 | .name = "core", | 1390 | .name = "core", |
| 1289 | .handle_irq = x86_pmu_handle_irq, | 1391 | .handle_irq = x86_pmu_handle_irq, |
| 1290 | .disable_all = x86_pmu_disable_all, | 1392 | .disable_all = x86_pmu_disable_all, |
| 1291 | .enable_all = x86_pmu_enable_all, | 1393 | .enable_all = core_pmu_enable_all, |
| 1292 | .enable = x86_pmu_enable_event, | 1394 | .enable = core_pmu_enable_event, |
| 1293 | .disable = x86_pmu_disable_event, | 1395 | .disable = x86_pmu_disable_event, |
| 1294 | .hw_config = x86_pmu_hw_config, | 1396 | .hw_config = x86_pmu_hw_config, |
| 1295 | .schedule_events = x86_schedule_events, | 1397 | .schedule_events = x86_schedule_events, |
| @@ -1307,9 +1409,10 @@ static __initconst const struct x86_pmu core_pmu = { | |||
| 1307 | .get_event_constraints = intel_get_event_constraints, | 1409 | .get_event_constraints = intel_get_event_constraints, |
| 1308 | .put_event_constraints = intel_put_event_constraints, | 1410 | .put_event_constraints = intel_put_event_constraints, |
| 1309 | .event_constraints = intel_core_event_constraints, | 1411 | .event_constraints = intel_core_event_constraints, |
| 1412 | .guest_get_msrs = core_guest_get_msrs, | ||
| 1310 | }; | 1413 | }; |
| 1311 | 1414 | ||
| 1312 | static struct intel_shared_regs *allocate_shared_regs(int cpu) | 1415 | struct intel_shared_regs *allocate_shared_regs(int cpu) |
| 1313 | { | 1416 | { |
| 1314 | struct intel_shared_regs *regs; | 1417 | struct intel_shared_regs *regs; |
| 1315 | int i; | 1418 | int i; |
| @@ -1362,7 +1465,7 @@ static void intel_pmu_cpu_starting(int cpu) | |||
| 1362 | 1465 | ||
| 1363 | pc = per_cpu(cpu_hw_events, i).shared_regs; | 1466 | pc = per_cpu(cpu_hw_events, i).shared_regs; |
| 1364 | if (pc && pc->core_id == core_id) { | 1467 | if (pc && pc->core_id == core_id) { |
| 1365 | kfree(cpuc->shared_regs); | 1468 | cpuc->kfree_on_online = cpuc->shared_regs; |
| 1366 | cpuc->shared_regs = pc; | 1469 | cpuc->shared_regs = pc; |
| 1367 | break; | 1470 | break; |
| 1368 | } | 1471 | } |
| @@ -1413,6 +1516,7 @@ static __initconst const struct x86_pmu intel_pmu = { | |||
| 1413 | .cpu_prepare = intel_pmu_cpu_prepare, | 1516 | .cpu_prepare = intel_pmu_cpu_prepare, |
| 1414 | .cpu_starting = intel_pmu_cpu_starting, | 1517 | .cpu_starting = intel_pmu_cpu_starting, |
| 1415 | .cpu_dying = intel_pmu_cpu_dying, | 1518 | .cpu_dying = intel_pmu_cpu_dying, |
| 1519 | .guest_get_msrs = intel_guest_get_msrs, | ||
| 1416 | }; | 1520 | }; |
| 1417 | 1521 | ||
| 1418 | static void intel_clovertown_quirks(void) | 1522 | static void intel_clovertown_quirks(void) |
| @@ -1441,7 +1545,7 @@ static void intel_clovertown_quirks(void) | |||
| 1441 | x86_pmu.pebs_constraints = NULL; | 1545 | x86_pmu.pebs_constraints = NULL; |
| 1442 | } | 1546 | } |
| 1443 | 1547 | ||
| 1444 | static __init int intel_pmu_init(void) | 1548 | __init int intel_pmu_init(void) |
| 1445 | { | 1549 | { |
| 1446 | union cpuid10_edx edx; | 1550 | union cpuid10_edx edx; |
| 1447 | union cpuid10_eax eax; | 1551 | union cpuid10_eax eax; |
| @@ -1590,13 +1694,14 @@ static __init int intel_pmu_init(void) | |||
| 1590 | break; | 1694 | break; |
| 1591 | 1695 | ||
| 1592 | case 42: /* SandyBridge */ | 1696 | case 42: /* SandyBridge */ |
| 1697 | case 45: /* SandyBridge, "Romley-EP" */ | ||
| 1593 | memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, | 1698 | memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, |
| 1594 | sizeof(hw_cache_event_ids)); | 1699 | sizeof(hw_cache_event_ids)); |
| 1595 | 1700 | ||
| 1596 | intel_pmu_lbr_init_nhm(); | 1701 | intel_pmu_lbr_init_nhm(); |
| 1597 | 1702 | ||
| 1598 | x86_pmu.event_constraints = intel_snb_event_constraints; | 1703 | x86_pmu.event_constraints = intel_snb_event_constraints; |
| 1599 | x86_pmu.pebs_constraints = intel_snb_pebs_events; | 1704 | x86_pmu.pebs_constraints = intel_snb_pebs_event_constraints; |
| 1600 | x86_pmu.extra_regs = intel_snb_extra_regs; | 1705 | x86_pmu.extra_regs = intel_snb_extra_regs; |
| 1601 | /* all extra regs are per-cpu when HT is on */ | 1706 | /* all extra regs are per-cpu when HT is on */ |
| 1602 | x86_pmu.er_flags |= ERF_HAS_RSP_1; | 1707 | x86_pmu.er_flags |= ERF_HAS_RSP_1; |
| @@ -1627,16 +1732,3 @@ static __init int intel_pmu_init(void) | |||
| 1627 | } | 1732 | } |
| 1628 | return 0; | 1733 | return 0; |
| 1629 | } | 1734 | } |
| 1630 | |||
| 1631 | #else /* CONFIG_CPU_SUP_INTEL */ | ||
| 1632 | |||
| 1633 | static int intel_pmu_init(void) | ||
| 1634 | { | ||
| 1635 | return 0; | ||
| 1636 | } | ||
| 1637 | |||
| 1638 | static struct intel_shared_regs *allocate_shared_regs(int cpu) | ||
| 1639 | { | ||
| 1640 | return NULL; | ||
| 1641 | } | ||
| 1642 | #endif /* CONFIG_CPU_SUP_INTEL */ | ||
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index 1b1ef3addcfd..c0d238f49db8 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c | |||
| @@ -1,7 +1,10 @@ | |||
| 1 | #ifdef CONFIG_CPU_SUP_INTEL | 1 | #include <linux/bitops.h> |
| 2 | #include <linux/types.h> | ||
| 3 | #include <linux/slab.h> | ||
| 2 | 4 | ||
| 3 | /* The maximal number of PEBS events: */ | 5 | #include <asm/perf_event.h> |
| 4 | #define MAX_PEBS_EVENTS 4 | 6 | |
| 7 | #include "perf_event.h" | ||
| 5 | 8 | ||
| 6 | /* The size of a BTS record in bytes: */ | 9 | /* The size of a BTS record in bytes: */ |
| 7 | #define BTS_RECORD_SIZE 24 | 10 | #define BTS_RECORD_SIZE 24 |
| @@ -37,24 +40,7 @@ struct pebs_record_nhm { | |||
| 37 | u64 status, dla, dse, lat; | 40 | u64 status, dla, dse, lat; |
| 38 | }; | 41 | }; |
| 39 | 42 | ||
| 40 | /* | 43 | void init_debug_store_on_cpu(int cpu) |
| 41 | * A debug store configuration. | ||
| 42 | * | ||
| 43 | * We only support architectures that use 64bit fields. | ||
| 44 | */ | ||
| 45 | struct debug_store { | ||
| 46 | u64 bts_buffer_base; | ||
| 47 | u64 bts_index; | ||
| 48 | u64 bts_absolute_maximum; | ||
| 49 | u64 bts_interrupt_threshold; | ||
| 50 | u64 pebs_buffer_base; | ||
| 51 | u64 pebs_index; | ||
| 52 | u64 pebs_absolute_maximum; | ||
| 53 | u64 pebs_interrupt_threshold; | ||
| 54 | u64 pebs_event_reset[MAX_PEBS_EVENTS]; | ||
| 55 | }; | ||
| 56 | |||
| 57 | static void init_debug_store_on_cpu(int cpu) | ||
| 58 | { | 44 | { |
| 59 | struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; | 45 | struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; |
| 60 | 46 | ||
| @@ -66,7 +52,7 @@ static void init_debug_store_on_cpu(int cpu) | |||
| 66 | (u32)((u64)(unsigned long)ds >> 32)); | 52 | (u32)((u64)(unsigned long)ds >> 32)); |
| 67 | } | 53 | } |
| 68 | 54 | ||
| 69 | static void fini_debug_store_on_cpu(int cpu) | 55 | void fini_debug_store_on_cpu(int cpu) |
| 70 | { | 56 | { |
| 71 | if (!per_cpu(cpu_hw_events, cpu).ds) | 57 | if (!per_cpu(cpu_hw_events, cpu).ds) |
| 72 | return; | 58 | return; |
| @@ -175,7 +161,7 @@ static void release_ds_buffer(int cpu) | |||
| 175 | kfree(ds); | 161 | kfree(ds); |
| 176 | } | 162 | } |
| 177 | 163 | ||
| 178 | static void release_ds_buffers(void) | 164 | void release_ds_buffers(void) |
| 179 | { | 165 | { |
| 180 | int cpu; | 166 | int cpu; |
| 181 | 167 | ||
| @@ -194,7 +180,7 @@ static void release_ds_buffers(void) | |||
| 194 | put_online_cpus(); | 180 | put_online_cpus(); |
| 195 | } | 181 | } |
| 196 | 182 | ||
| 197 | static void reserve_ds_buffers(void) | 183 | void reserve_ds_buffers(void) |
| 198 | { | 184 | { |
| 199 | int bts_err = 0, pebs_err = 0; | 185 | int bts_err = 0, pebs_err = 0; |
| 200 | int cpu; | 186 | int cpu; |
| @@ -260,10 +246,10 @@ static void reserve_ds_buffers(void) | |||
| 260 | * BTS | 246 | * BTS |
| 261 | */ | 247 | */ |
| 262 | 248 | ||
| 263 | static struct event_constraint bts_constraint = | 249 | struct event_constraint bts_constraint = |
| 264 | EVENT_CONSTRAINT(0, 1ULL << X86_PMC_IDX_FIXED_BTS, 0); | 250 | EVENT_CONSTRAINT(0, 1ULL << X86_PMC_IDX_FIXED_BTS, 0); |
| 265 | 251 | ||
| 266 | static void intel_pmu_enable_bts(u64 config) | 252 | void intel_pmu_enable_bts(u64 config) |
| 267 | { | 253 | { |
| 268 | unsigned long debugctlmsr; | 254 | unsigned long debugctlmsr; |
| 269 | 255 | ||
| @@ -282,7 +268,7 @@ static void intel_pmu_enable_bts(u64 config) | |||
| 282 | update_debugctlmsr(debugctlmsr); | 268 | update_debugctlmsr(debugctlmsr); |
| 283 | } | 269 | } |
| 284 | 270 | ||
| 285 | static void intel_pmu_disable_bts(void) | 271 | void intel_pmu_disable_bts(void) |
| 286 | { | 272 | { |
| 287 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 273 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
| 288 | unsigned long debugctlmsr; | 274 | unsigned long debugctlmsr; |
| @@ -299,7 +285,7 @@ static void intel_pmu_disable_bts(void) | |||
| 299 | update_debugctlmsr(debugctlmsr); | 285 | update_debugctlmsr(debugctlmsr); |
| 300 | } | 286 | } |
| 301 | 287 | ||
| 302 | static int intel_pmu_drain_bts_buffer(void) | 288 | int intel_pmu_drain_bts_buffer(void) |
| 303 | { | 289 | { |
| 304 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 290 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
| 305 | struct debug_store *ds = cpuc->ds; | 291 | struct debug_store *ds = cpuc->ds; |
| @@ -361,7 +347,7 @@ static int intel_pmu_drain_bts_buffer(void) | |||
| 361 | /* | 347 | /* |
| 362 | * PEBS | 348 | * PEBS |
| 363 | */ | 349 | */ |
| 364 | static struct event_constraint intel_core2_pebs_event_constraints[] = { | 350 | struct event_constraint intel_core2_pebs_event_constraints[] = { |
| 365 | INTEL_UEVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY */ | 351 | INTEL_UEVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY */ |
| 366 | INTEL_UEVENT_CONSTRAINT(0xfec1, 0x1), /* X87_OPS_RETIRED.ANY */ | 352 | INTEL_UEVENT_CONSTRAINT(0xfec1, 0x1), /* X87_OPS_RETIRED.ANY */ |
| 367 | INTEL_UEVENT_CONSTRAINT(0x00c5, 0x1), /* BR_INST_RETIRED.MISPRED */ | 353 | INTEL_UEVENT_CONSTRAINT(0x00c5, 0x1), /* BR_INST_RETIRED.MISPRED */ |
| @@ -370,14 +356,14 @@ static struct event_constraint intel_core2_pebs_event_constraints[] = { | |||
| 370 | EVENT_CONSTRAINT_END | 356 | EVENT_CONSTRAINT_END |
| 371 | }; | 357 | }; |
| 372 | 358 | ||
| 373 | static struct event_constraint intel_atom_pebs_event_constraints[] = { | 359 | struct event_constraint intel_atom_pebs_event_constraints[] = { |
| 374 | INTEL_UEVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY */ | 360 | INTEL_UEVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY */ |
| 375 | INTEL_UEVENT_CONSTRAINT(0x00c5, 0x1), /* MISPREDICTED_BRANCH_RETIRED */ | 361 | INTEL_UEVENT_CONSTRAINT(0x00c5, 0x1), /* MISPREDICTED_BRANCH_RETIRED */ |
| 376 | INTEL_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED.* */ | 362 | INTEL_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED.* */ |
| 377 | EVENT_CONSTRAINT_END | 363 | EVENT_CONSTRAINT_END |
| 378 | }; | 364 | }; |
| 379 | 365 | ||
| 380 | static struct event_constraint intel_nehalem_pebs_event_constraints[] = { | 366 | struct event_constraint intel_nehalem_pebs_event_constraints[] = { |
| 381 | INTEL_EVENT_CONSTRAINT(0x0b, 0xf), /* MEM_INST_RETIRED.* */ | 367 | INTEL_EVENT_CONSTRAINT(0x0b, 0xf), /* MEM_INST_RETIRED.* */ |
| 382 | INTEL_EVENT_CONSTRAINT(0x0f, 0xf), /* MEM_UNCORE_RETIRED.* */ | 368 | INTEL_EVENT_CONSTRAINT(0x0f, 0xf), /* MEM_UNCORE_RETIRED.* */ |
| 383 | INTEL_UEVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */ | 369 | INTEL_UEVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */ |
| @@ -392,7 +378,7 @@ static struct event_constraint intel_nehalem_pebs_event_constraints[] = { | |||
| 392 | EVENT_CONSTRAINT_END | 378 | EVENT_CONSTRAINT_END |
| 393 | }; | 379 | }; |
| 394 | 380 | ||
| 395 | static struct event_constraint intel_westmere_pebs_event_constraints[] = { | 381 | struct event_constraint intel_westmere_pebs_event_constraints[] = { |
| 396 | INTEL_EVENT_CONSTRAINT(0x0b, 0xf), /* MEM_INST_RETIRED.* */ | 382 | INTEL_EVENT_CONSTRAINT(0x0b, 0xf), /* MEM_INST_RETIRED.* */ |
| 397 | INTEL_EVENT_CONSTRAINT(0x0f, 0xf), /* MEM_UNCORE_RETIRED.* */ | 383 | INTEL_EVENT_CONSTRAINT(0x0f, 0xf), /* MEM_UNCORE_RETIRED.* */ |
| 398 | INTEL_UEVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */ | 384 | INTEL_UEVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */ |
| @@ -407,7 +393,7 @@ static struct event_constraint intel_westmere_pebs_event_constraints[] = { | |||
| 407 | EVENT_CONSTRAINT_END | 393 | EVENT_CONSTRAINT_END |
| 408 | }; | 394 | }; |
| 409 | 395 | ||
| 410 | static struct event_constraint intel_snb_pebs_events[] = { | 396 | struct event_constraint intel_snb_pebs_event_constraints[] = { |
| 411 | INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */ | 397 | INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */ |
| 412 | INTEL_UEVENT_CONSTRAINT(0x01c2, 0xf), /* UOPS_RETIRED.ALL */ | 398 | INTEL_UEVENT_CONSTRAINT(0x01c2, 0xf), /* UOPS_RETIRED.ALL */ |
| 413 | INTEL_UEVENT_CONSTRAINT(0x02c2, 0xf), /* UOPS_RETIRED.RETIRE_SLOTS */ | 399 | INTEL_UEVENT_CONSTRAINT(0x02c2, 0xf), /* UOPS_RETIRED.RETIRE_SLOTS */ |
| @@ -428,8 +414,7 @@ static struct event_constraint intel_snb_pebs_events[] = { | |||
| 428 | EVENT_CONSTRAINT_END | 414 | EVENT_CONSTRAINT_END |
| 429 | }; | 415 | }; |
| 430 | 416 | ||
| 431 | static struct event_constraint * | 417 | struct event_constraint *intel_pebs_constraints(struct perf_event *event) |
| 432 | intel_pebs_constraints(struct perf_event *event) | ||
| 433 | { | 418 | { |
| 434 | struct event_constraint *c; | 419 | struct event_constraint *c; |
| 435 | 420 | ||
| @@ -446,7 +431,7 @@ intel_pebs_constraints(struct perf_event *event) | |||
| 446 | return &emptyconstraint; | 431 | return &emptyconstraint; |
| 447 | } | 432 | } |
| 448 | 433 | ||
| 449 | static void intel_pmu_pebs_enable(struct perf_event *event) | 434 | void intel_pmu_pebs_enable(struct perf_event *event) |
| 450 | { | 435 | { |
| 451 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 436 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
| 452 | struct hw_perf_event *hwc = &event->hw; | 437 | struct hw_perf_event *hwc = &event->hw; |
| @@ -460,7 +445,7 @@ static void intel_pmu_pebs_enable(struct perf_event *event) | |||
| 460 | intel_pmu_lbr_enable(event); | 445 | intel_pmu_lbr_enable(event); |
| 461 | } | 446 | } |
| 462 | 447 | ||
| 463 | static void intel_pmu_pebs_disable(struct perf_event *event) | 448 | void intel_pmu_pebs_disable(struct perf_event *event) |
| 464 | { | 449 | { |
| 465 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 450 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
| 466 | struct hw_perf_event *hwc = &event->hw; | 451 | struct hw_perf_event *hwc = &event->hw; |
| @@ -475,7 +460,7 @@ static void intel_pmu_pebs_disable(struct perf_event *event) | |||
| 475 | intel_pmu_lbr_disable(event); | 460 | intel_pmu_lbr_disable(event); |
| 476 | } | 461 | } |
| 477 | 462 | ||
| 478 | static void intel_pmu_pebs_enable_all(void) | 463 | void intel_pmu_pebs_enable_all(void) |
| 479 | { | 464 | { |
| 480 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 465 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
| 481 | 466 | ||
| @@ -483,7 +468,7 @@ static void intel_pmu_pebs_enable_all(void) | |||
| 483 | wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled); | 468 | wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled); |
| 484 | } | 469 | } |
| 485 | 470 | ||
| 486 | static void intel_pmu_pebs_disable_all(void) | 471 | void intel_pmu_pebs_disable_all(void) |
| 487 | { | 472 | { |
| 488 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 473 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
| 489 | 474 | ||
| @@ -576,8 +561,6 @@ static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs) | |||
| 576 | return 0; | 561 | return 0; |
| 577 | } | 562 | } |
| 578 | 563 | ||
| 579 | static int intel_pmu_save_and_restart(struct perf_event *event); | ||
| 580 | |||
| 581 | static void __intel_pmu_pebs_event(struct perf_event *event, | 564 | static void __intel_pmu_pebs_event(struct perf_event *event, |
| 582 | struct pt_regs *iregs, void *__pebs) | 565 | struct pt_regs *iregs, void *__pebs) |
| 583 | { | 566 | { |
| @@ -716,7 +699,7 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs) | |||
| 716 | * BTS, PEBS probe and setup | 699 | * BTS, PEBS probe and setup |
| 717 | */ | 700 | */ |
| 718 | 701 | ||
| 719 | static void intel_ds_init(void) | 702 | void intel_ds_init(void) |
| 720 | { | 703 | { |
| 721 | /* | 704 | /* |
| 722 | * No support for 32bit formats | 705 | * No support for 32bit formats |
| @@ -749,15 +732,3 @@ static void intel_ds_init(void) | |||
| 749 | } | 732 | } |
| 750 | } | 733 | } |
| 751 | } | 734 | } |
| 752 | |||
| 753 | #else /* CONFIG_CPU_SUP_INTEL */ | ||
| 754 | |||
| 755 | static void reserve_ds_buffers(void) | ||
| 756 | { | ||
| 757 | } | ||
| 758 | |||
| 759 | static void release_ds_buffers(void) | ||
| 760 | { | ||
| 761 | } | ||
| 762 | |||
| 763 | #endif /* CONFIG_CPU_SUP_INTEL */ | ||
diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c index d202c1bece1a..3fab3de3ce96 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c +++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c | |||
| @@ -1,4 +1,10 @@ | |||
| 1 | #ifdef CONFIG_CPU_SUP_INTEL | 1 | #include <linux/perf_event.h> |
| 2 | #include <linux/types.h> | ||
| 3 | |||
| 4 | #include <asm/perf_event.h> | ||
| 5 | #include <asm/msr.h> | ||
| 6 | |||
| 7 | #include "perf_event.h" | ||
| 2 | 8 | ||
| 3 | enum { | 9 | enum { |
| 4 | LBR_FORMAT_32 = 0x00, | 10 | LBR_FORMAT_32 = 0x00, |
| @@ -48,7 +54,7 @@ static void intel_pmu_lbr_reset_64(void) | |||
| 48 | } | 54 | } |
| 49 | } | 55 | } |
| 50 | 56 | ||
| 51 | static void intel_pmu_lbr_reset(void) | 57 | void intel_pmu_lbr_reset(void) |
| 52 | { | 58 | { |
| 53 | if (!x86_pmu.lbr_nr) | 59 | if (!x86_pmu.lbr_nr) |
| 54 | return; | 60 | return; |
| @@ -59,7 +65,7 @@ static void intel_pmu_lbr_reset(void) | |||
| 59 | intel_pmu_lbr_reset_64(); | 65 | intel_pmu_lbr_reset_64(); |
| 60 | } | 66 | } |
| 61 | 67 | ||
| 62 | static void intel_pmu_lbr_enable(struct perf_event *event) | 68 | void intel_pmu_lbr_enable(struct perf_event *event) |
| 63 | { | 69 | { |
| 64 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 70 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
| 65 | 71 | ||
| @@ -81,7 +87,7 @@ static void intel_pmu_lbr_enable(struct perf_event *event) | |||
| 81 | cpuc->lbr_users++; | 87 | cpuc->lbr_users++; |
| 82 | } | 88 | } |
| 83 | 89 | ||
| 84 | static void intel_pmu_lbr_disable(struct perf_event *event) | 90 | void intel_pmu_lbr_disable(struct perf_event *event) |
| 85 | { | 91 | { |
| 86 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 92 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
| 87 | 93 | ||
| @@ -95,7 +101,7 @@ static void intel_pmu_lbr_disable(struct perf_event *event) | |||
| 95 | __intel_pmu_lbr_disable(); | 101 | __intel_pmu_lbr_disable(); |
| 96 | } | 102 | } |
| 97 | 103 | ||
| 98 | static void intel_pmu_lbr_enable_all(void) | 104 | void intel_pmu_lbr_enable_all(void) |
| 99 | { | 105 | { |
| 100 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 106 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
| 101 | 107 | ||
| @@ -103,7 +109,7 @@ static void intel_pmu_lbr_enable_all(void) | |||
| 103 | __intel_pmu_lbr_enable(); | 109 | __intel_pmu_lbr_enable(); |
| 104 | } | 110 | } |
| 105 | 111 | ||
| 106 | static void intel_pmu_lbr_disable_all(void) | 112 | void intel_pmu_lbr_disable_all(void) |
| 107 | { | 113 | { |
| 108 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 114 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
| 109 | 115 | ||
| @@ -178,7 +184,7 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc) | |||
| 178 | cpuc->lbr_stack.nr = i; | 184 | cpuc->lbr_stack.nr = i; |
| 179 | } | 185 | } |
| 180 | 186 | ||
| 181 | static void intel_pmu_lbr_read(void) | 187 | void intel_pmu_lbr_read(void) |
| 182 | { | 188 | { |
| 183 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 189 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
| 184 | 190 | ||
| @@ -191,7 +197,7 @@ static void intel_pmu_lbr_read(void) | |||
| 191 | intel_pmu_lbr_read_64(cpuc); | 197 | intel_pmu_lbr_read_64(cpuc); |
| 192 | } | 198 | } |
| 193 | 199 | ||
| 194 | static void intel_pmu_lbr_init_core(void) | 200 | void intel_pmu_lbr_init_core(void) |
| 195 | { | 201 | { |
| 196 | x86_pmu.lbr_nr = 4; | 202 | x86_pmu.lbr_nr = 4; |
| 197 | x86_pmu.lbr_tos = 0x01c9; | 203 | x86_pmu.lbr_tos = 0x01c9; |
| @@ -199,7 +205,7 @@ static void intel_pmu_lbr_init_core(void) | |||
| 199 | x86_pmu.lbr_to = 0x60; | 205 | x86_pmu.lbr_to = 0x60; |
| 200 | } | 206 | } |
| 201 | 207 | ||
| 202 | static void intel_pmu_lbr_init_nhm(void) | 208 | void intel_pmu_lbr_init_nhm(void) |
| 203 | { | 209 | { |
| 204 | x86_pmu.lbr_nr = 16; | 210 | x86_pmu.lbr_nr = 16; |
| 205 | x86_pmu.lbr_tos = 0x01c9; | 211 | x86_pmu.lbr_tos = 0x01c9; |
| @@ -207,12 +213,10 @@ static void intel_pmu_lbr_init_nhm(void) | |||
| 207 | x86_pmu.lbr_to = 0x6c0; | 213 | x86_pmu.lbr_to = 0x6c0; |
| 208 | } | 214 | } |
| 209 | 215 | ||
| 210 | static void intel_pmu_lbr_init_atom(void) | 216 | void intel_pmu_lbr_init_atom(void) |
| 211 | { | 217 | { |
| 212 | x86_pmu.lbr_nr = 8; | 218 | x86_pmu.lbr_nr = 8; |
| 213 | x86_pmu.lbr_tos = 0x01c9; | 219 | x86_pmu.lbr_tos = 0x01c9; |
| 214 | x86_pmu.lbr_from = 0x40; | 220 | x86_pmu.lbr_from = 0x40; |
| 215 | x86_pmu.lbr_to = 0x60; | 221 | x86_pmu.lbr_to = 0x60; |
| 216 | } | 222 | } |
| 217 | |||
| 218 | #endif /* CONFIG_CPU_SUP_INTEL */ | ||
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c index 7809d2bcb209..492bf1358a7c 100644 --- a/arch/x86/kernel/cpu/perf_event_p4.c +++ b/arch/x86/kernel/cpu/perf_event_p4.c | |||
| @@ -7,9 +7,13 @@ | |||
| 7 | * For licencing details see kernel-base/COPYING | 7 | * For licencing details see kernel-base/COPYING |
| 8 | */ | 8 | */ |
| 9 | 9 | ||
| 10 | #ifdef CONFIG_CPU_SUP_INTEL | 10 | #include <linux/perf_event.h> |
| 11 | 11 | ||
| 12 | #include <asm/perf_event_p4.h> | 12 | #include <asm/perf_event_p4.h> |
| 13 | #include <asm/hardirq.h> | ||
| 14 | #include <asm/apic.h> | ||
| 15 | |||
| 16 | #include "perf_event.h" | ||
| 13 | 17 | ||
| 14 | #define P4_CNTR_LIMIT 3 | 18 | #define P4_CNTR_LIMIT 3 |
| 15 | /* | 19 | /* |
| @@ -1303,7 +1307,7 @@ static __initconst const struct x86_pmu p4_pmu = { | |||
| 1303 | .perfctr_second_write = 1, | 1307 | .perfctr_second_write = 1, |
| 1304 | }; | 1308 | }; |
| 1305 | 1309 | ||
| 1306 | static __init int p4_pmu_init(void) | 1310 | __init int p4_pmu_init(void) |
| 1307 | { | 1311 | { |
| 1308 | unsigned int low, high; | 1312 | unsigned int low, high; |
| 1309 | 1313 | ||
| @@ -1326,5 +1330,3 @@ static __init int p4_pmu_init(void) | |||
| 1326 | 1330 | ||
| 1327 | return 0; | 1331 | return 0; |
| 1328 | } | 1332 | } |
| 1329 | |||
| 1330 | #endif /* CONFIG_CPU_SUP_INTEL */ | ||
diff --git a/arch/x86/kernel/cpu/perf_event_p6.c b/arch/x86/kernel/cpu/perf_event_p6.c index 20c097e33860..c7181befecde 100644 --- a/arch/x86/kernel/cpu/perf_event_p6.c +++ b/arch/x86/kernel/cpu/perf_event_p6.c | |||
| @@ -1,4 +1,7 @@ | |||
| 1 | #ifdef CONFIG_CPU_SUP_INTEL | 1 | #include <linux/perf_event.h> |
| 2 | #include <linux/types.h> | ||
| 3 | |||
| 4 | #include "perf_event.h" | ||
| 2 | 5 | ||
| 3 | /* | 6 | /* |
| 4 | * Not sure about some of these | 7 | * Not sure about some of these |
| @@ -114,7 +117,7 @@ static __initconst const struct x86_pmu p6_pmu = { | |||
| 114 | .event_constraints = p6_event_constraints, | 117 | .event_constraints = p6_event_constraints, |
| 115 | }; | 118 | }; |
| 116 | 119 | ||
| 117 | static __init int p6_pmu_init(void) | 120 | __init int p6_pmu_init(void) |
| 118 | { | 121 | { |
| 119 | switch (boot_cpu_data.x86_model) { | 122 | switch (boot_cpu_data.x86_model) { |
| 120 | case 1: | 123 | case 1: |
| @@ -138,5 +141,3 @@ static __init int p6_pmu_init(void) | |||
| 138 | 141 | ||
| 139 | return 0; | 142 | return 0; |
| 140 | } | 143 | } |
| 141 | |||
| 142 | #endif /* CONFIG_CPU_SUP_INTEL */ | ||
diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c index 62ac8cb6ba27..14b23140e81f 100644 --- a/arch/x86/kernel/cpu/proc.c +++ b/arch/x86/kernel/cpu/proc.c | |||
| @@ -85,6 +85,8 @@ static int show_cpuinfo(struct seq_file *m, void *v) | |||
| 85 | seq_printf(m, "stepping\t: %d\n", c->x86_mask); | 85 | seq_printf(m, "stepping\t: %d\n", c->x86_mask); |
| 86 | else | 86 | else |
| 87 | seq_printf(m, "stepping\t: unknown\n"); | 87 | seq_printf(m, "stepping\t: unknown\n"); |
| 88 | if (c->microcode) | ||
| 89 | seq_printf(m, "microcode\t: 0x%x\n", c->microcode); | ||
| 88 | 90 | ||
| 89 | if (cpu_has(c, X86_FEATURE_TSC)) { | 91 | if (cpu_has(c, X86_FEATURE_TSC)) { |
| 90 | unsigned int freq = cpufreq_quick_get(cpu); | 92 | unsigned int freq = cpufreq_quick_get(cpu); |
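With this, the running microcode revision becomes visible per CPU in /proc/cpuinfo as a line of the form below (the value is illustrative):

	microcode	: 0x1b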
diff --git a/arch/x86/kernel/cpu/rdrand.c b/arch/x86/kernel/cpu/rdrand.c new file mode 100644 index 000000000000..feca286c2bb4 --- /dev/null +++ b/arch/x86/kernel/cpu/rdrand.c | |||
| @@ -0,0 +1,73 @@ | |||
| 1 | /* | ||
| 2 | * This file is part of the Linux kernel. | ||
| 3 | * | ||
| 4 | * Copyright (c) 2011, Intel Corporation | ||
| 5 | * Authors: Fenghua Yu <fenghua.yu@intel.com>, | ||
| 6 | * H. Peter Anvin <hpa@linux.intel.com> | ||
| 7 | * | ||
| 8 | * This program is free software; you can redistribute it and/or modify it | ||
| 9 | * under the terms and conditions of the GNU General Public License, | ||
| 10 | * version 2, as published by the Free Software Foundation. | ||
| 11 | * | ||
| 12 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 13 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 14 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 15 | * more details. | ||
| 16 | * | ||
| 17 | * You should have received a copy of the GNU General Public License along with | ||
| 18 | * this program; if not, write to the Free Software Foundation, Inc., | ||
| 19 | * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. | ||
| 20 | * | ||
| 21 | */ | ||
| 22 | |||
| 23 | #include <asm/processor.h> | ||
| 24 | #include <asm/archrandom.h> | ||
| 25 | #include <asm/sections.h> | ||
| 26 | |||
| 27 | static int __init x86_rdrand_setup(char *s) | ||
| 28 | { | ||
| 29 | setup_clear_cpu_cap(X86_FEATURE_RDRAND); | ||
| 30 | return 1; | ||
| 31 | } | ||
| 32 | __setup("nordrand", x86_rdrand_setup); | ||
| 33 | |||
| 34 | /* We can't use arch_get_random_long() here since alternatives haven't run */ | ||
| 35 | static inline int rdrand_long(unsigned long *v) | ||
| 36 | { | ||
| 37 | int ok; | ||
| 38 | asm volatile("1: " RDRAND_LONG "\n\t" | ||
| 39 | "jc 2f\n\t" | ||
| 40 | "decl %0\n\t" | ||
| 41 | "jnz 1b\n\t" | ||
| 42 | "2:" | ||
| 43 | : "=r" (ok), "=a" (*v) | ||
| 44 | : "0" (RDRAND_RETRY_LOOPS)); | ||
| 45 | return ok; | ||
| 46 | } | ||
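The asm retries because RDRAND can transiently fail (CF clear) while the DRNG reseeds. In plain-C shape, with rdrand_insn() as a hypothetical stand-in for one execution of the instruction:

	static inline int rdrand_long_sketch(unsigned long *v)
	{
		int i;

		for (i = RDRAND_RETRY_LOOPS; i > 0; i--) {
			if (rdrand_insn(v))	/* CF set: *v holds random data */
				return i;	/* nonzero == success */
		}
		return 0;			/* retries exhausted */
	}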
| 47 | |||
| 48 | /* | ||
| 49 | * Force a reseed cycle; we are architecturally guaranteed a reseed | ||
| 50 | * after no more than 512 128-bit chunks of random data. This also | ||
| 51 | * acts as a test of the CPU capability. | ||
| 52 | */ | ||
| 53 | #define RESEED_LOOP ((512*128)/sizeof(unsigned long)) | ||
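Worked out: on 64-bit this expands to (512*128)/8 = 8192 iterations, drawing 8192 x 64 = 524288 bits, an 8x overshoot of the 65536-bit (512 x 128) reseed boundary, so at least one reseed is guaranteed to occur during the self-test; the 32-bit expansion (16384 iterations of 32 bits) lands on the same total.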
| 54 | |||
| 55 | void __cpuinit x86_init_rdrand(struct cpuinfo_x86 *c) | ||
| 56 | { | ||
| 57 | #ifdef CONFIG_ARCH_RANDOM | ||
| 58 | unsigned long tmp; | ||
| 59 | int i, count, ok; | ||
| 60 | |||
| 61 | if (!cpu_has(c, X86_FEATURE_RDRAND)) | ||
| 62 | return; /* Nothing to do */ | ||
| 63 | |||
| 64 | for (count = i = 0; i < RESEED_LOOP; i++) { | ||
| 65 | ok = rdrand_long(&tmp); | ||
| 66 | if (ok) | ||
| 67 | count++; | ||
| 68 | } | ||
| 69 | |||
| 70 | if (count != RESEED_LOOP) | ||
| 71 | clear_cpu_cap(c, X86_FEATURE_RDRAND); | ||
| 72 | #endif | ||
| 73 | } | ||
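The carry-flag retry loop above is the canonical way to consume RDRAND: retry until CF=1 signals a valid value. A minimal userspace sketch of the same pattern, assuming an RDRAND-capable CPU and an assembler that knows the mnemonic (names and retry count are illustrative, not from the patch):

#include <stdio.h>

#define RETRY_LOOPS 10	/* illustrative; the kernel uses RDRAND_RETRY_LOOPS */

static int rdrand_long(unsigned long *v)
{
	unsigned char ok;
	int i;

	for (i = 0; i < RETRY_LOOPS; i++) {
		/* CF = 1 means the instruction delivered a valid value */
		asm volatile("rdrand %0\n\tsetc %1"
			     : "=r" (*v), "=qm" (ok));
		if (ok)
			return 1;
	}
	return 0;	/* hardware kept reporting "no entropy ready" */
}

int main(void)
{
	unsigned long v;

	if (rdrand_long(&v))
		printf("rdrand: %#lx\n", v);
	else
		fprintf(stderr, "rdrand: no data after %d retries\n", RETRY_LOOPS);
	return 0;
}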
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index 764c7c2b1811..13ad89971d47 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c | |||
| @@ -32,15 +32,12 @@ int in_crash_kexec; | |||
| 32 | 32 | ||
| 33 | #if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC) | 33 | #if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC) |
| 34 | 34 | ||
| 35 | static void kdump_nmi_callback(int cpu, struct die_args *args) | 35 | static void kdump_nmi_callback(int cpu, struct pt_regs *regs) |
| 36 | { | 36 | { |
| 37 | struct pt_regs *regs; | ||
| 38 | #ifdef CONFIG_X86_32 | 37 | #ifdef CONFIG_X86_32 |
| 39 | struct pt_regs fixed_regs; | 38 | struct pt_regs fixed_regs; |
| 40 | #endif | 39 | #endif |
| 41 | 40 | ||
| 42 | regs = args->regs; | ||
| 43 | |||
| 44 | #ifdef CONFIG_X86_32 | 41 | #ifdef CONFIG_X86_32 |
| 45 | if (!user_mode_vm(regs)) { | 42 | if (!user_mode_vm(regs)) { |
| 46 | crash_fixup_ss_esp(&fixed_regs, regs); | 43 | crash_fixup_ss_esp(&fixed_regs, regs); |
diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c index a621f3427685..52821799a702 100644 --- a/arch/x86/kernel/devicetree.c +++ b/arch/x86/kernel/devicetree.c | |||
| @@ -2,6 +2,7 @@ | |||
| 2 | * Architecture specific OF callbacks. | 2 | * Architecture specific OF callbacks. |
| 3 | */ | 3 | */ |
| 4 | #include <linux/bootmem.h> | 4 | #include <linux/bootmem.h> |
| 5 | #include <linux/export.h> | ||
| 5 | #include <linux/io.h> | 6 | #include <linux/io.h> |
| 6 | #include <linux/interrupt.h> | 7 | #include <linux/interrupt.h> |
| 7 | #include <linux/list.h> | 8 | #include <linux/list.h> |
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 3e2ef8425316..303a0e48f076 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c | |||
| @@ -12,6 +12,7 @@ | |||
| 12 | #include <linux/types.h> | 12 | #include <linux/types.h> |
| 13 | #include <linux/init.h> | 13 | #include <linux/init.h> |
| 14 | #include <linux/crash_dump.h> | 14 | #include <linux/crash_dump.h> |
| 15 | #include <linux/export.h> | ||
| 15 | #include <linux/bootmem.h> | 16 | #include <linux/bootmem.h> |
| 16 | #include <linux/pfn.h> | 17 | #include <linux/pfn.h> |
| 17 | #include <linux/suspend.h> | 18 | #include <linux/suspend.h> |
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 5c1a91974918..f3f6f5344001 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S | |||
| @@ -54,6 +54,7 @@ | |||
| 54 | #include <asm/ftrace.h> | 54 | #include <asm/ftrace.h> |
| 55 | #include <asm/irq_vectors.h> | 55 | #include <asm/irq_vectors.h> |
| 56 | #include <asm/cpufeature.h> | 56 | #include <asm/cpufeature.h> |
| 57 | #include <asm/alternative-asm.h> | ||
| 57 | 58 | ||
| 58 | /* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */ | 59 | /* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */ |
| 59 | #include <linux/elf-em.h> | 60 | #include <linux/elf-em.h> |
| @@ -873,12 +874,7 @@ ENTRY(simd_coprocessor_error) | |||
| 873 | 661: pushl_cfi $do_general_protection | 874 | 661: pushl_cfi $do_general_protection |
| 874 | 662: | 875 | 662: |
| 875 | .section .altinstructions,"a" | 876 | .section .altinstructions,"a" |
| 876 | .balign 4 | 877 | altinstruction_entry 661b, 663f, X86_FEATURE_XMM, 662b-661b, 664f-663f |
| 877 | .long 661b | ||
| 878 | .long 663f | ||
| 879 | .word X86_FEATURE_XMM | ||
| 880 | .byte 662b-661b | ||
| 881 | .byte 664f-663f | ||
| 882 | .previous | 878 | .previous |
| 883 | .section .altinstr_replacement,"ax" | 879 | .section .altinstr_replacement,"ax" |
| 884 | 663: pushl $do_simd_coprocessor_error | 880 | 663: pushl $do_simd_coprocessor_error |
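Note that the one-line altinstruction_entry above (from the newly included alternative-asm.h) emits the same five fields the removed open-coded record spelled out by hand: original site (661b), replacement (663f), CPUID feature bit (X86_FEATURE_XMM), original length (662b-661b) and replacement length (664f-663f). The .altinstructions record is unchanged in content, just no longer hand-rolled.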
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index e13329d800c8..faf8d5e74b0b 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S | |||
| @@ -331,10 +331,15 @@ ENDPROC(native_usergs_sysret64) | |||
| 331 | 1: incl PER_CPU_VAR(irq_count) | 331 | 1: incl PER_CPU_VAR(irq_count) |
| 332 | jne 2f | 332 | jne 2f |
| 333 | mov PER_CPU_VAR(irq_stack_ptr),%rsp | 333 | mov PER_CPU_VAR(irq_stack_ptr),%rsp |
| 334 | EMPTY_FRAME 0 | 334 | CFI_DEF_CFA_REGISTER rsi |
| 335 | 335 | ||
| 336 | 2: /* Store previous stack value */ | 336 | 2: /* Store previous stack value */ |
| 337 | pushq %rsi | 337 | pushq %rsi |
| 338 | CFI_ESCAPE 0x0f /* DW_CFA_def_cfa_expression */, 6, \ | ||
| 339 | 0x77 /* DW_OP_breg7 */, 0, \ | ||
| 340 | 0x06 /* DW_OP_deref */, \ | ||
| 341 | 0x08 /* DW_OP_const1u */, SS+8-RBP, \ | ||
| 342 | 0x22 /* DW_OP_plus */ | ||
| 338 | /* We entered an interrupt context - irqs are off: */ | 343 | /* We entered an interrupt context - irqs are off: */ |
| 339 | TRACE_IRQS_OFF | 344 | TRACE_IRQS_OFF |
| 340 | .endm | 345 | .endm |
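Decoded, the escaped DWARF expression reads, in order: DW_OP_breg7 0 pushes %rsp + 0, DW_OP_deref replaces it with the old stack pointer that was just pushed at the top of the IRQ stack, and DW_OP_const1u SS+8-RBP followed by DW_OP_plus offsets that to the frame base; in other words, CFA = *(%rsp) + (SS+8-RBP). The 6 is the expression's length in bytes. This is what lets an unwinder walk back from the per-CPU IRQ stack to the interrupted stack.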
| @@ -788,7 +793,6 @@ END(interrupt) | |||
| 788 | subq $ORIG_RAX-RBP, %rsp | 793 | subq $ORIG_RAX-RBP, %rsp |
| 789 | CFI_ADJUST_CFA_OFFSET ORIG_RAX-RBP | 794 | CFI_ADJUST_CFA_OFFSET ORIG_RAX-RBP |
| 790 | SAVE_ARGS_IRQ | 795 | SAVE_ARGS_IRQ |
| 791 | PARTIAL_FRAME 0 | ||
| 792 | call \func | 796 | call \func |
| 793 | .endm | 797 | .endm |
| 794 | 798 | ||
| @@ -813,10 +817,10 @@ ret_from_intr: | |||
| 813 | 817 | ||
| 814 | /* Restore saved previous stack */ | 818 | /* Restore saved previous stack */ |
| 815 | popq %rsi | 819 | popq %rsi |
| 816 | leaq 16(%rsi), %rsp | 820 | CFI_DEF_CFA_REGISTER rsi |
| 817 | 821 | leaq ARGOFFSET-RBP(%rsi), %rsp | |
| 818 | CFI_DEF_CFA_REGISTER rsp | 822 | CFI_DEF_CFA_REGISTER rsp |
| 819 | CFI_ADJUST_CFA_OFFSET -16 | 823 | CFI_ADJUST_CFA_OFFSET RBP-ARGOFFSET |
| 820 | 824 | ||
| 821 | exit_intr: | 825 | exit_intr: |
| 822 | GET_THREAD_INFO(%rcx) | 826 | GET_THREAD_INFO(%rcx) |
| @@ -1111,7 +1115,6 @@ zeroentry spurious_interrupt_bug do_spurious_interrupt_bug | |||
| 1111 | zeroentry coprocessor_error do_coprocessor_error | 1115 | zeroentry coprocessor_error do_coprocessor_error |
| 1112 | errorentry alignment_check do_alignment_check | 1116 | errorentry alignment_check do_alignment_check |
| 1113 | zeroentry simd_coprocessor_error do_simd_coprocessor_error | 1117 | zeroentry simd_coprocessor_error do_simd_coprocessor_error |
| 1114 | zeroentry emulate_vsyscall do_emulate_vsyscall | ||
| 1115 | 1118 | ||
| 1116 | 1119 | ||
| 1117 | /* Reload gs selector with exception handling */ | 1120 | /* Reload gs selector with exception handling */ |
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 4aecc54236a9..b946a9eac7d9 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c | |||
| @@ -1,6 +1,7 @@ | |||
| 1 | #include <linux/clocksource.h> | 1 | #include <linux/clocksource.h> |
| 2 | #include <linux/clockchips.h> | 2 | #include <linux/clockchips.h> |
| 3 | #include <linux/interrupt.h> | 3 | #include <linux/interrupt.h> |
| 4 | #include <linux/export.h> | ||
| 4 | #include <linux/sysdev.h> | 5 | #include <linux/sysdev.h> |
| 5 | #include <linux/delay.h> | 6 | #include <linux/delay.h> |
| 6 | #include <linux/errno.h> | 7 | #include <linux/errno.h> |
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index 12aff2537682..739d8598f789 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c | |||
| @@ -321,7 +321,7 @@ static inline unsigned short twd_i387_to_fxsr(unsigned short twd) | |||
| 321 | return tmp; | 321 | return tmp; |
| 322 | } | 322 | } |
| 323 | 323 | ||
| 324 | #define FPREG_ADDR(f, n) ((void *)&(f)->st_space + (n) * 16); | 324 | #define FPREG_ADDR(f, n) ((void *)&(f)->st_space + (n) * 16) |
| 325 | #define FP_EXP_TAG_VALID 0 | 325 | #define FP_EXP_TAG_VALID 0 |
| 326 | #define FP_EXP_TAG_ZERO 1 | 326 | #define FP_EXP_TAG_ZERO 1 |
| 327 | #define FP_EXP_TAG_SPECIAL 2 | 327 | #define FP_EXP_TAG_SPECIAL 2 |
diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c index 65b8f5c2eebf..610485223bdb 100644 --- a/arch/x86/kernel/i8259.c +++ b/arch/x86/kernel/i8259.c | |||
| @@ -14,7 +14,7 @@ | |||
| 14 | #include <linux/io.h> | 14 | #include <linux/io.h> |
| 15 | #include <linux/delay.h> | 15 | #include <linux/delay.h> |
| 16 | 16 | ||
| 17 | #include <asm/atomic.h> | 17 | #include <linux/atomic.h> |
| 18 | #include <asm/system.h> | 18 | #include <asm/system.h> |
| 19 | #include <asm/timer.h> | 19 | #include <asm/timer.h> |
| 20 | #include <asm/hw_irq.h> | 20 | #include <asm/hw_irq.h> |
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 6c0802eb2f7f..429e0c92924e 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c | |||
| @@ -9,6 +9,7 @@ | |||
| 9 | #include <linux/smp.h> | 9 | #include <linux/smp.h> |
| 10 | #include <linux/ftrace.h> | 10 | #include <linux/ftrace.h> |
| 11 | #include <linux/delay.h> | 11 | #include <linux/delay.h> |
| 12 | #include <linux/export.h> | ||
| 12 | 13 | ||
| 13 | #include <asm/apic.h> | 14 | #include <asm/apic.h> |
| 14 | #include <asm/io_apic.h> | 15 | #include <asm/io_apic.h> |
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c index f09d4bbe2d2d..b3300e6bacef 100644 --- a/arch/x86/kernel/irqinit.c +++ b/arch/x86/kernel/irqinit.c | |||
| @@ -15,7 +15,7 @@ | |||
| 15 | #include <linux/io.h> | 15 | #include <linux/io.h> |
| 16 | #include <linux/delay.h> | 16 | #include <linux/delay.h> |
| 17 | 17 | ||
| 18 | #include <asm/atomic.h> | 18 | #include <linux/atomic.h> |
| 19 | #include <asm/system.h> | 19 | #include <asm/system.h> |
| 20 | #include <asm/timer.h> | 20 | #include <asm/timer.h> |
| 21 | #include <asm/hw_irq.h> | 21 | #include <asm/hw_irq.h> |
diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c index 3fee346ef545..ea9d5f2f13ef 100644 --- a/arch/x86/kernel/jump_label.c +++ b/arch/x86/kernel/jump_label.c | |||
| @@ -24,8 +24,9 @@ union jump_code_union { | |||
| 24 | } __attribute__((packed)); | 24 | } __attribute__((packed)); |
| 25 | }; | 25 | }; |
| 26 | 26 | ||
| 27 | void arch_jump_label_transform(struct jump_entry *entry, | 27 | static void __jump_label_transform(struct jump_entry *entry, |
| 28 | enum jump_label_type type) | 28 | enum jump_label_type type, |
| 29 | void *(*poker)(void *, const void *, size_t)) | ||
| 29 | { | 30 | { |
| 30 | union jump_code_union code; | 31 | union jump_code_union code; |
| 31 | 32 | ||
| @@ -35,17 +36,24 @@ void arch_jump_label_transform(struct jump_entry *entry, | |||
| 35 | (entry->code + JUMP_LABEL_NOP_SIZE); | 36 | (entry->code + JUMP_LABEL_NOP_SIZE); |
| 36 | } else | 37 | } else |
| 37 | memcpy(&code, ideal_nops[NOP_ATOMIC5], JUMP_LABEL_NOP_SIZE); | 38 | memcpy(&code, ideal_nops[NOP_ATOMIC5], JUMP_LABEL_NOP_SIZE); |
| 39 | |||
| 40 | (*poker)((void *)entry->code, &code, JUMP_LABEL_NOP_SIZE); | ||
| 41 | } | ||
| 42 | |||
| 43 | void arch_jump_label_transform(struct jump_entry *entry, | ||
| 44 | enum jump_label_type type) | ||
| 45 | { | ||
| 38 | get_online_cpus(); | 46 | get_online_cpus(); |
| 39 | mutex_lock(&text_mutex); | 47 | mutex_lock(&text_mutex); |
| 40 | text_poke_smp((void *)entry->code, &code, JUMP_LABEL_NOP_SIZE); | 48 | __jump_label_transform(entry, type, text_poke_smp); |
| 41 | mutex_unlock(&text_mutex); | 49 | mutex_unlock(&text_mutex); |
| 42 | put_online_cpus(); | 50 | put_online_cpus(); |
| 43 | } | 51 | } |
| 44 | 52 | ||
| 45 | void arch_jump_label_text_poke_early(jump_label_t addr) | 53 | void arch_jump_label_transform_static(struct jump_entry *entry, |
| 54 | enum jump_label_type type) | ||
| 46 | { | 55 | { |
| 47 | text_poke_early((void *)addr, ideal_nops[NOP_ATOMIC5], | 56 | __jump_label_transform(entry, type, text_poke_early); |
| 48 | JUMP_LABEL_NOP_SIZE); | ||
| 49 | } | 57 | } |
| 50 | 58 | ||
| 51 | #endif | 59 | #endif |
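The point of the refactor above is that the runtime and early-boot paths share every step except the primitive that writes the bytes. A minimal userspace sketch of the same parameterize-the-poker shape (all names illustrative; memcpy stands in for both kernel pokers):

#include <stdio.h>
#include <string.h>

/* the poking primitive is the only thing the two paths disagree on */
typedef void *(*poker_t)(void *dst, const void *src, size_t len);

static void transform(void *addr, const void *insn, size_t len, poker_t poke)
{
	poke(addr, insn, len);
}

/* stand-in for text_poke_smp(): would synchronize other CPUs first */
static void *poke_live(void *dst, const void *src, size_t len)
{
	puts("live poke");
	return memcpy(dst, src, len);
}

/* stand-in for text_poke_early(): plain copy, nothing else runs yet */
static void *poke_early(void *dst, const void *src, size_t len)
{
	puts("early poke");
	return memcpy(dst, src, len);
}

int main(void)
{
	/* 5-byte NOP, same idea as ideal_nops[NOP_ATOMIC5] */
	unsigned char site[5];
	const unsigned char nop5[5] = { 0x0f, 0x1f, 0x44, 0x00, 0x00 };

	transform(site, nop5, sizeof(nop5), poke_live);
	transform(site, nop5, sizeof(nop5), poke_early);
	return 0;
}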
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index 00354d4919a9..faba5771acad 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c | |||
| @@ -511,28 +511,37 @@ single_step_cont(struct pt_regs *regs, struct die_args *args) | |||
| 511 | 511 | ||
| 512 | static int was_in_debug_nmi[NR_CPUS]; | 512 | static int was_in_debug_nmi[NR_CPUS]; |
| 513 | 513 | ||
| 514 | static int __kgdb_notify(struct die_args *args, unsigned long cmd) | 514 | static int kgdb_nmi_handler(unsigned int cmd, struct pt_regs *regs) |
| 515 | { | 515 | { |
| 516 | struct pt_regs *regs = args->regs; | ||
| 517 | |||
| 518 | switch (cmd) { | 516 | switch (cmd) { |
| 519 | case DIE_NMI: | 517 | case NMI_LOCAL: |
| 520 | if (atomic_read(&kgdb_active) != -1) { | 518 | if (atomic_read(&kgdb_active) != -1) { |
| 521 | /* KGDB CPU roundup */ | 519 | /* KGDB CPU roundup */ |
| 522 | kgdb_nmicallback(raw_smp_processor_id(), regs); | 520 | kgdb_nmicallback(raw_smp_processor_id(), regs); |
| 523 | was_in_debug_nmi[raw_smp_processor_id()] = 1; | 521 | was_in_debug_nmi[raw_smp_processor_id()] = 1; |
| 524 | touch_nmi_watchdog(); | 522 | touch_nmi_watchdog(); |
| 525 | return NOTIFY_STOP; | 523 | return NMI_HANDLED; |
| 526 | } | 524 | } |
| 527 | return NOTIFY_DONE; | 525 | break; |
| 528 | 526 | ||
| 529 | case DIE_NMIUNKNOWN: | 527 | case NMI_UNKNOWN: |
| 530 | if (was_in_debug_nmi[raw_smp_processor_id()]) { | 528 | if (was_in_debug_nmi[raw_smp_processor_id()]) { |
| 531 | was_in_debug_nmi[raw_smp_processor_id()] = 0; | 529 | was_in_debug_nmi[raw_smp_processor_id()] = 0; |
| 532 | return NOTIFY_STOP; | 530 | return NMI_HANDLED; |
| 533 | } | 531 | } |
| 534 | return NOTIFY_DONE; | 532 | break; |
| 533 | default: | ||
| 534 | /* do nothing */ | ||
| 535 | break; | ||
| 536 | } | ||
| 537 | return NMI_DONE; | ||
| 538 | } | ||
| 539 | |||
| 540 | static int __kgdb_notify(struct die_args *args, unsigned long cmd) | ||
| 541 | { | ||
| 542 | struct pt_regs *regs = args->regs; | ||
| 535 | 543 | ||
| 544 | switch (cmd) { | ||
| 536 | case DIE_DEBUG: | 545 | case DIE_DEBUG: |
| 537 | if (atomic_read(&kgdb_cpu_doing_single_step) != -1) { | 546 | if (atomic_read(&kgdb_cpu_doing_single_step) != -1) { |
| 538 | if (user_mode(regs)) | 547 | if (user_mode(regs)) |
| @@ -590,11 +599,6 @@ kgdb_notify(struct notifier_block *self, unsigned long cmd, void *ptr) | |||
| 590 | 599 | ||
| 591 | static struct notifier_block kgdb_notifier = { | 600 | static struct notifier_block kgdb_notifier = { |
| 592 | .notifier_call = kgdb_notify, | 601 | .notifier_call = kgdb_notify, |
| 593 | |||
| 594 | /* | ||
| 595 | * Lowest-prio notifier priority, we want to be notified last: | ||
| 596 | */ | ||
| 597 | .priority = NMI_LOCAL_LOW_PRIOR, | ||
| 598 | }; | 602 | }; |
| 599 | 603 | ||
| 600 | /** | 604 | /** |
| @@ -605,7 +609,31 @@ static struct notifier_block kgdb_notifier = { | |||
| 605 | */ | 609 | */ |
| 606 | int kgdb_arch_init(void) | 610 | int kgdb_arch_init(void) |
| 607 | { | 611 | { |
| 608 | return register_die_notifier(&kgdb_notifier); | 612 | int retval; |
| 613 | |||
| 614 | retval = register_die_notifier(&kgdb_notifier); | ||
| 615 | if (retval) | ||
| 616 | goto out; | ||
| 617 | |||
| 618 | retval = register_nmi_handler(NMI_LOCAL, kgdb_nmi_handler, | ||
| 619 | 0, "kgdb"); | ||
| 620 | if (retval) | ||
| 621 | goto out1; | ||
| 622 | |||
| 623 | retval = register_nmi_handler(NMI_UNKNOWN, kgdb_nmi_handler, | ||
| 624 | 0, "kgdb"); | ||
| 625 | |||
| 626 | if (retval) | ||
| 627 | goto out2; | ||
| 628 | |||
| 629 | return retval; | ||
| 630 | |||
| 631 | out2: | ||
| 632 | unregister_nmi_handler(NMI_LOCAL, "kgdb"); | ||
| 633 | out1: | ||
| 634 | unregister_die_notifier(&kgdb_notifier); | ||
| 635 | out: | ||
| 636 | return retval; | ||
| 609 | } | 637 | } |
| 610 | 638 | ||
| 611 | static void kgdb_hw_overflow_handler(struct perf_event *event, | 639 | static void kgdb_hw_overflow_handler(struct perf_event *event, |
| @@ -673,6 +701,8 @@ void kgdb_arch_exit(void) | |||
| 673 | breakinfo[i].pev = NULL; | 701 | breakinfo[i].pev = NULL; |
| 674 | } | 702 | } |
| 675 | } | 703 | } |
| 704 | unregister_nmi_handler(NMI_UNKNOWN, "kgdb"); | ||
| 705 | unregister_nmi_handler(NMI_LOCAL, "kgdb"); | ||
| 676 | unregister_die_notifier(&kgdb_notifier); | 706 | unregister_die_notifier(&kgdb_notifier); |
| 677 | } | 707 | } |
| 678 | 708 | ||
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index f1a6244d7d93..7da647d8b64c 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c | |||
| @@ -75,8 +75,11 @@ DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); | |||
| 75 | /* | 75 | /* |
| 76 | * Undefined/reserved opcodes, conditional jump, Opcode Extension | 76 | * Undefined/reserved opcodes, conditional jump, Opcode Extension |
| 77 | * Groups, and some special opcodes can not boost. | 77 | * Groups, and some special opcodes can not boost. |
| 78 | * This is non-const and volatile to keep gcc from statically | ||
| 79 | * optimizing it out, as variable_test_bit makes gcc think only | ||
| 80 | * *(unsigned long*) is used. | ||
| 78 | */ | 81 | */ |
| 79 | static const u32 twobyte_is_boostable[256 / 32] = { | 82 | static volatile u32 twobyte_is_boostable[256 / 32] = { |
| 80 | /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ | 83 | /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ |
| 81 | /* ---------------------------------------------- */ | 84 | /* ---------------------------------------------- */ |
| 82 | W(0x00, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0) | /* 00 */ | 85 | W(0x00, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0) | /* 00 */ |
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index c1a0188e29ae..44842d756b29 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c | |||
| @@ -74,9 +74,10 @@ static cycle_t kvm_clock_read(void) | |||
| 74 | struct pvclock_vcpu_time_info *src; | 74 | struct pvclock_vcpu_time_info *src; |
| 75 | cycle_t ret; | 75 | cycle_t ret; |
| 76 | 76 | ||
| 77 | src = &get_cpu_var(hv_clock); | 77 | preempt_disable_notrace(); |
| 78 | src = &__get_cpu_var(hv_clock); | ||
| 78 | ret = pvclock_clocksource_read(src); | 79 | ret = pvclock_clocksource_read(src); |
| 79 | put_cpu_var(hv_clock); | 80 | preempt_enable_notrace(); |
| 80 | return ret; | 81 | return ret; |
| 81 | } | 82 | } |
| 82 | 83 | ||
diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c index 591be0ee1934..d494799aafcd 100644 --- a/arch/x86/kernel/microcode_amd.c +++ b/arch/x86/kernel/microcode_amd.c | |||
| @@ -74,14 +74,13 @@ static struct equiv_cpu_entry *equiv_cpu_table; | |||
| 74 | static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig) | 74 | static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig) |
| 75 | { | 75 | { |
| 76 | struct cpuinfo_x86 *c = &cpu_data(cpu); | 76 | struct cpuinfo_x86 *c = &cpu_data(cpu); |
| 77 | u32 dummy; | ||
| 78 | 77 | ||
| 79 | if (c->x86_vendor != X86_VENDOR_AMD || c->x86 < 0x10) { | 78 | if (c->x86_vendor != X86_VENDOR_AMD || c->x86 < 0x10) { |
| 80 | pr_warning("CPU%d: family %d not supported\n", cpu, c->x86); | 79 | pr_warning("CPU%d: family %d not supported\n", cpu, c->x86); |
| 81 | return -1; | 80 | return -1; |
| 82 | } | 81 | } |
| 83 | 82 | ||
| 84 | rdmsr(MSR_AMD64_PATCH_LEVEL, csig->rev, dummy); | 83 | csig->rev = c->microcode; |
| 85 | pr_info("CPU%d: patch_level=0x%08x\n", cpu, csig->rev); | 84 | pr_info("CPU%d: patch_level=0x%08x\n", cpu, csig->rev); |
| 86 | 85 | ||
| 87 | return 0; | 86 | return 0; |
| @@ -130,6 +129,7 @@ static int apply_microcode_amd(int cpu) | |||
| 130 | int cpu_num = raw_smp_processor_id(); | 129 | int cpu_num = raw_smp_processor_id(); |
| 131 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num; | 130 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num; |
| 132 | struct microcode_amd *mc_amd = uci->mc; | 131 | struct microcode_amd *mc_amd = uci->mc; |
| 132 | struct cpuinfo_x86 *c = &cpu_data(cpu); | ||
| 133 | 133 | ||
| 134 | /* We should bind the task to the CPU */ | 134 | /* We should bind the task to the CPU */ |
| 135 | BUG_ON(cpu_num != cpu); | 135 | BUG_ON(cpu_num != cpu); |
| @@ -150,6 +150,7 @@ static int apply_microcode_amd(int cpu) | |||
| 150 | 150 | ||
| 151 | pr_info("CPU%d: new patch_level=0x%08x\n", cpu, rev); | 151 | pr_info("CPU%d: new patch_level=0x%08x\n", cpu, rev); |
| 152 | uci->cpu_sig.rev = rev; | 152 | uci->cpu_sig.rev = rev; |
| 153 | c->microcode = rev; | ||
| 153 | 154 | ||
| 154 | return 0; | 155 | return 0; |
| 155 | } | 156 | } |
diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c index f9242800bc84..f2d2a664e797 100644 --- a/arch/x86/kernel/microcode_core.c +++ b/arch/x86/kernel/microcode_core.c | |||
| @@ -483,7 +483,13 @@ mc_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu) | |||
| 483 | sysfs_remove_group(&sys_dev->kobj, &mc_attr_group); | 483 | sysfs_remove_group(&sys_dev->kobj, &mc_attr_group); |
| 484 | pr_debug("CPU%d removed\n", cpu); | 484 | pr_debug("CPU%d removed\n", cpu); |
| 485 | break; | 485 | break; |
| 486 | case CPU_DEAD: | 486 | |
| 487 | /* | ||
| 488 | * When a CPU goes offline, don't free up or invalidate the copy of | ||
| 489 | * the microcode in kernel memory, so that we can reuse it when the | ||
| 490 | * CPU comes back online, without unnecessarily requesting it from | ||
| 491 | * userspace again. | ||
| 492 | */ | ||
| 487 | case CPU_UP_CANCELED_FROZEN: | 493 | case CPU_UP_CANCELED_FROZEN: |
| 488 | /* The CPU refused to come up during a system resume */ | 494 | /* The CPU refused to come up during a system resume */ |
| 489 | microcode_fini_cpu(cpu); | 495 | microcode_fini_cpu(cpu); |
diff --git a/arch/x86/kernel/microcode_intel.c b/arch/x86/kernel/microcode_intel.c index 1a1b606d3e92..3ca42d0e43a2 100644 --- a/arch/x86/kernel/microcode_intel.c +++ b/arch/x86/kernel/microcode_intel.c | |||
| @@ -161,12 +161,7 @@ static int collect_cpu_info(int cpu_num, struct cpu_signature *csig) | |||
| 161 | csig->pf = 1 << ((val[1] >> 18) & 7); | 161 | csig->pf = 1 << ((val[1] >> 18) & 7); |
| 162 | } | 162 | } |
| 163 | 163 | ||
| 164 | wrmsr(MSR_IA32_UCODE_REV, 0, 0); | 164 | csig->rev = c->microcode; |
| 165 | /* see notes above for revision 1.07. Apparent chip bug */ | ||
| 166 | sync_core(); | ||
| 167 | /* get the current revision from MSR 0x8B */ | ||
| 168 | rdmsr(MSR_IA32_UCODE_REV, val[0], csig->rev); | ||
| 169 | |||
| 170 | pr_info("CPU%d sig=0x%x, pf=0x%x, revision=0x%x\n", | 165 | pr_info("CPU%d sig=0x%x, pf=0x%x, revision=0x%x\n", |
| 171 | cpu_num, csig->sig, csig->pf, csig->rev); | 166 | cpu_num, csig->sig, csig->pf, csig->rev); |
| 172 | 167 | ||
| @@ -299,9 +294,9 @@ static int apply_microcode(int cpu) | |||
| 299 | struct microcode_intel *mc_intel; | 294 | struct microcode_intel *mc_intel; |
| 300 | struct ucode_cpu_info *uci; | 295 | struct ucode_cpu_info *uci; |
| 301 | unsigned int val[2]; | 296 | unsigned int val[2]; |
| 302 | int cpu_num; | 297 | int cpu_num = raw_smp_processor_id(); |
| 298 | struct cpuinfo_x86 *c = &cpu_data(cpu_num); | ||
| 303 | 299 | ||
| 304 | cpu_num = raw_smp_processor_id(); | ||
| 305 | uci = ucode_cpu_info + cpu; | 300 | uci = ucode_cpu_info + cpu; |
| 306 | mc_intel = uci->mc; | 301 | mc_intel = uci->mc; |
| 307 | 302 | ||
| @@ -317,7 +312,7 @@ static int apply_microcode(int cpu) | |||
| 317 | (unsigned long) mc_intel->bits >> 16 >> 16); | 312 | (unsigned long) mc_intel->bits >> 16 >> 16); |
| 318 | wrmsr(MSR_IA32_UCODE_REV, 0, 0); | 313 | wrmsr(MSR_IA32_UCODE_REV, 0, 0); |
| 319 | 314 | ||
| 320 | /* see notes above for revision 1.07. Apparent chip bug */ | 315 | /* As documented in the SDM: Do a CPUID 1 here */ |
| 321 | sync_core(); | 316 | sync_core(); |
| 322 | 317 | ||
| 323 | /* get the current revision from MSR 0x8B */ | 318 | /* get the current revision from MSR 0x8B */ |
| @@ -335,6 +330,7 @@ static int apply_microcode(int cpu) | |||
| 335 | (mc_intel->hdr.date >> 16) & 0xff); | 330 | (mc_intel->hdr.date >> 16) & 0xff); |
| 336 | 331 | ||
| 337 | uci->cpu_sig.rev = val[1]; | 332 | uci->cpu_sig.rev = val[1]; |
| 333 | c->microcode = val[1]; | ||
| 338 | 334 | ||
| 339 | return 0; | 335 | return 0; |
| 340 | } | 336 | } |
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c new file mode 100644 index 000000000000..e88f37b58ddd --- /dev/null +++ b/arch/x86/kernel/nmi.c | |||
| @@ -0,0 +1,435 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) 1991, 1992 Linus Torvalds | ||
| 3 | * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs | ||
| 4 | * Copyright (C) 2011 Don Zickus Red Hat, Inc. | ||
| 5 | * | ||
| 6 | * Pentium III FXSR, SSE support | ||
| 7 | * Gareth Hughes <gareth@valinux.com>, May 2000 | ||
| 8 | */ | ||
| 9 | |||
| 10 | /* | ||
| 11 | * Handle hardware traps and faults. | ||
| 12 | */ | ||
| 13 | #include <linux/spinlock.h> | ||
| 14 | #include <linux/kprobes.h> | ||
| 15 | #include <linux/kdebug.h> | ||
| 16 | #include <linux/nmi.h> | ||
| 17 | #include <linux/delay.h> | ||
| 18 | #include <linux/hardirq.h> | ||
| 19 | #include <linux/slab.h> | ||
| 20 | #include <linux/export.h> | ||
| 21 | |||
| 22 | #include <linux/mca.h> | ||
| 23 | |||
| 24 | #if defined(CONFIG_EDAC) | ||
| 25 | #include <linux/edac.h> | ||
| 26 | #endif | ||
| 27 | |||
| 28 | #include <linux/atomic.h> | ||
| 29 | #include <asm/traps.h> | ||
| 30 | #include <asm/mach_traps.h> | ||
| 31 | #include <asm/nmi.h> | ||
| 32 | #include <asm/x86_init.h> | ||
| 33 | |||
| 34 | #define NMI_MAX_NAMELEN 16 | ||
| 35 | struct nmiaction { | ||
| 36 | struct list_head list; | ||
| 37 | nmi_handler_t handler; | ||
| 38 | unsigned int flags; | ||
| 39 | char *name; | ||
| 40 | }; | ||
| 41 | |||
| 42 | struct nmi_desc { | ||
| 43 | spinlock_t lock; | ||
| 44 | struct list_head head; | ||
| 45 | }; | ||
| 46 | |||
| 47 | static struct nmi_desc nmi_desc[NMI_MAX] = | ||
| 48 | { | ||
| 49 | { | ||
| 50 | .lock = __SPIN_LOCK_UNLOCKED(&nmi_desc[0].lock), | ||
| 51 | .head = LIST_HEAD_INIT(nmi_desc[0].head), | ||
| 52 | }, | ||
| 53 | { | ||
| 54 | .lock = __SPIN_LOCK_UNLOCKED(&nmi_desc[1].lock), | ||
| 55 | .head = LIST_HEAD_INIT(nmi_desc[1].head), | ||
| 56 | }, | ||
| 57 | |||
| 58 | }; | ||
| 59 | |||
| 60 | struct nmi_stats { | ||
| 61 | unsigned int normal; | ||
| 62 | unsigned int unknown; | ||
| 63 | unsigned int external; | ||
| 64 | unsigned int swallow; | ||
| 65 | }; | ||
| 66 | |||
| 67 | static DEFINE_PER_CPU(struct nmi_stats, nmi_stats); | ||
| 68 | |||
| 69 | static int ignore_nmis; | ||
| 70 | |||
| 71 | int unknown_nmi_panic; | ||
| 72 | /* | ||
| 73 | * Prevent the NMI reason port (0x61) from being accessed simultaneously; | ||
| 74 | * this lock can only be used in NMI handlers. | ||
| 75 | */ | ||
| 76 | static DEFINE_RAW_SPINLOCK(nmi_reason_lock); | ||
| 77 | |||
| 78 | static int __init setup_unknown_nmi_panic(char *str) | ||
| 79 | { | ||
| 80 | unknown_nmi_panic = 1; | ||
| 81 | return 1; | ||
| 82 | } | ||
| 83 | __setup("unknown_nmi_panic", setup_unknown_nmi_panic); | ||
| 84 | |||
| 85 | #define nmi_to_desc(type) (&nmi_desc[type]) | ||
| 86 | |||
| 87 | static int notrace __kprobes nmi_handle(unsigned int type, struct pt_regs *regs, bool b2b) | ||
| 88 | { | ||
| 89 | struct nmi_desc *desc = nmi_to_desc(type); | ||
| 90 | struct nmiaction *a; | ||
| 91 | int handled = 0; | ||
| 92 | |||
| 93 | rcu_read_lock(); | ||
| 94 | |||
| 95 | /* | ||
| 96 | * NMIs are edge-triggered, which means if you have enough | ||
| 97 | * of them concurrently, you can lose some because only one | ||
| 98 | * can be latched at any given time. Walk the whole list | ||
| 99 | * to handle those situations. | ||
| 100 | */ | ||
| 101 | list_for_each_entry_rcu(a, &desc->head, list) | ||
| 102 | handled += a->handler(type, regs); | ||
| 103 | |||
| 104 | rcu_read_unlock(); | ||
| 105 | |||
| 106 | /* return total number of NMI events handled */ | ||
| 107 | return handled; | ||
| 108 | } | ||
| 109 | |||
| 110 | static int __setup_nmi(unsigned int type, struct nmiaction *action) | ||
| 111 | { | ||
| 112 | struct nmi_desc *desc = nmi_to_desc(type); | ||
| 113 | unsigned long flags; | ||
| 114 | |||
| 115 | spin_lock_irqsave(&desc->lock, flags); | ||
| 116 | |||
| 117 | /* | ||
| 118 | * Most handlers of type NMI_UNKNOWN never return NMI_DONE because | ||
| 119 | * they just assume the NMI is theirs. This is just a sanity check | ||
| 120 | * to manage expectations. | ||
| 121 | */ | ||
| 122 | WARN_ON_ONCE(type == NMI_UNKNOWN && !list_empty(&desc->head)); | ||
| 123 | |||
| 124 | /* | ||
| 125 | * Some handlers need to be executed first, otherwise a fake | ||
| 126 | * event confuses some handlers (kdump uses this flag). | ||
| 127 | */ | ||
| 128 | if (action->flags & NMI_FLAG_FIRST) | ||
| 129 | list_add_rcu(&action->list, &desc->head); | ||
| 130 | else | ||
| 131 | list_add_tail_rcu(&action->list, &desc->head); | ||
| 132 | |||
| 133 | spin_unlock_irqrestore(&desc->lock, flags); | ||
| 134 | return 0; | ||
| 135 | } | ||
| 136 | |||
| 137 | static struct nmiaction *__free_nmi(unsigned int type, const char *name) | ||
| 138 | { | ||
| 139 | struct nmi_desc *desc = nmi_to_desc(type); | ||
| 140 | struct nmiaction *n; | ||
| 141 | unsigned long flags; | ||
| 142 | |||
| 143 | spin_lock_irqsave(&desc->lock, flags); | ||
| 144 | |||
| 145 | list_for_each_entry_rcu(n, &desc->head, list) { | ||
| 146 | /* | ||
| 147 | * the name passed in to describe the nmi handler | ||
| 148 | * is used as the lookup key | ||
| 149 | */ | ||
| 150 | if (!strcmp(n->name, name)) { | ||
| 151 | WARN(in_nmi(), | ||
| 152 | "Trying to free NMI (%s) from NMI context!\n", n->name); | ||
| 153 | list_del_rcu(&n->list); | ||
| 154 | break; | ||
| 155 | } | ||
| 156 | } | ||
| 157 | |||
| 158 | spin_unlock_irqrestore(&desc->lock, flags); | ||
| 159 | synchronize_rcu(); | ||
| 160 | return n; | ||
| 161 | } | ||
| 162 | |||
| 163 | int register_nmi_handler(unsigned int type, nmi_handler_t handler, | ||
| 164 | unsigned long nmiflags, const char *devname) | ||
| 165 | { | ||
| 166 | struct nmiaction *action; | ||
| 167 | int retval = -ENOMEM; | ||
| 168 | |||
| 169 | if (!handler) | ||
| 170 | return -EINVAL; | ||
| 171 | |||
| 172 | action = kzalloc(sizeof(struct nmiaction), GFP_KERNEL); | ||
| 173 | if (!action) | ||
| 174 | goto fail_action; | ||
| 175 | |||
| 176 | action->handler = handler; | ||
| 177 | action->flags = nmiflags; | ||
| 178 | action->name = kstrndup(devname, NMI_MAX_NAMELEN, GFP_KERNEL); | ||
| 179 | if (!action->name) | ||
| 180 | goto fail_action_name; | ||
| 181 | |||
| 182 | retval = __setup_nmi(type, action); | ||
| 183 | |||
| 184 | if (retval) | ||
| 185 | goto fail_setup_nmi; | ||
| 186 | |||
| 187 | return retval; | ||
| 188 | |||
| 189 | fail_setup_nmi: | ||
| 190 | kfree(action->name); | ||
| 191 | fail_action_name: | ||
| 192 | kfree(action); | ||
| 193 | fail_action: | ||
| 194 | |||
| 195 | return retval; | ||
| 196 | } | ||
| 197 | EXPORT_SYMBOL_GPL(register_nmi_handler); | ||
| 198 | |||
| 199 | void unregister_nmi_handler(unsigned int type, const char *name) | ||
| 200 | { | ||
| 201 | struct nmiaction *a; | ||
| 202 | |||
| 203 | a = __free_nmi(type, name); | ||
| 204 | if (a) { | ||
| 205 | kfree(a->name); | ||
| 206 | kfree(a); | ||
| 207 | } | ||
| 208 | } | ||
| 209 | |||
| 210 | EXPORT_SYMBOL_GPL(unregister_nmi_handler); | ||
| 211 | |||
| 212 | static notrace __kprobes void | ||
| 213 | pci_serr_error(unsigned char reason, struct pt_regs *regs) | ||
| 214 | { | ||
| 215 | pr_emerg("NMI: PCI system error (SERR) for reason %02x on CPU %d.\n", | ||
| 216 | reason, smp_processor_id()); | ||
| 217 | |||
| 218 | /* | ||
| 219 | * On some machines, PCI SERR line is used to report memory | ||
| 220 | * errors. EDAC makes use of it. | ||
| 221 | */ | ||
| 222 | #if defined(CONFIG_EDAC) | ||
| 223 | if (edac_handler_set()) { | ||
| 224 | edac_atomic_assert_error(); | ||
| 225 | return; | ||
| 226 | } | ||
| 227 | #endif | ||
| 228 | |||
| 229 | if (panic_on_unrecovered_nmi) | ||
| 230 | panic("NMI: Not continuing"); | ||
| 231 | |||
| 232 | pr_emerg("Dazed and confused, but trying to continue\n"); | ||
| 233 | |||
| 234 | /* Clear and disable the PCI SERR error line. */ | ||
| 235 | reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_SERR; | ||
| 236 | outb(reason, NMI_REASON_PORT); | ||
| 237 | } | ||
| 238 | |||
| 239 | static notrace __kprobes void | ||
| 240 | io_check_error(unsigned char reason, struct pt_regs *regs) | ||
| 241 | { | ||
| 242 | unsigned long i; | ||
| 243 | |||
| 244 | pr_emerg( | ||
| 245 | "NMI: IOCK error (debug interrupt?) for reason %02x on CPU %d.\n", | ||
| 246 | reason, smp_processor_id()); | ||
| 247 | show_registers(regs); | ||
| 248 | |||
| 249 | if (panic_on_io_nmi) | ||
| 250 | panic("NMI IOCK error: Not continuing"); | ||
| 251 | |||
| 252 | /* Re-enable the IOCK line, wait for a few seconds */ | ||
| 253 | reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_IOCHK; | ||
| 254 | outb(reason, NMI_REASON_PORT); | ||
| 255 | |||
| 256 | i = 20000; | ||
| 257 | while (--i) { | ||
| 258 | touch_nmi_watchdog(); | ||
| 259 | udelay(100); | ||
| 260 | } | ||
| 261 | |||
| 262 | reason &= ~NMI_REASON_CLEAR_IOCHK; | ||
| 263 | outb(reason, NMI_REASON_PORT); | ||
| 264 | } | ||
| 265 | |||
| 266 | static notrace __kprobes void | ||
| 267 | unknown_nmi_error(unsigned char reason, struct pt_regs *regs) | ||
| 268 | { | ||
| 269 | int handled; | ||
| 270 | |||
| 271 | /* | ||
| 272 | * Use 'false' as back-to-back NMIs are dealt with one level up. | ||
| 273 | * Of course this makes having multiple 'unknown' handlers useless | ||
| 274 | * as only the first one is ever run (unless it can actually determine | ||
| 275 | * if it caused the NMI). | ||
| 276 | */ | ||
| 277 | handled = nmi_handle(NMI_UNKNOWN, regs, false); | ||
| 278 | if (handled) { | ||
| 279 | __this_cpu_add(nmi_stats.unknown, handled); | ||
| 280 | return; | ||
| 281 | } | ||
| 282 | |||
| 283 | __this_cpu_add(nmi_stats.unknown, 1); | ||
| 284 | |||
| 285 | #ifdef CONFIG_MCA | ||
| 286 | /* | ||
| 287 | * Might actually be able to figure out what the guilty party | ||
| 288 | * is: | ||
| 289 | */ | ||
| 290 | if (MCA_bus) { | ||
| 291 | mca_handle_nmi(); | ||
| 292 | return; | ||
| 293 | } | ||
| 294 | #endif | ||
| 295 | pr_emerg("Uhhuh. NMI received for unknown reason %02x on CPU %d.\n", | ||
| 296 | reason, smp_processor_id()); | ||
| 297 | |||
| 298 | pr_emerg("Do you have a strange power saving mode enabled?\n"); | ||
| 299 | if (unknown_nmi_panic || panic_on_unrecovered_nmi) | ||
| 300 | panic("NMI: Not continuing"); | ||
| 301 | |||
| 302 | pr_emerg("Dazed and confused, but trying to continue\n"); | ||
| 303 | } | ||
| 304 | |||
| 305 | static DEFINE_PER_CPU(bool, swallow_nmi); | ||
| 306 | static DEFINE_PER_CPU(unsigned long, last_nmi_rip); | ||
| 307 | |||
| 308 | static notrace __kprobes void default_do_nmi(struct pt_regs *regs) | ||
| 309 | { | ||
| 310 | unsigned char reason = 0; | ||
| 311 | int handled; | ||
| 312 | bool b2b = false; | ||
| 313 | |||
| 314 | /* | ||
| 315 | * CPU-specific NMI must be processed before non-CPU-specific | ||
| 316 | * NMI, otherwise we may lose it, because the CPU-specific | ||
| 317 | * NMI cannot be detected/processed on other CPUs. | ||
| 318 | */ | ||
| 319 | |||
| 320 | /* | ||
| 321 | * Back-to-back NMIs are interesting because they can either | ||
| 322 | * be two NMIs or more than two (anything over two is dropped | ||
| 323 | * due to NMI being edge-triggered). If this is the second half | ||
| 324 | * of the back-to-back NMI, assume we dropped things and process | ||
| 325 | * more handlers. Otherwise, reset the 'swallow' NMI behaviour. | ||
| 326 | */ | ||
| 327 | if (regs->ip == __this_cpu_read(last_nmi_rip)) | ||
| 328 | b2b = true; | ||
| 329 | else | ||
| 330 | __this_cpu_write(swallow_nmi, false); | ||
| 331 | |||
| 332 | __this_cpu_write(last_nmi_rip, regs->ip); | ||
| 333 | |||
| 334 | handled = nmi_handle(NMI_LOCAL, regs, b2b); | ||
| 335 | __this_cpu_add(nmi_stats.normal, handled); | ||
| 336 | if (handled) { | ||
| 337 | /* | ||
| 338 | * There are cases when an NMI handler handles multiple | ||
| 339 | * events in the current NMI. One of these events may | ||
| 340 | * be queued for the next NMI. Because the event is | ||
| 341 | * already handled, the next NMI will result in an unknown | ||
| 342 | * NMI. Instead, let's flag this for a potential NMI to | ||
| 343 | * swallow. | ||
| 344 | */ | ||
| 345 | if (handled > 1) | ||
| 346 | __this_cpu_write(swallow_nmi, true); | ||
| 347 | return; | ||
| 348 | } | ||
| 349 | |||
| 350 | /* Non-CPU-specific NMI: NMI sources can be processed on any CPU */ | ||
| 351 | raw_spin_lock(&nmi_reason_lock); | ||
| 352 | reason = x86_platform.get_nmi_reason(); | ||
| 353 | |||
| 354 | if (reason & NMI_REASON_MASK) { | ||
| 355 | if (reason & NMI_REASON_SERR) | ||
| 356 | pci_serr_error(reason, regs); | ||
| 357 | else if (reason & NMI_REASON_IOCHK) | ||
| 358 | io_check_error(reason, regs); | ||
| 359 | #ifdef CONFIG_X86_32 | ||
| 360 | /* | ||
| 361 | * Reassert NMI in case it became active | ||
| 362 | * meanwhile as it's edge-triggered: | ||
| 363 | */ | ||
| 364 | reassert_nmi(); | ||
| 365 | #endif | ||
| 366 | __this_cpu_add(nmi_stats.external, 1); | ||
| 367 | raw_spin_unlock(&nmi_reason_lock); | ||
| 368 | return; | ||
| 369 | } | ||
| 370 | raw_spin_unlock(&nmi_reason_lock); | ||
| 371 | |||
| 372 | /* | ||
| 373 | * Only one NMI can be latched at a time. To handle | ||
| 374 | * this we may process multiple nmi handlers at once to | ||
| 375 | * cover the case where an NMI is dropped. The downside | ||
| 376 | * to this approach is that we may process an NMI prematurely, | ||
| 377 | * while its real NMI is sitting latched. This will cause | ||
| 378 | * an unknown NMI on the next run of the NMI processing. | ||
| 379 | * | ||
| 380 | * We tried to flag that condition above, by setting the | ||
| 381 | * swallow_nmi flag when we process more than one event. | ||
| 382 | * This condition is also only present on the second half | ||
| 383 | * of a back-to-back NMI, so we flag that condition too. | ||
| 384 | * | ||
| 385 | * If both are true, we assume we already processed this | ||
| 386 | * NMI previously and we swallow it. Otherwise we reset | ||
| 387 | * the logic. | ||
| 388 | * | ||
| 389 | * There are scenarios where we may accidentally swallow | ||
| 390 | * a 'real' unknown NMI. For example, while processing | ||
| 391 | * a perf NMI another perf NMI comes in along with a | ||
| 392 | * 'real' unknown NMI. These two NMIs get combined into | ||
| 393 | * one (as described above). When the next NMI gets | ||
| 394 | * processed, it will be flagged by perf as handled, but | ||
| 395 | * no one will know that there was a 'real' unknown NMI sent | ||
| 396 | * also. As a result it gets swallowed. Or if the first | ||
| 397 | * perf NMI returns two events handled then the second | ||
| 398 | * NMI will get eaten by the logic below, again losing a | ||
| 399 | * 'real' unknown NMI. But this is the best we can do | ||
| 400 | * for now. | ||
| 401 | */ | ||
| 402 | if (b2b && __this_cpu_read(swallow_nmi)) | ||
| 403 | __this_cpu_add(nmi_stats.swallow, 1); | ||
| 404 | else | ||
| 405 | unknown_nmi_error(reason, regs); | ||
| 406 | } | ||
| 407 | |||
| 408 | dotraplinkage notrace __kprobes void | ||
| 409 | do_nmi(struct pt_regs *regs, long error_code) | ||
| 410 | { | ||
| 411 | nmi_enter(); | ||
| 412 | |||
| 413 | inc_irq_stat(__nmi_count); | ||
| 414 | |||
| 415 | if (!ignore_nmis) | ||
| 416 | default_do_nmi(regs); | ||
| 417 | |||
| 418 | nmi_exit(); | ||
| 419 | } | ||
| 420 | |||
| 421 | void stop_nmi(void) | ||
| 422 | { | ||
| 423 | ignore_nmis++; | ||
| 424 | } | ||
| 425 | |||
| 426 | void restart_nmi(void) | ||
| 427 | { | ||
| 428 | ignore_nmis--; | ||
| 429 | } | ||
| 430 | |||
| 431 | /* reset the back-to-back NMI logic */ | ||
| 432 | void local_touch_nmi(void) | ||
| 433 | { | ||
| 434 | __this_cpu_write(last_nmi_rip, 0); | ||
| 435 | } | ||
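Tying the new file together, here is a hedged sketch of how a client registers against the API it exports. Only register_nmi_handler()/unregister_nmi_handler(), the NMI_* constants, and the handler signature come from the code above; "mydev" and the two mydev_* device helpers are hypothetical:

#include <linux/module.h>
#include <asm/nmi.h>

/* mydev_caused_nmi()/mydev_ack_nmi() are hypothetical device helpers */
static int mydev_nmi_handler(unsigned int type, struct pt_regs *regs)
{
	if (!mydev_caused_nmi())
		return NMI_DONE;	/* not ours; let later handlers look */

	mydev_ack_nmi();
	return NMI_HANDLED;		/* summed into nmi_handle()'s total */
}

static int __init mydev_init(void)
{
	/* flags 0: queue in list order; NMI_FLAG_FIRST would jump the queue */
	return register_nmi_handler(NMI_LOCAL, mydev_nmi_handler, 0, "mydev");
}

static void __exit mydev_exit(void)
{
	/* the devname string is the lookup key, as __free_nmi() shows */
	unregister_nmi_handler(NMI_LOCAL, "mydev");
}

module_init(mydev_init);
module_exit(mydev_exit);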
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 613a7931ecc1..d90272e6bc40 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c | |||
| @@ -307,6 +307,10 @@ struct pv_info pv_info = { | |||
| 307 | .paravirt_enabled = 0, | 307 | .paravirt_enabled = 0, |
| 308 | .kernel_rpl = 0, | 308 | .kernel_rpl = 0, |
| 309 | .shared_kernel_pmd = 1, /* Only used when CONFIG_X86_PAE is set */ | 309 | .shared_kernel_pmd = 1, /* Only used when CONFIG_X86_PAE is set */ |
| 310 | |||
| 311 | #ifdef CONFIG_X86_64 | ||
| 312 | .extra_user_64bit_cs = __USER_CS, | ||
| 313 | #endif | ||
| 310 | }; | 314 | }; |
| 311 | 315 | ||
| 312 | struct pv_init_ops pv_init_ops = { | 316 | struct pv_init_ops pv_init_ops = { |
diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c index e8c33a302006..726494b58345 100644 --- a/arch/x86/kernel/pci-calgary_64.c +++ b/arch/x86/kernel/pci-calgary_64.c | |||
| @@ -1553,7 +1553,7 @@ static void __init calgary_fixup_one_tce_space(struct pci_dev *dev) | |||
| 1553 | continue; | 1553 | continue; |
| 1554 | 1554 | ||
| 1555 | /* cover the whole region */ | 1555 | /* cover the whole region */ |
| 1556 | npages = (r->end - r->start) >> PAGE_SHIFT; | 1556 | npages = resource_size(r) >> PAGE_SHIFT; |
| 1557 | npages++; | 1557 | npages++; |
| 1558 | 1558 | ||
| 1559 | iommu_range_reserve(tbl, r->start, npages); | 1559 | iommu_range_reserve(tbl, r->start, npages); |
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index b49d00da2aed..80dc793b3f63 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c | |||
| @@ -1,6 +1,7 @@ | |||
| 1 | #include <linux/dma-mapping.h> | 1 | #include <linux/dma-mapping.h> |
| 2 | #include <linux/dma-debug.h> | 2 | #include <linux/dma-debug.h> |
| 3 | #include <linux/dmar.h> | 3 | #include <linux/dmar.h> |
| 4 | #include <linux/export.h> | ||
| 4 | #include <linux/bootmem.h> | 5 | #include <linux/bootmem.h> |
| 5 | #include <linux/gfp.h> | 6 | #include <linux/gfp.h> |
| 6 | #include <linux/pci.h> | 7 | #include <linux/pci.h> |
| @@ -117,8 +118,8 @@ again: | |||
| 117 | } | 118 | } |
| 118 | 119 | ||
| 119 | /* | 120 | /* |
| 120 | * See <Documentation/x86_64/boot-options.txt> for the iommu kernel parameter | 121 | * See <Documentation/x86/x86_64/boot-options.txt> for the iommu kernel |
| 121 | * documentation. | 122 | * parameter documentation. |
| 122 | */ | 123 | */ |
| 123 | static __init int iommu_setup(char *p) | 124 | static __init int iommu_setup(char *p) |
| 124 | { | 125 | { |
diff --git a/arch/x86/kernel/probe_roms.c b/arch/x86/kernel/probe_roms.c index ba0a4cce53be..34e06e84ce31 100644 --- a/arch/x86/kernel/probe_roms.c +++ b/arch/x86/kernel/probe_roms.c | |||
| @@ -10,9 +10,9 @@ | |||
| 10 | #include <linux/dmi.h> | 10 | #include <linux/dmi.h> |
| 11 | #include <linux/pfn.h> | 11 | #include <linux/pfn.h> |
| 12 | #include <linux/pci.h> | 12 | #include <linux/pci.h> |
| 13 | #include <asm/pci-direct.h> | 13 | #include <linux/export.h> |
| 14 | |||
| 15 | 14 | ||
| 15 | #include <asm/pci-direct.h> | ||
| 16 | #include <asm/e820.h> | 16 | #include <asm/e820.h> |
| 17 | #include <asm/mmzone.h> | 17 | #include <asm/mmzone.h> |
| 18 | #include <asm/setup.h> | 18 | #include <asm/setup.h> |
| @@ -234,7 +234,7 @@ void __init probe_roms(void) | |||
| 234 | /* check for extension rom (ignore length byte!) */ | 234 | /* check for extension rom (ignore length byte!) */ |
| 235 | rom = isa_bus_to_virt(extension_rom_resource.start); | 235 | rom = isa_bus_to_virt(extension_rom_resource.start); |
| 236 | if (romsignature(rom)) { | 236 | if (romsignature(rom)) { |
| 237 | length = extension_rom_resource.end - extension_rom_resource.start + 1; | 237 | length = resource_size(&extension_rom_resource); |
| 238 | if (romchecksum(rom, length)) { | 238 | if (romchecksum(rom, length)) { |
| 239 | request_resource(&iomem_resource, &extension_rom_resource); | 239 | request_resource(&iomem_resource, &extension_rom_resource); |
| 240 | upper = extension_rom_resource.start; | 240 | upper = extension_rom_resource.start; |
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index e1ba8cb24e4e..b9b3b1a51643 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c | |||
| @@ -49,7 +49,7 @@ void free_thread_xstate(struct task_struct *tsk) | |||
| 49 | void free_thread_info(struct thread_info *ti) | 49 | void free_thread_info(struct thread_info *ti) |
| 50 | { | 50 | { |
| 51 | free_thread_xstate(ti->task); | 51 | free_thread_xstate(ti->task); |
| 52 | free_pages((unsigned long)ti, get_order(THREAD_SIZE)); | 52 | free_pages((unsigned long)ti, THREAD_ORDER); |
| 53 | } | 53 | } |
| 54 | 54 | ||
| 55 | void arch_task_cache_init(void) | 55 | void arch_task_cache_init(void) |
| @@ -438,29 +438,6 @@ void cpu_idle_wait(void) | |||
| 438 | } | 438 | } |
| 439 | EXPORT_SYMBOL_GPL(cpu_idle_wait); | 439 | EXPORT_SYMBOL_GPL(cpu_idle_wait); |
| 440 | 440 | ||
| 441 | /* | ||
| 442 | * This uses new MONITOR/MWAIT instructions on P4 processors with PNI, | ||
| 443 | * which can obviate IPI to trigger checking of need_resched. | ||
| 444 | * We execute MONITOR against need_resched and enter optimized wait state | ||
| 445 | * through MWAIT. Whenever someone changes need_resched, we would be woken | ||
| 446 | * up from MWAIT (without an IPI). | ||
| 447 | * | ||
| 448 | * New with Core Duo processors, MWAIT can take some hints based on CPU | ||
| 449 | * capability. | ||
| 450 | */ | ||
| 451 | void mwait_idle_with_hints(unsigned long ax, unsigned long cx) | ||
| 452 | { | ||
| 453 | if (!need_resched()) { | ||
| 454 | if (this_cpu_has(X86_FEATURE_CLFLUSH_MONITOR)) | ||
| 455 | clflush((void *)¤t_thread_info()->flags); | ||
| 456 | |||
| 457 | __monitor((void *)¤t_thread_info()->flags, 0, 0); | ||
| 458 | smp_mb(); | ||
| 459 | if (!need_resched()) | ||
| 460 | __mwait(ax, cx); | ||
| 461 | } | ||
| 462 | } | ||
| 463 | |||
| 464 | /* Default MONITOR/MWAIT with no hints, used for default C1 state */ | 441 | /* Default MONITOR/MWAIT with no hints, used for default C1 state */ |
| 465 | static void mwait_idle(void) | 442 | static void mwait_idle(void) |
| 466 | { | 443 | { |
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index a3d0dc59067b..795b79f984c2 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c | |||
| @@ -38,6 +38,7 @@ | |||
| 38 | #include <linux/uaccess.h> | 38 | #include <linux/uaccess.h> |
| 39 | #include <linux/io.h> | 39 | #include <linux/io.h> |
| 40 | #include <linux/kdebug.h> | 40 | #include <linux/kdebug.h> |
| 41 | #include <linux/cpuidle.h> | ||
| 41 | 42 | ||
| 42 | #include <asm/pgtable.h> | 43 | #include <asm/pgtable.h> |
| 43 | #include <asm/system.h> | 44 | #include <asm/system.h> |
| @@ -56,6 +57,7 @@ | |||
| 56 | #include <asm/idle.h> | 57 | #include <asm/idle.h> |
| 57 | #include <asm/syscalls.h> | 58 | #include <asm/syscalls.h> |
| 58 | #include <asm/debugreg.h> | 59 | #include <asm/debugreg.h> |
| 60 | #include <asm/nmi.h> | ||
| 59 | 61 | ||
| 60 | asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); | 62 | asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); |
| 61 | 63 | ||
| @@ -106,10 +108,12 @@ void cpu_idle(void) | |||
| 106 | if (cpu_is_offline(cpu)) | 108 | if (cpu_is_offline(cpu)) |
| 107 | play_dead(); | 109 | play_dead(); |
| 108 | 110 | ||
| 111 | local_touch_nmi(); | ||
| 109 | local_irq_disable(); | 112 | local_irq_disable(); |
| 110 | /* Don't trace irqs off for idle */ | 113 | /* Don't trace irqs off for idle */ |
| 111 | stop_critical_timings(); | 114 | stop_critical_timings(); |
| 112 | pm_idle(); | 115 | if (cpuidle_idle_call()) |
| 116 | pm_idle(); | ||
| 113 | start_critical_timings(); | 117 | start_critical_timings(); |
| 114 | } | 118 | } |
| 115 | tick_nohz_restart_sched_tick(); | 119 | tick_nohz_restart_sched_tick(); |
| @@ -260,7 +264,7 @@ EXPORT_SYMBOL_GPL(start_thread); | |||
| 260 | 264 | ||
| 261 | 265 | ||
| 262 | /* | 266 | /* |
| 263 | * switch_to(x,yn) should switch tasks from x to y. | 267 | * switch_to(x,y) should switch tasks from x to y. |
| 264 | * | 268 | * |
| 265 | * We fsave/fwait so that an exception goes off at the right time | 269 | * We fsave/fwait so that an exception goes off at the right time |
| 266 | * (as a call from the fsave or fwait in effect) rather than to | 270 | * (as a call from the fsave or fwait in effect) rather than to |
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index ca6f7ab8df33..3bd7e6eebf31 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c | |||
| @@ -37,6 +37,7 @@ | |||
| 37 | #include <linux/uaccess.h> | 37 | #include <linux/uaccess.h> |
| 38 | #include <linux/io.h> | 38 | #include <linux/io.h> |
| 39 | #include <linux/ftrace.h> | 39 | #include <linux/ftrace.h> |
| 40 | #include <linux/cpuidle.h> | ||
| 40 | 41 | ||
| 41 | #include <asm/pgtable.h> | 42 | #include <asm/pgtable.h> |
| 42 | #include <asm/system.h> | 43 | #include <asm/system.h> |
| @@ -50,6 +51,7 @@ | |||
| 50 | #include <asm/idle.h> | 51 | #include <asm/idle.h> |
| 51 | #include <asm/syscalls.h> | 52 | #include <asm/syscalls.h> |
| 52 | #include <asm/debugreg.h> | 53 | #include <asm/debugreg.h> |
| 54 | #include <asm/nmi.h> | ||
| 53 | 55 | ||
| 54 | asmlinkage extern void ret_from_fork(void); | 56 | asmlinkage extern void ret_from_fork(void); |
| 55 | 57 | ||
| @@ -132,11 +134,13 @@ void cpu_idle(void) | |||
| 132 | * from here on, until they go to idle. | 134 | * from here on, until they go to idle. |
| 133 | * Otherwise, idle callbacks can misfire. | 135 | * Otherwise, idle callbacks can misfire. |
| 134 | */ | 136 | */ |
| 137 | local_touch_nmi(); | ||
| 135 | local_irq_disable(); | 138 | local_irq_disable(); |
| 136 | enter_idle(); | 139 | enter_idle(); |
| 137 | /* Don't trace irqs off for idle */ | 140 | /* Don't trace irqs off for idle */ |
| 138 | stop_critical_timings(); | 141 | stop_critical_timings(); |
| 139 | pm_idle(); | 142 | if (cpuidle_idle_call()) |
| 143 | pm_idle(); | ||
| 140 | start_critical_timings(); | 144 | start_critical_timings(); |
| 141 | 145 | ||
| 142 | /* In many cases the interrupt that ended idle | 146 | /* In many cases the interrupt that ended idle |
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 9242436e9937..e334be1182b9 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c | |||
| @@ -464,7 +464,7 @@ static inline void kb_wait(void) | |||
| 464 | } | 464 | } |
| 465 | } | 465 | } |
| 466 | 466 | ||
| 467 | static void vmxoff_nmi(int cpu, struct die_args *args) | 467 | static void vmxoff_nmi(int cpu, struct pt_regs *regs) |
| 468 | { | 468 | { |
| 469 | cpu_emergency_vmxoff(); | 469 | cpu_emergency_vmxoff(); |
| 470 | } | 470 | } |
| @@ -736,14 +736,10 @@ static nmi_shootdown_cb shootdown_callback; | |||
| 736 | 736 | ||
| 737 | static atomic_t waiting_for_crash_ipi; | 737 | static atomic_t waiting_for_crash_ipi; |
| 738 | 738 | ||
| 739 | static int crash_nmi_callback(struct notifier_block *self, | 739 | static int crash_nmi_callback(unsigned int val, struct pt_regs *regs) |
| 740 | unsigned long val, void *data) | ||
| 741 | { | 740 | { |
| 742 | int cpu; | 741 | int cpu; |
| 743 | 742 | ||
| 744 | if (val != DIE_NMI) | ||
| 745 | return NOTIFY_OK; | ||
| 746 | |||
| 747 | cpu = raw_smp_processor_id(); | 743 | cpu = raw_smp_processor_id(); |
| 748 | 744 | ||
| 749 | /* Don't do anything if this handler is invoked on crashing cpu. | 745 | /* Don't do anything if this handler is invoked on crashing cpu. |
| @@ -751,10 +747,10 @@ static int crash_nmi_callback(struct notifier_block *self, | |||
| 751 | * an NMI if system was initially booted with nmi_watchdog parameter. | 747 | * an NMI if system was initially booted with nmi_watchdog parameter. |
| 752 | */ | 748 | */ |
| 753 | if (cpu == crashing_cpu) | 749 | if (cpu == crashing_cpu) |
| 754 | return NOTIFY_STOP; | 750 | return NMI_HANDLED; |
| 755 | local_irq_disable(); | 751 | local_irq_disable(); |
| 756 | 752 | ||
| 757 | shootdown_callback(cpu, (struct die_args *)data); | 753 | shootdown_callback(cpu, regs); |
| 758 | 754 | ||
| 759 | atomic_dec(&waiting_for_crash_ipi); | 755 | atomic_dec(&waiting_for_crash_ipi); |
| 760 | /* Assume hlt works */ | 756 | /* Assume hlt works */ |
| @@ -762,7 +758,7 @@ static int crash_nmi_callback(struct notifier_block *self, | |||
| 762 | for (;;) | 758 | for (;;) |
| 763 | cpu_relax(); | 759 | cpu_relax(); |
| 764 | 760 | ||
| 765 | return 1; | 761 | return NMI_HANDLED; |
| 766 | } | 762 | } |
| 767 | 763 | ||
| 768 | static void smp_send_nmi_allbutself(void) | 764 | static void smp_send_nmi_allbutself(void) |
| @@ -770,12 +766,6 @@ static void smp_send_nmi_allbutself(void) | |||
| 770 | apic->send_IPI_allbutself(NMI_VECTOR); | 766 | apic->send_IPI_allbutself(NMI_VECTOR); |
| 771 | } | 767 | } |
| 772 | 768 | ||
| 773 | static struct notifier_block crash_nmi_nb = { | ||
| 774 | .notifier_call = crash_nmi_callback, | ||
| 775 | /* we want to be the first one called */ | ||
| 776 | .priority = NMI_LOCAL_HIGH_PRIOR+1, | ||
| 777 | }; | ||
| 778 | |||
| 779 | /* Halt all other CPUs, calling the specified function on each of them | 769 | /* Halt all other CPUs, calling the specified function on each of them |
| 780 | * | 770 | * |
| 781 | * This function can be used to halt all other CPUs on crash | 771 | * This function can be used to halt all other CPUs on crash |
| @@ -794,7 +784,8 @@ void nmi_shootdown_cpus(nmi_shootdown_cb callback) | |||
| 794 | 784 | ||
| 795 | atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1); | 785 | atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1); |
| 796 | /* Would it be better to replace the trap vector here? */ | 786 | /* Would it be better to replace the trap vector here? */ |
| 797 | if (register_die_notifier(&crash_nmi_nb)) | 787 | if (register_nmi_handler(NMI_LOCAL, crash_nmi_callback, |
| 788 | NMI_FLAG_FIRST, "crash")) | ||
| 798 | return; /* return what? */ | 789 | return; /* return what? */ |
| 799 | /* Ensure the new callback function is set before sending | 790 | /* Ensure the new callback function is set before sending |
| 800 | * out the NMI | 791 | * out the NMI |
diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c index 3f2ad2640d85..348ce016a835 100644 --- a/arch/x86/kernel/rtc.c +++ b/arch/x86/kernel/rtc.c | |||
| @@ -5,6 +5,7 @@ | |||
| 5 | #include <linux/mc146818rtc.h> | 5 | #include <linux/mc146818rtc.h> |
| 6 | #include <linux/acpi.h> | 6 | #include <linux/acpi.h> |
| 7 | #include <linux/bcd.h> | 7 | #include <linux/bcd.h> |
| 8 | #include <linux/export.h> | ||
| 8 | #include <linux/pnp.h> | 9 | #include <linux/pnp.h> |
| 9 | #include <linux/of.h> | 10 | #include <linux/of.h> |
| 10 | 11 | ||
| @@ -42,8 +43,11 @@ int mach_set_rtc_mmss(unsigned long nowtime) | |||
| 42 | { | 43 | { |
| 43 | int real_seconds, real_minutes, cmos_minutes; | 44 | int real_seconds, real_minutes, cmos_minutes; |
| 44 | unsigned char save_control, save_freq_select; | 45 | unsigned char save_control, save_freq_select; |
| 46 | unsigned long flags; | ||
| 45 | int retval = 0; | 47 | int retval = 0; |
| 46 | 48 | ||
| 49 | spin_lock_irqsave(&rtc_lock, flags); | ||
| 50 | |||
| 47 | /* tell the clock it's being set */ | 51 | /* tell the clock it's being set */ |
| 48 | save_control = CMOS_READ(RTC_CONTROL); | 52 | save_control = CMOS_READ(RTC_CONTROL); |
| 49 | CMOS_WRITE((save_control|RTC_SET), RTC_CONTROL); | 53 | CMOS_WRITE((save_control|RTC_SET), RTC_CONTROL); |
| @@ -93,12 +97,17 @@ int mach_set_rtc_mmss(unsigned long nowtime) | |||
| 93 | CMOS_WRITE(save_control, RTC_CONTROL); | 97 | CMOS_WRITE(save_control, RTC_CONTROL); |
| 94 | CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT); | 98 | CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT); |
| 95 | 99 | ||
| 100 | spin_unlock_irqrestore(&rtc_lock, flags); | ||
| 101 | |||
| 96 | return retval; | 102 | return retval; |
| 97 | } | 103 | } |
| 98 | 104 | ||
| 99 | unsigned long mach_get_cmos_time(void) | 105 | unsigned long mach_get_cmos_time(void) |
| 100 | { | 106 | { |
| 101 | unsigned int status, year, mon, day, hour, min, sec, century = 0; | 107 | unsigned int status, year, mon, day, hour, min, sec, century = 0; |
| 108 | unsigned long flags; | ||
| 109 | |||
| 110 | spin_lock_irqsave(&rtc_lock, flags); | ||
| 102 | 111 | ||
| 103 | /* | 112 | /* |
| 104 | * If UIP is clear, then we have >= 244 microseconds before | 113 | * If UIP is clear, then we have >= 244 microseconds before |
| @@ -125,6 +134,8 @@ unsigned long mach_get_cmos_time(void) | |||
| 125 | status = CMOS_READ(RTC_CONTROL); | 134 | status = CMOS_READ(RTC_CONTROL); |
| 126 | WARN_ON_ONCE(RTC_ALWAYS_BCD && (status & RTC_DM_BINARY)); | 135 | WARN_ON_ONCE(RTC_ALWAYS_BCD && (status & RTC_DM_BINARY)); |
| 127 | 136 | ||
| 137 | spin_unlock_irqrestore(&rtc_lock, flags); | ||
| 138 | |||
| 128 | if (RTC_ALWAYS_BCD || !(status & RTC_DM_BINARY)) { | 139 | if (RTC_ALWAYS_BCD || !(status & RTC_DM_BINARY)) { |
| 129 | sec = bcd2bin(sec); | 140 | sec = bcd2bin(sec); |
| 130 | min = bcd2bin(min); | 141 | min = bcd2bin(min); |
| @@ -169,24 +180,15 @@ EXPORT_SYMBOL(rtc_cmos_write); | |||
| 169 | 180 | ||
| 170 | int update_persistent_clock(struct timespec now) | 181 | int update_persistent_clock(struct timespec now) |
| 171 | { | 182 | { |
| 172 | unsigned long flags; | 183 | return x86_platform.set_wallclock(now.tv_sec); |
| 173 | int retval; | ||
| 174 | |||
| 175 | spin_lock_irqsave(&rtc_lock, flags); | ||
| 176 | retval = x86_platform.set_wallclock(now.tv_sec); | ||
| 177 | spin_unlock_irqrestore(&rtc_lock, flags); | ||
| 178 | |||
| 179 | return retval; | ||
| 180 | } | 184 | } |
| 181 | 185 | ||
| 182 | /* not static: needed by APM */ | 186 | /* not static: needed by APM */ |
| 183 | void read_persistent_clock(struct timespec *ts) | 187 | void read_persistent_clock(struct timespec *ts) |
| 184 | { | 188 | { |
| 185 | unsigned long retval, flags; | 189 | unsigned long retval; |
| 186 | 190 | ||
| 187 | spin_lock_irqsave(&rtc_lock, flags); | ||
| 188 | retval = x86_platform.get_wallclock(); | 191 | retval = x86_platform.get_wallclock(); |
| 189 | spin_unlock_irqrestore(&rtc_lock, flags); | ||
| 190 | 192 | ||
| 191 | ts->tv_sec = retval; | 193 | ts->tv_sec = retval; |
| 192 | ts->tv_nsec = 0; | 194 | ts->tv_nsec = 0; |
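
With rtc_lock now taken inside mach_set_rtc_mmss() and mach_get_cmos_time() themselves, the generic update_persistent_clock()/read_persistent_clock() wrappers stop locking, so set_wallclock/get_wallclock implementations that never touch the MC146818 are no longer serialized on a lock they do not need. A hedged sketch of such an override; every "example_" name here is hypothetical:

    #include <asm/x86_init.h>

    /* stand-in for a hypervisor or firmware time query */
    static unsigned long example_read_time(void)
    {
    	return 0;
    }

    /* no CMOS access, hence no rtc_lock required anywhere */
    static unsigned long example_get_wallclock(void)
    {
    	return example_read_time();
    }

    static void __init example_time_setup(void)
    {
    	x86_platform.get_wallclock = example_get_wallclock;
    }
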
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index afaf38447ef5..cf0ef986cb6d 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c | |||
| @@ -1045,6 +1045,8 @@ void __init setup_arch(char **cmdline_p) | |||
| 1045 | 1045 | ||
| 1046 | x86_init.timers.wallclock_init(); | 1046 | x86_init.timers.wallclock_init(); |
| 1047 | 1047 | ||
| 1048 | x86_platform.wallclock_init(); | ||
| 1049 | |||
| 1048 | mcheck_init(); | 1050 | mcheck_init(); |
| 1049 | 1051 | ||
| 1050 | arch_init_ideal_nops(); | 1052 | arch_init_ideal_nops(); |
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c index 013e7eba83bb..16204dc15484 100644 --- a/arch/x86/kernel/smp.c +++ b/arch/x86/kernel/smp.c | |||
| @@ -16,6 +16,7 @@ | |||
| 16 | #include <linux/mm.h> | 16 | #include <linux/mm.h> |
| 17 | #include <linux/delay.h> | 17 | #include <linux/delay.h> |
| 18 | #include <linux/spinlock.h> | 18 | #include <linux/spinlock.h> |
| 19 | #include <linux/export.h> | ||
| 19 | #include <linux/kernel_stat.h> | 20 | #include <linux/kernel_stat.h> |
| 20 | #include <linux/mc146818rtc.h> | 21 | #include <linux/mc146818rtc.h> |
| 21 | #include <linux/cache.h> | 22 | #include <linux/cache.h> |
diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c index 7977f0cfe339..c346d1161488 100644 --- a/arch/x86/kernel/step.c +++ b/arch/x86/kernel/step.c | |||
| @@ -74,7 +74,7 @@ static int is_setting_trap_flag(struct task_struct *child, struct pt_regs *regs) | |||
| 74 | 74 | ||
| 75 | #ifdef CONFIG_X86_64 | 75 | #ifdef CONFIG_X86_64 |
| 76 | case 0x40 ... 0x4f: | 76 | case 0x40 ... 0x4f: |
| 77 | if (regs->cs != __USER_CS) | 77 | if (!user_64bit_mode(regs)) |
| 78 | /* 32-bit mode: register increment */ | 78 | /* 32-bit mode: register increment */ |
| 79 | return 0; | 79 | return 0; |
| 80 | /* 64-bit mode: REX prefix */ | 80 | /* 64-bit mode: REX prefix */ |
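
user_64bit_mode() replaces the open-coded %cs check so that paravirt guests, which can run 64-bit user code on a second selector, are handled too. A sketch of the helper as I recall it from the companion asm/ptrace.h hunk in this series:

    static inline bool user_64bit_mode(struct pt_regs *regs)
    {
    #ifndef CONFIG_PARAVIRT
    	/* on native, __USER_CS is the only long-mode CPL 3 selector */
    	return regs->cs == __USER_CS;
    #else
    	/* paravirt may supply an extra 64-bit user %cs */
    	return regs->cs == __USER_CS ||
    	       regs->cs == pv_info.extra_user_64bit_cs;
    #endif
    }
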
diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c index ff14a5044ce6..051489082d59 100644 --- a/arch/x86/kernel/sys_x86_64.c +++ b/arch/x86/kernel/sys_x86_64.c | |||
| @@ -14,10 +14,73 @@ | |||
| 14 | #include <linux/personality.h> | 14 | #include <linux/personality.h> |
| 15 | #include <linux/random.h> | 15 | #include <linux/random.h> |
| 16 | #include <linux/uaccess.h> | 16 | #include <linux/uaccess.h> |
| 17 | #include <linux/elf.h> | ||
| 17 | 18 | ||
| 18 | #include <asm/ia32.h> | 19 | #include <asm/ia32.h> |
| 19 | #include <asm/syscalls.h> | 20 | #include <asm/syscalls.h> |
| 20 | 21 | ||
| 22 | /* | ||
| 23 | * Align a virtual address to avoid aliasing in the I$ on AMD F15h. | ||
| 24 | * | ||
| 25 | * @flags denotes the allocation direction - bottomup or topdown - | ||
| 26 | * or vDSO; see call sites below. | ||
| 27 | */ | ||
| 28 | unsigned long align_addr(unsigned long addr, struct file *filp, | ||
| 29 | enum align_flags flags) | ||
| 30 | { | ||
| 31 | unsigned long tmp_addr; | ||
| 32 | |||
| 33 | /* handle 32- and 64-bit case with a single conditional */ | ||
| 34 | if (va_align.flags < 0 || !(va_align.flags & (2 - mmap_is_ia32()))) | ||
| 35 | return addr; | ||
| 36 | |||
| 37 | if (!(current->flags & PF_RANDOMIZE)) | ||
| 38 | return addr; | ||
| 39 | |||
| 40 | if (!((flags & ALIGN_VDSO) || filp)) | ||
| 41 | return addr; | ||
| 42 | |||
| 43 | tmp_addr = addr; | ||
| 44 | |||
| 45 | /* | ||
| 46 | * We need an address that is <= the original one | ||
| 47 | * only in the topdown direction. | ||
| 48 | */ | ||
| 49 | if (!(flags & ALIGN_TOPDOWN)) | ||
| 50 | tmp_addr += va_align.mask; | ||
| 51 | |||
| 52 | tmp_addr &= ~va_align.mask; | ||
| 53 | |||
| 54 | return tmp_addr; | ||
| 55 | } | ||
| 56 | |||
| 57 | static int __init control_va_addr_alignment(char *str) | ||
| 58 | { | ||
| 59 | /* guard against enabling this on other CPU families */ | ||
| 60 | if (va_align.flags < 0) | ||
| 61 | return 1; | ||
| 62 | |||
| 63 | if (*str == 0) | ||
| 64 | return 1; | ||
| 65 | |||
| 66 | if (*str == '=') | ||
| 67 | str++; | ||
| 68 | |||
| 69 | if (!strcmp(str, "32")) | ||
| 70 | va_align.flags = ALIGN_VA_32; | ||
| 71 | else if (!strcmp(str, "64")) | ||
| 72 | va_align.flags = ALIGN_VA_64; | ||
| 73 | else if (!strcmp(str, "off")) | ||
| 74 | va_align.flags = 0; | ||
| 75 | else if (!strcmp(str, "on")) | ||
| 76 | va_align.flags = ALIGN_VA_32 | ALIGN_VA_64; | ||
| 77 | else | ||
| 78 | return 0; | ||
| 79 | |||
| 80 | return 1; | ||
| 81 | } | ||
| 82 | __setup("align_va_addr", control_va_addr_alignment); | ||
| 83 | |||
| 21 | SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len, | 84 | SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len, |
| 22 | unsigned long, prot, unsigned long, flags, | 85 | unsigned long, prot, unsigned long, flags, |
| 23 | unsigned long, fd, unsigned long, off) | 86 | unsigned long, fd, unsigned long, off) |
| @@ -92,6 +155,9 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr, | |||
| 92 | start_addr = addr; | 155 | start_addr = addr; |
| 93 | 156 | ||
| 94 | full_search: | 157 | full_search: |
| 158 | |||
| 159 | addr = align_addr(addr, filp, 0); | ||
| 160 | |||
| 95 | for (vma = find_vma(mm, addr); ; vma = vma->vm_next) { | 161 | for (vma = find_vma(mm, addr); ; vma = vma->vm_next) { |
| 96 | /* At this point: (!vma || addr < vma->vm_end). */ | 162 | /* At this point: (!vma || addr < vma->vm_end). */ |
| 97 | if (end - len < addr) { | 163 | if (end - len < addr) { |
| @@ -117,6 +183,7 @@ full_search: | |||
| 117 | mm->cached_hole_size = vma->vm_start - addr; | 183 | mm->cached_hole_size = vma->vm_start - addr; |
| 118 | 184 | ||
| 119 | addr = vma->vm_end; | 185 | addr = vma->vm_end; |
| 186 | addr = align_addr(addr, filp, 0); | ||
| 120 | } | 187 | } |
| 121 | } | 188 | } |
| 122 | 189 | ||
| @@ -161,10 +228,13 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, | |||
| 161 | 228 | ||
| 162 | /* make sure it can fit in the remaining address space */ | 229 | /* make sure it can fit in the remaining address space */ |
| 163 | if (addr > len) { | 230 | if (addr > len) { |
| 164 | vma = find_vma(mm, addr-len); | 231 | unsigned long tmp_addr = align_addr(addr - len, filp, |
| 165 | if (!vma || addr <= vma->vm_start) | 232 | ALIGN_TOPDOWN); |
| 233 | |||
| 234 | vma = find_vma(mm, tmp_addr); | ||
| 235 | if (!vma || tmp_addr + len <= vma->vm_start) | ||
| 166 | /* remember the address as a hint for next time */ | 236 | /* remember the address as a hint for next time */ |
| 167 | return mm->free_area_cache = addr-len; | 237 | return mm->free_area_cache = tmp_addr; |
| 168 | } | 238 | } |
| 169 | 239 | ||
| 170 | if (mm->mmap_base < len) | 240 | if (mm->mmap_base < len) |
| @@ -173,6 +243,8 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, | |||
| 173 | addr = mm->mmap_base-len; | 243 | addr = mm->mmap_base-len; |
| 174 | 244 | ||
| 175 | do { | 245 | do { |
| 246 | addr = align_addr(addr, filp, ALIGN_TOPDOWN); | ||
| 247 | |||
| 176 | /* | 248 | /* |
| 177 | * Lookup failure means no vma is above this address, | 249 | * Lookup failure means no vma is above this address, |
| 178 | * else if new region fits below vma->vm_start, | 250 | * else if new region fits below vma->vm_start, |
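
align_addr() keys off a boot-time va_align descriptor that the AMD F15h CPU setup code fills in; a sketch of that structure as I recall the companion asm/elf.h hunk:

    struct va_alignment {
    	int		flags;	/* ALIGN_VA_32 | ALIGN_VA_64, or -1 when inactive */
    	unsigned long	mask;	/* VA bits to clear to avoid I$ aliasing */
    } ____cacheline_aligned;

    extern struct va_alignment va_align;

Per control_va_addr_alignment() above, booting with align_va_addr=64 limits the alignment to 64-bit tasks, align_va_addr=32 to 32-bit ones, and align_va_addr=off disables it entirely.
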
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S index fbb0a045a1a2..9a0e31293920 100644 --- a/arch/x86/kernel/syscall_table_32.S +++ b/arch/x86/kernel/syscall_table_32.S | |||
| @@ -168,7 +168,7 @@ ENTRY(sys_call_table) | |||
| 168 | .long ptregs_vm86 | 168 | .long ptregs_vm86 |
| 169 | .long sys_ni_syscall /* Old sys_query_module */ | 169 | .long sys_ni_syscall /* Old sys_query_module */ |
| 170 | .long sys_poll | 170 | .long sys_poll |
| 171 | .long sys_nfsservctl | 171 | .long sys_ni_syscall /* Old nfsservctl */ |
| 172 | .long sys_setresgid16 /* 170 */ | 172 | .long sys_setresgid16 /* 170 */ |
| 173 | .long sys_getresgid16 | 173 | .long sys_getresgid16 |
| 174 | .long sys_prctl | 174 | .long sys_prctl |
| @@ -346,3 +346,5 @@ ENTRY(sys_call_table) | |||
| 346 | .long sys_syncfs | 346 | .long sys_syncfs |
| 347 | .long sys_sendmmsg /* 345 */ | 347 | .long sys_sendmmsg /* 345 */ |
| 348 | .long sys_setns | 348 | .long sys_setns |
| 349 | .long sys_process_vm_readv | ||
| 350 | .long sys_process_vm_writev | ||
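
Following the numbering in the comments above (sys_sendmmsg is 345, sys_setns 346), the two new entries land at syscall numbers 347 and 348 on 32-bit x86. A hedged userspace sketch that exercises the raw number before a libc wrapper exists; peek_remote() is a hypothetical helper:

    #include <sys/types.h>
    #include <sys/uio.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    /* 347 == __NR_process_vm_readv in this table */
    static ssize_t peek_remote(pid_t pid, void *dst, void *src, size_t len)
    {
    	struct iovec local  = { .iov_base = dst, .iov_len = len };
    	struct iovec remote = { .iov_base = src, .iov_len = len };

    	return syscall(347, pid, &local, 1UL, &remote, 1UL, 0UL);
    }
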
diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c index e07a2fc876b9..e2410e27f97e 100644 --- a/arch/x86/kernel/tboot.c +++ b/arch/x86/kernel/tboot.c | |||
| @@ -22,6 +22,7 @@ | |||
| 22 | #include <linux/dma_remapping.h> | 22 | #include <linux/dma_remapping.h> |
| 23 | #include <linux/init_task.h> | 23 | #include <linux/init_task.h> |
| 24 | #include <linux/spinlock.h> | 24 | #include <linux/spinlock.h> |
| 25 | #include <linux/export.h> | ||
| 25 | #include <linux/delay.h> | 26 | #include <linux/delay.h> |
| 26 | #include <linux/sched.h> | 27 | #include <linux/sched.h> |
| 27 | #include <linux/init.h> | 28 | #include <linux/init.h> |
diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c index 5a64d057be57..dd5fbf4101fc 100644 --- a/arch/x86/kernel/time.c +++ b/arch/x86/kernel/time.c | |||
| @@ -13,6 +13,7 @@ | |||
| 13 | #include <linux/interrupt.h> | 13 | #include <linux/interrupt.h> |
| 14 | #include <linux/i8253.h> | 14 | #include <linux/i8253.h> |
| 15 | #include <linux/time.h> | 15 | #include <linux/time.h> |
| 16 | #include <linux/export.h> | ||
| 16 | #include <linux/mca.h> | 17 | #include <linux/mca.h> |
| 17 | 18 | ||
| 18 | #include <asm/vsyscall.h> | 19 | #include <asm/vsyscall.h> |
diff --git a/arch/x86/kernel/topology.c b/arch/x86/kernel/topology.c index 8927486a4649..76ee97709a00 100644 --- a/arch/x86/kernel/topology.c +++ b/arch/x86/kernel/topology.c | |||
| @@ -26,6 +26,7 @@ | |||
| 26 | * Send feedback to <colpatch@us.ibm.com> | 26 | * Send feedback to <colpatch@us.ibm.com> |
| 27 | */ | 27 | */ |
| 28 | #include <linux/nodemask.h> | 28 | #include <linux/nodemask.h> |
| 29 | #include <linux/export.h> | ||
| 29 | #include <linux/mmzone.h> | 30 | #include <linux/mmzone.h> |
| 30 | #include <linux/init.h> | 31 | #include <linux/init.h> |
| 31 | #include <linux/smp.h> | 32 | #include <linux/smp.h> |
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index fbc097a085ca..a8e3eb83466c 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c | |||
| @@ -49,7 +49,7 @@ | |||
| 49 | #include <asm/stacktrace.h> | 49 | #include <asm/stacktrace.h> |
| 50 | #include <asm/processor.h> | 50 | #include <asm/processor.h> |
| 51 | #include <asm/debugreg.h> | 51 | #include <asm/debugreg.h> |
| 52 | #include <asm/atomic.h> | 52 | #include <linux/atomic.h> |
| 53 | #include <asm/system.h> | 53 | #include <asm/system.h> |
| 54 | #include <asm/traps.h> | 54 | #include <asm/traps.h> |
| 55 | #include <asm/desc.h> | 55 | #include <asm/desc.h> |
| @@ -81,15 +81,6 @@ gate_desc idt_table[NR_VECTORS] __page_aligned_data = { { { { 0, 0 } } }, }; | |||
| 81 | DECLARE_BITMAP(used_vectors, NR_VECTORS); | 81 | DECLARE_BITMAP(used_vectors, NR_VECTORS); |
| 82 | EXPORT_SYMBOL_GPL(used_vectors); | 82 | EXPORT_SYMBOL_GPL(used_vectors); |
| 83 | 83 | ||
| 84 | static int ignore_nmis; | ||
| 85 | |||
| 86 | int unknown_nmi_panic; | ||
| 87 | /* | ||
| 88 | * Prevent NMI reason port (0x61) being accessed simultaneously, can | ||
| 89 | * only be used in NMI handler. | ||
| 90 | */ | ||
| 91 | static DEFINE_RAW_SPINLOCK(nmi_reason_lock); | ||
| 92 | |||
| 93 | static inline void conditional_sti(struct pt_regs *regs) | 84 | static inline void conditional_sti(struct pt_regs *regs) |
| 94 | { | 85 | { |
| 95 | if (regs->flags & X86_EFLAGS_IF) | 86 | if (regs->flags & X86_EFLAGS_IF) |
| @@ -307,152 +298,6 @@ gp_in_kernel: | |||
| 307 | die("general protection fault", regs, error_code); | 298 | die("general protection fault", regs, error_code); |
| 308 | } | 299 | } |
| 309 | 300 | ||
| 310 | static int __init setup_unknown_nmi_panic(char *str) | ||
| 311 | { | ||
| 312 | unknown_nmi_panic = 1; | ||
| 313 | return 1; | ||
| 314 | } | ||
| 315 | __setup("unknown_nmi_panic", setup_unknown_nmi_panic); | ||
| 316 | |||
| 317 | static notrace __kprobes void | ||
| 318 | pci_serr_error(unsigned char reason, struct pt_regs *regs) | ||
| 319 | { | ||
| 320 | pr_emerg("NMI: PCI system error (SERR) for reason %02x on CPU %d.\n", | ||
| 321 | reason, smp_processor_id()); | ||
| 322 | |||
| 323 | /* | ||
| 324 | * On some machines, PCI SERR line is used to report memory | ||
| 325 | * errors. EDAC makes use of it. | ||
| 326 | */ | ||
| 327 | #if defined(CONFIG_EDAC) | ||
| 328 | if (edac_handler_set()) { | ||
| 329 | edac_atomic_assert_error(); | ||
| 330 | return; | ||
| 331 | } | ||
| 332 | #endif | ||
| 333 | |||
| 334 | if (panic_on_unrecovered_nmi) | ||
| 335 | panic("NMI: Not continuing"); | ||
| 336 | |||
| 337 | pr_emerg("Dazed and confused, but trying to continue\n"); | ||
| 338 | |||
| 339 | /* Clear and disable the PCI SERR error line. */ | ||
| 340 | reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_SERR; | ||
| 341 | outb(reason, NMI_REASON_PORT); | ||
| 342 | } | ||
| 343 | |||
| 344 | static notrace __kprobes void | ||
| 345 | io_check_error(unsigned char reason, struct pt_regs *regs) | ||
| 346 | { | ||
| 347 | unsigned long i; | ||
| 348 | |||
| 349 | pr_emerg( | ||
| 350 | "NMI: IOCK error (debug interrupt?) for reason %02x on CPU %d.\n", | ||
| 351 | reason, smp_processor_id()); | ||
| 352 | show_registers(regs); | ||
| 353 | |||
| 354 | if (panic_on_io_nmi) | ||
| 355 | panic("NMI IOCK error: Not continuing"); | ||
| 356 | |||
| 357 | /* Re-enable the IOCK line, wait for a few seconds */ | ||
| 358 | reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_IOCHK; | ||
| 359 | outb(reason, NMI_REASON_PORT); | ||
| 360 | |||
| 361 | i = 20000; | ||
| 362 | while (--i) { | ||
| 363 | touch_nmi_watchdog(); | ||
| 364 | udelay(100); | ||
| 365 | } | ||
| 366 | |||
| 367 | reason &= ~NMI_REASON_CLEAR_IOCHK; | ||
| 368 | outb(reason, NMI_REASON_PORT); | ||
| 369 | } | ||
| 370 | |||
| 371 | static notrace __kprobes void | ||
| 372 | unknown_nmi_error(unsigned char reason, struct pt_regs *regs) | ||
| 373 | { | ||
| 374 | if (notify_die(DIE_NMIUNKNOWN, "nmi", regs, reason, 2, SIGINT) == | ||
| 375 | NOTIFY_STOP) | ||
| 376 | return; | ||
| 377 | #ifdef CONFIG_MCA | ||
| 378 | /* | ||
| 379 | * Might actually be able to figure out what the guilty party | ||
| 380 | * is: | ||
| 381 | */ | ||
| 382 | if (MCA_bus) { | ||
| 383 | mca_handle_nmi(); | ||
| 384 | return; | ||
| 385 | } | ||
| 386 | #endif | ||
| 387 | pr_emerg("Uhhuh. NMI received for unknown reason %02x on CPU %d.\n", | ||
| 388 | reason, smp_processor_id()); | ||
| 389 | |||
| 390 | pr_emerg("Do you have a strange power saving mode enabled?\n"); | ||
| 391 | if (unknown_nmi_panic || panic_on_unrecovered_nmi) | ||
| 392 | panic("NMI: Not continuing"); | ||
| 393 | |||
| 394 | pr_emerg("Dazed and confused, but trying to continue\n"); | ||
| 395 | } | ||
| 396 | |||
| 397 | static notrace __kprobes void default_do_nmi(struct pt_regs *regs) | ||
| 398 | { | ||
| 399 | unsigned char reason = 0; | ||
| 400 | |||
| 401 | /* | ||
| 402 | * CPU-specific NMI must be processed before non-CPU-specific | ||
| 403 | * NMI, otherwise we may lose it, because the CPU-specific | ||
| 404 | * NMI can not be detected/processed on other CPUs. | ||
| 405 | */ | ||
| 406 | if (notify_die(DIE_NMI, "nmi", regs, 0, 2, SIGINT) == NOTIFY_STOP) | ||
| 407 | return; | ||
| 408 | |||
| 409 | /* Non-CPU-specific NMI: NMI sources can be processed on any CPU */ | ||
| 410 | raw_spin_lock(&nmi_reason_lock); | ||
| 411 | reason = get_nmi_reason(); | ||
| 412 | |||
| 413 | if (reason & NMI_REASON_MASK) { | ||
| 414 | if (reason & NMI_REASON_SERR) | ||
| 415 | pci_serr_error(reason, regs); | ||
| 416 | else if (reason & NMI_REASON_IOCHK) | ||
| 417 | io_check_error(reason, regs); | ||
| 418 | #ifdef CONFIG_X86_32 | ||
| 419 | /* | ||
| 420 | * Reassert NMI in case it became active | ||
| 421 | * meanwhile as it's edge-triggered: | ||
| 422 | */ | ||
| 423 | reassert_nmi(); | ||
| 424 | #endif | ||
| 425 | raw_spin_unlock(&nmi_reason_lock); | ||
| 426 | return; | ||
| 427 | } | ||
| 428 | raw_spin_unlock(&nmi_reason_lock); | ||
| 429 | |||
| 430 | unknown_nmi_error(reason, regs); | ||
| 431 | } | ||
| 432 | |||
| 433 | dotraplinkage notrace __kprobes void | ||
| 434 | do_nmi(struct pt_regs *regs, long error_code) | ||
| 435 | { | ||
| 436 | nmi_enter(); | ||
| 437 | |||
| 438 | inc_irq_stat(__nmi_count); | ||
| 439 | |||
| 440 | if (!ignore_nmis) | ||
| 441 | default_do_nmi(regs); | ||
| 442 | |||
| 443 | nmi_exit(); | ||
| 444 | } | ||
| 445 | |||
| 446 | void stop_nmi(void) | ||
| 447 | { | ||
| 448 | ignore_nmis++; | ||
| 449 | } | ||
| 450 | |||
| 451 | void restart_nmi(void) | ||
| 452 | { | ||
| 453 | ignore_nmis--; | ||
| 454 | } | ||
| 455 | |||
| 456 | /* May run on IST stack. */ | 301 | /* May run on IST stack. */ |
| 457 | dotraplinkage void __kprobes do_int3(struct pt_regs *regs, long error_code) | 302 | dotraplinkage void __kprobes do_int3(struct pt_regs *regs, long error_code) |
| 458 | { | 303 | { |
| @@ -872,12 +717,6 @@ void __init trap_init(void) | |||
| 872 | set_bit(SYSCALL_VECTOR, used_vectors); | 717 | set_bit(SYSCALL_VECTOR, used_vectors); |
| 873 | #endif | 718 | #endif |
| 874 | 719 | ||
| 875 | #ifdef CONFIG_X86_64 | ||
| 876 | BUG_ON(test_bit(VSYSCALL_EMU_VECTOR, used_vectors)); | ||
| 877 | set_system_intr_gate(VSYSCALL_EMU_VECTOR, &emulate_vsyscall); | ||
| 878 | set_bit(VSYSCALL_EMU_VECTOR, used_vectors); | ||
| 879 | #endif | ||
| 880 | |||
| 881 | /* | 720 | /* |
| 882 | * Should be a barrier for any external CPU state: | 721 | * Should be a barrier for any external CPU state: |
| 883 | */ | 722 | */ |
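
The NMI code removed here (reason-port decoding, default_do_nmi(), do_nmi(), stop_nmi()/restart_nmi()) is relocated to the new arch/x86/kernel/nmi.c rather than deleted. Consumers that used DIE_NMI die-notifiers migrate to the registry; a hedged sketch with illustrative names, using the NMI_DONE/NMI_HANDLED return values from the 3.2-era asm/nmi.h:

    #include <linux/init.h>
    #include <asm/nmi.h>

    /* hypothetical stand-in for a device status check */
    static bool example_device_asserted(void)
    {
    	return false;
    }

    static int example_nmi(unsigned int type, struct pt_regs *regs)
    {
    	if (!example_device_asserted())
    		return NMI_DONE;	/* not ours; let other handlers look */
    	/* ... quiesce the device ... */
    	return NMI_HANDLED;
    }

    static int __init example_init(void)
    {
    	/* NMI_UNKNOWN handlers run only for otherwise-unclaimed NMIs */
    	return register_nmi_handler(NMI_UNKNOWN, example_nmi, 0, "example");
    }
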
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 4aa9c54a9b76..0f703f10901a 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S | |||
| @@ -71,7 +71,6 @@ PHDRS { | |||
| 71 | text PT_LOAD FLAGS(5); /* R_E */ | 71 | text PT_LOAD FLAGS(5); /* R_E */ |
| 72 | data PT_LOAD FLAGS(6); /* RW_ */ | 72 | data PT_LOAD FLAGS(6); /* RW_ */ |
| 73 | #ifdef CONFIG_X86_64 | 73 | #ifdef CONFIG_X86_64 |
| 74 | user PT_LOAD FLAGS(5); /* R_E */ | ||
| 75 | #ifdef CONFIG_SMP | 74 | #ifdef CONFIG_SMP |
| 76 | percpu PT_LOAD FLAGS(6); /* RW_ */ | 75 | percpu PT_LOAD FLAGS(6); /* RW_ */ |
| 77 | #endif | 76 | #endif |
| @@ -154,44 +153,16 @@ SECTIONS | |||
| 154 | 153 | ||
| 155 | #ifdef CONFIG_X86_64 | 154 | #ifdef CONFIG_X86_64 |
| 156 | 155 | ||
| 157 | #define VSYSCALL_ADDR (-10*1024*1024) | 156 | . = ALIGN(PAGE_SIZE); |
| 158 | |||
| 159 | #define VLOAD_OFFSET (VSYSCALL_ADDR - __vsyscall_0 + LOAD_OFFSET) | ||
| 160 | #define VLOAD(x) (ADDR(x) - VLOAD_OFFSET) | ||
| 161 | |||
| 162 | #define VVIRT_OFFSET (VSYSCALL_ADDR - __vsyscall_0) | ||
| 163 | #define VVIRT(x) (ADDR(x) - VVIRT_OFFSET) | ||
| 164 | |||
| 165 | . = ALIGN(4096); | ||
| 166 | __vsyscall_0 = .; | ||
| 167 | |||
| 168 | . = VSYSCALL_ADDR; | ||
| 169 | .vsyscall : AT(VLOAD(.vsyscall)) { | ||
| 170 | *(.vsyscall_0) | ||
| 171 | |||
| 172 | . = 1024; | ||
| 173 | *(.vsyscall_1) | ||
| 174 | |||
| 175 | . = 2048; | ||
| 176 | *(.vsyscall_2) | ||
| 177 | |||
| 178 | . = 4096; /* Pad the whole page. */ | ||
| 179 | } :user =0xcc | ||
| 180 | . = ALIGN(__vsyscall_0 + PAGE_SIZE, PAGE_SIZE); | ||
| 181 | |||
| 182 | #undef VSYSCALL_ADDR | ||
| 183 | #undef VLOAD_OFFSET | ||
| 184 | #undef VLOAD | ||
| 185 | #undef VVIRT_OFFSET | ||
| 186 | #undef VVIRT | ||
| 187 | |||
| 188 | __vvar_page = .; | 157 | __vvar_page = .; |
| 189 | 158 | ||
| 190 | .vvar : AT(ADDR(.vvar) - LOAD_OFFSET) { | 159 | .vvar : AT(ADDR(.vvar) - LOAD_OFFSET) { |
| 160 | /* work around gold bug 13023 */ | ||
| 161 | __vvar_beginning_hack = .; | ||
| 191 | 162 | ||
| 192 | /* Place all vvars at the offsets in asm/vvar.h. */ | 163 | /* Place all vvars at the offsets in asm/vvar.h. */ |
| 193 | #define EMIT_VVAR(name, offset) \ | 164 | #define EMIT_VVAR(name, offset) \ |
| 194 | . = offset; \ | 165 | . = __vvar_beginning_hack + offset; \ |
| 195 | *(.vvar_ ## name) | 166 | *(.vvar_ ## name) |
| 196 | #define __VVAR_KERNEL_LDS | 167 | #define __VVAR_KERNEL_LDS |
| 197 | #include <asm/vvar.h> | 168 | #include <asm/vvar.h> |
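
The __vvar_beginning_hack symbol exists only because gold miscomputes a bare ". = offset;" inside a section (binutils bug 13023, per the comment); adding offsets to a section-local symbol sidesteps it. The EMIT_VVAR lines are generated from asm/vvar.h, whose reader-side half looks roughly like this (the offset below is illustrative, not verified):

    #define DECLARE_VVAR(offset, type, name)			\
    	static type const * const vvaraddr_ ## name =		\
    		(void *)(VVAR_ADDRESS + (offset));

    #define VVAR(name) (*vvaraddr_ ## name)

    /* example entry; the real offsets live in asm/vvar.h */
    DECLARE_VVAR(128, struct vsyscall_gtod_data, vsyscall_gtod_data)
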
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c index dda7dff9cef7..e4d4a22e8b94 100644 --- a/arch/x86/kernel/vsyscall_64.c +++ b/arch/x86/kernel/vsyscall_64.c | |||
| @@ -18,9 +18,6 @@ | |||
| 18 | * use the vDSO. | 18 | * use the vDSO. |
| 19 | */ | 19 | */ |
| 20 | 20 | ||
| 21 | /* Disable profiling for userspace code: */ | ||
| 22 | #define DISABLE_BRANCH_PROFILING | ||
| 23 | |||
| 24 | #include <linux/time.h> | 21 | #include <linux/time.h> |
| 25 | #include <linux/init.h> | 22 | #include <linux/init.h> |
| 26 | #include <linux/kernel.h> | 23 | #include <linux/kernel.h> |
| @@ -28,6 +25,7 @@ | |||
| 28 | #include <linux/seqlock.h> | 25 | #include <linux/seqlock.h> |
| 29 | #include <linux/jiffies.h> | 26 | #include <linux/jiffies.h> |
| 30 | #include <linux/sysctl.h> | 27 | #include <linux/sysctl.h> |
| 28 | #include <linux/topology.h> | ||
| 31 | #include <linux/clocksource.h> | 29 | #include <linux/clocksource.h> |
| 32 | #include <linux/getcpu.h> | 30 | #include <linux/getcpu.h> |
| 33 | #include <linux/cpu.h> | 31 | #include <linux/cpu.h> |
| @@ -50,12 +48,36 @@ | |||
| 50 | #include <asm/vgtod.h> | 48 | #include <asm/vgtod.h> |
| 51 | #include <asm/traps.h> | 49 | #include <asm/traps.h> |
| 52 | 50 | ||
| 51 | #define CREATE_TRACE_POINTS | ||
| 52 | #include "vsyscall_trace.h" | ||
| 53 | |||
| 53 | DEFINE_VVAR(int, vgetcpu_mode); | 54 | DEFINE_VVAR(int, vgetcpu_mode); |
| 54 | DEFINE_VVAR(struct vsyscall_gtod_data, vsyscall_gtod_data) = | 55 | DEFINE_VVAR(struct vsyscall_gtod_data, vsyscall_gtod_data) = |
| 55 | { | 56 | { |
| 56 | .lock = __SEQLOCK_UNLOCKED(__vsyscall_gtod_data.lock), | 57 | .lock = __SEQLOCK_UNLOCKED(__vsyscall_gtod_data.lock), |
| 57 | }; | 58 | }; |
| 58 | 59 | ||
| 60 | static enum { EMULATE, NATIVE, NONE } vsyscall_mode = NATIVE; | ||
| 61 | |||
| 62 | static int __init vsyscall_setup(char *str) | ||
| 63 | { | ||
| 64 | if (str) { | ||
| 65 | if (!strcmp("emulate", str)) | ||
| 66 | vsyscall_mode = EMULATE; | ||
| 67 | else if (!strcmp("native", str)) | ||
| 68 | vsyscall_mode = NATIVE; | ||
| 69 | else if (!strcmp("none", str)) | ||
| 70 | vsyscall_mode = NONE; | ||
| 71 | else | ||
| 72 | return -EINVAL; | ||
| 73 | |||
| 74 | return 0; | ||
| 75 | } | ||
| 76 | |||
| 77 | return -EINVAL; | ||
| 78 | } | ||
| 79 | early_param("vsyscall", vsyscall_setup); | ||
| 80 | |||
| 59 | void update_vsyscall_tz(void) | 81 | void update_vsyscall_tz(void) |
| 60 | { | 82 | { |
| 61 | unsigned long flags; | 83 | unsigned long flags; |
| @@ -100,7 +122,7 @@ static void warn_bad_vsyscall(const char *level, struct pt_regs *regs, | |||
| 100 | 122 | ||
| 101 | printk("%s%s[%d] %s ip:%lx cs:%lx sp:%lx ax:%lx si:%lx di:%lx\n", | 123 | printk("%s%s[%d] %s ip:%lx cs:%lx sp:%lx ax:%lx si:%lx di:%lx\n", |
| 102 | level, tsk->comm, task_pid_nr(tsk), | 124 | level, tsk->comm, task_pid_nr(tsk), |
| 103 | message, regs->ip - 2, regs->cs, | 125 | message, regs->ip, regs->cs, |
| 104 | regs->sp, regs->ax, regs->si, regs->di); | 126 | regs->sp, regs->ax, regs->si, regs->di); |
| 105 | } | 127 | } |
| 106 | 128 | ||
| @@ -118,46 +140,39 @@ static int addr_to_vsyscall_nr(unsigned long addr) | |||
| 118 | return nr; | 140 | return nr; |
| 119 | } | 141 | } |
| 120 | 142 | ||
| 121 | void dotraplinkage do_emulate_vsyscall(struct pt_regs *regs, long error_code) | 143 | bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) |
| 122 | { | 144 | { |
| 123 | struct task_struct *tsk; | 145 | struct task_struct *tsk; |
| 124 | unsigned long caller; | 146 | unsigned long caller; |
| 125 | int vsyscall_nr; | 147 | int vsyscall_nr; |
| 126 | long ret; | 148 | long ret; |
| 127 | 149 | ||
| 128 | local_irq_enable(); | ||
| 129 | |||
| 130 | /* | 150 | /* |
| 131 | * Real 64-bit user mode code has cs == __USER_CS. Anything else | 151 | * No point in checking CS -- the only way to get here is a user mode |
| 132 | * is bogus. | 152 | * trap to a high address, which means that we're in 64-bit user code. |
| 133 | */ | 153 | */ |
| 134 | if (regs->cs != __USER_CS) { | ||
| 135 | /* | ||
| 136 | * If we trapped from kernel mode, we might as well OOPS now | ||
| 137 | * instead of returning to some random address and OOPSing | ||
| 138 | * then. | ||
| 139 | */ | ||
| 140 | BUG_ON(!user_mode(regs)); | ||
| 141 | 154 | ||
| 142 | /* Compat mode and non-compat 32-bit CS should both segfault. */ | 155 | WARN_ON_ONCE(address != regs->ip); |
| 143 | warn_bad_vsyscall(KERN_WARNING, regs, | 156 | |
| 144 | "illegal int 0xcc from 32-bit mode"); | 157 | if (vsyscall_mode == NONE) { |
| 145 | goto sigsegv; | 158 | warn_bad_vsyscall(KERN_INFO, regs, |
| 159 | "vsyscall attempted with vsyscall=none"); | ||
| 160 | return false; | ||
| 146 | } | 161 | } |
| 147 | 162 | ||
| 148 | /* | 163 | vsyscall_nr = addr_to_vsyscall_nr(address); |
| 149 | * x86-ism here: regs->ip points to the instruction after the int 0xcc, | 164 | |
| 150 | * and int 0xcc is two bytes long. | 165 | trace_emulate_vsyscall(vsyscall_nr); |
| 151 | */ | 166 | |
| 152 | vsyscall_nr = addr_to_vsyscall_nr(regs->ip - 2); | ||
| 153 | if (vsyscall_nr < 0) { | 167 | if (vsyscall_nr < 0) { |
| 154 | warn_bad_vsyscall(KERN_WARNING, regs, | 168 | warn_bad_vsyscall(KERN_WARNING, regs, |
| 155 | "illegal int 0xcc (exploit attempt?)"); | 169 | "misaligned vsyscall (exploit attempt or buggy program) -- look up the vsyscall kernel parameter if you need a workaround"); |
| 156 | goto sigsegv; | 170 | goto sigsegv; |
| 157 | } | 171 | } |
| 158 | 172 | ||
| 159 | if (get_user(caller, (unsigned long __user *)regs->sp) != 0) { | 173 | if (get_user(caller, (unsigned long __user *)regs->sp) != 0) { |
| 160 | warn_bad_vsyscall(KERN_WARNING, regs, "int 0xcc with bad stack (exploit attempt?)"); | 174 | warn_bad_vsyscall(KERN_WARNING, regs, |
| 175 | "vsyscall with bad stack (exploit attempt?)"); | ||
| 161 | goto sigsegv; | 176 | goto sigsegv; |
| 162 | } | 177 | } |
| 163 | 178 | ||
| @@ -202,13 +217,11 @@ void dotraplinkage do_emulate_vsyscall(struct pt_regs *regs, long error_code) | |||
| 202 | regs->ip = caller; | 217 | regs->ip = caller; |
| 203 | regs->sp += 8; | 218 | regs->sp += 8; |
| 204 | 219 | ||
| 205 | local_irq_disable(); | 220 | return true; |
| 206 | return; | ||
| 207 | 221 | ||
| 208 | sigsegv: | 222 | sigsegv: |
| 209 | regs->ip -= 2; /* The faulting instruction should be the int 0xcc. */ | ||
| 210 | force_sig(SIGSEGV, current); | 223 | force_sig(SIGSEGV, current); |
| 211 | local_irq_disable(); | 224 | return true; |
| 212 | } | 225 | } |
| 213 | 226 | ||
| 214 | /* | 227 | /* |
| @@ -256,15 +269,21 @@ cpu_vsyscall_notifier(struct notifier_block *n, unsigned long action, void *arg) | |||
| 256 | 269 | ||
| 257 | void __init map_vsyscall(void) | 270 | void __init map_vsyscall(void) |
| 258 | { | 271 | { |
| 259 | extern char __vsyscall_0; | 272 | extern char __vsyscall_page; |
| 260 | unsigned long physaddr_page0 = __pa_symbol(&__vsyscall_0); | 273 | unsigned long physaddr_vsyscall = __pa_symbol(&__vsyscall_page); |
| 261 | extern char __vvar_page; | 274 | extern char __vvar_page; |
| 262 | unsigned long physaddr_vvar_page = __pa_symbol(&__vvar_page); | 275 | unsigned long physaddr_vvar_page = __pa_symbol(&__vvar_page); |
| 263 | 276 | ||
| 264 | /* Note that VSYSCALL_MAPPED_PAGES must agree with the code below. */ | 277 | __set_fixmap(VSYSCALL_FIRST_PAGE, physaddr_vsyscall, |
| 265 | __set_fixmap(VSYSCALL_FIRST_PAGE, physaddr_page0, PAGE_KERNEL_VSYSCALL); | 278 | vsyscall_mode == NATIVE |
| 279 | ? PAGE_KERNEL_VSYSCALL | ||
| 280 | : PAGE_KERNEL_VVAR); | ||
| 281 | BUILD_BUG_ON((unsigned long)__fix_to_virt(VSYSCALL_FIRST_PAGE) != | ||
| 282 | (unsigned long)VSYSCALL_START); | ||
| 283 | |||
| 266 | __set_fixmap(VVAR_PAGE, physaddr_vvar_page, PAGE_KERNEL_VVAR); | 284 | __set_fixmap(VVAR_PAGE, physaddr_vvar_page, PAGE_KERNEL_VVAR); |
| 267 | BUILD_BUG_ON((unsigned long)__fix_to_virt(VVAR_PAGE) != (unsigned long)VVAR_ADDRESS); | 285 | BUILD_BUG_ON((unsigned long)__fix_to_virt(VVAR_PAGE) != |
| 286 | (unsigned long)VVAR_ADDRESS); | ||
| 268 | } | 287 | } |
| 269 | 288 | ||
| 270 | static int __init vsyscall_init(void) | 289 | static int __init vsyscall_init(void) |
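
With the trap gate gone from trap_init(), emulate_vsyscall() is reached from the page-fault path instead: an instruction fetch in the (now non-executable, unless vsyscall=native) vsyscall page faults, and the fault handler redirects it. A sketch of the expected call site inside do_page_fault() in mm/fault.c, as I recall the companion hunk:

    #ifdef CONFIG_X86_64
    	/*
    	 * Instruction fetch faults in the vsyscall page are sent to
    	 * the emulator, which may fix up regs and resume the task.
    	 */
    	if (unlikely((error_code & PF_INSTR) &&
    		     ((address & ~0xfff) == VSYSCALL_START))) {
    		if (emulate_vsyscall(regs, address))
    			return;
    	}
    #endif
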
diff --git a/arch/x86/kernel/vsyscall_emu_64.S b/arch/x86/kernel/vsyscall_emu_64.S index ffa845eae5ca..c9596a9af159 100644 --- a/arch/x86/kernel/vsyscall_emu_64.S +++ b/arch/x86/kernel/vsyscall_emu_64.S | |||
| @@ -7,21 +7,31 @@ | |||
| 7 | */ | 7 | */ |
| 8 | 8 | ||
| 9 | #include <linux/linkage.h> | 9 | #include <linux/linkage.h> |
| 10 | |||
| 10 | #include <asm/irq_vectors.h> | 11 | #include <asm/irq_vectors.h> |
| 12 | #include <asm/page_types.h> | ||
| 13 | #include <asm/unistd_64.h> | ||
| 14 | |||
| 15 | __PAGE_ALIGNED_DATA | ||
| 16 | .globl __vsyscall_page | ||
| 17 | .balign PAGE_SIZE, 0xcc | ||
| 18 | .type __vsyscall_page, @object | ||
| 19 | __vsyscall_page: | ||
| 20 | |||
| 21 | mov $__NR_gettimeofday, %rax | ||
| 22 | syscall | ||
| 23 | ret | ||
| 11 | 24 | ||
| 12 | /* The unused parts of the page are filled with 0xcc by the linker script. */ | 25 | .balign 1024, 0xcc |
| 26 | mov $__NR_time, %rax | ||
| 27 | syscall | ||
| 28 | ret | ||
| 13 | 29 | ||
| 14 | .section .vsyscall_0, "a" | 30 | .balign 1024, 0xcc |
| 15 | ENTRY(vsyscall_0) | 31 | mov $__NR_getcpu, %rax |
| 16 | int $VSYSCALL_EMU_VECTOR | 32 | syscall |
| 17 | END(vsyscall_0) | 33 | ret |
| 18 | 34 | ||
| 19 | .section .vsyscall_1, "a" | 35 | .balign 4096, 0xcc |
| 20 | ENTRY(vsyscall_1) | ||
| 21 | int $VSYSCALL_EMU_VECTOR | ||
| 22 | END(vsyscall_1) | ||
| 23 | 36 | ||
| 24 | .section .vsyscall_2, "a" | 37 | .size __vsyscall_page, 4096 |
| 25 | ENTRY(vsyscall_2) | ||
| 26 | int $VSYSCALL_EMU_VECTOR | ||
| 27 | END(vsyscall_2) | ||
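
The .balign 1024 directives pin the three stubs at VSYSCALL_START + 0, +1024 and +2048, which is exactly what addr_to_vsyscall_nr() in vsyscall_64.c decodes. For reference, that decoder from the same series:

    static int addr_to_vsyscall_nr(unsigned long addr)
    {
    	int nr;

    	/* anything outside the three 1024-byte slots is not a vsyscall */
    	if ((addr & ~0xC00UL) != VSYSCALL_START)
    		return -EINVAL;

    	nr = (addr & 0xC00UL) >> 10;
    	if (nr >= 3)
    		return -EINVAL;

    	return nr;
    }
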
diff --git a/arch/x86/kernel/vsyscall_trace.h b/arch/x86/kernel/vsyscall_trace.h new file mode 100644 index 000000000000..a8b2edec54fe --- /dev/null +++ b/arch/x86/kernel/vsyscall_trace.h | |||
| @@ -0,0 +1,29 @@ | |||
| 1 | #undef TRACE_SYSTEM | ||
| 2 | #define TRACE_SYSTEM vsyscall | ||
| 3 | |||
| 4 | #if !defined(__VSYSCALL_TRACE_H) || defined(TRACE_HEADER_MULTI_READ) | ||
| 5 | #define __VSYSCALL_TRACE_H | ||
| 6 | |||
| 7 | #include <linux/tracepoint.h> | ||
| 8 | |||
| 9 | TRACE_EVENT(emulate_vsyscall, | ||
| 10 | |||
| 11 | TP_PROTO(int nr), | ||
| 12 | |||
| 13 | TP_ARGS(nr), | ||
| 14 | |||
| 15 | TP_STRUCT__entry(__field(int, nr)), | ||
| 16 | |||
| 17 | TP_fast_assign( | ||
| 18 | __entry->nr = nr; | ||
| 19 | ), | ||
| 20 | |||
| 21 | TP_printk("nr = %d", __entry->nr) | ||
| 22 | ); | ||
| 23 | |||
| 24 | #endif | ||
| 25 | |||
| 26 | #undef TRACE_INCLUDE_PATH | ||
| 27 | #define TRACE_INCLUDE_PATH ../../arch/x86/kernel | ||
| 28 | #define TRACE_INCLUDE_FILE vsyscall_trace | ||
| 29 | #include <trace/define_trace.h> | ||
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index 6f164bd5e14d..c1d6cd549397 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c | |||
| @@ -21,12 +21,14 @@ | |||
| 21 | #include <asm/pat.h> | 21 | #include <asm/pat.h> |
| 22 | #include <asm/tsc.h> | 22 | #include <asm/tsc.h> |
| 23 | #include <asm/iommu.h> | 23 | #include <asm/iommu.h> |
| 24 | #include <asm/mach_traps.h> | ||
| 24 | 25 | ||
| 25 | void __cpuinit x86_init_noop(void) { } | 26 | void __cpuinit x86_init_noop(void) { } |
| 26 | void __init x86_init_uint_noop(unsigned int unused) { } | 27 | void __init x86_init_uint_noop(unsigned int unused) { } |
| 27 | void __init x86_init_pgd_noop(pgd_t *unused) { } | 28 | void __init x86_init_pgd_noop(pgd_t *unused) { } |
| 28 | int __init iommu_init_noop(void) { return 0; } | 29 | int __init iommu_init_noop(void) { return 0; } |
| 29 | void iommu_shutdown_noop(void) { } | 30 | void iommu_shutdown_noop(void) { } |
| 31 | void wallclock_init_noop(void) { } | ||
| 30 | 32 | ||
| 31 | /* | 33 | /* |
| 32 | * The platform setup functions are preset with the default functions | 34 | * The platform setup functions are preset with the default functions |
| @@ -97,11 +99,13 @@ static int default_i8042_detect(void) { return 1; }; | |||
| 97 | 99 | ||
| 98 | struct x86_platform_ops x86_platform = { | 100 | struct x86_platform_ops x86_platform = { |
| 99 | .calibrate_tsc = native_calibrate_tsc, | 101 | .calibrate_tsc = native_calibrate_tsc, |
| 102 | .wallclock_init = wallclock_init_noop, | ||
| 100 | .get_wallclock = mach_get_cmos_time, | 103 | .get_wallclock = mach_get_cmos_time, |
| 101 | .set_wallclock = mach_set_rtc_mmss, | 104 | .set_wallclock = mach_set_rtc_mmss, |
| 102 | .iommu_shutdown = iommu_shutdown_noop, | 105 | .iommu_shutdown = iommu_shutdown_noop, |
| 103 | .is_untracked_pat_range = is_ISA_range, | 106 | .is_untracked_pat_range = is_ISA_range, |
| 104 | .nmi_init = default_nmi_init, | 107 | .nmi_init = default_nmi_init, |
| 108 | .get_nmi_reason = default_get_nmi_reason, | ||
| 105 | .i8042_detect = default_i8042_detect | 109 | .i8042_detect = default_i8042_detect |
| 106 | }; | 110 | }; |
| 107 | 111 | ||
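
The two new ops let platform code hook wallclock setup and NMI reason reads without patching setup_arch() or the NMI path. A hedged sketch of a platform overriding them; all "example_" names are hypothetical:

    #include <asm/x86_init.h>

    static void __init example_wallclock_init(void)
    {
    	/* program the board RTC, claim resources, etc. (illustrative) */
    }

    static unsigned char example_get_nmi_reason(void)
    {
    	return 0;	/* e.g. a platform with no port-0x61 equivalent */
    }

    static void __init example_platform_setup(void)
    {
    	x86_platform.wallclock_init = example_wallclock_init;
    	x86_platform.get_nmi_reason = example_get_nmi_reason;
    }
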
