diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2009-08-13 15:24:33 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2009-08-13 15:24:33 -0400 |
commit | 3493e84de60590d3012139187f631f2dfbf0887f (patch) | |
tree | c7a994c2e6f0d06b5044ea0b322e5ff0cc0b4a63 /arch/x86 | |
parent | 919aa96a9cfc5071f037bf58718e05335562a6ac (diff) | |
parent | 94d5d1b2d891f1fd5205f978246b7864d998b25c (diff) |
Merge branch 'perfcounters-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'perfcounters-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
perf_counter: Report the cloning task as parent on perf_counter_fork()
perf_counter: Fix an ipi-deadlock
perf: Rework/fix the whole read vs group stuff
perf_counter: Fix swcounter context invariance
perf report: Don't show unresolved DSOs and symbols when -S/-d is used
perf tools: Add a general option to enable raw sample records
perf tools: Add a per tracepoint counter attribute to get raw sample
perf_counter: Provide hw_perf_counter_setup_online() APIs
perf list: Fix large list output by using the pager
perf_counter, x86: Fix/improve apic fallback
perf record: Add missing -C option support for specifying profile cpu
perf tools: Fix dso__new handle() to handle deleted DSOs
perf tools: Fix fallback to cplus_demangle() when bfd_demangle() is not available
perf report: Show the tid too in -D
perf record: Fix .tid and .pid fill-in when synthesizing events
perf_counter, x86: Fix generic cache events on P6-mobile CPUs
perf_counter, x86: Fix lapic printk message
Diffstat (limited to 'arch/x86')
-rw-r--r-- | arch/x86/Kconfig | 2 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/perf_counter.c | 40 |
2 files changed, 34 insertions, 8 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 738bdc6b0f8b..13ffa5df37d7 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig | |||
@@ -24,6 +24,7 @@ config X86 | |||
24 | select HAVE_UNSTABLE_SCHED_CLOCK | 24 | select HAVE_UNSTABLE_SCHED_CLOCK |
25 | select HAVE_IDE | 25 | select HAVE_IDE |
26 | select HAVE_OPROFILE | 26 | select HAVE_OPROFILE |
27 | select HAVE_PERF_COUNTERS if (!M386 && !M486) | ||
27 | select HAVE_IOREMAP_PROT | 28 | select HAVE_IOREMAP_PROT |
28 | select HAVE_KPROBES | 29 | select HAVE_KPROBES |
29 | select ARCH_WANT_OPTIONAL_GPIOLIB | 30 | select ARCH_WANT_OPTIONAL_GPIOLIB |
@@ -742,7 +743,6 @@ config X86_UP_IOAPIC | |||
742 | config X86_LOCAL_APIC | 743 | config X86_LOCAL_APIC |
743 | def_bool y | 744 | def_bool y |
744 | depends on X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_APIC | 745 | depends on X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_APIC |
745 | select HAVE_PERF_COUNTERS if (!M386 && !M486) | ||
746 | 746 | ||
747 | config X86_IO_APIC | 747 | config X86_IO_APIC |
748 | def_bool y | 748 | def_bool y |
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index a7aa8f900954..900332b800f8 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c | |||
@@ -55,6 +55,7 @@ struct x86_pmu { | |||
55 | int num_counters_fixed; | 55 | int num_counters_fixed; |
56 | int counter_bits; | 56 | int counter_bits; |
57 | u64 counter_mask; | 57 | u64 counter_mask; |
58 | int apic; | ||
58 | u64 max_period; | 59 | u64 max_period; |
59 | u64 intel_ctrl; | 60 | u64 intel_ctrl; |
60 | }; | 61 | }; |
@@ -72,8 +73,8 @@ static const u64 p6_perfmon_event_map[] = | |||
72 | { | 73 | { |
73 | [PERF_COUNT_HW_CPU_CYCLES] = 0x0079, | 74 | [PERF_COUNT_HW_CPU_CYCLES] = 0x0079, |
74 | [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, | 75 | [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, |
75 | [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0000, | 76 | [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0f2e, |
76 | [PERF_COUNT_HW_CACHE_MISSES] = 0x0000, | 77 | [PERF_COUNT_HW_CACHE_MISSES] = 0x012e, |
77 | [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4, | 78 | [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4, |
78 | [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, | 79 | [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, |
79 | [PERF_COUNT_HW_BUS_CYCLES] = 0x0062, | 80 | [PERF_COUNT_HW_BUS_CYCLES] = 0x0062, |
@@ -613,6 +614,7 @@ static DEFINE_MUTEX(pmc_reserve_mutex); | |||
613 | 614 | ||
614 | static bool reserve_pmc_hardware(void) | 615 | static bool reserve_pmc_hardware(void) |
615 | { | 616 | { |
617 | #ifdef CONFIG_X86_LOCAL_APIC | ||
616 | int i; | 618 | int i; |
617 | 619 | ||
618 | if (nmi_watchdog == NMI_LOCAL_APIC) | 620 | if (nmi_watchdog == NMI_LOCAL_APIC) |
@@ -627,9 +629,11 @@ static bool reserve_pmc_hardware(void) | |||
627 | if (!reserve_evntsel_nmi(x86_pmu.eventsel + i)) | 629 | if (!reserve_evntsel_nmi(x86_pmu.eventsel + i)) |
628 | goto eventsel_fail; | 630 | goto eventsel_fail; |
629 | } | 631 | } |
632 | #endif | ||
630 | 633 | ||
631 | return true; | 634 | return true; |
632 | 635 | ||
636 | #ifdef CONFIG_X86_LOCAL_APIC | ||
633 | eventsel_fail: | 637 | eventsel_fail: |
634 | for (i--; i >= 0; i--) | 638 | for (i--; i >= 0; i--) |
635 | release_evntsel_nmi(x86_pmu.eventsel + i); | 639 | release_evntsel_nmi(x86_pmu.eventsel + i); |
@@ -644,10 +648,12 @@ perfctr_fail: | |||
644 | enable_lapic_nmi_watchdog(); | 648 | enable_lapic_nmi_watchdog(); |
645 | 649 | ||
646 | return false; | 650 | return false; |
651 | #endif | ||
647 | } | 652 | } |
648 | 653 | ||
649 | static void release_pmc_hardware(void) | 654 | static void release_pmc_hardware(void) |
650 | { | 655 | { |
656 | #ifdef CONFIG_X86_LOCAL_APIC | ||
651 | int i; | 657 | int i; |
652 | 658 | ||
653 | for (i = 0; i < x86_pmu.num_counters; i++) { | 659 | for (i = 0; i < x86_pmu.num_counters; i++) { |
@@ -657,6 +663,7 @@ static void release_pmc_hardware(void) | |||
657 | 663 | ||
658 | if (nmi_watchdog == NMI_LOCAL_APIC) | 664 | if (nmi_watchdog == NMI_LOCAL_APIC) |
659 | enable_lapic_nmi_watchdog(); | 665 | enable_lapic_nmi_watchdog(); |
666 | #endif | ||
660 | } | 667 | } |
661 | 668 | ||
662 | static void hw_perf_counter_destroy(struct perf_counter *counter) | 669 | static void hw_perf_counter_destroy(struct perf_counter *counter) |
@@ -748,6 +755,15 @@ static int __hw_perf_counter_init(struct perf_counter *counter) | |||
748 | hwc->sample_period = x86_pmu.max_period; | 755 | hwc->sample_period = x86_pmu.max_period; |
749 | hwc->last_period = hwc->sample_period; | 756 | hwc->last_period = hwc->sample_period; |
750 | atomic64_set(&hwc->period_left, hwc->sample_period); | 757 | atomic64_set(&hwc->period_left, hwc->sample_period); |
758 | } else { | ||
759 | /* | ||
760 | * If we have a PMU initialized but no APIC | ||
761 | * interrupts, we cannot sample hardware | ||
762 | * counters (user-space has to fall back and | ||
763 | * sample via a hrtimer based software counter): | ||
764 | */ | ||
765 | if (!x86_pmu.apic) | ||
766 | return -EOPNOTSUPP; | ||
751 | } | 767 | } |
752 | 768 | ||
753 | counter->destroy = hw_perf_counter_destroy; | 769 | counter->destroy = hw_perf_counter_destroy; |
@@ -1449,18 +1465,22 @@ void smp_perf_pending_interrupt(struct pt_regs *regs) | |||
1449 | 1465 | ||
1450 | void set_perf_counter_pending(void) | 1466 | void set_perf_counter_pending(void) |
1451 | { | 1467 | { |
1468 | #ifdef CONFIG_X86_LOCAL_APIC | ||
1452 | apic->send_IPI_self(LOCAL_PENDING_VECTOR); | 1469 | apic->send_IPI_self(LOCAL_PENDING_VECTOR); |
1470 | #endif | ||
1453 | } | 1471 | } |
1454 | 1472 | ||
1455 | void perf_counters_lapic_init(void) | 1473 | void perf_counters_lapic_init(void) |
1456 | { | 1474 | { |
1457 | if (!x86_pmu_initialized()) | 1475 | #ifdef CONFIG_X86_LOCAL_APIC |
1476 | if (!x86_pmu.apic || !x86_pmu_initialized()) | ||
1458 | return; | 1477 | return; |
1459 | 1478 | ||
1460 | /* | 1479 | /* |
1461 | * Always use NMI for PMU | 1480 | * Always use NMI for PMU |
1462 | */ | 1481 | */ |
1463 | apic_write(APIC_LVTPC, APIC_DM_NMI); | 1482 | apic_write(APIC_LVTPC, APIC_DM_NMI); |
1483 | #endif | ||
1464 | } | 1484 | } |
1465 | 1485 | ||
1466 | static int __kprobes | 1486 | static int __kprobes |
@@ -1484,7 +1504,9 @@ perf_counter_nmi_handler(struct notifier_block *self, | |||
1484 | 1504 | ||
1485 | regs = args->regs; | 1505 | regs = args->regs; |
1486 | 1506 | ||
1507 | #ifdef CONFIG_X86_LOCAL_APIC | ||
1487 | apic_write(APIC_LVTPC, APIC_DM_NMI); | 1508 | apic_write(APIC_LVTPC, APIC_DM_NMI); |
1509 | #endif | ||
1488 | /* | 1510 | /* |
1489 | * Can't rely on the handled return value to say it was our NMI, two | 1511 | * Can't rely on the handled return value to say it was our NMI, two |
1490 | * counters could trigger 'simultaneously' raising two back-to-back NMIs. | 1512 | * counters could trigger 'simultaneously' raising two back-to-back NMIs. |
@@ -1515,6 +1537,7 @@ static struct x86_pmu p6_pmu = { | |||
1515 | .event_map = p6_pmu_event_map, | 1537 | .event_map = p6_pmu_event_map, |
1516 | .raw_event = p6_pmu_raw_event, | 1538 | .raw_event = p6_pmu_raw_event, |
1517 | .max_events = ARRAY_SIZE(p6_perfmon_event_map), | 1539 | .max_events = ARRAY_SIZE(p6_perfmon_event_map), |
1540 | .apic = 1, | ||
1518 | .max_period = (1ULL << 31) - 1, | 1541 | .max_period = (1ULL << 31) - 1, |
1519 | .version = 0, | 1542 | .version = 0, |
1520 | .num_counters = 2, | 1543 | .num_counters = 2, |
@@ -1541,6 +1564,7 @@ static struct x86_pmu intel_pmu = { | |||
1541 | .event_map = intel_pmu_event_map, | 1564 | .event_map = intel_pmu_event_map, |
1542 | .raw_event = intel_pmu_raw_event, | 1565 | .raw_event = intel_pmu_raw_event, |
1543 | .max_events = ARRAY_SIZE(intel_perfmon_event_map), | 1566 | .max_events = ARRAY_SIZE(intel_perfmon_event_map), |
1567 | .apic = 1, | ||
1544 | /* | 1568 | /* |
1545 | * Intel PMCs cannot be accessed sanely above 32 bit width, | 1569 | * Intel PMCs cannot be accessed sanely above 32 bit width, |
1546 | * so we install an artificial 1<<31 period regardless of | 1570 | * so we install an artificial 1<<31 period regardless of |
@@ -1564,6 +1588,7 @@ static struct x86_pmu amd_pmu = { | |||
1564 | .num_counters = 4, | 1588 | .num_counters = 4, |
1565 | .counter_bits = 48, | 1589 | .counter_bits = 48, |
1566 | .counter_mask = (1ULL << 48) - 1, | 1590 | .counter_mask = (1ULL << 48) - 1, |
1591 | .apic = 1, | ||
1567 | /* use highest bit to detect overflow */ | 1592 | /* use highest bit to detect overflow */ |
1568 | .max_period = (1ULL << 47) - 1, | 1593 | .max_period = (1ULL << 47) - 1, |
1569 | }; | 1594 | }; |
@@ -1589,13 +1614,14 @@ static int p6_pmu_init(void) | |||
1589 | return -ENODEV; | 1614 | return -ENODEV; |
1590 | } | 1615 | } |
1591 | 1616 | ||
1617 | x86_pmu = p6_pmu; | ||
1618 | |||
1592 | if (!cpu_has_apic) { | 1619 | if (!cpu_has_apic) { |
1593 | pr_info("no Local APIC, try rebooting with lapic"); | 1620 | pr_info("no APIC, boot with the \"lapic\" boot parameter to force-enable it.\n"); |
1594 | return -ENODEV; | 1621 | pr_info("no hardware sampling interrupt available.\n"); |
1622 | x86_pmu.apic = 0; | ||
1595 | } | 1623 | } |
1596 | 1624 | ||
1597 | x86_pmu = p6_pmu; | ||
1598 | |||
1599 | return 0; | 1625 | return 0; |
1600 | } | 1626 | } |
1601 | 1627 | ||