author     Ingo Molnar <mingo@elte.hu>  2011-05-01 13:09:39 -0400
committer  Ingo Molnar <mingo@elte.hu>  2011-05-01 13:09:39 -0400
commit     809435ff4f43a5c0cb0201b3b89176253d5ade18 (patch)
tree       607d673c02117656a956b54dd2bb80fe94edea61 /arch/x86/kernel/cpu
parent     32673822e440eb92eb334631eb0a199d0c532d13 (diff)
parent     058e297d34a404caaa5ed277de15698d8dc43000 (diff)
Merge branch 'tip/perf/urgent' of git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-2.6-trace into perf/core
Diffstat (limited to 'arch/x86/kernel/cpu')
-rw-r--r--  arch/x86/kernel/cpu/perf_event.c        22
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel.c  38
-rw-r--r--  arch/x86/kernel/cpu/perf_event_p4.c     19
3 files changed, 63 insertions, 16 deletions
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 224a84f7080c..0de6b2b31f61 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -593,8 +593,12 @@ static int x86_setup_perfctr(struct perf_event *event)
 			return -EOPNOTSUPP;
 	}
 
+	/*
+	 * Do not allow config1 (extended registers) to propagate,
+	 * there's no sane user-space generalization yet:
+	 */
 	if (attr->type == PERF_TYPE_RAW)
-		return x86_pmu_extra_regs(event->attr.config, event);
+		return 0;
 
 	if (attr->type == PERF_TYPE_HW_CACHE)
 		return set_ext_hw_attr(hwc, event);
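For context, here is a hedged user-space sketch of the case this hunk shuts off: a PERF_TYPE_RAW event carrying an extended-register value in attr.config1, which the kernel now silently ignores rather than routing through x86_pmu_extra_regs(). The event code and values are assumptions for illustration, not part of the patch.

	/*
	 * Illustrative only: 0x01b7 is assumed to be OFFCORE_RESPONSE_0
	 * on Nehalem; config1 would carry the extra register value.
	 */
	#include <linux/perf_event.h>
	#include <string.h>
	#include <sys/syscall.h>
	#include <unistd.h>

	static int open_raw_offcore_event(void)
	{
		struct perf_event_attr attr;

		memset(&attr, 0, sizeof(attr));
		attr.size    = sizeof(attr);
		attr.type    = PERF_TYPE_RAW;
		attr.config  = 0x01b7;	/* assumed OFFCORE_RESPONSE_0 encoding */
		attr.config1 = 0xffff;	/* extended register: now ignored */

		/* current thread, any CPU, no group, no flags */
		return syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
	}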
@@ -616,8 +620,8 @@ static int x86_setup_perfctr(struct perf_event *event)
 	/*
 	 * Branch tracing:
 	 */
-	if ((attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS) &&
-	    (hwc->sample_period == 1)) {
+	if (attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS &&
+	    !attr->freq && hwc->sample_period == 1) {
 		/* BTS is not supported by this architecture. */
 		if (!x86_pmu.bts_active)
 			return -EOPNOTSUPP;
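The added !attr->freq test matters because frequency-driven sampling starts from a transient sample_period of 1, which previously satisfied the BTS condition by accident. A minimal sketch of such a frequency-mode attribute (values illustrative):

	/*
	 * Illustrative only: with attr.freq set, the kernel picks and
	 * adapts the period itself (starting from 1), so period == 1
	 * alone cannot be taken as an explicit BTS request.
	 */
	struct perf_event_attr attr = {
		.size        = sizeof(attr),
		.type        = PERF_TYPE_HARDWARE,
		.config      = PERF_COUNT_HW_BRANCH_INSTRUCTIONS,
		.freq        = 1,
		.sample_freq = 4000,	/* samples/sec; period auto-adjusted */
	};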
@@ -1291,6 +1295,16 @@ static int x86_pmu_handle_irq(struct pt_regs *regs)
 
 	cpuc = &__get_cpu_var(cpu_hw_events);
 
+	/*
+	 * Some chipsets need to unmask the LVTPC in a particular spot
+	 * inside the nmi handler.  As a result, the unmasking was pushed
+	 * into all the nmi handlers.
+	 *
+	 * This generic handler doesn't seem to have any issues where the
+	 * unmasking occurs so it was left at the top.
+	 */
+	apic_write(APIC_LVTPC, APIC_DM_NMI);
+
 	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
 		if (!test_bit(idx, cpuc->active_mask)) {
 			/*
@@ -1377,8 +1391,6 @@ perf_event_nmi_handler(struct notifier_block *self,
 		return NOTIFY_DONE;
 	}
 
-	apic_write(APIC_LVTPC, APIC_DM_NMI);
-
 	handled = x86_pmu.handle_irq(args->regs);
 	if (!handled)
 		return NOTIFY_DONE;
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 8fc2b2cee1da..e61539b07d2c 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -25,7 +25,7 @@ struct intel_percore {
 /*
  * Intel PerfMon, used on Core and later.
  */
-static const u64 intel_perfmon_event_map[] =
+static u64 intel_perfmon_event_map[PERF_COUNT_HW_MAX] __read_mostly =
 {
 	[PERF_COUNT_HW_CPU_CYCLES]	= 0x003c,
 	[PERF_COUNT_HW_INSTRUCTIONS]	= 0x00c0,
@@ -391,12 +391,12 @@ static __initconst const u64 nehalem_hw_cache_event_ids
 {
  [ C(L1D) ] = {
	[ C(OP_READ) ] = {
-		[ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI      */
-		[ C(RESULT_MISS)   ] = 0x0140, /* L1D_CACHE_LD.I_STATE   */
+		[ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS */
+		[ C(RESULT_MISS)   ] = 0x0151, /* L1D.REPL               */
	},
	[ C(OP_WRITE) ] = {
-		[ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI       */
-		[ C(RESULT_MISS)   ] = 0x0141, /* L1D_CACHE_ST.I_STATE    */
+		[ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETURED.STORES */
+		[ C(RESULT_MISS)   ] = 0x0251, /* L1D.M_REPL              */
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS   */
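For reference, the raw encodings above follow the usual x86 perfmon layout: low byte is the event select, next byte the unit mask. A sketch of the decoding (the macro names are made up for illustration, not kernel helpers):

	#define EVTSEL(raw)	((raw) & 0xff)		/* event select */
	#define UMASK(raw)	(((raw) >> 8) & 0xff)	/* unit mask    */

	/* 0x010b -> event 0x0b, umask 0x01: MEM_INST_RETIRED.LOADS */
	/* 0x0151 -> event 0x51, umask 0x01: L1D.REPL               */
	/* 0x0251 -> event 0x51, umask 0x02: L1D.M_REPL             */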
@@ -933,6 +933,16 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
 
 	cpuc = &__get_cpu_var(cpu_hw_events);
 
+	/*
+	 * Some chipsets need to unmask the LVTPC in a particular spot
+	 * inside the nmi handler.  As a result, the unmasking was pushed
+	 * into all the nmi handlers.
+	 *
+	 * This handler doesn't seem to have any issues with the unmasking
+	 * so it was left at the top.
+	 */
+	apic_write(APIC_LVTPC, APIC_DM_NMI);
+
 	intel_pmu_disable_all();
 	handled = intel_pmu_drain_bts_buffer();
 	status = intel_pmu_get_status();
@@ -998,6 +1008,9 @@ intel_bts_constraints(struct perf_event *event) | |||
998 | struct hw_perf_event *hwc = &event->hw; | 1008 | struct hw_perf_event *hwc = &event->hw; |
999 | unsigned int hw_event, bts_event; | 1009 | unsigned int hw_event, bts_event; |
1000 | 1010 | ||
1011 | if (event->attr.freq) | ||
1012 | return NULL; | ||
1013 | |||
1001 | hw_event = hwc->config & INTEL_ARCH_EVENT_MASK; | 1014 | hw_event = hwc->config & INTEL_ARCH_EVENT_MASK; |
1002 | bts_event = x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS); | 1015 | bts_event = x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS); |
1003 | 1016 | ||
@@ -1305,7 +1318,7 @@ static void intel_clovertown_quirks(void)
 	 * AJ106 could possibly be worked around by not allowing LBR
 	 *       usage from PEBS, including the fixup.
 	 * AJ68  could possibly be worked around by always programming
-	 *	 a pebs_event_reset[0] value and coping with the lost events.
+	 *	 a pebs_event_reset[0] value and coping with the lost events.
 	 *
 	 * But taken together it might just make sense to not enable PEBS on
 	 * these chips.
@@ -1409,6 +1422,18 @@ static __init int intel_pmu_init(void)
 		x86_pmu.percore_constraints = intel_nehalem_percore_constraints;
 		x86_pmu.enable_all = intel_pmu_nhm_enable_all;
 		x86_pmu.extra_regs = intel_nehalem_extra_regs;
+
+		if (ebx & 0x40) {
+			/*
+			 * Erratum AAJ80 detected, we work it around by using
+			 * the BR_MISP_EXEC.ANY event. This will over-count
+			 * branch-misses, but it's still much better than the
+			 * architectural event which is often completely bogus:
+			 */
+			intel_perfmon_event_map[PERF_COUNT_HW_BRANCH_MISSES] = 0x7f89;
+
+			pr_cont("erratum AAJ80 worked around, ");
+		}
 		pr_cont("Nehalem events, ");
 		break;
 
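The ebx tested here comes from CPUID leaf 0xa, where EBX is a bitmask of architectural events the CPU reports as unavailable; bit 6 marks branch-mispredicts-retired, which is how AAJ80-affected parts are flagged. A hedged user-space equivalent of the check:

	/* Illustrative user-space check mirroring the kernel's test. */
	#include <stdio.h>
	#include <cpuid.h>

	int main(void)
	{
		unsigned int eax, ebx, ecx, edx;

		if (!__get_cpuid(0xa, &eax, &ebx, &ecx, &edx))
			return 1;	/* no architectural perfmon */

		if (ebx & 0x40)	/* bit 6: branch-mispredict event unavailable */
			printf("AAJ80-style workaround would apply\n");

		return 0;
	}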
@@ -1425,6 +1450,7 @@ static __init int intel_pmu_init(void)
 
 	case 37: /* 32 nm nehalem, "Clarkdale" */
 	case 44: /* 32 nm nehalem, "Gulftown" */
+	case 47: /* 32 nm Xeon E7 */
 		memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids,
 		       sizeof(hw_cache_event_ids));
 		memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs,
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c
index f4c1da2f9352..ead584fb6a7d 100644
--- a/arch/x86/kernel/cpu/perf_event_p4.c
+++ b/arch/x86/kernel/cpu/perf_event_p4.c
@@ -946,14 +946,23 @@ static int p4_pmu_handle_irq(struct pt_regs *regs)
 		if (!x86_perf_event_set_period(event))
 			continue;
 		if (perf_event_overflow(event, 1, &data, regs))
-			p4_pmu_disable_event(event);
+			x86_pmu_stop(event, 0);
 	}
 
-	if (handled) {
-		/* p4 quirk: unmask it again */
-		apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED);
+	if (handled)
 		inc_irq_stat(apic_perf_irqs);
-	}
+
+	/*
+	 * When dealing with the unmasking of the LVTPC on P4 perf hw, it has
+	 * been observed that the OVF bit flag has to be cleared first _before_
+	 * the LVTPC can be unmasked.
+	 *
+	 * The reason is the NMI line will continue to be asserted while the OVF
+	 * bit is set.  This causes a second NMI to generate if the LVTPC is
+	 * unmasked before the OVF bit is cleared, leading to unknown NMI
+	 * messages.
+	 */
+	apic_write(APIC_LVTPC, APIC_DM_NMI);
 
 	return handled;
 }
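A minimal sketch of the ordering constraint the new comment describes, assuming simplified helpers; the MSR access shown is illustrative and not the driver's exact code path:

	/*
	 * Sketch only: clear the counter's OVF flag in its CCCR MSR
	 * first so the NMI line deasserts, and only then rewrite the
	 * LVTPC; doing it the other way round re-triggers the NMI.
	 */
	static void p4_ack_overflow_then_unmask(unsigned long cccr_msr)
	{
		u64 v;

		rdmsrl(cccr_msr, v);
		wrmsrl(cccr_msr, v & ~P4_CCCR_OVF);	/* 1) clear OVF    */

		apic_write(APIC_LVTPC, APIC_DM_NMI);	/* 2) unmask LVTPC */
	}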