author     Tejun Heo <tj@kernel.org>   2011-05-02 08:16:37 -0400
committer  Tejun Heo <tj@kernel.org>   2011-05-02 08:16:47 -0400
commit     ba67cf5cf2ce10ad86a212b70f8c7c75d93a5016 (patch)
tree       70242f5927c6d6454bd352ff78f956cfc5238f59 /arch/x86/kernel/cpu
parent     aff364860aa105b2deacc6f21ec8ef524460e3fc (diff)
parent     2be19102b71c1a45d37fec50303791daa1a06869 (diff)
Merge branch 'x86/urgent' into x86-mm
Merge reason: Pick up the following two fix commits.
2be19102b7: x86, NUMA: Fix empty memblk detection in numa_cleanup_meminfo()
765af22da8: x86-32, NUMA: Fix ACPI NUMA init broken by recent x86-64 change
Scheduled NUMA init 32/64bit unification changes depend on these.
Signed-off-by: Tejun Heo <tj@kernel.org>
Diffstat (limited to 'arch/x86/kernel/cpu')
 -rw-r--r--  arch/x86/kernel/cpu/amd.c              | 21
 -rw-r--r--  arch/x86/kernel/cpu/perf_event.c       | 22
 -rw-r--r--  arch/x86/kernel/cpu/perf_event_amd.c   | 22
 -rw-r--r--  arch/x86/kernel/cpu/perf_event_intel.c | 38
 -rw-r--r--  arch/x86/kernel/cpu/perf_event_p4.c    | 19
 5 files changed, 101 insertions, 21 deletions
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 3ecece0217ef..bb9eb29a52dd 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -615,6 +615,25 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
 	/* As a rule processors have APIC timer running in deep C states */
 	if (c->x86 >= 0xf && !cpu_has_amd_erratum(amd_erratum_400))
 		set_cpu_cap(c, X86_FEATURE_ARAT);
+
+	/*
+	 * Disable GART TLB Walk Errors on Fam10h. We do this here
+	 * because this is always needed when GART is enabled, even in a
+	 * kernel which has no MCE support built in.
+	 */
+	if (c->x86 == 0x10) {
+		/*
+		 * BIOS should disable GartTlbWlk Errors themself. If
+		 * it doesn't do it here as suggested by the BKDG.
+		 *
+		 * Fixes: https://bugzilla.kernel.org/show_bug.cgi?id=33012
+		 */
+		u64 mask;
+
+		rdmsrl(MSR_AMD64_MCx_MASK(4), mask);
+		mask |= (1 << 10);
+		wrmsrl(MSR_AMD64_MCx_MASK(4), mask);
+	}
 }
 
 #ifdef CONFIG_X86_32
@@ -679,7 +698,7 @@ cpu_dev_register(amd_cpu_dev);
  */
 
 const int amd_erratum_400[] =
-	AMD_OSVW_ERRATUM(1, AMD_MODEL_RANGE(0xf, 0x41, 0x2, 0xff, 0xf),
+	AMD_OSVW_ERRATUM(1, AMD_MODEL_RANGE(0x0f, 0x4, 0x2, 0xff, 0xf),
			    AMD_MODEL_RANGE(0x10, 0x2, 0x1, 0xff, 0xf));
 EXPORT_SYMBOL_GPL(amd_erratum_400);
 
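For readers who want to verify the effect of the amd.c hunk on a running Family 10h box, the bit set above can be read back from user space via the msr driver. This is only an illustrative sketch: the 0xc0010048 address assumes MSR_AMD64_MCx_MASK(4) expands to 0xc0010044 + 4, as in the msr-index.h of this period; that definition is not part of this diff.

```c
/*
 * Sketch: check whether the GartTlbWlkErr mask bit (bit 10 of the MC4
 * mask MSR) is set on CPU 0.  Requires the msr driver (/dev/cpu/0/msr)
 * and root.  The MSR address is an assumption, see the note above.
 */
#include <fcntl.h>
#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

#define MC4_MASK_MSR 0xc0010048 /* assumed: MSR_AMD64_MC0_MASK (0xc0010044) + bank 4 */

int main(void)
{
	uint64_t mask;
	int fd = open("/dev/cpu/0/msr", O_RDONLY);

	if (fd < 0 || pread(fd, &mask, sizeof(mask), MC4_MASK_MSR) != sizeof(mask)) {
		perror("rdmsr");
		return EXIT_FAILURE;
	}
	printf("MC4 mask = %#" PRIx64 ", GartTlbWlkErr masked: %s\n",
	       mask, (mask & (1 << 10)) ? "yes" : "no");
	close(fd);
	return 0;
}
```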
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index eed3673a8656..e638689279d3 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -586,8 +586,12 @@ static int x86_setup_perfctr(struct perf_event *event)
 		return -EOPNOTSUPP;
 	}
 
+	/*
+	 * Do not allow config1 (extended registers) to propagate,
+	 * there's no sane user-space generalization yet:
+	 */
 	if (attr->type == PERF_TYPE_RAW)
-		return x86_pmu_extra_regs(event->attr.config, event);
+		return 0;
 
 	if (attr->type == PERF_TYPE_HW_CACHE)
 		return set_ext_hw_attr(hwc, event);
@@ -609,8 +613,8 @@ static int x86_setup_perfctr(struct perf_event *event)
 	/*
 	 * Branch tracing:
 	 */
-	if ((attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS) &&
-	    (hwc->sample_period == 1)) {
+	if (attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS &&
+	    !attr->freq && hwc->sample_period == 1) {
 		/* BTS is not supported by this architecture. */
 		if (!x86_pmu.bts_active)
 			return -EOPNOTSUPP;
@@ -1284,6 +1288,16 @@ static int x86_pmu_handle_irq(struct pt_regs *regs)
 
 	cpuc = &__get_cpu_var(cpu_hw_events);
 
+	/*
+	 * Some chipsets need to unmask the LVTPC in a particular spot
+	 * inside the nmi handler. As a result, the unmasking was pushed
+	 * into all the nmi handlers.
+	 *
+	 * This generic handler doesn't seem to have any issues where the
+	 * unmasking occurs so it was left at the top.
+	 */
+	apic_write(APIC_LVTPC, APIC_DM_NMI);
+
 	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
 		if (!test_bit(idx, cpuc->active_mask)) {
 			/*
@@ -1370,8 +1384,6 @@ perf_event_nmi_handler(struct notifier_block *self,
 		return NOTIFY_DONE;
 	}
 
-	apic_write(APIC_LVTPC, APIC_DM_NMI);
-
 	handled = x86_pmu.handle_irq(args->regs);
 	if (!handled)
 		return NOTIFY_DONE;
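The `!attr->freq` test added above matters because frequency-driven sampling events start out with `sample_period == 1` before the kernel adapts the period, so they used to be mistaken for an explicit BTS request. A hypothetical user-space sketch of the two attribute setups the check now distinguishes (error handling omitted; the `perf_event_open()` wrapper is local since glibc provides none):

```c
/*
 * Illustration only: two ways user space can ask for branch-instruction
 * sampling.  Frequency mode is internally seeded with sample_period == 1,
 * which the old condition treated the same as an explicit period of 1.
 */
#include <linux/perf_event.h>
#include <string.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <unistd.h>

static int perf_event_open(struct perf_event_attr *attr, pid_t pid,
			   int cpu, int group_fd, unsigned long flags)
{
	return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
}

int main(void)
{
	struct perf_event_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_HARDWARE;
	attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS;

	/* Frequency mode: the kernel picks the period, should NOT mean "use BTS". */
	attr.freq = 1;
	attr.sample_freq = 4000;
	int fd_freq = perf_event_open(&attr, 0, -1, -1, 0);

	/* Explicit period of 1: the historical "give me BTS" request. */
	attr.freq = 0;
	attr.sample_period = 1;
	int fd_bts = perf_event_open(&attr, 0, -1, -1, 0);

	close(fd_freq);
	close(fd_bts);
	return 0;
}
```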
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index 461f62bbd774..cf4e369cea67 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -8,7 +8,7 @@ static __initconst const u64 amd_hw_cache_event_ids
  [ C(L1D) ] = {
 	[ C(OP_READ) ] = {
 		[ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses        */
-		[ C(RESULT_MISS)   ] = 0x0041, /* Data Cache Misses          */
+		[ C(RESULT_MISS)   ] = 0x0141, /* Data Cache Misses          */
 	},
 	[ C(OP_WRITE) ] = {
 		[ C(RESULT_ACCESS) ] = 0x0142, /* Data Cache Refills :system */
@@ -427,7 +427,9 @@ static __initconst const struct x86_pmu amd_pmu = {
  *
  * Exceptions:
  *
+ * 0x000	FP	PERF_CTL[3], PERF_CTL[5:3] (*)
  * 0x003	FP	PERF_CTL[3]
+ * 0x004	FP	PERF_CTL[3], PERF_CTL[5:3] (*)
  * 0x00B	FP	PERF_CTL[3]
  * 0x00D	FP	PERF_CTL[3]
  * 0x023	DE	PERF_CTL[2:0]
@@ -448,6 +450,8 @@ static __initconst const struct x86_pmu amd_pmu = {
  * 0x0DF	LS	PERF_CTL[5:0]
  * 0x1D6	EX	PERF_CTL[5:0]
  * 0x1D8	EX	PERF_CTL[5:0]
+ *
+ * (*) depending on the umask all FPU counters may be used
  */
 
 static struct event_constraint amd_f15_PMC0  = EVENT_CONSTRAINT(0, 0x01, 0);
@@ -460,18 +464,28 @@ static struct event_constraint amd_f15_PMC53 = EVENT_CONSTRAINT(0, 0x38, 0);
 static struct event_constraint *
 amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, struct perf_event *event)
 {
-	unsigned int event_code = amd_get_event_code(&event->hw);
+	struct hw_perf_event *hwc = &event->hw;
+	unsigned int event_code = amd_get_event_code(hwc);
 
 	switch (event_code & AMD_EVENT_TYPE_MASK) {
 	case AMD_EVENT_FP:
 		switch (event_code) {
+		case 0x000:
+			if (!(hwc->config & 0x0000F000ULL))
+				break;
+			if (!(hwc->config & 0x00000F00ULL))
+				break;
+			return &amd_f15_PMC3;
+		case 0x004:
+			if (hweight_long(hwc->config & ARCH_PERFMON_EVENTSEL_UMASK) <= 1)
+				break;
+			return &amd_f15_PMC3;
 		case 0x003:
 		case 0x00B:
 		case 0x00D:
 			return &amd_f15_PMC3;
-		default:
-			return &amd_f15_PMC53;
 		}
+		return &amd_f15_PMC53;
 	case AMD_EVENT_LS:
 	case AMD_EVENT_DC:
 	case AMD_EVENT_EX_LS:
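The restructured inner switch above is easy to misread: `break` now means "fall out to the generic PERF_CTL[5:3] constraint" (`amd_f15_PMC53`, counter mask 0x38), while returning `amd_f15_PMC3` pins the event to PERF_CTL[3]. Below is a standalone paraphrase of just that decision, written as illustration only; the umask living in config bits 15:8 (ARCH_PERFMON_EVENTSEL_UMASK) is stated as an assumption rather than taken from this diff.

```c
/* Standalone paraphrase of the Fam15h FP constraint choice, for illustration. */
#include <stdint.h>
#include <stdio.h>

enum fp_counters { FP_PMC3_ONLY, FP_PMC5_3 };

static enum fp_counters f15h_fp_constraint(unsigned int event_code, uint64_t config)
{
	switch (event_code) {
	case 0x000:
		if (!(config & 0x0000F000ULL))
			break;
		if (!(config & 0x00000F00ULL))
			break;
		return FP_PMC3_ONLY;
	case 0x004:
		/* more than one umask bit set -> restricted to PERF_CTL[3] */
		if (__builtin_popcountll(config & 0xFF00ULL) <= 1)
			break;
		return FP_PMC3_ONLY;
	case 0x003:
	case 0x00B:
	case 0x00D:
		return FP_PMC3_ONLY;
	}
	return FP_PMC5_3; /* the "break" cases: any of PERF_CTL[5:3] */
}

int main(void)
{
	/* Event 0x004, single umask bit: any of PERF_CTL[5:3] (prints 1). */
	printf("%d\n", f15h_fp_constraint(0x004, 0x0100));
	/* Event 0x004, two umask bits: PERF_CTL[3] only (prints 0). */
	printf("%d\n", f15h_fp_constraint(0x004, 0x0300));
	return 0;
}
```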
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 8fc2b2cee1da..e61539b07d2c 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -25,7 +25,7 @@ struct intel_percore {
 /*
  * Intel PerfMon, used on Core and later.
  */
-static const u64 intel_perfmon_event_map[] =
+static u64 intel_perfmon_event_map[PERF_COUNT_HW_MAX] __read_mostly =
 {
   [PERF_COUNT_HW_CPU_CYCLES]		= 0x003c,
   [PERF_COUNT_HW_INSTRUCTIONS]		= 0x00c0,
@@ -391,12 +391,12 @@ static __initconst const u64 nehalem_hw_cache_event_ids
 {
  [ C(L1D) ] = {
 	[ C(OP_READ) ] = {
-		[ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI            */
-		[ C(RESULT_MISS)   ] = 0x0140, /* L1D_CACHE_LD.I_STATE         */
+		[ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS       */
+		[ C(RESULT_MISS)   ] = 0x0151, /* L1D.REPL                     */
 	},
 	[ C(OP_WRITE) ] = {
-		[ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI            */
-		[ C(RESULT_MISS)   ] = 0x0141, /* L1D_CACHE_ST.I_STATE         */
+		[ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETURED.STORES      */
+		[ C(RESULT_MISS)   ] = 0x0251, /* L1D.M_REPL                   */
 	},
 	[ C(OP_PREFETCH) ] = {
 		[ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS        */
@@ -933,6 +933,16 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
 
 	cpuc = &__get_cpu_var(cpu_hw_events);
 
+	/*
+	 * Some chipsets need to unmask the LVTPC in a particular spot
+	 * inside the nmi handler. As a result, the unmasking was pushed
+	 * into all the nmi handlers.
+	 *
+	 * This handler doesn't seem to have any issues with the unmasking
+	 * so it was left at the top.
+	 */
+	apic_write(APIC_LVTPC, APIC_DM_NMI);
+
 	intel_pmu_disable_all();
 	handled = intel_pmu_drain_bts_buffer();
 	status = intel_pmu_get_status();
@@ -998,6 +1008,9 @@ intel_bts_constraints(struct perf_event *event)
 	struct hw_perf_event *hwc = &event->hw;
 	unsigned int hw_event, bts_event;
 
+	if (event->attr.freq)
+		return NULL;
+
 	hw_event = hwc->config & INTEL_ARCH_EVENT_MASK;
 	bts_event = x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS);
 
@@ -1305,7 +1318,7 @@ static void intel_clovertown_quirks(void)
 	 * AJ106 could possibly be worked around by not allowing LBR
 	 *   usage from PEBS, including the fixup.
 	 * AJ68  could possibly be worked around by always programming
-	 * 	 a pebs_event_reset[0] value and coping with the lost events.
+	 *	 a pebs_event_reset[0] value and coping with the lost events.
 	 *
 	 * But taken together it might just make sense to not enable PEBS on
 	 * these chips.
@@ -1409,6 +1422,18 @@ static __init int intel_pmu_init(void)
 		x86_pmu.percore_constraints = intel_nehalem_percore_constraints;
 		x86_pmu.enable_all = intel_pmu_nhm_enable_all;
 		x86_pmu.extra_regs = intel_nehalem_extra_regs;
+
+		if (ebx & 0x40) {
+			/*
+			 * Erratum AAJ80 detected, we work it around by using
+			 * the BR_MISP_EXEC.ANY event. This will over-count
+			 * branch-misses, but it's still much better than the
+			 * architectural event which is often completely bogus:
+			 */
+			intel_perfmon_event_map[PERF_COUNT_HW_BRANCH_MISSES] = 0x7f89;
+
+			pr_cont("erratum AAJ80 worked around, ");
+		}
 		pr_cont("Nehalem events, ");
 		break;
 
@@ -1425,6 +1450,7 @@ static __init int intel_pmu_init(void)
 
 	case 37: /* 32 nm nehalem, "Clarkdale" */
 	case 44: /* 32 nm nehalem, "Gulftown" */
+	case 47: /* 32 nm Xeon E7 */
 		memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids,
 		       sizeof(hw_cache_event_ids));
 		memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs,
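The AAJ80 hunk keys off `ebx & 0x40`. By assumption here (it is not visible in this hunk), that register is EBX of CPUID leaf 0x0A, the architectural-perfmon leaf, where a set bit 6 advertises the branch-mispredict-retired event as unavailable. A small user-space sketch to see whether a given machine would get the workaround:

```c
/*
 * Sketch: query CPUID leaf 0x0A and report whether the branch-mispredict
 * event is flagged unavailable (the condition the AAJ80 workaround tests,
 * under the assumption stated above).
 */
#include <cpuid.h>
#include <stdio.h>

int main(void)
{
	unsigned int eax, ebx, ecx, edx;

	if (!__get_cpuid(0x0a, &eax, &ebx, &ecx, &edx)) {
		puts("CPUID leaf 0x0A not supported");
		return 1;
	}
	printf("arch perfmon version: %u\n", eax & 0xff);
	printf("branch-misses event unavailable (workaround would apply): %s\n",
	       (ebx & 0x40) ? "yes" : "no");
	return 0;
}
```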
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c
index c2520e178d32..e93fcd55fae1 100644
--- a/arch/x86/kernel/cpu/perf_event_p4.c
+++ b/arch/x86/kernel/cpu/perf_event_p4.c
@@ -947,14 +947,23 @@ static int p4_pmu_handle_irq(struct pt_regs *regs)
 		if (!x86_perf_event_set_period(event))
 			continue;
 		if (perf_event_overflow(event, 1, &data, regs))
-			p4_pmu_disable_event(event);
+			x86_pmu_stop(event, 0);
 	}
 
-	if (handled) {
-		/* p4 quirk: unmask it again */
-		apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED);
+	if (handled)
 		inc_irq_stat(apic_perf_irqs);
-	}
+
+	/*
+	 * When dealing with the unmasking of the LVTPC on P4 perf hw, it has
+	 * been observed that the OVF bit flag has to be cleared first _before_
+	 * the LVTPC can be unmasked.
+	 *
+	 * The reason is the NMI line will continue to be asserted while the OVF
+	 * bit is set. This causes a second NMI to generate if the LVTPC is
+	 * unmasked before the OVF bit is cleared, leading to unknown NMI
+	 * messages.
+	 */
+	apic_write(APIC_LVTPC, APIC_DM_NMI);
 
 	return handled;
 }