diff options
author | Kan Liang <kan.liang@intel.com> | 2016-04-15 03:42:47 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@kernel.org> | 2016-04-23 08:12:27 -0400 |
commit | 8b92c3a78d40fb220dc5ab122e3274d1b126bfbb (patch) | |
tree | 750ae43a2d2793fe095c3e65926f352c31616179 | |
parent | 65cbbd037b3d7be0a40bbdb5da9d43b0fccf17ee (diff) |
perf/x86/intel: Add Goldmont CPU support
Add perf core PMU support for Intel Goldmont CPU cores:
- The init code is based on Silvermont.
- There is a new cache event list, based on the Silvermont cache event list.
- Goldmont has 32 LBR entries. It also uses the new LBRv6 format, which
reports the cycle information using the upper 16 bits of the LBR_TO.
- It's recommended to use CPU_CLK_UNHALTED.CORE_P + NPEBS for precise cycles.
For details, please refer to the latest SDM058:
http://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-vol-3b-part-2-manual.pdf
Signed-off-by: Kan Liang <kan.liang@intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Link: http://lkml.kernel.org/r/1460706167-45320-1-git-send-email-kan.liang@intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
-rw-r--r-- | arch/x86/events/intel/core.c | 157 | ||||
-rw-r--r-- | arch/x86/events/intel/ds.c | 6 | ||||
-rw-r--r-- | arch/x86/events/intel/lbr.c | 13 | ||||
-rw-r--r-- | arch/x86/events/perf_event.h | 2 |
4 files changed, 177 insertions, 1 deletion
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index aff79884e17d..92fda6bb779e 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c | |||
@@ -1465,6 +1465,140 @@ static __initconst const u64 slm_hw_cache_event_ids | |||
1465 | }, | 1465 | }, |
1466 | }; | 1466 | }; |
1467 | 1467 | ||
1468 | static struct extra_reg intel_glm_extra_regs[] __read_mostly = { | ||
1469 | /* must define OFFCORE_RSP_X first, see intel_fixup_er() */ | ||
1470 | INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x760005ffbfull, RSP_0), | ||
1471 | INTEL_UEVENT_EXTRA_REG(0x02b7, MSR_OFFCORE_RSP_1, 0x360005ffbfull, RSP_1), | ||
1472 | EVENT_EXTRA_END | ||
1473 | }; | ||
1474 | |||
1475 | #define GLM_DEMAND_DATA_RD BIT_ULL(0) | ||
1476 | #define GLM_DEMAND_RFO BIT_ULL(1) | ||
1477 | #define GLM_ANY_RESPONSE BIT_ULL(16) | ||
1478 | #define GLM_SNP_NONE_OR_MISS BIT_ULL(33) | ||
1479 | #define GLM_DEMAND_READ GLM_DEMAND_DATA_RD | ||
1480 | #define GLM_DEMAND_WRITE GLM_DEMAND_RFO | ||
1481 | #define GLM_DEMAND_PREFETCH (SNB_PF_DATA_RD|SNB_PF_RFO) | ||
1482 | #define GLM_LLC_ACCESS GLM_ANY_RESPONSE | ||
1483 | #define GLM_SNP_ANY (GLM_SNP_NONE_OR_MISS|SNB_NO_FWD|SNB_HITM) | ||
1484 | #define GLM_LLC_MISS (GLM_SNP_ANY|SNB_NON_DRAM) | ||
1485 | |||
1486 | static __initconst const u64 glm_hw_cache_event_ids | ||
1487 | [PERF_COUNT_HW_CACHE_MAX] | ||
1488 | [PERF_COUNT_HW_CACHE_OP_MAX] | ||
1489 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = { | ||
1490 | [C(L1D)] = { | ||
1491 | [C(OP_READ)] = { | ||
1492 | [C(RESULT_ACCESS)] = 0x81d0, /* MEM_UOPS_RETIRED.ALL_LOADS */ | ||
1493 | [C(RESULT_MISS)] = 0x0, | ||
1494 | }, | ||
1495 | [C(OP_WRITE)] = { | ||
1496 | [C(RESULT_ACCESS)] = 0x82d0, /* MEM_UOPS_RETIRED.ALL_STORES */ | ||
1497 | [C(RESULT_MISS)] = 0x0, | ||
1498 | }, | ||
1499 | [C(OP_PREFETCH)] = { | ||
1500 | [C(RESULT_ACCESS)] = 0x0, | ||
1501 | [C(RESULT_MISS)] = 0x0, | ||
1502 | }, | ||
1503 | }, | ||
1504 | [C(L1I)] = { | ||
1505 | [C(OP_READ)] = { | ||
1506 | [C(RESULT_ACCESS)] = 0x0380, /* ICACHE.ACCESSES */ | ||
1507 | [C(RESULT_MISS)] = 0x0280, /* ICACHE.MISSES */ | ||
1508 | }, | ||
1509 | [C(OP_WRITE)] = { | ||
1510 | [C(RESULT_ACCESS)] = -1, | ||
1511 | [C(RESULT_MISS)] = -1, | ||
1512 | }, | ||
1513 | [C(OP_PREFETCH)] = { | ||
1514 | [C(RESULT_ACCESS)] = 0x0, | ||
1515 | [C(RESULT_MISS)] = 0x0, | ||
1516 | }, | ||
1517 | }, | ||
1518 | [C(LL)] = { | ||
1519 | [C(OP_READ)] = { | ||
1520 | [C(RESULT_ACCESS)] = 0x1b7, /* OFFCORE_RESPONSE */ | ||
1521 | [C(RESULT_MISS)] = 0x1b7, /* OFFCORE_RESPONSE */ | ||
1522 | }, | ||
1523 | [C(OP_WRITE)] = { | ||
1524 | [C(RESULT_ACCESS)] = 0x1b7, /* OFFCORE_RESPONSE */ | ||
1525 | [C(RESULT_MISS)] = 0x1b7, /* OFFCORE_RESPONSE */ | ||
1526 | }, | ||
1527 | [C(OP_PREFETCH)] = { | ||
1528 | [C(RESULT_ACCESS)] = 0x1b7, /* OFFCORE_RESPONSE */ | ||
1529 | [C(RESULT_MISS)] = 0x1b7, /* OFFCORE_RESPONSE */ | ||
1530 | }, | ||
1531 | }, | ||
1532 | [C(DTLB)] = { | ||
1533 | [C(OP_READ)] = { | ||
1534 | [C(RESULT_ACCESS)] = 0x81d0, /* MEM_UOPS_RETIRED.ALL_LOADS */ | ||
1535 | [C(RESULT_MISS)] = 0x0, | ||
1536 | }, | ||
1537 | [C(OP_WRITE)] = { | ||
1538 | [C(RESULT_ACCESS)] = 0x82d0, /* MEM_UOPS_RETIRED.ALL_STORES */ | ||
1539 | [C(RESULT_MISS)] = 0x0, | ||
1540 | }, | ||
1541 | [C(OP_PREFETCH)] = { | ||
1542 | [C(RESULT_ACCESS)] = 0x0, | ||
1543 | [C(RESULT_MISS)] = 0x0, | ||
1544 | }, | ||
1545 | }, | ||
1546 | [C(ITLB)] = { | ||
1547 | [C(OP_READ)] = { | ||
1548 | [C(RESULT_ACCESS)] = 0x00c0, /* INST_RETIRED.ANY_P */ | ||
1549 | [C(RESULT_MISS)] = 0x0481, /* ITLB.MISS */ | ||
1550 | }, | ||
1551 | [C(OP_WRITE)] = { | ||
1552 | [C(RESULT_ACCESS)] = -1, | ||
1553 | [C(RESULT_MISS)] = -1, | ||
1554 | }, | ||
1555 | [C(OP_PREFETCH)] = { | ||
1556 | [C(RESULT_ACCESS)] = -1, | ||
1557 | [C(RESULT_MISS)] = -1, | ||
1558 | }, | ||
1559 | }, | ||
1560 | [C(BPU)] = { | ||
1561 | [C(OP_READ)] = { | ||
1562 | [C(RESULT_ACCESS)] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */ | ||
1563 | [C(RESULT_MISS)] = 0x00c5, /* BR_MISP_RETIRED.ALL_BRANCHES */ | ||
1564 | }, | ||
1565 | [C(OP_WRITE)] = { | ||
1566 | [C(RESULT_ACCESS)] = -1, | ||
1567 | [C(RESULT_MISS)] = -1, | ||
1568 | }, | ||
1569 | [C(OP_PREFETCH)] = { | ||
1570 | [C(RESULT_ACCESS)] = -1, | ||
1571 | [C(RESULT_MISS)] = -1, | ||
1572 | }, | ||
1573 | }, | ||
1574 | }; | ||
1575 | |||
1576 | static __initconst const u64 glm_hw_cache_extra_regs | ||
1577 | [PERF_COUNT_HW_CACHE_MAX] | ||
1578 | [PERF_COUNT_HW_CACHE_OP_MAX] | ||
1579 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = { | ||
1580 | [C(LL)] = { | ||
1581 | [C(OP_READ)] = { | ||
1582 | [C(RESULT_ACCESS)] = GLM_DEMAND_READ| | ||
1583 | GLM_LLC_ACCESS, | ||
1584 | [C(RESULT_MISS)] = GLM_DEMAND_READ| | ||
1585 | GLM_LLC_MISS, | ||
1586 | }, | ||
1587 | [C(OP_WRITE)] = { | ||
1588 | [C(RESULT_ACCESS)] = GLM_DEMAND_WRITE| | ||
1589 | GLM_LLC_ACCESS, | ||
1590 | [C(RESULT_MISS)] = GLM_DEMAND_WRITE| | ||
1591 | GLM_LLC_MISS, | ||
1592 | }, | ||
1593 | [C(OP_PREFETCH)] = { | ||
1594 | [C(RESULT_ACCESS)] = GLM_DEMAND_PREFETCH| | ||
1595 | GLM_LLC_ACCESS, | ||
1596 | [C(RESULT_MISS)] = GLM_DEMAND_PREFETCH| | ||
1597 | GLM_LLC_MISS, | ||
1598 | }, | ||
1599 | }, | ||
1600 | }; | ||
1601 | |||
1468 | #define KNL_OT_L2_HITE BIT_ULL(19) /* Other Tile L2 Hit */ | 1602 | #define KNL_OT_L2_HITE BIT_ULL(19) /* Other Tile L2 Hit */ |
1469 | #define KNL_OT_L2_HITF BIT_ULL(20) /* Other Tile L2 Hit */ | 1603 | #define KNL_OT_L2_HITF BIT_ULL(20) /* Other Tile L2 Hit */ |
1470 | #define KNL_MCDRAM_LOCAL BIT_ULL(21) | 1604 | #define KNL_MCDRAM_LOCAL BIT_ULL(21) |
@@ -3456,6 +3590,29 @@ __init int intel_pmu_init(void) | |||
3456 | pr_cont("Silvermont events, "); | 3590 | pr_cont("Silvermont events, "); |
3457 | break; | 3591 | break; |
3458 | 3592 | ||
3593 | case 92: /* 14nm Atom "Goldmont" */ | ||
3594 | case 95: /* 14nm Atom "Goldmont Denverton" */ | ||
3595 | memcpy(hw_cache_event_ids, glm_hw_cache_event_ids, | ||
3596 | sizeof(hw_cache_event_ids)); | ||
3597 | memcpy(hw_cache_extra_regs, glm_hw_cache_extra_regs, | ||
3598 | sizeof(hw_cache_extra_regs)); | ||
3599 | |||
3600 | intel_pmu_lbr_init_skl(); | ||
3601 | |||
3602 | x86_pmu.event_constraints = intel_slm_event_constraints; | ||
3603 | x86_pmu.pebs_constraints = intel_glm_pebs_event_constraints; | ||
3604 | x86_pmu.extra_regs = intel_glm_extra_regs; | ||
3605 | /* | ||
3606 | * It's recommended to use CPU_CLK_UNHALTED.CORE_P + NPEBS | ||
3607 | * for precise cycles. | ||
3608 | * :pp is identical to :ppp | ||
3609 | */ | ||
3610 | x86_pmu.pebs_aliases = NULL; | ||
3611 | x86_pmu.pebs_prec_dist = true; | ||
3612 | x86_pmu.flags |= PMU_FL_HAS_RSP_1; | ||
3613 | pr_cont("Goldmont events, "); | ||
3614 | break; | ||
3615 | |||
3459 | case 37: /* 32nm Westmere */ | 3616 | case 37: /* 32nm Westmere */ |
3460 | case 44: /* 32nm Westmere-EP */ | 3617 | case 44: /* 32nm Westmere-EP */ |
3461 | case 47: /* 32nm Westmere-EX */ | 3618 | case 47: /* 32nm Westmere-EX */ |
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index 8584b90d8e0b..7ce9f3f669e6 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c | |||
@@ -645,6 +645,12 @@ struct event_constraint intel_slm_pebs_event_constraints[] = { | |||
645 | EVENT_CONSTRAINT_END | 645 | EVENT_CONSTRAINT_END |
646 | }; | 646 | }; |
647 | 647 | ||
648 | struct event_constraint intel_glm_pebs_event_constraints[] = { | ||
649 | /* Allow all events as PEBS with no flags */ | ||
650 | INTEL_ALL_EVENT_CONSTRAINT(0, 0x1), | ||
651 | EVENT_CONSTRAINT_END | ||
652 | }; | ||
653 | |||
648 | struct event_constraint intel_nehalem_pebs_event_constraints[] = { | 654 | struct event_constraint intel_nehalem_pebs_event_constraints[] = { |
649 | INTEL_PLD_CONSTRAINT(0x100b, 0xf), /* MEM_INST_RETIRED.* */ | 655 | INTEL_PLD_CONSTRAINT(0x100b, 0xf), /* MEM_INST_RETIRED.* */ |
650 | INTEL_FLAGS_EVENT_CONSTRAINT(0x0f, 0xf), /* MEM_UNCORE_RETIRED.* */ | 656 | INTEL_FLAGS_EVENT_CONSTRAINT(0x0f, 0xf), /* MEM_UNCORE_RETIRED.* */ |
diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c index 6c3b7c1780c9..ad26ca770c98 100644 --- a/arch/x86/events/intel/lbr.c +++ b/arch/x86/events/intel/lbr.c | |||
@@ -14,7 +14,8 @@ enum { | |||
14 | LBR_FORMAT_EIP_FLAGS = 0x03, | 14 | LBR_FORMAT_EIP_FLAGS = 0x03, |
15 | LBR_FORMAT_EIP_FLAGS2 = 0x04, | 15 | LBR_FORMAT_EIP_FLAGS2 = 0x04, |
16 | LBR_FORMAT_INFO = 0x05, | 16 | LBR_FORMAT_INFO = 0x05, |
17 | LBR_FORMAT_MAX_KNOWN = LBR_FORMAT_INFO, | 17 | LBR_FORMAT_TIME = 0x06, |
18 | LBR_FORMAT_MAX_KNOWN = LBR_FORMAT_TIME, | ||
18 | }; | 19 | }; |
19 | 20 | ||
20 | static enum { | 21 | static enum { |
@@ -464,6 +465,16 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc) | |||
464 | abort = !!(info & LBR_INFO_ABORT); | 465 | abort = !!(info & LBR_INFO_ABORT); |
465 | cycles = (info & LBR_INFO_CYCLES); | 466 | cycles = (info & LBR_INFO_CYCLES); |
466 | } | 467 | } |
468 | |||
469 | if (lbr_format == LBR_FORMAT_TIME) { | ||
470 | mis = !!(from & LBR_FROM_FLAG_MISPRED); | ||
471 | pred = !mis; | ||
472 | skip = 1; | ||
473 | cycles = ((to >> 48) & LBR_INFO_CYCLES); | ||
474 | |||
475 | to = (u64)((((s64)to) << 16) >> 16); | ||
476 | } | ||
477 | |||
467 | if (lbr_flags & LBR_EIP_FLAGS) { | 478 | if (lbr_flags & LBR_EIP_FLAGS) { |
468 | mis = !!(from & LBR_FROM_FLAG_MISPRED); | 479 | mis = !!(from & LBR_FROM_FLAG_MISPRED); |
469 | pred = !mis; | 480 | pred = !mis; |
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index ad4dc7ffffb5..8b78481d1e64 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h | |||
@@ -859,6 +859,8 @@ extern struct event_constraint intel_atom_pebs_event_constraints[]; | |||
859 | 859 | ||
860 | extern struct event_constraint intel_slm_pebs_event_constraints[]; | 860 | extern struct event_constraint intel_slm_pebs_event_constraints[]; |
861 | 861 | ||
862 | extern struct event_constraint intel_glm_pebs_event_constraints[]; | ||
863 | |||
862 | extern struct event_constraint intel_nehalem_pebs_event_constraints[]; | 864 | extern struct event_constraint intel_nehalem_pebs_event_constraints[]; |
863 | 865 | ||
864 | extern struct event_constraint intel_westmere_pebs_event_constraints[]; | 866 | extern struct event_constraint intel_westmere_pebs_event_constraints[]; |