aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKan Liang <kan.liang@intel.com>2016-04-15 03:42:47 -0400
committerIngo Molnar <mingo@kernel.org>2016-04-23 08:12:27 -0400
commit8b92c3a78d40fb220dc5ab122e3274d1b126bfbb (patch)
tree750ae43a2d2793fe095c3e65926f352c31616179
parent65cbbd037b3d7be0a40bbdb5da9d43b0fccf17ee (diff)
perf/x86/intel: Add Goldmont CPU support
Add perf core PMU support for Intel Goldmont CPU cores: - The init code is based on Silvermont. - There is a new cache event list, based on the Silvermont cache event list. - Goldmont has 32 LBR entries. It also uses new LBRv6 format, which report the cycle information using upper 16-bit of the LBR_TO. - It's recommended to use CPU_CLK_UNHALTED.CORE_P + NPEBS for precise cycles. For details, please refer to the latest SDM058: http://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-vol-3b-part-2-manual.pdf Signed-off-by: Kan Liang <kan.liang@intel.com> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Jiri Olsa <jolsa@redhat.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Stephane Eranian <eranian@google.com> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Vince Weaver <vincent.weaver@maine.edu> Link: http://lkml.kernel.org/r/1460706167-45320-1-git-send-email-kan.liang@intel.com Signed-off-by: Ingo Molnar <mingo@kernel.org>
-rw-r--r--arch/x86/events/intel/core.c157
-rw-r--r--arch/x86/events/intel/ds.c6
-rw-r--r--arch/x86/events/intel/lbr.c13
-rw-r--r--arch/x86/events/perf_event.h2
4 files changed, 177 insertions(+), 1 deletion(-)
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index aff79884e17d..92fda6bb779e 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -1465,6 +1465,140 @@ static __initconst const u64 slm_hw_cache_event_ids
1465 }, 1465 },
1466}; 1466};
1467 1467
1468static struct extra_reg intel_glm_extra_regs[] __read_mostly = {
1469 /* must define OFFCORE_RSP_X first, see intel_fixup_er() */
1470 INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x760005ffbfull, RSP_0),
1471 INTEL_UEVENT_EXTRA_REG(0x02b7, MSR_OFFCORE_RSP_1, 0x360005ffbfull, RSP_1),
1472 EVENT_EXTRA_END
1473};
1474
1475#define GLM_DEMAND_DATA_RD BIT_ULL(0)
1476#define GLM_DEMAND_RFO BIT_ULL(1)
1477#define GLM_ANY_RESPONSE BIT_ULL(16)
1478#define GLM_SNP_NONE_OR_MISS BIT_ULL(33)
1479#define GLM_DEMAND_READ GLM_DEMAND_DATA_RD
1480#define GLM_DEMAND_WRITE GLM_DEMAND_RFO
1481#define GLM_DEMAND_PREFETCH (SNB_PF_DATA_RD|SNB_PF_RFO)
1482#define GLM_LLC_ACCESS GLM_ANY_RESPONSE
1483#define GLM_SNP_ANY (GLM_SNP_NONE_OR_MISS|SNB_NO_FWD|SNB_HITM)
1484#define GLM_LLC_MISS (GLM_SNP_ANY|SNB_NON_DRAM)
1485
1486static __initconst const u64 glm_hw_cache_event_ids
1487 [PERF_COUNT_HW_CACHE_MAX]
1488 [PERF_COUNT_HW_CACHE_OP_MAX]
1489 [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
1490 [C(L1D)] = {
1491 [C(OP_READ)] = {
1492 [C(RESULT_ACCESS)] = 0x81d0, /* MEM_UOPS_RETIRED.ALL_LOADS */
1493 [C(RESULT_MISS)] = 0x0,
1494 },
1495 [C(OP_WRITE)] = {
1496 [C(RESULT_ACCESS)] = 0x82d0, /* MEM_UOPS_RETIRED.ALL_STORES */
1497 [C(RESULT_MISS)] = 0x0,
1498 },
1499 [C(OP_PREFETCH)] = {
1500 [C(RESULT_ACCESS)] = 0x0,
1501 [C(RESULT_MISS)] = 0x0,
1502 },
1503 },
1504 [C(L1I)] = {
1505 [C(OP_READ)] = {
1506 [C(RESULT_ACCESS)] = 0x0380, /* ICACHE.ACCESSES */
1507 [C(RESULT_MISS)] = 0x0280, /* ICACHE.MISSES */
1508 },
1509 [C(OP_WRITE)] = {
1510 [C(RESULT_ACCESS)] = -1,
1511 [C(RESULT_MISS)] = -1,
1512 },
1513 [C(OP_PREFETCH)] = {
1514 [C(RESULT_ACCESS)] = 0x0,
1515 [C(RESULT_MISS)] = 0x0,
1516 },
1517 },
1518 [C(LL)] = {
1519 [C(OP_READ)] = {
1520 [C(RESULT_ACCESS)] = 0x1b7, /* OFFCORE_RESPONSE */
1521 [C(RESULT_MISS)] = 0x1b7, /* OFFCORE_RESPONSE */
1522 },
1523 [C(OP_WRITE)] = {
1524 [C(RESULT_ACCESS)] = 0x1b7, /* OFFCORE_RESPONSE */
1525 [C(RESULT_MISS)] = 0x1b7, /* OFFCORE_RESPONSE */
1526 },
1527 [C(OP_PREFETCH)] = {
1528 [C(RESULT_ACCESS)] = 0x1b7, /* OFFCORE_RESPONSE */
1529 [C(RESULT_MISS)] = 0x1b7, /* OFFCORE_RESPONSE */
1530 },
1531 },
1532 [C(DTLB)] = {
1533 [C(OP_READ)] = {
1534 [C(RESULT_ACCESS)] = 0x81d0, /* MEM_UOPS_RETIRED.ALL_LOADS */
1535 [C(RESULT_MISS)] = 0x0,
1536 },
1537 [C(OP_WRITE)] = {
1538 [C(RESULT_ACCESS)] = 0x82d0, /* MEM_UOPS_RETIRED.ALL_STORES */
1539 [C(RESULT_MISS)] = 0x0,
1540 },
1541 [C(OP_PREFETCH)] = {
1542 [C(RESULT_ACCESS)] = 0x0,
1543 [C(RESULT_MISS)] = 0x0,
1544 },
1545 },
1546 [C(ITLB)] = {
1547 [C(OP_READ)] = {
1548 [C(RESULT_ACCESS)] = 0x00c0, /* INST_RETIRED.ANY_P */
1549 [C(RESULT_MISS)] = 0x0481, /* ITLB.MISS */
1550 },
1551 [C(OP_WRITE)] = {
1552 [C(RESULT_ACCESS)] = -1,
1553 [C(RESULT_MISS)] = -1,
1554 },
1555 [C(OP_PREFETCH)] = {
1556 [C(RESULT_ACCESS)] = -1,
1557 [C(RESULT_MISS)] = -1,
1558 },
1559 },
1560 [C(BPU)] = {
1561 [C(OP_READ)] = {
1562 [C(RESULT_ACCESS)] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
1563 [C(RESULT_MISS)] = 0x00c5, /* BR_MISP_RETIRED.ALL_BRANCHES */
1564 },
1565 [C(OP_WRITE)] = {
1566 [C(RESULT_ACCESS)] = -1,
1567 [C(RESULT_MISS)] = -1,
1568 },
1569 [C(OP_PREFETCH)] = {
1570 [C(RESULT_ACCESS)] = -1,
1571 [C(RESULT_MISS)] = -1,
1572 },
1573 },
1574};
1575
1576static __initconst const u64 glm_hw_cache_extra_regs
1577 [PERF_COUNT_HW_CACHE_MAX]
1578 [PERF_COUNT_HW_CACHE_OP_MAX]
1579 [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
1580 [C(LL)] = {
1581 [C(OP_READ)] = {
1582 [C(RESULT_ACCESS)] = GLM_DEMAND_READ|
1583 GLM_LLC_ACCESS,
1584 [C(RESULT_MISS)] = GLM_DEMAND_READ|
1585 GLM_LLC_MISS,
1586 },
1587 [C(OP_WRITE)] = {
1588 [C(RESULT_ACCESS)] = GLM_DEMAND_WRITE|
1589 GLM_LLC_ACCESS,
1590 [C(RESULT_MISS)] = GLM_DEMAND_WRITE|
1591 GLM_LLC_MISS,
1592 },
1593 [C(OP_PREFETCH)] = {
1594 [C(RESULT_ACCESS)] = GLM_DEMAND_PREFETCH|
1595 GLM_LLC_ACCESS,
1596 [C(RESULT_MISS)] = GLM_DEMAND_PREFETCH|
1597 GLM_LLC_MISS,
1598 },
1599 },
1600};
1601
1468#define KNL_OT_L2_HITE BIT_ULL(19) /* Other Tile L2 Hit */ 1602#define KNL_OT_L2_HITE BIT_ULL(19) /* Other Tile L2 Hit */
1469#define KNL_OT_L2_HITF BIT_ULL(20) /* Other Tile L2 Hit */ 1603#define KNL_OT_L2_HITF BIT_ULL(20) /* Other Tile L2 Hit */
1470#define KNL_MCDRAM_LOCAL BIT_ULL(21) 1604#define KNL_MCDRAM_LOCAL BIT_ULL(21)
@@ -3456,6 +3590,29 @@ __init int intel_pmu_init(void)
3456 pr_cont("Silvermont events, "); 3590 pr_cont("Silvermont events, ");
3457 break; 3591 break;
3458 3592
3593 case 92: /* 14nm Atom "Goldmont" */
3594 case 95: /* 14nm Atom "Goldmont Denverton" */
3595 memcpy(hw_cache_event_ids, glm_hw_cache_event_ids,
3596 sizeof(hw_cache_event_ids));
3597 memcpy(hw_cache_extra_regs, glm_hw_cache_extra_regs,
3598 sizeof(hw_cache_extra_regs));
3599
3600 intel_pmu_lbr_init_skl();
3601
3602 x86_pmu.event_constraints = intel_slm_event_constraints;
3603 x86_pmu.pebs_constraints = intel_glm_pebs_event_constraints;
3604 x86_pmu.extra_regs = intel_glm_extra_regs;
3605 /*
3606 * It's recommended to use CPU_CLK_UNHALTED.CORE_P + NPEBS
3607 * for precise cycles.
3608 * :pp is identical to :ppp
3609 */
3610 x86_pmu.pebs_aliases = NULL;
3611 x86_pmu.pebs_prec_dist = true;
3612 x86_pmu.flags |= PMU_FL_HAS_RSP_1;
3613 pr_cont("Goldmont events, ");
3614 break;
3615
3459 case 37: /* 32nm Westmere */ 3616 case 37: /* 32nm Westmere */
3460 case 44: /* 32nm Westmere-EP */ 3617 case 44: /* 32nm Westmere-EP */
3461 case 47: /* 32nm Westmere-EX */ 3618 case 47: /* 32nm Westmere-EX */
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index 8584b90d8e0b..7ce9f3f669e6 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -645,6 +645,12 @@ struct event_constraint intel_slm_pebs_event_constraints[] = {
645 EVENT_CONSTRAINT_END 645 EVENT_CONSTRAINT_END
646}; 646};
647 647
648struct event_constraint intel_glm_pebs_event_constraints[] = {
649 /* Allow all events as PEBS with no flags */
650 INTEL_ALL_EVENT_CONSTRAINT(0, 0x1),
651 EVENT_CONSTRAINT_END
652};
653
648struct event_constraint intel_nehalem_pebs_event_constraints[] = { 654struct event_constraint intel_nehalem_pebs_event_constraints[] = {
649 INTEL_PLD_CONSTRAINT(0x100b, 0xf), /* MEM_INST_RETIRED.* */ 655 INTEL_PLD_CONSTRAINT(0x100b, 0xf), /* MEM_INST_RETIRED.* */
650 INTEL_FLAGS_EVENT_CONSTRAINT(0x0f, 0xf), /* MEM_UNCORE_RETIRED.* */ 656 INTEL_FLAGS_EVENT_CONSTRAINT(0x0f, 0xf), /* MEM_UNCORE_RETIRED.* */
diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c
index 6c3b7c1780c9..ad26ca770c98 100644
--- a/arch/x86/events/intel/lbr.c
+++ b/arch/x86/events/intel/lbr.c
@@ -14,7 +14,8 @@ enum {
14 LBR_FORMAT_EIP_FLAGS = 0x03, 14 LBR_FORMAT_EIP_FLAGS = 0x03,
15 LBR_FORMAT_EIP_FLAGS2 = 0x04, 15 LBR_FORMAT_EIP_FLAGS2 = 0x04,
16 LBR_FORMAT_INFO = 0x05, 16 LBR_FORMAT_INFO = 0x05,
17 LBR_FORMAT_MAX_KNOWN = LBR_FORMAT_INFO, 17 LBR_FORMAT_TIME = 0x06,
18 LBR_FORMAT_MAX_KNOWN = LBR_FORMAT_TIME,
18}; 19};
19 20
20static enum { 21static enum {
@@ -464,6 +465,16 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
464 abort = !!(info & LBR_INFO_ABORT); 465 abort = !!(info & LBR_INFO_ABORT);
465 cycles = (info & LBR_INFO_CYCLES); 466 cycles = (info & LBR_INFO_CYCLES);
466 } 467 }
468
469 if (lbr_format == LBR_FORMAT_TIME) {
470 mis = !!(from & LBR_FROM_FLAG_MISPRED);
471 pred = !mis;
472 skip = 1;
473 cycles = ((to >> 48) & LBR_INFO_CYCLES);
474
475 to = (u64)((((s64)to) << 16) >> 16);
476 }
477
467 if (lbr_flags & LBR_EIP_FLAGS) { 478 if (lbr_flags & LBR_EIP_FLAGS) {
468 mis = !!(from & LBR_FROM_FLAG_MISPRED); 479 mis = !!(from & LBR_FROM_FLAG_MISPRED);
469 pred = !mis; 480 pred = !mis;
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index ad4dc7ffffb5..8b78481d1e64 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -859,6 +859,8 @@ extern struct event_constraint intel_atom_pebs_event_constraints[];
859 859
860extern struct event_constraint intel_slm_pebs_event_constraints[]; 860extern struct event_constraint intel_slm_pebs_event_constraints[];
861 861
862extern struct event_constraint intel_glm_pebs_event_constraints[];
863
862extern struct event_constraint intel_nehalem_pebs_event_constraints[]; 864extern struct event_constraint intel_nehalem_pebs_event_constraints[];
863 865
864extern struct event_constraint intel_westmere_pebs_event_constraints[]; 866extern struct event_constraint intel_westmere_pebs_event_constraints[];