author      Linus Torvalds <torvalds@linux-foundation.org>    2017-07-21 14:12:48 -0400
committer   Linus Torvalds <torvalds@linux-foundation.org>    2017-07-21 14:12:48 -0400
commit      bbcdea658f42070d25e7764f1b81785a51cb1642 (patch)
tree        ea5c21c180560918e94aa83a939c9d2507812ff5
parent      8b810a3a35eec9cd5e9245266dd0ae10de063094 (diff)
parent      df6c3db8d30fb1699ccbc403196b86324f4257af (diff)
Merge branch 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf fixes from Ingo Molnar:
 "Two hw-enablement patches, two race fixes, three fixes for regressions
  of semantics, plus a number of tooling fixes"

* 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  perf/x86/intel: Add proper condition to run sched_task callbacks
  perf/core: Fix locking for children siblings group read
  perf/core: Fix scheduling regression of pinned groups
  perf/x86/intel: Fix debug_store reset field for freq events
  perf/x86/intel: Add Goldmont Plus CPU PMU support
  perf/x86/intel: Enable C-state residency events for Apollo Lake
  perf symbols: Accept zero as the kernel base address
  Revert "perf/core: Drop kernel samples even though :u is specified"
  perf annotate: Fix broken arrow at row 0 connecting jmp instruction to its target
  perf evsel: State in the default event name if attr.exclude_kernel is set
  perf evsel: Fix attr.exclude_kernel setting for default cycles:p
-rw-r--r--   arch/x86/events/intel/core.c     164
-rw-r--r--   arch/x86/events/intel/cstate.c    26
-rw-r--r--   arch/x86/events/intel/ds.c        22
-rw-r--r--   arch/x86/events/intel/lbr.c        4
-rw-r--r--   arch/x86/events/perf_event.h       2
-rw-r--r--   kernel/events/core.c              33
-rw-r--r--   tools/perf/ui/browser.c            2
-rw-r--r--   tools/perf/util/evsel.c            8
-rw-r--r--   tools/perf/util/machine.c          2
9 files changed, 221 insertions, 42 deletions
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index aa62437d1aa1..98b0f0729527 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -1708,6 +1708,120 @@ static __initconst const u64 glm_hw_cache_extra_regs
 	},
 };
 
+static __initconst const u64 glp_hw_cache_event_ids
+				[PERF_COUNT_HW_CACHE_MAX]
+				[PERF_COUNT_HW_CACHE_OP_MAX]
+				[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+	[C(L1D)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)] = 0x81d0,	/* MEM_UOPS_RETIRED.ALL_LOADS */
+			[C(RESULT_MISS)] = 0x0,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)] = 0x82d0,	/* MEM_UOPS_RETIRED.ALL_STORES */
+			[C(RESULT_MISS)] = 0x0,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)] = 0x0,
+			[C(RESULT_MISS)] = 0x0,
+		},
+	},
+	[C(L1I)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)] = 0x0380,	/* ICACHE.ACCESSES */
+			[C(RESULT_MISS)] = 0x0280,	/* ICACHE.MISSES */
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)] = -1,
+			[C(RESULT_MISS)] = -1,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)] = 0x0,
+			[C(RESULT_MISS)] = 0x0,
+		},
+	},
+	[C(LL)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)] = 0x1b7,	/* OFFCORE_RESPONSE */
+			[C(RESULT_MISS)] = 0x1b7,	/* OFFCORE_RESPONSE */
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)] = 0x1b7,	/* OFFCORE_RESPONSE */
+			[C(RESULT_MISS)] = 0x1b7,	/* OFFCORE_RESPONSE */
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)] = 0x0,
+			[C(RESULT_MISS)] = 0x0,
+		},
+	},
+	[C(DTLB)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)] = 0x81d0,	/* MEM_UOPS_RETIRED.ALL_LOADS */
+			[C(RESULT_MISS)] = 0xe08,	/* DTLB_LOAD_MISSES.WALK_COMPLETED */
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)] = 0x82d0,	/* MEM_UOPS_RETIRED.ALL_STORES */
+			[C(RESULT_MISS)] = 0xe49,	/* DTLB_STORE_MISSES.WALK_COMPLETED */
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)] = 0x0,
+			[C(RESULT_MISS)] = 0x0,
+		},
+	},
+	[C(ITLB)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)] = 0x00c0,	/* INST_RETIRED.ANY_P */
+			[C(RESULT_MISS)] = 0x0481,	/* ITLB.MISS */
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)] = -1,
+			[C(RESULT_MISS)] = -1,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)] = -1,
+			[C(RESULT_MISS)] = -1,
+		},
+	},
+	[C(BPU)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)] = 0x00c4,	/* BR_INST_RETIRED.ALL_BRANCHES */
+			[C(RESULT_MISS)] = 0x00c5,	/* BR_MISP_RETIRED.ALL_BRANCHES */
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)] = -1,
+			[C(RESULT_MISS)] = -1,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)] = -1,
+			[C(RESULT_MISS)] = -1,
+		},
+	},
+};
+
+static __initconst const u64 glp_hw_cache_extra_regs
+				[PERF_COUNT_HW_CACHE_MAX]
+				[PERF_COUNT_HW_CACHE_OP_MAX]
+				[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+	[C(LL)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)] = GLM_DEMAND_READ|
+					     GLM_LLC_ACCESS,
+			[C(RESULT_MISS)] = GLM_DEMAND_READ|
+					   GLM_LLC_MISS,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)] = GLM_DEMAND_WRITE|
+					     GLM_LLC_ACCESS,
+			[C(RESULT_MISS)] = GLM_DEMAND_WRITE|
+					   GLM_LLC_MISS,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)] = 0x0,
+			[C(RESULT_MISS)] = 0x0,
+		},
+	},
+};
+
 #define KNL_OT_L2_HITE		BIT_ULL(19) /* Other Tile L2 Hit */
 #define KNL_OT_L2_HITF		BIT_ULL(20) /* Other Tile L2 Hit */
 #define KNL_MCDRAM_LOCAL	BIT_ULL(21)
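The glp_hw_cache_event_ids table above is what lets generic PERF_TYPE_HW_CACHE events resolve to Goldmont Plus raw event codes: the three C() indices select cache unit, operation and result. As a rough illustration of that lookup, here is a minimal userspace sketch with simplified stand-in enums and only a handful of entries copied from the hunk (not the kernel's actual definitions):

    #include <stdio.h>
    #include <stdint.h>

    /* Illustrative stand-ins for the kernel's PERF_COUNT_HW_CACHE_* enums. */
    enum cache { L1D, L1I, LL, DTLB, ITLB, BPU, CACHE_MAX };
    enum cache_op { OP_READ, OP_WRITE, OP_PREFETCH, OP_MAX };
    enum cache_result { RESULT_ACCESS, RESULT_MISS, RESULT_MAX };

    /* A small excerpt of the Goldmont Plus table; unset entries stay 0. */
    static const uint64_t hw_cache_event_ids[CACHE_MAX][OP_MAX][RESULT_MAX] = {
        [L1D][OP_READ][RESULT_ACCESS]  = 0x81d0, /* MEM_UOPS_RETIRED.ALL_LOADS */
        [L1D][OP_WRITE][RESULT_ACCESS] = 0x82d0, /* MEM_UOPS_RETIRED.ALL_STORES */
        [L1I][OP_READ][RESULT_ACCESS]  = 0x0380, /* ICACHE.ACCESSES */
        [L1I][OP_READ][RESULT_MISS]    = 0x0280, /* ICACHE.MISSES */
        [BPU][OP_READ][RESULT_MISS]    = 0x00c5, /* BR_MISP_RETIRED.ALL_BRANCHES */
    };

    /* Look up the raw event code for a generic (cache, op, result) triple. */
    static uint64_t cache_config_to_raw(unsigned cache, unsigned op, unsigned result)
    {
        if (cache >= CACHE_MAX || op >= OP_MAX || result >= RESULT_MAX)
            return (uint64_t)-1; /* out of range in this sketch */
        return hw_cache_event_ids[cache][op][result];
    }

    int main(void)
    {
        printf("L1D read access -> raw 0x%llx\n",
               (unsigned long long)cache_config_to_raw(L1D, OP_READ, RESULT_ACCESS));
        printf("branch misses   -> raw 0x%llx\n",
               (unsigned long long)cache_config_to_raw(BPU, OP_READ, RESULT_MISS));
        return 0;
    }

In the real table a zero entry marks a combination with no event available and -1 marks one the hardware cannot express; the sketch only reproduces the lookup itself.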
@@ -3016,6 +3130,9 @@ static int hsw_hw_config(struct perf_event *event)
 	return 0;
 }
 
+static struct event_constraint counter0_constraint =
+			INTEL_ALL_EVENT_CONSTRAINT(0, 0x1);
+
 static struct event_constraint counter2_constraint =
 			EVENT_CONSTRAINT(0, 0x4, 0);
 
@@ -3037,6 +3154,21 @@ hsw_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
 	return c;
 }
 
+static struct event_constraint *
+glp_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
+			  struct perf_event *event)
+{
+	struct event_constraint *c;
+
+	/* :ppp means to do reduced skid PEBS which is PMC0 only. */
+	if (event->attr.precise_ip == 3)
+		return &counter0_constraint;
+
+	c = intel_get_event_constraints(cpuc, idx, event);
+
+	return c;
+}
+
 /*
  * Broadwell:
  *
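glp_get_event_constraints() above restricts reduced-skid PEBS (the :ppp modifier, attr.precise_ip == 3) to PMC0 by returning a constraint whose counter mask is 0x1. A minimal sketch of how such a mask-based choice plays out, using a made-up struct rather than the kernel's event_constraint:

    #include <stdio.h>

    /* Hypothetical, simplified view of a counter constraint: a bitmask of
     * which general-purpose counters an event may be scheduled on. */
    struct counter_constraint {
        unsigned int idxmsk;    /* bit n set => counter n allowed */
    };

    static const struct counter_constraint counter0_only = { .idxmsk = 0x1 };
    static const struct counter_constraint any_of_four   = { .idxmsk = 0xf };

    /* Pick the constraint the way the new callback does for :ppp:
     * precise_ip == 3 means reduced-skid PEBS, which only PMC0 supports. */
    static const struct counter_constraint *pick(unsigned int precise_ip)
    {
        return precise_ip == 3 ? &counter0_only : &any_of_four;
    }

    int main(void)
    {
        printf("cycles:ppp -> counter mask 0x%x\n", pick(3)->idxmsk);
        printf("cycles:p   -> counter mask 0x%x\n", pick(1)->idxmsk);
        return 0;
    }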
@@ -3265,10 +3397,8 @@ static void intel_pmu_cpu_dying(int cpu)
 static void intel_pmu_sched_task(struct perf_event_context *ctx,
 				 bool sched_in)
 {
-	if (x86_pmu.pebs_active)
-		intel_pmu_pebs_sched_task(ctx, sched_in);
-	if (x86_pmu.lbr_nr)
-		intel_pmu_lbr_sched_task(ctx, sched_in);
+	intel_pmu_pebs_sched_task(ctx, sched_in);
+	intel_pmu_lbr_sched_task(ctx, sched_in);
 }
 
 PMU_FORMAT_ATTR(offcore_rsp, "config1:0-63");
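This hunk removes the pebs_active/lbr_nr checks from the caller; the matching hunks in ds.c and lbr.c further down move the "anything to do?" test into the callees themselves (pebs_needs_sched_cb() and cpuc->lbr_users), which is the "proper condition" the patch title refers to. A small userspace sketch of that guard-in-the-callee pattern (names and counters are illustrative only):

    #include <stdbool.h>
    #include <stdio.h>

    static int pebs_users;
    static int lbr_users;

    /* Each callee checks its own precondition... */
    static void pebs_sched_task(bool sched_in)
    {
        if (!pebs_users)
            return;         /* nothing scheduled that uses PEBS */
        printf("PEBS sched_task(sched_in=%d)\n", sched_in);
    }

    static void lbr_sched_task(bool sched_in)
    {
        if (!lbr_users)
            return;         /* no LBR users on this CPU */
        printf("LBR sched_task(sched_in=%d)\n", sched_in);
    }

    /* ...so the caller can stay unconditional, like the new intel_pmu_sched_task(). */
    static void pmu_sched_task(bool sched_in)
    {
        pebs_sched_task(sched_in);
        lbr_sched_task(sched_in);
    }

    int main(void)
    {
        pmu_sched_task(true);   /* prints nothing: no users registered */
        pebs_users = 1;
        pmu_sched_task(false);  /* only the PEBS side runs */
        return 0;
    }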
@@ -3838,6 +3968,32 @@ __init int intel_pmu_init(void)
 		pr_cont("Goldmont events, ");
 		break;
 
+	case INTEL_FAM6_ATOM_GEMINI_LAKE:
+		memcpy(hw_cache_event_ids, glp_hw_cache_event_ids,
+		       sizeof(hw_cache_event_ids));
+		memcpy(hw_cache_extra_regs, glp_hw_cache_extra_regs,
+		       sizeof(hw_cache_extra_regs));
+
+		intel_pmu_lbr_init_skl();
+
+		x86_pmu.event_constraints = intel_slm_event_constraints;
+		x86_pmu.pebs_constraints = intel_glp_pebs_event_constraints;
+		x86_pmu.extra_regs = intel_glm_extra_regs;
+		/*
+		 * It's recommended to use CPU_CLK_UNHALTED.CORE_P + NPEBS
+		 * for precise cycles.
+		 */
+		x86_pmu.pebs_aliases = NULL;
+		x86_pmu.pebs_prec_dist = true;
+		x86_pmu.lbr_pt_coexist = true;
+		x86_pmu.flags |= PMU_FL_HAS_RSP_1;
+		x86_pmu.get_event_constraints = glp_get_event_constraints;
+		x86_pmu.cpu_events = glm_events_attrs;
+		/* Goldmont Plus has 4-wide pipeline */
+		event_attr_td_total_slots_scale_glm.event_str = "4";
+		pr_cont("Goldmont plus events, ");
+		break;
+
 	case INTEL_FAM6_WESTMERE:
 	case INTEL_FAM6_WESTMERE_EP:
 	case INTEL_FAM6_WESTMERE_EX:
diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c
index 238ae3248ba5..4cf100ff2a37 100644
--- a/arch/x86/events/intel/cstate.c
+++ b/arch/x86/events/intel/cstate.c
@@ -40,16 +40,16 @@
  * Model specific counters:
  *	MSR_CORE_C1_RES: CORE C1 Residency Counter
  *			 perf code: 0x00
- *			 Available model: SLM,AMT
+ *			 Available model: SLM,AMT,GLM
  *			 Scope: Core (each processor core has a MSR)
  *	MSR_CORE_C3_RESIDENCY: CORE C3 Residency Counter
  *			       perf code: 0x01
- *			       Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL
+ *			       Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL,GLM
  *			       Scope: Core
  *	MSR_CORE_C6_RESIDENCY: CORE C6 Residency Counter
  *			       perf code: 0x02
  *			       Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW
- *					       SKL,KNL
+ *					       SKL,KNL,GLM
  *			       Scope: Core
  *	MSR_CORE_C7_RESIDENCY: CORE C7 Residency Counter
  *			       perf code: 0x03
@@ -57,16 +57,17 @@
  *			       Scope: Core
  *	MSR_PKG_C2_RESIDENCY:  Package C2 Residency Counter.
  *			       perf code: 0x00
- *			       Available model: SNB,IVB,HSW,BDW,SKL,KNL
+ *			       Available model: SNB,IVB,HSW,BDW,SKL,KNL,GLM
  *			       Scope: Package (physical package)
  *	MSR_PKG_C3_RESIDENCY:  Package C3 Residency Counter.
  *			       perf code: 0x01
  *			       Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL,KNL
+ *					       GLM
  *			       Scope: Package (physical package)
  *	MSR_PKG_C6_RESIDENCY:  Package C6 Residency Counter.
  *			       perf code: 0x02
  *			       Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW
- *					       SKL,KNL
+ *					       SKL,KNL,GLM
  *			       Scope: Package (physical package)
  *	MSR_PKG_C7_RESIDENCY:  Package C7 Residency Counter.
  *			       perf code: 0x03
@@ -82,7 +83,7 @@
  *			       Scope: Package (physical package)
  *	MSR_PKG_C10_RESIDENCY: Package C10 Residency Counter.
  *			       perf code: 0x06
- *			       Available model: HSW ULT only
+ *			       Available model: HSW ULT, GLM
  *			       Scope: Package (physical package)
  *
  */
@@ -504,6 +505,17 @@ static const struct cstate_model knl_cstates __initconst = {
 };
 
 
+static const struct cstate_model glm_cstates __initconst = {
+	.core_events		= BIT(PERF_CSTATE_CORE_C1_RES) |
+				  BIT(PERF_CSTATE_CORE_C3_RES) |
+				  BIT(PERF_CSTATE_CORE_C6_RES),
+
+	.pkg_events		= BIT(PERF_CSTATE_PKG_C2_RES) |
+				  BIT(PERF_CSTATE_PKG_C3_RES) |
+				  BIT(PERF_CSTATE_PKG_C6_RES) |
+				  BIT(PERF_CSTATE_PKG_C10_RES),
+};
+
 
 #define X86_CSTATES_MODEL(model, states)	 \
 	{ X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long) &(states) }
@@ -546,6 +558,8 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = {
 
 	X86_CSTATES_MODEL(INTEL_FAM6_XEON_PHI_KNL, knl_cstates),
 	X86_CSTATES_MODEL(INTEL_FAM6_XEON_PHI_KNM, knl_cstates),
+
+	X86_CSTATES_MODEL(INTEL_FAM6_ATOM_GOLDMONT, glm_cstates),
 	{ },
 };
 MODULE_DEVICE_TABLE(x86cpu, intel_cstates_match);
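glm_cstates simply advertises, as two bitmasks, which core-scope and package-scope residency counters Goldmont/Apollo Lake exposes; the X86_CSTATES_MODEL() entry then ties that description to the CPU model. A small stand-alone sketch of building and querying such masks with a BIT()-style macro (indices and names here are illustrative, not the driver's):

    #include <stdio.h>

    #define BIT(n)  (1UL << (n))

    /* Stand-ins for the driver's PERF_CSTATE_* indices (illustrative values). */
    enum { CORE_C1 = 0, CORE_C3, CORE_C6, CORE_C7 };
    enum { PKG_C2 = 0, PKG_C3, PKG_C6, PKG_C7, PKG_C8, PKG_C9, PKG_C10 };

    struct cstate_model {
        unsigned long core_events;
        unsigned long pkg_events;
    };

    /* Roughly what glm_cstates advertises for Goldmont/Apollo Lake. */
    static const struct cstate_model glm = {
        .core_events = BIT(CORE_C1) | BIT(CORE_C3) | BIT(CORE_C6),
        .pkg_events  = BIT(PKG_C2) | BIT(PKG_C3) | BIT(PKG_C6) | BIT(PKG_C10),
    };

    int main(void)
    {
        printf("core C6 supported: %s\n", (glm.core_events & BIT(CORE_C6)) ? "yes" : "no");
        printf("pkg  C7 supported: %s\n", (glm.pkg_events & BIT(PKG_C7)) ? "yes" : "no");
        return 0;
    }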
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index c6d23ffe422d..a322fed5f8ed 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -606,12 +606,6 @@ static inline void intel_pmu_drain_pebs_buffer(void)
 	x86_pmu.drain_pebs(&regs);
 }
 
-void intel_pmu_pebs_sched_task(struct perf_event_context *ctx, bool sched_in)
-{
-	if (!sched_in)
-		intel_pmu_drain_pebs_buffer();
-}
-
 /*
  * PEBS
  */
@@ -651,6 +645,12 @@ struct event_constraint intel_glm_pebs_event_constraints[] = {
 	EVENT_CONSTRAINT_END
 };
 
+struct event_constraint intel_glp_pebs_event_constraints[] = {
+	/* Allow all events as PEBS with no flags */
+	INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
+	EVENT_CONSTRAINT_END
+};
+
 struct event_constraint intel_nehalem_pebs_event_constraints[] = {
 	INTEL_PLD_CONSTRAINT(0x100b, 0xf),	/* MEM_INST_RETIRED.* */
 	INTEL_FLAGS_EVENT_CONSTRAINT(0x0f, 0xf),	/* MEM_UNCORE_RETIRED.* */
@@ -816,6 +816,14 @@ static inline bool pebs_needs_sched_cb(struct cpu_hw_events *cpuc)
 	return cpuc->n_pebs && (cpuc->n_pebs == cpuc->n_large_pebs);
 }
 
+void intel_pmu_pebs_sched_task(struct perf_event_context *ctx, bool sched_in)
+{
+	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+	if (!sched_in && pebs_needs_sched_cb(cpuc))
+		intel_pmu_drain_pebs_buffer();
+}
+
 static inline void pebs_update_threshold(struct cpu_hw_events *cpuc)
 {
 	struct debug_store *ds = cpuc->ds;
@@ -889,6 +897,8 @@ void intel_pmu_pebs_enable(struct perf_event *event)
 	if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) {
 		ds->pebs_event_reset[hwc->idx] =
 			(u64)(-hwc->sample_period) & x86_pmu.cntval_mask;
+	} else {
+		ds->pebs_event_reset[hwc->idx] = 0;
 	}
 }
 
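The else branch added to intel_pmu_pebs_enable() is the "debug_store reset field for freq events" fix: only auto-reload events should leave a negative reload value in pebs_event_reset[]; for frequency-mode events the slot must be cleared, otherwise a value left behind by a previous auto-reload user of the same counter would be reused. A minimal sketch of the two cases with made-up period and mask values:

    #include <stdio.h>
    #include <stdint.h>
    #include <stdbool.h>

    #define NUM_COUNTERS 4

    int main(void)
    {
        uint64_t pebs_event_reset[NUM_COUNTERS] = { 0 };
        const uint64_t cntval_mask = (1ULL << 48) - 1;  /* 48-bit counters, illustrative */

        int idx = 0;
        uint64_t sample_period = 100000;
        bool auto_reload = true;

        /* Auto-reload: hardware restarts the counter at -period after each PEBS record. */
        if (auto_reload)
            pebs_event_reset[idx] = (uint64_t)(-sample_period) & cntval_mask;
        else
            pebs_event_reset[idx] = 0;  /* freq events: clear any stale reset value */
        printf("auto-reload: reset[%d] = 0x%llx\n", idx,
               (unsigned long long)pebs_event_reset[idx]);

        /* Same counter slot later reused by a frequency-mode event. */
        auto_reload = false;
        if (auto_reload)
            pebs_event_reset[idx] = (uint64_t)(-sample_period) & cntval_mask;
        else
            pebs_event_reset[idx] = 0;
        printf("freq event:  reset[%d] = 0x%llx\n", idx,
               (unsigned long long)pebs_event_reset[idx]);
        return 0;
    }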
diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c
index eb261656a320..955457a30197 100644
--- a/arch/x86/events/intel/lbr.c
+++ b/arch/x86/events/intel/lbr.c
@@ -380,8 +380,12 @@ static void __intel_pmu_lbr_save(struct x86_perf_task_context *task_ctx)
 
 void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in)
 {
+	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 	struct x86_perf_task_context *task_ctx;
 
+	if (!cpuc->lbr_users)
+		return;
+
 	/*
 	 * If LBR callstack feature is enabled and the stack was saved when
 	 * the task was scheduled out, restore the stack. Otherwise flush
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index 53728eea1bed..476aec3a4cab 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -879,6 +879,8 @@ extern struct event_constraint intel_slm_pebs_event_constraints[];
 
 extern struct event_constraint intel_glm_pebs_event_constraints[];
 
+extern struct event_constraint intel_glp_pebs_event_constraints[];
+
 extern struct event_constraint intel_nehalem_pebs_event_constraints[];
 
 extern struct event_constraint intel_westmere_pebs_event_constraints[];
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 1538df9b2b65..426c2ffba16d 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -1452,6 +1452,13 @@ static enum event_type_t get_event_type(struct perf_event *event)
 
 	lockdep_assert_held(&ctx->lock);
 
+	/*
+	 * It's 'group type', really, because if our group leader is
+	 * pinned, so are we.
+	 */
+	if (event->group_leader != event)
+		event = event->group_leader;
+
 	event_type = event->attr.pinned ? EVENT_PINNED : EVENT_FLEXIBLE;
 	if (!ctx->task)
 		event_type |= EVENT_CPU;
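The group-leader hop added to get_event_type() is the pinned-group scheduling fix: a sibling of a pinned leader must be classified as pinned too, because scheduling always operates on the whole group. A stripped-down userspace rendering of that classification (the structs are stand-ins, not the kernel's perf_event):

    #include <stdio.h>

    /* Simplified stand-in for struct perf_event. */
    struct event {
        struct event *group_leader; /* points to itself for a leader */
        int pinned;
    };

    enum event_type { EVENT_FLEXIBLE = 1, EVENT_PINNED = 2 };

    static enum event_type get_event_type(const struct event *event)
    {
        /* It's really the group's type: if the leader is pinned, so are we. */
        if (event->group_leader != event)
            event = event->group_leader;
        return event->pinned ? EVENT_PINNED : EVENT_FLEXIBLE;
    }

    int main(void)
    {
        struct event leader  = { .group_leader = &leader, .pinned = 1 };
        struct event sibling = { .group_leader = &leader, .pinned = 0 };

        printf("leader  -> %s\n", get_event_type(&leader) == EVENT_PINNED ? "pinned" : "flexible");
        printf("sibling -> %s\n", get_event_type(&sibling) == EVENT_PINNED ? "pinned" : "flexible");
        return 0;
    }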
@@ -4378,7 +4385,9 @@ EXPORT_SYMBOL_GPL(perf_event_read_value);
 static int __perf_read_group_add(struct perf_event *leader,
 					u64 read_format, u64 *values)
 {
+	struct perf_event_context *ctx = leader->ctx;
 	struct perf_event *sub;
+	unsigned long flags;
 	int n = 1; /* skip @nr */
 	int ret;
 
@@ -4408,12 +4417,15 @@ static int __perf_read_group_add(struct perf_event *leader,
 	if (read_format & PERF_FORMAT_ID)
 		values[n++] = primary_event_id(leader);
 
+	raw_spin_lock_irqsave(&ctx->lock, flags);
+
 	list_for_each_entry(sub, &leader->sibling_list, group_entry) {
 		values[n++] += perf_event_count(sub);
 		if (read_format & PERF_FORMAT_ID)
 			values[n++] = primary_event_id(sub);
 	}
 
+	raw_spin_unlock_irqrestore(&ctx->lock, flags);
 	return 0;
 }
 
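Taking ctx->lock around the sibling_list walk keeps a concurrently exiting child from changing the group while its counts are being summed. As a loose userspace analogue of the idea (a pthread mutex standing in for the kernel's raw spinlock and list primitives, compile with -lpthread):

    #include <pthread.h>
    #include <stdio.h>

    struct sibling {
        long count;
        struct sibling *next;
    };

    static pthread_mutex_t ctx_lock = PTHREAD_MUTEX_INITIALIZER;
    static struct sibling *sibling_list;    /* protected by ctx_lock */

    /* Analogue of __perf_read_group_add(): sum the group under the context lock
     * so a concurrent add/remove of siblings cannot race with the walk. */
    static long read_group_sum(long leader_count)
    {
        long sum = leader_count;

        pthread_mutex_lock(&ctx_lock);
        for (struct sibling *s = sibling_list; s; s = s->next)
            sum += s->count;
        pthread_mutex_unlock(&ctx_lock);

        return sum;
    }

    int main(void)
    {
        struct sibling b = { .count = 7, .next = NULL };
        struct sibling a = { .count = 5, .next = &b };

        pthread_mutex_lock(&ctx_lock);
        sibling_list = &a;
        pthread_mutex_unlock(&ctx_lock);

        printf("group sum = %ld\n", read_group_sum(100));
        return 0;
    }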
@@ -7321,21 +7333,6 @@ int perf_event_account_interrupt(struct perf_event *event)
 	return __perf_event_account_interrupt(event, 1);
 }
 
-static bool sample_is_allowed(struct perf_event *event, struct pt_regs *regs)
-{
-	/*
-	 * Due to interrupt latency (AKA "skid"), we may enter the
-	 * kernel before taking an overflow, even if the PMU is only
-	 * counting user events.
-	 * To avoid leaking information to userspace, we must always
-	 * reject kernel samples when exclude_kernel is set.
-	 */
-	if (event->attr.exclude_kernel && !user_mode(regs))
-		return false;
-
-	return true;
-}
-
 /*
  * Generic event overflow handling, sampling.
  */
@@ -7357,12 +7354,6 @@ static int __perf_event_overflow(struct perf_event *event,
 	ret = __perf_event_account_interrupt(event, throttle);
 
 	/*
-	 * For security, drop the skid kernel samples if necessary.
-	 */
-	if (!sample_is_allowed(event, regs))
-		return ret;
-
-	/*
 	 * XXX event_limit might not quite work as expected on inherited
 	 * events
diff --git a/tools/perf/ui/browser.c b/tools/perf/ui/browser.c
index a4d3762cd825..83874b0e266c 100644
--- a/tools/perf/ui/browser.c
+++ b/tools/perf/ui/browser.c
@@ -704,7 +704,7 @@ static void __ui_browser__line_arrow_down(struct ui_browser *browser,
 			ui_browser__gotorc(browser, row, column + 1);
 			SLsmg_draw_hline(2);
 
-			if (row++ == 0)
+			if (++row == 0)
 				goto out;
 		} else
 			row = 0;
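The annotate-browser fix is a post- versus pre-increment mix-up: 'row++ == 0' tests the value before the increment, so the branch fired whenever row was still 0 and the arrow was cut short; '++row == 0' tests the incremented value, so row 0 is drawn like any other row. A two-case demonstration:

    #include <stdio.h>

    int main(void)
    {
        unsigned int row = 0;
        int post = (row++ == 0);        /* tests the old value: true at row 0 */
        unsigned int after_post = row;

        row = 0;
        int pre = (++row == 0);         /* tests the new value: false at row 0 */
        unsigned int after_pre = row;

        printf("row++ == 0 -> %d, row afterwards = %u\n", post, after_post);
        printf("++row == 0 -> %d, row afterwards = %u\n", pre, after_pre);
        return 0;
    }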
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 87b431886670..413f74df08de 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -273,7 +273,7 @@ struct perf_evsel *perf_evsel__new_cycles(void)
 	struct perf_event_attr attr = {
 		.type	= PERF_TYPE_HARDWARE,
 		.config	= PERF_COUNT_HW_CPU_CYCLES,
-		.exclude_kernel	= 1,
+		.exclude_kernel	= geteuid() != 0,
 	};
 	struct perf_evsel *evsel;
 
@@ -298,8 +298,10 @@ struct perf_evsel *perf_evsel__new_cycles(void)
 		goto out;
 
 	/* use asprintf() because free(evsel) assumes name is allocated */
-	if (asprintf(&evsel->name, "cycles%.*s",
-		     attr.precise_ip ? attr.precise_ip + 1 : 0, ":ppp") < 0)
+	if (asprintf(&evsel->name, "cycles%s%s%.*s",
+		     (attr.precise_ip || attr.exclude_kernel) ? ":" : "",
+		     attr.exclude_kernel ? "u" : "",
+		     attr.precise_ip ? attr.precise_ip + 1 : 0, "ppp") < 0)
 		goto error_free;
 out:
 	return evsel;
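With the new format string the default event's name now records both exclude_kernel and the precision actually applied, so an unprivileged run ends up with something like "cycles:uppp" instead of a bare "cycles". The naming logic is ordinary userspace code and can be exercised on its own; the attr struct below is a cut-down stand-in for perf_event_attr:

    #define _GNU_SOURCE             /* for asprintf() */
    #include <stdio.h>
    #include <stdlib.h>

    struct attr {
        int precise_ip;             /* 0..3 */
        int exclude_kernel;         /* 0 or 1 */
    };

    static char *cycles_name(struct attr attr)
    {
        char *name;

        /* Same format as perf_evsel__new_cycles() after the fix. */
        if (asprintf(&name, "cycles%s%s%.*s",
                     (attr.precise_ip || attr.exclude_kernel) ? ":" : "",
                     attr.exclude_kernel ? "u" : "",
                     attr.precise_ip ? attr.precise_ip + 1 : 0, "ppp") < 0)
            return NULL;
        return name;
    }

    int main(void)
    {
        struct attr combos[] = {
            { .precise_ip = 3, .exclude_kernel = 0 },
            { .precise_ip = 3, .exclude_kernel = 1 },
            { .precise_ip = 0, .exclude_kernel = 1 },
            { .precise_ip = 0, .exclude_kernel = 0 },
        };

        for (unsigned int i = 0; i < sizeof(combos) / sizeof(combos[0]); i++) {
            char *name = cycles_name(combos[i]);
            printf("precise_ip=%d exclude_kernel=%d -> %s\n",
                   combos[i].precise_ip, combos[i].exclude_kernel,
                   name ? name : "(error)");
            free(name);
        }
        return 0;
    }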
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 5de2b86b9880..2e9eb6aa3ce2 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -2209,7 +2209,7 @@ int machine__get_kernel_start(struct machine *machine)
 	machine->kernel_start = 1ULL << 63;
 	if (map) {
 		err = map__load(map);
-		if (map->start)
+		if (!err)
 			machine->kernel_start = map->start;
 	}
 	return err;