| author | Ingo Molnar <mingo@kernel.org> | 2015-03-27 04:46:19 -0400 |
|---|---|---|
| committer | Ingo Molnar <mingo@kernel.org> | 2015-03-27 04:46:19 -0400 |
| commit | 936c663aed930972f7e185485fd6c2da69e33819 (patch) | |
| tree | 41bb069f66e109fc63c4114a4390de44d3068356 | |
| parent | 072e5a1cfabca7276744d24726e094d85721df5c (diff) | |
| parent | 50f16a8bf9d7a92c437ed1867d0f7e1dc6a9aca9 (diff) | |
Merge branch 'perf/x86' into perf/core, because it's ready
Signed-off-by: Ingo Molnar <mingo@kernel.org>
| -rw-r--r-- | arch/arm/kernel/hw_breakpoint.c | 2 |
| -rw-r--r-- | arch/arm64/kernel/hw_breakpoint.c | 2 |
| -rw-r--r-- | arch/x86/include/asm/cpufeature.h | 9 |
| -rw-r--r-- | arch/x86/include/asm/processor.h | 3 |
| -rw-r--r-- | arch/x86/kernel/cpu/Makefile | 2 |
| -rw-r--r-- | arch/x86/kernel/cpu/common.c | 39 |
| -rw-r--r-- | arch/x86/kernel/cpu/perf_event_intel_cqm.c | 1379 |
| -rw-r--r-- | include/linux/perf_event.h | 50 |
| -rw-r--r-- | kernel/events/core.c | 73 |
| -rw-r--r-- | kernel/events/hw_breakpoint.c | 8 |
| -rw-r--r-- | kernel/trace/trace_uprobe.c | 10 |
11 files changed, 1514 insertions, 63 deletions
diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c
index 7fc70ae21185..dc7d0a95bd36 100644
--- a/arch/arm/kernel/hw_breakpoint.c
+++ b/arch/arm/kernel/hw_breakpoint.c
@@ -648,7 +648,7 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp)
| 648 | * Per-cpu breakpoints are not supported by our stepping | 648 | * Per-cpu breakpoints are not supported by our stepping |
| 649 | * mechanism. | 649 | * mechanism. |
| 650 | */ | 650 | */ |
| 651 | if (!bp->hw.bp_target) | 651 | if (!bp->hw.target) |
| 652 | return -EINVAL; | 652 | return -EINVAL; |
| 653 | 653 | ||
| 654 | /* | 654 | /* |
diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c
index 98bbe06e469c..e7d934d3afe0 100644
--- a/arch/arm64/kernel/hw_breakpoint.c
+++ b/arch/arm64/kernel/hw_breakpoint.c
@@ -527,7 +527,7 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp)
| 527 | * Disallow per-task kernel breakpoints since these would | 527 | * Disallow per-task kernel breakpoints since these would |
| 528 | * complicate the stepping code. | 528 | * complicate the stepping code. |
| 529 | */ | 529 | */ |
| 530 | if (info->ctrl.privilege == AARCH64_BREAKPOINT_EL1 && bp->hw.bp_target) | 530 | if (info->ctrl.privilege == AARCH64_BREAKPOINT_EL1 && bp->hw.target) |
| 531 | return -EINVAL; | 531 | return -EINVAL; |
| 532 | 532 | ||
| 533 | return 0; | 533 | return 0; |
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 90a54851aedc..361922dcc9b1 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -12,7 +12,7 @@
| 12 | #include <asm/disabled-features.h> | 12 | #include <asm/disabled-features.h> |
| 13 | #endif | 13 | #endif |
| 14 | 14 | ||
| 15 | #define NCAPINTS 11 /* N 32-bit words worth of info */ | 15 | #define NCAPINTS 13 /* N 32-bit words worth of info */ |
| 16 | #define NBUGINTS 1 /* N 32-bit bug flags */ | 16 | #define NBUGINTS 1 /* N 32-bit bug flags */ |
| 17 | 17 | ||
| 18 | /* | 18 | /* |
@@ -226,6 +226,7 @@
| 226 | #define X86_FEATURE_ERMS ( 9*32+ 9) /* Enhanced REP MOVSB/STOSB */ | 226 | #define X86_FEATURE_ERMS ( 9*32+ 9) /* Enhanced REP MOVSB/STOSB */ |
| 227 | #define X86_FEATURE_INVPCID ( 9*32+10) /* Invalidate Processor Context ID */ | 227 | #define X86_FEATURE_INVPCID ( 9*32+10) /* Invalidate Processor Context ID */ |
| 228 | #define X86_FEATURE_RTM ( 9*32+11) /* Restricted Transactional Memory */ | 228 | #define X86_FEATURE_RTM ( 9*32+11) /* Restricted Transactional Memory */ |
| 229 | #define X86_FEATURE_CQM ( 9*32+12) /* Cache QoS Monitoring */ | ||
| 229 | #define X86_FEATURE_MPX ( 9*32+14) /* Memory Protection Extension */ | 230 | #define X86_FEATURE_MPX ( 9*32+14) /* Memory Protection Extension */ |
| 230 | #define X86_FEATURE_AVX512F ( 9*32+16) /* AVX-512 Foundation */ | 231 | #define X86_FEATURE_AVX512F ( 9*32+16) /* AVX-512 Foundation */ |
| 231 | #define X86_FEATURE_RDSEED ( 9*32+18) /* The RDSEED instruction */ | 232 | #define X86_FEATURE_RDSEED ( 9*32+18) /* The RDSEED instruction */ |
@@ -242,6 +243,12 @@
| 242 | #define X86_FEATURE_XGETBV1 (10*32+ 2) /* XGETBV with ECX = 1 */ | 243 | #define X86_FEATURE_XGETBV1 (10*32+ 2) /* XGETBV with ECX = 1 */ |
| 243 | #define X86_FEATURE_XSAVES (10*32+ 3) /* XSAVES/XRSTORS */ | 244 | #define X86_FEATURE_XSAVES (10*32+ 3) /* XSAVES/XRSTORS */ |
| 244 | 245 | ||
| 246 | /* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:0 (edx), word 11 */ | ||
| 247 | #define X86_FEATURE_CQM_LLC (11*32+ 1) /* LLC QoS if 1 */ | ||
| 248 | |||
| 249 | /* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:1 (edx), word 12 */ | ||
| 250 | #define X86_FEATURE_CQM_OCCUP_LLC (12*32+ 0) /* LLC occupancy monitoring if 1 */ | ||
| 251 | |||
| 245 | /* | 252 | /* |
| 246 | * BUG word(s) | 253 | * BUG word(s) |
| 247 | */ | 254 | */ |
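For reference, the new X86_FEATURE_* macros above use the usual (word * 32 + bit) encoding: word 11 carries the CPUID 0xF:0 EDX bits and word 12 the CPUID 0xF:1 EDX bits that get_cpu_cap() fills in below. A minimal, illustrative sketch of how such an index is tested against a capability array — the kernel itself uses the cpu_has()/boot_cpu_has() machinery, and the EX_* names and standalone helper here are made up for the example:

```c
#include <stdbool.h>
#include <stdint.h>

/* Illustrative only: mirrors the (word * 32 + bit) encoding added above. */
#define EX_NCAPINTS		13
#define EX_FEATURE_CQM_LLC	(11 * 32 + 1)	/* CPUID 0xF:0, EDX bit 1 */
#define EX_FEATURE_CQM_OCCUP	(12 * 32 + 0)	/* CPUID 0xF:1, EDX bit 0 */

static bool ex_cap_has(const uint32_t caps[EX_NCAPINTS], unsigned int feature)
{
	/* The word index selects the capability dword, the remainder the bit. */
	return caps[feature / 32] & (1u << (feature % 32));
}
```

cpu_has(c, X86_FEATURE_CQM_LLC) in the common.c hunk below resolves to exactly this kind of word/bit test against c->x86_capability[].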
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index ec1c93588cef..a12d50e04d7a 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -109,6 +109,9 @@ struct cpuinfo_x86 {
| 109 | /* in KB - valid for CPUS which support this call: */ | 109 | /* in KB - valid for CPUS which support this call: */ |
| 110 | int x86_cache_size; | 110 | int x86_cache_size; |
| 111 | int x86_cache_alignment; /* In bytes */ | 111 | int x86_cache_alignment; /* In bytes */ |
| 112 | /* Cache QoS architectural values: */ | ||
| 113 | int x86_cache_max_rmid; /* max index */ | ||
| 114 | int x86_cache_occ_scale; /* scale to bytes */ | ||
| 112 | int x86_power; | 115 | int x86_power; |
| 113 | unsigned long loops_per_jiffy; | 116 | unsigned long loops_per_jiffy; |
| 114 | /* cpuid returned max cores value: */ | 117 | /* cpuid returned max cores value: */ |
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index 80091ae54c2b..6c1ca139f736 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -39,7 +39,7 @@ obj-$(CONFIG_CPU_SUP_AMD) += perf_event_amd_iommu.o
| 39 | endif | 39 | endif |
| 40 | obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_p6.o perf_event_knc.o perf_event_p4.o | 40 | obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_p6.o perf_event_knc.o perf_event_p4.o |
| 41 | obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_lbr.o perf_event_intel_ds.o perf_event_intel.o | 41 | obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_lbr.o perf_event_intel_ds.o perf_event_intel.o |
| 42 | obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_rapl.o | 42 | obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_rapl.o perf_event_intel_cqm.o |
| 43 | 43 | ||
| 44 | obj-$(CONFIG_PERF_EVENTS_INTEL_UNCORE) += perf_event_intel_uncore.o \ | 44 | obj-$(CONFIG_PERF_EVENTS_INTEL_UNCORE) += perf_event_intel_uncore.o \ |
| 45 | perf_event_intel_uncore_snb.o \ | 45 | perf_event_intel_uncore_snb.o \ |
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 2346c95c6ab1..1cd4a1a44b95 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -646,6 +646,30 @@ void get_cpu_cap(struct cpuinfo_x86 *c)
| 646 | c->x86_capability[10] = eax; | 646 | c->x86_capability[10] = eax; |
| 647 | } | 647 | } |
| 648 | 648 | ||
| 649 | /* Additional Intel-defined flags: level 0x0000000F */ | ||
| 650 | if (c->cpuid_level >= 0x0000000F) { | ||
| 651 | u32 eax, ebx, ecx, edx; | ||
| 652 | |||
| 653 | /* QoS sub-leaf, EAX=0Fh, ECX=0 */ | ||
| 654 | cpuid_count(0x0000000F, 0, &eax, &ebx, &ecx, &edx); | ||
| 655 | c->x86_capability[11] = edx; | ||
| 656 | if (cpu_has(c, X86_FEATURE_CQM_LLC)) { | ||
| 657 | /* will be overridden if occupancy monitoring exists */ | ||
| 658 | c->x86_cache_max_rmid = ebx; | ||
| 659 | |||
| 660 | /* QoS sub-leaf, EAX=0Fh, ECX=1 */ | ||
| 661 | cpuid_count(0x0000000F, 1, &eax, &ebx, &ecx, &edx); | ||
| 662 | c->x86_capability[12] = edx; | ||
| 663 | if (cpu_has(c, X86_FEATURE_CQM_OCCUP_LLC)) { | ||
| 664 | c->x86_cache_max_rmid = ecx; | ||
| 665 | c->x86_cache_occ_scale = ebx; | ||
| 666 | } | ||
| 667 | } else { | ||
| 668 | c->x86_cache_max_rmid = -1; | ||
| 669 | c->x86_cache_occ_scale = -1; | ||
| 670 | } | ||
| 671 | } | ||
| 672 | |||
| 649 | /* AMD-defined flags: level 0x80000001 */ | 673 | /* AMD-defined flags: level 0x80000001 */ |
| 650 | xlvl = cpuid_eax(0x80000000); | 674 | xlvl = cpuid_eax(0x80000000); |
| 651 | c->extended_cpuid_level = xlvl; | 675 | c->extended_cpuid_level = xlvl; |
@@ -834,6 +858,20 @@ static void generic_identify(struct cpuinfo_x86 *c)
| 834 | detect_nopl(c); | 858 | detect_nopl(c); |
| 835 | } | 859 | } |
| 836 | 860 | ||
| 861 | static void x86_init_cache_qos(struct cpuinfo_x86 *c) | ||
| 862 | { | ||
| 863 | /* | ||
| 864 | * The heavy lifting of max_rmid and cache_occ_scale are handled | ||
| 865 | * in get_cpu_cap(). Here we just set the max_rmid for the boot_cpu | ||
| 866 | * in case CQM bits really aren't there in this CPU. | ||
| 867 | */ | ||
| 868 | if (c != &boot_cpu_data) { | ||
| 869 | boot_cpu_data.x86_cache_max_rmid = | ||
| 870 | min(boot_cpu_data.x86_cache_max_rmid, | ||
| 871 | c->x86_cache_max_rmid); | ||
| 872 | } | ||
| 873 | } | ||
| 874 | |||
| 837 | /* | 875 | /* |
| 838 | * This does the hard work of actually picking apart the CPU stuff... | 876 | * This does the hard work of actually picking apart the CPU stuff... |
| 839 | */ | 877 | */ |
@@ -923,6 +961,7 @@ static void identify_cpu(struct cpuinfo_x86 *c)
| 923 | 961 | ||
| 924 | init_hypervisor(c); | 962 | init_hypervisor(c); |
| 925 | x86_init_rdrand(c); | 963 | x86_init_rdrand(c); |
| 964 | x86_init_cache_qos(c); | ||
| 926 | 965 | ||
| 927 | /* | 966 | /* |
| 928 | * Clear/Set all flags overridden by options, need to do it | 967 |
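The enumeration that get_cpu_cap() performs above can be reproduced from user space for experimentation. A rough sketch, assuming a CPU that actually reports leaf 0xF (the max-basic-leaf check the kernel does via cpuid_level is omitted here); the sub-leaf and register layout matches what the hunk reads:

```c
#include <cpuid.h>
#include <stdio.h>

int main(void)
{
	unsigned int eax, ebx, ecx, edx;

	/* CPUID.0xF, sub-leaf 0: EDX bit 1 => LLC QoS monitoring supported */
	__cpuid_count(0x0f, 0, eax, ebx, ecx, edx);
	if (!(edx & (1u << 1))) {
		puts("LLC QoS monitoring not enumerated");
		return 1;
	}

	/* CPUID.0xF, sub-leaf 1: ECX = max RMID, EBX = occupancy-to-bytes scale */
	__cpuid_count(0x0f, 1, eax, ebx, ecx, edx);
	printf("max RMID: %u, occupancy scale: %u bytes\n", ecx, ebx);
	return 0;
}
```

These are the same two values the patch stores in x86_cache_max_rmid and x86_cache_occ_scale.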
diff --git a/arch/x86/kernel/cpu/perf_event_intel_cqm.c b/arch/x86/kernel/cpu/perf_event_intel_cqm.c
new file mode 100644
index 000000000000..e4d1b8b738fa
--- /dev/null
+++ b/arch/x86/kernel/cpu/perf_event_intel_cqm.c
@@ -0,0 +1,1379 @@
| 1 | /* | ||
| 2 | * Intel Cache Quality-of-Service Monitoring (CQM) support. | ||
| 3 | * | ||
| 4 | * Based very, very heavily on work by Peter Zijlstra. | ||
| 5 | */ | ||
| 6 | |||
| 7 | #include <linux/perf_event.h> | ||
| 8 | #include <linux/slab.h> | ||
| 9 | #include <asm/cpu_device_id.h> | ||
| 10 | #include "perf_event.h" | ||
| 11 | |||
| 12 | #define MSR_IA32_PQR_ASSOC 0x0c8f | ||
| 13 | #define MSR_IA32_QM_CTR 0x0c8e | ||
| 14 | #define MSR_IA32_QM_EVTSEL 0x0c8d | ||
| 15 | |||
| 16 | static unsigned int cqm_max_rmid = -1; | ||
| 17 | static unsigned int cqm_l3_scale; /* supposedly cacheline size */ | ||
| 18 | |||
| 19 | struct intel_cqm_state { | ||
| 20 | raw_spinlock_t lock; | ||
| 21 | int rmid; | ||
| 22 | int cnt; | ||
| 23 | }; | ||
| 24 | |||
| 25 | static DEFINE_PER_CPU(struct intel_cqm_state, cqm_state); | ||
| 26 | |||
| 27 | /* | ||
| 28 | * Protects cache_groups and cqm_rmid_free_lru and cqm_rmid_limbo_lru. | ||
| 29 | * Also protects event->hw.cqm_rmid | ||
| 30 | * | ||
| 31 | * Hold either for stability, both for modification of ->hw.cqm_rmid. | ||
| 32 | */ | ||
| 33 | static DEFINE_MUTEX(cache_mutex); | ||
| 34 | static DEFINE_RAW_SPINLOCK(cache_lock); | ||
| 35 | |||
| 36 | /* | ||
| 37 | * Groups of events that have the same target(s), one RMID per group. | ||
| 38 | */ | ||
| 39 | static LIST_HEAD(cache_groups); | ||
| 40 | |||
| 41 | /* | ||
| 42 | * Mask of CPUs for reading CQM values. We only need one per-socket. | ||
| 43 | */ | ||
| 44 | static cpumask_t cqm_cpumask; | ||
| 45 | |||
| 46 | #define RMID_VAL_ERROR (1ULL << 63) | ||
| 47 | #define RMID_VAL_UNAVAIL (1ULL << 62) | ||
| 48 | |||
| 49 | #define QOS_L3_OCCUP_EVENT_ID (1 << 0) | ||
| 50 | |||
| 51 | #define QOS_EVENT_MASK QOS_L3_OCCUP_EVENT_ID | ||
| 52 | |||
| 53 | /* | ||
| 54 | * This is central to the rotation algorithm in __intel_cqm_rmid_rotate(). | ||
| 55 | * | ||
| 56 | * This rmid is always free and is guaranteed to have an associated | ||
| 57 | * near-zero occupancy value, i.e. no cachelines are tagged with this | ||
| 58 | * RMID, once __intel_cqm_rmid_rotate() returns. | ||
| 59 | */ | ||
| 60 | static unsigned int intel_cqm_rotation_rmid; | ||
| 61 | |||
| 62 | #define INVALID_RMID (-1) | ||
| 63 | |||
| 64 | /* | ||
| 65 | * Is @rmid valid for programming the hardware? | ||
| 66 | * | ||
| 67 | * rmid 0 is reserved by the hardware for all non-monitored tasks, which | ||
| 68 | * means that we should never come across an rmid with that value. | ||
| 69 | * Likewise, an rmid value of -1 is used to indicate "no rmid currently | ||
| 70 | * assigned" and is used as part of the rotation code. | ||
| 71 | */ | ||
| 72 | static inline bool __rmid_valid(unsigned int rmid) | ||
| 73 | { | ||
| 74 | if (!rmid || rmid == INVALID_RMID) | ||
| 75 | return false; | ||
| 76 | |||
| 77 | return true; | ||
| 78 | } | ||
| 79 | |||
| 80 | static u64 __rmid_read(unsigned int rmid) | ||
| 81 | { | ||
| 82 | u64 val; | ||
| 83 | |||
| 84 | /* | ||
| 85 | * Ignore the SDM, this thing is _NOTHING_ like a regular perfcnt, | ||
| 86 | * it just says that to increase confusion. | ||
| 87 | */ | ||
| 88 | wrmsr(MSR_IA32_QM_EVTSEL, QOS_L3_OCCUP_EVENT_ID, rmid); | ||
| 89 | rdmsrl(MSR_IA32_QM_CTR, val); | ||
| 90 | |||
| 91 | /* | ||
| 92 | * Aside from the ERROR and UNAVAIL bits, assume this thing returns | ||
| 93 | * the number of cachelines tagged with @rmid. | ||
| 94 | */ | ||
| 95 | return val; | ||
| 96 | } | ||
| 97 | |||
| 98 | enum rmid_recycle_state { | ||
| 99 | RMID_YOUNG = 0, | ||
| 100 | RMID_AVAILABLE, | ||
| 101 | RMID_DIRTY, | ||
| 102 | }; | ||
| 103 | |||
| 104 | struct cqm_rmid_entry { | ||
| 105 | unsigned int rmid; | ||
| 106 | enum rmid_recycle_state state; | ||
| 107 | struct list_head list; | ||
| 108 | unsigned long queue_time; | ||
| 109 | }; | ||
| 110 | |||
| 111 | /* | ||
| 112 | * cqm_rmid_free_lru - A least recently used list of RMIDs. | ||
| 113 | * | ||
| 114 | * Oldest entry at the head, newest (most recently used) entry at the | ||
| 115 | * tail. This list is never traversed, it's only used to keep track of | ||
| 116 | * the lru order. That is, we only pick entries off the head or insert | ||
| 117 | * them on the tail. | ||
| 118 | * | ||
| 119 | * All entries on the list are 'free', and their RMIDs are not currently | ||
| 120 | * in use. To mark an RMID as in use, remove its entry from the lru | ||
| 121 | * list. | ||
| 122 | * | ||
| 123 | * | ||
| 124 | * cqm_rmid_limbo_lru - list of currently unused but (potentially) dirty RMIDs. | ||
| 125 | * | ||
| 126 | * This list contains RMIDs that no one is currently using but that | ||
| 127 | * may have a non-zero occupancy value associated with them. The | ||
| 128 | * rotation worker moves RMIDs from the limbo list to the free list once | ||
| 129 | * the occupancy value drops below __intel_cqm_threshold. | ||
| 130 | * | ||
| 131 | * Both lists are protected by cache_mutex. | ||
| 132 | */ | ||
| 133 | static LIST_HEAD(cqm_rmid_free_lru); | ||
| 134 | static LIST_HEAD(cqm_rmid_limbo_lru); | ||
| 135 | |||
| 136 | /* | ||
| 137 | * We use a simple array of pointers so that we can lookup a struct | ||
| 138 | * cqm_rmid_entry in O(1). This alleviates the callers of __get_rmid() | ||
| 139 | * and __put_rmid() from having to worry about dealing with struct | ||
| 140 | * cqm_rmid_entry - they just deal with rmids, i.e. integers. | ||
| 141 | * | ||
| 142 | * Once this array is initialized it is read-only. No locks are required | ||
| 143 | * to access it. | ||
| 144 | * | ||
| 145 | * All entries for all RMIDs can be looked up in this array at all | ||
| 146 | * times. | ||
| 147 | */ | ||
| 148 | static struct cqm_rmid_entry **cqm_rmid_ptrs; | ||
| 149 | |||
| 150 | static inline struct cqm_rmid_entry *__rmid_entry(int rmid) | ||
| 151 | { | ||
| 152 | struct cqm_rmid_entry *entry; | ||
| 153 | |||
| 154 | entry = cqm_rmid_ptrs[rmid]; | ||
| 155 | WARN_ON(entry->rmid != rmid); | ||
| 156 | |||
| 157 | return entry; | ||
| 158 | } | ||
| 159 | |||
| 160 | /* | ||
| 161 | * Returns < 0 on fail. | ||
| 162 | * | ||
| 163 | * We expect to be called with cache_mutex held. | ||
| 164 | */ | ||
| 165 | static int __get_rmid(void) | ||
| 166 | { | ||
| 167 | struct cqm_rmid_entry *entry; | ||
| 168 | |||
| 169 | lockdep_assert_held(&cache_mutex); | ||
| 170 | |||
| 171 | if (list_empty(&cqm_rmid_free_lru)) | ||
| 172 | return INVALID_RMID; | ||
| 173 | |||
| 174 | entry = list_first_entry(&cqm_rmid_free_lru, struct cqm_rmid_entry, list); | ||
| 175 | list_del(&entry->list); | ||
| 176 | |||
| 177 | return entry->rmid; | ||
| 178 | } | ||
| 179 | |||
| 180 | static void __put_rmid(unsigned int rmid) | ||
| 181 | { | ||
| 182 | struct cqm_rmid_entry *entry; | ||
| 183 | |||
| 184 | lockdep_assert_held(&cache_mutex); | ||
| 185 | |||
| 186 | WARN_ON(!__rmid_valid(rmid)); | ||
| 187 | entry = __rmid_entry(rmid); | ||
| 188 | |||
| 189 | entry->queue_time = jiffies; | ||
| 190 | entry->state = RMID_YOUNG; | ||
| 191 | |||
| 192 | list_add_tail(&entry->list, &cqm_rmid_limbo_lru); | ||
| 193 | } | ||
| 194 | |||
| 195 | static int intel_cqm_setup_rmid_cache(void) | ||
| 196 | { | ||
| 197 | struct cqm_rmid_entry *entry; | ||
| 198 | unsigned int nr_rmids; | ||
| 199 | int r = 0; | ||
| 200 | |||
| 201 | nr_rmids = cqm_max_rmid + 1; | ||
| 202 | cqm_rmid_ptrs = kmalloc(sizeof(struct cqm_rmid_entry *) * | ||
| 203 | nr_rmids, GFP_KERNEL); | ||
| 204 | if (!cqm_rmid_ptrs) | ||
| 205 | return -ENOMEM; | ||
| 206 | |||
| 207 | for (; r <= cqm_max_rmid; r++) { | ||
| 208 | struct cqm_rmid_entry *entry; | ||
| 209 | |||
| 210 | entry = kmalloc(sizeof(*entry), GFP_KERNEL); | ||
| 211 | if (!entry) | ||
| 212 | goto fail; | ||
| 213 | |||
| 214 | INIT_LIST_HEAD(&entry->list); | ||
| 215 | entry->rmid = r; | ||
| 216 | cqm_rmid_ptrs[r] = entry; | ||
| 217 | |||
| 218 | list_add_tail(&entry->list, &cqm_rmid_free_lru); | ||
| 219 | } | ||
| 220 | |||
| 221 | /* | ||
| 222 | * RMID 0 is special and is always allocated. It's used for all | ||
| 223 | * tasks that are not monitored. | ||
| 224 | */ | ||
| 225 | entry = __rmid_entry(0); | ||
| 226 | list_del(&entry->list); | ||
| 227 | |||
| 228 | mutex_lock(&cache_mutex); | ||
| 229 | intel_cqm_rotation_rmid = __get_rmid(); | ||
| 230 | mutex_unlock(&cache_mutex); | ||
| 231 | |||
| 232 | return 0; | ||
| 233 | fail: | ||
| 234 | while (r--) | ||
| 235 | kfree(cqm_rmid_ptrs[r]); | ||
| 236 | |||
| 237 | kfree(cqm_rmid_ptrs); | ||
| 238 | return -ENOMEM; | ||
| 239 | } | ||
| 240 | |||
| 241 | /* | ||
| 242 | * Determine if @a and @b measure the same set of tasks. | ||
| 243 | * | ||
| 244 | * If @a and @b measure the same set of tasks then we want to share a | ||
| 245 | * single RMID. | ||
| 246 | */ | ||
| 247 | static bool __match_event(struct perf_event *a, struct perf_event *b) | ||
| 248 | { | ||
| 249 | /* Per-cpu and task events don't mix */ | ||
| 250 | if ((a->attach_state & PERF_ATTACH_TASK) != | ||
| 251 | (b->attach_state & PERF_ATTACH_TASK)) | ||
| 252 | return false; | ||
| 253 | |||
| 254 | #ifdef CONFIG_CGROUP_PERF | ||
| 255 | if (a->cgrp != b->cgrp) | ||
| 256 | return false; | ||
| 257 | #endif | ||
| 258 | |||
| 259 | /* If not task event, we're machine wide */ | ||
| 260 | if (!(b->attach_state & PERF_ATTACH_TASK)) | ||
| 261 | return true; | ||
| 262 | |||
| 263 | /* | ||
| 264 | * Events that target same task are placed into the same cache group. | ||
| 265 | */ | ||
| 266 | if (a->hw.target == b->hw.target) | ||
| 267 | return true; | ||
| 268 | |||
| 269 | /* | ||
| 270 | * Are we an inherited event? | ||
| 271 | */ | ||
| 272 | if (b->parent == a) | ||
| 273 | return true; | ||
| 274 | |||
| 275 | return false; | ||
| 276 | } | ||
| 277 | |||
| 278 | #ifdef CONFIG_CGROUP_PERF | ||
| 279 | static inline struct perf_cgroup *event_to_cgroup(struct perf_event *event) | ||
| 280 | { | ||
| 281 | if (event->attach_state & PERF_ATTACH_TASK) | ||
| 282 | return perf_cgroup_from_task(event->hw.target); | ||
| 283 | |||
| 284 | return event->cgrp; | ||
| 285 | } | ||
| 286 | #endif | ||
| 287 | |||
| 288 | /* | ||
| 289 | * Determine if @a's tasks intersect with @b's tasks | ||
| 290 | * | ||
| 291 | * There are combinations of events that we explicitly prohibit, | ||
| 292 | * | ||
| 293 | * PROHIBITS | ||
| 294 | * system-wide -> cgroup and task | ||
| 295 | * cgroup -> system-wide | ||
| 296 | * -> task in cgroup | ||
| 297 | * task -> system-wide | ||
| 298 | * -> task in cgroup | ||
| 299 | * | ||
| 300 | * Call this function before allocating an RMID. | ||
| 301 | */ | ||
| 302 | static bool __conflict_event(struct perf_event *a, struct perf_event *b) | ||
| 303 | { | ||
| 304 | #ifdef CONFIG_CGROUP_PERF | ||
| 305 | /* | ||
| 306 | * We can have any number of cgroups but only one system-wide | ||
| 307 | * event at a time. | ||
| 308 | */ | ||
| 309 | if (a->cgrp && b->cgrp) { | ||
| 310 | struct perf_cgroup *ac = a->cgrp; | ||
| 311 | struct perf_cgroup *bc = b->cgrp; | ||
| 312 | |||
| 313 | /* | ||
| 314 | * This condition should have been caught in | ||
| 315 | * __match_event() and we should be sharing an RMID. | ||
| 316 | */ | ||
| 317 | WARN_ON_ONCE(ac == bc); | ||
| 318 | |||
| 319 | if (cgroup_is_descendant(ac->css.cgroup, bc->css.cgroup) || | ||
| 320 | cgroup_is_descendant(bc->css.cgroup, ac->css.cgroup)) | ||
| 321 | return true; | ||
| 322 | |||
| 323 | return false; | ||
| 324 | } | ||
| 325 | |||
| 326 | if (a->cgrp || b->cgrp) { | ||
| 327 | struct perf_cgroup *ac, *bc; | ||
| 328 | |||
| 329 | /* | ||
| 330 | * cgroup and system-wide events are mutually exclusive | ||
| 331 | */ | ||
| 332 | if ((a->cgrp && !(b->attach_state & PERF_ATTACH_TASK)) || | ||
| 333 | (b->cgrp && !(a->attach_state & PERF_ATTACH_TASK))) | ||
| 334 | return true; | ||
| 335 | |||
| 336 | /* | ||
| 337 | * Ensure neither event is part of the other's cgroup | ||
| 338 | */ | ||
| 339 | ac = event_to_cgroup(a); | ||
| 340 | bc = event_to_cgroup(b); | ||
| 341 | if (ac == bc) | ||
| 342 | return true; | ||
| 343 | |||
| 344 | /* | ||
| 345 | * Must have cgroup and non-intersecting task events. | ||
| 346 | */ | ||
| 347 | if (!ac || !bc) | ||
| 348 | return false; | ||
| 349 | |||
| 350 | /* | ||
| 351 | * We have cgroup and task events, and the task belongs | ||
| 352 | * to a cgroup. Check for overlap. | ||
| 353 | */ | ||
| 354 | if (cgroup_is_descendant(ac->css.cgroup, bc->css.cgroup) || | ||
| 355 | cgroup_is_descendant(bc->css.cgroup, ac->css.cgroup)) | ||
| 356 | return true; | ||
| 357 | |||
| 358 | return false; | ||
| 359 | } | ||
| 360 | #endif | ||
| 361 | /* | ||
| 362 | * If one of them is not a task, same story as above with cgroups. | ||
| 363 | */ | ||
| 364 | if (!(a->attach_state & PERF_ATTACH_TASK) || | ||
| 365 | !(b->attach_state & PERF_ATTACH_TASK)) | ||
| 366 | return true; | ||
| 367 | |||
| 368 | /* | ||
| 369 | * Must be non-overlapping. | ||
| 370 | */ | ||
| 371 | return false; | ||
| 372 | } | ||
| 373 | |||
| 374 | struct rmid_read { | ||
| 375 | unsigned int rmid; | ||
| 376 | atomic64_t value; | ||
| 377 | }; | ||
| 378 | |||
| 379 | static void __intel_cqm_event_count(void *info); | ||
| 380 | |||
| 381 | /* | ||
| 382 | * Exchange the RMID of a group of events. | ||
| 383 | */ | ||
| 384 | static unsigned int | ||
| 385 | intel_cqm_xchg_rmid(struct perf_event *group, unsigned int rmid) | ||
| 386 | { | ||
| 387 | struct perf_event *event; | ||
| 388 | unsigned int old_rmid = group->hw.cqm_rmid; | ||
| 389 | struct list_head *head = &group->hw.cqm_group_entry; | ||
| 390 | |||
| 391 | lockdep_assert_held(&cache_mutex); | ||
| 392 | |||
| 393 | /* | ||
| 394 | * If our RMID is being deallocated, perform a read now. | ||
| 395 | */ | ||
| 396 | if (__rmid_valid(old_rmid) && !__rmid_valid(rmid)) { | ||
| 397 | struct rmid_read rr = { | ||
| 398 | .value = ATOMIC64_INIT(0), | ||
| 399 | .rmid = old_rmid, | ||
| 400 | }; | ||
| 401 | |||
| 402 | on_each_cpu_mask(&cqm_cpumask, __intel_cqm_event_count, | ||
| 403 | &rr, 1); | ||
| 404 | local64_set(&group->count, atomic64_read(&rr.value)); | ||
| 405 | } | ||
| 406 | |||
| 407 | raw_spin_lock_irq(&cache_lock); | ||
| 408 | |||
| 409 | group->hw.cqm_rmid = rmid; | ||
| 410 | list_for_each_entry(event, head, hw.cqm_group_entry) | ||
| 411 | event->hw.cqm_rmid = rmid; | ||
| 412 | |||
| 413 | raw_spin_unlock_irq(&cache_lock); | ||
| 414 | |||
| 415 | return old_rmid; | ||
| 416 | } | ||
| 417 | |||
| 418 | /* | ||
| 419 | * If we fail to assign a new RMID for intel_cqm_rotation_rmid because | ||
| 420 | * cachelines are still tagged with RMIDs in limbo, we progressively | ||
| 421 | * increment the threshold until we find an RMID in limbo with <= | ||
| 422 | * __intel_cqm_threshold lines tagged. This is designed to mitigate the | ||
| 423 | * problem where cachelines tagged with an RMID are not steadily being | ||
| 424 | * evicted. | ||
| 425 | * | ||
| 426 | * On successful rotations we decrease the threshold back towards zero. | ||
| 427 | * | ||
| 428 | * __intel_cqm_max_threshold provides an upper bound on the threshold, | ||
| 429 | * and is measured in bytes because it's exposed to userland. | ||
| 430 | */ | ||
| 431 | static unsigned int __intel_cqm_threshold; | ||
| 432 | static unsigned int __intel_cqm_max_threshold; | ||
| 433 | |||
| 434 | /* | ||
| 435 | * Test whether an RMID has a zero occupancy value on this cpu. | ||
| 436 | */ | ||
| 437 | static void intel_cqm_stable(void *arg) | ||
| 438 | { | ||
| 439 | struct cqm_rmid_entry *entry; | ||
| 440 | |||
| 441 | list_for_each_entry(entry, &cqm_rmid_limbo_lru, list) { | ||
| 442 | if (entry->state != RMID_AVAILABLE) | ||
| 443 | break; | ||
| 444 | |||
| 445 | if (__rmid_read(entry->rmid) > __intel_cqm_threshold) | ||
| 446 | entry->state = RMID_DIRTY; | ||
| 447 | } | ||
| 448 | } | ||
| 449 | |||
| 450 | /* | ||
| 451 | * If we have group events waiting for an RMID that don't conflict with | ||
| 452 | * events already running, assign @rmid. | ||
| 453 | */ | ||
| 454 | static bool intel_cqm_sched_in_event(unsigned int rmid) | ||
| 455 | { | ||
| 456 | struct perf_event *leader, *event; | ||
| 457 | |||
| 458 | lockdep_assert_held(&cache_mutex); | ||
| 459 | |||
| 460 | leader = list_first_entry(&cache_groups, struct perf_event, | ||
| 461 | hw.cqm_groups_entry); | ||
| 462 | event = leader; | ||
| 463 | |||
| 464 | list_for_each_entry_continue(event, &cache_groups, | ||
| 465 | hw.cqm_groups_entry) { | ||
| 466 | if (__rmid_valid(event->hw.cqm_rmid)) | ||
| 467 | continue; | ||
| 468 | |||
| 469 | if (__conflict_event(event, leader)) | ||
| 470 | continue; | ||
| 471 | |||
| 472 | intel_cqm_xchg_rmid(event, rmid); | ||
| 473 | return true; | ||
| 474 | } | ||
| 475 | |||
| 476 | return false; | ||
| 477 | } | ||
| 478 | |||
| 479 | /* | ||
| 480 | * Initially use this constant for both the limbo queue time and the | ||
| 481 | * rotation timer interval, pmu::hrtimer_interval_ms. | ||
| 482 | * | ||
| 483 | * They don't need to be the same, but the two are related since if you | ||
| 484 | * rotate faster than you recycle RMIDs, you may run out of available | ||
| 485 | * RMIDs. | ||
| 486 | */ | ||
| 487 | #define RMID_DEFAULT_QUEUE_TIME 250 /* ms */ | ||
| 488 | |||
| 489 | static unsigned int __rmid_queue_time_ms = RMID_DEFAULT_QUEUE_TIME; | ||
| 490 | |||
| 491 | /* | ||
| 492 | * intel_cqm_rmid_stabilize - move RMIDs from limbo to free list | ||
| 493 | * @nr_available: number of freeable RMIDs on the limbo list | ||
| 494 | * | ||
| 495 | * Quiescent state; wait for all 'freed' RMIDs to become unused, i.e. no | ||
| 496 | * cachelines are tagged with those RMIDs. After this we can reuse them | ||
| 497 | * and know that the current set of active RMIDs is stable. | ||
| 498 | * | ||
| 499 | * Return %true or %false depending on whether stabilization needs to be | ||
| 500 | * reattempted. | ||
| 501 | * | ||
| 502 | * If we return %true then @nr_available is updated to indicate the | ||
| 503 | * number of RMIDs on the limbo list that have been queued for the | ||
| 504 | * minimum queue time (RMID_AVAILABLE), but whose data occupancy values | ||
| 505 | * are above __intel_cqm_threshold. | ||
| 506 | */ | ||
| 507 | static bool intel_cqm_rmid_stabilize(unsigned int *available) | ||
| 508 | { | ||
| 509 | struct cqm_rmid_entry *entry, *tmp; | ||
| 510 | |||
| 511 | lockdep_assert_held(&cache_mutex); | ||
| 512 | |||
| 513 | *available = 0; | ||
| 514 | list_for_each_entry(entry, &cqm_rmid_limbo_lru, list) { | ||
| 515 | unsigned long min_queue_time; | ||
| 516 | unsigned long now = jiffies; | ||
| 517 | |||
| 518 | /* | ||
| 519 | * We hold RMIDs placed into limbo for a minimum queue | ||
| 520 | * time. Before the minimum queue time has elapsed we do | ||
| 521 | * not recycle RMIDs. | ||
| 522 | * | ||
| 523 | * The reasoning is that until a sufficient time has | ||
| 524 | * passed since we stopped using an RMID, any RMID | ||
| 525 | * placed onto the limbo list will likely still have | ||
| 526 | * data tagged in the cache, which means we'll probably | ||
| 527 | * fail to recycle it anyway. | ||
| 528 | * | ||
| 529 | * We can save ourselves an expensive IPI by skipping | ||
| 530 | * any RMIDs that have not been queued for the minimum | ||
| 531 | * time. | ||
| 532 | */ | ||
| 533 | min_queue_time = entry->queue_time + | ||
| 534 | msecs_to_jiffies(__rmid_queue_time_ms); | ||
| 535 | |||
| 536 | if (time_after(min_queue_time, now)) | ||
| 537 | break; | ||
| 538 | |||
| 539 | entry->state = RMID_AVAILABLE; | ||
| 540 | (*available)++; | ||
| 541 | } | ||
| 542 | |||
| 543 | /* | ||
| 544 | * Fast return if none of the RMIDs on the limbo list have been | ||
| 545 | * sitting on the queue for the minimum queue time. | ||
| 546 | */ | ||
| 547 | if (!*available) | ||
| 548 | return false; | ||
| 549 | |||
| 550 | /* | ||
| 551 | * Test whether an RMID is free for each package. | ||
| 552 | */ | ||
| 553 | on_each_cpu_mask(&cqm_cpumask, intel_cqm_stable, NULL, true); | ||
| 554 | |||
| 555 | list_for_each_entry_safe(entry, tmp, &cqm_rmid_limbo_lru, list) { | ||
| 556 | /* | ||
| 557 | * Exhausted all RMIDs that have waited min queue time. | ||
| 558 | */ | ||
| 559 | if (entry->state == RMID_YOUNG) | ||
| 560 | break; | ||
| 561 | |||
| 562 | if (entry->state == RMID_DIRTY) | ||
| 563 | continue; | ||
| 564 | |||
| 565 | list_del(&entry->list); /* remove from limbo */ | ||
| 566 | |||
| 567 | /* | ||
| 568 | * The rotation RMID gets priority if it's | ||
| 569 | * currently invalid. In which case, skip adding | ||
| 570 | * the RMID to the free lru. | ||
| 571 | */ | ||
| 572 | if (!__rmid_valid(intel_cqm_rotation_rmid)) { | ||
| 573 | intel_cqm_rotation_rmid = entry->rmid; | ||
| 574 | continue; | ||
| 575 | } | ||
| 576 | |||
| 577 | /* | ||
| 578 | * If we have groups waiting for RMIDs, hand | ||
| 579 | * them one now provided they don't conflict. | ||
| 580 | */ | ||
| 581 | if (intel_cqm_sched_in_event(entry->rmid)) | ||
| 582 | continue; | ||
| 583 | |||
| 584 | /* | ||
| 585 | * Otherwise place it onto the free list. | ||
| 586 | */ | ||
| 587 | list_add_tail(&entry->list, &cqm_rmid_free_lru); | ||
| 588 | } | ||
| 589 | |||
| 590 | |||
| 591 | return __rmid_valid(intel_cqm_rotation_rmid); | ||
| 592 | } | ||
| 593 | |||
| 594 | /* | ||
| 595 | * Pick a victim group and move it to the tail of the group list. | ||
| 596 | * @next: The first group without an RMID | ||
| 597 | */ | ||
| 598 | static void __intel_cqm_pick_and_rotate(struct perf_event *next) | ||
| 599 | { | ||
| 600 | struct perf_event *rotor; | ||
| 601 | unsigned int rmid; | ||
| 602 | |||
| 603 | lockdep_assert_held(&cache_mutex); | ||
| 604 | |||
| 605 | rotor = list_first_entry(&cache_groups, struct perf_event, | ||
| 606 | hw.cqm_groups_entry); | ||
| 607 | |||
| 608 | /* | ||
| 609 | * The group at the front of the list should always have a valid | ||
| 610 | * RMID. If it doesn't then no groups have RMIDs assigned and we | ||
| 611 | * don't need to rotate the list. | ||
| 612 | */ | ||
| 613 | if (next == rotor) | ||
| 614 | return; | ||
| 615 | |||
| 616 | rmid = intel_cqm_xchg_rmid(rotor, INVALID_RMID); | ||
| 617 | __put_rmid(rmid); | ||
| 618 | |||
| 619 | list_rotate_left(&cache_groups); | ||
| 620 | } | ||
| 621 | |||
| 622 | /* | ||
| 623 | * Deallocate the RMIDs from any events that conflict with @event, and | ||
| 624 | * place them on the back of the group list. | ||
| 625 | */ | ||
| 626 | static void intel_cqm_sched_out_conflicting_events(struct perf_event *event) | ||
| 627 | { | ||
| 628 | struct perf_event *group, *g; | ||
| 629 | unsigned int rmid; | ||
| 630 | |||
| 631 | lockdep_assert_held(&cache_mutex); | ||
| 632 | |||
| 633 | list_for_each_entry_safe(group, g, &cache_groups, hw.cqm_groups_entry) { | ||
| 634 | if (group == event) | ||
| 635 | continue; | ||
| 636 | |||
| 637 | rmid = group->hw.cqm_rmid; | ||
| 638 | |||
| 639 | /* | ||
| 640 | * Skip events that don't have a valid RMID. | ||
| 641 | */ | ||
| 642 | if (!__rmid_valid(rmid)) | ||
| 643 | continue; | ||
| 644 | |||
| 645 | /* | ||
| 646 | * No conflict? No problem! Leave the event alone. | ||
| 647 | */ | ||
| 648 | if (!__conflict_event(group, event)) | ||
| 649 | continue; | ||
| 650 | |||
| 651 | intel_cqm_xchg_rmid(group, INVALID_RMID); | ||
| 652 | __put_rmid(rmid); | ||
| 653 | } | ||
| 654 | } | ||
| 655 | |||
| 656 | /* | ||
| 657 | * Attempt to rotate the groups and assign new RMIDs. | ||
| 658 | * | ||
| 659 | * We rotate for two reasons, | ||
| 660 | * 1. To handle the scheduling of conflicting events | ||
| 661 | * 2. To recycle RMIDs | ||
| 662 | * | ||
| 663 | * Rotating RMIDs is complicated because the hardware doesn't give us | ||
| 664 | * any clues. | ||
| 665 | * | ||
| 666 | * There's problems with the hardware interface; when you change the | ||
| 667 | * task:RMID map cachelines retain their 'old' tags, giving a skewed | ||
| 668 | * picture. In order to work around this, we must always keep one free | ||
| 669 | * RMID - intel_cqm_rotation_rmid. | ||
| 670 | * | ||
| 671 | * Rotation works by taking away an RMID from a group (the old RMID), | ||
| 672 | * and assigning the free RMID to another group (the new RMID). We must | ||
| 673 | * then wait for the old RMID to not be used (no cachelines tagged). | ||
| 674 | * This ensures that all cachelines are tagged with 'active' RMIDs. At | ||
| 675 | * this point we can start reading values for the new RMID and treat the | ||
| 676 | * old RMID as the free RMID for the next rotation. | ||
| 677 | * | ||
| 678 | * Return %true or %false depending on whether we did any rotating. | ||
| 679 | */ | ||
| 680 | static bool __intel_cqm_rmid_rotate(void) | ||
| 681 | { | ||
| 682 | struct perf_event *group, *start = NULL; | ||
| 683 | unsigned int threshold_limit; | ||
| 684 | unsigned int nr_needed = 0; | ||
| 685 | unsigned int nr_available; | ||
| 686 | bool rotated = false; | ||
| 687 | |||
| 688 | mutex_lock(&cache_mutex); | ||
| 689 | |||
| 690 | again: | ||
| 691 | /* | ||
| 692 | * Fast path through this function if there are no groups and no | ||
| 693 | * RMIDs that need cleaning. | ||
| 694 | */ | ||
| 695 | if (list_empty(&cache_groups) && list_empty(&cqm_rmid_limbo_lru)) | ||
| 696 | goto out; | ||
| 697 | |||
| 698 | list_for_each_entry(group, &cache_groups, hw.cqm_groups_entry) { | ||
| 699 | if (!__rmid_valid(group->hw.cqm_rmid)) { | ||
| 700 | if (!start) | ||
| 701 | start = group; | ||
| 702 | nr_needed++; | ||
| 703 | } | ||
| 704 | } | ||
| 705 | |||
| 706 | /* | ||
| 707 | * We have some event groups, but they all have RMIDs assigned | ||
| 708 | * and no RMIDs need cleaning. | ||
| 709 | */ | ||
| 710 | if (!nr_needed && list_empty(&cqm_rmid_limbo_lru)) | ||
| 711 | goto out; | ||
| 712 | |||
| 713 | if (!nr_needed) | ||
| 714 | goto stabilize; | ||
| 715 | |||
| 716 | /* | ||
| 717 | * We have more event groups without RMIDs than available RMIDs, | ||
| 718 | * or we have event groups that conflict with the ones currently | ||
| 719 | * scheduled. | ||
| 720 | * | ||
| 721 | * We force deallocate the rmid of the group at the head of | ||
| 722 | * cache_groups. The first event group without an RMID then gets | ||
| 723 | * assigned intel_cqm_rotation_rmid. This ensures we always make | ||
| 724 | * forward progress. | ||
| 725 | * | ||
| 726 | * Rotate the cache_groups list so the previous head is now the | ||
| 727 | * tail. | ||
| 728 | */ | ||
| 729 | __intel_cqm_pick_and_rotate(start); | ||
| 730 | |||
| 731 | /* | ||
| 732 | * If the rotation is going to succeed, reduce the threshold so | ||
| 733 | * that we don't needlessly reuse dirty RMIDs. | ||
| 734 | */ | ||
| 735 | if (__rmid_valid(intel_cqm_rotation_rmid)) { | ||
| 736 | intel_cqm_xchg_rmid(start, intel_cqm_rotation_rmid); | ||
| 737 | intel_cqm_rotation_rmid = __get_rmid(); | ||
| 738 | |||
| 739 | intel_cqm_sched_out_conflicting_events(start); | ||
| 740 | |||
| 741 | if (__intel_cqm_threshold) | ||
| 742 | __intel_cqm_threshold--; | ||
| 743 | } | ||
| 744 | |||
| 745 | rotated = true; | ||
| 746 | |||
| 747 | stabilize: | ||
| 748 | /* | ||
| 749 | * We now need to stabilize the RMID we freed above (if any) to | ||
| 750 | * ensure that the next time we rotate we have an RMID with zero | ||
| 751 | * occupancy value. | ||
| 752 | * | ||
| 753 | * Alternatively, if we didn't need to perform any rotation, | ||
| 754 | * we'll have a bunch of RMIDs in limbo that need stabilizing. | ||
| 755 | */ | ||
| 756 | threshold_limit = __intel_cqm_max_threshold / cqm_l3_scale; | ||
| 757 | |||
| 758 | while (intel_cqm_rmid_stabilize(&nr_available) && | ||
| 759 | __intel_cqm_threshold < threshold_limit) { | ||
| 760 | unsigned int steal_limit; | ||
| 761 | |||
| 762 | /* | ||
| 763 | * Don't spin if nobody is actively waiting for an RMID, | ||
| 764 | * the rotation worker will be kicked as soon as an | ||
| 765 | * event needs an RMID anyway. | ||
| 766 | */ | ||
| 767 | if (!nr_needed) | ||
| 768 | break; | ||
| 769 | |||
| 770 | /* Allow max 25% of RMIDs to be in limbo. */ | ||
| 771 | steal_limit = (cqm_max_rmid + 1) / 4; | ||
| 772 | |||
| 773 | /* | ||
| 774 | * We failed to stabilize any RMIDs so our rotation | ||
| 775 | * logic is now stuck. In order to make forward progress | ||
| 776 | * we have a few options: | ||
| 777 | * | ||
| 778 | * 1. rotate ("steal") another RMID | ||
| 779 | * 2. increase the threshold | ||
| 780 | * 3. do nothing | ||
| 781 | * | ||
| 782 | * We do both of 1. and 2. until we hit the steal limit. | ||
| 783 | * | ||
| 784 | * The steal limit prevents all RMIDs ending up on the | ||
| 785 | * limbo list. This can happen if every RMID has a | ||
| 786 | * non-zero occupancy above threshold_limit, and the | ||
| 787 | * occupancy values aren't dropping fast enough. | ||
| 788 | * | ||
| 789 | * Note that there is prioritisation at work here - we'd | ||
| 790 | * rather increase the number of RMIDs on the limbo list | ||
| 791 | * than increase the threshold, because increasing the | ||
| 792 | * threshold skews the event data (because we reuse | ||
| 793 | * dirty RMIDs) - threshold bumps are a last resort. | ||
| 794 | */ | ||
| 795 | if (nr_available < steal_limit) | ||
| 796 | goto again; | ||
| 797 | |||
| 798 | __intel_cqm_threshold++; | ||
| 799 | } | ||
| 800 | |||
| 801 | out: | ||
| 802 | mutex_unlock(&cache_mutex); | ||
| 803 | return rotated; | ||
| 804 | } | ||
| 805 | |||
| 806 | static void intel_cqm_rmid_rotate(struct work_struct *work); | ||
| 807 | |||
| 808 | static DECLARE_DELAYED_WORK(intel_cqm_rmid_work, intel_cqm_rmid_rotate); | ||
| 809 | |||
| 810 | static struct pmu intel_cqm_pmu; | ||
| 811 | |||
| 812 | static void intel_cqm_rmid_rotate(struct work_struct *work) | ||
| 813 | { | ||
| 814 | unsigned long delay; | ||
| 815 | |||
| 816 | __intel_cqm_rmid_rotate(); | ||
| 817 | |||
| 818 | delay = msecs_to_jiffies(intel_cqm_pmu.hrtimer_interval_ms); | ||
| 819 | schedule_delayed_work(&intel_cqm_rmid_work, delay); | ||
| 820 | } | ||
| 821 | |||
| 822 | /* | ||
| 823 | * Find a group and setup RMID. | ||
| 824 | * | ||
| 825 | * If we're part of a group, we use the group's RMID. | ||
| 826 | */ | ||
| 827 | static void intel_cqm_setup_event(struct perf_event *event, | ||
| 828 | struct perf_event **group) | ||
| 829 | { | ||
| 830 | struct perf_event *iter; | ||
| 831 | unsigned int rmid; | ||
| 832 | bool conflict = false; | ||
| 833 | |||
| 834 | list_for_each_entry(iter, &cache_groups, hw.cqm_groups_entry) { | ||
| 835 | rmid = iter->hw.cqm_rmid; | ||
| 836 | |||
| 837 | if (__match_event(iter, event)) { | ||
| 838 | /* All tasks in a group share an RMID */ | ||
| 839 | event->hw.cqm_rmid = rmid; | ||
| 840 | *group = iter; | ||
| 841 | return; | ||
| 842 | } | ||
| 843 | |||
| 844 | /* | ||
| 845 | * We only care about conflicts for events that are | ||
| 846 | * actually scheduled in (and hence have a valid RMID). | ||
| 847 | */ | ||
| 848 | if (__conflict_event(iter, event) && __rmid_valid(rmid)) | ||
| 849 | conflict = true; | ||
| 850 | } | ||
| 851 | |||
| 852 | if (conflict) | ||
| 853 | rmid = INVALID_RMID; | ||
| 854 | else | ||
| 855 | rmid = __get_rmid(); | ||
| 856 | |||
| 857 | event->hw.cqm_rmid = rmid; | ||
| 858 | } | ||
| 859 | |||
| 860 | static void intel_cqm_event_read(struct perf_event *event) | ||
| 861 | { | ||
| 862 | unsigned long flags; | ||
| 863 | unsigned int rmid; | ||
| 864 | u64 val; | ||
| 865 | |||
| 866 | /* | ||
| 867 | * Task events are handled by intel_cqm_event_count(). | ||
| 868 | */ | ||
| 869 | if (event->cpu == -1) | ||
| 870 | return; | ||
| 871 | |||
| 872 | raw_spin_lock_irqsave(&cache_lock, flags); | ||
| 873 | rmid = event->hw.cqm_rmid; | ||
| 874 | |||
| 875 | if (!__rmid_valid(rmid)) | ||
| 876 | goto out; | ||
| 877 | |||
| 878 | val = __rmid_read(rmid); | ||
| 879 | |||
| 880 | /* | ||
| 881 | * Ignore this reading on error states and do not update the value. | ||
| 882 | */ | ||
| 883 | if (val & (RMID_VAL_ERROR | RMID_VAL_UNAVAIL)) | ||
| 884 | goto out; | ||
| 885 | |||
| 886 | local64_set(&event->count, val); | ||
| 887 | out: | ||
| 888 | raw_spin_unlock_irqrestore(&cache_lock, flags); | ||
| 889 | } | ||
| 890 | |||
| 891 | static void __intel_cqm_event_count(void *info) | ||
| 892 | { | ||
| 893 | struct rmid_read *rr = info; | ||
| 894 | u64 val; | ||
| 895 | |||
| 896 | val = __rmid_read(rr->rmid); | ||
| 897 | |||
| 898 | if (val & (RMID_VAL_ERROR | RMID_VAL_UNAVAIL)) | ||
| 899 | return; | ||
| 900 | |||
| 901 | atomic64_add(val, &rr->value); | ||
| 902 | } | ||
| 903 | |||
| 904 | static inline bool cqm_group_leader(struct perf_event *event) | ||
| 905 | { | ||
| 906 | return !list_empty(&event->hw.cqm_groups_entry); | ||
| 907 | } | ||
| 908 | |||
| 909 | static u64 intel_cqm_event_count(struct perf_event *event) | ||
| 910 | { | ||
| 911 | unsigned long flags; | ||
| 912 | struct rmid_read rr = { | ||
| 913 | .value = ATOMIC64_INIT(0), | ||
| 914 | }; | ||
| 915 | |||
| 916 | /* | ||
| 917 | * We only need to worry about task events. System-wide events | ||
| 918 | * are handled like usual, i.e. entirely with | ||
| 919 | * intel_cqm_event_read(). | ||
| 920 | */ | ||
| 921 | if (event->cpu != -1) | ||
| 922 | return __perf_event_count(event); | ||
| 923 | |||
| 924 | /* | ||
| 925 | * Only the group leader gets to report values. This stops us | ||
| 926 | * reporting duplicate values to userspace, and gives us a clear | ||
| 927 | * rule for which task gets to report the values. | ||
| 928 | * | ||
| 929 | * Note that it is impossible to attribute these values to | ||
| 930 | * specific packages - we forfeit that ability when we create | ||
| 931 | * task events. | ||
| 932 | */ | ||
| 933 | if (!cqm_group_leader(event)) | ||
| 934 | return 0; | ||
| 935 | |||
| 936 | /* | ||
| 937 | * Notice that we don't perform the reading of an RMID | ||
| 938 | * atomically, because we can't hold a spin lock across the | ||
| 939 | * IPIs. | ||
| 940 | * | ||
| 941 | * Speculatively perform the read, since @event might be | ||
| 942 | * assigned a different (possibly invalid) RMID while we're | ||
| 943 | * busying performing the IPI calls. It's therefore necessary to | ||
| 944 | * check @event's RMID afterwards, and if it has changed, | ||
| 945 | * discard the result of the read. | ||
| 946 | */ | ||
| 947 | rr.rmid = ACCESS_ONCE(event->hw.cqm_rmid); | ||
| 948 | |||
| 949 | if (!__rmid_valid(rr.rmid)) | ||
| 950 | goto out; | ||
| 951 | |||
| 952 | on_each_cpu_mask(&cqm_cpumask, __intel_cqm_event_count, &rr, 1); | ||
| 953 | |||
| 954 | raw_spin_lock_irqsave(&cache_lock, flags); | ||
| 955 | if (event->hw.cqm_rmid == rr.rmid) | ||
| 956 | local64_set(&event->count, atomic64_read(&rr.value)); | ||
| 957 | raw_spin_unlock_irqrestore(&cache_lock, flags); | ||
| 958 | out: | ||
| 959 | return __perf_event_count(event); | ||
| 960 | } | ||
| 961 | |||
| 962 | static void intel_cqm_event_start(struct perf_event *event, int mode) | ||
| 963 | { | ||
| 964 | struct intel_cqm_state *state = this_cpu_ptr(&cqm_state); | ||
| 965 | unsigned int rmid = event->hw.cqm_rmid; | ||
| 966 | unsigned long flags; | ||
| 967 | |||
| 968 | if (!(event->hw.cqm_state & PERF_HES_STOPPED)) | ||
| 969 | return; | ||
| 970 | |||
| 971 | event->hw.cqm_state &= ~PERF_HES_STOPPED; | ||
| 972 | |||
| 973 | raw_spin_lock_irqsave(&state->lock, flags); | ||
| 974 | |||
| 975 | if (state->cnt++) | ||
| 976 | WARN_ON_ONCE(state->rmid != rmid); | ||
| 977 | else | ||
| 978 | WARN_ON_ONCE(state->rmid); | ||
| 979 | |||
| 980 | state->rmid = rmid; | ||
| 981 | wrmsrl(MSR_IA32_PQR_ASSOC, state->rmid); | ||
| 982 | |||
| 983 | raw_spin_unlock_irqrestore(&state->lock, flags); | ||
| 984 | } | ||
| 985 | |||
| 986 | static void intel_cqm_event_stop(struct perf_event *event, int mode) | ||
| 987 | { | ||
| 988 | struct intel_cqm_state *state = this_cpu_ptr(&cqm_state); | ||
| 989 | unsigned long flags; | ||
| 990 | |||
| 991 | if (event->hw.cqm_state & PERF_HES_STOPPED) | ||
| 992 | return; | ||
| 993 | |||
| 994 | event->hw.cqm_state |= PERF_HES_STOPPED; | ||
| 995 | |||
| 996 | raw_spin_lock_irqsave(&state->lock, flags); | ||
| 997 | intel_cqm_event_read(event); | ||
| 998 | |||
| 999 | if (!--state->cnt) { | ||
| 1000 | state->rmid = 0; | ||
| 1001 | wrmsrl(MSR_IA32_PQR_ASSOC, 0); | ||
| 1002 | } else { | ||
| 1003 | WARN_ON_ONCE(!state->rmid); | ||
| 1004 | } | ||
| 1005 | |||
| 1006 | raw_spin_unlock_irqrestore(&state->lock, flags); | ||
| 1007 | } | ||
| 1008 | |||
| 1009 | static int intel_cqm_event_add(struct perf_event *event, int mode) | ||
| 1010 | { | ||
| 1011 | unsigned long flags; | ||
| 1012 | unsigned int rmid; | ||
| 1013 | |||
| 1014 | raw_spin_lock_irqsave(&cache_lock, flags); | ||
| 1015 | |||
| 1016 | event->hw.cqm_state = PERF_HES_STOPPED; | ||
| 1017 | rmid = event->hw.cqm_rmid; | ||
| 1018 | |||
| 1019 | if (__rmid_valid(rmid) && (mode & PERF_EF_START)) | ||
| 1020 | intel_cqm_event_start(event, mode); | ||
| 1021 | |||
| 1022 | raw_spin_unlock_irqrestore(&cache_lock, flags); | ||
| 1023 | |||
| 1024 | return 0; | ||
| 1025 | } | ||
| 1026 | |||
| 1027 | static void intel_cqm_event_del(struct perf_event *event, int mode) | ||
| 1028 | { | ||
| 1029 | intel_cqm_event_stop(event, mode); | ||
| 1030 | } | ||
| 1031 | |||
| 1032 | static void intel_cqm_event_destroy(struct perf_event *event) | ||
| 1033 | { | ||
| 1034 | struct perf_event *group_other = NULL; | ||
| 1035 | |||
| 1036 | mutex_lock(&cache_mutex); | ||
| 1037 | |||
| 1038 | /* | ||
| 1039 | * If there's another event in this group... | ||
| 1040 | */ | ||
| 1041 | if (!list_empty(&event->hw.cqm_group_entry)) { | ||
| 1042 | group_other = list_first_entry(&event->hw.cqm_group_entry, | ||
| 1043 | struct perf_event, | ||
| 1044 | hw.cqm_group_entry); | ||
| 1045 | list_del(&event->hw.cqm_group_entry); | ||
| 1046 | } | ||
| 1047 | |||
| 1048 | /* | ||
| 1049 | * And we're the group leader.. | ||
| 1050 | */ | ||
| 1051 | if (cqm_group_leader(event)) { | ||
| 1052 | /* | ||
| 1053 | * If there was a group_other, make that leader, otherwise | ||
| 1054 | * destroy the group and return the RMID. | ||
| 1055 | */ | ||
| 1056 | if (group_other) { | ||
| 1057 | list_replace(&event->hw.cqm_groups_entry, | ||
| 1058 | &group_other->hw.cqm_groups_entry); | ||
| 1059 | } else { | ||
| 1060 | unsigned int rmid = event->hw.cqm_rmid; | ||
| 1061 | |||
| 1062 | if (__rmid_valid(rmid)) | ||
| 1063 | __put_rmid(rmid); | ||
| 1064 | list_del(&event->hw.cqm_groups_entry); | ||
| 1065 | } | ||
| 1066 | } | ||
| 1067 | |||
| 1068 | mutex_unlock(&cache_mutex); | ||
| 1069 | } | ||
| 1070 | |||
| 1071 | static int intel_cqm_event_init(struct perf_event *event) | ||
| 1072 | { | ||
| 1073 | struct perf_event *group = NULL; | ||
| 1074 | bool rotate = false; | ||
| 1075 | |||
| 1076 | if (event->attr.type != intel_cqm_pmu.type) | ||
| 1077 | return -ENOENT; | ||
| 1078 | |||
| 1079 | if (event->attr.config & ~QOS_EVENT_MASK) | ||
| 1080 | return -EINVAL; | ||
| 1081 | |||
| 1082 | /* unsupported modes and filters */ | ||
| 1083 | if (event->attr.exclude_user || | ||
| 1084 | event->attr.exclude_kernel || | ||
| 1085 | event->attr.exclude_hv || | ||
| 1086 | event->attr.exclude_idle || | ||
| 1087 | event->attr.exclude_host || | ||
| 1088 | event->attr.exclude_guest || | ||
| 1089 | event->attr.sample_period) /* no sampling */ | ||
| 1090 | return -EINVAL; | ||
| 1091 | |||
| 1092 | INIT_LIST_HEAD(&event->hw.cqm_group_entry); | ||
| 1093 | INIT_LIST_HEAD(&event->hw.cqm_groups_entry); | ||
| 1094 | |||
| 1095 | event->destroy = intel_cqm_event_destroy; | ||
| 1096 | |||
| 1097 | mutex_lock(&cache_mutex); | ||
| 1098 | |||
| 1099 | /* Will also set rmid */ | ||
| 1100 | intel_cqm_setup_event(event, &group); | ||
| 1101 | |||
| 1102 | if (group) { | ||
| 1103 | list_add_tail(&event->hw.cqm_group_entry, | ||
| 1104 | &group->hw.cqm_group_entry); | ||
| 1105 | } else { | ||
| 1106 | list_add_tail(&event->hw.cqm_groups_entry, | ||
| 1107 | &cache_groups); | ||
| 1108 | |||
| 1109 | /* | ||
| 1110 | * All RMIDs are either in use or have recently been | ||
| 1111 | * used. Kick the rotation worker to clean/free some. | ||
| 1112 | * | ||
| 1113 | * We only do this for the group leader, rather than for | ||
| 1114 | * every event in a group to save on needless work. | ||
| 1115 | */ | ||
| 1116 | if (!__rmid_valid(event->hw.cqm_rmid)) | ||
| 1117 | rotate = true; | ||
| 1118 | } | ||
| 1119 | |||
| 1120 | mutex_unlock(&cache_mutex); | ||
| 1121 | |||
| 1122 | if (rotate) | ||
| 1123 | schedule_delayed_work(&intel_cqm_rmid_work, 0); | ||
| 1124 | |||
| 1125 | return 0; | ||
| 1126 | } | ||
| 1127 | |||
| 1128 | EVENT_ATTR_STR(llc_occupancy, intel_cqm_llc, "event=0x01"); | ||
| 1129 | EVENT_ATTR_STR(llc_occupancy.per-pkg, intel_cqm_llc_pkg, "1"); | ||
| 1130 | EVENT_ATTR_STR(llc_occupancy.unit, intel_cqm_llc_unit, "Bytes"); | ||
| 1131 | EVENT_ATTR_STR(llc_occupancy.scale, intel_cqm_llc_scale, NULL); | ||
| 1132 | EVENT_ATTR_STR(llc_occupancy.snapshot, intel_cqm_llc_snapshot, "1"); | ||
| 1133 | |||
| 1134 | static struct attribute *intel_cqm_events_attr[] = { | ||
| 1135 | EVENT_PTR(intel_cqm_llc), | ||
| 1136 | EVENT_PTR(intel_cqm_llc_pkg), | ||
| 1137 | EVENT_PTR(intel_cqm_llc_unit), | ||
| 1138 | EVENT_PTR(intel_cqm_llc_scale), | ||
| 1139 | EVENT_PTR(intel_cqm_llc_snapshot), | ||
| 1140 | NULL, | ||
| 1141 | }; | ||
| 1142 | |||
| 1143 | static struct attribute_group intel_cqm_events_group = { | ||
| 1144 | .name = "events", | ||
| 1145 | .attrs = intel_cqm_events_attr, | ||
| 1146 | }; | ||
| 1147 | |||
| 1148 | PMU_FORMAT_ATTR(event, "config:0-7"); | ||
| 1149 | static struct attribute *intel_cqm_formats_attr[] = { | ||
| 1150 | &format_attr_event.attr, | ||
| 1151 | NULL, | ||
| 1152 | }; | ||
| 1153 | |||
| 1154 | static struct attribute_group intel_cqm_format_group = { | ||
| 1155 | .name = "format", | ||
| 1156 | .attrs = intel_cqm_formats_attr, | ||
| 1157 | }; | ||
| 1158 | |||
| 1159 | static ssize_t | ||
| 1160 | max_recycle_threshold_show(struct device *dev, struct device_attribute *attr, | ||
| 1161 | char *page) | ||
| 1162 | { | ||
| 1163 | ssize_t rv; | ||
| 1164 | |||
| 1165 | mutex_lock(&cache_mutex); | ||
| 1166 | rv = snprintf(page, PAGE_SIZE-1, "%u\n", __intel_cqm_max_threshold); | ||
| 1167 | mutex_unlock(&cache_mutex); | ||
| 1168 | |||
| 1169 | return rv; | ||
| 1170 | } | ||
| 1171 | |||
| 1172 | static ssize_t | ||
| 1173 | max_recycle_threshold_store(struct device *dev, | ||
| 1174 | struct device_attribute *attr, | ||
| 1175 | const char *buf, size_t count) | ||
| 1176 | { | ||
| 1177 | unsigned int bytes, cachelines; | ||
| 1178 | int ret; | ||
| 1179 | |||
| 1180 | ret = kstrtouint(buf, 0, &bytes); | ||
| 1181 | if (ret) | ||
| 1182 | return ret; | ||
| 1183 | |||
| 1184 | mutex_lock(&cache_mutex); | ||
| 1185 | |||
| 1186 | __intel_cqm_max_threshold = bytes; | ||
| 1187 | cachelines = bytes / cqm_l3_scale; | ||
| 1188 | |||
| 1189 | /* | ||
| 1190 | * The new maximum takes effect immediately. | ||
| 1191 | */ | ||
| 1192 | if (__intel_cqm_threshold > cachelines) | ||
| 1193 | __intel_cqm_threshold = cachelines; | ||
| 1194 | |||
| 1195 | mutex_unlock(&cache_mutex); | ||
| 1196 | |||
| 1197 | return count; | ||
| 1198 | } | ||
| 1199 | |||
| 1200 | static DEVICE_ATTR_RW(max_recycle_threshold); | ||
| 1201 | |||
| 1202 | static struct attribute *intel_cqm_attrs[] = { | ||
| 1203 | &dev_attr_max_recycle_threshold.attr, | ||
| 1204 | NULL, | ||
| 1205 | }; | ||
| 1206 | |||
| 1207 | static const struct attribute_group intel_cqm_group = { | ||
| 1208 | .attrs = intel_cqm_attrs, | ||
| 1209 | }; | ||
| 1210 | |||
| 1211 | static const struct attribute_group *intel_cqm_attr_groups[] = { | ||
| 1212 | &intel_cqm_events_group, | ||
| 1213 | &intel_cqm_format_group, | ||
| 1214 | &intel_cqm_group, | ||
| 1215 | NULL, | ||
| 1216 | }; | ||
| 1217 | |||
| 1218 | static struct pmu intel_cqm_pmu = { | ||
| 1219 | .hrtimer_interval_ms = RMID_DEFAULT_QUEUE_TIME, | ||
| 1220 | .attr_groups = intel_cqm_attr_groups, | ||
| 1221 | .task_ctx_nr = perf_sw_context, | ||
| 1222 | .event_init = intel_cqm_event_init, | ||
| 1223 | .add = intel_cqm_event_add, | ||
| 1224 | .del = intel_cqm_event_del, | ||
| 1225 | .start = intel_cqm_event_start, | ||
| 1226 | .stop = intel_cqm_event_stop, | ||
| 1227 | .read = intel_cqm_event_read, | ||
| 1228 | .count = intel_cqm_event_count, | ||
| 1229 | }; | ||
| 1230 | |||
| 1231 | static inline void cqm_pick_event_reader(int cpu) | ||
| 1232 | { | ||
| 1233 | int phys_id = topology_physical_package_id(cpu); | ||
| 1234 | int i; | ||
| 1235 | |||
| 1236 | for_each_cpu(i, &cqm_cpumask) { | ||
| 1237 | if (phys_id == topology_physical_package_id(i)) | ||
| 1238 | return; /* already got reader for this socket */ | ||
| 1239 | } | ||
| 1240 | |||
| 1241 | cpumask_set_cpu(cpu, &cqm_cpumask); | ||
| 1242 | } | ||
| 1243 | |||
| 1244 | static void intel_cqm_cpu_prepare(unsigned int cpu) | ||
| 1245 | { | ||
| 1246 | struct intel_cqm_state *state = &per_cpu(cqm_state, cpu); | ||
| 1247 | struct cpuinfo_x86 *c = &cpu_data(cpu); | ||
| 1248 | |||
| 1249 | raw_spin_lock_init(&state->lock); | ||
| 1250 | state->rmid = 0; | ||
| 1251 | state->cnt = 0; | ||
| 1252 | |||
| 1253 | WARN_ON(c->x86_cache_max_rmid != cqm_max_rmid); | ||
| 1254 | WARN_ON(c->x86_cache_occ_scale != cqm_l3_scale); | ||
| 1255 | } | ||
| 1256 | |||
| 1257 | static void intel_cqm_cpu_exit(unsigned int cpu) | ||
| 1258 | { | ||
| 1259 | int phys_id = topology_physical_package_id(cpu); | ||
| 1260 | int i; | ||
| 1261 | |||
| 1262 | /* | ||
| 1263 | * Is @cpu a designated cqm reader? | ||
| 1264 | */ | ||
| 1265 | if (!cpumask_test_and_clear_cpu(cpu, &cqm_cpumask)) | ||
| 1266 | return; | ||
| 1267 | |||
| 1268 | for_each_online_cpu(i) { | ||
| 1269 | if (i == cpu) | ||
| 1270 | continue; | ||
| 1271 | |||
| 1272 | if (phys_id == topology_physical_package_id(i)) { | ||
| 1273 | cpumask_set_cpu(i, &cqm_cpumask); | ||
| 1274 | break; | ||
| 1275 | } | ||
| 1276 | } | ||
| 1277 | } | ||
| 1278 | |||
| 1279 | static int intel_cqm_cpu_notifier(struct notifier_block *nb, | ||
| 1280 | unsigned long action, void *hcpu) | ||
| 1281 | { | ||
| 1282 | unsigned int cpu = (unsigned long)hcpu; | ||
| 1283 | |||
| 1284 | switch (action & ~CPU_TASKS_FROZEN) { | ||
| 1285 | case CPU_UP_PREPARE: | ||
| 1286 | intel_cqm_cpu_prepare(cpu); | ||
| 1287 | break; | ||
| 1288 | case CPU_DOWN_PREPARE: | ||
| 1289 | intel_cqm_cpu_exit(cpu); | ||
| 1290 | break; | ||
| 1291 | case CPU_STARTING: | ||
| 1292 | cqm_pick_event_reader(cpu); | ||
| 1293 | break; | ||
| 1294 | } | ||
| 1295 | |||
| 1296 | return NOTIFY_OK; | ||
| 1297 | } | ||
| 1298 | |||
| 1299 | static const struct x86_cpu_id intel_cqm_match[] = { | ||
| 1300 | { .vendor = X86_VENDOR_INTEL, .feature = X86_FEATURE_CQM_OCCUP_LLC }, | ||
| 1301 | {} | ||
| 1302 | }; | ||
| 1303 | |||
| 1304 | static int __init intel_cqm_init(void) | ||
| 1305 | { | ||
| 1306 | char *str, scale[20]; | ||
| 1307 | int i, cpu, ret; | ||
| 1308 | |||
| 1309 | if (!x86_match_cpu(intel_cqm_match)) | ||
| 1310 | return -ENODEV; | ||
| 1311 | |||
| 1312 | cqm_l3_scale = boot_cpu_data.x86_cache_occ_scale; | ||
| 1313 | |||
| 1314 | /* | ||
| 1315 | * It's possible that not all resources support the same number | ||
| 1316 | * of RMIDs. Instead of making scheduling much more complicated | ||
| 1317 | * (where we have to match a task's RMID to a cpu that supports | ||
| 1318 | * that many RMIDs) just find the minimum RMIDs supported across | ||
| 1319 | * all cpus. | ||
| 1320 | * | ||
| 1321 | * Also, check that the scales match on all cpus. | ||
| 1322 | */ | ||
| 1323 | cpu_notifier_register_begin(); | ||
| 1324 | |||
| 1325 | for_each_online_cpu(cpu) { | ||
| 1326 | struct cpuinfo_x86 *c = &cpu_data(cpu); | ||
| 1327 | |||
| 1328 | if (c->x86_cache_max_rmid < cqm_max_rmid) | ||
| 1329 | cqm_max_rmid = c->x86_cache_max_rmid; | ||
| 1330 | |||
| 1331 | if (c->x86_cache_occ_scale != cqm_l3_scale) { | ||
| 1332 | pr_err("Multiple LLC scale values, disabling\n"); | ||
| 1333 | ret = -EINVAL; | ||
| 1334 | goto out; | ||
| 1335 | } | ||
| 1336 | } | ||
| 1337 | |||
| 1338 | /* | ||
| 1339 | * A reasonable upper limit on the max threshold is the number | ||
| 1340 | * of lines tagged per RMID if all RMIDs have the same number of | ||
| 1341 | * lines tagged in the LLC. | ||
| 1342 | * | ||
| 1343 | * For a 35MB LLC and 56 RMIDs, this is ~1.8% of the LLC. | ||
| 1344 | */ | ||
| 1345 | __intel_cqm_max_threshold = | ||
| 1346 | boot_cpu_data.x86_cache_size * 1024 / (cqm_max_rmid + 1); | ||
| 1347 | |||
| 1348 | snprintf(scale, sizeof(scale), "%u", cqm_l3_scale); | ||
| 1349 | str = kstrdup(scale, GFP_KERNEL); | ||
| 1350 | if (!str) { | ||
| 1351 | ret = -ENOMEM; | ||
| 1352 | goto out; | ||
| 1353 | } | ||
| 1354 | |||
| 1355 | event_attr_intel_cqm_llc_scale.event_str = str; | ||
| 1356 | |||
| 1357 | ret = intel_cqm_setup_rmid_cache(); | ||
| 1358 | if (ret) | ||
| 1359 | goto out; | ||
| 1360 | |||
| 1361 | for_each_online_cpu(i) { | ||
| 1362 | intel_cqm_cpu_prepare(i); | ||
| 1363 | cqm_pick_event_reader(i); | ||
| 1364 | } | ||
| 1365 | |||
| 1366 | __perf_cpu_notifier(intel_cqm_cpu_notifier); | ||
| 1367 | |||
| 1368 | ret = perf_pmu_register(&intel_cqm_pmu, "intel_cqm", -1); | ||
| 1369 | if (ret) | ||
| 1370 | pr_err("Intel CQM perf registration failed: %d\n", ret); | ||
| 1371 | else | ||
| 1372 | pr_info("Intel CQM monitoring enabled\n"); | ||
| 1373 | |||
| 1374 | out: | ||
| 1375 | cpu_notifier_register_done(); | ||
| 1376 | |||
| 1377 | return ret; | ||
| 1378 | } | ||
| 1379 | device_initcall(intel_cqm_init); | ||
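A quick sanity check of the default ceiling computed in intel_cqm_init() above, using the 35MB LLC / 56 RMID figures from the in-code comment (illustrative numbers only; the real values come from CPUID at boot):

```c
/*
 * Worked example (illustrative numbers only) of the default
 * __intel_cqm_max_threshold computed in intel_cqm_init() above,
 * plugging in the 35MB LLC / 56 RMID case from the in-code comment.
 * x86_cache_size is reported in KB, so the result is in bytes.
 */
static unsigned int example_max_threshold(void)
{
	unsigned int cache_size_kb = 35 * 1024;	/* 35MB LLC */
	unsigned int nr_rmids = 55 + 1;		/* cqm_max_rmid + 1 */

	/* 35840 * 1024 / 56 = 655360 bytes, i.e. 1/56 ~= 1.8% of the LLC */
	return cache_size_kb * 1024 / nr_rmids;
}
```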
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index a503100388cc..b16eac5f54ce 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h | |||
| @@ -53,6 +53,7 @@ struct perf_guest_info_callbacks { | |||
| 53 | #include <linux/sysfs.h> | 53 | #include <linux/sysfs.h> |
| 54 | #include <linux/perf_regs.h> | 54 | #include <linux/perf_regs.h> |
| 55 | #include <linux/workqueue.h> | 55 | #include <linux/workqueue.h> |
| 56 | #include <linux/cgroup.h> | ||
| 56 | #include <asm/local.h> | 57 | #include <asm/local.h> |
| 57 | 58 | ||
| 58 | struct perf_callchain_entry { | 59 | struct perf_callchain_entry { |
| @@ -118,10 +119,16 @@ struct hw_perf_event { | |||
| 118 | struct hrtimer hrtimer; | 119 | struct hrtimer hrtimer; |
| 119 | }; | 120 | }; |
| 120 | struct { /* tracepoint */ | 121 | struct { /* tracepoint */ |
| 121 | struct task_struct *tp_target; | ||
| 122 | /* for tp_event->class */ | 122 | /* for tp_event->class */ |
| 123 | struct list_head tp_list; | 123 | struct list_head tp_list; |
| 124 | }; | 124 | }; |
| 125 | struct { /* intel_cqm */ | ||
| 126 | int cqm_state; | ||
| 127 | int cqm_rmid; | ||
| 128 | struct list_head cqm_events_entry; | ||
| 129 | struct list_head cqm_groups_entry; | ||
| 130 | struct list_head cqm_group_entry; | ||
| 131 | }; | ||
| 125 | #ifdef CONFIG_HAVE_HW_BREAKPOINT | 132 | #ifdef CONFIG_HAVE_HW_BREAKPOINT |
| 126 | struct { /* breakpoint */ | 133 | struct { /* breakpoint */ |
| 127 | /* | 134 | /* |
| @@ -129,12 +136,12 @@ struct hw_perf_event { | |||
| 129 | * problem hw_breakpoint has with context | 136 | * problem hw_breakpoint has with context |
| 130 | * creation and event initalization. | 137 | * creation and event initalization. |
| 131 | */ | 138 | */ |
| 132 | struct task_struct *bp_target; | ||
| 133 | struct arch_hw_breakpoint info; | 139 | struct arch_hw_breakpoint info; |
| 134 | struct list_head bp_list; | 140 | struct list_head bp_list; |
| 135 | }; | 141 | }; |
| 136 | #endif | 142 | #endif |
| 137 | }; | 143 | }; |
| 144 | struct task_struct *target; | ||
| 138 | int state; | 145 | int state; |
| 139 | local64_t prev_count; | 146 | local64_t prev_count; |
| 140 | u64 sample_period; | 147 | u64 sample_period; |
| @@ -271,6 +278,11 @@ struct pmu { | |||
| 271 | */ | 278 | */ |
| 272 | size_t task_ctx_size; | 279 | size_t task_ctx_size; |
| 273 | 280 | ||
| 281 | |||
| 282 | /* | ||
| 283 | * Return the count value for a counter. | ||
| 284 | */ | ||
| 285 | u64 (*count) (struct perf_event *event); /*optional*/ | ||
| 274 | }; | 286 | }; |
| 275 | 287 | ||
| 276 | /** | 288 | /** |
| @@ -547,6 +559,35 @@ struct perf_output_handle { | |||
| 547 | int page; | 559 | int page; |
| 548 | }; | 560 | }; |
| 549 | 561 | ||
| 562 | #ifdef CONFIG_CGROUP_PERF | ||
| 563 | |||
| 564 | /* | ||
| 565 | * perf_cgroup_info keeps track of time_enabled for a cgroup. | ||
| 566 | * This is a per-cpu dynamically allocated data structure. | ||
| 567 | */ | ||
| 568 | struct perf_cgroup_info { | ||
| 569 | u64 time; | ||
| 570 | u64 timestamp; | ||
| 571 | }; | ||
| 572 | |||
| 573 | struct perf_cgroup { | ||
| 574 | struct cgroup_subsys_state css; | ||
| 575 | struct perf_cgroup_info __percpu *info; | ||
| 576 | }; | ||
| 577 | |||
| 578 | /* | ||
| 579 | * Must ensure cgroup is pinned (css_get) before calling | ||
| 580 | * this function. In other words, we cannot call this function | ||
| 581 | * if there is no cgroup event for the current CPU context. | ||
| 582 | */ | ||
| 583 | static inline struct perf_cgroup * | ||
| 584 | perf_cgroup_from_task(struct task_struct *task) | ||
| 585 | { | ||
| 586 | return container_of(task_css(task, perf_event_cgrp_id), | ||
| 587 | struct perf_cgroup, css); | ||
| 588 | } | ||
| 589 | #endif /* CONFIG_CGROUP_PERF */ | ||
| 590 | |||
| 550 | #ifdef CONFIG_PERF_EVENTS | 591 | #ifdef CONFIG_PERF_EVENTS |
| 551 | 592 | ||
| 552 | extern int perf_pmu_register(struct pmu *pmu, const char *name, int type); | 593 | extern int perf_pmu_register(struct pmu *pmu, const char *name, int type); |
| @@ -740,6 +781,11 @@ static inline void perf_event_task_sched_out(struct task_struct *prev, | |||
| 740 | __perf_event_task_sched_out(prev, next); | 781 | __perf_event_task_sched_out(prev, next); |
| 741 | } | 782 | } |
| 742 | 783 | ||
| 784 | static inline u64 __perf_event_count(struct perf_event *event) | ||
| 785 | { | ||
| 786 | return local64_read(&event->count) + atomic64_read(&event->child_count); | ||
| 787 | } | ||
| 788 | |||
| 743 | extern void perf_event_mmap(struct vm_area_struct *vma); | 789 | extern void perf_event_mmap(struct vm_area_struct *vma); |
| 744 | extern struct perf_guest_info_callbacks *perf_guest_cbs; | 790 | extern struct perf_guest_info_callbacks *perf_guest_cbs; |
| 745 | extern int perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks); | 791 | extern int perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks); |
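Two small API additions in this header hunk deserve a note. The optional pmu::count callback lets a PMU report an event's value itself; perf_event_count() in the core (see the kernel/events/core.c hunk below) calls it when set and otherwise falls back to the new __perf_event_count() helper. A minimal sketch of a driver using the hook follows; my_read_hw_counter() is a hypothetical stand-in for a hardware readout, not something this patch provides:

```c
#include <linux/perf_event.h>

/* Hypothetical hardware readout, not part of this patch. */
extern u64 my_read_hw_counter(struct perf_event *event);

/* The value lives in hardware, so report it directly. */
static u64 my_pmu_count(struct perf_event *event)
{
	return my_read_hw_counter(event);
}

static struct pmu my_pmu = {
	/* .event_init, .add, .del, .start, .stop, .read as usual */
	.count	= my_pmu_count,	/* optional; core falls back to __perf_event_count() */
};
```

In this merge, intel_cqm is the user of the hook (its .count is intel_cqm_event_count above): the per-RMID occupancy value comes straight from hardware rather than being accumulated in event->count.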
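The perf_cgroup definitions moved into this header from kernel/events/core.c keep their original constraint: the css must be pinned before perf_cgroup_from_task() is used, which in the perf core is guaranteed by an existing cgroup event on the CPU context plus RCU. A hedged sketch of a reader that honours this (read_my_cgroup_time() is illustrative only and assumes it is called with preemption disabled, as the core callers are):

```c
/*
 * Illustrative only: read the accumulated cgroup time for @task.
 * Assumes a cgroup event exists for this CPU context (keeping the css
 * pinned) and that preemption is already disabled.
 */
static u64 read_my_cgroup_time(struct task_struct *task)
{
	struct perf_cgroup *cgrp;
	u64 time;

	rcu_read_lock();
	cgrp = perf_cgroup_from_task(task);
	time = this_cpu_ptr(cgrp->info)->time;
	rcu_read_unlock();

	return time;
}
```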
diff --git a/kernel/events/core.c b/kernel/events/core.c index 9a5f339a0e2d..b01dfb602db1 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c | |||
| @@ -34,11 +34,11 @@ | |||
| 34 | #include <linux/syscalls.h> | 34 | #include <linux/syscalls.h> |
| 35 | #include <linux/anon_inodes.h> | 35 | #include <linux/anon_inodes.h> |
| 36 | #include <linux/kernel_stat.h> | 36 | #include <linux/kernel_stat.h> |
| 37 | #include <linux/cgroup.h> | ||
| 37 | #include <linux/perf_event.h> | 38 | #include <linux/perf_event.h> |
| 38 | #include <linux/ftrace_event.h> | 39 | #include <linux/ftrace_event.h> |
| 39 | #include <linux/hw_breakpoint.h> | 40 | #include <linux/hw_breakpoint.h> |
| 40 | #include <linux/mm_types.h> | 41 | #include <linux/mm_types.h> |
| 41 | #include <linux/cgroup.h> | ||
| 42 | #include <linux/module.h> | 42 | #include <linux/module.h> |
| 43 | #include <linux/mman.h> | 43 | #include <linux/mman.h> |
| 44 | #include <linux/compat.h> | 44 | #include <linux/compat.h> |
| @@ -351,32 +351,6 @@ static void perf_ctx_unlock(struct perf_cpu_context *cpuctx, | |||
| 351 | 351 | ||
| 352 | #ifdef CONFIG_CGROUP_PERF | 352 | #ifdef CONFIG_CGROUP_PERF |
| 353 | 353 | ||
| 354 | /* | ||
| 355 | * perf_cgroup_info keeps track of time_enabled for a cgroup. | ||
| 356 | * This is a per-cpu dynamically allocated data structure. | ||
| 357 | */ | ||
| 358 | struct perf_cgroup_info { | ||
| 359 | u64 time; | ||
| 360 | u64 timestamp; | ||
| 361 | }; | ||
| 362 | |||
| 363 | struct perf_cgroup { | ||
| 364 | struct cgroup_subsys_state css; | ||
| 365 | struct perf_cgroup_info __percpu *info; | ||
| 366 | }; | ||
| 367 | |||
| 368 | /* | ||
| 369 | * Must ensure cgroup is pinned (css_get) before calling | ||
| 370 | * this function. In other words, we cannot call this function | ||
| 371 | * if there is no cgroup event for the current CPU context. | ||
| 372 | */ | ||
| 373 | static inline struct perf_cgroup * | ||
| 374 | perf_cgroup_from_task(struct task_struct *task) | ||
| 375 | { | ||
| 376 | return container_of(task_css(task, perf_event_cgrp_id), | ||
| 377 | struct perf_cgroup, css); | ||
| 378 | } | ||
| 379 | |||
| 380 | static inline bool | 354 | static inline bool |
| 381 | perf_cgroup_match(struct perf_event *event) | 355 | perf_cgroup_match(struct perf_event *event) |
| 382 | { | 356 | { |
| @@ -3220,7 +3194,10 @@ static void __perf_event_read(void *info) | |||
| 3220 | 3194 | ||
| 3221 | static inline u64 perf_event_count(struct perf_event *event) | 3195 | static inline u64 perf_event_count(struct perf_event *event) |
| 3222 | { | 3196 | { |
| 3223 | return local64_read(&event->count) + atomic64_read(&event->child_count); | 3197 | if (event->pmu->count) |
| 3198 | return event->pmu->count(event); | ||
| 3199 | |||
| 3200 | return __perf_event_count(event); | ||
| 3224 | } | 3201 | } |
| 3225 | 3202 | ||
| 3226 | static u64 perf_event_read(struct perf_event *event) | 3203 | static u64 perf_event_read(struct perf_event *event) |
| @@ -7149,7 +7126,7 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu, | |||
| 7149 | struct perf_event *group_leader, | 7126 | struct perf_event *group_leader, |
| 7150 | struct perf_event *parent_event, | 7127 | struct perf_event *parent_event, |
| 7151 | perf_overflow_handler_t overflow_handler, | 7128 | perf_overflow_handler_t overflow_handler, |
| 7152 | void *context) | 7129 | void *context, int cgroup_fd) |
| 7153 | { | 7130 | { |
| 7154 | struct pmu *pmu; | 7131 | struct pmu *pmu; |
| 7155 | struct perf_event *event; | 7132 | struct perf_event *event; |
| @@ -7204,16 +7181,12 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu, | |||
| 7204 | 7181 | ||
| 7205 | if (task) { | 7182 | if (task) { |
| 7206 | event->attach_state = PERF_ATTACH_TASK; | 7183 | event->attach_state = PERF_ATTACH_TASK; |
| 7207 | |||
| 7208 | if (attr->type == PERF_TYPE_TRACEPOINT) | ||
| 7209 | event->hw.tp_target = task; | ||
| 7210 | #ifdef CONFIG_HAVE_HW_BREAKPOINT | ||
| 7211 | /* | 7184 | /* |
| 7212 | * hw_breakpoint is a bit difficult here.. | 7185 | * XXX pmu::event_init needs to know what task to account to |
| 7186 | * and we cannot use the ctx information because we need the | ||
| 7187 | * pmu before we get a ctx. | ||
| 7213 | */ | 7188 | */ |
| 7214 | else if (attr->type == PERF_TYPE_BREAKPOINT) | 7189 | event->hw.target = task; |
| 7215 | event->hw.bp_target = task; | ||
| 7216 | #endif | ||
| 7217 | } | 7190 | } |
| 7218 | 7191 | ||
| 7219 | if (!overflow_handler && parent_event) { | 7192 | if (!overflow_handler && parent_event) { |
| @@ -7245,6 +7218,12 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu, | |||
| 7245 | if (!has_branch_stack(event)) | 7218 | if (!has_branch_stack(event)) |
| 7246 | event->attr.branch_sample_type = 0; | 7219 | event->attr.branch_sample_type = 0; |
| 7247 | 7220 | ||
| 7221 | if (cgroup_fd != -1) { | ||
| 7222 | err = perf_cgroup_connect(cgroup_fd, event, attr, group_leader); | ||
| 7223 | if (err) | ||
| 7224 | goto err_ns; | ||
| 7225 | } | ||
| 7226 | |||
| 7248 | pmu = perf_init_event(event); | 7227 | pmu = perf_init_event(event); |
| 7249 | if (!pmu) | 7228 | if (!pmu) |
| 7250 | goto err_ns; | 7229 | goto err_ns; |
| @@ -7268,6 +7247,8 @@ err_pmu: | |||
| 7268 | event->destroy(event); | 7247 | event->destroy(event); |
| 7269 | module_put(pmu->module); | 7248 | module_put(pmu->module); |
| 7270 | err_ns: | 7249 | err_ns: |
| 7250 | if (is_cgroup_event(event)) | ||
| 7251 | perf_detach_cgroup(event); | ||
| 7271 | if (event->ns) | 7252 | if (event->ns) |
| 7272 | put_pid_ns(event->ns); | 7253 | put_pid_ns(event->ns); |
| 7273 | kfree(event); | 7254 | kfree(event); |
| @@ -7486,6 +7467,7 @@ SYSCALL_DEFINE5(perf_event_open, | |||
| 7486 | int move_group = 0; | 7467 | int move_group = 0; |
| 7487 | int err; | 7468 | int err; |
| 7488 | int f_flags = O_RDWR; | 7469 | int f_flags = O_RDWR; |
| 7470 | int cgroup_fd = -1; | ||
| 7489 | 7471 | ||
| 7490 | /* for future expandability... */ | 7472 | /* for future expandability... */ |
| 7491 | if (flags & ~PERF_FLAG_ALL) | 7473 | if (flags & ~PERF_FLAG_ALL) |
| @@ -7551,21 +7533,16 @@ SYSCALL_DEFINE5(perf_event_open, | |||
| 7551 | 7533 | ||
| 7552 | get_online_cpus(); | 7534 | get_online_cpus(); |
| 7553 | 7535 | ||
| 7536 | if (flags & PERF_FLAG_PID_CGROUP) | ||
| 7537 | cgroup_fd = pid; | ||
| 7538 | |||
| 7554 | event = perf_event_alloc(&attr, cpu, task, group_leader, NULL, | 7539 | event = perf_event_alloc(&attr, cpu, task, group_leader, NULL, |
| 7555 | NULL, NULL); | 7540 | NULL, NULL, cgroup_fd); |
| 7556 | if (IS_ERR(event)) { | 7541 | if (IS_ERR(event)) { |
| 7557 | err = PTR_ERR(event); | 7542 | err = PTR_ERR(event); |
| 7558 | goto err_cpus; | 7543 | goto err_cpus; |
| 7559 | } | 7544 | } |
| 7560 | 7545 | ||
| 7561 | if (flags & PERF_FLAG_PID_CGROUP) { | ||
| 7562 | err = perf_cgroup_connect(pid, event, &attr, group_leader); | ||
| 7563 | if (err) { | ||
| 7564 | __free_event(event); | ||
| 7565 | goto err_cpus; | ||
| 7566 | } | ||
| 7567 | } | ||
| 7568 | |||
| 7569 | if (is_sampling_event(event)) { | 7546 | if (is_sampling_event(event)) { |
| 7570 | if (event->pmu->capabilities & PERF_PMU_CAP_NO_INTERRUPT) { | 7547 | if (event->pmu->capabilities & PERF_PMU_CAP_NO_INTERRUPT) { |
| 7571 | err = -ENOTSUPP; | 7548 | err = -ENOTSUPP; |
| @@ -7802,7 +7779,7 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu, | |||
| 7802 | */ | 7779 | */ |
| 7803 | 7780 | ||
| 7804 | event = perf_event_alloc(attr, cpu, task, NULL, NULL, | 7781 | event = perf_event_alloc(attr, cpu, task, NULL, NULL, |
| 7805 | overflow_handler, context); | 7782 | overflow_handler, context, -1); |
| 7806 | if (IS_ERR(event)) { | 7783 | if (IS_ERR(event)) { |
| 7807 | err = PTR_ERR(event); | 7784 | err = PTR_ERR(event); |
| 7808 | goto err; | 7785 | goto err; |
| @@ -8163,7 +8140,7 @@ inherit_event(struct perf_event *parent_event, | |||
| 8163 | parent_event->cpu, | 8140 | parent_event->cpu, |
| 8164 | child, | 8141 | child, |
| 8165 | group_leader, parent_event, | 8142 | group_leader, parent_event, |
| 8166 | NULL, NULL); | 8143 | NULL, NULL, -1); |
| 8167 | if (IS_ERR(child_event)) | 8144 | if (IS_ERR(child_event)) |
| 8168 | return child_event; | 8145 | return child_event; |
| 8169 | 8146 | ||
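For context on the cgroup_fd plumbing above: the cgroup is now connected before perf_init_event(), so pmu::event_init can see event->cgrp. On the userspace side this corresponds to the long-standing interface where, with PERF_FLAG_PID_CGROUP, the pid argument of perf_event_open() is an open file descriptor for the cgroup directory and the event is opened per CPU. A minimal sketch; the cgroup path and event choice are only examples:

```c
#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <fcntl.h>
#include <string.h>
#include <unistd.h>

/* Sketch: count cycles for every task in one cgroup, on CPU 0.
 * "/sys/fs/cgroup/perf_event/mygroup" is just an example path. */
int open_cgroup_event(void)
{
	struct perf_event_attr attr;
	int cgrp_fd;

	memset(&attr, 0, sizeof(attr));
	attr.type = PERF_TYPE_HARDWARE;
	attr.size = sizeof(attr);
	attr.config = PERF_COUNT_HW_CPU_CYCLES;

	cgrp_fd = open("/sys/fs/cgroup/perf_event/mygroup", O_RDONLY);
	if (cgrp_fd < 0)
		return -1;

	/* pid is interpreted as the cgroup fd because of PERF_FLAG_PID_CGROUP */
	return syscall(__NR_perf_event_open, &attr, cgrp_fd, 0, -1,
		       PERF_FLAG_PID_CGROUP);
}
```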
diff --git a/kernel/events/hw_breakpoint.c b/kernel/events/hw_breakpoint.c index 9803a6600d49..92ce5f4ccc26 100644 --- a/kernel/events/hw_breakpoint.c +++ b/kernel/events/hw_breakpoint.c | |||
| @@ -116,12 +116,12 @@ static unsigned int max_task_bp_pinned(int cpu, enum bp_type_idx type) | |||
| 116 | */ | 116 | */ |
| 117 | static int task_bp_pinned(int cpu, struct perf_event *bp, enum bp_type_idx type) | 117 | static int task_bp_pinned(int cpu, struct perf_event *bp, enum bp_type_idx type) |
| 118 | { | 118 | { |
| 119 | struct task_struct *tsk = bp->hw.bp_target; | 119 | struct task_struct *tsk = bp->hw.target; |
| 120 | struct perf_event *iter; | 120 | struct perf_event *iter; |
| 121 | int count = 0; | 121 | int count = 0; |
| 122 | 122 | ||
| 123 | list_for_each_entry(iter, &bp_task_head, hw.bp_list) { | 123 | list_for_each_entry(iter, &bp_task_head, hw.bp_list) { |
| 124 | if (iter->hw.bp_target == tsk && | 124 | if (iter->hw.target == tsk && |
| 125 | find_slot_idx(iter) == type && | 125 | find_slot_idx(iter) == type && |
| 126 | (iter->cpu < 0 || cpu == iter->cpu)) | 126 | (iter->cpu < 0 || cpu == iter->cpu)) |
| 127 | count += hw_breakpoint_weight(iter); | 127 | count += hw_breakpoint_weight(iter); |
| @@ -153,7 +153,7 @@ fetch_bp_busy_slots(struct bp_busy_slots *slots, struct perf_event *bp, | |||
| 153 | int nr; | 153 | int nr; |
| 154 | 154 | ||
| 155 | nr = info->cpu_pinned; | 155 | nr = info->cpu_pinned; |
| 156 | if (!bp->hw.bp_target) | 156 | if (!bp->hw.target) |
| 157 | nr += max_task_bp_pinned(cpu, type); | 157 | nr += max_task_bp_pinned(cpu, type); |
| 158 | else | 158 | else |
| 159 | nr += task_bp_pinned(cpu, bp, type); | 159 | nr += task_bp_pinned(cpu, bp, type); |
| @@ -210,7 +210,7 @@ toggle_bp_slot(struct perf_event *bp, bool enable, enum bp_type_idx type, | |||
| 210 | weight = -weight; | 210 | weight = -weight; |
| 211 | 211 | ||
| 212 | /* Pinned counter cpu profiling */ | 212 | /* Pinned counter cpu profiling */ |
| 213 | if (!bp->hw.bp_target) { | 213 | if (!bp->hw.target) { |
| 214 | get_bp_info(bp->cpu, type)->cpu_pinned += weight; | 214 | get_bp_info(bp->cpu, type)->cpu_pinned += weight; |
| 215 | return; | 215 | return; |
| 216 | } | 216 | } |
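The hw_breakpoint changes above and the trace_uprobe changes below are the mechanical side of the hw.target unification from the perf_event.h hunk: with a single task pointer for every event type, per-task checks no longer care whether an event is a breakpoint or a tracepoint. A trivial illustration (event_targets_task() is a made-up helper, not part of the patch):

```c
/* Illustrative only: one check works for breakpoints, tracepoints, ... */
static bool event_targets_task(struct perf_event *event,
			       struct task_struct *task)
{
	/* hw.target is NULL for CPU-wide (non-task) events */
	return event->hw.target == task;
}
```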
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 7dc1c8abecd6..996e452e1eb3 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c | |||
| @@ -1005,7 +1005,7 @@ __uprobe_perf_filter(struct trace_uprobe_filter *filter, struct mm_struct *mm) | |||
| 1005 | return true; | 1005 | return true; |
| 1006 | 1006 | ||
| 1007 | list_for_each_entry(event, &filter->perf_events, hw.tp_list) { | 1007 | list_for_each_entry(event, &filter->perf_events, hw.tp_list) { |
| 1008 | if (event->hw.tp_target->mm == mm) | 1008 | if (event->hw.target->mm == mm) |
| 1009 | return true; | 1009 | return true; |
| 1010 | } | 1010 | } |
| 1011 | 1011 | ||
| @@ -1015,7 +1015,7 @@ __uprobe_perf_filter(struct trace_uprobe_filter *filter, struct mm_struct *mm) | |||
| 1015 | static inline bool | 1015 | static inline bool |
| 1016 | uprobe_filter_event(struct trace_uprobe *tu, struct perf_event *event) | 1016 | uprobe_filter_event(struct trace_uprobe *tu, struct perf_event *event) |
| 1017 | { | 1017 | { |
| 1018 | return __uprobe_perf_filter(&tu->filter, event->hw.tp_target->mm); | 1018 | return __uprobe_perf_filter(&tu->filter, event->hw.target->mm); |
| 1019 | } | 1019 | } |
| 1020 | 1020 | ||
| 1021 | static int uprobe_perf_close(struct trace_uprobe *tu, struct perf_event *event) | 1021 | static int uprobe_perf_close(struct trace_uprobe *tu, struct perf_event *event) |
| @@ -1023,10 +1023,10 @@ static int uprobe_perf_close(struct trace_uprobe *tu, struct perf_event *event) | |||
| 1023 | bool done; | 1023 | bool done; |
| 1024 | 1024 | ||
| 1025 | write_lock(&tu->filter.rwlock); | 1025 | write_lock(&tu->filter.rwlock); |
| 1026 | if (event->hw.tp_target) { | 1026 | if (event->hw.target) { |
| 1027 | list_del(&event->hw.tp_list); | 1027 | list_del(&event->hw.tp_list); |
| 1028 | done = tu->filter.nr_systemwide || | 1028 | done = tu->filter.nr_systemwide || |
| 1029 | (event->hw.tp_target->flags & PF_EXITING) || | 1029 | (event->hw.target->flags & PF_EXITING) || |
| 1030 | uprobe_filter_event(tu, event); | 1030 | uprobe_filter_event(tu, event); |
| 1031 | } else { | 1031 | } else { |
| 1032 | tu->filter.nr_systemwide--; | 1032 | tu->filter.nr_systemwide--; |
| @@ -1046,7 +1046,7 @@ static int uprobe_perf_open(struct trace_uprobe *tu, struct perf_event *event) | |||
| 1046 | int err; | 1046 | int err; |
| 1047 | 1047 | ||
| 1048 | write_lock(&tu->filter.rwlock); | 1048 | write_lock(&tu->filter.rwlock); |
| 1049 | if (event->hw.tp_target) { | 1049 | if (event->hw.target) { |
| 1050 | /* | 1050 | /* |
| 1051 | * event->parent != NULL means copy_process(), we can avoid | 1051 | * event->parent != NULL means copy_process(), we can avoid |
| 1052 | * uprobe_apply(). current->mm must be probed and we can rely | 1052 | * uprobe_apply(). current->mm must be probed and we can rely |
