aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--arch/x86/events/amd/core.c2
-rw-r--r--arch/x86/events/intel/core.c1
-rw-r--r--arch/x86/events/intel/lbr.c6
-rw-r--r--arch/x86/events/intel/pt.c75
-rw-r--r--arch/x86/events/intel/pt.h3
-rw-r--r--arch/x86/events/intel/rapl.c1
-rw-r--r--arch/x86/include/asm/perf_event.h4
-rw-r--r--arch/x86/kvm/vmx.c4
-rw-r--r--kernel/events/core.c55
9 files changed, 120 insertions, 31 deletions
diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c
index 86a9bec18dab..bd3e8421b57c 100644
--- a/arch/x86/events/amd/core.c
+++ b/arch/x86/events/amd/core.c
@@ -115,7 +115,7 @@ static __initconst const u64 amd_hw_cache_event_ids
115/* 115/*
116 * AMD Performance Monitor K7 and later. 116 * AMD Performance Monitor K7 and later.
117 */ 117 */
118static const u64 amd_perfmon_event_map[] = 118static const u64 amd_perfmon_event_map[PERF_COUNT_HW_MAX] =
119{ 119{
120 [PERF_COUNT_HW_CPU_CYCLES] = 0x0076, 120 [PERF_COUNT_HW_CPU_CYCLES] = 0x0076,
121 [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, 121 [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 68fa55b4d42e..aff79884e17d 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -3639,6 +3639,7 @@ __init int intel_pmu_init(void)
3639 3639
3640 case 78: /* 14nm Skylake Mobile */ 3640 case 78: /* 14nm Skylake Mobile */
3641 case 94: /* 14nm Skylake Desktop */ 3641 case 94: /* 14nm Skylake Desktop */
3642 case 85: /* 14nm Skylake Server */
3642 x86_pmu.late_ack = true; 3643 x86_pmu.late_ack = true;
3643 memcpy(hw_cache_event_ids, skl_hw_cache_event_ids, sizeof(hw_cache_event_ids)); 3644 memcpy(hw_cache_event_ids, skl_hw_cache_event_ids, sizeof(hw_cache_event_ids));
3644 memcpy(hw_cache_extra_regs, skl_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); 3645 memcpy(hw_cache_extra_regs, skl_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c
index 6c3b7c1780c9..1ca5d1e7d4f2 100644
--- a/arch/x86/events/intel/lbr.c
+++ b/arch/x86/events/intel/lbr.c
@@ -63,7 +63,7 @@ static enum {
63 63
64#define LBR_PLM (LBR_KERNEL | LBR_USER) 64#define LBR_PLM (LBR_KERNEL | LBR_USER)
65 65
66#define LBR_SEL_MASK 0x1ff /* valid bits in LBR_SELECT */ 66#define LBR_SEL_MASK 0x3ff /* valid bits in LBR_SELECT */
67#define LBR_NOT_SUPP -1 /* LBR filter not supported */ 67#define LBR_NOT_SUPP -1 /* LBR filter not supported */
68#define LBR_IGN 0 /* ignored */ 68#define LBR_IGN 0 /* ignored */
69 69
@@ -610,8 +610,10 @@ static int intel_pmu_setup_hw_lbr_filter(struct perf_event *event)
610 * The first 9 bits (LBR_SEL_MASK) in LBR_SELECT operate 610 * The first 9 bits (LBR_SEL_MASK) in LBR_SELECT operate
611 * in suppress mode. So LBR_SELECT should be set to 611 * in suppress mode. So LBR_SELECT should be set to
612 * (~mask & LBR_SEL_MASK) | (mask & ~LBR_SEL_MASK) 612 * (~mask & LBR_SEL_MASK) | (mask & ~LBR_SEL_MASK)
613 * But the 10th bit LBR_CALL_STACK does not operate
614 * in suppress mode.
613 */ 615 */
614 reg->config = mask ^ x86_pmu.lbr_sel_mask; 616 reg->config = mask ^ (x86_pmu.lbr_sel_mask & ~LBR_CALL_STACK);
615 617
616 if ((br_type & PERF_SAMPLE_BRANCH_NO_CYCLES) && 618 if ((br_type & PERF_SAMPLE_BRANCH_NO_CYCLES) &&
617 (br_type & PERF_SAMPLE_BRANCH_NO_FLAGS) && 619 (br_type & PERF_SAMPLE_BRANCH_NO_FLAGS) &&
diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c
index 6af7cf71d6b2..09a77dbc73c9 100644
--- a/arch/x86/events/intel/pt.c
+++ b/arch/x86/events/intel/pt.c
@@ -136,9 +136,21 @@ static int __init pt_pmu_hw_init(void)
136 struct dev_ext_attribute *de_attrs; 136 struct dev_ext_attribute *de_attrs;
137 struct attribute **attrs; 137 struct attribute **attrs;
138 size_t size; 138 size_t size;
139 u64 reg;
139 int ret; 140 int ret;
140 long i; 141 long i;
141 142
143 if (boot_cpu_has(X86_FEATURE_VMX)) {
144 /*
145 * Intel SDM, 36.5 "Tracing post-VMXON" says that
146 * "IA32_VMX_MISC[bit 14]" being 1 means PT can trace
147 * post-VMXON.
148 */
149 rdmsrl(MSR_IA32_VMX_MISC, reg);
150 if (reg & BIT(14))
151 pt_pmu.vmx = true;
152 }
153
142 attrs = NULL; 154 attrs = NULL;
143 155
144 for (i = 0; i < PT_CPUID_LEAVES; i++) { 156 for (i = 0; i < PT_CPUID_LEAVES; i++) {
@@ -269,20 +281,23 @@ static void pt_config(struct perf_event *event)
269 281
270 reg |= (event->attr.config & PT_CONFIG_MASK); 282 reg |= (event->attr.config & PT_CONFIG_MASK);
271 283
284 event->hw.config = reg;
272 wrmsrl(MSR_IA32_RTIT_CTL, reg); 285 wrmsrl(MSR_IA32_RTIT_CTL, reg);
273} 286}
274 287
275static void pt_config_start(bool start) 288static void pt_config_stop(struct perf_event *event)
276{ 289{
277 u64 ctl; 290 u64 ctl = READ_ONCE(event->hw.config);
291
292 /* may be already stopped by a PMI */
293 if (!(ctl & RTIT_CTL_TRACEEN))
294 return;
278 295
279 rdmsrl(MSR_IA32_RTIT_CTL, ctl); 296 ctl &= ~RTIT_CTL_TRACEEN;
280 if (start)
281 ctl |= RTIT_CTL_TRACEEN;
282 else
283 ctl &= ~RTIT_CTL_TRACEEN;
284 wrmsrl(MSR_IA32_RTIT_CTL, ctl); 297 wrmsrl(MSR_IA32_RTIT_CTL, ctl);
285 298
299 WRITE_ONCE(event->hw.config, ctl);
300
286 /* 301 /*
287 * A wrmsr that disables trace generation serializes other PT 302 * A wrmsr that disables trace generation serializes other PT
288 * registers and causes all data packets to be written to memory, 303 * registers and causes all data packets to be written to memory,
@@ -291,8 +306,7 @@ static void pt_config_start(bool start)
291 * The below WMB, separating data store and aux_head store matches 306 * The below WMB, separating data store and aux_head store matches
292 * the consumer's RMB that separates aux_head load and data load. 307 * the consumer's RMB that separates aux_head load and data load.
293 */ 308 */
294 if (!start) 309 wmb();
295 wmb();
296} 310}
297 311
298static void pt_config_buffer(void *buf, unsigned int topa_idx, 312static void pt_config_buffer(void *buf, unsigned int topa_idx,
@@ -942,11 +956,17 @@ void intel_pt_interrupt(void)
942 if (!ACCESS_ONCE(pt->handle_nmi)) 956 if (!ACCESS_ONCE(pt->handle_nmi))
943 return; 957 return;
944 958
945 pt_config_start(false); 959 /*
960 * If VMX is on and PT does not support it, don't touch anything.
961 */
962 if (READ_ONCE(pt->vmx_on))
963 return;
946 964
947 if (!event) 965 if (!event)
948 return; 966 return;
949 967
968 pt_config_stop(event);
969
950 buf = perf_get_aux(&pt->handle); 970 buf = perf_get_aux(&pt->handle);
951 if (!buf) 971 if (!buf)
952 return; 972 return;
@@ -983,6 +1003,35 @@ void intel_pt_interrupt(void)
983 } 1003 }
984} 1004}
985 1005
1006void intel_pt_handle_vmx(int on)
1007{
1008 struct pt *pt = this_cpu_ptr(&pt_ctx);
1009 struct perf_event *event;
1010 unsigned long flags;
1011
1012 /* PT plays nice with VMX, do nothing */
1013 if (pt_pmu.vmx)
1014 return;
1015
1016 /*
1017 * VMXON will clear RTIT_CTL.TraceEn; we need to make
1018 * sure to not try to set it while VMX is on. Disable
1019 * interrupts to avoid racing with pmu callbacks;
1020 * concurrent PMI should be handled fine.
1021 */
1022 local_irq_save(flags);
1023 WRITE_ONCE(pt->vmx_on, on);
1024
1025 if (on) {
1026 /* prevent pt_config_stop() from writing RTIT_CTL */
1027 event = pt->handle.event;
1028 if (event)
1029 event->hw.config = 0;
1030 }
1031 local_irq_restore(flags);
1032}
1033EXPORT_SYMBOL_GPL(intel_pt_handle_vmx);
1034
986/* 1035/*
987 * PMU callbacks 1036 * PMU callbacks
988 */ 1037 */
@@ -992,6 +1041,9 @@ static void pt_event_start(struct perf_event *event, int mode)
992 struct pt *pt = this_cpu_ptr(&pt_ctx); 1041 struct pt *pt = this_cpu_ptr(&pt_ctx);
993 struct pt_buffer *buf = perf_get_aux(&pt->handle); 1042 struct pt_buffer *buf = perf_get_aux(&pt->handle);
994 1043
1044 if (READ_ONCE(pt->vmx_on))
1045 return;
1046
995 if (!buf || pt_buffer_is_full(buf, pt)) { 1047 if (!buf || pt_buffer_is_full(buf, pt)) {
996 event->hw.state = PERF_HES_STOPPED; 1048 event->hw.state = PERF_HES_STOPPED;
997 return; 1049 return;
@@ -1014,7 +1066,8 @@ static void pt_event_stop(struct perf_event *event, int mode)
1014 * see comment in intel_pt_interrupt(). 1066 * see comment in intel_pt_interrupt().
1015 */ 1067 */
1016 ACCESS_ONCE(pt->handle_nmi) = 0; 1068 ACCESS_ONCE(pt->handle_nmi) = 0;
1017 pt_config_start(false); 1069
1070 pt_config_stop(event);
1018 1071
1019 if (event->hw.state == PERF_HES_STOPPED) 1072 if (event->hw.state == PERF_HES_STOPPED)
1020 return; 1073 return;
diff --git a/arch/x86/events/intel/pt.h b/arch/x86/events/intel/pt.h
index 336878a5d205..3abb5f5cccc8 100644
--- a/arch/x86/events/intel/pt.h
+++ b/arch/x86/events/intel/pt.h
@@ -65,6 +65,7 @@ enum pt_capabilities {
65struct pt_pmu { 65struct pt_pmu {
66 struct pmu pmu; 66 struct pmu pmu;
67 u32 caps[PT_CPUID_REGS_NUM * PT_CPUID_LEAVES]; 67 u32 caps[PT_CPUID_REGS_NUM * PT_CPUID_LEAVES];
68 bool vmx;
68}; 69};
69 70
70/** 71/**
@@ -107,10 +108,12 @@ struct pt_buffer {
107 * struct pt - per-cpu pt context 108 * struct pt - per-cpu pt context
108 * @handle: perf output handle 109 * @handle: perf output handle
109 * @handle_nmi: do handle PT PMI on this cpu, there's an active event 110 * @handle_nmi: do handle PT PMI on this cpu, there's an active event
111 * @vmx_on: 1 if VMX is ON on this cpu
110 */ 112 */
111struct pt { 113struct pt {
112 struct perf_output_handle handle; 114 struct perf_output_handle handle;
113 int handle_nmi; 115 int handle_nmi;
116 int vmx_on;
114}; 117};
115 118
116#endif /* __INTEL_PT_H__ */ 119#endif /* __INTEL_PT_H__ */
diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c
index 70c93f9b03ac..1705c9d75e44 100644
--- a/arch/x86/events/intel/rapl.c
+++ b/arch/x86/events/intel/rapl.c
@@ -718,6 +718,7 @@ static int __init rapl_pmu_init(void)
718 break; 718 break;
719 case 60: /* Haswell */ 719 case 60: /* Haswell */
720 case 69: /* Haswell-Celeron */ 720 case 69: /* Haswell-Celeron */
721 case 70: /* Haswell GT3e */
721 case 61: /* Broadwell */ 722 case 61: /* Broadwell */
722 case 71: /* Broadwell-H */ 723 case 71: /* Broadwell-H */
723 rapl_cntr_mask = RAPL_IDX_HSW; 724 rapl_cntr_mask = RAPL_IDX_HSW;
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 5a2ed3ed2f26..f353061bba1d 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -285,6 +285,10 @@ static inline void perf_events_lapic_init(void) { }
285static inline void perf_check_microcode(void) { } 285static inline void perf_check_microcode(void) { }
286#endif 286#endif
287 287
288#ifdef CONFIG_CPU_SUP_INTEL
289 extern void intel_pt_handle_vmx(int on);
290#endif
291
288#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD) 292#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD)
289 extern void amd_pmu_enable_virt(void); 293 extern void amd_pmu_enable_virt(void);
290 extern void amd_pmu_disable_virt(void); 294 extern void amd_pmu_disable_virt(void);
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index ee1c8a93871c..133679d520af 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3103,6 +3103,8 @@ static __init int vmx_disabled_by_bios(void)
3103 3103
3104static void kvm_cpu_vmxon(u64 addr) 3104static void kvm_cpu_vmxon(u64 addr)
3105{ 3105{
3106 intel_pt_handle_vmx(1);
3107
3106 asm volatile (ASM_VMX_VMXON_RAX 3108 asm volatile (ASM_VMX_VMXON_RAX
3107 : : "a"(&addr), "m"(addr) 3109 : : "a"(&addr), "m"(addr)
3108 : "memory", "cc"); 3110 : "memory", "cc");
@@ -3172,6 +3174,8 @@ static void vmclear_local_loaded_vmcss(void)
3172static void kvm_cpu_vmxoff(void) 3174static void kvm_cpu_vmxoff(void)
3173{ 3175{
3174 asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc"); 3176 asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc");
3177
3178 intel_pt_handle_vmx(0);
3175} 3179}
3176 3180
3177static void hardware_disable(void) 3181static void hardware_disable(void)
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 52bedc5a5aaa..4e2ebf6f2f1f 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -412,7 +412,8 @@ int perf_cpu_time_max_percent_handler(struct ctl_table *table, int write,
412 if (ret || !write) 412 if (ret || !write)
413 return ret; 413 return ret;
414 414
415 if (sysctl_perf_cpu_time_max_percent == 100) { 415 if (sysctl_perf_cpu_time_max_percent == 100 ||
416 sysctl_perf_cpu_time_max_percent == 0) {
416 printk(KERN_WARNING 417 printk(KERN_WARNING
417 "perf: Dynamic interrupt throttling disabled, can hang your system!\n"); 418 "perf: Dynamic interrupt throttling disabled, can hang your system!\n");
418 WRITE_ONCE(perf_sample_allowed_ns, 0); 419 WRITE_ONCE(perf_sample_allowed_ns, 0);
@@ -1105,6 +1106,7 @@ static void put_ctx(struct perf_event_context *ctx)
1105 * function. 1106 * function.
1106 * 1107 *
1107 * Lock order: 1108 * Lock order:
1109 * cred_guard_mutex
1108 * task_struct::perf_event_mutex 1110 * task_struct::perf_event_mutex
1109 * perf_event_context::mutex 1111 * perf_event_context::mutex
1110 * perf_event::child_mutex; 1112 * perf_event::child_mutex;
@@ -3420,7 +3422,6 @@ static struct task_struct *
3420find_lively_task_by_vpid(pid_t vpid) 3422find_lively_task_by_vpid(pid_t vpid)
3421{ 3423{
3422 struct task_struct *task; 3424 struct task_struct *task;
3423 int err;
3424 3425
3425 rcu_read_lock(); 3426 rcu_read_lock();
3426 if (!vpid) 3427 if (!vpid)
@@ -3434,16 +3435,7 @@ find_lively_task_by_vpid(pid_t vpid)
3434 if (!task) 3435 if (!task)
3435 return ERR_PTR(-ESRCH); 3436 return ERR_PTR(-ESRCH);
3436 3437
3437 /* Reuse ptrace permission checks for now. */
3438 err = -EACCES;
3439 if (!ptrace_may_access(task, PTRACE_MODE_READ_REALCREDS))
3440 goto errout;
3441
3442 return task; 3438 return task;
3443errout:
3444 put_task_struct(task);
3445 return ERR_PTR(err);
3446
3447} 3439}
3448 3440
3449/* 3441/*
@@ -8413,6 +8405,24 @@ SYSCALL_DEFINE5(perf_event_open,
8413 8405
8414 get_online_cpus(); 8406 get_online_cpus();
8415 8407
8408 if (task) {
8409 err = mutex_lock_interruptible(&task->signal->cred_guard_mutex);
8410 if (err)
8411 goto err_cpus;
8412
8413 /*
8414 * Reuse ptrace permission checks for now.
8415 *
8416 * We must hold cred_guard_mutex across this and any potential
8417 * perf_install_in_context() call for this new event to
8418 * serialize against exec() altering our credentials (and the
8419 * perf_event_exit_task() that could imply).
8420 */
8421 err = -EACCES;
8422 if (!ptrace_may_access(task, PTRACE_MODE_READ_REALCREDS))
8423 goto err_cred;
8424 }
8425
8416 if (flags & PERF_FLAG_PID_CGROUP) 8426 if (flags & PERF_FLAG_PID_CGROUP)
8417 cgroup_fd = pid; 8427 cgroup_fd = pid;
8418 8428
@@ -8420,7 +8430,7 @@ SYSCALL_DEFINE5(perf_event_open,
8420 NULL, NULL, cgroup_fd); 8430 NULL, NULL, cgroup_fd);
8421 if (IS_ERR(event)) { 8431 if (IS_ERR(event)) {
8422 err = PTR_ERR(event); 8432 err = PTR_ERR(event);
8423 goto err_cpus; 8433 goto err_cred;
8424 } 8434 }
8425 8435
8426 if (is_sampling_event(event)) { 8436 if (is_sampling_event(event)) {
@@ -8479,11 +8489,6 @@ SYSCALL_DEFINE5(perf_event_open,
8479 goto err_context; 8489 goto err_context;
8480 } 8490 }
8481 8491
8482 if (task) {
8483 put_task_struct(task);
8484 task = NULL;
8485 }
8486
8487 /* 8492 /*
8488 * Look up the group leader (we will attach this event to it): 8493 * Look up the group leader (we will attach this event to it):
8489 */ 8494 */
@@ -8581,6 +8586,11 @@ SYSCALL_DEFINE5(perf_event_open,
8581 8586
8582 WARN_ON_ONCE(ctx->parent_ctx); 8587 WARN_ON_ONCE(ctx->parent_ctx);
8583 8588
8589 /*
8590 * This is the point on no return; we cannot fail hereafter. This is
8591 * where we start modifying current state.
8592 */
8593
8584 if (move_group) { 8594 if (move_group) {
8585 /* 8595 /*
8586 * See perf_event_ctx_lock() for comments on the details 8596 * See perf_event_ctx_lock() for comments on the details
@@ -8652,6 +8662,11 @@ SYSCALL_DEFINE5(perf_event_open,
8652 mutex_unlock(&gctx->mutex); 8662 mutex_unlock(&gctx->mutex);
8653 mutex_unlock(&ctx->mutex); 8663 mutex_unlock(&ctx->mutex);
8654 8664
8665 if (task) {
8666 mutex_unlock(&task->signal->cred_guard_mutex);
8667 put_task_struct(task);
8668 }
8669
8655 put_online_cpus(); 8670 put_online_cpus();
8656 8671
8657 mutex_lock(&current->perf_event_mutex); 8672 mutex_lock(&current->perf_event_mutex);
@@ -8684,6 +8699,9 @@ err_alloc:
8684 */ 8699 */
8685 if (!event_file) 8700 if (!event_file)
8686 free_event(event); 8701 free_event(event);
8702err_cred:
8703 if (task)
8704 mutex_unlock(&task->signal->cred_guard_mutex);
8687err_cpus: 8705err_cpus:
8688 put_online_cpus(); 8706 put_online_cpus();
8689err_task: 8707err_task:
@@ -8968,6 +8986,9 @@ static void perf_event_exit_task_context(struct task_struct *child, int ctxn)
8968 8986
8969/* 8987/*
8970 * When a child task exits, feed back event values to parent events. 8988 * When a child task exits, feed back event values to parent events.
8989 *
8990 * Can be called with cred_guard_mutex held when called from
8991 * install_exec_creds().
8971 */ 8992 */
8972void perf_event_exit_task(struct task_struct *child) 8993void perf_event_exit_task(struct task_struct *child)
8973{ 8994{