aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2014-07-27 12:57:16 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2014-07-27 12:57:16 -0400
commit9dae0a3fc4b0b9aed302a0fad61846c49cc313b1 (patch)
tree7ef2c3a50f8c340762fe45102d90cf41bdf27170
parent43a255c210cebdf09235567bf46d3ceea3438b4f (diff)
parentd81b4253b0f0f1e7b7e03b0cd0f80cab18bc4d7b (diff)
Merge branch 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf fixes from Thomas Gleixner: "A bunch of fixes for perf and kprobes: - revert a commit that caused a perf group regression - silence dmesg spam - fix kprobe probing errors on ia64 and ppc64 - filter kprobe faults from userspace - lockdep fix for perf exit path - prevent perf #GP in KVM guest - correct perf event and filters" * 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: kprobes: Fix "Failed to find blacklist" probing errors on ia64 and ppc64 kprobes/x86: Don't try to resolve kprobe faults from userspace perf/x86/intel: Avoid spamming kernel log for BTS buffer failure perf/x86/intel: Protect LBR and extra_regs against KVM lying perf: Fix lockdep warning on process exit perf/x86/intel/uncore: Fix SNB-EP/IVT Cbox filter mappings perf/x86/intel: Use proper dTLB-load-misses event on IvyBridge perf: Revert ("perf: Always destroy groups on exit")
-rw-r--r--arch/x86/kernel/cpu/perf_event.c3
-rw-r--r--arch/x86/kernel/cpu/perf_event.h12
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel.c69
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_ds.c6
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_uncore.c11
-rw-r--r--arch/x86/kernel/kprobes/core.c3
-rw-r--r--kernel/events/core.c32
-rw-r--r--kernel/kprobes.c14
8 files changed, 130 insertions, 20 deletions
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 2bdfbff8a4f6..2879ecdaac43 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -118,6 +118,9 @@ static int x86_pmu_extra_regs(u64 config, struct perf_event *event)
118 continue; 118 continue;
119 if (event->attr.config1 & ~er->valid_mask) 119 if (event->attr.config1 & ~er->valid_mask)
120 return -EINVAL; 120 return -EINVAL;
121 /* Check if the extra MSRs can be safely accessed */
122 if (!er->extra_msr_access)
123 return -ENXIO;
121 124
122 reg->idx = er->idx; 125 reg->idx = er->idx;
123 reg->config = event->attr.config1; 126 reg->config = event->attr.config1;
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index 3b2f9bdd974b..8ade93111e03 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -295,14 +295,16 @@ struct extra_reg {
295 u64 config_mask; 295 u64 config_mask;
296 u64 valid_mask; 296 u64 valid_mask;
297 int idx; /* per_xxx->regs[] reg index */ 297 int idx; /* per_xxx->regs[] reg index */
298 bool extra_msr_access;
298}; 299};
299 300
300#define EVENT_EXTRA_REG(e, ms, m, vm, i) { \ 301#define EVENT_EXTRA_REG(e, ms, m, vm, i) { \
301 .event = (e), \ 302 .event = (e), \
302 .msr = (ms), \ 303 .msr = (ms), \
303 .config_mask = (m), \ 304 .config_mask = (m), \
304 .valid_mask = (vm), \ 305 .valid_mask = (vm), \
305 .idx = EXTRA_REG_##i, \ 306 .idx = EXTRA_REG_##i, \
307 .extra_msr_access = true, \
306 } 308 }
307 309
308#define INTEL_EVENT_EXTRA_REG(event, msr, vm, idx) \ 310#define INTEL_EVENT_EXTRA_REG(event, msr, vm, idx) \
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 07846d738bdb..2502d0d9d246 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -2182,6 +2182,41 @@ static void intel_snb_check_microcode(void)
2182 } 2182 }
2183} 2183}
2184 2184
2185/*
2186 * Under certain circumstances, accessing certain MSRs may cause #GP.
2187 * The function tests if the input MSR can be safely accessed.
2188 */
2189static bool check_msr(unsigned long msr, u64 mask)
2190{
2191 u64 val_old, val_new, val_tmp;
2192
2193 /*
2194 * Read the current value, change it and read it back to see if it
2195 * matches, this is needed to detect certain hardware emulators
2196 * (qemu/kvm) that don't trap on the MSR access and always return 0s.
2197 */
2198 if (rdmsrl_safe(msr, &val_old))
2199 return false;
2200
2201 /*
2202 * Only change the bits which can be updated by wrmsrl.
2203 */
2204 val_tmp = val_old ^ mask;
2205 if (wrmsrl_safe(msr, val_tmp) ||
2206 rdmsrl_safe(msr, &val_new))
2207 return false;
2208
2209 if (val_new != val_tmp)
2210 return false;
2211
2212 /* Here it's sure that the MSR can be safely accessed.
2213 * Restore the old value and return.
2214 */
2215 wrmsrl(msr, val_old);
2216
2217 return true;
2218}
2219
2185static __init void intel_sandybridge_quirk(void) 2220static __init void intel_sandybridge_quirk(void)
2186{ 2221{
2187 x86_pmu.check_microcode = intel_snb_check_microcode; 2222 x86_pmu.check_microcode = intel_snb_check_microcode;
@@ -2271,7 +2306,8 @@ __init int intel_pmu_init(void)
2271 union cpuid10_ebx ebx; 2306 union cpuid10_ebx ebx;
2272 struct event_constraint *c; 2307 struct event_constraint *c;
2273 unsigned int unused; 2308 unsigned int unused;
2274 int version; 2309 struct extra_reg *er;
2310 int version, i;
2275 2311
2276 if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { 2312 if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
2277 switch (boot_cpu_data.x86) { 2313 switch (boot_cpu_data.x86) {
@@ -2474,6 +2510,9 @@ __init int intel_pmu_init(void)
2474 case 62: /* IvyBridge EP */ 2510 case 62: /* IvyBridge EP */
2475 memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, 2511 memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
2476 sizeof(hw_cache_event_ids)); 2512 sizeof(hw_cache_event_ids));
2513 /* dTLB-load-misses on IVB is different than SNB */
2514 hw_cache_event_ids[C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = 0x8108; /* DTLB_LOAD_MISSES.DEMAND_LD_MISS_CAUSES_A_WALK */
2515
2477 memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs, 2516 memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs,
2478 sizeof(hw_cache_extra_regs)); 2517 sizeof(hw_cache_extra_regs));
2479 2518
@@ -2574,6 +2613,34 @@ __init int intel_pmu_init(void)
2574 } 2613 }
2575 } 2614 }
2576 2615
2616 /*
2617 * Accessing LBR MSRs may cause #GP under certain circumstances.
2618 * E.g. KVM doesn't support LBR MSRs.
2619 * Check all LBR MSRs here.
2620 * Disable LBR access if any LBR MSR cannot be accessed.
2621 */
2622 if (x86_pmu.lbr_nr && !check_msr(x86_pmu.lbr_tos, 0x3UL))
2623 x86_pmu.lbr_nr = 0;
2624 for (i = 0; i < x86_pmu.lbr_nr; i++) {
2625 if (!(check_msr(x86_pmu.lbr_from + i, 0xffffUL) &&
2626 check_msr(x86_pmu.lbr_to + i, 0xffffUL)))
2627 x86_pmu.lbr_nr = 0;
2628 }
2629
2630 /*
2631 * Accessing extra MSRs may cause #GP under certain circumstances.
2632 * E.g. KVM doesn't support offcore events.
2633 * Check all extra_regs here.
2634 */
2635 if (x86_pmu.extra_regs) {
2636 for (er = x86_pmu.extra_regs; er->msr; er++) {
2637 er->extra_msr_access = check_msr(er->msr, 0x1ffUL);
2638 /* Disable LBR select mapping */
2639 if ((er->idx == EXTRA_REG_LBR) && !er->extra_msr_access)
2640 x86_pmu.lbr_sel_map = NULL;
2641 }
2642 }
2643
2577 /* Support full width counters using alternative MSR range */ 2644 /* Support full width counters using alternative MSR range */
2578 if (x86_pmu.intel_cap.full_width_write) { 2645 if (x86_pmu.intel_cap.full_width_write) {
2579 x86_pmu.max_period = x86_pmu.cntval_mask; 2646 x86_pmu.max_period = x86_pmu.cntval_mask;
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 980970cb744d..696ade311ded 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -311,9 +311,11 @@ static int alloc_bts_buffer(int cpu)
311 if (!x86_pmu.bts) 311 if (!x86_pmu.bts)
312 return 0; 312 return 0;
313 313
314 buffer = kzalloc_node(BTS_BUFFER_SIZE, GFP_KERNEL, node); 314 buffer = kzalloc_node(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_NOWARN, node);
315 if (unlikely(!buffer)) 315 if (unlikely(!buffer)) {
316 WARN_ONCE(1, "%s: BTS buffer allocation failure\n", __func__);
316 return -ENOMEM; 317 return -ENOMEM;
318 }
317 319
318 max = BTS_BUFFER_SIZE / BTS_RECORD_SIZE; 320 max = BTS_BUFFER_SIZE / BTS_RECORD_SIZE;
319 thresh = max / 16; 321 thresh = max / 16;
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
index 65bbbea38b9c..ae6552a0701f 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
@@ -550,16 +550,16 @@ static struct extra_reg snbep_uncore_cbox_extra_regs[] = {
550 SNBEP_CBO_EVENT_EXTRA_REG(0x4134, 0xffff, 0x6), 550 SNBEP_CBO_EVENT_EXTRA_REG(0x4134, 0xffff, 0x6),
551 SNBEP_CBO_EVENT_EXTRA_REG(0x0135, 0xffff, 0x8), 551 SNBEP_CBO_EVENT_EXTRA_REG(0x0135, 0xffff, 0x8),
552 SNBEP_CBO_EVENT_EXTRA_REG(0x0335, 0xffff, 0x8), 552 SNBEP_CBO_EVENT_EXTRA_REG(0x0335, 0xffff, 0x8),
553 SNBEP_CBO_EVENT_EXTRA_REG(0x4135, 0xffff, 0xc), 553 SNBEP_CBO_EVENT_EXTRA_REG(0x4135, 0xffff, 0xa),
554 SNBEP_CBO_EVENT_EXTRA_REG(0x4335, 0xffff, 0xc), 554 SNBEP_CBO_EVENT_EXTRA_REG(0x4335, 0xffff, 0xa),
555 SNBEP_CBO_EVENT_EXTRA_REG(0x4435, 0xffff, 0x2), 555 SNBEP_CBO_EVENT_EXTRA_REG(0x4435, 0xffff, 0x2),
556 SNBEP_CBO_EVENT_EXTRA_REG(0x4835, 0xffff, 0x2), 556 SNBEP_CBO_EVENT_EXTRA_REG(0x4835, 0xffff, 0x2),
557 SNBEP_CBO_EVENT_EXTRA_REG(0x4a35, 0xffff, 0x2), 557 SNBEP_CBO_EVENT_EXTRA_REG(0x4a35, 0xffff, 0x2),
558 SNBEP_CBO_EVENT_EXTRA_REG(0x5035, 0xffff, 0x2), 558 SNBEP_CBO_EVENT_EXTRA_REG(0x5035, 0xffff, 0x2),
559 SNBEP_CBO_EVENT_EXTRA_REG(0x0136, 0xffff, 0x8), 559 SNBEP_CBO_EVENT_EXTRA_REG(0x0136, 0xffff, 0x8),
560 SNBEP_CBO_EVENT_EXTRA_REG(0x0336, 0xffff, 0x8), 560 SNBEP_CBO_EVENT_EXTRA_REG(0x0336, 0xffff, 0x8),
561 SNBEP_CBO_EVENT_EXTRA_REG(0x4136, 0xffff, 0xc), 561 SNBEP_CBO_EVENT_EXTRA_REG(0x4136, 0xffff, 0xa),
562 SNBEP_CBO_EVENT_EXTRA_REG(0x4336, 0xffff, 0xc), 562 SNBEP_CBO_EVENT_EXTRA_REG(0x4336, 0xffff, 0xa),
563 SNBEP_CBO_EVENT_EXTRA_REG(0x4436, 0xffff, 0x2), 563 SNBEP_CBO_EVENT_EXTRA_REG(0x4436, 0xffff, 0x2),
564 SNBEP_CBO_EVENT_EXTRA_REG(0x4836, 0xffff, 0x2), 564 SNBEP_CBO_EVENT_EXTRA_REG(0x4836, 0xffff, 0x2),
565 SNBEP_CBO_EVENT_EXTRA_REG(0x4a36, 0xffff, 0x2), 565 SNBEP_CBO_EVENT_EXTRA_REG(0x4a36, 0xffff, 0x2),
@@ -1222,6 +1222,7 @@ static struct extra_reg ivt_uncore_cbox_extra_regs[] = {
1222 SNBEP_CBO_EVENT_EXTRA_REG(SNBEP_CBO_PMON_CTL_TID_EN, 1222 SNBEP_CBO_EVENT_EXTRA_REG(SNBEP_CBO_PMON_CTL_TID_EN,
1223 SNBEP_CBO_PMON_CTL_TID_EN, 0x1), 1223 SNBEP_CBO_PMON_CTL_TID_EN, 0x1),
1224 SNBEP_CBO_EVENT_EXTRA_REG(0x1031, 0x10ff, 0x2), 1224 SNBEP_CBO_EVENT_EXTRA_REG(0x1031, 0x10ff, 0x2),
1225
1225 SNBEP_CBO_EVENT_EXTRA_REG(0x1134, 0xffff, 0x4), 1226 SNBEP_CBO_EVENT_EXTRA_REG(0x1134, 0xffff, 0x4),
1226 SNBEP_CBO_EVENT_EXTRA_REG(0x4134, 0xffff, 0xc), 1227 SNBEP_CBO_EVENT_EXTRA_REG(0x4134, 0xffff, 0xc),
1227 SNBEP_CBO_EVENT_EXTRA_REG(0x5134, 0xffff, 0xc), 1228 SNBEP_CBO_EVENT_EXTRA_REG(0x5134, 0xffff, 0xc),
@@ -1245,7 +1246,7 @@ static struct extra_reg ivt_uncore_cbox_extra_regs[] = {
1245 SNBEP_CBO_EVENT_EXTRA_REG(0x8335, 0xffff, 0x10), 1246 SNBEP_CBO_EVENT_EXTRA_REG(0x8335, 0xffff, 0x10),
1246 SNBEP_CBO_EVENT_EXTRA_REG(0x0136, 0xffff, 0x10), 1247 SNBEP_CBO_EVENT_EXTRA_REG(0x0136, 0xffff, 0x10),
1247 SNBEP_CBO_EVENT_EXTRA_REG(0x0336, 0xffff, 0x10), 1248 SNBEP_CBO_EVENT_EXTRA_REG(0x0336, 0xffff, 0x10),
1248 SNBEP_CBO_EVENT_EXTRA_REG(0x2336, 0xffff, 0x10), 1249 SNBEP_CBO_EVENT_EXTRA_REG(0x2136, 0xffff, 0x10),
1249 SNBEP_CBO_EVENT_EXTRA_REG(0x2336, 0xffff, 0x10), 1250 SNBEP_CBO_EVENT_EXTRA_REG(0x2336, 0xffff, 0x10),
1250 SNBEP_CBO_EVENT_EXTRA_REG(0x4136, 0xffff, 0x18), 1251 SNBEP_CBO_EVENT_EXTRA_REG(0x4136, 0xffff, 0x18),
1251 SNBEP_CBO_EVENT_EXTRA_REG(0x4336, 0xffff, 0x18), 1252 SNBEP_CBO_EVENT_EXTRA_REG(0x4336, 0xffff, 0x18),
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index 7596df664901..67e6d19ef1be 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -574,6 +574,9 @@ int kprobe_int3_handler(struct pt_regs *regs)
574 struct kprobe *p; 574 struct kprobe *p;
575 struct kprobe_ctlblk *kcb; 575 struct kprobe_ctlblk *kcb;
576 576
577 if (user_mode_vm(regs))
578 return 0;
579
577 addr = (kprobe_opcode_t *)(regs->ip - sizeof(kprobe_opcode_t)); 580 addr = (kprobe_opcode_t *)(regs->ip - sizeof(kprobe_opcode_t));
578 /* 581 /*
579 * We don't want to be preempted for the entire 582 * We don't want to be preempted for the entire
diff --git a/kernel/events/core.c b/kernel/events/core.c
index b0c95f0f06fd..6b17ac1b0c2a 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -7458,7 +7458,19 @@ __perf_event_exit_task(struct perf_event *child_event,
7458 struct perf_event_context *child_ctx, 7458 struct perf_event_context *child_ctx,
7459 struct task_struct *child) 7459 struct task_struct *child)
7460{ 7460{
7461 perf_remove_from_context(child_event, true); 7461 /*
7462 * Do not destroy the 'original' grouping; because of the context
7463 * switch optimization the original events could've ended up in a
7464 * random child task.
7465 *
7466 * If we were to destroy the original group, all group related
7467 * operations would cease to function properly after this random
7468 * child dies.
7469 *
7470 * Do destroy all inherited groups, we don't care about those
7471 * and being thorough is better.
7472 */
7473 perf_remove_from_context(child_event, !!child_event->parent);
7462 7474
7463 /* 7475 /*
7464 * It can happen that the parent exits first, and has events 7476 * It can happen that the parent exits first, and has events
@@ -7474,7 +7486,7 @@ __perf_event_exit_task(struct perf_event *child_event,
7474static void perf_event_exit_task_context(struct task_struct *child, int ctxn) 7486static void perf_event_exit_task_context(struct task_struct *child, int ctxn)
7475{ 7487{
7476 struct perf_event *child_event, *next; 7488 struct perf_event *child_event, *next;
7477 struct perf_event_context *child_ctx; 7489 struct perf_event_context *child_ctx, *parent_ctx;
7478 unsigned long flags; 7490 unsigned long flags;
7479 7491
7480 if (likely(!child->perf_event_ctxp[ctxn])) { 7492 if (likely(!child->perf_event_ctxp[ctxn])) {
@@ -7499,6 +7511,15 @@ static void perf_event_exit_task_context(struct task_struct *child, int ctxn)
7499 raw_spin_lock(&child_ctx->lock); 7511 raw_spin_lock(&child_ctx->lock);
7500 task_ctx_sched_out(child_ctx); 7512 task_ctx_sched_out(child_ctx);
7501 child->perf_event_ctxp[ctxn] = NULL; 7513 child->perf_event_ctxp[ctxn] = NULL;
7514
7515 /*
7516 * In order to avoid freeing: child_ctx->parent_ctx->task
7517 * under perf_event_context::lock, grab another reference.
7518 */
7519 parent_ctx = child_ctx->parent_ctx;
7520 if (parent_ctx)
7521 get_ctx(parent_ctx);
7522
7502 /* 7523 /*
7503 * If this context is a clone; unclone it so it can't get 7524 * If this context is a clone; unclone it so it can't get
7504 * swapped to another process while we're removing all 7525 * swapped to another process while we're removing all
@@ -7509,6 +7530,13 @@ static void perf_event_exit_task_context(struct task_struct *child, int ctxn)
7509 raw_spin_unlock_irqrestore(&child_ctx->lock, flags); 7530 raw_spin_unlock_irqrestore(&child_ctx->lock, flags);
7510 7531
7511 /* 7532 /*
7533 * Now that we no longer hold perf_event_context::lock, drop
7534 * our extra child_ctx->parent_ctx reference.
7535 */
7536 if (parent_ctx)
7537 put_ctx(parent_ctx);
7538
7539 /*
7512 * Report the task dead after unscheduling the events so that we 7540 * Report the task dead after unscheduling the events so that we
7513 * won't get any samples after PERF_RECORD_EXIT. We can however still 7541 * won't get any samples after PERF_RECORD_EXIT. We can however still
7514 * get a few PERF_RECORD_READ events. 7542 * get a few PERF_RECORD_READ events.
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 3214289df5a7..734e9a7d280b 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -2037,19 +2037,23 @@ static int __init populate_kprobe_blacklist(unsigned long *start,
2037{ 2037{
2038 unsigned long *iter; 2038 unsigned long *iter;
2039 struct kprobe_blacklist_entry *ent; 2039 struct kprobe_blacklist_entry *ent;
2040 unsigned long offset = 0, size = 0; 2040 unsigned long entry, offset = 0, size = 0;
2041 2041
2042 for (iter = start; iter < end; iter++) { 2042 for (iter = start; iter < end; iter++) {
2043 if (!kallsyms_lookup_size_offset(*iter, &size, &offset)) { 2043 entry = arch_deref_entry_point((void *)*iter);
2044 pr_err("Failed to find blacklist %p\n", (void *)*iter); 2044
2045 if (!kernel_text_address(entry) ||
2046 !kallsyms_lookup_size_offset(entry, &size, &offset)) {
2047 pr_err("Failed to find blacklist at %p\n",
2048 (void *)entry);
2045 continue; 2049 continue;
2046 } 2050 }
2047 2051
2048 ent = kmalloc(sizeof(*ent), GFP_KERNEL); 2052 ent = kmalloc(sizeof(*ent), GFP_KERNEL);
2049 if (!ent) 2053 if (!ent)
2050 return -ENOMEM; 2054 return -ENOMEM;
2051 ent->start_addr = *iter; 2055 ent->start_addr = entry;
2052 ent->end_addr = *iter + size; 2056 ent->end_addr = entry + size;
2053 INIT_LIST_HEAD(&ent->list); 2057 INIT_LIST_HEAD(&ent->list);
2054 list_add_tail(&ent->list, &kprobe_blacklist); 2058 list_add_tail(&ent->list, &kprobe_blacklist);
2055 } 2059 }