about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r--arch/x86/include/asm/paravirt.h10
-rw-r--r--arch/x86/kernel/apic/hw_nmi.c7
-rw-r--r--arch/x86/kernel/entry_64.S2
-rw-r--r--arch/x86/kernel/hw_breakpoint.c4
-rw-r--r--include/linux/hw_breakpoint.h4
-rw-r--r--kernel/hw_breakpoint.c3
-rw-r--r--kernel/irq_work.c4
-rw-r--r--kernel/module.c12
-rw-r--r--kernel/perf_event.c69
-rw-r--r--kernel/trace/trace.c19
-rw-r--r--tools/perf/builtin-record.c17
-rw-r--r--tools/perf/util/symbol.c4
12 files changed, 123 insertions, 32 deletions
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 18e3b8a8709f..ef9975812c77 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -824,27 +824,27 @@ static __always_inline void arch_spin_unlock(struct arch_spinlock *lock)
824#define __PV_IS_CALLEE_SAVE(func) \ 824#define __PV_IS_CALLEE_SAVE(func) \
825 ((struct paravirt_callee_save) { func }) 825 ((struct paravirt_callee_save) { func })
826 826
827static inline unsigned long arch_local_save_flags(void) 827static inline notrace unsigned long arch_local_save_flags(void)
828{ 828{
829 return PVOP_CALLEE0(unsigned long, pv_irq_ops.save_fl); 829 return PVOP_CALLEE0(unsigned long, pv_irq_ops.save_fl);
830} 830}
831 831
832static inline void arch_local_irq_restore(unsigned long f) 832static inline notrace void arch_local_irq_restore(unsigned long f)
833{ 833{
834 PVOP_VCALLEE1(pv_irq_ops.restore_fl, f); 834 PVOP_VCALLEE1(pv_irq_ops.restore_fl, f);
835} 835}
836 836
837static inline void arch_local_irq_disable(void) 837static inline notrace void arch_local_irq_disable(void)
838{ 838{
839 PVOP_VCALLEE0(pv_irq_ops.irq_disable); 839 PVOP_VCALLEE0(pv_irq_ops.irq_disable);
840} 840}
841 841
842static inline void arch_local_irq_enable(void) 842static inline notrace void arch_local_irq_enable(void)
843{ 843{
844 PVOP_VCALLEE0(pv_irq_ops.irq_enable); 844 PVOP_VCALLEE0(pv_irq_ops.irq_enable);
845} 845}
846 846
847static inline unsigned long arch_local_irq_save(void) 847static inline notrace unsigned long arch_local_irq_save(void)
848{ 848{
849 unsigned long f; 849 unsigned long f;
850 850
diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c
index cefd6942f0e9..62f6e1e55b90 100644
--- a/arch/x86/kernel/apic/hw_nmi.c
+++ b/arch/x86/kernel/apic/hw_nmi.c
@@ -17,15 +17,16 @@
17#include <linux/nmi.h> 17#include <linux/nmi.h>
18#include <linux/module.h> 18#include <linux/module.h>
19 19
20/* For reliability, we're prepared to waste bits here. */
21static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly;
22
23u64 hw_nmi_get_sample_period(void) 20u64 hw_nmi_get_sample_period(void)
24{ 21{
25 return (u64)(cpu_khz) * 1000 * 60; 22 return (u64)(cpu_khz) * 1000 * 60;
26} 23}
27 24
28#ifdef ARCH_HAS_NMI_WATCHDOG 25#ifdef ARCH_HAS_NMI_WATCHDOG
26
27/* For reliability, we're prepared to waste bits here. */
28static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly;
29
29void arch_trigger_all_cpu_backtrace(void) 30void arch_trigger_all_cpu_backtrace(void)
30{ 31{
31 int i; 32 int i;
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index fe2690d71c0c..e3ba417e8697 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -295,6 +295,7 @@ ENDPROC(native_usergs_sysret64)
295 .endm 295 .endm
296 296
297/* save partial stack frame */ 297/* save partial stack frame */
298 .pushsection .kprobes.text, "ax"
298ENTRY(save_args) 299ENTRY(save_args)
299 XCPT_FRAME 300 XCPT_FRAME
300 cld 301 cld
@@ -334,6 +335,7 @@ ENTRY(save_args)
334 ret 335 ret
335 CFI_ENDPROC 336 CFI_ENDPROC
336END(save_args) 337END(save_args)
338 .popsection
337 339
338ENTRY(save_rest) 340ENTRY(save_rest)
339 PARTIAL_FRAME 1 REST_SKIP+8 341 PARTIAL_FRAME 1 REST_SKIP+8
diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c
index ff15c9dcc25d..42c594254507 100644
--- a/arch/x86/kernel/hw_breakpoint.c
+++ b/arch/x86/kernel/hw_breakpoint.c
@@ -433,6 +433,10 @@ static int __kprobes hw_breakpoint_handler(struct die_args *args)
433 dr6_p = (unsigned long *)ERR_PTR(args->err); 433 dr6_p = (unsigned long *)ERR_PTR(args->err);
434 dr6 = *dr6_p; 434 dr6 = *dr6_p;
435 435
436 /* If it's a single step, TRAP bits are random */
437 if (dr6 & DR_STEP)
438 return NOTIFY_DONE;
439
436 /* Do an early return if no trap bits are set in DR6 */ 440 /* Do an early return if no trap bits are set in DR6 */
437 if ((dr6 & DR_TRAP_BITS) == 0) 441 if ((dr6 & DR_TRAP_BITS) == 0)
438 return NOTIFY_DONE; 442 return NOTIFY_DONE;
diff --git a/include/linux/hw_breakpoint.h b/include/linux/hw_breakpoint.h
index a2d6ea49ec56..d1e55fed2c7d 100644
--- a/include/linux/hw_breakpoint.h
+++ b/include/linux/hw_breakpoint.h
@@ -33,6 +33,8 @@ enum bp_type_idx {
33 33
34#ifdef CONFIG_HAVE_HW_BREAKPOINT 34#ifdef CONFIG_HAVE_HW_BREAKPOINT
35 35
36extern int __init init_hw_breakpoint(void);
37
36static inline void hw_breakpoint_init(struct perf_event_attr *attr) 38static inline void hw_breakpoint_init(struct perf_event_attr *attr)
37{ 39{
38 memset(attr, 0, sizeof(*attr)); 40 memset(attr, 0, sizeof(*attr));
@@ -108,6 +110,8 @@ static inline struct arch_hw_breakpoint *counter_arch_bp(struct perf_event *bp)
108 110
109#else /* !CONFIG_HAVE_HW_BREAKPOINT */ 111#else /* !CONFIG_HAVE_HW_BREAKPOINT */
110 112
113static inline int __init init_hw_breakpoint(void) { return 0; }
114
111static inline struct perf_event * 115static inline struct perf_event *
112register_user_hw_breakpoint(struct perf_event_attr *attr, 116register_user_hw_breakpoint(struct perf_event_attr *attr,
113 perf_overflow_handler_t triggered, 117 perf_overflow_handler_t triggered,
diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c
index 2c9120f0afca..e5325825aeb6 100644
--- a/kernel/hw_breakpoint.c
+++ b/kernel/hw_breakpoint.c
@@ -620,7 +620,7 @@ static struct pmu perf_breakpoint = {
620 .read = hw_breakpoint_pmu_read, 620 .read = hw_breakpoint_pmu_read,
621}; 621};
622 622
623static int __init init_hw_breakpoint(void) 623int __init init_hw_breakpoint(void)
624{ 624{
625 unsigned int **task_bp_pinned; 625 unsigned int **task_bp_pinned;
626 int cpu, err_cpu; 626 int cpu, err_cpu;
@@ -655,6 +655,5 @@ static int __init init_hw_breakpoint(void)
655 655
656 return -ENOMEM; 656 return -ENOMEM;
657} 657}
658core_initcall(init_hw_breakpoint);
659 658
660 659
diff --git a/kernel/irq_work.c b/kernel/irq_work.c
index f16763ff8481..90f881904bb1 100644
--- a/kernel/irq_work.c
+++ b/kernel/irq_work.c
@@ -145,7 +145,9 @@ void irq_work_run(void)
145 * Clear the BUSY bit and return to the free state if 145 * Clear the BUSY bit and return to the free state if
146 * no-one else claimed it meanwhile. 146 * no-one else claimed it meanwhile.
147 */ 147 */
148 cmpxchg(&entry->next, next_flags(NULL, IRQ_WORK_BUSY), NULL); 148 (void)cmpxchg(&entry->next,
149 next_flags(NULL, IRQ_WORK_BUSY),
150 NULL);
149 } 151 }
150} 152}
151EXPORT_SYMBOL_GPL(irq_work_run); 153EXPORT_SYMBOL_GPL(irq_work_run);
diff --git a/kernel/module.c b/kernel/module.c
index 437a74a7524a..d190664f25ff 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -2326,6 +2326,18 @@ static void find_module_sections(struct module *mod, struct load_info *info)
2326 kmemleak_scan_area(mod->trace_events, sizeof(*mod->trace_events) * 2326 kmemleak_scan_area(mod->trace_events, sizeof(*mod->trace_events) *
2327 mod->num_trace_events, GFP_KERNEL); 2327 mod->num_trace_events, GFP_KERNEL);
2328#endif 2328#endif
2329#ifdef CONFIG_TRACING
2330 mod->trace_bprintk_fmt_start = section_objs(info, "__trace_printk_fmt",
2331 sizeof(*mod->trace_bprintk_fmt_start),
2332 &mod->num_trace_bprintk_fmt);
2333 /*
2334 * This section contains pointers to allocated objects in the trace
2335 * code and not scanning it leads to false positives.
2336 */
2337 kmemleak_scan_area(mod->trace_bprintk_fmt_start,
2338 sizeof(*mod->trace_bprintk_fmt_start) *
2339 mod->num_trace_bprintk_fmt, GFP_KERNEL);
2340#endif
2329#ifdef CONFIG_FTRACE_MCOUNT_RECORD 2341#ifdef CONFIG_FTRACE_MCOUNT_RECORD
2330 /* sechdrs[0].sh_size is always zero */ 2342 /* sechdrs[0].sh_size is always zero */
2331 mod->ftrace_callsites = section_objs(info, "__mcount_loc", 2343 mod->ftrace_callsites = section_objs(info, "__mcount_loc",
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index cb6c0d2af68f..671f6c8c8a32 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -31,6 +31,7 @@
31#include <linux/kernel_stat.h> 31#include <linux/kernel_stat.h>
32#include <linux/perf_event.h> 32#include <linux/perf_event.h>
33#include <linux/ftrace_event.h> 33#include <linux/ftrace_event.h>
34#include <linux/hw_breakpoint.h>
34 35
35#include <asm/irq_regs.h> 36#include <asm/irq_regs.h>
36 37
@@ -2234,11 +2235,6 @@ int perf_event_release_kernel(struct perf_event *event)
2234 raw_spin_unlock_irq(&ctx->lock); 2235 raw_spin_unlock_irq(&ctx->lock);
2235 mutex_unlock(&ctx->mutex); 2236 mutex_unlock(&ctx->mutex);
2236 2237
2237 mutex_lock(&event->owner->perf_event_mutex);
2238 list_del_init(&event->owner_entry);
2239 mutex_unlock(&event->owner->perf_event_mutex);
2240 put_task_struct(event->owner);
2241
2242 free_event(event); 2238 free_event(event);
2243 2239
2244 return 0; 2240 return 0;
@@ -2251,9 +2247,43 @@ EXPORT_SYMBOL_GPL(perf_event_release_kernel);
2251static int perf_release(struct inode *inode, struct file *file) 2247static int perf_release(struct inode *inode, struct file *file)
2252{ 2248{
2253 struct perf_event *event = file->private_data; 2249 struct perf_event *event = file->private_data;
2250 struct task_struct *owner;
2254 2251
2255 file->private_data = NULL; 2252 file->private_data = NULL;
2256 2253
2254 rcu_read_lock();
2255 owner = ACCESS_ONCE(event->owner);
2256 /*
2257 * Matches the smp_wmb() in perf_event_exit_task(). If we observe
2258 * !owner it means the list deletion is complete and we can indeed
2259 * free this event, otherwise we need to serialize on
2260 * owner->perf_event_mutex.
2261 */
2262 smp_read_barrier_depends();
2263 if (owner) {
2264 /*
2265 * Since delayed_put_task_struct() also drops the last
2266 * task reference we can safely take a new reference
2267 * while holding the rcu_read_lock().
2268 */
2269 get_task_struct(owner);
2270 }
2271 rcu_read_unlock();
2272
2273 if (owner) {
2274 mutex_lock(&owner->perf_event_mutex);
2275 /*
2276 * We have to re-check the event->owner field, if it is cleared
2277 * we raced with perf_event_exit_task(), acquiring the mutex
2278 * ensured they're done, and we can proceed with freeing the
2279 * event.
2280 */
2281 if (event->owner)
2282 list_del_init(&event->owner_entry);
2283 mutex_unlock(&owner->perf_event_mutex);
2284 put_task_struct(owner);
2285 }
2286
2257 return perf_event_release_kernel(event); 2287 return perf_event_release_kernel(event);
2258} 2288}
2259 2289
@@ -5677,7 +5707,7 @@ SYSCALL_DEFINE5(perf_event_open,
5677 mutex_unlock(&ctx->mutex); 5707 mutex_unlock(&ctx->mutex);
5678 5708
5679 event->owner = current; 5709 event->owner = current;
5680 get_task_struct(current); 5710
5681 mutex_lock(&current->perf_event_mutex); 5711 mutex_lock(&current->perf_event_mutex);
5682 list_add_tail(&event->owner_entry, &current->perf_event_list); 5712 list_add_tail(&event->owner_entry, &current->perf_event_list);
5683 mutex_unlock(&current->perf_event_mutex); 5713 mutex_unlock(&current->perf_event_mutex);
@@ -5745,12 +5775,6 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
5745 ++ctx->generation; 5775 ++ctx->generation;
5746 mutex_unlock(&ctx->mutex); 5776 mutex_unlock(&ctx->mutex);
5747 5777
5748 event->owner = current;
5749 get_task_struct(current);
5750 mutex_lock(&current->perf_event_mutex);
5751 list_add_tail(&event->owner_entry, &current->perf_event_list);
5752 mutex_unlock(&current->perf_event_mutex);
5753
5754 return event; 5778 return event;
5755 5779
5756err_free: 5780err_free:
@@ -5901,8 +5925,24 @@ again:
5901 */ 5925 */
5902void perf_event_exit_task(struct task_struct *child) 5926void perf_event_exit_task(struct task_struct *child)
5903{ 5927{
5928 struct perf_event *event, *tmp;
5904 int ctxn; 5929 int ctxn;
5905 5930
5931 mutex_lock(&child->perf_event_mutex);
5932 list_for_each_entry_safe(event, tmp, &child->perf_event_list,
5933 owner_entry) {
5934 list_del_init(&event->owner_entry);
5935
5936 /*
5937 * Ensure the list deletion is visible before we clear
5938 * the owner, closes a race against perf_release() where
5939 * we need to serialize on the owner->perf_event_mutex.
5940 */
5941 smp_wmb();
5942 event->owner = NULL;
5943 }
5944 mutex_unlock(&child->perf_event_mutex);
5945
5906 for_each_task_context_nr(ctxn) 5946 for_each_task_context_nr(ctxn)
5907 perf_event_exit_task_context(child, ctxn); 5947 perf_event_exit_task_context(child, ctxn);
5908} 5948}
@@ -6321,6 +6361,8 @@ perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu)
6321 6361
6322void __init perf_event_init(void) 6362void __init perf_event_init(void)
6323{ 6363{
6364 int ret;
6365
6324 perf_event_init_all_cpus(); 6366 perf_event_init_all_cpus();
6325 init_srcu_struct(&pmus_srcu); 6367 init_srcu_struct(&pmus_srcu);
6326 perf_pmu_register(&perf_swevent); 6368 perf_pmu_register(&perf_swevent);
@@ -6328,4 +6370,7 @@ void __init perf_event_init(void)
6328 perf_pmu_register(&perf_task_clock); 6370 perf_pmu_register(&perf_task_clock);
6329 perf_tp_register(); 6371 perf_tp_register();
6330 perf_cpu_notifier(perf_cpu_notify); 6372 perf_cpu_notifier(perf_cpu_notify);
6373
6374 ret = init_hw_breakpoint();
6375 WARN(ret, "hw_breakpoint initialization failed with: %d", ret);
6331} 6376}
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 042084157980..c380612273bf 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1283,6 +1283,8 @@ void trace_dump_stack(void)
1283 __ftrace_trace_stack(global_trace.buffer, flags, 3, preempt_count()); 1283 __ftrace_trace_stack(global_trace.buffer, flags, 3, preempt_count());
1284} 1284}
1285 1285
1286static DEFINE_PER_CPU(int, user_stack_count);
1287
1286void 1288void
1287ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc) 1289ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
1288{ 1290{
@@ -1301,6 +1303,18 @@ ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
1301 if (unlikely(in_nmi())) 1303 if (unlikely(in_nmi()))
1302 return; 1304 return;
1303 1305
1306 /*
1307 * prevent recursion, since the user stack tracing may
1308 * trigger other kernel events.
1309 */
1310 preempt_disable();
1311 if (__this_cpu_read(user_stack_count))
1312 goto out;
1313
1314 __this_cpu_inc(user_stack_count);
1315
1316
1317
1304 event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK, 1318 event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
1305 sizeof(*entry), flags, pc); 1319 sizeof(*entry), flags, pc);
1306 if (!event) 1320 if (!event)
@@ -1318,6 +1332,11 @@ ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
1318 save_stack_trace_user(&trace); 1332 save_stack_trace_user(&trace);
1319 if (!filter_check_discard(call, entry, buffer, event)) 1333 if (!filter_check_discard(call, entry, buffer, event))
1320 ring_buffer_unlock_commit(buffer, event); 1334 ring_buffer_unlock_commit(buffer, event);
1335
1336 __this_cpu_dec(user_stack_count);
1337
1338 out:
1339 preempt_enable();
1321} 1340}
1322 1341
1323#ifdef UNUSED 1342#ifdef UNUSED
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 93bd2ff001fb..e2c2de201eec 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -697,17 +697,18 @@ static int __cmd_record(int argc, const char **argv)
697 if (err < 0) 697 if (err < 0)
698 err = event__synthesize_kernel_mmap(process_synthesized_event, 698 err = event__synthesize_kernel_mmap(process_synthesized_event,
699 session, machine, "_stext"); 699 session, machine, "_stext");
700 if (err < 0) { 700 if (err < 0)
701 pr_err("Couldn't record kernel reference relocation symbol.\n"); 701 pr_err("Couldn't record kernel reference relocation symbol\n"
702 return err; 702 "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
703 } 703 "Check /proc/kallsyms permission or run as root.\n");
704 704
705 err = event__synthesize_modules(process_synthesized_event, 705 err = event__synthesize_modules(process_synthesized_event,
706 session, machine); 706 session, machine);
707 if (err < 0) { 707 if (err < 0)
708 pr_err("Couldn't record kernel reference relocation symbol.\n"); 708 pr_err("Couldn't record kernel module information.\n"
709 return err; 709 "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
710 } 710 "Check /proc/modules permission or run as root.\n");
711
711 if (perf_guest) 712 if (perf_guest)
712 perf_session__process_machines(session, event__synthesize_guest_os); 713 perf_session__process_machines(session, event__synthesize_guest_os);
713 714
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index b39f499e575a..0500895a45af 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -295,7 +295,9 @@ static void symbols__insert_by_name(struct rb_root *self, struct symbol *sym)
295{ 295{
296 struct rb_node **p = &self->rb_node; 296 struct rb_node **p = &self->rb_node;
297 struct rb_node *parent = NULL; 297 struct rb_node *parent = NULL;
298 struct symbol_name_rb_node *symn = ((void *)sym) - sizeof(*parent), *s; 298 struct symbol_name_rb_node *symn, *s;
299
300 symn = container_of(sym, struct symbol_name_rb_node, sym);
299 301
300 while (*p != NULL) { 302 while (*p != NULL) {
301 parent = *p; 303 parent = *p;