aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--arch/x86/include/asm/msr-index.h1
-rw-r--r--arch/x86/include/asm/perf_event.h19
-rw-r--r--arch/x86/kernel/cpu/perf_event.c21
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_ds.c216
-rw-r--r--arch/x86/oprofile/nmi_int.c6
-rw-r--r--arch/x86/oprofile/op_model_amd.c146
-rw-r--r--include/linux/interrupt.h2
-rw-r--r--include/linux/ring_buffer.h12
-rw-r--r--include/trace/events/irq.h54
-rw-r--r--kernel/kprobes.c7
-rw-r--r--kernel/perf_event.c94
-rw-r--r--kernel/softirq.c16
-rw-r--r--kernel/trace/ring_buffer.c335
-rw-r--r--kernel/trace/trace.c8
-rw-r--r--tools/perf/Documentation/perf-list.txt17
-rw-r--r--tools/perf/Documentation/perf-probe.txt18
-rw-r--r--tools/perf/Documentation/perf-record.txt4
-rw-r--r--tools/perf/builtin-probe.c78
-rw-r--r--tools/perf/builtin-record.c8
-rw-r--r--tools/perf/builtin-trace.c17
-rw-r--r--tools/perf/scripts/perl/bin/failed-syscalls-report2
-rw-r--r--tools/perf/scripts/perl/bin/rw-by-file-report2
-rw-r--r--tools/perf/scripts/perl/bin/rw-by-pid-report2
-rw-r--r--tools/perf/scripts/perl/bin/rwtop-report2
-rw-r--r--tools/perf/scripts/perl/bin/wakeup-latency-report2
-rw-r--r--tools/perf/scripts/perl/bin/workqueue-stats-report2
-rw-r--r--tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Util.py58
-rw-r--r--tools/perf/scripts/python/bin/failed-syscalls-by-pid-report2
-rw-r--r--tools/perf/scripts/python/bin/futex-contention-record2
-rw-r--r--tools/perf/scripts/python/bin/futex-contention-report4
-rw-r--r--tools/perf/scripts/python/bin/netdev-times-report2
-rw-r--r--tools/perf/scripts/python/bin/sched-migration-report2
-rw-r--r--tools/perf/scripts/python/bin/sctop-report2
-rw-r--r--tools/perf/scripts/python/bin/syscall-counts-by-pid-report2
-rw-r--r--tools/perf/scripts/python/bin/syscall-counts-report2
-rw-r--r--tools/perf/scripts/python/failed-syscalls-by-pid.py21
-rw-r--r--tools/perf/scripts/python/futex-contention.py50
-rw-r--r--tools/perf/scripts/python/sctop.py9
-rw-r--r--tools/perf/scripts/python/syscall-counts-by-pid.py21
-rw-r--r--tools/perf/scripts/python/syscall-counts.py5
-rw-r--r--tools/perf/util/debug.c4
-rw-r--r--tools/perf/util/debug.h2
-rw-r--r--tools/perf/util/map.h10
-rw-r--r--tools/perf/util/probe-event.c189
-rw-r--r--tools/perf/util/probe-event.h16
-rw-r--r--tools/perf/util/probe-finder.c645
-rw-r--r--tools/perf/util/probe-finder.h31
-rw-r--r--tools/perf/util/ui/browser.c1
48 files changed, 1501 insertions, 670 deletions
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 83c4bb1d917d..3ea3dc487047 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -121,6 +121,7 @@
121#define MSR_AMD64_IBSDCLINAD 0xc0011038 121#define MSR_AMD64_IBSDCLINAD 0xc0011038
122#define MSR_AMD64_IBSDCPHYSAD 0xc0011039 122#define MSR_AMD64_IBSDCPHYSAD 0xc0011039
123#define MSR_AMD64_IBSCTL 0xc001103a 123#define MSR_AMD64_IBSCTL 0xc001103a
124#define MSR_AMD64_IBSBRTARGET 0xc001103b
124 125
125/* Fam 10h MSRs */ 126/* Fam 10h MSRs */
126#define MSR_FAM10H_MMIO_CONF_BASE 0xc0010058 127#define MSR_FAM10H_MMIO_CONF_BASE 0xc0010058
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 6e742cc4251b..550e26b1dbb3 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -111,17 +111,18 @@ union cpuid10_edx {
111#define X86_PMC_IDX_FIXED_BTS (X86_PMC_IDX_FIXED + 16) 111#define X86_PMC_IDX_FIXED_BTS (X86_PMC_IDX_FIXED + 16)
112 112
113/* IbsFetchCtl bits/masks */ 113/* IbsFetchCtl bits/masks */
114#define IBS_FETCH_RAND_EN (1ULL<<57) 114#define IBS_FETCH_RAND_EN (1ULL<<57)
115#define IBS_FETCH_VAL (1ULL<<49) 115#define IBS_FETCH_VAL (1ULL<<49)
116#define IBS_FETCH_ENABLE (1ULL<<48) 116#define IBS_FETCH_ENABLE (1ULL<<48)
117#define IBS_FETCH_CNT 0xFFFF0000ULL 117#define IBS_FETCH_CNT 0xFFFF0000ULL
118#define IBS_FETCH_MAX_CNT 0x0000FFFFULL 118#define IBS_FETCH_MAX_CNT 0x0000FFFFULL
119 119
120/* IbsOpCtl bits */ 120/* IbsOpCtl bits */
121#define IBS_OP_CNT_CTL (1ULL<<19) 121#define IBS_OP_CNT_CTL (1ULL<<19)
122#define IBS_OP_VAL (1ULL<<18) 122#define IBS_OP_VAL (1ULL<<18)
123#define IBS_OP_ENABLE (1ULL<<17) 123#define IBS_OP_ENABLE (1ULL<<17)
124#define IBS_OP_MAX_CNT 0x0000FFFFULL 124#define IBS_OP_MAX_CNT 0x0000FFFFULL
125#define IBS_OP_MAX_CNT_EXT 0x007FFFFFULL /* not a register bit mask */
125 126
126#ifdef CONFIG_PERF_EVENTS 127#ifdef CONFIG_PERF_EVENTS
127extern void init_hw_perf_events(void); 128extern void init_hw_perf_events(void);
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index c1e8c7a51164..ed6310183efb 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -237,6 +237,7 @@ struct x86_pmu {
237 * Intel DebugStore bits 237 * Intel DebugStore bits
238 */ 238 */
239 int bts, pebs; 239 int bts, pebs;
240 int bts_active, pebs_active;
240 int pebs_record_size; 241 int pebs_record_size;
241 void (*drain_pebs)(struct pt_regs *regs); 242 void (*drain_pebs)(struct pt_regs *regs);
242 struct event_constraint *pebs_constraints; 243 struct event_constraint *pebs_constraints;
@@ -380,7 +381,7 @@ static void release_pmc_hardware(void) {}
380 381
381#endif 382#endif
382 383
383static int reserve_ds_buffers(void); 384static void reserve_ds_buffers(void);
384static void release_ds_buffers(void); 385static void release_ds_buffers(void);
385 386
386static void hw_perf_event_destroy(struct perf_event *event) 387static void hw_perf_event_destroy(struct perf_event *event)
@@ -477,7 +478,7 @@ static int x86_setup_perfctr(struct perf_event *event)
477 if ((attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS) && 478 if ((attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS) &&
478 (hwc->sample_period == 1)) { 479 (hwc->sample_period == 1)) {
479 /* BTS is not supported by this architecture. */ 480 /* BTS is not supported by this architecture. */
480 if (!x86_pmu.bts) 481 if (!x86_pmu.bts_active)
481 return -EOPNOTSUPP; 482 return -EOPNOTSUPP;
482 483
483 /* BTS is currently only allowed for user-mode. */ 484 /* BTS is currently only allowed for user-mode. */
@@ -496,12 +497,13 @@ static int x86_pmu_hw_config(struct perf_event *event)
496 int precise = 0; 497 int precise = 0;
497 498
498 /* Support for constant skid */ 499 /* Support for constant skid */
499 if (x86_pmu.pebs) 500 if (x86_pmu.pebs_active) {
500 precise++; 501 precise++;
501 502
502 /* Support for IP fixup */ 503 /* Support for IP fixup */
503 if (x86_pmu.lbr_nr) 504 if (x86_pmu.lbr_nr)
504 precise++; 505 precise++;
506 }
505 507
506 if (event->attr.precise_ip > precise) 508 if (event->attr.precise_ip > precise)
507 return -EOPNOTSUPP; 509 return -EOPNOTSUPP;
@@ -543,11 +545,8 @@ static int __x86_pmu_event_init(struct perf_event *event)
543 if (atomic_read(&active_events) == 0) { 545 if (atomic_read(&active_events) == 0) {
544 if (!reserve_pmc_hardware()) 546 if (!reserve_pmc_hardware())
545 err = -EBUSY; 547 err = -EBUSY;
546 else { 548 else
547 err = reserve_ds_buffers(); 549 reserve_ds_buffers();
548 if (err)
549 release_pmc_hardware();
550 }
551 } 550 }
552 if (!err) 551 if (!err)
553 atomic_inc(&active_events); 552 atomic_inc(&active_events);
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 4977f9c400e5..b7dcd9f2b8a0 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -74,6 +74,107 @@ static void fini_debug_store_on_cpu(int cpu)
74 wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0); 74 wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0);
75} 75}
76 76
77static int alloc_pebs_buffer(int cpu)
78{
79 struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
80 int node = cpu_to_node(cpu);
81 int max, thresh = 1; /* always use a single PEBS record */
82 void *buffer;
83
84 if (!x86_pmu.pebs)
85 return 0;
86
87 buffer = kmalloc_node(PEBS_BUFFER_SIZE, GFP_KERNEL | __GFP_ZERO, node);
88 if (unlikely(!buffer))
89 return -ENOMEM;
90
91 max = PEBS_BUFFER_SIZE / x86_pmu.pebs_record_size;
92
93 ds->pebs_buffer_base = (u64)(unsigned long)buffer;
94 ds->pebs_index = ds->pebs_buffer_base;
95 ds->pebs_absolute_maximum = ds->pebs_buffer_base +
96 max * x86_pmu.pebs_record_size;
97
98 ds->pebs_interrupt_threshold = ds->pebs_buffer_base +
99 thresh * x86_pmu.pebs_record_size;
100
101 return 0;
102}
103
104static void release_pebs_buffer(int cpu)
105{
106 struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
107
108 if (!ds || !x86_pmu.pebs)
109 return;
110
111 kfree((void *)(unsigned long)ds->pebs_buffer_base);
112 ds->pebs_buffer_base = 0;
113}
114
115static int alloc_bts_buffer(int cpu)
116{
117 struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
118 int node = cpu_to_node(cpu);
119 int max, thresh;
120 void *buffer;
121
122 if (!x86_pmu.bts)
123 return 0;
124
125 buffer = kmalloc_node(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_ZERO, node);
126 if (unlikely(!buffer))
127 return -ENOMEM;
128
129 max = BTS_BUFFER_SIZE / BTS_RECORD_SIZE;
130 thresh = max / 16;
131
132 ds->bts_buffer_base = (u64)(unsigned long)buffer;
133 ds->bts_index = ds->bts_buffer_base;
134 ds->bts_absolute_maximum = ds->bts_buffer_base +
135 max * BTS_RECORD_SIZE;
136 ds->bts_interrupt_threshold = ds->bts_absolute_maximum -
137 thresh * BTS_RECORD_SIZE;
138
139 return 0;
140}
141
142static void release_bts_buffer(int cpu)
143{
144 struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
145
146 if (!ds || !x86_pmu.bts)
147 return;
148
149 kfree((void *)(unsigned long)ds->bts_buffer_base);
150 ds->bts_buffer_base = 0;
151}
152
153static int alloc_ds_buffer(int cpu)
154{
155 int node = cpu_to_node(cpu);
156 struct debug_store *ds;
157
158 ds = kmalloc_node(sizeof(*ds), GFP_KERNEL | __GFP_ZERO, node);
159 if (unlikely(!ds))
160 return -ENOMEM;
161
162 per_cpu(cpu_hw_events, cpu).ds = ds;
163
164 return 0;
165}
166
167static void release_ds_buffer(int cpu)
168{
169 struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
170
171 if (!ds)
172 return;
173
174 per_cpu(cpu_hw_events, cpu).ds = NULL;
175 kfree(ds);
176}
177
77static void release_ds_buffers(void) 178static void release_ds_buffers(void)
78{ 179{
79 int cpu; 180 int cpu;
@@ -82,93 +183,77 @@ static void release_ds_buffers(void)
82 return; 183 return;
83 184
84 get_online_cpus(); 185 get_online_cpus();
85
86 for_each_online_cpu(cpu) 186 for_each_online_cpu(cpu)
87 fini_debug_store_on_cpu(cpu); 187 fini_debug_store_on_cpu(cpu);
88 188
89 for_each_possible_cpu(cpu) { 189 for_each_possible_cpu(cpu) {
90 struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; 190 release_pebs_buffer(cpu);
91 191 release_bts_buffer(cpu);
92 if (!ds) 192 release_ds_buffer(cpu);
93 continue;
94
95 per_cpu(cpu_hw_events, cpu).ds = NULL;
96
97 kfree((void *)(unsigned long)ds->pebs_buffer_base);
98 kfree((void *)(unsigned long)ds->bts_buffer_base);
99 kfree(ds);
100 } 193 }
101
102 put_online_cpus(); 194 put_online_cpus();
103} 195}
104 196
105static int reserve_ds_buffers(void) 197static void reserve_ds_buffers(void)
106{ 198{
107 int cpu, err = 0; 199 int bts_err = 0, pebs_err = 0;
200 int cpu;
201
202 x86_pmu.bts_active = 0;
203 x86_pmu.pebs_active = 0;
108 204
109 if (!x86_pmu.bts && !x86_pmu.pebs) 205 if (!x86_pmu.bts && !x86_pmu.pebs)
110 return 0; 206 return;
207
208 if (!x86_pmu.bts)
209 bts_err = 1;
210
211 if (!x86_pmu.pebs)
212 pebs_err = 1;
111 213
112 get_online_cpus(); 214 get_online_cpus();
113 215
114 for_each_possible_cpu(cpu) { 216 for_each_possible_cpu(cpu) {
115 struct debug_store *ds; 217 if (alloc_ds_buffer(cpu)) {
116 void *buffer; 218 bts_err = 1;
117 int max, thresh; 219 pebs_err = 1;
220 }
221
222 if (!bts_err && alloc_bts_buffer(cpu))
223 bts_err = 1;
118 224
119 err = -ENOMEM; 225 if (!pebs_err && alloc_pebs_buffer(cpu))
120 ds = kzalloc(sizeof(*ds), GFP_KERNEL); 226 pebs_err = 1;
121 if (unlikely(!ds)) 227
228 if (bts_err && pebs_err)
122 break; 229 break;
123 per_cpu(cpu_hw_events, cpu).ds = ds; 230 }
124
125 if (x86_pmu.bts) {
126 buffer = kzalloc(BTS_BUFFER_SIZE, GFP_KERNEL);
127 if (unlikely(!buffer))
128 break;
129
130 max = BTS_BUFFER_SIZE / BTS_RECORD_SIZE;
131 thresh = max / 16;
132
133 ds->bts_buffer_base = (u64)(unsigned long)buffer;
134 ds->bts_index = ds->bts_buffer_base;
135 ds->bts_absolute_maximum = ds->bts_buffer_base +
136 max * BTS_RECORD_SIZE;
137 ds->bts_interrupt_threshold = ds->bts_absolute_maximum -
138 thresh * BTS_RECORD_SIZE;
139 }
140 231
141 if (x86_pmu.pebs) { 232 if (bts_err) {
142 buffer = kzalloc(PEBS_BUFFER_SIZE, GFP_KERNEL); 233 for_each_possible_cpu(cpu)
143 if (unlikely(!buffer)) 234 release_bts_buffer(cpu);
144 break; 235 }
145
146 max = PEBS_BUFFER_SIZE / x86_pmu.pebs_record_size;
147
148 ds->pebs_buffer_base = (u64)(unsigned long)buffer;
149 ds->pebs_index = ds->pebs_buffer_base;
150 ds->pebs_absolute_maximum = ds->pebs_buffer_base +
151 max * x86_pmu.pebs_record_size;
152 /*
153 * Always use single record PEBS
154 */
155 ds->pebs_interrupt_threshold = ds->pebs_buffer_base +
156 x86_pmu.pebs_record_size;
157 }
158 236
159 err = 0; 237 if (pebs_err) {
238 for_each_possible_cpu(cpu)
239 release_pebs_buffer(cpu);
160 } 240 }
161 241
162 if (err) 242 if (bts_err && pebs_err) {
163 release_ds_buffers(); 243 for_each_possible_cpu(cpu)
164 else { 244 release_ds_buffer(cpu);
245 } else {
246 if (x86_pmu.bts && !bts_err)
247 x86_pmu.bts_active = 1;
248
249 if (x86_pmu.pebs && !pebs_err)
250 x86_pmu.pebs_active = 1;
251
165 for_each_online_cpu(cpu) 252 for_each_online_cpu(cpu)
166 init_debug_store_on_cpu(cpu); 253 init_debug_store_on_cpu(cpu);
167 } 254 }
168 255
169 put_online_cpus(); 256 put_online_cpus();
170
171 return err;
172} 257}
173 258
174/* 259/*
@@ -233,7 +318,7 @@ static int intel_pmu_drain_bts_buffer(void)
233 if (!event) 318 if (!event)
234 return 0; 319 return 0;
235 320
236 if (!ds) 321 if (!x86_pmu.bts_active)
237 return 0; 322 return 0;
238 323
239 at = (struct bts_record *)(unsigned long)ds->bts_buffer_base; 324 at = (struct bts_record *)(unsigned long)ds->bts_buffer_base;
@@ -503,7 +588,7 @@ static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)
503 struct pebs_record_core *at, *top; 588 struct pebs_record_core *at, *top;
504 int n; 589 int n;
505 590
506 if (!ds || !x86_pmu.pebs) 591 if (!x86_pmu.pebs_active)
507 return; 592 return;
508 593
509 at = (struct pebs_record_core *)(unsigned long)ds->pebs_buffer_base; 594 at = (struct pebs_record_core *)(unsigned long)ds->pebs_buffer_base;
@@ -545,7 +630,7 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
545 u64 status = 0; 630 u64 status = 0;
546 int bit, n; 631 int bit, n;
547 632
548 if (!ds || !x86_pmu.pebs) 633 if (!x86_pmu.pebs_active)
549 return; 634 return;
550 635
551 at = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base; 636 at = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base;
@@ -630,9 +715,8 @@ static void intel_ds_init(void)
630 715
631#else /* CONFIG_CPU_SUP_INTEL */ 716#else /* CONFIG_CPU_SUP_INTEL */
632 717
633static int reserve_ds_buffers(void) 718static void reserve_ds_buffers(void)
634{ 719{
635 return 0;
636} 720}
637 721
638static void release_ds_buffers(void) 722static void release_ds_buffers(void)
diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c
index bd1489c3ce09..4e8baad36d37 100644
--- a/arch/x86/oprofile/nmi_int.c
+++ b/arch/x86/oprofile/nmi_int.c
@@ -726,6 +726,12 @@ int __init op_nmi_init(struct oprofile_operations *ops)
726 case 0x11: 726 case 0x11:
727 cpu_type = "x86-64/family11h"; 727 cpu_type = "x86-64/family11h";
728 break; 728 break;
729 case 0x12:
730 cpu_type = "x86-64/family12h";
731 break;
732 case 0x14:
733 cpu_type = "x86-64/family14h";
734 break;
729 default: 735 default:
730 return -ENODEV; 736 return -ENODEV;
731 } 737 }
diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c
index 42fb46f83883..a011bcc0f943 100644
--- a/arch/x86/oprofile/op_model_amd.c
+++ b/arch/x86/oprofile/op_model_amd.c
@@ -48,17 +48,24 @@ static unsigned long reset_value[NUM_VIRT_COUNTERS];
48 48
49static u32 ibs_caps; 49static u32 ibs_caps;
50 50
51struct op_ibs_config { 51struct ibs_config {
52 unsigned long op_enabled; 52 unsigned long op_enabled;
53 unsigned long fetch_enabled; 53 unsigned long fetch_enabled;
54 unsigned long max_cnt_fetch; 54 unsigned long max_cnt_fetch;
55 unsigned long max_cnt_op; 55 unsigned long max_cnt_op;
56 unsigned long rand_en; 56 unsigned long rand_en;
57 unsigned long dispatched_ops; 57 unsigned long dispatched_ops;
58 unsigned long branch_target;
58}; 59};
59 60
60static struct op_ibs_config ibs_config; 61struct ibs_state {
61static u64 ibs_op_ctl; 62 u64 ibs_op_ctl;
63 int branch_target;
64 unsigned long sample_size;
65};
66
67static struct ibs_config ibs_config;
68static struct ibs_state ibs_state;
62 69
63/* 70/*
64 * IBS cpuid feature detection 71 * IBS cpuid feature detection
@@ -71,8 +78,16 @@ static u64 ibs_op_ctl;
71 * bit 0 is used to indicate the existence of IBS. 78 * bit 0 is used to indicate the existence of IBS.
72 */ 79 */
73#define IBS_CAPS_AVAIL (1U<<0) 80#define IBS_CAPS_AVAIL (1U<<0)
81#define IBS_CAPS_FETCHSAM (1U<<1)
82#define IBS_CAPS_OPSAM (1U<<2)
74#define IBS_CAPS_RDWROPCNT (1U<<3) 83#define IBS_CAPS_RDWROPCNT (1U<<3)
75#define IBS_CAPS_OPCNT (1U<<4) 84#define IBS_CAPS_OPCNT (1U<<4)
85#define IBS_CAPS_BRNTRGT (1U<<5)
86#define IBS_CAPS_OPCNTEXT (1U<<6)
87
88#define IBS_CAPS_DEFAULT (IBS_CAPS_AVAIL \
89 | IBS_CAPS_FETCHSAM \
90 | IBS_CAPS_OPSAM)
76 91
77/* 92/*
78 * IBS APIC setup 93 * IBS APIC setup
@@ -99,12 +114,12 @@ static u32 get_ibs_caps(void)
99 /* check IBS cpuid feature flags */ 114 /* check IBS cpuid feature flags */
100 max_level = cpuid_eax(0x80000000); 115 max_level = cpuid_eax(0x80000000);
101 if (max_level < IBS_CPUID_FEATURES) 116 if (max_level < IBS_CPUID_FEATURES)
102 return IBS_CAPS_AVAIL; 117 return IBS_CAPS_DEFAULT;
103 118
104 ibs_caps = cpuid_eax(IBS_CPUID_FEATURES); 119 ibs_caps = cpuid_eax(IBS_CPUID_FEATURES);
105 if (!(ibs_caps & IBS_CAPS_AVAIL)) 120 if (!(ibs_caps & IBS_CAPS_AVAIL))
106 /* cpuid flags not valid */ 121 /* cpuid flags not valid */
107 return IBS_CAPS_AVAIL; 122 return IBS_CAPS_DEFAULT;
108 123
109 return ibs_caps; 124 return ibs_caps;
110} 125}
@@ -197,8 +212,8 @@ op_amd_handle_ibs(struct pt_regs * const regs,
197 rdmsrl(MSR_AMD64_IBSOPCTL, ctl); 212 rdmsrl(MSR_AMD64_IBSOPCTL, ctl);
198 if (ctl & IBS_OP_VAL) { 213 if (ctl & IBS_OP_VAL) {
199 rdmsrl(MSR_AMD64_IBSOPRIP, val); 214 rdmsrl(MSR_AMD64_IBSOPRIP, val);
200 oprofile_write_reserve(&entry, regs, val, 215 oprofile_write_reserve(&entry, regs, val, IBS_OP_CODE,
201 IBS_OP_CODE, IBS_OP_SIZE); 216 ibs_state.sample_size);
202 oprofile_add_data64(&entry, val); 217 oprofile_add_data64(&entry, val);
203 rdmsrl(MSR_AMD64_IBSOPDATA, val); 218 rdmsrl(MSR_AMD64_IBSOPDATA, val);
204 oprofile_add_data64(&entry, val); 219 oprofile_add_data64(&entry, val);
@@ -210,10 +225,14 @@ op_amd_handle_ibs(struct pt_regs * const regs,
210 oprofile_add_data64(&entry, val); 225 oprofile_add_data64(&entry, val);
211 rdmsrl(MSR_AMD64_IBSDCPHYSAD, val); 226 rdmsrl(MSR_AMD64_IBSDCPHYSAD, val);
212 oprofile_add_data64(&entry, val); 227 oprofile_add_data64(&entry, val);
228 if (ibs_state.branch_target) {
229 rdmsrl(MSR_AMD64_IBSBRTARGET, val);
230 oprofile_add_data(&entry, (unsigned long)val);
231 }
213 oprofile_write_commit(&entry); 232 oprofile_write_commit(&entry);
214 233
215 /* reenable the IRQ */ 234 /* reenable the IRQ */
216 ctl = op_amd_randomize_ibs_op(ibs_op_ctl); 235 ctl = op_amd_randomize_ibs_op(ibs_state.ibs_op_ctl);
217 wrmsrl(MSR_AMD64_IBSOPCTL, ctl); 236 wrmsrl(MSR_AMD64_IBSOPCTL, ctl);
218 } 237 }
219 } 238 }
@@ -226,21 +245,32 @@ static inline void op_amd_start_ibs(void)
226 if (!ibs_caps) 245 if (!ibs_caps)
227 return; 246 return;
228 247
248 memset(&ibs_state, 0, sizeof(ibs_state));
249
250 /*
251 * Note: Since the max count settings may out of range we
252 * write back the actual used values so that userland can read
253 * it.
254 */
255
229 if (ibs_config.fetch_enabled) { 256 if (ibs_config.fetch_enabled) {
230 val = (ibs_config.max_cnt_fetch >> 4) & IBS_FETCH_MAX_CNT; 257 val = ibs_config.max_cnt_fetch >> 4;
258 val = min(val, IBS_FETCH_MAX_CNT);
259 ibs_config.max_cnt_fetch = val << 4;
231 val |= ibs_config.rand_en ? IBS_FETCH_RAND_EN : 0; 260 val |= ibs_config.rand_en ? IBS_FETCH_RAND_EN : 0;
232 val |= IBS_FETCH_ENABLE; 261 val |= IBS_FETCH_ENABLE;
233 wrmsrl(MSR_AMD64_IBSFETCHCTL, val); 262 wrmsrl(MSR_AMD64_IBSFETCHCTL, val);
234 } 263 }
235 264
236 if (ibs_config.op_enabled) { 265 if (ibs_config.op_enabled) {
237 ibs_op_ctl = ibs_config.max_cnt_op >> 4; 266 val = ibs_config.max_cnt_op >> 4;
238 if (!(ibs_caps & IBS_CAPS_RDWROPCNT)) { 267 if (!(ibs_caps & IBS_CAPS_RDWROPCNT)) {
239 /* 268 /*
240 * IbsOpCurCnt not supported. See 269 * IbsOpCurCnt not supported. See
241 * op_amd_randomize_ibs_op() for details. 270 * op_amd_randomize_ibs_op() for details.
242 */ 271 */
243 ibs_op_ctl = clamp(ibs_op_ctl, 0x0081ULL, 0xFF80ULL); 272 val = clamp(val, 0x0081ULL, 0xFF80ULL);
273 ibs_config.max_cnt_op = val << 4;
244 } else { 274 } else {
245 /* 275 /*
246 * The start value is randomized with a 276 * The start value is randomized with a
@@ -248,13 +278,24 @@ static inline void op_amd_start_ibs(void)
248 * with the half of the randomized range. Also 278 * with the half of the randomized range. Also
249 * avoid underflows. 279 * avoid underflows.
250 */ 280 */
251 ibs_op_ctl = min(ibs_op_ctl + IBS_RANDOM_MAXCNT_OFFSET, 281 val += IBS_RANDOM_MAXCNT_OFFSET;
252 IBS_OP_MAX_CNT); 282 if (ibs_caps & IBS_CAPS_OPCNTEXT)
283 val = min(val, IBS_OP_MAX_CNT_EXT);
284 else
285 val = min(val, IBS_OP_MAX_CNT);
286 ibs_config.max_cnt_op =
287 (val - IBS_RANDOM_MAXCNT_OFFSET) << 4;
288 }
289 val = ((val & ~IBS_OP_MAX_CNT) << 4) | (val & IBS_OP_MAX_CNT);
290 val |= ibs_config.dispatched_ops ? IBS_OP_CNT_CTL : 0;
291 val |= IBS_OP_ENABLE;
292 ibs_state.ibs_op_ctl = val;
293 ibs_state.sample_size = IBS_OP_SIZE;
294 if (ibs_config.branch_target) {
295 ibs_state.branch_target = 1;
296 ibs_state.sample_size++;
253 } 297 }
254 if (ibs_caps & IBS_CAPS_OPCNT && ibs_config.dispatched_ops) 298 val = op_amd_randomize_ibs_op(ibs_state.ibs_op_ctl);
255 ibs_op_ctl |= IBS_OP_CNT_CTL;
256 ibs_op_ctl |= IBS_OP_ENABLE;
257 val = op_amd_randomize_ibs_op(ibs_op_ctl);
258 wrmsrl(MSR_AMD64_IBSOPCTL, val); 299 wrmsrl(MSR_AMD64_IBSOPCTL, val);
259 } 300 }
260} 301}
@@ -281,29 +322,25 @@ static inline int eilvt_is_available(int offset)
281 322
282static inline int ibs_eilvt_valid(void) 323static inline int ibs_eilvt_valid(void)
283{ 324{
284 u64 val;
285 int offset; 325 int offset;
326 u64 val;
286 327
287 rdmsrl(MSR_AMD64_IBSCTL, val); 328 rdmsrl(MSR_AMD64_IBSCTL, val);
329 offset = val & IBSCTL_LVT_OFFSET_MASK;
330
288 if (!(val & IBSCTL_LVT_OFFSET_VALID)) { 331 if (!(val & IBSCTL_LVT_OFFSET_VALID)) {
289 pr_err(FW_BUG "cpu %d, invalid IBS " 332 pr_err(FW_BUG "cpu %d, invalid IBS interrupt offset %d (MSR%08X=0x%016llx)\n",
290 "interrupt offset %d (MSR%08X=0x%016llx)", 333 smp_processor_id(), offset, MSR_AMD64_IBSCTL, val);
291 smp_processor_id(), offset,
292 MSR_AMD64_IBSCTL, val);
293 return 0; 334 return 0;
294 } 335 }
295 336
296 offset = val & IBSCTL_LVT_OFFSET_MASK; 337 if (!eilvt_is_available(offset)) {
297 338 pr_err(FW_BUG "cpu %d, IBS interrupt offset %d not available (MSR%08X=0x%016llx)\n",
298 if (eilvt_is_available(offset)) 339 smp_processor_id(), offset, MSR_AMD64_IBSCTL, val);
299 return !0; 340 return 0;
300 341 }
301 pr_err(FW_BUG "cpu %d, IBS interrupt offset %d "
302 "not available (MSR%08X=0x%016llx)",
303 smp_processor_id(), offset,
304 MSR_AMD64_IBSCTL, val);
305 342
306 return 0; 343 return 1;
307} 344}
308 345
309static inline int get_ibs_offset(void) 346static inline int get_ibs_offset(void)
@@ -630,28 +667,33 @@ static int setup_ibs_files(struct super_block *sb, struct dentry *root)
630 /* model specific files */ 667 /* model specific files */
631 668
632 /* setup some reasonable defaults */ 669 /* setup some reasonable defaults */
670 memset(&ibs_config, 0, sizeof(ibs_config));
633 ibs_config.max_cnt_fetch = 250000; 671 ibs_config.max_cnt_fetch = 250000;
634 ibs_config.fetch_enabled = 0;
635 ibs_config.max_cnt_op = 250000; 672 ibs_config.max_cnt_op = 250000;
636 ibs_config.op_enabled = 0; 673
637 ibs_config.dispatched_ops = 0; 674 if (ibs_caps & IBS_CAPS_FETCHSAM) {
638 675 dir = oprofilefs_mkdir(sb, root, "ibs_fetch");
639 dir = oprofilefs_mkdir(sb, root, "ibs_fetch"); 676 oprofilefs_create_ulong(sb, dir, "enable",
640 oprofilefs_create_ulong(sb, dir, "enable", 677 &ibs_config.fetch_enabled);
641 &ibs_config.fetch_enabled); 678 oprofilefs_create_ulong(sb, dir, "max_count",
642 oprofilefs_create_ulong(sb, dir, "max_count", 679 &ibs_config.max_cnt_fetch);
643 &ibs_config.max_cnt_fetch); 680 oprofilefs_create_ulong(sb, dir, "rand_enable",
644 oprofilefs_create_ulong(sb, dir, "rand_enable", 681 &ibs_config.rand_en);
645 &ibs_config.rand_en); 682 }
646 683
647 dir = oprofilefs_mkdir(sb, root, "ibs_op"); 684 if (ibs_caps & IBS_CAPS_OPSAM) {
648 oprofilefs_create_ulong(sb, dir, "enable", 685 dir = oprofilefs_mkdir(sb, root, "ibs_op");
649 &ibs_config.op_enabled); 686 oprofilefs_create_ulong(sb, dir, "enable",
650 oprofilefs_create_ulong(sb, dir, "max_count", 687 &ibs_config.op_enabled);
651 &ibs_config.max_cnt_op); 688 oprofilefs_create_ulong(sb, dir, "max_count",
652 if (ibs_caps & IBS_CAPS_OPCNT) 689 &ibs_config.max_cnt_op);
653 oprofilefs_create_ulong(sb, dir, "dispatched_ops", 690 if (ibs_caps & IBS_CAPS_OPCNT)
654 &ibs_config.dispatched_ops); 691 oprofilefs_create_ulong(sb, dir, "dispatched_ops",
692 &ibs_config.dispatched_ops);
693 if (ibs_caps & IBS_CAPS_BRNTRGT)
694 oprofilefs_create_ulong(sb, dir, "branch_target",
695 &ibs_config.branch_target);
696 }
655 697
656 return 0; 698 return 0;
657} 699}
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 01b281646251..79d0c4f6d071 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -410,7 +410,7 @@ extern void open_softirq(int nr, void (*action)(struct softirq_action *));
410extern void softirq_init(void); 410extern void softirq_init(void);
411static inline void __raise_softirq_irqoff(unsigned int nr) 411static inline void __raise_softirq_irqoff(unsigned int nr)
412{ 412{
413 trace_softirq_raise((struct softirq_action *)(unsigned long)nr, NULL); 413 trace_softirq_raise(nr);
414 or_softirq_pending(1UL << nr); 414 or_softirq_pending(1UL << nr);
415} 415}
416 416
diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h
index 25b4f686d918..8d3a2486544d 100644
--- a/include/linux/ring_buffer.h
+++ b/include/linux/ring_buffer.h
@@ -62,18 +62,6 @@ enum ring_buffer_type {
62unsigned ring_buffer_event_length(struct ring_buffer_event *event); 62unsigned ring_buffer_event_length(struct ring_buffer_event *event);
63void *ring_buffer_event_data(struct ring_buffer_event *event); 63void *ring_buffer_event_data(struct ring_buffer_event *event);
64 64
65/**
66 * ring_buffer_event_time_delta - return the delta timestamp of the event
67 * @event: the event to get the delta timestamp of
68 *
69 * The delta timestamp is the 27 bit timestamp since the last event.
70 */
71static inline unsigned
72ring_buffer_event_time_delta(struct ring_buffer_event *event)
73{
74 return event->time_delta;
75}
76
77/* 65/*
78 * ring_buffer_discard_commit will remove an event that has not 66 * ring_buffer_discard_commit will remove an event that has not
79 * ben committed yet. If this is used, then ring_buffer_unlock_commit 67 * ben committed yet. If this is used, then ring_buffer_unlock_commit
diff --git a/include/trace/events/irq.h b/include/trace/events/irq.h
index 6fa7cbab7d93..1c09820df585 100644
--- a/include/trace/events/irq.h
+++ b/include/trace/events/irq.h
@@ -86,76 +86,62 @@ TRACE_EVENT(irq_handler_exit,
86 86
87DECLARE_EVENT_CLASS(softirq, 87DECLARE_EVENT_CLASS(softirq,
88 88
89 TP_PROTO(struct softirq_action *h, struct softirq_action *vec), 89 TP_PROTO(unsigned int vec_nr),
90 90
91 TP_ARGS(h, vec), 91 TP_ARGS(vec_nr),
92 92
93 TP_STRUCT__entry( 93 TP_STRUCT__entry(
94 __field( int, vec ) 94 __field( unsigned int, vec )
95 ), 95 ),
96 96
97 TP_fast_assign( 97 TP_fast_assign(
98 if (vec) 98 __entry->vec = vec_nr;
99 __entry->vec = (int)(h - vec);
100 else
101 __entry->vec = (int)(long)h;
102 ), 99 ),
103 100
104 TP_printk("vec=%d [action=%s]", __entry->vec, 101 TP_printk("vec=%u [action=%s]", __entry->vec,
105 show_softirq_name(__entry->vec)) 102 show_softirq_name(__entry->vec))
106); 103);
107 104
108/** 105/**
109 * softirq_entry - called immediately before the softirq handler 106 * softirq_entry - called immediately before the softirq handler
110 * @h: pointer to struct softirq_action 107 * @vec_nr: softirq vector number
111 * @vec: pointer to first struct softirq_action in softirq_vec array
112 * 108 *
113 * The @h parameter, contains a pointer to the struct softirq_action 109 * When used in combination with the softirq_exit tracepoint
114 * which has a pointer to the action handler that is called. By subtracting 110 * we can determine the softirq handler runtine.
115 * the @vec pointer from the @h pointer, we can determine the softirq
116 * number. Also, when used in combination with the softirq_exit tracepoint
117 * we can determine the softirq latency.
118 */ 111 */
119DEFINE_EVENT(softirq, softirq_entry, 112DEFINE_EVENT(softirq, softirq_entry,
120 113
121 TP_PROTO(struct softirq_action *h, struct softirq_action *vec), 114 TP_PROTO(unsigned int vec_nr),
122 115
123 TP_ARGS(h, vec) 116 TP_ARGS(vec_nr)
124); 117);
125 118
126/** 119/**
127 * softirq_exit - called immediately after the softirq handler returns 120 * softirq_exit - called immediately after the softirq handler returns
128 * @h: pointer to struct softirq_action 121 * @vec_nr: softirq vector number
129 * @vec: pointer to first struct softirq_action in softirq_vec array
130 * 122 *
131 * The @h parameter contains a pointer to the struct softirq_action 123 * When used in combination with the softirq_entry tracepoint
132 * that has handled the softirq. By subtracting the @vec pointer from 124 * we can determine the softirq handler runtine.
133 * the @h pointer, we can determine the softirq number. Also, when used in
134 * combination with the softirq_entry tracepoint we can determine the softirq
135 * latency.
136 */ 125 */
137DEFINE_EVENT(softirq, softirq_exit, 126DEFINE_EVENT(softirq, softirq_exit,
138 127
139 TP_PROTO(struct softirq_action *h, struct softirq_action *vec), 128 TP_PROTO(unsigned int vec_nr),
140 129
141 TP_ARGS(h, vec) 130 TP_ARGS(vec_nr)
142); 131);
143 132
144/** 133/**
145 * softirq_raise - called immediately when a softirq is raised 134 * softirq_raise - called immediately when a softirq is raised
146 * @h: pointer to struct softirq_action 135 * @vec_nr: softirq vector number
147 * @vec: pointer to first struct softirq_action in softirq_vec array
148 * 136 *
149 * The @h parameter contains a pointer to the softirq vector number which is 137 * When used in combination with the softirq_entry tracepoint
150 * raised. @vec is NULL and it means @h includes vector number not 138 * we can determine the softirq raise to run latency.
151 * softirq_action. When used in combination with the softirq_entry tracepoint
152 * we can determine the softirq raise latency.
153 */ 139 */
154DEFINE_EVENT(softirq, softirq_raise, 140DEFINE_EVENT(softirq, softirq_raise,
155 141
156 TP_PROTO(struct softirq_action *h, struct softirq_action *vec), 142 TP_PROTO(unsigned int vec_nr),
157 143
158 TP_ARGS(h, vec) 144 TP_ARGS(vec_nr)
159); 145);
160 146
161#endif /* _TRACE_IRQ_H */ 147#endif /* _TRACE_IRQ_H */
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 56a891914273..99865c33a60d 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -74,7 +74,8 @@ static struct hlist_head kretprobe_inst_table[KPROBE_TABLE_SIZE];
74/* NOTE: change this value only with kprobe_mutex held */ 74/* NOTE: change this value only with kprobe_mutex held */
75static bool kprobes_all_disarmed; 75static bool kprobes_all_disarmed;
76 76
77static DEFINE_MUTEX(kprobe_mutex); /* Protects kprobe_table */ 77/* This protects kprobe_table and optimizing_list */
78static DEFINE_MUTEX(kprobe_mutex);
78static DEFINE_PER_CPU(struct kprobe *, kprobe_instance) = NULL; 79static DEFINE_PER_CPU(struct kprobe *, kprobe_instance) = NULL;
79static struct { 80static struct {
80 spinlock_t lock ____cacheline_aligned_in_smp; 81 spinlock_t lock ____cacheline_aligned_in_smp;
@@ -595,6 +596,7 @@ static __kprobes void try_to_optimize_kprobe(struct kprobe *p)
595} 596}
596 597
597#ifdef CONFIG_SYSCTL 598#ifdef CONFIG_SYSCTL
599/* This should be called with kprobe_mutex locked */
598static void __kprobes optimize_all_kprobes(void) 600static void __kprobes optimize_all_kprobes(void)
599{ 601{
600 struct hlist_head *head; 602 struct hlist_head *head;
@@ -607,17 +609,16 @@ static void __kprobes optimize_all_kprobes(void)
607 return; 609 return;
608 610
609 kprobes_allow_optimization = true; 611 kprobes_allow_optimization = true;
610 mutex_lock(&text_mutex);
611 for (i = 0; i < KPROBE_TABLE_SIZE; i++) { 612 for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
612 head = &kprobe_table[i]; 613 head = &kprobe_table[i];
613 hlist_for_each_entry_rcu(p, node, head, hlist) 614 hlist_for_each_entry_rcu(p, node, head, hlist)
614 if (!kprobe_disabled(p)) 615 if (!kprobe_disabled(p))
615 optimize_kprobe(p); 616 optimize_kprobe(p);
616 } 617 }
617 mutex_unlock(&text_mutex);
618 printk(KERN_INFO "Kprobes globally optimized\n"); 618 printk(KERN_INFO "Kprobes globally optimized\n");
619} 619}
620 620
621/* This should be called with kprobe_mutex locked */
621static void __kprobes unoptimize_all_kprobes(void) 622static void __kprobes unoptimize_all_kprobes(void)
622{ 623{
623 struct hlist_head *head; 624 struct hlist_head *head;
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index f309e8014c78..517d827f4982 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -417,8 +417,8 @@ event_filter_match(struct perf_event *event)
417 return event->cpu == -1 || event->cpu == smp_processor_id(); 417 return event->cpu == -1 || event->cpu == smp_processor_id();
418} 418}
419 419
420static int 420static void
421__event_sched_out(struct perf_event *event, 421event_sched_out(struct perf_event *event,
422 struct perf_cpu_context *cpuctx, 422 struct perf_cpu_context *cpuctx,
423 struct perf_event_context *ctx) 423 struct perf_event_context *ctx)
424{ 424{
@@ -437,13 +437,14 @@ __event_sched_out(struct perf_event *event,
437 } 437 }
438 438
439 if (event->state != PERF_EVENT_STATE_ACTIVE) 439 if (event->state != PERF_EVENT_STATE_ACTIVE)
440 return 0; 440 return;
441 441
442 event->state = PERF_EVENT_STATE_INACTIVE; 442 event->state = PERF_EVENT_STATE_INACTIVE;
443 if (event->pending_disable) { 443 if (event->pending_disable) {
444 event->pending_disable = 0; 444 event->pending_disable = 0;
445 event->state = PERF_EVENT_STATE_OFF; 445 event->state = PERF_EVENT_STATE_OFF;
446 } 446 }
447 event->tstamp_stopped = ctx->time;
447 event->pmu->del(event, 0); 448 event->pmu->del(event, 0);
448 event->oncpu = -1; 449 event->oncpu = -1;
449 450
@@ -452,19 +453,6 @@ __event_sched_out(struct perf_event *event,
452 ctx->nr_active--; 453 ctx->nr_active--;
453 if (event->attr.exclusive || !cpuctx->active_oncpu) 454 if (event->attr.exclusive || !cpuctx->active_oncpu)
454 cpuctx->exclusive = 0; 455 cpuctx->exclusive = 0;
455 return 1;
456}
457
458static void
459event_sched_out(struct perf_event *event,
460 struct perf_cpu_context *cpuctx,
461 struct perf_event_context *ctx)
462{
463 int ret;
464
465 ret = __event_sched_out(event, cpuctx, ctx);
466 if (ret)
467 event->tstamp_stopped = ctx->time;
468} 456}
469 457
470static void 458static void
@@ -664,7 +652,7 @@ retry:
664} 652}
665 653
666static int 654static int
667__event_sched_in(struct perf_event *event, 655event_sched_in(struct perf_event *event,
668 struct perf_cpu_context *cpuctx, 656 struct perf_cpu_context *cpuctx,
669 struct perf_event_context *ctx) 657 struct perf_event_context *ctx)
670{ 658{
@@ -684,6 +672,8 @@ __event_sched_in(struct perf_event *event,
684 return -EAGAIN; 672 return -EAGAIN;
685 } 673 }
686 674
675 event->tstamp_running += ctx->time - event->tstamp_stopped;
676
687 if (!is_software_event(event)) 677 if (!is_software_event(event))
688 cpuctx->active_oncpu++; 678 cpuctx->active_oncpu++;
689 ctx->nr_active++; 679 ctx->nr_active++;
@@ -694,35 +684,6 @@ __event_sched_in(struct perf_event *event,
694 return 0; 684 return 0;
695} 685}
696 686
697static inline int
698event_sched_in(struct perf_event *event,
699 struct perf_cpu_context *cpuctx,
700 struct perf_event_context *ctx)
701{
702 int ret = __event_sched_in(event, cpuctx, ctx);
703 if (ret)
704 return ret;
705 event->tstamp_running += ctx->time - event->tstamp_stopped;
706 return 0;
707}
708
709static void
710group_commit_event_sched_in(struct perf_event *group_event,
711 struct perf_cpu_context *cpuctx,
712 struct perf_event_context *ctx)
713{
714 struct perf_event *event;
715 u64 now = ctx->time;
716
717 group_event->tstamp_running += now - group_event->tstamp_stopped;
718 /*
719 * Schedule in siblings as one group (if any):
720 */
721 list_for_each_entry(event, &group_event->sibling_list, group_entry) {
722 event->tstamp_running += now - event->tstamp_stopped;
723 }
724}
725
726static int 687static int
727group_sched_in(struct perf_event *group_event, 688group_sched_in(struct perf_event *group_event,
728 struct perf_cpu_context *cpuctx, 689 struct perf_cpu_context *cpuctx,
@@ -730,19 +691,15 @@ group_sched_in(struct perf_event *group_event,
730{ 691{
731 struct perf_event *event, *partial_group = NULL; 692 struct perf_event *event, *partial_group = NULL;
732 struct pmu *pmu = group_event->pmu; 693 struct pmu *pmu = group_event->pmu;
694 u64 now = ctx->time;
695 bool simulate = false;
733 696
734 if (group_event->state == PERF_EVENT_STATE_OFF) 697 if (group_event->state == PERF_EVENT_STATE_OFF)
735 return 0; 698 return 0;
736 699
737 pmu->start_txn(pmu); 700 pmu->start_txn(pmu);
738 701
739 /* 702 if (event_sched_in(group_event, cpuctx, ctx)) {
740 * use __event_sched_in() to delay updating tstamp_running
741 * until the transaction is committed. In case of failure
742 * we will keep an unmodified tstamp_running which is a
743 * requirement to get correct timing information
744 */
745 if (__event_sched_in(group_event, cpuctx, ctx)) {
746 pmu->cancel_txn(pmu); 703 pmu->cancel_txn(pmu);
747 return -EAGAIN; 704 return -EAGAIN;
748 } 705 }
@@ -751,31 +708,42 @@ group_sched_in(struct perf_event *group_event,
751 * Schedule in siblings as one group (if any): 708 * Schedule in siblings as one group (if any):
752 */ 709 */
753 list_for_each_entry(event, &group_event->sibling_list, group_entry) { 710 list_for_each_entry(event, &group_event->sibling_list, group_entry) {
754 if (__event_sched_in(event, cpuctx, ctx)) { 711 if (event_sched_in(event, cpuctx, ctx)) {
755 partial_group = event; 712 partial_group = event;
756 goto group_error; 713 goto group_error;
757 } 714 }
758 } 715 }
759 716
760 if (!pmu->commit_txn(pmu)) { 717 if (!pmu->commit_txn(pmu))
761 /* commit tstamp_running */
762 group_commit_event_sched_in(group_event, cpuctx, ctx);
763 return 0; 718 return 0;
764 } 719
765group_error: 720group_error:
766 /* 721 /*
767 * Groups can be scheduled in as one unit only, so undo any 722 * Groups can be scheduled in as one unit only, so undo any
768 * partial group before returning: 723 * partial group before returning:
724 * The events up to the failed event are scheduled out normally,
725 * tstamp_stopped will be updated.
769 * 726 *
770 * use __event_sched_out() to avoid updating tstamp_stopped 727 * The failed events and the remaining siblings need to have
771 * because the event never actually ran 728 * their timings updated as if they had gone thru event_sched_in()
729 * and event_sched_out(). This is required to get consistent timings
730 * across the group. This also takes care of the case where the group
731 * could never be scheduled by ensuring tstamp_stopped is set to mark
732 * the time the event was actually stopped, such that time delta
733 * calculation in update_event_times() is correct.
772 */ 734 */
773 list_for_each_entry(event, &group_event->sibling_list, group_entry) { 735 list_for_each_entry(event, &group_event->sibling_list, group_entry) {
774 if (event == partial_group) 736 if (event == partial_group)
775 break; 737 simulate = true;
776 __event_sched_out(event, cpuctx, ctx); 738
739 if (simulate) {
740 event->tstamp_running += now - event->tstamp_stopped;
741 event->tstamp_stopped = now;
742 } else {
743 event_sched_out(event, cpuctx, ctx);
744 }
777 } 745 }
778 __event_sched_out(group_event, cpuctx, ctx); 746 event_sched_out(group_event, cpuctx, ctx);
779 747
780 pmu->cancel_txn(pmu); 748 pmu->cancel_txn(pmu);
781 749
diff --git a/kernel/softirq.c b/kernel/softirq.c
index f02a9dfa19bc..18f4be0d5fe0 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -229,18 +229,20 @@ restart:
229 229
230 do { 230 do {
231 if (pending & 1) { 231 if (pending & 1) {
232 unsigned int vec_nr = h - softirq_vec;
232 int prev_count = preempt_count(); 233 int prev_count = preempt_count();
233 kstat_incr_softirqs_this_cpu(h - softirq_vec);
234 234
235 trace_softirq_entry(h, softirq_vec); 235 kstat_incr_softirqs_this_cpu(vec_nr);
236
237 trace_softirq_entry(vec_nr);
236 h->action(h); 238 h->action(h);
237 trace_softirq_exit(h, softirq_vec); 239 trace_softirq_exit(vec_nr);
238 if (unlikely(prev_count != preempt_count())) { 240 if (unlikely(prev_count != preempt_count())) {
239 printk(KERN_ERR "huh, entered softirq %td %s %p" 241 printk(KERN_ERR "huh, entered softirq %u %s %p"
240 "with preempt_count %08x," 242 "with preempt_count %08x,"
241 " exited with %08x?\n", h - softirq_vec, 243 " exited with %08x?\n", vec_nr,
242 softirq_to_name[h - softirq_vec], 244 softirq_to_name[vec_nr], h->action,
243 h->action, prev_count, preempt_count()); 245 prev_count, preempt_count());
244 preempt_count() = prev_count; 246 preempt_count() = prev_count;
245 } 247 }
246 248
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index c3dab054d18e..9ed509a015d8 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -224,6 +224,9 @@ enum {
224 RB_LEN_TIME_STAMP = 16, 224 RB_LEN_TIME_STAMP = 16,
225}; 225};
226 226
227#define skip_time_extend(event) \
228 ((struct ring_buffer_event *)((char *)event + RB_LEN_TIME_EXTEND))
229
227static inline int rb_null_event(struct ring_buffer_event *event) 230static inline int rb_null_event(struct ring_buffer_event *event)
228{ 231{
229 return event->type_len == RINGBUF_TYPE_PADDING && !event->time_delta; 232 return event->type_len == RINGBUF_TYPE_PADDING && !event->time_delta;
@@ -248,8 +251,12 @@ rb_event_data_length(struct ring_buffer_event *event)
248 return length + RB_EVNT_HDR_SIZE; 251 return length + RB_EVNT_HDR_SIZE;
249} 252}
250 253
251/* inline for ring buffer fast paths */ 254/*
252static unsigned 255 * Return the length of the given event. Will return
256 * the length of the time extend if the event is a
257 * time extend.
258 */
259static inline unsigned
253rb_event_length(struct ring_buffer_event *event) 260rb_event_length(struct ring_buffer_event *event)
254{ 261{
255 switch (event->type_len) { 262 switch (event->type_len) {
@@ -274,13 +281,41 @@ rb_event_length(struct ring_buffer_event *event)
274 return 0; 281 return 0;
275} 282}
276 283
284/*
285 * Return total length of time extend and data,
286 * or just the event length for all other events.
287 */
288static inline unsigned
289rb_event_ts_length(struct ring_buffer_event *event)
290{
291 unsigned len = 0;
292
293 if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) {
294 /* time extends include the data event after it */
295 len = RB_LEN_TIME_EXTEND;
296 event = skip_time_extend(event);
297 }
298 return len + rb_event_length(event);
299}
300
277/** 301/**
278 * ring_buffer_event_length - return the length of the event 302 * ring_buffer_event_length - return the length of the event
279 * @event: the event to get the length of 303 * @event: the event to get the length of
304 *
305 * Returns the size of the data load of a data event.
306 * If the event is something other than a data event, it
307 * returns the size of the event itself. With the exception
308 * of a TIME EXTEND, where it still returns the size of the
309 * data load of the data event after it.
280 */ 310 */
281unsigned ring_buffer_event_length(struct ring_buffer_event *event) 311unsigned ring_buffer_event_length(struct ring_buffer_event *event)
282{ 312{
283 unsigned length = rb_event_length(event); 313 unsigned length;
314
315 if (event->type_len == RINGBUF_TYPE_TIME_EXTEND)
316 event = skip_time_extend(event);
317
318 length = rb_event_length(event);
284 if (event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX) 319 if (event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
285 return length; 320 return length;
286 length -= RB_EVNT_HDR_SIZE; 321 length -= RB_EVNT_HDR_SIZE;
@@ -294,6 +329,8 @@ EXPORT_SYMBOL_GPL(ring_buffer_event_length);
294static void * 329static void *
295rb_event_data(struct ring_buffer_event *event) 330rb_event_data(struct ring_buffer_event *event)
296{ 331{
332 if (event->type_len == RINGBUF_TYPE_TIME_EXTEND)
333 event = skip_time_extend(event);
297 BUG_ON(event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX); 334 BUG_ON(event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX);
298 /* If length is in len field, then array[0] has the data */ 335 /* If length is in len field, then array[0] has the data */
299 if (event->type_len) 336 if (event->type_len)
@@ -404,9 +441,6 @@ static inline int test_time_stamp(u64 delta)
404/* Max payload is BUF_PAGE_SIZE - header (8bytes) */ 441/* Max payload is BUF_PAGE_SIZE - header (8bytes) */
405#define BUF_MAX_DATA_SIZE (BUF_PAGE_SIZE - (sizeof(u32) * 2)) 442#define BUF_MAX_DATA_SIZE (BUF_PAGE_SIZE - (sizeof(u32) * 2))
406 443
407/* Max number of timestamps that can fit on a page */
408#define RB_TIMESTAMPS_PER_PAGE (BUF_PAGE_SIZE / RB_LEN_TIME_EXTEND)
409
410int ring_buffer_print_page_header(struct trace_seq *s) 444int ring_buffer_print_page_header(struct trace_seq *s)
411{ 445{
412 struct buffer_data_page field; 446 struct buffer_data_page field;
@@ -1546,6 +1580,25 @@ static void rb_inc_iter(struct ring_buffer_iter *iter)
1546 iter->head = 0; 1580 iter->head = 0;
1547} 1581}
1548 1582
1583/* Slow path, do not inline */
1584static noinline struct ring_buffer_event *
1585rb_add_time_stamp(struct ring_buffer_event *event, u64 delta)
1586{
1587 event->type_len = RINGBUF_TYPE_TIME_EXTEND;
1588
1589 /* Not the first event on the page? */
1590 if (rb_event_index(event)) {
1591 event->time_delta = delta & TS_MASK;
1592 event->array[0] = delta >> TS_SHIFT;
1593 } else {
1594 /* nope, just zero it */
1595 event->time_delta = 0;
1596 event->array[0] = 0;
1597 }
1598
1599 return skip_time_extend(event);
1600}
1601
1549/** 1602/**
1550 * ring_buffer_update_event - update event type and data 1603 * ring_buffer_update_event - update event type and data
1551 * @event: the event to update 1604 * @event: the event to update
@@ -1558,28 +1611,31 @@ static void rb_inc_iter(struct ring_buffer_iter *iter)
1558 * data field. 1611 * data field.
1559 */ 1612 */
1560static void 1613static void
1561rb_update_event(struct ring_buffer_event *event, 1614rb_update_event(struct ring_buffer_per_cpu *cpu_buffer,
1562 unsigned type, unsigned length) 1615 struct ring_buffer_event *event, unsigned length,
1616 int add_timestamp, u64 delta)
1563{ 1617{
1564 event->type_len = type; 1618 /* Only a commit updates the timestamp */
1565 1619 if (unlikely(!rb_event_is_commit(cpu_buffer, event)))
1566 switch (type) { 1620 delta = 0;
1567
1568 case RINGBUF_TYPE_PADDING:
1569 case RINGBUF_TYPE_TIME_EXTEND:
1570 case RINGBUF_TYPE_TIME_STAMP:
1571 break;
1572 1621
1573 case 0: 1622 /*
1574 length -= RB_EVNT_HDR_SIZE; 1623 * If we need to add a timestamp, then we
1575 if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT) 1624 * add it to the start of the resevered space.
1576 event->array[0] = length; 1625 */
1577 else 1626 if (unlikely(add_timestamp)) {
1578 event->type_len = DIV_ROUND_UP(length, RB_ALIGNMENT); 1627 event = rb_add_time_stamp(event, delta);
1579 break; 1628 length -= RB_LEN_TIME_EXTEND;
1580 default: 1629 delta = 0;
1581 BUG();
1582 } 1630 }
1631
1632 event->time_delta = delta;
1633 length -= RB_EVNT_HDR_SIZE;
1634 if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT) {
1635 event->type_len = 0;
1636 event->array[0] = length;
1637 } else
1638 event->type_len = DIV_ROUND_UP(length, RB_ALIGNMENT);
1583} 1639}
1584 1640
1585/* 1641/*
@@ -1823,10 +1879,13 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer,
1823 local_sub(length, &tail_page->write); 1879 local_sub(length, &tail_page->write);
1824} 1880}
1825 1881
1826static struct ring_buffer_event * 1882/*
1883 * This is the slow path, force gcc not to inline it.
1884 */
1885static noinline struct ring_buffer_event *
1827rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer, 1886rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,
1828 unsigned long length, unsigned long tail, 1887 unsigned long length, unsigned long tail,
1829 struct buffer_page *tail_page, u64 *ts) 1888 struct buffer_page *tail_page, u64 ts)
1830{ 1889{
1831 struct buffer_page *commit_page = cpu_buffer->commit_page; 1890 struct buffer_page *commit_page = cpu_buffer->commit_page;
1832 struct ring_buffer *buffer = cpu_buffer->buffer; 1891 struct ring_buffer *buffer = cpu_buffer->buffer;
@@ -1909,8 +1968,8 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,
1909 * Nested commits always have zero deltas, so 1968 * Nested commits always have zero deltas, so
1910 * just reread the time stamp 1969 * just reread the time stamp
1911 */ 1970 */
1912 *ts = rb_time_stamp(buffer); 1971 ts = rb_time_stamp(buffer);
1913 next_page->page->time_stamp = *ts; 1972 next_page->page->time_stamp = ts;
1914 } 1973 }
1915 1974
1916 out_again: 1975 out_again:
@@ -1929,12 +1988,21 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,
1929 1988
1930static struct ring_buffer_event * 1989static struct ring_buffer_event *
1931__rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, 1990__rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
1932 unsigned type, unsigned long length, u64 *ts) 1991 unsigned long length, u64 ts,
1992 u64 delta, int add_timestamp)
1933{ 1993{
1934 struct buffer_page *tail_page; 1994 struct buffer_page *tail_page;
1935 struct ring_buffer_event *event; 1995 struct ring_buffer_event *event;
1936 unsigned long tail, write; 1996 unsigned long tail, write;
1937 1997
1998 /*
1999 * If the time delta since the last event is too big to
2000 * hold in the time field of the event, then we append a
2001 * TIME EXTEND event ahead of the data event.
2002 */
2003 if (unlikely(add_timestamp))
2004 length += RB_LEN_TIME_EXTEND;
2005
1938 tail_page = cpu_buffer->tail_page; 2006 tail_page = cpu_buffer->tail_page;
1939 write = local_add_return(length, &tail_page->write); 2007 write = local_add_return(length, &tail_page->write);
1940 2008
@@ -1943,7 +2011,7 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
1943 tail = write - length; 2011 tail = write - length;
1944 2012
1945 /* See if we shot past the end of this buffer page */ 2013 /* See if we shot past the end of this buffer page */
1946 if (write > BUF_PAGE_SIZE) 2014 if (unlikely(write > BUF_PAGE_SIZE))
1947 return rb_move_tail(cpu_buffer, length, tail, 2015 return rb_move_tail(cpu_buffer, length, tail,
1948 tail_page, ts); 2016 tail_page, ts);
1949 2017
@@ -1951,18 +2019,16 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
1951 2019
1952 event = __rb_page_index(tail_page, tail); 2020 event = __rb_page_index(tail_page, tail);
1953 kmemcheck_annotate_bitfield(event, bitfield); 2021 kmemcheck_annotate_bitfield(event, bitfield);
1954 rb_update_event(event, type, length); 2022 rb_update_event(cpu_buffer, event, length, add_timestamp, delta);
1955 2023
1956 /* The passed in type is zero for DATA */ 2024 local_inc(&tail_page->entries);
1957 if (likely(!type))
1958 local_inc(&tail_page->entries);
1959 2025
1960 /* 2026 /*
1961 * If this is the first commit on the page, then update 2027 * If this is the first commit on the page, then update
1962 * its timestamp. 2028 * its timestamp.
1963 */ 2029 */
1964 if (!tail) 2030 if (!tail)
1965 tail_page->page->time_stamp = *ts; 2031 tail_page->page->time_stamp = ts;
1966 2032
1967 return event; 2033 return event;
1968} 2034}
@@ -1977,7 +2043,7 @@ rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer,
1977 unsigned long addr; 2043 unsigned long addr;
1978 2044
1979 new_index = rb_event_index(event); 2045 new_index = rb_event_index(event);
1980 old_index = new_index + rb_event_length(event); 2046 old_index = new_index + rb_event_ts_length(event);
1981 addr = (unsigned long)event; 2047 addr = (unsigned long)event;
1982 addr &= PAGE_MASK; 2048 addr &= PAGE_MASK;
1983 2049
@@ -2003,76 +2069,13 @@ rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer,
2003 return 0; 2069 return 0;
2004} 2070}
2005 2071
2006static int
2007rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer,
2008 u64 *ts, u64 *delta)
2009{
2010 struct ring_buffer_event *event;
2011 int ret;
2012
2013 WARN_ONCE(*delta > (1ULL << 59),
2014 KERN_WARNING "Delta way too big! %llu ts=%llu write stamp = %llu\n",
2015 (unsigned long long)*delta,
2016 (unsigned long long)*ts,
2017 (unsigned long long)cpu_buffer->write_stamp);
2018
2019 /*
2020 * The delta is too big, we to add a
2021 * new timestamp.
2022 */
2023 event = __rb_reserve_next(cpu_buffer,
2024 RINGBUF_TYPE_TIME_EXTEND,
2025 RB_LEN_TIME_EXTEND,
2026 ts);
2027 if (!event)
2028 return -EBUSY;
2029
2030 if (PTR_ERR(event) == -EAGAIN)
2031 return -EAGAIN;
2032
2033 /* Only a commited time event can update the write stamp */
2034 if (rb_event_is_commit(cpu_buffer, event)) {
2035 /*
2036 * If this is the first on the page, then it was
2037 * updated with the page itself. Try to discard it
2038 * and if we can't just make it zero.
2039 */
2040 if (rb_event_index(event)) {
2041 event->time_delta = *delta & TS_MASK;
2042 event->array[0] = *delta >> TS_SHIFT;
2043 } else {
2044 /* try to discard, since we do not need this */
2045 if (!rb_try_to_discard(cpu_buffer, event)) {
2046 /* nope, just zero it */
2047 event->time_delta = 0;
2048 event->array[0] = 0;
2049 }
2050 }
2051 cpu_buffer->write_stamp = *ts;
2052 /* let the caller know this was the commit */
2053 ret = 1;
2054 } else {
2055 /* Try to discard the event */
2056 if (!rb_try_to_discard(cpu_buffer, event)) {
2057 /* Darn, this is just wasted space */
2058 event->time_delta = 0;
2059 event->array[0] = 0;
2060 }
2061 ret = 0;
2062 }
2063
2064 *delta = 0;
2065
2066 return ret;
2067}
2068
2069static void rb_start_commit(struct ring_buffer_per_cpu *cpu_buffer) 2072static void rb_start_commit(struct ring_buffer_per_cpu *cpu_buffer)
2070{ 2073{
2071 local_inc(&cpu_buffer->committing); 2074 local_inc(&cpu_buffer->committing);
2072 local_inc(&cpu_buffer->commits); 2075 local_inc(&cpu_buffer->commits);
2073} 2076}
2074 2077
2075static void rb_end_commit(struct ring_buffer_per_cpu *cpu_buffer) 2078static inline void rb_end_commit(struct ring_buffer_per_cpu *cpu_buffer)
2076{ 2079{
2077 unsigned long commits; 2080 unsigned long commits;
2078 2081
@@ -2110,9 +2113,10 @@ rb_reserve_next_event(struct ring_buffer *buffer,
2110 unsigned long length) 2113 unsigned long length)
2111{ 2114{
2112 struct ring_buffer_event *event; 2115 struct ring_buffer_event *event;
2113 u64 ts, delta = 0; 2116 u64 ts, delta;
2114 int commit = 0;
2115 int nr_loops = 0; 2117 int nr_loops = 0;
2118 int add_timestamp;
2119 u64 diff;
2116 2120
2117 rb_start_commit(cpu_buffer); 2121 rb_start_commit(cpu_buffer);
2118 2122
@@ -2133,6 +2137,9 @@ rb_reserve_next_event(struct ring_buffer *buffer,
2133 2137
2134 length = rb_calculate_event_length(length); 2138 length = rb_calculate_event_length(length);
2135 again: 2139 again:
2140 add_timestamp = 0;
2141 delta = 0;
2142
2136 /* 2143 /*
2137 * We allow for interrupts to reenter here and do a trace. 2144 * We allow for interrupts to reenter here and do a trace.
2138 * If one does, it will cause this original code to loop 2145 * If one does, it will cause this original code to loop
@@ -2146,56 +2153,32 @@ rb_reserve_next_event(struct ring_buffer *buffer,
2146 goto out_fail; 2153 goto out_fail;
2147 2154
2148 ts = rb_time_stamp(cpu_buffer->buffer); 2155 ts = rb_time_stamp(cpu_buffer->buffer);
2156 diff = ts - cpu_buffer->write_stamp;
2149 2157
2150 /* 2158 /* make sure this diff is calculated here */
2151 * Only the first commit can update the timestamp. 2159 barrier();
2152 * Yes there is a race here. If an interrupt comes in
2153 * just after the conditional and it traces too, then it
2154 * will also check the deltas. More than one timestamp may
2155 * also be made. But only the entry that did the actual
2156 * commit will be something other than zero.
2157 */
2158 if (likely(cpu_buffer->tail_page == cpu_buffer->commit_page &&
2159 rb_page_write(cpu_buffer->tail_page) ==
2160 rb_commit_index(cpu_buffer))) {
2161 u64 diff;
2162
2163 diff = ts - cpu_buffer->write_stamp;
2164
2165 /* make sure this diff is calculated here */
2166 barrier();
2167
2168 /* Did the write stamp get updated already? */
2169 if (unlikely(ts < cpu_buffer->write_stamp))
2170 goto get_event;
2171 2160
2161 /* Did the write stamp get updated already? */
2162 if (likely(ts >= cpu_buffer->write_stamp)) {
2172 delta = diff; 2163 delta = diff;
2173 if (unlikely(test_time_stamp(delta))) { 2164 if (unlikely(test_time_stamp(delta))) {
2174 2165 WARN_ONCE(delta > (1ULL << 59),
2175 commit = rb_add_time_stamp(cpu_buffer, &ts, &delta); 2166 KERN_WARNING "Delta way too big! %llu ts=%llu write stamp = %llu\n",
2176 if (commit == -EBUSY) 2167 (unsigned long long)delta,
2177 goto out_fail; 2168 (unsigned long long)ts,
2178 2169 (unsigned long long)cpu_buffer->write_stamp);
2179 if (commit == -EAGAIN) 2170 add_timestamp = 1;
2180 goto again;
2181
2182 RB_WARN_ON(cpu_buffer, commit < 0);
2183 } 2171 }
2184 } 2172 }
2185 2173
2186 get_event: 2174 event = __rb_reserve_next(cpu_buffer, length, ts,
2187 event = __rb_reserve_next(cpu_buffer, 0, length, &ts); 2175 delta, add_timestamp);
2188 if (unlikely(PTR_ERR(event) == -EAGAIN)) 2176 if (unlikely(PTR_ERR(event) == -EAGAIN))
2189 goto again; 2177 goto again;
2190 2178
2191 if (!event) 2179 if (!event)
2192 goto out_fail; 2180 goto out_fail;
2193 2181
2194 if (!rb_event_is_commit(cpu_buffer, event))
2195 delta = 0;
2196
2197 event->time_delta = delta;
2198
2199 return event; 2182 return event;
2200 2183
2201 out_fail: 2184 out_fail:
@@ -2207,13 +2190,9 @@ rb_reserve_next_event(struct ring_buffer *buffer,
2207 2190
2208#define TRACE_RECURSIVE_DEPTH 16 2191#define TRACE_RECURSIVE_DEPTH 16
2209 2192
2210static int trace_recursive_lock(void) 2193/* Keep this code out of the fast path cache */
2194static noinline void trace_recursive_fail(void)
2211{ 2195{
2212 current->trace_recursion++;
2213
2214 if (likely(current->trace_recursion < TRACE_RECURSIVE_DEPTH))
2215 return 0;
2216
2217 /* Disable all tracing before we do anything else */ 2196 /* Disable all tracing before we do anything else */
2218 tracing_off_permanent(); 2197 tracing_off_permanent();
2219 2198
@@ -2225,10 +2204,21 @@ static int trace_recursive_lock(void)
2225 in_nmi()); 2204 in_nmi());
2226 2205
2227 WARN_ON_ONCE(1); 2206 WARN_ON_ONCE(1);
2207}
2208
2209static inline int trace_recursive_lock(void)
2210{
2211 current->trace_recursion++;
2212
2213 if (likely(current->trace_recursion < TRACE_RECURSIVE_DEPTH))
2214 return 0;
2215
2216 trace_recursive_fail();
2217
2228 return -1; 2218 return -1;
2229} 2219}
2230 2220
2231static void trace_recursive_unlock(void) 2221static inline void trace_recursive_unlock(void)
2232{ 2222{
2233 WARN_ON_ONCE(!current->trace_recursion); 2223 WARN_ON_ONCE(!current->trace_recursion);
2234 2224
@@ -2308,12 +2298,28 @@ static void
2308rb_update_write_stamp(struct ring_buffer_per_cpu *cpu_buffer, 2298rb_update_write_stamp(struct ring_buffer_per_cpu *cpu_buffer,
2309 struct ring_buffer_event *event) 2299 struct ring_buffer_event *event)
2310{ 2300{
2301 u64 delta;
2302
2311 /* 2303 /*
2312 * The event first in the commit queue updates the 2304 * The event first in the commit queue updates the
2313 * time stamp. 2305 * time stamp.
2314 */ 2306 */
2315 if (rb_event_is_commit(cpu_buffer, event)) 2307 if (rb_event_is_commit(cpu_buffer, event)) {
2316 cpu_buffer->write_stamp += event->time_delta; 2308 /*
2309 * A commit event that is first on a page
2310 * updates the write timestamp with the page stamp
2311 */
2312 if (!rb_event_index(event))
2313 cpu_buffer->write_stamp =
2314 cpu_buffer->commit_page->page->time_stamp;
2315 else if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) {
2316 delta = event->array[0];
2317 delta <<= TS_SHIFT;
2318 delta += event->time_delta;
2319 cpu_buffer->write_stamp += delta;
2320 } else
2321 cpu_buffer->write_stamp += event->time_delta;
2322 }
2317} 2323}
2318 2324
2319static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer, 2325static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer,
@@ -2353,6 +2359,9 @@ EXPORT_SYMBOL_GPL(ring_buffer_unlock_commit);
2353 2359
2354static inline void rb_event_discard(struct ring_buffer_event *event) 2360static inline void rb_event_discard(struct ring_buffer_event *event)
2355{ 2361{
2362 if (event->type_len == RINGBUF_TYPE_TIME_EXTEND)
2363 event = skip_time_extend(event);
2364
2356 /* array[0] holds the actual length for the discarded event */ 2365 /* array[0] holds the actual length for the discarded event */
2357 event->array[0] = rb_event_data_length(event) - RB_EVNT_HDR_SIZE; 2366 event->array[0] = rb_event_data_length(event) - RB_EVNT_HDR_SIZE;
2358 event->type_len = RINGBUF_TYPE_PADDING; 2367 event->type_len = RINGBUF_TYPE_PADDING;
@@ -3049,12 +3058,12 @@ rb_buffer_peek(struct ring_buffer_per_cpu *cpu_buffer, u64 *ts,
3049 3058
3050 again: 3059 again:
3051 /* 3060 /*
3052 * We repeat when a timestamp is encountered. It is possible 3061 * We repeat when a time extend is encountered.
3053 * to get multiple timestamps from an interrupt entering just 3062 * Since the time extend is always attached to a data event,
3054 * as one timestamp is about to be written, or from discarded 3063 * we should never loop more than once.
3055 * commits. The most that we can have is the number on a single page. 3064 * (We never hit the following condition more than twice).
3056 */ 3065 */
3057 if (RB_WARN_ON(cpu_buffer, ++nr_loops > RB_TIMESTAMPS_PER_PAGE)) 3066 if (RB_WARN_ON(cpu_buffer, ++nr_loops > 2))
3058 return NULL; 3067 return NULL;
3059 3068
3060 reader = rb_get_reader_page(cpu_buffer); 3069 reader = rb_get_reader_page(cpu_buffer);
@@ -3130,14 +3139,12 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
3130 return NULL; 3139 return NULL;
3131 3140
3132 /* 3141 /*
3133 * We repeat when a timestamp is encountered. 3142 * We repeat when a time extend is encountered.
3134 * We can get multiple timestamps by nested interrupts or also 3143 * Since the time extend is always attached to a data event,
3135 * if filtering is on (discarding commits). Since discarding 3144 * we should never loop more than once.
3136 * commits can be frequent we can get a lot of timestamps. 3145 * (We never hit the following condition more than twice).
3137 * But we limit them by not adding timestamps if they begin
3138 * at the start of a page.
3139 */ 3146 */
3140 if (RB_WARN_ON(cpu_buffer, ++nr_loops > RB_TIMESTAMPS_PER_PAGE)) 3147 if (RB_WARN_ON(cpu_buffer, ++nr_loops > 2))
3141 return NULL; 3148 return NULL;
3142 3149
3143 if (rb_per_cpu_empty(cpu_buffer)) 3150 if (rb_per_cpu_empty(cpu_buffer))
@@ -3835,7 +3842,8 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
3835 if (len > (commit - read)) 3842 if (len > (commit - read))
3836 len = (commit - read); 3843 len = (commit - read);
3837 3844
3838 size = rb_event_length(event); 3845 /* Always keep the time extend and data together */
3846 size = rb_event_ts_length(event);
3839 3847
3840 if (len < size) 3848 if (len < size)
3841 goto out_unlock; 3849 goto out_unlock;
@@ -3857,7 +3865,8 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
3857 break; 3865 break;
3858 3866
3859 event = rb_reader_event(cpu_buffer); 3867 event = rb_reader_event(cpu_buffer);
3860 size = rb_event_length(event); 3868 /* Always keep the time extend and data together */
3869 size = rb_event_ts_length(event);
3861 } while (len > size); 3870 } while (len > size);
3862 3871
3863 /* update bpage */ 3872 /* update bpage */
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 001bcd2ccf4a..82d9b8106cd0 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -3996,13 +3996,9 @@ static void tracing_init_debugfs_percpu(long cpu)
3996{ 3996{
3997 struct dentry *d_percpu = tracing_dentry_percpu(); 3997 struct dentry *d_percpu = tracing_dentry_percpu();
3998 struct dentry *d_cpu; 3998 struct dentry *d_cpu;
3999 /* strlen(cpu) + MAX(log10(cpu)) + '\0' */ 3999 char cpu_dir[30]; /* 30 characters should be more than enough */
4000 char cpu_dir[7];
4001 4000
4002 if (cpu > 999 || cpu < 0) 4001 snprintf(cpu_dir, 30, "cpu%ld", cpu);
4003 return;
4004
4005 sprintf(cpu_dir, "cpu%ld", cpu);
4006 d_cpu = debugfs_create_dir(cpu_dir, d_percpu); 4002 d_cpu = debugfs_create_dir(cpu_dir, d_percpu);
4007 if (!d_cpu) { 4003 if (!d_cpu) {
4008 pr_warning("Could not create debugfs '%s' entry\n", cpu_dir); 4004 pr_warning("Could not create debugfs '%s' entry\n", cpu_dir);
diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt
index 43e3dd284b90..399751befeed 100644
--- a/tools/perf/Documentation/perf-list.txt
+++ b/tools/perf/Documentation/perf-list.txt
@@ -15,6 +15,23 @@ DESCRIPTION
15This command displays the symbolic event types which can be selected in the 15This command displays the symbolic event types which can be selected in the
16various perf commands with the -e option. 16various perf commands with the -e option.
17 17
18EVENT MODIFIERS
19---------------
20
21Events can optionally have a modifier by appending a colon and one or
22more modifiers. Modifiers allow the user to restrict when events are
23counted with 'u' for user-space, 'k' for kernel, 'h' for hypervisor.
24
25The 'p' modifier can be used for specifying how precise the instruction
26address should be. The 'p' modifier is currently only implemented for
27Intel PEBS and can be specified multiple times:
28 0 - SAMPLE_IP can have arbitrary skid
29 1 - SAMPLE_IP must have constant skid
30 2 - SAMPLE_IP requested to have 0 skid
31 3 - SAMPLE_IP must have 0 skid
32
33The PEBS implementation now supports up to 2.
34
18RAW HARDWARE EVENT DESCRIPTOR 35RAW HARDWARE EVENT DESCRIPTOR
19----------------------------- 36-----------------------------
20Even when an event is not available in a symbolic form within perf right now, 37Even when an event is not available in a symbolic form within perf right now,
diff --git a/tools/perf/Documentation/perf-probe.txt b/tools/perf/Documentation/perf-probe.txt
index 27d52dae5a43..62de1b7f4e76 100644
--- a/tools/perf/Documentation/perf-probe.txt
+++ b/tools/perf/Documentation/perf-probe.txt
@@ -16,7 +16,9 @@ or
16or 16or
17'perf probe' --list 17'perf probe' --list
18or 18or
19'perf probe' --line='FUNC[:RLN[+NUM|:RLN2]]|SRC:ALN[+NUM|:ALN2]' 19'perf probe' [options] --line='FUNC[:RLN[+NUM|:RLN2]]|SRC:ALN[+NUM|:ALN2]'
20or
21'perf probe' [options] --vars='PROBEPOINT'
20 22
21DESCRIPTION 23DESCRIPTION
22----------- 24-----------
@@ -31,6 +33,11 @@ OPTIONS
31--vmlinux=PATH:: 33--vmlinux=PATH::
32 Specify vmlinux path which has debuginfo (Dwarf binary). 34 Specify vmlinux path which has debuginfo (Dwarf binary).
33 35
36-m::
37--module=MODNAME::
38 Specify module name in which perf-probe searches probe points
39 or lines.
40
34-s:: 41-s::
35--source=PATH:: 42--source=PATH::
36 Specify path to kernel source. 43 Specify path to kernel source.
@@ -57,6 +64,15 @@ OPTIONS
57 Show source code lines which can be probed. This needs an argument 64 Show source code lines which can be probed. This needs an argument
58 which specifies a range of the source code. (see LINE SYNTAX for detail) 65 which specifies a range of the source code. (see LINE SYNTAX for detail)
59 66
67-V::
68--vars=::
69 Show available local variables at given probe point. The argument
70 syntax is the same as PROBE SYNTAX, but without ARGs.
71
72--externs::
73 (Only for --vars) Show external defined variables in addition to local
74 variables.
75
60-f:: 76-f::
61--force:: 77--force::
62 Forcibly add events with existing name. 78 Forcibly add events with existing name.
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index 3ee27dccfde9..a91f9f9e6e5c 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -83,6 +83,10 @@ OPTIONS
83--call-graph:: 83--call-graph::
84 Do call-graph (stack chain/backtrace) recording. 84 Do call-graph (stack chain/backtrace) recording.
85 85
86-q::
87--quiet::
88 Don't print any message, useful for scripting.
89
86-v:: 90-v::
87--verbose:: 91--verbose::
88 Be more verbose (show counter open errors, etc). 92 Be more verbose (show counter open errors, etc).
diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c
index 199d5e19554f..2e000c068cc5 100644
--- a/tools/perf/builtin-probe.c
+++ b/tools/perf/builtin-probe.c
@@ -50,14 +50,17 @@ static struct {
50 bool list_events; 50 bool list_events;
51 bool force_add; 51 bool force_add;
52 bool show_lines; 52 bool show_lines;
53 bool show_vars;
54 bool show_ext_vars;
55 bool mod_events;
53 int nevents; 56 int nevents;
54 struct perf_probe_event events[MAX_PROBES]; 57 struct perf_probe_event events[MAX_PROBES];
55 struct strlist *dellist; 58 struct strlist *dellist;
56 struct line_range line_range; 59 struct line_range line_range;
60 const char *target_module;
57 int max_probe_points; 61 int max_probe_points;
58} params; 62} params;
59 63
60
61/* Parse an event definition. Note that any error must die. */ 64/* Parse an event definition. Note that any error must die. */
62static int parse_probe_event(const char *str) 65static int parse_probe_event(const char *str)
63{ 66{
@@ -92,6 +95,7 @@ static int parse_probe_event_argv(int argc, const char **argv)
92 len = 0; 95 len = 0;
93 for (i = 0; i < argc; i++) 96 for (i = 0; i < argc; i++)
94 len += sprintf(&buf[len], "%s ", argv[i]); 97 len += sprintf(&buf[len], "%s ", argv[i]);
98 params.mod_events = true;
95 ret = parse_probe_event(buf); 99 ret = parse_probe_event(buf);
96 free(buf); 100 free(buf);
97 return ret; 101 return ret;
@@ -100,9 +104,10 @@ static int parse_probe_event_argv(int argc, const char **argv)
100static int opt_add_probe_event(const struct option *opt __used, 104static int opt_add_probe_event(const struct option *opt __used,
101 const char *str, int unset __used) 105 const char *str, int unset __used)
102{ 106{
103 if (str) 107 if (str) {
108 params.mod_events = true;
104 return parse_probe_event(str); 109 return parse_probe_event(str);
105 else 110 } else
106 return 0; 111 return 0;
107} 112}
108 113
@@ -110,6 +115,7 @@ static int opt_del_probe_event(const struct option *opt __used,
110 const char *str, int unset __used) 115 const char *str, int unset __used)
111{ 116{
112 if (str) { 117 if (str) {
118 params.mod_events = true;
113 if (!params.dellist) 119 if (!params.dellist)
114 params.dellist = strlist__new(true, NULL); 120 params.dellist = strlist__new(true, NULL);
115 strlist__add(params.dellist, str); 121 strlist__add(params.dellist, str);
@@ -130,6 +136,25 @@ static int opt_show_lines(const struct option *opt __used,
130 136
131 return ret; 137 return ret;
132} 138}
139
140static int opt_show_vars(const struct option *opt __used,
141 const char *str, int unset __used)
142{
143 struct perf_probe_event *pev = &params.events[params.nevents];
144 int ret;
145
146 if (!str)
147 return 0;
148
149 ret = parse_probe_event(str);
150 if (!ret && pev->nargs != 0) {
151 pr_err(" Error: '--vars' doesn't accept arguments.\n");
152 return -EINVAL;
153 }
154 params.show_vars = true;
155
156 return ret;
157}
133#endif 158#endif
134 159
135static const char * const probe_usage[] = { 160static const char * const probe_usage[] = {
@@ -138,7 +163,8 @@ static const char * const probe_usage[] = {
138 "perf probe [<options>] --del '[GROUP:]EVENT' ...", 163 "perf probe [<options>] --del '[GROUP:]EVENT' ...",
139 "perf probe --list", 164 "perf probe --list",
140#ifdef DWARF_SUPPORT 165#ifdef DWARF_SUPPORT
141 "perf probe --line 'LINEDESC'", 166 "perf probe [<options>] --line 'LINEDESC'",
167 "perf probe [<options>] --vars 'PROBEPOINT'",
142#endif 168#endif
143 NULL 169 NULL
144}; 170};
@@ -180,10 +206,17 @@ static const struct option options[] = {
180 OPT_CALLBACK('L', "line", NULL, 206 OPT_CALLBACK('L', "line", NULL,
181 "FUNC[:RLN[+NUM|-RLN2]]|SRC:ALN[+NUM|-ALN2]", 207 "FUNC[:RLN[+NUM|-RLN2]]|SRC:ALN[+NUM|-ALN2]",
182 "Show source code lines.", opt_show_lines), 208 "Show source code lines.", opt_show_lines),
209 OPT_CALLBACK('V', "vars", NULL,
210 "FUNC[@SRC][+OFF|%return|:RL|;PT]|SRC:AL|SRC;PT",
211 "Show accessible variables on PROBEDEF", opt_show_vars),
212 OPT_BOOLEAN('\0', "externs", &params.show_ext_vars,
213 "Show external variables too (with --vars only)"),
183 OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name, 214 OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
184 "file", "vmlinux pathname"), 215 "file", "vmlinux pathname"),
185 OPT_STRING('s', "source", &symbol_conf.source_prefix, 216 OPT_STRING('s', "source", &symbol_conf.source_prefix,
186 "directory", "path to kernel source"), 217 "directory", "path to kernel source"),
218 OPT_STRING('m', "module", &params.target_module,
219 "modname", "target module name"),
187#endif 220#endif
188 OPT__DRY_RUN(&probe_event_dry_run), 221 OPT__DRY_RUN(&probe_event_dry_run),
189 OPT_INTEGER('\0', "max-probes", &params.max_probe_points, 222 OPT_INTEGER('\0', "max-probes", &params.max_probe_points,
@@ -217,7 +250,7 @@ int cmd_probe(int argc, const char **argv, const char *prefix __used)
217 usage_with_options(probe_usage, options); 250 usage_with_options(probe_usage, options);
218 251
219 if (params.list_events) { 252 if (params.list_events) {
220 if (params.nevents != 0 || params.dellist) { 253 if (params.mod_events) {
221 pr_err(" Error: Don't use --list with --add/--del.\n"); 254 pr_err(" Error: Don't use --list with --add/--del.\n");
222 usage_with_options(probe_usage, options); 255 usage_with_options(probe_usage, options);
223 } 256 }
@@ -225,6 +258,10 @@ int cmd_probe(int argc, const char **argv, const char *prefix __used)
225 pr_err(" Error: Don't use --list with --line.\n"); 258 pr_err(" Error: Don't use --list with --line.\n");
226 usage_with_options(probe_usage, options); 259 usage_with_options(probe_usage, options);
227 } 260 }
261 if (params.show_vars) {
262 pr_err(" Error: Don't use --list with --vars.\n");
263 usage_with_options(probe_usage, options);
264 }
228 ret = show_perf_probe_events(); 265 ret = show_perf_probe_events();
229 if (ret < 0) 266 if (ret < 0)
230 pr_err(" Error: Failed to show event list. (%d)\n", 267 pr_err(" Error: Failed to show event list. (%d)\n",
@@ -234,17 +271,35 @@ int cmd_probe(int argc, const char **argv, const char *prefix __used)
234 271
235#ifdef DWARF_SUPPORT 272#ifdef DWARF_SUPPORT
236 if (params.show_lines) { 273 if (params.show_lines) {
237 if (params.nevents != 0 || params.dellist) { 274 if (params.mod_events) {
238 pr_warning(" Error: Don't use --line with" 275 pr_err(" Error: Don't use --line with"
239 " --add/--del.\n"); 276 " --add/--del.\n");
277 usage_with_options(probe_usage, options);
278 }
279 if (params.show_vars) {
280 pr_err(" Error: Don't use --line with --vars.\n");
240 usage_with_options(probe_usage, options); 281 usage_with_options(probe_usage, options);
241 } 282 }
242 283
243 ret = show_line_range(&params.line_range); 284 ret = show_line_range(&params.line_range, params.target_module);
244 if (ret < 0) 285 if (ret < 0)
245 pr_err(" Error: Failed to show lines. (%d)\n", ret); 286 pr_err(" Error: Failed to show lines. (%d)\n", ret);
246 return ret; 287 return ret;
247 } 288 }
289 if (params.show_vars) {
290 if (params.mod_events) {
291 pr_err(" Error: Don't use --vars with"
292 " --add/--del.\n");
293 usage_with_options(probe_usage, options);
294 }
295 ret = show_available_vars(params.events, params.nevents,
296 params.max_probe_points,
297 params.target_module,
298 params.show_ext_vars);
299 if (ret < 0)
300 pr_err(" Error: Failed to show vars. (%d)\n", ret);
301 return ret;
302 }
248#endif 303#endif
249 304
250 if (params.dellist) { 305 if (params.dellist) {
@@ -258,8 +313,9 @@ int cmd_probe(int argc, const char **argv, const char *prefix __used)
258 313
259 if (params.nevents) { 314 if (params.nevents) {
260 ret = add_perf_probe_events(params.events, params.nevents, 315 ret = add_perf_probe_events(params.events, params.nevents,
261 params.force_add, 316 params.max_probe_points,
262 params.max_probe_points); 317 params.target_module,
318 params.force_add);
263 if (ret < 0) { 319 if (ret < 0) {
264 pr_err(" Error: Failed to add events. (%d)\n", ret); 320 pr_err(" Error: Failed to add events. (%d)\n", ret);
265 return ret; 321 return ret;
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index ff77b805de71..4e75583ddd6d 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -353,7 +353,7 @@ try_again:
353 } 353 }
354 354
355 if (read(fd[nr_cpu][counter][thread_index], &read_data, sizeof(read_data)) == -1) { 355 if (read(fd[nr_cpu][counter][thread_index], &read_data, sizeof(read_data)) == -1) {
356 perror("Unable to read perf file descriptor\n"); 356 perror("Unable to read perf file descriptor");
357 exit(-1); 357 exit(-1);
358 } 358 }
359 359
@@ -626,7 +626,7 @@ static int __cmd_record(int argc, const char **argv)
626 626
627 nr_cpus = read_cpu_map(cpu_list); 627 nr_cpus = read_cpu_map(cpu_list);
628 if (nr_cpus < 1) { 628 if (nr_cpus < 1) {
629 perror("failed to collect number of CPUs\n"); 629 perror("failed to collect number of CPUs");
630 return -1; 630 return -1;
631 } 631 }
632 632
@@ -761,6 +761,9 @@ static int __cmd_record(int argc, const char **argv)
761 } 761 }
762 } 762 }
763 763
764 if (quiet)
765 return 0;
766
764 fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking); 767 fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);
765 768
766 /* 769 /*
@@ -820,6 +823,7 @@ static const struct option options[] = {
820 "do call-graph (stack chain/backtrace) recording"), 823 "do call-graph (stack chain/backtrace) recording"),
821 OPT_INCR('v', "verbose", &verbose, 824 OPT_INCR('v', "verbose", &verbose,
822 "be more verbose (show counter open errors, etc)"), 825 "be more verbose (show counter open errors, etc)"),
826 OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
823 OPT_BOOLEAN('s', "stat", &inherit_stat, 827 OPT_BOOLEAN('s', "stat", &inherit_stat,
824 "per thread counts"), 828 "per thread counts"),
825 OPT_BOOLEAN('d', "data", &sample_address, 829 OPT_BOOLEAN('d', "data", &sample_address,
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 40a6a2992d15..2f8df45c4dcb 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -46,9 +46,6 @@ static struct scripting_ops *scripting_ops;
46 46
47static void setup_scripting(void) 47static void setup_scripting(void)
48{ 48{
49 /* make sure PERF_EXEC_PATH is set for scripts */
50 perf_set_argv_exec_path(perf_exec_path());
51
52 setup_perl_scripting(); 49 setup_perl_scripting();
53 setup_python_scripting(); 50 setup_python_scripting();
54 51
@@ -285,7 +282,7 @@ static int parse_scriptname(const struct option *opt __used,
285 script++; 282 script++;
286 } else { 283 } else {
287 script = str; 284 script = str;
288 ext = strchr(script, '.'); 285 ext = strrchr(script, '.');
289 if (!ext) { 286 if (!ext) {
290 fprintf(stderr, "invalid script extension"); 287 fprintf(stderr, "invalid script extension");
291 return -1; 288 return -1;
@@ -593,6 +590,9 @@ int cmd_trace(int argc, const char **argv, const char *prefix __used)
593 suffix = REPORT_SUFFIX; 590 suffix = REPORT_SUFFIX;
594 } 591 }
595 592
593 /* make sure PERF_EXEC_PATH is set for scripts */
594 perf_set_argv_exec_path(perf_exec_path());
595
596 if (!suffix && argc >= 2 && strncmp(argv[1], "-", strlen("-")) != 0) { 596 if (!suffix && argc >= 2 && strncmp(argv[1], "-", strlen("-")) != 0) {
597 char *record_script_path, *report_script_path; 597 char *record_script_path, *report_script_path;
598 int live_pipe[2]; 598 int live_pipe[2];
@@ -625,12 +625,13 @@ int cmd_trace(int argc, const char **argv, const char *prefix __used)
625 dup2(live_pipe[1], 1); 625 dup2(live_pipe[1], 1);
626 close(live_pipe[0]); 626 close(live_pipe[0]);
627 627
628 __argv = malloc(5 * sizeof(const char *)); 628 __argv = malloc(6 * sizeof(const char *));
629 __argv[0] = "/bin/sh"; 629 __argv[0] = "/bin/sh";
630 __argv[1] = record_script_path; 630 __argv[1] = record_script_path;
631 __argv[2] = "-o"; 631 __argv[2] = "-q";
632 __argv[3] = "-"; 632 __argv[3] = "-o";
633 __argv[4] = NULL; 633 __argv[4] = "-";
634 __argv[5] = NULL;
634 635
635 execvp("/bin/sh", (char **)__argv); 636 execvp("/bin/sh", (char **)__argv);
636 exit(-1); 637 exit(-1);
diff --git a/tools/perf/scripts/perl/bin/failed-syscalls-report b/tools/perf/scripts/perl/bin/failed-syscalls-report
index e3a5e55d54ff..4028d92dc4ae 100644
--- a/tools/perf/scripts/perl/bin/failed-syscalls-report
+++ b/tools/perf/scripts/perl/bin/failed-syscalls-report
@@ -7,4 +7,4 @@ if [ $# -gt 0 ] ; then
7 shift 7 shift
8 fi 8 fi
9fi 9fi
10perf trace $@ -s ~/libexec/perf-core/scripts/perl/failed-syscalls.pl $comm 10perf trace $@ -s "$PERF_EXEC_PATH"/scripts/perl/failed-syscalls.pl $comm
diff --git a/tools/perf/scripts/perl/bin/rw-by-file-report b/tools/perf/scripts/perl/bin/rw-by-file-report
index d83070b7eeb5..ba25f4d41fb0 100644
--- a/tools/perf/scripts/perl/bin/rw-by-file-report
+++ b/tools/perf/scripts/perl/bin/rw-by-file-report
@@ -7,7 +7,7 @@ if [ $# -lt 1 ] ; then
7fi 7fi
8comm=$1 8comm=$1
9shift 9shift
10perf trace $@ -s ~/libexec/perf-core/scripts/perl/rw-by-file.pl $comm 10perf trace $@ -s "$PERF_EXEC_PATH"/scripts/perl/rw-by-file.pl $comm
11 11
12 12
13 13
diff --git a/tools/perf/scripts/perl/bin/rw-by-pid-report b/tools/perf/scripts/perl/bin/rw-by-pid-report
index 7ef46983f62f..641a3f5d085c 100644
--- a/tools/perf/scripts/perl/bin/rw-by-pid-report
+++ b/tools/perf/scripts/perl/bin/rw-by-pid-report
@@ -1,6 +1,6 @@
1#!/bin/bash 1#!/bin/bash
2# description: system-wide r/w activity 2# description: system-wide r/w activity
3perf trace $@ -s ~/libexec/perf-core/scripts/perl/rw-by-pid.pl 3perf trace $@ -s "$PERF_EXEC_PATH"/scripts/perl/rw-by-pid.pl
4 4
5 5
6 6
diff --git a/tools/perf/scripts/perl/bin/rwtop-report b/tools/perf/scripts/perl/bin/rwtop-report
index 93e698cd3f38..4918dba77021 100644
--- a/tools/perf/scripts/perl/bin/rwtop-report
+++ b/tools/perf/scripts/perl/bin/rwtop-report
@@ -17,7 +17,7 @@ if [ "$n_args" -gt 0 ] ; then
17 interval=$1 17 interval=$1
18 shift 18 shift
19fi 19fi
20perf trace $@ -s ~/libexec/perf-core/scripts/perl/rwtop.pl $interval 20perf trace $@ -s "$PERF_EXEC_PATH"/scripts/perl/rwtop.pl $interval
21 21
22 22
23 23
diff --git a/tools/perf/scripts/perl/bin/wakeup-latency-report b/tools/perf/scripts/perl/bin/wakeup-latency-report
index a0d898f9ca1d..49052ebcb632 100644
--- a/tools/perf/scripts/perl/bin/wakeup-latency-report
+++ b/tools/perf/scripts/perl/bin/wakeup-latency-report
@@ -1,6 +1,6 @@
1#!/bin/bash 1#!/bin/bash
2# description: system-wide min/max/avg wakeup latency 2# description: system-wide min/max/avg wakeup latency
3perf trace $@ -s ~/libexec/perf-core/scripts/perl/wakeup-latency.pl 3perf trace $@ -s "$PERF_EXEC_PATH"/scripts/perl/wakeup-latency.pl
4 4
5 5
6 6
diff --git a/tools/perf/scripts/perl/bin/workqueue-stats-report b/tools/perf/scripts/perl/bin/workqueue-stats-report
index 35081132ef97..df0c65f4ca93 100644
--- a/tools/perf/scripts/perl/bin/workqueue-stats-report
+++ b/tools/perf/scripts/perl/bin/workqueue-stats-report
@@ -1,6 +1,6 @@
1#!/bin/bash 1#!/bin/bash
2# description: workqueue stats (ins/exe/create/destroy) 2# description: workqueue stats (ins/exe/create/destroy)
3perf trace $@ -s ~/libexec/perf-core/scripts/perl/workqueue-stats.pl 3perf trace $@ -s "$PERF_EXEC_PATH"/scripts/perl/workqueue-stats.pl
4 4
5 5
6 6
diff --git a/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Util.py b/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Util.py
index 9689bc0acd9f..13cc02b5893a 100644
--- a/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Util.py
+++ b/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Util.py
@@ -6,6 +6,14 @@
6# Public License ("GPL") version 2 as published by the Free Software 6# Public License ("GPL") version 2 as published by the Free Software
7# Foundation. 7# Foundation.
8 8
9import errno, os
10
11FUTEX_WAIT = 0
12FUTEX_WAKE = 1
13FUTEX_PRIVATE_FLAG = 128
14FUTEX_CLOCK_REALTIME = 256
15FUTEX_CMD_MASK = ~(FUTEX_PRIVATE_FLAG | FUTEX_CLOCK_REALTIME)
16
9NSECS_PER_SEC = 1000000000 17NSECS_PER_SEC = 1000000000
10 18
11def avg(total, n): 19def avg(total, n):
@@ -24,5 +32,55 @@ def nsecs_str(nsecs):
24 str = "%5u.%09u" % (nsecs_secs(nsecs), nsecs_nsecs(nsecs)), 32 str = "%5u.%09u" % (nsecs_secs(nsecs), nsecs_nsecs(nsecs)),
25 return str 33 return str
26 34
35def add_stats(dict, key, value):
36 if not dict.has_key(key):
37 dict[key] = (value, value, value, 1)
38 else:
39 min, max, avg, count = dict[key]
40 if value < min:
41 min = value
42 if value > max:
43 max = value
44 avg = (avg + value) / 2
45 dict[key] = (min, max, avg, count + 1)
46
27def clear_term(): 47def clear_term():
28 print("\x1b[H\x1b[2J") 48 print("\x1b[H\x1b[2J")
49
50audit_package_warned = False
51
52try:
53 import audit
54 machine_to_id = {
55 'x86_64': audit.MACH_86_64,
56 'alpha' : audit.MACH_ALPHA,
57 'ia64' : audit.MACH_IA64,
58 'ppc' : audit.MACH_PPC,
59 'ppc64' : audit.MACH_PPC64,
60 's390' : audit.MACH_S390,
61 's390x' : audit.MACH_S390X,
62 'i386' : audit.MACH_X86,
63 'i586' : audit.MACH_X86,
64 'i686' : audit.MACH_X86,
65 }
66 try:
67 machine_to_id['armeb'] = audit.MACH_ARMEB
68 except:
69 pass
70 machine_id = machine_to_id[os.uname()[4]]
71except:
72 if not audit_package_warned:
73 audit_package_warned = True
74 print "Install the audit-libs-python package to get syscall names"
75
76def syscall_name(id):
77 try:
78 return audit.audit_syscall_to_name(id, machine_id)
79 except:
80 return str(id)
81
82def strerror(nr):
83 try:
84 return errno.errorcode[abs(nr)]
85 except:
86 return "Unknown %d errno" % nr
diff --git a/tools/perf/scripts/python/bin/failed-syscalls-by-pid-report b/tools/perf/scripts/python/bin/failed-syscalls-by-pid-report
index 30293545fcc2..03587021463d 100644
--- a/tools/perf/scripts/python/bin/failed-syscalls-by-pid-report
+++ b/tools/perf/scripts/python/bin/failed-syscalls-by-pid-report
@@ -7,4 +7,4 @@ if [ $# -gt 0 ] ; then
7 shift 7 shift
8 fi 8 fi
9fi 9fi
10perf trace $@ -s ~/libexec/perf-core/scripts/python/failed-syscalls-by-pid.py $comm 10perf trace $@ -s "$PERF_EXEC_PATH"/scripts/python/failed-syscalls-by-pid.py $comm
diff --git a/tools/perf/scripts/python/bin/futex-contention-record b/tools/perf/scripts/python/bin/futex-contention-record
new file mode 100644
index 000000000000..5ecbb433caf4
--- /dev/null
+++ b/tools/perf/scripts/python/bin/futex-contention-record
@@ -0,0 +1,2 @@
1#!/bin/bash
2perf record -a -e syscalls:sys_enter_futex -e syscalls:sys_exit_futex $@
diff --git a/tools/perf/scripts/python/bin/futex-contention-report b/tools/perf/scripts/python/bin/futex-contention-report
new file mode 100644
index 000000000000..c8268138fb7e
--- /dev/null
+++ b/tools/perf/scripts/python/bin/futex-contention-report
@@ -0,0 +1,4 @@
1#!/bin/bash
2# description: futext contention measurement
3
4perf trace $@ -s "$PERF_EXEC_PATH"/scripts/python/futex-contention.py
diff --git a/tools/perf/scripts/python/bin/netdev-times-report b/tools/perf/scripts/python/bin/netdev-times-report
index c3d0a638123d..4ad361b31249 100644
--- a/tools/perf/scripts/python/bin/netdev-times-report
+++ b/tools/perf/scripts/python/bin/netdev-times-report
@@ -2,4 +2,4 @@
2# description: display a process of packet and processing time 2# description: display a process of packet and processing time
3# args: [tx] [rx] [dev=] [debug] 3# args: [tx] [rx] [dev=] [debug]
4 4
5perf trace -s ~/libexec/perf-core/scripts/python/netdev-times.py $@ 5perf trace -s "$PERF_EXEC_PATH"/scripts/python/netdev-times.py $@
diff --git a/tools/perf/scripts/python/bin/sched-migration-report b/tools/perf/scripts/python/bin/sched-migration-report
index 61d05f72e443..df1791f07c24 100644
--- a/tools/perf/scripts/python/bin/sched-migration-report
+++ b/tools/perf/scripts/python/bin/sched-migration-report
@@ -1,3 +1,3 @@
1#!/bin/bash 1#!/bin/bash
2# description: sched migration overview 2# description: sched migration overview
3perf trace $@ -s ~/libexec/perf-core/scripts/python/sched-migration.py 3perf trace $@ -s "$PERF_EXEC_PATH"/scripts/python/sched-migration.py
diff --git a/tools/perf/scripts/python/bin/sctop-report b/tools/perf/scripts/python/bin/sctop-report
index b01c842ae7b4..36b409c05e50 100644
--- a/tools/perf/scripts/python/bin/sctop-report
+++ b/tools/perf/scripts/python/bin/sctop-report
@@ -21,4 +21,4 @@ elif [ "$n_args" -gt 0 ] ; then
21 interval=$1 21 interval=$1
22 shift 22 shift
23fi 23fi
24perf trace $@ -s ~/libexec/perf-core/scripts/python/sctop.py $comm $interval 24perf trace $@ -s "$PERF_EXEC_PATH"/scripts/python/sctop.py $comm $interval
diff --git a/tools/perf/scripts/python/bin/syscall-counts-by-pid-report b/tools/perf/scripts/python/bin/syscall-counts-by-pid-report
index 9e9d8ddd72ce..4eb88c9fc83c 100644
--- a/tools/perf/scripts/python/bin/syscall-counts-by-pid-report
+++ b/tools/perf/scripts/python/bin/syscall-counts-by-pid-report
@@ -7,4 +7,4 @@ if [ $# -gt 0 ] ; then
7 shift 7 shift
8 fi 8 fi
9fi 9fi
10perf trace $@ -s ~/libexec/perf-core/scripts/python/syscall-counts-by-pid.py $comm 10perf trace $@ -s "$PERF_EXEC_PATH"/scripts/python/syscall-counts-by-pid.py $comm
diff --git a/tools/perf/scripts/python/bin/syscall-counts-report b/tools/perf/scripts/python/bin/syscall-counts-report
index dc076b618796..cb2f9c5cf17e 100644
--- a/tools/perf/scripts/python/bin/syscall-counts-report
+++ b/tools/perf/scripts/python/bin/syscall-counts-report
@@ -7,4 +7,4 @@ if [ $# -gt 0 ] ; then
7 shift 7 shift
8 fi 8 fi
9fi 9fi
10perf trace $@ -s ~/libexec/perf-core/scripts/python/syscall-counts.py $comm 10perf trace $@ -s "$PERF_EXEC_PATH"/scripts/python/syscall-counts.py $comm
diff --git a/tools/perf/scripts/python/failed-syscalls-by-pid.py b/tools/perf/scripts/python/failed-syscalls-by-pid.py
index 0ca02278fe69..acd7848717b3 100644
--- a/tools/perf/scripts/python/failed-syscalls-by-pid.py
+++ b/tools/perf/scripts/python/failed-syscalls-by-pid.py
@@ -13,21 +13,26 @@ sys.path.append(os.environ['PERF_EXEC_PATH'] + \
13 13
14from perf_trace_context import * 14from perf_trace_context import *
15from Core import * 15from Core import *
16from Util import *
16 17
17usage = "perf trace -s syscall-counts-by-pid.py [comm]\n"; 18usage = "perf trace -s syscall-counts-by-pid.py [comm|pid]\n";
18 19
19for_comm = None 20for_comm = None
21for_pid = None
20 22
21if len(sys.argv) > 2: 23if len(sys.argv) > 2:
22 sys.exit(usage) 24 sys.exit(usage)
23 25
24if len(sys.argv) > 1: 26if len(sys.argv) > 1:
25 for_comm = sys.argv[1] 27 try:
28 for_pid = int(sys.argv[1])
29 except:
30 for_comm = sys.argv[1]
26 31
27syscalls = autodict() 32syscalls = autodict()
28 33
29def trace_begin(): 34def trace_begin():
30 pass 35 print "Press control+C to stop and show the summary"
31 36
32def trace_end(): 37def trace_end():
33 print_error_totals() 38 print_error_totals()
@@ -35,9 +40,9 @@ def trace_end():
35def raw_syscalls__sys_exit(event_name, context, common_cpu, 40def raw_syscalls__sys_exit(event_name, context, common_cpu,
36 common_secs, common_nsecs, common_pid, common_comm, 41 common_secs, common_nsecs, common_pid, common_comm,
37 id, ret): 42 id, ret):
38 if for_comm is not None: 43 if (for_comm and common_comm != for_comm) or \
39 if common_comm != for_comm: 44 (for_pid and common_pid != for_pid ):
40 return 45 return
41 46
42 if ret < 0: 47 if ret < 0:
43 try: 48 try:
@@ -62,7 +67,7 @@ def print_error_totals():
62 print "\n%s [%d]\n" % (comm, pid), 67 print "\n%s [%d]\n" % (comm, pid),
63 id_keys = syscalls[comm][pid].keys() 68 id_keys = syscalls[comm][pid].keys()
64 for id in id_keys: 69 for id in id_keys:
65 print " syscall: %-16d\n" % (id), 70 print " syscall: %-16s\n" % syscall_name(id),
66 ret_keys = syscalls[comm][pid][id].keys() 71 ret_keys = syscalls[comm][pid][id].keys()
67 for ret, val in sorted(syscalls[comm][pid][id].iteritems(), key = lambda(k, v): (v, k), reverse = True): 72 for ret, val in sorted(syscalls[comm][pid][id].iteritems(), key = lambda(k, v): (v, k), reverse = True):
68 print " err = %-20d %10d\n" % (ret, val), 73 print " err = %-20s %10d\n" % (strerror(ret), val),
diff --git a/tools/perf/scripts/python/futex-contention.py b/tools/perf/scripts/python/futex-contention.py
new file mode 100644
index 000000000000..11e70a388d41
--- /dev/null
+++ b/tools/perf/scripts/python/futex-contention.py
@@ -0,0 +1,50 @@
1# futex contention
2# (c) 2010, Arnaldo Carvalho de Melo <acme@redhat.com>
3# Licensed under the terms of the GNU GPL License version 2
4#
5# Translation of:
6#
7# http://sourceware.org/systemtap/wiki/WSFutexContention
8#
9# to perf python scripting.
10#
11# Measures futex contention
12
13import os, sys
14sys.path.append(os.environ['PERF_EXEC_PATH'] + '/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
15from Util import *
16
17process_names = {}
18thread_thislock = {}
19thread_blocktime = {}
20
21lock_waits = {} # long-lived stats on (tid,lock) blockage elapsed time
22process_names = {} # long-lived pid-to-execname mapping
23
24def syscalls__sys_enter_futex(event, ctxt, cpu, s, ns, tid, comm,
25 nr, uaddr, op, val, utime, uaddr2, val3):
26 cmd = op & FUTEX_CMD_MASK
27 if cmd != FUTEX_WAIT:
28 return # we don't care about originators of WAKE events
29
30 process_names[tid] = comm
31 thread_thislock[tid] = uaddr
32 thread_blocktime[tid] = nsecs(s, ns)
33
34def syscalls__sys_exit_futex(event, ctxt, cpu, s, ns, tid, comm,
35 nr, ret):
36 if thread_blocktime.has_key(tid):
37 elapsed = nsecs(s, ns) - thread_blocktime[tid]
38 add_stats(lock_waits, (tid, thread_thislock[tid]), elapsed)
39 del thread_blocktime[tid]
40 del thread_thislock[tid]
41
42def trace_begin():
43 print "Press control+C to stop and show the summary"
44
45def trace_end():
46 for (tid, lock) in lock_waits:
47 min, max, avg, count = lock_waits[tid, lock]
48 print "%s[%d] lock %x contended %d times, %d avg ns" % \
49 (process_names[tid], tid, lock, count, avg)
50
diff --git a/tools/perf/scripts/python/sctop.py b/tools/perf/scripts/python/sctop.py
index 6cafad40c296..7a6ec2c7d8ab 100644
--- a/tools/perf/scripts/python/sctop.py
+++ b/tools/perf/scripts/python/sctop.py
@@ -8,10 +8,7 @@
8# will be refreshed every [interval] seconds. The default interval is 8# will be refreshed every [interval] seconds. The default interval is
9# 3 seconds. 9# 3 seconds.
10 10
11import thread 11import os, sys, thread, time
12import time
13import os
14import sys
15 12
16sys.path.append(os.environ['PERF_EXEC_PATH'] + \ 13sys.path.append(os.environ['PERF_EXEC_PATH'] + \
17 '/scripts/python/Perf-Trace-Util/lib/Perf/Trace') 14 '/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
@@ -20,7 +17,7 @@ from perf_trace_context import *
20from Core import * 17from Core import *
21from Util import * 18from Util import *
22 19
23usage = "perf trace -s syscall-counts.py [comm] [interval]\n"; 20usage = "perf trace -s sctop.py [comm] [interval]\n";
24 21
25for_comm = None 22for_comm = None
26default_interval = 3 23default_interval = 3
@@ -71,7 +68,7 @@ def print_syscall_totals(interval):
71 for id, val in sorted(syscalls.iteritems(), key = lambda(k, v): (v, k), \ 68 for id, val in sorted(syscalls.iteritems(), key = lambda(k, v): (v, k), \
72 reverse = True): 69 reverse = True):
73 try: 70 try:
74 print "%-40d %10d\n" % (id, val), 71 print "%-40s %10d\n" % (syscall_name(id), val),
75 except TypeError: 72 except TypeError:
76 pass 73 pass
77 syscalls.clear() 74 syscalls.clear()
diff --git a/tools/perf/scripts/python/syscall-counts-by-pid.py b/tools/perf/scripts/python/syscall-counts-by-pid.py
index af722d6a4b3f..d1ee3ec10cf2 100644
--- a/tools/perf/scripts/python/syscall-counts-by-pid.py
+++ b/tools/perf/scripts/python/syscall-counts-by-pid.py
@@ -5,29 +5,33 @@
5# Displays system-wide system call totals, broken down by syscall. 5# Displays system-wide system call totals, broken down by syscall.
6# If a [comm] arg is specified, only syscalls called by [comm] are displayed. 6# If a [comm] arg is specified, only syscalls called by [comm] are displayed.
7 7
8import os 8import os, sys
9import sys
10 9
11sys.path.append(os.environ['PERF_EXEC_PATH'] + \ 10sys.path.append(os.environ['PERF_EXEC_PATH'] + \
12 '/scripts/python/Perf-Trace-Util/lib/Perf/Trace') 11 '/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
13 12
14from perf_trace_context import * 13from perf_trace_context import *
15from Core import * 14from Core import *
15from Util import syscall_name
16 16
17usage = "perf trace -s syscall-counts-by-pid.py [comm]\n"; 17usage = "perf trace -s syscall-counts-by-pid.py [comm]\n";
18 18
19for_comm = None 19for_comm = None
20for_pid = None
20 21
21if len(sys.argv) > 2: 22if len(sys.argv) > 2:
22 sys.exit(usage) 23 sys.exit(usage)
23 24
24if len(sys.argv) > 1: 25if len(sys.argv) > 1:
25 for_comm = sys.argv[1] 26 try:
27 for_pid = int(sys.argv[1])
28 except:
29 for_comm = sys.argv[1]
26 30
27syscalls = autodict() 31syscalls = autodict()
28 32
29def trace_begin(): 33def trace_begin():
30 pass 34 print "Press control+C to stop and show the summary"
31 35
32def trace_end(): 36def trace_end():
33 print_syscall_totals() 37 print_syscall_totals()
@@ -35,9 +39,10 @@ def trace_end():
35def raw_syscalls__sys_enter(event_name, context, common_cpu, 39def raw_syscalls__sys_enter(event_name, context, common_cpu,
36 common_secs, common_nsecs, common_pid, common_comm, 40 common_secs, common_nsecs, common_pid, common_comm,
37 id, args): 41 id, args):
38 if for_comm is not None: 42
39 if common_comm != for_comm: 43 if (for_comm and common_comm != for_comm) or \
40 return 44 (for_pid and common_pid != for_pid ):
45 return
41 try: 46 try:
42 syscalls[common_comm][common_pid][id] += 1 47 syscalls[common_comm][common_pid][id] += 1
43 except TypeError: 48 except TypeError:
@@ -61,4 +66,4 @@ def print_syscall_totals():
61 id_keys = syscalls[comm][pid].keys() 66 id_keys = syscalls[comm][pid].keys()
62 for id, val in sorted(syscalls[comm][pid].iteritems(), \ 67 for id, val in sorted(syscalls[comm][pid].iteritems(), \
63 key = lambda(k, v): (v, k), reverse = True): 68 key = lambda(k, v): (v, k), reverse = True):
64 print " %-38d %10d\n" % (id, val), 69 print " %-38s %10d\n" % (syscall_name(id), val),
diff --git a/tools/perf/scripts/python/syscall-counts.py b/tools/perf/scripts/python/syscall-counts.py
index f977e85ff049..ea183dc82d29 100644
--- a/tools/perf/scripts/python/syscall-counts.py
+++ b/tools/perf/scripts/python/syscall-counts.py
@@ -13,6 +13,7 @@ sys.path.append(os.environ['PERF_EXEC_PATH'] + \
13 13
14from perf_trace_context import * 14from perf_trace_context import *
15from Core import * 15from Core import *
16from Util import syscall_name
16 17
17usage = "perf trace -s syscall-counts.py [comm]\n"; 18usage = "perf trace -s syscall-counts.py [comm]\n";
18 19
@@ -27,7 +28,7 @@ if len(sys.argv) > 1:
27syscalls = autodict() 28syscalls = autodict()
28 29
29def trace_begin(): 30def trace_begin():
30 pass 31 print "Press control+C to stop and show the summary"
31 32
32def trace_end(): 33def trace_end():
33 print_syscall_totals() 34 print_syscall_totals()
@@ -55,4 +56,4 @@ def print_syscall_totals():
55 56
56 for id, val in sorted(syscalls.iteritems(), key = lambda(k, v): (v, k), \ 57 for id, val in sorted(syscalls.iteritems(), key = lambda(k, v): (v, k), \
57 reverse = True): 58 reverse = True):
58 print "%-40d %10d\n" % (id, val), 59 print "%-40s %10d\n" % (syscall_name(id), val),
diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c
index f9c7e3ad1aa7..c8d81b00089d 100644
--- a/tools/perf/util/debug.c
+++ b/tools/perf/util/debug.c
@@ -12,8 +12,8 @@
12#include "debug.h" 12#include "debug.h"
13#include "util.h" 13#include "util.h"
14 14
15int verbose = 0; 15int verbose;
16bool dump_trace = false; 16bool dump_trace = false, quiet = false;
17 17
18int eprintf(int level, const char *fmt, ...) 18int eprintf(int level, const char *fmt, ...)
19{ 19{
diff --git a/tools/perf/util/debug.h b/tools/perf/util/debug.h
index 7a17ee061bcb..7b514082bbaf 100644
--- a/tools/perf/util/debug.h
+++ b/tools/perf/util/debug.h
@@ -6,7 +6,7 @@
6#include "event.h" 6#include "event.h"
7 7
8extern int verbose; 8extern int verbose;
9extern bool dump_trace; 9extern bool quiet, dump_trace;
10 10
11int dump_printf(const char *fmt, ...) __attribute__((format(printf, 1, 2))); 11int dump_printf(const char *fmt, ...) __attribute__((format(printf, 1, 2)));
12void trace_event(event_t *event); 12void trace_event(event_t *event);
diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h
index 78575796d5f3..b397c0383728 100644
--- a/tools/perf/util/map.h
+++ b/tools/perf/util/map.h
@@ -215,6 +215,16 @@ struct symbol *map_groups__find_function_by_name(struct map_groups *self,
215 return map_groups__find_symbol_by_name(self, MAP__FUNCTION, name, mapp, filter); 215 return map_groups__find_symbol_by_name(self, MAP__FUNCTION, name, mapp, filter);
216} 216}
217 217
218static inline
219struct symbol *machine__find_kernel_function_by_name(struct machine *self,
220 const char *name,
221 struct map **mapp,
222 symbol_filter_t filter)
223{
224 return map_groups__find_function_by_name(&self->kmaps, name, mapp,
225 filter);
226}
227
218int map_groups__fixup_overlappings(struct map_groups *self, struct map *map, 228int map_groups__fixup_overlappings(struct map_groups *self, struct map *map,
219 int verbose, FILE *fp); 229 int verbose, FILE *fp);
220 230
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index fcc16e4349df..3b6a5297bf16 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -74,10 +74,9 @@ static int e_snprintf(char *str, size_t size, const char *format, ...)
74static char *synthesize_perf_probe_point(struct perf_probe_point *pp); 74static char *synthesize_perf_probe_point(struct perf_probe_point *pp);
75static struct machine machine; 75static struct machine machine;
76 76
77/* Initialize symbol maps and path of vmlinux */ 77/* Initialize symbol maps and path of vmlinux/modules */
78static int init_vmlinux(void) 78static int init_vmlinux(void)
79{ 79{
80 struct dso *kernel;
81 int ret; 80 int ret;
82 81
83 symbol_conf.sort_by_name = true; 82 symbol_conf.sort_by_name = true;
@@ -91,33 +90,61 @@ static int init_vmlinux(void)
91 goto out; 90 goto out;
92 } 91 }
93 92
94 ret = machine__init(&machine, "/", 0); 93 ret = machine__init(&machine, "", HOST_KERNEL_ID);
95 if (ret < 0) 94 if (ret < 0)
96 goto out; 95 goto out;
97 96
98 kernel = dso__new_kernel(symbol_conf.vmlinux_name); 97 if (machine__create_kernel_maps(&machine) < 0) {
99 if (kernel == NULL) 98 pr_debug("machine__create_kernel_maps ");
100 die("Failed to create kernel dso."); 99 goto out;
101 100 }
102 ret = __machine__create_kernel_maps(&machine, kernel);
103 if (ret < 0)
104 pr_debug("Failed to create kernel maps.\n");
105
106out: 101out:
107 if (ret < 0) 102 if (ret < 0)
108 pr_warning("Failed to init vmlinux path.\n"); 103 pr_warning("Failed to init vmlinux path.\n");
109 return ret; 104 return ret;
110} 105}
111 106
107static struct symbol *__find_kernel_function_by_name(const char *name,
108 struct map **mapp)
109{
110 return machine__find_kernel_function_by_name(&machine, name, mapp,
111 NULL);
112}
113
114const char *kernel_get_module_path(const char *module)
115{
116 struct dso *dso;
117
118 if (module) {
119 list_for_each_entry(dso, &machine.kernel_dsos, node) {
120 if (strncmp(dso->short_name + 1, module,
121 dso->short_name_len - 2) == 0)
122 goto found;
123 }
124 pr_debug("Failed to find module %s.\n", module);
125 return NULL;
126 } else {
127 dso = machine.vmlinux_maps[MAP__FUNCTION]->dso;
128 if (dso__load_vmlinux_path(dso,
129 machine.vmlinux_maps[MAP__FUNCTION], NULL) < 0) {
130 pr_debug("Failed to load kernel map.\n");
131 return NULL;
132 }
133 }
134found:
135 return dso->long_name;
136}
137
112#ifdef DWARF_SUPPORT 138#ifdef DWARF_SUPPORT
113static int open_vmlinux(void) 139static int open_vmlinux(const char *module)
114{ 140{
115 if (map__load(machine.vmlinux_maps[MAP__FUNCTION], NULL) < 0) { 141 const char *path = kernel_get_module_path(module);
116 pr_debug("Failed to load kernel map.\n"); 142 if (!path) {
117 return -EINVAL; 143 pr_err("Failed to find path of %s module", module ?: "kernel");
144 return -ENOENT;
118 } 145 }
119 pr_debug("Try to open %s\n", machine.vmlinux_maps[MAP__FUNCTION]->dso->long_name); 146 pr_debug("Try to open %s\n", path);
120 return open(machine.vmlinux_maps[MAP__FUNCTION]->dso->long_name, O_RDONLY); 147 return open(path, O_RDONLY);
121} 148}
122 149
123/* 150/*
@@ -125,20 +152,19 @@ static int open_vmlinux(void)
125 * Currently only handles kprobes. 152 * Currently only handles kprobes.
126 */ 153 */
127static int kprobe_convert_to_perf_probe(struct probe_trace_point *tp, 154static int kprobe_convert_to_perf_probe(struct probe_trace_point *tp,
128 struct perf_probe_point *pp) 155 struct perf_probe_point *pp)
129{ 156{
130 struct symbol *sym; 157 struct symbol *sym;
131 int fd, ret = -ENOENT; 158 struct map *map;
159 u64 addr;
160 int ret = -ENOENT;
132 161
133 sym = map__find_symbol_by_name(machine.vmlinux_maps[MAP__FUNCTION], 162 sym = __find_kernel_function_by_name(tp->symbol, &map);
134 tp->symbol, NULL);
135 if (sym) { 163 if (sym) {
136 fd = open_vmlinux(); 164 addr = map->unmap_ip(map, sym->start + tp->offset);
137 if (fd >= 0) { 165 pr_debug("try to find %s+%ld@%llx\n", tp->symbol,
138 ret = find_perf_probe_point(fd, 166 tp->offset, addr);
139 sym->start + tp->offset, pp); 167 ret = find_perf_probe_point((unsigned long)addr, pp);
140 close(fd);
141 }
142 } 168 }
143 if (ret <= 0) { 169 if (ret <= 0) {
144 pr_debug("Failed to find corresponding probes from " 170 pr_debug("Failed to find corresponding probes from "
@@ -156,12 +182,12 @@ static int kprobe_convert_to_perf_probe(struct probe_trace_point *tp,
156/* Try to find perf_probe_event with debuginfo */ 182/* Try to find perf_probe_event with debuginfo */
157static int try_to_find_probe_trace_events(struct perf_probe_event *pev, 183static int try_to_find_probe_trace_events(struct perf_probe_event *pev,
158 struct probe_trace_event **tevs, 184 struct probe_trace_event **tevs,
159 int max_tevs) 185 int max_tevs, const char *module)
160{ 186{
161 bool need_dwarf = perf_probe_event_need_dwarf(pev); 187 bool need_dwarf = perf_probe_event_need_dwarf(pev);
162 int fd, ntevs; 188 int fd, ntevs;
163 189
164 fd = open_vmlinux(); 190 fd = open_vmlinux(module);
165 if (fd < 0) { 191 if (fd < 0) {
166 if (need_dwarf) { 192 if (need_dwarf) {
167 pr_warning("Failed to open debuginfo file.\n"); 193 pr_warning("Failed to open debuginfo file.\n");
@@ -300,7 +326,7 @@ error:
300 * Show line-range always requires debuginfo to find source file and 326 * Show line-range always requires debuginfo to find source file and
301 * line number. 327 * line number.
302 */ 328 */
303int show_line_range(struct line_range *lr) 329int show_line_range(struct line_range *lr, const char *module)
304{ 330{
305 int l = 1; 331 int l = 1;
306 struct line_node *ln; 332 struct line_node *ln;
@@ -313,7 +339,7 @@ int show_line_range(struct line_range *lr)
313 if (ret < 0) 339 if (ret < 0)
314 return ret; 340 return ret;
315 341
316 fd = open_vmlinux(); 342 fd = open_vmlinux(module);
317 if (fd < 0) { 343 if (fd < 0) {
318 pr_warning("Failed to open debuginfo file.\n"); 344 pr_warning("Failed to open debuginfo file.\n");
319 return fd; 345 return fd;
@@ -378,11 +404,84 @@ end:
378 return ret; 404 return ret;
379} 405}
380 406
407static int show_available_vars_at(int fd, struct perf_probe_event *pev,
408 int max_vls, bool externs)
409{
410 char *buf;
411 int ret, i;
412 struct str_node *node;
413 struct variable_list *vls = NULL, *vl;
414
415 buf = synthesize_perf_probe_point(&pev->point);
416 if (!buf)
417 return -EINVAL;
418 pr_debug("Searching variables at %s\n", buf);
419
420 ret = find_available_vars_at(fd, pev, &vls, max_vls, externs);
421 if (ret > 0) {
422 /* Some variables were found */
423 fprintf(stdout, "Available variables at %s\n", buf);
424 for (i = 0; i < ret; i++) {
425 vl = &vls[i];
426 /*
427 * A probe point might be converted to
428 * several trace points.
429 */
430 fprintf(stdout, "\t@<%s+%lu>\n", vl->point.symbol,
431 vl->point.offset);
432 free(vl->point.symbol);
433 if (vl->vars) {
434 strlist__for_each(node, vl->vars)
435 fprintf(stdout, "\t\t%s\n", node->s);
436 strlist__delete(vl->vars);
437 } else
438 fprintf(stdout, "(No variables)\n");
439 }
440 free(vls);
441 } else
442 pr_err("Failed to find variables at %s (%d)\n", buf, ret);
443
444 free(buf);
445 return ret;
446}
447
448/* Show available variables on given probe point */
449int show_available_vars(struct perf_probe_event *pevs, int npevs,
450 int max_vls, const char *module, bool externs)
451{
452 int i, fd, ret = 0;
453
454 ret = init_vmlinux();
455 if (ret < 0)
456 return ret;
457
458 fd = open_vmlinux(module);
459 if (fd < 0) {
460 pr_warning("Failed to open debuginfo file.\n");
461 return fd;
462 }
463
464 setup_pager();
465
466 for (i = 0; i < npevs && ret >= 0; i++)
467 ret = show_available_vars_at(fd, &pevs[i], max_vls, externs);
468
469 close(fd);
470 return ret;
471}
472
381#else /* !DWARF_SUPPORT */ 473#else /* !DWARF_SUPPORT */
382 474
383static int kprobe_convert_to_perf_probe(struct probe_trace_point *tp, 475static int kprobe_convert_to_perf_probe(struct probe_trace_point *tp,
384 struct perf_probe_point *pp) 476 struct perf_probe_point *pp)
385{ 477{
478 struct symbol *sym;
479
480 sym = __find_kernel_function_by_name(tp->symbol, NULL);
481 if (!sym) {
482 pr_err("Failed to find symbol %s in kernel.\n", tp->symbol);
483 return -ENOENT;
484 }
386 pp->function = strdup(tp->symbol); 485 pp->function = strdup(tp->symbol);
387 if (pp->function == NULL) 486 if (pp->function == NULL)
388 return -ENOMEM; 487 return -ENOMEM;
@@ -394,7 +493,7 @@ static int kprobe_convert_to_perf_probe(struct probe_trace_point *tp,
394 493
395static int try_to_find_probe_trace_events(struct perf_probe_event *pev, 494static int try_to_find_probe_trace_events(struct perf_probe_event *pev,
396 struct probe_trace_event **tevs __unused, 495 struct probe_trace_event **tevs __unused,
397 int max_tevs __unused) 496 int max_tevs __unused, const char *mod __unused)
398{ 497{
399 if (perf_probe_event_need_dwarf(pev)) { 498 if (perf_probe_event_need_dwarf(pev)) {
400 pr_warning("Debuginfo-analysis is not supported.\n"); 499 pr_warning("Debuginfo-analysis is not supported.\n");
@@ -403,12 +502,19 @@ static int try_to_find_probe_trace_events(struct perf_probe_event *pev,
403 return 0; 502 return 0;
404} 503}
405 504
406int show_line_range(struct line_range *lr __unused) 505int show_line_range(struct line_range *lr __unused, const char *module __unused)
407{ 506{
408 pr_warning("Debuginfo-analysis is not supported.\n"); 507 pr_warning("Debuginfo-analysis is not supported.\n");
409 return -ENOSYS; 508 return -ENOSYS;
410} 509}
411 510
/* Stub for builds without DWARF_SUPPORT: ignores its arguments, warns and returns -ENOSYS. */
511int show_available_vars(struct perf_probe_event *pevs __unused,
512 int npevs __unused, int max_vls __unused,
513 const char *module __unused, bool externs __unused)
514{
515 pr_warning("Debuginfo-analysis is not supported.\n");
516 return -ENOSYS;
517}
412#endif 518#endif
413 519
414int parse_line_range_desc(const char *arg, struct line_range *lr) 520int parse_line_range_desc(const char *arg, struct line_range *lr)
@@ -1087,7 +1193,7 @@ error:
1087} 1193}
1088 1194
1089static int convert_to_perf_probe_event(struct probe_trace_event *tev, 1195static int convert_to_perf_probe_event(struct probe_trace_event *tev,
1090 struct perf_probe_event *pev) 1196 struct perf_probe_event *pev)
1091{ 1197{
1092 char buf[64] = ""; 1198 char buf[64] = "";
1093 int i, ret; 1199 int i, ret;
@@ -1516,14 +1622,14 @@ static int __add_probe_trace_events(struct perf_probe_event *pev,
1516 1622
1517static int convert_to_probe_trace_events(struct perf_probe_event *pev, 1623static int convert_to_probe_trace_events(struct perf_probe_event *pev,
1518 struct probe_trace_event **tevs, 1624 struct probe_trace_event **tevs,
1519 int max_tevs) 1625 int max_tevs, const char *module)
1520{ 1626{
1521 struct symbol *sym; 1627 struct symbol *sym;
1522 int ret = 0, i; 1628 int ret = 0, i;
1523 struct probe_trace_event *tev; 1629 struct probe_trace_event *tev;
1524 1630
1525 /* Convert perf_probe_event with debuginfo */ 1631 /* Convert perf_probe_event with debuginfo */
1526 ret = try_to_find_probe_trace_events(pev, tevs, max_tevs); 1632 ret = try_to_find_probe_trace_events(pev, tevs, max_tevs, module);
1527 if (ret != 0) 1633 if (ret != 0)
1528 return ret; 1634 return ret;
1529 1635
@@ -1572,8 +1678,7 @@ static int convert_to_probe_trace_events(struct perf_probe_event *pev,
1572 } 1678 }
1573 1679
1574 /* Currently just checking function name from symbol map */ 1680 /* Currently just checking function name from symbol map */
1575 sym = map__find_symbol_by_name(machine.vmlinux_maps[MAP__FUNCTION], 1681 sym = __find_kernel_function_by_name(tev->point.symbol, NULL);
1576 tev->point.symbol, NULL);
1577 if (!sym) { 1682 if (!sym) {
1578 pr_warning("Kernel symbol \'%s\' not found.\n", 1683 pr_warning("Kernel symbol \'%s\' not found.\n",
1579 tev->point.symbol); 1684 tev->point.symbol);
@@ -1596,7 +1701,7 @@ struct __event_package {
1596}; 1701};
1597 1702
1598int add_perf_probe_events(struct perf_probe_event *pevs, int npevs, 1703int add_perf_probe_events(struct perf_probe_event *pevs, int npevs,
1599 bool force_add, int max_tevs) 1704 int max_tevs, const char *module, bool force_add)
1600{ 1705{
1601 int i, j, ret; 1706 int i, j, ret;
1602 struct __event_package *pkgs; 1707 struct __event_package *pkgs;
@@ -1617,7 +1722,9 @@ int add_perf_probe_events(struct perf_probe_event *pevs, int npevs,
1617 pkgs[i].pev = &pevs[i]; 1722 pkgs[i].pev = &pevs[i];
1618 /* Convert with or without debuginfo */ 1723 /* Convert with or without debuginfo */
1619 ret = convert_to_probe_trace_events(pkgs[i].pev, 1724 ret = convert_to_probe_trace_events(pkgs[i].pev,
1620 &pkgs[i].tevs, max_tevs); 1725 &pkgs[i].tevs,
1726 max_tevs,
1727 module);
1621 if (ret < 0) 1728 if (ret < 0)
1622 goto end; 1729 goto end;
1623 pkgs[i].ntevs = ret; 1730 pkgs[i].ntevs = ret;
diff --git a/tools/perf/util/probe-event.h b/tools/perf/util/probe-event.h
index 5af39243a25b..5accbedfea37 100644
--- a/tools/perf/util/probe-event.h
+++ b/tools/perf/util/probe-event.h
@@ -90,6 +90,12 @@ struct line_range {
90 struct list_head line_list; /* Visible lines */ 90 struct list_head line_list; /* Visible lines */
91}; 91};
92 92
93/* List of variables available at one resolved probe point */
94struct variable_list {
95 struct probe_trace_point point; /* Actual probepoint (symbol + offset) the variables belong to */
96 struct strlist *vars; /* Available variables; NULL when none were found at this point */
97};
98
93/* Command string to events */ 99/* Command string to events */
94extern int parse_perf_probe_command(const char *cmd, 100extern int parse_perf_probe_command(const char *cmd,
95 struct perf_probe_event *pev); 101 struct perf_probe_event *pev);
@@ -109,12 +115,18 @@ extern void clear_perf_probe_event(struct perf_probe_event *pev);
109/* Command string to line-range */ 115/* Command string to line-range */
110extern int parse_line_range_desc(const char *cmd, struct line_range *lr); 116extern int parse_line_range_desc(const char *cmd, struct line_range *lr);
111 117
118/* Internal use: Return kernel/module path */
119extern const char *kernel_get_module_path(const char *module);
112 120
113extern int add_perf_probe_events(struct perf_probe_event *pevs, int npevs, 121extern int add_perf_probe_events(struct perf_probe_event *pevs, int npevs,
114 bool force_add, int max_probe_points); 122 int max_probe_points, const char *module,
123 bool force_add);
115extern int del_perf_probe_events(struct strlist *dellist); 124extern int del_perf_probe_events(struct strlist *dellist);
116extern int show_perf_probe_events(void); 125extern int show_perf_probe_events(void);
117extern int show_line_range(struct line_range *lr); 126extern int show_line_range(struct line_range *lr, const char *module);
127extern int show_available_vars(struct perf_probe_event *pevs, int npevs,
128 int max_probe_points, const char *module,
129 bool externs);
118 130
119 131
120/* Maximum index number of event-name postfix */ 132/* Maximum index number of event-name postfix */
diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c
index 32b81f707ff5..3991d73d1cff 100644
--- a/tools/perf/util/probe-finder.c
+++ b/tools/perf/util/probe-finder.c
@@ -116,6 +116,101 @@ static void line_list__free(struct list_head *head)
116 } 116 }
117} 117}
118 118
119/* Dwarf FL wrappers */
120
121static int __linux_kernel_find_elf(Dwfl_Module *mod,
122 void **userdata,
123 const char *module_name,
124 Dwarf_Addr base,
125 char **file_name, Elf **elfp)
126{
127 int fd;
128 const char *path = kernel_get_module_path(module_name);
129
130 if (path) {
131 fd = open(path, O_RDONLY);
132 if (fd >= 0) {
133 *file_name = strdup(path);
134 return fd;
135 }
136 }
137 /* If failed, try to call standard method */
138 return dwfl_linux_kernel_find_elf(mod, userdata, module_name, base,
139 file_name, elfp);
140}
141
142static char *debuginfo_path; /* Currently dummy; both callback tables below point at it */
143
144static const Dwfl_Callbacks offline_callbacks = { /* passed to dwfl_begin() by dwfl_init_offline_dwarf() */
145 .find_debuginfo = dwfl_standard_find_debuginfo,
146 .debuginfo_path = &debuginfo_path,
147
148 .section_address = dwfl_offline_section_address,
149
150 /* We use this table for core files too. */
151 .find_elf = dwfl_build_id_find_elf,
152};
153
154static const Dwfl_Callbacks kernel_callbacks = { /* passed to dwfl_begin() by dwfl_init_live_kernel_dwarf() */
155 .find_debuginfo = dwfl_standard_find_debuginfo,
156 .debuginfo_path = &debuginfo_path,
157
158 .find_elf = __linux_kernel_find_elf, /* tries kernel_get_module_path() before libdwfl's own lookup */
159 .section_address = dwfl_linux_kernel_module_section_address,
160};
161
162/* Get a Dwarf from offline image */
163static Dwarf *dwfl_init_offline_dwarf(int fd, Dwfl **dwflp, Dwarf_Addr *bias)
164{
165 Dwfl_Module *mod;
166 Dwarf *dbg = NULL;
167
168 if (!dwflp)
169 return NULL;
170
171 *dwflp = dwfl_begin(&offline_callbacks);
172 if (!*dwflp)
173 return NULL;
174
175 mod = dwfl_report_offline(*dwflp, "", "", fd);
176 if (!mod)
177 goto error;
178
179 dbg = dwfl_module_getdwarf(mod, bias);
180 if (!dbg) {
181error:
182 dwfl_end(*dwflp);
183 *dwflp = NULL;
184 }
185 return dbg;
186}
187
188/* Get a Dwarf from live kernel image */
189static Dwarf *dwfl_init_live_kernel_dwarf(Dwarf_Addr addr, Dwfl **dwflp,
190 Dwarf_Addr *bias)
191{
192 Dwarf *dbg;
193
194 if (!dwflp)
195 return NULL;
196
197 *dwflp = dwfl_begin(&kernel_callbacks);
198 if (!*dwflp)
199 return NULL;
200
201 /* Load the kernel dwarves: Don't care the result here */
202 dwfl_linux_kernel_report_kernel(*dwflp);
203 dwfl_linux_kernel_report_modules(*dwflp);
204
205 dbg = dwfl_addrdwarf(*dwflp, addr, bias);
206 /* Here, check whether we could get a real dwarf */
207 if (!dbg) {
208 dwfl_end(*dwflp);
209 *dwflp = NULL;
210 }
211 return dbg;
212}
213
119/* Dwarf wrappers */ 214/* Dwarf wrappers */
120 215
121/* Find the realpath of the target file. */ 216/* Find the realpath of the target file. */
@@ -160,26 +255,44 @@ static bool die_compare_name(Dwarf_Die *dw_die, const char *tname)
160 return name ? (strcmp(tname, name) == 0) : false; 255 return name ? (strcmp(tname, name) == 0) : false;
161} 256}
162 257
163/* Get type die, but skip qualifiers and typedef */ 258/* Get type die */
164static Dwarf_Die *die_get_real_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem) 259static Dwarf_Die *die_get_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem)
165{ 260{
166 Dwarf_Attribute attr; 261 Dwarf_Attribute attr;
262
263 if (dwarf_attr_integrate(vr_die, DW_AT_type, &attr) &&
264 dwarf_formref_die(&attr, die_mem))
265 return die_mem;
266 else
267 return NULL;
268}
269
270/* Get a type die, but skip qualifiers */
271static Dwarf_Die *__die_get_real_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem)
272{
167 int tag; 273 int tag;
168 274
169 do { 275 do {
170 if (dwarf_attr(vr_die, DW_AT_type, &attr) == NULL || 276 vr_die = die_get_type(vr_die, die_mem);
171 dwarf_formref_die(&attr, die_mem) == NULL) 277 if (!vr_die)
172 return NULL; 278 break;
173 279 tag = dwarf_tag(vr_die);
174 tag = dwarf_tag(die_mem);
175 vr_die = die_mem;
176 } while (tag == DW_TAG_const_type || 280 } while (tag == DW_TAG_const_type ||
177 tag == DW_TAG_restrict_type || 281 tag == DW_TAG_restrict_type ||
178 tag == DW_TAG_volatile_type || 282 tag == DW_TAG_volatile_type ||
179 tag == DW_TAG_shared_type || 283 tag == DW_TAG_shared_type);
180 tag == DW_TAG_typedef); 284
285 return vr_die;
286}
181 287
182 return die_mem; 288/* Get a type die, but skip qualifiers and typedef */
289static Dwarf_Die *die_get_real_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem)
290{
291 do {
292 vr_die = __die_get_real_type(vr_die, die_mem);
293 } while (vr_die && dwarf_tag(vr_die) == DW_TAG_typedef);
294
295 return vr_die;
183} 296}
184 297
185static bool die_is_signed_type(Dwarf_Die *tp_die) 298static bool die_is_signed_type(Dwarf_Die *tp_die)
@@ -320,25 +433,35 @@ static Dwarf_Die *die_find_inlinefunc(Dwarf_Die *sp_die, Dwarf_Addr addr,
320 return die_find_child(sp_die, __die_find_inline_cb, &addr, die_mem); 433 return die_find_child(sp_die, __die_find_inline_cb, &addr, die_mem);
321} 434}
322 435
436struct __find_variable_param { /* search key handed to __die_find_variable_cb() */
437 const char *name; /* variable or parameter name to match */
438 Dwarf_Addr addr; /* address tested with dwarf_haspc() on DIEs that do not match */
439};
440
323static int __die_find_variable_cb(Dwarf_Die *die_mem, void *data) 441static int __die_find_variable_cb(Dwarf_Die *die_mem, void *data)
324{ 442{
325 const char *name = data; 443 struct __find_variable_param *fvp = data;
326 int tag; 444 int tag;
327 445
328 tag = dwarf_tag(die_mem); 446 tag = dwarf_tag(die_mem);
329 if ((tag == DW_TAG_formal_parameter || 447 if ((tag == DW_TAG_formal_parameter ||
330 tag == DW_TAG_variable) && 448 tag == DW_TAG_variable) &&
331 die_compare_name(die_mem, name)) 449 die_compare_name(die_mem, fvp->name))
332 return DIE_FIND_CB_FOUND; 450 return DIE_FIND_CB_FOUND;
333 451
334 return DIE_FIND_CB_CONTINUE; 452 if (dwarf_haspc(die_mem, fvp->addr))
453 return DIE_FIND_CB_CONTINUE;
454 else
455 return DIE_FIND_CB_SIBLING;
335} 456}
336 457
337/* Find a variable called 'name' */ 458/* Find a variable called 'name' at given address */
338static Dwarf_Die *die_find_variable(Dwarf_Die *sp_die, const char *name, 459static Dwarf_Die *die_find_variable_at(Dwarf_Die *sp_die, const char *name,
339 Dwarf_Die *die_mem) 460 Dwarf_Addr addr, Dwarf_Die *die_mem)
340{ 461{
341 return die_find_child(sp_die, __die_find_variable_cb, (void *)name, 462 struct __find_variable_param fvp = { .name = name, .addr = addr};
463
464 return die_find_child(sp_die, __die_find_variable_cb, (void *)&fvp,
342 die_mem); 465 die_mem);
343} 466}
344 467
@@ -361,6 +484,60 @@ static Dwarf_Die *die_find_member(Dwarf_Die *st_die, const char *name,
361 die_mem); 484 die_mem);
362} 485}
363 486
487/* Get the name of given variable DIE */
488static int die_get_typename(Dwarf_Die *vr_die, char *buf, int len)
489{
490 Dwarf_Die type;
491 int tag, ret, ret2;
492 const char *tmp = "";
493
494 if (__die_get_real_type(vr_die, &type) == NULL)
495 return -ENOENT;
496
497 tag = dwarf_tag(&type);
498 if (tag == DW_TAG_array_type || tag == DW_TAG_pointer_type)
499 tmp = "*";
500 else if (tag == DW_TAG_subroutine_type) {
501 /* Function pointer */
502 ret = snprintf(buf, len, "(function_type)");
503 return (ret >= len) ? -E2BIG : ret;
504 } else {
505 if (!dwarf_diename(&type))
506 return -ENOENT;
507 if (tag == DW_TAG_union_type)
508 tmp = "union ";
509 else if (tag == DW_TAG_structure_type)
510 tmp = "struct ";
511 /* Write a base name */
512 ret = snprintf(buf, len, "%s%s", tmp, dwarf_diename(&type));
513 return (ret >= len) ? -E2BIG : ret;
514 }
515 ret = die_get_typename(&type, buf, len);
516 if (ret > 0) {
517 ret2 = snprintf(buf + ret, len - ret, "%s", tmp);
518 ret = (ret2 >= len - ret) ? -E2BIG : ret2 + ret;
519 }
520 return ret;
521}
522
523/* Get the name and type of given variable DIE, stored as "type\tname" */
524static int die_get_varname(Dwarf_Die *vr_die, char *buf, int len)
525{
526 int ret, ret2;
527
528 ret = die_get_typename(vr_die, buf, len);
529 if (ret < 0) {
530 pr_debug("Failed to get type, make it unknown.\n");
531 ret = snprintf(buf, len, "(unknown_type)");
532 }
533 if (ret > 0) {
534 ret2 = snprintf(buf + ret, len - ret, "\t%s",
535 dwarf_diename(vr_die));
536 ret = (ret2 >= len - ret) ? -E2BIG : ret2 + ret;
537 }
538 return ret;
539}
540
364/* 541/*
365 * Probe finder related functions 542 * Probe finder related functions
366 */ 543 */
@@ -374,8 +551,13 @@ static struct probe_trace_arg_ref *alloc_trace_arg_ref(long offs)
374 return ref; 551 return ref;
375} 552}
376 553
377/* Show a location */ 554/*
378static int convert_variable_location(Dwarf_Die *vr_die, struct probe_finder *pf) 555 * Convert a location into trace_arg.
556 * If tvar == NULL, this just checks variable can be converted.
557 */
558static int convert_variable_location(Dwarf_Die *vr_die, Dwarf_Addr addr,
559 Dwarf_Op *fb_ops,
560 struct probe_trace_arg *tvar)
379{ 561{
380 Dwarf_Attribute attr; 562 Dwarf_Attribute attr;
381 Dwarf_Op *op; 563 Dwarf_Op *op;
@@ -384,20 +566,23 @@ static int convert_variable_location(Dwarf_Die *vr_die, struct probe_finder *pf)
384 Dwarf_Word offs = 0; 566 Dwarf_Word offs = 0;
385 bool ref = false; 567 bool ref = false;
386 const char *regs; 568 const char *regs;
387 struct probe_trace_arg *tvar = pf->tvar;
388 int ret; 569 int ret;
389 570
571 if (dwarf_attr(vr_die, DW_AT_external, &attr) != NULL)
572 goto static_var;
573
390 /* TODO: handle more than 1 exprs */ 574 /* TODO: handle more than 1 exprs */
391 if (dwarf_attr(vr_die, DW_AT_location, &attr) == NULL || 575 if (dwarf_attr(vr_die, DW_AT_location, &attr) == NULL ||
392 dwarf_getlocation_addr(&attr, pf->addr, &op, &nops, 1) <= 0 || 576 dwarf_getlocation_addr(&attr, addr, &op, &nops, 1) <= 0 ||
393 nops == 0) { 577 nops == 0) {
394 /* TODO: Support const_value */ 578 /* TODO: Support const_value */
395 pr_err("Failed to find the location of %s at this address.\n"
396 " Perhaps, it has been optimized out.\n", pf->pvar->var);
397 return -ENOENT; 579 return -ENOENT;
398 } 580 }
399 581
400 if (op->atom == DW_OP_addr) { 582 if (op->atom == DW_OP_addr) {
583static_var:
584 if (!tvar)
585 return 0;
401 /* Static variables on memory (not stack), make @varname */ 586 /* Static variables on memory (not stack), make @varname */
402 ret = strlen(dwarf_diename(vr_die)); 587 ret = strlen(dwarf_diename(vr_die));
403 tvar->value = zalloc(ret + 2); 588 tvar->value = zalloc(ret + 2);
@@ -412,14 +597,11 @@ static int convert_variable_location(Dwarf_Die *vr_die, struct probe_finder *pf)
412 597
413 /* If this is based on frame buffer, set the offset */ 598 /* If this is based on frame buffer, set the offset */
414 if (op->atom == DW_OP_fbreg) { 599 if (op->atom == DW_OP_fbreg) {
415 if (pf->fb_ops == NULL) { 600 if (fb_ops == NULL)
416 pr_warning("The attribute of frame base is not "
417 "supported.\n");
418 return -ENOTSUP; 601 return -ENOTSUP;
419 }
420 ref = true; 602 ref = true;
421 offs = op->number; 603 offs = op->number;
422 op = &pf->fb_ops[0]; 604 op = &fb_ops[0];
423 } 605 }
424 606
425 if (op->atom >= DW_OP_breg0 && op->atom <= DW_OP_breg31) { 607 if (op->atom >= DW_OP_breg0 && op->atom <= DW_OP_breg31) {
@@ -435,13 +617,18 @@ static int convert_variable_location(Dwarf_Die *vr_die, struct probe_finder *pf)
435 } else if (op->atom == DW_OP_regx) { 617 } else if (op->atom == DW_OP_regx) {
436 regn = op->number; 618 regn = op->number;
437 } else { 619 } else {
438 pr_warning("DW_OP %x is not supported.\n", op->atom); 620 pr_debug("DW_OP %x is not supported.\n", op->atom);
439 return -ENOTSUP; 621 return -ENOTSUP;
440 } 622 }
441 623
624 if (!tvar)
625 return 0;
626
442 regs = get_arch_regstr(regn); 627 regs = get_arch_regstr(regn);
443 if (!regs) { 628 if (!regs) {
444 pr_warning("Mapping for DWARF register number %u missing on this architecture.", regn); 629 /* This should be a bug in DWARF or this tool */
630 pr_warning("Mapping for DWARF register number %u "
631 "missing on this architecture.", regn);
445 return -ERANGE; 632 return -ERANGE;
446 } 633 }
447 634
@@ -666,8 +853,14 @@ static int convert_variable(Dwarf_Die *vr_die, struct probe_finder *pf)
666 pr_debug("Converting variable %s into trace event.\n", 853 pr_debug("Converting variable %s into trace event.\n",
667 dwarf_diename(vr_die)); 854 dwarf_diename(vr_die));
668 855
669 ret = convert_variable_location(vr_die, pf); 856 ret = convert_variable_location(vr_die, pf->addr, pf->fb_ops,
670 if (ret == 0 && pf->pvar->field) { 857 pf->tvar);
858 if (ret == -ENOENT)
859 pr_err("Failed to find the location of %s at this address.\n"
860 " Perhaps, it has been optimized out.\n", pf->pvar->var);
861 else if (ret == -ENOTSUP)
862 pr_err("Sorry, we don't support this variable location yet.\n");
863 else if (pf->pvar->field) {
671 ret = convert_variable_fields(vr_die, pf->pvar->var, 864 ret = convert_variable_fields(vr_die, pf->pvar->var,
672 pf->pvar->field, &pf->tvar->ref, 865 pf->pvar->field, &pf->tvar->ref,
673 &die_mem); 866 &die_mem);
@@ -722,56 +915,39 @@ static int find_variable(Dwarf_Die *sp_die, struct probe_finder *pf)
722 pr_debug("Searching '%s' variable in context.\n", 915 pr_debug("Searching '%s' variable in context.\n",
723 pf->pvar->var); 916 pf->pvar->var);
724 /* Search child die for local variables and parameters. */ 917 /* Search child die for local variables and parameters. */
725 if (die_find_variable(sp_die, pf->pvar->var, &vr_die)) 918 if (die_find_variable_at(sp_die, pf->pvar->var, pf->addr, &vr_die))
726 ret = convert_variable(&vr_die, pf); 919 ret = convert_variable(&vr_die, pf);
727 else { 920 else {
728 /* Search upper class */ 921 /* Search upper class */
729 nscopes = dwarf_getscopes_die(sp_die, &scopes); 922 nscopes = dwarf_getscopes_die(sp_die, &scopes);
730 if (nscopes > 0) { 923 while (nscopes-- > 1) {
731 ret = dwarf_getscopevar(scopes, nscopes, pf->pvar->var, 924 pr_debug("Searching variables in %s\n",
732 0, NULL, 0, 0, &vr_die); 925 dwarf_diename(&scopes[nscopes]));
733 if (ret >= 0) 926 /* We should check this scope, so give dummy address */
927 if (die_find_variable_at(&scopes[nscopes],
928 pf->pvar->var, 0,
929 &vr_die)) {
734 ret = convert_variable(&vr_die, pf); 930 ret = convert_variable(&vr_die, pf);
735 else 931 goto found;
736 ret = -ENOENT; 932 }
933 }
934 if (scopes)
737 free(scopes); 935 free(scopes);
738 } else 936 ret = -ENOENT;
739 ret = -ENOENT;
740 } 937 }
938found:
741 if (ret < 0) 939 if (ret < 0)
742 pr_warning("Failed to find '%s' in this function.\n", 940 pr_warning("Failed to find '%s' in this function.\n",
743 pf->pvar->var); 941 pf->pvar->var);
744 return ret; 942 return ret;
745} 943}
746 944
747/* Show a probe point to output buffer */ 945/* Convert subprogram DIE to trace point */
748static int convert_probe_point(Dwarf_Die *sp_die, struct probe_finder *pf) 946static int convert_to_trace_point(Dwarf_Die *sp_die, Dwarf_Addr paddr,
947 bool retprobe, struct probe_trace_point *tp)
749{ 948{
750 struct probe_trace_event *tev;
751 Dwarf_Addr eaddr; 949 Dwarf_Addr eaddr;
752 Dwarf_Die die_mem;
753 const char *name; 950 const char *name;
754 int ret, i;
755 Dwarf_Attribute fb_attr;
756 size_t nops;
757
758 if (pf->ntevs == pf->max_tevs) {
759 pr_warning("Too many( > %d) probe point found.\n",
760 pf->max_tevs);
761 return -ERANGE;
762 }
763 tev = &pf->tevs[pf->ntevs++];
764
765 /* If no real subprogram, find a real one */
766 if (!sp_die || dwarf_tag(sp_die) != DW_TAG_subprogram) {
767 sp_die = die_find_real_subprogram(&pf->cu_die,
768 pf->addr, &die_mem);
769 if (!sp_die) {
770 pr_warning("Failed to find probe point in any "
771 "functions.\n");
772 return -ENOENT;
773 }
774 }
775 951
776 /* Copy the name of probe point */ 952 /* Copy the name of probe point */
777 name = dwarf_diename(sp_die); 953 name = dwarf_diename(sp_die);
@@ -781,26 +957,45 @@ static int convert_probe_point(Dwarf_Die *sp_die, struct probe_finder *pf)
781 dwarf_diename(sp_die)); 957 dwarf_diename(sp_die));
782 return -ENOENT; 958 return -ENOENT;
783 } 959 }
784 tev->point.symbol = strdup(name); 960 tp->symbol = strdup(name);
785 if (tev->point.symbol == NULL) 961 if (tp->symbol == NULL)
786 return -ENOMEM; 962 return -ENOMEM;
787 tev->point.offset = (unsigned long)(pf->addr - eaddr); 963 tp->offset = (unsigned long)(paddr - eaddr);
788 } else 964 } else
789 /* This function has no name. */ 965 /* This function has no name. */
790 tev->point.offset = (unsigned long)pf->addr; 966 tp->offset = (unsigned long)paddr;
791 967
792 /* Return probe must be on the head of a subprogram */ 968 /* Return probe must be on the head of a subprogram */
793 if (pf->pev->point.retprobe) { 969 if (retprobe) {
794 if (tev->point.offset != 0) { 970 if (eaddr != paddr) {
795 pr_warning("Return probe must be on the head of" 971 pr_warning("Return probe must be on the head of"
796 " a real function\n"); 972 " a real function\n");
797 return -EINVAL; 973 return -EINVAL;
798 } 974 }
799 tev->point.retprobe = true; 975 tp->retprobe = true;
800 } 976 }
801 977
802 pr_debug("Probe point found: %s+%lu\n", tev->point.symbol, 978 return 0;
803 tev->point.offset); 979}
980
981/* Call probe_finder callback with real subprogram DIE */
982static int call_probe_finder(Dwarf_Die *sp_die, struct probe_finder *pf)
983{
984 Dwarf_Die die_mem;
985 Dwarf_Attribute fb_attr;
986 size_t nops;
987 int ret;
988
989 /* If no real subprogram, find a real one */
990 if (!sp_die || dwarf_tag(sp_die) != DW_TAG_subprogram) {
991 sp_die = die_find_real_subprogram(&pf->cu_die,
992 pf->addr, &die_mem);
993 if (!sp_die) {
994 pr_warning("Failed to find probe point in any "
995 "functions.\n");
996 return -ENOENT;
997 }
998 }
804 999
805 /* Get the frame base attribute/ops */ 1000 /* Get the frame base attribute/ops */
806 dwarf_attr(sp_die, DW_AT_frame_base, &fb_attr); 1001 dwarf_attr(sp_die, DW_AT_frame_base, &fb_attr);
@@ -820,22 +1015,13 @@ static int convert_probe_point(Dwarf_Die *sp_die, struct probe_finder *pf)
820#endif 1015#endif
821 } 1016 }
822 1017
823 /* Find each argument */ 1018 /* Call finder's callback handler */
824 tev->nargs = pf->pev->nargs; 1019 ret = pf->callback(sp_die, pf);
825 tev->args = zalloc(sizeof(struct probe_trace_arg) * tev->nargs);
826 if (tev->args == NULL)
827 return -ENOMEM;
828 for (i = 0; i < pf->pev->nargs; i++) {
829 pf->pvar = &pf->pev->args[i];
830 pf->tvar = &tev->args[i];
831 ret = find_variable(sp_die, pf);
832 if (ret != 0)
833 return ret;
834 }
835 1020
836 /* *pf->fb_ops will be cached in libdw. Don't free it. */ 1021 /* *pf->fb_ops will be cached in libdw. Don't free it. */
837 pf->fb_ops = NULL; 1022 pf->fb_ops = NULL;
838 return 0; 1023
1024 return ret;
839} 1025}
840 1026
841/* Find probe point from its line number */ 1027/* Find probe point from its line number */
@@ -871,7 +1057,7 @@ static int find_probe_point_by_line(struct probe_finder *pf)
871 (int)i, lineno, (uintmax_t)addr); 1057 (int)i, lineno, (uintmax_t)addr);
872 pf->addr = addr; 1058 pf->addr = addr;
873 1059
874 ret = convert_probe_point(NULL, pf); 1060 ret = call_probe_finder(NULL, pf);
875 /* Continuing, because target line might be inlined. */ 1061 /* Continuing, because target line might be inlined. */
876 } 1062 }
877 return ret; 1063 return ret;
@@ -984,7 +1170,7 @@ static int find_probe_point_lazy(Dwarf_Die *sp_die, struct probe_finder *pf)
984 (int)i, lineno, (unsigned long long)addr); 1170 (int)i, lineno, (unsigned long long)addr);
985 pf->addr = addr; 1171 pf->addr = addr;
986 1172
987 ret = convert_probe_point(sp_die, pf); 1173 ret = call_probe_finder(sp_die, pf);
988 /* Continuing, because target line might be inlined. */ 1174 /* Continuing, because target line might be inlined. */
989 } 1175 }
990 /* TODO: deallocate lines, but how? */ 1176 /* TODO: deallocate lines, but how? */
@@ -1019,7 +1205,7 @@ static int probe_point_inline_cb(Dwarf_Die *in_die, void *data)
1019 pr_debug("found inline addr: 0x%jx\n", 1205 pr_debug("found inline addr: 0x%jx\n",
1020 (uintmax_t)pf->addr); 1206 (uintmax_t)pf->addr);
1021 1207
1022 param->retval = convert_probe_point(in_die, pf); 1208 param->retval = call_probe_finder(in_die, pf);
1023 if (param->retval < 0) 1209 if (param->retval < 0)
1024 return DWARF_CB_ABORT; 1210 return DWARF_CB_ABORT;
1025 } 1211 }
@@ -1057,7 +1243,7 @@ static int probe_point_search_cb(Dwarf_Die *sp_die, void *data)
1057 } 1243 }
1058 pf->addr += pp->offset; 1244 pf->addr += pp->offset;
1059 /* TODO: Check the address in this function */ 1245 /* TODO: Check the address in this function */
1060 param->retval = convert_probe_point(sp_die, pf); 1246 param->retval = call_probe_finder(sp_die, pf);
1061 } 1247 }
1062 } else { 1248 } else {
1063 struct dwarf_callback_param _param = {.data = (void *)pf, 1249 struct dwarf_callback_param _param = {.data = (void *)pf,
@@ -1079,90 +1265,276 @@ static int find_probe_point_by_func(struct probe_finder *pf)
1079 return _param.retval; 1265 return _param.retval;
1080} 1266}
1081 1267
1082/* Find probe_trace_events specified by perf_probe_event from debuginfo */ 1268/* Find probe points from debuginfo */
1083int find_probe_trace_events(int fd, struct perf_probe_event *pev, 1269static int find_probes(int fd, struct probe_finder *pf)
1084 struct probe_trace_event **tevs, int max_tevs)
1085{ 1270{
1086 struct probe_finder pf = {.pev = pev, .max_tevs = max_tevs}; 1271 struct perf_probe_point *pp = &pf->pev->point;
1087 struct perf_probe_point *pp = &pev->point;
1088 Dwarf_Off off, noff; 1272 Dwarf_Off off, noff;
1089 size_t cuhl; 1273 size_t cuhl;
1090 Dwarf_Die *diep; 1274 Dwarf_Die *diep;
1091 Dwarf *dbg; 1275 Dwarf *dbg = NULL;
1276 Dwfl *dwfl;
1277 Dwarf_Addr bias; /* Currently ignored */
1092 int ret = 0; 1278 int ret = 0;
1093 1279
1094 pf.tevs = zalloc(sizeof(struct probe_trace_event) * max_tevs); 1280 dbg = dwfl_init_offline_dwarf(fd, &dwfl, &bias);
1095 if (pf.tevs == NULL)
1096 return -ENOMEM;
1097 *tevs = pf.tevs;
1098 pf.ntevs = 0;
1099
1100 dbg = dwarf_begin(fd, DWARF_C_READ);
1101 if (!dbg) { 1281 if (!dbg) {
1102 pr_warning("No dwarf info found in the vmlinux - " 1282 pr_warning("No dwarf info found in the vmlinux - "
1103 "please rebuild with CONFIG_DEBUG_INFO=y.\n"); 1283 "please rebuild with CONFIG_DEBUG_INFO=y.\n");
1104 free(pf.tevs);
1105 *tevs = NULL;
1106 return -EBADF; 1284 return -EBADF;
1107 } 1285 }
1108 1286
1109#if _ELFUTILS_PREREQ(0, 142) 1287#if _ELFUTILS_PREREQ(0, 142)
1110 /* Get the call frame information from this dwarf */ 1288 /* Get the call frame information from this dwarf */
1111 pf.cfi = dwarf_getcfi(dbg); 1289 pf->cfi = dwarf_getcfi(dbg);
1112#endif 1290#endif
1113 1291
1114 off = 0; 1292 off = 0;
1115 line_list__init(&pf.lcache); 1293 line_list__init(&pf->lcache);
1116 /* Loop on CUs (Compilation Unit) */ 1294 /* Loop on CUs (Compilation Unit) */
1117 while (!dwarf_nextcu(dbg, off, &noff, &cuhl, NULL, NULL, NULL) && 1295 while (!dwarf_nextcu(dbg, off, &noff, &cuhl, NULL, NULL, NULL) &&
1118 ret >= 0) { 1296 ret >= 0) {
1119 /* Get the DIE(Debugging Information Entry) of this CU */ 1297 /* Get the DIE(Debugging Information Entry) of this CU */
1120 diep = dwarf_offdie(dbg, off + cuhl, &pf.cu_die); 1298 diep = dwarf_offdie(dbg, off + cuhl, &pf->cu_die);
1121 if (!diep) 1299 if (!diep)
1122 continue; 1300 continue;
1123 1301
1124 /* Check if target file is included. */ 1302 /* Check if target file is included. */
1125 if (pp->file) 1303 if (pp->file)
1126 pf.fname = cu_find_realpath(&pf.cu_die, pp->file); 1304 pf->fname = cu_find_realpath(&pf->cu_die, pp->file);
1127 else 1305 else
1128 pf.fname = NULL; 1306 pf->fname = NULL;
1129 1307
1130 if (!pp->file || pf.fname) { 1308 if (!pp->file || pf->fname) {
1131 if (pp->function) 1309 if (pp->function)
1132 ret = find_probe_point_by_func(&pf); 1310 ret = find_probe_point_by_func(pf);
1133 else if (pp->lazy_line) 1311 else if (pp->lazy_line)
1134 ret = find_probe_point_lazy(NULL, &pf); 1312 ret = find_probe_point_lazy(NULL, pf);
1135 else { 1313 else {
1136 pf.lno = pp->line; 1314 pf->lno = pp->line;
1137 ret = find_probe_point_by_line(&pf); 1315 ret = find_probe_point_by_line(pf);
1138 } 1316 }
1139 } 1317 }
1140 off = noff; 1318 off = noff;
1141 } 1319 }
1142 line_list__free(&pf.lcache); 1320 line_list__free(&pf->lcache);
1143 dwarf_end(dbg); 1321 if (dwfl)
1322 dwfl_end(dwfl);
1144 1323
1145 return (ret < 0) ? ret : pf.ntevs; 1324 return ret;
1325}
1326
1327/* Add a found probe point into trace event list */
1328static int add_probe_trace_event(Dwarf_Die *sp_die, struct probe_finder *pf)
1329{
1330 struct trace_event_finder *tf =
1331 container_of(pf, struct trace_event_finder, pf);
1332 struct probe_trace_event *tev;
1333 int ret, i;
1334
1335 /* Check number of tevs */
1336 if (tf->ntevs == tf->max_tevs) {
1337 pr_warning("Too many( > %d) probe point found.\n",
1338 tf->max_tevs);
1339 return -ERANGE;
1340 }
1341 tev = &tf->tevs[tf->ntevs++];
1342
1343 ret = convert_to_trace_point(sp_die, pf->addr, pf->pev->point.retprobe,
1344 &tev->point);
1345 if (ret < 0)
1346 return ret;
1347
1348 pr_debug("Probe point found: %s+%lu\n", tev->point.symbol,
1349 tev->point.offset);
1350
1351 /* Find each argument */
1352 tev->nargs = pf->pev->nargs;
1353 tev->args = zalloc(sizeof(struct probe_trace_arg) * tev->nargs);
1354 if (tev->args == NULL)
1355 return -ENOMEM;
1356 for (i = 0; i < pf->pev->nargs; i++) {
1357 pf->pvar = &pf->pev->args[i];
1358 pf->tvar = &tev->args[i];
1359 ret = find_variable(sp_die, pf);
1360 if (ret != 0)
1361 return ret;
1362 }
1363
1364 return 0;
1365}
1366
1367/* Find probe_trace_events specified by perf_probe_event from debuginfo */
1368int find_probe_trace_events(int fd, struct perf_probe_event *pev,
1369 struct probe_trace_event **tevs, int max_tevs)
1370{
1371 struct trace_event_finder tf = {
1372 .pf = {.pev = pev, .callback = add_probe_trace_event},
1373 .max_tevs = max_tevs};
1374 int ret;
1375
1376 /* Allocate result tevs array */
1377 *tevs = zalloc(sizeof(struct probe_trace_event) * max_tevs);
1378 if (*tevs == NULL)
1379 return -ENOMEM;
1380
1381 tf.tevs = *tevs;
1382 tf.ntevs = 0;
1383
1384 ret = find_probes(fd, &tf.pf);
1385 if (ret < 0) {
1386 free(*tevs);
1387 *tevs = NULL;
1388 return ret;
1389 }
1390
1391 return (ret < 0) ? ret : tf.ntevs;
1392}
1393
1394#define MAX_VAR_LEN 64
1395
1396/* Collect available variables in this scope */
1397static int collect_variables_cb(Dwarf_Die *die_mem, void *data)
1398{
1399 struct available_var_finder *af = data;
1400 struct variable_list *vl;
1401 char buf[MAX_VAR_LEN];
1402 int tag, ret;
1403
1404 vl = &af->vls[af->nvls - 1];
1405
1406 tag = dwarf_tag(die_mem);
1407 if (tag == DW_TAG_formal_parameter ||
1408 tag == DW_TAG_variable) {
1409 ret = convert_variable_location(die_mem, af->pf.addr,
1410 af->pf.fb_ops, NULL);
1411 if (ret == 0) {
1412 ret = die_get_varname(die_mem, buf, MAX_VAR_LEN);
1413 pr_debug2("Add new var: %s\n", buf);
1414 if (ret > 0)
1415 strlist__add(vl->vars, buf);
1416 }
1417 }
1418
1419 if (af->child && dwarf_haspc(die_mem, af->pf.addr))
1420 return DIE_FIND_CB_CONTINUE;
1421 else
1422 return DIE_FIND_CB_SIBLING;
1423}
1424
1425/* Add a found vars into available variables list */
1426static int add_available_vars(Dwarf_Die *sp_die, struct probe_finder *pf)
1427{
1428 struct available_var_finder *af =
1429 container_of(pf, struct available_var_finder, pf);
1430 struct variable_list *vl;
1431 Dwarf_Die die_mem, *scopes = NULL;
1432 int ret, nscopes;
1433
1434 /* Check number of tevs */
1435 if (af->nvls == af->max_vls) {
1436 pr_warning("Too many( > %d) probe point found.\n", af->max_vls);
1437 return -ERANGE;
1438 }
1439 vl = &af->vls[af->nvls++];
1440
1441 ret = convert_to_trace_point(sp_die, pf->addr, pf->pev->point.retprobe,
1442 &vl->point);
1443 if (ret < 0)
1444 return ret;
1445
1446 pr_debug("Probe point found: %s+%lu\n", vl->point.symbol,
1447 vl->point.offset);
1448
1449 /* Find local variables */
1450 vl->vars = strlist__new(true, NULL);
1451 if (vl->vars == NULL)
1452 return -ENOMEM;
1453 af->child = true;
1454 die_find_child(sp_die, collect_variables_cb, (void *)af, &die_mem);
1455
1456 /* Find external variables */
1457 if (!af->externs)
1458 goto out;
1459 /* Don't need to search child DIE for externs. */
1460 af->child = false;
1461 nscopes = dwarf_getscopes_die(sp_die, &scopes);
1462 while (nscopes-- > 1)
1463 die_find_child(&scopes[nscopes], collect_variables_cb,
1464 (void *)af, &die_mem);
1465 if (scopes)
1466 free(scopes);
1467
1468out:
1469 if (strlist__empty(vl->vars)) {
1470 strlist__delete(vl->vars);
1471 vl->vars = NULL;
1472 }
1473
1474 return ret;
1475}
1476
1477/* Find available variables at given probe point */
1478int find_available_vars_at(int fd, struct perf_probe_event *pev,
1479 struct variable_list **vls, int max_vls,
1480 bool externs)
1481{
1482 struct available_var_finder af = {
1483 .pf = {.pev = pev, .callback = add_available_vars},
1484 .max_vls = max_vls, .externs = externs};
1485 int ret;
1486
1487 /* Allocate result vls array */
1488 *vls = zalloc(sizeof(struct variable_list) * max_vls);
1489 if (*vls == NULL)
1490 return -ENOMEM;
1491
1492 af.vls = *vls;
1493 af.nvls = 0;
1494
1495 ret = find_probes(fd, &af.pf);
1496 if (ret < 0) {
1497 /* Free vlist for error */
1498 while (af.nvls--) {
1499 if (af.vls[af.nvls].point.symbol)
1500 free(af.vls[af.nvls].point.symbol);
1501 if (af.vls[af.nvls].vars)
1502 strlist__delete(af.vls[af.nvls].vars);
1503 }
1504 free(af.vls);
1505 *vls = NULL;
1506 return ret;
1507 }
1508
1509 return (ret < 0) ? ret : af.nvls;
1146} 1510}
1147 1511
1148/* Reverse search */ 1512/* Reverse search */
1149int find_perf_probe_point(int fd, unsigned long addr, 1513int find_perf_probe_point(unsigned long addr, struct perf_probe_point *ppt)
1150 struct perf_probe_point *ppt)
1151{ 1514{
1152 Dwarf_Die cudie, spdie, indie; 1515 Dwarf_Die cudie, spdie, indie;
1153 Dwarf *dbg; 1516 Dwarf *dbg = NULL;
1517 Dwfl *dwfl = NULL;
1154 Dwarf_Line *line; 1518 Dwarf_Line *line;
1155 Dwarf_Addr laddr, eaddr; 1519 Dwarf_Addr laddr, eaddr, bias = 0;
1156 const char *tmp; 1520 const char *tmp;
1157 int lineno, ret = 0; 1521 int lineno, ret = 0;
1158 bool found = false; 1522 bool found = false;
1159 1523
1160 dbg = dwarf_begin(fd, DWARF_C_READ); 1524 /* Open the live linux kernel */
1161 if (!dbg) 1525 dbg = dwfl_init_live_kernel_dwarf(addr, &dwfl, &bias);
1162 return -EBADF; 1526 if (!dbg) {
1527 pr_warning("No dwarf info found in the vmlinux - "
1528 "please rebuild with CONFIG_DEBUG_INFO=y.\n");
1529 ret = -EINVAL;
1530 goto end;
1531 }
1163 1532
1533 /* Adjust address with bias */
1534 addr += bias;
1164 /* Find cu die */ 1535 /* Find cu die */
1165 if (!dwarf_addrdie(dbg, (Dwarf_Addr)addr, &cudie)) { 1536 if (!dwarf_addrdie(dbg, (Dwarf_Addr)addr - bias, &cudie)) {
1537 pr_warning("No CU DIE is found at %lx\n", addr);
1166 ret = -EINVAL; 1538 ret = -EINVAL;
1167 goto end; 1539 goto end;
1168 } 1540 }
@@ -1225,7 +1597,8 @@ found:
1225 } 1597 }
1226 1598
1227end: 1599end:
1228 dwarf_end(dbg); 1600 if (dwfl)
1601 dwfl_end(dwfl);
1229 if (ret >= 0) 1602 if (ret >= 0)
1230 ret = found ? 1 : 0; 1603 ret = found ? 1 : 0;
1231 return ret; 1604 return ret;
@@ -1358,6 +1731,9 @@ static int line_range_search_cb(Dwarf_Die *sp_die, void *data)
1358 struct line_finder *lf = param->data; 1731 struct line_finder *lf = param->data;
1359 struct line_range *lr = lf->lr; 1732 struct line_range *lr = lf->lr;
1360 1733
1734 pr_debug("find (%llx) %s\n",
1735 (unsigned long long)dwarf_dieoffset(sp_die),
1736 dwarf_diename(sp_die));
1361 if (dwarf_tag(sp_die) == DW_TAG_subprogram && 1737 if (dwarf_tag(sp_die) == DW_TAG_subprogram &&
1362 die_compare_name(sp_die, lr->function)) { 1738 die_compare_name(sp_die, lr->function)) {
1363 lf->fname = dwarf_decl_file(sp_die); 1739 lf->fname = dwarf_decl_file(sp_die);
@@ -1401,10 +1777,12 @@ int find_line_range(int fd, struct line_range *lr)
1401 Dwarf_Off off = 0, noff; 1777 Dwarf_Off off = 0, noff;
1402 size_t cuhl; 1778 size_t cuhl;
1403 Dwarf_Die *diep; 1779 Dwarf_Die *diep;
1404 Dwarf *dbg; 1780 Dwarf *dbg = NULL;
1781 Dwfl *dwfl;
1782 Dwarf_Addr bias; /* Currently ignored */
1405 const char *comp_dir; 1783 const char *comp_dir;
1406 1784
1407 dbg = dwarf_begin(fd, DWARF_C_READ); 1785 dbg = dwfl_init_offline_dwarf(fd, &dwfl, &bias);
1408 if (!dbg) { 1786 if (!dbg) {
1409 pr_warning("No dwarf info found in the vmlinux - " 1787 pr_warning("No dwarf info found in the vmlinux - "
1410 "please rebuild with CONFIG_DEBUG_INFO=y.\n"); 1788 "please rebuild with CONFIG_DEBUG_INFO=y.\n");
@@ -1450,8 +1828,7 @@ int find_line_range(int fd, struct line_range *lr)
1450 } 1828 }
1451 1829
1452 pr_debug("path: %s\n", lr->path); 1830 pr_debug("path: %s\n", lr->path);
1453 dwarf_end(dbg); 1831 dwfl_end(dwfl);
1454
1455 return (ret < 0) ? ret : lf.found; 1832 return (ret < 0) ? ret : lf.found;
1456} 1833}
1457 1834
diff --git a/tools/perf/util/probe-finder.h b/tools/perf/util/probe-finder.h
index 4507d519f183..bba69d455699 100644
--- a/tools/perf/util/probe-finder.h
+++ b/tools/perf/util/probe-finder.h
@@ -22,20 +22,27 @@ extern int find_probe_trace_events(int fd, struct perf_probe_event *pev,
22 int max_tevs); 22 int max_tevs);
23 23
24/* Find a perf_probe_point from debuginfo */ 24/* Find a perf_probe_point from debuginfo */
25extern int find_perf_probe_point(int fd, unsigned long addr, 25extern int find_perf_probe_point(unsigned long addr,
26 struct perf_probe_point *ppt); 26 struct perf_probe_point *ppt);
27 27
28/* Find a line range */
28extern int find_line_range(int fd, struct line_range *lr); 29extern int find_line_range(int fd, struct line_range *lr);
29 30
31/* Find available variables */
32extern int find_available_vars_at(int fd, struct perf_probe_event *pev,
33 struct variable_list **vls, int max_points,
34 bool externs);
35
30#include <dwarf.h> 36#include <dwarf.h>
31#include <libdw.h> 37#include <libdw.h>
38#include <libdwfl.h>
32#include <version.h> 39#include <version.h>
33 40
34struct probe_finder { 41struct probe_finder {
35 struct perf_probe_event *pev; /* Target probe event */ 42 struct perf_probe_event *pev; /* Target probe event */
36 struct probe_trace_event *tevs; /* Result trace events */ 43
37 int ntevs; /* Number of trace events */ 44 /* Callback when a probe point is found */
38 int max_tevs; /* Max number of trace events */ 45 int (*callback)(Dwarf_Die *sp_die, struct probe_finder *pf);
39 46
40 /* For function searching */ 47 /* For function searching */
41 int lno; /* Line number */ 48 int lno; /* Line number */
@@ -53,6 +60,22 @@ struct probe_finder {
53 struct probe_trace_arg *tvar; /* Current result variable */ 60 struct probe_trace_arg *tvar; /* Current result variable */
54}; 61};
55 62
63struct trace_event_finder {
64 struct probe_finder pf;
65 struct probe_trace_event *tevs; /* Found trace events */
66 int ntevs; /* Number of trace events */
67 int max_tevs; /* Max number of trace events */
68};
69
70struct available_var_finder {
71 struct probe_finder pf;
72 struct variable_list *vls; /* Found variable lists */
73 int nvls; /* Number of variable lists */
74 int max_vls; /* Max no. of variable lists */
75 bool externs; /* Find external vars too */
76 bool child; /* Search child scopes */
77};
78
56struct line_finder { 79struct line_finder {
57 struct line_range *lr; /* Target line range */ 80 struct line_range *lr; /* Target line range */
58 81
diff --git a/tools/perf/util/ui/browser.c b/tools/perf/util/ui/browser.c
index 6d0df809a2ed..8bc010edca25 100644
--- a/tools/perf/util/ui/browser.c
+++ b/tools/perf/util/ui/browser.c
@@ -1,4 +1,3 @@
1#include <slang.h>
2#include "libslang.h" 1#include "libslang.h"
3#include <linux/compiler.h> 2#include <linux/compiler.h>
4#include <linux/list.h> 3#include <linux/list.h>