aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Peter Zijlstra <a.p.zijlstra@chello.nl> 2009-04-08 09:01:33 -0400
committer: Ingo Molnar <mingo@elte.hu> 2009-04-08 13:05:56 -0400
commit: 78f13e9525ba777da25c4ddab89f28e9366a8b7c (patch)
tree: d23d43df02330f39e9c31901df9956d2e58a3474
parent: 4d855457d84b819fefcd1cd1b0a2a0a0ec475c07 (diff)
perf_counter: allow for data addresses to be recorded
Paul suggested we allow for data addresses to be recorded along with the traditional IPs, as POWER can provide these. For now, only the software page-fault events provide data addresses, but in the future POWER might as well for some events. x86 doesn't seem capable of providing this at the moment.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
LKML-Reference: <20090408130409.394816925@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r--  arch/powerpc/kernel/perf_counter.c   2
-rw-r--r--  arch/powerpc/mm/fault.c              8
-rw-r--r--  arch/x86/kernel/cpu/perf_counter.c   2
-rw-r--r--  arch/x86/mm/fault.c                  8
-rw-r--r--  include/linux/perf_counter.h        14
-rw-r--r--  kernel/perf_counter.c               46
6 files changed, 49 insertions, 31 deletions
diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c
index 0697ade84dd3..c9d019f19074 100644
--- a/arch/powerpc/kernel/perf_counter.c
+++ b/arch/powerpc/kernel/perf_counter.c
@@ -749,7 +749,7 @@ static void record_and_restart(struct perf_counter *counter, long val,
749 * Finally record data if requested. 749 * Finally record data if requested.
750 */ 750 */
751 if (record) 751 if (record)
752 perf_counter_overflow(counter, 1, regs); 752 perf_counter_overflow(counter, 1, regs, 0);
753} 753}
754 754
755/* 755/*
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 17bbf6f91fbe..ac0e112031b2 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -171,7 +171,7 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address,
171 die("Weird page fault", regs, SIGSEGV); 171 die("Weird page fault", regs, SIGSEGV);
172 } 172 }
173 173
174 perf_swcounter_event(PERF_COUNT_PAGE_FAULTS, 1, 0, regs); 174 perf_swcounter_event(PERF_COUNT_PAGE_FAULTS, 1, 0, regs, address);
175 175
176 /* When running in the kernel we expect faults to occur only to 176 /* When running in the kernel we expect faults to occur only to
177 * addresses in user space. All other faults represent errors in the 177 * addresses in user space. All other faults represent errors in the
@@ -312,7 +312,8 @@ good_area:
312 } 312 }
313 if (ret & VM_FAULT_MAJOR) { 313 if (ret & VM_FAULT_MAJOR) {
314 current->maj_flt++; 314 current->maj_flt++;
315 perf_swcounter_event(PERF_COUNT_PAGE_FAULTS_MAJ, 1, 0, regs); 315 perf_swcounter_event(PERF_COUNT_PAGE_FAULTS_MAJ, 1, 0,
316 regs, address);
316#ifdef CONFIG_PPC_SMLPAR 317#ifdef CONFIG_PPC_SMLPAR
317 if (firmware_has_feature(FW_FEATURE_CMO)) { 318 if (firmware_has_feature(FW_FEATURE_CMO)) {
318 preempt_disable(); 319 preempt_disable();
@@ -322,7 +323,8 @@ good_area:
322#endif 323#endif
323 } else { 324 } else {
324 current->min_flt++; 325 current->min_flt++;
325 perf_swcounter_event(PERF_COUNT_PAGE_FAULTS_MIN, 1, 0, regs); 326 perf_swcounter_event(PERF_COUNT_PAGE_FAULTS_MIN, 1, 0,
327 regs, address);
326 } 328 }
327 up_read(&mm->mmap_sem); 329 up_read(&mm->mmap_sem);
328 return 0; 330 return 0;
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 1116a41bc7b5..0fcbaab83f9b 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -800,7 +800,7 @@ again:
800 continue; 800 continue;
801 801
802 perf_save_and_restart(counter); 802 perf_save_and_restart(counter);
803 if (perf_counter_overflow(counter, nmi, regs)) 803 if (perf_counter_overflow(counter, nmi, regs, 0))
804 __pmc_generic_disable(counter, &counter->hw, bit); 804 __pmc_generic_disable(counter, &counter->hw, bit);
805 } 805 }
806 806
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index f2d3324d9215..6f9df2babe48 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -1045,7 +1045,7 @@ do_page_fault(struct pt_regs *regs, unsigned long error_code)
1045 if (unlikely(error_code & PF_RSVD)) 1045 if (unlikely(error_code & PF_RSVD))
1046 pgtable_bad(regs, error_code, address); 1046 pgtable_bad(regs, error_code, address);
1047 1047
1048 perf_swcounter_event(PERF_COUNT_PAGE_FAULTS, 1, 0, regs); 1048 perf_swcounter_event(PERF_COUNT_PAGE_FAULTS, 1, 0, regs, address);
1049 1049
1050 /* 1050 /*
1051 * If we're in an interrupt, have no user context or are running 1051 * If we're in an interrupt, have no user context or are running
@@ -1142,10 +1142,12 @@ good_area:
1142 1142
1143 if (fault & VM_FAULT_MAJOR) { 1143 if (fault & VM_FAULT_MAJOR) {
1144 tsk->maj_flt++; 1144 tsk->maj_flt++;
1145 perf_swcounter_event(PERF_COUNT_PAGE_FAULTS_MAJ, 1, 0, regs); 1145 perf_swcounter_event(PERF_COUNT_PAGE_FAULTS_MAJ, 1, 0,
1146 regs, address);
1146 } else { 1147 } else {
1147 tsk->min_flt++; 1148 tsk->min_flt++;
1148 perf_swcounter_event(PERF_COUNT_PAGE_FAULTS_MIN, 1, 0, regs); 1149 perf_swcounter_event(PERF_COUNT_PAGE_FAULTS_MIN, 1, 0,
1150 regs, address);
1149 } 1151 }
1150 1152
1151 check_v8086_mode(regs, address, tsk); 1153 check_v8086_mode(regs, address, tsk);
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 8bd1be58c938..c22363a4f746 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -101,8 +101,9 @@ enum perf_counter_record_format {
101 PERF_RECORD_IP = 1U << 0, 101 PERF_RECORD_IP = 1U << 0,
102 PERF_RECORD_TID = 1U << 1, 102 PERF_RECORD_TID = 1U << 1,
103 PERF_RECORD_TIME = 1U << 2, 103 PERF_RECORD_TIME = 1U << 2,
104 PERF_RECORD_GROUP = 1U << 3, 104 PERF_RECORD_ADDR = 1U << 3,
105 PERF_RECORD_CALLCHAIN = 1U << 4, 105 PERF_RECORD_GROUP = 1U << 4,
106 PERF_RECORD_CALLCHAIN = 1U << 5,
106}; 107};
107 108
108/* 109/*
@@ -251,6 +252,7 @@ enum perf_event_type {
251 * { u64 ip; } && PERF_RECORD_IP 252 * { u64 ip; } && PERF_RECORD_IP
252 * { u32 pid, tid; } && PERF_RECORD_TID 253 * { u32 pid, tid; } && PERF_RECORD_TID
253 * { u64 time; } && PERF_RECORD_TIME 254 * { u64 time; } && PERF_RECORD_TIME
255 * { u64 addr; } && PERF_RECORD_ADDR
254 * 256 *
255 * { u64 nr; 257 * { u64 nr;
256 * { u64 event, val; } cnt[nr]; } && PERF_RECORD_GROUP 258 * { u64 event, val; } cnt[nr]; } && PERF_RECORD_GROUP
@@ -537,7 +539,7 @@ extern int hw_perf_group_sched_in(struct perf_counter *group_leader,
537extern void perf_counter_update_userpage(struct perf_counter *counter); 539extern void perf_counter_update_userpage(struct perf_counter *counter);
538 540
539extern int perf_counter_overflow(struct perf_counter *counter, 541extern int perf_counter_overflow(struct perf_counter *counter,
540 int nmi, struct pt_regs *regs); 542 int nmi, struct pt_regs *regs, u64 addr);
541/* 543/*
542 * Return 1 for a software counter, 0 for a hardware counter 544 * Return 1 for a software counter, 0 for a hardware counter
543 */ 545 */
@@ -547,7 +549,7 @@ static inline int is_software_counter(struct perf_counter *counter)
547 perf_event_type(&counter->hw_event) != PERF_TYPE_HARDWARE; 549 perf_event_type(&counter->hw_event) != PERF_TYPE_HARDWARE;
548} 550}
549 551
550extern void perf_swcounter_event(u32, u64, int, struct pt_regs *); 552extern void perf_swcounter_event(u32, u64, int, struct pt_regs *, u64);
551 553
552extern void perf_counter_mmap(unsigned long addr, unsigned long len, 554extern void perf_counter_mmap(unsigned long addr, unsigned long len,
553 unsigned long pgoff, struct file *file); 555 unsigned long pgoff, struct file *file);
@@ -584,8 +586,8 @@ static inline int perf_counter_task_disable(void) { return -EINVAL; }
584static inline int perf_counter_task_enable(void) { return -EINVAL; } 586static inline int perf_counter_task_enable(void) { return -EINVAL; }
585 587
586static inline void 588static inline void
587perf_swcounter_event(u32 event, u64 nr, int nmi, struct pt_regs *regs) { } 589perf_swcounter_event(u32 event, u64 nr, int nmi,
588 590 struct pt_regs *regs, u64 addr) { }
589 591
590static inline void 592static inline void
591perf_counter_mmap(unsigned long addr, unsigned long len, 593perf_counter_mmap(unsigned long addr, unsigned long len,
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 4dc8600d2825..321c57e3556f 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -800,7 +800,7 @@ void perf_counter_task_sched_out(struct task_struct *task, int cpu)
800 update_context_time(ctx); 800 update_context_time(ctx);
801 801
802 regs = task_pt_regs(task); 802 regs = task_pt_regs(task);
803 perf_swcounter_event(PERF_COUNT_CONTEXT_SWITCHES, 1, 1, regs); 803 perf_swcounter_event(PERF_COUNT_CONTEXT_SWITCHES, 1, 1, regs, 0);
804 __perf_counter_sched_out(ctx, cpuctx); 804 __perf_counter_sched_out(ctx, cpuctx);
805 805
806 cpuctx->task_ctx = NULL; 806 cpuctx->task_ctx = NULL;
@@ -1810,7 +1810,7 @@ static void perf_output_end(struct perf_output_handle *handle)
1810} 1810}
1811 1811
1812static void perf_counter_output(struct perf_counter *counter, 1812static void perf_counter_output(struct perf_counter *counter,
1813 int nmi, struct pt_regs *regs) 1813 int nmi, struct pt_regs *regs, u64 addr)
1814{ 1814{
1815 int ret; 1815 int ret;
1816 u64 record_type = counter->hw_event.record_type; 1816 u64 record_type = counter->hw_event.record_type;
@@ -1860,6 +1860,11 @@ static void perf_counter_output(struct perf_counter *counter,
1860 header.size += sizeof(u64); 1860 header.size += sizeof(u64);
1861 } 1861 }
1862 1862
1863 if (record_type & PERF_RECORD_ADDR) {
1864 header.type |= PERF_RECORD_ADDR;
1865 header.size += sizeof(u64);
1866 }
1867
1863 if (record_type & PERF_RECORD_GROUP) { 1868 if (record_type & PERF_RECORD_GROUP) {
1864 header.type |= PERF_RECORD_GROUP; 1869 header.type |= PERF_RECORD_GROUP;
1865 header.size += sizeof(u64) + 1870 header.size += sizeof(u64) +
@@ -1892,6 +1897,9 @@ static void perf_counter_output(struct perf_counter *counter,
1892 if (record_type & PERF_RECORD_TIME) 1897 if (record_type & PERF_RECORD_TIME)
1893 perf_output_put(&handle, time); 1898 perf_output_put(&handle, time);
1894 1899
1900 if (record_type & PERF_RECORD_ADDR)
1901 perf_output_put(&handle, addr);
1902
1895 if (record_type & PERF_RECORD_GROUP) { 1903 if (record_type & PERF_RECORD_GROUP) {
1896 struct perf_counter *leader, *sub; 1904 struct perf_counter *leader, *sub;
1897 u64 nr = counter->nr_siblings; 1905 u64 nr = counter->nr_siblings;
@@ -2158,7 +2166,7 @@ void perf_counter_munmap(unsigned long addr, unsigned long len,
2158 */ 2166 */
2159 2167
2160int perf_counter_overflow(struct perf_counter *counter, 2168int perf_counter_overflow(struct perf_counter *counter,
2161 int nmi, struct pt_regs *regs) 2169 int nmi, struct pt_regs *regs, u64 addr)
2162{ 2170{
2163 int events = atomic_read(&counter->event_limit); 2171 int events = atomic_read(&counter->event_limit);
2164 int ret = 0; 2172 int ret = 0;
@@ -2175,7 +2183,7 @@ int perf_counter_overflow(struct perf_counter *counter,
2175 perf_counter_disable(counter); 2183 perf_counter_disable(counter);
2176 } 2184 }
2177 2185
2178 perf_counter_output(counter, nmi, regs); 2186 perf_counter_output(counter, nmi, regs, addr);
2179 return ret; 2187 return ret;
2180} 2188}
2181 2189
@@ -2240,7 +2248,7 @@ static enum hrtimer_restart perf_swcounter_hrtimer(struct hrtimer *hrtimer)
2240 regs = task_pt_regs(current); 2248 regs = task_pt_regs(current);
2241 2249
2242 if (regs) { 2250 if (regs) {
2243 if (perf_counter_overflow(counter, 0, regs)) 2251 if (perf_counter_overflow(counter, 0, regs, 0))
2244 ret = HRTIMER_NORESTART; 2252 ret = HRTIMER_NORESTART;
2245 } 2253 }
2246 2254
@@ -2250,11 +2258,11 @@ static enum hrtimer_restart perf_swcounter_hrtimer(struct hrtimer *hrtimer)
2250} 2258}
2251 2259
2252static void perf_swcounter_overflow(struct perf_counter *counter, 2260static void perf_swcounter_overflow(struct perf_counter *counter,
2253 int nmi, struct pt_regs *regs) 2261 int nmi, struct pt_regs *regs, u64 addr)
2254{ 2262{
2255 perf_swcounter_update(counter); 2263 perf_swcounter_update(counter);
2256 perf_swcounter_set_period(counter); 2264 perf_swcounter_set_period(counter);
2257 if (perf_counter_overflow(counter, nmi, regs)) 2265 if (perf_counter_overflow(counter, nmi, regs, addr))
2258 /* soft-disable the counter */ 2266 /* soft-disable the counter */
2259 ; 2267 ;
2260 2268
@@ -2286,16 +2294,17 @@ static int perf_swcounter_match(struct perf_counter *counter,
2286} 2294}
2287 2295
2288static void perf_swcounter_add(struct perf_counter *counter, u64 nr, 2296static void perf_swcounter_add(struct perf_counter *counter, u64 nr,
2289 int nmi, struct pt_regs *regs) 2297 int nmi, struct pt_regs *regs, u64 addr)
2290{ 2298{
2291 int neg = atomic64_add_negative(nr, &counter->hw.count); 2299 int neg = atomic64_add_negative(nr, &counter->hw.count);
2292 if (counter->hw.irq_period && !neg) 2300 if (counter->hw.irq_period && !neg)
2293 perf_swcounter_overflow(counter, nmi, regs); 2301 perf_swcounter_overflow(counter, nmi, regs, addr);
2294} 2302}
2295 2303
2296static void perf_swcounter_ctx_event(struct perf_counter_context *ctx, 2304static void perf_swcounter_ctx_event(struct perf_counter_context *ctx,
2297 enum perf_event_types type, u32 event, 2305 enum perf_event_types type, u32 event,
2298 u64 nr, int nmi, struct pt_regs *regs) 2306 u64 nr, int nmi, struct pt_regs *regs,
2307 u64 addr)
2299{ 2308{
2300 struct perf_counter *counter; 2309 struct perf_counter *counter;
2301 2310
@@ -2305,7 +2314,7 @@ static void perf_swcounter_ctx_event(struct perf_counter_context *ctx,
2305 rcu_read_lock(); 2314 rcu_read_lock();
2306 list_for_each_entry_rcu(counter, &ctx->event_list, event_entry) { 2315 list_for_each_entry_rcu(counter, &ctx->event_list, event_entry) {
2307 if (perf_swcounter_match(counter, type, event, regs)) 2316 if (perf_swcounter_match(counter, type, event, regs))
2308 perf_swcounter_add(counter, nr, nmi, regs); 2317 perf_swcounter_add(counter, nr, nmi, regs, addr);
2309 } 2318 }
2310 rcu_read_unlock(); 2319 rcu_read_unlock();
2311} 2320}
@@ -2325,7 +2334,8 @@ static int *perf_swcounter_recursion_context(struct perf_cpu_context *cpuctx)
2325} 2334}
2326 2335
2327static void __perf_swcounter_event(enum perf_event_types type, u32 event, 2336static void __perf_swcounter_event(enum perf_event_types type, u32 event,
2328 u64 nr, int nmi, struct pt_regs *regs) 2337 u64 nr, int nmi, struct pt_regs *regs,
2338 u64 addr)
2329{ 2339{
2330 struct perf_cpu_context *cpuctx = &get_cpu_var(perf_cpu_context); 2340 struct perf_cpu_context *cpuctx = &get_cpu_var(perf_cpu_context);
2331 int *recursion = perf_swcounter_recursion_context(cpuctx); 2341 int *recursion = perf_swcounter_recursion_context(cpuctx);
@@ -2336,10 +2346,11 @@ static void __perf_swcounter_event(enum perf_event_types type, u32 event,
2336 (*recursion)++; 2346 (*recursion)++;
2337 barrier(); 2347 barrier();
2338 2348
2339 perf_swcounter_ctx_event(&cpuctx->ctx, type, event, nr, nmi, regs); 2349 perf_swcounter_ctx_event(&cpuctx->ctx, type, event,
2350 nr, nmi, regs, addr);
2340 if (cpuctx->task_ctx) { 2351 if (cpuctx->task_ctx) {
2341 perf_swcounter_ctx_event(cpuctx->task_ctx, type, event, 2352 perf_swcounter_ctx_event(cpuctx->task_ctx, type, event,
2342 nr, nmi, regs); 2353 nr, nmi, regs, addr);
2343 } 2354 }
2344 2355
2345 barrier(); 2356 barrier();
@@ -2349,9 +2360,10 @@ out:
2349 put_cpu_var(perf_cpu_context); 2360 put_cpu_var(perf_cpu_context);
2350} 2361}
2351 2362
2352void perf_swcounter_event(u32 event, u64 nr, int nmi, struct pt_regs *regs) 2363void
2364perf_swcounter_event(u32 event, u64 nr, int nmi, struct pt_regs *regs, u64 addr)
2353{ 2365{
2354 __perf_swcounter_event(PERF_TYPE_SOFTWARE, event, nr, nmi, regs); 2366 __perf_swcounter_event(PERF_TYPE_SOFTWARE, event, nr, nmi, regs, addr);
2355} 2367}
2356 2368
2357static void perf_swcounter_read(struct perf_counter *counter) 2369static void perf_swcounter_read(struct perf_counter *counter)
@@ -2548,7 +2560,7 @@ void perf_tpcounter_event(int event_id)
2548 if (!regs) 2560 if (!regs)
2549 regs = task_pt_regs(current); 2561 regs = task_pt_regs(current);
2550 2562
2551 __perf_swcounter_event(PERF_TYPE_TRACEPOINT, event_id, 1, 1, regs); 2563 __perf_swcounter_event(PERF_TYPE_TRACEPOINT, event_id, 1, 1, regs, 0);
2552} 2564}
2553 2565
2554extern int ftrace_profile_enable(int); 2566extern int ftrace_profile_enable(int);