aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Peter Zijlstra <a.p.zijlstra@chello.nl> 2009-05-15 09:19:28 -0400
committer: Ingo Molnar <mingo@elte.hu> 2009-05-15 09:26:56 -0400
commit: 60db5e09c13109b13830cc9dcae688003fd39e79 (patch)
tree: ac923b89c28d735d2460216202d960e9c6237be0
parent: 789f90fcf6b0b54e655740e9396c954378542c79 (diff)
perf_counter: frequency based adaptive irq_period
Instead of specifying the irq_period for a counter, provide a target interrupt frequency and dynamically adapt the irq_period to match this frequency.

[ Impact: new perf-counter attribute/feature ]

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <20090515132018.646195868@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r--  arch/powerpc/kernel/perf_counter.c  13
-rw-r--r--  arch/x86/kernel/cpu/perf_counter.c  9
-rw-r--r--  include/linux/perf_counter.h  10
-rw-r--r--  kernel/perf_counter.c  63
4 files changed, 68 insertions, 27 deletions
diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c
index bb1b463c1361..db8d5cafc159 100644
--- a/arch/powerpc/kernel/perf_counter.c
+++ b/arch/powerpc/kernel/perf_counter.c
@@ -534,7 +534,7 @@ void hw_perf_enable(void)
534 continue; 534 continue;
535 } 535 }
536 val = 0; 536 val = 0;
537 if (counter->hw_event.irq_period) { 537 if (counter->hw.irq_period) {
538 left = atomic64_read(&counter->hw.period_left); 538 left = atomic64_read(&counter->hw.period_left);
539 if (left < 0x80000000L) 539 if (left < 0x80000000L)
540 val = 0x80000000L - left; 540 val = 0x80000000L - left;
@@ -829,8 +829,6 @@ const struct pmu *hw_perf_counter_init(struct perf_counter *counter)
829 829
830 if (!ppmu) 830 if (!ppmu)
831 return ERR_PTR(-ENXIO); 831 return ERR_PTR(-ENXIO);
832 if ((s64)counter->hw_event.irq_period < 0)
833 return ERR_PTR(-EINVAL);
834 if (!perf_event_raw(&counter->hw_event)) { 832 if (!perf_event_raw(&counter->hw_event)) {
835 ev = perf_event_id(&counter->hw_event); 833 ev = perf_event_id(&counter->hw_event);
836 if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0) 834 if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0)
@@ -901,7 +899,7 @@ const struct pmu *hw_perf_counter_init(struct perf_counter *counter)
901 899
902 counter->hw.config = events[n]; 900 counter->hw.config = events[n];
903 counter->hw.counter_base = cflags[n]; 901 counter->hw.counter_base = cflags[n];
904 atomic64_set(&counter->hw.period_left, counter->hw_event.irq_period); 902 atomic64_set(&counter->hw.period_left, counter->hw.irq_period);
905 903
906 /* 904 /*
907 * See if we need to reserve the PMU. 905 * See if we need to reserve the PMU.
@@ -934,6 +932,7 @@ const struct pmu *hw_perf_counter_init(struct perf_counter *counter)
934static void record_and_restart(struct perf_counter *counter, long val, 932static void record_and_restart(struct perf_counter *counter, long val,
935 struct pt_regs *regs, int nmi) 933 struct pt_regs *regs, int nmi)
936{ 934{
935 u64 period = counter->hw.irq_period;
937 s64 prev, delta, left; 936 s64 prev, delta, left;
938 int record = 0; 937 int record = 0;
939 938
@@ -948,11 +947,11 @@ static void record_and_restart(struct perf_counter *counter, long val,
948 */ 947 */
949 val = 0; 948 val = 0;
950 left = atomic64_read(&counter->hw.period_left) - delta; 949 left = atomic64_read(&counter->hw.period_left) - delta;
951 if (counter->hw_event.irq_period) { 950 if (period) {
952 if (left <= 0) { 951 if (left <= 0) {
953 left += counter->hw_event.irq_period; 952 left += period;
954 if (left <= 0) 953 if (left <= 0)
955 left = counter->hw_event.irq_period; 954 left = period;
956 record = 1; 955 record = 1;
957 } 956 }
958 if (left < 0x80000000L) 957 if (left < 0x80000000L)
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 5a7f718eb1e1..886dcf334bc3 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -286,11 +286,8 @@ static int __hw_perf_counter_init(struct perf_counter *counter)
286 hwc->nmi = 1; 286 hwc->nmi = 1;
287 } 287 }
288 288
289 hwc->irq_period = hw_event->irq_period; 289 atomic64_set(&hwc->period_left,
290 if ((s64)hwc->irq_period <= 0 || hwc->irq_period > x86_pmu.max_period) 290 min(x86_pmu.max_period, hwc->irq_period));
291 hwc->irq_period = x86_pmu.max_period;
292
293 atomic64_set(&hwc->period_left, hwc->irq_period);
294 291
295 /* 292 /*
296 * Raw event type provide the config in the event structure 293 * Raw event type provide the config in the event structure
@@ -458,7 +455,7 @@ x86_perf_counter_set_period(struct perf_counter *counter,
458 struct hw_perf_counter *hwc, int idx) 455 struct hw_perf_counter *hwc, int idx)
459{ 456{
460 s64 left = atomic64_read(&hwc->period_left); 457 s64 left = atomic64_read(&hwc->period_left);
461 s64 period = hwc->irq_period; 458 s64 period = min(x86_pmu.max_period, hwc->irq_period);
462 int err; 459 int err;
463 460
464 /* 461 /*
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index e543ecc129f1..004b6e162b96 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -130,7 +130,11 @@ struct perf_counter_hw_event {
130 */ 130 */
131 __u64 config; 131 __u64 config;
132 132
133 __u64 irq_period; 133 union {
134 __u64 irq_period;
135 __u64 irq_freq;
136 };
137
134 __u32 record_type; 138 __u32 record_type;
135 __u32 read_format; 139 __u32 read_format;
136 140
@@ -146,8 +150,9 @@ struct perf_counter_hw_event {
146 mmap : 1, /* include mmap data */ 150 mmap : 1, /* include mmap data */
147 munmap : 1, /* include munmap data */ 151 munmap : 1, /* include munmap data */
148 comm : 1, /* include comm data */ 152 comm : 1, /* include comm data */
153 freq : 1, /* use freq, not period */
149 154
150 __reserved_1 : 52; 155 __reserved_1 : 51;
151 156
152 __u32 extra_config_len; 157 __u32 extra_config_len;
153 __u32 wakeup_events; /* wakeup every n events */ 158 __u32 wakeup_events; /* wakeup every n events */
@@ -337,6 +342,7 @@ struct hw_perf_counter {
337 atomic64_t prev_count; 342 atomic64_t prev_count;
338 u64 irq_period; 343 u64 irq_period;
339 atomic64_t period_left; 344 atomic64_t period_left;
345 u64 interrupts;
340#endif 346#endif
341}; 347};
342 348
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 93f4a0e4b873..0ad1db4f3d65 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1046,6 +1046,38 @@ int perf_counter_task_enable(void)
1046 return 0; 1046 return 0;
1047} 1047}
1048 1048
1049void perf_adjust_freq(struct perf_counter_context *ctx)
1050{
1051 struct perf_counter *counter;
1052 u64 irq_period;
1053 u64 events, period;
1054 s64 delta;
1055
1056 spin_lock(&ctx->lock);
1057 list_for_each_entry(counter, &ctx->counter_list, list_entry) {
1058 if (counter->state != PERF_COUNTER_STATE_ACTIVE)
1059 continue;
1060
1061 if (!counter->hw_event.freq || !counter->hw_event.irq_freq)
1062 continue;
1063
1064 events = HZ * counter->hw.interrupts * counter->hw.irq_period;
1065 period = div64_u64(events, counter->hw_event.irq_freq);
1066
1067 delta = (s64)(1 + period - counter->hw.irq_period);
1068 delta >>= 1;
1069
1070 irq_period = counter->hw.irq_period + delta;
1071
1072 if (!irq_period)
1073 irq_period = 1;
1074
1075 counter->hw.irq_period = irq_period;
1076 counter->hw.interrupts = 0;
1077 }
1078 spin_unlock(&ctx->lock);
1079}
1080
1049/* 1081/*
1050 * Round-robin a context's counters: 1082 * Round-robin a context's counters:
1051 */ 1083 */
@@ -1081,6 +1113,9 @@ void perf_counter_task_tick(struct task_struct *curr, int cpu)
1081 cpuctx = &per_cpu(perf_cpu_context, cpu); 1113 cpuctx = &per_cpu(perf_cpu_context, cpu);
1082 ctx = &curr->perf_counter_ctx; 1114 ctx = &curr->perf_counter_ctx;
1083 1115
1116 perf_adjust_freq(&cpuctx->ctx);
1117 perf_adjust_freq(ctx);
1118
1084 perf_counter_cpu_sched_out(cpuctx); 1119 perf_counter_cpu_sched_out(cpuctx);
1085 __perf_counter_task_sched_out(ctx); 1120 __perf_counter_task_sched_out(ctx);
1086 1121
@@ -2382,6 +2417,8 @@ int perf_counter_overflow(struct perf_counter *counter,
2382 int events = atomic_read(&counter->event_limit); 2417 int events = atomic_read(&counter->event_limit);
2383 int ret = 0; 2418 int ret = 0;
2384 2419
2420 counter->hw.interrupts++;
2421
2385 /* 2422 /*
2386 * XXX event_limit might not quite work as expected on inherited 2423 * XXX event_limit might not quite work as expected on inherited
2387 * counters 2424 * counters
@@ -2450,6 +2487,7 @@ static enum hrtimer_restart perf_swcounter_hrtimer(struct hrtimer *hrtimer)
2450 enum hrtimer_restart ret = HRTIMER_RESTART; 2487 enum hrtimer_restart ret = HRTIMER_RESTART;
2451 struct perf_counter *counter; 2488 struct perf_counter *counter;
2452 struct pt_regs *regs; 2489 struct pt_regs *regs;
2490 u64 period;
2453 2491
2454 counter = container_of(hrtimer, struct perf_counter, hw.hrtimer); 2492 counter = container_of(hrtimer, struct perf_counter, hw.hrtimer);
2455 counter->pmu->read(counter); 2493 counter->pmu->read(counter);
@@ -2468,7 +2506,8 @@ static enum hrtimer_restart perf_swcounter_hrtimer(struct hrtimer *hrtimer)
2468 ret = HRTIMER_NORESTART; 2506 ret = HRTIMER_NORESTART;
2469 } 2507 }
2470 2508
2471 hrtimer_forward_now(hrtimer, ns_to_ktime(counter->hw.irq_period)); 2509 period = max_t(u64, 10000, counter->hw.irq_period);
2510 hrtimer_forward_now(hrtimer, ns_to_ktime(period));
2472 2511
2473 return ret; 2512 return ret;
2474} 2513}
@@ -2629,8 +2668,9 @@ static int cpu_clock_perf_counter_enable(struct perf_counter *counter)
2629 hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); 2668 hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
2630 hwc->hrtimer.function = perf_swcounter_hrtimer; 2669 hwc->hrtimer.function = perf_swcounter_hrtimer;
2631 if (hwc->irq_period) { 2670 if (hwc->irq_period) {
2671 u64 period = max_t(u64, 10000, hwc->irq_period);
2632 __hrtimer_start_range_ns(&hwc->hrtimer, 2672 __hrtimer_start_range_ns(&hwc->hrtimer,
2633 ns_to_ktime(hwc->irq_period), 0, 2673 ns_to_ktime(period), 0,
2634 HRTIMER_MODE_REL, 0); 2674 HRTIMER_MODE_REL, 0);
2635 } 2675 }
2636 2676
@@ -2679,8 +2719,9 @@ static int task_clock_perf_counter_enable(struct perf_counter *counter)
2679 hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); 2719 hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
2680 hwc->hrtimer.function = perf_swcounter_hrtimer; 2720 hwc->hrtimer.function = perf_swcounter_hrtimer;
2681 if (hwc->irq_period) { 2721 if (hwc->irq_period) {
2722 u64 period = max_t(u64, 10000, hwc->irq_period);
2682 __hrtimer_start_range_ns(&hwc->hrtimer, 2723 __hrtimer_start_range_ns(&hwc->hrtimer,
2683 ns_to_ktime(hwc->irq_period), 0, 2724 ns_to_ktime(period), 0,
2684 HRTIMER_MODE_REL, 0); 2725 HRTIMER_MODE_REL, 0);
2685 } 2726 }
2686 2727
@@ -2811,9 +2852,7 @@ static const struct pmu *tp_perf_counter_init(struct perf_counter *counter)
2811 2852
2812static const struct pmu *sw_perf_counter_init(struct perf_counter *counter) 2853static const struct pmu *sw_perf_counter_init(struct perf_counter *counter)
2813{ 2854{
2814 struct perf_counter_hw_event *hw_event = &counter->hw_event;
2815 const struct pmu *pmu = NULL; 2855 const struct pmu *pmu = NULL;
2816 struct hw_perf_counter *hwc = &counter->hw;
2817 2856
2818 /* 2857 /*
2819 * Software counters (currently) can't in general distinguish 2858 * Software counters (currently) can't in general distinguish
@@ -2826,8 +2865,6 @@ static const struct pmu *sw_perf_counter_init(struct perf_counter *counter)
2826 case PERF_COUNT_CPU_CLOCK: 2865 case PERF_COUNT_CPU_CLOCK:
2827 pmu = &perf_ops_cpu_clock; 2866 pmu = &perf_ops_cpu_clock;
2828 2867
2829 if (hw_event->irq_period && hw_event->irq_period < 10000)
2830 hw_event->irq_period = 10000;
2831 break; 2868 break;
2832 case PERF_COUNT_TASK_CLOCK: 2869 case PERF_COUNT_TASK_CLOCK:
2833 /* 2870 /*
@@ -2839,8 +2876,6 @@ static const struct pmu *sw_perf_counter_init(struct perf_counter *counter)
2839 else 2876 else
2840 pmu = &perf_ops_cpu_clock; 2877 pmu = &perf_ops_cpu_clock;
2841 2878
2842 if (hw_event->irq_period && hw_event->irq_period < 10000)
2843 hw_event->irq_period = 10000;
2844 break; 2879 break;
2845 case PERF_COUNT_PAGE_FAULTS: 2880 case PERF_COUNT_PAGE_FAULTS:
2846 case PERF_COUNT_PAGE_FAULTS_MIN: 2881 case PERF_COUNT_PAGE_FAULTS_MIN:
@@ -2854,9 +2889,6 @@ static const struct pmu *sw_perf_counter_init(struct perf_counter *counter)
2854 break; 2889 break;
2855 } 2890 }
2856 2891
2857 if (pmu)
2858 hwc->irq_period = hw_event->irq_period;
2859
2860 return pmu; 2892 return pmu;
2861} 2893}
2862 2894
@@ -2872,6 +2904,7 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event,
2872{ 2904{
2873 const struct pmu *pmu; 2905 const struct pmu *pmu;
2874 struct perf_counter *counter; 2906 struct perf_counter *counter;
2907 struct hw_perf_counter *hwc;
2875 long err; 2908 long err;
2876 2909
2877 counter = kzalloc(sizeof(*counter), gfpflags); 2910 counter = kzalloc(sizeof(*counter), gfpflags);
@@ -2907,6 +2940,12 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event,
2907 2940
2908 pmu = NULL; 2941 pmu = NULL;
2909 2942
2943 hwc = &counter->hw;
2944 if (hw_event->freq && hw_event->irq_freq)
2945 hwc->irq_period = TICK_NSEC / hw_event->irq_freq;
2946 else
2947 hwc->irq_period = hw_event->irq_period;
2948
2910 /* 2949 /*
2911 * we currently do not support PERF_RECORD_GROUP on inherited counters 2950 * we currently do not support PERF_RECORD_GROUP on inherited counters
2912 */ 2951 */