diff options
author | Peter Zijlstra <a.p.zijlstra@chello.nl> | 2009-05-15 09:19:28 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2009-05-15 09:26:56 -0400 |
commit | 60db5e09c13109b13830cc9dcae688003fd39e79 (patch) | |
tree | ac923b89c28d735d2460216202d960e9c6237be0 | |
parent | 789f90fcf6b0b54e655740e9396c954378542c79 (diff) |
perf_counter: frequency based adaptive irq_period

Instead of specifying the irq_period for a counter, provide a target interrupt
frequency and dynamically adapt the irq_period to match this frequency.

[ Impact: new perf-counter attribute/feature ]

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <20090515132018.646195868@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r-- | arch/powerpc/kernel/perf_counter.c | 13 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/perf_counter.c | 9 | ||||
-rw-r--r-- | include/linux/perf_counter.h | 10 | ||||
-rw-r--r-- | kernel/perf_counter.c | 63 |
4 files changed, 68 insertions, 27 deletions
diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c index bb1b463c1361..db8d5cafc159 100644 --- a/arch/powerpc/kernel/perf_counter.c +++ b/arch/powerpc/kernel/perf_counter.c | |||
@@ -534,7 +534,7 @@ void hw_perf_enable(void) | |||
534 | continue; | 534 | continue; |
535 | } | 535 | } |
536 | val = 0; | 536 | val = 0; |
537 | if (counter->hw_event.irq_period) { | 537 | if (counter->hw.irq_period) { |
538 | left = atomic64_read(&counter->hw.period_left); | 538 | left = atomic64_read(&counter->hw.period_left); |
539 | if (left < 0x80000000L) | 539 | if (left < 0x80000000L) |
540 | val = 0x80000000L - left; | 540 | val = 0x80000000L - left; |
@@ -829,8 +829,6 @@ const struct pmu *hw_perf_counter_init(struct perf_counter *counter) | |||
829 | 829 | ||
830 | if (!ppmu) | 830 | if (!ppmu) |
831 | return ERR_PTR(-ENXIO); | 831 | return ERR_PTR(-ENXIO); |
832 | if ((s64)counter->hw_event.irq_period < 0) | ||
833 | return ERR_PTR(-EINVAL); | ||
834 | if (!perf_event_raw(&counter->hw_event)) { | 832 | if (!perf_event_raw(&counter->hw_event)) { |
835 | ev = perf_event_id(&counter->hw_event); | 833 | ev = perf_event_id(&counter->hw_event); |
836 | if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0) | 834 | if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0) |
@@ -901,7 +899,7 @@ const struct pmu *hw_perf_counter_init(struct perf_counter *counter) | |||
901 | 899 | ||
902 | counter->hw.config = events[n]; | 900 | counter->hw.config = events[n]; |
903 | counter->hw.counter_base = cflags[n]; | 901 | counter->hw.counter_base = cflags[n]; |
904 | atomic64_set(&counter->hw.period_left, counter->hw_event.irq_period); | 902 | atomic64_set(&counter->hw.period_left, counter->hw.irq_period); |
905 | 903 | ||
906 | /* | 904 | /* |
907 | * See if we need to reserve the PMU. | 905 | * See if we need to reserve the PMU. |
@@ -934,6 +932,7 @@ const struct pmu *hw_perf_counter_init(struct perf_counter *counter) | |||
934 | static void record_and_restart(struct perf_counter *counter, long val, | 932 | static void record_and_restart(struct perf_counter *counter, long val, |
935 | struct pt_regs *regs, int nmi) | 933 | struct pt_regs *regs, int nmi) |
936 | { | 934 | { |
935 | u64 period = counter->hw.irq_period; | ||
937 | s64 prev, delta, left; | 936 | s64 prev, delta, left; |
938 | int record = 0; | 937 | int record = 0; |
939 | 938 | ||
@@ -948,11 +947,11 @@ static void record_and_restart(struct perf_counter *counter, long val, | |||
948 | */ | 947 | */ |
949 | val = 0; | 948 | val = 0; |
950 | left = atomic64_read(&counter->hw.period_left) - delta; | 949 | left = atomic64_read(&counter->hw.period_left) - delta; |
951 | if (counter->hw_event.irq_period) { | 950 | if (period) { |
952 | if (left <= 0) { | 951 | if (left <= 0) { |
953 | left += counter->hw_event.irq_period; | 952 | left += period; |
954 | if (left <= 0) | 953 | if (left <= 0) |
955 | left = counter->hw_event.irq_period; | 954 | left = period; |
956 | record = 1; | 955 | record = 1; |
957 | } | 956 | } |
958 | if (left < 0x80000000L) | 957 | if (left < 0x80000000L) |
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index 5a7f718eb1e1..886dcf334bc3 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c | |||
@@ -286,11 +286,8 @@ static int __hw_perf_counter_init(struct perf_counter *counter) | |||
286 | hwc->nmi = 1; | 286 | hwc->nmi = 1; |
287 | } | 287 | } |
288 | 288 | ||
289 | hwc->irq_period = hw_event->irq_period; | 289 | atomic64_set(&hwc->period_left, |
290 | if ((s64)hwc->irq_period <= 0 || hwc->irq_period > x86_pmu.max_period) | 290 | min(x86_pmu.max_period, hwc->irq_period)); |
291 | hwc->irq_period = x86_pmu.max_period; | ||
292 | |||
293 | atomic64_set(&hwc->period_left, hwc->irq_period); | ||
294 | 291 | ||
295 | /* | 292 | /* |
296 | * Raw event type provide the config in the event structure | 293 | * Raw event type provide the config in the event structure |
@@ -458,7 +455,7 @@ x86_perf_counter_set_period(struct perf_counter *counter, | |||
458 | struct hw_perf_counter *hwc, int idx) | 455 | struct hw_perf_counter *hwc, int idx) |
459 | { | 456 | { |
460 | s64 left = atomic64_read(&hwc->period_left); | 457 | s64 left = atomic64_read(&hwc->period_left); |
461 | s64 period = hwc->irq_period; | 458 | s64 period = min(x86_pmu.max_period, hwc->irq_period); |
462 | int err; | 459 | int err; |
463 | 460 | ||
464 | /* | 461 | /* |
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index e543ecc129f1..004b6e162b96 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h | |||
@@ -130,7 +130,11 @@ struct perf_counter_hw_event { | |||
130 | */ | 130 | */ |
131 | __u64 config; | 131 | __u64 config; |
132 | 132 | ||
133 | __u64 irq_period; | 133 | union { |
134 | __u64 irq_period; | ||
135 | __u64 irq_freq; | ||
136 | }; | ||
137 | |||
134 | __u32 record_type; | 138 | __u32 record_type; |
135 | __u32 read_format; | 139 | __u32 read_format; |
136 | 140 | ||
@@ -146,8 +150,9 @@ struct perf_counter_hw_event { | |||
146 | mmap : 1, /* include mmap data */ | 150 | mmap : 1, /* include mmap data */ |
147 | munmap : 1, /* include munmap data */ | 151 | munmap : 1, /* include munmap data */ |
148 | comm : 1, /* include comm data */ | 152 | comm : 1, /* include comm data */ |
153 | freq : 1, /* use freq, not period */ | ||
149 | 154 | ||
150 | __reserved_1 : 52; | 155 | __reserved_1 : 51; |
151 | 156 | ||
152 | __u32 extra_config_len; | 157 | __u32 extra_config_len; |
153 | __u32 wakeup_events; /* wakeup every n events */ | 158 | __u32 wakeup_events; /* wakeup every n events */ |
@@ -337,6 +342,7 @@ struct hw_perf_counter { | |||
337 | atomic64_t prev_count; | 342 | atomic64_t prev_count; |
338 | u64 irq_period; | 343 | u64 irq_period; |
339 | atomic64_t period_left; | 344 | atomic64_t period_left; |
345 | u64 interrupts; | ||
340 | #endif | 346 | #endif |
341 | }; | 347 | }; |
342 | 348 | ||
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 93f4a0e4b873..0ad1db4f3d65 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c | |||
@@ -1046,6 +1046,38 @@ int perf_counter_task_enable(void) | |||
1046 | return 0; | 1046 | return 0; |
1047 | } | 1047 | } |
1048 | 1048 | ||
1049 | void perf_adjust_freq(struct perf_counter_context *ctx) | ||
1050 | { | ||
1051 | struct perf_counter *counter; | ||
1052 | u64 irq_period; | ||
1053 | u64 events, period; | ||
1054 | s64 delta; | ||
1055 | |||
1056 | spin_lock(&ctx->lock); | ||
1057 | list_for_each_entry(counter, &ctx->counter_list, list_entry) { | ||
1058 | if (counter->state != PERF_COUNTER_STATE_ACTIVE) | ||
1059 | continue; | ||
1060 | |||
1061 | if (!counter->hw_event.freq || !counter->hw_event.irq_freq) | ||
1062 | continue; | ||
1063 | |||
1064 | events = HZ * counter->hw.interrupts * counter->hw.irq_period; | ||
1065 | period = div64_u64(events, counter->hw_event.irq_freq); | ||
1066 | |||
1067 | delta = (s64)(1 + period - counter->hw.irq_period); | ||
1068 | delta >>= 1; | ||
1069 | |||
1070 | irq_period = counter->hw.irq_period + delta; | ||
1071 | |||
1072 | if (!irq_period) | ||
1073 | irq_period = 1; | ||
1074 | |||
1075 | counter->hw.irq_period = irq_period; | ||
1076 | counter->hw.interrupts = 0; | ||
1077 | } | ||
1078 | spin_unlock(&ctx->lock); | ||
1079 | } | ||
1080 | |||
1049 | /* | 1081 | /* |
1050 | * Round-robin a context's counters: | 1082 | * Round-robin a context's counters: |
1051 | */ | 1083 | */ |
@@ -1081,6 +1113,9 @@ void perf_counter_task_tick(struct task_struct *curr, int cpu) | |||
1081 | cpuctx = &per_cpu(perf_cpu_context, cpu); | 1113 | cpuctx = &per_cpu(perf_cpu_context, cpu); |
1082 | ctx = &curr->perf_counter_ctx; | 1114 | ctx = &curr->perf_counter_ctx; |
1083 | 1115 | ||
1116 | perf_adjust_freq(&cpuctx->ctx); | ||
1117 | perf_adjust_freq(ctx); | ||
1118 | |||
1084 | perf_counter_cpu_sched_out(cpuctx); | 1119 | perf_counter_cpu_sched_out(cpuctx); |
1085 | __perf_counter_task_sched_out(ctx); | 1120 | __perf_counter_task_sched_out(ctx); |
1086 | 1121 | ||
@@ -2382,6 +2417,8 @@ int perf_counter_overflow(struct perf_counter *counter, | |||
2382 | int events = atomic_read(&counter->event_limit); | 2417 | int events = atomic_read(&counter->event_limit); |
2383 | int ret = 0; | 2418 | int ret = 0; |
2384 | 2419 | ||
2420 | counter->hw.interrupts++; | ||
2421 | |||
2385 | /* | 2422 | /* |
2386 | * XXX event_limit might not quite work as expected on inherited | 2423 | * XXX event_limit might not quite work as expected on inherited |
2387 | * counters | 2424 | * counters |
@@ -2450,6 +2487,7 @@ static enum hrtimer_restart perf_swcounter_hrtimer(struct hrtimer *hrtimer) | |||
2450 | enum hrtimer_restart ret = HRTIMER_RESTART; | 2487 | enum hrtimer_restart ret = HRTIMER_RESTART; |
2451 | struct perf_counter *counter; | 2488 | struct perf_counter *counter; |
2452 | struct pt_regs *regs; | 2489 | struct pt_regs *regs; |
2490 | u64 period; | ||
2453 | 2491 | ||
2454 | counter = container_of(hrtimer, struct perf_counter, hw.hrtimer); | 2492 | counter = container_of(hrtimer, struct perf_counter, hw.hrtimer); |
2455 | counter->pmu->read(counter); | 2493 | counter->pmu->read(counter); |
@@ -2468,7 +2506,8 @@ static enum hrtimer_restart perf_swcounter_hrtimer(struct hrtimer *hrtimer) | |||
2468 | ret = HRTIMER_NORESTART; | 2506 | ret = HRTIMER_NORESTART; |
2469 | } | 2507 | } |
2470 | 2508 | ||
2471 | hrtimer_forward_now(hrtimer, ns_to_ktime(counter->hw.irq_period)); | 2509 | period = max_t(u64, 10000, counter->hw.irq_period); |
2510 | hrtimer_forward_now(hrtimer, ns_to_ktime(period)); | ||
2472 | 2511 | ||
2473 | return ret; | 2512 | return ret; |
2474 | } | 2513 | } |
@@ -2629,8 +2668,9 @@ static int cpu_clock_perf_counter_enable(struct perf_counter *counter) | |||
2629 | hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); | 2668 | hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); |
2630 | hwc->hrtimer.function = perf_swcounter_hrtimer; | 2669 | hwc->hrtimer.function = perf_swcounter_hrtimer; |
2631 | if (hwc->irq_period) { | 2670 | if (hwc->irq_period) { |
2671 | u64 period = max_t(u64, 10000, hwc->irq_period); | ||
2632 | __hrtimer_start_range_ns(&hwc->hrtimer, | 2672 | __hrtimer_start_range_ns(&hwc->hrtimer, |
2633 | ns_to_ktime(hwc->irq_period), 0, | 2673 | ns_to_ktime(period), 0, |
2634 | HRTIMER_MODE_REL, 0); | 2674 | HRTIMER_MODE_REL, 0); |
2635 | } | 2675 | } |
2636 | 2676 | ||
@@ -2679,8 +2719,9 @@ static int task_clock_perf_counter_enable(struct perf_counter *counter) | |||
2679 | hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); | 2719 | hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); |
2680 | hwc->hrtimer.function = perf_swcounter_hrtimer; | 2720 | hwc->hrtimer.function = perf_swcounter_hrtimer; |
2681 | if (hwc->irq_period) { | 2721 | if (hwc->irq_period) { |
2722 | u64 period = max_t(u64, 10000, hwc->irq_period); | ||
2682 | __hrtimer_start_range_ns(&hwc->hrtimer, | 2723 | __hrtimer_start_range_ns(&hwc->hrtimer, |
2683 | ns_to_ktime(hwc->irq_period), 0, | 2724 | ns_to_ktime(period), 0, |
2684 | HRTIMER_MODE_REL, 0); | 2725 | HRTIMER_MODE_REL, 0); |
2685 | } | 2726 | } |
2686 | 2727 | ||
@@ -2811,9 +2852,7 @@ static const struct pmu *tp_perf_counter_init(struct perf_counter *counter) | |||
2811 | 2852 | ||
2812 | static const struct pmu *sw_perf_counter_init(struct perf_counter *counter) | 2853 | static const struct pmu *sw_perf_counter_init(struct perf_counter *counter) |
2813 | { | 2854 | { |
2814 | struct perf_counter_hw_event *hw_event = &counter->hw_event; | ||
2815 | const struct pmu *pmu = NULL; | 2855 | const struct pmu *pmu = NULL; |
2816 | struct hw_perf_counter *hwc = &counter->hw; | ||
2817 | 2856 | ||
2818 | /* | 2857 | /* |
2819 | * Software counters (currently) can't in general distinguish | 2858 | * Software counters (currently) can't in general distinguish |
@@ -2826,8 +2865,6 @@ static const struct pmu *sw_perf_counter_init(struct perf_counter *counter) | |||
2826 | case PERF_COUNT_CPU_CLOCK: | 2865 | case PERF_COUNT_CPU_CLOCK: |
2827 | pmu = &perf_ops_cpu_clock; | 2866 | pmu = &perf_ops_cpu_clock; |
2828 | 2867 | ||
2829 | if (hw_event->irq_period && hw_event->irq_period < 10000) | ||
2830 | hw_event->irq_period = 10000; | ||
2831 | break; | 2868 | break; |
2832 | case PERF_COUNT_TASK_CLOCK: | 2869 | case PERF_COUNT_TASK_CLOCK: |
2833 | /* | 2870 | /* |
@@ -2839,8 +2876,6 @@ static const struct pmu *sw_perf_counter_init(struct perf_counter *counter) | |||
2839 | else | 2876 | else |
2840 | pmu = &perf_ops_cpu_clock; | 2877 | pmu = &perf_ops_cpu_clock; |
2841 | 2878 | ||
2842 | if (hw_event->irq_period && hw_event->irq_period < 10000) | ||
2843 | hw_event->irq_period = 10000; | ||
2844 | break; | 2879 | break; |
2845 | case PERF_COUNT_PAGE_FAULTS: | 2880 | case PERF_COUNT_PAGE_FAULTS: |
2846 | case PERF_COUNT_PAGE_FAULTS_MIN: | 2881 | case PERF_COUNT_PAGE_FAULTS_MIN: |
@@ -2854,9 +2889,6 @@ static const struct pmu *sw_perf_counter_init(struct perf_counter *counter) | |||
2854 | break; | 2889 | break; |
2855 | } | 2890 | } |
2856 | 2891 | ||
2857 | if (pmu) | ||
2858 | hwc->irq_period = hw_event->irq_period; | ||
2859 | |||
2860 | return pmu; | 2892 | return pmu; |
2861 | } | 2893 | } |
2862 | 2894 | ||
@@ -2872,6 +2904,7 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event, | |||
2872 | { | 2904 | { |
2873 | const struct pmu *pmu; | 2905 | const struct pmu *pmu; |
2874 | struct perf_counter *counter; | 2906 | struct perf_counter *counter; |
2907 | struct hw_perf_counter *hwc; | ||
2875 | long err; | 2908 | long err; |
2876 | 2909 | ||
2877 | counter = kzalloc(sizeof(*counter), gfpflags); | 2910 | counter = kzalloc(sizeof(*counter), gfpflags); |
@@ -2907,6 +2940,12 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event, | |||
2907 | 2940 | ||
2908 | pmu = NULL; | 2941 | pmu = NULL; |
2909 | 2942 | ||
2943 | hwc = &counter->hw; | ||
2944 | if (hw_event->freq && hw_event->irq_freq) | ||
2945 | hwc->irq_period = TICK_NSEC / hw_event->irq_freq; | ||
2946 | else | ||
2947 | hwc->irq_period = hw_event->irq_period; | ||
2948 | |||
2910 | /* | 2949 | /* |
2911 | * we currently do not support PERF_RECORD_GROUP on inherited counters | 2950 | * we currently do not support PERF_RECORD_GROUP on inherited counters |
2912 | */ | 2951 | */ |