author     Peter Zijlstra <a.p.zijlstra@chello.nl>        2009-06-10 07:40:57 -0400
committer  Ingo Molnar <mingo@elte.hu>                    2009-06-10 10:55:26 -0400
commit     bd2b5b12849a3446abad0b25e920f86f5480b309
tree       b0eacf6002f2015c0483390619a3f874bcb7e7d2 /kernel
parent     dc81081b2d9a6a9d64dad1bef1e5fc9fb660e53e
perf_counter: More aggressive frequency adjustment
Also employ the overflow handler to adjust the frequency; this results
in a stable frequency in about 40~50 samples, instead of that many ticks.
This also means we can start sampling at a sample period of 1 without
running head-first into the throttle.
It relies on sched_clock() to accurately measure the time difference
between the overflow NMIs.
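
For illustration, a minimal user-space sketch (not part of the patch): it
replays the arithmetic of the new perf_adjust_period() against an assumed,
perfectly steady event source. The sample_freq and event_rate values and the
adjust_period() helper are made up for the example; the point is to show why
the period settles after roughly 40~50 adjustments when starting from 1.

/*
 * Hypothetical, user-space only model (not kernel code).  A steady
 * event source is assumed; sample_freq and event_rate are made up.
 */
#include <stdio.h>
#include <stdint.h>
#include <inttypes.h>

static uint64_t adjust_period(uint64_t sample_period, uint64_t sample_freq,
                              uint64_t events)
{
        uint64_t period;
        int64_t delta;

        /* same steps as the new perf_adjust_period() */
        events *= sample_period;                /* events/sec actually seen   */
        period = events / sample_freq;          /* ideal period for that rate */

        delta = (int64_t)(period - sample_period);
        delta = (delta + 7) / 8;                /* low pass filter            */

        sample_period += delta;
        if (!sample_period)
                sample_period = 1;

        return sample_period;
}

int main(void)
{
        const uint64_t sample_freq = 1000;      /* want 1000 samples/sec (assumed) */
        const uint64_t event_rate = 2000000;    /* steady 2M events/sec (assumed)  */
        uint64_t sample_period = 1;             /* counters now start at period 1  */
        int i;

        for (i = 1; i <= 50; i++) {
                /* a steady source overflows event_rate / sample_period times a second */
                uint64_t rate = event_rate / sample_period;

                sample_period = adjust_period(sample_period, sample_freq, rate);
                if (i % 10 == 0)
                        printf("after %2d samples: period = %" PRIu64 "\n",
                               i, sample_period);
        }
        /* the ideal period here is event_rate / sample_freq = 2000 */
        return 0;
}

Each step moves the period about 1/8th of the way toward the ideal value (the
/8 low pass filter), so the remaining error drops to well under a percent
within 40~50 samples.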
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel')
-rw-r--r--  kernel/perf_counter.c  130
1 file changed, 88 insertions, 42 deletions
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 5eacaaf3f9cd..51c571ee4d0b 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1184,13 +1184,33 @@ static void perf_counter_cpu_sched_in(struct perf_cpu_context *cpuctx, int cpu)
 static void perf_log_throttle(struct perf_counter *counter, int enable);
 static void perf_log_period(struct perf_counter *counter, u64 period);
 
-static void perf_adjust_freq(struct perf_counter_context *ctx)
+static void perf_adjust_period(struct perf_counter *counter, u64 events)
+{
+        struct hw_perf_counter *hwc = &counter->hw;
+        u64 period, sample_period;
+        s64 delta;
+
+        events *= hwc->sample_period;
+        period = div64_u64(events, counter->attr.sample_freq);
+
+        delta = (s64)(period - hwc->sample_period);
+        delta = (delta + 7) / 8; /* low pass filter */
+
+        sample_period = hwc->sample_period + delta;
+
+        if (!sample_period)
+                sample_period = 1;
+
+        perf_log_period(counter, sample_period);
+
+        hwc->sample_period = sample_period;
+}
+
+static void perf_ctx_adjust_freq(struct perf_counter_context *ctx)
 {
         struct perf_counter *counter;
         struct hw_perf_counter *hwc;
-        u64 interrupts, sample_period;
-        u64 events, period, freq;
-        s64 delta;
+        u64 interrupts, freq;
 
         spin_lock(&ctx->lock);
         list_for_each_entry(counter, &ctx->counter_list, list_entry) {
@@ -1202,6 +1222,9 @@ static void perf_adjust_freq(struct perf_counter_context *ctx)
                 interrupts = hwc->interrupts;
                 hwc->interrupts = 0;
 
+                /*
+                 * unthrottle counters on the tick
+                 */
                 if (interrupts == MAX_INTERRUPTS) {
                         perf_log_throttle(counter, 1);
                         counter->pmu->unthrottle(counter);
@@ -1211,6 +1234,9 @@ static void perf_adjust_freq(struct perf_counter_context *ctx)
                 if (!counter->attr.freq || !counter->attr.sample_freq)
                         continue;
 
+                /*
+                 * if the specified freq < HZ then we need to skip ticks
+                 */
                 if (counter->attr.sample_freq < HZ) {
                         freq = counter->attr.sample_freq;
 
@@ -1226,20 +1252,20 @@ static void perf_adjust_freq(struct perf_counter_context *ctx)
                 } else
                         freq = HZ;
 
-                events = freq * interrupts * hwc->sample_period;
-                period = div64_u64(events, counter->attr.sample_freq);
-
-                delta = (s64)(1 + period - hwc->sample_period);
-                delta >>= 1;
-
-                sample_period = hwc->sample_period + delta;
-
-                if (!sample_period)
-                        sample_period = 1;
+                perf_adjust_period(counter, freq * interrupts);
 
-                perf_log_period(counter, sample_period);
-
-                hwc->sample_period = sample_period;
+                /*
+                 * In order to avoid being stalled by an (accidental) huge
+                 * sample period, force reset the sample period if we didn't
+                 * get any events in this freq period.
+                 */
+                if (!interrupts) {
+                        perf_disable();
+                        counter->pmu->disable(counter);
+                        atomic64_set(&hwc->period_left, 0);
+                        counter->pmu->enable(counter);
+                        perf_enable();
+                }
         }
         spin_unlock(&ctx->lock);
 }
@@ -1279,9 +1305,9 @@ void perf_counter_task_tick(struct task_struct *curr, int cpu)
         cpuctx = &per_cpu(perf_cpu_context, cpu);
         ctx = curr->perf_counter_ctxp;
 
-        perf_adjust_freq(&cpuctx->ctx);
+        perf_ctx_adjust_freq(&cpuctx->ctx);
         if (ctx)
-                perf_adjust_freq(ctx);
+                perf_ctx_adjust_freq(ctx);
 
         perf_counter_cpu_sched_out(cpuctx);
         if (ctx)
@@ -1647,10 +1673,10 @@ static int perf_counter_period(struct perf_counter *counter, u64 __user *arg)
 
                 counter->attr.sample_freq = value;
         } else {
+                perf_log_period(counter, value);
+
                 counter->attr.sample_period = value;
                 counter->hw.sample_period = value;
-
-                perf_log_period(counter, value);
         }
 unlock:
         spin_unlock_irq(&ctx->lock);
@@ -2853,35 +2879,41 @@ void __perf_counter_mmap(struct vm_area_struct *vma)
  * event flow.
  */
 
+struct freq_event {
+        struct perf_event_header        header;
+        u64                             time;
+        u64                             id;
+        u64                             period;
+};
+
 static void perf_log_period(struct perf_counter *counter, u64 period)
 {
         struct perf_output_handle handle;
+        struct freq_event event;
         int ret;
 
-        struct {
-                struct perf_event_header header;
-                u64 time;
-                u64 id;
-                u64 period;
-        } freq_event = {
+        if (counter->hw.sample_period == period)
+                return;
+
+        if (counter->attr.sample_type & PERF_SAMPLE_PERIOD)
+                return;
+
+        event = (struct freq_event) {
                 .header = {
                         .type = PERF_EVENT_PERIOD,
                         .misc = 0,
-                        .size = sizeof(freq_event),
+                        .size = sizeof(event),
                 },
                 .time = sched_clock(),
                 .id = counter->id,
                 .period = period,
         };
 
-        if (counter->hw.sample_period == period)
-                return;
-
-        ret = perf_output_begin(&handle, counter, sizeof(freq_event), 0, 0);
+        ret = perf_output_begin(&handle, counter, sizeof(event), 1, 0);
         if (ret)
                 return;
 
-        perf_output_put(&handle, freq_event);
+        perf_output_put(&handle, event);
         perf_output_end(&handle);
 }
 
@@ -2923,15 +2955,16 @@ int perf_counter_overflow(struct perf_counter *counter,
 {
         int events = atomic_read(&counter->event_limit);
         int throttle = counter->pmu->unthrottle != NULL;
+        struct hw_perf_counter *hwc = &counter->hw;
         int ret = 0;
 
         if (!throttle) {
-                counter->hw.interrupts++;
+                hwc->interrupts++;
         } else {
-                if (counter->hw.interrupts != MAX_INTERRUPTS) {
-                        counter->hw.interrupts++;
-                        if (HZ*counter->hw.interrupts > (u64)sysctl_perf_counter_limit) {
-                                counter->hw.interrupts = MAX_INTERRUPTS;
+                if (hwc->interrupts != MAX_INTERRUPTS) {
+                        hwc->interrupts++;
+                        if (HZ * hwc->interrupts > (u64)sysctl_perf_counter_limit) {
+                                hwc->interrupts = MAX_INTERRUPTS;
                                 perf_log_throttle(counter, 0);
                                 ret = 1;
                         }
@@ -2945,6 +2978,16 @@ int perf_counter_overflow(struct perf_counter *counter,
                 }
         }
 
+        if (counter->attr.freq) {
+                u64 now = sched_clock();
+                s64 delta = now - hwc->freq_stamp;
+
+                hwc->freq_stamp = now;
+
+                if (delta > 0 && delta < TICK_NSEC)
+                        perf_adjust_period(counter, NSEC_PER_SEC / (int)delta);
+        }
+
         /*
          * XXX event_limit might not quite work as expected on inherited
          * counters
@@ -3379,7 +3422,6 @@ static const struct pmu *tp_perf_counter_init(struct perf_counter *counter)
                 return NULL;
 
         counter->destroy = tp_perf_counter_destroy;
-        counter->hw.sample_period = counter->attr.sample_period;
 
         return &perf_ops_generic;
 }
@@ -3483,10 +3525,11 @@ perf_counter_alloc(struct perf_counter_attr *attr,
         pmu = NULL;
 
         hwc = &counter->hw;
+        hwc->sample_period = attr->sample_period;
         if (attr->freq && attr->sample_freq)
-                hwc->sample_period = div64_u64(TICK_NSEC, attr->sample_freq);
-        else
-                hwc->sample_period = attr->sample_period;
+                hwc->sample_period = 1;
+
+        atomic64_set(&hwc->period_left, hwc->sample_period);
 
         /*
          * we currently do not support PERF_SAMPLE_GROUP on inherited counters
@@ -3687,6 +3730,9 @@ inherit_counter(struct perf_counter *parent_counter,
         else
                 child_counter->state = PERF_COUNTER_STATE_OFF;
 
+        if (parent_counter->attr.freq)
+                child_counter->hw.sample_period = parent_counter->hw.sample_period;
+
         /*
          * Link it up in the child's context:
          */