author     Peter Zijlstra <a.p.zijlstra@chello.nl>   2009-06-10 07:40:57 -0400
committer  Ingo Molnar <mingo@elte.hu>               2009-06-10 10:55:26 -0400
commit     bd2b5b12849a3446abad0b25e920f86f5480b309
tree       b0eacf6002f2015c0483390619a3f874bcb7e7d2
parent     dc81081b2d9a6a9d64dad1bef1e5fc9fb660e53e
perf_counter: More aggressive frequency adjustment
Also employ the overflow handler to adjust the frequency; this results in a
stable frequency in about 40~50 samples, instead of that many ticks.

This also means we can start sampling at a sample period of 1 without
running head-first into the throttle.

It relies on sched_clock() to accurately measure the time difference
between the overflow NMIs.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r--  arch/x86/kernel/cpu/perf_counter.c |   5
-rw-r--r--  include/linux/perf_counter.h       |   1
-rw-r--r--  kernel/perf_counter.c              | 130
3 files changed, 92 insertions(+), 44 deletions(-)
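For reference, the arithmetic introduced by the new perf_adjust_period() below can be exercised outside the kernel. The following user-space sketch is not part of the patch; it only mirrors the math (estimate the event rate from the overflow rate, compute the period that would hit the requested sample_freq, and move sample_period an eighth of the way toward it each step), with made-up event-rate and frequency numbers, to show how starting from a sample period of 1 converges within a few dozen steps, as the changelog claims.

/*
 * Illustrative user-space sketch (not part of the patch) of the low-pass
 * period adjustment performed by perf_adjust_period() in this commit.
 * The event rate and target frequency below are made-up example numbers.
 */
#include <stdio.h>
#include <stdint.h>

/*
 * Mirror of the kernel math: nudge sample_period toward the period that
 * would produce sample_freq overflows per second, moving 1/8th of the
 * way each step (the "low pass filter").
 */
static uint64_t adjust_period(uint64_t sample_period, uint64_t sample_freq,
			      uint64_t interrupts_per_sec)
{
	uint64_t events = interrupts_per_sec * sample_period;	/* events/sec estimate */
	uint64_t period = events / sample_freq;			/* ideal period */
	int64_t delta = (int64_t)(period - sample_period);

	delta = (delta + 7) / 8;				/* low pass filter */
	sample_period += delta;

	return sample_period ? sample_period : 1;
}

int main(void)
{
	const uint64_t event_rate  = 2000000;	/* pretend 2M events/sec */
	const uint64_t sample_freq = 1000;	/* want 1000 samples/sec */
	uint64_t period = 1;			/* start sampling at period 1 */

	for (int i = 0; i < 50; i++) {
		period = adjust_period(period, sample_freq, event_rate / period);
		printf("step %2d: sample_period = %llu\n",
		       i + 1, (unsigned long long)period);
	}
	/* converges toward 2000000 / 1000 = 2000 within a few dozen steps */
	return 0;
}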
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 49f258537cbf..240ca5630632 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -696,10 +696,11 @@ static int __hw_perf_counter_init(struct perf_counter *counter)
 	if (!attr->exclude_kernel)
 		hwc->config |= ARCH_PERFMON_EVENTSEL_OS;
 
-	if (!hwc->sample_period)
+	if (!hwc->sample_period) {
 		hwc->sample_period = x86_pmu.max_period;
+		atomic64_set(&hwc->period_left, hwc->sample_period);
+	}
 
-	atomic64_set(&hwc->period_left, hwc->sample_period);
 	counter->destroy = hw_perf_counter_destroy;
 
 	/*
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 3586df840f69..282d8cc48980 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -371,6 +371,7 @@ struct hw_perf_counter {
 
 	u64				freq_count;
 	u64				freq_interrupts;
+	u64				freq_stamp;
 #endif
 };
 
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 5eacaaf3f9cd..51c571ee4d0b 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1184,13 +1184,33 @@ static void perf_counter_cpu_sched_in(struct perf_cpu_context *cpuctx, int cpu)
 static void perf_log_throttle(struct perf_counter *counter, int enable);
 static void perf_log_period(struct perf_counter *counter, u64 period);
 
-static void perf_adjust_freq(struct perf_counter_context *ctx)
+static void perf_adjust_period(struct perf_counter *counter, u64 events)
+{
+	struct hw_perf_counter *hwc = &counter->hw;
+	u64 period, sample_period;
+	s64 delta;
+
+	events *= hwc->sample_period;
+	period = div64_u64(events, counter->attr.sample_freq);
+
+	delta = (s64)(period - hwc->sample_period);
+	delta = (delta + 7) / 8; /* low pass filter */
+
+	sample_period = hwc->sample_period + delta;
+
+	if (!sample_period)
+		sample_period = 1;
+
+	perf_log_period(counter, sample_period);
+
+	hwc->sample_period = sample_period;
+}
+
+static void perf_ctx_adjust_freq(struct perf_counter_context *ctx)
 {
 	struct perf_counter *counter;
 	struct hw_perf_counter *hwc;
-	u64 interrupts, sample_period;
-	u64 events, period, freq;
-	s64 delta;
+	u64 interrupts, freq;
 
 	spin_lock(&ctx->lock);
 	list_for_each_entry(counter, &ctx->counter_list, list_entry) {
@@ -1202,6 +1222,9 @@ static void perf_adjust_freq(struct perf_counter_context *ctx)
 		interrupts = hwc->interrupts;
 		hwc->interrupts = 0;
 
+		/*
+		 * unthrottle counters on the tick
+		 */
 		if (interrupts == MAX_INTERRUPTS) {
 			perf_log_throttle(counter, 1);
 			counter->pmu->unthrottle(counter);
@@ -1211,6 +1234,9 @@ static void perf_adjust_freq(struct perf_counter_context *ctx)
 		if (!counter->attr.freq || !counter->attr.sample_freq)
 			continue;
 
+		/*
+		 * if the specified freq < HZ then we need to skip ticks
+		 */
 		if (counter->attr.sample_freq < HZ) {
 			freq = counter->attr.sample_freq;
 
@@ -1226,20 +1252,20 @@ static void perf_adjust_freq(struct perf_counter_context *ctx)
 		} else
 			freq = HZ;
 
-		events = freq * interrupts * hwc->sample_period;
-		period = div64_u64(events, counter->attr.sample_freq);
-
-		delta = (s64)(1 + period - hwc->sample_period);
-		delta >>= 1;
-
-		sample_period = hwc->sample_period + delta;
-
-		if (!sample_period)
-			sample_period = 1;
+		perf_adjust_period(counter, freq * interrupts);
 
-		perf_log_period(counter, sample_period);
-
-		hwc->sample_period = sample_period;
+		/*
+		 * In order to avoid being stalled by an (accidental) huge
+		 * sample period, force reset the sample period if we didn't
+		 * get any events in this freq period.
+		 */
+		if (!interrupts) {
+			perf_disable();
+			counter->pmu->disable(counter);
+			atomic_set(&hwc->period_left, 0);
+			counter->pmu->enable(counter);
+			perf_enable();
+		}
 	}
 	spin_unlock(&ctx->lock);
 }
@@ -1279,9 +1305,9 @@ void perf_counter_task_tick(struct task_struct *curr, int cpu)
 	cpuctx = &per_cpu(perf_cpu_context, cpu);
 	ctx = curr->perf_counter_ctxp;
 
-	perf_adjust_freq(&cpuctx->ctx);
+	perf_ctx_adjust_freq(&cpuctx->ctx);
 	if (ctx)
-		perf_adjust_freq(ctx);
+		perf_ctx_adjust_freq(ctx);
 
 	perf_counter_cpu_sched_out(cpuctx);
 	if (ctx)
@@ -1647,10 +1673,10 @@ static int perf_counter_period(struct perf_counter *counter, u64 __user *arg)
 
 		counter->attr.sample_freq = value;
 	} else {
+		perf_log_period(counter, value);
+
 		counter->attr.sample_period = value;
 		counter->hw.sample_period = value;
-
-		perf_log_period(counter, value);
 	}
 unlock:
 	spin_unlock_irq(&ctx->lock);
@@ -2853,35 +2879,41 @@ void __perf_counter_mmap(struct vm_area_struct *vma)
  * event flow.
  */
 
+struct freq_event {
+	struct perf_event_header	header;
+	u64				time;
+	u64				id;
+	u64				period;
+};
+
 static void perf_log_period(struct perf_counter *counter, u64 period)
 {
 	struct perf_output_handle handle;
+	struct freq_event event;
 	int ret;
 
-	struct {
-		struct perf_event_header	header;
-		u64				time;
-		u64				id;
-		u64				period;
-	} freq_event = {
+	if (counter->hw.sample_period == period)
+		return;
+
+	if (counter->attr.sample_type & PERF_SAMPLE_PERIOD)
+		return;
+
+	event = (struct freq_event) {
 		.header = {
 			.type = PERF_EVENT_PERIOD,
 			.misc = 0,
-			.size = sizeof(freq_event),
+			.size = sizeof(event),
 		},
 		.time = sched_clock(),
 		.id = counter->id,
 		.period = period,
 	};
 
-	if (counter->hw.sample_period == period)
-		return;
-
-	ret = perf_output_begin(&handle, counter, sizeof(freq_event), 0, 0);
+	ret = perf_output_begin(&handle, counter, sizeof(event), 1, 0);
 	if (ret)
 		return;
 
-	perf_output_put(&handle, freq_event);
+	perf_output_put(&handle, event);
 	perf_output_end(&handle);
 }
 
@@ -2923,15 +2955,16 @@ int perf_counter_overflow(struct perf_counter *counter,
 {
 	int events = atomic_read(&counter->event_limit);
 	int throttle = counter->pmu->unthrottle != NULL;
+	struct hw_perf_counter *hwc = &counter->hw;
 	int ret = 0;
 
 	if (!throttle) {
-		counter->hw.interrupts++;
+		hwc->interrupts++;
 	} else {
-		if (counter->hw.interrupts != MAX_INTERRUPTS) {
-			counter->hw.interrupts++;
-			if (HZ*counter->hw.interrupts > (u64)sysctl_perf_counter_limit) {
-				counter->hw.interrupts = MAX_INTERRUPTS;
+		if (hwc->interrupts != MAX_INTERRUPTS) {
+			hwc->interrupts++;
+			if (HZ * hwc->interrupts > (u64)sysctl_perf_counter_limit) {
+				hwc->interrupts = MAX_INTERRUPTS;
 				perf_log_throttle(counter, 0);
 				ret = 1;
 			}
@@ -2945,6 +2978,16 @@ int perf_counter_overflow(struct perf_counter *counter,
 		}
 	}
 
+	if (counter->attr.freq) {
+		u64 now = sched_clock();
+		s64 delta = now - hwc->freq_stamp;
+
+		hwc->freq_stamp = now;
+
+		if (delta > 0 && delta < TICK_NSEC)
+			perf_adjust_period(counter, NSEC_PER_SEC / (int)delta);
+	}
+
 	/*
 	 * XXX event_limit might not quite work as expected on inherited
 	 * counters
@@ -3379,7 +3422,6 @@ static const struct pmu *tp_perf_counter_init(struct perf_counter *counter)
 		return NULL;
 
 	counter->destroy = tp_perf_counter_destroy;
-	counter->hw.sample_period = counter->attr.sample_period;
 
 	return &perf_ops_generic;
 }
@@ -3483,10 +3525,11 @@ perf_counter_alloc(struct perf_counter_attr *attr,
 		pmu = NULL;
 
 	hwc = &counter->hw;
+	hwc->sample_period = attr->sample_period;
 	if (attr->freq && attr->sample_freq)
-		hwc->sample_period = div64_u64(TICK_NSEC, attr->sample_freq);
-	else
-		hwc->sample_period = attr->sample_period;
+		hwc->sample_period = 1;
+
+	atomic64_set(&hwc->period_left, hwc->sample_period);
 
 	/*
 	 * we currently do not support PERF_SAMPLE_GROUP on inherited counters
@@ -3687,6 +3730,9 @@ inherit_counter(struct perf_counter *parent_counter,
 	else
 		child_counter->state = PERF_COUNTER_STATE_OFF;
 
+	if (parent_counter->attr.freq)
+		child_counter->hw.sample_period = parent_counter->hw.sample_period;
+
 	/*
 	 * Link it up in the child's context:
 	 */