diff options
author | Peter Zijlstra <a.p.zijlstra@chello.nl> | 2009-05-25 11:39:05 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2009-05-25 15:41:12 -0400 |
commit | a78ac3258782f3e64cb40beb5990808e1febcc0c (patch) | |
tree | 692462a974e4c26bdb7fa7fae727a6b326a0eeee | |
parent | 48e22d56ecdeddd1ffb42a02fccba5c6ef42b133 (diff) |
perf_counter: Generic per counter interrupt throttle
Introduce a generic per counter interrupt throttle.
This uses the perf_counter_overflow() quick disable to throttle a specific
counter when it's going too fast, provided a pmu->unthrottle() method is
available which can undo the quick disable.
Power needs to implement both the quick disable and the unthrottle method.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: John Kacur <jkacur@redhat.com>
LKML-Reference: <20090525153931.703093461@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r-- | arch/x86/kernel/cpu/perf_counter.c | 13 | ||||
-rw-r--r-- | include/linux/perf_counter.h | 11 | ||||
-rw-r--r-- | kernel/perf_counter.c | 59 | ||||
-rw-r--r-- | kernel/sysctl.c | 8 |
4 files changed, 87 insertions, 4 deletions
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index 8c8177f859fe..c4b543d1a86f 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c | |||
@@ -623,6 +623,18 @@ try_generic: | |||
623 | return 0; | 623 | return 0; |
624 | } | 624 | } |
625 | 625 | ||
626 | static void x86_pmu_unthrottle(struct perf_counter *counter) /* pmu->unthrottle hook: re-enables the hardware counter backing 'counter' */ | ||
627 | { | ||
628 | struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); /* this CPU's hardware counter state */ | ||
629 | struct hw_perf_counter *hwc = &counter->hw; | ||
630 | |||
631 | if (WARN_ON_ONCE(hwc->idx >= X86_PMC_IDX_MAX || /* warn once and bail out if the index is too large ... */ | ||
632 | cpuc->counters[hwc->idx] != counter)) /* ... or the slot does not hold this counter; NOTE(review): only the upper bound of idx is checked - confirm idx cannot be negative here */ | ||
633 | return; | ||
634 | |||
635 | x86_pmu.enable(hwc, hwc->idx); /* turn the counter back on at its assigned index */ | ||
636 | } | ||
637 | |||
626 | void perf_counter_print_debug(void) | 638 | void perf_counter_print_debug(void) |
627 | { | 639 | { |
628 | u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed; | 640 | u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed; |
@@ -1038,6 +1050,7 @@ static const struct pmu pmu = { | |||
1038 | .enable = x86_pmu_enable, | 1050 | .enable = x86_pmu_enable, |
1039 | .disable = x86_pmu_disable, | 1051 | .disable = x86_pmu_disable, |
1040 | .read = x86_pmu_read, | 1052 | .read = x86_pmu_read, |
1053 | .unthrottle = x86_pmu_unthrottle, | ||
1041 | }; | 1054 | }; |
1042 | 1055 | ||
1043 | const struct pmu *hw_perf_counter_init(struct perf_counter *counter) | 1056 | const struct pmu *hw_perf_counter_init(struct perf_counter *counter) |
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 0c160be2078f..e3a7585d3e43 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h | |||
@@ -267,6 +267,15 @@ enum perf_event_type { | |||
267 | PERF_EVENT_PERIOD = 4, | 267 | PERF_EVENT_PERIOD = 4, |
268 | 268 | ||
269 | /* | 269 | /* |
270 | * struct { | ||
271 | * struct perf_event_header header; | ||
272 | * u64 time; | ||
273 | * }; | ||
274 | */ | ||
275 | PERF_EVENT_THROTTLE = 5, | ||
276 | PERF_EVENT_UNTHROTTLE = 6, | ||
277 | |||
278 | /* | ||
270 | * When header.misc & PERF_EVENT_MISC_OVERFLOW the event_type field | 279 | * When header.misc & PERF_EVENT_MISC_OVERFLOW the event_type field |
271 | * will be PERF_RECORD_* | 280 | * will be PERF_RECORD_* |
272 | * | 281 | * |
@@ -367,6 +376,7 @@ struct pmu { | |||
367 | int (*enable) (struct perf_counter *counter); | 376 | int (*enable) (struct perf_counter *counter); |
368 | void (*disable) (struct perf_counter *counter); | 377 | void (*disable) (struct perf_counter *counter); |
369 | void (*read) (struct perf_counter *counter); | 378 | void (*read) (struct perf_counter *counter); |
379 | void (*unthrottle) (struct perf_counter *counter); | ||
370 | }; | 380 | }; |
371 | 381 | ||
372 | /** | 382 | /** |
@@ -613,6 +623,7 @@ extern struct perf_callchain_entry *perf_callchain(struct pt_regs *regs); | |||
613 | 623 | ||
614 | extern int sysctl_perf_counter_priv; | 624 | extern int sysctl_perf_counter_priv; |
615 | extern int sysctl_perf_counter_mlock; | 625 | extern int sysctl_perf_counter_mlock; |
626 | extern int sysctl_perf_counter_limit; | ||
616 | 627 | ||
617 | extern void perf_counter_init(void); | 628 | extern void perf_counter_init(void); |
618 | 629 | ||
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 14b1fe984832..ec9c4007a7f9 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c | |||
@@ -46,6 +46,7 @@ static atomic_t nr_comm_tracking __read_mostly; | |||
46 | 46 | ||
47 | int sysctl_perf_counter_priv __read_mostly; /* do we need to be privileged */ | 47 | int sysctl_perf_counter_priv __read_mostly; /* do we need to be privileged */ |
48 | int sysctl_perf_counter_mlock __read_mostly = 512; /* 'free' kb per user */ | 48 | int sysctl_perf_counter_mlock __read_mostly = 512; /* 'free' kb per user */ |
49 | int sysctl_perf_counter_limit __read_mostly = 100000; /* max NMIs per second */ | ||
49 | 50 | ||
50 | /* | 51 | /* |
51 | * Lock for (sysadmin-configurable) counter reservations: | 52 | * Lock for (sysadmin-configurable) counter reservations: |
@@ -1066,12 +1067,15 @@ static void perf_counter_cpu_sched_in(struct perf_cpu_context *cpuctx, int cpu) | |||
1066 | __perf_counter_sched_in(ctx, cpuctx, cpu); | 1067 | __perf_counter_sched_in(ctx, cpuctx, cpu); |
1067 | } | 1068 | } |
1068 | 1069 | ||
1070 | #define MAX_INTERRUPTS (~0ULL) | ||
1071 | |||
1072 | static void perf_log_throttle(struct perf_counter *counter, int enable); | ||
1069 | static void perf_log_period(struct perf_counter *counter, u64 period); | 1073 | static void perf_log_period(struct perf_counter *counter, u64 period); |
1070 | 1074 | ||
1071 | static void perf_adjust_freq(struct perf_counter_context *ctx) | 1075 | static void perf_adjust_freq(struct perf_counter_context *ctx) |
1072 | { | 1076 | { |
1073 | struct perf_counter *counter; | 1077 | struct perf_counter *counter; |
1074 | u64 irq_period; | 1078 | u64 interrupts, irq_period; |
1075 | u64 events, period; | 1079 | u64 events, period; |
1076 | s64 delta; | 1080 | s64 delta; |
1077 | 1081 | ||
@@ -1080,10 +1084,19 @@ static void perf_adjust_freq(struct perf_counter_context *ctx) | |||
1080 | if (counter->state != PERF_COUNTER_STATE_ACTIVE) | 1084 | if (counter->state != PERF_COUNTER_STATE_ACTIVE) |
1081 | continue; | 1085 | continue; |
1082 | 1086 | ||
1087 | interrupts = counter->hw.interrupts; | ||
1088 | counter->hw.interrupts = 0; | ||
1089 | |||
1090 | if (interrupts == MAX_INTERRUPTS) { | ||
1091 | perf_log_throttle(counter, 1); | ||
1092 | counter->pmu->unthrottle(counter); | ||
1093 | interrupts = 2*sysctl_perf_counter_limit/HZ; | ||
1094 | } | ||
1095 | |||
1083 | if (!counter->hw_event.freq || !counter->hw_event.irq_freq) | 1096 | if (!counter->hw_event.freq || !counter->hw_event.irq_freq) |
1084 | continue; | 1097 | continue; |
1085 | 1098 | ||
1086 | events = HZ * counter->hw.interrupts * counter->hw.irq_period; | 1099 | events = HZ * interrupts * counter->hw.irq_period; |
1087 | period = div64_u64(events, counter->hw_event.irq_freq); | 1100 | period = div64_u64(events, counter->hw_event.irq_freq); |
1088 | 1101 | ||
1089 | delta = (s64)(1 + period - counter->hw.irq_period); | 1102 | delta = (s64)(1 + period - counter->hw.irq_period); |
@@ -1097,7 +1110,6 @@ static void perf_adjust_freq(struct perf_counter_context *ctx) | |||
1097 | perf_log_period(counter, irq_period); | 1110 | perf_log_period(counter, irq_period); |
1098 | 1111 | ||
1099 | counter->hw.irq_period = irq_period; | 1112 | counter->hw.irq_period = irq_period; |
1100 | counter->hw.interrupts = 0; | ||
1101 | } | 1113 | } |
1102 | spin_unlock(&ctx->lock); | 1114 | spin_unlock(&ctx->lock); |
1103 | } | 1115 | } |
@@ -2544,6 +2556,35 @@ static void perf_log_period(struct perf_counter *counter, u64 period) | |||
2544 | } | 2556 | } |
2545 | 2557 | ||
2546 | /* | 2558 | /* |
2559 | * IRQ throttle logging: writes a { header, time } record for 'counter'; the record type is PERF_EVENT_UNTHROTTLE when 'enable' is non-zero and PERF_EVENT_THROTTLE when it is zero | ||
2560 | */ | ||
2561 | |||
2562 | static void perf_log_throttle(struct perf_counter *counter, int enable) | ||
2563 | { | ||
2564 | struct perf_output_handle handle; | ||
2565 | int ret; | ||
2566 | |||
2567 | struct { | ||
2568 | struct perf_event_header header; | ||
2569 | u64 time; | ||
2570 | } throttle_event = { | ||
2571 | .header = { | ||
2572 | .type = enable ? PERF_EVENT_UNTHROTTLE : PERF_EVENT_THROTTLE, /* callers pass 0 when throttling and 1 when unthrottling */ | ||
2573 | .misc = 0, | ||
2574 | .size = sizeof(throttle_event), | ||
2575 | }, | ||
2576 | .time = sched_clock(), | ||
2577 | }; | ||
2578 | |||
2579 | ret = perf_output_begin(&handle, counter, sizeof(throttle_event), 0, 0); | ||
2580 | if (ret) /* non-zero return from perf_output_begin(): nothing is written */ | ||
2581 | return; | ||
2582 | |||
2583 | perf_output_put(&handle, throttle_event); | ||
2584 | perf_output_end(&handle); | ||
2585 | } | ||
2586 | |||
2587 | /* | ||
2547 | * Generic counter overflow handling. | 2588 | * Generic counter overflow handling. |
2548 | */ | 2589 | */ |
2549 | 2590 | ||
@@ -2551,9 +2592,19 @@ int perf_counter_overflow(struct perf_counter *counter, | |||
2551 | int nmi, struct pt_regs *regs, u64 addr) | 2592 | int nmi, struct pt_regs *regs, u64 addr) |
2552 | { | 2593 | { |
2553 | int events = atomic_read(&counter->event_limit); | 2594 | int events = atomic_read(&counter->event_limit); |
2595 | int throttle = counter->pmu->unthrottle != NULL; | ||
2554 | int ret = 0; | 2596 | int ret = 0; |
2555 | 2597 | ||
2556 | counter->hw.interrupts++; | 2598 | if (!throttle) { |
2599 | counter->hw.interrupts++; | ||
2600 | } else if (counter->hw.interrupts != MAX_INTERRUPTS) { | ||
2601 | counter->hw.interrupts++; | ||
2602 | if (HZ*counter->hw.interrupts > (u64)sysctl_perf_counter_limit) { | ||
2603 | counter->hw.interrupts = MAX_INTERRUPTS; | ||
2604 | perf_log_throttle(counter, 0); | ||
2605 | ret = 1; | ||
2606 | } | ||
2607 | } | ||
2557 | 2608 | ||
2558 | /* | 2609 | /* |
2559 | * XXX event_limit might not quite work as expected on inherited | 2610 | * XXX event_limit might not quite work as expected on inherited |
diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 3cb1849f5989..0c4bf863afa3 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c | |||
@@ -930,6 +930,14 @@ static struct ctl_table kern_table[] = { | |||
930 | .mode = 0644, | 930 | .mode = 0644, |
931 | .proc_handler = &proc_dointvec, | 931 | .proc_handler = &proc_dointvec, |
932 | }, | 932 | }, |
933 | { | ||
934 | .ctl_name = CTL_UNNUMBERED, | ||
935 | .procname = "perf_counter_int_limit", | ||
936 | .data = &sysctl_perf_counter_limit, | ||
937 | .maxlen = sizeof(sysctl_perf_counter_limit), | ||
938 | .mode = 0644, | ||
939 | .proc_handler = &proc_dointvec, | ||
940 | }, | ||
933 | #endif | 941 | #endif |
934 | /* | 942 | /* |
935 | * NOTE: do not add new entries to this table unless you have read | 943 | * NOTE: do not add new entries to this table unless you have read |