aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPeter Zijlstra <a.p.zijlstra@chello.nl>2009-05-25 11:39:05 -0400
committerIngo Molnar <mingo@elte.hu>2009-05-25 15:41:12 -0400
commita78ac3258782f3e64cb40beb5990808e1febcc0c (patch)
tree692462a974e4c26bdb7fa7fae727a6b326a0eeee
parent48e22d56ecdeddd1ffb42a02fccba5c6ef42b133 (diff)
perf_counter: Generic per counter interrupt throttle
Introduce a generic per-counter interrupt throttle.

When a counter is going too fast, this uses the perf_counter_overflow() quick disable to throttle that specific counter, provided the PMU supplies a pmu->unthrottle() method which can undo the quick disable.

Power needs to implement both the quick disable and the unthrottle method.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: John Kacur <jkacur@redhat.com>
LKML-Reference: <20090525153931.703093461@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r--arch/x86/kernel/cpu/perf_counter.c13
-rw-r--r--include/linux/perf_counter.h11
-rw-r--r--kernel/perf_counter.c59
-rw-r--r--kernel/sysctl.c8
4 files changed, 87 insertions, 4 deletions
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 8c8177f859fe..c4b543d1a86f 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -623,6 +623,18 @@ try_generic:
623 return 0; 623 return 0;
624} 624}
625 625
626static void x86_pmu_unthrottle(struct perf_counter *counter)
627{
628 struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
629 struct hw_perf_counter *hwc = &counter->hw;
630
631 if (WARN_ON_ONCE(hwc->idx >= X86_PMC_IDX_MAX ||
632 cpuc->counters[hwc->idx] != counter))
633 return;
634
635 x86_pmu.enable(hwc, hwc->idx);
636}
637
626void perf_counter_print_debug(void) 638void perf_counter_print_debug(void)
627{ 639{
628 u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed; 640 u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed;
@@ -1038,6 +1050,7 @@ static const struct pmu pmu = {
1038 .enable = x86_pmu_enable, 1050 .enable = x86_pmu_enable,
1039 .disable = x86_pmu_disable, 1051 .disable = x86_pmu_disable,
1040 .read = x86_pmu_read, 1052 .read = x86_pmu_read,
1053 .unthrottle = x86_pmu_unthrottle,
1041}; 1054};
1042 1055
1043const struct pmu *hw_perf_counter_init(struct perf_counter *counter) 1056const struct pmu *hw_perf_counter_init(struct perf_counter *counter)
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 0c160be2078f..e3a7585d3e43 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -267,6 +267,15 @@ enum perf_event_type {
267 PERF_EVENT_PERIOD = 4, 267 PERF_EVENT_PERIOD = 4,
268 268
269 /* 269 /*
270 * struct {
271 * struct perf_event_header header;
272 * u64 time;
273 * };
274 */
275 PERF_EVENT_THROTTLE = 5,
276 PERF_EVENT_UNTHROTTLE = 6,
277
278 /*
270 * When header.misc & PERF_EVENT_MISC_OVERFLOW the event_type field 279 * When header.misc & PERF_EVENT_MISC_OVERFLOW the event_type field
271 * will be PERF_RECORD_* 280 * will be PERF_RECORD_*
272 * 281 *
@@ -367,6 +376,7 @@ struct pmu {
367 int (*enable) (struct perf_counter *counter); 376 int (*enable) (struct perf_counter *counter);
368 void (*disable) (struct perf_counter *counter); 377 void (*disable) (struct perf_counter *counter);
369 void (*read) (struct perf_counter *counter); 378 void (*read) (struct perf_counter *counter);
379 void (*unthrottle) (struct perf_counter *counter);
370}; 380};
371 381
372/** 382/**
@@ -613,6 +623,7 @@ extern struct perf_callchain_entry *perf_callchain(struct pt_regs *regs);
613 623
614extern int sysctl_perf_counter_priv; 624extern int sysctl_perf_counter_priv;
615extern int sysctl_perf_counter_mlock; 625extern int sysctl_perf_counter_mlock;
626extern int sysctl_perf_counter_limit;
616 627
617extern void perf_counter_init(void); 628extern void perf_counter_init(void);
618 629
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 14b1fe984832..ec9c4007a7f9 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -46,6 +46,7 @@ static atomic_t nr_comm_tracking __read_mostly;
46 46
47int sysctl_perf_counter_priv __read_mostly; /* do we need to be privileged */ 47int sysctl_perf_counter_priv __read_mostly; /* do we need to be privileged */
48int sysctl_perf_counter_mlock __read_mostly = 512; /* 'free' kb per user */ 48int sysctl_perf_counter_mlock __read_mostly = 512; /* 'free' kb per user */
49int sysctl_perf_counter_limit __read_mostly = 100000; /* max NMIs per second */
49 50
50/* 51/*
51 * Lock for (sysadmin-configurable) counter reservations: 52 * Lock for (sysadmin-configurable) counter reservations:
@@ -1066,12 +1067,15 @@ static void perf_counter_cpu_sched_in(struct perf_cpu_context *cpuctx, int cpu)
1066 __perf_counter_sched_in(ctx, cpuctx, cpu); 1067 __perf_counter_sched_in(ctx, cpuctx, cpu);
1067} 1068}
1068 1069
1070#define MAX_INTERRUPTS (~0ULL)
1071
1072static void perf_log_throttle(struct perf_counter *counter, int enable);
1069static void perf_log_period(struct perf_counter *counter, u64 period); 1073static void perf_log_period(struct perf_counter *counter, u64 period);
1070 1074
1071static void perf_adjust_freq(struct perf_counter_context *ctx) 1075static void perf_adjust_freq(struct perf_counter_context *ctx)
1072{ 1076{
1073 struct perf_counter *counter; 1077 struct perf_counter *counter;
1074 u64 irq_period; 1078 u64 interrupts, irq_period;
1075 u64 events, period; 1079 u64 events, period;
1076 s64 delta; 1080 s64 delta;
1077 1081
@@ -1080,10 +1084,19 @@ static void perf_adjust_freq(struct perf_counter_context *ctx)
1080 if (counter->state != PERF_COUNTER_STATE_ACTIVE) 1084 if (counter->state != PERF_COUNTER_STATE_ACTIVE)
1081 continue; 1085 continue;
1082 1086
1087 interrupts = counter->hw.interrupts;
1088 counter->hw.interrupts = 0;
1089
1090 if (interrupts == MAX_INTERRUPTS) {
1091 perf_log_throttle(counter, 1);
1092 counter->pmu->unthrottle(counter);
1093 interrupts = 2*sysctl_perf_counter_limit/HZ;
1094 }
1095
1083 if (!counter->hw_event.freq || !counter->hw_event.irq_freq) 1096 if (!counter->hw_event.freq || !counter->hw_event.irq_freq)
1084 continue; 1097 continue;
1085 1098
1086 events = HZ * counter->hw.interrupts * counter->hw.irq_period; 1099 events = HZ * interrupts * counter->hw.irq_period;
1087 period = div64_u64(events, counter->hw_event.irq_freq); 1100 period = div64_u64(events, counter->hw_event.irq_freq);
1088 1101
1089 delta = (s64)(1 + period - counter->hw.irq_period); 1102 delta = (s64)(1 + period - counter->hw.irq_period);
@@ -1097,7 +1110,6 @@ static void perf_adjust_freq(struct perf_counter_context *ctx)
1097 perf_log_period(counter, irq_period); 1110 perf_log_period(counter, irq_period);
1098 1111
1099 counter->hw.irq_period = irq_period; 1112 counter->hw.irq_period = irq_period;
1100 counter->hw.interrupts = 0;
1101 } 1113 }
1102 spin_unlock(&ctx->lock); 1114 spin_unlock(&ctx->lock);
1103} 1115}
@@ -2544,6 +2556,35 @@ static void perf_log_period(struct perf_counter *counter, u64 period)
2544} 2556}
2545 2557
2546/* 2558/*
2559 * IRQ throttle logging
2560 */
2561
2562static void perf_log_throttle(struct perf_counter *counter, int enable)
2563{
2564 struct perf_output_handle handle;
2565 int ret;
2566
2567 struct {
2568 struct perf_event_header header;
2569 u64 time;
2570 } throttle_event = {
2571 .header = {
2572 .type = PERF_EVENT_THROTTLE + 1,
2573 .misc = 0,
2574 .size = sizeof(throttle_event),
2575 },
2576 .time = sched_clock(),
2577 };
2578
2579 ret = perf_output_begin(&handle, counter, sizeof(throttle_event), 0, 0);
2580 if (ret)
2581 return;
2582
2583 perf_output_put(&handle, throttle_event);
2584 perf_output_end(&handle);
2585}
2586
2587/*
2547 * Generic counter overflow handling. 2588 * Generic counter overflow handling.
2548 */ 2589 */
2549 2590
@@ -2551,9 +2592,19 @@ int perf_counter_overflow(struct perf_counter *counter,
2551 int nmi, struct pt_regs *regs, u64 addr) 2592 int nmi, struct pt_regs *regs, u64 addr)
2552{ 2593{
2553 int events = atomic_read(&counter->event_limit); 2594 int events = atomic_read(&counter->event_limit);
2595 int throttle = counter->pmu->unthrottle != NULL;
2554 int ret = 0; 2596 int ret = 0;
2555 2597
2556 counter->hw.interrupts++; 2598 if (!throttle) {
2599 counter->hw.interrupts++;
2600 } else if (counter->hw.interrupts != MAX_INTERRUPTS) {
2601 counter->hw.interrupts++;
2602 if (HZ*counter->hw.interrupts > (u64)sysctl_perf_counter_limit) {
2603 counter->hw.interrupts = MAX_INTERRUPTS;
2604 perf_log_throttle(counter, 0);
2605 ret = 1;
2606 }
2607 }
2557 2608
2558 /* 2609 /*
2559 * XXX event_limit might not quite work as expected on inherited 2610 * XXX event_limit might not quite work as expected on inherited
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 3cb1849f5989..0c4bf863afa3 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -930,6 +930,14 @@ static struct ctl_table kern_table[] = {
930 .mode = 0644, 930 .mode = 0644,
931 .proc_handler = &proc_dointvec, 931 .proc_handler = &proc_dointvec,
932 }, 932 },
933 {
934 .ctl_name = CTL_UNNUMBERED,
935 .procname = "perf_counter_int_limit",
936 .data = &sysctl_perf_counter_limit,
937 .maxlen = sizeof(sysctl_perf_counter_limit),
938 .mode = 0644,
939 .proc_handler = &proc_dointvec,
940 },
933#endif 941#endif
934/* 942/*
935 * NOTE: do not add new entries to this table unless you have read 943 * NOTE: do not add new entries to this table unless you have read