path: root/kernel/events
author	Stephane Eranian <eranian@google.com>	2013-04-03 08:21:33 -0400
committer	Ingo Molnar <mingo@kernel.org>	2013-05-28 03:07:10 -0400
commit	9e6302056f8029f438e853432a856b9f13de26a6 (patch)
tree	2c777b710fb4624fda55be17306d6f107b72428c /kernel/events
parent	ab573844e3058eef2788803d373019f8bebead57 (diff)
perf: Use hrtimers for event multiplexing
The current scheme of using the timer tick was fine for per-thread events. However, it was causing bias issues in system-wide mode (including for uncore PMUs): event groups would not get their fair share of runtime on the PMU. With tickless kernels, if a core is idle there is no timer tick, and thus no event rotation (multiplexing). However, there are events (especially uncore events) which do count even though cores are asleep.

This patch changes the timer source for multiplexing. It introduces a per-PMU per-cpu hrtimer. The advantage is that even when a core goes idle, it will come back to service the hrtimer, so multiplexing of system-wide events works much better.

The per-PMU implementation (suggested by PeterZ) enables adjusting the multiplexing interval per PMU. The preferred interval is stashed into the struct pmu. If not set, it is forced to the default interval value.

In order to minimize the impact of the hrtimer, it is turned on and off on demand: when the PMU on a CPU is overcommitted, the hrtimer is activated; it is stopped when the PMU is not overcommitted.

In order for this to work properly, we had to change the order of initialization in start_kernel() such that hrtimer_init() is run before perf_event_init().

The default interval in milliseconds is set to a timer tick, just like with the old code. We will provide a sysctl to tune this in another patch.

Signed-off-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Link: http://lkml.kernel.org/r/1364991694-5876-2-git-send-email-eranian@google.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
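As a rough illustration of the per-PMU interval described above, the sketch below shows how an uncore-style PMU driver could opt into a longer multiplexing interval at registration time. This is an illustrative sketch only, not code from this commit: the driver name and stub callbacks are made up, and the struct pmu field name hrtimer_interval_ms is an assumption here, since the struct pmu change itself lies outside the kernel/events hunks shown below.

#include <linux/perf_event.h>
#include <linux/module.h>
#include <linux/errno.h>

/* Minimal stub callbacks; a real driver would program counters here. */
static int my_uncore_event_init(struct perf_event *event)
{
	/* Only claim events that target this PMU's dynamically assigned type. */
	if (event->attr.type != event->pmu->type)
		return -ENOENT;
	return 0;
}

static int  my_uncore_add(struct perf_event *event, int flags)  { return 0; }
static void my_uncore_del(struct perf_event *event, int flags)  { }
static void my_uncore_start(struct perf_event *event, int flags){ }
static void my_uncore_stop(struct perf_event *event, int flags) { }
static void my_uncore_read(struct perf_event *event)            { }

static struct pmu my_uncore_pmu = {
	.task_ctx_nr		= perf_invalid_context,	/* system-wide (uncore) PMU */
	.event_init		= my_uncore_event_init,
	.add			= my_uncore_add,
	.del			= my_uncore_del,
	.start			= my_uncore_start,
	.stop			= my_uncore_stop,
	.read			= my_uncore_read,
	/*
	 * Assumed field name: the preferred multiplexing interval stashed in
	 * struct pmu. Leaving it at 0 would fall back to the default of
	 * 1000/HZ ms (one timer tick), per the commit message above.
	 */
	.hrtimer_interval_ms	= 10,	/* multiplex every 10 ms */
};

static int __init my_uncore_pmu_init(void)
{
	/* Registration is where the per-PMU interval takes effect; -1 = auto type. */
	return perf_pmu_register(&my_uncore_pmu, "my_uncore", -1);
}
module_init(my_uncore_pmu_init);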
Diffstat (limited to 'kernel/events')
-rw-r--r--	kernel/events/core.c	114
1 file changed, 106 insertions(+), 8 deletions(-)
diff --git a/kernel/events/core.c b/kernel/events/core.c
index e0dcced282e4..97bfac7e6f45 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -170,6 +170,8 @@ int sysctl_perf_event_sample_rate __read_mostly = DEFAULT_MAX_SAMPLE_RATE;
 static int max_samples_per_tick __read_mostly =
 	DIV_ROUND_UP(DEFAULT_MAX_SAMPLE_RATE, HZ);
 
+static int perf_rotate_context(struct perf_cpu_context *cpuctx);
+
 int perf_proc_update_handler(struct ctl_table *table, int write,
 		void __user *buffer, size_t *lenp,
 		loff_t *ppos)
@@ -658,6 +660,98 @@ perf_cgroup_mark_enabled(struct perf_event *event,
 }
 #endif
 
+/*
+ * set default to be dependent on timer tick just
+ * like original code
+ */
+#define PERF_CPU_HRTIMER (1000 / HZ)
+/*
+ * function must be called with interrupts disbled
+ */
+static enum hrtimer_restart perf_cpu_hrtimer_handler(struct hrtimer *hr)
+{
+	struct perf_cpu_context *cpuctx;
+	enum hrtimer_restart ret = HRTIMER_NORESTART;
+	int rotations = 0;
+
+	WARN_ON(!irqs_disabled());
+
+	cpuctx = container_of(hr, struct perf_cpu_context, hrtimer);
+
+	rotations = perf_rotate_context(cpuctx);
+
+	/*
+	 * arm timer if needed
+	 */
+	if (rotations) {
+		hrtimer_forward_now(hr, cpuctx->hrtimer_interval);
+		ret = HRTIMER_RESTART;
+	}
+
+	return ret;
+}
+
+/* CPU is going down */
+void perf_cpu_hrtimer_cancel(int cpu)
+{
+	struct perf_cpu_context *cpuctx;
+	struct pmu *pmu;
+	unsigned long flags;
+
+	if (WARN_ON(cpu != smp_processor_id()))
+		return;
+
+	local_irq_save(flags);
+
+	rcu_read_lock();
+
+	list_for_each_entry_rcu(pmu, &pmus, entry) {
+		cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
+
+		if (pmu->task_ctx_nr == perf_sw_context)
+			continue;
+
+		hrtimer_cancel(&cpuctx->hrtimer);
+	}
+
+	rcu_read_unlock();
+
+	local_irq_restore(flags);
+}
+
+static void __perf_cpu_hrtimer_init(struct perf_cpu_context *cpuctx, int cpu)
+{
+	struct hrtimer *hr = &cpuctx->hrtimer;
+	struct pmu *pmu = cpuctx->ctx.pmu;
+
+	/* no multiplexing needed for SW PMU */
+	if (pmu->task_ctx_nr == perf_sw_context)
+		return;
+
+	cpuctx->hrtimer_interval =
+		ns_to_ktime(NSEC_PER_MSEC * PERF_CPU_HRTIMER);
+
+	hrtimer_init(hr, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED);
+	hr->function = perf_cpu_hrtimer_handler;
+}
+
+static void perf_cpu_hrtimer_restart(struct perf_cpu_context *cpuctx)
+{
+	struct hrtimer *hr = &cpuctx->hrtimer;
+	struct pmu *pmu = cpuctx->ctx.pmu;
+
+	/* not for SW PMU */
+	if (pmu->task_ctx_nr == perf_sw_context)
+		return;
+
+	if (hrtimer_active(hr))
+		return;
+
+	if (!hrtimer_callback_running(hr))
+		__hrtimer_start_range_ns(hr, cpuctx->hrtimer_interval,
+					 0, HRTIMER_MODE_REL_PINNED, 0);
+}
+
 void perf_pmu_disable(struct pmu *pmu)
 {
 	int *count = this_cpu_ptr(pmu->pmu_disable_count);
@@ -1506,6 +1600,7 @@ group_sched_in(struct perf_event *group_event,
 
 	if (event_sched_in(group_event, cpuctx, ctx)) {
 		pmu->cancel_txn(pmu);
+		perf_cpu_hrtimer_restart(cpuctx);
 		return -EAGAIN;
 	}
 
@@ -1552,6 +1647,8 @@ group_error:
 
 	pmu->cancel_txn(pmu);
 
+	perf_cpu_hrtimer_restart(cpuctx);
+
 	return -EAGAIN;
 }
 
@@ -1807,8 +1904,10 @@ static int __perf_event_enable(void *info)
 	 * If this event can't go on and it's part of a
 	 * group, then the whole group has to come off.
 	 */
-	if (leader != event)
+	if (leader != event) {
 		group_sched_out(leader, cpuctx, ctx);
+		perf_cpu_hrtimer_restart(cpuctx);
+	}
 	if (leader->attr.pinned) {
 		update_group_times(leader);
 		leader->state = PERF_EVENT_STATE_ERROR;
@@ -2555,7 +2654,7 @@ static void rotate_ctx(struct perf_event_context *ctx)
  * because they're strictly cpu affine and rotate_start is called with IRQs
  * disabled, while rotate_context is called from IRQ context.
  */
-static void perf_rotate_context(struct perf_cpu_context *cpuctx)
+static int perf_rotate_context(struct perf_cpu_context *cpuctx)
 {
 	struct perf_event_context *ctx = NULL;
 	int rotate = 0, remove = 1;
@@ -2594,6 +2693,8 @@ static void perf_rotate_context(struct perf_cpu_context *cpuctx)
 done:
 	if (remove)
 		list_del_init(&cpuctx->rotation_list);
+
+	return rotate;
 }
 
 #ifdef CONFIG_NO_HZ_FULL
@@ -2625,10 +2726,6 @@ void perf_event_task_tick(void)
 		ctx = cpuctx->task_ctx;
 		if (ctx)
 			perf_adjust_freq_unthr_context(ctx, throttled);
-
-		if (cpuctx->jiffies_interval == 1 ||
-		    !(jiffies % cpuctx->jiffies_interval))
-			perf_rotate_context(cpuctx);
 	}
 }
 
@@ -6001,7 +6098,9 @@ skip_type:
 		lockdep_set_class(&cpuctx->ctx.lock, &cpuctx_lock);
 		cpuctx->ctx.type = cpu_context;
 		cpuctx->ctx.pmu = pmu;
-		cpuctx->jiffies_interval = 1;
+
+		__perf_cpu_hrtimer_init(cpuctx, cpu);
+
 		INIT_LIST_HEAD(&cpuctx->rotation_list);
 		cpuctx->unique_pmu = pmu;
 	}
@@ -7387,7 +7486,6 @@ perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu)
 	case CPU_DOWN_PREPARE:
 		perf_event_exit_cpu(cpu);
 		break;
-
 	default:
 		break;
 	}