author	Mark Rutland <mark.rutland@arm.com>	2015-01-07 10:01:54 -0500
committer	Ingo Molnar <mingo@kernel.org>	2015-02-04 02:07:16 -0500
commit	2fde4f94e0a9531251e706fa57131b51b0df042e (patch)
tree	f688f63dd3e5444cbb7dc24cbbcbfc414b550d96 /kernel/events
parent	cc34b98bacb0e102fb720d95a25fed5c6090a70d (diff)
perf: Decouple unthrottling and rotating
Currently the adjustments made as part of perf_event_task_tick() use the
percpu rotation lists to iterate over any active PMU contexts, but these
are not used by the context rotation code, having been replaced by
separate (per-context) hrtimer callbacks. However, some manipulation of
the rotation lists (i.e. removal of contexts) has remained in
perf_rotate_context(). This leads to the following issues:

* Contexts are not always removed from the rotation lists. Removal of
  PMUs which have been placed in rotation lists, but have not been
  removed by a hrtimer callback, can result in corruption of the
  rotation lists (when memory backing the context is freed).

  This has been observed to result in hangs when PMU drivers built as
  modules are inserted and removed around the creation of events for
  said PMUs.

* Contexts which do not require rotation may be removed from the
  rotation lists as a result of a hrtimer, and will not be considered by
  the unthrottling code in perf_event_task_tick.

This patch fixes the issue by updating the rotation list when events are
scheduled in/out, ensuring that each rotation list stays in sync with the
HW state. As each event holds a refcount on the module of its PMU, this
ensures that when a PMU module is unloaded none of its CPU contexts can
be in a rotation list. By maintaining a list of perf_event_contexts
rather than perf_event_cpu_contexts, we don't need separate paths to
handle the cpu and task contexts, which also makes the code a little
simpler.

As the rotation_list variables are not used for rotation, these are
renamed to active_ctx_list, which better matches their current function.
perf_pmu_rotate_{start,stop} are renamed to
perf_event_ctx_{activate,deactivate}.

Reported-by: Johannes Jensen <johannes.jensen@arm.com>
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Will Deacon <Will.Deacon@arm.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Fengguang Wu <fengguang.wu@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Link: http://lkml.kernel.org/r/20150129134511.GR17721@leverpostej
Signed-off-by: Ingo Molnar <mingo@kernel.org>
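[Editor's illustration] The bookkeeping the patch adopts can be shown outside the kernel: a context goes onto the per-CPU active list when its first event is scheduled in (nr_active goes 0 -> 1) and comes off when its last event is scheduled out (nr_active drops to 0), so the tick only walks contexts that actually have events on the hardware. The following is a minimal user-space C sketch of that pattern; struct ctx, task_tick() and the hand-rolled list helpers are illustrative stand-ins, not the kernel's APIs, and it mirrors the "if (!ctx->nr_active++)" / "if (!--ctx->nr_active)" hunks in the diff below.

/*
 * User-space sketch of the active-context bookkeeping (not kernel code).
 */
#include <assert.h>
#include <stddef.h>
#include <stdio.h>

struct list_node {
	struct list_node *prev, *next;
};

static void list_init(struct list_node *n) { n->prev = n->next = n; }

static void list_add(struct list_node *n, struct list_node *head)
{
	n->next = head->next;
	n->prev = head;
	head->next->prev = n;
	head->next = n;
}

static void list_del_init(struct list_node *n)
{
	n->prev->next = n->next;
	n->next->prev = n->prev;
	list_init(n);
}

struct ctx {
	const char *name;
	int nr_active;			/* events currently scheduled on HW   */
	struct list_node active;	/* linkage on the per-CPU active list */
};

static struct list_node active_ctx_list;	/* one such list per CPU in the kernel */

static void event_sched_in(struct ctx *ctx)
{
	if (!ctx->nr_active++)		/* 0 -> 1: context becomes active */
		list_add(&ctx->active, &active_ctx_list);
}

static void event_sched_out(struct ctx *ctx)
{
	assert(ctx->nr_active > 0);
	if (!--ctx->nr_active)		/* 1 -> 0: context goes idle */
		list_del_init(&ctx->active);
}

/* Stand-in for perf_event_task_tick(): visit only the active contexts. */
static void task_tick(void)
{
	for (struct list_node *n = active_ctx_list.next; n != &active_ctx_list; n = n->next) {
		struct ctx *ctx = (struct ctx *)((char *)n - offsetof(struct ctx, active));
		printf("unthrottle/adjust %s\n", ctx->name);
	}
}

int main(void)
{
	struct ctx cpu_ctx = { .name = "cpu" }, task_ctx = { .name = "task" };

	list_init(&active_ctx_list);
	list_init(&cpu_ctx.active);
	list_init(&task_ctx.active);

	event_sched_in(&cpu_ctx);
	event_sched_in(&task_ctx);
	task_tick();			/* visits both contexts */

	event_sched_out(&task_ctx);
	task_tick();			/* visits only the cpu context */
	return 0;
}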
Diffstat (limited to 'kernel/events')
-rw-r--r--	kernel/events/core.c	81
1 file changed, 29 insertions(+), 52 deletions(-)
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 37cc20e8aa3b..7f2fbb8b5069 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -872,22 +872,32 @@ void perf_pmu_enable(struct pmu *pmu)
 		pmu->pmu_enable(pmu);
 }
 
-static DEFINE_PER_CPU(struct list_head, rotation_list);
+static DEFINE_PER_CPU(struct list_head, active_ctx_list);
 
 /*
- * perf_pmu_rotate_start() and perf_rotate_context() are fully serialized
- * because they're strictly cpu affine and rotate_start is called with IRQs
- * disabled, while rotate_context is called from IRQ context.
+ * perf_event_ctx_activate(), perf_event_ctx_deactivate(), and
+ * perf_event_task_tick() are fully serialized because they're strictly cpu
+ * affine and perf_event_ctx{activate,deactivate} are called with IRQs
+ * disabled, while perf_event_task_tick is called from IRQ context.
  */
-static void perf_pmu_rotate_start(struct pmu *pmu)
+static void perf_event_ctx_activate(struct perf_event_context *ctx)
 {
-	struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
-	struct list_head *head = this_cpu_ptr(&rotation_list);
+	struct list_head *head = this_cpu_ptr(&active_ctx_list);
 
 	WARN_ON(!irqs_disabled());
 
-	if (list_empty(&cpuctx->rotation_list))
-		list_add(&cpuctx->rotation_list, head);
+	WARN_ON(!list_empty(&ctx->active_ctx_list));
+
+	list_add(&ctx->active_ctx_list, head);
+}
+
+static void perf_event_ctx_deactivate(struct perf_event_context *ctx)
+{
+	WARN_ON(!irqs_disabled());
+
+	WARN_ON(list_empty(&ctx->active_ctx_list));
+
+	list_del_init(&ctx->active_ctx_list);
 }
 
 static void get_ctx(struct perf_event_context *ctx)
@@ -1233,8 +1243,6 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx)
 		ctx->nr_branch_stack++;
 
 	list_add_rcu(&event->event_entry, &ctx->event_list);
-	if (!ctx->nr_events)
-		perf_pmu_rotate_start(ctx->pmu);
 	ctx->nr_events++;
 	if (event->attr.inherit_stat)
 		ctx->nr_stat++;
@@ -1561,7 +1569,8 @@ event_sched_out(struct perf_event *event,
 
 	if (!is_software_event(event))
 		cpuctx->active_oncpu--;
-	ctx->nr_active--;
+	if (!--ctx->nr_active)
+		perf_event_ctx_deactivate(ctx);
 	if (event->attr.freq && event->attr.sample_freq)
 		ctx->nr_freq--;
 	if (event->attr.exclusive || !cpuctx->active_oncpu)
@@ -1885,7 +1894,8 @@ event_sched_in(struct perf_event *event,
 
 	if (!is_software_event(event))
 		cpuctx->active_oncpu++;
-	ctx->nr_active++;
+	if (!ctx->nr_active++)
+		perf_event_ctx_activate(ctx);
 	if (event->attr.freq && event->attr.sample_freq)
 		ctx->nr_freq++;
 
@@ -2742,12 +2752,6 @@ static void perf_event_context_sched_in(struct perf_event_context *ctx,
 
 	perf_pmu_enable(ctx->pmu);
 	perf_ctx_unlock(cpuctx, ctx);
-
-	/*
-	 * Since these rotations are per-cpu, we need to ensure the
-	 * cpu-context we got scheduled on is actually rotating.
-	 */
-	perf_pmu_rotate_start(ctx->pmu);
 }
 
 /*
@@ -3035,25 +3039,18 @@ static void rotate_ctx(struct perf_event_context *ctx)
 	list_rotate_left(&ctx->flexible_groups);
 }
 
-/*
- * perf_pmu_rotate_start() and perf_rotate_context() are fully serialized
- * because they're strictly cpu affine and rotate_start is called with IRQs
- * disabled, while rotate_context is called from IRQ context.
- */
 static int perf_rotate_context(struct perf_cpu_context *cpuctx)
 {
 	struct perf_event_context *ctx = NULL;
-	int rotate = 0, remove = 1;
+	int rotate = 0;
 
 	if (cpuctx->ctx.nr_events) {
-		remove = 0;
 		if (cpuctx->ctx.nr_events != cpuctx->ctx.nr_active)
 			rotate = 1;
 	}
 
 	ctx = cpuctx->task_ctx;
 	if (ctx && ctx->nr_events) {
-		remove = 0;
 		if (ctx->nr_events != ctx->nr_active)
 			rotate = 1;
 	}
@@ -3077,8 +3074,6 @@ static int perf_rotate_context(struct perf_cpu_context *cpuctx)
 	perf_pmu_enable(cpuctx->ctx.pmu);
 	perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
 done:
-	if (remove)
-		list_del_init(&cpuctx->rotation_list);
 
 	return rotate;
 }
@@ -3096,9 +3091,8 @@ bool perf_event_can_stop_tick(void)
 
 void perf_event_task_tick(void)
 {
-	struct list_head *head = this_cpu_ptr(&rotation_list);
-	struct perf_cpu_context *cpuctx, *tmp;
-	struct perf_event_context *ctx;
+	struct list_head *head = this_cpu_ptr(&active_ctx_list);
+	struct perf_event_context *ctx, *tmp;
 	int throttled;
 
 	WARN_ON(!irqs_disabled());
@@ -3106,14 +3100,8 @@ void perf_event_task_tick(void)
 	__this_cpu_inc(perf_throttled_seq);
 	throttled = __this_cpu_xchg(perf_throttled_count, 0);
 
-	list_for_each_entry_safe(cpuctx, tmp, head, rotation_list) {
-		ctx = &cpuctx->ctx;
+	list_for_each_entry_safe(ctx, tmp, head, active_ctx_list)
 		perf_adjust_freq_unthr_context(ctx, throttled);
-
-		ctx = cpuctx->task_ctx;
-		if (ctx)
-			perf_adjust_freq_unthr_context(ctx, throttled);
-	}
 }
 
 static int event_enable_on_exec(struct perf_event *event,
@@ -3272,6 +3260,7 @@ static void __perf_event_init_context(struct perf_event_context *ctx)
 {
 	raw_spin_lock_init(&ctx->lock);
 	mutex_init(&ctx->mutex);
+	INIT_LIST_HEAD(&ctx->active_ctx_list);
 	INIT_LIST_HEAD(&ctx->pinned_groups);
 	INIT_LIST_HEAD(&ctx->flexible_groups);
 	INIT_LIST_HEAD(&ctx->event_list);
@@ -6954,7 +6943,6 @@ skip_type:
 
 		__perf_cpu_hrtimer_init(cpuctx, cpu);
 
-		INIT_LIST_HEAD(&cpuctx->rotation_list);
 		cpuctx->unique_pmu = pmu;
 	}
 
@@ -8384,7 +8372,7 @@ static void __init perf_event_init_all_cpus(void)
 	for_each_possible_cpu(cpu) {
 		swhash = &per_cpu(swevent_htable, cpu);
 		mutex_init(&swhash->hlist_mutex);
-		INIT_LIST_HEAD(&per_cpu(rotation_list, cpu));
+		INIT_LIST_HEAD(&per_cpu(active_ctx_list, cpu));
 	}
 }
 
@@ -8405,22 +8393,11 @@ static void perf_event_init_cpu(int cpu)
 }
 
 #if defined CONFIG_HOTPLUG_CPU || defined CONFIG_KEXEC
-static void perf_pmu_rotate_stop(struct pmu *pmu)
-{
-	struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
-
-	WARN_ON(!irqs_disabled());
-
-	list_del_init(&cpuctx->rotation_list);
-}
-
 static void __perf_event_exit_context(void *__info)
 {
 	struct remove_event re = { .detach_group = true };
 	struct perf_event_context *ctx = __info;
 
-	perf_pmu_rotate_stop(ctx->pmu);
-
 	rcu_read_lock();
 	list_for_each_entry_rcu(re.event, &ctx->event_list, event_entry)
 		__perf_remove_from_context(&re);