author	Thomas Gleixner <tglx@linutronix.de>	2017-05-24 04:15:34 -0400
committer	Thomas Gleixner <tglx@linutronix.de>	2017-05-26 04:10:44 -0400
commit	a63fbed776c7124ce9f606234267c3c095b2680e (patch)
tree	662d7fed8d276db9cf277ea53e6bd6d2e3eb9961
parent	fdaf0a51bad496289356d11d796095a293794b5f (diff)
perf/tracing/cpuhotplug: Fix locking order
perf, tracing, kprobes and jump_labels have a gazillion ways to create lock dependency chains. Some of those involve nested invocations of get_online_cpus().

The conversion of the hotplug locking to a percpu rwsem requires avoiding such nested calls. sys_perf_event_open() protects most of the syscall logic against cpu hotplug. This causes nested calls and lock inversions versus ftrace and kprobes in various interesting ways.

It's impossible to move the hotplug locking to the outer end of all call chains in the involved facilities, so the hotplug protection in sys_perf_event_open() needs to be solved differently.

Introduce 'pmus_mutex' which protects a perf private online cpumask. This mutex is taken when the mask is updated in the cpu hotplug callbacks and can be taken in sys_perf_event_open() to protect the swhash setup/teardown code and when the final judgement about a valid event has to be made.

[ tglx: Produced changelog and fixed the swhash interaction ]

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Ingo Molnar <mingo@kernel.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Sebastian Siewior <bigeasy@linutronix.de>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Link: http://lkml.kernel.org/r/20170524081548.930941109@linutronix.de
-rw-r--r--	include/linux/perf_event.h |   2
-rw-r--r--	kernel/events/core.c       | 106
2 files changed, 78 insertions, 30 deletions
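
Before the diff itself, here is a minimal user-space sketch of the locking pattern the changelog describes: a private online mask plus a cached per-context online flag, both updated from the hotplug callbacks, while the event-open path only checks the per-context flag under the context mutex. This is an assumed simplification for illustration; NR_FAKE_CPUS, the fake_* names and the pthread locks are invented here and are not part of the kernel code in the patch below.

/*
 * Sketch of the "private online mask + per-context online flag" idea.
 * Not kernel code: all fake_* identifiers are illustrative stand-ins.
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

#define NR_FAKE_CPUS 4

struct fake_cpu_ctx {
	pthread_mutex_t mutex;	/* stands in for perf_event_context::mutex */
	bool online;		/* stands in for perf_cpu_context::online */
};

static pthread_mutex_t fake_pmus_lock = PTHREAD_MUTEX_INITIALIZER;
static bool fake_online_mask[NR_FAKE_CPUS];	/* stands in for perf_online_mask */
static struct fake_cpu_ctx fake_ctx[NR_FAKE_CPUS];

/* Hotplug "online" callback: publish the CPU under both locks. */
static void fake_cpu_up(int cpu)
{
	pthread_mutex_lock(&fake_pmus_lock);
	fake_online_mask[cpu] = true;
	pthread_mutex_lock(&fake_ctx[cpu].mutex);
	fake_ctx[cpu].online = true;
	pthread_mutex_unlock(&fake_ctx[cpu].mutex);
	pthread_mutex_unlock(&fake_pmus_lock);
}

/* Hotplug "offline" callback: the mirror image of fake_cpu_up(). */
static void fake_cpu_down(int cpu)
{
	pthread_mutex_lock(&fake_pmus_lock);
	pthread_mutex_lock(&fake_ctx[cpu].mutex);
	fake_ctx[cpu].online = false;
	pthread_mutex_unlock(&fake_ctx[cpu].mutex);
	fake_online_mask[cpu] = false;
	pthread_mutex_unlock(&fake_pmus_lock);
}

/*
 * Setup-time path (e.g. registering a new PMU): reads the mask while
 * holding the same mutex the hotplug callbacks take.
 */
static bool fake_cpu_is_online(int cpu)
{
	bool ret;

	pthread_mutex_lock(&fake_pmus_lock);
	ret = fake_online_mask[cpu];
	pthread_mutex_unlock(&fake_pmus_lock);
	return ret;
}

/*
 * Event-open path: no global hotplug lock. Holding the context mutex is
 * enough to serialize against the callbacks above, so the online check
 * stays valid while the event is being installed.
 */
static int fake_event_open(int cpu)
{
	int err = 0;

	pthread_mutex_lock(&fake_ctx[cpu].mutex);
	if (!fake_ctx[cpu].online)
		err = -1;	/* the kernel path returns -ENODEV here */
	/* ... otherwise install the event while still holding the mutex ... */
	pthread_mutex_unlock(&fake_ctx[cpu].mutex);
	return err;
}

int main(void)
{
	for (int cpu = 0; cpu < NR_FAKE_CPUS; cpu++)
		pthread_mutex_init(&fake_ctx[cpu].mutex, NULL);

	fake_cpu_up(1);
	printf("open on cpu 1: %d, mask: %d\n", fake_event_open(1), fake_cpu_is_online(1));
	fake_cpu_down(1);
	printf("open on cpu 1: %d, mask: %d\n", fake_event_open(1), fake_cpu_is_online(1));
	return 0;
}

The design point mirrored here is that the open path no longer needs get_online_cpus(): the context mutex it already holds for event installation also serializes against the hotplug callbacks, which take that same mutex when flipping the online flag.
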
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 24a635887f28..7d6aa29094b2 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -801,6 +801,8 @@ struct perf_cpu_context {
 
 	struct list_head		sched_cb_entry;
 	int				sched_cb_usage;
+
+	int				online;
 };
 
 struct perf_output_handle {
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 6e75a5c9412d..b97cda4d1777 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -389,6 +389,7 @@ static atomic_t nr_switch_events __read_mostly;
 static LIST_HEAD(pmus);
 static DEFINE_MUTEX(pmus_lock);
 static struct srcu_struct pmus_srcu;
+static cpumask_var_t perf_online_mask;
 
 /*
  * perf event paranoia level:
@@ -3812,14 +3813,6 @@ find_get_context(struct pmu *pmu, struct task_struct *task,
 		if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
 			return ERR_PTR(-EACCES);
 
-		/*
-		 * We could be clever and allow to attach a event to an
-		 * offline CPU and activate it when the CPU comes up, but
-		 * that's for later.
-		 */
-		if (!cpu_online(cpu))
-			return ERR_PTR(-ENODEV);
-
 		cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
 		ctx = &cpuctx->ctx;
 		get_ctx(ctx);
@@ -7703,7 +7696,8 @@ static int swevent_hlist_get_cpu(int cpu)
 	int err = 0;
 
 	mutex_lock(&swhash->hlist_mutex);
-	if (!swevent_hlist_deref(swhash) && cpu_online(cpu)) {
+	if (!swevent_hlist_deref(swhash) &&
+	    cpumask_test_cpu(cpu, perf_online_mask)) {
 		struct swevent_hlist *hlist;
 
 		hlist = kzalloc(sizeof(*hlist), GFP_KERNEL);
@@ -7724,7 +7718,7 @@ static int swevent_hlist_get(void)
 {
 	int err, cpu, failed_cpu;
 
-	get_online_cpus();
+	mutex_lock(&pmus_lock);
 	for_each_possible_cpu(cpu) {
 		err = swevent_hlist_get_cpu(cpu);
 		if (err) {
@@ -7732,8 +7726,7 @@ static int swevent_hlist_get(void)
 			goto fail;
 		}
 	}
-	put_online_cpus();
-
+	mutex_unlock(&pmus_lock);
 	return 0;
 fail:
 	for_each_possible_cpu(cpu) {
@@ -7741,8 +7734,7 @@ fail:
 			break;
 		swevent_hlist_put_cpu(cpu);
 	}
-
-	put_online_cpus();
+	mutex_unlock(&pmus_lock);
 	return err;
 }
 
@@ -8920,7 +8912,7 @@ perf_event_mux_interval_ms_store(struct device *dev,
 	pmu->hrtimer_interval_ms = timer;
 
 	/* update all cpuctx for this PMU */
-	get_online_cpus();
+	cpus_read_lock();
 	for_each_online_cpu(cpu) {
 		struct perf_cpu_context *cpuctx;
 		cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
@@ -8929,7 +8921,7 @@ perf_event_mux_interval_ms_store(struct device *dev,
 		cpu_function_call(cpu,
 			(remote_function_f)perf_mux_hrtimer_restart, cpuctx);
 	}
-	put_online_cpus();
+	cpus_read_unlock();
 	mutex_unlock(&mux_interval_mutex);
 
 	return count;
@@ -9059,6 +9051,7 @@ skip_type:
 		lockdep_set_class(&cpuctx->ctx.mutex, &cpuctx_mutex);
 		lockdep_set_class(&cpuctx->ctx.lock, &cpuctx_lock);
 		cpuctx->ctx.pmu = pmu;
+		cpuctx->online = cpumask_test_cpu(cpu, perf_online_mask);
 
 		__perf_mux_hrtimer_init(cpuctx, cpu);
 	}
@@ -9882,12 +9875,10 @@ SYSCALL_DEFINE5(perf_event_open,
 		goto err_task;
 	}
 
-	get_online_cpus();
-
 	if (task) {
 		err = mutex_lock_interruptible(&task->signal->cred_guard_mutex);
 		if (err)
-			goto err_cpus;
+			goto err_cred;
 
 		/*
 		 * Reuse ptrace permission checks for now.
@@ -10073,6 +10064,23 @@ SYSCALL_DEFINE5(perf_event_open,
 		goto err_locked;
 	}
 
+	if (!task) {
+		/*
+		 * Check if the @cpu we're creating an event for is online.
+		 *
+		 * We use the perf_cpu_context::ctx::mutex to serialize against
+		 * the hotplug notifiers. See perf_event_{init,exit}_cpu().
+		 */
+		struct perf_cpu_context *cpuctx =
+			container_of(ctx, struct perf_cpu_context, ctx);
+
+		if (!cpuctx->online) {
+			err = -ENODEV;
+			goto err_locked;
+		}
+	}
+
+
 	/*
 	 * Must be under the same ctx::mutex as perf_install_in_context(),
 	 * because we need to serialize with concurrent event creation.
@@ -10162,8 +10170,6 @@ SYSCALL_DEFINE5(perf_event_open,
 		put_task_struct(task);
 	}
 
-	put_online_cpus();
-
 	mutex_lock(&current->perf_event_mutex);
 	list_add_tail(&event->owner_entry, &current->perf_event_list);
 	mutex_unlock(&current->perf_event_mutex);
@@ -10197,8 +10203,6 @@ err_alloc:
 err_cred:
 	if (task)
 		mutex_unlock(&task->signal->cred_guard_mutex);
-err_cpus:
-	put_online_cpus();
 err_task:
 	if (task)
 		put_task_struct(task);
@@ -10253,6 +10257,21 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
 		goto err_unlock;
 	}
 
+	if (!task) {
+		/*
+		 * Check if the @cpu we're creating an event for is online.
+		 *
+		 * We use the perf_cpu_context::ctx::mutex to serialize against
+		 * the hotplug notifiers. See perf_event_{init,exit}_cpu().
+		 */
+		struct perf_cpu_context *cpuctx =
+			container_of(ctx, struct perf_cpu_context, ctx);
+		if (!cpuctx->online) {
+			err = -ENODEV;
+			goto err_unlock;
+		}
+	}
+
 	if (!exclusive_event_installable(event, ctx)) {
 		err = -EBUSY;
 		goto err_unlock;
@@ -10920,6 +10939,8 @@ static void __init perf_event_init_all_cpus(void)
 	struct swevent_htable *swhash;
 	int cpu;
 
+	zalloc_cpumask_var(&perf_online_mask, GFP_KERNEL);
+
 	for_each_possible_cpu(cpu) {
 		swhash = &per_cpu(swevent_htable, cpu);
 		mutex_init(&swhash->hlist_mutex);
@@ -10935,7 +10956,7 @@ static void __init perf_event_init_all_cpus(void)
 	}
 }
 
-int perf_event_init_cpu(unsigned int cpu)
+void perf_swevent_init_cpu(unsigned int cpu)
 {
 	struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu);
 
@@ -10948,7 +10969,6 @@ int perf_event_init_cpu(unsigned int cpu)
 		rcu_assign_pointer(swhash->swevent_hlist, hlist);
 	}
 	mutex_unlock(&swhash->hlist_mutex);
-	return 0;
 }
 
 #if defined CONFIG_HOTPLUG_CPU || defined CONFIG_KEXEC_CORE
@@ -10966,19 +10986,22 @@ static void __perf_event_exit_context(void *__info)
 
 static void perf_event_exit_cpu_context(int cpu)
 {
+	struct perf_cpu_context *cpuctx;
 	struct perf_event_context *ctx;
 	struct pmu *pmu;
-	int idx;
 
-	idx = srcu_read_lock(&pmus_srcu);
-	list_for_each_entry_rcu(pmu, &pmus, entry) {
-		ctx = &per_cpu_ptr(pmu->pmu_cpu_context, cpu)->ctx;
+	mutex_lock(&pmus_lock);
+	list_for_each_entry(pmu, &pmus, entry) {
+		cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
+		ctx = &cpuctx->ctx;
 
 		mutex_lock(&ctx->mutex);
 		smp_call_function_single(cpu, __perf_event_exit_context, ctx, 1);
+		cpuctx->online = 0;
 		mutex_unlock(&ctx->mutex);
 	}
-	srcu_read_unlock(&pmus_srcu, idx);
+	cpumask_clear_cpu(cpu, perf_online_mask);
+	mutex_unlock(&pmus_lock);
 }
 #else
 
@@ -10986,6 +11009,29 @@ static void perf_event_exit_cpu_context(int cpu) { }
 
 #endif
 
+int perf_event_init_cpu(unsigned int cpu)
+{
+	struct perf_cpu_context *cpuctx;
+	struct perf_event_context *ctx;
+	struct pmu *pmu;
+
+	perf_swevent_init_cpu(cpu);
+
+	mutex_lock(&pmus_lock);
+	cpumask_set_cpu(cpu, perf_online_mask);
+	list_for_each_entry(pmu, &pmus, entry) {
+		cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
+		ctx = &cpuctx->ctx;
+
+		mutex_lock(&ctx->mutex);
+		cpuctx->online = 1;
+		mutex_unlock(&ctx->mutex);
+	}
+	mutex_unlock(&pmus_lock);
+
+	return 0;
+}
+
 int perf_event_exit_cpu(unsigned int cpu)
 {
 	perf_event_exit_cpu_context(cpu);