aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
authorSteven Rostedt (VMware) <rostedt@goodmis.org>2018-07-09 17:48:54 -0400
committerTejun Heo <tj@kernel.org>2018-07-11 13:48:47 -0400
commite4f8d81c738db6d3ffdabfb8329aa2feaa310699 (patch)
tree97153959e7c625bc49850709149a1faa5146a70e /kernel
parent1e09177acae32a61586af26d83ca5ef591cdcaf5 (diff)
cgroup/tracing: Move taking of spin lock out of trace event handlers
It is unwise to take spin locks from the handlers of trace events. Mainly, because they can introduce lockups, because it introduces locks in places that are normally not tested. Worse yet, because trace events are tucked away in the include/trace/events/ directory, locks that are taken there are forgotten about. As a general rule, I tell people never to take any locks in a trace event handler. Several cgroup trace event handlers call cgroup_path() which eventually takes the kernfs_rename_lock spinlock. This injects the spinlock in the code without people realizing it. It also can cause issues for the PREEMPT_RT patch, as the spinlock becomes a mutex, and the trace event handlers are called with preemption disabled. By moving the calculation of the cgroup_path() out of the trace event handlers and into a macro (surrounded by a trace_cgroup_##type##_enabled()), then we could place the cgroup_path into a string, and pass that to the trace event. Not only does this remove the taking of the spinlock out of the trace event handler, but it also means that the cgroup_path() only needs to be called once (it is currently called twice, once to get the length to reserver the buffer for, and once again to get the path itself. Now it only needs to be done once. Reported-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org> Signed-off-by: Tejun Heo <tj@kernel.org>
Diffstat (limited to 'kernel')
-rw-r--r--kernel/cgroup/cgroup-internal.h26
-rw-r--r--kernel/cgroup/cgroup-v1.c4
-rw-r--r--kernel/cgroup/cgroup.c12
3 files changed, 35 insertions, 7 deletions
diff --git a/kernel/cgroup/cgroup-internal.h b/kernel/cgroup/cgroup-internal.h
index 77ff1cd6a252..75568fcf2180 100644
--- a/kernel/cgroup/cgroup-internal.h
+++ b/kernel/cgroup/cgroup-internal.h
@@ -8,6 +8,32 @@
8#include <linux/list.h> 8#include <linux/list.h>
9#include <linux/refcount.h> 9#include <linux/refcount.h>
10 10
11#define TRACE_CGROUP_PATH_LEN 1024
12extern spinlock_t trace_cgroup_path_lock;
13extern char trace_cgroup_path[TRACE_CGROUP_PATH_LEN];
14
15/*
16 * cgroup_path() takes a spin lock. It is good practice not to take
17 * spin locks within trace point handlers, as they are mostly hidden
18 * from normal view. As cgroup_path() can take the kernfs_rename_lock
19 * spin lock, it is best to not call that function from the trace event
20 * handler.
21 *
22 * Note: trace_cgroup_##type##_enabled() is a static branch that will only
23 * be set when the trace event is enabled.
24 */
25#define TRACE_CGROUP_PATH(type, cgrp, ...) \
26 do { \
27 if (trace_cgroup_##type##_enabled()) { \
28 spin_lock(&trace_cgroup_path_lock); \
29 cgroup_path(cgrp, trace_cgroup_path, \
30 TRACE_CGROUP_PATH_LEN); \
31 trace_cgroup_##type(cgrp, trace_cgroup_path, \
32 ##__VA_ARGS__); \
33 spin_unlock(&trace_cgroup_path_lock); \
34 } \
35 } while (0)
36
11/* 37/*
12 * A cgroup can be associated with multiple css_sets as different tasks may 38 * A cgroup can be associated with multiple css_sets as different tasks may
13 * belong to different cgroups on different hierarchies. In the other 39 * belong to different cgroups on different hierarchies. In the other
diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c
index 8b4f0768efd6..51063e7a93c2 100644
--- a/kernel/cgroup/cgroup-v1.c
+++ b/kernel/cgroup/cgroup-v1.c
@@ -135,7 +135,7 @@ int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from)
135 if (task) { 135 if (task) {
136 ret = cgroup_migrate(task, false, &mgctx); 136 ret = cgroup_migrate(task, false, &mgctx);
137 if (!ret) 137 if (!ret)
138 trace_cgroup_transfer_tasks(to, task, false); 138 TRACE_CGROUP_PATH(transfer_tasks, to, task, false);
139 put_task_struct(task); 139 put_task_struct(task);
140 } 140 }
141 } while (task && !ret); 141 } while (task && !ret);
@@ -865,7 +865,7 @@ static int cgroup1_rename(struct kernfs_node *kn, struct kernfs_node *new_parent
865 865
866 ret = kernfs_rename(kn, new_parent, new_name_str); 866 ret = kernfs_rename(kn, new_parent, new_name_str);
867 if (!ret) 867 if (!ret)
868 trace_cgroup_rename(cgrp); 868 TRACE_CGROUP_PATH(rename, cgrp);
869 869
870 mutex_unlock(&cgroup_mutex); 870 mutex_unlock(&cgroup_mutex);
871 871
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index 077370bf8964..4a439de621bd 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -83,6 +83,9 @@ EXPORT_SYMBOL_GPL(cgroup_mutex);
83EXPORT_SYMBOL_GPL(css_set_lock); 83EXPORT_SYMBOL_GPL(css_set_lock);
84#endif 84#endif
85 85
86DEFINE_SPINLOCK(trace_cgroup_path_lock);
87char trace_cgroup_path[TRACE_CGROUP_PATH_LEN];
88
86/* 89/*
87 * Protects cgroup_idr and css_idr so that IDs can be released without 90 * Protects cgroup_idr and css_idr so that IDs can be released without
88 * grabbing cgroup_mutex. 91 * grabbing cgroup_mutex.
@@ -2638,7 +2641,7 @@ int cgroup_attach_task(struct cgroup *dst_cgrp, struct task_struct *leader,
2638 cgroup_migrate_finish(&mgctx); 2641 cgroup_migrate_finish(&mgctx);
2639 2642
2640 if (!ret) 2643 if (!ret)
2641 trace_cgroup_attach_task(dst_cgrp, leader, threadgroup); 2644 TRACE_CGROUP_PATH(attach_task, dst_cgrp, leader, threadgroup);
2642 2645
2643 return ret; 2646 return ret;
2644} 2647}
@@ -4634,7 +4637,7 @@ static void css_release_work_fn(struct work_struct *work)
4634 struct cgroup *tcgrp; 4637 struct cgroup *tcgrp;
4635 4638
4636 /* cgroup release path */ 4639 /* cgroup release path */
4637 trace_cgroup_release(cgrp); 4640 TRACE_CGROUP_PATH(release, cgrp);
4638 4641
4639 if (cgroup_on_dfl(cgrp)) 4642 if (cgroup_on_dfl(cgrp))
4640 cgroup_rstat_flush(cgrp); 4643 cgroup_rstat_flush(cgrp);
@@ -4977,7 +4980,7 @@ int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name, umode_t mode)
4977 if (ret) 4980 if (ret)
4978 goto out_destroy; 4981 goto out_destroy;
4979 4982
4980 trace_cgroup_mkdir(cgrp); 4983 TRACE_CGROUP_PATH(mkdir, cgrp);
4981 4984
4982 /* let's create and online css's */ 4985 /* let's create and online css's */
4983 kernfs_activate(kn); 4986 kernfs_activate(kn);
@@ -5165,9 +5168,8 @@ int cgroup_rmdir(struct kernfs_node *kn)
5165 return 0; 5168 return 0;
5166 5169
5167 ret = cgroup_destroy_locked(cgrp); 5170 ret = cgroup_destroy_locked(cgrp);
5168
5169 if (!ret) 5171 if (!ret)
5170 trace_cgroup_rmdir(cgrp); 5172 TRACE_CGROUP_PATH(rmdir, cgrp);
5171 5173
5172 cgroup_kn_unlock(kn); 5174 cgroup_kn_unlock(kn);
5173 return ret; 5175 return ret;