aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorRoman Gushchin <guro@fb.com>2019-06-25 17:38:58 -0400
committerDaniel Borkmann <daniel@iogearbox.net>2019-06-27 16:51:58 -0400
commite5c891a349d7c556b7b9dc231d6dd78e88a29e5c (patch)
tree34f2beb6de8bd858648246c32db562c26a00c390
parent572a6928f9e3689ad2c2f94814e6215104eec1b7 (diff)
bpf: fix cgroup bpf release synchronization
Since commit 4bfc0bb2c60e ("bpf: decouple the lifetime of cgroup_bpf from cgroup itself"), cgroup_bpf release occurs asynchronously (from a worker context), and before the release of the cgroup itself. This introduced a previously non-existing race between the release and update paths. E.g. if a leaf's cgroup_bpf is released and a new bpf program is attached to one of its ancestor cgroups at the same time. The race may result in double-free and other memory corruptions. To fix the problem, let's protect the body of cgroup_bpf_release() with cgroup_mutex, as it was effectively previously, when all this code was called from the cgroup release path with cgroup mutex held. Also let's skip cgroups, which have no chance to invoke a bpf program, on the update path. If the cgroup bpf refcnt reached 0, it means that the cgroup is offline (no attached processes), and there are no associated sockets left. It means there is no point in updating the effective progs array! And it can lead to a leak, if it happens after the release. So, let's skip such cgroups. Big thanks to Tejun Heo for discovering and debugging this problem! Fixes: 4bfc0bb2c60e ("bpf: decouple the lifetime of cgroup_bpf from cgroup itself") Reported-by: Tejun Heo <tj@kernel.org> Signed-off-by: Roman Gushchin <guro@fb.com> Signed-off-by: Alexei Starovoitov <ast@kernel.org> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
-rw-r--r--kernel/bpf/cgroup.c19
1 file changed, 18 insertions, 1 deletion
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index c225c42e114a..077ed3a19848 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -16,6 +16,8 @@
 #include <linux/bpf-cgroup.h>
 #include <net/sock.h>
 
+#include "../cgroup/cgroup-internal.h"
+
 DEFINE_STATIC_KEY_FALSE(cgroup_bpf_enabled_key);
 EXPORT_SYMBOL(cgroup_bpf_enabled_key);
 
@@ -38,6 +40,8 @@ static void cgroup_bpf_release(struct work_struct *work)
 	struct bpf_prog_array *old_array;
 	unsigned int type;
 
+	mutex_lock(&cgroup_mutex);
+
 	for (type = 0; type < ARRAY_SIZE(cgrp->bpf.progs); type++) {
 		struct list_head *progs = &cgrp->bpf.progs[type];
 		struct bpf_prog_list *pl, *tmp;
@@ -54,10 +58,12 @@ static void cgroup_bpf_release(struct work_struct *work)
 		}
 		old_array = rcu_dereference_protected(
 				cgrp->bpf.effective[type],
-				percpu_ref_is_dying(&cgrp->bpf.refcnt));
+				lockdep_is_held(&cgroup_mutex));
 		bpf_prog_array_free(old_array);
 	}
 
+	mutex_unlock(&cgroup_mutex);
+
 	percpu_ref_exit(&cgrp->bpf.refcnt);
 	cgroup_put(cgrp);
 }
@@ -229,6 +235,9 @@ static int update_effective_progs(struct cgroup *cgrp,
 	css_for_each_descendant_pre(css, &cgrp->self) {
 		struct cgroup *desc = container_of(css, struct cgroup, self);
 
+		if (percpu_ref_is_zero(&desc->bpf.refcnt))
+			continue;
+
 		err = compute_effective_progs(desc, type, &desc->bpf.inactive);
 		if (err)
 			goto cleanup;
@@ -238,6 +247,14 @@ static int update_effective_progs(struct cgroup *cgrp,
 	css_for_each_descendant_pre(css, &cgrp->self) {
 		struct cgroup *desc = container_of(css, struct cgroup, self);
 
+		if (percpu_ref_is_zero(&desc->bpf.refcnt)) {
+			if (unlikely(desc->bpf.inactive)) {
+				bpf_prog_array_free(desc->bpf.inactive);
+				desc->bpf.inactive = NULL;
+			}
+			continue;
+		}
+
 		activate_effective_progs(desc, type, desc->bpf.inactive);
 		desc->bpf.inactive = NULL;
 	}