author     Stanislav Fomichev <sdf@google.com>      2019-05-28 17:14:43 -0400
committer  Daniel Borkmann <daniel@iogearbox.net>   2019-05-29 09:17:35 -0400
commit     dbcc1ba26e43bd32cb308e50ac4cb4a29d2f5967
tree       1d8ce96e66911655a7abeafbddb7fb39b777a175 /kernel/bpf/cgroup.c
parent     02205d2ed6fe26a8f4fd9e9cec251d1dc7f79316
bpf: cgroup: properly use bpf_prog_array api

Now that we don't have __rcu markers on the bpf_prog_array helpers, let's
use proper rcu_dereference_protected to obtain the array pointer under the
mutex. We also don't need __rcu annotations on cgroup_bpf.inactive since
it's not read or updated concurrently.

v4:
* drop the cgroup_rcu_xyz wrappers and use the rcu APIs directly; this
  should make it clearer which mutex/refcount protects each particular
  place

v3:
* amend cgroup_rcu_dereference to include percpu_ref_is_dying;
  cgroup_bpf is now reference counted and we no longer hold cgroup_mutex
  in cgroup_bpf_release

v2:
* replace xchg with rcu_swap_protected

Cc: Roman Gushchin <guro@fb.com>
Signed-off-by: Stanislav Fomichev <sdf@google.com>
Acked-by: Roman Gushchin <guro@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
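For readers unfamiliar with the pattern, here is a minimal sketch of update-side RCU access under a mutex, in the spirit of what this patch does for cgrp->bpf.effective[]. It is not code from the patch; demo_lock, demo_ptr and struct demo_item are made-up names.

/* Sketch only: generic update-side pattern under a mutex, mirroring
 * the rcu_dereference_protected()/rcu_assign_pointer() usage in this
 * patch. demo_lock, demo_ptr and struct demo_item are hypothetical.
 */
#include <linux/mutex.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct demo_item {
	int val;
	struct rcu_head rcu;
};

static DEFINE_MUTEX(demo_lock);
static struct demo_item __rcu *demo_ptr;

static void demo_replace(struct demo_item *new_item)
{
	struct demo_item *old;

	mutex_lock(&demo_lock);
	/* No rcu_read_lock() needed here: the mutex excludes concurrent
	 * updaters, and lockdep_is_held() documents and checks that.
	 */
	old = rcu_dereference_protected(demo_ptr,
					lockdep_is_held(&demo_lock));
	rcu_assign_pointer(demo_ptr, new_item);
	mutex_unlock(&demo_lock);

	/* Readers may still be walking the old object; defer the free
	 * until after a grace period.
	 */
	if (old)
		kfree_rcu(old, rcu);
}

rcu_swap_protected(), used in activate_effective_progs() in the diff below, combines the dereference-and-assign steps into a single macro and hands the old pointer back to the caller.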
Diffstat (limited to 'kernel/bpf/cgroup.c')
-rw-r--r--   kernel/bpf/cgroup.c   28
1 file changed, 17 insertions, 11 deletions
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index d995edbe816d..ff594eb86fd7 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -38,6 +38,7 @@ static void cgroup_bpf_release(struct work_struct *work)
 	struct cgroup *cgrp = container_of(work, struct cgroup,
 					   bpf.release_work);
 	enum bpf_cgroup_storage_type stype;
+	struct bpf_prog_array *old_array;
 	unsigned int type;
 
 	for (type = 0; type < ARRAY_SIZE(cgrp->bpf.progs); type++) {
@@ -54,7 +55,10 @@ static void cgroup_bpf_release(struct work_struct *work)
 			kfree(pl);
 			static_branch_dec(&cgroup_bpf_enabled_key);
 		}
-		bpf_prog_array_free(cgrp->bpf.effective[type]);
+		old_array = rcu_dereference_protected(
+				cgrp->bpf.effective[type],
+				percpu_ref_is_dying(&cgrp->bpf.refcnt));
+		bpf_prog_array_free(old_array);
 	}
 
 	percpu_ref_exit(&cgrp->bpf.refcnt);
@@ -126,7 +130,7 @@ static bool hierarchy_allows_attach(struct cgroup *cgrp,
  */
 static int compute_effective_progs(struct cgroup *cgrp,
 				   enum bpf_attach_type type,
-				   struct bpf_prog_array __rcu **array)
+				   struct bpf_prog_array **array)
 {
 	enum bpf_cgroup_storage_type stype;
 	struct bpf_prog_array *progs;
@@ -164,17 +168,16 @@ static int compute_effective_progs(struct cgroup *cgrp,
 		}
 	} while ((p = cgroup_parent(p)));
 
-	rcu_assign_pointer(*array, progs);
+	*array = progs;
 	return 0;
 }
 
 static void activate_effective_progs(struct cgroup *cgrp,
 				     enum bpf_attach_type type,
-				     struct bpf_prog_array __rcu *array)
+				     struct bpf_prog_array *old_array)
 {
-	struct bpf_prog_array __rcu *old_array;
-
-	old_array = xchg(&cgrp->bpf.effective[type], array);
+	rcu_swap_protected(cgrp->bpf.effective[type], old_array,
+			   lockdep_is_held(&cgroup_mutex));
 	/* free prog array after grace period, since __cgroup_bpf_run_*()
 	 * might be still walking the array
 	 */
@@ -191,7 +194,7 @@ int cgroup_bpf_inherit(struct cgroup *cgrp)
  * that array below is variable length
  */
 #define NR ARRAY_SIZE(cgrp->bpf.effective)
-	struct bpf_prog_array __rcu *arrays[NR] = {};
+	struct bpf_prog_array *arrays[NR] = {};
 	int ret, i;
 
 	ret = percpu_ref_init(&cgrp->bpf.refcnt, cgroup_bpf_release_fn, 0,
@@ -477,10 +480,14 @@ int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
 	enum bpf_attach_type type = attr->query.attach_type;
 	struct list_head *progs = &cgrp->bpf.progs[type];
 	u32 flags = cgrp->bpf.flags[type];
+	struct bpf_prog_array *effective;
 	int cnt, ret = 0, i;
 
+	effective = rcu_dereference_protected(cgrp->bpf.effective[type],
+					      lockdep_is_held(&cgroup_mutex));
+
 	if (attr->query.query_flags & BPF_F_QUERY_EFFECTIVE)
-		cnt = bpf_prog_array_length(cgrp->bpf.effective[type]);
+		cnt = bpf_prog_array_length(effective);
 	else
 		cnt = prog_list_length(progs);
 
@@ -497,8 +504,7 @@ int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
 	}
 
 	if (attr->query.query_flags & BPF_F_QUERY_EFFECTIVE) {
-		return bpf_prog_array_copy_to_user(cgrp->bpf.effective[type],
-						   prog_ids, cnt);
+		return bpf_prog_array_copy_to_user(effective, prog_ids, cnt);
 	} else {
 		struct bpf_prog_list *pl;
 		u32 id;
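The free-after-grace-period comment in activate_effective_progs() exists because readers walk the effective array with only rcu_read_lock() held. As a rough illustration, here is a reader-side counterpart reusing the hypothetical demo_ptr and struct demo_item from the sketch after the commit message; it is not the kernel's actual __cgroup_bpf_run_*() code.

/* Sketch only: reader-side counterpart to the update sketch above.
 * In the kernel, __cgroup_bpf_run_*() plays this role, walking
 * cgrp->bpf.effective[type] under rcu_read_lock().
 */
static int demo_read(void)
{
	struct demo_item *p;
	int val = -1;

	rcu_read_lock();
	/* Plain rcu_dereference(): readers rely on the RCU read-side
	 * critical section, not on the updater's mutex.
	 */
	p = rcu_dereference(demo_ptr);
	if (p)
		val = p->val;	/* stays valid until rcu_read_unlock() */
	rcu_read_unlock();

	return val;
}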