Diffstat (limited to 'kernel/bpf/cgroup.c')
-rw-r--r--	kernel/bpf/cgroup.c | 94 ++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 74 insertions(+), 20 deletions(-)
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index fcde0f7b2585..1b65ab0df457 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -22,13 +22,23 @@
 DEFINE_STATIC_KEY_FALSE(cgroup_bpf_enabled_key);
 EXPORT_SYMBOL(cgroup_bpf_enabled_key);
 
+void cgroup_bpf_offline(struct cgroup *cgrp)
+{
+	cgroup_get(cgrp);
+	percpu_ref_kill(&cgrp->bpf.refcnt);
+}
+
 /**
- * cgroup_bpf_put() - put references of all bpf programs
- * @cgrp: the cgroup to modify
+ * cgroup_bpf_release() - put references of all bpf programs and
+ *                        release all cgroup bpf data
+ * @work: work structure embedded into the cgroup to modify
  */
-void cgroup_bpf_put(struct cgroup *cgrp)
+static void cgroup_bpf_release(struct work_struct *work)
 {
+	struct cgroup *cgrp = container_of(work, struct cgroup,
+					   bpf.release_work);
 	enum bpf_cgroup_storage_type stype;
+	struct bpf_prog_array *old_array;
 	unsigned int type;
 
 	for (type = 0; type < ARRAY_SIZE(cgrp->bpf.progs); type++) {
@@ -45,8 +55,27 @@ void cgroup_bpf_put(struct cgroup *cgrp)
 			kfree(pl);
 			static_branch_dec(&cgroup_bpf_enabled_key);
 		}
-		bpf_prog_array_free(cgrp->bpf.effective[type]);
+		old_array = rcu_dereference_protected(
+				cgrp->bpf.effective[type],
+				percpu_ref_is_dying(&cgrp->bpf.refcnt));
+		bpf_prog_array_free(old_array);
 	}
+
+	percpu_ref_exit(&cgrp->bpf.refcnt);
+	cgroup_put(cgrp);
+}
+
+/**
+ * cgroup_bpf_release_fn() - callback used to schedule releasing
+ *                           of bpf cgroup data
+ * @ref: percpu ref counter structure
+ */
+static void cgroup_bpf_release_fn(struct percpu_ref *ref)
+{
+	struct cgroup *cgrp = container_of(ref, struct cgroup, bpf.refcnt);
+
+	INIT_WORK(&cgrp->bpf.release_work, cgroup_bpf_release);
+	queue_work(system_wq, &cgrp->bpf.release_work);
 }
 
 /* count number of elements in the list.
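
The two hunks above follow the standard percpu_ref teardown pattern: the ref is killed when the cgroup goes offline, the release callback fires once the last reference drops, and the real teardown is deferred to a workqueue because the callback may run from atomic context. A minimal standalone sketch of that pattern, assuming a hypothetical my_obj (only the percpu_ref and workqueue APIs are real):

	struct my_obj {
		struct percpu_ref refcnt;
		struct work_struct release_work;
	};

	static void my_obj_release_workfn(struct work_struct *work)
	{
		struct my_obj *obj = container_of(work, struct my_obj,
						  release_work);

		percpu_ref_exit(&obj->refcnt);	/* free the percpu counters */
		kfree(obj);
	}

	static void my_obj_release(struct percpu_ref *ref)
	{
		struct my_obj *obj = container_of(ref, struct my_obj, refcnt);

		/* may be called from atomic context: defer to a workqueue */
		INIT_WORK(&obj->release_work, my_obj_release_workfn);
		queue_work(system_wq, &obj->release_work);
	}

	/* setup:    percpu_ref_init(&obj->refcnt, my_obj_release, 0, GFP_KERNEL);
	 * teardown: percpu_ref_kill(&obj->refcnt); the release callback runs
	 *           after the last percpu_ref_put().
	 */
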
@@ -101,7 +130,7 @@ static bool hierarchy_allows_attach(struct cgroup *cgrp,
  */
 static int compute_effective_progs(struct cgroup *cgrp,
 				   enum bpf_attach_type type,
-				   struct bpf_prog_array __rcu **array)
+				   struct bpf_prog_array **array)
 {
 	enum bpf_cgroup_storage_type stype;
 	struct bpf_prog_array *progs;
@@ -139,17 +168,16 @@ static int compute_effective_progs(struct cgroup *cgrp,
 		}
 	} while ((p = cgroup_parent(p)));
 
-	rcu_assign_pointer(*array, progs);
+	*array = progs;
 	return 0;
 }
 
 static void activate_effective_progs(struct cgroup *cgrp,
 				     enum bpf_attach_type type,
-				     struct bpf_prog_array __rcu *array)
+				     struct bpf_prog_array *old_array)
 {
-	struct bpf_prog_array __rcu *old_array;
-
-	old_array = xchg(&cgrp->bpf.effective[type], array);
+	rcu_swap_protected(cgrp->bpf.effective[type], old_array,
+			   lockdep_is_held(&cgroup_mutex));
 	/* free prog array after grace period, since __cgroup_bpf_run_*()
 	 * might be still walking the array
 	 */
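
rcu_swap_protected() replaces the open-coded xchg(): it swaps an RCU-protected pointer with a plain one while asserting, via the lockdep condition, that the caller holds the updater-side lock. For reference, include/linux/rcupdate.h defines it roughly as:

	#define rcu_swap_protected(rcu_ptr, ptr, c) do {			\
		typeof(ptr) __tmp = rcu_dereference_protected((rcu_ptr), (c));	\
		rcu_assign_pointer((rcu_ptr), (ptr));				\
		(ptr) = __tmp;							\
	} while (0)

After the swap, old_array holds the previous effective array, which the code freeing it below only releases once readers are done, per the grace-period comment.
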
@@ -166,8 +194,13 @@ int cgroup_bpf_inherit(struct cgroup *cgrp)
 	 * that array below is variable length
 	 */
 #define NR ARRAY_SIZE(cgrp->bpf.effective)
-	struct bpf_prog_array __rcu *arrays[NR] = {};
-	int i;
+	struct bpf_prog_array *arrays[NR] = {};
+	int ret, i;
+
+	ret = percpu_ref_init(&cgrp->bpf.refcnt, cgroup_bpf_release_fn, 0,
+			      GFP_KERNEL);
+	if (ret)
+		return ret;
 
 	for (i = 0; i < NR; i++)
 		INIT_LIST_HEAD(&cgrp->bpf.progs[i]);
@@ -183,6 +216,9 @@ int cgroup_bpf_inherit(struct cgroup *cgrp)
 cleanup:
 	for (i = 0; i < NR; i++)
 		bpf_prog_array_free(arrays[i]);
+
+	percpu_ref_exit(&cgrp->bpf.refcnt);
+
 	return -ENOMEM;
 }
 
@@ -444,10 +480,14 @@ int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
 	enum bpf_attach_type type = attr->query.attach_type;
 	struct list_head *progs = &cgrp->bpf.progs[type];
 	u32 flags = cgrp->bpf.flags[type];
+	struct bpf_prog_array *effective;
 	int cnt, ret = 0, i;
 
+	effective = rcu_dereference_protected(cgrp->bpf.effective[type],
+					      lockdep_is_held(&cgroup_mutex));
+
 	if (attr->query.query_flags & BPF_F_QUERY_EFFECTIVE)
-		cnt = bpf_prog_array_length(cgrp->bpf.effective[type]);
+		cnt = bpf_prog_array_length(effective);
 	else
 		cnt = prog_list_length(progs);
 
@@ -464,8 +504,7 @@ int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
 	}
 
 	if (attr->query.query_flags & BPF_F_QUERY_EFFECTIVE) {
-		return bpf_prog_array_copy_to_user(cgrp->bpf.effective[type],
-						   prog_ids, cnt);
+		return bpf_prog_array_copy_to_user(effective, prog_ids, cnt);
 	} else {
 		struct bpf_prog_list *pl;
 		u32 id;
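
The query path above serves the BPF_PROG_QUERY command of the bpf(2) syscall. A hedged userspace sketch of driving it with BPF_F_QUERY_EFFECTIVE (query_effective() is an illustrative helper, not a libbpf API; error handling trimmed):

	#include <linux/bpf.h>
	#include <string.h>
	#include <sys/syscall.h>
	#include <unistd.h>

	/* Fetch the effective program IDs attached to a cgroup.
	 * On entry *cnt is the capacity of prog_ids; on success it is
	 * updated to the number of programs reported by the kernel.
	 */
	static int query_effective(int cgroup_fd, enum bpf_attach_type type,
				   __u32 *prog_ids, __u32 *cnt)
	{
		union bpf_attr attr;

		memset(&attr, 0, sizeof(attr));
		attr.query.target_fd = cgroup_fd;
		attr.query.attach_type = type;
		attr.query.query_flags = BPF_F_QUERY_EFFECTIVE;
		attr.query.prog_ids = (__u64)(unsigned long)prog_ids;
		attr.query.prog_cnt = *cnt;

		if (syscall(__NR_bpf, BPF_PROG_QUERY, &attr, sizeof(attr)) < 0)
			return -1;

		*cnt = attr.query.prog_cnt;
		return 0;
	}
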
@@ -548,8 +587,16 @@ int cgroup_bpf_prog_query(const union bpf_attr *attr,
  * The program type passed in via @type must be suitable for network
  * filtering. No further check is performed to assert that.
  *
- * This function will return %-EPERM if any if an attached program was found
- * and if it returned != 1 during execution. In all other cases, 0 is returned.
+ * For egress packets, this function can return:
+ *   NET_XMIT_SUCCESS    (0)	- continue with packet output
+ *   NET_XMIT_DROP       (1)	- drop packet and notify TCP to call cwr
+ *   NET_XMIT_CN         (2)	- continue with packet output and notify TCP
+ *				  to call cwr
+ *   -EPERM			- drop packet
+ *
+ * For ingress packets, this function will return -EPERM if any
+ * attached program was found and if it returned != 1 during execution.
+ * Otherwise 0 is returned.
  */
 int __cgroup_bpf_run_filter_skb(struct sock *sk,
 				struct sk_buff *skb,
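
To make the new egress contract concrete, here is a hedged sketch of how a caller on the output path might act on these return values; the function names are illustrative stand-ins (the real call sites live in ip_finish_output() and its IPv6 counterpart, which this patch does not show):

	/* Illustrative only: dispatch on the documented return codes. */
	static int example_finish_output(struct net *net, struct sock *sk,
					 struct sk_buff *skb)
	{
		int ret;

		ret = cgroup_bpf_run_filter_skb(sk, skb, BPF_CGROUP_INET_EGRESS);
		switch (ret) {
		case NET_XMIT_SUCCESS:	/* 0: transmit as usual */
			return __example_finish_output(net, sk, skb);
		case NET_XMIT_CN:	/* 2: transmit, but propagate the CN signal */
			return __example_finish_output(net, sk, skb) ? : ret;
		default:		/* NET_XMIT_DROP or -EPERM: drop */
			kfree_skb(skb);
			return ret;
		}
	}
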
@@ -575,12 +622,19 @@ int __cgroup_bpf_run_filter_skb(struct sock *sk,
 	/* compute pointers for the bpf prog */
 	bpf_compute_and_save_data_end(skb, &saved_data_end);
 
-	ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], skb,
-				 __bpf_prog_run_save_cb);
+	if (type == BPF_CGROUP_INET_EGRESS) {
+		ret = BPF_PROG_CGROUP_INET_EGRESS_RUN_ARRAY(
+			cgrp->bpf.effective[type], skb, __bpf_prog_run_save_cb);
+	} else {
+		ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], skb,
+					 __bpf_prog_run_save_cb);
+		ret = (ret == 1 ? 0 : -EPERM);
+	}
 	bpf_restore_data_end(skb, saved_data_end);
 	__skb_pull(skb, offset);
 	skb->sk = save_sk;
-	return ret == 1 ? 0 : -EPERM;
+
+	return ret;
 }
 EXPORT_SYMBOL(__cgroup_bpf_run_filter_skb);
 
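On the program side, the companion changes in this series let cgroup/skb egress programs return values in the [0, 3] range: bit 0 means pass the packet, bit 1 requests a congestion notification, and programs using 2 or 3 must be loaded with expected_attach_type set to BPF_CGROUP_INET_EGRESS. A hedged libbpf-style sketch; the over_rate_limit() policy check is a hypothetical placeholder, not a BPF helper:

	#include <linux/bpf.h>
	#include <bpf/bpf_helpers.h>

	/* Hypothetical policy check, illustrative condition only. */
	static __always_inline int over_rate_limit(struct __sk_buff *skb)
	{
		return skb->len > 1500;
	}

	SEC("cgroup_skb/egress")
	int egress_cn(struct __sk_buff *skb)
	{
		if (over_rate_limit(skb))
			return 3;	/* bit0=1: pass; bit1=1: signal CN to TCP */

		return 1;		/* pass, no congestion signal */
	}

	char _license[] SEC("license") = "GPL";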