Diffstat (limited to 'kernel/bpf/cgroup.c')
-rw-r--r--    kernel/bpf/cgroup.c    94
1 file changed, 74 insertions(+), 20 deletions(-)
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index fcde0f7b2585..1b65ab0df457 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -22,13 +22,23 @@
 DEFINE_STATIC_KEY_FALSE(cgroup_bpf_enabled_key);
 EXPORT_SYMBOL(cgroup_bpf_enabled_key);
 
+void cgroup_bpf_offline(struct cgroup *cgrp)
+{
+        cgroup_get(cgrp);
+        percpu_ref_kill(&cgrp->bpf.refcnt);
+}
+
 /**
- * cgroup_bpf_put() - put references of all bpf programs
- * @cgrp: the cgroup to modify
+ * cgroup_bpf_release() - put references of all bpf programs and
+ *                        release all cgroup bpf data
+ * @work: work structure embedded into the cgroup to modify
  */
-void cgroup_bpf_put(struct cgroup *cgrp)
+static void cgroup_bpf_release(struct work_struct *work)
 {
+        struct cgroup *cgrp = container_of(work, struct cgroup,
+                                           bpf.release_work);
         enum bpf_cgroup_storage_type stype;
+        struct bpf_prog_array *old_array;
         unsigned int type;
 
         for (type = 0; type < ARRAY_SIZE(cgrp->bpf.progs); type++) {
@@ -45,8 +55,27 @@ void cgroup_bpf_put(struct cgroup *cgrp)
                         kfree(pl);
                         static_branch_dec(&cgroup_bpf_enabled_key);
                 }
-                bpf_prog_array_free(cgrp->bpf.effective[type]);
+                old_array = rcu_dereference_protected(
+                                cgrp->bpf.effective[type],
+                                percpu_ref_is_dying(&cgrp->bpf.refcnt));
+                bpf_prog_array_free(old_array);
         }
+
+        percpu_ref_exit(&cgrp->bpf.refcnt);
+        cgroup_put(cgrp);
+}
+
+/**
+ * cgroup_bpf_release_fn() - callback used to schedule releasing
+ *                           of bpf cgroup data
+ * @ref: percpu ref counter structure
+ */
+static void cgroup_bpf_release_fn(struct percpu_ref *ref)
+{
+        struct cgroup *cgrp = container_of(ref, struct cgroup, bpf.refcnt);
+
+        INIT_WORK(&cgrp->bpf.release_work, cgroup_bpf_release);
+        queue_work(system_wq, &cgrp->bpf.release_work);
 }
 
 /* count number of elements in the list.
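The two functions added above split teardown into two stages: cgroup_bpf_offline() pins the cgroup and kills the percpu_ref, and once the last outstanding reference is put, cgroup_bpf_release_fn() fires and defers the actual freeing to a workqueue, because the release callback may run in atomic context while the cleanup can sleep. A minimal sketch of the same percpu_ref pattern in isolation, using a hypothetical my_obj type in place of struct cgroup:

#include <linux/kernel.h>
#include <linux/percpu-refcount.h>
#include <linux/workqueue.h>
#include <linux/slab.h>

/* Hypothetical object mirroring the cgrp->bpf lifetime fields. */
struct my_obj {
        struct percpu_ref refcnt;
        struct work_struct release_work;
};

/* Runs from a workqueue, so sleeping cleanup is safe here. */
static void my_obj_release(struct work_struct *work)
{
        struct my_obj *obj = container_of(work, struct my_obj, release_work);

        percpu_ref_exit(&obj->refcnt);
        kfree(obj);
}

/* Called when the last reference is put; may run in atomic context,
 * so only schedule the real release, as cgroup_bpf_release_fn() does.
 */
static void my_obj_release_fn(struct percpu_ref *ref)
{
        struct my_obj *obj = container_of(ref, struct my_obj, refcnt);

        INIT_WORK(&obj->release_work, my_obj_release);
        queue_work(system_wq, &obj->release_work);
}

/* Offline: switch the ref to atomic mode and drop the initial reference;
 * my_obj_release_fn() runs once all remaining references are gone.
 */
static void my_obj_offline(struct my_obj *obj)
{
        percpu_ref_kill(&obj->refcnt);
}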
@@ -101,7 +130,7 @@ static bool hierarchy_allows_attach(struct cgroup *cgrp,
  */
 static int compute_effective_progs(struct cgroup *cgrp,
                                    enum bpf_attach_type type,
-                                   struct bpf_prog_array __rcu **array)
+                                   struct bpf_prog_array **array)
 {
         enum bpf_cgroup_storage_type stype;
         struct bpf_prog_array *progs;
@@ -139,17 +168,16 @@ static int compute_effective_progs(struct cgroup *cgrp,
                 }
         } while ((p = cgroup_parent(p)));
 
-        rcu_assign_pointer(*array, progs);
+        *array = progs;
         return 0;
 }
 
 static void activate_effective_progs(struct cgroup *cgrp,
                                      enum bpf_attach_type type,
-                                     struct bpf_prog_array __rcu *array)
+                                     struct bpf_prog_array *old_array)
 {
-        struct bpf_prog_array __rcu *old_array;
-
-        old_array = xchg(&cgrp->bpf.effective[type], array);
+        rcu_swap_protected(cgrp->bpf.effective[type], old_array,
+                           lockdep_is_held(&cgroup_mutex));
         /* free prog array after grace period, since __cgroup_bpf_run_*()
          * might be still walking the array
          */
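rcu_swap_protected() replaces the earlier open-coded xchg(): it publishes the new array with rcu_assign_pointer() and returns the previous pointer through its second argument, while lockdep verifies that the caller really holds cgroup_mutex. For reference, the macro in rcupdate.h of this vintage expands to roughly:

#define rcu_swap_protected(rcu_ptr, ptr, c) do {                        \
        typeof(ptr) __tmp = rcu_dereference_protected((rcu_ptr), (c));  \
        rcu_assign_pointer((rcu_ptr), (ptr));                           \
        (ptr) = __tmp;                                                  \
} while (0)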
@@ -166,8 +194,13 @@ int cgroup_bpf_inherit(struct cgroup *cgrp)
  * that array below is variable length
  */
 #define NR ARRAY_SIZE(cgrp->bpf.effective)
-        struct bpf_prog_array __rcu *arrays[NR] = {};
-        int i;
+        struct bpf_prog_array *arrays[NR] = {};
+        int ret, i;
+
+        ret = percpu_ref_init(&cgrp->bpf.refcnt, cgroup_bpf_release_fn, 0,
+                              GFP_KERNEL);
+        if (ret)
+                return ret;
 
         for (i = 0; i < NR; i++)
                 INIT_LIST_HEAD(&cgrp->bpf.progs[i]);
@@ -183,6 +216,9 @@ int cgroup_bpf_inherit(struct cgroup *cgrp)
 cleanup:
         for (i = 0; i < NR; i++)
                 bpf_prog_array_free(arrays[i]);
+
+        percpu_ref_exit(&cgrp->bpf.refcnt);
+
         return -ENOMEM;
 }
 
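percpu_ref_init() allocates per-CPU counters and can fail, and once it succeeds every exit path has to be paired with percpu_ref_exit(): the cleanup label above covers the -ENOMEM path of cgroup_bpf_inherit(), while normal teardown goes through cgroup_bpf_release(). A sketch of that init-side pairing, reusing the hypothetical my_obj type from the earlier sketch:

int my_obj_setup(struct my_obj *obj);   /* hypothetical follow-up setup */

static int my_obj_init(struct my_obj *obj)
{
        int ret;

        ret = percpu_ref_init(&obj->refcnt, my_obj_release_fn, 0, GFP_KERNEL);
        if (ret)
                return ret;             /* typically -ENOMEM */

        ret = my_obj_setup(obj);
        if (ret)
                percpu_ref_exit(&obj->refcnt);  /* undo the init on error */
        return ret;
}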
@@ -444,10 +480,14 @@ int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
         enum bpf_attach_type type = attr->query.attach_type;
         struct list_head *progs = &cgrp->bpf.progs[type];
         u32 flags = cgrp->bpf.flags[type];
+        struct bpf_prog_array *effective;
         int cnt, ret = 0, i;
 
+        effective = rcu_dereference_protected(cgrp->bpf.effective[type],
+                                              lockdep_is_held(&cgroup_mutex));
+
         if (attr->query.query_flags & BPF_F_QUERY_EFFECTIVE)
-                cnt = bpf_prog_array_length(cgrp->bpf.effective[type]);
+                cnt = bpf_prog_array_length(effective);
         else
                 cnt = prog_list_length(progs);
 
@@ -464,8 +504,7 @@ int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
         }
 
         if (attr->query.query_flags & BPF_F_QUERY_EFFECTIVE) {
-                return bpf_prog_array_copy_to_user(cgrp->bpf.effective[type],
-                                                   prog_ids, cnt);
+                return bpf_prog_array_copy_to_user(effective, prog_ids, cnt);
         } else {
                 struct bpf_prog_list *pl;
                 u32 id;
@@ -548,8 +587,16 @@ int cgroup_bpf_prog_query(const union bpf_attr *attr,
  * The program type passed in via @type must be suitable for network
  * filtering. No further check is performed to assert that.
  *
- * This function will return %-EPERM if any if an attached program was found
- * and if it returned != 1 during execution. In all other cases, 0 is returned.
+ * For egress packets, this function can return:
+ *   NET_XMIT_SUCCESS    (0)    - continue with packet output
+ *   NET_XMIT_DROP       (1)    - drop packet and notify TCP to call cwr
+ *   NET_XMIT_CN         (2)    - continue with packet output and notify TCP
+ *                                to call cwr
+ *   -EPERM                     - drop packet
+ *
+ * For ingress packets, this function will return -EPERM if any
+ * attached program was found and if it returned != 1 during execution.
+ * Otherwise 0 is returned.
  */
 int __cgroup_bpf_run_filter_skb(struct sock *sk,
                                 struct sk_buff *skb,
@@ -575,12 +622,19 @@ int __cgroup_bpf_run_filter_skb(struct sock *sk,
         /* compute pointers for the bpf prog */
         bpf_compute_and_save_data_end(skb, &saved_data_end);
 
-        ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], skb,
-                                 __bpf_prog_run_save_cb);
+        if (type == BPF_CGROUP_INET_EGRESS) {
+                ret = BPF_PROG_CGROUP_INET_EGRESS_RUN_ARRAY(
+                        cgrp->bpf.effective[type], skb, __bpf_prog_run_save_cb);
+        } else {
+                ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], skb,
+                                         __bpf_prog_run_save_cb);
+                ret = (ret == 1 ? 0 : -EPERM);
+        }
         bpf_restore_data_end(skb, saved_data_end);
         __skb_pull(skb, offset);
         skb->sk = save_sk;
-        return ret == 1 ? 0 : -EPERM;
+
+        return ret;
 }
 EXPORT_SYMBOL(__cgroup_bpf_run_filter_skb);
 
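The kernel-doc table above is the new contract; BPF_PROG_CGROUP_INET_EGRESS_RUN_ARRAY (a macro in include/linux/bpf.h) folds the programs' verdicts plus a congestion-notification flag into those four results. A hedged sketch of just that decision logic, with assumed helper and variable names (the real macro iterates the effective prog array inline):

#include <linux/types.h>
#include <linux/netdevice.h>    /* NET_XMIT_SUCCESS, NET_XMIT_DROP, NET_XMIT_CN */
#include <linux/errno.h>

/* Sketch, not the actual macro body: "allowed" means every program in
 * the effective array permitted the packet; "cn" means at least one
 * program requested congestion notification.
 */
static int fold_egress_verdict(bool allowed, bool cn)
{
        if (allowed)
                return cn ? NET_XMIT_CN : NET_XMIT_SUCCESS;
        return cn ? NET_XMIT_DROP : -EPERM;
}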