Diffstat (limited to 'kernel/bpf/cgroup.c')
-rw-r--r--	kernel/bpf/cgroup.c | 94 ++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 74 insertions(+), 20 deletions(-)
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index fcde0f7b2585..1b65ab0df457 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -22,13 +22,23 @@
 DEFINE_STATIC_KEY_FALSE(cgroup_bpf_enabled_key);
 EXPORT_SYMBOL(cgroup_bpf_enabled_key);
 
+void cgroup_bpf_offline(struct cgroup *cgrp)
+{
+	cgroup_get(cgrp);
+	percpu_ref_kill(&cgrp->bpf.refcnt);
+}
+
 /**
- * cgroup_bpf_put() - put references of all bpf programs
- * @cgrp: the cgroup to modify
+ * cgroup_bpf_release() - put references of all bpf programs and
+ *                        release all cgroup bpf data
+ * @work: work structure embedded into the cgroup to modify
  */
-void cgroup_bpf_put(struct cgroup *cgrp)
+static void cgroup_bpf_release(struct work_struct *work)
 {
+	struct cgroup *cgrp = container_of(work, struct cgroup,
+					   bpf.release_work);
 	enum bpf_cgroup_storage_type stype;
+	struct bpf_prog_array *old_array;
 	unsigned int type;
 
 	for (type = 0; type < ARRAY_SIZE(cgrp->bpf.progs); type++) {
@@ -45,8 +55,27 @@ void cgroup_bpf_put(struct cgroup *cgrp)
 			kfree(pl);
 			static_branch_dec(&cgroup_bpf_enabled_key);
 		}
-		bpf_prog_array_free(cgrp->bpf.effective[type]);
+		old_array = rcu_dereference_protected(
+				cgrp->bpf.effective[type],
+				percpu_ref_is_dying(&cgrp->bpf.refcnt));
+		bpf_prog_array_free(old_array);
 	}
+
+	percpu_ref_exit(&cgrp->bpf.refcnt);
+	cgroup_put(cgrp);
+}
+
+/**
+ * cgroup_bpf_release_fn() - callback used to schedule releasing
+ *                           of bpf cgroup data
+ * @ref: percpu ref counter structure
+ */
+static void cgroup_bpf_release_fn(struct percpu_ref *ref)
+{
+	struct cgroup *cgrp = container_of(ref, struct cgroup, bpf.refcnt);
+
+	INIT_WORK(&cgrp->bpf.release_work, cgroup_bpf_release);
+	queue_work(system_wq, &cgrp->bpf.release_work);
 }
 
 /* count number of elements in the list.
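
The two hunks above follow the standard percpu_ref teardown pattern: the ref is killed when the cgroup goes offline, the release callback fires once the last reference drops, and the real teardown is deferred to a workqueue because the callback may run from atomic context. A minimal standalone sketch of that pattern, assuming a hypothetical my_obj (only the percpu_ref and workqueue APIs are real):

	struct my_obj {
		struct percpu_ref refcnt;
		struct work_struct release_work;
	};

	static void my_obj_release_workfn(struct work_struct *work)
	{
		struct my_obj *obj = container_of(work, struct my_obj,
						  release_work);

		percpu_ref_exit(&obj->refcnt);	/* free the percpu counters */
		kfree(obj);
	}

	static void my_obj_release(struct percpu_ref *ref)
	{
		struct my_obj *obj = container_of(ref, struct my_obj, refcnt);

		/* may be called from atomic context: defer to a workqueue */
		INIT_WORK(&obj->release_work, my_obj_release_workfn);
		queue_work(system_wq, &obj->release_work);
	}

	/* setup:    percpu_ref_init(&obj->refcnt, my_obj_release, 0, GFP_KERNEL);
	 * teardown: percpu_ref_kill(&obj->refcnt); the release callback runs
	 *           after the last percpu_ref_put().
	 */
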
@@ -101,7 +130,7 @@ static bool hierarchy_allows_attach(struct cgroup *cgrp,
  */
 static int compute_effective_progs(struct cgroup *cgrp,
 				   enum bpf_attach_type type,
-				   struct bpf_prog_array __rcu **array)
+				   struct bpf_prog_array **array)
 {
 	enum bpf_cgroup_storage_type stype;
 	struct bpf_prog_array *progs;
@@ -139,17 +168,16 @@ static int compute_effective_progs(struct cgroup *cgrp,
 		}
 	} while ((p = cgroup_parent(p)));
 
-	rcu_assign_pointer(*array, progs);
+	*array = progs;
 	return 0;
 }
 
 static void activate_effective_progs(struct cgroup *cgrp,
 				     enum bpf_attach_type type,
-				     struct bpf_prog_array __rcu *array)
+				     struct bpf_prog_array *old_array)
 {
-	struct bpf_prog_array __rcu *old_array;
-
-	old_array = xchg(&cgrp->bpf.effective[type], array);
+	rcu_swap_protected(cgrp->bpf.effective[type], old_array,
+			   lockdep_is_held(&cgroup_mutex));
 	/* free prog array after grace period, since __cgroup_bpf_run_*()
 	 * might be still walking the array
 	 */
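
rcu_swap_protected() replaces the open-coded xchg(): it swaps an RCU-protected pointer with a plain one while asserting, via the lockdep condition, that the caller holds the updater-side lock. For reference, include/linux/rcupdate.h defines it roughly as:

	#define rcu_swap_protected(rcu_ptr, ptr, c) do {			\
		typeof(ptr) __tmp = rcu_dereference_protected((rcu_ptr), (c));	\
		rcu_assign_pointer((rcu_ptr), (ptr));				\
		(ptr) = __tmp;							\
	} while (0)

After the swap, old_array holds the previous effective array, which the code freeing it below only releases once readers are done, per the grace-period comment.
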
@@ -166,8 +194,13 @@ int cgroup_bpf_inherit(struct cgroup *cgrp)
 	 * that array below is variable length
 	 */
 #define NR ARRAY_SIZE(cgrp->bpf.effective)
-	struct bpf_prog_array __rcu *arrays[NR] = {};
-	int i;
+	struct bpf_prog_array *arrays[NR] = {};
+	int ret, i;
+
+	ret = percpu_ref_init(&cgrp->bpf.refcnt, cgroup_bpf_release_fn, 0,
+			      GFP_KERNEL);
+	if (ret)
+		return ret;
 
 	for (i = 0; i < NR; i++)
 		INIT_LIST_HEAD(&cgrp->bpf.progs[i]);
@@ -183,6 +216,9 @@ int cgroup_bpf_inherit(struct cgroup *cgrp)
 cleanup:
 	for (i = 0; i < NR; i++)
 		bpf_prog_array_free(arrays[i]);
+
+	percpu_ref_exit(&cgrp->bpf.refcnt);
+
 	return -ENOMEM;
 }
 
@@ -444,10 +480,14 @@ int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
 	enum bpf_attach_type type = attr->query.attach_type;
 	struct list_head *progs = &cgrp->bpf.progs[type];
 	u32 flags = cgrp->bpf.flags[type];
+	struct bpf_prog_array *effective;
 	int cnt, ret = 0, i;
 
+	effective = rcu_dereference_protected(cgrp->bpf.effective[type],
+					      lockdep_is_held(&cgroup_mutex));
+
 	if (attr->query.query_flags & BPF_F_QUERY_EFFECTIVE)
-		cnt = bpf_prog_array_length(cgrp->bpf.effective[type]);
+		cnt = bpf_prog_array_length(effective);
 	else
 		cnt = prog_list_length(progs);
 
@@ -464,8 +504,7 @@ int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
 	}
 
 	if (attr->query.query_flags & BPF_F_QUERY_EFFECTIVE) {
-		return bpf_prog_array_copy_to_user(cgrp->bpf.effective[type],
-						   prog_ids, cnt);
+		return bpf_prog_array_copy_to_user(effective, prog_ids, cnt);
 	} else {
 		struct bpf_prog_list *pl;
 		u32 id;
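
The query path above serves the BPF_PROG_QUERY command of the bpf(2) syscall. A hedged userspace sketch of driving it with BPF_F_QUERY_EFFECTIVE (query_effective() is an illustrative helper, not a libbpf API; error handling trimmed):

	#include <linux/bpf.h>
	#include <string.h>
	#include <sys/syscall.h>
	#include <unistd.h>

	/* Fetch the effective program IDs attached to a cgroup.
	 * On entry *cnt is the capacity of prog_ids; on success it is
	 * updated to the number of programs reported by the kernel.
	 */
	static int query_effective(int cgroup_fd, enum bpf_attach_type type,
				   __u32 *prog_ids, __u32 *cnt)
	{
		union bpf_attr attr;

		memset(&attr, 0, sizeof(attr));
		attr.query.target_fd = cgroup_fd;
		attr.query.attach_type = type;
		attr.query.query_flags = BPF_F_QUERY_EFFECTIVE;
		attr.query.prog_ids = (__u64)(unsigned long)prog_ids;
		attr.query.prog_cnt = *cnt;

		if (syscall(__NR_bpf, BPF_PROG_QUERY, &attr, sizeof(attr)) < 0)
			return -1;

		*cnt = attr.query.prog_cnt;
		return 0;
	}
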
@@ -548,8 +587,16 @@ int cgroup_bpf_prog_query(const union bpf_attr *attr,
  * The program type passed in via @type must be suitable for network
  * filtering. No further check is performed to assert that.
  *
- * This function will return %-EPERM if any if an attached program was found
- * and if it returned != 1 during execution. In all other cases, 0 is returned.
+ * For egress packets, this function can return:
+ *   NET_XMIT_SUCCESS    (0)	- continue with packet output
+ *   NET_XMIT_DROP       (1)	- drop packet and notify TCP to call cwr
+ *   NET_XMIT_CN         (2)	- continue with packet output and notify TCP
+ *				  to call cwr
+ *   -EPERM			- drop packet
+ *
+ * For ingress packets, this function will return -EPERM if any
+ * attached program was found and if it returned != 1 during execution.
+ * Otherwise 0 is returned.
  */
 int __cgroup_bpf_run_filter_skb(struct sock *sk,
 				struct sk_buff *skb,
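
To make the new egress contract concrete, here is a hedged sketch of how a caller on the output path might act on these return values; the function names are illustrative stand-ins (the real call sites live in ip_finish_output() and its IPv6 counterpart, which this patch does not show):

	/* Illustrative only: dispatch on the documented return codes. */
	static int example_finish_output(struct net *net, struct sock *sk,
					 struct sk_buff *skb)
	{
		int ret;

		ret = cgroup_bpf_run_filter_skb(sk, skb, BPF_CGROUP_INET_EGRESS);
		switch (ret) {
		case NET_XMIT_SUCCESS:	/* 0: transmit as usual */
			return __example_finish_output(net, sk, skb);
		case NET_XMIT_CN:	/* 2: transmit, but propagate the CN signal */
			return __example_finish_output(net, sk, skb) ? : ret;
		default:		/* NET_XMIT_DROP or -EPERM: drop */
			kfree_skb(skb);
			return ret;
		}
	}
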
@@ -575,12 +622,19 @@ int __cgroup_bpf_run_filter_skb(struct sock *sk,
 	/* compute pointers for the bpf prog */
 	bpf_compute_and_save_data_end(skb, &saved_data_end);
 
-	ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], skb,
-				 __bpf_prog_run_save_cb);
+	if (type == BPF_CGROUP_INET_EGRESS) {
+		ret = BPF_PROG_CGROUP_INET_EGRESS_RUN_ARRAY(
+			cgrp->bpf.effective[type], skb, __bpf_prog_run_save_cb);
+	} else {
+		ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], skb,
+					 __bpf_prog_run_save_cb);
+		ret = (ret == 1 ? 0 : -EPERM);
+	}
 	bpf_restore_data_end(skb, saved_data_end);
 	__skb_pull(skb, offset);
 	skb->sk = save_sk;
-	return ret == 1 ? 0 : -EPERM;
+
+	return ret;
 }
 EXPORT_SYMBOL(__cgroup_bpf_run_filter_skb);
 
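On the program side, the companion changes in this series let cgroup/skb egress programs return values in the [0, 3] range: bit 0 means pass the packet, bit 1 requests a congestion notification, and programs using 2 or 3 must be loaded with expected_attach_type set to BPF_CGROUP_INET_EGRESS. A hedged libbpf-style sketch; the over_rate_limit() policy check is a hypothetical placeholder, not a BPF helper:

	#include <linux/bpf.h>
	#include <bpf/bpf_helpers.h>

	/* Hypothetical policy check, illustrative condition only. */
	static __always_inline int over_rate_limit(struct __sk_buff *skb)
	{
		return skb->len > 1500;
	}

	SEC("cgroup_skb/egress")
	int egress_cn(struct __sk_buff *skb)
	{
		if (over_rate_limit(skb))
			return 3;	/* bit0=1: pass; bit1=1: signal CN to TCP */

		return 1;		/* pass, no congestion signal */
	}

	char _license[] SEC("license") = "GPL";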