diff options
| author | John Fastabend <john.fastabend@gmail.com> | 2017-10-18 10:10:36 -0400 |
|---|---|---|
| committer | David S. Miller <davem@davemloft.net> | 2017-10-20 08:01:29 -0400 |
| commit | 34f79502bbcfab659b8729da68b5e387f96eb4c1 (patch) | |
| tree | 27084757c315fdf13c682c3d6c57922ed5f8447c /kernel | |
| parent | 435bf0d3f99a164df7e8c30428cef266b91d1d3b (diff) | |
bpf: avoid preempt enable/disable in sockmap using tcp_skb_cb region
SK_SKB BPF programs are run from the socket/TCP context, but early
enough in the stack that much of the TCP metadata in tcp_skb_cb is not
yet needed, so we can use some of the unused fields to hold the BPF
metadata that SK_SKB programs need when implementing the redirect function.
This allows us to drop the preempt disable logic. It does, however,
require an API change, so sk_redirect_map() has been updated to
additionally take a ctx_ptr to the skb. Note that we still
disable/enable preemption around the actual run of the BPF program to
account for map updates.
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'kernel')
| -rw-r--r-- | kernel/bpf/sockmap.c | 19 |
1 files changed, 9 insertions, 10 deletions
diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c index c68899d5b246..beaabb21c3a3 100644 --- a/kernel/bpf/sockmap.c +++ b/kernel/bpf/sockmap.c | |||
| @@ -39,6 +39,7 @@ | |||
| 39 | #include <linux/workqueue.h> | 39 | #include <linux/workqueue.h> |
| 40 | #include <linux/list.h> | 40 | #include <linux/list.h> |
| 41 | #include <net/strparser.h> | 41 | #include <net/strparser.h> |
| 42 | #include <net/tcp.h> | ||
| 42 | 43 | ||
| 43 | struct bpf_stab { | 44 | struct bpf_stab { |
| 44 | struct bpf_map map; | 45 | struct bpf_map map; |
| @@ -101,9 +102,16 @@ static int smap_verdict_func(struct smap_psock *psock, struct sk_buff *skb) | |||
| 101 | return SK_DROP; | 102 | return SK_DROP; |
| 102 | 103 | ||
| 103 | skb_orphan(skb); | 104 | skb_orphan(skb); |
| 105 | /* We need to ensure that BPF metadata for maps is also cleared | ||
| 106 | * when we orphan the skb so that we don't have the possibility | ||
| 107 | * to reference a stale map. | ||
| 108 | */ | ||
| 109 | TCP_SKB_CB(skb)->bpf.map = NULL; | ||
| 104 | skb->sk = psock->sock; | 110 | skb->sk = psock->sock; |
| 105 | bpf_compute_data_end(skb); | 111 | bpf_compute_data_end(skb); |
| 112 | preempt_disable(); | ||
| 106 | rc = (*prog->bpf_func)(skb, prog->insnsi); | 113 | rc = (*prog->bpf_func)(skb, prog->insnsi); |
| 114 | preempt_enable(); | ||
| 107 | skb->sk = NULL; | 115 | skb->sk = NULL; |
| 108 | 116 | ||
| 109 | return rc; | 117 | return rc; |
| @@ -114,17 +122,10 @@ static void smap_do_verdict(struct smap_psock *psock, struct sk_buff *skb) | |||
| 114 | struct sock *sk; | 122 | struct sock *sk; |
| 115 | int rc; | 123 | int rc; |
| 116 | 124 | ||
| 117 | /* Because we use per cpu values to feed input from sock redirect | ||
| 118 | * in BPF program to do_sk_redirect_map() call we need to ensure we | ||
| 119 | * are not preempted. RCU read lock is not sufficient in this case | ||
| 120 | * with CONFIG_PREEMPT_RCU enabled so we must be explicit here. | ||
| 121 | */ | ||
| 122 | preempt_disable(); | ||
| 123 | rc = smap_verdict_func(psock, skb); | 125 | rc = smap_verdict_func(psock, skb); |
| 124 | switch (rc) { | 126 | switch (rc) { |
| 125 | case SK_REDIRECT: | 127 | case SK_REDIRECT: |
| 126 | sk = do_sk_redirect_map(); | 128 | sk = do_sk_redirect_map(skb); |
| 127 | preempt_enable(); | ||
| 128 | if (likely(sk)) { | 129 | if (likely(sk)) { |
| 129 | struct smap_psock *peer = smap_psock_sk(sk); | 130 | struct smap_psock *peer = smap_psock_sk(sk); |
| 130 | 131 | ||
| @@ -141,8 +142,6 @@ static void smap_do_verdict(struct smap_psock *psock, struct sk_buff *skb) | |||
| 141 | /* Fall through and free skb otherwise */ | 142 | /* Fall through and free skb otherwise */ |
| 142 | case SK_DROP: | 143 | case SK_DROP: |
| 143 | default: | 144 | default: |
| 144 | if (rc != SK_REDIRECT) | ||
| 145 | preempt_enable(); | ||
| 146 | kfree_skb(skb); | 145 | kfree_skb(skb); |
| 147 | } | 146 | } |
| 148 | } | 147 | } |
