aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
author: John Fastabend <john.fastabend@gmail.com> 2017-10-18 10:10:36 -0400
committer: David S. Miller <davem@davemloft.net> 2017-10-20 08:01:29 -0400
commit: 34f79502bbcfab659b8729da68b5e387f96eb4c1 (patch)
tree: 27084757c315fdf13c682c3d6c57922ed5f8447c /kernel
parent: 435bf0d3f99a164df7e8c30428cef266b91d1d3b (diff)
bpf: avoid preempt enable/disable in sockmap using tcp_skb_cb region
SK_SKB BPF programs are run from the socket/tcp context but early in the stack before much of the TCP metadata is needed in tcp_skb_cb. So we can use some unused fields to place BPF metadata needed for SK_SKB programs when implementing the redirect function.

This allows us to drop the preempt disable logic. It does however require an API change so sk_redirect_map() has been updated to additionally provide ctx_ptr to skb. Note, we do however continue to disable/enable preemption around actual BPF program running to account for map updates.

Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'kernel')
-rw-r--r-- kernel/bpf/sockmap.c 19
1 files changed, 9 insertions, 10 deletions
diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c
index c68899d5b246..beaabb21c3a3 100644
--- a/kernel/bpf/sockmap.c
+++ b/kernel/bpf/sockmap.c
@@ -39,6 +39,7 @@
39#include <linux/workqueue.h> 39#include <linux/workqueue.h>
40#include <linux/list.h> 40#include <linux/list.h>
41#include <net/strparser.h> 41#include <net/strparser.h>
42#include <net/tcp.h>
42 43
43struct bpf_stab { 44struct bpf_stab {
44 struct bpf_map map; 45 struct bpf_map map;
@@ -101,9 +102,16 @@ static int smap_verdict_func(struct smap_psock *psock, struct sk_buff *skb)
101 return SK_DROP; 102 return SK_DROP;
102 103
103 skb_orphan(skb); 104 skb_orphan(skb);
105 /* We need to ensure that BPF metadata for maps is also cleared
106 * when we orphan the skb so that we don't have the possibility
107 * to reference a stale map.
108 */
109 TCP_SKB_CB(skb)->bpf.map = NULL;
104 skb->sk = psock->sock; 110 skb->sk = psock->sock;
105 bpf_compute_data_end(skb); 111 bpf_compute_data_end(skb);
112 preempt_disable();
106 rc = (*prog->bpf_func)(skb, prog->insnsi); 113 rc = (*prog->bpf_func)(skb, prog->insnsi);
114 preempt_enable();
107 skb->sk = NULL; 115 skb->sk = NULL;
108 116
109 return rc; 117 return rc;
@@ -114,17 +122,10 @@ static void smap_do_verdict(struct smap_psock *psock, struct sk_buff *skb)
114 struct sock *sk; 122 struct sock *sk;
115 int rc; 123 int rc;
116 124
117 /* Because we use per cpu values to feed input from sock redirect
118 * in BPF program to do_sk_redirect_map() call we need to ensure we
119 * are not preempted. RCU read lock is not sufficient in this case
120 * with CONFIG_PREEMPT_RCU enabled so we must be explicit here.
121 */
122 preempt_disable();
123 rc = smap_verdict_func(psock, skb); 125 rc = smap_verdict_func(psock, skb);
124 switch (rc) { 126 switch (rc) {
125 case SK_REDIRECT: 127 case SK_REDIRECT:
126 sk = do_sk_redirect_map(); 128 sk = do_sk_redirect_map(skb);
127 preempt_enable();
128 if (likely(sk)) { 129 if (likely(sk)) {
129 struct smap_psock *peer = smap_psock_sk(sk); 130 struct smap_psock *peer = smap_psock_sk(sk);
130 131
@@ -141,8 +142,6 @@ static void smap_do_verdict(struct smap_psock *psock, struct sk_buff *skb)
141 /* Fall through and free skb otherwise */ 142 /* Fall through and free skb otherwise */
142 case SK_DROP: 143 case SK_DROP:
143 default: 144 default:
144 if (rc != SK_REDIRECT)
145 preempt_enable();
146 kfree_skb(skb); 145 kfree_skb(skb);
147 } 146 }
148} 147}