Diffstat (limited to 'kernel/bpf/sockmap.c')
 -rw-r--r--  kernel/bpf/sockmap.c | 43 ++++++++++++++++++++++++++++++-------------
 1 file changed, 30 insertions(+), 13 deletions(-)
diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c
index 6424ce0e4969..66f00a2b27f4 100644
--- a/kernel/bpf/sockmap.c
+++ b/kernel/bpf/sockmap.c
@@ -39,6 +39,7 @@
 #include <linux/workqueue.h>
 #include <linux/list.h>
 #include <net/strparser.h>
+#include <net/tcp.h>
 
 struct bpf_stab {
 	struct bpf_map map;
@@ -92,6 +93,14 @@ static inline struct smap_psock *smap_psock_sk(const struct sock *sk)
 	return rcu_dereference_sk_user_data(sk);
 }
 
+/* compute the linear packet data range [data, data_end) for skb when
+ * sk_skb type programs are in use.
+ */
+static inline void bpf_compute_data_end_sk_skb(struct sk_buff *skb)
+{
+	TCP_SKB_CB(skb)->bpf.data_end = skb->data + skb_headlen(skb);
+}
+
 static int smap_verdict_func(struct smap_psock *psock, struct sk_buff *skb)
 {
 	struct bpf_prog *prog = READ_ONCE(psock->bpf_verdict);
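For context (not part of this patch): the [data, data_end) range published by the new helper is what an SK_SKB program sees through the data and data_end fields of struct __sk_buff, and programs are expected to bounds-check against it before touching payload. A rough BPF-C sketch, assuming the usual SEC() convention from the kernel samples; the section name and framing logic are purely illustrative:

	#include <linux/bpf.h>

	#define SEC(NAME) __attribute__((section(NAME), used))

	/* Illustrative strparser program: read a 2-byte length header from the
	 * linear range that bpf_compute_data_end_sk_skb() exposes as
	 * skb->data/skb->data_end, falling back to the whole skb.
	 */
	SEC("sk_skb_parse")
	int parse_prog(struct __sk_buff *skb)
	{
		void *data     = (void *)(long)skb->data;
		void *data_end = (void *)(long)skb->data_end;
		__u16 *hdr     = data;

		if ((void *)(hdr + 1) > data_end)	/* header not in linear data */
			return skb->len;
		return *hdr;				/* framed message length */
	}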
@@ -101,12 +110,20 @@ static int smap_verdict_func(struct smap_psock *psock, struct sk_buff *skb)
 		return SK_DROP;
 
 	skb_orphan(skb);
+	/* We need to ensure that BPF metadata for maps is also cleared
+	 * when we orphan the skb so that we don't have the possibility
+	 * to reference a stale map.
+	 */
+	TCP_SKB_CB(skb)->bpf.map = NULL;
 	skb->sk = psock->sock;
-	bpf_compute_data_end(skb);
+	bpf_compute_data_end_sk_skb(skb);
+	preempt_disable();
 	rc = (*prog->bpf_func)(skb, prog->insnsi);
+	preempt_enable();
 	skb->sk = NULL;
 
-	return rc;
+	return rc == SK_PASS ?
+		(TCP_SKB_CB(skb)->bpf.map ? SK_REDIRECT : SK_PASS) : SK_DROP;
 }
 
 static void smap_do_verdict(struct smap_psock *psock, struct sk_buff *skb)
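A note on the new return-code mapping (illustrative, not part of this patch): the verdict program records its redirect target in the skb control block via the sk_redirect_map helper, and smap_verdict_func() above turns an SK_PASS result with a recorded map into SK_REDIRECT. A hedged BPF-C sketch; the skb-first helper signature, the availability of SK_PASS to the program, and the sock_map definition are assumptions made for illustration:

	#include <linux/bpf.h>

	#define SEC(NAME) __attribute__((section(NAME), used))

	/* Helper stub in the style of the old bpf_helpers.h; the skb-first
	 * signature mirrors the do_sk_redirect_map(skb) change above.
	 */
	static int (*bpf_sk_redirect_map)(void *skb, void *map,
					  __u32 key, __u64 flags) =
		(void *)BPF_FUNC_sk_redirect_map;

	struct bpf_map_def {
		unsigned int type;
		unsigned int key_size;
		unsigned int value_size;
		unsigned int max_entries;
		unsigned int map_flags;
	};

	/* Assumed BPF_MAP_TYPE_SOCKMAP holding the redirect targets. */
	struct bpf_map_def SEC("maps") sock_map = {
		.type		= BPF_MAP_TYPE_SOCKMAP,
		.key_size	= sizeof(int),
		.value_size	= sizeof(int),
		.max_entries	= 2,
	};

	/* Record slot 0 of sock_map as the redirect target; the kernel side
	 * converts SK_PASS plus a recorded map into SK_REDIRECT.
	 */
	SEC("sk_skb_verdict")
	int verdict_prog(struct __sk_buff *skb)
	{
		bpf_sk_redirect_map(skb, &sock_map, 0, 0);
		return SK_PASS;
	}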
@@ -114,17 +131,10 @@ static void smap_do_verdict(struct smap_psock *psock, struct sk_buff *skb)
 	struct sock *sk;
 	int rc;
 
-	/* Because we use per cpu values to feed input from sock redirect
-	 * in BPF program to do_sk_redirect_map() call we need to ensure we
-	 * are not preempted. RCU read lock is not sufficient in this case
-	 * with CONFIG_PREEMPT_RCU enabled so we must be explicit here.
-	 */
-	preempt_disable();
 	rc = smap_verdict_func(psock, skb);
 	switch (rc) {
 	case SK_REDIRECT:
-		sk = do_sk_redirect_map();
-		preempt_enable();
+		sk = do_sk_redirect_map(skb);
 		if (likely(sk)) {
 			struct smap_psock *peer = smap_psock_sk(sk);
 
@@ -141,8 +151,6 @@ static void smap_do_verdict(struct smap_psock *psock, struct sk_buff *skb)
 	/* Fall through and free skb otherwise */
 	case SK_DROP:
 	default:
-		if (rc != SK_REDIRECT)
-			preempt_enable();
 		kfree_skb(skb);
 	}
 }
@@ -369,7 +377,7 @@ static int smap_parse_func_strparser(struct strparser *strp,
 	 * any socket yet.
 	 */
 	skb->sk = psock->sock;
-	bpf_compute_data_end(skb);
+	bpf_compute_data_end_sk_skb(skb);
 	rc = (*prog->bpf_func)(skb, prog->insnsi);
 	skb->sk = NULL;
 	rcu_read_unlock();
@@ -487,6 +495,9 @@ static struct bpf_map *sock_map_alloc(union bpf_attr *attr)
 	int err = -EINVAL;
 	u64 cost;
 
+	if (!capable(CAP_NET_ADMIN))
+		return ERR_PTR(-EPERM);
+
 	/* check sanity of attributes */
 	if (attr->max_entries == 0 || attr->key_size != 4 ||
 	    attr->value_size != 4 || attr->map_flags & ~BPF_F_NUMA_NODE)
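User-space impact of the new capability check (a minimal sketch, not part of this patch): creating a BPF_MAP_TYPE_SOCKMAP now requires CAP_NET_ADMIN, so an unprivileged caller gets EPERM from the map-create command. Key and value sizes follow the sanity checks above (4 bytes each):

	#include <stdio.h>
	#include <string.h>
	#include <unistd.h>
	#include <sys/syscall.h>
	#include <linux/bpf.h>

	/* Create a sockmap with the raw bpf(2) syscall; with this patch the
	 * call fails with EPERM unless the task has CAP_NET_ADMIN.
	 */
	static int sock_map_create(unsigned int max_entries)
	{
		union bpf_attr attr;

		memset(&attr, 0, sizeof(attr));
		attr.map_type    = BPF_MAP_TYPE_SOCKMAP;
		attr.key_size    = 4;	/* enforced by sock_map_alloc() */
		attr.value_size  = 4;	/* enforced by sock_map_alloc() */
		attr.max_entries = max_entries;

		return syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));
	}

	int main(void)
	{
		int fd = sock_map_create(16);

		if (fd < 0)
			perror("BPF_MAP_CREATE");
		else
			close(fd);
		return 0;
	}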
@@ -840,6 +851,12 @@ static int sock_map_update_elem(struct bpf_map *map,
 		return -EINVAL;
 	}
 
+	if (skops.sk->sk_type != SOCK_STREAM ||
+	    skops.sk->sk_protocol != IPPROTO_TCP) {
+		fput(socket->file);
+		return -EOPNOTSUPP;
+	}
+
 	err = sock_map_ctx_update_elem(&skops, map, key, flags);
 	fput(socket->file);
 	return err;
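Likewise for updates (a minimal sketch, not part of this patch): only TCP stream sockets can be added to the map now, so inserting e.g. a UDP socket fd fails with EOPNOTSUPP. User space passes the socket's file descriptor as the 4-byte map value; map_fd is assumed to come from a prior BPF_MAP_CREATE as in the sketch above:

	#include <string.h>
	#include <unistd.h>
	#include <sys/syscall.h>
	#include <linux/bpf.h>

	/* Install a TCP socket fd into slot 'key' of a sockmap. */
	static int sock_map_add(int map_fd, int key, int sock_fd)
	{
		union bpf_attr attr;

		memset(&attr, 0, sizeof(attr));
		attr.map_fd = map_fd;
		attr.key    = (unsigned long)&key;
		attr.value  = (unsigned long)&sock_fd;	/* sockmap values are socket fds */
		attr.flags  = BPF_ANY;

		return syscall(__NR_bpf, BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr));
	}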