path: root/kernel
author	John Fastabend <john.fastabend@gmail.com>	2018-09-18 12:01:49 -0400
committer	Daniel Borkmann <daniel@iogearbox.net>	2018-09-21 20:46:41 -0400
commit	b05545e15e1ff1d6a6a8593971275f9cc3e6b92b (patch)
tree	15a5f8f63fcea18adbfeeaff94ae6869f3cecc74 /kernel
parent	5607fff303636d48b88414c6be353d9fed700af2 (diff)
bpf: sockmap, fix transition through disconnect without close
It is possible (via shutdown()) for TCP socks to go through the TCP_CLOSE
state via tcp_disconnect() without actually calling tcp_close(), which is
what would have invoked our bpf_tcp_close() callback. Because of this, a
user could disconnect a socket and then put it in the LISTEN state, which
would break our assumption that sockets are always in the ESTABLISHED
state. To resolve this, rely on the unhash hook, which is called in the
disconnect case, to remove the sock from the sockmap.

Reported-by: Eric Dumazet <edumazet@google.com>
Fixes: 1aa12bdf1bfb ("bpf: sockmap, add sock close() hook to remove socks")
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Acked-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
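The failure mode described above can be illustrated with a small userspace
sketch (hypothetical, not part of the patch: map_fd is assumed to be an
existing BPF_MAP_TYPE_SOCKMAP with the usual sockmap programs attached,
sock_fd an ESTABLISHED TCP socket, and error handling is elided):

#include <sys/socket.h>
#include <unistd.h>
#include <bpf/bpf.h>	/* libbpf: bpf_map_update_elem() */

static void repro(int map_fd, int sock_fd)
{
	int key = 0;

	/* Insert the established socket into the sockmap; the sockmap
	 * proto ops (close, and with this patch unhash) are installed
	 * on the socket here.
	 */
	bpf_map_update_elem(map_fd, &key, &sock_fd, BPF_ANY);

	/* Per the commit message, shutdown() can drive the socket
	 * through TCP_CLOSE via tcp_disconnect() without tcp_close(),
	 * so bpf_tcp_close() never runs and, before this patch, the
	 * psock state was left attached to the socket.
	 */
	shutdown(sock_fd, SHUT_RDWR);

	/* The same fd can then become a listener while still present
	 * in the map, breaking the ESTABLISHED-only assumption.
	 */
	listen(sock_fd, 1);
}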
Diffstat (limited to 'kernel')
-rw-r--r--	kernel/bpf/sockmap.c | 60
1 file changed, 41 insertions(+), 19 deletions(-)
diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c
index 1f97b559892a..0a0f2ec75370 100644
--- a/kernel/bpf/sockmap.c
+++ b/kernel/bpf/sockmap.c
@@ -132,6 +132,7 @@ struct smap_psock {
 	struct work_struct gc_work;
 
 	struct proto *sk_proto;
+	void (*save_unhash)(struct sock *sk);
 	void (*save_close)(struct sock *sk, long timeout);
 	void (*save_data_ready)(struct sock *sk);
 	void (*save_write_space)(struct sock *sk);
@@ -143,6 +144,7 @@ static int bpf_tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
 static int bpf_tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size);
 static int bpf_tcp_sendpage(struct sock *sk, struct page *page,
 			    int offset, size_t size, int flags);
+static void bpf_tcp_unhash(struct sock *sk);
 static void bpf_tcp_close(struct sock *sk, long timeout);
 
 static inline struct smap_psock *smap_psock_sk(const struct sock *sk)
@@ -184,6 +186,7 @@ static void build_protos(struct proto prot[SOCKMAP_NUM_CONFIGS],
 			 struct proto *base)
 {
 	prot[SOCKMAP_BASE] = *base;
+	prot[SOCKMAP_BASE].unhash = bpf_tcp_unhash;
 	prot[SOCKMAP_BASE].close = bpf_tcp_close;
 	prot[SOCKMAP_BASE].recvmsg = bpf_tcp_recvmsg;
 	prot[SOCKMAP_BASE].stream_memory_read = bpf_tcp_stream_read;
@@ -217,6 +220,7 @@ static int bpf_tcp_init(struct sock *sk)
 		return -EBUSY;
 	}
 
+	psock->save_unhash = sk->sk_prot->unhash;
 	psock->save_close = sk->sk_prot->close;
 	psock->sk_proto = sk->sk_prot;
 
@@ -305,30 +309,12 @@ static struct smap_psock_map_entry *psock_map_pop(struct sock *sk,
 	return e;
 }
 
-static void bpf_tcp_close(struct sock *sk, long timeout)
+static void bpf_tcp_remove(struct sock *sk, struct smap_psock *psock)
 {
-	void (*close_fun)(struct sock *sk, long timeout);
 	struct smap_psock_map_entry *e;
 	struct sk_msg_buff *md, *mtmp;
-	struct smap_psock *psock;
 	struct sock *osk;
 
-	lock_sock(sk);
-	rcu_read_lock();
-	psock = smap_psock_sk(sk);
-	if (unlikely(!psock)) {
-		rcu_read_unlock();
-		release_sock(sk);
-		return sk->sk_prot->close(sk, timeout);
-	}
-
-	/* The psock may be destroyed anytime after exiting the RCU critial
-	 * section so by the time we use close_fun the psock may no longer
-	 * be valid. However, bpf_tcp_close is called with the sock lock
-	 * held so the close hook and sk are still valid.
-	 */
-	close_fun = psock->save_close;
-
 	if (psock->cork) {
 		free_start_sg(psock->sock, psock->cork, true);
 		kfree(psock->cork);
@@ -379,6 +365,42 @@ static void bpf_tcp_close(struct sock *sk, long timeout)
 		kfree(e);
 		e = psock_map_pop(sk, psock);
 	}
+}
+
+static void bpf_tcp_unhash(struct sock *sk)
+{
+	void (*unhash_fun)(struct sock *sk);
+	struct smap_psock *psock;
+
+	rcu_read_lock();
+	psock = smap_psock_sk(sk);
+	if (unlikely(!psock)) {
+		rcu_read_unlock();
+		if (sk->sk_prot->unhash)
+			sk->sk_prot->unhash(sk);
+		return;
+	}
+	unhash_fun = psock->save_unhash;
+	bpf_tcp_remove(sk, psock);
+	rcu_read_unlock();
+	unhash_fun(sk);
+}
+
+static void bpf_tcp_close(struct sock *sk, long timeout)
+{
+	void (*close_fun)(struct sock *sk, long timeout);
+	struct smap_psock *psock;
+
+	lock_sock(sk);
+	rcu_read_lock();
+	psock = smap_psock_sk(sk);
+	if (unlikely(!psock)) {
+		rcu_read_unlock();
+		release_sock(sk);
+		return sk->sk_prot->close(sk, timeout);
+	}
+	close_fun = psock->save_close;
+	bpf_tcp_remove(sk, psock);
 	rcu_read_unlock();
 	release_sock(sk);
 	close_fun(sk, timeout);
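For readers less familiar with the kernel's proto-ops indirection, the
save-and-chain callback pattern that the patch extends from close to unhash
can be modeled in plain userspace C (a toy sketch; the names only mirror the
patch, nothing here is kernel code):

#include <stdio.h>

/* Toy stand-in for struct proto: a table of socket callbacks. */
struct proto_ops {
	void (*unhash)(void);
};

/* Stand-in for the original inet unhash implementation. */
static void inet_unhash_stub(void)
{
	puts("original unhash: detach socket from hash tables");
}

/* Saved original callback, as psock->save_unhash does in the patch. */
static void (*save_unhash)(void);

static void bpf_unhash_wrapper(void)
{
	puts("wrapper: tear down sockmap state first");
	save_unhash();	/* chain to the saved original, as the patch does */
}

int main(void)
{
	struct proto_ops ops = { .unhash = inet_unhash_stub };

	/* Attach: save the original callback, then install the wrapper. */
	save_unhash = ops.unhash;
	ops.unhash = bpf_unhash_wrapper;

	/* The disconnect path invokes unhash; cleanup now runs first. */
	ops.unhash();
	return 0;
}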