diff options
author | David S. Miller <davem@davemloft.net> | 2018-08-29 19:19:38 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2018-08-29 19:19:38 -0400 |
commit | 6a5d39aa9ac274d9ccdb50ec47112158537f00de (patch) | |
tree | daa8c87faa188cf123e905140f1ab3e995439fc4 | |
parent | 53ae914d898e5dd5984d352d5fa0b23410f966a0 (diff) | |
parent | d65e6c80c6bb72ced46ce90dea4016d913a8ddd4 (diff) |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf
Daniel Borkmann says:
====================
pull-request: bpf 2018-08-29
The following pull-request contains BPF updates for your *net* tree.
The main changes are:
1) Fix a build error in sk_reuseport_convert_ctx_access() when
compiling with clang which cannot resolve hweight_long() at
build time inside the BUILD_BUG_ON() assertion, from Stefan.
2) Several fixes for BPF sockmap: four of them getting the
bpf_msg_pull_data() helper to work, one use-after-free case
in bpf_tcp_close() and one refcount leak in bpf_tcp_recvmsg(),
from Daniel.
3) Another fix for BPF sockmap where sk_mem_uncharge() is misaccounted
in the socket redirect error case due to unwinding the scatterlist
twice, from John.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | kernel/bpf/sockmap.c | 52 | ||||
-rw-r--r-- | net/core/filter.c | 52 |
2 files changed, 54 insertions, 50 deletions
diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c index cf5195c7c331..ce63e5801746 100644 --- a/kernel/bpf/sockmap.c +++ b/kernel/bpf/sockmap.c | |||
@@ -236,7 +236,7 @@ static int bpf_tcp_init(struct sock *sk) | |||
236 | } | 236 | } |
237 | 237 | ||
238 | static void smap_release_sock(struct smap_psock *psock, struct sock *sock); | 238 | static void smap_release_sock(struct smap_psock *psock, struct sock *sock); |
239 | static int free_start_sg(struct sock *sk, struct sk_msg_buff *md); | 239 | static int free_start_sg(struct sock *sk, struct sk_msg_buff *md, bool charge); |
240 | 240 | ||
241 | static void bpf_tcp_release(struct sock *sk) | 241 | static void bpf_tcp_release(struct sock *sk) |
242 | { | 242 | { |
@@ -248,7 +248,7 @@ static void bpf_tcp_release(struct sock *sk) | |||
248 | goto out; | 248 | goto out; |
249 | 249 | ||
250 | if (psock->cork) { | 250 | if (psock->cork) { |
251 | free_start_sg(psock->sock, psock->cork); | 251 | free_start_sg(psock->sock, psock->cork, true); |
252 | kfree(psock->cork); | 252 | kfree(psock->cork); |
253 | psock->cork = NULL; | 253 | psock->cork = NULL; |
254 | } | 254 | } |
@@ -330,14 +330,14 @@ static void bpf_tcp_close(struct sock *sk, long timeout) | |||
330 | close_fun = psock->save_close; | 330 | close_fun = psock->save_close; |
331 | 331 | ||
332 | if (psock->cork) { | 332 | if (psock->cork) { |
333 | free_start_sg(psock->sock, psock->cork); | 333 | free_start_sg(psock->sock, psock->cork, true); |
334 | kfree(psock->cork); | 334 | kfree(psock->cork); |
335 | psock->cork = NULL; | 335 | psock->cork = NULL; |
336 | } | 336 | } |
337 | 337 | ||
338 | list_for_each_entry_safe(md, mtmp, &psock->ingress, list) { | 338 | list_for_each_entry_safe(md, mtmp, &psock->ingress, list) { |
339 | list_del(&md->list); | 339 | list_del(&md->list); |
340 | free_start_sg(psock->sock, md); | 340 | free_start_sg(psock->sock, md, true); |
341 | kfree(md); | 341 | kfree(md); |
342 | } | 342 | } |
343 | 343 | ||
@@ -369,7 +369,7 @@ static void bpf_tcp_close(struct sock *sk, long timeout) | |||
369 | /* If another thread deleted this object skip deletion. | 369 | /* If another thread deleted this object skip deletion. |
370 | * The refcnt on psock may or may not be zero. | 370 | * The refcnt on psock may or may not be zero. |
371 | */ | 371 | */ |
372 | if (l) { | 372 | if (l && l == link) { |
373 | hlist_del_rcu(&link->hash_node); | 373 | hlist_del_rcu(&link->hash_node); |
374 | smap_release_sock(psock, link->sk); | 374 | smap_release_sock(psock, link->sk); |
375 | free_htab_elem(htab, link); | 375 | free_htab_elem(htab, link); |
@@ -570,14 +570,16 @@ static void free_bytes_sg(struct sock *sk, int bytes, | |||
570 | md->sg_start = i; | 570 | md->sg_start = i; |
571 | } | 571 | } |
572 | 572 | ||
573 | static int free_sg(struct sock *sk, int start, struct sk_msg_buff *md) | 573 | static int free_sg(struct sock *sk, int start, |
574 | struct sk_msg_buff *md, bool charge) | ||
574 | { | 575 | { |
575 | struct scatterlist *sg = md->sg_data; | 576 | struct scatterlist *sg = md->sg_data; |
576 | int i = start, free = 0; | 577 | int i = start, free = 0; |
577 | 578 | ||
578 | while (sg[i].length) { | 579 | while (sg[i].length) { |
579 | free += sg[i].length; | 580 | free += sg[i].length; |
580 | sk_mem_uncharge(sk, sg[i].length); | 581 | if (charge) |
582 | sk_mem_uncharge(sk, sg[i].length); | ||
581 | if (!md->skb) | 583 | if (!md->skb) |
582 | put_page(sg_page(&sg[i])); | 584 | put_page(sg_page(&sg[i])); |
583 | sg[i].length = 0; | 585 | sg[i].length = 0; |
@@ -594,9 +596,9 @@ static int free_sg(struct sock *sk, int start, struct sk_msg_buff *md) | |||
594 | return free; | 596 | return free; |
595 | } | 597 | } |
596 | 598 | ||
597 | static int free_start_sg(struct sock *sk, struct sk_msg_buff *md) | 599 | static int free_start_sg(struct sock *sk, struct sk_msg_buff *md, bool charge) |
598 | { | 600 | { |
599 | int free = free_sg(sk, md->sg_start, md); | 601 | int free = free_sg(sk, md->sg_start, md, charge); |
600 | 602 | ||
601 | md->sg_start = md->sg_end; | 603 | md->sg_start = md->sg_end; |
602 | return free; | 604 | return free; |
@@ -604,7 +606,7 @@ static int free_start_sg(struct sock *sk, struct sk_msg_buff *md) | |||
604 | 606 | ||
605 | static int free_curr_sg(struct sock *sk, struct sk_msg_buff *md) | 607 | static int free_curr_sg(struct sock *sk, struct sk_msg_buff *md) |
606 | { | 608 | { |
607 | return free_sg(sk, md->sg_curr, md); | 609 | return free_sg(sk, md->sg_curr, md, true); |
608 | } | 610 | } |
609 | 611 | ||
610 | static int bpf_map_msg_verdict(int _rc, struct sk_msg_buff *md) | 612 | static int bpf_map_msg_verdict(int _rc, struct sk_msg_buff *md) |
@@ -718,7 +720,7 @@ static int bpf_tcp_ingress(struct sock *sk, int apply_bytes, | |||
718 | list_add_tail(&r->list, &psock->ingress); | 720 | list_add_tail(&r->list, &psock->ingress); |
719 | sk->sk_data_ready(sk); | 721 | sk->sk_data_ready(sk); |
720 | } else { | 722 | } else { |
721 | free_start_sg(sk, r); | 723 | free_start_sg(sk, r, true); |
722 | kfree(r); | 724 | kfree(r); |
723 | } | 725 | } |
724 | 726 | ||
@@ -752,14 +754,10 @@ static int bpf_tcp_sendmsg_do_redirect(struct sock *sk, int send, | |||
752 | release_sock(sk); | 754 | release_sock(sk); |
753 | } | 755 | } |
754 | smap_release_sock(psock, sk); | 756 | smap_release_sock(psock, sk); |
755 | if (unlikely(err)) | 757 | return err; |
756 | goto out; | ||
757 | return 0; | ||
758 | out_rcu: | 758 | out_rcu: |
759 | rcu_read_unlock(); | 759 | rcu_read_unlock(); |
760 | out: | 760 | return 0; |
761 | free_bytes_sg(NULL, send, md, false); | ||
762 | return err; | ||
763 | } | 761 | } |
764 | 762 | ||
765 | static inline void bpf_md_init(struct smap_psock *psock) | 763 | static inline void bpf_md_init(struct smap_psock *psock) |
@@ -822,7 +820,7 @@ more_data: | |||
822 | case __SK_PASS: | 820 | case __SK_PASS: |
823 | err = bpf_tcp_push(sk, send, m, flags, true); | 821 | err = bpf_tcp_push(sk, send, m, flags, true); |
824 | if (unlikely(err)) { | 822 | if (unlikely(err)) { |
825 | *copied -= free_start_sg(sk, m); | 823 | *copied -= free_start_sg(sk, m, true); |
826 | break; | 824 | break; |
827 | } | 825 | } |
828 | 826 | ||
@@ -845,16 +843,17 @@ more_data: | |||
845 | lock_sock(sk); | 843 | lock_sock(sk); |
846 | 844 | ||
847 | if (unlikely(err < 0)) { | 845 | if (unlikely(err < 0)) { |
848 | free_start_sg(sk, m); | 846 | int free = free_start_sg(sk, m, false); |
847 | |||
849 | psock->sg_size = 0; | 848 | psock->sg_size = 0; |
850 | if (!cork) | 849 | if (!cork) |
851 | *copied -= send; | 850 | *copied -= free; |
852 | } else { | 851 | } else { |
853 | psock->sg_size -= send; | 852 | psock->sg_size -= send; |
854 | } | 853 | } |
855 | 854 | ||
856 | if (cork) { | 855 | if (cork) { |
857 | free_start_sg(sk, m); | 856 | free_start_sg(sk, m, true); |
858 | psock->sg_size = 0; | 857 | psock->sg_size = 0; |
859 | kfree(m); | 858 | kfree(m); |
860 | m = NULL; | 859 | m = NULL; |
@@ -912,6 +911,8 @@ static int bpf_tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, | |||
912 | 911 | ||
913 | if (unlikely(flags & MSG_ERRQUEUE)) | 912 | if (unlikely(flags & MSG_ERRQUEUE)) |
914 | return inet_recv_error(sk, msg, len, addr_len); | 913 | return inet_recv_error(sk, msg, len, addr_len); |
914 | if (!skb_queue_empty(&sk->sk_receive_queue)) | ||
915 | return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len); | ||
915 | 916 | ||
916 | rcu_read_lock(); | 917 | rcu_read_lock(); |
917 | psock = smap_psock_sk(sk); | 918 | psock = smap_psock_sk(sk); |
@@ -922,9 +923,6 @@ static int bpf_tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, | |||
922 | goto out; | 923 | goto out; |
923 | rcu_read_unlock(); | 924 | rcu_read_unlock(); |
924 | 925 | ||
925 | if (!skb_queue_empty(&sk->sk_receive_queue)) | ||
926 | return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len); | ||
927 | |||
928 | lock_sock(sk); | 926 | lock_sock(sk); |
929 | bytes_ready: | 927 | bytes_ready: |
930 | while (copied != len) { | 928 | while (copied != len) { |
@@ -1122,7 +1120,7 @@ wait_for_memory: | |||
1122 | err = sk_stream_wait_memory(sk, &timeo); | 1120 | err = sk_stream_wait_memory(sk, &timeo); |
1123 | if (err) { | 1121 | if (err) { |
1124 | if (m && m != psock->cork) | 1122 | if (m && m != psock->cork) |
1125 | free_start_sg(sk, m); | 1123 | free_start_sg(sk, m, true); |
1126 | goto out_err; | 1124 | goto out_err; |
1127 | } | 1125 | } |
1128 | } | 1126 | } |
@@ -1581,13 +1579,13 @@ static void smap_gc_work(struct work_struct *w) | |||
1581 | bpf_prog_put(psock->bpf_tx_msg); | 1579 | bpf_prog_put(psock->bpf_tx_msg); |
1582 | 1580 | ||
1583 | if (psock->cork) { | 1581 | if (psock->cork) { |
1584 | free_start_sg(psock->sock, psock->cork); | 1582 | free_start_sg(psock->sock, psock->cork, true); |
1585 | kfree(psock->cork); | 1583 | kfree(psock->cork); |
1586 | } | 1584 | } |
1587 | 1585 | ||
1588 | list_for_each_entry_safe(md, mtmp, &psock->ingress, list) { | 1586 | list_for_each_entry_safe(md, mtmp, &psock->ingress, list) { |
1589 | list_del(&md->list); | 1587 | list_del(&md->list); |
1590 | free_start_sg(psock->sock, md); | 1588 | free_start_sg(psock->sock, md, true); |
1591 | kfree(md); | 1589 | kfree(md); |
1592 | } | 1590 | } |
1593 | 1591 | ||
diff --git a/net/core/filter.c b/net/core/filter.c index c25eb36f1320..2c7801f6737a 100644 --- a/net/core/filter.c +++ b/net/core/filter.c | |||
@@ -2282,14 +2282,21 @@ static const struct bpf_func_proto bpf_msg_cork_bytes_proto = { | |||
2282 | .arg2_type = ARG_ANYTHING, | 2282 | .arg2_type = ARG_ANYTHING, |
2283 | }; | 2283 | }; |
2284 | 2284 | ||
2285 | #define sk_msg_iter_var(var) \ | ||
2286 | do { \ | ||
2287 | var++; \ | ||
2288 | if (var == MAX_SKB_FRAGS) \ | ||
2289 | var = 0; \ | ||
2290 | } while (0) | ||
2291 | |||
2285 | BPF_CALL_4(bpf_msg_pull_data, | 2292 | BPF_CALL_4(bpf_msg_pull_data, |
2286 | struct sk_msg_buff *, msg, u32, start, u32, end, u64, flags) | 2293 | struct sk_msg_buff *, msg, u32, start, u32, end, u64, flags) |
2287 | { | 2294 | { |
2288 | unsigned int len = 0, offset = 0, copy = 0; | 2295 | unsigned int len = 0, offset = 0, copy = 0; |
2296 | int bytes = end - start, bytes_sg_total; | ||
2289 | struct scatterlist *sg = msg->sg_data; | 2297 | struct scatterlist *sg = msg->sg_data; |
2290 | int first_sg, last_sg, i, shift; | 2298 | int first_sg, last_sg, i, shift; |
2291 | unsigned char *p, *to, *from; | 2299 | unsigned char *p, *to, *from; |
2292 | int bytes = end - start; | ||
2293 | struct page *page; | 2300 | struct page *page; |
2294 | 2301 | ||
2295 | if (unlikely(flags || end <= start)) | 2302 | if (unlikely(flags || end <= start)) |
@@ -2299,21 +2306,22 @@ BPF_CALL_4(bpf_msg_pull_data, | |||
2299 | i = msg->sg_start; | 2306 | i = msg->sg_start; |
2300 | do { | 2307 | do { |
2301 | len = sg[i].length; | 2308 | len = sg[i].length; |
2302 | offset += len; | ||
2303 | if (start < offset + len) | 2309 | if (start < offset + len) |
2304 | break; | 2310 | break; |
2305 | i++; | 2311 | offset += len; |
2306 | if (i == MAX_SKB_FRAGS) | 2312 | sk_msg_iter_var(i); |
2307 | i = 0; | ||
2308 | } while (i != msg->sg_end); | 2313 | } while (i != msg->sg_end); |
2309 | 2314 | ||
2310 | if (unlikely(start >= offset + len)) | 2315 | if (unlikely(start >= offset + len)) |
2311 | return -EINVAL; | 2316 | return -EINVAL; |
2312 | 2317 | ||
2313 | if (!msg->sg_copy[i] && bytes <= len) | ||
2314 | goto out; | ||
2315 | |||
2316 | first_sg = i; | 2318 | first_sg = i; |
2319 | /* The start may point into the sg element so we need to also | ||
2320 | * account for the headroom. | ||
2321 | */ | ||
2322 | bytes_sg_total = start - offset + bytes; | ||
2323 | if (!msg->sg_copy[i] && bytes_sg_total <= len) | ||
2324 | goto out; | ||
2317 | 2325 | ||
2318 | /* At this point we need to linearize multiple scatterlist | 2326 | /* At this point we need to linearize multiple scatterlist |
2319 | * elements or a single shared page. Either way we need to | 2327 | * elements or a single shared page. Either way we need to |
@@ -2327,15 +2335,13 @@ BPF_CALL_4(bpf_msg_pull_data, | |||
2327 | */ | 2335 | */ |
2328 | do { | 2336 | do { |
2329 | copy += sg[i].length; | 2337 | copy += sg[i].length; |
2330 | i++; | 2338 | sk_msg_iter_var(i); |
2331 | if (i == MAX_SKB_FRAGS) | 2339 | if (bytes_sg_total <= copy) |
2332 | i = 0; | ||
2333 | if (bytes < copy) | ||
2334 | break; | 2340 | break; |
2335 | } while (i != msg->sg_end); | 2341 | } while (i != msg->sg_end); |
2336 | last_sg = i; | 2342 | last_sg = i; |
2337 | 2343 | ||
2338 | if (unlikely(copy < end - start)) | 2344 | if (unlikely(bytes_sg_total > copy)) |
2339 | return -EINVAL; | 2345 | return -EINVAL; |
2340 | 2346 | ||
2341 | page = alloc_pages(__GFP_NOWARN | GFP_ATOMIC, get_order(copy)); | 2347 | page = alloc_pages(__GFP_NOWARN | GFP_ATOMIC, get_order(copy)); |
@@ -2355,9 +2361,7 @@ BPF_CALL_4(bpf_msg_pull_data, | |||
2355 | sg[i].length = 0; | 2361 | sg[i].length = 0; |
2356 | put_page(sg_page(&sg[i])); | 2362 | put_page(sg_page(&sg[i])); |
2357 | 2363 | ||
2358 | i++; | 2364 | sk_msg_iter_var(i); |
2359 | if (i == MAX_SKB_FRAGS) | ||
2360 | i = 0; | ||
2361 | } while (i != last_sg); | 2365 | } while (i != last_sg); |
2362 | 2366 | ||
2363 | sg[first_sg].length = copy; | 2367 | sg[first_sg].length = copy; |
@@ -2367,11 +2371,15 @@ BPF_CALL_4(bpf_msg_pull_data, | |||
2367 | * had a single entry though we can just replace it and | 2371 | * had a single entry though we can just replace it and |
2368 | * be done. Otherwise walk the ring and shift the entries. | 2372 | * be done. Otherwise walk the ring and shift the entries. |
2369 | */ | 2373 | */ |
2370 | shift = last_sg - first_sg - 1; | 2374 | WARN_ON_ONCE(last_sg == first_sg); |
2375 | shift = last_sg > first_sg ? | ||
2376 | last_sg - first_sg - 1 : | ||
2377 | MAX_SKB_FRAGS - first_sg + last_sg - 1; | ||
2371 | if (!shift) | 2378 | if (!shift) |
2372 | goto out; | 2379 | goto out; |
2373 | 2380 | ||
2374 | i = first_sg + 1; | 2381 | i = first_sg; |
2382 | sk_msg_iter_var(i); | ||
2375 | do { | 2383 | do { |
2376 | int move_from; | 2384 | int move_from; |
2377 | 2385 | ||
@@ -2388,15 +2396,13 @@ BPF_CALL_4(bpf_msg_pull_data, | |||
2388 | sg[move_from].page_link = 0; | 2396 | sg[move_from].page_link = 0; |
2389 | sg[move_from].offset = 0; | 2397 | sg[move_from].offset = 0; |
2390 | 2398 | ||
2391 | i++; | 2399 | sk_msg_iter_var(i); |
2392 | if (i == MAX_SKB_FRAGS) | ||
2393 | i = 0; | ||
2394 | } while (1); | 2400 | } while (1); |
2395 | msg->sg_end -= shift; | 2401 | msg->sg_end -= shift; |
2396 | if (msg->sg_end < 0) | 2402 | if (msg->sg_end < 0) |
2397 | msg->sg_end += MAX_SKB_FRAGS; | 2403 | msg->sg_end += MAX_SKB_FRAGS; |
2398 | out: | 2404 | out: |
2399 | msg->data = sg_virt(&sg[i]) + start - offset; | 2405 | msg->data = sg_virt(&sg[first_sg]) + start - offset; |
2400 | msg->data_end = msg->data + bytes; | 2406 | msg->data_end = msg->data + bytes; |
2401 | 2407 | ||
2402 | return 0; | 2408 | return 0; |
@@ -7281,7 +7287,7 @@ static u32 sk_reuseport_convert_ctx_access(enum bpf_access_type type, | |||
7281 | break; | 7287 | break; |
7282 | 7288 | ||
7283 | case offsetof(struct sk_reuseport_md, ip_protocol): | 7289 | case offsetof(struct sk_reuseport_md, ip_protocol): |
7284 | BUILD_BUG_ON(hweight_long(SK_FL_PROTO_MASK) != BITS_PER_BYTE); | 7290 | BUILD_BUG_ON(HWEIGHT32(SK_FL_PROTO_MASK) != BITS_PER_BYTE); |
7285 | SK_REUSEPORT_LOAD_SK_FIELD_SIZE_OFF(__sk_flags_offset, | 7291 | SK_REUSEPORT_LOAD_SK_FIELD_SIZE_OFF(__sk_flags_offset, |
7286 | BPF_W, 0); | 7292 | BPF_W, 0); |
7287 | *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_PROTO_MASK); | 7293 | *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_PROTO_MASK); |