author     David S. Miller <davem@davemloft.net>  2018-08-29 19:19:38 -0400
committer  David S. Miller <davem@davemloft.net>  2018-08-29 19:19:38 -0400
commit     6a5d39aa9ac274d9ccdb50ec47112158537f00de (patch)
tree       daa8c87faa188cf123e905140f1ab3e995439fc4
parent     53ae914d898e5dd5984d352d5fa0b23410f966a0 (diff)
parent     d65e6c80c6bb72ced46ce90dea4016d913a8ddd4 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf
Daniel Borkmann says:

====================
pull-request: bpf 2018-08-29

The following pull request contains BPF updates for your *net* tree.

The main changes are:

1) Fix a build error in sk_reuseport_convert_ctx_access() when compiling
   with clang, which cannot resolve hweight_long() at build time inside
   the BUILD_BUG_ON() assertion, from Stefan.

2) Several fixes for BPF sockmap: four get the bpf_msg_pull_data() helper
   working, one fixes a use-after-free in bpf_tcp_close(), and one fixes
   a refcount leak in bpf_tcp_recvmsg(), from Daniel.

3) Another fix for BPF sockmap where sk_mem_uncharge() was misaccounted
   in the socket redirect error case because the scatterlist was unwound
   twice, from John.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--  kernel/bpf/sockmap.c  52
-rw-r--r--  net/core/filter.c     52
2 files changed, 54 insertions(+), 50 deletions(-)
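
Before the per-file diffs, a small user-space sketch of the scatterlist ring
walk that the bpf_msg_pull_data() fixes below revolve around. This is
illustrative only, not code from this commit: MAX_FRAGS, lengths[], and
iter_var() are invented stand-ins for MAX_SKB_FRAGS, msg->sg_data, and the
sk_msg_iter_var() macro the patch introduces.

    /* Toy model: find which ring element contains absolute byte 'start'.
     * Mirrors the fixed search loop: the containment test runs before
     * 'offset' advances, so 'offset' ends at the matching element's
     * first byte.
     */
    #include <stdio.h>

    #define MAX_FRAGS 17        /* stand-in for MAX_SKB_FRAGS */

    static void iter_var(int *var)        /* like sk_msg_iter_var() */
    {
            (*var)++;
            if (*var == MAX_FRAGS)
                    *var = 0;
    }

    int main(void)
    {
            unsigned int lengths[MAX_FRAGS] = { 100, 200, 50 };
            unsigned int start = 250, offset = 0, len = 0;
            int i = 0, sg_end = 3;

            do {
                    len = lengths[i];
                    if (start < offset + len)
                            break;
                    offset += len;
                    iter_var(&i);
            } while (i != sg_end);

            printf("byte %u is in element %d at offset %u\n",
                   start, i, start - offset);        /* element 1, offset 150 */
            return 0;
    }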
diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c
index cf5195c7c331..ce63e5801746 100644
--- a/kernel/bpf/sockmap.c
+++ b/kernel/bpf/sockmap.c
@@ -236,7 +236,7 @@ static int bpf_tcp_init(struct sock *sk)
 }
 
 static void smap_release_sock(struct smap_psock *psock, struct sock *sock);
-static int free_start_sg(struct sock *sk, struct sk_msg_buff *md);
+static int free_start_sg(struct sock *sk, struct sk_msg_buff *md, bool charge);
 
 static void bpf_tcp_release(struct sock *sk)
 {
@@ -248,7 +248,7 @@ static void bpf_tcp_release(struct sock *sk)
                goto out;
 
        if (psock->cork) {
-               free_start_sg(psock->sock, psock->cork);
+               free_start_sg(psock->sock, psock->cork, true);
                kfree(psock->cork);
                psock->cork = NULL;
        }
@@ -330,14 +330,14 @@ static void bpf_tcp_close(struct sock *sk, long timeout)
        close_fun = psock->save_close;
 
        if (psock->cork) {
-               free_start_sg(psock->sock, psock->cork);
+               free_start_sg(psock->sock, psock->cork, true);
                kfree(psock->cork);
                psock->cork = NULL;
        }
 
        list_for_each_entry_safe(md, mtmp, &psock->ingress, list) {
                list_del(&md->list);
-               free_start_sg(psock->sock, md);
+               free_start_sg(psock->sock, md, true);
                kfree(md);
        }
 
@@ -369,7 +369,7 @@ static void bpf_tcp_close(struct sock *sk, long timeout)
                /* If another thread deleted this object skip deletion.
                 * The refcnt on psock may or may not be zero.
                 */
-               if (l) {
+               if (l && l == link) {
                        hlist_del_rcu(&link->hash_node);
                        smap_release_sock(psock, link->sk);
                        free_htab_elem(htab, link);
@@ -570,14 +570,16 @@ static void free_bytes_sg(struct sock *sk, int bytes,
        md->sg_start = i;
 }
 
-static int free_sg(struct sock *sk, int start, struct sk_msg_buff *md)
+static int free_sg(struct sock *sk, int start,
+                  struct sk_msg_buff *md, bool charge)
 {
        struct scatterlist *sg = md->sg_data;
        int i = start, free = 0;
 
        while (sg[i].length) {
                free += sg[i].length;
-               sk_mem_uncharge(sk, sg[i].length);
+               if (charge)
+                       sk_mem_uncharge(sk, sg[i].length);
                if (!md->skb)
                        put_page(sg_page(&sg[i]));
                sg[i].length = 0;
@@ -594,9 +596,9 @@ static int free_sg(struct sock *sk, int start, struct sk_msg_buff *md)
        return free;
 }
 
-static int free_start_sg(struct sock *sk, struct sk_msg_buff *md)
+static int free_start_sg(struct sock *sk, struct sk_msg_buff *md, bool charge)
 {
-       int free = free_sg(sk, md->sg_start, md);
+       int free = free_sg(sk, md->sg_start, md, charge);
 
        md->sg_start = md->sg_end;
        return free;
@@ -604,7 +606,7 @@ static int free_start_sg(struct sock *sk, struct sk_msg_buff *md)
 
 static int free_curr_sg(struct sock *sk, struct sk_msg_buff *md)
 {
-       return free_sg(sk, md->sg_curr, md);
+       return free_sg(sk, md->sg_curr, md, true);
 }
 
 static int bpf_map_msg_verdict(int _rc, struct sk_msg_buff *md)
@@ -718,7 +720,7 @@ static int bpf_tcp_ingress(struct sock *sk, int apply_bytes,
                list_add_tail(&r->list, &psock->ingress);
                sk->sk_data_ready(sk);
        } else {
-               free_start_sg(sk, r);
+               free_start_sg(sk, r, true);
                kfree(r);
        }
 
@@ -752,14 +754,10 @@ static int bpf_tcp_sendmsg_do_redirect(struct sock *sk, int send,
                release_sock(sk);
        }
        smap_release_sock(psock, sk);
-       if (unlikely(err))
-               goto out;
-       return 0;
+       return err;
 out_rcu:
        rcu_read_unlock();
-out:
-       free_bytes_sg(NULL, send, md, false);
-       return err;
+       return 0;
 }
 
 static inline void bpf_md_init(struct smap_psock *psock)
@@ -822,7 +820,7 @@ more_data:
        case __SK_PASS:
                err = bpf_tcp_push(sk, send, m, flags, true);
                if (unlikely(err)) {
-                       *copied -= free_start_sg(sk, m);
+                       *copied -= free_start_sg(sk, m, true);
                        break;
                }
 
@@ -845,16 +843,17 @@ more_data:
                lock_sock(sk);
 
                if (unlikely(err < 0)) {
-                       free_start_sg(sk, m);
+                       int free = free_start_sg(sk, m, false);
+
                        psock->sg_size = 0;
                        if (!cork)
-                               *copied -= send;
+                               *copied -= free;
                } else {
                        psock->sg_size -= send;
                }
 
                if (cork) {
-                       free_start_sg(sk, m);
+                       free_start_sg(sk, m, true);
                        psock->sg_size = 0;
                        kfree(m);
                        m = NULL;
@@ -912,6 +911,8 @@ static int bpf_tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
 
        if (unlikely(flags & MSG_ERRQUEUE))
                return inet_recv_error(sk, msg, len, addr_len);
+       if (!skb_queue_empty(&sk->sk_receive_queue))
+               return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len);
 
        rcu_read_lock();
        psock = smap_psock_sk(sk);
@@ -922,9 +923,6 @@ static int bpf_tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
                goto out;
        rcu_read_unlock();
 
-       if (!skb_queue_empty(&sk->sk_receive_queue))
-               return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len);
-
        lock_sock(sk);
 bytes_ready:
        while (copied != len) {
@@ -1122,7 +1120,7 @@ wait_for_memory:
                err = sk_stream_wait_memory(sk, &timeo);
                if (err) {
                        if (m && m != psock->cork)
-                               free_start_sg(sk, m);
+                               free_start_sg(sk, m, true);
                        goto out_err;
                }
        }
@@ -1581,13 +1579,13 @@ static void smap_gc_work(struct work_struct *w)
                bpf_prog_put(psock->bpf_tx_msg);
 
        if (psock->cork) {
-               free_start_sg(psock->sock, psock->cork);
+               free_start_sg(psock->sock, psock->cork, true);
                kfree(psock->cork);
        }
 
        list_for_each_entry_safe(md, mtmp, &psock->ingress, list) {
                list_del(&md->list);
-               free_start_sg(psock->sock, md);
+               free_start_sg(psock->sock, md, true);
                kfree(md);
        }
 
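
As an aside on John's fix above: a toy user-space model (not kernel code) of
why the new charge parameter matters. mock_free_sg() and charged are invented
stand-ins for free_sg() and the socket's memory accounting; the point is that
only one of two unwind sites may call sk_mem_uncharge() for the same bytes.

    #include <stdio.h>

    static int charged;        /* stand-in for the socket's charged-bytes counter */

    /* Mirrors the reworked free_sg(): touch accounting only when asked. */
    static int mock_free_sg(const unsigned int *len, int n, int uncharge)
    {
            int i, freed = 0;

            for (i = 0; i < n; i++) {
                    freed += len[i];
                    if (uncharge)
                            charged -= len[i];
            }
            return freed;
    }

    int main(void)
    {
            const unsigned int sg[3] = { 100, 200, 50 };

            /* Buggy pattern: the redirect error path and its caller both
             * unwind the same scatterlist, and both uncharge.
             */
            charged = 350;
            mock_free_sg(sg, 3, 1);
            mock_free_sg(sg, 3, 1);
            printf("double unwind: %d\n", charged);        /* -350: misaccounted */

            /* Fixed pattern: the second unwind passes charge=false. */
            charged = 350;
            mock_free_sg(sg, 3, 1);
            mock_free_sg(sg, 3, 0);
            printf("fixed unwind:  %d\n", charged);        /* 0 */
            return 0;
    }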
diff --git a/net/core/filter.c b/net/core/filter.c
index c25eb36f1320..2c7801f6737a 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2282,14 +2282,21 @@ static const struct bpf_func_proto bpf_msg_cork_bytes_proto = {
        .arg2_type      = ARG_ANYTHING,
 };
 
+#define sk_msg_iter_var(var)                   \
+       do {                                    \
+               var++;                          \
+               if (var == MAX_SKB_FRAGS)       \
+                       var = 0;                \
+       } while (0)
+
 BPF_CALL_4(bpf_msg_pull_data,
           struct sk_msg_buff *, msg, u32, start, u32, end, u64, flags)
 {
        unsigned int len = 0, offset = 0, copy = 0;
+       int bytes = end - start, bytes_sg_total;
        struct scatterlist *sg = msg->sg_data;
        int first_sg, last_sg, i, shift;
        unsigned char *p, *to, *from;
-       int bytes = end - start;
        struct page *page;
 
        if (unlikely(flags || end <= start))
@@ -2299,21 +2306,22 @@ BPF_CALL_4(bpf_msg_pull_data,
        i = msg->sg_start;
        do {
                len = sg[i].length;
-               offset += len;
                if (start < offset + len)
                        break;
-               i++;
-               if (i == MAX_SKB_FRAGS)
-                       i = 0;
+               offset += len;
+               sk_msg_iter_var(i);
        } while (i != msg->sg_end);
 
        if (unlikely(start >= offset + len))
                return -EINVAL;
 
-       if (!msg->sg_copy[i] && bytes <= len)
-               goto out;
-
        first_sg = i;
+       /* The start may point into the sg element so we need to also
+        * account for the headroom.
+        */
+       bytes_sg_total = start - offset + bytes;
+       if (!msg->sg_copy[i] && bytes_sg_total <= len)
+               goto out;
 
        /* At this point we need to linearize multiple scatterlist
         * elements or a single shared page. Either way we need to
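
To make the headroom comment above concrete (values invented): suppose
start = 250, end = 550, and the search loop stops at an element whose ring
offset is 100. Then bytes = 300, but the single-element fast path is only
safe if the element holds bytes_sg_total = (250 - 100) + 300 = 450 bytes,
because msg->data must begin 150 bytes into the element. The old check
compared bare bytes against len and missed exactly that 150-byte headroom.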
@@ -2327,15 +2335,13 @@ BPF_CALL_4(bpf_msg_pull_data,
         */
        do {
                copy += sg[i].length;
-               i++;
-               if (i == MAX_SKB_FRAGS)
-                       i = 0;
-               if (bytes < copy)
+               sk_msg_iter_var(i);
+               if (bytes_sg_total <= copy)
                        break;
        } while (i != msg->sg_end);
        last_sg = i;
 
-       if (unlikely(copy < end - start))
+       if (unlikely(bytes_sg_total > copy))
                return -EINVAL;
 
        page = alloc_pages(__GFP_NOWARN | GFP_ATOMIC, get_order(copy));
@@ -2355,9 +2361,7 @@ BPF_CALL_4(bpf_msg_pull_data,
                sg[i].length = 0;
                put_page(sg_page(&sg[i]));
 
-               i++;
-               if (i == MAX_SKB_FRAGS)
-                       i = 0;
+               sk_msg_iter_var(i);
        } while (i != last_sg);
 
        sg[first_sg].length = copy;
@@ -2367,11 +2371,15 @@ BPF_CALL_4(bpf_msg_pull_data,
         * had a single entry though we can just replace it and
         * be done. Otherwise walk the ring and shift the entries.
         */
-       shift = last_sg - first_sg - 1;
+       WARN_ON_ONCE(last_sg == first_sg);
+       shift = last_sg > first_sg ?
+               last_sg - first_sg - 1 :
+               MAX_SKB_FRAGS - first_sg + last_sg - 1;
        if (!shift)
                goto out;
 
-       i = first_sg + 1;
+       i = first_sg;
+       sk_msg_iter_var(i);
        do {
                int move_from;
 
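
A worked example of the wrapped-ring shift above (invented values): with
MAX_SKB_FRAGS = 17, a pull that linearizes elements 15, 16, 0, and 1 ends
with first_sg = 15 and last_sg = 2. The old last_sg - first_sg - 1 would
give -14, while the new wraparound branch gives 17 - 15 + 2 - 1 = 3, the
number of now-empty slots the trailing entries must shift back by.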
@@ -2388,15 +2396,13 @@ BPF_CALL_4(bpf_msg_pull_data,
                sg[move_from].page_link = 0;
                sg[move_from].offset = 0;
 
-               i++;
-               if (i == MAX_SKB_FRAGS)
-                       i = 0;
+               sk_msg_iter_var(i);
        } while (1);
        msg->sg_end -= shift;
        if (msg->sg_end < 0)
                msg->sg_end += MAX_SKB_FRAGS;
 out:
-       msg->data = sg_virt(&sg[i]) + start - offset;
+       msg->data = sg_virt(&sg[first_sg]) + start - offset;
        msg->data_end = msg->data + bytes;
 
        return 0;
@@ -7281,7 +7287,7 @@ static u32 sk_reuseport_convert_ctx_access(enum bpf_access_type type,
                break;
 
        case offsetof(struct sk_reuseport_md, ip_protocol):
-               BUILD_BUG_ON(hweight_long(SK_FL_PROTO_MASK) != BITS_PER_BYTE);
+               BUILD_BUG_ON(HWEIGHT32(SK_FL_PROTO_MASK) != BITS_PER_BYTE);
                SK_REUSEPORT_LOAD_SK_FIELD_SIZE_OFF(__sk_flags_offset,
                                                    BPF_W, 0);
                *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_PROTO_MASK);
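
Finally, a sketch of why swapping hweight_long() for HWEIGHT32() fixes the
clang build: a pure macro expansion is a constant expression for any
compiler, whereas a function call like hweight_long() is only const-folded
when the optimizer cooperates. The HW* macros below are simplified in the
spirit of the kernel's __const_hweight*() family, and the SK_FL_PROTO_MASK
value here is illustrative, not the kernel's definition.

    #include <assert.h>

    /* Compile-time popcount via pure macro expansion. */
    #define HW8(w)  ((!!((w) & 0x01)) + (!!((w) & 0x02)) + (!!((w) & 0x04)) + \
                     (!!((w) & 0x08)) + (!!((w) & 0x10)) + (!!((w) & 0x20)) + \
                     (!!((w) & 0x40)) + (!!((w) & 0x80)))
    #define HW16(w) (HW8(w) + HW8((w) >> 8))
    #define HW32(w) (HW16(w) + HW16((w) >> 16))

    #define SK_FL_PROTO_MASK 0x0000ff00u    /* illustrative mask: one full byte */

    /* Holds under both gcc and clang at any optimization level,
     * because HW32() needs no constant folding by the optimizer.
     */
    static_assert(HW32(SK_FL_PROTO_MASK) == 8, "proto mask must be 8 bits");

    int main(void) { return 0; }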