author	John Fastabend <john.fastabend@gmail.com>	2018-08-24 20:37:00 -0400
committer	Daniel Borkmann <daniel@iogearbox.net>	2018-08-28 03:01:06 -0400
commit	501ca81760c204ec59b73e4a00bee5971fc0f1b1 (patch)
tree	a63a623bae798db9e5754f6d1aa9aa85aca6922e /kernel
parent	3f6e138d41ddff196f452993528cfe75762ede0f (diff)
bpf: sockmap, decrement copied count correctly in redirect error case
Currently, when a redirect occurs in sockmap and an error occurs in the redirect call, we unwind the scatterlist once in the error path of bpf_tcp_sendmsg_do_redirect() and then again in sendmsg(). Then, in the error path of sendmsg(), we decrement the copied count by the full send size. However, it's possible we partially sent data before the error was generated. This can happen if do_tcp_sendpages() partially sends the scatterlist before encountering a memory pressure error. If this happens, we need to decrement the copied value (the value tracking how many bytes were actually sent to the TCP stack) by the number of remaining bytes, _not_ the entire send size. Otherwise we risk confusing userspace.

Also, we don't need two calls to free the scatterlist; one is enough. So remove the call in bpf_tcp_sendmsg_do_redirect() and instead properly reduce copied by the number of remaining bytes, which may in fact be the entire send size if no bytes were sent. To do this, use a bool to indicate whether free_start_sg() should do memory accounting or not.

Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
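For illustration, a minimal userspace sketch of the corrected accounting follows. It is not kernel code: struct msg_buff and free_remaining() are hypothetical stand-ins for the kernel's sk_msg_buff and free_start_sg(), and the element sizes are invented. It shows the key point of the fix: after a partial send fails, copied is reduced by the bytes still queued in the scatterlist, not by the full send size.

/*
 * Minimal userspace sketch of the fixed unwind accounting. The names
 * msg_buff and free_remaining() are hypothetical stand-ins for the
 * kernel's sk_msg_buff and free_start_sg(); sizes are made up.
 */
#include <stdio.h>

struct msg_buff {
	int lengths[4];		/* scatterlist element lengths, 0-terminated */
	int start;		/* first element not yet handed to TCP */
};

/* Free the unsent elements and return how many bytes they held. */
static int free_remaining(struct msg_buff *m)
{
	int freed = 0;

	while (m->lengths[m->start]) {
		freed += m->lengths[m->start];
		m->lengths[m->start] = 0;
		m->start++;
	}
	return freed;
}

int main(void)
{
	struct msg_buff m = { .lengths = { 4096, 4096, 4096, 0 } };
	int send = 12288;	/* bytes this sendmsg() call tried to push */
	int copied = send;	/* what we would report to userspace */

	/* Suppose the stack sent the first element, then hit ENOMEM. */
	m.start = 1;

	/*
	 * Buggy unwind: copied -= send would drop all 12288 bytes and
	 * hide the 4096 that really went out. Fixed unwind: subtract
	 * only what is still queued in the scatterlist.
	 */
	copied -= free_remaining(&m);
	printf("reported to userspace: %d bytes\n", copied);	/* 4096 */
	return 0;
}

Compiled as ordinary C, this prints 4096: the element that really reached the stack stays counted, while only the two unsent elements are unwound.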
Diffstat (limited to 'kernel')
 kernel/bpf/sockmap.c | 45 ++++++++++++++++++++++-----------------------
 1 file changed, 22 insertions(+), 23 deletions(-)
diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c
index 26d8a3053407..ce63e5801746 100644
--- a/kernel/bpf/sockmap.c
+++ b/kernel/bpf/sockmap.c
@@ -236,7 +236,7 @@ static int bpf_tcp_init(struct sock *sk)
 }
 
 static void smap_release_sock(struct smap_psock *psock, struct sock *sock);
-static int free_start_sg(struct sock *sk, struct sk_msg_buff *md);
+static int free_start_sg(struct sock *sk, struct sk_msg_buff *md, bool charge);
 
 static void bpf_tcp_release(struct sock *sk)
 {
@@ -248,7 +248,7 @@ static void bpf_tcp_release(struct sock *sk)
 		goto out;
 
 	if (psock->cork) {
-		free_start_sg(psock->sock, psock->cork);
+		free_start_sg(psock->sock, psock->cork, true);
 		kfree(psock->cork);
 		psock->cork = NULL;
 	}
@@ -330,14 +330,14 @@ static void bpf_tcp_close(struct sock *sk, long timeout)
 	close_fun = psock->save_close;
 
 	if (psock->cork) {
-		free_start_sg(psock->sock, psock->cork);
+		free_start_sg(psock->sock, psock->cork, true);
 		kfree(psock->cork);
 		psock->cork = NULL;
 	}
 
 	list_for_each_entry_safe(md, mtmp, &psock->ingress, list) {
 		list_del(&md->list);
-		free_start_sg(psock->sock, md);
+		free_start_sg(psock->sock, md, true);
 		kfree(md);
 	}
 
@@ -570,14 +570,16 @@ static void free_bytes_sg(struct sock *sk, int bytes,
 	md->sg_start = i;
 }
 
-static int free_sg(struct sock *sk, int start, struct sk_msg_buff *md)
+static int free_sg(struct sock *sk, int start,
+		   struct sk_msg_buff *md, bool charge)
 {
 	struct scatterlist *sg = md->sg_data;
 	int i = start, free = 0;
 
 	while (sg[i].length) {
 		free += sg[i].length;
-		sk_mem_uncharge(sk, sg[i].length);
+		if (charge)
+			sk_mem_uncharge(sk, sg[i].length);
 		if (!md->skb)
 			put_page(sg_page(&sg[i]));
 		sg[i].length = 0;
@@ -594,9 +596,9 @@ static int free_sg(struct sock *sk, int start, struct sk_msg_buff *md)
 	return free;
 }
 
-static int free_start_sg(struct sock *sk, struct sk_msg_buff *md)
+static int free_start_sg(struct sock *sk, struct sk_msg_buff *md, bool charge)
 {
-	int free = free_sg(sk, md->sg_start, md);
+	int free = free_sg(sk, md->sg_start, md, charge);
 
 	md->sg_start = md->sg_end;
 	return free;
@@ -604,7 +606,7 @@ static int free_start_sg(struct sock *sk, struct sk_msg_buff *md)
 
 static int free_curr_sg(struct sock *sk, struct sk_msg_buff *md)
 {
-	return free_sg(sk, md->sg_curr, md);
+	return free_sg(sk, md->sg_curr, md, true);
 }
 
 static int bpf_map_msg_verdict(int _rc, struct sk_msg_buff *md)
@@ -718,7 +720,7 @@ static int bpf_tcp_ingress(struct sock *sk, int apply_bytes,
 		list_add_tail(&r->list, &psock->ingress);
 		sk->sk_data_ready(sk);
 	} else {
-		free_start_sg(sk, r);
+		free_start_sg(sk, r, true);
 		kfree(r);
 	}
 
@@ -752,14 +754,10 @@ static int bpf_tcp_sendmsg_do_redirect(struct sock *sk, int send,
 		release_sock(sk);
 	}
 	smap_release_sock(psock, sk);
-	if (unlikely(err))
-		goto out;
-	return 0;
+	return err;
 out_rcu:
 	rcu_read_unlock();
-out:
-	free_bytes_sg(NULL, send, md, false);
-	return err;
+	return 0;
 }
 
 static inline void bpf_md_init(struct smap_psock *psock)
@@ -822,7 +820,7 @@ more_data:
 	case __SK_PASS:
 		err = bpf_tcp_push(sk, send, m, flags, true);
 		if (unlikely(err)) {
-			*copied -= free_start_sg(sk, m);
+			*copied -= free_start_sg(sk, m, true);
 			break;
 		}
 
@@ -845,16 +843,17 @@ more_data:
 		lock_sock(sk);
 
 		if (unlikely(err < 0)) {
-			free_start_sg(sk, m);
+			int free = free_start_sg(sk, m, false);
+
 			psock->sg_size = 0;
 			if (!cork)
-				*copied -= send;
+				*copied -= free;
 		} else {
 			psock->sg_size -= send;
 		}
 
 		if (cork) {
-			free_start_sg(sk, m);
+			free_start_sg(sk, m, true);
 			psock->sg_size = 0;
 			kfree(m);
 			m = NULL;
@@ -1121,7 +1120,7 @@ wait_for_memory:
 		err = sk_stream_wait_memory(sk, &timeo);
 		if (err) {
 			if (m && m != psock->cork)
-				free_start_sg(sk, m);
+				free_start_sg(sk, m, true);
 			goto out_err;
 		}
 	}
@@ -1580,13 +1579,13 @@ static void smap_gc_work(struct work_struct *w)
 	bpf_prog_put(psock->bpf_tx_msg);
 
 	if (psock->cork) {
-		free_start_sg(psock->sock, psock->cork);
+		free_start_sg(psock->sock, psock->cork, true);
 		kfree(psock->cork);
 	}
 
 	list_for_each_entry_safe(md, mtmp, &psock->ingress, list) {
 		list_del(&md->list);
-		free_start_sg(psock->sock, md);
+		free_start_sg(psock->sock, md, true);
 		kfree(md);
 	}
 