aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAlexei Starovoitov <ast@kernel.org>2018-05-02 18:30:45 -0400
committerAlexei Starovoitov <ast@kernel.org>2018-05-02 18:30:46 -0400
commitb5b6ff730253ab68ec230e239c4245cb1e8a5397 (patch)
tree789a726515dde9027f593db1f085e436341bcdc4
parent0f58e58e2803a83753f7e626cd3654cea601308d (diff)
parentabaeb096ca38cad02c8a68c49ddd7efc043c319a (diff)
Merge branch 'bpf-sockmap-fixes'
John Fastabend says: ==================== When I added the test_sockmap to selftests I mistakenly changed the test logic a bit. The result of this was on redirect cases we ended up choosing the wrong sock from the BPF program and ended up sending to a socket that had no receive handler. The result was the actual receive handler, running on a different socket, is timing out and closing the socket. This results in errors (-EPIPE to be specific) on the sending side. Typically happening if the sender does not complete the send before the receive side times out. So depending on timing and the size of the send we may get errors. This exposed some bugs in the sockmap error path handling. This series fixes the errors. The primary issue is we did not do proper memory accounting in these cases which resulted in missing a sk_mem_uncharge(). This happened in the redirect path and in one case on the normal send path. See the three patches for the details. The other take-away from this is we need to fix the test_sockmap and also add more negative test cases. That will happen in bpf-next. Finally, I tested this using the existing test_sockmap program, the older sockmap sample test script, and a few real use cases with Cilium. All of these seem to be in working correctly. v2: fix compiler warning, drop iterator variable 'i' that is no longer used in patch 3. ==================== Signed-off-by: Alexei Starovoitov <ast@kernel.org>
-rw-r--r--kernel/bpf/sockmap.c48
1 files changed, 26 insertions, 22 deletions
diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c
index 634415c7fbcd..098eca568c2b 100644
--- a/kernel/bpf/sockmap.c
+++ b/kernel/bpf/sockmap.c
@@ -326,6 +326,9 @@ retry:
326 if (ret > 0) { 326 if (ret > 0) {
327 if (apply) 327 if (apply)
328 apply_bytes -= ret; 328 apply_bytes -= ret;
329
330 sg->offset += ret;
331 sg->length -= ret;
329 size -= ret; 332 size -= ret;
330 offset += ret; 333 offset += ret;
331 if (uncharge) 334 if (uncharge)
@@ -333,8 +336,6 @@ retry:
333 goto retry; 336 goto retry;
334 } 337 }
335 338
336 sg->length = size;
337 sg->offset = offset;
338 return ret; 339 return ret;
339 } 340 }
340 341
@@ -392,7 +393,8 @@ static void return_mem_sg(struct sock *sk, int bytes, struct sk_msg_buff *md)
392 } while (i != md->sg_end); 393 } while (i != md->sg_end);
393} 394}
394 395
395static void free_bytes_sg(struct sock *sk, int bytes, struct sk_msg_buff *md) 396static void free_bytes_sg(struct sock *sk, int bytes,
397 struct sk_msg_buff *md, bool charge)
396{ 398{
397 struct scatterlist *sg = md->sg_data; 399 struct scatterlist *sg = md->sg_data;
398 int i = md->sg_start, free; 400 int i = md->sg_start, free;
@@ -402,11 +404,13 @@ static void free_bytes_sg(struct sock *sk, int bytes, struct sk_msg_buff *md)
402 if (bytes < free) { 404 if (bytes < free) {
403 sg[i].length -= bytes; 405 sg[i].length -= bytes;
404 sg[i].offset += bytes; 406 sg[i].offset += bytes;
405 sk_mem_uncharge(sk, bytes); 407 if (charge)
408 sk_mem_uncharge(sk, bytes);
406 break; 409 break;
407 } 410 }
408 411
409 sk_mem_uncharge(sk, sg[i].length); 412 if (charge)
413 sk_mem_uncharge(sk, sg[i].length);
410 put_page(sg_page(&sg[i])); 414 put_page(sg_page(&sg[i]));
411 bytes -= sg[i].length; 415 bytes -= sg[i].length;
412 sg[i].length = 0; 416 sg[i].length = 0;
@@ -417,6 +421,7 @@ static void free_bytes_sg(struct sock *sk, int bytes, struct sk_msg_buff *md)
417 if (i == MAX_SKB_FRAGS) 421 if (i == MAX_SKB_FRAGS)
418 i = 0; 422 i = 0;
419 } 423 }
424 md->sg_start = i;
420} 425}
421 426
422static int free_sg(struct sock *sk, int start, struct sk_msg_buff *md) 427static int free_sg(struct sock *sk, int start, struct sk_msg_buff *md)
@@ -575,10 +580,10 @@ static int bpf_tcp_sendmsg_do_redirect(struct sock *sk, int send,
575 struct sk_msg_buff *md, 580 struct sk_msg_buff *md,
576 int flags) 581 int flags)
577{ 582{
583 bool ingress = !!(md->flags & BPF_F_INGRESS);
578 struct smap_psock *psock; 584 struct smap_psock *psock;
579 struct scatterlist *sg; 585 struct scatterlist *sg;
580 int i, err, free = 0; 586 int err = 0;
581 bool ingress = !!(md->flags & BPF_F_INGRESS);
582 587
583 sg = md->sg_data; 588 sg = md->sg_data;
584 589
@@ -606,16 +611,8 @@ static int bpf_tcp_sendmsg_do_redirect(struct sock *sk, int send,
606out_rcu: 611out_rcu:
607 rcu_read_unlock(); 612 rcu_read_unlock();
608out: 613out:
609 i = md->sg_start; 614 free_bytes_sg(NULL, send, md, false);
610 while (sg[i].length) { 615 return err;
611 free += sg[i].length;
612 put_page(sg_page(&sg[i]));
613 sg[i].length = 0;
614 i++;
615 if (i == MAX_SKB_FRAGS)
616 i = 0;
617 }
618 return free;
619} 616}
620 617
621static inline void bpf_md_init(struct smap_psock *psock) 618static inline void bpf_md_init(struct smap_psock *psock)
@@ -700,19 +697,26 @@ more_data:
700 err = bpf_tcp_sendmsg_do_redirect(redir, send, m, flags); 697 err = bpf_tcp_sendmsg_do_redirect(redir, send, m, flags);
701 lock_sock(sk); 698 lock_sock(sk);
702 699
700 if (unlikely(err < 0)) {
701 free_start_sg(sk, m);
702 psock->sg_size = 0;
703 if (!cork)
704 *copied -= send;
705 } else {
706 psock->sg_size -= send;
707 }
708
703 if (cork) { 709 if (cork) {
704 free_start_sg(sk, m); 710 free_start_sg(sk, m);
711 psock->sg_size = 0;
705 kfree(m); 712 kfree(m);
706 m = NULL; 713 m = NULL;
714 err = 0;
707 } 715 }
708 if (unlikely(err))
709 *copied -= err;
710 else
711 psock->sg_size -= send;
712 break; 716 break;
713 case __SK_DROP: 717 case __SK_DROP:
714 default: 718 default:
715 free_bytes_sg(sk, send, m); 719 free_bytes_sg(sk, send, m, true);
716 apply_bytes_dec(psock, send); 720 apply_bytes_dec(psock, send);
717 *copied -= send; 721 *copied -= send;
718 psock->sg_size -= send; 722 psock->sg_size -= send;