aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorWilly Tarreau <w@1wt.eu>2009-01-13 19:04:36 -0500
committerDavid S. Miller <davem@davemloft.net>2009-01-13 19:04:36 -0500
commit33966dd0e2f68f26943cd9ee93ec6abbc6547a8e (patch)
tree75502831bb2868e016ed4f55d3ea3238b50f609b
parent9a0811ca4e9cf7be320ae8a5a43a259feb70074f (diff)
tcp: splice as many packets as possible at once
As spotted by Willy Tarreau, current splice() from tcp socket to pipe is not optimal. It processes at most one segment per call. This results in low performance and very high overhead due to syscall rate when splicing from interfaces which do not support LRO. Willy provided a patch inside tcp_splice_read(), but a better fix is to let tcp_read_sock() process as many segments as possible, so that tcp_rcv_space_adjust() and tcp_cleanup_rbuf() are called less often. With this change, splice() behaves like tcp_recvmsg(), being able to consume many skbs in one system call. With typical 1460 bytes of payload per frame, that means splice(SPLICE_F_NONBLOCK) can return 16*1460 = 23360 bytes. Signed-off-by: Willy Tarreau <w@1wt.eu> Signed-off-by: Eric Dumazet <dada1@cosmosbay.com> Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--net/ipv4/tcp.c11
1 files changed, 9 insertions, 2 deletions
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index ce572f9dff02..48ada1b2d2c4 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -522,8 +522,12 @@ static int tcp_splice_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb,
522 unsigned int offset, size_t len) 522 unsigned int offset, size_t len)
523{ 523{
524 struct tcp_splice_state *tss = rd_desc->arg.data; 524 struct tcp_splice_state *tss = rd_desc->arg.data;
525 int ret;
525 526
526 return skb_splice_bits(skb, offset, tss->pipe, tss->len, tss->flags); 527 ret = skb_splice_bits(skb, offset, tss->pipe, rd_desc->count, tss->flags);
528 if (ret > 0)
529 rd_desc->count -= ret;
530 return ret;
527} 531}
528 532
529static int __tcp_splice_read(struct sock *sk, struct tcp_splice_state *tss) 533static int __tcp_splice_read(struct sock *sk, struct tcp_splice_state *tss)
@@ -531,6 +535,7 @@ static int __tcp_splice_read(struct sock *sk, struct tcp_splice_state *tss)
531 /* Store TCP splice context information in read_descriptor_t. */ 535 /* Store TCP splice context information in read_descriptor_t. */
532 read_descriptor_t rd_desc = { 536 read_descriptor_t rd_desc = {
533 .arg.data = tss, 537 .arg.data = tss,
538 .count = tss->len,
534 }; 539 };
535 540
536 return tcp_read_sock(sk, &rd_desc, tcp_splice_data_recv); 541 return tcp_read_sock(sk, &rd_desc, tcp_splice_data_recv);
@@ -611,11 +616,13 @@ ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos,
611 tss.len -= ret; 616 tss.len -= ret;
612 spliced += ret; 617 spliced += ret;
613 618
619 if (!timeo)
620 break;
614 release_sock(sk); 621 release_sock(sk);
615 lock_sock(sk); 622 lock_sock(sk);
616 623
617 if (sk->sk_err || sk->sk_state == TCP_CLOSE || 624 if (sk->sk_err || sk->sk_state == TCP_CLOSE ||
618 (sk->sk_shutdown & RCV_SHUTDOWN) || !timeo || 625 (sk->sk_shutdown & RCV_SHUTDOWN) ||
619 signal_pending(current)) 626 signal_pending(current))
620 break; 627 break;
621 } 628 }