author     Willy Tarreau <w@1wt.eu>                 2009-01-13 19:04:36 -0500
committer  David S. Miller <davem@davemloft.net>    2009-01-13 19:04:36 -0500
commit     33966dd0e2f68f26943cd9ee93ec6abbc6547a8e
tree       75502831bb2868e016ed4f55d3ea3238b50f609b
parent     9a0811ca4e9cf7be320ae8a5a43a259feb70074f

tcp: splice as many packets as possible at once
As spotted by Willy Tarreau, the current splice() from a TCP socket to a pipe
is not optimal: it processes at most one segment per call.
This results in low performance and very high syscall overhead when
splicing from interfaces which do not support LRO.
Willy provided a patch inside tcp_splice_read(), but a better fix
is to let tcp_read_sock() process as many segments as possible, so
that tcp_rcv_space_adjust() and tcp_cleanup_rbuf() are called less
often.
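
To make the accounting concrete, here is a minimal user-space model of the
pattern the fix relies on (illustrative only; read_desc, segment and
splice_actor are made-up names, not kernel API). A per-call byte budget lives
in the descriptor, the actor consumes from each queued segment and decrements
the budget, and the caller's loop keeps going until the budget is exhausted or
the queue is empty, so per-call cleanup work runs once per call instead of
once per segment.

/*
 * Minimal user-space model of the "actor with a byte budget" pattern.
 * Illustrative sketch only; the names below are invented for this
 * example and are not kernel interfaces.
 */
#include <stdio.h>
#include <stddef.h>

struct read_desc {
	size_t count;		/* bytes the caller still wants */
};

struct segment {
	size_t len;		/* payload in this segment, e.g. 1460 */
};

/* Consume up to desc->count bytes from one segment, shrinking the budget. */
static size_t splice_actor(struct read_desc *desc, const struct segment *seg)
{
	size_t used = seg->len < desc->count ? seg->len : desc->count;

	desc->count -= used;
	return used;
}

int main(void)
{
	struct segment queue[16];
	struct read_desc desc = { .count = 16 * 1460 };
	size_t spliced = 0;

	for (int i = 0; i < 16; i++)
		queue[i].len = 1460;

	/* One "call": walk the queue until the budget is exhausted. */
	for (int i = 0; i < 16 && desc.count; i++)
		spliced += splice_actor(&desc, &queue[i]);

	printf("consumed %zu bytes in one call\n", spliced);	/* 23360 */
	return 0;
}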
With this change, splice() behaves like tcp_recvmsg(), being able
to consume many skbs in one system call. With typical 1460 bytes
of payload per frame, that means splice(SPLICE_F_NONBLOCK) can return
16*1460 = 23360 bytes.
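
On the user-space side, a hedged usage sketch of how the improvement shows up
(sock_fd is assumed to be an already-connected TCP socket; error handling is
abbreviated): a single non-blocking splice() into a pipe can now move several
queued segments at once instead of returning after a single one.

#define _GNU_SOURCE
#include <fcntl.h>
#include <unistd.h>
#include <stdio.h>

/* Drain whatever is queued on sock_fd into a freshly created pipe. */
ssize_t drain_once(int sock_fd)
{
	int pipefd[2];
	ssize_t n;

	if (pipe(pipefd) < 0)
		return -1;

	/* One call, potentially many TCP segments. */
	n = splice(sock_fd, NULL, pipefd[1], NULL, 65536,
		   SPLICE_F_NONBLOCK | SPLICE_F_MOVE);
	if (n > 0)
		printf("splice() returned %zd bytes in one call\n", n);

	close(pipefd[0]);
	close(pipefd[1]);
	return n;
}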
Signed-off-by: Willy Tarreau <w@1wt.eu>
Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
 net/ipv4/tcp.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index ce572f9dff02..48ada1b2d2c4 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -522,8 +522,12 @@ static int tcp_splice_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb,
 				unsigned int offset, size_t len)
 {
 	struct tcp_splice_state *tss = rd_desc->arg.data;
+	int ret;
 
-	return skb_splice_bits(skb, offset, tss->pipe, tss->len, tss->flags);
+	ret = skb_splice_bits(skb, offset, tss->pipe, rd_desc->count, tss->flags);
+	if (ret > 0)
+		rd_desc->count -= ret;
+	return ret;
 }
 
 static int __tcp_splice_read(struct sock *sk, struct tcp_splice_state *tss)
@@ -531,6 +535,7 @@ static int __tcp_splice_read(struct sock *sk, struct tcp_splice_state *tss)
 	/* Store TCP splice context information in read_descriptor_t. */
 	read_descriptor_t rd_desc = {
 		.arg.data = tss,
+		.count	  = tss->len,
 	};
 
 	return tcp_read_sock(sk, &rd_desc, tcp_splice_data_recv);
@@ -611,11 +616,13 @@ ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos,
 		tss.len -= ret;
 		spliced += ret;
 
+		if (!timeo)
+			break;
 		release_sock(sk);
 		lock_sock(sk);
 
 		if (sk->sk_err || sk->sk_state == TCP_CLOSE ||
-		    (sk->sk_shutdown & RCV_SHUTDOWN) || !timeo ||
+		    (sk->sk_shutdown & RCV_SHUTDOWN) ||
 		    signal_pending(current))
 			break;
 	}