diff options
author | David S. Miller <davem@davemloft.net> | 2005-07-05 18:24:38 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2005-07-05 18:24:38 -0400 |
commit | c1b4a7e69576d65efc31a8cea0714173c2841244 (patch) | |
tree | 92082532651cddc6f0649a9d7ca9ca63e381d310 /net/ipv4/tcp.c | |
parent | 0d9901df62fe4820aee86b49f1a074cdb5c6928e (diff) |
[TCP]: Move to new TSO segmenting scheme.
Make TSO segment transmit size decisions at send time, not earlier.
The basic scheme is that we try to build as large a TSO frame as
possible when pulling in the user data, but the size of the TSO frame
output to the card is determined at transmit time.
This is guided by tp->xmit_size_goal. It is always set to a multiple
of MSS and tells sendmsg/sendpage how large an SKB to try to build.
Later, tcp_write_xmit() and tcp_push_one() chop up the packet if
necessary and if conditions warrant. These routines can also decide to
"defer" in order to wait for more ACKs to arrive and thus allow larger
TSO frames to be emitted.
A general observation is that TSO elongates the pipe, thus requiring a
larger congestion window and larger buffering especially at the sender
side. Therefore, it is important that applications 1) get a large
enough socket send buffer (this is accomplished by our dynamic send
buffer expansion code), and 2) do large enough writes.
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4/tcp.c')
-rw-r--r-- | net/ipv4/tcp.c | 26 |
1 files changed, 15 insertions, 11 deletions
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 2ba73bf3a8f9..29894c749163 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c | |||
@@ -615,7 +615,7 @@ static ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffse | |||
615 | size_t psize, int flags) | 615 | size_t psize, int flags) |
616 | { | 616 | { |
617 | struct tcp_sock *tp = tcp_sk(sk); | 617 | struct tcp_sock *tp = tcp_sk(sk); |
618 | int mss_now; | 618 | int mss_now, size_goal; |
619 | int err; | 619 | int err; |
620 | ssize_t copied; | 620 | ssize_t copied; |
621 | long timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); | 621 | long timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); |
@@ -628,6 +628,7 @@ static ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffse | |||
628 | clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); | 628 | clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); |
629 | 629 | ||
630 | mss_now = tcp_current_mss(sk, !(flags&MSG_OOB)); | 630 | mss_now = tcp_current_mss(sk, !(flags&MSG_OOB)); |
631 | size_goal = tp->xmit_size_goal; | ||
631 | copied = 0; | 632 | copied = 0; |
632 | 633 | ||
633 | err = -EPIPE; | 634 | err = -EPIPE; |
@@ -641,7 +642,7 @@ static ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffse | |||
641 | int offset = poffset % PAGE_SIZE; | 642 | int offset = poffset % PAGE_SIZE; |
642 | int size = min_t(size_t, psize, PAGE_SIZE - offset); | 643 | int size = min_t(size_t, psize, PAGE_SIZE - offset); |
643 | 644 | ||
644 | if (!sk->sk_send_head || (copy = mss_now - skb->len) <= 0) { | 645 | if (!sk->sk_send_head || (copy = size_goal - skb->len) <= 0) { |
645 | new_segment: | 646 | new_segment: |
646 | if (!sk_stream_memory_free(sk)) | 647 | if (!sk_stream_memory_free(sk)) |
647 | goto wait_for_sndbuf; | 648 | goto wait_for_sndbuf; |
@@ -652,7 +653,7 @@ new_segment: | |||
652 | goto wait_for_memory; | 653 | goto wait_for_memory; |
653 | 654 | ||
654 | skb_entail(sk, tp, skb); | 655 | skb_entail(sk, tp, skb); |
655 | copy = mss_now; | 656 | copy = size_goal; |
656 | } | 657 | } |
657 | 658 | ||
658 | if (copy > size) | 659 | if (copy > size) |
@@ -693,7 +694,7 @@ new_segment: | |||
693 | if (!(psize -= copy)) | 694 | if (!(psize -= copy)) |
694 | goto out; | 695 | goto out; |
695 | 696 | ||
696 | if (skb->len != mss_now || (flags & MSG_OOB)) | 697 | if (skb->len < mss_now || (flags & MSG_OOB)) |
697 | continue; | 698 | continue; |
698 | 699 | ||
699 | if (forced_push(tp)) { | 700 | if (forced_push(tp)) { |
@@ -713,6 +714,7 @@ wait_for_memory: | |||
713 | goto do_error; | 714 | goto do_error; |
714 | 715 | ||
715 | mss_now = tcp_current_mss(sk, !(flags&MSG_OOB)); | 716 | mss_now = tcp_current_mss(sk, !(flags&MSG_OOB)); |
717 | size_goal = tp->xmit_size_goal; | ||
716 | } | 718 | } |
717 | 719 | ||
718 | out: | 720 | out: |
@@ -754,7 +756,7 @@ ssize_t tcp_sendpage(struct socket *sock, struct page *page, int offset, | |||
754 | 756 | ||
755 | static inline int select_size(struct sock *sk, struct tcp_sock *tp) | 757 | static inline int select_size(struct sock *sk, struct tcp_sock *tp) |
756 | { | 758 | { |
757 | int tmp = tp->mss_cache_std; | 759 | int tmp = tp->mss_cache; |
758 | 760 | ||
759 | if (sk->sk_route_caps & NETIF_F_SG) { | 761 | if (sk->sk_route_caps & NETIF_F_SG) { |
760 | if (sk->sk_route_caps & NETIF_F_TSO) | 762 | if (sk->sk_route_caps & NETIF_F_TSO) |
@@ -778,7 +780,7 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | |||
778 | struct tcp_sock *tp = tcp_sk(sk); | 780 | struct tcp_sock *tp = tcp_sk(sk); |
779 | struct sk_buff *skb; | 781 | struct sk_buff *skb; |
780 | int iovlen, flags; | 782 | int iovlen, flags; |
781 | int mss_now; | 783 | int mss_now, size_goal; |
782 | int err, copied; | 784 | int err, copied; |
783 | long timeo; | 785 | long timeo; |
784 | 786 | ||
@@ -797,6 +799,7 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | |||
797 | clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); | 799 | clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); |
798 | 800 | ||
799 | mss_now = tcp_current_mss(sk, !(flags&MSG_OOB)); | 801 | mss_now = tcp_current_mss(sk, !(flags&MSG_OOB)); |
802 | size_goal = tp->xmit_size_goal; | ||
800 | 803 | ||
801 | /* Ok commence sending. */ | 804 | /* Ok commence sending. */ |
802 | iovlen = msg->msg_iovlen; | 805 | iovlen = msg->msg_iovlen; |
@@ -819,7 +822,7 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | |||
819 | skb = sk->sk_write_queue.prev; | 822 | skb = sk->sk_write_queue.prev; |
820 | 823 | ||
821 | if (!sk->sk_send_head || | 824 | if (!sk->sk_send_head || |
822 | (copy = mss_now - skb->len) <= 0) { | 825 | (copy = size_goal - skb->len) <= 0) { |
823 | 826 | ||
824 | new_segment: | 827 | new_segment: |
825 | /* Allocate new segment. If the interface is SG, | 828 | /* Allocate new segment. If the interface is SG, |
@@ -842,7 +845,7 @@ new_segment: | |||
842 | skb->ip_summed = CHECKSUM_HW; | 845 | skb->ip_summed = CHECKSUM_HW; |
843 | 846 | ||
844 | skb_entail(sk, tp, skb); | 847 | skb_entail(sk, tp, skb); |
845 | copy = mss_now; | 848 | copy = size_goal; |
846 | } | 849 | } |
847 | 850 | ||
848 | /* Try to append data to the end of skb. */ | 851 | /* Try to append data to the end of skb. */ |
@@ -937,7 +940,7 @@ new_segment: | |||
937 | if ((seglen -= copy) == 0 && iovlen == 0) | 940 | if ((seglen -= copy) == 0 && iovlen == 0) |
938 | goto out; | 941 | goto out; |
939 | 942 | ||
940 | if (skb->len != mss_now || (flags & MSG_OOB)) | 943 | if (skb->len < mss_now || (flags & MSG_OOB)) |
941 | continue; | 944 | continue; |
942 | 945 | ||
943 | if (forced_push(tp)) { | 946 | if (forced_push(tp)) { |
@@ -957,6 +960,7 @@ wait_for_memory: | |||
957 | goto do_error; | 960 | goto do_error; |
958 | 961 | ||
959 | mss_now = tcp_current_mss(sk, !(flags&MSG_OOB)); | 962 | mss_now = tcp_current_mss(sk, !(flags&MSG_OOB)); |
963 | size_goal = tp->xmit_size_goal; | ||
960 | } | 964 | } |
961 | } | 965 | } |
962 | 966 | ||
@@ -2128,7 +2132,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) | |||
2128 | 2132 | ||
2129 | info->tcpi_rto = jiffies_to_usecs(tp->rto); | 2133 | info->tcpi_rto = jiffies_to_usecs(tp->rto); |
2130 | info->tcpi_ato = jiffies_to_usecs(tp->ack.ato); | 2134 | info->tcpi_ato = jiffies_to_usecs(tp->ack.ato); |
2131 | info->tcpi_snd_mss = tp->mss_cache_std; | 2135 | info->tcpi_snd_mss = tp->mss_cache; |
2132 | info->tcpi_rcv_mss = tp->ack.rcv_mss; | 2136 | info->tcpi_rcv_mss = tp->ack.rcv_mss; |
2133 | 2137 | ||
2134 | info->tcpi_unacked = tp->packets_out; | 2138 | info->tcpi_unacked = tp->packets_out; |
@@ -2178,7 +2182,7 @@ int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, | |||
2178 | 2182 | ||
2179 | switch (optname) { | 2183 | switch (optname) { |
2180 | case TCP_MAXSEG: | 2184 | case TCP_MAXSEG: |
2181 | val = tp->mss_cache_std; | 2185 | val = tp->mss_cache; |
2182 | if (!val && ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) | 2186 | if (!val && ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) |
2183 | val = tp->rx_opt.user_mss; | 2187 | val = tp->rx_opt.user_mss; |
2184 | break; | 2188 | break; |