aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMichael Chan <mchan@broadcom.com>2006-06-29 15:30:00 -0400
committerDavid S. Miller <davem@sunset.davemloft.net>2006-06-29 19:58:08 -0400
commitb0da8537037f337103348f239ad901477e907aa8 (patch)
tree498a5dceb0d536fa54dcf4acc26ae1d1b43dfaf1
parent877ce7c1b3afd69a9b1caeb1b9964c992641f52a (diff)
[NET]: Add ECN support for TSO
In the current TSO implementation, NETIF_F_TSO and ECN cannot be turned on together in a TCP connection. The problem is that most hardware that supports TSO does not handle CWR correctly if it is set in the TSO packet. Correct handling requires CWR to be set in the first packet only if it is set in the TSO header. This patch adds the ability to turn on NETIF_F_TSO and ECN using GSO if necessary to handle TSO packets with CWR set. Hardware that handles CWR correctly can turn on NETIF_F_TSO_ECN in the dev-> features flag. All TSO packets with CWR set will have the SKB_GSO_TCPV4_ECN set. If the output device does not have the NETIF_F_TSO_ECN feature set, GSO will split the packet up correctly with CWR only set in the first segment. With help from Herbert Xu <herbert@gondor.apana.org.au>. Since ECN can always be enabled with TSO, the SOCK_NO_LARGESEND sock flag is completely removed. Signed-off-by: Michael Chan <mchan@broadcom.com> Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--include/linux/netdevice.h1
-rw-r--r--include/linux/skbuff.h3
-rw-r--r--include/net/sock.h3
-rw-r--r--include/net/tcp_ecn.h6
-rw-r--r--net/ipv4/tcp_input.c4
-rw-r--r--net/ipv4/tcp_minisocks.c2
-rw-r--r--net/ipv4/tcp_output.c2
7 files changed, 9 insertions, 12 deletions
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index efd1e2af0bf3..aa2d3c12c4d8 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -316,6 +316,7 @@ struct net_device
316#define NETIF_F_TSO (SKB_GSO_TCPV4 << NETIF_F_GSO_SHIFT) 316#define NETIF_F_TSO (SKB_GSO_TCPV4 << NETIF_F_GSO_SHIFT)
317#define NETIF_F_UFO (SKB_GSO_UDPV4 << NETIF_F_GSO_SHIFT) 317#define NETIF_F_UFO (SKB_GSO_UDPV4 << NETIF_F_GSO_SHIFT)
318#define NETIF_F_GSO_ROBUST (SKB_GSO_DODGY << NETIF_F_GSO_SHIFT) 318#define NETIF_F_GSO_ROBUST (SKB_GSO_DODGY << NETIF_F_GSO_SHIFT)
319#define NETIF_F_TSO_ECN (SKB_GSO_TCPV4_ECN << NETIF_F_GSO_SHIFT)
319 320
320#define NETIF_F_GEN_CSUM (NETIF_F_NO_CSUM | NETIF_F_HW_CSUM) 321#define NETIF_F_GEN_CSUM (NETIF_F_NO_CSUM | NETIF_F_HW_CSUM)
321#define NETIF_F_ALL_CSUM (NETIF_F_IP_CSUM | NETIF_F_GEN_CSUM) 322#define NETIF_F_ALL_CSUM (NETIF_F_IP_CSUM | NETIF_F_GEN_CSUM)
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 5fb72da7da03..e74c294929a0 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -175,6 +175,9 @@ enum {
175 175
176 /* This indicates the skb is from an untrusted source. */ 176 /* This indicates the skb is from an untrusted source. */
177 SKB_GSO_DODGY = 1 << 2, 177 SKB_GSO_DODGY = 1 << 2,
178
179 /* This indicates the tcp segment has CWR set. */
180 SKB_GSO_TCPV4_ECN = 1 << 3,
178}; 181};
179 182
180/** 183/**
diff --git a/include/net/sock.h b/include/net/sock.h
index 2d8d6adf1616..7136bae48c2f 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -383,7 +383,6 @@ enum sock_flags {
383 SOCK_USE_WRITE_QUEUE, /* whether to call sk->sk_write_space in sock_wfree */ 383 SOCK_USE_WRITE_QUEUE, /* whether to call sk->sk_write_space in sock_wfree */
384 SOCK_DBG, /* %SO_DEBUG setting */ 384 SOCK_DBG, /* %SO_DEBUG setting */
385 SOCK_RCVTSTAMP, /* %SO_TIMESTAMP setting */ 385 SOCK_RCVTSTAMP, /* %SO_TIMESTAMP setting */
386 SOCK_NO_LARGESEND, /* whether to sent large segments or not */
387 SOCK_LOCALROUTE, /* route locally only, %SO_DONTROUTE setting */ 386 SOCK_LOCALROUTE, /* route locally only, %SO_DONTROUTE setting */
388 SOCK_QUEUE_SHRUNK, /* write queue has been shrunk recently */ 387 SOCK_QUEUE_SHRUNK, /* write queue has been shrunk recently */
389}; 388};
@@ -1033,7 +1032,7 @@ static inline void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
1033 if (sk->sk_route_caps & NETIF_F_GSO) 1032 if (sk->sk_route_caps & NETIF_F_GSO)
1034 sk->sk_route_caps |= NETIF_F_TSO; 1033 sk->sk_route_caps |= NETIF_F_TSO;
1035 if (sk->sk_route_caps & NETIF_F_TSO) { 1034 if (sk->sk_route_caps & NETIF_F_TSO) {
1036 if (sock_flag(sk, SOCK_NO_LARGESEND) || dst->header_len) 1035 if (dst->header_len)
1037 sk->sk_route_caps &= ~NETIF_F_TSO; 1036 sk->sk_route_caps &= ~NETIF_F_TSO;
1038 else 1037 else
1039 sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM; 1038 sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM;
diff --git a/include/net/tcp_ecn.h b/include/net/tcp_ecn.h
index c6b84397448d..7bb366f70934 100644
--- a/include/net/tcp_ecn.h
+++ b/include/net/tcp_ecn.h
@@ -31,10 +31,9 @@ static inline void TCP_ECN_send_syn(struct sock *sk, struct tcp_sock *tp,
31 struct sk_buff *skb) 31 struct sk_buff *skb)
32{ 32{
33 tp->ecn_flags = 0; 33 tp->ecn_flags = 0;
34 if (sysctl_tcp_ecn && !(sk->sk_route_caps & NETIF_F_TSO)) { 34 if (sysctl_tcp_ecn) {
35 TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_ECE|TCPCB_FLAG_CWR; 35 TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_ECE|TCPCB_FLAG_CWR;
36 tp->ecn_flags = TCP_ECN_OK; 36 tp->ecn_flags = TCP_ECN_OK;
37 sock_set_flag(sk, SOCK_NO_LARGESEND);
38 } 37 }
39} 38}
40 39
@@ -56,6 +55,9 @@ static inline void TCP_ECN_send(struct sock *sk, struct tcp_sock *tp,
56 if (tp->ecn_flags&TCP_ECN_QUEUE_CWR) { 55 if (tp->ecn_flags&TCP_ECN_QUEUE_CWR) {
57 tp->ecn_flags &= ~TCP_ECN_QUEUE_CWR; 56 tp->ecn_flags &= ~TCP_ECN_QUEUE_CWR;
58 skb->h.th->cwr = 1; 57 skb->h.th->cwr = 1;
58 if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4)
59 skb_shinfo(skb)->gso_type |=
60 SKB_GSO_TCPV4_ECN;
59 } 61 }
60 } else { 62 } else {
61 /* ACK or retransmitted segment: clear ECT|CE */ 63 /* ACK or retransmitted segment: clear ECT|CE */
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 94fe5b1f9dcb..7fa0b4a8a389 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4178,8 +4178,6 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
4178 */ 4178 */
4179 4179
4180 TCP_ECN_rcv_synack(tp, th); 4180 TCP_ECN_rcv_synack(tp, th);
4181 if (tp->ecn_flags&TCP_ECN_OK)
4182 sock_set_flag(sk, SOCK_NO_LARGESEND);
4183 4181
4184 tp->snd_wl1 = TCP_SKB_CB(skb)->seq; 4182 tp->snd_wl1 = TCP_SKB_CB(skb)->seq;
4185 tcp_ack(sk, skb, FLAG_SLOWPATH); 4183 tcp_ack(sk, skb, FLAG_SLOWPATH);
@@ -4322,8 +4320,6 @@ discard:
4322 tp->max_window = tp->snd_wnd; 4320 tp->max_window = tp->snd_wnd;
4323 4321
4324 TCP_ECN_rcv_syn(tp, th); 4322 TCP_ECN_rcv_syn(tp, th);
4325 if (tp->ecn_flags&TCP_ECN_OK)
4326 sock_set_flag(sk, SOCK_NO_LARGESEND);
4327 4323
4328 tcp_mtup_init(sk); 4324 tcp_mtup_init(sk);
4329 tcp_sync_mss(sk, icsk->icsk_pmtu_cookie); 4325 tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 2b9b7f6c7f7c..54b2ef7d3efe 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -440,8 +440,6 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
440 newicsk->icsk_ack.last_seg_size = skb->len - newtp->tcp_header_len; 440 newicsk->icsk_ack.last_seg_size = skb->len - newtp->tcp_header_len;
441 newtp->rx_opt.mss_clamp = req->mss; 441 newtp->rx_opt.mss_clamp = req->mss;
442 TCP_ECN_openreq_child(newtp, req); 442 TCP_ECN_openreq_child(newtp, req);
443 if (newtp->ecn_flags&TCP_ECN_OK)
444 sock_set_flag(newsk, SOCK_NO_LARGESEND);
445 443
446 TCP_INC_STATS_BH(TCP_MIB_PASSIVEOPENS); 444 TCP_INC_STATS_BH(TCP_MIB_PASSIVEOPENS);
447 } 445 }
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index bdd71db8bf90..5a7cb4a9c867 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2044,8 +2044,6 @@ struct sk_buff * tcp_make_synack(struct sock *sk, struct dst_entry *dst,
2044 memset(th, 0, sizeof(struct tcphdr)); 2044 memset(th, 0, sizeof(struct tcphdr));
2045 th->syn = 1; 2045 th->syn = 1;
2046 th->ack = 1; 2046 th->ack = 1;
2047 if (dst->dev->features&NETIF_F_TSO)
2048 ireq->ecn_ok = 0;
2049 TCP_ECN_make_synack(req, th); 2047 TCP_ECN_make_synack(req, th);
2050 th->source = inet_sk(sk)->sport; 2048 th->source = inet_sk(sk)->sport;
2051 th->dest = ireq->rmt_port; 2049 th->dest = ireq->rmt_port;