diff options
author | Peter P Waskiewicz Jr <peter.p.waskiewicz.jr@intel.com> | 2008-03-21 06:43:19 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2008-03-21 06:43:19 -0400 |
commit | 82cc1a7a56872056af0ead6c7d695aa223f36695 (patch) | |
tree | 0891cf854a40cac90fc4fec3bd6b5ecb213d4452 | |
parent | a25606c845856e5ca5ed54d23cab077e3a49bf10 (diff) |
[NET]: Add per-connection option to set max TSO frame size
Update: My mailer ate one of Jarek's feedback mails... Fixed the
parameter in netif_set_gso_max_size() to be u32, not u16. Fixed the
whitespace issue due to a patch import botch. Changed the types from
u32 to unsigned int to be more consistent with other variables in the
area. Also brought the patch up to the latest net-2.6.26 tree.
Update: Made gso_max_size container 32 bits, not 16. Moved the
location of gso_max_size within netdev to be less hotpath. Made more
consistent names between the sock and netdev layers, and added a
define for the max GSO size.
Update: Respun for net-2.6.26 tree.
Update: changed max_gso_frame_size and sk_gso_max_size from signed to
unsigned - thanks Stephen!
This patch adds the ability for device drivers to control the size of
the TSO frames being sent to them, per TCP connection. By setting the
netdevice's gso_max_size value, the socket layer will set the GSO
frame size based on that value. This will propagate into the TCP
layer, and send TSOs of that size to the hardware.
This can be desirable to help tune the bursty nature of TSO on a
per-adapter basis, where one may have 1 GbE and 10 GbE devices
coexisting in a system, one running multiqueue and the other not, etc.
This can also be desirable for devices that cannot support full 64 KB
TSOs, but still want to benefit from some level of segmentation
offloading.
Signed-off-by: Peter P Waskiewicz Jr <peter.p.waskiewicz.jr@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | include/linux/netdevice.h | 10 | ||||
-rw-r--r-- | include/net/sock.h | 2 | ||||
-rw-r--r-- | net/core/dev.c | 1 | ||||
-rw-r--r-- | net/core/sock.c | 6 | ||||
-rw-r--r-- | net/ipv4/tcp_output.c | 4 |
5 files changed, 19 insertions, 4 deletions
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index a2f003239c85..ced61f87660e 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h | |||
@@ -724,6 +724,10 @@ struct net_device | |||
724 | /* rtnetlink link ops */ | 724 | /* rtnetlink link ops */ |
725 | const struct rtnl_link_ops *rtnl_link_ops; | 725 | const struct rtnl_link_ops *rtnl_link_ops; |
726 | 726 | ||
727 | /* for setting kernel sock attribute on TCP connection setup */ | ||
728 | #define GSO_MAX_SIZE 65536 | ||
729 | unsigned int gso_max_size; | ||
730 | |||
727 | /* The TX queue control structures */ | 731 | /* The TX queue control structures */ |
728 | unsigned int egress_subqueue_count; | 732 | unsigned int egress_subqueue_count; |
729 | struct net_device_subqueue egress_subqueue[1]; | 733 | struct net_device_subqueue egress_subqueue[1]; |
@@ -1475,6 +1479,12 @@ static inline int netif_needs_gso(struct net_device *dev, struct sk_buff *skb) | |||
1475 | unlikely(skb->ip_summed != CHECKSUM_PARTIAL)); | 1479 | unlikely(skb->ip_summed != CHECKSUM_PARTIAL)); |
1476 | } | 1480 | } |
1477 | 1481 | ||
1482 | static inline void netif_set_gso_max_size(struct net_device *dev, | ||
1483 | unsigned int size) | ||
1484 | { | ||
1485 | dev->gso_max_size = size; | ||
1486 | } | ||
1487 | |||
1478 | /* On bonding slaves other than the currently active slave, suppress | 1488 | /* On bonding slaves other than the currently active slave, suppress |
1479 | * duplicates except for 802.3ad ETH_P_SLOW, alb non-mcast/bcast, and | 1489 | * duplicates except for 802.3ad ETH_P_SLOW, alb non-mcast/bcast, and |
1480 | * ARP on active-backup slaves with arp_validate enabled. | 1490 | * ARP on active-backup slaves with arp_validate enabled. |
diff --git a/include/net/sock.h b/include/net/sock.h index 39112e75411c..8358fff002eb 100644 --- a/include/net/sock.h +++ b/include/net/sock.h | |||
@@ -151,6 +151,7 @@ struct sock_common { | |||
151 | * @sk_no_check: %SO_NO_CHECK setting, wether or not checkup packets | 151 | * @sk_no_check: %SO_NO_CHECK setting, wether or not checkup packets |
152 | * @sk_route_caps: route capabilities (e.g. %NETIF_F_TSO) | 152 | * @sk_route_caps: route capabilities (e.g. %NETIF_F_TSO) |
153 | * @sk_gso_type: GSO type (e.g. %SKB_GSO_TCPV4) | 153 | * @sk_gso_type: GSO type (e.g. %SKB_GSO_TCPV4) |
154 | * @sk_gso_max_size: Maximum GSO segment size to build | ||
154 | * @sk_lingertime: %SO_LINGER l_linger setting | 155 | * @sk_lingertime: %SO_LINGER l_linger setting |
155 | * @sk_backlog: always used with the per-socket spinlock held | 156 | * @sk_backlog: always used with the per-socket spinlock held |
156 | * @sk_callback_lock: used with the callbacks in the end of this struct | 157 | * @sk_callback_lock: used with the callbacks in the end of this struct |
@@ -237,6 +238,7 @@ struct sock { | |||
237 | gfp_t sk_allocation; | 238 | gfp_t sk_allocation; |
238 | int sk_route_caps; | 239 | int sk_route_caps; |
239 | int sk_gso_type; | 240 | int sk_gso_type; |
241 | unsigned int sk_gso_max_size; | ||
240 | int sk_rcvlowat; | 242 | int sk_rcvlowat; |
241 | unsigned long sk_flags; | 243 | unsigned long sk_flags; |
242 | unsigned long sk_lingertime; | 244 | unsigned long sk_lingertime; |
diff --git a/net/core/dev.c b/net/core/dev.c index fcdf03cf3b3f..f973e38b81af 100644 --- a/net/core/dev.c +++ b/net/core/dev.c | |||
@@ -4021,6 +4021,7 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, | |||
4021 | } | 4021 | } |
4022 | 4022 | ||
4023 | dev->egress_subqueue_count = queue_count; | 4023 | dev->egress_subqueue_count = queue_count; |
4024 | dev->gso_max_size = GSO_MAX_SIZE; | ||
4024 | 4025 | ||
4025 | dev->get_stats = internal_stats; | 4026 | dev->get_stats = internal_stats; |
4026 | netpoll_netdev_init(dev); | 4027 | netpoll_netdev_init(dev); |
diff --git a/net/core/sock.c b/net/core/sock.c index bb5236aee643..b1a6ed4d33c1 100644 --- a/net/core/sock.c +++ b/net/core/sock.c | |||
@@ -1095,10 +1095,12 @@ void sk_setup_caps(struct sock *sk, struct dst_entry *dst) | |||
1095 | if (sk->sk_route_caps & NETIF_F_GSO) | 1095 | if (sk->sk_route_caps & NETIF_F_GSO) |
1096 | sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE; | 1096 | sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE; |
1097 | if (sk_can_gso(sk)) { | 1097 | if (sk_can_gso(sk)) { |
1098 | if (dst->header_len) | 1098 | if (dst->header_len) { |
1099 | sk->sk_route_caps &= ~NETIF_F_GSO_MASK; | 1099 | sk->sk_route_caps &= ~NETIF_F_GSO_MASK; |
1100 | else | 1100 | } else { |
1101 | sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM; | 1101 | sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM; |
1102 | sk->sk_gso_max_size = dst->dev->gso_max_size; | ||
1103 | } | ||
1102 | } | 1104 | } |
1103 | } | 1105 | } |
1104 | EXPORT_SYMBOL_GPL(sk_setup_caps); | 1106 | EXPORT_SYMBOL_GPL(sk_setup_caps); |
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index b4e11d834c9f..a627616314ba 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c | |||
@@ -998,7 +998,7 @@ unsigned int tcp_current_mss(struct sock *sk, int large_allowed) | |||
998 | xmit_size_goal = mss_now; | 998 | xmit_size_goal = mss_now; |
999 | 999 | ||
1000 | if (doing_tso) { | 1000 | if (doing_tso) { |
1001 | xmit_size_goal = (65535 - | 1001 | xmit_size_goal = ((sk->sk_gso_max_size - 1) - |
1002 | inet_csk(sk)->icsk_af_ops->net_header_len - | 1002 | inet_csk(sk)->icsk_af_ops->net_header_len - |
1003 | inet_csk(sk)->icsk_ext_hdr_len - | 1003 | inet_csk(sk)->icsk_ext_hdr_len - |
1004 | tp->tcp_header_len); | 1004 | tp->tcp_header_len); |
@@ -1282,7 +1282,7 @@ static int tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb) | |||
1282 | limit = min(send_win, cong_win); | 1282 | limit = min(send_win, cong_win); |
1283 | 1283 | ||
1284 | /* If a full-sized TSO skb can be sent, do it. */ | 1284 | /* If a full-sized TSO skb can be sent, do it. */ |
1285 | if (limit >= 65536) | 1285 | if (limit >= sk->sk_gso_max_size) |
1286 | goto send_now; | 1286 | goto send_now; |
1287 | 1287 | ||
1288 | if (sysctl_tcp_tso_win_divisor) { | 1288 | if (sysctl_tcp_tso_win_divisor) { |