aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@g5.osdl.org> 2006-06-23 11:00:01 -0400
committer Linus Torvalds <torvalds@g5.osdl.org> 2006-06-23 11:00:01 -0400
commit 199f4c9f76fd8b030405abddf294e771f888de03 (patch)
tree ee4f104a7562e1fd76882bc40f2de7d90812e1df /net
parent 37224470c8c6d90a4062e76a08d4dc1fcf91fc89 (diff)
parent ca6bb5d7ab22ac79f608fe6cbc6b12de6a5a19f0 (diff)
Merge master.kernel.org:/pub/scm/linux/kernel/git/davem/net-2.6
* master.kernel.org:/pub/scm/linux/kernel/git/davem/net-2.6:
  - [NET]: Require CAP_NET_ADMIN to create tuntap devices.
  - [NET]: fix net-core kernel-doc
  - [TCP]: Move inclusion of <linux/dmaengine.h> to correct place in <linux/tcp.h>
  - [IPSEC]: Handle GSO packets
  - [NET]: Added GSO toggle
  - [NET]: Add software TSOv4
  - [NET]: Add generic segmentation offload
  - [NET]: Merge TSO/UFO fields in sk_buff
  - [NET]: Prevent transmission after dev_deactivate
  - [IPV6] ADDRCONF: Fix default source address selection without CONFIG_IPV6_PRIVACY
  - [IPV6]: Fix source address selection.
  - [NET]: Avoid allocating skb in skb_pad
Diffstat (limited to 'net')
-rw-r--r-- net/bridge/br_forward.c 4
-rw-r--r-- net/bridge/br_if.c 17
-rw-r--r-- net/bridge/br_netfilter.c 2
-rw-r--r-- net/core/dev.c 137
-rw-r--r-- net/core/ethtool.c 29
-rw-r--r-- net/core/skbuff.c 178
-rw-r--r-- net/ipv4/af_inet.c 51
-rw-r--r-- net/ipv4/ip_output.c 16
-rw-r--r-- net/ipv4/tcp.c 66
-rw-r--r-- net/ipv4/tcp_input.c 2
-rw-r--r-- net/ipv4/tcp_output.c 47
-rw-r--r-- net/ipv4/xfrm4_output.c 54
-rw-r--r-- net/ipv6/addrconf.c 9
-rw-r--r-- net/ipv6/ip6_output.c 7
-rw-r--r-- net/ipv6/xfrm6_output.c 39
-rw-r--r-- net/sched/sch_generic.c 29
16 files changed, 602 insertions, 85 deletions
diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
index 0dca027ceb80..8be9f2123e54 100644
--- a/net/bridge/br_forward.c
+++ b/net/bridge/br_forward.c
@@ -34,8 +34,8 @@ static inline unsigned packet_length(const struct sk_buff *skb)
34 34
35int br_dev_queue_push_xmit(struct sk_buff *skb) 35int br_dev_queue_push_xmit(struct sk_buff *skb)
36{ 36{
37 /* drop mtu oversized packets except tso */ 37 /* drop mtu oversized packets except gso */
38 if (packet_length(skb) > skb->dev->mtu && !skb_shinfo(skb)->tso_size) 38 if (packet_length(skb) > skb->dev->mtu && !skb_shinfo(skb)->gso_size)
39 kfree_skb(skb); 39 kfree_skb(skb);
40 else { 40 else {
41#ifdef CONFIG_BRIDGE_NETFILTER 41#ifdef CONFIG_BRIDGE_NETFILTER
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index fdec773f5b52..07956ecf545e 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -376,15 +376,20 @@ void br_features_recompute(struct net_bridge *br)
376 features = br->feature_mask & ~NETIF_F_ALL_CSUM; 376 features = br->feature_mask & ~NETIF_F_ALL_CSUM;
377 377
378 list_for_each_entry(p, &br->port_list, list) { 378 list_for_each_entry(p, &br->port_list, list) {
379 if (checksum & NETIF_F_NO_CSUM && 379 unsigned long feature = p->dev->features;
380 !(p->dev->features & NETIF_F_NO_CSUM)) 380
381 if (checksum & NETIF_F_NO_CSUM && !(feature & NETIF_F_NO_CSUM))
381 checksum ^= NETIF_F_NO_CSUM | NETIF_F_HW_CSUM; 382 checksum ^= NETIF_F_NO_CSUM | NETIF_F_HW_CSUM;
382 if (checksum & NETIF_F_HW_CSUM && 383 if (checksum & NETIF_F_HW_CSUM && !(feature & NETIF_F_HW_CSUM))
383 !(p->dev->features & NETIF_F_HW_CSUM))
384 checksum ^= NETIF_F_HW_CSUM | NETIF_F_IP_CSUM; 384 checksum ^= NETIF_F_HW_CSUM | NETIF_F_IP_CSUM;
385 if (!(p->dev->features & NETIF_F_IP_CSUM)) 385 if (!(feature & NETIF_F_IP_CSUM))
386 checksum = 0; 386 checksum = 0;
387 features &= p->dev->features; 387
388 if (feature & NETIF_F_GSO)
389 feature |= NETIF_F_TSO;
390 feature |= NETIF_F_GSO;
391
392 features &= feature;
388 } 393 }
389 394
390 br->dev->features = features | checksum | NETIF_F_LLTX; 395 br->dev->features = features | checksum | NETIF_F_LLTX;
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index 3e41f9d6d51c..8298a5179aef 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -761,7 +761,7 @@ static int br_nf_dev_queue_xmit(struct sk_buff *skb)
761{ 761{
762 if (skb->protocol == htons(ETH_P_IP) && 762 if (skb->protocol == htons(ETH_P_IP) &&
763 skb->len > skb->dev->mtu && 763 skb->len > skb->dev->mtu &&
764 !(skb_shinfo(skb)->ufo_size || skb_shinfo(skb)->tso_size)) 764 !skb_shinfo(skb)->gso_size)
765 return ip_fragment(skb, br_dev_queue_push_xmit); 765 return ip_fragment(skb, br_dev_queue_push_xmit);
766 else 766 else
767 return br_dev_queue_push_xmit(skb); 767 return br_dev_queue_push_xmit(skb);
diff --git a/net/core/dev.c b/net/core/dev.c
index 195a5e96b2d1..ea2469398bd5 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -116,6 +116,7 @@
116#include <asm/current.h> 116#include <asm/current.h>
117#include <linux/audit.h> 117#include <linux/audit.h>
118#include <linux/dmaengine.h> 118#include <linux/dmaengine.h>
119#include <linux/err.h>
119 120
120/* 121/*
121 * The list of packet types we will receive (as opposed to discard) 122 * The list of packet types we will receive (as opposed to discard)
@@ -1048,7 +1049,7 @@ static inline void net_timestamp(struct sk_buff *skb)
1048 * taps currently in use. 1049 * taps currently in use.
1049 */ 1050 */
1050 1051
1051void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) 1052static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
1052{ 1053{
1053 struct packet_type *ptype; 1054 struct packet_type *ptype;
1054 1055
@@ -1186,6 +1187,40 @@ out:
1186 return ret; 1187 return ret;
1187} 1188}
1188 1189
1190/**
1191 * skb_gso_segment - Perform segmentation on skb.
1192 * @skb: buffer to segment
1193 * @sg: whether scatter-gather is supported on the target.
1194 *
1195 * This function segments the given skb and returns a list of segments.
1196 */
1197struct sk_buff *skb_gso_segment(struct sk_buff *skb, int sg)
1198{
1199 struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
1200 struct packet_type *ptype;
1201 int type = skb->protocol;
1202
1203 BUG_ON(skb_shinfo(skb)->frag_list);
1204 BUG_ON(skb->ip_summed != CHECKSUM_HW);
1205
1206 skb->mac.raw = skb->data;
1207 skb->mac_len = skb->nh.raw - skb->data;
1208 __skb_pull(skb, skb->mac_len);
1209
1210 rcu_read_lock();
1211 list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type) & 15], list) {
1212 if (ptype->type == type && !ptype->dev && ptype->gso_segment) {
1213 segs = ptype->gso_segment(skb, sg);
1214 break;
1215 }
1216 }
1217 rcu_read_unlock();
1218
1219 return segs;
1220}
1221
1222EXPORT_SYMBOL(skb_gso_segment);
1223
1189/* Take action when hardware reception checksum errors are detected. */ 1224/* Take action when hardware reception checksum errors are detected. */
1190#ifdef CONFIG_BUG 1225#ifdef CONFIG_BUG
1191void netdev_rx_csum_fault(struct net_device *dev) 1226void netdev_rx_csum_fault(struct net_device *dev)
@@ -1222,6 +1257,86 @@ static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
1222#define illegal_highdma(dev, skb) (0) 1257#define illegal_highdma(dev, skb) (0)
1223#endif 1258#endif
1224 1259
/*
 * State stored in skb->cb while an skb carries its list of GSO segments:
 * the destructor the skb had before dev_gso_segment() installed
 * dev_gso_skb_destructor in its place.
 */
struct dev_gso_cb {
	void (*destructor)(struct sk_buff *skb);
};

/* Interpret the control buffer of @skb as a struct dev_gso_cb. */
#define DEV_GSO_CB(skb) ((struct dev_gso_cb *)(skb)->cb)
1265
1266static void dev_gso_skb_destructor(struct sk_buff *skb)
1267{
1268 struct dev_gso_cb *cb;
1269
1270 do {
1271 struct sk_buff *nskb = skb->next;
1272
1273 skb->next = nskb->next;
1274 nskb->next = NULL;
1275 kfree_skb(nskb);
1276 } while (skb->next);
1277
1278 cb = DEV_GSO_CB(skb);
1279 if (cb->destructor)
1280 cb->destructor(skb);
1281}
1282
1283/**
1284 * dev_gso_segment - Perform emulated hardware segmentation on skb.
1285 * @skb: buffer to segment
1286 *
1287 * This function segments the given skb and stores the list of segments
1288 * in skb->next.
1289 */
1290static int dev_gso_segment(struct sk_buff *skb)
1291{
1292 struct net_device *dev = skb->dev;
1293 struct sk_buff *segs;
1294
1295 segs = skb_gso_segment(skb, dev->features & NETIF_F_SG &&
1296 !illegal_highdma(dev, skb));
1297 if (unlikely(IS_ERR(segs)))
1298 return PTR_ERR(segs);
1299
1300 skb->next = segs;
1301 DEV_GSO_CB(skb)->destructor = skb->destructor;
1302 skb->destructor = dev_gso_skb_destructor;
1303
1304 return 0;
1305}
1306
1307int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
1308{
1309 if (likely(!skb->next)) {
1310 if (netdev_nit)
1311 dev_queue_xmit_nit(skb, dev);
1312
1313 if (!netif_needs_gso(dev, skb))
1314 return dev->hard_start_xmit(skb, dev);
1315
1316 if (unlikely(dev_gso_segment(skb)))
1317 goto out_kfree_skb;
1318 }
1319
1320 do {
1321 struct sk_buff *nskb = skb->next;
1322 int rc;
1323
1324 skb->next = nskb->next;
1325 nskb->next = NULL;
1326 rc = dev->hard_start_xmit(nskb, dev);
1327 if (unlikely(rc)) {
1328 skb->next = nskb;
1329 return rc;
1330 }
1331 } while (skb->next);
1332
1333 skb->destructor = DEV_GSO_CB(skb)->destructor;
1334
1335out_kfree_skb:
1336 kfree_skb(skb);
1337 return 0;
1338}
1339
1225#define HARD_TX_LOCK(dev, cpu) { \ 1340#define HARD_TX_LOCK(dev, cpu) { \
1226 if ((dev->features & NETIF_F_LLTX) == 0) { \ 1341 if ((dev->features & NETIF_F_LLTX) == 0) { \
1227 netif_tx_lock(dev); \ 1342 netif_tx_lock(dev); \
@@ -1266,6 +1381,10 @@ int dev_queue_xmit(struct sk_buff *skb)
1266 struct Qdisc *q; 1381 struct Qdisc *q;
1267 int rc = -ENOMEM; 1382 int rc = -ENOMEM;
1268 1383
1384 /* GSO will handle the following emulations directly. */
1385 if (netif_needs_gso(dev, skb))
1386 goto gso;
1387
1269 if (skb_shinfo(skb)->frag_list && 1388 if (skb_shinfo(skb)->frag_list &&
1270 !(dev->features & NETIF_F_FRAGLIST) && 1389 !(dev->features & NETIF_F_FRAGLIST) &&
1271 __skb_linearize(skb)) 1390 __skb_linearize(skb))
@@ -1290,12 +1409,13 @@ int dev_queue_xmit(struct sk_buff *skb)
1290 if (skb_checksum_help(skb, 0)) 1409 if (skb_checksum_help(skb, 0))
1291 goto out_kfree_skb; 1410 goto out_kfree_skb;
1292 1411
1412gso:
1293 spin_lock_prefetch(&dev->queue_lock); 1413 spin_lock_prefetch(&dev->queue_lock);
1294 1414
1295 /* Disable soft irqs for various locks below. Also 1415 /* Disable soft irqs for various locks below. Also
1296 * stops preemption for RCU. 1416 * stops preemption for RCU.
1297 */ 1417 */
1298 local_bh_disable(); 1418 rcu_read_lock_bh();
1299 1419
1300 /* Updates of qdisc are serialized by queue_lock. 1420 /* Updates of qdisc are serialized by queue_lock.
1301 * The struct Qdisc which is pointed to by qdisc is now a 1421 * The struct Qdisc which is pointed to by qdisc is now a
@@ -1346,11 +1466,8 @@ int dev_queue_xmit(struct sk_buff *skb)
1346 HARD_TX_LOCK(dev, cpu); 1466 HARD_TX_LOCK(dev, cpu);
1347 1467
1348 if (!netif_queue_stopped(dev)) { 1468 if (!netif_queue_stopped(dev)) {
1349 if (netdev_nit)
1350 dev_queue_xmit_nit(skb, dev);
1351
1352 rc = 0; 1469 rc = 0;
1353 if (!dev->hard_start_xmit(skb, dev)) { 1470 if (!dev_hard_start_xmit(skb, dev)) {
1354 HARD_TX_UNLOCK(dev); 1471 HARD_TX_UNLOCK(dev);
1355 goto out; 1472 goto out;
1356 } 1473 }
@@ -1369,13 +1486,13 @@ int dev_queue_xmit(struct sk_buff *skb)
1369 } 1486 }
1370 1487
1371 rc = -ENETDOWN; 1488 rc = -ENETDOWN;
1372 local_bh_enable(); 1489 rcu_read_unlock_bh();
1373 1490
1374out_kfree_skb: 1491out_kfree_skb:
1375 kfree_skb(skb); 1492 kfree_skb(skb);
1376 return rc; 1493 return rc;
1377out: 1494out:
1378 local_bh_enable(); 1495 rcu_read_unlock_bh();
1379 return rc; 1496 return rc;
1380} 1497}
1381 1498
@@ -3301,8 +3418,8 @@ static void net_dma_rebalance(void)
3301/** 3418/**
3302 * netdev_dma_event - event callback for the net_dma_client 3419 * netdev_dma_event - event callback for the net_dma_client
3303 * @client: should always be net_dma_client 3420 * @client: should always be net_dma_client
3304 * @chan: 3421 * @chan: DMA channel for the event
3305 * @event: 3422 * @event: event type
3306 */ 3423 */
3307static void netdev_dma_event(struct dma_client *client, struct dma_chan *chan, 3424static void netdev_dma_event(struct dma_client *client, struct dma_chan *chan,
3308 enum dma_event event) 3425 enum dma_event event)
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 33ce7ed6afc6..27ce1683caf5 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -614,6 +614,29 @@ static int ethtool_set_ufo(struct net_device *dev, char __user *useraddr)
614 return dev->ethtool_ops->set_ufo(dev, edata.data); 614 return dev->ethtool_ops->set_ufo(dev, edata.data);
615} 615}
616 616
617static int ethtool_get_gso(struct net_device *dev, char __user *useraddr)
618{
619 struct ethtool_value edata = { ETHTOOL_GGSO };
620
621 edata.data = dev->features & NETIF_F_GSO;
622 if (copy_to_user(useraddr, &edata, sizeof(edata)))
623 return -EFAULT;
624 return 0;
625}
626
627static int ethtool_set_gso(struct net_device *dev, char __user *useraddr)
628{
629 struct ethtool_value edata;
630
631 if (copy_from_user(&edata, useraddr, sizeof(edata)))
632 return -EFAULT;
633 if (edata.data)
634 dev->features |= NETIF_F_GSO;
635 else
636 dev->features &= ~NETIF_F_GSO;
637 return 0;
638}
639
617static int ethtool_self_test(struct net_device *dev, char __user *useraddr) 640static int ethtool_self_test(struct net_device *dev, char __user *useraddr)
618{ 641{
619 struct ethtool_test test; 642 struct ethtool_test test;
@@ -905,6 +928,12 @@ int dev_ethtool(struct ifreq *ifr)
905 case ETHTOOL_SUFO: 928 case ETHTOOL_SUFO:
906 rc = ethtool_set_ufo(dev, useraddr); 929 rc = ethtool_set_ufo(dev, useraddr);
907 break; 930 break;
931 case ETHTOOL_GGSO:
932 rc = ethtool_get_gso(dev, useraddr);
933 break;
934 case ETHTOOL_SGSO:
935 rc = ethtool_set_gso(dev, useraddr);
936 break;
908 default: 937 default:
909 rc = -EOPNOTSUPP; 938 rc = -EOPNOTSUPP;
910 } 939 }
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index bb7210f4005e..8e5044ba3ab6 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -172,9 +172,9 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
172 shinfo = skb_shinfo(skb); 172 shinfo = skb_shinfo(skb);
173 atomic_set(&shinfo->dataref, 1); 173 atomic_set(&shinfo->dataref, 1);
174 shinfo->nr_frags = 0; 174 shinfo->nr_frags = 0;
175 shinfo->tso_size = 0; 175 shinfo->gso_size = 0;
176 shinfo->tso_segs = 0; 176 shinfo->gso_segs = 0;
177 shinfo->ufo_size = 0; 177 shinfo->gso_type = 0;
178 shinfo->ip6_frag_id = 0; 178 shinfo->ip6_frag_id = 0;
179 shinfo->frag_list = NULL; 179 shinfo->frag_list = NULL;
180 180
@@ -238,8 +238,9 @@ struct sk_buff *alloc_skb_from_cache(kmem_cache_t *cp,
238 238
239 atomic_set(&(skb_shinfo(skb)->dataref), 1); 239 atomic_set(&(skb_shinfo(skb)->dataref), 1);
240 skb_shinfo(skb)->nr_frags = 0; 240 skb_shinfo(skb)->nr_frags = 0;
241 skb_shinfo(skb)->tso_size = 0; 241 skb_shinfo(skb)->gso_size = 0;
242 skb_shinfo(skb)->tso_segs = 0; 242 skb_shinfo(skb)->gso_segs = 0;
243 skb_shinfo(skb)->gso_type = 0;
243 skb_shinfo(skb)->frag_list = NULL; 244 skb_shinfo(skb)->frag_list = NULL;
244out: 245out:
245 return skb; 246 return skb;
@@ -528,8 +529,9 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
528#endif 529#endif
529 skb_copy_secmark(new, old); 530 skb_copy_secmark(new, old);
530 atomic_set(&new->users, 1); 531 atomic_set(&new->users, 1);
531 skb_shinfo(new)->tso_size = skb_shinfo(old)->tso_size; 532 skb_shinfo(new)->gso_size = skb_shinfo(old)->gso_size;
532 skb_shinfo(new)->tso_segs = skb_shinfo(old)->tso_segs; 533 skb_shinfo(new)->gso_segs = skb_shinfo(old)->gso_segs;
534 skb_shinfo(new)->gso_type = skb_shinfo(old)->gso_type;
533} 535}
534 536
535/** 537/**
@@ -781,24 +783,40 @@ struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
781 * filled. Used by network drivers which may DMA or transfer data 783 * filled. Used by network drivers which may DMA or transfer data
782 * beyond the buffer end onto the wire. 784 * beyond the buffer end onto the wire.
783 * 785 *
784 * May return NULL in out of memory cases. 786 * May return error in out of memory cases. The skb is freed on error.
785 */ 787 */
786 788
787struct sk_buff *skb_pad(struct sk_buff *skb, int pad) 789int skb_pad(struct sk_buff *skb, int pad)
788{ 790{
789 struct sk_buff *nskb; 791 int err;
792 int ntail;
790 793
791 /* If the skbuff is non linear tailroom is always zero.. */ 794 /* If the skbuff is non linear tailroom is always zero.. */
792 if (skb_tailroom(skb) >= pad) { 795 if (!skb_cloned(skb) && skb_tailroom(skb) >= pad) {
793 memset(skb->data+skb->len, 0, pad); 796 memset(skb->data+skb->len, 0, pad);
794 return skb; 797 return 0;
795 } 798 }
796 799
797 nskb = skb_copy_expand(skb, skb_headroom(skb), skb_tailroom(skb) + pad, GFP_ATOMIC); 800 ntail = skb->data_len + pad - (skb->end - skb->tail);
801 if (likely(skb_cloned(skb) || ntail > 0)) {
802 err = pskb_expand_head(skb, 0, ntail, GFP_ATOMIC);
803 if (unlikely(err))
804 goto free_skb;
805 }
806
807 /* FIXME: The use of this function with non-linear skb's really needs
808 * to be audited.
809 */
810 err = skb_linearize(skb);
811 if (unlikely(err))
812 goto free_skb;
813
814 memset(skb->data + skb->len, 0, pad);
815 return 0;
816
817free_skb:
798 kfree_skb(skb); 818 kfree_skb(skb);
799 if (nskb) 819 return err;
800 memset(nskb->data+nskb->len, 0, pad);
801 return nskb;
802} 820}
803 821
804/* Trims skb to length len. It can change skb pointers. 822/* Trims skb to length len. It can change skb pointers.
@@ -1824,6 +1842,132 @@ unsigned char *skb_pull_rcsum(struct sk_buff *skb, unsigned int len)
1824 1842
1825EXPORT_SYMBOL_GPL(skb_pull_rcsum); 1843EXPORT_SYMBOL_GPL(skb_pull_rcsum);
1826 1844
1845/**
1846 * skb_segment - Perform protocol segmentation on skb.
1847 * @skb: buffer to segment
1848 * @sg: whether scatter-gather can be used for generated segments
1849 *
1850 * This function performs segmentation on the given skb. It returns
1851 * the segment at the given position. It returns NULL if there are
1852 * no more segments to generate, or when an error is encountered.
1853 */
1854struct sk_buff *skb_segment(struct sk_buff *skb, int sg)
1855{
1856 struct sk_buff *segs = NULL;
1857 struct sk_buff *tail = NULL;
1858 unsigned int mss = skb_shinfo(skb)->gso_size;
1859 unsigned int doffset = skb->data - skb->mac.raw;
1860 unsigned int offset = doffset;
1861 unsigned int headroom;
1862 unsigned int len;
1863 int nfrags = skb_shinfo(skb)->nr_frags;
1864 int err = -ENOMEM;
1865 int i = 0;
1866 int pos;
1867
1868 __skb_push(skb, doffset);
1869 headroom = skb_headroom(skb);
1870 pos = skb_headlen(skb);
1871
1872 do {
1873 struct sk_buff *nskb;
1874 skb_frag_t *frag;
1875 int hsize, nsize;
1876 int k;
1877 int size;
1878
1879 len = skb->len - offset;
1880 if (len > mss)
1881 len = mss;
1882
1883 hsize = skb_headlen(skb) - offset;
1884 if (hsize < 0)
1885 hsize = 0;
1886 nsize = hsize + doffset;
1887 if (nsize > len + doffset || !sg)
1888 nsize = len + doffset;
1889
1890 nskb = alloc_skb(nsize + headroom, GFP_ATOMIC);
1891 if (unlikely(!nskb))
1892 goto err;
1893
1894 if (segs)
1895 tail->next = nskb;
1896 else
1897 segs = nskb;
1898 tail = nskb;
1899
1900 nskb->dev = skb->dev;
1901 nskb->priority = skb->priority;
1902 nskb->protocol = skb->protocol;
1903 nskb->dst = dst_clone(skb->dst);
1904 memcpy(nskb->cb, skb->cb, sizeof(skb->cb));
1905 nskb->pkt_type = skb->pkt_type;
1906 nskb->mac_len = skb->mac_len;
1907
1908 skb_reserve(nskb, headroom);
1909 nskb->mac.raw = nskb->data;
1910 nskb->nh.raw = nskb->data + skb->mac_len;
1911 nskb->h.raw = nskb->nh.raw + (skb->h.raw - skb->nh.raw);
1912 memcpy(skb_put(nskb, doffset), skb->data, doffset);
1913
1914 if (!sg) {
1915 nskb->csum = skb_copy_and_csum_bits(skb, offset,
1916 skb_put(nskb, len),
1917 len, 0);
1918 continue;
1919 }
1920
1921 frag = skb_shinfo(nskb)->frags;
1922 k = 0;
1923
1924 nskb->ip_summed = CHECKSUM_HW;
1925 nskb->csum = skb->csum;
1926 memcpy(skb_put(nskb, hsize), skb->data + offset, hsize);
1927
1928 while (pos < offset + len) {
1929 BUG_ON(i >= nfrags);
1930
1931 *frag = skb_shinfo(skb)->frags[i];
1932 get_page(frag->page);
1933 size = frag->size;
1934
1935 if (pos < offset) {
1936 frag->page_offset += offset - pos;
1937 frag->size -= offset - pos;
1938 }
1939
1940 k++;
1941
1942 if (pos + size <= offset + len) {
1943 i++;
1944 pos += size;
1945 } else {
1946 frag->size -= pos + size - (offset + len);
1947 break;
1948 }
1949
1950 frag++;
1951 }
1952
1953 skb_shinfo(nskb)->nr_frags = k;
1954 nskb->data_len = len - hsize;
1955 nskb->len += nskb->data_len;
1956 nskb->truesize += nskb->data_len;
1957 } while ((offset += len) < skb->len);
1958
1959 return segs;
1960
1961err:
1962 while ((skb = segs)) {
1963 segs = skb->next;
1964 kfree(skb);
1965 }
1966 return ERR_PTR(err);
1967}
1968
1969EXPORT_SYMBOL_GPL(skb_segment);
1970
1827void __init skb_init(void) 1971void __init skb_init(void)
1828{ 1972{
1829 skbuff_head_cache = kmem_cache_create("skbuff_head_cache", 1973 skbuff_head_cache = kmem_cache_create("skbuff_head_cache",
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 0a277453526b..461216b47948 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -68,6 +68,7 @@
68 */ 68 */
69 69
70#include <linux/config.h> 70#include <linux/config.h>
71#include <linux/err.h>
71#include <linux/errno.h> 72#include <linux/errno.h>
72#include <linux/types.h> 73#include <linux/types.h>
73#include <linux/socket.h> 74#include <linux/socket.h>
@@ -1096,6 +1097,54 @@ int inet_sk_rebuild_header(struct sock *sk)
1096 1097
1097EXPORT_SYMBOL(inet_sk_rebuild_header); 1098EXPORT_SYMBOL(inet_sk_rebuild_header);
1098 1099
1100static struct sk_buff *inet_gso_segment(struct sk_buff *skb, int sg)
1101{
1102 struct sk_buff *segs = ERR_PTR(-EINVAL);
1103 struct iphdr *iph;
1104 struct net_protocol *ops;
1105 int proto;
1106 int ihl;
1107 int id;
1108
1109 if (!pskb_may_pull(skb, sizeof(*iph)))
1110 goto out;
1111
1112 iph = skb->nh.iph;
1113 ihl = iph->ihl * 4;
1114 if (ihl < sizeof(*iph))
1115 goto out;
1116
1117 if (!pskb_may_pull(skb, ihl))
1118 goto out;
1119
1120 skb->h.raw = __skb_pull(skb, ihl);
1121 iph = skb->nh.iph;
1122 id = ntohs(iph->id);
1123 proto = iph->protocol & (MAX_INET_PROTOS - 1);
1124 segs = ERR_PTR(-EPROTONOSUPPORT);
1125
1126 rcu_read_lock();
1127 ops = rcu_dereference(inet_protos[proto]);
1128 if (ops && ops->gso_segment)
1129 segs = ops->gso_segment(skb, sg);
1130 rcu_read_unlock();
1131
1132 if (IS_ERR(segs))
1133 goto out;
1134
1135 skb = segs;
1136 do {
1137 iph = skb->nh.iph;
1138 iph->id = htons(id++);
1139 iph->tot_len = htons(skb->len - skb->mac_len);
1140 iph->check = 0;
1141 iph->check = ip_fast_csum(skb->nh.raw, iph->ihl);
1142 } while ((skb = skb->next));
1143
1144out:
1145 return segs;
1146}
1147
1099#ifdef CONFIG_IP_MULTICAST 1148#ifdef CONFIG_IP_MULTICAST
1100static struct net_protocol igmp_protocol = { 1149static struct net_protocol igmp_protocol = {
1101 .handler = igmp_rcv, 1150 .handler = igmp_rcv,
@@ -1105,6 +1154,7 @@ static struct net_protocol igmp_protocol = {
1105static struct net_protocol tcp_protocol = { 1154static struct net_protocol tcp_protocol = {
1106 .handler = tcp_v4_rcv, 1155 .handler = tcp_v4_rcv,
1107 .err_handler = tcp_v4_err, 1156 .err_handler = tcp_v4_err,
1157 .gso_segment = tcp_tso_segment,
1108 .no_policy = 1, 1158 .no_policy = 1,
1109}; 1159};
1110 1160
@@ -1150,6 +1200,7 @@ static int ipv4_proc_init(void);
1150static struct packet_type ip_packet_type = { 1200static struct packet_type ip_packet_type = {
1151 .type = __constant_htons(ETH_P_IP), 1201 .type = __constant_htons(ETH_P_IP),
1152 .func = ip_rcv, 1202 .func = ip_rcv,
1203 .gso_segment = inet_gso_segment,
1153}; 1204};
1154 1205
1155static int __init inet_init(void) 1206static int __init inet_init(void)
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 8538aac3d148..7624fd1d8f9f 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -210,8 +210,7 @@ static inline int ip_finish_output(struct sk_buff *skb)
210 return dst_output(skb); 210 return dst_output(skb);
211 } 211 }
212#endif 212#endif
213 if (skb->len > dst_mtu(skb->dst) && 213 if (skb->len > dst_mtu(skb->dst) && !skb_shinfo(skb)->gso_size)
214 !(skb_shinfo(skb)->ufo_size || skb_shinfo(skb)->tso_size))
215 return ip_fragment(skb, ip_finish_output2); 214 return ip_fragment(skb, ip_finish_output2);
216 else 215 else
217 return ip_finish_output2(skb); 216 return ip_finish_output2(skb);
@@ -362,7 +361,7 @@ packet_routed:
362 } 361 }
363 362
364 ip_select_ident_more(iph, &rt->u.dst, sk, 363 ip_select_ident_more(iph, &rt->u.dst, sk,
365 (skb_shinfo(skb)->tso_segs ?: 1) - 1); 364 (skb_shinfo(skb)->gso_segs ?: 1) - 1);
366 365
367 /* Add an IP checksum. */ 366 /* Add an IP checksum. */
368 ip_send_check(iph); 367 ip_send_check(iph);
@@ -744,7 +743,8 @@ static inline int ip_ufo_append_data(struct sock *sk,
744 (length - transhdrlen)); 743 (length - transhdrlen));
745 if (!err) { 744 if (!err) {
746 /* specify the length of each IP datagram fragment*/ 745 /* specify the length of each IP datagram fragment*/
747 skb_shinfo(skb)->ufo_size = (mtu - fragheaderlen); 746 skb_shinfo(skb)->gso_size = mtu - fragheaderlen;
747 skb_shinfo(skb)->gso_type = SKB_GSO_UDPV4;
748 __skb_queue_tail(&sk->sk_write_queue, skb); 748 __skb_queue_tail(&sk->sk_write_queue, skb);
749 749
750 return 0; 750 return 0;
@@ -1087,14 +1087,16 @@ ssize_t ip_append_page(struct sock *sk, struct page *page,
1087 1087
1088 inet->cork.length += size; 1088 inet->cork.length += size;
1089 if ((sk->sk_protocol == IPPROTO_UDP) && 1089 if ((sk->sk_protocol == IPPROTO_UDP) &&
1090 (rt->u.dst.dev->features & NETIF_F_UFO)) 1090 (rt->u.dst.dev->features & NETIF_F_UFO)) {
1091 skb_shinfo(skb)->ufo_size = (mtu - fragheaderlen); 1091 skb_shinfo(skb)->gso_size = mtu - fragheaderlen;
1092 skb_shinfo(skb)->gso_type = SKB_GSO_UDPV4;
1093 }
1092 1094
1093 1095
1094 while (size > 0) { 1096 while (size > 0) {
1095 int i; 1097 int i;
1096 1098
1097 if (skb_shinfo(skb)->ufo_size) 1099 if (skb_shinfo(skb)->gso_size)
1098 len = size; 1100 len = size;
1099 else { 1101 else {
1100 1102
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 74998f250071..0e029c4e2903 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -258,6 +258,7 @@
258#include <linux/random.h> 258#include <linux/random.h>
259#include <linux/bootmem.h> 259#include <linux/bootmem.h>
260#include <linux/cache.h> 260#include <linux/cache.h>
261#include <linux/err.h>
261 262
262#include <net/icmp.h> 263#include <net/icmp.h>
263#include <net/tcp.h> 264#include <net/tcp.h>
@@ -571,7 +572,7 @@ new_segment:
571 skb->ip_summed = CHECKSUM_HW; 572 skb->ip_summed = CHECKSUM_HW;
572 tp->write_seq += copy; 573 tp->write_seq += copy;
573 TCP_SKB_CB(skb)->end_seq += copy; 574 TCP_SKB_CB(skb)->end_seq += copy;
574 skb_shinfo(skb)->tso_segs = 0; 575 skb_shinfo(skb)->gso_segs = 0;
575 576
576 if (!copied) 577 if (!copied)
577 TCP_SKB_CB(skb)->flags &= ~TCPCB_FLAG_PSH; 578 TCP_SKB_CB(skb)->flags &= ~TCPCB_FLAG_PSH;
@@ -818,7 +819,7 @@ new_segment:
818 819
819 tp->write_seq += copy; 820 tp->write_seq += copy;
820 TCP_SKB_CB(skb)->end_seq += copy; 821 TCP_SKB_CB(skb)->end_seq += copy;
821 skb_shinfo(skb)->tso_segs = 0; 822 skb_shinfo(skb)->gso_segs = 0;
822 823
823 from += copy; 824 from += copy;
824 copied += copy; 825 copied += copy;
@@ -2144,6 +2145,67 @@ int compat_tcp_getsockopt(struct sock *sk, int level, int optname,
2144EXPORT_SYMBOL(compat_tcp_getsockopt); 2145EXPORT_SYMBOL(compat_tcp_getsockopt);
2145#endif 2146#endif
2146 2147
2148struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int sg)
2149{
2150 struct sk_buff *segs = ERR_PTR(-EINVAL);
2151 struct tcphdr *th;
2152 unsigned thlen;
2153 unsigned int seq;
2154 unsigned int delta;
2155 unsigned int oldlen;
2156 unsigned int len;
2157
2158 if (!pskb_may_pull(skb, sizeof(*th)))
2159 goto out;
2160
2161 th = skb->h.th;
2162 thlen = th->doff * 4;
2163 if (thlen < sizeof(*th))
2164 goto out;
2165
2166 if (!pskb_may_pull(skb, thlen))
2167 goto out;
2168
2169 oldlen = ~htonl(skb->len);
2170 __skb_pull(skb, thlen);
2171
2172 segs = skb_segment(skb, sg);
2173 if (IS_ERR(segs))
2174 goto out;
2175
2176 len = skb_shinfo(skb)->gso_size;
2177 delta = csum_add(oldlen, htonl(thlen + len));
2178
2179 skb = segs;
2180 th = skb->h.th;
2181 seq = ntohl(th->seq);
2182
2183 do {
2184 th->fin = th->psh = 0;
2185
2186 if (skb->ip_summed == CHECKSUM_NONE) {
2187 th->check = csum_fold(csum_partial(
2188 skb->h.raw, thlen, csum_add(skb->csum, delta)));
2189 }
2190
2191 seq += len;
2192 skb = skb->next;
2193 th = skb->h.th;
2194
2195 th->seq = htonl(seq);
2196 th->cwr = 0;
2197 } while (skb->next);
2198
2199 if (skb->ip_summed == CHECKSUM_NONE) {
2200 delta = csum_add(oldlen, htonl(skb->tail - skb->h.raw));
2201 th->check = csum_fold(csum_partial(
2202 skb->h.raw, thlen, csum_add(skb->csum, delta)));
2203 }
2204
2205out:
2206 return segs;
2207}
2208
2147extern void __skb_cb_too_small_for_tcp(int, int); 2209extern void __skb_cb_too_small_for_tcp(int, int);
2148extern struct tcp_congestion_ops tcp_reno; 2210extern struct tcp_congestion_ops tcp_reno;
2149 2211
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index e08245bdda3a..94fe5b1f9dcb 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1073,7 +1073,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
1073 else 1073 else
1074 pkt_len = (end_seq - 1074 pkt_len = (end_seq -
1075 TCP_SKB_CB(skb)->seq); 1075 TCP_SKB_CB(skb)->seq);
1076 if (tcp_fragment(sk, skb, pkt_len, skb_shinfo(skb)->tso_size)) 1076 if (tcp_fragment(sk, skb, pkt_len, skb_shinfo(skb)->gso_size))
1077 break; 1077 break;
1078 pcount = tcp_skb_pcount(skb); 1078 pcount = tcp_skb_pcount(skb);
1079 } 1079 }
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 07bb5a2b375e..bdd71db8bf90 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -515,15 +515,17 @@ static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb, unsigned
515 /* Avoid the costly divide in the normal 515 /* Avoid the costly divide in the normal
516 * non-TSO case. 516 * non-TSO case.
517 */ 517 */
518 skb_shinfo(skb)->tso_segs = 1; 518 skb_shinfo(skb)->gso_segs = 1;
519 skb_shinfo(skb)->tso_size = 0; 519 skb_shinfo(skb)->gso_size = 0;
520 skb_shinfo(skb)->gso_type = 0;
520 } else { 521 } else {
521 unsigned int factor; 522 unsigned int factor;
522 523
523 factor = skb->len + (mss_now - 1); 524 factor = skb->len + (mss_now - 1);
524 factor /= mss_now; 525 factor /= mss_now;
525 skb_shinfo(skb)->tso_segs = factor; 526 skb_shinfo(skb)->gso_segs = factor;
526 skb_shinfo(skb)->tso_size = mss_now; 527 skb_shinfo(skb)->gso_size = mss_now;
528 skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
527 } 529 }
528} 530}
529 531
@@ -914,7 +916,7 @@ static int tcp_init_tso_segs(struct sock *sk, struct sk_buff *skb, unsigned int
914 916
915 if (!tso_segs || 917 if (!tso_segs ||
916 (tso_segs > 1 && 918 (tso_segs > 1 &&
917 skb_shinfo(skb)->tso_size != mss_now)) { 919 tcp_skb_mss(skb) != mss_now)) {
918 tcp_set_skb_tso_segs(sk, skb, mss_now); 920 tcp_set_skb_tso_segs(sk, skb, mss_now);
919 tso_segs = tcp_skb_pcount(skb); 921 tso_segs = tcp_skb_pcount(skb);
920 } 922 }
@@ -1724,8 +1726,9 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
1724 tp->snd_una == (TCP_SKB_CB(skb)->end_seq - 1)) { 1726 tp->snd_una == (TCP_SKB_CB(skb)->end_seq - 1)) {
1725 if (!pskb_trim(skb, 0)) { 1727 if (!pskb_trim(skb, 0)) {
1726 TCP_SKB_CB(skb)->seq = TCP_SKB_CB(skb)->end_seq - 1; 1728 TCP_SKB_CB(skb)->seq = TCP_SKB_CB(skb)->end_seq - 1;
1727 skb_shinfo(skb)->tso_segs = 1; 1729 skb_shinfo(skb)->gso_segs = 1;
1728 skb_shinfo(skb)->tso_size = 0; 1730 skb_shinfo(skb)->gso_size = 0;
1731 skb_shinfo(skb)->gso_type = 0;
1729 skb->ip_summed = CHECKSUM_NONE; 1732 skb->ip_summed = CHECKSUM_NONE;
1730 skb->csum = 0; 1733 skb->csum = 0;
1731 } 1734 }
@@ -1930,8 +1933,9 @@ void tcp_send_fin(struct sock *sk)
1930 skb->csum = 0; 1933 skb->csum = 0;
1931 TCP_SKB_CB(skb)->flags = (TCPCB_FLAG_ACK | TCPCB_FLAG_FIN); 1934 TCP_SKB_CB(skb)->flags = (TCPCB_FLAG_ACK | TCPCB_FLAG_FIN);
1932 TCP_SKB_CB(skb)->sacked = 0; 1935 TCP_SKB_CB(skb)->sacked = 0;
1933 skb_shinfo(skb)->tso_segs = 1; 1936 skb_shinfo(skb)->gso_segs = 1;
1934 skb_shinfo(skb)->tso_size = 0; 1937 skb_shinfo(skb)->gso_size = 0;
1938 skb_shinfo(skb)->gso_type = 0;
1935 1939
1936 /* FIN eats a sequence byte, write_seq advanced by tcp_queue_skb(). */ 1940 /* FIN eats a sequence byte, write_seq advanced by tcp_queue_skb(). */
1937 TCP_SKB_CB(skb)->seq = tp->write_seq; 1941 TCP_SKB_CB(skb)->seq = tp->write_seq;
@@ -1963,8 +1967,9 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority)
1963 skb->csum = 0; 1967 skb->csum = 0;
1964 TCP_SKB_CB(skb)->flags = (TCPCB_FLAG_ACK | TCPCB_FLAG_RST); 1968 TCP_SKB_CB(skb)->flags = (TCPCB_FLAG_ACK | TCPCB_FLAG_RST);
1965 TCP_SKB_CB(skb)->sacked = 0; 1969 TCP_SKB_CB(skb)->sacked = 0;
1966 skb_shinfo(skb)->tso_segs = 1; 1970 skb_shinfo(skb)->gso_segs = 1;
1967 skb_shinfo(skb)->tso_size = 0; 1971 skb_shinfo(skb)->gso_size = 0;
1972 skb_shinfo(skb)->gso_type = 0;
1968 1973
1969 /* Send it off. */ 1974 /* Send it off. */
1970 TCP_SKB_CB(skb)->seq = tcp_acceptable_seq(sk, tp); 1975 TCP_SKB_CB(skb)->seq = tcp_acceptable_seq(sk, tp);
@@ -2047,8 +2052,9 @@ struct sk_buff * tcp_make_synack(struct sock *sk, struct dst_entry *dst,
2047 TCP_SKB_CB(skb)->seq = tcp_rsk(req)->snt_isn; 2052 TCP_SKB_CB(skb)->seq = tcp_rsk(req)->snt_isn;
2048 TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq + 1; 2053 TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq + 1;
2049 TCP_SKB_CB(skb)->sacked = 0; 2054 TCP_SKB_CB(skb)->sacked = 0;
2050 skb_shinfo(skb)->tso_segs = 1; 2055 skb_shinfo(skb)->gso_segs = 1;
2051 skb_shinfo(skb)->tso_size = 0; 2056 skb_shinfo(skb)->gso_size = 0;
2057 skb_shinfo(skb)->gso_type = 0;
2052 th->seq = htonl(TCP_SKB_CB(skb)->seq); 2058 th->seq = htonl(TCP_SKB_CB(skb)->seq);
2053 th->ack_seq = htonl(tcp_rsk(req)->rcv_isn + 1); 2059 th->ack_seq = htonl(tcp_rsk(req)->rcv_isn + 1);
2054 if (req->rcv_wnd == 0) { /* ignored for retransmitted syns */ 2060 if (req->rcv_wnd == 0) { /* ignored for retransmitted syns */
@@ -2152,8 +2158,9 @@ int tcp_connect(struct sock *sk)
2152 TCP_SKB_CB(buff)->flags = TCPCB_FLAG_SYN; 2158 TCP_SKB_CB(buff)->flags = TCPCB_FLAG_SYN;
2153 TCP_ECN_send_syn(sk, tp, buff); 2159 TCP_ECN_send_syn(sk, tp, buff);
2154 TCP_SKB_CB(buff)->sacked = 0; 2160 TCP_SKB_CB(buff)->sacked = 0;
2155 skb_shinfo(buff)->tso_segs = 1; 2161 skb_shinfo(buff)->gso_segs = 1;
2156 skb_shinfo(buff)->tso_size = 0; 2162 skb_shinfo(buff)->gso_size = 0;
2163 skb_shinfo(buff)->gso_type = 0;
2157 buff->csum = 0; 2164 buff->csum = 0;
2158 TCP_SKB_CB(buff)->seq = tp->write_seq++; 2165 TCP_SKB_CB(buff)->seq = tp->write_seq++;
2159 TCP_SKB_CB(buff)->end_seq = tp->write_seq; 2166 TCP_SKB_CB(buff)->end_seq = tp->write_seq;
@@ -2257,8 +2264,9 @@ void tcp_send_ack(struct sock *sk)
2257 buff->csum = 0; 2264 buff->csum = 0;
2258 TCP_SKB_CB(buff)->flags = TCPCB_FLAG_ACK; 2265 TCP_SKB_CB(buff)->flags = TCPCB_FLAG_ACK;
2259 TCP_SKB_CB(buff)->sacked = 0; 2266 TCP_SKB_CB(buff)->sacked = 0;
2260 skb_shinfo(buff)->tso_segs = 1; 2267 skb_shinfo(buff)->gso_segs = 1;
2261 skb_shinfo(buff)->tso_size = 0; 2268 skb_shinfo(buff)->gso_size = 0;
2269 skb_shinfo(buff)->gso_type = 0;
2262 2270
2263 /* Send it off, this clears delayed acks for us. */ 2271 /* Send it off, this clears delayed acks for us. */
2264 TCP_SKB_CB(buff)->seq = TCP_SKB_CB(buff)->end_seq = tcp_acceptable_seq(sk, tp); 2272 TCP_SKB_CB(buff)->seq = TCP_SKB_CB(buff)->end_seq = tcp_acceptable_seq(sk, tp);
@@ -2293,8 +2301,9 @@ static int tcp_xmit_probe_skb(struct sock *sk, int urgent)
2293 skb->csum = 0; 2301 skb->csum = 0;
2294 TCP_SKB_CB(skb)->flags = TCPCB_FLAG_ACK; 2302 TCP_SKB_CB(skb)->flags = TCPCB_FLAG_ACK;
2295 TCP_SKB_CB(skb)->sacked = urgent; 2303 TCP_SKB_CB(skb)->sacked = urgent;
2296 skb_shinfo(skb)->tso_segs = 1; 2304 skb_shinfo(skb)->gso_segs = 1;
2297 skb_shinfo(skb)->tso_size = 0; 2305 skb_shinfo(skb)->gso_size = 0;
2306 skb_shinfo(skb)->gso_type = 0;
2298 2307
2299 /* Use a previous sequence. This should cause the other 2308 /* Use a previous sequence. This should cause the other
2300 * end to send an ack. Don't queue or clone SKB, just 2309 * end to send an ack. Don't queue or clone SKB, just
diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c
index ac9d91d4bb05..193363e22932 100644
--- a/net/ipv4/xfrm4_output.c
+++ b/net/ipv4/xfrm4_output.c
@@ -9,6 +9,8 @@
9 */ 9 */
10 10
11#include <linux/compiler.h> 11#include <linux/compiler.h>
12#include <linux/if_ether.h>
13#include <linux/kernel.h>
12#include <linux/skbuff.h> 14#include <linux/skbuff.h>
13#include <linux/spinlock.h> 15#include <linux/spinlock.h>
14#include <linux/netfilter_ipv4.h> 16#include <linux/netfilter_ipv4.h>
@@ -97,16 +99,10 @@ error_nolock:
97 goto out_exit; 99 goto out_exit;
98} 100}
99 101
100static int xfrm4_output_finish(struct sk_buff *skb) 102static int xfrm4_output_finish2(struct sk_buff *skb)
101{ 103{
102 int err; 104 int err;
103 105
104#ifdef CONFIG_NETFILTER
105 if (!skb->dst->xfrm) {
106 IPCB(skb)->flags |= IPSKB_REROUTED;
107 return dst_output(skb);
108 }
109#endif
110 while (likely((err = xfrm4_output_one(skb)) == 0)) { 106 while (likely((err = xfrm4_output_one(skb)) == 0)) {
111 nf_reset(skb); 107 nf_reset(skb);
112 108
@@ -119,7 +115,7 @@ static int xfrm4_output_finish(struct sk_buff *skb)
119 return dst_output(skb); 115 return dst_output(skb);
120 116
121 err = nf_hook(PF_INET, NF_IP_POST_ROUTING, &skb, NULL, 117 err = nf_hook(PF_INET, NF_IP_POST_ROUTING, &skb, NULL,
122 skb->dst->dev, xfrm4_output_finish); 118 skb->dst->dev, xfrm4_output_finish2);
123 if (unlikely(err != 1)) 119 if (unlikely(err != 1))
124 break; 120 break;
125 } 121 }
@@ -127,6 +123,48 @@ static int xfrm4_output_finish(struct sk_buff *skb)
127 return err; 123 return err;
128} 124}
129 125
126static int xfrm4_output_finish(struct sk_buff *skb)
127{
128 struct sk_buff *segs;
129
130#ifdef CONFIG_NETFILTER
131 if (!skb->dst->xfrm) {
132 IPCB(skb)->flags |= IPSKB_REROUTED;
133 return dst_output(skb);
134 }
135#endif
136
137 if (!skb_shinfo(skb)->gso_size)
138 return xfrm4_output_finish2(skb);
139
140 skb->protocol = htons(ETH_P_IP);
141 segs = skb_gso_segment(skb, 0);
142 kfree_skb(skb);
143 if (unlikely(IS_ERR(segs)))
144 return PTR_ERR(segs);
145
146 do {
147 struct sk_buff *nskb = segs->next;
148 int err;
149
150 segs->next = NULL;
151 err = xfrm4_output_finish2(segs);
152
153 if (unlikely(err)) {
154 while ((segs = nskb)) {
155 nskb = segs->next;
156 segs->next = NULL;
157 kfree_skb(segs);
158 }
159 return err;
160 }
161
162 segs = nskb;
163 } while (segs);
164
165 return 0;
166}
167
130int xfrm4_output(struct sk_buff *skb) 168int xfrm4_output(struct sk_buff *skb)
131{ 169{
132 return NF_HOOK_COND(PF_INET, NF_IP_POST_ROUTING, skb, NULL, skb->dst->dev, 170 return NF_HOOK_COND(PF_INET, NF_IP_POST_ROUTING, skb, NULL, skb->dst->dev,
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index c2c26fa0943d..4da664538f52 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -862,6 +862,8 @@ static int inline ipv6_saddr_label(const struct in6_addr *addr, int type)
862 * 2002::/16 2 862 * 2002::/16 2
863 * ::/96 3 863 * ::/96 3
864 * ::ffff:0:0/96 4 864 * ::ffff:0:0/96 4
865 * fc00::/7 5
866 * 2001::/32 6
865 */ 867 */
866 if (type & IPV6_ADDR_LOOPBACK) 868 if (type & IPV6_ADDR_LOOPBACK)
867 return 0; 869 return 0;
@@ -869,8 +871,12 @@ static int inline ipv6_saddr_label(const struct in6_addr *addr, int type)
869 return 3; 871 return 3;
870 else if (type & IPV6_ADDR_MAPPED) 872 else if (type & IPV6_ADDR_MAPPED)
871 return 4; 873 return 4;
874 else if (addr->s6_addr32[0] == htonl(0x20010000))
875 return 6;
872 else if (addr->s6_addr16[0] == htons(0x2002)) 876 else if (addr->s6_addr16[0] == htons(0x2002))
873 return 2; 877 return 2;
878 else if ((addr->s6_addr[0] & 0xfe) == 0xfc)
879 return 5;
874 return 1; 880 return 1;
875} 881}
876 882
@@ -1069,6 +1075,9 @@ int ipv6_dev_get_saddr(struct net_device *daddr_dev,
1069 if (hiscore.attrs & IPV6_SADDR_SCORE_PRIVACY) 1075 if (hiscore.attrs & IPV6_SADDR_SCORE_PRIVACY)
1070 continue; 1076 continue;
1071 } 1077 }
1078#else
1079 if (hiscore.rule < 7)
1080 hiscore.rule++;
1072#endif 1081#endif
1073 /* Rule 8: Use longest matching prefix */ 1082 /* Rule 8: Use longest matching prefix */
1074 if (hiscore.rule < 8) { 1083 if (hiscore.rule < 8) {
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index d29620f4910e..abb94de33768 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -148,7 +148,7 @@ static int ip6_output2(struct sk_buff *skb)
148 148
149int ip6_output(struct sk_buff *skb) 149int ip6_output(struct sk_buff *skb)
150{ 150{
151 if ((skb->len > dst_mtu(skb->dst) && !skb_shinfo(skb)->ufo_size) || 151 if ((skb->len > dst_mtu(skb->dst) && !skb_shinfo(skb)->gso_size) ||
152 dst_allfrag(skb->dst)) 152 dst_allfrag(skb->dst))
153 return ip6_fragment(skb, ip6_output2); 153 return ip6_fragment(skb, ip6_output2);
154 else 154 else
@@ -833,8 +833,9 @@ static inline int ip6_ufo_append_data(struct sock *sk,
833 struct frag_hdr fhdr; 833 struct frag_hdr fhdr;
834 834
835 /* specify the length of each IP datagram fragment*/ 835 /* specify the length of each IP datagram fragment*/
836 skb_shinfo(skb)->ufo_size = (mtu - fragheaderlen) - 836 skb_shinfo(skb)->gso_size = mtu - fragheaderlen -
837 sizeof(struct frag_hdr); 837 sizeof(struct frag_hdr);
838 skb_shinfo(skb)->gso_type = SKB_GSO_UDPV4;
838 ipv6_select_ident(skb, &fhdr); 839 ipv6_select_ident(skb, &fhdr);
839 skb_shinfo(skb)->ip6_frag_id = fhdr.identification; 840 skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
840 __skb_queue_tail(&sk->sk_write_queue, skb); 841 __skb_queue_tail(&sk->sk_write_queue, skb);
diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c
index 16e84254a252..48fccb1eca08 100644
--- a/net/ipv6/xfrm6_output.c
+++ b/net/ipv6/xfrm6_output.c
@@ -94,7 +94,7 @@ error_nolock:
94 goto out_exit; 94 goto out_exit;
95} 95}
96 96
97static int xfrm6_output_finish(struct sk_buff *skb) 97static int xfrm6_output_finish2(struct sk_buff *skb)
98{ 98{
99 int err; 99 int err;
100 100
@@ -110,7 +110,7 @@ static int xfrm6_output_finish(struct sk_buff *skb)
110 return dst_output(skb); 110 return dst_output(skb);
111 111
112 err = nf_hook(PF_INET6, NF_IP6_POST_ROUTING, &skb, NULL, 112 err = nf_hook(PF_INET6, NF_IP6_POST_ROUTING, &skb, NULL,
113 skb->dst->dev, xfrm6_output_finish); 113 skb->dst->dev, xfrm6_output_finish2);
114 if (unlikely(err != 1)) 114 if (unlikely(err != 1))
115 break; 115 break;
116 } 116 }
@@ -118,6 +118,41 @@ static int xfrm6_output_finish(struct sk_buff *skb)
118 return err; 118 return err;
119} 119}
120 120
121static int xfrm6_output_finish(struct sk_buff *skb)
122{
123 struct sk_buff *segs;
124
125 if (!skb_shinfo(skb)->gso_size)
126 return xfrm6_output_finish2(skb);
127
128 skb->protocol = htons(ETH_P_IP);
129 segs = skb_gso_segment(skb, 0);
130 kfree_skb(skb);
131 if (unlikely(IS_ERR(segs)))
132 return PTR_ERR(segs);
133
134 do {
135 struct sk_buff *nskb = segs->next;
136 int err;
137
138 segs->next = NULL;
139 err = xfrm6_output_finish2(segs);
140
141 if (unlikely(err)) {
142 while ((segs = nskb)) {
143 nskb = segs->next;
144 segs->next = NULL;
145 kfree_skb(segs);
146 }
147 return err;
148 }
149
150 segs = nskb;
151 } while (segs);
152
153 return 0;
154}
155
121int xfrm6_output(struct sk_buff *skb) 156int xfrm6_output(struct sk_buff *skb)
122{ 157{
123 return NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, skb, NULL, skb->dst->dev, 158 return NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, skb, NULL, skb->dst->dev,
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index d7aca8ef524a..74d4a1dceeec 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -96,8 +96,11 @@ static inline int qdisc_restart(struct net_device *dev)
96 struct sk_buff *skb; 96 struct sk_buff *skb;
97 97
98 /* Dequeue packet */ 98 /* Dequeue packet */
99 if ((skb = q->dequeue(q)) != NULL) { 99 if (((skb = dev->gso_skb)) || ((skb = q->dequeue(q)))) {
100 unsigned nolock = (dev->features & NETIF_F_LLTX); 100 unsigned nolock = (dev->features & NETIF_F_LLTX);
101
102 dev->gso_skb = NULL;
103
101 /* 104 /*
102 * When the driver has LLTX set it does its own locking 105 * When the driver has LLTX set it does its own locking
103 * in start_xmit. No need to add additional overhead by 106 * in start_xmit. No need to add additional overhead by
@@ -134,10 +137,8 @@ static inline int qdisc_restart(struct net_device *dev)
134 137
135 if (!netif_queue_stopped(dev)) { 138 if (!netif_queue_stopped(dev)) {
136 int ret; 139 int ret;
137 if (netdev_nit)
138 dev_queue_xmit_nit(skb, dev);
139 140
140 ret = dev->hard_start_xmit(skb, dev); 141 ret = dev_hard_start_xmit(skb, dev);
141 if (ret == NETDEV_TX_OK) { 142 if (ret == NETDEV_TX_OK) {
142 if (!nolock) { 143 if (!nolock) {
143 netif_tx_unlock(dev); 144 netif_tx_unlock(dev);
@@ -171,7 +172,10 @@ static inline int qdisc_restart(struct net_device *dev)
171 */ 172 */
172 173
173requeue: 174requeue:
174 q->ops->requeue(skb, q); 175 if (skb->next)
176 dev->gso_skb = skb;
177 else
178 q->ops->requeue(skb, q);
175 netif_schedule(dev); 179 netif_schedule(dev);
176 return 1; 180 return 1;
177 } 181 }
@@ -181,9 +185,13 @@ requeue:
181 185
182void __qdisc_run(struct net_device *dev) 186void __qdisc_run(struct net_device *dev)
183{ 187{
188 if (unlikely(dev->qdisc == &noop_qdisc))
189 goto out;
190
184 while (qdisc_restart(dev) < 0 && !netif_queue_stopped(dev)) 191 while (qdisc_restart(dev) < 0 && !netif_queue_stopped(dev))
185 /* NOTHING */; 192 /* NOTHING */;
186 193
194out:
187 clear_bit(__LINK_STATE_QDISC_RUNNING, &dev->state); 195 clear_bit(__LINK_STATE_QDISC_RUNNING, &dev->state);
188} 196}
189 197
@@ -583,10 +591,17 @@ void dev_deactivate(struct net_device *dev)
583 591
584 dev_watchdog_down(dev); 592 dev_watchdog_down(dev);
585 593
586 while (test_bit(__LINK_STATE_SCHED, &dev->state)) 594 /* Wait for outstanding dev_queue_xmit calls. */
595 synchronize_rcu();
596
597 /* Wait for outstanding qdisc_run calls. */
598 while (test_bit(__LINK_STATE_QDISC_RUNNING, &dev->state))
587 yield(); 599 yield();
588 600
589 spin_unlock_wait(&dev->_xmit_lock); 601 if (dev->gso_skb) {
602 kfree_skb(dev->gso_skb);
603 dev->gso_skb = NULL;
604 }
590} 605}
591 606
592void dev_init_scheduler(struct net_device *dev) 607void dev_init_scheduler(struct net_device *dev)