about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Herbert Xu <herbert@gondor.apana.org.au> 2006-06-22 05:57:17 -0400
committer David S. Miller <davem@sunset.davemloft.net> 2006-06-23 05:07:31 -0400
commit f6a78bfcb141f963187464bac838d46a81c3882a (patch)
tree fe30917dea1ab4cc046c6f1b8c1875373040c84a
parent 7967168cefdbc63bf332d6b1548eca7cd65ebbcc (diff)
[NET]: Add generic segmentation offload
This patch adds the infrastructure for generic segmentation offload. The idea is to tap into the potential savings of TSO without hardware support by postponing the allocation of segmented skb's until just before the entry point into the NIC driver.

The same structure can be used to support software IPv6 TSO, as well as UFO and segmentation offload for other relevant protocols, e.g., DCCP.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- include/linux/netdevice.h 8
-rw-r--r-- net/core/dev.c 127
-rw-r--r-- net/sched/sch_generic.c 19
3 files changed, 143 insertions, 11 deletions
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index fa5671307b90..b4eae18390cc 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -405,6 +405,9 @@ struct net_device
405 struct list_head qdisc_list; 405 struct list_head qdisc_list;
406 unsigned long tx_queue_len; /* Max frames per queue allowed */ 406 unsigned long tx_queue_len; /* Max frames per queue allowed */
407 407
408 /* Partially transmitted GSO packet. */
409 struct sk_buff *gso_skb;
410
408 /* ingress path synchronizer */ 411 /* ingress path synchronizer */
409 spinlock_t ingress_lock; 412 spinlock_t ingress_lock;
410 struct Qdisc *qdisc_ingress; 413 struct Qdisc *qdisc_ingress;
@@ -539,6 +542,7 @@ struct packet_type {
539 struct net_device *, 542 struct net_device *,
540 struct packet_type *, 543 struct packet_type *,
541 struct net_device *); 544 struct net_device *);
545 struct sk_buff *(*gso_segment)(struct sk_buff *skb, int sg);
542 void *af_packet_priv; 546 void *af_packet_priv;
543 struct list_head list; 547 struct list_head list;
544}; 548};
@@ -689,7 +693,8 @@ extern int dev_change_name(struct net_device *, char *);
689extern int dev_set_mtu(struct net_device *, int); 693extern int dev_set_mtu(struct net_device *, int);
690extern int dev_set_mac_address(struct net_device *, 694extern int dev_set_mac_address(struct net_device *,
691 struct sockaddr *); 695 struct sockaddr *);
692extern void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev); 696extern int dev_hard_start_xmit(struct sk_buff *skb,
697 struct net_device *dev);
693 698
694extern void dev_init(void); 699extern void dev_init(void);
695 700
@@ -963,6 +968,7 @@ extern int netdev_max_backlog;
963extern int weight_p; 968extern int weight_p;
964extern int netdev_set_master(struct net_device *dev, struct net_device *master); 969extern int netdev_set_master(struct net_device *dev, struct net_device *master);
965extern int skb_checksum_help(struct sk_buff *skb, int inward); 970extern int skb_checksum_help(struct sk_buff *skb, int inward);
971extern struct sk_buff *skb_gso_segment(struct sk_buff *skb, int sg);
966#ifdef CONFIG_BUG 972#ifdef CONFIG_BUG
967extern void netdev_rx_csum_fault(struct net_device *dev); 973extern void netdev_rx_csum_fault(struct net_device *dev);
968#else 974#else
diff --git a/net/core/dev.c b/net/core/dev.c
index 29e3888102bc..d293e0f90a0c 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -116,6 +116,7 @@
116#include <asm/current.h> 116#include <asm/current.h>
117#include <linux/audit.h> 117#include <linux/audit.h>
118#include <linux/dmaengine.h> 118#include <linux/dmaengine.h>
119#include <linux/err.h>
119 120
120/* 121/*
121 * The list of packet types we will receive (as opposed to discard) 122 * The list of packet types we will receive (as opposed to discard)
@@ -1048,7 +1049,7 @@ static inline void net_timestamp(struct sk_buff *skb)
1048 * taps currently in use. 1049 * taps currently in use.
1049 */ 1050 */
1050 1051
1051void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) 1052static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
1052{ 1053{
1053 struct packet_type *ptype; 1054 struct packet_type *ptype;
1054 1055
@@ -1186,6 +1187,40 @@ out:
1186 return ret; 1187 return ret;
1187} 1188}
1188 1189
1190/**
1191 * skb_gso_segment - Perform segmentation on skb.
1192 * @skb: buffer to segment
1193 * @sg: whether scatter-gather is supported on the target.
1194 *
1195 * This function segments the given skb and returns a list of segments.
1196 */
1197struct sk_buff *skb_gso_segment(struct sk_buff *skb, int sg)
1198{
1199 struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
1200 struct packet_type *ptype;
1201 int type = skb->protocol;
1202
1203 BUG_ON(skb_shinfo(skb)->frag_list);
1204 BUG_ON(skb->ip_summed != CHECKSUM_HW);
1205
1206 skb->mac.raw = skb->data;
1207 skb->mac_len = skb->nh.raw - skb->data;
1208 __skb_pull(skb, skb->mac_len);
1209
1210 rcu_read_lock();
1211 list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type) & 15], list) {
1212 if (ptype->type == type && !ptype->dev && ptype->gso_segment) {
1213 segs = ptype->gso_segment(skb, sg);
1214 break;
1215 }
1216 }
1217 rcu_read_unlock();
1218
1219 return segs;
1220}
1221
1222EXPORT_SYMBOL(skb_gso_segment);
1223
1189/* Take action when hardware reception checksum errors are detected. */ 1224/* Take action when hardware reception checksum errors are detected. */
1190#ifdef CONFIG_BUG 1225#ifdef CONFIG_BUG
1191void netdev_rx_csum_fault(struct net_device *dev) 1226void netdev_rx_csum_fault(struct net_device *dev)
@@ -1222,6 +1257,86 @@ static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
1222#define illegal_highdma(dev, skb) (0) 1257#define illegal_highdma(dev, skb) (0)
1223#endif 1258#endif
1224 1259
1260struct dev_gso_cb {
1261 void (*destructor)(struct sk_buff *skb);
1262};
1263
1264#define DEV_GSO_CB(skb) ((struct dev_gso_cb *)(skb)->cb)
1265
1266static void dev_gso_skb_destructor(struct sk_buff *skb)
1267{
1268 struct dev_gso_cb *cb;
1269
1270 do {
1271 struct sk_buff *nskb = skb->next;
1272
1273 skb->next = nskb->next;
1274 nskb->next = NULL;
1275 kfree_skb(nskb);
1276 } while (skb->next);
1277
1278 cb = DEV_GSO_CB(skb);
1279 if (cb->destructor)
1280 cb->destructor(skb);
1281}
1282
1283/**
1284 * dev_gso_segment - Perform emulated hardware segmentation on skb.
1285 * @skb: buffer to segment
1286 *
1287 * This function segments the given skb and stores the list of segments
1288 * in skb->next.
1289 */
1290static int dev_gso_segment(struct sk_buff *skb)
1291{
1292 struct net_device *dev = skb->dev;
1293 struct sk_buff *segs;
1294
1295 segs = skb_gso_segment(skb, dev->features & NETIF_F_SG &&
1296 !illegal_highdma(dev, skb));
1297 if (unlikely(IS_ERR(segs)))
1298 return PTR_ERR(segs);
1299
1300 skb->next = segs;
1301 DEV_GSO_CB(skb)->destructor = skb->destructor;
1302 skb->destructor = dev_gso_skb_destructor;
1303
1304 return 0;
1305}
1306
1307int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
1308{
1309 if (likely(!skb->next)) {
1310 if (netdev_nit)
1311 dev_queue_xmit_nit(skb, dev);
1312
1313 if (!netif_needs_gso(dev, skb))
1314 return dev->hard_start_xmit(skb, dev);
1315
1316 if (unlikely(dev_gso_segment(skb)))
1317 goto out_kfree_skb;
1318 }
1319
1320 do {
1321 struct sk_buff *nskb = skb->next;
1322 int rc;
1323
1324 skb->next = nskb->next;
1325 nskb->next = NULL;
1326 rc = dev->hard_start_xmit(nskb, dev);
1327 if (unlikely(rc)) {
1328 skb->next = nskb;
1329 return rc;
1330 }
1331 } while (skb->next);
1332
1333 skb->destructor = DEV_GSO_CB(skb)->destructor;
1334
1335out_kfree_skb:
1336 kfree_skb(skb);
1337 return 0;
1338}
1339
1225#define HARD_TX_LOCK(dev, cpu) { \ 1340#define HARD_TX_LOCK(dev, cpu) { \
1226 if ((dev->features & NETIF_F_LLTX) == 0) { \ 1341 if ((dev->features & NETIF_F_LLTX) == 0) { \
1227 netif_tx_lock(dev); \ 1342 netif_tx_lock(dev); \
@@ -1266,6 +1381,10 @@ int dev_queue_xmit(struct sk_buff *skb)
1266 struct Qdisc *q; 1381 struct Qdisc *q;
1267 int rc = -ENOMEM; 1382 int rc = -ENOMEM;
1268 1383
1384 /* GSO will handle the following emulations directly. */
1385 if (netif_needs_gso(dev, skb))
1386 goto gso;
1387
1269 if (skb_shinfo(skb)->frag_list && 1388 if (skb_shinfo(skb)->frag_list &&
1270 !(dev->features & NETIF_F_FRAGLIST) && 1389 !(dev->features & NETIF_F_FRAGLIST) &&
1271 __skb_linearize(skb)) 1390 __skb_linearize(skb))
@@ -1290,6 +1409,7 @@ int dev_queue_xmit(struct sk_buff *skb)
1290 if (skb_checksum_help(skb, 0)) 1409 if (skb_checksum_help(skb, 0))
1291 goto out_kfree_skb; 1410 goto out_kfree_skb;
1292 1411
1412gso:
1293 spin_lock_prefetch(&dev->queue_lock); 1413 spin_lock_prefetch(&dev->queue_lock);
1294 1414
1295 /* Disable soft irqs for various locks below. Also 1415 /* Disable soft irqs for various locks below. Also
@@ -1346,11 +1466,8 @@ int dev_queue_xmit(struct sk_buff *skb)
1346 HARD_TX_LOCK(dev, cpu); 1466 HARD_TX_LOCK(dev, cpu);
1347 1467
1348 if (!netif_queue_stopped(dev)) { 1468 if (!netif_queue_stopped(dev)) {
1349 if (netdev_nit)
1350 dev_queue_xmit_nit(skb, dev);
1351
1352 rc = 0; 1469 rc = 0;
1353 if (!dev->hard_start_xmit(skb, dev)) { 1470 if (!dev_hard_start_xmit(skb, dev)) {
1354 HARD_TX_UNLOCK(dev); 1471 HARD_TX_UNLOCK(dev);
1355 goto out; 1472 goto out;
1356 } 1473 }
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 7aad0121232c..74d4a1dceeec 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -96,8 +96,11 @@ static inline int qdisc_restart(struct net_device *dev)
96 struct sk_buff *skb; 96 struct sk_buff *skb;
97 97
98 /* Dequeue packet */ 98 /* Dequeue packet */
99 if ((skb = q->dequeue(q)) != NULL) { 99 if (((skb = dev->gso_skb)) || ((skb = q->dequeue(q)))) {
100 unsigned nolock = (dev->features & NETIF_F_LLTX); 100 unsigned nolock = (dev->features & NETIF_F_LLTX);
101
102 dev->gso_skb = NULL;
103
101 /* 104 /*
102 * When the driver has LLTX set it does its own locking 105 * When the driver has LLTX set it does its own locking
103 * in start_xmit. No need to add additional overhead by 106 * in start_xmit. No need to add additional overhead by
@@ -134,10 +137,8 @@ static inline int qdisc_restart(struct net_device *dev)
134 137
135 if (!netif_queue_stopped(dev)) { 138 if (!netif_queue_stopped(dev)) {
136 int ret; 139 int ret;
137 if (netdev_nit)
138 dev_queue_xmit_nit(skb, dev);
139 140
140 ret = dev->hard_start_xmit(skb, dev); 141 ret = dev_hard_start_xmit(skb, dev);
141 if (ret == NETDEV_TX_OK) { 142 if (ret == NETDEV_TX_OK) {
142 if (!nolock) { 143 if (!nolock) {
143 netif_tx_unlock(dev); 144 netif_tx_unlock(dev);
@@ -171,7 +172,10 @@ static inline int qdisc_restart(struct net_device *dev)
171 */ 172 */
172 173
173requeue: 174requeue:
174 q->ops->requeue(skb, q); 175 if (skb->next)
176 dev->gso_skb = skb;
177 else
178 q->ops->requeue(skb, q);
175 netif_schedule(dev); 179 netif_schedule(dev);
176 return 1; 180 return 1;
177 } 181 }
@@ -593,6 +597,11 @@ void dev_deactivate(struct net_device *dev)
593 /* Wait for outstanding qdisc_run calls. */ 597 /* Wait for outstanding qdisc_run calls. */
594 while (test_bit(__LINK_STATE_QDISC_RUNNING, &dev->state)) 598 while (test_bit(__LINK_STATE_QDISC_RUNNING, &dev->state))
595 yield(); 599 yield();
600
601 if (dev->gso_skb) {
602 kfree_skb(dev->gso_skb);
603 dev->gso_skb = NULL;
604 }
596} 605}
597 606
598void dev_init_scheduler(struct net_device *dev) 607void dev_init_scheduler(struct net_device *dev)