diff options
author | Herbert Xu <herbert@gondor.apana.org.au> | 2006-06-22 05:57:17 -0400 |
---|---|---|
committer | David S. Miller <davem@sunset.davemloft.net> | 2006-06-23 05:07:31 -0400 |
commit | f6a78bfcb141f963187464bac838d46a81c3882a (patch) | |
tree | fe30917dea1ab4cc046c6f1b8c1875373040c84a | |
parent | 7967168cefdbc63bf332d6b1548eca7cd65ebbcc (diff) |
[NET]: Add generic segmentation offload
This patch adds the infrastructure for generic segmentation offload.
The idea is to tap into the potential savings of TSO without hardware
support by postponing the allocation of segmented skbs until just
before the entry point into the NIC driver.
The same structure can be used to support software IPv6 TSO, as well as
UFO and segmentation offload for other relevant protocols, e.g., DCCP.
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | include/linux/netdevice.h | 8 | ||||
-rw-r--r-- | net/core/dev.c | 127 | ||||
-rw-r--r-- | net/sched/sch_generic.c | 19 |
3 files changed, 143 insertions, 11 deletions
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index fa5671307b90..b4eae18390cc 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h | |||
@@ -405,6 +405,9 @@ struct net_device | |||
405 | struct list_head qdisc_list; | 405 | struct list_head qdisc_list; |
406 | unsigned long tx_queue_len; /* Max frames per queue allowed */ | 406 | unsigned long tx_queue_len; /* Max frames per queue allowed */ |
407 | 407 | ||
408 | /* Partially transmitted GSO packet. */ | ||
409 | struct sk_buff *gso_skb; | ||
410 | |||
408 | /* ingress path synchronizer */ | 411 | /* ingress path synchronizer */ |
409 | spinlock_t ingress_lock; | 412 | spinlock_t ingress_lock; |
410 | struct Qdisc *qdisc_ingress; | 413 | struct Qdisc *qdisc_ingress; |
@@ -539,6 +542,7 @@ struct packet_type { | |||
539 | struct net_device *, | 542 | struct net_device *, |
540 | struct packet_type *, | 543 | struct packet_type *, |
541 | struct net_device *); | 544 | struct net_device *); |
545 | struct sk_buff *(*gso_segment)(struct sk_buff *skb, int sg); | ||
542 | void *af_packet_priv; | 546 | void *af_packet_priv; |
543 | struct list_head list; | 547 | struct list_head list; |
544 | }; | 548 | }; |
@@ -689,7 +693,8 @@ extern int dev_change_name(struct net_device *, char *); | |||
689 | extern int dev_set_mtu(struct net_device *, int); | 693 | extern int dev_set_mtu(struct net_device *, int); |
690 | extern int dev_set_mac_address(struct net_device *, | 694 | extern int dev_set_mac_address(struct net_device *, |
691 | struct sockaddr *); | 695 | struct sockaddr *); |
692 | extern void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev); | 696 | extern int dev_hard_start_xmit(struct sk_buff *skb, |
697 | struct net_device *dev); | ||
693 | 698 | ||
694 | extern void dev_init(void); | 699 | extern void dev_init(void); |
695 | 700 | ||
@@ -963,6 +968,7 @@ extern int netdev_max_backlog; | |||
963 | extern int weight_p; | 968 | extern int weight_p; |
964 | extern int netdev_set_master(struct net_device *dev, struct net_device *master); | 969 | extern int netdev_set_master(struct net_device *dev, struct net_device *master); |
965 | extern int skb_checksum_help(struct sk_buff *skb, int inward); | 970 | extern int skb_checksum_help(struct sk_buff *skb, int inward); |
971 | extern struct sk_buff *skb_gso_segment(struct sk_buff *skb, int sg); | ||
966 | #ifdef CONFIG_BUG | 972 | #ifdef CONFIG_BUG |
967 | extern void netdev_rx_csum_fault(struct net_device *dev); | 973 | extern void netdev_rx_csum_fault(struct net_device *dev); |
968 | #else | 974 | #else |
diff --git a/net/core/dev.c b/net/core/dev.c index 29e3888102bc..d293e0f90a0c 100644 --- a/net/core/dev.c +++ b/net/core/dev.c | |||
@@ -116,6 +116,7 @@ | |||
116 | #include <asm/current.h> | 116 | #include <asm/current.h> |
117 | #include <linux/audit.h> | 117 | #include <linux/audit.h> |
118 | #include <linux/dmaengine.h> | 118 | #include <linux/dmaengine.h> |
119 | #include <linux/err.h> | ||
119 | 120 | ||
120 | /* | 121 | /* |
121 | * The list of packet types we will receive (as opposed to discard) | 122 | * The list of packet types we will receive (as opposed to discard) |
@@ -1048,7 +1049,7 @@ static inline void net_timestamp(struct sk_buff *skb) | |||
1048 | * taps currently in use. | 1049 | * taps currently in use. |
1049 | */ | 1050 | */ |
1050 | 1051 | ||
1051 | void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) | 1052 | static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) |
1052 | { | 1053 | { |
1053 | struct packet_type *ptype; | 1054 | struct packet_type *ptype; |
1054 | 1055 | ||
@@ -1186,6 +1187,40 @@ out: | |||
1186 | return ret; | 1187 | return ret; |
1187 | } | 1188 | } |
1188 | 1189 | ||
1190 | /** | ||
1191 | * skb_gso_segment - Perform segmentation on skb. | ||
1192 | * @skb: buffer to segment | ||
1193 | * @sg: whether scatter-gather is supported on the target. | ||
1194 | * | ||
1195 | * This function segments the given skb and returns a list of segments. | ||
1196 | */ | ||
1197 | struct sk_buff *skb_gso_segment(struct sk_buff *skb, int sg) | ||
1198 | { | ||
1199 | struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT); | ||
1200 | struct packet_type *ptype; | ||
1201 | int type = skb->protocol; | ||
1202 | |||
1203 | BUG_ON(skb_shinfo(skb)->frag_list); | ||
1204 | BUG_ON(skb->ip_summed != CHECKSUM_HW); | ||
1205 | |||
1206 | skb->mac.raw = skb->data; | ||
1207 | skb->mac_len = skb->nh.raw - skb->data; | ||
1208 | __skb_pull(skb, skb->mac_len); | ||
1209 | |||
1210 | rcu_read_lock(); | ||
1211 | list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type) & 15], list) { | ||
1212 | if (ptype->type == type && !ptype->dev && ptype->gso_segment) { | ||
1213 | segs = ptype->gso_segment(skb, sg); | ||
1214 | break; | ||
1215 | } | ||
1216 | } | ||
1217 | rcu_read_unlock(); | ||
1218 | |||
1219 | return segs; | ||
1220 | } | ||
1221 | |||
1222 | EXPORT_SYMBOL(skb_gso_segment); | ||
1223 | |||
1189 | /* Take action when hardware reception checksum errors are detected. */ | 1224 | /* Take action when hardware reception checksum errors are detected. */ |
1190 | #ifdef CONFIG_BUG | 1225 | #ifdef CONFIG_BUG |
1191 | void netdev_rx_csum_fault(struct net_device *dev) | 1226 | void netdev_rx_csum_fault(struct net_device *dev) |
@@ -1222,6 +1257,86 @@ static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb) | |||
1222 | #define illegal_highdma(dev, skb) (0) | 1257 | #define illegal_highdma(dev, skb) (0) |
1223 | #endif | 1258 | #endif |
1224 | 1259 | ||
/*
 * Private state kept in skb->cb while an skb is being transmitted as a
 * list of GSO segments: the destructor the skb had before
 * dev_gso_skb_destructor was installed in its place.
 */
struct dev_gso_cb {
	void (*destructor)(struct sk_buff *);
};

/* View the control buffer of @skb as a struct dev_gso_cb. */
#define DEV_GSO_CB(skb) ((struct dev_gso_cb *)((skb)->cb))
1265 | |||
1266 | static void dev_gso_skb_destructor(struct sk_buff *skb) | ||
1267 | { | ||
1268 | struct dev_gso_cb *cb; | ||
1269 | |||
1270 | do { | ||
1271 | struct sk_buff *nskb = skb->next; | ||
1272 | |||
1273 | skb->next = nskb->next; | ||
1274 | nskb->next = NULL; | ||
1275 | kfree_skb(nskb); | ||
1276 | } while (skb->next); | ||
1277 | |||
1278 | cb = DEV_GSO_CB(skb); | ||
1279 | if (cb->destructor) | ||
1280 | cb->destructor(skb); | ||
1281 | } | ||
1282 | |||
1283 | /** | ||
1284 | * dev_gso_segment - Perform emulated hardware segmentation on skb. | ||
1285 | * @skb: buffer to segment | ||
1286 | * | ||
1287 | * This function segments the given skb and stores the list of segments | ||
1288 | * in skb->next. | ||
1289 | */ | ||
1290 | static int dev_gso_segment(struct sk_buff *skb) | ||
1291 | { | ||
1292 | struct net_device *dev = skb->dev; | ||
1293 | struct sk_buff *segs; | ||
1294 | |||
1295 | segs = skb_gso_segment(skb, dev->features & NETIF_F_SG && | ||
1296 | !illegal_highdma(dev, skb)); | ||
1297 | if (unlikely(IS_ERR(segs))) | ||
1298 | return PTR_ERR(segs); | ||
1299 | |||
1300 | skb->next = segs; | ||
1301 | DEV_GSO_CB(skb)->destructor = skb->destructor; | ||
1302 | skb->destructor = dev_gso_skb_destructor; | ||
1303 | |||
1304 | return 0; | ||
1305 | } | ||
1306 | |||
1307 | int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) | ||
1308 | { | ||
1309 | if (likely(!skb->next)) { | ||
1310 | if (netdev_nit) | ||
1311 | dev_queue_xmit_nit(skb, dev); | ||
1312 | |||
1313 | if (!netif_needs_gso(dev, skb)) | ||
1314 | return dev->hard_start_xmit(skb, dev); | ||
1315 | |||
1316 | if (unlikely(dev_gso_segment(skb))) | ||
1317 | goto out_kfree_skb; | ||
1318 | } | ||
1319 | |||
1320 | do { | ||
1321 | struct sk_buff *nskb = skb->next; | ||
1322 | int rc; | ||
1323 | |||
1324 | skb->next = nskb->next; | ||
1325 | nskb->next = NULL; | ||
1326 | rc = dev->hard_start_xmit(nskb, dev); | ||
1327 | if (unlikely(rc)) { | ||
1328 | skb->next = nskb; | ||
1329 | return rc; | ||
1330 | } | ||
1331 | } while (skb->next); | ||
1332 | |||
1333 | skb->destructor = DEV_GSO_CB(skb)->destructor; | ||
1334 | |||
1335 | out_kfree_skb: | ||
1336 | kfree_skb(skb); | ||
1337 | return 0; | ||
1338 | } | ||
1339 | |||
1225 | #define HARD_TX_LOCK(dev, cpu) { \ | 1340 | #define HARD_TX_LOCK(dev, cpu) { \ |
1226 | if ((dev->features & NETIF_F_LLTX) == 0) { \ | 1341 | if ((dev->features & NETIF_F_LLTX) == 0) { \ |
1227 | netif_tx_lock(dev); \ | 1342 | netif_tx_lock(dev); \ |
@@ -1266,6 +1381,10 @@ int dev_queue_xmit(struct sk_buff *skb) | |||
1266 | struct Qdisc *q; | 1381 | struct Qdisc *q; |
1267 | int rc = -ENOMEM; | 1382 | int rc = -ENOMEM; |
1268 | 1383 | ||
1384 | /* GSO will handle the following emulations directly. */ | ||
1385 | if (netif_needs_gso(dev, skb)) | ||
1386 | goto gso; | ||
1387 | |||
1269 | if (skb_shinfo(skb)->frag_list && | 1388 | if (skb_shinfo(skb)->frag_list && |
1270 | !(dev->features & NETIF_F_FRAGLIST) && | 1389 | !(dev->features & NETIF_F_FRAGLIST) && |
1271 | __skb_linearize(skb)) | 1390 | __skb_linearize(skb)) |
@@ -1290,6 +1409,7 @@ int dev_queue_xmit(struct sk_buff *skb) | |||
1290 | if (skb_checksum_help(skb, 0)) | 1409 | if (skb_checksum_help(skb, 0)) |
1291 | goto out_kfree_skb; | 1410 | goto out_kfree_skb; |
1292 | 1411 | ||
1412 | gso: | ||
1293 | spin_lock_prefetch(&dev->queue_lock); | 1413 | spin_lock_prefetch(&dev->queue_lock); |
1294 | 1414 | ||
1295 | /* Disable soft irqs for various locks below. Also | 1415 | /* Disable soft irqs for various locks below. Also |
@@ -1346,11 +1466,8 @@ int dev_queue_xmit(struct sk_buff *skb) | |||
1346 | HARD_TX_LOCK(dev, cpu); | 1466 | HARD_TX_LOCK(dev, cpu); |
1347 | 1467 | ||
1348 | if (!netif_queue_stopped(dev)) { | 1468 | if (!netif_queue_stopped(dev)) { |
1349 | if (netdev_nit) | ||
1350 | dev_queue_xmit_nit(skb, dev); | ||
1351 | |||
1352 | rc = 0; | 1469 | rc = 0; |
1353 | if (!dev->hard_start_xmit(skb, dev)) { | 1470 | if (!dev_hard_start_xmit(skb, dev)) { |
1354 | HARD_TX_UNLOCK(dev); | 1471 | HARD_TX_UNLOCK(dev); |
1355 | goto out; | 1472 | goto out; |
1356 | } | 1473 | } |
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 7aad0121232c..74d4a1dceeec 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c | |||
@@ -96,8 +96,11 @@ static inline int qdisc_restart(struct net_device *dev) | |||
96 | struct sk_buff *skb; | 96 | struct sk_buff *skb; |
97 | 97 | ||
98 | /* Dequeue packet */ | 98 | /* Dequeue packet */ |
99 | if ((skb = q->dequeue(q)) != NULL) { | 99 | if (((skb = dev->gso_skb)) || ((skb = q->dequeue(q)))) { |
100 | unsigned nolock = (dev->features & NETIF_F_LLTX); | 100 | unsigned nolock = (dev->features & NETIF_F_LLTX); |
101 | |||
102 | dev->gso_skb = NULL; | ||
103 | |||
101 | /* | 104 | /* |
102 | * When the driver has LLTX set it does its own locking | 105 | * When the driver has LLTX set it does its own locking |
103 | * in start_xmit. No need to add additional overhead by | 106 | * in start_xmit. No need to add additional overhead by |
@@ -134,10 +137,8 @@ static inline int qdisc_restart(struct net_device *dev) | |||
134 | 137 | ||
135 | if (!netif_queue_stopped(dev)) { | 138 | if (!netif_queue_stopped(dev)) { |
136 | int ret; | 139 | int ret; |
137 | if (netdev_nit) | ||
138 | dev_queue_xmit_nit(skb, dev); | ||
139 | 140 | ||
140 | ret = dev->hard_start_xmit(skb, dev); | 141 | ret = dev_hard_start_xmit(skb, dev); |
141 | if (ret == NETDEV_TX_OK) { | 142 | if (ret == NETDEV_TX_OK) { |
142 | if (!nolock) { | 143 | if (!nolock) { |
143 | netif_tx_unlock(dev); | 144 | netif_tx_unlock(dev); |
@@ -171,7 +172,10 @@ static inline int qdisc_restart(struct net_device *dev) | |||
171 | */ | 172 | */ |
172 | 173 | ||
173 | requeue: | 174 | requeue: |
174 | q->ops->requeue(skb, q); | 175 | if (skb->next) |
176 | dev->gso_skb = skb; | ||
177 | else | ||
178 | q->ops->requeue(skb, q); | ||
175 | netif_schedule(dev); | 179 | netif_schedule(dev); |
176 | return 1; | 180 | return 1; |
177 | } | 181 | } |
@@ -593,6 +597,11 @@ void dev_deactivate(struct net_device *dev) | |||
593 | /* Wait for outstanding qdisc_run calls. */ | 597 | /* Wait for outstanding qdisc_run calls. */ |
594 | while (test_bit(__LINK_STATE_QDISC_RUNNING, &dev->state)) | 598 | while (test_bit(__LINK_STATE_QDISC_RUNNING, &dev->state)) |
595 | yield(); | 599 | yield(); |
600 | |||
601 | if (dev->gso_skb) { | ||
602 | kfree_skb(dev->gso_skb); | ||
603 | dev->gso_skb = NULL; | ||
604 | } | ||
596 | } | 605 | } |
597 | 606 | ||
598 | void dev_init_scheduler(struct net_device *dev) | 607 | void dev_init_scheduler(struct net_device *dev) |