author		Linus Torvalds <torvalds@g5.osdl.org>	2006-06-23 11:00:01 -0400
committer	Linus Torvalds <torvalds@g5.osdl.org>	2006-06-23 11:00:01 -0400
commit		199f4c9f76fd8b030405abddf294e771f888de03 (patch)
tree		ee4f104a7562e1fd76882bc40f2de7d90812e1df /net/core
parent		37224470c8c6d90a4062e76a08d4dc1fcf91fc89 (diff)
parent		ca6bb5d7ab22ac79f608fe6cbc6b12de6a5a19f0 (diff)
Merge master.kernel.org:/pub/scm/linux/kernel/git/davem/net-2.6
* master.kernel.org:/pub/scm/linux/kernel/git/davem/net-2.6:
  [NET]: Require CAP_NET_ADMIN to create tuntap devices.
  [NET]: fix net-core kernel-doc
  [TCP]: Move inclusion of <linux/dmaengine.h> to correct place in <linux/tcp.h>
  [IPSEC]: Handle GSO packets
  [NET]: Added GSO toggle
  [NET]: Add software TSOv4
  [NET]: Add generic segmentation offload
  [NET]: Merge TSO/UFO fields in sk_buff
  [NET]: Prevent transmission after dev_deactivate
  [IPV6] ADDRCONF: Fix default source address selection without CONFIG_IPV6_PRIVACY
  [IPV6]: Fix source address selection.
  [NET]: Avoid allocating skb in skb_pad
Diffstat (limited to 'net/core')
-rw-r--r--	net/core/dev.c		137
-rw-r--r--	net/core/ethtool.c	 29
-rw-r--r--	net/core/skbuff.c	178
3 files changed, 317 insertions(+), 27 deletions(-)
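
The core of this merge is the generic segmentation offload (GSO) infrastructure: the old tso_size/tso_segs/ufo_size fields of skb_shared_info become gso_size/gso_segs/gso_type, and a large transmit skb carries that metadata until it is segmented in software or by the NIC. As a rough illustration only (not part of this patch; the helper name and the SKB_GSO_TCPV4 flag are assumptions taken from the same patch series), a sender-side protocol would fill the fields along these lines:

/* Illustrative sketch only -- helper name and gso_type flag are assumptions. */
static void example_set_gso_fields(struct sk_buff *skb,
				   unsigned int payload_len,
				   unsigned int mss)
{
	skb_shinfo(skb)->gso_size = mss;				/* payload bytes per segment */
	skb_shinfo(skb)->gso_segs = (payload_len + mss - 1) / mss;	/* expected segment count */
	skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;			/* assumed flag name */
}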
diff --git a/net/core/dev.c b/net/core/dev.c
index 195a5e96b2d1..ea2469398bd5 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -116,6 +116,7 @@
 #include <asm/current.h>
 #include <linux/audit.h>
 #include <linux/dmaengine.h>
+#include <linux/err.h>
 
 /*
  * The list of packet types we will receive (as opposed to discard)
@@ -1048,7 +1049,7 @@ static inline void net_timestamp(struct sk_buff *skb)
  *	taps currently in use.
  */
 
-void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
+static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct packet_type *ptype;
 
@@ -1186,6 +1187,40 @@ out:
 	return ret;
 }
 
+/**
+ *	skb_gso_segment - Perform segmentation on skb.
+ *	@skb: buffer to segment
+ *	@sg: whether scatter-gather is supported on the target.
+ *
+ *	This function segments the given skb and returns a list of segments.
+ */
+struct sk_buff *skb_gso_segment(struct sk_buff *skb, int sg)
+{
+	struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
+	struct packet_type *ptype;
+	int type = skb->protocol;
+
+	BUG_ON(skb_shinfo(skb)->frag_list);
+	BUG_ON(skb->ip_summed != CHECKSUM_HW);
+
+	skb->mac.raw = skb->data;
+	skb->mac_len = skb->nh.raw - skb->data;
+	__skb_pull(skb, skb->mac_len);
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type) & 15], list) {
+		if (ptype->type == type && !ptype->dev && ptype->gso_segment) {
+			segs = ptype->gso_segment(skb, sg);
+			break;
+		}
+	}
+	rcu_read_unlock();
+
+	return segs;
+}
+
+EXPORT_SYMBOL(skb_gso_segment);
+
 /* Take action when hardware reception checksum errors are detected. */
 #ifdef CONFIG_BUG
 void netdev_rx_csum_fault(struct net_device *dev)
@@ -1222,6 +1257,86 @@ static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
 #define illegal_highdma(dev, skb)	(0)
 #endif
 
+struct dev_gso_cb {
+	void (*destructor)(struct sk_buff *skb);
+};
+
+#define DEV_GSO_CB(skb) ((struct dev_gso_cb *)(skb)->cb)
+
+static void dev_gso_skb_destructor(struct sk_buff *skb)
+{
+	struct dev_gso_cb *cb;
+
+	do {
+		struct sk_buff *nskb = skb->next;
+
+		skb->next = nskb->next;
+		nskb->next = NULL;
+		kfree_skb(nskb);
+	} while (skb->next);
+
+	cb = DEV_GSO_CB(skb);
+	if (cb->destructor)
+		cb->destructor(skb);
+}
+
+/**
+ *	dev_gso_segment - Perform emulated hardware segmentation on skb.
+ *	@skb: buffer to segment
+ *
+ *	This function segments the given skb and stores the list of segments
+ *	in skb->next.
+ */
+static int dev_gso_segment(struct sk_buff *skb)
+{
+	struct net_device *dev = skb->dev;
+	struct sk_buff *segs;
+
+	segs = skb_gso_segment(skb, dev->features & NETIF_F_SG &&
+				    !illegal_highdma(dev, skb));
+	if (unlikely(IS_ERR(segs)))
+		return PTR_ERR(segs);
+
+	skb->next = segs;
+	DEV_GSO_CB(skb)->destructor = skb->destructor;
+	skb->destructor = dev_gso_skb_destructor;
+
+	return 0;
+}
+
+int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+	if (likely(!skb->next)) {
+		if (netdev_nit)
+			dev_queue_xmit_nit(skb, dev);
+
+		if (!netif_needs_gso(dev, skb))
+			return dev->hard_start_xmit(skb, dev);
+
+		if (unlikely(dev_gso_segment(skb)))
+			goto out_kfree_skb;
+	}
+
+	do {
+		struct sk_buff *nskb = skb->next;
+		int rc;
+
+		skb->next = nskb->next;
+		nskb->next = NULL;
+		rc = dev->hard_start_xmit(nskb, dev);
+		if (unlikely(rc)) {
+			skb->next = nskb;
+			return rc;
+		}
+	} while (skb->next);
+
+	skb->destructor = DEV_GSO_CB(skb)->destructor;
+
+out_kfree_skb:
+	kfree_skb(skb);
+	return 0;
+}
+
 #define HARD_TX_LOCK(dev, cpu) {			\
 	if ((dev->features & NETIF_F_LLTX) == 0) {	\
 		netif_tx_lock(dev);			\
@@ -1266,6 +1381,10 @@ int dev_queue_xmit(struct sk_buff *skb)
 	struct Qdisc *q;
 	int rc = -ENOMEM;
 
+	/* GSO will handle the following emulations directly. */
+	if (netif_needs_gso(dev, skb))
+		goto gso;
+
 	if (skb_shinfo(skb)->frag_list &&
 	    !(dev->features & NETIF_F_FRAGLIST) &&
 	    __skb_linearize(skb))
@@ -1290,12 +1409,13 @@ int dev_queue_xmit(struct sk_buff *skb)
 		if (skb_checksum_help(skb, 0))
 			goto out_kfree_skb;
 
+gso:
 	spin_lock_prefetch(&dev->queue_lock);
 
 	/* Disable soft irqs for various locks below. Also
 	 * stops preemption for RCU.
 	 */
-	local_bh_disable();
+	rcu_read_lock_bh();
 
 	/* Updates of qdisc are serialized by queue_lock.
 	 * The struct Qdisc which is pointed to by qdisc is now a
@@ -1346,11 +1466,8 @@ int dev_queue_xmit(struct sk_buff *skb)
 			HARD_TX_LOCK(dev, cpu);
 
 			if (!netif_queue_stopped(dev)) {
-				if (netdev_nit)
-					dev_queue_xmit_nit(skb, dev);
-
 				rc = 0;
-				if (!dev->hard_start_xmit(skb, dev)) {
+				if (!dev_hard_start_xmit(skb, dev)) {
 					HARD_TX_UNLOCK(dev);
 					goto out;
 				}
@@ -1369,13 +1486,13 @@ int dev_queue_xmit(struct sk_buff *skb)
 	}
 
 	rc = -ENETDOWN;
-	local_bh_enable();
+	rcu_read_unlock_bh();
 
 out_kfree_skb:
 	kfree_skb(skb);
 	return rc;
 out:
-	local_bh_enable();
+	rcu_read_unlock_bh();
 	return rc;
 }
 
@@ -3301,8 +3418,8 @@ static void net_dma_rebalance(void)
 /**
  * netdev_dma_event - event callback for the net_dma_client
  * @client: should always be net_dma_client
- * @chan:
- * @event:
+ * @chan: DMA channel for the event
+ * @event: event type
  */
 static void netdev_dma_event(struct dma_client *client, struct dma_chan *chan,
 	enum dma_event event)
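
skb_gso_segment() above dispatches on the packet_type registered for skb->protocol, so a protocol family opts into software GSO by providing the new gso_segment hook. A minimal sketch with hypothetical names follows; the real IPv4/TCP handlers are added elsewhere in this patch series and are not shown here.

/* Illustrative sketch only -- names are hypothetical. */
static struct sk_buff *example_gso_segment(struct sk_buff *skb, int sg)
{
	/* A real handler would call skb_segment() and fix up its headers. */
	return ERR_PTR(-EPROTONOSUPPORT);
}

static struct packet_type example_packet_type = {
	.type		= __constant_htons(ETH_P_IP),
	.gso_segment	= example_gso_segment,
	/* .func (normal receive handler) omitted for brevity */
};

/* Registered as usual with dev_add_pack(&example_packet_type); */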
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 33ce7ed6afc6..27ce1683caf5 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -614,6 +614,29 @@ static int ethtool_set_ufo(struct net_device *dev, char __user *useraddr)
 	return dev->ethtool_ops->set_ufo(dev, edata.data);
 }
 
+static int ethtool_get_gso(struct net_device *dev, char __user *useraddr)
+{
+	struct ethtool_value edata = { ETHTOOL_GGSO };
+
+	edata.data = dev->features & NETIF_F_GSO;
+	if (copy_to_user(useraddr, &edata, sizeof(edata)))
+		return -EFAULT;
+	return 0;
+}
+
+static int ethtool_set_gso(struct net_device *dev, char __user *useraddr)
+{
+	struct ethtool_value edata;
+
+	if (copy_from_user(&edata, useraddr, sizeof(edata)))
+		return -EFAULT;
+	if (edata.data)
+		dev->features |= NETIF_F_GSO;
+	else
+		dev->features &= ~NETIF_F_GSO;
+	return 0;
+}
+
 static int ethtool_self_test(struct net_device *dev, char __user *useraddr)
 {
 	struct ethtool_test test;
@@ -905,6 +928,12 @@ int dev_ethtool(struct ifreq *ifr)
 	case ETHTOOL_SUFO:
 		rc = ethtool_set_ufo(dev, useraddr);
 		break;
+	case ETHTOOL_GGSO:
+		rc = ethtool_get_gso(dev, useraddr);
+		break;
+	case ETHTOOL_SGSO:
+		rc = ethtool_set_gso(dev, useraddr);
+		break;
 	default:
 		rc = -EOPNOTSUPP;
 	}
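
For completeness, a minimal userspace sketch of exercising the new ETHTOOL_SGSO command through the standard SIOCETHTOOL ioctl; set_gso() is a hypothetical helper and error handling is reduced to the ioctl() return value.

/* Illustrative userspace sketch only -- set_gso() is a hypothetical helper.
 * fd is an open socket, e.g. socket(AF_INET, SOCK_DGRAM, 0). */
#include <string.h>
#include <sys/ioctl.h>
#include <net/if.h>
#include <linux/types.h>
#include <linux/ethtool.h>
#include <linux/sockios.h>

static int set_gso(int fd, const char *ifname, int on)
{
	struct ethtool_value eval = { .cmd = ETHTOOL_SGSO, .data = on ? 1 : 0 };
	struct ifreq ifr;

	memset(&ifr, 0, sizeof(ifr));
	strncpy(ifr.ifr_name, ifname, IFNAMSIZ - 1);
	ifr.ifr_data = (char *)&eval;

	return ioctl(fd, SIOCETHTOOL, &ifr);	/* 0 on success, -1 on error */
}

Reading the current setting works the same way with ETHTOOL_GGSO, with the kernel filling in eval.data.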
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index bb7210f4005e..8e5044ba3ab6 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -172,9 +172,9 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
 	shinfo = skb_shinfo(skb);
 	atomic_set(&shinfo->dataref, 1);
 	shinfo->nr_frags = 0;
-	shinfo->tso_size = 0;
-	shinfo->tso_segs = 0;
-	shinfo->ufo_size = 0;
+	shinfo->gso_size = 0;
+	shinfo->gso_segs = 0;
+	shinfo->gso_type = 0;
 	shinfo->ip6_frag_id = 0;
 	shinfo->frag_list = NULL;
 
@@ -238,8 +238,9 @@ struct sk_buff *alloc_skb_from_cache(kmem_cache_t *cp,
 
 	atomic_set(&(skb_shinfo(skb)->dataref), 1);
 	skb_shinfo(skb)->nr_frags = 0;
-	skb_shinfo(skb)->tso_size = 0;
-	skb_shinfo(skb)->tso_segs = 0;
+	skb_shinfo(skb)->gso_size = 0;
+	skb_shinfo(skb)->gso_segs = 0;
+	skb_shinfo(skb)->gso_type = 0;
 	skb_shinfo(skb)->frag_list = NULL;
 out:
 	return skb;
@@ -528,8 +529,9 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
 #endif
 	skb_copy_secmark(new, old);
 	atomic_set(&new->users, 1);
-	skb_shinfo(new)->tso_size = skb_shinfo(old)->tso_size;
-	skb_shinfo(new)->tso_segs = skb_shinfo(old)->tso_segs;
+	skb_shinfo(new)->gso_size = skb_shinfo(old)->gso_size;
+	skb_shinfo(new)->gso_segs = skb_shinfo(old)->gso_segs;
+	skb_shinfo(new)->gso_type = skb_shinfo(old)->gso_type;
 }
 
 /**
@@ -781,24 +783,40 @@ struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
  *	filled. Used by network drivers which may DMA or transfer data
  *	beyond the buffer end onto the wire.
  *
- *	May return NULL in out of memory cases.
+ *	May return error in out of memory cases. The skb is freed on error.
  */
 
-struct sk_buff *skb_pad(struct sk_buff *skb, int pad)
+int skb_pad(struct sk_buff *skb, int pad)
 {
-	struct sk_buff *nskb;
+	int err;
+	int ntail;
 
 	/* If the skbuff is non linear tailroom is always zero.. */
-	if (skb_tailroom(skb) >= pad) {
+	if (!skb_cloned(skb) && skb_tailroom(skb) >= pad) {
 		memset(skb->data+skb->len, 0, pad);
-		return skb;
+		return 0;
 	}
 
-	nskb = skb_copy_expand(skb, skb_headroom(skb), skb_tailroom(skb) + pad, GFP_ATOMIC);
+	ntail = skb->data_len + pad - (skb->end - skb->tail);
+	if (likely(skb_cloned(skb) || ntail > 0)) {
+		err = pskb_expand_head(skb, 0, ntail, GFP_ATOMIC);
+		if (unlikely(err))
+			goto free_skb;
+	}
+
+	/* FIXME: The use of this function with non-linear skb's really needs
+	 * to be audited.
+	 */
+	err = skb_linearize(skb);
+	if (unlikely(err))
+		goto free_skb;
+
+	memset(skb->data + skb->len, 0, pad);
+	return 0;
+
+free_skb:
 	kfree_skb(skb);
-	if (nskb)
-		memset(nskb->data+nskb->len, 0, pad);
-	return nskb;
+	return err;
 }
 
 /* Trims skb to length len. It can change skb pointers.
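
With the change above, skb_pad() returns 0 or a negative errno and frees the skb itself on failure, rather than handing back a (possibly new) skb. A hypothetical driver transmit path that pads short frames would therefore switch from reassigning the skb pointer to checking the return value, roughly as follows.

/* Illustrative sketch only -- example_xmit() is a hypothetical driver path. */
static int example_xmit(struct sk_buff *skb, struct net_device *dev)
{
	if (skb->len < ETH_ZLEN) {
		if (skb_pad(skb, ETH_ZLEN - skb->len))
			return 0;	/* skb was already freed by skb_pad() */
		skb->len = ETH_ZLEN;
	}

	/* ... queue the (now padded) skb to the hardware ... */
	return 0;
}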
@@ -1824,6 +1842,132 @@ unsigned char *skb_pull_rcsum(struct sk_buff *skb, unsigned int len)
 
 EXPORT_SYMBOL_GPL(skb_pull_rcsum);
 
+/**
+ *	skb_segment - Perform protocol segmentation on skb.
+ *	@skb: buffer to segment
+ *	@sg: whether scatter-gather can be used for generated segments
+ *
+ *	This function performs segmentation on the given skb. It returns
+ *	the segment at the given position. It returns NULL if there are
+ *	no more segments to generate, or when an error is encountered.
+ */
+struct sk_buff *skb_segment(struct sk_buff *skb, int sg)
+{
+	struct sk_buff *segs = NULL;
+	struct sk_buff *tail = NULL;
+	unsigned int mss = skb_shinfo(skb)->gso_size;
+	unsigned int doffset = skb->data - skb->mac.raw;
+	unsigned int offset = doffset;
+	unsigned int headroom;
+	unsigned int len;
+	int nfrags = skb_shinfo(skb)->nr_frags;
+	int err = -ENOMEM;
+	int i = 0;
+	int pos;
+
+	__skb_push(skb, doffset);
+	headroom = skb_headroom(skb);
+	pos = skb_headlen(skb);
+
+	do {
+		struct sk_buff *nskb;
+		skb_frag_t *frag;
+		int hsize, nsize;
+		int k;
+		int size;
+
+		len = skb->len - offset;
+		if (len > mss)
+			len = mss;
+
+		hsize = skb_headlen(skb) - offset;
+		if (hsize < 0)
+			hsize = 0;
+		nsize = hsize + doffset;
+		if (nsize > len + doffset || !sg)
+			nsize = len + doffset;
+
+		nskb = alloc_skb(nsize + headroom, GFP_ATOMIC);
+		if (unlikely(!nskb))
+			goto err;
+
+		if (segs)
+			tail->next = nskb;
+		else
+			segs = nskb;
+		tail = nskb;
+
+		nskb->dev = skb->dev;
+		nskb->priority = skb->priority;
+		nskb->protocol = skb->protocol;
+		nskb->dst = dst_clone(skb->dst);
+		memcpy(nskb->cb, skb->cb, sizeof(skb->cb));
+		nskb->pkt_type = skb->pkt_type;
+		nskb->mac_len = skb->mac_len;
+
+		skb_reserve(nskb, headroom);
+		nskb->mac.raw = nskb->data;
+		nskb->nh.raw = nskb->data + skb->mac_len;
+		nskb->h.raw = nskb->nh.raw + (skb->h.raw - skb->nh.raw);
+		memcpy(skb_put(nskb, doffset), skb->data, doffset);
+
+		if (!sg) {
+			nskb->csum = skb_copy_and_csum_bits(skb, offset,
+							    skb_put(nskb, len),
+							    len, 0);
+			continue;
+		}
+
+		frag = skb_shinfo(nskb)->frags;
+		k = 0;
+
+		nskb->ip_summed = CHECKSUM_HW;
+		nskb->csum = skb->csum;
+		memcpy(skb_put(nskb, hsize), skb->data + offset, hsize);
+
+		while (pos < offset + len) {
+			BUG_ON(i >= nfrags);
+
+			*frag = skb_shinfo(skb)->frags[i];
+			get_page(frag->page);
+			size = frag->size;
+
+			if (pos < offset) {
+				frag->page_offset += offset - pos;
+				frag->size -= offset - pos;
+			}
+
+			k++;
+
+			if (pos + size <= offset + len) {
+				i++;
+				pos += size;
+			} else {
+				frag->size -= pos + size - (offset + len);
+				break;
+			}
+
+			frag++;
+		}
+
+		skb_shinfo(nskb)->nr_frags = k;
+		nskb->data_len = len - hsize;
+		nskb->len += nskb->data_len;
+		nskb->truesize += nskb->data_len;
+	} while ((offset += len) < skb->len);
+
+	return segs;
+
+err:
+	while ((skb = segs)) {
+		segs = skb->next;
+		kfree(skb);
+	}
+	return ERR_PTR(err);
+}
+
+EXPORT_SYMBOL_GPL(skb_segment);
+
 void __init skb_init(void)
 {
 	skbuff_head_cache = kmem_cache_create("skbuff_head_cache",
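
A protocol's gso_segment handler is expected to call skb_segment() above to replicate the headers and split the payload, and then walk the returned list fixing up its own per-segment header fields. A sketch of that shape follows; the names and the fix-up comment are placeholders, and the real TCPv4 handler added elsewhere in this series is not shown here.

/* Illustrative sketch only -- names and fix-ups are placeholders. */
static struct sk_buff *example_proto_gso_segment(struct sk_buff *skb, int sg)
{
	struct sk_buff *segs;

	segs = skb_segment(skb, sg);
	if (IS_ERR(segs) || !segs)
		return segs;

	for (skb = segs; skb; skb = skb->next) {
		/* rewrite per-segment protocol headers here:
		 * sequence numbers, lengths, checksums, ... */
	}

	return segs;
}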