path: root/net/core
author	Dan Williams <dan.j.williams@intel.com>	2009-09-08 20:55:21 -0400
committer	Dan Williams <dan.j.williams@intel.com>	2009-09-08 20:55:21 -0400
commit	bbb20089a3275a19e475dbc21320c3742e3ca423 (patch)
tree	216fdc1cbef450ca688135c5b8969169482d9a48 /net/core
parent	3e48e656903e9fd8bc805c6a2c4264d7808d315b (diff)
parent	657a77fa7284d8ae28dfa48f1dc5d919bf5b2843 (diff)

Merge branch 'dmaengine' into async-tx-next

Conflicts:
	crypto/async_tx/async_xor.c
	drivers/dma/ioat/dma_v2.h
	drivers/dma/ioat/pci.c
	drivers/md/raid5.c
Diffstat (limited to 'net/core')
-rw-r--r--	net/core/datagram.c	241
-rw-r--r--	net/core/dev.c	688
-rw-r--r--	net/core/drop_monitor.c	139
-rw-r--r--	net/core/fib_rules.c	4
-rw-r--r--	net/core/gen_estimator.c	4
-rw-r--r--	net/core/iovec.c	33
-rw-r--r--	net/core/neighbour.c	57
-rw-r--r--	net/core/net-sysfs.c	9
-rw-r--r--	net/core/net-traces.c	7
-rw-r--r--	net/core/net_namespace.c	54
-rw-r--r--	net/core/netpoll.c	7
-rw-r--r--	net/core/pktgen.c	7
-rw-r--r--	net/core/skb_dma_map.c	13
-rw-r--r--	net/core/skbuff.c	330
-rw-r--r--	net/core/sock.c	137
-rw-r--r--	net/core/stream.c	3
-rw-r--r--	net/core/user_dma.c	46
17 files changed, 1186 insertions, 593 deletions
diff --git a/net/core/datagram.c b/net/core/datagram.c
index b01a76abe1d2..58abee1f1df1 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -260,7 +260,9 @@ int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags)
 		spin_unlock_bh(&sk->sk_receive_queue.lock);
 	}
 
-	skb_free_datagram(sk, skb);
+	kfree_skb(skb);
+	sk_mem_reclaim_partial(sk);
+
 	return err;
 }
 
@@ -280,6 +282,7 @@ int skb_copy_datagram_iovec(const struct sk_buff *skb, int offset,
 {
 	int start = skb_headlen(skb);
 	int i, copy = start - offset;
+	struct sk_buff *frag_iter;
 
 	/* Copy header. */
 	if (copy > 0) {
@@ -320,28 +323,24 @@ int skb_copy_datagram_iovec(const struct sk_buff *skb, int offset,
 		start = end;
 	}
 
-	if (skb_shinfo(skb)->frag_list) {
-		struct sk_buff *list = skb_shinfo(skb)->frag_list;
-
-		for (; list; list = list->next) {
-			int end;
-
-			WARN_ON(start > offset + len);
-
-			end = start + list->len;
-			if ((copy = end - offset) > 0) {
-				if (copy > len)
-					copy = len;
-				if (skb_copy_datagram_iovec(list,
-							    offset - start,
-							    to, copy))
-					goto fault;
-				if ((len -= copy) == 0)
-					return 0;
-				offset += copy;
-			}
-			start = end;
-		}
+	skb_walk_frags(skb, frag_iter) {
+		int end;
+
+		WARN_ON(start > offset + len);
+
+		end = start + frag_iter->len;
+		if ((copy = end - offset) > 0) {
+			if (copy > len)
+				copy = len;
+			if (skb_copy_datagram_iovec(frag_iter,
+						    offset - start,
+						    to, copy))
+				goto fault;
+			if ((len -= copy) == 0)
+				return 0;
+			offset += copy;
+		}
+		start = end;
 	}
 	if (!len)
 		return 0;
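
Note: the conversions in this patch lean on the skb_walk_frags() iterator added by the same series. For reference (not part of this diff), the helper in include/linux/skbuff.h is essentially:

	/* Sketch of the frag_list iterator these hunks switch to. */
	#define skb_walk_frags(skb, iter) \
		for (iter = skb_shinfo(skb)->frag_list; iter; iter = iter->next)
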
@@ -351,30 +350,124 @@ fault:
 }
 
 /**
+ * skb_copy_datagram_const_iovec - Copy a datagram to an iovec.
+ * @skb: buffer to copy
+ * @offset: offset in the buffer to start copying from
+ * @to: io vector to copy to
+ * @to_offset: offset in the io vector to start copying to
+ * @len: amount of data to copy from buffer to iovec
+ *
+ * Returns 0 or -EFAULT.
+ * Note: the iovec is not modified during the copy.
+ */
+int skb_copy_datagram_const_iovec(const struct sk_buff *skb, int offset,
+				  const struct iovec *to, int to_offset,
+				  int len)
+{
+	int start = skb_headlen(skb);
+	int i, copy = start - offset;
+	struct sk_buff *frag_iter;
+
+	/* Copy header. */
+	if (copy > 0) {
+		if (copy > len)
+			copy = len;
+		if (memcpy_toiovecend(to, skb->data + offset, to_offset, copy))
+			goto fault;
+		if ((len -= copy) == 0)
+			return 0;
+		offset += copy;
+		to_offset += copy;
+	}
+
+	/* Copy paged appendix. Hmm... why does this look so complicated? */
+	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+		int end;
+
+		WARN_ON(start > offset + len);
+
+		end = start + skb_shinfo(skb)->frags[i].size;
+		if ((copy = end - offset) > 0) {
+			int err;
+			u8 *vaddr;
+			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+			struct page *page = frag->page;
+
+			if (copy > len)
+				copy = len;
+			vaddr = kmap(page);
+			err = memcpy_toiovecend(to, vaddr + frag->page_offset +
+						offset - start, to_offset, copy);
+			kunmap(page);
+			if (err)
+				goto fault;
+			if (!(len -= copy))
+				return 0;
+			offset += copy;
+			to_offset += copy;
+		}
+		start = end;
+	}
+
+	skb_walk_frags(skb, frag_iter) {
+		int end;
+
+		WARN_ON(start > offset + len);
+
+		end = start + frag_iter->len;
+		if ((copy = end - offset) > 0) {
+			if (copy > len)
+				copy = len;
+			if (skb_copy_datagram_const_iovec(frag_iter,
+							  offset - start,
+							  to, to_offset,
+							  copy))
+				goto fault;
+			if ((len -= copy) == 0)
+				return 0;
+			offset += copy;
+			to_offset += copy;
+		}
+		start = end;
+	}
+	if (!len)
+		return 0;
+
+fault:
+	return -EFAULT;
+}
+EXPORT_SYMBOL(skb_copy_datagram_const_iovec);
+
+/**
  * skb_copy_datagram_from_iovec - Copy a datagram from an iovec.
  * @skb: buffer to copy
  * @offset: offset in the buffer to start copying to
  * @from: io vector to copy to
+ * @from_offset: offset in the io vector to start copying from
  * @len: amount of data to copy to buffer from iovec
  *
  * Returns 0 or -EFAULT.
- * Note: the iovec is modified during the copy.
+ * Note: the iovec is not modified during the copy.
  */
 int skb_copy_datagram_from_iovec(struct sk_buff *skb, int offset,
-				 struct iovec *from, int len)
+				 const struct iovec *from, int from_offset,
+				 int len)
 {
 	int start = skb_headlen(skb);
 	int i, copy = start - offset;
+	struct sk_buff *frag_iter;
 
 	/* Copy header. */
 	if (copy > 0) {
 		if (copy > len)
 			copy = len;
-		if (memcpy_fromiovec(skb->data + offset, from, copy))
+		if (memcpy_fromiovecend(skb->data + offset, from, from_offset,
+					copy))
 			goto fault;
 		if ((len -= copy) == 0)
 			return 0;
 		offset += copy;
+		from_offset += copy;
 	}
 
 	/* Copy paged appendix. Hmm... why does this look so complicated? */
@@ -393,8 +486,9 @@ int skb_copy_datagram_from_iovec(struct sk_buff *skb, int offset,
 			if (copy > len)
 				copy = len;
 			vaddr = kmap(page);
-			err = memcpy_fromiovec(vaddr + frag->page_offset +
-					       offset - start, from, copy);
+			err = memcpy_fromiovecend(vaddr + frag->page_offset +
+						  offset - start,
+						  from, from_offset, copy);
 			kunmap(page);
 			if (err)
 				goto fault;
@@ -402,32 +496,32 @@ int skb_copy_datagram_from_iovec(struct sk_buff *skb, int offset,
 			if (!(len -= copy))
 				return 0;
 			offset += copy;
+			from_offset += copy;
 		}
 		start = end;
 	}
 
-	if (skb_shinfo(skb)->frag_list) {
-		struct sk_buff *list = skb_shinfo(skb)->frag_list;
-
-		for (; list; list = list->next) {
-			int end;
-
-			WARN_ON(start > offset + len);
-
-			end = start + list->len;
-			if ((copy = end - offset) > 0) {
-				if (copy > len)
-					copy = len;
-				if (skb_copy_datagram_from_iovec(list,
-								 offset - start,
-								 from, copy))
-					goto fault;
-				if ((len -= copy) == 0)
-					return 0;
-				offset += copy;
-			}
-			start = end;
-		}
+	skb_walk_frags(skb, frag_iter) {
+		int end;
+
+		WARN_ON(start > offset + len);
+
+		end = start + frag_iter->len;
+		if ((copy = end - offset) > 0) {
+			if (copy > len)
+				copy = len;
+			if (skb_copy_datagram_from_iovec(frag_iter,
+							 offset - start,
+							 from,
+							 from_offset,
+							 copy))
+				goto fault;
+			if ((len -= copy) == 0)
+				return 0;
+			offset += copy;
+			from_offset += copy;
+		}
+		start = end;
 	}
 	if (!len)
 		return 0;
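
The switch from memcpy_fromiovec() to memcpy_fromiovecend() is what makes the new from_offset bookkeeping possible: the -end variant takes an explicit starting offset and leaves the iovec array untouched instead of consuming it. A sketch of the expected semantics (the real helper lives in net/core/iovec.c):

	/* Sketch only: copy len bytes out of the iovec, starting offset bytes
	 * in, without advancing iov_base/iov_len as memcpy_fromiovec() does. */
	int memcpy_fromiovecend(unsigned char *kdata, const struct iovec *iov,
				int offset, int len)
	{
		/* Skip the iovec entries fully covered by offset */
		while (offset >= iov->iov_len) {
			offset -= iov->iov_len;
			iov++;
		}

		while (len > 0) {
			u8 __user *base = iov->iov_base + offset;
			int copy = min_t(unsigned int, len,
					 iov->iov_len - offset);

			offset = 0;
			if (copy_from_user(kdata, base, copy))
				return -EFAULT;
			len -= copy;
			kdata += copy;
			iov++;
		}
		return 0;
	}
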
@@ -442,8 +536,9 @@ static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset,
 			       __wsum *csump)
 {
 	int start = skb_headlen(skb);
-	int pos = 0;
 	int i, copy = start - offset;
+	struct sk_buff *frag_iter;
+	int pos = 0;
 
 	/* Copy header. */
 	if (copy > 0) {
@@ -494,33 +589,29 @@ static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset,
 		start = end;
 	}
 
-	if (skb_shinfo(skb)->frag_list) {
-		struct sk_buff *list = skb_shinfo(skb)->frag_list;
-
-		for (; list; list=list->next) {
-			int end;
-
-			WARN_ON(start > offset + len);
-
-			end = start + list->len;
-			if ((copy = end - offset) > 0) {
-				__wsum csum2 = 0;
-				if (copy > len)
-					copy = len;
-				if (skb_copy_and_csum_datagram(list,
-							       offset - start,
-							       to, copy,
-							       &csum2))
-					goto fault;
-				*csump = csum_block_add(*csump, csum2, pos);
-				if ((len -= copy) == 0)
-					return 0;
-				offset += copy;
-				to += copy;
-				pos += copy;
-			}
-			start = end;
-		}
+	skb_walk_frags(skb, frag_iter) {
+		int end;
+
+		WARN_ON(start > offset + len);
+
+		end = start + frag_iter->len;
+		if ((copy = end - offset) > 0) {
+			__wsum csum2 = 0;
+			if (copy > len)
+				copy = len;
+			if (skb_copy_and_csum_datagram(frag_iter,
+						       offset - start,
+						       to, copy,
+						       &csum2))
+				goto fault;
+			*csump = csum_block_add(*csump, csum2, pos);
+			if ((len -= copy) == 0)
+				return 0;
+			offset += copy;
+			to += copy;
+			pos += copy;
+		}
+		start = end;
 	}
 	if (!len)
 		return 0;
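
In the checksum variant, each recursive call computes csum2 over one fragment and csum_block_add() folds it into the running sum at byte position pos; because the 16-bit ones'-complement sum is otherwise position-independent, the helper only needs to byte-swap the partial sum when a fragment starts at an odd offset. Roughly (see include/net/checksum.h):

	/* Sketch of the folding helper used above. */
	static inline __wsum csum_block_add(__wsum csum, __wsum csum2, int offset)
	{
		u32 sum = (__force u32)csum2;

		if (offset & 1)		/* odd offset: swap bytes of csum2 */
			sum = ((sum & 0xFF00FF) << 8) + ((sum >> 8) & 0xFF00FF);
		return csum_add(csum, (__force __wsum)sum);
	}
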
diff --git a/net/core/dev.c b/net/core/dev.c
index e2e9e4af3ace..60b572812278 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -126,6 +126,7 @@
 #include <linux/in.h>
 #include <linux/jhash.h>
 #include <linux/random.h>
+#include <trace/events/napi.h>
 
 #include "net-sysfs.h"
 
@@ -268,7 +269,8 @@ static const unsigned short netdev_lock_type[] =
 	 ARPHRD_IRDA, ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL,
 	 ARPHRD_FCFABRIC, ARPHRD_IEEE802_TR, ARPHRD_IEEE80211,
 	 ARPHRD_IEEE80211_PRISM, ARPHRD_IEEE80211_RADIOTAP, ARPHRD_PHONET,
-	 ARPHRD_PHONET_PIPE, ARPHRD_VOID, ARPHRD_NONE};
+	 ARPHRD_PHONET_PIPE, ARPHRD_IEEE802154, ARPHRD_IEEE802154_PHY,
+	 ARPHRD_VOID, ARPHRD_NONE};
 
 static const char *netdev_lock_name[] =
 	{"_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25",
@@ -285,7 +287,8 @@ static const char *netdev_lock_name[] =
 	 "_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL",
 	 "_xmit_FCFABRIC", "_xmit_IEEE802_TR", "_xmit_IEEE80211",
 	 "_xmit_IEEE80211_PRISM", "_xmit_IEEE80211_RADIOTAP", "_xmit_PHONET",
-	 "_xmit_PHONET_PIPE", "_xmit_VOID", "_xmit_NONE"};
+	 "_xmit_PHONET_PIPE", "_xmit_IEEE802154", "_xmit_IEEE802154_PHY",
+	 "_xmit_VOID", "_xmit_NONE"};
 
 static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)];
 static struct lock_class_key netdev_addr_lock_key[ARRAY_SIZE(netdev_lock_type)];
@@ -1047,7 +1050,7 @@ void dev_load(struct net *net, const char *name)
 int dev_open(struct net_device *dev)
 {
 	const struct net_device_ops *ops = dev->netdev_ops;
-	int ret = 0;
+	int ret;
 
 	ASSERT_RTNL();
 
@@ -1064,6 +1067,11 @@ int dev_open(struct net_device *dev)
 	if (!netif_device_present(dev))
 		return -ENODEV;
 
+	ret = call_netdevice_notifiers(NETDEV_PRE_UP, dev);
+	ret = notifier_to_errno(ret);
+	if (ret)
+		return ret;
+
 	/*
 	 *	Call device private open method
 	 */
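
NETDEV_PRE_UP gives notifier chains a chance to veto an interface coming up before ndo_open runs: returning an errno wrapped with notifier_from_errno() makes dev_open() fail with that code. A hedged sketch of a subscriber (my_pre_up_notifier and my_policy_allows are made-up names):

	/* Hypothetical subscriber that can veto dev_open(). */
	static int my_pre_up_notifier(struct notifier_block *nb,
				      unsigned long event, void *ptr)
	{
		struct net_device *dev = ptr;	/* in this era, ptr is the device */

		if (event == NETDEV_PRE_UP && !my_policy_allows(dev))
			return notifier_from_errno(-EPERM); /* dev_open() fails */
		return NOTIFY_DONE;
	}

	static struct notifier_block my_nb = {
		.notifier_call = my_pre_up_notifier,
	};
	/* registered elsewhere with register_netdevice_notifier(&my_nb) */
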
@@ -1688,7 +1696,16 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
 			goto gso;
 	}
 
+	/*
+	 * If device doesnt need skb->dst, release it right now while
+	 * its hot in this cpu cache
+	 */
+	if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
+		skb_dst_drop(skb);
+
 	rc = ops->ndo_start_xmit(skb, dev);
+	if (rc == 0)
+		txq_trans_update(txq);
 	/*
 	 * TODO: if skb_orphan() was called by
 	 * dev->hard_start_xmit() (for example, the unmodified
@@ -1718,6 +1735,7 @@ gso:
 			skb->next = nskb;
 			return rc;
 		}
+		txq_trans_update(txq);
 		if (unlikely(netif_tx_queue_stopped(txq) && skb->next))
 			return NETDEV_TX_BUSY;
 	} while (skb->next);
@@ -1735,8 +1753,12 @@ u16 skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb)
 {
 	u32 hash;
 
-	if (skb_rx_queue_recorded(skb))
-		return skb_get_rx_queue(skb) % dev->real_num_tx_queues;
+	if (skb_rx_queue_recorded(skb)) {
+		hash = skb_get_rx_queue(skb);
+		while (unlikely(hash >= dev->real_num_tx_queues))
+			hash -= dev->real_num_tx_queues;
+		return hash;
+	}
 
 	if (skb->sk && skb->sk->sk_hash)
 		hash = skb->sk->sk_hash;
@@ -1800,7 +1822,7 @@ int dev_queue_xmit(struct sk_buff *skb)
 	if (netif_needs_gso(dev, skb))
 		goto gso;
 
-	if (skb_shinfo(skb)->frag_list &&
+	if (skb_has_frags(skb) &&
 	    !(dev->features & NETIF_F_FRAGLIST) &&
 	    __skb_linearize(skb))
 		goto out_kfree_skb;
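
skb_has_frags() here (and in the GRO hunk below) is a thin readability wrapper from the same series; it does not change behaviour. Approximately:

	/* Sketch; the real inline is in include/linux/skbuff.h. */
	static inline bool skb_has_frags(const struct sk_buff *skb)
	{
		return skb_shinfo(skb)->frag_list != NULL;
	}
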
@@ -2049,11 +2071,13 @@ static inline int deliver_skb(struct sk_buff *skb,
 }
 
 #if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE)
-/* These hooks defined here for ATM */
-struct net_bridge;
-struct net_bridge_fdb_entry *(*br_fdb_get_hook)(struct net_bridge *br,
-						unsigned char *addr);
-void (*br_fdb_put_hook)(struct net_bridge_fdb_entry *ent) __read_mostly;
+
+#if defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE)
+/* This hook is defined here for ATM LANE */
+int (*br_fdb_test_addr_hook)(struct net_device *dev,
+			     unsigned char *addr) __read_mostly;
+EXPORT_SYMBOL(br_fdb_test_addr_hook);
+#endif
 
 /*
  * If bridge module is loaded call bridging hook.
@@ -2061,6 +2085,8 @@ void (*br_fdb_put_hook)(struct net_bridge_fdb_entry *ent) __read_mostly;
  */
 struct sk_buff *(*br_handle_frame_hook)(struct net_bridge_port *p,
 					struct sk_buff *skb) __read_mostly;
+EXPORT_SYMBOL(br_handle_frame_hook);
+
 static inline struct sk_buff *handle_bridge(struct sk_buff *skb,
 					    struct packet_type **pt_prev, int *ret,
 					    struct net_device *orig_dev)
@@ -2284,8 +2310,6 @@ ncls:
 	if (!skb)
 		goto out;
 
-	skb_orphan(skb);
-
 	type = skb->protocol;
 	list_for_each_entry_rcu(ptype,
 			&ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
@@ -2374,26 +2398,6 @@ void napi_gro_flush(struct napi_struct *napi)
 }
 EXPORT_SYMBOL(napi_gro_flush);
 
-void *skb_gro_header(struct sk_buff *skb, unsigned int hlen)
-{
-	unsigned int offset = skb_gro_offset(skb);
-
-	hlen += offset;
-	if (hlen <= skb_headlen(skb))
-		return skb->data + offset;
-
-	if (unlikely(!skb_shinfo(skb)->nr_frags ||
-		     skb_shinfo(skb)->frags[0].size <=
-		     hlen - skb_headlen(skb) ||
-		     PageHighMem(skb_shinfo(skb)->frags[0].page)))
-		return pskb_may_pull(skb, hlen) ? skb->data + offset : NULL;
-
-	return page_address(skb_shinfo(skb)->frags[0].page) +
-	       skb_shinfo(skb)->frags[0].page_offset +
-	       offset - skb_headlen(skb);
-}
-EXPORT_SYMBOL(skb_gro_header);
-
 int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 {
 	struct sk_buff **pp = NULL;
@@ -2407,7 +2411,7 @@ int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 	if (!(skb->dev->features & NETIF_F_GRO))
 		goto normal;
 
-	if (skb_is_gso(skb) || skb_shinfo(skb)->frag_list)
+	if (skb_is_gso(skb) || skb_has_frags(skb))
 		goto normal;
 
 	rcu_read_lock();
@@ -2456,10 +2460,25 @@ int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 	ret = GRO_HELD;
 
 pull:
-	if (unlikely(!pskb_may_pull(skb, skb_gro_offset(skb)))) {
-		if (napi->gro_list == skb)
-			napi->gro_list = skb->next;
-		ret = GRO_DROP;
+	if (skb_headlen(skb) < skb_gro_offset(skb)) {
+		int grow = skb_gro_offset(skb) - skb_headlen(skb);
+
+		BUG_ON(skb->end - skb->tail < grow);
+
+		memcpy(skb_tail_pointer(skb), NAPI_GRO_CB(skb)->frag0, grow);
+
+		skb->tail += grow;
+		skb->data_len -= grow;
+
+		skb_shinfo(skb)->frags[0].page_offset += grow;
+		skb_shinfo(skb)->frags[0].size -= grow;
+
+		if (unlikely(!skb_shinfo(skb)->frags[0].size)) {
+			put_page(skb_shinfo(skb)->frags[0].page);
+			memmove(skb_shinfo(skb)->frags,
+				skb_shinfo(skb)->frags + 1,
+				--skb_shinfo(skb)->nr_frags);
+		}
 	}
 
 ok:
@@ -2509,6 +2528,22 @@ int napi_skb_finish(int ret, struct sk_buff *skb)
 }
 EXPORT_SYMBOL(napi_skb_finish);
 
+void skb_gro_reset_offset(struct sk_buff *skb)
+{
+	NAPI_GRO_CB(skb)->data_offset = 0;
+	NAPI_GRO_CB(skb)->frag0 = NULL;
+	NAPI_GRO_CB(skb)->frag0_len = 0;
+
+	if (skb->mac_header == skb->tail &&
+	    !PageHighMem(skb_shinfo(skb)->frags[0].page)) {
+		NAPI_GRO_CB(skb)->frag0 =
+			page_address(skb_shinfo(skb)->frags[0].page) +
+			skb_shinfo(skb)->frags[0].page_offset;
+		NAPI_GRO_CB(skb)->frag0_len = skb_shinfo(skb)->frags[0].size;
+	}
+}
+EXPORT_SYMBOL(skb_gro_reset_offset);
+
 int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 {
 	skb_gro_reset_offset(skb);
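
skb_gro_reset_offset() caches a directly mapped pointer to the first page fragment (frag0) when a packet arrives with an empty linear area, so GRO protocol handlers can peek at headers without pskb_may_pull(); the "pull:" hunk above then copies only what was actually consumed into the linear area. The usual access pattern in a gro_receive handler, sketched with an IPv4 header for illustration:

	/* Sketch of the frag0-aware header access pattern. */
	struct iphdr *iph;
	unsigned int off = skb_gro_offset(skb);
	unsigned int hlen = off + sizeof(*iph);

	iph = skb_gro_header_fast(skb, off);		/* frag0 fast path */
	if (skb_gro_header_hard(skb, hlen)) {		/* not all in frag0? */
		iph = skb_gro_header_slow(skb, hlen, off); /* falls back to a pull */
		if (unlikely(!iph))
			goto out;
	}
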
@@ -2526,16 +2561,10 @@ void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb)
 }
 EXPORT_SYMBOL(napi_reuse_skb);
 
-struct sk_buff *napi_fraginfo_skb(struct napi_struct *napi,
-				  struct napi_gro_fraginfo *info)
+struct sk_buff *napi_get_frags(struct napi_struct *napi)
 {
 	struct net_device *dev = napi->dev;
 	struct sk_buff *skb = napi->skb;
-	struct ethhdr *eth;
-	skb_frag_t *frag;
-	int i;
-
-	napi->skb = NULL;
 
 	if (!skb) {
 		skb = netdev_alloc_skb(dev, GRO_MAX_HEAD + NET_IP_ALIGN);
@@ -2543,47 +2572,14 @@ struct sk_buff *napi_fraginfo_skb(struct napi_struct *napi,
 			goto out;
 
 		skb_reserve(skb, NET_IP_ALIGN);
-	}
-
-	BUG_ON(info->nr_frags > MAX_SKB_FRAGS);
-	frag = info->frags;
-
-	for (i = 0; i < info->nr_frags; i++) {
-		skb_fill_page_desc(skb, i, frag->page, frag->page_offset,
-				   frag->size);
-		frag++;
-	}
-	skb_shinfo(skb)->nr_frags = info->nr_frags;
-
-	skb->data_len = info->len;
-	skb->len += info->len;
-	skb->truesize += info->len;
 
-	skb_reset_mac_header(skb);
-	skb_gro_reset_offset(skb);
-
-	eth = skb_gro_header(skb, sizeof(*eth));
-	if (!eth) {
-		napi_reuse_skb(napi, skb);
-		skb = NULL;
-		goto out;
+		napi->skb = skb;
 	}
 
-	skb_gro_pull(skb, sizeof(*eth));
-
-	/*
-	 * This works because the only protocols we care about don't require
-	 * special handling.  We'll fix it up properly at the end.
-	 */
-	skb->protocol = eth->h_proto;
-
-	skb->ip_summed = info->ip_summed;
-	skb->csum = info->csum;
-
 out:
 	return skb;
 }
-EXPORT_SYMBOL(napi_fraginfo_skb);
+EXPORT_SYMBOL(napi_get_frags);
 
 int napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb, int ret)
 {
@@ -2613,9 +2609,46 @@ int napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb, int ret)
 }
 EXPORT_SYMBOL(napi_frags_finish);
 
-int napi_gro_frags(struct napi_struct *napi, struct napi_gro_fraginfo *info)
+struct sk_buff *napi_frags_skb(struct napi_struct *napi)
 {
-	struct sk_buff *skb = napi_fraginfo_skb(napi, info);
+	struct sk_buff *skb = napi->skb;
+	struct ethhdr *eth;
+	unsigned int hlen;
+	unsigned int off;
+
+	napi->skb = NULL;
+
+	skb_reset_mac_header(skb);
+	skb_gro_reset_offset(skb);
+
+	off = skb_gro_offset(skb);
+	hlen = off + sizeof(*eth);
+	eth = skb_gro_header_fast(skb, off);
+	if (skb_gro_header_hard(skb, hlen)) {
+		eth = skb_gro_header_slow(skb, hlen, off);
+		if (unlikely(!eth)) {
+			napi_reuse_skb(napi, skb);
+			skb = NULL;
+			goto out;
+		}
+	}
+
+	skb_gro_pull(skb, sizeof(*eth));
+
+	/*
+	 * This works because the only protocols we care about don't require
+	 * special handling.  We'll fix it up properly at the end.
+	 */
+	skb->protocol = eth->h_proto;
+
+out:
+	return skb;
+}
+EXPORT_SYMBOL(napi_frags_skb);
+
+int napi_gro_frags(struct napi_struct *napi)
+{
+	struct sk_buff *skb = napi_frags_skb(napi);
 
 	if (!skb)
 		return NET_RX_DROP;
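
With napi_gro_fraginfo gone, a page-flipping driver now borrows the skb from NAPI, attaches its receive pages itself, and hands the skb back. A hedged sketch of the new calling convention (buffer-management details elided):

	/* Sketch: per-packet RX path in a driver using the new frags API. */
	struct sk_buff *skb = napi_get_frags(napi);

	if (unlikely(!skb))
		return;				/* allocation failed; drop */

	skb_fill_page_desc(skb, 0, page, offset, len);	/* attach RX page */
	skb->len += len;
	skb->data_len += len;
	skb->truesize += len;
	skb->ip_summed = CHECKSUM_UNNECESSARY;	/* if hardware validated it */

	napi_gro_frags(napi);	/* pulls the Ethernet header, then runs GRO */
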
@@ -2719,7 +2752,7 @@ void netif_napi_del(struct napi_struct *napi)
 	struct sk_buff *skb, *next;
 
 	list_del_init(&napi->dev_list);
-	kfree_skb(napi->skb);
+	napi_free_frags(napi);
 
 	for (skb = napi->gro_list; skb; skb = next) {
 		next = skb->next;
@@ -2773,8 +2806,10 @@ static void net_rx_action(struct softirq_action *h)
 		 * accidently calling ->poll() when NAPI is not scheduled.
 		 */
 		work = 0;
-		if (test_bit(NAPI_STATE_SCHED, &n->state))
+		if (test_bit(NAPI_STATE_SCHED, &n->state)) {
 			work = n->poll(n, weight);
+			trace_napi_poll(n);
+		}
 
 		WARN_ON_ONCE(work > weight);
 
@@ -3424,10 +3459,10 @@ void __dev_set_rx_mode(struct net_device *dev)
 	/* Unicast addresses changes may only happen under the rtnl,
 	 * therefore calling __dev_set_promiscuity here is safe.
 	 */
-	if (dev->uc_count > 0 && !dev->uc_promisc) {
+	if (dev->uc.count > 0 && !dev->uc_promisc) {
 		__dev_set_promiscuity(dev, 1);
 		dev->uc_promisc = 1;
-	} else if (dev->uc_count == 0 && dev->uc_promisc) {
+	} else if (dev->uc.count == 0 && dev->uc_promisc) {
 		__dev_set_promiscuity(dev, -1);
 		dev->uc_promisc = 0;
 	}
@@ -3444,6 +3479,316 @@ void dev_set_rx_mode(struct net_device *dev)
 	netif_addr_unlock_bh(dev);
 }
 
+/* hw addresses list handling functions */
+
+static int __hw_addr_add(struct netdev_hw_addr_list *list, unsigned char *addr,
+			 int addr_len, unsigned char addr_type)
+{
+	struct netdev_hw_addr *ha;
+	int alloc_size;
+
+	if (addr_len > MAX_ADDR_LEN)
+		return -EINVAL;
+
+	list_for_each_entry(ha, &list->list, list) {
+		if (!memcmp(ha->addr, addr, addr_len) &&
+		    ha->type == addr_type) {
+			ha->refcount++;
+			return 0;
+		}
+	}
+
+
+	alloc_size = sizeof(*ha);
+	if (alloc_size < L1_CACHE_BYTES)
+		alloc_size = L1_CACHE_BYTES;
+	ha = kmalloc(alloc_size, GFP_ATOMIC);
+	if (!ha)
+		return -ENOMEM;
+	memcpy(ha->addr, addr, addr_len);
+	ha->type = addr_type;
+	ha->refcount = 1;
+	ha->synced = false;
+	list_add_tail_rcu(&ha->list, &list->list);
+	list->count++;
+	return 0;
+}
+
+static void ha_rcu_free(struct rcu_head *head)
+{
+	struct netdev_hw_addr *ha;
+
+	ha = container_of(head, struct netdev_hw_addr, rcu_head);
+	kfree(ha);
+}
+
+static int __hw_addr_del(struct netdev_hw_addr_list *list, unsigned char *addr,
+			 int addr_len, unsigned char addr_type)
+{
+	struct netdev_hw_addr *ha;
+
+	list_for_each_entry(ha, &list->list, list) {
+		if (!memcmp(ha->addr, addr, addr_len) &&
+		    (ha->type == addr_type || !addr_type)) {
+			if (--ha->refcount)
+				return 0;
+			list_del_rcu(&ha->list);
+			call_rcu(&ha->rcu_head, ha_rcu_free);
+			list->count--;
+			return 0;
+		}
+	}
+	return -ENOENT;
+}
+
+static int __hw_addr_add_multiple(struct netdev_hw_addr_list *to_list,
+				  struct netdev_hw_addr_list *from_list,
+				  int addr_len,
+				  unsigned char addr_type)
+{
+	int err;
+	struct netdev_hw_addr *ha, *ha2;
+	unsigned char type;
+
+	list_for_each_entry(ha, &from_list->list, list) {
+		type = addr_type ? addr_type : ha->type;
+		err = __hw_addr_add(to_list, ha->addr, addr_len, type);
+		if (err)
+			goto unroll;
+	}
+	return 0;
+
+unroll:
+	list_for_each_entry(ha2, &from_list->list, list) {
+		if (ha2 == ha)
+			break;
+		type = addr_type ? addr_type : ha2->type;
+		__hw_addr_del(to_list, ha2->addr, addr_len, type);
+	}
+	return err;
+}
+
+static void __hw_addr_del_multiple(struct netdev_hw_addr_list *to_list,
+				   struct netdev_hw_addr_list *from_list,
+				   int addr_len,
+				   unsigned char addr_type)
+{
+	struct netdev_hw_addr *ha;
+	unsigned char type;
+
+	list_for_each_entry(ha, &from_list->list, list) {
+		type = addr_type ? addr_type : ha->type;
+		__hw_addr_del(to_list, ha->addr, addr_len, addr_type);
+	}
+}
+
+static int __hw_addr_sync(struct netdev_hw_addr_list *to_list,
+			  struct netdev_hw_addr_list *from_list,
+			  int addr_len)
+{
+	int err = 0;
+	struct netdev_hw_addr *ha, *tmp;
+
+	list_for_each_entry_safe(ha, tmp, &from_list->list, list) {
+		if (!ha->synced) {
+			err = __hw_addr_add(to_list, ha->addr,
+					    addr_len, ha->type);
+			if (err)
+				break;
+			ha->synced = true;
+			ha->refcount++;
+		} else if (ha->refcount == 1) {
+			__hw_addr_del(to_list, ha->addr, addr_len, ha->type);
+			__hw_addr_del(from_list, ha->addr, addr_len, ha->type);
+		}
+	}
+	return err;
+}
+
+static void __hw_addr_unsync(struct netdev_hw_addr_list *to_list,
+			     struct netdev_hw_addr_list *from_list,
+			     int addr_len)
+{
+	struct netdev_hw_addr *ha, *tmp;
+
+	list_for_each_entry_safe(ha, tmp, &from_list->list, list) {
+		if (ha->synced) {
+			__hw_addr_del(to_list, ha->addr,
+				      addr_len, ha->type);
+			ha->synced = false;
+			__hw_addr_del(from_list, ha->addr,
+				      addr_len, ha->type);
+		}
+	}
+}
+
+static void __hw_addr_flush(struct netdev_hw_addr_list *list)
+{
+	struct netdev_hw_addr *ha, *tmp;
+
+	list_for_each_entry_safe(ha, tmp, &list->list, list) {
+		list_del_rcu(&ha->list);
+		call_rcu(&ha->rcu_head, ha_rcu_free);
+	}
+	list->count = 0;
+}
+
+static void __hw_addr_init(struct netdev_hw_addr_list *list)
+{
+	INIT_LIST_HEAD(&list->list);
+	list->count = 0;
+}
+
+/* Device addresses handling functions */
+
+static void dev_addr_flush(struct net_device *dev)
+{
+	/* rtnl_mutex must be held here */
+
+	__hw_addr_flush(&dev->dev_addrs);
+	dev->dev_addr = NULL;
+}
+
+static int dev_addr_init(struct net_device *dev)
+{
+	unsigned char addr[MAX_ADDR_LEN];
+	struct netdev_hw_addr *ha;
+	int err;
+
+	/* rtnl_mutex must be held here */
+
+	__hw_addr_init(&dev->dev_addrs);
+	memset(addr, 0, sizeof(addr));
+	err = __hw_addr_add(&dev->dev_addrs, addr, sizeof(addr),
+			    NETDEV_HW_ADDR_T_LAN);
+	if (!err) {
+		/*
+		 * Get the first (previously created) address from the list
+		 * and set dev_addr pointer to this location.
+		 */
+		ha = list_first_entry(&dev->dev_addrs.list,
+				      struct netdev_hw_addr, list);
+		dev->dev_addr = ha->addr;
+	}
+	return err;
+}
+
+/**
+ * dev_addr_add - Add a device address
+ * @dev: device
+ * @addr: address to add
+ * @addr_type: address type
+ *
+ * Add a device address to the device or increase the reference count if
+ * it already exists.
+ *
+ * The caller must hold the rtnl_mutex.
+ */
+int dev_addr_add(struct net_device *dev, unsigned char *addr,
+		 unsigned char addr_type)
+{
+	int err;
+
+	ASSERT_RTNL();
+
+	err = __hw_addr_add(&dev->dev_addrs, addr, dev->addr_len, addr_type);
+	if (!err)
+		call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
+	return err;
+}
+EXPORT_SYMBOL(dev_addr_add);
+
+/**
+ * dev_addr_del - Release a device address.
+ * @dev: device
+ * @addr: address to delete
+ * @addr_type: address type
+ *
+ * Release reference to a device address and remove it from the device
+ * if the reference count drops to zero.
+ *
+ * The caller must hold the rtnl_mutex.
+ */
+int dev_addr_del(struct net_device *dev, unsigned char *addr,
+		 unsigned char addr_type)
+{
+	int err;
+	struct netdev_hw_addr *ha;
+
+	ASSERT_RTNL();
+
+	/*
+	 * We can not remove the first address from the list because
+	 * dev->dev_addr points to that.
+	 */
+	ha = list_first_entry(&dev->dev_addrs.list,
+			      struct netdev_hw_addr, list);
+	if (ha->addr == dev->dev_addr && ha->refcount == 1)
+		return -ENOENT;
+
+	err = __hw_addr_del(&dev->dev_addrs, addr, dev->addr_len,
+			    addr_type);
+	if (!err)
+		call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
+	return err;
+}
+EXPORT_SYMBOL(dev_addr_del);
+
+/**
+ * dev_addr_add_multiple - Add device addresses from another device
+ * @to_dev: device to which addresses will be added
+ * @from_dev: device from which addresses will be added
+ * @addr_type: address type - 0 means type will be used from from_dev
+ *
+ * Add device addresses of the one device to another.
+ **
+ * The caller must hold the rtnl_mutex.
+ */
+int dev_addr_add_multiple(struct net_device *to_dev,
+			  struct net_device *from_dev,
+			  unsigned char addr_type)
+{
+	int err;
+
+	ASSERT_RTNL();
+
+	if (from_dev->addr_len != to_dev->addr_len)
+		return -EINVAL;
+	err = __hw_addr_add_multiple(&to_dev->dev_addrs, &from_dev->dev_addrs,
+				     to_dev->addr_len, addr_type);
+	if (!err)
+		call_netdevice_notifiers(NETDEV_CHANGEADDR, to_dev);
+	return err;
+}
+EXPORT_SYMBOL(dev_addr_add_multiple);
+
+/**
+ * dev_addr_del_multiple - Delete device addresses by another device
+ * @to_dev: device where the addresses will be deleted
+ * @from_dev: device by which addresses the addresses will be deleted
+ * @addr_type: address type - 0 means type will used from from_dev
+ *
+ * Deletes addresses in to device by the list of addresses in from device.
+ *
+ * The caller must hold the rtnl_mutex.
+ */
+int dev_addr_del_multiple(struct net_device *to_dev,
+			  struct net_device *from_dev,
+			  unsigned char addr_type)
+{
+	ASSERT_RTNL();
+
+	if (from_dev->addr_len != to_dev->addr_len)
+		return -EINVAL;
+	__hw_addr_del_multiple(&to_dev->dev_addrs, &from_dev->dev_addrs,
+			       to_dev->addr_len, addr_type);
+	call_netdevice_notifiers(NETDEV_CHANGEADDR, to_dev);
+	return 0;
+}
+EXPORT_SYMBOL(dev_addr_del_multiple);
+
+/* multicast addresses handling functions */
+
 int __dev_addr_delete(struct dev_addr_list **list, int *count,
 		      void *addr, int alen, int glbl)
 {
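
The block above is the refcounted, RCU-protected replacement for the old flat address lists: identical (addr, type) pairs share one netdev_hw_addr whose refcount tracks how many logical users added it, readers may walk list->list under rcu_read_lock(), and writers hold RTNL. A hedged usage sketch of the exported wrappers (mac is a caller-supplied buffer of dev->addr_len bytes):

	/* Sketch: two subsystems registering the same secondary MAC. */
	rtnl_lock();
	dev_addr_add(dev, mac, NETDEV_HW_ADDR_T_SAN); /* new entry, refcount 1 */
	dev_addr_add(dev, mac, NETDEV_HW_ADDR_T_SAN); /* same entry, refcount 2 */

	dev_addr_del(dev, mac, NETDEV_HW_ADDR_T_SAN); /* refcount back to 1 */
	dev_addr_del(dev, mac, NETDEV_HW_ADDR_T_SAN); /* unlinked, freed via RCU */
	rtnl_unlock();
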
@@ -3506,24 +3851,22 @@ int __dev_addr_add(struct dev_addr_list **list, int *count,
  * dev_unicast_delete - Release secondary unicast address.
  * @dev: device
  * @addr: address to delete
- * @alen: length of @addr
  *
  * Release reference to a secondary unicast address and remove it
  * from the device if the reference count drops to zero.
  *
  * The caller must hold the rtnl_mutex.
  */
-int dev_unicast_delete(struct net_device *dev, void *addr, int alen)
+int dev_unicast_delete(struct net_device *dev, void *addr)
 {
 	int err;
 
 	ASSERT_RTNL();
 
-	netif_addr_lock_bh(dev);
-	err = __dev_addr_delete(&dev->uc_list, &dev->uc_count, addr, alen, 0);
+	err = __hw_addr_del(&dev->uc, addr, dev->addr_len,
+			    NETDEV_HW_ADDR_T_UNICAST);
 	if (!err)
 		__dev_set_rx_mode(dev);
-	netif_addr_unlock_bh(dev);
 	return err;
 }
 EXPORT_SYMBOL(dev_unicast_delete);
@@ -3532,24 +3875,22 @@ EXPORT_SYMBOL(dev_unicast_delete);
  * dev_unicast_add - add a secondary unicast address
  * @dev: device
  * @addr: address to add
- * @alen: length of @addr
 *
 * Add a secondary unicast address to the device or increase
 * the reference count if it already exists.
 *
 * The caller must hold the rtnl_mutex.
 */
-int dev_unicast_add(struct net_device *dev, void *addr, int alen)
+int dev_unicast_add(struct net_device *dev, void *addr)
 {
 	int err;
 
 	ASSERT_RTNL();
 
-	netif_addr_lock_bh(dev);
-	err = __dev_addr_add(&dev->uc_list, &dev->uc_count, addr, alen, 0);
+	err = __hw_addr_add(&dev->uc, addr, dev->addr_len,
+			    NETDEV_HW_ADDR_T_UNICAST);
 	if (!err)
 		__dev_set_rx_mode(dev);
-	netif_addr_unlock_bh(dev);
 	return err;
 }
 EXPORT_SYMBOL(dev_unicast_add);
@@ -3606,8 +3947,7 @@ void __dev_addr_unsync(struct dev_addr_list **to, int *to_count,
  * @from: source device
 *
 * Add newly added addresses to the destination device and release
- * addresses that have no users left. The source device must be
- * locked by netif_tx_lock_bh.
+ * addresses that have no users left.
 *
 * This function is intended to be called from the dev->set_rx_mode
 * function of layered software devices.
@@ -3616,12 +3956,14 @@ int dev_unicast_sync(struct net_device *to, struct net_device *from)
 {
 	int err = 0;
 
-	netif_addr_lock_bh(to);
-	err = __dev_addr_sync(&to->uc_list, &to->uc_count,
-			      &from->uc_list, &from->uc_count);
+	ASSERT_RTNL();
+
+	if (to->addr_len != from->addr_len)
+		return -EINVAL;
+
+	err = __hw_addr_sync(&to->uc, &from->uc, to->addr_len);
 	if (!err)
 		__dev_set_rx_mode(to);
-	netif_addr_unlock_bh(to);
 	return err;
 }
 EXPORT_SYMBOL(dev_unicast_sync);
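
Note the locking change: dev_unicast_sync() and dev_unicast_unsync() now assert RTNL instead of taking the address locks themselves, so a stacked driver is expected to call them from RTNL-protected paths. Schematically (lower_dev/upper_dev are placeholders):

	/* Sketch: a layered device mirroring its unicast list downward. */
	ASSERT_RTNL();
	err = dev_unicast_sync(lower_dev, upper_dev); /* push new addrs down */
	...
	dev_unicast_unsync(lower_dev, upper_dev);     /* drop them on teardown */
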
@@ -3637,18 +3979,31 @@ EXPORT_SYMBOL(dev_unicast_sync);
 */
 void dev_unicast_unsync(struct net_device *to, struct net_device *from)
 {
-	netif_addr_lock_bh(from);
-	netif_addr_lock(to);
+	ASSERT_RTNL();
 
-	__dev_addr_unsync(&to->uc_list, &to->uc_count,
-			  &from->uc_list, &from->uc_count);
-	__dev_set_rx_mode(to);
+	if (to->addr_len != from->addr_len)
+		return;
 
-	netif_addr_unlock(to);
-	netif_addr_unlock_bh(from);
+	__hw_addr_unsync(&to->uc, &from->uc, to->addr_len);
+	__dev_set_rx_mode(to);
 }
 EXPORT_SYMBOL(dev_unicast_unsync);
 
+static void dev_unicast_flush(struct net_device *dev)
+{
+	/* rtnl_mutex must be held here */
+
+	__hw_addr_flush(&dev->uc);
+}
+
+static void dev_unicast_init(struct net_device *dev)
+{
+	/* rtnl_mutex must be held here */
+
+	__hw_addr_init(&dev->uc);
+}
+
+
 static void __dev_addr_discard(struct dev_addr_list **list)
 {
 	struct dev_addr_list *tmp;
@@ -3667,9 +4022,6 @@ static void dev_addr_discard(struct net_device *dev)
 {
 	netif_addr_lock_bh(dev);
 
-	__dev_addr_discard(&dev->uc_list);
-	dev->uc_count = 0;
-
 	__dev_addr_discard(&dev->mc_list);
 	dev->mc_count = 0;
 
@@ -3853,7 +4205,7 @@ static int dev_ifsioc_locked(struct net *net, struct ifreq *ifr, unsigned int cm
 
 	switch (cmd) {
 		case SIOCGIFFLAGS:	/* Get interface flags */
-			ifr->ifr_flags = dev_get_flags(dev);
+			ifr->ifr_flags = (short) dev_get_flags(dev);
 			return 0;
 
 		case SIOCGIFMETRIC:	/* Get the metric on the interface
@@ -4262,6 +4614,7 @@ static void rollback_registered(struct net_device *dev)
 	/*
 	 *	Flush the unicast and multicast chains
 	 */
+	dev_unicast_flush(dev);
 	dev_addr_discard(dev);
 
 	if (dev->netdev_ops->ndo_uninit)
@@ -4333,39 +4686,6 @@ unsigned long netdev_fix_features(unsigned long features, const char *name)
 }
 EXPORT_SYMBOL(netdev_fix_features);
 
-/* Some devices need to (re-)set their netdev_ops inside
- * ->init() or similar.  If that happens, we have to setup
- * the compat pointers again.
- */
-void netdev_resync_ops(struct net_device *dev)
-{
-#ifdef CONFIG_COMPAT_NET_DEV_OPS
-	const struct net_device_ops *ops = dev->netdev_ops;
-
-	dev->init = ops->ndo_init;
-	dev->uninit = ops->ndo_uninit;
-	dev->open = ops->ndo_open;
-	dev->change_rx_flags = ops->ndo_change_rx_flags;
-	dev->set_rx_mode = ops->ndo_set_rx_mode;
-	dev->set_multicast_list = ops->ndo_set_multicast_list;
-	dev->set_mac_address = ops->ndo_set_mac_address;
-	dev->validate_addr = ops->ndo_validate_addr;
-	dev->do_ioctl = ops->ndo_do_ioctl;
-	dev->set_config = ops->ndo_set_config;
-	dev->change_mtu = ops->ndo_change_mtu;
-	dev->neigh_setup = ops->ndo_neigh_setup;
-	dev->tx_timeout = ops->ndo_tx_timeout;
-	dev->get_stats = ops->ndo_get_stats;
-	dev->vlan_rx_register = ops->ndo_vlan_rx_register;
-	dev->vlan_rx_add_vid = ops->ndo_vlan_rx_add_vid;
-	dev->vlan_rx_kill_vid = ops->ndo_vlan_rx_kill_vid;
-#ifdef CONFIG_NET_POLL_CONTROLLER
-	dev->poll_controller = ops->ndo_poll_controller;
-#endif
-#endif
-}
-EXPORT_SYMBOL(netdev_resync_ops);
-
 /**
  * register_netdevice - register a network device
  * @dev: device to register
@@ -4405,23 +4725,6 @@ int register_netdevice(struct net_device *dev)
 
 	dev->iflink = -1;
 
-#ifdef CONFIG_COMPAT_NET_DEV_OPS
-	/* Netdevice_ops API compatibility support.
-	 * This is temporary until all network devices are converted.
-	 */
-	if (dev->netdev_ops) {
-		netdev_resync_ops(dev);
-	} else {
-		char drivername[64];
-		pr_info("%s (%s): not using net_device_ops yet\n",
-			dev->name, netdev_drivername(dev, drivername, 64));
-
-		/* This works only because net_device_ops and the
-		   compatibility structure are the same. */
-		dev->netdev_ops = (void *) &(dev->init);
-	}
-#endif
-
 	/* Init, if this function is available */
 	if (dev->netdev_ops->ndo_init) {
 		ret = dev->netdev_ops->ndo_init(dev);
@@ -4707,13 +5010,30 @@ void netdev_run_todo(void)
 * the internal statistics structure is used.
 */
 const struct net_device_stats *dev_get_stats(struct net_device *dev)
- {
+{
 	const struct net_device_ops *ops = dev->netdev_ops;
 
 	if (ops->ndo_get_stats)
 		return ops->ndo_get_stats(dev);
-	else
-		return &dev->stats;
+	else {
+		unsigned long tx_bytes = 0, tx_packets = 0, tx_dropped = 0;
+		struct net_device_stats *stats = &dev->stats;
+		unsigned int i;
+		struct netdev_queue *txq;
+
+		for (i = 0; i < dev->num_tx_queues; i++) {
+			txq = netdev_get_tx_queue(dev, i);
+			tx_bytes += txq->tx_bytes;
+			tx_packets += txq->tx_packets;
+			tx_dropped += txq->tx_dropped;
+		}
+		if (tx_bytes || tx_packets || tx_dropped) {
+			stats->tx_bytes = tx_bytes;
+			stats->tx_packets = tx_packets;
+			stats->tx_dropped = tx_dropped;
+		}
+		return stats;
+	}
 }
 EXPORT_SYMBOL(dev_get_stats);
 
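
dev_get_stats() now folds per-queue transmit counters into dev->stats for drivers that have no ndo_get_stats; such a driver is expected to bump the counters on its own txq. A hedged sketch (my_start_xmit is a hypothetical name):

	/* Sketch: multiqueue driver updating the counters dev_get_stats() sums. */
	static int my_start_xmit(struct sk_buff *skb, struct net_device *dev)
	{
		struct netdev_queue *txq =
			netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));

		txq->tx_packets++;
		txq->tx_bytes += skb->len;
		/* ... hand skb to hardware ... */
		return NETDEV_TX_OK;
	}
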
@@ -4748,18 +5068,18 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
 	struct netdev_queue *tx;
 	struct net_device *dev;
 	size_t alloc_size;
-	void *p;
+	struct net_device *p;
 
 	BUG_ON(strlen(name) >= sizeof(dev->name));
 
 	alloc_size = sizeof(struct net_device);
 	if (sizeof_priv) {
 		/* ensure 32-byte alignment of private area */
-		alloc_size = (alloc_size + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST;
+		alloc_size = ALIGN(alloc_size, NETDEV_ALIGN);
 		alloc_size += sizeof_priv;
 	}
 	/* ensure 32-byte alignment of whole construct */
-	alloc_size += NETDEV_ALIGN_CONST;
+	alloc_size += NETDEV_ALIGN - 1;
 
 	p = kzalloc(alloc_size, GFP_KERNEL);
 	if (!p) {
@@ -4771,13 +5091,17 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
 	if (!tx) {
 		printk(KERN_ERR "alloc_netdev: Unable to allocate "
 		       "tx qdiscs.\n");
-		kfree(p);
-		return NULL;
+		goto free_p;
 	}
 
-	dev = (struct net_device *)
-		(((long)p + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST);
+	dev = PTR_ALIGN(p, NETDEV_ALIGN);
 	dev->padded = (char *)dev - (char *)p;
+
+	if (dev_addr_init(dev))
+		goto free_tx;
+
+	dev_unicast_init(dev);
+
 	dev_net_set(dev, &init_net);
 
 	dev->_tx = tx;
@@ -4789,9 +5113,17 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
 	netdev_init_queues(dev);
 
 	INIT_LIST_HEAD(&dev->napi_list);
+	dev->priv_flags = IFF_XMIT_DST_RELEASE;
 	setup(dev);
 	strcpy(dev->name, name);
 	return dev;
+
+free_tx:
+	kfree(tx);
+
+free_p:
+	kfree(p);
+	return NULL;
 }
 EXPORT_SYMBOL(alloc_netdev_mq);
 
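
PTR_ALIGN()/ALIGN() subsume the old open-coded NETDEV_ALIGN_CONST mask arithmetic. For reference, the generic helpers in linux/kernel.h amount to:

	/* Sketch of the alignment helpers now used by alloc_netdev_mq(). */
	#define __ALIGN_MASK(x, mask)	(((x) + (mask)) & ~(mask))
	#define ALIGN(x, a)		__ALIGN_MASK(x, (typeof(x))(a) - 1)
	#define PTR_ALIGN(p, a)		((typeof(p))ALIGN((unsigned long)(p), (a)))
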
@@ -4811,6 +5143,9 @@ void free_netdev(struct net_device *dev)
 
 	kfree(dev->_tx);
 
+	/* Flush device addresses */
+	dev_addr_flush(dev);
+
 	list_for_each_entry_safe(p, n, &dev->napi_list, dev_list)
 		netif_napi_del(p);
 
@@ -4970,6 +5305,7 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
 	/*
 	 *	Flush the unicast and multicast chains
 	 */
+	dev_unicast_flush(dev);
 	dev_addr_discard(dev);
 
 	netdev_unregister_kobject(dev);
@@ -5325,12 +5661,6 @@ EXPORT_SYMBOL(net_enable_timestamp);
 EXPORT_SYMBOL(net_disable_timestamp);
 EXPORT_SYMBOL(dev_get_flags);
 
-#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
-EXPORT_SYMBOL(br_handle_frame_hook);
-EXPORT_SYMBOL(br_fdb_get_hook);
-EXPORT_SYMBOL(br_fdb_put_hook);
-#endif
-
 EXPORT_SYMBOL(dev_load);
 
 EXPORT_PER_CPU_SYMBOL(softnet_data);
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index 9fd0dc3cca99..9d66fa953ab7 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -22,8 +22,10 @@
 #include <linux/timer.h>
 #include <linux/bitops.h>
 #include <net/genetlink.h>
+#include <net/netevent.h>
 
-#include <trace/skb.h>
+#include <trace/events/skb.h>
+#include <trace/events/napi.h>
 
 #include <asm/unaligned.h>
 
@@ -38,7 +40,8 @@ static void send_dm_alert(struct work_struct *unused);
 * and the work handle that will send up
 * netlink alerts
 */
-struct sock *dm_sock;
+static int trace_state = TRACE_OFF;
+static spinlock_t trace_state_lock = SPIN_LOCK_UNLOCKED;
 
 struct per_cpu_dm_data {
 	struct work_struct dm_alert_work;
@@ -47,11 +50,18 @@ struct per_cpu_dm_data {
 	struct timer_list send_timer;
 };
 
+struct dm_hw_stat_delta {
+	struct net_device *dev;
+	struct list_head list;
+	struct rcu_head rcu;
+	unsigned long last_drop_val;
+};
+
 static struct genl_family net_drop_monitor_family = {
 	.id = GENL_ID_GENERATE,
 	.hdrsize = 0,
 	.name = "NET_DM",
-	.version = 1,
+	.version = 2,
 	.maxattr = NET_DM_CMD_MAX,
 };
 
@@ -59,19 +69,24 @@ static DEFINE_PER_CPU(struct per_cpu_dm_data, dm_cpu_data);
 
 static int dm_hit_limit = 64;
 static int dm_delay = 1;
-
+static unsigned long dm_hw_check_delta = 2*HZ;
+static LIST_HEAD(hw_stats_list);
 
 static void reset_per_cpu_data(struct per_cpu_dm_data *data)
 {
 	size_t al;
 	struct net_dm_alert_msg *msg;
+	struct nlattr *nla;
 
 	al = sizeof(struct net_dm_alert_msg);
 	al += dm_hit_limit * sizeof(struct net_dm_drop_point);
+	al += sizeof(struct nlattr);
+
 	data->skb = genlmsg_new(al, GFP_KERNEL);
 	genlmsg_put(data->skb, 0, 0, &net_drop_monitor_family,
 			0, NET_DM_CMD_ALERT);
-	msg = __nla_reserve_nohdr(data->skb, sizeof(struct net_dm_alert_msg));
+	nla = nla_reserve(data->skb, NLA_UNSPEC, sizeof(struct net_dm_alert_msg));
+	msg = nla_data(nla);
 	memset(msg, 0, al);
 	atomic_set(&data->dm_hit_count, dm_hit_limit);
 }
@@ -111,10 +126,11 @@ static void sched_send_work(unsigned long unused)
 	schedule_work(&data->dm_alert_work);
 }
 
-static void trace_kfree_skb_hit(struct sk_buff *skb, void *location)
+static void trace_drop_common(struct sk_buff *skb, void *location)
 {
 	struct net_dm_alert_msg *msg;
 	struct nlmsghdr *nlh;
+	struct nlattr *nla;
 	int i;
 	struct per_cpu_dm_data *data = &__get_cpu_var(dm_cpu_data);
 
@@ -127,7 +143,8 @@ static void trace_drop_common(struct sk_buff *skb, void *location)
 	}
 
 	nlh = (struct nlmsghdr *)data->skb->data;
-	msg = genlmsg_data(nlmsg_data(nlh));
+	nla = genlmsg_data(nlmsg_data(nlh));
+	msg = nla_data(nla);
 	for (i = 0; i < msg->entries; i++) {
 		if (!memcmp(&location, msg->points[i].pc, sizeof(void *))) {
 			msg->points[i].count++;
@@ -139,6 +156,7 @@ static void trace_drop_common(struct sk_buff *skb, void *location)
 	 * We need to create a new entry
 	 */
 	__nla_reserve_nohdr(data->skb, sizeof(struct net_dm_drop_point));
+	nla->nla_len += NLA_ALIGN(sizeof(struct net_dm_drop_point));
 	memcpy(msg->points[msg->entries].pc, &location, sizeof(void *));
 	msg->points[msg->entries].count = 1;
 	msg->entries++;
@@ -152,24 +170,80 @@ out:
 	return;
 }
 
+static void trace_kfree_skb_hit(struct sk_buff *skb, void *location)
+{
+	trace_drop_common(skb, location);
+}
+
+static void trace_napi_poll_hit(struct napi_struct *napi)
+{
+	struct dm_hw_stat_delta *new_stat;
+
+	/*
+	 * Ratelimit our check time to dm_hw_check_delta jiffies
+	 */
+	if (!time_after(jiffies, napi->dev->last_rx + dm_hw_check_delta))
+		return;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(new_stat, &hw_stats_list, list) {
+		if ((new_stat->dev == napi->dev) &&
+		    (napi->dev->stats.rx_dropped != new_stat->last_drop_val)) {
+			trace_drop_common(NULL, NULL);
+			new_stat->last_drop_val = napi->dev->stats.rx_dropped;
+			break;
+		}
+	}
+	rcu_read_unlock();
+}
+
+
+static void free_dm_hw_stat(struct rcu_head *head)
+{
+	struct dm_hw_stat_delta *n;
+	n = container_of(head, struct dm_hw_stat_delta, rcu);
+	kfree(n);
+}
+
 static int set_all_monitor_traces(int state)
 {
 	int rc = 0;
+	struct dm_hw_stat_delta *new_stat = NULL;
+	struct dm_hw_stat_delta *temp;
+
+	spin_lock(&trace_state_lock);
 
 	switch (state) {
 	case TRACE_ON:
 		rc |= register_trace_kfree_skb(trace_kfree_skb_hit);
+		rc |= register_trace_napi_poll(trace_napi_poll_hit);
 		break;
 	case TRACE_OFF:
 		rc |= unregister_trace_kfree_skb(trace_kfree_skb_hit);
+		rc |= unregister_trace_napi_poll(trace_napi_poll_hit);
 
 		tracepoint_synchronize_unregister();
+
+		/*
+		 * Clean the device list
+		 */
+		list_for_each_entry_safe(new_stat, temp, &hw_stats_list, list) {
+			if (new_stat->dev == NULL) {
+				list_del_rcu(&new_stat->list);
+				call_rcu(&new_stat->rcu, free_dm_hw_stat);
+			}
+		}
 		break;
 	default:
 		rc = 1;
 		break;
 	}
 
+	if (!rc)
+		trace_state = state;
+
+	spin_unlock(&trace_state_lock);
+
 	if (rc)
 		return -EINPROGRESS;
 	return rc;
@@ -197,6 +271,44 @@ static int net_dm_cmd_trace(struct sk_buff *skb,
197 return -ENOTSUPP; 271 return -ENOTSUPP;
198} 272}
199 273
274static int dropmon_net_event(struct notifier_block *ev_block,
275 unsigned long event, void *ptr)
276{
277 struct net_device *dev = ptr;
278 struct dm_hw_stat_delta *new_stat = NULL;
279 struct dm_hw_stat_delta *tmp;
280
281 switch (event) {
282 case NETDEV_REGISTER:
283 new_stat = kzalloc(sizeof(struct dm_hw_stat_delta), GFP_KERNEL);
284
285 if (!new_stat)
286 goto out;
287
288 new_stat->dev = dev;
289 INIT_RCU_HEAD(&new_stat->rcu);
290 spin_lock(&trace_state_lock);
291 list_add_rcu(&new_stat->list, &hw_stats_list);
292 spin_unlock(&trace_state_lock);
293 break;
294 case NETDEV_UNREGISTER:
295 spin_lock(&trace_state_lock);
296 list_for_each_entry_safe(new_stat, tmp, &hw_stats_list, list) {
297 if (new_stat->dev == dev) {
298 new_stat->dev = NULL;
299 if (trace_state == TRACE_OFF) {
300 list_del_rcu(&new_stat->list);
301 call_rcu(&new_stat->rcu, free_dm_hw_stat);
302 break;
303 }
304 }
305 }
306 spin_unlock(&trace_state_lock);
307 break;
308 }
309out:
310 return NOTIFY_DONE;
311}
200 312
201static struct genl_ops dropmon_ops[] = { 313static struct genl_ops dropmon_ops[] = {
202 { 314 {
@@ -213,6 +325,10 @@ static struct genl_ops dropmon_ops[] = {
213 }, 325 },
214}; 326};
215 327
328static struct notifier_block dropmon_net_notifier = {
329 .notifier_call = dropmon_net_event
330};
331
216static int __init init_net_drop_monitor(void) 332static int __init init_net_drop_monitor(void)
217{ 333{
218 int cpu; 334 int cpu;
@@ -236,12 +352,18 @@ static int __init init_net_drop_monitor(void)
236 ret = genl_register_ops(&net_drop_monitor_family, 352 ret = genl_register_ops(&net_drop_monitor_family,
237 &dropmon_ops[i]); 353 &dropmon_ops[i]);
238 if (ret) { 354 if (ret) {
239 printk(KERN_CRIT "failed to register operation %d\n", 355 printk(KERN_CRIT "Failed to register operation %d\n",
240 dropmon_ops[i].cmd); 356 dropmon_ops[i].cmd);
241 goto out_unreg; 357 goto out_unreg;
242 } 358 }
243 } 359 }
244 360
361 rc = register_netdevice_notifier(&dropmon_net_notifier);
362 if (rc < 0) {
363 printk(KERN_CRIT "Failed to register netdevice notifier\n");
364 goto out_unreg;
365 }
366
245 rc = 0; 367 rc = 0;
246 368
247 for_each_present_cpu(cpu) { 369 for_each_present_cpu(cpu) {
@@ -252,6 +374,7 @@ static int __init init_net_drop_monitor(void)
252 data->send_timer.data = cpu; 374 data->send_timer.data = cpu;
253 data->send_timer.function = sched_send_work; 375 data->send_timer.function = sched_send_work;
254 } 376 }
377
255 goto out; 378 goto out;
256 379
257out_unreg: 380out_unreg:
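
The drop_monitor changes above lean on the classic RCU list idiom: entries are unlinked with list_del_rcu() under trace_state_lock, and the actual kfree() is deferred through call_rcu() (free_dm_hw_stat) so that readers iterating under rcu_read_lock() in trace_napi_poll_hit() never touch freed memory. A minimal kernel-context sketch of that idiom follows; struct item, item_list and item_lock are hypothetical names, not from this patch:

#include <linux/list.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

struct item {
	struct list_head list;
	struct rcu_head rcu;
};

static LIST_HEAD(item_list);
static DEFINE_SPINLOCK(item_lock);

static void item_free_rcu(struct rcu_head *head)
{
	/* Runs only after every pre-existing reader has left its
	 * rcu_read_lock() section, so the memory is safe to free. */
	kfree(container_of(head, struct item, rcu));
}

static void item_remove(struct item *it)
{
	spin_lock(&item_lock);
	list_del_rcu(&it->list);		/* hide from new readers */
	spin_unlock(&item_lock);
	call_rcu(&it->rcu, item_free_rcu);	/* defer the free */
}

This is also why dropmon_net_event() only marks an entry with dev = NULL while tracing is on: actual deletion happens either there (when tracing is already off) or in the TRACE_OFF branch of set_all_monitor_traces(), after tracepoint_synchronize_unregister() has quiesced the probes.
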
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 98691e1466b8..bd309384f8b8 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -299,7 +299,7 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
299 } else if (rule->action == FR_ACT_GOTO) 299 } else if (rule->action == FR_ACT_GOTO)
300 goto errout_free; 300 goto errout_free;
301 301
302 err = ops->configure(rule, skb, nlh, frh, tb); 302 err = ops->configure(rule, skb, frh, tb);
303 if (err < 0) 303 if (err < 0)
304 goto errout_free; 304 goto errout_free;
305 305
@@ -500,7 +500,7 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule,
500 if (rule->target) 500 if (rule->target)
501 NLA_PUT_U32(skb, FRA_GOTO, rule->target); 501 NLA_PUT_U32(skb, FRA_GOTO, rule->target);
502 502
503 if (ops->fill(rule, skb, nlh, frh) < 0) 503 if (ops->fill(rule, skb, frh) < 0)
504 goto nla_put_failure; 504 goto nla_put_failure;
505 505
506 return nlmsg_end(skb, nlh); 506 return nlmsg_end(skb, nlh);
diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c
index 6d62d4618cfc..78e5bfc454ae 100644
--- a/net/core/gen_estimator.c
+++ b/net/core/gen_estimator.c
@@ -128,12 +128,12 @@ static void est_timer(unsigned long arg)
128 npackets = e->bstats->packets; 128 npackets = e->bstats->packets;
129 brate = (nbytes - e->last_bytes)<<(7 - idx); 129 brate = (nbytes - e->last_bytes)<<(7 - idx);
130 e->last_bytes = nbytes; 130 e->last_bytes = nbytes;
131 e->avbps += ((s64)(brate - e->avbps)) >> e->ewma_log; 131 e->avbps += (brate >> e->ewma_log) - (e->avbps >> e->ewma_log);
132 e->rate_est->bps = (e->avbps+0xF)>>5; 132 e->rate_est->bps = (e->avbps+0xF)>>5;
133 133
134 rate = (npackets - e->last_packets)<<(12 - idx); 134 rate = (npackets - e->last_packets)<<(12 - idx);
135 e->last_packets = npackets; 135 e->last_packets = npackets;
136 e->avpps += ((long)rate - (long)e->avpps) >> e->ewma_log; 136 e->avpps += (rate >> e->ewma_log) - (e->avpps >> e->ewma_log);
137 e->rate_est->pps = (e->avpps+0x1FF)>>10; 137 e->rate_est->pps = (e->avpps+0x1FF)>>10;
138skip: 138skip:
139 read_unlock(&est_lock); 139 read_unlock(&est_lock);
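
The gen_estimator hunk rewrites the EWMA update avg += (rate - avg) >> ewma_log as avg += (rate >> ewma_log) - (avg >> ewma_log), for both the bps and pps estimates. The two forms approximate the same recurrence, avg <- avg + (rate - avg) / 2^ewma_log, but shifting each term before subtracting avoids forming the difference of two unsigned counters, which wraps to a huge value whenever avg briefly exceeds the sample. A small userspace model of the new form, assuming 32-bit counters as in the bps path:

#include <stdint.h>
#include <stdio.h>

/* Overflow-safe EWMA step used by the patch: shift, then subtract. */
static uint32_t ewma_step(uint32_t avg, uint32_t rate, unsigned int log)
{
	return avg + (rate >> log) - (avg >> log);
}

int main(void)
{
	uint32_t avg = 0;

	/* Feed a constant large rate; avg converges towards it
	 * without any intermediate value overflowing. */
	for (int i = 0; i < 10; i++) {
		avg = ewma_step(avg, UINT32_C(3000000000), 3);
		printf("step %d: avg = %u\n", i, avg);
	}
	return 0;
}
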
diff --git a/net/core/iovec.c b/net/core/iovec.c
index 4c9c0121c9da..16ad45d4882b 100644
--- a/net/core/iovec.c
+++ b/net/core/iovec.c
@@ -98,6 +98,31 @@ int memcpy_toiovec(struct iovec *iov, unsigned char *kdata, int len)
98} 98}
99 99
100/* 100/*
101 * Copy kernel to iovec. Returns -EFAULT on error.
102 */
103
104int memcpy_toiovecend(const struct iovec *iov, unsigned char *kdata,
105 int offset, int len)
106{
107 int copy;
108 for (; len > 0; ++iov) {
109 /* Skip over the finished iovecs */
110 if (unlikely(offset >= iov->iov_len)) {
111 offset -= iov->iov_len;
112 continue;
113 }
114 copy = min_t(unsigned int, iov->iov_len - offset, len);
115 if (copy_to_user(iov->iov_base + offset, kdata, copy))
116 return -EFAULT;
117 offset = 0;
118 kdata += copy;
119 len -= copy;
120 }
121
122 return 0;
123}
124
125/*
101 * Copy iovec to kernel. Returns -EFAULT on error. 126 * Copy iovec to kernel. Returns -EFAULT on error.
102 * 127 *
103 * Note: this modifies the original iovec. 128 * Note: this modifies the original iovec.
@@ -122,10 +147,11 @@ int memcpy_fromiovec(unsigned char *kdata, struct iovec *iov, int len)
122} 147}
123 148
124/* 149/*
 125 * For use with ip_build_xmit 150 * Copy iovec to kernel, starting at an offset. Returns -EFAULT on error.

126 */ 151 */
127int memcpy_fromiovecend(unsigned char *kdata, struct iovec *iov, int offset, 152
128 int len) 153int memcpy_fromiovecend(unsigned char *kdata, const struct iovec *iov,
154 int offset, int len)
129{ 155{
130 /* Skip over the finished iovecs */ 156 /* Skip over the finished iovecs */
131 while (offset >= iov->iov_len) { 157 while (offset >= iov->iov_len) {
@@ -236,3 +262,4 @@ EXPORT_SYMBOL(csum_partial_copy_fromiovecend);
236EXPORT_SYMBOL(memcpy_fromiovec); 262EXPORT_SYMBOL(memcpy_fromiovec);
237EXPORT_SYMBOL(memcpy_fromiovecend); 263EXPORT_SYMBOL(memcpy_fromiovecend);
238EXPORT_SYMBOL(memcpy_toiovec); 264EXPORT_SYMBOL(memcpy_toiovec);
265EXPORT_SYMBOL(memcpy_toiovecend);
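
The new memcpy_toiovecend() (and the retyped memcpy_fromiovecend()) differ from their older siblings in two ways visible above: the iovec is const, so the caller's vector is never advanced and can be reused, and an explicit offset selects where in the logical byte stream the copy starts. A userspace model of the same walk; memcpy() stands in for copy_to_user(), which is why no -EFAULT path appears:

#include <stddef.h>
#include <string.h>
#include <sys/uio.h>

static void copy_to_iovec_end(const struct iovec *iov, const char *kdata,
			      size_t offset, size_t len)
{
	for (; len > 0; ++iov) {
		size_t copy;

		/* Skip whole segments covered by the starting offset. */
		if (offset >= iov->iov_len) {
			offset -= iov->iov_len;
			continue;
		}
		copy = iov->iov_len - offset;
		if (copy > len)
			copy = len;
		memcpy((char *)iov->iov_base + offset, kdata, copy);
		offset = 0;	/* later segments start at their beginning */
		kdata += copy;
		len -= copy;
	}
}
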
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index a1cbce7fdae5..163b4f5b0365 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -771,6 +771,28 @@ static __inline__ int neigh_max_probes(struct neighbour *n)
771 p->ucast_probes + p->app_probes + p->mcast_probes); 771 p->ucast_probes + p->app_probes + p->mcast_probes);
772} 772}
773 773
774static void neigh_invalidate(struct neighbour *neigh)
775{
776 struct sk_buff *skb;
777
778 NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed);
779 NEIGH_PRINTK2("neigh %p is failed.\n", neigh);
780 neigh->updated = jiffies;
781
782 /* It is very thin place. report_unreachable is very complicated
783 routine. Particularly, it can hit the same neighbour entry!
784
785 So that, we try to be accurate and avoid dead loop. --ANK
786 */
787 while (neigh->nud_state == NUD_FAILED &&
788 (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
789 write_unlock(&neigh->lock);
790 neigh->ops->error_report(neigh, skb);
791 write_lock(&neigh->lock);
792 }
793 skb_queue_purge(&neigh->arp_queue);
794}
795
774/* Called when a timer expires for a neighbour entry. */ 796/* Called when a timer expires for a neighbour entry. */
775 797
776static void neigh_timer_handler(unsigned long arg) 798static void neigh_timer_handler(unsigned long arg)
@@ -835,26 +857,9 @@ static void neigh_timer_handler(unsigned long arg)
835 857
836 if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) && 858 if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
837 atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) { 859 atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
838 struct sk_buff *skb;
839
840 neigh->nud_state = NUD_FAILED; 860 neigh->nud_state = NUD_FAILED;
841 neigh->updated = jiffies;
842 notify = 1; 861 notify = 1;
843 NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed); 862 neigh_invalidate(neigh);
844 NEIGH_PRINTK2("neigh %p is failed.\n", neigh);
845
846 /* It is very thin place. report_unreachable is very complicated
847 routine. Particularly, it can hit the same neighbour entry!
848
849 So that, we try to be accurate and avoid dead loop. --ANK
850 */
851 while (neigh->nud_state == NUD_FAILED &&
852 (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
853 write_unlock(&neigh->lock);
854 neigh->ops->error_report(neigh, skb);
855 write_lock(&neigh->lock);
856 }
857 skb_queue_purge(&neigh->arp_queue);
858 } 863 }
859 864
860 if (neigh->nud_state & NUD_IN_TIMER) { 865 if (neigh->nud_state & NUD_IN_TIMER) {
@@ -1001,6 +1006,11 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
1001 neigh->nud_state = new; 1006 neigh->nud_state = new;
1002 err = 0; 1007 err = 0;
1003 notify = old & NUD_VALID; 1008 notify = old & NUD_VALID;
1009 if ((old & (NUD_INCOMPLETE | NUD_PROBE)) &&
1010 (new & NUD_FAILED)) {
1011 neigh_invalidate(neigh);
1012 notify = 1;
1013 }
1004 goto out; 1014 goto out;
1005 } 1015 }
1006 1016
@@ -1088,8 +1098,8 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
1088 struct neighbour *n1 = neigh; 1098 struct neighbour *n1 = neigh;
1089 write_unlock_bh(&neigh->lock); 1099 write_unlock_bh(&neigh->lock);
1090 /* On shaper/eql skb->dst->neighbour != neigh :( */ 1100 /* On shaper/eql skb->dst->neighbour != neigh :( */
1091 if (skb->dst && skb->dst->neighbour) 1101 if (skb_dst(skb) && skb_dst(skb)->neighbour)
1092 n1 = skb->dst->neighbour; 1102 n1 = skb_dst(skb)->neighbour;
1093 n1->output(skb); 1103 n1->output(skb);
1094 write_lock_bh(&neigh->lock); 1104 write_lock_bh(&neigh->lock);
1095 } 1105 }
@@ -1182,7 +1192,7 @@ EXPORT_SYMBOL(neigh_compat_output);
1182 1192
1183int neigh_resolve_output(struct sk_buff *skb) 1193int neigh_resolve_output(struct sk_buff *skb)
1184{ 1194{
1185 struct dst_entry *dst = skb->dst; 1195 struct dst_entry *dst = skb_dst(skb);
1186 struct neighbour *neigh; 1196 struct neighbour *neigh;
1187 int rc = 0; 1197 int rc = 0;
1188 1198
@@ -1229,7 +1239,7 @@ EXPORT_SYMBOL(neigh_resolve_output);
1229int neigh_connected_output(struct sk_buff *skb) 1239int neigh_connected_output(struct sk_buff *skb)
1230{ 1240{
1231 int err; 1241 int err;
1232 struct dst_entry *dst = skb->dst; 1242 struct dst_entry *dst = skb_dst(skb);
1233 struct neighbour *neigh = dst->neighbour; 1243 struct neighbour *neigh = dst->neighbour;
1234 struct net_device *dev = neigh->dev; 1244 struct net_device *dev = neigh->dev;
1235 1245
@@ -1298,8 +1308,7 @@ void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
1298 if (time_before(tbl->proxy_timer.expires, sched_next)) 1308 if (time_before(tbl->proxy_timer.expires, sched_next))
1299 sched_next = tbl->proxy_timer.expires; 1309 sched_next = tbl->proxy_timer.expires;
1300 } 1310 }
1301 dst_release(skb->dst); 1311 skb_dst_drop(skb);
1302 skb->dst = NULL;
1303 dev_hold(skb->dev); 1312 dev_hold(skb->dev);
1304 __skb_queue_tail(&tbl->proxy_queue, skb); 1313 __skb_queue_tail(&tbl->proxy_queue, skb);
1305 mod_timer(&tbl->proxy_timer, sched_next); 1314 mod_timer(&tbl->proxy_timer, sched_next);
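
The neighbour.c hunks are part of a tree-wide conversion from touching skb->dst directly to going through accessors. The payoff shows in pneigh_enqueue(): the two-line dst_release(skb->dst); skb->dst = NULL; pair becomes a single skb_dst_drop(), which cannot leave a stale pointer behind. Roughly, the accessors behave like the sketch below; this is a simplification, and the field name _dst is illustrative rather than the in-tree <linux/skbuff.h> layout:

static inline struct dst_entry *skb_dst(const struct sk_buff *skb)
{
	return skb->_dst;		/* field name illustrative */
}

static inline void skb_dst_set(struct sk_buff *skb, struct dst_entry *dst)
{
	skb->_dst = dst;		/* takes over the reference */
}

static inline void skb_dst_drop(struct sk_buff *skb)
{
	/* Release and clear in one place, so no caller can forget
	 * the NULL assignment half of the old open-coded pair. */
	dst_release(skb->_dst);		/* dst_release() accepts NULL */
	skb->_dst = NULL;
}
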
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 2da59a0ac4ac..3994680c08b9 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -78,7 +78,7 @@ static ssize_t netdev_store(struct device *dev, struct device_attribute *attr,
78 goto err; 78 goto err;
79 79
80 if (!rtnl_trylock()) 80 if (!rtnl_trylock())
81 return -ERESTARTSYS; 81 return restart_syscall();
82 82
83 if (dev_isalive(net)) { 83 if (dev_isalive(net)) {
84 if ((ret = (*set)(net, new)) == 0) 84 if ((ret = (*set)(net, new)) == 0)
@@ -225,7 +225,8 @@ static ssize_t store_ifalias(struct device *dev, struct device_attribute *attr,
225 if (len > 0 && buf[len - 1] == '\n') 225 if (len > 0 && buf[len - 1] == '\n')
226 --count; 226 --count;
227 227
228 rtnl_lock(); 228 if (!rtnl_trylock())
229 return restart_syscall();
229 ret = dev_set_alias(netdev, buf, count); 230 ret = dev_set_alias(netdev, buf, count);
230 rtnl_unlock(); 231 rtnl_unlock();
231 232
@@ -238,7 +239,8 @@ static ssize_t show_ifalias(struct device *dev,
238 const struct net_device *netdev = to_net_dev(dev); 239 const struct net_device *netdev = to_net_dev(dev);
239 ssize_t ret = 0; 240 ssize_t ret = 0;
240 241
241 rtnl_lock(); 242 if (!rtnl_trylock())
243 return restart_syscall();
242 if (netdev->ifalias) 244 if (netdev->ifalias)
243 ret = sprintf(buf, "%s\n", netdev->ifalias); 245 ret = sprintf(buf, "%s\n", netdev->ifalias);
244 rtnl_unlock(); 246 rtnl_unlock();
@@ -497,7 +499,6 @@ int netdev_register_kobject(struct net_device *net)
497 dev->platform_data = net; 499 dev->platform_data = net;
498 dev->groups = groups; 500 dev->groups = groups;
499 501
500 BUILD_BUG_ON(BUS_ID_SIZE < IFNAMSIZ);
501 dev_set_name(dev, "%s", net->name); 502 dev_set_name(dev, "%s", net->name);
502 503
503#ifdef CONFIG_SYSFS 504#ifdef CONFIG_SYSFS
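
The net-sysfs hunks swap unconditional rtnl_lock() (and one bogus -ERESTARTSYS return) for the rtnl_trylock()/restart_syscall() pattern. The motivation is lock ordering: unregistration paths hold the RTNL mutex while tearing down sysfs entries, so a sysfs handler that blocks on RTNL can deadlock against them. Failing the trylock and restarting the syscall lets the process retry from scratch with no locks held. The pattern, sketched with a deliberately simplified, hypothetical handler (example_apply() is a stand-in, not a kernel function):

static ssize_t example_store(struct net_device *netdev,
			     const char *buf, size_t len)
{
	ssize_t ret;

	if (!rtnl_trylock())
		return restart_syscall();	/* back out; retry later */

	ret = example_apply(netdev, buf, len);	/* hypothetical helper */
	rtnl_unlock();

	return ret < 0 ? ret : (ssize_t)len;
}
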
diff --git a/net/core/net-traces.c b/net/core/net-traces.c
index c8fb45665e4f..f1e982c508bb 100644
--- a/net/core/net-traces.c
+++ b/net/core/net-traces.c
@@ -19,11 +19,14 @@
19#include <linux/workqueue.h> 19#include <linux/workqueue.h>
20#include <linux/netlink.h> 20#include <linux/netlink.h>
21#include <linux/net_dropmon.h> 21#include <linux/net_dropmon.h>
22#include <trace/skb.h>
23 22
24#include <asm/unaligned.h> 23#include <asm/unaligned.h>
25#include <asm/bitops.h> 24#include <asm/bitops.h>
26 25
26#define CREATE_TRACE_POINTS
27#include <trace/events/skb.h>
28#include <trace/events/napi.h>
27 29
28DEFINE_TRACE(kfree_skb);
29EXPORT_TRACEPOINT_SYMBOL_GPL(kfree_skb); 30EXPORT_TRACEPOINT_SYMBOL_GPL(kfree_skb);
31
32EXPORT_TRACEPOINT_SYMBOL_GPL(napi_poll);
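
net-traces.c moves from hand-rolled DEFINE_TRACE() calls to the generated trace-events scheme: every user includes the event headers for the declarations, and exactly one translation unit defines CREATE_TRACE_POINTS first so that the same include expands into the tracepoint definitions. In sketch form:

/* net-traces.c: the one file that generates the definitions. */
#define CREATE_TRACE_POINTS
#include <trace/events/skb.h>
#include <trace/events/napi.h>

/* Any other file gets declarations only: */
/*	#include <trace/events/skb.h>				*/
/* and call sites simply fire the event, e.g. in kfree_skb():	*/
/*	trace_kfree_skb(skb, __builtin_return_address(0));	*/
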
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index e3bebd36f053..b7292a2719dc 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -115,41 +115,34 @@ static void net_free(struct net *net)
115 kmem_cache_free(net_cachep, net); 115 kmem_cache_free(net_cachep, net);
116} 116}
117 117
118struct net *copy_net_ns(unsigned long flags, struct net *old_net) 118static struct net *net_create(void)
119{ 119{
120 struct net *new_net = NULL; 120 struct net *net;
121 int err; 121 int rv;
122
123 get_net(old_net);
124
125 if (!(flags & CLONE_NEWNET))
126 return old_net;
127
128 err = -ENOMEM;
129 new_net = net_alloc();
130 if (!new_net)
131 goto out_err;
132 122
123 net = net_alloc();
124 if (!net)
125 return ERR_PTR(-ENOMEM);
133 mutex_lock(&net_mutex); 126 mutex_lock(&net_mutex);
134 err = setup_net(new_net); 127 rv = setup_net(net);
135 if (!err) { 128 if (rv == 0) {
136 rtnl_lock(); 129 rtnl_lock();
137 list_add_tail(&new_net->list, &net_namespace_list); 130 list_add_tail(&net->list, &net_namespace_list);
138 rtnl_unlock(); 131 rtnl_unlock();
139 } 132 }
140 mutex_unlock(&net_mutex); 133 mutex_unlock(&net_mutex);
134 if (rv < 0) {
135 net_free(net);
136 return ERR_PTR(rv);
137 }
138 return net;
139}
141 140
142 if (err) 141struct net *copy_net_ns(unsigned long flags, struct net *old_net)
143 goto out_free; 142{
144out: 143 if (!(flags & CLONE_NEWNET))
145 put_net(old_net); 144 return get_net(old_net);
146 return new_net; 145 return net_create();
147
148out_free:
149 net_free(new_net);
150out_err:
151 new_net = ERR_PTR(err);
152 goto out;
153} 146}
154 147
155static void cleanup_net(struct work_struct *work) 148static void cleanup_net(struct work_struct *work)
@@ -203,9 +196,7 @@ struct net *copy_net_ns(unsigned long flags, struct net *old_net)
203static int __init net_ns_init(void) 196static int __init net_ns_init(void)
204{ 197{
205 struct net_generic *ng; 198 struct net_generic *ng;
206 int err;
207 199
208 printk(KERN_INFO "net_namespace: %zd bytes\n", sizeof(struct net));
209#ifdef CONFIG_NET_NS 200#ifdef CONFIG_NET_NS
210 net_cachep = kmem_cache_create("net_namespace", sizeof(struct net), 201 net_cachep = kmem_cache_create("net_namespace", sizeof(struct net),
211 SMP_CACHE_BYTES, 202 SMP_CACHE_BYTES,
@@ -224,15 +215,14 @@ static int __init net_ns_init(void)
224 rcu_assign_pointer(init_net.gen, ng); 215 rcu_assign_pointer(init_net.gen, ng);
225 216
226 mutex_lock(&net_mutex); 217 mutex_lock(&net_mutex);
227 err = setup_net(&init_net); 218 if (setup_net(&init_net))
219 panic("Could not setup the initial network namespace");
228 220
229 rtnl_lock(); 221 rtnl_lock();
230 list_add_tail(&init_net.list, &net_namespace_list); 222 list_add_tail(&init_net.list, &net_namespace_list);
231 rtnl_unlock(); 223 rtnl_unlock();
232 224
233 mutex_unlock(&net_mutex); 225 mutex_unlock(&net_mutex);
234 if (err)
235 panic("Could not setup the initial network namespace");
236 226
237 return 0; 227 return 0;
238} 228}
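
copy_net_ns() shrinks to a two-line wrapper because the allocation/setup ladder moves into net_create(), which reports failure with the kernel's ERR_PTR() convention: a small negative errno encoded in an otherwise-invalid pointer value, tested by the caller with IS_ERR()/PTR_ERR(). The convention in isolation, with illustrative thing_* names:

#include <linux/err.h>
#include <linux/slab.h>

struct thing { int dummy; };

static int thing_setup(struct thing *t) { return 0; }	/* stand-in */

static struct thing *thing_create(void)
{
	struct thing *t = kzalloc(sizeof(*t), GFP_KERNEL);
	int rv;

	if (!t)
		return ERR_PTR(-ENOMEM);
	rv = thing_setup(t);
	if (rv < 0) {
		kfree(t);
		return ERR_PTR(rv);	/* propagate the real errno */
	}
	return t;
}

/* Caller:
 *	struct thing *t = thing_create();
 *	if (IS_ERR(t))
 *		return PTR_ERR(t);
 */
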
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 64f51eec6576..9675f312830d 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -24,6 +24,7 @@
24#include <net/tcp.h> 24#include <net/tcp.h>
25#include <net/udp.h> 25#include <net/udp.h>
26#include <asm/unaligned.h> 26#include <asm/unaligned.h>
27#include <trace/events/napi.h>
27 28
28/* 29/*
29 * We maintain a small pool of fully-sized skbs, to make sure the 30 * We maintain a small pool of fully-sized skbs, to make sure the
@@ -137,6 +138,7 @@ static int poll_one_napi(struct netpoll_info *npinfo,
137 set_bit(NAPI_STATE_NPSVC, &napi->state); 138 set_bit(NAPI_STATE_NPSVC, &napi->state);
138 139
139 work = napi->poll(napi, budget); 140 work = napi->poll(napi, budget);
141 trace_napi_poll(napi);
140 142
141 clear_bit(NAPI_STATE_NPSVC, &napi->state); 143 clear_bit(NAPI_STATE_NPSVC, &napi->state);
142 atomic_dec(&trapped); 144 atomic_dec(&trapped);
@@ -300,8 +302,11 @@ static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
300 for (tries = jiffies_to_usecs(1)/USEC_PER_POLL; 302 for (tries = jiffies_to_usecs(1)/USEC_PER_POLL;
301 tries > 0; --tries) { 303 tries > 0; --tries) {
302 if (__netif_tx_trylock(txq)) { 304 if (__netif_tx_trylock(txq)) {
303 if (!netif_tx_queue_stopped(txq)) 305 if (!netif_tx_queue_stopped(txq)) {
304 status = ops->ndo_start_xmit(skb, dev); 306 status = ops->ndo_start_xmit(skb, dev);
307 if (status == NETDEV_TX_OK)
308 txq_trans_update(txq);
309 }
305 __netif_tx_unlock(txq); 310 __netif_tx_unlock(txq);
306 311
307 if (status == NETDEV_TX_OK) 312 if (status == NETDEV_TX_OK)
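
Both the netpoll hunk above and the pktgen hunk below add txq_trans_update(txq) immediately after a successful ndo_start_xmit(). That refreshes the queue's trans_start timestamp, the input the TX watchdog compares against to detect stalled queues; updating only on NETDEV_TX_OK means only real transmissions reset the watchdog clock. A sketch of the consumer side, simplified from the dev_watchdog logic in net/sched/sch_generic.c (the helper name is illustrative):

static bool txq_looks_stuck(const struct netdev_queue *txq,
			    unsigned long watchdog_timeo)
{
	/* A stopped queue whose last transmission is too old is
	 * reported as a stall (triggering ndo_tx_timeout). */
	return netif_tx_queue_stopped(txq) &&
	       time_after(jiffies, txq->trans_start + watchdog_timeo);
}
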
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 0666a827bc62..19b8c20e98a4 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -3438,6 +3438,7 @@ static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev)
3438 retry_now: 3438 retry_now:
3439 ret = (*xmit)(pkt_dev->skb, odev); 3439 ret = (*xmit)(pkt_dev->skb, odev);
3440 if (likely(ret == NETDEV_TX_OK)) { 3440 if (likely(ret == NETDEV_TX_OK)) {
3441 txq_trans_update(txq);
3441 pkt_dev->last_ok = 1; 3442 pkt_dev->last_ok = 1;
3442 pkt_dev->sofar++; 3443 pkt_dev->sofar++;
3443 pkt_dev->seq_num++; 3444 pkt_dev->seq_num++;
@@ -3690,8 +3691,7 @@ out1:
3690#ifdef CONFIG_XFRM 3691#ifdef CONFIG_XFRM
3691 free_SAs(pkt_dev); 3692 free_SAs(pkt_dev);
3692#endif 3693#endif
3693 if (pkt_dev->flows) 3694 vfree(pkt_dev->flows);
3694 vfree(pkt_dev->flows);
3695 kfree(pkt_dev); 3695 kfree(pkt_dev);
3696 return err; 3696 return err;
3697} 3697}
@@ -3790,8 +3790,7 @@ static int pktgen_remove_device(struct pktgen_thread *t,
3790#ifdef CONFIG_XFRM 3790#ifdef CONFIG_XFRM
3791 free_SAs(pkt_dev); 3791 free_SAs(pkt_dev);
3792#endif 3792#endif
3793 if (pkt_dev->flows) 3793 vfree(pkt_dev->flows);
3794 vfree(pkt_dev->flows);
3795 kfree(pkt_dev); 3794 kfree(pkt_dev);
3796 return 0; 3795 return 0;
3797} 3796}
diff --git a/net/core/skb_dma_map.c b/net/core/skb_dma_map.c
index 86234923a3b7..79687dfd6957 100644
--- a/net/core/skb_dma_map.c
+++ b/net/core/skb_dma_map.c
@@ -20,7 +20,7 @@ int skb_dma_map(struct device *dev, struct sk_buff *skb,
20 if (dma_mapping_error(dev, map)) 20 if (dma_mapping_error(dev, map))
21 goto out_err; 21 goto out_err;
22 22
23 sp->dma_maps[0] = map; 23 sp->dma_head = map;
24 for (i = 0; i < sp->nr_frags; i++) { 24 for (i = 0; i < sp->nr_frags; i++) {
25 skb_frag_t *fp = &sp->frags[i]; 25 skb_frag_t *fp = &sp->frags[i];
26 26
@@ -28,9 +28,8 @@ int skb_dma_map(struct device *dev, struct sk_buff *skb,
28 fp->size, dir); 28 fp->size, dir);
29 if (dma_mapping_error(dev, map)) 29 if (dma_mapping_error(dev, map))
30 goto unwind; 30 goto unwind;
31 sp->dma_maps[i + 1] = map; 31 sp->dma_maps[i] = map;
32 } 32 }
33 sp->num_dma_maps = i + 1;
34 33
35 return 0; 34 return 0;
36 35
@@ -38,10 +37,10 @@ unwind:
38 while (--i >= 0) { 37 while (--i >= 0) {
39 skb_frag_t *fp = &sp->frags[i]; 38 skb_frag_t *fp = &sp->frags[i];
40 39
41 dma_unmap_page(dev, sp->dma_maps[i + 1], 40 dma_unmap_page(dev, sp->dma_maps[i],
42 fp->size, dir); 41 fp->size, dir);
43 } 42 }
44 dma_unmap_single(dev, sp->dma_maps[0], 43 dma_unmap_single(dev, sp->dma_head,
45 skb_headlen(skb), dir); 44 skb_headlen(skb), dir);
46out_err: 45out_err:
47 return -ENOMEM; 46 return -ENOMEM;
@@ -54,12 +53,12 @@ void skb_dma_unmap(struct device *dev, struct sk_buff *skb,
54 struct skb_shared_info *sp = skb_shinfo(skb); 53 struct skb_shared_info *sp = skb_shinfo(skb);
55 int i; 54 int i;
56 55
57 dma_unmap_single(dev, sp->dma_maps[0], 56 dma_unmap_single(dev, sp->dma_head,
58 skb_headlen(skb), dir); 57 skb_headlen(skb), dir);
59 for (i = 0; i < sp->nr_frags; i++) { 58 for (i = 0; i < sp->nr_frags; i++) {
60 skb_frag_t *fp = &sp->frags[i]; 59 skb_frag_t *fp = &sp->frags[i];
61 60
62 dma_unmap_page(dev, sp->dma_maps[i + 1], 61 dma_unmap_page(dev, sp->dma_maps[i],
63 fp->size, dir); 62 fp->size, dir);
64 } 63 }
65} 64}
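
The skb_dma_map()/skb_dma_unmap() rework gives the linear area its own slot, sp->dma_head, so the per-fragment array is indexed plainly as dma_maps[i] instead of dma_maps[i + 1], and the num_dma_maps bookkeeping disappears. After the patch, a driver-side unmap reduces to the following shape (kernel context, condensed from the hunks above):

struct skb_shared_info *sp = skb_shinfo(skb);
int i;

/* Head and fragments no longer share one off-by-one array. */
dma_unmap_single(dev, sp->dma_head, skb_headlen(skb), dir);
for (i = 0; i < sp->nr_frags; i++)
	dma_unmap_page(dev, sp->dma_maps[i], sp->frags[i].size, dir);
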
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index e505b5392e1e..9e0597d189b0 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -39,6 +39,7 @@
39#include <linux/module.h> 39#include <linux/module.h>
40#include <linux/types.h> 40#include <linux/types.h>
41#include <linux/kernel.h> 41#include <linux/kernel.h>
42#include <linux/kmemcheck.h>
42#include <linux/mm.h> 43#include <linux/mm.h>
43#include <linux/interrupt.h> 44#include <linux/interrupt.h>
44#include <linux/in.h> 45#include <linux/in.h>
@@ -65,7 +66,7 @@
65 66
66#include <asm/uaccess.h> 67#include <asm/uaccess.h>
67#include <asm/system.h> 68#include <asm/system.h>
68#include <trace/skb.h> 69#include <trace/events/skb.h>
69 70
70#include "kmap_skb.h" 71#include "kmap_skb.h"
71 72
@@ -201,6 +202,12 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
201 skb->data = data; 202 skb->data = data;
202 skb_reset_tail_pointer(skb); 203 skb_reset_tail_pointer(skb);
203 skb->end = skb->tail + size; 204 skb->end = skb->tail + size;
205 kmemcheck_annotate_bitfield(skb, flags1);
206 kmemcheck_annotate_bitfield(skb, flags2);
207#ifdef NET_SKBUFF_DATA_USES_OFFSET
208 skb->mac_header = ~0U;
209#endif
210
204 /* make sure we initialize shinfo sequentially */ 211 /* make sure we initialize shinfo sequentially */
205 shinfo = skb_shinfo(skb); 212 shinfo = skb_shinfo(skb);
206 atomic_set(&shinfo->dataref, 1); 213 atomic_set(&shinfo->dataref, 1);
@@ -210,13 +217,15 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
210 shinfo->gso_type = 0; 217 shinfo->gso_type = 0;
211 shinfo->ip6_frag_id = 0; 218 shinfo->ip6_frag_id = 0;
212 shinfo->tx_flags.flags = 0; 219 shinfo->tx_flags.flags = 0;
213 shinfo->frag_list = NULL; 220 skb_frag_list_init(skb);
214 memset(&shinfo->hwtstamps, 0, sizeof(shinfo->hwtstamps)); 221 memset(&shinfo->hwtstamps, 0, sizeof(shinfo->hwtstamps));
215 222
216 if (fclone) { 223 if (fclone) {
217 struct sk_buff *child = skb + 1; 224 struct sk_buff *child = skb + 1;
218 atomic_t *fclone_ref = (atomic_t *) (child + 1); 225 atomic_t *fclone_ref = (atomic_t *) (child + 1);
219 226
227 kmemcheck_annotate_bitfield(child, flags1);
228 kmemcheck_annotate_bitfield(child, flags2);
220 skb->fclone = SKB_FCLONE_ORIG; 229 skb->fclone = SKB_FCLONE_ORIG;
221 atomic_set(fclone_ref, 1); 230 atomic_set(fclone_ref, 1);
222 231
@@ -323,7 +332,7 @@ static void skb_clone_fraglist(struct sk_buff *skb)
323{ 332{
324 struct sk_buff *list; 333 struct sk_buff *list;
325 334
326 for (list = skb_shinfo(skb)->frag_list; list; list = list->next) 335 skb_walk_frags(skb, list)
327 skb_get(list); 336 skb_get(list);
328} 337}
329 338
@@ -338,7 +347,7 @@ static void skb_release_data(struct sk_buff *skb)
338 put_page(skb_shinfo(skb)->frags[i].page); 347 put_page(skb_shinfo(skb)->frags[i].page);
339 } 348 }
340 349
341 if (skb_shinfo(skb)->frag_list) 350 if (skb_has_frags(skb))
342 skb_drop_fraglist(skb); 351 skb_drop_fraglist(skb);
343 352
344 kfree(skb->head); 353 kfree(skb->head);
@@ -381,7 +390,7 @@ static void kfree_skbmem(struct sk_buff *skb)
381 390
382static void skb_release_head_state(struct sk_buff *skb) 391static void skb_release_head_state(struct sk_buff *skb)
383{ 392{
384 dst_release(skb->dst); 393 skb_dst_drop(skb);
385#ifdef CONFIG_XFRM 394#ifdef CONFIG_XFRM
386 secpath_put(skb->sp); 395 secpath_put(skb->sp);
387#endif 396#endif
@@ -503,7 +512,7 @@ int skb_recycle_check(struct sk_buff *skb, int skb_size)
503 shinfo->gso_type = 0; 512 shinfo->gso_type = 0;
504 shinfo->ip6_frag_id = 0; 513 shinfo->ip6_frag_id = 0;
505 shinfo->tx_flags.flags = 0; 514 shinfo->tx_flags.flags = 0;
506 shinfo->frag_list = NULL; 515 skb_frag_list_init(skb);
507 memset(&shinfo->hwtstamps, 0, sizeof(shinfo->hwtstamps)); 516 memset(&shinfo->hwtstamps, 0, sizeof(shinfo->hwtstamps));
508 517
509 memset(skb, 0, offsetof(struct sk_buff, tail)); 518 memset(skb, 0, offsetof(struct sk_buff, tail));
@@ -521,13 +530,12 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
521 new->transport_header = old->transport_header; 530 new->transport_header = old->transport_header;
522 new->network_header = old->network_header; 531 new->network_header = old->network_header;
523 new->mac_header = old->mac_header; 532 new->mac_header = old->mac_header;
524 new->dst = dst_clone(old->dst); 533 skb_dst_set(new, dst_clone(skb_dst(old)));
525#ifdef CONFIG_XFRM 534#ifdef CONFIG_XFRM
526 new->sp = secpath_get(old->sp); 535 new->sp = secpath_get(old->sp);
527#endif 536#endif
528 memcpy(new->cb, old->cb, sizeof(old->cb)); 537 memcpy(new->cb, old->cb, sizeof(old->cb));
529 new->csum_start = old->csum_start; 538 new->csum = old->csum;
530 new->csum_offset = old->csum_offset;
531 new->local_df = old->local_df; 539 new->local_df = old->local_df;
532 new->pkt_type = old->pkt_type; 540 new->pkt_type = old->pkt_type;
533 new->ip_summed = old->ip_summed; 541 new->ip_summed = old->ip_summed;
@@ -538,6 +546,7 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
538#endif 546#endif
539 new->protocol = old->protocol; 547 new->protocol = old->protocol;
540 new->mark = old->mark; 548 new->mark = old->mark;
549 new->iif = old->iif;
541 __nf_copy(new, old); 550 __nf_copy(new, old);
542#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \ 551#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
543 defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE) 552 defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
@@ -550,10 +559,17 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
550#endif 559#endif
551#endif 560#endif
552 new->vlan_tci = old->vlan_tci; 561 new->vlan_tci = old->vlan_tci;
562#if defined(CONFIG_MAC80211) || defined(CONFIG_MAC80211_MODULE)
563 new->do_not_encrypt = old->do_not_encrypt;
564#endif
553 565
554 skb_copy_secmark(new, old); 566 skb_copy_secmark(new, old);
555} 567}
556 568
569/*
570 * You should not add any new code to this function. Add it to
571 * __copy_skb_header above instead.
572 */
557static struct sk_buff *__skb_clone(struct sk_buff *n, struct sk_buff *skb) 573static struct sk_buff *__skb_clone(struct sk_buff *n, struct sk_buff *skb)
558{ 574{
559#define C(x) n->x = skb->x 575#define C(x) n->x = skb->x
@@ -569,16 +585,11 @@ static struct sk_buff *__skb_clone(struct sk_buff *n, struct sk_buff *skb)
569 n->cloned = 1; 585 n->cloned = 1;
570 n->nohdr = 0; 586 n->nohdr = 0;
571 n->destructor = NULL; 587 n->destructor = NULL;
572 C(iif);
573 C(tail); 588 C(tail);
574 C(end); 589 C(end);
575 C(head); 590 C(head);
576 C(data); 591 C(data);
577 C(truesize); 592 C(truesize);
578#if defined(CONFIG_MAC80211) || defined(CONFIG_MAC80211_MODULE)
579 C(do_not_encrypt);
580 C(requeue);
581#endif
582 atomic_set(&n->users, 1); 593 atomic_set(&n->users, 1);
583 594
584 atomic_inc(&(skb_shinfo(skb)->dataref)); 595 atomic_inc(&(skb_shinfo(skb)->dataref));
@@ -633,6 +644,9 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
633 n = kmem_cache_alloc(skbuff_head_cache, gfp_mask); 644 n = kmem_cache_alloc(skbuff_head_cache, gfp_mask);
634 if (!n) 645 if (!n)
635 return NULL; 646 return NULL;
647
648 kmemcheck_annotate_bitfield(n, flags1);
649 kmemcheck_annotate_bitfield(n, flags2);
636 n->fclone = SKB_FCLONE_UNAVAILABLE; 650 n->fclone = SKB_FCLONE_UNAVAILABLE;
637 } 651 }
638 652
@@ -655,7 +669,8 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
655 /* {transport,network,mac}_header are relative to skb->head */ 669 /* {transport,network,mac}_header are relative to skb->head */
656 new->transport_header += offset; 670 new->transport_header += offset;
657 new->network_header += offset; 671 new->network_header += offset;
658 new->mac_header += offset; 672 if (skb_mac_header_was_set(new))
673 new->mac_header += offset;
659#endif 674#endif
660 skb_shinfo(new)->gso_size = skb_shinfo(old)->gso_size; 675 skb_shinfo(new)->gso_size = skb_shinfo(old)->gso_size;
661 skb_shinfo(new)->gso_segs = skb_shinfo(old)->gso_segs; 676 skb_shinfo(new)->gso_segs = skb_shinfo(old)->gso_segs;
@@ -755,7 +770,7 @@ struct sk_buff *pskb_copy(struct sk_buff *skb, gfp_t gfp_mask)
755 skb_shinfo(n)->nr_frags = i; 770 skb_shinfo(n)->nr_frags = i;
756 } 771 }
757 772
758 if (skb_shinfo(skb)->frag_list) { 773 if (skb_has_frags(skb)) {
759 skb_shinfo(n)->frag_list = skb_shinfo(skb)->frag_list; 774 skb_shinfo(n)->frag_list = skb_shinfo(skb)->frag_list;
760 skb_clone_fraglist(n); 775 skb_clone_fraglist(n);
761 } 776 }
@@ -818,7 +833,7 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
818 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) 833 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
819 get_page(skb_shinfo(skb)->frags[i].page); 834 get_page(skb_shinfo(skb)->frags[i].page);
820 835
821 if (skb_shinfo(skb)->frag_list) 836 if (skb_has_frags(skb))
822 skb_clone_fraglist(skb); 837 skb_clone_fraglist(skb);
823 838
824 skb_release_data(skb); 839 skb_release_data(skb);
@@ -837,7 +852,8 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
837 skb->tail += off; 852 skb->tail += off;
838 skb->transport_header += off; 853 skb->transport_header += off;
839 skb->network_header += off; 854 skb->network_header += off;
840 skb->mac_header += off; 855 if (skb_mac_header_was_set(skb))
856 skb->mac_header += off;
841 skb->csum_start += nhead; 857 skb->csum_start += nhead;
842 skb->cloned = 0; 858 skb->cloned = 0;
843 skb->hdr_len = 0; 859 skb->hdr_len = 0;
@@ -929,7 +945,8 @@ struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
929#ifdef NET_SKBUFF_DATA_USES_OFFSET 945#ifdef NET_SKBUFF_DATA_USES_OFFSET
930 n->transport_header += off; 946 n->transport_header += off;
931 n->network_header += off; 947 n->network_header += off;
932 n->mac_header += off; 948 if (skb_mac_header_was_set(skb))
949 n->mac_header += off;
933#endif 950#endif
934 951
935 return n; 952 return n;
@@ -1090,7 +1107,7 @@ drop_pages:
1090 for (; i < nfrags; i++) 1107 for (; i < nfrags; i++)
1091 put_page(skb_shinfo(skb)->frags[i].page); 1108 put_page(skb_shinfo(skb)->frags[i].page);
1092 1109
1093 if (skb_shinfo(skb)->frag_list) 1110 if (skb_has_frags(skb))
1094 skb_drop_fraglist(skb); 1111 skb_drop_fraglist(skb);
1095 goto done; 1112 goto done;
1096 } 1113 }
@@ -1185,7 +1202,7 @@ unsigned char *__pskb_pull_tail(struct sk_buff *skb, int delta)
1185 /* Optimization: no fragments, no reasons to preestimate 1202 /* Optimization: no fragments, no reasons to preestimate
1186 * size of pulled pages. Superb. 1203 * size of pulled pages. Superb.
1187 */ 1204 */
1188 if (!skb_shinfo(skb)->frag_list) 1205 if (!skb_has_frags(skb))
1189 goto pull_pages; 1206 goto pull_pages;
1190 1207
1191 /* Estimate size of pulled pages. */ 1208 /* Estimate size of pulled pages. */
@@ -1282,8 +1299,9 @@ EXPORT_SYMBOL(__pskb_pull_tail);
1282 1299
1283int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len) 1300int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len)
1284{ 1301{
1285 int i, copy;
1286 int start = skb_headlen(skb); 1302 int start = skb_headlen(skb);
1303 struct sk_buff *frag_iter;
1304 int i, copy;
1287 1305
1288 if (offset > (int)skb->len - len) 1306 if (offset > (int)skb->len - len)
1289 goto fault; 1307 goto fault;
@@ -1325,28 +1343,23 @@ int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len)
1325 start = end; 1343 start = end;
1326 } 1344 }
1327 1345
1328 if (skb_shinfo(skb)->frag_list) { 1346 skb_walk_frags(skb, frag_iter) {
1329 struct sk_buff *list = skb_shinfo(skb)->frag_list; 1347 int end;
1330 1348
1331 for (; list; list = list->next) { 1349 WARN_ON(start > offset + len);
1332 int end; 1350
1333 1351 end = start + frag_iter->len;
1334 WARN_ON(start > offset + len); 1352 if ((copy = end - offset) > 0) {
1335 1353 if (copy > len)
1336 end = start + list->len; 1354 copy = len;
1337 if ((copy = end - offset) > 0) { 1355 if (skb_copy_bits(frag_iter, offset - start, to, copy))
1338 if (copy > len) 1356 goto fault;
1339 copy = len; 1357 if ((len -= copy) == 0)
1340 if (skb_copy_bits(list, offset - start, 1358 return 0;
1341 to, copy)) 1359 offset += copy;
1342 goto fault; 1360 to += copy;
1343 if ((len -= copy) == 0)
1344 return 0;
1345 offset += copy;
1346 to += copy;
1347 }
1348 start = end;
1349 } 1361 }
1362 start = end;
1350 } 1363 }
1351 if (!len) 1364 if (!len)
1352 return 0; 1365 return 0;
@@ -1531,6 +1544,7 @@ int skb_splice_bits(struct sk_buff *skb, unsigned int offset,
1531 .ops = &sock_pipe_buf_ops, 1544 .ops = &sock_pipe_buf_ops,
1532 .spd_release = sock_spd_release, 1545 .spd_release = sock_spd_release,
1533 }; 1546 };
1547 struct sk_buff *frag_iter;
1534 struct sock *sk = skb->sk; 1548 struct sock *sk = skb->sk;
1535 1549
1536 /* 1550 /*
@@ -1545,13 +1559,11 @@ int skb_splice_bits(struct sk_buff *skb, unsigned int offset,
1545 /* 1559 /*
1546 * now see if we have a frag_list to map 1560 * now see if we have a frag_list to map
1547 */ 1561 */
1548 if (skb_shinfo(skb)->frag_list) { 1562 skb_walk_frags(skb, frag_iter) {
1549 struct sk_buff *list = skb_shinfo(skb)->frag_list; 1563 if (!tlen)
1550 1564 break;
1551 for (; list && tlen; list = list->next) { 1565 if (__skb_splice_bits(frag_iter, &offset, &tlen, &spd, sk))
1552 if (__skb_splice_bits(list, &offset, &tlen, &spd, sk)) 1566 break;
1553 break;
1554 }
1555 } 1567 }
1556 1568
1557done: 1569done:
@@ -1590,8 +1602,9 @@ done:
1590 1602
1591int skb_store_bits(struct sk_buff *skb, int offset, const void *from, int len) 1603int skb_store_bits(struct sk_buff *skb, int offset, const void *from, int len)
1592{ 1604{
1593 int i, copy;
1594 int start = skb_headlen(skb); 1605 int start = skb_headlen(skb);
1606 struct sk_buff *frag_iter;
1607 int i, copy;
1595 1608
1596 if (offset > (int)skb->len - len) 1609 if (offset > (int)skb->len - len)
1597 goto fault; 1610 goto fault;
@@ -1632,28 +1645,24 @@ int skb_store_bits(struct sk_buff *skb, int offset, const void *from, int len)
1632 start = end; 1645 start = end;
1633 } 1646 }
1634 1647
1635 if (skb_shinfo(skb)->frag_list) { 1648 skb_walk_frags(skb, frag_iter) {
1636 struct sk_buff *list = skb_shinfo(skb)->frag_list; 1649 int end;
1637 1650
1638 for (; list; list = list->next) { 1651 WARN_ON(start > offset + len);
1639 int end; 1652
1640 1653 end = start + frag_iter->len;
1641 WARN_ON(start > offset + len); 1654 if ((copy = end - offset) > 0) {
1642 1655 if (copy > len)
1643 end = start + list->len; 1656 copy = len;
1644 if ((copy = end - offset) > 0) { 1657 if (skb_store_bits(frag_iter, offset - start,
1645 if (copy > len) 1658 from, copy))
1646 copy = len; 1659 goto fault;
1647 if (skb_store_bits(list, offset - start, 1660 if ((len -= copy) == 0)
1648 from, copy)) 1661 return 0;
1649 goto fault; 1662 offset += copy;
1650 if ((len -= copy) == 0) 1663 from += copy;
1651 return 0;
1652 offset += copy;
1653 from += copy;
1654 }
1655 start = end;
1656 } 1664 }
1665 start = end;
1657 } 1666 }
1658 if (!len) 1667 if (!len)
1659 return 0; 1668 return 0;
@@ -1670,6 +1679,7 @@ __wsum skb_checksum(const struct sk_buff *skb, int offset,
1670{ 1679{
1671 int start = skb_headlen(skb); 1680 int start = skb_headlen(skb);
1672 int i, copy = start - offset; 1681 int i, copy = start - offset;
1682 struct sk_buff *frag_iter;
1673 int pos = 0; 1683 int pos = 0;
1674 1684
1675 /* Checksum header. */ 1685 /* Checksum header. */
@@ -1709,29 +1719,25 @@ __wsum skb_checksum(const struct sk_buff *skb, int offset,
1709 start = end; 1719 start = end;
1710 } 1720 }
1711 1721
1712 if (skb_shinfo(skb)->frag_list) { 1722 skb_walk_frags(skb, frag_iter) {
1713 struct sk_buff *list = skb_shinfo(skb)->frag_list; 1723 int end;
1714 1724
1715 for (; list; list = list->next) { 1725 WARN_ON(start > offset + len);
1716 int end; 1726
1717 1727 end = start + frag_iter->len;
1718 WARN_ON(start > offset + len); 1728 if ((copy = end - offset) > 0) {
1719 1729 __wsum csum2;
1720 end = start + list->len; 1730 if (copy > len)
1721 if ((copy = end - offset) > 0) { 1731 copy = len;
1722 __wsum csum2; 1732 csum2 = skb_checksum(frag_iter, offset - start,
1723 if (copy > len) 1733 copy, 0);
1724 copy = len; 1734 csum = csum_block_add(csum, csum2, pos);
1725 csum2 = skb_checksum(list, offset - start, 1735 if ((len -= copy) == 0)
1726 copy, 0); 1736 return csum;
1727 csum = csum_block_add(csum, csum2, pos); 1737 offset += copy;
1728 if ((len -= copy) == 0) 1738 pos += copy;
1729 return csum;
1730 offset += copy;
1731 pos += copy;
1732 }
1733 start = end;
1734 } 1739 }
1740 start = end;
1735 } 1741 }
1736 BUG_ON(len); 1742 BUG_ON(len);
1737 1743
@@ -1746,6 +1752,7 @@ __wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset,
1746{ 1752{
1747 int start = skb_headlen(skb); 1753 int start = skb_headlen(skb);
1748 int i, copy = start - offset; 1754 int i, copy = start - offset;
1755 struct sk_buff *frag_iter;
1749 int pos = 0; 1756 int pos = 0;
1750 1757
1751 /* Copy header. */ 1758 /* Copy header. */
@@ -1790,31 +1797,27 @@ __wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset,
1790 start = end; 1797 start = end;
1791 } 1798 }
1792 1799
1793 if (skb_shinfo(skb)->frag_list) { 1800 skb_walk_frags(skb, frag_iter) {
1794 struct sk_buff *list = skb_shinfo(skb)->frag_list; 1801 __wsum csum2;
1802 int end;
1795 1803
1796 for (; list; list = list->next) { 1804 WARN_ON(start > offset + len);
1797 __wsum csum2; 1805
1798 int end; 1806 end = start + frag_iter->len;
1799 1807 if ((copy = end - offset) > 0) {
1800 WARN_ON(start > offset + len); 1808 if (copy > len)
1801 1809 copy = len;
1802 end = start + list->len; 1810 csum2 = skb_copy_and_csum_bits(frag_iter,
1803 if ((copy = end - offset) > 0) { 1811 offset - start,
1804 if (copy > len) 1812 to, copy, 0);
1805 copy = len; 1813 csum = csum_block_add(csum, csum2, pos);
1806 csum2 = skb_copy_and_csum_bits(list, 1814 if ((len -= copy) == 0)
1807 offset - start, 1815 return csum;
1808 to, copy, 0); 1816 offset += copy;
1809 csum = csum_block_add(csum, csum2, pos); 1817 to += copy;
1810 if ((len -= copy) == 0) 1818 pos += copy;
1811 return csum;
1812 offset += copy;
1813 to += copy;
1814 pos += copy;
1815 }
1816 start = end;
1817 } 1819 }
1820 start = end;
1818 } 1821 }
1819 BUG_ON(len); 1822 BUG_ON(len);
1820 return csum; 1823 return csum;
@@ -2324,8 +2327,7 @@ next_skb:
2324 st->frag_data = NULL; 2327 st->frag_data = NULL;
2325 } 2328 }
2326 2329
2327 if (st->root_skb == st->cur_skb && 2330 if (st->root_skb == st->cur_skb && skb_has_frags(st->root_skb)) {
2328 skb_shinfo(st->root_skb)->frag_list) {
2329 st->cur_skb = skb_shinfo(st->root_skb)->frag_list; 2331 st->cur_skb = skb_shinfo(st->root_skb)->frag_list;
2330 st->frag_idx = 0; 2332 st->frag_idx = 0;
2331 goto next_skb; 2333 goto next_skb;
@@ -2636,7 +2638,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, int features)
2636 } else 2638 } else
2637 skb_get(fskb2); 2639 skb_get(fskb2);
2638 2640
2639 BUG_ON(skb_shinfo(nskb)->frag_list); 2641 SKB_FRAG_ASSERT(nskb);
2640 skb_shinfo(nskb)->frag_list = fskb2; 2642 skb_shinfo(nskb)->frag_list = fskb2;
2641 } 2643 }
2642 2644
@@ -2661,30 +2663,40 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb)
2661{ 2663{
2662 struct sk_buff *p = *head; 2664 struct sk_buff *p = *head;
2663 struct sk_buff *nskb; 2665 struct sk_buff *nskb;
2666 struct skb_shared_info *skbinfo = skb_shinfo(skb);
2667 struct skb_shared_info *pinfo = skb_shinfo(p);
2664 unsigned int headroom; 2668 unsigned int headroom;
2665 unsigned int len = skb_gro_len(skb); 2669 unsigned int len = skb_gro_len(skb);
2670 unsigned int offset = skb_gro_offset(skb);
2671 unsigned int headlen = skb_headlen(skb);
2666 2672
2667 if (p->len + len >= 65536) 2673 if (p->len + len >= 65536)
2668 return -E2BIG; 2674 return -E2BIG;
2669 2675
2670 if (skb_shinfo(p)->frag_list) 2676 if (pinfo->frag_list)
2671 goto merge; 2677 goto merge;
2672 else if (skb_headlen(skb) <= skb_gro_offset(skb)) { 2678 else if (headlen <= offset) {
2673 if (skb_shinfo(p)->nr_frags + skb_shinfo(skb)->nr_frags > 2679 skb_frag_t *frag;
2674 MAX_SKB_FRAGS) 2680 skb_frag_t *frag2;
2681 int i = skbinfo->nr_frags;
2682 int nr_frags = pinfo->nr_frags + i;
2683
2684 offset -= headlen;
2685
2686 if (nr_frags > MAX_SKB_FRAGS)
2675 return -E2BIG; 2687 return -E2BIG;
2676 2688
2677 skb_shinfo(skb)->frags[0].page_offset += 2689 pinfo->nr_frags = nr_frags;
2678 skb_gro_offset(skb) - skb_headlen(skb); 2690 skbinfo->nr_frags = 0;
2679 skb_shinfo(skb)->frags[0].size -=
2680 skb_gro_offset(skb) - skb_headlen(skb);
2681 2691
2682 memcpy(skb_shinfo(p)->frags + skb_shinfo(p)->nr_frags, 2692 frag = pinfo->frags + nr_frags;
2683 skb_shinfo(skb)->frags, 2693 frag2 = skbinfo->frags + i;
2684 skb_shinfo(skb)->nr_frags * sizeof(skb_frag_t)); 2694 do {
2695 *--frag = *--frag2;
2696 } while (--i);
2685 2697
2686 skb_shinfo(p)->nr_frags += skb_shinfo(skb)->nr_frags; 2698 frag->page_offset += offset;
2687 skb_shinfo(skb)->nr_frags = 0; 2699 frag->size -= offset;
2688 2700
2689 skb->truesize -= skb->data_len; 2701 skb->truesize -= skb->data_len;
2690 skb->len -= skb->data_len; 2702 skb->len -= skb->data_len;
@@ -2715,7 +2727,7 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb)
2715 2727
2716 *NAPI_GRO_CB(nskb) = *NAPI_GRO_CB(p); 2728 *NAPI_GRO_CB(nskb) = *NAPI_GRO_CB(p);
2717 skb_shinfo(nskb)->frag_list = p; 2729 skb_shinfo(nskb)->frag_list = p;
2718 skb_shinfo(nskb)->gso_size = skb_shinfo(p)->gso_size; 2730 skb_shinfo(nskb)->gso_size = pinfo->gso_size;
2719 skb_header_release(p); 2731 skb_header_release(p);
2720 nskb->prev = p; 2732 nskb->prev = p;
2721 2733
@@ -2730,16 +2742,13 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb)
2730 p = nskb; 2742 p = nskb;
2731 2743
2732merge: 2744merge:
2733 if (skb_gro_offset(skb) > skb_headlen(skb)) { 2745 if (offset > headlen) {
2734 skb_shinfo(skb)->frags[0].page_offset += 2746 skbinfo->frags[0].page_offset += offset - headlen;
2735 skb_gro_offset(skb) - skb_headlen(skb); 2747 skbinfo->frags[0].size -= offset - headlen;
2736 skb_shinfo(skb)->frags[0].size -= 2748 offset = headlen;
2737 skb_gro_offset(skb) - skb_headlen(skb);
2738 skb_gro_reset_offset(skb);
2739 skb_gro_pull(skb, skb_headlen(skb));
2740 } 2749 }
2741 2750
2742 __skb_pull(skb, skb_gro_offset(skb)); 2751 __skb_pull(skb, offset);
2743 2752
2744 p->prev->next = skb; 2753 p->prev->next = skb;
2745 p->prev = skb; 2754 p->prev = skb;
@@ -2786,6 +2795,7 @@ __skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len)
2786{ 2795{
2787 int start = skb_headlen(skb); 2796 int start = skb_headlen(skb);
2788 int i, copy = start - offset; 2797 int i, copy = start - offset;
2798 struct sk_buff *frag_iter;
2789 int elt = 0; 2799 int elt = 0;
2790 2800
2791 if (copy > 0) { 2801 if (copy > 0) {
@@ -2819,26 +2829,22 @@ __skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len)
2819 start = end; 2829 start = end;
2820 } 2830 }
2821 2831
2822 if (skb_shinfo(skb)->frag_list) { 2832 skb_walk_frags(skb, frag_iter) {
2823 struct sk_buff *list = skb_shinfo(skb)->frag_list; 2833 int end;
2824
2825 for (; list; list = list->next) {
2826 int end;
2827 2834
2828 WARN_ON(start > offset + len); 2835 WARN_ON(start > offset + len);
2829 2836
2830 end = start + list->len; 2837 end = start + frag_iter->len;
2831 if ((copy = end - offset) > 0) { 2838 if ((copy = end - offset) > 0) {
2832 if (copy > len) 2839 if (copy > len)
2833 copy = len; 2840 copy = len;
2834 elt += __skb_to_sgvec(list, sg+elt, offset - start, 2841 elt += __skb_to_sgvec(frag_iter, sg+elt, offset - start,
2835 copy); 2842 copy);
2836 if ((len -= copy) == 0) 2843 if ((len -= copy) == 0)
2837 return elt; 2844 return elt;
2838 offset += copy; 2845 offset += copy;
2839 }
2840 start = end;
2841 } 2846 }
2847 start = end;
2842 } 2848 }
2843 BUG_ON(len); 2849 BUG_ON(len);
2844 return elt; 2850 return elt;
@@ -2886,7 +2892,7 @@ int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer)
2886 return -ENOMEM; 2892 return -ENOMEM;
2887 2893
2888 /* Easy case. Most of packets will go this way. */ 2894 /* Easy case. Most of packets will go this way. */
2889 if (!skb_shinfo(skb)->frag_list) { 2895 if (!skb_has_frags(skb)) {
2890 /* A little of trouble, not enough of space for trailer. 2896 /* A little of trouble, not enough of space for trailer.
2891 * This should not happen, when stack is tuned to generate 2897 * This should not happen, when stack is tuned to generate
2892 * good frames. OK, on miss we reallocate and reserve even more 2898 * good frames. OK, on miss we reallocate and reserve even more
@@ -2921,7 +2927,7 @@ int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer)
2921 2927
2922 if (skb1->next == NULL && tailbits) { 2928 if (skb1->next == NULL && tailbits) {
2923 if (skb_shinfo(skb1)->nr_frags || 2929 if (skb_shinfo(skb1)->nr_frags ||
2924 skb_shinfo(skb1)->frag_list || 2930 skb_has_frags(skb1) ||
2925 skb_tailroom(skb1) < tailbits) 2931 skb_tailroom(skb1) < tailbits)
2926 ntail = tailbits + 128; 2932 ntail = tailbits + 128;
2927 } 2933 }
@@ -2930,7 +2936,7 @@ int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer)
2930 skb_cloned(skb1) || 2936 skb_cloned(skb1) ||
2931 ntail || 2937 ntail ||
2932 skb_shinfo(skb1)->nr_frags || 2938 skb_shinfo(skb1)->nr_frags ||
2933 skb_shinfo(skb1)->frag_list) { 2939 skb_has_frags(skb1)) {
2934 struct sk_buff *skb2; 2940 struct sk_buff *skb2;
2935 2941
2936 /* Fuck, we are miserable poor guys... */ 2942 /* Fuck, we are miserable poor guys... */
@@ -3016,12 +3022,12 @@ EXPORT_SYMBOL_GPL(skb_tstamp_tx);
3016 */ 3022 */
3017bool skb_partial_csum_set(struct sk_buff *skb, u16 start, u16 off) 3023bool skb_partial_csum_set(struct sk_buff *skb, u16 start, u16 off)
3018{ 3024{
3019 if (unlikely(start > skb->len - 2) || 3025 if (unlikely(start > skb_headlen(skb)) ||
3020 unlikely((int)start + off > skb->len - 2)) { 3026 unlikely((int)start + off > skb_headlen(skb) - 2)) {
3021 if (net_ratelimit()) 3027 if (net_ratelimit())
3022 printk(KERN_WARNING 3028 printk(KERN_WARNING
3023 "bad partial csum: csum=%u/%u len=%u\n", 3029 "bad partial csum: csum=%u/%u len=%u\n",
3024 start, off, skb->len); 3030 start, off, skb_headlen(skb));
3025 return false; 3031 return false;
3026 } 3032 }
3027 skb->ip_summed = CHECKSUM_PARTIAL; 3033 skb->ip_summed = CHECKSUM_PARTIAL;
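
Most of the skbuff.c churn above is mechanical: every open-coded walk of skb_shinfo(skb)->frag_list becomes skb_walk_frags(), and every NULL test becomes skb_has_frags() (or SKB_FRAG_ASSERT for the BUG_ON case). Judging by how the hunks read, the helpers expand to roughly the following; this is a sketch, not the verbatim <linux/skbuff.h> definitions:

#define skb_walk_frags(skb, iter) \
	for (iter = skb_shinfo(skb)->frag_list; iter; iter = iter->next)

static inline bool skb_has_frags(const struct sk_buff *skb)
{
	return skb_shinfo(skb)->frag_list != NULL;
}

Centralizing the iteration also flattens one nesting level in skb_copy_bits(), skb_store_bits(), skb_checksum() and friends, which is where most of the line churn in this file comes from.
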
diff --git a/net/core/sock.c b/net/core/sock.c
index 7dbf3ffb35cc..b0ba569bc973 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -155,6 +155,7 @@ static const char *af_family_key_strings[AF_MAX+1] = {
155 "sk_lock-27" , "sk_lock-28" , "sk_lock-AF_CAN" , 155 "sk_lock-27" , "sk_lock-28" , "sk_lock-AF_CAN" ,
156 "sk_lock-AF_TIPC" , "sk_lock-AF_BLUETOOTH", "sk_lock-IUCV" , 156 "sk_lock-AF_TIPC" , "sk_lock-AF_BLUETOOTH", "sk_lock-IUCV" ,
157 "sk_lock-AF_RXRPC" , "sk_lock-AF_ISDN" , "sk_lock-AF_PHONET" , 157 "sk_lock-AF_RXRPC" , "sk_lock-AF_ISDN" , "sk_lock-AF_PHONET" ,
158 "sk_lock-AF_IEEE802154",
158 "sk_lock-AF_MAX" 159 "sk_lock-AF_MAX"
159}; 160};
160static const char *af_family_slock_key_strings[AF_MAX+1] = { 161static const char *af_family_slock_key_strings[AF_MAX+1] = {
@@ -170,6 +171,7 @@ static const char *af_family_slock_key_strings[AF_MAX+1] = {
170 "slock-27" , "slock-28" , "slock-AF_CAN" , 171 "slock-27" , "slock-28" , "slock-AF_CAN" ,
171 "slock-AF_TIPC" , "slock-AF_BLUETOOTH", "slock-AF_IUCV" , 172 "slock-AF_TIPC" , "slock-AF_BLUETOOTH", "slock-AF_IUCV" ,
172 "slock-AF_RXRPC" , "slock-AF_ISDN" , "slock-AF_PHONET" , 173 "slock-AF_RXRPC" , "slock-AF_ISDN" , "slock-AF_PHONET" ,
174 "slock-AF_IEEE802154",
173 "slock-AF_MAX" 175 "slock-AF_MAX"
174}; 176};
175static const char *af_family_clock_key_strings[AF_MAX+1] = { 177static const char *af_family_clock_key_strings[AF_MAX+1] = {
@@ -185,6 +187,7 @@ static const char *af_family_clock_key_strings[AF_MAX+1] = {
185 "clock-27" , "clock-28" , "clock-AF_CAN" , 187 "clock-27" , "clock-28" , "clock-AF_CAN" ,
186 "clock-AF_TIPC" , "clock-AF_BLUETOOTH", "clock-AF_IUCV" , 188 "clock-AF_TIPC" , "clock-AF_BLUETOOTH", "clock-AF_IUCV" ,
187 "clock-AF_RXRPC" , "clock-AF_ISDN" , "clock-AF_PHONET" , 189 "clock-AF_RXRPC" , "clock-AF_ISDN" , "clock-AF_PHONET" ,
190 "clock-AF_IEEE802154",
188 "clock-AF_MAX" 191 "clock-AF_MAX"
189}; 192};
190 193
@@ -212,6 +215,7 @@ __u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX;
212 215
 213/* Maximal space eaten by iovec or ancillary data plus some space */ 216
214int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512); 217int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512);
218EXPORT_SYMBOL(sysctl_optmem_max);
215 219
216static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen) 220static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen)
217{ 221{
@@ -444,7 +448,7 @@ static inline void sock_valbool_flag(struct sock *sk, int bit, int valbool)
444int sock_setsockopt(struct socket *sock, int level, int optname, 448int sock_setsockopt(struct socket *sock, int level, int optname,
445 char __user *optval, int optlen) 449 char __user *optval, int optlen)
446{ 450{
447 struct sock *sk=sock->sk; 451 struct sock *sk = sock->sk;
448 int val; 452 int val;
449 int valbool; 453 int valbool;
450 struct linger ling; 454 struct linger ling;
@@ -463,15 +467,15 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
463 if (get_user(val, (int __user *)optval)) 467 if (get_user(val, (int __user *)optval))
464 return -EFAULT; 468 return -EFAULT;
465 469
466 valbool = val?1:0; 470 valbool = val ? 1 : 0;
467 471
468 lock_sock(sk); 472 lock_sock(sk);
469 473
470 switch(optname) { 474 switch (optname) {
471 case SO_DEBUG: 475 case SO_DEBUG:
472 if (val && !capable(CAP_NET_ADMIN)) { 476 if (val && !capable(CAP_NET_ADMIN))
473 ret = -EACCES; 477 ret = -EACCES;
474 } else 478 else
475 sock_valbool_flag(sk, SOCK_DBG, valbool); 479 sock_valbool_flag(sk, SOCK_DBG, valbool);
476 break; 480 break;
477 case SO_REUSEADDR: 481 case SO_REUSEADDR:
@@ -582,7 +586,7 @@ set_rcvbuf:
582 ret = -EINVAL; /* 1003.1g */ 586 ret = -EINVAL; /* 1003.1g */
583 break; 587 break;
584 } 588 }
585 if (copy_from_user(&ling,optval,sizeof(ling))) { 589 if (copy_from_user(&ling, optval, sizeof(ling))) {
586 ret = -EFAULT; 590 ret = -EFAULT;
587 break; 591 break;
588 } 592 }
@@ -690,9 +694,8 @@ set_rcvbuf:
690 case SO_MARK: 694 case SO_MARK:
691 if (!capable(CAP_NET_ADMIN)) 695 if (!capable(CAP_NET_ADMIN))
692 ret = -EPERM; 696 ret = -EPERM;
693 else { 697 else
694 sk->sk_mark = val; 698 sk->sk_mark = val;
695 }
696 break; 699 break;
697 700
698 /* We implement the SO_SNDLOWAT etc to 701 /* We implement the SO_SNDLOWAT etc to
@@ -704,6 +707,7 @@ set_rcvbuf:
704 release_sock(sk); 707 release_sock(sk);
705 return ret; 708 return ret;
706} 709}
710EXPORT_SYMBOL(sock_setsockopt);
707 711
708 712
709int sock_getsockopt(struct socket *sock, int level, int optname, 713int sock_getsockopt(struct socket *sock, int level, int optname,
@@ -727,7 +731,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
727 731
728 memset(&v, 0, sizeof(v)); 732 memset(&v, 0, sizeof(v));
729 733
730 switch(optname) { 734 switch (optname) {
731 case SO_DEBUG: 735 case SO_DEBUG:
732 v.val = sock_flag(sk, SOCK_DBG); 736 v.val = sock_flag(sk, SOCK_DBG);
733 break; 737 break;
@@ -762,7 +766,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
762 766
763 case SO_ERROR: 767 case SO_ERROR:
764 v.val = -sock_error(sk); 768 v.val = -sock_error(sk);
765 if (v.val==0) 769 if (v.val == 0)
766 v.val = xchg(&sk->sk_err_soft, 0); 770 v.val = xchg(&sk->sk_err_soft, 0);
767 break; 771 break;
768 772
@@ -816,7 +820,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
816 break; 820 break;
817 821
818 case SO_RCVTIMEO: 822 case SO_RCVTIMEO:
819 lv=sizeof(struct timeval); 823 lv = sizeof(struct timeval);
820 if (sk->sk_rcvtimeo == MAX_SCHEDULE_TIMEOUT) { 824 if (sk->sk_rcvtimeo == MAX_SCHEDULE_TIMEOUT) {
821 v.tm.tv_sec = 0; 825 v.tm.tv_sec = 0;
822 v.tm.tv_usec = 0; 826 v.tm.tv_usec = 0;
@@ -827,7 +831,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
827 break; 831 break;
828 832
829 case SO_SNDTIMEO: 833 case SO_SNDTIMEO:
830 lv=sizeof(struct timeval); 834 lv = sizeof(struct timeval);
831 if (sk->sk_sndtimeo == MAX_SCHEDULE_TIMEOUT) { 835 if (sk->sk_sndtimeo == MAX_SCHEDULE_TIMEOUT) {
832 v.tm.tv_sec = 0; 836 v.tm.tv_sec = 0;
833 v.tm.tv_usec = 0; 837 v.tm.tv_usec = 0;
@@ -842,7 +846,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
842 break; 846 break;
843 847
844 case SO_SNDLOWAT: 848 case SO_SNDLOWAT:
845 v.val=1; 849 v.val = 1;
846 break; 850 break;
847 851
848 case SO_PASSCRED: 852 case SO_PASSCRED:
@@ -941,6 +945,8 @@ static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority,
941 sk = kmalloc(prot->obj_size, priority); 945 sk = kmalloc(prot->obj_size, priority);
942 946
943 if (sk != NULL) { 947 if (sk != NULL) {
948 kmemcheck_annotate_bitfield(sk, flags);
949
944 if (security_sk_alloc(sk, family, priority)) 950 if (security_sk_alloc(sk, family, priority))
945 goto out_free; 951 goto out_free;
946 952
@@ -1002,8 +1008,9 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
1002 1008
1003 return sk; 1009 return sk;
1004} 1010}
1011EXPORT_SYMBOL(sk_alloc);
1005 1012
1006void sk_free(struct sock *sk) 1013static void __sk_free(struct sock *sk)
1007{ 1014{
1008 struct sk_filter *filter; 1015 struct sk_filter *filter;
1009 1016
@@ -1027,6 +1034,18 @@ void sk_free(struct sock *sk)
1027 sk_prot_free(sk->sk_prot_creator, sk); 1034 sk_prot_free(sk->sk_prot_creator, sk);
1028} 1035}
1029 1036
1037void sk_free(struct sock *sk)
1038{
1039 /*
 1040 * We subtract one from sk_wmem_alloc and can tell whether
 1041 * some packets are still in some tx queue.
 1042 * If non-zero, sock_wfree() will call __sk_free(sk) later.
1043 */
1044 if (atomic_dec_and_test(&sk->sk_wmem_alloc))
1045 __sk_free(sk);
1046}
1047EXPORT_SYMBOL(sk_free);
1048
1030/* 1049/*
 1031 * Last sock_put should drop reference to sk->sk_net. It has already 1050
 1032 * been dropped in sk_change_net. Taking reference to stopping namespace
@@ -1065,7 +1084,10 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
1065 newsk->sk_backlog.head = newsk->sk_backlog.tail = NULL; 1084 newsk->sk_backlog.head = newsk->sk_backlog.tail = NULL;
1066 1085
1067 atomic_set(&newsk->sk_rmem_alloc, 0); 1086 atomic_set(&newsk->sk_rmem_alloc, 0);
1068 atomic_set(&newsk->sk_wmem_alloc, 0); 1087 /*
1088 * sk_wmem_alloc set to one (see sk_free() and sock_wfree())
1089 */
1090 atomic_set(&newsk->sk_wmem_alloc, 1);
1069 atomic_set(&newsk->sk_omem_alloc, 0); 1091 atomic_set(&newsk->sk_omem_alloc, 0);
1070 skb_queue_head_init(&newsk->sk_receive_queue); 1092 skb_queue_head_init(&newsk->sk_receive_queue);
1071 skb_queue_head_init(&newsk->sk_write_queue); 1093 skb_queue_head_init(&newsk->sk_write_queue);
@@ -1126,7 +1148,6 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
1126out: 1148out:
1127 return newsk; 1149 return newsk;
1128} 1150}
1129
1130EXPORT_SYMBOL_GPL(sk_clone); 1151EXPORT_SYMBOL_GPL(sk_clone);
1131 1152
1132void sk_setup_caps(struct sock *sk, struct dst_entry *dst) 1153void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
@@ -1170,13 +1191,20 @@ void __init sk_init(void)
1170void sock_wfree(struct sk_buff *skb) 1191void sock_wfree(struct sk_buff *skb)
1171{ 1192{
1172 struct sock *sk = skb->sk; 1193 struct sock *sk = skb->sk;
1194 int res;
1173 1195
1174 /* In case it might be waiting for more memory. */ 1196 /* In case it might be waiting for more memory. */
1175 atomic_sub(skb->truesize, &sk->sk_wmem_alloc); 1197 res = atomic_sub_return(skb->truesize, &sk->sk_wmem_alloc);
1176 if (!sock_flag(sk, SOCK_USE_WRITE_QUEUE)) 1198 if (!sock_flag(sk, SOCK_USE_WRITE_QUEUE))
1177 sk->sk_write_space(sk); 1199 sk->sk_write_space(sk);
1178 sock_put(sk); 1200 /*
1201 * If sk_wmem_alloc has reached 0, we are the last user and must
1202 * free this sock, as the earlier sk_free() call could not do it.
1203 */
1204 if (res == 0)
1205 __sk_free(sk);
1179} 1206}
1207EXPORT_SYMBOL(sock_wfree);
1180 1208
1181/* 1209/*
1182 * Read buffer destructor automatically called from kfree_skb. 1210 * Read buffer destructor automatically called from kfree_skb.
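
Taken together, these hunks turn sk_wmem_alloc into a second reference count: sk_clone() above (and sock_init_data() later in this file) start it at one, with that extra unit standing for the socket itself. sk_free() only drops that unit, and whichever of sk_free() or the final sock_wfree() brings the count to zero performs the actual free. A condensed restatement of the two halves of the scheme (sk_write_space handling and exports omitted), assuming the kernel declarations above:

	void sk_free(struct sock *sk)
	{
		/* drop the socket's own unit; free only if no tx skbs remain */
		if (atomic_dec_and_test(&sk->sk_wmem_alloc))
			__sk_free(sk);
	}

	void sock_wfree(struct sk_buff *skb)	/* tx skb destructor */
	{
		struct sock *sk = skb->sk;

		/* atomic_sub_return() == 0 means sk_free() already ran */
		if (atomic_sub_return(skb->truesize, &sk->sk_wmem_alloc) == 0)
			__sk_free(sk);
	}

The design choice this buys: sock_wfree() no longer needs a sock_put() per skb, and a socket with packets still in a qdisc or driver queue cannot be freed out from under them.
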
@@ -1188,6 +1216,7 @@ void sock_rfree(struct sk_buff *skb)
1188 atomic_sub(skb->truesize, &sk->sk_rmem_alloc); 1216 atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
1189 sk_mem_uncharge(skb->sk, skb->truesize); 1217 sk_mem_uncharge(skb->sk, skb->truesize);
1190} 1218}
1219EXPORT_SYMBOL(sock_rfree);
1191 1220
1192 1221
1193int sock_i_uid(struct sock *sk) 1222int sock_i_uid(struct sock *sk)
@@ -1199,6 +1228,7 @@ int sock_i_uid(struct sock *sk)
1199 read_unlock(&sk->sk_callback_lock); 1228 read_unlock(&sk->sk_callback_lock);
1200 return uid; 1229 return uid;
1201} 1230}
1231EXPORT_SYMBOL(sock_i_uid);
1202 1232
1203unsigned long sock_i_ino(struct sock *sk) 1233unsigned long sock_i_ino(struct sock *sk)
1204{ 1234{
@@ -1209,6 +1239,7 @@ unsigned long sock_i_ino(struct sock *sk)
1209 read_unlock(&sk->sk_callback_lock); 1239 read_unlock(&sk->sk_callback_lock);
1210 return ino; 1240 return ino;
1211} 1241}
1242EXPORT_SYMBOL(sock_i_ino);
1212 1243
1213/* 1244/*
1214 * Allocate a skb from the socket's send buffer. 1245 * Allocate a skb from the socket's send buffer.
@@ -1217,7 +1248,7 @@ struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force,
1217 gfp_t priority) 1248 gfp_t priority)
1218{ 1249{
1219 if (force || atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) { 1250 if (force || atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
1220 struct sk_buff * skb = alloc_skb(size, priority); 1251 struct sk_buff *skb = alloc_skb(size, priority);
1221 if (skb) { 1252 if (skb) {
1222 skb_set_owner_w(skb, sk); 1253 skb_set_owner_w(skb, sk);
1223 return skb; 1254 return skb;
@@ -1225,6 +1256,7 @@ struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force,
1225 } 1256 }
1226 return NULL; 1257 return NULL;
1227} 1258}
1259EXPORT_SYMBOL(sock_wmalloc);
1228 1260
1229/* 1261/*
1230 * Allocate a skb from the socket's receive buffer. 1262 * Allocate a skb from the socket's receive buffer.
@@ -1261,6 +1293,7 @@ void *sock_kmalloc(struct sock *sk, int size, gfp_t priority)
1261 } 1293 }
1262 return NULL; 1294 return NULL;
1263} 1295}
1296EXPORT_SYMBOL(sock_kmalloc);
1264 1297
1265/* 1298/*
1266 * Free an option memory block. 1299 * Free an option memory block.
@@ -1270,11 +1303,12 @@ void sock_kfree_s(struct sock *sk, void *mem, int size)
1270 kfree(mem); 1303 kfree(mem);
1271 atomic_sub(size, &sk->sk_omem_alloc); 1304 atomic_sub(size, &sk->sk_omem_alloc);
1272} 1305}
1306EXPORT_SYMBOL(sock_kfree_s);
1273 1307
1274/* It is almost wait_for_tcp_memory minus release_sock/lock_sock. 1308/* It is almost wait_for_tcp_memory minus release_sock/lock_sock.
1275 I think these locks should be removed for datagram sockets. 1309 I think these locks should be removed for datagram sockets.
1276 */ 1310 */
1277static long sock_wait_for_wmem(struct sock * sk, long timeo) 1311static long sock_wait_for_wmem(struct sock *sk, long timeo)
1278{ 1312{
1279 DEFINE_WAIT(wait); 1313 DEFINE_WAIT(wait);
1280 1314
@@ -1392,6 +1426,7 @@ struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size,
1392{ 1426{
1393 return sock_alloc_send_pskb(sk, size, 0, noblock, errcode); 1427 return sock_alloc_send_pskb(sk, size, 0, noblock, errcode);
1394} 1428}
1429EXPORT_SYMBOL(sock_alloc_send_skb);
1395 1430
1396static void __lock_sock(struct sock *sk) 1431static void __lock_sock(struct sock *sk)
1397{ 1432{
@@ -1460,7 +1495,6 @@ int sk_wait_data(struct sock *sk, long *timeo)
1460 finish_wait(sk->sk_sleep, &wait); 1495 finish_wait(sk->sk_sleep, &wait);
1461 return rc; 1496 return rc;
1462} 1497}
1463
1464EXPORT_SYMBOL(sk_wait_data); 1498EXPORT_SYMBOL(sk_wait_data);
1465 1499
1466/** 1500/**
@@ -1541,7 +1575,6 @@ suppress_allocation:
1541 atomic_sub(amt, prot->memory_allocated); 1575 atomic_sub(amt, prot->memory_allocated);
1542 return 0; 1576 return 0;
1543} 1577}
1544
1545EXPORT_SYMBOL(__sk_mem_schedule); 1578EXPORT_SYMBOL(__sk_mem_schedule);
1546 1579
1547/** 1580/**
@@ -1560,7 +1593,6 @@ void __sk_mem_reclaim(struct sock *sk)
1560 (atomic_read(prot->memory_allocated) < prot->sysctl_mem[0])) 1593 (atomic_read(prot->memory_allocated) < prot->sysctl_mem[0]))
1561 *prot->memory_pressure = 0; 1594 *prot->memory_pressure = 0;
1562} 1595}
1563
1564EXPORT_SYMBOL(__sk_mem_reclaim); 1596EXPORT_SYMBOL(__sk_mem_reclaim);
1565 1597
1566 1598
@@ -1575,78 +1607,92 @@ int sock_no_bind(struct socket *sock, struct sockaddr *saddr, int len)
1575{ 1607{
1576 return -EOPNOTSUPP; 1608 return -EOPNOTSUPP;
1577} 1609}
1610EXPORT_SYMBOL(sock_no_bind);
1578 1611
1579int sock_no_connect(struct socket *sock, struct sockaddr *saddr, 1612int sock_no_connect(struct socket *sock, struct sockaddr *saddr,
1580 int len, int flags) 1613 int len, int flags)
1581{ 1614{
1582 return -EOPNOTSUPP; 1615 return -EOPNOTSUPP;
1583} 1616}
1617EXPORT_SYMBOL(sock_no_connect);
1584 1618
1585int sock_no_socketpair(struct socket *sock1, struct socket *sock2) 1619int sock_no_socketpair(struct socket *sock1, struct socket *sock2)
1586{ 1620{
1587 return -EOPNOTSUPP; 1621 return -EOPNOTSUPP;
1588} 1622}
1623EXPORT_SYMBOL(sock_no_socketpair);
1589 1624
1590int sock_no_accept(struct socket *sock, struct socket *newsock, int flags) 1625int sock_no_accept(struct socket *sock, struct socket *newsock, int flags)
1591{ 1626{
1592 return -EOPNOTSUPP; 1627 return -EOPNOTSUPP;
1593} 1628}
1629EXPORT_SYMBOL(sock_no_accept);
1594 1630
1595int sock_no_getname(struct socket *sock, struct sockaddr *saddr, 1631int sock_no_getname(struct socket *sock, struct sockaddr *saddr,
1596 int *len, int peer) 1632 int *len, int peer)
1597{ 1633{
1598 return -EOPNOTSUPP; 1634 return -EOPNOTSUPP;
1599} 1635}
1636EXPORT_SYMBOL(sock_no_getname);
1600 1637
1601unsigned int sock_no_poll(struct file * file, struct socket *sock, poll_table *pt) 1638unsigned int sock_no_poll(struct file *file, struct socket *sock, poll_table *pt)
1602{ 1639{
1603 return 0; 1640 return 0;
1604} 1641}
1642EXPORT_SYMBOL(sock_no_poll);
1605 1643
1606int sock_no_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) 1644int sock_no_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
1607{ 1645{
1608 return -EOPNOTSUPP; 1646 return -EOPNOTSUPP;
1609} 1647}
1648EXPORT_SYMBOL(sock_no_ioctl);
1610 1649
1611int sock_no_listen(struct socket *sock, int backlog) 1650int sock_no_listen(struct socket *sock, int backlog)
1612{ 1651{
1613 return -EOPNOTSUPP; 1652 return -EOPNOTSUPP;
1614} 1653}
1654EXPORT_SYMBOL(sock_no_listen);
1615 1655
1616int sock_no_shutdown(struct socket *sock, int how) 1656int sock_no_shutdown(struct socket *sock, int how)
1617{ 1657{
1618 return -EOPNOTSUPP; 1658 return -EOPNOTSUPP;
1619} 1659}
1660EXPORT_SYMBOL(sock_no_shutdown);
1620 1661
1621int sock_no_setsockopt(struct socket *sock, int level, int optname, 1662int sock_no_setsockopt(struct socket *sock, int level, int optname,
1622 char __user *optval, int optlen) 1663 char __user *optval, int optlen)
1623{ 1664{
1624 return -EOPNOTSUPP; 1665 return -EOPNOTSUPP;
1625} 1666}
1667EXPORT_SYMBOL(sock_no_setsockopt);
1626 1668
1627int sock_no_getsockopt(struct socket *sock, int level, int optname, 1669int sock_no_getsockopt(struct socket *sock, int level, int optname,
1628 char __user *optval, int __user *optlen) 1670 char __user *optval, int __user *optlen)
1629{ 1671{
1630 return -EOPNOTSUPP; 1672 return -EOPNOTSUPP;
1631} 1673}
1674EXPORT_SYMBOL(sock_no_getsockopt);
1632 1675
1633int sock_no_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m, 1676int sock_no_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
1634 size_t len) 1677 size_t len)
1635{ 1678{
1636 return -EOPNOTSUPP; 1679 return -EOPNOTSUPP;
1637} 1680}
1681EXPORT_SYMBOL(sock_no_sendmsg);
1638 1682
1639int sock_no_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m, 1683int sock_no_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
1640 size_t len, int flags) 1684 size_t len, int flags)
1641{ 1685{
1642 return -EOPNOTSUPP; 1686 return -EOPNOTSUPP;
1643} 1687}
1688EXPORT_SYMBOL(sock_no_recvmsg);
1644 1689
1645int sock_no_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma) 1690int sock_no_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma)
1646{ 1691{
1647 /* Mirror missing mmap method error code */ 1692 /* Mirror missing mmap method error code */
1648 return -ENODEV; 1693 return -ENODEV;
1649} 1694}
1695EXPORT_SYMBOL(sock_no_mmap);
1650 1696
1651ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags) 1697ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags)
1652{ 1698{
@@ -1660,6 +1706,7 @@ ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset, siz
1660 kunmap(page); 1706 kunmap(page);
1661 return res; 1707 return res;
1662} 1708}
1709EXPORT_SYMBOL(sock_no_sendpage);
1663 1710
1664/* 1711/*
1665 * Default Socket Callbacks 1712 * Default Socket Callbacks
@@ -1723,6 +1770,7 @@ void sk_send_sigurg(struct sock *sk)
1723 if (send_sigurg(&sk->sk_socket->file->f_owner)) 1770 if (send_sigurg(&sk->sk_socket->file->f_owner))
1724 sk_wake_async(sk, SOCK_WAKE_URG, POLL_PRI); 1771 sk_wake_async(sk, SOCK_WAKE_URG, POLL_PRI);
1725} 1772}
1773EXPORT_SYMBOL(sk_send_sigurg);
1726 1774
1727void sk_reset_timer(struct sock *sk, struct timer_list* timer, 1775void sk_reset_timer(struct sock *sk, struct timer_list* timer,
1728 unsigned long expires) 1776 unsigned long expires)
@@ -1730,7 +1778,6 @@ void sk_reset_timer(struct sock *sk, struct timer_list* timer,
1730 if (!mod_timer(timer, expires)) 1778 if (!mod_timer(timer, expires))
1731 sock_hold(sk); 1779 sock_hold(sk);
1732} 1780}
1733
1734EXPORT_SYMBOL(sk_reset_timer); 1781EXPORT_SYMBOL(sk_reset_timer);
1735 1782
1736void sk_stop_timer(struct sock *sk, struct timer_list* timer) 1783void sk_stop_timer(struct sock *sk, struct timer_list* timer)
@@ -1738,7 +1785,6 @@ void sk_stop_timer(struct sock *sk, struct timer_list* timer)
1738 if (timer_pending(timer) && del_timer(timer)) 1785 if (timer_pending(timer) && del_timer(timer))
1739 __sock_put(sk); 1786 __sock_put(sk);
1740} 1787}
1741
1742EXPORT_SYMBOL(sk_stop_timer); 1788EXPORT_SYMBOL(sk_stop_timer);
1743 1789
1744void sock_init_data(struct socket *sock, struct sock *sk) 1790void sock_init_data(struct socket *sock, struct sock *sk)
@@ -1795,8 +1841,10 @@ void sock_init_data(struct socket *sock, struct sock *sk)
1795 sk->sk_stamp = ktime_set(-1L, 0); 1841 sk->sk_stamp = ktime_set(-1L, 0);
1796 1842
1797 atomic_set(&sk->sk_refcnt, 1); 1843 atomic_set(&sk->sk_refcnt, 1);
1844 atomic_set(&sk->sk_wmem_alloc, 1);
1798 atomic_set(&sk->sk_drops, 0); 1845 atomic_set(&sk->sk_drops, 0);
1799} 1846}
1847EXPORT_SYMBOL(sock_init_data);
1800 1848
1801void lock_sock_nested(struct sock *sk, int subclass) 1849void lock_sock_nested(struct sock *sk, int subclass)
1802{ 1850{
@@ -1812,7 +1860,6 @@ void lock_sock_nested(struct sock *sk, int subclass)
1812 mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_); 1860 mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_);
1813 local_bh_enable(); 1861 local_bh_enable();
1814} 1862}
1815
1816EXPORT_SYMBOL(lock_sock_nested); 1863EXPORT_SYMBOL(lock_sock_nested);
1817 1864
1818void release_sock(struct sock *sk) 1865void release_sock(struct sock *sk)
@@ -1895,7 +1942,6 @@ int sock_common_getsockopt(struct socket *sock, int level, int optname,
1895 1942
1896 return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen); 1943 return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen);
1897} 1944}
1898
1899EXPORT_SYMBOL(sock_common_getsockopt); 1945EXPORT_SYMBOL(sock_common_getsockopt);
1900 1946
1901#ifdef CONFIG_COMPAT 1947#ifdef CONFIG_COMPAT
@@ -1925,7 +1971,6 @@ int sock_common_recvmsg(struct kiocb *iocb, struct socket *sock,
1925 msg->msg_namelen = addr_len; 1971 msg->msg_namelen = addr_len;
1926 return err; 1972 return err;
1927} 1973}
1928
1929EXPORT_SYMBOL(sock_common_recvmsg); 1974EXPORT_SYMBOL(sock_common_recvmsg);
1930 1975
1931/* 1976/*
@@ -1938,7 +1983,6 @@ int sock_common_setsockopt(struct socket *sock, int level, int optname,
1938 1983
1939 return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen); 1984 return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen);
1940} 1985}
1941
1942EXPORT_SYMBOL(sock_common_setsockopt); 1986EXPORT_SYMBOL(sock_common_setsockopt);
1943 1987
1944#ifdef CONFIG_COMPAT 1988#ifdef CONFIG_COMPAT
@@ -1989,7 +2033,6 @@ void sk_common_release(struct sock *sk)
1989 sk_refcnt_debug_release(sk); 2033 sk_refcnt_debug_release(sk);
1990 sock_put(sk); 2034 sock_put(sk);
1991} 2035}
1992
1993EXPORT_SYMBOL(sk_common_release); 2036EXPORT_SYMBOL(sk_common_release);
1994 2037
1995static DEFINE_RWLOCK(proto_list_lock); 2038static DEFINE_RWLOCK(proto_list_lock);
@@ -2171,7 +2214,6 @@ out_free_sock_slab:
2171out: 2214out:
2172 return -ENOBUFS; 2215 return -ENOBUFS;
2173} 2216}
2174
2175EXPORT_SYMBOL(proto_register); 2217EXPORT_SYMBOL(proto_register);
2176 2218
2177void proto_unregister(struct proto *prot) 2219void proto_unregister(struct proto *prot)
@@ -2198,7 +2240,6 @@ void proto_unregister(struct proto *prot)
2198 prot->twsk_prot->twsk_slab = NULL; 2240 prot->twsk_prot->twsk_slab = NULL;
2199 } 2241 }
2200} 2242}
2201
2202EXPORT_SYMBOL(proto_unregister); 2243EXPORT_SYMBOL(proto_unregister);
2203 2244
2204#ifdef CONFIG_PROC_FS 2245#ifdef CONFIG_PROC_FS
@@ -2324,33 +2365,3 @@ static int __init proto_init(void)
2324subsys_initcall(proto_init); 2365subsys_initcall(proto_init);
2325 2366
2326#endif /* PROC_FS */ 2367#endif /* PROC_FS */
2327
2328EXPORT_SYMBOL(sk_alloc);
2329EXPORT_SYMBOL(sk_free);
2330EXPORT_SYMBOL(sk_send_sigurg);
2331EXPORT_SYMBOL(sock_alloc_send_skb);
2332EXPORT_SYMBOL(sock_init_data);
2333EXPORT_SYMBOL(sock_kfree_s);
2334EXPORT_SYMBOL(sock_kmalloc);
2335EXPORT_SYMBOL(sock_no_accept);
2336EXPORT_SYMBOL(sock_no_bind);
2337EXPORT_SYMBOL(sock_no_connect);
2338EXPORT_SYMBOL(sock_no_getname);
2339EXPORT_SYMBOL(sock_no_getsockopt);
2340EXPORT_SYMBOL(sock_no_ioctl);
2341EXPORT_SYMBOL(sock_no_listen);
2342EXPORT_SYMBOL(sock_no_mmap);
2343EXPORT_SYMBOL(sock_no_poll);
2344EXPORT_SYMBOL(sock_no_recvmsg);
2345EXPORT_SYMBOL(sock_no_sendmsg);
2346EXPORT_SYMBOL(sock_no_sendpage);
2347EXPORT_SYMBOL(sock_no_setsockopt);
2348EXPORT_SYMBOL(sock_no_shutdown);
2349EXPORT_SYMBOL(sock_no_socketpair);
2350EXPORT_SYMBOL(sock_rfree);
2351EXPORT_SYMBOL(sock_setsockopt);
2352EXPORT_SYMBOL(sock_wfree);
2353EXPORT_SYMBOL(sock_wmalloc);
2354EXPORT_SYMBOL(sock_i_uid);
2355EXPORT_SYMBOL(sock_i_ino);
2356EXPORT_SYMBOL(sysctl_optmem_max);
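
The block of EXPORT_SYMBOL() lines deleted above is not lost: each export now sits directly after the function it belongs to, which is the placement Documentation/CodingStyle asks for and checkpatch.pl warns about. The idiom, in outline:

	int sock_no_listen(struct socket *sock, int backlog)
	{
		return -EOPNOTSUPP;
	}
	EXPORT_SYMBOL(sock_no_listen);	/* immediately after the closing brace */
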
diff --git a/net/core/stream.c b/net/core/stream.c
index 8727cead64ad..a37debfeb1b2 100644
--- a/net/core/stream.c
+++ b/net/core/stream.c
@@ -33,7 +33,8 @@ void sk_stream_write_space(struct sock *sk)
33 clear_bit(SOCK_NOSPACE, &sock->flags); 33 clear_bit(SOCK_NOSPACE, &sock->flags);
34 34
35 if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) 35 if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
36 wake_up_interruptible(sk->sk_sleep); 36 wake_up_interruptible_poll(sk->sk_sleep, POLLOUT |
37 POLLWRNORM | POLLWRBAND);
37 if (sock->fasync_list && !(sk->sk_shutdown & SEND_SHUTDOWN)) 38 if (sock->fasync_list && !(sk->sk_shutdown & SEND_SHUTDOWN))
38 sock_wake_async(sock, SOCK_WAKE_SPACE, POLL_OUT); 39 sock_wake_async(sock, SOCK_WAKE_SPACE, POLL_OUT);
39 } 40 }
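
wake_up_interruptible_poll() passes the poll-event mask as the wakeup key, so waiters registered with a keyed wake function (epoll, or a callback like the sketch below) can discard write-space wakeups they never asked for instead of being scheduled only to go back to sleep. A sketch of the waiter side, with a hypothetical callback interested only in readability:

	#include <linux/wait.h>
	#include <linux/poll.h>

	static int rx_only_wake(wait_queue_t *wait, unsigned mode,
				int sync, void *key)
	{
		unsigned long events = (unsigned long)key;

		/* key == NULL means an unkeyed wakeup: always honour it */
		if (key && !(events & (POLLIN | POLLRDNORM)))
			return 0;	/* write-space event: skip this waiter */
		return autoremove_wake_function(wait, mode, sync, key);
	}
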
diff --git a/net/core/user_dma.c b/net/core/user_dma.c
index 164b090d5ac3..25d717ebc92e 100644
--- a/net/core/user_dma.c
+++ b/net/core/user_dma.c
@@ -51,6 +51,7 @@ int dma_skb_copy_datagram_iovec(struct dma_chan *chan,
51{ 51{
52 int start = skb_headlen(skb); 52 int start = skb_headlen(skb);
53 int i, copy = start - offset; 53 int i, copy = start - offset;
54 struct sk_buff *frag_iter;
54 dma_cookie_t cookie = 0; 55 dma_cookie_t cookie = 0;
55 56
56 /* Copy header. */ 57 /* Copy header. */
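
The rewrite in the next hunk leans on the skb_walk_frags() iterator rather than an open-coded frag_list walk. As introduced by this series it is a thin macro over the same pointer chase; in sketch form (from include/linux/skbuff.h, so treat the exact spelling as an assumption):

	#define skb_walk_frags(skb, iter)	\
		for (iter = skb_shinfo(skb)->frag_list; iter; iter = iter->next)
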
@@ -94,31 +95,28 @@ int dma_skb_copy_datagram_iovec(struct dma_chan *chan,
94 start = end; 95 start = end;
95 } 96 }
96 97
97 if (skb_shinfo(skb)->frag_list) { 98 skb_walk_frags(skb, frag_iter) {
98 struct sk_buff *list = skb_shinfo(skb)->frag_list; 99 int end;
99 100
100 for (; list; list = list->next) { 101 WARN_ON(start > offset + len);
101 int end; 102
102 103 end = start + frag_iter->len;
103 WARN_ON(start > offset + len); 104 copy = end - offset;
104 105 if (copy > 0) {
105 end = start + list->len; 106 if (copy > len)
106 copy = end - offset; 107 copy = len;
107 if (copy > 0) { 108 cookie = dma_skb_copy_datagram_iovec(chan, frag_iter,
108 if (copy > len) 109 offset - start,
109 copy = len; 110 to, copy,
110 cookie = dma_skb_copy_datagram_iovec(chan, list, 111 pinned_list);
111 offset - start, to, copy, 112 if (cookie < 0)
112 pinned_list); 113 goto fault;
113 if (cookie < 0) 114 len -= copy;
114 goto fault; 115 if (len == 0)
115 len -= copy; 116 goto end;
116 if (len == 0) 117 offset += copy;
117 goto end;
118 offset += copy;
119 }
120 start = end;
121 } 118 }
119 start = end;
122 } 120 }
123 121
124end: 122end: