author	Linus Torvalds <torvalds@linux-foundation.org>	2011-10-25 07:25:22 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2011-10-25 07:25:22 -0400
commit	8a9ea3237e7eb5c25f09e429ad242ae5a3d5ea22 (patch)
tree	a0a63398a9983667d52cbbbf4e2405b4f22b1d83 /net/core
parent	1be025d3cb40cd295123af2c394f7229ef9b30ca (diff)
parent	8b3408f8ee994973869d8ba32c5bf482bc4ddca4 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1745 commits)
  dp83640: free packet queues on remove
  dp83640: use proper function to free transmit time stamping packets
  ipv6: Do not use routes from locally generated RAs
  [PATCH net-next] tg3: add tx_dropped counter
  be2net: don't create multiple RX/TX rings in multi channel mode
  be2net: don't create multiple TXQs in BE2
  be2net: refactor VF setup/teardown code into be_vf_setup/clear()
  be2net: add vlan/rx-mode/flow-control config to be_setup()
  net_sched: cls_flow: use skb_header_pointer()
  ipv4: avoid useless call of the function check_peer_pmtu
  TCP: remove TCP_DEBUG
  net: Fix driver name for mdio-gpio.c
  ipv4: tcp: fix TOS value in ACK messages sent from TIME_WAIT
  rtnetlink: Add missing manual netlink notification in dev_change_net_namespaces
  ipv4: fix ipsec forward performance regression
  jme: fix irq storm after suspend/resume
  route: fix ICMP redirect validation
  net: hold sock reference while processing tx timestamps
  tcp: md5: add more const attributes
  Add ethtool -g support to virtio_net
  ...

Fix up conflicts in:
 - drivers/net/Kconfig: The split-up generated a trivial conflict with
   removal of a stale reference to Documentation/networking/net-modules.txt.
   Remove it from the new location instead.
 - fs/sysfs/dir.c: Fairly nasty conflicts with the sysfs rb-tree usage,
   conflicting with Eric Biederman's changes for tagged directories.
Diffstat (limited to 'net/core')
-rw-r--r--	net/core/datagram.c	24
-rw-r--r--	net/core/dev.c	336
-rw-r--r--	net/core/dev_addr_lists.c	4
-rw-r--r--	net/core/dst.c	15
-rw-r--r--	net/core/ethtool.c	20
-rw-r--r--	net/core/fib_rules.c	4
-rw-r--r--	net/core/filter.c	4
-rw-r--r--	net/core/flow.c	14
-rw-r--r--	net/core/kmap_skb.h	2
-rw-r--r--	net/core/link_watch.c	9
-rw-r--r--	net/core/neighbour.c	44
-rw-r--r--	net/core/net-sysfs.c	12
-rw-r--r--	net/core/netpoll.c	4
-rw-r--r--	net/core/pktgen.c	25
-rw-r--r--	net/core/rtnetlink.c	34
-rw-r--r--	net/core/scm.c	10
-rw-r--r--	net/core/secure_seq.c	2
-rw-r--r--	net/core/skbuff.c	174
-rw-r--r--	net/core/sock.c	23
-rw-r--r--	net/core/timestamping.c	12
-rw-r--r--	net/core/user_dma.c	6
21 files changed, 515 insertions, 263 deletions
diff --git a/net/core/datagram.c b/net/core/datagram.c
index 18ac112ea7ae..68bbf9f65cb0 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -324,15 +324,15 @@ int skb_copy_datagram_iovec(const struct sk_buff *skb, int offset,
 	/* Copy paged appendix. Hmm... why does this look so complicated? */
 	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
 		int end;
+		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
 
 		WARN_ON(start > offset + len);
 
-		end = start + skb_shinfo(skb)->frags[i].size;
+		end = start + skb_frag_size(frag);
 		if ((copy = end - offset) > 0) {
 			int err;
 			u8 *vaddr;
-			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
-			struct page *page = frag->page;
+			struct page *page = skb_frag_page(frag);
 
 			if (copy > len)
 				copy = len;
@@ -410,15 +410,15 @@ int skb_copy_datagram_const_iovec(const struct sk_buff *skb, int offset,
 	/* Copy paged appendix. Hmm... why does this look so complicated? */
 	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
 		int end;
+		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
 
 		WARN_ON(start > offset + len);
 
-		end = start + skb_shinfo(skb)->frags[i].size;
+		end = start + skb_frag_size(frag);
 		if ((copy = end - offset) > 0) {
 			int err;
 			u8 *vaddr;
-			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
-			struct page *page = frag->page;
+			struct page *page = skb_frag_page(frag);
 
 			if (copy > len)
 				copy = len;
@@ -500,15 +500,15 @@ int skb_copy_datagram_from_iovec(struct sk_buff *skb, int offset,
 	/* Copy paged appendix. Hmm... why does this look so complicated? */
 	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
 		int end;
+		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
 
 		WARN_ON(start > offset + len);
 
-		end = start + skb_shinfo(skb)->frags[i].size;
+		end = start + skb_frag_size(frag);
 		if ((copy = end - offset) > 0) {
 			int err;
 			u8 *vaddr;
-			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
-			struct page *page = frag->page;
+			struct page *page = skb_frag_page(frag);
 
 			if (copy > len)
 				copy = len;
@@ -585,16 +585,16 @@ static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset,
 
 	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
 		int end;
+		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
 
 		WARN_ON(start > offset + len);
 
-		end = start + skb_shinfo(skb)->frags[i].size;
+		end = start + skb_frag_size(frag);
 		if ((copy = end - offset) > 0) {
 			__wsum csum2;
 			int err = 0;
 			u8 *vaddr;
-			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
-			struct page *page = frag->page;
+			struct page *page = skb_frag_page(frag);
 
 			if (copy > len)
 				copy = len;
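[Note] The four hunks above all apply the same transformation: repeated indexing into skb_shinfo(skb)->frags[i] is replaced by one const frag pointer plus the skb_frag_page()/skb_frag_size() accessors. A minimal sketch of the resulting access pattern (illustrative only, not part of the patch):

	static void frag_walk_example(const struct sk_buff *skb)
	{
		int i;

		for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
			const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
			struct page *page = skb_frag_page(frag);	/* was frag->page */
			unsigned int size = skb_frag_size(frag);	/* was frag->size */

			pr_debug("frag %d: page %p, %u bytes\n", i, page, size);
		}
	}

Hiding the struct members behind accessors is what lets later patches change the underlying frag representation without touching every caller.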
diff --git a/net/core/dev.c b/net/core/dev.c
index 231d3125bf26..edcf019c056d 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -133,6 +133,10 @@
 #include <linux/pci.h>
 #include <linux/inetdevice.h>
 #include <linux/cpu_rmap.h>
+#include <linux/if_tunnel.h>
+#include <linux/if_pppox.h>
+#include <linux/ppp_defs.h>
+#include <linux/net_tstamp.h>
 
 #include "net-sysfs.h"
 
@@ -1474,6 +1478,57 @@ static inline void net_timestamp_check(struct sk_buff *skb)
 	__net_timestamp(skb);
 }
 
+static int net_hwtstamp_validate(struct ifreq *ifr)
+{
+	struct hwtstamp_config cfg;
+	enum hwtstamp_tx_types tx_type;
+	enum hwtstamp_rx_filters rx_filter;
+	int tx_type_valid = 0;
+	int rx_filter_valid = 0;
+
+	if (copy_from_user(&cfg, ifr->ifr_data, sizeof(cfg)))
+		return -EFAULT;
+
+	if (cfg.flags) /* reserved for future extensions */
+		return -EINVAL;
+
+	tx_type = cfg.tx_type;
+	rx_filter = cfg.rx_filter;
+
+	switch (tx_type) {
+	case HWTSTAMP_TX_OFF:
+	case HWTSTAMP_TX_ON:
+	case HWTSTAMP_TX_ONESTEP_SYNC:
+		tx_type_valid = 1;
+		break;
+	}
+
+	switch (rx_filter) {
+	case HWTSTAMP_FILTER_NONE:
+	case HWTSTAMP_FILTER_ALL:
+	case HWTSTAMP_FILTER_SOME:
+	case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
+	case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
+	case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
+	case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
+	case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
+	case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
+	case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
+	case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
+	case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
+	case HWTSTAMP_FILTER_PTP_V2_EVENT:
+	case HWTSTAMP_FILTER_PTP_V2_SYNC:
+	case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
+		rx_filter_valid = 1;
+		break;
+	}
+
+	if (!tx_type_valid || !rx_filter_valid)
+		return -ERANGE;
+
+	return 0;
+}
+
 static inline bool is_skb_forwardable(struct net_device *dev,
 				      struct sk_buff *skb)
 {
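[Note] net_hwtstamp_validate() screens SIOCSHWTSTAMP requests generically before they reach a driver's ndo_do_ioctl, so drivers only ever see known tx_type/rx_filter values. A hedged userspace sketch of the kind of request being validated (standard SIOCSHWTSTAMP usage; the function name here is illustrative):

	#include <string.h>
	#include <sys/ioctl.h>
	#include <net/if.h>
	#include <linux/sockios.h>
	#include <linux/net_tstamp.h>

	static int enable_hw_timestamps(int fd, const char *ifname)
	{
		struct hwtstamp_config cfg;
		struct ifreq ifr;

		memset(&cfg, 0, sizeof(cfg));
		cfg.flags = 0;				/* non-zero now earns -EINVAL */
		cfg.tx_type = HWTSTAMP_TX_ON;		/* unknown value earns -ERANGE */
		cfg.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;

		memset(&ifr, 0, sizeof(ifr));
		strncpy(ifr.ifr_name, ifname, IFNAMSIZ - 1);
		ifr.ifr_data = (char *)&cfg;

		return ioctl(fd, SIOCSHWTSTAMP, &ifr);
	}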
@@ -1955,9 +2010,11 @@ static int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
 #ifdef CONFIG_HIGHMEM
 	int i;
 	if (!(dev->features & NETIF_F_HIGHDMA)) {
-		for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
-			if (PageHighMem(skb_shinfo(skb)->frags[i].page))
+		for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+			if (PageHighMem(skb_frag_page(frag)))
 				return 1;
+		}
 	}
 
 	if (PCI_DMA_BUS_IS_PHYS) {
@@ -1966,7 +2023,8 @@ static int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
 		if (!pdev)
 			return 0;
 		for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
-			dma_addr_t addr = page_to_phys(skb_shinfo(skb)->frags[i].page);
+			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+			dma_addr_t addr = page_to_phys(skb_frag_page(frag));
 			if (!pdev->dma_mask || addr + PAGE_SIZE - 1 > *pdev->dma_mask)
 				return 1;
 		}
@@ -2527,25 +2585,31 @@ static inline void ____napi_schedule(struct softnet_data *sd,
 
 /*
  * __skb_get_rxhash: calculate a flow hash based on src/dst addresses
- * and src/dst port numbers. Returns a non-zero hash number on success
- * and 0 on failure.
+ * and src/dst port numbers. Sets rxhash in skb to non-zero hash value
+ * on success, zero indicates no valid hash. Also, sets l4_rxhash in skb
+ * if hash is a canonical 4-tuple hash over transport ports.
  */
-__u32 __skb_get_rxhash(struct sk_buff *skb)
+void __skb_get_rxhash(struct sk_buff *skb)
 {
 	int nhoff, hash = 0, poff;
 	const struct ipv6hdr *ip6;
 	const struct iphdr *ip;
+	const struct vlan_hdr *vlan;
 	u8 ip_proto;
-	u32 addr1, addr2, ihl;
+	u32 addr1, addr2;
+	u16 proto;
 	union {
 		u32 v32;
 		u16 v16[2];
 	} ports;
 
 	nhoff = skb_network_offset(skb);
+	proto = skb->protocol;
 
-	switch (skb->protocol) {
+again:
+	switch (proto) {
 	case __constant_htons(ETH_P_IP):
+ip:
 		if (!pskb_may_pull(skb, sizeof(*ip) + nhoff))
 			goto done;
 
@@ -2556,9 +2620,10 @@ __u32 __skb_get_rxhash(struct sk_buff *skb)
 		ip_proto = ip->protocol;
 		addr1 = (__force u32) ip->saddr;
 		addr2 = (__force u32) ip->daddr;
-		ihl = ip->ihl;
+		nhoff += ip->ihl * 4;
 		break;
 	case __constant_htons(ETH_P_IPV6):
+ipv6:
 		if (!pskb_may_pull(skb, sizeof(*ip6) + nhoff))
 			goto done;
 
@@ -2566,20 +2631,71 @@ __u32 __skb_get_rxhash(struct sk_buff *skb)
 		ip_proto = ip6->nexthdr;
 		addr1 = (__force u32) ip6->saddr.s6_addr32[3];
 		addr2 = (__force u32) ip6->daddr.s6_addr32[3];
-		ihl = (40 >> 2);
+		nhoff += 40;
 		break;
+	case __constant_htons(ETH_P_8021Q):
+		if (!pskb_may_pull(skb, sizeof(*vlan) + nhoff))
+			goto done;
+		vlan = (const struct vlan_hdr *) (skb->data + nhoff);
+		proto = vlan->h_vlan_encapsulated_proto;
+		nhoff += sizeof(*vlan);
+		goto again;
+	case __constant_htons(ETH_P_PPP_SES):
+		if (!pskb_may_pull(skb, PPPOE_SES_HLEN + nhoff))
+			goto done;
+		proto = *((__be16 *) (skb->data + nhoff +
+				      sizeof(struct pppoe_hdr)));
+		nhoff += PPPOE_SES_HLEN;
+		switch (proto) {
+		case __constant_htons(PPP_IP):
+			goto ip;
+		case __constant_htons(PPP_IPV6):
+			goto ipv6;
+		default:
+			goto done;
+		}
 	default:
 		goto done;
 	}
 
+	switch (ip_proto) {
+	case IPPROTO_GRE:
+		if (pskb_may_pull(skb, nhoff + 16)) {
+			u8 *h = skb->data + nhoff;
+			__be16 flags = *(__be16 *)h;
+
+			/*
+			 * Only look inside GRE if version zero and no
+			 * routing
+			 */
+			if (!(flags & (GRE_VERSION|GRE_ROUTING))) {
+				proto = *(__be16 *)(h + 2);
+				nhoff += 4;
+				if (flags & GRE_CSUM)
+					nhoff += 4;
+				if (flags & GRE_KEY)
+					nhoff += 4;
+				if (flags & GRE_SEQ)
+					nhoff += 4;
+				goto again;
+			}
+		}
+		break;
+	case IPPROTO_IPIP:
+		goto again;
+	default:
+		break;
+	}
+
 	ports.v32 = 0;
 	poff = proto_ports_offset(ip_proto);
 	if (poff >= 0) {
-		nhoff += ihl * 4 + poff;
+		nhoff += poff;
 		if (pskb_may_pull(skb, nhoff + 4)) {
 			ports.v32 = * (__force u32 *) (skb->data + nhoff);
 			if (ports.v16[1] < ports.v16[0])
 				swap(ports.v16[0], ports.v16[1]);
+			skb->l4_rxhash = 1;
 		}
 	}
 
@@ -2592,7 +2708,7 @@ __u32 __skb_get_rxhash(struct sk_buff *skb)
 		hash = 1;
 
 done:
-	return hash;
+	skb->rxhash = hash;
 }
 EXPORT_SYMBOL(__skb_get_rxhash);
 
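[Note] With __skb_get_rxhash() now returning void and storing its result in skb->rxhash (plus the l4_rxhash bit), callers go through the inline wrapper in include/linux/skbuff.h, which after this series reads roughly as follows (quoted from memory, so treat as a sketch):

	static inline __u32 skb_get_rxhash(struct sk_buff *skb)
	{
		if (!skb->rxhash)
			__skb_get_rxhash(skb);

		return skb->rxhash;
	}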
@@ -2606,10 +2722,7 @@ static struct rps_dev_flow *
 set_rps_cpu(struct net_device *dev, struct sk_buff *skb,
 	    struct rps_dev_flow *rflow, u16 next_cpu)
 {
-	u16 tcpu;
-
-	tcpu = rflow->cpu = next_cpu;
-	if (tcpu != RPS_NO_CPU) {
+	if (next_cpu != RPS_NO_CPU) {
 #ifdef CONFIG_RFS_ACCEL
 		struct netdev_rx_queue *rxqueue;
 		struct rps_dev_flow_table *flow_table;
@@ -2637,16 +2750,16 @@ set_rps_cpu(struct net_device *dev, struct sk_buff *skb,
 			goto out;
 		old_rflow = rflow;
 		rflow = &flow_table->flows[flow_id];
-		rflow->cpu = next_cpu;
 		rflow->filter = rc;
 		if (old_rflow->filter == rflow->filter)
 			old_rflow->filter = RPS_NO_FILTER;
 	out:
 #endif
 		rflow->last_qtail =
-			per_cpu(softnet_data, tcpu).input_queue_head;
+			per_cpu(softnet_data, next_cpu).input_queue_head;
 	}
 
+	rflow->cpu = next_cpu;
 	return rflow;
 }
 
@@ -2681,13 +2794,13 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
 	map = rcu_dereference(rxqueue->rps_map);
 	if (map) {
 		if (map->len == 1 &&
-		    !rcu_dereference_raw(rxqueue->rps_flow_table)) {
+		    !rcu_access_pointer(rxqueue->rps_flow_table)) {
 			tcpu = map->cpus[0];
 			if (cpu_online(tcpu))
 				cpu = tcpu;
 			goto done;
 		}
-	} else if (!rcu_dereference_raw(rxqueue->rps_flow_table)) {
+	} else if (!rcu_access_pointer(rxqueue->rps_flow_table)) {
 		goto done;
 	}
 
@@ -3102,8 +3215,8 @@ void netdev_rx_handler_unregister(struct net_device *dev)
 {
 
 	ASSERT_RTNL();
-	rcu_assign_pointer(dev->rx_handler, NULL);
-	rcu_assign_pointer(dev->rx_handler_data, NULL);
+	RCU_INIT_POINTER(dev->rx_handler, NULL);
+	RCU_INIT_POINTER(dev->rx_handler_data, NULL);
 }
 EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister);
 
@@ -3170,6 +3283,17 @@ another_round:
 ncls:
 #endif
 
+	if (vlan_tx_tag_present(skb)) {
+		if (pt_prev) {
+			ret = deliver_skb(skb, pt_prev, orig_dev);
+			pt_prev = NULL;
+		}
+		if (vlan_do_receive(&skb))
+			goto another_round;
+		else if (unlikely(!skb))
+			goto out;
+	}
+
 	rx_handler = rcu_dereference(skb->dev->rx_handler);
 	if (rx_handler) {
 		if (pt_prev) {
@@ -3190,18 +3314,6 @@ ncls:
 		}
 	}
 
-	if (vlan_tx_tag_present(skb)) {
-		if (pt_prev) {
-			ret = deliver_skb(skb, pt_prev, orig_dev);
-			pt_prev = NULL;
-		}
-		if (vlan_do_receive(&skb)) {
-			ret = __netif_receive_skb(skb);
-			goto out;
-		} else if (unlikely(!skb))
-			goto out;
-	}
-
 	/* deliver only exact match when indicated */
 	null_or_dev = deliver_exact ? skb->dev : NULL;
 
@@ -3429,10 +3541,10 @@ pull:
 		skb->data_len -= grow;
 
 		skb_shinfo(skb)->frags[0].page_offset += grow;
-		skb_shinfo(skb)->frags[0].size -= grow;
+		skb_frag_size_sub(&skb_shinfo(skb)->frags[0], grow);
 
-		if (unlikely(!skb_shinfo(skb)->frags[0].size)) {
-			put_page(skb_shinfo(skb)->frags[0].page);
+		if (unlikely(!skb_frag_size(&skb_shinfo(skb)->frags[0]))) {
+			skb_frag_unref(skb, 0);
 			memmove(skb_shinfo(skb)->frags,
 				skb_shinfo(skb)->frags + 1,
 				--skb_shinfo(skb)->nr_frags * sizeof(skb_frag_t));
@@ -3496,11 +3608,10 @@ void skb_gro_reset_offset(struct sk_buff *skb)
 	NAPI_GRO_CB(skb)->frag0_len = 0;
 
 	if (skb->mac_header == skb->tail &&
-	    !PageHighMem(skb_shinfo(skb)->frags[0].page)) {
+	    !PageHighMem(skb_frag_page(&skb_shinfo(skb)->frags[0]))) {
 		NAPI_GRO_CB(skb)->frag0 =
-			page_address(skb_shinfo(skb)->frags[0].page) +
-			skb_shinfo(skb)->frags[0].page_offset;
-		NAPI_GRO_CB(skb)->frag0_len = skb_shinfo(skb)->frags[0].size;
+			skb_frag_address(&skb_shinfo(skb)->frags[0]);
+		NAPI_GRO_CB(skb)->frag0_len = skb_frag_size(&skb_shinfo(skb)->frags[0]);
 	}
 }
 EXPORT_SYMBOL(skb_gro_reset_offset);
@@ -3982,6 +4093,60 @@ static int dev_ifconf(struct net *net, char __user *arg)
 }
 
 #ifdef CONFIG_PROC_FS
+
+#define BUCKET_SPACE (32 - NETDEV_HASHBITS)
+
+struct dev_iter_state {
+	struct seq_net_private p;
+	unsigned int pos; /* bucket << BUCKET_SPACE + offset */
+};
+
+#define get_bucket(x) ((x) >> BUCKET_SPACE)
+#define get_offset(x) ((x) & ((1 << BUCKET_SPACE) - 1))
+#define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o))
+
+static inline struct net_device *dev_from_same_bucket(struct seq_file *seq)
+{
+	struct dev_iter_state *state = seq->private;
+	struct net *net = seq_file_net(seq);
+	struct net_device *dev;
+	struct hlist_node *p;
+	struct hlist_head *h;
+	unsigned int count, bucket, offset;
+
+	bucket = get_bucket(state->pos);
+	offset = get_offset(state->pos);
+	h = &net->dev_name_head[bucket];
+	count = 0;
+	hlist_for_each_entry_rcu(dev, p, h, name_hlist) {
+		if (count++ == offset) {
+			state->pos = set_bucket_offset(bucket, count);
+			return dev;
+		}
+	}
+
+	return NULL;
+}
+
+static inline struct net_device *dev_from_new_bucket(struct seq_file *seq)
+{
+	struct dev_iter_state *state = seq->private;
+	struct net_device *dev;
+	unsigned int bucket;
+
+	bucket = get_bucket(state->pos);
+	do {
+		dev = dev_from_same_bucket(seq);
+		if (dev)
+			return dev;
+
+		bucket++;
+		state->pos = set_bucket_offset(bucket, 0);
+	} while (bucket < NETDEV_HASHENTRIES);
+
+	return NULL;
+}
+
 /*
  * This is invoked by the /proc filesystem handler to display a device
  * in detail.
@@ -3989,33 +4154,33 @@ static int dev_ifconf(struct net *net, char __user *arg)
 void *dev_seq_start(struct seq_file *seq, loff_t *pos)
 	__acquires(RCU)
 {
-	struct net *net = seq_file_net(seq);
-	loff_t off;
-	struct net_device *dev;
+	struct dev_iter_state *state = seq->private;
 
 	rcu_read_lock();
 	if (!*pos)
 		return SEQ_START_TOKEN;
 
-	off = 1;
-	for_each_netdev_rcu(net, dev)
-		if (off++ == *pos)
-			return dev;
+	/* check for end of the hash */
+	if (state->pos == 0 && *pos > 1)
+		return NULL;
 
-	return NULL;
+	return dev_from_new_bucket(seq);
 }
 
 void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 {
-	struct net_device *dev = v;
+	struct net_device *dev;
+
+	++*pos;
 
 	if (v == SEQ_START_TOKEN)
-		dev = first_net_device_rcu(seq_file_net(seq));
-	else
-		dev = next_net_device_rcu(dev);
+		return dev_from_new_bucket(seq);
 
-	++*pos;
-	return dev;
+	dev = dev_from_same_bucket(seq);
+	if (dev)
+		return dev;
+
+	return dev_from_new_bucket(seq);
 }
 
 void dev_seq_stop(struct seq_file *seq, void *v)
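[Note] The /proc/net/dev iterator now encodes its cursor as bucket << BUCKET_SPACE | offset instead of re-walking the whole device list on every read. A small sketch of the arithmetic, assuming NETDEV_HASHBITS is 8 (its usual value in net/core/dev.c):

	#define EX_BUCKET_SPACE (32 - 8)	/* 32 - NETDEV_HASHBITS */

	static unsigned int ex_pos(unsigned int bucket, unsigned int offset)
	{
		/* bucket 3, offset 5 encodes as 0x03000005 */
		return (bucket << EX_BUCKET_SPACE) | offset;
	}

	static unsigned int ex_bucket(unsigned int pos)
	{
		return pos >> EX_BUCKET_SPACE;
	}

	static unsigned int ex_offset(unsigned int pos)
	{
		return pos & ((1U << EX_BUCKET_SPACE) - 1);
	}

Restarting from the saved bucket and offset makes each dev_seq_next() proportional to one bucket's length rather than to the total device count.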
@@ -4114,7 +4279,7 @@ static const struct seq_operations dev_seq_ops = {
 static int dev_seq_open(struct inode *inode, struct file *file)
 {
 	return seq_open_net(inode, file, &dev_seq_ops,
-			    sizeof(struct seq_net_private));
+			    sizeof(struct dev_iter_state));
 }
 
 static const struct file_operations dev_seq_fops = {
@@ -4497,9 +4662,7 @@ void __dev_set_rx_mode(struct net_device *dev)
 	if (!netif_device_present(dev))
 		return;
 
-	if (ops->ndo_set_rx_mode)
-		ops->ndo_set_rx_mode(dev);
-	else {
+	if (!(dev->priv_flags & IFF_UNICAST_FLT)) {
 		/* Unicast addresses changes may only happen under the rtnl,
 		 * therefore calling __dev_set_promiscuity here is safe.
 		 */
@@ -4510,10 +4673,10 @@ void __dev_set_rx_mode(struct net_device *dev)
 			__dev_set_promiscuity(dev, -1);
 			dev->uc_promisc = false;
 		}
-
-		if (ops->ndo_set_multicast_list)
-			ops->ndo_set_multicast_list(dev);
 	}
+
+	if (ops->ndo_set_rx_mode)
+		ops->ndo_set_rx_mode(dev);
 }
 
 void dev_set_rx_mode(struct net_device *dev)
@@ -4524,30 +4687,6 @@ void dev_set_rx_mode(struct net_device *dev)
 }
 
 /**
- *	dev_ethtool_get_settings - call device's ethtool_ops::get_settings()
- *	@dev: device
- *	@cmd: memory area for ethtool_ops::get_settings() result
- *
- *	The cmd arg is initialized properly (cleared and
- *	ethtool_cmd::cmd field set to ETHTOOL_GSET).
- *
- *	Return device's ethtool_ops::get_settings() result value or
- *	-EOPNOTSUPP when device doesn't expose
- *	ethtool_ops::get_settings() operation.
- */
-int dev_ethtool_get_settings(struct net_device *dev,
-			     struct ethtool_cmd *cmd)
-{
-	if (!dev->ethtool_ops || !dev->ethtool_ops->get_settings)
-		return -EOPNOTSUPP;
-
-	memset(cmd, 0, sizeof(struct ethtool_cmd));
-	cmd->cmd = ETHTOOL_GSET;
-	return dev->ethtool_ops->get_settings(dev, cmd);
-}
-EXPORT_SYMBOL(dev_ethtool_get_settings);
-
-/**
  *	dev_get_flags - get flags reported to userspace
  *	@dev: device
  *
@@ -4863,7 +5002,7 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
 		return -EOPNOTSUPP;
 
 	case SIOCADDMULTI:
-		if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) ||
+		if (!ops->ndo_set_rx_mode ||
 		    ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
 			return -EINVAL;
 		if (!netif_device_present(dev))
@@ -4871,7 +5010,7 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
 		return dev_mc_add_global(dev, ifr->ifr_hwaddr.sa_data);
 
 	case SIOCDELMULTI:
-		if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) ||
+		if (!ops->ndo_set_rx_mode ||
 		    ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
 			return -EINVAL;
 		if (!netif_device_present(dev))
@@ -4888,6 +5027,12 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
 		ifr->ifr_newname[IFNAMSIZ-1] = '\0';
 		return dev_change_name(dev, ifr->ifr_newname);
 
+	case SIOCSHWTSTAMP:
+		err = net_hwtstamp_validate(ifr);
+		if (err)
+			return err;
+		/* fall through */
+
 	/*
 	 *	Unknown or private ioctl
 	 */
@@ -5202,7 +5347,7 @@ static void rollback_registered_many(struct list_head *head)
 	dev = list_first_entry(head, struct net_device, unreg_list);
 	call_netdevice_notifiers(NETDEV_UNREGISTER_BATCH, dev);
 
-	rcu_barrier();
+	synchronize_net();
 
 	list_for_each_entry(dev, head, unreg_list)
 		dev_put(dev);
@@ -5715,6 +5860,12 @@ void netdev_run_todo(void)
 
 	__rtnl_unlock();
 
+	/* Wait for rcu callbacks to finish before attempting to drain
+	 * the device list. This usually avoids a 250ms wait.
+	 */
+	if (!list_empty(&list))
+		rcu_barrier();
+
 	while (!list_empty(&list)) {
 		struct net_device *dev
 			= list_first_entry(&list, struct net_device, todo_list);
@@ -5735,8 +5886,8 @@ void netdev_run_todo(void)
 
 		/* paranoia */
 		BUG_ON(netdev_refcnt_read(dev));
-		WARN_ON(rcu_dereference_raw(dev->ip_ptr));
-		WARN_ON(rcu_dereference_raw(dev->ip6_ptr));
+		WARN_ON(rcu_access_pointer(dev->ip_ptr));
+		WARN_ON(rcu_access_pointer(dev->ip6_ptr));
 		WARN_ON(dev->dn_ptr);
 
 		if (dev->destructor)
@@ -5940,7 +6091,7 @@ void free_netdev(struct net_device *dev)
 	kfree(dev->_rx);
 #endif
 
-	kfree(rcu_dereference_raw(dev->ingress_queue));
+	kfree(rcu_dereference_protected(dev->ingress_queue, 1));
 
 	/* Flush device addresses */
 	dev_addr_flush(dev);
@@ -6115,6 +6266,7 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
 	 */
 	call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
 	call_netdevice_notifiers(NETDEV_UNREGISTER_BATCH, dev);
+	rtmsg_ifinfo(RTM_DELLINK, dev, ~0U);
 
 	/*
 	 *	Flush the unicast and multicast chains
diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c
index e2e66939ed00..283d1b863876 100644
--- a/net/core/dev_addr_lists.c
+++ b/net/core/dev_addr_lists.c
@@ -591,8 +591,8 @@ EXPORT_SYMBOL(dev_mc_del_global);
  *	addresses that have no users left. The source device must be
  *	locked by netif_tx_lock_bh.
  *
- *	This function is intended to be called from the dev->set_multicast_list
- *	or dev->set_rx_mode function of layered software devices.
+ *	This function is intended to be called from the ndo_set_rx_mode
+ *	function of layered software devices.
  */
 int dev_mc_sync(struct net_device *to, struct net_device *from)
 {
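[Note] The comment now points at ndo_set_rx_mode, matching the callback consolidation in dev.c above. A hedged sketch of the calling convention it describes, for a stacked device propagating its multicast list downward (ex_get_lower_dev() is a hypothetical helper standing in for however a driver finds its real device):

	static void ex_stacked_set_rx_mode(struct net_device *dev)
	{
		struct net_device *lower = ex_get_lower_dev(dev);	/* hypothetical */

		dev_mc_sync(lower, dev);
	}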
diff --git a/net/core/dst.c b/net/core/dst.c
index 14b33baf0733..d5e2c4c09107 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -171,7 +171,7 @@ void *dst_alloc(struct dst_ops *ops, struct net_device *dev,
 	dst_init_metrics(dst, dst_default_metrics, true);
 	dst->expires = 0UL;
 	dst->path = dst;
-	dst->_neighbour = NULL;
+	RCU_INIT_POINTER(dst->_neighbour, NULL);
 #ifdef CONFIG_XFRM
 	dst->xfrm = NULL;
 #endif
@@ -229,11 +229,11 @@ struct dst_entry *dst_destroy(struct dst_entry * dst)
 	smp_rmb();
 
 again:
-	neigh = dst->_neighbour;
+	neigh = rcu_dereference_protected(dst->_neighbour, 1);
 	child = dst->child;
 
 	if (neigh) {
-		dst->_neighbour = NULL;
+		RCU_INIT_POINTER(dst->_neighbour, NULL);
 		neigh_release(neigh);
 	}
 
@@ -360,14 +360,19 @@ static void dst_ifdown(struct dst_entry *dst, struct net_device *dev,
 	if (!unregister) {
 		dst->input = dst->output = dst_discard;
 	} else {
+		struct neighbour *neigh;
+
 		dst->dev = dev_net(dst->dev)->loopback_dev;
 		dev_hold(dst->dev);
 		dev_put(dev);
-		if (dst->_neighbour && dst->_neighbour->dev == dev) {
-			dst->_neighbour->dev = dst->dev;
+		rcu_read_lock();
+		neigh = dst_get_neighbour(dst);
+		if (neigh && neigh->dev == dev) {
+			neigh->dev = dst->dev;
 			dev_hold(dst->dev);
 			dev_put(dev);
 		}
+		rcu_read_unlock();
 	}
 }
 
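[Note] dst->_neighbour is now treated as an __rcu pointer: plain stores become RCU_INIT_POINTER()/rcu_dereference_protected(), and the reader in dst_ifdown takes rcu_read_lock() around dst_get_neighbour(). The general rule these conversions follow, as an illustrative contrast (not from the patch; ex_ names are placeholders):

	struct ex_obj {
		int val;
	};

	static struct ex_obj __rcu *ex_gp;	/* placeholder __rcu pointer */

	static void ex_publish(struct ex_obj *p)
	{
		p->val = 42;
		rcu_assign_pointer(ex_gp, p);	/* barrier: readers see val set */
	}

	static void ex_retract(void)
	{
		RCU_INIT_POINTER(ex_gp, NULL);	/* storing NULL needs no ordering */
	}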
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 6cdba5fc2bed..f44481707124 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -569,15 +569,25 @@ int __ethtool_set_flags(struct net_device *dev, u32 data)
 	return 0;
 }
 
-static int ethtool_get_settings(struct net_device *dev, void __user *useraddr)
+int __ethtool_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
 {
-	struct ethtool_cmd cmd = { .cmd = ETHTOOL_GSET };
-	int err;
+	ASSERT_RTNL();
 
-	if (!dev->ethtool_ops->get_settings)
+	if (!dev->ethtool_ops || !dev->ethtool_ops->get_settings)
 		return -EOPNOTSUPP;
 
-	err = dev->ethtool_ops->get_settings(dev, &cmd);
+	memset(cmd, 0, sizeof(struct ethtool_cmd));
+	cmd->cmd = ETHTOOL_GSET;
+	return dev->ethtool_ops->get_settings(dev, cmd);
+}
+EXPORT_SYMBOL(__ethtool_get_settings);
+
+static int ethtool_get_settings(struct net_device *dev, void __user *useraddr)
+{
+	int err;
+	struct ethtool_cmd cmd;
+
+	err = __ethtool_get_settings(dev, &cmd);
 	if (err < 0)
 		return err;
 
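[Note] __ethtool_get_settings() relocates the old dev_ethtool_get_settings() helper (removed from dev.c above) into the ethtool core, with ASSERT_RTNL() making the locking contract explicit. A hedged sketch of an in-kernel caller outside the ioctl path, mirroring what net-sysfs does below:

	static int ex_read_link_speed(struct net_device *dev, u32 *speed)
	{
		struct ethtool_cmd cmd;
		int err;

		rtnl_lock();
		err = __ethtool_get_settings(dev, &cmd);
		rtnl_unlock();
		if (err < 0)
			return err;

		*speed = ethtool_cmd_speed(&cmd);	/* combines speed_hi | speed */
		return 0;
	}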
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 27071ee2a4e1..57e8f95110e6 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -490,7 +490,7 @@ static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 	if (ops->nr_goto_rules > 0) {
 		list_for_each_entry(tmp, &ops->rules_list, list) {
 			if (rtnl_dereference(tmp->ctarget) == rule) {
-				rcu_assign_pointer(tmp->ctarget, NULL);
+				RCU_INIT_POINTER(tmp->ctarget, NULL);
 				ops->unresolved_rules++;
 			}
 		}
@@ -548,7 +548,7 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule,
 	frh->flags = rule->flags;
 
 	if (rule->action == FR_ACT_GOTO &&
-	    rcu_dereference_raw(rule->ctarget) == NULL)
+	    rcu_access_pointer(rule->ctarget) == NULL)
 		frh->flags |= FIB_RULE_UNRESOLVED;
 
 	if (rule->iifname[0]) {
diff --git a/net/core/filter.c b/net/core/filter.c
index 36f975fa87cb..5dea45279215 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -436,7 +436,7 @@ error:
  *
  * Returns 0 if the rule set is legal or -EINVAL if not.
  */
-int sk_chk_filter(struct sock_filter *filter, int flen)
+int sk_chk_filter(struct sock_filter *filter, unsigned int flen)
 {
 	/*
 	 * Valid instructions are initialized to non-0.
@@ -645,7 +645,7 @@ int sk_detach_filter(struct sock *sk)
 	filter = rcu_dereference_protected(sk->sk_filter,
 					   sock_owned_by_user(sk));
 	if (filter) {
-		rcu_assign_pointer(sk->sk_filter, NULL);
+		RCU_INIT_POINTER(sk->sk_filter, NULL);
 		sk_filter_uncharge(sk, filter);
 		ret = 0;
 	}
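[Note] sk_chk_filter() validates the instruction stream of a classic BPF program; the signedness change closes the case of a negative flen slipping past the length check. A hypothetical userspace sketch of the path that exercises it, attaching an accept-all filter via SO_ATTACH_FILTER:

	#include <linux/filter.h>
	#include <sys/socket.h>

	static int ex_attach_accept_all(int sock)
	{
		static struct sock_filter code[] = {
			{ 0x06, 0, 0, 0x0000ffff },	/* BPF_RET|BPF_K: accept */
		};
		struct sock_fprog prog = {
			.len = sizeof(code) / sizeof(code[0]),
			.filter = code,
		};

		return setsockopt(sock, SOL_SOCKET, SO_ATTACH_FILTER,
				  &prog, sizeof(prog));
	}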
diff --git a/net/core/flow.c b/net/core/flow.c
index 555a456efb07..8ae42de9c79e 100644
--- a/net/core/flow.c
+++ b/net/core/flow.c
@@ -413,7 +413,7 @@ static int __init flow_cache_init(struct flow_cache *fc)
 
 	for_each_online_cpu(i) {
 		if (flow_cache_cpu_prepare(fc, i))
-			return -ENOMEM;
+			goto err;
 	}
 	fc->hotcpu_notifier = (struct notifier_block){
 		.notifier_call = flow_cache_cpu,
@@ -426,6 +426,18 @@ static int __init flow_cache_init(struct flow_cache *fc)
 	add_timer(&fc->rnd_timer);
 
 	return 0;
+
+err:
+	for_each_possible_cpu(i) {
+		struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, i);
+		kfree(fcp->hash_table);
+		fcp->hash_table = NULL;
+	}
+
+	free_percpu(fc->percpu);
+	fc->percpu = NULL;
+
+	return -ENOMEM;
 }
 
 static int __init flow_cache_init_global(void)
diff --git a/net/core/kmap_skb.h b/net/core/kmap_skb.h
index 283c2b993fb8..81e1ed7c8383 100644
--- a/net/core/kmap_skb.h
+++ b/net/core/kmap_skb.h
@@ -7,7 +7,7 @@ static inline void *kmap_skb_frag(const skb_frag_t *frag)
 
 	local_bh_disable();
 #endif
-	return kmap_atomic(frag->page, KM_SKB_DATA_SOFTIRQ);
+	return kmap_atomic(skb_frag_page(frag), KM_SKB_DATA_SOFTIRQ);
 }
 
 static inline void kunmap_skb_frag(void *vaddr)
diff --git a/net/core/link_watch.c b/net/core/link_watch.c
index 357bd4ee4baa..c3519c6d1b16 100644
--- a/net/core/link_watch.c
+++ b/net/core/link_watch.c
@@ -78,8 +78,13 @@ static void rfc2863_policy(struct net_device *dev)
 
 static bool linkwatch_urgent_event(struct net_device *dev)
 {
-	return netif_running(dev) && netif_carrier_ok(dev) &&
-		qdisc_tx_changing(dev);
+	if (!netif_running(dev))
+		return false;
+
+	if (dev->ifindex != dev->iflink)
+		return true;
+
+	return netif_carrier_ok(dev) && qdisc_tx_changing(dev);
 }
 
 
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 1334d7e56f02..909ecb3c2a33 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -844,6 +844,19 @@ static void neigh_invalidate(struct neighbour *neigh)
 	skb_queue_purge(&neigh->arp_queue);
 }
 
+static void neigh_probe(struct neighbour *neigh)
+	__releases(neigh->lock)
+{
+	struct sk_buff *skb = skb_peek(&neigh->arp_queue);
+	/* keep skb alive even if arp_queue overflows */
+	if (skb)
+		skb = skb_copy(skb, GFP_ATOMIC);
+	write_unlock(&neigh->lock);
+	neigh->ops->solicit(neigh, skb);
+	atomic_inc(&neigh->probes);
+	kfree_skb(skb);
+}
+
 /* Called when a timer expires for a neighbour entry. */
 
 static void neigh_timer_handler(unsigned long arg)
@@ -920,14 +933,7 @@ static void neigh_timer_handler(unsigned long arg)
 		neigh_hold(neigh);
 	}
 	if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
-		struct sk_buff *skb = skb_peek(&neigh->arp_queue);
-		/* keep skb alive even if arp_queue overflows */
-		if (skb)
-			skb = skb_copy(skb, GFP_ATOMIC);
-		write_unlock(&neigh->lock);
-		neigh->ops->solicit(neigh, skb);
-		atomic_inc(&neigh->probes);
-		kfree_skb(skb);
+		neigh_probe(neigh);
 	} else {
 out:
 		write_unlock(&neigh->lock);
@@ -942,7 +948,7 @@ out:
 int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
 {
 	int rc;
-	unsigned long now;
+	bool immediate_probe = false;
 
 	write_lock_bh(&neigh->lock);
 
@@ -950,14 +956,16 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
 	if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
 		goto out_unlock_bh;
 
-	now = jiffies;
-
 	if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) {
 		if (neigh->parms->mcast_probes + neigh->parms->app_probes) {
+			unsigned long next, now = jiffies;
+
 			atomic_set(&neigh->probes, neigh->parms->ucast_probes);
 			neigh->nud_state = NUD_INCOMPLETE;
-			neigh->updated = jiffies;
-			neigh_add_timer(neigh, now + 1);
+			neigh->updated = now;
+			next = now + max(neigh->parms->retrans_time, HZ/2);
+			neigh_add_timer(neigh, next);
+			immediate_probe = true;
 		} else {
 			neigh->nud_state = NUD_FAILED;
 			neigh->updated = jiffies;
@@ -989,7 +997,11 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
 		rc = 1;
 	}
 out_unlock_bh:
-	write_unlock_bh(&neigh->lock);
+	if (immediate_probe)
+		neigh_probe(neigh);
+	else
+		write_unlock(&neigh->lock);
+	local_bh_enable();
 	return rc;
 }
 EXPORT_SYMBOL(__neigh_event_send);
@@ -1156,10 +1168,14 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
 			struct dst_entry *dst = skb_dst(skb);
 			struct neighbour *n2, *n1 = neigh;
 			write_unlock_bh(&neigh->lock);
+
+			rcu_read_lock();
 			/* On shaper/eql skb->dst->neighbour != neigh :( */
 			if (dst && (n2 = dst_get_neighbour(dst)) != NULL)
 				n1 = n2;
 			n1->output(n1, skb);
+			rcu_read_unlock();
+
 			write_lock_bh(&neigh->lock);
 		}
 		skb_queue_purge(&neigh->arp_queue);
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 1683e5db2f27..7604a635376b 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -147,7 +147,7 @@ static ssize_t show_speed(struct device *dev,
 
 	if (netif_running(netdev)) {
 		struct ethtool_cmd cmd;
-		if (!dev_ethtool_get_settings(netdev, &cmd))
+		if (!__ethtool_get_settings(netdev, &cmd))
 			ret = sprintf(buf, fmt_udec, ethtool_cmd_speed(&cmd));
 	}
 	rtnl_unlock();
@@ -165,7 +165,7 @@ static ssize_t show_duplex(struct device *dev,
 
 	if (netif_running(netdev)) {
 		struct ethtool_cmd cmd;
-		if (!dev_ethtool_get_settings(netdev, &cmd))
+		if (!__ethtool_get_settings(netdev, &cmd))
 			ret = sprintf(buf, "%s\n",
 				      cmd.duplex ? "full" : "half");
 	}
@@ -712,13 +712,13 @@ static void rx_queue_release(struct kobject *kobj)
 	struct rps_dev_flow_table *flow_table;
 
 
-	map = rcu_dereference_raw(queue->rps_map);
+	map = rcu_dereference_protected(queue->rps_map, 1);
 	if (map) {
 		RCU_INIT_POINTER(queue->rps_map, NULL);
 		kfree_rcu(map, rcu);
 	}
 
-	flow_table = rcu_dereference_raw(queue->rps_flow_table);
+	flow_table = rcu_dereference_protected(queue->rps_flow_table, 1);
 	if (flow_table) {
 		RCU_INIT_POINTER(queue->rps_flow_table, NULL);
 		call_rcu(&flow_table->rcu, rps_dev_flow_table_release);
@@ -987,10 +987,10 @@ static ssize_t store_xps_map(struct netdev_queue *queue,
 	}
 
 	if (nonempty)
-		rcu_assign_pointer(dev->xps_maps, new_dev_maps);
+		RCU_INIT_POINTER(dev->xps_maps, new_dev_maps);
 	else {
 		kfree(new_dev_maps);
-		rcu_assign_pointer(dev->xps_maps, NULL);
+		RCU_INIT_POINTER(dev->xps_maps, NULL);
 	}
 
 	if (dev_maps)
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 52622517e0d8..f57d94627a2a 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -762,7 +762,7 @@ int __netpoll_setup(struct netpoll *np)
 	}
 
 	/* last thing to do is link it to the net device structure */
-	rcu_assign_pointer(ndev->npinfo, npinfo);
+	RCU_INIT_POINTER(ndev->npinfo, npinfo);
 
 	return 0;
 
@@ -903,7 +903,7 @@ void __netpoll_cleanup(struct netpoll *np)
 	if (ops->ndo_netpoll_cleanup)
 		ops->ndo_netpoll_cleanup(np->dev);
 
-	rcu_assign_pointer(np->dev->npinfo, NULL);
+	RCU_INIT_POINTER(np->dev->npinfo, NULL);
 
 	/* avoid racing with NAPI reading npinfo */
 	synchronize_rcu_bh();
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index e35a6fbb8110..0001c243b35c 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -2145,9 +2145,12 @@ static void spin(struct pktgen_dev *pkt_dev, ktime_t spin_until)
 	}
 
 	start_time = ktime_now();
-	if (remaining < 100000)
-		ndelay(remaining);	/* really small just spin */
-	else {
+	if (remaining < 100000) {
+		/* for small delays (<100us), just loop until limit is reached */
+		do {
+			end_time = ktime_now();
+		} while (ktime_lt(end_time, spin_until));
+	} else {
 		/* see do_nanosleep */
 		hrtimer_init_sleeper(&t, current);
 		do {
@@ -2162,8 +2165,8 @@ static void spin(struct pktgen_dev *pkt_dev, ktime_t spin_until)
 			hrtimer_cancel(&t.timer);
 		} while (t.task && pkt_dev->running && !signal_pending(current));
 		__set_current_state(TASK_RUNNING);
+		end_time = ktime_now();
 	}
-	end_time = ktime_now();
 
 	pkt_dev->idle_acc += ktime_to_ns(ktime_sub(end_time, start_time));
 	pkt_dev->next_tx = ktime_add_ns(spin_until, pkt_dev->delay);
@@ -2602,18 +2605,18 @@ static void pktgen_finalize_skb(struct pktgen_dev *pkt_dev, struct sk_buff *skb,
 			if (!pkt_dev->page)
 				break;
 		}
-		skb_shinfo(skb)->frags[i].page = pkt_dev->page;
 		get_page(pkt_dev->page);
+		skb_frag_set_page(skb, i, pkt_dev->page);
 		skb_shinfo(skb)->frags[i].page_offset = 0;
 		/*last fragment, fill rest of data*/
 		if (i == (frags - 1))
-			skb_shinfo(skb)->frags[i].size =
-			    (datalen < PAGE_SIZE ? datalen : PAGE_SIZE);
+			skb_frag_size_set(&skb_shinfo(skb)->frags[i],
+			    (datalen < PAGE_SIZE ? datalen : PAGE_SIZE));
 		else
-			skb_shinfo(skb)->frags[i].size = frag_len;
-		datalen -= skb_shinfo(skb)->frags[i].size;
-		skb->len += skb_shinfo(skb)->frags[i].size;
-		skb->data_len += skb_shinfo(skb)->frags[i].size;
+			skb_frag_size_set(&skb_shinfo(skb)->frags[i], frag_len);
+		datalen -= skb_frag_size(&skb_shinfo(skb)->frags[i]);
+		skb->len += skb_frag_size(&skb_shinfo(skb)->frags[i]);
+		skb->data_len += skb_frag_size(&skb_shinfo(skb)->frags[i]);
 		i++;
 		skb_shinfo(skb)->nr_frags = i;
 	}
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 99d9e953fe39..9083e82bdae5 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -731,7 +731,8 @@ static inline int rtnl_vfinfo_size(const struct net_device *dev)
 		size += num_vfs *
 			(nla_total_size(sizeof(struct ifla_vf_mac)) +
 			 nla_total_size(sizeof(struct ifla_vf_vlan)) +
-			 nla_total_size(sizeof(struct ifla_vf_tx_rate)));
+			 nla_total_size(sizeof(struct ifla_vf_tx_rate)) +
+			 nla_total_size(sizeof(struct ifla_vf_spoofchk)));
 		return size;
 	} else
 		return 0;
@@ -954,13 +955,27 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
 			struct ifla_vf_mac vf_mac;
 			struct ifla_vf_vlan vf_vlan;
 			struct ifla_vf_tx_rate vf_tx_rate;
+			struct ifla_vf_spoofchk vf_spoofchk;
+
+			/*
+			 * Not all SR-IOV capable drivers support the
+			 * spoofcheck query.  Preset to -1 so the user
+			 * space tool can detect that the driver didn't
+			 * report anything.
+			 */
+			ivi.spoofchk = -1;
 			if (dev->netdev_ops->ndo_get_vf_config(dev, i, &ivi))
 				break;
-			vf_mac.vf = vf_vlan.vf = vf_tx_rate.vf = ivi.vf;
+			vf_mac.vf =
+				vf_vlan.vf =
+				vf_tx_rate.vf =
+				vf_spoofchk.vf = ivi.vf;
+
 			memcpy(vf_mac.mac, ivi.mac, sizeof(ivi.mac));
 			vf_vlan.vlan = ivi.vlan;
 			vf_vlan.qos = ivi.qos;
 			vf_tx_rate.rate = ivi.tx_rate;
+			vf_spoofchk.setting = ivi.spoofchk;
 			vf = nla_nest_start(skb, IFLA_VF_INFO);
 			if (!vf) {
 				nla_nest_cancel(skb, vfinfo);
@@ -968,7 +983,10 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
 			}
 			NLA_PUT(skb, IFLA_VF_MAC, sizeof(vf_mac), &vf_mac);
 			NLA_PUT(skb, IFLA_VF_VLAN, sizeof(vf_vlan), &vf_vlan);
-			NLA_PUT(skb, IFLA_VF_TX_RATE, sizeof(vf_tx_rate), &vf_tx_rate);
+			NLA_PUT(skb, IFLA_VF_TX_RATE, sizeof(vf_tx_rate),
+				&vf_tx_rate);
+			NLA_PUT(skb, IFLA_VF_SPOOFCHK, sizeof(vf_spoofchk),
+				&vf_spoofchk);
 			nla_nest_end(skb, vf);
 		}
 		nla_nest_end(skb, vfinfo);
@@ -1202,6 +1220,15 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr *attr)
 					       ivt->rate);
 			break;
 		}
+		case IFLA_VF_SPOOFCHK: {
+			struct ifla_vf_spoofchk *ivs;
+			ivs = nla_data(vf);
+			err = -EOPNOTSUPP;
+			if (ops->ndo_set_vf_spoofchk)
+				err = ops->ndo_set_vf_spoofchk(dev, ivs->vf,
+							       ivs->setting);
+			break;
+		}
 		default:
 			err = -EINVAL;
 			break;
@@ -1604,7 +1631,6 @@ struct net_device *rtnl_create_link(struct net *src_net, struct net *net,
 	dev_net_set(dev, net);
 	dev->rtnl_link_ops = ops;
 	dev->rtnl_link_state = RTNL_LINK_INITIALIZING;
-	dev->real_num_tx_queues = real_num_queues;
 
 	if (tb[IFLA_MTU])
 		dev->mtu = nla_get_u32(tb[IFLA_MTU]);
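[Note] For reference, the payload carried in the new IFLA_VF_SPOOFCHK attribute is the two-field struct from include/linux/if_link.h (quoted from memory, so verify against the tree; setting is 0/1, or -1 when the driver reported nothing):

	struct ifla_vf_spoofchk {
		__u32 vf;
		__u32 setting;
	};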
diff --git a/net/core/scm.c b/net/core/scm.c
index 811b53fb330e..ff52ad0a5150 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -173,7 +173,7 @@ int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *p)
 		if (err)
 			goto error;
 
-		if (pid_vnr(p->pid) != p->creds.pid) {
+		if (!p->pid || pid_vnr(p->pid) != p->creds.pid) {
 			struct pid *pid;
 			err = -ESRCH;
 			pid = find_get_pid(p->creds.pid);
@@ -183,8 +183,9 @@ int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *p)
 			p->pid = pid;
 		}
 
-		if ((p->cred->euid != p->creds.uid) ||
-		    (p->cred->egid != p->creds.gid)) {
+		if (!p->cred ||
+		    (p->cred->euid != p->creds.uid) ||
+		    (p->cred->egid != p->creds.gid)) {
 			struct cred *cred;
 			err = -ENOMEM;
 			cred = prepare_creds();
@@ -193,7 +194,8 @@ int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *p)
 
 			cred->uid = cred->euid = p->creds.uid;
 			cred->gid = cred->egid = p->creds.gid;
-			put_cred(p->cred);
+			if (p->cred)
+				put_cred(p->cred);
 			p->cred = cred;
 		}
 		break;
diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c
index 45329d7c9dd9..025233de25f9 100644
--- a/net/core/secure_seq.c
+++ b/net/core/secure_seq.c
@@ -35,7 +35,7 @@ static u32 seq_scale(u32 seq)
 }
 
 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
-__u32 secure_tcpv6_sequence_number(__be32 *saddr, __be32 *daddr,
+__u32 secure_tcpv6_sequence_number(const __be32 *saddr, const __be32 *daddr,
 				   __be16 sport, __be16 dport)
 {
 	u32 secret[MD5_MESSAGE_BYTES / 4];
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 387703f56fce..ca4db40e75b8 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -184,11 +184,20 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
 		goto out;
 	prefetchw(skb);
 
-	size = SKB_DATA_ALIGN(size);
-	data = kmalloc_node_track_caller(size + sizeof(struct skb_shared_info),
-			gfp_mask, node);
+	/* We do our best to align skb_shared_info on a separate cache
+	 * line. It usually works because kmalloc(X > SMP_CACHE_BYTES) gives
+	 * aligned memory blocks, unless SLUB/SLAB debug is enabled.
+	 * Both skb->head and skb_shared_info are cache line aligned.
+	 */
+	size += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+	data = kmalloc_node_track_caller(size, gfp_mask, node);
 	if (!data)
 		goto nodata;
+	/* kmalloc(size) might give us more room than requested.
+	 * Put skb_shared_info exactly at the end of allocated zone,
+	 * to allow max possible filling before reallocation.
+	 */
+	size = SKB_WITH_OVERHEAD(ksize(data));
 	prefetchw(data + size);
 
 	/*
@@ -197,7 +206,8 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
 	 * the tail pointer in struct sk_buff!
 	 */
 	memset(skb, 0, offsetof(struct sk_buff, tail));
-	skb->truesize = size + sizeof(struct sk_buff);
+	/* Account for allocated memory : skb + skb->head */
+	skb->truesize = SKB_TRUESIZE(size);
 	atomic_set(&skb->users, 1);
 	skb->head = data;
 	skb->data = data;
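[Note] SKB_TRUESIZE() is introduced by the same series in include/linux/skbuff.h; to the best of our recollection it charges the data block plus both aligned metadata structures (quoted from memory, verify against the tree):

	#define SKB_TRUESIZE(X) ((X) +						\
				 SKB_DATA_ALIGN(sizeof(struct sk_buff)) +	\
				 SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))

Together with the ksize()-based recomputation above, skb->truesize now reflects what the allocator actually handed out instead of what was requested.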
@@ -326,7 +336,7 @@ static void skb_release_data(struct sk_buff *skb)
326 if (skb_shinfo(skb)->nr_frags) { 336 if (skb_shinfo(skb)->nr_frags) {
327 int i; 337 int i;
328 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) 338 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
329 put_page(skb_shinfo(skb)->frags[i].page); 339 skb_frag_unref(skb, i);
330 } 340 }
331 341
332 /* 342 /*
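
From here on the diff stops touching skb frag fields directly and goes through accessors. The reference-counting pair is a thin wrapper over get_page()/put_page() on the frag's page, approximately as the helpers this series introduces in linux/skbuff.h:

	static inline void __skb_frag_ref(skb_frag_t *frag)
	{
		get_page(skb_frag_page(frag));
	}

	static inline void skb_frag_ref(struct sk_buff *skb, int f)
	{
		__skb_frag_ref(&skb_shinfo(skb)->frags[f]);
	}

	static inline void __skb_frag_unref(skb_frag_t *frag)
	{
		put_page(skb_frag_page(frag));
	}

	static inline void skb_frag_unref(struct sk_buff *skb, int f)
	{
		__skb_frag_unref(&skb_shinfo(skb)->frags[f]);
	}

Hiding the struct page behind accessors is what later allows the frag layout to change without touching every caller.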
@@ -475,6 +485,30 @@ void consume_skb(struct sk_buff *skb)
475EXPORT_SYMBOL(consume_skb); 485EXPORT_SYMBOL(consume_skb);
476 486
477/** 487/**
488 * skb_recycle - clean up an skb for reuse
489 * @skb: buffer
490 *
491 * Recycles the skb to be reused as a receive buffer. This
492 * function does any necessary reference count dropping, and
493 * cleans up the skbuff as if it just came from __alloc_skb().
494 */
495void skb_recycle(struct sk_buff *skb)
496{
497 struct skb_shared_info *shinfo;
498
499 skb_release_head_state(skb);
500
501 shinfo = skb_shinfo(skb);
502 memset(shinfo, 0, offsetof(struct skb_shared_info, dataref));
503 atomic_set(&shinfo->dataref, 1);
504
505 memset(skb, 0, offsetof(struct sk_buff, tail));
506 skb->data = skb->head + NET_SKB_PAD;
507 skb_reset_tail_pointer(skb);
508}
509EXPORT_SYMBOL(skb_recycle);
510
511/**
478 * skb_recycle_check - check if skb can be reused for receive 512 * skb_recycle_check - check if skb can be reused for receive
479 * @skb: buffer 513 * @skb: buffer
480 * @skb_size: minimum receive buffer size 514 * @skb_size: minimum receive buffer size
@@ -488,33 +522,10 @@ EXPORT_SYMBOL(consume_skb);
488 */ 522 */
489bool skb_recycle_check(struct sk_buff *skb, int skb_size) 523bool skb_recycle_check(struct sk_buff *skb, int skb_size)
490{ 524{
491 struct skb_shared_info *shinfo; 525 if (!skb_is_recycleable(skb, skb_size))
492
493 if (irqs_disabled())
494 return false;
495
496 if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY)
497 return false;
498
499 if (skb_is_nonlinear(skb) || skb->fclone != SKB_FCLONE_UNAVAILABLE)
500 return false; 526 return false;
501 527
502 skb_size = SKB_DATA_ALIGN(skb_size + NET_SKB_PAD); 528 skb_recycle(skb);
503 if (skb_end_pointer(skb) - skb->head < skb_size)
504 return false;
505
506 if (skb_shared(skb) || skb_cloned(skb))
507 return false;
508
509 skb_release_head_state(skb);
510
511 shinfo = skb_shinfo(skb);
512 memset(shinfo, 0, offsetof(struct skb_shared_info, dataref));
513 atomic_set(&shinfo->dataref, 1);
514
515 memset(skb, 0, offsetof(struct sk_buff, tail));
516 skb->data = skb->head + NET_SKB_PAD;
517 skb_reset_tail_pointer(skb);
518 529
519 return true; 530 return true;
520} 531}
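
skb_recycle_check() is split so that the cleanup half, skb_recycle(), can be called on its own once a driver has already established recycleability. A hypothetical RX refill path (rx_buf_size and refill_queue are illustrative names, not from this diff):

	if (skb_is_recycleable(skb, rx_buf_size)) {
		skb_recycle(skb);			/* reset as if fresh from __alloc_skb() */
		__skb_queue_head(&refill_queue, skb);	/* reuse as an RX buffer */
	} else {
		dev_kfree_skb(skb);
	}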
@@ -529,6 +540,8 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
529 new->mac_header = old->mac_header; 540 new->mac_header = old->mac_header;
530 skb_dst_copy(new, old); 541 skb_dst_copy(new, old);
531 new->rxhash = old->rxhash; 542 new->rxhash = old->rxhash;
543 new->ooo_okay = old->ooo_okay;
544 new->l4_rxhash = old->l4_rxhash;
532#ifdef CONFIG_XFRM 545#ifdef CONFIG_XFRM
533 new->sp = secpath_get(old->sp); 546 new->sp = secpath_get(old->sp);
534#endif 547#endif
@@ -647,7 +660,7 @@ int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask)
647 } 660 }
648 vaddr = kmap_skb_frag(&skb_shinfo(skb)->frags[i]); 661 vaddr = kmap_skb_frag(&skb_shinfo(skb)->frags[i]);
649 memcpy(page_address(page), 662 memcpy(page_address(page),
650 vaddr + f->page_offset, f->size); 663 vaddr + f->page_offset, skb_frag_size(f));
651 kunmap_skb_frag(vaddr); 664 kunmap_skb_frag(vaddr);
652 page->private = (unsigned long)head; 665 page->private = (unsigned long)head;
653 head = page; 666 head = page;
@@ -655,14 +668,14 @@ int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask)
655 668
656 /* skb frags release userspace buffers */ 669 /* skb frags release userspace buffers */
657 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) 670 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
658 put_page(skb_shinfo(skb)->frags[i].page); 671 skb_frag_unref(skb, i);
659 672
660 uarg->callback(uarg); 673 uarg->callback(uarg);
661 674
662 /* skb frags point to kernel buffers */ 675 /* skb frags point to kernel buffers */
663 for (i = skb_shinfo(skb)->nr_frags; i > 0; i--) { 676 for (i = skb_shinfo(skb)->nr_frags; i > 0; i--) {
664 skb_shinfo(skb)->frags[i - 1].page_offset = 0; 677 __skb_fill_page_desc(skb, i-1, head, 0,
665 skb_shinfo(skb)->frags[i - 1].page = head; 678 skb_shinfo(skb)->frags[i - 1].size);
666 head = (struct page *)head->private; 679 head = (struct page *)head->private;
667 } 680 }
668 681
@@ -820,7 +833,7 @@ struct sk_buff *pskb_copy(struct sk_buff *skb, gfp_t gfp_mask)
820 } 833 }
821 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { 834 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
822 skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i]; 835 skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i];
823 get_page(skb_shinfo(n)->frags[i].page); 836 skb_frag_ref(skb, i);
824 } 837 }
825 skb_shinfo(n)->nr_frags = i; 838 skb_shinfo(n)->nr_frags = i;
826 } 839 }
@@ -911,7 +924,7 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
911 goto nofrags; 924 goto nofrags;
912 } 925 }
913 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) 926 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
914 get_page(skb_shinfo(skb)->frags[i].page); 927 skb_frag_ref(skb, i);
915 928
916 if (skb_has_frag_list(skb)) 929 if (skb_has_frag_list(skb))
917 skb_clone_fraglist(skb); 930 skb_clone_fraglist(skb);
@@ -1178,20 +1191,20 @@ int ___pskb_trim(struct sk_buff *skb, unsigned int len)
1178 goto drop_pages; 1191 goto drop_pages;
1179 1192
1180 for (; i < nfrags; i++) { 1193 for (; i < nfrags; i++) {
1181 int end = offset + skb_shinfo(skb)->frags[i].size; 1194 int end = offset + skb_frag_size(&skb_shinfo(skb)->frags[i]);
1182 1195
1183 if (end < len) { 1196 if (end < len) {
1184 offset = end; 1197 offset = end;
1185 continue; 1198 continue;
1186 } 1199 }
1187 1200
1188 skb_shinfo(skb)->frags[i++].size = len - offset; 1201 skb_frag_size_set(&skb_shinfo(skb)->frags[i++], len - offset);
1189 1202
1190drop_pages: 1203drop_pages:
1191 skb_shinfo(skb)->nr_frags = i; 1204 skb_shinfo(skb)->nr_frags = i;
1192 1205
1193 for (; i < nfrags; i++) 1206 for (; i < nfrags; i++)
1194 put_page(skb_shinfo(skb)->frags[i].page); 1207 skb_frag_unref(skb, i);
1195 1208
1196 if (skb_has_frag_list(skb)) 1209 if (skb_has_frag_list(skb))
1197 skb_drop_fraglist(skb); 1210 skb_drop_fraglist(skb);
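
The size accessors used throughout these hunks (skb_frag_size, skb_frag_size_set/add/sub) are likewise trivial wrappers around the frag's size field, approximately:

	static inline unsigned int skb_frag_size(const skb_frag_t *frag)
	{
		return frag->size;
	}

	static inline void skb_frag_size_set(skb_frag_t *frag, unsigned int size)
	{
		frag->size = size;
	}

	static inline void skb_frag_size_add(skb_frag_t *frag, int delta)
	{
		frag->size += delta;
	}

	static inline void skb_frag_size_sub(skb_frag_t *frag, int delta)
	{
		frag->size -= delta;
	}

The indirection costs nothing after inlining, but makes the field's type and ownership a private detail of skbuff.h.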
@@ -1294,9 +1307,11 @@ unsigned char *__pskb_pull_tail(struct sk_buff *skb, int delta)
1294 /* Estimate size of pulled pages. */ 1307 /* Estimate size of pulled pages. */
1295 eat = delta; 1308 eat = delta;
1296 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { 1309 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
1297 if (skb_shinfo(skb)->frags[i].size >= eat) 1310 int size = skb_frag_size(&skb_shinfo(skb)->frags[i]);
1311
1312 if (size >= eat)
1298 goto pull_pages; 1313 goto pull_pages;
1299 eat -= skb_shinfo(skb)->frags[i].size; 1314 eat -= size;
1300 } 1315 }
1301 1316
1302 /* If we need to update the frag list, we are in trouble. 1317
@@ -1359,14 +1374,16 @@ pull_pages:
1359 eat = delta; 1374 eat = delta;
1360 k = 0; 1375 k = 0;
1361 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { 1376 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
1362 if (skb_shinfo(skb)->frags[i].size <= eat) { 1377 int size = skb_frag_size(&skb_shinfo(skb)->frags[i]);
1363 put_page(skb_shinfo(skb)->frags[i].page); 1378
1364 eat -= skb_shinfo(skb)->frags[i].size; 1379 if (size <= eat) {
1380 skb_frag_unref(skb, i);
1381 eat -= size;
1365 } else { 1382 } else {
1366 skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i]; 1383 skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
1367 if (eat) { 1384 if (eat) {
1368 skb_shinfo(skb)->frags[k].page_offset += eat; 1385 skb_shinfo(skb)->frags[k].page_offset += eat;
1369 skb_shinfo(skb)->frags[k].size -= eat; 1386 skb_frag_size_sub(&skb_shinfo(skb)->frags[k], eat);
1370 eat = 0; 1387 eat = 0;
1371 } 1388 }
1372 k++; 1389 k++;
@@ -1421,7 +1438,7 @@ int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len)
1421 1438
1422 WARN_ON(start > offset + len); 1439 WARN_ON(start > offset + len);
1423 1440
1424 end = start + skb_shinfo(skb)->frags[i].size; 1441 end = start + skb_frag_size(&skb_shinfo(skb)->frags[i]);
1425 if ((copy = end - offset) > 0) { 1442 if ((copy = end - offset) > 0) {
1426 u8 *vaddr; 1443 u8 *vaddr;
1427 1444
@@ -1619,7 +1636,8 @@ static int __skb_splice_bits(struct sk_buff *skb, struct pipe_inode_info *pipe,
1619 for (seg = 0; seg < skb_shinfo(skb)->nr_frags; seg++) { 1636 for (seg = 0; seg < skb_shinfo(skb)->nr_frags; seg++) {
1620 const skb_frag_t *f = &skb_shinfo(skb)->frags[seg]; 1637 const skb_frag_t *f = &skb_shinfo(skb)->frags[seg];
1621 1638
1622 if (__splice_segment(f->page, f->page_offset, f->size, 1639 if (__splice_segment(skb_frag_page(f),
1640 f->page_offset, skb_frag_size(f),
1623 offset, len, skb, spd, 0, sk, pipe)) 1641 offset, len, skb, spd, 0, sk, pipe))
1624 return 1; 1642 return 1;
1625 } 1643 }
@@ -1729,7 +1747,7 @@ int skb_store_bits(struct sk_buff *skb, int offset, const void *from, int len)
1729 1747
1730 WARN_ON(start > offset + len); 1748 WARN_ON(start > offset + len);
1731 1749
1732 end = start + frag->size; 1750 end = start + skb_frag_size(frag);
1733 if ((copy = end - offset) > 0) { 1751 if ((copy = end - offset) > 0) {
1734 u8 *vaddr; 1752 u8 *vaddr;
1735 1753
@@ -1802,7 +1820,7 @@ __wsum skb_checksum(const struct sk_buff *skb, int offset,
1802 1820
1803 WARN_ON(start > offset + len); 1821 WARN_ON(start > offset + len);
1804 1822
1805 end = start + skb_shinfo(skb)->frags[i].size; 1823 end = start + skb_frag_size(&skb_shinfo(skb)->frags[i]);
1806 if ((copy = end - offset) > 0) { 1824 if ((copy = end - offset) > 0) {
1807 __wsum csum2; 1825 __wsum csum2;
1808 u8 *vaddr; 1826 u8 *vaddr;
@@ -1877,7 +1895,7 @@ __wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset,
1877 1895
1878 WARN_ON(start > offset + len); 1896 WARN_ON(start > offset + len);
1879 1897
1880 end = start + skb_shinfo(skb)->frags[i].size; 1898 end = start + skb_frag_size(&skb_shinfo(skb)->frags[i]);
1881 if ((copy = end - offset) > 0) { 1899 if ((copy = end - offset) > 0) {
1882 __wsum csum2; 1900 __wsum csum2;
1883 u8 *vaddr; 1901 u8 *vaddr;
@@ -2150,7 +2168,7 @@ static inline void skb_split_no_header(struct sk_buff *skb,
2150 skb->data_len = len - pos; 2168 skb->data_len = len - pos;
2151 2169
2152 for (i = 0; i < nfrags; i++) { 2170 for (i = 0; i < nfrags; i++) {
2153 int size = skb_shinfo(skb)->frags[i].size; 2171 int size = skb_frag_size(&skb_shinfo(skb)->frags[i]);
2154 2172
2155 if (pos + size > len) { 2173 if (pos + size > len) {
2156 skb_shinfo(skb1)->frags[k] = skb_shinfo(skb)->frags[i]; 2174 skb_shinfo(skb1)->frags[k] = skb_shinfo(skb)->frags[i];
@@ -2164,10 +2182,10 @@ static inline void skb_split_no_header(struct sk_buff *skb,
2164 * where splitting is expensive. 2182 * where splitting is expensive.
2165 * 2. Split is accurate. We ensure this. 2183
2166 */ 2184 */
2167 get_page(skb_shinfo(skb)->frags[i].page); 2185 skb_frag_ref(skb, i);
2168 skb_shinfo(skb1)->frags[0].page_offset += len - pos; 2186 skb_shinfo(skb1)->frags[0].page_offset += len - pos;
2169 skb_shinfo(skb1)->frags[0].size -= len - pos; 2187 skb_frag_size_sub(&skb_shinfo(skb1)->frags[0], len - pos);
2170 skb_shinfo(skb)->frags[i].size = len - pos; 2188 skb_frag_size_set(&skb_shinfo(skb)->frags[i], len - pos);
2171 skb_shinfo(skb)->nr_frags++; 2189 skb_shinfo(skb)->nr_frags++;
2172 } 2190 }
2173 k++; 2191 k++;
@@ -2239,12 +2257,13 @@ int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen)
2239 * commit all, so that we don't have to undo partial changes 2257 * commit all, so that we don't have to undo partial changes
2240 */ 2258 */
2241 if (!to || 2259 if (!to ||
2242 !skb_can_coalesce(tgt, to, fragfrom->page, fragfrom->page_offset)) { 2260 !skb_can_coalesce(tgt, to, skb_frag_page(fragfrom),
2261 fragfrom->page_offset)) {
2243 merge = -1; 2262 merge = -1;
2244 } else { 2263 } else {
2245 merge = to - 1; 2264 merge = to - 1;
2246 2265
2247 todo -= fragfrom->size; 2266 todo -= skb_frag_size(fragfrom);
2248 if (todo < 0) { 2267 if (todo < 0) {
2249 if (skb_prepare_for_shift(skb) || 2268 if (skb_prepare_for_shift(skb) ||
2250 skb_prepare_for_shift(tgt)) 2269 skb_prepare_for_shift(tgt))
@@ -2254,8 +2273,8 @@ int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen)
2254 fragfrom = &skb_shinfo(skb)->frags[from]; 2273 fragfrom = &skb_shinfo(skb)->frags[from];
2255 fragto = &skb_shinfo(tgt)->frags[merge]; 2274 fragto = &skb_shinfo(tgt)->frags[merge];
2256 2275
2257 fragto->size += shiftlen; 2276 skb_frag_size_add(fragto, shiftlen);
2258 fragfrom->size -= shiftlen; 2277 skb_frag_size_sub(fragfrom, shiftlen);
2259 fragfrom->page_offset += shiftlen; 2278 fragfrom->page_offset += shiftlen;
2260 2279
2261 goto onlymerged; 2280 goto onlymerged;
@@ -2279,20 +2298,20 @@ int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen)
2279 fragfrom = &skb_shinfo(skb)->frags[from]; 2298 fragfrom = &skb_shinfo(skb)->frags[from];
2280 fragto = &skb_shinfo(tgt)->frags[to]; 2299 fragto = &skb_shinfo(tgt)->frags[to];
2281 2300
2282 if (todo >= fragfrom->size) { 2301 if (todo >= skb_frag_size(fragfrom)) {
2283 *fragto = *fragfrom; 2302 *fragto = *fragfrom;
2284 todo -= fragfrom->size; 2303 todo -= skb_frag_size(fragfrom);
2285 from++; 2304 from++;
2286 to++; 2305 to++;
2287 2306
2288 } else { 2307 } else {
2289 get_page(fragfrom->page); 2308 __skb_frag_ref(fragfrom);
2290 fragto->page = fragfrom->page; 2309 fragto->page = fragfrom->page;
2291 fragto->page_offset = fragfrom->page_offset; 2310 fragto->page_offset = fragfrom->page_offset;
2292 fragto->size = todo; 2311 skb_frag_size_set(fragto, todo);
2293 2312
2294 fragfrom->page_offset += todo; 2313 fragfrom->page_offset += todo;
2295 fragfrom->size -= todo; 2314 skb_frag_size_sub(fragfrom, todo);
2296 todo = 0; 2315 todo = 0;
2297 2316
2298 to++; 2317 to++;
@@ -2307,8 +2326,8 @@ int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen)
2307 fragfrom = &skb_shinfo(skb)->frags[0]; 2326 fragfrom = &skb_shinfo(skb)->frags[0];
2308 fragto = &skb_shinfo(tgt)->frags[merge]; 2327 fragto = &skb_shinfo(tgt)->frags[merge];
2309 2328
2310 fragto->size += fragfrom->size; 2329 skb_frag_size_add(fragto, skb_frag_size(fragfrom));
2311 put_page(fragfrom->page); 2330 __skb_frag_unref(fragfrom);
2312 } 2331 }
2313 2332
2314 /* Reposition in the original skb */ 2333 /* Reposition in the original skb */
@@ -2405,7 +2424,7 @@ next_skb:
2405 2424
2406 while (st->frag_idx < skb_shinfo(st->cur_skb)->nr_frags) { 2425 while (st->frag_idx < skb_shinfo(st->cur_skb)->nr_frags) {
2407 frag = &skb_shinfo(st->cur_skb)->frags[st->frag_idx]; 2426 frag = &skb_shinfo(st->cur_skb)->frags[st->frag_idx];
2408 block_limit = frag->size + st->stepped_offset; 2427 block_limit = skb_frag_size(frag) + st->stepped_offset;
2409 2428
2410 if (abs_offset < block_limit) { 2429 if (abs_offset < block_limit) {
2411 if (!st->frag_data) 2430 if (!st->frag_data)
@@ -2423,7 +2442,7 @@ next_skb:
2423 } 2442 }
2424 2443
2425 st->frag_idx++; 2444 st->frag_idx++;
2426 st->stepped_offset += frag->size; 2445 st->stepped_offset += skb_frag_size(frag);
2427 } 2446 }
2428 2447
2429 if (st->frag_data) { 2448 if (st->frag_data) {
@@ -2553,14 +2572,13 @@ int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb,
2553 left = PAGE_SIZE - frag->page_offset; 2572 left = PAGE_SIZE - frag->page_offset;
2554 copy = (length > left)? left : length; 2573 copy = (length > left)? left : length;
2555 2574
2556 ret = getfrag(from, (page_address(frag->page) + 2575 ret = getfrag(from, skb_frag_address(frag) + skb_frag_size(frag),
2557 frag->page_offset + frag->size),
2558 offset, copy, 0, skb); 2576 offset, copy, 0, skb);
2559 if (ret < 0) 2577 if (ret < 0)
2560 return -EFAULT; 2578 return -EFAULT;
2561 2579
2562 /* copy was successful so update the size parameters */ 2580 /* copy was successful so update the size parameters */
2563 frag->size += copy; 2581 skb_frag_size_add(frag, copy);
2564 skb->len += copy; 2582 skb->len += copy;
2565 skb->data_len += copy; 2583 skb->data_len += copy;
2566 offset += copy; 2584 offset += copy;
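
skb_frag_address(), used just above, folds the page_address() plus page_offset computation into one helper, approximately:

	static inline struct page *skb_frag_page(const skb_frag_t *frag)
	{
		return frag->page;
	}

	static inline void *skb_frag_address(const skb_frag_t *frag)
	{
		return page_address(skb_frag_page(frag)) + frag->page_offset;
	}

Note that skb_frag_address() presumes a lowmem page (page_address() is only meaningful there); paths that may see highmem pages keep using kmap, as the kmap_skb_frag() sites earlier in this file do.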
@@ -2706,12 +2724,12 @@ struct sk_buff *skb_segment(struct sk_buff *skb, u32 features)
2706 2724
2707 while (pos < offset + len && i < nfrags) { 2725 while (pos < offset + len && i < nfrags) {
2708 *frag = skb_shinfo(skb)->frags[i]; 2726 *frag = skb_shinfo(skb)->frags[i];
2709 get_page(frag->page); 2727 __skb_frag_ref(frag);
2710 size = frag->size; 2728 size = skb_frag_size(frag);
2711 2729
2712 if (pos < offset) { 2730 if (pos < offset) {
2713 frag->page_offset += offset - pos; 2731 frag->page_offset += offset - pos;
2714 frag->size -= offset - pos; 2732 skb_frag_size_sub(frag, offset - pos);
2715 } 2733 }
2716 2734
2717 skb_shinfo(nskb)->nr_frags++; 2735 skb_shinfo(nskb)->nr_frags++;
@@ -2720,7 +2738,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, u32 features)
2720 i++; 2738 i++;
2721 pos += size; 2739 pos += size;
2722 } else { 2740 } else {
2723 frag->size -= pos + size - (offset + len); 2741 skb_frag_size_sub(frag, pos + size - (offset + len));
2724 goto skip_fraglist; 2742 goto skip_fraglist;
2725 } 2743 }
2726 2744
@@ -2800,7 +2818,7 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb)
2800 } while (--i); 2818 } while (--i);
2801 2819
2802 frag->page_offset += offset; 2820 frag->page_offset += offset;
2803 frag->size -= offset; 2821 skb_frag_size_sub(frag, offset);
2804 2822
2805 skb->truesize -= skb->data_len; 2823 skb->truesize -= skb->data_len;
2806 skb->len -= skb->data_len; 2824 skb->len -= skb->data_len;
@@ -2852,7 +2870,7 @@ merge:
2852 unsigned int eat = offset - headlen; 2870 unsigned int eat = offset - headlen;
2853 2871
2854 skbinfo->frags[0].page_offset += eat; 2872 skbinfo->frags[0].page_offset += eat;
2855 skbinfo->frags[0].size -= eat; 2873 skb_frag_size_sub(&skbinfo->frags[0], eat);
2856 skb->data_len -= eat; 2874 skb->data_len -= eat;
2857 skb->len -= eat; 2875 skb->len -= eat;
2858 offset = headlen; 2876 offset = headlen;
@@ -2923,13 +2941,13 @@ __skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len)
2923 2941
2924 WARN_ON(start > offset + len); 2942 WARN_ON(start > offset + len);
2925 2943
2926 end = start + skb_shinfo(skb)->frags[i].size; 2944 end = start + skb_frag_size(&skb_shinfo(skb)->frags[i]);
2927 if ((copy = end - offset) > 0) { 2945 if ((copy = end - offset) > 0) {
2928 skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; 2946 skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
2929 2947
2930 if (copy > len) 2948 if (copy > len)
2931 copy = len; 2949 copy = len;
2932 sg_set_page(&sg[elt], frag->page, copy, 2950 sg_set_page(&sg[elt], skb_frag_page(frag), copy,
2933 frag->page_offset+offset-start); 2951 frag->page_offset+offset-start);
2934 elt++; 2952 elt++;
2935 if (!(len -= copy)) 2953 if (!(len -= copy))
diff --git a/net/core/sock.c b/net/core/sock.c
index bc745d00ea4d..5a087626bb3a 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -207,7 +207,7 @@ static struct lock_class_key af_callback_keys[AF_MAX];
207 * not depend upon such differences. 207 * not depend upon such differences.
208 */ 208 */
209#define _SK_MEM_PACKETS 256 209#define _SK_MEM_PACKETS 256
210#define _SK_MEM_OVERHEAD (sizeof(struct sk_buff) + 256) 210#define _SK_MEM_OVERHEAD SKB_TRUESIZE(256)
211#define SK_WMEM_MAX (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS) 211#define SK_WMEM_MAX (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
212#define SK_RMEM_MAX (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS) 212#define SK_RMEM_MAX (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
213 213
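
SKB_TRUESIZE, used both for skb->truesize in __alloc_skb() above and for _SK_MEM_OVERHEAD here, charges an skb for its payload plus both aligned metadata blocks. Per its definition in linux/skbuff.h:

	#define SKB_TRUESIZE(X) ((X) +						\
				 SKB_DATA_ALIGN(sizeof(struct sk_buff)) +	\
				 SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))

So _SK_MEM_OVERHEAD now also accounts for skb_shared_info, where the old sizeof(struct sk_buff) + 256 undercounted.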
@@ -387,7 +387,7 @@ struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie)
387 387
388 if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) { 388 if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
389 sk_tx_queue_clear(sk); 389 sk_tx_queue_clear(sk);
390 rcu_assign_pointer(sk->sk_dst_cache, NULL); 390 RCU_INIT_POINTER(sk->sk_dst_cache, NULL);
391 dst_release(dst); 391 dst_release(dst);
392 return NULL; 392 return NULL;
393 } 393 }
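
rcu_assign_pointer() carries a write barrier so readers cannot observe the pointer before the pointee's initialization; storing NULL publishes nothing, so the barrier-free RCU_INIT_POINTER() is sufficient and cheaper. An illustrative contrast (the gp pointer and foo type are hypothetical):

	struct foo { int val; };
	static struct foo __rcu *gp;

	static void publish(struct foo *f)
	{
		f->val = 42;
		rcu_assign_pointer(gp, f);	/* readers must observe f->val */
	}

	static void retract(void)
	{
		RCU_INIT_POINTER(gp, NULL);	/* NULL carries no data: no barrier */
	}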
@@ -738,10 +738,7 @@ set_rcvbuf:
738 /* We implement the SO_SNDLOWAT etc to 738 /* We implement the SO_SNDLOWAT etc to
739 not be settable (1003.1g 5.3) */ 739 not be settable (1003.1g 5.3) */
740 case SO_RXQ_OVFL: 740 case SO_RXQ_OVFL:
741 if (valbool) 741 sock_valbool_flag(sk, SOCK_RXQ_OVFL, valbool);
742 sock_set_flag(sk, SOCK_RXQ_OVFL);
743 else
744 sock_reset_flag(sk, SOCK_RXQ_OVFL);
745 break; 742 break;
746 default: 743 default:
747 ret = -ENOPROTOOPT; 744 ret = -ENOPROTOOPT;
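
The open-coded set/reset pair collapses into sock_valbool_flag(), which exists for exactly this pattern; approximately:

	static void sock_valbool_flag(struct sock *sk, int bit, int valbool)
	{
		if (valbool)
			sock_set_flag(sk, bit);
		else
			sock_reset_flag(sk, bit);
	}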
@@ -1158,7 +1155,7 @@ static void __sk_free(struct sock *sk)
1158 atomic_read(&sk->sk_wmem_alloc) == 0); 1155 atomic_read(&sk->sk_wmem_alloc) == 0);
1159 if (filter) { 1156 if (filter) {
1160 sk_filter_uncharge(sk, filter); 1157 sk_filter_uncharge(sk, filter);
1161 rcu_assign_pointer(sk->sk_filter, NULL); 1158 RCU_INIT_POINTER(sk->sk_filter, NULL);
1162 } 1159 }
1163 1160
1164 sock_disable_timestamp(sk, SOCK_TIMESTAMP); 1161 sock_disable_timestamp(sk, SOCK_TIMESTAMP);
@@ -1533,7 +1530,6 @@ struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
1533 skb_shinfo(skb)->nr_frags = npages; 1530 skb_shinfo(skb)->nr_frags = npages;
1534 for (i = 0; i < npages; i++) { 1531 for (i = 0; i < npages; i++) {
1535 struct page *page; 1532 struct page *page;
1536 skb_frag_t *frag;
1537 1533
1538 page = alloc_pages(sk->sk_allocation, 0); 1534 page = alloc_pages(sk->sk_allocation, 0);
1539 if (!page) { 1535 if (!page) {
@@ -1543,12 +1539,11 @@ struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
1543 goto failure; 1539 goto failure;
1544 } 1540 }
1545 1541
1546 frag = &skb_shinfo(skb)->frags[i]; 1542 __skb_fill_page_desc(skb, i,
1547 frag->page = page; 1543 page, 0,
1548 frag->page_offset = 0; 1544 (data_len >= PAGE_SIZE ?
1549 frag->size = (data_len >= PAGE_SIZE ? 1545 PAGE_SIZE :
1550 PAGE_SIZE : 1546 data_len));
1551 data_len);
1552 data_len -= PAGE_SIZE; 1547 data_len -= PAGE_SIZE;
1553 } 1548 }
1554 1549
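
__skb_fill_page_desc() replaces three hand-rolled field stores; approximately as below. The leading underscores mark the variant that leaves nr_frags for the caller to maintain, which sock_alloc_send_pskb() already does:

	static inline void __skb_fill_page_desc(struct sk_buff *skb, int i,
						struct page *page, int off, int size)
	{
		skb_frag_t *frag = &skb_shinfo(skb)->frags[i];

		frag->page        = page;
		frag->page_offset = off;
		skb_frag_size_set(frag, size);
	}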
diff --git a/net/core/timestamping.c b/net/core/timestamping.c
index 98a52640e7cd..82fb28857b64 100644
--- a/net/core/timestamping.c
+++ b/net/core/timestamping.c
@@ -57,9 +57,13 @@ void skb_clone_tx_timestamp(struct sk_buff *skb)
57 case PTP_CLASS_V2_VLAN: 57 case PTP_CLASS_V2_VLAN:
58 phydev = skb->dev->phydev; 58 phydev = skb->dev->phydev;
59 if (likely(phydev->drv->txtstamp)) { 59 if (likely(phydev->drv->txtstamp)) {
60 if (!atomic_inc_not_zero(&sk->sk_refcnt))
61 return;
60 clone = skb_clone(skb, GFP_ATOMIC); 62 clone = skb_clone(skb, GFP_ATOMIC);
61 if (!clone) 63 if (!clone) {
64 sock_put(sk);
62 return; 65 return;
66 }
63 clone->sk = sk; 67 clone->sk = sk;
64 phydev->drv->txtstamp(phydev, clone, type); 68 phydev->drv->txtstamp(phydev, clone, type);
65 } 69 }
@@ -77,8 +81,11 @@ void skb_complete_tx_timestamp(struct sk_buff *skb,
77 struct sock_exterr_skb *serr; 81 struct sock_exterr_skb *serr;
78 int err; 82 int err;
79 83
80 if (!hwtstamps) 84 if (!hwtstamps) {
85 sock_put(sk);
86 kfree_skb(skb);
81 return; 87 return;
88 }
82 89
83 *skb_hwtstamps(skb) = *hwtstamps; 90 *skb_hwtstamps(skb) = *hwtstamps;
84 serr = SKB_EXT_ERR(skb); 91 serr = SKB_EXT_ERR(skb);
@@ -87,6 +94,7 @@ void skb_complete_tx_timestamp(struct sk_buff *skb,
87 serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING; 94 serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING;
88 skb->sk = NULL; 95 skb->sk = NULL;
89 err = sock_queue_err_skb(sk, skb); 96 err = sock_queue_err_skb(sk, skb);
97 sock_put(sk);
90 if (err) 98 if (err)
91 kfree_skb(skb); 99 kfree_skb(skb);
92} 100}
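
The timestamping fixes hold a socket reference for as long as a cloned tx skb points at the socket: skb_clone_tx_timestamp() takes the reference only if the socket is still live (atomic_inc_not_zero), and every exit from skb_complete_tx_timestamp() releases it exactly once. The acquire side, restated as a standalone sketch (the function name is illustrative):

	static void clone_for_tx_timestamp(struct sk_buff *skb, struct sock *sk)
	{
		struct sk_buff *clone;

		if (!atomic_inc_not_zero(&sk->sk_refcnt))
			return;			/* socket already being torn down */
		clone = skb_clone(skb, GFP_ATOMIC);
		if (!clone) {
			sock_put(sk);		/* undo the reference on failure */
			return;
		}
		clone->sk = sk;			/* the reference travels with the clone */
		/* hand the clone to the PHY driver; skb_complete_tx_timestamp()
		 * will sock_put(sk) when it is done with it.
		 */
	}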
diff --git a/net/core/user_dma.c b/net/core/user_dma.c
index 25d717ebc92e..2d7cf3d52b4c 100644
--- a/net/core/user_dma.c
+++ b/net/core/user_dma.c
@@ -71,14 +71,14 @@ int dma_skb_copy_datagram_iovec(struct dma_chan *chan,
71 /* Copy paged appendix. Hmm... why does this look so complicated? */ 71 /* Copy paged appendix. Hmm... why does this look so complicated? */
72 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { 72 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
73 int end; 73 int end;
74 const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
74 75
75 WARN_ON(start > offset + len); 76 WARN_ON(start > offset + len);
76 77
77 end = start + skb_shinfo(skb)->frags[i].size; 78 end = start + skb_frag_size(frag);
78 copy = end - offset; 79 copy = end - offset;
79 if (copy > 0) { 80 if (copy > 0) {
80 skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; 81 struct page *page = skb_frag_page(frag);
81 struct page *page = frag->page;
82 82
83 if (copy > len) 83 if (copy > len)
84 copy = len; 84 copy = len;