author	Ingo Molnar <mingo@elte.hu>	2008-07-21 11:19:50 -0400
committer	Ingo Molnar <mingo@elte.hu>	2008-07-21 11:19:50 -0400
commit	eb6a12c2428d21a9f3e0f1a50e927d5fd80fc3d0 (patch)
tree	5ac6f43899648abeab1d43aad3107f664e7f13d5 /net/core
parent	c4762aba0b1f72659aae9ce37b772ca8bd8f06f4 (diff)
parent	14b395e35d1afdd8019d11b92e28041fad591b71 (diff)
Merge branch 'linus' into cpus4096-for-linus
Conflicts:

	net/sunrpc/svc.c

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'net/core')
-rw-r--r--	net/core/dev.c	380
-rw-r--r--	net/core/dev_mcast.c	24
-rw-r--r--	net/core/ethtool.c	37
-rw-r--r--	net/core/fib_rules.c	2
-rw-r--r--	net/core/iovec.c	2
-rw-r--r--	net/core/link_watch.c	11
-rw-r--r--	net/core/neighbour.c	8
-rw-r--r--	net/core/net-sysfs.c	26
-rw-r--r--	net/core/netpoll.c	24
-rw-r--r--	net/core/pktgen.c	71
-rw-r--r--	net/core/rtnetlink.c	26
-rw-r--r--	net/core/skbuff.c	166
-rw-r--r--	net/core/sock.c	8
-rw-r--r--	net/core/sysctl_net_core.c	39
14 files changed, 529 insertions(+), 295 deletions(-)
diff --git a/net/core/dev.c b/net/core/dev.c
index df5520a60b90..106d5e6d987c 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -90,6 +90,7 @@
90#include <linux/if_ether.h> 90#include <linux/if_ether.h>
91#include <linux/netdevice.h> 91#include <linux/netdevice.h>
92#include <linux/etherdevice.h> 92#include <linux/etherdevice.h>
93#include <linux/ethtool.h>
93#include <linux/notifier.h> 94#include <linux/notifier.h>
94#include <linux/skbuff.h> 95#include <linux/skbuff.h>
95#include <net/net_namespace.h> 96#include <net/net_namespace.h>
@@ -120,6 +121,9 @@
120#include <linux/ctype.h> 121#include <linux/ctype.h>
121#include <linux/if_arp.h> 122#include <linux/if_arp.h>
122#include <linux/if_vlan.h> 123#include <linux/if_vlan.h>
124#include <linux/ip.h>
125#include <linux/ipv6.h>
126#include <linux/in.h>
123 127
124#include "net-sysfs.h" 128#include "net-sysfs.h"
125 129
@@ -257,7 +261,7 @@ DEFINE_PER_CPU(struct softnet_data, softnet_data);
257 261
258#ifdef CONFIG_DEBUG_LOCK_ALLOC 262#ifdef CONFIG_DEBUG_LOCK_ALLOC
259/* 263/*
260 * register_netdevice() inits dev->_xmit_lock and sets lockdep class 264 * register_netdevice() inits txq->_xmit_lock and sets lockdep class
261 * according to dev->type 265 * according to dev->type
262 */ 266 */
263static const unsigned short netdev_lock_type[] = 267static const unsigned short netdev_lock_type[] =
@@ -961,6 +965,12 @@ void netdev_state_change(struct net_device *dev)
961 } 965 }
962} 966}
963 967
968void netdev_bonding_change(struct net_device *dev)
969{
970 call_netdevice_notifiers(NETDEV_BONDING_FAILOVER, dev);
971}
972EXPORT_SYMBOL(netdev_bonding_change);
973
964/** 974/**
965 * dev_load - load a network module 975 * dev_load - load a network module
966 * @net: the applicable net namespace 976 * @net: the applicable net namespace
@@ -1117,6 +1127,29 @@ int dev_close(struct net_device *dev)
1117} 1127}
1118 1128
1119 1129
1130/**
1131 * dev_disable_lro - disable Large Receive Offload on a device
1132 * @dev: device
1133 *
1134 * Disable Large Receive Offload (LRO) on a net device. Must be
1135 * called under RTNL. This is needed if received packets may be
1136 * forwarded to another interface.
1137 */
1138void dev_disable_lro(struct net_device *dev)
1139{
1140 if (dev->ethtool_ops && dev->ethtool_ops->get_flags &&
1141 dev->ethtool_ops->set_flags) {
1142 u32 flags = dev->ethtool_ops->get_flags(dev);
1143 if (flags & ETH_FLAG_LRO) {
1144 flags &= ~ETH_FLAG_LRO;
1145 dev->ethtool_ops->set_flags(dev, flags);
1146 }
1147 }
1148 WARN_ON(dev->features & NETIF_F_LRO);
1149}
1150EXPORT_SYMBOL(dev_disable_lro);
1151
1152
1120static int dev_boot_phase = 1; 1153static int dev_boot_phase = 1;
1121 1154
1122/* 1155/*
@@ -1290,16 +1323,18 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
1290} 1323}
1291 1324
1292 1325
1293void __netif_schedule(struct net_device *dev) 1326void __netif_schedule(struct Qdisc *q)
1294{ 1327{
1295 if (!test_and_set_bit(__LINK_STATE_SCHED, &dev->state)) { 1328 BUG_ON(q == &noop_qdisc);
1296 unsigned long flags; 1329
1330 if (!test_and_set_bit(__QDISC_STATE_SCHED, &q->state)) {
1297 struct softnet_data *sd; 1331 struct softnet_data *sd;
1332 unsigned long flags;
1298 1333
1299 local_irq_save(flags); 1334 local_irq_save(flags);
1300 sd = &__get_cpu_var(softnet_data); 1335 sd = &__get_cpu_var(softnet_data);
1301 dev->next_sched = sd->output_queue; 1336 q->next_sched = sd->output_queue;
1302 sd->output_queue = dev; 1337 sd->output_queue = q;
1303 raise_softirq_irqoff(NET_TX_SOFTIRQ); 1338 raise_softirq_irqoff(NET_TX_SOFTIRQ);
1304 local_irq_restore(flags); 1339 local_irq_restore(flags);
1305 } 1340 }
@@ -1566,7 +1601,8 @@ static int dev_gso_segment(struct sk_buff *skb)
1566 return 0; 1601 return 0;
1567} 1602}
1568 1603
1569int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) 1604int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
1605 struct netdev_queue *txq)
1570{ 1606{
1571 if (likely(!skb->next)) { 1607 if (likely(!skb->next)) {
1572 if (!list_empty(&ptype_all)) 1608 if (!list_empty(&ptype_all))
@@ -1595,9 +1631,7 @@ gso:
1595 skb->next = nskb; 1631 skb->next = nskb;
1596 return rc; 1632 return rc;
1597 } 1633 }
1598 if (unlikely((netif_queue_stopped(dev) || 1634 if (unlikely(netif_tx_queue_stopped(txq) && skb->next))
1599 netif_subqueue_stopped(dev, skb)) &&
1600 skb->next))
1601 return NETDEV_TX_BUSY; 1635 return NETDEV_TX_BUSY;
1602 } while (skb->next); 1636 } while (skb->next);
1603 1637
@@ -1634,9 +1668,71 @@ out_kfree_skb:
1634 * --BLG 1668 * --BLG
1635 */ 1669 */
1636 1670
1671static u16 simple_tx_hash(struct net_device *dev, struct sk_buff *skb)
1672{
1673 u32 *addr, *ports, hash, ihl;
1674 u8 ip_proto;
1675 int alen;
1676
1677 switch (skb->protocol) {
1678 case __constant_htons(ETH_P_IP):
1679 ip_proto = ip_hdr(skb)->protocol;
1680 addr = &ip_hdr(skb)->saddr;
1681 ihl = ip_hdr(skb)->ihl;
1682 alen = 2;
1683 break;
1684 case __constant_htons(ETH_P_IPV6):
1685 ip_proto = ipv6_hdr(skb)->nexthdr;
1686 addr = &ipv6_hdr(skb)->saddr.s6_addr32[0];
1687 ihl = (40 >> 2);
1688 alen = 8;
1689 break;
1690 default:
1691 return 0;
1692 }
1693
1694 ports = (u32 *) (skb_network_header(skb) + (ihl * 4));
1695
1696 hash = 0;
1697 while (alen--)
1698 hash ^= *addr++;
1699
1700 switch (ip_proto) {
1701 case IPPROTO_TCP:
1702 case IPPROTO_UDP:
1703 case IPPROTO_DCCP:
1704 case IPPROTO_ESP:
1705 case IPPROTO_AH:
1706 case IPPROTO_SCTP:
1707 case IPPROTO_UDPLITE:
1708 hash ^= *ports;
1709 break;
1710
1711 default:
1712 break;
1713 }
1714
1715 return hash % dev->real_num_tx_queues;
1716}
1717
1718static struct netdev_queue *dev_pick_tx(struct net_device *dev,
1719 struct sk_buff *skb)
1720{
1721 u16 queue_index = 0;
1722
1723 if (dev->select_queue)
1724 queue_index = dev->select_queue(dev, skb);
1725 else if (dev->real_num_tx_queues > 1)
1726 queue_index = simple_tx_hash(dev, skb);
1727
1728 skb_set_queue_mapping(skb, queue_index);
1729 return netdev_get_tx_queue(dev, queue_index);
1730}
1731
1637int dev_queue_xmit(struct sk_buff *skb) 1732int dev_queue_xmit(struct sk_buff *skb)
1638{ 1733{
1639 struct net_device *dev = skb->dev; 1734 struct net_device *dev = skb->dev;
1735 struct netdev_queue *txq;
1640 struct Qdisc *q; 1736 struct Qdisc *q;
1641 int rc = -ENOMEM; 1737 int rc = -ENOMEM;
1642 1738
@@ -1669,44 +1765,29 @@ int dev_queue_xmit(struct sk_buff *skb)
1669 } 1765 }
1670 1766
1671gso: 1767gso:
1672 spin_lock_prefetch(&dev->queue_lock);
1673
1674 /* Disable soft irqs for various locks below. Also 1768 /* Disable soft irqs for various locks below. Also
1675 * stops preemption for RCU. 1769 * stops preemption for RCU.
1676 */ 1770 */
1677 rcu_read_lock_bh(); 1771 rcu_read_lock_bh();
1678 1772
1679 /* Updates of qdisc are serialized by queue_lock. 1773 txq = dev_pick_tx(dev, skb);
1680 * The struct Qdisc which is pointed to by qdisc is now a 1774 q = rcu_dereference(txq->qdisc);
1681 * rcu structure - it may be accessed without acquiring
1682 * a lock (but the structure may be stale.) The freeing of the
1683 * qdisc will be deferred until it's known that there are no
1684 * more references to it.
1685 *
1686 * If the qdisc has an enqueue function, we still need to
1687 * hold the queue_lock before calling it, since queue_lock
1688 * also serializes access to the device queue.
1689 */
1690 1775
1691 q = rcu_dereference(dev->qdisc);
1692#ifdef CONFIG_NET_CLS_ACT 1776#ifdef CONFIG_NET_CLS_ACT
1693 skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_EGRESS); 1777 skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_EGRESS);
1694#endif 1778#endif
1695 if (q->enqueue) { 1779 if (q->enqueue) {
1696 /* Grab device queue */ 1780 spinlock_t *root_lock = qdisc_root_lock(q);
1697 spin_lock(&dev->queue_lock); 1781
1698 q = dev->qdisc; 1782 spin_lock(root_lock);
1699 if (q->enqueue) { 1783
1700 /* reset queue_mapping to zero */ 1784 rc = qdisc_enqueue_root(skb, q);
1701 skb_set_queue_mapping(skb, 0); 1785 qdisc_run(q);
1702 rc = q->enqueue(skb, q); 1786
1703 qdisc_run(dev); 1787 spin_unlock(root_lock);
1704 spin_unlock(&dev->queue_lock); 1788
1705 1789 rc = rc == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : rc;
1706 rc = rc == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : rc; 1790 goto out;
1707 goto out;
1708 }
1709 spin_unlock(&dev->queue_lock);
1710 } 1791 }
1711 1792
1712 /* The device has no queue. Common case for software devices: 1793 /* The device has no queue. Common case for software devices:
@@ -1724,19 +1805,18 @@ gso:
1724 if (dev->flags & IFF_UP) { 1805 if (dev->flags & IFF_UP) {
1725 int cpu = smp_processor_id(); /* ok because BHs are off */ 1806 int cpu = smp_processor_id(); /* ok because BHs are off */
1726 1807
1727 if (dev->xmit_lock_owner != cpu) { 1808 if (txq->xmit_lock_owner != cpu) {
1728 1809
1729 HARD_TX_LOCK(dev, cpu); 1810 HARD_TX_LOCK(dev, txq, cpu);
1730 1811
1731 if (!netif_queue_stopped(dev) && 1812 if (!netif_tx_queue_stopped(txq)) {
1732 !netif_subqueue_stopped(dev, skb)) {
1733 rc = 0; 1813 rc = 0;
1734 if (!dev_hard_start_xmit(skb, dev)) { 1814 if (!dev_hard_start_xmit(skb, dev, txq)) {
1735 HARD_TX_UNLOCK(dev); 1815 HARD_TX_UNLOCK(dev, txq);
1736 goto out; 1816 goto out;
1737 } 1817 }
1738 } 1818 }
1739 HARD_TX_UNLOCK(dev); 1819 HARD_TX_UNLOCK(dev, txq);
1740 if (net_ratelimit()) 1820 if (net_ratelimit())
1741 printk(KERN_CRIT "Virtual device %s asks to " 1821 printk(KERN_CRIT "Virtual device %s asks to "
1742 "queue packet!\n", dev->name); 1822 "queue packet!\n", dev->name);
@@ -1880,7 +1960,7 @@ static void net_tx_action(struct softirq_action *h)
1880 } 1960 }
1881 1961
1882 if (sd->output_queue) { 1962 if (sd->output_queue) {
1883 struct net_device *head; 1963 struct Qdisc *head;
1884 1964
1885 local_irq_disable(); 1965 local_irq_disable();
1886 head = sd->output_queue; 1966 head = sd->output_queue;
@@ -1888,17 +1968,20 @@ static void net_tx_action(struct softirq_action *h)
1888 local_irq_enable(); 1968 local_irq_enable();
1889 1969
1890 while (head) { 1970 while (head) {
1891 struct net_device *dev = head; 1971 struct Qdisc *q = head;
1972 spinlock_t *root_lock;
1973
1892 head = head->next_sched; 1974 head = head->next_sched;
1893 1975
1894 smp_mb__before_clear_bit(); 1976 smp_mb__before_clear_bit();
1895 clear_bit(__LINK_STATE_SCHED, &dev->state); 1977 clear_bit(__QDISC_STATE_SCHED, &q->state);
1896 1978
1897 if (spin_trylock(&dev->queue_lock)) { 1979 root_lock = qdisc_root_lock(q);
1898 qdisc_run(dev); 1980 if (spin_trylock(root_lock)) {
1899 spin_unlock(&dev->queue_lock); 1981 qdisc_run(q);
1982 spin_unlock(root_lock);
1900 } else { 1983 } else {
1901 netif_schedule(dev); 1984 __netif_schedule(q);
1902 } 1985 }
1903 } 1986 }
1904 } 1987 }
@@ -1979,10 +2062,11 @@ static inline struct sk_buff *handle_macvlan(struct sk_buff *skb,
1979 */ 2062 */
1980static int ing_filter(struct sk_buff *skb) 2063static int ing_filter(struct sk_buff *skb)
1981{ 2064{
1982 struct Qdisc *q;
1983 struct net_device *dev = skb->dev; 2065 struct net_device *dev = skb->dev;
1984 int result = TC_ACT_OK;
1985 u32 ttl = G_TC_RTTL(skb->tc_verd); 2066 u32 ttl = G_TC_RTTL(skb->tc_verd);
2067 struct netdev_queue *rxq;
2068 int result = TC_ACT_OK;
2069 struct Qdisc *q;
1986 2070
1987 if (MAX_RED_LOOP < ttl++) { 2071 if (MAX_RED_LOOP < ttl++) {
1988 printk(KERN_WARNING 2072 printk(KERN_WARNING
@@ -1994,10 +2078,14 @@ static int ing_filter(struct sk_buff *skb)
1994 skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl); 2078 skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl);
1995 skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS); 2079 skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS);
1996 2080
1997 spin_lock(&dev->ingress_lock); 2081 rxq = &dev->rx_queue;
1998 if ((q = dev->qdisc_ingress) != NULL) 2082
1999 result = q->enqueue(skb, q); 2083 q = rxq->qdisc;
2000 spin_unlock(&dev->ingress_lock); 2084 if (q) {
2085 spin_lock(qdisc_lock(q));
2086 result = qdisc_enqueue_root(skb, q);
2087 spin_unlock(qdisc_lock(q));
2088 }
2001 2089
2002 return result; 2090 return result;
2003} 2091}
@@ -2006,7 +2094,7 @@ static inline struct sk_buff *handle_ing(struct sk_buff *skb,
2006 struct packet_type **pt_prev, 2094 struct packet_type **pt_prev,
2007 int *ret, struct net_device *orig_dev) 2095 int *ret, struct net_device *orig_dev)
2008{ 2096{
2009 if (!skb->dev->qdisc_ingress) 2097 if (!skb->dev->rx_queue.qdisc)
2010 goto out; 2098 goto out;
2011 2099
2012 if (*pt_prev) { 2100 if (*pt_prev) {
@@ -2030,6 +2118,33 @@ out:
2030} 2118}
2031#endif 2119#endif
2032 2120
2121/*
2122 * netif_nit_deliver - deliver received packets to network taps
2123 * @skb: buffer
2124 *
2125 * This function is used to deliver incoming packets to network
2126 * taps. It should be used when the normal netif_receive_skb path
2127 * is bypassed, for example because of VLAN acceleration.
2128 */
2129void netif_nit_deliver(struct sk_buff *skb)
2130{
2131 struct packet_type *ptype;
2132
2133 if (list_empty(&ptype_all))
2134 return;
2135
2136 skb_reset_network_header(skb);
2137 skb_reset_transport_header(skb);
2138 skb->mac_len = skb->network_header - skb->mac_header;
2139
2140 rcu_read_lock();
2141 list_for_each_entry_rcu(ptype, &ptype_all, list) {
2142 if (!ptype->dev || ptype->dev == skb->dev)
2143 deliver_skb(skb, ptype, skb->dev);
2144 }
2145 rcu_read_unlock();
2146}
2147
2033/** 2148/**
2034 * netif_receive_skb - process receive buffer from network 2149 * netif_receive_skb - process receive buffer from network
2035 * @skb: buffer to process 2150 * @skb: buffer to process
@@ -2769,16 +2884,29 @@ int netdev_set_master(struct net_device *slave, struct net_device *master)
2769 return 0; 2884 return 0;
2770} 2885}
2771 2886
2772static void __dev_set_promiscuity(struct net_device *dev, int inc) 2887static int __dev_set_promiscuity(struct net_device *dev, int inc)
2773{ 2888{
2774 unsigned short old_flags = dev->flags; 2889 unsigned short old_flags = dev->flags;
2775 2890
2776 ASSERT_RTNL(); 2891 ASSERT_RTNL();
2777 2892
2778 if ((dev->promiscuity += inc) == 0) 2893 dev->flags |= IFF_PROMISC;
2779 dev->flags &= ~IFF_PROMISC; 2894 dev->promiscuity += inc;
2780 else 2895 if (dev->promiscuity == 0) {
2781 dev->flags |= IFF_PROMISC; 2896 /*
2897 * Avoid overflow.
2898 * If inc causes overflow, untouch promisc and return error.
2899 */
2900 if (inc < 0)
2901 dev->flags &= ~IFF_PROMISC;
2902 else {
2903 dev->promiscuity -= inc;
2904 printk(KERN_WARNING "%s: promiscuity touches roof, "
2905 "set promiscuity failed, promiscuity feature "
2906 "of device might be broken.\n", dev->name);
2907 return -EOVERFLOW;
2908 }
2909 }
2782 if (dev->flags != old_flags) { 2910 if (dev->flags != old_flags) {
2783 printk(KERN_INFO "device %s %s promiscuous mode\n", 2911 printk(KERN_INFO "device %s %s promiscuous mode\n",
2784 dev->name, (dev->flags & IFF_PROMISC) ? "entered" : 2912 dev->name, (dev->flags & IFF_PROMISC) ? "entered" :
@@ -2796,6 +2924,7 @@ static void __dev_set_promiscuity(struct net_device *dev, int inc)
2796 if (dev->change_rx_flags) 2924 if (dev->change_rx_flags)
2797 dev->change_rx_flags(dev, IFF_PROMISC); 2925 dev->change_rx_flags(dev, IFF_PROMISC);
2798 } 2926 }
2927 return 0;
2799} 2928}
2800 2929
2801/** 2930/**
@@ -2807,14 +2936,19 @@ static void __dev_set_promiscuity(struct net_device *dev, int inc)
2807 * remains above zero the interface remains promiscuous. Once it hits zero 2936 * remains above zero the interface remains promiscuous. Once it hits zero
2808 * the device reverts back to normal filtering operation. A negative inc 2937 * the device reverts back to normal filtering operation. A negative inc
2809 * value is used to drop promiscuity on the device. 2938 * value is used to drop promiscuity on the device.
2939 * Return 0 if successful or a negative errno code on error.
2810 */ 2940 */
2811void dev_set_promiscuity(struct net_device *dev, int inc) 2941int dev_set_promiscuity(struct net_device *dev, int inc)
2812{ 2942{
2813 unsigned short old_flags = dev->flags; 2943 unsigned short old_flags = dev->flags;
2944 int err;
2814 2945
2815 __dev_set_promiscuity(dev, inc); 2946 err = __dev_set_promiscuity(dev, inc);
2947 if (err < 0)
2948 return err;
2816 if (dev->flags != old_flags) 2949 if (dev->flags != old_flags)
2817 dev_set_rx_mode(dev); 2950 dev_set_rx_mode(dev);
2951 return err;
2818} 2952}
2819 2953
2820/** 2954/**
@@ -2827,22 +2961,38 @@ void dev_set_promiscuity(struct net_device *dev, int inc)
2827 * to all interfaces. Once it hits zero the device reverts back to normal 2961 * to all interfaces. Once it hits zero the device reverts back to normal
2828 * filtering operation. A negative @inc value is used to drop the counter 2962 * filtering operation. A negative @inc value is used to drop the counter
2829 * when releasing a resource needing all multicasts. 2963 * when releasing a resource needing all multicasts.
2964 * Return 0 if successful or a negative errno code on error.
2830 */ 2965 */
2831 2966
2832void dev_set_allmulti(struct net_device *dev, int inc) 2967int dev_set_allmulti(struct net_device *dev, int inc)
2833{ 2968{
2834 unsigned short old_flags = dev->flags; 2969 unsigned short old_flags = dev->flags;
2835 2970
2836 ASSERT_RTNL(); 2971 ASSERT_RTNL();
2837 2972
2838 dev->flags |= IFF_ALLMULTI; 2973 dev->flags |= IFF_ALLMULTI;
2839 if ((dev->allmulti += inc) == 0) 2974 dev->allmulti += inc;
2840 dev->flags &= ~IFF_ALLMULTI; 2975 if (dev->allmulti == 0) {
2976 /*
2977 * Avoid overflow.
2978 * If inc causes overflow, untouch allmulti and return error.
2979 */
2980 if (inc < 0)
2981 dev->flags &= ~IFF_ALLMULTI;
2982 else {
2983 dev->allmulti -= inc;
2984 printk(KERN_WARNING "%s: allmulti touches roof, "
2985 "set allmulti failed, allmulti feature of "
2986 "device might be broken.\n", dev->name);
2987 return -EOVERFLOW;
2988 }
2989 }
2841 if (dev->flags ^ old_flags) { 2990 if (dev->flags ^ old_flags) {
2842 if (dev->change_rx_flags) 2991 if (dev->change_rx_flags)
2843 dev->change_rx_flags(dev, IFF_ALLMULTI); 2992 dev->change_rx_flags(dev, IFF_ALLMULTI);
2844 dev_set_rx_mode(dev); 2993 dev_set_rx_mode(dev);
2845 } 2994 }
2995 return 0;
2846} 2996}
2847 2997
2848/* 2998/*
@@ -2881,9 +3031,9 @@ void __dev_set_rx_mode(struct net_device *dev)
2881 3031
2882void dev_set_rx_mode(struct net_device *dev) 3032void dev_set_rx_mode(struct net_device *dev)
2883{ 3033{
2884 netif_tx_lock_bh(dev); 3034 netif_addr_lock_bh(dev);
2885 __dev_set_rx_mode(dev); 3035 __dev_set_rx_mode(dev);
2886 netif_tx_unlock_bh(dev); 3036 netif_addr_unlock_bh(dev);
2887} 3037}
2888 3038
2889int __dev_addr_delete(struct dev_addr_list **list, int *count, 3039int __dev_addr_delete(struct dev_addr_list **list, int *count,
@@ -2961,11 +3111,11 @@ int dev_unicast_delete(struct net_device *dev, void *addr, int alen)
2961 3111
2962 ASSERT_RTNL(); 3112 ASSERT_RTNL();
2963 3113
2964 netif_tx_lock_bh(dev); 3114 netif_addr_lock_bh(dev);
2965 err = __dev_addr_delete(&dev->uc_list, &dev->uc_count, addr, alen, 0); 3115 err = __dev_addr_delete(&dev->uc_list, &dev->uc_count, addr, alen, 0);
2966 if (!err) 3116 if (!err)
2967 __dev_set_rx_mode(dev); 3117 __dev_set_rx_mode(dev);
2968 netif_tx_unlock_bh(dev); 3118 netif_addr_unlock_bh(dev);
2969 return err; 3119 return err;
2970} 3120}
2971EXPORT_SYMBOL(dev_unicast_delete); 3121EXPORT_SYMBOL(dev_unicast_delete);
@@ -2987,11 +3137,11 @@ int dev_unicast_add(struct net_device *dev, void *addr, int alen)
2987 3137
2988 ASSERT_RTNL(); 3138 ASSERT_RTNL();
2989 3139
2990 netif_tx_lock_bh(dev); 3140 netif_addr_lock_bh(dev);
2991 err = __dev_addr_add(&dev->uc_list, &dev->uc_count, addr, alen, 0); 3141 err = __dev_addr_add(&dev->uc_list, &dev->uc_count, addr, alen, 0);
2992 if (!err) 3142 if (!err)
2993 __dev_set_rx_mode(dev); 3143 __dev_set_rx_mode(dev);
2994 netif_tx_unlock_bh(dev); 3144 netif_addr_unlock_bh(dev);
2995 return err; 3145 return err;
2996} 3146}
2997EXPORT_SYMBOL(dev_unicast_add); 3147EXPORT_SYMBOL(dev_unicast_add);
@@ -3058,12 +3208,12 @@ int dev_unicast_sync(struct net_device *to, struct net_device *from)
3058{ 3208{
3059 int err = 0; 3209 int err = 0;
3060 3210
3061 netif_tx_lock_bh(to); 3211 netif_addr_lock_bh(to);
3062 err = __dev_addr_sync(&to->uc_list, &to->uc_count, 3212 err = __dev_addr_sync(&to->uc_list, &to->uc_count,
3063 &from->uc_list, &from->uc_count); 3213 &from->uc_list, &from->uc_count);
3064 if (!err) 3214 if (!err)
3065 __dev_set_rx_mode(to); 3215 __dev_set_rx_mode(to);
3066 netif_tx_unlock_bh(to); 3216 netif_addr_unlock_bh(to);
3067 return err; 3217 return err;
3068} 3218}
3069EXPORT_SYMBOL(dev_unicast_sync); 3219EXPORT_SYMBOL(dev_unicast_sync);
@@ -3079,15 +3229,15 @@ EXPORT_SYMBOL(dev_unicast_sync);
3079 */ 3229 */
3080void dev_unicast_unsync(struct net_device *to, struct net_device *from) 3230void dev_unicast_unsync(struct net_device *to, struct net_device *from)
3081{ 3231{
3082 netif_tx_lock_bh(from); 3232 netif_addr_lock_bh(from);
3083 netif_tx_lock_bh(to); 3233 netif_addr_lock(to);
3084 3234
3085 __dev_addr_unsync(&to->uc_list, &to->uc_count, 3235 __dev_addr_unsync(&to->uc_list, &to->uc_count,
3086 &from->uc_list, &from->uc_count); 3236 &from->uc_list, &from->uc_count);
3087 __dev_set_rx_mode(to); 3237 __dev_set_rx_mode(to);
3088 3238
3089 netif_tx_unlock_bh(to); 3239 netif_addr_unlock(to);
3090 netif_tx_unlock_bh(from); 3240 netif_addr_unlock_bh(from);
3091} 3241}
3092EXPORT_SYMBOL(dev_unicast_unsync); 3242EXPORT_SYMBOL(dev_unicast_unsync);
3093 3243
@@ -3107,7 +3257,7 @@ static void __dev_addr_discard(struct dev_addr_list **list)
3107 3257
3108static void dev_addr_discard(struct net_device *dev) 3258static void dev_addr_discard(struct net_device *dev)
3109{ 3259{
3110 netif_tx_lock_bh(dev); 3260 netif_addr_lock_bh(dev);
3111 3261
3112 __dev_addr_discard(&dev->uc_list); 3262 __dev_addr_discard(&dev->uc_list);
3113 dev->uc_count = 0; 3263 dev->uc_count = 0;
@@ -3115,7 +3265,7 @@ static void dev_addr_discard(struct net_device *dev)
3115 __dev_addr_discard(&dev->mc_list); 3265 __dev_addr_discard(&dev->mc_list);
3116 dev->mc_count = 0; 3266 dev->mc_count = 0;
3117 3267
3118 netif_tx_unlock_bh(dev); 3268 netif_addr_unlock_bh(dev);
3119} 3269}
3120 3270
3121unsigned dev_get_flags(const struct net_device *dev) 3271unsigned dev_get_flags(const struct net_device *dev)
@@ -3688,6 +3838,21 @@ static void rollback_registered(struct net_device *dev)
3688 dev_put(dev); 3838 dev_put(dev);
3689} 3839}
3690 3840
3841static void __netdev_init_queue_locks_one(struct net_device *dev,
3842 struct netdev_queue *dev_queue,
3843 void *_unused)
3844{
3845 spin_lock_init(&dev_queue->_xmit_lock);
3846 netdev_set_lockdep_class(&dev_queue->_xmit_lock, dev->type);
3847 dev_queue->xmit_lock_owner = -1;
3848}
3849
3850static void netdev_init_queue_locks(struct net_device *dev)
3851{
3852 netdev_for_each_tx_queue(dev, __netdev_init_queue_locks_one, NULL);
3853 __netdev_init_queue_locks_one(dev, &dev->rx_queue, NULL);
3854}
3855
3691/** 3856/**
3692 * register_netdevice - register a network device 3857 * register_netdevice - register a network device
3693 * @dev: device to register 3858 * @dev: device to register
@@ -3722,11 +3887,8 @@ int register_netdevice(struct net_device *dev)
3722 BUG_ON(!dev_net(dev)); 3887 BUG_ON(!dev_net(dev));
3723 net = dev_net(dev); 3888 net = dev_net(dev);
3724 3889
3725 spin_lock_init(&dev->queue_lock); 3890 spin_lock_init(&dev->addr_list_lock);
3726 spin_lock_init(&dev->_xmit_lock); 3891 netdev_init_queue_locks(dev);
3727 netdev_set_lockdep_class(&dev->_xmit_lock, dev->type);
3728 dev->xmit_lock_owner = -1;
3729 spin_lock_init(&dev->ingress_lock);
3730 3892
3731 dev->iflink = -1; 3893 dev->iflink = -1;
3732 3894
@@ -4007,6 +4169,19 @@ static struct net_device_stats *internal_stats(struct net_device *dev)
4007 return &dev->stats; 4169 return &dev->stats;
4008} 4170}
4009 4171
4172static void netdev_init_one_queue(struct net_device *dev,
4173 struct netdev_queue *queue,
4174 void *_unused)
4175{
4176 queue->dev = dev;
4177}
4178
4179static void netdev_init_queues(struct net_device *dev)
4180{
4181 netdev_init_one_queue(dev, &dev->rx_queue, NULL);
4182 netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL);
4183}
4184
4010/** 4185/**
4011 * alloc_netdev_mq - allocate network device 4186 * alloc_netdev_mq - allocate network device
4012 * @sizeof_priv: size of private data to allocate space for 4187 * @sizeof_priv: size of private data to allocate space for
@@ -4021,14 +4196,14 @@ static struct net_device_stats *internal_stats(struct net_device *dev)
4021struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, 4196struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
4022 void (*setup)(struct net_device *), unsigned int queue_count) 4197 void (*setup)(struct net_device *), unsigned int queue_count)
4023{ 4198{
4024 void *p; 4199 struct netdev_queue *tx;
4025 struct net_device *dev; 4200 struct net_device *dev;
4026 int alloc_size; 4201 int alloc_size;
4202 void *p;
4027 4203
4028 BUG_ON(strlen(name) >= sizeof(dev->name)); 4204 BUG_ON(strlen(name) >= sizeof(dev->name));
4029 4205
4030 alloc_size = sizeof(struct net_device) + 4206 alloc_size = sizeof(struct net_device);
4031 sizeof(struct net_device_subqueue) * (queue_count - 1);
4032 if (sizeof_priv) { 4207 if (sizeof_priv) {
4033 /* ensure 32-byte alignment of private area */ 4208 /* ensure 32-byte alignment of private area */
4034 alloc_size = (alloc_size + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST; 4209 alloc_size = (alloc_size + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST;
@@ -4043,22 +4218,33 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
4043 return NULL; 4218 return NULL;
4044 } 4219 }
4045 4220
4221 tx = kzalloc(sizeof(struct netdev_queue) * queue_count, GFP_KERNEL);
4222 if (!tx) {
4223 printk(KERN_ERR "alloc_netdev: Unable to allocate "
4224 "tx qdiscs.\n");
4225 kfree(p);
4226 return NULL;
4227 }
4228
4046 dev = (struct net_device *) 4229 dev = (struct net_device *)
4047 (((long)p + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST); 4230 (((long)p + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST);
4048 dev->padded = (char *)dev - (char *)p; 4231 dev->padded = (char *)dev - (char *)p;
4049 dev_net_set(dev, &init_net); 4232 dev_net_set(dev, &init_net);
4050 4233
4234 dev->_tx = tx;
4235 dev->num_tx_queues = queue_count;
4236 dev->real_num_tx_queues = queue_count;
4237
4051 if (sizeof_priv) { 4238 if (sizeof_priv) {
4052 dev->priv = ((char *)dev + 4239 dev->priv = ((char *)dev +
4053 ((sizeof(struct net_device) + 4240 ((sizeof(struct net_device) + NETDEV_ALIGN_CONST)
4054 (sizeof(struct net_device_subqueue) *
4055 (queue_count - 1)) + NETDEV_ALIGN_CONST)
4056 & ~NETDEV_ALIGN_CONST)); 4241 & ~NETDEV_ALIGN_CONST));
4057 } 4242 }
4058 4243
4059 dev->egress_subqueue_count = queue_count;
4060 dev->gso_max_size = GSO_MAX_SIZE; 4244 dev->gso_max_size = GSO_MAX_SIZE;
4061 4245
4246 netdev_init_queues(dev);
4247
4062 dev->get_stats = internal_stats; 4248 dev->get_stats = internal_stats;
4063 netpoll_netdev_init(dev); 4249 netpoll_netdev_init(dev);
4064 setup(dev); 4250 setup(dev);
@@ -4079,6 +4265,8 @@ void free_netdev(struct net_device *dev)
4079{ 4265{
4080 release_net(dev_net(dev)); 4266 release_net(dev_net(dev));
4081 4267
4268 kfree(dev->_tx);
4269
4082 /* Compatibility with error handling in drivers */ 4270 /* Compatibility with error handling in drivers */
4083 if (dev->reg_state == NETREG_UNINITIALIZED) { 4271 if (dev->reg_state == NETREG_UNINITIALIZED) {
4084 kfree((char *)dev - dev->padded); 4272 kfree((char *)dev - dev->padded);
@@ -4260,7 +4448,7 @@ static int dev_cpu_callback(struct notifier_block *nfb,
4260 void *ocpu) 4448 void *ocpu)
4261{ 4449{
4262 struct sk_buff **list_skb; 4450 struct sk_buff **list_skb;
4263 struct net_device **list_net; 4451 struct Qdisc **list_net;
4264 struct sk_buff *skb; 4452 struct sk_buff *skb;
4265 unsigned int cpu, oldcpu = (unsigned long)ocpu; 4453 unsigned int cpu, oldcpu = (unsigned long)ocpu;
4266 struct softnet_data *sd, *oldsd; 4454 struct softnet_data *sd, *oldsd;
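The interesting algorithmic change in dev.c above is the new simple_tx_hash()/dev_pick_tx() pair, which spreads flows over the device's real_num_tx_queues by XOR-ing the IP addresses and the first word of the transport header. Below is a minimal user-space sketch of the same idea, for IPv4 only; the struct layout, names and queue count are invented for the illustration and are not part of the kernel API.

/* Illustrative re-implementation of the simple_tx_hash() idea:
 * XOR the IPv4 addresses and the source/destination port word,
 * then reduce the result modulo the number of TX queues.
 */
#include <stdint.h>
#include <stdio.h>

struct flow {
	uint32_t saddr;		/* IPv4 source address */
	uint32_t daddr;		/* IPv4 destination address */
	uint32_t ports;		/* source and destination port, one word */
};

static uint16_t pick_tx_queue(const struct flow *f, unsigned int num_queues)
{
	uint32_t hash = f->saddr ^ f->daddr ^ f->ports;

	return (uint16_t)(hash % num_queues);
}

int main(void)
{
	struct flow f = { 0x0a000001, 0x0a000002, (80u << 16) | 12345u };

	printf("flow maps to queue %u of 8\n", pick_tx_queue(&f, 8));
	return 0;
}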
diff --git a/net/core/dev_mcast.c b/net/core/dev_mcast.c
index f8a3455f4493..5402b3b38e0d 100644
--- a/net/core/dev_mcast.c
+++ b/net/core/dev_mcast.c
@@ -72,7 +72,7 @@ int dev_mc_delete(struct net_device *dev, void *addr, int alen, int glbl)
 {
 	int err;
 
-	netif_tx_lock_bh(dev);
+	netif_addr_lock_bh(dev);
 	err = __dev_addr_delete(&dev->mc_list, &dev->mc_count,
 				addr, alen, glbl);
 	if (!err) {
@@ -83,7 +83,7 @@ int dev_mc_delete(struct net_device *dev, void *addr, int alen, int glbl)
 
 		__dev_set_rx_mode(dev);
 	}
-	netif_tx_unlock_bh(dev);
+	netif_addr_unlock_bh(dev);
 	return err;
 }
 
@@ -95,11 +95,11 @@ int dev_mc_add(struct net_device *dev, void *addr, int alen, int glbl)
 {
 	int err;
 
-	netif_tx_lock_bh(dev);
+	netif_addr_lock_bh(dev);
 	err = __dev_addr_add(&dev->mc_list, &dev->mc_count, addr, alen, glbl);
 	if (!err)
 		__dev_set_rx_mode(dev);
-	netif_tx_unlock_bh(dev);
+	netif_addr_unlock_bh(dev);
 	return err;
 }
 
@@ -119,12 +119,12 @@ int dev_mc_sync(struct net_device *to, struct net_device *from)
 {
 	int err = 0;
 
-	netif_tx_lock_bh(to);
+	netif_addr_lock_bh(to);
 	err = __dev_addr_sync(&to->mc_list, &to->mc_count,
 			      &from->mc_list, &from->mc_count);
 	if (!err)
 		__dev_set_rx_mode(to);
-	netif_tx_unlock_bh(to);
+	netif_addr_unlock_bh(to);
 
 	return err;
 }
@@ -143,15 +143,15 @@ EXPORT_SYMBOL(dev_mc_sync);
  */
 void dev_mc_unsync(struct net_device *to, struct net_device *from)
 {
-	netif_tx_lock_bh(from);
-	netif_tx_lock_bh(to);
+	netif_addr_lock_bh(from);
+	netif_addr_lock(to);
 
 	__dev_addr_unsync(&to->mc_list, &to->mc_count,
 			  &from->mc_list, &from->mc_count);
 	__dev_set_rx_mode(to);
 
-	netif_tx_unlock_bh(to);
-	netif_tx_unlock_bh(from);
+	netif_addr_unlock(to);
+	netif_addr_unlock_bh(from);
 }
 EXPORT_SYMBOL(dev_mc_unsync);
 
@@ -164,7 +164,7 @@ static int dev_mc_seq_show(struct seq_file *seq, void *v)
 	if (v == SEQ_START_TOKEN)
 		return 0;
 
-	netif_tx_lock_bh(dev);
+	netif_addr_lock_bh(dev);
 	for (m = dev->mc_list; m; m = m->next) {
 		int i;
 
@@ -176,7 +176,7 @@ static int dev_mc_seq_show(struct seq_file *seq, void *v)
 
 		seq_putc(seq, '\n');
 	}
-	netif_tx_unlock_bh(dev);
+	netif_addr_unlock_bh(dev);
 	return 0;
 }
 
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 0133b5ebd545..14ada537f895 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -209,6 +209,36 @@ static int ethtool_get_drvinfo(struct net_device *dev, void __user *useraddr)
 	return 0;
 }
 
+static int ethtool_set_rxhash(struct net_device *dev, void __user *useraddr)
+{
+	struct ethtool_rxnfc cmd;
+
+	if (!dev->ethtool_ops->set_rxhash)
+		return -EOPNOTSUPP;
+
+	if (copy_from_user(&cmd, useraddr, sizeof(cmd)))
+		return -EFAULT;
+
+	return dev->ethtool_ops->set_rxhash(dev, &cmd);
+}
+
+static int ethtool_get_rxhash(struct net_device *dev, void __user *useraddr)
+{
+	struct ethtool_rxnfc info;
+
+	if (!dev->ethtool_ops->get_rxhash)
+		return -EOPNOTSUPP;
+
+	if (copy_from_user(&info, useraddr, sizeof(info)))
+		return -EFAULT;
+
+	dev->ethtool_ops->get_rxhash(dev, &info);
+
+	if (copy_to_user(useraddr, &info, sizeof(info)))
+		return -EFAULT;
+	return 0;
+}
+
 static int ethtool_get_regs(struct net_device *dev, char __user *useraddr)
 {
 	struct ethtool_regs regs;
@@ -826,6 +856,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
 	case ETHTOOL_GGSO:
 	case ETHTOOL_GFLAGS:
 	case ETHTOOL_GPFLAGS:
+	case ETHTOOL_GRXFH:
 		break;
 	default:
 		if (!capable(CAP_NET_ADMIN))
@@ -977,6 +1008,12 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
 		rc = ethtool_set_value(dev, useraddr,
 				       dev->ethtool_ops->set_priv_flags);
 		break;
+	case ETHTOOL_GRXFH:
+		rc = ethtool_get_rxhash(dev, useraddr);
+		break;
+	case ETHTOOL_SRXFH:
+		rc = ethtool_set_rxhash(dev, useraddr);
+		break;
 	default:
 		rc = -EOPNOTSUPP;
 	}
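The two new ethtool commands wired up above, ETHTOOL_GRXFH and ETHTOOL_SRXFH, are reachable from user space through the usual SIOCETHTOOL ioctl. The following is a minimal sketch of querying the RX flow-hash configuration for TCP/IPv4; it assumes kernel headers that already define struct ethtool_rxnfc, ETHTOOL_GRXFH and TCP_V4_FLOW, and the interface name "eth0" is only an example.

/* Query which header fields a NIC hashes RX TCP/IPv4 flows on. */
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>
#include <linux/types.h>
#include <linux/ethtool.h>
#include <linux/sockios.h>

int main(void)
{
	struct ethtool_rxnfc nfc;
	struct ifreq ifr;
	int fd = socket(AF_INET, SOCK_DGRAM, 0);

	if (fd < 0)
		return 1;

	memset(&nfc, 0, sizeof(nfc));
	nfc.cmd = ETHTOOL_GRXFH;	/* handled by ethtool_get_rxhash() above */
	nfc.flow_type = TCP_V4_FLOW;

	memset(&ifr, 0, sizeof(ifr));
	strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);
	ifr.ifr_data = (char *)&nfc;

	if (ioctl(fd, SIOCETHTOOL, &ifr) == 0)
		printf("rx flow hash fields: 0x%llx\n",
		       (unsigned long long)nfc.data);
	else
		perror("ETHTOOL_GRXFH");

	close(fd);
	return 0;
}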
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 277a2302eb3a..79de3b14a8d1 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -69,7 +69,7 @@ static void rules_ops_put(struct fib_rules_ops *ops)
 static void flush_route_cache(struct fib_rules_ops *ops)
 {
 	if (ops->flush_cache)
-		ops->flush_cache();
+		ops->flush_cache(ops);
 }
 
 int fib_rules_register(struct fib_rules_ops *ops)
diff --git a/net/core/iovec.c b/net/core/iovec.c
index 755c37fdaee7..4c9c0121c9da 100644
--- a/net/core/iovec.c
+++ b/net/core/iovec.c
@@ -36,7 +36,7 @@
  * in any case.
  */
 
-int verify_iovec(struct msghdr *m, struct iovec *iov, char *address, int mode)
+int verify_iovec(struct msghdr *m, struct iovec *iov, struct sockaddr *address, int mode)
 {
 	int size, err, ct;
 
diff --git a/net/core/link_watch.c b/net/core/link_watch.c
index a5e372b9ec4d..bf8f7af699d7 100644
--- a/net/core/link_watch.c
+++ b/net/core/link_watch.c
@@ -77,10 +77,10 @@ static void rfc2863_policy(struct net_device *dev)
 }
 
 
-static int linkwatch_urgent_event(struct net_device *dev)
+static bool linkwatch_urgent_event(struct net_device *dev)
 {
 	return netif_running(dev) && netif_carrier_ok(dev) &&
-		dev->qdisc != dev->qdisc_sleeping;
+		qdisc_tx_changing(dev);
 }
 
 
@@ -180,10 +180,9 @@ static void __linkwatch_run_queue(int urgent_only)
 
 		rfc2863_policy(dev);
 		if (dev->flags & IFF_UP) {
-			if (netif_carrier_ok(dev)) {
-				WARN_ON(dev->qdisc_sleeping == &noop_qdisc);
+			if (netif_carrier_ok(dev))
 				dev_activate(dev);
-			} else
+			else
 				dev_deactivate(dev);
 
 			netdev_state_change(dev);
@@ -214,7 +213,7 @@ static void linkwatch_event(struct work_struct *dummy)
 
 void linkwatch_fire_event(struct net_device *dev)
 {
-	int urgent = linkwatch_urgent_event(dev);
+	bool urgent = linkwatch_urgent_event(dev);
 
 	if (!test_and_set_bit(__LINK_STATE_LINKWATCH_PENDING, &dev->state)) {
 		dev_hold(dev);
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 65f01f71b3f3..f62c8af85d38 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -930,6 +930,7 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
 			buff = neigh->arp_queue.next;
 			__skb_unlink(buff, &neigh->arp_queue);
 			kfree_skb(buff);
+			NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
 		}
 		__skb_queue_tail(&neigh->arp_queue, skb);
 	}
@@ -2462,12 +2463,12 @@ static int neigh_stat_seq_show(struct seq_file *seq, void *v)
 	struct neigh_statistics *st = v;
 
 	if (v == SEQ_START_TOKEN) {
-		seq_printf(seq, "entries allocs destroys hash_grows lookups hits res_failed rcv_probes_mcast rcv_probes_ucast periodic_gc_runs forced_gc_runs\n");
+		seq_printf(seq, "entries allocs destroys hash_grows lookups hits res_failed rcv_probes_mcast rcv_probes_ucast periodic_gc_runs forced_gc_runs unresolved_discards\n");
 		return 0;
 	}
 
 	seq_printf(seq, "%08x %08lx %08lx %08lx %08lx %08lx %08lx "
-			"%08lx %08lx %08lx %08lx\n",
+			"%08lx %08lx %08lx %08lx %08lx\n",
 		   atomic_read(&tbl->entries),
 
 		   st->allocs,
@@ -2483,7 +2484,8 @@ static int neigh_stat_seq_show(struct seq_file *seq, void *v)
 		   st->rcv_probes_ucast,
 
 		   st->periodic_gc_runs,
-		   st->forced_gc_runs
+		   st->forced_gc_runs,
+		   st->unres_discards
 		   );
 
 	return 0;
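The unres_discards counter added above shows up as a new last column in the per-CPU neighbour statistics files such as /proc/net/stat/arp_cache. A small sketch of reading it, assuming the 12-column hexadecimal per-CPU layout produced by the seq_printf() in this hunk; the path is only an example for the ARP table.

/* Print the unresolved-discards column for each CPU row. */
#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/proc/net/stat/arp_cache", "r");
	char header[512];
	unsigned long col[12];

	if (!f)
		return 1;
	if (!fgets(header, sizeof(header), f)) {	/* skip the column names */
		fclose(f);
		return 1;
	}

	/* 12 hex fields per CPU; the last one is unresolved_discards */
	while (fscanf(f, "%lx %lx %lx %lx %lx %lx %lx %lx %lx %lx %lx %lx",
		      &col[0], &col[1], &col[2], &col[3], &col[4], &col[5],
		      &col[6], &col[7], &col[8], &col[9], &col[10],
		      &col[11]) == 12)
		printf("unresolved_discards: %lu\n", col[11]);

	fclose(f);
	return 0;
}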
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 90e2177af081..c1f4e0d428c0 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -242,11 +242,11 @@ static ssize_t netstat_show(const struct device *d,
 		    offset % sizeof(unsigned long) != 0);
 
 	read_lock(&dev_base_lock);
-	if (dev_isalive(dev) && dev->get_stats &&
-	    (stats = (*dev->get_stats)(dev)))
+	if (dev_isalive(dev)) {
+		stats = dev->get_stats(dev);
 		ret = sprintf(buf, fmt_ulong,
 			      *(unsigned long *)(((u8 *) stats) + offset));
-
+	}
 	read_unlock(&dev_base_lock);
 	return ret;
 }
@@ -318,7 +318,7 @@ static struct attribute_group netstat_group = {
 	.attrs  = netstat_attrs,
 };
 
-#ifdef CONFIG_WIRELESS_EXT
+#ifdef CONFIG_WIRELESS_EXT_SYSFS
 /* helper function that does all the locking etc for wireless stats */
 static ssize_t wireless_show(struct device *d, char *buf,
 			     ssize_t (*format)(const struct iw_statistics *,
@@ -457,10 +457,9 @@ int netdev_register_kobject(struct net_device *net)
 	strlcpy(dev->bus_id, net->name, BUS_ID_SIZE);
 
 #ifdef CONFIG_SYSFS
-	if (net->get_stats)
-		*groups++ = &netstat_group;
+	*groups++ = &netstat_group;
 
-#ifdef CONFIG_WIRELESS_EXT
+#ifdef CONFIG_WIRELESS_EXT_SYSFS
 	if (net->wireless_handlers && net->wireless_handlers->get_wireless_stats)
 		*groups++ = &wireless_group;
 #endif
@@ -469,6 +468,19 @@ int netdev_register_kobject(struct net_device *net)
 	return device_add(dev);
 }
 
+int netdev_class_create_file(struct class_attribute *class_attr)
+{
+	return class_create_file(&net_class, class_attr);
+}
+
+void netdev_class_remove_file(struct class_attribute *class_attr)
+{
+	class_remove_file(&net_class, class_attr);
+}
+
+EXPORT_SYMBOL(netdev_class_create_file);
+EXPORT_SYMBOL(netdev_class_remove_file);
+
 void netdev_initialize_kobject(struct net_device *net)
 {
 	struct device *device = &(net->dev);
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 8fb134da0346..c12720895ecf 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -58,25 +58,27 @@ static void queue_process(struct work_struct *work)
58 58
59 while ((skb = skb_dequeue(&npinfo->txq))) { 59 while ((skb = skb_dequeue(&npinfo->txq))) {
60 struct net_device *dev = skb->dev; 60 struct net_device *dev = skb->dev;
61 struct netdev_queue *txq;
61 62
62 if (!netif_device_present(dev) || !netif_running(dev)) { 63 if (!netif_device_present(dev) || !netif_running(dev)) {
63 __kfree_skb(skb); 64 __kfree_skb(skb);
64 continue; 65 continue;
65 } 66 }
66 67
68 txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));
69
67 local_irq_save(flags); 70 local_irq_save(flags);
68 netif_tx_lock(dev); 71 __netif_tx_lock(txq, smp_processor_id());
69 if ((netif_queue_stopped(dev) || 72 if (netif_tx_queue_stopped(txq) ||
70 netif_subqueue_stopped(dev, skb)) || 73 dev->hard_start_xmit(skb, dev) != NETDEV_TX_OK) {
71 dev->hard_start_xmit(skb, dev) != NETDEV_TX_OK) {
72 skb_queue_head(&npinfo->txq, skb); 74 skb_queue_head(&npinfo->txq, skb);
73 netif_tx_unlock(dev); 75 __netif_tx_unlock(txq);
74 local_irq_restore(flags); 76 local_irq_restore(flags);
75 77
76 schedule_delayed_work(&npinfo->tx_work, HZ/10); 78 schedule_delayed_work(&npinfo->tx_work, HZ/10);
77 return; 79 return;
78 } 80 }
79 netif_tx_unlock(dev); 81 __netif_tx_unlock(txq);
80 local_irq_restore(flags); 82 local_irq_restore(flags);
81 } 83 }
82} 84}
@@ -278,17 +280,19 @@ static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
278 280
279 /* don't get messages out of order, and no recursion */ 281 /* don't get messages out of order, and no recursion */
280 if (skb_queue_len(&npinfo->txq) == 0 && !netpoll_owner_active(dev)) { 282 if (skb_queue_len(&npinfo->txq) == 0 && !netpoll_owner_active(dev)) {
283 struct netdev_queue *txq;
281 unsigned long flags; 284 unsigned long flags;
282 285
286 txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));
287
283 local_irq_save(flags); 288 local_irq_save(flags);
284 /* try until next clock tick */ 289 /* try until next clock tick */
285 for (tries = jiffies_to_usecs(1)/USEC_PER_POLL; 290 for (tries = jiffies_to_usecs(1)/USEC_PER_POLL;
286 tries > 0; --tries) { 291 tries > 0; --tries) {
287 if (netif_tx_trylock(dev)) { 292 if (__netif_tx_trylock(txq)) {
288 if (!netif_queue_stopped(dev) && 293 if (!netif_tx_queue_stopped(txq))
289 !netif_subqueue_stopped(dev, skb))
290 status = dev->hard_start_xmit(skb, dev); 294 status = dev->hard_start_xmit(skb, dev);
291 netif_tx_unlock(dev); 295 __netif_tx_unlock(txq);
292 296
293 if (status == NETDEV_TX_OK) 297 if (status == NETDEV_TX_OK)
294 break; 298 break;
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index fdf537707e51..c7d484f7e1c4 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -1875,7 +1875,7 @@ static int pktgen_device_event(struct notifier_block *unused,
1875{ 1875{
1876 struct net_device *dev = ptr; 1876 struct net_device *dev = ptr;
1877 1877
1878 if (dev_net(dev) != &init_net) 1878 if (!net_eq(dev_net(dev), &init_net))
1879 return NOTIFY_DONE; 1879 return NOTIFY_DONE;
1880 1880
1881 /* It is OK that we do not hold the group lock right now, 1881 /* It is OK that we do not hold the group lock right now,
@@ -2123,6 +2123,24 @@ static void get_ipsec_sa(struct pktgen_dev *pkt_dev, int flow)
2123 } 2123 }
2124} 2124}
2125#endif 2125#endif
2126static void set_cur_queue_map(struct pktgen_dev *pkt_dev)
2127{
2128 if (pkt_dev->queue_map_min < pkt_dev->queue_map_max) {
2129 __u16 t;
2130 if (pkt_dev->flags & F_QUEUE_MAP_RND) {
2131 t = random32() %
2132 (pkt_dev->queue_map_max -
2133 pkt_dev->queue_map_min + 1)
2134 + pkt_dev->queue_map_min;
2135 } else {
2136 t = pkt_dev->cur_queue_map + 1;
2137 if (t > pkt_dev->queue_map_max)
2138 t = pkt_dev->queue_map_min;
2139 }
2140 pkt_dev->cur_queue_map = t;
2141 }
2142}
2143
2126/* Increment/randomize headers according to flags and current values 2144/* Increment/randomize headers according to flags and current values
2127 * for IP src/dest, UDP src/dst port, MAC-Addr src/dst 2145 * for IP src/dest, UDP src/dst port, MAC-Addr src/dst
2128 */ 2146 */
@@ -2325,19 +2343,7 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
2325 pkt_dev->cur_pkt_size = t; 2343 pkt_dev->cur_pkt_size = t;
2326 } 2344 }
2327 2345
2328 if (pkt_dev->queue_map_min < pkt_dev->queue_map_max) { 2346 set_cur_queue_map(pkt_dev);
2329 __u16 t;
2330 if (pkt_dev->flags & F_QUEUE_MAP_RND) {
2331 t = random32() %
2332 (pkt_dev->queue_map_max - pkt_dev->queue_map_min + 1)
2333 + pkt_dev->queue_map_min;
2334 } else {
2335 t = pkt_dev->cur_queue_map + 1;
2336 if (t > pkt_dev->queue_map_max)
2337 t = pkt_dev->queue_map_min;
2338 }
2339 pkt_dev->cur_queue_map = t;
2340 }
2341 2347
2342 pkt_dev->flows[flow].count++; 2348 pkt_dev->flows[flow].count++;
2343} 2349}
@@ -2458,7 +2464,7 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
2458 __be16 *vlan_encapsulated_proto = NULL; /* packet type ID field (or len) for VLAN tag */ 2464 __be16 *vlan_encapsulated_proto = NULL; /* packet type ID field (or len) for VLAN tag */
2459 __be16 *svlan_tci = NULL; /* Encapsulates priority and SVLAN ID */ 2465 __be16 *svlan_tci = NULL; /* Encapsulates priority and SVLAN ID */
2460 __be16 *svlan_encapsulated_proto = NULL; /* packet type ID field (or len) for SVLAN tag */ 2466 __be16 *svlan_encapsulated_proto = NULL; /* packet type ID field (or len) for SVLAN tag */
2461 2467 u16 queue_map;
2462 2468
2463 if (pkt_dev->nr_labels) 2469 if (pkt_dev->nr_labels)
2464 protocol = htons(ETH_P_MPLS_UC); 2470 protocol = htons(ETH_P_MPLS_UC);
@@ -2469,6 +2475,7 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
2469 /* Update any of the values, used when we're incrementing various 2475 /* Update any of the values, used when we're incrementing various
2470 * fields. 2476 * fields.
2471 */ 2477 */
2478 queue_map = pkt_dev->cur_queue_map;
2472 mod_cur_headers(pkt_dev); 2479 mod_cur_headers(pkt_dev);
2473 2480
2474 datalen = (odev->hard_header_len + 16) & ~0xf; 2481 datalen = (odev->hard_header_len + 16) & ~0xf;
@@ -2507,7 +2514,7 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
2507 skb->network_header = skb->tail; 2514 skb->network_header = skb->tail;
2508 skb->transport_header = skb->network_header + sizeof(struct iphdr); 2515 skb->transport_header = skb->network_header + sizeof(struct iphdr);
2509 skb_put(skb, sizeof(struct iphdr) + sizeof(struct udphdr)); 2516 skb_put(skb, sizeof(struct iphdr) + sizeof(struct udphdr));
2510 skb_set_queue_mapping(skb, pkt_dev->cur_queue_map); 2517 skb_set_queue_mapping(skb, queue_map);
2511 iph = ip_hdr(skb); 2518 iph = ip_hdr(skb);
2512 udph = udp_hdr(skb); 2519 udph = udp_hdr(skb);
2513 2520
@@ -2797,6 +2804,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
2797 __be16 *vlan_encapsulated_proto = NULL; /* packet type ID field (or len) for VLAN tag */ 2804 __be16 *vlan_encapsulated_proto = NULL; /* packet type ID field (or len) for VLAN tag */
2798 __be16 *svlan_tci = NULL; /* Encapsulates priority and SVLAN ID */ 2805 __be16 *svlan_tci = NULL; /* Encapsulates priority and SVLAN ID */
2799 __be16 *svlan_encapsulated_proto = NULL; /* packet type ID field (or len) for SVLAN tag */ 2806 __be16 *svlan_encapsulated_proto = NULL; /* packet type ID field (or len) for SVLAN tag */
2807 u16 queue_map;
2800 2808
2801 if (pkt_dev->nr_labels) 2809 if (pkt_dev->nr_labels)
2802 protocol = htons(ETH_P_MPLS_UC); 2810 protocol = htons(ETH_P_MPLS_UC);
@@ -2807,6 +2815,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
2807 /* Update any of the values, used when we're incrementing various 2815 /* Update any of the values, used when we're incrementing various
2808 * fields. 2816 * fields.
2809 */ 2817 */
2818 queue_map = pkt_dev->cur_queue_map;
2810 mod_cur_headers(pkt_dev); 2819 mod_cur_headers(pkt_dev);
2811 2820
2812 skb = alloc_skb(pkt_dev->cur_pkt_size + 64 + 16 + 2821 skb = alloc_skb(pkt_dev->cur_pkt_size + 64 + 16 +
@@ -2844,7 +2853,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
2844 skb->network_header = skb->tail; 2853 skb->network_header = skb->tail;
2845 skb->transport_header = skb->network_header + sizeof(struct ipv6hdr); 2854 skb->transport_header = skb->network_header + sizeof(struct ipv6hdr);
2846 skb_put(skb, sizeof(struct ipv6hdr) + sizeof(struct udphdr)); 2855 skb_put(skb, sizeof(struct ipv6hdr) + sizeof(struct udphdr));
2847 skb_set_queue_mapping(skb, pkt_dev->cur_queue_map); 2856 skb_set_queue_mapping(skb, queue_map);
2848 iph = ipv6_hdr(skb); 2857 iph = ipv6_hdr(skb);
2849 udph = udp_hdr(skb); 2858 udph = udp_hdr(skb);
2850 2859
@@ -3263,7 +3272,9 @@ static void pktgen_rem_thread(struct pktgen_thread *t)
3263static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev) 3272static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev)
3264{ 3273{
3265 struct net_device *odev = NULL; 3274 struct net_device *odev = NULL;
3275 struct netdev_queue *txq;
3266 __u64 idle_start = 0; 3276 __u64 idle_start = 0;
3277 u16 queue_map;
3267 int ret; 3278 int ret;
3268 3279
3269 odev = pkt_dev->odev; 3280 odev = pkt_dev->odev;
@@ -3285,9 +3296,15 @@ static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev)
3285 } 3296 }
3286 } 3297 }
3287 3298
3288 if ((netif_queue_stopped(odev) || 3299 if (!pkt_dev->skb) {
3289 (pkt_dev->skb && 3300 set_cur_queue_map(pkt_dev);
3290 netif_subqueue_stopped(odev, pkt_dev->skb))) || 3301 queue_map = pkt_dev->cur_queue_map;
3302 } else {
3303 queue_map = skb_get_queue_mapping(pkt_dev->skb);
3304 }
3305
3306 txq = netdev_get_tx_queue(odev, queue_map);
3307 if (netif_tx_queue_stopped(txq) ||
3291 need_resched()) { 3308 need_resched()) {
3292 idle_start = getCurUs(); 3309 idle_start = getCurUs();
3293 3310
@@ -3303,8 +3320,7 @@ static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev)
3303 3320
3304 pkt_dev->idle_acc += getCurUs() - idle_start; 3321 pkt_dev->idle_acc += getCurUs() - idle_start;
3305 3322
3306 if (netif_queue_stopped(odev) || 3323 if (netif_tx_queue_stopped(txq)) {
3307 netif_subqueue_stopped(odev, pkt_dev->skb)) {
3308 pkt_dev->next_tx_us = getCurUs(); /* TODO */ 3324 pkt_dev->next_tx_us = getCurUs(); /* TODO */
3309 pkt_dev->next_tx_ns = 0; 3325 pkt_dev->next_tx_ns = 0;
3310 goto out; /* Try the next interface */ 3326 goto out; /* Try the next interface */
@@ -3331,9 +3347,12 @@ static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev)
3331 } 3347 }
3332 } 3348 }
3333 3349
3334 netif_tx_lock_bh(odev); 3350 /* fill_packet() might have changed the queue */
3335 if (!netif_queue_stopped(odev) && 3351 queue_map = skb_get_queue_mapping(pkt_dev->skb);
3336 !netif_subqueue_stopped(odev, pkt_dev->skb)) { 3352 txq = netdev_get_tx_queue(odev, queue_map);
3353
3354 __netif_tx_lock_bh(txq);
3355 if (!netif_tx_queue_stopped(txq)) {
3337 3356
3338 atomic_inc(&(pkt_dev->skb->users)); 3357 atomic_inc(&(pkt_dev->skb->users));
3339 retry_now: 3358 retry_now:
@@ -3377,7 +3396,7 @@ static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev)
3377 pkt_dev->next_tx_ns = 0; 3396 pkt_dev->next_tx_ns = 0;
3378 } 3397 }
3379 3398
3380 netif_tx_unlock_bh(odev); 3399 __netif_tx_unlock_bh(txq);
3381 3400
3382 /* If pkt_dev->count is zero, then run forever */ 3401 /* If pkt_dev->count is zero, then run forever */
3383 if ((pkt_dev->count != 0) && (pkt_dev->sofar >= pkt_dev->count)) { 3402 if ((pkt_dev->count != 0) && (pkt_dev->sofar >= pkt_dev->count)) {
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index a9a77216310e..71edb8b36341 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -605,8 +605,11 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
605 int type, u32 pid, u32 seq, u32 change, 605 int type, u32 pid, u32 seq, u32 change,
606 unsigned int flags) 606 unsigned int flags)
607{ 607{
608 struct netdev_queue *txq;
608 struct ifinfomsg *ifm; 609 struct ifinfomsg *ifm;
609 struct nlmsghdr *nlh; 610 struct nlmsghdr *nlh;
611 struct net_device_stats *stats;
612 struct nlattr *attr;
610 613
611 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ifm), flags); 614 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ifm), flags);
612 if (nlh == NULL) 615 if (nlh == NULL)
@@ -633,8 +636,9 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
633 if (dev->master) 636 if (dev->master)
634 NLA_PUT_U32(skb, IFLA_MASTER, dev->master->ifindex); 637 NLA_PUT_U32(skb, IFLA_MASTER, dev->master->ifindex);
635 638
636 if (dev->qdisc_sleeping) 639 txq = netdev_get_tx_queue(dev, 0);
637 NLA_PUT_STRING(skb, IFLA_QDISC, dev->qdisc_sleeping->ops->id); 640 if (txq->qdisc_sleeping)
641 NLA_PUT_STRING(skb, IFLA_QDISC, txq->qdisc_sleeping->ops->id);
638 642
639 if (1) { 643 if (1) {
640 struct rtnl_link_ifmap map = { 644 struct rtnl_link_ifmap map = {
@@ -653,19 +657,13 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
653 NLA_PUT(skb, IFLA_BROADCAST, dev->addr_len, dev->broadcast); 657 NLA_PUT(skb, IFLA_BROADCAST, dev->addr_len, dev->broadcast);
654 } 658 }
655 659
656 if (dev->get_stats) { 660 attr = nla_reserve(skb, IFLA_STATS,
657 struct net_device_stats *stats = dev->get_stats(dev); 661 sizeof(struct rtnl_link_stats));
658 if (stats) { 662 if (attr == NULL)
659 struct nlattr *attr; 663 goto nla_put_failure;
660 664
661 attr = nla_reserve(skb, IFLA_STATS, 665 stats = dev->get_stats(dev);
662 sizeof(struct rtnl_link_stats)); 666 copy_rtnl_link_stats(nla_data(attr), stats);
663 if (attr == NULL)
664 goto nla_put_failure;
665
666 copy_rtnl_link_stats(nla_data(attr), stats);
667 }
668 }
669 667
670 if (dev->rtnl_link_ops) { 668 if (dev->rtnl_link_ops) {
671 if (rtnl_link_fill(skb, dev) < 0) 669 if (rtnl_link_fill(skb, dev) < 0)
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 366621610e76..e4115672b6cf 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -4,8 +4,6 @@
4 * Authors: Alan Cox <iiitac@pyr.swan.ac.uk> 4 * Authors: Alan Cox <iiitac@pyr.swan.ac.uk>
5 * Florian La Roche <rzsfl@rz.uni-sb.de> 5 * Florian La Roche <rzsfl@rz.uni-sb.de>
6 * 6 *
7 * Version: $Id: skbuff.c,v 1.90 2001/11/07 05:56:19 davem Exp $
8 *
9 * Fixes: 7 * Fixes:
10 * Alan Cox : Fixed the worst of the load 8 * Alan Cox : Fixed the worst of the load
11 * balancer bugs. 9 * balancer bugs.
@@ -461,6 +459,8 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
461 new->tc_verd = old->tc_verd; 459 new->tc_verd = old->tc_verd;
462#endif 460#endif
463#endif 461#endif
462 new->vlan_tci = old->vlan_tci;
463
464 skb_copy_secmark(new, old); 464 skb_copy_secmark(new, old);
465} 465}
466 466
@@ -1282,114 +1282,83 @@ static inline int spd_fill_page(struct splice_pipe_desc *spd, struct page *page,
1282 return 0; 1282 return 0;
1283} 1283}
1284 1284
1285/* 1285static inline void __segment_seek(struct page **page, unsigned int *poff,
1286 * Map linear and fragment data from the skb to spd. Returns number of 1286 unsigned int *plen, unsigned int off)
1287 * pages mapped. 1287{
1288 */ 1288 *poff += off;
1289static int __skb_splice_bits(struct sk_buff *skb, unsigned int *offset, 1289 *page += *poff / PAGE_SIZE;
1290 unsigned int *total_len, 1290 *poff = *poff % PAGE_SIZE;
1291 struct splice_pipe_desc *spd) 1291 *plen -= off;
1292{ 1292}
1293 unsigned int nr_pages = spd->nr_pages; 1293
1294 unsigned int poff, plen, len, toff, tlen; 1294static inline int __splice_segment(struct page *page, unsigned int poff,
1295 int headlen, seg, error = 0; 1295 unsigned int plen, unsigned int *off,
1296 1296 unsigned int *len, struct sk_buff *skb,
1297 toff = *offset; 1297 struct splice_pipe_desc *spd)
1298 tlen = *total_len; 1298{
1299 if (!tlen) { 1299 if (!*len)
1300 error = 1; 1300 return 1;
1301 goto err; 1301
1302 /* skip this segment if already processed */
1303 if (*off >= plen) {
1304 *off -= plen;
1305 return 0;
1302 } 1306 }
1303 1307
1304 /* 1308 /* ignore any bits we already processed */
1305 * if the offset is greater than the linear part, go directly to 1309 if (*off) {
1306 * the fragments. 1310 __segment_seek(&page, &poff, &plen, *off);
1307 */ 1311 *off = 0;
1308 headlen = skb_headlen(skb);
1309 if (toff >= headlen) {
1310 toff -= headlen;
1311 goto map_frag;
1312 } 1312 }
1313 1313
1314 /* 1314 do {
1315 * first map the linear region into the pages/partial map, skipping 1315 unsigned int flen = min(*len, plen);
1316 * any potential initial offset.
1317 */
1318 len = 0;
1319 while (len < headlen) {
1320 void *p = skb->data + len;
1321
1322 poff = (unsigned long) p & (PAGE_SIZE - 1);
1323 plen = min_t(unsigned int, headlen - len, PAGE_SIZE - poff);
1324 len += plen;
1325
1326 if (toff) {
1327 if (plen <= toff) {
1328 toff -= plen;
1329 continue;
1330 }
1331 plen -= toff;
1332 poff += toff;
1333 toff = 0;
1334 }
1335 1316
1336 plen = min(plen, tlen); 1317 /* the linear region may spread across several pages */
1337 if (!plen) 1318 flen = min_t(unsigned int, flen, PAGE_SIZE - poff);
1338 break;
1339 1319
1340 /* 1320 if (spd_fill_page(spd, page, flen, poff, skb))
1341 * just jump directly to update and return, no point 1321 return 1;
1342 * in going over fragments when the output is full.
1343 */
1344 error = spd_fill_page(spd, virt_to_page(p), plen, poff, skb);
1345 if (error)
1346 goto done;
1347 1322
1348 tlen -= plen; 1323 __segment_seek(&page, &poff, &plen, flen);
1349 } 1324 *len -= flen;
1325
1326 } while (*len && plen);
1327
1328 return 0;
1329}
1330
1331/*
1332 * Map linear and fragment data from the skb to spd. It reports failure if the
1333 * pipe is full or if we already spliced the requested length.
1334 */
1335static int __skb_splice_bits(struct sk_buff *skb, unsigned int *offset,
1336 unsigned int *len,
1337 struct splice_pipe_desc *spd)
1338{
1339 int seg;
1340
1341 /*
1342 * map the linear part
1343 */
1344 if (__splice_segment(virt_to_page(skb->data),
1345 (unsigned long) skb->data & (PAGE_SIZE - 1),
1346 skb_headlen(skb),
1347 offset, len, skb, spd))
1348 return 1;
1350 1349
1351 /* 1350 /*
1352 * then map the fragments 1351 * then map the fragments
1353 */ 1352 */
1354map_frag:
1355 for (seg = 0; seg < skb_shinfo(skb)->nr_frags; seg++) { 1353 for (seg = 0; seg < skb_shinfo(skb)->nr_frags; seg++) {
1356 const skb_frag_t *f = &skb_shinfo(skb)->frags[seg]; 1354 const skb_frag_t *f = &skb_shinfo(skb)->frags[seg];
1357 1355
1358 plen = f->size; 1356 if (__splice_segment(f->page, f->page_offset, f->size,
1359 poff = f->page_offset; 1357 offset, len, skb, spd))
1360 1358 return 1;
1361 if (toff) {
1362 if (plen <= toff) {
1363 toff -= plen;
1364 continue;
1365 }
1366 plen -= toff;
1367 poff += toff;
1368 toff = 0;
1369 }
1370
1371 plen = min(plen, tlen);
1372 if (!plen)
1373 break;
1374
1375 error = spd_fill_page(spd, f->page, plen, poff, skb);
1376 if (error)
1377 break;
1378
1379 tlen -= plen;
1380 } 1359 }
1381 1360
1382done: 1361 return 0;
1383 if (spd->nr_pages - nr_pages) {
1384 *offset = 0;
1385 *total_len = tlen;
1386 return 0;
1387 }
1388err:
1389 /* update the offset to reflect the linear part skip, if any */
1390 if (!error)
1391 *offset = toff;
1392 return error;
1393} 1362}
1394 1363
1395/* 1364/*
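
The splice refactor above folds the old open-coded mapping loops into __splice_segment(), which uses __segment_seek() to advance a (page, offset-within-page, remaining-length) triple by a byte count, carrying any overflow into following pages; the same helper then serves both the linear area and the page fragments. Below is a minimal userspace sketch of that arithmetic, assuming a 4096-byte page and using a page index in place of a struct page pointer.

/*
 * Userspace sketch of the __segment_seek() arithmetic: advance a
 * (page, offset-within-page, remaining-length) triple by "off" bytes.
 */
#include <assert.h>
#include <stdio.h>

#define PAGE_SIZE 4096u

static void segment_seek(unsigned int *page, unsigned int *poff,
			 unsigned int *plen, unsigned int off)
{
	*poff += off;			/* move within the current page...   */
	*page += *poff / PAGE_SIZE;	/* ...carrying into following pages  */
	*poff  = *poff % PAGE_SIZE;
	*plen -= off;			/* that much less data left to map   */
}

int main(void)
{
	unsigned int page = 0, poff = 4000, plen = 10000;

	segment_seek(&page, &poff, &plen, 200);	/* crosses a page boundary */
	assert(page == 1 && poff == 104 && plen == 9800);
	printf("page=%u poff=%u plen=%u\n", page, poff, plen);
	return 0;
}
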
@@ -2288,6 +2257,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, int features)
2288 skb_copy_queue_mapping(nskb, skb); 2257 skb_copy_queue_mapping(nskb, skb);
2289 nskb->priority = skb->priority; 2258 nskb->priority = skb->priority;
2290 nskb->protocol = skb->protocol; 2259 nskb->protocol = skb->protocol;
2260 nskb->vlan_tci = skb->vlan_tci;
2291 nskb->dst = dst_clone(skb->dst); 2261 nskb->dst = dst_clone(skb->dst);
2292 memcpy(nskb->cb, skb->cb, sizeof(skb->cb)); 2262 memcpy(nskb->cb, skb->cb, sizeof(skb->cb));
2293 nskb->pkt_type = skb->pkt_type; 2263 nskb->pkt_type = skb->pkt_type;
@@ -2592,6 +2562,13 @@ bool skb_partial_csum_set(struct sk_buff *skb, u16 start, u16 off)
2592 return true; 2562 return true;
2593} 2563}
2594 2564
2565void __skb_warn_lro_forwarding(const struct sk_buff *skb)
2566{
2567 if (net_ratelimit())
2568 pr_warning("%s: received packets cannot be forwarded"
2569 " while LRO is enabled\n", skb->dev->name);
2570}
2571
2595EXPORT_SYMBOL(___pskb_trim); 2572EXPORT_SYMBOL(___pskb_trim);
2596EXPORT_SYMBOL(__kfree_skb); 2573EXPORT_SYMBOL(__kfree_skb);
2597EXPORT_SYMBOL(kfree_skb); 2574EXPORT_SYMBOL(kfree_skb);
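
__skb_warn_lro_forwarding() above gates its message on net_ratelimit(), so a stream of non-forwardable LRO packets does not flood the log. The sketch below illustrates the same rate-limited-warning idea in userspace C; the 5-second window and 10-message burst are assumptions for the example, not the kernel's actual net_ratelimit() policy.

/*
 * Userspace sketch of a rate-limited warning: allow at most "burst"
 * messages per "interval" seconds and silently drop the rest.
 */
#include <stdio.h>
#include <time.h>

static int warn_ratelimit(void)
{
	static time_t window_start;
	static unsigned int printed;
	const time_t interval = 5;	/* seconds per window (assumed)   */
	const unsigned int burst = 10;	/* messages per window (assumed)  */
	time_t now = time(NULL);

	if (now - window_start >= interval) {
		window_start = now;	/* new window, reset the budget */
		printed = 0;
	}
	if (printed >= burst)
		return 0;		/* suppressed */
	printed++;
	return 1;
}

int main(void)
{
	for (int i = 0; i < 100; i++)
		if (warn_ratelimit())
			fprintf(stderr, "eth0: received packets cannot be "
					"forwarded while LRO is enabled\n");
	return 0;	/* only the first 10 warnings reach stderr */
}
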
@@ -2625,6 +2602,7 @@ EXPORT_SYMBOL(skb_seq_read);
2625EXPORT_SYMBOL(skb_abort_seq_read); 2602EXPORT_SYMBOL(skb_abort_seq_read);
2626EXPORT_SYMBOL(skb_find_text); 2603EXPORT_SYMBOL(skb_find_text);
2627EXPORT_SYMBOL(skb_append_datato_frags); 2604EXPORT_SYMBOL(skb_append_datato_frags);
2605EXPORT_SYMBOL(__skb_warn_lro_forwarding);
2628 2606
2629EXPORT_SYMBOL_GPL(skb_to_sgvec); 2607EXPORT_SYMBOL_GPL(skb_to_sgvec);
2630EXPORT_SYMBOL_GPL(skb_cow_data); 2608EXPORT_SYMBOL_GPL(skb_cow_data);
diff --git a/net/core/sock.c b/net/core/sock.c
index 88094cb09c06..10a64d57078c 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -7,8 +7,6 @@
7 * handler for protocols to use and generic option handler. 7 * handler for protocols to use and generic option handler.
8 * 8 *
9 * 9 *
10 * Version: $Id: sock.c,v 1.117 2002/02/01 22:01:03 davem Exp $
11 *
12 * Authors: Ross Biro 10 * Authors: Ross Biro
13 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> 11 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
14 * Florian La Roche, <flla@stud.uni-sb.de> 12 * Florian La Roche, <flla@stud.uni-sb.de>
@@ -1068,7 +1066,7 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
1068 * to be taken into account in all callers. -acme 1066 * to be taken into account in all callers. -acme
1069 */ 1067 */
1070 sk_refcnt_debug_inc(newsk); 1068 sk_refcnt_debug_inc(newsk);
1071 newsk->sk_socket = NULL; 1069 sk_set_socket(newsk, NULL);
1072 newsk->sk_sleep = NULL; 1070 newsk->sk_sleep = NULL;
1073 1071
1074 if (newsk->sk_prot->sockets_allocated) 1072 if (newsk->sk_prot->sockets_allocated)
@@ -1444,7 +1442,7 @@ int __sk_mem_schedule(struct sock *sk, int size, int kind)
1444 /* Under pressure. */ 1442 /* Under pressure. */
1445 if (allocated > prot->sysctl_mem[1]) 1443 if (allocated > prot->sysctl_mem[1])
1446 if (prot->enter_memory_pressure) 1444 if (prot->enter_memory_pressure)
1447 prot->enter_memory_pressure(); 1445 prot->enter_memory_pressure(sk);
1448 1446
1449 /* Over hard limit. */ 1447 /* Over hard limit. */
1450 if (allocated > prot->sysctl_mem[2]) 1448 if (allocated > prot->sysctl_mem[2])
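
The __sk_mem_schedule() hunk above shows the tiered accounting against prot->sysctl_mem: above index 1 the protocol's enter_memory_pressure() callback fires (and now receives the socket), and above index 2 the allocation is over the hard limit. Below is a minimal userspace sketch of that tiered check; the threshold values, the verdict enum and the note_pressure() callback are illustrative assumptions, and the kernel's additional per-socket checks between the levels are omitted.

/*
 * Userspace sketch of a three-level memory accounting check:
 * sysctl_mem[0] = all fine, [1] = pressure level, [2] = hard limit.
 */
#include <stdio.h>

enum verdict { MEM_OK, MEM_PRESSURE, MEM_OVER_LIMIT };

struct proto_limits {
	long sysctl_mem[3];		/* { min, pressure, max } in pages  */
	void (*enter_pressure)(void);	/* may be NULL, as in the kernel    */
};

static enum verdict mem_schedule(const struct proto_limits *p, long allocated)
{
	if (allocated <= p->sysctl_mem[0])
		return MEM_OK;

	if (allocated > p->sysctl_mem[1] && p->enter_pressure)
		p->enter_pressure();	/* "Under pressure." */

	if (allocated > p->sysctl_mem[2])
		return MEM_OVER_LIMIT;	/* "Over hard limit." */

	return MEM_PRESSURE;
}

static void note_pressure(void) { puts("entering memory pressure"); }

int main(void)
{
	struct proto_limits p = {
		.sysctl_mem = { 100, 200, 300 },
		.enter_pressure = note_pressure,
	};
	enum verdict a = mem_schedule(&p, 50);
	enum verdict b = mem_schedule(&p, 250);
	enum verdict c = mem_schedule(&p, 400);
	printf("%d %d %d\n", a, b, c);	/* prints 0 1 2 */
	return 0;
}
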
@@ -1704,7 +1702,7 @@ void sock_init_data(struct socket *sock, struct sock *sk)
1704 sk->sk_rcvbuf = sysctl_rmem_default; 1702 sk->sk_rcvbuf = sysctl_rmem_default;
1705 sk->sk_sndbuf = sysctl_wmem_default; 1703 sk->sk_sndbuf = sysctl_wmem_default;
1706 sk->sk_state = TCP_CLOSE; 1704 sk->sk_state = TCP_CLOSE;
1707 sk->sk_socket = sock; 1705 sk_set_socket(sk, sock);
1708 1706
1709 sock_set_flag(sk, SOCK_ZAPPED); 1707 sock_set_flag(sk, SOCK_ZAPPED);
1710 1708
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 5fc801057244..a570e2af22cb 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -125,14 +125,6 @@ static struct ctl_table net_core_table[] = {
125#endif /* CONFIG_XFRM */ 125#endif /* CONFIG_XFRM */
126#endif /* CONFIG_NET */ 126#endif /* CONFIG_NET */
127 { 127 {
128 .ctl_name = NET_CORE_SOMAXCONN,
129 .procname = "somaxconn",
130 .data = &init_net.core.sysctl_somaxconn,
131 .maxlen = sizeof(int),
132 .mode = 0644,
133 .proc_handler = &proc_dointvec
134 },
135 {
136 .ctl_name = NET_CORE_BUDGET, 128 .ctl_name = NET_CORE_BUDGET,
137 .procname = "netdev_budget", 129 .procname = "netdev_budget",
138 .data = &netdev_budget, 130 .data = &netdev_budget,
@@ -151,6 +143,18 @@ static struct ctl_table net_core_table[] = {
151 { .ctl_name = 0 } 143 { .ctl_name = 0 }
152}; 144};
153 145
146static struct ctl_table netns_core_table[] = {
147 {
148 .ctl_name = NET_CORE_SOMAXCONN,
149 .procname = "somaxconn",
150 .data = &init_net.core.sysctl_somaxconn,
151 .maxlen = sizeof(int),
152 .mode = 0644,
153 .proc_handler = &proc_dointvec
154 },
155 { .ctl_name = 0 }
156};
157
154static __net_initdata struct ctl_path net_core_path[] = { 158static __net_initdata struct ctl_path net_core_path[] = {
155 { .procname = "net", .ctl_name = CTL_NET, }, 159 { .procname = "net", .ctl_name = CTL_NET, },
156 { .procname = "core", .ctl_name = NET_CORE, }, 160 { .procname = "core", .ctl_name = NET_CORE, },
@@ -159,23 +163,17 @@ static __net_initdata struct ctl_path net_core_path[] = {
159 163
160static __net_init int sysctl_core_net_init(struct net *net) 164static __net_init int sysctl_core_net_init(struct net *net)
161{ 165{
162 struct ctl_table *tbl, *tmp; 166 struct ctl_table *tbl;
163 167
164 net->core.sysctl_somaxconn = SOMAXCONN; 168 net->core.sysctl_somaxconn = SOMAXCONN;
165 169
166 tbl = net_core_table; 170 tbl = netns_core_table;
167 if (net != &init_net) { 171 if (net != &init_net) {
168 tbl = kmemdup(tbl, sizeof(net_core_table), GFP_KERNEL); 172 tbl = kmemdup(tbl, sizeof(netns_core_table), GFP_KERNEL);
169 if (tbl == NULL) 173 if (tbl == NULL)
170 goto err_dup; 174 goto err_dup;
171 175
172 for (tmp = tbl; tmp->procname; tmp++) { 176 tbl[0].data = &net->core.sysctl_somaxconn;
173 if (tmp->data >= (void *)&init_net &&
174 tmp->data < (void *)(&init_net + 1))
175 tmp->data += (char *)net - (char *)&init_net;
176 else
177 tmp->mode &= ~0222;
178 }
179 } 177 }
180 178
181 net->core.sysctl_hdr = register_net_sysctl_table(net, 179 net->core.sysctl_hdr = register_net_sysctl_table(net,
@@ -186,7 +184,7 @@ static __net_init int sysctl_core_net_init(struct net *net)
186 return 0; 184 return 0;
187 185
188err_reg: 186err_reg:
189 if (tbl != net_core_table) 187 if (tbl != netns_core_table)
190 kfree(tbl); 188 kfree(tbl);
191err_dup: 189err_dup:
192 return -ENOMEM; 190 return -ENOMEM;
@@ -198,7 +196,7 @@ static __net_exit void sysctl_core_net_exit(struct net *net)
198 196
199 tbl = net->core.sysctl_hdr->ctl_table_arg; 197 tbl = net->core.sysctl_hdr->ctl_table_arg;
200 unregister_net_sysctl_table(net->core.sysctl_hdr); 198 unregister_net_sysctl_table(net->core.sysctl_hdr);
201 BUG_ON(tbl == net_core_table); 199 BUG_ON(tbl == netns_core_table);
202 kfree(tbl); 200 kfree(tbl);
203} 201}
204 202
@@ -209,6 +207,7 @@ static __net_initdata struct pernet_operations sysctl_core_ops = {
209 207
210static __init int sysctl_core_init(void) 208static __init int sysctl_core_init(void)
211{ 209{
210 register_net_sysctl_rotable(net_core_path, net_core_table);
212 return register_pernet_subsys(&sysctl_core_ops); 211 return register_pernet_subsys(&sysctl_core_ops);
213} 212}
214 213
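
The sysctl changes above split somaxconn into its own netns_core_table: sysctl_core_net_init() duplicates that small template for every non-initial namespace and re-points the entry's .data at the namespace's own sysctl_somaxconn, while the remaining net_core_table is registered once through register_net_sysctl_rotable(). The sketch below shows the duplicate-and-repoint pattern in userspace C; struct ns, ns_core_template and the entry layout are illustrative assumptions, not the kernel's structures.

/*
 * Userspace sketch of the per-namespace table trick: keep one template
 * table whose .data points at the init instance, duplicate it for every
 * other instance, and re-point .data at that instance's own storage.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct ctl_entry { const char *procname; int *data; };
struct ns { int somaxconn; struct ctl_entry *table; };

static struct ns init_ns;

/* Template: its .data refers to the init namespace's value. */
static struct ctl_entry ns_core_template[] = {
	{ .procname = "somaxconn", .data = &init_ns.somaxconn },
	{ .procname = NULL },
};

static int ns_init(struct ns *net)
{
	net->somaxconn = 128;			/* SOMAXCONN default */

	struct ctl_entry *tbl = ns_core_template;
	if (net != &init_ns) {
		tbl = malloc(sizeof(ns_core_template));	/* kmemdup() analogue */
		if (!tbl)
			return -1;
		memcpy(tbl, ns_core_template, sizeof(ns_core_template));
		tbl[0].data = &net->somaxconn;	/* point at this namespace */
	}
	net->table = tbl;
	return 0;
}

int main(void)
{
	struct ns other;

	if (ns_init(&init_ns) || ns_init(&other))
		return 1;

	*other.table[0].data = 1024;		/* tune only the new namespace */
	printf("init=%d other=%d\n", init_ns.somaxconn, other.somaxconn);

	if (other.table != ns_core_template)
		free(other.table);
	return 0;
}
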