Diffstat (limited to 'net/core/dev.c')
-rw-r--r--	net/core/dev.c | 1402
1 file changed, 654 insertions(+), 748 deletions(-)
diff --git a/net/core/dev.c b/net/core/dev.c
index f769098774b7..d273e4e3ecdc 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -130,6 +130,7 @@
 #include <linux/jhash.h>
 #include <linux/random.h>
 #include <trace/events/napi.h>
+#include <linux/pci.h>
 
 #include "net-sysfs.h"
 
@@ -207,6 +208,20 @@ static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
 	return &net->dev_index_head[ifindex & (NETDEV_HASHENTRIES - 1)];
 }
 
+static inline void rps_lock(struct softnet_data *sd)
+{
+#ifdef CONFIG_RPS
+	spin_lock(&sd->input_pkt_queue.lock);
+#endif
+}
+
+static inline void rps_unlock(struct softnet_data *sd)
+{
+#ifdef CONFIG_RPS
+	spin_unlock(&sd->input_pkt_queue.lock);
+#endif
+}
+
 /* Device list insertion */
 static int list_netdevice(struct net_device *dev)
 {
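Note on the rps_lock()/rps_unlock() helpers added above: they reuse the spinlock already embedded in input_pkt_queue rather than adding a new one, and they compile to nothing when CONFIG_RPS is off, since the backlog is then touched only by its owning CPU. The idiom, reduced to its shape (illustrative config symbol and macro names, not kernel API):

#ifdef CONFIG_FOO			/* illustrative config symbol */
#define foo_lock(l)	spin_lock(l)
#define foo_unlock(l)	spin_unlock(l)
#else
#define foo_lock(l)	do { } while (0)	/* single-CPU access: no lock needed */
#define foo_unlock(l)	do { } while (0)
#endif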
@@ -249,7 +264,7 @@ static RAW_NOTIFIER_HEAD(netdev_chain);
  * queue in the local softnet handler.
  */
 
-DEFINE_PER_CPU(struct softnet_data, softnet_data);
+DEFINE_PER_CPU_ALIGNED(struct softnet_data, softnet_data);
 EXPORT_PER_CPU_SYMBOL(softnet_data);
 
 #ifdef CONFIG_LOCKDEP
@@ -773,14 +788,17 @@ EXPORT_SYMBOL(__dev_getfirstbyhwtype);
 
 struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type)
 {
-	struct net_device *dev;
-
-	rtnl_lock();
-	dev = __dev_getfirstbyhwtype(net, type);
-	if (dev)
-		dev_hold(dev);
-	rtnl_unlock();
-	return dev;
+	struct net_device *dev, *ret = NULL;
+
+	rcu_read_lock();
+	for_each_netdev_rcu(net, dev)
+		if (dev->type == type) {
+			dev_hold(dev);
+			ret = dev;
+			break;
+		}
+	rcu_read_unlock();
+	return ret;
 }
 EXPORT_SYMBOL(dev_getfirstbyhwtype);
 
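The rewrite above drops the RTNL dependency: readers walk the device list under rcu_read_lock() and pin the result with dev_hold() before leaving the critical section. A caller sketch (hypothetical, for illustration) showing the reference the lookup takes on the caller's behalf:

	struct net_device *dev;

	dev = dev_getfirstbyhwtype(&init_net, ARPHRD_ETHER);
	if (dev) {
		/* ... use dev; the lookup took a reference for us ... */
		dev_put(dev);
	}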
@@ -984,15 +1002,10 @@ int dev_change_name(struct net_device *dev, const char *newname)
 		return err;
 
 rollback:
-	/* For now only devices in the initial network namespace
-	 * are in sysfs.
-	 */
-	if (net_eq(net, &init_net)) {
-		ret = device_rename(&dev->dev, dev->name);
-		if (ret) {
-			memcpy(dev->name, oldname, IFNAMSIZ);
-			return ret;
-		}
+	ret = device_rename(&dev->dev, dev->name);
+	if (ret) {
+		memcpy(dev->name, oldname, IFNAMSIZ);
+		return ret;
 	}
 
 	write_lock_bh(&dev_base_lock);
@@ -1085,9 +1098,9 @@ void netdev_state_change(struct net_device *dev)
 }
 EXPORT_SYMBOL(netdev_state_change);
 
-void netdev_bonding_change(struct net_device *dev, unsigned long event)
+int netdev_bonding_change(struct net_device *dev, unsigned long event)
 {
-	call_netdevice_notifiers(event, dev);
+	return call_netdevice_notifiers(event, dev);
 }
 EXPORT_SYMBOL(netdev_bonding_change);
 
@@ -1417,6 +1430,7 @@ EXPORT_SYMBOL(unregister_netdevice_notifier);
 
 int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
 {
+	ASSERT_RTNL();
 	return raw_notifier_call_chain(&netdev_chain, val, dev);
 }
 
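The new ASSERT_RTNL() documents that the netdev notifier chain is always driven under RTNL. A hedged sketch of a consumer of this chain (illustrative module code, not part of this patch); in kernels of this era the void pointer passed to the callback is the struct net_device itself:

static int my_netdev_event(struct notifier_block *nb,
			   unsigned long event, void *ptr)
{
	struct net_device *dev = ptr;

	if (event == NETDEV_UP)
		printk(KERN_INFO "%s is up\n", dev->name);
	return NOTIFY_DONE;
}

static struct notifier_block my_nb = {
	.notifier_call = my_netdev_event,
};

/* register_netdevice_notifier(&my_nb) from module init,
 * unregister_netdevice_notifier(&my_nb) on exit.
 */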
@@ -1435,7 +1449,7 @@ void net_disable_timestamp(void)
 }
 EXPORT_SYMBOL(net_disable_timestamp);
 
-static inline void net_timestamp(struct sk_buff *skb)
+static inline void net_timestamp_set(struct sk_buff *skb)
 {
 	if (atomic_read(&netstamp_needed))
 		__net_timestamp(skb);
@@ -1443,6 +1457,12 @@ static inline void net_timestamp(struct sk_buff *skb)
 		skb->tstamp.tv64 = 0;
 }
 
+static inline void net_timestamp_check(struct sk_buff *skb)
+{
+	if (!skb->tstamp.tv64 && atomic_read(&netstamp_needed))
+		__net_timestamp(skb);
+}
+
 /**
  * dev_forward_skb - loopback an skb to another netif
  *
@@ -1451,7 +1471,7 @@ static inline void net_timestamp(struct sk_buff *skb)
  *
  * return values:
  *	NET_RX_SUCCESS	(no congestion)
- *	NET_RX_DROP     (packet was dropped)
+ *	NET_RX_DROP     (packet was dropped, but freed)
  *
  * dev_forward_skb can be used for injecting an skb from the
  * start_xmit function of one device into the receive queue
@@ -1465,12 +1485,11 @@ int dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
 {
 	skb_orphan(skb);
 
-	if (!(dev->flags & IFF_UP))
-		return NET_RX_DROP;
-
-	if (skb->len > (dev->mtu + dev->hard_header_len))
+	if (!(dev->flags & IFF_UP) ||
+	    (skb->len > (dev->mtu + dev->hard_header_len))) {
+		kfree_skb(skb);
 		return NET_RX_DROP;
-
+	}
 	skb_set_dev(skb, dev);
 	skb->tstamp.tv64 = 0;
 	skb->pkt_type = PACKET_HOST;
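With this change dev_forward_skb() frees the skb itself on the drop path, matching the amended comment above ("dropped, but freed"). A hypothetical veth-style caller sketch, assuming a get_peer() helper that does not exist in this file:

static netdev_tx_t my_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct net_device *peer = get_peer(dev);	/* hypothetical helper */

	/* consumes skb on both success and drop; no kfree_skb() here */
	dev_forward_skb(peer, skb);
	return NETDEV_TX_OK;
}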
@@ -1490,9 +1509,9 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
 
 #ifdef CONFIG_NET_CLS_ACT
 	if (!(skb->tstamp.tv64 && (G_TC_FROM(skb->tc_verd) & AT_INGRESS)))
-		net_timestamp(skb);
+		net_timestamp_set(skb);
 #else
-	net_timestamp(skb);
+	net_timestamp_set(skb);
 #endif
 
 	rcu_read_lock();
@@ -1538,8 +1557,9 @@ static inline void __netif_reschedule(struct Qdisc *q)
 
 	local_irq_save(flags);
 	sd = &__get_cpu_var(softnet_data);
-	q->next_sched = sd->output_queue;
-	sd->output_queue = q;
+	q->next_sched = NULL;
+	*sd->output_queue_tailp = q;
+	sd->output_queue_tailp = &q->next_sched;
 	raise_softirq_irqoff(NET_TX_SOFTIRQ);
 	local_irq_restore(flags);
 }
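The hunk above turns the per-CPU output_queue from push-at-head into append-at-tail via output_queue_tailp, so qdiscs run in the order they were scheduled. The tail-pointer idiom in isolation (a minimal user-space sketch; node/queue types are illustrative):

struct node { struct node *next; };

struct queue {
	struct node *head;
	struct node **tailp;	/* &head while empty, else &last->next */
};

static void queue_init(struct queue *q)
{
	q->head = NULL;
	q->tailp = &q->head;
}

static void queue_append(struct queue *q, struct node *n)
{
	n->next = NULL;
	*q->tailp = n;		/* O(1) append, preserves FIFO order */
	q->tailp = &n->next;
}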
@@ -1784,18 +1804,27 @@ EXPORT_SYMBOL(netdev_rx_csum_fault);
  * 2. No high memory really exists on this machine.
  */
 
-static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
+static int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
 {
 #ifdef CONFIG_HIGHMEM
 	int i;
+	if (!(dev->features & NETIF_F_HIGHDMA)) {
+		for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
+			if (PageHighMem(skb_shinfo(skb)->frags[i].page))
+				return 1;
+	}
 
-	if (dev->features & NETIF_F_HIGHDMA)
-		return 0;
-
-	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
-		if (PageHighMem(skb_shinfo(skb)->frags[i].page))
-			return 1;
+	if (PCI_DMA_BUS_IS_PHYS) {
+		struct device *pdev = dev->dev.parent;
 
+		if (!pdev)
+			return 0;
+		for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+			dma_addr_t addr = page_to_phys(skb_shinfo(skb)->frags[i].page);
+			if (!pdev->dma_mask || addr + PAGE_SIZE - 1 > *pdev->dma_mask)
+				return 1;
+		}
+	}
 #endif
 	return 0;
 }
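The PCI_DMA_BUS_IS_PHYS branch rejects an skb when any fragment page lies above the parent device's DMA mask. The arithmetic in isolation, as a hedged sketch (illustrative helper, not kernel API): a page starting at page_phys is unreachable if its last byte exceeds the mask.

static int page_dma_unreachable(u64 page_phys, u64 page_size, u64 dma_mask)
{
	return page_phys + page_size - 1 > dma_mask;
}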
@@ -1853,6 +1882,17 @@ static int dev_gso_segment(struct sk_buff *skb)
 	return 0;
 }
 
+/*
+ * Try to orphan skb early, right before transmission by the device.
+ * We cannot orphan skb if tx timestamp is requested, since
+ * drivers need to call skb_tstamp_tx() to send the timestamp.
+ */
+static inline void skb_orphan_try(struct sk_buff *skb)
+{
+	if (!skb_tx(skb)->flags)
+		skb_orphan(skb);
+}
+
 int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
 			struct netdev_queue *txq)
 {
@@ -1863,13 +1903,6 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
 		if (!list_empty(&ptype_all))
 			dev_queue_xmit_nit(skb, dev);
 
-		if (netif_needs_gso(dev, skb)) {
-			if (unlikely(dev_gso_segment(skb)))
-				goto out_kfree_skb;
-			if (skb->next)
-				goto gso;
-		}
-
 		/*
 		 * If device doesnt need skb->dst, release it right now while
 		 * its hot in this cpu cache
@@ -1877,23 +1910,18 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
 		if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
 			skb_dst_drop(skb);
 
+		skb_orphan_try(skb);
+
+		if (netif_needs_gso(dev, skb)) {
+			if (unlikely(dev_gso_segment(skb)))
+				goto out_kfree_skb;
+			if (skb->next)
+				goto gso;
+		}
+
 		rc = ops->ndo_start_xmit(skb, dev);
 		if (rc == NETDEV_TX_OK)
 			txq_trans_update(txq);
-		/*
-		 * TODO: if skb_orphan() was called by
-		 * dev->hard_start_xmit() (for example, the unmodified
-		 * igb driver does that; bnx2 doesn't), then
-		 * skb_tx_software_timestamp() will be unable to send
-		 * back the time stamp.
-		 *
-		 * How can this be prevented? Always create another
-		 * reference to the socket before calling
-		 * dev->hard_start_xmit()? Prevent that skb_orphan()
-		 * does anything in dev->hard_start_xmit() by clearing
-		 * the skb destructor before the call and restoring it
-		 * afterwards, then doing the skb_orphan() ourselves?
-		 */
 		return rc;
 	}
 
@@ -1932,7 +1960,7 @@ out_kfree_skb:
 	return rc;
 }
 
-static u32 skb_tx_hashrnd;
+static u32 hashrnd __read_mostly;
 
 u16 skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb)
 {
@@ -1948,9 +1976,9 @@ u16 skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb)
 	if (skb->sk && skb->sk->sk_hash)
 		hash = skb->sk->sk_hash;
 	else
-		hash = skb->protocol;
+		hash = (__force u16) skb->protocol;
 
-	hash = jhash_1word(hash, skb_tx_hashrnd);
+	hash = jhash_1word(hash, hashrnd);
 
 	return (u16) (((u64) hash * dev->real_num_tx_queues) >> 32);
 }
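skb_tx_hash() maps the 32-bit hash onto [0, real_num_tx_queues) with a multiply-and-shift rather than a modulo: for a hash uniform over [0, 2^32), ((u64)hash * n) >> 32 lands uniformly in [0, n) without a division. A small user-space check of the boundary behaviour:

#include <stdint.h>
#include <stdio.h>

static uint16_t pick_queue(uint32_t hash, uint16_t nqueues)
{
	return (uint16_t)(((uint64_t)hash * nqueues) >> 32);
}

int main(void)
{
	/* prints "0 7": hash 0 maps to the first of 8 queues,
	 * 0xffffffff to the last */
	printf("%u %u\n", pick_queue(0, 8), pick_queue(0xffffffffu, 8));
	return 0;
}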
@@ -1960,10 +1988,9 @@ static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index)
 {
 	if (unlikely(queue_index >= dev->real_num_tx_queues)) {
 		if (net_ratelimit()) {
-			WARN(1, "%s selects TX queue %d, but "
-			     "real number of TX queues is %d\n",
-			     dev->name, queue_index,
-			     dev->real_num_tx_queues);
+			pr_warning("%s selects TX queue %d, but "
+				"real number of TX queues is %d\n",
+				dev->name, queue_index, dev->real_num_tx_queues);
 		}
 		return 0;
 	}
@@ -1990,7 +2017,7 @@ static struct netdev_queue *dev_pick_tx(struct net_device *dev,
 			queue_index = skb_tx_hash(dev, skb);
 
 			if (sk) {
-				struct dst_entry *dst = rcu_dereference_bh(sk->sk_dst_cache);
+				struct dst_entry *dst = rcu_dereference_check(sk->sk_dst_cache, 1);
 
 				if (dst && skb_dst(skb) == dst)
 					sk_tx_queue_set(sk, queue_index);
@@ -2020,6 +2047,8 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
 		 * waiting to be sent out; and the qdisc is not running -
 		 * xmit the skb directly.
 		 */
+		if (!(dev->priv_flags & IFF_XMIT_DST_RELEASE))
+			skb_dst_force(skb);
 		__qdisc_update_bstats(q, skb->len);
 		if (sch_direct_xmit(skb, q, dev, txq, root_lock))
 			__qdisc_run(q);
@@ -2028,6 +2057,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
 
 		rc = NET_XMIT_SUCCESS;
 	} else {
+		skb_dst_force(skb);
 		rc = qdisc_enqueue_root(skb, q);
 		qdisc_run(q);
 	}
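Both skb_dst_force() calls address the same hazard: once the skb is handed to a qdisc it can outlive the sender's RCU read-side section, so a dst held only by RCU must be converted to a refcounted reference before enqueue. A rough sketch of the refcount side of that contract (illustrative only; the real helper also handles the noref dst encoding):

static inline void my_dst_pin(struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);

	if (dst)
		dst_hold(dst);	/* pin: skb may sit in the queue past rcu */
}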
@@ -2175,11 +2205,249 @@ EXPORT_SYMBOL(dev_queue_xmit);
 =======================================================================*/
 
 int netdev_max_backlog __read_mostly = 1000;
+int netdev_tstamp_prequeue __read_mostly = 1;
 int netdev_budget __read_mostly = 300;
 int weight_p __read_mostly = 64; /* old backlog weight */
 
-DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };
+/* Called with irq disabled */
+static inline void ____napi_schedule(struct softnet_data *sd,
+				     struct napi_struct *napi)
+{
+	list_add_tail(&napi->poll_list, &sd->poll_list);
+	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
+}
 
+#ifdef CONFIG_RPS
+
+/* One global table that all flow-based protocols share. */
+struct rps_sock_flow_table *rps_sock_flow_table __read_mostly;
+EXPORT_SYMBOL(rps_sock_flow_table);
+
+/*
+ * get_rps_cpu is called from netif_receive_skb and returns the target
+ * CPU from the RPS map of the receiving queue for a given skb.
+ * rcu_read_lock must be held on entry.
+ */
+static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
+		       struct rps_dev_flow **rflowp)
+{
+	struct ipv6hdr *ip6;
+	struct iphdr *ip;
+	struct netdev_rx_queue *rxqueue;
+	struct rps_map *map;
+	struct rps_dev_flow_table *flow_table;
+	struct rps_sock_flow_table *sock_flow_table;
+	int cpu = -1;
+	u8 ip_proto;
+	u16 tcpu;
+	u32 addr1, addr2, ihl;
+	union {
+		u32 v32;
+		u16 v16[2];
+	} ports;
+
+	if (skb_rx_queue_recorded(skb)) {
+		u16 index = skb_get_rx_queue(skb);
+		if (unlikely(index >= dev->num_rx_queues)) {
+			if (net_ratelimit()) {
+				pr_warning("%s received packet on queue "
+					"%u, but number of RX queues is %u\n",
+					dev->name, index, dev->num_rx_queues);
+			}
+			goto done;
+		}
+		rxqueue = dev->_rx + index;
+	} else
+		rxqueue = dev->_rx;
+
+	if (!rxqueue->rps_map && !rxqueue->rps_flow_table)
+		goto done;
+
+	if (skb->rxhash)
+		goto got_hash; /* Skip hash computation on packet header */
+
+	switch (skb->protocol) {
+	case __constant_htons(ETH_P_IP):
+		if (!pskb_may_pull(skb, sizeof(*ip)))
+			goto done;
+
+		ip = (struct iphdr *) skb->data;
+		ip_proto = ip->protocol;
+		addr1 = (__force u32) ip->saddr;
+		addr2 = (__force u32) ip->daddr;
+		ihl = ip->ihl;
+		break;
+	case __constant_htons(ETH_P_IPV6):
+		if (!pskb_may_pull(skb, sizeof(*ip6)))
+			goto done;
+
+		ip6 = (struct ipv6hdr *) skb->data;
+		ip_proto = ip6->nexthdr;
+		addr1 = (__force u32) ip6->saddr.s6_addr32[3];
+		addr2 = (__force u32) ip6->daddr.s6_addr32[3];
+		ihl = (40 >> 2);
+		break;
+	default:
+		goto done;
+	}
+	switch (ip_proto) {
+	case IPPROTO_TCP:
+	case IPPROTO_UDP:
+	case IPPROTO_DCCP:
+	case IPPROTO_ESP:
+	case IPPROTO_AH:
+	case IPPROTO_SCTP:
+	case IPPROTO_UDPLITE:
+		if (pskb_may_pull(skb, (ihl * 4) + 4)) {
+			ports.v32 = * (__force u32 *) (skb->data + (ihl * 4));
+			if (ports.v16[1] < ports.v16[0])
+				swap(ports.v16[0], ports.v16[1]);
+			break;
+		}
+	default:
+		ports.v32 = 0;
+		break;
+	}
+
+	/* get a consistent hash (same value on both flow directions) */
+	if (addr2 < addr1)
+		swap(addr1, addr2);
+	skb->rxhash = jhash_3words(addr1, addr2, ports.v32, hashrnd);
+	if (!skb->rxhash)
+		skb->rxhash = 1;
+
+got_hash:
+	flow_table = rcu_dereference(rxqueue->rps_flow_table);
+	sock_flow_table = rcu_dereference(rps_sock_flow_table);
+	if (flow_table && sock_flow_table) {
+		u16 next_cpu;
+		struct rps_dev_flow *rflow;
+
+		rflow = &flow_table->flows[skb->rxhash & flow_table->mask];
+		tcpu = rflow->cpu;
+
+		next_cpu = sock_flow_table->ents[skb->rxhash &
+		    sock_flow_table->mask];
+
+		/*
+		 * If the desired CPU (where last recvmsg was done) is
+		 * different from current CPU (one in the rx-queue flow
+		 * table entry), switch if one of the following holds:
+		 *   - Current CPU is unset (equal to RPS_NO_CPU).
+		 *   - Current CPU is offline.
+		 *   - The current CPU's queue tail has advanced beyond the
+		 *     last packet that was enqueued using this table entry.
+		 *     This guarantees that all previous packets for the flow
+		 *     have been dequeued, thus preserving in order delivery.
+		 */
+		if (unlikely(tcpu != next_cpu) &&
+		    (tcpu == RPS_NO_CPU || !cpu_online(tcpu) ||
+		     ((int)(per_cpu(softnet_data, tcpu).input_queue_head -
+		      rflow->last_qtail)) >= 0)) {
+			tcpu = rflow->cpu = next_cpu;
+			if (tcpu != RPS_NO_CPU)
+				rflow->last_qtail = per_cpu(softnet_data,
+				    tcpu).input_queue_head;
+		}
+		if (tcpu != RPS_NO_CPU && cpu_online(tcpu)) {
+			*rflowp = rflow;
+			cpu = tcpu;
+			goto done;
+		}
+	}
+
+	map = rcu_dereference(rxqueue->rps_map);
+	if (map) {
+		tcpu = map->cpus[((u64) skb->rxhash * map->len) >> 32];
+
+		if (cpu_online(tcpu)) {
+			cpu = tcpu;
+			goto done;
+		}
+	}
+
+done:
+	return cpu;
+}
+
+/* Called from hardirq (IPI) context */
+static void rps_trigger_softirq(void *data)
+{
+	struct softnet_data *sd = data;
+
+	____napi_schedule(sd, &sd->backlog);
+	sd->received_rps++;
+}
+
+#endif /* CONFIG_RPS */
+
+/*
+ * Check if this softnet_data structure is another cpu one
+ * If yes, queue it to our IPI list and return 1
+ * If no, return 0
+ */
+static int rps_ipi_queued(struct softnet_data *sd)
+{
+#ifdef CONFIG_RPS
+	struct softnet_data *mysd = &__get_cpu_var(softnet_data);
+
+	if (sd != mysd) {
+		sd->rps_ipi_next = mysd->rps_ipi_list;
+		mysd->rps_ipi_list = sd;
+
+		__raise_softirq_irqoff(NET_RX_SOFTIRQ);
+		return 1;
+	}
+#endif /* CONFIG_RPS */
+	return 0;
+}
+
+/*
+ * enqueue_to_backlog is called to queue an skb to a per CPU backlog
+ * queue (may be a remote CPU queue).
+ */
+static int enqueue_to_backlog(struct sk_buff *skb, int cpu,
+			      unsigned int *qtail)
+{
+	struct softnet_data *sd;
+	unsigned long flags;
+
+	sd = &per_cpu(softnet_data, cpu);
+
+	local_irq_save(flags);
+
+	rps_lock(sd);
+	if (skb_queue_len(&sd->input_pkt_queue) <= netdev_max_backlog) {
+		if (skb_queue_len(&sd->input_pkt_queue)) {
+enqueue:
+			__skb_queue_tail(&sd->input_pkt_queue, skb);
+#ifdef CONFIG_RPS
+			*qtail = sd->input_queue_head +
+					skb_queue_len(&sd->input_pkt_queue);
+#endif
+			rps_unlock(sd);
+			local_irq_restore(flags);
+			return NET_RX_SUCCESS;
+		}
+
+		/* Schedule NAPI for backlog device
+		 * We can use non atomic operation since we own the queue lock
+		 */
+		if (!__test_and_set_bit(NAPI_STATE_SCHED, &sd->backlog.state)) {
+			if (!rps_ipi_queued(sd))
+				____napi_schedule(sd, &sd->backlog);
+		}
+		goto enqueue;
+	}
+
+	sd->dropped++;
+	rps_unlock(sd);
+
+	local_irq_restore(flags);
+
+	kfree_skb(skb);
+	return NET_RX_DROP;
+}
 
 /**
  *	netif_rx	-	post buffer to the network code
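get_rps_cpu() above derives a direction-independent flow hash by sorting the address pair and the port pair before hashing, so A->B and B->A packets of the same flow pick the same CPU. A user-space sketch of that trick; mix3() is a stand-in for jhash_3words() keyed with the boot-time hashrnd, not the real function:

#include <stdint.h>

static uint32_t mix3(uint32_t a, uint32_t b, uint32_t c)
{
	/* placeholder mixer standing in for jhash_3words(a, b, c, rnd) */
	a ^= b * 0x9e3779b9u;
	a ^= c * 0x85ebca6bu;
	return a * 0xc2b2ae35u;
}

static uint32_t flow_hash(uint32_t saddr, uint32_t daddr,
			  uint16_t sport, uint16_t dport)
{
	uint32_t a1 = saddr, a2 = daddr;
	uint16_t p1 = sport, p2 = dport;

	if (a2 < a1) { uint32_t t = a1; a1 = a2; a2 = t; }
	if (p2 < p1) { uint16_t t = p1; p1 = p2; p2 = t; }
	/* both directions now present identical inputs to the hash */
	return mix3(a1, a2, ((uint32_t)p2 << 16) | p1);
}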
@@ -2198,41 +2466,38 @@ DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };
 
 int netif_rx(struct sk_buff *skb)
 {
-	struct softnet_data *queue;
-	unsigned long flags;
+	int ret;
 
 	/* if netpoll wants it, pretend we never saw it */
 	if (netpoll_rx(skb))
 		return NET_RX_DROP;
 
-	if (!skb->tstamp.tv64)
-		net_timestamp(skb);
+	if (netdev_tstamp_prequeue)
+		net_timestamp_check(skb);
 
-	/*
-	 * The code is rearranged so that the path is the most
-	 * short when CPU is congested, but is still operating.
-	 */
-	local_irq_save(flags);
-	queue = &__get_cpu_var(softnet_data);
-
-	__get_cpu_var(netdev_rx_stat).total++;
-	if (queue->input_pkt_queue.qlen <= netdev_max_backlog) {
-		if (queue->input_pkt_queue.qlen) {
-enqueue:
-			__skb_queue_tail(&queue->input_pkt_queue, skb);
-			local_irq_restore(flags);
-			return NET_RX_SUCCESS;
-		}
-
-		napi_schedule(&queue->backlog);
-		goto enqueue;
-	}
-
-	__get_cpu_var(netdev_rx_stat).dropped++;
-	local_irq_restore(flags);
-
-	kfree_skb(skb);
-	return NET_RX_DROP;
+#ifdef CONFIG_RPS
+	{
+		struct rps_dev_flow voidflow, *rflow = &voidflow;
+		int cpu;
+
+		rcu_read_lock();
+
+		cpu = get_rps_cpu(skb->dev, skb, &rflow);
+		if (cpu < 0)
+			cpu = smp_processor_id();
+
+		ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
+
+		rcu_read_unlock();
+	}
+#else
+	{
+		unsigned int qtail;
+		ret = enqueue_to_backlog(skb, get_cpu(), &qtail);
+		put_cpu();
+	}
+#endif
+	return ret;
 }
 EXPORT_SYMBOL(netif_rx);
 
@@ -2277,6 +2542,7 @@ static void net_tx_action(struct softirq_action *h)
 		local_irq_disable();
 		head = sd->output_queue;
 		sd->output_queue = NULL;
+		sd->output_queue_tailp = &sd->output_queue;
 		local_irq_enable();
 
 		while (head) {
@@ -2353,7 +2619,8 @@ static inline struct sk_buff *handle_bridge(struct sk_buff *skb,
 #endif
 
 #if defined(CONFIG_MACVLAN) || defined(CONFIG_MACVLAN_MODULE)
-struct sk_buff *(*macvlan_handle_frame_hook)(struct sk_buff *skb) __read_mostly;
+struct sk_buff *(*macvlan_handle_frame_hook)(struct macvlan_port *p,
+					     struct sk_buff *skb) __read_mostly;
 EXPORT_SYMBOL_GPL(macvlan_handle_frame_hook);
 
 static inline struct sk_buff *handle_macvlan(struct sk_buff *skb,
@@ -2361,14 +2628,17 @@ static inline struct sk_buff *handle_macvlan(struct sk_buff *skb,
 					     int *ret,
 					     struct net_device *orig_dev)
 {
-	if (skb->dev->macvlan_port == NULL)
+	struct macvlan_port *port;
+
+	port = rcu_dereference(skb->dev->macvlan_port);
+	if (!port)
 		return skb;
 
 	if (*pt_prev) {
 		*ret = deliver_skb(skb, *pt_prev, orig_dev);
 		*pt_prev = NULL;
 	}
-	return macvlan_handle_frame_hook(skb);
+	return macvlan_handle_frame_hook(port, skb);
 }
 #else
 #define handle_macvlan(skb, pt_prev, ret, orig_dev)	(skb)
@@ -2469,22 +2739,56 @@ void netif_nit_deliver(struct sk_buff *skb)
 	rcu_read_unlock();
 }
 
-/**
- *	netif_receive_skb - process receive buffer from network
- *	@skb: buffer to process
- *
- *	netif_receive_skb() is the main receive data processing function.
- *	It always succeeds. The buffer may be dropped during processing
- *	for congestion control or by the protocol layers.
- *
- *	This function may only be called from softirq context and interrupts
- *	should be enabled.
- *
- *	Return values (usually ignored):
- *	NET_RX_SUCCESS: no congestion
- *	NET_RX_DROP: packet was dropped
- */
-int netif_receive_skb(struct sk_buff *skb)
+static inline void skb_bond_set_mac_by_master(struct sk_buff *skb,
+					      struct net_device *master)
+{
+	if (skb->pkt_type == PACKET_HOST) {
+		u16 *dest = (u16 *) eth_hdr(skb)->h_dest;
+
+		memcpy(dest, master->dev_addr, ETH_ALEN);
+	}
+}
+
+/* On bonding slaves other than the currently active slave, suppress
+ * duplicates except for 802.3ad ETH_P_SLOW, alb non-mcast/bcast, and
+ * ARP on active-backup slaves with arp_validate enabled.
+ */
+int __skb_bond_should_drop(struct sk_buff *skb, struct net_device *master)
+{
+	struct net_device *dev = skb->dev;
+
+	if (master->priv_flags & IFF_MASTER_ARPMON)
+		dev->last_rx = jiffies;
+
+	if ((master->priv_flags & IFF_MASTER_ALB) && master->br_port) {
+		/* Do address unmangle. The local destination address
+		 * will be always the one master has. Provides the right
+		 * functionality in a bridge.
+		 */
+		skb_bond_set_mac_by_master(skb, master);
+	}
+
+	if (dev->priv_flags & IFF_SLAVE_INACTIVE) {
+		if ((dev->priv_flags & IFF_SLAVE_NEEDARP) &&
+		    skb->protocol == __cpu_to_be16(ETH_P_ARP))
+			return 0;
+
+		if (master->priv_flags & IFF_MASTER_ALB) {
+			if (skb->pkt_type != PACKET_BROADCAST &&
+			    skb->pkt_type != PACKET_MULTICAST)
+				return 0;
+		}
+		if (master->priv_flags & IFF_MASTER_8023AD &&
+		    skb->protocol == __cpu_to_be16(ETH_P_SLOW))
+			return 0;
+
+		return 1;
+	}
+	return 0;
+}
+EXPORT_SYMBOL(__skb_bond_should_drop);
+
+static int __netif_receive_skb(struct sk_buff *skb)
 {
 	struct packet_type *ptype, *pt_prev;
 	struct net_device *orig_dev;
@@ -2494,8 +2798,8 @@ int netif_receive_skb(struct sk_buff *skb)
 	int ret = NET_RX_DROP;
 	__be16 type;
 
-	if (!skb->tstamp.tv64)
-		net_timestamp(skb);
+	if (!netdev_tstamp_prequeue)
+		net_timestamp_check(skb);
 
 	if (vlan_tx_tag_present(skb) && vlan_hwaccel_do_receive(skb))
 		return NET_RX_SUCCESS;
@@ -2517,7 +2821,7 @@ int netif_receive_skb(struct sk_buff *skb)
 			skb->dev = master;
 	}
 
-	__get_cpu_var(netdev_rx_stat).total++;
+	__get_cpu_var(softnet_data).processed++;
 
 	skb_reset_network_header(skb);
 	skb_reset_transport_header(skb);
@@ -2595,20 +2899,77 @@ out:
 	rcu_read_unlock();
 	return ret;
 }
+
+/**
+ *	netif_receive_skb - process receive buffer from network
+ *	@skb: buffer to process
+ *
+ *	netif_receive_skb() is the main receive data processing function.
+ *	It always succeeds. The buffer may be dropped during processing
+ *	for congestion control or by the protocol layers.
+ *
+ *	This function may only be called from softirq context and interrupts
+ *	should be enabled.
+ *
+ *	Return values (usually ignored):
+ *	NET_RX_SUCCESS: no congestion
+ *	NET_RX_DROP: packet was dropped
+ */
+int netif_receive_skb(struct sk_buff *skb)
+{
+	if (netdev_tstamp_prequeue)
+		net_timestamp_check(skb);
+
+#ifdef CONFIG_RPS
+	{
+		struct rps_dev_flow voidflow, *rflow = &voidflow;
+		int cpu, ret;
+
+		rcu_read_lock();
+
+		cpu = get_rps_cpu(skb->dev, skb, &rflow);
+
+		if (cpu >= 0) {
+			ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
+			rcu_read_unlock();
+		} else {
+			rcu_read_unlock();
+			ret = __netif_receive_skb(skb);
+		}
+
+		return ret;
+	}
+#else
+	return __netif_receive_skb(skb);
+#endif
+}
 EXPORT_SYMBOL(netif_receive_skb);
 
-/* Network device is going away, flush any packets still pending */
+/* Network device is going away, flush any packets still pending
+ * Called with irqs disabled.
+ */
 static void flush_backlog(void *arg)
 {
 	struct net_device *dev = arg;
-	struct softnet_data *queue = &__get_cpu_var(softnet_data);
+	struct softnet_data *sd = &__get_cpu_var(softnet_data);
 	struct sk_buff *skb, *tmp;
 
-	skb_queue_walk_safe(&queue->input_pkt_queue, skb, tmp)
+	rps_lock(sd);
+	skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp) {
 		if (skb->dev == dev) {
-			__skb_unlink(skb, &queue->input_pkt_queue);
+			__skb_unlink(skb, &sd->input_pkt_queue);
 			kfree_skb(skb);
+			input_queue_head_add(sd, 1);
 		}
+	}
+	rps_unlock(sd);
+
+	skb_queue_walk_safe(&sd->process_queue, skb, tmp) {
+		if (skb->dev == dev) {
+			__skb_unlink(skb, &sd->process_queue);
+			kfree_skb(skb);
+		}
+	}
 }
 
 static int napi_gro_complete(struct sk_buff *skb)
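The out-of-order guard quoted in get_rps_cpu()'s comment block compares two free-running counters: a flow may migrate to the CPU of the latest recvmsg() only after the old CPU's queue head (input_queue_head, advanced by flush/dequeue paths like the ones above) has passed the tail recorded when the flow last enqueued there. Reduced to the comparison itself, as an illustrative restatement (signed subtraction makes it wraparound-safe):

static int flow_may_move(unsigned int input_queue_head,
			 unsigned int last_qtail)
{
	return (int)(input_queue_head - last_qtail) >= 0;
}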
@@ -2911,27 +3272,85 @@ gro_result_t napi_gro_frags(struct napi_struct *napi)
 }
 EXPORT_SYMBOL(napi_gro_frags);
 
+/*
+ * net_rps_action sends any pending IPI's for rps.
+ * Note: called with local irq disabled, but exits with local irq enabled.
+ */
+static void net_rps_action_and_irq_enable(struct softnet_data *sd)
+{
+#ifdef CONFIG_RPS
+	struct softnet_data *remsd = sd->rps_ipi_list;
+
+	if (remsd) {
+		sd->rps_ipi_list = NULL;
+
+		local_irq_enable();
+
+		/* Send pending IPI's to kick RPS processing on remote cpus. */
+		while (remsd) {
+			struct softnet_data *next = remsd->rps_ipi_next;
+
+			if (cpu_online(remsd->cpu))
+				__smp_call_function_single(remsd->cpu,
+							   &remsd->csd, 0);
+			remsd = next;
+		}
+	} else
+#endif
+		local_irq_enable();
+}
+
 static int process_backlog(struct napi_struct *napi, int quota)
 {
 	int work = 0;
-	struct softnet_data *queue = &__get_cpu_var(softnet_data);
-	unsigned long start_time = jiffies;
+	struct softnet_data *sd = container_of(napi, struct softnet_data, backlog);
 
+#ifdef CONFIG_RPS
+	/* Check if we have pending ipi, its better to send them now,
+	 * not waiting net_rx_action() end.
+	 */
+	if (sd->rps_ipi_list) {
+		local_irq_disable();
+		net_rps_action_and_irq_enable(sd);
+	}
+#endif
 	napi->weight = weight_p;
-	do {
+	local_irq_disable();
+	while (work < quota) {
 		struct sk_buff *skb;
+		unsigned int qlen;
 
-		local_irq_disable();
-		skb = __skb_dequeue(&queue->input_pkt_queue);
-		if (!skb) {
-			__napi_complete(napi);
+		while ((skb = __skb_dequeue(&sd->process_queue))) {
 			local_irq_enable();
-			break;
+			__netif_receive_skb(skb);
+			if (++work >= quota)
+				return work;
+			local_irq_disable();
 		}
-		local_irq_enable();
 
-		netif_receive_skb(skb);
-	} while (++work < quota && jiffies == start_time);
+		rps_lock(sd);
+		qlen = skb_queue_len(&sd->input_pkt_queue);
+		if (qlen) {
+			input_queue_head_add(sd, qlen);
+			skb_queue_splice_tail_init(&sd->input_pkt_queue,
+						   &sd->process_queue);
+		}
+		if (qlen < quota - work) {
+			/*
+			 * Inline a custom version of __napi_complete().
+			 * only current cpu owns and manipulates this napi,
+			 * and NAPI_STATE_SCHED is the only possible flag set on backlog.
+			 * we can use a plain write instead of clear_bit(),
+			 * and we dont need an smp_mb() memory barrier.
+			 */
+			list_del(&napi->poll_list);
+			napi->state = 0;
+
+			quota = work + qlen;
+		}
+		rps_unlock(sd);
+	}
+	local_irq_enable();
 
 	return work;
 }
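process_backlog() now drains in two stages: producers (netif_rx(), remote RPS CPUs) append to input_pkt_queue under rps_lock(), and the consumer splices that whole queue onto the consumer-private process_queue in one locked operation, then processes packets with the lock dropped. A minimal sketch of the batching pattern under stated assumptions (pthread mutex instead of the irq-safe spinlock, FIFO bookkeeping of the list elided; types are illustrative):

#include <pthread.h>
#include <stddef.h>

struct pkt { struct pkt *next; };

struct backlog {
	pthread_mutex_t lock;
	struct pkt *input;	/* shared with producers */
	struct pkt *process;	/* consumer-private, drained lock-free */
};

static void consume(struct backlog *b, void (*handle)(struct pkt *))
{
	struct pkt *p;

	/* one short critical section per batch, not per packet */
	pthread_mutex_lock(&b->lock);
	b->process = b->input;
	b->input = NULL;
	pthread_mutex_unlock(&b->lock);

	while ((p = b->process)) {
		b->process = p->next;
		handle(p);
	}
}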
@@ -2947,8 +3366,7 @@ void __napi_schedule(struct napi_struct *n)
 	unsigned long flags;
 
 	local_irq_save(flags);
-	list_add_tail(&n->poll_list, &__get_cpu_var(softnet_data).poll_list);
-	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
+	____napi_schedule(&__get_cpu_var(softnet_data), n);
 	local_irq_restore(flags);
 }
 EXPORT_SYMBOL(__napi_schedule);
@@ -3019,17 +3437,16 @@ void netif_napi_del(struct napi_struct *napi)
 }
 EXPORT_SYMBOL(netif_napi_del);
 
-
 static void net_rx_action(struct softirq_action *h)
 {
-	struct list_head *list = &__get_cpu_var(softnet_data).poll_list;
+	struct softnet_data *sd = &__get_cpu_var(softnet_data);
 	unsigned long time_limit = jiffies + 2;
 	int budget = netdev_budget;
 	void *have;
 
 	local_irq_disable();
 
-	while (!list_empty(list)) {
+	while (!list_empty(&sd->poll_list)) {
 		struct napi_struct *n;
 		int work, weight;
 
@@ -3047,7 +3464,7 @@ static void net_rx_action(struct softirq_action *h)
 		 * entries to the tail of this list, and only ->poll()
 		 * calls can remove this head entry from the list.
 		 */
-		n = list_first_entry(list, struct napi_struct, poll_list);
+		n = list_first_entry(&sd->poll_list, struct napi_struct, poll_list);
 
 		have = netpoll_poll_lock(n);
 
@@ -3082,13 +3499,13 @@ static void net_rx_action(struct softirq_action *h)
 				napi_complete(n);
 				local_irq_disable();
 			} else
-				list_move_tail(&n->poll_list, list);
+				list_move_tail(&n->poll_list, &sd->poll_list);
 		}
 
 		netpoll_poll_unlock(have);
 	}
 out:
-	local_irq_enable();
+	net_rps_action_and_irq_enable(sd);
 
 #ifdef CONFIG_NET_DMA
 	/*
@@ -3101,7 +3518,7 @@ out:
 	return;
 
 softnet_break:
-	__get_cpu_var(netdev_rx_stat).time_squeeze++;
+	sd->time_squeeze++;
 	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
 	goto out;
 }
@@ -3302,17 +3719,17 @@ static int dev_seq_show(struct seq_file *seq, void *v)
 	return 0;
 }
 
-static struct netif_rx_stats *softnet_get_online(loff_t *pos)
+static struct softnet_data *softnet_get_online(loff_t *pos)
 {
-	struct netif_rx_stats *rc = NULL;
+	struct softnet_data *sd = NULL;
 
 	while (*pos < nr_cpu_ids)
 		if (cpu_online(*pos)) {
-			rc = &per_cpu(netdev_rx_stat, *pos);
+			sd = &per_cpu(softnet_data, *pos);
 			break;
 		} else
 			++*pos;
-	return rc;
+	return sd;
 }
 
 static void *softnet_seq_start(struct seq_file *seq, loff_t *pos)
@@ -3332,12 +3749,12 @@ static void softnet_seq_stop(struct seq_file *seq, void *v)
 
 static int softnet_seq_show(struct seq_file *seq, void *v)
 {
-	struct netif_rx_stats *s = v;
+	struct softnet_data *sd = v;
 
-	seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
-		   s->total, s->dropped, s->time_squeeze, 0,
+	seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
+		   sd->processed, sd->dropped, sd->time_squeeze, 0,
 		   0, 0, 0, 0, /* was fastroute */
-		   s->cpu_collision);
+		   sd->cpu_collision, sd->received_rps);
 	return 0;
 }
 
@@ -3560,11 +3977,10 @@ int netdev_set_master(struct net_device *slave, struct net_device *master)
 
 	slave->master = master;
 
-	synchronize_net();
-
-	if (old)
+	if (old) {
+		synchronize_net();
 		dev_put(old);
-
+	}
 	if (master)
 		slave->flags |= IFF_SLAVE;
 	else
@@ -3741,562 +4157,6 @@ void dev_set_rx_mode(struct net_device *dev) | |||
3741 | netif_addr_unlock_bh(dev); | 4157 | netif_addr_unlock_bh(dev); |
3742 | } | 4158 | } |
3743 | 4159 | ||
3744 | /* hw addresses list handling functions */ | ||
3745 | |||
3746 | static int __hw_addr_add(struct netdev_hw_addr_list *list, unsigned char *addr, | ||
3747 | int addr_len, unsigned char addr_type) | ||
3748 | { | ||
3749 | struct netdev_hw_addr *ha; | ||
3750 | int alloc_size; | ||
3751 | |||
3752 | if (addr_len > MAX_ADDR_LEN) | ||
3753 | return -EINVAL; | ||
3754 | |||
3755 | list_for_each_entry(ha, &list->list, list) { | ||
3756 | if (!memcmp(ha->addr, addr, addr_len) && | ||
3757 | ha->type == addr_type) { | ||
3758 | ha->refcount++; | ||
3759 | return 0; | ||
3760 | } | ||
3761 | } | ||
3762 | |||
3763 | |||
3764 | alloc_size = sizeof(*ha); | ||
3765 | if (alloc_size < L1_CACHE_BYTES) | ||
3766 | alloc_size = L1_CACHE_BYTES; | ||
3767 | ha = kmalloc(alloc_size, GFP_ATOMIC); | ||
3768 | if (!ha) | ||
3769 | return -ENOMEM; | ||
3770 | memcpy(ha->addr, addr, addr_len); | ||
3771 | ha->type = addr_type; | ||
3772 | ha->refcount = 1; | ||
3773 | ha->synced = false; | ||
3774 | list_add_tail_rcu(&ha->list, &list->list); | ||
3775 | list->count++; | ||
3776 | return 0; | ||
3777 | } | ||
3778 | |||
3779 | static void ha_rcu_free(struct rcu_head *head) | ||
3780 | { | ||
3781 | struct netdev_hw_addr *ha; | ||
3782 | |||
3783 | ha = container_of(head, struct netdev_hw_addr, rcu_head); | ||
3784 | kfree(ha); | ||
3785 | } | ||
3786 | |||
3787 | static int __hw_addr_del(struct netdev_hw_addr_list *list, unsigned char *addr, | ||
3788 | int addr_len, unsigned char addr_type) | ||
3789 | { | ||
3790 | struct netdev_hw_addr *ha; | ||
3791 | |||
3792 | list_for_each_entry(ha, &list->list, list) { | ||
3793 | if (!memcmp(ha->addr, addr, addr_len) && | ||
3794 | (ha->type == addr_type || !addr_type)) { | ||
3795 | if (--ha->refcount) | ||
3796 | return 0; | ||
3797 | list_del_rcu(&ha->list); | ||
3798 | call_rcu(&ha->rcu_head, ha_rcu_free); | ||
3799 | list->count--; | ||
3800 | return 0; | ||
3801 | } | ||
3802 | } | ||
3803 | return -ENOENT; | ||
3804 | } | ||
3805 | |||
3806 | static int __hw_addr_add_multiple(struct netdev_hw_addr_list *to_list, | ||
3807 | struct netdev_hw_addr_list *from_list, | ||
3808 | int addr_len, | ||
3809 | unsigned char addr_type) | ||
3810 | { | ||
3811 | int err; | ||
3812 | struct netdev_hw_addr *ha, *ha2; | ||
3813 | unsigned char type; | ||
3814 | |||
3815 | list_for_each_entry(ha, &from_list->list, list) { | ||
3816 | type = addr_type ? addr_type : ha->type; | ||
3817 | err = __hw_addr_add(to_list, ha->addr, addr_len, type); | ||
3818 | if (err) | ||
3819 | goto unroll; | ||
3820 | } | ||
3821 | return 0; | ||
3822 | |||
3823 | unroll: | ||
3824 | list_for_each_entry(ha2, &from_list->list, list) { | ||
3825 | if (ha2 == ha) | ||
3826 | break; | ||
3827 | type = addr_type ? addr_type : ha2->type; | ||
3828 | __hw_addr_del(to_list, ha2->addr, addr_len, type); | ||
3829 | } | ||
3830 | return err; | ||
3831 | } | ||
3832 | |||
3833 | static void __hw_addr_del_multiple(struct netdev_hw_addr_list *to_list, | ||
3834 | struct netdev_hw_addr_list *from_list, | ||
3835 | int addr_len, | ||
3836 | unsigned char addr_type) | ||
3837 | { | ||
3838 | struct netdev_hw_addr *ha; | ||
3839 | unsigned char type; | ||
3840 | |||
3841 | list_for_each_entry(ha, &from_list->list, list) { | ||
3842 | type = addr_type ? addr_type : ha->type; | ||
3843 | __hw_addr_del(to_list, ha->addr, addr_len, addr_type); | ||
3844 | } | ||
3845 | } | ||
3846 | |||
3847 | static int __hw_addr_sync(struct netdev_hw_addr_list *to_list, | ||
3848 | struct netdev_hw_addr_list *from_list, | ||
3849 | int addr_len) | ||
3850 | { | ||
3851 | int err = 0; | ||
3852 | struct netdev_hw_addr *ha, *tmp; | ||
3853 | |||
3854 | list_for_each_entry_safe(ha, tmp, &from_list->list, list) { | ||
3855 | if (!ha->synced) { | ||
3856 | err = __hw_addr_add(to_list, ha->addr, | ||
3857 | addr_len, ha->type); | ||
3858 | if (err) | ||
3859 | break; | ||
3860 | ha->synced = true; | ||
3861 | ha->refcount++; | ||
3862 | } else if (ha->refcount == 1) { | ||
3863 | __hw_addr_del(to_list, ha->addr, addr_len, ha->type); | ||
3864 | __hw_addr_del(from_list, ha->addr, addr_len, ha->type); | ||
3865 | } | ||
3866 | } | ||
3867 | return err; | ||
3868 | } | ||
3869 | |||
3870 | static void __hw_addr_unsync(struct netdev_hw_addr_list *to_list, | ||
3871 | struct netdev_hw_addr_list *from_list, | ||
3872 | int addr_len) | ||
3873 | { | ||
3874 | struct netdev_hw_addr *ha, *tmp; | ||
3875 | |||
3876 | list_for_each_entry_safe(ha, tmp, &from_list->list, list) { | ||
3877 | if (ha->synced) { | ||
3878 | __hw_addr_del(to_list, ha->addr, | ||
3879 | addr_len, ha->type); | ||
3880 | ha->synced = false; | ||
3881 | __hw_addr_del(from_list, ha->addr, | ||
3882 | addr_len, ha->type); | ||
3883 | } | ||
3884 | } | ||
3885 | } | ||
3886 | |||
3887 | static void __hw_addr_flush(struct netdev_hw_addr_list *list) | ||
3888 | { | ||
3889 | struct netdev_hw_addr *ha, *tmp; | ||
3890 | |||
3891 | list_for_each_entry_safe(ha, tmp, &list->list, list) { | ||
3892 | list_del_rcu(&ha->list); | ||
3893 | call_rcu(&ha->rcu_head, ha_rcu_free); | ||
3894 | } | ||
3895 | list->count = 0; | ||
3896 | } | ||
3897 | |||
3898 | static void __hw_addr_init(struct netdev_hw_addr_list *list) | ||
3899 | { | ||
3900 | INIT_LIST_HEAD(&list->list); | ||
3901 | list->count = 0; | ||
3902 | } | ||
3903 | |||
3904 | /* Device addresses handling functions */ | ||
3905 | |||
3906 | static void dev_addr_flush(struct net_device *dev) | ||
3907 | { | ||
3908 | /* rtnl_mutex must be held here */ | ||
3909 | |||
3910 | __hw_addr_flush(&dev->dev_addrs); | ||
3911 | dev->dev_addr = NULL; | ||
3912 | } | ||
3913 | |||
3914 | static int dev_addr_init(struct net_device *dev) | ||
3915 | { | ||
3916 | unsigned char addr[MAX_ADDR_LEN]; | ||
3917 | struct netdev_hw_addr *ha; | ||
3918 | int err; | ||
3919 | |||
3920 | /* rtnl_mutex must be held here */ | ||
3921 | |||
3922 | __hw_addr_init(&dev->dev_addrs); | ||
3923 | memset(addr, 0, sizeof(addr)); | ||
3924 | err = __hw_addr_add(&dev->dev_addrs, addr, sizeof(addr), | ||
3925 | NETDEV_HW_ADDR_T_LAN); | ||
3926 | if (!err) { | ||
3927 | /* | ||
3928 | * Get the first (previously created) address from the list | ||
3929 | * and set dev_addr pointer to this location. | ||
3930 | */ | ||
3931 | ha = list_first_entry(&dev->dev_addrs.list, | ||
3932 | struct netdev_hw_addr, list); | ||
3933 | dev->dev_addr = ha->addr; | ||
3934 | } | ||
3935 | return err; | ||
3936 | } | ||
3937 | |||
3938 | /** | ||
3939 | * dev_addr_add - Add a device address | ||
3940 | * @dev: device | ||
3941 | * @addr: address to add | ||
3942 | * @addr_type: address type | ||
3943 | * | ||
3944 | * Add a device address to the device or increase the reference count if | ||
3945 | * it already exists. | ||
3946 | * | ||
3947 | * The caller must hold the rtnl_mutex. | ||
3948 | */ | ||
3949 | int dev_addr_add(struct net_device *dev, unsigned char *addr, | ||
3950 | unsigned char addr_type) | ||
3951 | { | ||
3952 | int err; | ||
3953 | |||
3954 | ASSERT_RTNL(); | ||
3955 | |||
3956 | err = __hw_addr_add(&dev->dev_addrs, addr, dev->addr_len, addr_type); | ||
3957 | if (!err) | ||
3958 | call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); | ||
3959 | return err; | ||
3960 | } | ||
3961 | EXPORT_SYMBOL(dev_addr_add); | ||
3962 | |||
3963 | /** | ||
3964 | * dev_addr_del - Release a device address. | ||
3965 | * @dev: device | ||
3966 | * @addr: address to delete | ||
3967 | * @addr_type: address type | ||
3968 | * | ||
3969 | * Release reference to a device address and remove it from the device | ||
3970 | * if the reference count drops to zero. | ||
3971 | * | ||
3972 | * The caller must hold the rtnl_mutex. | ||
3973 | */ | ||
3974 | int dev_addr_del(struct net_device *dev, unsigned char *addr, | ||
3975 | unsigned char addr_type) | ||
3976 | { | ||
3977 | int err; | ||
3978 | struct netdev_hw_addr *ha; | ||
3979 | |||
3980 | ASSERT_RTNL(); | ||
3981 | |||
3982 | /* | ||
3983 | * We can not remove the first address from the list because | ||
3984 | * dev->dev_addr points to that. | ||
3985 | */ | ||
3986 | ha = list_first_entry(&dev->dev_addrs.list, | ||
3987 | struct netdev_hw_addr, list); | ||
3988 | if (ha->addr == dev->dev_addr && ha->refcount == 1) | ||
3989 | return -ENOENT; | ||
3990 | |||
3991 | err = __hw_addr_del(&dev->dev_addrs, addr, dev->addr_len, | ||
3992 | addr_type); | ||
3993 | if (!err) | ||
3994 | call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); | ||
3995 | return err; | ||
3996 | } | ||
3997 | EXPORT_SYMBOL(dev_addr_del); | ||
3998 | |||
3999 | /** | ||
4000 | * dev_addr_add_multiple - Add device addresses from another device | ||
4001 | * @to_dev: device to which addresses will be added | ||
4002 | * @from_dev: device from which addresses will be added | ||
4003 | * @addr_type: address type - 0 means type will be used from from_dev | ||
4004 | * | ||
4005 | * Add device addresses of the one device to another. | ||
4006 | ** | ||
4007 | * The caller must hold the rtnl_mutex. | ||
4008 | */ | ||
4009 | int dev_addr_add_multiple(struct net_device *to_dev, | ||
4010 | struct net_device *from_dev, | ||
4011 | unsigned char addr_type) | ||
4012 | { | ||
4013 | int err; | ||
4014 | |||
4015 | ASSERT_RTNL(); | ||
4016 | |||
4017 | if (from_dev->addr_len != to_dev->addr_len) | ||
4018 | return -EINVAL; | ||
4019 | err = __hw_addr_add_multiple(&to_dev->dev_addrs, &from_dev->dev_addrs, | ||
4020 | to_dev->addr_len, addr_type); | ||
4021 | if (!err) | ||
4022 | call_netdevice_notifiers(NETDEV_CHANGEADDR, to_dev); | ||
4023 | return err; | ||
4024 | } | ||
4025 | EXPORT_SYMBOL(dev_addr_add_multiple); | ||
4026 | |||
4027 | /** | ||
4028 | * dev_addr_del_multiple - Delete device addresses by another device | ||
4029 | * @to_dev: device where the addresses will be deleted | ||
4030 | * @from_dev: device by which addresses the addresses will be deleted | ||
4031 | * @addr_type: address type - 0 means type will used from from_dev | ||
4032 | * | ||
4033 | * Deletes addresses in to device by the list of addresses in from device. | ||
4034 | * | ||
4035 | * The caller must hold the rtnl_mutex. | ||
4036 | */ | ||
4037 | int dev_addr_del_multiple(struct net_device *to_dev, | ||
4038 | struct net_device *from_dev, | ||
4039 | unsigned char addr_type) | ||
4040 | { | ||
4041 | ASSERT_RTNL(); | ||
4042 | |||
4043 | if (from_dev->addr_len != to_dev->addr_len) | ||
4044 | return -EINVAL; | ||
4045 | __hw_addr_del_multiple(&to_dev->dev_addrs, &from_dev->dev_addrs, | ||
4046 | to_dev->addr_len, addr_type); | ||
4047 | call_netdevice_notifiers(NETDEV_CHANGEADDR, to_dev); | ||
4048 | return 0; | ||
4049 | } | ||
4050 | EXPORT_SYMBOL(dev_addr_del_multiple); | ||
4051 | |||
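A hedged sketch of the intended calling pattern for the *_multiple pair, e.g. an upper device mirroring a lower device's address list (the example_* names are hypothetical, not from this patch):

	/* Mirror the lower device's addresses onto the upper device on
	 * setup, and remove exactly those entries again on teardown.
	 * addr_type 0 reuses each source entry's own type. */
	static int example_mirror_setup(struct net_device *upper,
					struct net_device *lower)
	{
		ASSERT_RTNL();
		return dev_addr_add_multiple(upper, lower, 0);
	}

	static void example_mirror_teardown(struct net_device *upper,
					    struct net_device *lower)
	{
		ASSERT_RTNL();
		dev_addr_del_multiple(upper, lower, 0);
	}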
4052 | /* multicast addresses handling functions */ | ||
4053 | |||
4054 | int __dev_addr_delete(struct dev_addr_list **list, int *count, | ||
4055 | void *addr, int alen, int glbl) | ||
4056 | { | ||
4057 | struct dev_addr_list *da; | ||
4058 | |||
4059 | for (; (da = *list) != NULL; list = &da->next) { | ||
4060 | if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 && | ||
4061 | alen == da->da_addrlen) { | ||
4062 | if (glbl) { | ||
4063 | int old_glbl = da->da_gusers; | ||
4064 | da->da_gusers = 0; | ||
4065 | if (old_glbl == 0) | ||
4066 | break; | ||
4067 | } | ||
4068 | if (--da->da_users) | ||
4069 | return 0; | ||
4070 | |||
4071 | *list = da->next; | ||
4072 | kfree(da); | ||
4073 | (*count)--; | ||
4074 | return 0; | ||
4075 | } | ||
4076 | } | ||
4077 | return -ENOENT; | ||
4078 | } | ||
4079 | |||
4080 | int __dev_addr_add(struct dev_addr_list **list, int *count, | ||
4081 | void *addr, int alen, int glbl) | ||
4082 | { | ||
4083 | struct dev_addr_list *da; | ||
4084 | |||
4085 | for (da = *list; da != NULL; da = da->next) { | ||
4086 | if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 && | ||
4087 | da->da_addrlen == alen) { | ||
4088 | if (glbl) { | ||
4089 | int old_glbl = da->da_gusers; | ||
4090 | da->da_gusers = 1; | ||
4091 | if (old_glbl) | ||
4092 | return 0; | ||
4093 | } | ||
4094 | da->da_users++; | ||
4095 | return 0; | ||
4096 | } | ||
4097 | } | ||
4098 | |||
4099 | da = kzalloc(sizeof(*da), GFP_ATOMIC); | ||
4100 | if (da == NULL) | ||
4101 | return -ENOMEM; | ||
4102 | memcpy(da->da_addr, addr, alen); | ||
4103 | da->da_addrlen = alen; | ||
4104 | da->da_users = 1; | ||
4105 | da->da_gusers = glbl ? 1 : 0; | ||
4106 | da->next = *list; | ||
4107 | *list = da; | ||
4108 | (*count)++; | ||
4109 | return 0; | ||
4110 | } | ||
4111 | |||
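The legacy dev_addr_list helpers above are reference counted per address; an illustrative walk-through of that behaviour (calling context and includes such as <linux/if_ether.h> assumed; not part of this patch):

	static struct dev_addr_list *example_list;
	static int example_count;

	static void example_refcounting(void)
	{
		unsigned char mac[ETH_ALEN] = {
			0x00, 0x11, 0x22, 0x33, 0x44, 0x55 };

		/* First add allocates an entry: da_users == 1, count == 1 */
		__dev_addr_add(&example_list, &example_count, mac, ETH_ALEN, 0);
		/* Duplicate add only bumps da_users to 2 */
		__dev_addr_add(&example_list, &example_count, mac, ETH_ALEN, 0);
		/* First delete drops da_users to 1; the entry is kept */
		__dev_addr_delete(&example_list, &example_count, mac, ETH_ALEN, 0);
		/* Last delete frees the entry and decrements count */
		__dev_addr_delete(&example_list, &example_count, mac, ETH_ALEN, 0);
	}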
4112 | /** | ||
4113 | * dev_unicast_delete - Release secondary unicast address. | ||
4114 | * @dev: device | ||
4115 | * @addr: address to delete | ||
4116 | * | ||
4117 | * Release reference to a secondary unicast address and remove it | ||
4118 | * from the device if the reference count drops to zero. | ||
4119 | * | ||
4120 | * The caller must hold the rtnl_mutex. | ||
4121 | */ | ||
4122 | int dev_unicast_delete(struct net_device *dev, void *addr) | ||
4123 | { | ||
4124 | int err; | ||
4125 | |||
4126 | ASSERT_RTNL(); | ||
4127 | |||
4128 | netif_addr_lock_bh(dev); | ||
4129 | err = __hw_addr_del(&dev->uc, addr, dev->addr_len, | ||
4130 | NETDEV_HW_ADDR_T_UNICAST); | ||
4131 | if (!err) | ||
4132 | __dev_set_rx_mode(dev); | ||
4133 | netif_addr_unlock_bh(dev); | ||
4134 | return err; | ||
4135 | } | ||
4136 | EXPORT_SYMBOL(dev_unicast_delete); | ||
4137 | |||
4138 | /** | ||
4139 | * dev_unicast_add - add a secondary unicast address | ||
4140 | * @dev: device | ||
4141 | * @addr: address to add | ||
4142 | * | ||
4143 | * Add a secondary unicast address to the device or increase | ||
4144 | * the reference count if it already exists. | ||
4145 | * | ||
4146 | * The caller must hold the rtnl_mutex. | ||
4147 | */ | ||
4148 | int dev_unicast_add(struct net_device *dev, void *addr) | ||
4149 | { | ||
4150 | int err; | ||
4151 | |||
4152 | ASSERT_RTNL(); | ||
4153 | |||
4154 | netif_addr_lock_bh(dev); | ||
4155 | err = __hw_addr_add(&dev->uc, addr, dev->addr_len, | ||
4156 | NETDEV_HW_ADDR_T_UNICAST); | ||
4157 | if (!err) | ||
4158 | __dev_set_rx_mode(dev); | ||
4159 | netif_addr_unlock_bh(dev); | ||
4160 | return err; | ||
4161 | } | ||
4162 | EXPORT_SYMBOL(dev_unicast_add); | ||
4163 | |||
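A sketch of the usual driver-side usage of this pair, e.g. claiming an extra unicast address for a virtual interface (example_* is hypothetical; RTNL held as the helpers require):

	static int example_claim_uc_addr(struct net_device *dev,
					 unsigned char *mac)
	{
		int err;

		ASSERT_RTNL();
		err = dev_unicast_add(dev, mac);
		if (err)
			return err;
		/* ... use the address; then on teardown: */
		return dev_unicast_delete(dev, mac);
	}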
4164 | int __dev_addr_sync(struct dev_addr_list **to, int *to_count, | ||
4165 | struct dev_addr_list **from, int *from_count) | ||
4166 | { | ||
4167 | struct dev_addr_list *da, *next; | ||
4168 | int err = 0; | ||
4169 | |||
4170 | da = *from; | ||
4171 | while (da != NULL) { | ||
4172 | next = da->next; | ||
4173 | if (!da->da_synced) { | ||
4174 | err = __dev_addr_add(to, to_count, | ||
4175 | da->da_addr, da->da_addrlen, 0); | ||
4176 | if (err < 0) | ||
4177 | break; | ||
4178 | da->da_synced = 1; | ||
4179 | da->da_users++; | ||
4180 | } else if (da->da_users == 1) { | ||
4181 | __dev_addr_delete(to, to_count, | ||
4182 | da->da_addr, da->da_addrlen, 0); | ||
4183 | __dev_addr_delete(from, from_count, | ||
4184 | da->da_addr, da->da_addrlen, 0); | ||
4185 | } | ||
4186 | da = next; | ||
4187 | } | ||
4188 | return err; | ||
4189 | } | ||
4190 | EXPORT_SYMBOL_GPL(__dev_addr_sync); | ||
4191 | |||
4192 | void __dev_addr_unsync(struct dev_addr_list **to, int *to_count, | ||
4193 | struct dev_addr_list **from, int *from_count) | ||
4194 | { | ||
4195 | struct dev_addr_list *da, *next; | ||
4196 | |||
4197 | da = *from; | ||
4198 | while (da != NULL) { | ||
4199 | next = da->next; | ||
4200 | if (da->da_synced) { | ||
4201 | __dev_addr_delete(to, to_count, | ||
4202 | da->da_addr, da->da_addrlen, 0); | ||
4203 | da->da_synced = 0; | ||
4204 | __dev_addr_delete(from, from_count, | ||
4205 | da->da_addr, da->da_addrlen, 0); | ||
4206 | } | ||
4207 | da = next; | ||
4208 | } | ||
4209 | } | ||
4210 | EXPORT_SYMBOL_GPL(__dev_addr_unsync); | ||
4211 | |||
4212 | /** | ||
4213 | * dev_unicast_sync - Synchronize device's unicast list to another device | ||
4214 | * @to: destination device | ||
4215 | * @from: source device | ||
4216 | * | ||
4217 | * Add newly added addresses to the destination device and release | ||
4218 | * addresses that have no users left. The source device must be | ||
4219 | * locked by netif_addr_lock_bh. | ||
4220 | * | ||
4221 | * This function is intended to be called from the dev->set_rx_mode | ||
4222 | * function of layered software devices. | ||
4223 | */ | ||
4224 | int dev_unicast_sync(struct net_device *to, struct net_device *from) | ||
4225 | { | ||
4226 | int err = 0; | ||
4227 | |||
4228 | if (to->addr_len != from->addr_len) | ||
4229 | return -EINVAL; | ||
4230 | |||
4231 | netif_addr_lock_bh(to); | ||
4232 | err = __hw_addr_sync(&to->uc, &from->uc, to->addr_len); | ||
4233 | if (!err) | ||
4234 | __dev_set_rx_mode(to); | ||
4235 | netif_addr_unlock_bh(to); | ||
4236 | return err; | ||
4237 | } | ||
4238 | EXPORT_SYMBOL(dev_unicast_sync); | ||
4239 | |||
4240 | /** | ||
4241 | * dev_unicast_unsync - Remove synchronized addresses from the destination device | ||
4242 | * @to: destination device | ||
4243 | * @from: source device | ||
4244 | * | ||
4245 | * Remove all addresses that were added to the destination device by | ||
4246 | * dev_unicast_sync(). This function is intended to be called from the | ||
4247 | * dev->stop function of layered software devices. | ||
4248 | */ | ||
4249 | void dev_unicast_unsync(struct net_device *to, struct net_device *from) | ||
4250 | { | ||
4251 | if (to->addr_len != from->addr_len) | ||
4252 | return; | ||
4253 | |||
4254 | netif_addr_lock_bh(from); | ||
4255 | netif_addr_lock(to); | ||
4256 | __hw_addr_unsync(&to->uc, &from->uc, to->addr_len); | ||
4257 | __dev_set_rx_mode(to); | ||
4258 | netif_addr_unlock(to); | ||
4259 | netif_addr_unlock_bh(from); | ||
4260 | } | ||
4261 | EXPORT_SYMBOL(dev_unicast_unsync); | ||
4262 | |||
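Per the kernel-doc above, the sync/unsync pair is meant to be driven from a stacked device's rx-mode and stop paths; a hedged sketch for a VLAN-like upper device (example_* names are hypothetical):

	struct net_device *example_lower(struct net_device *dev); /* hypothetical */

	/* ndo_set_rx_mode of the upper device: push our unicast list
	 * down to the real device. */
	static void example_set_rx_mode(struct net_device *vlan_dev)
	{
		struct net_device *real_dev = example_lower(vlan_dev);

		dev_unicast_sync(real_dev, vlan_dev);
	}

	/* ndo_stop: remove exactly what we synced earlier. */
	static int example_stop(struct net_device *vlan_dev)
	{
		dev_unicast_unsync(example_lower(vlan_dev), vlan_dev);
		return 0;
	}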
4263 | static void dev_unicast_flush(struct net_device *dev) | ||
4264 | { | ||
4265 | netif_addr_lock_bh(dev); | ||
4266 | __hw_addr_flush(&dev->uc); | ||
4267 | netif_addr_unlock_bh(dev); | ||
4268 | } | ||
4269 | |||
4270 | static void dev_unicast_init(struct net_device *dev) | ||
4271 | { | ||
4272 | __hw_addr_init(&dev->uc); | ||
4273 | } | ||
4274 | |||
4275 | |||
4276 | static void __dev_addr_discard(struct dev_addr_list **list) | ||
4277 | { | ||
4278 | struct dev_addr_list *tmp; | ||
4279 | |||
4280 | while (*list != NULL) { | ||
4281 | tmp = *list; | ||
4282 | *list = tmp->next; | ||
4283 | if (tmp->da_users > tmp->da_gusers) | ||
4284 | printk(KERN_ERR "__dev_addr_discard: address leakage! " | ||
4285 | "da_users=%d\n", tmp->da_users); | ||
4286 | kfree(tmp); | ||
4287 | } | ||
4288 | } | ||
4289 | |||
4290 | static void dev_addr_discard(struct net_device *dev) | ||
4291 | { | ||
4292 | netif_addr_lock_bh(dev); | ||
4293 | |||
4294 | __dev_addr_discard(&dev->mc_list); | ||
4295 | netdev_mc_count(dev) = 0; | ||
4296 | |||
4297 | netif_addr_unlock_bh(dev); | ||
4298 | } | ||
4299 | |||
4300 | /** | 4160 | /** |
4301 | * dev_get_flags - get flags reported to userspace | 4161 | * dev_get_flags - get flags reported to userspace |
4302 | * @dev: device | 4162 | * @dev: device |
@@ -4607,8 +4467,7 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd) | |||
4607 | return -EINVAL; | 4467 | return -EINVAL; |
4608 | if (!netif_device_present(dev)) | 4468 | if (!netif_device_present(dev)) |
4609 | return -ENODEV; | 4469 | return -ENODEV; |
4610 | return dev_mc_add(dev, ifr->ifr_hwaddr.sa_data, | 4470 | return dev_mc_add_global(dev, ifr->ifr_hwaddr.sa_data); |
4611 | dev->addr_len, 1); | ||
4612 | 4471 | ||
4613 | case SIOCDELMULTI: | 4472 | case SIOCDELMULTI: |
4614 | if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) || | 4473 | if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) || |
@@ -4616,8 +4475,7 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd) | |||
4616 | return -EINVAL; | 4475 | return -EINVAL; |
4617 | if (!netif_device_present(dev)) | 4476 | if (!netif_device_present(dev)) |
4618 | return -ENODEV; | 4477 | return -ENODEV; |
4619 | return dev_mc_delete(dev, ifr->ifr_hwaddr.sa_data, | 4478 | return dev_mc_del_global(dev, ifr->ifr_hwaddr.sa_data); |
4620 | dev->addr_len, 1); | ||
4621 | 4479 | ||
4622 | case SIOCSIFTXQLEN: | 4480 | case SIOCSIFTXQLEN: |
4623 | if (ifr->ifr_qlen < 0) | 4481 | if (ifr->ifr_qlen < 0) |
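For context, the userspace side that reaches the new dev_mc_add_global()/dev_mc_del_global() calls above is the classic SIOCADDMULTI/SIOCDELMULTI ioctl; an illustrative (non-patch) snippet:

	#include <string.h>
	#include <sys/ioctl.h>
	#include <sys/socket.h>
	#include <net/if.h>

	static int example_add_mcast(int sock, const char *ifname,
				     const unsigned char mac[6])
	{
		struct ifreq ifr;

		memset(&ifr, 0, sizeof(ifr));
		strncpy(ifr.ifr_name, ifname, IFNAMSIZ - 1);
		ifr.ifr_hwaddr.sa_family = AF_UNSPEC;
		memcpy(ifr.ifr_hwaddr.sa_data, mac, 6);
		return ioctl(sock, SIOCADDMULTI, &ifr);
	}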
@@ -4924,8 +4782,8 @@ static void rollback_registered_many(struct list_head *head) | |||
4924 | /* | 4782 | /* |
4925 | * Flush the unicast and multicast chains | 4783 | * Flush the unicast and multicast chains |
4926 | */ | 4784 | */ |
4927 | dev_unicast_flush(dev); | 4785 | dev_uc_flush(dev); |
4928 | dev_addr_discard(dev); | 4786 | dev_mc_flush(dev); |
4929 | 4787 | ||
4930 | if (dev->netdev_ops->ndo_uninit) | 4788 | if (dev->netdev_ops->ndo_uninit) |
4931 | dev->netdev_ops->ndo_uninit(dev); | 4789 | dev->netdev_ops->ndo_uninit(dev); |
@@ -5074,6 +4932,24 @@ int register_netdevice(struct net_device *dev) | |||
5074 | 4932 | ||
5075 | dev->iflink = -1; | 4933 | dev->iflink = -1; |
5076 | 4934 | ||
4935 | #ifdef CONFIG_RPS | ||
4936 | if (!dev->num_rx_queues) { | ||
4937 | /* | ||
4938 | * Allocate a single RX queue if the driver never called | ||
4939 | * alloc_netdev_mq(). | ||
4940 | */ | ||
4941 | |||
4942 | dev->_rx = kzalloc(sizeof(struct netdev_rx_queue), GFP_KERNEL); | ||
4943 | if (!dev->_rx) { | ||
4944 | ret = -ENOMEM; | ||
4945 | goto out; | ||
4946 | } | ||
4947 | |||
4948 | dev->_rx->first = dev->_rx; | ||
4949 | atomic_set(&dev->_rx->count, 1); | ||
4950 | dev->num_rx_queues = 1; | ||
4951 | } | ||
4952 | #endif | ||
5077 | /* Init, if this function is available */ | 4953 | /* Init, if this function is available */ |
5078 | if (dev->netdev_ops->ndo_init) { | 4954 | if (dev->netdev_ops->ndo_init) { |
5079 | ret = dev->netdev_ops->ndo_init(dev); | 4955 | ret = dev->netdev_ops->ndo_init(dev); |
@@ -5113,8 +4989,6 @@ int register_netdevice(struct net_device *dev) | |||
5113 | if (dev->features & NETIF_F_SG) | 4989 | if (dev->features & NETIF_F_SG) |
5114 | dev->features |= NETIF_F_GSO; | 4990 | dev->features |= NETIF_F_GSO; |
5115 | 4991 | ||
5116 | netdev_initialize_kobject(dev); | ||
5117 | |||
5118 | ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev); | 4992 | ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev); |
5119 | ret = notifier_to_errno(ret); | 4993 | ret = notifier_to_errno(ret); |
5120 | if (ret) | 4994 | if (ret) |
@@ -5434,6 +5308,10 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, | |||
5434 | struct net_device *dev; | 5308 | struct net_device *dev; |
5435 | size_t alloc_size; | 5309 | size_t alloc_size; |
5436 | struct net_device *p; | 5310 | struct net_device *p; |
5311 | #ifdef CONFIG_RPS | ||
5312 | struct netdev_rx_queue *rx; | ||
5313 | int i; | ||
5314 | #endif | ||
5437 | 5315 | ||
5438 | BUG_ON(strlen(name) >= sizeof(dev->name)); | 5316 | BUG_ON(strlen(name) >= sizeof(dev->name)); |
5439 | 5317 | ||
@@ -5459,13 +5337,32 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, | |||
5459 | goto free_p; | 5337 | goto free_p; |
5460 | } | 5338 | } |
5461 | 5339 | ||
5340 | #ifdef CONFIG_RPS | ||
5341 | rx = kcalloc(queue_count, sizeof(struct netdev_rx_queue), GFP_KERNEL); | ||
5342 | if (!rx) { | ||
5343 | printk(KERN_ERR "alloc_netdev: Unable to allocate " | ||
5344 | "rx queues.\n"); | ||
5345 | goto free_tx; | ||
5346 | } | ||
5347 | |||
5348 | atomic_set(&rx->count, queue_count); | ||
5349 | |||
5350 | /* | ||
5351 | * Point each queue at the first element in the array, which holds | ||
5352 | * the reference count for the whole allocation. | ||
5353 | */ | ||
5354 | for (i = 0; i < queue_count; i++) | ||
5355 | rx[i].first = rx; | ||
5356 | #endif | ||
5357 | |||
5462 | dev = PTR_ALIGN(p, NETDEV_ALIGN); | 5358 | dev = PTR_ALIGN(p, NETDEV_ALIGN); |
5463 | dev->padded = (char *)dev - (char *)p; | 5359 | dev->padded = (char *)dev - (char *)p; |
5464 | 5360 | ||
5465 | if (dev_addr_init(dev)) | 5361 | if (dev_addr_init(dev)) |
5466 | goto free_tx; | 5362 | goto free_rx; |
5467 | 5363 | ||
5468 | dev_unicast_init(dev); | 5364 | dev_mc_init(dev); |
5365 | dev_uc_init(dev); | ||
5469 | 5366 | ||
5470 | dev_net_set(dev, &init_net); | 5367 | dev_net_set(dev, &init_net); |
5471 | 5368 | ||
@@ -5473,6 +5370,11 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, | |||
5473 | dev->num_tx_queues = queue_count; | 5370 | dev->num_tx_queues = queue_count; |
5474 | dev->real_num_tx_queues = queue_count; | 5371 | dev->real_num_tx_queues = queue_count; |
5475 | 5372 | ||
5373 | #ifdef CONFIG_RPS | ||
5374 | dev->_rx = rx; | ||
5375 | dev->num_rx_queues = queue_count; | ||
5376 | #endif | ||
5377 | |||
5476 | dev->gso_max_size = GSO_MAX_SIZE; | 5378 | dev->gso_max_size = GSO_MAX_SIZE; |
5477 | 5379 | ||
5478 | netdev_init_queues(dev); | 5380 | netdev_init_queues(dev); |
@@ -5487,9 +5389,12 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, | |||
5487 | strcpy(dev->name, name); | 5389 | strcpy(dev->name, name); |
5488 | return dev; | 5390 | return dev; |
5489 | 5391 | ||
5392 | free_rx: | ||
5393 | #ifdef CONFIG_RPS | ||
5394 | kfree(rx); | ||
5490 | free_tx: | 5395 | free_tx: |
5396 | #endif | ||
5491 | kfree(tx); | 5397 | kfree(tx); |
5492 | |||
5493 | free_p: | 5398 | free_p: |
5494 | kfree(p); | 5399 | kfree(p); |
5495 | return NULL; | 5400 | return NULL; |
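Every element of the new _rx array points back at element 0, which carries an atomic count for the whole kcalloc() allocation; the matching release side lives outside this file, but would look roughly like this sketch:

	static void example_rx_queue_put(struct netdev_rx_queue *queue)
	{
		struct netdev_rx_queue *first = queue->first;

		/* The queues were allocated as one array; free it through
		 * element 0 once the last per-queue reference is dropped. */
		if (atomic_dec_and_test(&first->count))
			kfree(first);
	}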
@@ -5635,15 +5540,6 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char | |||
5635 | if (dev->features & NETIF_F_NETNS_LOCAL) | 5540 | if (dev->features & NETIF_F_NETNS_LOCAL) |
5636 | goto out; | 5541 | goto out; |
5637 | 5542 | ||
5638 | #ifdef CONFIG_SYSFS | ||
5639 | /* Don't allow real devices to be moved when sysfs | ||
5640 | * is enabled. | ||
5641 | */ | ||
5642 | err = -EINVAL; | ||
5643 | if (dev->dev.parent) | ||
5644 | goto out; | ||
5645 | #endif | ||
5646 | |||
5647 | /* Ensure the device has been registered */ | 5543 | /* Ensure the device has been registered */ |
5648 | err = -EINVAL; | 5544 | err = -EINVAL; |
5649 | if (dev->reg_state != NETREG_REGISTERED) | 5545 | if (dev->reg_state != NETREG_REGISTERED) |
@@ -5691,10 +5587,8 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char | |||
5691 | /* | 5587 | /* |
5692 | * Flush the unicast and multicast chains | 5588 | * Flush the unicast and multicast chains |
5693 | */ | 5589 | */ |
5694 | dev_unicast_flush(dev); | 5590 | dev_uc_flush(dev); |
5695 | dev_addr_discard(dev); | 5591 | dev_mc_flush(dev); |
5696 | |||
5697 | netdev_unregister_kobject(dev); | ||
5698 | 5592 | ||
5699 | /* Actually switch the network namespace */ | 5593 | /* Actually switch the network namespace */ |
5700 | dev_net_set(dev, net); | 5594 | dev_net_set(dev, net); |
@@ -5708,7 +5602,7 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char | |||
5708 | } | 5602 | } |
5709 | 5603 | ||
5710 | /* Fixup kobjects */ | 5604 | /* Fixup kobjects */ |
5711 | err = netdev_register_kobject(dev); | 5605 | err = device_rename(&dev->dev, dev->name); |
5712 | WARN_ON(err); | 5606 | WARN_ON(err); |
5713 | 5607 | ||
5714 | /* Add the device back in the hashes */ | 5608 | /* Add the device back in the hashes */ |
@@ -5735,7 +5629,6 @@ static int dev_cpu_callback(struct notifier_block *nfb, | |||
5735 | void *ocpu) | 5629 | void *ocpu) |
5736 | { | 5630 | { |
5737 | struct sk_buff **list_skb; | 5631 | struct sk_buff **list_skb; |
5738 | struct Qdisc **list_net; | ||
5739 | struct sk_buff *skb; | 5632 | struct sk_buff *skb; |
5740 | unsigned int cpu, oldcpu = (unsigned long)ocpu; | 5633 | unsigned int cpu, oldcpu = (unsigned long)ocpu; |
5741 | struct softnet_data *sd, *oldsd; | 5634 | struct softnet_data *sd, *oldsd; |
@@ -5756,19 +5649,23 @@ static int dev_cpu_callback(struct notifier_block *nfb, | |||
5756 | *list_skb = oldsd->completion_queue; | 5649 | *list_skb = oldsd->completion_queue; |
5757 | oldsd->completion_queue = NULL; | 5650 | oldsd->completion_queue = NULL; |
5758 | 5651 | ||
5759 | /* Find end of our output_queue. */ | ||
5760 | list_net = &sd->output_queue; | ||
5761 | while (*list_net) | ||
5762 | list_net = &(*list_net)->next_sched; | ||
5763 | /* Append output queue from offline CPU. */ | 5652 | /* Append output queue from offline CPU. */ |
5764 | *list_net = oldsd->output_queue; | 5653 | if (oldsd->output_queue) { |
5765 | oldsd->output_queue = NULL; | 5654 | *sd->output_queue_tailp = oldsd->output_queue; |
5655 | sd->output_queue_tailp = oldsd->output_queue_tailp; | ||
5656 | oldsd->output_queue = NULL; | ||
5657 | oldsd->output_queue_tailp = &oldsd->output_queue; | ||
5658 | } | ||
5766 | 5659 | ||
5767 | raise_softirq_irqoff(NET_TX_SOFTIRQ); | 5660 | raise_softirq_irqoff(NET_TX_SOFTIRQ); |
5768 | local_irq_enable(); | 5661 | local_irq_enable(); |
5769 | 5662 | ||
5770 | /* Process offline CPU's input_pkt_queue */ | 5663 | /* Process offline CPU's input_pkt_queue */ |
5771 | while ((skb = __skb_dequeue(&oldsd->input_pkt_queue))) | 5664 | while ((skb = __skb_dequeue(&oldsd->input_pkt_queue))) { |
5665 | netif_rx(skb); | ||
5666 | input_queue_head_add(oldsd, 1); | ||
5667 | } | ||
5668 | while ((skb = __skb_dequeue(&oldsd->process_queue))) | ||
5772 | netif_rx(skb); | 5669 | netif_rx(skb); |
5773 | 5670 | ||
5774 | return NOTIFY_OK; | 5671 | return NOTIFY_OK; |
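The new output_queue_tailp turns the old walk-to-the-end append into an O(1) splice. The enqueue side of the same idiom, roughly as __netif_reschedule() does it (sketch, not a verbatim copy):

	static void example_qdisc_append(struct softnet_data *sd,
					 struct Qdisc *q)
	{
		/* tailp always points at the last ->next_sched slot, so a
		 * single store appends; no list walk as in the old code. */
		q->next_sched = NULL;
		*sd->output_queue_tailp = q;
		sd->output_queue_tailp = &q->next_sched;
	}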
@@ -5985,17 +5882,26 @@ static int __init net_dev_init(void) | |||
5985 | */ | 5882 | */ |
5986 | 5883 | ||
5987 | for_each_possible_cpu(i) { | 5884 | for_each_possible_cpu(i) { |
5988 | struct softnet_data *queue; | 5885 | struct softnet_data *sd = &per_cpu(softnet_data, i); |
5989 | 5886 | ||
5990 | queue = &per_cpu(softnet_data, i); | 5887 | memset(sd, 0, sizeof(*sd)); |
5991 | skb_queue_head_init(&queue->input_pkt_queue); | 5888 | skb_queue_head_init(&sd->input_pkt_queue); |
5992 | queue->completion_queue = NULL; | 5889 | skb_queue_head_init(&sd->process_queue); |
5993 | INIT_LIST_HEAD(&queue->poll_list); | 5890 | sd->completion_queue = NULL; |
5891 | INIT_LIST_HEAD(&sd->poll_list); | ||
5892 | sd->output_queue = NULL; | ||
5893 | sd->output_queue_tailp = &sd->output_queue; | ||
5894 | #ifdef CONFIG_RPS | ||
5895 | sd->csd.func = rps_trigger_softirq; | ||
5896 | sd->csd.info = sd; | ||
5897 | sd->csd.flags = 0; | ||
5898 | sd->cpu = i; | ||
5899 | #endif | ||
5994 | 5900 | ||
5995 | queue->backlog.poll = process_backlog; | 5901 | sd->backlog.poll = process_backlog; |
5996 | queue->backlog.weight = weight_p; | 5902 | sd->backlog.weight = weight_p; |
5997 | queue->backlog.gro_list = NULL; | 5903 | sd->backlog.gro_list = NULL; |
5998 | queue->backlog.gro_count = 0; | 5904 | sd->backlog.gro_count = 0; |
5999 | } | 5905 | } |
6000 | 5906 | ||
6001 | dev_boot_phase = 0; | 5907 | dev_boot_phase = 0; |
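sd->csd initialized above is the call_single_data used to IPI a remote CPU when RPS steers packets to it; the handler it is wired to simply schedules that CPU's backlog NAPI instance, roughly:

	/* Runs on the remote CPU when the csd IPI fires; scheduling the
	 * backlog raises NET_RX_SOFTIRQ there. Hedged sketch of
	 * rps_trigger_softirq(), which this patch defines earlier. */
	static void rps_trigger_softirq(void *data)
	{
		struct softnet_data *sd = data;

		__napi_schedule(&sd->backlog);
	}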
@@ -6030,7 +5936,7 @@ subsys_initcall(net_dev_init); | |||
6030 | 5936 | ||
6031 | static int __init initialize_hashrnd(void) | 5937 | static int __init initialize_hashrnd(void) |
6032 | { | 5938 | { |
6033 | get_random_bytes(&skb_tx_hashrnd, sizeof(skb_tx_hashrnd)); | 5939 | get_random_bytes(&hashrnd, sizeof(hashrnd)); |
6034 | return 0; | 5940 | return 0; |
6035 | } | 5941 | } |
6036 | 5942 | ||