diff options
Diffstat (limited to 'net/core')
-rw-r--r-- | net/core/Makefile | 2 | ||||
-rw-r--r-- | net/core/datagram.c | 21 | ||||
-rw-r--r-- | net/core/dev.c | 1402 | ||||
-rw-r--r-- | net/core/dev_addr_lists.c | 741 | ||||
-rw-r--r-- | net/core/dev_mcast.c | 232 | ||||
-rw-r--r-- | net/core/dst.c | 45 | ||||
-rw-r--r-- | net/core/ethtool.c | 152 | ||||
-rw-r--r-- | net/core/fib_rules.c | 31 | ||||
-rw-r--r-- | net/core/filter.c | 7 | ||||
-rw-r--r-- | net/core/flow.c | 405 | ||||
-rw-r--r-- | net/core/net-sysfs.c | 377 | ||||
-rw-r--r-- | net/core/net-sysfs.h | 1 | ||||
-rw-r--r-- | net/core/net_namespace.c | 95 | ||||
-rw-r--r-- | net/core/netpoll.c | 26 | ||||
-rw-r--r-- | net/core/pktgen.c | 58 | ||||
-rw-r--r-- | net/core/rtnetlink.c | 369 | ||||
-rw-r--r-- | net/core/skbuff.c | 33 | ||||
-rw-r--r-- | net/core/sock.c | 78 | ||||
-rw-r--r-- | net/core/stream.c | 22 | ||||
-rw-r--r-- | net/core/sysctl_net_core.c | 75 |
20 files changed, 2731 insertions, 1441 deletions
diff --git a/net/core/Makefile b/net/core/Makefile index 08791ac3e05a..51c3eec850ef 100644 --- a/net/core/Makefile +++ b/net/core/Makefile | |||
@@ -7,7 +7,7 @@ obj-y := sock.o request_sock.o skbuff.o iovec.o datagram.o stream.o scm.o \ | |||
7 | 7 | ||
8 | obj-$(CONFIG_SYSCTL) += sysctl_net_core.o | 8 | obj-$(CONFIG_SYSCTL) += sysctl_net_core.o |
9 | 9 | ||
10 | obj-y += dev.o ethtool.o dev_mcast.o dst.o netevent.o \ | 10 | obj-y += dev.o ethtool.o dev_addr_lists.o dst.o netevent.o \ |
11 | neighbour.o rtnetlink.o utils.o link_watch.o filter.o | 11 | neighbour.o rtnetlink.o utils.o link_watch.o filter.o |
12 | 12 | ||
13 | obj-$(CONFIG_XFRM) += flow.o | 13 | obj-$(CONFIG_XFRM) += flow.o |
diff --git a/net/core/datagram.c b/net/core/datagram.c index 2dccd4ee591b..e0097531417a 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c | |||
@@ -86,7 +86,7 @@ static int wait_for_packet(struct sock *sk, int *err, long *timeo_p) | |||
86 | int error; | 86 | int error; |
87 | DEFINE_WAIT_FUNC(wait, receiver_wake_function); | 87 | DEFINE_WAIT_FUNC(wait, receiver_wake_function); |
88 | 88 | ||
89 | prepare_to_wait_exclusive(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); | 89 | prepare_to_wait_exclusive(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); |
90 | 90 | ||
91 | /* Socket errors? */ | 91 | /* Socket errors? */ |
92 | error = sock_error(sk); | 92 | error = sock_error(sk); |
@@ -115,7 +115,7 @@ static int wait_for_packet(struct sock *sk, int *err, long *timeo_p) | |||
115 | error = 0; | 115 | error = 0; |
116 | *timeo_p = schedule_timeout(*timeo_p); | 116 | *timeo_p = schedule_timeout(*timeo_p); |
117 | out: | 117 | out: |
118 | finish_wait(sk->sk_sleep, &wait); | 118 | finish_wait(sk_sleep(sk), &wait); |
119 | return error; | 119 | return error; |
120 | interrupted: | 120 | interrupted: |
121 | error = sock_intr_errno(*timeo_p); | 121 | error = sock_intr_errno(*timeo_p); |
@@ -229,9 +229,18 @@ EXPORT_SYMBOL(skb_free_datagram); | |||
229 | 229 | ||
230 | void skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb) | 230 | void skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb) |
231 | { | 231 | { |
232 | lock_sock(sk); | 232 | if (likely(atomic_read(&skb->users) == 1)) |
233 | skb_free_datagram(sk, skb); | 233 | smp_rmb(); |
234 | release_sock(sk); | 234 | else if (likely(!atomic_dec_and_test(&skb->users))) |
235 | return; | ||
236 | |||
237 | lock_sock_bh(sk); | ||
238 | skb_orphan(skb); | ||
239 | sk_mem_reclaim_partial(sk); | ||
240 | unlock_sock_bh(sk); | ||
241 | |||
242 | /* skb is now orphaned, can be freed outside of locked section */ | ||
243 | __kfree_skb(skb); | ||
235 | } | 244 | } |
236 | EXPORT_SYMBOL(skb_free_datagram_locked); | 245 | EXPORT_SYMBOL(skb_free_datagram_locked); |
237 | 246 | ||
@@ -726,7 +735,7 @@ unsigned int datagram_poll(struct file *file, struct socket *sock, | |||
726 | struct sock *sk = sock->sk; | 735 | struct sock *sk = sock->sk; |
727 | unsigned int mask; | 736 | unsigned int mask; |
728 | 737 | ||
729 | sock_poll_wait(file, sk->sk_sleep, wait); | 738 | sock_poll_wait(file, sk_sleep(sk), wait); |
730 | mask = 0; | 739 | mask = 0; |
731 | 740 | ||
732 | /* exceptional events? */ | 741 | /* exceptional events? */ |
diff --git a/net/core/dev.c b/net/core/dev.c index f769098774b7..d273e4e3ecdc 100644 --- a/net/core/dev.c +++ b/net/core/dev.c | |||
@@ -130,6 +130,7 @@ | |||
130 | #include <linux/jhash.h> | 130 | #include <linux/jhash.h> |
131 | #include <linux/random.h> | 131 | #include <linux/random.h> |
132 | #include <trace/events/napi.h> | 132 | #include <trace/events/napi.h> |
133 | #include <linux/pci.h> | ||
133 | 134 | ||
134 | #include "net-sysfs.h" | 135 | #include "net-sysfs.h" |
135 | 136 | ||
@@ -207,6 +208,20 @@ static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex) | |||
207 | return &net->dev_index_head[ifindex & (NETDEV_HASHENTRIES - 1)]; | 208 | return &net->dev_index_head[ifindex & (NETDEV_HASHENTRIES - 1)]; |
208 | } | 209 | } |
209 | 210 | ||
211 | static inline void rps_lock(struct softnet_data *sd) | ||
212 | { | ||
213 | #ifdef CONFIG_RPS | ||
214 | spin_lock(&sd->input_pkt_queue.lock); | ||
215 | #endif | ||
216 | } | ||
217 | |||
218 | static inline void rps_unlock(struct softnet_data *sd) | ||
219 | { | ||
220 | #ifdef CONFIG_RPS | ||
221 | spin_unlock(&sd->input_pkt_queue.lock); | ||
222 | #endif | ||
223 | } | ||
224 | |||
210 | /* Device list insertion */ | 225 | /* Device list insertion */ |
211 | static int list_netdevice(struct net_device *dev) | 226 | static int list_netdevice(struct net_device *dev) |
212 | { | 227 | { |
@@ -249,7 +264,7 @@ static RAW_NOTIFIER_HEAD(netdev_chain); | |||
249 | * queue in the local softnet handler. | 264 | * queue in the local softnet handler. |
250 | */ | 265 | */ |
251 | 266 | ||
252 | DEFINE_PER_CPU(struct softnet_data, softnet_data); | 267 | DEFINE_PER_CPU_ALIGNED(struct softnet_data, softnet_data); |
253 | EXPORT_PER_CPU_SYMBOL(softnet_data); | 268 | EXPORT_PER_CPU_SYMBOL(softnet_data); |
254 | 269 | ||
255 | #ifdef CONFIG_LOCKDEP | 270 | #ifdef CONFIG_LOCKDEP |
@@ -773,14 +788,17 @@ EXPORT_SYMBOL(__dev_getfirstbyhwtype); | |||
773 | 788 | ||
774 | struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type) | 789 | struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type) |
775 | { | 790 | { |
776 | struct net_device *dev; | 791 | struct net_device *dev, *ret = NULL; |
777 | 792 | ||
778 | rtnl_lock(); | 793 | rcu_read_lock(); |
779 | dev = __dev_getfirstbyhwtype(net, type); | 794 | for_each_netdev_rcu(net, dev) |
780 | if (dev) | 795 | if (dev->type == type) { |
781 | dev_hold(dev); | 796 | dev_hold(dev); |
782 | rtnl_unlock(); | 797 | ret = dev; |
783 | return dev; | 798 | break; |
799 | } | ||
800 | rcu_read_unlock(); | ||
801 | return ret; | ||
784 | } | 802 | } |
785 | EXPORT_SYMBOL(dev_getfirstbyhwtype); | 803 | EXPORT_SYMBOL(dev_getfirstbyhwtype); |
786 | 804 | ||
@@ -984,15 +1002,10 @@ int dev_change_name(struct net_device *dev, const char *newname) | |||
984 | return err; | 1002 | return err; |
985 | 1003 | ||
986 | rollback: | 1004 | rollback: |
987 | /* For now only devices in the initial network namespace | 1005 | ret = device_rename(&dev->dev, dev->name); |
988 | * are in sysfs. | 1006 | if (ret) { |
989 | */ | 1007 | memcpy(dev->name, oldname, IFNAMSIZ); |
990 | if (net_eq(net, &init_net)) { | 1008 | return ret; |
991 | ret = device_rename(&dev->dev, dev->name); | ||
992 | if (ret) { | ||
993 | memcpy(dev->name, oldname, IFNAMSIZ); | ||
994 | return ret; | ||
995 | } | ||
996 | } | 1009 | } |
997 | 1010 | ||
998 | write_lock_bh(&dev_base_lock); | 1011 | write_lock_bh(&dev_base_lock); |
@@ -1085,9 +1098,9 @@ void netdev_state_change(struct net_device *dev) | |||
1085 | } | 1098 | } |
1086 | EXPORT_SYMBOL(netdev_state_change); | 1099 | EXPORT_SYMBOL(netdev_state_change); |
1087 | 1100 | ||
1088 | void netdev_bonding_change(struct net_device *dev, unsigned long event) | 1101 | int netdev_bonding_change(struct net_device *dev, unsigned long event) |
1089 | { | 1102 | { |
1090 | call_netdevice_notifiers(event, dev); | 1103 | return call_netdevice_notifiers(event, dev); |
1091 | } | 1104 | } |
1092 | EXPORT_SYMBOL(netdev_bonding_change); | 1105 | EXPORT_SYMBOL(netdev_bonding_change); |
1093 | 1106 | ||
@@ -1417,6 +1430,7 @@ EXPORT_SYMBOL(unregister_netdevice_notifier); | |||
1417 | 1430 | ||
1418 | int call_netdevice_notifiers(unsigned long val, struct net_device *dev) | 1431 | int call_netdevice_notifiers(unsigned long val, struct net_device *dev) |
1419 | { | 1432 | { |
1433 | ASSERT_RTNL(); | ||
1420 | return raw_notifier_call_chain(&netdev_chain, val, dev); | 1434 | return raw_notifier_call_chain(&netdev_chain, val, dev); |
1421 | } | 1435 | } |
1422 | 1436 | ||
@@ -1435,7 +1449,7 @@ void net_disable_timestamp(void) | |||
1435 | } | 1449 | } |
1436 | EXPORT_SYMBOL(net_disable_timestamp); | 1450 | EXPORT_SYMBOL(net_disable_timestamp); |
1437 | 1451 | ||
1438 | static inline void net_timestamp(struct sk_buff *skb) | 1452 | static inline void net_timestamp_set(struct sk_buff *skb) |
1439 | { | 1453 | { |
1440 | if (atomic_read(&netstamp_needed)) | 1454 | if (atomic_read(&netstamp_needed)) |
1441 | __net_timestamp(skb); | 1455 | __net_timestamp(skb); |
@@ -1443,6 +1457,12 @@ static inline void net_timestamp(struct sk_buff *skb) | |||
1443 | skb->tstamp.tv64 = 0; | 1457 | skb->tstamp.tv64 = 0; |
1444 | } | 1458 | } |
1445 | 1459 | ||
1460 | static inline void net_timestamp_check(struct sk_buff *skb) | ||
1461 | { | ||
1462 | if (!skb->tstamp.tv64 && atomic_read(&netstamp_needed)) | ||
1463 | __net_timestamp(skb); | ||
1464 | } | ||
1465 | |||
1446 | /** | 1466 | /** |
1447 | * dev_forward_skb - loopback an skb to another netif | 1467 | * dev_forward_skb - loopback an skb to another netif |
1448 | * | 1468 | * |
@@ -1451,7 +1471,7 @@ static inline void net_timestamp(struct sk_buff *skb) | |||
1451 | * | 1471 | * |
1452 | * return values: | 1472 | * return values: |
1453 | * NET_RX_SUCCESS (no congestion) | 1473 | * NET_RX_SUCCESS (no congestion) |
1454 | * NET_RX_DROP (packet was dropped) | 1474 | * NET_RX_DROP (packet was dropped, but freed) |
1455 | * | 1475 | * |
1456 | * dev_forward_skb can be used for injecting an skb from the | 1476 | * dev_forward_skb can be used for injecting an skb from the |
1457 | * start_xmit function of one device into the receive queue | 1477 | * start_xmit function of one device into the receive queue |
@@ -1465,12 +1485,11 @@ int dev_forward_skb(struct net_device *dev, struct sk_buff *skb) | |||
1465 | { | 1485 | { |
1466 | skb_orphan(skb); | 1486 | skb_orphan(skb); |
1467 | 1487 | ||
1468 | if (!(dev->flags & IFF_UP)) | 1488 | if (!(dev->flags & IFF_UP) || |
1469 | return NET_RX_DROP; | 1489 | (skb->len > (dev->mtu + dev->hard_header_len))) { |
1470 | 1490 | kfree_skb(skb); | |
1471 | if (skb->len > (dev->mtu + dev->hard_header_len)) | ||
1472 | return NET_RX_DROP; | 1491 | return NET_RX_DROP; |
1473 | 1492 | } | |
1474 | skb_set_dev(skb, dev); | 1493 | skb_set_dev(skb, dev); |
1475 | skb->tstamp.tv64 = 0; | 1494 | skb->tstamp.tv64 = 0; |
1476 | skb->pkt_type = PACKET_HOST; | 1495 | skb->pkt_type = PACKET_HOST; |
@@ -1490,9 +1509,9 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) | |||
1490 | 1509 | ||
1491 | #ifdef CONFIG_NET_CLS_ACT | 1510 | #ifdef CONFIG_NET_CLS_ACT |
1492 | if (!(skb->tstamp.tv64 && (G_TC_FROM(skb->tc_verd) & AT_INGRESS))) | 1511 | if (!(skb->tstamp.tv64 && (G_TC_FROM(skb->tc_verd) & AT_INGRESS))) |
1493 | net_timestamp(skb); | 1512 | net_timestamp_set(skb); |
1494 | #else | 1513 | #else |
1495 | net_timestamp(skb); | 1514 | net_timestamp_set(skb); |
1496 | #endif | 1515 | #endif |
1497 | 1516 | ||
1498 | rcu_read_lock(); | 1517 | rcu_read_lock(); |
@@ -1538,8 +1557,9 @@ static inline void __netif_reschedule(struct Qdisc *q) | |||
1538 | 1557 | ||
1539 | local_irq_save(flags); | 1558 | local_irq_save(flags); |
1540 | sd = &__get_cpu_var(softnet_data); | 1559 | sd = &__get_cpu_var(softnet_data); |
1541 | q->next_sched = sd->output_queue; | 1560 | q->next_sched = NULL; |
1542 | sd->output_queue = q; | 1561 | *sd->output_queue_tailp = q; |
1562 | sd->output_queue_tailp = &q->next_sched; | ||
1543 | raise_softirq_irqoff(NET_TX_SOFTIRQ); | 1563 | raise_softirq_irqoff(NET_TX_SOFTIRQ); |
1544 | local_irq_restore(flags); | 1564 | local_irq_restore(flags); |
1545 | } | 1565 | } |
@@ -1784,18 +1804,27 @@ EXPORT_SYMBOL(netdev_rx_csum_fault); | |||
1784 | * 2. No high memory really exists on this machine. | 1804 | * 2. No high memory really exists on this machine. |
1785 | */ | 1805 | */ |
1786 | 1806 | ||
1787 | static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb) | 1807 | static int illegal_highdma(struct net_device *dev, struct sk_buff *skb) |
1788 | { | 1808 | { |
1789 | #ifdef CONFIG_HIGHMEM | 1809 | #ifdef CONFIG_HIGHMEM |
1790 | int i; | 1810 | int i; |
1811 | if (!(dev->features & NETIF_F_HIGHDMA)) { | ||
1812 | for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) | ||
1813 | if (PageHighMem(skb_shinfo(skb)->frags[i].page)) | ||
1814 | return 1; | ||
1815 | } | ||
1791 | 1816 | ||
1792 | if (dev->features & NETIF_F_HIGHDMA) | 1817 | if (PCI_DMA_BUS_IS_PHYS) { |
1793 | return 0; | 1818 | struct device *pdev = dev->dev.parent; |
1794 | |||
1795 | for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) | ||
1796 | if (PageHighMem(skb_shinfo(skb)->frags[i].page)) | ||
1797 | return 1; | ||
1798 | 1819 | ||
1820 | if (!pdev) | ||
1821 | return 0; | ||
1822 | for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { | ||
1823 | dma_addr_t addr = page_to_phys(skb_shinfo(skb)->frags[i].page); | ||
1824 | if (!pdev->dma_mask || addr + PAGE_SIZE - 1 > *pdev->dma_mask) | ||
1825 | return 1; | ||
1826 | } | ||
1827 | } | ||
1799 | #endif | 1828 | #endif |
1800 | return 0; | 1829 | return 0; |
1801 | } | 1830 | } |
@@ -1853,6 +1882,17 @@ static int dev_gso_segment(struct sk_buff *skb) | |||
1853 | return 0; | 1882 | return 0; |
1854 | } | 1883 | } |
1855 | 1884 | ||
1885 | /* | ||
1886 | * Try to orphan skb early, right before transmission by the device. | ||
1887 | * We cannot orphan skb if tx timestamp is requested, since | ||
1888 | * drivers need to call skb_tstamp_tx() to send the timestamp. | ||
1889 | */ | ||
1890 | static inline void skb_orphan_try(struct sk_buff *skb) | ||
1891 | { | ||
1892 | if (!skb_tx(skb)->flags) | ||
1893 | skb_orphan(skb); | ||
1894 | } | ||
1895 | |||
1856 | int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, | 1896 | int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, |
1857 | struct netdev_queue *txq) | 1897 | struct netdev_queue *txq) |
1858 | { | 1898 | { |
@@ -1863,13 +1903,6 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, | |||
1863 | if (!list_empty(&ptype_all)) | 1903 | if (!list_empty(&ptype_all)) |
1864 | dev_queue_xmit_nit(skb, dev); | 1904 | dev_queue_xmit_nit(skb, dev); |
1865 | 1905 | ||
1866 | if (netif_needs_gso(dev, skb)) { | ||
1867 | if (unlikely(dev_gso_segment(skb))) | ||
1868 | goto out_kfree_skb; | ||
1869 | if (skb->next) | ||
1870 | goto gso; | ||
1871 | } | ||
1872 | |||
1873 | /* | 1906 | /* |
1874 | * If device doesnt need skb->dst, release it right now while | 1907 | * If device doesnt need skb->dst, release it right now while |
1875 | * its hot in this cpu cache | 1908 | * its hot in this cpu cache |
@@ -1877,23 +1910,18 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, | |||
1877 | if (dev->priv_flags & IFF_XMIT_DST_RELEASE) | 1910 | if (dev->priv_flags & IFF_XMIT_DST_RELEASE) |
1878 | skb_dst_drop(skb); | 1911 | skb_dst_drop(skb); |
1879 | 1912 | ||
1913 | skb_orphan_try(skb); | ||
1914 | |||
1915 | if (netif_needs_gso(dev, skb)) { | ||
1916 | if (unlikely(dev_gso_segment(skb))) | ||
1917 | goto out_kfree_skb; | ||
1918 | if (skb->next) | ||
1919 | goto gso; | ||
1920 | } | ||
1921 | |||
1880 | rc = ops->ndo_start_xmit(skb, dev); | 1922 | rc = ops->ndo_start_xmit(skb, dev); |
1881 | if (rc == NETDEV_TX_OK) | 1923 | if (rc == NETDEV_TX_OK) |
1882 | txq_trans_update(txq); | 1924 | txq_trans_update(txq); |
1883 | /* | ||
1884 | * TODO: if skb_orphan() was called by | ||
1885 | * dev->hard_start_xmit() (for example, the unmodified | ||
1886 | * igb driver does that; bnx2 doesn't), then | ||
1887 | * skb_tx_software_timestamp() will be unable to send | ||
1888 | * back the time stamp. | ||
1889 | * | ||
1890 | * How can this be prevented? Always create another | ||
1891 | * reference to the socket before calling | ||
1892 | * dev->hard_start_xmit()? Prevent that skb_orphan() | ||
1893 | * does anything in dev->hard_start_xmit() by clearing | ||
1894 | * the skb destructor before the call and restoring it | ||
1895 | * afterwards, then doing the skb_orphan() ourselves? | ||
1896 | */ | ||
1897 | return rc; | 1925 | return rc; |
1898 | } | 1926 | } |
1899 | 1927 | ||
@@ -1932,7 +1960,7 @@ out_kfree_skb: | |||
1932 | return rc; | 1960 | return rc; |
1933 | } | 1961 | } |
1934 | 1962 | ||
1935 | static u32 skb_tx_hashrnd; | 1963 | static u32 hashrnd __read_mostly; |
1936 | 1964 | ||
1937 | u16 skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb) | 1965 | u16 skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb) |
1938 | { | 1966 | { |
@@ -1948,9 +1976,9 @@ u16 skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb) | |||
1948 | if (skb->sk && skb->sk->sk_hash) | 1976 | if (skb->sk && skb->sk->sk_hash) |
1949 | hash = skb->sk->sk_hash; | 1977 | hash = skb->sk->sk_hash; |
1950 | else | 1978 | else |
1951 | hash = skb->protocol; | 1979 | hash = (__force u16) skb->protocol; |
1952 | 1980 | ||
1953 | hash = jhash_1word(hash, skb_tx_hashrnd); | 1981 | hash = jhash_1word(hash, hashrnd); |
1954 | 1982 | ||
1955 | return (u16) (((u64) hash * dev->real_num_tx_queues) >> 32); | 1983 | return (u16) (((u64) hash * dev->real_num_tx_queues) >> 32); |
1956 | } | 1984 | } |
@@ -1960,10 +1988,9 @@ static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index) | |||
1960 | { | 1988 | { |
1961 | if (unlikely(queue_index >= dev->real_num_tx_queues)) { | 1989 | if (unlikely(queue_index >= dev->real_num_tx_queues)) { |
1962 | if (net_ratelimit()) { | 1990 | if (net_ratelimit()) { |
1963 | WARN(1, "%s selects TX queue %d, but " | 1991 | pr_warning("%s selects TX queue %d, but " |
1964 | "real number of TX queues is %d\n", | 1992 | "real number of TX queues is %d\n", |
1965 | dev->name, queue_index, | 1993 | dev->name, queue_index, dev->real_num_tx_queues); |
1966 | dev->real_num_tx_queues); | ||
1967 | } | 1994 | } |
1968 | return 0; | 1995 | return 0; |
1969 | } | 1996 | } |
@@ -1990,7 +2017,7 @@ static struct netdev_queue *dev_pick_tx(struct net_device *dev, | |||
1990 | queue_index = skb_tx_hash(dev, skb); | 2017 | queue_index = skb_tx_hash(dev, skb); |
1991 | 2018 | ||
1992 | if (sk) { | 2019 | if (sk) { |
1993 | struct dst_entry *dst = rcu_dereference_bh(sk->sk_dst_cache); | 2020 | struct dst_entry *dst = rcu_dereference_check(sk->sk_dst_cache, 1); |
1994 | 2021 | ||
1995 | if (dst && skb_dst(skb) == dst) | 2022 | if (dst && skb_dst(skb) == dst) |
1996 | sk_tx_queue_set(sk, queue_index); | 2023 | sk_tx_queue_set(sk, queue_index); |
@@ -2020,6 +2047,8 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, | |||
2020 | * waiting to be sent out; and the qdisc is not running - | 2047 | * waiting to be sent out; and the qdisc is not running - |
2021 | * xmit the skb directly. | 2048 | * xmit the skb directly. |
2022 | */ | 2049 | */ |
2050 | if (!(dev->priv_flags & IFF_XMIT_DST_RELEASE)) | ||
2051 | skb_dst_force(skb); | ||
2023 | __qdisc_update_bstats(q, skb->len); | 2052 | __qdisc_update_bstats(q, skb->len); |
2024 | if (sch_direct_xmit(skb, q, dev, txq, root_lock)) | 2053 | if (sch_direct_xmit(skb, q, dev, txq, root_lock)) |
2025 | __qdisc_run(q); | 2054 | __qdisc_run(q); |
@@ -2028,6 +2057,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, | |||
2028 | 2057 | ||
2029 | rc = NET_XMIT_SUCCESS; | 2058 | rc = NET_XMIT_SUCCESS; |
2030 | } else { | 2059 | } else { |
2060 | skb_dst_force(skb); | ||
2031 | rc = qdisc_enqueue_root(skb, q); | 2061 | rc = qdisc_enqueue_root(skb, q); |
2032 | qdisc_run(q); | 2062 | qdisc_run(q); |
2033 | } | 2063 | } |
@@ -2175,11 +2205,249 @@ EXPORT_SYMBOL(dev_queue_xmit); | |||
2175 | =======================================================================*/ | 2205 | =======================================================================*/ |
2176 | 2206 | ||
2177 | int netdev_max_backlog __read_mostly = 1000; | 2207 | int netdev_max_backlog __read_mostly = 1000; |
2208 | int netdev_tstamp_prequeue __read_mostly = 1; | ||
2178 | int netdev_budget __read_mostly = 300; | 2209 | int netdev_budget __read_mostly = 300; |
2179 | int weight_p __read_mostly = 64; /* old backlog weight */ | 2210 | int weight_p __read_mostly = 64; /* old backlog weight */ |
2180 | 2211 | ||
2181 | DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, }; | 2212 | /* Called with irq disabled */ |
2213 | static inline void ____napi_schedule(struct softnet_data *sd, | ||
2214 | struct napi_struct *napi) | ||
2215 | { | ||
2216 | list_add_tail(&napi->poll_list, &sd->poll_list); | ||
2217 | __raise_softirq_irqoff(NET_RX_SOFTIRQ); | ||
2218 | } | ||
2182 | 2219 | ||
2220 | #ifdef CONFIG_RPS | ||
2221 | |||
2222 | /* One global table that all flow-based protocols share. */ | ||
2223 | struct rps_sock_flow_table *rps_sock_flow_table __read_mostly; | ||
2224 | EXPORT_SYMBOL(rps_sock_flow_table); | ||
2225 | |||
2226 | /* | ||
2227 | * get_rps_cpu is called from netif_receive_skb and returns the target | ||
2228 | * CPU from the RPS map of the receiving queue for a given skb. | ||
2229 | * rcu_read_lock must be held on entry. | ||
2230 | */ | ||
2231 | static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, | ||
2232 | struct rps_dev_flow **rflowp) | ||
2233 | { | ||
2234 | struct ipv6hdr *ip6; | ||
2235 | struct iphdr *ip; | ||
2236 | struct netdev_rx_queue *rxqueue; | ||
2237 | struct rps_map *map; | ||
2238 | struct rps_dev_flow_table *flow_table; | ||
2239 | struct rps_sock_flow_table *sock_flow_table; | ||
2240 | int cpu = -1; | ||
2241 | u8 ip_proto; | ||
2242 | u16 tcpu; | ||
2243 | u32 addr1, addr2, ihl; | ||
2244 | union { | ||
2245 | u32 v32; | ||
2246 | u16 v16[2]; | ||
2247 | } ports; | ||
2248 | |||
2249 | if (skb_rx_queue_recorded(skb)) { | ||
2250 | u16 index = skb_get_rx_queue(skb); | ||
2251 | if (unlikely(index >= dev->num_rx_queues)) { | ||
2252 | if (net_ratelimit()) { | ||
2253 | pr_warning("%s received packet on queue " | ||
2254 | "%u, but number of RX queues is %u\n", | ||
2255 | dev->name, index, dev->num_rx_queues); | ||
2256 | } | ||
2257 | goto done; | ||
2258 | } | ||
2259 | rxqueue = dev->_rx + index; | ||
2260 | } else | ||
2261 | rxqueue = dev->_rx; | ||
2262 | |||
2263 | if (!rxqueue->rps_map && !rxqueue->rps_flow_table) | ||
2264 | goto done; | ||
2265 | |||
2266 | if (skb->rxhash) | ||
2267 | goto got_hash; /* Skip hash computation on packet header */ | ||
2268 | |||
2269 | switch (skb->protocol) { | ||
2270 | case __constant_htons(ETH_P_IP): | ||
2271 | if (!pskb_may_pull(skb, sizeof(*ip))) | ||
2272 | goto done; | ||
2273 | |||
2274 | ip = (struct iphdr *) skb->data; | ||
2275 | ip_proto = ip->protocol; | ||
2276 | addr1 = (__force u32) ip->saddr; | ||
2277 | addr2 = (__force u32) ip->daddr; | ||
2278 | ihl = ip->ihl; | ||
2279 | break; | ||
2280 | case __constant_htons(ETH_P_IPV6): | ||
2281 | if (!pskb_may_pull(skb, sizeof(*ip6))) | ||
2282 | goto done; | ||
2283 | |||
2284 | ip6 = (struct ipv6hdr *) skb->data; | ||
2285 | ip_proto = ip6->nexthdr; | ||
2286 | addr1 = (__force u32) ip6->saddr.s6_addr32[3]; | ||
2287 | addr2 = (__force u32) ip6->daddr.s6_addr32[3]; | ||
2288 | ihl = (40 >> 2); | ||
2289 | break; | ||
2290 | default: | ||
2291 | goto done; | ||
2292 | } | ||
2293 | switch (ip_proto) { | ||
2294 | case IPPROTO_TCP: | ||
2295 | case IPPROTO_UDP: | ||
2296 | case IPPROTO_DCCP: | ||
2297 | case IPPROTO_ESP: | ||
2298 | case IPPROTO_AH: | ||
2299 | case IPPROTO_SCTP: | ||
2300 | case IPPROTO_UDPLITE: | ||
2301 | if (pskb_may_pull(skb, (ihl * 4) + 4)) { | ||
2302 | ports.v32 = * (__force u32 *) (skb->data + (ihl * 4)); | ||
2303 | if (ports.v16[1] < ports.v16[0]) | ||
2304 | swap(ports.v16[0], ports.v16[1]); | ||
2305 | break; | ||
2306 | } | ||
2307 | default: | ||
2308 | ports.v32 = 0; | ||
2309 | break; | ||
2310 | } | ||
2311 | |||
2312 | /* get a consistent hash (same value on both flow directions) */ | ||
2313 | if (addr2 < addr1) | ||
2314 | swap(addr1, addr2); | ||
2315 | skb->rxhash = jhash_3words(addr1, addr2, ports.v32, hashrnd); | ||
2316 | if (!skb->rxhash) | ||
2317 | skb->rxhash = 1; | ||
2318 | |||
2319 | got_hash: | ||
2320 | flow_table = rcu_dereference(rxqueue->rps_flow_table); | ||
2321 | sock_flow_table = rcu_dereference(rps_sock_flow_table); | ||
2322 | if (flow_table && sock_flow_table) { | ||
2323 | u16 next_cpu; | ||
2324 | struct rps_dev_flow *rflow; | ||
2325 | |||
2326 | rflow = &flow_table->flows[skb->rxhash & flow_table->mask]; | ||
2327 | tcpu = rflow->cpu; | ||
2328 | |||
2329 | next_cpu = sock_flow_table->ents[skb->rxhash & | ||
2330 | sock_flow_table->mask]; | ||
2331 | |||
2332 | /* | ||
2333 | * If the desired CPU (where last recvmsg was done) is | ||
2334 | * different from current CPU (one in the rx-queue flow | ||
2335 | * table entry), switch if one of the following holds: | ||
2336 | * - Current CPU is unset (equal to RPS_NO_CPU). | ||
2337 | * - Current CPU is offline. | ||
2338 | * - The current CPU's queue tail has advanced beyond the | ||
2339 | * last packet that was enqueued using this table entry. | ||
2340 | * This guarantees that all previous packets for the flow | ||
2341 | * have been dequeued, thus preserving in order delivery. | ||
2342 | */ | ||
2343 | if (unlikely(tcpu != next_cpu) && | ||
2344 | (tcpu == RPS_NO_CPU || !cpu_online(tcpu) || | ||
2345 | ((int)(per_cpu(softnet_data, tcpu).input_queue_head - | ||
2346 | rflow->last_qtail)) >= 0)) { | ||
2347 | tcpu = rflow->cpu = next_cpu; | ||
2348 | if (tcpu != RPS_NO_CPU) | ||
2349 | rflow->last_qtail = per_cpu(softnet_data, | ||
2350 | tcpu).input_queue_head; | ||
2351 | } | ||
2352 | if (tcpu != RPS_NO_CPU && cpu_online(tcpu)) { | ||
2353 | *rflowp = rflow; | ||
2354 | cpu = tcpu; | ||
2355 | goto done; | ||
2356 | } | ||
2357 | } | ||
2358 | |||
2359 | map = rcu_dereference(rxqueue->rps_map); | ||
2360 | if (map) { | ||
2361 | tcpu = map->cpus[((u64) skb->rxhash * map->len) >> 32]; | ||
2362 | |||
2363 | if (cpu_online(tcpu)) { | ||
2364 | cpu = tcpu; | ||
2365 | goto done; | ||
2366 | } | ||
2367 | } | ||
2368 | |||
2369 | done: | ||
2370 | return cpu; | ||
2371 | } | ||
2372 | |||
2373 | /* Called from hardirq (IPI) context */ | ||
2374 | static void rps_trigger_softirq(void *data) | ||
2375 | { | ||
2376 | struct softnet_data *sd = data; | ||
2377 | |||
2378 | ____napi_schedule(sd, &sd->backlog); | ||
2379 | sd->received_rps++; | ||
2380 | } | ||
2381 | |||
2382 | #endif /* CONFIG_RPS */ | ||
2383 | |||
2384 | /* | ||
2385 | * Check if this softnet_data structure is another cpu one | ||
2386 | * If yes, queue it to our IPI list and return 1 | ||
2387 | * If no, return 0 | ||
2388 | */ | ||
2389 | static int rps_ipi_queued(struct softnet_data *sd) | ||
2390 | { | ||
2391 | #ifdef CONFIG_RPS | ||
2392 | struct softnet_data *mysd = &__get_cpu_var(softnet_data); | ||
2393 | |||
2394 | if (sd != mysd) { | ||
2395 | sd->rps_ipi_next = mysd->rps_ipi_list; | ||
2396 | mysd->rps_ipi_list = sd; | ||
2397 | |||
2398 | __raise_softirq_irqoff(NET_RX_SOFTIRQ); | ||
2399 | return 1; | ||
2400 | } | ||
2401 | #endif /* CONFIG_RPS */ | ||
2402 | return 0; | ||
2403 | } | ||
2404 | |||
2405 | /* | ||
2406 | * enqueue_to_backlog is called to queue an skb to a per CPU backlog | ||
2407 | * queue (may be a remote CPU queue). | ||
2408 | */ | ||
2409 | static int enqueue_to_backlog(struct sk_buff *skb, int cpu, | ||
2410 | unsigned int *qtail) | ||
2411 | { | ||
2412 | struct softnet_data *sd; | ||
2413 | unsigned long flags; | ||
2414 | |||
2415 | sd = &per_cpu(softnet_data, cpu); | ||
2416 | |||
2417 | local_irq_save(flags); | ||
2418 | |||
2419 | rps_lock(sd); | ||
2420 | if (skb_queue_len(&sd->input_pkt_queue) <= netdev_max_backlog) { | ||
2421 | if (skb_queue_len(&sd->input_pkt_queue)) { | ||
2422 | enqueue: | ||
2423 | __skb_queue_tail(&sd->input_pkt_queue, skb); | ||
2424 | #ifdef CONFIG_RPS | ||
2425 | *qtail = sd->input_queue_head + | ||
2426 | skb_queue_len(&sd->input_pkt_queue); | ||
2427 | #endif | ||
2428 | rps_unlock(sd); | ||
2429 | local_irq_restore(flags); | ||
2430 | return NET_RX_SUCCESS; | ||
2431 | } | ||
2432 | |||
2433 | /* Schedule NAPI for backlog device | ||
2434 | * We can use non atomic operation since we own the queue lock | ||
2435 | */ | ||
2436 | if (!__test_and_set_bit(NAPI_STATE_SCHED, &sd->backlog.state)) { | ||
2437 | if (!rps_ipi_queued(sd)) | ||
2438 | ____napi_schedule(sd, &sd->backlog); | ||
2439 | } | ||
2440 | goto enqueue; | ||
2441 | } | ||
2442 | |||
2443 | sd->dropped++; | ||
2444 | rps_unlock(sd); | ||
2445 | |||
2446 | local_irq_restore(flags); | ||
2447 | |||
2448 | kfree_skb(skb); | ||
2449 | return NET_RX_DROP; | ||
2450 | } | ||
2183 | 2451 | ||
2184 | /** | 2452 | /** |
2185 | * netif_rx - post buffer to the network code | 2453 | * netif_rx - post buffer to the network code |
@@ -2198,41 +2466,38 @@ DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, }; | |||
2198 | 2466 | ||
2199 | int netif_rx(struct sk_buff *skb) | 2467 | int netif_rx(struct sk_buff *skb) |
2200 | { | 2468 | { |
2201 | struct softnet_data *queue; | 2469 | int ret; |
2202 | unsigned long flags; | ||
2203 | 2470 | ||
2204 | /* if netpoll wants it, pretend we never saw it */ | 2471 | /* if netpoll wants it, pretend we never saw it */ |
2205 | if (netpoll_rx(skb)) | 2472 | if (netpoll_rx(skb)) |
2206 | return NET_RX_DROP; | 2473 | return NET_RX_DROP; |
2207 | 2474 | ||
2208 | if (!skb->tstamp.tv64) | 2475 | if (netdev_tstamp_prequeue) |
2209 | net_timestamp(skb); | 2476 | net_timestamp_check(skb); |
2210 | 2477 | ||
2211 | /* | 2478 | #ifdef CONFIG_RPS |
2212 | * The code is rearranged so that the path is the most | 2479 | { |
2213 | * short when CPU is congested, but is still operating. | 2480 | struct rps_dev_flow voidflow, *rflow = &voidflow; |
2214 | */ | 2481 | int cpu; |
2215 | local_irq_save(flags); | ||
2216 | queue = &__get_cpu_var(softnet_data); | ||
2217 | 2482 | ||
2218 | __get_cpu_var(netdev_rx_stat).total++; | 2483 | rcu_read_lock(); |
2219 | if (queue->input_pkt_queue.qlen <= netdev_max_backlog) { | ||
2220 | if (queue->input_pkt_queue.qlen) { | ||
2221 | enqueue: | ||
2222 | __skb_queue_tail(&queue->input_pkt_queue, skb); | ||
2223 | local_irq_restore(flags); | ||
2224 | return NET_RX_SUCCESS; | ||
2225 | } | ||
2226 | 2484 | ||
2227 | napi_schedule(&queue->backlog); | 2485 | cpu = get_rps_cpu(skb->dev, skb, &rflow); |
2228 | goto enqueue; | 2486 | if (cpu < 0) |
2229 | } | 2487 | cpu = smp_processor_id(); |
2230 | 2488 | ||
2231 | __get_cpu_var(netdev_rx_stat).dropped++; | 2489 | ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail); |
2232 | local_irq_restore(flags); | ||
2233 | 2490 | ||
2234 | kfree_skb(skb); | 2491 | rcu_read_unlock(); |
2235 | return NET_RX_DROP; | 2492 | } |
2493 | #else | ||
2494 | { | ||
2495 | unsigned int qtail; | ||
2496 | ret = enqueue_to_backlog(skb, get_cpu(), &qtail); | ||
2497 | put_cpu(); | ||
2498 | } | ||
2499 | #endif | ||
2500 | return ret; | ||
2236 | } | 2501 | } |
2237 | EXPORT_SYMBOL(netif_rx); | 2502 | EXPORT_SYMBOL(netif_rx); |
2238 | 2503 | ||
@@ -2277,6 +2542,7 @@ static void net_tx_action(struct softirq_action *h) | |||
2277 | local_irq_disable(); | 2542 | local_irq_disable(); |
2278 | head = sd->output_queue; | 2543 | head = sd->output_queue; |
2279 | sd->output_queue = NULL; | 2544 | sd->output_queue = NULL; |
2545 | sd->output_queue_tailp = &sd->output_queue; | ||
2280 | local_irq_enable(); | 2546 | local_irq_enable(); |
2281 | 2547 | ||
2282 | while (head) { | 2548 | while (head) { |
@@ -2353,7 +2619,8 @@ static inline struct sk_buff *handle_bridge(struct sk_buff *skb, | |||
2353 | #endif | 2619 | #endif |
2354 | 2620 | ||
2355 | #if defined(CONFIG_MACVLAN) || defined(CONFIG_MACVLAN_MODULE) | 2621 | #if defined(CONFIG_MACVLAN) || defined(CONFIG_MACVLAN_MODULE) |
2356 | struct sk_buff *(*macvlan_handle_frame_hook)(struct sk_buff *skb) __read_mostly; | 2622 | struct sk_buff *(*macvlan_handle_frame_hook)(struct macvlan_port *p, |
2623 | struct sk_buff *skb) __read_mostly; | ||
2357 | EXPORT_SYMBOL_GPL(macvlan_handle_frame_hook); | 2624 | EXPORT_SYMBOL_GPL(macvlan_handle_frame_hook); |
2358 | 2625 | ||
2359 | static inline struct sk_buff *handle_macvlan(struct sk_buff *skb, | 2626 | static inline struct sk_buff *handle_macvlan(struct sk_buff *skb, |
@@ -2361,14 +2628,17 @@ static inline struct sk_buff *handle_macvlan(struct sk_buff *skb, | |||
2361 | int *ret, | 2628 | int *ret, |
2362 | struct net_device *orig_dev) | 2629 | struct net_device *orig_dev) |
2363 | { | 2630 | { |
2364 | if (skb->dev->macvlan_port == NULL) | 2631 | struct macvlan_port *port; |
2632 | |||
2633 | port = rcu_dereference(skb->dev->macvlan_port); | ||
2634 | if (!port) | ||
2365 | return skb; | 2635 | return skb; |
2366 | 2636 | ||
2367 | if (*pt_prev) { | 2637 | if (*pt_prev) { |
2368 | *ret = deliver_skb(skb, *pt_prev, orig_dev); | 2638 | *ret = deliver_skb(skb, *pt_prev, orig_dev); |
2369 | *pt_prev = NULL; | 2639 | *pt_prev = NULL; |
2370 | } | 2640 | } |
2371 | return macvlan_handle_frame_hook(skb); | 2641 | return macvlan_handle_frame_hook(port, skb); |
2372 | } | 2642 | } |
2373 | #else | 2643 | #else |
2374 | #define handle_macvlan(skb, pt_prev, ret, orig_dev) (skb) | 2644 | #define handle_macvlan(skb, pt_prev, ret, orig_dev) (skb) |
@@ -2469,22 +2739,56 @@ void netif_nit_deliver(struct sk_buff *skb) | |||
2469 | rcu_read_unlock(); | 2739 | rcu_read_unlock(); |
2470 | } | 2740 | } |
2471 | 2741 | ||
2472 | /** | 2742 | static inline void skb_bond_set_mac_by_master(struct sk_buff *skb, |
2473 | * netif_receive_skb - process receive buffer from network | 2743 | struct net_device *master) |
2474 | * @skb: buffer to process | 2744 | { |
2475 | * | 2745 | if (skb->pkt_type == PACKET_HOST) { |
2476 | * netif_receive_skb() is the main receive data processing function. | 2746 | u16 *dest = (u16 *) eth_hdr(skb)->h_dest; |
2477 | * It always succeeds. The buffer may be dropped during processing | 2747 | |
2478 | * for congestion control or by the protocol layers. | 2748 | memcpy(dest, master->dev_addr, ETH_ALEN); |
2479 | * | 2749 | } |
2480 | * This function may only be called from softirq context and interrupts | 2750 | } |
2481 | * should be enabled. | 2751 | |
2482 | * | 2752 | /* On bonding slaves other than the currently active slave, suppress |
2483 | * Return values (usually ignored): | 2753 | * duplicates except for 802.3ad ETH_P_SLOW, alb non-mcast/bcast, and |
2484 | * NET_RX_SUCCESS: no congestion | 2754 | * ARP on active-backup slaves with arp_validate enabled. |
2485 | * NET_RX_DROP: packet was dropped | ||
2486 | */ | 2755 | */ |
2487 | int netif_receive_skb(struct sk_buff *skb) | 2756 | int __skb_bond_should_drop(struct sk_buff *skb, struct net_device *master) |
2757 | { | ||
2758 | struct net_device *dev = skb->dev; | ||
2759 | |||
2760 | if (master->priv_flags & IFF_MASTER_ARPMON) | ||
2761 | dev->last_rx = jiffies; | ||
2762 | |||
2763 | if ((master->priv_flags & IFF_MASTER_ALB) && master->br_port) { | ||
2764 | /* Do address unmangle. The local destination address | ||
2765 | * will be always the one master has. Provides the right | ||
2766 | * functionality in a bridge. | ||
2767 | */ | ||
2768 | skb_bond_set_mac_by_master(skb, master); | ||
2769 | } | ||
2770 | |||
2771 | if (dev->priv_flags & IFF_SLAVE_INACTIVE) { | ||
2772 | if ((dev->priv_flags & IFF_SLAVE_NEEDARP) && | ||
2773 | skb->protocol == __cpu_to_be16(ETH_P_ARP)) | ||
2774 | return 0; | ||
2775 | |||
2776 | if (master->priv_flags & IFF_MASTER_ALB) { | ||
2777 | if (skb->pkt_type != PACKET_BROADCAST && | ||
2778 | skb->pkt_type != PACKET_MULTICAST) | ||
2779 | return 0; | ||
2780 | } | ||
2781 | if (master->priv_flags & IFF_MASTER_8023AD && | ||
2782 | skb->protocol == __cpu_to_be16(ETH_P_SLOW)) | ||
2783 | return 0; | ||
2784 | |||
2785 | return 1; | ||
2786 | } | ||
2787 | return 0; | ||
2788 | } | ||
2789 | EXPORT_SYMBOL(__skb_bond_should_drop); | ||
2790 | |||
2791 | static int __netif_receive_skb(struct sk_buff *skb) | ||
2488 | { | 2792 | { |
2489 | struct packet_type *ptype, *pt_prev; | 2793 | struct packet_type *ptype, *pt_prev; |
2490 | struct net_device *orig_dev; | 2794 | struct net_device *orig_dev; |
@@ -2494,8 +2798,8 @@ int netif_receive_skb(struct sk_buff *skb) | |||
2494 | int ret = NET_RX_DROP; | 2798 | int ret = NET_RX_DROP; |
2495 | __be16 type; | 2799 | __be16 type; |
2496 | 2800 | ||
2497 | if (!skb->tstamp.tv64) | 2801 | if (!netdev_tstamp_prequeue) |
2498 | net_timestamp(skb); | 2802 | net_timestamp_check(skb); |
2499 | 2803 | ||
2500 | if (vlan_tx_tag_present(skb) && vlan_hwaccel_do_receive(skb)) | 2804 | if (vlan_tx_tag_present(skb) && vlan_hwaccel_do_receive(skb)) |
2501 | return NET_RX_SUCCESS; | 2805 | return NET_RX_SUCCESS; |
@@ -2517,7 +2821,7 @@ int netif_receive_skb(struct sk_buff *skb) | |||
2517 | skb->dev = master; | 2821 | skb->dev = master; |
2518 | } | 2822 | } |
2519 | 2823 | ||
2520 | __get_cpu_var(netdev_rx_stat).total++; | 2824 | __get_cpu_var(softnet_data).processed++; |
2521 | 2825 | ||
2522 | skb_reset_network_header(skb); | 2826 | skb_reset_network_header(skb); |
2523 | skb_reset_transport_header(skb); | 2827 | skb_reset_transport_header(skb); |
@@ -2595,20 +2899,77 @@ out: | |||
2595 | rcu_read_unlock(); | 2899 | rcu_read_unlock(); |
2596 | return ret; | 2900 | return ret; |
2597 | } | 2901 | } |
2902 | |||
2903 | /** | ||
2904 | * netif_receive_skb - process receive buffer from network | ||
2905 | * @skb: buffer to process | ||
2906 | * | ||
2907 | * netif_receive_skb() is the main receive data processing function. | ||
2908 | * It always succeeds. The buffer may be dropped during processing | ||
2909 | * for congestion control or by the protocol layers. | ||
2910 | * | ||
2911 | * This function may only be called from softirq context and interrupts | ||
2912 | * should be enabled. | ||
2913 | * | ||
2914 | * Return values (usually ignored): | ||
2915 | * NET_RX_SUCCESS: no congestion | ||
2916 | * NET_RX_DROP: packet was dropped | ||
2917 | */ | ||
2918 | int netif_receive_skb(struct sk_buff *skb) | ||
2919 | { | ||
2920 | if (netdev_tstamp_prequeue) | ||
2921 | net_timestamp_check(skb); | ||
2922 | |||
2923 | #ifdef CONFIG_RPS | ||
2924 | { | ||
2925 | struct rps_dev_flow voidflow, *rflow = &voidflow; | ||
2926 | int cpu, ret; | ||
2927 | |||
2928 | rcu_read_lock(); | ||
2929 | |||
2930 | cpu = get_rps_cpu(skb->dev, skb, &rflow); | ||
2931 | |||
2932 | if (cpu >= 0) { | ||
2933 | ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail); | ||
2934 | rcu_read_unlock(); | ||
2935 | } else { | ||
2936 | rcu_read_unlock(); | ||
2937 | ret = __netif_receive_skb(skb); | ||
2938 | } | ||
2939 | |||
2940 | return ret; | ||
2941 | } | ||
2942 | #else | ||
2943 | return __netif_receive_skb(skb); | ||
2944 | #endif | ||
2945 | } | ||
2598 | EXPORT_SYMBOL(netif_receive_skb); | 2946 | EXPORT_SYMBOL(netif_receive_skb); |
2599 | 2947 | ||
2600 | /* Network device is going away, flush any packets still pending */ | 2948 | /* Network device is going away, flush any packets still pending |
2949 | * Called with irqs disabled. | ||
2950 | */ | ||
2601 | static void flush_backlog(void *arg) | 2951 | static void flush_backlog(void *arg) |
2602 | { | 2952 | { |
2603 | struct net_device *dev = arg; | 2953 | struct net_device *dev = arg; |
2604 | struct softnet_data *queue = &__get_cpu_var(softnet_data); | 2954 | struct softnet_data *sd = &__get_cpu_var(softnet_data); |
2605 | struct sk_buff *skb, *tmp; | 2955 | struct sk_buff *skb, *tmp; |
2606 | 2956 | ||
2607 | skb_queue_walk_safe(&queue->input_pkt_queue, skb, tmp) | 2957 | rps_lock(sd); |
2958 | skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp) { | ||
2608 | if (skb->dev == dev) { | 2959 | if (skb->dev == dev) { |
2609 | __skb_unlink(skb, &queue->input_pkt_queue); | 2960 | __skb_unlink(skb, &sd->input_pkt_queue); |
2610 | kfree_skb(skb); | 2961 | kfree_skb(skb); |
2962 | input_queue_head_add(sd, 1); | ||
2611 | } | 2963 | } |
2964 | } | ||
2965 | rps_unlock(sd); | ||
2966 | |||
2967 | skb_queue_walk_safe(&sd->process_queue, skb, tmp) { | ||
2968 | if (skb->dev == dev) { | ||
2969 | __skb_unlink(skb, &sd->process_queue); | ||
2970 | kfree_skb(skb); | ||
2971 | } | ||
2972 | } | ||
2612 | } | 2973 | } |
2613 | 2974 | ||
2614 | static int napi_gro_complete(struct sk_buff *skb) | 2975 | static int napi_gro_complete(struct sk_buff *skb) |
@@ -2911,27 +3272,85 @@ gro_result_t napi_gro_frags(struct napi_struct *napi) | |||
2911 | } | 3272 | } |
2912 | EXPORT_SYMBOL(napi_gro_frags); | 3273 | EXPORT_SYMBOL(napi_gro_frags); |
2913 | 3274 | ||
3275 | /* | ||
3276 | * net_rps_action sends any pending IPI's for rps. | ||
3277 | * Note: called with local irq disabled, but exits with local irq enabled. | ||
3278 | */ | ||
3279 | static void net_rps_action_and_irq_enable(struct softnet_data *sd) | ||
3280 | { | ||
3281 | #ifdef CONFIG_RPS | ||
3282 | struct softnet_data *remsd = sd->rps_ipi_list; | ||
3283 | |||
3284 | if (remsd) { | ||
3285 | sd->rps_ipi_list = NULL; | ||
3286 | |||
3287 | local_irq_enable(); | ||
3288 | |||
3289 | /* Send pending IPI's to kick RPS processing on remote cpus. */ | ||
3290 | while (remsd) { | ||
3291 | struct softnet_data *next = remsd->rps_ipi_next; | ||
3292 | |||
3293 | if (cpu_online(remsd->cpu)) | ||
3294 | __smp_call_function_single(remsd->cpu, | ||
3295 | &remsd->csd, 0); | ||
3296 | remsd = next; | ||
3297 | } | ||
3298 | } else | ||
3299 | #endif | ||
3300 | local_irq_enable(); | ||
3301 | } | ||
3302 | |||
2914 | static int process_backlog(struct napi_struct *napi, int quota) | 3303 | static int process_backlog(struct napi_struct *napi, int quota) |
2915 | { | 3304 | { |
2916 | int work = 0; | 3305 | int work = 0; |
2917 | struct softnet_data *queue = &__get_cpu_var(softnet_data); | 3306 | struct softnet_data *sd = container_of(napi, struct softnet_data, backlog); |
2918 | unsigned long start_time = jiffies; | ||
2919 | 3307 | ||
3308 | #ifdef CONFIG_RPS | ||
3309 | /* Check if we have pending ipi, its better to send them now, | ||
3310 | * not waiting net_rx_action() end. | ||
3311 | */ | ||
3312 | if (sd->rps_ipi_list) { | ||
3313 | local_irq_disable(); | ||
3314 | net_rps_action_and_irq_enable(sd); | ||
3315 | } | ||
3316 | #endif | ||
2920 | napi->weight = weight_p; | 3317 | napi->weight = weight_p; |
2921 | do { | 3318 | local_irq_disable(); |
3319 | while (work < quota) { | ||
2922 | struct sk_buff *skb; | 3320 | struct sk_buff *skb; |
3321 | unsigned int qlen; | ||
2923 | 3322 | ||
2924 | local_irq_disable(); | 3323 | while ((skb = __skb_dequeue(&sd->process_queue))) { |
2925 | skb = __skb_dequeue(&queue->input_pkt_queue); | ||
2926 | if (!skb) { | ||
2927 | __napi_complete(napi); | ||
2928 | local_irq_enable(); | 3324 | local_irq_enable(); |
2929 | break; | 3325 | __netif_receive_skb(skb); |
3326 | if (++work >= quota) | ||
3327 | return work; | ||
3328 | local_irq_disable(); | ||
2930 | } | 3329 | } |
2931 | local_irq_enable(); | ||
2932 | 3330 | ||
2933 | netif_receive_skb(skb); | 3331 | rps_lock(sd); |
2934 | } while (++work < quota && jiffies == start_time); | 3332 | qlen = skb_queue_len(&sd->input_pkt_queue); |
3333 | if (qlen) { | ||
3334 | input_queue_head_add(sd, qlen); | ||
3335 | skb_queue_splice_tail_init(&sd->input_pkt_queue, | ||
3336 | &sd->process_queue); | ||
3337 | } | ||
3338 | if (qlen < quota - work) { | ||
3339 | /* | ||
3340 | * Inline a custom version of __napi_complete(). | ||
3341 | * only current cpu owns and manipulates this napi, | ||
3342 | * and NAPI_STATE_SCHED is the only possible flag set on backlog. | ||
3343 | * we can use a plain write instead of clear_bit(), | ||
3344 | * and we dont need an smp_mb() memory barrier. | ||
3345 | */ | ||
3346 | list_del(&napi->poll_list); | ||
3347 | napi->state = 0; | ||
3348 | |||
3349 | quota = work + qlen; | ||
3350 | } | ||
3351 | rps_unlock(sd); | ||
3352 | } | ||
3353 | local_irq_enable(); | ||
2935 | 3354 | ||
2936 | return work; | 3355 | return work; |
2937 | } | 3356 | } |
@@ -2947,8 +3366,7 @@ void __napi_schedule(struct napi_struct *n) | |||
2947 | unsigned long flags; | 3366 | unsigned long flags; |
2948 | 3367 | ||
2949 | local_irq_save(flags); | 3368 | local_irq_save(flags); |
2950 | list_add_tail(&n->poll_list, &__get_cpu_var(softnet_data).poll_list); | 3369 | ____napi_schedule(&__get_cpu_var(softnet_data), n); |
2951 | __raise_softirq_irqoff(NET_RX_SOFTIRQ); | ||
2952 | local_irq_restore(flags); | 3370 | local_irq_restore(flags); |
2953 | } | 3371 | } |
2954 | EXPORT_SYMBOL(__napi_schedule); | 3372 | EXPORT_SYMBOL(__napi_schedule); |
@@ -3019,17 +3437,16 @@ void netif_napi_del(struct napi_struct *napi) | |||
3019 | } | 3437 | } |
3020 | EXPORT_SYMBOL(netif_napi_del); | 3438 | EXPORT_SYMBOL(netif_napi_del); |
3021 | 3439 | ||
3022 | |||
3023 | static void net_rx_action(struct softirq_action *h) | 3440 | static void net_rx_action(struct softirq_action *h) |
3024 | { | 3441 | { |
3025 | struct list_head *list = &__get_cpu_var(softnet_data).poll_list; | 3442 | struct softnet_data *sd = &__get_cpu_var(softnet_data); |
3026 | unsigned long time_limit = jiffies + 2; | 3443 | unsigned long time_limit = jiffies + 2; |
3027 | int budget = netdev_budget; | 3444 | int budget = netdev_budget; |
3028 | void *have; | 3445 | void *have; |
3029 | 3446 | ||
3030 | local_irq_disable(); | 3447 | local_irq_disable(); |
3031 | 3448 | ||
3032 | while (!list_empty(list)) { | 3449 | while (!list_empty(&sd->poll_list)) { |
3033 | struct napi_struct *n; | 3450 | struct napi_struct *n; |
3034 | int work, weight; | 3451 | int work, weight; |
3035 | 3452 | ||
@@ -3047,7 +3464,7 @@ static void net_rx_action(struct softirq_action *h) | |||
3047 | * entries to the tail of this list, and only ->poll() | 3464 | * entries to the tail of this list, and only ->poll() |
3048 | * calls can remove this head entry from the list. | 3465 | * calls can remove this head entry from the list. |
3049 | */ | 3466 | */ |
3050 | n = list_first_entry(list, struct napi_struct, poll_list); | 3467 | n = list_first_entry(&sd->poll_list, struct napi_struct, poll_list); |
3051 | 3468 | ||
3052 | have = netpoll_poll_lock(n); | 3469 | have = netpoll_poll_lock(n); |
3053 | 3470 | ||
@@ -3082,13 +3499,13 @@ static void net_rx_action(struct softirq_action *h) | |||
3082 | napi_complete(n); | 3499 | napi_complete(n); |
3083 | local_irq_disable(); | 3500 | local_irq_disable(); |
3084 | } else | 3501 | } else |
3085 | list_move_tail(&n->poll_list, list); | 3502 | list_move_tail(&n->poll_list, &sd->poll_list); |
3086 | } | 3503 | } |
3087 | 3504 | ||
3088 | netpoll_poll_unlock(have); | 3505 | netpoll_poll_unlock(have); |
3089 | } | 3506 | } |
3090 | out: | 3507 | out: |
3091 | local_irq_enable(); | 3508 | net_rps_action_and_irq_enable(sd); |
3092 | 3509 | ||
3093 | #ifdef CONFIG_NET_DMA | 3510 | #ifdef CONFIG_NET_DMA |
3094 | /* | 3511 | /* |
@@ -3101,7 +3518,7 @@ out: | |||
3101 | return; | 3518 | return; |
3102 | 3519 | ||
3103 | softnet_break: | 3520 | softnet_break: |
3104 | __get_cpu_var(netdev_rx_stat).time_squeeze++; | 3521 | sd->time_squeeze++; |
3105 | __raise_softirq_irqoff(NET_RX_SOFTIRQ); | 3522 | __raise_softirq_irqoff(NET_RX_SOFTIRQ); |
3106 | goto out; | 3523 | goto out; |
3107 | } | 3524 | } |
@@ -3302,17 +3719,17 @@ static int dev_seq_show(struct seq_file *seq, void *v) | |||
3302 | return 0; | 3719 | return 0; |
3303 | } | 3720 | } |
3304 | 3721 | ||
3305 | static struct netif_rx_stats *softnet_get_online(loff_t *pos) | 3722 | static struct softnet_data *softnet_get_online(loff_t *pos) |
3306 | { | 3723 | { |
3307 | struct netif_rx_stats *rc = NULL; | 3724 | struct softnet_data *sd = NULL; |
3308 | 3725 | ||
3309 | while (*pos < nr_cpu_ids) | 3726 | while (*pos < nr_cpu_ids) |
3310 | if (cpu_online(*pos)) { | 3727 | if (cpu_online(*pos)) { |
3311 | rc = &per_cpu(netdev_rx_stat, *pos); | 3728 | sd = &per_cpu(softnet_data, *pos); |
3312 | break; | 3729 | break; |
3313 | } else | 3730 | } else |
3314 | ++*pos; | 3731 | ++*pos; |
3315 | return rc; | 3732 | return sd; |
3316 | } | 3733 | } |
3317 | 3734 | ||
3318 | static void *softnet_seq_start(struct seq_file *seq, loff_t *pos) | 3735 | static void *softnet_seq_start(struct seq_file *seq, loff_t *pos) |
@@ -3332,12 +3749,12 @@ static void softnet_seq_stop(struct seq_file *seq, void *v) | |||
3332 | 3749 | ||
3333 | static int softnet_seq_show(struct seq_file *seq, void *v) | 3750 | static int softnet_seq_show(struct seq_file *seq, void *v) |
3334 | { | 3751 | { |
3335 | struct netif_rx_stats *s = v; | 3752 | struct softnet_data *sd = v; |
3336 | 3753 | ||
3337 | seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n", | 3754 | seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x %08x\n", |
3338 | s->total, s->dropped, s->time_squeeze, 0, | 3755 | sd->processed, sd->dropped, sd->time_squeeze, 0, |
3339 | 0, 0, 0, 0, /* was fastroute */ | 3756 | 0, 0, 0, 0, /* was fastroute */ |
3340 | s->cpu_collision); | 3757 | sd->cpu_collision, sd->received_rps); |
3341 | return 0; | 3758 | return 0; |
3342 | } | 3759 | } |
3343 | 3760 | ||
@@ -3560,11 +3977,10 @@ int netdev_set_master(struct net_device *slave, struct net_device *master) | |||
3560 | 3977 | ||
3561 | slave->master = master; | 3978 | slave->master = master; |
3562 | 3979 | ||
3563 | synchronize_net(); | 3980 | if (old) { |
3564 | 3981 | synchronize_net(); | |
3565 | if (old) | ||
3566 | dev_put(old); | 3982 | dev_put(old); |
3567 | 3983 | } | |
3568 | if (master) | 3984 | if (master) |
3569 | slave->flags |= IFF_SLAVE; | 3985 | slave->flags |= IFF_SLAVE; |
3570 | else | 3986 | else |
@@ -3741,562 +4157,6 @@ void dev_set_rx_mode(struct net_device *dev) | |||
3741 | netif_addr_unlock_bh(dev); | 4157 | netif_addr_unlock_bh(dev); |
3742 | } | 4158 | } |
3743 | 4159 | ||
3744 | /* hw addresses list handling functions */ | ||
3745 | |||
3746 | static int __hw_addr_add(struct netdev_hw_addr_list *list, unsigned char *addr, | ||
3747 | int addr_len, unsigned char addr_type) | ||
3748 | { | ||
3749 | struct netdev_hw_addr *ha; | ||
3750 | int alloc_size; | ||
3751 | |||
3752 | if (addr_len > MAX_ADDR_LEN) | ||
3753 | return -EINVAL; | ||
3754 | |||
3755 | list_for_each_entry(ha, &list->list, list) { | ||
3756 | if (!memcmp(ha->addr, addr, addr_len) && | ||
3757 | ha->type == addr_type) { | ||
3758 | ha->refcount++; | ||
3759 | return 0; | ||
3760 | } | ||
3761 | } | ||
3762 | |||
3763 | |||
3764 | alloc_size = sizeof(*ha); | ||
3765 | if (alloc_size < L1_CACHE_BYTES) | ||
3766 | alloc_size = L1_CACHE_BYTES; | ||
3767 | ha = kmalloc(alloc_size, GFP_ATOMIC); | ||
3768 | if (!ha) | ||
3769 | return -ENOMEM; | ||
3770 | memcpy(ha->addr, addr, addr_len); | ||
3771 | ha->type = addr_type; | ||
3772 | ha->refcount = 1; | ||
3773 | ha->synced = false; | ||
3774 | list_add_tail_rcu(&ha->list, &list->list); | ||
3775 | list->count++; | ||
3776 | return 0; | ||
3777 | } | ||
3778 | |||
3779 | static void ha_rcu_free(struct rcu_head *head) | ||
3780 | { | ||
3781 | struct netdev_hw_addr *ha; | ||
3782 | |||
3783 | ha = container_of(head, struct netdev_hw_addr, rcu_head); | ||
3784 | kfree(ha); | ||
3785 | } | ||
3786 | |||
3787 | static int __hw_addr_del(struct netdev_hw_addr_list *list, unsigned char *addr, | ||
3788 | int addr_len, unsigned char addr_type) | ||
3789 | { | ||
3790 | struct netdev_hw_addr *ha; | ||
3791 | |||
3792 | list_for_each_entry(ha, &list->list, list) { | ||
3793 | if (!memcmp(ha->addr, addr, addr_len) && | ||
3794 | (ha->type == addr_type || !addr_type)) { | ||
3795 | if (--ha->refcount) | ||
3796 | return 0; | ||
3797 | list_del_rcu(&ha->list); | ||
3798 | call_rcu(&ha->rcu_head, ha_rcu_free); | ||
3799 | list->count--; | ||
3800 | return 0; | ||
3801 | } | ||
3802 | } | ||
3803 | return -ENOENT; | ||
3804 | } | ||
3805 | |||
3806 | static int __hw_addr_add_multiple(struct netdev_hw_addr_list *to_list, | ||
3807 | struct netdev_hw_addr_list *from_list, | ||
3808 | int addr_len, | ||
3809 | unsigned char addr_type) | ||
3810 | { | ||
3811 | int err; | ||
3812 | struct netdev_hw_addr *ha, *ha2; | ||
3813 | unsigned char type; | ||
3814 | |||
3815 | list_for_each_entry(ha, &from_list->list, list) { | ||
3816 | type = addr_type ? addr_type : ha->type; | ||
3817 | err = __hw_addr_add(to_list, ha->addr, addr_len, type); | ||
3818 | if (err) | ||
3819 | goto unroll; | ||
3820 | } | ||
3821 | return 0; | ||
3822 | |||
3823 | unroll: | ||
3824 | list_for_each_entry(ha2, &from_list->list, list) { | ||
3825 | if (ha2 == ha) | ||
3826 | break; | ||
3827 | type = addr_type ? addr_type : ha2->type; | ||
3828 | __hw_addr_del(to_list, ha2->addr, addr_len, type); | ||
3829 | } | ||
3830 | return err; | ||
3831 | } | ||
3832 | |||
3833 | static void __hw_addr_del_multiple(struct netdev_hw_addr_list *to_list, | ||
3834 | struct netdev_hw_addr_list *from_list, | ||
3835 | int addr_len, | ||
3836 | unsigned char addr_type) | ||
3837 | { | ||
3838 | struct netdev_hw_addr *ha; | ||
3839 | unsigned char type; | ||
3840 | |||
3841 | list_for_each_entry(ha, &from_list->list, list) { | ||
3842 | type = addr_type ? addr_type : ha->type; | ||
3843 | __hw_addr_del(to_list, ha->addr, addr_len, addr_type); | ||
3844 | } | ||
3845 | } | ||
3846 | |||
3847 | static int __hw_addr_sync(struct netdev_hw_addr_list *to_list, | ||
3848 | struct netdev_hw_addr_list *from_list, | ||
3849 | int addr_len) | ||
3850 | { | ||
3851 | int err = 0; | ||
3852 | struct netdev_hw_addr *ha, *tmp; | ||
3853 | |||
3854 | list_for_each_entry_safe(ha, tmp, &from_list->list, list) { | ||
3855 | if (!ha->synced) { | ||
3856 | err = __hw_addr_add(to_list, ha->addr, | ||
3857 | addr_len, ha->type); | ||
3858 | if (err) | ||
3859 | break; | ||
3860 | ha->synced = true; | ||
3861 | ha->refcount++; | ||
3862 | } else if (ha->refcount == 1) { | ||
3863 | __hw_addr_del(to_list, ha->addr, addr_len, ha->type); | ||
3864 | __hw_addr_del(from_list, ha->addr, addr_len, ha->type); | ||
3865 | } | ||
3866 | } | ||
3867 | return err; | ||
3868 | } | ||
3869 | |||
3870 | static void __hw_addr_unsync(struct netdev_hw_addr_list *to_list, | ||
3871 | struct netdev_hw_addr_list *from_list, | ||
3872 | int addr_len) | ||
3873 | { | ||
3874 | struct netdev_hw_addr *ha, *tmp; | ||
3875 | |||
3876 | list_for_each_entry_safe(ha, tmp, &from_list->list, list) { | ||
3877 | if (ha->synced) { | ||
3878 | __hw_addr_del(to_list, ha->addr, | ||
3879 | addr_len, ha->type); | ||
3880 | ha->synced = false; | ||
3881 | __hw_addr_del(from_list, ha->addr, | ||
3882 | addr_len, ha->type); | ||
3883 | } | ||
3884 | } | ||
3885 | } | ||
3886 | |||
3887 | static void __hw_addr_flush(struct netdev_hw_addr_list *list) | ||
3888 | { | ||
3889 | struct netdev_hw_addr *ha, *tmp; | ||
3890 | |||
3891 | list_for_each_entry_safe(ha, tmp, &list->list, list) { | ||
3892 | list_del_rcu(&ha->list); | ||
3893 | call_rcu(&ha->rcu_head, ha_rcu_free); | ||
3894 | } | ||
3895 | list->count = 0; | ||
3896 | } | ||
3897 | |||
3898 | static void __hw_addr_init(struct netdev_hw_addr_list *list) | ||
3899 | { | ||
3900 | INIT_LIST_HEAD(&list->list); | ||
3901 | list->count = 0; | ||
3902 | } | ||
3903 | |||
3904 | /* Device addresses handling functions */ | ||
3905 | |||
3906 | static void dev_addr_flush(struct net_device *dev) | ||
3907 | { | ||
3908 | /* rtnl_mutex must be held here */ | ||
3909 | |||
3910 | __hw_addr_flush(&dev->dev_addrs); | ||
3911 | dev->dev_addr = NULL; | ||
3912 | } | ||
3913 | |||
3914 | static int dev_addr_init(struct net_device *dev) | ||
3915 | { | ||
3916 | unsigned char addr[MAX_ADDR_LEN]; | ||
3917 | struct netdev_hw_addr *ha; | ||
3918 | int err; | ||
3919 | |||
3920 | /* rtnl_mutex must be held here */ | ||
3921 | |||
3922 | __hw_addr_init(&dev->dev_addrs); | ||
3923 | memset(addr, 0, sizeof(addr)); | ||
3924 | err = __hw_addr_add(&dev->dev_addrs, addr, sizeof(addr), | ||
3925 | NETDEV_HW_ADDR_T_LAN); | ||
3926 | if (!err) { | ||
3927 | /* | ||
3928 | * Get the first (previously created) address from the list | ||
3929 | * and set dev_addr pointer to this location. | ||
3930 | */ | ||
3931 | ha = list_first_entry(&dev->dev_addrs.list, | ||
3932 | struct netdev_hw_addr, list); | ||
3933 | dev->dev_addr = ha->addr; | ||
3934 | } | ||
3935 | return err; | ||
3936 | } | ||
3937 | |||
3938 | /** | ||
3939 | * dev_addr_add - Add a device address | ||
3940 | * @dev: device | ||
3941 | * @addr: address to add | ||
3942 | * @addr_type: address type | ||
3943 | * | ||
3944 | * Add a device address to the device or increase the reference count if | ||
3945 | * it already exists. | ||
3946 | * | ||
3947 | * The caller must hold the rtnl_mutex. | ||
3948 | */ | ||
3949 | int dev_addr_add(struct net_device *dev, unsigned char *addr, | ||
3950 | unsigned char addr_type) | ||
3951 | { | ||
3952 | int err; | ||
3953 | |||
3954 | ASSERT_RTNL(); | ||
3955 | |||
3956 | err = __hw_addr_add(&dev->dev_addrs, addr, dev->addr_len, addr_type); | ||
3957 | if (!err) | ||
3958 | call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); | ||
3959 | return err; | ||
3960 | } | ||
3961 | EXPORT_SYMBOL(dev_addr_add); | ||
3962 | |||
3963 | /** | ||
3964 | * dev_addr_del - Release a device address. | ||
3965 | * @dev: device | ||
3966 | * @addr: address to delete | ||
3967 | * @addr_type: address type | ||
3968 | * | ||
3969 | * Release reference to a device address and remove it from the device | ||
3970 | * if the reference count drops to zero. | ||
3971 | * | ||
3972 | * The caller must hold the rtnl_mutex. | ||
3973 | */ | ||
3974 | int dev_addr_del(struct net_device *dev, unsigned char *addr, | ||
3975 | unsigned char addr_type) | ||
3976 | { | ||
3977 | int err; | ||
3978 | struct netdev_hw_addr *ha; | ||
3979 | |||
3980 | ASSERT_RTNL(); | ||
3981 | |||
3982 | /* | ||
3983 | * We can not remove the first address from the list because | ||
3984 | * dev->dev_addr points to that. | ||
3985 | */ | ||
3986 | ha = list_first_entry(&dev->dev_addrs.list, | ||
3987 | struct netdev_hw_addr, list); | ||
3988 | if (ha->addr == dev->dev_addr && ha->refcount == 1) | ||
3989 | return -ENOENT; | ||
3990 | |||
3991 | err = __hw_addr_del(&dev->dev_addrs, addr, dev->addr_len, | ||
3992 | addr_type); | ||
3993 | if (!err) | ||
3994 | call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); | ||
3995 | return err; | ||
3996 | } | ||
3997 | EXPORT_SYMBOL(dev_addr_del); | ||
3998 | |||
3999 | /** | ||
4000 | * dev_addr_add_multiple - Add device addresses from another device | ||
4001 | * @to_dev: device to which addresses will be added | ||
4002 | * @from_dev: device from which addresses will be added | ||
4003 | * @addr_type: address type - 0 means type will be used from from_dev | ||
4004 | * | ||
4005 | * Add device addresses of the one device to another. | ||
4006 | ** | ||
4007 | * The caller must hold the rtnl_mutex. | ||
4008 | */ | ||
4009 | int dev_addr_add_multiple(struct net_device *to_dev, | ||
4010 | struct net_device *from_dev, | ||
4011 | unsigned char addr_type) | ||
4012 | { | ||
4013 | int err; | ||
4014 | |||
4015 | ASSERT_RTNL(); | ||
4016 | |||
4017 | if (from_dev->addr_len != to_dev->addr_len) | ||
4018 | return -EINVAL; | ||
4019 | err = __hw_addr_add_multiple(&to_dev->dev_addrs, &from_dev->dev_addrs, | ||
4020 | to_dev->addr_len, addr_type); | ||
4021 | if (!err) | ||
4022 | call_netdevice_notifiers(NETDEV_CHANGEADDR, to_dev); | ||
4023 | return err; | ||
4024 | } | ||
4025 | EXPORT_SYMBOL(dev_addr_add_multiple); | ||
4026 | |||
4027 | /** | ||
4028 | * dev_addr_del_multiple - Delete device addresses by another device | ||
4029 | * @to_dev: device where the addresses will be deleted | ||
4030 | * @from_dev: device by which addresses the addresses will be deleted | ||
4031 | * @addr_type: address type - 0 means type will used from from_dev | ||
4032 | * | ||
4033 | * Deletes addresses in to device by the list of addresses in from device. | ||
4034 | * | ||
4035 | * The caller must hold the rtnl_mutex. | ||
4036 | */ | ||
4037 | int dev_addr_del_multiple(struct net_device *to_dev, | ||
4038 | struct net_device *from_dev, | ||
4039 | unsigned char addr_type) | ||
4040 | { | ||
4041 | ASSERT_RTNL(); | ||
4042 | |||
4043 | if (from_dev->addr_len != to_dev->addr_len) | ||
4044 | return -EINVAL; | ||
4045 | __hw_addr_del_multiple(&to_dev->dev_addrs, &from_dev->dev_addrs, | ||
4046 | to_dev->addr_len, addr_type); | ||
4047 | call_netdevice_notifiers(NETDEV_CHANGEADDR, to_dev); | ||
4048 | return 0; | ||
4049 | } | ||
4050 | EXPORT_SYMBOL(dev_addr_del_multiple); | ||
4051 | |||
4052 | /* multicast addresses handling functions */ | ||
4053 | |||
4054 | int __dev_addr_delete(struct dev_addr_list **list, int *count, | ||
4055 | void *addr, int alen, int glbl) | ||
4056 | { | ||
4057 | struct dev_addr_list *da; | ||
4058 | |||
4059 | for (; (da = *list) != NULL; list = &da->next) { | ||
4060 | if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 && | ||
4061 | alen == da->da_addrlen) { | ||
4062 | if (glbl) { | ||
4063 | int old_glbl = da->da_gusers; | ||
4064 | da->da_gusers = 0; | ||
4065 | if (old_glbl == 0) | ||
4066 | break; | ||
4067 | } | ||
4068 | if (--da->da_users) | ||
4069 | return 0; | ||
4070 | |||
4071 | *list = da->next; | ||
4072 | kfree(da); | ||
4073 | (*count)--; | ||
4074 | return 0; | ||
4075 | } | ||
4076 | } | ||
4077 | return -ENOENT; | ||
4078 | } | ||
4079 | |||
4080 | int __dev_addr_add(struct dev_addr_list **list, int *count, | ||
4081 | void *addr, int alen, int glbl) | ||
4082 | { | ||
4083 | struct dev_addr_list *da; | ||
4084 | |||
4085 | for (da = *list; da != NULL; da = da->next) { | ||
4086 | if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 && | ||
4087 | da->da_addrlen == alen) { | ||
4088 | if (glbl) { | ||
4089 | int old_glbl = da->da_gusers; | ||
4090 | da->da_gusers = 1; | ||
4091 | if (old_glbl) | ||
4092 | return 0; | ||
4093 | } | ||
4094 | da->da_users++; | ||
4095 | return 0; | ||
4096 | } | ||
4097 | } | ||
4098 | |||
4099 | da = kzalloc(sizeof(*da), GFP_ATOMIC); | ||
4100 | if (da == NULL) | ||
4101 | return -ENOMEM; | ||
4102 | memcpy(da->da_addr, addr, alen); | ||
4103 | da->da_addrlen = alen; | ||
4104 | da->da_users = 1; | ||
4105 | da->da_gusers = glbl ? 1 : 0; | ||
4106 | da->next = *list; | ||
4107 | *list = da; | ||
4108 | (*count)++; | ||
4109 | return 0; | ||
4110 | } | ||
4111 | |||
4112 | /** | ||
4113 | * dev_unicast_delete - Release secondary unicast address. | ||
4114 | * @dev: device | ||
4115 | * @addr: address to delete | ||
4116 | * | ||
4117 | * Release reference to a secondary unicast address and remove it | ||
4118 | * from the device if the reference count drops to zero. | ||
4119 | * | ||
4120 | * The caller must hold the rtnl_mutex. | ||
4121 | */ | ||
4122 | int dev_unicast_delete(struct net_device *dev, void *addr) | ||
4123 | { | ||
4124 | int err; | ||
4125 | |||
4126 | ASSERT_RTNL(); | ||
4127 | |||
4128 | netif_addr_lock_bh(dev); | ||
4129 | err = __hw_addr_del(&dev->uc, addr, dev->addr_len, | ||
4130 | NETDEV_HW_ADDR_T_UNICAST); | ||
4131 | if (!err) | ||
4132 | __dev_set_rx_mode(dev); | ||
4133 | netif_addr_unlock_bh(dev); | ||
4134 | return err; | ||
4135 | } | ||
4136 | EXPORT_SYMBOL(dev_unicast_delete); | ||
4137 | |||
4138 | /** | ||
4139 | * dev_unicast_add - add a secondary unicast address | ||
4140 | * @dev: device | ||
4141 | * @addr: address to add | ||
4142 | * | ||
4143 | * Add a secondary unicast address to the device or increase | ||
4144 | * the reference count if it already exists. | ||
4145 | * | ||
4146 | * The caller must hold the rtnl_mutex. | ||
4147 | */ | ||
4148 | int dev_unicast_add(struct net_device *dev, void *addr) | ||
4149 | { | ||
4150 | int err; | ||
4151 | |||
4152 | ASSERT_RTNL(); | ||
4153 | |||
4154 | netif_addr_lock_bh(dev); | ||
4155 | err = __hw_addr_add(&dev->uc, addr, dev->addr_len, | ||
4156 | NETDEV_HW_ADDR_T_UNICAST); | ||
4157 | if (!err) | ||
4158 | __dev_set_rx_mode(dev); | ||
4159 | netif_addr_unlock_bh(dev); | ||
4160 | return err; | ||
4161 | } | ||
4162 | EXPORT_SYMBOL(dev_unicast_add); | ||
4163 | |||
4164 | int __dev_addr_sync(struct dev_addr_list **to, int *to_count, | ||
4165 | struct dev_addr_list **from, int *from_count) | ||
4166 | { | ||
4167 | struct dev_addr_list *da, *next; | ||
4168 | int err = 0; | ||
4169 | |||
4170 | da = *from; | ||
4171 | while (da != NULL) { | ||
4172 | next = da->next; | ||
4173 | if (!da->da_synced) { | ||
4174 | err = __dev_addr_add(to, to_count, | ||
4175 | da->da_addr, da->da_addrlen, 0); | ||
4176 | if (err < 0) | ||
4177 | break; | ||
4178 | da->da_synced = 1; | ||
4179 | da->da_users++; | ||
4180 | } else if (da->da_users == 1) { | ||
4181 | __dev_addr_delete(to, to_count, | ||
4182 | da->da_addr, da->da_addrlen, 0); | ||
4183 | __dev_addr_delete(from, from_count, | ||
4184 | da->da_addr, da->da_addrlen, 0); | ||
4185 | } | ||
4186 | da = next; | ||
4187 | } | ||
4188 | return err; | ||
4189 | } | ||
4190 | EXPORT_SYMBOL_GPL(__dev_addr_sync); | ||
4191 | |||
4192 | void __dev_addr_unsync(struct dev_addr_list **to, int *to_count, | ||
4193 | struct dev_addr_list **from, int *from_count) | ||
4194 | { | ||
4195 | struct dev_addr_list *da, *next; | ||
4196 | |||
4197 | da = *from; | ||
4198 | while (da != NULL) { | ||
4199 | next = da->next; | ||
4200 | if (da->da_synced) { | ||
4201 | __dev_addr_delete(to, to_count, | ||
4202 | da->da_addr, da->da_addrlen, 0); | ||
4203 | da->da_synced = 0; | ||
4204 | __dev_addr_delete(from, from_count, | ||
4205 | da->da_addr, da->da_addrlen, 0); | ||
4206 | } | ||
4207 | da = next; | ||
4208 | } | ||
4209 | } | ||
4210 | EXPORT_SYMBOL_GPL(__dev_addr_unsync); | ||
4211 | |||
4212 | /** | ||
4213 | * dev_unicast_sync - Synchronize device's unicast list to another device | ||
4214 | * @to: destination device | ||
4215 | * @from: source device | ||
4216 | * | ||
4217 | * Add newly added addresses to the destination device and release | ||
4218 | * addresses that have no users left. The source device must be | ||
4219 | * locked by netif_tx_lock_bh. | ||
4220 | * | ||
4221 | * This function is intended to be called from the dev->set_rx_mode | ||
4222 | * function of layered software devices. | ||
4223 | */ | ||
4224 | int dev_unicast_sync(struct net_device *to, struct net_device *from) | ||
4225 | { | ||
4226 | int err = 0; | ||
4227 | |||
4228 | if (to->addr_len != from->addr_len) | ||
4229 | return -EINVAL; | ||
4230 | |||
4231 | netif_addr_lock_bh(to); | ||
4232 | err = __hw_addr_sync(&to->uc, &from->uc, to->addr_len); | ||
4233 | if (!err) | ||
4234 | __dev_set_rx_mode(to); | ||
4235 | netif_addr_unlock_bh(to); | ||
4236 | return err; | ||
4237 | } | ||
4238 | EXPORT_SYMBOL(dev_unicast_sync); | ||
4239 | |||
4240 | /** | ||
4241 | * dev_unicast_unsync - Remove synchronized addresses from the destination device | ||
4242 | * @to: destination device | ||
4243 | * @from: source device | ||
4244 | * | ||
4245 | * Remove all addresses that were added to the destination device by | ||
4246 | * dev_unicast_sync(). This function is intended to be called from the | ||
4247 | * dev->stop function of layered software devices. | ||
4248 | */ | ||
4249 | void dev_unicast_unsync(struct net_device *to, struct net_device *from) | ||
4250 | { | ||
4251 | if (to->addr_len != from->addr_len) | ||
4252 | return; | ||
4253 | |||
4254 | netif_addr_lock_bh(from); | ||
4255 | netif_addr_lock(to); | ||
4256 | __hw_addr_unsync(&to->uc, &from->uc, to->addr_len); | ||
4257 | __dev_set_rx_mode(to); | ||
4258 | netif_addr_unlock(to); | ||
4259 | netif_addr_unlock_bh(from); | ||
4260 | } | ||
4261 | EXPORT_SYMBOL(dev_unicast_unsync); | ||
4262 | |||
4263 | static void dev_unicast_flush(struct net_device *dev) | ||
4264 | { | ||
4265 | netif_addr_lock_bh(dev); | ||
4266 | __hw_addr_flush(&dev->uc); | ||
4267 | netif_addr_unlock_bh(dev); | ||
4268 | } | ||
4269 | |||
4270 | static void dev_unicast_init(struct net_device *dev) | ||
4271 | { | ||
4272 | __hw_addr_init(&dev->uc); | ||
4273 | } | ||
4274 | |||
4275 | |||
4276 | static void __dev_addr_discard(struct dev_addr_list **list) | ||
4277 | { | ||
4278 | struct dev_addr_list *tmp; | ||
4279 | |||
4280 | while (*list != NULL) { | ||
4281 | tmp = *list; | ||
4282 | *list = tmp->next; | ||
4283 | if (tmp->da_users > tmp->da_gusers) | ||
4284 | printk("__dev_addr_discard: address leakage! " | ||
4285 | "da_users=%d\n", tmp->da_users); | ||
4286 | kfree(tmp); | ||
4287 | } | ||
4288 | } | ||
4289 | |||
4290 | static void dev_addr_discard(struct net_device *dev) | ||
4291 | { | ||
4292 | netif_addr_lock_bh(dev); | ||
4293 | |||
4294 | __dev_addr_discard(&dev->mc_list); | ||
4295 | netdev_mc_count(dev) = 0; | ||
4296 | |||
4297 | netif_addr_unlock_bh(dev); | ||
4298 | } | ||
4299 | |||
4300 | /** | 4160 | /** |
4301 | * dev_get_flags - get flags reported to userspace | 4161 | * dev_get_flags - get flags reported to userspace |
4302 | * @dev: device | 4162 | * @dev: device |
@@ -4607,8 +4467,7 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd) | |||
4607 | return -EINVAL; | 4467 | return -EINVAL; |
4608 | if (!netif_device_present(dev)) | 4468 | if (!netif_device_present(dev)) |
4609 | return -ENODEV; | 4469 | return -ENODEV; |
4610 | return dev_mc_add(dev, ifr->ifr_hwaddr.sa_data, | 4470 | return dev_mc_add_global(dev, ifr->ifr_hwaddr.sa_data); |
4611 | dev->addr_len, 1); | ||
4612 | 4471 | ||
4613 | case SIOCDELMULTI: | 4472 | case SIOCDELMULTI: |
4614 | if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) || | 4473 | if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) || |
@@ -4616,8 +4475,7 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd) | |||
4616 | return -EINVAL; | 4475 | return -EINVAL; |
4617 | if (!netif_device_present(dev)) | 4476 | if (!netif_device_present(dev)) |
4618 | return -ENODEV; | 4477 | return -ENODEV; |
4619 | return dev_mc_delete(dev, ifr->ifr_hwaddr.sa_data, | 4478 | return dev_mc_del_global(dev, ifr->ifr_hwaddr.sa_data); |
4620 | dev->addr_len, 1); | ||
4621 | 4479 | ||
4622 | case SIOCSIFTXQLEN: | 4480 | case SIOCSIFTXQLEN: |
4623 | if (ifr->ifr_qlen < 0) | 4481 | if (ifr->ifr_qlen < 0) |
@@ -4924,8 +4782,8 @@ static void rollback_registered_many(struct list_head *head) | |||
4924 | /* | 4782 | /* |
4925 | * Flush the unicast and multicast chains | 4783 | * Flush the unicast and multicast chains |
4926 | */ | 4784 | */ |
4927 | dev_unicast_flush(dev); | 4785 | dev_uc_flush(dev); |
4928 | dev_addr_discard(dev); | 4786 | dev_mc_flush(dev); |
4929 | 4787 | ||
4930 | if (dev->netdev_ops->ndo_uninit) | 4788 | if (dev->netdev_ops->ndo_uninit) |
4931 | dev->netdev_ops->ndo_uninit(dev); | 4789 | dev->netdev_ops->ndo_uninit(dev); |
@@ -5074,6 +4932,24 @@ int register_netdevice(struct net_device *dev) | |||
5074 | 4932 | ||
5075 | dev->iflink = -1; | 4933 | dev->iflink = -1; |
5076 | 4934 | ||
4935 | #ifdef CONFIG_RPS | ||
4936 | if (!dev->num_rx_queues) { | ||
4937 | /* | ||
4938 | * Allocate a single RX queue if driver never called | ||
4939 | * alloc_netdev_mq | ||
4940 | */ | ||
4941 | |||
4942 | dev->_rx = kzalloc(sizeof(struct netdev_rx_queue), GFP_KERNEL); | ||
4943 | if (!dev->_rx) { | ||
4944 | ret = -ENOMEM; | ||
4945 | goto out; | ||
4946 | } | ||
4947 | |||
4948 | dev->_rx->first = dev->_rx; | ||
4949 | atomic_set(&dev->_rx->count, 1); | ||
4950 | dev->num_rx_queues = 1; | ||
4951 | } | ||
4952 | #endif | ||
5077 | /* Init, if this function is available */ | 4953 | /* Init, if this function is available */ |
5078 | if (dev->netdev_ops->ndo_init) { | 4954 | if (dev->netdev_ops->ndo_init) { |
5079 | ret = dev->netdev_ops->ndo_init(dev); | 4955 | ret = dev->netdev_ops->ndo_init(dev); |
@@ -5113,8 +4989,6 @@ int register_netdevice(struct net_device *dev) | |||
5113 | if (dev->features & NETIF_F_SG) | 4989 | if (dev->features & NETIF_F_SG) |
5114 | dev->features |= NETIF_F_GSO; | 4990 | dev->features |= NETIF_F_GSO; |
5115 | 4991 | ||
5116 | netdev_initialize_kobject(dev); | ||
5117 | |||
5118 | ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev); | 4992 | ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev); |
5119 | ret = notifier_to_errno(ret); | 4993 | ret = notifier_to_errno(ret); |
5120 | if (ret) | 4994 | if (ret) |
@@ -5434,6 +5308,10 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, | |||
5434 | struct net_device *dev; | 5308 | struct net_device *dev; |
5435 | size_t alloc_size; | 5309 | size_t alloc_size; |
5436 | struct net_device *p; | 5310 | struct net_device *p; |
5311 | #ifdef CONFIG_RPS | ||
5312 | struct netdev_rx_queue *rx; | ||
5313 | int i; | ||
5314 | #endif | ||
5437 | 5315 | ||
5438 | BUG_ON(strlen(name) >= sizeof(dev->name)); | 5316 | BUG_ON(strlen(name) >= sizeof(dev->name)); |
5439 | 5317 | ||
@@ -5459,13 +5337,32 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, | |||
5459 | goto free_p; | 5337 | goto free_p; |
5460 | } | 5338 | } |
5461 | 5339 | ||
5340 | #ifdef CONFIG_RPS | ||
5341 | rx = kcalloc(queue_count, sizeof(struct netdev_rx_queue), GFP_KERNEL); | ||
5342 | if (!rx) { | ||
5343 | printk(KERN_ERR "alloc_netdev: Unable to allocate " | ||
5344 | "rx queues.\n"); | ||
5345 | goto free_tx; | ||
5346 | } | ||
5347 | |||
5348 | atomic_set(&rx->count, queue_count); | ||
5349 | |||
5350 | /* | ||
5351 | * Set a pointer to first element in the array which holds the | ||
5352 | * reference count. | ||
5353 | */ | ||
5354 | for (i = 0; i < queue_count; i++) | ||
5355 | rx[i].first = rx; | ||
5356 | #endif | ||
5357 | |||
5462 | dev = PTR_ALIGN(p, NETDEV_ALIGN); | 5358 | dev = PTR_ALIGN(p, NETDEV_ALIGN); |
5463 | dev->padded = (char *)dev - (char *)p; | 5359 | dev->padded = (char *)dev - (char *)p; |
5464 | 5360 | ||
5465 | if (dev_addr_init(dev)) | 5361 | if (dev_addr_init(dev)) |
5466 | goto free_tx; | 5362 | goto free_rx; |
5467 | 5363 | ||
5468 | dev_unicast_init(dev); | 5364 | dev_mc_init(dev); |
5365 | dev_uc_init(dev); | ||
5469 | 5366 | ||
5470 | dev_net_set(dev, &init_net); | 5367 | dev_net_set(dev, &init_net); |
5471 | 5368 | ||
@@ -5473,6 +5370,11 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, | |||
5473 | dev->num_tx_queues = queue_count; | 5370 | dev->num_tx_queues = queue_count; |
5474 | dev->real_num_tx_queues = queue_count; | 5371 | dev->real_num_tx_queues = queue_count; |
5475 | 5372 | ||
5373 | #ifdef CONFIG_RPS | ||
5374 | dev->_rx = rx; | ||
5375 | dev->num_rx_queues = queue_count; | ||
5376 | #endif | ||
5377 | |||
5476 | dev->gso_max_size = GSO_MAX_SIZE; | 5378 | dev->gso_max_size = GSO_MAX_SIZE; |
5477 | 5379 | ||
5478 | netdev_init_queues(dev); | 5380 | netdev_init_queues(dev); |
@@ -5487,9 +5389,12 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, | |||
5487 | strcpy(dev->name, name); | 5389 | strcpy(dev->name, name); |
5488 | return dev; | 5390 | return dev; |
5489 | 5391 | ||
5392 | free_rx: | ||
5393 | #ifdef CONFIG_RPS | ||
5394 | kfree(rx); | ||
5490 | free_tx: | 5395 | free_tx: |
5396 | #endif | ||
5491 | kfree(tx); | 5397 | kfree(tx); |
5492 | |||
5493 | free_p: | 5398 | free_p: |
5494 | kfree(p); | 5399 | kfree(p); |
5495 | return NULL; | 5400 | return NULL; |
@@ -5635,15 +5540,6 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char | |||
5635 | if (dev->features & NETIF_F_NETNS_LOCAL) | 5540 | if (dev->features & NETIF_F_NETNS_LOCAL) |
5636 | goto out; | 5541 | goto out; |
5637 | 5542 | ||
5638 | #ifdef CONFIG_SYSFS | ||
5639 | /* Don't allow real devices to be moved when sysfs | ||
5640 | * is enabled. | ||
5641 | */ | ||
5642 | err = -EINVAL; | ||
5643 | if (dev->dev.parent) | ||
5644 | goto out; | ||
5645 | #endif | ||
5646 | |||
5647 | /* Ensure the device has been registered */ | 5543 | /* Ensure the device has been registered */ |
5648 | err = -EINVAL; | 5544 | err = -EINVAL; |
5649 | if (dev->reg_state != NETREG_REGISTERED) | 5545 | if (dev->reg_state != NETREG_REGISTERED) |
@@ -5691,10 +5587,8 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char | |||
5691 | /* | 5587 | /* |
5692 | * Flush the unicast and multicast chains | 5588 | * Flush the unicast and multicast chains |
5693 | */ | 5589 | */ |
5694 | dev_unicast_flush(dev); | 5590 | dev_uc_flush(dev); |
5695 | dev_addr_discard(dev); | 5591 | dev_mc_flush(dev); |
5696 | |||
5697 | netdev_unregister_kobject(dev); | ||
5698 | 5592 | ||
5699 | /* Actually switch the network namespace */ | 5593 | /* Actually switch the network namespace */ |
5700 | dev_net_set(dev, net); | 5594 | dev_net_set(dev, net); |
@@ -5708,7 +5602,7 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char | |||
5708 | } | 5602 | } |
5709 | 5603 | ||
5710 | /* Fixup kobjects */ | 5604 | /* Fixup kobjects */ |
5711 | err = netdev_register_kobject(dev); | 5605 | err = device_rename(&dev->dev, dev->name); |
5712 | WARN_ON(err); | 5606 | WARN_ON(err); |
5713 | 5607 | ||
5714 | /* Add the device back in the hashes */ | 5608 | /* Add the device back in the hashes */ |
@@ -5735,7 +5629,6 @@ static int dev_cpu_callback(struct notifier_block *nfb, | |||
5735 | void *ocpu) | 5629 | void *ocpu) |
5736 | { | 5630 | { |
5737 | struct sk_buff **list_skb; | 5631 | struct sk_buff **list_skb; |
5738 | struct Qdisc **list_net; | ||
5739 | struct sk_buff *skb; | 5632 | struct sk_buff *skb; |
5740 | unsigned int cpu, oldcpu = (unsigned long)ocpu; | 5633 | unsigned int cpu, oldcpu = (unsigned long)ocpu; |
5741 | struct softnet_data *sd, *oldsd; | 5634 | struct softnet_data *sd, *oldsd; |
@@ -5756,19 +5649,23 @@ static int dev_cpu_callback(struct notifier_block *nfb, | |||
5756 | *list_skb = oldsd->completion_queue; | 5649 | *list_skb = oldsd->completion_queue; |
5757 | oldsd->completion_queue = NULL; | 5650 | oldsd->completion_queue = NULL; |
5758 | 5651 | ||
5759 | /* Find end of our output_queue. */ | ||
5760 | list_net = &sd->output_queue; | ||
5761 | while (*list_net) | ||
5762 | list_net = &(*list_net)->next_sched; | ||
5763 | /* Append output queue from offline CPU. */ | 5652 | /* Append output queue from offline CPU. */ |
5764 | *list_net = oldsd->output_queue; | 5653 | if (oldsd->output_queue) { |
5765 | oldsd->output_queue = NULL; | 5654 | *sd->output_queue_tailp = oldsd->output_queue; |
5655 | sd->output_queue_tailp = oldsd->output_queue_tailp; | ||
5656 | oldsd->output_queue = NULL; | ||
5657 | oldsd->output_queue_tailp = &oldsd->output_queue; | ||
5658 | } | ||
5766 | 5659 | ||
5767 | raise_softirq_irqoff(NET_TX_SOFTIRQ); | 5660 | raise_softirq_irqoff(NET_TX_SOFTIRQ); |
5768 | local_irq_enable(); | 5661 | local_irq_enable(); |
5769 | 5662 | ||
5770 | /* Process offline CPU's input_pkt_queue */ | 5663 | /* Process offline CPU's input_pkt_queue */ |
5771 | while ((skb = __skb_dequeue(&oldsd->input_pkt_queue))) | 5664 | while ((skb = __skb_dequeue(&oldsd->input_pkt_queue))) { |
5665 | netif_rx(skb); | ||
5666 | input_queue_head_add(oldsd, 1); | ||
5667 | } | ||
5668 | while ((skb = __skb_dequeue(&oldsd->process_queue))) | ||
5772 | netif_rx(skb); | 5669 | netif_rx(skb); |
5773 | 5670 | ||
5774 | return NOTIFY_OK; | 5671 | return NOTIFY_OK; |
@@ -5985,17 +5882,26 @@ static int __init net_dev_init(void) | |||
5985 | */ | 5882 | */ |
5986 | 5883 | ||
5987 | for_each_possible_cpu(i) { | 5884 | for_each_possible_cpu(i) { |
5988 | struct softnet_data *queue; | 5885 | struct softnet_data *sd = &per_cpu(softnet_data, i); |
5989 | 5886 | ||
5990 | queue = &per_cpu(softnet_data, i); | 5887 | memset(sd, 0, sizeof(*sd)); |
5991 | skb_queue_head_init(&queue->input_pkt_queue); | 5888 | skb_queue_head_init(&sd->input_pkt_queue); |
5992 | queue->completion_queue = NULL; | 5889 | skb_queue_head_init(&sd->process_queue); |
5993 | INIT_LIST_HEAD(&queue->poll_list); | 5890 | sd->completion_queue = NULL; |
5891 | INIT_LIST_HEAD(&sd->poll_list); | ||
5892 | sd->output_queue = NULL; | ||
5893 | sd->output_queue_tailp = &sd->output_queue; | ||
5894 | #ifdef CONFIG_RPS | ||
5895 | sd->csd.func = rps_trigger_softirq; | ||
5896 | sd->csd.info = sd; | ||
5897 | sd->csd.flags = 0; | ||
5898 | sd->cpu = i; | ||
5899 | #endif | ||
5994 | 5900 | ||
5995 | queue->backlog.poll = process_backlog; | 5901 | sd->backlog.poll = process_backlog; |
5996 | queue->backlog.weight = weight_p; | 5902 | sd->backlog.weight = weight_p; |
5997 | queue->backlog.gro_list = NULL; | 5903 | sd->backlog.gro_list = NULL; |
5998 | queue->backlog.gro_count = 0; | 5904 | sd->backlog.gro_count = 0; |
5999 | } | 5905 | } |
6000 | 5906 | ||
6001 | dev_boot_phase = 0; | 5907 | dev_boot_phase = 0; |
@@ -6030,7 +5936,7 @@ subsys_initcall(net_dev_init); | |||
6030 | 5936 | ||
6031 | static int __init initialize_hashrnd(void) | 5937 | static int __init initialize_hashrnd(void) |
6032 | { | 5938 | { |
6033 | get_random_bytes(&skb_tx_hashrnd, sizeof(skb_tx_hashrnd)); | 5939 | get_random_bytes(&hashrnd, sizeof(hashrnd)); |
6034 | return 0; | 5940 | return 0; |
6035 | } | 5941 | } |
6036 | 5942 | ||
diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c new file mode 100644 index 000000000000..508f9c18992f --- /dev/null +++ b/net/core/dev_addr_lists.c | |||
@@ -0,0 +1,741 @@ | |||
1 | /* | ||
2 | * net/core/dev_addr_lists.c - Functions for handling net device lists | ||
3 | * Copyright (c) 2010 Jiri Pirko <jpirko@redhat.com> | ||
4 | * | ||
5 | * This file contains functions for working with unicast, multicast and device | ||
6 | * address lists. | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify | ||
9 | * it under the terms of the GNU General Public License as published by | ||
10 | * the Free Software Foundation; either version 2 of the License, or | ||
11 | * (at your option) any later version. | ||
12 | */ | ||
13 | |||
14 | #include <linux/netdevice.h> | ||
15 | #include <linux/rtnetlink.h> | ||
16 | #include <linux/list.h> | ||
17 | #include <linux/proc_fs.h> | ||
18 | |||
19 | /* | ||
20 | * General list handling functions | ||
21 | */ | ||
22 | |||
23 | static int __hw_addr_add_ex(struct netdev_hw_addr_list *list, | ||
24 | unsigned char *addr, int addr_len, | ||
25 | unsigned char addr_type, bool global) | ||
26 | { | ||
27 | struct netdev_hw_addr *ha; | ||
28 | int alloc_size; | ||
29 | |||
30 | if (addr_len > MAX_ADDR_LEN) | ||
31 | return -EINVAL; | ||
32 | |||
33 | list_for_each_entry(ha, &list->list, list) { | ||
34 | if (!memcmp(ha->addr, addr, addr_len) && | ||
35 | ha->type == addr_type) { | ||
36 | if (global) { | ||
37 | /* check if addr is already used as global */ | ||
38 | if (ha->global_use) | ||
39 | return 0; | ||
40 | else | ||
41 | ha->global_use = true; | ||
42 | } | ||
43 | ha->refcount++; | ||
44 | return 0; | ||
45 | } | ||
46 | } | ||
47 | |||
48 | |||
49 | alloc_size = sizeof(*ha); | ||
50 | if (alloc_size < L1_CACHE_BYTES) | ||
51 | alloc_size = L1_CACHE_BYTES; | ||
52 | ha = kmalloc(alloc_size, GFP_ATOMIC); | ||
53 | if (!ha) | ||
54 | return -ENOMEM; | ||
55 | memcpy(ha->addr, addr, addr_len); | ||
56 | ha->type = addr_type; | ||
57 | ha->refcount = 1; | ||
58 | ha->global_use = global; | ||
59 | ha->synced = false; | ||
60 | list_add_tail_rcu(&ha->list, &list->list); | ||
61 | list->count++; | ||
62 | return 0; | ||
63 | } | ||
64 | |||
65 | static int __hw_addr_add(struct netdev_hw_addr_list *list, unsigned char *addr, | ||
66 | int addr_len, unsigned char addr_type) | ||
67 | { | ||
68 | return __hw_addr_add_ex(list, addr, addr_len, addr_type, false); | ||
69 | } | ||
70 | |||
71 | static void ha_rcu_free(struct rcu_head *head) | ||
72 | { | ||
73 | struct netdev_hw_addr *ha; | ||
74 | |||
75 | ha = container_of(head, struct netdev_hw_addr, rcu_head); | ||
76 | kfree(ha); | ||
77 | } | ||
78 | |||
79 | static int __hw_addr_del_ex(struct netdev_hw_addr_list *list, | ||
80 | unsigned char *addr, int addr_len, | ||
81 | unsigned char addr_type, bool global) | ||
82 | { | ||
83 | struct netdev_hw_addr *ha; | ||
84 | |||
85 | list_for_each_entry(ha, &list->list, list) { | ||
86 | if (!memcmp(ha->addr, addr, addr_len) && | ||
87 | (ha->type == addr_type || !addr_type)) { | ||
88 | if (global) { | ||
89 | if (!ha->global_use) | ||
90 | break; | ||
91 | else | ||
92 | ha->global_use = false; | ||
93 | } | ||
94 | if (--ha->refcount) | ||
95 | return 0; | ||
96 | list_del_rcu(&ha->list); | ||
97 | call_rcu(&ha->rcu_head, ha_rcu_free); | ||
98 | list->count--; | ||
99 | return 0; | ||
100 | } | ||
101 | } | ||
102 | return -ENOENT; | ||
103 | } | ||
104 | |||
105 | static int __hw_addr_del(struct netdev_hw_addr_list *list, unsigned char *addr, | ||
106 | int addr_len, unsigned char addr_type) | ||
107 | { | ||
108 | return __hw_addr_del_ex(list, addr, addr_len, addr_type, false); | ||
109 | } | ||
110 | |||
111 | int __hw_addr_add_multiple(struct netdev_hw_addr_list *to_list, | ||
112 | struct netdev_hw_addr_list *from_list, | ||
113 | int addr_len, unsigned char addr_type) | ||
114 | { | ||
115 | int err; | ||
116 | struct netdev_hw_addr *ha, *ha2; | ||
117 | unsigned char type; | ||
118 | |||
119 | list_for_each_entry(ha, &from_list->list, list) { | ||
120 | type = addr_type ? addr_type : ha->type; | ||
121 | err = __hw_addr_add(to_list, ha->addr, addr_len, type); | ||
122 | if (err) | ||
123 | goto unroll; | ||
124 | } | ||
125 | return 0; | ||
126 | |||
127 | unroll: | ||
128 | list_for_each_entry(ha2, &from_list->list, list) { | ||
129 | if (ha2 == ha) | ||
130 | break; | ||
131 | type = addr_type ? addr_type : ha2->type; | ||
132 | __hw_addr_del(to_list, ha2->addr, addr_len, type); | ||
133 | } | ||
134 | return err; | ||
135 | } | ||
136 | EXPORT_SYMBOL(__hw_addr_add_multiple); | ||
137 | |||
138 | void __hw_addr_del_multiple(struct netdev_hw_addr_list *to_list, | ||
139 | struct netdev_hw_addr_list *from_list, | ||
140 | int addr_len, unsigned char addr_type) | ||
141 | { | ||
142 | struct netdev_hw_addr *ha; | ||
143 | unsigned char type; | ||
144 | |||
145 | list_for_each_entry(ha, &from_list->list, list) { | ||
146 | type = addr_type ? addr_type : ha->type; | ||
147 | __hw_addr_del(to_list, ha->addr, addr_len, addr_type); | ||
148 | } | ||
149 | } | ||
150 | EXPORT_SYMBOL(__hw_addr_del_multiple); | ||
151 | |||
152 | int __hw_addr_sync(struct netdev_hw_addr_list *to_list, | ||
153 | struct netdev_hw_addr_list *from_list, | ||
154 | int addr_len) | ||
155 | { | ||
156 | int err = 0; | ||
157 | struct netdev_hw_addr *ha, *tmp; | ||
158 | |||
159 | list_for_each_entry_safe(ha, tmp, &from_list->list, list) { | ||
160 | if (!ha->synced) { | ||
161 | err = __hw_addr_add(to_list, ha->addr, | ||
162 | addr_len, ha->type); | ||
163 | if (err) | ||
164 | break; | ||
165 | ha->synced = true; | ||
166 | ha->refcount++; | ||
167 | } else if (ha->refcount == 1) { | ||
168 | __hw_addr_del(to_list, ha->addr, addr_len, ha->type); | ||
169 | __hw_addr_del(from_list, ha->addr, addr_len, ha->type); | ||
170 | } | ||
171 | } | ||
172 | return err; | ||
173 | } | ||
174 | EXPORT_SYMBOL(__hw_addr_sync); | ||
175 | |||
176 | void __hw_addr_unsync(struct netdev_hw_addr_list *to_list, | ||
177 | struct netdev_hw_addr_list *from_list, | ||
178 | int addr_len) | ||
179 | { | ||
180 | struct netdev_hw_addr *ha, *tmp; | ||
181 | |||
182 | list_for_each_entry_safe(ha, tmp, &from_list->list, list) { | ||
183 | if (ha->synced) { | ||
184 | __hw_addr_del(to_list, ha->addr, | ||
185 | addr_len, ha->type); | ||
186 | ha->synced = false; | ||
187 | __hw_addr_del(from_list, ha->addr, | ||
188 | addr_len, ha->type); | ||
189 | } | ||
190 | } | ||
191 | } | ||
192 | EXPORT_SYMBOL(__hw_addr_unsync); | ||
193 | |||
194 | void __hw_addr_flush(struct netdev_hw_addr_list *list) | ||
195 | { | ||
196 | struct netdev_hw_addr *ha, *tmp; | ||
197 | |||
198 | list_for_each_entry_safe(ha, tmp, &list->list, list) { | ||
199 | list_del_rcu(&ha->list); | ||
200 | call_rcu(&ha->rcu_head, ha_rcu_free); | ||
201 | } | ||
202 | list->count = 0; | ||
203 | } | ||
204 | EXPORT_SYMBOL(__hw_addr_flush); | ||
205 | |||
206 | void __hw_addr_init(struct netdev_hw_addr_list *list) | ||
207 | { | ||
208 | INIT_LIST_HEAD(&list->list); | ||
209 | list->count = 0; | ||
210 | } | ||
211 | EXPORT_SYMBOL(__hw_addr_init); | ||
212 | |||
213 | /* | ||
214 | * Device addresses handling functions | ||
215 | */ | ||
216 | |||
217 | /** | ||
218 | * dev_addr_flush - Flush device address list | ||
219 | * @dev: device | ||
220 | * | ||
221 | * Flush device address list and reset ->dev_addr. | ||
222 | * | ||
223 | * The caller must hold the rtnl_mutex. | ||
224 | */ | ||
225 | void dev_addr_flush(struct net_device *dev) | ||
226 | { | ||
227 | /* rtnl_mutex must be held here */ | ||
228 | |||
229 | __hw_addr_flush(&dev->dev_addrs); | ||
230 | dev->dev_addr = NULL; | ||
231 | } | ||
232 | EXPORT_SYMBOL(dev_addr_flush); | ||
233 | |||
234 | /** | ||
235 | * dev_addr_init - Init device address list | ||
236 | * @dev: device | ||
237 | * | ||
238 | * Init device address list and create the first element, | ||
239 | * used by ->dev_addr. | ||
240 | * | ||
241 | * The caller must hold the rtnl_mutex. | ||
242 | */ | ||
243 | int dev_addr_init(struct net_device *dev) | ||
244 | { | ||
245 | unsigned char addr[MAX_ADDR_LEN]; | ||
246 | struct netdev_hw_addr *ha; | ||
247 | int err; | ||
248 | |||
249 | /* rtnl_mutex must be held here */ | ||
250 | |||
251 | __hw_addr_init(&dev->dev_addrs); | ||
252 | memset(addr, 0, sizeof(addr)); | ||
253 | err = __hw_addr_add(&dev->dev_addrs, addr, sizeof(addr), | ||
254 | NETDEV_HW_ADDR_T_LAN); | ||
255 | if (!err) { | ||
256 | /* | ||
257 | * Get the first (previously created) address from the list | ||
258 | * and set dev_addr pointer to this location. | ||
259 | */ | ||
260 | ha = list_first_entry(&dev->dev_addrs.list, | ||
261 | struct netdev_hw_addr, list); | ||
262 | dev->dev_addr = ha->addr; | ||
263 | } | ||
264 | return err; | ||
265 | } | ||
266 | EXPORT_SYMBOL(dev_addr_init); | ||
267 | |||
268 | /** | ||
269 | * dev_addr_add - Add a device address | ||
270 | * @dev: device | ||
271 | * @addr: address to add | ||
272 | * @addr_type: address type | ||
273 | * | ||
274 | * Add a device address to the device or increase the reference count if | ||
275 | * it already exists. | ||
276 | * | ||
277 | * The caller must hold the rtnl_mutex. | ||
278 | */ | ||
279 | int dev_addr_add(struct net_device *dev, unsigned char *addr, | ||
280 | unsigned char addr_type) | ||
281 | { | ||
282 | int err; | ||
283 | |||
284 | ASSERT_RTNL(); | ||
285 | |||
286 | err = __hw_addr_add(&dev->dev_addrs, addr, dev->addr_len, addr_type); | ||
287 | if (!err) | ||
288 | call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); | ||
289 | return err; | ||
290 | } | ||
291 | EXPORT_SYMBOL(dev_addr_add); | ||
292 | |||
293 | /** | ||
294 | * dev_addr_del - Release a device address. | ||
295 | * @dev: device | ||
296 | * @addr: address to delete | ||
297 | * @addr_type: address type | ||
298 | * | ||
299 | * Release reference to a device address and remove it from the device | ||
300 | * if the reference count drops to zero. | ||
301 | * | ||
302 | * The caller must hold the rtnl_mutex. | ||
303 | */ | ||
304 | int dev_addr_del(struct net_device *dev, unsigned char *addr, | ||
305 | unsigned char addr_type) | ||
306 | { | ||
307 | int err; | ||
308 | struct netdev_hw_addr *ha; | ||
309 | |||
310 | ASSERT_RTNL(); | ||
311 | |||
312 | /* | ||
313 | * We can not remove the first address from the list because | ||
314 | * dev->dev_addr points to that. | ||
315 | */ | ||
316 | ha = list_first_entry(&dev->dev_addrs.list, | ||
317 | struct netdev_hw_addr, list); | ||
318 | if (ha->addr == dev->dev_addr && ha->refcount == 1) | ||
319 | return -ENOENT; | ||
320 | |||
321 | err = __hw_addr_del(&dev->dev_addrs, addr, dev->addr_len, | ||
322 | addr_type); | ||
323 | if (!err) | ||
324 | call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); | ||
325 | return err; | ||
326 | } | ||
327 | EXPORT_SYMBOL(dev_addr_del); | ||
328 | |||
329 | /** | ||
330 | * dev_addr_add_multiple - Add device addresses from another device | ||
331 | * @to_dev: device to which addresses will be added | ||
332 | * @from_dev: device from which addresses will be added | ||
333 | * @addr_type: address type - 0 means type will be used from from_dev | ||
334 | * | ||
335 | * Add device addresses of the one device to another. | ||
336 | ** | ||
337 | * The caller must hold the rtnl_mutex. | ||
338 | */ | ||
339 | int dev_addr_add_multiple(struct net_device *to_dev, | ||
340 | struct net_device *from_dev, | ||
341 | unsigned char addr_type) | ||
342 | { | ||
343 | int err; | ||
344 | |||
345 | ASSERT_RTNL(); | ||
346 | |||
347 | if (from_dev->addr_len != to_dev->addr_len) | ||
348 | return -EINVAL; | ||
349 | err = __hw_addr_add_multiple(&to_dev->dev_addrs, &from_dev->dev_addrs, | ||
350 | to_dev->addr_len, addr_type); | ||
351 | if (!err) | ||
352 | call_netdevice_notifiers(NETDEV_CHANGEADDR, to_dev); | ||
353 | return err; | ||
354 | } | ||
355 | EXPORT_SYMBOL(dev_addr_add_multiple); | ||
356 | |||
357 | /** | ||
358 | * dev_addr_del_multiple - Delete device addresses by another device | ||
359 | * @to_dev: device where the addresses will be deleted | ||
360 | * @from_dev: device by which addresses the addresses will be deleted | ||
361 | * @addr_type: address type - 0 means type will used from from_dev | ||
362 | * | ||
363 | * Deletes addresses in to device by the list of addresses in from device. | ||
364 | * | ||
365 | * The caller must hold the rtnl_mutex. | ||
366 | */ | ||
367 | int dev_addr_del_multiple(struct net_device *to_dev, | ||
368 | struct net_device *from_dev, | ||
369 | unsigned char addr_type) | ||
370 | { | ||
371 | ASSERT_RTNL(); | ||
372 | |||
373 | if (from_dev->addr_len != to_dev->addr_len) | ||
374 | return -EINVAL; | ||
375 | __hw_addr_del_multiple(&to_dev->dev_addrs, &from_dev->dev_addrs, | ||
376 | to_dev->addr_len, addr_type); | ||
377 | call_netdevice_notifiers(NETDEV_CHANGEADDR, to_dev); | ||
378 | return 0; | ||
379 | } | ||
380 | EXPORT_SYMBOL(dev_addr_del_multiple); | ||
381 | |||
382 | /* | ||
383 | * Unicast list handling functions | ||
384 | */ | ||
385 | |||
386 | /** | ||
387 | * dev_uc_add - Add a secondary unicast address | ||
388 | * @dev: device | ||
389 | * @addr: address to add | ||
390 | * | ||
391 | * Add a secondary unicast address to the device or increase | ||
392 | * the reference count if it already exists. | ||
393 | */ | ||
394 | int dev_uc_add(struct net_device *dev, unsigned char *addr) | ||
395 | { | ||
396 | int err; | ||
397 | |||
398 | netif_addr_lock_bh(dev); | ||
399 | err = __hw_addr_add(&dev->uc, addr, dev->addr_len, | ||
400 | NETDEV_HW_ADDR_T_UNICAST); | ||
401 | if (!err) | ||
402 | __dev_set_rx_mode(dev); | ||
403 | netif_addr_unlock_bh(dev); | ||
404 | return err; | ||
405 | } | ||
406 | EXPORT_SYMBOL(dev_uc_add); | ||
407 | |||
408 | /** | ||
409 | * dev_uc_del - Release secondary unicast address. | ||
410 | * @dev: device | ||
411 | * @addr: address to delete | ||
412 | * | ||
413 | * Release reference to a secondary unicast address and remove it | ||
414 | * from the device if the reference count drops to zero. | ||
415 | */ | ||
416 | int dev_uc_del(struct net_device *dev, unsigned char *addr) | ||
417 | { | ||
418 | int err; | ||
419 | |||
420 | netif_addr_lock_bh(dev); | ||
421 | err = __hw_addr_del(&dev->uc, addr, dev->addr_len, | ||
422 | NETDEV_HW_ADDR_T_UNICAST); | ||
423 | if (!err) | ||
424 | __dev_set_rx_mode(dev); | ||
425 | netif_addr_unlock_bh(dev); | ||
426 | return err; | ||
427 | } | ||
428 | EXPORT_SYMBOL(dev_uc_del); | ||
429 | |||
430 | /** | ||
431 | * dev_uc_sync - Synchronize device's unicast list to another device | ||
432 | * @to: destination device | ||
433 | * @from: source device | ||
434 | * | ||
435 | * Add newly added addresses to the destination device and release | ||
436 | * addresses that have no users left. The source device must be | ||
437 | * locked by netif_tx_lock_bh. | ||
438 | * | ||
439 | * This function is intended to be called from the dev->set_rx_mode | ||
440 | * function of layered software devices. | ||
441 | */ | ||
442 | int dev_uc_sync(struct net_device *to, struct net_device *from) | ||
443 | { | ||
444 | int err = 0; | ||
445 | |||
446 | if (to->addr_len != from->addr_len) | ||
447 | return -EINVAL; | ||
448 | |||
449 | netif_addr_lock_bh(to); | ||
450 | err = __hw_addr_sync(&to->uc, &from->uc, to->addr_len); | ||
451 | if (!err) | ||
452 | __dev_set_rx_mode(to); | ||
453 | netif_addr_unlock_bh(to); | ||
454 | return err; | ||
455 | } | ||
456 | EXPORT_SYMBOL(dev_uc_sync); | ||
457 | |||
458 | /** | ||
459 | * dev_uc_unsync - Remove synchronized addresses from the destination device | ||
460 | * @to: destination device | ||
461 | * @from: source device | ||
462 | * | ||
463 | * Remove all addresses that were added to the destination device by | ||
464 | * dev_uc_sync(). This function is intended to be called from the | ||
465 | * dev->stop function of layered software devices. | ||
466 | */ | ||
467 | void dev_uc_unsync(struct net_device *to, struct net_device *from) | ||
468 | { | ||
469 | if (to->addr_len != from->addr_len) | ||
470 | return; | ||
471 | |||
472 | netif_addr_lock_bh(from); | ||
473 | netif_addr_lock(to); | ||
474 | __hw_addr_unsync(&to->uc, &from->uc, to->addr_len); | ||
475 | __dev_set_rx_mode(to); | ||
476 | netif_addr_unlock(to); | ||
477 | netif_addr_unlock_bh(from); | ||
478 | } | ||
479 | EXPORT_SYMBOL(dev_uc_unsync); | ||
480 | |||
481 | /** | ||
482 | * dev_uc_flush - Flush unicast addresses | ||
483 | * @dev: device | ||
484 | * | ||
485 | * Flush unicast addresses. | ||
486 | */ | ||
487 | void dev_uc_flush(struct net_device *dev) | ||
488 | { | ||
489 | netif_addr_lock_bh(dev); | ||
490 | __hw_addr_flush(&dev->uc); | ||
491 | netif_addr_unlock_bh(dev); | ||
492 | } | ||
493 | EXPORT_SYMBOL(dev_uc_flush); | ||
494 | |||
495 | /** | ||
496 | * dev_uc_init - Init unicast address list | ||
497 | * @dev: device | ||
498 | * | ||
499 | * Init unicast address list. | ||
500 | */ | ||
501 | void dev_uc_init(struct net_device *dev) | ||
502 | { | ||
503 | __hw_addr_init(&dev->uc); | ||
504 | } | ||
505 | EXPORT_SYMBOL(dev_uc_init); | ||
506 | |||
507 | /* | ||
508 | * Multicast list handling functions | ||
509 | */ | ||
510 | |||
511 | static int __dev_mc_add(struct net_device *dev, unsigned char *addr, | ||
512 | bool global) | ||
513 | { | ||
514 | int err; | ||
515 | |||
516 | netif_addr_lock_bh(dev); | ||
517 | err = __hw_addr_add_ex(&dev->mc, addr, dev->addr_len, | ||
518 | NETDEV_HW_ADDR_T_MULTICAST, global); | ||
519 | if (!err) | ||
520 | __dev_set_rx_mode(dev); | ||
521 | netif_addr_unlock_bh(dev); | ||
522 | return err; | ||
523 | } | ||
524 | /** | ||
525 | * dev_mc_add - Add a multicast address | ||
526 | * @dev: device | ||
527 | * @addr: address to add | ||
528 | * | ||
529 | * Add a multicast address to the device or increase | ||
530 | * the reference count if it already exists. | ||
531 | */ | ||
532 | int dev_mc_add(struct net_device *dev, unsigned char *addr) | ||
533 | { | ||
534 | return __dev_mc_add(dev, addr, false); | ||
535 | } | ||
536 | EXPORT_SYMBOL(dev_mc_add); | ||
537 | |||
538 | /** | ||
539 | * dev_mc_add_global - Add a global multicast address | ||
540 | * @dev: device | ||
541 | * @addr: address to add | ||
542 | * | ||
543 | * Add a global multicast address to the device. | ||
544 | */ | ||
545 | int dev_mc_add_global(struct net_device *dev, unsigned char *addr) | ||
546 | { | ||
547 | return __dev_mc_add(dev, addr, true); | ||
548 | } | ||
549 | EXPORT_SYMBOL(dev_mc_add_global); | ||
550 | |||
551 | static int __dev_mc_del(struct net_device *dev, unsigned char *addr, | ||
552 | bool global) | ||
553 | { | ||
554 | int err; | ||
555 | |||
556 | netif_addr_lock_bh(dev); | ||
557 | err = __hw_addr_del_ex(&dev->mc, addr, dev->addr_len, | ||
558 | NETDEV_HW_ADDR_T_MULTICAST, global); | ||
559 | if (!err) | ||
560 | __dev_set_rx_mode(dev); | ||
561 | netif_addr_unlock_bh(dev); | ||
562 | return err; | ||
563 | } | ||
564 | |||
565 | /** | ||
566 | * dev_mc_del - Delete a multicast address. | ||
567 | * @dev: device | ||
568 | * @addr: address to delete | ||
569 | * | ||
570 | * Release reference to a multicast address and remove it | ||
571 | * from the device if the reference count drops to zero. | ||
572 | */ | ||
573 | int dev_mc_del(struct net_device *dev, unsigned char *addr) | ||
574 | { | ||
575 | return __dev_mc_del(dev, addr, false); | ||
576 | } | ||
577 | EXPORT_SYMBOL(dev_mc_del); | ||
578 | |||
579 | /** | ||
580 | * dev_mc_del_global - Delete a global multicast address. | ||
581 | * @dev: device | ||
582 | * @addr: address to delete | ||
583 | * | ||
584 | * Release reference to a multicast address and remove it | ||
585 | * from the device if the reference count drops to zero. | ||
586 | */ | ||
587 | int dev_mc_del_global(struct net_device *dev, unsigned char *addr) | ||
588 | { | ||
589 | return __dev_mc_del(dev, addr, true); | ||
590 | } | ||
591 | EXPORT_SYMBOL(dev_mc_del_global); | ||
592 | |||
593 | /** | ||
594 | * dev_mc_sync - Synchronize device's multicast list to another device | ||
595 | * @to: destination device | ||
596 | * @from: source device | ||
597 | * | ||
598 | * Add newly added addresses to the destination device and release | ||
599 | * addresses that have no users left. The source device must be | ||
600 | * locked by netif_tx_lock_bh. | ||
601 | * | ||
602 | * This function is intended to be called from the dev->set_multicast_list | ||
603 | * or dev->set_rx_mode function of layered software devices. | ||
604 | */ | ||
605 | int dev_mc_sync(struct net_device *to, struct net_device *from) | ||
606 | { | ||
607 | int err = 0; | ||
608 | |||
609 | if (to->addr_len != from->addr_len) | ||
610 | return -EINVAL; | ||
611 | |||
612 | netif_addr_lock_bh(to); | ||
613 | err = __hw_addr_sync(&to->mc, &from->mc, to->addr_len); | ||
614 | if (!err) | ||
615 | __dev_set_rx_mode(to); | ||
616 | netif_addr_unlock_bh(to); | ||
617 | return err; | ||
618 | } | ||
619 | EXPORT_SYMBOL(dev_mc_sync); | ||
620 | |||
621 | /** | ||
622 | * dev_mc_unsync - Remove synchronized addresses from the destination device | ||
623 | * @to: destination device | ||
624 | * @from: source device | ||
625 | * | ||
626 | * Remove all addresses that were added to the destination device by | ||
627 | * dev_mc_sync(). This function is intended to be called from the | ||
628 | * dev->stop function of layered software devices. | ||
629 | */ | ||
630 | void dev_mc_unsync(struct net_device *to, struct net_device *from) | ||
631 | { | ||
632 | if (to->addr_len != from->addr_len) | ||
633 | return; | ||
634 | |||
635 | netif_addr_lock_bh(from); | ||
636 | netif_addr_lock(to); | ||
637 | __hw_addr_unsync(&to->mc, &from->mc, to->addr_len); | ||
638 | __dev_set_rx_mode(to); | ||
639 | netif_addr_unlock(to); | ||
640 | netif_addr_unlock_bh(from); | ||
641 | } | ||
642 | EXPORT_SYMBOL(dev_mc_unsync); | ||
643 | |||
644 | /** | ||
645 | * dev_mc_flush - Flush multicast addresses | ||
646 | * @dev: device | ||
647 | * | ||
648 | * Flush multicast addresses. | ||
649 | */ | ||
650 | void dev_mc_flush(struct net_device *dev) | ||
651 | { | ||
652 | netif_addr_lock_bh(dev); | ||
653 | __hw_addr_flush(&dev->mc); | ||
654 | netif_addr_unlock_bh(dev); | ||
655 | } | ||
656 | EXPORT_SYMBOL(dev_mc_flush); | ||
657 | |||
658 | /** | ||
659 | * dev_mc_init - Init multicast address list | ||
660 | * @dev: device | ||
661 | * | ||
662 | * Init multicast address list. | ||
663 | */ | ||
664 | void dev_mc_init(struct net_device *dev) | ||
665 | { | ||
666 | __hw_addr_init(&dev->mc); | ||
667 | } | ||
668 | EXPORT_SYMBOL(dev_mc_init); | ||
669 | |||
670 | #ifdef CONFIG_PROC_FS | ||
671 | #include <linux/seq_file.h> | ||
672 | |||
673 | static int dev_mc_seq_show(struct seq_file *seq, void *v) | ||
674 | { | ||
675 | struct netdev_hw_addr *ha; | ||
676 | struct net_device *dev = v; | ||
677 | |||
678 | if (v == SEQ_START_TOKEN) | ||
679 | return 0; | ||
680 | |||
681 | netif_addr_lock_bh(dev); | ||
682 | netdev_for_each_mc_addr(ha, dev) { | ||
683 | int i; | ||
684 | |||
685 | seq_printf(seq, "%-4d %-15s %-5d %-5d ", dev->ifindex, | ||
686 | dev->name, ha->refcount, ha->global_use); | ||
687 | |||
688 | for (i = 0; i < dev->addr_len; i++) | ||
689 | seq_printf(seq, "%02x", ha->addr[i]); | ||
690 | |||
691 | seq_putc(seq, '\n'); | ||
692 | } | ||
693 | netif_addr_unlock_bh(dev); | ||
694 | return 0; | ||
695 | } | ||
696 | |||
697 | static const struct seq_operations dev_mc_seq_ops = { | ||
698 | .start = dev_seq_start, | ||
699 | .next = dev_seq_next, | ||
700 | .stop = dev_seq_stop, | ||
701 | .show = dev_mc_seq_show, | ||
702 | }; | ||
703 | |||
704 | static int dev_mc_seq_open(struct inode *inode, struct file *file) | ||
705 | { | ||
706 | return seq_open_net(inode, file, &dev_mc_seq_ops, | ||
707 | sizeof(struct seq_net_private)); | ||
708 | } | ||
709 | |||
710 | static const struct file_operations dev_mc_seq_fops = { | ||
711 | .owner = THIS_MODULE, | ||
712 | .open = dev_mc_seq_open, | ||
713 | .read = seq_read, | ||
714 | .llseek = seq_lseek, | ||
715 | .release = seq_release_net, | ||
716 | }; | ||
717 | |||
718 | #endif | ||
719 | |||
720 | static int __net_init dev_mc_net_init(struct net *net) | ||
721 | { | ||
722 | if (!proc_net_fops_create(net, "dev_mcast", 0, &dev_mc_seq_fops)) | ||
723 | return -ENOMEM; | ||
724 | return 0; | ||
725 | } | ||
726 | |||
727 | static void __net_exit dev_mc_net_exit(struct net *net) | ||
728 | { | ||
729 | proc_net_remove(net, "dev_mcast"); | ||
730 | } | ||
731 | |||
732 | static struct pernet_operations __net_initdata dev_mc_net_ops = { | ||
733 | .init = dev_mc_net_init, | ||
734 | .exit = dev_mc_net_exit, | ||
735 | }; | ||
736 | |||
737 | void __init dev_mcast_init(void) | ||
738 | { | ||
739 | register_pernet_subsys(&dev_mc_net_ops); | ||
740 | } | ||
741 | |||
diff --git a/net/core/dev_mcast.c b/net/core/dev_mcast.c deleted file mode 100644 index 3dc295beb483..000000000000 --- a/net/core/dev_mcast.c +++ /dev/null | |||
@@ -1,232 +0,0 @@ | |||
1 | /* | ||
2 | * Linux NET3: Multicast List maintenance. | ||
3 | * | ||
4 | * Authors: | ||
5 | * Tim Kordas <tjk@nostromo.eeap.cwru.edu> | ||
6 | * Richard Underwood <richard@wuzz.demon.co.uk> | ||
7 | * | ||
8 | * Stir fried together from the IP multicast and CAP patches above | ||
9 | * Alan Cox <alan@lxorguk.ukuu.org.uk> | ||
10 | * | ||
11 | * Fixes: | ||
12 | * Alan Cox : Update the device on a real delete | ||
13 | * rather than any time but... | ||
14 | * Alan Cox : IFF_ALLMULTI support. | ||
15 | * Alan Cox : New format set_multicast_list() calls. | ||
16 | * Gleb Natapov : Remove dev_mc_lock. | ||
17 | * | ||
18 | * This program is free software; you can redistribute it and/or | ||
19 | * modify it under the terms of the GNU General Public License | ||
20 | * as published by the Free Software Foundation; either version | ||
21 | * 2 of the License, or (at your option) any later version. | ||
22 | */ | ||
23 | |||
24 | #include <linux/module.h> | ||
25 | #include <asm/uaccess.h> | ||
26 | #include <asm/system.h> | ||
27 | #include <linux/bitops.h> | ||
28 | #include <linux/types.h> | ||
29 | #include <linux/kernel.h> | ||
30 | #include <linux/string.h> | ||
31 | #include <linux/mm.h> | ||
32 | #include <linux/socket.h> | ||
33 | #include <linux/sockios.h> | ||
34 | #include <linux/in.h> | ||
35 | #include <linux/errno.h> | ||
36 | #include <linux/interrupt.h> | ||
37 | #include <linux/if_ether.h> | ||
38 | #include <linux/inet.h> | ||
39 | #include <linux/netdevice.h> | ||
40 | #include <linux/etherdevice.h> | ||
41 | #include <linux/proc_fs.h> | ||
42 | #include <linux/seq_file.h> | ||
43 | #include <linux/init.h> | ||
44 | #include <net/net_namespace.h> | ||
45 | #include <net/ip.h> | ||
46 | #include <net/route.h> | ||
47 | #include <linux/skbuff.h> | ||
48 | #include <net/sock.h> | ||
49 | #include <net/arp.h> | ||
50 | |||
51 | |||
52 | /* | ||
53 | * Device multicast list maintenance. | ||
54 | * | ||
55 | * This is used both by IP and by the user level maintenance functions. | ||
56 | * Unlike BSD we maintain a usage count on a given multicast address so | ||
57 | * that a casual user application can add/delete multicasts used by | ||
58 | * protocols without doing damage to the protocols when it deletes the | ||
59 | * entries. It also helps IP as it tracks overlapping maps. | ||
60 | * | ||
61 | * Device mc lists are changed by bh at least if IPv6 is enabled, | ||
62 | * so that it must be bh protected. | ||
63 | * | ||
64 | * We block accesses to device mc filters with netif_tx_lock. | ||
65 | */ | ||
66 | |||
67 | /* | ||
68 | * Delete a device level multicast | ||
69 | */ | ||
70 | |||
71 | int dev_mc_delete(struct net_device *dev, void *addr, int alen, int glbl) | ||
72 | { | ||
73 | int err; | ||
74 | |||
75 | netif_addr_lock_bh(dev); | ||
76 | err = __dev_addr_delete(&dev->mc_list, &dev->mc_count, | ||
77 | addr, alen, glbl); | ||
78 | if (!err) { | ||
79 | /* | ||
80 | * We have altered the list, so the card | ||
81 | * loaded filter is now wrong. Fix it | ||
82 | */ | ||
83 | |||
84 | __dev_set_rx_mode(dev); | ||
85 | } | ||
86 | netif_addr_unlock_bh(dev); | ||
87 | return err; | ||
88 | } | ||
89 | |||
90 | /* | ||
91 | * Add a device level multicast | ||
92 | */ | ||
93 | |||
94 | int dev_mc_add(struct net_device *dev, void *addr, int alen, int glbl) | ||
95 | { | ||
96 | int err; | ||
97 | |||
98 | netif_addr_lock_bh(dev); | ||
99 | if (alen != dev->addr_len) | ||
100 | err = -EINVAL; | ||
101 | else | ||
102 | err = __dev_addr_add(&dev->mc_list, &dev->mc_count, addr, alen, glbl); | ||
103 | if (!err) | ||
104 | __dev_set_rx_mode(dev); | ||
105 | netif_addr_unlock_bh(dev); | ||
106 | return err; | ||
107 | } | ||
108 | |||
109 | /** | ||
110 | * dev_mc_sync - Synchronize device's multicast list to another device | ||
111 | * @to: destination device | ||
112 | * @from: source device | ||
113 | * | ||
114 | * Add newly added addresses to the destination device and release | ||
115 | * addresses that have no users left. The source device must be | ||
116 | * locked by netif_tx_lock_bh. | ||
117 | * | ||
118 | * This function is intended to be called from the dev->set_multicast_list | ||
119 | * or dev->set_rx_mode function of layered software devices. | ||
120 | */ | ||
121 | int dev_mc_sync(struct net_device *to, struct net_device *from) | ||
122 | { | ||
123 | int err = 0; | ||
124 | |||
125 | netif_addr_lock_bh(to); | ||
126 | err = __dev_addr_sync(&to->mc_list, &to->mc_count, | ||
127 | &from->mc_list, &from->mc_count); | ||
128 | if (!err) | ||
129 | __dev_set_rx_mode(to); | ||
130 | netif_addr_unlock_bh(to); | ||
131 | |||
132 | return err; | ||
133 | } | ||
134 | EXPORT_SYMBOL(dev_mc_sync); | ||
135 | |||
136 | |||
137 | /** | ||
138 | * dev_mc_unsync - Remove synchronized addresses from the destination | ||
139 | * device | ||
140 | * @to: destination device | ||
141 | * @from: source device | ||
142 | * | ||
143 | * Remove all addresses that were added to the destination device by | ||
144 | * dev_mc_sync(). This function is intended to be called from the | ||
145 | * dev->stop function of layered software devices. | ||
146 | */ | ||
147 | void dev_mc_unsync(struct net_device *to, struct net_device *from) | ||
148 | { | ||
149 | netif_addr_lock_bh(from); | ||
150 | netif_addr_lock(to); | ||
151 | |||
152 | __dev_addr_unsync(&to->mc_list, &to->mc_count, | ||
153 | &from->mc_list, &from->mc_count); | ||
154 | __dev_set_rx_mode(to); | ||
155 | |||
156 | netif_addr_unlock(to); | ||
157 | netif_addr_unlock_bh(from); | ||
158 | } | ||
159 | EXPORT_SYMBOL(dev_mc_unsync); | ||
160 | |||
161 | #ifdef CONFIG_PROC_FS | ||
162 | static int dev_mc_seq_show(struct seq_file *seq, void *v) | ||
163 | { | ||
164 | struct dev_addr_list *m; | ||
165 | struct net_device *dev = v; | ||
166 | |||
167 | if (v == SEQ_START_TOKEN) | ||
168 | return 0; | ||
169 | |||
170 | netif_addr_lock_bh(dev); | ||
171 | for (m = dev->mc_list; m; m = m->next) { | ||
172 | int i; | ||
173 | |||
174 | seq_printf(seq, "%-4d %-15s %-5d %-5d ", dev->ifindex, | ||
175 | dev->name, m->dmi_users, m->dmi_gusers); | ||
176 | |||
177 | for (i = 0; i < m->dmi_addrlen; i++) | ||
178 | seq_printf(seq, "%02x", m->dmi_addr[i]); | ||
179 | |||
180 | seq_putc(seq, '\n'); | ||
181 | } | ||
182 | netif_addr_unlock_bh(dev); | ||
183 | return 0; | ||
184 | } | ||
185 | |||
186 | static const struct seq_operations dev_mc_seq_ops = { | ||
187 | .start = dev_seq_start, | ||
188 | .next = dev_seq_next, | ||
189 | .stop = dev_seq_stop, | ||
190 | .show = dev_mc_seq_show, | ||
191 | }; | ||
192 | |||
193 | static int dev_mc_seq_open(struct inode *inode, struct file *file) | ||
194 | { | ||
195 | return seq_open_net(inode, file, &dev_mc_seq_ops, | ||
196 | sizeof(struct seq_net_private)); | ||
197 | } | ||
198 | |||
199 | static const struct file_operations dev_mc_seq_fops = { | ||
200 | .owner = THIS_MODULE, | ||
201 | .open = dev_mc_seq_open, | ||
202 | .read = seq_read, | ||
203 | .llseek = seq_lseek, | ||
204 | .release = seq_release_net, | ||
205 | }; | ||
206 | |||
207 | #endif | ||
208 | |||
209 | static int __net_init dev_mc_net_init(struct net *net) | ||
210 | { | ||
211 | if (!proc_net_fops_create(net, "dev_mcast", 0, &dev_mc_seq_fops)) | ||
212 | return -ENOMEM; | ||
213 | return 0; | ||
214 | } | ||
215 | |||
216 | static void __net_exit dev_mc_net_exit(struct net *net) | ||
217 | { | ||
218 | proc_net_remove(net, "dev_mcast"); | ||
219 | } | ||
220 | |||
221 | static struct pernet_operations __net_initdata dev_mc_net_ops = { | ||
222 | .init = dev_mc_net_init, | ||
223 | .exit = dev_mc_net_exit, | ||
224 | }; | ||
225 | |||
226 | void __init dev_mcast_init(void) | ||
227 | { | ||
228 | register_pernet_subsys(&dev_mc_net_ops); | ||
229 | } | ||
230 | |||
231 | EXPORT_SYMBOL(dev_mc_add); | ||
232 | EXPORT_SYMBOL(dev_mc_delete); | ||
diff --git a/net/core/dst.c b/net/core/dst.c index f307bc18f6a0..9920722cc82b 100644 --- a/net/core/dst.c +++ b/net/core/dst.c | |||
@@ -44,7 +44,7 @@ static atomic_t dst_total = ATOMIC_INIT(0); | |||
44 | */ | 44 | */ |
45 | static struct { | 45 | static struct { |
46 | spinlock_t lock; | 46 | spinlock_t lock; |
47 | struct dst_entry *list; | 47 | struct dst_entry *list; |
48 | unsigned long timer_inc; | 48 | unsigned long timer_inc; |
49 | unsigned long timer_expires; | 49 | unsigned long timer_expires; |
50 | } dst_garbage = { | 50 | } dst_garbage = { |
@@ -52,7 +52,7 @@ static struct { | |||
52 | .timer_inc = DST_GC_MAX, | 52 | .timer_inc = DST_GC_MAX, |
53 | }; | 53 | }; |
54 | static void dst_gc_task(struct work_struct *work); | 54 | static void dst_gc_task(struct work_struct *work); |
55 | static void ___dst_free(struct dst_entry * dst); | 55 | static void ___dst_free(struct dst_entry *dst); |
56 | 56 | ||
57 | static DECLARE_DELAYED_WORK(dst_gc_work, dst_gc_task); | 57 | static DECLARE_DELAYED_WORK(dst_gc_work, dst_gc_task); |
58 | 58 | ||
@@ -136,8 +136,8 @@ loop: | |||
136 | } | 136 | } |
137 | expires = dst_garbage.timer_expires; | 137 | expires = dst_garbage.timer_expires; |
138 | /* | 138 | /* |
139 | * if the next desired timer is more than 4 seconds in the future | 139 | * if the next desired timer is more than 4 seconds in the |
140 | * then round the timer to whole seconds | 140 | * future then round the timer to whole seconds |
141 | */ | 141 | */ |
142 | if (expires > 4*HZ) | 142 | if (expires > 4*HZ) |
143 | expires = round_jiffies_relative(expires); | 143 | expires = round_jiffies_relative(expires); |
@@ -152,7 +152,8 @@ loop: | |||
152 | " expires: %lu elapsed: %lu us\n", | 152 | " expires: %lu elapsed: %lu us\n", |
153 | atomic_read(&dst_total), delayed, work_performed, | 153 | atomic_read(&dst_total), delayed, work_performed, |
154 | expires, | 154 | expires, |
155 | elapsed.tv_sec * USEC_PER_SEC + elapsed.tv_nsec / NSEC_PER_USEC); | 155 | elapsed.tv_sec * USEC_PER_SEC + |
156 | elapsed.tv_nsec / NSEC_PER_USEC); | ||
156 | #endif | 157 | #endif |
157 | } | 158 | } |
158 | 159 | ||
@@ -163,9 +164,9 @@ int dst_discard(struct sk_buff *skb) | |||
163 | } | 164 | } |
164 | EXPORT_SYMBOL(dst_discard); | 165 | EXPORT_SYMBOL(dst_discard); |
165 | 166 | ||
166 | void * dst_alloc(struct dst_ops * ops) | 167 | void *dst_alloc(struct dst_ops *ops) |
167 | { | 168 | { |
168 | struct dst_entry * dst; | 169 | struct dst_entry *dst; |
169 | 170 | ||
170 | if (ops->gc && atomic_read(&ops->entries) > ops->gc_thresh) { | 171 | if (ops->gc && atomic_read(&ops->entries) > ops->gc_thresh) { |
171 | if (ops->gc(ops)) | 172 | if (ops->gc(ops)) |
@@ -185,19 +186,20 @@ void * dst_alloc(struct dst_ops * ops) | |||
185 | atomic_inc(&ops->entries); | 186 | atomic_inc(&ops->entries); |
186 | return dst; | 187 | return dst; |
187 | } | 188 | } |
189 | EXPORT_SYMBOL(dst_alloc); | ||
188 | 190 | ||
189 | static void ___dst_free(struct dst_entry * dst) | 191 | static void ___dst_free(struct dst_entry *dst) |
190 | { | 192 | { |
191 | /* The first case (dev==NULL) is required, when | 193 | /* The first case (dev==NULL) is required, when |
192 | protocol module is unloaded. | 194 | protocol module is unloaded. |
193 | */ | 195 | */ |
194 | if (dst->dev == NULL || !(dst->dev->flags&IFF_UP)) { | 196 | if (dst->dev == NULL || !(dst->dev->flags&IFF_UP)) |
195 | dst->input = dst->output = dst_discard; | 197 | dst->input = dst->output = dst_discard; |
196 | } | ||
197 | dst->obsolete = 2; | 198 | dst->obsolete = 2; |
198 | } | 199 | } |
200 | EXPORT_SYMBOL(__dst_free); | ||
199 | 201 | ||
200 | void __dst_free(struct dst_entry * dst) | 202 | void __dst_free(struct dst_entry *dst) |
201 | { | 203 | { |
202 | spin_lock_bh(&dst_garbage.lock); | 204 | spin_lock_bh(&dst_garbage.lock); |
203 | ___dst_free(dst); | 205 | ___dst_free(dst); |
@@ -262,15 +264,16 @@ again: | |||
262 | } | 264 | } |
263 | return NULL; | 265 | return NULL; |
264 | } | 266 | } |
267 | EXPORT_SYMBOL(dst_destroy); | ||
265 | 268 | ||
266 | void dst_release(struct dst_entry *dst) | 269 | void dst_release(struct dst_entry *dst) |
267 | { | 270 | { |
268 | if (dst) { | 271 | if (dst) { |
269 | int newrefcnt; | 272 | int newrefcnt; |
270 | 273 | ||
271 | smp_mb__before_atomic_dec(); | 274 | smp_mb__before_atomic_dec(); |
272 | newrefcnt = atomic_dec_return(&dst->__refcnt); | 275 | newrefcnt = atomic_dec_return(&dst->__refcnt); |
273 | WARN_ON(newrefcnt < 0); | 276 | WARN_ON(newrefcnt < 0); |
274 | } | 277 | } |
275 | } | 278 | } |
276 | EXPORT_SYMBOL(dst_release); | 279 | EXPORT_SYMBOL(dst_release); |
@@ -283,8 +286,8 @@ EXPORT_SYMBOL(dst_release); | |||
283 | * | 286 | * |
284 | * Commented and originally written by Alexey. | 287 | * Commented and originally written by Alexey. |
285 | */ | 288 | */ |
286 | static inline void dst_ifdown(struct dst_entry *dst, struct net_device *dev, | 289 | static void dst_ifdown(struct dst_entry *dst, struct net_device *dev, |
287 | int unregister) | 290 | int unregister) |
288 | { | 291 | { |
289 | if (dst->ops->ifdown) | 292 | if (dst->ops->ifdown) |
290 | dst->ops->ifdown(dst, dev, unregister); | 293 | dst->ops->ifdown(dst, dev, unregister); |
@@ -306,7 +309,8 @@ static inline void dst_ifdown(struct dst_entry *dst, struct net_device *dev, | |||
306 | } | 309 | } |
307 | } | 310 | } |
308 | 311 | ||
309 | static int dst_dev_event(struct notifier_block *this, unsigned long event, void *ptr) | 312 | static int dst_dev_event(struct notifier_block *this, unsigned long event, |
313 | void *ptr) | ||
310 | { | 314 | { |
311 | struct net_device *dev = ptr; | 315 | struct net_device *dev = ptr; |
312 | struct dst_entry *dst, *last = NULL; | 316 | struct dst_entry *dst, *last = NULL; |
@@ -329,9 +333,8 @@ static int dst_dev_event(struct notifier_block *this, unsigned long event, void | |||
329 | last->next = dst; | 333 | last->next = dst; |
330 | else | 334 | else |
331 | dst_busy_list = dst; | 335 | dst_busy_list = dst; |
332 | for (; dst; dst = dst->next) { | 336 | for (; dst; dst = dst->next) |
333 | dst_ifdown(dst, dev, event != NETDEV_DOWN); | 337 | dst_ifdown(dst, dev, event != NETDEV_DOWN); |
334 | } | ||
335 | mutex_unlock(&dst_gc_mutex); | 338 | mutex_unlock(&dst_gc_mutex); |
336 | break; | 339 | break; |
337 | } | 340 | } |
@@ -346,7 +349,3 @@ void __init dst_init(void) | |||
346 | { | 349 | { |
347 | register_netdevice_notifier(&dst_dev_notifier); | 350 | register_netdevice_notifier(&dst_dev_notifier); |
348 | } | 351 | } |
349 | |||
350 | EXPORT_SYMBOL(__dst_free); | ||
351 | EXPORT_SYMBOL(dst_alloc); | ||
352 | EXPORT_SYMBOL(dst_destroy); | ||
diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 9d55c57f318a..a0f4964033d2 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c | |||
@@ -18,8 +18,8 @@ | |||
18 | #include <linux/ethtool.h> | 18 | #include <linux/ethtool.h> |
19 | #include <linux/netdevice.h> | 19 | #include <linux/netdevice.h> |
20 | #include <linux/bitops.h> | 20 | #include <linux/bitops.h> |
21 | #include <linux/uaccess.h> | ||
21 | #include <linux/slab.h> | 22 | #include <linux/slab.h> |
22 | #include <asm/uaccess.h> | ||
23 | 23 | ||
24 | /* | 24 | /* |
25 | * Some useful ethtool_ops methods that're device independent. | 25 | * Some useful ethtool_ops methods that're device independent. |
@@ -31,6 +31,7 @@ u32 ethtool_op_get_link(struct net_device *dev) | |||
31 | { | 31 | { |
32 | return netif_carrier_ok(dev) ? 1 : 0; | 32 | return netif_carrier_ok(dev) ? 1 : 0; |
33 | } | 33 | } |
34 | EXPORT_SYMBOL(ethtool_op_get_link); | ||
34 | 35 | ||
35 | u32 ethtool_op_get_rx_csum(struct net_device *dev) | 36 | u32 ethtool_op_get_rx_csum(struct net_device *dev) |
36 | { | 37 | { |
@@ -63,6 +64,7 @@ int ethtool_op_set_tx_hw_csum(struct net_device *dev, u32 data) | |||
63 | 64 | ||
64 | return 0; | 65 | return 0; |
65 | } | 66 | } |
67 | EXPORT_SYMBOL(ethtool_op_set_tx_hw_csum); | ||
66 | 68 | ||
67 | int ethtool_op_set_tx_ipv6_csum(struct net_device *dev, u32 data) | 69 | int ethtool_op_set_tx_ipv6_csum(struct net_device *dev, u32 data) |
68 | { | 70 | { |
@@ -73,11 +75,13 @@ int ethtool_op_set_tx_ipv6_csum(struct net_device *dev, u32 data) | |||
73 | 75 | ||
74 | return 0; | 76 | return 0; |
75 | } | 77 | } |
78 | EXPORT_SYMBOL(ethtool_op_set_tx_ipv6_csum); | ||
76 | 79 | ||
77 | u32 ethtool_op_get_sg(struct net_device *dev) | 80 | u32 ethtool_op_get_sg(struct net_device *dev) |
78 | { | 81 | { |
79 | return (dev->features & NETIF_F_SG) != 0; | 82 | return (dev->features & NETIF_F_SG) != 0; |
80 | } | 83 | } |
84 | EXPORT_SYMBOL(ethtool_op_get_sg); | ||
81 | 85 | ||
82 | int ethtool_op_set_sg(struct net_device *dev, u32 data) | 86 | int ethtool_op_set_sg(struct net_device *dev, u32 data) |
83 | { | 87 | { |
@@ -88,11 +92,13 @@ int ethtool_op_set_sg(struct net_device *dev, u32 data) | |||
88 | 92 | ||
89 | return 0; | 93 | return 0; |
90 | } | 94 | } |
95 | EXPORT_SYMBOL(ethtool_op_set_sg); | ||
91 | 96 | ||
92 | u32 ethtool_op_get_tso(struct net_device *dev) | 97 | u32 ethtool_op_get_tso(struct net_device *dev) |
93 | { | 98 | { |
94 | return (dev->features & NETIF_F_TSO) != 0; | 99 | return (dev->features & NETIF_F_TSO) != 0; |
95 | } | 100 | } |
101 | EXPORT_SYMBOL(ethtool_op_get_tso); | ||
96 | 102 | ||
97 | int ethtool_op_set_tso(struct net_device *dev, u32 data) | 103 | int ethtool_op_set_tso(struct net_device *dev, u32 data) |
98 | { | 104 | { |
@@ -103,11 +109,13 @@ int ethtool_op_set_tso(struct net_device *dev, u32 data) | |||
103 | 109 | ||
104 | return 0; | 110 | return 0; |
105 | } | 111 | } |
112 | EXPORT_SYMBOL(ethtool_op_set_tso); | ||
106 | 113 | ||
107 | u32 ethtool_op_get_ufo(struct net_device *dev) | 114 | u32 ethtool_op_get_ufo(struct net_device *dev) |
108 | { | 115 | { |
109 | return (dev->features & NETIF_F_UFO) != 0; | 116 | return (dev->features & NETIF_F_UFO) != 0; |
110 | } | 117 | } |
118 | EXPORT_SYMBOL(ethtool_op_get_ufo); | ||
111 | 119 | ||
112 | int ethtool_op_set_ufo(struct net_device *dev, u32 data) | 120 | int ethtool_op_set_ufo(struct net_device *dev, u32 data) |
113 | { | 121 | { |
@@ -117,12 +125,13 @@ int ethtool_op_set_ufo(struct net_device *dev, u32 data) | |||
117 | dev->features &= ~NETIF_F_UFO; | 125 | dev->features &= ~NETIF_F_UFO; |
118 | return 0; | 126 | return 0; |
119 | } | 127 | } |
128 | EXPORT_SYMBOL(ethtool_op_set_ufo); | ||
120 | 129 | ||
121 | /* the following list of flags are the same as their associated | 130 | /* the following list of flags are the same as their associated |
122 | * NETIF_F_xxx values in include/linux/netdevice.h | 131 | * NETIF_F_xxx values in include/linux/netdevice.h |
123 | */ | 132 | */ |
124 | static const u32 flags_dup_features = | 133 | static const u32 flags_dup_features = |
125 | (ETH_FLAG_LRO | ETH_FLAG_NTUPLE); | 134 | (ETH_FLAG_LRO | ETH_FLAG_NTUPLE | ETH_FLAG_RXHASH); |
126 | 135 | ||
127 | u32 ethtool_op_get_flags(struct net_device *dev) | 136 | u32 ethtool_op_get_flags(struct net_device *dev) |
128 | { | 137 | { |
@@ -133,6 +142,7 @@ u32 ethtool_op_get_flags(struct net_device *dev) | |||
133 | 142 | ||
134 | return dev->features & flags_dup_features; | 143 | return dev->features & flags_dup_features; |
135 | } | 144 | } |
145 | EXPORT_SYMBOL(ethtool_op_get_flags); | ||
136 | 146 | ||
137 | int ethtool_op_set_flags(struct net_device *dev, u32 data) | 147 | int ethtool_op_set_flags(struct net_device *dev, u32 data) |
138 | { | 148 | { |
@@ -153,9 +163,15 @@ int ethtool_op_set_flags(struct net_device *dev, u32 data) | |||
153 | features &= ~NETIF_F_NTUPLE; | 163 | features &= ~NETIF_F_NTUPLE; |
154 | } | 164 | } |
155 | 165 | ||
166 | if (data & ETH_FLAG_RXHASH) | ||
167 | features |= NETIF_F_RXHASH; | ||
168 | else | ||
169 | features &= ~NETIF_F_RXHASH; | ||
170 | |||
156 | dev->features = features; | 171 | dev->features = features; |
157 | return 0; | 172 | return 0; |
158 | } | 173 | } |
174 | EXPORT_SYMBOL(ethtool_op_set_flags); | ||
159 | 175 | ||
160 | void ethtool_ntuple_flush(struct net_device *dev) | 176 | void ethtool_ntuple_flush(struct net_device *dev) |
161 | { | 177 | { |
@@ -201,7 +217,8 @@ static int ethtool_set_settings(struct net_device *dev, void __user *useraddr) | |||
201 | return dev->ethtool_ops->set_settings(dev, &cmd); | 217 | return dev->ethtool_ops->set_settings(dev, &cmd); |
202 | } | 218 | } |
203 | 219 | ||
204 | static noinline_for_stack int ethtool_get_drvinfo(struct net_device *dev, void __user *useraddr) | 220 | static noinline_for_stack int ethtool_get_drvinfo(struct net_device *dev, |
221 | void __user *useraddr) | ||
205 | { | 222 | { |
206 | struct ethtool_drvinfo info; | 223 | struct ethtool_drvinfo info; |
207 | const struct ethtool_ops *ops = dev->ethtool_ops; | 224 | const struct ethtool_ops *ops = dev->ethtool_ops; |
@@ -241,7 +258,7 @@ static noinline_for_stack int ethtool_get_drvinfo(struct net_device *dev, void _ | |||
241 | } | 258 | } |
242 | 259 | ||
243 | static noinline_for_stack int ethtool_get_sset_info(struct net_device *dev, | 260 | static noinline_for_stack int ethtool_get_sset_info(struct net_device *dev, |
244 | void __user *useraddr) | 261 | void __user *useraddr) |
245 | { | 262 | { |
246 | struct ethtool_sset_info info; | 263 | struct ethtool_sset_info info; |
247 | const struct ethtool_ops *ops = dev->ethtool_ops; | 264 | const struct ethtool_ops *ops = dev->ethtool_ops; |
@@ -300,7 +317,8 @@ out: | |||
300 | return ret; | 317 | return ret; |
301 | } | 318 | } |
302 | 319 | ||
303 | static noinline_for_stack int ethtool_set_rxnfc(struct net_device *dev, void __user *useraddr) | 320 | static noinline_for_stack int ethtool_set_rxnfc(struct net_device *dev, |
321 | void __user *useraddr) | ||
304 | { | 322 | { |
305 | struct ethtool_rxnfc cmd; | 323 | struct ethtool_rxnfc cmd; |
306 | 324 | ||
@@ -313,7 +331,8 @@ static noinline_for_stack int ethtool_set_rxnfc(struct net_device *dev, void __u | |||
313 | return dev->ethtool_ops->set_rxnfc(dev, &cmd); | 331 | return dev->ethtool_ops->set_rxnfc(dev, &cmd); |
314 | } | 332 | } |
315 | 333 | ||
316 | static noinline_for_stack int ethtool_get_rxnfc(struct net_device *dev, void __user *useraddr) | 334 | static noinline_for_stack int ethtool_get_rxnfc(struct net_device *dev, |
335 | void __user *useraddr) | ||
317 | { | 336 | { |
318 | struct ethtool_rxnfc info; | 337 | struct ethtool_rxnfc info; |
319 | const struct ethtool_ops *ops = dev->ethtool_ops; | 338 | const struct ethtool_ops *ops = dev->ethtool_ops; |
@@ -358,8 +377,8 @@ err_out: | |||
358 | } | 377 | } |
359 | 378 | ||
360 | static void __rx_ntuple_filter_add(struct ethtool_rx_ntuple_list *list, | 379 | static void __rx_ntuple_filter_add(struct ethtool_rx_ntuple_list *list, |
361 | struct ethtool_rx_ntuple_flow_spec *spec, | 380 | struct ethtool_rx_ntuple_flow_spec *spec, |
362 | struct ethtool_rx_ntuple_flow_spec_container *fsc) | 381 | struct ethtool_rx_ntuple_flow_spec_container *fsc) |
363 | { | 382 | { |
364 | 383 | ||
365 | /* don't add filters forever */ | 384 | /* don't add filters forever */ |
@@ -385,7 +404,8 @@ static void __rx_ntuple_filter_add(struct ethtool_rx_ntuple_list *list, | |||
385 | list->count++; | 404 | list->count++; |
386 | } | 405 | } |
387 | 406 | ||
388 | static noinline_for_stack int ethtool_set_rx_ntuple(struct net_device *dev, void __user *useraddr) | 407 | static noinline_for_stack int ethtool_set_rx_ntuple(struct net_device *dev, |
408 | void __user *useraddr) | ||
389 | { | 409 | { |
390 | struct ethtool_rx_ntuple cmd; | 410 | struct ethtool_rx_ntuple cmd; |
391 | const struct ethtool_ops *ops = dev->ethtool_ops; | 411 | const struct ethtool_ops *ops = dev->ethtool_ops; |
@@ -502,7 +522,7 @@ static int ethtool_get_rx_ntuple(struct net_device *dev, void __user *useraddr) | |||
502 | p += ETH_GSTRING_LEN; | 522 | p += ETH_GSTRING_LEN; |
503 | num_strings++; | 523 | num_strings++; |
504 | goto unknown_filter; | 524 | goto unknown_filter; |
505 | }; | 525 | } |
506 | 526 | ||
507 | /* now the rest of the filters */ | 527 | /* now the rest of the filters */ |
508 | switch (fsc->fs.flow_type) { | 528 | switch (fsc->fs.flow_type) { |
@@ -510,125 +530,125 @@ static int ethtool_get_rx_ntuple(struct net_device *dev, void __user *useraddr) | |||
510 | case UDP_V4_FLOW: | 530 | case UDP_V4_FLOW: |
511 | case SCTP_V4_FLOW: | 531 | case SCTP_V4_FLOW: |
512 | sprintf(p, "\tSrc IP addr: 0x%x\n", | 532 | sprintf(p, "\tSrc IP addr: 0x%x\n", |
513 | fsc->fs.h_u.tcp_ip4_spec.ip4src); | 533 | fsc->fs.h_u.tcp_ip4_spec.ip4src); |
514 | p += ETH_GSTRING_LEN; | 534 | p += ETH_GSTRING_LEN; |
515 | num_strings++; | 535 | num_strings++; |
516 | sprintf(p, "\tSrc IP mask: 0x%x\n", | 536 | sprintf(p, "\tSrc IP mask: 0x%x\n", |
517 | fsc->fs.m_u.tcp_ip4_spec.ip4src); | 537 | fsc->fs.m_u.tcp_ip4_spec.ip4src); |
518 | p += ETH_GSTRING_LEN; | 538 | p += ETH_GSTRING_LEN; |
519 | num_strings++; | 539 | num_strings++; |
520 | sprintf(p, "\tDest IP addr: 0x%x\n", | 540 | sprintf(p, "\tDest IP addr: 0x%x\n", |
521 | fsc->fs.h_u.tcp_ip4_spec.ip4dst); | 541 | fsc->fs.h_u.tcp_ip4_spec.ip4dst); |
522 | p += ETH_GSTRING_LEN; | 542 | p += ETH_GSTRING_LEN; |
523 | num_strings++; | 543 | num_strings++; |
524 | sprintf(p, "\tDest IP mask: 0x%x\n", | 544 | sprintf(p, "\tDest IP mask: 0x%x\n", |
525 | fsc->fs.m_u.tcp_ip4_spec.ip4dst); | 545 | fsc->fs.m_u.tcp_ip4_spec.ip4dst); |
526 | p += ETH_GSTRING_LEN; | 546 | p += ETH_GSTRING_LEN; |
527 | num_strings++; | 547 | num_strings++; |
528 | sprintf(p, "\tSrc Port: %d, mask: 0x%x\n", | 548 | sprintf(p, "\tSrc Port: %d, mask: 0x%x\n", |
529 | fsc->fs.h_u.tcp_ip4_spec.psrc, | 549 | fsc->fs.h_u.tcp_ip4_spec.psrc, |
530 | fsc->fs.m_u.tcp_ip4_spec.psrc); | 550 | fsc->fs.m_u.tcp_ip4_spec.psrc); |
531 | p += ETH_GSTRING_LEN; | 551 | p += ETH_GSTRING_LEN; |
532 | num_strings++; | 552 | num_strings++; |
533 | sprintf(p, "\tDest Port: %d, mask: 0x%x\n", | 553 | sprintf(p, "\tDest Port: %d, mask: 0x%x\n", |
534 | fsc->fs.h_u.tcp_ip4_spec.pdst, | 554 | fsc->fs.h_u.tcp_ip4_spec.pdst, |
535 | fsc->fs.m_u.tcp_ip4_spec.pdst); | 555 | fsc->fs.m_u.tcp_ip4_spec.pdst); |
536 | p += ETH_GSTRING_LEN; | 556 | p += ETH_GSTRING_LEN; |
537 | num_strings++; | 557 | num_strings++; |
538 | sprintf(p, "\tTOS: %d, mask: 0x%x\n", | 558 | sprintf(p, "\tTOS: %d, mask: 0x%x\n", |
539 | fsc->fs.h_u.tcp_ip4_spec.tos, | 559 | fsc->fs.h_u.tcp_ip4_spec.tos, |
540 | fsc->fs.m_u.tcp_ip4_spec.tos); | 560 | fsc->fs.m_u.tcp_ip4_spec.tos); |
541 | p += ETH_GSTRING_LEN; | 561 | p += ETH_GSTRING_LEN; |
542 | num_strings++; | 562 | num_strings++; |
543 | break; | 563 | break; |
544 | case AH_ESP_V4_FLOW: | 564 | case AH_ESP_V4_FLOW: |
545 | case ESP_V4_FLOW: | 565 | case ESP_V4_FLOW: |
546 | sprintf(p, "\tSrc IP addr: 0x%x\n", | 566 | sprintf(p, "\tSrc IP addr: 0x%x\n", |
547 | fsc->fs.h_u.ah_ip4_spec.ip4src); | 567 | fsc->fs.h_u.ah_ip4_spec.ip4src); |
548 | p += ETH_GSTRING_LEN; | 568 | p += ETH_GSTRING_LEN; |
549 | num_strings++; | 569 | num_strings++; |
550 | sprintf(p, "\tSrc IP mask: 0x%x\n", | 570 | sprintf(p, "\tSrc IP mask: 0x%x\n", |
551 | fsc->fs.m_u.ah_ip4_spec.ip4src); | 571 | fsc->fs.m_u.ah_ip4_spec.ip4src); |
552 | p += ETH_GSTRING_LEN; | 572 | p += ETH_GSTRING_LEN; |
553 | num_strings++; | 573 | num_strings++; |
554 | sprintf(p, "\tDest IP addr: 0x%x\n", | 574 | sprintf(p, "\tDest IP addr: 0x%x\n", |
555 | fsc->fs.h_u.ah_ip4_spec.ip4dst); | 575 | fsc->fs.h_u.ah_ip4_spec.ip4dst); |
556 | p += ETH_GSTRING_LEN; | 576 | p += ETH_GSTRING_LEN; |
557 | num_strings++; | 577 | num_strings++; |
558 | sprintf(p, "\tDest IP mask: 0x%x\n", | 578 | sprintf(p, "\tDest IP mask: 0x%x\n", |
559 | fsc->fs.m_u.ah_ip4_spec.ip4dst); | 579 | fsc->fs.m_u.ah_ip4_spec.ip4dst); |
560 | p += ETH_GSTRING_LEN; | 580 | p += ETH_GSTRING_LEN; |
561 | num_strings++; | 581 | num_strings++; |
562 | sprintf(p, "\tSPI: %d, mask: 0x%x\n", | 582 | sprintf(p, "\tSPI: %d, mask: 0x%x\n", |
563 | fsc->fs.h_u.ah_ip4_spec.spi, | 583 | fsc->fs.h_u.ah_ip4_spec.spi, |
564 | fsc->fs.m_u.ah_ip4_spec.spi); | 584 | fsc->fs.m_u.ah_ip4_spec.spi); |
565 | p += ETH_GSTRING_LEN; | 585 | p += ETH_GSTRING_LEN; |
566 | num_strings++; | 586 | num_strings++; |
567 | sprintf(p, "\tTOS: %d, mask: 0x%x\n", | 587 | sprintf(p, "\tTOS: %d, mask: 0x%x\n", |
568 | fsc->fs.h_u.ah_ip4_spec.tos, | 588 | fsc->fs.h_u.ah_ip4_spec.tos, |
569 | fsc->fs.m_u.ah_ip4_spec.tos); | 589 | fsc->fs.m_u.ah_ip4_spec.tos); |
570 | p += ETH_GSTRING_LEN; | 590 | p += ETH_GSTRING_LEN; |
571 | num_strings++; | 591 | num_strings++; |
572 | break; | 592 | break; |
573 | case IP_USER_FLOW: | 593 | case IP_USER_FLOW: |
574 | sprintf(p, "\tSrc IP addr: 0x%x\n", | 594 | sprintf(p, "\tSrc IP addr: 0x%x\n", |
575 | fsc->fs.h_u.raw_ip4_spec.ip4src); | 595 | fsc->fs.h_u.raw_ip4_spec.ip4src); |
576 | p += ETH_GSTRING_LEN; | 596 | p += ETH_GSTRING_LEN; |
577 | num_strings++; | 597 | num_strings++; |
578 | sprintf(p, "\tSrc IP mask: 0x%x\n", | 598 | sprintf(p, "\tSrc IP mask: 0x%x\n", |
579 | fsc->fs.m_u.raw_ip4_spec.ip4src); | 599 | fsc->fs.m_u.raw_ip4_spec.ip4src); |
580 | p += ETH_GSTRING_LEN; | 600 | p += ETH_GSTRING_LEN; |
581 | num_strings++; | 601 | num_strings++; |
582 | sprintf(p, "\tDest IP addr: 0x%x\n", | 602 | sprintf(p, "\tDest IP addr: 0x%x\n", |
583 | fsc->fs.h_u.raw_ip4_spec.ip4dst); | 603 | fsc->fs.h_u.raw_ip4_spec.ip4dst); |
584 | p += ETH_GSTRING_LEN; | 604 | p += ETH_GSTRING_LEN; |
585 | num_strings++; | 605 | num_strings++; |
586 | sprintf(p, "\tDest IP mask: 0x%x\n", | 606 | sprintf(p, "\tDest IP mask: 0x%x\n", |
587 | fsc->fs.m_u.raw_ip4_spec.ip4dst); | 607 | fsc->fs.m_u.raw_ip4_spec.ip4dst); |
588 | p += ETH_GSTRING_LEN; | 608 | p += ETH_GSTRING_LEN; |
589 | num_strings++; | 609 | num_strings++; |
590 | break; | 610 | break; |
591 | case IPV4_FLOW: | 611 | case IPV4_FLOW: |
592 | sprintf(p, "\tSrc IP addr: 0x%x\n", | 612 | sprintf(p, "\tSrc IP addr: 0x%x\n", |
593 | fsc->fs.h_u.usr_ip4_spec.ip4src); | 613 | fsc->fs.h_u.usr_ip4_spec.ip4src); |
594 | p += ETH_GSTRING_LEN; | 614 | p += ETH_GSTRING_LEN; |
595 | num_strings++; | 615 | num_strings++; |
596 | sprintf(p, "\tSrc IP mask: 0x%x\n", | 616 | sprintf(p, "\tSrc IP mask: 0x%x\n", |
597 | fsc->fs.m_u.usr_ip4_spec.ip4src); | 617 | fsc->fs.m_u.usr_ip4_spec.ip4src); |
598 | p += ETH_GSTRING_LEN; | 618 | p += ETH_GSTRING_LEN; |
599 | num_strings++; | 619 | num_strings++; |
600 | sprintf(p, "\tDest IP addr: 0x%x\n", | 620 | sprintf(p, "\tDest IP addr: 0x%x\n", |
601 | fsc->fs.h_u.usr_ip4_spec.ip4dst); | 621 | fsc->fs.h_u.usr_ip4_spec.ip4dst); |
602 | p += ETH_GSTRING_LEN; | 622 | p += ETH_GSTRING_LEN; |
603 | num_strings++; | 623 | num_strings++; |
604 | sprintf(p, "\tDest IP mask: 0x%x\n", | 624 | sprintf(p, "\tDest IP mask: 0x%x\n", |
605 | fsc->fs.m_u.usr_ip4_spec.ip4dst); | 625 | fsc->fs.m_u.usr_ip4_spec.ip4dst); |
606 | p += ETH_GSTRING_LEN; | 626 | p += ETH_GSTRING_LEN; |
607 | num_strings++; | 627 | num_strings++; |
608 | sprintf(p, "\tL4 bytes: 0x%x, mask: 0x%x\n", | 628 | sprintf(p, "\tL4 bytes: 0x%x, mask: 0x%x\n", |
609 | fsc->fs.h_u.usr_ip4_spec.l4_4_bytes, | 629 | fsc->fs.h_u.usr_ip4_spec.l4_4_bytes, |
610 | fsc->fs.m_u.usr_ip4_spec.l4_4_bytes); | 630 | fsc->fs.m_u.usr_ip4_spec.l4_4_bytes); |
611 | p += ETH_GSTRING_LEN; | 631 | p += ETH_GSTRING_LEN; |
612 | num_strings++; | 632 | num_strings++; |
613 | sprintf(p, "\tTOS: %d, mask: 0x%x\n", | 633 | sprintf(p, "\tTOS: %d, mask: 0x%x\n", |
614 | fsc->fs.h_u.usr_ip4_spec.tos, | 634 | fsc->fs.h_u.usr_ip4_spec.tos, |
615 | fsc->fs.m_u.usr_ip4_spec.tos); | 635 | fsc->fs.m_u.usr_ip4_spec.tos); |
616 | p += ETH_GSTRING_LEN; | 636 | p += ETH_GSTRING_LEN; |
617 | num_strings++; | 637 | num_strings++; |
618 | sprintf(p, "\tIP Version: %d, mask: 0x%x\n", | 638 | sprintf(p, "\tIP Version: %d, mask: 0x%x\n", |
619 | fsc->fs.h_u.usr_ip4_spec.ip_ver, | 639 | fsc->fs.h_u.usr_ip4_spec.ip_ver, |
620 | fsc->fs.m_u.usr_ip4_spec.ip_ver); | 640 | fsc->fs.m_u.usr_ip4_spec.ip_ver); |
621 | p += ETH_GSTRING_LEN; | 641 | p += ETH_GSTRING_LEN; |
622 | num_strings++; | 642 | num_strings++; |
623 | sprintf(p, "\tProtocol: %d, mask: 0x%x\n", | 643 | sprintf(p, "\tProtocol: %d, mask: 0x%x\n", |
624 | fsc->fs.h_u.usr_ip4_spec.proto, | 644 | fsc->fs.h_u.usr_ip4_spec.proto, |
625 | fsc->fs.m_u.usr_ip4_spec.proto); | 645 | fsc->fs.m_u.usr_ip4_spec.proto); |
626 | p += ETH_GSTRING_LEN; | 646 | p += ETH_GSTRING_LEN; |
627 | num_strings++; | 647 | num_strings++; |
628 | break; | 648 | break; |
629 | }; | 649 | } |
630 | sprintf(p, "\tVLAN: %d, mask: 0x%x\n", | 650 | sprintf(p, "\tVLAN: %d, mask: 0x%x\n", |
631 | fsc->fs.vlan_tag, fsc->fs.vlan_tag_mask); | 651 | fsc->fs.vlan_tag, fsc->fs.vlan_tag_mask); |
632 | p += ETH_GSTRING_LEN; | 652 | p += ETH_GSTRING_LEN; |
633 | num_strings++; | 653 | num_strings++; |
634 | sprintf(p, "\tUser-defined: 0x%Lx\n", fsc->fs.data); | 654 | sprintf(p, "\tUser-defined: 0x%Lx\n", fsc->fs.data); |
@@ -641,7 +661,7 @@ static int ethtool_get_rx_ntuple(struct net_device *dev, void __user *useraddr) | |||
641 | sprintf(p, "\tAction: Drop\n"); | 661 | sprintf(p, "\tAction: Drop\n"); |
642 | else | 662 | else |
643 | sprintf(p, "\tAction: Direct to queue %d\n", | 663 | sprintf(p, "\tAction: Direct to queue %d\n", |
644 | fsc->fs.action); | 664 | fsc->fs.action); |
645 | p += ETH_GSTRING_LEN; | 665 | p += ETH_GSTRING_LEN; |
646 | num_strings++; | 666 | num_strings++; |
647 | unknown_filter: | 667 | unknown_filter: |
@@ -853,7 +873,8 @@ static int ethtool_set_eeprom(struct net_device *dev, void __user *useraddr) | |||
853 | return ret; | 873 | return ret; |
854 | } | 874 | } |
855 | 875 | ||
856 | static noinline_for_stack int ethtool_get_coalesce(struct net_device *dev, void __user *useraddr) | 876 | static noinline_for_stack int ethtool_get_coalesce(struct net_device *dev, |
877 | void __user *useraddr) | ||
857 | { | 878 | { |
858 | struct ethtool_coalesce coalesce = { .cmd = ETHTOOL_GCOALESCE }; | 879 | struct ethtool_coalesce coalesce = { .cmd = ETHTOOL_GCOALESCE }; |
859 | 880 | ||
@@ -867,7 +888,8 @@ static noinline_for_stack int ethtool_get_coalesce(struct net_device *dev, void | |||
867 | return 0; | 888 | return 0; |
868 | } | 889 | } |
869 | 890 | ||
870 | static noinline_for_stack int ethtool_set_coalesce(struct net_device *dev, void __user *useraddr) | 891 | static noinline_for_stack int ethtool_set_coalesce(struct net_device *dev, |
892 | void __user *useraddr) | ||
871 | { | 893 | { |
872 | struct ethtool_coalesce coalesce; | 894 | struct ethtool_coalesce coalesce; |
873 | 895 | ||
@@ -971,6 +993,7 @@ static int ethtool_set_tx_csum(struct net_device *dev, char __user *useraddr) | |||
971 | 993 | ||
972 | return dev->ethtool_ops->set_tx_csum(dev, edata.data); | 994 | return dev->ethtool_ops->set_tx_csum(dev, edata.data); |
973 | } | 995 | } |
996 | EXPORT_SYMBOL(ethtool_op_set_tx_csum); | ||
974 | 997 | ||
975 | static int ethtool_set_rx_csum(struct net_device *dev, char __user *useraddr) | 998 | static int ethtool_set_rx_csum(struct net_device *dev, char __user *useraddr) |
976 | { | 999 | { |
@@ -1042,7 +1065,7 @@ static int ethtool_get_gso(struct net_device *dev, char __user *useraddr) | |||
1042 | 1065 | ||
1043 | edata.data = dev->features & NETIF_F_GSO; | 1066 | edata.data = dev->features & NETIF_F_GSO; |
1044 | if (copy_to_user(useraddr, &edata, sizeof(edata))) | 1067 | if (copy_to_user(useraddr, &edata, sizeof(edata))) |
1045 | return -EFAULT; | 1068 | return -EFAULT; |
1046 | return 0; | 1069 | return 0; |
1047 | } | 1070 | } |
1048 | 1071 | ||
@@ -1065,7 +1088,7 @@ static int ethtool_get_gro(struct net_device *dev, char __user *useraddr) | |||
1065 | 1088 | ||
1066 | edata.data = dev->features & NETIF_F_GRO; | 1089 | edata.data = dev->features & NETIF_F_GRO; |
1067 | if (copy_to_user(useraddr, &edata, sizeof(edata))) | 1090 | if (copy_to_user(useraddr, &edata, sizeof(edata))) |
1068 | return -EFAULT; | 1091 | return -EFAULT; |
1069 | return 0; | 1092 | return 0; |
1070 | } | 1093 | } |
1071 | 1094 | ||
@@ -1277,7 +1300,8 @@ static int ethtool_set_value(struct net_device *dev, char __user *useraddr, | |||
1277 | return actor(dev, edata.data); | 1300 | return actor(dev, edata.data); |
1278 | } | 1301 | } |
1279 | 1302 | ||
1280 | static noinline_for_stack int ethtool_flash_device(struct net_device *dev, char __user *useraddr) | 1303 | static noinline_for_stack int ethtool_flash_device(struct net_device *dev, |
1304 | char __user *useraddr) | ||
1281 | { | 1305 | { |
1282 | struct ethtool_flash efl; | 1306 | struct ethtool_flash efl; |
1283 | 1307 | ||
@@ -1306,11 +1330,11 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) | |||
1306 | if (!dev->ethtool_ops) | 1330 | if (!dev->ethtool_ops) |
1307 | return -EOPNOTSUPP; | 1331 | return -EOPNOTSUPP; |
1308 | 1332 | ||
1309 | if (copy_from_user(&ethcmd, useraddr, sizeof (ethcmd))) | 1333 | if (copy_from_user(&ethcmd, useraddr, sizeof(ethcmd))) |
1310 | return -EFAULT; | 1334 | return -EFAULT; |
1311 | 1335 | ||
1312 | /* Allow some commands to be done by anyone */ | 1336 | /* Allow some commands to be done by anyone */ |
1313 | switch(ethcmd) { | 1337 | switch (ethcmd) { |
1314 | case ETHTOOL_GDRVINFO: | 1338 | case ETHTOOL_GDRVINFO: |
1315 | case ETHTOOL_GMSGLVL: | 1339 | case ETHTOOL_GMSGLVL: |
1316 | case ETHTOOL_GCOALESCE: | 1340 | case ETHTOOL_GCOALESCE: |
@@ -1338,10 +1362,11 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) | |||
1338 | return -EPERM; | 1362 | return -EPERM; |
1339 | } | 1363 | } |
1340 | 1364 | ||
1341 | if (dev->ethtool_ops->begin) | 1365 | if (dev->ethtool_ops->begin) { |
1342 | if ((rc = dev->ethtool_ops->begin(dev)) < 0) | 1366 | rc = dev->ethtool_ops->begin(dev); |
1367 | if (rc < 0) | ||
1343 | return rc; | 1368 | return rc; |
1344 | 1369 | } | |
1345 | old_features = dev->features; | 1370 | old_features = dev->features; |
1346 | 1371 | ||
1347 | switch (ethcmd) { | 1372 | switch (ethcmd) { |
@@ -1531,16 +1556,3 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) | |||
1531 | 1556 | ||
1532 | return rc; | 1557 | return rc; |
1533 | } | 1558 | } |
1534 | |||
1535 | EXPORT_SYMBOL(ethtool_op_get_link); | ||
1536 | EXPORT_SYMBOL(ethtool_op_get_sg); | ||
1537 | EXPORT_SYMBOL(ethtool_op_get_tso); | ||
1538 | EXPORT_SYMBOL(ethtool_op_set_sg); | ||
1539 | EXPORT_SYMBOL(ethtool_op_set_tso); | ||
1540 | EXPORT_SYMBOL(ethtool_op_set_tx_csum); | ||
1541 | EXPORT_SYMBOL(ethtool_op_set_tx_hw_csum); | ||
1542 | EXPORT_SYMBOL(ethtool_op_set_tx_ipv6_csum); | ||
1543 | EXPORT_SYMBOL(ethtool_op_set_ufo); | ||
1544 | EXPORT_SYMBOL(ethtool_op_get_ufo); | ||
1545 | EXPORT_SYMBOL(ethtool_op_set_flags); | ||
1546 | EXPORT_SYMBOL(ethtool_op_get_flags); | ||
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index d2c3e7dc2e5f..42e84e08a1be 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c | |||
@@ -39,6 +39,24 @@ int fib_default_rule_add(struct fib_rules_ops *ops, | |||
39 | } | 39 | } |
40 | EXPORT_SYMBOL(fib_default_rule_add); | 40 | EXPORT_SYMBOL(fib_default_rule_add); |
41 | 41 | ||
42 | u32 fib_default_rule_pref(struct fib_rules_ops *ops) | ||
43 | { | ||
44 | struct list_head *pos; | ||
45 | struct fib_rule *rule; | ||
46 | |||
47 | if (!list_empty(&ops->rules_list)) { | ||
48 | pos = ops->rules_list.next; | ||
49 | if (pos->next != &ops->rules_list) { | ||
50 | rule = list_entry(pos->next, struct fib_rule, list); | ||
51 | if (rule->pref) | ||
52 | return rule->pref - 1; | ||
53 | } | ||
54 | } | ||
55 | |||
56 | return 0; | ||
57 | } | ||
58 | EXPORT_SYMBOL(fib_default_rule_pref); | ||
59 | |||
42 | static void notify_rule_change(int event, struct fib_rule *rule, | 60 | static void notify_rule_change(int event, struct fib_rule *rule, |
43 | struct fib_rules_ops *ops, struct nlmsghdr *nlh, | 61 | struct fib_rules_ops *ops, struct nlmsghdr *nlh, |
44 | u32 pid); | 62 | u32 pid); |
@@ -104,12 +122,12 @@ errout: | |||
104 | } | 122 | } |
105 | 123 | ||
106 | struct fib_rules_ops * | 124 | struct fib_rules_ops * |
107 | fib_rules_register(struct fib_rules_ops *tmpl, struct net *net) | 125 | fib_rules_register(const struct fib_rules_ops *tmpl, struct net *net) |
108 | { | 126 | { |
109 | struct fib_rules_ops *ops; | 127 | struct fib_rules_ops *ops; |
110 | int err; | 128 | int err; |
111 | 129 | ||
112 | ops = kmemdup(tmpl, sizeof (*ops), GFP_KERNEL); | 130 | ops = kmemdup(tmpl, sizeof(*ops), GFP_KERNEL); |
113 | if (ops == NULL) | 131 | if (ops == NULL) |
114 | return ERR_PTR(-ENOMEM); | 132 | return ERR_PTR(-ENOMEM); |
115 | 133 | ||
@@ -124,7 +142,6 @@ fib_rules_register(struct fib_rules_ops *tmpl, struct net *net) | |||
124 | 142 | ||
125 | return ops; | 143 | return ops; |
126 | } | 144 | } |
127 | |||
128 | EXPORT_SYMBOL_GPL(fib_rules_register); | 145 | EXPORT_SYMBOL_GPL(fib_rules_register); |
129 | 146 | ||
130 | void fib_rules_cleanup_ops(struct fib_rules_ops *ops) | 147 | void fib_rules_cleanup_ops(struct fib_rules_ops *ops) |
@@ -158,7 +175,6 @@ void fib_rules_unregister(struct fib_rules_ops *ops) | |||
158 | 175 | ||
159 | call_rcu(&ops->rcu, fib_rules_put_rcu); | 176 | call_rcu(&ops->rcu, fib_rules_put_rcu); |
160 | } | 177 | } |
161 | |||
162 | EXPORT_SYMBOL_GPL(fib_rules_unregister); | 178 | EXPORT_SYMBOL_GPL(fib_rules_unregister); |
163 | 179 | ||
164 | static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops, | 180 | static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops, |
@@ -221,7 +237,6 @@ out: | |||
221 | 237 | ||
222 | return err; | 238 | return err; |
223 | } | 239 | } |
224 | |||
225 | EXPORT_SYMBOL_GPL(fib_rules_lookup); | 240 | EXPORT_SYMBOL_GPL(fib_rules_lookup); |
226 | 241 | ||
227 | static int validate_rulemsg(struct fib_rule_hdr *frh, struct nlattr **tb, | 242 | static int validate_rulemsg(struct fib_rule_hdr *frh, struct nlattr **tb, |
@@ -520,6 +535,7 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule, | |||
520 | return -EMSGSIZE; | 535 | return -EMSGSIZE; |
521 | 536 | ||
522 | frh = nlmsg_data(nlh); | 537 | frh = nlmsg_data(nlh); |
538 | frh->family = ops->family; | ||
523 | frh->table = rule->table; | 539 | frh->table = rule->table; |
524 | NLA_PUT_U32(skb, FRA_TABLE, rule->table); | 540 | NLA_PUT_U32(skb, FRA_TABLE, rule->table); |
525 | frh->res1 = 0; | 541 | frh->res1 = 0; |
@@ -614,7 +630,7 @@ static int fib_nl_dumprule(struct sk_buff *skb, struct netlink_callback *cb) | |||
614 | break; | 630 | break; |
615 | 631 | ||
616 | cb->args[1] = 0; | 632 | cb->args[1] = 0; |
617 | skip: | 633 | skip: |
618 | idx++; | 634 | idx++; |
619 | } | 635 | } |
620 | rcu_read_unlock(); | 636 | rcu_read_unlock(); |
@@ -686,7 +702,6 @@ static int fib_rules_event(struct notifier_block *this, unsigned long event, | |||
686 | struct fib_rules_ops *ops; | 702 | struct fib_rules_ops *ops; |
687 | 703 | ||
688 | ASSERT_RTNL(); | 704 | ASSERT_RTNL(); |
689 | rcu_read_lock(); | ||
690 | 705 | ||
691 | switch (event) { | 706 | switch (event) { |
692 | case NETDEV_REGISTER: | 707 | case NETDEV_REGISTER: |
@@ -700,8 +715,6 @@ static int fib_rules_event(struct notifier_block *this, unsigned long event, | |||
700 | break; | 715 | break; |
701 | } | 716 | } |
702 | 717 | ||
703 | rcu_read_unlock(); | ||
704 | |||
705 | return NOTIFY_DONE; | 718 | return NOTIFY_DONE; |
706 | } | 719 | } |
707 | 720 | ||
diff --git a/net/core/filter.c b/net/core/filter.c index ff943bed21af..da69fb728d32 100644 --- a/net/core/filter.c +++ b/net/core/filter.c | |||
@@ -302,6 +302,8 @@ load_b: | |||
302 | A = skb->pkt_type; | 302 | A = skb->pkt_type; |
303 | continue; | 303 | continue; |
304 | case SKF_AD_IFINDEX: | 304 | case SKF_AD_IFINDEX: |
305 | if (!skb->dev) | ||
306 | return 0; | ||
305 | A = skb->dev->ifindex; | 307 | A = skb->dev->ifindex; |
306 | continue; | 308 | continue; |
307 | case SKF_AD_MARK: | 309 | case SKF_AD_MARK: |
@@ -310,6 +312,11 @@ load_b: | |||
310 | case SKF_AD_QUEUE: | 312 | case SKF_AD_QUEUE: |
311 | A = skb->queue_mapping; | 313 | A = skb->queue_mapping; |
312 | continue; | 314 | continue; |
315 | case SKF_AD_HATYPE: | ||
316 | if (!skb->dev) | ||
317 | return 0; | ||
318 | A = skb->dev->type; | ||
319 | continue; | ||
313 | case SKF_AD_NLATTR: { | 320 | case SKF_AD_NLATTR: { |
314 | struct nlattr *nla; | 321 | struct nlattr *nla; |
315 | 322 | ||
diff --git a/net/core/flow.c b/net/core/flow.c index 96015871ecea..161900674009 100644 --- a/net/core/flow.c +++ b/net/core/flow.c | |||
@@ -26,113 +26,158 @@ | |||
26 | #include <linux/security.h> | 26 | #include <linux/security.h> |
27 | 27 | ||
28 | struct flow_cache_entry { | 28 | struct flow_cache_entry { |
29 | struct flow_cache_entry *next; | 29 | union { |
30 | u16 family; | 30 | struct hlist_node hlist; |
31 | u8 dir; | 31 | struct list_head gc_list; |
32 | u32 genid; | 32 | } u; |
33 | struct flowi key; | 33 | u16 family; |
34 | void *object; | 34 | u8 dir; |
35 | atomic_t *object_ref; | 35 | u32 genid; |
36 | struct flowi key; | ||
37 | struct flow_cache_object *object; | ||
36 | }; | 38 | }; |
37 | 39 | ||
38 | atomic_t flow_cache_genid = ATOMIC_INIT(0); | 40 | struct flow_cache_percpu { |
39 | 41 | struct hlist_head *hash_table; | |
40 | static u32 flow_hash_shift; | 42 | int hash_count; |
41 | #define flow_hash_size (1 << flow_hash_shift) | 43 | u32 hash_rnd; |
42 | static DEFINE_PER_CPU(struct flow_cache_entry **, flow_tables) = { NULL }; | 44 | int hash_rnd_recalc; |
43 | 45 | struct tasklet_struct flush_tasklet; | |
44 | #define flow_table(cpu) (per_cpu(flow_tables, cpu)) | 46 | }; |
45 | |||
46 | static struct kmem_cache *flow_cachep __read_mostly; | ||
47 | 47 | ||
48 | static int flow_lwm, flow_hwm; | 48 | struct flow_flush_info { |
49 | struct flow_cache *cache; | ||
50 | atomic_t cpuleft; | ||
51 | struct completion completion; | ||
52 | }; | ||
49 | 53 | ||
50 | struct flow_percpu_info { | 54 | struct flow_cache { |
51 | int hash_rnd_recalc; | 55 | u32 hash_shift; |
52 | u32 hash_rnd; | 56 | unsigned long order; |
53 | int count; | 57 | struct flow_cache_percpu *percpu; |
58 | struct notifier_block hotcpu_notifier; | ||
59 | int low_watermark; | ||
60 | int high_watermark; | ||
61 | struct timer_list rnd_timer; | ||
54 | }; | 62 | }; |
55 | static DEFINE_PER_CPU(struct flow_percpu_info, flow_hash_info) = { 0 }; | ||
56 | 63 | ||
57 | #define flow_hash_rnd_recalc(cpu) \ | 64 | atomic_t flow_cache_genid = ATOMIC_INIT(0); |
58 | (per_cpu(flow_hash_info, cpu).hash_rnd_recalc) | 65 | static struct flow_cache flow_cache_global; |
59 | #define flow_hash_rnd(cpu) \ | 66 | static struct kmem_cache *flow_cachep; |
60 | (per_cpu(flow_hash_info, cpu).hash_rnd) | ||
61 | #define flow_count(cpu) \ | ||
62 | (per_cpu(flow_hash_info, cpu).count) | ||
63 | 67 | ||
64 | static struct timer_list flow_hash_rnd_timer; | 68 | static DEFINE_SPINLOCK(flow_cache_gc_lock); |
69 | static LIST_HEAD(flow_cache_gc_list); | ||
65 | 70 | ||
66 | #define FLOW_HASH_RND_PERIOD (10 * 60 * HZ) | 71 | #define flow_cache_hash_size(cache) (1 << (cache)->hash_shift) |
67 | 72 | #define FLOW_HASH_RND_PERIOD (10 * 60 * HZ) | |
68 | struct flow_flush_info { | ||
69 | atomic_t cpuleft; | ||
70 | struct completion completion; | ||
71 | }; | ||
72 | static DEFINE_PER_CPU(struct tasklet_struct, flow_flush_tasklets) = { NULL }; | ||
73 | |||
74 | #define flow_flush_tasklet(cpu) (&per_cpu(flow_flush_tasklets, cpu)) | ||
75 | 73 | ||
76 | static void flow_cache_new_hashrnd(unsigned long arg) | 74 | static void flow_cache_new_hashrnd(unsigned long arg) |
77 | { | 75 | { |
76 | struct flow_cache *fc = (void *) arg; | ||
78 | int i; | 77 | int i; |
79 | 78 | ||
80 | for_each_possible_cpu(i) | 79 | for_each_possible_cpu(i) |
81 | flow_hash_rnd_recalc(i) = 1; | 80 | per_cpu_ptr(fc->percpu, i)->hash_rnd_recalc = 1; |
82 | 81 | ||
83 | flow_hash_rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD; | 82 | fc->rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD; |
84 | add_timer(&flow_hash_rnd_timer); | 83 | add_timer(&fc->rnd_timer); |
84 | } | ||
85 | |||
86 | static int flow_entry_valid(struct flow_cache_entry *fle) | ||
87 | { | ||
88 | if (atomic_read(&flow_cache_genid) != fle->genid) | ||
89 | return 0; | ||
90 | if (fle->object && !fle->object->ops->check(fle->object)) | ||
91 | return 0; | ||
92 | return 1; | ||
85 | } | 93 | } |
86 | 94 | ||
87 | static void flow_entry_kill(int cpu, struct flow_cache_entry *fle) | 95 | static void flow_entry_kill(struct flow_cache_entry *fle) |
88 | { | 96 | { |
89 | if (fle->object) | 97 | if (fle->object) |
90 | atomic_dec(fle->object_ref); | 98 | fle->object->ops->delete(fle->object); |
91 | kmem_cache_free(flow_cachep, fle); | 99 | kmem_cache_free(flow_cachep, fle); |
92 | flow_count(cpu)--; | ||
93 | } | 100 | } |
94 | 101 | ||
95 | static void __flow_cache_shrink(int cpu, int shrink_to) | 102 | static void flow_cache_gc_task(struct work_struct *work) |
96 | { | 103 | { |
97 | struct flow_cache_entry *fle, **flp; | 104 | struct list_head gc_list; |
98 | int i; | 105 | struct flow_cache_entry *fce, *n; |
99 | 106 | ||
100 | for (i = 0; i < flow_hash_size; i++) { | 107 | INIT_LIST_HEAD(&gc_list); |
101 | int k = 0; | 108 | spin_lock_bh(&flow_cache_gc_lock); |
109 | list_splice_tail_init(&flow_cache_gc_list, &gc_list); | ||
110 | spin_unlock_bh(&flow_cache_gc_lock); | ||
102 | 111 | ||
103 | flp = &flow_table(cpu)[i]; | 112 | list_for_each_entry_safe(fce, n, &gc_list, u.gc_list) |
104 | while ((fle = *flp) != NULL && k < shrink_to) { | 113 | flow_entry_kill(fce); |
105 | k++; | 114 | } |
106 | flp = &fle->next; | 115 | static DECLARE_WORK(flow_cache_gc_work, flow_cache_gc_task); |
107 | } | 116 | |
108 | while ((fle = *flp) != NULL) { | 117 | static void flow_cache_queue_garbage(struct flow_cache_percpu *fcp, |
109 | *flp = fle->next; | 118 | int deleted, struct list_head *gc_list) |
110 | flow_entry_kill(cpu, fle); | 119 | { |
111 | } | 120 | if (deleted) { |
121 | fcp->hash_count -= deleted; | ||
122 | spin_lock_bh(&flow_cache_gc_lock); | ||
123 | list_splice_tail(gc_list, &flow_cache_gc_list); | ||
124 | spin_unlock_bh(&flow_cache_gc_lock); | ||
125 | schedule_work(&flow_cache_gc_work); | ||
112 | } | 126 | } |
113 | } | 127 | } |
114 | 128 | ||
115 | static void flow_cache_shrink(int cpu) | 129 | static void __flow_cache_shrink(struct flow_cache *fc, |
130 | struct flow_cache_percpu *fcp, | ||
131 | int shrink_to) | ||
116 | { | 132 | { |
117 | int shrink_to = flow_lwm / flow_hash_size; | 133 | struct flow_cache_entry *fle; |
134 | struct hlist_node *entry, *tmp; | ||
135 | LIST_HEAD(gc_list); | ||
136 | int i, deleted = 0; | ||
137 | |||
138 | for (i = 0; i < flow_cache_hash_size(fc); i++) { | ||
139 | int saved = 0; | ||
140 | |||
141 | hlist_for_each_entry_safe(fle, entry, tmp, | ||
142 | &fcp->hash_table[i], u.hlist) { | ||
143 | if (saved < shrink_to && | ||
144 | flow_entry_valid(fle)) { | ||
145 | saved++; | ||
146 | } else { | ||
147 | deleted++; | ||
148 | hlist_del(&fle->u.hlist); | ||
149 | list_add_tail(&fle->u.gc_list, &gc_list); | ||
150 | } | ||
151 | } | ||
152 | } | ||
118 | 153 | ||
119 | __flow_cache_shrink(cpu, shrink_to); | 154 | flow_cache_queue_garbage(fcp, deleted, &gc_list); |
120 | } | 155 | } |
121 | 156 | ||
122 | static void flow_new_hash_rnd(int cpu) | 157 | static void flow_cache_shrink(struct flow_cache *fc, |
158 | struct flow_cache_percpu *fcp) | ||
123 | { | 159 | { |
124 | get_random_bytes(&flow_hash_rnd(cpu), sizeof(u32)); | 160 | int shrink_to = fc->low_watermark / flow_cache_hash_size(fc); |
125 | flow_hash_rnd_recalc(cpu) = 0; | ||
126 | 161 | ||
127 | __flow_cache_shrink(cpu, 0); | 162 | __flow_cache_shrink(fc, fcp, shrink_to); |
128 | } | 163 | } |
129 | 164 | ||
130 | static u32 flow_hash_code(struct flowi *key, int cpu) | 165 | static void flow_new_hash_rnd(struct flow_cache *fc, |
166 | struct flow_cache_percpu *fcp) | ||
167 | { | ||
168 | get_random_bytes(&fcp->hash_rnd, sizeof(u32)); | ||
169 | fcp->hash_rnd_recalc = 0; | ||
170 | __flow_cache_shrink(fc, fcp, 0); | ||
171 | } | ||
172 | |||
173 | static u32 flow_hash_code(struct flow_cache *fc, | ||
174 | struct flow_cache_percpu *fcp, | ||
175 | struct flowi *key) | ||
131 | { | 176 | { |
132 | u32 *k = (u32 *) key; | 177 | u32 *k = (u32 *) key; |
133 | 178 | ||
134 | return (jhash2(k, (sizeof(*key) / sizeof(u32)), flow_hash_rnd(cpu)) & | 179 | return (jhash2(k, (sizeof(*key) / sizeof(u32)), fcp->hash_rnd) |
135 | (flow_hash_size - 1)); | 180 | & (flow_cache_hash_size(fc) - 1)); |
136 | } | 181 | } |
137 | 182 | ||
138 | #if (BITS_PER_LONG == 64) | 183 | #if (BITS_PER_LONG == 64) |
@@ -165,114 +210,117 @@ static int flow_key_compare(struct flowi *key1, struct flowi *key2) | |||
165 | return 0; | 210 | return 0; |
166 | } | 211 | } |
167 | 212 | ||
168 | void *flow_cache_lookup(struct net *net, struct flowi *key, u16 family, u8 dir, | 213 | struct flow_cache_object * |
169 | flow_resolve_t resolver) | 214 | flow_cache_lookup(struct net *net, struct flowi *key, u16 family, u8 dir, |
215 | flow_resolve_t resolver, void *ctx) | ||
170 | { | 216 | { |
171 | struct flow_cache_entry *fle, **head; | 217 | struct flow_cache *fc = &flow_cache_global; |
218 | struct flow_cache_percpu *fcp; | ||
219 | struct flow_cache_entry *fle, *tfle; | ||
220 | struct hlist_node *entry; | ||
221 | struct flow_cache_object *flo; | ||
172 | unsigned int hash; | 222 | unsigned int hash; |
173 | int cpu; | ||
174 | 223 | ||
175 | local_bh_disable(); | 224 | local_bh_disable(); |
176 | cpu = smp_processor_id(); | 225 | fcp = per_cpu_ptr(fc->percpu, smp_processor_id()); |
177 | 226 | ||
178 | fle = NULL; | 227 | fle = NULL; |
228 | flo = NULL; | ||
179 | /* Packet really early in init? Making flow_cache_init a | 229 | /* Packet really early in init? Making flow_cache_init a |
180 | * pre-smp initcall would solve this. --RR */ | 230 | * pre-smp initcall would solve this. --RR */ |
181 | if (!flow_table(cpu)) | 231 | if (!fcp->hash_table) |
182 | goto nocache; | 232 | goto nocache; |
183 | 233 | ||
184 | if (flow_hash_rnd_recalc(cpu)) | 234 | if (fcp->hash_rnd_recalc) |
185 | flow_new_hash_rnd(cpu); | 235 | flow_new_hash_rnd(fc, fcp); |
186 | hash = flow_hash_code(key, cpu); | ||
187 | 236 | ||
188 | head = &flow_table(cpu)[hash]; | 237 | hash = flow_hash_code(fc, fcp, key); |
189 | for (fle = *head; fle; fle = fle->next) { | 238 | hlist_for_each_entry(tfle, entry, &fcp->hash_table[hash], u.hlist) { |
190 | if (fle->family == family && | 239 | if (tfle->family == family && |
191 | fle->dir == dir && | 240 | tfle->dir == dir && |
192 | flow_key_compare(key, &fle->key) == 0) { | 241 | flow_key_compare(key, &tfle->key) == 0) { |
193 | if (fle->genid == atomic_read(&flow_cache_genid)) { | 242 | fle = tfle; |
194 | void *ret = fle->object; | ||
195 | |||
196 | if (ret) | ||
197 | atomic_inc(fle->object_ref); | ||
198 | local_bh_enable(); | ||
199 | |||
200 | return ret; | ||
201 | } | ||
202 | break; | 243 | break; |
203 | } | 244 | } |
204 | } | 245 | } |
205 | 246 | ||
206 | if (!fle) { | 247 | if (unlikely(!fle)) { |
207 | if (flow_count(cpu) > flow_hwm) | 248 | if (fcp->hash_count > fc->high_watermark) |
208 | flow_cache_shrink(cpu); | 249 | flow_cache_shrink(fc, fcp); |
209 | 250 | ||
210 | fle = kmem_cache_alloc(flow_cachep, GFP_ATOMIC); | 251 | fle = kmem_cache_alloc(flow_cachep, GFP_ATOMIC); |
211 | if (fle) { | 252 | if (fle) { |
212 | fle->next = *head; | ||
213 | *head = fle; | ||
214 | fle->family = family; | 253 | fle->family = family; |
215 | fle->dir = dir; | 254 | fle->dir = dir; |
216 | memcpy(&fle->key, key, sizeof(*key)); | 255 | memcpy(&fle->key, key, sizeof(*key)); |
217 | fle->object = NULL; | 256 | fle->object = NULL; |
218 | flow_count(cpu)++; | 257 | hlist_add_head(&fle->u.hlist, &fcp->hash_table[hash]); |
258 | fcp->hash_count++; | ||
219 | } | 259 | } |
260 | } else if (likely(fle->genid == atomic_read(&flow_cache_genid))) { | ||
261 | flo = fle->object; | ||
262 | if (!flo) | ||
263 | goto ret_object; | ||
264 | flo = flo->ops->get(flo); | ||
265 | if (flo) | ||
266 | goto ret_object; | ||
267 | } else if (fle->object) { | ||
268 | flo = fle->object; | ||
269 | flo->ops->delete(flo); | ||
270 | fle->object = NULL; | ||
220 | } | 271 | } |
221 | 272 | ||
222 | nocache: | 273 | nocache: |
223 | { | 274 | flo = NULL; |
224 | int err; | 275 | if (fle) { |
225 | void *obj; | 276 | flo = fle->object; |
226 | atomic_t *obj_ref; | 277 | fle->object = NULL; |
227 | |||
228 | err = resolver(net, key, family, dir, &obj, &obj_ref); | ||
229 | |||
230 | if (fle && !err) { | ||
231 | fle->genid = atomic_read(&flow_cache_genid); | ||
232 | |||
233 | if (fle->object) | ||
234 | atomic_dec(fle->object_ref); | ||
235 | |||
236 | fle->object = obj; | ||
237 | fle->object_ref = obj_ref; | ||
238 | if (obj) | ||
239 | atomic_inc(fle->object_ref); | ||
240 | } | ||
241 | local_bh_enable(); | ||
242 | |||
243 | if (err) | ||
244 | obj = ERR_PTR(err); | ||
245 | return obj; | ||
246 | } | 278 | } |
279 | flo = resolver(net, key, family, dir, flo, ctx); | ||
280 | if (fle) { | ||
281 | fle->genid = atomic_read(&flow_cache_genid); | ||
282 | if (!IS_ERR(flo)) | ||
283 | fle->object = flo; | ||
284 | else | ||
285 | fle->genid--; | ||
286 | } else { | ||
287 | if (flo && !IS_ERR(flo)) | ||
288 | flo->ops->delete(flo); | ||
289 | } | ||
290 | ret_object: | ||
291 | local_bh_enable(); | ||
292 | return flo; | ||
247 | } | 293 | } |
248 | 294 | ||
249 | static void flow_cache_flush_tasklet(unsigned long data) | 295 | static void flow_cache_flush_tasklet(unsigned long data) |
250 | { | 296 | { |
251 | struct flow_flush_info *info = (void *)data; | 297 | struct flow_flush_info *info = (void *)data; |
252 | int i; | 298 | struct flow_cache *fc = info->cache; |
253 | int cpu; | 299 | struct flow_cache_percpu *fcp; |
254 | 300 | struct flow_cache_entry *fle; | |
255 | cpu = smp_processor_id(); | 301 | struct hlist_node *entry, *tmp; |
256 | for (i = 0; i < flow_hash_size; i++) { | 302 | LIST_HEAD(gc_list); |
257 | struct flow_cache_entry *fle; | 303 | int i, deleted = 0; |
258 | 304 | ||
259 | fle = flow_table(cpu)[i]; | 305 | fcp = per_cpu_ptr(fc->percpu, smp_processor_id()); |
260 | for (; fle; fle = fle->next) { | 306 | for (i = 0; i < flow_cache_hash_size(fc); i++) { |
261 | unsigned genid = atomic_read(&flow_cache_genid); | 307 | hlist_for_each_entry_safe(fle, entry, tmp, |
262 | 308 | &fcp->hash_table[i], u.hlist) { | |
263 | if (!fle->object || fle->genid == genid) | 309 | if (flow_entry_valid(fle)) |
264 | continue; | 310 | continue; |
265 | 311 | ||
266 | fle->object = NULL; | 312 | deleted++; |
267 | atomic_dec(fle->object_ref); | 313 | hlist_del(&fle->u.hlist); |
314 | list_add_tail(&fle->u.gc_list, &gc_list); | ||
268 | } | 315 | } |
269 | } | 316 | } |
270 | 317 | ||
318 | flow_cache_queue_garbage(fcp, deleted, &gc_list); | ||
319 | |||
271 | if (atomic_dec_and_test(&info->cpuleft)) | 320 | if (atomic_dec_and_test(&info->cpuleft)) |
272 | complete(&info->completion); | 321 | complete(&info->completion); |
273 | } | 322 | } |
274 | 323 | ||
275 | static void flow_cache_flush_per_cpu(void *) __attribute__((__unused__)); | ||
276 | static void flow_cache_flush_per_cpu(void *data) | 324 | static void flow_cache_flush_per_cpu(void *data) |
277 | { | 325 | { |
278 | struct flow_flush_info *info = data; | 326 | struct flow_flush_info *info = data; |
@@ -280,8 +328,7 @@ static void flow_cache_flush_per_cpu(void *data) | |||
280 | struct tasklet_struct *tasklet; | 328 | struct tasklet_struct *tasklet; |
281 | 329 | ||
282 | cpu = smp_processor_id(); | 330 | cpu = smp_processor_id(); |
283 | 331 | tasklet = &per_cpu_ptr(info->cache->percpu, cpu)->flush_tasklet; | |
284 | tasklet = flow_flush_tasklet(cpu); | ||
285 | tasklet->data = (unsigned long)info; | 332 | tasklet->data = (unsigned long)info; |
286 | tasklet_schedule(tasklet); | 333 | tasklet_schedule(tasklet); |
287 | } | 334 | } |
@@ -294,6 +341,7 @@ void flow_cache_flush(void) | |||
294 | /* Don't want cpus going down or up during this. */ | 341 | /* Don't want cpus going down or up during this. */ |
295 | get_online_cpus(); | 342 | get_online_cpus(); |
296 | mutex_lock(&flow_flush_sem); | 343 | mutex_lock(&flow_flush_sem); |
344 | info.cache = &flow_cache_global; | ||
297 | atomic_set(&info.cpuleft, num_online_cpus()); | 345 | atomic_set(&info.cpuleft, num_online_cpus()); |
298 | init_completion(&info.completion); | 346 | init_completion(&info.completion); |
299 | 347 | ||
@@ -307,62 +355,75 @@ void flow_cache_flush(void) | |||
307 | put_online_cpus(); | 355 | put_online_cpus(); |
308 | } | 356 | } |
309 | 357 | ||
310 | static void __init flow_cache_cpu_prepare(int cpu) | 358 | static void __init flow_cache_cpu_prepare(struct flow_cache *fc, |
359 | struct flow_cache_percpu *fcp) | ||
311 | { | 360 | { |
312 | struct tasklet_struct *tasklet; | 361 | fcp->hash_table = (struct hlist_head *) |
313 | unsigned long order; | 362 | __get_free_pages(GFP_KERNEL|__GFP_ZERO, fc->order); |
314 | 363 | if (!fcp->hash_table) | |
315 | for (order = 0; | 364 | panic("NET: failed to allocate flow cache order %lu\n", fc->order); |
316 | (PAGE_SIZE << order) < | 365 | |
317 | (sizeof(struct flow_cache_entry *)*flow_hash_size); | 366 | fcp->hash_rnd_recalc = 1; |
318 | order++) | 367 | fcp->hash_count = 0; |
319 | /* NOTHING */; | 368 | tasklet_init(&fcp->flush_tasklet, flow_cache_flush_tasklet, 0); |
320 | |||
321 | flow_table(cpu) = (struct flow_cache_entry **) | ||
322 | __get_free_pages(GFP_KERNEL|__GFP_ZERO, order); | ||
323 | if (!flow_table(cpu)) | ||
324 | panic("NET: failed to allocate flow cache order %lu\n", order); | ||
325 | |||
326 | flow_hash_rnd_recalc(cpu) = 1; | ||
327 | flow_count(cpu) = 0; | ||
328 | |||
329 | tasklet = flow_flush_tasklet(cpu); | ||
330 | tasklet_init(tasklet, flow_cache_flush_tasklet, 0); | ||
331 | } | 369 | } |
332 | 370 | ||
333 | static int flow_cache_cpu(struct notifier_block *nfb, | 371 | static int flow_cache_cpu(struct notifier_block *nfb, |
334 | unsigned long action, | 372 | unsigned long action, |
335 | void *hcpu) | 373 | void *hcpu) |
336 | { | 374 | { |
375 | struct flow_cache *fc = container_of(nfb, struct flow_cache, hotcpu_notifier); | ||
376 | int cpu = (unsigned long) hcpu; | ||
377 | struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, cpu); | ||
378 | |||
337 | if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) | 379 | if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) |
338 | __flow_cache_shrink((unsigned long)hcpu, 0); | 380 | __flow_cache_shrink(fc, fcp, 0); |
339 | return NOTIFY_OK; | 381 | return NOTIFY_OK; |
340 | } | 382 | } |
341 | 383 | ||
342 | static int __init flow_cache_init(void) | 384 | static int flow_cache_init(struct flow_cache *fc) |
343 | { | 385 | { |
386 | unsigned long order; | ||
344 | int i; | 387 | int i; |
345 | 388 | ||
346 | flow_cachep = kmem_cache_create("flow_cache", | 389 | fc->hash_shift = 10; |
347 | sizeof(struct flow_cache_entry), | 390 | fc->low_watermark = 2 * flow_cache_hash_size(fc); |
348 | 0, SLAB_PANIC, | 391 | fc->high_watermark = 4 * flow_cache_hash_size(fc); |
349 | NULL); | 392 | |
350 | flow_hash_shift = 10; | 393 | for (order = 0; |
351 | flow_lwm = 2 * flow_hash_size; | 394 | (PAGE_SIZE << order) < |
352 | flow_hwm = 4 * flow_hash_size; | 395 | (sizeof(struct hlist_head)*flow_cache_hash_size(fc)); |
396 | order++) | ||
397 | /* NOTHING */; | ||
398 | fc->order = order; | ||
399 | fc->percpu = alloc_percpu(struct flow_cache_percpu); | ||
353 | 400 | ||
354 | setup_timer(&flow_hash_rnd_timer, flow_cache_new_hashrnd, 0); | 401 | setup_timer(&fc->rnd_timer, flow_cache_new_hashrnd, |
355 | flow_hash_rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD; | 402 | (unsigned long) fc); |
356 | add_timer(&flow_hash_rnd_timer); | 403 | fc->rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD; |
404 | add_timer(&fc->rnd_timer); | ||
357 | 405 | ||
358 | for_each_possible_cpu(i) | 406 | for_each_possible_cpu(i) |
359 | flow_cache_cpu_prepare(i); | 407 | flow_cache_cpu_prepare(fc, per_cpu_ptr(fc->percpu, i)); |
408 | |||
409 | fc->hotcpu_notifier = (struct notifier_block){ | ||
410 | .notifier_call = flow_cache_cpu, | ||
411 | }; | ||
412 | register_hotcpu_notifier(&fc->hotcpu_notifier); | ||
360 | 413 | ||
361 | hotcpu_notifier(flow_cache_cpu, 0); | ||
362 | return 0; | 414 | return 0; |
363 | } | 415 | } |
364 | 416 | ||
365 | module_init(flow_cache_init); | 417 | static int __init flow_cache_init_global(void) |
418 | { | ||
419 | flow_cachep = kmem_cache_create("flow_cache", | ||
420 | sizeof(struct flow_cache_entry), | ||
421 | 0, SLAB_PANIC, NULL); | ||
422 | |||
423 | return flow_cache_init(&flow_cache_global); | ||
424 | } | ||
425 | |||
426 | module_init(flow_cache_init_global); | ||
366 | 427 | ||
367 | EXPORT_SYMBOL(flow_cache_genid); | 428 | EXPORT_SYMBOL(flow_cache_genid); |
368 | EXPORT_SYMBOL(flow_cache_lookup); | 429 | EXPORT_SYMBOL(flow_cache_lookup); |
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 59cfc7d8fc45..99e7052d7323 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c | |||
@@ -14,9 +14,12 @@ | |||
14 | #include <linux/netdevice.h> | 14 | #include <linux/netdevice.h> |
15 | #include <linux/if_arp.h> | 15 | #include <linux/if_arp.h> |
16 | #include <linux/slab.h> | 16 | #include <linux/slab.h> |
17 | #include <linux/nsproxy.h> | ||
17 | #include <net/sock.h> | 18 | #include <net/sock.h> |
19 | #include <net/net_namespace.h> | ||
18 | #include <linux/rtnetlink.h> | 20 | #include <linux/rtnetlink.h> |
19 | #include <linux/wireless.h> | 21 | #include <linux/wireless.h> |
22 | #include <linux/vmalloc.h> | ||
20 | #include <net/wext.h> | 23 | #include <net/wext.h> |
21 | 24 | ||
22 | #include "net-sysfs.h" | 25 | #include "net-sysfs.h" |
@@ -466,18 +469,345 @@ static struct attribute_group wireless_group = { | |||
466 | .attrs = wireless_attrs, | 469 | .attrs = wireless_attrs, |
467 | }; | 470 | }; |
468 | #endif | 471 | #endif |
469 | |||
470 | #endif /* CONFIG_SYSFS */ | 472 | #endif /* CONFIG_SYSFS */ |
471 | 473 | ||
474 | #ifdef CONFIG_RPS | ||
475 | /* | ||
476 | * RX queue sysfs structures and functions. | ||
477 | */ | ||
478 | struct rx_queue_attribute { | ||
479 | struct attribute attr; | ||
480 | ssize_t (*show)(struct netdev_rx_queue *queue, | ||
481 | struct rx_queue_attribute *attr, char *buf); | ||
482 | ssize_t (*store)(struct netdev_rx_queue *queue, | ||
483 | struct rx_queue_attribute *attr, const char *buf, size_t len); | ||
484 | }; | ||
485 | #define to_rx_queue_attr(_attr) container_of(_attr, \ | ||
486 | struct rx_queue_attribute, attr) | ||
487 | |||
488 | #define to_rx_queue(obj) container_of(obj, struct netdev_rx_queue, kobj) | ||
489 | |||
490 | static ssize_t rx_queue_attr_show(struct kobject *kobj, struct attribute *attr, | ||
491 | char *buf) | ||
492 | { | ||
493 | struct rx_queue_attribute *attribute = to_rx_queue_attr(attr); | ||
494 | struct netdev_rx_queue *queue = to_rx_queue(kobj); | ||
495 | |||
496 | if (!attribute->show) | ||
497 | return -EIO; | ||
498 | |||
499 | return attribute->show(queue, attribute, buf); | ||
500 | } | ||
501 | |||
502 | static ssize_t rx_queue_attr_store(struct kobject *kobj, struct attribute *attr, | ||
503 | const char *buf, size_t count) | ||
504 | { | ||
505 | struct rx_queue_attribute *attribute = to_rx_queue_attr(attr); | ||
506 | struct netdev_rx_queue *queue = to_rx_queue(kobj); | ||
507 | |||
508 | if (!attribute->store) | ||
509 | return -EIO; | ||
510 | |||
511 | return attribute->store(queue, attribute, buf, count); | ||
512 | } | ||
513 | |||
514 | static struct sysfs_ops rx_queue_sysfs_ops = { | ||
515 | .show = rx_queue_attr_show, | ||
516 | .store = rx_queue_attr_store, | ||
517 | }; | ||
518 | |||
519 | static ssize_t show_rps_map(struct netdev_rx_queue *queue, | ||
520 | struct rx_queue_attribute *attribute, char *buf) | ||
521 | { | ||
522 | struct rps_map *map; | ||
523 | cpumask_var_t mask; | ||
524 | size_t len = 0; | ||
525 | int i; | ||
526 | |||
527 | if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) | ||
528 | return -ENOMEM; | ||
529 | |||
530 | rcu_read_lock(); | ||
531 | map = rcu_dereference(queue->rps_map); | ||
532 | if (map) | ||
533 | for (i = 0; i < map->len; i++) | ||
534 | cpumask_set_cpu(map->cpus[i], mask); | ||
535 | |||
536 | len += cpumask_scnprintf(buf + len, PAGE_SIZE, mask); | ||
537 | if (PAGE_SIZE - len < 3) { | ||
538 | rcu_read_unlock(); | ||
539 | free_cpumask_var(mask); | ||
540 | return -EINVAL; | ||
541 | } | ||
542 | rcu_read_unlock(); | ||
543 | |||
544 | free_cpumask_var(mask); | ||
545 | len += sprintf(buf + len, "\n"); | ||
546 | return len; | ||
547 | } | ||
548 | |||
549 | static void rps_map_release(struct rcu_head *rcu) | ||
550 | { | ||
551 | struct rps_map *map = container_of(rcu, struct rps_map, rcu); | ||
552 | |||
553 | kfree(map); | ||
554 | } | ||
555 | |||
556 | static ssize_t store_rps_map(struct netdev_rx_queue *queue, | ||
557 | struct rx_queue_attribute *attribute, | ||
558 | const char *buf, size_t len) | ||
559 | { | ||
560 | struct rps_map *old_map, *map; | ||
561 | cpumask_var_t mask; | ||
562 | int err, cpu, i; | ||
563 | static DEFINE_SPINLOCK(rps_map_lock); | ||
564 | |||
565 | if (!capable(CAP_NET_ADMIN)) | ||
566 | return -EPERM; | ||
567 | |||
568 | if (!alloc_cpumask_var(&mask, GFP_KERNEL)) | ||
569 | return -ENOMEM; | ||
570 | |||
571 | err = bitmap_parse(buf, len, cpumask_bits(mask), nr_cpumask_bits); | ||
572 | if (err) { | ||
573 | free_cpumask_var(mask); | ||
574 | return err; | ||
575 | } | ||
576 | |||
577 | map = kzalloc(max_t(unsigned, | ||
578 | RPS_MAP_SIZE(cpumask_weight(mask)), L1_CACHE_BYTES), | ||
579 | GFP_KERNEL); | ||
580 | if (!map) { | ||
581 | free_cpumask_var(mask); | ||
582 | return -ENOMEM; | ||
583 | } | ||
584 | |||
585 | i = 0; | ||
586 | for_each_cpu_and(cpu, mask, cpu_online_mask) | ||
587 | map->cpus[i++] = cpu; | ||
588 | |||
589 | if (i) | ||
590 | map->len = i; | ||
591 | else { | ||
592 | kfree(map); | ||
593 | map = NULL; | ||
594 | } | ||
595 | |||
596 | spin_lock(&rps_map_lock); | ||
597 | old_map = queue->rps_map; | ||
598 | rcu_assign_pointer(queue->rps_map, map); | ||
599 | spin_unlock(&rps_map_lock); | ||
600 | |||
601 | if (old_map) | ||
602 | call_rcu(&old_map->rcu, rps_map_release); | ||
603 | |||
604 | free_cpumask_var(mask); | ||
605 | return len; | ||
606 | } | ||
607 | |||
608 | static ssize_t show_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue, | ||
609 | struct rx_queue_attribute *attr, | ||
610 | char *buf) | ||
611 | { | ||
612 | struct rps_dev_flow_table *flow_table; | ||
613 | unsigned int val = 0; | ||
614 | |||
615 | rcu_read_lock(); | ||
616 | flow_table = rcu_dereference(queue->rps_flow_table); | ||
617 | if (flow_table) | ||
618 | val = flow_table->mask + 1; | ||
619 | rcu_read_unlock(); | ||
620 | |||
621 | return sprintf(buf, "%u\n", val); | ||
622 | } | ||
623 | |||
624 | static void rps_dev_flow_table_release_work(struct work_struct *work) | ||
625 | { | ||
626 | struct rps_dev_flow_table *table = container_of(work, | ||
627 | struct rps_dev_flow_table, free_work); | ||
628 | |||
629 | vfree(table); | ||
630 | } | ||
631 | |||
632 | static void rps_dev_flow_table_release(struct rcu_head *rcu) | ||
633 | { | ||
634 | struct rps_dev_flow_table *table = container_of(rcu, | ||
635 | struct rps_dev_flow_table, rcu); | ||
636 | |||
637 | INIT_WORK(&table->free_work, rps_dev_flow_table_release_work); | ||
638 | schedule_work(&table->free_work); | ||
639 | } | ||
640 | |||
641 | static ssize_t store_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue, | ||
642 | struct rx_queue_attribute *attr, | ||
643 | const char *buf, size_t len) | ||
644 | { | ||
645 | unsigned int count; | ||
646 | char *endp; | ||
647 | struct rps_dev_flow_table *table, *old_table; | ||
648 | static DEFINE_SPINLOCK(rps_dev_flow_lock); | ||
649 | |||
650 | if (!capable(CAP_NET_ADMIN)) | ||
651 | return -EPERM; | ||
652 | |||
653 | count = simple_strtoul(buf, &endp, 0); | ||
654 | if (endp == buf) | ||
655 | return -EINVAL; | ||
656 | |||
657 | if (count) { | ||
658 | int i; | ||
659 | |||
660 | if (count > 1<<30) { | ||
661 | /* Enforce a limit to prevent overflow */ | ||
662 | return -EINVAL; | ||
663 | } | ||
664 | count = roundup_pow_of_two(count); | ||
665 | table = vmalloc(RPS_DEV_FLOW_TABLE_SIZE(count)); | ||
666 | if (!table) | ||
667 | return -ENOMEM; | ||
668 | |||
669 | table->mask = count - 1; | ||
670 | for (i = 0; i < count; i++) | ||
671 | table->flows[i].cpu = RPS_NO_CPU; | ||
672 | } else | ||
673 | table = NULL; | ||
674 | |||
675 | spin_lock(&rps_dev_flow_lock); | ||
676 | old_table = queue->rps_flow_table; | ||
677 | rcu_assign_pointer(queue->rps_flow_table, table); | ||
678 | spin_unlock(&rps_dev_flow_lock); | ||
679 | |||
680 | if (old_table) | ||
681 | call_rcu(&old_table->rcu, rps_dev_flow_table_release); | ||
682 | |||
683 | return len; | ||
684 | } | ||
685 | |||
686 | static struct rx_queue_attribute rps_cpus_attribute = | ||
687 | __ATTR(rps_cpus, S_IRUGO | S_IWUSR, show_rps_map, store_rps_map); | ||
688 | |||
689 | |||
690 | static struct rx_queue_attribute rps_dev_flow_table_cnt_attribute = | ||
691 | __ATTR(rps_flow_cnt, S_IRUGO | S_IWUSR, | ||
692 | show_rps_dev_flow_table_cnt, store_rps_dev_flow_table_cnt); | ||
693 | |||
694 | static struct attribute *rx_queue_default_attrs[] = { | ||
695 | &rps_cpus_attribute.attr, | ||
696 | &rps_dev_flow_table_cnt_attribute.attr, | ||
697 | NULL | ||
698 | }; | ||
699 | |||
700 | static void rx_queue_release(struct kobject *kobj) | ||
701 | { | ||
702 | struct netdev_rx_queue *queue = to_rx_queue(kobj); | ||
703 | struct netdev_rx_queue *first = queue->first; | ||
704 | |||
705 | if (queue->rps_map) | ||
706 | call_rcu(&queue->rps_map->rcu, rps_map_release); | ||
707 | |||
708 | if (queue->rps_flow_table) | ||
709 | call_rcu(&queue->rps_flow_table->rcu, | ||
710 | rps_dev_flow_table_release); | ||
711 | |||
712 | if (atomic_dec_and_test(&first->count)) | ||
713 | kfree(first); | ||
714 | } | ||
715 | |||
716 | static struct kobj_type rx_queue_ktype = { | ||
717 | .sysfs_ops = &rx_queue_sysfs_ops, | ||
718 | .release = rx_queue_release, | ||
719 | .default_attrs = rx_queue_default_attrs, | ||
720 | }; | ||
721 | |||
722 | static int rx_queue_add_kobject(struct net_device *net, int index) | ||
723 | { | ||
724 | struct netdev_rx_queue *queue = net->_rx + index; | ||
725 | struct kobject *kobj = &queue->kobj; | ||
726 | int error = 0; | ||
727 | |||
728 | kobj->kset = net->queues_kset; | ||
729 | error = kobject_init_and_add(kobj, &rx_queue_ktype, NULL, | ||
730 | "rx-%u", index); | ||
731 | if (error) { | ||
732 | kobject_put(kobj); | ||
733 | return error; | ||
734 | } | ||
735 | |||
736 | kobject_uevent(kobj, KOBJ_ADD); | ||
737 | |||
738 | return error; | ||
739 | } | ||
740 | |||
741 | static int rx_queue_register_kobjects(struct net_device *net) | ||
742 | { | ||
743 | int i; | ||
744 | int error = 0; | ||
745 | |||
746 | net->queues_kset = kset_create_and_add("queues", | ||
747 | NULL, &net->dev.kobj); | ||
748 | if (!net->queues_kset) | ||
749 | return -ENOMEM; | ||
750 | for (i = 0; i < net->num_rx_queues; i++) { | ||
751 | error = rx_queue_add_kobject(net, i); | ||
752 | if (error) | ||
753 | break; | ||
754 | } | ||
755 | |||
756 | if (error) | ||
757 | while (--i >= 0) | ||
758 | kobject_put(&net->_rx[i].kobj); | ||
759 | |||
760 | return error; | ||
761 | } | ||
762 | |||
763 | static void rx_queue_remove_kobjects(struct net_device *net) | ||
764 | { | ||
765 | int i; | ||
766 | |||
767 | for (i = 0; i < net->num_rx_queues; i++) | ||
768 | kobject_put(&net->_rx[i].kobj); | ||
769 | kset_unregister(net->queues_kset); | ||
770 | } | ||
771 | #endif /* CONFIG_RPS */ | ||
772 | |||
773 | static const void *net_current_ns(void) | ||
774 | { | ||
775 | return current->nsproxy->net_ns; | ||
776 | } | ||
777 | |||
778 | static const void *net_initial_ns(void) | ||
779 | { | ||
780 | return &init_net; | ||
781 | } | ||
782 | |||
783 | static const void *net_netlink_ns(struct sock *sk) | ||
784 | { | ||
785 | return sock_net(sk); | ||
786 | } | ||
787 | |||
788 | static struct kobj_ns_type_operations net_ns_type_operations = { | ||
789 | .type = KOBJ_NS_TYPE_NET, | ||
790 | .current_ns = net_current_ns, | ||
791 | .netlink_ns = net_netlink_ns, | ||
792 | .initial_ns = net_initial_ns, | ||
793 | }; | ||
794 | |||
795 | static void net_kobj_ns_exit(struct net *net) | ||
796 | { | ||
797 | kobj_ns_exit(KOBJ_NS_TYPE_NET, net); | ||
798 | } | ||
799 | |||
800 | static struct pernet_operations kobj_net_ops = { | ||
801 | .exit = net_kobj_ns_exit, | ||
802 | }; | ||
803 | |||
804 | |||
472 | #ifdef CONFIG_HOTPLUG | 805 | #ifdef CONFIG_HOTPLUG |
473 | static int netdev_uevent(struct device *d, struct kobj_uevent_env *env) | 806 | static int netdev_uevent(struct device *d, struct kobj_uevent_env *env) |
474 | { | 807 | { |
475 | struct net_device *dev = to_net_dev(d); | 808 | struct net_device *dev = to_net_dev(d); |
476 | int retval; | 809 | int retval; |
477 | 810 | ||
478 | if (!net_eq(dev_net(dev), &init_net)) | ||
479 | return 0; | ||
480 | |||
481 | /* pass interface to uevent. */ | 811 | /* pass interface to uevent. */ |
482 | retval = add_uevent_var(env, "INTERFACE=%s", dev->name); | 812 | retval = add_uevent_var(env, "INTERFACE=%s", dev->name); |
483 | if (retval) | 813 | if (retval) |
@@ -507,6 +837,13 @@ static void netdev_release(struct device *d) | |||
507 | kfree((char *)dev - dev->padded); | 837 | kfree((char *)dev - dev->padded); |
508 | } | 838 | } |
509 | 839 | ||
840 | static const void *net_namespace(struct device *d) | ||
841 | { | ||
842 | struct net_device *dev; | ||
843 | dev = container_of(d, struct net_device, dev); | ||
844 | return dev_net(dev); | ||
845 | } | ||
846 | |||
510 | static struct class net_class = { | 847 | static struct class net_class = { |
511 | .name = "net", | 848 | .name = "net", |
512 | .dev_release = netdev_release, | 849 | .dev_release = netdev_release, |
@@ -516,6 +853,8 @@ static struct class net_class = { | |||
516 | #ifdef CONFIG_HOTPLUG | 853 | #ifdef CONFIG_HOTPLUG |
517 | .dev_uevent = netdev_uevent, | 854 | .dev_uevent = netdev_uevent, |
518 | #endif | 855 | #endif |
856 | .ns_type = &net_ns_type_operations, | ||
857 | .namespace = net_namespace, | ||
519 | }; | 858 | }; |
520 | 859 | ||
521 | /* Delete sysfs entries but hold kobject reference until after all | 860 | /* Delete sysfs entries but hold kobject reference until after all |
@@ -527,8 +866,9 @@ void netdev_unregister_kobject(struct net_device * net) | |||
527 | 866 | ||
528 | kobject_get(&dev->kobj); | 867 | kobject_get(&dev->kobj); |
529 | 868 | ||
530 | if (!net_eq(dev_net(net), &init_net)) | 869 | #ifdef CONFIG_RPS |
531 | return; | 870 | rx_queue_remove_kobjects(net); |
871 | #endif | ||
532 | 872 | ||
533 | device_del(dev); | 873 | device_del(dev); |
534 | } | 874 | } |
@@ -538,7 +878,9 @@ int netdev_register_kobject(struct net_device *net) | |||
538 | { | 878 | { |
539 | struct device *dev = &(net->dev); | 879 | struct device *dev = &(net->dev); |
540 | const struct attribute_group **groups = net->sysfs_groups; | 880 | const struct attribute_group **groups = net->sysfs_groups; |
881 | int error = 0; | ||
541 | 882 | ||
883 | device_initialize(dev); | ||
542 | dev->class = &net_class; | 884 | dev->class = &net_class; |
543 | dev->platform_data = net; | 885 | dev->platform_data = net; |
544 | dev->groups = groups; | 886 | dev->groups = groups; |
@@ -561,10 +903,19 @@ int netdev_register_kobject(struct net_device *net) | |||
561 | #endif | 903 | #endif |
562 | #endif /* CONFIG_SYSFS */ | 904 | #endif /* CONFIG_SYSFS */ |
563 | 905 | ||
564 | if (!net_eq(dev_net(net), &init_net)) | 906 | error = device_add(dev); |
565 | return 0; | 907 | if (error) |
908 | return error; | ||
909 | |||
910 | #ifdef CONFIG_RPS | ||
911 | error = rx_queue_register_kobjects(net); | ||
912 | if (error) { | ||
913 | device_del(dev); | ||
914 | return error; | ||
915 | } | ||
916 | #endif | ||
566 | 917 | ||
567 | return device_add(dev); | 918 | return error; |
568 | } | 919 | } |
569 | 920 | ||
570 | int netdev_class_create_file(struct class_attribute *class_attr) | 921 | int netdev_class_create_file(struct class_attribute *class_attr) |
@@ -580,13 +931,9 @@ void netdev_class_remove_file(struct class_attribute *class_attr) | |||
580 | EXPORT_SYMBOL(netdev_class_create_file); | 931 | EXPORT_SYMBOL(netdev_class_create_file); |
581 | EXPORT_SYMBOL(netdev_class_remove_file); | 932 | EXPORT_SYMBOL(netdev_class_remove_file); |
582 | 933 | ||
583 | void netdev_initialize_kobject(struct net_device *net) | ||
584 | { | ||
585 | struct device *device = &(net->dev); | ||
586 | device_initialize(device); | ||
587 | } | ||
588 | |||
589 | int netdev_kobject_init(void) | 934 | int netdev_kobject_init(void) |
590 | { | 935 | { |
936 | kobj_ns_type_register(&net_ns_type_operations); | ||
937 | register_pernet_subsys(&kobj_net_ops); | ||
591 | return class_register(&net_class); | 938 | return class_register(&net_class); |
592 | } | 939 | } |
diff --git a/net/core/net-sysfs.h b/net/core/net-sysfs.h index 14e7524260b3..805555e8b187 100644 --- a/net/core/net-sysfs.h +++ b/net/core/net-sysfs.h | |||
@@ -4,5 +4,4 @@ | |||
4 | int netdev_kobject_init(void); | 4 | int netdev_kobject_init(void); |
5 | int netdev_register_kobject(struct net_device *); | 5 | int netdev_register_kobject(struct net_device *); |
6 | void netdev_unregister_kobject(struct net_device *); | 6 | void netdev_unregister_kobject(struct net_device *); |
7 | void netdev_initialize_kobject(struct net_device *); | ||
8 | #endif | 7 | #endif |
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index bd8c4712ea24..c988e685433a 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c | |||
@@ -27,6 +27,51 @@ EXPORT_SYMBOL(init_net); | |||
27 | 27 | ||
28 | #define INITIAL_NET_GEN_PTRS 13 /* +1 for len +2 for rcu_head */ | 28 | #define INITIAL_NET_GEN_PTRS 13 /* +1 for len +2 for rcu_head */ |
29 | 29 | ||
30 | static void net_generic_release(struct rcu_head *rcu) | ||
31 | { | ||
32 | struct net_generic *ng; | ||
33 | |||
34 | ng = container_of(rcu, struct net_generic, rcu); | ||
35 | kfree(ng); | ||
36 | } | ||
37 | |||
38 | static int net_assign_generic(struct net *net, int id, void *data) | ||
39 | { | ||
40 | struct net_generic *ng, *old_ng; | ||
41 | |||
42 | BUG_ON(!mutex_is_locked(&net_mutex)); | ||
43 | BUG_ON(id == 0); | ||
44 | |||
45 | ng = old_ng = net->gen; | ||
46 | if (old_ng->len >= id) | ||
47 | goto assign; | ||
48 | |||
49 | ng = kzalloc(sizeof(struct net_generic) + | ||
50 | id * sizeof(void *), GFP_KERNEL); | ||
51 | if (ng == NULL) | ||
52 | return -ENOMEM; | ||
53 | |||
54 | /* | ||
55 | * Some synchronisation notes: | ||
56 | * | ||
57 | * The net_generic explores the net->gen array inside rcu | ||
58 | * read section. Besides once set the net->gen->ptr[x] | ||
59 | * pointer never changes (see rules in netns/generic.h). | ||
60 | * | ||
61 | * That said, we simply duplicate this array and schedule | ||
62 | * the old copy for kfree after a grace period. | ||
63 | */ | ||
64 | |||
65 | ng->len = id; | ||
66 | memcpy(&ng->ptr, &old_ng->ptr, old_ng->len * sizeof(void*)); | ||
67 | |||
68 | rcu_assign_pointer(net->gen, ng); | ||
69 | call_rcu(&old_ng->rcu, net_generic_release); | ||
70 | assign: | ||
71 | ng->ptr[id - 1] = data; | ||
72 | return 0; | ||
73 | } | ||
74 | |||
30 | static int ops_init(const struct pernet_operations *ops, struct net *net) | 75 | static int ops_init(const struct pernet_operations *ops, struct net *net) |
31 | { | 76 | { |
32 | int err; | 77 | int err; |
@@ -469,10 +514,10 @@ EXPORT_SYMBOL_GPL(register_pernet_subsys); | |||
469 | * addition run the exit method for all existing network | 514 | * addition run the exit method for all existing network |
470 | * namespaces. | 515 | * namespaces. |
471 | */ | 516 | */ |
472 | void unregister_pernet_subsys(struct pernet_operations *module) | 517 | void unregister_pernet_subsys(struct pernet_operations *ops) |
473 | { | 518 | { |
474 | mutex_lock(&net_mutex); | 519 | mutex_lock(&net_mutex); |
475 | unregister_pernet_operations(module); | 520 | unregister_pernet_operations(ops); |
476 | mutex_unlock(&net_mutex); | 521 | mutex_unlock(&net_mutex); |
477 | } | 522 | } |
478 | EXPORT_SYMBOL_GPL(unregister_pernet_subsys); | 523 | EXPORT_SYMBOL_GPL(unregister_pernet_subsys); |
@@ -526,49 +571,3 @@ void unregister_pernet_device(struct pernet_operations *ops) | |||
526 | mutex_unlock(&net_mutex); | 571 | mutex_unlock(&net_mutex); |
527 | } | 572 | } |
528 | EXPORT_SYMBOL_GPL(unregister_pernet_device); | 573 | EXPORT_SYMBOL_GPL(unregister_pernet_device); |
529 | |||
530 | static void net_generic_release(struct rcu_head *rcu) | ||
531 | { | ||
532 | struct net_generic *ng; | ||
533 | |||
534 | ng = container_of(rcu, struct net_generic, rcu); | ||
535 | kfree(ng); | ||
536 | } | ||
537 | |||
538 | int net_assign_generic(struct net *net, int id, void *data) | ||
539 | { | ||
540 | struct net_generic *ng, *old_ng; | ||
541 | |||
542 | BUG_ON(!mutex_is_locked(&net_mutex)); | ||
543 | BUG_ON(id == 0); | ||
544 | |||
545 | ng = old_ng = net->gen; | ||
546 | if (old_ng->len >= id) | ||
547 | goto assign; | ||
548 | |||
549 | ng = kzalloc(sizeof(struct net_generic) + | ||
550 | id * sizeof(void *), GFP_KERNEL); | ||
551 | if (ng == NULL) | ||
552 | return -ENOMEM; | ||
553 | |||
554 | /* | ||
555 | * Some synchronisation notes: | ||
556 | * | ||
557 | * The net_generic explores the net->gen array inside rcu | ||
558 | * read section. Besides once set the net->gen->ptr[x] | ||
559 | * pointer never changes (see rules in netns/generic.h). | ||
560 | * | ||
561 | * That said, we simply duplicate this array and schedule | ||
562 | * the old copy for kfree after a grace period. | ||
563 | */ | ||
564 | |||
565 | ng->len = id; | ||
566 | memcpy(&ng->ptr, &old_ng->ptr, old_ng->len * sizeof(void*)); | ||
567 | |||
568 | rcu_assign_pointer(net->gen, ng); | ||
569 | call_rcu(&old_ng->rcu, net_generic_release); | ||
570 | assign: | ||
571 | ng->ptr[id - 1] = data; | ||
572 | return 0; | ||
573 | } | ||
574 | EXPORT_SYMBOL_GPL(net_assign_generic); | ||
diff --git a/net/core/netpoll.c b/net/core/netpoll.c index a58f59b97597..94825b109551 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c | |||
@@ -179,9 +179,8 @@ static void service_arp_queue(struct netpoll_info *npi) | |||
179 | } | 179 | } |
180 | } | 180 | } |
181 | 181 | ||
182 | void netpoll_poll(struct netpoll *np) | 182 | void netpoll_poll_dev(struct net_device *dev) |
183 | { | 183 | { |
184 | struct net_device *dev = np->dev; | ||
185 | const struct net_device_ops *ops; | 184 | const struct net_device_ops *ops; |
186 | 185 | ||
187 | if (!dev || !netif_running(dev)) | 186 | if (!dev || !netif_running(dev)) |
@@ -201,6 +200,11 @@ void netpoll_poll(struct netpoll *np) | |||
201 | zap_completion_queue(); | 200 | zap_completion_queue(); |
202 | } | 201 | } |
203 | 202 | ||
203 | void netpoll_poll(struct netpoll *np) | ||
204 | { | ||
205 | netpoll_poll_dev(np->dev); | ||
206 | } | ||
207 | |||
204 | static void refill_skbs(void) | 208 | static void refill_skbs(void) |
205 | { | 209 | { |
206 | struct sk_buff *skb; | 210 | struct sk_buff *skb; |
@@ -282,7 +286,7 @@ static int netpoll_owner_active(struct net_device *dev) | |||
282 | return 0; | 286 | return 0; |
283 | } | 287 | } |
284 | 288 | ||
285 | static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) | 289 | void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) |
286 | { | 290 | { |
287 | int status = NETDEV_TX_BUSY; | 291 | int status = NETDEV_TX_BUSY; |
288 | unsigned long tries; | 292 | unsigned long tries; |
@@ -308,7 +312,9 @@ static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) | |||
308 | tries > 0; --tries) { | 312 | tries > 0; --tries) { |
309 | if (__netif_tx_trylock(txq)) { | 313 | if (__netif_tx_trylock(txq)) { |
310 | if (!netif_tx_queue_stopped(txq)) { | 314 | if (!netif_tx_queue_stopped(txq)) { |
315 | dev->priv_flags |= IFF_IN_NETPOLL; | ||
311 | status = ops->ndo_start_xmit(skb, dev); | 316 | status = ops->ndo_start_xmit(skb, dev); |
317 | dev->priv_flags &= ~IFF_IN_NETPOLL; | ||
312 | if (status == NETDEV_TX_OK) | 318 | if (status == NETDEV_TX_OK) |
313 | txq_trans_update(txq); | 319 | txq_trans_update(txq); |
314 | } | 320 | } |
@@ -756,7 +762,10 @@ int netpoll_setup(struct netpoll *np) | |||
756 | atomic_inc(&npinfo->refcnt); | 762 | atomic_inc(&npinfo->refcnt); |
757 | } | 763 | } |
758 | 764 | ||
759 | if (!ndev->netdev_ops->ndo_poll_controller) { | 765 | npinfo->netpoll = np; |
766 | |||
767 | if ((ndev->priv_flags & IFF_DISABLE_NETPOLL) || | ||
768 | !ndev->netdev_ops->ndo_poll_controller) { | ||
760 | printk(KERN_ERR "%s: %s doesn't support polling, aborting.\n", | 769 | printk(KERN_ERR "%s: %s doesn't support polling, aborting.\n", |
761 | np->name, np->dev_name); | 770 | np->name, np->dev_name); |
762 | err = -ENOTSUPP; | 771 | err = -ENOTSUPP; |
@@ -878,6 +887,7 @@ void netpoll_cleanup(struct netpoll *np) | |||
878 | } | 887 | } |
879 | 888 | ||
880 | if (atomic_dec_and_test(&npinfo->refcnt)) { | 889 | if (atomic_dec_and_test(&npinfo->refcnt)) { |
890 | const struct net_device_ops *ops; | ||
881 | skb_queue_purge(&npinfo->arp_tx); | 891 | skb_queue_purge(&npinfo->arp_tx); |
882 | skb_queue_purge(&npinfo->txq); | 892 | skb_queue_purge(&npinfo->txq); |
883 | cancel_rearming_delayed_work(&npinfo->tx_work); | 893 | cancel_rearming_delayed_work(&npinfo->tx_work); |
@@ -885,7 +895,11 @@ void netpoll_cleanup(struct netpoll *np) | |||
885 | /* clean after last, unfinished work */ | 895 | /* clean after last, unfinished work */ |
886 | __skb_queue_purge(&npinfo->txq); | 896 | __skb_queue_purge(&npinfo->txq); |
887 | kfree(npinfo); | 897 | kfree(npinfo); |
888 | np->dev->npinfo = NULL; | 898 | ops = np->dev->netdev_ops; |
899 | if (ops->ndo_netpoll_cleanup) | ||
900 | ops->ndo_netpoll_cleanup(np->dev); | ||
901 | else | ||
902 | np->dev->npinfo = NULL; | ||
889 | } | 903 | } |
890 | } | 904 | } |
891 | 905 | ||
@@ -908,6 +922,7 @@ void netpoll_set_trap(int trap) | |||
908 | atomic_dec(&trapped); | 922 | atomic_dec(&trapped); |
909 | } | 923 | } |
910 | 924 | ||
925 | EXPORT_SYMBOL(netpoll_send_skb); | ||
911 | EXPORT_SYMBOL(netpoll_set_trap); | 926 | EXPORT_SYMBOL(netpoll_set_trap); |
912 | EXPORT_SYMBOL(netpoll_trap); | 927 | EXPORT_SYMBOL(netpoll_trap); |
913 | EXPORT_SYMBOL(netpoll_print_options); | 928 | EXPORT_SYMBOL(netpoll_print_options); |
@@ -915,4 +930,5 @@ EXPORT_SYMBOL(netpoll_parse_options); | |||
915 | EXPORT_SYMBOL(netpoll_setup); | 930 | EXPORT_SYMBOL(netpoll_setup); |
916 | EXPORT_SYMBOL(netpoll_cleanup); | 931 | EXPORT_SYMBOL(netpoll_cleanup); |
917 | EXPORT_SYMBOL(netpoll_send_udp); | 932 | EXPORT_SYMBOL(netpoll_send_udp); |
933 | EXPORT_SYMBOL(netpoll_poll_dev); | ||
918 | EXPORT_SYMBOL(netpoll_poll); | 934 | EXPORT_SYMBOL(netpoll_poll); |
diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 43923811bd6a..2ad68da418df 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c | |||
@@ -169,7 +169,7 @@ | |||
169 | #include <asm/dma.h> | 169 | #include <asm/dma.h> |
170 | #include <asm/div64.h> /* do_div */ | 170 | #include <asm/div64.h> /* do_div */ |
171 | 171 | ||
172 | #define VERSION "2.72" | 172 | #define VERSION "2.73" |
173 | #define IP_NAME_SZ 32 | 173 | #define IP_NAME_SZ 32 |
174 | #define MAX_MPLS_LABELS 16 /* This is the max label stack depth */ | 174 | #define MAX_MPLS_LABELS 16 /* This is the max label stack depth */ |
175 | #define MPLS_STACK_BOTTOM htonl(0x00000100) | 175 | #define MPLS_STACK_BOTTOM htonl(0x00000100) |
@@ -190,6 +190,7 @@ | |||
190 | #define F_IPSEC_ON (1<<12) /* ipsec on for flows */ | 190 | #define F_IPSEC_ON (1<<12) /* ipsec on for flows */ |
191 | #define F_QUEUE_MAP_RND (1<<13) /* queue map Random */ | 191 | #define F_QUEUE_MAP_RND (1<<13) /* queue map Random */ |
192 | #define F_QUEUE_MAP_CPU (1<<14) /* queue map mirrors smp_processor_id() */ | 192 | #define F_QUEUE_MAP_CPU (1<<14) /* queue map mirrors smp_processor_id() */ |
193 | #define F_NODE (1<<15) /* Node memory alloc*/ | ||
193 | 194 | ||
194 | /* Thread control flag bits */ | 195 | /* Thread control flag bits */ |
195 | #define T_STOP (1<<0) /* Stop run */ | 196 | #define T_STOP (1<<0) /* Stop run */ |
@@ -372,6 +373,7 @@ struct pktgen_dev { | |||
372 | 373 | ||
373 | u16 queue_map_min; | 374 | u16 queue_map_min; |
374 | u16 queue_map_max; | 375 | u16 queue_map_max; |
376 | int node; /* Memory node */ | ||
375 | 377 | ||
376 | #ifdef CONFIG_XFRM | 378 | #ifdef CONFIG_XFRM |
377 | __u8 ipsmode; /* IPSEC mode (config) */ | 379 | __u8 ipsmode; /* IPSEC mode (config) */ |
@@ -607,6 +609,9 @@ static int pktgen_if_show(struct seq_file *seq, void *v) | |||
607 | if (pkt_dev->traffic_class) | 609 | if (pkt_dev->traffic_class) |
608 | seq_printf(seq, " traffic_class: 0x%02x\n", pkt_dev->traffic_class); | 610 | seq_printf(seq, " traffic_class: 0x%02x\n", pkt_dev->traffic_class); |
609 | 611 | ||
612 | if (pkt_dev->node >= 0) | ||
613 | seq_printf(seq, " node: %d\n", pkt_dev->node); | ||
614 | |||
610 | seq_printf(seq, " Flags: "); | 615 | seq_printf(seq, " Flags: "); |
611 | 616 | ||
612 | if (pkt_dev->flags & F_IPV6) | 617 | if (pkt_dev->flags & F_IPV6) |
@@ -660,6 +665,9 @@ static int pktgen_if_show(struct seq_file *seq, void *v) | |||
660 | if (pkt_dev->flags & F_SVID_RND) | 665 | if (pkt_dev->flags & F_SVID_RND) |
661 | seq_printf(seq, "SVID_RND "); | 666 | seq_printf(seq, "SVID_RND "); |
662 | 667 | ||
668 | if (pkt_dev->flags & F_NODE) | ||
669 | seq_printf(seq, "NODE_ALLOC "); | ||
670 | |||
663 | seq_puts(seq, "\n"); | 671 | seq_puts(seq, "\n"); |
664 | 672 | ||
665 | /* not really stopped, more like last-running-at */ | 673 | /* not really stopped, more like last-running-at */ |
@@ -1074,6 +1082,21 @@ static ssize_t pktgen_if_write(struct file *file, | |||
1074 | pkt_dev->dst_mac_count); | 1082 | pkt_dev->dst_mac_count); |
1075 | return count; | 1083 | return count; |
1076 | } | 1084 | } |
1085 | if (!strcmp(name, "node")) { | ||
1086 | len = num_arg(&user_buffer[i], 10, &value); | ||
1087 | if (len < 0) | ||
1088 | return len; | ||
1089 | |||
1090 | i += len; | ||
1091 | |||
1092 | if (node_possible(value)) { | ||
1093 | pkt_dev->node = value; | ||
1094 | sprintf(pg_result, "OK: node=%d", pkt_dev->node); | ||
1095 | } | ||
1096 | else | ||
1097 | sprintf(pg_result, "ERROR: node not possible"); | ||
1098 | return count; | ||
1099 | } | ||
1077 | if (!strcmp(name, "flag")) { | 1100 | if (!strcmp(name, "flag")) { |
1078 | char f[32]; | 1101 | char f[32]; |
1079 | memset(f, 0, 32); | 1102 | memset(f, 0, 32); |
@@ -1166,12 +1189,18 @@ static ssize_t pktgen_if_write(struct file *file, | |||
1166 | else if (strcmp(f, "!IPV6") == 0) | 1189 | else if (strcmp(f, "!IPV6") == 0) |
1167 | pkt_dev->flags &= ~F_IPV6; | 1190 | pkt_dev->flags &= ~F_IPV6; |
1168 | 1191 | ||
1192 | else if (strcmp(f, "NODE_ALLOC") == 0) | ||
1193 | pkt_dev->flags |= F_NODE; | ||
1194 | |||
1195 | else if (strcmp(f, "!NODE_ALLOC") == 0) | ||
1196 | pkt_dev->flags &= ~F_NODE; | ||
1197 | |||
1169 | else { | 1198 | else { |
1170 | sprintf(pg_result, | 1199 | sprintf(pg_result, |
1171 | "Flag -:%s:- unknown\nAvailable flags, (prepend ! to un-set flag):\n%s", | 1200 | "Flag -:%s:- unknown\nAvailable flags, (prepend ! to un-set flag):\n%s", |
1172 | f, | 1201 | f, |
1173 | "IPSRC_RND, IPDST_RND, UDPSRC_RND, UDPDST_RND, " | 1202 | "IPSRC_RND, IPDST_RND, UDPSRC_RND, UDPDST_RND, " |
1174 | "MACSRC_RND, MACDST_RND, TXSIZE_RND, IPV6, MPLS_RND, VID_RND, SVID_RND, FLOW_SEQ, IPSEC\n"); | 1203 | "MACSRC_RND, MACDST_RND, TXSIZE_RND, IPV6, MPLS_RND, VID_RND, SVID_RND, FLOW_SEQ, IPSEC, NODE_ALLOC\n"); |
1175 | return count; | 1204 | return count; |
1176 | } | 1205 | } |
1177 | sprintf(pg_result, "OK: flags=0x%x", pkt_dev->flags); | 1206 | sprintf(pg_result, "OK: flags=0x%x", pkt_dev->flags); |
@@ -2572,9 +2601,27 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev, | |||
2572 | mod_cur_headers(pkt_dev); | 2601 | mod_cur_headers(pkt_dev); |
2573 | 2602 | ||
2574 | datalen = (odev->hard_header_len + 16) & ~0xf; | 2603 | datalen = (odev->hard_header_len + 16) & ~0xf; |
2575 | skb = __netdev_alloc_skb(odev, | 2604 | |
2576 | pkt_dev->cur_pkt_size + 64 | 2605 | if (pkt_dev->flags & F_NODE) { |
2577 | + datalen + pkt_dev->pkt_overhead, GFP_NOWAIT); | 2606 | int node; |
2607 | |||
2608 | if (pkt_dev->node >= 0) | ||
2609 | node = pkt_dev->node; | ||
2610 | else | ||
2611 | node = numa_node_id(); | ||
2612 | |||
2613 | skb = __alloc_skb(NET_SKB_PAD + pkt_dev->cur_pkt_size + 64 | ||
2614 | + datalen + pkt_dev->pkt_overhead, GFP_NOWAIT, 0, node); | ||
2615 | if (likely(skb)) { | ||
2616 | skb_reserve(skb, NET_SKB_PAD); | ||
2617 | skb->dev = odev; | ||
2618 | } | ||
2619 | } | ||
2620 | else | ||
2621 | skb = __netdev_alloc_skb(odev, | ||
2622 | pkt_dev->cur_pkt_size + 64 | ||
2623 | + datalen + pkt_dev->pkt_overhead, GFP_NOWAIT); | ||
2624 | |||
2578 | if (!skb) { | 2625 | if (!skb) { |
2579 | sprintf(pkt_dev->result, "No memory"); | 2626 | sprintf(pkt_dev->result, "No memory"); |
2580 | return NULL; | 2627 | return NULL; |
@@ -3674,6 +3721,7 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname) | |||
3674 | pkt_dev->svlan_p = 0; | 3721 | pkt_dev->svlan_p = 0; |
3675 | pkt_dev->svlan_cfi = 0; | 3722 | pkt_dev->svlan_cfi = 0; |
3676 | pkt_dev->svlan_id = 0xffff; | 3723 | pkt_dev->svlan_id = 0xffff; |
3724 | pkt_dev->node = -1; | ||
3677 | 3725 | ||
3678 | err = pktgen_setup_dev(pkt_dev, ifname); | 3726 | err = pktgen_setup_dev(pkt_dev, ifname); |
3679 | if (err) | 3727 | if (err) |
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index fe776c9ddeca..e4b9870e4706 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c | |||
@@ -98,7 +98,7 @@ int lockdep_rtnl_is_held(void) | |||
98 | EXPORT_SYMBOL(lockdep_rtnl_is_held); | 98 | EXPORT_SYMBOL(lockdep_rtnl_is_held); |
99 | #endif /* #ifdef CONFIG_PROVE_LOCKING */ | 99 | #endif /* #ifdef CONFIG_PROVE_LOCKING */ |
100 | 100 | ||
101 | static struct rtnl_link *rtnl_msg_handlers[NPROTO]; | 101 | static struct rtnl_link *rtnl_msg_handlers[RTNL_FAMILY_MAX + 1]; |
102 | 102 | ||
103 | static inline int rtm_msgindex(int msgtype) | 103 | static inline int rtm_msgindex(int msgtype) |
104 | { | 104 | { |
@@ -118,7 +118,11 @@ static rtnl_doit_func rtnl_get_doit(int protocol, int msgindex) | |||
118 | { | 118 | { |
119 | struct rtnl_link *tab; | 119 | struct rtnl_link *tab; |
120 | 120 | ||
121 | tab = rtnl_msg_handlers[protocol]; | 121 | if (protocol <= RTNL_FAMILY_MAX) |
122 | tab = rtnl_msg_handlers[protocol]; | ||
123 | else | ||
124 | tab = NULL; | ||
125 | |||
122 | if (tab == NULL || tab[msgindex].doit == NULL) | 126 | if (tab == NULL || tab[msgindex].doit == NULL) |
123 | tab = rtnl_msg_handlers[PF_UNSPEC]; | 127 | tab = rtnl_msg_handlers[PF_UNSPEC]; |
124 | 128 | ||
@@ -129,7 +133,11 @@ static rtnl_dumpit_func rtnl_get_dumpit(int protocol, int msgindex) | |||
129 | { | 133 | { |
130 | struct rtnl_link *tab; | 134 | struct rtnl_link *tab; |
131 | 135 | ||
132 | tab = rtnl_msg_handlers[protocol]; | 136 | if (protocol <= RTNL_FAMILY_MAX) |
137 | tab = rtnl_msg_handlers[protocol]; | ||
138 | else | ||
139 | tab = NULL; | ||
140 | |||
133 | if (tab == NULL || tab[msgindex].dumpit == NULL) | 141 | if (tab == NULL || tab[msgindex].dumpit == NULL) |
134 | tab = rtnl_msg_handlers[PF_UNSPEC]; | 142 | tab = rtnl_msg_handlers[PF_UNSPEC]; |
135 | 143 | ||
@@ -159,7 +167,7 @@ int __rtnl_register(int protocol, int msgtype, | |||
159 | struct rtnl_link *tab; | 167 | struct rtnl_link *tab; |
160 | int msgindex; | 168 | int msgindex; |
161 | 169 | ||
162 | BUG_ON(protocol < 0 || protocol >= NPROTO); | 170 | BUG_ON(protocol < 0 || protocol > RTNL_FAMILY_MAX); |
163 | msgindex = rtm_msgindex(msgtype); | 171 | msgindex = rtm_msgindex(msgtype); |
164 | 172 | ||
165 | tab = rtnl_msg_handlers[protocol]; | 173 | tab = rtnl_msg_handlers[protocol]; |
@@ -211,7 +219,7 @@ int rtnl_unregister(int protocol, int msgtype) | |||
211 | { | 219 | { |
212 | int msgindex; | 220 | int msgindex; |
213 | 221 | ||
214 | BUG_ON(protocol < 0 || protocol >= NPROTO); | 222 | BUG_ON(protocol < 0 || protocol > RTNL_FAMILY_MAX); |
215 | msgindex = rtm_msgindex(msgtype); | 223 | msgindex = rtm_msgindex(msgtype); |
216 | 224 | ||
217 | if (rtnl_msg_handlers[protocol] == NULL) | 225 | if (rtnl_msg_handlers[protocol] == NULL) |
@@ -233,7 +241,7 @@ EXPORT_SYMBOL_GPL(rtnl_unregister); | |||
233 | */ | 241 | */ |
234 | void rtnl_unregister_all(int protocol) | 242 | void rtnl_unregister_all(int protocol) |
235 | { | 243 | { |
236 | BUG_ON(protocol < 0 || protocol >= NPROTO); | 244 | BUG_ON(protocol < 0 || protocol > RTNL_FAMILY_MAX); |
237 | 245 | ||
238 | kfree(rtnl_msg_handlers[protocol]); | 246 | kfree(rtnl_msg_handlers[protocol]); |
239 | rtnl_msg_handlers[protocol] = NULL; | 247 | rtnl_msg_handlers[protocol] = NULL; |
@@ -600,17 +608,83 @@ static void copy_rtnl_link_stats(struct rtnl_link_stats *a, | |||
600 | 608 | ||
601 | a->rx_compressed = b->rx_compressed; | 609 | a->rx_compressed = b->rx_compressed; |
602 | a->tx_compressed = b->tx_compressed; | 610 | a->tx_compressed = b->tx_compressed; |
603 | }; | 611 | } |
604 | 612 | ||
613 | static void copy_rtnl_link_stats64(void *v, const struct net_device_stats *b) | ||
614 | { | ||
615 | struct rtnl_link_stats64 a; | ||
616 | |||
617 | a.rx_packets = b->rx_packets; | ||
618 | a.tx_packets = b->tx_packets; | ||
619 | a.rx_bytes = b->rx_bytes; | ||
620 | a.tx_bytes = b->tx_bytes; | ||
621 | a.rx_errors = b->rx_errors; | ||
622 | a.tx_errors = b->tx_errors; | ||
623 | a.rx_dropped = b->rx_dropped; | ||
624 | a.tx_dropped = b->tx_dropped; | ||
625 | |||
626 | a.multicast = b->multicast; | ||
627 | a.collisions = b->collisions; | ||
628 | |||
629 | a.rx_length_errors = b->rx_length_errors; | ||
630 | a.rx_over_errors = b->rx_over_errors; | ||
631 | a.rx_crc_errors = b->rx_crc_errors; | ||
632 | a.rx_frame_errors = b->rx_frame_errors; | ||
633 | a.rx_fifo_errors = b->rx_fifo_errors; | ||
634 | a.rx_missed_errors = b->rx_missed_errors; | ||
635 | |||
636 | a.tx_aborted_errors = b->tx_aborted_errors; | ||
637 | a.tx_carrier_errors = b->tx_carrier_errors; | ||
638 | a.tx_fifo_errors = b->tx_fifo_errors; | ||
639 | a.tx_heartbeat_errors = b->tx_heartbeat_errors; | ||
640 | a.tx_window_errors = b->tx_window_errors; | ||
641 | |||
642 | a.rx_compressed = b->rx_compressed; | ||
643 | a.tx_compressed = b->tx_compressed; | ||
644 | memcpy(v, &a, sizeof(a)); | ||
645 | } | ||
646 | |||
647 | /* All VF info */ | ||
605 | static inline int rtnl_vfinfo_size(const struct net_device *dev) | 648 | static inline int rtnl_vfinfo_size(const struct net_device *dev) |
606 | { | 649 | { |
607 | if (dev->dev.parent && dev_is_pci(dev->dev.parent)) | 650 | if (dev->dev.parent && dev_is_pci(dev->dev.parent)) { |
608 | return dev_num_vf(dev->dev.parent) * | 651 | |
609 | sizeof(struct ifla_vf_info); | 652 | int num_vfs = dev_num_vf(dev->dev.parent); |
610 | else | 653 | size_t size = nlmsg_total_size(sizeof(struct nlattr)); |
654 | size += nlmsg_total_size(num_vfs * sizeof(struct nlattr)); | ||
655 | size += num_vfs * (sizeof(struct ifla_vf_mac) + | ||
656 | sizeof(struct ifla_vf_vlan) + | ||
657 | sizeof(struct ifla_vf_tx_rate)); | ||
658 | return size; | ||
659 | } else | ||
611 | return 0; | 660 | return 0; |
612 | } | 661 | } |
613 | 662 | ||
663 | static size_t rtnl_port_size(const struct net_device *dev) | ||
664 | { | ||
665 | size_t port_size = nla_total_size(4) /* PORT_VF */ | ||
666 | + nla_total_size(PORT_PROFILE_MAX) /* PORT_PROFILE */ | ||
667 | + nla_total_size(sizeof(struct ifla_port_vsi)) | ||
668 | /* PORT_VSI_TYPE */ | ||
669 | + nla_total_size(PORT_UUID_MAX) /* PORT_INSTANCE_UUID */ | ||
670 | + nla_total_size(PORT_UUID_MAX) /* PORT_HOST_UUID */ | ||
671 | + nla_total_size(1) /* PROT_VDP_REQUEST */ | ||
672 | + nla_total_size(2); /* PORT_VDP_RESPONSE */ | ||
673 | size_t vf_ports_size = nla_total_size(sizeof(struct nlattr)); | ||
674 | size_t vf_port_size = nla_total_size(sizeof(struct nlattr)) | ||
675 | + port_size; | ||
676 | size_t port_self_size = nla_total_size(sizeof(struct nlattr)) | ||
677 | + port_size; | ||
678 | |||
679 | if (!dev->netdev_ops->ndo_get_vf_port || !dev->dev.parent) | ||
680 | return 0; | ||
681 | if (dev_num_vf(dev->dev.parent)) | ||
682 | return port_self_size + vf_ports_size + | ||
683 | vf_port_size * dev_num_vf(dev->dev.parent); | ||
684 | else | ||
685 | return port_self_size; | ||
686 | } | ||
687 | |||
614 | static inline size_t if_nlmsg_size(const struct net_device *dev) | 688 | static inline size_t if_nlmsg_size(const struct net_device *dev) |
615 | { | 689 | { |
616 | return NLMSG_ALIGN(sizeof(struct ifinfomsg)) | 690 | return NLMSG_ALIGN(sizeof(struct ifinfomsg)) |
@@ -619,6 +693,7 @@ static inline size_t if_nlmsg_size(const struct net_device *dev) | |||
619 | + nla_total_size(IFNAMSIZ) /* IFLA_QDISC */ | 693 | + nla_total_size(IFNAMSIZ) /* IFLA_QDISC */ |
620 | + nla_total_size(sizeof(struct rtnl_link_ifmap)) | 694 | + nla_total_size(sizeof(struct rtnl_link_ifmap)) |
621 | + nla_total_size(sizeof(struct rtnl_link_stats)) | 695 | + nla_total_size(sizeof(struct rtnl_link_stats)) |
696 | + nla_total_size(sizeof(struct rtnl_link_stats64)) | ||
622 | + nla_total_size(MAX_ADDR_LEN) /* IFLA_ADDRESS */ | 697 | + nla_total_size(MAX_ADDR_LEN) /* IFLA_ADDRESS */ |
623 | + nla_total_size(MAX_ADDR_LEN) /* IFLA_BROADCAST */ | 698 | + nla_total_size(MAX_ADDR_LEN) /* IFLA_BROADCAST */ |
624 | + nla_total_size(4) /* IFLA_TXQLEN */ | 699 | + nla_total_size(4) /* IFLA_TXQLEN */ |
@@ -629,10 +704,83 @@ static inline size_t if_nlmsg_size(const struct net_device *dev) | |||
629 | + nla_total_size(1) /* IFLA_OPERSTATE */ | 704 | + nla_total_size(1) /* IFLA_OPERSTATE */ |
630 | + nla_total_size(1) /* IFLA_LINKMODE */ | 705 | + nla_total_size(1) /* IFLA_LINKMODE */ |
631 | + nla_total_size(4) /* IFLA_NUM_VF */ | 706 | + nla_total_size(4) /* IFLA_NUM_VF */ |
632 | + nla_total_size(rtnl_vfinfo_size(dev)) /* IFLA_VFINFO */ | 707 | + rtnl_vfinfo_size(dev) /* IFLA_VFINFO_LIST */ |
708 | + rtnl_port_size(dev) /* IFLA_VF_PORTS + IFLA_PORT_SELF */ | ||
633 | + rtnl_link_get_size(dev); /* IFLA_LINKINFO */ | 709 | + rtnl_link_get_size(dev); /* IFLA_LINKINFO */ |
634 | } | 710 | } |
635 | 711 | ||
712 | static int rtnl_vf_ports_fill(struct sk_buff *skb, struct net_device *dev) | ||
713 | { | ||
714 | struct nlattr *vf_ports; | ||
715 | struct nlattr *vf_port; | ||
716 | int vf; | ||
717 | int err; | ||
718 | |||
719 | vf_ports = nla_nest_start(skb, IFLA_VF_PORTS); | ||
720 | if (!vf_ports) | ||
721 | return -EMSGSIZE; | ||
722 | |||
723 | for (vf = 0; vf < dev_num_vf(dev->dev.parent); vf++) { | ||
724 | vf_port = nla_nest_start(skb, IFLA_VF_PORT); | ||
725 | if (!vf_port) { | ||
726 | nla_nest_cancel(skb, vf_ports); | ||
727 | return -EMSGSIZE; | ||
728 | } | ||
729 | NLA_PUT_U32(skb, IFLA_PORT_VF, vf); | ||
730 | err = dev->netdev_ops->ndo_get_vf_port(dev, vf, skb); | ||
731 | if (err) { | ||
732 | nla_put_failure: | ||
733 | nla_nest_cancel(skb, vf_port); | ||
734 | continue; | ||
735 | } | ||
736 | nla_nest_end(skb, vf_port); | ||
737 | } | ||
738 | |||
739 | nla_nest_end(skb, vf_ports); | ||
740 | |||
741 | return 0; | ||
742 | } | ||
743 | |||
744 | static int rtnl_port_self_fill(struct sk_buff *skb, struct net_device *dev) | ||
745 | { | ||
746 | struct nlattr *port_self; | ||
747 | int err; | ||
748 | |||
749 | port_self = nla_nest_start(skb, IFLA_PORT_SELF); | ||
750 | if (!port_self) | ||
751 | return -EMSGSIZE; | ||
752 | |||
753 | err = dev->netdev_ops->ndo_get_vf_port(dev, PORT_SELF_VF, skb); | ||
754 | if (err) { | ||
755 | nla_nest_cancel(skb, port_self); | ||
756 | return err; | ||
757 | } | ||
758 | |||
759 | nla_nest_end(skb, port_self); | ||
760 | |||
761 | return 0; | ||
762 | } | ||
763 | |||
764 | static int rtnl_port_fill(struct sk_buff *skb, struct net_device *dev) | ||
765 | { | ||
766 | int err; | ||
767 | |||
768 | if (!dev->netdev_ops->ndo_get_vf_port || !dev->dev.parent) | ||
769 | return 0; | ||
770 | |||
771 | err = rtnl_port_self_fill(skb, dev); | ||
772 | if (err) | ||
773 | return err; | ||
774 | |||
775 | if (dev_num_vf(dev->dev.parent)) { | ||
776 | err = rtnl_vf_ports_fill(skb, dev); | ||
777 | if (err) | ||
778 | return err; | ||
779 | } | ||
780 | |||
781 | return 0; | ||
782 | } | ||
783 | |||
636 | static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, | 784 | static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, |
637 | int type, u32 pid, u32 seq, u32 change, | 785 | int type, u32 pid, u32 seq, u32 change, |
638 | unsigned int flags) | 786 | unsigned int flags) |
@@ -698,17 +846,52 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, | |||
698 | stats = dev_get_stats(dev); | 846 | stats = dev_get_stats(dev); |
699 | copy_rtnl_link_stats(nla_data(attr), stats); | 847 | copy_rtnl_link_stats(nla_data(attr), stats); |
700 | 848 | ||
849 | attr = nla_reserve(skb, IFLA_STATS64, | ||
850 | sizeof(struct rtnl_link_stats64)); | ||
851 | if (attr == NULL) | ||
852 | goto nla_put_failure; | ||
853 | copy_rtnl_link_stats64(nla_data(attr), stats); | ||
854 | |||
855 | if (dev->dev.parent) | ||
856 | NLA_PUT_U32(skb, IFLA_NUM_VF, dev_num_vf(dev->dev.parent)); | ||
857 | |||
701 | if (dev->netdev_ops->ndo_get_vf_config && dev->dev.parent) { | 858 | if (dev->netdev_ops->ndo_get_vf_config && dev->dev.parent) { |
702 | int i; | 859 | int i; |
703 | struct ifla_vf_info ivi; | ||
704 | 860 | ||
705 | NLA_PUT_U32(skb, IFLA_NUM_VF, dev_num_vf(dev->dev.parent)); | 861 | struct nlattr *vfinfo, *vf; |
706 | for (i = 0; i < dev_num_vf(dev->dev.parent); i++) { | 862 | int num_vfs = dev_num_vf(dev->dev.parent); |
863 | |||
864 | vfinfo = nla_nest_start(skb, IFLA_VFINFO_LIST); | ||
865 | if (!vfinfo) | ||
866 | goto nla_put_failure; | ||
867 | for (i = 0; i < num_vfs; i++) { | ||
868 | struct ifla_vf_info ivi; | ||
869 | struct ifla_vf_mac vf_mac; | ||
870 | struct ifla_vf_vlan vf_vlan; | ||
871 | struct ifla_vf_tx_rate vf_tx_rate; | ||
707 | if (dev->netdev_ops->ndo_get_vf_config(dev, i, &ivi)) | 872 | if (dev->netdev_ops->ndo_get_vf_config(dev, i, &ivi)) |
708 | break; | 873 | break; |
709 | NLA_PUT(skb, IFLA_VFINFO, sizeof(ivi), &ivi); | 874 | vf_mac.vf = vf_vlan.vf = vf_tx_rate.vf = ivi.vf; |
875 | memcpy(vf_mac.mac, ivi.mac, sizeof(ivi.mac)); | ||
876 | vf_vlan.vlan = ivi.vlan; | ||
877 | vf_vlan.qos = ivi.qos; | ||
878 | vf_tx_rate.rate = ivi.tx_rate; | ||
879 | vf = nla_nest_start(skb, IFLA_VF_INFO); | ||
880 | if (!vf) { | ||
881 | nla_nest_cancel(skb, vfinfo); | ||
882 | goto nla_put_failure; | ||
883 | } | ||
884 | NLA_PUT(skb, IFLA_VF_MAC, sizeof(vf_mac), &vf_mac); | ||
885 | NLA_PUT(skb, IFLA_VF_VLAN, sizeof(vf_vlan), &vf_vlan); | ||
886 | NLA_PUT(skb, IFLA_VF_TX_RATE, sizeof(vf_tx_rate), &vf_tx_rate); | ||
887 | nla_nest_end(skb, vf); | ||
710 | } | 888 | } |
889 | nla_nest_end(skb, vfinfo); | ||
711 | } | 890 | } |
891 | |||
892 | if (rtnl_port_fill(skb, dev)) | ||
893 | goto nla_put_failure; | ||
894 | |||
712 | if (dev->rtnl_link_ops) { | 895 | if (dev->rtnl_link_ops) { |
713 | if (rtnl_link_fill(skb, dev) < 0) | 896 | if (rtnl_link_fill(skb, dev) < 0) |
714 | goto nla_put_failure; | 897 | goto nla_put_failure; |
@@ -769,6 +952,22 @@ const struct nla_policy ifla_policy[IFLA_MAX+1] = { | |||
769 | [IFLA_LINKINFO] = { .type = NLA_NESTED }, | 952 | [IFLA_LINKINFO] = { .type = NLA_NESTED }, |
770 | [IFLA_NET_NS_PID] = { .type = NLA_U32 }, | 953 | [IFLA_NET_NS_PID] = { .type = NLA_U32 }, |
771 | [IFLA_IFALIAS] = { .type = NLA_STRING, .len = IFALIASZ-1 }, | 954 | [IFLA_IFALIAS] = { .type = NLA_STRING, .len = IFALIASZ-1 }, |
955 | [IFLA_VFINFO_LIST] = {. type = NLA_NESTED }, | ||
956 | [IFLA_VF_PORTS] = { .type = NLA_NESTED }, | ||
957 | [IFLA_PORT_SELF] = { .type = NLA_NESTED }, | ||
958 | }; | ||
959 | EXPORT_SYMBOL(ifla_policy); | ||
960 | |||
961 | static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = { | ||
962 | [IFLA_INFO_KIND] = { .type = NLA_STRING }, | ||
963 | [IFLA_INFO_DATA] = { .type = NLA_NESTED }, | ||
964 | }; | ||
965 | |||
966 | static const struct nla_policy ifla_vfinfo_policy[IFLA_VF_INFO_MAX+1] = { | ||
967 | [IFLA_VF_INFO] = { .type = NLA_NESTED }, | ||
968 | }; | ||
969 | |||
970 | static const struct nla_policy ifla_vf_policy[IFLA_VF_MAX+1] = { | ||
772 | [IFLA_VF_MAC] = { .type = NLA_BINARY, | 971 | [IFLA_VF_MAC] = { .type = NLA_BINARY, |
773 | .len = sizeof(struct ifla_vf_mac) }, | 972 | .len = sizeof(struct ifla_vf_mac) }, |
774 | [IFLA_VF_VLAN] = { .type = NLA_BINARY, | 973 | [IFLA_VF_VLAN] = { .type = NLA_BINARY, |
@@ -776,11 +975,19 @@ const struct nla_policy ifla_policy[IFLA_MAX+1] = { | |||
776 | [IFLA_VF_TX_RATE] = { .type = NLA_BINARY, | 975 | [IFLA_VF_TX_RATE] = { .type = NLA_BINARY, |
777 | .len = sizeof(struct ifla_vf_tx_rate) }, | 976 | .len = sizeof(struct ifla_vf_tx_rate) }, |
778 | }; | 977 | }; |
779 | EXPORT_SYMBOL(ifla_policy); | ||
780 | 978 | ||
781 | static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = { | 979 | static const struct nla_policy ifla_port_policy[IFLA_PORT_MAX+1] = { |
782 | [IFLA_INFO_KIND] = { .type = NLA_STRING }, | 980 | [IFLA_PORT_VF] = { .type = NLA_U32 }, |
783 | [IFLA_INFO_DATA] = { .type = NLA_NESTED }, | 981 | [IFLA_PORT_PROFILE] = { .type = NLA_STRING, |
982 | .len = PORT_PROFILE_MAX }, | ||
983 | [IFLA_PORT_VSI_TYPE] = { .type = NLA_BINARY, | ||
984 | .len = sizeof(struct ifla_port_vsi)}, | ||
985 | [IFLA_PORT_INSTANCE_UUID] = { .type = NLA_BINARY, | ||
986 | .len = PORT_UUID_MAX }, | ||
987 | [IFLA_PORT_HOST_UUID] = { .type = NLA_STRING, | ||
988 | .len = PORT_UUID_MAX }, | ||
989 | [IFLA_PORT_REQUEST] = { .type = NLA_U8, }, | ||
990 | [IFLA_PORT_RESPONSE] = { .type = NLA_U16, }, | ||
784 | }; | 991 | }; |
785 | 992 | ||
786 | struct net *rtnl_link_get_net(struct net *src_net, struct nlattr *tb[]) | 993 | struct net *rtnl_link_get_net(struct net *src_net, struct nlattr *tb[]) |
@@ -812,6 +1019,52 @@ static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[]) | |||
812 | return 0; | 1019 | return 0; |
813 | } | 1020 | } |
814 | 1021 | ||
1022 | static int do_setvfinfo(struct net_device *dev, struct nlattr *attr) | ||
1023 | { | ||
1024 | int rem, err = -EINVAL; | ||
1025 | struct nlattr *vf; | ||
1026 | const struct net_device_ops *ops = dev->netdev_ops; | ||
1027 | |||
1028 | nla_for_each_nested(vf, attr, rem) { | ||
1029 | switch (nla_type(vf)) { | ||
1030 | case IFLA_VF_MAC: { | ||
1031 | struct ifla_vf_mac *ivm; | ||
1032 | ivm = nla_data(vf); | ||
1033 | err = -EOPNOTSUPP; | ||
1034 | if (ops->ndo_set_vf_mac) | ||
1035 | err = ops->ndo_set_vf_mac(dev, ivm->vf, | ||
1036 | ivm->mac); | ||
1037 | break; | ||
1038 | } | ||
1039 | case IFLA_VF_VLAN: { | ||
1040 | struct ifla_vf_vlan *ivv; | ||
1041 | ivv = nla_data(vf); | ||
1042 | err = -EOPNOTSUPP; | ||
1043 | if (ops->ndo_set_vf_vlan) | ||
1044 | err = ops->ndo_set_vf_vlan(dev, ivv->vf, | ||
1045 | ivv->vlan, | ||
1046 | ivv->qos); | ||
1047 | break; | ||
1048 | } | ||
1049 | case IFLA_VF_TX_RATE: { | ||
1050 | struct ifla_vf_tx_rate *ivt; | ||
1051 | ivt = nla_data(vf); | ||
1052 | err = -EOPNOTSUPP; | ||
1053 | if (ops->ndo_set_vf_tx_rate) | ||
1054 | err = ops->ndo_set_vf_tx_rate(dev, ivt->vf, | ||
1055 | ivt->rate); | ||
1056 | break; | ||
1057 | } | ||
1058 | default: | ||
1059 | err = -EINVAL; | ||
1060 | break; | ||
1061 | } | ||
1062 | if (err) | ||
1063 | break; | ||
1064 | } | ||
1065 | return err; | ||
1066 | } | ||
1067 | |||
815 | static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, | 1068 | static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, |
816 | struct nlattr **tb, char *ifname, int modified) | 1069 | struct nlattr **tb, char *ifname, int modified) |
817 | { | 1070 | { |
@@ -942,37 +1195,61 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, | |||
942 | write_unlock_bh(&dev_base_lock); | 1195 | write_unlock_bh(&dev_base_lock); |
943 | } | 1196 | } |
944 | 1197 | ||
945 | if (tb[IFLA_VF_MAC]) { | 1198 | if (tb[IFLA_VFINFO_LIST]) { |
946 | struct ifla_vf_mac *ivm; | 1199 | struct nlattr *attr; |
947 | ivm = nla_data(tb[IFLA_VF_MAC]); | 1200 | int rem; |
948 | err = -EOPNOTSUPP; | 1201 | nla_for_each_nested(attr, tb[IFLA_VFINFO_LIST], rem) { |
949 | if (ops->ndo_set_vf_mac) | 1202 | if (nla_type(attr) != IFLA_VF_INFO) |
950 | err = ops->ndo_set_vf_mac(dev, ivm->vf, ivm->mac); | 1203 | goto errout; |
951 | if (err < 0) | 1204 | err = do_setvfinfo(dev, attr); |
952 | goto errout; | 1205 | if (err < 0) |
953 | modified = 1; | 1206 | goto errout; |
1207 | modified = 1; | ||
1208 | } | ||
954 | } | 1209 | } |
1210 | err = 0; | ||
1211 | |||
1212 | if (tb[IFLA_VF_PORTS]) { | ||
1213 | struct nlattr *port[IFLA_PORT_MAX+1]; | ||
1214 | struct nlattr *attr; | ||
1215 | int vf; | ||
1216 | int rem; | ||
955 | 1217 | ||
956 | if (tb[IFLA_VF_VLAN]) { | ||
957 | struct ifla_vf_vlan *ivv; | ||
958 | ivv = nla_data(tb[IFLA_VF_VLAN]); | ||
959 | err = -EOPNOTSUPP; | 1218 | err = -EOPNOTSUPP; |
960 | if (ops->ndo_set_vf_vlan) | 1219 | if (!ops->ndo_set_vf_port) |
961 | err = ops->ndo_set_vf_vlan(dev, ivv->vf, | ||
962 | ivv->vlan, | ||
963 | ivv->qos); | ||
964 | if (err < 0) | ||
965 | goto errout; | 1220 | goto errout; |
966 | modified = 1; | 1221 | |
1222 | nla_for_each_nested(attr, tb[IFLA_VF_PORTS], rem) { | ||
1223 | if (nla_type(attr) != IFLA_VF_PORT) | ||
1224 | continue; | ||
1225 | err = nla_parse_nested(port, IFLA_PORT_MAX, | ||
1226 | attr, ifla_port_policy); | ||
1227 | if (err < 0) | ||
1228 | goto errout; | ||
1229 | if (!port[IFLA_PORT_VF]) { | ||
1230 | err = -EOPNOTSUPP; | ||
1231 | goto errout; | ||
1232 | } | ||
1233 | vf = nla_get_u32(port[IFLA_PORT_VF]); | ||
1234 | err = ops->ndo_set_vf_port(dev, vf, port); | ||
1235 | if (err < 0) | ||
1236 | goto errout; | ||
1237 | modified = 1; | ||
1238 | } | ||
967 | } | 1239 | } |
968 | err = 0; | 1240 | err = 0; |
969 | 1241 | ||
970 | if (tb[IFLA_VF_TX_RATE]) { | 1242 | if (tb[IFLA_PORT_SELF]) { |
971 | struct ifla_vf_tx_rate *ivt; | 1243 | struct nlattr *port[IFLA_PORT_MAX+1]; |
972 | ivt = nla_data(tb[IFLA_VF_TX_RATE]); | 1244 | |
1245 | err = nla_parse_nested(port, IFLA_PORT_MAX, | ||
1246 | tb[IFLA_PORT_SELF], ifla_port_policy); | ||
1247 | if (err < 0) | ||
1248 | goto errout; | ||
1249 | |||
973 | err = -EOPNOTSUPP; | 1250 | err = -EOPNOTSUPP; |
974 | if (ops->ndo_set_vf_tx_rate) | 1251 | if (ops->ndo_set_vf_port) |
975 | err = ops->ndo_set_vf_tx_rate(dev, ivt->vf, ivt->rate); | 1252 | err = ops->ndo_set_vf_port(dev, PORT_SELF_VF, port); |
976 | if (err < 0) | 1253 | if (err < 0) |
977 | goto errout; | 1254 | goto errout; |
978 | modified = 1; | 1255 | modified = 1; |
@@ -1336,7 +1613,7 @@ static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb) | |||
1336 | 1613 | ||
1337 | if (s_idx == 0) | 1614 | if (s_idx == 0) |
1338 | s_idx = 1; | 1615 | s_idx = 1; |
1339 | for (idx = 1; idx < NPROTO; idx++) { | 1616 | for (idx = 1; idx <= RTNL_FAMILY_MAX; idx++) { |
1340 | int type = cb->nlh->nlmsg_type-RTM_BASE; | 1617 | int type = cb->nlh->nlmsg_type-RTM_BASE; |
1341 | if (idx < s_idx || idx == PF_PACKET) | 1618 | if (idx < s_idx || idx == PF_PACKET) |
1342 | continue; | 1619 | continue; |
@@ -1404,9 +1681,6 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) | |||
1404 | return 0; | 1681 | return 0; |
1405 | 1682 | ||
1406 | family = ((struct rtgenmsg *)NLMSG_DATA(nlh))->rtgen_family; | 1683 | family = ((struct rtgenmsg *)NLMSG_DATA(nlh))->rtgen_family; |
1407 | if (family >= NPROTO) | ||
1408 | return -EAFNOSUPPORT; | ||
1409 | |||
1410 | sz_idx = type>>2; | 1684 | sz_idx = type>>2; |
1411 | kind = type&3; | 1685 | kind = type&3; |
1412 | 1686 | ||
@@ -1474,6 +1748,7 @@ static int rtnetlink_event(struct notifier_block *this, unsigned long event, voi | |||
1474 | case NETDEV_POST_INIT: | 1748 | case NETDEV_POST_INIT: |
1475 | case NETDEV_REGISTER: | 1749 | case NETDEV_REGISTER: |
1476 | case NETDEV_CHANGE: | 1750 | case NETDEV_CHANGE: |
1751 | case NETDEV_PRE_TYPE_CHANGE: | ||
1477 | case NETDEV_GOING_DOWN: | 1752 | case NETDEV_GOING_DOWN: |
1478 | case NETDEV_UNREGISTER: | 1753 | case NETDEV_UNREGISTER: |
1479 | case NETDEV_UNREGISTER_BATCH: | 1754 | case NETDEV_UNREGISTER_BATCH: |
diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 931981774b1a..66d9c416851e 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c | |||
@@ -117,7 +117,7 @@ static const struct pipe_buf_operations sock_pipe_buf_ops = { | |||
117 | * | 117 | * |
118 | * Out of line support code for skb_put(). Not user callable. | 118 | * Out of line support code for skb_put(). Not user callable. |
119 | */ | 119 | */ |
120 | void skb_over_panic(struct sk_buff *skb, int sz, void *here) | 120 | static void skb_over_panic(struct sk_buff *skb, int sz, void *here) |
121 | { | 121 | { |
122 | printk(KERN_EMERG "skb_over_panic: text:%p len:%d put:%d head:%p " | 122 | printk(KERN_EMERG "skb_over_panic: text:%p len:%d put:%d head:%p " |
123 | "data:%p tail:%#lx end:%#lx dev:%s\n", | 123 | "data:%p tail:%#lx end:%#lx dev:%s\n", |
@@ -126,7 +126,6 @@ void skb_over_panic(struct sk_buff *skb, int sz, void *here) | |||
126 | skb->dev ? skb->dev->name : "<NULL>"); | 126 | skb->dev ? skb->dev->name : "<NULL>"); |
127 | BUG(); | 127 | BUG(); |
128 | } | 128 | } |
129 | EXPORT_SYMBOL(skb_over_panic); | ||
130 | 129 | ||
131 | /** | 130 | /** |
132 | * skb_under_panic - private function | 131 | * skb_under_panic - private function |
@@ -137,7 +136,7 @@ EXPORT_SYMBOL(skb_over_panic); | |||
137 | * Out of line support code for skb_push(). Not user callable. | 136 | * Out of line support code for skb_push(). Not user callable. |
138 | */ | 137 | */ |
139 | 138 | ||
140 | void skb_under_panic(struct sk_buff *skb, int sz, void *here) | 139 | static void skb_under_panic(struct sk_buff *skb, int sz, void *here) |
141 | { | 140 | { |
142 | printk(KERN_EMERG "skb_under_panic: text:%p len:%d put:%d head:%p " | 141 | printk(KERN_EMERG "skb_under_panic: text:%p len:%d put:%d head:%p " |
143 | "data:%p tail:%#lx end:%#lx dev:%s\n", | 142 | "data:%p tail:%#lx end:%#lx dev:%s\n", |
@@ -146,7 +145,6 @@ void skb_under_panic(struct sk_buff *skb, int sz, void *here) | |||
146 | skb->dev ? skb->dev->name : "<NULL>"); | 145 | skb->dev ? skb->dev->name : "<NULL>"); |
147 | BUG(); | 146 | BUG(); |
148 | } | 147 | } |
149 | EXPORT_SYMBOL(skb_under_panic); | ||
150 | 148 | ||
151 | /* Allocate a new skbuff. We do this ourselves so we can fill in a few | 149 | /* Allocate a new skbuff. We do this ourselves so we can fill in a few |
152 | * 'private' fields and also do memory statistics to find all the | 150 | * 'private' fields and also do memory statistics to find all the |
@@ -183,12 +181,14 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, | |||
183 | skb = kmem_cache_alloc_node(cache, gfp_mask & ~__GFP_DMA, node); | 181 | skb = kmem_cache_alloc_node(cache, gfp_mask & ~__GFP_DMA, node); |
184 | if (!skb) | 182 | if (!skb) |
185 | goto out; | 183 | goto out; |
184 | prefetchw(skb); | ||
186 | 185 | ||
187 | size = SKB_DATA_ALIGN(size); | 186 | size = SKB_DATA_ALIGN(size); |
188 | data = kmalloc_node_track_caller(size + sizeof(struct skb_shared_info), | 187 | data = kmalloc_node_track_caller(size + sizeof(struct skb_shared_info), |
189 | gfp_mask, node); | 188 | gfp_mask, node); |
190 | if (!data) | 189 | if (!data) |
191 | goto nodata; | 190 | goto nodata; |
191 | prefetchw(data + size); | ||
192 | 192 | ||
193 | /* | 193 | /* |
194 | * Only clear those fields we need to clear, not those that we will | 194 | * Only clear those fields we need to clear, not those that we will |
@@ -210,15 +210,8 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, | |||
210 | 210 | ||
211 | /* make sure we initialize shinfo sequentially */ | 211 | /* make sure we initialize shinfo sequentially */ |
212 | shinfo = skb_shinfo(skb); | 212 | shinfo = skb_shinfo(skb); |
213 | memset(shinfo, 0, offsetof(struct skb_shared_info, dataref)); | ||
213 | atomic_set(&shinfo->dataref, 1); | 214 | atomic_set(&shinfo->dataref, 1); |
214 | shinfo->nr_frags = 0; | ||
215 | shinfo->gso_size = 0; | ||
216 | shinfo->gso_segs = 0; | ||
217 | shinfo->gso_type = 0; | ||
218 | shinfo->ip6_frag_id = 0; | ||
219 | shinfo->tx_flags.flags = 0; | ||
220 | skb_frag_list_init(skb); | ||
221 | memset(&shinfo->hwtstamps, 0, sizeof(shinfo->hwtstamps)); | ||
222 | 215 | ||
223 | if (fclone) { | 216 | if (fclone) { |
224 | struct sk_buff *child = skb + 1; | 217 | struct sk_buff *child = skb + 1; |
@@ -507,16 +500,10 @@ int skb_recycle_check(struct sk_buff *skb, int skb_size) | |||
507 | return 0; | 500 | return 0; |
508 | 501 | ||
509 | skb_release_head_state(skb); | 502 | skb_release_head_state(skb); |
503 | |||
510 | shinfo = skb_shinfo(skb); | 504 | shinfo = skb_shinfo(skb); |
505 | memset(shinfo, 0, offsetof(struct skb_shared_info, dataref)); | ||
511 | atomic_set(&shinfo->dataref, 1); | 506 | atomic_set(&shinfo->dataref, 1); |
512 | shinfo->nr_frags = 0; | ||
513 | shinfo->gso_size = 0; | ||
514 | shinfo->gso_segs = 0; | ||
515 | shinfo->gso_type = 0; | ||
516 | shinfo->ip6_frag_id = 0; | ||
517 | shinfo->tx_flags.flags = 0; | ||
518 | skb_frag_list_init(skb); | ||
519 | memset(&shinfo->hwtstamps, 0, sizeof(shinfo->hwtstamps)); | ||
520 | 507 | ||
521 | memset(skb, 0, offsetof(struct sk_buff, tail)); | 508 | memset(skb, 0, offsetof(struct sk_buff, tail)); |
522 | skb->data = skb->head + NET_SKB_PAD; | 509 | skb->data = skb->head + NET_SKB_PAD; |
@@ -533,7 +520,8 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old) | |||
533 | new->transport_header = old->transport_header; | 520 | new->transport_header = old->transport_header; |
534 | new->network_header = old->network_header; | 521 | new->network_header = old->network_header; |
535 | new->mac_header = old->mac_header; | 522 | new->mac_header = old->mac_header; |
536 | skb_dst_set(new, dst_clone(skb_dst(old))); | 523 | skb_dst_copy(new, old); |
524 | new->rxhash = old->rxhash; | ||
537 | #ifdef CONFIG_XFRM | 525 | #ifdef CONFIG_XFRM |
538 | new->sp = secpath_get(old->sp); | 526 | new->sp = secpath_get(old->sp); |
539 | #endif | 527 | #endif |
@@ -581,6 +569,7 @@ static struct sk_buff *__skb_clone(struct sk_buff *n, struct sk_buff *skb) | |||
581 | C(len); | 569 | C(len); |
582 | C(data_len); | 570 | C(data_len); |
583 | C(mac_len); | 571 | C(mac_len); |
572 | C(rxhash); | ||
584 | n->hdr_len = skb->nohdr ? skb_headroom(skb) : skb->hdr_len; | 573 | n->hdr_len = skb->nohdr ? skb_headroom(skb) : skb->hdr_len; |
585 | n->cloned = 1; | 574 | n->cloned = 1; |
586 | n->nohdr = 0; | 575 | n->nohdr = 0; |
@@ -1051,7 +1040,7 @@ EXPORT_SYMBOL(skb_push); | |||
1051 | */ | 1040 | */ |
1052 | unsigned char *skb_pull(struct sk_buff *skb, unsigned int len) | 1041 | unsigned char *skb_pull(struct sk_buff *skb, unsigned int len) |
1053 | { | 1042 | { |
1054 | return unlikely(len > skb->len) ? NULL : __skb_pull(skb, len); | 1043 | return skb_pull_inline(skb, len); |
1055 | } | 1044 | } |
1056 | EXPORT_SYMBOL(skb_pull); | 1045 | EXPORT_SYMBOL(skb_pull); |
1057 | 1046 | ||
diff --git a/net/core/sock.c b/net/core/sock.c index c5812bbc2cc9..bf88a167c8f2 100644 --- a/net/core/sock.c +++ b/net/core/sock.c | |||
@@ -307,6 +307,11 @@ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) | |||
307 | */ | 307 | */ |
308 | skb_len = skb->len; | 308 | skb_len = skb->len; |
309 | 309 | ||
310 | /* we escape from rcu protected region, make sure we dont leak | ||
311 | * a norefcounted dst | ||
312 | */ | ||
313 | skb_dst_force(skb); | ||
314 | |||
310 | spin_lock_irqsave(&list->lock, flags); | 315 | spin_lock_irqsave(&list->lock, flags); |
311 | skb->dropcount = atomic_read(&sk->sk_drops); | 316 | skb->dropcount = atomic_read(&sk->sk_drops); |
312 | __skb_queue_tail(list, skb); | 317 | __skb_queue_tail(list, skb); |
@@ -327,6 +332,10 @@ int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested) | |||
327 | 332 | ||
328 | skb->dev = NULL; | 333 | skb->dev = NULL; |
329 | 334 | ||
335 | if (sk_rcvqueues_full(sk, skb)) { | ||
336 | atomic_inc(&sk->sk_drops); | ||
337 | goto discard_and_relse; | ||
338 | } | ||
330 | if (nested) | 339 | if (nested) |
331 | bh_lock_sock_nested(sk); | 340 | bh_lock_sock_nested(sk); |
332 | else | 341 | else |
@@ -364,11 +373,11 @@ EXPORT_SYMBOL(sk_reset_txq); | |||
364 | 373 | ||
365 | struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie) | 374 | struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie) |
366 | { | 375 | { |
367 | struct dst_entry *dst = sk->sk_dst_cache; | 376 | struct dst_entry *dst = __sk_dst_get(sk); |
368 | 377 | ||
369 | if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) { | 378 | if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) { |
370 | sk_tx_queue_clear(sk); | 379 | sk_tx_queue_clear(sk); |
371 | sk->sk_dst_cache = NULL; | 380 | rcu_assign_pointer(sk->sk_dst_cache, NULL); |
372 | dst_release(dst); | 381 | dst_release(dst); |
373 | return NULL; | 382 | return NULL; |
374 | } | 383 | } |
@@ -1157,7 +1166,7 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority) | |||
1157 | skb_queue_head_init(&newsk->sk_async_wait_queue); | 1166 | skb_queue_head_init(&newsk->sk_async_wait_queue); |
1158 | #endif | 1167 | #endif |
1159 | 1168 | ||
1160 | rwlock_init(&newsk->sk_dst_lock); | 1169 | spin_lock_init(&newsk->sk_dst_lock); |
1161 | rwlock_init(&newsk->sk_callback_lock); | 1170 | rwlock_init(&newsk->sk_callback_lock); |
1162 | lockdep_set_class_and_name(&newsk->sk_callback_lock, | 1171 | lockdep_set_class_and_name(&newsk->sk_callback_lock, |
1163 | af_callback_keys + newsk->sk_family, | 1172 | af_callback_keys + newsk->sk_family, |
@@ -1207,7 +1216,7 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority) | |||
1207 | */ | 1216 | */ |
1208 | sk_refcnt_debug_inc(newsk); | 1217 | sk_refcnt_debug_inc(newsk); |
1209 | sk_set_socket(newsk, NULL); | 1218 | sk_set_socket(newsk, NULL); |
1210 | newsk->sk_sleep = NULL; | 1219 | newsk->sk_wq = NULL; |
1211 | 1220 | ||
1212 | if (newsk->sk_prot->sockets_allocated) | 1221 | if (newsk->sk_prot->sockets_allocated) |
1213 | percpu_counter_inc(newsk->sk_prot->sockets_allocated); | 1222 | percpu_counter_inc(newsk->sk_prot->sockets_allocated); |
@@ -1227,6 +1236,7 @@ void sk_setup_caps(struct sock *sk, struct dst_entry *dst) | |||
1227 | sk->sk_route_caps = dst->dev->features; | 1236 | sk->sk_route_caps = dst->dev->features; |
1228 | if (sk->sk_route_caps & NETIF_F_GSO) | 1237 | if (sk->sk_route_caps & NETIF_F_GSO) |
1229 | sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE; | 1238 | sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE; |
1239 | sk->sk_route_caps &= ~sk->sk_route_nocaps; | ||
1230 | if (sk_can_gso(sk)) { | 1240 | if (sk_can_gso(sk)) { |
1231 | if (dst->header_len) { | 1241 | if (dst->header_len) { |
1232 | sk->sk_route_caps &= ~NETIF_F_GSO_MASK; | 1242 | sk->sk_route_caps &= ~NETIF_F_GSO_MASK; |
@@ -1395,7 +1405,7 @@ static long sock_wait_for_wmem(struct sock *sk, long timeo) | |||
1395 | if (signal_pending(current)) | 1405 | if (signal_pending(current)) |
1396 | break; | 1406 | break; |
1397 | set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); | 1407 | set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); |
1398 | prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); | 1408 | prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); |
1399 | if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) | 1409 | if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) |
1400 | break; | 1410 | break; |
1401 | if (sk->sk_shutdown & SEND_SHUTDOWN) | 1411 | if (sk->sk_shutdown & SEND_SHUTDOWN) |
@@ -1404,7 +1414,7 @@ static long sock_wait_for_wmem(struct sock *sk, long timeo) | |||
1404 | break; | 1414 | break; |
1405 | timeo = schedule_timeout(timeo); | 1415 | timeo = schedule_timeout(timeo); |
1406 | } | 1416 | } |
1407 | finish_wait(sk->sk_sleep, &wait); | 1417 | finish_wait(sk_sleep(sk), &wait); |
1408 | return timeo; | 1418 | return timeo; |
1409 | } | 1419 | } |
1410 | 1420 | ||
@@ -1531,6 +1541,7 @@ static void __release_sock(struct sock *sk) | |||
1531 | do { | 1541 | do { |
1532 | struct sk_buff *next = skb->next; | 1542 | struct sk_buff *next = skb->next; |
1533 | 1543 | ||
1544 | WARN_ON_ONCE(skb_dst_is_noref(skb)); | ||
1534 | skb->next = NULL; | 1545 | skb->next = NULL; |
1535 | sk_backlog_rcv(sk, skb); | 1546 | sk_backlog_rcv(sk, skb); |
1536 | 1547 | ||
@@ -1570,11 +1581,11 @@ int sk_wait_data(struct sock *sk, long *timeo) | |||
1570 | int rc; | 1581 | int rc; |
1571 | DEFINE_WAIT(wait); | 1582 | DEFINE_WAIT(wait); |
1572 | 1583 | ||
1573 | prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); | 1584 | prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); |
1574 | set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); | 1585 | set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); |
1575 | rc = sk_wait_event(sk, timeo, !skb_queue_empty(&sk->sk_receive_queue)); | 1586 | rc = sk_wait_event(sk, timeo, !skb_queue_empty(&sk->sk_receive_queue)); |
1576 | clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); | 1587 | clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); |
1577 | finish_wait(sk->sk_sleep, &wait); | 1588 | finish_wait(sk_sleep(sk), &wait); |
1578 | return rc; | 1589 | return rc; |
1579 | } | 1590 | } |
1580 | EXPORT_SYMBOL(sk_wait_data); | 1591 | EXPORT_SYMBOL(sk_wait_data); |
@@ -1796,41 +1807,53 @@ EXPORT_SYMBOL(sock_no_sendpage); | |||
1796 | 1807 | ||
1797 | static void sock_def_wakeup(struct sock *sk) | 1808 | static void sock_def_wakeup(struct sock *sk) |
1798 | { | 1809 | { |
1799 | read_lock(&sk->sk_callback_lock); | 1810 | struct socket_wq *wq; |
1800 | if (sk_has_sleeper(sk)) | 1811 | |
1801 | wake_up_interruptible_all(sk->sk_sleep); | 1812 | rcu_read_lock(); |
1802 | read_unlock(&sk->sk_callback_lock); | 1813 | wq = rcu_dereference(sk->sk_wq); |
1814 | if (wq_has_sleeper(wq)) | ||
1815 | wake_up_interruptible_all(&wq->wait); | ||
1816 | rcu_read_unlock(); | ||
1803 | } | 1817 | } |
1804 | 1818 | ||
1805 | static void sock_def_error_report(struct sock *sk) | 1819 | static void sock_def_error_report(struct sock *sk) |
1806 | { | 1820 | { |
1807 | read_lock(&sk->sk_callback_lock); | 1821 | struct socket_wq *wq; |
1808 | if (sk_has_sleeper(sk)) | 1822 | |
1809 | wake_up_interruptible_poll(sk->sk_sleep, POLLERR); | 1823 | rcu_read_lock(); |
1824 | wq = rcu_dereference(sk->sk_wq); | ||
1825 | if (wq_has_sleeper(wq)) | ||
1826 | wake_up_interruptible_poll(&wq->wait, POLLERR); | ||
1810 | sk_wake_async(sk, SOCK_WAKE_IO, POLL_ERR); | 1827 | sk_wake_async(sk, SOCK_WAKE_IO, POLL_ERR); |
1811 | read_unlock(&sk->sk_callback_lock); | 1828 | rcu_read_unlock(); |
1812 | } | 1829 | } |
1813 | 1830 | ||
1814 | static void sock_def_readable(struct sock *sk, int len) | 1831 | static void sock_def_readable(struct sock *sk, int len) |
1815 | { | 1832 | { |
1816 | read_lock(&sk->sk_callback_lock); | 1833 | struct socket_wq *wq; |
1817 | if (sk_has_sleeper(sk)) | 1834 | |
1818 | wake_up_interruptible_sync_poll(sk->sk_sleep, POLLIN | | 1835 | rcu_read_lock(); |
1836 | wq = rcu_dereference(sk->sk_wq); | ||
1837 | if (wq_has_sleeper(wq)) | ||
1838 | wake_up_interruptible_sync_poll(&wq->wait, POLLIN | | ||
1819 | POLLRDNORM | POLLRDBAND); | 1839 | POLLRDNORM | POLLRDBAND); |
1820 | sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN); | 1840 | sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN); |
1821 | read_unlock(&sk->sk_callback_lock); | 1841 | rcu_read_unlock(); |
1822 | } | 1842 | } |
1823 | 1843 | ||
1824 | static void sock_def_write_space(struct sock *sk) | 1844 | static void sock_def_write_space(struct sock *sk) |
1825 | { | 1845 | { |
1826 | read_lock(&sk->sk_callback_lock); | 1846 | struct socket_wq *wq; |
1847 | |||
1848 | rcu_read_lock(); | ||
1827 | 1849 | ||
1828 | /* Do not wake up a writer until he can make "significant" | 1850 | /* Do not wake up a writer until he can make "significant" |
1829 | * progress. --DaveM | 1851 | * progress. --DaveM |
1830 | */ | 1852 | */ |
1831 | if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) { | 1853 | if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) { |
1832 | if (sk_has_sleeper(sk)) | 1854 | wq = rcu_dereference(sk->sk_wq); |
1833 | wake_up_interruptible_sync_poll(sk->sk_sleep, POLLOUT | | 1855 | if (wq_has_sleeper(wq)) |
1856 | wake_up_interruptible_sync_poll(&wq->wait, POLLOUT | | ||
1834 | POLLWRNORM | POLLWRBAND); | 1857 | POLLWRNORM | POLLWRBAND); |
1835 | 1858 | ||
1836 | /* Should agree with poll, otherwise some programs break */ | 1859 | /* Should agree with poll, otherwise some programs break */ |
@@ -1838,7 +1861,7 @@ static void sock_def_write_space(struct sock *sk) | |||
1838 | sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); | 1861 | sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); |
1839 | } | 1862 | } |
1840 | 1863 | ||
1841 | read_unlock(&sk->sk_callback_lock); | 1864 | rcu_read_unlock(); |
1842 | } | 1865 | } |
1843 | 1866 | ||
1844 | static void sock_def_destruct(struct sock *sk) | 1867 | static void sock_def_destruct(struct sock *sk) |
@@ -1885,7 +1908,6 @@ void sock_init_data(struct socket *sock, struct sock *sk) | |||
1885 | sk->sk_allocation = GFP_KERNEL; | 1908 | sk->sk_allocation = GFP_KERNEL; |
1886 | sk->sk_rcvbuf = sysctl_rmem_default; | 1909 | sk->sk_rcvbuf = sysctl_rmem_default; |
1887 | sk->sk_sndbuf = sysctl_wmem_default; | 1910 | sk->sk_sndbuf = sysctl_wmem_default; |
1888 | sk->sk_backlog.limit = sk->sk_rcvbuf << 1; | ||
1889 | sk->sk_state = TCP_CLOSE; | 1911 | sk->sk_state = TCP_CLOSE; |
1890 | sk_set_socket(sk, sock); | 1912 | sk_set_socket(sk, sock); |
1891 | 1913 | ||
@@ -1893,12 +1915,12 @@ void sock_init_data(struct socket *sock, struct sock *sk) | |||
1893 | 1915 | ||
1894 | if (sock) { | 1916 | if (sock) { |
1895 | sk->sk_type = sock->type; | 1917 | sk->sk_type = sock->type; |
1896 | sk->sk_sleep = &sock->wait; | 1918 | sk->sk_wq = sock->wq; |
1897 | sock->sk = sk; | 1919 | sock->sk = sk; |
1898 | } else | 1920 | } else |
1899 | sk->sk_sleep = NULL; | 1921 | sk->sk_wq = NULL; |
1900 | 1922 | ||
1901 | rwlock_init(&sk->sk_dst_lock); | 1923 | spin_lock_init(&sk->sk_dst_lock); |
1902 | rwlock_init(&sk->sk_callback_lock); | 1924 | rwlock_init(&sk->sk_callback_lock); |
1903 | lockdep_set_class_and_name(&sk->sk_callback_lock, | 1925 | lockdep_set_class_and_name(&sk->sk_callback_lock, |
1904 | af_callback_keys + sk->sk_family, | 1926 | af_callback_keys + sk->sk_family, |
diff --git a/net/core/stream.c b/net/core/stream.c index a37debfeb1b2..cc196f42b8d8 100644 --- a/net/core/stream.c +++ b/net/core/stream.c | |||
@@ -28,15 +28,19 @@ | |||
28 | void sk_stream_write_space(struct sock *sk) | 28 | void sk_stream_write_space(struct sock *sk) |
29 | { | 29 | { |
30 | struct socket *sock = sk->sk_socket; | 30 | struct socket *sock = sk->sk_socket; |
31 | struct socket_wq *wq; | ||
31 | 32 | ||
32 | if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk) && sock) { | 33 | if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk) && sock) { |
33 | clear_bit(SOCK_NOSPACE, &sock->flags); | 34 | clear_bit(SOCK_NOSPACE, &sock->flags); |
34 | 35 | ||
35 | if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) | 36 | rcu_read_lock(); |
36 | wake_up_interruptible_poll(sk->sk_sleep, POLLOUT | | 37 | wq = rcu_dereference(sk->sk_wq); |
38 | if (wq_has_sleeper(wq)) | ||
39 | wake_up_interruptible_poll(&wq->wait, POLLOUT | | ||
37 | POLLWRNORM | POLLWRBAND); | 40 | POLLWRNORM | POLLWRBAND); |
38 | if (sock->fasync_list && !(sk->sk_shutdown & SEND_SHUTDOWN)) | 41 | if (wq && wq->fasync_list && !(sk->sk_shutdown & SEND_SHUTDOWN)) |
39 | sock_wake_async(sock, SOCK_WAKE_SPACE, POLL_OUT); | 42 | sock_wake_async(sock, SOCK_WAKE_SPACE, POLL_OUT); |
43 | rcu_read_unlock(); | ||
40 | } | 44 | } |
41 | } | 45 | } |
42 | 46 | ||
@@ -66,13 +70,13 @@ int sk_stream_wait_connect(struct sock *sk, long *timeo_p) | |||
66 | if (signal_pending(tsk)) | 70 | if (signal_pending(tsk)) |
67 | return sock_intr_errno(*timeo_p); | 71 | return sock_intr_errno(*timeo_p); |
68 | 72 | ||
69 | prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); | 73 | prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); |
70 | sk->sk_write_pending++; | 74 | sk->sk_write_pending++; |
71 | done = sk_wait_event(sk, timeo_p, | 75 | done = sk_wait_event(sk, timeo_p, |
72 | !sk->sk_err && | 76 | !sk->sk_err && |
73 | !((1 << sk->sk_state) & | 77 | !((1 << sk->sk_state) & |
74 | ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT))); | 78 | ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT))); |
75 | finish_wait(sk->sk_sleep, &wait); | 79 | finish_wait(sk_sleep(sk), &wait); |
76 | sk->sk_write_pending--; | 80 | sk->sk_write_pending--; |
77 | } while (!done); | 81 | } while (!done); |
78 | return 0; | 82 | return 0; |
@@ -96,13 +100,13 @@ void sk_stream_wait_close(struct sock *sk, long timeout) | |||
96 | DEFINE_WAIT(wait); | 100 | DEFINE_WAIT(wait); |
97 | 101 | ||
98 | do { | 102 | do { |
99 | prepare_to_wait(sk->sk_sleep, &wait, | 103 | prepare_to_wait(sk_sleep(sk), &wait, |
100 | TASK_INTERRUPTIBLE); | 104 | TASK_INTERRUPTIBLE); |
101 | if (sk_wait_event(sk, &timeout, !sk_stream_closing(sk))) | 105 | if (sk_wait_event(sk, &timeout, !sk_stream_closing(sk))) |
102 | break; | 106 | break; |
103 | } while (!signal_pending(current) && timeout); | 107 | } while (!signal_pending(current) && timeout); |
104 | 108 | ||
105 | finish_wait(sk->sk_sleep, &wait); | 109 | finish_wait(sk_sleep(sk), &wait); |
106 | } | 110 | } |
107 | } | 111 | } |
108 | 112 | ||
@@ -126,7 +130,7 @@ int sk_stream_wait_memory(struct sock *sk, long *timeo_p) | |||
126 | while (1) { | 130 | while (1) { |
127 | set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); | 131 | set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); |
128 | 132 | ||
129 | prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); | 133 | prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); |
130 | 134 | ||
131 | if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) | 135 | if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) |
132 | goto do_error; | 136 | goto do_error; |
@@ -157,7 +161,7 @@ int sk_stream_wait_memory(struct sock *sk, long *timeo_p) | |||
157 | *timeo_p = current_timeo; | 161 | *timeo_p = current_timeo; |
158 | } | 162 | } |
159 | out: | 163 | out: |
160 | finish_wait(sk->sk_sleep, &wait); | 164 | finish_wait(sk_sleep(sk), &wait); |
161 | return err; | 165 | return err; |
162 | 166 | ||
163 | do_error: | 167 | do_error: |
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index b7b6b8208f75..01eee5d984be 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c | |||
@@ -11,12 +11,72 @@ | |||
11 | #include <linux/socket.h> | 11 | #include <linux/socket.h> |
12 | #include <linux/netdevice.h> | 12 | #include <linux/netdevice.h> |
13 | #include <linux/ratelimit.h> | 13 | #include <linux/ratelimit.h> |
14 | #include <linux/vmalloc.h> | ||
14 | #include <linux/init.h> | 15 | #include <linux/init.h> |
15 | #include <linux/slab.h> | 16 | #include <linux/slab.h> |
16 | 17 | ||
17 | #include <net/ip.h> | 18 | #include <net/ip.h> |
18 | #include <net/sock.h> | 19 | #include <net/sock.h> |
19 | 20 | ||
21 | #ifdef CONFIG_RPS | ||
22 | static int rps_sock_flow_sysctl(ctl_table *table, int write, | ||
23 | void __user *buffer, size_t *lenp, loff_t *ppos) | ||
24 | { | ||
25 | unsigned int orig_size, size; | ||
26 | int ret, i; | ||
27 | ctl_table tmp = { | ||
28 | .data = &size, | ||
29 | .maxlen = sizeof(size), | ||
30 | .mode = table->mode | ||
31 | }; | ||
32 | struct rps_sock_flow_table *orig_sock_table, *sock_table; | ||
33 | static DEFINE_MUTEX(sock_flow_mutex); | ||
34 | |||
35 | mutex_lock(&sock_flow_mutex); | ||
36 | |||
37 | orig_sock_table = rps_sock_flow_table; | ||
38 | size = orig_size = orig_sock_table ? orig_sock_table->mask + 1 : 0; | ||
39 | |||
40 | ret = proc_dointvec(&tmp, write, buffer, lenp, ppos); | ||
41 | |||
42 | if (write) { | ||
43 | if (size) { | ||
44 | if (size > 1<<30) { | ||
45 | /* Enforce limit to prevent overflow */ | ||
46 | mutex_unlock(&sock_flow_mutex); | ||
47 | return -EINVAL; | ||
48 | } | ||
49 | size = roundup_pow_of_two(size); | ||
50 | if (size != orig_size) { | ||
51 | sock_table = | ||
52 | vmalloc(RPS_SOCK_FLOW_TABLE_SIZE(size)); | ||
53 | if (!sock_table) { | ||
54 | mutex_unlock(&sock_flow_mutex); | ||
55 | return -ENOMEM; | ||
56 | } | ||
57 | |||
58 | sock_table->mask = size - 1; | ||
59 | } else | ||
60 | sock_table = orig_sock_table; | ||
61 | |||
62 | for (i = 0; i < size; i++) | ||
63 | sock_table->ents[i] = RPS_NO_CPU; | ||
64 | } else | ||
65 | sock_table = NULL; | ||
66 | |||
67 | if (sock_table != orig_sock_table) { | ||
68 | rcu_assign_pointer(rps_sock_flow_table, sock_table); | ||
69 | synchronize_rcu(); | ||
70 | vfree(orig_sock_table); | ||
71 | } | ||
72 | } | ||
73 | |||
74 | mutex_unlock(&sock_flow_mutex); | ||
75 | |||
76 | return ret; | ||
77 | } | ||
78 | #endif /* CONFIG_RPS */ | ||
79 | |||
20 | static struct ctl_table net_core_table[] = { | 80 | static struct ctl_table net_core_table[] = { |
21 | #ifdef CONFIG_NET | 81 | #ifdef CONFIG_NET |
22 | { | 82 | { |
@@ -62,6 +122,13 @@ static struct ctl_table net_core_table[] = { | |||
62 | .proc_handler = proc_dointvec | 122 | .proc_handler = proc_dointvec |
63 | }, | 123 | }, |
64 | { | 124 | { |
125 | .procname = "netdev_tstamp_prequeue", | ||
126 | .data = &netdev_tstamp_prequeue, | ||
127 | .maxlen = sizeof(int), | ||
128 | .mode = 0644, | ||
129 | .proc_handler = proc_dointvec | ||
130 | }, | ||
131 | { | ||
65 | .procname = "message_cost", | 132 | .procname = "message_cost", |
66 | .data = &net_ratelimit_state.interval, | 133 | .data = &net_ratelimit_state.interval, |
67 | .maxlen = sizeof(int), | 134 | .maxlen = sizeof(int), |
@@ -82,6 +149,14 @@ static struct ctl_table net_core_table[] = { | |||
82 | .mode = 0644, | 149 | .mode = 0644, |
83 | .proc_handler = proc_dointvec | 150 | .proc_handler = proc_dointvec |
84 | }, | 151 | }, |
152 | #ifdef CONFIG_RPS | ||
153 | { | ||
154 | .procname = "rps_sock_flow_entries", | ||
155 | .maxlen = sizeof(int), | ||
156 | .mode = 0644, | ||
157 | .proc_handler = rps_sock_flow_sysctl | ||
158 | }, | ||
159 | #endif | ||
85 | #endif /* CONFIG_NET */ | 160 | #endif /* CONFIG_NET */ |
86 | { | 161 | { |
87 | .procname = "netdev_budget", | 162 | .procname = "netdev_budget", |