Diffstat (limited to 'net/core')
 -rw-r--r--  net/core/Makefile           |    2
 -rw-r--r--  net/core/datagram.c         |   21
 -rw-r--r--  net/core/dev.c              | 1413
 -rw-r--r--  net/core/dev_addr_lists.c   |  741
 -rw-r--r--  net/core/dev_mcast.c        |  232
 -rw-r--r--  net/core/dst.c              |   45
 -rw-r--r--  net/core/ethtool.c          |  152
 -rw-r--r--  net/core/fib_rules.c        |   31
 -rw-r--r--  net/core/filter.c           |    7
 -rw-r--r--  net/core/flow.c             |  405
 -rw-r--r--  net/core/net-sysfs.c        |  377
 -rw-r--r--  net/core/net-sysfs.h        |    1
 -rw-r--r--  net/core/net_namespace.c    |   95
 -rw-r--r--  net/core/netpoll.c          |   26
 -rw-r--r--  net/core/pktgen.c           |   58
 -rw-r--r--  net/core/rtnetlink.c        |  242
 -rw-r--r--  net/core/skbuff.c           |   72
 -rw-r--r--  net/core/sock.c             |   97
 -rw-r--r--  net/core/stream.c           |   22
 -rw-r--r--  net/core/sysctl_net_core.c  |   75
20 files changed, 2688 insertions, 1426 deletions
diff --git a/net/core/Makefile b/net/core/Makefile
index 08791ac3e05a..51c3eec850ef 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
| @@ -7,7 +7,7 @@ obj-y := sock.o request_sock.o skbuff.o iovec.o datagram.o stream.o scm.o \ | |||
| 7 | 7 | ||
| 8 | obj-$(CONFIG_SYSCTL) += sysctl_net_core.o | 8 | obj-$(CONFIG_SYSCTL) += sysctl_net_core.o |
| 9 | 9 | ||
| 10 | obj-y += dev.o ethtool.o dev_mcast.o dst.o netevent.o \ | 10 | obj-y += dev.o ethtool.o dev_addr_lists.o dst.o netevent.o \ |
| 11 | neighbour.o rtnetlink.o utils.o link_watch.o filter.o | 11 | neighbour.o rtnetlink.o utils.o link_watch.o filter.o |
| 12 | 12 | ||
| 13 | obj-$(CONFIG_XFRM) += flow.o | 13 | obj-$(CONFIG_XFRM) += flow.o |
diff --git a/net/core/datagram.c b/net/core/datagram.c
index 2dccd4ee591b..e0097531417a 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
| @@ -86,7 +86,7 @@ static int wait_for_packet(struct sock *sk, int *err, long *timeo_p) | |||
| 86 | int error; | 86 | int error; |
| 87 | DEFINE_WAIT_FUNC(wait, receiver_wake_function); | 87 | DEFINE_WAIT_FUNC(wait, receiver_wake_function); |
| 88 | 88 | ||
| 89 | prepare_to_wait_exclusive(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); | 89 | prepare_to_wait_exclusive(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); |
| 90 | 90 | ||
| 91 | /* Socket errors? */ | 91 | /* Socket errors? */ |
| 92 | error = sock_error(sk); | 92 | error = sock_error(sk); |
| @@ -115,7 +115,7 @@ static int wait_for_packet(struct sock *sk, int *err, long *timeo_p) | |||
| 115 | error = 0; | 115 | error = 0; |
| 116 | *timeo_p = schedule_timeout(*timeo_p); | 116 | *timeo_p = schedule_timeout(*timeo_p); |
| 117 | out: | 117 | out: |
| 118 | finish_wait(sk->sk_sleep, &wait); | 118 | finish_wait(sk_sleep(sk), &wait); |
| 119 | return error; | 119 | return error; |
| 120 | interrupted: | 120 | interrupted: |
| 121 | error = sock_intr_errno(*timeo_p); | 121 | error = sock_intr_errno(*timeo_p); |
| @@ -229,9 +229,18 @@ EXPORT_SYMBOL(skb_free_datagram); | |||
| 229 | 229 | ||
| 230 | void skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb) | 230 | void skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb) |
| 231 | { | 231 | { |
| 232 | lock_sock(sk); | 232 | if (likely(atomic_read(&skb->users) == 1)) |
| 233 | skb_free_datagram(sk, skb); | 233 | smp_rmb(); |
| 234 | release_sock(sk); | 234 | else if (likely(!atomic_dec_and_test(&skb->users))) |
| 235 | return; | ||
| 236 | |||
| 237 | lock_sock_bh(sk); | ||
| 238 | skb_orphan(skb); | ||
| 239 | sk_mem_reclaim_partial(sk); | ||
| 240 | unlock_sock_bh(sk); | ||
| 241 | |||
| 242 | /* skb is now orphaned, can be freed outside of locked section */ | ||
| 243 | __kfree_skb(skb); | ||
| 235 | } | 244 | } |
| 236 | EXPORT_SYMBOL(skb_free_datagram_locked); | 245 | EXPORT_SYMBOL(skb_free_datagram_locked); |
| 237 | 246 | ||
| @@ -726,7 +735,7 @@ unsigned int datagram_poll(struct file *file, struct socket *sock, | |||
| 726 | struct sock *sk = sock->sk; | 735 | struct sock *sk = sock->sk; |
| 727 | unsigned int mask; | 736 | unsigned int mask; |
| 728 | 737 | ||
| 729 | sock_poll_wait(file, sk->sk_sleep, wait); | 738 | sock_poll_wait(file, sk_sleep(sk), wait); |
| 730 | mask = 0; | 739 | mask = 0; |
| 731 | 740 | ||
| 732 | /* exceptional events? */ | 741 | /* exceptional events? */ |
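The rewritten skb_free_datagram_locked() above avoids lock_sock()/release_sock() on the common path: an skb with a single user is freed immediately, and only a contended refcount falls back to the short lock_sock_bh() section. A minimal userspace sketch of that fast path (illustrative names and C11 atomics, not the kernel implementation):

```c
/* Userspace sketch of the fast path added to skb_free_datagram_locked():
 * if we hold the only reference, the locked section can be skipped;
 * otherwise only the thread that drops the last reference frees the buffer.
 */
#include <stdatomic.h>
#include <stdlib.h>

struct buffer {
	atomic_int users;
	/* payload ... */
};

static void buffer_free_datagram(struct buffer *b)
{
	if (atomic_load_explicit(&b->users, memory_order_relaxed) == 1) {
		/* sole owner: pairs with the smp_rmb() in the patch */
		atomic_thread_fence(memory_order_acquire);
	} else if (atomic_fetch_sub(&b->users, 1) != 1) {
		return;	/* another reference remains, nothing to free yet */
	}
	/* last reference: the expensive locked accounting would go here */
	free(b);
}
```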
diff --git a/net/core/dev.c b/net/core/dev.c
index 264137fce3a2..1845b08c624e 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
| @@ -130,6 +130,7 @@ | |||
| 130 | #include <linux/jhash.h> | 130 | #include <linux/jhash.h> |
| 131 | #include <linux/random.h> | 131 | #include <linux/random.h> |
| 132 | #include <trace/events/napi.h> | 132 | #include <trace/events/napi.h> |
| 133 | #include <linux/pci.h> | ||
| 133 | 134 | ||
| 134 | #include "net-sysfs.h" | 135 | #include "net-sysfs.h" |
| 135 | 136 | ||
| @@ -207,6 +208,20 @@ static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex) | |||
| 207 | return &net->dev_index_head[ifindex & (NETDEV_HASHENTRIES - 1)]; | 208 | return &net->dev_index_head[ifindex & (NETDEV_HASHENTRIES - 1)]; |
| 208 | } | 209 | } |
| 209 | 210 | ||
| 211 | static inline void rps_lock(struct softnet_data *sd) | ||
| 212 | { | ||
| 213 | #ifdef CONFIG_RPS | ||
| 214 | spin_lock(&sd->input_pkt_queue.lock); | ||
| 215 | #endif | ||
| 216 | } | ||
| 217 | |||
| 218 | static inline void rps_unlock(struct softnet_data *sd) | ||
| 219 | { | ||
| 220 | #ifdef CONFIG_RPS | ||
| 221 | spin_unlock(&sd->input_pkt_queue.lock); | ||
| 222 | #endif | ||
| 223 | } | ||
| 224 | |||
| 210 | /* Device list insertion */ | 225 | /* Device list insertion */ |
| 211 | static int list_netdevice(struct net_device *dev) | 226 | static int list_netdevice(struct net_device *dev) |
| 212 | { | 227 | { |
| @@ -249,7 +264,7 @@ static RAW_NOTIFIER_HEAD(netdev_chain); | |||
| 249 | * queue in the local softnet handler. | 264 | * queue in the local softnet handler. |
| 250 | */ | 265 | */ |
| 251 | 266 | ||
| 252 | DEFINE_PER_CPU(struct softnet_data, softnet_data); | 267 | DEFINE_PER_CPU_ALIGNED(struct softnet_data, softnet_data); |
| 253 | EXPORT_PER_CPU_SYMBOL(softnet_data); | 268 | EXPORT_PER_CPU_SYMBOL(softnet_data); |
| 254 | 269 | ||
| 255 | #ifdef CONFIG_LOCKDEP | 270 | #ifdef CONFIG_LOCKDEP |
| @@ -773,14 +788,17 @@ EXPORT_SYMBOL(__dev_getfirstbyhwtype); | |||
| 773 | 788 | ||
| 774 | struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type) | 789 | struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type) |
| 775 | { | 790 | { |
| 776 | struct net_device *dev; | 791 | struct net_device *dev, *ret = NULL; |
| 777 | 792 | ||
| 778 | rtnl_lock(); | 793 | rcu_read_lock(); |
| 779 | dev = __dev_getfirstbyhwtype(net, type); | 794 | for_each_netdev_rcu(net, dev) |
| 780 | if (dev) | 795 | if (dev->type == type) { |
| 781 | dev_hold(dev); | 796 | dev_hold(dev); |
| 782 | rtnl_unlock(); | 797 | ret = dev; |
| 783 | return dev; | 798 | break; |
| 799 | } | ||
| 800 | rcu_read_unlock(); | ||
| 801 | return ret; | ||
| 784 | } | 802 | } |
| 785 | EXPORT_SYMBOL(dev_getfirstbyhwtype); | 803 | EXPORT_SYMBOL(dev_getfirstbyhwtype); |
| 786 | 804 | ||
| @@ -936,18 +954,22 @@ int dev_alloc_name(struct net_device *dev, const char *name) | |||
| 936 | } | 954 | } |
| 937 | EXPORT_SYMBOL(dev_alloc_name); | 955 | EXPORT_SYMBOL(dev_alloc_name); |
| 938 | 956 | ||
| 939 | static int dev_get_valid_name(struct net *net, const char *name, char *buf, | 957 | static int dev_get_valid_name(struct net_device *dev, const char *name, bool fmt) |
| 940 | bool fmt) | ||
| 941 | { | 958 | { |
| 959 | struct net *net; | ||
| 960 | |||
| 961 | BUG_ON(!dev_net(dev)); | ||
| 962 | net = dev_net(dev); | ||
| 963 | |||
| 942 | if (!dev_valid_name(name)) | 964 | if (!dev_valid_name(name)) |
| 943 | return -EINVAL; | 965 | return -EINVAL; |
| 944 | 966 | ||
| 945 | if (fmt && strchr(name, '%')) | 967 | if (fmt && strchr(name, '%')) |
| 946 | return __dev_alloc_name(net, name, buf); | 968 | return dev_alloc_name(dev, name); |
| 947 | else if (__dev_get_by_name(net, name)) | 969 | else if (__dev_get_by_name(net, name)) |
| 948 | return -EEXIST; | 970 | return -EEXIST; |
| 949 | else if (buf != name) | 971 | else if (dev->name != name) |
| 950 | strlcpy(buf, name, IFNAMSIZ); | 972 | strlcpy(dev->name, name, IFNAMSIZ); |
| 951 | 973 | ||
| 952 | return 0; | 974 | return 0; |
| 953 | } | 975 | } |
| @@ -979,20 +1001,15 @@ int dev_change_name(struct net_device *dev, const char *newname) | |||
| 979 | 1001 | ||
| 980 | memcpy(oldname, dev->name, IFNAMSIZ); | 1002 | memcpy(oldname, dev->name, IFNAMSIZ); |
| 981 | 1003 | ||
| 982 | err = dev_get_valid_name(net, newname, dev->name, 1); | 1004 | err = dev_get_valid_name(dev, newname, 1); |
| 983 | if (err < 0) | 1005 | if (err < 0) |
| 984 | return err; | 1006 | return err; |
| 985 | 1007 | ||
| 986 | rollback: | 1008 | rollback: |
| 987 | /* For now only devices in the initial network namespace | 1009 | ret = device_rename(&dev->dev, dev->name); |
| 988 | * are in sysfs. | 1010 | if (ret) { |
| 989 | */ | 1011 | memcpy(dev->name, oldname, IFNAMSIZ); |
| 990 | if (net_eq(net, &init_net)) { | 1012 | return ret; |
| 991 | ret = device_rename(&dev->dev, dev->name); | ||
| 992 | if (ret) { | ||
| 993 | memcpy(dev->name, oldname, IFNAMSIZ); | ||
| 994 | return ret; | ||
| 995 | } | ||
| 996 | } | 1013 | } |
| 997 | 1014 | ||
| 998 | write_lock_bh(&dev_base_lock); | 1015 | write_lock_bh(&dev_base_lock); |
| @@ -1085,9 +1102,9 @@ void netdev_state_change(struct net_device *dev) | |||
| 1085 | } | 1102 | } |
| 1086 | EXPORT_SYMBOL(netdev_state_change); | 1103 | EXPORT_SYMBOL(netdev_state_change); |
| 1087 | 1104 | ||
| 1088 | void netdev_bonding_change(struct net_device *dev, unsigned long event) | 1105 | int netdev_bonding_change(struct net_device *dev, unsigned long event) |
| 1089 | { | 1106 | { |
| 1090 | call_netdevice_notifiers(event, dev); | 1107 | return call_netdevice_notifiers(event, dev); |
| 1091 | } | 1108 | } |
| 1092 | EXPORT_SYMBOL(netdev_bonding_change); | 1109 | EXPORT_SYMBOL(netdev_bonding_change); |
| 1093 | 1110 | ||
| @@ -1417,6 +1434,7 @@ EXPORT_SYMBOL(unregister_netdevice_notifier); | |||
| 1417 | 1434 | ||
| 1418 | int call_netdevice_notifiers(unsigned long val, struct net_device *dev) | 1435 | int call_netdevice_notifiers(unsigned long val, struct net_device *dev) |
| 1419 | { | 1436 | { |
| 1437 | ASSERT_RTNL(); | ||
| 1420 | return raw_notifier_call_chain(&netdev_chain, val, dev); | 1438 | return raw_notifier_call_chain(&netdev_chain, val, dev); |
| 1421 | } | 1439 | } |
| 1422 | 1440 | ||
| @@ -1435,7 +1453,7 @@ void net_disable_timestamp(void) | |||
| 1435 | } | 1453 | } |
| 1436 | EXPORT_SYMBOL(net_disable_timestamp); | 1454 | EXPORT_SYMBOL(net_disable_timestamp); |
| 1437 | 1455 | ||
| 1438 | static inline void net_timestamp(struct sk_buff *skb) | 1456 | static inline void net_timestamp_set(struct sk_buff *skb) |
| 1439 | { | 1457 | { |
| 1440 | if (atomic_read(&netstamp_needed)) | 1458 | if (atomic_read(&netstamp_needed)) |
| 1441 | __net_timestamp(skb); | 1459 | __net_timestamp(skb); |
| @@ -1443,6 +1461,12 @@ static inline void net_timestamp(struct sk_buff *skb) | |||
| 1443 | skb->tstamp.tv64 = 0; | 1461 | skb->tstamp.tv64 = 0; |
| 1444 | } | 1462 | } |
| 1445 | 1463 | ||
| 1464 | static inline void net_timestamp_check(struct sk_buff *skb) | ||
| 1465 | { | ||
| 1466 | if (!skb->tstamp.tv64 && atomic_read(&netstamp_needed)) | ||
| 1467 | __net_timestamp(skb); | ||
| 1468 | } | ||
| 1469 | |||
| 1446 | /** | 1470 | /** |
| 1447 | * dev_forward_skb - loopback an skb to another netif | 1471 | * dev_forward_skb - loopback an skb to another netif |
| 1448 | * | 1472 | * |
| @@ -1489,9 +1513,9 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) | |||
| 1489 | 1513 | ||
| 1490 | #ifdef CONFIG_NET_CLS_ACT | 1514 | #ifdef CONFIG_NET_CLS_ACT |
| 1491 | if (!(skb->tstamp.tv64 && (G_TC_FROM(skb->tc_verd) & AT_INGRESS))) | 1515 | if (!(skb->tstamp.tv64 && (G_TC_FROM(skb->tc_verd) & AT_INGRESS))) |
| 1492 | net_timestamp(skb); | 1516 | net_timestamp_set(skb); |
| 1493 | #else | 1517 | #else |
| 1494 | net_timestamp(skb); | 1518 | net_timestamp_set(skb); |
| 1495 | #endif | 1519 | #endif |
| 1496 | 1520 | ||
| 1497 | rcu_read_lock(); | 1521 | rcu_read_lock(); |
| @@ -1537,8 +1561,9 @@ static inline void __netif_reschedule(struct Qdisc *q) | |||
| 1537 | 1561 | ||
| 1538 | local_irq_save(flags); | 1562 | local_irq_save(flags); |
| 1539 | sd = &__get_cpu_var(softnet_data); | 1563 | sd = &__get_cpu_var(softnet_data); |
| 1540 | q->next_sched = sd->output_queue; | 1564 | q->next_sched = NULL; |
| 1541 | sd->output_queue = q; | 1565 | *sd->output_queue_tailp = q; |
| 1566 | sd->output_queue_tailp = &q->next_sched; | ||
| 1542 | raise_softirq_irqoff(NET_TX_SOFTIRQ); | 1567 | raise_softirq_irqoff(NET_TX_SOFTIRQ); |
| 1543 | local_irq_restore(flags); | 1568 | local_irq_restore(flags); |
| 1544 | } | 1569 | } |
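The __netif_reschedule() change switches the per-CPU output_queue from head insertion to a tail-pointer append, so qdiscs are serviced in the order they were scheduled. A standalone sketch of the tail-pointer technique (hypothetical outq/qdisc types, not kernel code):

```c
/* Tail-pointer append: keeping a pointer to the last ->next field makes
 * enqueue O(1) while preserving FIFO order, instead of pushing onto the
 * head (LIFO) as the old code did.
 */
#include <stdio.h>

struct qdisc {
	int id;
	struct qdisc *next_sched;
};

struct outq {
	struct qdisc *head;
	struct qdisc **tailp;	/* points at head, or at last ->next_sched */
};

static void outq_init(struct outq *q)
{
	q->head = NULL;
	q->tailp = &q->head;
}

static void outq_append(struct outq *q, struct qdisc *e)
{
	e->next_sched = NULL;
	*q->tailp = e;			/* link after current tail */
	q->tailp = &e->next_sched;	/* remember new tail position */
}

int main(void)
{
	struct outq q;
	struct qdisc a = { 1 }, b = { 2 }, c = { 3 };

	outq_init(&q);
	outq_append(&q, &a);
	outq_append(&q, &b);
	outq_append(&q, &c);

	for (struct qdisc *p = q.head; p; p = p->next_sched)
		printf("%d ", p->id);	/* prints "1 2 3": FIFO order */
	printf("\n");
	return 0;
}
```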
| @@ -1783,18 +1808,27 @@ EXPORT_SYMBOL(netdev_rx_csum_fault); | |||
| 1783 | * 2. No high memory really exists on this machine. | 1808 | * 2. No high memory really exists on this machine. |
| 1784 | */ | 1809 | */ |
| 1785 | 1810 | ||
| 1786 | static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb) | 1811 | static int illegal_highdma(struct net_device *dev, struct sk_buff *skb) |
| 1787 | { | 1812 | { |
| 1788 | #ifdef CONFIG_HIGHMEM | 1813 | #ifdef CONFIG_HIGHMEM |
| 1789 | int i; | 1814 | int i; |
| 1815 | if (!(dev->features & NETIF_F_HIGHDMA)) { | ||
| 1816 | for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) | ||
| 1817 | if (PageHighMem(skb_shinfo(skb)->frags[i].page)) | ||
| 1818 | return 1; | ||
| 1819 | } | ||
| 1790 | 1820 | ||
| 1791 | if (dev->features & NETIF_F_HIGHDMA) | 1821 | if (PCI_DMA_BUS_IS_PHYS) { |
| 1792 | return 0; | 1822 | struct device *pdev = dev->dev.parent; |
| 1793 | |||
| 1794 | for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) | ||
| 1795 | if (PageHighMem(skb_shinfo(skb)->frags[i].page)) | ||
| 1796 | return 1; | ||
| 1797 | 1823 | ||
| 1824 | if (!pdev) | ||
| 1825 | return 0; | ||
| 1826 | for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { | ||
| 1827 | dma_addr_t addr = page_to_phys(skb_shinfo(skb)->frags[i].page); | ||
| 1828 | if (!pdev->dma_mask || addr + PAGE_SIZE - 1 > *pdev->dma_mask) | ||
| 1829 | return 1; | ||
| 1830 | } | ||
| 1831 | } | ||
| 1798 | #endif | 1832 | #endif |
| 1799 | return 0; | 1833 | return 0; |
| 1800 | } | 1834 | } |
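The reworked illegal_highdma() also rejects fragments a physical-DMA bus cannot reach: when the parent device has a DMA mask, any page whose last byte lies above the mask forces a fallback. A small illustrative check of the same arithmetic (assumed 4 KiB pages and userspace types, not the kernel function):

```c
/* A fragment is unusable if its last byte lies above the device's DMA mask. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE 4096ULL

static bool frag_beyond_dma_mask(uint64_t page_phys, const uint64_t *dma_mask)
{
	if (!dma_mask)		/* no parent device / no mask: treat as OK */
		return false;
	return page_phys + PAGE_SIZE - 1 > *dma_mask;
}

int main(void)
{
	uint64_t mask32 = 0xffffffffULL;	/* 32-bit-only DMA engine */

	/* a page at 5 GiB cannot be reached with a 32-bit mask */
	printf("%d\n", frag_beyond_dma_mask(5ULL << 30, &mask32));
	/* a page at 1 GiB is fine */
	printf("%d\n", frag_beyond_dma_mask(1ULL << 30, &mask32));
	return 0;
}
```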
| @@ -1852,6 +1886,17 @@ static int dev_gso_segment(struct sk_buff *skb) | |||
| 1852 | return 0; | 1886 | return 0; |
| 1853 | } | 1887 | } |
| 1854 | 1888 | ||
| 1889 | /* | ||
| 1890 | * Try to orphan skb early, right before transmission by the device. | ||
| 1891 | * We cannot orphan skb if tx timestamp is requested, since | ||
| 1892 | * drivers need to call skb_tstamp_tx() to send the timestamp. | ||
| 1893 | */ | ||
| 1894 | static inline void skb_orphan_try(struct sk_buff *skb) | ||
| 1895 | { | ||
| 1896 | if (!skb_tx(skb)->flags) | ||
| 1897 | skb_orphan(skb); | ||
| 1898 | } | ||
| 1899 | |||
| 1855 | int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, | 1900 | int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, |
| 1856 | struct netdev_queue *txq) | 1901 | struct netdev_queue *txq) |
| 1857 | { | 1902 | { |
| @@ -1862,13 +1907,6 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, | |||
| 1862 | if (!list_empty(&ptype_all)) | 1907 | if (!list_empty(&ptype_all)) |
| 1863 | dev_queue_xmit_nit(skb, dev); | 1908 | dev_queue_xmit_nit(skb, dev); |
| 1864 | 1909 | ||
| 1865 | if (netif_needs_gso(dev, skb)) { | ||
| 1866 | if (unlikely(dev_gso_segment(skb))) | ||
| 1867 | goto out_kfree_skb; | ||
| 1868 | if (skb->next) | ||
| 1869 | goto gso; | ||
| 1870 | } | ||
| 1871 | |||
| 1872 | /* | 1910 | /* |
| 1873 | * If device doesnt need skb->dst, release it right now while | 1911 | * If device doesnt need skb->dst, release it right now while |
| 1874 | * its hot in this cpu cache | 1912 | * its hot in this cpu cache |
| @@ -1876,23 +1914,18 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, | |||
| 1876 | if (dev->priv_flags & IFF_XMIT_DST_RELEASE) | 1914 | if (dev->priv_flags & IFF_XMIT_DST_RELEASE) |
| 1877 | skb_dst_drop(skb); | 1915 | skb_dst_drop(skb); |
| 1878 | 1916 | ||
| 1917 | skb_orphan_try(skb); | ||
| 1918 | |||
| 1919 | if (netif_needs_gso(dev, skb)) { | ||
| 1920 | if (unlikely(dev_gso_segment(skb))) | ||
| 1921 | goto out_kfree_skb; | ||
| 1922 | if (skb->next) | ||
| 1923 | goto gso; | ||
| 1924 | } | ||
| 1925 | |||
| 1879 | rc = ops->ndo_start_xmit(skb, dev); | 1926 | rc = ops->ndo_start_xmit(skb, dev); |
| 1880 | if (rc == NETDEV_TX_OK) | 1927 | if (rc == NETDEV_TX_OK) |
| 1881 | txq_trans_update(txq); | 1928 | txq_trans_update(txq); |
| 1882 | /* | ||
| 1883 | * TODO: if skb_orphan() was called by | ||
| 1884 | * dev->hard_start_xmit() (for example, the unmodified | ||
| 1885 | * igb driver does that; bnx2 doesn't), then | ||
| 1886 | * skb_tx_software_timestamp() will be unable to send | ||
| 1887 | * back the time stamp. | ||
| 1888 | * | ||
| 1889 | * How can this be prevented? Always create another | ||
| 1890 | * reference to the socket before calling | ||
| 1891 | * dev->hard_start_xmit()? Prevent that skb_orphan() | ||
| 1892 | * does anything in dev->hard_start_xmit() by clearing | ||
| 1893 | * the skb destructor before the call and restoring it | ||
| 1894 | * afterwards, then doing the skb_orphan() ourselves? | ||
| 1895 | */ | ||
| 1896 | return rc; | 1929 | return rc; |
| 1897 | } | 1930 | } |
| 1898 | 1931 | ||
| @@ -1931,7 +1964,7 @@ out_kfree_skb: | |||
| 1931 | return rc; | 1964 | return rc; |
| 1932 | } | 1965 | } |
| 1933 | 1966 | ||
| 1934 | static u32 skb_tx_hashrnd; | 1967 | static u32 hashrnd __read_mostly; |
| 1935 | 1968 | ||
| 1936 | u16 skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb) | 1969 | u16 skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb) |
| 1937 | { | 1970 | { |
| @@ -1947,9 +1980,9 @@ u16 skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb) | |||
| 1947 | if (skb->sk && skb->sk->sk_hash) | 1980 | if (skb->sk && skb->sk->sk_hash) |
| 1948 | hash = skb->sk->sk_hash; | 1981 | hash = skb->sk->sk_hash; |
| 1949 | else | 1982 | else |
| 1950 | hash = skb->protocol; | 1983 | hash = (__force u16) skb->protocol; |
| 1951 | 1984 | ||
| 1952 | hash = jhash_1word(hash, skb_tx_hashrnd); | 1985 | hash = jhash_1word(hash, hashrnd); |
| 1953 | 1986 | ||
| 1954 | return (u16) (((u64) hash * dev->real_num_tx_queues) >> 32); | 1987 | return (u16) (((u64) hash * dev->real_num_tx_queues) >> 32); |
| 1955 | } | 1988 | } |
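skb_tx_hash() keeps mapping the 32-bit hash onto real_num_tx_queues with a multiply-and-shift rather than a modulo; the sketch below shows that arithmetic on its own (plain C, not the kernel function). Multiplying by the queue count and taking the top 32 bits spreads hashes evenly across [0, n) and avoids a division on the hot path.

```c
#include <stdint.h>
#include <stdio.h>

static uint16_t pick_queue(uint32_t hash, uint16_t num_queues)
{
	/* scale the hash by the queue count, keep the high 32 bits */
	return (uint16_t)(((uint64_t)hash * num_queues) >> 32);
}

int main(void)
{
	/* hash 0xffffffff on an 8-queue NIC lands on queue 7 */
	printf("%u\n", pick_queue(0xffffffffu, 8));
	return 0;
}
```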
| @@ -1959,10 +1992,9 @@ static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index) | |||
| 1959 | { | 1992 | { |
| 1960 | if (unlikely(queue_index >= dev->real_num_tx_queues)) { | 1993 | if (unlikely(queue_index >= dev->real_num_tx_queues)) { |
| 1961 | if (net_ratelimit()) { | 1994 | if (net_ratelimit()) { |
| 1962 | WARN(1, "%s selects TX queue %d, but " | 1995 | pr_warning("%s selects TX queue %d, but " |
| 1963 | "real number of TX queues is %d\n", | 1996 | "real number of TX queues is %d\n", |
| 1964 | dev->name, queue_index, | 1997 | dev->name, queue_index, dev->real_num_tx_queues); |
| 1965 | dev->real_num_tx_queues); | ||
| 1966 | } | 1998 | } |
| 1967 | return 0; | 1999 | return 0; |
| 1968 | } | 2000 | } |
| @@ -1989,7 +2021,7 @@ static struct netdev_queue *dev_pick_tx(struct net_device *dev, | |||
| 1989 | queue_index = skb_tx_hash(dev, skb); | 2021 | queue_index = skb_tx_hash(dev, skb); |
| 1990 | 2022 | ||
| 1991 | if (sk) { | 2023 | if (sk) { |
| 1992 | struct dst_entry *dst = rcu_dereference_bh(sk->sk_dst_cache); | 2024 | struct dst_entry *dst = rcu_dereference_check(sk->sk_dst_cache, 1); |
| 1993 | 2025 | ||
| 1994 | if (dst && skb_dst(skb) == dst) | 2026 | if (dst && skb_dst(skb) == dst) |
| 1995 | sk_tx_queue_set(sk, queue_index); | 2027 | sk_tx_queue_set(sk, queue_index); |
| @@ -2019,6 +2051,8 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, | |||
| 2019 | * waiting to be sent out; and the qdisc is not running - | 2051 | * waiting to be sent out; and the qdisc is not running - |
| 2020 | * xmit the skb directly. | 2052 | * xmit the skb directly. |
| 2021 | */ | 2053 | */ |
| 2054 | if (!(dev->priv_flags & IFF_XMIT_DST_RELEASE)) | ||
| 2055 | skb_dst_force(skb); | ||
| 2022 | __qdisc_update_bstats(q, skb->len); | 2056 | __qdisc_update_bstats(q, skb->len); |
| 2023 | if (sch_direct_xmit(skb, q, dev, txq, root_lock)) | 2057 | if (sch_direct_xmit(skb, q, dev, txq, root_lock)) |
| 2024 | __qdisc_run(q); | 2058 | __qdisc_run(q); |
| @@ -2027,6 +2061,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, | |||
| 2027 | 2061 | ||
| 2028 | rc = NET_XMIT_SUCCESS; | 2062 | rc = NET_XMIT_SUCCESS; |
| 2029 | } else { | 2063 | } else { |
| 2064 | skb_dst_force(skb); | ||
| 2030 | rc = qdisc_enqueue_root(skb, q); | 2065 | rc = qdisc_enqueue_root(skb, q); |
| 2031 | qdisc_run(q); | 2066 | qdisc_run(q); |
| 2032 | } | 2067 | } |
| @@ -2174,11 +2209,246 @@ EXPORT_SYMBOL(dev_queue_xmit); | |||
| 2174 | =======================================================================*/ | 2209 | =======================================================================*/ |
| 2175 | 2210 | ||
| 2176 | int netdev_max_backlog __read_mostly = 1000; | 2211 | int netdev_max_backlog __read_mostly = 1000; |
| 2212 | int netdev_tstamp_prequeue __read_mostly = 1; | ||
| 2177 | int netdev_budget __read_mostly = 300; | 2213 | int netdev_budget __read_mostly = 300; |
| 2178 | int weight_p __read_mostly = 64; /* old backlog weight */ | 2214 | int weight_p __read_mostly = 64; /* old backlog weight */ |
| 2179 | 2215 | ||
| 2180 | DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, }; | 2216 | /* Called with irq disabled */ |
| 2217 | static inline void ____napi_schedule(struct softnet_data *sd, | ||
| 2218 | struct napi_struct *napi) | ||
| 2219 | { | ||
| 2220 | list_add_tail(&napi->poll_list, &sd->poll_list); | ||
| 2221 | __raise_softirq_irqoff(NET_RX_SOFTIRQ); | ||
| 2222 | } | ||
| 2223 | |||
| 2224 | #ifdef CONFIG_RPS | ||
| 2225 | |||
| 2226 | /* One global table that all flow-based protocols share. */ | ||
| 2227 | struct rps_sock_flow_table *rps_sock_flow_table __read_mostly; | ||
| 2228 | EXPORT_SYMBOL(rps_sock_flow_table); | ||
| 2229 | |||
| 2230 | /* | ||
| 2231 | * get_rps_cpu is called from netif_receive_skb and returns the target | ||
| 2232 | * CPU from the RPS map of the receiving queue for a given skb. | ||
| 2233 | * rcu_read_lock must be held on entry. | ||
| 2234 | */ | ||
| 2235 | static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, | ||
| 2236 | struct rps_dev_flow **rflowp) | ||
| 2237 | { | ||
| 2238 | struct ipv6hdr *ip6; | ||
| 2239 | struct iphdr *ip; | ||
| 2240 | struct netdev_rx_queue *rxqueue; | ||
| 2241 | struct rps_map *map; | ||
| 2242 | struct rps_dev_flow_table *flow_table; | ||
| 2243 | struct rps_sock_flow_table *sock_flow_table; | ||
| 2244 | int cpu = -1; | ||
| 2245 | u8 ip_proto; | ||
| 2246 | u16 tcpu; | ||
| 2247 | u32 addr1, addr2, ihl; | ||
| 2248 | union { | ||
| 2249 | u32 v32; | ||
| 2250 | u16 v16[2]; | ||
| 2251 | } ports; | ||
| 2252 | |||
| 2253 | if (skb_rx_queue_recorded(skb)) { | ||
| 2254 | u16 index = skb_get_rx_queue(skb); | ||
| 2255 | if (unlikely(index >= dev->num_rx_queues)) { | ||
| 2256 | if (net_ratelimit()) { | ||
| 2257 | pr_warning("%s received packet on queue " | ||
| 2258 | "%u, but number of RX queues is %u\n", | ||
| 2259 | dev->name, index, dev->num_rx_queues); | ||
| 2260 | } | ||
| 2261 | goto done; | ||
| 2262 | } | ||
| 2263 | rxqueue = dev->_rx + index; | ||
| 2264 | } else | ||
| 2265 | rxqueue = dev->_rx; | ||
| 2266 | |||
| 2267 | if (!rxqueue->rps_map && !rxqueue->rps_flow_table) | ||
| 2268 | goto done; | ||
| 2269 | |||
| 2270 | if (skb->rxhash) | ||
| 2271 | goto got_hash; /* Skip hash computation on packet header */ | ||
| 2272 | |||
| 2273 | switch (skb->protocol) { | ||
| 2274 | case __constant_htons(ETH_P_IP): | ||
| 2275 | if (!pskb_may_pull(skb, sizeof(*ip))) | ||
| 2276 | goto done; | ||
| 2277 | |||
| 2278 | ip = (struct iphdr *) skb->data; | ||
| 2279 | ip_proto = ip->protocol; | ||
| 2280 | addr1 = (__force u32) ip->saddr; | ||
| 2281 | addr2 = (__force u32) ip->daddr; | ||
| 2282 | ihl = ip->ihl; | ||
| 2283 | break; | ||
| 2284 | case __constant_htons(ETH_P_IPV6): | ||
| 2285 | if (!pskb_may_pull(skb, sizeof(*ip6))) | ||
| 2286 | goto done; | ||
| 2287 | |||
| 2288 | ip6 = (struct ipv6hdr *) skb->data; | ||
| 2289 | ip_proto = ip6->nexthdr; | ||
| 2290 | addr1 = (__force u32) ip6->saddr.s6_addr32[3]; | ||
| 2291 | addr2 = (__force u32) ip6->daddr.s6_addr32[3]; | ||
| 2292 | ihl = (40 >> 2); | ||
| 2293 | break; | ||
| 2294 | default: | ||
| 2295 | goto done; | ||
| 2296 | } | ||
| 2297 | switch (ip_proto) { | ||
| 2298 | case IPPROTO_TCP: | ||
| 2299 | case IPPROTO_UDP: | ||
| 2300 | case IPPROTO_DCCP: | ||
| 2301 | case IPPROTO_ESP: | ||
| 2302 | case IPPROTO_AH: | ||
| 2303 | case IPPROTO_SCTP: | ||
| 2304 | case IPPROTO_UDPLITE: | ||
| 2305 | if (pskb_may_pull(skb, (ihl * 4) + 4)) { | ||
| 2306 | ports.v32 = * (__force u32 *) (skb->data + (ihl * 4)); | ||
| 2307 | if (ports.v16[1] < ports.v16[0]) | ||
| 2308 | swap(ports.v16[0], ports.v16[1]); | ||
| 2309 | break; | ||
| 2310 | } | ||
| 2311 | default: | ||
| 2312 | ports.v32 = 0; | ||
| 2313 | break; | ||
| 2314 | } | ||
| 2315 | |||
| 2316 | /* get a consistent hash (same value on both flow directions) */ | ||
| 2317 | if (addr2 < addr1) | ||
| 2318 | swap(addr1, addr2); | ||
| 2319 | skb->rxhash = jhash_3words(addr1, addr2, ports.v32, hashrnd); | ||
| 2320 | if (!skb->rxhash) | ||
| 2321 | skb->rxhash = 1; | ||
| 2322 | |||
| 2323 | got_hash: | ||
| 2324 | flow_table = rcu_dereference(rxqueue->rps_flow_table); | ||
| 2325 | sock_flow_table = rcu_dereference(rps_sock_flow_table); | ||
| 2326 | if (flow_table && sock_flow_table) { | ||
| 2327 | u16 next_cpu; | ||
| 2328 | struct rps_dev_flow *rflow; | ||
| 2329 | |||
| 2330 | rflow = &flow_table->flows[skb->rxhash & flow_table->mask]; | ||
| 2331 | tcpu = rflow->cpu; | ||
| 2332 | |||
| 2333 | next_cpu = sock_flow_table->ents[skb->rxhash & | ||
| 2334 | sock_flow_table->mask]; | ||
| 2335 | |||
| 2336 | /* | ||
| 2337 | * If the desired CPU (where last recvmsg was done) is | ||
| 2338 | * different from current CPU (one in the rx-queue flow | ||
| 2339 | * table entry), switch if one of the following holds: | ||
| 2340 | * - Current CPU is unset (equal to RPS_NO_CPU). | ||
| 2341 | * - Current CPU is offline. | ||
| 2342 | * - The current CPU's queue tail has advanced beyond the | ||
| 2343 | * last packet that was enqueued using this table entry. | ||
| 2344 | * This guarantees that all previous packets for the flow | ||
| 2345 | * have been dequeued, thus preserving in order delivery. | ||
| 2346 | */ | ||
| 2347 | if (unlikely(tcpu != next_cpu) && | ||
| 2348 | (tcpu == RPS_NO_CPU || !cpu_online(tcpu) || | ||
| 2349 | ((int)(per_cpu(softnet_data, tcpu).input_queue_head - | ||
| 2350 | rflow->last_qtail)) >= 0)) { | ||
| 2351 | tcpu = rflow->cpu = next_cpu; | ||
| 2352 | if (tcpu != RPS_NO_CPU) | ||
| 2353 | rflow->last_qtail = per_cpu(softnet_data, | ||
| 2354 | tcpu).input_queue_head; | ||
| 2355 | } | ||
| 2356 | if (tcpu != RPS_NO_CPU && cpu_online(tcpu)) { | ||
| 2357 | *rflowp = rflow; | ||
| 2358 | cpu = tcpu; | ||
| 2359 | goto done; | ||
| 2360 | } | ||
| 2361 | } | ||
| 2362 | |||
| 2363 | map = rcu_dereference(rxqueue->rps_map); | ||
| 2364 | if (map) { | ||
| 2365 | tcpu = map->cpus[((u64) skb->rxhash * map->len) >> 32]; | ||
| 2366 | |||
| 2367 | if (cpu_online(tcpu)) { | ||
| 2368 | cpu = tcpu; | ||
| 2369 | goto done; | ||
| 2370 | } | ||
| 2371 | } | ||
| 2372 | |||
| 2373 | done: | ||
| 2374 | return cpu; | ||
| 2375 | } | ||
| 2376 | |||
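get_rps_cpu() builds a direction-independent rxhash by sorting the address pair and the port pair before hashing, so both halves of a connection steer to the same CPU. A self-contained sketch of that normalization (the mix() helper is only a stand-in for the kernel's jhash_3words()):

```c
#include <stdint.h>
#include <stdio.h>

static uint32_t mix(uint32_t a, uint32_t b, uint32_t c)
{
	uint32_t h = 2166136261u;	/* FNV-style stand-in hash */
	h = (h ^ a) * 16777619u;
	h = (h ^ b) * 16777619u;
	h = (h ^ c) * 16777619u;
	return h;
}

static uint32_t flow_hash(uint32_t saddr, uint32_t daddr,
			  uint16_t sport, uint16_t dport)
{
	uint32_t a1 = saddr, a2 = daddr;
	uint16_t p1 = sport, p2 = dport;

	/* normalize so A->B and B->A produce identical input */
	if (a2 < a1) { uint32_t t = a1; a1 = a2; a2 = t; }
	if (p2 < p1) { uint16_t t = p1; p1 = p2; p2 = t; }
	return mix(a1, a2, ((uint32_t)p2 << 16) | p1);
}

int main(void)
{
	/* both directions of the same flow hash identically */
	printf("%08x\n", flow_hash(0x0a000001, 0x0a000002, 12345, 80));
	printf("%08x\n", flow_hash(0x0a000002, 0x0a000001, 80, 12345));
	return 0;
}
```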
| 2377 | /* Called from hardirq (IPI) context */ | ||
| 2378 | static void rps_trigger_softirq(void *data) | ||
| 2379 | { | ||
| 2380 | struct softnet_data *sd = data; | ||
| 2381 | |||
| 2382 | ____napi_schedule(sd, &sd->backlog); | ||
| 2383 | sd->received_rps++; | ||
| 2384 | } | ||
| 2385 | |||
| 2386 | #endif /* CONFIG_RPS */ | ||
| 2387 | |||
| 2388 | /* | ||
| 2389 | * Check if this softnet_data structure is another cpu one | ||
| 2390 | * If yes, queue it to our IPI list and return 1 | ||
| 2391 | * If no, return 0 | ||
| 2392 | */ | ||
| 2393 | static int rps_ipi_queued(struct softnet_data *sd) | ||
| 2394 | { | ||
| 2395 | #ifdef CONFIG_RPS | ||
| 2396 | struct softnet_data *mysd = &__get_cpu_var(softnet_data); | ||
| 2397 | |||
| 2398 | if (sd != mysd) { | ||
| 2399 | sd->rps_ipi_next = mysd->rps_ipi_list; | ||
| 2400 | mysd->rps_ipi_list = sd; | ||
| 2401 | |||
| 2402 | __raise_softirq_irqoff(NET_RX_SOFTIRQ); | ||
| 2403 | return 1; | ||
| 2404 | } | ||
| 2405 | #endif /* CONFIG_RPS */ | ||
| 2406 | return 0; | ||
| 2407 | } | ||
| 2408 | |||
| 2409 | /* | ||
| 2410 | * enqueue_to_backlog is called to queue an skb to a per CPU backlog | ||
| 2411 | * queue (may be a remote CPU queue). | ||
| 2412 | */ | ||
| 2413 | static int enqueue_to_backlog(struct sk_buff *skb, int cpu, | ||
| 2414 | unsigned int *qtail) | ||
| 2415 | { | ||
| 2416 | struct softnet_data *sd; | ||
| 2417 | unsigned long flags; | ||
| 2418 | |||
| 2419 | sd = &per_cpu(softnet_data, cpu); | ||
| 2420 | |||
| 2421 | local_irq_save(flags); | ||
| 2422 | |||
| 2423 | rps_lock(sd); | ||
| 2424 | if (skb_queue_len(&sd->input_pkt_queue) <= netdev_max_backlog) { | ||
| 2425 | if (skb_queue_len(&sd->input_pkt_queue)) { | ||
| 2426 | enqueue: | ||
| 2427 | __skb_queue_tail(&sd->input_pkt_queue, skb); | ||
| 2428 | input_queue_tail_incr_save(sd, qtail); | ||
| 2429 | rps_unlock(sd); | ||
| 2430 | local_irq_restore(flags); | ||
| 2431 | return NET_RX_SUCCESS; | ||
| 2432 | } | ||
| 2433 | |||
| 2434 | /* Schedule NAPI for backlog device | ||
| 2435 | * We can use non atomic operation since we own the queue lock | ||
| 2436 | */ | ||
| 2437 | if (!__test_and_set_bit(NAPI_STATE_SCHED, &sd->backlog.state)) { | ||
| 2438 | if (!rps_ipi_queued(sd)) | ||
| 2439 | ____napi_schedule(sd, &sd->backlog); | ||
| 2440 | } | ||
| 2441 | goto enqueue; | ||
| 2442 | } | ||
| 2443 | |||
| 2444 | sd->dropped++; | ||
| 2445 | rps_unlock(sd); | ||
| 2181 | 2446 | ||
| 2447 | local_irq_restore(flags); | ||
| 2448 | |||
| 2449 | kfree_skb(skb); | ||
| 2450 | return NET_RX_DROP; | ||
| 2451 | } | ||
| 2182 | 2452 | ||
| 2183 | /** | 2453 | /** |
| 2184 | * netif_rx - post buffer to the network code | 2454 | * netif_rx - post buffer to the network code |
| @@ -2197,41 +2467,38 @@ DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, }; | |||
| 2197 | 2467 | ||
| 2198 | int netif_rx(struct sk_buff *skb) | 2468 | int netif_rx(struct sk_buff *skb) |
| 2199 | { | 2469 | { |
| 2200 | struct softnet_data *queue; | 2470 | int ret; |
| 2201 | unsigned long flags; | ||
| 2202 | 2471 | ||
| 2203 | /* if netpoll wants it, pretend we never saw it */ | 2472 | /* if netpoll wants it, pretend we never saw it */ |
| 2204 | if (netpoll_rx(skb)) | 2473 | if (netpoll_rx(skb)) |
| 2205 | return NET_RX_DROP; | 2474 | return NET_RX_DROP; |
| 2206 | 2475 | ||
| 2207 | if (!skb->tstamp.tv64) | 2476 | if (netdev_tstamp_prequeue) |
| 2208 | net_timestamp(skb); | 2477 | net_timestamp_check(skb); |
| 2209 | 2478 | ||
| 2210 | /* | 2479 | #ifdef CONFIG_RPS |
| 2211 | * The code is rearranged so that the path is the most | 2480 | { |
| 2212 | * short when CPU is congested, but is still operating. | 2481 | struct rps_dev_flow voidflow, *rflow = &voidflow; |
| 2213 | */ | 2482 | int cpu; |
| 2214 | local_irq_save(flags); | ||
| 2215 | queue = &__get_cpu_var(softnet_data); | ||
| 2216 | 2483 | ||
| 2217 | __get_cpu_var(netdev_rx_stat).total++; | 2484 | rcu_read_lock(); |
| 2218 | if (queue->input_pkt_queue.qlen <= netdev_max_backlog) { | ||
| 2219 | if (queue->input_pkt_queue.qlen) { | ||
| 2220 | enqueue: | ||
| 2221 | __skb_queue_tail(&queue->input_pkt_queue, skb); | ||
| 2222 | local_irq_restore(flags); | ||
| 2223 | return NET_RX_SUCCESS; | ||
| 2224 | } | ||
| 2225 | 2485 | ||
| 2226 | napi_schedule(&queue->backlog); | 2486 | cpu = get_rps_cpu(skb->dev, skb, &rflow); |
| 2227 | goto enqueue; | 2487 | if (cpu < 0) |
| 2228 | } | 2488 | cpu = smp_processor_id(); |
| 2229 | 2489 | ||
| 2230 | __get_cpu_var(netdev_rx_stat).dropped++; | 2490 | ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail); |
| 2231 | local_irq_restore(flags); | ||
| 2232 | 2491 | ||
| 2233 | kfree_skb(skb); | 2492 | rcu_read_unlock(); |
| 2234 | return NET_RX_DROP; | 2493 | } |
| 2494 | #else | ||
| 2495 | { | ||
| 2496 | unsigned int qtail; | ||
| 2497 | ret = enqueue_to_backlog(skb, get_cpu(), &qtail); | ||
| 2498 | put_cpu(); | ||
| 2499 | } | ||
| 2500 | #endif | ||
| 2501 | return ret; | ||
| 2235 | } | 2502 | } |
| 2236 | EXPORT_SYMBOL(netif_rx); | 2503 | EXPORT_SYMBOL(netif_rx); |
| 2237 | 2504 | ||
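netif_rx() now hands the packet to enqueue_to_backlog() for whichever CPU get_rps_cpu() selects, and that function only admits packets while the target backlog is below netdev_max_backlog. A toy model of that admission/drop accounting (plain arrays stand in for per-CPU softnet_data; not kernel code):

```c
#include <stdbool.h>
#include <stdio.h>

#define NR_CPUS 4
static int netdev_max_backlog = 1000;

struct sd {			/* stand-in for struct softnet_data */
	int qlen;
	unsigned int dropped;
};
static struct sd percpu[NR_CPUS];

static bool enqueue_to_backlog_sketch(int cpu)
{
	struct sd *sd = &percpu[cpu];

	if (sd->qlen <= netdev_max_backlog) {
		sd->qlen++;	/* __skb_queue_tail(&sd->input_pkt_queue, skb) */
		return true;	/* NET_RX_SUCCESS */
	}
	sd->dropped++;		/* over budget: count and drop */
	return false;		/* NET_RX_DROP */
}

int main(void)
{
	for (int i = 0; i < 1200; i++)
		enqueue_to_backlog_sketch(0);
	printf("queued=%d dropped=%u\n", percpu[0].qlen, percpu[0].dropped);
	return 0;
}
```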
| @@ -2276,6 +2543,7 @@ static void net_tx_action(struct softirq_action *h) | |||
| 2276 | local_irq_disable(); | 2543 | local_irq_disable(); |
| 2277 | head = sd->output_queue; | 2544 | head = sd->output_queue; |
| 2278 | sd->output_queue = NULL; | 2545 | sd->output_queue = NULL; |
| 2546 | sd->output_queue_tailp = &sd->output_queue; | ||
| 2279 | local_irq_enable(); | 2547 | local_irq_enable(); |
| 2280 | 2548 | ||
| 2281 | while (head) { | 2549 | while (head) { |
| @@ -2352,7 +2620,8 @@ static inline struct sk_buff *handle_bridge(struct sk_buff *skb, | |||
| 2352 | #endif | 2620 | #endif |
| 2353 | 2621 | ||
| 2354 | #if defined(CONFIG_MACVLAN) || defined(CONFIG_MACVLAN_MODULE) | 2622 | #if defined(CONFIG_MACVLAN) || defined(CONFIG_MACVLAN_MODULE) |
| 2355 | struct sk_buff *(*macvlan_handle_frame_hook)(struct sk_buff *skb) __read_mostly; | 2623 | struct sk_buff *(*macvlan_handle_frame_hook)(struct macvlan_port *p, |
| 2624 | struct sk_buff *skb) __read_mostly; | ||
| 2356 | EXPORT_SYMBOL_GPL(macvlan_handle_frame_hook); | 2625 | EXPORT_SYMBOL_GPL(macvlan_handle_frame_hook); |
| 2357 | 2626 | ||
| 2358 | static inline struct sk_buff *handle_macvlan(struct sk_buff *skb, | 2627 | static inline struct sk_buff *handle_macvlan(struct sk_buff *skb, |
| @@ -2360,14 +2629,17 @@ static inline struct sk_buff *handle_macvlan(struct sk_buff *skb, | |||
| 2360 | int *ret, | 2629 | int *ret, |
| 2361 | struct net_device *orig_dev) | 2630 | struct net_device *orig_dev) |
| 2362 | { | 2631 | { |
| 2363 | if (skb->dev->macvlan_port == NULL) | 2632 | struct macvlan_port *port; |
| 2633 | |||
| 2634 | port = rcu_dereference(skb->dev->macvlan_port); | ||
| 2635 | if (!port) | ||
| 2364 | return skb; | 2636 | return skb; |
| 2365 | 2637 | ||
| 2366 | if (*pt_prev) { | 2638 | if (*pt_prev) { |
| 2367 | *ret = deliver_skb(skb, *pt_prev, orig_dev); | 2639 | *ret = deliver_skb(skb, *pt_prev, orig_dev); |
| 2368 | *pt_prev = NULL; | 2640 | *pt_prev = NULL; |
| 2369 | } | 2641 | } |
| 2370 | return macvlan_handle_frame_hook(skb); | 2642 | return macvlan_handle_frame_hook(port, skb); |
| 2371 | } | 2643 | } |
| 2372 | #else | 2644 | #else |
| 2373 | #define handle_macvlan(skb, pt_prev, ret, orig_dev) (skb) | 2645 | #define handle_macvlan(skb, pt_prev, ret, orig_dev) (skb) |
| @@ -2468,22 +2740,56 @@ void netif_nit_deliver(struct sk_buff *skb) | |||
| 2468 | rcu_read_unlock(); | 2740 | rcu_read_unlock(); |
| 2469 | } | 2741 | } |
| 2470 | 2742 | ||
| 2471 | /** | 2743 | static inline void skb_bond_set_mac_by_master(struct sk_buff *skb, |
| 2472 | * netif_receive_skb - process receive buffer from network | 2744 | struct net_device *master) |
| 2473 | * @skb: buffer to process | 2745 | { |
| 2474 | * | 2746 | if (skb->pkt_type == PACKET_HOST) { |
| 2475 | * netif_receive_skb() is the main receive data processing function. | 2747 | u16 *dest = (u16 *) eth_hdr(skb)->h_dest; |
| 2476 | * It always succeeds. The buffer may be dropped during processing | 2748 | |
| 2477 | * for congestion control or by the protocol layers. | 2749 | memcpy(dest, master->dev_addr, ETH_ALEN); |
| 2478 | * | 2750 | } |
| 2479 | * This function may only be called from softirq context and interrupts | 2751 | } |
| 2480 | * should be enabled. | 2752 | |
| 2481 | * | 2753 | /* On bonding slaves other than the currently active slave, suppress |
| 2482 | * Return values (usually ignored): | 2754 | * duplicates except for 802.3ad ETH_P_SLOW, alb non-mcast/bcast, and |
| 2483 | * NET_RX_SUCCESS: no congestion | 2755 | * ARP on active-backup slaves with arp_validate enabled. |
| 2484 | * NET_RX_DROP: packet was dropped | ||
| 2485 | */ | 2756 | */ |
| 2486 | int netif_receive_skb(struct sk_buff *skb) | 2757 | int __skb_bond_should_drop(struct sk_buff *skb, struct net_device *master) |
| 2758 | { | ||
| 2759 | struct net_device *dev = skb->dev; | ||
| 2760 | |||
| 2761 | if (master->priv_flags & IFF_MASTER_ARPMON) | ||
| 2762 | dev->last_rx = jiffies; | ||
| 2763 | |||
| 2764 | if ((master->priv_flags & IFF_MASTER_ALB) && master->br_port) { | ||
| 2765 | /* Do address unmangle. The local destination address | ||
| 2766 | * will be always the one master has. Provides the right | ||
| 2767 | * functionality in a bridge. | ||
| 2768 | */ | ||
| 2769 | skb_bond_set_mac_by_master(skb, master); | ||
| 2770 | } | ||
| 2771 | |||
| 2772 | if (dev->priv_flags & IFF_SLAVE_INACTIVE) { | ||
| 2773 | if ((dev->priv_flags & IFF_SLAVE_NEEDARP) && | ||
| 2774 | skb->protocol == __cpu_to_be16(ETH_P_ARP)) | ||
| 2775 | return 0; | ||
| 2776 | |||
| 2777 | if (master->priv_flags & IFF_MASTER_ALB) { | ||
| 2778 | if (skb->pkt_type != PACKET_BROADCAST && | ||
| 2779 | skb->pkt_type != PACKET_MULTICAST) | ||
| 2780 | return 0; | ||
| 2781 | } | ||
| 2782 | if (master->priv_flags & IFF_MASTER_8023AD && | ||
| 2783 | skb->protocol == __cpu_to_be16(ETH_P_SLOW)) | ||
| 2784 | return 0; | ||
| 2785 | |||
| 2786 | return 1; | ||
| 2787 | } | ||
| 2788 | return 0; | ||
| 2789 | } | ||
| 2790 | EXPORT_SYMBOL(__skb_bond_should_drop); | ||
| 2791 | |||
| 2792 | static int __netif_receive_skb(struct sk_buff *skb) | ||
| 2487 | { | 2793 | { |
| 2488 | struct packet_type *ptype, *pt_prev; | 2794 | struct packet_type *ptype, *pt_prev; |
| 2489 | struct net_device *orig_dev; | 2795 | struct net_device *orig_dev; |
| @@ -2493,8 +2799,8 @@ int netif_receive_skb(struct sk_buff *skb) | |||
| 2493 | int ret = NET_RX_DROP; | 2799 | int ret = NET_RX_DROP; |
| 2494 | __be16 type; | 2800 | __be16 type; |
| 2495 | 2801 | ||
| 2496 | if (!skb->tstamp.tv64) | 2802 | if (!netdev_tstamp_prequeue) |
| 2497 | net_timestamp(skb); | 2803 | net_timestamp_check(skb); |
| 2498 | 2804 | ||
| 2499 | if (vlan_tx_tag_present(skb) && vlan_hwaccel_do_receive(skb)) | 2805 | if (vlan_tx_tag_present(skb) && vlan_hwaccel_do_receive(skb)) |
| 2500 | return NET_RX_SUCCESS; | 2806 | return NET_RX_SUCCESS; |
| @@ -2516,7 +2822,7 @@ int netif_receive_skb(struct sk_buff *skb) | |||
| 2516 | skb->dev = master; | 2822 | skb->dev = master; |
| 2517 | } | 2823 | } |
| 2518 | 2824 | ||
| 2519 | __get_cpu_var(netdev_rx_stat).total++; | 2825 | __get_cpu_var(softnet_data).processed++; |
| 2520 | 2826 | ||
| 2521 | skb_reset_network_header(skb); | 2827 | skb_reset_network_header(skb); |
| 2522 | skb_reset_transport_header(skb); | 2828 | skb_reset_transport_header(skb); |
| @@ -2594,20 +2900,78 @@ out: | |||
| 2594 | rcu_read_unlock(); | 2900 | rcu_read_unlock(); |
| 2595 | return ret; | 2901 | return ret; |
| 2596 | } | 2902 | } |
| 2903 | |||
| 2904 | /** | ||
| 2905 | * netif_receive_skb - process receive buffer from network | ||
| 2906 | * @skb: buffer to process | ||
| 2907 | * | ||
| 2908 | * netif_receive_skb() is the main receive data processing function. | ||
| 2909 | * It always succeeds. The buffer may be dropped during processing | ||
| 2910 | * for congestion control or by the protocol layers. | ||
| 2911 | * | ||
| 2912 | * This function may only be called from softirq context and interrupts | ||
| 2913 | * should be enabled. | ||
| 2914 | * | ||
| 2915 | * Return values (usually ignored): | ||
| 2916 | * NET_RX_SUCCESS: no congestion | ||
| 2917 | * NET_RX_DROP: packet was dropped | ||
| 2918 | */ | ||
| 2919 | int netif_receive_skb(struct sk_buff *skb) | ||
| 2920 | { | ||
| 2921 | if (netdev_tstamp_prequeue) | ||
| 2922 | net_timestamp_check(skb); | ||
| 2923 | |||
| 2924 | #ifdef CONFIG_RPS | ||
| 2925 | { | ||
| 2926 | struct rps_dev_flow voidflow, *rflow = &voidflow; | ||
| 2927 | int cpu, ret; | ||
| 2928 | |||
| 2929 | rcu_read_lock(); | ||
| 2930 | |||
| 2931 | cpu = get_rps_cpu(skb->dev, skb, &rflow); | ||
| 2932 | |||
| 2933 | if (cpu >= 0) { | ||
| 2934 | ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail); | ||
| 2935 | rcu_read_unlock(); | ||
| 2936 | } else { | ||
| 2937 | rcu_read_unlock(); | ||
| 2938 | ret = __netif_receive_skb(skb); | ||
| 2939 | } | ||
| 2940 | |||
| 2941 | return ret; | ||
| 2942 | } | ||
| 2943 | #else | ||
| 2944 | return __netif_receive_skb(skb); | ||
| 2945 | #endif | ||
| 2946 | } | ||
| 2597 | EXPORT_SYMBOL(netif_receive_skb); | 2947 | EXPORT_SYMBOL(netif_receive_skb); |
| 2598 | 2948 | ||
| 2599 | /* Network device is going away, flush any packets still pending */ | 2949 | /* Network device is going away, flush any packets still pending |
| 2950 | * Called with irqs disabled. | ||
| 2951 | */ | ||
| 2600 | static void flush_backlog(void *arg) | 2952 | static void flush_backlog(void *arg) |
| 2601 | { | 2953 | { |
| 2602 | struct net_device *dev = arg; | 2954 | struct net_device *dev = arg; |
| 2603 | struct softnet_data *queue = &__get_cpu_var(softnet_data); | 2955 | struct softnet_data *sd = &__get_cpu_var(softnet_data); |
| 2604 | struct sk_buff *skb, *tmp; | 2956 | struct sk_buff *skb, *tmp; |
| 2605 | 2957 | ||
| 2606 | skb_queue_walk_safe(&queue->input_pkt_queue, skb, tmp) | 2958 | rps_lock(sd); |
| 2959 | skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp) { | ||
| 2607 | if (skb->dev == dev) { | 2960 | if (skb->dev == dev) { |
| 2608 | __skb_unlink(skb, &queue->input_pkt_queue); | 2961 | __skb_unlink(skb, &sd->input_pkt_queue); |
| 2609 | kfree_skb(skb); | 2962 | kfree_skb(skb); |
| 2963 | input_queue_head_incr(sd); | ||
| 2610 | } | 2964 | } |
| 2965 | } | ||
| 2966 | rps_unlock(sd); | ||
| 2967 | |||
| 2968 | skb_queue_walk_safe(&sd->process_queue, skb, tmp) { | ||
| 2969 | if (skb->dev == dev) { | ||
| 2970 | __skb_unlink(skb, &sd->process_queue); | ||
| 2971 | kfree_skb(skb); | ||
| 2972 | input_queue_head_incr(sd); | ||
| 2973 | } | ||
| 2974 | } | ||
| 2611 | } | 2975 | } |
| 2612 | 2976 | ||
| 2613 | static int napi_gro_complete(struct sk_buff *skb) | 2977 | static int napi_gro_complete(struct sk_buff *skb) |
| @@ -2910,27 +3274,87 @@ gro_result_t napi_gro_frags(struct napi_struct *napi) | |||
| 2910 | } | 3274 | } |
| 2911 | EXPORT_SYMBOL(napi_gro_frags); | 3275 | EXPORT_SYMBOL(napi_gro_frags); |
| 2912 | 3276 | ||
| 3277 | /* | ||
| 3278 | * net_rps_action sends any pending IPI's for rps. | ||
| 3279 | * Note: called with local irq disabled, but exits with local irq enabled. | ||
| 3280 | */ | ||
| 3281 | static void net_rps_action_and_irq_enable(struct softnet_data *sd) | ||
| 3282 | { | ||
| 3283 | #ifdef CONFIG_RPS | ||
| 3284 | struct softnet_data *remsd = sd->rps_ipi_list; | ||
| 3285 | |||
| 3286 | if (remsd) { | ||
| 3287 | sd->rps_ipi_list = NULL; | ||
| 3288 | |||
| 3289 | local_irq_enable(); | ||
| 3290 | |||
| 3291 | /* Send pending IPI's to kick RPS processing on remote cpus. */ | ||
| 3292 | while (remsd) { | ||
| 3293 | struct softnet_data *next = remsd->rps_ipi_next; | ||
| 3294 | |||
| 3295 | if (cpu_online(remsd->cpu)) | ||
| 3296 | __smp_call_function_single(remsd->cpu, | ||
| 3297 | &remsd->csd, 0); | ||
| 3298 | remsd = next; | ||
| 3299 | } | ||
| 3300 | } else | ||
| 3301 | #endif | ||
| 3302 | local_irq_enable(); | ||
| 3303 | } | ||
| 3304 | |||
| 2913 | static int process_backlog(struct napi_struct *napi, int quota) | 3305 | static int process_backlog(struct napi_struct *napi, int quota) |
| 2914 | { | 3306 | { |
| 2915 | int work = 0; | 3307 | int work = 0; |
| 2916 | struct softnet_data *queue = &__get_cpu_var(softnet_data); | 3308 | struct softnet_data *sd = container_of(napi, struct softnet_data, backlog); |
| 2917 | unsigned long start_time = jiffies; | ||
| 2918 | 3309 | ||
| 3310 | #ifdef CONFIG_RPS | ||
| 3311 | /* Check if we have pending ipi, its better to send them now, | ||
| 3312 | * not waiting net_rx_action() end. | ||
| 3313 | */ | ||
| 3314 | if (sd->rps_ipi_list) { | ||
| 3315 | local_irq_disable(); | ||
| 3316 | net_rps_action_and_irq_enable(sd); | ||
| 3317 | } | ||
| 3318 | #endif | ||
| 2919 | napi->weight = weight_p; | 3319 | napi->weight = weight_p; |
| 2920 | do { | 3320 | local_irq_disable(); |
| 3321 | while (work < quota) { | ||
| 2921 | struct sk_buff *skb; | 3322 | struct sk_buff *skb; |
| 3323 | unsigned int qlen; | ||
| 2922 | 3324 | ||
| 2923 | local_irq_disable(); | 3325 | while ((skb = __skb_dequeue(&sd->process_queue))) { |
| 2924 | skb = __skb_dequeue(&queue->input_pkt_queue); | ||
| 2925 | if (!skb) { | ||
| 2926 | __napi_complete(napi); | ||
| 2927 | local_irq_enable(); | 3326 | local_irq_enable(); |
| 2928 | break; | 3327 | __netif_receive_skb(skb); |
| 3328 | local_irq_disable(); | ||
| 3329 | input_queue_head_incr(sd); | ||
| 3330 | if (++work >= quota) { | ||
| 3331 | local_irq_enable(); | ||
| 3332 | return work; | ||
| 3333 | } | ||
| 2929 | } | 3334 | } |
| 2930 | local_irq_enable(); | ||
| 2931 | 3335 | ||
| 2932 | netif_receive_skb(skb); | 3336 | rps_lock(sd); |
| 2933 | } while (++work < quota && jiffies == start_time); | 3337 | qlen = skb_queue_len(&sd->input_pkt_queue); |
| 3338 | if (qlen) | ||
| 3339 | skb_queue_splice_tail_init(&sd->input_pkt_queue, | ||
| 3340 | &sd->process_queue); | ||
| 3341 | |||
| 3342 | if (qlen < quota - work) { | ||
| 3343 | /* | ||
| 3344 | * Inline a custom version of __napi_complete(). | ||
| 3345 | * only current cpu owns and manipulates this napi, | ||
| 3346 | * and NAPI_STATE_SCHED is the only possible flag set on backlog. | ||
| 3347 | * we can use a plain write instead of clear_bit(), | ||
| 3348 | * and we dont need an smp_mb() memory barrier. | ||
| 3349 | */ | ||
| 3350 | list_del(&napi->poll_list); | ||
| 3351 | napi->state = 0; | ||
| 3352 | |||
| 3353 | quota = work + qlen; | ||
| 3354 | } | ||
| 3355 | rps_unlock(sd); | ||
| 3356 | } | ||
| 3357 | local_irq_enable(); | ||
| 2934 | 3358 | ||
| 2935 | return work; | 3359 | return work; |
| 2936 | } | 3360 | } |
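The rewritten process_backlog() drains packets in two stages: under rps_lock() it splices the whole input_pkt_queue onto a private process_queue in one operation, then handles that batch with the lock dropped. A minimal sketch of the splice-then-process pattern using ordinary linked lists (illustrative types, not sk_buff_head):

```c
#include <stdio.h>
#include <stddef.h>

struct pkt {
	int id;
	struct pkt *next;
};

struct list {
	struct pkt *head, *tail;
};

static void list_push(struct list *l, struct pkt *p)
{
	p->next = NULL;
	if (l->tail)
		l->tail->next = p;
	else
		l->head = p;
	l->tail = p;
}

/* splice_tail_init: append everything from src to dst, leave src empty */
static void splice_tail_init(struct list *src, struct list *dst)
{
	if (!src->head)
		return;
	if (dst->tail)
		dst->tail->next = src->head;
	else
		dst->head = src->head;
	dst->tail = src->tail;
	src->head = src->tail = NULL;
}

int main(void)
{
	struct list input = { 0 }, process = { 0 };
	struct pkt p[3] = { { 1 }, { 2 }, { 3 } };

	for (int i = 0; i < 3; i++)
		list_push(&input, &p[i]);	/* producers: "rps_lock held" */

	splice_tail_init(&input, &process);	/* one batched move under the lock */

	for (struct pkt *q = process.head; q; q = q->next)
		printf("process %d\n", q->id);	/* consumed with the lock dropped */
	return 0;
}
```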
| @@ -2946,8 +3370,7 @@ void __napi_schedule(struct napi_struct *n) | |||
| 2946 | unsigned long flags; | 3370 | unsigned long flags; |
| 2947 | 3371 | ||
| 2948 | local_irq_save(flags); | 3372 | local_irq_save(flags); |
| 2949 | list_add_tail(&n->poll_list, &__get_cpu_var(softnet_data).poll_list); | 3373 | ____napi_schedule(&__get_cpu_var(softnet_data), n); |
| 2950 | __raise_softirq_irqoff(NET_RX_SOFTIRQ); | ||
| 2951 | local_irq_restore(flags); | 3374 | local_irq_restore(flags); |
| 2952 | } | 3375 | } |
| 2953 | EXPORT_SYMBOL(__napi_schedule); | 3376 | EXPORT_SYMBOL(__napi_schedule); |
| @@ -3018,17 +3441,16 @@ void netif_napi_del(struct napi_struct *napi) | |||
| 3018 | } | 3441 | } |
| 3019 | EXPORT_SYMBOL(netif_napi_del); | 3442 | EXPORT_SYMBOL(netif_napi_del); |
| 3020 | 3443 | ||
| 3021 | |||
| 3022 | static void net_rx_action(struct softirq_action *h) | 3444 | static void net_rx_action(struct softirq_action *h) |
| 3023 | { | 3445 | { |
| 3024 | struct list_head *list = &__get_cpu_var(softnet_data).poll_list; | 3446 | struct softnet_data *sd = &__get_cpu_var(softnet_data); |
| 3025 | unsigned long time_limit = jiffies + 2; | 3447 | unsigned long time_limit = jiffies + 2; |
| 3026 | int budget = netdev_budget; | 3448 | int budget = netdev_budget; |
| 3027 | void *have; | 3449 | void *have; |
| 3028 | 3450 | ||
| 3029 | local_irq_disable(); | 3451 | local_irq_disable(); |
| 3030 | 3452 | ||
| 3031 | while (!list_empty(list)) { | 3453 | while (!list_empty(&sd->poll_list)) { |
| 3032 | struct napi_struct *n; | 3454 | struct napi_struct *n; |
| 3033 | int work, weight; | 3455 | int work, weight; |
| 3034 | 3456 | ||
| @@ -3046,7 +3468,7 @@ static void net_rx_action(struct softirq_action *h) | |||
| 3046 | * entries to the tail of this list, and only ->poll() | 3468 | * entries to the tail of this list, and only ->poll() |
| 3047 | * calls can remove this head entry from the list. | 3469 | * calls can remove this head entry from the list. |
| 3048 | */ | 3470 | */ |
| 3049 | n = list_first_entry(list, struct napi_struct, poll_list); | 3471 | n = list_first_entry(&sd->poll_list, struct napi_struct, poll_list); |
| 3050 | 3472 | ||
| 3051 | have = netpoll_poll_lock(n); | 3473 | have = netpoll_poll_lock(n); |
| 3052 | 3474 | ||
| @@ -3081,13 +3503,13 @@ static void net_rx_action(struct softirq_action *h) | |||
| 3081 | napi_complete(n); | 3503 | napi_complete(n); |
| 3082 | local_irq_disable(); | 3504 | local_irq_disable(); |
| 3083 | } else | 3505 | } else |
| 3084 | list_move_tail(&n->poll_list, list); | 3506 | list_move_tail(&n->poll_list, &sd->poll_list); |
| 3085 | } | 3507 | } |
| 3086 | 3508 | ||
| 3087 | netpoll_poll_unlock(have); | 3509 | netpoll_poll_unlock(have); |
| 3088 | } | 3510 | } |
| 3089 | out: | 3511 | out: |
| 3090 | local_irq_enable(); | 3512 | net_rps_action_and_irq_enable(sd); |
| 3091 | 3513 | ||
| 3092 | #ifdef CONFIG_NET_DMA | 3514 | #ifdef CONFIG_NET_DMA |
| 3093 | /* | 3515 | /* |
| @@ -3100,7 +3522,7 @@ out: | |||
| 3100 | return; | 3522 | return; |
| 3101 | 3523 | ||
| 3102 | softnet_break: | 3524 | softnet_break: |
| 3103 | __get_cpu_var(netdev_rx_stat).time_squeeze++; | 3525 | sd->time_squeeze++; |
| 3104 | __raise_softirq_irqoff(NET_RX_SOFTIRQ); | 3526 | __raise_softirq_irqoff(NET_RX_SOFTIRQ); |
| 3105 | goto out; | 3527 | goto out; |
| 3106 | } | 3528 | } |
| @@ -3301,17 +3723,17 @@ static int dev_seq_show(struct seq_file *seq, void *v) | |||
| 3301 | return 0; | 3723 | return 0; |
| 3302 | } | 3724 | } |
| 3303 | 3725 | ||
| 3304 | static struct netif_rx_stats *softnet_get_online(loff_t *pos) | 3726 | static struct softnet_data *softnet_get_online(loff_t *pos) |
| 3305 | { | 3727 | { |
| 3306 | struct netif_rx_stats *rc = NULL; | 3728 | struct softnet_data *sd = NULL; |
| 3307 | 3729 | ||
| 3308 | while (*pos < nr_cpu_ids) | 3730 | while (*pos < nr_cpu_ids) |
| 3309 | if (cpu_online(*pos)) { | 3731 | if (cpu_online(*pos)) { |
| 3310 | rc = &per_cpu(netdev_rx_stat, *pos); | 3732 | sd = &per_cpu(softnet_data, *pos); |
| 3311 | break; | 3733 | break; |
| 3312 | } else | 3734 | } else |
| 3313 | ++*pos; | 3735 | ++*pos; |
| 3314 | return rc; | 3736 | return sd; |
| 3315 | } | 3737 | } |
| 3316 | 3738 | ||
| 3317 | static void *softnet_seq_start(struct seq_file *seq, loff_t *pos) | 3739 | static void *softnet_seq_start(struct seq_file *seq, loff_t *pos) |
| @@ -3331,12 +3753,12 @@ static void softnet_seq_stop(struct seq_file *seq, void *v) | |||
| 3331 | 3753 | ||
| 3332 | static int softnet_seq_show(struct seq_file *seq, void *v) | 3754 | static int softnet_seq_show(struct seq_file *seq, void *v) |
| 3333 | { | 3755 | { |
| 3334 | struct netif_rx_stats *s = v; | 3756 | struct softnet_data *sd = v; |
| 3335 | 3757 | ||
| 3336 | seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n", | 3758 | seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x %08x\n", |
| 3337 | s->total, s->dropped, s->time_squeeze, 0, | 3759 | sd->processed, sd->dropped, sd->time_squeeze, 0, |
| 3338 | 0, 0, 0, 0, /* was fastroute */ | 3760 | 0, 0, 0, 0, /* was fastroute */ |
| 3339 | s->cpu_collision); | 3761 | sd->cpu_collision, sd->received_rps); |
| 3340 | return 0; | 3762 | return 0; |
| 3341 | } | 3763 | } |
| 3342 | 3764 | ||
| @@ -3559,11 +3981,10 @@ int netdev_set_master(struct net_device *slave, struct net_device *master) | |||
| 3559 | 3981 | ||
| 3560 | slave->master = master; | 3982 | slave->master = master; |
| 3561 | 3983 | ||
| 3562 | synchronize_net(); | 3984 | if (old) { |
| 3563 | 3985 | synchronize_net(); | |
| 3564 | if (old) | ||
| 3565 | dev_put(old); | 3986 | dev_put(old); |
| 3566 | 3987 | } | |
| 3567 | if (master) | 3988 | if (master) |
| 3568 | slave->flags |= IFF_SLAVE; | 3989 | slave->flags |= IFF_SLAVE; |
| 3569 | else | 3990 | else |
| @@ -3740,562 +4161,6 @@ void dev_set_rx_mode(struct net_device *dev) | |||
| 3740 | netif_addr_unlock_bh(dev); | 4161 | netif_addr_unlock_bh(dev); |
| 3741 | } | 4162 | } |
| 3742 | 4163 | ||
| 3743 | /* hw addresses list handling functions */ | ||
| 3744 | |||
| 3745 | static int __hw_addr_add(struct netdev_hw_addr_list *list, unsigned char *addr, | ||
| 3746 | int addr_len, unsigned char addr_type) | ||
| 3747 | { | ||
| 3748 | struct netdev_hw_addr *ha; | ||
| 3749 | int alloc_size; | ||
| 3750 | |||
| 3751 | if (addr_len > MAX_ADDR_LEN) | ||
| 3752 | return -EINVAL; | ||
| 3753 | |||
| 3754 | list_for_each_entry(ha, &list->list, list) { | ||
| 3755 | if (!memcmp(ha->addr, addr, addr_len) && | ||
| 3756 | ha->type == addr_type) { | ||
| 3757 | ha->refcount++; | ||
| 3758 | return 0; | ||
| 3759 | } | ||
| 3760 | } | ||
| 3761 | |||
| 3762 | |||
| 3763 | alloc_size = sizeof(*ha); | ||
| 3764 | if (alloc_size < L1_CACHE_BYTES) | ||
| 3765 | alloc_size = L1_CACHE_BYTES; | ||
| 3766 | ha = kmalloc(alloc_size, GFP_ATOMIC); | ||
| 3767 | if (!ha) | ||
| 3768 | return -ENOMEM; | ||
| 3769 | memcpy(ha->addr, addr, addr_len); | ||
| 3770 | ha->type = addr_type; | ||
| 3771 | ha->refcount = 1; | ||
| 3772 | ha->synced = false; | ||
| 3773 | list_add_tail_rcu(&ha->list, &list->list); | ||
| 3774 | list->count++; | ||
| 3775 | return 0; | ||
| 3776 | } | ||
| 3777 | |||
| 3778 | static void ha_rcu_free(struct rcu_head *head) | ||
| 3779 | { | ||
| 3780 | struct netdev_hw_addr *ha; | ||
| 3781 | |||
| 3782 | ha = container_of(head, struct netdev_hw_addr, rcu_head); | ||
| 3783 | kfree(ha); | ||
| 3784 | } | ||
| 3785 | |||
| 3786 | static int __hw_addr_del(struct netdev_hw_addr_list *list, unsigned char *addr, | ||
| 3787 | int addr_len, unsigned char addr_type) | ||
| 3788 | { | ||
| 3789 | struct netdev_hw_addr *ha; | ||
| 3790 | |||
| 3791 | list_for_each_entry(ha, &list->list, list) { | ||
| 3792 | if (!memcmp(ha->addr, addr, addr_len) && | ||
| 3793 | (ha->type == addr_type || !addr_type)) { | ||
| 3794 | if (--ha->refcount) | ||
| 3795 | return 0; | ||
| 3796 | list_del_rcu(&ha->list); | ||
| 3797 | call_rcu(&ha->rcu_head, ha_rcu_free); | ||
| 3798 | list->count--; | ||
| 3799 | return 0; | ||
| 3800 | } | ||
| 3801 | } | ||
| 3802 | return -ENOENT; | ||
| 3803 | } | ||
| 3804 | |||
| 3805 | static int __hw_addr_add_multiple(struct netdev_hw_addr_list *to_list, | ||
| 3806 | struct netdev_hw_addr_list *from_list, | ||
| 3807 | int addr_len, | ||
| 3808 | unsigned char addr_type) | ||
| 3809 | { | ||
| 3810 | int err; | ||
| 3811 | struct netdev_hw_addr *ha, *ha2; | ||
| 3812 | unsigned char type; | ||
| 3813 | |||
| 3814 | list_for_each_entry(ha, &from_list->list, list) { | ||
| 3815 | type = addr_type ? addr_type : ha->type; | ||
| 3816 | err = __hw_addr_add(to_list, ha->addr, addr_len, type); | ||
| 3817 | if (err) | ||
| 3818 | goto unroll; | ||
| 3819 | } | ||
| 3820 | return 0; | ||
| 3821 | |||
| 3822 | unroll: | ||
| 3823 | list_for_each_entry(ha2, &from_list->list, list) { | ||
| 3824 | if (ha2 == ha) | ||
| 3825 | break; | ||
| 3826 | type = addr_type ? addr_type : ha2->type; | ||
| 3827 | __hw_addr_del(to_list, ha2->addr, addr_len, type); | ||
| 3828 | } | ||
| 3829 | return err; | ||
| 3830 | } | ||
| 3831 | |||
| 3832 | static void __hw_addr_del_multiple(struct netdev_hw_addr_list *to_list, | ||
| 3833 | struct netdev_hw_addr_list *from_list, | ||
| 3834 | int addr_len, | ||
| 3835 | unsigned char addr_type) | ||
| 3836 | { | ||
| 3837 | struct netdev_hw_addr *ha; | ||
| 3838 | unsigned char type; | ||
| 3839 | |||
| 3840 | list_for_each_entry(ha, &from_list->list, list) { | ||
| 3841 | type = addr_type ? addr_type : ha->type; | ||
| 3842 | __hw_addr_del(to_list, ha->addr, addr_len, addr_type); | ||
| 3843 | } | ||
| 3844 | } | ||
| 3845 | |||
| 3846 | static int __hw_addr_sync(struct netdev_hw_addr_list *to_list, | ||
| 3847 | struct netdev_hw_addr_list *from_list, | ||
| 3848 | int addr_len) | ||
| 3849 | { | ||
| 3850 | int err = 0; | ||
| 3851 | struct netdev_hw_addr *ha, *tmp; | ||
| 3852 | |||
| 3853 | list_for_each_entry_safe(ha, tmp, &from_list->list, list) { | ||
| 3854 | if (!ha->synced) { | ||
| 3855 | err = __hw_addr_add(to_list, ha->addr, | ||
| 3856 | addr_len, ha->type); | ||
| 3857 | if (err) | ||
| 3858 | break; | ||
| 3859 | ha->synced = true; | ||
| 3860 | ha->refcount++; | ||
| 3861 | } else if (ha->refcount == 1) { | ||
| 3862 | __hw_addr_del(to_list, ha->addr, addr_len, ha->type); | ||
| 3863 | __hw_addr_del(from_list, ha->addr, addr_len, ha->type); | ||
| 3864 | } | ||
| 3865 | } | ||
| 3866 | return err; | ||
| 3867 | } | ||
| 3868 | |||
| 3869 | static void __hw_addr_unsync(struct netdev_hw_addr_list *to_list, | ||
| 3870 | struct netdev_hw_addr_list *from_list, | ||
| 3871 | int addr_len) | ||
| 3872 | { | ||
| 3873 | struct netdev_hw_addr *ha, *tmp; | ||
| 3874 | |||
| 3875 | list_for_each_entry_safe(ha, tmp, &from_list->list, list) { | ||
| 3876 | if (ha->synced) { | ||
| 3877 | __hw_addr_del(to_list, ha->addr, | ||
| 3878 | addr_len, ha->type); | ||
| 3879 | ha->synced = false; | ||
| 3880 | __hw_addr_del(from_list, ha->addr, | ||
| 3881 | addr_len, ha->type); | ||
| 3882 | } | ||
| 3883 | } | ||
| 3884 | } | ||
| 3885 | |||
| 3886 | static void __hw_addr_flush(struct netdev_hw_addr_list *list) | ||
| 3887 | { | ||
| 3888 | struct netdev_hw_addr *ha, *tmp; | ||
| 3889 | |||
| 3890 | list_for_each_entry_safe(ha, tmp, &list->list, list) { | ||
| 3891 | list_del_rcu(&ha->list); | ||
| 3892 | call_rcu(&ha->rcu_head, ha_rcu_free); | ||
| 3893 | } | ||
| 3894 | list->count = 0; | ||
| 3895 | } | ||
| 3896 | |||
| 3897 | static void __hw_addr_init(struct netdev_hw_addr_list *list) | ||
| 3898 | { | ||
| 3899 | INIT_LIST_HEAD(&list->list); | ||
| 3900 | list->count = 0; | ||
| 3901 | } | ||
| 3902 | |||
| 3903 | /* Device addresses handling functions */ | ||
| 3904 | |||
| 3905 | static void dev_addr_flush(struct net_device *dev) | ||
| 3906 | { | ||
| 3907 | /* rtnl_mutex must be held here */ | ||
| 3908 | |||
| 3909 | __hw_addr_flush(&dev->dev_addrs); | ||
| 3910 | dev->dev_addr = NULL; | ||
| 3911 | } | ||
| 3912 | |||
| 3913 | static int dev_addr_init(struct net_device *dev) | ||
| 3914 | { | ||
| 3915 | unsigned char addr[MAX_ADDR_LEN]; | ||
| 3916 | struct netdev_hw_addr *ha; | ||
| 3917 | int err; | ||
| 3918 | |||
| 3919 | /* rtnl_mutex must be held here */ | ||
| 3920 | |||
| 3921 | __hw_addr_init(&dev->dev_addrs); | ||
| 3922 | memset(addr, 0, sizeof(addr)); | ||
| 3923 | err = __hw_addr_add(&dev->dev_addrs, addr, sizeof(addr), | ||
| 3924 | NETDEV_HW_ADDR_T_LAN); | ||
| 3925 | if (!err) { | ||
| 3926 | /* | ||
| 3927 | * Get the first (previously created) address from the list | ||
| 3928 | * and set dev_addr pointer to this location. | ||
| 3929 | */ | ||
| 3930 | ha = list_first_entry(&dev->dev_addrs.list, | ||
| 3931 | struct netdev_hw_addr, list); | ||
| 3932 | dev->dev_addr = ha->addr; | ||
| 3933 | } | ||
| 3934 | return err; | ||
| 3935 | } | ||
| 3936 | |||
| 3937 | /** | ||
| 3938 | * dev_addr_add - Add a device address | ||
| 3939 | * @dev: device | ||
| 3940 | * @addr: address to add | ||
| 3941 | * @addr_type: address type | ||
| 3942 | * | ||
| 3943 | * Add a device address to the device or increase the reference count if | ||
| 3944 | * it already exists. | ||
| 3945 | * | ||
| 3946 | * The caller must hold the rtnl_mutex. | ||
| 3947 | */ | ||
| 3948 | int dev_addr_add(struct net_device *dev, unsigned char *addr, | ||
| 3949 | unsigned char addr_type) | ||
| 3950 | { | ||
| 3951 | int err; | ||
| 3952 | |||
| 3953 | ASSERT_RTNL(); | ||
| 3954 | |||
| 3955 | err = __hw_addr_add(&dev->dev_addrs, addr, dev->addr_len, addr_type); | ||
| 3956 | if (!err) | ||
| 3957 | call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); | ||
| 3958 | return err; | ||
| 3959 | } | ||
| 3960 | EXPORT_SYMBOL(dev_addr_add); | ||
| 3961 | |||
| 3962 | /** | ||
| 3963 | * dev_addr_del - Release a device address. | ||
| 3964 | * @dev: device | ||
| 3965 | * @addr: address to delete | ||
| 3966 | * @addr_type: address type | ||
| 3967 | * | ||
| 3968 | * Release reference to a device address and remove it from the device | ||
| 3969 | * if the reference count drops to zero. | ||
| 3970 | * | ||
| 3971 | * The caller must hold the rtnl_mutex. | ||
| 3972 | */ | ||
| 3973 | int dev_addr_del(struct net_device *dev, unsigned char *addr, | ||
| 3974 | unsigned char addr_type) | ||
| 3975 | { | ||
| 3976 | int err; | ||
| 3977 | struct netdev_hw_addr *ha; | ||
| 3978 | |||
| 3979 | ASSERT_RTNL(); | ||
| 3980 | |||
| 3981 | /* | ||
| 3982 | * We can not remove the first address from the list because | ||
| 3983 | * dev->dev_addr points to that. | ||
| 3984 | */ | ||
| 3985 | ha = list_first_entry(&dev->dev_addrs.list, | ||
| 3986 | struct netdev_hw_addr, list); | ||
| 3987 | if (ha->addr == dev->dev_addr && ha->refcount == 1) | ||
| 3988 | return -ENOENT; | ||
| 3989 | |||
| 3990 | err = __hw_addr_del(&dev->dev_addrs, addr, dev->addr_len, | ||
| 3991 | addr_type); | ||
| 3992 | if (!err) | ||
| 3993 | call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); | ||
| 3994 | return err; | ||
| 3995 | } | ||
| 3996 | EXPORT_SYMBOL(dev_addr_del); | ||
| 3997 | |||
| 3998 | /** | ||
| 3999 | * dev_addr_add_multiple - Add device addresses from another device | ||
| 4000 | * @to_dev: device to which addresses will be added | ||
| 4001 | * @from_dev: device from which addresses will be added | ||
| 4002 | * @addr_type: address type - 0 means type will be used from from_dev | ||
| 4003 | * | ||
| 4004 | * Add device addresses of one device to another. | ||
| 4005 | * | ||
| 4006 | * The caller must hold the rtnl_mutex. | ||
| 4007 | */ | ||
| 4008 | int dev_addr_add_multiple(struct net_device *to_dev, | ||
| 4009 | struct net_device *from_dev, | ||
| 4010 | unsigned char addr_type) | ||
| 4011 | { | ||
| 4012 | int err; | ||
| 4013 | |||
| 4014 | ASSERT_RTNL(); | ||
| 4015 | |||
| 4016 | if (from_dev->addr_len != to_dev->addr_len) | ||
| 4017 | return -EINVAL; | ||
| 4018 | err = __hw_addr_add_multiple(&to_dev->dev_addrs, &from_dev->dev_addrs, | ||
| 4019 | to_dev->addr_len, addr_type); | ||
| 4020 | if (!err) | ||
| 4021 | call_netdevice_notifiers(NETDEV_CHANGEADDR, to_dev); | ||
| 4022 | return err; | ||
| 4023 | } | ||
| 4024 | EXPORT_SYMBOL(dev_addr_add_multiple); | ||
| 4025 | |||
| 4026 | /** | ||
| 4027 | * dev_addr_del_multiple - Delete device addresses by another device | ||
| 4028 | * @to_dev: device where the addresses will be deleted | ||
| 4029 | * @from_dev: device supplying the addresses to be deleted | ||
| 4030 | * @addr_type: address type - 0 means type will be used from from_dev | ||
| 4031 | * | ||
| 4032 | * Deletes each address in to_dev that appears in from_dev's list. | ||
| 4033 | * | ||
| 4034 | * The caller must hold the rtnl_mutex. | ||
| 4035 | */ | ||
| 4036 | int dev_addr_del_multiple(struct net_device *to_dev, | ||
| 4037 | struct net_device *from_dev, | ||
| 4038 | unsigned char addr_type) | ||
| 4039 | { | ||
| 4040 | ASSERT_RTNL(); | ||
| 4041 | |||
| 4042 | if (from_dev->addr_len != to_dev->addr_len) | ||
| 4043 | return -EINVAL; | ||
| 4044 | __hw_addr_del_multiple(&to_dev->dev_addrs, &from_dev->dev_addrs, | ||
| 4045 | to_dev->addr_len, addr_type); | ||
| 4046 | call_netdevice_notifiers(NETDEV_CHANGEADDR, to_dev); | ||
| 4047 | return 0; | ||
| 4048 | } | ||
| 4049 | EXPORT_SYMBOL(dev_addr_del_multiple); | ||
| 4050 | |||
| 4051 | /* multicast addresses handling functions */ | ||
| 4052 | |||
| 4053 | int __dev_addr_delete(struct dev_addr_list **list, int *count, | ||
| 4054 | void *addr, int alen, int glbl) | ||
| 4055 | { | ||
| 4056 | struct dev_addr_list *da; | ||
| 4057 | |||
| 4058 | for (; (da = *list) != NULL; list = &da->next) { | ||
| 4059 | if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 && | ||
| 4060 | alen == da->da_addrlen) { | ||
| 4061 | if (glbl) { | ||
| 4062 | int old_glbl = da->da_gusers; | ||
| 4063 | da->da_gusers = 0; | ||
| 4064 | if (old_glbl == 0) | ||
| 4065 | break; | ||
| 4066 | } | ||
| 4067 | if (--da->da_users) | ||
| 4068 | return 0; | ||
| 4069 | |||
| 4070 | *list = da->next; | ||
| 4071 | kfree(da); | ||
| 4072 | (*count)--; | ||
| 4073 | return 0; | ||
| 4074 | } | ||
| 4075 | } | ||
| 4076 | return -ENOENT; | ||
| 4077 | } | ||
| 4078 | |||
| 4079 | int __dev_addr_add(struct dev_addr_list **list, int *count, | ||
| 4080 | void *addr, int alen, int glbl) | ||
| 4081 | { | ||
| 4082 | struct dev_addr_list *da; | ||
| 4083 | |||
| 4084 | for (da = *list; da != NULL; da = da->next) { | ||
| 4085 | if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 && | ||
| 4086 | da->da_addrlen == alen) { | ||
| 4087 | if (glbl) { | ||
| 4088 | int old_glbl = da->da_gusers; | ||
| 4089 | da->da_gusers = 1; | ||
| 4090 | if (old_glbl) | ||
| 4091 | return 0; | ||
| 4092 | } | ||
| 4093 | da->da_users++; | ||
| 4094 | return 0; | ||
| 4095 | } | ||
| 4096 | } | ||
| 4097 | |||
| 4098 | da = kzalloc(sizeof(*da), GFP_ATOMIC); | ||
| 4099 | if (da == NULL) | ||
| 4100 | return -ENOMEM; | ||
| 4101 | memcpy(da->da_addr, addr, alen); | ||
| 4102 | da->da_addrlen = alen; | ||
| 4103 | da->da_users = 1; | ||
| 4104 | da->da_gusers = glbl ? 1 : 0; | ||
| 4105 | da->next = *list; | ||
| 4106 | *list = da; | ||
| 4107 | (*count)++; | ||
| 4108 | return 0; | ||
| 4109 | } | ||
| 4110 | |||
| 4111 | /** | ||
| 4112 | * dev_unicast_delete - Release secondary unicast address. | ||
| 4113 | * @dev: device | ||
| 4114 | * @addr: address to delete | ||
| 4115 | * | ||
| 4116 | * Release reference to a secondary unicast address and remove it | ||
| 4117 | * from the device if the reference count drops to zero. | ||
| 4118 | * | ||
| 4119 | * The caller must hold the rtnl_mutex. | ||
| 4120 | */ | ||
| 4121 | int dev_unicast_delete(struct net_device *dev, void *addr) | ||
| 4122 | { | ||
| 4123 | int err; | ||
| 4124 | |||
| 4125 | ASSERT_RTNL(); | ||
| 4126 | |||
| 4127 | netif_addr_lock_bh(dev); | ||
| 4128 | err = __hw_addr_del(&dev->uc, addr, dev->addr_len, | ||
| 4129 | NETDEV_HW_ADDR_T_UNICAST); | ||
| 4130 | if (!err) | ||
| 4131 | __dev_set_rx_mode(dev); | ||
| 4132 | netif_addr_unlock_bh(dev); | ||
| 4133 | return err; | ||
| 4134 | } | ||
| 4135 | EXPORT_SYMBOL(dev_unicast_delete); | ||
| 4136 | |||
| 4137 | /** | ||
| 4138 | * dev_unicast_add - add a secondary unicast address | ||
| 4139 | * @dev: device | ||
| 4140 | * @addr: address to add | ||
| 4141 | * | ||
| 4142 | * Add a secondary unicast address to the device or increase | ||
| 4143 | * the reference count if it already exists. | ||
| 4144 | * | ||
| 4145 | * The caller must hold the rtnl_mutex. | ||
| 4146 | */ | ||
| 4147 | int dev_unicast_add(struct net_device *dev, void *addr) | ||
| 4148 | { | ||
| 4149 | int err; | ||
| 4150 | |||
| 4151 | ASSERT_RTNL(); | ||
| 4152 | |||
| 4153 | netif_addr_lock_bh(dev); | ||
| 4154 | err = __hw_addr_add(&dev->uc, addr, dev->addr_len, | ||
| 4155 | NETDEV_HW_ADDR_T_UNICAST); | ||
| 4156 | if (!err) | ||
| 4157 | __dev_set_rx_mode(dev); | ||
| 4158 | netif_addr_unlock_bh(dev); | ||
| 4159 | return err; | ||
| 4160 | } | ||
| 4161 | EXPORT_SYMBOL(dev_unicast_add); | ||
| 4162 | |||
| 4163 | int __dev_addr_sync(struct dev_addr_list **to, int *to_count, | ||
| 4164 | struct dev_addr_list **from, int *from_count) | ||
| 4165 | { | ||
| 4166 | struct dev_addr_list *da, *next; | ||
| 4167 | int err = 0; | ||
| 4168 | |||
| 4169 | da = *from; | ||
| 4170 | while (da != NULL) { | ||
| 4171 | next = da->next; | ||
| 4172 | if (!da->da_synced) { | ||
| 4173 | err = __dev_addr_add(to, to_count, | ||
| 4174 | da->da_addr, da->da_addrlen, 0); | ||
| 4175 | if (err < 0) | ||
| 4176 | break; | ||
| 4177 | da->da_synced = 1; | ||
| 4178 | da->da_users++; | ||
| 4179 | } else if (da->da_users == 1) { | ||
| 4180 | __dev_addr_delete(to, to_count, | ||
| 4181 | da->da_addr, da->da_addrlen, 0); | ||
| 4182 | __dev_addr_delete(from, from_count, | ||
| 4183 | da->da_addr, da->da_addrlen, 0); | ||
| 4184 | } | ||
| 4185 | da = next; | ||
| 4186 | } | ||
| 4187 | return err; | ||
| 4188 | } | ||
| 4189 | EXPORT_SYMBOL_GPL(__dev_addr_sync); | ||
| 4190 | |||
| 4191 | void __dev_addr_unsync(struct dev_addr_list **to, int *to_count, | ||
| 4192 | struct dev_addr_list **from, int *from_count) | ||
| 4193 | { | ||
| 4194 | struct dev_addr_list *da, *next; | ||
| 4195 | |||
| 4196 | da = *from; | ||
| 4197 | while (da != NULL) { | ||
| 4198 | next = da->next; | ||
| 4199 | if (da->da_synced) { | ||
| 4200 | __dev_addr_delete(to, to_count, | ||
| 4201 | da->da_addr, da->da_addrlen, 0); | ||
| 4202 | da->da_synced = 0; | ||
| 4203 | __dev_addr_delete(from, from_count, | ||
| 4204 | da->da_addr, da->da_addrlen, 0); | ||
| 4205 | } | ||
| 4206 | da = next; | ||
| 4207 | } | ||
| 4208 | } | ||
| 4209 | EXPORT_SYMBOL_GPL(__dev_addr_unsync); | ||
| 4210 | |||
| 4211 | /** | ||
| 4212 | * dev_unicast_sync - Synchronize device's unicast list to another device | ||
| 4213 | * @to: destination device | ||
| 4214 | * @from: source device | ||
| 4215 | * | ||
| 4216 | * Add newly added addresses to the destination device and release | ||
| 4217 | * addresses that have no users left. The source device must be | ||
| 4218 | * locked by netif_tx_lock_bh. | ||
| 4219 | * | ||
| 4220 | * This function is intended to be called from the dev->set_rx_mode | ||
| 4221 | * function of layered software devices. | ||
| 4222 | */ | ||
| 4223 | int dev_unicast_sync(struct net_device *to, struct net_device *from) | ||
| 4224 | { | ||
| 4225 | int err = 0; | ||
| 4226 | |||
| 4227 | if (to->addr_len != from->addr_len) | ||
| 4228 | return -EINVAL; | ||
| 4229 | |||
| 4230 | netif_addr_lock_bh(to); | ||
| 4231 | err = __hw_addr_sync(&to->uc, &from->uc, to->addr_len); | ||
| 4232 | if (!err) | ||
| 4233 | __dev_set_rx_mode(to); | ||
| 4234 | netif_addr_unlock_bh(to); | ||
| 4235 | return err; | ||
| 4236 | } | ||
| 4237 | EXPORT_SYMBOL(dev_unicast_sync); | ||
| 4238 | |||
| 4239 | /** | ||
| 4240 | * dev_unicast_unsync - Remove synchronized addresses from the destination device | ||
| 4241 | * @to: destination device | ||
| 4242 | * @from: source device | ||
| 4243 | * | ||
| 4244 | * Remove all addresses that were added to the destination device by | ||
| 4245 | * dev_unicast_sync(). This function is intended to be called from the | ||
| 4246 | * dev->stop function of layered software devices. | ||
| 4247 | */ | ||
| 4248 | void dev_unicast_unsync(struct net_device *to, struct net_device *from) | ||
| 4249 | { | ||
| 4250 | if (to->addr_len != from->addr_len) | ||
| 4251 | return; | ||
| 4252 | |||
| 4253 | netif_addr_lock_bh(from); | ||
| 4254 | netif_addr_lock(to); | ||
| 4255 | __hw_addr_unsync(&to->uc, &from->uc, to->addr_len); | ||
| 4256 | __dev_set_rx_mode(to); | ||
| 4257 | netif_addr_unlock(to); | ||
| 4258 | netif_addr_unlock_bh(from); | ||
| 4259 | } | ||
| 4260 | EXPORT_SYMBOL(dev_unicast_unsync); | ||
| 4261 | |||
| 4262 | static void dev_unicast_flush(struct net_device *dev) | ||
| 4263 | { | ||
| 4264 | netif_addr_lock_bh(dev); | ||
| 4265 | __hw_addr_flush(&dev->uc); | ||
| 4266 | netif_addr_unlock_bh(dev); | ||
| 4267 | } | ||
| 4268 | |||
| 4269 | static void dev_unicast_init(struct net_device *dev) | ||
| 4270 | { | ||
| 4271 | __hw_addr_init(&dev->uc); | ||
| 4272 | } | ||
| 4273 | |||
| 4274 | |||
| 4275 | static void __dev_addr_discard(struct dev_addr_list **list) | ||
| 4276 | { | ||
| 4277 | struct dev_addr_list *tmp; | ||
| 4278 | |||
| 4279 | while (*list != NULL) { | ||
| 4280 | tmp = *list; | ||
| 4281 | *list = tmp->next; | ||
| 4282 | if (tmp->da_users > tmp->da_gusers) | ||
| 4283 | printk("__dev_addr_discard: address leakage! " | ||
| 4284 | "da_users=%d\n", tmp->da_users); | ||
| 4285 | kfree(tmp); | ||
| 4286 | } | ||
| 4287 | } | ||
| 4288 | |||
| 4289 | static void dev_addr_discard(struct net_device *dev) | ||
| 4290 | { | ||
| 4291 | netif_addr_lock_bh(dev); | ||
| 4292 | |||
| 4293 | __dev_addr_discard(&dev->mc_list); | ||
| 4294 | netdev_mc_count(dev) = 0; | ||
| 4295 | |||
| 4296 | netif_addr_unlock_bh(dev); | ||
| 4297 | } | ||
| 4298 | |||
| 4299 | /** | 4164 | /** |
| 4300 | * dev_get_flags - get flags reported to userspace | 4165 | * dev_get_flags - get flags reported to userspace |
| 4301 | * @dev: device | 4166 | * @dev: device |
| @@ -4606,8 +4471,7 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd) | |||
| 4606 | return -EINVAL; | 4471 | return -EINVAL; |
| 4607 | if (!netif_device_present(dev)) | 4472 | if (!netif_device_present(dev)) |
| 4608 | return -ENODEV; | 4473 | return -ENODEV; |
| 4609 | return dev_mc_add(dev, ifr->ifr_hwaddr.sa_data, | 4474 | return dev_mc_add_global(dev, ifr->ifr_hwaddr.sa_data); |
| 4610 | dev->addr_len, 1); | ||
| 4611 | 4475 | ||
| 4612 | case SIOCDELMULTI: | 4476 | case SIOCDELMULTI: |
| 4613 | if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) || | 4477 | if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) || |
| @@ -4615,8 +4479,7 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd) | |||
| 4615 | return -EINVAL; | 4479 | return -EINVAL; |
| 4616 | if (!netif_device_present(dev)) | 4480 | if (!netif_device_present(dev)) |
| 4617 | return -ENODEV; | 4481 | return -ENODEV; |
| 4618 | return dev_mc_delete(dev, ifr->ifr_hwaddr.sa_data, | 4482 | return dev_mc_del_global(dev, ifr->ifr_hwaddr.sa_data); |
| 4619 | dev->addr_len, 1); | ||
| 4620 | 4483 | ||
| 4621 | case SIOCSIFTXQLEN: | 4484 | case SIOCSIFTXQLEN: |
| 4622 | if (ifr->ifr_qlen < 0) | 4485 | if (ifr->ifr_qlen < 0) |
| @@ -4923,8 +4786,8 @@ static void rollback_registered_many(struct list_head *head) | |||
| 4923 | /* | 4786 | /* |
| 4924 | * Flush the unicast and multicast chains | 4787 | * Flush the unicast and multicast chains |
| 4925 | */ | 4788 | */ |
| 4926 | dev_unicast_flush(dev); | 4789 | dev_uc_flush(dev); |
| 4927 | dev_addr_discard(dev); | 4790 | dev_mc_flush(dev); |
| 4928 | 4791 | ||
| 4929 | if (dev->netdev_ops->ndo_uninit) | 4792 | if (dev->netdev_ops->ndo_uninit) |
| 4930 | dev->netdev_ops->ndo_uninit(dev); | 4793 | dev->netdev_ops->ndo_uninit(dev); |
| @@ -5073,6 +4936,24 @@ int register_netdevice(struct net_device *dev) | |||
| 5073 | 4936 | ||
| 5074 | dev->iflink = -1; | 4937 | dev->iflink = -1; |
| 5075 | 4938 | ||
| 4939 | #ifdef CONFIG_RPS | ||
| 4940 | if (!dev->num_rx_queues) { | ||
| 4941 | /* | ||
| 4942 | * Allocate a single RX queue if driver never called | ||
| 4943 | * alloc_netdev_mq | ||
| 4944 | */ | ||
| 4945 | |||
| 4946 | dev->_rx = kzalloc(sizeof(struct netdev_rx_queue), GFP_KERNEL); | ||
| 4947 | if (!dev->_rx) { | ||
| 4948 | ret = -ENOMEM; | ||
| 4949 | goto out; | ||
| 4950 | } | ||
| 4951 | |||
| 4952 | dev->_rx->first = dev->_rx; | ||
| 4953 | atomic_set(&dev->_rx->count, 1); | ||
| 4954 | dev->num_rx_queues = 1; | ||
| 4955 | } | ||
| 4956 | #endif | ||
| 5076 | /* Init, if this function is available */ | 4957 | /* Init, if this function is available */ |
| 5077 | if (dev->netdev_ops->ndo_init) { | 4958 | if (dev->netdev_ops->ndo_init) { |
| 5078 | ret = dev->netdev_ops->ndo_init(dev); | 4959 | ret = dev->netdev_ops->ndo_init(dev); |
| @@ -5083,7 +4964,7 @@ int register_netdevice(struct net_device *dev) | |||
| 5083 | } | 4964 | } |
| 5084 | } | 4965 | } |
| 5085 | 4966 | ||
| 5086 | ret = dev_get_valid_name(net, dev->name, dev->name, 0); | 4967 | ret = dev_get_valid_name(dev, dev->name, 0); |
| 5087 | if (ret) | 4968 | if (ret) |
| 5088 | goto err_uninit; | 4969 | goto err_uninit; |
| 5089 | 4970 | ||
| @@ -5112,8 +4993,6 @@ int register_netdevice(struct net_device *dev) | |||
| 5112 | if (dev->features & NETIF_F_SG) | 4993 | if (dev->features & NETIF_F_SG) |
| 5113 | dev->features |= NETIF_F_GSO; | 4994 | dev->features |= NETIF_F_GSO; |
| 5114 | 4995 | ||
| 5115 | netdev_initialize_kobject(dev); | ||
| 5116 | |||
| 5117 | ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev); | 4996 | ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev); |
| 5118 | ret = notifier_to_errno(ret); | 4997 | ret = notifier_to_errno(ret); |
| 5119 | if (ret) | 4998 | if (ret) |
| @@ -5433,6 +5312,10 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, | |||
| 5433 | struct net_device *dev; | 5312 | struct net_device *dev; |
| 5434 | size_t alloc_size; | 5313 | size_t alloc_size; |
| 5435 | struct net_device *p; | 5314 | struct net_device *p; |
| 5315 | #ifdef CONFIG_RPS | ||
| 5316 | struct netdev_rx_queue *rx; | ||
| 5317 | int i; | ||
| 5318 | #endif | ||
| 5436 | 5319 | ||
| 5437 | BUG_ON(strlen(name) >= sizeof(dev->name)); | 5320 | BUG_ON(strlen(name) >= sizeof(dev->name)); |
| 5438 | 5321 | ||
| @@ -5458,13 +5341,32 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, | |||
| 5458 | goto free_p; | 5341 | goto free_p; |
| 5459 | } | 5342 | } |
| 5460 | 5343 | ||
| 5344 | #ifdef CONFIG_RPS | ||
| 5345 | rx = kcalloc(queue_count, sizeof(struct netdev_rx_queue), GFP_KERNEL); | ||
| 5346 | if (!rx) { | ||
| 5347 | printk(KERN_ERR "alloc_netdev: Unable to allocate " | ||
| 5348 | "rx queues.\n"); | ||
| 5349 | goto free_tx; | ||
| 5350 | } | ||
| 5351 | |||
| 5352 | atomic_set(&rx->count, queue_count); | ||
| 5353 | |||
| 5354 | /* | ||
| 5355 | * Set a pointer to first element in the array which holds the | ||
| 5356 | * reference count. | ||
| 5357 | */ | ||
| 5358 | for (i = 0; i < queue_count; i++) | ||
| 5359 | rx[i].first = rx; | ||
| 5360 | #endif | ||
| 5361 | |||
| 5461 | dev = PTR_ALIGN(p, NETDEV_ALIGN); | 5362 | dev = PTR_ALIGN(p, NETDEV_ALIGN); |
| 5462 | dev->padded = (char *)dev - (char *)p; | 5363 | dev->padded = (char *)dev - (char *)p; |
| 5463 | 5364 | ||
| 5464 | if (dev_addr_init(dev)) | 5365 | if (dev_addr_init(dev)) |
| 5465 | goto free_tx; | 5366 | goto free_rx; |
| 5466 | 5367 | ||
| 5467 | dev_unicast_init(dev); | 5368 | dev_mc_init(dev); |
| 5369 | dev_uc_init(dev); | ||
| 5468 | 5370 | ||
| 5469 | dev_net_set(dev, &init_net); | 5371 | dev_net_set(dev, &init_net); |
| 5470 | 5372 | ||
| @@ -5472,6 +5374,11 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, | |||
| 5472 | dev->num_tx_queues = queue_count; | 5374 | dev->num_tx_queues = queue_count; |
| 5473 | dev->real_num_tx_queues = queue_count; | 5375 | dev->real_num_tx_queues = queue_count; |
| 5474 | 5376 | ||
| 5377 | #ifdef CONFIG_RPS | ||
| 5378 | dev->_rx = rx; | ||
| 5379 | dev->num_rx_queues = queue_count; | ||
| 5380 | #endif | ||
| 5381 | |||
| 5475 | dev->gso_max_size = GSO_MAX_SIZE; | 5382 | dev->gso_max_size = GSO_MAX_SIZE; |
| 5476 | 5383 | ||
| 5477 | netdev_init_queues(dev); | 5384 | netdev_init_queues(dev); |
| @@ -5486,9 +5393,12 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, | |||
| 5486 | strcpy(dev->name, name); | 5393 | strcpy(dev->name, name); |
| 5487 | return dev; | 5394 | return dev; |
| 5488 | 5395 | ||
| 5396 | free_rx: | ||
| 5397 | #ifdef CONFIG_RPS | ||
| 5398 | kfree(rx); | ||
| 5489 | free_tx: | 5399 | free_tx: |
| 5400 | #endif | ||
| 5490 | kfree(tx); | 5401 | kfree(tx); |
| 5491 | |||
| 5492 | free_p: | 5402 | free_p: |
| 5493 | kfree(p); | 5403 | kfree(p); |
| 5494 | return NULL; | 5404 | return NULL; |
| @@ -5634,15 +5544,6 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char | |||
| 5634 | if (dev->features & NETIF_F_NETNS_LOCAL) | 5544 | if (dev->features & NETIF_F_NETNS_LOCAL) |
| 5635 | goto out; | 5545 | goto out; |
| 5636 | 5546 | ||
| 5637 | #ifdef CONFIG_SYSFS | ||
| 5638 | /* Don't allow real devices to be moved when sysfs | ||
| 5639 | * is enabled. | ||
| 5640 | */ | ||
| 5641 | err = -EINVAL; | ||
| 5642 | if (dev->dev.parent) | ||
| 5643 | goto out; | ||
| 5644 | #endif | ||
| 5645 | |||
| 5646 | /* Ensure the device has been registrered */ | 5547 | /* Ensure the device has been registrered */ |
| 5647 | err = -EINVAL; | 5548 | err = -EINVAL; |
| 5648 | if (dev->reg_state != NETREG_REGISTERED) | 5549 | if (dev->reg_state != NETREG_REGISTERED) |
| @@ -5661,7 +5562,7 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char | |||
| 5661 | /* We get here if we can't use the current device name */ | 5562 | /* We get here if we can't use the current device name */ |
| 5662 | if (!pat) | 5563 | if (!pat) |
| 5663 | goto out; | 5564 | goto out; |
| 5664 | if (dev_get_valid_name(net, pat, dev->name, 1)) | 5565 | if (dev_get_valid_name(dev, pat, 1)) |
| 5665 | goto out; | 5566 | goto out; |
| 5666 | } | 5567 | } |
| 5667 | 5568 | ||
| @@ -5690,10 +5591,8 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char | |||
| 5690 | /* | 5591 | /* |
| 5691 | * Flush the unicast and multicast chains | 5592 | * Flush the unicast and multicast chains |
| 5692 | */ | 5593 | */ |
| 5693 | dev_unicast_flush(dev); | 5594 | dev_uc_flush(dev); |
| 5694 | dev_addr_discard(dev); | 5595 | dev_mc_flush(dev); |
| 5695 | |||
| 5696 | netdev_unregister_kobject(dev); | ||
| 5697 | 5596 | ||
| 5698 | /* Actually switch the network namespace */ | 5597 | /* Actually switch the network namespace */ |
| 5699 | dev_net_set(dev, net); | 5598 | dev_net_set(dev, net); |
| @@ -5707,7 +5606,7 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char | |||
| 5707 | } | 5606 | } |
| 5708 | 5607 | ||
| 5709 | /* Fixup kobjects */ | 5608 | /* Fixup kobjects */ |
| 5710 | err = netdev_register_kobject(dev); | 5609 | err = device_rename(&dev->dev, dev->name); |
| 5711 | WARN_ON(err); | 5610 | WARN_ON(err); |
| 5712 | 5611 | ||
| 5713 | /* Add the device back in the hashes */ | 5612 | /* Add the device back in the hashes */ |
| @@ -5734,7 +5633,6 @@ static int dev_cpu_callback(struct notifier_block *nfb, | |||
| 5734 | void *ocpu) | 5633 | void *ocpu) |
| 5735 | { | 5634 | { |
| 5736 | struct sk_buff **list_skb; | 5635 | struct sk_buff **list_skb; |
| 5737 | struct Qdisc **list_net; | ||
| 5738 | struct sk_buff *skb; | 5636 | struct sk_buff *skb; |
| 5739 | unsigned int cpu, oldcpu = (unsigned long)ocpu; | 5637 | unsigned int cpu, oldcpu = (unsigned long)ocpu; |
| 5740 | struct softnet_data *sd, *oldsd; | 5638 | struct softnet_data *sd, *oldsd; |
| @@ -5755,20 +5653,26 @@ static int dev_cpu_callback(struct notifier_block *nfb, | |||
| 5755 | *list_skb = oldsd->completion_queue; | 5653 | *list_skb = oldsd->completion_queue; |
| 5756 | oldsd->completion_queue = NULL; | 5654 | oldsd->completion_queue = NULL; |
| 5757 | 5655 | ||
| 5758 | /* Find end of our output_queue. */ | ||
| 5759 | list_net = &sd->output_queue; | ||
| 5760 | while (*list_net) | ||
| 5761 | list_net = &(*list_net)->next_sched; | ||
| 5762 | /* Append output queue from offline CPU. */ | 5656 | /* Append output queue from offline CPU. */ |
| 5763 | *list_net = oldsd->output_queue; | 5657 | if (oldsd->output_queue) { |
| 5764 | oldsd->output_queue = NULL; | 5658 | *sd->output_queue_tailp = oldsd->output_queue; |
| 5659 | sd->output_queue_tailp = oldsd->output_queue_tailp; | ||
| 5660 | oldsd->output_queue = NULL; | ||
| 5661 | oldsd->output_queue_tailp = &oldsd->output_queue; | ||
| 5662 | } | ||
| 5765 | 5663 | ||
| 5766 | raise_softirq_irqoff(NET_TX_SOFTIRQ); | 5664 | raise_softirq_irqoff(NET_TX_SOFTIRQ); |
| 5767 | local_irq_enable(); | 5665 | local_irq_enable(); |
| 5768 | 5666 | ||
| 5769 | /* Process offline CPU's input_pkt_queue */ | 5667 | /* Process offline CPU's input_pkt_queue */ |
| 5770 | while ((skb = __skb_dequeue(&oldsd->input_pkt_queue))) | 5668 | while ((skb = __skb_dequeue(&oldsd->process_queue))) { |
| 5669 | netif_rx(skb); | ||
| 5670 | input_queue_head_incr(oldsd); | ||
| 5671 | } | ||
| 5672 | while ((skb = __skb_dequeue(&oldsd->input_pkt_queue))) { | ||
| 5771 | netif_rx(skb); | 5673 | netif_rx(skb); |
| 5674 | input_queue_head_incr(oldsd); | ||
| 5675 | } | ||
| 5772 | 5676 | ||
| 5773 | return NOTIFY_OK; | 5677 | return NOTIFY_OK; |
| 5774 | } | 5678 | } |
| @@ -5984,17 +5888,26 @@ static int __init net_dev_init(void) | |||
| 5984 | */ | 5888 | */ |
| 5985 | 5889 | ||
| 5986 | for_each_possible_cpu(i) { | 5890 | for_each_possible_cpu(i) { |
| 5987 | struct softnet_data *queue; | 5891 | struct softnet_data *sd = &per_cpu(softnet_data, i); |
| 5988 | 5892 | ||
| 5989 | queue = &per_cpu(softnet_data, i); | 5893 | memset(sd, 0, sizeof(*sd)); |
| 5990 | skb_queue_head_init(&queue->input_pkt_queue); | 5894 | skb_queue_head_init(&sd->input_pkt_queue); |
| 5991 | queue->completion_queue = NULL; | 5895 | skb_queue_head_init(&sd->process_queue); |
| 5992 | INIT_LIST_HEAD(&queue->poll_list); | 5896 | sd->completion_queue = NULL; |
| 5897 | INIT_LIST_HEAD(&sd->poll_list); | ||
| 5898 | sd->output_queue = NULL; | ||
| 5899 | sd->output_queue_tailp = &sd->output_queue; | ||
| 5900 | #ifdef CONFIG_RPS | ||
| 5901 | sd->csd.func = rps_trigger_softirq; | ||
| 5902 | sd->csd.info = sd; | ||
| 5903 | sd->csd.flags = 0; | ||
| 5904 | sd->cpu = i; | ||
| 5905 | #endif | ||
| 5993 | 5906 | ||
| 5994 | queue->backlog.poll = process_backlog; | 5907 | sd->backlog.poll = process_backlog; |
| 5995 | queue->backlog.weight = weight_p; | 5908 | sd->backlog.weight = weight_p; |
| 5996 | queue->backlog.gro_list = NULL; | 5909 | sd->backlog.gro_list = NULL; |
| 5997 | queue->backlog.gro_count = 0; | 5910 | sd->backlog.gro_count = 0; |
| 5998 | } | 5911 | } |
| 5999 | 5912 | ||
| 6000 | dev_boot_phase = 0; | 5913 | dev_boot_phase = 0; |
| @@ -6029,7 +5942,7 @@ subsys_initcall(net_dev_init); | |||
| 6029 | 5942 | ||
| 6030 | static int __init initialize_hashrnd(void) | 5943 | static int __init initialize_hashrnd(void) |
| 6031 | { | 5944 | { |
| 6032 | get_random_bytes(&skb_tx_hashrnd, sizeof(skb_tx_hashrnd)); | 5945 | get_random_bytes(&hashrnd, sizeof(hashrnd)); |
| 6033 | return 0; | 5946 | return 0; |
| 6034 | } | 5947 | } |
| 6035 | 5948 | ||
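
The per-cpu softnet_data now keeps output_queue_tailp alongside output_queue, so appending a qdisc no longer walks to the end of the list the way the removed list_net loop did. A hedged sketch of the O(1) append, mirroring the dev_cpu_callback hunk above rather than reproducing the exact in-tree helper:

    /* Sketch: append a qdisc to a CPU's output queue via the tail pointer. */
    static void output_queue_append(struct softnet_data *sd, struct Qdisc *q)
    {
            q->next_sched = NULL;
            *sd->output_queue_tailp = q;
            sd->output_queue_tailp = &q->next_sched;
    }
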
diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c new file mode 100644 index 000000000000..508f9c18992f --- /dev/null +++ b/net/core/dev_addr_lists.c | |||
| @@ -0,0 +1,741 @@ | |||
| 1 | /* | ||
| 2 | * net/core/dev_addr_lists.c - Functions for handling net device lists | ||
| 3 | * Copyright (c) 2010 Jiri Pirko <jpirko@redhat.com> | ||
| 4 | * | ||
| 5 | * This file contains functions for working with unicast, multicast and device | ||
| 6 | * addresses lists. | ||
| 7 | * | ||
| 8 | * This program is free software; you can redistribute it and/or modify | ||
| 9 | * it under the terms of the GNU General Public License as published by | ||
| 10 | * the Free Software Foundation; either version 2 of the License, or | ||
| 11 | * (at your option) any later version. | ||
| 12 | */ | ||
| 13 | |||
| 14 | #include <linux/netdevice.h> | ||
| 15 | #include <linux/rtnetlink.h> | ||
| 16 | #include <linux/list.h> | ||
| 17 | #include <linux/proc_fs.h> | ||
| 18 | |||
| 19 | /* | ||
| 20 | * General list handling functions | ||
| 21 | */ | ||
| 22 | |||
| 23 | static int __hw_addr_add_ex(struct netdev_hw_addr_list *list, | ||
| 24 | unsigned char *addr, int addr_len, | ||
| 25 | unsigned char addr_type, bool global) | ||
| 26 | { | ||
| 27 | struct netdev_hw_addr *ha; | ||
| 28 | int alloc_size; | ||
| 29 | |||
| 30 | if (addr_len > MAX_ADDR_LEN) | ||
| 31 | return -EINVAL; | ||
| 32 | |||
| 33 | list_for_each_entry(ha, &list->list, list) { | ||
| 34 | if (!memcmp(ha->addr, addr, addr_len) && | ||
| 35 | ha->type == addr_type) { | ||
| 36 | if (global) { | ||
| 37 | /* check if addr is already used as global */ | ||
| 38 | if (ha->global_use) | ||
| 39 | return 0; | ||
| 40 | else | ||
| 41 | ha->global_use = true; | ||
| 42 | } | ||
| 43 | ha->refcount++; | ||
| 44 | return 0; | ||
| 45 | } | ||
| 46 | } | ||
| 47 | |||
| 48 | |||
| 49 | alloc_size = sizeof(*ha); | ||
| 50 | if (alloc_size < L1_CACHE_BYTES) | ||
| 51 | alloc_size = L1_CACHE_BYTES; | ||
| 52 | ha = kmalloc(alloc_size, GFP_ATOMIC); | ||
| 53 | if (!ha) | ||
| 54 | return -ENOMEM; | ||
| 55 | memcpy(ha->addr, addr, addr_len); | ||
| 56 | ha->type = addr_type; | ||
| 57 | ha->refcount = 1; | ||
| 58 | ha->global_use = global; | ||
| 59 | ha->synced = false; | ||
| 60 | list_add_tail_rcu(&ha->list, &list->list); | ||
| 61 | list->count++; | ||
| 62 | return 0; | ||
| 63 | } | ||
| 64 | |||
| 65 | static int __hw_addr_add(struct netdev_hw_addr_list *list, unsigned char *addr, | ||
| 66 | int addr_len, unsigned char addr_type) | ||
| 67 | { | ||
| 68 | return __hw_addr_add_ex(list, addr, addr_len, addr_type, false); | ||
| 69 | } | ||
| 70 | |||
| 71 | static void ha_rcu_free(struct rcu_head *head) | ||
| 72 | { | ||
| 73 | struct netdev_hw_addr *ha; | ||
| 74 | |||
| 75 | ha = container_of(head, struct netdev_hw_addr, rcu_head); | ||
| 76 | kfree(ha); | ||
| 77 | } | ||
| 78 | |||
| 79 | static int __hw_addr_del_ex(struct netdev_hw_addr_list *list, | ||
| 80 | unsigned char *addr, int addr_len, | ||
| 81 | unsigned char addr_type, bool global) | ||
| 82 | { | ||
| 83 | struct netdev_hw_addr *ha; | ||
| 84 | |||
| 85 | list_for_each_entry(ha, &list->list, list) { | ||
| 86 | if (!memcmp(ha->addr, addr, addr_len) && | ||
| 87 | (ha->type == addr_type || !addr_type)) { | ||
| 88 | if (global) { | ||
| 89 | if (!ha->global_use) | ||
| 90 | break; | ||
| 91 | else | ||
| 92 | ha->global_use = false; | ||
| 93 | } | ||
| 94 | if (--ha->refcount) | ||
| 95 | return 0; | ||
| 96 | list_del_rcu(&ha->list); | ||
| 97 | call_rcu(&ha->rcu_head, ha_rcu_free); | ||
| 98 | list->count--; | ||
| 99 | return 0; | ||
| 100 | } | ||
| 101 | } | ||
| 102 | return -ENOENT; | ||
| 103 | } | ||
| 104 | |||
| 105 | static int __hw_addr_del(struct netdev_hw_addr_list *list, unsigned char *addr, | ||
| 106 | int addr_len, unsigned char addr_type) | ||
| 107 | { | ||
| 108 | return __hw_addr_del_ex(list, addr, addr_len, addr_type, false); | ||
| 109 | } | ||
| 110 | |||
| 111 | int __hw_addr_add_multiple(struct netdev_hw_addr_list *to_list, | ||
| 112 | struct netdev_hw_addr_list *from_list, | ||
| 113 | int addr_len, unsigned char addr_type) | ||
| 114 | { | ||
| 115 | int err; | ||
| 116 | struct netdev_hw_addr *ha, *ha2; | ||
| 117 | unsigned char type; | ||
| 118 | |||
| 119 | list_for_each_entry(ha, &from_list->list, list) { | ||
| 120 | type = addr_type ? addr_type : ha->type; | ||
| 121 | err = __hw_addr_add(to_list, ha->addr, addr_len, type); | ||
| 122 | if (err) | ||
| 123 | goto unroll; | ||
| 124 | } | ||
| 125 | return 0; | ||
| 126 | |||
| 127 | unroll: | ||
| 128 | list_for_each_entry(ha2, &from_list->list, list) { | ||
| 129 | if (ha2 == ha) | ||
| 130 | break; | ||
| 131 | type = addr_type ? addr_type : ha2->type; | ||
| 132 | __hw_addr_del(to_list, ha2->addr, addr_len, type); | ||
| 133 | } | ||
| 134 | return err; | ||
| 135 | } | ||
| 136 | EXPORT_SYMBOL(__hw_addr_add_multiple); | ||
| 137 | |||
| 138 | void __hw_addr_del_multiple(struct netdev_hw_addr_list *to_list, | ||
| 139 | struct netdev_hw_addr_list *from_list, | ||
| 140 | int addr_len, unsigned char addr_type) | ||
| 141 | { | ||
| 142 | struct netdev_hw_addr *ha; | ||
| 143 | unsigned char type; | ||
| 144 | |||
| 145 | list_for_each_entry(ha, &from_list->list, list) { | ||
| 146 | type = addr_type ? addr_type : ha->type; | ||
| 147 | __hw_addr_del(to_list, ha->addr, addr_len, type); | ||
| 148 | } | ||
| 149 | } | ||
| 150 | EXPORT_SYMBOL(__hw_addr_del_multiple); | ||
| 151 | |||
| 152 | int __hw_addr_sync(struct netdev_hw_addr_list *to_list, | ||
| 153 | struct netdev_hw_addr_list *from_list, | ||
| 154 | int addr_len) | ||
| 155 | { | ||
| 156 | int err = 0; | ||
| 157 | struct netdev_hw_addr *ha, *tmp; | ||
| 158 | |||
| 159 | list_for_each_entry_safe(ha, tmp, &from_list->list, list) { | ||
| 160 | if (!ha->synced) { | ||
| 161 | err = __hw_addr_add(to_list, ha->addr, | ||
| 162 | addr_len, ha->type); | ||
| 163 | if (err) | ||
| 164 | break; | ||
| 165 | ha->synced = true; | ||
| 166 | ha->refcount++; | ||
| 167 | } else if (ha->refcount == 1) { | ||
| 168 | __hw_addr_del(to_list, ha->addr, addr_len, ha->type); | ||
| 169 | __hw_addr_del(from_list, ha->addr, addr_len, ha->type); | ||
| 170 | } | ||
| 171 | } | ||
| 172 | return err; | ||
| 173 | } | ||
| 174 | EXPORT_SYMBOL(__hw_addr_sync); | ||
| 175 | |||
| 176 | void __hw_addr_unsync(struct netdev_hw_addr_list *to_list, | ||
| 177 | struct netdev_hw_addr_list *from_list, | ||
| 178 | int addr_len) | ||
| 179 | { | ||
| 180 | struct netdev_hw_addr *ha, *tmp; | ||
| 181 | |||
| 182 | list_for_each_entry_safe(ha, tmp, &from_list->list, list) { | ||
| 183 | if (ha->synced) { | ||
| 184 | __hw_addr_del(to_list, ha->addr, | ||
| 185 | addr_len, ha->type); | ||
| 186 | ha->synced = false; | ||
| 187 | __hw_addr_del(from_list, ha->addr, | ||
| 188 | addr_len, ha->type); | ||
| 189 | } | ||
| 190 | } | ||
| 191 | } | ||
| 192 | EXPORT_SYMBOL(__hw_addr_unsync); | ||
| 193 | |||
| 194 | void __hw_addr_flush(struct netdev_hw_addr_list *list) | ||
| 195 | { | ||
| 196 | struct netdev_hw_addr *ha, *tmp; | ||
| 197 | |||
| 198 | list_for_each_entry_safe(ha, tmp, &list->list, list) { | ||
| 199 | list_del_rcu(&ha->list); | ||
| 200 | call_rcu(&ha->rcu_head, ha_rcu_free); | ||
| 201 | } | ||
| 202 | list->count = 0; | ||
| 203 | } | ||
| 204 | EXPORT_SYMBOL(__hw_addr_flush); | ||
| 205 | |||
| 206 | void __hw_addr_init(struct netdev_hw_addr_list *list) | ||
| 207 | { | ||
| 208 | INIT_LIST_HEAD(&list->list); | ||
| 209 | list->count = 0; | ||
| 210 | } | ||
| 211 | EXPORT_SYMBOL(__hw_addr_init); | ||
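
The helpers above operate on any netdev_hw_addr_list, not only the lists embedded in struct net_device. A hedged sketch of a hypothetical caller keeping a private shadow copy of a device's address list; the function names are illustrative, only the __hw_addr_* calls are real:

    #include <linux/netdevice.h>

    /* Sketch: copy dev->dev_addrs into a caller-owned list, tear it down later. */
    static int shadow_copy_addrs(struct netdev_hw_addr_list *shadow,
                                 struct net_device *dev)
    {
            __hw_addr_init(shadow);
            /* passing 0 as addr_type would preserve each entry's own type */
            return __hw_addr_add_multiple(shadow, &dev->dev_addrs,
                                          dev->addr_len, NETDEV_HW_ADDR_T_LAN);
    }

    static void shadow_free_addrs(struct netdev_hw_addr_list *shadow)
    {
            __hw_addr_flush(shadow);        /* entries are freed via call_rcu */
    }

On failure __hw_addr_add_multiple() unrolls the entries it already added, so the caller does not need an extra flush in the error path.
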
| 212 | |||
| 213 | /* | ||
| 214 | * Device addresses handling functions | ||
| 215 | */ | ||
| 216 | |||
| 217 | /** | ||
| 218 | * dev_addr_flush - Flush device address list | ||
| 219 | * @dev: device | ||
| 220 | * | ||
| 221 | * Flush device address list and reset ->dev_addr. | ||
| 222 | * | ||
| 223 | * The caller must hold the rtnl_mutex. | ||
| 224 | */ | ||
| 225 | void dev_addr_flush(struct net_device *dev) | ||
| 226 | { | ||
| 227 | /* rtnl_mutex must be held here */ | ||
| 228 | |||
| 229 | __hw_addr_flush(&dev->dev_addrs); | ||
| 230 | dev->dev_addr = NULL; | ||
| 231 | } | ||
| 232 | EXPORT_SYMBOL(dev_addr_flush); | ||
| 233 | |||
| 234 | /** | ||
| 235 | * dev_addr_init - Init device address list | ||
| 236 | * @dev: device | ||
| 237 | * | ||
| 238 | * Init device address list and create the first element, | ||
| 239 | * used by ->dev_addr. | ||
| 240 | * | ||
| 241 | * The caller must hold the rtnl_mutex. | ||
| 242 | */ | ||
| 243 | int dev_addr_init(struct net_device *dev) | ||
| 244 | { | ||
| 245 | unsigned char addr[MAX_ADDR_LEN]; | ||
| 246 | struct netdev_hw_addr *ha; | ||
| 247 | int err; | ||
| 248 | |||
| 249 | /* rtnl_mutex must be held here */ | ||
| 250 | |||
| 251 | __hw_addr_init(&dev->dev_addrs); | ||
| 252 | memset(addr, 0, sizeof(addr)); | ||
| 253 | err = __hw_addr_add(&dev->dev_addrs, addr, sizeof(addr), | ||
| 254 | NETDEV_HW_ADDR_T_LAN); | ||
| 255 | if (!err) { | ||
| 256 | /* | ||
| 257 | * Get the first (previously created) address from the list | ||
| 258 | * and set dev_addr pointer to this location. | ||
| 259 | */ | ||
| 260 | ha = list_first_entry(&dev->dev_addrs.list, | ||
| 261 | struct netdev_hw_addr, list); | ||
| 262 | dev->dev_addr = ha->addr; | ||
| 263 | } | ||
| 264 | return err; | ||
| 265 | } | ||
| 266 | EXPORT_SYMBOL(dev_addr_init); | ||
| 267 | |||
| 268 | /** | ||
| 269 | * dev_addr_add - Add a device address | ||
| 270 | * @dev: device | ||
| 271 | * @addr: address to add | ||
| 272 | * @addr_type: address type | ||
| 273 | * | ||
| 274 | * Add a device address to the device or increase the reference count if | ||
| 275 | * it already exists. | ||
| 276 | * | ||
| 277 | * The caller must hold the rtnl_mutex. | ||
| 278 | */ | ||
| 279 | int dev_addr_add(struct net_device *dev, unsigned char *addr, | ||
| 280 | unsigned char addr_type) | ||
| 281 | { | ||
| 282 | int err; | ||
| 283 | |||
| 284 | ASSERT_RTNL(); | ||
| 285 | |||
| 286 | err = __hw_addr_add(&dev->dev_addrs, addr, dev->addr_len, addr_type); | ||
| 287 | if (!err) | ||
| 288 | call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); | ||
| 289 | return err; | ||
| 290 | } | ||
| 291 | EXPORT_SYMBOL(dev_addr_add); | ||
| 292 | |||
| 293 | /** | ||
| 294 | * dev_addr_del - Release a device address. | ||
| 295 | * @dev: device | ||
| 296 | * @addr: address to delete | ||
| 297 | * @addr_type: address type | ||
| 298 | * | ||
| 299 | * Release reference to a device address and remove it from the device | ||
| 300 | * if the reference count drops to zero. | ||
| 301 | * | ||
| 302 | * The caller must hold the rtnl_mutex. | ||
| 303 | */ | ||
| 304 | int dev_addr_del(struct net_device *dev, unsigned char *addr, | ||
| 305 | unsigned char addr_type) | ||
| 306 | { | ||
| 307 | int err; | ||
| 308 | struct netdev_hw_addr *ha; | ||
| 309 | |||
| 310 | ASSERT_RTNL(); | ||
| 311 | |||
| 312 | /* | ||
| 313 | * We can not remove the first address from the list because | ||
| 314 | * dev->dev_addr points to that. | ||
| 315 | */ | ||
| 316 | ha = list_first_entry(&dev->dev_addrs.list, | ||
| 317 | struct netdev_hw_addr, list); | ||
| 318 | if (ha->addr == dev->dev_addr && ha->refcount == 1) | ||
| 319 | return -ENOENT; | ||
| 320 | |||
| 321 | err = __hw_addr_del(&dev->dev_addrs, addr, dev->addr_len, | ||
| 322 | addr_type); | ||
| 323 | if (!err) | ||
| 324 | call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); | ||
| 325 | return err; | ||
| 326 | } | ||
| 327 | EXPORT_SYMBOL(dev_addr_del); | ||
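
A hedged sketch of how a caller might use dev_addr_add()/dev_addr_del() for a secondary hardware address; both calls require the rtnl lock, and the hypothetical wrapper below takes it itself:

    #include <linux/rtnetlink.h>

    /* Sketch (hypothetical caller): add a secondary MAC under rtnl. */
    static int example_add_secondary_mac(struct net_device *dev, unsigned char *mac)
    {
            int err;

            rtnl_lock();
            err = dev_addr_add(dev, mac, NETDEV_HW_ADDR_T_LAN);
            rtnl_unlock();
            /* undo later with dev_addr_del(dev, mac, NETDEV_HW_ADDR_T_LAN) */
            return err;
    }
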
| 328 | |||
| 329 | /** | ||
| 330 | * dev_addr_add_multiple - Add device addresses from another device | ||
| 331 | * @to_dev: device to which addresses will be added | ||
| 332 | * @from_dev: device from which addresses will be added | ||
| 333 | * @addr_type: address type - 0 means type will be used from from_dev | ||
| 334 | * | ||
| 335 | * Add device addresses of one device to another. | ||
| 336 | * | ||
| 337 | * The caller must hold the rtnl_mutex. | ||
| 338 | */ | ||
| 339 | int dev_addr_add_multiple(struct net_device *to_dev, | ||
| 340 | struct net_device *from_dev, | ||
| 341 | unsigned char addr_type) | ||
| 342 | { | ||
| 343 | int err; | ||
| 344 | |||
| 345 | ASSERT_RTNL(); | ||
| 346 | |||
| 347 | if (from_dev->addr_len != to_dev->addr_len) | ||
| 348 | return -EINVAL; | ||
| 349 | err = __hw_addr_add_multiple(&to_dev->dev_addrs, &from_dev->dev_addrs, | ||
| 350 | to_dev->addr_len, addr_type); | ||
| 351 | if (!err) | ||
| 352 | call_netdevice_notifiers(NETDEV_CHANGEADDR, to_dev); | ||
| 353 | return err; | ||
| 354 | } | ||
| 355 | EXPORT_SYMBOL(dev_addr_add_multiple); | ||
| 356 | |||
| 357 | /** | ||
| 358 | * dev_addr_del_multiple - Delete device addresses by another device | ||
| 359 | * @to_dev: device where the addresses will be deleted | ||
| 360 | * @from_dev: device supplying the addresses to be deleted | ||
| 361 | * @addr_type: address type - 0 means type will be used from from_dev | ||
| 362 | * | ||
| 363 | * Deletes each address in to_dev that appears in from_dev's list. | ||
| 364 | * | ||
| 365 | * The caller must hold the rtnl_mutex. | ||
| 366 | */ | ||
| 367 | int dev_addr_del_multiple(struct net_device *to_dev, | ||
| 368 | struct net_device *from_dev, | ||
| 369 | unsigned char addr_type) | ||
| 370 | { | ||
| 371 | ASSERT_RTNL(); | ||
| 372 | |||
| 373 | if (from_dev->addr_len != to_dev->addr_len) | ||
| 374 | return -EINVAL; | ||
| 375 | __hw_addr_del_multiple(&to_dev->dev_addrs, &from_dev->dev_addrs, | ||
| 376 | to_dev->addr_len, addr_type); | ||
| 377 | call_netdevice_notifiers(NETDEV_CHANGEADDR, to_dev); | ||
| 378 | return 0; | ||
| 379 | } | ||
| 380 | EXPORT_SYMBOL(dev_addr_del_multiple); | ||
| 381 | |||
| 382 | /* | ||
| 383 | * Unicast list handling functions | ||
| 384 | */ | ||
| 385 | |||
| 386 | /** | ||
| 387 | * dev_uc_add - Add a secondary unicast address | ||
| 388 | * @dev: device | ||
| 389 | * @addr: address to add | ||
| 390 | * | ||
| 391 | * Add a secondary unicast address to the device or increase | ||
| 392 | * the reference count if it already exists. | ||
| 393 | */ | ||
| 394 | int dev_uc_add(struct net_device *dev, unsigned char *addr) | ||
| 395 | { | ||
| 396 | int err; | ||
| 397 | |||
| 398 | netif_addr_lock_bh(dev); | ||
| 399 | err = __hw_addr_add(&dev->uc, addr, dev->addr_len, | ||
| 400 | NETDEV_HW_ADDR_T_UNICAST); | ||
| 401 | if (!err) | ||
| 402 | __dev_set_rx_mode(dev); | ||
| 403 | netif_addr_unlock_bh(dev); | ||
| 404 | return err; | ||
| 405 | } | ||
| 406 | EXPORT_SYMBOL(dev_uc_add); | ||
| 407 | |||
| 408 | /** | ||
| 409 | * dev_uc_del - Release secondary unicast address. | ||
| 410 | * @dev: device | ||
| 411 | * @addr: address to delete | ||
| 412 | * | ||
| 413 | * Release reference to a secondary unicast address and remove it | ||
| 414 | * from the device if the reference count drops to zero. | ||
| 415 | */ | ||
| 416 | int dev_uc_del(struct net_device *dev, unsigned char *addr) | ||
| 417 | { | ||
| 418 | int err; | ||
| 419 | |||
| 420 | netif_addr_lock_bh(dev); | ||
| 421 | err = __hw_addr_del(&dev->uc, addr, dev->addr_len, | ||
| 422 | NETDEV_HW_ADDR_T_UNICAST); | ||
| 423 | if (!err) | ||
| 424 | __dev_set_rx_mode(dev); | ||
| 425 | netif_addr_unlock_bh(dev); | ||
| 426 | return err; | ||
| 427 | } | ||
| 428 | EXPORT_SYMBOL(dev_uc_del); | ||
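
A hedged sketch of a stacked driver (macvlan-style, purely illustrative) letting the lower device accept the upper device's own MAC through the unicast list; dev_uc_add()/dev_uc_del() take the address lock themselves, so no extra locking is needed here:

    /* Sketch (hypothetical upper/lower pair). */
    static int example_upper_open(struct net_device *upper, struct net_device *lower)
    {
            return dev_uc_add(lower, upper->dev_addr);
    }

    static void example_upper_close(struct net_device *upper, struct net_device *lower)
    {
            dev_uc_del(lower, upper->dev_addr);
    }
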
| 429 | |||
| 430 | /** | ||
| 431 | * dev_uc_sync - Synchronize device's unicast list to another device | ||
| 432 | * @to: destination device | ||
| 433 | * @from: source device | ||
| 434 | * | ||
| 435 | * Add newly added addresses to the destination device and release | ||
| 436 | * addresses that have no users left. The source device must be | ||
| 437 | * locked by netif_tx_lock_bh. | ||
| 438 | * | ||
| 439 | * This function is intended to be called from the dev->set_rx_mode | ||
| 440 | * function of layered software devices. | ||
| 441 | */ | ||
| 442 | int dev_uc_sync(struct net_device *to, struct net_device *from) | ||
| 443 | { | ||
| 444 | int err = 0; | ||
| 445 | |||
| 446 | if (to->addr_len != from->addr_len) | ||
| 447 | return -EINVAL; | ||
| 448 | |||
| 449 | netif_addr_lock_bh(to); | ||
| 450 | err = __hw_addr_sync(&to->uc, &from->uc, to->addr_len); | ||
| 451 | if (!err) | ||
| 452 | __dev_set_rx_mode(to); | ||
| 453 | netif_addr_unlock_bh(to); | ||
| 454 | return err; | ||
| 455 | } | ||
| 456 | EXPORT_SYMBOL(dev_uc_sync); | ||
| 457 | |||
| 458 | /** | ||
| 459 | * dev_uc_unsync - Remove synchronized addresses from the destination device | ||
| 460 | * @to: destination device | ||
| 461 | * @from: source device | ||
| 462 | * | ||
| 463 | * Remove all addresses that were added to the destination device by | ||
| 464 | * dev_uc_sync(). This function is intended to be called from the | ||
| 465 | * dev->stop function of layered software devices. | ||
| 466 | */ | ||
| 467 | void dev_uc_unsync(struct net_device *to, struct net_device *from) | ||
| 468 | { | ||
| 469 | if (to->addr_len != from->addr_len) | ||
| 470 | return; | ||
| 471 | |||
| 472 | netif_addr_lock_bh(from); | ||
| 473 | netif_addr_lock(to); | ||
| 474 | __hw_addr_unsync(&to->uc, &from->uc, to->addr_len); | ||
| 475 | __dev_set_rx_mode(to); | ||
| 476 | netif_addr_unlock(to); | ||
| 477 | netif_addr_unlock_bh(from); | ||
| 478 | } | ||
| 479 | EXPORT_SYMBOL(dev_uc_unsync); | ||
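
A hedged sketch of the call sites the comments above describe: a layered device syncing its unicast list down in its rx-mode handler and unsyncing on stop. The priv layout and function names are illustrative, not an existing driver:

    #include <linux/netdevice.h>

    struct example_priv {
            struct net_device *lowerdev;
    };

    static void example_set_rx_mode(struct net_device *upper)
    {
            struct example_priv *priv = netdev_priv(upper);

            dev_uc_sync(priv->lowerdev, upper);     /* push new addrs, drop unused */
    }

    static int example_stop(struct net_device *upper)
    {
            struct example_priv *priv = netdev_priv(upper);

            dev_uc_unsync(priv->lowerdev, upper);   /* remove what we synced */
            return 0;
    }
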
| 480 | |||
| 481 | /** | ||
| 482 | * dev_uc_flush - Flush unicast addresses | ||
| 483 | * @dev: device | ||
| 484 | * | ||
| 485 | * Flush unicast addresses. | ||
| 486 | */ | ||
| 487 | void dev_uc_flush(struct net_device *dev) | ||
| 488 | { | ||
| 489 | netif_addr_lock_bh(dev); | ||
| 490 | __hw_addr_flush(&dev->uc); | ||
| 491 | netif_addr_unlock_bh(dev); | ||
| 492 | } | ||
| 493 | EXPORT_SYMBOL(dev_uc_flush); | ||
| 494 | |||
| 495 | /** | ||
| 496 | * dev_uc_init - Init unicast address list | ||
| 497 | * @dev: device | ||
| 498 | * | ||
| 499 | * Init unicast address list. | ||
| 500 | */ | ||
| 501 | void dev_uc_init(struct net_device *dev) | ||
| 502 | { | ||
| 503 | __hw_addr_init(&dev->uc); | ||
| 504 | } | ||
| 505 | EXPORT_SYMBOL(dev_uc_init); | ||
| 506 | |||
| 507 | /* | ||
| 508 | * Multicast list handling functions | ||
| 509 | */ | ||
| 510 | |||
| 511 | static int __dev_mc_add(struct net_device *dev, unsigned char *addr, | ||
| 512 | bool global) | ||
| 513 | { | ||
| 514 | int err; | ||
| 515 | |||
| 516 | netif_addr_lock_bh(dev); | ||
| 517 | err = __hw_addr_add_ex(&dev->mc, addr, dev->addr_len, | ||
| 518 | NETDEV_HW_ADDR_T_MULTICAST, global); | ||
| 519 | if (!err) | ||
| 520 | __dev_set_rx_mode(dev); | ||
| 521 | netif_addr_unlock_bh(dev); | ||
| 522 | return err; | ||
| 523 | } | ||
| 524 | /** | ||
| 525 | * dev_mc_add - Add a multicast address | ||
| 526 | * @dev: device | ||
| 527 | * @addr: address to add | ||
| 528 | * | ||
| 529 | * Add a multicast address to the device or increase | ||
| 530 | * the reference count if it already exists. | ||
| 531 | */ | ||
| 532 | int dev_mc_add(struct net_device *dev, unsigned char *addr) | ||
| 533 | { | ||
| 534 | return __dev_mc_add(dev, addr, false); | ||
| 535 | } | ||
| 536 | EXPORT_SYMBOL(dev_mc_add); | ||
| 537 | |||
| 538 | /** | ||
| 539 | * dev_mc_add_global - Add a global multicast address | ||
| 540 | * @dev: device | ||
| 541 | * @addr: address to add | ||
| 542 | * | ||
| 543 | * Add a global multicast address to the device. | ||
| 544 | */ | ||
| 545 | int dev_mc_add_global(struct net_device *dev, unsigned char *addr) | ||
| 546 | { | ||
| 547 | return __dev_mc_add(dev, addr, true); | ||
| 548 | } | ||
| 549 | EXPORT_SYMBOL(dev_mc_add_global); | ||
| 550 | |||
| 551 | static int __dev_mc_del(struct net_device *dev, unsigned char *addr, | ||
| 552 | bool global) | ||
| 553 | { | ||
| 554 | int err; | ||
| 555 | |||
| 556 | netif_addr_lock_bh(dev); | ||
| 557 | err = __hw_addr_del_ex(&dev->mc, addr, dev->addr_len, | ||
| 558 | NETDEV_HW_ADDR_T_MULTICAST, global); | ||
| 559 | if (!err) | ||
| 560 | __dev_set_rx_mode(dev); | ||
| 561 | netif_addr_unlock_bh(dev); | ||
| 562 | return err; | ||
| 563 | } | ||
| 564 | |||
| 565 | /** | ||
| 566 | * dev_mc_del - Delete a multicast address. | ||
| 567 | * @dev: device | ||
| 568 | * @addr: address to delete | ||
| 569 | * | ||
| 570 | * Release reference to a multicast address and remove it | ||
| 571 | * from the device if the reference count drops to zero. | ||
| 572 | */ | ||
| 573 | int dev_mc_del(struct net_device *dev, unsigned char *addr) | ||
| 574 | { | ||
| 575 | return __dev_mc_del(dev, addr, false); | ||
| 576 | } | ||
| 577 | EXPORT_SYMBOL(dev_mc_del); | ||
| 578 | |||
| 579 | /** | ||
| 580 | * dev_mc_del_global - Delete a global multicast address. | ||
| 581 | * @dev: device | ||
| 582 | * @addr: address to delete | ||
| 583 | * | ||
| 584 | * Release reference to a multicast address and remove it | ||
| 585 | * from the device if the reference count drops to zero. | ||
| 586 | */ | ||
| 587 | int dev_mc_del_global(struct net_device *dev, unsigned char *addr) | ||
| 588 | { | ||
| 589 | return __dev_mc_del(dev, addr, true); | ||
| 590 | } | ||
| 591 | EXPORT_SYMBOL(dev_mc_del_global); | ||
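
The dev_ifsioc() hunk earlier in this diff switches SIOCADDMULTI/SIOCDELMULTI over to these global variants; "global" marks entries requested by user space as opposed to protocol (e.g. IP) references. A hedged sketch of the equivalent in-kernel call, with the group address as sample data only:

    #include <linux/if_ether.h>

    /* Sketch: join a multicast group the way the ioctl path now does. */
    static int example_join_group(struct net_device *dev)
    {
            unsigned char mc[ETH_ALEN] = { 0x33, 0x33, 0x00, 0x00, 0x00, 0x01 };

            return dev_mc_add_global(dev, mc);      /* undo: dev_mc_del_global() */
    }
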
| 592 | |||
| 593 | /** | ||
| 594 | * dev_mc_sync - Synchronize device's multicast list to another device | ||
| 595 | * @to: destination device | ||
| 596 | * @from: source device | ||
| 597 | * | ||
| 598 | * Add newly added addresses to the destination device and release | ||
| 599 | * addresses that have no users left. The source device must be | ||
| 600 | * locked by netif_tx_lock_bh. | ||
| 601 | * | ||
| 602 | * This function is intended to be called from the dev->set_multicast_list | ||
| 603 | * or dev->set_rx_mode function of layered software devices. | ||
| 604 | */ | ||
| 605 | int dev_mc_sync(struct net_device *to, struct net_device *from) | ||
| 606 | { | ||
| 607 | int err = 0; | ||
| 608 | |||
| 609 | if (to->addr_len != from->addr_len) | ||
| 610 | return -EINVAL; | ||
| 611 | |||
| 612 | netif_addr_lock_bh(to); | ||
| 613 | err = __hw_addr_sync(&to->mc, &from->mc, to->addr_len); | ||
| 614 | if (!err) | ||
| 615 | __dev_set_rx_mode(to); | ||
| 616 | netif_addr_unlock_bh(to); | ||
| 617 | return err; | ||
| 618 | } | ||
| 619 | EXPORT_SYMBOL(dev_mc_sync); | ||
| 620 | |||
| 621 | /** | ||
| 622 | * dev_mc_unsync - Remove synchronized addresses from the destination device | ||
| 623 | * @to: destination device | ||
| 624 | * @from: source device | ||
| 625 | * | ||
| 626 | * Remove all addresses that were added to the destination device by | ||
| 627 | * dev_mc_sync(). This function is intended to be called from the | ||
| 628 | * dev->stop function of layered software devices. | ||
| 629 | */ | ||
| 630 | void dev_mc_unsync(struct net_device *to, struct net_device *from) | ||
| 631 | { | ||
| 632 | if (to->addr_len != from->addr_len) | ||
| 633 | return; | ||
| 634 | |||
| 635 | netif_addr_lock_bh(from); | ||
| 636 | netif_addr_lock(to); | ||
| 637 | __hw_addr_unsync(&to->mc, &from->mc, to->addr_len); | ||
| 638 | __dev_set_rx_mode(to); | ||
| 639 | netif_addr_unlock(to); | ||
| 640 | netif_addr_unlock_bh(from); | ||
| 641 | } | ||
| 642 | EXPORT_SYMBOL(dev_mc_unsync); | ||
| 643 | |||
| 644 | /** | ||
| 645 | * dev_mc_flush - Flush multicast addresses | ||
| 646 | * @dev: device | ||
| 647 | * | ||
| 648 | * Flush multicast addresses. | ||
| 649 | */ | ||
| 650 | void dev_mc_flush(struct net_device *dev) | ||
| 651 | { | ||
| 652 | netif_addr_lock_bh(dev); | ||
| 653 | __hw_addr_flush(&dev->mc); | ||
| 654 | netif_addr_unlock_bh(dev); | ||
| 655 | } | ||
| 656 | EXPORT_SYMBOL(dev_mc_flush); | ||
| 657 | |||
| 658 | /** | ||
| 659 | * dev_mc_init - Init multicast address list | ||
| 660 | * @dev: device | ||
| 661 | * | ||
| 662 | * Init multicast address list. | ||
| 663 | */ | ||
| 664 | void dev_mc_init(struct net_device *dev) | ||
| 665 | { | ||
| 666 | __hw_addr_init(&dev->mc); | ||
| 667 | } | ||
| 668 | EXPORT_SYMBOL(dev_mc_init); | ||
| 669 | |||
| 670 | #ifdef CONFIG_PROC_FS | ||
| 671 | #include <linux/seq_file.h> | ||
| 672 | |||
| 673 | static int dev_mc_seq_show(struct seq_file *seq, void *v) | ||
| 674 | { | ||
| 675 | struct netdev_hw_addr *ha; | ||
| 676 | struct net_device *dev = v; | ||
| 677 | |||
| 678 | if (v == SEQ_START_TOKEN) | ||
| 679 | return 0; | ||
| 680 | |||
| 681 | netif_addr_lock_bh(dev); | ||
| 682 | netdev_for_each_mc_addr(ha, dev) { | ||
| 683 | int i; | ||
| 684 | |||
| 685 | seq_printf(seq, "%-4d %-15s %-5d %-5d ", dev->ifindex, | ||
| 686 | dev->name, ha->refcount, ha->global_use); | ||
| 687 | |||
| 688 | for (i = 0; i < dev->addr_len; i++) | ||
| 689 | seq_printf(seq, "%02x", ha->addr[i]); | ||
| 690 | |||
| 691 | seq_putc(seq, '\n'); | ||
| 692 | } | ||
| 693 | netif_addr_unlock_bh(dev); | ||
| 694 | return 0; | ||
| 695 | } | ||
| 696 | |||
| 697 | static const struct seq_operations dev_mc_seq_ops = { | ||
| 698 | .start = dev_seq_start, | ||
| 699 | .next = dev_seq_next, | ||
| 700 | .stop = dev_seq_stop, | ||
| 701 | .show = dev_mc_seq_show, | ||
| 702 | }; | ||
| 703 | |||
| 704 | static int dev_mc_seq_open(struct inode *inode, struct file *file) | ||
| 705 | { | ||
| 706 | return seq_open_net(inode, file, &dev_mc_seq_ops, | ||
| 707 | sizeof(struct seq_net_private)); | ||
| 708 | } | ||
| 709 | |||
| 710 | static const struct file_operations dev_mc_seq_fops = { | ||
| 711 | .owner = THIS_MODULE, | ||
| 712 | .open = dev_mc_seq_open, | ||
| 713 | .read = seq_read, | ||
| 714 | .llseek = seq_lseek, | ||
| 715 | .release = seq_release_net, | ||
| 716 | }; | ||
| 717 | |||
| 718 | #endif | ||
| 719 | |||
| 720 | static int __net_init dev_mc_net_init(struct net *net) | ||
| 721 | { | ||
| 722 | if (!proc_net_fops_create(net, "dev_mcast", 0, &dev_mc_seq_fops)) | ||
| 723 | return -ENOMEM; | ||
| 724 | return 0; | ||
| 725 | } | ||
| 726 | |||
| 727 | static void __net_exit dev_mc_net_exit(struct net *net) | ||
| 728 | { | ||
| 729 | proc_net_remove(net, "dev_mcast"); | ||
| 730 | } | ||
| 731 | |||
| 732 | static struct pernet_operations __net_initdata dev_mc_net_ops = { | ||
| 733 | .init = dev_mc_net_init, | ||
| 734 | .exit = dev_mc_net_exit, | ||
| 735 | }; | ||
| 736 | |||
| 737 | void __init dev_mcast_init(void) | ||
| 738 | { | ||
| 739 | register_pernet_subsys(&dev_mc_net_ops); | ||
| 740 | } | ||
| 741 | |||
diff --git a/net/core/dev_mcast.c b/net/core/dev_mcast.c deleted file mode 100644 index 3dc295beb483..000000000000 --- a/net/core/dev_mcast.c +++ /dev/null | |||
| @@ -1,232 +0,0 @@ | |||
| 1 | /* | ||
| 2 | * Linux NET3: Multicast List maintenance. | ||
| 3 | * | ||
| 4 | * Authors: | ||
| 5 | * Tim Kordas <tjk@nostromo.eeap.cwru.edu> | ||
| 6 | * Richard Underwood <richard@wuzz.demon.co.uk> | ||
| 7 | * | ||
| 8 | * Stir fried together from the IP multicast and CAP patches above | ||
| 9 | * Alan Cox <alan@lxorguk.ukuu.org.uk> | ||
| 10 | * | ||
| 11 | * Fixes: | ||
| 12 | * Alan Cox : Update the device on a real delete | ||
| 13 | * rather than any time but... | ||
| 14 | * Alan Cox : IFF_ALLMULTI support. | ||
| 15 | * Alan Cox : New format set_multicast_list() calls. | ||
| 16 | * Gleb Natapov : Remove dev_mc_lock. | ||
| 17 | * | ||
| 18 | * This program is free software; you can redistribute it and/or | ||
| 19 | * modify it under the terms of the GNU General Public License | ||
| 20 | * as published by the Free Software Foundation; either version | ||
| 21 | * 2 of the License, or (at your option) any later version. | ||
| 22 | */ | ||
| 23 | |||
| 24 | #include <linux/module.h> | ||
| 25 | #include <asm/uaccess.h> | ||
| 26 | #include <asm/system.h> | ||
| 27 | #include <linux/bitops.h> | ||
| 28 | #include <linux/types.h> | ||
| 29 | #include <linux/kernel.h> | ||
| 30 | #include <linux/string.h> | ||
| 31 | #include <linux/mm.h> | ||
| 32 | #include <linux/socket.h> | ||
| 33 | #include <linux/sockios.h> | ||
| 34 | #include <linux/in.h> | ||
| 35 | #include <linux/errno.h> | ||
| 36 | #include <linux/interrupt.h> | ||
| 37 | #include <linux/if_ether.h> | ||
| 38 | #include <linux/inet.h> | ||
| 39 | #include <linux/netdevice.h> | ||
| 40 | #include <linux/etherdevice.h> | ||
| 41 | #include <linux/proc_fs.h> | ||
| 42 | #include <linux/seq_file.h> | ||
| 43 | #include <linux/init.h> | ||
| 44 | #include <net/net_namespace.h> | ||
| 45 | #include <net/ip.h> | ||
| 46 | #include <net/route.h> | ||
| 47 | #include <linux/skbuff.h> | ||
| 48 | #include <net/sock.h> | ||
| 49 | #include <net/arp.h> | ||
| 50 | |||
| 51 | |||
| 52 | /* | ||
| 53 | * Device multicast list maintenance. | ||
| 54 | * | ||
| 55 | * This is used both by IP and by the user level maintenance functions. | ||
| 56 | * Unlike BSD we maintain a usage count on a given multicast address so | ||
| 57 | * that a casual user application can add/delete multicasts used by | ||
| 58 | * protocols without doing damage to the protocols when it deletes the | ||
| 59 | * entries. It also helps IP as it tracks overlapping maps. | ||
| 60 | * | ||
| 61 | * Device mc lists are changed by bh at least if IPv6 is enabled, | ||
| 62 | * so that it must be bh protected. | ||
| 63 | * | ||
| 64 | * We block accesses to device mc filters with netif_tx_lock. | ||
| 65 | */ | ||
| 66 | |||
| 67 | /* | ||
| 68 | * Delete a device level multicast | ||
| 69 | */ | ||
| 70 | |||
| 71 | int dev_mc_delete(struct net_device *dev, void *addr, int alen, int glbl) | ||
| 72 | { | ||
| 73 | int err; | ||
| 74 | |||
| 75 | netif_addr_lock_bh(dev); | ||
| 76 | err = __dev_addr_delete(&dev->mc_list, &dev->mc_count, | ||
| 77 | addr, alen, glbl); | ||
| 78 | if (!err) { | ||
| 79 | /* | ||
| 80 | * We have altered the list, so the card | ||
| 81 | * loaded filter is now wrong. Fix it | ||
| 82 | */ | ||
| 83 | |||
| 84 | __dev_set_rx_mode(dev); | ||
| 85 | } | ||
| 86 | netif_addr_unlock_bh(dev); | ||
| 87 | return err; | ||
| 88 | } | ||
| 89 | |||
| 90 | /* | ||
| 91 | * Add a device level multicast | ||
| 92 | */ | ||
| 93 | |||
| 94 | int dev_mc_add(struct net_device *dev, void *addr, int alen, int glbl) | ||
| 95 | { | ||
| 96 | int err; | ||
| 97 | |||
| 98 | netif_addr_lock_bh(dev); | ||
| 99 | if (alen != dev->addr_len) | ||
| 100 | err = -EINVAL; | ||
| 101 | else | ||
| 102 | err = __dev_addr_add(&dev->mc_list, &dev->mc_count, addr, alen, glbl); | ||
| 103 | if (!err) | ||
| 104 | __dev_set_rx_mode(dev); | ||
| 105 | netif_addr_unlock_bh(dev); | ||
| 106 | return err; | ||
| 107 | } | ||
| 108 | |||
| 109 | /** | ||
| 110 | * dev_mc_sync - Synchronize device's multicast list to another device | ||
| 111 | * @to: destination device | ||
| 112 | * @from: source device | ||
| 113 | * | ||
| 114 | * Add newly added addresses to the destination device and release | ||
| 115 | * addresses that have no users left. The source device must be | ||
| 116 | * locked by netif_tx_lock_bh. | ||
| 117 | * | ||
| 118 | * This function is intended to be called from the dev->set_multicast_list | ||
| 119 | * or dev->set_rx_mode function of layered software devices. | ||
| 120 | */ | ||
| 121 | int dev_mc_sync(struct net_device *to, struct net_device *from) | ||
| 122 | { | ||
| 123 | int err = 0; | ||
| 124 | |||
| 125 | netif_addr_lock_bh(to); | ||
| 126 | err = __dev_addr_sync(&to->mc_list, &to->mc_count, | ||
| 127 | &from->mc_list, &from->mc_count); | ||
| 128 | if (!err) | ||
| 129 | __dev_set_rx_mode(to); | ||
| 130 | netif_addr_unlock_bh(to); | ||
| 131 | |||
| 132 | return err; | ||
| 133 | } | ||
| 134 | EXPORT_SYMBOL(dev_mc_sync); | ||
| 135 | |||
| 136 | |||
| 137 | /** | ||
| 138 | * dev_mc_unsync - Remove synchronized addresses from the destination | ||
| 139 | * device | ||
| 140 | * @to: destination device | ||
| 141 | * @from: source device | ||
| 142 | * | ||
| 143 | * Remove all addresses that were added to the destination device by | ||
| 144 | * dev_mc_sync(). This function is intended to be called from the | ||
| 145 | * dev->stop function of layered software devices. | ||
| 146 | */ | ||
| 147 | void dev_mc_unsync(struct net_device *to, struct net_device *from) | ||
| 148 | { | ||
| 149 | netif_addr_lock_bh(from); | ||
| 150 | netif_addr_lock(to); | ||
| 151 | |||
| 152 | __dev_addr_unsync(&to->mc_list, &to->mc_count, | ||
| 153 | &from->mc_list, &from->mc_count); | ||
| 154 | __dev_set_rx_mode(to); | ||
| 155 | |||
| 156 | netif_addr_unlock(to); | ||
| 157 | netif_addr_unlock_bh(from); | ||
| 158 | } | ||
| 159 | EXPORT_SYMBOL(dev_mc_unsync); | ||
| 160 | |||
| 161 | #ifdef CONFIG_PROC_FS | ||
| 162 | static int dev_mc_seq_show(struct seq_file *seq, void *v) | ||
| 163 | { | ||
| 164 | struct dev_addr_list *m; | ||
| 165 | struct net_device *dev = v; | ||
| 166 | |||
| 167 | if (v == SEQ_START_TOKEN) | ||
| 168 | return 0; | ||
| 169 | |||
| 170 | netif_addr_lock_bh(dev); | ||
| 171 | for (m = dev->mc_list; m; m = m->next) { | ||
| 172 | int i; | ||
| 173 | |||
| 174 | seq_printf(seq, "%-4d %-15s %-5d %-5d ", dev->ifindex, | ||
| 175 | dev->name, m->dmi_users, m->dmi_gusers); | ||
| 176 | |||
| 177 | for (i = 0; i < m->dmi_addrlen; i++) | ||
| 178 | seq_printf(seq, "%02x", m->dmi_addr[i]); | ||
| 179 | |||
| 180 | seq_putc(seq, '\n'); | ||
| 181 | } | ||
| 182 | netif_addr_unlock_bh(dev); | ||
| 183 | return 0; | ||
| 184 | } | ||
| 185 | |||
| 186 | static const struct seq_operations dev_mc_seq_ops = { | ||
| 187 | .start = dev_seq_start, | ||
| 188 | .next = dev_seq_next, | ||
| 189 | .stop = dev_seq_stop, | ||
| 190 | .show = dev_mc_seq_show, | ||
| 191 | }; | ||
| 192 | |||
| 193 | static int dev_mc_seq_open(struct inode *inode, struct file *file) | ||
| 194 | { | ||
| 195 | return seq_open_net(inode, file, &dev_mc_seq_ops, | ||
| 196 | sizeof(struct seq_net_private)); | ||
| 197 | } | ||
| 198 | |||
| 199 | static const struct file_operations dev_mc_seq_fops = { | ||
| 200 | .owner = THIS_MODULE, | ||
| 201 | .open = dev_mc_seq_open, | ||
| 202 | .read = seq_read, | ||
| 203 | .llseek = seq_lseek, | ||
| 204 | .release = seq_release_net, | ||
| 205 | }; | ||
| 206 | |||
| 207 | #endif | ||
| 208 | |||
| 209 | static int __net_init dev_mc_net_init(struct net *net) | ||
| 210 | { | ||
| 211 | if (!proc_net_fops_create(net, "dev_mcast", 0, &dev_mc_seq_fops)) | ||
| 212 | return -ENOMEM; | ||
| 213 | return 0; | ||
| 214 | } | ||
| 215 | |||
| 216 | static void __net_exit dev_mc_net_exit(struct net *net) | ||
| 217 | { | ||
| 218 | proc_net_remove(net, "dev_mcast"); | ||
| 219 | } | ||
| 220 | |||
| 221 | static struct pernet_operations __net_initdata dev_mc_net_ops = { | ||
| 222 | .init = dev_mc_net_init, | ||
| 223 | .exit = dev_mc_net_exit, | ||
| 224 | }; | ||
| 225 | |||
| 226 | void __init dev_mcast_init(void) | ||
| 227 | { | ||
| 228 | register_pernet_subsys(&dev_mc_net_ops); | ||
| 229 | } | ||
| 230 | |||
| 231 | EXPORT_SYMBOL(dev_mc_add); | ||
| 232 | EXPORT_SYMBOL(dev_mc_delete); | ||
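With net/core/dev_mcast.c removed, the open-coded dev->mc_list / dmi_* walking shown above gives way to the netdev_hw_addr list kept by dev_addr_lists.c. A rough before/after sketch (fragments only, declarations omitted; use_addr() is hypothetical, and the two-argument dev_mc_add()/dev_mc_del() signatures are assumptions drawn from the dev_addr_lists conversion, not shown in this diff):

/* old style, removed above: explicit length and "global" flag */
for (m = dev->mc_list; m; m = m->next)
	use_addr(m->dmi_addr, m->dmi_addrlen);
dev_mc_add(dev, addr, dev->addr_len, 0);
dev_mc_delete(dev, addr, dev->addr_len, 0);

/* new style: one entry type for all hardware addresses */
netdev_for_each_mc_addr(ha, dev)
	use_addr(ha->addr, dev->addr_len);
dev_mc_add(dev, addr);		/* assumed new signature */
dev_mc_del(dev, addr);		/* assumed new signature */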
diff --git a/net/core/dst.c b/net/core/dst.c index f307bc18f6a0..9920722cc82b 100644 --- a/net/core/dst.c +++ b/net/core/dst.c | |||
| @@ -44,7 +44,7 @@ static atomic_t dst_total = ATOMIC_INIT(0); | |||
| 44 | */ | 44 | */ |
| 45 | static struct { | 45 | static struct { |
| 46 | spinlock_t lock; | 46 | spinlock_t lock; |
| 47 | struct dst_entry *list; | 47 | struct dst_entry *list; |
| 48 | unsigned long timer_inc; | 48 | unsigned long timer_inc; |
| 49 | unsigned long timer_expires; | 49 | unsigned long timer_expires; |
| 50 | } dst_garbage = { | 50 | } dst_garbage = { |
| @@ -52,7 +52,7 @@ static struct { | |||
| 52 | .timer_inc = DST_GC_MAX, | 52 | .timer_inc = DST_GC_MAX, |
| 53 | }; | 53 | }; |
| 54 | static void dst_gc_task(struct work_struct *work); | 54 | static void dst_gc_task(struct work_struct *work); |
| 55 | static void ___dst_free(struct dst_entry * dst); | 55 | static void ___dst_free(struct dst_entry *dst); |
| 56 | 56 | ||
| 57 | static DECLARE_DELAYED_WORK(dst_gc_work, dst_gc_task); | 57 | static DECLARE_DELAYED_WORK(dst_gc_work, dst_gc_task); |
| 58 | 58 | ||
| @@ -136,8 +136,8 @@ loop: | |||
| 136 | } | 136 | } |
| 137 | expires = dst_garbage.timer_expires; | 137 | expires = dst_garbage.timer_expires; |
| 138 | /* | 138 | /* |
| 139 | * if the next desired timer is more than 4 seconds in the future | 139 | * if the next desired timer is more than 4 seconds in the |
| 140 | * then round the timer to whole seconds | 140 | * future then round the timer to whole seconds |
| 141 | */ | 141 | */ |
| 142 | if (expires > 4*HZ) | 142 | if (expires > 4*HZ) |
| 143 | expires = round_jiffies_relative(expires); | 143 | expires = round_jiffies_relative(expires); |
| @@ -152,7 +152,8 @@ loop: | |||
| 152 | " expires: %lu elapsed: %lu us\n", | 152 | " expires: %lu elapsed: %lu us\n", |
| 153 | atomic_read(&dst_total), delayed, work_performed, | 153 | atomic_read(&dst_total), delayed, work_performed, |
| 154 | expires, | 154 | expires, |
| 155 | elapsed.tv_sec * USEC_PER_SEC + elapsed.tv_nsec / NSEC_PER_USEC); | 155 | elapsed.tv_sec * USEC_PER_SEC + |
| 156 | elapsed.tv_nsec / NSEC_PER_USEC); | ||
| 156 | #endif | 157 | #endif |
| 157 | } | 158 | } |
| 158 | 159 | ||
| @@ -163,9 +164,9 @@ int dst_discard(struct sk_buff *skb) | |||
| 163 | } | 164 | } |
| 164 | EXPORT_SYMBOL(dst_discard); | 165 | EXPORT_SYMBOL(dst_discard); |
| 165 | 166 | ||
| 166 | void * dst_alloc(struct dst_ops * ops) | 167 | void *dst_alloc(struct dst_ops *ops) |
| 167 | { | 168 | { |
| 168 | struct dst_entry * dst; | 169 | struct dst_entry *dst; |
| 169 | 170 | ||
| 170 | if (ops->gc && atomic_read(&ops->entries) > ops->gc_thresh) { | 171 | if (ops->gc && atomic_read(&ops->entries) > ops->gc_thresh) { |
| 171 | if (ops->gc(ops)) | 172 | if (ops->gc(ops)) |
| @@ -185,19 +186,20 @@ void * dst_alloc(struct dst_ops * ops) | |||
| 185 | atomic_inc(&ops->entries); | 186 | atomic_inc(&ops->entries); |
| 186 | return dst; | 187 | return dst; |
| 187 | } | 188 | } |
| 189 | EXPORT_SYMBOL(dst_alloc); | ||
| 188 | 190 | ||
| 189 | static void ___dst_free(struct dst_entry * dst) | 191 | static void ___dst_free(struct dst_entry *dst) |
| 190 | { | 192 | { |
| 191 | /* The first case (dev==NULL) is required, when | 193 | /* The first case (dev==NULL) is required, when |
| 192 | protocol module is unloaded. | 194 | protocol module is unloaded. |
| 193 | */ | 195 | */ |
| 194 | if (dst->dev == NULL || !(dst->dev->flags&IFF_UP)) { | 196 | if (dst->dev == NULL || !(dst->dev->flags&IFF_UP)) |
| 195 | dst->input = dst->output = dst_discard; | 197 | dst->input = dst->output = dst_discard; |
| 196 | } | ||
| 197 | dst->obsolete = 2; | 198 | dst->obsolete = 2; |
| 198 | } | 199 | } |
| 200 | EXPORT_SYMBOL(__dst_free); | ||
| 199 | 201 | ||
| 200 | void __dst_free(struct dst_entry * dst) | 202 | void __dst_free(struct dst_entry *dst) |
| 201 | { | 203 | { |
| 202 | spin_lock_bh(&dst_garbage.lock); | 204 | spin_lock_bh(&dst_garbage.lock); |
| 203 | ___dst_free(dst); | 205 | ___dst_free(dst); |
| @@ -262,15 +264,16 @@ again: | |||
| 262 | } | 264 | } |
| 263 | return NULL; | 265 | return NULL; |
| 264 | } | 266 | } |
| 267 | EXPORT_SYMBOL(dst_destroy); | ||
| 265 | 268 | ||
| 266 | void dst_release(struct dst_entry *dst) | 269 | void dst_release(struct dst_entry *dst) |
| 267 | { | 270 | { |
| 268 | if (dst) { | 271 | if (dst) { |
| 269 | int newrefcnt; | 272 | int newrefcnt; |
| 270 | 273 | ||
| 271 | smp_mb__before_atomic_dec(); | 274 | smp_mb__before_atomic_dec(); |
| 272 | newrefcnt = atomic_dec_return(&dst->__refcnt); | 275 | newrefcnt = atomic_dec_return(&dst->__refcnt); |
| 273 | WARN_ON(newrefcnt < 0); | 276 | WARN_ON(newrefcnt < 0); |
| 274 | } | 277 | } |
| 275 | } | 278 | } |
| 276 | EXPORT_SYMBOL(dst_release); | 279 | EXPORT_SYMBOL(dst_release); |
| @@ -283,8 +286,8 @@ EXPORT_SYMBOL(dst_release); | |||
| 283 | * | 286 | * |
| 284 | * Commented and originally written by Alexey. | 287 | * Commented and originally written by Alexey. |
| 285 | */ | 288 | */ |
| 286 | static inline void dst_ifdown(struct dst_entry *dst, struct net_device *dev, | 289 | static void dst_ifdown(struct dst_entry *dst, struct net_device *dev, |
| 287 | int unregister) | 290 | int unregister) |
| 288 | { | 291 | { |
| 289 | if (dst->ops->ifdown) | 292 | if (dst->ops->ifdown) |
| 290 | dst->ops->ifdown(dst, dev, unregister); | 293 | dst->ops->ifdown(dst, dev, unregister); |
| @@ -306,7 +309,8 @@ static inline void dst_ifdown(struct dst_entry *dst, struct net_device *dev, | |||
| 306 | } | 309 | } |
| 307 | } | 310 | } |
| 308 | 311 | ||
| 309 | static int dst_dev_event(struct notifier_block *this, unsigned long event, void *ptr) | 312 | static int dst_dev_event(struct notifier_block *this, unsigned long event, |
| 313 | void *ptr) | ||
| 310 | { | 314 | { |
| 311 | struct net_device *dev = ptr; | 315 | struct net_device *dev = ptr; |
| 312 | struct dst_entry *dst, *last = NULL; | 316 | struct dst_entry *dst, *last = NULL; |
| @@ -329,9 +333,8 @@ static int dst_dev_event(struct notifier_block *this, unsigned long event, void | |||
| 329 | last->next = dst; | 333 | last->next = dst; |
| 330 | else | 334 | else |
| 331 | dst_busy_list = dst; | 335 | dst_busy_list = dst; |
| 332 | for (; dst; dst = dst->next) { | 336 | for (; dst; dst = dst->next) |
| 333 | dst_ifdown(dst, dev, event != NETDEV_DOWN); | 337 | dst_ifdown(dst, dev, event != NETDEV_DOWN); |
| 334 | } | ||
| 335 | mutex_unlock(&dst_gc_mutex); | 338 | mutex_unlock(&dst_gc_mutex); |
| 336 | break; | 339 | break; |
| 337 | } | 340 | } |
| @@ -346,7 +349,3 @@ void __init dst_init(void) | |||
| 346 | { | 349 | { |
| 347 | register_netdevice_notifier(&dst_dev_notifier); | 350 | register_netdevice_notifier(&dst_dev_notifier); |
| 348 | } | 351 | } |
| 349 | |||
| 350 | EXPORT_SYMBOL(__dst_free); | ||
| 351 | EXPORT_SYMBOL(dst_alloc); | ||
| 352 | EXPORT_SYMBOL(dst_destroy); | ||
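The dst.c hunk is a style pass: pointer declarations lose the stray spaces, long lines are wrapped, and each EXPORT_SYMBOL() now sits directly under the function it exports rather than being collected at the end of the file; dst_release() itself keeps the barrier-then-decrement refcounting. A hedged caller-side sketch of that refcount discipline (skb_dst() and dst_hold() are assumed from the same kernel era, not part of this hunk):

struct dst_entry *dst = skb_dst(skb);

dst_hold(dst);		/* take a reference: atomic_inc(&dst->__refcnt) */
/* ... use the cached route ... */
dst_release(dst);	/* pairs with the WARN_ON(newrefcnt < 0) above */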
diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 9d55c57f318a..a0f4964033d2 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c | |||
| @@ -18,8 +18,8 @@ | |||
| 18 | #include <linux/ethtool.h> | 18 | #include <linux/ethtool.h> |
| 19 | #include <linux/netdevice.h> | 19 | #include <linux/netdevice.h> |
| 20 | #include <linux/bitops.h> | 20 | #include <linux/bitops.h> |
| 21 | #include <linux/uaccess.h> | ||
| 21 | #include <linux/slab.h> | 22 | #include <linux/slab.h> |
| 22 | #include <asm/uaccess.h> | ||
| 23 | 23 | ||
| 24 | /* | 24 | /* |
| 25 | * Some useful ethtool_ops methods that're device independent. | 25 | * Some useful ethtool_ops methods that're device independent. |
| @@ -31,6 +31,7 @@ u32 ethtool_op_get_link(struct net_device *dev) | |||
| 31 | { | 31 | { |
| 32 | return netif_carrier_ok(dev) ? 1 : 0; | 32 | return netif_carrier_ok(dev) ? 1 : 0; |
| 33 | } | 33 | } |
| 34 | EXPORT_SYMBOL(ethtool_op_get_link); | ||
| 34 | 35 | ||
| 35 | u32 ethtool_op_get_rx_csum(struct net_device *dev) | 36 | u32 ethtool_op_get_rx_csum(struct net_device *dev) |
| 36 | { | 37 | { |
| @@ -63,6 +64,7 @@ int ethtool_op_set_tx_hw_csum(struct net_device *dev, u32 data) | |||
| 63 | 64 | ||
| 64 | return 0; | 65 | return 0; |
| 65 | } | 66 | } |
| 67 | EXPORT_SYMBOL(ethtool_op_set_tx_hw_csum); | ||
| 66 | 68 | ||
| 67 | int ethtool_op_set_tx_ipv6_csum(struct net_device *dev, u32 data) | 69 | int ethtool_op_set_tx_ipv6_csum(struct net_device *dev, u32 data) |
| 68 | { | 70 | { |
| @@ -73,11 +75,13 @@ int ethtool_op_set_tx_ipv6_csum(struct net_device *dev, u32 data) | |||
| 73 | 75 | ||
| 74 | return 0; | 76 | return 0; |
| 75 | } | 77 | } |
| 78 | EXPORT_SYMBOL(ethtool_op_set_tx_ipv6_csum); | ||
| 76 | 79 | ||
| 77 | u32 ethtool_op_get_sg(struct net_device *dev) | 80 | u32 ethtool_op_get_sg(struct net_device *dev) |
| 78 | { | 81 | { |
| 79 | return (dev->features & NETIF_F_SG) != 0; | 82 | return (dev->features & NETIF_F_SG) != 0; |
| 80 | } | 83 | } |
| 84 | EXPORT_SYMBOL(ethtool_op_get_sg); | ||
| 81 | 85 | ||
| 82 | int ethtool_op_set_sg(struct net_device *dev, u32 data) | 86 | int ethtool_op_set_sg(struct net_device *dev, u32 data) |
| 83 | { | 87 | { |
| @@ -88,11 +92,13 @@ int ethtool_op_set_sg(struct net_device *dev, u32 data) | |||
| 88 | 92 | ||
| 89 | return 0; | 93 | return 0; |
| 90 | } | 94 | } |
| 95 | EXPORT_SYMBOL(ethtool_op_set_sg); | ||
| 91 | 96 | ||
| 92 | u32 ethtool_op_get_tso(struct net_device *dev) | 97 | u32 ethtool_op_get_tso(struct net_device *dev) |
| 93 | { | 98 | { |
| 94 | return (dev->features & NETIF_F_TSO) != 0; | 99 | return (dev->features & NETIF_F_TSO) != 0; |
| 95 | } | 100 | } |
| 101 | EXPORT_SYMBOL(ethtool_op_get_tso); | ||
| 96 | 102 | ||
| 97 | int ethtool_op_set_tso(struct net_device *dev, u32 data) | 103 | int ethtool_op_set_tso(struct net_device *dev, u32 data) |
| 98 | { | 104 | { |
| @@ -103,11 +109,13 @@ int ethtool_op_set_tso(struct net_device *dev, u32 data) | |||
| 103 | 109 | ||
| 104 | return 0; | 110 | return 0; |
| 105 | } | 111 | } |
| 112 | EXPORT_SYMBOL(ethtool_op_set_tso); | ||
| 106 | 113 | ||
| 107 | u32 ethtool_op_get_ufo(struct net_device *dev) | 114 | u32 ethtool_op_get_ufo(struct net_device *dev) |
| 108 | { | 115 | { |
| 109 | return (dev->features & NETIF_F_UFO) != 0; | 116 | return (dev->features & NETIF_F_UFO) != 0; |
| 110 | } | 117 | } |
| 118 | EXPORT_SYMBOL(ethtool_op_get_ufo); | ||
| 111 | 119 | ||
| 112 | int ethtool_op_set_ufo(struct net_device *dev, u32 data) | 120 | int ethtool_op_set_ufo(struct net_device *dev, u32 data) |
| 113 | { | 121 | { |
| @@ -117,12 +125,13 @@ int ethtool_op_set_ufo(struct net_device *dev, u32 data) | |||
| 117 | dev->features &= ~NETIF_F_UFO; | 125 | dev->features &= ~NETIF_F_UFO; |
| 118 | return 0; | 126 | return 0; |
| 119 | } | 127 | } |
| 128 | EXPORT_SYMBOL(ethtool_op_set_ufo); | ||
| 120 | 129 | ||
| 121 | /* the following list of flags are the same as their associated | 130 | /* the following list of flags are the same as their associated |
| 122 | * NETIF_F_xxx values in include/linux/netdevice.h | 131 | * NETIF_F_xxx values in include/linux/netdevice.h |
| 123 | */ | 132 | */ |
| 124 | static const u32 flags_dup_features = | 133 | static const u32 flags_dup_features = |
| 125 | (ETH_FLAG_LRO | ETH_FLAG_NTUPLE); | 134 | (ETH_FLAG_LRO | ETH_FLAG_NTUPLE | ETH_FLAG_RXHASH); |
| 126 | 135 | ||
| 127 | u32 ethtool_op_get_flags(struct net_device *dev) | 136 | u32 ethtool_op_get_flags(struct net_device *dev) |
| 128 | { | 137 | { |
| @@ -133,6 +142,7 @@ u32 ethtool_op_get_flags(struct net_device *dev) | |||
| 133 | 142 | ||
| 134 | return dev->features & flags_dup_features; | 143 | return dev->features & flags_dup_features; |
| 135 | } | 144 | } |
| 145 | EXPORT_SYMBOL(ethtool_op_get_flags); | ||
| 136 | 146 | ||
| 137 | int ethtool_op_set_flags(struct net_device *dev, u32 data) | 147 | int ethtool_op_set_flags(struct net_device *dev, u32 data) |
| 138 | { | 148 | { |
| @@ -153,9 +163,15 @@ int ethtool_op_set_flags(struct net_device *dev, u32 data) | |||
| 153 | features &= ~NETIF_F_NTUPLE; | 163 | features &= ~NETIF_F_NTUPLE; |
| 154 | } | 164 | } |
| 155 | 165 | ||
| 166 | if (data & ETH_FLAG_RXHASH) | ||
| 167 | features |= NETIF_F_RXHASH; | ||
| 168 | else | ||
| 169 | features &= ~NETIF_F_RXHASH; | ||
| 170 | |||
| 156 | dev->features = features; | 171 | dev->features = features; |
| 157 | return 0; | 172 | return 0; |
| 158 | } | 173 | } |
| 174 | EXPORT_SYMBOL(ethtool_op_set_flags); | ||
| 159 | 175 | ||
| 160 | void ethtool_ntuple_flush(struct net_device *dev) | 176 | void ethtool_ntuple_flush(struct net_device *dev) |
| 161 | { | 177 | { |
| @@ -201,7 +217,8 @@ static int ethtool_set_settings(struct net_device *dev, void __user *useraddr) | |||
| 201 | return dev->ethtool_ops->set_settings(dev, &cmd); | 217 | return dev->ethtool_ops->set_settings(dev, &cmd); |
| 202 | } | 218 | } |
| 203 | 219 | ||
| 204 | static noinline_for_stack int ethtool_get_drvinfo(struct net_device *dev, void __user *useraddr) | 220 | static noinline_for_stack int ethtool_get_drvinfo(struct net_device *dev, |
| 221 | void __user *useraddr) | ||
| 205 | { | 222 | { |
| 206 | struct ethtool_drvinfo info; | 223 | struct ethtool_drvinfo info; |
| 207 | const struct ethtool_ops *ops = dev->ethtool_ops; | 224 | const struct ethtool_ops *ops = dev->ethtool_ops; |
| @@ -241,7 +258,7 @@ static noinline_for_stack int ethtool_get_drvinfo(struct net_device *dev, void _ | |||
| 241 | } | 258 | } |
| 242 | 259 | ||
| 243 | static noinline_for_stack int ethtool_get_sset_info(struct net_device *dev, | 260 | static noinline_for_stack int ethtool_get_sset_info(struct net_device *dev, |
| 244 | void __user *useraddr) | 261 | void __user *useraddr) |
| 245 | { | 262 | { |
| 246 | struct ethtool_sset_info info; | 263 | struct ethtool_sset_info info; |
| 247 | const struct ethtool_ops *ops = dev->ethtool_ops; | 264 | const struct ethtool_ops *ops = dev->ethtool_ops; |
| @@ -300,7 +317,8 @@ out: | |||
| 300 | return ret; | 317 | return ret; |
| 301 | } | 318 | } |
| 302 | 319 | ||
| 303 | static noinline_for_stack int ethtool_set_rxnfc(struct net_device *dev, void __user *useraddr) | 320 | static noinline_for_stack int ethtool_set_rxnfc(struct net_device *dev, |
| 321 | void __user *useraddr) | ||
| 304 | { | 322 | { |
| 305 | struct ethtool_rxnfc cmd; | 323 | struct ethtool_rxnfc cmd; |
| 306 | 324 | ||
| @@ -313,7 +331,8 @@ static noinline_for_stack int ethtool_set_rxnfc(struct net_device *dev, void __u | |||
| 313 | return dev->ethtool_ops->set_rxnfc(dev, &cmd); | 331 | return dev->ethtool_ops->set_rxnfc(dev, &cmd); |
| 314 | } | 332 | } |
| 315 | 333 | ||
| 316 | static noinline_for_stack int ethtool_get_rxnfc(struct net_device *dev, void __user *useraddr) | 334 | static noinline_for_stack int ethtool_get_rxnfc(struct net_device *dev, |
| 335 | void __user *useraddr) | ||
| 317 | { | 336 | { |
| 318 | struct ethtool_rxnfc info; | 337 | struct ethtool_rxnfc info; |
| 319 | const struct ethtool_ops *ops = dev->ethtool_ops; | 338 | const struct ethtool_ops *ops = dev->ethtool_ops; |
| @@ -358,8 +377,8 @@ err_out: | |||
| 358 | } | 377 | } |
| 359 | 378 | ||
| 360 | static void __rx_ntuple_filter_add(struct ethtool_rx_ntuple_list *list, | 379 | static void __rx_ntuple_filter_add(struct ethtool_rx_ntuple_list *list, |
| 361 | struct ethtool_rx_ntuple_flow_spec *spec, | 380 | struct ethtool_rx_ntuple_flow_spec *spec, |
| 362 | struct ethtool_rx_ntuple_flow_spec_container *fsc) | 381 | struct ethtool_rx_ntuple_flow_spec_container *fsc) |
| 363 | { | 382 | { |
| 364 | 383 | ||
| 365 | /* don't add filters forever */ | 384 | /* don't add filters forever */ |
| @@ -385,7 +404,8 @@ static void __rx_ntuple_filter_add(struct ethtool_rx_ntuple_list *list, | |||
| 385 | list->count++; | 404 | list->count++; |
| 386 | } | 405 | } |
| 387 | 406 | ||
| 388 | static noinline_for_stack int ethtool_set_rx_ntuple(struct net_device *dev, void __user *useraddr) | 407 | static noinline_for_stack int ethtool_set_rx_ntuple(struct net_device *dev, |
| 408 | void __user *useraddr) | ||
| 389 | { | 409 | { |
| 390 | struct ethtool_rx_ntuple cmd; | 410 | struct ethtool_rx_ntuple cmd; |
| 391 | const struct ethtool_ops *ops = dev->ethtool_ops; | 411 | const struct ethtool_ops *ops = dev->ethtool_ops; |
| @@ -502,7 +522,7 @@ static int ethtool_get_rx_ntuple(struct net_device *dev, void __user *useraddr) | |||
| 502 | p += ETH_GSTRING_LEN; | 522 | p += ETH_GSTRING_LEN; |
| 503 | num_strings++; | 523 | num_strings++; |
| 504 | goto unknown_filter; | 524 | goto unknown_filter; |
| 505 | }; | 525 | } |
| 506 | 526 | ||
| 507 | /* now the rest of the filters */ | 527 | /* now the rest of the filters */ |
| 508 | switch (fsc->fs.flow_type) { | 528 | switch (fsc->fs.flow_type) { |
| @@ -510,125 +530,125 @@ static int ethtool_get_rx_ntuple(struct net_device *dev, void __user *useraddr) | |||
| 510 | case UDP_V4_FLOW: | 530 | case UDP_V4_FLOW: |
| 511 | case SCTP_V4_FLOW: | 531 | case SCTP_V4_FLOW: |
| 512 | sprintf(p, "\tSrc IP addr: 0x%x\n", | 532 | sprintf(p, "\tSrc IP addr: 0x%x\n", |
| 513 | fsc->fs.h_u.tcp_ip4_spec.ip4src); | 533 | fsc->fs.h_u.tcp_ip4_spec.ip4src); |
| 514 | p += ETH_GSTRING_LEN; | 534 | p += ETH_GSTRING_LEN; |
| 515 | num_strings++; | 535 | num_strings++; |
| 516 | sprintf(p, "\tSrc IP mask: 0x%x\n", | 536 | sprintf(p, "\tSrc IP mask: 0x%x\n", |
| 517 | fsc->fs.m_u.tcp_ip4_spec.ip4src); | 537 | fsc->fs.m_u.tcp_ip4_spec.ip4src); |
| 518 | p += ETH_GSTRING_LEN; | 538 | p += ETH_GSTRING_LEN; |
| 519 | num_strings++; | 539 | num_strings++; |
| 520 | sprintf(p, "\tDest IP addr: 0x%x\n", | 540 | sprintf(p, "\tDest IP addr: 0x%x\n", |
| 521 | fsc->fs.h_u.tcp_ip4_spec.ip4dst); | 541 | fsc->fs.h_u.tcp_ip4_spec.ip4dst); |
| 522 | p += ETH_GSTRING_LEN; | 542 | p += ETH_GSTRING_LEN; |
| 523 | num_strings++; | 543 | num_strings++; |
| 524 | sprintf(p, "\tDest IP mask: 0x%x\n", | 544 | sprintf(p, "\tDest IP mask: 0x%x\n", |
| 525 | fsc->fs.m_u.tcp_ip4_spec.ip4dst); | 545 | fsc->fs.m_u.tcp_ip4_spec.ip4dst); |
| 526 | p += ETH_GSTRING_LEN; | 546 | p += ETH_GSTRING_LEN; |
| 527 | num_strings++; | 547 | num_strings++; |
| 528 | sprintf(p, "\tSrc Port: %d, mask: 0x%x\n", | 548 | sprintf(p, "\tSrc Port: %d, mask: 0x%x\n", |
| 529 | fsc->fs.h_u.tcp_ip4_spec.psrc, | 549 | fsc->fs.h_u.tcp_ip4_spec.psrc, |
| 530 | fsc->fs.m_u.tcp_ip4_spec.psrc); | 550 | fsc->fs.m_u.tcp_ip4_spec.psrc); |
| 531 | p += ETH_GSTRING_LEN; | 551 | p += ETH_GSTRING_LEN; |
| 532 | num_strings++; | 552 | num_strings++; |
| 533 | sprintf(p, "\tDest Port: %d, mask: 0x%x\n", | 553 | sprintf(p, "\tDest Port: %d, mask: 0x%x\n", |
| 534 | fsc->fs.h_u.tcp_ip4_spec.pdst, | 554 | fsc->fs.h_u.tcp_ip4_spec.pdst, |
| 535 | fsc->fs.m_u.tcp_ip4_spec.pdst); | 555 | fsc->fs.m_u.tcp_ip4_spec.pdst); |
| 536 | p += ETH_GSTRING_LEN; | 556 | p += ETH_GSTRING_LEN; |
| 537 | num_strings++; | 557 | num_strings++; |
| 538 | sprintf(p, "\tTOS: %d, mask: 0x%x\n", | 558 | sprintf(p, "\tTOS: %d, mask: 0x%x\n", |
| 539 | fsc->fs.h_u.tcp_ip4_spec.tos, | 559 | fsc->fs.h_u.tcp_ip4_spec.tos, |
| 540 | fsc->fs.m_u.tcp_ip4_spec.tos); | 560 | fsc->fs.m_u.tcp_ip4_spec.tos); |
| 541 | p += ETH_GSTRING_LEN; | 561 | p += ETH_GSTRING_LEN; |
| 542 | num_strings++; | 562 | num_strings++; |
| 543 | break; | 563 | break; |
| 544 | case AH_ESP_V4_FLOW: | 564 | case AH_ESP_V4_FLOW: |
| 545 | case ESP_V4_FLOW: | 565 | case ESP_V4_FLOW: |
| 546 | sprintf(p, "\tSrc IP addr: 0x%x\n", | 566 | sprintf(p, "\tSrc IP addr: 0x%x\n", |
| 547 | fsc->fs.h_u.ah_ip4_spec.ip4src); | 567 | fsc->fs.h_u.ah_ip4_spec.ip4src); |
| 548 | p += ETH_GSTRING_LEN; | 568 | p += ETH_GSTRING_LEN; |
| 549 | num_strings++; | 569 | num_strings++; |
| 550 | sprintf(p, "\tSrc IP mask: 0x%x\n", | 570 | sprintf(p, "\tSrc IP mask: 0x%x\n", |
| 551 | fsc->fs.m_u.ah_ip4_spec.ip4src); | 571 | fsc->fs.m_u.ah_ip4_spec.ip4src); |
| 552 | p += ETH_GSTRING_LEN; | 572 | p += ETH_GSTRING_LEN; |
| 553 | num_strings++; | 573 | num_strings++; |
| 554 | sprintf(p, "\tDest IP addr: 0x%x\n", | 574 | sprintf(p, "\tDest IP addr: 0x%x\n", |
| 555 | fsc->fs.h_u.ah_ip4_spec.ip4dst); | 575 | fsc->fs.h_u.ah_ip4_spec.ip4dst); |
| 556 | p += ETH_GSTRING_LEN; | 576 | p += ETH_GSTRING_LEN; |
| 557 | num_strings++; | 577 | num_strings++; |
| 558 | sprintf(p, "\tDest IP mask: 0x%x\n", | 578 | sprintf(p, "\tDest IP mask: 0x%x\n", |
| 559 | fsc->fs.m_u.ah_ip4_spec.ip4dst); | 579 | fsc->fs.m_u.ah_ip4_spec.ip4dst); |
| 560 | p += ETH_GSTRING_LEN; | 580 | p += ETH_GSTRING_LEN; |
| 561 | num_strings++; | 581 | num_strings++; |
| 562 | sprintf(p, "\tSPI: %d, mask: 0x%x\n", | 582 | sprintf(p, "\tSPI: %d, mask: 0x%x\n", |
| 563 | fsc->fs.h_u.ah_ip4_spec.spi, | 583 | fsc->fs.h_u.ah_ip4_spec.spi, |
| 564 | fsc->fs.m_u.ah_ip4_spec.spi); | 584 | fsc->fs.m_u.ah_ip4_spec.spi); |
| 565 | p += ETH_GSTRING_LEN; | 585 | p += ETH_GSTRING_LEN; |
| 566 | num_strings++; | 586 | num_strings++; |
| 567 | sprintf(p, "\tTOS: %d, mask: 0x%x\n", | 587 | sprintf(p, "\tTOS: %d, mask: 0x%x\n", |
| 568 | fsc->fs.h_u.ah_ip4_spec.tos, | 588 | fsc->fs.h_u.ah_ip4_spec.tos, |
| 569 | fsc->fs.m_u.ah_ip4_spec.tos); | 589 | fsc->fs.m_u.ah_ip4_spec.tos); |
| 570 | p += ETH_GSTRING_LEN; | 590 | p += ETH_GSTRING_LEN; |
| 571 | num_strings++; | 591 | num_strings++; |
| 572 | break; | 592 | break; |
| 573 | case IP_USER_FLOW: | 593 | case IP_USER_FLOW: |
| 574 | sprintf(p, "\tSrc IP addr: 0x%x\n", | 594 | sprintf(p, "\tSrc IP addr: 0x%x\n", |
| 575 | fsc->fs.h_u.raw_ip4_spec.ip4src); | 595 | fsc->fs.h_u.raw_ip4_spec.ip4src); |
| 576 | p += ETH_GSTRING_LEN; | 596 | p += ETH_GSTRING_LEN; |
| 577 | num_strings++; | 597 | num_strings++; |
| 578 | sprintf(p, "\tSrc IP mask: 0x%x\n", | 598 | sprintf(p, "\tSrc IP mask: 0x%x\n", |
| 579 | fsc->fs.m_u.raw_ip4_spec.ip4src); | 599 | fsc->fs.m_u.raw_ip4_spec.ip4src); |
| 580 | p += ETH_GSTRING_LEN; | 600 | p += ETH_GSTRING_LEN; |
| 581 | num_strings++; | 601 | num_strings++; |
| 582 | sprintf(p, "\tDest IP addr: 0x%x\n", | 602 | sprintf(p, "\tDest IP addr: 0x%x\n", |
| 583 | fsc->fs.h_u.raw_ip4_spec.ip4dst); | 603 | fsc->fs.h_u.raw_ip4_spec.ip4dst); |
| 584 | p += ETH_GSTRING_LEN; | 604 | p += ETH_GSTRING_LEN; |
| 585 | num_strings++; | 605 | num_strings++; |
| 586 | sprintf(p, "\tDest IP mask: 0x%x\n", | 606 | sprintf(p, "\tDest IP mask: 0x%x\n", |
| 587 | fsc->fs.m_u.raw_ip4_spec.ip4dst); | 607 | fsc->fs.m_u.raw_ip4_spec.ip4dst); |
| 588 | p += ETH_GSTRING_LEN; | 608 | p += ETH_GSTRING_LEN; |
| 589 | num_strings++; | 609 | num_strings++; |
| 590 | break; | 610 | break; |
| 591 | case IPV4_FLOW: | 611 | case IPV4_FLOW: |
| 592 | sprintf(p, "\tSrc IP addr: 0x%x\n", | 612 | sprintf(p, "\tSrc IP addr: 0x%x\n", |
| 593 | fsc->fs.h_u.usr_ip4_spec.ip4src); | 613 | fsc->fs.h_u.usr_ip4_spec.ip4src); |
| 594 | p += ETH_GSTRING_LEN; | 614 | p += ETH_GSTRING_LEN; |
| 595 | num_strings++; | 615 | num_strings++; |
| 596 | sprintf(p, "\tSrc IP mask: 0x%x\n", | 616 | sprintf(p, "\tSrc IP mask: 0x%x\n", |
| 597 | fsc->fs.m_u.usr_ip4_spec.ip4src); | 617 | fsc->fs.m_u.usr_ip4_spec.ip4src); |
| 598 | p += ETH_GSTRING_LEN; | 618 | p += ETH_GSTRING_LEN; |
| 599 | num_strings++; | 619 | num_strings++; |
| 600 | sprintf(p, "\tDest IP addr: 0x%x\n", | 620 | sprintf(p, "\tDest IP addr: 0x%x\n", |
| 601 | fsc->fs.h_u.usr_ip4_spec.ip4dst); | 621 | fsc->fs.h_u.usr_ip4_spec.ip4dst); |
| 602 | p += ETH_GSTRING_LEN; | 622 | p += ETH_GSTRING_LEN; |
| 603 | num_strings++; | 623 | num_strings++; |
| 604 | sprintf(p, "\tDest IP mask: 0x%x\n", | 624 | sprintf(p, "\tDest IP mask: 0x%x\n", |
| 605 | fsc->fs.m_u.usr_ip4_spec.ip4dst); | 625 | fsc->fs.m_u.usr_ip4_spec.ip4dst); |
| 606 | p += ETH_GSTRING_LEN; | 626 | p += ETH_GSTRING_LEN; |
| 607 | num_strings++; | 627 | num_strings++; |
| 608 | sprintf(p, "\tL4 bytes: 0x%x, mask: 0x%x\n", | 628 | sprintf(p, "\tL4 bytes: 0x%x, mask: 0x%x\n", |
| 609 | fsc->fs.h_u.usr_ip4_spec.l4_4_bytes, | 629 | fsc->fs.h_u.usr_ip4_spec.l4_4_bytes, |
| 610 | fsc->fs.m_u.usr_ip4_spec.l4_4_bytes); | 630 | fsc->fs.m_u.usr_ip4_spec.l4_4_bytes); |
| 611 | p += ETH_GSTRING_LEN; | 631 | p += ETH_GSTRING_LEN; |
| 612 | num_strings++; | 632 | num_strings++; |
| 613 | sprintf(p, "\tTOS: %d, mask: 0x%x\n", | 633 | sprintf(p, "\tTOS: %d, mask: 0x%x\n", |
| 614 | fsc->fs.h_u.usr_ip4_spec.tos, | 634 | fsc->fs.h_u.usr_ip4_spec.tos, |
| 615 | fsc->fs.m_u.usr_ip4_spec.tos); | 635 | fsc->fs.m_u.usr_ip4_spec.tos); |
| 616 | p += ETH_GSTRING_LEN; | 636 | p += ETH_GSTRING_LEN; |
| 617 | num_strings++; | 637 | num_strings++; |
| 618 | sprintf(p, "\tIP Version: %d, mask: 0x%x\n", | 638 | sprintf(p, "\tIP Version: %d, mask: 0x%x\n", |
| 619 | fsc->fs.h_u.usr_ip4_spec.ip_ver, | 639 | fsc->fs.h_u.usr_ip4_spec.ip_ver, |
| 620 | fsc->fs.m_u.usr_ip4_spec.ip_ver); | 640 | fsc->fs.m_u.usr_ip4_spec.ip_ver); |
| 621 | p += ETH_GSTRING_LEN; | 641 | p += ETH_GSTRING_LEN; |
| 622 | num_strings++; | 642 | num_strings++; |
| 623 | sprintf(p, "\tProtocol: %d, mask: 0x%x\n", | 643 | sprintf(p, "\tProtocol: %d, mask: 0x%x\n", |
| 624 | fsc->fs.h_u.usr_ip4_spec.proto, | 644 | fsc->fs.h_u.usr_ip4_spec.proto, |
| 625 | fsc->fs.m_u.usr_ip4_spec.proto); | 645 | fsc->fs.m_u.usr_ip4_spec.proto); |
| 626 | p += ETH_GSTRING_LEN; | 646 | p += ETH_GSTRING_LEN; |
| 627 | num_strings++; | 647 | num_strings++; |
| 628 | break; | 648 | break; |
| 629 | }; | 649 | } |
| 630 | sprintf(p, "\tVLAN: %d, mask: 0x%x\n", | 650 | sprintf(p, "\tVLAN: %d, mask: 0x%x\n", |
| 631 | fsc->fs.vlan_tag, fsc->fs.vlan_tag_mask); | 651 | fsc->fs.vlan_tag, fsc->fs.vlan_tag_mask); |
| 632 | p += ETH_GSTRING_LEN; | 652 | p += ETH_GSTRING_LEN; |
| 633 | num_strings++; | 653 | num_strings++; |
| 634 | sprintf(p, "\tUser-defined: 0x%Lx\n", fsc->fs.data); | 654 | sprintf(p, "\tUser-defined: 0x%Lx\n", fsc->fs.data); |
| @@ -641,7 +661,7 @@ static int ethtool_get_rx_ntuple(struct net_device *dev, void __user *useraddr) | |||
| 641 | sprintf(p, "\tAction: Drop\n"); | 661 | sprintf(p, "\tAction: Drop\n"); |
| 642 | else | 662 | else |
| 643 | sprintf(p, "\tAction: Direct to queue %d\n", | 663 | sprintf(p, "\tAction: Direct to queue %d\n", |
| 644 | fsc->fs.action); | 664 | fsc->fs.action); |
| 645 | p += ETH_GSTRING_LEN; | 665 | p += ETH_GSTRING_LEN; |
| 646 | num_strings++; | 666 | num_strings++; |
| 647 | unknown_filter: | 667 | unknown_filter: |
| @@ -853,7 +873,8 @@ static int ethtool_set_eeprom(struct net_device *dev, void __user *useraddr) | |||
| 853 | return ret; | 873 | return ret; |
| 854 | } | 874 | } |
| 855 | 875 | ||
| 856 | static noinline_for_stack int ethtool_get_coalesce(struct net_device *dev, void __user *useraddr) | 876 | static noinline_for_stack int ethtool_get_coalesce(struct net_device *dev, |
| 877 | void __user *useraddr) | ||
| 857 | { | 878 | { |
| 858 | struct ethtool_coalesce coalesce = { .cmd = ETHTOOL_GCOALESCE }; | 879 | struct ethtool_coalesce coalesce = { .cmd = ETHTOOL_GCOALESCE }; |
| 859 | 880 | ||
| @@ -867,7 +888,8 @@ static noinline_for_stack int ethtool_get_coalesce(struct net_device *dev, void | |||
| 867 | return 0; | 888 | return 0; |
| 868 | } | 889 | } |
| 869 | 890 | ||
| 870 | static noinline_for_stack int ethtool_set_coalesce(struct net_device *dev, void __user *useraddr) | 891 | static noinline_for_stack int ethtool_set_coalesce(struct net_device *dev, |
| 892 | void __user *useraddr) | ||
| 871 | { | 893 | { |
| 872 | struct ethtool_coalesce coalesce; | 894 | struct ethtool_coalesce coalesce; |
| 873 | 895 | ||
| @@ -971,6 +993,7 @@ static int ethtool_set_tx_csum(struct net_device *dev, char __user *useraddr) | |||
| 971 | 993 | ||
| 972 | return dev->ethtool_ops->set_tx_csum(dev, edata.data); | 994 | return dev->ethtool_ops->set_tx_csum(dev, edata.data); |
| 973 | } | 995 | } |
| 996 | EXPORT_SYMBOL(ethtool_op_set_tx_csum); | ||
| 974 | 997 | ||
| 975 | static int ethtool_set_rx_csum(struct net_device *dev, char __user *useraddr) | 998 | static int ethtool_set_rx_csum(struct net_device *dev, char __user *useraddr) |
| 976 | { | 999 | { |
| @@ -1042,7 +1065,7 @@ static int ethtool_get_gso(struct net_device *dev, char __user *useraddr) | |||
| 1042 | 1065 | ||
| 1043 | edata.data = dev->features & NETIF_F_GSO; | 1066 | edata.data = dev->features & NETIF_F_GSO; |
| 1044 | if (copy_to_user(useraddr, &edata, sizeof(edata))) | 1067 | if (copy_to_user(useraddr, &edata, sizeof(edata))) |
| 1045 | return -EFAULT; | 1068 | return -EFAULT; |
| 1046 | return 0; | 1069 | return 0; |
| 1047 | } | 1070 | } |
| 1048 | 1071 | ||
| @@ -1065,7 +1088,7 @@ static int ethtool_get_gro(struct net_device *dev, char __user *useraddr) | |||
| 1065 | 1088 | ||
| 1066 | edata.data = dev->features & NETIF_F_GRO; | 1089 | edata.data = dev->features & NETIF_F_GRO; |
| 1067 | if (copy_to_user(useraddr, &edata, sizeof(edata))) | 1090 | if (copy_to_user(useraddr, &edata, sizeof(edata))) |
| 1068 | return -EFAULT; | 1091 | return -EFAULT; |
| 1069 | return 0; | 1092 | return 0; |
| 1070 | } | 1093 | } |
| 1071 | 1094 | ||
| @@ -1277,7 +1300,8 @@ static int ethtool_set_value(struct net_device *dev, char __user *useraddr, | |||
| 1277 | return actor(dev, edata.data); | 1300 | return actor(dev, edata.data); |
| 1278 | } | 1301 | } |
| 1279 | 1302 | ||
| 1280 | static noinline_for_stack int ethtool_flash_device(struct net_device *dev, char __user *useraddr) | 1303 | static noinline_for_stack int ethtool_flash_device(struct net_device *dev, |
| 1304 | char __user *useraddr) | ||
| 1281 | { | 1305 | { |
| 1282 | struct ethtool_flash efl; | 1306 | struct ethtool_flash efl; |
| 1283 | 1307 | ||
| @@ -1306,11 +1330,11 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) | |||
| 1306 | if (!dev->ethtool_ops) | 1330 | if (!dev->ethtool_ops) |
| 1307 | return -EOPNOTSUPP; | 1331 | return -EOPNOTSUPP; |
| 1308 | 1332 | ||
| 1309 | if (copy_from_user(ðcmd, useraddr, sizeof (ethcmd))) | 1333 | if (copy_from_user(ðcmd, useraddr, sizeof(ethcmd))) |
| 1310 | return -EFAULT; | 1334 | return -EFAULT; |
| 1311 | 1335 | ||
| 1312 | /* Allow some commands to be done by anyone */ | 1336 | /* Allow some commands to be done by anyone */ |
| 1313 | switch(ethcmd) { | 1337 | switch (ethcmd) { |
| 1314 | case ETHTOOL_GDRVINFO: | 1338 | case ETHTOOL_GDRVINFO: |
| 1315 | case ETHTOOL_GMSGLVL: | 1339 | case ETHTOOL_GMSGLVL: |
| 1316 | case ETHTOOL_GCOALESCE: | 1340 | case ETHTOOL_GCOALESCE: |
| @@ -1338,10 +1362,11 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) | |||
| 1338 | return -EPERM; | 1362 | return -EPERM; |
| 1339 | } | 1363 | } |
| 1340 | 1364 | ||
| 1341 | if (dev->ethtool_ops->begin) | 1365 | if (dev->ethtool_ops->begin) { |
| 1342 | if ((rc = dev->ethtool_ops->begin(dev)) < 0) | 1366 | rc = dev->ethtool_ops->begin(dev); |
| 1367 | if (rc < 0) | ||
| 1343 | return rc; | 1368 | return rc; |
| 1344 | 1369 | } | |
| 1345 | old_features = dev->features; | 1370 | old_features = dev->features; |
| 1346 | 1371 | ||
| 1347 | switch (ethcmd) { | 1372 | switch (ethcmd) { |
| @@ -1531,16 +1556,3 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) | |||
| 1531 | 1556 | ||
| 1532 | return rc; | 1557 | return rc; |
| 1533 | } | 1558 | } |
| 1534 | |||
| 1535 | EXPORT_SYMBOL(ethtool_op_get_link); | ||
| 1536 | EXPORT_SYMBOL(ethtool_op_get_sg); | ||
| 1537 | EXPORT_SYMBOL(ethtool_op_get_tso); | ||
| 1538 | EXPORT_SYMBOL(ethtool_op_set_sg); | ||
| 1539 | EXPORT_SYMBOL(ethtool_op_set_tso); | ||
| 1540 | EXPORT_SYMBOL(ethtool_op_set_tx_csum); | ||
| 1541 | EXPORT_SYMBOL(ethtool_op_set_tx_hw_csum); | ||
| 1542 | EXPORT_SYMBOL(ethtool_op_set_tx_ipv6_csum); | ||
| 1543 | EXPORT_SYMBOL(ethtool_op_set_ufo); | ||
| 1544 | EXPORT_SYMBOL(ethtool_op_get_ufo); | ||
| 1545 | EXPORT_SYMBOL(ethtool_op_set_flags); | ||
| 1546 | EXPORT_SYMBOL(ethtool_op_get_flags); | ||
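Beyond wrapping the long prototypes and moving each EXPORT_SYMBOL() next to its helper, the ethtool hunk adds ETH_FLAG_RXHASH to flags_dup_features and teaches ethtool_op_set_flags() to toggle NETIF_F_RXHASH from it. A hedged sketch of how a driver might use the generic helpers (the ops wiring, the rx_desc field and skb->rxhash are illustrative assumptions from the same era, not from this patch):

static const struct ethtool_ops example_ethtool_ops = {
	.get_flags	= ethtool_op_get_flags,
	.set_flags	= ethtool_op_set_flags,
};

/* rx path: only report a hardware hash while the feature is enabled */
if (dev->features & NETIF_F_RXHASH)
	skb->rxhash = le32_to_cpu(rx_desc->hash);	/* rx_desc is hypothetical */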
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index d2c3e7dc2e5f..42e84e08a1be 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c | |||
| @@ -39,6 +39,24 @@ int fib_default_rule_add(struct fib_rules_ops *ops, | |||
| 39 | } | 39 | } |
| 40 | EXPORT_SYMBOL(fib_default_rule_add); | 40 | EXPORT_SYMBOL(fib_default_rule_add); |
| 41 | 41 | ||
| 42 | u32 fib_default_rule_pref(struct fib_rules_ops *ops) | ||
| 43 | { | ||
| 44 | struct list_head *pos; | ||
| 45 | struct fib_rule *rule; | ||
| 46 | |||
| 47 | if (!list_empty(&ops->rules_list)) { | ||
| 48 | pos = ops->rules_list.next; | ||
| 49 | if (pos->next != &ops->rules_list) { | ||
| 50 | rule = list_entry(pos->next, struct fib_rule, list); | ||
| 51 | if (rule->pref) | ||
| 52 | return rule->pref - 1; | ||
| 53 | } | ||
| 54 | } | ||
| 55 | |||
| 56 | return 0; | ||
| 57 | } | ||
| 58 | EXPORT_SYMBOL(fib_default_rule_pref); | ||
| 59 | |||
| 42 | static void notify_rule_change(int event, struct fib_rule *rule, | 60 | static void notify_rule_change(int event, struct fib_rule *rule, |
| 43 | struct fib_rules_ops *ops, struct nlmsghdr *nlh, | 61 | struct fib_rules_ops *ops, struct nlmsghdr *nlh, |
| 44 | u32 pid); | 62 | u32 pid); |
| @@ -104,12 +122,12 @@ errout: | |||
| 104 | } | 122 | } |
| 105 | 123 | ||
| 106 | struct fib_rules_ops * | 124 | struct fib_rules_ops * |
| 107 | fib_rules_register(struct fib_rules_ops *tmpl, struct net *net) | 125 | fib_rules_register(const struct fib_rules_ops *tmpl, struct net *net) |
| 108 | { | 126 | { |
| 109 | struct fib_rules_ops *ops; | 127 | struct fib_rules_ops *ops; |
| 110 | int err; | 128 | int err; |
| 111 | 129 | ||
| 112 | ops = kmemdup(tmpl, sizeof (*ops), GFP_KERNEL); | 130 | ops = kmemdup(tmpl, sizeof(*ops), GFP_KERNEL); |
| 113 | if (ops == NULL) | 131 | if (ops == NULL) |
| 114 | return ERR_PTR(-ENOMEM); | 132 | return ERR_PTR(-ENOMEM); |
| 115 | 133 | ||
| @@ -124,7 +142,6 @@ fib_rules_register(struct fib_rules_ops *tmpl, struct net *net) | |||
| 124 | 142 | ||
| 125 | return ops; | 143 | return ops; |
| 126 | } | 144 | } |
| 127 | |||
| 128 | EXPORT_SYMBOL_GPL(fib_rules_register); | 145 | EXPORT_SYMBOL_GPL(fib_rules_register); |
| 129 | 146 | ||
| 130 | void fib_rules_cleanup_ops(struct fib_rules_ops *ops) | 147 | void fib_rules_cleanup_ops(struct fib_rules_ops *ops) |
| @@ -158,7 +175,6 @@ void fib_rules_unregister(struct fib_rules_ops *ops) | |||
| 158 | 175 | ||
| 159 | call_rcu(&ops->rcu, fib_rules_put_rcu); | 176 | call_rcu(&ops->rcu, fib_rules_put_rcu); |
| 160 | } | 177 | } |
| 161 | |||
| 162 | EXPORT_SYMBOL_GPL(fib_rules_unregister); | 178 | EXPORT_SYMBOL_GPL(fib_rules_unregister); |
| 163 | 179 | ||
| 164 | static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops, | 180 | static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops, |
| @@ -221,7 +237,6 @@ out: | |||
| 221 | 237 | ||
| 222 | return err; | 238 | return err; |
| 223 | } | 239 | } |
| 224 | |||
| 225 | EXPORT_SYMBOL_GPL(fib_rules_lookup); | 240 | EXPORT_SYMBOL_GPL(fib_rules_lookup); |
| 226 | 241 | ||
| 227 | static int validate_rulemsg(struct fib_rule_hdr *frh, struct nlattr **tb, | 242 | static int validate_rulemsg(struct fib_rule_hdr *frh, struct nlattr **tb, |
| @@ -520,6 +535,7 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule, | |||
| 520 | return -EMSGSIZE; | 535 | return -EMSGSIZE; |
| 521 | 536 | ||
| 522 | frh = nlmsg_data(nlh); | 537 | frh = nlmsg_data(nlh); |
| 538 | frh->family = ops->family; | ||
| 523 | frh->table = rule->table; | 539 | frh->table = rule->table; |
| 524 | NLA_PUT_U32(skb, FRA_TABLE, rule->table); | 540 | NLA_PUT_U32(skb, FRA_TABLE, rule->table); |
| 525 | frh->res1 = 0; | 541 | frh->res1 = 0; |
| @@ -614,7 +630,7 @@ static int fib_nl_dumprule(struct sk_buff *skb, struct netlink_callback *cb) | |||
| 614 | break; | 630 | break; |
| 615 | 631 | ||
| 616 | cb->args[1] = 0; | 632 | cb->args[1] = 0; |
| 617 | skip: | 633 | skip: |
| 618 | idx++; | 634 | idx++; |
| 619 | } | 635 | } |
| 620 | rcu_read_unlock(); | 636 | rcu_read_unlock(); |
| @@ -686,7 +702,6 @@ static int fib_rules_event(struct notifier_block *this, unsigned long event, | |||
| 686 | struct fib_rules_ops *ops; | 702 | struct fib_rules_ops *ops; |
| 687 | 703 | ||
| 688 | ASSERT_RTNL(); | 704 | ASSERT_RTNL(); |
| 689 | rcu_read_lock(); | ||
| 690 | 705 | ||
| 691 | switch (event) { | 706 | switch (event) { |
| 692 | case NETDEV_REGISTER: | 707 | case NETDEV_REGISTER: |
| @@ -700,8 +715,6 @@ static int fib_rules_event(struct notifier_block *this, unsigned long event, | |||
| 700 | break; | 715 | break; |
| 701 | } | 716 | } |
| 702 | 717 | ||
| 703 | rcu_read_unlock(); | ||
| 704 | |||
| 705 | return NOTIFY_DONE; | 718 | return NOTIFY_DONE; |
| 706 | } | 719 | } |
| 707 | 720 | ||
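The new fib_default_rule_pref() picks a preference for rules added without an explicit one: it looks at the second entry in ops->rules_list and returns that entry's preference minus one, so the new rule lands just ahead of it. Worked example with the standard IPv4 preferences (an assumption, not shown in this diff): with local (0), main (32766) and default (32767) installed, the second entry is main, so the first user rule gets preference 32765; adding another then yields 32764, and so on. Presumably each address family wires it in as the .default_pref callback of its fib_rules_ops template, roughly:

	.default_pref	= fib_default_rule_pref,	/* assumed wiring, not in this hunk */

The same hunk also fills frh->family from ops->family when dumping rules, and drops the rcu_read_lock()/rcu_read_unlock() pair in fib_rules_event(), which already runs under the RTNL (see the ASSERT_RTNL() above).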
diff --git a/net/core/filter.c b/net/core/filter.c index ff943bed21af..da69fb728d32 100644 --- a/net/core/filter.c +++ b/net/core/filter.c | |||
| @@ -302,6 +302,8 @@ load_b: | |||
| 302 | A = skb->pkt_type; | 302 | A = skb->pkt_type; |
| 303 | continue; | 303 | continue; |
| 304 | case SKF_AD_IFINDEX: | 304 | case SKF_AD_IFINDEX: |
| 305 | if (!skb->dev) | ||
| 306 | return 0; | ||
| 305 | A = skb->dev->ifindex; | 307 | A = skb->dev->ifindex; |
| 306 | continue; | 308 | continue; |
| 307 | case SKF_AD_MARK: | 309 | case SKF_AD_MARK: |
| @@ -310,6 +312,11 @@ load_b: | |||
| 310 | case SKF_AD_QUEUE: | 312 | case SKF_AD_QUEUE: |
| 311 | A = skb->queue_mapping; | 313 | A = skb->queue_mapping; |
| 312 | continue; | 314 | continue; |
| 315 | case SKF_AD_HATYPE: | ||
| 316 | if (!skb->dev) | ||
| 317 | return 0; | ||
| 318 | A = skb->dev->type; | ||
| 319 | continue; | ||
| 313 | case SKF_AD_NLATTR: { | 320 | case SKF_AD_NLATTR: { |
| 314 | struct nlattr *nla; | 321 | struct nlattr *nla; |
| 315 | 322 | ||
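The filter.c hunk makes two ancillary-load changes: SKF_AD_IFINDEX now bails out (the filter returns 0) when skb->dev is NULL instead of dereferencing a NULL pointer, and a new SKF_AD_HATYPE load exposes skb->dev->type, the ARPHRD_* hardware type. A hedged userspace sketch of a classic BPF program using the new load (ARPHRD_LOOPBACK is an arbitrary choice; the SKF_AD_OFF convention comes from <linux/filter.h>):

struct sock_filter code[] = {
	/* A = skb->dev->type via the ancillary offset */
	BPF_STMT(BPF_LD | BPF_W | BPF_ABS, SKF_AD_OFF + SKF_AD_HATYPE),
	BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, ARPHRD_LOOPBACK, 0, 1),
	BPF_STMT(BPF_RET | BPF_K, 0xffff),	/* accept */
	BPF_STMT(BPF_RET | BPF_K, 0),		/* drop */
};
struct sock_fprog prog = {
	.len	= sizeof(code) / sizeof(code[0]),
	.filter	= code,
};
/* attach with setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &prog, sizeof(prog)) */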
diff --git a/net/core/flow.c b/net/core/flow.c index 96015871ecea..161900674009 100644 --- a/net/core/flow.c +++ b/net/core/flow.c | |||
| @@ -26,113 +26,158 @@ | |||
| 26 | #include <linux/security.h> | 26 | #include <linux/security.h> |
| 27 | 27 | ||
| 28 | struct flow_cache_entry { | 28 | struct flow_cache_entry { |
| 29 | struct flow_cache_entry *next; | 29 | union { |
| 30 | u16 family; | 30 | struct hlist_node hlist; |
| 31 | u8 dir; | 31 | struct list_head gc_list; |
| 32 | u32 genid; | 32 | } u; |
| 33 | struct flowi key; | 33 | u16 family; |
| 34 | void *object; | 34 | u8 dir; |
| 35 | atomic_t *object_ref; | 35 | u32 genid; |
| 36 | struct flowi key; | ||
| 37 | struct flow_cache_object *object; | ||
| 36 | }; | 38 | }; |
| 37 | 39 | ||
| 38 | atomic_t flow_cache_genid = ATOMIC_INIT(0); | 40 | struct flow_cache_percpu { |
| 39 | 41 | struct hlist_head *hash_table; | |
| 40 | static u32 flow_hash_shift; | 42 | int hash_count; |
| 41 | #define flow_hash_size (1 << flow_hash_shift) | 43 | u32 hash_rnd; |
| 42 | static DEFINE_PER_CPU(struct flow_cache_entry **, flow_tables) = { NULL }; | 44 | int hash_rnd_recalc; |
| 43 | 45 | struct tasklet_struct flush_tasklet; | |
| 44 | #define flow_table(cpu) (per_cpu(flow_tables, cpu)) | 46 | }; |
| 45 | |||
| 46 | static struct kmem_cache *flow_cachep __read_mostly; | ||
| 47 | 47 | ||
| 48 | static int flow_lwm, flow_hwm; | 48 | struct flow_flush_info { |
| 49 | struct flow_cache *cache; | ||
| 50 | atomic_t cpuleft; | ||
| 51 | struct completion completion; | ||
| 52 | }; | ||
| 49 | 53 | ||
| 50 | struct flow_percpu_info { | 54 | struct flow_cache { |
| 51 | int hash_rnd_recalc; | 55 | u32 hash_shift; |
| 52 | u32 hash_rnd; | 56 | unsigned long order; |
| 53 | int count; | 57 | struct flow_cache_percpu *percpu; |
| 58 | struct notifier_block hotcpu_notifier; | ||
| 59 | int low_watermark; | ||
| 60 | int high_watermark; | ||
| 61 | struct timer_list rnd_timer; | ||
| 54 | }; | 62 | }; |
| 55 | static DEFINE_PER_CPU(struct flow_percpu_info, flow_hash_info) = { 0 }; | ||
| 56 | 63 | ||
| 57 | #define flow_hash_rnd_recalc(cpu) \ | 64 | atomic_t flow_cache_genid = ATOMIC_INIT(0); |
| 58 | (per_cpu(flow_hash_info, cpu).hash_rnd_recalc) | 65 | static struct flow_cache flow_cache_global; |
| 59 | #define flow_hash_rnd(cpu) \ | 66 | static struct kmem_cache *flow_cachep; |
| 60 | (per_cpu(flow_hash_info, cpu).hash_rnd) | ||
| 61 | #define flow_count(cpu) \ | ||
| 62 | (per_cpu(flow_hash_info, cpu).count) | ||
| 63 | 67 | ||
| 64 | static struct timer_list flow_hash_rnd_timer; | 68 | static DEFINE_SPINLOCK(flow_cache_gc_lock); |
| 69 | static LIST_HEAD(flow_cache_gc_list); | ||
| 65 | 70 | ||
| 66 | #define FLOW_HASH_RND_PERIOD (10 * 60 * HZ) | 71 | #define flow_cache_hash_size(cache) (1 << (cache)->hash_shift) |
| 67 | 72 | #define FLOW_HASH_RND_PERIOD (10 * 60 * HZ) | |
| 68 | struct flow_flush_info { | ||
| 69 | atomic_t cpuleft; | ||
| 70 | struct completion completion; | ||
| 71 | }; | ||
| 72 | static DEFINE_PER_CPU(struct tasklet_struct, flow_flush_tasklets) = { NULL }; | ||
| 73 | |||
| 74 | #define flow_flush_tasklet(cpu) (&per_cpu(flow_flush_tasklets, cpu)) | ||
| 75 | 73 | ||
| 76 | static void flow_cache_new_hashrnd(unsigned long arg) | 74 | static void flow_cache_new_hashrnd(unsigned long arg) |
| 77 | { | 75 | { |
| 76 | struct flow_cache *fc = (void *) arg; | ||
| 78 | int i; | 77 | int i; |
| 79 | 78 | ||
| 80 | for_each_possible_cpu(i) | 79 | for_each_possible_cpu(i) |
| 81 | flow_hash_rnd_recalc(i) = 1; | 80 | per_cpu_ptr(fc->percpu, i)->hash_rnd_recalc = 1; |
| 82 | 81 | ||
| 83 | flow_hash_rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD; | 82 | fc->rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD; |
| 84 | add_timer(&flow_hash_rnd_timer); | 83 | add_timer(&fc->rnd_timer); |
| 84 | } | ||
| 85 | |||
| 86 | static int flow_entry_valid(struct flow_cache_entry *fle) | ||
| 87 | { | ||
| 88 | if (atomic_read(&flow_cache_genid) != fle->genid) | ||
| 89 | return 0; | ||
| 90 | if (fle->object && !fle->object->ops->check(fle->object)) | ||
| 91 | return 0; | ||
| 92 | return 1; | ||
| 85 | } | 93 | } |
| 86 | 94 | ||
| 87 | static void flow_entry_kill(int cpu, struct flow_cache_entry *fle) | 95 | static void flow_entry_kill(struct flow_cache_entry *fle) |
| 88 | { | 96 | { |
| 89 | if (fle->object) | 97 | if (fle->object) |
| 90 | atomic_dec(fle->object_ref); | 98 | fle->object->ops->delete(fle->object); |
| 91 | kmem_cache_free(flow_cachep, fle); | 99 | kmem_cache_free(flow_cachep, fle); |
| 92 | flow_count(cpu)--; | ||
| 93 | } | 100 | } |
| 94 | 101 | ||
| 95 | static void __flow_cache_shrink(int cpu, int shrink_to) | 102 | static void flow_cache_gc_task(struct work_struct *work) |
| 96 | { | 103 | { |
| 97 | struct flow_cache_entry *fle, **flp; | 104 | struct list_head gc_list; |
| 98 | int i; | 105 | struct flow_cache_entry *fce, *n; |
| 99 | 106 | ||
| 100 | for (i = 0; i < flow_hash_size; i++) { | 107 | INIT_LIST_HEAD(&gc_list); |
| 101 | int k = 0; | 108 | spin_lock_bh(&flow_cache_gc_lock); |
| 109 | list_splice_tail_init(&flow_cache_gc_list, &gc_list); | ||
| 110 | spin_unlock_bh(&flow_cache_gc_lock); | ||
| 102 | 111 | ||
| 103 | flp = &flow_table(cpu)[i]; | 112 | list_for_each_entry_safe(fce, n, &gc_list, u.gc_list) |
| 104 | while ((fle = *flp) != NULL && k < shrink_to) { | 113 | flow_entry_kill(fce); |
| 105 | k++; | 114 | } |
| 106 | flp = &fle->next; | 115 | static DECLARE_WORK(flow_cache_gc_work, flow_cache_gc_task); |
| 107 | } | 116 | |
| 108 | while ((fle = *flp) != NULL) { | 117 | static void flow_cache_queue_garbage(struct flow_cache_percpu *fcp, |
| 109 | *flp = fle->next; | 118 | int deleted, struct list_head *gc_list) |
| 110 | flow_entry_kill(cpu, fle); | 119 | { |
| 111 | } | 120 | if (deleted) { |
| 121 | fcp->hash_count -= deleted; | ||
| 122 | spin_lock_bh(&flow_cache_gc_lock); | ||
| 123 | list_splice_tail(gc_list, &flow_cache_gc_list); | ||
| 124 | spin_unlock_bh(&flow_cache_gc_lock); | ||
| 125 | schedule_work(&flow_cache_gc_work); | ||
| 112 | } | 126 | } |
| 113 | } | 127 | } |
| 114 | 128 | ||
| 115 | static void flow_cache_shrink(int cpu) | 129 | static void __flow_cache_shrink(struct flow_cache *fc, |
| 130 | struct flow_cache_percpu *fcp, | ||
| 131 | int shrink_to) | ||
| 116 | { | 132 | { |
| 117 | int shrink_to = flow_lwm / flow_hash_size; | 133 | struct flow_cache_entry *fle; |
| 134 | struct hlist_node *entry, *tmp; | ||
| 135 | LIST_HEAD(gc_list); | ||
| 136 | int i, deleted = 0; | ||
| 137 | |||
| 138 | for (i = 0; i < flow_cache_hash_size(fc); i++) { | ||
| 139 | int saved = 0; | ||
| 140 | |||
| 141 | hlist_for_each_entry_safe(fle, entry, tmp, | ||
| 142 | &fcp->hash_table[i], u.hlist) { | ||
| 143 | if (saved < shrink_to && | ||
| 144 | flow_entry_valid(fle)) { | ||
| 145 | saved++; | ||
| 146 | } else { | ||
| 147 | deleted++; | ||
| 148 | hlist_del(&fle->u.hlist); | ||
| 149 | list_add_tail(&fle->u.gc_list, &gc_list); | ||
| 150 | } | ||
| 151 | } | ||
| 152 | } | ||
| 118 | 153 | ||
| 119 | __flow_cache_shrink(cpu, shrink_to); | 154 | flow_cache_queue_garbage(fcp, deleted, &gc_list); |
| 120 | } | 155 | } |
| 121 | 156 | ||
| 122 | static void flow_new_hash_rnd(int cpu) | 157 | static void flow_cache_shrink(struct flow_cache *fc, |
| 158 | struct flow_cache_percpu *fcp) | ||
| 123 | { | 159 | { |
| 124 | get_random_bytes(&flow_hash_rnd(cpu), sizeof(u32)); | 160 | int shrink_to = fc->low_watermark / flow_cache_hash_size(fc); |
| 125 | flow_hash_rnd_recalc(cpu) = 0; | ||
| 126 | 161 | ||
| 127 | __flow_cache_shrink(cpu, 0); | 162 | __flow_cache_shrink(fc, fcp, shrink_to); |
| 128 | } | 163 | } |
| 129 | 164 | ||
| 130 | static u32 flow_hash_code(struct flowi *key, int cpu) | 165 | static void flow_new_hash_rnd(struct flow_cache *fc, |
| 166 | struct flow_cache_percpu *fcp) | ||
| 167 | { | ||
| 168 | get_random_bytes(&fcp->hash_rnd, sizeof(u32)); | ||
| 169 | fcp->hash_rnd_recalc = 0; | ||
| 170 | __flow_cache_shrink(fc, fcp, 0); | ||
| 171 | } | ||
| 172 | |||
| 173 | static u32 flow_hash_code(struct flow_cache *fc, | ||
| 174 | struct flow_cache_percpu *fcp, | ||
| 175 | struct flowi *key) | ||
| 131 | { | 176 | { |
| 132 | u32 *k = (u32 *) key; | 177 | u32 *k = (u32 *) key; |
| 133 | 178 | ||
| 134 | return (jhash2(k, (sizeof(*key) / sizeof(u32)), flow_hash_rnd(cpu)) & | 179 | return (jhash2(k, (sizeof(*key) / sizeof(u32)), fcp->hash_rnd) |
| 135 | (flow_hash_size - 1)); | 180 | & (flow_cache_hash_size(fc) - 1)); |
| 136 | } | 181 | } |
| 137 | 182 | ||
| 138 | #if (BITS_PER_LONG == 64) | 183 | #if (BITS_PER_LONG == 64) |
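Up to this point the flow.c rework replaces the open-coded per-CPU arrays and next-pointer chains with struct flow_cache / flow_cache_percpu, hlist buckets, and deferred freeing through flow_cache_gc_task() on a workqueue; cache entries now hold a struct flow_cache_object whose ops (get/check/delete) manage the resolved object's lifetime in place of the old object_ref atomic. A hedged sketch of that ops contract, inferred from the calls above (the actual declarations would live in include/net/flow.h, which this diff does not include):

struct flow_cache_ops {
	struct flow_cache_object *(*get)(struct flow_cache_object *);
	int (*check)(struct flow_cache_object *);
	void (*delete)(struct flow_cache_object *);
};

struct flow_cache_object {
	const struct flow_cache_ops *ops;
};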
| @@ -165,114 +210,117 @@ static int flow_key_compare(struct flowi *key1, struct flowi *key2) | |||
| 165 | return 0; | 210 | return 0; |
| 166 | } | 211 | } |
| 167 | 212 | ||
| 168 | void *flow_cache_lookup(struct net *net, struct flowi *key, u16 family, u8 dir, | 213 | struct flow_cache_object * |
| 169 | flow_resolve_t resolver) | 214 | flow_cache_lookup(struct net *net, struct flowi *key, u16 family, u8 dir, |
| 215 | flow_resolve_t resolver, void *ctx) | ||
| 170 | { | 216 | { |
| 171 | struct flow_cache_entry *fle, **head; | 217 | struct flow_cache *fc = &flow_cache_global; |
| 218 | struct flow_cache_percpu *fcp; | ||
| 219 | struct flow_cache_entry *fle, *tfle; | ||
| 220 | struct hlist_node *entry; | ||
| 221 | struct flow_cache_object *flo; | ||
| 172 | unsigned int hash; | 222 | unsigned int hash; |
| 173 | int cpu; | ||
| 174 | 223 | ||
| 175 | local_bh_disable(); | 224 | local_bh_disable(); |
| 176 | cpu = smp_processor_id(); | 225 | fcp = per_cpu_ptr(fc->percpu, smp_processor_id()); |
| 177 | 226 | ||
| 178 | fle = NULL; | 227 | fle = NULL; |
| 228 | flo = NULL; | ||
| 179 | /* Packet really early in init? Making flow_cache_init a | 229 | /* Packet really early in init? Making flow_cache_init a |
| 180 | * pre-smp initcall would solve this. --RR */ | 230 | * pre-smp initcall would solve this. --RR */ |
| 181 | if (!flow_table(cpu)) | 231 | if (!fcp->hash_table) |
| 182 | goto nocache; | 232 | goto nocache; |
| 183 | 233 | ||
| 184 | if (flow_hash_rnd_recalc(cpu)) | 234 | if (fcp->hash_rnd_recalc) |
| 185 | flow_new_hash_rnd(cpu); | 235 | flow_new_hash_rnd(fc, fcp); |
| 186 | hash = flow_hash_code(key, cpu); | ||
| 187 | 236 | ||
| 188 | head = &flow_table(cpu)[hash]; | 237 | hash = flow_hash_code(fc, fcp, key); |
| 189 | for (fle = *head; fle; fle = fle->next) { | 238 | hlist_for_each_entry(tfle, entry, &fcp->hash_table[hash], u.hlist) { |
| 190 | if (fle->family == family && | 239 | if (tfle->family == family && |
| 191 | fle->dir == dir && | 240 | tfle->dir == dir && |
| 192 | flow_key_compare(key, &fle->key) == 0) { | 241 | flow_key_compare(key, &tfle->key) == 0) { |
| 193 | if (fle->genid == atomic_read(&flow_cache_genid)) { | 242 | fle = tfle; |
| 194 | void *ret = fle->object; | ||
| 195 | |||
| 196 | if (ret) | ||
| 197 | atomic_inc(fle->object_ref); | ||
| 198 | local_bh_enable(); | ||
| 199 | |||
| 200 | return ret; | ||
| 201 | } | ||
| 202 | break; | 243 | break; |
| 203 | } | 244 | } |
| 204 | } | 245 | } |
| 205 | 246 | ||
| 206 | if (!fle) { | 247 | if (unlikely(!fle)) { |
| 207 | if (flow_count(cpu) > flow_hwm) | 248 | if (fcp->hash_count > fc->high_watermark) |
| 208 | flow_cache_shrink(cpu); | 249 | flow_cache_shrink(fc, fcp); |
| 209 | 250 | ||
| 210 | fle = kmem_cache_alloc(flow_cachep, GFP_ATOMIC); | 251 | fle = kmem_cache_alloc(flow_cachep, GFP_ATOMIC); |
| 211 | if (fle) { | 252 | if (fle) { |
| 212 | fle->next = *head; | ||
| 213 | *head = fle; | ||
| 214 | fle->family = family; | 253 | fle->family = family; |
| 215 | fle->dir = dir; | 254 | fle->dir = dir; |
| 216 | memcpy(&fle->key, key, sizeof(*key)); | 255 | memcpy(&fle->key, key, sizeof(*key)); |
| 217 | fle->object = NULL; | 256 | fle->object = NULL; |
| 218 | flow_count(cpu)++; | 257 | hlist_add_head(&fle->u.hlist, &fcp->hash_table[hash]); |
| 258 | fcp->hash_count++; | ||
| 219 | } | 259 | } |
| 260 | } else if (likely(fle->genid == atomic_read(&flow_cache_genid))) { | ||
| 261 | flo = fle->object; | ||
| 262 | if (!flo) | ||
| 263 | goto ret_object; | ||
| 264 | flo = flo->ops->get(flo); | ||
| 265 | if (flo) | ||
| 266 | goto ret_object; | ||
| 267 | } else if (fle->object) { | ||
| 268 | flo = fle->object; | ||
| 269 | flo->ops->delete(flo); | ||
| 270 | fle->object = NULL; | ||
| 220 | } | 271 | } |
| 221 | 272 | ||
| 222 | nocache: | 273 | nocache: |
| 223 | { | 274 | flo = NULL; |
| 224 | int err; | 275 | if (fle) { |
| 225 | void *obj; | 276 | flo = fle->object; |
| 226 | atomic_t *obj_ref; | 277 | fle->object = NULL; |
| 227 | |||
| 228 | err = resolver(net, key, family, dir, &obj, &obj_ref); | ||
| 229 | |||
| 230 | if (fle && !err) { | ||
| 231 | fle->genid = atomic_read(&flow_cache_genid); | ||
| 232 | |||
| 233 | if (fle->object) | ||
| 234 | atomic_dec(fle->object_ref); | ||
| 235 | |||
| 236 | fle->object = obj; | ||
| 237 | fle->object_ref = obj_ref; | ||
| 238 | if (obj) | ||
| 239 | atomic_inc(fle->object_ref); | ||
| 240 | } | ||
| 241 | local_bh_enable(); | ||
| 242 | |||
| 243 | if (err) | ||
| 244 | obj = ERR_PTR(err); | ||
| 245 | return obj; | ||
| 246 | } | 278 | } |
| 279 | flo = resolver(net, key, family, dir, flo, ctx); | ||
| 280 | if (fle) { | ||
| 281 | fle->genid = atomic_read(&flow_cache_genid); | ||
| 282 | if (!IS_ERR(flo)) | ||
| 283 | fle->object = flo; | ||
| 284 | else | ||
| 285 | fle->genid--; | ||
| 286 | } else { | ||
| 287 | if (flo && !IS_ERR(flo)) | ||
| 288 | flo->ops->delete(flo); | ||
| 289 | } | ||
| 290 | ret_object: | ||
| 291 | local_bh_enable(); | ||
| 292 | return flo; | ||
| 247 | } | 293 | } |
| 248 | 294 | ||
| 249 | static void flow_cache_flush_tasklet(unsigned long data) | 295 | static void flow_cache_flush_tasklet(unsigned long data) |
| 250 | { | 296 | { |
| 251 | struct flow_flush_info *info = (void *)data; | 297 | struct flow_flush_info *info = (void *)data; |
| 252 | int i; | 298 | struct flow_cache *fc = info->cache; |
| 253 | int cpu; | 299 | struct flow_cache_percpu *fcp; |
| 254 | 300 | struct flow_cache_entry *fle; | |
| 255 | cpu = smp_processor_id(); | 301 | struct hlist_node *entry, *tmp; |
| 256 | for (i = 0; i < flow_hash_size; i++) { | 302 | LIST_HEAD(gc_list); |
| 257 | struct flow_cache_entry *fle; | 303 | int i, deleted = 0; |
| 258 | 304 | ||
| 259 | fle = flow_table(cpu)[i]; | 305 | fcp = per_cpu_ptr(fc->percpu, smp_processor_id()); |
| 260 | for (; fle; fle = fle->next) { | 306 | for (i = 0; i < flow_cache_hash_size(fc); i++) { |
| 261 | unsigned genid = atomic_read(&flow_cache_genid); | 307 | hlist_for_each_entry_safe(fle, entry, tmp, |
| 262 | 308 | &fcp->hash_table[i], u.hlist) { | |
| 263 | if (!fle->object || fle->genid == genid) | 309 | if (flow_entry_valid(fle)) |
| 264 | continue; | 310 | continue; |
| 265 | 311 | ||
| 266 | fle->object = NULL; | 312 | deleted++; |
| 267 | atomic_dec(fle->object_ref); | 313 | hlist_del(&fle->u.hlist); |
| 314 | list_add_tail(&fle->u.gc_list, &gc_list); | ||
| 268 | } | 315 | } |
| 269 | } | 316 | } |
| 270 | 317 | ||
| 318 | flow_cache_queue_garbage(fcp, deleted, &gc_list); | ||
| 319 | |||
| 271 | if (atomic_dec_and_test(&info->cpuleft)) | 320 | if (atomic_dec_and_test(&info->cpuleft)) |
| 272 | complete(&info->completion); | 321 | complete(&info->completion); |
| 273 | } | 322 | } |
| 274 | 323 | ||
| 275 | static void flow_cache_flush_per_cpu(void *) __attribute__((__unused__)); | ||
| 276 | static void flow_cache_flush_per_cpu(void *data) | 324 | static void flow_cache_flush_per_cpu(void *data) |
| 277 | { | 325 | { |
| 278 | struct flow_flush_info *info = data; | 326 | struct flow_flush_info *info = data; |
| @@ -280,8 +328,7 @@ static void flow_cache_flush_per_cpu(void *data) | |||
| 280 | struct tasklet_struct *tasklet; | 328 | struct tasklet_struct *tasklet; |
| 281 | 329 | ||
| 282 | cpu = smp_processor_id(); | 330 | cpu = smp_processor_id(); |
| 283 | 331 | tasklet = &per_cpu_ptr(info->cache->percpu, cpu)->flush_tasklet; | |
| 284 | tasklet = flow_flush_tasklet(cpu); | ||
| 285 | tasklet->data = (unsigned long)info; | 332 | tasklet->data = (unsigned long)info; |
| 286 | tasklet_schedule(tasklet); | 333 | tasklet_schedule(tasklet); |
| 287 | } | 334 | } |
| @@ -294,6 +341,7 @@ void flow_cache_flush(void) | |||
| 294 | /* Don't want cpus going down or up during this. */ | 341 | /* Don't want cpus going down or up during this. */ |
| 295 | get_online_cpus(); | 342 | get_online_cpus(); |
| 296 | mutex_lock(&flow_flush_sem); | 343 | mutex_lock(&flow_flush_sem); |
| 344 | info.cache = &flow_cache_global; | ||
| 297 | atomic_set(&info.cpuleft, num_online_cpus()); | 345 | atomic_set(&info.cpuleft, num_online_cpus()); |
| 298 | init_completion(&info.completion); | 346 | init_completion(&info.completion); |
| 299 | 347 | ||
| @@ -307,62 +355,75 @@ void flow_cache_flush(void) | |||
| 307 | put_online_cpus(); | 355 | put_online_cpus(); |
| 308 | } | 356 | } |
| 309 | 357 | ||
| 310 | static void __init flow_cache_cpu_prepare(int cpu) | 358 | static void __init flow_cache_cpu_prepare(struct flow_cache *fc, |
| 359 | struct flow_cache_percpu *fcp) | ||
| 311 | { | 360 | { |
| 312 | struct tasklet_struct *tasklet; | 361 | fcp->hash_table = (struct hlist_head *) |
| 313 | unsigned long order; | 362 | __get_free_pages(GFP_KERNEL|__GFP_ZERO, fc->order); |
| 314 | 363 | if (!fcp->hash_table) | |
| 315 | for (order = 0; | 364 | panic("NET: failed to allocate flow cache order %lu\n", fc->order); |
| 316 | (PAGE_SIZE << order) < | 365 | |
| 317 | (sizeof(struct flow_cache_entry *)*flow_hash_size); | 366 | fcp->hash_rnd_recalc = 1; |
| 318 | order++) | 367 | fcp->hash_count = 0; |
| 319 | /* NOTHING */; | 368 | tasklet_init(&fcp->flush_tasklet, flow_cache_flush_tasklet, 0); |
| 320 | |||
| 321 | flow_table(cpu) = (struct flow_cache_entry **) | ||
| 322 | __get_free_pages(GFP_KERNEL|__GFP_ZERO, order); | ||
| 323 | if (!flow_table(cpu)) | ||
| 324 | panic("NET: failed to allocate flow cache order %lu\n", order); | ||
| 325 | |||
| 326 | flow_hash_rnd_recalc(cpu) = 1; | ||
| 327 | flow_count(cpu) = 0; | ||
| 328 | |||
| 329 | tasklet = flow_flush_tasklet(cpu); | ||
| 330 | tasklet_init(tasklet, flow_cache_flush_tasklet, 0); | ||
| 331 | } | 369 | } |
| 332 | 370 | ||
| 333 | static int flow_cache_cpu(struct notifier_block *nfb, | 371 | static int flow_cache_cpu(struct notifier_block *nfb, |
| 334 | unsigned long action, | 372 | unsigned long action, |
| 335 | void *hcpu) | 373 | void *hcpu) |
| 336 | { | 374 | { |
| 375 | struct flow_cache *fc = container_of(nfb, struct flow_cache, hotcpu_notifier); | ||
| 376 | int cpu = (unsigned long) hcpu; | ||
| 377 | struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, cpu); | ||
| 378 | |||
| 337 | if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) | 379 | if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) |
| 338 | __flow_cache_shrink((unsigned long)hcpu, 0); | 380 | __flow_cache_shrink(fc, fcp, 0); |
| 339 | return NOTIFY_OK; | 381 | return NOTIFY_OK; |
| 340 | } | 382 | } |
| 341 | 383 | ||
| 342 | static int __init flow_cache_init(void) | 384 | static int flow_cache_init(struct flow_cache *fc) |
| 343 | { | 385 | { |
| 386 | unsigned long order; | ||
| 344 | int i; | 387 | int i; |
| 345 | 388 | ||
| 346 | flow_cachep = kmem_cache_create("flow_cache", | 389 | fc->hash_shift = 10; |
| 347 | sizeof(struct flow_cache_entry), | 390 | fc->low_watermark = 2 * flow_cache_hash_size(fc); |
| 348 | 0, SLAB_PANIC, | 391 | fc->high_watermark = 4 * flow_cache_hash_size(fc); |
| 349 | NULL); | 392 | |
| 350 | flow_hash_shift = 10; | 393 | for (order = 0; |
| 351 | flow_lwm = 2 * flow_hash_size; | 394 | (PAGE_SIZE << order) < |
| 352 | flow_hwm = 4 * flow_hash_size; | 395 | (sizeof(struct hlist_head)*flow_cache_hash_size(fc)); |
| 396 | order++) | ||
| 397 | /* NOTHING */; | ||
| 398 | fc->order = order; | ||
| 399 | fc->percpu = alloc_percpu(struct flow_cache_percpu); | ||
| 353 | 400 | ||
| 354 | setup_timer(&flow_hash_rnd_timer, flow_cache_new_hashrnd, 0); | 401 | setup_timer(&fc->rnd_timer, flow_cache_new_hashrnd, |
| 355 | flow_hash_rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD; | 402 | (unsigned long) fc); |
| 356 | add_timer(&flow_hash_rnd_timer); | 403 | fc->rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD; |
| 404 | add_timer(&fc->rnd_timer); | ||
| 357 | 405 | ||
| 358 | for_each_possible_cpu(i) | 406 | for_each_possible_cpu(i) |
| 359 | flow_cache_cpu_prepare(i); | 407 | flow_cache_cpu_prepare(fc, per_cpu_ptr(fc->percpu, i)); |
| 408 | |||
| 409 | fc->hotcpu_notifier = (struct notifier_block){ | ||
| 410 | .notifier_call = flow_cache_cpu, | ||
| 411 | }; | ||
| 412 | register_hotcpu_notifier(&fc->hotcpu_notifier); | ||
| 360 | 413 | ||
| 361 | hotcpu_notifier(flow_cache_cpu, 0); | ||
| 362 | return 0; | 414 | return 0; |
| 363 | } | 415 | } |
| 364 | 416 | ||
| 365 | module_init(flow_cache_init); | 417 | static int __init flow_cache_init_global(void) |
| 418 | { | ||
| 419 | flow_cachep = kmem_cache_create("flow_cache", | ||
| 420 | sizeof(struct flow_cache_entry), | ||
| 421 | 0, SLAB_PANIC, NULL); | ||
| 422 | |||
| 423 | return flow_cache_init(&flow_cache_global); | ||
| 424 | } | ||
| 425 | |||
| 426 | module_init(flow_cache_init_global); | ||
| 366 | 427 | ||
| 367 | EXPORT_SYMBOL(flow_cache_genid); | 428 | EXPORT_SYMBOL(flow_cache_genid); |
| 368 | EXPORT_SYMBOL(flow_cache_lookup); | 429 | EXPORT_SYMBOL(flow_cache_lookup); |
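The flow.c rework above changes flow_cache_lookup() from returning a bare void * plus a refcount pointer to returning a struct flow_cache_object whose ops (get/check/delete) let the owning subsystem control the cached entry's lifetime; the resolver callback now receives the previously cached (possibly stale) object and an opaque ctx. Below is a hedged sketch of a consumer under the new contract: the flow_cache_ops/flow_cache_object layout and the resolver signature are taken from this patch's flow-cache API, while my_policy, its refcounting and the lookup rule are purely illustrative (the in-tree user is the xfrm policy/bundle code).

```c
#include <net/flow.h>
#include <linux/slab.h>

/* Illustrative object that embeds the new cache handle. */
struct my_policy {
	atomic_t			refcnt;
	struct flow_cache_object	flo;
};

/* Called on a cache hit: take a reference for the caller, or return NULL
 * to force a fresh resolve. */
static struct flow_cache_object *my_flo_get(struct flow_cache_object *flo)
{
	struct my_policy *pol = container_of(flo, struct my_policy, flo);

	atomic_inc(&pol->refcnt);
	return flo;
}

/* Consulted when deciding whether a cached entry is still valid. */
static int my_flo_check(struct flow_cache_object *flo)
{
	return 1;			/* never stale in this sketch */
}

/* Called when the cache drops its copy of the object. */
static void my_flo_delete(struct flow_cache_object *flo)
{
	struct my_policy *pol = container_of(flo, struct my_policy, flo);

	if (atomic_dec_and_test(&pol->refcnt))
		kfree(pol);
}

static const struct flow_cache_ops my_flo_ops = {
	.get	= my_flo_get,
	.check	= my_flo_check,
	.delete	= my_flo_delete,
};

/* The resolver now gets the old (possibly stale) object back, plus a ctx
 * cookie, instead of filling **obj / **obj_ref output parameters. */
static struct flow_cache_object *
my_resolve(struct net *net, struct flowi *key, u16 family, u8 dir,
	   struct flow_cache_object *old_obj, void *ctx)
{
	struct my_policy *pol;

	if (old_obj)
		my_flo_delete(old_obj);		/* drop the stale entry handed back */

	pol = kzalloc(sizeof(*pol), GFP_ATOMIC);
	if (!pol)
		return ERR_PTR(-ENOMEM);

	atomic_set(&pol->refcnt, 1);		/* illustrative lifetime rule only */
	pol->flo.ops = &my_flo_ops;
	return &pol->flo;			/* stored in the cache and returned */
}
```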
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 59cfc7d8fc45..99e7052d7323 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c | |||
| @@ -14,9 +14,12 @@ | |||
| 14 | #include <linux/netdevice.h> | 14 | #include <linux/netdevice.h> |
| 15 | #include <linux/if_arp.h> | 15 | #include <linux/if_arp.h> |
| 16 | #include <linux/slab.h> | 16 | #include <linux/slab.h> |
| 17 | #include <linux/nsproxy.h> | ||
| 17 | #include <net/sock.h> | 18 | #include <net/sock.h> |
| 19 | #include <net/net_namespace.h> | ||
| 18 | #include <linux/rtnetlink.h> | 20 | #include <linux/rtnetlink.h> |
| 19 | #include <linux/wireless.h> | 21 | #include <linux/wireless.h> |
| 22 | #include <linux/vmalloc.h> | ||
| 20 | #include <net/wext.h> | 23 | #include <net/wext.h> |
| 21 | 24 | ||
| 22 | #include "net-sysfs.h" | 25 | #include "net-sysfs.h" |
| @@ -466,18 +469,345 @@ static struct attribute_group wireless_group = { | |||
| 466 | .attrs = wireless_attrs, | 469 | .attrs = wireless_attrs, |
| 467 | }; | 470 | }; |
| 468 | #endif | 471 | #endif |
| 469 | |||
| 470 | #endif /* CONFIG_SYSFS */ | 472 | #endif /* CONFIG_SYSFS */ |
| 471 | 473 | ||
| 474 | #ifdef CONFIG_RPS | ||
| 475 | /* | ||
| 476 | * RX queue sysfs structures and functions. | ||
| 477 | */ | ||
| 478 | struct rx_queue_attribute { | ||
| 479 | struct attribute attr; | ||
| 480 | ssize_t (*show)(struct netdev_rx_queue *queue, | ||
| 481 | struct rx_queue_attribute *attr, char *buf); | ||
| 482 | ssize_t (*store)(struct netdev_rx_queue *queue, | ||
| 483 | struct rx_queue_attribute *attr, const char *buf, size_t len); | ||
| 484 | }; | ||
| 485 | #define to_rx_queue_attr(_attr) container_of(_attr, \ | ||
| 486 | struct rx_queue_attribute, attr) | ||
| 487 | |||
| 488 | #define to_rx_queue(obj) container_of(obj, struct netdev_rx_queue, kobj) | ||
| 489 | |||
| 490 | static ssize_t rx_queue_attr_show(struct kobject *kobj, struct attribute *attr, | ||
| 491 | char *buf) | ||
| 492 | { | ||
| 493 | struct rx_queue_attribute *attribute = to_rx_queue_attr(attr); | ||
| 494 | struct netdev_rx_queue *queue = to_rx_queue(kobj); | ||
| 495 | |||
| 496 | if (!attribute->show) | ||
| 497 | return -EIO; | ||
| 498 | |||
| 499 | return attribute->show(queue, attribute, buf); | ||
| 500 | } | ||
| 501 | |||
| 502 | static ssize_t rx_queue_attr_store(struct kobject *kobj, struct attribute *attr, | ||
| 503 | const char *buf, size_t count) | ||
| 504 | { | ||
| 505 | struct rx_queue_attribute *attribute = to_rx_queue_attr(attr); | ||
| 506 | struct netdev_rx_queue *queue = to_rx_queue(kobj); | ||
| 507 | |||
| 508 | if (!attribute->store) | ||
| 509 | return -EIO; | ||
| 510 | |||
| 511 | return attribute->store(queue, attribute, buf, count); | ||
| 512 | } | ||
| 513 | |||
| 514 | static struct sysfs_ops rx_queue_sysfs_ops = { | ||
| 515 | .show = rx_queue_attr_show, | ||
| 516 | .store = rx_queue_attr_store, | ||
| 517 | }; | ||
| 518 | |||
| 519 | static ssize_t show_rps_map(struct netdev_rx_queue *queue, | ||
| 520 | struct rx_queue_attribute *attribute, char *buf) | ||
| 521 | { | ||
| 522 | struct rps_map *map; | ||
| 523 | cpumask_var_t mask; | ||
| 524 | size_t len = 0; | ||
| 525 | int i; | ||
| 526 | |||
| 527 | if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) | ||
| 528 | return -ENOMEM; | ||
| 529 | |||
| 530 | rcu_read_lock(); | ||
| 531 | map = rcu_dereference(queue->rps_map); | ||
| 532 | if (map) | ||
| 533 | for (i = 0; i < map->len; i++) | ||
| 534 | cpumask_set_cpu(map->cpus[i], mask); | ||
| 535 | |||
| 536 | len += cpumask_scnprintf(buf + len, PAGE_SIZE, mask); | ||
| 537 | if (PAGE_SIZE - len < 3) { | ||
| 538 | rcu_read_unlock(); | ||
| 539 | free_cpumask_var(mask); | ||
| 540 | return -EINVAL; | ||
| 541 | } | ||
| 542 | rcu_read_unlock(); | ||
| 543 | |||
| 544 | free_cpumask_var(mask); | ||
| 545 | len += sprintf(buf + len, "\n"); | ||
| 546 | return len; | ||
| 547 | } | ||
| 548 | |||
| 549 | static void rps_map_release(struct rcu_head *rcu) | ||
| 550 | { | ||
| 551 | struct rps_map *map = container_of(rcu, struct rps_map, rcu); | ||
| 552 | |||
| 553 | kfree(map); | ||
| 554 | } | ||
| 555 | |||
| 556 | static ssize_t store_rps_map(struct netdev_rx_queue *queue, | ||
| 557 | struct rx_queue_attribute *attribute, | ||
| 558 | const char *buf, size_t len) | ||
| 559 | { | ||
| 560 | struct rps_map *old_map, *map; | ||
| 561 | cpumask_var_t mask; | ||
| 562 | int err, cpu, i; | ||
| 563 | static DEFINE_SPINLOCK(rps_map_lock); | ||
| 564 | |||
| 565 | if (!capable(CAP_NET_ADMIN)) | ||
| 566 | return -EPERM; | ||
| 567 | |||
| 568 | if (!alloc_cpumask_var(&mask, GFP_KERNEL)) | ||
| 569 | return -ENOMEM; | ||
| 570 | |||
| 571 | err = bitmap_parse(buf, len, cpumask_bits(mask), nr_cpumask_bits); | ||
| 572 | if (err) { | ||
| 573 | free_cpumask_var(mask); | ||
| 574 | return err; | ||
| 575 | } | ||
| 576 | |||
| 577 | map = kzalloc(max_t(unsigned, | ||
| 578 | RPS_MAP_SIZE(cpumask_weight(mask)), L1_CACHE_BYTES), | ||
| 579 | GFP_KERNEL); | ||
| 580 | if (!map) { | ||
| 581 | free_cpumask_var(mask); | ||
| 582 | return -ENOMEM; | ||
| 583 | } | ||
| 584 | |||
| 585 | i = 0; | ||
| 586 | for_each_cpu_and(cpu, mask, cpu_online_mask) | ||
| 587 | map->cpus[i++] = cpu; | ||
| 588 | |||
| 589 | if (i) | ||
| 590 | map->len = i; | ||
| 591 | else { | ||
| 592 | kfree(map); | ||
| 593 | map = NULL; | ||
| 594 | } | ||
| 595 | |||
| 596 | spin_lock(&rps_map_lock); | ||
| 597 | old_map = queue->rps_map; | ||
| 598 | rcu_assign_pointer(queue->rps_map, map); | ||
| 599 | spin_unlock(&rps_map_lock); | ||
| 600 | |||
| 601 | if (old_map) | ||
| 602 | call_rcu(&old_map->rcu, rps_map_release); | ||
| 603 | |||
| 604 | free_cpumask_var(mask); | ||
| 605 | return len; | ||
| 606 | } | ||
| 607 | |||
| 608 | static ssize_t show_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue, | ||
| 609 | struct rx_queue_attribute *attr, | ||
| 610 | char *buf) | ||
| 611 | { | ||
| 612 | struct rps_dev_flow_table *flow_table; | ||
| 613 | unsigned int val = 0; | ||
| 614 | |||
| 615 | rcu_read_lock(); | ||
| 616 | flow_table = rcu_dereference(queue->rps_flow_table); | ||
| 617 | if (flow_table) | ||
| 618 | val = flow_table->mask + 1; | ||
| 619 | rcu_read_unlock(); | ||
| 620 | |||
| 621 | return sprintf(buf, "%u\n", val); | ||
| 622 | } | ||
| 623 | |||
| 624 | static void rps_dev_flow_table_release_work(struct work_struct *work) | ||
| 625 | { | ||
| 626 | struct rps_dev_flow_table *table = container_of(work, | ||
| 627 | struct rps_dev_flow_table, free_work); | ||
| 628 | |||
| 629 | vfree(table); | ||
| 630 | } | ||
| 631 | |||
| 632 | static void rps_dev_flow_table_release(struct rcu_head *rcu) | ||
| 633 | { | ||
| 634 | struct rps_dev_flow_table *table = container_of(rcu, | ||
| 635 | struct rps_dev_flow_table, rcu); | ||
| 636 | |||
| 637 | INIT_WORK(&table->free_work, rps_dev_flow_table_release_work); | ||
| 638 | schedule_work(&table->free_work); | ||
| 639 | } | ||
| 640 | |||
| 641 | static ssize_t store_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue, | ||
| 642 | struct rx_queue_attribute *attr, | ||
| 643 | const char *buf, size_t len) | ||
| 644 | { | ||
| 645 | unsigned int count; | ||
| 646 | char *endp; | ||
| 647 | struct rps_dev_flow_table *table, *old_table; | ||
| 648 | static DEFINE_SPINLOCK(rps_dev_flow_lock); | ||
| 649 | |||
| 650 | if (!capable(CAP_NET_ADMIN)) | ||
| 651 | return -EPERM; | ||
| 652 | |||
| 653 | count = simple_strtoul(buf, &endp, 0); | ||
| 654 | if (endp == buf) | ||
| 655 | return -EINVAL; | ||
| 656 | |||
| 657 | if (count) { | ||
| 658 | int i; | ||
| 659 | |||
| 660 | if (count > 1<<30) { | ||
| 661 | /* Enforce a limit to prevent overflow */ | ||
| 662 | return -EINVAL; | ||
| 663 | } | ||
| 664 | count = roundup_pow_of_two(count); | ||
| 665 | table = vmalloc(RPS_DEV_FLOW_TABLE_SIZE(count)); | ||
| 666 | if (!table) | ||
| 667 | return -ENOMEM; | ||
| 668 | |||
| 669 | table->mask = count - 1; | ||
| 670 | for (i = 0; i < count; i++) | ||
| 671 | table->flows[i].cpu = RPS_NO_CPU; | ||
| 672 | } else | ||
| 673 | table = NULL; | ||
| 674 | |||
| 675 | spin_lock(&rps_dev_flow_lock); | ||
| 676 | old_table = queue->rps_flow_table; | ||
| 677 | rcu_assign_pointer(queue->rps_flow_table, table); | ||
| 678 | spin_unlock(&rps_dev_flow_lock); | ||
| 679 | |||
| 680 | if (old_table) | ||
| 681 | call_rcu(&old_table->rcu, rps_dev_flow_table_release); | ||
| 682 | |||
| 683 | return len; | ||
| 684 | } | ||
| 685 | |||
| 686 | static struct rx_queue_attribute rps_cpus_attribute = | ||
| 687 | __ATTR(rps_cpus, S_IRUGO | S_IWUSR, show_rps_map, store_rps_map); | ||
| 688 | |||
| 689 | |||
| 690 | static struct rx_queue_attribute rps_dev_flow_table_cnt_attribute = | ||
| 691 | __ATTR(rps_flow_cnt, S_IRUGO | S_IWUSR, | ||
| 692 | show_rps_dev_flow_table_cnt, store_rps_dev_flow_table_cnt); | ||
| 693 | |||
| 694 | static struct attribute *rx_queue_default_attrs[] = { | ||
| 695 | &rps_cpus_attribute.attr, | ||
| 696 | &rps_dev_flow_table_cnt_attribute.attr, | ||
| 697 | NULL | ||
| 698 | }; | ||
| 699 | |||
| 700 | static void rx_queue_release(struct kobject *kobj) | ||
| 701 | { | ||
| 702 | struct netdev_rx_queue *queue = to_rx_queue(kobj); | ||
| 703 | struct netdev_rx_queue *first = queue->first; | ||
| 704 | |||
| 705 | if (queue->rps_map) | ||
| 706 | call_rcu(&queue->rps_map->rcu, rps_map_release); | ||
| 707 | |||
| 708 | if (queue->rps_flow_table) | ||
| 709 | call_rcu(&queue->rps_flow_table->rcu, | ||
| 710 | rps_dev_flow_table_release); | ||
| 711 | |||
| 712 | if (atomic_dec_and_test(&first->count)) | ||
| 713 | kfree(first); | ||
| 714 | } | ||
| 715 | |||
| 716 | static struct kobj_type rx_queue_ktype = { | ||
| 717 | .sysfs_ops = &rx_queue_sysfs_ops, | ||
| 718 | .release = rx_queue_release, | ||
| 719 | .default_attrs = rx_queue_default_attrs, | ||
| 720 | }; | ||
| 721 | |||
| 722 | static int rx_queue_add_kobject(struct net_device *net, int index) | ||
| 723 | { | ||
| 724 | struct netdev_rx_queue *queue = net->_rx + index; | ||
| 725 | struct kobject *kobj = &queue->kobj; | ||
| 726 | int error = 0; | ||
| 727 | |||
| 728 | kobj->kset = net->queues_kset; | ||
| 729 | error = kobject_init_and_add(kobj, &rx_queue_ktype, NULL, | ||
| 730 | "rx-%u", index); | ||
| 731 | if (error) { | ||
| 732 | kobject_put(kobj); | ||
| 733 | return error; | ||
| 734 | } | ||
| 735 | |||
| 736 | kobject_uevent(kobj, KOBJ_ADD); | ||
| 737 | |||
| 738 | return error; | ||
| 739 | } | ||
| 740 | |||
| 741 | static int rx_queue_register_kobjects(struct net_device *net) | ||
| 742 | { | ||
| 743 | int i; | ||
| 744 | int error = 0; | ||
| 745 | |||
| 746 | net->queues_kset = kset_create_and_add("queues", | ||
| 747 | NULL, &net->dev.kobj); | ||
| 748 | if (!net->queues_kset) | ||
| 749 | return -ENOMEM; | ||
| 750 | for (i = 0; i < net->num_rx_queues; i++) { | ||
| 751 | error = rx_queue_add_kobject(net, i); | ||
| 752 | if (error) | ||
| 753 | break; | ||
| 754 | } | ||
| 755 | |||
| 756 | if (error) | ||
| 757 | while (--i >= 0) | ||
| 758 | kobject_put(&net->_rx[i].kobj); | ||
| 759 | |||
| 760 | return error; | ||
| 761 | } | ||
| 762 | |||
| 763 | static void rx_queue_remove_kobjects(struct net_device *net) | ||
| 764 | { | ||
| 765 | int i; | ||
| 766 | |||
| 767 | for (i = 0; i < net->num_rx_queues; i++) | ||
| 768 | kobject_put(&net->_rx[i].kobj); | ||
| 769 | kset_unregister(net->queues_kset); | ||
| 770 | } | ||
| 771 | #endif /* CONFIG_RPS */ | ||
| 772 | |||
| 773 | static const void *net_current_ns(void) | ||
| 774 | { | ||
| 775 | return current->nsproxy->net_ns; | ||
| 776 | } | ||
| 777 | |||
| 778 | static const void *net_initial_ns(void) | ||
| 779 | { | ||
| 780 | return &init_net; | ||
| 781 | } | ||
| 782 | |||
| 783 | static const void *net_netlink_ns(struct sock *sk) | ||
| 784 | { | ||
| 785 | return sock_net(sk); | ||
| 786 | } | ||
| 787 | |||
| 788 | static struct kobj_ns_type_operations net_ns_type_operations = { | ||
| 789 | .type = KOBJ_NS_TYPE_NET, | ||
| 790 | .current_ns = net_current_ns, | ||
| 791 | .netlink_ns = net_netlink_ns, | ||
| 792 | .initial_ns = net_initial_ns, | ||
| 793 | }; | ||
| 794 | |||
| 795 | static void net_kobj_ns_exit(struct net *net) | ||
| 796 | { | ||
| 797 | kobj_ns_exit(KOBJ_NS_TYPE_NET, net); | ||
| 798 | } | ||
| 799 | |||
| 800 | static struct pernet_operations kobj_net_ops = { | ||
| 801 | .exit = net_kobj_ns_exit, | ||
| 802 | }; | ||
| 803 | |||
| 804 | |||
| 472 | #ifdef CONFIG_HOTPLUG | 805 | #ifdef CONFIG_HOTPLUG |
| 473 | static int netdev_uevent(struct device *d, struct kobj_uevent_env *env) | 806 | static int netdev_uevent(struct device *d, struct kobj_uevent_env *env) |
| 474 | { | 807 | { |
| 475 | struct net_device *dev = to_net_dev(d); | 808 | struct net_device *dev = to_net_dev(d); |
| 476 | int retval; | 809 | int retval; |
| 477 | 810 | ||
| 478 | if (!net_eq(dev_net(dev), &init_net)) | ||
| 479 | return 0; | ||
| 480 | |||
| 481 | /* pass interface to uevent. */ | 811 | /* pass interface to uevent. */ |
| 482 | retval = add_uevent_var(env, "INTERFACE=%s", dev->name); | 812 | retval = add_uevent_var(env, "INTERFACE=%s", dev->name); |
| 483 | if (retval) | 813 | if (retval) |
| @@ -507,6 +837,13 @@ static void netdev_release(struct device *d) | |||
| 507 | kfree((char *)dev - dev->padded); | 837 | kfree((char *)dev - dev->padded); |
| 508 | } | 838 | } |
| 509 | 839 | ||
| 840 | static const void *net_namespace(struct device *d) | ||
| 841 | { | ||
| 842 | struct net_device *dev; | ||
| 843 | dev = container_of(d, struct net_device, dev); | ||
| 844 | return dev_net(dev); | ||
| 845 | } | ||
| 846 | |||
| 510 | static struct class net_class = { | 847 | static struct class net_class = { |
| 511 | .name = "net", | 848 | .name = "net", |
| 512 | .dev_release = netdev_release, | 849 | .dev_release = netdev_release, |
| @@ -516,6 +853,8 @@ static struct class net_class = { | |||
| 516 | #ifdef CONFIG_HOTPLUG | 853 | #ifdef CONFIG_HOTPLUG |
| 517 | .dev_uevent = netdev_uevent, | 854 | .dev_uevent = netdev_uevent, |
| 518 | #endif | 855 | #endif |
| 856 | .ns_type = &net_ns_type_operations, | ||
| 857 | .namespace = net_namespace, | ||
| 519 | }; | 858 | }; |
| 520 | 859 | ||
| 521 | /* Delete sysfs entries but hold kobject reference until after all | 860 | /* Delete sysfs entries but hold kobject reference until after all |
| @@ -527,8 +866,9 @@ void netdev_unregister_kobject(struct net_device * net) | |||
| 527 | 866 | ||
| 528 | kobject_get(&dev->kobj); | 867 | kobject_get(&dev->kobj); |
| 529 | 868 | ||
| 530 | if (!net_eq(dev_net(net), &init_net)) | 869 | #ifdef CONFIG_RPS |
| 531 | return; | 870 | rx_queue_remove_kobjects(net); |
| 871 | #endif | ||
| 532 | 872 | ||
| 533 | device_del(dev); | 873 | device_del(dev); |
| 534 | } | 874 | } |
| @@ -538,7 +878,9 @@ int netdev_register_kobject(struct net_device *net) | |||
| 538 | { | 878 | { |
| 539 | struct device *dev = &(net->dev); | 879 | struct device *dev = &(net->dev); |
| 540 | const struct attribute_group **groups = net->sysfs_groups; | 880 | const struct attribute_group **groups = net->sysfs_groups; |
| 881 | int error = 0; | ||
| 541 | 882 | ||
| 883 | device_initialize(dev); | ||
| 542 | dev->class = &net_class; | 884 | dev->class = &net_class; |
| 543 | dev->platform_data = net; | 885 | dev->platform_data = net; |
| 544 | dev->groups = groups; | 886 | dev->groups = groups; |
| @@ -561,10 +903,19 @@ int netdev_register_kobject(struct net_device *net) | |||
| 561 | #endif | 903 | #endif |
| 562 | #endif /* CONFIG_SYSFS */ | 904 | #endif /* CONFIG_SYSFS */ |
| 563 | 905 | ||
| 564 | if (!net_eq(dev_net(net), &init_net)) | 906 | error = device_add(dev); |
| 565 | return 0; | 907 | if (error) |
| 908 | return error; | ||
| 909 | |||
| 910 | #ifdef CONFIG_RPS | ||
| 911 | error = rx_queue_register_kobjects(net); | ||
| 912 | if (error) { | ||
| 913 | device_del(dev); | ||
| 914 | return error; | ||
| 915 | } | ||
| 916 | #endif | ||
| 566 | 917 | ||
| 567 | return device_add(dev); | 918 | return error; |
| 568 | } | 919 | } |
| 569 | 920 | ||
| 570 | int netdev_class_create_file(struct class_attribute *class_attr) | 921 | int netdev_class_create_file(struct class_attribute *class_attr) |
| @@ -580,13 +931,9 @@ void netdev_class_remove_file(struct class_attribute *class_attr) | |||
| 580 | EXPORT_SYMBOL(netdev_class_create_file); | 931 | EXPORT_SYMBOL(netdev_class_create_file); |
| 581 | EXPORT_SYMBOL(netdev_class_remove_file); | 932 | EXPORT_SYMBOL(netdev_class_remove_file); |
| 582 | 933 | ||
| 583 | void netdev_initialize_kobject(struct net_device *net) | ||
| 584 | { | ||
| 585 | struct device *device = &(net->dev); | ||
| 586 | device_initialize(device); | ||
| 587 | } | ||
| 588 | |||
| 589 | int netdev_kobject_init(void) | 934 | int netdev_kobject_init(void) |
| 590 | { | 935 | { |
| 936 | kobj_ns_type_register(&net_ns_type_operations); | ||
| 937 | register_pernet_subsys(&kobj_net_ops); | ||
| 591 | return class_register(&net_class); | 938 | return class_register(&net_class); |
| 592 | } | 939 | } |
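The rx_queue kobjects registered above expose the new per-queue RPS knobs under /sys/class/net/&lt;dev&gt;/queues/rx-&lt;n&gt;/. A minimal userspace sketch of driving them is shown here; the device name, CPU mask and flow count are illustrative, and the program must run as root on a kernel built with CONFIG_RPS.

```c
#include <stdio.h>
#include <stdlib.h>

static int write_str(const char *path, const char *val)
{
	FILE *f = fopen(path, "w");

	if (!f) {
		perror(path);
		return -1;
	}
	if (fputs(val, f) == EOF) {
		perror(path);
		fclose(f);
		return -1;
	}
	return fclose(f);
}

int main(void)
{
	/* rps_cpus takes a hex CPU bitmap (store_rps_map above), rps_flow_cnt
	 * a table size that store_rps_dev_flow_table_cnt() rounds up to a
	 * power of two. */
	if (write_str("/sys/class/net/eth0/queues/rx-0/rps_cpus", "f"))
		return EXIT_FAILURE;
	if (write_str("/sys/class/net/eth0/queues/rx-0/rps_flow_cnt", "4096"))
		return EXIT_FAILURE;
	return EXIT_SUCCESS;
}
```

For flow steering to have any effect, the global socket flow table (the net.core.rps_sock_flow_entries sysctl added in the sysctl_net_core.c part of this series) presumably needs to be sized as well.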
diff --git a/net/core/net-sysfs.h b/net/core/net-sysfs.h index 14e7524260b3..805555e8b187 100644 --- a/net/core/net-sysfs.h +++ b/net/core/net-sysfs.h | |||
| @@ -4,5 +4,4 @@ | |||
| 4 | int netdev_kobject_init(void); | 4 | int netdev_kobject_init(void); |
| 5 | int netdev_register_kobject(struct net_device *); | 5 | int netdev_register_kobject(struct net_device *); |
| 6 | void netdev_unregister_kobject(struct net_device *); | 6 | void netdev_unregister_kobject(struct net_device *); |
| 7 | void netdev_initialize_kobject(struct net_device *); | ||
| 8 | #endif | 7 | #endif |
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index bd8c4712ea24..c988e685433a 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c | |||
| @@ -27,6 +27,51 @@ EXPORT_SYMBOL(init_net); | |||
| 27 | 27 | ||
| 28 | #define INITIAL_NET_GEN_PTRS 13 /* +1 for len +2 for rcu_head */ | 28 | #define INITIAL_NET_GEN_PTRS 13 /* +1 for len +2 for rcu_head */ |
| 29 | 29 | ||
| 30 | static void net_generic_release(struct rcu_head *rcu) | ||
| 31 | { | ||
| 32 | struct net_generic *ng; | ||
| 33 | |||
| 34 | ng = container_of(rcu, struct net_generic, rcu); | ||
| 35 | kfree(ng); | ||
| 36 | } | ||
| 37 | |||
| 38 | static int net_assign_generic(struct net *net, int id, void *data) | ||
| 39 | { | ||
| 40 | struct net_generic *ng, *old_ng; | ||
| 41 | |||
| 42 | BUG_ON(!mutex_is_locked(&net_mutex)); | ||
| 43 | BUG_ON(id == 0); | ||
| 44 | |||
| 45 | ng = old_ng = net->gen; | ||
| 46 | if (old_ng->len >= id) | ||
| 47 | goto assign; | ||
| 48 | |||
| 49 | ng = kzalloc(sizeof(struct net_generic) + | ||
| 50 | id * sizeof(void *), GFP_KERNEL); | ||
| 51 | if (ng == NULL) | ||
| 52 | return -ENOMEM; | ||
| 53 | |||
| 54 | /* | ||
| 55 | * Some synchronisation notes: | ||
| 56 | * | ||
| 57 | * The net_generic explores the net->gen array inside rcu | ||
| 58 | * read section. Besides once set the net->gen->ptr[x] | ||
| 59 | * pointer never changes (see rules in netns/generic.h). | ||
| 60 | * | ||
| 61 | * That said, we simply duplicate this array and schedule | ||
| 62 | * the old copy for kfree after a grace period. | ||
| 63 | */ | ||
| 64 | |||
| 65 | ng->len = id; | ||
| 66 | memcpy(&ng->ptr, &old_ng->ptr, old_ng->len * sizeof(void*)); | ||
| 67 | |||
| 68 | rcu_assign_pointer(net->gen, ng); | ||
| 69 | call_rcu(&old_ng->rcu, net_generic_release); | ||
| 70 | assign: | ||
| 71 | ng->ptr[id - 1] = data; | ||
| 72 | return 0; | ||
| 73 | } | ||
| 74 | |||
| 30 | static int ops_init(const struct pernet_operations *ops, struct net *net) | 75 | static int ops_init(const struct pernet_operations *ops, struct net *net) |
| 31 | { | 76 | { |
| 32 | int err; | 77 | int err; |
| @@ -469,10 +514,10 @@ EXPORT_SYMBOL_GPL(register_pernet_subsys); | |||
| 469 | * addition run the exit method for all existing network | 514 | * addition run the exit method for all existing network |
| 470 | * namespaces. | 515 | * namespaces. |
| 471 | */ | 516 | */ |
| 472 | void unregister_pernet_subsys(struct pernet_operations *module) | 517 | void unregister_pernet_subsys(struct pernet_operations *ops) |
| 473 | { | 518 | { |
| 474 | mutex_lock(&net_mutex); | 519 | mutex_lock(&net_mutex); |
| 475 | unregister_pernet_operations(module); | 520 | unregister_pernet_operations(ops); |
| 476 | mutex_unlock(&net_mutex); | 521 | mutex_unlock(&net_mutex); |
| 477 | } | 522 | } |
| 478 | EXPORT_SYMBOL_GPL(unregister_pernet_subsys); | 523 | EXPORT_SYMBOL_GPL(unregister_pernet_subsys); |
| @@ -526,49 +571,3 @@ void unregister_pernet_device(struct pernet_operations *ops) | |||
| 526 | mutex_unlock(&net_mutex); | 571 | mutex_unlock(&net_mutex); |
| 527 | } | 572 | } |
| 528 | EXPORT_SYMBOL_GPL(unregister_pernet_device); | 573 | EXPORT_SYMBOL_GPL(unregister_pernet_device); |
| 529 | |||
| 530 | static void net_generic_release(struct rcu_head *rcu) | ||
| 531 | { | ||
| 532 | struct net_generic *ng; | ||
| 533 | |||
| 534 | ng = container_of(rcu, struct net_generic, rcu); | ||
| 535 | kfree(ng); | ||
| 536 | } | ||
| 537 | |||
| 538 | int net_assign_generic(struct net *net, int id, void *data) | ||
| 539 | { | ||
| 540 | struct net_generic *ng, *old_ng; | ||
| 541 | |||
| 542 | BUG_ON(!mutex_is_locked(&net_mutex)); | ||
| 543 | BUG_ON(id == 0); | ||
| 544 | |||
| 545 | ng = old_ng = net->gen; | ||
| 546 | if (old_ng->len >= id) | ||
| 547 | goto assign; | ||
| 548 | |||
| 549 | ng = kzalloc(sizeof(struct net_generic) + | ||
| 550 | id * sizeof(void *), GFP_KERNEL); | ||
| 551 | if (ng == NULL) | ||
| 552 | return -ENOMEM; | ||
| 553 | |||
| 554 | /* | ||
| 555 | * Some synchronisation notes: | ||
| 556 | * | ||
| 557 | * The net_generic explores the net->gen array inside rcu | ||
| 558 | * read section. Besides once set the net->gen->ptr[x] | ||
| 559 | * pointer never changes (see rules in netns/generic.h). | ||
| 560 | * | ||
| 561 | * That said, we simply duplicate this array and schedule | ||
| 562 | * the old copy for kfree after a grace period. | ||
| 563 | */ | ||
| 564 | |||
| 565 | ng->len = id; | ||
| 566 | memcpy(&ng->ptr, &old_ng->ptr, old_ng->len * sizeof(void*)); | ||
| 567 | |||
| 568 | rcu_assign_pointer(net->gen, ng); | ||
| 569 | call_rcu(&old_ng->rcu, net_generic_release); | ||
| 570 | assign: | ||
| 571 | ng->ptr[id - 1] = data; | ||
| 572 | return 0; | ||
| 573 | } | ||
| 574 | EXPORT_SYMBOL_GPL(net_assign_generic); | ||
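With net_assign_generic() now static and unexported, subsystems no longer call it directly: they declare an id and size in their pernet_operations and let ops_init() allocate and assign the per-namespace slot. A hedged sketch of the resulting pattern follows; my_pernet and its field are illustrative, the registration and net_generic() calls are the standard API.

```c
#include <linux/module.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>

struct my_pernet {
	int packets_seen;		/* illustrative per-namespace state */
};

static int my_net_id __read_mostly;

static int __net_init my_net_init(struct net *net)
{
	struct my_pernet *mp = net_generic(net, my_net_id);

	mp->packets_seen = 0;		/* slot is allocated zeroed by the core */
	return 0;
}

static void __net_exit my_net_exit(struct net *net)
{
	/* nothing to free: the generic slot itself is owned by the core */
}

static struct pernet_operations my_net_ops = {
	.init = my_net_init,
	.exit = my_net_exit,
	.id   = &my_net_id,
	.size = sizeof(struct my_pernet),
};

static int __init my_init(void)
{
	return register_pernet_subsys(&my_net_ops);
}

static void __exit my_fini(void)
{
	unregister_pernet_subsys(&my_net_ops);
}

module_init(my_init);
module_exit(my_fini);
MODULE_LICENSE("GPL");
```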
diff --git a/net/core/netpoll.c b/net/core/netpoll.c index a58f59b97597..94825b109551 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c | |||
| @@ -179,9 +179,8 @@ static void service_arp_queue(struct netpoll_info *npi) | |||
| 179 | } | 179 | } |
| 180 | } | 180 | } |
| 181 | 181 | ||
| 182 | void netpoll_poll(struct netpoll *np) | 182 | void netpoll_poll_dev(struct net_device *dev) |
| 183 | { | 183 | { |
| 184 | struct net_device *dev = np->dev; | ||
| 185 | const struct net_device_ops *ops; | 184 | const struct net_device_ops *ops; |
| 186 | 185 | ||
| 187 | if (!dev || !netif_running(dev)) | 186 | if (!dev || !netif_running(dev)) |
| @@ -201,6 +200,11 @@ void netpoll_poll(struct netpoll *np) | |||
| 201 | zap_completion_queue(); | 200 | zap_completion_queue(); |
| 202 | } | 201 | } |
| 203 | 202 | ||
| 203 | void netpoll_poll(struct netpoll *np) | ||
| 204 | { | ||
| 205 | netpoll_poll_dev(np->dev); | ||
| 206 | } | ||
| 207 | |||
| 204 | static void refill_skbs(void) | 208 | static void refill_skbs(void) |
| 205 | { | 209 | { |
| 206 | struct sk_buff *skb; | 210 | struct sk_buff *skb; |
| @@ -282,7 +286,7 @@ static int netpoll_owner_active(struct net_device *dev) | |||
| 282 | return 0; | 286 | return 0; |
| 283 | } | 287 | } |
| 284 | 288 | ||
| 285 | static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) | 289 | void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) |
| 286 | { | 290 | { |
| 287 | int status = NETDEV_TX_BUSY; | 291 | int status = NETDEV_TX_BUSY; |
| 288 | unsigned long tries; | 292 | unsigned long tries; |
| @@ -308,7 +312,9 @@ static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) | |||
| 308 | tries > 0; --tries) { | 312 | tries > 0; --tries) { |
| 309 | if (__netif_tx_trylock(txq)) { | 313 | if (__netif_tx_trylock(txq)) { |
| 310 | if (!netif_tx_queue_stopped(txq)) { | 314 | if (!netif_tx_queue_stopped(txq)) { |
| 315 | dev->priv_flags |= IFF_IN_NETPOLL; | ||
| 311 | status = ops->ndo_start_xmit(skb, dev); | 316 | status = ops->ndo_start_xmit(skb, dev); |
| 317 | dev->priv_flags &= ~IFF_IN_NETPOLL; | ||
| 312 | if (status == NETDEV_TX_OK) | 318 | if (status == NETDEV_TX_OK) |
| 313 | txq_trans_update(txq); | 319 | txq_trans_update(txq); |
| 314 | } | 320 | } |
| @@ -756,7 +762,10 @@ int netpoll_setup(struct netpoll *np) | |||
| 756 | atomic_inc(&npinfo->refcnt); | 762 | atomic_inc(&npinfo->refcnt); |
| 757 | } | 763 | } |
| 758 | 764 | ||
| 759 | if (!ndev->netdev_ops->ndo_poll_controller) { | 765 | npinfo->netpoll = np; |
| 766 | |||
| 767 | if ((ndev->priv_flags & IFF_DISABLE_NETPOLL) || | ||
| 768 | !ndev->netdev_ops->ndo_poll_controller) { | ||
| 760 | printk(KERN_ERR "%s: %s doesn't support polling, aborting.\n", | 769 | printk(KERN_ERR "%s: %s doesn't support polling, aborting.\n", |
| 761 | np->name, np->dev_name); | 770 | np->name, np->dev_name); |
| 762 | err = -ENOTSUPP; | 771 | err = -ENOTSUPP; |
| @@ -878,6 +887,7 @@ void netpoll_cleanup(struct netpoll *np) | |||
| 878 | } | 887 | } |
| 879 | 888 | ||
| 880 | if (atomic_dec_and_test(&npinfo->refcnt)) { | 889 | if (atomic_dec_and_test(&npinfo->refcnt)) { |
| 890 | const struct net_device_ops *ops; | ||
| 881 | skb_queue_purge(&npinfo->arp_tx); | 891 | skb_queue_purge(&npinfo->arp_tx); |
| 882 | skb_queue_purge(&npinfo->txq); | 892 | skb_queue_purge(&npinfo->txq); |
| 883 | cancel_rearming_delayed_work(&npinfo->tx_work); | 893 | cancel_rearming_delayed_work(&npinfo->tx_work); |
| @@ -885,7 +895,11 @@ void netpoll_cleanup(struct netpoll *np) | |||
| 885 | /* clean after last, unfinished work */ | 895 | /* clean after last, unfinished work */ |
| 886 | __skb_queue_purge(&npinfo->txq); | 896 | __skb_queue_purge(&npinfo->txq); |
| 887 | kfree(npinfo); | 897 | kfree(npinfo); |
| 888 | np->dev->npinfo = NULL; | 898 | ops = np->dev->netdev_ops; |
| 899 | if (ops->ndo_netpoll_cleanup) | ||
| 900 | ops->ndo_netpoll_cleanup(np->dev); | ||
| 901 | else | ||
| 902 | np->dev->npinfo = NULL; | ||
| 889 | } | 903 | } |
| 890 | } | 904 | } |
| 891 | 905 | ||
| @@ -908,6 +922,7 @@ void netpoll_set_trap(int trap) | |||
| 908 | atomic_dec(&trapped); | 922 | atomic_dec(&trapped); |
| 909 | } | 923 | } |
| 910 | 924 | ||
| 925 | EXPORT_SYMBOL(netpoll_send_skb); | ||
| 911 | EXPORT_SYMBOL(netpoll_set_trap); | 926 | EXPORT_SYMBOL(netpoll_set_trap); |
| 912 | EXPORT_SYMBOL(netpoll_trap); | 927 | EXPORT_SYMBOL(netpoll_trap); |
| 913 | EXPORT_SYMBOL(netpoll_print_options); | 928 | EXPORT_SYMBOL(netpoll_print_options); |
| @@ -915,4 +930,5 @@ EXPORT_SYMBOL(netpoll_parse_options); | |||
| 915 | EXPORT_SYMBOL(netpoll_setup); | 930 | EXPORT_SYMBOL(netpoll_setup); |
| 916 | EXPORT_SYMBOL(netpoll_cleanup); | 931 | EXPORT_SYMBOL(netpoll_cleanup); |
| 917 | EXPORT_SYMBOL(netpoll_send_udp); | 932 | EXPORT_SYMBOL(netpoll_send_udp); |
| 933 | EXPORT_SYMBOL(netpoll_poll_dev); | ||
| 918 | EXPORT_SYMBOL(netpoll_poll); | 934 | EXPORT_SYMBOL(netpoll_poll); |
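netpoll_poll_dev() and netpoll_send_skb() are exported above, and netpoll_cleanup() now defers to an ndo_netpoll_cleanup hook, so that stacked drivers (bonding is the intended in-tree user) can forward netpoll work to the underlying NIC. The fragment below is only an illustrative sketch of that idea, not a complete driver: my_master_priv and my_active_slave bookkeeping are assumptions, and a real implementation would also release npinfo on its slaves.

```c
#include <linux/netdevice.h>
#include <linux/netpoll.h>

struct my_master_priv {
	struct net_device *active_slave;	/* illustrative */
};

/* Hooked up as .ndo_poll_controller of the virtual master device. */
static void my_master_poll_controller(struct net_device *master)
{
	struct my_master_priv *priv = netdev_priv(master);
	struct net_device *slave = priv->active_slave;

	if (slave && !(slave->priv_flags & IFF_DISABLE_NETPOLL))
		netpoll_poll_dev(slave);	/* poll the real NIC on the master's behalf */
}

/* Hooked up as .ndo_netpoll_cleanup, called from netpoll_cleanup() above. */
static void my_master_netpoll_cleanup(struct net_device *master)
{
	master->npinfo = NULL;			/* sketch: real drivers also clean up slaves */
}
```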
diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 43923811bd6a..2ad68da418df 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c | |||
| @@ -169,7 +169,7 @@ | |||
| 169 | #include <asm/dma.h> | 169 | #include <asm/dma.h> |
| 170 | #include <asm/div64.h> /* do_div */ | 170 | #include <asm/div64.h> /* do_div */ |
| 171 | 171 | ||
| 172 | #define VERSION "2.72" | 172 | #define VERSION "2.73" |
| 173 | #define IP_NAME_SZ 32 | 173 | #define IP_NAME_SZ 32 |
| 174 | #define MAX_MPLS_LABELS 16 /* This is the max label stack depth */ | 174 | #define MAX_MPLS_LABELS 16 /* This is the max label stack depth */ |
| 175 | #define MPLS_STACK_BOTTOM htonl(0x00000100) | 175 | #define MPLS_STACK_BOTTOM htonl(0x00000100) |
| @@ -190,6 +190,7 @@ | |||
| 190 | #define F_IPSEC_ON (1<<12) /* ipsec on for flows */ | 190 | #define F_IPSEC_ON (1<<12) /* ipsec on for flows */ |
| 191 | #define F_QUEUE_MAP_RND (1<<13) /* queue map Random */ | 191 | #define F_QUEUE_MAP_RND (1<<13) /* queue map Random */ |
| 192 | #define F_QUEUE_MAP_CPU (1<<14) /* queue map mirrors smp_processor_id() */ | 192 | #define F_QUEUE_MAP_CPU (1<<14) /* queue map mirrors smp_processor_id() */ |
| 193 | #define F_NODE (1<<15) /* Node memory alloc*/ | ||
| 193 | 194 | ||
| 194 | /* Thread control flag bits */ | 195 | /* Thread control flag bits */ |
| 195 | #define T_STOP (1<<0) /* Stop run */ | 196 | #define T_STOP (1<<0) /* Stop run */ |
| @@ -372,6 +373,7 @@ struct pktgen_dev { | |||
| 372 | 373 | ||
| 373 | u16 queue_map_min; | 374 | u16 queue_map_min; |
| 374 | u16 queue_map_max; | 375 | u16 queue_map_max; |
| 376 | int node; /* Memory node */ | ||
| 375 | 377 | ||
| 376 | #ifdef CONFIG_XFRM | 378 | #ifdef CONFIG_XFRM |
| 377 | __u8 ipsmode; /* IPSEC mode (config) */ | 379 | __u8 ipsmode; /* IPSEC mode (config) */ |
| @@ -607,6 +609,9 @@ static int pktgen_if_show(struct seq_file *seq, void *v) | |||
| 607 | if (pkt_dev->traffic_class) | 609 | if (pkt_dev->traffic_class) |
| 608 | seq_printf(seq, " traffic_class: 0x%02x\n", pkt_dev->traffic_class); | 610 | seq_printf(seq, " traffic_class: 0x%02x\n", pkt_dev->traffic_class); |
| 609 | 611 | ||
| 612 | if (pkt_dev->node >= 0) | ||
| 613 | seq_printf(seq, " node: %d\n", pkt_dev->node); | ||
| 614 | |||
| 610 | seq_printf(seq, " Flags: "); | 615 | seq_printf(seq, " Flags: "); |
| 611 | 616 | ||
| 612 | if (pkt_dev->flags & F_IPV6) | 617 | if (pkt_dev->flags & F_IPV6) |
| @@ -660,6 +665,9 @@ static int pktgen_if_show(struct seq_file *seq, void *v) | |||
| 660 | if (pkt_dev->flags & F_SVID_RND) | 665 | if (pkt_dev->flags & F_SVID_RND) |
| 661 | seq_printf(seq, "SVID_RND "); | 666 | seq_printf(seq, "SVID_RND "); |
| 662 | 667 | ||
| 668 | if (pkt_dev->flags & F_NODE) | ||
| 669 | seq_printf(seq, "NODE_ALLOC "); | ||
| 670 | |||
| 663 | seq_puts(seq, "\n"); | 671 | seq_puts(seq, "\n"); |
| 664 | 672 | ||
| 665 | /* not really stopped, more like last-running-at */ | 673 | /* not really stopped, more like last-running-at */ |
| @@ -1074,6 +1082,21 @@ static ssize_t pktgen_if_write(struct file *file, | |||
| 1074 | pkt_dev->dst_mac_count); | 1082 | pkt_dev->dst_mac_count); |
| 1075 | return count; | 1083 | return count; |
| 1076 | } | 1084 | } |
| 1085 | if (!strcmp(name, "node")) { | ||
| 1086 | len = num_arg(&user_buffer[i], 10, &value); | ||
| 1087 | if (len < 0) | ||
| 1088 | return len; | ||
| 1089 | |||
| 1090 | i += len; | ||
| 1091 | |||
| 1092 | if (node_possible(value)) { | ||
| 1093 | pkt_dev->node = value; | ||
| 1094 | sprintf(pg_result, "OK: node=%d", pkt_dev->node); | ||
| 1095 | } | ||
| 1096 | else | ||
| 1097 | sprintf(pg_result, "ERROR: node not possible"); | ||
| 1098 | return count; | ||
| 1099 | } | ||
| 1077 | if (!strcmp(name, "flag")) { | 1100 | if (!strcmp(name, "flag")) { |
| 1078 | char f[32]; | 1101 | char f[32]; |
| 1079 | memset(f, 0, 32); | 1102 | memset(f, 0, 32); |
| @@ -1166,12 +1189,18 @@ static ssize_t pktgen_if_write(struct file *file, | |||
| 1166 | else if (strcmp(f, "!IPV6") == 0) | 1189 | else if (strcmp(f, "!IPV6") == 0) |
| 1167 | pkt_dev->flags &= ~F_IPV6; | 1190 | pkt_dev->flags &= ~F_IPV6; |
| 1168 | 1191 | ||
| 1192 | else if (strcmp(f, "NODE_ALLOC") == 0) | ||
| 1193 | pkt_dev->flags |= F_NODE; | ||
| 1194 | |||
| 1195 | else if (strcmp(f, "!NODE_ALLOC") == 0) | ||
| 1196 | pkt_dev->flags &= ~F_NODE; | ||
| 1197 | |||
| 1169 | else { | 1198 | else { |
| 1170 | sprintf(pg_result, | 1199 | sprintf(pg_result, |
| 1171 | "Flag -:%s:- unknown\nAvailable flags, (prepend ! to un-set flag):\n%s", | 1200 | "Flag -:%s:- unknown\nAvailable flags, (prepend ! to un-set flag):\n%s", |
| 1172 | f, | 1201 | f, |
| 1173 | "IPSRC_RND, IPDST_RND, UDPSRC_RND, UDPDST_RND, " | 1202 | "IPSRC_RND, IPDST_RND, UDPSRC_RND, UDPDST_RND, " |
| 1174 | "MACSRC_RND, MACDST_RND, TXSIZE_RND, IPV6, MPLS_RND, VID_RND, SVID_RND, FLOW_SEQ, IPSEC\n"); | 1203 | "MACSRC_RND, MACDST_RND, TXSIZE_RND, IPV6, MPLS_RND, VID_RND, SVID_RND, FLOW_SEQ, IPSEC, NODE_ALLOC\n"); |
| 1175 | return count; | 1204 | return count; |
| 1176 | } | 1205 | } |
| 1177 | sprintf(pg_result, "OK: flags=0x%x", pkt_dev->flags); | 1206 | sprintf(pg_result, "OK: flags=0x%x", pkt_dev->flags); |
| @@ -2572,9 +2601,27 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev, | |||
| 2572 | mod_cur_headers(pkt_dev); | 2601 | mod_cur_headers(pkt_dev); |
| 2573 | 2602 | ||
| 2574 | datalen = (odev->hard_header_len + 16) & ~0xf; | 2603 | datalen = (odev->hard_header_len + 16) & ~0xf; |
| 2575 | skb = __netdev_alloc_skb(odev, | 2604 | |
| 2576 | pkt_dev->cur_pkt_size + 64 | 2605 | if (pkt_dev->flags & F_NODE) { |
| 2577 | + datalen + pkt_dev->pkt_overhead, GFP_NOWAIT); | 2606 | int node; |
| 2607 | |||
| 2608 | if (pkt_dev->node >= 0) | ||
| 2609 | node = pkt_dev->node; | ||
| 2610 | else | ||
| 2611 | node = numa_node_id(); | ||
| 2612 | |||
| 2613 | skb = __alloc_skb(NET_SKB_PAD + pkt_dev->cur_pkt_size + 64 | ||
| 2614 | + datalen + pkt_dev->pkt_overhead, GFP_NOWAIT, 0, node); | ||
| 2615 | if (likely(skb)) { | ||
| 2616 | skb_reserve(skb, NET_SKB_PAD); | ||
| 2617 | skb->dev = odev; | ||
| 2618 | } | ||
| 2619 | } | ||
| 2620 | else | ||
| 2621 | skb = __netdev_alloc_skb(odev, | ||
| 2622 | pkt_dev->cur_pkt_size + 64 | ||
| 2623 | + datalen + pkt_dev->pkt_overhead, GFP_NOWAIT); | ||
| 2624 | |||
| 2578 | if (!skb) { | 2625 | if (!skb) { |
| 2579 | sprintf(pkt_dev->result, "No memory"); | 2626 | sprintf(pkt_dev->result, "No memory"); |
| 2580 | return NULL; | 2627 | return NULL; |
| @@ -3674,6 +3721,7 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname) | |||
| 3674 | pkt_dev->svlan_p = 0; | 3721 | pkt_dev->svlan_p = 0; |
| 3675 | pkt_dev->svlan_cfi = 0; | 3722 | pkt_dev->svlan_cfi = 0; |
| 3676 | pkt_dev->svlan_id = 0xffff; | 3723 | pkt_dev->svlan_id = 0xffff; |
| 3724 | pkt_dev->node = -1; | ||
| 3677 | 3725 | ||
| 3678 | err = pktgen_setup_dev(pkt_dev, ifname); | 3726 | err = pktgen_setup_dev(pkt_dev, ifname); |
| 3679 | if (err) | 3727 | if (err) |
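The pktgen changes above add a per-device "node" parameter and a NODE_ALLOC flag so packet buffers can be allocated on a chosen NUMA node. They are driven through the existing /proc/net/pktgen interface; the sketch below writes the two commands for eth0 and node 0, both of which are illustrative, and assumes eth0 has already been added to a pktgen thread.

```c
#include <stdio.h>
#include <stdlib.h>

static int pgctrl(const char *path, const char *cmd)
{
	FILE *f = fopen(path, "w");

	if (!f) {
		perror(path);
		return -1;
	}
	fprintf(f, "%s\n", cmd);
	return fclose(f);
}

int main(void)
{
	const char *dev = "/proc/net/pktgen/eth0";

	if (pgctrl(dev, "node 0"))		/* parsed by the new "node" branch above */
		return EXIT_FAILURE;
	if (pgctrl(dev, "flag NODE_ALLOC"))	/* switches fill_packet_ipv4() to per-node __alloc_skb() */
		return EXIT_FAILURE;
	return EXIT_SUCCESS;
}
```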
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 31e85d327aa2..7ab86f3a1ea4 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c | |||
| @@ -98,7 +98,7 @@ int lockdep_rtnl_is_held(void) | |||
| 98 | EXPORT_SYMBOL(lockdep_rtnl_is_held); | 98 | EXPORT_SYMBOL(lockdep_rtnl_is_held); |
| 99 | #endif /* #ifdef CONFIG_PROVE_LOCKING */ | 99 | #endif /* #ifdef CONFIG_PROVE_LOCKING */ |
| 100 | 100 | ||
| 101 | static struct rtnl_link *rtnl_msg_handlers[NPROTO]; | 101 | static struct rtnl_link *rtnl_msg_handlers[RTNL_FAMILY_MAX + 1]; |
| 102 | 102 | ||
| 103 | static inline int rtm_msgindex(int msgtype) | 103 | static inline int rtm_msgindex(int msgtype) |
| 104 | { | 104 | { |
| @@ -118,7 +118,11 @@ static rtnl_doit_func rtnl_get_doit(int protocol, int msgindex) | |||
| 118 | { | 118 | { |
| 119 | struct rtnl_link *tab; | 119 | struct rtnl_link *tab; |
| 120 | 120 | ||
| 121 | tab = rtnl_msg_handlers[protocol]; | 121 | if (protocol <= RTNL_FAMILY_MAX) |
| 122 | tab = rtnl_msg_handlers[protocol]; | ||
| 123 | else | ||
| 124 | tab = NULL; | ||
| 125 | |||
| 122 | if (tab == NULL || tab[msgindex].doit == NULL) | 126 | if (tab == NULL || tab[msgindex].doit == NULL) |
| 123 | tab = rtnl_msg_handlers[PF_UNSPEC]; | 127 | tab = rtnl_msg_handlers[PF_UNSPEC]; |
| 124 | 128 | ||
| @@ -129,7 +133,11 @@ static rtnl_dumpit_func rtnl_get_dumpit(int protocol, int msgindex) | |||
| 129 | { | 133 | { |
| 130 | struct rtnl_link *tab; | 134 | struct rtnl_link *tab; |
| 131 | 135 | ||
| 132 | tab = rtnl_msg_handlers[protocol]; | 136 | if (protocol <= RTNL_FAMILY_MAX) |
| 137 | tab = rtnl_msg_handlers[protocol]; | ||
| 138 | else | ||
| 139 | tab = NULL; | ||
| 140 | |||
| 133 | if (tab == NULL || tab[msgindex].dumpit == NULL) | 141 | if (tab == NULL || tab[msgindex].dumpit == NULL) |
| 134 | tab = rtnl_msg_handlers[PF_UNSPEC]; | 142 | tab = rtnl_msg_handlers[PF_UNSPEC]; |
| 135 | 143 | ||
| @@ -159,7 +167,7 @@ int __rtnl_register(int protocol, int msgtype, | |||
| 159 | struct rtnl_link *tab; | 167 | struct rtnl_link *tab; |
| 160 | int msgindex; | 168 | int msgindex; |
| 161 | 169 | ||
| 162 | BUG_ON(protocol < 0 || protocol >= NPROTO); | 170 | BUG_ON(protocol < 0 || protocol > RTNL_FAMILY_MAX); |
| 163 | msgindex = rtm_msgindex(msgtype); | 171 | msgindex = rtm_msgindex(msgtype); |
| 164 | 172 | ||
| 165 | tab = rtnl_msg_handlers[protocol]; | 173 | tab = rtnl_msg_handlers[protocol]; |
| @@ -211,7 +219,7 @@ int rtnl_unregister(int protocol, int msgtype) | |||
| 211 | { | 219 | { |
| 212 | int msgindex; | 220 | int msgindex; |
| 213 | 221 | ||
| 214 | BUG_ON(protocol < 0 || protocol >= NPROTO); | 222 | BUG_ON(protocol < 0 || protocol > RTNL_FAMILY_MAX); |
| 215 | msgindex = rtm_msgindex(msgtype); | 223 | msgindex = rtm_msgindex(msgtype); |
| 216 | 224 | ||
| 217 | if (rtnl_msg_handlers[protocol] == NULL) | 225 | if (rtnl_msg_handlers[protocol] == NULL) |
| @@ -233,7 +241,7 @@ EXPORT_SYMBOL_GPL(rtnl_unregister); | |||
| 233 | */ | 241 | */ |
| 234 | void rtnl_unregister_all(int protocol) | 242 | void rtnl_unregister_all(int protocol) |
| 235 | { | 243 | { |
| 236 | BUG_ON(protocol < 0 || protocol >= NPROTO); | 244 | BUG_ON(protocol < 0 || protocol > RTNL_FAMILY_MAX); |
| 237 | 245 | ||
| 238 | kfree(rtnl_msg_handlers[protocol]); | 246 | kfree(rtnl_msg_handlers[protocol]); |
| 239 | rtnl_msg_handlers[protocol] = NULL; | 247 | rtnl_msg_handlers[protocol] = NULL; |
| @@ -600,7 +608,41 @@ static void copy_rtnl_link_stats(struct rtnl_link_stats *a, | |||
| 600 | 608 | ||
| 601 | a->rx_compressed = b->rx_compressed; | 609 | a->rx_compressed = b->rx_compressed; |
| 602 | a->tx_compressed = b->tx_compressed; | 610 | a->tx_compressed = b->tx_compressed; |
| 603 | }; | 611 | } |
| 612 | |||
| 613 | static void copy_rtnl_link_stats64(void *v, const struct net_device_stats *b) | ||
| 614 | { | ||
| 615 | struct rtnl_link_stats64 a; | ||
| 616 | |||
| 617 | a.rx_packets = b->rx_packets; | ||
| 618 | a.tx_packets = b->tx_packets; | ||
| 619 | a.rx_bytes = b->rx_bytes; | ||
| 620 | a.tx_bytes = b->tx_bytes; | ||
| 621 | a.rx_errors = b->rx_errors; | ||
| 622 | a.tx_errors = b->tx_errors; | ||
| 623 | a.rx_dropped = b->rx_dropped; | ||
| 624 | a.tx_dropped = b->tx_dropped; | ||
| 625 | |||
| 626 | a.multicast = b->multicast; | ||
| 627 | a.collisions = b->collisions; | ||
| 628 | |||
| 629 | a.rx_length_errors = b->rx_length_errors; | ||
| 630 | a.rx_over_errors = b->rx_over_errors; | ||
| 631 | a.rx_crc_errors = b->rx_crc_errors; | ||
| 632 | a.rx_frame_errors = b->rx_frame_errors; | ||
| 633 | a.rx_fifo_errors = b->rx_fifo_errors; | ||
| 634 | a.rx_missed_errors = b->rx_missed_errors; | ||
| 635 | |||
| 636 | a.tx_aborted_errors = b->tx_aborted_errors; | ||
| 637 | a.tx_carrier_errors = b->tx_carrier_errors; | ||
| 638 | a.tx_fifo_errors = b->tx_fifo_errors; | ||
| 639 | a.tx_heartbeat_errors = b->tx_heartbeat_errors; | ||
| 640 | a.tx_window_errors = b->tx_window_errors; | ||
| 641 | |||
| 642 | a.rx_compressed = b->rx_compressed; | ||
| 643 | a.tx_compressed = b->tx_compressed; | ||
| 644 | memcpy(v, &a, sizeof(a)); | ||
| 645 | } | ||
| 604 | 646 | ||
| 605 | /* All VF info */ | 647 | /* All VF info */ |
| 606 | static inline int rtnl_vfinfo_size(const struct net_device *dev) | 648 | static inline int rtnl_vfinfo_size(const struct net_device *dev) |
| @@ -618,6 +660,31 @@ static inline int rtnl_vfinfo_size(const struct net_device *dev) | |||
| 618 | return 0; | 660 | return 0; |
| 619 | } | 661 | } |
| 620 | 662 | ||
| 663 | static size_t rtnl_port_size(const struct net_device *dev) | ||
| 664 | { | ||
| 665 | size_t port_size = nla_total_size(4) /* PORT_VF */ | ||
| 666 | + nla_total_size(PORT_PROFILE_MAX) /* PORT_PROFILE */ | ||
| 667 | + nla_total_size(sizeof(struct ifla_port_vsi)) | ||
| 668 | /* PORT_VSI_TYPE */ | ||
| 669 | + nla_total_size(PORT_UUID_MAX) /* PORT_INSTANCE_UUID */ | ||
| 670 | + nla_total_size(PORT_UUID_MAX) /* PORT_HOST_UUID */ | ||
| 671 | + nla_total_size(1) /* PROT_VDP_REQUEST */ | ||
| 672 | + nla_total_size(2); /* PORT_VDP_RESPONSE */ | ||
| 673 | size_t vf_ports_size = nla_total_size(sizeof(struct nlattr)); | ||
| 674 | size_t vf_port_size = nla_total_size(sizeof(struct nlattr)) | ||
| 675 | + port_size; | ||
| 676 | size_t port_self_size = nla_total_size(sizeof(struct nlattr)) | ||
| 677 | + port_size; | ||
| 678 | |||
| 679 | if (!dev->netdev_ops->ndo_get_vf_port || !dev->dev.parent) | ||
| 680 | return 0; | ||
| 681 | if (dev_num_vf(dev->dev.parent)) | ||
| 682 | return port_self_size + vf_ports_size + | ||
| 683 | vf_port_size * dev_num_vf(dev->dev.parent); | ||
| 684 | else | ||
| 685 | return port_self_size; | ||
| 686 | } | ||
| 687 | |||
| 621 | static inline size_t if_nlmsg_size(const struct net_device *dev) | 688 | static inline size_t if_nlmsg_size(const struct net_device *dev) |
| 622 | { | 689 | { |
| 623 | return NLMSG_ALIGN(sizeof(struct ifinfomsg)) | 690 | return NLMSG_ALIGN(sizeof(struct ifinfomsg)) |
| @@ -626,6 +693,7 @@ static inline size_t if_nlmsg_size(const struct net_device *dev) | |||
| 626 | + nla_total_size(IFNAMSIZ) /* IFLA_QDISC */ | 693 | + nla_total_size(IFNAMSIZ) /* IFLA_QDISC */ |
| 627 | + nla_total_size(sizeof(struct rtnl_link_ifmap)) | 694 | + nla_total_size(sizeof(struct rtnl_link_ifmap)) |
| 628 | + nla_total_size(sizeof(struct rtnl_link_stats)) | 695 | + nla_total_size(sizeof(struct rtnl_link_stats)) |
| 696 | + nla_total_size(sizeof(struct rtnl_link_stats64)) | ||
| 629 | + nla_total_size(MAX_ADDR_LEN) /* IFLA_ADDRESS */ | 697 | + nla_total_size(MAX_ADDR_LEN) /* IFLA_ADDRESS */ |
| 630 | + nla_total_size(MAX_ADDR_LEN) /* IFLA_BROADCAST */ | 698 | + nla_total_size(MAX_ADDR_LEN) /* IFLA_BROADCAST */ |
| 631 | + nla_total_size(4) /* IFLA_TXQLEN */ | 699 | + nla_total_size(4) /* IFLA_TXQLEN */ |
| @@ -637,9 +705,82 @@ static inline size_t if_nlmsg_size(const struct net_device *dev) | |||
| 637 | + nla_total_size(1) /* IFLA_LINKMODE */ | 705 | + nla_total_size(1) /* IFLA_LINKMODE */ |
| 638 | + nla_total_size(4) /* IFLA_NUM_VF */ | 706 | + nla_total_size(4) /* IFLA_NUM_VF */ |
| 639 | + rtnl_vfinfo_size(dev) /* IFLA_VFINFO_LIST */ | 707 | + rtnl_vfinfo_size(dev) /* IFLA_VFINFO_LIST */ |
| 708 | + rtnl_port_size(dev) /* IFLA_VF_PORTS + IFLA_PORT_SELF */ | ||
| 640 | + rtnl_link_get_size(dev); /* IFLA_LINKINFO */ | 709 | + rtnl_link_get_size(dev); /* IFLA_LINKINFO */ |
| 641 | } | 710 | } |
| 642 | 711 | ||
| 712 | static int rtnl_vf_ports_fill(struct sk_buff *skb, struct net_device *dev) | ||
| 713 | { | ||
| 714 | struct nlattr *vf_ports; | ||
| 715 | struct nlattr *vf_port; | ||
| 716 | int vf; | ||
| 717 | int err; | ||
| 718 | |||
| 719 | vf_ports = nla_nest_start(skb, IFLA_VF_PORTS); | ||
| 720 | if (!vf_ports) | ||
| 721 | return -EMSGSIZE; | ||
| 722 | |||
| 723 | for (vf = 0; vf < dev_num_vf(dev->dev.parent); vf++) { | ||
| 724 | vf_port = nla_nest_start(skb, IFLA_VF_PORT); | ||
| 725 | if (!vf_port) { | ||
| 726 | nla_nest_cancel(skb, vf_ports); | ||
| 727 | return -EMSGSIZE; | ||
| 728 | } | ||
| 729 | NLA_PUT_U32(skb, IFLA_PORT_VF, vf); | ||
| 730 | err = dev->netdev_ops->ndo_get_vf_port(dev, vf, skb); | ||
| 731 | if (err) { | ||
| 732 | nla_put_failure: | ||
| 733 | nla_nest_cancel(skb, vf_port); | ||
| 734 | continue; | ||
| 735 | } | ||
| 736 | nla_nest_end(skb, vf_port); | ||
| 737 | } | ||
| 738 | |||
| 739 | nla_nest_end(skb, vf_ports); | ||
| 740 | |||
| 741 | return 0; | ||
| 742 | } | ||
| 743 | |||
| 744 | static int rtnl_port_self_fill(struct sk_buff *skb, struct net_device *dev) | ||
| 745 | { | ||
| 746 | struct nlattr *port_self; | ||
| 747 | int err; | ||
| 748 | |||
| 749 | port_self = nla_nest_start(skb, IFLA_PORT_SELF); | ||
| 750 | if (!port_self) | ||
| 751 | return -EMSGSIZE; | ||
| 752 | |||
| 753 | err = dev->netdev_ops->ndo_get_vf_port(dev, PORT_SELF_VF, skb); | ||
| 754 | if (err) { | ||
| 755 | nla_nest_cancel(skb, port_self); | ||
| 756 | return err; | ||
| 757 | } | ||
| 758 | |||
| 759 | nla_nest_end(skb, port_self); | ||
| 760 | |||
| 761 | return 0; | ||
| 762 | } | ||
| 763 | |||
| 764 | static int rtnl_port_fill(struct sk_buff *skb, struct net_device *dev) | ||
| 765 | { | ||
| 766 | int err; | ||
| 767 | |||
| 768 | if (!dev->netdev_ops->ndo_get_vf_port || !dev->dev.parent) | ||
| 769 | return 0; | ||
| 770 | |||
| 771 | err = rtnl_port_self_fill(skb, dev); | ||
| 772 | if (err) | ||
| 773 | return err; | ||
| 774 | |||
| 775 | if (dev_num_vf(dev->dev.parent)) { | ||
| 776 | err = rtnl_vf_ports_fill(skb, dev); | ||
| 777 | if (err) | ||
| 778 | return err; | ||
| 779 | } | ||
| 780 | |||
| 781 | return 0; | ||
| 782 | } | ||
| 783 | |||
| 643 | static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, | 784 | static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, |
| 644 | int type, u32 pid, u32 seq, u32 change, | 785 | int type, u32 pid, u32 seq, u32 change, |
| 645 | unsigned int flags) | 786 | unsigned int flags) |
| @@ -705,13 +846,21 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, | |||
| 705 | stats = dev_get_stats(dev); | 846 | stats = dev_get_stats(dev); |
| 706 | copy_rtnl_link_stats(nla_data(attr), stats); | 847 | copy_rtnl_link_stats(nla_data(attr), stats); |
| 707 | 848 | ||
| 849 | attr = nla_reserve(skb, IFLA_STATS64, | ||
| 850 | sizeof(struct rtnl_link_stats64)); | ||
| 851 | if (attr == NULL) | ||
| 852 | goto nla_put_failure; | ||
| 853 | copy_rtnl_link_stats64(nla_data(attr), stats); | ||
| 854 | |||
| 855 | if (dev->dev.parent) | ||
| 856 | NLA_PUT_U32(skb, IFLA_NUM_VF, dev_num_vf(dev->dev.parent)); | ||
| 857 | |||
| 708 | if (dev->netdev_ops->ndo_get_vf_config && dev->dev.parent) { | 858 | if (dev->netdev_ops->ndo_get_vf_config && dev->dev.parent) { |
| 709 | int i; | 859 | int i; |
| 710 | 860 | ||
| 711 | struct nlattr *vfinfo, *vf; | 861 | struct nlattr *vfinfo, *vf; |
| 712 | int num_vfs = dev_num_vf(dev->dev.parent); | 862 | int num_vfs = dev_num_vf(dev->dev.parent); |
| 713 | 863 | ||
| 714 | NLA_PUT_U32(skb, IFLA_NUM_VF, num_vfs); | ||
| 715 | vfinfo = nla_nest_start(skb, IFLA_VFINFO_LIST); | 864 | vfinfo = nla_nest_start(skb, IFLA_VFINFO_LIST); |
| 716 | if (!vfinfo) | 865 | if (!vfinfo) |
| 717 | goto nla_put_failure; | 866 | goto nla_put_failure; |
| @@ -739,6 +888,10 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, | |||
| 739 | } | 888 | } |
| 740 | nla_nest_end(skb, vfinfo); | 889 | nla_nest_end(skb, vfinfo); |
| 741 | } | 890 | } |
| 891 | |||
| 892 | if (rtnl_port_fill(skb, dev)) | ||
| 893 | goto nla_put_failure; | ||
| 894 | |||
| 742 | if (dev->rtnl_link_ops) { | 895 | if (dev->rtnl_link_ops) { |
| 743 | if (rtnl_link_fill(skb, dev) < 0) | 896 | if (rtnl_link_fill(skb, dev) < 0) |
| 744 | goto nla_put_failure; | 897 | goto nla_put_failure; |
| @@ -800,6 +953,8 @@ const struct nla_policy ifla_policy[IFLA_MAX+1] = { | |||
| 800 | [IFLA_NET_NS_PID] = { .type = NLA_U32 }, | 953 | [IFLA_NET_NS_PID] = { .type = NLA_U32 }, |
| 801 | [IFLA_IFALIAS] = { .type = NLA_STRING, .len = IFALIASZ-1 }, | 954 | [IFLA_IFALIAS] = { .type = NLA_STRING, .len = IFALIASZ-1 }, |
| 802 | [IFLA_VFINFO_LIST] = {. type = NLA_NESTED }, | 955 | [IFLA_VFINFO_LIST] = {. type = NLA_NESTED }, |
| 956 | [IFLA_VF_PORTS] = { .type = NLA_NESTED }, | ||
| 957 | [IFLA_PORT_SELF] = { .type = NLA_NESTED }, | ||
| 803 | }; | 958 | }; |
| 804 | EXPORT_SYMBOL(ifla_policy); | 959 | EXPORT_SYMBOL(ifla_policy); |
| 805 | 960 | ||
| @@ -821,6 +976,20 @@ static const struct nla_policy ifla_vf_policy[IFLA_VF_MAX+1] = { | |||
| 821 | .len = sizeof(struct ifla_vf_tx_rate) }, | 976 | .len = sizeof(struct ifla_vf_tx_rate) }, |
| 822 | }; | 977 | }; |
| 823 | 978 | ||
| 979 | static const struct nla_policy ifla_port_policy[IFLA_PORT_MAX+1] = { | ||
| 980 | [IFLA_PORT_VF] = { .type = NLA_U32 }, | ||
| 981 | [IFLA_PORT_PROFILE] = { .type = NLA_STRING, | ||
| 982 | .len = PORT_PROFILE_MAX }, | ||
| 983 | [IFLA_PORT_VSI_TYPE] = { .type = NLA_BINARY, | ||
| 984 | .len = sizeof(struct ifla_port_vsi)}, | ||
| 985 | [IFLA_PORT_INSTANCE_UUID] = { .type = NLA_BINARY, | ||
| 986 | .len = PORT_UUID_MAX }, | ||
| 987 | [IFLA_PORT_HOST_UUID] = { .type = NLA_STRING, | ||
| 988 | .len = PORT_UUID_MAX }, | ||
| 989 | [IFLA_PORT_REQUEST] = { .type = NLA_U8, }, | ||
| 990 | [IFLA_PORT_RESPONSE] = { .type = NLA_U16, }, | ||
| 991 | }; | ||
| 992 | |||
| 824 | struct net *rtnl_link_get_net(struct net *src_net, struct nlattr *tb[]) | 993 | struct net *rtnl_link_get_net(struct net *src_net, struct nlattr *tb[]) |
| 825 | { | 994 | { |
| 826 | struct net *net; | 995 | struct net *net; |
| @@ -1030,8 +1199,10 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, | |||
| 1030 | struct nlattr *attr; | 1199 | struct nlattr *attr; |
| 1031 | int rem; | 1200 | int rem; |
| 1032 | nla_for_each_nested(attr, tb[IFLA_VFINFO_LIST], rem) { | 1201 | nla_for_each_nested(attr, tb[IFLA_VFINFO_LIST], rem) { |
| 1033 | if (nla_type(attr) != IFLA_VF_INFO) | 1202 | if (nla_type(attr) != IFLA_VF_INFO) { |
| 1203 | err = -EINVAL; | ||
| 1034 | goto errout; | 1204 | goto errout; |
| 1205 | } | ||
| 1035 | err = do_setvfinfo(dev, attr); | 1206 | err = do_setvfinfo(dev, attr); |
| 1036 | if (err < 0) | 1207 | if (err < 0) |
| 1037 | goto errout; | 1208 | goto errout; |
| @@ -1040,6 +1211,53 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, | |||
| 1040 | } | 1211 | } |
| 1041 | err = 0; | 1212 | err = 0; |
| 1042 | 1213 | ||
| 1214 | if (tb[IFLA_VF_PORTS]) { | ||
| 1215 | struct nlattr *port[IFLA_PORT_MAX+1]; | ||
| 1216 | struct nlattr *attr; | ||
| 1217 | int vf; | ||
| 1218 | int rem; | ||
| 1219 | |||
| 1220 | err = -EOPNOTSUPP; | ||
| 1221 | if (!ops->ndo_set_vf_port) | ||
| 1222 | goto errout; | ||
| 1223 | |||
| 1224 | nla_for_each_nested(attr, tb[IFLA_VF_PORTS], rem) { | ||
| 1225 | if (nla_type(attr) != IFLA_VF_PORT) | ||
| 1226 | continue; | ||
| 1227 | err = nla_parse_nested(port, IFLA_PORT_MAX, | ||
| 1228 | attr, ifla_port_policy); | ||
| 1229 | if (err < 0) | ||
| 1230 | goto errout; | ||
| 1231 | if (!port[IFLA_PORT_VF]) { | ||
| 1232 | err = -EOPNOTSUPP; | ||
| 1233 | goto errout; | ||
| 1234 | } | ||
| 1235 | vf = nla_get_u32(port[IFLA_PORT_VF]); | ||
| 1236 | err = ops->ndo_set_vf_port(dev, vf, port); | ||
| 1237 | if (err < 0) | ||
| 1238 | goto errout; | ||
| 1239 | modified = 1; | ||
| 1240 | } | ||
| 1241 | } | ||
| 1242 | err = 0; | ||
| 1243 | |||
| 1244 | if (tb[IFLA_PORT_SELF]) { | ||
| 1245 | struct nlattr *port[IFLA_PORT_MAX+1]; | ||
| 1246 | |||
| 1247 | err = nla_parse_nested(port, IFLA_PORT_MAX, | ||
| 1248 | tb[IFLA_PORT_SELF], ifla_port_policy); | ||
| 1249 | if (err < 0) | ||
| 1250 | goto errout; | ||
| 1251 | |||
| 1252 | err = -EOPNOTSUPP; | ||
| 1253 | if (ops->ndo_set_vf_port) | ||
| 1254 | err = ops->ndo_set_vf_port(dev, PORT_SELF_VF, port); | ||
| 1255 | if (err < 0) | ||
| 1256 | goto errout; | ||
| 1257 | modified = 1; | ||
| 1258 | } | ||
| 1259 | err = 0; | ||
| 1260 | |||
| 1043 | errout: | 1261 | errout: |
| 1044 | if (err < 0 && modified && net_ratelimit()) | 1262 | if (err < 0 && modified && net_ratelimit()) |
| 1045 | printk(KERN_WARNING "A link change request failed with " | 1263 | printk(KERN_WARNING "A link change request failed with " |
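On the set side, do_setlink() above parses each IFLA_VF_PORT (or the IFLA_PORT_SELF nest) against ifla_port_policy before handing the resulting attribute table to ndo_set_vf_port(). A hypothetical driver implementation could consume it roughly as follows (all example_* names and the commit helper are assumptions, not from this patch):

	static int example_set_vf_port(struct net_device *dev, int vf,
				       struct nlattr *port[])
	{
		struct example_port *pp = netdev_priv(dev);	/* hypothetical driver state */

		if (port[IFLA_PORT_REQUEST])
			pp->request = nla_get_u8(port[IFLA_PORT_REQUEST]);
		if (port[IFLA_PORT_PROFILE])
			nla_strlcpy(pp->profile, port[IFLA_PORT_PROFILE],
				    PORT_PROFILE_MAX);
		if (port[IFLA_PORT_INSTANCE_UUID])
			memcpy(pp->instance_uuid,
			       nla_data(port[IFLA_PORT_INSTANCE_UUID]),
			       PORT_UUID_MAX);

		/* vf is either a VF index or PORT_SELF_VF for the PF itself */
		return example_commit_port_profile(pp, vf);	/* hypothetical helper */
	}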
| @@ -1397,7 +1615,7 @@ static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb) | |||
| 1397 | 1615 | ||
| 1398 | if (s_idx == 0) | 1616 | if (s_idx == 0) |
| 1399 | s_idx = 1; | 1617 | s_idx = 1; |
| 1400 | for (idx = 1; idx < NPROTO; idx++) { | 1618 | for (idx = 1; idx <= RTNL_FAMILY_MAX; idx++) { |
| 1401 | int type = cb->nlh->nlmsg_type-RTM_BASE; | 1619 | int type = cb->nlh->nlmsg_type-RTM_BASE; |
| 1402 | if (idx < s_idx || idx == PF_PACKET) | 1620 | if (idx < s_idx || idx == PF_PACKET) |
| 1403 | continue; | 1621 | continue; |
| @@ -1465,9 +1683,6 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) | |||
| 1465 | return 0; | 1683 | return 0; |
| 1466 | 1684 | ||
| 1467 | family = ((struct rtgenmsg *)NLMSG_DATA(nlh))->rtgen_family; | 1685 | family = ((struct rtgenmsg *)NLMSG_DATA(nlh))->rtgen_family; |
| 1468 | if (family >= NPROTO) | ||
| 1469 | return -EAFNOSUPPORT; | ||
| 1470 | |||
| 1471 | sz_idx = type>>2; | 1686 | sz_idx = type>>2; |
| 1472 | kind = type&3; | 1687 | kind = type&3; |
| 1473 | 1688 | ||
| @@ -1535,6 +1750,7 @@ static int rtnetlink_event(struct notifier_block *this, unsigned long event, voi | |||
| 1535 | case NETDEV_POST_INIT: | 1750 | case NETDEV_POST_INIT: |
| 1536 | case NETDEV_REGISTER: | 1751 | case NETDEV_REGISTER: |
| 1537 | case NETDEV_CHANGE: | 1752 | case NETDEV_CHANGE: |
| 1753 | case NETDEV_PRE_TYPE_CHANGE: | ||
| 1538 | case NETDEV_GOING_DOWN: | 1754 | case NETDEV_GOING_DOWN: |
| 1539 | case NETDEV_UNREGISTER: | 1755 | case NETDEV_UNREGISTER: |
| 1540 | case NETDEV_UNREGISTER_BATCH: | 1756 | case NETDEV_UNREGISTER_BATCH: |
diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 93c4e060c91e..f8abf68e3988 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c | |||
| @@ -117,7 +117,7 @@ static const struct pipe_buf_operations sock_pipe_buf_ops = { | |||
| 117 | * | 117 | * |
| 118 | * Out of line support code for skb_put(). Not user callable. | 118 | * Out of line support code for skb_put(). Not user callable. |
| 119 | */ | 119 | */ |
| 120 | void skb_over_panic(struct sk_buff *skb, int sz, void *here) | 120 | static void skb_over_panic(struct sk_buff *skb, int sz, void *here) |
| 121 | { | 121 | { |
| 122 | printk(KERN_EMERG "skb_over_panic: text:%p len:%d put:%d head:%p " | 122 | printk(KERN_EMERG "skb_over_panic: text:%p len:%d put:%d head:%p " |
| 123 | "data:%p tail:%#lx end:%#lx dev:%s\n", | 123 | "data:%p tail:%#lx end:%#lx dev:%s\n", |
| @@ -126,7 +126,6 @@ void skb_over_panic(struct sk_buff *skb, int sz, void *here) | |||
| 126 | skb->dev ? skb->dev->name : "<NULL>"); | 126 | skb->dev ? skb->dev->name : "<NULL>"); |
| 127 | BUG(); | 127 | BUG(); |
| 128 | } | 128 | } |
| 129 | EXPORT_SYMBOL(skb_over_panic); | ||
| 130 | 129 | ||
| 131 | /** | 130 | /** |
| 132 | * skb_under_panic - private function | 131 | * skb_under_panic - private function |
| @@ -137,7 +136,7 @@ EXPORT_SYMBOL(skb_over_panic); | |||
| 137 | * Out of line support code for skb_push(). Not user callable. | 136 | * Out of line support code for skb_push(). Not user callable. |
| 138 | */ | 137 | */ |
| 139 | 138 | ||
| 140 | void skb_under_panic(struct sk_buff *skb, int sz, void *here) | 139 | static void skb_under_panic(struct sk_buff *skb, int sz, void *here) |
| 141 | { | 140 | { |
| 142 | printk(KERN_EMERG "skb_under_panic: text:%p len:%d put:%d head:%p " | 141 | printk(KERN_EMERG "skb_under_panic: text:%p len:%d put:%d head:%p " |
| 143 | "data:%p tail:%#lx end:%#lx dev:%s\n", | 142 | "data:%p tail:%#lx end:%#lx dev:%s\n", |
| @@ -146,7 +145,6 @@ void skb_under_panic(struct sk_buff *skb, int sz, void *here) | |||
| 146 | skb->dev ? skb->dev->name : "<NULL>"); | 145 | skb->dev ? skb->dev->name : "<NULL>"); |
| 147 | BUG(); | 146 | BUG(); |
| 148 | } | 147 | } |
| 149 | EXPORT_SYMBOL(skb_under_panic); | ||
| 150 | 148 | ||
| 151 | /* Allocate a new skbuff. We do this ourselves so we can fill in a few | 149 | /* Allocate a new skbuff. We do this ourselves so we can fill in a few |
| 152 | * 'private' fields and also do memory statistics to find all the | 150 | * 'private' fields and also do memory statistics to find all the |
| @@ -183,12 +181,14 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, | |||
| 183 | skb = kmem_cache_alloc_node(cache, gfp_mask & ~__GFP_DMA, node); | 181 | skb = kmem_cache_alloc_node(cache, gfp_mask & ~__GFP_DMA, node); |
| 184 | if (!skb) | 182 | if (!skb) |
| 185 | goto out; | 183 | goto out; |
| 184 | prefetchw(skb); | ||
| 186 | 185 | ||
| 187 | size = SKB_DATA_ALIGN(size); | 186 | size = SKB_DATA_ALIGN(size); |
| 188 | data = kmalloc_node_track_caller(size + sizeof(struct skb_shared_info), | 187 | data = kmalloc_node_track_caller(size + sizeof(struct skb_shared_info), |
| 189 | gfp_mask, node); | 188 | gfp_mask, node); |
| 190 | if (!data) | 189 | if (!data) |
| 191 | goto nodata; | 190 | goto nodata; |
| 191 | prefetchw(data + size); | ||
| 192 | 192 | ||
| 193 | /* | 193 | /* |
| 194 | * Only clear those fields we need to clear, not those that we will | 194 | * Only clear those fields we need to clear, not those that we will |
| @@ -210,15 +210,8 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, | |||
| 210 | 210 | ||
| 211 | /* make sure we initialize shinfo sequentially */ | 211 | /* make sure we initialize shinfo sequentially */ |
| 212 | shinfo = skb_shinfo(skb); | 212 | shinfo = skb_shinfo(skb); |
| 213 | memset(shinfo, 0, offsetof(struct skb_shared_info, dataref)); | ||
| 213 | atomic_set(&shinfo->dataref, 1); | 214 | atomic_set(&shinfo->dataref, 1); |
| 214 | shinfo->nr_frags = 0; | ||
| 215 | shinfo->gso_size = 0; | ||
| 216 | shinfo->gso_segs = 0; | ||
| 217 | shinfo->gso_type = 0; | ||
| 218 | shinfo->ip6_frag_id = 0; | ||
| 219 | shinfo->tx_flags.flags = 0; | ||
| 220 | skb_frag_list_init(skb); | ||
| 221 | memset(&shinfo->hwtstamps, 0, sizeof(shinfo->hwtstamps)); | ||
| 222 | 215 | ||
| 223 | if (fclone) { | 216 | if (fclone) { |
| 224 | struct sk_buff *child = skb + 1; | 217 | struct sk_buff *child = skb + 1; |
| @@ -507,16 +500,10 @@ int skb_recycle_check(struct sk_buff *skb, int skb_size) | |||
| 507 | return 0; | 500 | return 0; |
| 508 | 501 | ||
| 509 | skb_release_head_state(skb); | 502 | skb_release_head_state(skb); |
| 503 | |||
| 510 | shinfo = skb_shinfo(skb); | 504 | shinfo = skb_shinfo(skb); |
| 505 | memset(shinfo, 0, offsetof(struct skb_shared_info, dataref)); | ||
| 511 | atomic_set(&shinfo->dataref, 1); | 506 | atomic_set(&shinfo->dataref, 1); |
| 512 | shinfo->nr_frags = 0; | ||
| 513 | shinfo->gso_size = 0; | ||
| 514 | shinfo->gso_segs = 0; | ||
| 515 | shinfo->gso_type = 0; | ||
| 516 | shinfo->ip6_frag_id = 0; | ||
| 517 | shinfo->tx_flags.flags = 0; | ||
| 518 | skb_frag_list_init(skb); | ||
| 519 | memset(&shinfo->hwtstamps, 0, sizeof(shinfo->hwtstamps)); | ||
| 520 | 507 | ||
| 521 | memset(skb, 0, offsetof(struct sk_buff, tail)); | 508 | memset(skb, 0, offsetof(struct sk_buff, tail)); |
| 522 | skb->data = skb->head + NET_SKB_PAD; | 509 | skb->data = skb->head + NET_SKB_PAD; |
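Both __alloc_skb() and skb_recycle_check() now reset skb_shared_info with one memset() up to offsetof(struct skb_shared_info, dataref) instead of zeroing each field by hand. That is only correct while every field that must start at zero is laid out before dataref; a hypothetical build-time guard for that assumption (purely illustrative, not in this patch) would be:

	BUILD_BUG_ON(offsetof(struct skb_shared_info, frag_list) >=
		     offsetof(struct skb_shared_info, dataref));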
| @@ -533,7 +520,8 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old) | |||
| 533 | new->transport_header = old->transport_header; | 520 | new->transport_header = old->transport_header; |
| 534 | new->network_header = old->network_header; | 521 | new->network_header = old->network_header; |
| 535 | new->mac_header = old->mac_header; | 522 | new->mac_header = old->mac_header; |
| 536 | skb_dst_set(new, dst_clone(skb_dst(old))); | 523 | skb_dst_copy(new, old); |
| 524 | new->rxhash = old->rxhash; | ||
| 537 | #ifdef CONFIG_XFRM | 525 | #ifdef CONFIG_XFRM |
| 538 | new->sp = secpath_get(old->sp); | 526 | new->sp = secpath_get(old->sp); |
| 539 | #endif | 527 | #endif |
| @@ -581,6 +569,7 @@ static struct sk_buff *__skb_clone(struct sk_buff *n, struct sk_buff *skb) | |||
| 581 | C(len); | 569 | C(len); |
| 582 | C(data_len); | 570 | C(data_len); |
| 583 | C(mac_len); | 571 | C(mac_len); |
| 572 | C(rxhash); | ||
| 584 | n->hdr_len = skb->nohdr ? skb_headroom(skb) : skb->hdr_len; | 573 | n->hdr_len = skb->nohdr ? skb_headroom(skb) : skb->hdr_len; |
| 585 | n->cloned = 1; | 574 | n->cloned = 1; |
| 586 | n->nohdr = 0; | 575 | n->nohdr = 0; |
| @@ -1051,7 +1040,7 @@ EXPORT_SYMBOL(skb_push); | |||
| 1051 | */ | 1040 | */ |
| 1052 | unsigned char *skb_pull(struct sk_buff *skb, unsigned int len) | 1041 | unsigned char *skb_pull(struct sk_buff *skb, unsigned int len) |
| 1053 | { | 1042 | { |
| 1054 | return unlikely(len > skb->len) ? NULL : __skb_pull(skb, len); | 1043 | return skb_pull_inline(skb, len); |
| 1055 | } | 1044 | } |
| 1056 | EXPORT_SYMBOL(skb_pull); | 1045 | EXPORT_SYMBOL(skb_pull); |
| 1057 | 1046 | ||
| @@ -1417,12 +1406,13 @@ new_page: | |||
| 1417 | /* | 1406 | /* |
| 1418 | * Fill page/offset/length into spd, if it can hold more pages. | 1407 | * Fill page/offset/length into spd, if it can hold more pages. |
| 1419 | */ | 1408 | */ |
| 1420 | static inline int spd_fill_page(struct splice_pipe_desc *spd, struct page *page, | 1409 | static inline int spd_fill_page(struct splice_pipe_desc *spd, |
| 1410 | struct pipe_inode_info *pipe, struct page *page, | ||
| 1421 | unsigned int *len, unsigned int offset, | 1411 | unsigned int *len, unsigned int offset, |
| 1422 | struct sk_buff *skb, int linear, | 1412 | struct sk_buff *skb, int linear, |
| 1423 | struct sock *sk) | 1413 | struct sock *sk) |
| 1424 | { | 1414 | { |
| 1425 | if (unlikely(spd->nr_pages == PIPE_BUFFERS)) | 1415 | if (unlikely(spd->nr_pages == pipe->buffers)) |
| 1426 | return 1; | 1416 | return 1; |
| 1427 | 1417 | ||
| 1428 | if (linear) { | 1418 | if (linear) { |
| @@ -1458,7 +1448,8 @@ static inline int __splice_segment(struct page *page, unsigned int poff, | |||
| 1458 | unsigned int plen, unsigned int *off, | 1448 | unsigned int plen, unsigned int *off, |
| 1459 | unsigned int *len, struct sk_buff *skb, | 1449 | unsigned int *len, struct sk_buff *skb, |
| 1460 | struct splice_pipe_desc *spd, int linear, | 1450 | struct splice_pipe_desc *spd, int linear, |
| 1461 | struct sock *sk) | 1451 | struct sock *sk, |
| 1452 | struct pipe_inode_info *pipe) | ||
| 1462 | { | 1453 | { |
| 1463 | if (!*len) | 1454 | if (!*len) |
| 1464 | return 1; | 1455 | return 1; |
| @@ -1481,7 +1472,7 @@ static inline int __splice_segment(struct page *page, unsigned int poff, | |||
| 1481 | /* the linear region may spread across several pages */ | 1472 | /* the linear region may spread across several pages */ |
| 1482 | flen = min_t(unsigned int, flen, PAGE_SIZE - poff); | 1473 | flen = min_t(unsigned int, flen, PAGE_SIZE - poff); |
| 1483 | 1474 | ||
| 1484 | if (spd_fill_page(spd, page, &flen, poff, skb, linear, sk)) | 1475 | if (spd_fill_page(spd, pipe, page, &flen, poff, skb, linear, sk)) |
| 1485 | return 1; | 1476 | return 1; |
| 1486 | 1477 | ||
| 1487 | __segment_seek(&page, &poff, &plen, flen); | 1478 | __segment_seek(&page, &poff, &plen, flen); |
| @@ -1496,9 +1487,9 @@ static inline int __splice_segment(struct page *page, unsigned int poff, | |||
| 1496 | * Map linear and fragment data from the skb to spd. It reports failure if the | 1487 | * Map linear and fragment data from the skb to spd. It reports failure if the |
| 1497 | * pipe is full or if we already spliced the requested length. | 1488 | * pipe is full or if we already spliced the requested length. |
| 1498 | */ | 1489 | */ |
| 1499 | static int __skb_splice_bits(struct sk_buff *skb, unsigned int *offset, | 1490 | static int __skb_splice_bits(struct sk_buff *skb, struct pipe_inode_info *pipe, |
| 1500 | unsigned int *len, struct splice_pipe_desc *spd, | 1491 | unsigned int *offset, unsigned int *len, |
| 1501 | struct sock *sk) | 1492 | struct splice_pipe_desc *spd, struct sock *sk) |
| 1502 | { | 1493 | { |
| 1503 | int seg; | 1494 | int seg; |
| 1504 | 1495 | ||
| @@ -1508,7 +1499,7 @@ static int __skb_splice_bits(struct sk_buff *skb, unsigned int *offset, | |||
| 1508 | if (__splice_segment(virt_to_page(skb->data), | 1499 | if (__splice_segment(virt_to_page(skb->data), |
| 1509 | (unsigned long) skb->data & (PAGE_SIZE - 1), | 1500 | (unsigned long) skb->data & (PAGE_SIZE - 1), |
| 1510 | skb_headlen(skb), | 1501 | skb_headlen(skb), |
| 1511 | offset, len, skb, spd, 1, sk)) | 1502 | offset, len, skb, spd, 1, sk, pipe)) |
| 1512 | return 1; | 1503 | return 1; |
| 1513 | 1504 | ||
| 1514 | /* | 1505 | /* |
| @@ -1518,7 +1509,7 @@ static int __skb_splice_bits(struct sk_buff *skb, unsigned int *offset, | |||
| 1518 | const skb_frag_t *f = &skb_shinfo(skb)->frags[seg]; | 1509 | const skb_frag_t *f = &skb_shinfo(skb)->frags[seg]; |
| 1519 | 1510 | ||
| 1520 | if (__splice_segment(f->page, f->page_offset, f->size, | 1511 | if (__splice_segment(f->page, f->page_offset, f->size, |
| 1521 | offset, len, skb, spd, 0, sk)) | 1512 | offset, len, skb, spd, 0, sk, pipe)) |
| 1522 | return 1; | 1513 | return 1; |
| 1523 | } | 1514 | } |
| 1524 | 1515 | ||
| @@ -1535,8 +1526,8 @@ int skb_splice_bits(struct sk_buff *skb, unsigned int offset, | |||
| 1535 | struct pipe_inode_info *pipe, unsigned int tlen, | 1526 | struct pipe_inode_info *pipe, unsigned int tlen, |
| 1536 | unsigned int flags) | 1527 | unsigned int flags) |
| 1537 | { | 1528 | { |
| 1538 | struct partial_page partial[PIPE_BUFFERS]; | 1529 | struct partial_page partial[PIPE_DEF_BUFFERS]; |
| 1539 | struct page *pages[PIPE_BUFFERS]; | 1530 | struct page *pages[PIPE_DEF_BUFFERS]; |
| 1540 | struct splice_pipe_desc spd = { | 1531 | struct splice_pipe_desc spd = { |
| 1541 | .pages = pages, | 1532 | .pages = pages, |
| 1542 | .partial = partial, | 1533 | .partial = partial, |
| @@ -1546,12 +1537,16 @@ int skb_splice_bits(struct sk_buff *skb, unsigned int offset, | |||
| 1546 | }; | 1537 | }; |
| 1547 | struct sk_buff *frag_iter; | 1538 | struct sk_buff *frag_iter; |
| 1548 | struct sock *sk = skb->sk; | 1539 | struct sock *sk = skb->sk; |
| 1540 | int ret = 0; | ||
| 1541 | |||
| 1542 | if (splice_grow_spd(pipe, &spd)) | ||
| 1543 | return -ENOMEM; | ||
| 1549 | 1544 | ||
| 1550 | /* | 1545 | /* |
| 1551 | * __skb_splice_bits() only fails if the output has no room left, | 1546 | * __skb_splice_bits() only fails if the output has no room left, |
| 1552 | * so no point in going over the frag_list for the error case. | 1547 | * so no point in going over the frag_list for the error case. |
| 1553 | */ | 1548 | */ |
| 1554 | if (__skb_splice_bits(skb, &offset, &tlen, &spd, sk)) | 1549 | if (__skb_splice_bits(skb, pipe, &offset, &tlen, &spd, sk)) |
| 1555 | goto done; | 1550 | goto done; |
| 1556 | else if (!tlen) | 1551 | else if (!tlen) |
| 1557 | goto done; | 1552 | goto done; |
| @@ -1562,14 +1557,12 @@ int skb_splice_bits(struct sk_buff *skb, unsigned int offset, | |||
| 1562 | skb_walk_frags(skb, frag_iter) { | 1557 | skb_walk_frags(skb, frag_iter) { |
| 1563 | if (!tlen) | 1558 | if (!tlen) |
| 1564 | break; | 1559 | break; |
| 1565 | if (__skb_splice_bits(frag_iter, &offset, &tlen, &spd, sk)) | 1560 | if (__skb_splice_bits(frag_iter, pipe, &offset, &tlen, &spd, sk)) |
| 1566 | break; | 1561 | break; |
| 1567 | } | 1562 | } |
| 1568 | 1563 | ||
| 1569 | done: | 1564 | done: |
| 1570 | if (spd.nr_pages) { | 1565 | if (spd.nr_pages) { |
| 1571 | int ret; | ||
| 1572 | |||
| 1573 | /* | 1566 | /* |
| 1574 | * Drop the socket lock, otherwise we have reverse | 1567 | * Drop the socket lock, otherwise we have reverse |
| 1575 | * locking dependencies between sk_lock and i_mutex | 1568 | * locking dependencies between sk_lock and i_mutex |
| @@ -1582,10 +1575,10 @@ done: | |||
| 1582 | release_sock(sk); | 1575 | release_sock(sk); |
| 1583 | ret = splice_to_pipe(pipe, &spd); | 1576 | ret = splice_to_pipe(pipe, &spd); |
| 1584 | lock_sock(sk); | 1577 | lock_sock(sk); |
| 1585 | return ret; | ||
| 1586 | } | 1578 | } |
| 1587 | 1579 | ||
| 1588 | return 0; | 1580 | splice_shrink_spd(pipe, &spd); |
| 1581 | return ret; | ||
| 1589 | } | 1582 | } |
| 1590 | 1583 | ||
| 1591 | /** | 1584 | /** |
| @@ -2729,6 +2722,7 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb) | |||
| 2729 | *NAPI_GRO_CB(nskb) = *NAPI_GRO_CB(p); | 2722 | *NAPI_GRO_CB(nskb) = *NAPI_GRO_CB(p); |
| 2730 | skb_shinfo(nskb)->frag_list = p; | 2723 | skb_shinfo(nskb)->frag_list = p; |
| 2731 | skb_shinfo(nskb)->gso_size = pinfo->gso_size; | 2724 | skb_shinfo(nskb)->gso_size = pinfo->gso_size; |
| 2725 | pinfo->gso_size = 0; | ||
| 2732 | skb_header_release(p); | 2726 | skb_header_release(p); |
| 2733 | nskb->prev = p; | 2727 | nskb->prev = p; |
| 2734 | 2728 | ||
diff --git a/net/core/sock.c b/net/core/sock.c index c5812bbc2cc9..37fe9b6adade 100644 --- a/net/core/sock.c +++ b/net/core/sock.c | |||
| @@ -123,6 +123,7 @@ | |||
| 123 | #include <linux/net_tstamp.h> | 123 | #include <linux/net_tstamp.h> |
| 124 | #include <net/xfrm.h> | 124 | #include <net/xfrm.h> |
| 125 | #include <linux/ipsec.h> | 125 | #include <linux/ipsec.h> |
| 126 | #include <net/cls_cgroup.h> | ||
| 126 | 127 | ||
| 127 | #include <linux/filter.h> | 128 | #include <linux/filter.h> |
| 128 | 129 | ||
| @@ -217,6 +218,11 @@ __u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX; | |||
| 217 | int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512); | 218 | int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512); |
| 218 | EXPORT_SYMBOL(sysctl_optmem_max); | 219 | EXPORT_SYMBOL(sysctl_optmem_max); |
| 219 | 220 | ||
| 221 | #if defined(CONFIG_CGROUPS) && !defined(CONFIG_NET_CLS_CGROUP) | ||
| 222 | int net_cls_subsys_id = -1; | ||
| 223 | EXPORT_SYMBOL_GPL(net_cls_subsys_id); | ||
| 224 | #endif | ||
| 225 | |||
| 220 | static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen) | 226 | static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen) |
| 221 | { | 227 | { |
| 222 | struct timeval tv; | 228 | struct timeval tv; |
| @@ -307,6 +313,11 @@ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) | |||
| 307 | */ | 313 | */ |
| 308 | skb_len = skb->len; | 314 | skb_len = skb->len; |
| 309 | 315 | ||
| 316 | /* we escape from rcu protected region, make sure we dont leak | ||
| 317 | * a norefcounted dst | ||
| 318 | */ | ||
| 319 | skb_dst_force(skb); | ||
| 320 | |||
| 310 | spin_lock_irqsave(&list->lock, flags); | 321 | spin_lock_irqsave(&list->lock, flags); |
| 311 | skb->dropcount = atomic_read(&sk->sk_drops); | 322 | skb->dropcount = atomic_read(&sk->sk_drops); |
| 312 | __skb_queue_tail(list, skb); | 323 | __skb_queue_tail(list, skb); |
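The skb_dst_force() added here pairs with the noref-dst convention used elsewhere in this series: the input path may attach a dst without taking a reference as long as it stays inside its rcu_read_lock() section, so an skb that is queued past that section must first be given a real reference. A rough sketch of the rule (surrounding calls are illustrative, not lifted from this patch):

	rcu_read_lock();
	skb_dst_set_noref(skb, dst);	/* dst borrowed, no refcount taken */
	/* ... protocol input processing under RCU ... */
	skb_dst_force(skb);		/* take a real reference before the skb escapes */
	__skb_queue_tail(&sk->sk_receive_queue, skb);
	rcu_read_unlock();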
| @@ -327,6 +338,10 @@ int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested) | |||
| 327 | 338 | ||
| 328 | skb->dev = NULL; | 339 | skb->dev = NULL; |
| 329 | 340 | ||
| 341 | if (sk_rcvqueues_full(sk, skb)) { | ||
| 342 | atomic_inc(&sk->sk_drops); | ||
| 343 | goto discard_and_relse; | ||
| 344 | } | ||
| 330 | if (nested) | 345 | if (nested) |
| 331 | bh_lock_sock_nested(sk); | 346 | bh_lock_sock_nested(sk); |
| 332 | else | 347 | else |
| @@ -364,11 +379,11 @@ EXPORT_SYMBOL(sk_reset_txq); | |||
| 364 | 379 | ||
| 365 | struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie) | 380 | struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie) |
| 366 | { | 381 | { |
| 367 | struct dst_entry *dst = sk->sk_dst_cache; | 382 | struct dst_entry *dst = __sk_dst_get(sk); |
| 368 | 383 | ||
| 369 | if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) { | 384 | if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) { |
| 370 | sk_tx_queue_clear(sk); | 385 | sk_tx_queue_clear(sk); |
| 371 | sk->sk_dst_cache = NULL; | 386 | rcu_assign_pointer(sk->sk_dst_cache, NULL); |
| 372 | dst_release(dst); | 387 | dst_release(dst); |
| 373 | return NULL; | 388 | return NULL; |
| 374 | } | 389 | } |
| @@ -1041,6 +1056,17 @@ static void sk_prot_free(struct proto *prot, struct sock *sk) | |||
| 1041 | module_put(owner); | 1056 | module_put(owner); |
| 1042 | } | 1057 | } |
| 1043 | 1058 | ||
| 1059 | #ifdef CONFIG_CGROUPS | ||
| 1060 | void sock_update_classid(struct sock *sk) | ||
| 1061 | { | ||
| 1062 | u32 classid = task_cls_classid(current); | ||
| 1063 | |||
| 1064 | if (classid && classid != sk->sk_classid) | ||
| 1065 | sk->sk_classid = classid; | ||
| 1066 | } | ||
| 1067 | EXPORT_SYMBOL(sock_update_classid); | ||
| 1068 | #endif | ||
| 1069 | |||
| 1044 | /** | 1070 | /** |
| 1045 | * sk_alloc - All socket objects are allocated here | 1071 | * sk_alloc - All socket objects are allocated here |
| 1046 | * @net: the applicable net namespace | 1072 | * @net: the applicable net namespace |
| @@ -1064,6 +1090,8 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority, | |||
| 1064 | sock_lock_init(sk); | 1090 | sock_lock_init(sk); |
| 1065 | sock_net_set(sk, get_net(net)); | 1091 | sock_net_set(sk, get_net(net)); |
| 1066 | atomic_set(&sk->sk_wmem_alloc, 1); | 1092 | atomic_set(&sk->sk_wmem_alloc, 1); |
| 1093 | |||
| 1094 | sock_update_classid(sk); | ||
| 1067 | } | 1095 | } |
| 1068 | 1096 | ||
| 1069 | return sk; | 1097 | return sk; |
| @@ -1157,7 +1185,7 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority) | |||
| 1157 | skb_queue_head_init(&newsk->sk_async_wait_queue); | 1185 | skb_queue_head_init(&newsk->sk_async_wait_queue); |
| 1158 | #endif | 1186 | #endif |
| 1159 | 1187 | ||
| 1160 | rwlock_init(&newsk->sk_dst_lock); | 1188 | spin_lock_init(&newsk->sk_dst_lock); |
| 1161 | rwlock_init(&newsk->sk_callback_lock); | 1189 | rwlock_init(&newsk->sk_callback_lock); |
| 1162 | lockdep_set_class_and_name(&newsk->sk_callback_lock, | 1190 | lockdep_set_class_and_name(&newsk->sk_callback_lock, |
| 1163 | af_callback_keys + newsk->sk_family, | 1191 | af_callback_keys + newsk->sk_family, |
| @@ -1207,7 +1235,7 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority) | |||
| 1207 | */ | 1235 | */ |
| 1208 | sk_refcnt_debug_inc(newsk); | 1236 | sk_refcnt_debug_inc(newsk); |
| 1209 | sk_set_socket(newsk, NULL); | 1237 | sk_set_socket(newsk, NULL); |
| 1210 | newsk->sk_sleep = NULL; | 1238 | newsk->sk_wq = NULL; |
| 1211 | 1239 | ||
| 1212 | if (newsk->sk_prot->sockets_allocated) | 1240 | if (newsk->sk_prot->sockets_allocated) |
| 1213 | percpu_counter_inc(newsk->sk_prot->sockets_allocated); | 1241 | percpu_counter_inc(newsk->sk_prot->sockets_allocated); |
| @@ -1227,6 +1255,7 @@ void sk_setup_caps(struct sock *sk, struct dst_entry *dst) | |||
| 1227 | sk->sk_route_caps = dst->dev->features; | 1255 | sk->sk_route_caps = dst->dev->features; |
| 1228 | if (sk->sk_route_caps & NETIF_F_GSO) | 1256 | if (sk->sk_route_caps & NETIF_F_GSO) |
| 1229 | sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE; | 1257 | sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE; |
| 1258 | sk->sk_route_caps &= ~sk->sk_route_nocaps; | ||
| 1230 | if (sk_can_gso(sk)) { | 1259 | if (sk_can_gso(sk)) { |
| 1231 | if (dst->header_len) { | 1260 | if (dst->header_len) { |
| 1232 | sk->sk_route_caps &= ~NETIF_F_GSO_MASK; | 1261 | sk->sk_route_caps &= ~NETIF_F_GSO_MASK; |
| @@ -1395,7 +1424,7 @@ static long sock_wait_for_wmem(struct sock *sk, long timeo) | |||
| 1395 | if (signal_pending(current)) | 1424 | if (signal_pending(current)) |
| 1396 | break; | 1425 | break; |
| 1397 | set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); | 1426 | set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); |
| 1398 | prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); | 1427 | prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); |
| 1399 | if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) | 1428 | if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) |
| 1400 | break; | 1429 | break; |
| 1401 | if (sk->sk_shutdown & SEND_SHUTDOWN) | 1430 | if (sk->sk_shutdown & SEND_SHUTDOWN) |
| @@ -1404,7 +1433,7 @@ static long sock_wait_for_wmem(struct sock *sk, long timeo) | |||
| 1404 | break; | 1433 | break; |
| 1405 | timeo = schedule_timeout(timeo); | 1434 | timeo = schedule_timeout(timeo); |
| 1406 | } | 1435 | } |
| 1407 | finish_wait(sk->sk_sleep, &wait); | 1436 | finish_wait(sk_sleep(sk), &wait); |
| 1408 | return timeo; | 1437 | return timeo; |
| 1409 | } | 1438 | } |
| 1410 | 1439 | ||
| @@ -1531,6 +1560,7 @@ static void __release_sock(struct sock *sk) | |||
| 1531 | do { | 1560 | do { |
| 1532 | struct sk_buff *next = skb->next; | 1561 | struct sk_buff *next = skb->next; |
| 1533 | 1562 | ||
| 1563 | WARN_ON_ONCE(skb_dst_is_noref(skb)); | ||
| 1534 | skb->next = NULL; | 1564 | skb->next = NULL; |
| 1535 | sk_backlog_rcv(sk, skb); | 1565 | sk_backlog_rcv(sk, skb); |
| 1536 | 1566 | ||
| @@ -1570,11 +1600,11 @@ int sk_wait_data(struct sock *sk, long *timeo) | |||
| 1570 | int rc; | 1600 | int rc; |
| 1571 | DEFINE_WAIT(wait); | 1601 | DEFINE_WAIT(wait); |
| 1572 | 1602 | ||
| 1573 | prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); | 1603 | prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); |
| 1574 | set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); | 1604 | set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); |
| 1575 | rc = sk_wait_event(sk, timeo, !skb_queue_empty(&sk->sk_receive_queue)); | 1605 | rc = sk_wait_event(sk, timeo, !skb_queue_empty(&sk->sk_receive_queue)); |
| 1576 | clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); | 1606 | clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); |
| 1577 | finish_wait(sk->sk_sleep, &wait); | 1607 | finish_wait(sk_sleep(sk), &wait); |
| 1578 | return rc; | 1608 | return rc; |
| 1579 | } | 1609 | } |
| 1580 | EXPORT_SYMBOL(sk_wait_data); | 1610 | EXPORT_SYMBOL(sk_wait_data); |
| @@ -1796,41 +1826,53 @@ EXPORT_SYMBOL(sock_no_sendpage); | |||
| 1796 | 1826 | ||
| 1797 | static void sock_def_wakeup(struct sock *sk) | 1827 | static void sock_def_wakeup(struct sock *sk) |
| 1798 | { | 1828 | { |
| 1799 | read_lock(&sk->sk_callback_lock); | 1829 | struct socket_wq *wq; |
| 1800 | if (sk_has_sleeper(sk)) | 1830 | |
| 1801 | wake_up_interruptible_all(sk->sk_sleep); | 1831 | rcu_read_lock(); |
| 1802 | read_unlock(&sk->sk_callback_lock); | 1832 | wq = rcu_dereference(sk->sk_wq); |
| 1833 | if (wq_has_sleeper(wq)) | ||
| 1834 | wake_up_interruptible_all(&wq->wait); | ||
| 1835 | rcu_read_unlock(); | ||
| 1803 | } | 1836 | } |
| 1804 | 1837 | ||
| 1805 | static void sock_def_error_report(struct sock *sk) | 1838 | static void sock_def_error_report(struct sock *sk) |
| 1806 | { | 1839 | { |
| 1807 | read_lock(&sk->sk_callback_lock); | 1840 | struct socket_wq *wq; |
| 1808 | if (sk_has_sleeper(sk)) | 1841 | |
| 1809 | wake_up_interruptible_poll(sk->sk_sleep, POLLERR); | 1842 | rcu_read_lock(); |
| 1843 | wq = rcu_dereference(sk->sk_wq); | ||
| 1844 | if (wq_has_sleeper(wq)) | ||
| 1845 | wake_up_interruptible_poll(&wq->wait, POLLERR); | ||
| 1810 | sk_wake_async(sk, SOCK_WAKE_IO, POLL_ERR); | 1846 | sk_wake_async(sk, SOCK_WAKE_IO, POLL_ERR); |
| 1811 | read_unlock(&sk->sk_callback_lock); | 1847 | rcu_read_unlock(); |
| 1812 | } | 1848 | } |
| 1813 | 1849 | ||
| 1814 | static void sock_def_readable(struct sock *sk, int len) | 1850 | static void sock_def_readable(struct sock *sk, int len) |
| 1815 | { | 1851 | { |
| 1816 | read_lock(&sk->sk_callback_lock); | 1852 | struct socket_wq *wq; |
| 1817 | if (sk_has_sleeper(sk)) | 1853 | |
| 1818 | wake_up_interruptible_sync_poll(sk->sk_sleep, POLLIN | | 1854 | rcu_read_lock(); |
| 1855 | wq = rcu_dereference(sk->sk_wq); | ||
| 1856 | if (wq_has_sleeper(wq)) | ||
| 1857 | wake_up_interruptible_sync_poll(&wq->wait, POLLIN | | ||
| 1819 | POLLRDNORM | POLLRDBAND); | 1858 | POLLRDNORM | POLLRDBAND); |
| 1820 | sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN); | 1859 | sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN); |
| 1821 | read_unlock(&sk->sk_callback_lock); | 1860 | rcu_read_unlock(); |
| 1822 | } | 1861 | } |
| 1823 | 1862 | ||
| 1824 | static void sock_def_write_space(struct sock *sk) | 1863 | static void sock_def_write_space(struct sock *sk) |
| 1825 | { | 1864 | { |
| 1826 | read_lock(&sk->sk_callback_lock); | 1865 | struct socket_wq *wq; |
| 1866 | |||
| 1867 | rcu_read_lock(); | ||
| 1827 | 1868 | ||
| 1828 | /* Do not wake up a writer until he can make "significant" | 1869 | /* Do not wake up a writer until he can make "significant" |
| 1829 | * progress. --DaveM | 1870 | * progress. --DaveM |
| 1830 | */ | 1871 | */ |
| 1831 | if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) { | 1872 | if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) { |
| 1832 | if (sk_has_sleeper(sk)) | 1873 | wq = rcu_dereference(sk->sk_wq); |
| 1833 | wake_up_interruptible_sync_poll(sk->sk_sleep, POLLOUT | | 1874 | if (wq_has_sleeper(wq)) |
| 1875 | wake_up_interruptible_sync_poll(&wq->wait, POLLOUT | | ||
| 1834 | POLLWRNORM | POLLWRBAND); | 1876 | POLLWRNORM | POLLWRBAND); |
| 1835 | 1877 | ||
| 1836 | /* Should agree with poll, otherwise some programs break */ | 1878 | /* Should agree with poll, otherwise some programs break */ |
| @@ -1838,7 +1880,7 @@ static void sock_def_write_space(struct sock *sk) | |||
| 1838 | sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); | 1880 | sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); |
| 1839 | } | 1881 | } |
| 1840 | 1882 | ||
| 1841 | read_unlock(&sk->sk_callback_lock); | 1883 | rcu_read_unlock(); |
| 1842 | } | 1884 | } |
| 1843 | 1885 | ||
| 1844 | static void sock_def_destruct(struct sock *sk) | 1886 | static void sock_def_destruct(struct sock *sk) |
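All of the sock_def_*() callbacks above move from sk_callback_lock plus sk->sk_sleep to an RCU-protected struct socket_wq. The waiter/waker pattern they rely on, sketched here for illustration only:

	/*
	 *   waiter (e.g. sk_wait_data)           waker (e.g. sock_def_readable)
	 *   --------------------------           ------------------------------
	 *   prepare_to_wait(sk_sleep(sk), ...)   <make data available>
	 *   <re-check condition>                 rcu_read_lock();
	 *   schedule_timeout(...)                wq = rcu_dereference(sk->sk_wq);
	 *   finish_wait(sk_sleep(sk), ...)       if (wq_has_sleeper(wq))
	 *                                                wake_up_interruptible_...(&wq->wait);
	 *                                        rcu_read_unlock();
	 *
	 * wq_has_sleeper() issues the memory barrier that the old read_lock of
	 * sk_callback_lock used to provide, so a waker that observes no sleeper
	 * can rely on the waiter seeing the new socket state when it re-checks
	 * its condition.
	 */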
| @@ -1885,7 +1927,6 @@ void sock_init_data(struct socket *sock, struct sock *sk) | |||
| 1885 | sk->sk_allocation = GFP_KERNEL; | 1927 | sk->sk_allocation = GFP_KERNEL; |
| 1886 | sk->sk_rcvbuf = sysctl_rmem_default; | 1928 | sk->sk_rcvbuf = sysctl_rmem_default; |
| 1887 | sk->sk_sndbuf = sysctl_wmem_default; | 1929 | sk->sk_sndbuf = sysctl_wmem_default; |
| 1888 | sk->sk_backlog.limit = sk->sk_rcvbuf << 1; | ||
| 1889 | sk->sk_state = TCP_CLOSE; | 1930 | sk->sk_state = TCP_CLOSE; |
| 1890 | sk_set_socket(sk, sock); | 1931 | sk_set_socket(sk, sock); |
| 1891 | 1932 | ||
| @@ -1893,12 +1934,12 @@ void sock_init_data(struct socket *sock, struct sock *sk) | |||
| 1893 | 1934 | ||
| 1894 | if (sock) { | 1935 | if (sock) { |
| 1895 | sk->sk_type = sock->type; | 1936 | sk->sk_type = sock->type; |
| 1896 | sk->sk_sleep = &sock->wait; | 1937 | sk->sk_wq = sock->wq; |
| 1897 | sock->sk = sk; | 1938 | sock->sk = sk; |
| 1898 | } else | 1939 | } else |
| 1899 | sk->sk_sleep = NULL; | 1940 | sk->sk_wq = NULL; |
| 1900 | 1941 | ||
| 1901 | rwlock_init(&sk->sk_dst_lock); | 1942 | spin_lock_init(&sk->sk_dst_lock); |
| 1902 | rwlock_init(&sk->sk_callback_lock); | 1943 | rwlock_init(&sk->sk_callback_lock); |
| 1903 | lockdep_set_class_and_name(&sk->sk_callback_lock, | 1944 | lockdep_set_class_and_name(&sk->sk_callback_lock, |
| 1904 | af_callback_keys + sk->sk_family, | 1945 | af_callback_keys + sk->sk_family, |
diff --git a/net/core/stream.c b/net/core/stream.c index a37debfeb1b2..cc196f42b8d8 100644 --- a/net/core/stream.c +++ b/net/core/stream.c | |||
| @@ -28,15 +28,19 @@ | |||
| 28 | void sk_stream_write_space(struct sock *sk) | 28 | void sk_stream_write_space(struct sock *sk) |
| 29 | { | 29 | { |
| 30 | struct socket *sock = sk->sk_socket; | 30 | struct socket *sock = sk->sk_socket; |
| 31 | struct socket_wq *wq; | ||
| 31 | 32 | ||
| 32 | if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk) && sock) { | 33 | if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk) && sock) { |
| 33 | clear_bit(SOCK_NOSPACE, &sock->flags); | 34 | clear_bit(SOCK_NOSPACE, &sock->flags); |
| 34 | 35 | ||
| 35 | if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) | 36 | rcu_read_lock(); |
| 36 | wake_up_interruptible_poll(sk->sk_sleep, POLLOUT | | 37 | wq = rcu_dereference(sk->sk_wq); |
| 38 | if (wq_has_sleeper(wq)) | ||
| 39 | wake_up_interruptible_poll(&wq->wait, POLLOUT | | ||
| 37 | POLLWRNORM | POLLWRBAND); | 40 | POLLWRNORM | POLLWRBAND); |
| 38 | if (sock->fasync_list && !(sk->sk_shutdown & SEND_SHUTDOWN)) | 41 | if (wq && wq->fasync_list && !(sk->sk_shutdown & SEND_SHUTDOWN)) |
| 39 | sock_wake_async(sock, SOCK_WAKE_SPACE, POLL_OUT); | 42 | sock_wake_async(sock, SOCK_WAKE_SPACE, POLL_OUT); |
| 43 | rcu_read_unlock(); | ||
| 40 | } | 44 | } |
| 41 | } | 45 | } |
| 42 | 46 | ||
| @@ -66,13 +70,13 @@ int sk_stream_wait_connect(struct sock *sk, long *timeo_p) | |||
| 66 | if (signal_pending(tsk)) | 70 | if (signal_pending(tsk)) |
| 67 | return sock_intr_errno(*timeo_p); | 71 | return sock_intr_errno(*timeo_p); |
| 68 | 72 | ||
| 69 | prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); | 73 | prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); |
| 70 | sk->sk_write_pending++; | 74 | sk->sk_write_pending++; |
| 71 | done = sk_wait_event(sk, timeo_p, | 75 | done = sk_wait_event(sk, timeo_p, |
| 72 | !sk->sk_err && | 76 | !sk->sk_err && |
| 73 | !((1 << sk->sk_state) & | 77 | !((1 << sk->sk_state) & |
| 74 | ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT))); | 78 | ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT))); |
| 75 | finish_wait(sk->sk_sleep, &wait); | 79 | finish_wait(sk_sleep(sk), &wait); |
| 76 | sk->sk_write_pending--; | 80 | sk->sk_write_pending--; |
| 77 | } while (!done); | 81 | } while (!done); |
| 78 | return 0; | 82 | return 0; |
| @@ -96,13 +100,13 @@ void sk_stream_wait_close(struct sock *sk, long timeout) | |||
| 96 | DEFINE_WAIT(wait); | 100 | DEFINE_WAIT(wait); |
| 97 | 101 | ||
| 98 | do { | 102 | do { |
| 99 | prepare_to_wait(sk->sk_sleep, &wait, | 103 | prepare_to_wait(sk_sleep(sk), &wait, |
| 100 | TASK_INTERRUPTIBLE); | 104 | TASK_INTERRUPTIBLE); |
| 101 | if (sk_wait_event(sk, &timeout, !sk_stream_closing(sk))) | 105 | if (sk_wait_event(sk, &timeout, !sk_stream_closing(sk))) |
| 102 | break; | 106 | break; |
| 103 | } while (!signal_pending(current) && timeout); | 107 | } while (!signal_pending(current) && timeout); |
| 104 | 108 | ||
| 105 | finish_wait(sk->sk_sleep, &wait); | 109 | finish_wait(sk_sleep(sk), &wait); |
| 106 | } | 110 | } |
| 107 | } | 111 | } |
| 108 | 112 | ||
| @@ -126,7 +130,7 @@ int sk_stream_wait_memory(struct sock *sk, long *timeo_p) | |||
| 126 | while (1) { | 130 | while (1) { |
| 127 | set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); | 131 | set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); |
| 128 | 132 | ||
| 129 | prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); | 133 | prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); |
| 130 | 134 | ||
| 131 | if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) | 135 | if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) |
| 132 | goto do_error; | 136 | goto do_error; |
| @@ -157,7 +161,7 @@ int sk_stream_wait_memory(struct sock *sk, long *timeo_p) | |||
| 157 | *timeo_p = current_timeo; | 161 | *timeo_p = current_timeo; |
| 158 | } | 162 | } |
| 159 | out: | 163 | out: |
| 160 | finish_wait(sk->sk_sleep, &wait); | 164 | finish_wait(sk_sleep(sk), &wait); |
| 161 | return err; | 165 | return err; |
| 162 | 166 | ||
| 163 | do_error: | 167 | do_error: |
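Throughout sock.c and stream.c (and datagram.c earlier in this diff) direct sk->sk_sleep dereferences become sk_sleep(sk), which now resolves through the RCU-managed sk->sk_wq. Roughly, and hedged because the exact accessor lives in include/net/sock.h and may differ in detail:

	static inline wait_queue_head_t *sk_sleep(struct sock *sk)
	{
		return &sk->sk_wq->wait;
	}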
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index b7b6b8208f75..01eee5d984be 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c | |||
| @@ -11,12 +11,72 @@ | |||
| 11 | #include <linux/socket.h> | 11 | #include <linux/socket.h> |
| 12 | #include <linux/netdevice.h> | 12 | #include <linux/netdevice.h> |
| 13 | #include <linux/ratelimit.h> | 13 | #include <linux/ratelimit.h> |
| 14 | #include <linux/vmalloc.h> | ||
| 14 | #include <linux/init.h> | 15 | #include <linux/init.h> |
| 15 | #include <linux/slab.h> | 16 | #include <linux/slab.h> |
| 16 | 17 | ||
| 17 | #include <net/ip.h> | 18 | #include <net/ip.h> |
| 18 | #include <net/sock.h> | 19 | #include <net/sock.h> |
| 19 | 20 | ||
| 21 | #ifdef CONFIG_RPS | ||
| 22 | static int rps_sock_flow_sysctl(ctl_table *table, int write, | ||
| 23 | void __user *buffer, size_t *lenp, loff_t *ppos) | ||
| 24 | { | ||
| 25 | unsigned int orig_size, size; | ||
| 26 | int ret, i; | ||
| 27 | ctl_table tmp = { | ||
| 28 | .data = &size, | ||
| 29 | .maxlen = sizeof(size), | ||
| 30 | .mode = table->mode | ||
| 31 | }; | ||
| 32 | struct rps_sock_flow_table *orig_sock_table, *sock_table; | ||
| 33 | static DEFINE_MUTEX(sock_flow_mutex); | ||
| 34 | |||
| 35 | mutex_lock(&sock_flow_mutex); | ||
| 36 | |||
| 37 | orig_sock_table = rps_sock_flow_table; | ||
| 38 | size = orig_size = orig_sock_table ? orig_sock_table->mask + 1 : 0; | ||
| 39 | |||
| 40 | ret = proc_dointvec(&tmp, write, buffer, lenp, ppos); | ||
| 41 | |||
| 42 | if (write) { | ||
| 43 | if (size) { | ||
| 44 | if (size > 1<<30) { | ||
| 45 | /* Enforce limit to prevent overflow */ | ||
| 46 | mutex_unlock(&sock_flow_mutex); | ||
| 47 | return -EINVAL; | ||
| 48 | } | ||
| 49 | size = roundup_pow_of_two(size); | ||
| 50 | if (size != orig_size) { | ||
| 51 | sock_table = | ||
| 52 | vmalloc(RPS_SOCK_FLOW_TABLE_SIZE(size)); | ||
| 53 | if (!sock_table) { | ||
| 54 | mutex_unlock(&sock_flow_mutex); | ||
| 55 | return -ENOMEM; | ||
| 56 | } | ||
| 57 | |||
| 58 | sock_table->mask = size - 1; | ||
| 59 | } else | ||
| 60 | sock_table = orig_sock_table; | ||
| 61 | |||
| 62 | for (i = 0; i < size; i++) | ||
| 63 | sock_table->ents[i] = RPS_NO_CPU; | ||
| 64 | } else | ||
| 65 | sock_table = NULL; | ||
| 66 | |||
| 67 | if (sock_table != orig_sock_table) { | ||
| 68 | rcu_assign_pointer(rps_sock_flow_table, sock_table); | ||
| 69 | synchronize_rcu(); | ||
| 70 | vfree(orig_sock_table); | ||
| 71 | } | ||
| 72 | } | ||
| 73 | |||
| 74 | mutex_unlock(&sock_flow_mutex); | ||
| 75 | |||
| 76 | return ret; | ||
| 77 | } | ||
| 78 | #endif /* CONFIG_RPS */ | ||
| 79 | |||
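The handler above backs the new net.core.rps_sock_flow_entries sysctl registered further down: a non-zero write is rounded up to a power of two, a global receive-flow-steering table of that size is allocated, and it is swapped in under RCU (e.g. "sysctl -w net.core.rps_sock_flow_entries=32768"). The power-of-two rounding matters because consumers index the table with a masked hash; an illustrative, not verbatim, recording helper:

	static inline void example_record_flow(struct rps_sock_flow_table *table,
					       u32 hash)
	{
		if (table && hash)
			table->ents[hash & table->mask] = raw_smp_processor_id();
	}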
| 20 | static struct ctl_table net_core_table[] = { | 80 | static struct ctl_table net_core_table[] = { |
| 21 | #ifdef CONFIG_NET | 81 | #ifdef CONFIG_NET |
| 22 | { | 82 | { |
| @@ -62,6 +122,13 @@ static struct ctl_table net_core_table[] = { | |||
| 62 | .proc_handler = proc_dointvec | 122 | .proc_handler = proc_dointvec |
| 63 | }, | 123 | }, |
| 64 | { | 124 | { |
| 125 | .procname = "netdev_tstamp_prequeue", | ||
| 126 | .data = &netdev_tstamp_prequeue, | ||
| 127 | .maxlen = sizeof(int), | ||
| 128 | .mode = 0644, | ||
| 129 | .proc_handler = proc_dointvec | ||
| 130 | }, | ||
| 131 | { | ||
| 65 | .procname = "message_cost", | 132 | .procname = "message_cost", |
| 66 | .data = &net_ratelimit_state.interval, | 133 | .data = &net_ratelimit_state.interval, |
| 67 | .maxlen = sizeof(int), | 134 | .maxlen = sizeof(int), |
| @@ -82,6 +149,14 @@ static struct ctl_table net_core_table[] = { | |||
| 82 | .mode = 0644, | 149 | .mode = 0644, |
| 83 | .proc_handler = proc_dointvec | 150 | .proc_handler = proc_dointvec |
| 84 | }, | 151 | }, |
| 152 | #ifdef CONFIG_RPS | ||
| 153 | { | ||
| 154 | .procname = "rps_sock_flow_entries", | ||
| 155 | .maxlen = sizeof(int), | ||
| 156 | .mode = 0644, | ||
| 157 | .proc_handler = rps_sock_flow_sysctl | ||
| 158 | }, | ||
| 159 | #endif | ||
| 85 | #endif /* CONFIG_NET */ | 160 | #endif /* CONFIG_NET */ |
| 86 | { | 161 | { |
| 87 | .procname = "netdev_budget", | 162 | .procname = "netdev_budget", |
