aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
authorGreg Kroah-Hartman <gregkh@linuxfoundation.org>2013-10-19 16:05:38 -0400
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>2013-10-19 16:05:38 -0400
commita7204d72db251784808b0c050220992d7f833a2c (patch)
tree3491452ea74b039f3278fd95bb7ad7c88b6b3631 /net
parentba6857b2d49646f2d4c245ff58d95d145f380177 (diff)
parent31d141e3a666269a3b6fcccddb0351caf7454240 (diff)
Merge 3.12-rc6 into driver-core-next
We want these fixes here too. Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Diffstat (limited to 'net')
-rw-r--r--net/802/mrp.c27
-rw-r--r--net/bluetooth/hci_core.c26
-rw-r--r--net/bluetooth/hci_event.c6
-rw-r--r--net/bluetooth/l2cap_core.c7
-rw-r--r--net/bluetooth/rfcomm/tty.c35
-rw-r--r--net/core/dev.c49
-rw-r--r--net/core/flow_dissector.c4
-rw-r--r--net/core/secure_seq.c27
-rw-r--r--net/ipv4/af_inet.c4
-rw-r--r--net/ipv4/igmp.c4
-rw-r--r--net/ipv4/ip_tunnel.c22
-rw-r--r--net/ipv4/ip_tunnel_core.c2
-rw-r--r--net/ipv4/netfilter/ipt_SYNPROXY.c10
-rw-r--r--net/ipv4/raw.c4
-rw-r--r--net/ipv4/tcp_output.c17
-rw-r--r--net/ipv4/udp.c2
-rw-r--r--net/ipv6/addrconf.c79
-rw-r--r--net/ipv6/ip6_gre.c4
-rw-r--r--net/ipv6/ip6_output.c53
-rw-r--r--net/ipv6/ip6_tunnel.c3
-rw-r--r--net/ipv6/mcast.c6
-rw-r--r--net/ipv6/netfilter/ip6t_SYNPROXY.c10
-rw-r--r--net/ipv6/raw.c4
-rw-r--r--net/ipv6/sit.c86
-rw-r--r--net/ipv6/udp.c4
-rw-r--r--net/lapb/lapb_timer.c1
-rw-r--r--net/netfilter/ipvs/ip_vs_core.c12
-rw-r--r--net/netfilter/ipvs/ip_vs_ctl.c86
-rw-r--r--net/netfilter/ipvs/ip_vs_est.c4
-rw-r--r--net/netfilter/ipvs/ip_vs_lblc.c72
-rw-r--r--net/netfilter/ipvs/ip_vs_lblcr.c62
-rw-r--r--net/netfilter/ipvs/ip_vs_nq.c8
-rw-r--r--net/netfilter/ipvs/ip_vs_sed.c8
-rw-r--r--net/netfilter/ipvs/ip_vs_wlc.c6
-rw-r--r--net/netfilter/nf_synproxy_core.c12
-rw-r--r--net/sched/sch_fq.c102
-rw-r--r--net/sysctl_net.c4
37 files changed, 516 insertions, 356 deletions
diff --git a/net/802/mrp.c b/net/802/mrp.c
index 1eb05d80b07b..3ed616215870 100644
--- a/net/802/mrp.c
+++ b/net/802/mrp.c
@@ -24,6 +24,11 @@
24static unsigned int mrp_join_time __read_mostly = 200; 24static unsigned int mrp_join_time __read_mostly = 200;
25module_param(mrp_join_time, uint, 0644); 25module_param(mrp_join_time, uint, 0644);
26MODULE_PARM_DESC(mrp_join_time, "Join time in ms (default 200ms)"); 26MODULE_PARM_DESC(mrp_join_time, "Join time in ms (default 200ms)");
27
28static unsigned int mrp_periodic_time __read_mostly = 1000;
29module_param(mrp_periodic_time, uint, 0644);
30MODULE_PARM_DESC(mrp_periodic_time, "Periodic time in ms (default 1s)");
31
27MODULE_LICENSE("GPL"); 32MODULE_LICENSE("GPL");
28 33
29static const u8 34static const u8
@@ -595,6 +600,24 @@ static void mrp_join_timer(unsigned long data)
595 mrp_join_timer_arm(app); 600 mrp_join_timer_arm(app);
596} 601}
597 602
603static void mrp_periodic_timer_arm(struct mrp_applicant *app)
604{
605 mod_timer(&app->periodic_timer,
606 jiffies + msecs_to_jiffies(mrp_periodic_time));
607}
608
609static void mrp_periodic_timer(unsigned long data)
610{
611 struct mrp_applicant *app = (struct mrp_applicant *)data;
612
613 spin_lock(&app->lock);
614 mrp_mad_event(app, MRP_EVENT_PERIODIC);
615 mrp_pdu_queue(app);
616 spin_unlock(&app->lock);
617
618 mrp_periodic_timer_arm(app);
619}
620
598static int mrp_pdu_parse_end_mark(struct sk_buff *skb, int *offset) 621static int mrp_pdu_parse_end_mark(struct sk_buff *skb, int *offset)
599{ 622{
600 __be16 endmark; 623 __be16 endmark;
@@ -845,6 +868,9 @@ int mrp_init_applicant(struct net_device *dev, struct mrp_application *appl)
845 rcu_assign_pointer(dev->mrp_port->applicants[appl->type], app); 868 rcu_assign_pointer(dev->mrp_port->applicants[appl->type], app);
846 setup_timer(&app->join_timer, mrp_join_timer, (unsigned long)app); 869 setup_timer(&app->join_timer, mrp_join_timer, (unsigned long)app);
847 mrp_join_timer_arm(app); 870 mrp_join_timer_arm(app);
871 setup_timer(&app->periodic_timer, mrp_periodic_timer,
872 (unsigned long)app);
873 mrp_periodic_timer_arm(app);
848 return 0; 874 return 0;
849 875
850err3: 876err3:
@@ -870,6 +896,7 @@ void mrp_uninit_applicant(struct net_device *dev, struct mrp_application *appl)
870 * all pending messages before the applicant is gone. 896 * all pending messages before the applicant is gone.
871 */ 897 */
872 del_timer_sync(&app->join_timer); 898 del_timer_sync(&app->join_timer);
899 del_timer_sync(&app->periodic_timer);
873 900
874 spin_lock_bh(&app->lock); 901 spin_lock_bh(&app->lock);
875 mrp_mad_event(app, MRP_EVENT_TX); 902 mrp_mad_event(app, MRP_EVENT_TX);
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 634debab4d54..fb7356fcfe51 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -1146,7 +1146,11 @@ int hci_dev_open(__u16 dev)
1146 goto done; 1146 goto done;
1147 } 1147 }
1148 1148
1149 if (hdev->rfkill && rfkill_blocked(hdev->rfkill)) { 1149 /* Check for rfkill but allow the HCI setup stage to proceed
1150 * (which in itself doesn't cause any RF activity).
1151 */
1152 if (test_bit(HCI_RFKILLED, &hdev->dev_flags) &&
1153 !test_bit(HCI_SETUP, &hdev->dev_flags)) {
1150 ret = -ERFKILL; 1154 ret = -ERFKILL;
1151 goto done; 1155 goto done;
1152 } 1156 }
@@ -1566,10 +1570,13 @@ static int hci_rfkill_set_block(void *data, bool blocked)
1566 1570
1567 BT_DBG("%p name %s blocked %d", hdev, hdev->name, blocked); 1571 BT_DBG("%p name %s blocked %d", hdev, hdev->name, blocked);
1568 1572
1569 if (!blocked) 1573 if (blocked) {
1570 return 0; 1574 set_bit(HCI_RFKILLED, &hdev->dev_flags);
1571 1575 if (!test_bit(HCI_SETUP, &hdev->dev_flags))
1572 hci_dev_do_close(hdev); 1576 hci_dev_do_close(hdev);
1577 } else {
1578 clear_bit(HCI_RFKILLED, &hdev->dev_flags);
1579 }
1573 1580
1574 return 0; 1581 return 0;
1575} 1582}
@@ -1591,9 +1598,13 @@ static void hci_power_on(struct work_struct *work)
1591 return; 1598 return;
1592 } 1599 }
1593 1600
1594 if (test_bit(HCI_AUTO_OFF, &hdev->dev_flags)) 1601 if (test_bit(HCI_RFKILLED, &hdev->dev_flags)) {
1602 clear_bit(HCI_AUTO_OFF, &hdev->dev_flags);
1603 hci_dev_do_close(hdev);
1604 } else if (test_bit(HCI_AUTO_OFF, &hdev->dev_flags)) {
1595 queue_delayed_work(hdev->req_workqueue, &hdev->power_off, 1605 queue_delayed_work(hdev->req_workqueue, &hdev->power_off,
1596 HCI_AUTO_OFF_TIMEOUT); 1606 HCI_AUTO_OFF_TIMEOUT);
1607 }
1597 1608
1598 if (test_and_clear_bit(HCI_SETUP, &hdev->dev_flags)) 1609 if (test_and_clear_bit(HCI_SETUP, &hdev->dev_flags))
1599 mgmt_index_added(hdev); 1610 mgmt_index_added(hdev);
@@ -2209,6 +2220,9 @@ int hci_register_dev(struct hci_dev *hdev)
2209 } 2220 }
2210 } 2221 }
2211 2222
2223 if (hdev->rfkill && rfkill_blocked(hdev->rfkill))
2224 set_bit(HCI_RFKILLED, &hdev->dev_flags);
2225
2212 set_bit(HCI_SETUP, &hdev->dev_flags); 2226 set_bit(HCI_SETUP, &hdev->dev_flags);
2213 2227
2214 if (hdev->dev_type != HCI_AMP) 2228 if (hdev->dev_type != HCI_AMP)
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 94aab73f89d4..8db3e89fae35 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -3557,7 +3557,11 @@ static void hci_le_ltk_request_evt(struct hci_dev *hdev, struct sk_buff *skb)
3557 cp.handle = cpu_to_le16(conn->handle); 3557 cp.handle = cpu_to_le16(conn->handle);
3558 3558
3559 if (ltk->authenticated) 3559 if (ltk->authenticated)
3560 conn->sec_level = BT_SECURITY_HIGH; 3560 conn->pending_sec_level = BT_SECURITY_HIGH;
3561 else
3562 conn->pending_sec_level = BT_SECURITY_MEDIUM;
3563
3564 conn->enc_key_size = ltk->enc_size;
3561 3565
3562 hci_send_cmd(hdev, HCI_OP_LE_LTK_REPLY, sizeof(cp), &cp); 3566 hci_send_cmd(hdev, HCI_OP_LE_LTK_REPLY, sizeof(cp), &cp);
3563 3567
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index b3bb7bca8e60..63fa11109a1c 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -3755,6 +3755,13 @@ static struct l2cap_chan *l2cap_connect(struct l2cap_conn *conn,
3755 3755
3756 sk = chan->sk; 3756 sk = chan->sk;
3757 3757
3758 /* For certain devices (ex: HID mouse), support for authentication,
3759 * pairing and bonding is optional. For such devices, inorder to avoid
3760 * the ACL alive for too long after L2CAP disconnection, reset the ACL
3761 * disc_timeout back to HCI_DISCONN_TIMEOUT during L2CAP connect.
3762 */
3763 conn->hcon->disc_timeout = HCI_DISCONN_TIMEOUT;
3764
3758 bacpy(&bt_sk(sk)->src, conn->src); 3765 bacpy(&bt_sk(sk)->src, conn->src);
3759 bacpy(&bt_sk(sk)->dst, conn->dst); 3766 bacpy(&bt_sk(sk)->dst, conn->dst);
3760 chan->psm = psm; 3767 chan->psm = psm;
diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c
index 6d126faf145f..84fcf9fff3ea 100644
--- a/net/bluetooth/rfcomm/tty.c
+++ b/net/bluetooth/rfcomm/tty.c
@@ -569,7 +569,6 @@ static void rfcomm_dev_data_ready(struct rfcomm_dlc *dlc, struct sk_buff *skb)
569static void rfcomm_dev_state_change(struct rfcomm_dlc *dlc, int err) 569static void rfcomm_dev_state_change(struct rfcomm_dlc *dlc, int err)
570{ 570{
571 struct rfcomm_dev *dev = dlc->owner; 571 struct rfcomm_dev *dev = dlc->owner;
572 struct tty_struct *tty;
573 if (!dev) 572 if (!dev)
574 return; 573 return;
575 574
@@ -581,38 +580,8 @@ static void rfcomm_dev_state_change(struct rfcomm_dlc *dlc, int err)
581 DPM_ORDER_DEV_AFTER_PARENT); 580 DPM_ORDER_DEV_AFTER_PARENT);
582 581
583 wake_up_interruptible(&dev->port.open_wait); 582 wake_up_interruptible(&dev->port.open_wait);
584 } else if (dlc->state == BT_CLOSED) { 583 } else if (dlc->state == BT_CLOSED)
585 tty = tty_port_tty_get(&dev->port); 584 tty_port_tty_hangup(&dev->port, false);
586 if (!tty) {
587 if (test_bit(RFCOMM_RELEASE_ONHUP, &dev->flags)) {
588 /* Drop DLC lock here to avoid deadlock
589 * 1. rfcomm_dev_get will take rfcomm_dev_lock
590 * but in rfcomm_dev_add there's lock order:
591 * rfcomm_dev_lock -> dlc lock
592 * 2. tty_port_put will deadlock if it's
593 * the last reference
594 *
595 * FIXME: when we release the lock anything
596 * could happen to dev, even its destruction
597 */
598 rfcomm_dlc_unlock(dlc);
599 if (rfcomm_dev_get(dev->id) == NULL) {
600 rfcomm_dlc_lock(dlc);
601 return;
602 }
603
604 if (!test_and_set_bit(RFCOMM_TTY_RELEASED,
605 &dev->flags))
606 tty_port_put(&dev->port);
607
608 tty_port_put(&dev->port);
609 rfcomm_dlc_lock(dlc);
610 }
611 } else {
612 tty_hangup(tty);
613 tty_kref_put(tty);
614 }
615 }
616} 585}
617 586
618static void rfcomm_dev_modem_status(struct rfcomm_dlc *dlc, u8 v24_sig) 587static void rfcomm_dev_modem_status(struct rfcomm_dlc *dlc, u8 v24_sig)
diff --git a/net/core/dev.c b/net/core/dev.c
index 5c713f2239cc..65f829cfd928 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5247,10 +5247,12 @@ static int dev_new_index(struct net *net)
5247 5247
5248/* Delayed registration/unregisteration */ 5248/* Delayed registration/unregisteration */
5249static LIST_HEAD(net_todo_list); 5249static LIST_HEAD(net_todo_list);
5250static DECLARE_WAIT_QUEUE_HEAD(netdev_unregistering_wq);
5250 5251
5251static void net_set_todo(struct net_device *dev) 5252static void net_set_todo(struct net_device *dev)
5252{ 5253{
5253 list_add_tail(&dev->todo_list, &net_todo_list); 5254 list_add_tail(&dev->todo_list, &net_todo_list);
5255 dev_net(dev)->dev_unreg_count++;
5254} 5256}
5255 5257
5256static void rollback_registered_many(struct list_head *head) 5258static void rollback_registered_many(struct list_head *head)
@@ -5918,6 +5920,12 @@ void netdev_run_todo(void)
5918 if (dev->destructor) 5920 if (dev->destructor)
5919 dev->destructor(dev); 5921 dev->destructor(dev);
5920 5922
5923 /* Report a network device has been unregistered */
5924 rtnl_lock();
5925 dev_net(dev)->dev_unreg_count--;
5926 __rtnl_unlock();
5927 wake_up(&netdev_unregistering_wq);
5928
5921 /* Free network device */ 5929 /* Free network device */
5922 kobject_put(&dev->dev.kobj); 5930 kobject_put(&dev->dev.kobj);
5923 } 5931 }
@@ -6603,6 +6611,34 @@ static void __net_exit default_device_exit(struct net *net)
6603 rtnl_unlock(); 6611 rtnl_unlock();
6604} 6612}
6605 6613
6614static void __net_exit rtnl_lock_unregistering(struct list_head *net_list)
6615{
6616 /* Return with the rtnl_lock held when there are no network
6617 * devices unregistering in any network namespace in net_list.
6618 */
6619 struct net *net;
6620 bool unregistering;
6621 DEFINE_WAIT(wait);
6622
6623 for (;;) {
6624 prepare_to_wait(&netdev_unregistering_wq, &wait,
6625 TASK_UNINTERRUPTIBLE);
6626 unregistering = false;
6627 rtnl_lock();
6628 list_for_each_entry(net, net_list, exit_list) {
6629 if (net->dev_unreg_count > 0) {
6630 unregistering = true;
6631 break;
6632 }
6633 }
6634 if (!unregistering)
6635 break;
6636 __rtnl_unlock();
6637 schedule();
6638 }
6639 finish_wait(&netdev_unregistering_wq, &wait);
6640}
6641
6606static void __net_exit default_device_exit_batch(struct list_head *net_list) 6642static void __net_exit default_device_exit_batch(struct list_head *net_list)
6607{ 6643{
6608 /* At exit all network devices most be removed from a network 6644 /* At exit all network devices most be removed from a network
@@ -6614,7 +6650,18 @@ static void __net_exit default_device_exit_batch(struct list_head *net_list)
6614 struct net *net; 6650 struct net *net;
6615 LIST_HEAD(dev_kill_list); 6651 LIST_HEAD(dev_kill_list);
6616 6652
6617 rtnl_lock(); 6653 /* To prevent network device cleanup code from dereferencing
6654 * loopback devices or network devices that have been freed
6655 * wait here for all pending unregistrations to complete,
6656 * before unregistring the loopback device and allowing the
6657 * network namespace be freed.
6658 *
6659 * The netdev todo list containing all network devices
6660 * unregistrations that happen in default_device_exit_batch
6661 * will run in the rtnl_unlock() at the end of
6662 * default_device_exit_batch.
6663 */
6664 rtnl_lock_unregistering(net_list);
6618 list_for_each_entry(net, net_list, exit_list) { 6665 list_for_each_entry(net, net_list, exit_list) {
6619 for_each_netdev_reverse(net, dev) { 6666 for_each_netdev_reverse(net, dev) {
6620 if (dev->rtnl_link_ops) 6667 if (dev->rtnl_link_ops)
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 1929af87b260..8d7d0dd72db2 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -154,8 +154,8 @@ ipv6:
154 if (poff >= 0) { 154 if (poff >= 0) {
155 __be32 *ports, _ports; 155 __be32 *ports, _ports;
156 156
157 nhoff += poff; 157 ports = skb_header_pointer(skb, nhoff + poff,
158 ports = skb_header_pointer(skb, nhoff, sizeof(_ports), &_ports); 158 sizeof(_ports), &_ports);
159 if (ports) 159 if (ports)
160 flow->ports = *ports; 160 flow->ports = *ports;
161 } 161 }
diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c
index 6a2f13cee86a..3f1ec1586ae1 100644
--- a/net/core/secure_seq.c
+++ b/net/core/secure_seq.c
@@ -10,11 +10,24 @@
10 10
11#include <net/secure_seq.h> 11#include <net/secure_seq.h>
12 12
13static u32 net_secret[MD5_MESSAGE_BYTES / 4] ____cacheline_aligned; 13#define NET_SECRET_SIZE (MD5_MESSAGE_BYTES / 4)
14 14
15void net_secret_init(void) 15static u32 net_secret[NET_SECRET_SIZE] ____cacheline_aligned;
16
17static void net_secret_init(void)
16{ 18{
17 get_random_bytes(net_secret, sizeof(net_secret)); 19 u32 tmp;
20 int i;
21
22 if (likely(net_secret[0]))
23 return;
24
25 for (i = NET_SECRET_SIZE; i > 0;) {
26 do {
27 get_random_bytes(&tmp, sizeof(tmp));
28 } while (!tmp);
29 cmpxchg(&net_secret[--i], 0, tmp);
30 }
18} 31}
19 32
20#ifdef CONFIG_INET 33#ifdef CONFIG_INET
@@ -42,6 +55,7 @@ __u32 secure_tcpv6_sequence_number(const __be32 *saddr, const __be32 *daddr,
42 u32 hash[MD5_DIGEST_WORDS]; 55 u32 hash[MD5_DIGEST_WORDS];
43 u32 i; 56 u32 i;
44 57
58 net_secret_init();
45 memcpy(hash, saddr, 16); 59 memcpy(hash, saddr, 16);
46 for (i = 0; i < 4; i++) 60 for (i = 0; i < 4; i++)
47 secret[i] = net_secret[i] + (__force u32)daddr[i]; 61 secret[i] = net_secret[i] + (__force u32)daddr[i];
@@ -63,6 +77,7 @@ u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr,
63 u32 hash[MD5_DIGEST_WORDS]; 77 u32 hash[MD5_DIGEST_WORDS];
64 u32 i; 78 u32 i;
65 79
80 net_secret_init();
66 memcpy(hash, saddr, 16); 81 memcpy(hash, saddr, 16);
67 for (i = 0; i < 4; i++) 82 for (i = 0; i < 4; i++)
68 secret[i] = net_secret[i] + (__force u32) daddr[i]; 83 secret[i] = net_secret[i] + (__force u32) daddr[i];
@@ -82,6 +97,7 @@ __u32 secure_ip_id(__be32 daddr)
82{ 97{
83 u32 hash[MD5_DIGEST_WORDS]; 98 u32 hash[MD5_DIGEST_WORDS];
84 99
100 net_secret_init();
85 hash[0] = (__force __u32) daddr; 101 hash[0] = (__force __u32) daddr;
86 hash[1] = net_secret[13]; 102 hash[1] = net_secret[13];
87 hash[2] = net_secret[14]; 103 hash[2] = net_secret[14];
@@ -96,6 +112,7 @@ __u32 secure_ipv6_id(const __be32 daddr[4])
96{ 112{
97 __u32 hash[4]; 113 __u32 hash[4];
98 114
115 net_secret_init();
99 memcpy(hash, daddr, 16); 116 memcpy(hash, daddr, 16);
100 md5_transform(hash, net_secret); 117 md5_transform(hash, net_secret);
101 118
@@ -107,6 +124,7 @@ __u32 secure_tcp_sequence_number(__be32 saddr, __be32 daddr,
107{ 124{
108 u32 hash[MD5_DIGEST_WORDS]; 125 u32 hash[MD5_DIGEST_WORDS];
109 126
127 net_secret_init();
110 hash[0] = (__force u32)saddr; 128 hash[0] = (__force u32)saddr;
111 hash[1] = (__force u32)daddr; 129 hash[1] = (__force u32)daddr;
112 hash[2] = ((__force u16)sport << 16) + (__force u16)dport; 130 hash[2] = ((__force u16)sport << 16) + (__force u16)dport;
@@ -121,6 +139,7 @@ u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport)
121{ 139{
122 u32 hash[MD5_DIGEST_WORDS]; 140 u32 hash[MD5_DIGEST_WORDS];
123 141
142 net_secret_init();
124 hash[0] = (__force u32)saddr; 143 hash[0] = (__force u32)saddr;
125 hash[1] = (__force u32)daddr; 144 hash[1] = (__force u32)daddr;
126 hash[2] = (__force u32)dport ^ net_secret[14]; 145 hash[2] = (__force u32)dport ^ net_secret[14];
@@ -140,6 +159,7 @@ u64 secure_dccp_sequence_number(__be32 saddr, __be32 daddr,
140 u32 hash[MD5_DIGEST_WORDS]; 159 u32 hash[MD5_DIGEST_WORDS];
141 u64 seq; 160 u64 seq;
142 161
162 net_secret_init();
143 hash[0] = (__force u32)saddr; 163 hash[0] = (__force u32)saddr;
144 hash[1] = (__force u32)daddr; 164 hash[1] = (__force u32)daddr;
145 hash[2] = ((__force u16)sport << 16) + (__force u16)dport; 165 hash[2] = ((__force u16)sport << 16) + (__force u16)dport;
@@ -164,6 +184,7 @@ u64 secure_dccpv6_sequence_number(__be32 *saddr, __be32 *daddr,
164 u64 seq; 184 u64 seq;
165 u32 i; 185 u32 i;
166 186
187 net_secret_init();
167 memcpy(hash, saddr, 16); 188 memcpy(hash, saddr, 16);
168 for (i = 0; i < 4; i++) 189 for (i = 0; i < 4; i++)
169 secret[i] = net_secret[i] + daddr[i]; 190 secret[i] = net_secret[i] + daddr[i];
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 7a1874b7b8fd..cfeb85cff4f0 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -263,10 +263,8 @@ void build_ehash_secret(void)
263 get_random_bytes(&rnd, sizeof(rnd)); 263 get_random_bytes(&rnd, sizeof(rnd));
264 } while (rnd == 0); 264 } while (rnd == 0);
265 265
266 if (cmpxchg(&inet_ehash_secret, 0, rnd) == 0) { 266 if (cmpxchg(&inet_ehash_secret, 0, rnd) == 0)
267 get_random_bytes(&ipv6_hash_secret, sizeof(ipv6_hash_secret)); 267 get_random_bytes(&ipv6_hash_secret, sizeof(ipv6_hash_secret));
268 net_secret_init();
269 }
270} 268}
271EXPORT_SYMBOL(build_ehash_secret); 269EXPORT_SYMBOL(build_ehash_secret);
272 270
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index dace87f06e5f..7defdc9ba167 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -736,7 +736,7 @@ static void igmp_gq_timer_expire(unsigned long data)
736 736
737 in_dev->mr_gq_running = 0; 737 in_dev->mr_gq_running = 0;
738 igmpv3_send_report(in_dev, NULL); 738 igmpv3_send_report(in_dev, NULL);
739 __in_dev_put(in_dev); 739 in_dev_put(in_dev);
740} 740}
741 741
742static void igmp_ifc_timer_expire(unsigned long data) 742static void igmp_ifc_timer_expire(unsigned long data)
@@ -749,7 +749,7 @@ static void igmp_ifc_timer_expire(unsigned long data)
749 igmp_ifc_start_timer(in_dev, 749 igmp_ifc_start_timer(in_dev,
750 unsolicited_report_interval(in_dev)); 750 unsolicited_report_interval(in_dev));
751 } 751 }
752 __in_dev_put(in_dev); 752 in_dev_put(in_dev);
753} 753}
754 754
755static void igmp_ifc_event(struct in_device *in_dev) 755static void igmp_ifc_event(struct in_device *in_dev)
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index ac9fabe0300f..63a6d6d6b875 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -623,6 +623,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
623 tunnel->err_count = 0; 623 tunnel->err_count = 0;
624 } 624 }
625 625
626 tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
626 ttl = tnl_params->ttl; 627 ttl = tnl_params->ttl;
627 if (ttl == 0) { 628 if (ttl == 0) {
628 if (skb->protocol == htons(ETH_P_IP)) 629 if (skb->protocol == htons(ETH_P_IP))
@@ -641,18 +642,17 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
641 642
642 max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr) 643 max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
643 + rt->dst.header_len; 644 + rt->dst.header_len;
644 if (max_headroom > dev->needed_headroom) { 645 if (max_headroom > dev->needed_headroom)
645 dev->needed_headroom = max_headroom; 646 dev->needed_headroom = max_headroom;
646 if (skb_cow_head(skb, dev->needed_headroom)) { 647
647 dev->stats.tx_dropped++; 648 if (skb_cow_head(skb, dev->needed_headroom)) {
648 dev_kfree_skb(skb); 649 dev->stats.tx_dropped++;
649 return; 650 dev_kfree_skb(skb);
650 } 651 return;
651 } 652 }
652 653
653 err = iptunnel_xmit(rt, skb, fl4.saddr, fl4.daddr, protocol, 654 err = iptunnel_xmit(rt, skb, fl4.saddr, fl4.daddr, protocol,
654 ip_tunnel_ecn_encap(tos, inner_iph, skb), ttl, df, 655 tos, ttl, df, !net_eq(tunnel->net, dev_net(dev)));
655 !net_eq(tunnel->net, dev_net(dev)));
656 iptunnel_xmit_stats(err, &dev->stats, dev->tstats); 656 iptunnel_xmit_stats(err, &dev->stats, dev->tstats);
657 657
658 return; 658 return;
@@ -853,8 +853,10 @@ int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id,
853 /* FB netdevice is special: we have one, and only one per netns. 853 /* FB netdevice is special: we have one, and only one per netns.
854 * Allowing to move it to another netns is clearly unsafe. 854 * Allowing to move it to another netns is clearly unsafe.
855 */ 855 */
856 if (!IS_ERR(itn->fb_tunnel_dev)) 856 if (!IS_ERR(itn->fb_tunnel_dev)) {
857 itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL; 857 itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
858 ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
859 }
858 rtnl_unlock(); 860 rtnl_unlock();
859 861
860 return PTR_RET(itn->fb_tunnel_dev); 862 return PTR_RET(itn->fb_tunnel_dev);
@@ -884,8 +886,6 @@ static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head,
884 if (!net_eq(dev_net(t->dev), net)) 886 if (!net_eq(dev_net(t->dev), net))
885 unregister_netdevice_queue(t->dev, head); 887 unregister_netdevice_queue(t->dev, head);
886 } 888 }
887 if (itn->fb_tunnel_dev)
888 unregister_netdevice_queue(itn->fb_tunnel_dev, head);
889} 889}
890 890
891void ip_tunnel_delete_net(struct ip_tunnel_net *itn, struct rtnl_link_ops *ops) 891void ip_tunnel_delete_net(struct ip_tunnel_net *itn, struct rtnl_link_ops *ops)
diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
index d6c856b17fd4..c31e3ad98ef2 100644
--- a/net/ipv4/ip_tunnel_core.c
+++ b/net/ipv4/ip_tunnel_core.c
@@ -61,7 +61,7 @@ int iptunnel_xmit(struct rtable *rt, struct sk_buff *skb,
61 memset(IPCB(skb), 0, sizeof(*IPCB(skb))); 61 memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
62 62
63 /* Push down and install the IP header. */ 63 /* Push down and install the IP header. */
64 __skb_push(skb, sizeof(struct iphdr)); 64 skb_push(skb, sizeof(struct iphdr));
65 skb_reset_network_header(skb); 65 skb_reset_network_header(skb);
66 66
67 iph = ip_hdr(skb); 67 iph = ip_hdr(skb);
diff --git a/net/ipv4/netfilter/ipt_SYNPROXY.c b/net/ipv4/netfilter/ipt_SYNPROXY.c
index 67e17dcda65e..b6346bf2fde3 100644
--- a/net/ipv4/netfilter/ipt_SYNPROXY.c
+++ b/net/ipv4/netfilter/ipt_SYNPROXY.c
@@ -267,7 +267,8 @@ synproxy_tg4(struct sk_buff *skb, const struct xt_action_param *par)
267 if (th == NULL) 267 if (th == NULL)
268 return NF_DROP; 268 return NF_DROP;
269 269
270 synproxy_parse_options(skb, par->thoff, th, &opts); 270 if (!synproxy_parse_options(skb, par->thoff, th, &opts))
271 return NF_DROP;
271 272
272 if (th->syn && !(th->ack || th->fin || th->rst)) { 273 if (th->syn && !(th->ack || th->fin || th->rst)) {
273 /* Initial SYN from client */ 274 /* Initial SYN from client */
@@ -350,7 +351,8 @@ static unsigned int ipv4_synproxy_hook(unsigned int hooknum,
350 351
351 /* fall through */ 352 /* fall through */
352 case TCP_CONNTRACK_SYN_SENT: 353 case TCP_CONNTRACK_SYN_SENT:
353 synproxy_parse_options(skb, thoff, th, &opts); 354 if (!synproxy_parse_options(skb, thoff, th, &opts))
355 return NF_DROP;
354 356
355 if (!th->syn && th->ack && 357 if (!th->syn && th->ack &&
356 CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) { 358 CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) {
@@ -373,7 +375,9 @@ static unsigned int ipv4_synproxy_hook(unsigned int hooknum,
373 if (!th->syn || !th->ack) 375 if (!th->syn || !th->ack)
374 break; 376 break;
375 377
376 synproxy_parse_options(skb, thoff, th, &opts); 378 if (!synproxy_parse_options(skb, thoff, th, &opts))
379 return NF_DROP;
380
377 if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP) 381 if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP)
378 synproxy->tsoff = opts.tsval - synproxy->its; 382 synproxy->tsoff = opts.tsval - synproxy->its;
379 383
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index bfec521c717f..193db03540ad 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -218,8 +218,10 @@ static void raw_err(struct sock *sk, struct sk_buff *skb, u32 info)
218 218
219 if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) 219 if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED)
220 ipv4_sk_update_pmtu(skb, sk, info); 220 ipv4_sk_update_pmtu(skb, sk, info);
221 else if (type == ICMP_REDIRECT) 221 else if (type == ICMP_REDIRECT) {
222 ipv4_sk_redirect(skb, sk); 222 ipv4_sk_redirect(skb, sk);
223 return;
224 }
223 225
224 /* Report error on raw socket, if: 226 /* Report error on raw socket, if:
225 1. User requested ip_recverr. 227 1. User requested ip_recverr.
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 7c83cb8bf137..e6bb8256e59f 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -895,8 +895,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
895 895
896 skb_orphan(skb); 896 skb_orphan(skb);
897 skb->sk = sk; 897 skb->sk = sk;
898 skb->destructor = (sysctl_tcp_limit_output_bytes > 0) ? 898 skb->destructor = tcp_wfree;
899 tcp_wfree : sock_wfree;
900 atomic_add(skb->truesize, &sk->sk_wmem_alloc); 899 atomic_add(skb->truesize, &sk->sk_wmem_alloc);
901 900
902 /* Build TCP header and checksum it. */ 901 /* Build TCP header and checksum it. */
@@ -1840,7 +1839,6 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
1840 while ((skb = tcp_send_head(sk))) { 1839 while ((skb = tcp_send_head(sk))) {
1841 unsigned int limit; 1840 unsigned int limit;
1842 1841
1843
1844 tso_segs = tcp_init_tso_segs(sk, skb, mss_now); 1842 tso_segs = tcp_init_tso_segs(sk, skb, mss_now);
1845 BUG_ON(!tso_segs); 1843 BUG_ON(!tso_segs);
1846 1844
@@ -1869,13 +1867,20 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
1869 break; 1867 break;
1870 } 1868 }
1871 1869
1872 /* TSQ : sk_wmem_alloc accounts skb truesize, 1870 /* TCP Small Queues :
1873 * including skb overhead. But thats OK. 1871 * Control number of packets in qdisc/devices to two packets / or ~1 ms.
1872 * This allows for :
1873 * - better RTT estimation and ACK scheduling
1874 * - faster recovery
1875 * - high rates
1874 */ 1876 */
1875 if (atomic_read(&sk->sk_wmem_alloc) >= sysctl_tcp_limit_output_bytes) { 1877 limit = max(skb->truesize, sk->sk_pacing_rate >> 10);
1878
1879 if (atomic_read(&sk->sk_wmem_alloc) > limit) {
1876 set_bit(TSQ_THROTTLED, &tp->tsq_flags); 1880 set_bit(TSQ_THROTTLED, &tp->tsq_flags);
1877 break; 1881 break;
1878 } 1882 }
1883
1879 limit = mss_now; 1884 limit = mss_now;
1880 if (tso_segs > 1 && !tcp_urg_mode(tp)) 1885 if (tso_segs > 1 && !tcp_urg_mode(tp))
1881 limit = tcp_mss_split_point(sk, skb, mss_now, 1886 limit = tcp_mss_split_point(sk, skb, mss_now,
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 74d2c95db57f..0ca44df51ee9 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -658,7 +658,7 @@ void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable)
658 break; 658 break;
659 case ICMP_REDIRECT: 659 case ICMP_REDIRECT:
660 ipv4_sk_redirect(skb, sk); 660 ipv4_sk_redirect(skb, sk);
661 break; 661 goto out;
662 } 662 }
663 663
664 /* 664 /*
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index d6ff12617f36..cd3fb301da38 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -1499,6 +1499,33 @@ static bool ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr,
1499 return false; 1499 return false;
1500} 1500}
1501 1501
1502/* Compares an address/prefix_len with addresses on device @dev.
1503 * If one is found it returns true.
1504 */
1505bool ipv6_chk_custom_prefix(const struct in6_addr *addr,
1506 const unsigned int prefix_len, struct net_device *dev)
1507{
1508 struct inet6_dev *idev;
1509 struct inet6_ifaddr *ifa;
1510 bool ret = false;
1511
1512 rcu_read_lock();
1513 idev = __in6_dev_get(dev);
1514 if (idev) {
1515 read_lock_bh(&idev->lock);
1516 list_for_each_entry(ifa, &idev->addr_list, if_list) {
1517 ret = ipv6_prefix_equal(addr, &ifa->addr, prefix_len);
1518 if (ret)
1519 break;
1520 }
1521 read_unlock_bh(&idev->lock);
1522 }
1523 rcu_read_unlock();
1524
1525 return ret;
1526}
1527EXPORT_SYMBOL(ipv6_chk_custom_prefix);
1528
1502int ipv6_chk_prefix(const struct in6_addr *addr, struct net_device *dev) 1529int ipv6_chk_prefix(const struct in6_addr *addr, struct net_device *dev)
1503{ 1530{
1504 struct inet6_dev *idev; 1531 struct inet6_dev *idev;
@@ -2193,43 +2220,21 @@ ok:
2193 else 2220 else
2194 stored_lft = 0; 2221 stored_lft = 0;
2195 if (!update_lft && !create && stored_lft) { 2222 if (!update_lft && !create && stored_lft) {
2196 if (valid_lft > MIN_VALID_LIFETIME || 2223 const u32 minimum_lft = min(
2197 valid_lft > stored_lft) 2224 stored_lft, (u32)MIN_VALID_LIFETIME);
2198 update_lft = 1; 2225 valid_lft = max(valid_lft, minimum_lft);
2199 else if (stored_lft <= MIN_VALID_LIFETIME) { 2226
2200 /* valid_lft <= stored_lft is always true */ 2227 /* RFC4862 Section 5.5.3e:
2201 /* 2228 * "Note that the preferred lifetime of the
2202 * RFC 4862 Section 5.5.3e: 2229 * corresponding address is always reset to
2203 * "Note that the preferred lifetime of 2230 * the Preferred Lifetime in the received
2204 * the corresponding address is always 2231 * Prefix Information option, regardless of
2205 * reset to the Preferred Lifetime in 2232 * whether the valid lifetime is also reset or
2206 * the received Prefix Information 2233 * ignored."
2207 * option, regardless of whether the 2234 *
2208 * valid lifetime is also reset or 2235 * So we should always update prefered_lft here.
2209 * ignored." 2236 */
2210 * 2237 update_lft = 1;
2211 * So if the preferred lifetime in
2212 * this advertisement is different
2213 * than what we have stored, but the
2214 * valid lifetime is invalid, just
2215 * reset prefered_lft.
2216 *
2217 * We must set the valid lifetime
2218 * to the stored lifetime since we'll
2219 * be updating the timestamp below,
2220 * else we'll set it back to the
2221 * minimum.
2222 */
2223 if (prefered_lft != ifp->prefered_lft) {
2224 valid_lft = stored_lft;
2225 update_lft = 1;
2226 }
2227 } else {
2228 valid_lft = MIN_VALID_LIFETIME;
2229 if (valid_lft < prefered_lft)
2230 prefered_lft = valid_lft;
2231 update_lft = 1;
2232 }
2233 } 2238 }
2234 2239
2235 if (update_lft) { 2240 if (update_lft) {
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 6b26e9feafb9..7bb5446b9d73 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -618,7 +618,7 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb,
618 struct ip6_tnl *tunnel = netdev_priv(dev); 618 struct ip6_tnl *tunnel = netdev_priv(dev);
619 struct net_device *tdev; /* Device to other host */ 619 struct net_device *tdev; /* Device to other host */
620 struct ipv6hdr *ipv6h; /* Our new IP header */ 620 struct ipv6hdr *ipv6h; /* Our new IP header */
621 unsigned int max_headroom; /* The extra header space needed */ 621 unsigned int max_headroom = 0; /* The extra header space needed */
622 int gre_hlen; 622 int gre_hlen;
623 struct ipv6_tel_txoption opt; 623 struct ipv6_tel_txoption opt;
624 int mtu; 624 int mtu;
@@ -693,7 +693,7 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb,
693 693
694 skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(dev))); 694 skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(dev)));
695 695
696 max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen + dst->header_len; 696 max_headroom += LL_RESERVED_SPACE(tdev) + gre_hlen + dst->header_len;
697 697
698 if (skb_headroom(skb) < max_headroom || skb_shared(skb) || 698 if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
699 (skb_cloned(skb) && !skb_clone_writable(skb, 0))) { 699 (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 3a692d529163..a54c45ce4a48 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1015,6 +1015,8 @@ static inline int ip6_ufo_append_data(struct sock *sk,
1015 * udp datagram 1015 * udp datagram
1016 */ 1016 */
1017 if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) { 1017 if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
1018 struct frag_hdr fhdr;
1019
1018 skb = sock_alloc_send_skb(sk, 1020 skb = sock_alloc_send_skb(sk,
1019 hh_len + fragheaderlen + transhdrlen + 20, 1021 hh_len + fragheaderlen + transhdrlen + 20,
1020 (flags & MSG_DONTWAIT), &err); 1022 (flags & MSG_DONTWAIT), &err);
@@ -1036,12 +1038,6 @@ static inline int ip6_ufo_append_data(struct sock *sk,
1036 skb->protocol = htons(ETH_P_IPV6); 1038 skb->protocol = htons(ETH_P_IPV6);
1037 skb->ip_summed = CHECKSUM_PARTIAL; 1039 skb->ip_summed = CHECKSUM_PARTIAL;
1038 skb->csum = 0; 1040 skb->csum = 0;
1039 }
1040
1041 err = skb_append_datato_frags(sk,skb, getfrag, from,
1042 (length - transhdrlen));
1043 if (!err) {
1044 struct frag_hdr fhdr;
1045 1041
1046 /* Specify the length of each IPv6 datagram fragment. 1042 /* Specify the length of each IPv6 datagram fragment.
1047 * It has to be a multiple of 8. 1043 * It has to be a multiple of 8.
@@ -1052,15 +1048,10 @@ static inline int ip6_ufo_append_data(struct sock *sk,
1052 ipv6_select_ident(&fhdr, rt); 1048 ipv6_select_ident(&fhdr, rt);
1053 skb_shinfo(skb)->ip6_frag_id = fhdr.identification; 1049 skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
1054 __skb_queue_tail(&sk->sk_write_queue, skb); 1050 __skb_queue_tail(&sk->sk_write_queue, skb);
1055
1056 return 0;
1057 } 1051 }
1058 /* There is not enough support do UPD LSO,
1059 * so follow normal path
1060 */
1061 kfree_skb(skb);
1062 1052
1063 return err; 1053 return skb_append_datato_frags(sk, skb, getfrag, from,
1054 (length - transhdrlen));
1064} 1055}
1065 1056
1066static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src, 1057static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
@@ -1227,27 +1218,27 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
1227 * --yoshfuji 1218 * --yoshfuji
1228 */ 1219 */
1229 1220
1230 cork->length += length; 1221 if ((length > mtu) && dontfrag && (sk->sk_protocol == IPPROTO_UDP ||
1231 if (length > mtu) { 1222 sk->sk_protocol == IPPROTO_RAW)) {
1232 int proto = sk->sk_protocol; 1223 ipv6_local_rxpmtu(sk, fl6, mtu-exthdrlen);
1233 if (dontfrag && (proto == IPPROTO_UDP || proto == IPPROTO_RAW)){ 1224 return -EMSGSIZE;
1234 ipv6_local_rxpmtu(sk, fl6, mtu-exthdrlen); 1225 }
1235 return -EMSGSIZE;
1236 }
1237
1238 if (proto == IPPROTO_UDP &&
1239 (rt->dst.dev->features & NETIF_F_UFO)) {
1240 1226
1241 err = ip6_ufo_append_data(sk, getfrag, from, length, 1227 skb = skb_peek_tail(&sk->sk_write_queue);
1242 hh_len, fragheaderlen, 1228 cork->length += length;
1243 transhdrlen, mtu, flags, rt); 1229 if (((length > mtu) ||
1244 if (err) 1230 (skb && skb_is_gso(skb))) &&
1245 goto error; 1231 (sk->sk_protocol == IPPROTO_UDP) &&
1246 return 0; 1232 (rt->dst.dev->features & NETIF_F_UFO)) {
1247 } 1233 err = ip6_ufo_append_data(sk, getfrag, from, length,
1234 hh_len, fragheaderlen,
1235 transhdrlen, mtu, flags, rt);
1236 if (err)
1237 goto error;
1238 return 0;
1248 } 1239 }
1249 1240
1250 if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) 1241 if (!skb)
1251 goto alloc_new_skb; 1242 goto alloc_new_skb;
1252 1243
1253 while (length > 0) { 1244 while (length > 0) {
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 2d8f4829575b..a791552e0422 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -1731,8 +1731,6 @@ static void __net_exit ip6_tnl_destroy_tunnels(struct ip6_tnl_net *ip6n)
1731 } 1731 }
1732 } 1732 }
1733 1733
1734 t = rtnl_dereference(ip6n->tnls_wc[0]);
1735 unregister_netdevice_queue(t->dev, &list);
1736 unregister_netdevice_many(&list); 1734 unregister_netdevice_many(&list);
1737} 1735}
1738 1736
@@ -1752,6 +1750,7 @@ static int __net_init ip6_tnl_init_net(struct net *net)
1752 if (!ip6n->fb_tnl_dev) 1750 if (!ip6n->fb_tnl_dev)
1753 goto err_alloc_dev; 1751 goto err_alloc_dev;
1754 dev_net_set(ip6n->fb_tnl_dev, net); 1752 dev_net_set(ip6n->fb_tnl_dev, net);
1753 ip6n->fb_tnl_dev->rtnl_link_ops = &ip6_link_ops;
1755 /* FB netdevice is special: we have one, and only one per netns. 1754 /* FB netdevice is special: we have one, and only one per netns.
1756 * Allowing to move it to another netns is clearly unsafe. 1755 * Allowing to move it to another netns is clearly unsafe.
1757 */ 1756 */
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 096cd67b737c..d18f9f903db6 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -2034,7 +2034,7 @@ static void mld_dad_timer_expire(unsigned long data)
2034 if (idev->mc_dad_count) 2034 if (idev->mc_dad_count)
2035 mld_dad_start_timer(idev, idev->mc_maxdelay); 2035 mld_dad_start_timer(idev, idev->mc_maxdelay);
2036 } 2036 }
2037 __in6_dev_put(idev); 2037 in6_dev_put(idev);
2038} 2038}
2039 2039
2040static int ip6_mc_del1_src(struct ifmcaddr6 *pmc, int sfmode, 2040static int ip6_mc_del1_src(struct ifmcaddr6 *pmc, int sfmode,
@@ -2379,7 +2379,7 @@ static void mld_gq_timer_expire(unsigned long data)
2379 2379
2380 idev->mc_gq_running = 0; 2380 idev->mc_gq_running = 0;
2381 mld_send_report(idev, NULL); 2381 mld_send_report(idev, NULL);
2382 __in6_dev_put(idev); 2382 in6_dev_put(idev);
2383} 2383}
2384 2384
2385static void mld_ifc_timer_expire(unsigned long data) 2385static void mld_ifc_timer_expire(unsigned long data)
@@ -2392,7 +2392,7 @@ static void mld_ifc_timer_expire(unsigned long data)
2392 if (idev->mc_ifc_count) 2392 if (idev->mc_ifc_count)
2393 mld_ifc_start_timer(idev, idev->mc_maxdelay); 2393 mld_ifc_start_timer(idev, idev->mc_maxdelay);
2394 } 2394 }
2395 __in6_dev_put(idev); 2395 in6_dev_put(idev);
2396} 2396}
2397 2397
2398static void mld_ifc_event(struct inet6_dev *idev) 2398static void mld_ifc_event(struct inet6_dev *idev)
diff --git a/net/ipv6/netfilter/ip6t_SYNPROXY.c b/net/ipv6/netfilter/ip6t_SYNPROXY.c
index 19cfea8dbcaa..2748b042da72 100644
--- a/net/ipv6/netfilter/ip6t_SYNPROXY.c
+++ b/net/ipv6/netfilter/ip6t_SYNPROXY.c
@@ -282,7 +282,8 @@ synproxy_tg6(struct sk_buff *skb, const struct xt_action_param *par)
282 if (th == NULL) 282 if (th == NULL)
283 return NF_DROP; 283 return NF_DROP;
284 284
285 synproxy_parse_options(skb, par->thoff, th, &opts); 285 if (!synproxy_parse_options(skb, par->thoff, th, &opts))
286 return NF_DROP;
286 287
287 if (th->syn && !(th->ack || th->fin || th->rst)) { 288 if (th->syn && !(th->ack || th->fin || th->rst)) {
288 /* Initial SYN from client */ 289 /* Initial SYN from client */
@@ -372,7 +373,8 @@ static unsigned int ipv6_synproxy_hook(unsigned int hooknum,
372 373
373 /* fall through */ 374 /* fall through */
374 case TCP_CONNTRACK_SYN_SENT: 375 case TCP_CONNTRACK_SYN_SENT:
375 synproxy_parse_options(skb, thoff, th, &opts); 376 if (!synproxy_parse_options(skb, thoff, th, &opts))
377 return NF_DROP;
376 378
377 if (!th->syn && th->ack && 379 if (!th->syn && th->ack &&
378 CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) { 380 CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) {
@@ -395,7 +397,9 @@ static unsigned int ipv6_synproxy_hook(unsigned int hooknum,
395 if (!th->syn || !th->ack) 397 if (!th->syn || !th->ack)
396 break; 398 break;
397 399
398 synproxy_parse_options(skb, thoff, th, &opts); 400 if (!synproxy_parse_options(skb, thoff, th, &opts))
401 return NF_DROP;
402
399 if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP) 403 if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP)
400 synproxy->tsoff = opts.tsval - synproxy->its; 404 synproxy->tsoff = opts.tsval - synproxy->its;
401 405
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 58916bbb1728..a4ed2416399e 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -335,8 +335,10 @@ static void rawv6_err(struct sock *sk, struct sk_buff *skb,
335 ip6_sk_update_pmtu(skb, sk, info); 335 ip6_sk_update_pmtu(skb, sk, info);
336 harderr = (np->pmtudisc == IPV6_PMTUDISC_DO); 336 harderr = (np->pmtudisc == IPV6_PMTUDISC_DO);
337 } 337 }
338 if (type == NDISC_REDIRECT) 338 if (type == NDISC_REDIRECT) {
339 ip6_sk_redirect(skb, sk); 339 ip6_sk_redirect(skb, sk);
340 return;
341 }
340 if (np->recverr) { 342 if (np->recverr) {
341 u8 *payload = skb->data; 343 u8 *payload = skb->data;
342 if (!inet->hdrincl) 344 if (!inet->hdrincl)
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 7ee5cb96db34..19269453a8ea 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -566,6 +566,70 @@ static inline bool is_spoofed_6rd(struct ip_tunnel *tunnel, const __be32 v4addr,
566 return false; 566 return false;
567} 567}
568 568
569/* Checks if an address matches an address on the tunnel interface.
570 * Used to detect the NAT of proto 41 packets and let them pass spoofing test.
571 * Long story:
572 * This function is called after we considered the packet as spoofed
573 * in is_spoofed_6rd.
574 * We may have a router that is doing NAT for proto 41 packets
575 * for an internal station. Destination a.a.a.a/PREFIX:bbbb:bbbb
576 * will be translated to n.n.n.n/PREFIX:bbbb:bbbb. And is_spoofed_6rd
577 * function will return true, dropping the packet.
578 * But, we can still check if is spoofed against the IP
579 * addresses associated with the interface.
580 */
581static bool only_dnatted(const struct ip_tunnel *tunnel,
582 const struct in6_addr *v6dst)
583{
584 int prefix_len;
585
586#ifdef CONFIG_IPV6_SIT_6RD
587 prefix_len = tunnel->ip6rd.prefixlen + 32
588 - tunnel->ip6rd.relay_prefixlen;
589#else
590 prefix_len = 48;
591#endif
592 return ipv6_chk_custom_prefix(v6dst, prefix_len, tunnel->dev);
593}
594
595/* Returns true if a packet is spoofed */
596static bool packet_is_spoofed(struct sk_buff *skb,
597 const struct iphdr *iph,
598 struct ip_tunnel *tunnel)
599{
600 const struct ipv6hdr *ipv6h;
601
602 if (tunnel->dev->priv_flags & IFF_ISATAP) {
603 if (!isatap_chksrc(skb, iph, tunnel))
604 return true;
605
606 return false;
607 }
608
609 if (tunnel->dev->flags & IFF_POINTOPOINT)
610 return false;
611
612 ipv6h = ipv6_hdr(skb);
613
614 if (unlikely(is_spoofed_6rd(tunnel, iph->saddr, &ipv6h->saddr))) {
615 net_warn_ratelimited("Src spoofed %pI4/%pI6c -> %pI4/%pI6c\n",
616 &iph->saddr, &ipv6h->saddr,
617 &iph->daddr, &ipv6h->daddr);
618 return true;
619 }
620
621 if (likely(!is_spoofed_6rd(tunnel, iph->daddr, &ipv6h->daddr)))
622 return false;
623
624 if (only_dnatted(tunnel, &ipv6h->daddr))
625 return false;
626
627 net_warn_ratelimited("Dst spoofed %pI4/%pI6c -> %pI4/%pI6c\n",
628 &iph->saddr, &ipv6h->saddr,
629 &iph->daddr, &ipv6h->daddr);
630 return true;
631}
632
569static int ipip6_rcv(struct sk_buff *skb) 633static int ipip6_rcv(struct sk_buff *skb)
570{ 634{
571 const struct iphdr *iph = ip_hdr(skb); 635 const struct iphdr *iph = ip_hdr(skb);
@@ -586,19 +650,9 @@ static int ipip6_rcv(struct sk_buff *skb)
586 IPCB(skb)->flags = 0; 650 IPCB(skb)->flags = 0;
587 skb->protocol = htons(ETH_P_IPV6); 651 skb->protocol = htons(ETH_P_IPV6);
588 652
589 if (tunnel->dev->priv_flags & IFF_ISATAP) { 653 if (packet_is_spoofed(skb, iph, tunnel)) {
590 if (!isatap_chksrc(skb, iph, tunnel)) { 654 tunnel->dev->stats.rx_errors++;
591 tunnel->dev->stats.rx_errors++; 655 goto out;
592 goto out;
593 }
594 } else if (!(tunnel->dev->flags&IFF_POINTOPOINT)) {
595 if (is_spoofed_6rd(tunnel, iph->saddr,
596 &ipv6_hdr(skb)->saddr) ||
597 is_spoofed_6rd(tunnel, iph->daddr,
598 &ipv6_hdr(skb)->daddr)) {
599 tunnel->dev->stats.rx_errors++;
600 goto out;
601 }
602 } 656 }
603 657
604 __skb_tunnel_rx(skb, tunnel->dev, tunnel->net); 658 __skb_tunnel_rx(skb, tunnel->dev, tunnel->net);
@@ -748,7 +802,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
748 neigh = dst_neigh_lookup(skb_dst(skb), &iph6->daddr); 802 neigh = dst_neigh_lookup(skb_dst(skb), &iph6->daddr);
749 803
750 if (neigh == NULL) { 804 if (neigh == NULL) {
751 net_dbg_ratelimited("sit: nexthop == NULL\n"); 805 net_dbg_ratelimited("nexthop == NULL\n");
752 goto tx_error; 806 goto tx_error;
753 } 807 }
754 808
@@ -777,7 +831,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
777 neigh = dst_neigh_lookup(skb_dst(skb), &iph6->daddr); 831 neigh = dst_neigh_lookup(skb_dst(skb), &iph6->daddr);
778 832
779 if (neigh == NULL) { 833 if (neigh == NULL) {
780 net_dbg_ratelimited("sit: nexthop == NULL\n"); 834 net_dbg_ratelimited("nexthop == NULL\n");
781 goto tx_error; 835 goto tx_error;
782 } 836 }
783 837
@@ -1612,6 +1666,7 @@ static int __net_init sit_init_net(struct net *net)
1612 goto err_alloc_dev; 1666 goto err_alloc_dev;
1613 } 1667 }
1614 dev_net_set(sitn->fb_tunnel_dev, net); 1668 dev_net_set(sitn->fb_tunnel_dev, net);
1669 sitn->fb_tunnel_dev->rtnl_link_ops = &sit_link_ops;
1615 /* FB netdevice is special: we have one, and only one per netns. 1670 /* FB netdevice is special: we have one, and only one per netns.
1616 * Allowing to move it to another netns is clearly unsafe. 1671 * Allowing to move it to another netns is clearly unsafe.
1617 */ 1672 */
@@ -1646,7 +1701,6 @@ static void __net_exit sit_exit_net(struct net *net)
1646 1701
1647 rtnl_lock(); 1702 rtnl_lock();
1648 sit_destroy_tunnels(sitn, &list); 1703 sit_destroy_tunnels(sitn, &list);
1649 unregister_netdevice_queue(sitn->fb_tunnel_dev, &list);
1650 unregister_netdevice_many(&list); 1704 unregister_netdevice_many(&list);
1651 rtnl_unlock(); 1705 rtnl_unlock();
1652} 1706}
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index f4058150262b..72b7eaaf3ca0 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -525,8 +525,10 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
525 525
526 if (type == ICMPV6_PKT_TOOBIG) 526 if (type == ICMPV6_PKT_TOOBIG)
527 ip6_sk_update_pmtu(skb, sk, info); 527 ip6_sk_update_pmtu(skb, sk, info);
528 if (type == NDISC_REDIRECT) 528 if (type == NDISC_REDIRECT) {
529 ip6_sk_redirect(skb, sk); 529 ip6_sk_redirect(skb, sk);
530 goto out;
531 }
530 532
531 np = inet6_sk(sk); 533 np = inet6_sk(sk);
532 534
diff --git a/net/lapb/lapb_timer.c b/net/lapb/lapb_timer.c
index 54563ad8aeb1..355cc3b6fa4d 100644
--- a/net/lapb/lapb_timer.c
+++ b/net/lapb/lapb_timer.c
@@ -154,6 +154,7 @@ static void lapb_t1timer_expiry(unsigned long param)
154 } else { 154 } else {
155 lapb->n2count++; 155 lapb->n2count++;
156 lapb_requeue_frames(lapb); 156 lapb_requeue_frames(lapb);
157 lapb_kick(lapb);
157 } 158 }
158 break; 159 break;
159 160
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 4f69e83ff836..74fd00c27210 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -116,6 +116,7 @@ ip_vs_in_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
116 116
117 if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) { 117 if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) {
118 struct ip_vs_cpu_stats *s; 118 struct ip_vs_cpu_stats *s;
119 struct ip_vs_service *svc;
119 120
120 s = this_cpu_ptr(dest->stats.cpustats); 121 s = this_cpu_ptr(dest->stats.cpustats);
121 s->ustats.inpkts++; 122 s->ustats.inpkts++;
@@ -123,11 +124,14 @@ ip_vs_in_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
123 s->ustats.inbytes += skb->len; 124 s->ustats.inbytes += skb->len;
124 u64_stats_update_end(&s->syncp); 125 u64_stats_update_end(&s->syncp);
125 126
126 s = this_cpu_ptr(dest->svc->stats.cpustats); 127 rcu_read_lock();
128 svc = rcu_dereference(dest->svc);
129 s = this_cpu_ptr(svc->stats.cpustats);
127 s->ustats.inpkts++; 130 s->ustats.inpkts++;
128 u64_stats_update_begin(&s->syncp); 131 u64_stats_update_begin(&s->syncp);
129 s->ustats.inbytes += skb->len; 132 s->ustats.inbytes += skb->len;
130 u64_stats_update_end(&s->syncp); 133 u64_stats_update_end(&s->syncp);
134 rcu_read_unlock();
131 135
132 s = this_cpu_ptr(ipvs->tot_stats.cpustats); 136 s = this_cpu_ptr(ipvs->tot_stats.cpustats);
133 s->ustats.inpkts++; 137 s->ustats.inpkts++;
@@ -146,6 +150,7 @@ ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
146 150
147 if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) { 151 if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) {
148 struct ip_vs_cpu_stats *s; 152 struct ip_vs_cpu_stats *s;
153 struct ip_vs_service *svc;
149 154
150 s = this_cpu_ptr(dest->stats.cpustats); 155 s = this_cpu_ptr(dest->stats.cpustats);
151 s->ustats.outpkts++; 156 s->ustats.outpkts++;
@@ -153,11 +158,14 @@ ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
153 s->ustats.outbytes += skb->len; 158 s->ustats.outbytes += skb->len;
154 u64_stats_update_end(&s->syncp); 159 u64_stats_update_end(&s->syncp);
155 160
156 s = this_cpu_ptr(dest->svc->stats.cpustats); 161 rcu_read_lock();
162 svc = rcu_dereference(dest->svc);
163 s = this_cpu_ptr(svc->stats.cpustats);
157 s->ustats.outpkts++; 164 s->ustats.outpkts++;
158 u64_stats_update_begin(&s->syncp); 165 u64_stats_update_begin(&s->syncp);
159 s->ustats.outbytes += skb->len; 166 s->ustats.outbytes += skb->len;
160 u64_stats_update_end(&s->syncp); 167 u64_stats_update_end(&s->syncp);
168 rcu_read_unlock();
161 169
162 s = this_cpu_ptr(ipvs->tot_stats.cpustats); 170 s = this_cpu_ptr(ipvs->tot_stats.cpustats);
163 s->ustats.outpkts++; 171 s->ustats.outpkts++;
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index c8148e487386..a3df9bddc4f7 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -460,7 +460,7 @@ static inline void
460__ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc) 460__ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc)
461{ 461{
462 atomic_inc(&svc->refcnt); 462 atomic_inc(&svc->refcnt);
463 dest->svc = svc; 463 rcu_assign_pointer(dest->svc, svc);
464} 464}
465 465
466static void ip_vs_service_free(struct ip_vs_service *svc) 466static void ip_vs_service_free(struct ip_vs_service *svc)
@@ -470,18 +470,25 @@ static void ip_vs_service_free(struct ip_vs_service *svc)
470 kfree(svc); 470 kfree(svc);
471} 471}
472 472
473static void 473static void ip_vs_service_rcu_free(struct rcu_head *head)
474__ip_vs_unbind_svc(struct ip_vs_dest *dest)
475{ 474{
476 struct ip_vs_service *svc = dest->svc; 475 struct ip_vs_service *svc;
476
477 svc = container_of(head, struct ip_vs_service, rcu_head);
478 ip_vs_service_free(svc);
479}
477 480
478 dest->svc = NULL; 481static void __ip_vs_svc_put(struct ip_vs_service *svc, bool do_delay)
482{
479 if (atomic_dec_and_test(&svc->refcnt)) { 483 if (atomic_dec_and_test(&svc->refcnt)) {
480 IP_VS_DBG_BUF(3, "Removing service %u/%s:%u\n", 484 IP_VS_DBG_BUF(3, "Removing service %u/%s:%u\n",
481 svc->fwmark, 485 svc->fwmark,
482 IP_VS_DBG_ADDR(svc->af, &svc->addr), 486 IP_VS_DBG_ADDR(svc->af, &svc->addr),
483 ntohs(svc->port)); 487 ntohs(svc->port));
484 ip_vs_service_free(svc); 488 if (do_delay)
489 call_rcu(&svc->rcu_head, ip_vs_service_rcu_free);
490 else
491 ip_vs_service_free(svc);
485 } 492 }
486} 493}
487 494
@@ -667,11 +674,6 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
667 IP_VS_DBG_ADDR(svc->af, &dest->addr), 674 IP_VS_DBG_ADDR(svc->af, &dest->addr),
668 ntohs(dest->port), 675 ntohs(dest->port),
669 atomic_read(&dest->refcnt)); 676 atomic_read(&dest->refcnt));
670 /* We can not reuse dest while in grace period
671 * because conns still can use dest->svc
672 */
673 if (test_bit(IP_VS_DEST_STATE_REMOVING, &dest->state))
674 continue;
675 if (dest->af == svc->af && 677 if (dest->af == svc->af &&
676 ip_vs_addr_equal(svc->af, &dest->addr, daddr) && 678 ip_vs_addr_equal(svc->af, &dest->addr, daddr) &&
677 dest->port == dport && 679 dest->port == dport &&
@@ -697,8 +699,10 @@ out:
697 699
698static void ip_vs_dest_free(struct ip_vs_dest *dest) 700static void ip_vs_dest_free(struct ip_vs_dest *dest)
699{ 701{
702 struct ip_vs_service *svc = rcu_dereference_protected(dest->svc, 1);
703
700 __ip_vs_dst_cache_reset(dest); 704 __ip_vs_dst_cache_reset(dest);
701 __ip_vs_unbind_svc(dest); 705 __ip_vs_svc_put(svc, false);
702 free_percpu(dest->stats.cpustats); 706 free_percpu(dest->stats.cpustats);
703 kfree(dest); 707 kfree(dest);
704} 708}
@@ -771,6 +775,7 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
771 struct ip_vs_dest_user_kern *udest, int add) 775 struct ip_vs_dest_user_kern *udest, int add)
772{ 776{
773 struct netns_ipvs *ipvs = net_ipvs(svc->net); 777 struct netns_ipvs *ipvs = net_ipvs(svc->net);
778 struct ip_vs_service *old_svc;
774 struct ip_vs_scheduler *sched; 779 struct ip_vs_scheduler *sched;
775 int conn_flags; 780 int conn_flags;
776 781
@@ -792,13 +797,14 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
792 atomic_set(&dest->conn_flags, conn_flags); 797 atomic_set(&dest->conn_flags, conn_flags);
793 798
794 /* bind the service */ 799 /* bind the service */
795 if (!dest->svc) { 800 old_svc = rcu_dereference_protected(dest->svc, 1);
801 if (!old_svc) {
796 __ip_vs_bind_svc(dest, svc); 802 __ip_vs_bind_svc(dest, svc);
797 } else { 803 } else {
798 if (dest->svc != svc) { 804 if (old_svc != svc) {
799 __ip_vs_unbind_svc(dest);
800 ip_vs_zero_stats(&dest->stats); 805 ip_vs_zero_stats(&dest->stats);
801 __ip_vs_bind_svc(dest, svc); 806 __ip_vs_bind_svc(dest, svc);
807 __ip_vs_svc_put(old_svc, true);
802 } 808 }
803 } 809 }
804 810
@@ -998,16 +1004,6 @@ ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
998 return 0; 1004 return 0;
999} 1005}
1000 1006
1001static void ip_vs_dest_wait_readers(struct rcu_head *head)
1002{
1003 struct ip_vs_dest *dest = container_of(head, struct ip_vs_dest,
1004 rcu_head);
1005
1006 /* End of grace period after unlinking */
1007 clear_bit(IP_VS_DEST_STATE_REMOVING, &dest->state);
1008}
1009
1010
1011/* 1007/*
1012 * Delete a destination (must be already unlinked from the service) 1008 * Delete a destination (must be already unlinked from the service)
1013 */ 1009 */
@@ -1023,20 +1019,16 @@ static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest,
1023 */ 1019 */
1024 ip_vs_rs_unhash(dest); 1020 ip_vs_rs_unhash(dest);
1025 1021
1026 if (!cleanup) {
1027 set_bit(IP_VS_DEST_STATE_REMOVING, &dest->state);
1028 call_rcu(&dest->rcu_head, ip_vs_dest_wait_readers);
1029 }
1030
1031 spin_lock_bh(&ipvs->dest_trash_lock); 1022 spin_lock_bh(&ipvs->dest_trash_lock);
1032 IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, dest->refcnt=%d\n", 1023 IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, dest->refcnt=%d\n",
1033 IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port), 1024 IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port),
1034 atomic_read(&dest->refcnt)); 1025 atomic_read(&dest->refcnt));
1035 if (list_empty(&ipvs->dest_trash) && !cleanup) 1026 if (list_empty(&ipvs->dest_trash) && !cleanup)
1036 mod_timer(&ipvs->dest_trash_timer, 1027 mod_timer(&ipvs->dest_trash_timer,
1037 jiffies + IP_VS_DEST_TRASH_PERIOD); 1028 jiffies + (IP_VS_DEST_TRASH_PERIOD >> 1));
1038 /* dest lives in trash without reference */ 1029 /* dest lives in trash without reference */
1039 list_add(&dest->t_list, &ipvs->dest_trash); 1030 list_add(&dest->t_list, &ipvs->dest_trash);
1031 dest->idle_start = 0;
1040 spin_unlock_bh(&ipvs->dest_trash_lock); 1032 spin_unlock_bh(&ipvs->dest_trash_lock);
1041 ip_vs_dest_put(dest); 1033 ip_vs_dest_put(dest);
1042} 1034}
@@ -1108,24 +1100,30 @@ static void ip_vs_dest_trash_expire(unsigned long data)
1108 struct net *net = (struct net *) data; 1100 struct net *net = (struct net *) data;
1109 struct netns_ipvs *ipvs = net_ipvs(net); 1101 struct netns_ipvs *ipvs = net_ipvs(net);
1110 struct ip_vs_dest *dest, *next; 1102 struct ip_vs_dest *dest, *next;
1103 unsigned long now = jiffies;
1111 1104
1112 spin_lock(&ipvs->dest_trash_lock); 1105 spin_lock(&ipvs->dest_trash_lock);
1113 list_for_each_entry_safe(dest, next, &ipvs->dest_trash, t_list) { 1106 list_for_each_entry_safe(dest, next, &ipvs->dest_trash, t_list) {
1114 /* Skip if dest is in grace period */
1115 if (test_bit(IP_VS_DEST_STATE_REMOVING, &dest->state))
1116 continue;
1117 if (atomic_read(&dest->refcnt) > 0) 1107 if (atomic_read(&dest->refcnt) > 0)
1118 continue; 1108 continue;
1109 if (dest->idle_start) {
1110 if (time_before(now, dest->idle_start +
1111 IP_VS_DEST_TRASH_PERIOD))
1112 continue;
1113 } else {
1114 dest->idle_start = max(1UL, now);
1115 continue;
1116 }
1119 IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u from trash\n", 1117 IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u from trash\n",
1120 dest->vfwmark, 1118 dest->vfwmark,
1121 IP_VS_DBG_ADDR(dest->svc->af, &dest->addr), 1119 IP_VS_DBG_ADDR(dest->af, &dest->addr),
1122 ntohs(dest->port)); 1120 ntohs(dest->port));
1123 list_del(&dest->t_list); 1121 list_del(&dest->t_list);
1124 ip_vs_dest_free(dest); 1122 ip_vs_dest_free(dest);
1125 } 1123 }
1126 if (!list_empty(&ipvs->dest_trash)) 1124 if (!list_empty(&ipvs->dest_trash))
1127 mod_timer(&ipvs->dest_trash_timer, 1125 mod_timer(&ipvs->dest_trash_timer,
1128 jiffies + IP_VS_DEST_TRASH_PERIOD); 1126 jiffies + (IP_VS_DEST_TRASH_PERIOD >> 1));
1129 spin_unlock(&ipvs->dest_trash_lock); 1127 spin_unlock(&ipvs->dest_trash_lock);
1130} 1128}
1131 1129
@@ -1320,14 +1318,6 @@ out:
1320 return ret; 1318 return ret;
1321} 1319}
1322 1320
1323static void ip_vs_service_rcu_free(struct rcu_head *head)
1324{
1325 struct ip_vs_service *svc;
1326
1327 svc = container_of(head, struct ip_vs_service, rcu_head);
1328 ip_vs_service_free(svc);
1329}
1330
1331/* 1321/*
1332 * Delete a service from the service list 1322 * Delete a service from the service list
1333 * - The service must be unlinked, unlocked and not referenced! 1323 * - The service must be unlinked, unlocked and not referenced!
@@ -1376,13 +1366,7 @@ static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup)
1376 /* 1366 /*
1377 * Free the service if nobody refers to it 1367 * Free the service if nobody refers to it
1378 */ 1368 */
1379 if (atomic_dec_and_test(&svc->refcnt)) { 1369 __ip_vs_svc_put(svc, true);
1380 IP_VS_DBG_BUF(3, "Removing service %u/%s:%u\n",
1381 svc->fwmark,
1382 IP_VS_DBG_ADDR(svc->af, &svc->addr),
1383 ntohs(svc->port));
1384 call_rcu(&svc->rcu_head, ip_vs_service_rcu_free);
1385 }
1386 1370
1387 /* decrease the module use count */ 1371 /* decrease the module use count */
1388 ip_vs_use_count_dec(); 1372 ip_vs_use_count_dec();
diff --git a/net/netfilter/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c
index 6bee6d0c73a5..1425e9a924c4 100644
--- a/net/netfilter/ipvs/ip_vs_est.c
+++ b/net/netfilter/ipvs/ip_vs_est.c
@@ -59,12 +59,13 @@ static void ip_vs_read_cpu_stats(struct ip_vs_stats_user *sum,
59 struct ip_vs_cpu_stats __percpu *stats) 59 struct ip_vs_cpu_stats __percpu *stats)
60{ 60{
61 int i; 61 int i;
62 bool add = false;
62 63
63 for_each_possible_cpu(i) { 64 for_each_possible_cpu(i) {
64 struct ip_vs_cpu_stats *s = per_cpu_ptr(stats, i); 65 struct ip_vs_cpu_stats *s = per_cpu_ptr(stats, i);
65 unsigned int start; 66 unsigned int start;
66 __u64 inbytes, outbytes; 67 __u64 inbytes, outbytes;
67 if (i) { 68 if (add) {
68 sum->conns += s->ustats.conns; 69 sum->conns += s->ustats.conns;
69 sum->inpkts += s->ustats.inpkts; 70 sum->inpkts += s->ustats.inpkts;
70 sum->outpkts += s->ustats.outpkts; 71 sum->outpkts += s->ustats.outpkts;
@@ -76,6 +77,7 @@ static void ip_vs_read_cpu_stats(struct ip_vs_stats_user *sum,
76 sum->inbytes += inbytes; 77 sum->inbytes += inbytes;
77 sum->outbytes += outbytes; 78 sum->outbytes += outbytes;
78 } else { 79 } else {
80 add = true;
79 sum->conns = s->ustats.conns; 81 sum->conns = s->ustats.conns;
80 sum->inpkts = s->ustats.inpkts; 82 sum->inpkts = s->ustats.inpkts;
81 sum->outpkts = s->ustats.outpkts; 83 sum->outpkts = s->ustats.outpkts;
diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c
index 1383b0eadc0e..eff13c94498e 100644
--- a/net/netfilter/ipvs/ip_vs_lblc.c
+++ b/net/netfilter/ipvs/ip_vs_lblc.c
@@ -93,7 +93,7 @@ struct ip_vs_lblc_entry {
93 struct hlist_node list; 93 struct hlist_node list;
94 int af; /* address family */ 94 int af; /* address family */
95 union nf_inet_addr addr; /* destination IP address */ 95 union nf_inet_addr addr; /* destination IP address */
96 struct ip_vs_dest __rcu *dest; /* real server (cache) */ 96 struct ip_vs_dest *dest; /* real server (cache) */
97 unsigned long lastuse; /* last used time */ 97 unsigned long lastuse; /* last used time */
98 struct rcu_head rcu_head; 98 struct rcu_head rcu_head;
99}; 99};
@@ -130,20 +130,21 @@ static struct ctl_table vs_vars_table[] = {
130}; 130};
131#endif 131#endif
132 132
133static inline void ip_vs_lblc_free(struct ip_vs_lblc_entry *en) 133static void ip_vs_lblc_rcu_free(struct rcu_head *head)
134{ 134{
135 struct ip_vs_dest *dest; 135 struct ip_vs_lblc_entry *en = container_of(head,
136 struct ip_vs_lblc_entry,
137 rcu_head);
136 138
137 hlist_del_rcu(&en->list); 139 ip_vs_dest_put(en->dest);
138 /* 140 kfree(en);
139 * We don't kfree dest because it is referred either by its service
140 * or the trash dest list.
141 */
142 dest = rcu_dereference_protected(en->dest, 1);
143 ip_vs_dest_put(dest);
144 kfree_rcu(en, rcu_head);
145} 141}
146 142
143static inline void ip_vs_lblc_del(struct ip_vs_lblc_entry *en)
144{
145 hlist_del_rcu(&en->list);
146 call_rcu(&en->rcu_head, ip_vs_lblc_rcu_free);
147}
147 148
148/* 149/*
149 * Returns hash value for IPVS LBLC entry 150 * Returns hash value for IPVS LBLC entry
@@ -203,30 +204,23 @@ ip_vs_lblc_new(struct ip_vs_lblc_table *tbl, const union nf_inet_addr *daddr,
203 struct ip_vs_lblc_entry *en; 204 struct ip_vs_lblc_entry *en;
204 205
205 en = ip_vs_lblc_get(dest->af, tbl, daddr); 206 en = ip_vs_lblc_get(dest->af, tbl, daddr);
206 if (!en) { 207 if (en) {
207 en = kmalloc(sizeof(*en), GFP_ATOMIC); 208 if (en->dest == dest)
208 if (!en) 209 return en;
209 return NULL; 210 ip_vs_lblc_del(en);
210 211 }
211 en->af = dest->af; 212 en = kmalloc(sizeof(*en), GFP_ATOMIC);
212 ip_vs_addr_copy(dest->af, &en->addr, daddr); 213 if (!en)
213 en->lastuse = jiffies; 214 return NULL;
214 215
215 ip_vs_dest_hold(dest); 216 en->af = dest->af;
216 RCU_INIT_POINTER(en->dest, dest); 217 ip_vs_addr_copy(dest->af, &en->addr, daddr);
218 en->lastuse = jiffies;
217 219
218 ip_vs_lblc_hash(tbl, en); 220 ip_vs_dest_hold(dest);
219 } else { 221 en->dest = dest;
220 struct ip_vs_dest *old_dest;
221 222
222 old_dest = rcu_dereference_protected(en->dest, 1); 223 ip_vs_lblc_hash(tbl, en);
223 if (old_dest != dest) {
224 ip_vs_dest_put(old_dest);
225 ip_vs_dest_hold(dest);
226 /* No ordering constraints for refcnt */
227 RCU_INIT_POINTER(en->dest, dest);
228 }
229 }
230 224
231 return en; 225 return en;
232} 226}
@@ -246,7 +240,7 @@ static void ip_vs_lblc_flush(struct ip_vs_service *svc)
246 tbl->dead = 1; 240 tbl->dead = 1;
247 for (i=0; i<IP_VS_LBLC_TAB_SIZE; i++) { 241 for (i=0; i<IP_VS_LBLC_TAB_SIZE; i++) {
248 hlist_for_each_entry_safe(en, next, &tbl->bucket[i], list) { 242 hlist_for_each_entry_safe(en, next, &tbl->bucket[i], list) {
249 ip_vs_lblc_free(en); 243 ip_vs_lblc_del(en);
250 atomic_dec(&tbl->entries); 244 atomic_dec(&tbl->entries);
251 } 245 }
252 } 246 }
@@ -281,7 +275,7 @@ static inline void ip_vs_lblc_full_check(struct ip_vs_service *svc)
281 sysctl_lblc_expiration(svc))) 275 sysctl_lblc_expiration(svc)))
282 continue; 276 continue;
283 277
284 ip_vs_lblc_free(en); 278 ip_vs_lblc_del(en);
285 atomic_dec(&tbl->entries); 279 atomic_dec(&tbl->entries);
286 } 280 }
287 spin_unlock(&svc->sched_lock); 281 spin_unlock(&svc->sched_lock);
@@ -335,7 +329,7 @@ static void ip_vs_lblc_check_expire(unsigned long data)
335 if (time_before(now, en->lastuse + ENTRY_TIMEOUT)) 329 if (time_before(now, en->lastuse + ENTRY_TIMEOUT))
336 continue; 330 continue;
337 331
338 ip_vs_lblc_free(en); 332 ip_vs_lblc_del(en);
339 atomic_dec(&tbl->entries); 333 atomic_dec(&tbl->entries);
340 goal--; 334 goal--;
341 } 335 }
@@ -443,8 +437,8 @@ __ip_vs_lblc_schedule(struct ip_vs_service *svc)
443 continue; 437 continue;
444 438
445 doh = ip_vs_dest_conn_overhead(dest); 439 doh = ip_vs_dest_conn_overhead(dest);
446 if (loh * atomic_read(&dest->weight) > 440 if ((__s64)loh * atomic_read(&dest->weight) >
447 doh * atomic_read(&least->weight)) { 441 (__s64)doh * atomic_read(&least->weight)) {
448 least = dest; 442 least = dest;
449 loh = doh; 443 loh = doh;
450 } 444 }
@@ -511,7 +505,7 @@ ip_vs_lblc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
511 * free up entries from the trash at any time. 505 * free up entries from the trash at any time.
512 */ 506 */
513 507
514 dest = rcu_dereference(en->dest); 508 dest = en->dest;
515 if ((dest->flags & IP_VS_DEST_F_AVAILABLE) && 509 if ((dest->flags & IP_VS_DEST_F_AVAILABLE) &&
516 atomic_read(&dest->weight) > 0 && !is_overloaded(dest, svc)) 510 atomic_read(&dest->weight) > 0 && !is_overloaded(dest, svc))
517 goto out; 511 goto out;
@@ -631,7 +625,7 @@ static void __exit ip_vs_lblc_cleanup(void)
631{ 625{
632 unregister_ip_vs_scheduler(&ip_vs_lblc_scheduler); 626 unregister_ip_vs_scheduler(&ip_vs_lblc_scheduler);
633 unregister_pernet_subsys(&ip_vs_lblc_ops); 627 unregister_pernet_subsys(&ip_vs_lblc_ops);
634 synchronize_rcu(); 628 rcu_barrier();
635} 629}
636 630
637 631
diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c
index 5199448697f6..0b8550089a2e 100644
--- a/net/netfilter/ipvs/ip_vs_lblcr.c
+++ b/net/netfilter/ipvs/ip_vs_lblcr.c
@@ -89,7 +89,7 @@
89 */ 89 */
90struct ip_vs_dest_set_elem { 90struct ip_vs_dest_set_elem {
91 struct list_head list; /* list link */ 91 struct list_head list; /* list link */
92 struct ip_vs_dest __rcu *dest; /* destination server */ 92 struct ip_vs_dest *dest; /* destination server */
93 struct rcu_head rcu_head; 93 struct rcu_head rcu_head;
94}; 94};
95 95
@@ -107,11 +107,7 @@ static void ip_vs_dest_set_insert(struct ip_vs_dest_set *set,
107 107
108 if (check) { 108 if (check) {
109 list_for_each_entry(e, &set->list, list) { 109 list_for_each_entry(e, &set->list, list) {
110 struct ip_vs_dest *d; 110 if (e->dest == dest)
111
112 d = rcu_dereference_protected(e->dest, 1);
113 if (d == dest)
114 /* already existed */
115 return; 111 return;
116 } 112 }
117 } 113 }
@@ -121,7 +117,7 @@ static void ip_vs_dest_set_insert(struct ip_vs_dest_set *set,
121 return; 117 return;
122 118
123 ip_vs_dest_hold(dest); 119 ip_vs_dest_hold(dest);
124 RCU_INIT_POINTER(e->dest, dest); 120 e->dest = dest;
125 121
126 list_add_rcu(&e->list, &set->list); 122 list_add_rcu(&e->list, &set->list);
127 atomic_inc(&set->size); 123 atomic_inc(&set->size);
@@ -129,22 +125,27 @@ static void ip_vs_dest_set_insert(struct ip_vs_dest_set *set,
129 set->lastmod = jiffies; 125 set->lastmod = jiffies;
130} 126}
131 127
128static void ip_vs_lblcr_elem_rcu_free(struct rcu_head *head)
129{
130 struct ip_vs_dest_set_elem *e;
131
132 e = container_of(head, struct ip_vs_dest_set_elem, rcu_head);
133 ip_vs_dest_put(e->dest);
134 kfree(e);
135}
136
132static void 137static void
133ip_vs_dest_set_erase(struct ip_vs_dest_set *set, struct ip_vs_dest *dest) 138ip_vs_dest_set_erase(struct ip_vs_dest_set *set, struct ip_vs_dest *dest)
134{ 139{
135 struct ip_vs_dest_set_elem *e; 140 struct ip_vs_dest_set_elem *e;
136 141
137 list_for_each_entry(e, &set->list, list) { 142 list_for_each_entry(e, &set->list, list) {
138 struct ip_vs_dest *d; 143 if (e->dest == dest) {
139
140 d = rcu_dereference_protected(e->dest, 1);
141 if (d == dest) {
142 /* HIT */ 144 /* HIT */
143 atomic_dec(&set->size); 145 atomic_dec(&set->size);
144 set->lastmod = jiffies; 146 set->lastmod = jiffies;
145 ip_vs_dest_put(dest);
146 list_del_rcu(&e->list); 147 list_del_rcu(&e->list);
147 kfree_rcu(e, rcu_head); 148 call_rcu(&e->rcu_head, ip_vs_lblcr_elem_rcu_free);
148 break; 149 break;
149 } 150 }
150 } 151 }
@@ -155,16 +156,8 @@ static void ip_vs_dest_set_eraseall(struct ip_vs_dest_set *set)
155 struct ip_vs_dest_set_elem *e, *ep; 156 struct ip_vs_dest_set_elem *e, *ep;
156 157
157 list_for_each_entry_safe(e, ep, &set->list, list) { 158 list_for_each_entry_safe(e, ep, &set->list, list) {
158 struct ip_vs_dest *d;
159
160 d = rcu_dereference_protected(e->dest, 1);
161 /*
162 * We don't kfree dest because it is referred either
163 * by its service or by the trash dest list.
164 */
165 ip_vs_dest_put(d);
166 list_del_rcu(&e->list); 159 list_del_rcu(&e->list);
167 kfree_rcu(e, rcu_head); 160 call_rcu(&e->rcu_head, ip_vs_lblcr_elem_rcu_free);
168 } 161 }
169} 162}
170 163
@@ -175,12 +168,9 @@ static inline struct ip_vs_dest *ip_vs_dest_set_min(struct ip_vs_dest_set *set)
175 struct ip_vs_dest *dest, *least; 168 struct ip_vs_dest *dest, *least;
176 int loh, doh; 169 int loh, doh;
177 170
178 if (set == NULL)
179 return NULL;
180
181 /* select the first destination server, whose weight > 0 */ 171 /* select the first destination server, whose weight > 0 */
182 list_for_each_entry_rcu(e, &set->list, list) { 172 list_for_each_entry_rcu(e, &set->list, list) {
183 least = rcu_dereference(e->dest); 173 least = e->dest;
184 if (least->flags & IP_VS_DEST_F_OVERLOAD) 174 if (least->flags & IP_VS_DEST_F_OVERLOAD)
185 continue; 175 continue;
186 176
@@ -195,13 +185,13 @@ static inline struct ip_vs_dest *ip_vs_dest_set_min(struct ip_vs_dest_set *set)
195 /* find the destination with the weighted least load */ 185 /* find the destination with the weighted least load */
196 nextstage: 186 nextstage:
197 list_for_each_entry_continue_rcu(e, &set->list, list) { 187 list_for_each_entry_continue_rcu(e, &set->list, list) {
198 dest = rcu_dereference(e->dest); 188 dest = e->dest;
199 if (dest->flags & IP_VS_DEST_F_OVERLOAD) 189 if (dest->flags & IP_VS_DEST_F_OVERLOAD)
200 continue; 190 continue;
201 191
202 doh = ip_vs_dest_conn_overhead(dest); 192 doh = ip_vs_dest_conn_overhead(dest);
203 if ((loh * atomic_read(&dest->weight) > 193 if (((__s64)loh * atomic_read(&dest->weight) >
204 doh * atomic_read(&least->weight)) 194 (__s64)doh * atomic_read(&least->weight))
205 && (dest->flags & IP_VS_DEST_F_AVAILABLE)) { 195 && (dest->flags & IP_VS_DEST_F_AVAILABLE)) {
206 least = dest; 196 least = dest;
207 loh = doh; 197 loh = doh;
@@ -232,7 +222,7 @@ static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set)
232 222
233 /* select the first destination server, whose weight > 0 */ 223 /* select the first destination server, whose weight > 0 */
234 list_for_each_entry(e, &set->list, list) { 224 list_for_each_entry(e, &set->list, list) {
235 most = rcu_dereference_protected(e->dest, 1); 225 most = e->dest;
236 if (atomic_read(&most->weight) > 0) { 226 if (atomic_read(&most->weight) > 0) {
237 moh = ip_vs_dest_conn_overhead(most); 227 moh = ip_vs_dest_conn_overhead(most);
238 goto nextstage; 228 goto nextstage;
@@ -243,11 +233,11 @@ static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set)
243 /* find the destination with the weighted most load */ 233 /* find the destination with the weighted most load */
244 nextstage: 234 nextstage:
245 list_for_each_entry_continue(e, &set->list, list) { 235 list_for_each_entry_continue(e, &set->list, list) {
246 dest = rcu_dereference_protected(e->dest, 1); 236 dest = e->dest;
247 doh = ip_vs_dest_conn_overhead(dest); 237 doh = ip_vs_dest_conn_overhead(dest);
248 /* moh/mw < doh/dw ==> moh*dw < doh*mw, where mw,dw>0 */ 238 /* moh/mw < doh/dw ==> moh*dw < doh*mw, where mw,dw>0 */
249 if ((moh * atomic_read(&dest->weight) < 239 if (((__s64)moh * atomic_read(&dest->weight) <
250 doh * atomic_read(&most->weight)) 240 (__s64)doh * atomic_read(&most->weight))
251 && (atomic_read(&dest->weight) > 0)) { 241 && (atomic_read(&dest->weight) > 0)) {
252 most = dest; 242 most = dest;
253 moh = doh; 243 moh = doh;
@@ -611,8 +601,8 @@ __ip_vs_lblcr_schedule(struct ip_vs_service *svc)
611 continue; 601 continue;
612 602
613 doh = ip_vs_dest_conn_overhead(dest); 603 doh = ip_vs_dest_conn_overhead(dest);
614 if (loh * atomic_read(&dest->weight) > 604 if ((__s64)loh * atomic_read(&dest->weight) >
615 doh * atomic_read(&least->weight)) { 605 (__s64)doh * atomic_read(&least->weight)) {
616 least = dest; 606 least = dest;
617 loh = doh; 607 loh = doh;
618 } 608 }
@@ -819,7 +809,7 @@ static void __exit ip_vs_lblcr_cleanup(void)
819{ 809{
820 unregister_ip_vs_scheduler(&ip_vs_lblcr_scheduler); 810 unregister_ip_vs_scheduler(&ip_vs_lblcr_scheduler);
821 unregister_pernet_subsys(&ip_vs_lblcr_ops); 811 unregister_pernet_subsys(&ip_vs_lblcr_ops);
822 synchronize_rcu(); 812 rcu_barrier();
823} 813}
824 814
825 815
diff --git a/net/netfilter/ipvs/ip_vs_nq.c b/net/netfilter/ipvs/ip_vs_nq.c
index d8d9860934fe..961a6de9bb29 100644
--- a/net/netfilter/ipvs/ip_vs_nq.c
+++ b/net/netfilter/ipvs/ip_vs_nq.c
@@ -40,7 +40,7 @@
40#include <net/ip_vs.h> 40#include <net/ip_vs.h>
41 41
42 42
43static inline unsigned int 43static inline int
44ip_vs_nq_dest_overhead(struct ip_vs_dest *dest) 44ip_vs_nq_dest_overhead(struct ip_vs_dest *dest)
45{ 45{
46 /* 46 /*
@@ -59,7 +59,7 @@ ip_vs_nq_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
59 struct ip_vs_iphdr *iph) 59 struct ip_vs_iphdr *iph)
60{ 60{
61 struct ip_vs_dest *dest, *least = NULL; 61 struct ip_vs_dest *dest, *least = NULL;
62 unsigned int loh = 0, doh; 62 int loh = 0, doh;
63 63
64 IP_VS_DBG(6, "%s(): Scheduling...\n", __func__); 64 IP_VS_DBG(6, "%s(): Scheduling...\n", __func__);
65 65
@@ -92,8 +92,8 @@ ip_vs_nq_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
92 } 92 }
93 93
94 if (!least || 94 if (!least ||
95 (loh * atomic_read(&dest->weight) > 95 ((__s64)loh * atomic_read(&dest->weight) >
96 doh * atomic_read(&least->weight))) { 96 (__s64)doh * atomic_read(&least->weight))) {
97 least = dest; 97 least = dest;
98 loh = doh; 98 loh = doh;
99 } 99 }
diff --git a/net/netfilter/ipvs/ip_vs_sed.c b/net/netfilter/ipvs/ip_vs_sed.c
index a5284cc3d882..e446b9fa7424 100644
--- a/net/netfilter/ipvs/ip_vs_sed.c
+++ b/net/netfilter/ipvs/ip_vs_sed.c
@@ -44,7 +44,7 @@
44#include <net/ip_vs.h> 44#include <net/ip_vs.h>
45 45
46 46
47static inline unsigned int 47static inline int
48ip_vs_sed_dest_overhead(struct ip_vs_dest *dest) 48ip_vs_sed_dest_overhead(struct ip_vs_dest *dest)
49{ 49{
50 /* 50 /*
@@ -63,7 +63,7 @@ ip_vs_sed_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
63 struct ip_vs_iphdr *iph) 63 struct ip_vs_iphdr *iph)
64{ 64{
65 struct ip_vs_dest *dest, *least; 65 struct ip_vs_dest *dest, *least;
66 unsigned int loh, doh; 66 int loh, doh;
67 67
68 IP_VS_DBG(6, "%s(): Scheduling...\n", __func__); 68 IP_VS_DBG(6, "%s(): Scheduling...\n", __func__);
69 69
@@ -99,8 +99,8 @@ ip_vs_sed_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
99 if (dest->flags & IP_VS_DEST_F_OVERLOAD) 99 if (dest->flags & IP_VS_DEST_F_OVERLOAD)
100 continue; 100 continue;
101 doh = ip_vs_sed_dest_overhead(dest); 101 doh = ip_vs_sed_dest_overhead(dest);
102 if (loh * atomic_read(&dest->weight) > 102 if ((__s64)loh * atomic_read(&dest->weight) >
103 doh * atomic_read(&least->weight)) { 103 (__s64)doh * atomic_read(&least->weight)) {
104 least = dest; 104 least = dest;
105 loh = doh; 105 loh = doh;
106 } 106 }
diff --git a/net/netfilter/ipvs/ip_vs_wlc.c b/net/netfilter/ipvs/ip_vs_wlc.c
index 6dc1fa128840..b5b4650d50a9 100644
--- a/net/netfilter/ipvs/ip_vs_wlc.c
+++ b/net/netfilter/ipvs/ip_vs_wlc.c
@@ -35,7 +35,7 @@ ip_vs_wlc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
35 struct ip_vs_iphdr *iph) 35 struct ip_vs_iphdr *iph)
36{ 36{
37 struct ip_vs_dest *dest, *least; 37 struct ip_vs_dest *dest, *least;
38 unsigned int loh, doh; 38 int loh, doh;
39 39
40 IP_VS_DBG(6, "ip_vs_wlc_schedule(): Scheduling...\n"); 40 IP_VS_DBG(6, "ip_vs_wlc_schedule(): Scheduling...\n");
41 41
@@ -71,8 +71,8 @@ ip_vs_wlc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
71 if (dest->flags & IP_VS_DEST_F_OVERLOAD) 71 if (dest->flags & IP_VS_DEST_F_OVERLOAD)
72 continue; 72 continue;
73 doh = ip_vs_dest_conn_overhead(dest); 73 doh = ip_vs_dest_conn_overhead(dest);
74 if (loh * atomic_read(&dest->weight) > 74 if ((__s64)loh * atomic_read(&dest->weight) >
75 doh * atomic_read(&least->weight)) { 75 (__s64)doh * atomic_read(&least->weight)) {
76 least = dest; 76 least = dest;
77 loh = doh; 77 loh = doh;
78 } 78 }
diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c
index 6fd967c6278c..cdf4567ba9b3 100644
--- a/net/netfilter/nf_synproxy_core.c
+++ b/net/netfilter/nf_synproxy_core.c
@@ -24,7 +24,7 @@
24int synproxy_net_id; 24int synproxy_net_id;
25EXPORT_SYMBOL_GPL(synproxy_net_id); 25EXPORT_SYMBOL_GPL(synproxy_net_id);
26 26
27void 27bool
28synproxy_parse_options(const struct sk_buff *skb, unsigned int doff, 28synproxy_parse_options(const struct sk_buff *skb, unsigned int doff,
29 const struct tcphdr *th, struct synproxy_options *opts) 29 const struct tcphdr *th, struct synproxy_options *opts)
30{ 30{
@@ -32,7 +32,8 @@ synproxy_parse_options(const struct sk_buff *skb, unsigned int doff,
32 u8 buf[40], *ptr; 32 u8 buf[40], *ptr;
33 33
34 ptr = skb_header_pointer(skb, doff + sizeof(*th), length, buf); 34 ptr = skb_header_pointer(skb, doff + sizeof(*th), length, buf);
35 BUG_ON(ptr == NULL); 35 if (ptr == NULL)
36 return false;
36 37
37 opts->options = 0; 38 opts->options = 0;
38 while (length > 0) { 39 while (length > 0) {
@@ -41,16 +42,16 @@ synproxy_parse_options(const struct sk_buff *skb, unsigned int doff,
41 42
42 switch (opcode) { 43 switch (opcode) {
43 case TCPOPT_EOL: 44 case TCPOPT_EOL:
44 return; 45 return true;
45 case TCPOPT_NOP: 46 case TCPOPT_NOP:
46 length--; 47 length--;
47 continue; 48 continue;
48 default: 49 default:
49 opsize = *ptr++; 50 opsize = *ptr++;
50 if (opsize < 2) 51 if (opsize < 2)
51 return; 52 return true;
52 if (opsize > length) 53 if (opsize > length)
53 return; 54 return true;
54 55
55 switch (opcode) { 56 switch (opcode) {
56 case TCPOPT_MSS: 57 case TCPOPT_MSS:
@@ -84,6 +85,7 @@ synproxy_parse_options(const struct sk_buff *skb, unsigned int doff,
84 length -= opsize; 85 length -= opsize;
85 } 86 }
86 } 87 }
88 return true;
87} 89}
88EXPORT_SYMBOL_GPL(synproxy_parse_options); 90EXPORT_SYMBOL_GPL(synproxy_parse_options);
89 91
diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
index 32ad015ee8ce..a2fef8b10b96 100644
--- a/net/sched/sch_fq.c
+++ b/net/sched/sch_fq.c
@@ -285,7 +285,7 @@ static struct fq_flow *fq_classify(struct sk_buff *skb, struct fq_sched_data *q)
285 285
286 286
287/* remove one skb from head of flow queue */ 287/* remove one skb from head of flow queue */
288static struct sk_buff *fq_dequeue_head(struct fq_flow *flow) 288static struct sk_buff *fq_dequeue_head(struct Qdisc *sch, struct fq_flow *flow)
289{ 289{
290 struct sk_buff *skb = flow->head; 290 struct sk_buff *skb = flow->head;
291 291
@@ -293,6 +293,8 @@ static struct sk_buff *fq_dequeue_head(struct fq_flow *flow)
293 flow->head = skb->next; 293 flow->head = skb->next;
294 skb->next = NULL; 294 skb->next = NULL;
295 flow->qlen--; 295 flow->qlen--;
296 sch->qstats.backlog -= qdisc_pkt_len(skb);
297 sch->q.qlen--;
296 } 298 }
297 return skb; 299 return skb;
298} 300}
@@ -418,8 +420,9 @@ static struct sk_buff *fq_dequeue(struct Qdisc *sch)
418 struct fq_flow_head *head; 420 struct fq_flow_head *head;
419 struct sk_buff *skb; 421 struct sk_buff *skb;
420 struct fq_flow *f; 422 struct fq_flow *f;
423 u32 rate;
421 424
422 skb = fq_dequeue_head(&q->internal); 425 skb = fq_dequeue_head(sch, &q->internal);
423 if (skb) 426 if (skb)
424 goto out; 427 goto out;
425 fq_check_throttled(q, now); 428 fq_check_throttled(q, now);
@@ -449,7 +452,7 @@ begin:
449 goto begin; 452 goto begin;
450 } 453 }
451 454
452 skb = fq_dequeue_head(f); 455 skb = fq_dequeue_head(sch, f);
453 if (!skb) { 456 if (!skb) {
454 head->first = f->next; 457 head->first = f->next;
455 /* force a pass through old_flows to prevent starvation */ 458 /* force a pass through old_flows to prevent starvation */
@@ -466,43 +469,74 @@ begin:
466 f->time_next_packet = now; 469 f->time_next_packet = now;
467 f->credit -= qdisc_pkt_len(skb); 470 f->credit -= qdisc_pkt_len(skb);
468 471
469 if (f->credit <= 0 && 472 if (f->credit > 0 || !q->rate_enable)
470 q->rate_enable && 473 goto out;
471 skb->sk && skb->sk->sk_state != TCP_TIME_WAIT) {
472 u32 rate = skb->sk->sk_pacing_rate ?: q->flow_default_rate;
473 474
474 rate = min(rate, q->flow_max_rate); 475 if (skb->sk && skb->sk->sk_state != TCP_TIME_WAIT) {
475 if (rate) { 476 rate = skb->sk->sk_pacing_rate ?: q->flow_default_rate;
476 u64 len = (u64)qdisc_pkt_len(skb) * NSEC_PER_SEC;
477
478 do_div(len, rate);
479 /* Since socket rate can change later,
480 * clamp the delay to 125 ms.
481 * TODO: maybe segment the too big skb, as in commit
482 * e43ac79a4bc ("sch_tbf: segment too big GSO packets")
483 */
484 if (unlikely(len > 125 * NSEC_PER_MSEC)) {
485 len = 125 * NSEC_PER_MSEC;
486 q->stat_pkts_too_long++;
487 }
488 477
489 f->time_next_packet = now + len; 478 rate = min(rate, q->flow_max_rate);
479 } else {
480 rate = q->flow_max_rate;
481 if (rate == ~0U)
482 goto out;
483 }
484 if (rate) {
485 u32 plen = max(qdisc_pkt_len(skb), q->quantum);
486 u64 len = (u64)plen * NSEC_PER_SEC;
487
488 do_div(len, rate);
489 /* Since socket rate can change later,
490 * clamp the delay to 125 ms.
491 * TODO: maybe segment the too big skb, as in commit
492 * e43ac79a4bc ("sch_tbf: segment too big GSO packets")
493 */
494 if (unlikely(len > 125 * NSEC_PER_MSEC)) {
495 len = 125 * NSEC_PER_MSEC;
496 q->stat_pkts_too_long++;
490 } 497 }
498
499 f->time_next_packet = now + len;
491 } 500 }
492out: 501out:
493 sch->qstats.backlog -= qdisc_pkt_len(skb);
494 qdisc_bstats_update(sch, skb); 502 qdisc_bstats_update(sch, skb);
495 sch->q.qlen--;
496 qdisc_unthrottled(sch); 503 qdisc_unthrottled(sch);
497 return skb; 504 return skb;
498} 505}
499 506
500static void fq_reset(struct Qdisc *sch) 507static void fq_reset(struct Qdisc *sch)
501{ 508{
509 struct fq_sched_data *q = qdisc_priv(sch);
510 struct rb_root *root;
502 struct sk_buff *skb; 511 struct sk_buff *skb;
512 struct rb_node *p;
513 struct fq_flow *f;
514 unsigned int idx;
503 515
504 while ((skb = fq_dequeue(sch)) != NULL) 516 while ((skb = fq_dequeue_head(sch, &q->internal)) != NULL)
505 kfree_skb(skb); 517 kfree_skb(skb);
518
519 if (!q->fq_root)
520 return;
521
522 for (idx = 0; idx < (1U << q->fq_trees_log); idx++) {
523 root = &q->fq_root[idx];
524 while ((p = rb_first(root)) != NULL) {
525 f = container_of(p, struct fq_flow, fq_node);
526 rb_erase(p, root);
527
528 while ((skb = fq_dequeue_head(sch, f)) != NULL)
529 kfree_skb(skb);
530
531 kmem_cache_free(fq_flow_cachep, f);
532 }
533 }
534 q->new_flows.first = NULL;
535 q->old_flows.first = NULL;
536 q->delayed = RB_ROOT;
537 q->flows = 0;
538 q->inactive_flows = 0;
539 q->throttled_flows = 0;
506} 540}
507 541
508static void fq_rehash(struct fq_sched_data *q, 542static void fq_rehash(struct fq_sched_data *q,
@@ -645,6 +679,8 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt)
645 while (sch->q.qlen > sch->limit) { 679 while (sch->q.qlen > sch->limit) {
646 struct sk_buff *skb = fq_dequeue(sch); 680 struct sk_buff *skb = fq_dequeue(sch);
647 681
682 if (!skb)
683 break;
648 kfree_skb(skb); 684 kfree_skb(skb);
649 drop_count++; 685 drop_count++;
650 } 686 }
@@ -657,21 +693,9 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt)
657static void fq_destroy(struct Qdisc *sch) 693static void fq_destroy(struct Qdisc *sch)
658{ 694{
659 struct fq_sched_data *q = qdisc_priv(sch); 695 struct fq_sched_data *q = qdisc_priv(sch);
660 struct rb_root *root;
661 struct rb_node *p;
662 unsigned int idx;
663 696
664 if (q->fq_root) { 697 fq_reset(sch);
665 for (idx = 0; idx < (1U << q->fq_trees_log); idx++) { 698 kfree(q->fq_root);
666 root = &q->fq_root[idx];
667 while ((p = rb_first(root)) != NULL) {
668 rb_erase(p, root);
669 kmem_cache_free(fq_flow_cachep,
670 container_of(p, struct fq_flow, fq_node));
671 }
672 }
673 kfree(q->fq_root);
674 }
675 qdisc_watchdog_cancel(&q->watchdog); 699 qdisc_watchdog_cancel(&q->watchdog);
676} 700}
677 701
diff --git a/net/sysctl_net.c b/net/sysctl_net.c
index 9bc6db04be3e..e7000be321b0 100644
--- a/net/sysctl_net.c
+++ b/net/sysctl_net.c
@@ -47,12 +47,12 @@ static int net_ctl_permissions(struct ctl_table_header *head,
47 47
48 /* Allow network administrator to have same access as root. */ 48 /* Allow network administrator to have same access as root. */
49 if (ns_capable(net->user_ns, CAP_NET_ADMIN) || 49 if (ns_capable(net->user_ns, CAP_NET_ADMIN) ||
50 uid_eq(root_uid, current_uid())) { 50 uid_eq(root_uid, current_euid())) {
51 int mode = (table->mode >> 6) & 7; 51 int mode = (table->mode >> 6) & 7;
52 return (mode << 6) | (mode << 3) | mode; 52 return (mode << 6) | (mode << 3) | mode;
53 } 53 }
54 /* Allow netns root group to have the same access as the root group */ 54 /* Allow netns root group to have the same access as the root group */
55 if (gid_eq(root_gid, current_gid())) { 55 if (in_egroup_p(root_gid)) {
56 int mode = (table->mode >> 3) & 7; 56 int mode = (table->mode >> 3) & 7;
57 return (mode << 3) | mode; 57 return (mode << 3) | mode;
58 } 58 }