author    Takashi Iwai <tiwai@suse.de>  2013-10-25 05:43:47 -0400
committer Takashi Iwai <tiwai@suse.de>  2013-10-25 05:43:47 -0400
commit    6913a9dbf18f08e3577695032da15812bda92b66 (patch)
tree      05ca8620b11f2898022a7fd8a00f1f8566161428 /net
parent    7342017f4a0f129d277f78b8761f2732661ba30a (diff)
parent    9645083ca5ef365b7b750cf219bb20b61bb925f8 (diff)
Merge tag 'asoc-v3.13' of git://git.kernel.org/pub/scm/linux/kernel/git/broonie/sound into for-next
ASoC: Updates for v3.13

 - Further work on the dmaengine helpers, including support for
   configuring the parameters for DMA by reading the capabilities of the
   DMA controller, which removes some guesswork and magic numbers from
   drivers.
 - A refresh of the documentation.
 - Conversions of many drivers to direct regmap API usage in order to
   allow the ASoC level register I/O code to be removed; this will
   hopefully be completed by v3.14.
 - Support for using async register I/O in DAPM, reducing the time taken
   to implement power transitions on systems that support it.
Diffstat (limited to 'net')
-rw-r--r--  net/802/mrp.c                        27
-rw-r--r--  net/bluetooth/hci_core.c             26
-rw-r--r--  net/bluetooth/hci_event.c             6
-rw-r--r--  net/bluetooth/l2cap_core.c            7
-rw-r--r--  net/bluetooth/rfcomm/tty.c           35
-rw-r--r--  net/core/dev.c                       49
-rw-r--r--  net/core/flow_dissector.c             4
-rw-r--r--  net/core/secure_seq.c                27
-rw-r--r--  net/ipv4/af_inet.c                    4
-rw-r--r--  net/ipv4/igmp.c                       4
-rw-r--r--  net/ipv4/ip_tunnel.c                 22
-rw-r--r--  net/ipv4/ip_tunnel_core.c             2
-rw-r--r--  net/ipv4/netfilter/ipt_SYNPROXY.c    10
-rw-r--r--  net/ipv4/raw.c                        4
-rw-r--r--  net/ipv4/tcp_output.c                17
-rw-r--r--  net/ipv4/udp.c                        2
-rw-r--r--  net/ipv6/addrconf.c                  79
-rw-r--r--  net/ipv6/ip6_gre.c                    4
-rw-r--r--  net/ipv6/ip6_output.c                53
-rw-r--r--  net/ipv6/ip6_tunnel.c                 3
-rw-r--r--  net/ipv6/mcast.c                      6
-rw-r--r--  net/ipv6/netfilter/ip6t_SYNPROXY.c   10
-rw-r--r--  net/ipv6/raw.c                        4
-rw-r--r--  net/ipv6/sit.c                       86
-rw-r--r--  net/ipv6/udp.c                        4
-rw-r--r--  net/lapb/lapb_timer.c                 1
-rw-r--r--  net/netfilter/ipvs/ip_vs_core.c      12
-rw-r--r--  net/netfilter/ipvs/ip_vs_ctl.c       86
-rw-r--r--  net/netfilter/ipvs/ip_vs_est.c        4
-rw-r--r--  net/netfilter/ipvs/ip_vs_lblc.c      72
-rw-r--r--  net/netfilter/ipvs/ip_vs_lblcr.c     62
-rw-r--r--  net/netfilter/ipvs/ip_vs_nq.c         8
-rw-r--r--  net/netfilter/ipvs/ip_vs_sed.c        8
-rw-r--r--  net/netfilter/ipvs/ip_vs_wlc.c        6
-rw-r--r--  net/netfilter/nf_synproxy_core.c     12
-rw-r--r--  net/sched/sch_fq.c                  102
-rw-r--r--  net/sysctl_net.c                      4
37 files changed, 516 insertions, 356 deletions
diff --git a/net/802/mrp.c b/net/802/mrp.c
index 1eb05d80b07b..3ed616215870 100644
--- a/net/802/mrp.c
+++ b/net/802/mrp.c
@@ -24,6 +24,11 @@
 static unsigned int mrp_join_time __read_mostly = 200;
 module_param(mrp_join_time, uint, 0644);
 MODULE_PARM_DESC(mrp_join_time, "Join time in ms (default 200ms)");
+
+static unsigned int mrp_periodic_time __read_mostly = 1000;
+module_param(mrp_periodic_time, uint, 0644);
+MODULE_PARM_DESC(mrp_periodic_time, "Periodic time in ms (default 1s)");
+
 MODULE_LICENSE("GPL");
 
 static const u8
@@ -595,6 +600,24 @@ static void mrp_join_timer(unsigned long data)
 	mrp_join_timer_arm(app);
 }
 
+static void mrp_periodic_timer_arm(struct mrp_applicant *app)
+{
+	mod_timer(&app->periodic_timer,
+		  jiffies + msecs_to_jiffies(mrp_periodic_time));
+}
+
+static void mrp_periodic_timer(unsigned long data)
+{
+	struct mrp_applicant *app = (struct mrp_applicant *)data;
+
+	spin_lock(&app->lock);
+	mrp_mad_event(app, MRP_EVENT_PERIODIC);
+	mrp_pdu_queue(app);
+	spin_unlock(&app->lock);
+
+	mrp_periodic_timer_arm(app);
+}
+
 static int mrp_pdu_parse_end_mark(struct sk_buff *skb, int *offset)
 {
 	__be16 endmark;
@@ -845,6 +868,9 @@ int mrp_init_applicant(struct net_device *dev, struct mrp_application *appl)
 	rcu_assign_pointer(dev->mrp_port->applicants[appl->type], app);
 	setup_timer(&app->join_timer, mrp_join_timer, (unsigned long)app);
 	mrp_join_timer_arm(app);
+	setup_timer(&app->periodic_timer, mrp_periodic_timer,
+		    (unsigned long)app);
+	mrp_periodic_timer_arm(app);
 	return 0;
 
 err3:
@@ -870,6 +896,7 @@ void mrp_uninit_applicant(struct net_device *dev, struct mrp_application *appl)
 	 * all pending messages before the applicant is gone.
 	 */
 	del_timer_sync(&app->join_timer);
+	del_timer_sync(&app->periodic_timer);
 
 	spin_lock_bh(&app->lock);
 	mrp_mad_event(app, MRP_EVENT_TX);
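The mrp changes above are a textbook self-rearming kernel timer: arm it once at init, re-arm at the end of each handler run, and stop it with del_timer_sync() during teardown so a concurrently running handler is waited for. A minimal standalone sketch of that pattern using the same pre-4.15 timer API as this tree; the demo_* module and names are hypothetical:

    #include <linux/module.h>
    #include <linux/timer.h>
    #include <linux/jiffies.h>

    static struct timer_list demo_timer;
    static unsigned int demo_period_ms = 1000;

    static void demo_timer_fn(unsigned long data)
    {
            /* do the periodic work here, then re-arm for the next period */
            mod_timer(&demo_timer, jiffies + msecs_to_jiffies(demo_period_ms));
    }

    static int __init demo_init(void)
    {
            setup_timer(&demo_timer, demo_timer_fn, 0);
            mod_timer(&demo_timer, jiffies + msecs_to_jiffies(demo_period_ms));
            return 0;
    }

    static void __exit demo_exit(void)
    {
            /* as in mrp_uninit_applicant(): wait out a running handler */
            del_timer_sync(&demo_timer);
    }

    module_init(demo_init);
    module_exit(demo_exit);
    MODULE_LICENSE("GPL");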
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 634debab4d54..fb7356fcfe51 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -1146,7 +1146,11 @@ int hci_dev_open(__u16 dev)
 		goto done;
 	}
 
-	if (hdev->rfkill && rfkill_blocked(hdev->rfkill)) {
+	/* Check for rfkill but allow the HCI setup stage to proceed
+	 * (which in itself doesn't cause any RF activity).
+	 */
+	if (test_bit(HCI_RFKILLED, &hdev->dev_flags) &&
+	    !test_bit(HCI_SETUP, &hdev->dev_flags)) {
 		ret = -ERFKILL;
 		goto done;
 	}
@@ -1566,10 +1570,13 @@ static int hci_rfkill_set_block(void *data, bool blocked)
 
 	BT_DBG("%p name %s blocked %d", hdev, hdev->name, blocked);
 
-	if (!blocked)
-		return 0;
-
-	hci_dev_do_close(hdev);
+	if (blocked) {
+		set_bit(HCI_RFKILLED, &hdev->dev_flags);
+		if (!test_bit(HCI_SETUP, &hdev->dev_flags))
+			hci_dev_do_close(hdev);
+	} else {
+		clear_bit(HCI_RFKILLED, &hdev->dev_flags);
+	}
 
 	return 0;
 }
@@ -1591,9 +1598,13 @@ static void hci_power_on(struct work_struct *work)
 		return;
 	}
 
-	if (test_bit(HCI_AUTO_OFF, &hdev->dev_flags))
+	if (test_bit(HCI_RFKILLED, &hdev->dev_flags)) {
+		clear_bit(HCI_AUTO_OFF, &hdev->dev_flags);
+		hci_dev_do_close(hdev);
+	} else if (test_bit(HCI_AUTO_OFF, &hdev->dev_flags)) {
 		queue_delayed_work(hdev->req_workqueue, &hdev->power_off,
 				   HCI_AUTO_OFF_TIMEOUT);
+	}
 
 	if (test_and_clear_bit(HCI_SETUP, &hdev->dev_flags))
 		mgmt_index_added(hdev);
@@ -2209,6 +2220,9 @@ int hci_register_dev(struct hci_dev *hdev)
 		}
 	}
 
+	if (hdev->rfkill && rfkill_blocked(hdev->rfkill))
+		set_bit(HCI_RFKILLED, &hdev->dev_flags);
+
 	set_bit(HCI_SETUP, &hdev->dev_flags);
 
 	if (hdev->dev_type != HCI_AMP)
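The hci_core fix stops querying rfkill at each decision point and instead caches the state in an HCI_RFKILLED bit of hdev->dev_flags, so hci_dev_open(), hci_rfkill_set_block() and hci_power_on() all consult one atomic flag word. A minimal sketch of that atomic bit-flag idiom; the demo_* names and flag bits are hypothetical:

    #include <linux/bitops.h>
    #include <linux/errno.h>

    #define DEMO_RFKILLED 0 /* hypothetical flag bit numbers */
    #define DEMO_SETUP    1

    static unsigned long demo_flags;

    static void demo_rfkill_event(bool blocked)
    {
            if (blocked)
                    set_bit(DEMO_RFKILLED, &demo_flags);
            else
                    clear_bit(DEMO_RFKILLED, &demo_flags);
    }

    static int demo_can_open(void)
    {
            /* blocked by rfkill, unless still in the setup stage,
             * mirroring the hci_dev_open() check above
             */
            if (test_bit(DEMO_RFKILLED, &demo_flags) &&
                !test_bit(DEMO_SETUP, &demo_flags))
                    return -ERFKILL;
            return 0;
    }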
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 94aab73f89d4..8db3e89fae35 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -3557,7 +3557,11 @@ static void hci_le_ltk_request_evt(struct hci_dev *hdev, struct sk_buff *skb)
 	cp.handle = cpu_to_le16(conn->handle);
 
 	if (ltk->authenticated)
-		conn->sec_level = BT_SECURITY_HIGH;
+		conn->pending_sec_level = BT_SECURITY_HIGH;
+	else
+		conn->pending_sec_level = BT_SECURITY_MEDIUM;
+
+	conn->enc_key_size = ltk->enc_size;
 
 	hci_send_cmd(hdev, HCI_OP_LE_LTK_REPLY, sizeof(cp), &cp);
 
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index b3bb7bca8e60..63fa11109a1c 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -3755,6 +3755,13 @@ static struct l2cap_chan *l2cap_connect(struct l2cap_conn *conn,
 
 	sk = chan->sk;
 
+	/* For certain devices (ex: HID mouse), support for authentication,
+	 * pairing and bonding is optional. For such devices, inorder to avoid
+	 * the ACL alive for too long after L2CAP disconnection, reset the ACL
+	 * disc_timeout back to HCI_DISCONN_TIMEOUT during L2CAP connect.
+	 */
+	conn->hcon->disc_timeout = HCI_DISCONN_TIMEOUT;
+
 	bacpy(&bt_sk(sk)->src, conn->src);
 	bacpy(&bt_sk(sk)->dst, conn->dst);
 	chan->psm = psm;
diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c
index 6d126faf145f..84fcf9fff3ea 100644
--- a/net/bluetooth/rfcomm/tty.c
+++ b/net/bluetooth/rfcomm/tty.c
@@ -569,7 +569,6 @@ static void rfcomm_dev_data_ready(struct rfcomm_dlc *dlc, struct sk_buff *skb)
 static void rfcomm_dev_state_change(struct rfcomm_dlc *dlc, int err)
 {
 	struct rfcomm_dev *dev = dlc->owner;
-	struct tty_struct *tty;
 	if (!dev)
 		return;
 
@@ -581,38 +580,8 @@ static void rfcomm_dev_state_change(struct rfcomm_dlc *dlc, int err)
 					  DPM_ORDER_DEV_AFTER_PARENT);
 
 		wake_up_interruptible(&dev->port.open_wait);
-	} else if (dlc->state == BT_CLOSED) {
-		tty = tty_port_tty_get(&dev->port);
-		if (!tty) {
-			if (test_bit(RFCOMM_RELEASE_ONHUP, &dev->flags)) {
-				/* Drop DLC lock here to avoid deadlock
-				 * 1. rfcomm_dev_get will take rfcomm_dev_lock
-				 * but in rfcomm_dev_add there's lock order:
-				 * rfcomm_dev_lock -> dlc lock
-				 * 2. tty_port_put will deadlock if it's
-				 * the last reference
-				 *
-				 * FIXME: when we release the lock anything
-				 * could happen to dev, even its destruction
-				 */
-				rfcomm_dlc_unlock(dlc);
-				if (rfcomm_dev_get(dev->id) == NULL) {
-					rfcomm_dlc_lock(dlc);
-					return;
-				}
-
-				if (!test_and_set_bit(RFCOMM_TTY_RELEASED,
-						      &dev->flags))
-					tty_port_put(&dev->port);
-
-				tty_port_put(&dev->port);
-				rfcomm_dlc_lock(dlc);
-			}
-		} else {
-			tty_hangup(tty);
-			tty_kref_put(tty);
-		}
-	}
+	} else if (dlc->state == BT_CLOSED)
+		tty_port_tty_hangup(&dev->port, false);
 }
 
 static void rfcomm_dev_modem_status(struct rfcomm_dlc *dlc, u8 v24_sig)
diff --git a/net/core/dev.c b/net/core/dev.c
index 5c713f2239cc..65f829cfd928 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5247,10 +5247,12 @@ static int dev_new_index(struct net *net)
 
 /* Delayed registration/unregisteration */
 static LIST_HEAD(net_todo_list);
+static DECLARE_WAIT_QUEUE_HEAD(netdev_unregistering_wq);
 
 static void net_set_todo(struct net_device *dev)
 {
 	list_add_tail(&dev->todo_list, &net_todo_list);
+	dev_net(dev)->dev_unreg_count++;
 }
 
 static void rollback_registered_many(struct list_head *head)
@@ -5918,6 +5920,12 @@ void netdev_run_todo(void)
 		if (dev->destructor)
 			dev->destructor(dev);
 
+		/* Report a network device has been unregistered */
+		rtnl_lock();
+		dev_net(dev)->dev_unreg_count--;
+		__rtnl_unlock();
+		wake_up(&netdev_unregistering_wq);
+
 		/* Free network device */
 		kobject_put(&dev->dev.kobj);
 	}
@@ -6603,6 +6611,34 @@ static void __net_exit default_device_exit(struct net *net)
 	rtnl_unlock();
 }
 
+static void __net_exit rtnl_lock_unregistering(struct list_head *net_list)
+{
+	/* Return with the rtnl_lock held when there are no network
+	 * devices unregistering in any network namespace in net_list.
+	 */
+	struct net *net;
+	bool unregistering;
+	DEFINE_WAIT(wait);
+
+	for (;;) {
+		prepare_to_wait(&netdev_unregistering_wq, &wait,
+				TASK_UNINTERRUPTIBLE);
+		unregistering = false;
+		rtnl_lock();
+		list_for_each_entry(net, net_list, exit_list) {
+			if (net->dev_unreg_count > 0) {
+				unregistering = true;
+				break;
+			}
+		}
+		if (!unregistering)
+			break;
+		__rtnl_unlock();
+		schedule();
+	}
+	finish_wait(&netdev_unregistering_wq, &wait);
+}
+
 static void __net_exit default_device_exit_batch(struct list_head *net_list)
 {
 	/* At exit all network devices most be removed from a network
@@ -6614,7 +6650,18 @@ static void __net_exit default_device_exit_batch(struct list_head *net_list)
 	struct net *net;
 	LIST_HEAD(dev_kill_list);
 
-	rtnl_lock();
+	/* To prevent network device cleanup code from dereferencing
+	 * loopback devices or network devices that have been freed
+	 * wait here for all pending unregistrations to complete,
+	 * before unregistring the loopback device and allowing the
+	 * network namespace be freed.
+	 *
+	 * The netdev todo list containing all network devices
+	 * unregistrations that happen in default_device_exit_batch
+	 * will run in the rtnl_unlock() at the end of
+	 * default_device_exit_batch.
+	 */
+	rtnl_lock_unregistering(net_list);
 	list_for_each_entry(net, net_list, exit_list) {
 		for_each_netdev_reverse(net, dev) {
 			if (dev->rtnl_link_ops)
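rtnl_lock_unregistering() is an open-coded waitqueue loop: the waker (netdev_run_todo() above) changes the shared state and calls wake_up(), and the waiter re-checks its predicate after every wakeup, never trusting a single notification. A minimal sketch of the prepare_to_wait()/finish_wait() pattern; demo_cond() is a hypothetical predicate:

    #include <linux/wait.h>
    #include <linux/sched.h>

    static DECLARE_WAIT_QUEUE_HEAD(demo_wq);

    static void demo_wait_for(bool (*demo_cond)(void))
    {
            DEFINE_WAIT(wait);

            for (;;) {
                    prepare_to_wait(&demo_wq, &wait, TASK_UNINTERRUPTIBLE);
                    if (demo_cond()) /* re-checked after every wakeup */
                            break;
                    schedule();
            }
            finish_wait(&demo_wq, &wait);
    }

    /* The other side updates the condition and then calls
     * wake_up(&demo_wq), just as netdev_run_todo() does after
     * decrementing dev_unreg_count.
     */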
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 1929af87b260..8d7d0dd72db2 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -154,8 +154,8 @@ ipv6:
 	if (poff >= 0) {
 		__be32 *ports, _ports;
 
-		nhoff += poff;
-		ports = skb_header_pointer(skb, nhoff, sizeof(_ports), &_ports);
+		ports = skb_header_pointer(skb, nhoff + poff,
+					   sizeof(_ports), &_ports);
 		if (ports)
 			flow->ports = *ports;
 	}
diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c
index 6a2f13cee86a..3f1ec1586ae1 100644
--- a/net/core/secure_seq.c
+++ b/net/core/secure_seq.c
@@ -10,11 +10,24 @@
 
 #include <net/secure_seq.h>
 
-static u32 net_secret[MD5_MESSAGE_BYTES / 4] ____cacheline_aligned;
+#define NET_SECRET_SIZE (MD5_MESSAGE_BYTES / 4)
 
-void net_secret_init(void)
+static u32 net_secret[NET_SECRET_SIZE] ____cacheline_aligned;
+
+static void net_secret_init(void)
 {
-	get_random_bytes(net_secret, sizeof(net_secret));
+	u32 tmp;
+	int i;
+
+	if (likely(net_secret[0]))
+		return;
+
+	for (i = NET_SECRET_SIZE; i > 0;) {
+		do {
+			get_random_bytes(&tmp, sizeof(tmp));
+		} while (!tmp);
+		cmpxchg(&net_secret[--i], 0, tmp);
+	}
 }
 
 #ifdef CONFIG_INET
@@ -42,6 +55,7 @@ __u32 secure_tcpv6_sequence_number(const __be32 *saddr, const __be32 *daddr,
 	u32 hash[MD5_DIGEST_WORDS];
 	u32 i;
 
+	net_secret_init();
 	memcpy(hash, saddr, 16);
 	for (i = 0; i < 4; i++)
 		secret[i] = net_secret[i] + (__force u32)daddr[i];
@@ -63,6 +77,7 @@ u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr,
 	u32 hash[MD5_DIGEST_WORDS];
 	u32 i;
 
+	net_secret_init();
 	memcpy(hash, saddr, 16);
 	for (i = 0; i < 4; i++)
 		secret[i] = net_secret[i] + (__force u32) daddr[i];
@@ -82,6 +97,7 @@ __u32 secure_ip_id(__be32 daddr)
 {
 	u32 hash[MD5_DIGEST_WORDS];
 
+	net_secret_init();
 	hash[0] = (__force __u32) daddr;
 	hash[1] = net_secret[13];
 	hash[2] = net_secret[14];
@@ -96,6 +112,7 @@ __u32 secure_ipv6_id(const __be32 daddr[4])
 {
 	__u32 hash[4];
 
+	net_secret_init();
 	memcpy(hash, daddr, 16);
 	md5_transform(hash, net_secret);
 
@@ -107,6 +124,7 @@ __u32 secure_tcp_sequence_number(__be32 saddr, __be32 daddr,
 {
 	u32 hash[MD5_DIGEST_WORDS];
 
+	net_secret_init();
 	hash[0] = (__force u32)saddr;
 	hash[1] = (__force u32)daddr;
 	hash[2] = ((__force u16)sport << 16) + (__force u16)dport;
@@ -121,6 +139,7 @@ u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport)
 {
 	u32 hash[MD5_DIGEST_WORDS];
 
+	net_secret_init();
 	hash[0] = (__force u32)saddr;
 	hash[1] = (__force u32)daddr;
 	hash[2] = (__force u32)dport ^ net_secret[14];
@@ -140,6 +159,7 @@ u64 secure_dccp_sequence_number(__be32 saddr, __be32 daddr,
 	u32 hash[MD5_DIGEST_WORDS];
 	u64 seq;
 
+	net_secret_init();
 	hash[0] = (__force u32)saddr;
 	hash[1] = (__force u32)daddr;
 	hash[2] = ((__force u16)sport << 16) + (__force u16)dport;
@@ -164,6 +184,7 @@ u64 secure_dccpv6_sequence_number(__be32 *saddr, __be32 *daddr,
 	u64 seq;
 	u32 i;
 
+	net_secret_init();
 	memcpy(hash, saddr, 16);
 	for (i = 0; i < 4; i++)
 		secret[i] = net_secret[i] + daddr[i];
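The rewritten net_secret_init() seeds lazily and tolerates concurrent callers: every word is forced non-zero, the array is filled from the top down so index 0 is published last, and cmpxchg() lets exactly one writer win per slot, which makes the net_secret[0] fast-path check safe. A sketch of the same idea transplanted to standalone C11; get_entropy() is a stand-in for get_random_bytes() and all names are hypothetical:

    #include <stdatomic.h>
    #include <stdint.h>
    #include <stdlib.h>

    #define SECRET_WORDS 16

    static _Atomic uint32_t secret[SECRET_WORDS];

    static uint32_t get_entropy(void)
    {
            uint32_t v;
            do {
                    v = (uint32_t)random(); /* stand-in entropy source */
            } while (!v);                   /* zero is reserved for "unset" */
            return v;
    }

    static void secret_init(void)
    {
            /* fast path: slot 0 is filled last, so non-zero means seeded */
            if (atomic_load(&secret[0]))
                    return;

            for (int i = SECRET_WORDS; i > 0;) {
                    uint32_t expected = 0;
                    /* first writer wins; losers keep the published word */
                    atomic_compare_exchange_strong(&secret[--i], &expected,
                                                   get_entropy());
            }
    }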
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 7a1874b7b8fd..cfeb85cff4f0 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -263,10 +263,8 @@ void build_ehash_secret(void)
 		get_random_bytes(&rnd, sizeof(rnd));
 	} while (rnd == 0);
 
-	if (cmpxchg(&inet_ehash_secret, 0, rnd) == 0) {
+	if (cmpxchg(&inet_ehash_secret, 0, rnd) == 0)
 		get_random_bytes(&ipv6_hash_secret, sizeof(ipv6_hash_secret));
-		net_secret_init();
-	}
 }
 EXPORT_SYMBOL(build_ehash_secret);
 
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index dace87f06e5f..7defdc9ba167 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -736,7 +736,7 @@ static void igmp_gq_timer_expire(unsigned long data)
 
 	in_dev->mr_gq_running = 0;
 	igmpv3_send_report(in_dev, NULL);
-	__in_dev_put(in_dev);
+	in_dev_put(in_dev);
 }
 
 static void igmp_ifc_timer_expire(unsigned long data)
@@ -749,7 +749,7 @@ static void igmp_ifc_timer_expire(unsigned long data)
 		igmp_ifc_start_timer(in_dev,
 				     unsolicited_report_interval(in_dev));
 	}
-	__in_dev_put(in_dev);
+	in_dev_put(in_dev);
 }
 
 static void igmp_ifc_event(struct in_device *in_dev)
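Both igmp.c hunks replace __in_dev_put() (drop the count, never free) with in_dev_put() (drop the count and free on zero) because a timer handler can hold the last reference to the in_device; the mcast.c changes below make the same fix for IPv6. A minimal sketch of that two-flavour put contract on a hypothetical refcounted object:

    #include <linux/atomic.h>
    #include <linux/slab.h>

    struct demo_obj {
            atomic_t refcnt;
    };

    static inline void demo_obj_put(struct demo_obj *obj)
    {
            /* may release the final reference */
            if (atomic_dec_and_test(&obj->refcnt))
                    kfree(obj);
    }

    static inline void __demo_obj_put(struct demo_obj *obj)
    {
            /* caller guarantees this is not the last reference */
            atomic_dec(&obj->refcnt);
    }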
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index ac9fabe0300f..63a6d6d6b875 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -623,6 +623,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
 		tunnel->err_count = 0;
 	}
 
+	tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
 	ttl = tnl_params->ttl;
 	if (ttl == 0) {
 		if (skb->protocol == htons(ETH_P_IP))
@@ -641,18 +642,17 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
 
 	max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
 			+ rt->dst.header_len;
-	if (max_headroom > dev->needed_headroom) {
-		dev->needed_headroom = max_headroom;
-		if (skb_cow_head(skb, dev->needed_headroom)) {
-			dev->stats.tx_dropped++;
-			dev_kfree_skb(skb);
-			return;
-		}
-	}
+	if (max_headroom > dev->needed_headroom)
+		dev->needed_headroom = max_headroom;
+
+	if (skb_cow_head(skb, dev->needed_headroom)) {
+		dev->stats.tx_dropped++;
+		dev_kfree_skb(skb);
+		return;
+	}
 
 	err = iptunnel_xmit(rt, skb, fl4.saddr, fl4.daddr, protocol,
-			    ip_tunnel_ecn_encap(tos, inner_iph, skb), ttl, df,
-			    !net_eq(tunnel->net, dev_net(dev)));
+			    tos, ttl, df, !net_eq(tunnel->net, dev_net(dev)));
 	iptunnel_xmit_stats(err, &dev->stats, dev->tstats);
 
 	return;
@@ -853,8 +853,10 @@ int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id,
 	/* FB netdevice is special: we have one, and only one per netns.
 	 * Allowing to move it to another netns is clearly unsafe.
 	 */
-	if (!IS_ERR(itn->fb_tunnel_dev))
+	if (!IS_ERR(itn->fb_tunnel_dev)) {
 		itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
+		ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
+	}
 	rtnl_unlock();
 
 	return PTR_RET(itn->fb_tunnel_dev);
@@ -884,8 +886,6 @@ static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head,
 		if (!net_eq(dev_net(t->dev), net))
 			unregister_netdevice_queue(t->dev, head);
 	}
-	if (itn->fb_tunnel_dev)
-		unregister_netdevice_queue(itn->fb_tunnel_dev, head);
 }
 
 void ip_tunnel_delete_net(struct ip_tunnel_net *itn, struct rtnl_link_ops *ops)
diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
index d6c856b17fd4..c31e3ad98ef2 100644
--- a/net/ipv4/ip_tunnel_core.c
+++ b/net/ipv4/ip_tunnel_core.c
@@ -61,7 +61,7 @@ int iptunnel_xmit(struct rtable *rt, struct sk_buff *skb,
 	memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
 
 	/* Push down and install the IP header. */
-	__skb_push(skb, sizeof(struct iphdr));
+	skb_push(skb, sizeof(struct iphdr));
 	skb_reset_network_header(skb);
 
 	iph = ip_hdr(skb);
diff --git a/net/ipv4/netfilter/ipt_SYNPROXY.c b/net/ipv4/netfilter/ipt_SYNPROXY.c
index 67e17dcda65e..b6346bf2fde3 100644
--- a/net/ipv4/netfilter/ipt_SYNPROXY.c
+++ b/net/ipv4/netfilter/ipt_SYNPROXY.c
@@ -267,7 +267,8 @@ synproxy_tg4(struct sk_buff *skb, const struct xt_action_param *par)
 	if (th == NULL)
 		return NF_DROP;
 
-	synproxy_parse_options(skb, par->thoff, th, &opts);
+	if (!synproxy_parse_options(skb, par->thoff, th, &opts))
+		return NF_DROP;
 
 	if (th->syn && !(th->ack || th->fin || th->rst)) {
 		/* Initial SYN from client */
@@ -350,7 +351,8 @@ static unsigned int ipv4_synproxy_hook(unsigned int hooknum,
 
 	/* fall through */
 	case TCP_CONNTRACK_SYN_SENT:
-		synproxy_parse_options(skb, thoff, th, &opts);
+		if (!synproxy_parse_options(skb, thoff, th, &opts))
+			return NF_DROP;
 
 		if (!th->syn && th->ack &&
 		    CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) {
@@ -373,7 +375,9 @@ static unsigned int ipv4_synproxy_hook(unsigned int hooknum,
 		if (!th->syn || !th->ack)
 			break;
 
-		synproxy_parse_options(skb, thoff, th, &opts);
+		if (!synproxy_parse_options(skb, thoff, th, &opts))
+			return NF_DROP;
+
 		if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP)
 			synproxy->tsoff = opts.tsval - synproxy->its;
 
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index bfec521c717f..193db03540ad 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -218,8 +218,10 @@ static void raw_err(struct sock *sk, struct sk_buff *skb, u32 info)
 
 	if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED)
 		ipv4_sk_update_pmtu(skb, sk, info);
-	else if (type == ICMP_REDIRECT)
+	else if (type == ICMP_REDIRECT) {
 		ipv4_sk_redirect(skb, sk);
+		return;
+	}
 
 	/* Report error on raw socket, if:
 	   1. User requested ip_recverr.
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 7c83cb8bf137..e6bb8256e59f 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -895,8 +895,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 
 	skb_orphan(skb);
 	skb->sk = sk;
-	skb->destructor = (sysctl_tcp_limit_output_bytes > 0) ?
-			  tcp_wfree : sock_wfree;
+	skb->destructor = tcp_wfree;
 	atomic_add(skb->truesize, &sk->sk_wmem_alloc);
 
 	/* Build TCP header and checksum it. */
@@ -1840,7 +1839,6 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
 	while ((skb = tcp_send_head(sk))) {
 		unsigned int limit;
 
-
 		tso_segs = tcp_init_tso_segs(sk, skb, mss_now);
 		BUG_ON(!tso_segs);
 
@@ -1869,13 +1867,20 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
 			break;
 		}
 
-		/* TSQ : sk_wmem_alloc accounts skb truesize,
-		 * including skb overhead. But thats OK.
+		/* TCP Small Queues :
+		 * Control number of packets in qdisc/devices to two packets / or ~1 ms.
+		 * This allows for :
+		 *  - better RTT estimation and ACK scheduling
+		 *  - faster recovery
+		 *  - high rates
 		 */
-		if (atomic_read(&sk->sk_wmem_alloc) >= sysctl_tcp_limit_output_bytes) {
+		limit = max(skb->truesize, sk->sk_pacing_rate >> 10);
+
+		if (atomic_read(&sk->sk_wmem_alloc) > limit) {
 			set_bit(TSQ_THROTTLED, &tp->tsq_flags);
 			break;
 		}
+
 		limit = mss_now;
 		if (tso_segs > 1 && !tcp_urg_mode(tp))
 			limit = tcp_mss_split_point(sk, skb, mss_now,
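The new TSQ limit is per-socket rather than a global sysctl: sk_pacing_rate is in bytes per second, so shifting right by 10 (dividing by 1024, roughly 1000) yields about one millisecond of in-flight data at the current rate, floored at one skb's truesize. A back-of-envelope check with hypothetical numbers:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            /* hypothetical pacing rate: 1 Gbit/s = 125,000,000 bytes/sec */
            uint64_t sk_pacing_rate = 125000000ULL;
            uint64_t skb_truesize = 2048; /* hypothetical skb truesize */

            /* >> 10 divides by 1024, i.e. roughly 1 ms of data */
            uint64_t limit = sk_pacing_rate >> 10;
            if (limit < skb_truesize)
                    limit = skb_truesize;

            printf("TSQ limit: %llu bytes (~%llu us at this rate)\n",
                   (unsigned long long)limit,
                   (unsigned long long)(limit * 1000000 / sk_pacing_rate));
            return 0;
    }

At 1 Gbit/s this gives a limit of 122070 bytes, about 976 microseconds of data, where the old global sysctl capped every socket at the same byte count regardless of rate.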
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 74d2c95db57f..0ca44df51ee9 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -658,7 +658,7 @@ void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable)
 		break;
 	case ICMP_REDIRECT:
 		ipv4_sk_redirect(skb, sk);
-		break;
+		goto out;
 	}
 
 	/*
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index d6ff12617f36..cd3fb301da38 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -1499,6 +1499,33 @@ static bool ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr,
 	return false;
 }
 
+/* Compares an address/prefix_len with addresses on device @dev.
+ * If one is found it returns true.
+ */
+bool ipv6_chk_custom_prefix(const struct in6_addr *addr,
+	const unsigned int prefix_len, struct net_device *dev)
+{
+	struct inet6_dev *idev;
+	struct inet6_ifaddr *ifa;
+	bool ret = false;
+
+	rcu_read_lock();
+	idev = __in6_dev_get(dev);
+	if (idev) {
+		read_lock_bh(&idev->lock);
+		list_for_each_entry(ifa, &idev->addr_list, if_list) {
+			ret = ipv6_prefix_equal(addr, &ifa->addr, prefix_len);
+			if (ret)
+				break;
+		}
+		read_unlock_bh(&idev->lock);
+	}
+	rcu_read_unlock();
+
+	return ret;
+}
+EXPORT_SYMBOL(ipv6_chk_custom_prefix);
+
 int ipv6_chk_prefix(const struct in6_addr *addr, struct net_device *dev)
 {
 	struct inet6_dev *idev;
@@ -2193,43 +2220,21 @@ ok:
 	else
 		stored_lft = 0;
 	if (!update_lft && !create && stored_lft) {
-		if (valid_lft > MIN_VALID_LIFETIME ||
-		    valid_lft > stored_lft)
-			update_lft = 1;
-		else if (stored_lft <= MIN_VALID_LIFETIME) {
-			/* valid_lft <= stored_lft is always true */
-			/*
-			 * RFC 4862 Section 5.5.3e:
-			 * "Note that the preferred lifetime of
-			 *  the corresponding address is always
-			 *  reset to the Preferred Lifetime in
-			 *  the received Prefix Information
-			 *  option, regardless of whether the
-			 *  valid lifetime is also reset or
-			 *  ignored."
-			 *
-			 * So if the preferred lifetime in
-			 * this advertisement is different
-			 * than what we have stored, but the
-			 * valid lifetime is invalid, just
-			 * reset prefered_lft.
-			 *
-			 * We must set the valid lifetime
-			 * to the stored lifetime since we'll
-			 * be updating the timestamp below,
-			 * else we'll set it back to the
-			 * minimum.
-			 */
-			if (prefered_lft != ifp->prefered_lft) {
-				valid_lft = stored_lft;
-				update_lft = 1;
-			}
-		} else {
-			valid_lft = MIN_VALID_LIFETIME;
-			if (valid_lft < prefered_lft)
-				prefered_lft = valid_lft;
-			update_lft = 1;
-		}
+		const u32 minimum_lft = min(
+			stored_lft, (u32)MIN_VALID_LIFETIME);
+		valid_lft = max(valid_lft, minimum_lft);
+
+		/* RFC4862 Section 5.5.3e:
+		 * "Note that the preferred lifetime of the
+		 *  corresponding address is always reset to
+		 *  the Preferred Lifetime in the received
+		 *  Prefix Information option, regardless of
+		 *  whether the valid lifetime is also reset or
+		 *  ignored."
+		 *
+		 * So we should always update prefered_lft here.
+		 */
+		update_lft = 1;
 	}
 
 	if (update_lft) {
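The replacement collapses the old three-way branch into one clamp: valid_lft may never fall below min(stored_lft, MIN_VALID_LIFETIME), and prefered_lft is now always taken from the advertisement per RFC 4862 section 5.5.3e. A small standalone check of the clamp; MIN_VALID_LIFETIME is 2 hours (7200 s) in addrconf.c, and the sample lifetimes are hypothetical:

    #include <stdio.h>
    #include <stdint.h>

    #define MIN_VALID_LIFETIME (2 * 3600) /* 2 hours, as in addrconf.c */

    static uint32_t clamp_valid_lft(uint32_t valid_lft, uint32_t stored_lft)
    {
            uint32_t minimum_lft = stored_lft < MIN_VALID_LIFETIME ?
                                   stored_lft : MIN_VALID_LIFETIME;
            return valid_lft > minimum_lft ? valid_lft : minimum_lft;
    }

    int main(void)
    {
            /* advertised 600 s against 9000 s stored: floor at 7200 */
            printf("%u\n", clamp_valid_lft(600, 9000)); /* -> 7200 */
            /* stored lifetime already short: keep the smaller floor */
            printf("%u\n", clamp_valid_lft(600, 1800)); /* -> 1800 */
            return 0;
    }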
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 6b26e9feafb9..7bb5446b9d73 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -618,7 +618,7 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb,
 	struct ip6_tnl *tunnel = netdev_priv(dev);
 	struct net_device *tdev;	/* Device to other host */
 	struct ipv6hdr *ipv6h;		/* Our new IP header */
-	unsigned int max_headroom;	/* The extra header space needed */
+	unsigned int max_headroom = 0;	/* The extra header space needed */
 	int gre_hlen;
 	struct ipv6_tel_txoption opt;
 	int mtu;
@@ -693,7 +693,7 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb,
 
 	skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(dev)));
 
-	max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen + dst->header_len;
+	max_headroom += LL_RESERVED_SPACE(tdev) + gre_hlen + dst->header_len;
 
 	if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
 	    (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 3a692d529163..a54c45ce4a48 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1015,6 +1015,8 @@ static inline int ip6_ufo_append_data(struct sock *sk,
 	 * udp datagram
 	 */
 	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
+		struct frag_hdr fhdr;
+
 		skb = sock_alloc_send_skb(sk,
 			hh_len + fragheaderlen + transhdrlen + 20,
 			(flags & MSG_DONTWAIT), &err);
@@ -1036,12 +1038,6 @@ static inline int ip6_ufo_append_data(struct sock *sk,
 		skb->protocol = htons(ETH_P_IPV6);
 		skb->ip_summed = CHECKSUM_PARTIAL;
 		skb->csum = 0;
-	}
-
-	err = skb_append_datato_frags(sk,skb, getfrag, from,
-				      (length - transhdrlen));
-	if (!err) {
-		struct frag_hdr fhdr;
 
 		/* Specify the length of each IPv6 datagram fragment.
 		 * It has to be a multiple of 8.
@@ -1052,15 +1048,10 @@ static inline int ip6_ufo_append_data(struct sock *sk,
 		ipv6_select_ident(&fhdr, rt);
 		skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
 		__skb_queue_tail(&sk->sk_write_queue, skb);
-
-		return 0;
 	}
-	/* There is not enough support do UPD LSO,
-	 * so follow normal path
-	 */
-	kfree_skb(skb);
 
-	return err;
+	return skb_append_datato_frags(sk, skb, getfrag, from,
+				       (length - transhdrlen));
 }
 
1066static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src, 1057static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
@@ -1227,27 +1218,27 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
 	 * --yoshfuji
 	 */
 
-	cork->length += length;
-	if (length > mtu) {
-		int proto = sk->sk_protocol;
-		if (dontfrag && (proto == IPPROTO_UDP || proto == IPPROTO_RAW)){
-			ipv6_local_rxpmtu(sk, fl6, mtu-exthdrlen);
-			return -EMSGSIZE;
-		}
-
-		if (proto == IPPROTO_UDP &&
-		    (rt->dst.dev->features & NETIF_F_UFO)) {
-
-			err = ip6_ufo_append_data(sk, getfrag, from, length,
-						  hh_len, fragheaderlen,
-						  transhdrlen, mtu, flags, rt);
-			if (err)
-				goto error;
-			return 0;
-		}
+	if ((length > mtu) && dontfrag && (sk->sk_protocol == IPPROTO_UDP ||
+					   sk->sk_protocol == IPPROTO_RAW)) {
+		ipv6_local_rxpmtu(sk, fl6, mtu-exthdrlen);
+		return -EMSGSIZE;
+	}
+
+	skb = skb_peek_tail(&sk->sk_write_queue);
+	cork->length += length;
+	if (((length > mtu) ||
+	     (skb && skb_is_gso(skb))) &&
+	    (sk->sk_protocol == IPPROTO_UDP) &&
+	    (rt->dst.dev->features & NETIF_F_UFO)) {
+		err = ip6_ufo_append_data(sk, getfrag, from, length,
+					  hh_len, fragheaderlen,
+					  transhdrlen, mtu, flags, rt);
+		if (err)
+			goto error;
+		return 0;
 	}
 
-	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
+	if (!skb)
 		goto alloc_new_skb;
 
 	while (length > 0) {
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 2d8f4829575b..a791552e0422 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -1731,8 +1731,6 @@ static void __net_exit ip6_tnl_destroy_tunnels(struct ip6_tnl_net *ip6n)
 		}
 	}
 
-	t = rtnl_dereference(ip6n->tnls_wc[0]);
-	unregister_netdevice_queue(t->dev, &list);
 	unregister_netdevice_many(&list);
 }
 
@@ -1752,6 +1750,7 @@ static int __net_init ip6_tnl_init_net(struct net *net)
 	if (!ip6n->fb_tnl_dev)
 		goto err_alloc_dev;
 	dev_net_set(ip6n->fb_tnl_dev, net);
+	ip6n->fb_tnl_dev->rtnl_link_ops = &ip6_link_ops;
 	/* FB netdevice is special: we have one, and only one per netns.
 	 * Allowing to move it to another netns is clearly unsafe.
 	 */
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 096cd67b737c..d18f9f903db6 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -2034,7 +2034,7 @@ static void mld_dad_timer_expire(unsigned long data)
 		if (idev->mc_dad_count)
 			mld_dad_start_timer(idev, idev->mc_maxdelay);
 	}
-	__in6_dev_put(idev);
+	in6_dev_put(idev);
 }
 
 static int ip6_mc_del1_src(struct ifmcaddr6 *pmc, int sfmode,
@@ -2379,7 +2379,7 @@ static void mld_gq_timer_expire(unsigned long data)
 
 	idev->mc_gq_running = 0;
 	mld_send_report(idev, NULL);
-	__in6_dev_put(idev);
+	in6_dev_put(idev);
 }
 
 static void mld_ifc_timer_expire(unsigned long data)
@@ -2392,7 +2392,7 @@ static void mld_ifc_timer_expire(unsigned long data)
 		if (idev->mc_ifc_count)
 			mld_ifc_start_timer(idev, idev->mc_maxdelay);
 	}
-	__in6_dev_put(idev);
+	in6_dev_put(idev);
 }
 
 static void mld_ifc_event(struct inet6_dev *idev)
diff --git a/net/ipv6/netfilter/ip6t_SYNPROXY.c b/net/ipv6/netfilter/ip6t_SYNPROXY.c
index 19cfea8dbcaa..2748b042da72 100644
--- a/net/ipv6/netfilter/ip6t_SYNPROXY.c
+++ b/net/ipv6/netfilter/ip6t_SYNPROXY.c
@@ -282,7 +282,8 @@ synproxy_tg6(struct sk_buff *skb, const struct xt_action_param *par)
 	if (th == NULL)
 		return NF_DROP;
 
-	synproxy_parse_options(skb, par->thoff, th, &opts);
+	if (!synproxy_parse_options(skb, par->thoff, th, &opts))
+		return NF_DROP;
 
 	if (th->syn && !(th->ack || th->fin || th->rst)) {
 		/* Initial SYN from client */
@@ -372,7 +373,8 @@ static unsigned int ipv6_synproxy_hook(unsigned int hooknum,
 
 	/* fall through */
 	case TCP_CONNTRACK_SYN_SENT:
-		synproxy_parse_options(skb, thoff, th, &opts);
+		if (!synproxy_parse_options(skb, thoff, th, &opts))
+			return NF_DROP;
 
 		if (!th->syn && th->ack &&
 		    CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) {
@@ -395,7 +397,9 @@ static unsigned int ipv6_synproxy_hook(unsigned int hooknum,
 		if (!th->syn || !th->ack)
 			break;
 
-		synproxy_parse_options(skb, thoff, th, &opts);
+		if (!synproxy_parse_options(skb, thoff, th, &opts))
+			return NF_DROP;
+
 		if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP)
 			synproxy->tsoff = opts.tsval - synproxy->its;
 
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 58916bbb1728..a4ed2416399e 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -335,8 +335,10 @@ static void rawv6_err(struct sock *sk, struct sk_buff *skb,
 		ip6_sk_update_pmtu(skb, sk, info);
 		harderr = (np->pmtudisc == IPV6_PMTUDISC_DO);
 	}
-	if (type == NDISC_REDIRECT)
+	if (type == NDISC_REDIRECT) {
 		ip6_sk_redirect(skb, sk);
+		return;
+	}
 	if (np->recverr) {
 		u8 *payload = skb->data;
 		if (!inet->hdrincl)
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 7ee5cb96db34..19269453a8ea 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -566,6 +566,70 @@ static inline bool is_spoofed_6rd(struct ip_tunnel *tunnel, const __be32 v4addr,
 	return false;
 }
 
+/* Checks if an address matches an address on the tunnel interface.
+ * Used to detect the NAT of proto 41 packets and let them pass spoofing test.
+ * Long story:
+ * This function is called after we considered the packet as spoofed
+ * in is_spoofed_6rd.
+ * We may have a router that is doing NAT for proto 41 packets
+ * for an internal station. Destination a.a.a.a/PREFIX:bbbb:bbbb
+ * will be translated to n.n.n.n/PREFIX:bbbb:bbbb. And is_spoofed_6rd
+ * function will return true, dropping the packet.
+ * But, we can still check if is spoofed against the IP
+ * addresses associated with the interface.
+ */
+static bool only_dnatted(const struct ip_tunnel *tunnel,
+	const struct in6_addr *v6dst)
+{
+	int prefix_len;
+
+#ifdef CONFIG_IPV6_SIT_6RD
+	prefix_len = tunnel->ip6rd.prefixlen + 32
+		- tunnel->ip6rd.relay_prefixlen;
+#else
+	prefix_len = 48;
+#endif
+	return ipv6_chk_custom_prefix(v6dst, prefix_len, tunnel->dev);
+}
+
+/* Returns true if a packet is spoofed */
+static bool packet_is_spoofed(struct sk_buff *skb,
+			      const struct iphdr *iph,
+			      struct ip_tunnel *tunnel)
+{
+	const struct ipv6hdr *ipv6h;
+
+	if (tunnel->dev->priv_flags & IFF_ISATAP) {
+		if (!isatap_chksrc(skb, iph, tunnel))
+			return true;
+
+		return false;
+	}
+
+	if (tunnel->dev->flags & IFF_POINTOPOINT)
+		return false;
+
+	ipv6h = ipv6_hdr(skb);
+
+	if (unlikely(is_spoofed_6rd(tunnel, iph->saddr, &ipv6h->saddr))) {
+		net_warn_ratelimited("Src spoofed %pI4/%pI6c -> %pI4/%pI6c\n",
+				     &iph->saddr, &ipv6h->saddr,
+				     &iph->daddr, &ipv6h->daddr);
+		return true;
+	}
+
+	if (likely(!is_spoofed_6rd(tunnel, iph->daddr, &ipv6h->daddr)))
+		return false;
+
+	if (only_dnatted(tunnel, &ipv6h->daddr))
+		return false;
+
+	net_warn_ratelimited("Dst spoofed %pI4/%pI6c -> %pI4/%pI6c\n",
+			     &iph->saddr, &ipv6h->saddr,
+			     &iph->daddr, &ipv6h->daddr);
+	return true;
+}
+
 static int ipip6_rcv(struct sk_buff *skb)
 {
 	const struct iphdr *iph = ip_hdr(skb);
@@ -586,19 +650,9 @@ static int ipip6_rcv(struct sk_buff *skb)
 	IPCB(skb)->flags = 0;
 	skb->protocol = htons(ETH_P_IPV6);
 
-	if (tunnel->dev->priv_flags & IFF_ISATAP) {
-		if (!isatap_chksrc(skb, iph, tunnel)) {
-			tunnel->dev->stats.rx_errors++;
-			goto out;
-		}
-	} else if (!(tunnel->dev->flags&IFF_POINTOPOINT)) {
-		if (is_spoofed_6rd(tunnel, iph->saddr,
-				   &ipv6_hdr(skb)->saddr) ||
-		    is_spoofed_6rd(tunnel, iph->daddr,
-				   &ipv6_hdr(skb)->daddr)) {
-			tunnel->dev->stats.rx_errors++;
-			goto out;
-		}
+	if (packet_is_spoofed(skb, iph, tunnel)) {
+		tunnel->dev->stats.rx_errors++;
+		goto out;
 	}
 
 	__skb_tunnel_rx(skb, tunnel->dev, tunnel->net);
@@ -748,7 +802,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
 	neigh = dst_neigh_lookup(skb_dst(skb), &iph6->daddr);
 
 	if (neigh == NULL) {
-		net_dbg_ratelimited("sit: nexthop == NULL\n");
+		net_dbg_ratelimited("nexthop == NULL\n");
 		goto tx_error;
 	}
 
@@ -777,7 +831,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
 	neigh = dst_neigh_lookup(skb_dst(skb), &iph6->daddr);
 
 	if (neigh == NULL) {
-		net_dbg_ratelimited("sit: nexthop == NULL\n");
+		net_dbg_ratelimited("nexthop == NULL\n");
 		goto tx_error;
 	}
 
@@ -1612,6 +1666,7 @@ static int __net_init sit_init_net(struct net *net)
 		goto err_alloc_dev;
 	}
 	dev_net_set(sitn->fb_tunnel_dev, net);
+	sitn->fb_tunnel_dev->rtnl_link_ops = &sit_link_ops;
 	/* FB netdevice is special: we have one, and only one per netns.
 	 * Allowing to move it to another netns is clearly unsafe.
 	 */
@@ -1646,7 +1701,6 @@ static void __net_exit sit_exit_net(struct net *net)
 
 	rtnl_lock();
 	sit_destroy_tunnels(sitn, &list);
-	unregister_netdevice_queue(sitn->fb_tunnel_dev, &list);
 	unregister_netdevice_many(&list);
 	rtnl_unlock();
 }
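only_dnatted() derives the per-site prefix length from the 6rd parameters: per RFC 5969 the delegated prefix appends the (32 - relay_prefixlen) non-shared IPv4 bits to the ISP's 6rd prefix, with 48 as the non-6rd fallback. A worked example with hypothetical deployment values:

    #include <stdio.h>

    int main(void)
    {
            /* hypothetical 6rd deployment (RFC 5969 naming) */
            unsigned int ip6rd_prefixlen = 28; /* ISP 6rd prefix, e.g. /28 */
            unsigned int relay_prefixlen = 8;  /* shared IPv4 prefix bits */

            /* same formula as only_dnatted(): the embedded IPv4 bits
             * extend the prefix by (32 - relay_prefixlen)
             */
            unsigned int prefix_len = ip6rd_prefixlen + 32 - relay_prefixlen;

            printf("per-site 6rd prefix length: /%u\n", prefix_len); /* /52 */
            return 0;
    }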
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index f4058150262b..72b7eaaf3ca0 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -525,8 +525,10 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 
 	if (type == ICMPV6_PKT_TOOBIG)
 		ip6_sk_update_pmtu(skb, sk, info);
-	if (type == NDISC_REDIRECT)
+	if (type == NDISC_REDIRECT) {
 		ip6_sk_redirect(skb, sk);
+		goto out;
+	}
 
 	np = inet6_sk(sk);
 
diff --git a/net/lapb/lapb_timer.c b/net/lapb/lapb_timer.c
index 54563ad8aeb1..355cc3b6fa4d 100644
--- a/net/lapb/lapb_timer.c
+++ b/net/lapb/lapb_timer.c
@@ -154,6 +154,7 @@ static void lapb_t1timer_expiry(unsigned long param)
 	} else {
 		lapb->n2count++;
 		lapb_requeue_frames(lapb);
+		lapb_kick(lapb);
 	}
 	break;
 
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 4f69e83ff836..74fd00c27210 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -116,6 +116,7 @@ ip_vs_in_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
 
 	if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) {
 		struct ip_vs_cpu_stats *s;
+		struct ip_vs_service *svc;
 
 		s = this_cpu_ptr(dest->stats.cpustats);
 		s->ustats.inpkts++;
@@ -123,11 +124,14 @@ ip_vs_in_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
 		s->ustats.inbytes += skb->len;
 		u64_stats_update_end(&s->syncp);
 
-		s = this_cpu_ptr(dest->svc->stats.cpustats);
+		rcu_read_lock();
+		svc = rcu_dereference(dest->svc);
+		s = this_cpu_ptr(svc->stats.cpustats);
 		s->ustats.inpkts++;
 		u64_stats_update_begin(&s->syncp);
 		s->ustats.inbytes += skb->len;
 		u64_stats_update_end(&s->syncp);
+		rcu_read_unlock();
 
 		s = this_cpu_ptr(ipvs->tot_stats.cpustats);
 		s->ustats.inpkts++;
@@ -146,6 +150,7 @@ ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
 
 	if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) {
 		struct ip_vs_cpu_stats *s;
+		struct ip_vs_service *svc;
 
 		s = this_cpu_ptr(dest->stats.cpustats);
 		s->ustats.outpkts++;
@@ -153,11 +158,14 @@ ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
 		s->ustats.outbytes += skb->len;
 		u64_stats_update_end(&s->syncp);
 
-		s = this_cpu_ptr(dest->svc->stats.cpustats);
+		rcu_read_lock();
+		svc = rcu_dereference(dest->svc);
+		s = this_cpu_ptr(svc->stats.cpustats);
 		s->ustats.outpkts++;
 		u64_stats_update_begin(&s->syncp);
 		s->ustats.outbytes += skb->len;
 		u64_stats_update_end(&s->syncp);
+		rcu_read_unlock();
 
 		s = this_cpu_ptr(ipvs->tot_stats.cpustats);
 		s->ustats.outpkts++;
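dest->svc becomes an RCU-managed pointer here: readers bracket the dereference with rcu_read_lock()/rcu_read_unlock(), while writers publish with rcu_assign_pointer() and free through call_rcu() (see the ip_vs_ctl.c changes below). A minimal sketch of that publish/read contract on a hypothetical struct:

    #include <linux/rcupdate.h>
    #include <linux/slab.h>

    struct demo_cfg {
            int value;
            struct rcu_head rcu;
    };

    static struct demo_cfg __rcu *demo_cfg_ptr;

    static int demo_read(void)
    {
            struct demo_cfg *cfg;
            int v = 0;

            rcu_read_lock();                /* pin the grace period */
            cfg = rcu_dereference(demo_cfg_ptr);
            if (cfg)
                    v = cfg->value;         /* valid until rcu_read_unlock() */
            rcu_read_unlock();
            return v;
    }

    static void demo_update(struct demo_cfg *new_cfg)
    {
            struct demo_cfg *old;

            old = rcu_dereference_protected(demo_cfg_ptr, 1); /* writer side */
            rcu_assign_pointer(demo_cfg_ptr, new_cfg);        /* publish */
            if (old)
                    kfree_rcu(old, rcu); /* freed after all readers drop out */
    }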
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index c8148e487386..a3df9bddc4f7 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -460,7 +460,7 @@ static inline void
 __ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc)
 {
 	atomic_inc(&svc->refcnt);
-	dest->svc = svc;
+	rcu_assign_pointer(dest->svc, svc);
 }
 
 static void ip_vs_service_free(struct ip_vs_service *svc)
@@ -470,18 +470,25 @@ static void ip_vs_service_free(struct ip_vs_service *svc)
 	kfree(svc);
 }
 
-static void
-__ip_vs_unbind_svc(struct ip_vs_dest *dest)
+static void ip_vs_service_rcu_free(struct rcu_head *head)
 {
-	struct ip_vs_service *svc = dest->svc;
+	struct ip_vs_service *svc;
+
+	svc = container_of(head, struct ip_vs_service, rcu_head);
+	ip_vs_service_free(svc);
+}
 
-	dest->svc = NULL;
+static void __ip_vs_svc_put(struct ip_vs_service *svc, bool do_delay)
+{
 	if (atomic_dec_and_test(&svc->refcnt)) {
 		IP_VS_DBG_BUF(3, "Removing service %u/%s:%u\n",
 			      svc->fwmark,
 			      IP_VS_DBG_ADDR(svc->af, &svc->addr),
 			      ntohs(svc->port));
-		ip_vs_service_free(svc);
+		if (do_delay)
+			call_rcu(&svc->rcu_head, ip_vs_service_rcu_free);
+		else
+			ip_vs_service_free(svc);
 	}
 }
 
@@ -667,11 +674,6 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
 			      IP_VS_DBG_ADDR(svc->af, &dest->addr),
 			      ntohs(dest->port),
 			      atomic_read(&dest->refcnt));
-		/* We can not reuse dest while in grace period
-		 * because conns still can use dest->svc
-		 */
-		if (test_bit(IP_VS_DEST_STATE_REMOVING, &dest->state))
-			continue;
 		if (dest->af == svc->af &&
 		    ip_vs_addr_equal(svc->af, &dest->addr, daddr) &&
 		    dest->port == dport &&
@@ -697,8 +699,10 @@ out:
 
 static void ip_vs_dest_free(struct ip_vs_dest *dest)
 {
+	struct ip_vs_service *svc = rcu_dereference_protected(dest->svc, 1);
+
 	__ip_vs_dst_cache_reset(dest);
-	__ip_vs_unbind_svc(dest);
+	__ip_vs_svc_put(svc, false);
 	free_percpu(dest->stats.cpustats);
 	kfree(dest);
 }
@@ -771,6 +775,7 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
 			   struct ip_vs_dest_user_kern *udest, int add)
 {
 	struct netns_ipvs *ipvs = net_ipvs(svc->net);
+	struct ip_vs_service *old_svc;
 	struct ip_vs_scheduler *sched;
 	int conn_flags;
 
@@ -792,13 +797,14 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
 	atomic_set(&dest->conn_flags, conn_flags);
 
 	/* bind the service */
-	if (!dest->svc) {
+	old_svc = rcu_dereference_protected(dest->svc, 1);
+	if (!old_svc) {
 		__ip_vs_bind_svc(dest, svc);
 	} else {
-		if (dest->svc != svc) {
-			__ip_vs_unbind_svc(dest);
+		if (old_svc != svc) {
 			ip_vs_zero_stats(&dest->stats);
 			__ip_vs_bind_svc(dest, svc);
+			__ip_vs_svc_put(old_svc, true);
 		}
 	}
 
@@ -998,16 +1004,6 @@ ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
 	return 0;
 }
 
-static void ip_vs_dest_wait_readers(struct rcu_head *head)
-{
-	struct ip_vs_dest *dest = container_of(head, struct ip_vs_dest,
-					       rcu_head);
-
-	/* End of grace period after unlinking */
-	clear_bit(IP_VS_DEST_STATE_REMOVING, &dest->state);
-}
-
-
 /*
  * Delete a destination (must be already unlinked from the service)
  */
@@ -1023,20 +1019,16 @@ static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest,
 	 */
 	ip_vs_rs_unhash(dest);
 
-	if (!cleanup) {
-		set_bit(IP_VS_DEST_STATE_REMOVING, &dest->state);
-		call_rcu(&dest->rcu_head, ip_vs_dest_wait_readers);
-	}
-
 	spin_lock_bh(&ipvs->dest_trash_lock);
 	IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, dest->refcnt=%d\n",
 		      IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port),
 		      atomic_read(&dest->refcnt));
 	if (list_empty(&ipvs->dest_trash) && !cleanup)
 		mod_timer(&ipvs->dest_trash_timer,
-			  jiffies + IP_VS_DEST_TRASH_PERIOD);
+			  jiffies + (IP_VS_DEST_TRASH_PERIOD >> 1));
 	/* dest lives in trash without reference */
 	list_add(&dest->t_list, &ipvs->dest_trash);
+	dest->idle_start = 0;
 	spin_unlock_bh(&ipvs->dest_trash_lock);
 	ip_vs_dest_put(dest);
 }
@@ -1108,24 +1100,30 @@ static void ip_vs_dest_trash_expire(unsigned long data)
1108 struct net *net = (struct net *) data; 1100 struct net *net = (struct net *) data;
1109 struct netns_ipvs *ipvs = net_ipvs(net); 1101 struct netns_ipvs *ipvs = net_ipvs(net);
1110 struct ip_vs_dest *dest, *next; 1102 struct ip_vs_dest *dest, *next;
1103 unsigned long now = jiffies;
1111 1104
1112 spin_lock(&ipvs->dest_trash_lock); 1105 spin_lock(&ipvs->dest_trash_lock);
1113 list_for_each_entry_safe(dest, next, &ipvs->dest_trash, t_list) { 1106 list_for_each_entry_safe(dest, next, &ipvs->dest_trash, t_list) {
1114 /* Skip if dest is in grace period */
1115 if (test_bit(IP_VS_DEST_STATE_REMOVING, &dest->state))
1116 continue;
1117 if (atomic_read(&dest->refcnt) > 0) 1107 if (atomic_read(&dest->refcnt) > 0)
1118 continue; 1108 continue;
1109 if (dest->idle_start) {
1110 if (time_before(now, dest->idle_start +
1111 IP_VS_DEST_TRASH_PERIOD))
1112 continue;
1113 } else {
1114 dest->idle_start = max(1UL, now);
1115 continue;
1116 }
1119 IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u from trash\n", 1117 IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u from trash\n",
1120 dest->vfwmark, 1118 dest->vfwmark,
1121 IP_VS_DBG_ADDR(dest->svc->af, &dest->addr), 1119 IP_VS_DBG_ADDR(dest->af, &dest->addr),
1122 ntohs(dest->port)); 1120 ntohs(dest->port));
1123 list_del(&dest->t_list); 1121 list_del(&dest->t_list);
1124 ip_vs_dest_free(dest); 1122 ip_vs_dest_free(dest);
1125 } 1123 }
1126 if (!list_empty(&ipvs->dest_trash)) 1124 if (!list_empty(&ipvs->dest_trash))
1127 mod_timer(&ipvs->dest_trash_timer, 1125 mod_timer(&ipvs->dest_trash_timer,
1128 jiffies + IP_VS_DEST_TRASH_PERIOD); 1126 jiffies + (IP_VS_DEST_TRASH_PERIOD >> 1));
1129 spin_unlock(&ipvs->dest_trash_lock); 1127 spin_unlock(&ipvs->dest_trash_lock);
1130} 1128}
1131 1129
@@ -1320,14 +1318,6 @@ out:
1320 return ret; 1318 return ret;
1321} 1319}
1322 1320
1323static void ip_vs_service_rcu_free(struct rcu_head *head)
1324{
1325 struct ip_vs_service *svc;
1326
1327 svc = container_of(head, struct ip_vs_service, rcu_head);
1328 ip_vs_service_free(svc);
1329}
1330
1331/* 1321/*
1332 * Delete a service from the service list 1322 * Delete a service from the service list
1333 * - The service must be unlinked, unlocked and not referenced! 1323 * - The service must be unlinked, unlocked and not referenced!
@@ -1376,13 +1366,7 @@ static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup)
1376 /* 1366 /*
1377 * Free the service if nobody refers to it 1367 * Free the service if nobody refers to it
1378 */ 1368 */
1379 if (atomic_dec_and_test(&svc->refcnt)) { 1369 __ip_vs_svc_put(svc, true);
1380 IP_VS_DBG_BUF(3, "Removing service %u/%s:%u\n",
1381 svc->fwmark,
1382 IP_VS_DBG_ADDR(svc->af, &svc->addr),
1383 ntohs(svc->port));
1384 call_rcu(&svc->rcu_head, ip_vs_service_rcu_free);
1385 }
1386 1370
1387 /* decrease the module use count */ 1371 /* decrease the module use count */
1388 ip_vs_use_count_dec(); 1372 ip_vs_use_count_dec();
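[Note on the ip_vs_ctl.c hunks above: they retire the IP_VS_DEST_STATE_REMOVING grace-period flag. A destination parked in the trash now records when it was first observed unreferenced (idle_start), the trash timer fires at half of IP_VS_DEST_TRASH_PERIOD, and an entry is freed only once it has stayed idle for a full period; ip_vs_dest_free() likewise drops the dest's reference on its service via __ip_vs_svc_put() rather than unbinding in place. A minimal userspace sketch of that two-pass expiry, with simplified stand-in types rather than the kernel's:

#include <stdbool.h>

#define TRASH_PERIOD 120UL	/* stand-in for IP_VS_DEST_TRASH_PERIOD */

struct trash_entry {
	int refcnt;			/* stand-in for atomic_read(&dest->refcnt) */
	unsigned long idle_start;	/* 0 means "not yet observed idle" */
};

/* Called on every timer tick (each TRASH_PERIOD/2): an entry expires only
 * after two idle observations a full period apart, so a dest that gets
 * re-referenced between ticks is never freed under a reader.  The kernel
 * code compares jiffies with time_before() to survive wraparound.
 */
static bool trash_expired(struct trash_entry *e, unsigned long now)
{
	if (e->refcnt > 0)
		return false;			/* still referenced, keep */
	if (!e->idle_start) {
		e->idle_start = now ? now : 1;	/* first idle sighting; 0 is reserved */
		return false;
	}
	return now - e->idle_start >= TRASH_PERIOD;
}
]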
diff --git a/net/netfilter/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c
index 6bee6d0c73a5..1425e9a924c4 100644
--- a/net/netfilter/ipvs/ip_vs_est.c
+++ b/net/netfilter/ipvs/ip_vs_est.c
@@ -59,12 +59,13 @@ static void ip_vs_read_cpu_stats(struct ip_vs_stats_user *sum,
59 struct ip_vs_cpu_stats __percpu *stats) 59 struct ip_vs_cpu_stats __percpu *stats)
60{ 60{
61 int i; 61 int i;
62 bool add = false;
62 63
63 for_each_possible_cpu(i) { 64 for_each_possible_cpu(i) {
64 struct ip_vs_cpu_stats *s = per_cpu_ptr(stats, i); 65 struct ip_vs_cpu_stats *s = per_cpu_ptr(stats, i);
65 unsigned int start; 66 unsigned int start;
66 __u64 inbytes, outbytes; 67 __u64 inbytes, outbytes;
67 if (i) { 68 if (add) {
68 sum->conns += s->ustats.conns; 69 sum->conns += s->ustats.conns;
69 sum->inpkts += s->ustats.inpkts; 70 sum->inpkts += s->ustats.inpkts;
70 sum->outpkts += s->ustats.outpkts; 71 sum->outpkts += s->ustats.outpkts;
@@ -76,6 +77,7 @@ static void ip_vs_read_cpu_stats(struct ip_vs_stats_user *sum,
76 sum->inbytes += inbytes; 77 sum->inbytes += inbytes;
77 sum->outbytes += outbytes; 78 sum->outbytes += outbytes;
78 } else { 79 } else {
80 add = true;
79 sum->conns = s->ustats.conns; 81 sum->conns = s->ustats.conns;
80 sum->inpkts = s->ustats.inpkts; 82 sum->inpkts = s->ustats.inpkts;
81 sum->outpkts = s->ustats.outpkts; 83 sum->outpkts = s->ustats.outpkts;
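[Note on the estimator fix above: it is a first-iteration-initializes pattern. A bool add replaces the old `if (i)` test, so the running sums are seeded by the first CPU the loop actually visits rather than by the assumption that CPU 0 is always iterated first by for_each_possible_cpu(). A small self-contained illustration, with plain arrays standing in for the per-cpu stats:

#include <stdbool.h>
#include <stdio.h>

/* Sum counters over a sparse index set: the first visited entry seeds
 * the sum, later ones accumulate, so no assumption about index 0.
 */
static long sum_counters(const long *vals, const int *idx, int n)
{
	bool add = false;
	long sum = 0;	/* seeded below; zeroed here only for the n == 0 case */

	for (int k = 0; k < n; k++) {
		if (add)
			sum += vals[idx[k]];
		else {
			add = true;
			sum = vals[idx[k]];
		}
	}
	return sum;
}

int main(void)
{
	long vals[] = { 7, 10, 20, 30 };
	int idx[] = { 1, 3 };	/* e.g. only CPUs 1 and 3 are possible */

	printf("%ld\n", sum_counters(vals, idx, 2));	/* 40 */
	return 0;
}
]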
diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c
index 1383b0eadc0e..eff13c94498e 100644
--- a/net/netfilter/ipvs/ip_vs_lblc.c
+++ b/net/netfilter/ipvs/ip_vs_lblc.c
@@ -93,7 +93,7 @@ struct ip_vs_lblc_entry {
93 struct hlist_node list; 93 struct hlist_node list;
94 int af; /* address family */ 94 int af; /* address family */
95 union nf_inet_addr addr; /* destination IP address */ 95 union nf_inet_addr addr; /* destination IP address */
96 struct ip_vs_dest __rcu *dest; /* real server (cache) */ 96 struct ip_vs_dest *dest; /* real server (cache) */
97 unsigned long lastuse; /* last used time */ 97 unsigned long lastuse; /* last used time */
98 struct rcu_head rcu_head; 98 struct rcu_head rcu_head;
99}; 99};
@@ -130,20 +130,21 @@ static struct ctl_table vs_vars_table[] = {
130}; 130};
131#endif 131#endif
132 132
133static inline void ip_vs_lblc_free(struct ip_vs_lblc_entry *en) 133static void ip_vs_lblc_rcu_free(struct rcu_head *head)
134{ 134{
135 struct ip_vs_dest *dest; 135 struct ip_vs_lblc_entry *en = container_of(head,
136 struct ip_vs_lblc_entry,
137 rcu_head);
136 138
137 hlist_del_rcu(&en->list); 139 ip_vs_dest_put(en->dest);
138 /* 140 kfree(en);
139 * We don't kfree dest because it is referred either by its service
140 * or the trash dest list.
141 */
142 dest = rcu_dereference_protected(en->dest, 1);
143 ip_vs_dest_put(dest);
144 kfree_rcu(en, rcu_head);
145} 141}
146 142
143static inline void ip_vs_lblc_del(struct ip_vs_lblc_entry *en)
144{
145 hlist_del_rcu(&en->list);
146 call_rcu(&en->rcu_head, ip_vs_lblc_rcu_free);
147}
147 148
148/* 149/*
149 * Returns hash value for IPVS LBLC entry 150 * Returns hash value for IPVS LBLC entry
@@ -203,30 +204,23 @@ ip_vs_lblc_new(struct ip_vs_lblc_table *tbl, const union nf_inet_addr *daddr,
203 struct ip_vs_lblc_entry *en; 204 struct ip_vs_lblc_entry *en;
204 205
205 en = ip_vs_lblc_get(dest->af, tbl, daddr); 206 en = ip_vs_lblc_get(dest->af, tbl, daddr);
206 if (!en) { 207 if (en) {
207 en = kmalloc(sizeof(*en), GFP_ATOMIC); 208 if (en->dest == dest)
208 if (!en) 209 return en;
209 return NULL; 210 ip_vs_lblc_del(en);
210 211 }
211 en->af = dest->af; 212 en = kmalloc(sizeof(*en), GFP_ATOMIC);
212 ip_vs_addr_copy(dest->af, &en->addr, daddr); 213 if (!en)
213 en->lastuse = jiffies; 214 return NULL;
214 215
215 ip_vs_dest_hold(dest); 216 en->af = dest->af;
216 RCU_INIT_POINTER(en->dest, dest); 217 ip_vs_addr_copy(dest->af, &en->addr, daddr);
218 en->lastuse = jiffies;
217 219
218 ip_vs_lblc_hash(tbl, en); 220 ip_vs_dest_hold(dest);
219 } else { 221 en->dest = dest;
220 struct ip_vs_dest *old_dest;
221 222
222 old_dest = rcu_dereference_protected(en->dest, 1); 223 ip_vs_lblc_hash(tbl, en);
223 if (old_dest != dest) {
224 ip_vs_dest_put(old_dest);
225 ip_vs_dest_hold(dest);
226 /* No ordering constraints for refcnt */
227 RCU_INIT_POINTER(en->dest, dest);
228 }
229 }
230 224
231 return en; 225 return en;
232} 226}
@@ -246,7 +240,7 @@ static void ip_vs_lblc_flush(struct ip_vs_service *svc)
246 tbl->dead = 1; 240 tbl->dead = 1;
247 for (i=0; i<IP_VS_LBLC_TAB_SIZE; i++) { 241 for (i=0; i<IP_VS_LBLC_TAB_SIZE; i++) {
248 hlist_for_each_entry_safe(en, next, &tbl->bucket[i], list) { 242 hlist_for_each_entry_safe(en, next, &tbl->bucket[i], list) {
249 ip_vs_lblc_free(en); 243 ip_vs_lblc_del(en);
250 atomic_dec(&tbl->entries); 244 atomic_dec(&tbl->entries);
251 } 245 }
252 } 246 }
@@ -281,7 +275,7 @@ static inline void ip_vs_lblc_full_check(struct ip_vs_service *svc)
281 sysctl_lblc_expiration(svc))) 275 sysctl_lblc_expiration(svc)))
282 continue; 276 continue;
283 277
284 ip_vs_lblc_free(en); 278 ip_vs_lblc_del(en);
285 atomic_dec(&tbl->entries); 279 atomic_dec(&tbl->entries);
286 } 280 }
287 spin_unlock(&svc->sched_lock); 281 spin_unlock(&svc->sched_lock);
@@ -335,7 +329,7 @@ static void ip_vs_lblc_check_expire(unsigned long data)
335 if (time_before(now, en->lastuse + ENTRY_TIMEOUT)) 329 if (time_before(now, en->lastuse + ENTRY_TIMEOUT))
336 continue; 330 continue;
337 331
338 ip_vs_lblc_free(en); 332 ip_vs_lblc_del(en);
339 atomic_dec(&tbl->entries); 333 atomic_dec(&tbl->entries);
340 goal--; 334 goal--;
341 } 335 }
@@ -443,8 +437,8 @@ __ip_vs_lblc_schedule(struct ip_vs_service *svc)
443 continue; 437 continue;
444 438
445 doh = ip_vs_dest_conn_overhead(dest); 439 doh = ip_vs_dest_conn_overhead(dest);
446 if (loh * atomic_read(&dest->weight) > 440 if ((__s64)loh * atomic_read(&dest->weight) >
447 doh * atomic_read(&least->weight)) { 441 (__s64)doh * atomic_read(&least->weight)) {
448 least = dest; 442 least = dest;
449 loh = doh; 443 loh = doh;
450 } 444 }
@@ -511,7 +505,7 @@ ip_vs_lblc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
511 * free up entries from the trash at any time. 505 * free up entries from the trash at any time.
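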
512 */ 506 */
513 507
514 dest = rcu_dereference(en->dest); 508 dest = en->dest;
515 if ((dest->flags & IP_VS_DEST_F_AVAILABLE) && 509 if ((dest->flags & IP_VS_DEST_F_AVAILABLE) &&
516 atomic_read(&dest->weight) > 0 && !is_overloaded(dest, svc)) 510 atomic_read(&dest->weight) > 0 && !is_overloaded(dest, svc))
517 goto out; 511 goto out;
@@ -631,7 +625,7 @@ static void __exit ip_vs_lblc_cleanup(void)
631{ 625{
632 unregister_ip_vs_scheduler(&ip_vs_lblc_scheduler); 626 unregister_ip_vs_scheduler(&ip_vs_lblc_scheduler);
633 unregister_pernet_subsys(&ip_vs_lblc_ops); 627 unregister_pernet_subsys(&ip_vs_lblc_ops);
634 synchronize_rcu(); 628 rcu_barrier();
635} 629}
636 630
637 631
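[Note on the (__s64) casts in __ip_vs_lblc_schedule() above, repeated in the lblcr, nq, sed and wlc hunks below: they widen the weighted-overhead products before comparing them, because connection overhead times weight can exceed 32 bits and a wrapped product inverts the "least load" decision. The nq/sed/wlc hunks also turn loh/doh from unsigned int into int so the whole comparison stays in signed arithmetic. A standalone demonstration of the wraparound the casts prevent:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	int loh = 50000, dweight = 50000;	/* product 2.5e9 > INT32_MAX */
	int doh = 1000,  lweight = 1000;	/* product 1e6 */

	/* a 32-bit product wraps to a negative value and flips the result */
	int32_t n1 = (int32_t)((uint32_t)loh * (uint32_t)dweight);
	int32_t n2 = (int32_t)((uint32_t)doh * (uint32_t)lweight);
	printf("32-bit: %" PRId32 " > %" PRId32 " -> %s\n",
	       n1, n2, n1 > n2 ? "true" : "false");	/* false (wrong) */

	/* widening first, as the (__s64) casts do, keeps the math exact */
	int64_t w1 = (int64_t)loh * dweight;
	int64_t w2 = (int64_t)doh * lweight;
	printf("64-bit: %" PRId64 " > %" PRId64 " -> %s\n",
	       w1, w2, w1 > w2 ? "true" : "false");	/* true (right) */
	return 0;
}
]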
diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c
index 5199448697f6..0b8550089a2e 100644
--- a/net/netfilter/ipvs/ip_vs_lblcr.c
+++ b/net/netfilter/ipvs/ip_vs_lblcr.c
@@ -89,7 +89,7 @@
89 */ 89 */
90struct ip_vs_dest_set_elem { 90struct ip_vs_dest_set_elem {
91 struct list_head list; /* list link */ 91 struct list_head list; /* list link */
92 struct ip_vs_dest __rcu *dest; /* destination server */ 92 struct ip_vs_dest *dest; /* destination server */
93 struct rcu_head rcu_head; 93 struct rcu_head rcu_head;
94}; 94};
95 95
@@ -107,11 +107,7 @@ static void ip_vs_dest_set_insert(struct ip_vs_dest_set *set,
107 107
108 if (check) { 108 if (check) {
109 list_for_each_entry(e, &set->list, list) { 109 list_for_each_entry(e, &set->list, list) {
110 struct ip_vs_dest *d; 110 if (e->dest == dest)
111
112 d = rcu_dereference_protected(e->dest, 1);
113 if (d == dest)
114 /* already existed */
115 return; 111 return;
116 } 112 }
117 } 113 }
@@ -121,7 +117,7 @@ static void ip_vs_dest_set_insert(struct ip_vs_dest_set *set,
121 return; 117 return;
122 118
123 ip_vs_dest_hold(dest); 119 ip_vs_dest_hold(dest);
124 RCU_INIT_POINTER(e->dest, dest); 120 e->dest = dest;
125 121
126 list_add_rcu(&e->list, &set->list); 122 list_add_rcu(&e->list, &set->list);
127 atomic_inc(&set->size); 123 atomic_inc(&set->size);
@@ -129,22 +125,27 @@ static void ip_vs_dest_set_insert(struct ip_vs_dest_set *set,
129 set->lastmod = jiffies; 125 set->lastmod = jiffies;
130} 126}
131 127
128static void ip_vs_lblcr_elem_rcu_free(struct rcu_head *head)
129{
130 struct ip_vs_dest_set_elem *e;
131
132 e = container_of(head, struct ip_vs_dest_set_elem, rcu_head);
133 ip_vs_dest_put(e->dest);
134 kfree(e);
135}
136
132static void 137static void
133ip_vs_dest_set_erase(struct ip_vs_dest_set *set, struct ip_vs_dest *dest) 138ip_vs_dest_set_erase(struct ip_vs_dest_set *set, struct ip_vs_dest *dest)
134{ 139{
135 struct ip_vs_dest_set_elem *e; 140 struct ip_vs_dest_set_elem *e;
136 141
137 list_for_each_entry(e, &set->list, list) { 142 list_for_each_entry(e, &set->list, list) {
138 struct ip_vs_dest *d; 143 if (e->dest == dest) {
139
140 d = rcu_dereference_protected(e->dest, 1);
141 if (d == dest) {
142 /* HIT */ 144 /* HIT */
143 atomic_dec(&set->size); 145 atomic_dec(&set->size);
144 set->lastmod = jiffies; 146 set->lastmod = jiffies;
145 ip_vs_dest_put(dest);
146 list_del_rcu(&e->list); 147 list_del_rcu(&e->list);
147 kfree_rcu(e, rcu_head); 148 call_rcu(&e->rcu_head, ip_vs_lblcr_elem_rcu_free);
148 break; 149 break;
149 } 150 }
150 } 151 }
@@ -155,16 +156,8 @@ static void ip_vs_dest_set_eraseall(struct ip_vs_dest_set *set)
155 struct ip_vs_dest_set_elem *e, *ep; 156 struct ip_vs_dest_set_elem *e, *ep;
156 157
157 list_for_each_entry_safe(e, ep, &set->list, list) { 158 list_for_each_entry_safe(e, ep, &set->list, list) {
158 struct ip_vs_dest *d;
159
160 d = rcu_dereference_protected(e->dest, 1);
161 /*
162 * We don't kfree dest because it is referred either
163 * by its service or by the trash dest list.
164 */
165 ip_vs_dest_put(d);
166 list_del_rcu(&e->list); 159 list_del_rcu(&e->list);
167 kfree_rcu(e, rcu_head); 160 call_rcu(&e->rcu_head, ip_vs_lblcr_elem_rcu_free);
168 } 161 }
169} 162}
170 163
@@ -175,12 +168,9 @@ static inline struct ip_vs_dest *ip_vs_dest_set_min(struct ip_vs_dest_set *set)
175 struct ip_vs_dest *dest, *least; 168 struct ip_vs_dest *dest, *least;
176 int loh, doh; 169 int loh, doh;
177 170
178 if (set == NULL)
179 return NULL;
180
181 /* select the first destination server, whose weight > 0 */ 171 /* select the first destination server, whose weight > 0 */
182 list_for_each_entry_rcu(e, &set->list, list) { 172 list_for_each_entry_rcu(e, &set->list, list) {
183 least = rcu_dereference(e->dest); 173 least = e->dest;
184 if (least->flags & IP_VS_DEST_F_OVERLOAD) 174 if (least->flags & IP_VS_DEST_F_OVERLOAD)
185 continue; 175 continue;
186 176
@@ -195,13 +185,13 @@ static inline struct ip_vs_dest *ip_vs_dest_set_min(struct ip_vs_dest_set *set)
195 /* find the destination with the weighted least load */ 185 /* find the destination with the weighted least load */
196 nextstage: 186 nextstage:
197 list_for_each_entry_continue_rcu(e, &set->list, list) { 187 list_for_each_entry_continue_rcu(e, &set->list, list) {
198 dest = rcu_dereference(e->dest); 188 dest = e->dest;
199 if (dest->flags & IP_VS_DEST_F_OVERLOAD) 189 if (dest->flags & IP_VS_DEST_F_OVERLOAD)
200 continue; 190 continue;
201 191
202 doh = ip_vs_dest_conn_overhead(dest); 192 doh = ip_vs_dest_conn_overhead(dest);
203 if ((loh * atomic_read(&dest->weight) > 193 if (((__s64)loh * atomic_read(&dest->weight) >
204 doh * atomic_read(&least->weight)) 194 (__s64)doh * atomic_read(&least->weight))
205 && (dest->flags & IP_VS_DEST_F_AVAILABLE)) { 195 && (dest->flags & IP_VS_DEST_F_AVAILABLE)) {
206 least = dest; 196 least = dest;
207 loh = doh; 197 loh = doh;
@@ -232,7 +222,7 @@ static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set)
232 222
233 /* select the first destination server, whose weight > 0 */ 223 /* select the first destination server, whose weight > 0 */
234 list_for_each_entry(e, &set->list, list) { 224 list_for_each_entry(e, &set->list, list) {
235 most = rcu_dereference_protected(e->dest, 1); 225 most = e->dest;
236 if (atomic_read(&most->weight) > 0) { 226 if (atomic_read(&most->weight) > 0) {
237 moh = ip_vs_dest_conn_overhead(most); 227 moh = ip_vs_dest_conn_overhead(most);
238 goto nextstage; 228 goto nextstage;
@@ -243,11 +233,11 @@ static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set)
243 /* find the destination with the weighted most load */ 233 /* find the destination with the weighted most load */
244 nextstage: 234 nextstage:
245 list_for_each_entry_continue(e, &set->list, list) { 235 list_for_each_entry_continue(e, &set->list, list) {
246 dest = rcu_dereference_protected(e->dest, 1); 236 dest = e->dest;
247 doh = ip_vs_dest_conn_overhead(dest); 237 doh = ip_vs_dest_conn_overhead(dest);
248 /* moh/mw < doh/dw ==> moh*dw < doh*mw, where mw,dw>0 */ 238 /* moh/mw < doh/dw ==> moh*dw < doh*mw, where mw,dw>0 */
249 if ((moh * atomic_read(&dest->weight) < 239 if (((__s64)moh * atomic_read(&dest->weight) <
250 doh * atomic_read(&most->weight)) 240 (__s64)doh * atomic_read(&most->weight))
251 && (atomic_read(&dest->weight) > 0)) { 241 && (atomic_read(&dest->weight) > 0)) {
252 most = dest; 242 most = dest;
253 moh = doh; 243 moh = doh;
@@ -611,8 +601,8 @@ __ip_vs_lblcr_schedule(struct ip_vs_service *svc)
611 continue; 601 continue;
612 602
613 doh = ip_vs_dest_conn_overhead(dest); 603 doh = ip_vs_dest_conn_overhead(dest);
614 if (loh * atomic_read(&dest->weight) > 604 if ((__s64)loh * atomic_read(&dest->weight) >
615 doh * atomic_read(&least->weight)) { 605 (__s64)doh * atomic_read(&least->weight)) {
616 least = dest; 606 least = dest;
617 loh = doh; 607 loh = doh;
618 } 608 }
@@ -819,7 +809,7 @@ static void __exit ip_vs_lblcr_cleanup(void)
819{ 809{
820 unregister_ip_vs_scheduler(&ip_vs_lblcr_scheduler); 810 unregister_ip_vs_scheduler(&ip_vs_lblcr_scheduler);
821 unregister_pernet_subsys(&ip_vs_lblcr_ops); 811 unregister_pernet_subsys(&ip_vs_lblcr_ops);
822 synchronize_rcu(); 812 rcu_barrier();
823} 813}
824 814
825 815
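[Note on the lblc and lblcr hunks: both now drop the cached dest reference from an RCU callback instead of releasing it before kfree_rcu(), so a reader traversing the list under RCU can never see an entry whose dest refcount has already been given up. Module unload correspondingly switches from synchronize_rcu() to rcu_barrier(), which waits for the queued callbacks themselves to run, not merely for readers to finish. A kernel-style sketch of the pattern; struct pinned_obj and pinned_obj_put() are hypothetical names, not the ipvs ones:

#include <linux/kernel.h>
#include <linux/rculist.h>
#include <linux/slab.h>

struct cache_entry {
	struct hlist_node list;
	struct pinned_obj *obj;		/* object the entry holds a ref on */
	struct rcu_head rcu_head;
};

static void cache_entry_rcu_free(struct rcu_head *head)
{
	struct cache_entry *en = container_of(head, struct cache_entry,
					      rcu_head);

	pinned_obj_put(en->obj);	/* safe: no RCU reader can still see en */
	kfree(en);
}

static void cache_entry_del(struct cache_entry *en)
{
	hlist_del_rcu(&en->list);
	call_rcu(&en->rcu_head, cache_entry_rcu_free);
}

/* On module exit, rcu_barrier() (not synchronize_rcu()) must run before
 * the module text goes away, since pending cache_entry_rcu_free() calls
 * still point into it.
 */
]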
diff --git a/net/netfilter/ipvs/ip_vs_nq.c b/net/netfilter/ipvs/ip_vs_nq.c
index d8d9860934fe..961a6de9bb29 100644
--- a/net/netfilter/ipvs/ip_vs_nq.c
+++ b/net/netfilter/ipvs/ip_vs_nq.c
@@ -40,7 +40,7 @@
40#include <net/ip_vs.h> 40#include <net/ip_vs.h>
41 41
42 42
43static inline unsigned int 43static inline int
44ip_vs_nq_dest_overhead(struct ip_vs_dest *dest) 44ip_vs_nq_dest_overhead(struct ip_vs_dest *dest)
45{ 45{
46 /* 46 /*
@@ -59,7 +59,7 @@ ip_vs_nq_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
59 struct ip_vs_iphdr *iph) 59 struct ip_vs_iphdr *iph)
60{ 60{
61 struct ip_vs_dest *dest, *least = NULL; 61 struct ip_vs_dest *dest, *least = NULL;
62 unsigned int loh = 0, doh; 62 int loh = 0, doh;
63 63
64 IP_VS_DBG(6, "%s(): Scheduling...\n", __func__); 64 IP_VS_DBG(6, "%s(): Scheduling...\n", __func__);
65 65
@@ -92,8 +92,8 @@ ip_vs_nq_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
92 } 92 }
93 93
94 if (!least || 94 if (!least ||
95 (loh * atomic_read(&dest->weight) > 95 ((__s64)loh * atomic_read(&dest->weight) >
96 doh * atomic_read(&least->weight))) { 96 (__s64)doh * atomic_read(&least->weight))) {
97 least = dest; 97 least = dest;
98 loh = doh; 98 loh = doh;
99 } 99 }
diff --git a/net/netfilter/ipvs/ip_vs_sed.c b/net/netfilter/ipvs/ip_vs_sed.c
index a5284cc3d882..e446b9fa7424 100644
--- a/net/netfilter/ipvs/ip_vs_sed.c
+++ b/net/netfilter/ipvs/ip_vs_sed.c
@@ -44,7 +44,7 @@
44#include <net/ip_vs.h> 44#include <net/ip_vs.h>
45 45
46 46
47static inline unsigned int 47static inline int
48ip_vs_sed_dest_overhead(struct ip_vs_dest *dest) 48ip_vs_sed_dest_overhead(struct ip_vs_dest *dest)
49{ 49{
50 /* 50 /*
@@ -63,7 +63,7 @@ ip_vs_sed_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
63 struct ip_vs_iphdr *iph) 63 struct ip_vs_iphdr *iph)
64{ 64{
65 struct ip_vs_dest *dest, *least; 65 struct ip_vs_dest *dest, *least;
66 unsigned int loh, doh; 66 int loh, doh;
67 67
68 IP_VS_DBG(6, "%s(): Scheduling...\n", __func__); 68 IP_VS_DBG(6, "%s(): Scheduling...\n", __func__);
69 69
@@ -99,8 +99,8 @@ ip_vs_sed_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
99 if (dest->flags & IP_VS_DEST_F_OVERLOAD) 99 if (dest->flags & IP_VS_DEST_F_OVERLOAD)
100 continue; 100 continue;
101 doh = ip_vs_sed_dest_overhead(dest); 101 doh = ip_vs_sed_dest_overhead(dest);
102 if (loh * atomic_read(&dest->weight) > 102 if ((__s64)loh * atomic_read(&dest->weight) >
103 doh * atomic_read(&least->weight)) { 103 (__s64)doh * atomic_read(&least->weight)) {
104 least = dest; 104 least = dest;
105 loh = doh; 105 loh = doh;
106 } 106 }
diff --git a/net/netfilter/ipvs/ip_vs_wlc.c b/net/netfilter/ipvs/ip_vs_wlc.c
index 6dc1fa128840..b5b4650d50a9 100644
--- a/net/netfilter/ipvs/ip_vs_wlc.c
+++ b/net/netfilter/ipvs/ip_vs_wlc.c
@@ -35,7 +35,7 @@ ip_vs_wlc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
35 struct ip_vs_iphdr *iph) 35 struct ip_vs_iphdr *iph)
36{ 36{
37 struct ip_vs_dest *dest, *least; 37 struct ip_vs_dest *dest, *least;
38 unsigned int loh, doh; 38 int loh, doh;
39 39
40 IP_VS_DBG(6, "ip_vs_wlc_schedule(): Scheduling...\n"); 40 IP_VS_DBG(6, "ip_vs_wlc_schedule(): Scheduling...\n");
41 41
@@ -71,8 +71,8 @@ ip_vs_wlc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
71 if (dest->flags & IP_VS_DEST_F_OVERLOAD) 71 if (dest->flags & IP_VS_DEST_F_OVERLOAD)
72 continue; 72 continue;
73 doh = ip_vs_dest_conn_overhead(dest); 73 doh = ip_vs_dest_conn_overhead(dest);
74 if (loh * atomic_read(&dest->weight) > 74 if ((__s64)loh * atomic_read(&dest->weight) >
75 doh * atomic_read(&least->weight)) { 75 (__s64)doh * atomic_read(&least->weight)) {
76 least = dest; 76 least = dest;
77 loh = doh; 77 loh = doh;
78 } 78 }
diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c
index 6fd967c6278c..cdf4567ba9b3 100644
--- a/net/netfilter/nf_synproxy_core.c
+++ b/net/netfilter/nf_synproxy_core.c
@@ -24,7 +24,7 @@
24int synproxy_net_id; 24int synproxy_net_id;
25EXPORT_SYMBOL_GPL(synproxy_net_id); 25EXPORT_SYMBOL_GPL(synproxy_net_id);
26 26
27void 27bool
28synproxy_parse_options(const struct sk_buff *skb, unsigned int doff, 28synproxy_parse_options(const struct sk_buff *skb, unsigned int doff,
29 const struct tcphdr *th, struct synproxy_options *opts) 29 const struct tcphdr *th, struct synproxy_options *opts)
30{ 30{
@@ -32,7 +32,8 @@ synproxy_parse_options(const struct sk_buff *skb, unsigned int doff,
32 u8 buf[40], *ptr; 32 u8 buf[40], *ptr;
33 33
34 ptr = skb_header_pointer(skb, doff + sizeof(*th), length, buf); 34 ptr = skb_header_pointer(skb, doff + sizeof(*th), length, buf);
35 BUG_ON(ptr == NULL); 35 if (ptr == NULL)
36 return false;
36 37
37 opts->options = 0; 38 opts->options = 0;
38 while (length > 0) { 39 while (length > 0) {
@@ -41,16 +42,16 @@ synproxy_parse_options(const struct sk_buff *skb, unsigned int doff,
41 42
42 switch (opcode) { 43 switch (opcode) {
43 case TCPOPT_EOL: 44 case TCPOPT_EOL:
44 return; 45 return true;
45 case TCPOPT_NOP: 46 case TCPOPT_NOP:
46 length--; 47 length--;
47 continue; 48 continue;
48 default: 49 default:
49 opsize = *ptr++; 50 opsize = *ptr++;
50 if (opsize < 2) 51 if (opsize < 2)
51 return; 52 return true;
52 if (opsize > length) 53 if (opsize > length)
53 return; 54 return true;
54 55
55 switch (opcode) { 56 switch (opcode) {
56 case TCPOPT_MSS: 57 case TCPOPT_MSS:
@@ -84,6 +85,7 @@ synproxy_parse_options(const struct sk_buff *skb, unsigned int doff,
84 length -= opsize; 85 length -= opsize;
85 } 86 }
86 } 87 }
88 return true;
87} 89}
88EXPORT_SYMBOL_GPL(synproxy_parse_options); 90EXPORT_SYMBOL_GPL(synproxy_parse_options);
89 91
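[Note on the nf_synproxy_core.c hunk: synproxy_parse_options() now returns bool and bails out when skb_header_pointer() cannot produce the option bytes, instead of BUG_ON(). A malformed packet is caller-handled data, not a kernel invariant, and crashing on it would be remotely triggerable. A userspace sketch of the same defensive parse-loop shape:

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

#define OPT_EOL 0
#define OPT_NOP 1

/* Walk TCP-style options.  A missing buffer -> false (drop the packet);
 * malformed option lengths just terminate a best-effort parse, as above.
 */
static bool parse_options(const uint8_t *ptr, size_t length)
{
	if (ptr == NULL)
		return false;

	while (length > 0) {
		uint8_t opcode = *ptr++;
		uint8_t opsize;

		switch (opcode) {
		case OPT_EOL:
			return true;
		case OPT_NOP:
			length--;
			continue;
		default:
			if (length < 2)		/* extra guard for the flat buffer */
				return true;
			opsize = *ptr++;
			if (opsize < 2 || opsize > length)
				return true;
			/* ... record MSS, wscale, etc. from ptr[0..opsize-3] ... */
			ptr += opsize - 2;
			length -= opsize;
		}
	}
	return true;
}
]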
diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
index 32ad015ee8ce..a2fef8b10b96 100644
--- a/net/sched/sch_fq.c
+++ b/net/sched/sch_fq.c
@@ -285,7 +285,7 @@ static struct fq_flow *fq_classify(struct sk_buff *skb, struct fq_sched_data *q)
285 285
286 286
287/* remove one skb from head of flow queue */ 287/* remove one skb from head of flow queue */
288static struct sk_buff *fq_dequeue_head(struct fq_flow *flow) 288static struct sk_buff *fq_dequeue_head(struct Qdisc *sch, struct fq_flow *flow)
289{ 289{
290 struct sk_buff *skb = flow->head; 290 struct sk_buff *skb = flow->head;
291 291
@@ -293,6 +293,8 @@ static struct sk_buff *fq_dequeue_head(struct fq_flow *flow)
293 flow->head = skb->next; 293 flow->head = skb->next;
294 skb->next = NULL; 294 skb->next = NULL;
295 flow->qlen--; 295 flow->qlen--;
296 sch->qstats.backlog -= qdisc_pkt_len(skb);
297 sch->q.qlen--;
296 } 298 }
297 return skb; 299 return skb;
298} 300}
@@ -418,8 +420,9 @@ static struct sk_buff *fq_dequeue(struct Qdisc *sch)
418 struct fq_flow_head *head; 420 struct fq_flow_head *head;
419 struct sk_buff *skb; 421 struct sk_buff *skb;
420 struct fq_flow *f; 422 struct fq_flow *f;
423 u32 rate;
421 424
422 skb = fq_dequeue_head(&q->internal); 425 skb = fq_dequeue_head(sch, &q->internal);
423 if (skb) 426 if (skb)
424 goto out; 427 goto out;
425 fq_check_throttled(q, now); 428 fq_check_throttled(q, now);
@@ -449,7 +452,7 @@ begin:
449 goto begin; 452 goto begin;
450 } 453 }
451 454
452 skb = fq_dequeue_head(f); 455 skb = fq_dequeue_head(sch, f);
453 if (!skb) { 456 if (!skb) {
454 head->first = f->next; 457 head->first = f->next;
455 /* force a pass through old_flows to prevent starvation */ 458 /* force a pass through old_flows to prevent starvation */
@@ -466,43 +469,74 @@ begin:
466 f->time_next_packet = now; 469 f->time_next_packet = now;
467 f->credit -= qdisc_pkt_len(skb); 470 f->credit -= qdisc_pkt_len(skb);
468 471
469 if (f->credit <= 0 && 472 if (f->credit > 0 || !q->rate_enable)
470 q->rate_enable && 473 goto out;
471 skb->sk && skb->sk->sk_state != TCP_TIME_WAIT) {
472 u32 rate = skb->sk->sk_pacing_rate ?: q->flow_default_rate;
473 474
474 rate = min(rate, q->flow_max_rate); 475 if (skb->sk && skb->sk->sk_state != TCP_TIME_WAIT) {
475 if (rate) { 476 rate = skb->sk->sk_pacing_rate ?: q->flow_default_rate;
476 u64 len = (u64)qdisc_pkt_len(skb) * NSEC_PER_SEC;
477
478 do_div(len, rate);
479 /* Since socket rate can change later,
480 * clamp the delay to 125 ms.
481 * TODO: maybe segment the too big skb, as in commit
482 * e43ac79a4bc ("sch_tbf: segment too big GSO packets")
483 */
484 if (unlikely(len > 125 * NSEC_PER_MSEC)) {
485 len = 125 * NSEC_PER_MSEC;
486 q->stat_pkts_too_long++;
487 }
488 477
489 f->time_next_packet = now + len; 478 rate = min(rate, q->flow_max_rate);
479 } else {
480 rate = q->flow_max_rate;
481 if (rate == ~0U)
482 goto out;
483 }
484 if (rate) {
485 u32 plen = max(qdisc_pkt_len(skb), q->quantum);
486 u64 len = (u64)plen * NSEC_PER_SEC;
487
488 do_div(len, rate);
489 /* Since socket rate can change later,
490 * clamp the delay to 125 ms.
491 * TODO: maybe segment the too big skb, as in commit
492 * e43ac79a4bc ("sch_tbf: segment too big GSO packets")
493 */
494 if (unlikely(len > 125 * NSEC_PER_MSEC)) {
495 len = 125 * NSEC_PER_MSEC;
496 q->stat_pkts_too_long++;
490 } 497 }
498
499 f->time_next_packet = now + len;
491 } 500 }
492out: 501out:
493 sch->qstats.backlog -= qdisc_pkt_len(skb);
494 qdisc_bstats_update(sch, skb); 502 qdisc_bstats_update(sch, skb);
495 sch->q.qlen--;
496 qdisc_unthrottled(sch); 503 qdisc_unthrottled(sch);
497 return skb; 504 return skb;
498} 505}
499 506
500static void fq_reset(struct Qdisc *sch) 507static void fq_reset(struct Qdisc *sch)
501{ 508{
509 struct fq_sched_data *q = qdisc_priv(sch);
510 struct rb_root *root;
502 struct sk_buff *skb; 511 struct sk_buff *skb;
512 struct rb_node *p;
513 struct fq_flow *f;
514 unsigned int idx;
503 515
504 while ((skb = fq_dequeue(sch)) != NULL) 516 while ((skb = fq_dequeue_head(sch, &q->internal)) != NULL)
505 kfree_skb(skb); 517 kfree_skb(skb);
518
519 if (!q->fq_root)
520 return;
521
522 for (idx = 0; idx < (1U << q->fq_trees_log); idx++) {
523 root = &q->fq_root[idx];
524 while ((p = rb_first(root)) != NULL) {
525 f = container_of(p, struct fq_flow, fq_node);
526 rb_erase(p, root);
527
528 while ((skb = fq_dequeue_head(sch, f)) != NULL)
529 kfree_skb(skb);
530
531 kmem_cache_free(fq_flow_cachep, f);
532 }
533 }
534 q->new_flows.first = NULL;
535 q->old_flows.first = NULL;
536 q->delayed = RB_ROOT;
537 q->flows = 0;
538 q->inactive_flows = 0;
539 q->throttled_flows = 0;
506} 540}
507 541
508static void fq_rehash(struct fq_sched_data *q, 542static void fq_rehash(struct fq_sched_data *q,
@@ -645,6 +679,8 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt)
645 while (sch->q.qlen > sch->limit) { 679 while (sch->q.qlen > sch->limit) {
646 struct sk_buff *skb = fq_dequeue(sch); 680 struct sk_buff *skb = fq_dequeue(sch);
647 681
682 if (!skb)
683 break;
648 kfree_skb(skb); 684 kfree_skb(skb);
649 drop_count++; 685 drop_count++;
650 } 686 }
@@ -657,21 +693,9 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt)
657static void fq_destroy(struct Qdisc *sch) 693static void fq_destroy(struct Qdisc *sch)
658{ 694{
659 struct fq_sched_data *q = qdisc_priv(sch); 695 struct fq_sched_data *q = qdisc_priv(sch);
660 struct rb_root *root;
661 struct rb_node *p;
662 unsigned int idx;
663 696
664 if (q->fq_root) { 697 fq_reset(sch);
665 for (idx = 0; idx < (1U << q->fq_trees_log); idx++) { 698 kfree(q->fq_root);
666 root = &q->fq_root[idx];
667 while ((p = rb_first(root)) != NULL) {
668 rb_erase(p, root);
669 kmem_cache_free(fq_flow_cachep,
670 container_of(p, struct fq_flow, fq_node));
671 }
672 }
673 kfree(q->fq_root);
674 }
675 qdisc_watchdog_cancel(&q->watchdog); 699 qdisc_watchdog_cancel(&q->watchdog);
676} 700}
677 701
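[Note on the sch_fq.c hunks: three things happen. Queue accounting (sch->q.qlen and the backlog) moves into fq_dequeue_head() so that fq_reset() can drain every flow without round-tripping through fq_dequeue(); fq_destroy() is then reduced to fq_reset() plus kfree(); and the pacing math charges each packet at least one quantum (plen = max(pkt_len, quantum)), with the resulting gap clamped to 125 ms because the socket's pacing rate can change later. A self-contained sketch of that delay computation:

#include <stdint.h>
#include <stdio.h>

#define NSEC_PER_SEC  1000000000ULL
#define NSEC_PER_MSEC 1000000ULL

/* Pacing gap in ns before the flow's next packet may leave, mirroring
 * the len = plen * NSEC_PER_SEC / rate computation above.
 */
static uint64_t pacing_delay_ns(uint32_t pkt_len, uint32_t quantum,
				uint32_t rate_bps)
{
	uint32_t plen = pkt_len > quantum ? pkt_len : quantum;
	uint64_t len;

	if (rate_bps == 0)
		return 0;			/* pacing disabled */
	len = (uint64_t)plen * NSEC_PER_SEC / rate_bps;
	if (len > 125 * NSEC_PER_MSEC)		/* stat_pkts_too_long path */
		len = 125 * NSEC_PER_MSEC;
	return len;
}

int main(void)
{
	/* 1500-byte packet, 3028-byte quantum, 1 MB/s: charged as one
	 * quantum -> 3,028,000 ns (about 3 ms) to the next packet.
	 */
	printf("%llu ns\n",
	       (unsigned long long)pacing_delay_ns(1500, 3028, 1000000));
	return 0;
}
]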
diff --git a/net/sysctl_net.c b/net/sysctl_net.c
index 9bc6db04be3e..e7000be321b0 100644
--- a/net/sysctl_net.c
+++ b/net/sysctl_net.c
@@ -47,12 +47,12 @@ static int net_ctl_permissions(struct ctl_table_header *head,
47 47
48 /* Allow network administrator to have same access as root. */ 48 /* Allow network administrator to have same access as root. */
49 if (ns_capable(net->user_ns, CAP_NET_ADMIN) || 49 if (ns_capable(net->user_ns, CAP_NET_ADMIN) ||
50 uid_eq(root_uid, current_uid())) { 50 uid_eq(root_uid, current_euid())) {
51 int mode = (table->mode >> 6) & 7; 51 int mode = (table->mode >> 6) & 7;
52 return (mode << 6) | (mode << 3) | mode; 52 return (mode << 6) | (mode << 3) | mode;
53 } 53 }
54 /* Allow netns root group to have the same access as the root group */ 54 /* Allow netns root group to have the same access as the root group */
55 if (gid_eq(root_gid, current_gid())) { 55 if (in_egroup_p(root_gid)) {
56 int mode = (table->mode >> 3) & 7; 56 int mode = (table->mode >> 3) & 7;
57 return (mode << 3) | mode; 57 return (mode << 3) | mode;
58 } 58 }
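[Note on the sysctl_net.c hunk: the permission check swaps the real IDs for the effective ones (current_euid(), in_egroup_p()), so a setuid-root binary gets root's sysctl access the same way it does for file permissions. The surrounding context lines show how the table mode is then expanded for a privileged caller: the owner's rwx bits are replicated into the group and other positions. A tiny demonstration of that expansion:

#include <stdio.h>

/* Replicate the owner bits of a sysctl table mode across group/other,
 * as net_ctl_permissions() does for CAP_NET_ADMIN or euid-root callers.
 */
static int owner_mode_everywhere(int table_mode)
{
	int mode = (table_mode >> 6) & 7;	/* owner rwx */

	return (mode << 6) | (mode << 3) | mode;
}

int main(void)
{
	printf("%o\n", owner_mode_everywhere(0644));	/* prints 666 */
	return 0;
}
]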