diff options
author | Greg Kroah-Hartman <gregkh@linuxfoundation.org> | 2013-10-19 16:05:38 -0400 |
---|---|---|
committer | Greg Kroah-Hartman <gregkh@linuxfoundation.org> | 2013-10-19 16:05:38 -0400 |
commit | a7204d72db251784808b0c050220992d7f833a2c (patch) | |
tree | 3491452ea74b039f3278fd95bb7ad7c88b6b3631 /net | |
parent | ba6857b2d49646f2d4c245ff58d95d145f380177 (diff) | |
parent | 31d141e3a666269a3b6fcccddb0351caf7454240 (diff) |
Merge 3.12-rc6 into driver-core-next
We want these fixes here too.
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Diffstat (limited to 'net')
37 files changed, 516 insertions, 356 deletions
diff --git a/net/802/mrp.c b/net/802/mrp.c index 1eb05d80b07b..3ed616215870 100644 --- a/net/802/mrp.c +++ b/net/802/mrp.c | |||
@@ -24,6 +24,11 @@ | |||
24 | static unsigned int mrp_join_time __read_mostly = 200; | 24 | static unsigned int mrp_join_time __read_mostly = 200; |
25 | module_param(mrp_join_time, uint, 0644); | 25 | module_param(mrp_join_time, uint, 0644); |
26 | MODULE_PARM_DESC(mrp_join_time, "Join time in ms (default 200ms)"); | 26 | MODULE_PARM_DESC(mrp_join_time, "Join time in ms (default 200ms)"); |
27 | |||
28 | static unsigned int mrp_periodic_time __read_mostly = 1000; | ||
29 | module_param(mrp_periodic_time, uint, 0644); | ||
30 | MODULE_PARM_DESC(mrp_periodic_time, "Periodic time in ms (default 1s)"); | ||
31 | |||
27 | MODULE_LICENSE("GPL"); | 32 | MODULE_LICENSE("GPL"); |
28 | 33 | ||
29 | static const u8 | 34 | static const u8 |
@@ -595,6 +600,24 @@ static void mrp_join_timer(unsigned long data) | |||
595 | mrp_join_timer_arm(app); | 600 | mrp_join_timer_arm(app); |
596 | } | 601 | } |
597 | 602 | ||
603 | static void mrp_periodic_timer_arm(struct mrp_applicant *app) | ||
604 | { | ||
605 | mod_timer(&app->periodic_timer, | ||
606 | jiffies + msecs_to_jiffies(mrp_periodic_time)); | ||
607 | } | ||
608 | |||
609 | static void mrp_periodic_timer(unsigned long data) | ||
610 | { | ||
611 | struct mrp_applicant *app = (struct mrp_applicant *)data; | ||
612 | |||
613 | spin_lock(&app->lock); | ||
614 | mrp_mad_event(app, MRP_EVENT_PERIODIC); | ||
615 | mrp_pdu_queue(app); | ||
616 | spin_unlock(&app->lock); | ||
617 | |||
618 | mrp_periodic_timer_arm(app); | ||
619 | } | ||
620 | |||
598 | static int mrp_pdu_parse_end_mark(struct sk_buff *skb, int *offset) | 621 | static int mrp_pdu_parse_end_mark(struct sk_buff *skb, int *offset) |
599 | { | 622 | { |
600 | __be16 endmark; | 623 | __be16 endmark; |
@@ -845,6 +868,9 @@ int mrp_init_applicant(struct net_device *dev, struct mrp_application *appl) | |||
845 | rcu_assign_pointer(dev->mrp_port->applicants[appl->type], app); | 868 | rcu_assign_pointer(dev->mrp_port->applicants[appl->type], app); |
846 | setup_timer(&app->join_timer, mrp_join_timer, (unsigned long)app); | 869 | setup_timer(&app->join_timer, mrp_join_timer, (unsigned long)app); |
847 | mrp_join_timer_arm(app); | 870 | mrp_join_timer_arm(app); |
871 | setup_timer(&app->periodic_timer, mrp_periodic_timer, | ||
872 | (unsigned long)app); | ||
873 | mrp_periodic_timer_arm(app); | ||
848 | return 0; | 874 | return 0; |
849 | 875 | ||
850 | err3: | 876 | err3: |
@@ -870,6 +896,7 @@ void mrp_uninit_applicant(struct net_device *dev, struct mrp_application *appl) | |||
870 | * all pending messages before the applicant is gone. | 896 | * all pending messages before the applicant is gone. |
871 | */ | 897 | */ |
872 | del_timer_sync(&app->join_timer); | 898 | del_timer_sync(&app->join_timer); |
899 | del_timer_sync(&app->periodic_timer); | ||
873 | 900 | ||
874 | spin_lock_bh(&app->lock); | 901 | spin_lock_bh(&app->lock); |
875 | mrp_mad_event(app, MRP_EVENT_TX); | 902 | mrp_mad_event(app, MRP_EVENT_TX); |
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 634debab4d54..fb7356fcfe51 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c | |||
@@ -1146,7 +1146,11 @@ int hci_dev_open(__u16 dev) | |||
1146 | goto done; | 1146 | goto done; |
1147 | } | 1147 | } |
1148 | 1148 | ||
1149 | if (hdev->rfkill && rfkill_blocked(hdev->rfkill)) { | 1149 | /* Check for rfkill but allow the HCI setup stage to proceed |
1150 | * (which in itself doesn't cause any RF activity). | ||
1151 | */ | ||
1152 | if (test_bit(HCI_RFKILLED, &hdev->dev_flags) && | ||
1153 | !test_bit(HCI_SETUP, &hdev->dev_flags)) { | ||
1150 | ret = -ERFKILL; | 1154 | ret = -ERFKILL; |
1151 | goto done; | 1155 | goto done; |
1152 | } | 1156 | } |
@@ -1566,10 +1570,13 @@ static int hci_rfkill_set_block(void *data, bool blocked) | |||
1566 | 1570 | ||
1567 | BT_DBG("%p name %s blocked %d", hdev, hdev->name, blocked); | 1571 | BT_DBG("%p name %s blocked %d", hdev, hdev->name, blocked); |
1568 | 1572 | ||
1569 | if (!blocked) | 1573 | if (blocked) { |
1570 | return 0; | 1574 | set_bit(HCI_RFKILLED, &hdev->dev_flags); |
1571 | 1575 | if (!test_bit(HCI_SETUP, &hdev->dev_flags)) | |
1572 | hci_dev_do_close(hdev); | 1576 | hci_dev_do_close(hdev); |
1577 | } else { | ||
1578 | clear_bit(HCI_RFKILLED, &hdev->dev_flags); | ||
1579 | } | ||
1573 | 1580 | ||
1574 | return 0; | 1581 | return 0; |
1575 | } | 1582 | } |
@@ -1591,9 +1598,13 @@ static void hci_power_on(struct work_struct *work) | |||
1591 | return; | 1598 | return; |
1592 | } | 1599 | } |
1593 | 1600 | ||
1594 | if (test_bit(HCI_AUTO_OFF, &hdev->dev_flags)) | 1601 | if (test_bit(HCI_RFKILLED, &hdev->dev_flags)) { |
1602 | clear_bit(HCI_AUTO_OFF, &hdev->dev_flags); | ||
1603 | hci_dev_do_close(hdev); | ||
1604 | } else if (test_bit(HCI_AUTO_OFF, &hdev->dev_flags)) { | ||
1595 | queue_delayed_work(hdev->req_workqueue, &hdev->power_off, | 1605 | queue_delayed_work(hdev->req_workqueue, &hdev->power_off, |
1596 | HCI_AUTO_OFF_TIMEOUT); | 1606 | HCI_AUTO_OFF_TIMEOUT); |
1607 | } | ||
1597 | 1608 | ||
1598 | if (test_and_clear_bit(HCI_SETUP, &hdev->dev_flags)) | 1609 | if (test_and_clear_bit(HCI_SETUP, &hdev->dev_flags)) |
1599 | mgmt_index_added(hdev); | 1610 | mgmt_index_added(hdev); |
@@ -2209,6 +2220,9 @@ int hci_register_dev(struct hci_dev *hdev) | |||
2209 | } | 2220 | } |
2210 | } | 2221 | } |
2211 | 2222 | ||
2223 | if (hdev->rfkill && rfkill_blocked(hdev->rfkill)) | ||
2224 | set_bit(HCI_RFKILLED, &hdev->dev_flags); | ||
2225 | |||
2212 | set_bit(HCI_SETUP, &hdev->dev_flags); | 2226 | set_bit(HCI_SETUP, &hdev->dev_flags); |
2213 | 2227 | ||
2214 | if (hdev->dev_type != HCI_AMP) | 2228 | if (hdev->dev_type != HCI_AMP) |
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 94aab73f89d4..8db3e89fae35 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c | |||
@@ -3557,7 +3557,11 @@ static void hci_le_ltk_request_evt(struct hci_dev *hdev, struct sk_buff *skb) | |||
3557 | cp.handle = cpu_to_le16(conn->handle); | 3557 | cp.handle = cpu_to_le16(conn->handle); |
3558 | 3558 | ||
3559 | if (ltk->authenticated) | 3559 | if (ltk->authenticated) |
3560 | conn->sec_level = BT_SECURITY_HIGH; | 3560 | conn->pending_sec_level = BT_SECURITY_HIGH; |
3561 | else | ||
3562 | conn->pending_sec_level = BT_SECURITY_MEDIUM; | ||
3563 | |||
3564 | conn->enc_key_size = ltk->enc_size; | ||
3561 | 3565 | ||
3562 | hci_send_cmd(hdev, HCI_OP_LE_LTK_REPLY, sizeof(cp), &cp); | 3566 | hci_send_cmd(hdev, HCI_OP_LE_LTK_REPLY, sizeof(cp), &cp); |
3563 | 3567 | ||
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index b3bb7bca8e60..63fa11109a1c 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c | |||
@@ -3755,6 +3755,13 @@ static struct l2cap_chan *l2cap_connect(struct l2cap_conn *conn, | |||
3755 | 3755 | ||
3756 | sk = chan->sk; | 3756 | sk = chan->sk; |
3757 | 3757 | ||
3758 | /* For certain devices (ex: HID mouse), support for authentication, | ||
3759 | * pairing and bonding is optional. For such devices, in order to avoid | ||
3760 | * keeping the ACL alive for too long after L2CAP disconnection, reset the ACL | ||
3761 | * disc_timeout back to HCI_DISCONN_TIMEOUT during L2CAP connect. | ||
3762 | */ | ||
3763 | conn->hcon->disc_timeout = HCI_DISCONN_TIMEOUT; | ||
3764 | |||
3758 | bacpy(&bt_sk(sk)->src, conn->src); | 3765 | bacpy(&bt_sk(sk)->src, conn->src); |
3759 | bacpy(&bt_sk(sk)->dst, conn->dst); | 3766 | bacpy(&bt_sk(sk)->dst, conn->dst); |
3760 | chan->psm = psm; | 3767 | chan->psm = psm; |
diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c index 6d126faf145f..84fcf9fff3ea 100644 --- a/net/bluetooth/rfcomm/tty.c +++ b/net/bluetooth/rfcomm/tty.c | |||
@@ -569,7 +569,6 @@ static void rfcomm_dev_data_ready(struct rfcomm_dlc *dlc, struct sk_buff *skb) | |||
569 | static void rfcomm_dev_state_change(struct rfcomm_dlc *dlc, int err) | 569 | static void rfcomm_dev_state_change(struct rfcomm_dlc *dlc, int err) |
570 | { | 570 | { |
571 | struct rfcomm_dev *dev = dlc->owner; | 571 | struct rfcomm_dev *dev = dlc->owner; |
572 | struct tty_struct *tty; | ||
573 | if (!dev) | 572 | if (!dev) |
574 | return; | 573 | return; |
575 | 574 | ||
@@ -581,38 +580,8 @@ static void rfcomm_dev_state_change(struct rfcomm_dlc *dlc, int err) | |||
581 | DPM_ORDER_DEV_AFTER_PARENT); | 580 | DPM_ORDER_DEV_AFTER_PARENT); |
582 | 581 | ||
583 | wake_up_interruptible(&dev->port.open_wait); | 582 | wake_up_interruptible(&dev->port.open_wait); |
584 | } else if (dlc->state == BT_CLOSED) { | 583 | } else if (dlc->state == BT_CLOSED) |
585 | tty = tty_port_tty_get(&dev->port); | 584 | tty_port_tty_hangup(&dev->port, false); |
586 | if (!tty) { | ||
587 | if (test_bit(RFCOMM_RELEASE_ONHUP, &dev->flags)) { | ||
588 | /* Drop DLC lock here to avoid deadlock | ||
589 | * 1. rfcomm_dev_get will take rfcomm_dev_lock | ||
590 | * but in rfcomm_dev_add there's lock order: | ||
591 | * rfcomm_dev_lock -> dlc lock | ||
592 | * 2. tty_port_put will deadlock if it's | ||
593 | * the last reference | ||
594 | * | ||
595 | * FIXME: when we release the lock anything | ||
596 | * could happen to dev, even its destruction | ||
597 | */ | ||
598 | rfcomm_dlc_unlock(dlc); | ||
599 | if (rfcomm_dev_get(dev->id) == NULL) { | ||
600 | rfcomm_dlc_lock(dlc); | ||
601 | return; | ||
602 | } | ||
603 | |||
604 | if (!test_and_set_bit(RFCOMM_TTY_RELEASED, | ||
605 | &dev->flags)) | ||
606 | tty_port_put(&dev->port); | ||
607 | |||
608 | tty_port_put(&dev->port); | ||
609 | rfcomm_dlc_lock(dlc); | ||
610 | } | ||
611 | } else { | ||
612 | tty_hangup(tty); | ||
613 | tty_kref_put(tty); | ||
614 | } | ||
615 | } | ||
616 | } | 585 | } |
617 | 586 | ||
618 | static void rfcomm_dev_modem_status(struct rfcomm_dlc *dlc, u8 v24_sig) | 587 | static void rfcomm_dev_modem_status(struct rfcomm_dlc *dlc, u8 v24_sig) |
diff --git a/net/core/dev.c b/net/core/dev.c index 5c713f2239cc..65f829cfd928 100644 --- a/net/core/dev.c +++ b/net/core/dev.c | |||
@@ -5247,10 +5247,12 @@ static int dev_new_index(struct net *net) | |||
5247 | 5247 | ||
5248 | /* Delayed registration/unregistration */ | 5248 | /* Delayed registration/unregistration */ |
5249 | static LIST_HEAD(net_todo_list); | 5249 | static LIST_HEAD(net_todo_list); |
5250 | static DECLARE_WAIT_QUEUE_HEAD(netdev_unregistering_wq); | ||
5250 | 5251 | ||
5251 | static void net_set_todo(struct net_device *dev) | 5252 | static void net_set_todo(struct net_device *dev) |
5252 | { | 5253 | { |
5253 | list_add_tail(&dev->todo_list, &net_todo_list); | 5254 | list_add_tail(&dev->todo_list, &net_todo_list); |
5255 | dev_net(dev)->dev_unreg_count++; | ||
5254 | } | 5256 | } |
5255 | 5257 | ||
5256 | static void rollback_registered_many(struct list_head *head) | 5258 | static void rollback_registered_many(struct list_head *head) |
@@ -5918,6 +5920,12 @@ void netdev_run_todo(void) | |||
5918 | if (dev->destructor) | 5920 | if (dev->destructor) |
5919 | dev->destructor(dev); | 5921 | dev->destructor(dev); |
5920 | 5922 | ||
5923 | /* Report a network device has been unregistered */ | ||
5924 | rtnl_lock(); | ||
5925 | dev_net(dev)->dev_unreg_count--; | ||
5926 | __rtnl_unlock(); | ||
5927 | wake_up(&netdev_unregistering_wq); | ||
5928 | |||
5921 | /* Free network device */ | 5929 | /* Free network device */ |
5922 | kobject_put(&dev->dev.kobj); | 5930 | kobject_put(&dev->dev.kobj); |
5923 | } | 5931 | } |
@@ -6603,6 +6611,34 @@ static void __net_exit default_device_exit(struct net *net) | |||
6603 | rtnl_unlock(); | 6611 | rtnl_unlock(); |
6604 | } | 6612 | } |
6605 | 6613 | ||
6614 | static void __net_exit rtnl_lock_unregistering(struct list_head *net_list) | ||
6615 | { | ||
6616 | /* Return with the rtnl_lock held when there are no network | ||
6617 | * devices unregistering in any network namespace in net_list. | ||
6618 | */ | ||
6619 | struct net *net; | ||
6620 | bool unregistering; | ||
6621 | DEFINE_WAIT(wait); | ||
6622 | |||
6623 | for (;;) { | ||
6624 | prepare_to_wait(&netdev_unregistering_wq, &wait, | ||
6625 | TASK_UNINTERRUPTIBLE); | ||
6626 | unregistering = false; | ||
6627 | rtnl_lock(); | ||
6628 | list_for_each_entry(net, net_list, exit_list) { | ||
6629 | if (net->dev_unreg_count > 0) { | ||
6630 | unregistering = true; | ||
6631 | break; | ||
6632 | } | ||
6633 | } | ||
6634 | if (!unregistering) | ||
6635 | break; | ||
6636 | __rtnl_unlock(); | ||
6637 | schedule(); | ||
6638 | } | ||
6639 | finish_wait(&netdev_unregistering_wq, &wait); | ||
6640 | } | ||
6641 | |||
6606 | static void __net_exit default_device_exit_batch(struct list_head *net_list) | 6642 | static void __net_exit default_device_exit_batch(struct list_head *net_list) |
6607 | { | 6643 | { |
6608 | /* At exit all network devices must be removed from a network | 6644 | /* At exit all network devices must be removed from a network |
@@ -6614,7 +6650,18 @@ static void __net_exit default_device_exit_batch(struct list_head *net_list) | |||
6614 | struct net *net; | 6650 | struct net *net; |
6615 | LIST_HEAD(dev_kill_list); | 6651 | LIST_HEAD(dev_kill_list); |
6616 | 6652 | ||
6617 | rtnl_lock(); | 6653 | /* To prevent network device cleanup code from dereferencing |
6654 | * loopback devices or network devices that have been freed | ||
6655 | * wait here for all pending unregistrations to complete, | ||
6656 | * before unregistering the loopback device and allowing the | ||
6657 | * network namespace to be freed. | ||
6658 | * | ||
6659 | * The netdev todo list containing all network device | ||
6660 | * unregistrations that happen in default_device_exit_batch | ||
6661 | * will run in the rtnl_unlock() at the end of | ||
6662 | * default_device_exit_batch. | ||
6663 | */ | ||
6664 | rtnl_lock_unregistering(net_list); | ||
6618 | list_for_each_entry(net, net_list, exit_list) { | 6665 | list_for_each_entry(net, net_list, exit_list) { |
6619 | for_each_netdev_reverse(net, dev) { | 6666 | for_each_netdev_reverse(net, dev) { |
6620 | if (dev->rtnl_link_ops) | 6667 | if (dev->rtnl_link_ops) |
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 1929af87b260..8d7d0dd72db2 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c | |||
@@ -154,8 +154,8 @@ ipv6: | |||
154 | if (poff >= 0) { | 154 | if (poff >= 0) { |
155 | __be32 *ports, _ports; | 155 | __be32 *ports, _ports; |
156 | 156 | ||
157 | nhoff += poff; | 157 | ports = skb_header_pointer(skb, nhoff + poff, |
158 | ports = skb_header_pointer(skb, nhoff, sizeof(_ports), &_ports); | 158 | sizeof(_ports), &_ports); |
159 | if (ports) | 159 | if (ports) |
160 | flow->ports = *ports; | 160 | flow->ports = *ports; |
161 | } | 161 | } |
diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c index 6a2f13cee86a..3f1ec1586ae1 100644 --- a/net/core/secure_seq.c +++ b/net/core/secure_seq.c | |||
@@ -10,11 +10,24 @@ | |||
10 | 10 | ||
11 | #include <net/secure_seq.h> | 11 | #include <net/secure_seq.h> |
12 | 12 | ||
13 | static u32 net_secret[MD5_MESSAGE_BYTES / 4] ____cacheline_aligned; | 13 | #define NET_SECRET_SIZE (MD5_MESSAGE_BYTES / 4) |
14 | 14 | ||
15 | void net_secret_init(void) | 15 | static u32 net_secret[NET_SECRET_SIZE] ____cacheline_aligned; |
16 | |||
17 | static void net_secret_init(void) | ||
16 | { | 18 | { |
17 | get_random_bytes(net_secret, sizeof(net_secret)); | 19 | u32 tmp; |
20 | int i; | ||
21 | |||
22 | if (likely(net_secret[0])) | ||
23 | return; | ||
24 | |||
25 | for (i = NET_SECRET_SIZE; i > 0;) { | ||
26 | do { | ||
27 | get_random_bytes(&tmp, sizeof(tmp)); | ||
28 | } while (!tmp); | ||
29 | cmpxchg(&net_secret[--i], 0, tmp); | ||
30 | } | ||
18 | } | 31 | } |
19 | 32 | ||
20 | #ifdef CONFIG_INET | 33 | #ifdef CONFIG_INET |
@@ -42,6 +55,7 @@ __u32 secure_tcpv6_sequence_number(const __be32 *saddr, const __be32 *daddr, | |||
42 | u32 hash[MD5_DIGEST_WORDS]; | 55 | u32 hash[MD5_DIGEST_WORDS]; |
43 | u32 i; | 56 | u32 i; |
44 | 57 | ||
58 | net_secret_init(); | ||
45 | memcpy(hash, saddr, 16); | 59 | memcpy(hash, saddr, 16); |
46 | for (i = 0; i < 4; i++) | 60 | for (i = 0; i < 4; i++) |
47 | secret[i] = net_secret[i] + (__force u32)daddr[i]; | 61 | secret[i] = net_secret[i] + (__force u32)daddr[i]; |
@@ -63,6 +77,7 @@ u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr, | |||
63 | u32 hash[MD5_DIGEST_WORDS]; | 77 | u32 hash[MD5_DIGEST_WORDS]; |
64 | u32 i; | 78 | u32 i; |
65 | 79 | ||
80 | net_secret_init(); | ||
66 | memcpy(hash, saddr, 16); | 81 | memcpy(hash, saddr, 16); |
67 | for (i = 0; i < 4; i++) | 82 | for (i = 0; i < 4; i++) |
68 | secret[i] = net_secret[i] + (__force u32) daddr[i]; | 83 | secret[i] = net_secret[i] + (__force u32) daddr[i]; |
@@ -82,6 +97,7 @@ __u32 secure_ip_id(__be32 daddr) | |||
82 | { | 97 | { |
83 | u32 hash[MD5_DIGEST_WORDS]; | 98 | u32 hash[MD5_DIGEST_WORDS]; |
84 | 99 | ||
100 | net_secret_init(); | ||
85 | hash[0] = (__force __u32) daddr; | 101 | hash[0] = (__force __u32) daddr; |
86 | hash[1] = net_secret[13]; | 102 | hash[1] = net_secret[13]; |
87 | hash[2] = net_secret[14]; | 103 | hash[2] = net_secret[14]; |
@@ -96,6 +112,7 @@ __u32 secure_ipv6_id(const __be32 daddr[4]) | |||
96 | { | 112 | { |
97 | __u32 hash[4]; | 113 | __u32 hash[4]; |
98 | 114 | ||
115 | net_secret_init(); | ||
99 | memcpy(hash, daddr, 16); | 116 | memcpy(hash, daddr, 16); |
100 | md5_transform(hash, net_secret); | 117 | md5_transform(hash, net_secret); |
101 | 118 | ||
@@ -107,6 +124,7 @@ __u32 secure_tcp_sequence_number(__be32 saddr, __be32 daddr, | |||
107 | { | 124 | { |
108 | u32 hash[MD5_DIGEST_WORDS]; | 125 | u32 hash[MD5_DIGEST_WORDS]; |
109 | 126 | ||
127 | net_secret_init(); | ||
110 | hash[0] = (__force u32)saddr; | 128 | hash[0] = (__force u32)saddr; |
111 | hash[1] = (__force u32)daddr; | 129 | hash[1] = (__force u32)daddr; |
112 | hash[2] = ((__force u16)sport << 16) + (__force u16)dport; | 130 | hash[2] = ((__force u16)sport << 16) + (__force u16)dport; |
@@ -121,6 +139,7 @@ u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport) | |||
121 | { | 139 | { |
122 | u32 hash[MD5_DIGEST_WORDS]; | 140 | u32 hash[MD5_DIGEST_WORDS]; |
123 | 141 | ||
142 | net_secret_init(); | ||
124 | hash[0] = (__force u32)saddr; | 143 | hash[0] = (__force u32)saddr; |
125 | hash[1] = (__force u32)daddr; | 144 | hash[1] = (__force u32)daddr; |
126 | hash[2] = (__force u32)dport ^ net_secret[14]; | 145 | hash[2] = (__force u32)dport ^ net_secret[14]; |
@@ -140,6 +159,7 @@ u64 secure_dccp_sequence_number(__be32 saddr, __be32 daddr, | |||
140 | u32 hash[MD5_DIGEST_WORDS]; | 159 | u32 hash[MD5_DIGEST_WORDS]; |
141 | u64 seq; | 160 | u64 seq; |
142 | 161 | ||
162 | net_secret_init(); | ||
143 | hash[0] = (__force u32)saddr; | 163 | hash[0] = (__force u32)saddr; |
144 | hash[1] = (__force u32)daddr; | 164 | hash[1] = (__force u32)daddr; |
145 | hash[2] = ((__force u16)sport << 16) + (__force u16)dport; | 165 | hash[2] = ((__force u16)sport << 16) + (__force u16)dport; |
@@ -164,6 +184,7 @@ u64 secure_dccpv6_sequence_number(__be32 *saddr, __be32 *daddr, | |||
164 | u64 seq; | 184 | u64 seq; |
165 | u32 i; | 185 | u32 i; |
166 | 186 | ||
187 | net_secret_init(); | ||
167 | memcpy(hash, saddr, 16); | 188 | memcpy(hash, saddr, 16); |
168 | for (i = 0; i < 4; i++) | 189 | for (i = 0; i < 4; i++) |
169 | secret[i] = net_secret[i] + daddr[i]; | 190 | secret[i] = net_secret[i] + daddr[i]; |
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 7a1874b7b8fd..cfeb85cff4f0 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c | |||
@@ -263,10 +263,8 @@ void build_ehash_secret(void) | |||
263 | get_random_bytes(&rnd, sizeof(rnd)); | 263 | get_random_bytes(&rnd, sizeof(rnd)); |
264 | } while (rnd == 0); | 264 | } while (rnd == 0); |
265 | 265 | ||
266 | if (cmpxchg(&inet_ehash_secret, 0, rnd) == 0) { | 266 | if (cmpxchg(&inet_ehash_secret, 0, rnd) == 0) |
267 | get_random_bytes(&ipv6_hash_secret, sizeof(ipv6_hash_secret)); | 267 | get_random_bytes(&ipv6_hash_secret, sizeof(ipv6_hash_secret)); |
268 | net_secret_init(); | ||
269 | } | ||
270 | } | 268 | } |
271 | EXPORT_SYMBOL(build_ehash_secret); | 269 | EXPORT_SYMBOL(build_ehash_secret); |
272 | 270 | ||
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index dace87f06e5f..7defdc9ba167 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c | |||
@@ -736,7 +736,7 @@ static void igmp_gq_timer_expire(unsigned long data) | |||
736 | 736 | ||
737 | in_dev->mr_gq_running = 0; | 737 | in_dev->mr_gq_running = 0; |
738 | igmpv3_send_report(in_dev, NULL); | 738 | igmpv3_send_report(in_dev, NULL); |
739 | __in_dev_put(in_dev); | 739 | in_dev_put(in_dev); |
740 | } | 740 | } |
741 | 741 | ||
742 | static void igmp_ifc_timer_expire(unsigned long data) | 742 | static void igmp_ifc_timer_expire(unsigned long data) |
@@ -749,7 +749,7 @@ static void igmp_ifc_timer_expire(unsigned long data) | |||
749 | igmp_ifc_start_timer(in_dev, | 749 | igmp_ifc_start_timer(in_dev, |
750 | unsolicited_report_interval(in_dev)); | 750 | unsolicited_report_interval(in_dev)); |
751 | } | 751 | } |
752 | __in_dev_put(in_dev); | 752 | in_dev_put(in_dev); |
753 | } | 753 | } |
754 | 754 | ||
755 | static void igmp_ifc_event(struct in_device *in_dev) | 755 | static void igmp_ifc_event(struct in_device *in_dev) |
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index ac9fabe0300f..63a6d6d6b875 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c | |||
@@ -623,6 +623,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, | |||
623 | tunnel->err_count = 0; | 623 | tunnel->err_count = 0; |
624 | } | 624 | } |
625 | 625 | ||
626 | tos = ip_tunnel_ecn_encap(tos, inner_iph, skb); | ||
626 | ttl = tnl_params->ttl; | 627 | ttl = tnl_params->ttl; |
627 | if (ttl == 0) { | 628 | if (ttl == 0) { |
628 | if (skb->protocol == htons(ETH_P_IP)) | 629 | if (skb->protocol == htons(ETH_P_IP)) |
@@ -641,18 +642,17 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, | |||
641 | 642 | ||
642 | max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr) | 643 | max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr) |
643 | + rt->dst.header_len; | 644 | + rt->dst.header_len; |
644 | if (max_headroom > dev->needed_headroom) { | 645 | if (max_headroom > dev->needed_headroom) |
645 | dev->needed_headroom = max_headroom; | 646 | dev->needed_headroom = max_headroom; |
646 | if (skb_cow_head(skb, dev->needed_headroom)) { | 647 | |
647 | dev->stats.tx_dropped++; | 648 | if (skb_cow_head(skb, dev->needed_headroom)) { |
648 | dev_kfree_skb(skb); | 649 | dev->stats.tx_dropped++; |
649 | return; | 650 | dev_kfree_skb(skb); |
650 | } | 651 | return; |
651 | } | 652 | } |
652 | 653 | ||
653 | err = iptunnel_xmit(rt, skb, fl4.saddr, fl4.daddr, protocol, | 654 | err = iptunnel_xmit(rt, skb, fl4.saddr, fl4.daddr, protocol, |
654 | ip_tunnel_ecn_encap(tos, inner_iph, skb), ttl, df, | 655 | tos, ttl, df, !net_eq(tunnel->net, dev_net(dev))); |
655 | !net_eq(tunnel->net, dev_net(dev))); | ||
656 | iptunnel_xmit_stats(err, &dev->stats, dev->tstats); | 656 | iptunnel_xmit_stats(err, &dev->stats, dev->tstats); |
657 | 657 | ||
658 | return; | 658 | return; |
@@ -853,8 +853,10 @@ int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id, | |||
853 | /* FB netdevice is special: we have one, and only one per netns. | 853 | /* FB netdevice is special: we have one, and only one per netns. |
854 | * Allowing to move it to another netns is clearly unsafe. | 854 | * Allowing to move it to another netns is clearly unsafe. |
855 | */ | 855 | */ |
856 | if (!IS_ERR(itn->fb_tunnel_dev)) | 856 | if (!IS_ERR(itn->fb_tunnel_dev)) { |
857 | itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL; | 857 | itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL; |
858 | ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev)); | ||
859 | } | ||
858 | rtnl_unlock(); | 860 | rtnl_unlock(); |
859 | 861 | ||
860 | return PTR_RET(itn->fb_tunnel_dev); | 862 | return PTR_RET(itn->fb_tunnel_dev); |
@@ -884,8 +886,6 @@ static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head, | |||
884 | if (!net_eq(dev_net(t->dev), net)) | 886 | if (!net_eq(dev_net(t->dev), net)) |
885 | unregister_netdevice_queue(t->dev, head); | 887 | unregister_netdevice_queue(t->dev, head); |
886 | } | 888 | } |
887 | if (itn->fb_tunnel_dev) | ||
888 | unregister_netdevice_queue(itn->fb_tunnel_dev, head); | ||
889 | } | 889 | } |
890 | 890 | ||
891 | void ip_tunnel_delete_net(struct ip_tunnel_net *itn, struct rtnl_link_ops *ops) | 891 | void ip_tunnel_delete_net(struct ip_tunnel_net *itn, struct rtnl_link_ops *ops) |
diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index d6c856b17fd4..c31e3ad98ef2 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c | |||
@@ -61,7 +61,7 @@ int iptunnel_xmit(struct rtable *rt, struct sk_buff *skb, | |||
61 | memset(IPCB(skb), 0, sizeof(*IPCB(skb))); | 61 | memset(IPCB(skb), 0, sizeof(*IPCB(skb))); |
62 | 62 | ||
63 | /* Push down and install the IP header. */ | 63 | /* Push down and install the IP header. */ |
64 | __skb_push(skb, sizeof(struct iphdr)); | 64 | skb_push(skb, sizeof(struct iphdr)); |
65 | skb_reset_network_header(skb); | 65 | skb_reset_network_header(skb); |
66 | 66 | ||
67 | iph = ip_hdr(skb); | 67 | iph = ip_hdr(skb); |
diff --git a/net/ipv4/netfilter/ipt_SYNPROXY.c b/net/ipv4/netfilter/ipt_SYNPROXY.c index 67e17dcda65e..b6346bf2fde3 100644 --- a/net/ipv4/netfilter/ipt_SYNPROXY.c +++ b/net/ipv4/netfilter/ipt_SYNPROXY.c | |||
@@ -267,7 +267,8 @@ synproxy_tg4(struct sk_buff *skb, const struct xt_action_param *par) | |||
267 | if (th == NULL) | 267 | if (th == NULL) |
268 | return NF_DROP; | 268 | return NF_DROP; |
269 | 269 | ||
270 | synproxy_parse_options(skb, par->thoff, th, &opts); | 270 | if (!synproxy_parse_options(skb, par->thoff, th, &opts)) |
271 | return NF_DROP; | ||
271 | 272 | ||
272 | if (th->syn && !(th->ack || th->fin || th->rst)) { | 273 | if (th->syn && !(th->ack || th->fin || th->rst)) { |
273 | /* Initial SYN from client */ | 274 | /* Initial SYN from client */ |
@@ -350,7 +351,8 @@ static unsigned int ipv4_synproxy_hook(unsigned int hooknum, | |||
350 | 351 | ||
351 | /* fall through */ | 352 | /* fall through */ |
352 | case TCP_CONNTRACK_SYN_SENT: | 353 | case TCP_CONNTRACK_SYN_SENT: |
353 | synproxy_parse_options(skb, thoff, th, &opts); | 354 | if (!synproxy_parse_options(skb, thoff, th, &opts)) |
355 | return NF_DROP; | ||
354 | 356 | ||
355 | if (!th->syn && th->ack && | 357 | if (!th->syn && th->ack && |
356 | CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) { | 358 | CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) { |
@@ -373,7 +375,9 @@ static unsigned int ipv4_synproxy_hook(unsigned int hooknum, | |||
373 | if (!th->syn || !th->ack) | 375 | if (!th->syn || !th->ack) |
374 | break; | 376 | break; |
375 | 377 | ||
376 | synproxy_parse_options(skb, thoff, th, &opts); | 378 | if (!synproxy_parse_options(skb, thoff, th, &opts)) |
379 | return NF_DROP; | ||
380 | |||
377 | if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP) | 381 | if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP) |
378 | synproxy->tsoff = opts.tsval - synproxy->its; | 382 | synproxy->tsoff = opts.tsval - synproxy->its; |
379 | 383 | ||
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index bfec521c717f..193db03540ad 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c | |||
@@ -218,8 +218,10 @@ static void raw_err(struct sock *sk, struct sk_buff *skb, u32 info) | |||
218 | 218 | ||
219 | if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) | 219 | if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) |
220 | ipv4_sk_update_pmtu(skb, sk, info); | 220 | ipv4_sk_update_pmtu(skb, sk, info); |
221 | else if (type == ICMP_REDIRECT) | 221 | else if (type == ICMP_REDIRECT) { |
222 | ipv4_sk_redirect(skb, sk); | 222 | ipv4_sk_redirect(skb, sk); |
223 | return; | ||
224 | } | ||
223 | 225 | ||
224 | /* Report error on raw socket, if: | 226 | /* Report error on raw socket, if: |
225 | 1. User requested ip_recverr. | 227 | 1. User requested ip_recverr. |
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 7c83cb8bf137..e6bb8256e59f 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c | |||
@@ -895,8 +895,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, | |||
895 | 895 | ||
896 | skb_orphan(skb); | 896 | skb_orphan(skb); |
897 | skb->sk = sk; | 897 | skb->sk = sk; |
898 | skb->destructor = (sysctl_tcp_limit_output_bytes > 0) ? | 898 | skb->destructor = tcp_wfree; |
899 | tcp_wfree : sock_wfree; | ||
900 | atomic_add(skb->truesize, &sk->sk_wmem_alloc); | 899 | atomic_add(skb->truesize, &sk->sk_wmem_alloc); |
901 | 900 | ||
902 | /* Build TCP header and checksum it. */ | 901 | /* Build TCP header and checksum it. */ |
@@ -1840,7 +1839,6 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, | |||
1840 | while ((skb = tcp_send_head(sk))) { | 1839 | while ((skb = tcp_send_head(sk))) { |
1841 | unsigned int limit; | 1840 | unsigned int limit; |
1842 | 1841 | ||
1843 | |||
1844 | tso_segs = tcp_init_tso_segs(sk, skb, mss_now); | 1842 | tso_segs = tcp_init_tso_segs(sk, skb, mss_now); |
1845 | BUG_ON(!tso_segs); | 1843 | BUG_ON(!tso_segs); |
1846 | 1844 | ||
@@ -1869,13 +1867,20 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, | |||
1869 | break; | 1867 | break; |
1870 | } | 1868 | } |
1871 | 1869 | ||
1872 | /* TSQ : sk_wmem_alloc accounts skb truesize, | 1870 | /* TCP Small Queues : |
1873 | * including skb overhead. But thats OK. | 1871 | * Control number of packets in qdisc/devices to two packets / or ~1 ms. |
1872 | * This allows for : | ||
1873 | * - better RTT estimation and ACK scheduling | ||
1874 | * - faster recovery | ||
1875 | * - high rates | ||
1874 | */ | 1876 | */ |
1875 | if (atomic_read(&sk->sk_wmem_alloc) >= sysctl_tcp_limit_output_bytes) { | 1877 | limit = max(skb->truesize, sk->sk_pacing_rate >> 10); |
1878 | |||
1879 | if (atomic_read(&sk->sk_wmem_alloc) > limit) { | ||
1876 | set_bit(TSQ_THROTTLED, &tp->tsq_flags); | 1880 | set_bit(TSQ_THROTTLED, &tp->tsq_flags); |
1877 | break; | 1881 | break; |
1878 | } | 1882 | } |
1883 | |||
1879 | limit = mss_now; | 1884 | limit = mss_now; |
1880 | if (tso_segs > 1 && !tcp_urg_mode(tp)) | 1885 | if (tso_segs > 1 && !tcp_urg_mode(tp)) |
1881 | limit = tcp_mss_split_point(sk, skb, mss_now, | 1886 | limit = tcp_mss_split_point(sk, skb, mss_now, |
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 74d2c95db57f..0ca44df51ee9 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c | |||
@@ -658,7 +658,7 @@ void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable) | |||
658 | break; | 658 | break; |
659 | case ICMP_REDIRECT: | 659 | case ICMP_REDIRECT: |
660 | ipv4_sk_redirect(skb, sk); | 660 | ipv4_sk_redirect(skb, sk); |
661 | break; | 661 | goto out; |
662 | } | 662 | } |
663 | 663 | ||
664 | /* | 664 | /* |
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index d6ff12617f36..cd3fb301da38 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c | |||
@@ -1499,6 +1499,33 @@ static bool ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr, | |||
1499 | return false; | 1499 | return false; |
1500 | } | 1500 | } |
1501 | 1501 | ||
1502 | /* Compares an address/prefix_len with addresses on device @dev. | ||
1503 | * If one is found it returns true. | ||
1504 | */ | ||
1505 | bool ipv6_chk_custom_prefix(const struct in6_addr *addr, | ||
1506 | const unsigned int prefix_len, struct net_device *dev) | ||
1507 | { | ||
1508 | struct inet6_dev *idev; | ||
1509 | struct inet6_ifaddr *ifa; | ||
1510 | bool ret = false; | ||
1511 | |||
1512 | rcu_read_lock(); | ||
1513 | idev = __in6_dev_get(dev); | ||
1514 | if (idev) { | ||
1515 | read_lock_bh(&idev->lock); | ||
1516 | list_for_each_entry(ifa, &idev->addr_list, if_list) { | ||
1517 | ret = ipv6_prefix_equal(addr, &ifa->addr, prefix_len); | ||
1518 | if (ret) | ||
1519 | break; | ||
1520 | } | ||
1521 | read_unlock_bh(&idev->lock); | ||
1522 | } | ||
1523 | rcu_read_unlock(); | ||
1524 | |||
1525 | return ret; | ||
1526 | } | ||
1527 | EXPORT_SYMBOL(ipv6_chk_custom_prefix); | ||
1528 | |||
1502 | int ipv6_chk_prefix(const struct in6_addr *addr, struct net_device *dev) | 1529 | int ipv6_chk_prefix(const struct in6_addr *addr, struct net_device *dev) |
1503 | { | 1530 | { |
1504 | struct inet6_dev *idev; | 1531 | struct inet6_dev *idev; |
@@ -2193,43 +2220,21 @@ ok: | |||
2193 | else | 2220 | else |
2194 | stored_lft = 0; | 2221 | stored_lft = 0; |
2195 | if (!update_lft && !create && stored_lft) { | 2222 | if (!update_lft && !create && stored_lft) { |
2196 | if (valid_lft > MIN_VALID_LIFETIME || | 2223 | const u32 minimum_lft = min( |
2197 | valid_lft > stored_lft) | 2224 | stored_lft, (u32)MIN_VALID_LIFETIME); |
2198 | update_lft = 1; | 2225 | valid_lft = max(valid_lft, minimum_lft); |
2199 | else if (stored_lft <= MIN_VALID_LIFETIME) { | 2226 | |
2200 | /* valid_lft <= stored_lft is always true */ | 2227 | /* RFC4862 Section 5.5.3e: |
2201 | /* | 2228 | * "Note that the preferred lifetime of the |
2202 | * RFC 4862 Section 5.5.3e: | 2229 | * corresponding address is always reset to |
2203 | * "Note that the preferred lifetime of | 2230 | * the Preferred Lifetime in the received |
2204 | * the corresponding address is always | 2231 | * Prefix Information option, regardless of |
2205 | * reset to the Preferred Lifetime in | 2232 | * whether the valid lifetime is also reset or |
2206 | * the received Prefix Information | 2233 | * ignored." |
2207 | * option, regardless of whether the | 2234 | * |
2208 | * valid lifetime is also reset or | 2235 | * So we should always update prefered_lft here. |
2209 | * ignored." | 2236 | */ |
2210 | * | 2237 | update_lft = 1; |
2211 | * So if the preferred lifetime in | ||
2212 | * this advertisement is different | ||
2213 | * than what we have stored, but the | ||
2214 | * valid lifetime is invalid, just | ||
2215 | * reset prefered_lft. | ||
2216 | * | ||
2217 | * We must set the valid lifetime | ||
2218 | * to the stored lifetime since we'll | ||
2219 | * be updating the timestamp below, | ||
2220 | * else we'll set it back to the | ||
2221 | * minimum. | ||
2222 | */ | ||
2223 | if (prefered_lft != ifp->prefered_lft) { | ||
2224 | valid_lft = stored_lft; | ||
2225 | update_lft = 1; | ||
2226 | } | ||
2227 | } else { | ||
2228 | valid_lft = MIN_VALID_LIFETIME; | ||
2229 | if (valid_lft < prefered_lft) | ||
2230 | prefered_lft = valid_lft; | ||
2231 | update_lft = 1; | ||
2232 | } | ||
2233 | } | 2238 | } |
2234 | 2239 | ||
2235 | if (update_lft) { | 2240 | if (update_lft) { |
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 6b26e9feafb9..7bb5446b9d73 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c | |||
@@ -618,7 +618,7 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb, | |||
618 | struct ip6_tnl *tunnel = netdev_priv(dev); | 618 | struct ip6_tnl *tunnel = netdev_priv(dev); |
619 | struct net_device *tdev; /* Device to other host */ | 619 | struct net_device *tdev; /* Device to other host */ |
620 | struct ipv6hdr *ipv6h; /* Our new IP header */ | 620 | struct ipv6hdr *ipv6h; /* Our new IP header */ |
621 | unsigned int max_headroom; /* The extra header space needed */ | 621 | unsigned int max_headroom = 0; /* The extra header space needed */ |
622 | int gre_hlen; | 622 | int gre_hlen; |
623 | struct ipv6_tel_txoption opt; | 623 | struct ipv6_tel_txoption opt; |
624 | int mtu; | 624 | int mtu; |
@@ -693,7 +693,7 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb, | |||
693 | 693 | ||
694 | skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(dev))); | 694 | skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(dev))); |
695 | 695 | ||
696 | max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen + dst->header_len; | 696 | max_headroom += LL_RESERVED_SPACE(tdev) + gre_hlen + dst->header_len; |
697 | 697 | ||
698 | if (skb_headroom(skb) < max_headroom || skb_shared(skb) || | 698 | if (skb_headroom(skb) < max_headroom || skb_shared(skb) || |
699 | (skb_cloned(skb) && !skb_clone_writable(skb, 0))) { | 699 | (skb_cloned(skb) && !skb_clone_writable(skb, 0))) { |
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 3a692d529163..a54c45ce4a48 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c | |||
@@ -1015,6 +1015,8 @@ static inline int ip6_ufo_append_data(struct sock *sk, | |||
1015 | * udp datagram | 1015 | * udp datagram |
1016 | */ | 1016 | */ |
1017 | if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) { | 1017 | if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) { |
1018 | struct frag_hdr fhdr; | ||
1019 | |||
1018 | skb = sock_alloc_send_skb(sk, | 1020 | skb = sock_alloc_send_skb(sk, |
1019 | hh_len + fragheaderlen + transhdrlen + 20, | 1021 | hh_len + fragheaderlen + transhdrlen + 20, |
1020 | (flags & MSG_DONTWAIT), &err); | 1022 | (flags & MSG_DONTWAIT), &err); |
@@ -1036,12 +1038,6 @@ static inline int ip6_ufo_append_data(struct sock *sk, | |||
1036 | skb->protocol = htons(ETH_P_IPV6); | 1038 | skb->protocol = htons(ETH_P_IPV6); |
1037 | skb->ip_summed = CHECKSUM_PARTIAL; | 1039 | skb->ip_summed = CHECKSUM_PARTIAL; |
1038 | skb->csum = 0; | 1040 | skb->csum = 0; |
1039 | } | ||
1040 | |||
1041 | err = skb_append_datato_frags(sk,skb, getfrag, from, | ||
1042 | (length - transhdrlen)); | ||
1043 | if (!err) { | ||
1044 | struct frag_hdr fhdr; | ||
1045 | 1041 | ||
1046 | /* Specify the length of each IPv6 datagram fragment. | 1042 | /* Specify the length of each IPv6 datagram fragment. |
1047 | * It has to be a multiple of 8. | 1043 | * It has to be a multiple of 8. |
@@ -1052,15 +1048,10 @@ static inline int ip6_ufo_append_data(struct sock *sk, | |||
1052 | ipv6_select_ident(&fhdr, rt); | 1048 | ipv6_select_ident(&fhdr, rt); |
1053 | skb_shinfo(skb)->ip6_frag_id = fhdr.identification; | 1049 | skb_shinfo(skb)->ip6_frag_id = fhdr.identification; |
1054 | __skb_queue_tail(&sk->sk_write_queue, skb); | 1050 | __skb_queue_tail(&sk->sk_write_queue, skb); |
1055 | |||
1056 | return 0; | ||
1057 | } | 1051 | } |
1058 | /* There is not enough support do UPD LSO, | ||
1059 | * so follow normal path | ||
1060 | */ | ||
1061 | kfree_skb(skb); | ||
1062 | 1052 | ||
1063 | return err; | 1053 | return skb_append_datato_frags(sk, skb, getfrag, from, |
1054 | (length - transhdrlen)); | ||
1064 | } | 1055 | } |
1065 | 1056 | ||
1066 | static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src, | 1057 | static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src, |
@@ -1227,27 +1218,27 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, | |||
1227 | * --yoshfuji | 1218 | * --yoshfuji |
1228 | */ | 1219 | */ |
1229 | 1220 | ||
1230 | cork->length += length; | 1221 | if ((length > mtu) && dontfrag && (sk->sk_protocol == IPPROTO_UDP || |
1231 | if (length > mtu) { | 1222 | sk->sk_protocol == IPPROTO_RAW)) { |
1232 | int proto = sk->sk_protocol; | 1223 | ipv6_local_rxpmtu(sk, fl6, mtu-exthdrlen); |
1233 | if (dontfrag && (proto == IPPROTO_UDP || proto == IPPROTO_RAW)){ | 1224 | return -EMSGSIZE; |
1234 | ipv6_local_rxpmtu(sk, fl6, mtu-exthdrlen); | 1225 | } |
1235 | return -EMSGSIZE; | ||
1236 | } | ||
1237 | |||
1238 | if (proto == IPPROTO_UDP && | ||
1239 | (rt->dst.dev->features & NETIF_F_UFO)) { | ||
1240 | 1226 | ||
1241 | err = ip6_ufo_append_data(sk, getfrag, from, length, | 1227 | skb = skb_peek_tail(&sk->sk_write_queue); |
1242 | hh_len, fragheaderlen, | 1228 | cork->length += length; |
1243 | transhdrlen, mtu, flags, rt); | 1229 | if (((length > mtu) || |
1244 | if (err) | 1230 | (skb && skb_is_gso(skb))) && |
1245 | goto error; | 1231 | (sk->sk_protocol == IPPROTO_UDP) && |
1246 | return 0; | 1232 | (rt->dst.dev->features & NETIF_F_UFO)) { |
1247 | } | 1233 | err = ip6_ufo_append_data(sk, getfrag, from, length, |
1234 | hh_len, fragheaderlen, | ||
1235 | transhdrlen, mtu, flags, rt); | ||
1236 | if (err) | ||
1237 | goto error; | ||
1238 | return 0; | ||
1248 | } | 1239 | } |
1249 | 1240 | ||
1250 | if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) | 1241 | if (!skb) |
1251 | goto alloc_new_skb; | 1242 | goto alloc_new_skb; |
1252 | 1243 | ||
1253 | while (length > 0) { | 1244 | while (length > 0) { |
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 2d8f4829575b..a791552e0422 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c | |||
@@ -1731,8 +1731,6 @@ static void __net_exit ip6_tnl_destroy_tunnels(struct ip6_tnl_net *ip6n) | |||
1731 | } | 1731 | } |
1732 | } | 1732 | } |
1733 | 1733 | ||
1734 | t = rtnl_dereference(ip6n->tnls_wc[0]); | ||
1735 | unregister_netdevice_queue(t->dev, &list); | ||
1736 | unregister_netdevice_many(&list); | 1734 | unregister_netdevice_many(&list); |
1737 | } | 1735 | } |
1738 | 1736 | ||
@@ -1752,6 +1750,7 @@ static int __net_init ip6_tnl_init_net(struct net *net) | |||
1752 | if (!ip6n->fb_tnl_dev) | 1750 | if (!ip6n->fb_tnl_dev) |
1753 | goto err_alloc_dev; | 1751 | goto err_alloc_dev; |
1754 | dev_net_set(ip6n->fb_tnl_dev, net); | 1752 | dev_net_set(ip6n->fb_tnl_dev, net); |
1753 | ip6n->fb_tnl_dev->rtnl_link_ops = &ip6_link_ops; | ||
1755 | /* FB netdevice is special: we have one, and only one per netns. | 1754 | /* FB netdevice is special: we have one, and only one per netns. |
1756 | * Allowing to move it to another netns is clearly unsafe. | 1755 | * Allowing to move it to another netns is clearly unsafe. |
1757 | */ | 1756 | */ |
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 096cd67b737c..d18f9f903db6 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c | |||
@@ -2034,7 +2034,7 @@ static void mld_dad_timer_expire(unsigned long data) | |||
2034 | if (idev->mc_dad_count) | 2034 | if (idev->mc_dad_count) |
2035 | mld_dad_start_timer(idev, idev->mc_maxdelay); | 2035 | mld_dad_start_timer(idev, idev->mc_maxdelay); |
2036 | } | 2036 | } |
2037 | __in6_dev_put(idev); | 2037 | in6_dev_put(idev); |
2038 | } | 2038 | } |
2039 | 2039 | ||
2040 | static int ip6_mc_del1_src(struct ifmcaddr6 *pmc, int sfmode, | 2040 | static int ip6_mc_del1_src(struct ifmcaddr6 *pmc, int sfmode, |
@@ -2379,7 +2379,7 @@ static void mld_gq_timer_expire(unsigned long data) | |||
2379 | 2379 | ||
2380 | idev->mc_gq_running = 0; | 2380 | idev->mc_gq_running = 0; |
2381 | mld_send_report(idev, NULL); | 2381 | mld_send_report(idev, NULL); |
2382 | __in6_dev_put(idev); | 2382 | in6_dev_put(idev); |
2383 | } | 2383 | } |
2384 | 2384 | ||
2385 | static void mld_ifc_timer_expire(unsigned long data) | 2385 | static void mld_ifc_timer_expire(unsigned long data) |
@@ -2392,7 +2392,7 @@ static void mld_ifc_timer_expire(unsigned long data) | |||
2392 | if (idev->mc_ifc_count) | 2392 | if (idev->mc_ifc_count) |
2393 | mld_ifc_start_timer(idev, idev->mc_maxdelay); | 2393 | mld_ifc_start_timer(idev, idev->mc_maxdelay); |
2394 | } | 2394 | } |
2395 | __in6_dev_put(idev); | 2395 | in6_dev_put(idev); |
2396 | } | 2396 | } |
2397 | 2397 | ||
2398 | static void mld_ifc_event(struct inet6_dev *idev) | 2398 | static void mld_ifc_event(struct inet6_dev *idev) |
diff --git a/net/ipv6/netfilter/ip6t_SYNPROXY.c b/net/ipv6/netfilter/ip6t_SYNPROXY.c index 19cfea8dbcaa..2748b042da72 100644 --- a/net/ipv6/netfilter/ip6t_SYNPROXY.c +++ b/net/ipv6/netfilter/ip6t_SYNPROXY.c | |||
@@ -282,7 +282,8 @@ synproxy_tg6(struct sk_buff *skb, const struct xt_action_param *par) | |||
282 | if (th == NULL) | 282 | if (th == NULL) |
283 | return NF_DROP; | 283 | return NF_DROP; |
284 | 284 | ||
285 | synproxy_parse_options(skb, par->thoff, th, &opts); | 285 | if (!synproxy_parse_options(skb, par->thoff, th, &opts)) |
286 | return NF_DROP; | ||
286 | 287 | ||
287 | if (th->syn && !(th->ack || th->fin || th->rst)) { | 288 | if (th->syn && !(th->ack || th->fin || th->rst)) { |
288 | /* Initial SYN from client */ | 289 | /* Initial SYN from client */ |
@@ -372,7 +373,8 @@ static unsigned int ipv6_synproxy_hook(unsigned int hooknum, | |||
372 | 373 | ||
373 | /* fall through */ | 374 | /* fall through */ |
374 | case TCP_CONNTRACK_SYN_SENT: | 375 | case TCP_CONNTRACK_SYN_SENT: |
375 | synproxy_parse_options(skb, thoff, th, &opts); | 376 | if (!synproxy_parse_options(skb, thoff, th, &opts)) |
377 | return NF_DROP; | ||
376 | 378 | ||
377 | if (!th->syn && th->ack && | 379 | if (!th->syn && th->ack && |
378 | CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) { | 380 | CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) { |
@@ -395,7 +397,9 @@ static unsigned int ipv6_synproxy_hook(unsigned int hooknum, | |||
395 | if (!th->syn || !th->ack) | 397 | if (!th->syn || !th->ack) |
396 | break; | 398 | break; |
397 | 399 | ||
398 | synproxy_parse_options(skb, thoff, th, &opts); | 400 | if (!synproxy_parse_options(skb, thoff, th, &opts)) |
401 | return NF_DROP; | ||
402 | |||
399 | if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP) | 403 | if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP) |
400 | synproxy->tsoff = opts.tsval - synproxy->its; | 404 | synproxy->tsoff = opts.tsval - synproxy->its; |
401 | 405 | ||
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 58916bbb1728..a4ed2416399e 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c | |||
@@ -335,8 +335,10 @@ static void rawv6_err(struct sock *sk, struct sk_buff *skb, | |||
335 | ip6_sk_update_pmtu(skb, sk, info); | 335 | ip6_sk_update_pmtu(skb, sk, info); |
336 | harderr = (np->pmtudisc == IPV6_PMTUDISC_DO); | 336 | harderr = (np->pmtudisc == IPV6_PMTUDISC_DO); |
337 | } | 337 | } |
338 | if (type == NDISC_REDIRECT) | 338 | if (type == NDISC_REDIRECT) { |
339 | ip6_sk_redirect(skb, sk); | 339 | ip6_sk_redirect(skb, sk); |
340 | return; | ||
341 | } | ||
340 | if (np->recverr) { | 342 | if (np->recverr) { |
341 | u8 *payload = skb->data; | 343 | u8 *payload = skb->data; |
342 | if (!inet->hdrincl) | 344 | if (!inet->hdrincl) |
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 7ee5cb96db34..19269453a8ea 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c | |||
@@ -566,6 +566,70 @@ static inline bool is_spoofed_6rd(struct ip_tunnel *tunnel, const __be32 v4addr, | |||
566 | return false; | 566 | return false; |
567 | } | 567 | } |
568 | 568 | ||
569 | /* Checks if an address matches an address on the tunnel interface. | ||
570 | * Used to detect the NAT of proto 41 packets and let them pass spoofing test. | ||
571 | * Long story: | ||
572 | * This function is called after we considered the packet as spoofed | ||
573 | * in is_spoofed_6rd. | ||
574 | * We may have a router that is doing NAT for proto 41 packets | ||
575 | * for an internal station. Destination a.a.a.a/PREFIX:bbbb:bbbb | ||
576 | * will be translated to n.n.n.n/PREFIX:bbbb:bbbb. And is_spoofed_6rd | ||
577 | * function will return true, dropping the packet. | ||
578 | * But, we can still check if is spoofed against the IP | ||
579 | * addresses associated with the interface. | ||
580 | */ | ||
581 | static bool only_dnatted(const struct ip_tunnel *tunnel, | ||
582 | const struct in6_addr *v6dst) | ||
583 | { | ||
584 | int prefix_len; | ||
585 | |||
586 | #ifdef CONFIG_IPV6_SIT_6RD | ||
587 | prefix_len = tunnel->ip6rd.prefixlen + 32 | ||
588 | - tunnel->ip6rd.relay_prefixlen; | ||
589 | #else | ||
590 | prefix_len = 48; | ||
591 | #endif | ||
592 | return ipv6_chk_custom_prefix(v6dst, prefix_len, tunnel->dev); | ||
593 | } | ||
594 | |||
595 | /* Returns true if a packet is spoofed */ | ||
596 | static bool packet_is_spoofed(struct sk_buff *skb, | ||
597 | const struct iphdr *iph, | ||
598 | struct ip_tunnel *tunnel) | ||
599 | { | ||
600 | const struct ipv6hdr *ipv6h; | ||
601 | |||
602 | if (tunnel->dev->priv_flags & IFF_ISATAP) { | ||
603 | if (!isatap_chksrc(skb, iph, tunnel)) | ||
604 | return true; | ||
605 | |||
606 | return false; | ||
607 | } | ||
608 | |||
609 | if (tunnel->dev->flags & IFF_POINTOPOINT) | ||
610 | return false; | ||
611 | |||
612 | ipv6h = ipv6_hdr(skb); | ||
613 | |||
614 | if (unlikely(is_spoofed_6rd(tunnel, iph->saddr, &ipv6h->saddr))) { | ||
615 | net_warn_ratelimited("Src spoofed %pI4/%pI6c -> %pI4/%pI6c\n", | ||
616 | &iph->saddr, &ipv6h->saddr, | ||
617 | &iph->daddr, &ipv6h->daddr); | ||
618 | return true; | ||
619 | } | ||
620 | |||
621 | if (likely(!is_spoofed_6rd(tunnel, iph->daddr, &ipv6h->daddr))) | ||
622 | return false; | ||
623 | |||
624 | if (only_dnatted(tunnel, &ipv6h->daddr)) | ||
625 | return false; | ||
626 | |||
627 | net_warn_ratelimited("Dst spoofed %pI4/%pI6c -> %pI4/%pI6c\n", | ||
628 | &iph->saddr, &ipv6h->saddr, | ||
629 | &iph->daddr, &ipv6h->daddr); | ||
630 | return true; | ||
631 | } | ||
632 | |||
569 | static int ipip6_rcv(struct sk_buff *skb) | 633 | static int ipip6_rcv(struct sk_buff *skb) |
570 | { | 634 | { |
571 | const struct iphdr *iph = ip_hdr(skb); | 635 | const struct iphdr *iph = ip_hdr(skb); |
@@ -586,19 +650,9 @@ static int ipip6_rcv(struct sk_buff *skb) | |||
586 | IPCB(skb)->flags = 0; | 650 | IPCB(skb)->flags = 0; |
587 | skb->protocol = htons(ETH_P_IPV6); | 651 | skb->protocol = htons(ETH_P_IPV6); |
588 | 652 | ||
589 | if (tunnel->dev->priv_flags & IFF_ISATAP) { | 653 | if (packet_is_spoofed(skb, iph, tunnel)) { |
590 | if (!isatap_chksrc(skb, iph, tunnel)) { | 654 | tunnel->dev->stats.rx_errors++; |
591 | tunnel->dev->stats.rx_errors++; | 655 | goto out; |
592 | goto out; | ||
593 | } | ||
594 | } else if (!(tunnel->dev->flags&IFF_POINTOPOINT)) { | ||
595 | if (is_spoofed_6rd(tunnel, iph->saddr, | ||
596 | &ipv6_hdr(skb)->saddr) || | ||
597 | is_spoofed_6rd(tunnel, iph->daddr, | ||
598 | &ipv6_hdr(skb)->daddr)) { | ||
599 | tunnel->dev->stats.rx_errors++; | ||
600 | goto out; | ||
601 | } | ||
602 | } | 656 | } |
603 | 657 | ||
604 | __skb_tunnel_rx(skb, tunnel->dev, tunnel->net); | 658 | __skb_tunnel_rx(skb, tunnel->dev, tunnel->net); |
@@ -748,7 +802,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, | |||
748 | neigh = dst_neigh_lookup(skb_dst(skb), &iph6->daddr); | 802 | neigh = dst_neigh_lookup(skb_dst(skb), &iph6->daddr); |
749 | 803 | ||
750 | if (neigh == NULL) { | 804 | if (neigh == NULL) { |
751 | net_dbg_ratelimited("sit: nexthop == NULL\n"); | 805 | net_dbg_ratelimited("nexthop == NULL\n"); |
752 | goto tx_error; | 806 | goto tx_error; |
753 | } | 807 | } |
754 | 808 | ||
@@ -777,7 +831,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, | |||
777 | neigh = dst_neigh_lookup(skb_dst(skb), &iph6->daddr); | 831 | neigh = dst_neigh_lookup(skb_dst(skb), &iph6->daddr); |
778 | 832 | ||
779 | if (neigh == NULL) { | 833 | if (neigh == NULL) { |
780 | net_dbg_ratelimited("sit: nexthop == NULL\n"); | 834 | net_dbg_ratelimited("nexthop == NULL\n"); |
781 | goto tx_error; | 835 | goto tx_error; |
782 | } | 836 | } |
783 | 837 | ||
@@ -1612,6 +1666,7 @@ static int __net_init sit_init_net(struct net *net) | |||
1612 | goto err_alloc_dev; | 1666 | goto err_alloc_dev; |
1613 | } | 1667 | } |
1614 | dev_net_set(sitn->fb_tunnel_dev, net); | 1668 | dev_net_set(sitn->fb_tunnel_dev, net); |
1669 | sitn->fb_tunnel_dev->rtnl_link_ops = &sit_link_ops; | ||
1615 | /* FB netdevice is special: we have one, and only one per netns. | 1670 | /* FB netdevice is special: we have one, and only one per netns. |
1616 | * Allowing to move it to another netns is clearly unsafe. | 1671 | * Allowing to move it to another netns is clearly unsafe. |
1617 | */ | 1672 | */ |
@@ -1646,7 +1701,6 @@ static void __net_exit sit_exit_net(struct net *net) | |||
1646 | 1701 | ||
1647 | rtnl_lock(); | 1702 | rtnl_lock(); |
1648 | sit_destroy_tunnels(sitn, &list); | 1703 | sit_destroy_tunnels(sitn, &list); |
1649 | unregister_netdevice_queue(sitn->fb_tunnel_dev, &list); | ||
1650 | unregister_netdevice_many(&list); | 1704 | unregister_netdevice_many(&list); |
1651 | rtnl_unlock(); | 1705 | rtnl_unlock(); |
1652 | } | 1706 | } |
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index f4058150262b..72b7eaaf3ca0 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c | |||
@@ -525,8 +525,10 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, | |||
525 | 525 | ||
526 | if (type == ICMPV6_PKT_TOOBIG) | 526 | if (type == ICMPV6_PKT_TOOBIG) |
527 | ip6_sk_update_pmtu(skb, sk, info); | 527 | ip6_sk_update_pmtu(skb, sk, info); |
528 | if (type == NDISC_REDIRECT) | 528 | if (type == NDISC_REDIRECT) { |
529 | ip6_sk_redirect(skb, sk); | 529 | ip6_sk_redirect(skb, sk); |
530 | goto out; | ||
531 | } | ||
530 | 532 | ||
531 | np = inet6_sk(sk); | 533 | np = inet6_sk(sk); |
532 | 534 | ||
diff --git a/net/lapb/lapb_timer.c b/net/lapb/lapb_timer.c index 54563ad8aeb1..355cc3b6fa4d 100644 --- a/net/lapb/lapb_timer.c +++ b/net/lapb/lapb_timer.c | |||
@@ -154,6 +154,7 @@ static void lapb_t1timer_expiry(unsigned long param) | |||
154 | } else { | 154 | } else { |
155 | lapb->n2count++; | 155 | lapb->n2count++; |
156 | lapb_requeue_frames(lapb); | 156 | lapb_requeue_frames(lapb); |
157 | lapb_kick(lapb); | ||
157 | } | 158 | } |
158 | break; | 159 | break; |
159 | 160 | ||
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 4f69e83ff836..74fd00c27210 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c | |||
@@ -116,6 +116,7 @@ ip_vs_in_stats(struct ip_vs_conn *cp, struct sk_buff *skb) | |||
116 | 116 | ||
117 | if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) { | 117 | if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) { |
118 | struct ip_vs_cpu_stats *s; | 118 | struct ip_vs_cpu_stats *s; |
119 | struct ip_vs_service *svc; | ||
119 | 120 | ||
120 | s = this_cpu_ptr(dest->stats.cpustats); | 121 | s = this_cpu_ptr(dest->stats.cpustats); |
121 | s->ustats.inpkts++; | 122 | s->ustats.inpkts++; |
@@ -123,11 +124,14 @@ ip_vs_in_stats(struct ip_vs_conn *cp, struct sk_buff *skb) | |||
123 | s->ustats.inbytes += skb->len; | 124 | s->ustats.inbytes += skb->len; |
124 | u64_stats_update_end(&s->syncp); | 125 | u64_stats_update_end(&s->syncp); |
125 | 126 | ||
126 | s = this_cpu_ptr(dest->svc->stats.cpustats); | 127 | rcu_read_lock(); |
128 | svc = rcu_dereference(dest->svc); | ||
129 | s = this_cpu_ptr(svc->stats.cpustats); | ||
127 | s->ustats.inpkts++; | 130 | s->ustats.inpkts++; |
128 | u64_stats_update_begin(&s->syncp); | 131 | u64_stats_update_begin(&s->syncp); |
129 | s->ustats.inbytes += skb->len; | 132 | s->ustats.inbytes += skb->len; |
130 | u64_stats_update_end(&s->syncp); | 133 | u64_stats_update_end(&s->syncp); |
134 | rcu_read_unlock(); | ||
131 | 135 | ||
132 | s = this_cpu_ptr(ipvs->tot_stats.cpustats); | 136 | s = this_cpu_ptr(ipvs->tot_stats.cpustats); |
133 | s->ustats.inpkts++; | 137 | s->ustats.inpkts++; |
@@ -146,6 +150,7 @@ ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb) | |||
146 | 150 | ||
147 | if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) { | 151 | if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) { |
148 | struct ip_vs_cpu_stats *s; | 152 | struct ip_vs_cpu_stats *s; |
153 | struct ip_vs_service *svc; | ||
149 | 154 | ||
150 | s = this_cpu_ptr(dest->stats.cpustats); | 155 | s = this_cpu_ptr(dest->stats.cpustats); |
151 | s->ustats.outpkts++; | 156 | s->ustats.outpkts++; |
@@ -153,11 +158,14 @@ ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb) | |||
153 | s->ustats.outbytes += skb->len; | 158 | s->ustats.outbytes += skb->len; |
154 | u64_stats_update_end(&s->syncp); | 159 | u64_stats_update_end(&s->syncp); |
155 | 160 | ||
156 | s = this_cpu_ptr(dest->svc->stats.cpustats); | 161 | rcu_read_lock(); |
162 | svc = rcu_dereference(dest->svc); | ||
163 | s = this_cpu_ptr(svc->stats.cpustats); | ||
157 | s->ustats.outpkts++; | 164 | s->ustats.outpkts++; |
158 | u64_stats_update_begin(&s->syncp); | 165 | u64_stats_update_begin(&s->syncp); |
159 | s->ustats.outbytes += skb->len; | 166 | s->ustats.outbytes += skb->len; |
160 | u64_stats_update_end(&s->syncp); | 167 | u64_stats_update_end(&s->syncp); |
168 | rcu_read_unlock(); | ||
161 | 169 | ||
162 | s = this_cpu_ptr(ipvs->tot_stats.cpustats); | 170 | s = this_cpu_ptr(ipvs->tot_stats.cpustats); |
163 | s->ustats.outpkts++; | 171 | s->ustats.outpkts++; |
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index c8148e487386..a3df9bddc4f7 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c | |||
@@ -460,7 +460,7 @@ static inline void | |||
460 | __ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc) | 460 | __ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc) |
461 | { | 461 | { |
462 | atomic_inc(&svc->refcnt); | 462 | atomic_inc(&svc->refcnt); |
463 | dest->svc = svc; | 463 | rcu_assign_pointer(dest->svc, svc); |
464 | } | 464 | } |
465 | 465 | ||
466 | static void ip_vs_service_free(struct ip_vs_service *svc) | 466 | static void ip_vs_service_free(struct ip_vs_service *svc) |
@@ -470,18 +470,25 @@ static void ip_vs_service_free(struct ip_vs_service *svc) | |||
470 | kfree(svc); | 470 | kfree(svc); |
471 | } | 471 | } |
472 | 472 | ||
473 | static void | 473 | static void ip_vs_service_rcu_free(struct rcu_head *head) |
474 | __ip_vs_unbind_svc(struct ip_vs_dest *dest) | ||
475 | { | 474 | { |
476 | struct ip_vs_service *svc = dest->svc; | 475 | struct ip_vs_service *svc; |
476 | |||
477 | svc = container_of(head, struct ip_vs_service, rcu_head); | ||
478 | ip_vs_service_free(svc); | ||
479 | } | ||
477 | 480 | ||
478 | dest->svc = NULL; | 481 | static void __ip_vs_svc_put(struct ip_vs_service *svc, bool do_delay) |
482 | { | ||
479 | if (atomic_dec_and_test(&svc->refcnt)) { | 483 | if (atomic_dec_and_test(&svc->refcnt)) { |
480 | IP_VS_DBG_BUF(3, "Removing service %u/%s:%u\n", | 484 | IP_VS_DBG_BUF(3, "Removing service %u/%s:%u\n", |
481 | svc->fwmark, | 485 | svc->fwmark, |
482 | IP_VS_DBG_ADDR(svc->af, &svc->addr), | 486 | IP_VS_DBG_ADDR(svc->af, &svc->addr), |
483 | ntohs(svc->port)); | 487 | ntohs(svc->port)); |
484 | ip_vs_service_free(svc); | 488 | if (do_delay) |
489 | call_rcu(&svc->rcu_head, ip_vs_service_rcu_free); | ||
490 | else | ||
491 | ip_vs_service_free(svc); | ||
485 | } | 492 | } |
486 | } | 493 | } |
487 | 494 | ||
@@ -667,11 +674,6 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr, | |||
667 | IP_VS_DBG_ADDR(svc->af, &dest->addr), | 674 | IP_VS_DBG_ADDR(svc->af, &dest->addr), |
668 | ntohs(dest->port), | 675 | ntohs(dest->port), |
669 | atomic_read(&dest->refcnt)); | 676 | atomic_read(&dest->refcnt)); |
670 | /* We can not reuse dest while in grace period | ||
671 | * because conns still can use dest->svc | ||
672 | */ | ||
673 | if (test_bit(IP_VS_DEST_STATE_REMOVING, &dest->state)) | ||
674 | continue; | ||
675 | if (dest->af == svc->af && | 677 | if (dest->af == svc->af && |
676 | ip_vs_addr_equal(svc->af, &dest->addr, daddr) && | 678 | ip_vs_addr_equal(svc->af, &dest->addr, daddr) && |
677 | dest->port == dport && | 679 | dest->port == dport && |
@@ -697,8 +699,10 @@ out: | |||
697 | 699 | ||
698 | static void ip_vs_dest_free(struct ip_vs_dest *dest) | 700 | static void ip_vs_dest_free(struct ip_vs_dest *dest) |
699 | { | 701 | { |
702 | struct ip_vs_service *svc = rcu_dereference_protected(dest->svc, 1); | ||
703 | |||
700 | __ip_vs_dst_cache_reset(dest); | 704 | __ip_vs_dst_cache_reset(dest); |
701 | __ip_vs_unbind_svc(dest); | 705 | __ip_vs_svc_put(svc, false); |
702 | free_percpu(dest->stats.cpustats); | 706 | free_percpu(dest->stats.cpustats); |
703 | kfree(dest); | 707 | kfree(dest); |
704 | } | 708 | } |
@@ -771,6 +775,7 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest, | |||
771 | struct ip_vs_dest_user_kern *udest, int add) | 775 | struct ip_vs_dest_user_kern *udest, int add) |
772 | { | 776 | { |
773 | struct netns_ipvs *ipvs = net_ipvs(svc->net); | 777 | struct netns_ipvs *ipvs = net_ipvs(svc->net); |
778 | struct ip_vs_service *old_svc; | ||
774 | struct ip_vs_scheduler *sched; | 779 | struct ip_vs_scheduler *sched; |
775 | int conn_flags; | 780 | int conn_flags; |
776 | 781 | ||
@@ -792,13 +797,14 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest, | |||
792 | atomic_set(&dest->conn_flags, conn_flags); | 797 | atomic_set(&dest->conn_flags, conn_flags); |
793 | 798 | ||
794 | /* bind the service */ | 799 | /* bind the service */ |
795 | if (!dest->svc) { | 800 | old_svc = rcu_dereference_protected(dest->svc, 1); |
801 | if (!old_svc) { | ||
796 | __ip_vs_bind_svc(dest, svc); | 802 | __ip_vs_bind_svc(dest, svc); |
797 | } else { | 803 | } else { |
798 | if (dest->svc != svc) { | 804 | if (old_svc != svc) { |
799 | __ip_vs_unbind_svc(dest); | ||
800 | ip_vs_zero_stats(&dest->stats); | 805 | ip_vs_zero_stats(&dest->stats); |
801 | __ip_vs_bind_svc(dest, svc); | 806 | __ip_vs_bind_svc(dest, svc); |
807 | __ip_vs_svc_put(old_svc, true); | ||
802 | } | 808 | } |
803 | } | 809 | } |
804 | 810 | ||
@@ -998,16 +1004,6 @@ ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) | |||
998 | return 0; | 1004 | return 0; |
999 | } | 1005 | } |
1000 | 1006 | ||
1001 | static void ip_vs_dest_wait_readers(struct rcu_head *head) | ||
1002 | { | ||
1003 | struct ip_vs_dest *dest = container_of(head, struct ip_vs_dest, | ||
1004 | rcu_head); | ||
1005 | |||
1006 | /* End of grace period after unlinking */ | ||
1007 | clear_bit(IP_VS_DEST_STATE_REMOVING, &dest->state); | ||
1008 | } | ||
1009 | |||
1010 | |||
1011 | /* | 1007 | /* |
1012 | * Delete a destination (must be already unlinked from the service) | 1008 | * Delete a destination (must be already unlinked from the service) |
1013 | */ | 1009 | */ |
@@ -1023,20 +1019,16 @@ static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest, | |||
1023 | */ | 1019 | */ |
1024 | ip_vs_rs_unhash(dest); | 1020 | ip_vs_rs_unhash(dest); |
1025 | 1021 | ||
1026 | if (!cleanup) { | ||
1027 | set_bit(IP_VS_DEST_STATE_REMOVING, &dest->state); | ||
1028 | call_rcu(&dest->rcu_head, ip_vs_dest_wait_readers); | ||
1029 | } | ||
1030 | |||
1031 | spin_lock_bh(&ipvs->dest_trash_lock); | 1022 | spin_lock_bh(&ipvs->dest_trash_lock); |
1032 | IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, dest->refcnt=%d\n", | 1023 | IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, dest->refcnt=%d\n", |
1033 | IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port), | 1024 | IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port), |
1034 | atomic_read(&dest->refcnt)); | 1025 | atomic_read(&dest->refcnt)); |
1035 | if (list_empty(&ipvs->dest_trash) && !cleanup) | 1026 | if (list_empty(&ipvs->dest_trash) && !cleanup) |
1036 | mod_timer(&ipvs->dest_trash_timer, | 1027 | mod_timer(&ipvs->dest_trash_timer, |
1037 | jiffies + IP_VS_DEST_TRASH_PERIOD); | 1028 | jiffies + (IP_VS_DEST_TRASH_PERIOD >> 1)); |
1038 | /* dest lives in trash without reference */ | 1029 | /* dest lives in trash without reference */ |
1039 | list_add(&dest->t_list, &ipvs->dest_trash); | 1030 | list_add(&dest->t_list, &ipvs->dest_trash); |
1031 | dest->idle_start = 0; | ||
1040 | spin_unlock_bh(&ipvs->dest_trash_lock); | 1032 | spin_unlock_bh(&ipvs->dest_trash_lock); |
1041 | ip_vs_dest_put(dest); | 1033 | ip_vs_dest_put(dest); |
1042 | } | 1034 | } |
@@ -1108,24 +1100,30 @@ static void ip_vs_dest_trash_expire(unsigned long data) | |||
1108 | struct net *net = (struct net *) data; | 1100 | struct net *net = (struct net *) data; |
1109 | struct netns_ipvs *ipvs = net_ipvs(net); | 1101 | struct netns_ipvs *ipvs = net_ipvs(net); |
1110 | struct ip_vs_dest *dest, *next; | 1102 | struct ip_vs_dest *dest, *next; |
1103 | unsigned long now = jiffies; | ||
1111 | 1104 | ||
1112 | spin_lock(&ipvs->dest_trash_lock); | 1105 | spin_lock(&ipvs->dest_trash_lock); |
1113 | list_for_each_entry_safe(dest, next, &ipvs->dest_trash, t_list) { | 1106 | list_for_each_entry_safe(dest, next, &ipvs->dest_trash, t_list) { |
1114 | /* Skip if dest is in grace period */ | ||
1115 | if (test_bit(IP_VS_DEST_STATE_REMOVING, &dest->state)) | ||
1116 | continue; | ||
1117 | if (atomic_read(&dest->refcnt) > 0) | 1107 | if (atomic_read(&dest->refcnt) > 0) |
1118 | continue; | 1108 | continue; |
1109 | if (dest->idle_start) { | ||
1110 | if (time_before(now, dest->idle_start + | ||
1111 | IP_VS_DEST_TRASH_PERIOD)) | ||
1112 | continue; | ||
1113 | } else { | ||
1114 | dest->idle_start = max(1UL, now); | ||
1115 | continue; | ||
1116 | } | ||
1119 | IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u from trash\n", | 1117 | IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u from trash\n", |
1120 | dest->vfwmark, | 1118 | dest->vfwmark, |
1121 | IP_VS_DBG_ADDR(dest->svc->af, &dest->addr), | 1119 | IP_VS_DBG_ADDR(dest->af, &dest->addr), |
1122 | ntohs(dest->port)); | 1120 | ntohs(dest->port)); |
1123 | list_del(&dest->t_list); | 1121 | list_del(&dest->t_list); |
1124 | ip_vs_dest_free(dest); | 1122 | ip_vs_dest_free(dest); |
1125 | } | 1123 | } |
1126 | if (!list_empty(&ipvs->dest_trash)) | 1124 | if (!list_empty(&ipvs->dest_trash)) |
1127 | mod_timer(&ipvs->dest_trash_timer, | 1125 | mod_timer(&ipvs->dest_trash_timer, |
1128 | jiffies + IP_VS_DEST_TRASH_PERIOD); | 1126 | jiffies + (IP_VS_DEST_TRASH_PERIOD >> 1)); |
1129 | spin_unlock(&ipvs->dest_trash_lock); | 1127 | spin_unlock(&ipvs->dest_trash_lock); |
1130 | } | 1128 | } |
1131 | 1129 | ||
@@ -1320,14 +1318,6 @@ out: | |||
1320 | return ret; | 1318 | return ret; |
1321 | } | 1319 | } |
1322 | 1320 | ||
1323 | static void ip_vs_service_rcu_free(struct rcu_head *head) | ||
1324 | { | ||
1325 | struct ip_vs_service *svc; | ||
1326 | |||
1327 | svc = container_of(head, struct ip_vs_service, rcu_head); | ||
1328 | ip_vs_service_free(svc); | ||
1329 | } | ||
1330 | |||
1331 | /* | 1321 | /* |
1332 | * Delete a service from the service list | 1322 | * Delete a service from the service list |
1333 | * - The service must be unlinked, unlocked and not referenced! | 1323 | * - The service must be unlinked, unlocked and not referenced! |
@@ -1376,13 +1366,7 @@ static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup) | |||
1376 | /* | 1366 | /* |
1377 | * Free the service if nobody refers to it | 1367 | * Free the service if nobody refers to it |
1378 | */ | 1368 | */ |
1379 | if (atomic_dec_and_test(&svc->refcnt)) { | 1369 | __ip_vs_svc_put(svc, true); |
1380 | IP_VS_DBG_BUF(3, "Removing service %u/%s:%u\n", | ||
1381 | svc->fwmark, | ||
1382 | IP_VS_DBG_ADDR(svc->af, &svc->addr), | ||
1383 | ntohs(svc->port)); | ||
1384 | call_rcu(&svc->rcu_head, ip_vs_service_rcu_free); | ||
1385 | } | ||
1386 | 1370 | ||
1387 | /* decrease the module use count */ | 1371 | /* decrease the module use count */ |
1388 | ip_vs_use_count_dec(); | 1372 | ip_vs_use_count_dec(); |
diff --git a/net/netfilter/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c index 6bee6d0c73a5..1425e9a924c4 100644 --- a/net/netfilter/ipvs/ip_vs_est.c +++ b/net/netfilter/ipvs/ip_vs_est.c | |||
@@ -59,12 +59,13 @@ static void ip_vs_read_cpu_stats(struct ip_vs_stats_user *sum, | |||
59 | struct ip_vs_cpu_stats __percpu *stats) | 59 | struct ip_vs_cpu_stats __percpu *stats) |
60 | { | 60 | { |
61 | int i; | 61 | int i; |
62 | bool add = false; | ||
62 | 63 | ||
63 | for_each_possible_cpu(i) { | 64 | for_each_possible_cpu(i) { |
64 | struct ip_vs_cpu_stats *s = per_cpu_ptr(stats, i); | 65 | struct ip_vs_cpu_stats *s = per_cpu_ptr(stats, i); |
65 | unsigned int start; | 66 | unsigned int start; |
66 | __u64 inbytes, outbytes; | 67 | __u64 inbytes, outbytes; |
67 | if (i) { | 68 | if (add) { |
68 | sum->conns += s->ustats.conns; | 69 | sum->conns += s->ustats.conns; |
69 | sum->inpkts += s->ustats.inpkts; | 70 | sum->inpkts += s->ustats.inpkts; |
70 | sum->outpkts += s->ustats.outpkts; | 71 | sum->outpkts += s->ustats.outpkts; |
@@ -76,6 +77,7 @@ static void ip_vs_read_cpu_stats(struct ip_vs_stats_user *sum, | |||
76 | sum->inbytes += inbytes; | 77 | sum->inbytes += inbytes; |
77 | sum->outbytes += outbytes; | 78 | sum->outbytes += outbytes; |
78 | } else { | 79 | } else { |
80 | add = true; | ||
79 | sum->conns = s->ustats.conns; | 81 | sum->conns = s->ustats.conns; |
80 | sum->inpkts = s->ustats.inpkts; | 82 | sum->inpkts = s->ustats.inpkts; |
81 | sum->outpkts = s->ustats.outpkts; | 83 | sum->outpkts = s->ustats.outpkts; |
diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c index 1383b0eadc0e..eff13c94498e 100644 --- a/net/netfilter/ipvs/ip_vs_lblc.c +++ b/net/netfilter/ipvs/ip_vs_lblc.c | |||
@@ -93,7 +93,7 @@ struct ip_vs_lblc_entry { | |||
93 | struct hlist_node list; | 93 | struct hlist_node list; |
94 | int af; /* address family */ | 94 | int af; /* address family */ |
95 | union nf_inet_addr addr; /* destination IP address */ | 95 | union nf_inet_addr addr; /* destination IP address */ |
96 | struct ip_vs_dest __rcu *dest; /* real server (cache) */ | 96 | struct ip_vs_dest *dest; /* real server (cache) */ |
97 | unsigned long lastuse; /* last used time */ | 97 | unsigned long lastuse; /* last used time */ |
98 | struct rcu_head rcu_head; | 98 | struct rcu_head rcu_head; |
99 | }; | 99 | }; |
@@ -130,20 +130,21 @@ static struct ctl_table vs_vars_table[] = { | |||
130 | }; | 130 | }; |
131 | #endif | 131 | #endif |
132 | 132 | ||
133 | static inline void ip_vs_lblc_free(struct ip_vs_lblc_entry *en) | 133 | static void ip_vs_lblc_rcu_free(struct rcu_head *head) |
134 | { | 134 | { |
135 | struct ip_vs_dest *dest; | 135 | struct ip_vs_lblc_entry *en = container_of(head, |
136 | struct ip_vs_lblc_entry, | ||
137 | rcu_head); | ||
136 | 138 | ||
137 | hlist_del_rcu(&en->list); | 139 | ip_vs_dest_put(en->dest); |
138 | /* | 140 | kfree(en); |
139 | * We don't kfree dest because it is referred either by its service | ||
140 | * or the trash dest list. | ||
141 | */ | ||
142 | dest = rcu_dereference_protected(en->dest, 1); | ||
143 | ip_vs_dest_put(dest); | ||
144 | kfree_rcu(en, rcu_head); | ||
145 | } | 141 | } |
146 | 142 | ||
143 | static inline void ip_vs_lblc_del(struct ip_vs_lblc_entry *en) | ||
144 | { | ||
145 | hlist_del_rcu(&en->list); | ||
146 | call_rcu(&en->rcu_head, ip_vs_lblc_rcu_free); | ||
147 | } | ||
147 | 148 | ||
148 | /* | 149 | /* |
149 | * Returns hash value for IPVS LBLC entry | 150 | * Returns hash value for IPVS LBLC entry |
@@ -203,30 +204,23 @@ ip_vs_lblc_new(struct ip_vs_lblc_table *tbl, const union nf_inet_addr *daddr, | |||
203 | struct ip_vs_lblc_entry *en; | 204 | struct ip_vs_lblc_entry *en; |
204 | 205 | ||
205 | en = ip_vs_lblc_get(dest->af, tbl, daddr); | 206 | en = ip_vs_lblc_get(dest->af, tbl, daddr); |
206 | if (!en) { | 207 | if (en) { |
207 | en = kmalloc(sizeof(*en), GFP_ATOMIC); | 208 | if (en->dest == dest) |
208 | if (!en) | 209 | return en; |
209 | return NULL; | 210 | ip_vs_lblc_del(en); |
210 | 211 | } | |
211 | en->af = dest->af; | 212 | en = kmalloc(sizeof(*en), GFP_ATOMIC); |
212 | ip_vs_addr_copy(dest->af, &en->addr, daddr); | 213 | if (!en) |
213 | en->lastuse = jiffies; | 214 | return NULL; |
214 | 215 | ||
215 | ip_vs_dest_hold(dest); | 216 | en->af = dest->af; |
216 | RCU_INIT_POINTER(en->dest, dest); | 217 | ip_vs_addr_copy(dest->af, &en->addr, daddr); |
218 | en->lastuse = jiffies; | ||
217 | 219 | ||
218 | ip_vs_lblc_hash(tbl, en); | 220 | ip_vs_dest_hold(dest); |
219 | } else { | 221 | en->dest = dest; |
220 | struct ip_vs_dest *old_dest; | ||
221 | 222 | ||
222 | old_dest = rcu_dereference_protected(en->dest, 1); | 223 | ip_vs_lblc_hash(tbl, en); |
223 | if (old_dest != dest) { | ||
224 | ip_vs_dest_put(old_dest); | ||
225 | ip_vs_dest_hold(dest); | ||
226 | /* No ordering constraints for refcnt */ | ||
227 | RCU_INIT_POINTER(en->dest, dest); | ||
228 | } | ||
229 | } | ||
230 | 224 | ||
231 | return en; | 225 | return en; |
232 | } | 226 | } |
@@ -246,7 +240,7 @@ static void ip_vs_lblc_flush(struct ip_vs_service *svc) | |||
246 | tbl->dead = 1; | 240 | tbl->dead = 1; |
247 | for (i=0; i<IP_VS_LBLC_TAB_SIZE; i++) { | 241 | for (i=0; i<IP_VS_LBLC_TAB_SIZE; i++) { |
248 | hlist_for_each_entry_safe(en, next, &tbl->bucket[i], list) { | 242 | hlist_for_each_entry_safe(en, next, &tbl->bucket[i], list) { |
249 | ip_vs_lblc_free(en); | 243 | ip_vs_lblc_del(en); |
250 | atomic_dec(&tbl->entries); | 244 | atomic_dec(&tbl->entries); |
251 | } | 245 | } |
252 | } | 246 | } |
@@ -281,7 +275,7 @@ static inline void ip_vs_lblc_full_check(struct ip_vs_service *svc) | |||
281 | sysctl_lblc_expiration(svc))) | 275 | sysctl_lblc_expiration(svc))) |
282 | continue; | 276 | continue; |
283 | 277 | ||
284 | ip_vs_lblc_free(en); | 278 | ip_vs_lblc_del(en); |
285 | atomic_dec(&tbl->entries); | 279 | atomic_dec(&tbl->entries); |
286 | } | 280 | } |
287 | spin_unlock(&svc->sched_lock); | 281 | spin_unlock(&svc->sched_lock); |
@@ -335,7 +329,7 @@ static void ip_vs_lblc_check_expire(unsigned long data) | |||
335 | if (time_before(now, en->lastuse + ENTRY_TIMEOUT)) | 329 | if (time_before(now, en->lastuse + ENTRY_TIMEOUT)) |
336 | continue; | 330 | continue; |
337 | 331 | ||
338 | ip_vs_lblc_free(en); | 332 | ip_vs_lblc_del(en); |
339 | atomic_dec(&tbl->entries); | 333 | atomic_dec(&tbl->entries); |
340 | goal--; | 334 | goal--; |
341 | } | 335 | } |
@@ -443,8 +437,8 @@ __ip_vs_lblc_schedule(struct ip_vs_service *svc) | |||
443 | continue; | 437 | continue; |
444 | 438 | ||
445 | doh = ip_vs_dest_conn_overhead(dest); | 439 | doh = ip_vs_dest_conn_overhead(dest); |
446 | if (loh * atomic_read(&dest->weight) > | 440 | if ((__s64)loh * atomic_read(&dest->weight) > |
447 | doh * atomic_read(&least->weight)) { | 441 | (__s64)doh * atomic_read(&least->weight)) { |
448 | least = dest; | 442 | least = dest; |
449 | loh = doh; | 443 | loh = doh; |
450 | } | 444 | } |
@@ -511,7 +505,7 @@ ip_vs_lblc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb, | |||
511 | * free up entries from the trash at any time. | 505 | * free up entries from the trash at any time. |
512 | */ | 506 | */ |
513 | 507 | ||
514 | dest = rcu_dereference(en->dest); | 508 | dest = en->dest; |
515 | if ((dest->flags & IP_VS_DEST_F_AVAILABLE) && | 509 | if ((dest->flags & IP_VS_DEST_F_AVAILABLE) && |
516 | atomic_read(&dest->weight) > 0 && !is_overloaded(dest, svc)) | 510 | atomic_read(&dest->weight) > 0 && !is_overloaded(dest, svc)) |
517 | goto out; | 511 | goto out; |
@@ -631,7 +625,7 @@ static void __exit ip_vs_lblc_cleanup(void) | |||
631 | { | 625 | { |
632 | unregister_ip_vs_scheduler(&ip_vs_lblc_scheduler); | 626 | unregister_ip_vs_scheduler(&ip_vs_lblc_scheduler); |
633 | unregister_pernet_subsys(&ip_vs_lblc_ops); | 627 | unregister_pernet_subsys(&ip_vs_lblc_ops); |
634 | synchronize_rcu(); | 628 | rcu_barrier(); |
635 | } | 629 | } |
636 | 630 | ||
637 | 631 | ||
diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c index 5199448697f6..0b8550089a2e 100644 --- a/net/netfilter/ipvs/ip_vs_lblcr.c +++ b/net/netfilter/ipvs/ip_vs_lblcr.c | |||
@@ -89,7 +89,7 @@ | |||
89 | */ | 89 | */ |
90 | struct ip_vs_dest_set_elem { | 90 | struct ip_vs_dest_set_elem { |
91 | struct list_head list; /* list link */ | 91 | struct list_head list; /* list link */ |
92 | struct ip_vs_dest __rcu *dest; /* destination server */ | 92 | struct ip_vs_dest *dest; /* destination server */ |
93 | struct rcu_head rcu_head; | 93 | struct rcu_head rcu_head; |
94 | }; | 94 | }; |
95 | 95 | ||
@@ -107,11 +107,7 @@ static void ip_vs_dest_set_insert(struct ip_vs_dest_set *set, | |||
107 | 107 | ||
108 | if (check) { | 108 | if (check) { |
109 | list_for_each_entry(e, &set->list, list) { | 109 | list_for_each_entry(e, &set->list, list) { |
110 | struct ip_vs_dest *d; | 110 | if (e->dest == dest) |
111 | |||
112 | d = rcu_dereference_protected(e->dest, 1); | ||
113 | if (d == dest) | ||
114 | /* already existed */ | ||
115 | return; | 111 | return; |
116 | } | 112 | } |
117 | } | 113 | } |
@@ -121,7 +117,7 @@ static void ip_vs_dest_set_insert(struct ip_vs_dest_set *set, | |||
121 | return; | 117 | return; |
122 | 118 | ||
123 | ip_vs_dest_hold(dest); | 119 | ip_vs_dest_hold(dest); |
124 | RCU_INIT_POINTER(e->dest, dest); | 120 | e->dest = dest; |
125 | 121 | ||
126 | list_add_rcu(&e->list, &set->list); | 122 | list_add_rcu(&e->list, &set->list); |
127 | atomic_inc(&set->size); | 123 | atomic_inc(&set->size); |
@@ -129,22 +125,27 @@ static void ip_vs_dest_set_insert(struct ip_vs_dest_set *set, | |||
129 | set->lastmod = jiffies; | 125 | set->lastmod = jiffies; |
130 | } | 126 | } |
131 | 127 | ||
128 | static void ip_vs_lblcr_elem_rcu_free(struct rcu_head *head) | ||
129 | { | ||
130 | struct ip_vs_dest_set_elem *e; | ||
131 | |||
132 | e = container_of(head, struct ip_vs_dest_set_elem, rcu_head); | ||
133 | ip_vs_dest_put(e->dest); | ||
134 | kfree(e); | ||
135 | } | ||
136 | |||
132 | static void | 137 | static void |
133 | ip_vs_dest_set_erase(struct ip_vs_dest_set *set, struct ip_vs_dest *dest) | 138 | ip_vs_dest_set_erase(struct ip_vs_dest_set *set, struct ip_vs_dest *dest) |
134 | { | 139 | { |
135 | struct ip_vs_dest_set_elem *e; | 140 | struct ip_vs_dest_set_elem *e; |
136 | 141 | ||
137 | list_for_each_entry(e, &set->list, list) { | 142 | list_for_each_entry(e, &set->list, list) { |
138 | struct ip_vs_dest *d; | 143 | if (e->dest == dest) { |
139 | |||
140 | d = rcu_dereference_protected(e->dest, 1); | ||
141 | if (d == dest) { | ||
142 | /* HIT */ | 144 | /* HIT */ |
143 | atomic_dec(&set->size); | 145 | atomic_dec(&set->size); |
144 | set->lastmod = jiffies; | 146 | set->lastmod = jiffies; |
145 | ip_vs_dest_put(dest); | ||
146 | list_del_rcu(&e->list); | 147 | list_del_rcu(&e->list); |
147 | kfree_rcu(e, rcu_head); | 148 | call_rcu(&e->rcu_head, ip_vs_lblcr_elem_rcu_free); |
148 | break; | 149 | break; |
149 | } | 150 | } |
150 | } | 151 | } |
@@ -155,16 +156,8 @@ static void ip_vs_dest_set_eraseall(struct ip_vs_dest_set *set) | |||
155 | struct ip_vs_dest_set_elem *e, *ep; | 156 | struct ip_vs_dest_set_elem *e, *ep; |
156 | 157 | ||
157 | list_for_each_entry_safe(e, ep, &set->list, list) { | 158 | list_for_each_entry_safe(e, ep, &set->list, list) { |
158 | struct ip_vs_dest *d; | ||
159 | |||
160 | d = rcu_dereference_protected(e->dest, 1); | ||
161 | /* | ||
162 | * We don't kfree dest because it is referred either | ||
163 | * by its service or by the trash dest list. | ||
164 | */ | ||
165 | ip_vs_dest_put(d); | ||
166 | list_del_rcu(&e->list); | 159 | list_del_rcu(&e->list); |
167 | kfree_rcu(e, rcu_head); | 160 | call_rcu(&e->rcu_head, ip_vs_lblcr_elem_rcu_free); |
168 | } | 161 | } |
169 | } | 162 | } |
170 | 163 | ||
@@ -175,12 +168,9 @@ static inline struct ip_vs_dest *ip_vs_dest_set_min(struct ip_vs_dest_set *set) | |||
175 | struct ip_vs_dest *dest, *least; | 168 | struct ip_vs_dest *dest, *least; |
176 | int loh, doh; | 169 | int loh, doh; |
177 | 170 | ||
178 | if (set == NULL) | ||
179 | return NULL; | ||
180 | |||
181 | /* select the first destination server, whose weight > 0 */ | 171 | /* select the first destination server, whose weight > 0 */ |
182 | list_for_each_entry_rcu(e, &set->list, list) { | 172 | list_for_each_entry_rcu(e, &set->list, list) { |
183 | least = rcu_dereference(e->dest); | 173 | least = e->dest; |
184 | if (least->flags & IP_VS_DEST_F_OVERLOAD) | 174 | if (least->flags & IP_VS_DEST_F_OVERLOAD) |
185 | continue; | 175 | continue; |
186 | 176 | ||
@@ -195,13 +185,13 @@ static inline struct ip_vs_dest *ip_vs_dest_set_min(struct ip_vs_dest_set *set) | |||
195 | /* find the destination with the weighted least load */ | 185 | /* find the destination with the weighted least load */ |
196 | nextstage: | 186 | nextstage: |
197 | list_for_each_entry_continue_rcu(e, &set->list, list) { | 187 | list_for_each_entry_continue_rcu(e, &set->list, list) { |
198 | dest = rcu_dereference(e->dest); | 188 | dest = e->dest; |
199 | if (dest->flags & IP_VS_DEST_F_OVERLOAD) | 189 | if (dest->flags & IP_VS_DEST_F_OVERLOAD) |
200 | continue; | 190 | continue; |
201 | 191 | ||
202 | doh = ip_vs_dest_conn_overhead(dest); | 192 | doh = ip_vs_dest_conn_overhead(dest); |
203 | if ((loh * atomic_read(&dest->weight) > | 193 | if (((__s64)loh * atomic_read(&dest->weight) > |
204 | doh * atomic_read(&least->weight)) | 194 | (__s64)doh * atomic_read(&least->weight)) |
205 | && (dest->flags & IP_VS_DEST_F_AVAILABLE)) { | 195 | && (dest->flags & IP_VS_DEST_F_AVAILABLE)) { |
206 | least = dest; | 196 | least = dest; |
207 | loh = doh; | 197 | loh = doh; |
@@ -232,7 +222,7 @@ static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set) | |||
232 | 222 | ||
233 | /* select the first destination server, whose weight > 0 */ | 223 | /* select the first destination server, whose weight > 0 */ |
234 | list_for_each_entry(e, &set->list, list) { | 224 | list_for_each_entry(e, &set->list, list) { |
235 | most = rcu_dereference_protected(e->dest, 1); | 225 | most = e->dest; |
236 | if (atomic_read(&most->weight) > 0) { | 226 | if (atomic_read(&most->weight) > 0) { |
237 | moh = ip_vs_dest_conn_overhead(most); | 227 | moh = ip_vs_dest_conn_overhead(most); |
238 | goto nextstage; | 228 | goto nextstage; |
@@ -243,11 +233,11 @@ static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set) | |||
243 | /* find the destination with the weighted most load */ | 233 | /* find the destination with the weighted most load */ |
244 | nextstage: | 234 | nextstage: |
245 | list_for_each_entry_continue(e, &set->list, list) { | 235 | list_for_each_entry_continue(e, &set->list, list) { |
246 | dest = rcu_dereference_protected(e->dest, 1); | 236 | dest = e->dest; |
247 | doh = ip_vs_dest_conn_overhead(dest); | 237 | doh = ip_vs_dest_conn_overhead(dest); |
248 | /* moh/mw < doh/dw ==> moh*dw < doh*mw, where mw,dw>0 */ | 238 | /* moh/mw < doh/dw ==> moh*dw < doh*mw, where mw,dw>0 */ |
249 | if ((moh * atomic_read(&dest->weight) < | 239 | if (((__s64)moh * atomic_read(&dest->weight) < |
250 | doh * atomic_read(&most->weight)) | 240 | (__s64)doh * atomic_read(&most->weight)) |
251 | && (atomic_read(&dest->weight) > 0)) { | 241 | && (atomic_read(&dest->weight) > 0)) { |
252 | most = dest; | 242 | most = dest; |
253 | moh = doh; | 243 | moh = doh; |
@@ -611,8 +601,8 @@ __ip_vs_lblcr_schedule(struct ip_vs_service *svc) | |||
611 | continue; | 601 | continue; |
612 | 602 | ||
613 | doh = ip_vs_dest_conn_overhead(dest); | 603 | doh = ip_vs_dest_conn_overhead(dest); |
614 | if (loh * atomic_read(&dest->weight) > | 604 | if ((__s64)loh * atomic_read(&dest->weight) > |
615 | doh * atomic_read(&least->weight)) { | 605 | (__s64)doh * atomic_read(&least->weight)) { |
616 | least = dest; | 606 | least = dest; |
617 | loh = doh; | 607 | loh = doh; |
618 | } | 608 | } |
@@ -819,7 +809,7 @@ static void __exit ip_vs_lblcr_cleanup(void) | |||
819 | { | 809 | { |
820 | unregister_ip_vs_scheduler(&ip_vs_lblcr_scheduler); | 810 | unregister_ip_vs_scheduler(&ip_vs_lblcr_scheduler); |
821 | unregister_pernet_subsys(&ip_vs_lblcr_ops); | 811 | unregister_pernet_subsys(&ip_vs_lblcr_ops); |
822 | synchronize_rcu(); | 812 | rcu_barrier(); |
823 | } | 813 | } |
824 | 814 | ||
825 | 815 | ||
diff --git a/net/netfilter/ipvs/ip_vs_nq.c b/net/netfilter/ipvs/ip_vs_nq.c index d8d9860934fe..961a6de9bb29 100644 --- a/net/netfilter/ipvs/ip_vs_nq.c +++ b/net/netfilter/ipvs/ip_vs_nq.c | |||
@@ -40,7 +40,7 @@ | |||
40 | #include <net/ip_vs.h> | 40 | #include <net/ip_vs.h> |
41 | 41 | ||
42 | 42 | ||
43 | static inline unsigned int | 43 | static inline int |
44 | ip_vs_nq_dest_overhead(struct ip_vs_dest *dest) | 44 | ip_vs_nq_dest_overhead(struct ip_vs_dest *dest) |
45 | { | 45 | { |
46 | /* | 46 | /* |
@@ -59,7 +59,7 @@ ip_vs_nq_schedule(struct ip_vs_service *svc, const struct sk_buff *skb, | |||
59 | struct ip_vs_iphdr *iph) | 59 | struct ip_vs_iphdr *iph) |
60 | { | 60 | { |
61 | struct ip_vs_dest *dest, *least = NULL; | 61 | struct ip_vs_dest *dest, *least = NULL; |
62 | unsigned int loh = 0, doh; | 62 | int loh = 0, doh; |
63 | 63 | ||
64 | IP_VS_DBG(6, "%s(): Scheduling...\n", __func__); | 64 | IP_VS_DBG(6, "%s(): Scheduling...\n", __func__); |
65 | 65 | ||
@@ -92,8 +92,8 @@ ip_vs_nq_schedule(struct ip_vs_service *svc, const struct sk_buff *skb, | |||
92 | } | 92 | } |
93 | 93 | ||
94 | if (!least || | 94 | if (!least || |
95 | (loh * atomic_read(&dest->weight) > | 95 | ((__s64)loh * atomic_read(&dest->weight) > |
96 | doh * atomic_read(&least->weight))) { | 96 | (__s64)doh * atomic_read(&least->weight))) { |
97 | least = dest; | 97 | least = dest; |
98 | loh = doh; | 98 | loh = doh; |
99 | } | 99 | } |
diff --git a/net/netfilter/ipvs/ip_vs_sed.c b/net/netfilter/ipvs/ip_vs_sed.c index a5284cc3d882..e446b9fa7424 100644 --- a/net/netfilter/ipvs/ip_vs_sed.c +++ b/net/netfilter/ipvs/ip_vs_sed.c | |||
@@ -44,7 +44,7 @@ | |||
44 | #include <net/ip_vs.h> | 44 | #include <net/ip_vs.h> |
45 | 45 | ||
46 | 46 | ||
47 | static inline unsigned int | 47 | static inline int |
48 | ip_vs_sed_dest_overhead(struct ip_vs_dest *dest) | 48 | ip_vs_sed_dest_overhead(struct ip_vs_dest *dest) |
49 | { | 49 | { |
50 | /* | 50 | /* |
@@ -63,7 +63,7 @@ ip_vs_sed_schedule(struct ip_vs_service *svc, const struct sk_buff *skb, | |||
63 | struct ip_vs_iphdr *iph) | 63 | struct ip_vs_iphdr *iph) |
64 | { | 64 | { |
65 | struct ip_vs_dest *dest, *least; | 65 | struct ip_vs_dest *dest, *least; |
66 | unsigned int loh, doh; | 66 | int loh, doh; |
67 | 67 | ||
68 | IP_VS_DBG(6, "%s(): Scheduling...\n", __func__); | 68 | IP_VS_DBG(6, "%s(): Scheduling...\n", __func__); |
69 | 69 | ||
@@ -99,8 +99,8 @@ ip_vs_sed_schedule(struct ip_vs_service *svc, const struct sk_buff *skb, | |||
99 | if (dest->flags & IP_VS_DEST_F_OVERLOAD) | 99 | if (dest->flags & IP_VS_DEST_F_OVERLOAD) |
100 | continue; | 100 | continue; |
101 | doh = ip_vs_sed_dest_overhead(dest); | 101 | doh = ip_vs_sed_dest_overhead(dest); |
102 | if (loh * atomic_read(&dest->weight) > | 102 | if ((__s64)loh * atomic_read(&dest->weight) > |
103 | doh * atomic_read(&least->weight)) { | 103 | (__s64)doh * atomic_read(&least->weight)) { |
104 | least = dest; | 104 | least = dest; |
105 | loh = doh; | 105 | loh = doh; |
106 | } | 106 | } |
diff --git a/net/netfilter/ipvs/ip_vs_wlc.c b/net/netfilter/ipvs/ip_vs_wlc.c index 6dc1fa128840..b5b4650d50a9 100644 --- a/net/netfilter/ipvs/ip_vs_wlc.c +++ b/net/netfilter/ipvs/ip_vs_wlc.c | |||
@@ -35,7 +35,7 @@ ip_vs_wlc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb, | |||
35 | struct ip_vs_iphdr *iph) | 35 | struct ip_vs_iphdr *iph) |
36 | { | 36 | { |
37 | struct ip_vs_dest *dest, *least; | 37 | struct ip_vs_dest *dest, *least; |
38 | unsigned int loh, doh; | 38 | int loh, doh; |
39 | 39 | ||
40 | IP_VS_DBG(6, "ip_vs_wlc_schedule(): Scheduling...\n"); | 40 | IP_VS_DBG(6, "ip_vs_wlc_schedule(): Scheduling...\n"); |
41 | 41 | ||
@@ -71,8 +71,8 @@ ip_vs_wlc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb, | |||
71 | if (dest->flags & IP_VS_DEST_F_OVERLOAD) | 71 | if (dest->flags & IP_VS_DEST_F_OVERLOAD) |
72 | continue; | 72 | continue; |
73 | doh = ip_vs_dest_conn_overhead(dest); | 73 | doh = ip_vs_dest_conn_overhead(dest); |
74 | if (loh * atomic_read(&dest->weight) > | 74 | if ((__s64)loh * atomic_read(&dest->weight) > |
75 | doh * atomic_read(&least->weight)) { | 75 | (__s64)doh * atomic_read(&least->weight)) { |
76 | least = dest; | 76 | least = dest; |
77 | loh = doh; | 77 | loh = doh; |
78 | } | 78 | } |
diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c index 6fd967c6278c..cdf4567ba9b3 100644 --- a/net/netfilter/nf_synproxy_core.c +++ b/net/netfilter/nf_synproxy_core.c | |||
@@ -24,7 +24,7 @@ | |||
24 | int synproxy_net_id; | 24 | int synproxy_net_id; |
25 | EXPORT_SYMBOL_GPL(synproxy_net_id); | 25 | EXPORT_SYMBOL_GPL(synproxy_net_id); |
26 | 26 | ||
27 | void | 27 | bool |
28 | synproxy_parse_options(const struct sk_buff *skb, unsigned int doff, | 28 | synproxy_parse_options(const struct sk_buff *skb, unsigned int doff, |
29 | const struct tcphdr *th, struct synproxy_options *opts) | 29 | const struct tcphdr *th, struct synproxy_options *opts) |
30 | { | 30 | { |
@@ -32,7 +32,8 @@ synproxy_parse_options(const struct sk_buff *skb, unsigned int doff, | |||
32 | u8 buf[40], *ptr; | 32 | u8 buf[40], *ptr; |
33 | 33 | ||
34 | ptr = skb_header_pointer(skb, doff + sizeof(*th), length, buf); | 34 | ptr = skb_header_pointer(skb, doff + sizeof(*th), length, buf); |
35 | BUG_ON(ptr == NULL); | 35 | if (ptr == NULL) |
36 | return false; | ||
36 | 37 | ||
37 | opts->options = 0; | 38 | opts->options = 0; |
38 | while (length > 0) { | 39 | while (length > 0) { |
@@ -41,16 +42,16 @@ synproxy_parse_options(const struct sk_buff *skb, unsigned int doff, | |||
41 | 42 | ||
42 | switch (opcode) { | 43 | switch (opcode) { |
43 | case TCPOPT_EOL: | 44 | case TCPOPT_EOL: |
44 | return; | 45 | return true; |
45 | case TCPOPT_NOP: | 46 | case TCPOPT_NOP: |
46 | length--; | 47 | length--; |
47 | continue; | 48 | continue; |
48 | default: | 49 | default: |
49 | opsize = *ptr++; | 50 | opsize = *ptr++; |
50 | if (opsize < 2) | 51 | if (opsize < 2) |
51 | return; | 52 | return true; |
52 | if (opsize > length) | 53 | if (opsize > length) |
53 | return; | 54 | return true; |
54 | 55 | ||
55 | switch (opcode) { | 56 | switch (opcode) { |
56 | case TCPOPT_MSS: | 57 | case TCPOPT_MSS: |
@@ -84,6 +85,7 @@ synproxy_parse_options(const struct sk_buff *skb, unsigned int doff, | |||
84 | length -= opsize; | 85 | length -= opsize; |
85 | } | 86 | } |
86 | } | 87 | } |
88 | return true; | ||
87 | } | 89 | } |
88 | EXPORT_SYMBOL_GPL(synproxy_parse_options); | 90 | EXPORT_SYMBOL_GPL(synproxy_parse_options); |
89 | 91 | ||
diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c index 32ad015ee8ce..a2fef8b10b96 100644 --- a/net/sched/sch_fq.c +++ b/net/sched/sch_fq.c | |||
@@ -285,7 +285,7 @@ static struct fq_flow *fq_classify(struct sk_buff *skb, struct fq_sched_data *q) | |||
285 | 285 | ||
286 | 286 | ||
287 | /* remove one skb from head of flow queue */ | 287 | /* remove one skb from head of flow queue */ |
288 | static struct sk_buff *fq_dequeue_head(struct fq_flow *flow) | 288 | static struct sk_buff *fq_dequeue_head(struct Qdisc *sch, struct fq_flow *flow) |
289 | { | 289 | { |
290 | struct sk_buff *skb = flow->head; | 290 | struct sk_buff *skb = flow->head; |
291 | 291 | ||
@@ -293,6 +293,8 @@ static struct sk_buff *fq_dequeue_head(struct fq_flow *flow) | |||
293 | flow->head = skb->next; | 293 | flow->head = skb->next; |
294 | skb->next = NULL; | 294 | skb->next = NULL; |
295 | flow->qlen--; | 295 | flow->qlen--; |
296 | sch->qstats.backlog -= qdisc_pkt_len(skb); | ||
297 | sch->q.qlen--; | ||
296 | } | 298 | } |
297 | return skb; | 299 | return skb; |
298 | } | 300 | } |
@@ -418,8 +420,9 @@ static struct sk_buff *fq_dequeue(struct Qdisc *sch) | |||
418 | struct fq_flow_head *head; | 420 | struct fq_flow_head *head; |
419 | struct sk_buff *skb; | 421 | struct sk_buff *skb; |
420 | struct fq_flow *f; | 422 | struct fq_flow *f; |
423 | u32 rate; | ||
421 | 424 | ||
422 | skb = fq_dequeue_head(&q->internal); | 425 | skb = fq_dequeue_head(sch, &q->internal); |
423 | if (skb) | 426 | if (skb) |
424 | goto out; | 427 | goto out; |
425 | fq_check_throttled(q, now); | 428 | fq_check_throttled(q, now); |
@@ -449,7 +452,7 @@ begin: | |||
449 | goto begin; | 452 | goto begin; |
450 | } | 453 | } |
451 | 454 | ||
452 | skb = fq_dequeue_head(f); | 455 | skb = fq_dequeue_head(sch, f); |
453 | if (!skb) { | 456 | if (!skb) { |
454 | head->first = f->next; | 457 | head->first = f->next; |
455 | /* force a pass through old_flows to prevent starvation */ | 458 | /* force a pass through old_flows to prevent starvation */ |
@@ -466,43 +469,74 @@ begin: | |||
466 | f->time_next_packet = now; | 469 | f->time_next_packet = now; |
467 | f->credit -= qdisc_pkt_len(skb); | 470 | f->credit -= qdisc_pkt_len(skb); |
468 | 471 | ||
469 | if (f->credit <= 0 && | 472 | if (f->credit > 0 || !q->rate_enable) |
470 | q->rate_enable && | 473 | goto out; |
471 | skb->sk && skb->sk->sk_state != TCP_TIME_WAIT) { | ||
472 | u32 rate = skb->sk->sk_pacing_rate ?: q->flow_default_rate; | ||
473 | 474 | ||
474 | rate = min(rate, q->flow_max_rate); | 475 | if (skb->sk && skb->sk->sk_state != TCP_TIME_WAIT) { |
475 | if (rate) { | 476 | rate = skb->sk->sk_pacing_rate ?: q->flow_default_rate; |
476 | u64 len = (u64)qdisc_pkt_len(skb) * NSEC_PER_SEC; | ||
477 | |||
478 | do_div(len, rate); | ||
479 | /* Since socket rate can change later, | ||
480 | * clamp the delay to 125 ms. | ||
481 | * TODO: maybe segment the too big skb, as in commit | ||
482 | * e43ac79a4bc ("sch_tbf: segment too big GSO packets") | ||
483 | */ | ||
484 | if (unlikely(len > 125 * NSEC_PER_MSEC)) { | ||
485 | len = 125 * NSEC_PER_MSEC; | ||
486 | q->stat_pkts_too_long++; | ||
487 | } | ||
488 | 477 | ||
489 | f->time_next_packet = now + len; | 478 | rate = min(rate, q->flow_max_rate); |
479 | } else { | ||
480 | rate = q->flow_max_rate; | ||
481 | if (rate == ~0U) | ||
482 | goto out; | ||
483 | } | ||
484 | if (rate) { | ||
485 | u32 plen = max(qdisc_pkt_len(skb), q->quantum); | ||
486 | u64 len = (u64)plen * NSEC_PER_SEC; | ||
487 | |||
488 | do_div(len, rate); | ||
489 | /* Since socket rate can change later, | ||
490 | * clamp the delay to 125 ms. | ||
491 | * TODO: maybe segment the too big skb, as in commit | ||
492 | * e43ac79a4bc ("sch_tbf: segment too big GSO packets") | ||
493 | */ | ||
494 | if (unlikely(len > 125 * NSEC_PER_MSEC)) { | ||
495 | len = 125 * NSEC_PER_MSEC; | ||
496 | q->stat_pkts_too_long++; | ||
490 | } | 497 | } |
498 | |||
499 | f->time_next_packet = now + len; | ||
491 | } | 500 | } |
492 | out: | 501 | out: |
493 | sch->qstats.backlog -= qdisc_pkt_len(skb); | ||
494 | qdisc_bstats_update(sch, skb); | 502 | qdisc_bstats_update(sch, skb); |
495 | sch->q.qlen--; | ||
496 | qdisc_unthrottled(sch); | 503 | qdisc_unthrottled(sch); |
497 | return skb; | 504 | return skb; |
498 | } | 505 | } |
499 | 506 | ||
500 | static void fq_reset(struct Qdisc *sch) | 507 | static void fq_reset(struct Qdisc *sch) |
501 | { | 508 | { |
509 | struct fq_sched_data *q = qdisc_priv(sch); | ||
510 | struct rb_root *root; | ||
502 | struct sk_buff *skb; | 511 | struct sk_buff *skb; |
512 | struct rb_node *p; | ||
513 | struct fq_flow *f; | ||
514 | unsigned int idx; | ||
503 | 515 | ||
504 | while ((skb = fq_dequeue(sch)) != NULL) | 516 | while ((skb = fq_dequeue_head(sch, &q->internal)) != NULL) |
505 | kfree_skb(skb); | 517 | kfree_skb(skb); |
518 | |||
519 | if (!q->fq_root) | ||
520 | return; | ||
521 | |||
522 | for (idx = 0; idx < (1U << q->fq_trees_log); idx++) { | ||
523 | root = &q->fq_root[idx]; | ||
524 | while ((p = rb_first(root)) != NULL) { | ||
525 | f = container_of(p, struct fq_flow, fq_node); | ||
526 | rb_erase(p, root); | ||
527 | |||
528 | while ((skb = fq_dequeue_head(sch, f)) != NULL) | ||
529 | kfree_skb(skb); | ||
530 | |||
531 | kmem_cache_free(fq_flow_cachep, f); | ||
532 | } | ||
533 | } | ||
534 | q->new_flows.first = NULL; | ||
535 | q->old_flows.first = NULL; | ||
536 | q->delayed = RB_ROOT; | ||
537 | q->flows = 0; | ||
538 | q->inactive_flows = 0; | ||
539 | q->throttled_flows = 0; | ||
506 | } | 540 | } |
507 | 541 | ||
508 | static void fq_rehash(struct fq_sched_data *q, | 542 | static void fq_rehash(struct fq_sched_data *q, |
@@ -645,6 +679,8 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt) | |||
645 | while (sch->q.qlen > sch->limit) { | 679 | while (sch->q.qlen > sch->limit) { |
646 | struct sk_buff *skb = fq_dequeue(sch); | 680 | struct sk_buff *skb = fq_dequeue(sch); |
647 | 681 | ||
682 | if (!skb) | ||
683 | break; | ||
648 | kfree_skb(skb); | 684 | kfree_skb(skb); |
649 | drop_count++; | 685 | drop_count++; |
650 | } | 686 | } |
@@ -657,21 +693,9 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt) | |||
657 | static void fq_destroy(struct Qdisc *sch) | 693 | static void fq_destroy(struct Qdisc *sch) |
658 | { | 694 | { |
659 | struct fq_sched_data *q = qdisc_priv(sch); | 695 | struct fq_sched_data *q = qdisc_priv(sch); |
660 | struct rb_root *root; | ||
661 | struct rb_node *p; | ||
662 | unsigned int idx; | ||
663 | 696 | ||
664 | if (q->fq_root) { | 697 | fq_reset(sch); |
665 | for (idx = 0; idx < (1U << q->fq_trees_log); idx++) { | 698 | kfree(q->fq_root); |
666 | root = &q->fq_root[idx]; | ||
667 | while ((p = rb_first(root)) != NULL) { | ||
668 | rb_erase(p, root); | ||
669 | kmem_cache_free(fq_flow_cachep, | ||
670 | container_of(p, struct fq_flow, fq_node)); | ||
671 | } | ||
672 | } | ||
673 | kfree(q->fq_root); | ||
674 | } | ||
675 | qdisc_watchdog_cancel(&q->watchdog); | 699 | qdisc_watchdog_cancel(&q->watchdog); |
676 | } | 700 | } |
677 | 701 | ||
diff --git a/net/sysctl_net.c b/net/sysctl_net.c index 9bc6db04be3e..e7000be321b0 100644 --- a/net/sysctl_net.c +++ b/net/sysctl_net.c | |||
@@ -47,12 +47,12 @@ static int net_ctl_permissions(struct ctl_table_header *head, | |||
47 | 47 | ||
48 | /* Allow network administrator to have same access as root. */ | 48 | /* Allow network administrator to have same access as root. */ |
49 | if (ns_capable(net->user_ns, CAP_NET_ADMIN) || | 49 | if (ns_capable(net->user_ns, CAP_NET_ADMIN) || |
50 | uid_eq(root_uid, current_uid())) { | 50 | uid_eq(root_uid, current_euid())) { |
51 | int mode = (table->mode >> 6) & 7; | 51 | int mode = (table->mode >> 6) & 7; |
52 | return (mode << 6) | (mode << 3) | mode; | 52 | return (mode << 6) | (mode << 3) | mode; |
53 | } | 53 | } |
54 | /* Allow netns root group to have the same access as the root group */ | 54 | /* Allow netns root group to have the same access as the root group */ |
55 | if (gid_eq(root_gid, current_gid())) { | 55 | if (in_egroup_p(root_gid)) { |
56 | int mode = (table->mode >> 3) & 7; | 56 | int mode = (table->mode >> 3) & 7; |
57 | return (mode << 3) | mode; | 57 | return (mode << 3) | mode; |
58 | } | 58 | } |