Diffstat (limited to 'net')
36 files changed, 514 insertions, 354 deletions
diff --git a/net/802/mrp.c b/net/802/mrp.c
index 1eb05d80b07b..3ed616215870 100644
--- a/net/802/mrp.c
+++ b/net/802/mrp.c
@@ -24,6 +24,11 @@
 static unsigned int mrp_join_time __read_mostly = 200;
 module_param(mrp_join_time, uint, 0644);
 MODULE_PARM_DESC(mrp_join_time, "Join time in ms (default 200ms)");
+
+static unsigned int mrp_periodic_time __read_mostly = 1000;
+module_param(mrp_periodic_time, uint, 0644);
+MODULE_PARM_DESC(mrp_periodic_time, "Periodic time in ms (default 1s)");
+
 MODULE_LICENSE("GPL");
 
 static const u8
@@ -595,6 +600,24 @@ static void mrp_join_timer(unsigned long data)
 	mrp_join_timer_arm(app);
 }
 
+static void mrp_periodic_timer_arm(struct mrp_applicant *app)
+{
+	mod_timer(&app->periodic_timer,
+		  jiffies + msecs_to_jiffies(mrp_periodic_time));
+}
+
+static void mrp_periodic_timer(unsigned long data)
+{
+	struct mrp_applicant *app = (struct mrp_applicant *)data;
+
+	spin_lock(&app->lock);
+	mrp_mad_event(app, MRP_EVENT_PERIODIC);
+	mrp_pdu_queue(app);
+	spin_unlock(&app->lock);
+
+	mrp_periodic_timer_arm(app);
+}
+
 static int mrp_pdu_parse_end_mark(struct sk_buff *skb, int *offset)
 {
 	__be16 endmark;
@@ -845,6 +868,9 @@ int mrp_init_applicant(struct net_device *dev, struct mrp_application *appl)
 	rcu_assign_pointer(dev->mrp_port->applicants[appl->type], app);
 	setup_timer(&app->join_timer, mrp_join_timer, (unsigned long)app);
 	mrp_join_timer_arm(app);
+	setup_timer(&app->periodic_timer, mrp_periodic_timer,
+		    (unsigned long)app);
+	mrp_periodic_timer_arm(app);
 	return 0;
 
 err3:
@@ -870,6 +896,7 @@ void mrp_uninit_applicant(struct net_device *dev, struct mrp_application *appl)
 	 * all pending messages before the applicant is gone.
 	 */
 	del_timer_sync(&app->join_timer);
+	del_timer_sync(&app->periodic_timer);
 
 	spin_lock_bh(&app->lock);
 	mrp_mad_event(app, MRP_EVENT_TX);
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 634debab4d54..fb7356fcfe51 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -1146,7 +1146,11 @@ int hci_dev_open(__u16 dev)
 		goto done;
 	}
 
-	if (hdev->rfkill && rfkill_blocked(hdev->rfkill)) {
+	/* Check for rfkill but allow the HCI setup stage to proceed
+	 * (which in itself doesn't cause any RF activity).
+	 */
+	if (test_bit(HCI_RFKILLED, &hdev->dev_flags) &&
+	    !test_bit(HCI_SETUP, &hdev->dev_flags)) {
 		ret = -ERFKILL;
 		goto done;
 	}
@@ -1566,10 +1570,13 @@ static int hci_rfkill_set_block(void *data, bool blocked)
 
 	BT_DBG("%p name %s blocked %d", hdev, hdev->name, blocked);
 
-	if (!blocked)
-		return 0;
-
-	hci_dev_do_close(hdev);
+	if (blocked) {
+		set_bit(HCI_RFKILLED, &hdev->dev_flags);
+		if (!test_bit(HCI_SETUP, &hdev->dev_flags))
+			hci_dev_do_close(hdev);
+	} else {
+		clear_bit(HCI_RFKILLED, &hdev->dev_flags);
+	}
 
 	return 0;
 }
@@ -1591,9 +1598,13 @@ static void hci_power_on(struct work_struct *work)
 		return;
 	}
 
-	if (test_bit(HCI_AUTO_OFF, &hdev->dev_flags))
+	if (test_bit(HCI_RFKILLED, &hdev->dev_flags)) {
+		clear_bit(HCI_AUTO_OFF, &hdev->dev_flags);
+		hci_dev_do_close(hdev);
+	} else if (test_bit(HCI_AUTO_OFF, &hdev->dev_flags)) {
 		queue_delayed_work(hdev->req_workqueue, &hdev->power_off,
 				   HCI_AUTO_OFF_TIMEOUT);
+	}
 
 	if (test_and_clear_bit(HCI_SETUP, &hdev->dev_flags))
 		mgmt_index_added(hdev);
@@ -2209,6 +2220,9 @@ int hci_register_dev(struct hci_dev *hdev)
 		}
 	}
 
+	if (hdev->rfkill && rfkill_blocked(hdev->rfkill))
+		set_bit(HCI_RFKILLED, &hdev->dev_flags);
+
 	set_bit(HCI_SETUP, &hdev->dev_flags);
 
 	if (hdev->dev_type != HCI_AMP)
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 94aab73f89d4..8db3e89fae35 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -3557,7 +3557,11 @@ static void hci_le_ltk_request_evt(struct hci_dev *hdev, struct sk_buff *skb)
 	cp.handle = cpu_to_le16(conn->handle);
 
 	if (ltk->authenticated)
-		conn->sec_level = BT_SECURITY_HIGH;
+		conn->pending_sec_level = BT_SECURITY_HIGH;
+	else
+		conn->pending_sec_level = BT_SECURITY_MEDIUM;
+
+	conn->enc_key_size = ltk->enc_size;
 
 	hci_send_cmd(hdev, HCI_OP_LE_LTK_REPLY, sizeof(cp), &cp);
 
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index b3bb7bca8e60..63fa11109a1c 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -3755,6 +3755,13 @@ static struct l2cap_chan *l2cap_connect(struct l2cap_conn *conn,
 
 	sk = chan->sk;
 
+	/* For certain devices (ex: HID mouse), support for authentication,
+	 * pairing and bonding is optional. For such devices, inorder to avoid
+	 * the ACL alive for too long after L2CAP disconnection, reset the ACL
+	 * disc_timeout back to HCI_DISCONN_TIMEOUT during L2CAP connect.
+	 */
+	conn->hcon->disc_timeout = HCI_DISCONN_TIMEOUT;
+
 	bacpy(&bt_sk(sk)->src, conn->src);
 	bacpy(&bt_sk(sk)->dst, conn->dst);
 	chan->psm = psm;
diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c
index 6d126faf145f..84fcf9fff3ea 100644
--- a/net/bluetooth/rfcomm/tty.c
+++ b/net/bluetooth/rfcomm/tty.c
@@ -569,7 +569,6 @@ static void rfcomm_dev_data_ready(struct rfcomm_dlc *dlc, struct sk_buff *skb)
 static void rfcomm_dev_state_change(struct rfcomm_dlc *dlc, int err)
 {
 	struct rfcomm_dev *dev = dlc->owner;
-	struct tty_struct *tty;
 	if (!dev)
 		return;
 
@@ -581,38 +580,8 @@ static void rfcomm_dev_state_change(struct rfcomm_dlc *dlc, int err)
 			    DPM_ORDER_DEV_AFTER_PARENT);
 
 		wake_up_interruptible(&dev->port.open_wait);
-	} else if (dlc->state == BT_CLOSED) {
-		tty = tty_port_tty_get(&dev->port);
-		if (!tty) {
-			if (test_bit(RFCOMM_RELEASE_ONHUP, &dev->flags)) {
-				/* Drop DLC lock here to avoid deadlock
-				 * 1. rfcomm_dev_get will take rfcomm_dev_lock
-				 *    but in rfcomm_dev_add there's lock order:
-				 *    rfcomm_dev_lock -> dlc lock
-				 * 2. tty_port_put will deadlock if it's
-				 *    the last reference
-				 *
-				 * FIXME: when we release the lock anything
-				 * could happen to dev, even its destruction
-				 */
-				rfcomm_dlc_unlock(dlc);
-				if (rfcomm_dev_get(dev->id) == NULL) {
-					rfcomm_dlc_lock(dlc);
-					return;
-				}
-
-				if (!test_and_set_bit(RFCOMM_TTY_RELEASED,
-						      &dev->flags))
-					tty_port_put(&dev->port);
-
-				tty_port_put(&dev->port);
-				rfcomm_dlc_lock(dlc);
-			}
-		} else {
-			tty_hangup(tty);
-			tty_kref_put(tty);
-		}
-	}
+	} else if (dlc->state == BT_CLOSED)
+		tty_port_tty_hangup(&dev->port, false);
 }
 
 static void rfcomm_dev_modem_status(struct rfcomm_dlc *dlc, u8 v24_sig)
diff --git a/net/core/dev.c b/net/core/dev.c
index 5c713f2239cc..65f829cfd928 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5247,10 +5247,12 @@ static int dev_new_index(struct net *net)
 
 /* Delayed registration/unregisteration */
 static LIST_HEAD(net_todo_list);
+static DECLARE_WAIT_QUEUE_HEAD(netdev_unregistering_wq);
 
 static void net_set_todo(struct net_device *dev)
 {
 	list_add_tail(&dev->todo_list, &net_todo_list);
+	dev_net(dev)->dev_unreg_count++;
 }
 
 static void rollback_registered_many(struct list_head *head)
@@ -5918,6 +5920,12 @@ void netdev_run_todo(void)
 		if (dev->destructor)
 			dev->destructor(dev);
 
+		/* Report a network device has been unregistered */
+		rtnl_lock();
+		dev_net(dev)->dev_unreg_count--;
+		__rtnl_unlock();
+		wake_up(&netdev_unregistering_wq);
+
 		/* Free network device */
 		kobject_put(&dev->dev.kobj);
 	}
@@ -6603,6 +6611,34 @@ static void __net_exit default_device_exit(struct net *net)
 	rtnl_unlock();
 }
 
+static void __net_exit rtnl_lock_unregistering(struct list_head *net_list)
+{
+	/* Return with the rtnl_lock held when there are no network
+	 * devices unregistering in any network namespace in net_list.
+	 */
+	struct net *net;
+	bool unregistering;
+	DEFINE_WAIT(wait);
+
+	for (;;) {
+		prepare_to_wait(&netdev_unregistering_wq, &wait,
+				TASK_UNINTERRUPTIBLE);
+		unregistering = false;
+		rtnl_lock();
+		list_for_each_entry(net, net_list, exit_list) {
+			if (net->dev_unreg_count > 0) {
+				unregistering = true;
+				break;
+			}
+		}
+		if (!unregistering)
+			break;
+		__rtnl_unlock();
+		schedule();
+	}
+	finish_wait(&netdev_unregistering_wq, &wait);
+}
+
 static void __net_exit default_device_exit_batch(struct list_head *net_list)
 {
 	/* At exit all network devices most be removed from a network
@@ -6614,7 +6650,18 @@ static void __net_exit default_device_exit_batch(struct list_head *net_list)
 	struct net *net;
 	LIST_HEAD(dev_kill_list);
 
-	rtnl_lock();
+	/* To prevent network device cleanup code from dereferencing
+	 * loopback devices or network devices that have been freed
+	 * wait here for all pending unregistrations to complete,
+	 * before unregistring the loopback device and allowing the
+	 * network namespace be freed.
+	 *
+	 * The netdev todo list containing all network devices
+	 * unregistrations that happen in default_device_exit_batch
+	 * will run in the rtnl_unlock() at the end of
+	 * default_device_exit_batch.
+	 */
+	rtnl_lock_unregistering(net_list);
 	list_for_each_entry(net, net_list, exit_list) {
 		for_each_netdev_reverse(net, dev) {
 			if (dev->rtnl_link_ops)
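
rtnl_lock_unregistering() above open-codes a wait loop with prepare_to_wait()/finish_wait() because the condition can only be sampled with the rtnl lock held and the waker (netdev_run_todo()) signals after dropping it. A hedged userspace sketch of the same contract, return holding the lock once no namespace is still unregistering, using a plain mutex and condition variable (the names stand in for the kernel state):

/* Sketch only: condition-variable analogue of rtnl_lock_unregistering()
 * and the wake_up() added to netdev_run_todo() above.
 */
#include <pthread.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;	/* cf. rtnl_lock */
static pthread_cond_t unregistering_wq = PTHREAD_COND_INITIALIZER;
static int dev_unreg_count;	/* cf. net->dev_unreg_count */

/* Returns with 'lock' held and dev_unreg_count == 0. */
static void lock_unregistering(void)
{
	pthread_mutex_lock(&lock);
	while (dev_unreg_count > 0)
		pthread_cond_wait(&unregistering_wq, &lock);
}

/* The completing side, cf. the new block in netdev_run_todo(). */
static void finish_one_unregistration(void)
{
	pthread_mutex_lock(&lock);
	dev_unreg_count--;
	pthread_mutex_unlock(&lock);
	pthread_cond_broadcast(&unregistering_wq);	/* cf. wake_up() */
}
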
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 1929af87b260..8d7d0dd72db2 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -154,8 +154,8 @@ ipv6:
 	if (poff >= 0) {
 		__be32 *ports, _ports;
 
-		nhoff += poff;
-		ports = skb_header_pointer(skb, nhoff, sizeof(_ports), &_ports);
+		ports = skb_header_pointer(skb, nhoff + poff,
+					   sizeof(_ports), &_ports);
 		if (ports)
 			flow->ports = *ports;
 	}
diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c
index 6a2f13cee86a..3f1ec1586ae1 100644
--- a/net/core/secure_seq.c
+++ b/net/core/secure_seq.c
@@ -10,11 +10,24 @@
 
 #include <net/secure_seq.h>
 
-static u32 net_secret[MD5_MESSAGE_BYTES / 4] ____cacheline_aligned;
+#define NET_SECRET_SIZE (MD5_MESSAGE_BYTES / 4)
 
-void net_secret_init(void)
+static u32 net_secret[NET_SECRET_SIZE] ____cacheline_aligned;
+
+static void net_secret_init(void)
 {
-	get_random_bytes(net_secret, sizeof(net_secret));
+	u32 tmp;
+	int i;
+
+	if (likely(net_secret[0]))
+		return;
+
+	for (i = NET_SECRET_SIZE; i > 0;) {
+		do {
+			get_random_bytes(&tmp, sizeof(tmp));
+		} while (!tmp);
+		cmpxchg(&net_secret[--i], 0, tmp);
+	}
 }
 
 #ifdef CONFIG_INET
@@ -42,6 +55,7 @@ __u32 secure_tcpv6_sequence_number(const __be32 *saddr, const __be32 *daddr,
 	u32 hash[MD5_DIGEST_WORDS];
 	u32 i;
 
+	net_secret_init();
 	memcpy(hash, saddr, 16);
 	for (i = 0; i < 4; i++)
 		secret[i] = net_secret[i] + (__force u32)daddr[i];
@@ -63,6 +77,7 @@ u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr,
 	u32 hash[MD5_DIGEST_WORDS];
 	u32 i;
 
+	net_secret_init();
 	memcpy(hash, saddr, 16);
 	for (i = 0; i < 4; i++)
 		secret[i] = net_secret[i] + (__force u32) daddr[i];
@@ -82,6 +97,7 @@ __u32 secure_ip_id(__be32 daddr)
 {
 	u32 hash[MD5_DIGEST_WORDS];
 
+	net_secret_init();
 	hash[0] = (__force __u32) daddr;
 	hash[1] = net_secret[13];
 	hash[2] = net_secret[14];
@@ -96,6 +112,7 @@ __u32 secure_ipv6_id(const __be32 daddr[4])
 {
 	__u32 hash[4];
 
+	net_secret_init();
 	memcpy(hash, daddr, 16);
 	md5_transform(hash, net_secret);
 
@@ -107,6 +124,7 @@ __u32 secure_tcp_sequence_number(__be32 saddr, __be32 daddr,
 {
 	u32 hash[MD5_DIGEST_WORDS];
 
+	net_secret_init();
 	hash[0] = (__force u32)saddr;
 	hash[1] = (__force u32)daddr;
 	hash[2] = ((__force u16)sport << 16) + (__force u16)dport;
@@ -121,6 +139,7 @@ u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport)
 {
 	u32 hash[MD5_DIGEST_WORDS];
 
+	net_secret_init();
 	hash[0] = (__force u32)saddr;
 	hash[1] = (__force u32)daddr;
 	hash[2] = (__force u32)dport ^ net_secret[14];
@@ -140,6 +159,7 @@ u64 secure_dccp_sequence_number(__be32 saddr, __be32 daddr,
 	u32 hash[MD5_DIGEST_WORDS];
 	u64 seq;
 
+	net_secret_init();
 	hash[0] = (__force u32)saddr;
 	hash[1] = (__force u32)daddr;
 	hash[2] = ((__force u16)sport << 16) + (__force u16)dport;
@@ -164,6 +184,7 @@ u64 secure_dccpv6_sequence_number(__be32 *saddr, __be32 *daddr,
 	u64 seq;
 	u32 i;
 
+	net_secret_init();
 	memcpy(hash, saddr, 16);
 	for (i = 0; i < 4; i++)
 		secret[i] = net_secret[i] + daddr[i];
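
net_secret_init() above becomes a lazy, race-safe seeder: every word of the pool is set exactly once through cmpxchg() with a nonzero random value, and the loop runs from the top index down so that net_secret[0], written last, can serve as the "fully initialized" marker tested on the fast path. A standalone sketch of the same pattern using C11 atomics, with getentropy() standing in for get_random_bytes():

/* Sketch only: lazy one-time seeding with compare-and-swap, mirroring
 * the net_secret_init() rewrite above (C11 atomics, userspace RNG).
 */
#include <stdatomic.h>
#include <stdint.h>
#include <unistd.h>

#define NET_SECRET_SIZE 16	/* stand-in for MD5_MESSAGE_BYTES / 4 */

static _Atomic uint32_t net_secret[NET_SECRET_SIZE];

static void net_secret_init(void)
{
	uint32_t tmp;
	int i;

	if (atomic_load(&net_secret[0]))	/* already fully seeded */
		return;

	for (i = NET_SECRET_SIZE; i > 0;) {
		do {
			(void)getentropy(&tmp, sizeof(tmp));
		} while (!tmp);		/* zero is reserved for "unset" */

		uint32_t expected = 0;
		/* First writer of each slot wins, as cmpxchg() guarantees;
		 * slot 0 is filled last, so nonzero slot 0 implies all set. */
		atomic_compare_exchange_strong(&net_secret[--i], &expected, tmp);
	}
}
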
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 7a1874b7b8fd..cfeb85cff4f0 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -263,10 +263,8 @@ void build_ehash_secret(void)
 		get_random_bytes(&rnd, sizeof(rnd));
 	} while (rnd == 0);
 
-	if (cmpxchg(&inet_ehash_secret, 0, rnd) == 0) {
+	if (cmpxchg(&inet_ehash_secret, 0, rnd) == 0)
 		get_random_bytes(&ipv6_hash_secret, sizeof(ipv6_hash_secret));
-		net_secret_init();
-	}
 }
 EXPORT_SYMBOL(build_ehash_secret);
 
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index dace87f06e5f..7defdc9ba167 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -736,7 +736,7 @@ static void igmp_gq_timer_expire(unsigned long data)
 
 	in_dev->mr_gq_running = 0;
 	igmpv3_send_report(in_dev, NULL);
-	__in_dev_put(in_dev);
+	in_dev_put(in_dev);
 }
 
 static void igmp_ifc_timer_expire(unsigned long data)
@@ -749,7 +749,7 @@ static void igmp_ifc_timer_expire(unsigned long data)
 		igmp_ifc_start_timer(in_dev,
 				     unsolicited_report_interval(in_dev));
 	}
-	__in_dev_put(in_dev);
+	in_dev_put(in_dev);
 }
 
 static void igmp_ifc_event(struct in_device *in_dev)
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index ac9fabe0300f..63a6d6d6b875 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -623,6 +623,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
 		tunnel->err_count = 0;
 	}
 
+	tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
 	ttl = tnl_params->ttl;
 	if (ttl == 0) {
 		if (skb->protocol == htons(ETH_P_IP))
@@ -641,18 +642,17 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
 
 	max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
 			+ rt->dst.header_len;
-	if (max_headroom > dev->needed_headroom) {
+	if (max_headroom > dev->needed_headroom)
 		dev->needed_headroom = max_headroom;
-		if (skb_cow_head(skb, dev->needed_headroom)) {
-			dev->stats.tx_dropped++;
-			dev_kfree_skb(skb);
-			return;
-		}
+
+	if (skb_cow_head(skb, dev->needed_headroom)) {
+		dev->stats.tx_dropped++;
+		dev_kfree_skb(skb);
+		return;
 	}
 
 	err = iptunnel_xmit(rt, skb, fl4.saddr, fl4.daddr, protocol,
-			    ip_tunnel_ecn_encap(tos, inner_iph, skb), ttl, df,
-			    !net_eq(tunnel->net, dev_net(dev)));
+			    tos, ttl, df, !net_eq(tunnel->net, dev_net(dev)));
 	iptunnel_xmit_stats(err, &dev->stats, dev->tstats);
 
 	return;
@@ -853,8 +853,10 @@ int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id,
 	/* FB netdevice is special: we have one, and only one per netns.
 	 * Allowing to move it to another netns is clearly unsafe.
 	 */
-	if (!IS_ERR(itn->fb_tunnel_dev))
+	if (!IS_ERR(itn->fb_tunnel_dev)) {
 		itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
+		ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
+	}
 	rtnl_unlock();
 
 	return PTR_RET(itn->fb_tunnel_dev);
@@ -884,8 +886,6 @@ static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head,
 		if (!net_eq(dev_net(t->dev), net))
 			unregister_netdevice_queue(t->dev, head);
 	}
-	if (itn->fb_tunnel_dev)
-		unregister_netdevice_queue(itn->fb_tunnel_dev, head);
 }
 
 void ip_tunnel_delete_net(struct ip_tunnel_net *itn, struct rtnl_link_ops *ops)
diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
index d6c856b17fd4..c31e3ad98ef2 100644
--- a/net/ipv4/ip_tunnel_core.c
+++ b/net/ipv4/ip_tunnel_core.c
@@ -61,7 +61,7 @@ int iptunnel_xmit(struct rtable *rt, struct sk_buff *skb,
 	memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
 
 	/* Push down and install the IP header. */
-	__skb_push(skb, sizeof(struct iphdr));
+	skb_push(skb, sizeof(struct iphdr));
 	skb_reset_network_header(skb);
 
 	iph = ip_hdr(skb);
diff --git a/net/ipv4/netfilter/ipt_SYNPROXY.c b/net/ipv4/netfilter/ipt_SYNPROXY.c
index 67e17dcda65e..b6346bf2fde3 100644
--- a/net/ipv4/netfilter/ipt_SYNPROXY.c
+++ b/net/ipv4/netfilter/ipt_SYNPROXY.c
@@ -267,7 +267,8 @@ synproxy_tg4(struct sk_buff *skb, const struct xt_action_param *par)
 	if (th == NULL)
 		return NF_DROP;
 
-	synproxy_parse_options(skb, par->thoff, th, &opts);
+	if (!synproxy_parse_options(skb, par->thoff, th, &opts))
+		return NF_DROP;
 
 	if (th->syn && !(th->ack || th->fin || th->rst)) {
 		/* Initial SYN from client */
@@ -350,7 +351,8 @@ static unsigned int ipv4_synproxy_hook(unsigned int hooknum,
 
 	/* fall through */
 	case TCP_CONNTRACK_SYN_SENT:
-		synproxy_parse_options(skb, thoff, th, &opts);
+		if (!synproxy_parse_options(skb, thoff, th, &opts))
+			return NF_DROP;
 
 		if (!th->syn && th->ack &&
 		    CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) {
@@ -373,7 +375,9 @@ static unsigned int ipv4_synproxy_hook(unsigned int hooknum,
 		if (!th->syn || !th->ack)
 			break;
 
-		synproxy_parse_options(skb, thoff, th, &opts);
+		if (!synproxy_parse_options(skb, thoff, th, &opts))
+			return NF_DROP;
+
 		if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP)
 			synproxy->tsoff = opts.tsval - synproxy->its;
 
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index bfec521c717f..193db03540ad 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -218,8 +218,10 @@ static void raw_err(struct sock *sk, struct sk_buff *skb, u32 info)
 
 	if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED)
 		ipv4_sk_update_pmtu(skb, sk, info);
-	else if (type == ICMP_REDIRECT)
+	else if (type == ICMP_REDIRECT) {
 		ipv4_sk_redirect(skb, sk);
+		return;
+	}
 
 	/* Report error on raw socket, if:
 	   1. User requested ip_recverr.
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 7c83cb8bf137..e6bb8256e59f 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -895,8 +895,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 
 	skb_orphan(skb);
 	skb->sk = sk;
-	skb->destructor = (sysctl_tcp_limit_output_bytes > 0) ?
-			  tcp_wfree : sock_wfree;
+	skb->destructor = tcp_wfree;
 	atomic_add(skb->truesize, &sk->sk_wmem_alloc);
 
 	/* Build TCP header and checksum it. */
@@ -1840,7 +1839,6 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
 	while ((skb = tcp_send_head(sk))) {
 		unsigned int limit;
 
-
 		tso_segs = tcp_init_tso_segs(sk, skb, mss_now);
 		BUG_ON(!tso_segs);
 
@@ -1869,13 +1867,20 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
 			break;
 		}
 
-		/* TSQ : sk_wmem_alloc accounts skb truesize,
-		 * including skb overhead. But thats OK.
+		/* TCP Small Queues :
+		 * Control number of packets in qdisc/devices to two packets / or ~1 ms.
+		 * This allows for :
+		 *  - better RTT estimation and ACK scheduling
+		 *  - faster recovery
+		 *  - high rates
 		 */
-		if (atomic_read(&sk->sk_wmem_alloc) >= sysctl_tcp_limit_output_bytes) {
+		limit = max(skb->truesize, sk->sk_pacing_rate >> 10);
+
+		if (atomic_read(&sk->sk_wmem_alloc) > limit) {
 			set_bit(TSQ_THROTTLED, &tp->tsq_flags);
 			break;
 		}
+
 		limit = mss_now;
 		if (tso_segs > 1 && !tcp_urg_mode(tp))
 			limit = tcp_mss_split_point(sk, skb, mss_now,
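
The new TSQ limit above is derived from the pacing rate: sk_pacing_rate is in bytes per second, so shifting it right by 10 (dividing by 1024, roughly 1000) yields about one millisecond's worth of bytes, floored at one skb's truesize so at least one packet can always be queued. A small worked example with illustrative numbers:

/* Worked example of: limit = max(skb->truesize, sk->sk_pacing_rate >> 10) */
#include <stdio.h>

int main(void)
{
	unsigned int truesize = 4096;		/* one illustrative skb */
	unsigned long pacing_rate = 125000000;	/* 1 Gbit/s in bytes/sec */
	unsigned long limit = pacing_rate >> 10;	/* ~122 KB, ~1 ms of data */

	if (limit < truesize)
		limit = truesize;		/* never less than one skb */

	printf("qdisc/device limit: %lu bytes\n", limit);
	return 0;
}
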
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 74d2c95db57f..0ca44df51ee9 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -658,7 +658,7 @@ void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable)
 		break;
 	case ICMP_REDIRECT:
 		ipv4_sk_redirect(skb, sk);
-		break;
+		goto out;
 	}
 
 	/*
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index d6ff12617f36..cd3fb301da38 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -1499,6 +1499,33 @@ static bool ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr,
 	return false;
 }
 
+/* Compares an address/prefix_len with addresses on device @dev.
+ * If one is found it returns true.
+ */
+bool ipv6_chk_custom_prefix(const struct in6_addr *addr,
+	const unsigned int prefix_len, struct net_device *dev)
+{
+	struct inet6_dev *idev;
+	struct inet6_ifaddr *ifa;
+	bool ret = false;
+
+	rcu_read_lock();
+	idev = __in6_dev_get(dev);
+	if (idev) {
+		read_lock_bh(&idev->lock);
+		list_for_each_entry(ifa, &idev->addr_list, if_list) {
+			ret = ipv6_prefix_equal(addr, &ifa->addr, prefix_len);
+			if (ret)
+				break;
+		}
+		read_unlock_bh(&idev->lock);
+	}
+	rcu_read_unlock();
+
+	return ret;
+}
+EXPORT_SYMBOL(ipv6_chk_custom_prefix);
+
 int ipv6_chk_prefix(const struct in6_addr *addr, struct net_device *dev)
 {
 	struct inet6_dev *idev;
@@ -2193,43 +2220,21 @@ ok:
 	else
 		stored_lft = 0;
 	if (!update_lft && !create && stored_lft) {
-		if (valid_lft > MIN_VALID_LIFETIME ||
-		    valid_lft > stored_lft)
-			update_lft = 1;
-		else if (stored_lft <= MIN_VALID_LIFETIME) {
-			/* valid_lft <= stored_lft is always true */
-			/*
-			 * RFC 4862 Section 5.5.3e:
-			 * "Note that the preferred lifetime of
-			 *  the corresponding address is always
-			 *  reset to the Preferred Lifetime in
-			 *  the received Prefix Information
-			 *  option, regardless of whether the
-			 *  valid lifetime is also reset or
-			 *  ignored."
-			 *
-			 * So if the preferred lifetime in
-			 * this advertisement is different
-			 * than what we have stored, but the
-			 * valid lifetime is invalid, just
-			 * reset prefered_lft.
-			 *
-			 * We must set the valid lifetime
-			 * to the stored lifetime since we'll
-			 * be updating the timestamp below,
-			 * else we'll set it back to the
-			 * minimum.
-			 */
-			if (prefered_lft != ifp->prefered_lft) {
-				valid_lft = stored_lft;
-				update_lft = 1;
-			}
-		} else {
-			valid_lft = MIN_VALID_LIFETIME;
-			if (valid_lft < prefered_lft)
-				prefered_lft = valid_lft;
-			update_lft = 1;
-		}
+		const u32 minimum_lft = min(
+			stored_lft, (u32)MIN_VALID_LIFETIME);
+		valid_lft = max(valid_lft, minimum_lft);
+
+		/* RFC4862 Section 5.5.3e:
+		 * "Note that the preferred lifetime of the
+		 *  corresponding address is always reset to
+		 *  the Preferred Lifetime in the received
+		 *  Prefix Information option, regardless of
+		 *  whether the valid lifetime is also reset or
+		 *  ignored."
+		 *
+		 * So we should always update prefered_lft here.
+		 */
+		update_lft = 1;
 	}
 
 	if (update_lft) {
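
The rewritten lifetime handling above reduces to valid_lft = max(valid_lft, min(stored_lft, MIN_VALID_LIFETIME)), followed by an unconditional refresh of prefered_lft per RFC 4862 5.5.3e. A minimal standalone check of that clamp (MIN_VALID_LIFETIME is two hours in addrconf.c; the advertised and stored values below are made up):

/* Worked example of the lifetime clamp in the addrconf.c hunk above. */
#include <stdio.h>

#define MIN_VALID_LIFETIME (2 * 3600)	/* two hours, as in addrconf.c */

static unsigned int clamp_valid_lft(unsigned int valid_lft,
				    unsigned int stored_lft)
{
	unsigned int minimum_lft = stored_lft < MIN_VALID_LIFETIME ?
				   stored_lft : MIN_VALID_LIFETIME;

	return valid_lft > minimum_lft ? valid_lft : minimum_lft;
}

int main(void)
{
	/* A short advertised lifetime cannot cut a long stored one below
	 * the two-hour floor: clamps 600 up to 7200. */
	printf("%u\n", clamp_valid_lft(600, 86400));
	/* But it may exceed a stored lifetime that is already short:
	 * stays at the advertised 600. */
	printf("%u\n", clamp_valid_lft(600, 300));
	return 0;
}
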
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 6b26e9feafb9..7bb5446b9d73 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -618,7 +618,7 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb,
 	struct ip6_tnl *tunnel = netdev_priv(dev);
 	struct net_device *tdev;	/* Device to other host */
 	struct ipv6hdr *ipv6h;		/* Our new IP header */
-	unsigned int max_headroom;	/* The extra header space needed */
+	unsigned int max_headroom = 0;	/* The extra header space needed */
 	int gre_hlen;
 	struct ipv6_tel_txoption opt;
 	int mtu;
@@ -693,7 +693,7 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb,
 
 	skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(dev)));
 
-	max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen + dst->header_len;
+	max_headroom += LL_RESERVED_SPACE(tdev) + gre_hlen + dst->header_len;
 
 	if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
 	    (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 3a692d529163..a54c45ce4a48 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1015,6 +1015,8 @@ static inline int ip6_ufo_append_data(struct sock *sk,
 	 * udp datagram
 	 */
 	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
+		struct frag_hdr fhdr;
+
 		skb = sock_alloc_send_skb(sk,
 			hh_len + fragheaderlen + transhdrlen + 20,
 			(flags & MSG_DONTWAIT), &err);
@@ -1036,12 +1038,6 @@ static inline int ip6_ufo_append_data(struct sock *sk,
 		skb->protocol = htons(ETH_P_IPV6);
 		skb->ip_summed = CHECKSUM_PARTIAL;
 		skb->csum = 0;
-	}
-
-	err = skb_append_datato_frags(sk,skb, getfrag, from,
-		(length - transhdrlen));
-	if (!err) {
-		struct frag_hdr fhdr;
 
 		/* Specify the length of each IPv6 datagram fragment.
 		 * It has to be a multiple of 8.
@@ -1052,15 +1048,10 @@ static inline int ip6_ufo_append_data(struct sock *sk,
 		ipv6_select_ident(&fhdr, rt);
 		skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
 		__skb_queue_tail(&sk->sk_write_queue, skb);
-
-		return 0;
 	}
-	/* There is not enough support do UPD LSO,
-	 * so follow normal path
-	 */
-	kfree_skb(skb);
 
-	return err;
+	return skb_append_datato_frags(sk, skb, getfrag, from,
+				       (length - transhdrlen));
 }
 
 static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
@@ -1227,27 +1218,27 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
 	 * --yoshfuji
 	 */
 
-	cork->length += length;
-	if (length > mtu) {
-		int proto = sk->sk_protocol;
-		if (dontfrag && (proto == IPPROTO_UDP || proto == IPPROTO_RAW)){
-			ipv6_local_rxpmtu(sk, fl6, mtu-exthdrlen);
-			return -EMSGSIZE;
-		}
-
-		if (proto == IPPROTO_UDP &&
-		    (rt->dst.dev->features & NETIF_F_UFO)) {
-
-			err = ip6_ufo_append_data(sk, getfrag, from, length,
-						  hh_len, fragheaderlen,
-						  transhdrlen, mtu, flags, rt);
-			if (err)
-				goto error;
-			return 0;
-		}
+	if ((length > mtu) && dontfrag && (sk->sk_protocol == IPPROTO_UDP ||
+					   sk->sk_protocol == IPPROTO_RAW)) {
+		ipv6_local_rxpmtu(sk, fl6, mtu-exthdrlen);
+		return -EMSGSIZE;
+	}
+
+	skb = skb_peek_tail(&sk->sk_write_queue);
+	cork->length += length;
+	if (((length > mtu) ||
+	     (skb && skb_is_gso(skb))) &&
+	    (sk->sk_protocol == IPPROTO_UDP) &&
+	    (rt->dst.dev->features & NETIF_F_UFO)) {
+		err = ip6_ufo_append_data(sk, getfrag, from, length,
+					  hh_len, fragheaderlen,
+					  transhdrlen, mtu, flags, rt);
+		if (err)
+			goto error;
+		return 0;
 	}
 
-	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
+	if (!skb)
 		goto alloc_new_skb;
 
 	while (length > 0) {
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 2d8f4829575b..a791552e0422 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -1731,8 +1731,6 @@ static void __net_exit ip6_tnl_destroy_tunnels(struct ip6_tnl_net *ip6n)
 		}
 	}
 
-	t = rtnl_dereference(ip6n->tnls_wc[0]);
-	unregister_netdevice_queue(t->dev, &list);
 	unregister_netdevice_many(&list);
 }
 
@@ -1752,6 +1750,7 @@ static int __net_init ip6_tnl_init_net(struct net *net)
 	if (!ip6n->fb_tnl_dev)
 		goto err_alloc_dev;
 	dev_net_set(ip6n->fb_tnl_dev, net);
+	ip6n->fb_tnl_dev->rtnl_link_ops = &ip6_link_ops;
 	/* FB netdevice is special: we have one, and only one per netns.
 	 * Allowing to move it to another netns is clearly unsafe.
 	 */
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 096cd67b737c..d18f9f903db6 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -2034,7 +2034,7 @@ static void mld_dad_timer_expire(unsigned long data)
 		if (idev->mc_dad_count)
 			mld_dad_start_timer(idev, idev->mc_maxdelay);
 	}
-	__in6_dev_put(idev);
+	in6_dev_put(idev);
 }
 
 static int ip6_mc_del1_src(struct ifmcaddr6 *pmc, int sfmode,
@@ -2379,7 +2379,7 @@ static void mld_gq_timer_expire(unsigned long data)
 
 	idev->mc_gq_running = 0;
 	mld_send_report(idev, NULL);
-	__in6_dev_put(idev);
+	in6_dev_put(idev);
 }
 
 static void mld_ifc_timer_expire(unsigned long data)
@@ -2392,7 +2392,7 @@ static void mld_ifc_timer_expire(unsigned long data)
 		if (idev->mc_ifc_count)
 			mld_ifc_start_timer(idev, idev->mc_maxdelay);
 	}
-	__in6_dev_put(idev);
+	in6_dev_put(idev);
 }
 
 static void mld_ifc_event(struct inet6_dev *idev)
diff --git a/net/ipv6/netfilter/ip6t_SYNPROXY.c b/net/ipv6/netfilter/ip6t_SYNPROXY.c
index 19cfea8dbcaa..2748b042da72 100644
--- a/net/ipv6/netfilter/ip6t_SYNPROXY.c
+++ b/net/ipv6/netfilter/ip6t_SYNPROXY.c
@@ -282,7 +282,8 @@ synproxy_tg6(struct sk_buff *skb, const struct xt_action_param *par)
 	if (th == NULL)
 		return NF_DROP;
 
-	synproxy_parse_options(skb, par->thoff, th, &opts);
+	if (!synproxy_parse_options(skb, par->thoff, th, &opts))
+		return NF_DROP;
 
 	if (th->syn && !(th->ack || th->fin || th->rst)) {
 		/* Initial SYN from client */
@@ -372,7 +373,8 @@ static unsigned int ipv6_synproxy_hook(unsigned int hooknum,
 
 	/* fall through */
 	case TCP_CONNTRACK_SYN_SENT:
-		synproxy_parse_options(skb, thoff, th, &opts);
+		if (!synproxy_parse_options(skb, thoff, th, &opts))
+			return NF_DROP;
 
 		if (!th->syn && th->ack &&
 		    CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) {
@@ -395,7 +397,9 @@ static unsigned int ipv6_synproxy_hook(unsigned int hooknum,
 		if (!th->syn || !th->ack)
 			break;
 
-		synproxy_parse_options(skb, thoff, th, &opts);
+		if (!synproxy_parse_options(skb, thoff, th, &opts))
+			return NF_DROP;
+
 		if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP)
 			synproxy->tsoff = opts.tsval - synproxy->its;
 
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 58916bbb1728..a4ed2416399e 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -335,8 +335,10 @@ static void rawv6_err(struct sock *sk, struct sk_buff *skb,
 		ip6_sk_update_pmtu(skb, sk, info);
 		harderr = (np->pmtudisc == IPV6_PMTUDISC_DO);
 	}
-	if (type == NDISC_REDIRECT)
+	if (type == NDISC_REDIRECT) {
 		ip6_sk_redirect(skb, sk);
+		return;
+	}
 	if (np->recverr) {
 		u8 *payload = skb->data;
 		if (!inet->hdrincl)
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 7ee5cb96db34..19269453a8ea 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -566,6 +566,70 @@ static inline bool is_spoofed_6rd(struct ip_tunnel *tunnel, const __be32 v4addr,
 	return false;
 }
 
+/* Checks if an address matches an address on the tunnel interface.
+ * Used to detect the NAT of proto 41 packets and let them pass spoofing test.
+ * Long story:
+ * This function is called after we considered the packet as spoofed
+ * in is_spoofed_6rd.
+ * We may have a router that is doing NAT for proto 41 packets
+ * for an internal station. Destination a.a.a.a/PREFIX:bbbb:bbbb
+ * will be translated to n.n.n.n/PREFIX:bbbb:bbbb. And is_spoofed_6rd
+ * function will return true, dropping the packet.
+ * But, we can still check if is spoofed against the IP
+ * addresses associated with the interface.
+ */
+static bool only_dnatted(const struct ip_tunnel *tunnel,
+	const struct in6_addr *v6dst)
+{
+	int prefix_len;
+
+#ifdef CONFIG_IPV6_SIT_6RD
+	prefix_len = tunnel->ip6rd.prefixlen + 32
+		- tunnel->ip6rd.relay_prefixlen;
+#else
+	prefix_len = 48;
+#endif
+	return ipv6_chk_custom_prefix(v6dst, prefix_len, tunnel->dev);
+}
+
+/* Returns true if a packet is spoofed */
+static bool packet_is_spoofed(struct sk_buff *skb,
+			      const struct iphdr *iph,
+			      struct ip_tunnel *tunnel)
+{
+	const struct ipv6hdr *ipv6h;
+
+	if (tunnel->dev->priv_flags & IFF_ISATAP) {
+		if (!isatap_chksrc(skb, iph, tunnel))
+			return true;
+
+		return false;
+	}
+
+	if (tunnel->dev->flags & IFF_POINTOPOINT)
+		return false;
+
+	ipv6h = ipv6_hdr(skb);
+
+	if (unlikely(is_spoofed_6rd(tunnel, iph->saddr, &ipv6h->saddr))) {
+		net_warn_ratelimited("Src spoofed %pI4/%pI6c -> %pI4/%pI6c\n",
+				     &iph->saddr, &ipv6h->saddr,
+				     &iph->daddr, &ipv6h->daddr);
+		return true;
+	}
+
+	if (likely(!is_spoofed_6rd(tunnel, iph->daddr, &ipv6h->daddr)))
+		return false;
+
+	if (only_dnatted(tunnel, &ipv6h->daddr))
+		return false;
+
+	net_warn_ratelimited("Dst spoofed %pI4/%pI6c -> %pI4/%pI6c\n",
+			     &iph->saddr, &ipv6h->saddr,
+			     &iph->daddr, &ipv6h->daddr);
+	return true;
+}
+
 static int ipip6_rcv(struct sk_buff *skb)
 {
 	const struct iphdr *iph = ip_hdr(skb);
@@ -586,19 +650,9 @@ static int ipip6_rcv(struct sk_buff *skb)
 	IPCB(skb)->flags = 0;
 	skb->protocol = htons(ETH_P_IPV6);
 
-	if (tunnel->dev->priv_flags & IFF_ISATAP) {
-		if (!isatap_chksrc(skb, iph, tunnel)) {
-			tunnel->dev->stats.rx_errors++;
-			goto out;
-		}
-	} else if (!(tunnel->dev->flags&IFF_POINTOPOINT)) {
-		if (is_spoofed_6rd(tunnel, iph->saddr,
-				   &ipv6_hdr(skb)->saddr) ||
-		    is_spoofed_6rd(tunnel, iph->daddr,
-				   &ipv6_hdr(skb)->daddr)) {
-			tunnel->dev->stats.rx_errors++;
-			goto out;
-		}
+	if (packet_is_spoofed(skb, iph, tunnel)) {
+		tunnel->dev->stats.rx_errors++;
+		goto out;
 	}
 
 	__skb_tunnel_rx(skb, tunnel->dev, tunnel->net);
@@ -748,7 +802,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
 		neigh = dst_neigh_lookup(skb_dst(skb), &iph6->daddr);
 
 		if (neigh == NULL) {
-			net_dbg_ratelimited("sit: nexthop == NULL\n");
+			net_dbg_ratelimited("nexthop == NULL\n");
 			goto tx_error;
 		}
 
@@ -777,7 +831,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
 		neigh = dst_neigh_lookup(skb_dst(skb), &iph6->daddr);
 
 		if (neigh == NULL) {
-			net_dbg_ratelimited("sit: nexthop == NULL\n");
+			net_dbg_ratelimited("nexthop == NULL\n");
 			goto tx_error;
 		}
 
@@ -1612,6 +1666,7 @@ static int __net_init sit_init_net(struct net *net)
 		goto err_alloc_dev;
 	}
 	dev_net_set(sitn->fb_tunnel_dev, net);
+	sitn->fb_tunnel_dev->rtnl_link_ops = &sit_link_ops;
 	/* FB netdevice is special: we have one, and only one per netns.
 	 * Allowing to move it to another netns is clearly unsafe.
 	 */
@@ -1646,7 +1701,6 @@ static void __net_exit sit_exit_net(struct net *net)
 
 	rtnl_lock();
 	sit_destroy_tunnels(sitn, &list);
-	unregister_netdevice_queue(sitn->fb_tunnel_dev, &list);
 	unregister_netdevice_many(&list);
 	rtnl_unlock();
 }
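
only_dnatted() above ends in ipv6_chk_custom_prefix(), which is ultimately a comparison of the first prefix_len bits of two IPv6 addresses. A self-contained sketch of that bit-prefix test, whole bytes first and then a masked partial byte (a hypothetical helper, not the kernel's ipv6_prefix_equal()):

/* Sketch only: compare the first prefix_len bits of two addresses. */
#include <stdbool.h>
#include <stdint.h>
#include <string.h>

static bool prefix_equal(const uint8_t a[16], const uint8_t b[16],
			 unsigned int prefix_len)
{
	unsigned int bytes = prefix_len / 8;
	unsigned int bits = prefix_len % 8;

	if (bytes && memcmp(a, b, bytes))
		return false;
	if (bits) {
		uint8_t mask = (uint8_t)(0xff << (8 - bits));

		return (a[bytes] & mask) == (b[bytes] & mask);
	}
	return true;
}
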
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index f4058150262b..72b7eaaf3ca0 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -525,8 +525,10 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 
 	if (type == ICMPV6_PKT_TOOBIG)
 		ip6_sk_update_pmtu(skb, sk, info);
-	if (type == NDISC_REDIRECT)
+	if (type == NDISC_REDIRECT) {
 		ip6_sk_redirect(skb, sk);
+		goto out;
+	}
 
 	np = inet6_sk(sk);
 
diff --git a/net/lapb/lapb_timer.c b/net/lapb/lapb_timer.c
index 54563ad8aeb1..355cc3b6fa4d 100644
--- a/net/lapb/lapb_timer.c
+++ b/net/lapb/lapb_timer.c
@@ -154,6 +154,7 @@ static void lapb_t1timer_expiry(unsigned long param)
 	} else {
 		lapb->n2count++;
 		lapb_requeue_frames(lapb);
+		lapb_kick(lapb);
 	}
 	break;
 
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 4f69e83ff836..74fd00c27210 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c | |||
| @@ -116,6 +116,7 @@ ip_vs_in_stats(struct ip_vs_conn *cp, struct sk_buff *skb) | |||
| 116 | 116 | ||
| 117 | if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) { | 117 | if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) { |
| 118 | struct ip_vs_cpu_stats *s; | 118 | struct ip_vs_cpu_stats *s; |
| 119 | struct ip_vs_service *svc; | ||
| 119 | 120 | ||
| 120 | s = this_cpu_ptr(dest->stats.cpustats); | 121 | s = this_cpu_ptr(dest->stats.cpustats); |
| 121 | s->ustats.inpkts++; | 122 | s->ustats.inpkts++; |
| @@ -123,11 +124,14 @@ ip_vs_in_stats(struct ip_vs_conn *cp, struct sk_buff *skb) | |||
| 123 | s->ustats.inbytes += skb->len; | 124 | s->ustats.inbytes += skb->len; |
| 124 | u64_stats_update_end(&s->syncp); | 125 | u64_stats_update_end(&s->syncp); |
| 125 | 126 | ||
| 126 | s = this_cpu_ptr(dest->svc->stats.cpustats); | 127 | rcu_read_lock(); |
| 128 | svc = rcu_dereference(dest->svc); | ||
| 129 | s = this_cpu_ptr(svc->stats.cpustats); | ||
| 127 | s->ustats.inpkts++; | 130 | s->ustats.inpkts++; |
| 128 | u64_stats_update_begin(&s->syncp); | 131 | u64_stats_update_begin(&s->syncp); |
| 129 | s->ustats.inbytes += skb->len; | 132 | s->ustats.inbytes += skb->len; |
| 130 | u64_stats_update_end(&s->syncp); | 133 | u64_stats_update_end(&s->syncp); |
| 134 | rcu_read_unlock(); | ||
| 131 | 135 | ||
| 132 | s = this_cpu_ptr(ipvs->tot_stats.cpustats); | 136 | s = this_cpu_ptr(ipvs->tot_stats.cpustats); |
| 133 | s->ustats.inpkts++; | 137 | s->ustats.inpkts++; |
| @@ -146,6 +150,7 @@ ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb) | |||
| 146 | 150 | ||
| 147 | if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) { | 151 | if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) { |
| 148 | struct ip_vs_cpu_stats *s; | 152 | struct ip_vs_cpu_stats *s; |
| 153 | struct ip_vs_service *svc; | ||
| 149 | 154 | ||
| 150 | s = this_cpu_ptr(dest->stats.cpustats); | 155 | s = this_cpu_ptr(dest->stats.cpustats); |
| 151 | s->ustats.outpkts++; | 156 | s->ustats.outpkts++; |
| @@ -153,11 +158,14 @@ ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb) | |||
| 153 | s->ustats.outbytes += skb->len; | 158 | s->ustats.outbytes += skb->len; |
| 154 | u64_stats_update_end(&s->syncp); | 159 | u64_stats_update_end(&s->syncp); |
| 155 | 160 | ||
| 156 | s = this_cpu_ptr(dest->svc->stats.cpustats); | 161 | rcu_read_lock(); |
| 162 | svc = rcu_dereference(dest->svc); | ||
| 163 | s = this_cpu_ptr(svc->stats.cpustats); | ||
| 157 | s->ustats.outpkts++; | 164 | s->ustats.outpkts++; |
| 158 | u64_stats_update_begin(&s->syncp); | 165 | u64_stats_update_begin(&s->syncp); |
| 159 | s->ustats.outbytes += skb->len; | 166 | s->ustats.outbytes += skb->len; |
| 160 | u64_stats_update_end(&s->syncp); | 167 | u64_stats_update_end(&s->syncp); |
| 168 | rcu_read_unlock(); | ||
| 161 | 169 | ||
| 162 | s = this_cpu_ptr(ipvs->tot_stats.cpustats); | 170 | s = this_cpu_ptr(ipvs->tot_stats.cpustats); |
| 163 | s->ustats.outpkts++; | 171 | s->ustats.outpkts++; |
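The two stats hunks above replace a plain load of dest->svc with rcu_read_lock()/rcu_dereference(), so the service cannot be freed while its per-cpu counters are being bumped. As a loose, runnable userspace analogy (real RCU additionally defers the free until every reader has left its critical section), the publish and consume halves of that pattern look roughly like this:

```c
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct svc_stats { long inpkts; };

/* Published pointer: the writer uses a release store (analogous to
 * rcu_assign_pointer), readers use acquire loads (analogous to
 * rcu_dereference). Illustrative only, not the kernel API. */
static _Atomic(struct svc_stats *) svc;

static void reader(void)
{
	struct svc_stats *s = atomic_load_explicit(&svc, memory_order_acquire);

	if (s)		/* safe: the store below published a fully built object */
		s->inpkts++;
}

int main(void)
{
	struct svc_stats *s = calloc(1, sizeof(*s));

	atomic_store_explicit(&svc, s, memory_order_release);
	reader();
	printf("inpkts=%ld\n", s->inpkts);
	free(s);
	return 0;
}
```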
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index c8148e487386..a3df9bddc4f7 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c | |||
| @@ -460,7 +460,7 @@ static inline void | |||
| 460 | __ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc) | 460 | __ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc) |
| 461 | { | 461 | { |
| 462 | atomic_inc(&svc->refcnt); | 462 | atomic_inc(&svc->refcnt); |
| 463 | dest->svc = svc; | 463 | rcu_assign_pointer(dest->svc, svc); |
| 464 | } | 464 | } |
| 465 | 465 | ||
| 466 | static void ip_vs_service_free(struct ip_vs_service *svc) | 466 | static void ip_vs_service_free(struct ip_vs_service *svc) |
| @@ -470,18 +470,25 @@ static void ip_vs_service_free(struct ip_vs_service *svc) | |||
| 470 | kfree(svc); | 470 | kfree(svc); |
| 471 | } | 471 | } |
| 472 | 472 | ||
| 473 | static void | 473 | static void ip_vs_service_rcu_free(struct rcu_head *head) |
| 474 | __ip_vs_unbind_svc(struct ip_vs_dest *dest) | ||
| 475 | { | 474 | { |
| 476 | struct ip_vs_service *svc = dest->svc; | 475 | struct ip_vs_service *svc; |
| 476 | |||
| 477 | svc = container_of(head, struct ip_vs_service, rcu_head); | ||
| 478 | ip_vs_service_free(svc); | ||
| 479 | } | ||
| 477 | 480 | ||
| 478 | dest->svc = NULL; | 481 | static void __ip_vs_svc_put(struct ip_vs_service *svc, bool do_delay) |
| 482 | { | ||
| 479 | if (atomic_dec_and_test(&svc->refcnt)) { | 483 | if (atomic_dec_and_test(&svc->refcnt)) { |
| 480 | IP_VS_DBG_BUF(3, "Removing service %u/%s:%u\n", | 484 | IP_VS_DBG_BUF(3, "Removing service %u/%s:%u\n", |
| 481 | svc->fwmark, | 485 | svc->fwmark, |
| 482 | IP_VS_DBG_ADDR(svc->af, &svc->addr), | 486 | IP_VS_DBG_ADDR(svc->af, &svc->addr), |
| 483 | ntohs(svc->port)); | 487 | ntohs(svc->port)); |
| 484 | ip_vs_service_free(svc); | 488 | if (do_delay) |
| 489 | call_rcu(&svc->rcu_head, ip_vs_service_rcu_free); | ||
| 490 | else | ||
| 491 | ip_vs_service_free(svc); | ||
| 485 | } | 492 | } |
| 486 | } | 493 | } |
| 487 | 494 | ||
| @@ -667,11 +674,6 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr, | |||
| 667 | IP_VS_DBG_ADDR(svc->af, &dest->addr), | 674 | IP_VS_DBG_ADDR(svc->af, &dest->addr), |
| 668 | ntohs(dest->port), | 675 | ntohs(dest->port), |
| 669 | atomic_read(&dest->refcnt)); | 676 | atomic_read(&dest->refcnt)); |
| 670 | /* We can not reuse dest while in grace period | ||
| 671 | * because conns still can use dest->svc | ||
| 672 | */ | ||
| 673 | if (test_bit(IP_VS_DEST_STATE_REMOVING, &dest->state)) | ||
| 674 | continue; | ||
| 675 | if (dest->af == svc->af && | 677 | if (dest->af == svc->af && |
| 676 | ip_vs_addr_equal(svc->af, &dest->addr, daddr) && | 678 | ip_vs_addr_equal(svc->af, &dest->addr, daddr) && |
| 677 | dest->port == dport && | 679 | dest->port == dport && |
| @@ -697,8 +699,10 @@ out: | |||
| 697 | 699 | ||
| 698 | static void ip_vs_dest_free(struct ip_vs_dest *dest) | 700 | static void ip_vs_dest_free(struct ip_vs_dest *dest) |
| 699 | { | 701 | { |
| 702 | struct ip_vs_service *svc = rcu_dereference_protected(dest->svc, 1); | ||
| 703 | |||
| 700 | __ip_vs_dst_cache_reset(dest); | 704 | __ip_vs_dst_cache_reset(dest); |
| 701 | __ip_vs_unbind_svc(dest); | 705 | __ip_vs_svc_put(svc, false); |
| 702 | free_percpu(dest->stats.cpustats); | 706 | free_percpu(dest->stats.cpustats); |
| 703 | kfree(dest); | 707 | kfree(dest); |
| 704 | } | 708 | } |
| @@ -771,6 +775,7 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest, | |||
| 771 | struct ip_vs_dest_user_kern *udest, int add) | 775 | struct ip_vs_dest_user_kern *udest, int add) |
| 772 | { | 776 | { |
| 773 | struct netns_ipvs *ipvs = net_ipvs(svc->net); | 777 | struct netns_ipvs *ipvs = net_ipvs(svc->net); |
| 778 | struct ip_vs_service *old_svc; | ||
| 774 | struct ip_vs_scheduler *sched; | 779 | struct ip_vs_scheduler *sched; |
| 775 | int conn_flags; | 780 | int conn_flags; |
| 776 | 781 | ||
| @@ -792,13 +797,14 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest, | |||
| 792 | atomic_set(&dest->conn_flags, conn_flags); | 797 | atomic_set(&dest->conn_flags, conn_flags); |
| 793 | 798 | ||
| 794 | /* bind the service */ | 799 | /* bind the service */ |
| 795 | if (!dest->svc) { | 800 | old_svc = rcu_dereference_protected(dest->svc, 1); |
| 801 | if (!old_svc) { | ||
| 796 | __ip_vs_bind_svc(dest, svc); | 802 | __ip_vs_bind_svc(dest, svc); |
| 797 | } else { | 803 | } else { |
| 798 | if (dest->svc != svc) { | 804 | if (old_svc != svc) { |
| 799 | __ip_vs_unbind_svc(dest); | ||
| 800 | ip_vs_zero_stats(&dest->stats); | 805 | ip_vs_zero_stats(&dest->stats); |
| 801 | __ip_vs_bind_svc(dest, svc); | 806 | __ip_vs_bind_svc(dest, svc); |
| 807 | __ip_vs_svc_put(old_svc, true); | ||
| 802 | } | 808 | } |
| 803 | } | 809 | } |
| 804 | 810 | ||
| @@ -998,16 +1004,6 @@ ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) | |||
| 998 | return 0; | 1004 | return 0; |
| 999 | } | 1005 | } |
| 1000 | 1006 | ||
| 1001 | static void ip_vs_dest_wait_readers(struct rcu_head *head) | ||
| 1002 | { | ||
| 1003 | struct ip_vs_dest *dest = container_of(head, struct ip_vs_dest, | ||
| 1004 | rcu_head); | ||
| 1005 | |||
| 1006 | /* End of grace period after unlinking */ | ||
| 1007 | clear_bit(IP_VS_DEST_STATE_REMOVING, &dest->state); | ||
| 1008 | } | ||
| 1009 | |||
| 1010 | |||
| 1011 | /* | 1007 | /* |
| 1012 | * Delete a destination (must be already unlinked from the service) | 1008 | * Delete a destination (must be already unlinked from the service) |
| 1013 | */ | 1009 | */ |
| @@ -1023,20 +1019,16 @@ static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest, | |||
| 1023 | */ | 1019 | */ |
| 1024 | ip_vs_rs_unhash(dest); | 1020 | ip_vs_rs_unhash(dest); |
| 1025 | 1021 | ||
| 1026 | if (!cleanup) { | ||
| 1027 | set_bit(IP_VS_DEST_STATE_REMOVING, &dest->state); | ||
| 1028 | call_rcu(&dest->rcu_head, ip_vs_dest_wait_readers); | ||
| 1029 | } | ||
| 1030 | |||
| 1031 | spin_lock_bh(&ipvs->dest_trash_lock); | 1022 | spin_lock_bh(&ipvs->dest_trash_lock); |
| 1032 | IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, dest->refcnt=%d\n", | 1023 | IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, dest->refcnt=%d\n", |
| 1033 | IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port), | 1024 | IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port), |
| 1034 | atomic_read(&dest->refcnt)); | 1025 | atomic_read(&dest->refcnt)); |
| 1035 | if (list_empty(&ipvs->dest_trash) && !cleanup) | 1026 | if (list_empty(&ipvs->dest_trash) && !cleanup) |
| 1036 | mod_timer(&ipvs->dest_trash_timer, | 1027 | mod_timer(&ipvs->dest_trash_timer, |
| 1037 | jiffies + IP_VS_DEST_TRASH_PERIOD); | 1028 | jiffies + (IP_VS_DEST_TRASH_PERIOD >> 1)); |
| 1038 | /* dest lives in trash without reference */ | 1029 | /* dest lives in trash without reference */ |
| 1039 | list_add(&dest->t_list, &ipvs->dest_trash); | 1030 | list_add(&dest->t_list, &ipvs->dest_trash); |
| 1031 | dest->idle_start = 0; | ||
| 1040 | spin_unlock_bh(&ipvs->dest_trash_lock); | 1032 | spin_unlock_bh(&ipvs->dest_trash_lock); |
| 1041 | ip_vs_dest_put(dest); | 1033 | ip_vs_dest_put(dest); |
| 1042 | } | 1034 | } |
| @@ -1108,24 +1100,30 @@ static void ip_vs_dest_trash_expire(unsigned long data) | |||
| 1108 | struct net *net = (struct net *) data; | 1100 | struct net *net = (struct net *) data; |
| 1109 | struct netns_ipvs *ipvs = net_ipvs(net); | 1101 | struct netns_ipvs *ipvs = net_ipvs(net); |
| 1110 | struct ip_vs_dest *dest, *next; | 1102 | struct ip_vs_dest *dest, *next; |
| 1103 | unsigned long now = jiffies; | ||
| 1111 | 1104 | ||
| 1112 | spin_lock(&ipvs->dest_trash_lock); | 1105 | spin_lock(&ipvs->dest_trash_lock); |
| 1113 | list_for_each_entry_safe(dest, next, &ipvs->dest_trash, t_list) { | 1106 | list_for_each_entry_safe(dest, next, &ipvs->dest_trash, t_list) { |
| 1114 | /* Skip if dest is in grace period */ | ||
| 1115 | if (test_bit(IP_VS_DEST_STATE_REMOVING, &dest->state)) | ||
| 1116 | continue; | ||
| 1117 | if (atomic_read(&dest->refcnt) > 0) | 1107 | if (atomic_read(&dest->refcnt) > 0) |
| 1118 | continue; | 1108 | continue; |
| 1109 | if (dest->idle_start) { | ||
| 1110 | if (time_before(now, dest->idle_start + | ||
| 1111 | IP_VS_DEST_TRASH_PERIOD)) | ||
| 1112 | continue; | ||
| 1113 | } else { | ||
| 1114 | dest->idle_start = max(1UL, now); | ||
| 1115 | continue; | ||
| 1116 | } | ||
| 1119 | IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u from trash\n", | 1117 | IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u from trash\n", |
| 1120 | dest->vfwmark, | 1118 | dest->vfwmark, |
| 1121 | IP_VS_DBG_ADDR(dest->svc->af, &dest->addr), | 1119 | IP_VS_DBG_ADDR(dest->af, &dest->addr), |
| 1122 | ntohs(dest->port)); | 1120 | ntohs(dest->port)); |
| 1123 | list_del(&dest->t_list); | 1121 | list_del(&dest->t_list); |
| 1124 | ip_vs_dest_free(dest); | 1122 | ip_vs_dest_free(dest); |
| 1125 | } | 1123 | } |
| 1126 | if (!list_empty(&ipvs->dest_trash)) | 1124 | if (!list_empty(&ipvs->dest_trash)) |
| 1127 | mod_timer(&ipvs->dest_trash_timer, | 1125 | mod_timer(&ipvs->dest_trash_timer, |
| 1128 | jiffies + IP_VS_DEST_TRASH_PERIOD); | 1126 | jiffies + (IP_VS_DEST_TRASH_PERIOD >> 1)); |
| 1129 | spin_unlock(&ipvs->dest_trash_lock); | 1127 | spin_unlock(&ipvs->dest_trash_lock); |
| 1130 | } | 1128 | } |
| 1131 | 1129 | ||
| @@ -1320,14 +1318,6 @@ out: | |||
| 1320 | return ret; | 1318 | return ret; |
| 1321 | } | 1319 | } |
| 1322 | 1320 | ||
| 1323 | static void ip_vs_service_rcu_free(struct rcu_head *head) | ||
| 1324 | { | ||
| 1325 | struct ip_vs_service *svc; | ||
| 1326 | |||
| 1327 | svc = container_of(head, struct ip_vs_service, rcu_head); | ||
| 1328 | ip_vs_service_free(svc); | ||
| 1329 | } | ||
| 1330 | |||
| 1331 | /* | 1321 | /* |
| 1332 | * Delete a service from the service list | 1322 | * Delete a service from the service list |
| 1333 | * - The service must be unlinked, unlocked and not referenced! | 1323 | * - The service must be unlinked, unlocked and not referenced! |
| @@ -1376,13 +1366,7 @@ static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup) | |||
| 1376 | /* | 1366 | /* |
| 1377 | * Free the service if nobody refers to it | 1367 | * Free the service if nobody refers to it |
| 1378 | */ | 1368 | */ |
| 1379 | if (atomic_dec_and_test(&svc->refcnt)) { | 1369 | __ip_vs_svc_put(svc, true); |
| 1380 | IP_VS_DBG_BUF(3, "Removing service %u/%s:%u\n", | ||
| 1381 | svc->fwmark, | ||
| 1382 | IP_VS_DBG_ADDR(svc->af, &svc->addr), | ||
| 1383 | ntohs(svc->port)); | ||
| 1384 | call_rcu(&svc->rcu_head, ip_vs_service_rcu_free); | ||
| 1385 | } | ||
| 1386 | 1370 | ||
| 1387 | /* decrease the module use count */ | 1371 | /* decrease the module use count */ |
| 1388 | ip_vs_use_count_dec(); | 1372 | ip_vs_use_count_dec(); |
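ip_vs_ctl.c now funnels every service release through __ip_vs_svc_put(), whose do_delay flag chooses between an immediate free (cleanup paths where no RCU readers can exist) and a call_rcu()-deferred free. A toy userspace model of that split, with the grace period reduced to an explicit drain step and all names illustrative:

```c
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

struct svc {
	atomic_int refcnt;
};

static void svc_free(struct svc *s) { printf("freeing svc\n"); free(s); }

/* Stand-in for call_rcu(): remember the object, free it "later". */
static struct svc *deferred;
static void defer_free(struct svc *s) { deferred = s; }
static void grace_period_end(void)
{
	if (deferred) {
		svc_free(deferred);
		deferred = NULL;
	}
}

static void svc_put(struct svc *s, bool do_delay)
{
	if (atomic_fetch_sub(&s->refcnt, 1) == 1) {	/* last reference */
		if (do_delay)
			defer_free(s);	/* readers may still hold pointers */
		else
			svc_free(s);	/* no readers possible: free now */
	}
}

int main(void)
{
	struct svc *s = malloc(sizeof(*s));

	atomic_init(&s->refcnt, 1);
	svc_put(s, true);
	grace_period_end();	/* free happens after the "grace period" */
	return 0;
}
```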
diff --git a/net/netfilter/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c index 6bee6d0c73a5..1425e9a924c4 100644 --- a/net/netfilter/ipvs/ip_vs_est.c +++ b/net/netfilter/ipvs/ip_vs_est.c | |||
| @@ -59,12 +59,13 @@ static void ip_vs_read_cpu_stats(struct ip_vs_stats_user *sum, | |||
| 59 | struct ip_vs_cpu_stats __percpu *stats) | 59 | struct ip_vs_cpu_stats __percpu *stats) |
| 60 | { | 60 | { |
| 61 | int i; | 61 | int i; |
| 62 | bool add = false; | ||
| 62 | 63 | ||
| 63 | for_each_possible_cpu(i) { | 64 | for_each_possible_cpu(i) { |
| 64 | struct ip_vs_cpu_stats *s = per_cpu_ptr(stats, i); | 65 | struct ip_vs_cpu_stats *s = per_cpu_ptr(stats, i); |
| 65 | unsigned int start; | 66 | unsigned int start; |
| 66 | __u64 inbytes, outbytes; | 67 | __u64 inbytes, outbytes; |
| 67 | if (i) { | 68 | if (add) { |
| 68 | sum->conns += s->ustats.conns; | 69 | sum->conns += s->ustats.conns; |
| 69 | sum->inpkts += s->ustats.inpkts; | 70 | sum->inpkts += s->ustats.inpkts; |
| 70 | sum->outpkts += s->ustats.outpkts; | 71 | sum->outpkts += s->ustats.outpkts; |
| @@ -76,6 +77,7 @@ static void ip_vs_read_cpu_stats(struct ip_vs_stats_user *sum, | |||
| 76 | sum->inbytes += inbytes; | 77 | sum->inbytes += inbytes; |
| 77 | sum->outbytes += outbytes; | 78 | sum->outbytes += outbytes; |
| 78 | } else { | 79 | } else { |
| 80 | add = true; | ||
| 79 | sum->conns = s->ustats.conns; | 81 | sum->conns = s->ustats.conns; |
| 80 | sum->inpkts = s->ustats.inpkts; | 82 | sum->inpkts = s->ustats.inpkts; |
| 81 | sum->outpkts = s->ustats.outpkts; | 83 | sum->outpkts = s->ustats.outpkts; |
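The ip_vs_est.c hunk fixes per-cpu summation on systems where CPU 0 is not in the possible-CPU mask: the old `if (i)` test assumed the first iterated CPU was always cpu 0, so the sums were never initialized on such machines. A compact userspace illustration, assuming a hypothetical possible set {1, 2, 3}:

```c
#include <stdio.h>

int main(void)
{
	int possible_cpus[] = { 1, 2, 3 };	/* CPU 0 absent */
	long percpu_pkts[] = { 0, 10, 20, 30 };
	long sum = 0;
	int started = 0;	/* replaces "if (i)", which skipped the
				 * initializing branch only for cpu 0 */

	for (int k = 0; k < 3; k++) {
		int cpu = possible_cpus[k];

		if (started)
			sum += percpu_pkts[cpu];	/* accumulate */
		else {
			started = 1;			/* first iterated CPU */
			sum = percpu_pkts[cpu];		/* initializes the sum */
		}
	}
	printf("sum=%ld\n", sum);	/* 60 */
	return 0;
}
```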
diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c index 1383b0eadc0e..eff13c94498e 100644 --- a/net/netfilter/ipvs/ip_vs_lblc.c +++ b/net/netfilter/ipvs/ip_vs_lblc.c | |||
| @@ -93,7 +93,7 @@ struct ip_vs_lblc_entry { | |||
| 93 | struct hlist_node list; | 93 | struct hlist_node list; |
| 94 | int af; /* address family */ | 94 | int af; /* address family */ |
| 95 | union nf_inet_addr addr; /* destination IP address */ | 95 | union nf_inet_addr addr; /* destination IP address */ |
| 96 | struct ip_vs_dest __rcu *dest; /* real server (cache) */ | 96 | struct ip_vs_dest *dest; /* real server (cache) */ |
| 97 | unsigned long lastuse; /* last used time */ | 97 | unsigned long lastuse; /* last used time */ |
| 98 | struct rcu_head rcu_head; | 98 | struct rcu_head rcu_head; |
| 99 | }; | 99 | }; |
| @@ -130,20 +130,21 @@ static struct ctl_table vs_vars_table[] = { | |||
| 130 | }; | 130 | }; |
| 131 | #endif | 131 | #endif |
| 132 | 132 | ||
| 133 | static inline void ip_vs_lblc_free(struct ip_vs_lblc_entry *en) | 133 | static void ip_vs_lblc_rcu_free(struct rcu_head *head) |
| 134 | { | 134 | { |
| 135 | struct ip_vs_dest *dest; | 135 | struct ip_vs_lblc_entry *en = container_of(head, |
| 136 | struct ip_vs_lblc_entry, | ||
| 137 | rcu_head); | ||
| 136 | 138 | ||
| 137 | hlist_del_rcu(&en->list); | 139 | ip_vs_dest_put(en->dest); |
| 138 | /* | 140 | kfree(en); |
| 139 | * We don't kfree dest because it is referred either by its service | ||
| 140 | * or the trash dest list. | ||
| 141 | */ | ||
| 142 | dest = rcu_dereference_protected(en->dest, 1); | ||
| 143 | ip_vs_dest_put(dest); | ||
| 144 | kfree_rcu(en, rcu_head); | ||
| 145 | } | 141 | } |
| 146 | 142 | ||
| 143 | static inline void ip_vs_lblc_del(struct ip_vs_lblc_entry *en) | ||
| 144 | { | ||
| 145 | hlist_del_rcu(&en->list); | ||
| 146 | call_rcu(&en->rcu_head, ip_vs_lblc_rcu_free); | ||
| 147 | } | ||
| 147 | 148 | ||
| 148 | /* | 149 | /* |
| 149 | * Returns hash value for IPVS LBLC entry | 150 | * Returns hash value for IPVS LBLC entry |
| @@ -203,30 +204,23 @@ ip_vs_lblc_new(struct ip_vs_lblc_table *tbl, const union nf_inet_addr *daddr, | |||
| 203 | struct ip_vs_lblc_entry *en; | 204 | struct ip_vs_lblc_entry *en; |
| 204 | 205 | ||
| 205 | en = ip_vs_lblc_get(dest->af, tbl, daddr); | 206 | en = ip_vs_lblc_get(dest->af, tbl, daddr); |
| 206 | if (!en) { | 207 | if (en) { |
| 207 | en = kmalloc(sizeof(*en), GFP_ATOMIC); | 208 | if (en->dest == dest) |
| 208 | if (!en) | 209 | return en; |
| 209 | return NULL; | 210 | ip_vs_lblc_del(en); |
| 210 | 211 | } | |
| 211 | en->af = dest->af; | 212 | en = kmalloc(sizeof(*en), GFP_ATOMIC); |
| 212 | ip_vs_addr_copy(dest->af, &en->addr, daddr); | 213 | if (!en) |
| 213 | en->lastuse = jiffies; | 214 | return NULL; |
| 214 | 215 | ||
| 215 | ip_vs_dest_hold(dest); | 216 | en->af = dest->af; |
| 216 | RCU_INIT_POINTER(en->dest, dest); | 217 | ip_vs_addr_copy(dest->af, &en->addr, daddr); |
| 218 | en->lastuse = jiffies; | ||
| 217 | 219 | ||
| 218 | ip_vs_lblc_hash(tbl, en); | 220 | ip_vs_dest_hold(dest); |
| 219 | } else { | 221 | en->dest = dest; |
| 220 | struct ip_vs_dest *old_dest; | ||
| 221 | 222 | ||
| 222 | old_dest = rcu_dereference_protected(en->dest, 1); | 223 | ip_vs_lblc_hash(tbl, en); |
| 223 | if (old_dest != dest) { | ||
| 224 | ip_vs_dest_put(old_dest); | ||
| 225 | ip_vs_dest_hold(dest); | ||
| 226 | /* No ordering constraints for refcnt */ | ||
| 227 | RCU_INIT_POINTER(en->dest, dest); | ||
| 228 | } | ||
| 229 | } | ||
| 230 | 224 | ||
| 231 | return en; | 225 | return en; |
| 232 | } | 226 | } |
| @@ -246,7 +240,7 @@ static void ip_vs_lblc_flush(struct ip_vs_service *svc) | |||
| 246 | tbl->dead = 1; | 240 | tbl->dead = 1; |
| 247 | for (i=0; i<IP_VS_LBLC_TAB_SIZE; i++) { | 241 | for (i=0; i<IP_VS_LBLC_TAB_SIZE; i++) { |
| 248 | hlist_for_each_entry_safe(en, next, &tbl->bucket[i], list) { | 242 | hlist_for_each_entry_safe(en, next, &tbl->bucket[i], list) { |
| 249 | ip_vs_lblc_free(en); | 243 | ip_vs_lblc_del(en); |
| 250 | atomic_dec(&tbl->entries); | 244 | atomic_dec(&tbl->entries); |
| 251 | } | 245 | } |
| 252 | } | 246 | } |
| @@ -281,7 +275,7 @@ static inline void ip_vs_lblc_full_check(struct ip_vs_service *svc) | |||
| 281 | sysctl_lblc_expiration(svc))) | 275 | sysctl_lblc_expiration(svc))) |
| 282 | continue; | 276 | continue; |
| 283 | 277 | ||
| 284 | ip_vs_lblc_free(en); | 278 | ip_vs_lblc_del(en); |
| 285 | atomic_dec(&tbl->entries); | 279 | atomic_dec(&tbl->entries); |
| 286 | } | 280 | } |
| 287 | spin_unlock(&svc->sched_lock); | 281 | spin_unlock(&svc->sched_lock); |
| @@ -335,7 +329,7 @@ static void ip_vs_lblc_check_expire(unsigned long data) | |||
| 335 | if (time_before(now, en->lastuse + ENTRY_TIMEOUT)) | 329 | if (time_before(now, en->lastuse + ENTRY_TIMEOUT)) |
| 336 | continue; | 330 | continue; |
| 337 | 331 | ||
| 338 | ip_vs_lblc_free(en); | 332 | ip_vs_lblc_del(en); |
| 339 | atomic_dec(&tbl->entries); | 333 | atomic_dec(&tbl->entries); |
| 340 | goal--; | 334 | goal--; |
| 341 | } | 335 | } |
| @@ -443,8 +437,8 @@ __ip_vs_lblc_schedule(struct ip_vs_service *svc) | |||
| 443 | continue; | 437 | continue; |
| 444 | 438 | ||
| 445 | doh = ip_vs_dest_conn_overhead(dest); | 439 | doh = ip_vs_dest_conn_overhead(dest); |
| 446 | if (loh * atomic_read(&dest->weight) > | 440 | if ((__s64)loh * atomic_read(&dest->weight) > |
| 447 | doh * atomic_read(&least->weight)) { | 441 | (__s64)doh * atomic_read(&least->weight)) { |
| 448 | least = dest; | 442 | least = dest; |
| 449 | loh = doh; | 443 | loh = doh; |
| 450 | } | 444 | } |
| @@ -511,7 +505,7 @@ ip_vs_lblc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb, | |||
| 511 | * free up entries from the trash at any time. | 505 | * free up entries from the trash at any time. |
| 512 | */ | 506 | */ |
| 513 | 507 | ||
| 514 | dest = rcu_dereference(en->dest); | 508 | dest = en->dest; |
| 515 | if ((dest->flags & IP_VS_DEST_F_AVAILABLE) && | 509 | if ((dest->flags & IP_VS_DEST_F_AVAILABLE) && |
| 516 | atomic_read(&dest->weight) > 0 && !is_overloaded(dest, svc)) | 510 | atomic_read(&dest->weight) > 0 && !is_overloaded(dest, svc)) |
| 517 | goto out; | 511 | goto out; |
| @@ -631,7 +625,7 @@ static void __exit ip_vs_lblc_cleanup(void) | |||
| 631 | { | 625 | { |
| 632 | unregister_ip_vs_scheduler(&ip_vs_lblc_scheduler); | 626 | unregister_ip_vs_scheduler(&ip_vs_lblc_scheduler); |
| 633 | unregister_pernet_subsys(&ip_vs_lblc_ops); | 627 | unregister_pernet_subsys(&ip_vs_lblc_ops); |
| 634 | synchronize_rcu(); | 628 | rcu_barrier(); |
| 635 | } | 629 | } |
| 636 | 630 | ||
| 637 | 631 | ||
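Two related changes in ip_vs_lblc.c (mirrored in ip_vs_lblcr.c below): entries are now freed through call_rcu(), so the dest reference is dropped only once readers are done with the entry, and module exit switches from synchronize_rcu() to rcu_barrier(). The distinction matters because synchronize_rcu() only waits for readers, while queued call_rcu() callbacks could still run after the module text is unloaded. A toy model where the callback queue is explicit:

```c
#include <stdio.h>

typedef void (*rcu_cb)(void *);

/* Toy callback queue standing in for pending call_rcu() work. */
static rcu_cb cbs[8];
static void *args[8];
static int ncbs;

static void toy_call_rcu(rcu_cb f, void *arg)
{
	cbs[ncbs] = f;
	args[ncbs++] = arg;
}

static void toy_rcu_barrier(void)	/* drain all queued callbacks */
{
	for (int i = 0; i < ncbs; i++)
		cbs[i](args[i]);
	ncbs = 0;
}

static void free_entry(void *p) { printf("entry %s freed\n", (char *)p); }

int main(void)
{
	toy_call_rcu(free_entry, "en1");
	/* Module exit: without the barrier, free_entry could run after
	 * the module (and the callback function itself) is gone. */
	toy_rcu_barrier();
	return 0;
}
```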
diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c index 5199448697f6..0b8550089a2e 100644 --- a/net/netfilter/ipvs/ip_vs_lblcr.c +++ b/net/netfilter/ipvs/ip_vs_lblcr.c | |||
| @@ -89,7 +89,7 @@ | |||
| 89 | */ | 89 | */ |
| 90 | struct ip_vs_dest_set_elem { | 90 | struct ip_vs_dest_set_elem { |
| 91 | struct list_head list; /* list link */ | 91 | struct list_head list; /* list link */ |
| 92 | struct ip_vs_dest __rcu *dest; /* destination server */ | 92 | struct ip_vs_dest *dest; /* destination server */ |
| 93 | struct rcu_head rcu_head; | 93 | struct rcu_head rcu_head; |
| 94 | }; | 94 | }; |
| 95 | 95 | ||
| @@ -107,11 +107,7 @@ static void ip_vs_dest_set_insert(struct ip_vs_dest_set *set, | |||
| 107 | 107 | ||
| 108 | if (check) { | 108 | if (check) { |
| 109 | list_for_each_entry(e, &set->list, list) { | 109 | list_for_each_entry(e, &set->list, list) { |
| 110 | struct ip_vs_dest *d; | 110 | if (e->dest == dest) |
| 111 | |||
| 112 | d = rcu_dereference_protected(e->dest, 1); | ||
| 113 | if (d == dest) | ||
| 114 | /* already existed */ | ||
| 115 | return; | 111 | return; |
| 116 | } | 112 | } |
| 117 | } | 113 | } |
| @@ -121,7 +117,7 @@ static void ip_vs_dest_set_insert(struct ip_vs_dest_set *set, | |||
| 121 | return; | 117 | return; |
| 122 | 118 | ||
| 123 | ip_vs_dest_hold(dest); | 119 | ip_vs_dest_hold(dest); |
| 124 | RCU_INIT_POINTER(e->dest, dest); | 120 | e->dest = dest; |
| 125 | 121 | ||
| 126 | list_add_rcu(&e->list, &set->list); | 122 | list_add_rcu(&e->list, &set->list); |
| 127 | atomic_inc(&set->size); | 123 | atomic_inc(&set->size); |
| @@ -129,22 +125,27 @@ static void ip_vs_dest_set_insert(struct ip_vs_dest_set *set, | |||
| 129 | set->lastmod = jiffies; | 125 | set->lastmod = jiffies; |
| 130 | } | 126 | } |
| 131 | 127 | ||
| 128 | static void ip_vs_lblcr_elem_rcu_free(struct rcu_head *head) | ||
| 129 | { | ||
| 130 | struct ip_vs_dest_set_elem *e; | ||
| 131 | |||
| 132 | e = container_of(head, struct ip_vs_dest_set_elem, rcu_head); | ||
| 133 | ip_vs_dest_put(e->dest); | ||
| 134 | kfree(e); | ||
| 135 | } | ||
| 136 | |||
| 132 | static void | 137 | static void |
| 133 | ip_vs_dest_set_erase(struct ip_vs_dest_set *set, struct ip_vs_dest *dest) | 138 | ip_vs_dest_set_erase(struct ip_vs_dest_set *set, struct ip_vs_dest *dest) |
| 134 | { | 139 | { |
| 135 | struct ip_vs_dest_set_elem *e; | 140 | struct ip_vs_dest_set_elem *e; |
| 136 | 141 | ||
| 137 | list_for_each_entry(e, &set->list, list) { | 142 | list_for_each_entry(e, &set->list, list) { |
| 138 | struct ip_vs_dest *d; | 143 | if (e->dest == dest) { |
| 139 | |||
| 140 | d = rcu_dereference_protected(e->dest, 1); | ||
| 141 | if (d == dest) { | ||
| 142 | /* HIT */ | 144 | /* HIT */ |
| 143 | atomic_dec(&set->size); | 145 | atomic_dec(&set->size); |
| 144 | set->lastmod = jiffies; | 146 | set->lastmod = jiffies; |
| 145 | ip_vs_dest_put(dest); | ||
| 146 | list_del_rcu(&e->list); | 147 | list_del_rcu(&e->list); |
| 147 | kfree_rcu(e, rcu_head); | 148 | call_rcu(&e->rcu_head, ip_vs_lblcr_elem_rcu_free); |
| 148 | break; | 149 | break; |
| 149 | } | 150 | } |
| 150 | } | 151 | } |
| @@ -155,16 +156,8 @@ static void ip_vs_dest_set_eraseall(struct ip_vs_dest_set *set) | |||
| 155 | struct ip_vs_dest_set_elem *e, *ep; | 156 | struct ip_vs_dest_set_elem *e, *ep; |
| 156 | 157 | ||
| 157 | list_for_each_entry_safe(e, ep, &set->list, list) { | 158 | list_for_each_entry_safe(e, ep, &set->list, list) { |
| 158 | struct ip_vs_dest *d; | ||
| 159 | |||
| 160 | d = rcu_dereference_protected(e->dest, 1); | ||
| 161 | /* | ||
| 162 | * We don't kfree dest because it is referred either | ||
| 163 | * by its service or by the trash dest list. | ||
| 164 | */ | ||
| 165 | ip_vs_dest_put(d); | ||
| 166 | list_del_rcu(&e->list); | 159 | list_del_rcu(&e->list); |
| 167 | kfree_rcu(e, rcu_head); | 160 | call_rcu(&e->rcu_head, ip_vs_lblcr_elem_rcu_free); |
| 168 | } | 161 | } |
| 169 | } | 162 | } |
| 170 | 163 | ||
| @@ -175,12 +168,9 @@ static inline struct ip_vs_dest *ip_vs_dest_set_min(struct ip_vs_dest_set *set) | |||
| 175 | struct ip_vs_dest *dest, *least; | 168 | struct ip_vs_dest *dest, *least; |
| 176 | int loh, doh; | 169 | int loh, doh; |
| 177 | 170 | ||
| 178 | if (set == NULL) | ||
| 179 | return NULL; | ||
| 180 | |||
| 181 | /* select the first destination server, whose weight > 0 */ | 171 | /* select the first destination server, whose weight > 0 */ |
| 182 | list_for_each_entry_rcu(e, &set->list, list) { | 172 | list_for_each_entry_rcu(e, &set->list, list) { |
| 183 | least = rcu_dereference(e->dest); | 173 | least = e->dest; |
| 184 | if (least->flags & IP_VS_DEST_F_OVERLOAD) | 174 | if (least->flags & IP_VS_DEST_F_OVERLOAD) |
| 185 | continue; | 175 | continue; |
| 186 | 176 | ||
| @@ -195,13 +185,13 @@ static inline struct ip_vs_dest *ip_vs_dest_set_min(struct ip_vs_dest_set *set) | |||
| 195 | /* find the destination with the weighted least load */ | 185 | /* find the destination with the weighted least load */ |
| 196 | nextstage: | 186 | nextstage: |
| 197 | list_for_each_entry_continue_rcu(e, &set->list, list) { | 187 | list_for_each_entry_continue_rcu(e, &set->list, list) { |
| 198 | dest = rcu_dereference(e->dest); | 188 | dest = e->dest; |
| 199 | if (dest->flags & IP_VS_DEST_F_OVERLOAD) | 189 | if (dest->flags & IP_VS_DEST_F_OVERLOAD) |
| 200 | continue; | 190 | continue; |
| 201 | 191 | ||
| 202 | doh = ip_vs_dest_conn_overhead(dest); | 192 | doh = ip_vs_dest_conn_overhead(dest); |
| 203 | if ((loh * atomic_read(&dest->weight) > | 193 | if (((__s64)loh * atomic_read(&dest->weight) > |
| 204 | doh * atomic_read(&least->weight)) | 194 | (__s64)doh * atomic_read(&least->weight)) |
| 205 | && (dest->flags & IP_VS_DEST_F_AVAILABLE)) { | 195 | && (dest->flags & IP_VS_DEST_F_AVAILABLE)) { |
| 206 | least = dest; | 196 | least = dest; |
| 207 | loh = doh; | 197 | loh = doh; |
| @@ -232,7 +222,7 @@ static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set) | |||
| 232 | 222 | ||
| 233 | /* select the first destination server, whose weight > 0 */ | 223 | /* select the first destination server, whose weight > 0 */ |
| 234 | list_for_each_entry(e, &set->list, list) { | 224 | list_for_each_entry(e, &set->list, list) { |
| 235 | most = rcu_dereference_protected(e->dest, 1); | 225 | most = e->dest; |
| 236 | if (atomic_read(&most->weight) > 0) { | 226 | if (atomic_read(&most->weight) > 0) { |
| 237 | moh = ip_vs_dest_conn_overhead(most); | 227 | moh = ip_vs_dest_conn_overhead(most); |
| 238 | goto nextstage; | 228 | goto nextstage; |
| @@ -243,11 +233,11 @@ static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set) | |||
| 243 | /* find the destination with the weighted most load */ | 233 | /* find the destination with the weighted most load */ |
| 244 | nextstage: | 234 | nextstage: |
| 245 | list_for_each_entry_continue(e, &set->list, list) { | 235 | list_for_each_entry_continue(e, &set->list, list) { |
| 246 | dest = rcu_dereference_protected(e->dest, 1); | 236 | dest = e->dest; |
| 247 | doh = ip_vs_dest_conn_overhead(dest); | 237 | doh = ip_vs_dest_conn_overhead(dest); |
| 248 | /* moh/mw < doh/dw ==> moh*dw < doh*mw, where mw,dw>0 */ | 238 | /* moh/mw < doh/dw ==> moh*dw < doh*mw, where mw,dw>0 */ |
| 249 | if ((moh * atomic_read(&dest->weight) < | 239 | if (((__s64)moh * atomic_read(&dest->weight) < |
| 250 | doh * atomic_read(&most->weight)) | 240 | (__s64)doh * atomic_read(&most->weight)) |
| 251 | && (atomic_read(&dest->weight) > 0)) { | 241 | && (atomic_read(&dest->weight) > 0)) { |
| 252 | most = dest; | 242 | most = dest; |
| 253 | moh = doh; | 243 | moh = doh; |
| @@ -611,8 +601,8 @@ __ip_vs_lblcr_schedule(struct ip_vs_service *svc) | |||
| 611 | continue; | 601 | continue; |
| 612 | 602 | ||
| 613 | doh = ip_vs_dest_conn_overhead(dest); | 603 | doh = ip_vs_dest_conn_overhead(dest); |
| 614 | if (loh * atomic_read(&dest->weight) > | 604 | if ((__s64)loh * atomic_read(&dest->weight) > |
| 615 | doh * atomic_read(&least->weight)) { | 605 | (__s64)doh * atomic_read(&least->weight)) { |
| 616 | least = dest; | 606 | least = dest; |
| 617 | loh = doh; | 607 | loh = doh; |
| 618 | } | 608 | } |
| @@ -819,7 +809,7 @@ static void __exit ip_vs_lblcr_cleanup(void) | |||
| 819 | { | 809 | { |
| 820 | unregister_ip_vs_scheduler(&ip_vs_lblcr_scheduler); | 810 | unregister_ip_vs_scheduler(&ip_vs_lblcr_scheduler); |
| 821 | unregister_pernet_subsys(&ip_vs_lblcr_ops); | 811 | unregister_pernet_subsys(&ip_vs_lblcr_ops); |
| 822 | synchronize_rcu(); | 812 | rcu_barrier(); |
| 823 | } | 813 | } |
| 824 | 814 | ||
| 825 | 815 | ||
diff --git a/net/netfilter/ipvs/ip_vs_nq.c b/net/netfilter/ipvs/ip_vs_nq.c index d8d9860934fe..961a6de9bb29 100644 --- a/net/netfilter/ipvs/ip_vs_nq.c +++ b/net/netfilter/ipvs/ip_vs_nq.c | |||
| @@ -40,7 +40,7 @@ | |||
| 40 | #include <net/ip_vs.h> | 40 | #include <net/ip_vs.h> |
| 41 | 41 | ||
| 42 | 42 | ||
| 43 | static inline unsigned int | 43 | static inline int |
| 44 | ip_vs_nq_dest_overhead(struct ip_vs_dest *dest) | 44 | ip_vs_nq_dest_overhead(struct ip_vs_dest *dest) |
| 45 | { | 45 | { |
| 46 | /* | 46 | /* |
| @@ -59,7 +59,7 @@ ip_vs_nq_schedule(struct ip_vs_service *svc, const struct sk_buff *skb, | |||
| 59 | struct ip_vs_iphdr *iph) | 59 | struct ip_vs_iphdr *iph) |
| 60 | { | 60 | { |
| 61 | struct ip_vs_dest *dest, *least = NULL; | 61 | struct ip_vs_dest *dest, *least = NULL; |
| 62 | unsigned int loh = 0, doh; | 62 | int loh = 0, doh; |
| 63 | 63 | ||
| 64 | IP_VS_DBG(6, "%s(): Scheduling...\n", __func__); | 64 | IP_VS_DBG(6, "%s(): Scheduling...\n", __func__); |
| 65 | 65 | ||
| @@ -92,8 +92,8 @@ ip_vs_nq_schedule(struct ip_vs_service *svc, const struct sk_buff *skb, | |||
| 92 | } | 92 | } |
| 93 | 93 | ||
| 94 | if (!least || | 94 | if (!least || |
| 95 | (loh * atomic_read(&dest->weight) > | 95 | ((__s64)loh * atomic_read(&dest->weight) > |
| 96 | doh * atomic_read(&least->weight))) { | 96 | (__s64)doh * atomic_read(&least->weight))) { |
| 97 | least = dest; | 97 | least = dest; |
| 98 | loh = doh; | 98 | loh = doh; |
| 99 | } | 99 | } |
diff --git a/net/netfilter/ipvs/ip_vs_sed.c b/net/netfilter/ipvs/ip_vs_sed.c index a5284cc3d882..e446b9fa7424 100644 --- a/net/netfilter/ipvs/ip_vs_sed.c +++ b/net/netfilter/ipvs/ip_vs_sed.c | |||
| @@ -44,7 +44,7 @@ | |||
| 44 | #include <net/ip_vs.h> | 44 | #include <net/ip_vs.h> |
| 45 | 45 | ||
| 46 | 46 | ||
| 47 | static inline unsigned int | 47 | static inline int |
| 48 | ip_vs_sed_dest_overhead(struct ip_vs_dest *dest) | 48 | ip_vs_sed_dest_overhead(struct ip_vs_dest *dest) |
| 49 | { | 49 | { |
| 50 | /* | 50 | /* |
| @@ -63,7 +63,7 @@ ip_vs_sed_schedule(struct ip_vs_service *svc, const struct sk_buff *skb, | |||
| 63 | struct ip_vs_iphdr *iph) | 63 | struct ip_vs_iphdr *iph) |
| 64 | { | 64 | { |
| 65 | struct ip_vs_dest *dest, *least; | 65 | struct ip_vs_dest *dest, *least; |
| 66 | unsigned int loh, doh; | 66 | int loh, doh; |
| 67 | 67 | ||
| 68 | IP_VS_DBG(6, "%s(): Scheduling...\n", __func__); | 68 | IP_VS_DBG(6, "%s(): Scheduling...\n", __func__); |
| 69 | 69 | ||
| @@ -99,8 +99,8 @@ ip_vs_sed_schedule(struct ip_vs_service *svc, const struct sk_buff *skb, | |||
| 99 | if (dest->flags & IP_VS_DEST_F_OVERLOAD) | 99 | if (dest->flags & IP_VS_DEST_F_OVERLOAD) |
| 100 | continue; | 100 | continue; |
| 101 | doh = ip_vs_sed_dest_overhead(dest); | 101 | doh = ip_vs_sed_dest_overhead(dest); |
| 102 | if (loh * atomic_read(&dest->weight) > | 102 | if ((__s64)loh * atomic_read(&dest->weight) > |
| 103 | doh * atomic_read(&least->weight)) { | 103 | (__s64)doh * atomic_read(&least->weight)) { |
| 104 | least = dest; | 104 | least = dest; |
| 105 | loh = doh; | 105 | loh = doh; |
| 106 | } | 106 | } |
diff --git a/net/netfilter/ipvs/ip_vs_wlc.c b/net/netfilter/ipvs/ip_vs_wlc.c index 6dc1fa128840..b5b4650d50a9 100644 --- a/net/netfilter/ipvs/ip_vs_wlc.c +++ b/net/netfilter/ipvs/ip_vs_wlc.c | |||
| @@ -35,7 +35,7 @@ ip_vs_wlc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb, | |||
| 35 | struct ip_vs_iphdr *iph) | 35 | struct ip_vs_iphdr *iph) |
| 36 | { | 36 | { |
| 37 | struct ip_vs_dest *dest, *least; | 37 | struct ip_vs_dest *dest, *least; |
| 38 | unsigned int loh, doh; | 38 | int loh, doh; |
| 39 | 39 | ||
| 40 | IP_VS_DBG(6, "ip_vs_wlc_schedule(): Scheduling...\n"); | 40 | IP_VS_DBG(6, "ip_vs_wlc_schedule(): Scheduling...\n"); |
| 41 | 41 | ||
| @@ -71,8 +71,8 @@ ip_vs_wlc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb, | |||
| 71 | if (dest->flags & IP_VS_DEST_F_OVERLOAD) | 71 | if (dest->flags & IP_VS_DEST_F_OVERLOAD) |
| 72 | continue; | 72 | continue; |
| 73 | doh = ip_vs_dest_conn_overhead(dest); | 73 | doh = ip_vs_dest_conn_overhead(dest); |
| 74 | if (loh * atomic_read(&dest->weight) > | 74 | if ((__s64)loh * atomic_read(&dest->weight) > |
| 75 | doh * atomic_read(&least->weight)) { | 75 | (__s64)doh * atomic_read(&least->weight)) { |
| 76 | least = dest; | 76 | least = dest; |
| 77 | loh = doh; | 77 | loh = doh; |
| 78 | } | 78 | } |
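The (__s64) casts added across the lblc, lblcr, nq, sed and wlc schedulers above (together with the matching unsigned int to int return types) prevent 32-bit wraparound in the weighted overhead comparison, which could make a heavily loaded server look idle. A self-contained demonstration with illustrative numbers:

```c
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	unsigned int loh = 65536, doh = 1;	/* connection overheads */
	int wa = 65536, wb = 1;			/* server weights */

	/* 32-bit product wraps: 65536u * 65536 == 0 (mod 2^32), so the
	 * heavily loaded side compares as if it had no load at all. */
	printf("32-bit says loh side is busier: %d\n", loh * wa > doh * wb);

	/* Widening to 64 bits before multiplying gives the true result. */
	printf("64-bit says loh side is busier: %d\n",
	       (int64_t)loh * wa > (int64_t)doh * wb);
	return 0;
}
```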
diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c index 6fd967c6278c..cdf4567ba9b3 100644 --- a/net/netfilter/nf_synproxy_core.c +++ b/net/netfilter/nf_synproxy_core.c | |||
| @@ -24,7 +24,7 @@ | |||
| 24 | int synproxy_net_id; | 24 | int synproxy_net_id; |
| 25 | EXPORT_SYMBOL_GPL(synproxy_net_id); | 25 | EXPORT_SYMBOL_GPL(synproxy_net_id); |
| 26 | 26 | ||
| 27 | void | 27 | bool |
| 28 | synproxy_parse_options(const struct sk_buff *skb, unsigned int doff, | 28 | synproxy_parse_options(const struct sk_buff *skb, unsigned int doff, |
| 29 | const struct tcphdr *th, struct synproxy_options *opts) | 29 | const struct tcphdr *th, struct synproxy_options *opts) |
| 30 | { | 30 | { |
| @@ -32,7 +32,8 @@ synproxy_parse_options(const struct sk_buff *skb, unsigned int doff, | |||
| 32 | u8 buf[40], *ptr; | 32 | u8 buf[40], *ptr; |
| 33 | 33 | ||
| 34 | ptr = skb_header_pointer(skb, doff + sizeof(*th), length, buf); | 34 | ptr = skb_header_pointer(skb, doff + sizeof(*th), length, buf); |
| 35 | BUG_ON(ptr == NULL); | 35 | if (ptr == NULL) |
| 36 | return false; | ||
| 36 | 37 | ||
| 37 | opts->options = 0; | 38 | opts->options = 0; |
| 38 | while (length > 0) { | 39 | while (length > 0) { |
| @@ -41,16 +42,16 @@ synproxy_parse_options(const struct sk_buff *skb, unsigned int doff, | |||
| 41 | 42 | ||
| 42 | switch (opcode) { | 43 | switch (opcode) { |
| 43 | case TCPOPT_EOL: | 44 | case TCPOPT_EOL: |
| 44 | return; | 45 | return true; |
| 45 | case TCPOPT_NOP: | 46 | case TCPOPT_NOP: |
| 46 | length--; | 47 | length--; |
| 47 | continue; | 48 | continue; |
| 48 | default: | 49 | default: |
| 49 | opsize = *ptr++; | 50 | opsize = *ptr++; |
| 50 | if (opsize < 2) | 51 | if (opsize < 2) |
| 51 | return; | 52 | return true; |
| 52 | if (opsize > length) | 53 | if (opsize > length) |
| 53 | return; | 54 | return true; |
| 54 | 55 | ||
| 55 | switch (opcode) { | 56 | switch (opcode) { |
| 56 | case TCPOPT_MSS: | 57 | case TCPOPT_MSS: |
| @@ -84,6 +85,7 @@ synproxy_parse_options(const struct sk_buff *skb, unsigned int doff, | |||
| 84 | length -= opsize; | 85 | length -= opsize; |
| 85 | } | 86 | } |
| 86 | } | 87 | } |
| 88 | return true; | ||
| 87 | } | 89 | } |
| 88 | EXPORT_SYMBOL_GPL(synproxy_parse_options); | 90 | EXPORT_SYMBOL_GPL(synproxy_parse_options); |
| 89 | 91 | ||
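synproxy_parse_options() now returns bool instead of BUG()-ing when skb_header_pointer() cannot provide the TCP option block, letting callers drop a malformed packet rather than crash the machine. A simplified, runnable sketch of that parser contract (constants and names abbreviated; not the kernel implementation):

```c
#include <stdbool.h>
#include <stdio.h>

static bool parse_tcp_options(const unsigned char *opts, int length)
{
	if (!opts)
		return false;	/* options not present: fail, don't BUG */

	while (length > 0) {
		unsigned char opcode = *opts++;
		unsigned char opsize;

		switch (opcode) {
		case 0:			/* TCPOPT_EOL */
			return true;
		case 1:			/* TCPOPT_NOP */
			length--;
			continue;
		default:
			if (length < 2)
				return true;
			opsize = *opts++;
			if (opsize < 2 || opsize > length)
				return true;	/* ignore trailing garbage */
			/* ... decode MSS/WSCALE/SACK_PERM/TIMESTAMP here ... */
			opts += opsize - 2;
			length -= opsize;
		}
	}
	return true;
}

int main(void)
{
	unsigned char good[] = { 1, 1, 0 };	/* NOP, NOP, EOL */

	printf("%d %d\n", parse_tcp_options(good, 3),
	       parse_tcp_options(NULL, 0));	/* prints "1 0" */
	return 0;
}
```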
diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c index 32ad015ee8ce..a2fef8b10b96 100644 --- a/net/sched/sch_fq.c +++ b/net/sched/sch_fq.c | |||
| @@ -285,7 +285,7 @@ static struct fq_flow *fq_classify(struct sk_buff *skb, struct fq_sched_data *q) | |||
| 285 | 285 | ||
| 286 | 286 | ||
| 287 | /* remove one skb from head of flow queue */ | 287 | /* remove one skb from head of flow queue */ |
| 288 | static struct sk_buff *fq_dequeue_head(struct fq_flow *flow) | 288 | static struct sk_buff *fq_dequeue_head(struct Qdisc *sch, struct fq_flow *flow) |
| 289 | { | 289 | { |
| 290 | struct sk_buff *skb = flow->head; | 290 | struct sk_buff *skb = flow->head; |
| 291 | 291 | ||
| @@ -293,6 +293,8 @@ static struct sk_buff *fq_dequeue_head(struct fq_flow *flow) | |||
| 293 | flow->head = skb->next; | 293 | flow->head = skb->next; |
| 294 | skb->next = NULL; | 294 | skb->next = NULL; |
| 295 | flow->qlen--; | 295 | flow->qlen--; |
| 296 | sch->qstats.backlog -= qdisc_pkt_len(skb); | ||
| 297 | sch->q.qlen--; | ||
| 296 | } | 298 | } |
| 297 | return skb; | 299 | return skb; |
| 298 | } | 300 | } |
| @@ -418,8 +420,9 @@ static struct sk_buff *fq_dequeue(struct Qdisc *sch) | |||
| 418 | struct fq_flow_head *head; | 420 | struct fq_flow_head *head; |
| 419 | struct sk_buff *skb; | 421 | struct sk_buff *skb; |
| 420 | struct fq_flow *f; | 422 | struct fq_flow *f; |
| 423 | u32 rate; | ||
| 421 | 424 | ||
| 422 | skb = fq_dequeue_head(&q->internal); | 425 | skb = fq_dequeue_head(sch, &q->internal); |
| 423 | if (skb) | 426 | if (skb) |
| 424 | goto out; | 427 | goto out; |
| 425 | fq_check_throttled(q, now); | 428 | fq_check_throttled(q, now); |
| @@ -449,7 +452,7 @@ begin: | |||
| 449 | goto begin; | 452 | goto begin; |
| 450 | } | 453 | } |
| 451 | 454 | ||
| 452 | skb = fq_dequeue_head(f); | 455 | skb = fq_dequeue_head(sch, f); |
| 453 | if (!skb) { | 456 | if (!skb) { |
| 454 | head->first = f->next; | 457 | head->first = f->next; |
| 455 | /* force a pass through old_flows to prevent starvation */ | 458 | /* force a pass through old_flows to prevent starvation */ |
| @@ -466,43 +469,74 @@ begin: | |||
| 466 | f->time_next_packet = now; | 469 | f->time_next_packet = now; |
| 467 | f->credit -= qdisc_pkt_len(skb); | 470 | f->credit -= qdisc_pkt_len(skb); |
| 468 | 471 | ||
| 469 | if (f->credit <= 0 && | 472 | if (f->credit > 0 || !q->rate_enable) |
| 470 | q->rate_enable && | 473 | goto out; |
| 471 | skb->sk && skb->sk->sk_state != TCP_TIME_WAIT) { | ||
| 472 | u32 rate = skb->sk->sk_pacing_rate ?: q->flow_default_rate; | ||
| 473 | 474 | ||
| 474 | rate = min(rate, q->flow_max_rate); | 475 | if (skb->sk && skb->sk->sk_state != TCP_TIME_WAIT) { |
| 475 | if (rate) { | 476 | rate = skb->sk->sk_pacing_rate ?: q->flow_default_rate; |
| 476 | u64 len = (u64)qdisc_pkt_len(skb) * NSEC_PER_SEC; | ||
| 477 | |||
| 478 | do_div(len, rate); | ||
| 479 | /* Since socket rate can change later, | ||
| 480 | * clamp the delay to 125 ms. | ||
| 481 | * TODO: maybe segment the too big skb, as in commit | ||
| 482 | * e43ac79a4bc ("sch_tbf: segment too big GSO packets") | ||
| 483 | */ | ||
| 484 | if (unlikely(len > 125 * NSEC_PER_MSEC)) { | ||
| 485 | len = 125 * NSEC_PER_MSEC; | ||
| 486 | q->stat_pkts_too_long++; | ||
| 487 | } | ||
| 488 | 477 | ||
| 489 | f->time_next_packet = now + len; | 478 | rate = min(rate, q->flow_max_rate); |
| 479 | } else { | ||
| 480 | rate = q->flow_max_rate; | ||
| 481 | if (rate == ~0U) | ||
| 482 | goto out; | ||
| 483 | } | ||
| 484 | if (rate) { | ||
| 485 | u32 plen = max(qdisc_pkt_len(skb), q->quantum); | ||
| 486 | u64 len = (u64)plen * NSEC_PER_SEC; | ||
| 487 | |||
| 488 | do_div(len, rate); | ||
| 489 | /* Since socket rate can change later, | ||
| 490 | * clamp the delay to 125 ms. | ||
| 491 | * TODO: maybe segment the too big skb, as in commit | ||
| 492 | * e43ac79a4bc ("sch_tbf: segment too big GSO packets") | ||
| 493 | */ | ||
| 494 | if (unlikely(len > 125 * NSEC_PER_MSEC)) { | ||
| 495 | len = 125 * NSEC_PER_MSEC; | ||
| 496 | q->stat_pkts_too_long++; | ||
| 490 | } | 497 | } |
| 498 | |||
| 499 | f->time_next_packet = now + len; | ||
| 491 | } | 500 | } |
| 492 | out: | 501 | out: |
| 493 | sch->qstats.backlog -= qdisc_pkt_len(skb); | ||
| 494 | qdisc_bstats_update(sch, skb); | 502 | qdisc_bstats_update(sch, skb); |
| 495 | sch->q.qlen--; | ||
| 496 | qdisc_unthrottled(sch); | 503 | qdisc_unthrottled(sch); |
| 497 | return skb; | 504 | return skb; |
| 498 | } | 505 | } |
| 499 | 506 | ||
| 500 | static void fq_reset(struct Qdisc *sch) | 507 | static void fq_reset(struct Qdisc *sch) |
| 501 | { | 508 | { |
| 509 | struct fq_sched_data *q = qdisc_priv(sch); | ||
| 510 | struct rb_root *root; | ||
| 502 | struct sk_buff *skb; | 511 | struct sk_buff *skb; |
| 512 | struct rb_node *p; | ||
| 513 | struct fq_flow *f; | ||
| 514 | unsigned int idx; | ||
| 503 | 515 | ||
| 504 | while ((skb = fq_dequeue(sch)) != NULL) | 516 | while ((skb = fq_dequeue_head(sch, &q->internal)) != NULL) |
| 505 | kfree_skb(skb); | 517 | kfree_skb(skb); |
| 518 | |||
| 519 | if (!q->fq_root) | ||
| 520 | return; | ||
| 521 | |||
| 522 | for (idx = 0; idx < (1U << q->fq_trees_log); idx++) { | ||
| 523 | root = &q->fq_root[idx]; | ||
| 524 | while ((p = rb_first(root)) != NULL) { | ||
| 525 | f = container_of(p, struct fq_flow, fq_node); | ||
| 526 | rb_erase(p, root); | ||
| 527 | |||
| 528 | while ((skb = fq_dequeue_head(sch, f)) != NULL) | ||
| 529 | kfree_skb(skb); | ||
| 530 | |||
| 531 | kmem_cache_free(fq_flow_cachep, f); | ||
| 532 | } | ||
| 533 | } | ||
| 534 | q->new_flows.first = NULL; | ||
| 535 | q->old_flows.first = NULL; | ||
| 536 | q->delayed = RB_ROOT; | ||
| 537 | q->flows = 0; | ||
| 538 | q->inactive_flows = 0; | ||
| 539 | q->throttled_flows = 0; | ||
| 506 | } | 540 | } |
| 507 | 541 | ||
| 508 | static void fq_rehash(struct fq_sched_data *q, | 542 | static void fq_rehash(struct fq_sched_data *q, |
| @@ -645,6 +679,8 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt) | |||
| 645 | while (sch->q.qlen > sch->limit) { | 679 | while (sch->q.qlen > sch->limit) { |
| 646 | struct sk_buff *skb = fq_dequeue(sch); | 680 | struct sk_buff *skb = fq_dequeue(sch); |
| 647 | 681 | ||
| 682 | if (!skb) | ||
| 683 | break; | ||
| 648 | kfree_skb(skb); | 684 | kfree_skb(skb); |
| 649 | drop_count++; | 685 | drop_count++; |
| 650 | } | 686 | } |
| @@ -657,21 +693,9 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt) | |||
| 657 | static void fq_destroy(struct Qdisc *sch) | 693 | static void fq_destroy(struct Qdisc *sch) |
| 658 | { | 694 | { |
| 659 | struct fq_sched_data *q = qdisc_priv(sch); | 695 | struct fq_sched_data *q = qdisc_priv(sch); |
| 660 | struct rb_root *root; | ||
| 661 | struct rb_node *p; | ||
| 662 | unsigned int idx; | ||
| 663 | 696 | ||
| 664 | if (q->fq_root) { | 697 | fq_reset(sch); |
| 665 | for (idx = 0; idx < (1U << q->fq_trees_log); idx++) { | 698 | kfree(q->fq_root); |
| 666 | root = &q->fq_root[idx]; | ||
| 667 | while ((p = rb_first(root)) != NULL) { | ||
| 668 | rb_erase(p, root); | ||
| 669 | kmem_cache_free(fq_flow_cachep, | ||
| 670 | container_of(p, struct fq_flow, fq_node)); | ||
| 671 | } | ||
| 672 | } | ||
| 673 | kfree(q->fq_root); | ||
| 674 | } | ||
| 675 | qdisc_watchdog_cancel(&q->watchdog); | 699 | qdisc_watchdog_cancel(&q->watchdog); |
| 676 | } | 700 | } |
| 677 | 701 | ||
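The sch_fq changes move the qlen/backlog accounting into fq_dequeue_head() itself, which lets fq_reset() drain both the internal queue and every flow in the rb-trees through one helper, and lets fq_destroy() simply reuse fq_reset(), without the qdisc counters drifting. A toy queue showing why centralizing the bookkeeping in the single dequeue path keeps the stats consistent (names illustrative):

```c
#include <stdio.h>
#include <stdlib.h>

struct pkt { struct pkt *next; int len; };
struct q { struct pkt *head; int qlen; int backlog; };

static struct pkt *q_dequeue_head(struct q *q)
{
	struct pkt *p = q->head;

	if (p) {
		q->head = p->next;
		q->qlen--;		/* stats maintained at the single */
		q->backlog -= p->len;	/* point every path goes through  */
	}
	return p;
}

static void q_reset(struct q *q)
{
	struct pkt *p;

	/* reset reuses the accounting dequeue, so counters end at zero */
	while ((p = q_dequeue_head(q)) != NULL)
		free(p);
}

int main(void)
{
	struct q q = { 0 };
	struct pkt *p = calloc(1, sizeof(*p));

	p->len = 100;
	q.head = p;
	q.qlen = 1;
	q.backlog = 100;
	q_reset(&q);
	printf("qlen=%d backlog=%d\n", q.qlen, q.backlog);	/* 0 0 */
	return 0;
}
```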
