diff options
Diffstat (limited to 'net')
121 files changed, 2346 insertions, 1182 deletions
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 73a2a83ee2da..402442402af7 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c | |||
| @@ -137,9 +137,21 @@ static int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev, | |||
| 137 | return rc; | 137 | return rc; |
| 138 | } | 138 | } |
| 139 | 139 | ||
| 140 | static inline netdev_tx_t vlan_netpoll_send_skb(struct vlan_dev_priv *vlan, struct sk_buff *skb) | ||
| 141 | { | ||
| 142 | #ifdef CONFIG_NET_POLL_CONTROLLER | ||
| 143 | if (vlan->netpoll) | ||
| 144 | netpoll_send_skb(vlan->netpoll, skb); | ||
| 145 | #else | ||
| 146 | BUG(); | ||
| 147 | #endif | ||
| 148 | return NETDEV_TX_OK; | ||
| 149 | } | ||
| 150 | |||
| 140 | static netdev_tx_t vlan_dev_hard_start_xmit(struct sk_buff *skb, | 151 | static netdev_tx_t vlan_dev_hard_start_xmit(struct sk_buff *skb, |
| 141 | struct net_device *dev) | 152 | struct net_device *dev) |
| 142 | { | 153 | { |
| 154 | struct vlan_dev_priv *vlan = vlan_dev_priv(dev); | ||
| 143 | struct vlan_ethhdr *veth = (struct vlan_ethhdr *)(skb->data); | 155 | struct vlan_ethhdr *veth = (struct vlan_ethhdr *)(skb->data); |
| 144 | unsigned int len; | 156 | unsigned int len; |
| 145 | int ret; | 157 | int ret; |
| @@ -150,29 +162,30 @@ static netdev_tx_t vlan_dev_hard_start_xmit(struct sk_buff *skb, | |||
| 150 | * OTHER THINGS LIKE FDDI/TokenRing/802.3 SNAPs... | 162 | * OTHER THINGS LIKE FDDI/TokenRing/802.3 SNAPs... |
| 151 | */ | 163 | */ |
| 152 | if (veth->h_vlan_proto != htons(ETH_P_8021Q) || | 164 | if (veth->h_vlan_proto != htons(ETH_P_8021Q) || |
| 153 | vlan_dev_priv(dev)->flags & VLAN_FLAG_REORDER_HDR) { | 165 | vlan->flags & VLAN_FLAG_REORDER_HDR) { |
| 154 | u16 vlan_tci; | 166 | u16 vlan_tci; |
| 155 | vlan_tci = vlan_dev_priv(dev)->vlan_id; | 167 | vlan_tci = vlan->vlan_id; |
| 156 | vlan_tci |= vlan_dev_get_egress_qos_mask(dev, skb); | 168 | vlan_tci |= vlan_dev_get_egress_qos_mask(dev, skb); |
| 157 | skb = __vlan_hwaccel_put_tag(skb, vlan_tci); | 169 | skb = __vlan_hwaccel_put_tag(skb, vlan_tci); |
| 158 | } | 170 | } |
| 159 | 171 | ||
| 160 | skb->dev = vlan_dev_priv(dev)->real_dev; | 172 | skb->dev = vlan->real_dev; |
| 161 | len = skb->len; | 173 | len = skb->len; |
| 162 | if (netpoll_tx_running(dev)) | 174 | if (unlikely(netpoll_tx_running(dev))) |
| 163 | return skb->dev->netdev_ops->ndo_start_xmit(skb, skb->dev); | 175 | return vlan_netpoll_send_skb(vlan, skb); |
| 176 | |||
| 164 | ret = dev_queue_xmit(skb); | 177 | ret = dev_queue_xmit(skb); |
| 165 | 178 | ||
| 166 | if (likely(ret == NET_XMIT_SUCCESS || ret == NET_XMIT_CN)) { | 179 | if (likely(ret == NET_XMIT_SUCCESS || ret == NET_XMIT_CN)) { |
| 167 | struct vlan_pcpu_stats *stats; | 180 | struct vlan_pcpu_stats *stats; |
| 168 | 181 | ||
| 169 | stats = this_cpu_ptr(vlan_dev_priv(dev)->vlan_pcpu_stats); | 182 | stats = this_cpu_ptr(vlan->vlan_pcpu_stats); |
| 170 | u64_stats_update_begin(&stats->syncp); | 183 | u64_stats_update_begin(&stats->syncp); |
| 171 | stats->tx_packets++; | 184 | stats->tx_packets++; |
| 172 | stats->tx_bytes += len; | 185 | stats->tx_bytes += len; |
| 173 | u64_stats_update_end(&stats->syncp); | 186 | u64_stats_update_end(&stats->syncp); |
| 174 | } else { | 187 | } else { |
| 175 | this_cpu_inc(vlan_dev_priv(dev)->vlan_pcpu_stats->tx_dropped); | 188 | this_cpu_inc(vlan->vlan_pcpu_stats->tx_dropped); |
| 176 | } | 189 | } |
| 177 | 190 | ||
| 178 | return ret; | 191 | return ret; |
| @@ -669,25 +682,26 @@ static void vlan_dev_poll_controller(struct net_device *dev) | |||
| 669 | return; | 682 | return; |
| 670 | } | 683 | } |
| 671 | 684 | ||
| 672 | static int vlan_dev_netpoll_setup(struct net_device *dev, struct netpoll_info *npinfo) | 685 | static int vlan_dev_netpoll_setup(struct net_device *dev, struct netpoll_info *npinfo, |
| 686 | gfp_t gfp) | ||
| 673 | { | 687 | { |
| 674 | struct vlan_dev_priv *info = vlan_dev_priv(dev); | 688 | struct vlan_dev_priv *vlan = vlan_dev_priv(dev); |
| 675 | struct net_device *real_dev = info->real_dev; | 689 | struct net_device *real_dev = vlan->real_dev; |
| 676 | struct netpoll *netpoll; | 690 | struct netpoll *netpoll; |
| 677 | int err = 0; | 691 | int err = 0; |
| 678 | 692 | ||
| 679 | netpoll = kzalloc(sizeof(*netpoll), GFP_KERNEL); | 693 | netpoll = kzalloc(sizeof(*netpoll), gfp); |
| 680 | err = -ENOMEM; | 694 | err = -ENOMEM; |
| 681 | if (!netpoll) | 695 | if (!netpoll) |
| 682 | goto out; | 696 | goto out; |
| 683 | 697 | ||
| 684 | err = __netpoll_setup(netpoll, real_dev); | 698 | err = __netpoll_setup(netpoll, real_dev, gfp); |
| 685 | if (err) { | 699 | if (err) { |
| 686 | kfree(netpoll); | 700 | kfree(netpoll); |
| 687 | goto out; | 701 | goto out; |
| 688 | } | 702 | } |
| 689 | 703 | ||
| 690 | info->netpoll = netpoll; | 704 | vlan->netpoll = netpoll; |
| 691 | 705 | ||
| 692 | out: | 706 | out: |
| 693 | return err; | 707 | return err; |
| @@ -695,19 +709,15 @@ out: | |||
| 695 | 709 | ||
| 696 | static void vlan_dev_netpoll_cleanup(struct net_device *dev) | 710 | static void vlan_dev_netpoll_cleanup(struct net_device *dev) |
| 697 | { | 711 | { |
| 698 | struct vlan_dev_priv *info = vlan_dev_priv(dev); | 712 | struct vlan_dev_priv *vlan= vlan_dev_priv(dev); |
| 699 | struct netpoll *netpoll = info->netpoll; | 713 | struct netpoll *netpoll = vlan->netpoll; |
| 700 | 714 | ||
| 701 | if (!netpoll) | 715 | if (!netpoll) |
| 702 | return; | 716 | return; |
| 703 | 717 | ||
| 704 | info->netpoll = NULL; | 718 | vlan->netpoll = NULL; |
| 705 | |||
| 706 | /* Wait for transmitting packets to finish before freeing. */ | ||
| 707 | synchronize_rcu_bh(); | ||
| 708 | 719 | ||
| 709 | __netpoll_cleanup(netpoll); | 720 | __netpoll_free_rcu(netpoll); |
| 710 | kfree(netpoll); | ||
| 711 | } | 721 | } |
| 712 | #endif /* CONFIG_NET_POLL_CONTROLLER */ | 722 | #endif /* CONFIG_NET_POLL_CONTROLLER */ |
| 713 | 723 | ||
diff --git a/net/atm/common.c b/net/atm/common.c index b4b44dbed645..0c0ad930a632 100644 --- a/net/atm/common.c +++ b/net/atm/common.c | |||
| @@ -812,6 +812,7 @@ int vcc_getsockopt(struct socket *sock, int level, int optname, | |||
| 812 | 812 | ||
| 813 | if (!vcc->dev || !test_bit(ATM_VF_ADDR, &vcc->flags)) | 813 | if (!vcc->dev || !test_bit(ATM_VF_ADDR, &vcc->flags)) |
| 814 | return -ENOTCONN; | 814 | return -ENOTCONN; |
| 815 | memset(&pvc, 0, sizeof(pvc)); | ||
| 815 | pvc.sap_family = AF_ATMPVC; | 816 | pvc.sap_family = AF_ATMPVC; |
| 816 | pvc.sap_addr.itf = vcc->dev->number; | 817 | pvc.sap_addr.itf = vcc->dev->number; |
| 817 | pvc.sap_addr.vpi = vcc->vpi; | 818 | pvc.sap_addr.vpi = vcc->vpi; |
diff --git a/net/atm/pvc.c b/net/atm/pvc.c index 3a734919c36c..ae0324021407 100644 --- a/net/atm/pvc.c +++ b/net/atm/pvc.c | |||
| @@ -95,6 +95,7 @@ static int pvc_getname(struct socket *sock, struct sockaddr *sockaddr, | |||
| 95 | return -ENOTCONN; | 95 | return -ENOTCONN; |
| 96 | *sockaddr_len = sizeof(struct sockaddr_atmpvc); | 96 | *sockaddr_len = sizeof(struct sockaddr_atmpvc); |
| 97 | addr = (struct sockaddr_atmpvc *)sockaddr; | 97 | addr = (struct sockaddr_atmpvc *)sockaddr; |
| 98 | memset(addr, 0, sizeof(*addr)); | ||
| 98 | addr->sap_family = AF_ATMPVC; | 99 | addr->sap_family = AF_ATMPVC; |
| 99 | addr->sap_addr.itf = vcc->dev->number; | 100 | addr->sap_addr.itf = vcc->dev->number; |
| 100 | addr->sap_addr.vpi = vcc->vpi; | 101 | addr->sap_addr.vpi = vcc->vpi; |
diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c index b421cc49d2cd..fc866f2e4528 100644 --- a/net/batman-adv/gateway_client.c +++ b/net/batman-adv/gateway_client.c | |||
| @@ -200,11 +200,11 @@ void batadv_gw_election(struct batadv_priv *bat_priv) | |||
| 200 | if (atomic_read(&bat_priv->gw_mode) != BATADV_GW_MODE_CLIENT) | 200 | if (atomic_read(&bat_priv->gw_mode) != BATADV_GW_MODE_CLIENT) |
| 201 | goto out; | 201 | goto out; |
| 202 | 202 | ||
| 203 | if (!batadv_atomic_dec_not_zero(&bat_priv->gw_reselect)) | ||
| 204 | goto out; | ||
| 205 | |||
| 206 | curr_gw = batadv_gw_get_selected_gw_node(bat_priv); | 203 | curr_gw = batadv_gw_get_selected_gw_node(bat_priv); |
| 207 | 204 | ||
| 205 | if (!batadv_atomic_dec_not_zero(&bat_priv->gw_reselect) && curr_gw) | ||
| 206 | goto out; | ||
| 207 | |||
| 208 | next_gw = batadv_gw_get_best_gw_node(bat_priv); | 208 | next_gw = batadv_gw_get_best_gw_node(bat_priv); |
| 209 | 209 | ||
| 210 | if (curr_gw == next_gw) | 210 | if (curr_gw == next_gw) |
diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index a438f4b582fc..99dd8f75b3ff 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c | |||
| @@ -197,6 +197,7 @@ static void batadv_tt_local_event(struct batadv_priv *bat_priv, | |||
| 197 | del: | 197 | del: |
| 198 | list_del(&entry->list); | 198 | list_del(&entry->list); |
| 199 | kfree(entry); | 199 | kfree(entry); |
| 200 | kfree(tt_change_node); | ||
| 200 | event_removed = true; | 201 | event_removed = true; |
| 201 | goto unlock; | 202 | goto unlock; |
| 202 | } | 203 | } |
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 41ff978a33f9..715d7e33fba0 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c | |||
| @@ -1365,6 +1365,9 @@ static bool hci_resolve_next_name(struct hci_dev *hdev) | |||
| 1365 | return false; | 1365 | return false; |
| 1366 | 1366 | ||
| 1367 | e = hci_inquiry_cache_lookup_resolve(hdev, BDADDR_ANY, NAME_NEEDED); | 1367 | e = hci_inquiry_cache_lookup_resolve(hdev, BDADDR_ANY, NAME_NEEDED); |
| 1368 | if (!e) | ||
| 1369 | return false; | ||
| 1370 | |||
| 1368 | if (hci_resolve_name(hdev, e) == 0) { | 1371 | if (hci_resolve_name(hdev, e) == 0) { |
| 1369 | e->name_state = NAME_PENDING; | 1372 | e->name_state = NAME_PENDING; |
| 1370 | return true; | 1373 | return true; |
| @@ -1393,12 +1396,20 @@ static void hci_check_pending_name(struct hci_dev *hdev, struct hci_conn *conn, | |||
| 1393 | return; | 1396 | return; |
| 1394 | 1397 | ||
| 1395 | e = hci_inquiry_cache_lookup_resolve(hdev, bdaddr, NAME_PENDING); | 1398 | e = hci_inquiry_cache_lookup_resolve(hdev, bdaddr, NAME_PENDING); |
| 1396 | if (e) { | 1399 | /* If the device was not found in the list of found devices whose names |
| 1400 | * are pending, there is no need to continue resolving the next name, as | ||
| 1401 | * that will be done upon receiving another Remote Name Request Complete | ||
| 1402 | * Event */ | ||
| 1403 | if (!e) | ||
| 1404 | return; | ||
| 1405 | |||
| 1406 | list_del(&e->list); | ||
| 1407 | if (name) { | ||
| 1397 | e->name_state = NAME_KNOWN; | 1408 | e->name_state = NAME_KNOWN; |
| 1398 | list_del(&e->list); | 1409 | mgmt_remote_name(hdev, bdaddr, ACL_LINK, 0x00, |
| 1399 | if (name) | 1410 | e->data.rssi, name, name_len); |
| 1400 | mgmt_remote_name(hdev, bdaddr, ACL_LINK, 0x00, | 1411 | } else { |
| 1401 | e->data.rssi, name, name_len); | 1412 | e->name_state = NAME_NOT_KNOWN; |
| 1402 | } | 1413 | } |
| 1403 | 1414 | ||
| 1404 | if (hci_resolve_next_name(hdev)) | 1415 | if (hci_resolve_next_name(hdev)) |
| @@ -1762,7 +1773,12 @@ static void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) | |||
| 1762 | if (conn->type == ACL_LINK) { | 1773 | if (conn->type == ACL_LINK) { |
| 1763 | conn->state = BT_CONFIG; | 1774 | conn->state = BT_CONFIG; |
| 1764 | hci_conn_hold(conn); | 1775 | hci_conn_hold(conn); |
| 1765 | conn->disc_timeout = HCI_DISCONN_TIMEOUT; | 1776 | |
| 1777 | if (!conn->out && !hci_conn_ssp_enabled(conn) && | ||
| 1778 | !hci_find_link_key(hdev, &ev->bdaddr)) | ||
| 1779 | conn->disc_timeout = HCI_PAIRING_TIMEOUT; | ||
| 1780 | else | ||
| 1781 | conn->disc_timeout = HCI_DISCONN_TIMEOUT; | ||
| 1766 | } else | 1782 | } else |
| 1767 | conn->state = BT_CONNECTED; | 1783 | conn->state = BT_CONNECTED; |
| 1768 | 1784 | ||
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index a7f04de03d79..19fdac78e555 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c | |||
| @@ -694,6 +694,7 @@ static int hci_sock_getname(struct socket *sock, struct sockaddr *addr, | |||
| 694 | *addr_len = sizeof(*haddr); | 694 | *addr_len = sizeof(*haddr); |
| 695 | haddr->hci_family = AF_BLUETOOTH; | 695 | haddr->hci_family = AF_BLUETOOTH; |
| 696 | haddr->hci_dev = hdev->id; | 696 | haddr->hci_dev = hdev->id; |
| 697 | haddr->hci_channel= 0; | ||
| 697 | 698 | ||
| 698 | release_sock(sk); | 699 | release_sock(sk); |
| 699 | return 0; | 700 | return 0; |
| @@ -1009,6 +1010,7 @@ static int hci_sock_getsockopt(struct socket *sock, int level, int optname, | |||
| 1009 | { | 1010 | { |
| 1010 | struct hci_filter *f = &hci_pi(sk)->filter; | 1011 | struct hci_filter *f = &hci_pi(sk)->filter; |
| 1011 | 1012 | ||
| 1013 | memset(&uf, 0, sizeof(uf)); | ||
| 1012 | uf.type_mask = f->type_mask; | 1014 | uf.type_mask = f->type_mask; |
| 1013 | uf.opcode = f->opcode; | 1015 | uf.opcode = f->opcode; |
| 1014 | uf.event_mask[0] = *((u32 *) f->event_mask + 0); | 1016 | uf.event_mask[0] = *((u32 *) f->event_mask + 0); |
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index a8964db04bfb..daa149b7003c 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c | |||
| @@ -1181,6 +1181,7 @@ static void l2cap_le_conn_ready(struct l2cap_conn *conn) | |||
| 1181 | sk = chan->sk; | 1181 | sk = chan->sk; |
| 1182 | 1182 | ||
| 1183 | hci_conn_hold(conn->hcon); | 1183 | hci_conn_hold(conn->hcon); |
| 1184 | conn->hcon->disc_timeout = HCI_DISCONN_TIMEOUT; | ||
| 1184 | 1185 | ||
| 1185 | bacpy(&bt_sk(sk)->src, conn->src); | 1186 | bacpy(&bt_sk(sk)->src, conn->src); |
| 1186 | bacpy(&bt_sk(sk)->dst, conn->dst); | 1187 | bacpy(&bt_sk(sk)->dst, conn->dst); |
diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index a4bb27e8427e..1497edd191a2 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c | |||
| @@ -245,6 +245,7 @@ static int l2cap_sock_getname(struct socket *sock, struct sockaddr *addr, int *l | |||
| 245 | 245 | ||
| 246 | BT_DBG("sock %p, sk %p", sock, sk); | 246 | BT_DBG("sock %p, sk %p", sock, sk); |
| 247 | 247 | ||
| 248 | memset(la, 0, sizeof(struct sockaddr_l2)); | ||
| 248 | addr->sa_family = AF_BLUETOOTH; | 249 | addr->sa_family = AF_BLUETOOTH; |
| 249 | *len = sizeof(struct sockaddr_l2); | 250 | *len = sizeof(struct sockaddr_l2); |
| 250 | 251 | ||
| @@ -1174,7 +1175,7 @@ static struct sock *l2cap_sock_alloc(struct net *net, struct socket *sock, int p | |||
| 1174 | 1175 | ||
| 1175 | chan = l2cap_chan_create(); | 1176 | chan = l2cap_chan_create(); |
| 1176 | if (!chan) { | 1177 | if (!chan) { |
| 1177 | l2cap_sock_kill(sk); | 1178 | sk_free(sk); |
| 1178 | return NULL; | 1179 | return NULL; |
| 1179 | } | 1180 | } |
| 1180 | 1181 | ||
diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index 7e1e59645c05..1a17850d093c 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c | |||
| @@ -528,6 +528,7 @@ static int rfcomm_sock_getname(struct socket *sock, struct sockaddr *addr, int * | |||
| 528 | 528 | ||
| 529 | BT_DBG("sock %p, sk %p", sock, sk); | 529 | BT_DBG("sock %p, sk %p", sock, sk); |
| 530 | 530 | ||
| 531 | memset(sa, 0, sizeof(*sa)); | ||
| 531 | sa->rc_family = AF_BLUETOOTH; | 532 | sa->rc_family = AF_BLUETOOTH; |
| 532 | sa->rc_channel = rfcomm_pi(sk)->channel; | 533 | sa->rc_channel = rfcomm_pi(sk)->channel; |
| 533 | if (peer) | 534 | if (peer) |
| @@ -822,6 +823,7 @@ static int rfcomm_sock_getsockopt(struct socket *sock, int level, int optname, c | |||
| 822 | } | 823 | } |
| 823 | 824 | ||
| 824 | sec.level = rfcomm_pi(sk)->sec_level; | 825 | sec.level = rfcomm_pi(sk)->sec_level; |
| 826 | sec.key_size = 0; | ||
| 825 | 827 | ||
| 826 | len = min_t(unsigned int, len, sizeof(sec)); | 828 | len = min_t(unsigned int, len, sizeof(sec)); |
| 827 | if (copy_to_user(optval, (char *) &sec, len)) | 829 | if (copy_to_user(optval, (char *) &sec, len)) |
diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c index cb960773c002..56f182393c4c 100644 --- a/net/bluetooth/rfcomm/tty.c +++ b/net/bluetooth/rfcomm/tty.c | |||
| @@ -456,7 +456,7 @@ static int rfcomm_get_dev_list(void __user *arg) | |||
| 456 | 456 | ||
| 457 | size = sizeof(*dl) + dev_num * sizeof(*di); | 457 | size = sizeof(*dl) + dev_num * sizeof(*di); |
| 458 | 458 | ||
| 459 | dl = kmalloc(size, GFP_KERNEL); | 459 | dl = kzalloc(size, GFP_KERNEL); |
| 460 | if (!dl) | 460 | if (!dl) |
| 461 | return -ENOMEM; | 461 | return -ENOMEM; |
| 462 | 462 | ||
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 40bbe25dcff7..3589e21edb09 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c | |||
| @@ -131,6 +131,15 @@ static int sco_conn_del(struct hci_conn *hcon, int err) | |||
| 131 | sco_sock_clear_timer(sk); | 131 | sco_sock_clear_timer(sk); |
| 132 | sco_chan_del(sk, err); | 132 | sco_chan_del(sk, err); |
| 133 | bh_unlock_sock(sk); | 133 | bh_unlock_sock(sk); |
| 134 | |||
| 135 | sco_conn_lock(conn); | ||
| 136 | conn->sk = NULL; | ||
| 137 | sco_pi(sk)->conn = NULL; | ||
| 138 | sco_conn_unlock(conn); | ||
| 139 | |||
| 140 | if (conn->hcon) | ||
| 141 | hci_conn_put(conn->hcon); | ||
| 142 | |||
| 134 | sco_sock_kill(sk); | 143 | sco_sock_kill(sk); |
| 135 | } | 144 | } |
| 136 | 145 | ||
| @@ -821,16 +830,6 @@ static void sco_chan_del(struct sock *sk, int err) | |||
| 821 | 830 | ||
| 822 | BT_DBG("sk %p, conn %p, err %d", sk, conn, err); | 831 | BT_DBG("sk %p, conn %p, err %d", sk, conn, err); |
| 823 | 832 | ||
| 824 | if (conn) { | ||
| 825 | sco_conn_lock(conn); | ||
| 826 | conn->sk = NULL; | ||
| 827 | sco_pi(sk)->conn = NULL; | ||
| 828 | sco_conn_unlock(conn); | ||
| 829 | |||
| 830 | if (conn->hcon) | ||
| 831 | hci_conn_put(conn->hcon); | ||
| 832 | } | ||
| 833 | |||
| 834 | sk->sk_state = BT_CLOSED; | 833 | sk->sk_state = BT_CLOSED; |
| 835 | sk->sk_err = err; | 834 | sk->sk_err = err; |
| 836 | sk->sk_state_change(sk); | 835 | sk->sk_state_change(sk); |
diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c index 16ef0dc85a0a..901a616c8083 100644 --- a/net/bluetooth/smp.c +++ b/net/bluetooth/smp.c | |||
| @@ -579,8 +579,11 @@ static u8 smp_cmd_pairing_req(struct l2cap_conn *conn, struct sk_buff *skb) | |||
| 579 | 579 | ||
| 580 | if (!test_and_set_bit(HCI_CONN_LE_SMP_PEND, &conn->hcon->flags)) | 580 | if (!test_and_set_bit(HCI_CONN_LE_SMP_PEND, &conn->hcon->flags)) |
| 581 | smp = smp_chan_create(conn); | 581 | smp = smp_chan_create(conn); |
| 582 | else | ||
| 583 | smp = conn->smp_chan; | ||
| 582 | 584 | ||
| 583 | smp = conn->smp_chan; | 585 | if (!smp) |
| 586 | return SMP_UNSPECIFIED; | ||
| 584 | 587 | ||
| 585 | smp->preq[0] = SMP_CMD_PAIRING_REQ; | 588 | smp->preq[0] = SMP_CMD_PAIRING_REQ; |
| 586 | memcpy(&smp->preq[1], req, sizeof(*req)); | 589 | memcpy(&smp->preq[1], req, sizeof(*req)); |
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 333484537600..070e8a68cfc6 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c | |||
| @@ -31,9 +31,11 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) | |||
| 31 | struct net_bridge_mdb_entry *mdst; | 31 | struct net_bridge_mdb_entry *mdst; |
| 32 | struct br_cpu_netstats *brstats = this_cpu_ptr(br->stats); | 32 | struct br_cpu_netstats *brstats = this_cpu_ptr(br->stats); |
| 33 | 33 | ||
| 34 | rcu_read_lock(); | ||
| 34 | #ifdef CONFIG_BRIDGE_NETFILTER | 35 | #ifdef CONFIG_BRIDGE_NETFILTER |
| 35 | if (skb->nf_bridge && (skb->nf_bridge->mask & BRNF_BRIDGED_DNAT)) { | 36 | if (skb->nf_bridge && (skb->nf_bridge->mask & BRNF_BRIDGED_DNAT)) { |
| 36 | br_nf_pre_routing_finish_bridge_slow(skb); | 37 | br_nf_pre_routing_finish_bridge_slow(skb); |
| 38 | rcu_read_unlock(); | ||
| 37 | return NETDEV_TX_OK; | 39 | return NETDEV_TX_OK; |
| 38 | } | 40 | } |
| 39 | #endif | 41 | #endif |
| @@ -48,7 +50,6 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) | |||
| 48 | skb_reset_mac_header(skb); | 50 | skb_reset_mac_header(skb); |
| 49 | skb_pull(skb, ETH_HLEN); | 51 | skb_pull(skb, ETH_HLEN); |
| 50 | 52 | ||
| 51 | rcu_read_lock(); | ||
| 52 | if (is_broadcast_ether_addr(dest)) | 53 | if (is_broadcast_ether_addr(dest)) |
| 53 | br_flood_deliver(br, skb); | 54 | br_flood_deliver(br, skb); |
| 54 | else if (is_multicast_ether_addr(dest)) { | 55 | else if (is_multicast_ether_addr(dest)) { |
| @@ -206,24 +207,23 @@ static void br_poll_controller(struct net_device *br_dev) | |||
| 206 | static void br_netpoll_cleanup(struct net_device *dev) | 207 | static void br_netpoll_cleanup(struct net_device *dev) |
| 207 | { | 208 | { |
| 208 | struct net_bridge *br = netdev_priv(dev); | 209 | struct net_bridge *br = netdev_priv(dev); |
| 209 | struct net_bridge_port *p, *n; | 210 | struct net_bridge_port *p; |
| 210 | 211 | ||
| 211 | list_for_each_entry_safe(p, n, &br->port_list, list) { | 212 | list_for_each_entry(p, &br->port_list, list) |
| 212 | br_netpoll_disable(p); | 213 | br_netpoll_disable(p); |
| 213 | } | ||
| 214 | } | 214 | } |
| 215 | 215 | ||
| 216 | static int br_netpoll_setup(struct net_device *dev, struct netpoll_info *ni) | 216 | static int br_netpoll_setup(struct net_device *dev, struct netpoll_info *ni, |
| 217 | gfp_t gfp) | ||
| 217 | { | 218 | { |
| 218 | struct net_bridge *br = netdev_priv(dev); | 219 | struct net_bridge *br = netdev_priv(dev); |
| 219 | struct net_bridge_port *p, *n; | 220 | struct net_bridge_port *p; |
| 220 | int err = 0; | 221 | int err = 0; |
| 221 | 222 | ||
| 222 | list_for_each_entry_safe(p, n, &br->port_list, list) { | 223 | list_for_each_entry(p, &br->port_list, list) { |
| 223 | if (!p->dev) | 224 | if (!p->dev) |
| 224 | continue; | 225 | continue; |
| 225 | 226 | err = br_netpoll_enable(p, gfp); | |
| 226 | err = br_netpoll_enable(p); | ||
| 227 | if (err) | 227 | if (err) |
| 228 | goto fail; | 228 | goto fail; |
| 229 | } | 229 | } |
| @@ -236,17 +236,17 @@ fail: | |||
| 236 | goto out; | 236 | goto out; |
| 237 | } | 237 | } |
| 238 | 238 | ||
| 239 | int br_netpoll_enable(struct net_bridge_port *p) | 239 | int br_netpoll_enable(struct net_bridge_port *p, gfp_t gfp) |
| 240 | { | 240 | { |
| 241 | struct netpoll *np; | 241 | struct netpoll *np; |
| 242 | int err = 0; | 242 | int err = 0; |
| 243 | 243 | ||
| 244 | np = kzalloc(sizeof(*p->np), GFP_KERNEL); | 244 | np = kzalloc(sizeof(*p->np), gfp); |
| 245 | err = -ENOMEM; | 245 | err = -ENOMEM; |
| 246 | if (!np) | 246 | if (!np) |
| 247 | goto out; | 247 | goto out; |
| 248 | 248 | ||
| 249 | err = __netpoll_setup(np, p->dev); | 249 | err = __netpoll_setup(np, p->dev, gfp); |
| 250 | if (err) { | 250 | if (err) { |
| 251 | kfree(np); | 251 | kfree(np); |
| 252 | goto out; | 252 | goto out; |
| @@ -267,11 +267,7 @@ void br_netpoll_disable(struct net_bridge_port *p) | |||
| 267 | 267 | ||
| 268 | p->np = NULL; | 268 | p->np = NULL; |
| 269 | 269 | ||
| 270 | /* Wait for transmitting packets to finish before freeing. */ | 270 | __netpoll_free_rcu(np); |
| 271 | synchronize_rcu_bh(); | ||
| 272 | |||
| 273 | __netpoll_cleanup(np); | ||
| 274 | kfree(np); | ||
| 275 | } | 271 | } |
| 276 | 272 | ||
| 277 | #endif | 273 | #endif |
diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index e9466d412707..02015a505d2a 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c | |||
| @@ -65,7 +65,7 @@ static void __br_deliver(const struct net_bridge_port *to, struct sk_buff *skb) | |||
| 65 | { | 65 | { |
| 66 | skb->dev = to->dev; | 66 | skb->dev = to->dev; |
| 67 | 67 | ||
| 68 | if (unlikely(netpoll_tx_running(to->dev))) { | 68 | if (unlikely(netpoll_tx_running(to->br->dev))) { |
| 69 | if (packet_length(skb) > skb->dev->mtu && !skb_is_gso(skb)) | 69 | if (packet_length(skb) > skb->dev->mtu && !skb_is_gso(skb)) |
| 70 | kfree_skb(skb); | 70 | kfree_skb(skb); |
| 71 | else { | 71 | else { |
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index e1144e1617be..1c8fdc3558cd 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c | |||
| @@ -361,7 +361,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) | |||
| 361 | if (err) | 361 | if (err) |
| 362 | goto err2; | 362 | goto err2; |
| 363 | 363 | ||
| 364 | if (br_netpoll_info(br) && ((err = br_netpoll_enable(p)))) | 364 | if (br_netpoll_info(br) && ((err = br_netpoll_enable(p, GFP_KERNEL)))) |
| 365 | goto err3; | 365 | goto err3; |
| 366 | 366 | ||
| 367 | err = netdev_set_master(dev, br->dev); | 367 | err = netdev_set_master(dev, br->dev); |
| @@ -427,6 +427,10 @@ int br_del_if(struct net_bridge *br, struct net_device *dev) | |||
| 427 | if (!p || p->br != br) | 427 | if (!p || p->br != br) |
| 428 | return -EINVAL; | 428 | return -EINVAL; |
| 429 | 429 | ||
| 430 | /* Since more than one interface can be attached to a bridge, | ||
| 431 | * there may still be an alternate path for netconsole to use; | ||
| 432 | * therefore there is no reason for a NETDEV_RELEASE event. | ||
| 433 | */ | ||
| 430 | del_nbp(p); | 434 | del_nbp(p); |
| 431 | 435 | ||
| 432 | spin_lock_bh(&br->lock); | 436 | spin_lock_bh(&br->lock); |
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index a768b2408edf..f507d2af9646 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h | |||
| @@ -316,7 +316,7 @@ static inline void br_netpoll_send_skb(const struct net_bridge_port *p, | |||
| 316 | netpoll_send_skb(np, skb); | 316 | netpoll_send_skb(np, skb); |
| 317 | } | 317 | } |
| 318 | 318 | ||
| 319 | extern int br_netpoll_enable(struct net_bridge_port *p); | 319 | extern int br_netpoll_enable(struct net_bridge_port *p, gfp_t gfp); |
| 320 | extern void br_netpoll_disable(struct net_bridge_port *p); | 320 | extern void br_netpoll_disable(struct net_bridge_port *p); |
| 321 | #else | 321 | #else |
| 322 | static inline struct netpoll_info *br_netpoll_info(struct net_bridge *br) | 322 | static inline struct netpoll_info *br_netpoll_info(struct net_bridge *br) |
| @@ -329,7 +329,7 @@ static inline void br_netpoll_send_skb(const struct net_bridge_port *p, | |||
| 329 | { | 329 | { |
| 330 | } | 330 | } |
| 331 | 331 | ||
| 332 | static inline int br_netpoll_enable(struct net_bridge_port *p) | 332 | static inline int br_netpoll_enable(struct net_bridge_port *p, gfp_t gfp) |
| 333 | { | 333 | { |
| 334 | return 0; | 334 | return 0; |
| 335 | } | 335 | } |
diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c index 6229b62749e8..13b36bdc76a7 100644 --- a/net/bridge/br_sysfs_if.c +++ b/net/bridge/br_sysfs_if.c | |||
| @@ -27,7 +27,7 @@ struct brport_attribute { | |||
| 27 | }; | 27 | }; |
| 28 | 28 | ||
| 29 | #define BRPORT_ATTR(_name,_mode,_show,_store) \ | 29 | #define BRPORT_ATTR(_name,_mode,_show,_store) \ |
| 30 | struct brport_attribute brport_attr_##_name = { \ | 30 | const struct brport_attribute brport_attr_##_name = { \ |
| 31 | .attr = {.name = __stringify(_name), \ | 31 | .attr = {.name = __stringify(_name), \ |
| 32 | .mode = _mode }, \ | 32 | .mode = _mode }, \ |
| 33 | .show = _show, \ | 33 | .show = _show, \ |
| @@ -164,7 +164,7 @@ static BRPORT_ATTR(multicast_router, S_IRUGO | S_IWUSR, show_multicast_router, | |||
| 164 | store_multicast_router); | 164 | store_multicast_router); |
| 165 | #endif | 165 | #endif |
| 166 | 166 | ||
| 167 | static struct brport_attribute *brport_attrs[] = { | 167 | static const struct brport_attribute *brport_attrs[] = { |
| 168 | &brport_attr_path_cost, | 168 | &brport_attr_path_cost, |
| 169 | &brport_attr_priority, | 169 | &brport_attr_priority, |
| 170 | &brport_attr_port_id, | 170 | &brport_attr_port_id, |
| @@ -241,7 +241,7 @@ const struct sysfs_ops brport_sysfs_ops = { | |||
| 241 | int br_sysfs_addif(struct net_bridge_port *p) | 241 | int br_sysfs_addif(struct net_bridge_port *p) |
| 242 | { | 242 | { |
| 243 | struct net_bridge *br = p->br; | 243 | struct net_bridge *br = p->br; |
| 244 | struct brport_attribute **a; | 244 | const struct brport_attribute **a; |
| 245 | int err; | 245 | int err; |
| 246 | 246 | ||
| 247 | err = sysfs_create_link(&p->kobj, &br->dev->dev.kobj, | 247 | err = sysfs_create_link(&p->kobj, &br->dev->dev.kobj, |
diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c index 78f1cdad5b33..095259f83902 100644 --- a/net/caif/caif_socket.c +++ b/net/caif/caif_socket.c | |||
| @@ -141,7 +141,7 @@ static int caif_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) | |||
| 141 | err = sk_filter(sk, skb); | 141 | err = sk_filter(sk, skb); |
| 142 | if (err) | 142 | if (err) |
| 143 | return err; | 143 | return err; |
| 144 | if (!sk_rmem_schedule(sk, skb->truesize) && rx_flow_is_on(cf_sk)) { | 144 | if (!sk_rmem_schedule(sk, skb, skb->truesize) && rx_flow_is_on(cf_sk)) { |
| 145 | set_rx_flow_off(cf_sk); | 145 | set_rx_flow_off(cf_sk); |
| 146 | net_dbg_ratelimited("sending flow OFF due to rmem_schedule\n"); | 146 | net_dbg_ratelimited("sending flow OFF due to rmem_schedule\n"); |
| 147 | caif_flow_ctrl(sk, CAIF_MODEMCMD_FLOW_OFF_REQ); | 147 | caif_flow_ctrl(sk, CAIF_MODEMCMD_FLOW_OFF_REQ); |
diff --git a/net/caif/chnl_net.c b/net/caif/chnl_net.c index 69771c04ba8f..e597733affb8 100644 --- a/net/caif/chnl_net.c +++ b/net/caif/chnl_net.c | |||
| @@ -94,6 +94,10 @@ static int chnl_recv_cb(struct cflayer *layr, struct cfpkt *pkt) | |||
| 94 | 94 | ||
| 95 | /* check the version of IP */ | 95 | /* check the version of IP */ |
| 96 | ip_version = skb_header_pointer(skb, 0, 1, &buf); | 96 | ip_version = skb_header_pointer(skb, 0, 1, &buf); |
| 97 | if (!ip_version) { | ||
| 98 | kfree_skb(skb); | ||
| 99 | return -EINVAL; | ||
| 100 | } | ||
| 97 | 101 | ||
| 98 | switch (*ip_version >> 4) { | 102 | switch (*ip_version >> 4) { |
| 99 | case 4: | 103 | case 4: |
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c index ba4323bce0e9..a8020293f342 100644 --- a/net/ceph/ceph_common.c +++ b/net/ceph/ceph_common.c | |||
| @@ -17,6 +17,7 @@ | |||
| 17 | #include <linux/string.h> | 17 | #include <linux/string.h> |
| 18 | 18 | ||
| 19 | 19 | ||
| 20 | #include <linux/ceph/ceph_features.h> | ||
| 20 | #include <linux/ceph/libceph.h> | 21 | #include <linux/ceph/libceph.h> |
| 21 | #include <linux/ceph/debugfs.h> | 22 | #include <linux/ceph/debugfs.h> |
| 22 | #include <linux/ceph/decode.h> | 23 | #include <linux/ceph/decode.h> |
| @@ -83,7 +84,6 @@ int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid) | |||
| 83 | return -1; | 84 | return -1; |
| 84 | } | 85 | } |
| 85 | } else { | 86 | } else { |
| 86 | pr_info("client%lld fsid %pU\n", ceph_client_id(client), fsid); | ||
| 87 | memcpy(&client->fsid, fsid, sizeof(*fsid)); | 87 | memcpy(&client->fsid, fsid, sizeof(*fsid)); |
| 88 | } | 88 | } |
| 89 | return 0; | 89 | return 0; |
| @@ -460,27 +460,23 @@ struct ceph_client *ceph_create_client(struct ceph_options *opt, void *private, | |||
| 460 | client->auth_err = 0; | 460 | client->auth_err = 0; |
| 461 | 461 | ||
| 462 | client->extra_mon_dispatch = NULL; | 462 | client->extra_mon_dispatch = NULL; |
| 463 | client->supported_features = CEPH_FEATURE_SUPPORTED_DEFAULT | | 463 | client->supported_features = CEPH_FEATURES_SUPPORTED_DEFAULT | |
| 464 | supported_features; | 464 | supported_features; |
| 465 | client->required_features = CEPH_FEATURE_REQUIRED_DEFAULT | | 465 | client->required_features = CEPH_FEATURES_REQUIRED_DEFAULT | |
| 466 | required_features; | 466 | required_features; |
| 467 | 467 | ||
| 468 | /* msgr */ | 468 | /* msgr */ |
| 469 | if (ceph_test_opt(client, MYIP)) | 469 | if (ceph_test_opt(client, MYIP)) |
| 470 | myaddr = &client->options->my_addr; | 470 | myaddr = &client->options->my_addr; |
| 471 | client->msgr = ceph_messenger_create(myaddr, | 471 | ceph_messenger_init(&client->msgr, myaddr, |
| 472 | client->supported_features, | 472 | client->supported_features, |
| 473 | client->required_features); | 473 | client->required_features, |
| 474 | if (IS_ERR(client->msgr)) { | 474 | ceph_test_opt(client, NOCRC)); |
| 475 | err = PTR_ERR(client->msgr); | ||
| 476 | goto fail; | ||
| 477 | } | ||
| 478 | client->msgr->nocrc = ceph_test_opt(client, NOCRC); | ||
| 479 | 475 | ||
| 480 | /* subsystems */ | 476 | /* subsystems */ |
| 481 | err = ceph_monc_init(&client->monc, client); | 477 | err = ceph_monc_init(&client->monc, client); |
| 482 | if (err < 0) | 478 | if (err < 0) |
| 483 | goto fail_msgr; | 479 | goto fail; |
| 484 | err = ceph_osdc_init(&client->osdc, client); | 480 | err = ceph_osdc_init(&client->osdc, client); |
| 485 | if (err < 0) | 481 | if (err < 0) |
| 486 | goto fail_monc; | 482 | goto fail_monc; |
| @@ -489,8 +485,6 @@ struct ceph_client *ceph_create_client(struct ceph_options *opt, void *private, | |||
| 489 | 485 | ||
| 490 | fail_monc: | 486 | fail_monc: |
| 491 | ceph_monc_stop(&client->monc); | 487 | ceph_monc_stop(&client->monc); |
| 492 | fail_msgr: | ||
| 493 | ceph_messenger_destroy(client->msgr); | ||
| 494 | fail: | 488 | fail: |
| 495 | kfree(client); | 489 | kfree(client); |
| 496 | return ERR_PTR(err); | 490 | return ERR_PTR(err); |
| @@ -501,6 +495,8 @@ void ceph_destroy_client(struct ceph_client *client) | |||
| 501 | { | 495 | { |
| 502 | dout("destroy_client %p\n", client); | 496 | dout("destroy_client %p\n", client); |
| 503 | 497 | ||
| 498 | atomic_set(&client->msgr.stopping, 1); | ||
| 499 | |||
| 504 | /* unmount */ | 500 | /* unmount */ |
| 505 | ceph_osdc_stop(&client->osdc); | 501 | ceph_osdc_stop(&client->osdc); |
| 506 | 502 | ||
| @@ -508,8 +504,6 @@ void ceph_destroy_client(struct ceph_client *client) | |||
| 508 | 504 | ||
| 509 | ceph_debugfs_client_cleanup(client); | 505 | ceph_debugfs_client_cleanup(client); |
| 510 | 506 | ||
| 511 | ceph_messenger_destroy(client->msgr); | ||
| 512 | |||
| 513 | ceph_destroy_options(client->options); | 507 | ceph_destroy_options(client->options); |
| 514 | 508 | ||
| 515 | kfree(client); | 509 | kfree(client); |
diff --git a/net/ceph/crush/mapper.c b/net/ceph/crush/mapper.c index d7edc24333b8..35fce755ce10 100644 --- a/net/ceph/crush/mapper.c +++ b/net/ceph/crush/mapper.c | |||
| @@ -306,7 +306,6 @@ static int crush_choose(const struct crush_map *map, | |||
| 306 | int item = 0; | 306 | int item = 0; |
| 307 | int itemtype; | 307 | int itemtype; |
| 308 | int collide, reject; | 308 | int collide, reject; |
| 309 | const unsigned int orig_tries = 5; /* attempts before we fall back to search */ | ||
| 310 | 309 | ||
| 311 | dprintk("CHOOSE%s bucket %d x %d outpos %d numrep %d\n", recurse_to_leaf ? "_LEAF" : "", | 310 | dprintk("CHOOSE%s bucket %d x %d outpos %d numrep %d\n", recurse_to_leaf ? "_LEAF" : "", |
| 312 | bucket->id, x, outpos, numrep); | 311 | bucket->id, x, outpos, numrep); |
| @@ -351,8 +350,9 @@ static int crush_choose(const struct crush_map *map, | |||
| 351 | reject = 1; | 350 | reject = 1; |
| 352 | goto reject; | 351 | goto reject; |
| 353 | } | 352 | } |
| 354 | if (flocal >= (in->size>>1) && | 353 | if (map->choose_local_fallback_tries > 0 && |
| 355 | flocal > orig_tries) | 354 | flocal >= (in->size>>1) && |
| 355 | flocal > map->choose_local_fallback_tries) | ||
| 356 | item = bucket_perm_choose(in, x, r); | 356 | item = bucket_perm_choose(in, x, r); |
| 357 | else | 357 | else |
| 358 | item = crush_bucket_choose(in, x, r); | 358 | item = crush_bucket_choose(in, x, r); |
| @@ -422,13 +422,14 @@ reject: | |||
| 422 | ftotal++; | 422 | ftotal++; |
| 423 | flocal++; | 423 | flocal++; |
| 424 | 424 | ||
| 425 | if (collide && flocal < 3) | 425 | if (collide && flocal <= map->choose_local_tries) |
| 426 | /* retry locally a few times */ | 426 | /* retry locally a few times */ |
| 427 | retry_bucket = 1; | 427 | retry_bucket = 1; |
| 428 | else if (flocal <= in->size + orig_tries) | 428 | else if (map->choose_local_fallback_tries > 0 && |
| 429 | flocal <= in->size + map->choose_local_fallback_tries) | ||
| 429 | /* exhaustive bucket search */ | 430 | /* exhaustive bucket search */ |
| 430 | retry_bucket = 1; | 431 | retry_bucket = 1; |
| 431 | else if (ftotal < 20) | 432 | else if (ftotal <= map->choose_total_tries) |
| 432 | /* then retry descent */ | 433 | /* then retry descent */ |
| 433 | retry_descent = 1; | 434 | retry_descent = 1; |
| 434 | else | 435 | else |
diff --git a/net/ceph/crypto.c b/net/ceph/crypto.c index b780cb7947dd..9da7fdd3cd8a 100644 --- a/net/ceph/crypto.c +++ b/net/ceph/crypto.c | |||
| @@ -466,6 +466,7 @@ void ceph_key_destroy(struct key *key) { | |||
| 466 | struct ceph_crypto_key *ckey = key->payload.data; | 466 | struct ceph_crypto_key *ckey = key->payload.data; |
| 467 | 467 | ||
| 468 | ceph_crypto_key_destroy(ckey); | 468 | ceph_crypto_key_destroy(ckey); |
| 469 | kfree(ckey); | ||
| 469 | } | 470 | } |
| 470 | 471 | ||
| 471 | struct key_type key_type_ceph = { | 472 | struct key_type key_type_ceph = { |
diff --git a/net/ceph/crypto.h b/net/ceph/crypto.h index 1919d1550d75..3572dc518bc9 100644 --- a/net/ceph/crypto.h +++ b/net/ceph/crypto.h | |||
| @@ -16,7 +16,8 @@ struct ceph_crypto_key { | |||
| 16 | 16 | ||
| 17 | static inline void ceph_crypto_key_destroy(struct ceph_crypto_key *key) | 17 | static inline void ceph_crypto_key_destroy(struct ceph_crypto_key *key) |
| 18 | { | 18 | { |
| 19 | kfree(key->key); | 19 | if (key) |
| 20 | kfree(key->key); | ||
| 20 | } | 21 | } |
| 21 | 22 | ||
| 22 | extern int ceph_crypto_key_clone(struct ceph_crypto_key *dst, | 23 | extern int ceph_crypto_key_clone(struct ceph_crypto_key *dst, |
diff --git a/net/ceph/debugfs.c b/net/ceph/debugfs.c index 54b531a01121..38b5dc1823d4 100644 --- a/net/ceph/debugfs.c +++ b/net/ceph/debugfs.c | |||
| @@ -189,6 +189,9 @@ int ceph_debugfs_client_init(struct ceph_client *client) | |||
| 189 | snprintf(name, sizeof(name), "%pU.client%lld", &client->fsid, | 189 | snprintf(name, sizeof(name), "%pU.client%lld", &client->fsid, |
| 190 | client->monc.auth->global_id); | 190 | client->monc.auth->global_id); |
| 191 | 191 | ||
| 192 | dout("ceph_debugfs_client_init %p %s\n", client, name); | ||
| 193 | |||
| 194 | BUG_ON(client->debugfs_dir); | ||
| 192 | client->debugfs_dir = debugfs_create_dir(name, ceph_debugfs_dir); | 195 | client->debugfs_dir = debugfs_create_dir(name, ceph_debugfs_dir); |
| 193 | if (!client->debugfs_dir) | 196 | if (!client->debugfs_dir) |
| 194 | goto out; | 197 | goto out; |
| @@ -234,6 +237,7 @@ out: | |||
| 234 | 237 | ||
| 235 | void ceph_debugfs_client_cleanup(struct ceph_client *client) | 238 | void ceph_debugfs_client_cleanup(struct ceph_client *client) |
| 236 | { | 239 | { |
| 240 | dout("ceph_debugfs_client_cleanup %p\n", client); | ||
| 237 | debugfs_remove(client->debugfs_osdmap); | 241 | debugfs_remove(client->debugfs_osdmap); |
| 238 | debugfs_remove(client->debugfs_monmap); | 242 | debugfs_remove(client->debugfs_monmap); |
| 239 | debugfs_remove(client->osdc.debugfs_file); | 243 | debugfs_remove(client->osdc.debugfs_file); |
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 10255e81be79..24c5eea8c45b 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c | |||
| @@ -29,6 +29,74 @@ | |||
| 29 | * the sender. | 29 | * the sender. |
| 30 | */ | 30 | */ |
| 31 | 31 | ||
| 32 | /* | ||
| 33 | * We track the state of the socket on a given connection using | ||
| 34 | * values defined below. The transition to a new socket state is | ||
| 35 | * handled by a function which verifies we aren't coming from an | ||
| 36 | * unexpected state. | ||
| 37 | * | ||
| 38 | * -------- | ||
| 39 | * | NEW* | transient initial state | ||
| 40 | * -------- | ||
| 41 | * | con_sock_state_init() | ||
| 42 | * v | ||
| 43 | * ---------- | ||
| 44 | * | CLOSED | initialized, but no socket (and no | ||
| 45 | * ---------- TCP connection) | ||
| 46 | * ^ \ | ||
| 47 | * | \ con_sock_state_connecting() | ||
| 48 | * | ---------------------- | ||
| 49 | * | \ | ||
| 50 | * + con_sock_state_closed() \ | ||
| 51 | * |+--------------------------- \ | ||
| 52 | * | \ \ \ | ||
| 53 | * | ----------- \ \ | ||
| 54 | * | | CLOSING | socket event; \ \ | ||
| 55 | * | ----------- await close \ \ | ||
| 56 | * | ^ \ | | ||
| 57 | * | | \ | | ||
| 58 | * | + con_sock_state_closing() \ | | ||
| 59 | * | / \ | | | ||
| 60 | * | / --------------- | | | ||
| 61 | * | / \ v v | ||
| 62 | * | / -------------- | ||
| 63 | * | / -----------------| CONNECTING | socket created, TCP | ||
| 64 | * | | / -------------- connect initiated | ||
| 65 | * | | | con_sock_state_connected() | ||
| 66 | * | | v | ||
| 67 | * ------------- | ||
| 68 | * | CONNECTED | TCP connection established | ||
| 69 | * ------------- | ||
| 70 | * | ||
| 71 | * State values for ceph_connection->sock_state; NEW is assumed to be 0. | ||
| 72 | */ | ||
| 73 | |||
| 74 | #define CON_SOCK_STATE_NEW 0 /* -> CLOSED */ | ||
| 75 | #define CON_SOCK_STATE_CLOSED 1 /* -> CONNECTING */ | ||
| 76 | #define CON_SOCK_STATE_CONNECTING 2 /* -> CONNECTED or -> CLOSING */ | ||
| 77 | #define CON_SOCK_STATE_CONNECTED 3 /* -> CLOSING or -> CLOSED */ | ||
| 78 | #define CON_SOCK_STATE_CLOSING 4 /* -> CLOSED */ | ||
| 79 | |||
| 80 | /* | ||
| 81 | * connection states | ||
| 82 | */ | ||
| 83 | #define CON_STATE_CLOSED 1 /* -> PREOPEN */ | ||
| 84 | #define CON_STATE_PREOPEN 2 /* -> CONNECTING, CLOSED */ | ||
| 85 | #define CON_STATE_CONNECTING 3 /* -> NEGOTIATING, CLOSED */ | ||
| 86 | #define CON_STATE_NEGOTIATING 4 /* -> OPEN, CLOSED */ | ||
| 87 | #define CON_STATE_OPEN 5 /* -> STANDBY, CLOSED */ | ||
| 88 | #define CON_STATE_STANDBY 6 /* -> PREOPEN, CLOSED */ | ||
| 89 | |||
| 90 | /* | ||
| 91 | * ceph_connection flag bits | ||
| 92 | */ | ||
| 93 | #define CON_FLAG_LOSSYTX 0 /* we can close channel or drop | ||
| 94 | * messages on errors */ | ||
| 95 | #define CON_FLAG_KEEPALIVE_PENDING 1 /* we need to send a keepalive */ | ||
| 96 | #define CON_FLAG_WRITE_PENDING 2 /* we have data ready to send */ | ||
| 97 | #define CON_FLAG_SOCK_CLOSED 3 /* socket state changed to closed */ | ||
| 98 | #define CON_FLAG_BACKOFF 4 /* need to retry queuing delayed work */ | ||
| 99 | |||
| 32 | /* static tag bytes (protocol control messages) */ | 100 | /* static tag bytes (protocol control messages) */ |
| 33 | static char tag_msg = CEPH_MSGR_TAG_MSG; | 101 | static char tag_msg = CEPH_MSGR_TAG_MSG; |
| 34 | static char tag_ack = CEPH_MSGR_TAG_ACK; | 102 | static char tag_ack = CEPH_MSGR_TAG_ACK; |
| @@ -147,72 +215,130 @@ void ceph_msgr_flush(void) | |||
| 147 | } | 215 | } |
| 148 | EXPORT_SYMBOL(ceph_msgr_flush); | 216 | EXPORT_SYMBOL(ceph_msgr_flush); |
| 149 | 217 | ||
| 218 | /* Connection socket state transition functions */ | ||
| 219 | |||
| 220 | static void con_sock_state_init(struct ceph_connection *con) | ||
| 221 | { | ||
| 222 | int old_state; | ||
| 223 | |||
| 224 | old_state = atomic_xchg(&con->sock_state, CON_SOCK_STATE_CLOSED); | ||
| 225 | if (WARN_ON(old_state != CON_SOCK_STATE_NEW)) | ||
| 226 | printk("%s: unexpected old state %d\n", __func__, old_state); | ||
| 227 | dout("%s con %p sock %d -> %d\n", __func__, con, old_state, | ||
| 228 | CON_SOCK_STATE_CLOSED); | ||
| 229 | } | ||
| 230 | |||
| 231 | static void con_sock_state_connecting(struct ceph_connection *con) | ||
| 232 | { | ||
| 233 | int old_state; | ||
| 234 | |||
| 235 | old_state = atomic_xchg(&con->sock_state, CON_SOCK_STATE_CONNECTING); | ||
| 236 | if (WARN_ON(old_state != CON_SOCK_STATE_CLOSED)) | ||
| 237 | printk("%s: unexpected old state %d\n", __func__, old_state); | ||
| 238 | dout("%s con %p sock %d -> %d\n", __func__, con, old_state, | ||
| 239 | CON_SOCK_STATE_CONNECTING); | ||
| 240 | } | ||
| 241 | |||
| 242 | static void con_sock_state_connected(struct ceph_connection *con) | ||
| 243 | { | ||
| 244 | int old_state; | ||
| 245 | |||
| 246 | old_state = atomic_xchg(&con->sock_state, CON_SOCK_STATE_CONNECTED); | ||
| 247 | if (WARN_ON(old_state != CON_SOCK_STATE_CONNECTING)) | ||
| 248 | printk("%s: unexpected old state %d\n", __func__, old_state); | ||
| 249 | dout("%s con %p sock %d -> %d\n", __func__, con, old_state, | ||
| 250 | CON_SOCK_STATE_CONNECTED); | ||
| 251 | } | ||
| 252 | |||
| 253 | static void con_sock_state_closing(struct ceph_connection *con) | ||
| 254 | { | ||
| 255 | int old_state; | ||
| 256 | |||
| 257 | old_state = atomic_xchg(&con->sock_state, CON_SOCK_STATE_CLOSING); | ||
| 258 | if (WARN_ON(old_state != CON_SOCK_STATE_CONNECTING && | ||
| 259 | old_state != CON_SOCK_STATE_CONNECTED && | ||
| 260 | old_state != CON_SOCK_STATE_CLOSING)) | ||
| 261 | printk("%s: unexpected old state %d\n", __func__, old_state); | ||
| 262 | dout("%s con %p sock %d -> %d\n", __func__, con, old_state, | ||
| 263 | CON_SOCK_STATE_CLOSING); | ||
| 264 | } | ||
| 265 | |||
| 266 | static void con_sock_state_closed(struct ceph_connection *con) | ||
| 267 | { | ||
| 268 | int old_state; | ||
| 269 | |||
| 270 | old_state = atomic_xchg(&con->sock_state, CON_SOCK_STATE_CLOSED); | ||
| 271 | if (WARN_ON(old_state != CON_SOCK_STATE_CONNECTED && | ||
| 272 | old_state != CON_SOCK_STATE_CLOSING && | ||
| 273 | old_state != CON_SOCK_STATE_CONNECTING && | ||
| 274 | old_state != CON_SOCK_STATE_CLOSED)) | ||
| 275 | printk("%s: unexpected old state %d\n", __func__, old_state); | ||
| 276 | dout("%s con %p sock %d -> %d\n", __func__, con, old_state, | ||
| 277 | CON_SOCK_STATE_CLOSED); | ||
| 278 | } | ||
| 150 | 279 | ||
| 151 | /* | 280 | /* |
| 152 | * socket callback functions | 281 | * socket callback functions |
| 153 | */ | 282 | */ |
| 154 | 283 | ||
| 155 | /* data available on socket, or listen socket received a connect */ | 284 | /* data available on socket, or listen socket received a connect */ |
| 156 | static void ceph_data_ready(struct sock *sk, int count_unused) | 285 | static void ceph_sock_data_ready(struct sock *sk, int count_unused) |
| 157 | { | 286 | { |
| 158 | struct ceph_connection *con = sk->sk_user_data; | 287 | struct ceph_connection *con = sk->sk_user_data; |
| 288 | if (atomic_read(&con->msgr->stopping)) { | ||
| 289 | return; | ||
| 290 | } | ||
| 159 | 291 | ||
| 160 | if (sk->sk_state != TCP_CLOSE_WAIT) { | 292 | if (sk->sk_state != TCP_CLOSE_WAIT) { |
| 161 | dout("ceph_data_ready on %p state = %lu, queueing work\n", | 293 | dout("%s on %p state = %lu, queueing work\n", __func__, |
| 162 | con, con->state); | 294 | con, con->state); |
| 163 | queue_con(con); | 295 | queue_con(con); |
| 164 | } | 296 | } |
| 165 | } | 297 | } |
| 166 | 298 | ||
| 167 | /* socket has buffer space for writing */ | 299 | /* socket has buffer space for writing */ |
| 168 | static void ceph_write_space(struct sock *sk) | 300 | static void ceph_sock_write_space(struct sock *sk) |
| 169 | { | 301 | { |
| 170 | struct ceph_connection *con = sk->sk_user_data; | 302 | struct ceph_connection *con = sk->sk_user_data; |
| 171 | 303 | ||
| 172 | /* only queue to workqueue if there is data we want to write, | 304 | /* only queue to workqueue if there is data we want to write, |
| 173 | * and there is sufficient space in the socket buffer to accept | 305 | * and there is sufficient space in the socket buffer to accept |
| 174 | * more data. clear SOCK_NOSPACE so that ceph_write_space() | 306 | * more data. clear SOCK_NOSPACE so that ceph_sock_write_space() |
| 175 | * doesn't get called again until try_write() fills the socket | 307 | * doesn't get called again until try_write() fills the socket |
| 176 | * buffer. See net/ipv4/tcp_input.c:tcp_check_space() | 308 | * buffer. See net/ipv4/tcp_input.c:tcp_check_space() |
| 177 | * and net/core/stream.c:sk_stream_write_space(). | 309 | * and net/core/stream.c:sk_stream_write_space(). |
| 178 | */ | 310 | */ |
| 179 | if (test_bit(WRITE_PENDING, &con->state)) { | 311 | if (test_bit(CON_FLAG_WRITE_PENDING, &con->flags)) { |
| 180 | if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) { | 312 | if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) { |
| 181 | dout("ceph_write_space %p queueing write work\n", con); | 313 | dout("%s %p queueing write work\n", __func__, con); |
| 182 | clear_bit(SOCK_NOSPACE, &sk->sk_socket->flags); | 314 | clear_bit(SOCK_NOSPACE, &sk->sk_socket->flags); |
| 183 | queue_con(con); | 315 | queue_con(con); |
| 184 | } | 316 | } |
| 185 | } else { | 317 | } else { |
| 186 | dout("ceph_write_space %p nothing to write\n", con); | 318 | dout("%s %p nothing to write\n", __func__, con); |
| 187 | } | 319 | } |
| 188 | } | 320 | } |
| 189 | 321 | ||
| 190 | /* socket's state has changed */ | 322 | /* socket's state has changed */ |
| 191 | static void ceph_state_change(struct sock *sk) | 323 | static void ceph_sock_state_change(struct sock *sk) |
| 192 | { | 324 | { |
| 193 | struct ceph_connection *con = sk->sk_user_data; | 325 | struct ceph_connection *con = sk->sk_user_data; |
| 194 | 326 | ||
| 195 | dout("ceph_state_change %p state = %lu sk_state = %u\n", | 327 | dout("%s %p state = %lu sk_state = %u\n", __func__, |
| 196 | con, con->state, sk->sk_state); | 328 | con, con->state, sk->sk_state); |
| 197 | 329 | ||
| 198 | if (test_bit(CLOSED, &con->state)) | ||
| 199 | return; | ||
| 200 | |||
| 201 | switch (sk->sk_state) { | 330 | switch (sk->sk_state) { |
| 202 | case TCP_CLOSE: | 331 | case TCP_CLOSE: |
| 203 | dout("ceph_state_change TCP_CLOSE\n"); | 332 | dout("%s TCP_CLOSE\n", __func__); |
| 204 | case TCP_CLOSE_WAIT: | 333 | case TCP_CLOSE_WAIT: |
| 205 | dout("ceph_state_change TCP_CLOSE_WAIT\n"); | 334 | dout("%s TCP_CLOSE_WAIT\n", __func__); |
| 206 | if (test_and_set_bit(SOCK_CLOSED, &con->state) == 0) { | 335 | con_sock_state_closing(con); |
| 207 | if (test_bit(CONNECTING, &con->state)) | 336 | set_bit(CON_FLAG_SOCK_CLOSED, &con->flags); |
| 208 | con->error_msg = "connection failed"; | 337 | queue_con(con); |
| 209 | else | ||
| 210 | con->error_msg = "socket closed"; | ||
| 211 | queue_con(con); | ||
| 212 | } | ||
| 213 | break; | 338 | break; |
| 214 | case TCP_ESTABLISHED: | 339 | case TCP_ESTABLISHED: |
| 215 | dout("ceph_state_change TCP_ESTABLISHED\n"); | 340 | dout("%s TCP_ESTABLISHED\n", __func__); |
| 341 | con_sock_state_connected(con); | ||
| 216 | queue_con(con); | 342 | queue_con(con); |
| 217 | break; | 343 | break; |
| 218 | default: /* Everything else is uninteresting */ | 344 | default: /* Everything else is uninteresting */ |
| @@ -228,9 +354,9 @@ static void set_sock_callbacks(struct socket *sock, | |||
| 228 | { | 354 | { |
| 229 | struct sock *sk = sock->sk; | 355 | struct sock *sk = sock->sk; |
| 230 | sk->sk_user_data = con; | 356 | sk->sk_user_data = con; |
| 231 | sk->sk_data_ready = ceph_data_ready; | 357 | sk->sk_data_ready = ceph_sock_data_ready; |
| 232 | sk->sk_write_space = ceph_write_space; | 358 | sk->sk_write_space = ceph_sock_write_space; |
| 233 | sk->sk_state_change = ceph_state_change; | 359 | sk->sk_state_change = ceph_sock_state_change; |
| 234 | } | 360 | } |
| 235 | 361 | ||
| 236 | 362 | ||
| @@ -262,6 +388,7 @@ static int ceph_tcp_connect(struct ceph_connection *con) | |||
| 262 | 388 | ||
| 263 | dout("connect %s\n", ceph_pr_addr(&con->peer_addr.in_addr)); | 389 | dout("connect %s\n", ceph_pr_addr(&con->peer_addr.in_addr)); |
| 264 | 390 | ||
| 391 | con_sock_state_connecting(con); | ||
| 265 | ret = sock->ops->connect(sock, (struct sockaddr *)paddr, sizeof(*paddr), | 392 | ret = sock->ops->connect(sock, (struct sockaddr *)paddr, sizeof(*paddr), |
| 266 | O_NONBLOCK); | 393 | O_NONBLOCK); |
| 267 | if (ret == -EINPROGRESS) { | 394 | if (ret == -EINPROGRESS) { |
| @@ -277,7 +404,6 @@ static int ceph_tcp_connect(struct ceph_connection *con) | |||
| 277 | return ret; | 404 | return ret; |
| 278 | } | 405 | } |
| 279 | con->sock = sock; | 406 | con->sock = sock; |
| 280 | |||
| 281 | return 0; | 407 | return 0; |
| 282 | } | 408 | } |
| 283 | 409 | ||
| @@ -333,16 +459,24 @@ static int ceph_tcp_sendpage(struct socket *sock, struct page *page, | |||
| 333 | */ | 459 | */ |
| 334 | static int con_close_socket(struct ceph_connection *con) | 460 | static int con_close_socket(struct ceph_connection *con) |
| 335 | { | 461 | { |
| 336 | int rc; | 462 | int rc = 0; |
| 337 | 463 | ||
| 338 | dout("con_close_socket on %p sock %p\n", con, con->sock); | 464 | dout("con_close_socket on %p sock %p\n", con, con->sock); |
| 339 | if (!con->sock) | 465 | if (con->sock) { |
| 340 | return 0; | 466 | rc = con->sock->ops->shutdown(con->sock, SHUT_RDWR); |
| 341 | set_bit(SOCK_CLOSED, &con->state); | 467 | sock_release(con->sock); |
| 342 | rc = con->sock->ops->shutdown(con->sock, SHUT_RDWR); | 468 | con->sock = NULL; |
| 343 | sock_release(con->sock); | 469 | } |
| 344 | con->sock = NULL; | 470 | |
| 345 | clear_bit(SOCK_CLOSED, &con->state); | 471 | /* |
| 472 | * Forcibly clear the SOCK_CLOSED flag. It gets set | ||
| 473 | * independent of the connection mutex, and we could have | ||
| 474 | * received a socket close event before we had the chance to | ||
| 475 | * shut the socket down. | ||
| 476 | */ | ||
| 477 | clear_bit(CON_FLAG_SOCK_CLOSED, &con->flags); | ||
| 478 | |||
| 479 | con_sock_state_closed(con); | ||
| 346 | return rc; | 480 | return rc; |
| 347 | } | 481 | } |
| 348 | 482 | ||
| @@ -353,6 +487,10 @@ static int con_close_socket(struct ceph_connection *con) | |||
| 353 | static void ceph_msg_remove(struct ceph_msg *msg) | 487 | static void ceph_msg_remove(struct ceph_msg *msg) |
| 354 | { | 488 | { |
| 355 | list_del_init(&msg->list_head); | 489 | list_del_init(&msg->list_head); |
| 490 | BUG_ON(msg->con == NULL); | ||
| 491 | msg->con->ops->put(msg->con); | ||
| 492 | msg->con = NULL; | ||
| 493 | |||
| 356 | ceph_msg_put(msg); | 494 | ceph_msg_put(msg); |
| 357 | } | 495 | } |
| 358 | static void ceph_msg_remove_list(struct list_head *head) | 496 | static void ceph_msg_remove_list(struct list_head *head) |
| @@ -372,8 +510,11 @@ static void reset_connection(struct ceph_connection *con) | |||
| 372 | ceph_msg_remove_list(&con->out_sent); | 510 | ceph_msg_remove_list(&con->out_sent); |
| 373 | 511 | ||
| 374 | if (con->in_msg) { | 512 | if (con->in_msg) { |
| 513 | BUG_ON(con->in_msg->con != con); | ||
| 514 | con->in_msg->con = NULL; | ||
| 375 | ceph_msg_put(con->in_msg); | 515 | ceph_msg_put(con->in_msg); |
| 376 | con->in_msg = NULL; | 516 | con->in_msg = NULL; |
| 517 | con->ops->put(con); | ||
| 377 | } | 518 | } |
| 378 | 519 | ||
| 379 | con->connect_seq = 0; | 520 | con->connect_seq = 0; |
| @@ -391,32 +532,44 @@ static void reset_connection(struct ceph_connection *con) | |||
| 391 | */ | 532 | */ |
| 392 | void ceph_con_close(struct ceph_connection *con) | 533 | void ceph_con_close(struct ceph_connection *con) |
| 393 | { | 534 | { |
| 535 | mutex_lock(&con->mutex); | ||
| 394 | dout("con_close %p peer %s\n", con, | 536 | dout("con_close %p peer %s\n", con, |
| 395 | ceph_pr_addr(&con->peer_addr.in_addr)); | 537 | ceph_pr_addr(&con->peer_addr.in_addr)); |
| 396 | set_bit(CLOSED, &con->state); /* in case there's queued work */ | 538 | con->state = CON_STATE_CLOSED; |
| 397 | clear_bit(STANDBY, &con->state); /* avoid connect_seq bump */ | 539 | |
| 398 | clear_bit(LOSSYTX, &con->state); /* so we retry next connect */ | 540 | clear_bit(CON_FLAG_LOSSYTX, &con->flags); /* so we retry next connect */ |
| 399 | clear_bit(KEEPALIVE_PENDING, &con->state); | 541 | clear_bit(CON_FLAG_KEEPALIVE_PENDING, &con->flags); |
| 400 | clear_bit(WRITE_PENDING, &con->state); | 542 | clear_bit(CON_FLAG_WRITE_PENDING, &con->flags); |
| 401 | mutex_lock(&con->mutex); | 543 | clear_bit(CON_FLAG_KEEPALIVE_PENDING, &con->flags); |
| 544 | clear_bit(CON_FLAG_BACKOFF, &con->flags); | ||
| 545 | |||
| 402 | reset_connection(con); | 546 | reset_connection(con); |
| 403 | con->peer_global_seq = 0; | 547 | con->peer_global_seq = 0; |
| 404 | cancel_delayed_work(&con->work); | 548 | cancel_delayed_work(&con->work); |
| 549 | con_close_socket(con); | ||
| 405 | mutex_unlock(&con->mutex); | 550 | mutex_unlock(&con->mutex); |
| 406 | queue_con(con); | ||
| 407 | } | 551 | } |
| 408 | EXPORT_SYMBOL(ceph_con_close); | 552 | EXPORT_SYMBOL(ceph_con_close); |
| 409 | 553 | ||
| 410 | /* | 554 | /* |
| 411 | * Reopen a closed connection, with a new peer address. | 555 | * Reopen a closed connection, with a new peer address. |
| 412 | */ | 556 | */ |
| 413 | void ceph_con_open(struct ceph_connection *con, struct ceph_entity_addr *addr) | 557 | void ceph_con_open(struct ceph_connection *con, |
| 558 | __u8 entity_type, __u64 entity_num, | ||
| 559 | struct ceph_entity_addr *addr) | ||
| 414 | { | 560 | { |
| 561 | mutex_lock(&con->mutex); | ||
| 415 | dout("con_open %p %s\n", con, ceph_pr_addr(&addr->in_addr)); | 562 | dout("con_open %p %s\n", con, ceph_pr_addr(&addr->in_addr)); |
| 416 | set_bit(OPENING, &con->state); | 563 | |
| 417 | clear_bit(CLOSED, &con->state); | 564 | BUG_ON(con->state != CON_STATE_CLOSED); |
| 565 | con->state = CON_STATE_PREOPEN; | ||
| 566 | |||
| 567 | con->peer_name.type = (__u8) entity_type; | ||
| 568 | con->peer_name.num = cpu_to_le64(entity_num); | ||
| 569 | |||
| 418 | memcpy(&con->peer_addr, addr, sizeof(*addr)); | 570 | memcpy(&con->peer_addr, addr, sizeof(*addr)); |
| 419 | con->delay = 0; /* reset backoff memory */ | 571 | con->delay = 0; /* reset backoff memory */ |
| 572 | mutex_unlock(&con->mutex); | ||
| 420 | queue_con(con); | 573 | queue_con(con); |
| 421 | } | 574 | } |
| 422 | EXPORT_SYMBOL(ceph_con_open); | 575 | EXPORT_SYMBOL(ceph_con_open); |
| @@ -430,42 +583,26 @@ bool ceph_con_opened(struct ceph_connection *con) | |||
| 430 | } | 583 | } |
| 431 | 584 | ||
| 432 | /* | 585 | /* |
| 433 | * generic get/put | ||
| 434 | */ | ||
| 435 | struct ceph_connection *ceph_con_get(struct ceph_connection *con) | ||
| 436 | { | ||
| 437 | int nref = __atomic_add_unless(&con->nref, 1, 0); | ||
| 438 | |||
| 439 | dout("con_get %p nref = %d -> %d\n", con, nref, nref + 1); | ||
| 440 | |||
| 441 | return nref ? con : NULL; | ||
| 442 | } | ||
| 443 | |||
| 444 | void ceph_con_put(struct ceph_connection *con) | ||
| 445 | { | ||
| 446 | int nref = atomic_dec_return(&con->nref); | ||
| 447 | |||
| 448 | BUG_ON(nref < 0); | ||
| 449 | if (nref == 0) { | ||
| 450 | BUG_ON(con->sock); | ||
| 451 | kfree(con); | ||
| 452 | } | ||
| 453 | dout("con_put %p nref = %d -> %d\n", con, nref + 1, nref); | ||
| 454 | } | ||
| 455 | |||
| 456 | /* | ||
| 457 | * initialize a new connection. | 586 | * initialize a new connection. |
| 458 | */ | 587 | */ |
| 459 | void ceph_con_init(struct ceph_messenger *msgr, struct ceph_connection *con) | 588 | void ceph_con_init(struct ceph_connection *con, void *private, |
| 589 | const struct ceph_connection_operations *ops, | ||
| 590 | struct ceph_messenger *msgr) | ||
| 460 | { | 591 | { |
| 461 | dout("con_init %p\n", con); | 592 | dout("con_init %p\n", con); |
| 462 | memset(con, 0, sizeof(*con)); | 593 | memset(con, 0, sizeof(*con)); |
| 463 | atomic_set(&con->nref, 1); | 594 | con->private = private; |
| 595 | con->ops = ops; | ||
| 464 | con->msgr = msgr; | 596 | con->msgr = msgr; |
| 597 | |||
| 598 | con_sock_state_init(con); | ||
| 599 | |||
| 465 | mutex_init(&con->mutex); | 600 | mutex_init(&con->mutex); |
| 466 | INIT_LIST_HEAD(&con->out_queue); | 601 | INIT_LIST_HEAD(&con->out_queue); |
| 467 | INIT_LIST_HEAD(&con->out_sent); | 602 | INIT_LIST_HEAD(&con->out_sent); |
| 468 | INIT_DELAYED_WORK(&con->work, con_work); | 603 | INIT_DELAYED_WORK(&con->work, con_work); |
| 604 | |||
| 605 | con->state = CON_STATE_CLOSED; | ||
| 469 | } | 606 | } |
| 470 | EXPORT_SYMBOL(ceph_con_init); | 607 | EXPORT_SYMBOL(ceph_con_init); |
| 471 | 608 | ||
| @@ -486,14 +623,14 @@ static u32 get_global_seq(struct ceph_messenger *msgr, u32 gt) | |||
| 486 | return ret; | 623 | return ret; |
| 487 | } | 624 | } |
| 488 | 625 | ||
| 489 | static void ceph_con_out_kvec_reset(struct ceph_connection *con) | 626 | static void con_out_kvec_reset(struct ceph_connection *con) |
| 490 | { | 627 | { |
| 491 | con->out_kvec_left = 0; | 628 | con->out_kvec_left = 0; |
| 492 | con->out_kvec_bytes = 0; | 629 | con->out_kvec_bytes = 0; |
| 493 | con->out_kvec_cur = &con->out_kvec[0]; | 630 | con->out_kvec_cur = &con->out_kvec[0]; |
| 494 | } | 631 | } |
| 495 | 632 | ||
| 496 | static void ceph_con_out_kvec_add(struct ceph_connection *con, | 633 | static void con_out_kvec_add(struct ceph_connection *con, |
| 497 | size_t size, void *data) | 634 | size_t size, void *data) |
| 498 | { | 635 | { |
| 499 | int index; | 636 | int index; |
| @@ -507,6 +644,53 @@ static void ceph_con_out_kvec_add(struct ceph_connection *con, | |||
| 507 | con->out_kvec_bytes += size; | 644 | con->out_kvec_bytes += size; |
| 508 | } | 645 | } |
| 509 | 646 | ||
| 647 | #ifdef CONFIG_BLOCK | ||
| 648 | static void init_bio_iter(struct bio *bio, struct bio **iter, int *seg) | ||
| 649 | { | ||
| 650 | if (!bio) { | ||
| 651 | *iter = NULL; | ||
| 652 | *seg = 0; | ||
| 653 | return; | ||
| 654 | } | ||
| 655 | *iter = bio; | ||
| 656 | *seg = bio->bi_idx; | ||
| 657 | } | ||
| 658 | |||
| 659 | static void iter_bio_next(struct bio **bio_iter, int *seg) | ||
| 660 | { | ||
| 661 | if (*bio_iter == NULL) | ||
| 662 | return; | ||
| 663 | |||
| 664 | BUG_ON(*seg >= (*bio_iter)->bi_vcnt); | ||
| 665 | |||
| 666 | (*seg)++; | ||
| 667 | if (*seg == (*bio_iter)->bi_vcnt) | ||
| 668 | init_bio_iter((*bio_iter)->bi_next, bio_iter, seg); | ||
| 669 | } | ||
| 670 | #endif | ||
| 671 | |||
| 672 | static void prepare_write_message_data(struct ceph_connection *con) | ||
| 673 | { | ||
| 674 | struct ceph_msg *msg = con->out_msg; | ||
| 675 | |||
| 676 | BUG_ON(!msg); | ||
| 677 | BUG_ON(!msg->hdr.data_len); | ||
| 678 | |||
| 679 | /* initialize page iterator */ | ||
| 680 | con->out_msg_pos.page = 0; | ||
| 681 | if (msg->pages) | ||
| 682 | con->out_msg_pos.page_pos = msg->page_alignment; | ||
| 683 | else | ||
| 684 | con->out_msg_pos.page_pos = 0; | ||
| 685 | #ifdef CONFIG_BLOCK | ||
| 686 | if (msg->bio) | ||
| 687 | init_bio_iter(msg->bio, &msg->bio_iter, &msg->bio_seg); | ||
| 688 | #endif | ||
| 689 | con->out_msg_pos.data_pos = 0; | ||
| 690 | con->out_msg_pos.did_page_crc = false; | ||
| 691 | con->out_more = 1; /* data + footer will follow */ | ||
| 692 | } | ||
| 693 | |||
| 510 | /* | 694 | /* |
| 511 | * Prepare footer for currently outgoing message, and finish things | 695 | * Prepare footer for currently outgoing message, and finish things |
| 512 | * off. Assumes out_kvec* are already valid.. we just add on to the end. | 696 | * off. Assumes out_kvec* are already valid.. we just add on to the end. |
| @@ -516,6 +700,8 @@ static void prepare_write_message_footer(struct ceph_connection *con) | |||
| 516 | struct ceph_msg *m = con->out_msg; | 700 | struct ceph_msg *m = con->out_msg; |
| 517 | int v = con->out_kvec_left; | 701 | int v = con->out_kvec_left; |
| 518 | 702 | ||
| 703 | m->footer.flags |= CEPH_MSG_FOOTER_COMPLETE; | ||
| 704 | |||
| 519 | dout("prepare_write_message_footer %p\n", con); | 705 | dout("prepare_write_message_footer %p\n", con); |
| 520 | con->out_kvec_is_msg = true; | 706 | con->out_kvec_is_msg = true; |
| 521 | con->out_kvec[v].iov_base = &m->footer; | 707 | con->out_kvec[v].iov_base = &m->footer; |
| @@ -534,7 +720,7 @@ static void prepare_write_message(struct ceph_connection *con) | |||
| 534 | struct ceph_msg *m; | 720 | struct ceph_msg *m; |
| 535 | u32 crc; | 721 | u32 crc; |
| 536 | 722 | ||
| 537 | ceph_con_out_kvec_reset(con); | 723 | con_out_kvec_reset(con); |
| 538 | con->out_kvec_is_msg = true; | 724 | con->out_kvec_is_msg = true; |
| 539 | con->out_msg_done = false; | 725 | con->out_msg_done = false; |
| 540 | 726 | ||
| @@ -542,14 +728,16 @@ static void prepare_write_message(struct ceph_connection *con) | |||
| 542 | * TCP packet that's a good thing. */ | 728 | * TCP packet that's a good thing. */ |
| 543 | if (con->in_seq > con->in_seq_acked) { | 729 | if (con->in_seq > con->in_seq_acked) { |
| 544 | con->in_seq_acked = con->in_seq; | 730 | con->in_seq_acked = con->in_seq; |
| 545 | ceph_con_out_kvec_add(con, sizeof (tag_ack), &tag_ack); | 731 | con_out_kvec_add(con, sizeof (tag_ack), &tag_ack); |
| 546 | con->out_temp_ack = cpu_to_le64(con->in_seq_acked); | 732 | con->out_temp_ack = cpu_to_le64(con->in_seq_acked); |
| 547 | ceph_con_out_kvec_add(con, sizeof (con->out_temp_ack), | 733 | con_out_kvec_add(con, sizeof (con->out_temp_ack), |
| 548 | &con->out_temp_ack); | 734 | &con->out_temp_ack); |
| 549 | } | 735 | } |
| 550 | 736 | ||
| 737 | BUG_ON(list_empty(&con->out_queue)); | ||
| 551 | m = list_first_entry(&con->out_queue, struct ceph_msg, list_head); | 738 | m = list_first_entry(&con->out_queue, struct ceph_msg, list_head); |
| 552 | con->out_msg = m; | 739 | con->out_msg = m; |
| 740 | BUG_ON(m->con != con); | ||
| 553 | 741 | ||
| 554 | /* put message on sent list */ | 742 | /* put message on sent list */ |
| 555 | ceph_msg_get(m); | 743 | ceph_msg_get(m); |
| @@ -576,18 +764,18 @@ static void prepare_write_message(struct ceph_connection *con) | |||
| 576 | BUG_ON(le32_to_cpu(m->hdr.front_len) != m->front.iov_len); | 764 | BUG_ON(le32_to_cpu(m->hdr.front_len) != m->front.iov_len); |
| 577 | 765 | ||
| 578 | /* tag + hdr + front + middle */ | 766 | /* tag + hdr + front + middle */ |
| 579 | ceph_con_out_kvec_add(con, sizeof (tag_msg), &tag_msg); | 767 | con_out_kvec_add(con, sizeof (tag_msg), &tag_msg); |
| 580 | ceph_con_out_kvec_add(con, sizeof (m->hdr), &m->hdr); | 768 | con_out_kvec_add(con, sizeof (m->hdr), &m->hdr); |
| 581 | ceph_con_out_kvec_add(con, m->front.iov_len, m->front.iov_base); | 769 | con_out_kvec_add(con, m->front.iov_len, m->front.iov_base); |
| 582 | 770 | ||
| 583 | if (m->middle) | 771 | if (m->middle) |
| 584 | ceph_con_out_kvec_add(con, m->middle->vec.iov_len, | 772 | con_out_kvec_add(con, m->middle->vec.iov_len, |
| 585 | m->middle->vec.iov_base); | 773 | m->middle->vec.iov_base); |
| 586 | 774 | ||
| 587 | /* fill in crc (except data pages), footer */ | 775 | /* fill in crc (except data pages), footer */ |
| 588 | crc = crc32c(0, &m->hdr, offsetof(struct ceph_msg_header, crc)); | 776 | crc = crc32c(0, &m->hdr, offsetof(struct ceph_msg_header, crc)); |
| 589 | con->out_msg->hdr.crc = cpu_to_le32(crc); | 777 | con->out_msg->hdr.crc = cpu_to_le32(crc); |
| 590 | con->out_msg->footer.flags = CEPH_MSG_FOOTER_COMPLETE; | 778 | con->out_msg->footer.flags = 0; |
| 591 | 779 | ||
| 592 | crc = crc32c(0, m->front.iov_base, m->front.iov_len); | 780 | crc = crc32c(0, m->front.iov_base, m->front.iov_len); |
| 593 | con->out_msg->footer.front_crc = cpu_to_le32(crc); | 781 | con->out_msg->footer.front_crc = cpu_to_le32(crc); |
| @@ -597,28 +785,19 @@ static void prepare_write_message(struct ceph_connection *con) | |||
| 597 | con->out_msg->footer.middle_crc = cpu_to_le32(crc); | 785 | con->out_msg->footer.middle_crc = cpu_to_le32(crc); |
| 598 | } else | 786 | } else |
| 599 | con->out_msg->footer.middle_crc = 0; | 787 | con->out_msg->footer.middle_crc = 0; |
| 600 | con->out_msg->footer.data_crc = 0; | 788 | dout("%s front_crc %u middle_crc %u\n", __func__, |
| 601 | dout("prepare_write_message front_crc %u data_crc %u\n", | ||
| 602 | le32_to_cpu(con->out_msg->footer.front_crc), | 789 | le32_to_cpu(con->out_msg->footer.front_crc), |
| 603 | le32_to_cpu(con->out_msg->footer.middle_crc)); | 790 | le32_to_cpu(con->out_msg->footer.middle_crc)); |
| 604 | 791 | ||
| 605 | /* is there a data payload? */ | 792 | /* is there a data payload? */ |
| 606 | if (le32_to_cpu(m->hdr.data_len) > 0) { | 793 | con->out_msg->footer.data_crc = 0; |
| 607 | /* initialize page iterator */ | 794 | if (m->hdr.data_len) |
| 608 | con->out_msg_pos.page = 0; | 795 | prepare_write_message_data(con); |
| 609 | if (m->pages) | 796 | else |
| 610 | con->out_msg_pos.page_pos = m->page_alignment; | ||
| 611 | else | ||
| 612 | con->out_msg_pos.page_pos = 0; | ||
| 613 | con->out_msg_pos.data_pos = 0; | ||
| 614 | con->out_msg_pos.did_page_crc = false; | ||
| 615 | con->out_more = 1; /* data + footer will follow */ | ||
| 616 | } else { | ||
| 617 | /* no, queue up footer too and be done */ | 797 | /* no, queue up footer too and be done */ |
| 618 | prepare_write_message_footer(con); | 798 | prepare_write_message_footer(con); |
| 619 | } | ||
| 620 | 799 | ||
| 621 | set_bit(WRITE_PENDING, &con->state); | 800 | set_bit(CON_FLAG_WRITE_PENDING, &con->flags); |
| 622 | } | 801 | } |
| 623 | 802 | ||
| 624 | /* | 803 | /* |
| @@ -630,16 +809,16 @@ static void prepare_write_ack(struct ceph_connection *con) | |||
| 630 | con->in_seq_acked, con->in_seq); | 809 | con->in_seq_acked, con->in_seq); |
| 631 | con->in_seq_acked = con->in_seq; | 810 | con->in_seq_acked = con->in_seq; |
| 632 | 811 | ||
| 633 | ceph_con_out_kvec_reset(con); | 812 | con_out_kvec_reset(con); |
| 634 | 813 | ||
| 635 | ceph_con_out_kvec_add(con, sizeof (tag_ack), &tag_ack); | 814 | con_out_kvec_add(con, sizeof (tag_ack), &tag_ack); |
| 636 | 815 | ||
| 637 | con->out_temp_ack = cpu_to_le64(con->in_seq_acked); | 816 | con->out_temp_ack = cpu_to_le64(con->in_seq_acked); |
| 638 | ceph_con_out_kvec_add(con, sizeof (con->out_temp_ack), | 817 | con_out_kvec_add(con, sizeof (con->out_temp_ack), |
| 639 | &con->out_temp_ack); | 818 | &con->out_temp_ack); |
| 640 | 819 | ||
| 641 | con->out_more = 1; /* more will follow.. eventually.. */ | 820 | con->out_more = 1; /* more will follow.. eventually.. */ |
| 642 | set_bit(WRITE_PENDING, &con->state); | 821 | set_bit(CON_FLAG_WRITE_PENDING, &con->flags); |
| 643 | } | 822 | } |
| 644 | 823 | ||
| 645 | /* | 824 | /* |
| @@ -648,9 +827,9 @@ static void prepare_write_ack(struct ceph_connection *con) | |||
| 648 | static void prepare_write_keepalive(struct ceph_connection *con) | 827 | static void prepare_write_keepalive(struct ceph_connection *con) |
| 649 | { | 828 | { |
| 650 | dout("prepare_write_keepalive %p\n", con); | 829 | dout("prepare_write_keepalive %p\n", con); |
| 651 | ceph_con_out_kvec_reset(con); | 830 | con_out_kvec_reset(con); |
| 652 | ceph_con_out_kvec_add(con, sizeof (tag_keepalive), &tag_keepalive); | 831 | con_out_kvec_add(con, sizeof (tag_keepalive), &tag_keepalive); |
| 653 | set_bit(WRITE_PENDING, &con->state); | 832 | set_bit(CON_FLAG_WRITE_PENDING, &con->flags); |
| 654 | } | 833 | } |
| 655 | 834 | ||
| 656 | /* | 835 | /* |
| @@ -665,27 +844,21 @@ static struct ceph_auth_handshake *get_connect_authorizer(struct ceph_connection | |||
| 665 | if (!con->ops->get_authorizer) { | 844 | if (!con->ops->get_authorizer) { |
| 666 | con->out_connect.authorizer_protocol = CEPH_AUTH_UNKNOWN; | 845 | con->out_connect.authorizer_protocol = CEPH_AUTH_UNKNOWN; |
| 667 | con->out_connect.authorizer_len = 0; | 846 | con->out_connect.authorizer_len = 0; |
| 668 | |||
| 669 | return NULL; | 847 | return NULL; |
| 670 | } | 848 | } |
| 671 | 849 | ||
| 672 | /* Can't hold the mutex while getting authorizer */ | 850 | /* Can't hold the mutex while getting authorizer */ |
| 673 | |||
| 674 | mutex_unlock(&con->mutex); | 851 | mutex_unlock(&con->mutex); |
| 675 | |||
| 676 | auth = con->ops->get_authorizer(con, auth_proto, con->auth_retry); | 852 | auth = con->ops->get_authorizer(con, auth_proto, con->auth_retry); |
| 677 | |||
| 678 | mutex_lock(&con->mutex); | 853 | mutex_lock(&con->mutex); |
| 679 | 854 | ||
| 680 | if (IS_ERR(auth)) | 855 | if (IS_ERR(auth)) |
| 681 | return auth; | 856 | return auth; |
| 682 | if (test_bit(CLOSED, &con->state) || test_bit(OPENING, &con->state)) | 857 | if (con->state != CON_STATE_NEGOTIATING) |
| 683 | return ERR_PTR(-EAGAIN); | 858 | return ERR_PTR(-EAGAIN); |
| 684 | 859 | ||
| 685 | con->auth_reply_buf = auth->authorizer_reply_buf; | 860 | con->auth_reply_buf = auth->authorizer_reply_buf; |
| 686 | con->auth_reply_buf_len = auth->authorizer_reply_buf_len; | 861 | con->auth_reply_buf_len = auth->authorizer_reply_buf_len; |
| 687 | |||
| 688 | |||
| 689 | return auth; | 862 | return auth; |
| 690 | } | 863 | } |
| 691 | 864 | ||
| @@ -694,12 +867,12 @@ static struct ceph_auth_handshake *get_connect_authorizer(struct ceph_connection | |||
| 694 | */ | 867 | */ |
| 695 | static void prepare_write_banner(struct ceph_connection *con) | 868 | static void prepare_write_banner(struct ceph_connection *con) |
| 696 | { | 869 | { |
| 697 | ceph_con_out_kvec_add(con, strlen(CEPH_BANNER), CEPH_BANNER); | 870 | con_out_kvec_add(con, strlen(CEPH_BANNER), CEPH_BANNER); |
| 698 | ceph_con_out_kvec_add(con, sizeof (con->msgr->my_enc_addr), | 871 | con_out_kvec_add(con, sizeof (con->msgr->my_enc_addr), |
| 699 | &con->msgr->my_enc_addr); | 872 | &con->msgr->my_enc_addr); |
| 700 | 873 | ||
| 701 | con->out_more = 0; | 874 | con->out_more = 0; |
| 702 | set_bit(WRITE_PENDING, &con->state); | 875 | set_bit(CON_FLAG_WRITE_PENDING, &con->flags); |
| 703 | } | 876 | } |
| 704 | 877 | ||
| 705 | static int prepare_write_connect(struct ceph_connection *con) | 878 | static int prepare_write_connect(struct ceph_connection *con) |
| @@ -742,14 +915,14 @@ static int prepare_write_connect(struct ceph_connection *con) | |||
| 742 | con->out_connect.authorizer_len = auth ? | 915 | con->out_connect.authorizer_len = auth ? |
| 743 | cpu_to_le32(auth->authorizer_buf_len) : 0; | 916 | cpu_to_le32(auth->authorizer_buf_len) : 0; |
| 744 | 917 | ||
| 745 | ceph_con_out_kvec_add(con, sizeof (con->out_connect), | 918 | con_out_kvec_add(con, sizeof (con->out_connect), |
| 746 | &con->out_connect); | 919 | &con->out_connect); |
| 747 | if (auth && auth->authorizer_buf_len) | 920 | if (auth && auth->authorizer_buf_len) |
| 748 | ceph_con_out_kvec_add(con, auth->authorizer_buf_len, | 921 | con_out_kvec_add(con, auth->authorizer_buf_len, |
| 749 | auth->authorizer_buf); | 922 | auth->authorizer_buf); |
| 750 | 923 | ||
| 751 | con->out_more = 0; | 924 | con->out_more = 0; |
| 752 | set_bit(WRITE_PENDING, &con->state); | 925 | set_bit(CON_FLAG_WRITE_PENDING, &con->flags); |
| 753 | 926 | ||
| 754 | return 0; | 927 | return 0; |
| 755 | } | 928 | } |
| @@ -797,30 +970,34 @@ out: | |||
| 797 | return ret; /* done! */ | 970 | return ret; /* done! */ |
| 798 | } | 971 | } |
| 799 | 972 | ||
| 800 | #ifdef CONFIG_BLOCK | 973 | static void out_msg_pos_next(struct ceph_connection *con, struct page *page, |
| 801 | static void init_bio_iter(struct bio *bio, struct bio **iter, int *seg) | 974 | size_t len, size_t sent, bool in_trail) |
| 802 | { | 975 | { |
| 803 | if (!bio) { | 976 | struct ceph_msg *msg = con->out_msg; |
| 804 | *iter = NULL; | ||
| 805 | *seg = 0; | ||
| 806 | return; | ||
| 807 | } | ||
| 808 | *iter = bio; | ||
| 809 | *seg = bio->bi_idx; | ||
| 810 | } | ||
| 811 | 977 | ||
| 812 | static void iter_bio_next(struct bio **bio_iter, int *seg) | 978 | BUG_ON(!msg); |
| 813 | { | 979 | BUG_ON(!sent); |
| 814 | if (*bio_iter == NULL) | ||
| 815 | return; | ||
| 816 | 980 | ||
| 817 | BUG_ON(*seg >= (*bio_iter)->bi_vcnt); | 981 | con->out_msg_pos.data_pos += sent; |
| 982 | con->out_msg_pos.page_pos += sent; | ||
| 983 | if (sent < len) | ||
| 984 | return; | ||
| 818 | 985 | ||
| 819 | (*seg)++; | 986 | BUG_ON(sent != len); |
| 820 | if (*seg == (*bio_iter)->bi_vcnt) | 987 | con->out_msg_pos.page_pos = 0; |
| 821 | init_bio_iter((*bio_iter)->bi_next, bio_iter, seg); | 988 | con->out_msg_pos.page++; |
| 822 | } | 989 | con->out_msg_pos.did_page_crc = false; |
| 990 | if (in_trail) | ||
| 991 | list_move_tail(&page->lru, | ||
| 992 | &msg->trail->head); | ||
| 993 | else if (msg->pagelist) | ||
| 994 | list_move_tail(&page->lru, | ||
| 995 | &msg->pagelist->head); | ||
| 996 | #ifdef CONFIG_BLOCK | ||
| 997 | else if (msg->bio) | ||
| 998 | iter_bio_next(&msg->bio_iter, &msg->bio_seg); | ||
| 823 | #endif | 999 | #endif |
| 1000 | } | ||
| 824 | 1001 | ||
| 825 | /* | 1002 | /* |
| 826 | * Write as much message data payload as we can. If we finish, queue | 1003 | * Write as much message data payload as we can. If we finish, queue |
| @@ -837,41 +1014,36 @@ static int write_partial_msg_pages(struct ceph_connection *con) | |||
| 837 | bool do_datacrc = !con->msgr->nocrc; | 1014 | bool do_datacrc = !con->msgr->nocrc; |
| 838 | int ret; | 1015 | int ret; |
| 839 | int total_max_write; | 1016 | int total_max_write; |
| 840 | int in_trail = 0; | 1017 | bool in_trail = false; |
| 841 | size_t trail_len = (msg->trail ? msg->trail->length : 0); | 1018 | const size_t trail_len = (msg->trail ? msg->trail->length : 0); |
| 1019 | const size_t trail_off = data_len - trail_len; | ||
| 842 | 1020 | ||
| 843 | dout("write_partial_msg_pages %p msg %p page %d/%d offset %d\n", | 1021 | dout("write_partial_msg_pages %p msg %p page %d/%d offset %d\n", |
| 844 | con, con->out_msg, con->out_msg_pos.page, con->out_msg->nr_pages, | 1022 | con, msg, con->out_msg_pos.page, msg->nr_pages, |
| 845 | con->out_msg_pos.page_pos); | 1023 | con->out_msg_pos.page_pos); |
| 846 | 1024 | ||
| 847 | #ifdef CONFIG_BLOCK | 1025 | /* |
| 848 | if (msg->bio && !msg->bio_iter) | 1026 | * Iterate through each page that contains data to be |
| 849 | init_bio_iter(msg->bio, &msg->bio_iter, &msg->bio_seg); | 1027 | * written, and send as much as possible for each. |
| 850 | #endif | 1028 | * |
| 851 | 1029 | * If we are calculating the data crc (the default), we will | |
| 1030 | * need to map the page. If we have no pages, they have | ||
| 1031 | * been revoked, so use the zero page. | ||
| 1032 | */ | ||
| 852 | while (data_len > con->out_msg_pos.data_pos) { | 1033 | while (data_len > con->out_msg_pos.data_pos) { |
| 853 | struct page *page = NULL; | 1034 | struct page *page = NULL; |
| 854 | int max_write = PAGE_SIZE; | 1035 | int max_write = PAGE_SIZE; |
| 855 | int bio_offset = 0; | 1036 | int bio_offset = 0; |
| 856 | 1037 | ||
| 857 | total_max_write = data_len - trail_len - | 1038 | in_trail = in_trail || con->out_msg_pos.data_pos >= trail_off; |
| 858 | con->out_msg_pos.data_pos; | 1039 | if (!in_trail) |
| 859 | 1040 | total_max_write = trail_off - con->out_msg_pos.data_pos; | |
| 860 | /* | ||
| 861 | * if we are calculating the data crc (the default), we need | ||
| 862 | * to map the page. if our pages[] has been revoked, use the | ||
| 863 | * zero page. | ||
| 864 | */ | ||
| 865 | |||
| 866 | /* have we reached the trail part of the data? */ | ||
| 867 | if (con->out_msg_pos.data_pos >= data_len - trail_len) { | ||
| 868 | in_trail = 1; | ||
| 869 | 1041 | ||
| 1042 | if (in_trail) { | ||
| 870 | total_max_write = data_len - con->out_msg_pos.data_pos; | 1043 | total_max_write = data_len - con->out_msg_pos.data_pos; |
| 871 | 1044 | ||
| 872 | page = list_first_entry(&msg->trail->head, | 1045 | page = list_first_entry(&msg->trail->head, |
| 873 | struct page, lru); | 1046 | struct page, lru); |
| 874 | max_write = PAGE_SIZE; | ||
| 875 | } else if (msg->pages) { | 1047 | } else if (msg->pages) { |
| 876 | page = msg->pages[con->out_msg_pos.page]; | 1048 | page = msg->pages[con->out_msg_pos.page]; |
| 877 | } else if (msg->pagelist) { | 1049 | } else if (msg->pagelist) { |
| @@ -894,15 +1066,14 @@ static int write_partial_msg_pages(struct ceph_connection *con) | |||
| 894 | 1066 | ||
| 895 | if (do_datacrc && !con->out_msg_pos.did_page_crc) { | 1067 | if (do_datacrc && !con->out_msg_pos.did_page_crc) { |
| 896 | void *base; | 1068 | void *base; |
| 897 | u32 crc; | 1069 | u32 crc = le32_to_cpu(msg->footer.data_crc); |
| 898 | u32 tmpcrc = le32_to_cpu(con->out_msg->footer.data_crc); | ||
| 899 | char *kaddr; | 1070 | char *kaddr; |
| 900 | 1071 | ||
| 901 | kaddr = kmap(page); | 1072 | kaddr = kmap(page); |
| 902 | BUG_ON(kaddr == NULL); | 1073 | BUG_ON(kaddr == NULL); |
| 903 | base = kaddr + con->out_msg_pos.page_pos + bio_offset; | 1074 | base = kaddr + con->out_msg_pos.page_pos + bio_offset; |
| 904 | crc = crc32c(tmpcrc, base, len); | 1075 | crc = crc32c(crc, base, len); |
| 905 | con->out_msg->footer.data_crc = cpu_to_le32(crc); | 1076 | msg->footer.data_crc = cpu_to_le32(crc); |
| 906 | con->out_msg_pos.did_page_crc = true; | 1077 | con->out_msg_pos.did_page_crc = true; |
| 907 | } | 1078 | } |
| 908 | ret = ceph_tcp_sendpage(con->sock, page, | 1079 | ret = ceph_tcp_sendpage(con->sock, page, |
| @@ -915,31 +1086,15 @@ static int write_partial_msg_pages(struct ceph_connection *con) | |||
| 915 | if (ret <= 0) | 1086 | if (ret <= 0) |
| 916 | goto out; | 1087 | goto out; |
| 917 | 1088 | ||
| 918 | con->out_msg_pos.data_pos += ret; | 1089 | out_msg_pos_next(con, page, len, (size_t) ret, in_trail); |
| 919 | con->out_msg_pos.page_pos += ret; | ||
| 920 | if (ret == len) { | ||
| 921 | con->out_msg_pos.page_pos = 0; | ||
| 922 | con->out_msg_pos.page++; | ||
| 923 | con->out_msg_pos.did_page_crc = false; | ||
| 924 | if (in_trail) | ||
| 925 | list_move_tail(&page->lru, | ||
| 926 | &msg->trail->head); | ||
| 927 | else if (msg->pagelist) | ||
| 928 | list_move_tail(&page->lru, | ||
| 929 | &msg->pagelist->head); | ||
| 930 | #ifdef CONFIG_BLOCK | ||
| 931 | else if (msg->bio) | ||
| 932 | iter_bio_next(&msg->bio_iter, &msg->bio_seg); | ||
| 933 | #endif | ||
| 934 | } | ||
| 935 | } | 1090 | } |
| 936 | 1091 | ||
| 937 | dout("write_partial_msg_pages %p msg %p done\n", con, msg); | 1092 | dout("write_partial_msg_pages %p msg %p done\n", con, msg); |
| 938 | 1093 | ||
| 939 | /* prepare and queue up footer, too */ | 1094 | /* prepare and queue up footer, too */ |
| 940 | if (!do_datacrc) | 1095 | if (!do_datacrc) |
| 941 | con->out_msg->footer.flags |= CEPH_MSG_FOOTER_NOCRC; | 1096 | msg->footer.flags |= CEPH_MSG_FOOTER_NOCRC; |
| 942 | ceph_con_out_kvec_reset(con); | 1097 | con_out_kvec_reset(con); |
| 943 | prepare_write_message_footer(con); | 1098 | prepare_write_message_footer(con); |
| 944 | ret = 1; | 1099 | ret = 1; |
| 945 | out: | 1100 | out: |
| @@ -1351,20 +1506,14 @@ static int process_banner(struct ceph_connection *con) | |||
| 1351 | ceph_pr_addr(&con->msgr->inst.addr.in_addr)); | 1506 | ceph_pr_addr(&con->msgr->inst.addr.in_addr)); |
| 1352 | } | 1507 | } |
| 1353 | 1508 | ||
| 1354 | set_bit(NEGOTIATING, &con->state); | ||
| 1355 | prepare_read_connect(con); | ||
| 1356 | return 0; | 1509 | return 0; |
| 1357 | } | 1510 | } |
| 1358 | 1511 | ||
| 1359 | static void fail_protocol(struct ceph_connection *con) | 1512 | static void fail_protocol(struct ceph_connection *con) |
| 1360 | { | 1513 | { |
| 1361 | reset_connection(con); | 1514 | reset_connection(con); |
| 1362 | set_bit(CLOSED, &con->state); /* in case there's queued work */ | 1515 | BUG_ON(con->state != CON_STATE_NEGOTIATING); |
| 1363 | 1516 | con->state = CON_STATE_CLOSED; | |
| 1364 | mutex_unlock(&con->mutex); | ||
| 1365 | if (con->ops->bad_proto) | ||
| 1366 | con->ops->bad_proto(con); | ||
| 1367 | mutex_lock(&con->mutex); | ||
| 1368 | } | 1517 | } |
| 1369 | 1518 | ||
| 1370 | static int process_connect(struct ceph_connection *con) | 1519 | static int process_connect(struct ceph_connection *con) |
| @@ -1407,7 +1556,7 @@ static int process_connect(struct ceph_connection *con) | |||
| 1407 | return -1; | 1556 | return -1; |
| 1408 | } | 1557 | } |
| 1409 | con->auth_retry = 1; | 1558 | con->auth_retry = 1; |
| 1410 | ceph_con_out_kvec_reset(con); | 1559 | con_out_kvec_reset(con); |
| 1411 | ret = prepare_write_connect(con); | 1560 | ret = prepare_write_connect(con); |
| 1412 | if (ret < 0) | 1561 | if (ret < 0) |
| 1413 | return ret; | 1562 | return ret; |
| @@ -1428,7 +1577,7 @@ static int process_connect(struct ceph_connection *con) | |||
| 1428 | ENTITY_NAME(con->peer_name), | 1577 | ENTITY_NAME(con->peer_name), |
| 1429 | ceph_pr_addr(&con->peer_addr.in_addr)); | 1578 | ceph_pr_addr(&con->peer_addr.in_addr)); |
| 1430 | reset_connection(con); | 1579 | reset_connection(con); |
| 1431 | ceph_con_out_kvec_reset(con); | 1580 | con_out_kvec_reset(con); |
| 1432 | ret = prepare_write_connect(con); | 1581 | ret = prepare_write_connect(con); |
| 1433 | if (ret < 0) | 1582 | if (ret < 0) |
| 1434 | return ret; | 1583 | return ret; |
| @@ -1440,8 +1589,7 @@ static int process_connect(struct ceph_connection *con) | |||
| 1440 | if (con->ops->peer_reset) | 1589 | if (con->ops->peer_reset) |
| 1441 | con->ops->peer_reset(con); | 1590 | con->ops->peer_reset(con); |
| 1442 | mutex_lock(&con->mutex); | 1591 | mutex_lock(&con->mutex); |
| 1443 | if (test_bit(CLOSED, &con->state) || | 1592 | if (con->state != CON_STATE_NEGOTIATING) |
| 1444 | test_bit(OPENING, &con->state)) | ||
| 1445 | return -EAGAIN; | 1593 | return -EAGAIN; |
| 1446 | break; | 1594 | break; |
| 1447 | 1595 | ||
| @@ -1454,7 +1602,7 @@ static int process_connect(struct ceph_connection *con) | |||
| 1454 | le32_to_cpu(con->out_connect.connect_seq), | 1602 | le32_to_cpu(con->out_connect.connect_seq), |
| 1455 | le32_to_cpu(con->in_reply.connect_seq)); | 1603 | le32_to_cpu(con->in_reply.connect_seq)); |
| 1456 | con->connect_seq = le32_to_cpu(con->in_reply.connect_seq); | 1604 | con->connect_seq = le32_to_cpu(con->in_reply.connect_seq); |
| 1457 | ceph_con_out_kvec_reset(con); | 1605 | con_out_kvec_reset(con); |
| 1458 | ret = prepare_write_connect(con); | 1606 | ret = prepare_write_connect(con); |
| 1459 | if (ret < 0) | 1607 | if (ret < 0) |
| 1460 | return ret; | 1608 | return ret; |
| @@ -1471,7 +1619,7 @@ static int process_connect(struct ceph_connection *con) | |||
| 1471 | le32_to_cpu(con->in_reply.global_seq)); | 1619 | le32_to_cpu(con->in_reply.global_seq)); |
| 1472 | get_global_seq(con->msgr, | 1620 | get_global_seq(con->msgr, |
| 1473 | le32_to_cpu(con->in_reply.global_seq)); | 1621 | le32_to_cpu(con->in_reply.global_seq)); |
| 1474 | ceph_con_out_kvec_reset(con); | 1622 | con_out_kvec_reset(con); |
| 1475 | ret = prepare_write_connect(con); | 1623 | ret = prepare_write_connect(con); |
| 1476 | if (ret < 0) | 1624 | if (ret < 0) |
| 1477 | return ret; | 1625 | return ret; |
| @@ -1489,7 +1637,10 @@ static int process_connect(struct ceph_connection *con) | |||
| 1489 | fail_protocol(con); | 1637 | fail_protocol(con); |
| 1490 | return -1; | 1638 | return -1; |
| 1491 | } | 1639 | } |
| 1492 | clear_bit(CONNECTING, &con->state); | 1640 | |
| 1641 | BUG_ON(con->state != CON_STATE_NEGOTIATING); | ||
| 1642 | con->state = CON_STATE_OPEN; | ||
| 1643 | |||
| 1493 | con->peer_global_seq = le32_to_cpu(con->in_reply.global_seq); | 1644 | con->peer_global_seq = le32_to_cpu(con->in_reply.global_seq); |
| 1494 | con->connect_seq++; | 1645 | con->connect_seq++; |
| 1495 | con->peer_features = server_feat; | 1646 | con->peer_features = server_feat; |
| @@ -1501,7 +1652,9 @@ static int process_connect(struct ceph_connection *con) | |||
| 1501 | le32_to_cpu(con->in_reply.connect_seq)); | 1652 | le32_to_cpu(con->in_reply.connect_seq)); |
| 1502 | 1653 | ||
| 1503 | if (con->in_reply.flags & CEPH_MSG_CONNECT_LOSSY) | 1654 | if (con->in_reply.flags & CEPH_MSG_CONNECT_LOSSY) |
| 1504 | set_bit(LOSSYTX, &con->state); | 1655 | set_bit(CON_FLAG_LOSSYTX, &con->flags); |
| 1656 | |||
| 1657 | con->delay = 0; /* reset backoff memory */ | ||
| 1505 | 1658 | ||
| 1506 | prepare_read_tag(con); | 1659 | prepare_read_tag(con); |
| 1507 | break; | 1660 | break; |
| @@ -1587,10 +1740,7 @@ static int read_partial_message_section(struct ceph_connection *con, | |||
| 1587 | return 1; | 1740 | return 1; |
| 1588 | } | 1741 | } |
| 1589 | 1742 | ||
| 1590 | static struct ceph_msg *ceph_alloc_msg(struct ceph_connection *con, | 1743 | static int ceph_con_in_msg_alloc(struct ceph_connection *con, int *skip); |
| 1591 | struct ceph_msg_header *hdr, | ||
| 1592 | int *skip); | ||
| 1593 | |||
| 1594 | 1744 | ||
| 1595 | static int read_partial_message_pages(struct ceph_connection *con, | 1745 | static int read_partial_message_pages(struct ceph_connection *con, |
| 1596 | struct page **pages, | 1746 | struct page **pages, |
| @@ -1633,9 +1783,6 @@ static int read_partial_message_bio(struct ceph_connection *con, | |||
| 1633 | void *p; | 1783 | void *p; |
| 1634 | int ret, left; | 1784 | int ret, left; |
| 1635 | 1785 | ||
| 1636 | if (IS_ERR(bv)) | ||
| 1637 | return PTR_ERR(bv); | ||
| 1638 | |||
| 1639 | left = min((int)(data_len - con->in_msg_pos.data_pos), | 1786 | left = min((int)(data_len - con->in_msg_pos.data_pos), |
| 1640 | (int)(bv->bv_len - con->in_msg_pos.page_pos)); | 1787 | (int)(bv->bv_len - con->in_msg_pos.page_pos)); |
| 1641 | 1788 | ||
| @@ -1672,7 +1819,6 @@ static int read_partial_message(struct ceph_connection *con) | |||
| 1672 | int ret; | 1819 | int ret; |
| 1673 | unsigned int front_len, middle_len, data_len; | 1820 | unsigned int front_len, middle_len, data_len; |
| 1674 | bool do_datacrc = !con->msgr->nocrc; | 1821 | bool do_datacrc = !con->msgr->nocrc; |
| 1675 | int skip; | ||
| 1676 | u64 seq; | 1822 | u64 seq; |
| 1677 | u32 crc; | 1823 | u32 crc; |
| 1678 | 1824 | ||
| @@ -1723,10 +1869,13 @@ static int read_partial_message(struct ceph_connection *con) | |||
| 1723 | 1869 | ||
| 1724 | /* allocate message? */ | 1870 | /* allocate message? */ |
| 1725 | if (!con->in_msg) { | 1871 | if (!con->in_msg) { |
| 1872 | int skip = 0; | ||
| 1873 | |||
| 1726 | dout("got hdr type %d front %d data %d\n", con->in_hdr.type, | 1874 | dout("got hdr type %d front %d data %d\n", con->in_hdr.type, |
| 1727 | con->in_hdr.front_len, con->in_hdr.data_len); | 1875 | con->in_hdr.front_len, con->in_hdr.data_len); |
| 1728 | skip = 0; | 1876 | ret = ceph_con_in_msg_alloc(con, &skip); |
| 1729 | con->in_msg = ceph_alloc_msg(con, &con->in_hdr, &skip); | 1877 | if (ret < 0) |
| 1878 | return ret; | ||
| 1730 | if (skip) { | 1879 | if (skip) { |
| 1731 | /* skip this message */ | 1880 | /* skip this message */ |
| 1732 | dout("alloc_msg said skip message\n"); | 1881 | dout("alloc_msg said skip message\n"); |
| @@ -1737,11 +1886,9 @@ static int read_partial_message(struct ceph_connection *con) | |||
| 1737 | con->in_seq++; | 1886 | con->in_seq++; |
| 1738 | return 0; | 1887 | return 0; |
| 1739 | } | 1888 | } |
| 1740 | if (!con->in_msg) { | 1889 | |
| 1741 | con->error_msg = | 1890 | BUG_ON(!con->in_msg); |
| 1742 | "error allocating memory for incoming message"; | 1891 | BUG_ON(con->in_msg->con != con); |
| 1743 | return -ENOMEM; | ||
| 1744 | } | ||
| 1745 | m = con->in_msg; | 1892 | m = con->in_msg; |
| 1746 | m->front.iov_len = 0; /* haven't read it yet */ | 1893 | m->front.iov_len = 0; /* haven't read it yet */ |
| 1747 | if (m->middle) | 1894 | if (m->middle) |
| @@ -1753,6 +1900,11 @@ static int read_partial_message(struct ceph_connection *con) | |||
| 1753 | else | 1900 | else |
| 1754 | con->in_msg_pos.page_pos = 0; | 1901 | con->in_msg_pos.page_pos = 0; |
| 1755 | con->in_msg_pos.data_pos = 0; | 1902 | con->in_msg_pos.data_pos = 0; |
| 1903 | |||
| 1904 | #ifdef CONFIG_BLOCK | ||
| 1905 | if (m->bio) | ||
| 1906 | init_bio_iter(m->bio, &m->bio_iter, &m->bio_seg); | ||
| 1907 | #endif | ||
| 1756 | } | 1908 | } |
| 1757 | 1909 | ||
| 1758 | /* front */ | 1910 | /* front */ |
| @@ -1769,10 +1921,6 @@ static int read_partial_message(struct ceph_connection *con) | |||
| 1769 | if (ret <= 0) | 1921 | if (ret <= 0) |
| 1770 | return ret; | 1922 | return ret; |
| 1771 | } | 1923 | } |
| 1772 | #ifdef CONFIG_BLOCK | ||
| 1773 | if (m->bio && !m->bio_iter) | ||
| 1774 | init_bio_iter(m->bio, &m->bio_iter, &m->bio_seg); | ||
| 1775 | #endif | ||
| 1776 | 1924 | ||
| 1777 | /* (page) data */ | 1925 | /* (page) data */ |
| 1778 | while (con->in_msg_pos.data_pos < data_len) { | 1926 | while (con->in_msg_pos.data_pos < data_len) { |
| @@ -1783,7 +1931,7 @@ static int read_partial_message(struct ceph_connection *con) | |||
| 1783 | return ret; | 1931 | return ret; |
| 1784 | #ifdef CONFIG_BLOCK | 1932 | #ifdef CONFIG_BLOCK |
| 1785 | } else if (m->bio) { | 1933 | } else if (m->bio) { |
| 1786 | 1934 | BUG_ON(!m->bio_iter); | |
| 1787 | ret = read_partial_message_bio(con, | 1935 | ret = read_partial_message_bio(con, |
| 1788 | &m->bio_iter, &m->bio_seg, | 1936 | &m->bio_iter, &m->bio_seg, |
| 1789 | data_len, do_datacrc); | 1937 | data_len, do_datacrc); |
| @@ -1837,8 +1985,11 @@ static void process_message(struct ceph_connection *con) | |||
| 1837 | { | 1985 | { |
| 1838 | struct ceph_msg *msg; | 1986 | struct ceph_msg *msg; |
| 1839 | 1987 | ||
| 1988 | BUG_ON(con->in_msg->con != con); | ||
| 1989 | con->in_msg->con = NULL; | ||
| 1840 | msg = con->in_msg; | 1990 | msg = con->in_msg; |
| 1841 | con->in_msg = NULL; | 1991 | con->in_msg = NULL; |
| 1992 | con->ops->put(con); | ||
| 1842 | 1993 | ||
| 1843 | /* if first message, set peer_name */ | 1994 | /* if first message, set peer_name */ |
| 1844 | if (con->peer_name.type == 0) | 1995 | if (con->peer_name.type == 0) |
| @@ -1858,7 +2009,6 @@ static void process_message(struct ceph_connection *con) | |||
| 1858 | con->ops->dispatch(con, msg); | 2009 | con->ops->dispatch(con, msg); |
| 1859 | 2010 | ||
| 1860 | mutex_lock(&con->mutex); | 2011 | mutex_lock(&con->mutex); |
| 1861 | prepare_read_tag(con); | ||
| 1862 | } | 2012 | } |
| 1863 | 2013 | ||
| 1864 | 2014 | ||
| @@ -1870,22 +2020,19 @@ static int try_write(struct ceph_connection *con) | |||
| 1870 | { | 2020 | { |
| 1871 | int ret = 1; | 2021 | int ret = 1; |
| 1872 | 2022 | ||
| 1873 | dout("try_write start %p state %lu nref %d\n", con, con->state, | 2023 | dout("try_write start %p state %lu\n", con, con->state); |
| 1874 | atomic_read(&con->nref)); | ||
| 1875 | 2024 | ||
| 1876 | more: | 2025 | more: |
| 1877 | dout("try_write out_kvec_bytes %d\n", con->out_kvec_bytes); | 2026 | dout("try_write out_kvec_bytes %d\n", con->out_kvec_bytes); |
| 1878 | 2027 | ||
| 1879 | /* open the socket first? */ | 2028 | /* open the socket first? */ |
| 1880 | if (con->sock == NULL) { | 2029 | if (con->state == CON_STATE_PREOPEN) { |
| 1881 | ceph_con_out_kvec_reset(con); | 2030 | BUG_ON(con->sock); |
| 2031 | con->state = CON_STATE_CONNECTING; | ||
| 2032 | |||
| 2033 | con_out_kvec_reset(con); | ||
| 1882 | prepare_write_banner(con); | 2034 | prepare_write_banner(con); |
| 1883 | ret = prepare_write_connect(con); | ||
| 1884 | if (ret < 0) | ||
| 1885 | goto out; | ||
| 1886 | prepare_read_banner(con); | 2035 | prepare_read_banner(con); |
| 1887 | set_bit(CONNECTING, &con->state); | ||
| 1888 | clear_bit(NEGOTIATING, &con->state); | ||
| 1889 | 2036 | ||
| 1890 | BUG_ON(con->in_msg); | 2037 | BUG_ON(con->in_msg); |
| 1891 | con->in_tag = CEPH_MSGR_TAG_READY; | 2038 | con->in_tag = CEPH_MSGR_TAG_READY; |
| @@ -1932,7 +2079,7 @@ more_kvec: | |||
| 1932 | } | 2079 | } |
| 1933 | 2080 | ||
| 1934 | do_next: | 2081 | do_next: |
| 1935 | if (!test_bit(CONNECTING, &con->state)) { | 2082 | if (con->state == CON_STATE_OPEN) { |
| 1936 | /* is anything else pending? */ | 2083 | /* is anything else pending? */ |
| 1937 | if (!list_empty(&con->out_queue)) { | 2084 | if (!list_empty(&con->out_queue)) { |
| 1938 | prepare_write_message(con); | 2085 | prepare_write_message(con); |
| @@ -1942,14 +2089,15 @@ do_next: | |||
| 1942 | prepare_write_ack(con); | 2089 | prepare_write_ack(con); |
| 1943 | goto more; | 2090 | goto more; |
| 1944 | } | 2091 | } |
| 1945 | if (test_and_clear_bit(KEEPALIVE_PENDING, &con->state)) { | 2092 | if (test_and_clear_bit(CON_FLAG_KEEPALIVE_PENDING, |
| 2093 | &con->flags)) { | ||
| 1946 | prepare_write_keepalive(con); | 2094 | prepare_write_keepalive(con); |
| 1947 | goto more; | 2095 | goto more; |
| 1948 | } | 2096 | } |
| 1949 | } | 2097 | } |
| 1950 | 2098 | ||
| 1951 | /* Nothing to do! */ | 2099 | /* Nothing to do! */ |
| 1952 | clear_bit(WRITE_PENDING, &con->state); | 2100 | clear_bit(CON_FLAG_WRITE_PENDING, &con->flags); |
| 1953 | dout("try_write nothing else to write.\n"); | 2101 | dout("try_write nothing else to write.\n"); |
| 1954 | ret = 0; | 2102 | ret = 0; |
| 1955 | out: | 2103 | out: |
| @@ -1966,38 +2114,46 @@ static int try_read(struct ceph_connection *con) | |||
| 1966 | { | 2114 | { |
| 1967 | int ret = -1; | 2115 | int ret = -1; |
| 1968 | 2116 | ||
| 1969 | if (!con->sock) | 2117 | more: |
| 1970 | return 0; | 2118 | dout("try_read start on %p state %lu\n", con, con->state); |
| 1971 | 2119 | if (con->state != CON_STATE_CONNECTING && | |
| 1972 | if (test_bit(STANDBY, &con->state)) | 2120 | con->state != CON_STATE_NEGOTIATING && |
| 2121 | con->state != CON_STATE_OPEN) | ||
| 1973 | return 0; | 2122 | return 0; |
| 1974 | 2123 | ||
| 1975 | dout("try_read start on %p\n", con); | 2124 | BUG_ON(!con->sock); |
| 1976 | 2125 | ||
| 1977 | more: | ||
| 1978 | dout("try_read tag %d in_base_pos %d\n", (int)con->in_tag, | 2126 | dout("try_read tag %d in_base_pos %d\n", (int)con->in_tag, |
| 1979 | con->in_base_pos); | 2127 | con->in_base_pos); |
| 1980 | 2128 | ||
| 1981 | /* | 2129 | if (con->state == CON_STATE_CONNECTING) { |
| 1982 | * process_connect and process_message drop and re-take | 2130 | dout("try_read connecting\n"); |
| 1983 | * con->mutex. make sure we handle a racing close or reopen. | 2131 | ret = read_partial_banner(con); |
| 1984 | */ | 2132 | if (ret <= 0) |
| 1985 | if (test_bit(CLOSED, &con->state) || | 2133 | goto out; |
| 1986 | test_bit(OPENING, &con->state)) { | 2134 | ret = process_banner(con); |
| 1987 | ret = -EAGAIN; | 2135 | if (ret < 0) |
| 2136 | goto out; | ||
| 2137 | |||
| 2138 | BUG_ON(con->state != CON_STATE_CONNECTING); | ||
| 2139 | con->state = CON_STATE_NEGOTIATING; | ||
| 2140 | |||
| 2141 | /* | ||
| 2142 | * Received banner is good, exchange connection info. | ||
| 2143 | * Do not reset out_kvec, as sending our banner raced | ||
| 2144 | * with receiving peer banner after connect completed. | ||
| 2145 | */ | ||
| 2146 | ret = prepare_write_connect(con); | ||
| 2147 | if (ret < 0) | ||
| 2148 | goto out; | ||
| 2149 | prepare_read_connect(con); | ||
| 2150 | |||
| 2151 | /* Send connection info before awaiting response */ | ||
| 1988 | goto out; | 2152 | goto out; |
| 1989 | } | 2153 | } |
| 1990 | 2154 | ||
| 1991 | if (test_bit(CONNECTING, &con->state)) { | 2155 | if (con->state == CON_STATE_NEGOTIATING) { |
| 1992 | if (!test_bit(NEGOTIATING, &con->state)) { | 2156 | dout("try_read negotiating\n"); |
| 1993 | dout("try_read connecting\n"); | ||
| 1994 | ret = read_partial_banner(con); | ||
| 1995 | if (ret <= 0) | ||
| 1996 | goto out; | ||
| 1997 | ret = process_banner(con); | ||
| 1998 | if (ret < 0) | ||
| 1999 | goto out; | ||
| 2000 | } | ||
| 2001 | ret = read_partial_connect(con); | 2157 | ret = read_partial_connect(con); |
| 2002 | if (ret <= 0) | 2158 | if (ret <= 0) |
| 2003 | goto out; | 2159 | goto out; |
| @@ -2007,6 +2163,8 @@ more: | |||
| 2007 | goto more; | 2163 | goto more; |
| 2008 | } | 2164 | } |
| 2009 | 2165 | ||
| 2166 | BUG_ON(con->state != CON_STATE_OPEN); | ||
| 2167 | |||
| 2010 | if (con->in_base_pos < 0) { | 2168 | if (con->in_base_pos < 0) { |
| 2011 | /* | 2169 | /* |
| 2012 | * skipping + discarding content. | 2170 | * skipping + discarding content. |
| @@ -2040,7 +2198,8 @@ more: | |||
| 2040 | prepare_read_ack(con); | 2198 | prepare_read_ack(con); |
| 2041 | break; | 2199 | break; |
| 2042 | case CEPH_MSGR_TAG_CLOSE: | 2200 | case CEPH_MSGR_TAG_CLOSE: |
| 2043 | set_bit(CLOSED, &con->state); /* fixme */ | 2201 | con_close_socket(con); |
| 2202 | con->state = CON_STATE_CLOSED; | ||
| 2044 | goto out; | 2203 | goto out; |
| 2045 | default: | 2204 | default: |
| 2046 | goto bad_tag; | 2205 | goto bad_tag; |
| @@ -2063,6 +2222,8 @@ more: | |||
| 2063 | if (con->in_tag == CEPH_MSGR_TAG_READY) | 2222 | if (con->in_tag == CEPH_MSGR_TAG_READY) |
| 2064 | goto more; | 2223 | goto more; |
| 2065 | process_message(con); | 2224 | process_message(con); |
| 2225 | if (con->state == CON_STATE_OPEN) | ||
| 2226 | prepare_read_tag(con); | ||
| 2066 | goto more; | 2227 | goto more; |
| 2067 | } | 2228 | } |
| 2068 | if (con->in_tag == CEPH_MSGR_TAG_ACK) { | 2229 | if (con->in_tag == CEPH_MSGR_TAG_ACK) { |
| @@ -2091,12 +2252,6 @@ bad_tag: | |||
| 2091 | */ | 2252 | */ |
| 2092 | static void queue_con(struct ceph_connection *con) | 2253 | static void queue_con(struct ceph_connection *con) |
| 2093 | { | 2254 | { |
| 2094 | if (test_bit(DEAD, &con->state)) { | ||
| 2095 | dout("queue_con %p ignoring: DEAD\n", | ||
| 2096 | con); | ||
| 2097 | return; | ||
| 2098 | } | ||
| 2099 | |||
| 2100 | if (!con->ops->get(con)) { | 2255 | if (!con->ops->get(con)) { |
| 2101 | dout("queue_con %p ref count 0\n", con); | 2256 | dout("queue_con %p ref count 0\n", con); |
| 2102 | return; | 2257 | return; |
| @@ -2121,7 +2276,26 @@ static void con_work(struct work_struct *work) | |||
| 2121 | 2276 | ||
| 2122 | mutex_lock(&con->mutex); | 2277 | mutex_lock(&con->mutex); |
| 2123 | restart: | 2278 | restart: |
| 2124 | if (test_and_clear_bit(BACKOFF, &con->state)) { | 2279 | if (test_and_clear_bit(CON_FLAG_SOCK_CLOSED, &con->flags)) { |
| 2280 | switch (con->state) { | ||
| 2281 | case CON_STATE_CONNECTING: | ||
| 2282 | con->error_msg = "connection failed"; | ||
| 2283 | break; | ||
| 2284 | case CON_STATE_NEGOTIATING: | ||
| 2285 | con->error_msg = "negotiation failed"; | ||
| 2286 | break; | ||
| 2287 | case CON_STATE_OPEN: | ||
| 2288 | con->error_msg = "socket closed"; | ||
| 2289 | break; | ||
| 2290 | default: | ||
| 2291 | dout("unrecognized con state %d\n", (int)con->state); | ||
| 2292 | con->error_msg = "unrecognized con state"; | ||
| 2293 | BUG(); | ||
| 2294 | } | ||
| 2295 | goto fault; | ||
| 2296 | } | ||
| 2297 | |||
| 2298 | if (test_and_clear_bit(CON_FLAG_BACKOFF, &con->flags)) { | ||
| 2125 | dout("con_work %p backing off\n", con); | 2299 | dout("con_work %p backing off\n", con); |
| 2126 | if (queue_delayed_work(ceph_msgr_wq, &con->work, | 2300 | if (queue_delayed_work(ceph_msgr_wq, &con->work, |
| 2127 | round_jiffies_relative(con->delay))) { | 2301 | round_jiffies_relative(con->delay))) { |
| @@ -2135,35 +2309,35 @@ restart: | |||
| 2135 | } | 2309 | } |
| 2136 | } | 2310 | } |
| 2137 | 2311 | ||
| 2138 | if (test_bit(STANDBY, &con->state)) { | 2312 | if (con->state == CON_STATE_STANDBY) { |
| 2139 | dout("con_work %p STANDBY\n", con); | 2313 | dout("con_work %p STANDBY\n", con); |
| 2140 | goto done; | 2314 | goto done; |
| 2141 | } | 2315 | } |
| 2142 | if (test_bit(CLOSED, &con->state)) { /* e.g. if we are replaced */ | 2316 | if (con->state == CON_STATE_CLOSED) { |
| 2143 | dout("con_work CLOSED\n"); | 2317 | dout("con_work %p CLOSED\n", con); |
| 2144 | con_close_socket(con); | 2318 | BUG_ON(con->sock); |
| 2145 | goto done; | 2319 | goto done; |
| 2146 | } | 2320 | } |
| 2147 | if (test_and_clear_bit(OPENING, &con->state)) { | 2321 | if (con->state == CON_STATE_PREOPEN) { |
| 2148 | /* reopen w/ new peer */ | ||
| 2149 | dout("con_work OPENING\n"); | 2322 | dout("con_work OPENING\n"); |
| 2150 | con_close_socket(con); | 2323 | BUG_ON(con->sock); |
| 2151 | } | 2324 | } |
| 2152 | 2325 | ||
| 2153 | if (test_and_clear_bit(SOCK_CLOSED, &con->state)) | ||
| 2154 | goto fault; | ||
| 2155 | |||
| 2156 | ret = try_read(con); | 2326 | ret = try_read(con); |
| 2157 | if (ret == -EAGAIN) | 2327 | if (ret == -EAGAIN) |
| 2158 | goto restart; | 2328 | goto restart; |
| 2159 | if (ret < 0) | 2329 | if (ret < 0) { |
| 2330 | con->error_msg = "socket error on read"; | ||
| 2160 | goto fault; | 2331 | goto fault; |
| 2332 | } | ||
| 2161 | 2333 | ||
| 2162 | ret = try_write(con); | 2334 | ret = try_write(con); |
| 2163 | if (ret == -EAGAIN) | 2335 | if (ret == -EAGAIN) |
| 2164 | goto restart; | 2336 | goto restart; |
| 2165 | if (ret < 0) | 2337 | if (ret < 0) { |
| 2338 | con->error_msg = "socket error on write"; | ||
| 2166 | goto fault; | 2339 | goto fault; |
| 2340 | } | ||
| 2167 | 2341 | ||
| 2168 | done: | 2342 | done: |
| 2169 | mutex_unlock(&con->mutex); | 2343 | mutex_unlock(&con->mutex); |
| @@ -2172,7 +2346,6 @@ done_unlocked: | |||
| 2172 | return; | 2346 | return; |
| 2173 | 2347 | ||
| 2174 | fault: | 2348 | fault: |
| 2175 | mutex_unlock(&con->mutex); | ||
| 2176 | ceph_fault(con); /* error/fault path */ | 2349 | ceph_fault(con); /* error/fault path */ |
| 2177 | goto done_unlocked; | 2350 | goto done_unlocked; |
| 2178 | } | 2351 | } |
| @@ -2183,26 +2356,31 @@ fault: | |||
| 2183 | * exponential backoff | 2356 | * exponential backoff |
| 2184 | */ | 2357 | */ |
| 2185 | static void ceph_fault(struct ceph_connection *con) | 2358 | static void ceph_fault(struct ceph_connection *con) |
| 2359 | __releases(con->mutex) | ||
| 2186 | { | 2360 | { |
| 2187 | pr_err("%s%lld %s %s\n", ENTITY_NAME(con->peer_name), | 2361 | pr_err("%s%lld %s %s\n", ENTITY_NAME(con->peer_name), |
| 2188 | ceph_pr_addr(&con->peer_addr.in_addr), con->error_msg); | 2362 | ceph_pr_addr(&con->peer_addr.in_addr), con->error_msg); |
| 2189 | dout("fault %p state %lu to peer %s\n", | 2363 | dout("fault %p state %lu to peer %s\n", |
| 2190 | con, con->state, ceph_pr_addr(&con->peer_addr.in_addr)); | 2364 | con, con->state, ceph_pr_addr(&con->peer_addr.in_addr)); |
| 2191 | 2365 | ||
| 2192 | if (test_bit(LOSSYTX, &con->state)) { | 2366 | BUG_ON(con->state != CON_STATE_CONNECTING && |
| 2193 | dout("fault on LOSSYTX channel\n"); | 2367 | con->state != CON_STATE_NEGOTIATING && |
| 2194 | goto out; | 2368 | con->state != CON_STATE_OPEN); |
| 2195 | } | ||
| 2196 | |||
| 2197 | mutex_lock(&con->mutex); | ||
| 2198 | if (test_bit(CLOSED, &con->state)) | ||
| 2199 | goto out_unlock; | ||
| 2200 | 2369 | ||
| 2201 | con_close_socket(con); | 2370 | con_close_socket(con); |
| 2202 | 2371 | ||
| 2372 | if (test_bit(CON_FLAG_LOSSYTX, &con->flags)) { | ||
| 2373 | dout("fault on LOSSYTX channel, marking CLOSED\n"); | ||
| 2374 | con->state = CON_STATE_CLOSED; | ||
| 2375 | goto out_unlock; | ||
| 2376 | } | ||
| 2377 | |||
| 2203 | if (con->in_msg) { | 2378 | if (con->in_msg) { |
| 2379 | BUG_ON(con->in_msg->con != con); | ||
| 2380 | con->in_msg->con = NULL; | ||
| 2204 | ceph_msg_put(con->in_msg); | 2381 | ceph_msg_put(con->in_msg); |
| 2205 | con->in_msg = NULL; | 2382 | con->in_msg = NULL; |
| 2383 | con->ops->put(con); | ||
| 2206 | } | 2384 | } |
| 2207 | 2385 | ||
| 2208 | /* Requeue anything that hasn't been acked */ | 2386 | /* Requeue anything that hasn't been acked */ |
| @@ -2211,12 +2389,13 @@ static void ceph_fault(struct ceph_connection *con) | |||
| 2211 | /* If there are no messages queued or keepalive pending, place | 2389 | /* If there are no messages queued or keepalive pending, place |
| 2212 | * the connection in a STANDBY state */ | 2390 | * the connection in a STANDBY state */ |
| 2213 | if (list_empty(&con->out_queue) && | 2391 | if (list_empty(&con->out_queue) && |
| 2214 | !test_bit(KEEPALIVE_PENDING, &con->state)) { | 2392 | !test_bit(CON_FLAG_KEEPALIVE_PENDING, &con->flags)) { |
| 2215 | dout("fault %p setting STANDBY clearing WRITE_PENDING\n", con); | 2393 | dout("fault %p setting STANDBY clearing WRITE_PENDING\n", con); |
| 2216 | clear_bit(WRITE_PENDING, &con->state); | 2394 | clear_bit(CON_FLAG_WRITE_PENDING, &con->flags); |
| 2217 | set_bit(STANDBY, &con->state); | 2395 | con->state = CON_STATE_STANDBY; |
| 2218 | } else { | 2396 | } else { |
| 2219 | /* retry after a delay. */ | 2397 | /* retry after a delay. */ |
| 2398 | con->state = CON_STATE_PREOPEN; | ||
| 2220 | if (con->delay == 0) | 2399 | if (con->delay == 0) |
| 2221 | con->delay = BASE_DELAY_INTERVAL; | 2400 | con->delay = BASE_DELAY_INTERVAL; |
| 2222 | else if (con->delay < MAX_DELAY_INTERVAL) | 2401 | else if (con->delay < MAX_DELAY_INTERVAL) |
| @@ -2237,13 +2416,12 @@ static void ceph_fault(struct ceph_connection *con) | |||
| 2237 | * that when con_work restarts we schedule the | 2416 | * that when con_work restarts we schedule the |
| 2238 | * delay then. | 2417 | * delay then. |
| 2239 | */ | 2418 | */ |
| 2240 | set_bit(BACKOFF, &con->state); | 2419 | set_bit(CON_FLAG_BACKOFF, &con->flags); |
| 2241 | } | 2420 | } |
| 2242 | } | 2421 | } |
| 2243 | 2422 | ||
| 2244 | out_unlock: | 2423 | out_unlock: |
| 2245 | mutex_unlock(&con->mutex); | 2424 | mutex_unlock(&con->mutex); |
| 2246 | out: | ||
| 2247 | /* | 2425 | /* |
| 2248 | * in case we faulted due to authentication, invalidate our | 2426 | * in case we faulted due to authentication, invalidate our |
| 2249 | * current tickets so that we can get new ones. | 2427 | * current tickets so that we can get new ones. |
| @@ -2260,18 +2438,14 @@ out: | |||
| 2260 | 2438 | ||
| 2261 | 2439 | ||
| 2262 | /* | 2440 | /* |
| 2263 | * create a new messenger instance | 2441 | * initialize a new messenger instance |
| 2264 | */ | 2442 | */ |
| 2265 | struct ceph_messenger *ceph_messenger_create(struct ceph_entity_addr *myaddr, | 2443 | void ceph_messenger_init(struct ceph_messenger *msgr, |
| 2266 | u32 supported_features, | 2444 | struct ceph_entity_addr *myaddr, |
| 2267 | u32 required_features) | 2445 | u32 supported_features, |
| 2446 | u32 required_features, | ||
| 2447 | bool nocrc) | ||
| 2268 | { | 2448 | { |
| 2269 | struct ceph_messenger *msgr; | ||
| 2270 | |||
| 2271 | msgr = kzalloc(sizeof(*msgr), GFP_KERNEL); | ||
| 2272 | if (msgr == NULL) | ||
| 2273 | return ERR_PTR(-ENOMEM); | ||
| 2274 | |||
| 2275 | msgr->supported_features = supported_features; | 2449 | msgr->supported_features = supported_features; |
| 2276 | msgr->required_features = required_features; | 2450 | msgr->required_features = required_features; |
| 2277 | 2451 | ||
| @@ -2284,30 +2458,23 @@ struct ceph_messenger *ceph_messenger_create(struct ceph_entity_addr *myaddr, | |||
| 2284 | msgr->inst.addr.type = 0; | 2458 | msgr->inst.addr.type = 0; |
| 2285 | get_random_bytes(&msgr->inst.addr.nonce, sizeof(msgr->inst.addr.nonce)); | 2459 | get_random_bytes(&msgr->inst.addr.nonce, sizeof(msgr->inst.addr.nonce)); |
| 2286 | encode_my_addr(msgr); | 2460 | encode_my_addr(msgr); |
| 2461 | msgr->nocrc = nocrc; | ||
| 2287 | 2462 | ||
| 2288 | dout("messenger_create %p\n", msgr); | 2463 | atomic_set(&msgr->stopping, 0); |
| 2289 | return msgr; | ||
| 2290 | } | ||
| 2291 | EXPORT_SYMBOL(ceph_messenger_create); | ||
| 2292 | 2464 | ||
| 2293 | void ceph_messenger_destroy(struct ceph_messenger *msgr) | 2465 | dout("%s %p\n", __func__, msgr); |
| 2294 | { | ||
| 2295 | dout("destroy %p\n", msgr); | ||
| 2296 | kfree(msgr); | ||
| 2297 | dout("destroyed messenger %p\n", msgr); | ||
| 2298 | } | 2466 | } |
| 2299 | EXPORT_SYMBOL(ceph_messenger_destroy); | 2467 | EXPORT_SYMBOL(ceph_messenger_init); |
| 2300 | 2468 | ||
| 2301 | static void clear_standby(struct ceph_connection *con) | 2469 | static void clear_standby(struct ceph_connection *con) |
| 2302 | { | 2470 | { |
| 2303 | /* come back from STANDBY? */ | 2471 | /* come back from STANDBY? */ |
| 2304 | if (test_and_clear_bit(STANDBY, &con->state)) { | 2472 | if (con->state == CON_STATE_STANDBY) { |
| 2305 | mutex_lock(&con->mutex); | ||
| 2306 | dout("clear_standby %p and ++connect_seq\n", con); | 2473 | dout("clear_standby %p and ++connect_seq\n", con); |
| 2474 | con->state = CON_STATE_PREOPEN; | ||
| 2307 | con->connect_seq++; | 2475 | con->connect_seq++; |
| 2308 | WARN_ON(test_bit(WRITE_PENDING, &con->state)); | 2476 | WARN_ON(test_bit(CON_FLAG_WRITE_PENDING, &con->flags)); |
| 2309 | WARN_ON(test_bit(KEEPALIVE_PENDING, &con->state)); | 2477 | WARN_ON(test_bit(CON_FLAG_KEEPALIVE_PENDING, &con->flags)); |
| 2310 | mutex_unlock(&con->mutex); | ||
| 2311 | } | 2478 | } |
| 2312 | } | 2479 | } |
| 2313 | 2480 | ||
| @@ -2316,21 +2483,24 @@ static void clear_standby(struct ceph_connection *con) | |||
| 2316 | */ | 2483 | */ |
| 2317 | void ceph_con_send(struct ceph_connection *con, struct ceph_msg *msg) | 2484 | void ceph_con_send(struct ceph_connection *con, struct ceph_msg *msg) |
| 2318 | { | 2485 | { |
| 2319 | if (test_bit(CLOSED, &con->state)) { | ||
| 2320 | dout("con_send %p closed, dropping %p\n", con, msg); | ||
| 2321 | ceph_msg_put(msg); | ||
| 2322 | return; | ||
| 2323 | } | ||
| 2324 | |||
| 2325 | /* set src+dst */ | 2486 | /* set src+dst */ |
| 2326 | msg->hdr.src = con->msgr->inst.name; | 2487 | msg->hdr.src = con->msgr->inst.name; |
| 2327 | |||
| 2328 | BUG_ON(msg->front.iov_len != le32_to_cpu(msg->hdr.front_len)); | 2488 | BUG_ON(msg->front.iov_len != le32_to_cpu(msg->hdr.front_len)); |
| 2329 | |||
| 2330 | msg->needs_out_seq = true; | 2489 | msg->needs_out_seq = true; |
| 2331 | 2490 | ||
| 2332 | /* queue */ | ||
| 2333 | mutex_lock(&con->mutex); | 2491 | mutex_lock(&con->mutex); |
| 2492 | |||
| 2493 | if (con->state == CON_STATE_CLOSED) { | ||
| 2494 | dout("con_send %p closed, dropping %p\n", con, msg); | ||
| 2495 | ceph_msg_put(msg); | ||
| 2496 | mutex_unlock(&con->mutex); | ||
| 2497 | return; | ||
| 2498 | } | ||
| 2499 | |||
| 2500 | BUG_ON(msg->con != NULL); | ||
| 2501 | msg->con = con->ops->get(con); | ||
| 2502 | BUG_ON(msg->con == NULL); | ||
| 2503 | |||
| 2334 | BUG_ON(!list_empty(&msg->list_head)); | 2504 | BUG_ON(!list_empty(&msg->list_head)); |
| 2335 | list_add_tail(&msg->list_head, &con->out_queue); | 2505 | list_add_tail(&msg->list_head, &con->out_queue); |
| 2336 | dout("----- %p to %s%lld %d=%s len %d+%d+%d -----\n", msg, | 2506 | dout("----- %p to %s%lld %d=%s len %d+%d+%d -----\n", msg, |
| @@ -2339,12 +2509,13 @@ void ceph_con_send(struct ceph_connection *con, struct ceph_msg *msg) | |||
| 2339 | le32_to_cpu(msg->hdr.front_len), | 2509 | le32_to_cpu(msg->hdr.front_len), |
| 2340 | le32_to_cpu(msg->hdr.middle_len), | 2510 | le32_to_cpu(msg->hdr.middle_len), |
| 2341 | le32_to_cpu(msg->hdr.data_len)); | 2511 | le32_to_cpu(msg->hdr.data_len)); |
| 2512 | |||
| 2513 | clear_standby(con); | ||
| 2342 | mutex_unlock(&con->mutex); | 2514 | mutex_unlock(&con->mutex); |
| 2343 | 2515 | ||
| 2344 | /* if there wasn't anything waiting to send before, queue | 2516 | /* if there wasn't anything waiting to send before, queue |
| 2345 | * new work */ | 2517 | * new work */ |
| 2346 | clear_standby(con); | 2518 | if (test_and_set_bit(CON_FLAG_WRITE_PENDING, &con->flags) == 0) |
| 2347 | if (test_and_set_bit(WRITE_PENDING, &con->state) == 0) | ||
| 2348 | queue_con(con); | 2519 | queue_con(con); |
| 2349 | } | 2520 | } |
| 2350 | EXPORT_SYMBOL(ceph_con_send); | 2521 | EXPORT_SYMBOL(ceph_con_send); |
| @@ -2352,24 +2523,34 @@ EXPORT_SYMBOL(ceph_con_send); | |||
| 2352 | /* | 2523 | /* |
| 2353 | * Revoke a message that was previously queued for send | 2524 | * Revoke a message that was previously queued for send |
| 2354 | */ | 2525 | */ |
| 2355 | void ceph_con_revoke(struct ceph_connection *con, struct ceph_msg *msg) | 2526 | void ceph_msg_revoke(struct ceph_msg *msg) |
| 2356 | { | 2527 | { |
| 2528 | struct ceph_connection *con = msg->con; | ||
| 2529 | |||
| 2530 | if (!con) | ||
| 2531 | return; /* Message not in our possession */ | ||
| 2532 | |||
| 2357 | mutex_lock(&con->mutex); | 2533 | mutex_lock(&con->mutex); |
| 2358 | if (!list_empty(&msg->list_head)) { | 2534 | if (!list_empty(&msg->list_head)) { |
| 2359 | dout("con_revoke %p msg %p - was on queue\n", con, msg); | 2535 | dout("%s %p msg %p - was on queue\n", __func__, con, msg); |
| 2360 | list_del_init(&msg->list_head); | 2536 | list_del_init(&msg->list_head); |
| 2361 | ceph_msg_put(msg); | 2537 | BUG_ON(msg->con == NULL); |
| 2538 | msg->con->ops->put(msg->con); | ||
| 2539 | msg->con = NULL; | ||
| 2362 | msg->hdr.seq = 0; | 2540 | msg->hdr.seq = 0; |
| 2541 | |||
| 2542 | ceph_msg_put(msg); | ||
| 2363 | } | 2543 | } |
| 2364 | if (con->out_msg == msg) { | 2544 | if (con->out_msg == msg) { |
| 2365 | dout("con_revoke %p msg %p - was sending\n", con, msg); | 2545 | dout("%s %p msg %p - was sending\n", __func__, con, msg); |
| 2366 | con->out_msg = NULL; | 2546 | con->out_msg = NULL; |
| 2367 | if (con->out_kvec_is_msg) { | 2547 | if (con->out_kvec_is_msg) { |
| 2368 | con->out_skip = con->out_kvec_bytes; | 2548 | con->out_skip = con->out_kvec_bytes; |
| 2369 | con->out_kvec_is_msg = false; | 2549 | con->out_kvec_is_msg = false; |
| 2370 | } | 2550 | } |
| 2371 | ceph_msg_put(msg); | ||
| 2372 | msg->hdr.seq = 0; | 2551 | msg->hdr.seq = 0; |
| 2552 | |||
| 2553 | ceph_msg_put(msg); | ||
| 2373 | } | 2554 | } |
| 2374 | mutex_unlock(&con->mutex); | 2555 | mutex_unlock(&con->mutex); |
| 2375 | } | 2556 | } |
| @@ -2377,17 +2558,27 @@ void ceph_con_revoke(struct ceph_connection *con, struct ceph_msg *msg) | |||
| 2377 | /* | 2558 | /* |
| 2378 | * Revoke a message that we may be reading data into | 2559 | * Revoke a message that we may be reading data into |
| 2379 | */ | 2560 | */ |
| 2380 | void ceph_con_revoke_message(struct ceph_connection *con, struct ceph_msg *msg) | 2561 | void ceph_msg_revoke_incoming(struct ceph_msg *msg) |
| 2381 | { | 2562 | { |
| 2563 | struct ceph_connection *con; | ||
| 2564 | |||
| 2565 | BUG_ON(msg == NULL); | ||
| 2566 | if (!msg->con) { | ||
| 2567 | dout("%s msg %p null con\n", __func__, msg); | ||
| 2568 | |||
| 2569 | return; /* Message not in our possession */ | ||
| 2570 | } | ||
| 2571 | |||
| 2572 | con = msg->con; | ||
| 2382 | mutex_lock(&con->mutex); | 2573 | mutex_lock(&con->mutex); |
| 2383 | if (con->in_msg && con->in_msg == msg) { | 2574 | if (con->in_msg == msg) { |
| 2384 | unsigned int front_len = le32_to_cpu(con->in_hdr.front_len); | 2575 | unsigned int front_len = le32_to_cpu(con->in_hdr.front_len); |
| 2385 | unsigned int middle_len = le32_to_cpu(con->in_hdr.middle_len); | 2576 | unsigned int middle_len = le32_to_cpu(con->in_hdr.middle_len); |
| 2386 | unsigned int data_len = le32_to_cpu(con->in_hdr.data_len); | 2577 | unsigned int data_len = le32_to_cpu(con->in_hdr.data_len); |
| 2387 | 2578 | ||
| 2388 | /* skip rest of message */ | 2579 | /* skip rest of message */ |
| 2389 | dout("con_revoke_pages %p msg %p revoked\n", con, msg); | 2580 | dout("%s %p msg %p revoked\n", __func__, con, msg); |
| 2390 | con->in_base_pos = con->in_base_pos - | 2581 | con->in_base_pos = con->in_base_pos - |
| 2391 | sizeof(struct ceph_msg_header) - | 2582 | sizeof(struct ceph_msg_header) - |
| 2392 | front_len - | 2583 | front_len - |
| 2393 | middle_len - | 2584 | middle_len - |
| @@ -2398,8 +2589,8 @@ void ceph_con_revoke_message(struct ceph_connection *con, struct ceph_msg *msg) | |||
| 2398 | con->in_tag = CEPH_MSGR_TAG_READY; | 2589 | con->in_tag = CEPH_MSGR_TAG_READY; |
| 2399 | con->in_seq++; | 2590 | con->in_seq++; |
| 2400 | } else { | 2591 | } else { |
| 2401 | dout("con_revoke_pages %p msg %p pages %p no-op\n", | 2592 | dout("%s %p in_msg %p msg %p no-op\n", |
| 2402 | con, con->in_msg, msg); | 2593 | __func__, con, con->in_msg, msg); |
| 2403 | } | 2594 | } |
| 2404 | mutex_unlock(&con->mutex); | 2595 | mutex_unlock(&con->mutex); |
| 2405 | } | 2596 | } |
| @@ -2410,9 +2601,11 @@ void ceph_con_revoke_message(struct ceph_connection *con, struct ceph_msg *msg) | |||
| 2410 | void ceph_con_keepalive(struct ceph_connection *con) | 2601 | void ceph_con_keepalive(struct ceph_connection *con) |
| 2411 | { | 2602 | { |
| 2412 | dout("con_keepalive %p\n", con); | 2603 | dout("con_keepalive %p\n", con); |
| 2604 | mutex_lock(&con->mutex); | ||
| 2413 | clear_standby(con); | 2605 | clear_standby(con); |
| 2414 | if (test_and_set_bit(KEEPALIVE_PENDING, &con->state) == 0 && | 2606 | mutex_unlock(&con->mutex); |
| 2415 | test_and_set_bit(WRITE_PENDING, &con->state) == 0) | 2607 | if (test_and_set_bit(CON_FLAG_KEEPALIVE_PENDING, &con->flags) == 0 && |
| 2608 | test_and_set_bit(CON_FLAG_WRITE_PENDING, &con->flags) == 0) | ||
| 2416 | queue_con(con); | 2609 | queue_con(con); |
| 2417 | } | 2610 | } |
| 2418 | EXPORT_SYMBOL(ceph_con_keepalive); | 2611 | EXPORT_SYMBOL(ceph_con_keepalive); |
| @@ -2431,6 +2624,8 @@ struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags, | |||
| 2431 | if (m == NULL) | 2624 | if (m == NULL) |
| 2432 | goto out; | 2625 | goto out; |
| 2433 | kref_init(&m->kref); | 2626 | kref_init(&m->kref); |
| 2627 | |||
| 2628 | m->con = NULL; | ||
| 2434 | INIT_LIST_HEAD(&m->list_head); | 2629 | INIT_LIST_HEAD(&m->list_head); |
| 2435 | 2630 | ||
| 2436 | m->hdr.tid = 0; | 2631 | m->hdr.tid = 0; |
| @@ -2526,46 +2721,77 @@ static int ceph_alloc_middle(struct ceph_connection *con, struct ceph_msg *msg) | |||
| 2526 | } | 2721 | } |
| 2527 | 2722 | ||
| 2528 | /* | 2723 | /* |
| 2529 | * Generic message allocator, for incoming messages. | 2724 | * Allocate a message for receiving an incoming message on a |
| 2725 | * connection, and save the result in con->in_msg. Uses the | ||
| 2726 | * connection's private alloc_msg op if available. | ||
| 2727 | * | ||
| 2728 | * Returns 0 on success, or a negative error code. | ||
| 2729 | * | ||
| 2730 | * On success, if we set *skip = 1: | ||
| 2731 | * - the next message should be skipped and ignored. | ||
| 2732 | * - con->in_msg == NULL | ||
| 2733 | * or if we set *skip = 0: | ||
| 2734 | * - con->in_msg is non-null. | ||
| 2735 | * On error (ENOMEM, EAGAIN, ...), | ||
| 2736 | * - con->in_msg == NULL | ||
| 2530 | */ | 2737 | */ |
| 2531 | static struct ceph_msg *ceph_alloc_msg(struct ceph_connection *con, | 2738 | static int ceph_con_in_msg_alloc(struct ceph_connection *con, int *skip) |
| 2532 | struct ceph_msg_header *hdr, | ||
| 2533 | int *skip) | ||
| 2534 | { | 2739 | { |
| 2740 | struct ceph_msg_header *hdr = &con->in_hdr; | ||
| 2535 | int type = le16_to_cpu(hdr->type); | 2741 | int type = le16_to_cpu(hdr->type); |
| 2536 | int front_len = le32_to_cpu(hdr->front_len); | 2742 | int front_len = le32_to_cpu(hdr->front_len); |
| 2537 | int middle_len = le32_to_cpu(hdr->middle_len); | 2743 | int middle_len = le32_to_cpu(hdr->middle_len); |
| 2538 | struct ceph_msg *msg = NULL; | 2744 | int ret = 0; |
| 2539 | int ret; | 2745 | |
| 2746 | BUG_ON(con->in_msg != NULL); | ||
| 2540 | 2747 | ||
| 2541 | if (con->ops->alloc_msg) { | 2748 | if (con->ops->alloc_msg) { |
| 2749 | struct ceph_msg *msg; | ||
| 2750 | |||
| 2542 | mutex_unlock(&con->mutex); | 2751 | mutex_unlock(&con->mutex); |
| 2543 | msg = con->ops->alloc_msg(con, hdr, skip); | 2752 | msg = con->ops->alloc_msg(con, hdr, skip); |
| 2544 | mutex_lock(&con->mutex); | 2753 | mutex_lock(&con->mutex); |
| 2545 | if (!msg || *skip) | 2754 | if (con->state != CON_STATE_OPEN) { |
| 2546 | return NULL; | 2755 | ceph_msg_put(msg); |
| 2756 | return -EAGAIN; | ||
| 2757 | } | ||
| 2758 | con->in_msg = msg; | ||
| 2759 | if (con->in_msg) { | ||
| 2760 | con->in_msg->con = con->ops->get(con); | ||
| 2761 | BUG_ON(con->in_msg->con == NULL); | ||
| 2762 | } | ||
| 2763 | if (*skip) { | ||
| 2764 | con->in_msg = NULL; | ||
| 2765 | return 0; | ||
| 2766 | } | ||
| 2767 | if (!con->in_msg) { | ||
| 2768 | con->error_msg = | ||
| 2769 | "error allocating memory for incoming message"; | ||
| 2770 | return -ENOMEM; | ||
| 2771 | } | ||
| 2547 | } | 2772 | } |
| 2548 | if (!msg) { | 2773 | if (!con->in_msg) { |
| 2549 | *skip = 0; | 2774 | con->in_msg = ceph_msg_new(type, front_len, GFP_NOFS, false); |
| 2550 | msg = ceph_msg_new(type, front_len, GFP_NOFS, false); | 2775 | if (!con->in_msg) { |
| 2551 | if (!msg) { | ||
| 2552 | pr_err("unable to allocate msg type %d len %d\n", | 2776 | pr_err("unable to allocate msg type %d len %d\n", |
| 2553 | type, front_len); | 2777 | type, front_len); |
| 2554 | return NULL; | 2778 | return -ENOMEM; |
| 2555 | } | 2779 | } |
| 2556 | msg->page_alignment = le16_to_cpu(hdr->data_off); | 2780 | con->in_msg->con = con->ops->get(con); |
| 2781 | BUG_ON(con->in_msg->con == NULL); | ||
| 2782 | con->in_msg->page_alignment = le16_to_cpu(hdr->data_off); | ||
| 2557 | } | 2783 | } |
| 2558 | memcpy(&msg->hdr, &con->in_hdr, sizeof(con->in_hdr)); | 2784 | memcpy(&con->in_msg->hdr, &con->in_hdr, sizeof(con->in_hdr)); |
| 2559 | 2785 | ||
| 2560 | if (middle_len && !msg->middle) { | 2786 | if (middle_len && !con->in_msg->middle) { |
| 2561 | ret = ceph_alloc_middle(con, msg); | 2787 | ret = ceph_alloc_middle(con, con->in_msg); |
| 2562 | if (ret < 0) { | 2788 | if (ret < 0) { |
| 2563 | ceph_msg_put(msg); | 2789 | ceph_msg_put(con->in_msg); |
| 2564 | return NULL; | 2790 | con->in_msg = NULL; |
| 2565 | } | 2791 | } |
| 2566 | } | 2792 | } |
| 2567 | 2793 | ||
| 2568 | return msg; | 2794 | return ret; |
| 2569 | } | 2795 | } |
| 2570 | 2796 | ||
| 2571 | 2797 | ||
diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c index d0649a9655be..900ea0f043fc 100644 --- a/net/ceph/mon_client.c +++ b/net/ceph/mon_client.c | |||
| @@ -106,9 +106,9 @@ static void __send_prepared_auth_request(struct ceph_mon_client *monc, int len) | |||
| 106 | monc->pending_auth = 1; | 106 | monc->pending_auth = 1; |
| 107 | monc->m_auth->front.iov_len = len; | 107 | monc->m_auth->front.iov_len = len; |
| 108 | monc->m_auth->hdr.front_len = cpu_to_le32(len); | 108 | monc->m_auth->hdr.front_len = cpu_to_le32(len); |
| 109 | ceph_con_revoke(monc->con, monc->m_auth); | 109 | ceph_msg_revoke(monc->m_auth); |
| 110 | ceph_msg_get(monc->m_auth); /* keep our ref */ | 110 | ceph_msg_get(monc->m_auth); /* keep our ref */ |
| 111 | ceph_con_send(monc->con, monc->m_auth); | 111 | ceph_con_send(&monc->con, monc->m_auth); |
| 112 | } | 112 | } |
| 113 | 113 | ||
| 114 | /* | 114 | /* |
| @@ -117,8 +117,11 @@ static void __send_prepared_auth_request(struct ceph_mon_client *monc, int len) | |||
| 117 | static void __close_session(struct ceph_mon_client *monc) | 117 | static void __close_session(struct ceph_mon_client *monc) |
| 118 | { | 118 | { |
| 119 | dout("__close_session closing mon%d\n", monc->cur_mon); | 119 | dout("__close_session closing mon%d\n", monc->cur_mon); |
| 120 | ceph_con_revoke(monc->con, monc->m_auth); | 120 | ceph_msg_revoke(monc->m_auth); |
| 121 | ceph_con_close(monc->con); | 121 | ceph_msg_revoke_incoming(monc->m_auth_reply); |
| 122 | ceph_msg_revoke(monc->m_subscribe); | ||
| 123 | ceph_msg_revoke_incoming(monc->m_subscribe_ack); | ||
| 124 | ceph_con_close(&monc->con); | ||
| 122 | monc->cur_mon = -1; | 125 | monc->cur_mon = -1; |
| 123 | monc->pending_auth = 0; | 126 | monc->pending_auth = 0; |
| 124 | ceph_auth_reset(monc->auth); | 127 | ceph_auth_reset(monc->auth); |
| @@ -142,9 +145,8 @@ static int __open_session(struct ceph_mon_client *monc) | |||
| 142 | monc->want_next_osdmap = !!monc->want_next_osdmap; | 145 | monc->want_next_osdmap = !!monc->want_next_osdmap; |
| 143 | 146 | ||
| 144 | dout("open_session mon%d opening\n", monc->cur_mon); | 147 | dout("open_session mon%d opening\n", monc->cur_mon); |
| 145 | monc->con->peer_name.type = CEPH_ENTITY_TYPE_MON; | 148 | ceph_con_open(&monc->con, |
| 146 | monc->con->peer_name.num = cpu_to_le64(monc->cur_mon); | 149 | CEPH_ENTITY_TYPE_MON, monc->cur_mon, |
| 147 | ceph_con_open(monc->con, | ||
| 148 | &monc->monmap->mon_inst[monc->cur_mon].addr); | 150 | &monc->monmap->mon_inst[monc->cur_mon].addr); |
| 149 | 151 | ||
| 150 | /* initiatiate authentication handshake */ | 152 | /* initiatiate authentication handshake */ |
| @@ -226,8 +228,8 @@ static void __send_subscribe(struct ceph_mon_client *monc) | |||
| 226 | 228 | ||
| 227 | msg->front.iov_len = p - msg->front.iov_base; | 229 | msg->front.iov_len = p - msg->front.iov_base; |
| 228 | msg->hdr.front_len = cpu_to_le32(msg->front.iov_len); | 230 | msg->hdr.front_len = cpu_to_le32(msg->front.iov_len); |
| 229 | ceph_con_revoke(monc->con, msg); | 231 | ceph_msg_revoke(msg); |
| 230 | ceph_con_send(monc->con, ceph_msg_get(msg)); | 232 | ceph_con_send(&monc->con, ceph_msg_get(msg)); |
| 231 | 233 | ||
| 232 | monc->sub_sent = jiffies | 1; /* never 0 */ | 234 | monc->sub_sent = jiffies | 1; /* never 0 */ |
| 233 | } | 235 | } |
| @@ -247,7 +249,7 @@ static void handle_subscribe_ack(struct ceph_mon_client *monc, | |||
| 247 | if (monc->hunting) { | 249 | if (monc->hunting) { |
| 248 | pr_info("mon%d %s session established\n", | 250 | pr_info("mon%d %s session established\n", |
| 249 | monc->cur_mon, | 251 | monc->cur_mon, |
| 250 | ceph_pr_addr(&monc->con->peer_addr.in_addr)); | 252 | ceph_pr_addr(&monc->con.peer_addr.in_addr)); |
| 251 | monc->hunting = false; | 253 | monc->hunting = false; |
| 252 | } | 254 | } |
| 253 | dout("handle_subscribe_ack after %d seconds\n", seconds); | 255 | dout("handle_subscribe_ack after %d seconds\n", seconds); |
| @@ -309,6 +311,17 @@ int ceph_monc_open_session(struct ceph_mon_client *monc) | |||
| 309 | EXPORT_SYMBOL(ceph_monc_open_session); | 311 | EXPORT_SYMBOL(ceph_monc_open_session); |
| 310 | 312 | ||
| 311 | /* | 313 | /* |
| 314 | * We require the fsid and global_id in order to initialize our | ||
| 315 | * debugfs dir. | ||
| 316 | */ | ||
| 317 | static bool have_debugfs_info(struct ceph_mon_client *monc) | ||
| 318 | { | ||
| 319 | dout("have_debugfs_info fsid %d globalid %lld\n", | ||
| 320 | (int)monc->client->have_fsid, monc->auth->global_id); | ||
| 321 | return monc->client->have_fsid && monc->auth->global_id > 0; | ||
| 322 | } | ||
| 323 | |||
| 324 | /* | ||
| 312 | * The monitor responds with mount ack indicate mount success. The | 325 | * The monitor responds with mount ack indicate mount success. The |
| 313 | * included client ticket allows the client to talk to MDSs and OSDs. | 326 | * included client ticket allows the client to talk to MDSs and OSDs. |
| 314 | */ | 327 | */ |
| @@ -318,9 +331,12 @@ static void ceph_monc_handle_map(struct ceph_mon_client *monc, | |||
| 318 | struct ceph_client *client = monc->client; | 331 | struct ceph_client *client = monc->client; |
| 319 | struct ceph_monmap *monmap = NULL, *old = monc->monmap; | 332 | struct ceph_monmap *monmap = NULL, *old = monc->monmap; |
| 320 | void *p, *end; | 333 | void *p, *end; |
| 334 | int had_debugfs_info, init_debugfs = 0; | ||
| 321 | 335 | ||
| 322 | mutex_lock(&monc->mutex); | 336 | mutex_lock(&monc->mutex); |
| 323 | 337 | ||
| 338 | had_debugfs_info = have_debugfs_info(monc); | ||
| 339 | |||
| 324 | dout("handle_monmap\n"); | 340 | dout("handle_monmap\n"); |
| 325 | p = msg->front.iov_base; | 341 | p = msg->front.iov_base; |
| 326 | end = p + msg->front.iov_len; | 342 | end = p + msg->front.iov_len; |
| @@ -342,12 +358,22 @@ static void ceph_monc_handle_map(struct ceph_mon_client *monc, | |||
| 342 | 358 | ||
| 343 | if (!client->have_fsid) { | 359 | if (!client->have_fsid) { |
| 344 | client->have_fsid = true; | 360 | client->have_fsid = true; |
| 361 | if (!had_debugfs_info && have_debugfs_info(monc)) { | ||
| 362 | pr_info("client%lld fsid %pU\n", | ||
| 363 | ceph_client_id(monc->client), | ||
| 364 | &monc->client->fsid); | ||
| 365 | init_debugfs = 1; | ||
| 366 | } | ||
| 345 | mutex_unlock(&monc->mutex); | 367 | mutex_unlock(&monc->mutex); |
| 346 | /* | 368 | |
| 347 | * do debugfs initialization without mutex to avoid | 369 | if (init_debugfs) { |
| 348 | * creating a locking dependency | 370 | /* |
| 349 | */ | 371 | * do debugfs initialization without mutex to avoid |
| 350 | ceph_debugfs_client_init(client); | 372 | * creating a locking dependency |
| 373 | */ | ||
| 374 | ceph_debugfs_client_init(monc->client); | ||
| 375 | } | ||
| 376 | |||
| 351 | goto out_unlocked; | 377 | goto out_unlocked; |
| 352 | } | 378 | } |
| 353 | out: | 379 | out: |
| @@ -439,6 +465,7 @@ static struct ceph_msg *get_generic_reply(struct ceph_connection *con, | |||
| 439 | m = NULL; | 465 | m = NULL; |
| 440 | } else { | 466 | } else { |
| 441 | dout("get_generic_reply %lld got %p\n", tid, req->reply); | 467 | dout("get_generic_reply %lld got %p\n", tid, req->reply); |
| 468 | *skip = 0; | ||
| 442 | m = ceph_msg_get(req->reply); | 469 | m = ceph_msg_get(req->reply); |
| 443 | /* | 470 | /* |
| 444 | * we don't need to track the connection reading into | 471 | * we don't need to track the connection reading into |
| @@ -461,7 +488,7 @@ static int do_generic_request(struct ceph_mon_client *monc, | |||
| 461 | req->request->hdr.tid = cpu_to_le64(req->tid); | 488 | req->request->hdr.tid = cpu_to_le64(req->tid); |
| 462 | __insert_generic_request(monc, req); | 489 | __insert_generic_request(monc, req); |
| 463 | monc->num_generic_requests++; | 490 | monc->num_generic_requests++; |
| 464 | ceph_con_send(monc->con, ceph_msg_get(req->request)); | 491 | ceph_con_send(&monc->con, ceph_msg_get(req->request)); |
| 465 | mutex_unlock(&monc->mutex); | 492 | mutex_unlock(&monc->mutex); |
| 466 | 493 | ||
| 467 | err = wait_for_completion_interruptible(&req->completion); | 494 | err = wait_for_completion_interruptible(&req->completion); |
| @@ -684,8 +711,9 @@ static void __resend_generic_request(struct ceph_mon_client *monc) | |||
| 684 | 711 | ||
| 685 | for (p = rb_first(&monc->generic_request_tree); p; p = rb_next(p)) { | 712 | for (p = rb_first(&monc->generic_request_tree); p; p = rb_next(p)) { |
| 686 | req = rb_entry(p, struct ceph_mon_generic_request, node); | 713 | req = rb_entry(p, struct ceph_mon_generic_request, node); |
| 687 | ceph_con_revoke(monc->con, req->request); | 714 | ceph_msg_revoke(req->request); |
| 688 | ceph_con_send(monc->con, ceph_msg_get(req->request)); | 715 | ceph_msg_revoke_incoming(req->reply); |
| 716 | ceph_con_send(&monc->con, ceph_msg_get(req->request)); | ||
| 689 | } | 717 | } |
| 690 | } | 718 | } |
| 691 | 719 | ||
| @@ -705,7 +733,7 @@ static void delayed_work(struct work_struct *work) | |||
| 705 | __close_session(monc); | 733 | __close_session(monc); |
| 706 | __open_session(monc); /* continue hunting */ | 734 | __open_session(monc); /* continue hunting */ |
| 707 | } else { | 735 | } else { |
| 708 | ceph_con_keepalive(monc->con); | 736 | ceph_con_keepalive(&monc->con); |
| 709 | 737 | ||
| 710 | __validate_auth(monc); | 738 | __validate_auth(monc); |
| 711 | 739 | ||
| @@ -760,19 +788,12 @@ int ceph_monc_init(struct ceph_mon_client *monc, struct ceph_client *cl) | |||
| 760 | goto out; | 788 | goto out; |
| 761 | 789 | ||
| 762 | /* connection */ | 790 | /* connection */ |
| 763 | monc->con = kmalloc(sizeof(*monc->con), GFP_KERNEL); | ||
| 764 | if (!monc->con) | ||
| 765 | goto out_monmap; | ||
| 766 | ceph_con_init(monc->client->msgr, monc->con); | ||
| 767 | monc->con->private = monc; | ||
| 768 | monc->con->ops = &mon_con_ops; | ||
| 769 | |||
| 770 | /* authentication */ | 791 | /* authentication */ |
| 771 | monc->auth = ceph_auth_init(cl->options->name, | 792 | monc->auth = ceph_auth_init(cl->options->name, |
| 772 | cl->options->key); | 793 | cl->options->key); |
| 773 | if (IS_ERR(monc->auth)) { | 794 | if (IS_ERR(monc->auth)) { |
| 774 | err = PTR_ERR(monc->auth); | 795 | err = PTR_ERR(monc->auth); |
| 775 | goto out_con; | 796 | goto out_monmap; |
| 776 | } | 797 | } |
| 777 | monc->auth->want_keys = | 798 | monc->auth->want_keys = |
| 778 | CEPH_ENTITY_TYPE_AUTH | CEPH_ENTITY_TYPE_MON | | 799 | CEPH_ENTITY_TYPE_AUTH | CEPH_ENTITY_TYPE_MON | |
| @@ -801,6 +822,9 @@ int ceph_monc_init(struct ceph_mon_client *monc, struct ceph_client *cl) | |||
| 801 | if (!monc->m_auth) | 822 | if (!monc->m_auth) |
| 802 | goto out_auth_reply; | 823 | goto out_auth_reply; |
| 803 | 824 | ||
| 825 | ceph_con_init(&monc->con, monc, &mon_con_ops, | ||
| 826 | &monc->client->msgr); | ||
| 827 | |||
| 804 | monc->cur_mon = -1; | 828 | monc->cur_mon = -1; |
| 805 | monc->hunting = true; | 829 | monc->hunting = true; |
| 806 | monc->sub_renew_after = jiffies; | 830 | monc->sub_renew_after = jiffies; |
| @@ -824,8 +848,6 @@ out_subscribe_ack: | |||
| 824 | ceph_msg_put(monc->m_subscribe_ack); | 848 | ceph_msg_put(monc->m_subscribe_ack); |
| 825 | out_auth: | 849 | out_auth: |
| 826 | ceph_auth_destroy(monc->auth); | 850 | ceph_auth_destroy(monc->auth); |
| 827 | out_con: | ||
| 828 | monc->con->ops->put(monc->con); | ||
| 829 | out_monmap: | 851 | out_monmap: |
| 830 | kfree(monc->monmap); | 852 | kfree(monc->monmap); |
| 831 | out: | 853 | out: |
| @@ -841,10 +863,6 @@ void ceph_monc_stop(struct ceph_mon_client *monc) | |||
| 841 | mutex_lock(&monc->mutex); | 863 | mutex_lock(&monc->mutex); |
| 842 | __close_session(monc); | 864 | __close_session(monc); |
| 843 | 865 | ||
| 844 | monc->con->private = NULL; | ||
| 845 | monc->con->ops->put(monc->con); | ||
| 846 | monc->con = NULL; | ||
| 847 | |||
| 848 | mutex_unlock(&monc->mutex); | 866 | mutex_unlock(&monc->mutex); |
| 849 | 867 | ||
| 850 | /* | 868 | /* |
| @@ -871,8 +889,10 @@ static void handle_auth_reply(struct ceph_mon_client *monc, | |||
| 871 | { | 889 | { |
| 872 | int ret; | 890 | int ret; |
| 873 | int was_auth = 0; | 891 | int was_auth = 0; |
| 892 | int had_debugfs_info, init_debugfs = 0; | ||
| 874 | 893 | ||
| 875 | mutex_lock(&monc->mutex); | 894 | mutex_lock(&monc->mutex); |
| 895 | had_debugfs_info = have_debugfs_info(monc); | ||
| 876 | if (monc->auth->ops) | 896 | if (monc->auth->ops) |
| 877 | was_auth = monc->auth->ops->is_authenticated(monc->auth); | 897 | was_auth = monc->auth->ops->is_authenticated(monc->auth); |
| 878 | monc->pending_auth = 0; | 898 | monc->pending_auth = 0; |
| @@ -888,14 +908,29 @@ static void handle_auth_reply(struct ceph_mon_client *monc, | |||
| 888 | } else if (!was_auth && monc->auth->ops->is_authenticated(monc->auth)) { | 908 | } else if (!was_auth && monc->auth->ops->is_authenticated(monc->auth)) { |
| 889 | dout("authenticated, starting session\n"); | 909 | dout("authenticated, starting session\n"); |
| 890 | 910 | ||
| 891 | monc->client->msgr->inst.name.type = CEPH_ENTITY_TYPE_CLIENT; | 911 | monc->client->msgr.inst.name.type = CEPH_ENTITY_TYPE_CLIENT; |
| 892 | monc->client->msgr->inst.name.num = | 912 | monc->client->msgr.inst.name.num = |
| 893 | cpu_to_le64(monc->auth->global_id); | 913 | cpu_to_le64(monc->auth->global_id); |
| 894 | 914 | ||
| 895 | __send_subscribe(monc); | 915 | __send_subscribe(monc); |
| 896 | __resend_generic_request(monc); | 916 | __resend_generic_request(monc); |
| 897 | } | 917 | } |
| 918 | |||
| 919 | if (!had_debugfs_info && have_debugfs_info(monc)) { | ||
| 920 | pr_info("client%lld fsid %pU\n", | ||
| 921 | ceph_client_id(monc->client), | ||
| 922 | &monc->client->fsid); | ||
| 923 | init_debugfs = 1; | ||
| 924 | } | ||
| 898 | mutex_unlock(&monc->mutex); | 925 | mutex_unlock(&monc->mutex); |
| 926 | |||
| 927 | if (init_debugfs) { | ||
| 928 | /* | ||
| 929 | * do debugfs initialization without mutex to avoid | ||
| 930 | * creating a locking dependency | ||
| 931 | */ | ||
| 932 | ceph_debugfs_client_init(monc->client); | ||
| 933 | } | ||
| 899 | } | 934 | } |
| 900 | 935 | ||
| 901 | static int __validate_auth(struct ceph_mon_client *monc) | 936 | static int __validate_auth(struct ceph_mon_client *monc) |
| @@ -1000,6 +1035,8 @@ static struct ceph_msg *mon_alloc_msg(struct ceph_connection *con, | |||
| 1000 | case CEPH_MSG_MDS_MAP: | 1035 | case CEPH_MSG_MDS_MAP: |
| 1001 | case CEPH_MSG_OSD_MAP: | 1036 | case CEPH_MSG_OSD_MAP: |
| 1002 | m = ceph_msg_new(type, front_len, GFP_NOFS, false); | 1037 | m = ceph_msg_new(type, front_len, GFP_NOFS, false); |
| 1038 | if (!m) | ||
| 1039 | return NULL; /* ENOMEM--return skip == 0 */ | ||
| 1003 | break; | 1040 | break; |
| 1004 | } | 1041 | } |
| 1005 | 1042 | ||
| @@ -1029,7 +1066,7 @@ static void mon_fault(struct ceph_connection *con) | |||
| 1029 | if (!monc->hunting) | 1066 | if (!monc->hunting) |
| 1030 | pr_info("mon%d %s session lost, " | 1067 | pr_info("mon%d %s session lost, " |
| 1031 | "hunting for new mon\n", monc->cur_mon, | 1068 | "hunting for new mon\n", monc->cur_mon, |
| 1032 | ceph_pr_addr(&monc->con->peer_addr.in_addr)); | 1069 | ceph_pr_addr(&monc->con.peer_addr.in_addr)); |
| 1033 | 1070 | ||
| 1034 | __close_session(monc); | 1071 | __close_session(monc); |
| 1035 | if (!monc->hunting) { | 1072 | if (!monc->hunting) { |
| @@ -1044,9 +1081,23 @@ out: | |||
| 1044 | mutex_unlock(&monc->mutex); | 1081 | mutex_unlock(&monc->mutex); |
| 1045 | } | 1082 | } |
| 1046 | 1083 | ||
| 1084 | /* | ||
| 1085 | * We can ignore refcounting on the connection struct, as all references | ||
| 1086 | * will come from the messenger workqueue, which is drained prior to | ||
| 1087 | * mon_client destruction. | ||
| 1088 | */ | ||
| 1089 | static struct ceph_connection *con_get(struct ceph_connection *con) | ||
| 1090 | { | ||
| 1091 | return con; | ||
| 1092 | } | ||
| 1093 | |||
| 1094 | static void con_put(struct ceph_connection *con) | ||
| 1095 | { | ||
| 1096 | } | ||
| 1097 | |||
| 1047 | static const struct ceph_connection_operations mon_con_ops = { | 1098 | static const struct ceph_connection_operations mon_con_ops = { |
| 1048 | .get = ceph_con_get, | 1099 | .get = con_get, |
| 1049 | .put = ceph_con_put, | 1100 | .put = con_put, |
| 1050 | .dispatch = dispatch, | 1101 | .dispatch = dispatch, |
| 1051 | .fault = mon_fault, | 1102 | .fault = mon_fault, |
| 1052 | .alloc_msg = mon_alloc_msg, | 1103 | .alloc_msg = mon_alloc_msg, |
diff --git a/net/ceph/msgpool.c b/net/ceph/msgpool.c index 11d5f4196a73..ddec1c10ac80 100644 --- a/net/ceph/msgpool.c +++ b/net/ceph/msgpool.c | |||
| @@ -12,7 +12,7 @@ static void *msgpool_alloc(gfp_t gfp_mask, void *arg) | |||
| 12 | struct ceph_msgpool *pool = arg; | 12 | struct ceph_msgpool *pool = arg; |
| 13 | struct ceph_msg *msg; | 13 | struct ceph_msg *msg; |
| 14 | 14 | ||
| 15 | msg = ceph_msg_new(0, pool->front_len, gfp_mask, true); | 15 | msg = ceph_msg_new(pool->type, pool->front_len, gfp_mask, true); |
| 16 | if (!msg) { | 16 | if (!msg) { |
| 17 | dout("msgpool_alloc %s failed\n", pool->name); | 17 | dout("msgpool_alloc %s failed\n", pool->name); |
| 18 | } else { | 18 | } else { |
| @@ -32,10 +32,11 @@ static void msgpool_free(void *element, void *arg) | |||
| 32 | ceph_msg_put(msg); | 32 | ceph_msg_put(msg); |
| 33 | } | 33 | } |
| 34 | 34 | ||
| 35 | int ceph_msgpool_init(struct ceph_msgpool *pool, | 35 | int ceph_msgpool_init(struct ceph_msgpool *pool, int type, |
| 36 | int front_len, int size, bool blocking, const char *name) | 36 | int front_len, int size, bool blocking, const char *name) |
| 37 | { | 37 | { |
| 38 | dout("msgpool %s init\n", name); | 38 | dout("msgpool %s init\n", name); |
| 39 | pool->type = type; | ||
| 39 | pool->front_len = front_len; | 40 | pool->front_len = front_len; |
| 40 | pool->pool = mempool_create(size, msgpool_alloc, msgpool_free, pool); | 41 | pool->pool = mempool_create(size, msgpool_alloc, msgpool_free, pool); |
| 41 | if (!pool->pool) | 42 | if (!pool->pool) |
| @@ -61,7 +62,7 @@ struct ceph_msg *ceph_msgpool_get(struct ceph_msgpool *pool, | |||
| 61 | WARN_ON(1); | 62 | WARN_ON(1); |
| 62 | 63 | ||
| 63 | /* try to alloc a fresh message */ | 64 | /* try to alloc a fresh message */ |
| 64 | return ceph_msg_new(0, front_len, GFP_NOFS, false); | 65 | return ceph_msg_new(pool->type, front_len, GFP_NOFS, false); |
| 65 | } | 66 | } |
| 66 | 67 | ||
| 67 | msg = mempool_alloc(pool->pool, GFP_NOFS); | 68 | msg = mempool_alloc(pool->pool, GFP_NOFS); |
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index ca59e66c9787..42119c05e82c 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c | |||
| @@ -140,10 +140,9 @@ void ceph_osdc_release_request(struct kref *kref) | |||
| 140 | if (req->r_request) | 140 | if (req->r_request) |
| 141 | ceph_msg_put(req->r_request); | 141 | ceph_msg_put(req->r_request); |
| 142 | if (req->r_con_filling_msg) { | 142 | if (req->r_con_filling_msg) { |
| 143 | dout("release_request revoking pages %p from con %p\n", | 143 | dout("%s revoking pages %p from con %p\n", __func__, |
| 144 | req->r_pages, req->r_con_filling_msg); | 144 | req->r_pages, req->r_con_filling_msg); |
| 145 | ceph_con_revoke_message(req->r_con_filling_msg, | 145 | ceph_msg_revoke_incoming(req->r_reply); |
| 146 | req->r_reply); | ||
| 147 | req->r_con_filling_msg->ops->put(req->r_con_filling_msg); | 146 | req->r_con_filling_msg->ops->put(req->r_con_filling_msg); |
| 148 | } | 147 | } |
| 149 | if (req->r_reply) | 148 | if (req->r_reply) |
| @@ -214,10 +213,13 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, | |||
| 214 | kref_init(&req->r_kref); | 213 | kref_init(&req->r_kref); |
| 215 | init_completion(&req->r_completion); | 214 | init_completion(&req->r_completion); |
| 216 | init_completion(&req->r_safe_completion); | 215 | init_completion(&req->r_safe_completion); |
| 216 | rb_init_node(&req->r_node); | ||
| 217 | INIT_LIST_HEAD(&req->r_unsafe_item); | 217 | INIT_LIST_HEAD(&req->r_unsafe_item); |
| 218 | INIT_LIST_HEAD(&req->r_linger_item); | 218 | INIT_LIST_HEAD(&req->r_linger_item); |
| 219 | INIT_LIST_HEAD(&req->r_linger_osd); | 219 | INIT_LIST_HEAD(&req->r_linger_osd); |
| 220 | INIT_LIST_HEAD(&req->r_req_lru_item); | 220 | INIT_LIST_HEAD(&req->r_req_lru_item); |
| 221 | INIT_LIST_HEAD(&req->r_osd_item); | ||
| 222 | |||
| 221 | req->r_flags = flags; | 223 | req->r_flags = flags; |
| 222 | 224 | ||
| 223 | WARN_ON((flags & (CEPH_OSD_FLAG_READ|CEPH_OSD_FLAG_WRITE)) == 0); | 225 | WARN_ON((flags & (CEPH_OSD_FLAG_READ|CEPH_OSD_FLAG_WRITE)) == 0); |
| @@ -243,6 +245,7 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, | |||
| 243 | } | 245 | } |
| 244 | ceph_pagelist_init(req->r_trail); | 246 | ceph_pagelist_init(req->r_trail); |
| 245 | } | 247 | } |
| 248 | |||
| 246 | /* create request message; allow space for oid */ | 249 | /* create request message; allow space for oid */ |
| 247 | msg_size += MAX_OBJ_NAME_SIZE; | 250 | msg_size += MAX_OBJ_NAME_SIZE; |
| 248 | if (snapc) | 251 | if (snapc) |
| @@ -256,7 +259,6 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, | |||
| 256 | return NULL; | 259 | return NULL; |
| 257 | } | 260 | } |
| 258 | 261 | ||
| 259 | msg->hdr.type = cpu_to_le16(CEPH_MSG_OSD_OP); | ||
| 260 | memset(msg->front.iov_base, 0, msg->front.iov_len); | 262 | memset(msg->front.iov_base, 0, msg->front.iov_len); |
| 261 | 263 | ||
| 262 | req->r_request = msg; | 264 | req->r_request = msg; |
| @@ -624,7 +626,7 @@ static void osd_reset(struct ceph_connection *con) | |||
| 624 | /* | 626 | /* |
| 625 | * Track open sessions with osds. | 627 | * Track open sessions with osds. |
| 626 | */ | 628 | */ |
| 627 | static struct ceph_osd *create_osd(struct ceph_osd_client *osdc) | 629 | static struct ceph_osd *create_osd(struct ceph_osd_client *osdc, int onum) |
| 628 | { | 630 | { |
| 629 | struct ceph_osd *osd; | 631 | struct ceph_osd *osd; |
| 630 | 632 | ||
| @@ -634,15 +636,13 @@ static struct ceph_osd *create_osd(struct ceph_osd_client *osdc) | |||
| 634 | 636 | ||
| 635 | atomic_set(&osd->o_ref, 1); | 637 | atomic_set(&osd->o_ref, 1); |
| 636 | osd->o_osdc = osdc; | 638 | osd->o_osdc = osdc; |
| 639 | osd->o_osd = onum; | ||
| 637 | INIT_LIST_HEAD(&osd->o_requests); | 640 | INIT_LIST_HEAD(&osd->o_requests); |
| 638 | INIT_LIST_HEAD(&osd->o_linger_requests); | 641 | INIT_LIST_HEAD(&osd->o_linger_requests); |
| 639 | INIT_LIST_HEAD(&osd->o_osd_lru); | 642 | INIT_LIST_HEAD(&osd->o_osd_lru); |
| 640 | osd->o_incarnation = 1; | 643 | osd->o_incarnation = 1; |
| 641 | 644 | ||
| 642 | ceph_con_init(osdc->client->msgr, &osd->o_con); | 645 | ceph_con_init(&osd->o_con, osd, &osd_con_ops, &osdc->client->msgr); |
| 643 | osd->o_con.private = osd; | ||
| 644 | osd->o_con.ops = &osd_con_ops; | ||
| 645 | osd->o_con.peer_name.type = CEPH_ENTITY_TYPE_OSD; | ||
| 646 | 646 | ||
| 647 | INIT_LIST_HEAD(&osd->o_keepalive_item); | 647 | INIT_LIST_HEAD(&osd->o_keepalive_item); |
| 648 | return osd; | 648 | return osd; |
| @@ -688,7 +688,7 @@ static void __remove_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd) | |||
| 688 | 688 | ||
| 689 | static void remove_all_osds(struct ceph_osd_client *osdc) | 689 | static void remove_all_osds(struct ceph_osd_client *osdc) |
| 690 | { | 690 | { |
| 691 | dout("__remove_old_osds %p\n", osdc); | 691 | dout("%s %p\n", __func__, osdc); |
| 692 | mutex_lock(&osdc->request_mutex); | 692 | mutex_lock(&osdc->request_mutex); |
| 693 | while (!RB_EMPTY_ROOT(&osdc->osds)) { | 693 | while (!RB_EMPTY_ROOT(&osdc->osds)) { |
| 694 | struct ceph_osd *osd = rb_entry(rb_first(&osdc->osds), | 694 | struct ceph_osd *osd = rb_entry(rb_first(&osdc->osds), |
| @@ -752,7 +752,8 @@ static int __reset_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd) | |||
| 752 | ret = -EAGAIN; | 752 | ret = -EAGAIN; |
| 753 | } else { | 753 | } else { |
| 754 | ceph_con_close(&osd->o_con); | 754 | ceph_con_close(&osd->o_con); |
| 755 | ceph_con_open(&osd->o_con, &osdc->osdmap->osd_addr[osd->o_osd]); | 755 | ceph_con_open(&osd->o_con, CEPH_ENTITY_TYPE_OSD, osd->o_osd, |
| 756 | &osdc->osdmap->osd_addr[osd->o_osd]); | ||
| 756 | osd->o_incarnation++; | 757 | osd->o_incarnation++; |
| 757 | } | 758 | } |
| 758 | return ret; | 759 | return ret; |
| @@ -853,7 +854,7 @@ static void __unregister_request(struct ceph_osd_client *osdc, | |||
| 853 | 854 | ||
| 854 | if (req->r_osd) { | 855 | if (req->r_osd) { |
| 855 | /* make sure the original request isn't in flight. */ | 856 | /* make sure the original request isn't in flight. */ |
| 856 | ceph_con_revoke(&req->r_osd->o_con, req->r_request); | 857 | ceph_msg_revoke(req->r_request); |
| 857 | 858 | ||
| 858 | list_del_init(&req->r_osd_item); | 859 | list_del_init(&req->r_osd_item); |
| 859 | if (list_empty(&req->r_osd->o_requests) && | 860 | if (list_empty(&req->r_osd->o_requests) && |
| @@ -880,7 +881,7 @@ static void __unregister_request(struct ceph_osd_client *osdc, | |||
| 880 | static void __cancel_request(struct ceph_osd_request *req) | 881 | static void __cancel_request(struct ceph_osd_request *req) |
| 881 | { | 882 | { |
| 882 | if (req->r_sent && req->r_osd) { | 883 | if (req->r_sent && req->r_osd) { |
| 883 | ceph_con_revoke(&req->r_osd->o_con, req->r_request); | 884 | ceph_msg_revoke(req->r_request); |
| 884 | req->r_sent = 0; | 885 | req->r_sent = 0; |
| 885 | } | 886 | } |
| 886 | } | 887 | } |
| @@ -890,7 +891,9 @@ static void __register_linger_request(struct ceph_osd_client *osdc, | |||
| 890 | { | 891 | { |
| 891 | dout("__register_linger_request %p\n", req); | 892 | dout("__register_linger_request %p\n", req); |
| 892 | list_add_tail(&req->r_linger_item, &osdc->req_linger); | 893 | list_add_tail(&req->r_linger_item, &osdc->req_linger); |
| 893 | list_add_tail(&req->r_linger_osd, &req->r_osd->o_linger_requests); | 894 | if (req->r_osd) |
| 895 | list_add_tail(&req->r_linger_osd, | ||
| 896 | &req->r_osd->o_linger_requests); | ||
| 894 | } | 897 | } |
| 895 | 898 | ||
| 896 | static void __unregister_linger_request(struct ceph_osd_client *osdc, | 899 | static void __unregister_linger_request(struct ceph_osd_client *osdc, |
| @@ -998,18 +1001,18 @@ static int __map_request(struct ceph_osd_client *osdc, | |||
| 998 | req->r_osd = __lookup_osd(osdc, o); | 1001 | req->r_osd = __lookup_osd(osdc, o); |
| 999 | if (!req->r_osd && o >= 0) { | 1002 | if (!req->r_osd && o >= 0) { |
| 1000 | err = -ENOMEM; | 1003 | err = -ENOMEM; |
| 1001 | req->r_osd = create_osd(osdc); | 1004 | req->r_osd = create_osd(osdc, o); |
| 1002 | if (!req->r_osd) { | 1005 | if (!req->r_osd) { |
| 1003 | list_move(&req->r_req_lru_item, &osdc->req_notarget); | 1006 | list_move(&req->r_req_lru_item, &osdc->req_notarget); |
| 1004 | goto out; | 1007 | goto out; |
| 1005 | } | 1008 | } |
| 1006 | 1009 | ||
| 1007 | dout("map_request osd %p is osd%d\n", req->r_osd, o); | 1010 | dout("map_request osd %p is osd%d\n", req->r_osd, o); |
| 1008 | req->r_osd->o_osd = o; | ||
| 1009 | req->r_osd->o_con.peer_name.num = cpu_to_le64(o); | ||
| 1010 | __insert_osd(osdc, req->r_osd); | 1011 | __insert_osd(osdc, req->r_osd); |
| 1011 | 1012 | ||
| 1012 | ceph_con_open(&req->r_osd->o_con, &osdc->osdmap->osd_addr[o]); | 1013 | ceph_con_open(&req->r_osd->o_con, |
| 1014 | CEPH_ENTITY_TYPE_OSD, o, | ||
| 1015 | &osdc->osdmap->osd_addr[o]); | ||
| 1013 | } | 1016 | } |
| 1014 | 1017 | ||
| 1015 | if (req->r_osd) { | 1018 | if (req->r_osd) { |
| @@ -1304,8 +1307,9 @@ static void kick_requests(struct ceph_osd_client *osdc, int force_resend) | |||
| 1304 | 1307 | ||
| 1305 | dout("kick_requests %s\n", force_resend ? " (force resend)" : ""); | 1308 | dout("kick_requests %s\n", force_resend ? " (force resend)" : ""); |
| 1306 | mutex_lock(&osdc->request_mutex); | 1309 | mutex_lock(&osdc->request_mutex); |
| 1307 | for (p = rb_first(&osdc->requests); p; p = rb_next(p)) { | 1310 | for (p = rb_first(&osdc->requests); p; ) { |
| 1308 | req = rb_entry(p, struct ceph_osd_request, r_node); | 1311 | req = rb_entry(p, struct ceph_osd_request, r_node); |
| 1312 | p = rb_next(p); | ||
| 1309 | err = __map_request(osdc, req, force_resend); | 1313 | err = __map_request(osdc, req, force_resend); |
| 1310 | if (err < 0) | 1314 | if (err < 0) |
| 1311 | continue; /* error */ | 1315 | continue; /* error */ |
| @@ -1313,10 +1317,23 @@ static void kick_requests(struct ceph_osd_client *osdc, int force_resend) | |||
| 1313 | dout("%p tid %llu maps to no osd\n", req, req->r_tid); | 1317 | dout("%p tid %llu maps to no osd\n", req, req->r_tid); |
| 1314 | needmap++; /* request a newer map */ | 1318 | needmap++; /* request a newer map */ |
| 1315 | } else if (err > 0) { | 1319 | } else if (err > 0) { |
| 1316 | dout("%p tid %llu requeued on osd%d\n", req, req->r_tid, | 1320 | if (!req->r_linger) { |
| 1317 | req->r_osd ? req->r_osd->o_osd : -1); | 1321 | dout("%p tid %llu requeued on osd%d\n", req, |
| 1318 | if (!req->r_linger) | 1322 | req->r_tid, |
| 1323 | req->r_osd ? req->r_osd->o_osd : -1); | ||
| 1319 | req->r_flags |= CEPH_OSD_FLAG_RETRY; | 1324 | req->r_flags |= CEPH_OSD_FLAG_RETRY; |
| 1325 | } | ||
| 1326 | } | ||
| 1327 | if (req->r_linger && list_empty(&req->r_linger_item)) { | ||
| 1328 | /* | ||
| 1329 | * register as a linger so that we will | ||
| 1330 | * re-submit below and get a new tid | ||
| 1331 | */ | ||
| 1332 | dout("%p tid %llu restart on osd%d\n", | ||
| 1333 | req, req->r_tid, | ||
| 1334 | req->r_osd ? req->r_osd->o_osd : -1); | ||
| 1335 | __register_linger_request(osdc, req); | ||
| 1336 | __unregister_request(osdc, req); | ||
| 1320 | } | 1337 | } |
| 1321 | } | 1338 | } |
| 1322 | 1339 | ||
| @@ -1391,7 +1408,7 @@ void ceph_osdc_handle_map(struct ceph_osd_client *osdc, struct ceph_msg *msg) | |||
| 1391 | epoch, maplen); | 1408 | epoch, maplen); |
| 1392 | newmap = osdmap_apply_incremental(&p, next, | 1409 | newmap = osdmap_apply_incremental(&p, next, |
| 1393 | osdc->osdmap, | 1410 | osdc->osdmap, |
| 1394 | osdc->client->msgr); | 1411 | &osdc->client->msgr); |
| 1395 | if (IS_ERR(newmap)) { | 1412 | if (IS_ERR(newmap)) { |
| 1396 | err = PTR_ERR(newmap); | 1413 | err = PTR_ERR(newmap); |
| 1397 | goto bad; | 1414 | goto bad; |
| @@ -1839,11 +1856,12 @@ int ceph_osdc_init(struct ceph_osd_client *osdc, struct ceph_client *client) | |||
| 1839 | if (!osdc->req_mempool) | 1856 | if (!osdc->req_mempool) |
| 1840 | goto out; | 1857 | goto out; |
| 1841 | 1858 | ||
| 1842 | err = ceph_msgpool_init(&osdc->msgpool_op, OSD_OP_FRONT_LEN, 10, true, | 1859 | err = ceph_msgpool_init(&osdc->msgpool_op, CEPH_MSG_OSD_OP, |
| 1860 | OSD_OP_FRONT_LEN, 10, true, | ||
| 1843 | "osd_op"); | 1861 | "osd_op"); |
| 1844 | if (err < 0) | 1862 | if (err < 0) |
| 1845 | goto out_mempool; | 1863 | goto out_mempool; |
| 1846 | err = ceph_msgpool_init(&osdc->msgpool_op_reply, | 1864 | err = ceph_msgpool_init(&osdc->msgpool_op_reply, CEPH_MSG_OSD_OPREPLY, |
| 1847 | OSD_OPREPLY_FRONT_LEN, 10, true, | 1865 | OSD_OPREPLY_FRONT_LEN, 10, true, |
| 1848 | "osd_op_reply"); | 1866 | "osd_op_reply"); |
| 1849 | if (err < 0) | 1867 | if (err < 0) |
| @@ -2019,15 +2037,15 @@ static struct ceph_msg *get_reply(struct ceph_connection *con, | |||
| 2019 | if (!req) { | 2037 | if (!req) { |
| 2020 | *skip = 1; | 2038 | *skip = 1; |
| 2021 | m = NULL; | 2039 | m = NULL; |
| 2022 | pr_info("get_reply unknown tid %llu from osd%d\n", tid, | 2040 | dout("get_reply unknown tid %llu from osd%d\n", tid, |
| 2023 | osd->o_osd); | 2041 | osd->o_osd); |
| 2024 | goto out; | 2042 | goto out; |
| 2025 | } | 2043 | } |
| 2026 | 2044 | ||
| 2027 | if (req->r_con_filling_msg) { | 2045 | if (req->r_con_filling_msg) { |
| 2028 | dout("get_reply revoking msg %p from old con %p\n", | 2046 | dout("%s revoking msg %p from old con %p\n", __func__, |
| 2029 | req->r_reply, req->r_con_filling_msg); | 2047 | req->r_reply, req->r_con_filling_msg); |
| 2030 | ceph_con_revoke_message(req->r_con_filling_msg, req->r_reply); | 2048 | ceph_msg_revoke_incoming(req->r_reply); |
| 2031 | req->r_con_filling_msg->ops->put(req->r_con_filling_msg); | 2049 | req->r_con_filling_msg->ops->put(req->r_con_filling_msg); |
| 2032 | req->r_con_filling_msg = NULL; | 2050 | req->r_con_filling_msg = NULL; |
| 2033 | } | 2051 | } |
| @@ -2080,6 +2098,7 @@ static struct ceph_msg *alloc_msg(struct ceph_connection *con, | |||
| 2080 | int type = le16_to_cpu(hdr->type); | 2098 | int type = le16_to_cpu(hdr->type); |
| 2081 | int front = le32_to_cpu(hdr->front_len); | 2099 | int front = le32_to_cpu(hdr->front_len); |
| 2082 | 2100 | ||
| 2101 | *skip = 0; | ||
| 2083 | switch (type) { | 2102 | switch (type) { |
| 2084 | case CEPH_MSG_OSD_MAP: | 2103 | case CEPH_MSG_OSD_MAP: |
| 2085 | case CEPH_MSG_WATCH_NOTIFY: | 2104 | case CEPH_MSG_WATCH_NOTIFY: |
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c index 81e3b84a77ef..3124b71a8883 100644 --- a/net/ceph/osdmap.c +++ b/net/ceph/osdmap.c | |||
| @@ -135,6 +135,21 @@ bad: | |||
| 135 | return -EINVAL; | 135 | return -EINVAL; |
| 136 | } | 136 | } |
| 137 | 137 | ||
| 138 | static int skip_name_map(void **p, void *end) | ||
| 139 | { | ||
| 140 | int len; | ||
| 141 | ceph_decode_32_safe(p, end, len ,bad); | ||
| 142 | while (len--) { | ||
| 143 | int strlen; | ||
| 144 | *p += sizeof(u32); | ||
| 145 | ceph_decode_32_safe(p, end, strlen, bad); | ||
| 146 | *p += strlen; | ||
| 147 | } | ||
| 148 | return 0; | ||
| 149 | bad: | ||
| 150 | return -EINVAL; | ||
| 151 | } | ||
| 152 | |||
| 138 | static struct crush_map *crush_decode(void *pbyval, void *end) | 153 | static struct crush_map *crush_decode(void *pbyval, void *end) |
| 139 | { | 154 | { |
| 140 | struct crush_map *c; | 155 | struct crush_map *c; |
| @@ -143,6 +158,7 @@ static struct crush_map *crush_decode(void *pbyval, void *end) | |||
| 143 | void **p = &pbyval; | 158 | void **p = &pbyval; |
| 144 | void *start = pbyval; | 159 | void *start = pbyval; |
| 145 | u32 magic; | 160 | u32 magic; |
| 161 | u32 num_name_maps; | ||
| 146 | 162 | ||
| 147 | dout("crush_decode %p to %p len %d\n", *p, end, (int)(end - *p)); | 163 | dout("crush_decode %p to %p len %d\n", *p, end, (int)(end - *p)); |
| 148 | 164 | ||
| @@ -150,6 +166,11 @@ static struct crush_map *crush_decode(void *pbyval, void *end) | |||
| 150 | if (c == NULL) | 166 | if (c == NULL) |
| 151 | return ERR_PTR(-ENOMEM); | 167 | return ERR_PTR(-ENOMEM); |
| 152 | 168 | ||
| 169 | /* set tunables to default values */ | ||
| 170 | c->choose_local_tries = 2; | ||
| 171 | c->choose_local_fallback_tries = 5; | ||
| 172 | c->choose_total_tries = 19; | ||
| 173 | |||
| 153 | ceph_decode_need(p, end, 4*sizeof(u32), bad); | 174 | ceph_decode_need(p, end, 4*sizeof(u32), bad); |
| 154 | magic = ceph_decode_32(p); | 175 | magic = ceph_decode_32(p); |
| 155 | if (magic != CRUSH_MAGIC) { | 176 | if (magic != CRUSH_MAGIC) { |
| @@ -297,7 +318,25 @@ static struct crush_map *crush_decode(void *pbyval, void *end) | |||
| 297 | } | 318 | } |
| 298 | 319 | ||
| 299 | /* ignore trailing name maps. */ | 320 | /* ignore trailing name maps. */ |
| 321 | for (num_name_maps = 0; num_name_maps < 3; num_name_maps++) { | ||
| 322 | err = skip_name_map(p, end); | ||
| 323 | if (err < 0) | ||
| 324 | goto done; | ||
| 325 | } | ||
| 326 | |||
| 327 | /* tunables */ | ||
| 328 | ceph_decode_need(p, end, 3*sizeof(u32), done); | ||
| 329 | c->choose_local_tries = ceph_decode_32(p); | ||
| 330 | c->choose_local_fallback_tries = ceph_decode_32(p); | ||
| 331 | c->choose_total_tries = ceph_decode_32(p); | ||
| 332 | dout("crush decode tunable choose_local_tries = %d", | ||
| 333 | c->choose_local_tries); | ||
| 334 | dout("crush decode tunable choose_local_fallback_tries = %d", | ||
| 335 | c->choose_local_fallback_tries); | ||
| 336 | dout("crush decode tunable choose_total_tries = %d", | ||
| 337 | c->choose_total_tries); | ||
| 300 | 338 | ||
| 339 | done: | ||
| 301 | dout("crush_decode success\n"); | 340 | dout("crush_decode success\n"); |
| 302 | return c; | 341 | return c; |
| 303 | 342 | ||
| @@ -488,15 +527,16 @@ static int __decode_pool_names(void **p, void *end, struct ceph_osdmap *map) | |||
| 488 | ceph_decode_32_safe(p, end, pool, bad); | 527 | ceph_decode_32_safe(p, end, pool, bad); |
| 489 | ceph_decode_32_safe(p, end, len, bad); | 528 | ceph_decode_32_safe(p, end, len, bad); |
| 490 | dout(" pool %d len %d\n", pool, len); | 529 | dout(" pool %d len %d\n", pool, len); |
| 530 | ceph_decode_need(p, end, len, bad); | ||
| 491 | pi = __lookup_pg_pool(&map->pg_pools, pool); | 531 | pi = __lookup_pg_pool(&map->pg_pools, pool); |
| 492 | if (pi) { | 532 | if (pi) { |
| 533 | char *name = kstrndup(*p, len, GFP_NOFS); | ||
| 534 | |||
| 535 | if (!name) | ||
| 536 | return -ENOMEM; | ||
| 493 | kfree(pi->name); | 537 | kfree(pi->name); |
| 494 | pi->name = kmalloc(len + 1, GFP_NOFS); | 538 | pi->name = name; |
| 495 | if (pi->name) { | 539 | dout(" name is %s\n", pi->name); |
| 496 | memcpy(pi->name, *p, len); | ||
| 497 | pi->name[len] = '\0'; | ||
| 498 | dout(" name is %s\n", pi->name); | ||
| 499 | } | ||
| 500 | } | 540 | } |
| 501 | *p += len; | 541 | *p += len; |
| 502 | } | 542 | } |
| @@ -666,6 +706,9 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end) | |||
| 666 | ceph_decode_need(p, end, sizeof(u32) + sizeof(u64), bad); | 706 | ceph_decode_need(p, end, sizeof(u32) + sizeof(u64), bad); |
| 667 | ceph_decode_copy(p, &pgid, sizeof(pgid)); | 707 | ceph_decode_copy(p, &pgid, sizeof(pgid)); |
| 668 | n = ceph_decode_32(p); | 708 | n = ceph_decode_32(p); |
| 709 | err = -EINVAL; | ||
| 710 | if (n > (UINT_MAX - sizeof(*pg)) / sizeof(u32)) | ||
| 711 | goto bad; | ||
| 669 | ceph_decode_need(p, end, n * sizeof(u32), bad); | 712 | ceph_decode_need(p, end, n * sizeof(u32), bad); |
| 670 | err = -ENOMEM; | 713 | err = -ENOMEM; |
| 671 | pg = kmalloc(sizeof(*pg) + n*sizeof(u32), GFP_NOFS); | 714 | pg = kmalloc(sizeof(*pg) + n*sizeof(u32), GFP_NOFS); |
| @@ -889,6 +932,10 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, | |||
| 889 | (void) __remove_pg_mapping(&map->pg_temp, pgid); | 932 | (void) __remove_pg_mapping(&map->pg_temp, pgid); |
| 890 | 933 | ||
| 891 | /* insert */ | 934 | /* insert */ |
| 935 | if (pglen > (UINT_MAX - sizeof(*pg)) / sizeof(u32)) { | ||
| 936 | err = -EINVAL; | ||
| 937 | goto bad; | ||
| 938 | } | ||
| 892 | pg = kmalloc(sizeof(*pg) + sizeof(u32)*pglen, GFP_NOFS); | 939 | pg = kmalloc(sizeof(*pg) + sizeof(u32)*pglen, GFP_NOFS); |
| 893 | if (!pg) { | 940 | if (!pg) { |
| 894 | err = -ENOMEM; | 941 | err = -ENOMEM; |
diff --git a/net/core/dev.c b/net/core/dev.c index 0ebaea16632f..83988362805e 100644 --- a/net/core/dev.c +++ b/net/core/dev.c | |||
| @@ -1055,6 +1055,8 @@ rollback: | |||
| 1055 | */ | 1055 | */ |
| 1056 | int dev_set_alias(struct net_device *dev, const char *alias, size_t len) | 1056 | int dev_set_alias(struct net_device *dev, const char *alias, size_t len) |
| 1057 | { | 1057 | { |
| 1058 | char *new_ifalias; | ||
| 1059 | |||
| 1058 | ASSERT_RTNL(); | 1060 | ASSERT_RTNL(); |
| 1059 | 1061 | ||
| 1060 | if (len >= IFALIASZ) | 1062 | if (len >= IFALIASZ) |
| @@ -1068,9 +1070,10 @@ int dev_set_alias(struct net_device *dev, const char *alias, size_t len) | |||
| 1068 | return 0; | 1070 | return 0; |
| 1069 | } | 1071 | } |
| 1070 | 1072 | ||
| 1071 | dev->ifalias = krealloc(dev->ifalias, len + 1, GFP_KERNEL); | 1073 | new_ifalias = krealloc(dev->ifalias, len + 1, GFP_KERNEL); |
| 1072 | if (!dev->ifalias) | 1074 | if (!new_ifalias) |
| 1073 | return -ENOMEM; | 1075 | return -ENOMEM; |
| 1076 | dev->ifalias = new_ifalias; | ||
| 1074 | 1077 | ||
| 1075 | strlcpy(dev->ifalias, alias, len+1); | 1078 | strlcpy(dev->ifalias, alias, len+1); |
| 1076 | return len; | 1079 | return len; |
| @@ -1172,6 +1175,7 @@ static int __dev_open(struct net_device *dev) | |||
| 1172 | net_dmaengine_get(); | 1175 | net_dmaengine_get(); |
| 1173 | dev_set_rx_mode(dev); | 1176 | dev_set_rx_mode(dev); |
| 1174 | dev_activate(dev); | 1177 | dev_activate(dev); |
| 1178 | add_device_randomness(dev->dev_addr, dev->addr_len); | ||
| 1175 | } | 1179 | } |
| 1176 | 1180 | ||
| 1177 | return ret; | 1181 | return ret; |
| @@ -1638,6 +1642,19 @@ static inline int deliver_skb(struct sk_buff *skb, | |||
| 1638 | return pt_prev->func(skb, skb->dev, pt_prev, orig_dev); | 1642 | return pt_prev->func(skb, skb->dev, pt_prev, orig_dev); |
| 1639 | } | 1643 | } |
| 1640 | 1644 | ||
| 1645 | static inline bool skb_loop_sk(struct packet_type *ptype, struct sk_buff *skb) | ||
| 1646 | { | ||
| 1647 | if (ptype->af_packet_priv == NULL) | ||
| 1648 | return false; | ||
| 1649 | |||
| 1650 | if (ptype->id_match) | ||
| 1651 | return ptype->id_match(ptype, skb->sk); | ||
| 1652 | else if ((struct sock *)ptype->af_packet_priv == skb->sk) | ||
| 1653 | return true; | ||
| 1654 | |||
| 1655 | return false; | ||
| 1656 | } | ||
| 1657 | |||
| 1641 | /* | 1658 | /* |
| 1642 | * Support routine. Sends outgoing frames to any network | 1659 | * Support routine. Sends outgoing frames to any network |
| 1643 | * taps currently in use. | 1660 | * taps currently in use. |
| @@ -1655,8 +1672,7 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) | |||
| 1655 | * they originated from - MvS (miquels@drinkel.ow.org) | 1672 | * they originated from - MvS (miquels@drinkel.ow.org) |
| 1656 | */ | 1673 | */ |
| 1657 | if ((ptype->dev == dev || !ptype->dev) && | 1674 | if ((ptype->dev == dev || !ptype->dev) && |
| 1658 | (ptype->af_packet_priv == NULL || | 1675 | (!skb_loop_sk(ptype, skb))) { |
| 1659 | (struct sock *)ptype->af_packet_priv != skb->sk)) { | ||
| 1660 | if (pt_prev) { | 1676 | if (pt_prev) { |
| 1661 | deliver_skb(skb2, pt_prev, skb->dev); | 1677 | deliver_skb(skb2, pt_prev, skb->dev); |
| 1662 | pt_prev = ptype; | 1678 | pt_prev = ptype; |
| @@ -2133,6 +2149,9 @@ netdev_features_t netif_skb_features(struct sk_buff *skb) | |||
| 2133 | __be16 protocol = skb->protocol; | 2149 | __be16 protocol = skb->protocol; |
| 2134 | netdev_features_t features = skb->dev->features; | 2150 | netdev_features_t features = skb->dev->features; |
| 2135 | 2151 | ||
| 2152 | if (skb_shinfo(skb)->gso_segs > skb->dev->gso_max_segs) | ||
| 2153 | features &= ~NETIF_F_GSO_MASK; | ||
| 2154 | |||
| 2136 | if (protocol == htons(ETH_P_8021Q)) { | 2155 | if (protocol == htons(ETH_P_8021Q)) { |
| 2137 | struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data; | 2156 | struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data; |
| 2138 | protocol = veh->h_vlan_encapsulated_proto; | 2157 | protocol = veh->h_vlan_encapsulated_proto; |
| @@ -3155,6 +3174,23 @@ void netdev_rx_handler_unregister(struct net_device *dev) | |||
| 3155 | } | 3174 | } |
| 3156 | EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister); | 3175 | EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister); |
| 3157 | 3176 | ||
| 3177 | /* | ||
| 3178 | * Limit the use of PFMEMALLOC reserves to those protocols that implement | ||
| 3179 | * the special handling of PFMEMALLOC skbs. | ||
| 3180 | */ | ||
| 3181 | static bool skb_pfmemalloc_protocol(struct sk_buff *skb) | ||
| 3182 | { | ||
| 3183 | switch (skb->protocol) { | ||
| 3184 | case __constant_htons(ETH_P_ARP): | ||
| 3185 | case __constant_htons(ETH_P_IP): | ||
| 3186 | case __constant_htons(ETH_P_IPV6): | ||
| 3187 | case __constant_htons(ETH_P_8021Q): | ||
| 3188 | return true; | ||
| 3189 | default: | ||
| 3190 | return false; | ||
| 3191 | } | ||
| 3192 | } | ||
| 3193 | |||
| 3158 | static int __netif_receive_skb(struct sk_buff *skb) | 3194 | static int __netif_receive_skb(struct sk_buff *skb) |
| 3159 | { | 3195 | { |
| 3160 | struct packet_type *ptype, *pt_prev; | 3196 | struct packet_type *ptype, *pt_prev; |
| @@ -3164,14 +3200,27 @@ static int __netif_receive_skb(struct sk_buff *skb) | |||
| 3164 | bool deliver_exact = false; | 3200 | bool deliver_exact = false; |
| 3165 | int ret = NET_RX_DROP; | 3201 | int ret = NET_RX_DROP; |
| 3166 | __be16 type; | 3202 | __be16 type; |
| 3203 | unsigned long pflags = current->flags; | ||
| 3167 | 3204 | ||
| 3168 | net_timestamp_check(!netdev_tstamp_prequeue, skb); | 3205 | net_timestamp_check(!netdev_tstamp_prequeue, skb); |
| 3169 | 3206 | ||
| 3170 | trace_netif_receive_skb(skb); | 3207 | trace_netif_receive_skb(skb); |
| 3171 | 3208 | ||
| 3209 | /* | ||
| 3210 | * PFMEMALLOC skbs are special, they should | ||
| 3211 | * - be delivered to SOCK_MEMALLOC sockets only | ||
| 3212 | * - stay away from userspace | ||
| 3213 | * - have bounded memory usage | ||
| 3214 | * | ||
| 3215 | * Use PF_MEMALLOC as this saves us from propagating the allocation | ||
| 3216 | * context down to all allocation sites. | ||
| 3217 | */ | ||
| 3218 | if (sk_memalloc_socks() && skb_pfmemalloc(skb)) | ||
| 3219 | current->flags |= PF_MEMALLOC; | ||
| 3220 | |||
| 3172 | /* if we've gotten here through NAPI, check netpoll */ | 3221 | /* if we've gotten here through NAPI, check netpoll */ |
| 3173 | if (netpoll_receive_skb(skb)) | 3222 | if (netpoll_receive_skb(skb)) |
| 3174 | return NET_RX_DROP; | 3223 | goto out; |
| 3175 | 3224 | ||
| 3176 | orig_dev = skb->dev; | 3225 | orig_dev = skb->dev; |
| 3177 | 3226 | ||
| @@ -3191,7 +3240,7 @@ another_round: | |||
| 3191 | if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) { | 3240 | if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) { |
| 3192 | skb = vlan_untag(skb); | 3241 | skb = vlan_untag(skb); |
| 3193 | if (unlikely(!skb)) | 3242 | if (unlikely(!skb)) |
| 3194 | goto out; | 3243 | goto unlock; |
| 3195 | } | 3244 | } |
| 3196 | 3245 | ||
| 3197 | #ifdef CONFIG_NET_CLS_ACT | 3246 | #ifdef CONFIG_NET_CLS_ACT |
| @@ -3201,6 +3250,9 @@ another_round: | |||
| 3201 | } | 3250 | } |
| 3202 | #endif | 3251 | #endif |
| 3203 | 3252 | ||
| 3253 | if (sk_memalloc_socks() && skb_pfmemalloc(skb)) | ||
| 3254 | goto skip_taps; | ||
| 3255 | |||
| 3204 | list_for_each_entry_rcu(ptype, &ptype_all, list) { | 3256 | list_for_each_entry_rcu(ptype, &ptype_all, list) { |
| 3205 | if (!ptype->dev || ptype->dev == skb->dev) { | 3257 | if (!ptype->dev || ptype->dev == skb->dev) { |
| 3206 | if (pt_prev) | 3258 | if (pt_prev) |
| @@ -3209,13 +3261,18 @@ another_round: | |||
| 3209 | } | 3261 | } |
| 3210 | } | 3262 | } |
| 3211 | 3263 | ||
| 3264 | skip_taps: | ||
| 3212 | #ifdef CONFIG_NET_CLS_ACT | 3265 | #ifdef CONFIG_NET_CLS_ACT |
| 3213 | skb = handle_ing(skb, &pt_prev, &ret, orig_dev); | 3266 | skb = handle_ing(skb, &pt_prev, &ret, orig_dev); |
| 3214 | if (!skb) | 3267 | if (!skb) |
| 3215 | goto out; | 3268 | goto unlock; |
| 3216 | ncls: | 3269 | ncls: |
| 3217 | #endif | 3270 | #endif |
| 3218 | 3271 | ||
| 3272 | if (sk_memalloc_socks() && skb_pfmemalloc(skb) | ||
| 3273 | && !skb_pfmemalloc_protocol(skb)) | ||
| 3274 | goto drop; | ||
| 3275 | |||
| 3219 | rx_handler = rcu_dereference(skb->dev->rx_handler); | 3276 | rx_handler = rcu_dereference(skb->dev->rx_handler); |
| 3220 | if (vlan_tx_tag_present(skb)) { | 3277 | if (vlan_tx_tag_present(skb)) { |
| 3221 | if (pt_prev) { | 3278 | if (pt_prev) { |
| @@ -3225,7 +3282,7 @@ ncls: | |||
| 3225 | if (vlan_do_receive(&skb, !rx_handler)) | 3282 | if (vlan_do_receive(&skb, !rx_handler)) |
| 3226 | goto another_round; | 3283 | goto another_round; |
| 3227 | else if (unlikely(!skb)) | 3284 | else if (unlikely(!skb)) |
| 3228 | goto out; | 3285 | goto unlock; |
| 3229 | } | 3286 | } |
| 3230 | 3287 | ||
| 3231 | if (rx_handler) { | 3288 | if (rx_handler) { |
| @@ -3235,7 +3292,7 @@ ncls: | |||
| 3235 | } | 3292 | } |
| 3236 | switch (rx_handler(&skb)) { | 3293 | switch (rx_handler(&skb)) { |
| 3237 | case RX_HANDLER_CONSUMED: | 3294 | case RX_HANDLER_CONSUMED: |
| 3238 | goto out; | 3295 | goto unlock; |
| 3239 | case RX_HANDLER_ANOTHER: | 3296 | case RX_HANDLER_ANOTHER: |
| 3240 | goto another_round; | 3297 | goto another_round; |
| 3241 | case RX_HANDLER_EXACT: | 3298 | case RX_HANDLER_EXACT: |
| @@ -3268,6 +3325,7 @@ ncls: | |||
| 3268 | else | 3325 | else |
| 3269 | ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev); | 3326 | ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev); |
| 3270 | } else { | 3327 | } else { |
| 3328 | drop: | ||
| 3271 | atomic_long_inc(&skb->dev->rx_dropped); | 3329 | atomic_long_inc(&skb->dev->rx_dropped); |
| 3272 | kfree_skb(skb); | 3330 | kfree_skb(skb); |
| 3273 | /* Jamal, now you will not able to escape explaining | 3331 | /* Jamal, now you will not able to escape explaining |
| @@ -3276,8 +3334,10 @@ ncls: | |||
| 3276 | ret = NET_RX_DROP; | 3334 | ret = NET_RX_DROP; |
| 3277 | } | 3335 | } |
| 3278 | 3336 | ||
| 3279 | out: | 3337 | unlock: |
| 3280 | rcu_read_unlock(); | 3338 | rcu_read_unlock(); |
| 3339 | out: | ||
| 3340 | tsk_restore_flags(current, pflags, PF_MEMALLOC); | ||
| 3281 | return ret; | 3341 | return ret; |
| 3282 | } | 3342 | } |
| 3283 | 3343 | ||
| @@ -4801,6 +4861,7 @@ int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa) | |||
| 4801 | err = ops->ndo_set_mac_address(dev, sa); | 4861 | err = ops->ndo_set_mac_address(dev, sa); |
| 4802 | if (!err) | 4862 | if (!err) |
| 4803 | call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); | 4863 | call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); |
| 4864 | add_device_randomness(dev->dev_addr, dev->addr_len); | ||
| 4804 | return err; | 4865 | return err; |
| 4805 | } | 4866 | } |
| 4806 | EXPORT_SYMBOL(dev_set_mac_address); | 4867 | EXPORT_SYMBOL(dev_set_mac_address); |
| @@ -5579,6 +5640,7 @@ int register_netdevice(struct net_device *dev) | |||
| 5579 | dev_init_scheduler(dev); | 5640 | dev_init_scheduler(dev); |
| 5580 | dev_hold(dev); | 5641 | dev_hold(dev); |
| 5581 | list_netdevice(dev); | 5642 | list_netdevice(dev); |
| 5643 | add_device_randomness(dev->dev_addr, dev->addr_len); | ||
| 5582 | 5644 | ||
| 5583 | /* Notify protocols, that a new device appeared. */ | 5645 | /* Notify protocols, that a new device appeared. */ |
| 5584 | ret = call_netdevice_notifiers(NETDEV_REGISTER, dev); | 5646 | ret = call_netdevice_notifiers(NETDEV_REGISTER, dev); |
| @@ -5682,6 +5744,7 @@ EXPORT_SYMBOL(netdev_refcnt_read); | |||
| 5682 | 5744 | ||
| 5683 | /** | 5745 | /** |
| 5684 | * netdev_wait_allrefs - wait until all references are gone. | 5746 | * netdev_wait_allrefs - wait until all references are gone. |
| 5747 | * @dev: target net_device | ||
| 5685 | * | 5748 | * |
| 5686 | * This is called when unregistering network devices. | 5749 | * This is called when unregistering network devices. |
| 5687 | * | 5750 | * |
| @@ -5942,6 +6005,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, | |||
| 5942 | dev_net_set(dev, &init_net); | 6005 | dev_net_set(dev, &init_net); |
| 5943 | 6006 | ||
| 5944 | dev->gso_max_size = GSO_MAX_SIZE; | 6007 | dev->gso_max_size = GSO_MAX_SIZE; |
| 6008 | dev->gso_max_segs = GSO_MAX_SEGS; | ||
| 5945 | 6009 | ||
| 5946 | INIT_LIST_HEAD(&dev->napi_list); | 6010 | INIT_LIST_HEAD(&dev->napi_list); |
| 5947 | INIT_LIST_HEAD(&dev->unreg_list); | 6011 | INIT_LIST_HEAD(&dev->unreg_list); |
diff --git a/net/core/dst.c b/net/core/dst.c index 069d51d29414..56d63612e1e4 100644 --- a/net/core/dst.c +++ b/net/core/dst.c | |||
| @@ -149,7 +149,15 @@ int dst_discard(struct sk_buff *skb) | |||
| 149 | } | 149 | } |
| 150 | EXPORT_SYMBOL(dst_discard); | 150 | EXPORT_SYMBOL(dst_discard); |
| 151 | 151 | ||
| 152 | const u32 dst_default_metrics[RTAX_MAX]; | 152 | const u32 dst_default_metrics[RTAX_MAX + 1] = { |
| 153 | /* This initializer is needed to force linker to place this variable | ||
| 154 | * into const section. Otherwise it might end into bss section. | ||
| 155 | * We really want to avoid false sharing on this variable, and catch | ||
| 156 | * any writes on it. | ||
| 157 | */ | ||
| 158 | [RTAX_MAX] = 0xdeadbeef, | ||
| 159 | }; | ||
| 160 | |||
| 153 | 161 | ||
| 154 | void *dst_alloc(struct dst_ops *ops, struct net_device *dev, | 162 | void *dst_alloc(struct dst_ops *ops, struct net_device *dev, |
| 155 | int initial_ref, int initial_obsolete, unsigned short flags) | 163 | int initial_ref, int initial_obsolete, unsigned short flags) |
diff --git a/net/core/filter.c b/net/core/filter.c index d4ce2dc712e3..907efd27ec77 100644 --- a/net/core/filter.c +++ b/net/core/filter.c | |||
| @@ -83,6 +83,14 @@ int sk_filter(struct sock *sk, struct sk_buff *skb) | |||
| 83 | int err; | 83 | int err; |
| 84 | struct sk_filter *filter; | 84 | struct sk_filter *filter; |
| 85 | 85 | ||
| 86 | /* | ||
| 87 | * If the skb was allocated from pfmemalloc reserves, only | ||
| 88 | * allow SOCK_MEMALLOC sockets to use it as this socket is | ||
| 89 | * helping free memory | ||
| 90 | */ | ||
| 91 | if (skb_pfmemalloc(skb) && !sock_flag(sk, SOCK_MEMALLOC)) | ||
| 92 | return -ENOMEM; | ||
| 93 | |||
| 86 | err = security_sock_rcv_skb(sk, skb); | 94 | err = security_sock_rcv_skb(sk, skb); |
| 87 | if (err) | 95 | if (err) |
| 88 | return err; | 96 | return err; |
diff --git a/net/core/netpoll.c b/net/core/netpoll.c index b4c90e42b443..e4ba3e70c174 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c | |||
| @@ -26,6 +26,7 @@ | |||
| 26 | #include <linux/workqueue.h> | 26 | #include <linux/workqueue.h> |
| 27 | #include <linux/slab.h> | 27 | #include <linux/slab.h> |
| 28 | #include <linux/export.h> | 28 | #include <linux/export.h> |
| 29 | #include <linux/if_vlan.h> | ||
| 29 | #include <net/tcp.h> | 30 | #include <net/tcp.h> |
| 30 | #include <net/udp.h> | 31 | #include <net/udp.h> |
| 31 | #include <asm/unaligned.h> | 32 | #include <asm/unaligned.h> |
| @@ -54,7 +55,7 @@ static atomic_t trapped; | |||
| 54 | MAX_UDP_CHUNK) | 55 | MAX_UDP_CHUNK) |
| 55 | 56 | ||
| 56 | static void zap_completion_queue(void); | 57 | static void zap_completion_queue(void); |
| 57 | static void arp_reply(struct sk_buff *skb); | 58 | static void netpoll_arp_reply(struct sk_buff *skb, struct netpoll_info *npinfo); |
| 58 | 59 | ||
| 59 | static unsigned int carrier_timeout = 4; | 60 | static unsigned int carrier_timeout = 4; |
| 60 | module_param(carrier_timeout, uint, 0644); | 61 | module_param(carrier_timeout, uint, 0644); |
| @@ -170,7 +171,8 @@ static void poll_napi(struct net_device *dev) | |||
| 170 | list_for_each_entry(napi, &dev->napi_list, dev_list) { | 171 | list_for_each_entry(napi, &dev->napi_list, dev_list) { |
| 171 | if (napi->poll_owner != smp_processor_id() && | 172 | if (napi->poll_owner != smp_processor_id() && |
| 172 | spin_trylock(&napi->poll_lock)) { | 173 | spin_trylock(&napi->poll_lock)) { |
| 173 | budget = poll_one_napi(dev->npinfo, napi, budget); | 174 | budget = poll_one_napi(rcu_dereference_bh(dev->npinfo), |
| 175 | napi, budget); | ||
| 174 | spin_unlock(&napi->poll_lock); | 176 | spin_unlock(&napi->poll_lock); |
| 175 | 177 | ||
| 176 | if (!budget) | 178 | if (!budget) |
| @@ -185,13 +187,14 @@ static void service_arp_queue(struct netpoll_info *npi) | |||
| 185 | struct sk_buff *skb; | 187 | struct sk_buff *skb; |
| 186 | 188 | ||
| 187 | while ((skb = skb_dequeue(&npi->arp_tx))) | 189 | while ((skb = skb_dequeue(&npi->arp_tx))) |
| 188 | arp_reply(skb); | 190 | netpoll_arp_reply(skb, npi); |
| 189 | } | 191 | } |
| 190 | } | 192 | } |
| 191 | 193 | ||
| 192 | static void netpoll_poll_dev(struct net_device *dev) | 194 | static void netpoll_poll_dev(struct net_device *dev) |
| 193 | { | 195 | { |
| 194 | const struct net_device_ops *ops; | 196 | const struct net_device_ops *ops; |
| 197 | struct netpoll_info *ni = rcu_dereference_bh(dev->npinfo); | ||
| 195 | 198 | ||
| 196 | if (!dev || !netif_running(dev)) | 199 | if (!dev || !netif_running(dev)) |
| 197 | return; | 200 | return; |
| @@ -206,17 +209,18 @@ static void netpoll_poll_dev(struct net_device *dev) | |||
| 206 | poll_napi(dev); | 209 | poll_napi(dev); |
| 207 | 210 | ||
| 208 | if (dev->flags & IFF_SLAVE) { | 211 | if (dev->flags & IFF_SLAVE) { |
| 209 | if (dev->npinfo) { | 212 | if (ni) { |
| 210 | struct net_device *bond_dev = dev->master; | 213 | struct net_device *bond_dev = dev->master; |
| 211 | struct sk_buff *skb; | 214 | struct sk_buff *skb; |
| 212 | while ((skb = skb_dequeue(&dev->npinfo->arp_tx))) { | 215 | struct netpoll_info *bond_ni = rcu_dereference_bh(bond_dev->npinfo); |
| 216 | while ((skb = skb_dequeue(&ni->arp_tx))) { | ||
| 213 | skb->dev = bond_dev; | 217 | skb->dev = bond_dev; |
| 214 | skb_queue_tail(&bond_dev->npinfo->arp_tx, skb); | 218 | skb_queue_tail(&bond_ni->arp_tx, skb); |
| 215 | } | 219 | } |
| 216 | } | 220 | } |
| 217 | } | 221 | } |
| 218 | 222 | ||
| 219 | service_arp_queue(dev->npinfo); | 223 | service_arp_queue(ni); |
| 220 | 224 | ||
| 221 | zap_completion_queue(); | 225 | zap_completion_queue(); |
| 222 | } | 226 | } |
| @@ -302,6 +306,7 @@ static int netpoll_owner_active(struct net_device *dev) | |||
| 302 | return 0; | 306 | return 0; |
| 303 | } | 307 | } |
| 304 | 308 | ||
| 309 | /* call with IRQ disabled */ | ||
| 305 | void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb, | 310 | void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb, |
| 306 | struct net_device *dev) | 311 | struct net_device *dev) |
| 307 | { | 312 | { |
| @@ -309,8 +314,11 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb, | |||
| 309 | unsigned long tries; | 314 | unsigned long tries; |
| 310 | const struct net_device_ops *ops = dev->netdev_ops; | 315 | const struct net_device_ops *ops = dev->netdev_ops; |
| 311 | /* It is up to the caller to keep npinfo alive. */ | 316 | /* It is up to the caller to keep npinfo alive. */ |
| 312 | struct netpoll_info *npinfo = np->dev->npinfo; | 317 | struct netpoll_info *npinfo; |
| 318 | |||
| 319 | WARN_ON_ONCE(!irqs_disabled()); | ||
| 313 | 320 | ||
| 321 | npinfo = rcu_dereference_bh(np->dev->npinfo); | ||
| 314 | if (!npinfo || !netif_running(dev) || !netif_device_present(dev)) { | 322 | if (!npinfo || !netif_running(dev) || !netif_device_present(dev)) { |
| 315 | __kfree_skb(skb); | 323 | __kfree_skb(skb); |
| 316 | return; | 324 | return; |
| @@ -319,16 +327,22 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb, | |||
| 319 | /* don't get messages out of order, and no recursion */ | 327 | /* don't get messages out of order, and no recursion */ |
| 320 | if (skb_queue_len(&npinfo->txq) == 0 && !netpoll_owner_active(dev)) { | 328 | if (skb_queue_len(&npinfo->txq) == 0 && !netpoll_owner_active(dev)) { |
| 321 | struct netdev_queue *txq; | 329 | struct netdev_queue *txq; |
| 322 | unsigned long flags; | ||
| 323 | 330 | ||
| 324 | txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb)); | 331 | txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb)); |
| 325 | 332 | ||
| 326 | local_irq_save(flags); | ||
| 327 | /* try until next clock tick */ | 333 | /* try until next clock tick */ |
| 328 | for (tries = jiffies_to_usecs(1)/USEC_PER_POLL; | 334 | for (tries = jiffies_to_usecs(1)/USEC_PER_POLL; |
| 329 | tries > 0; --tries) { | 335 | tries > 0; --tries) { |
| 330 | if (__netif_tx_trylock(txq)) { | 336 | if (__netif_tx_trylock(txq)) { |
| 331 | if (!netif_xmit_stopped(txq)) { | 337 | if (!netif_xmit_stopped(txq)) { |
| 338 | if (vlan_tx_tag_present(skb) && | ||
| 339 | !(netif_skb_features(skb) & NETIF_F_HW_VLAN_TX)) { | ||
| 340 | skb = __vlan_put_tag(skb, vlan_tx_tag_get(skb)); | ||
| 341 | if (unlikely(!skb)) | ||
| 342 | break; | ||
| 343 | skb->vlan_tci = 0; | ||
| 344 | } | ||
| 345 | |||
| 332 | status = ops->ndo_start_xmit(skb, dev); | 346 | status = ops->ndo_start_xmit(skb, dev); |
| 333 | if (status == NETDEV_TX_OK) | 347 | if (status == NETDEV_TX_OK) |
| 334 | txq_trans_update(txq); | 348 | txq_trans_update(txq); |
| @@ -347,10 +361,9 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb, | |||
| 347 | } | 361 | } |
| 348 | 362 | ||
| 349 | WARN_ONCE(!irqs_disabled(), | 363 | WARN_ONCE(!irqs_disabled(), |
| 350 | "netpoll_send_skb(): %s enabled interrupts in poll (%pF)\n", | 364 | "netpoll_send_skb_on_dev(): %s enabled interrupts in poll (%pF)\n", |
| 351 | dev->name, ops->ndo_start_xmit); | 365 | dev->name, ops->ndo_start_xmit); |
| 352 | 366 | ||
| 353 | local_irq_restore(flags); | ||
| 354 | } | 367 | } |
| 355 | 368 | ||
| 356 | if (status != NETDEV_TX_OK) { | 369 | if (status != NETDEV_TX_OK) { |
| @@ -423,9 +436,8 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len) | |||
| 423 | } | 436 | } |
| 424 | EXPORT_SYMBOL(netpoll_send_udp); | 437 | EXPORT_SYMBOL(netpoll_send_udp); |
| 425 | 438 | ||
| 426 | static void arp_reply(struct sk_buff *skb) | 439 | static void netpoll_arp_reply(struct sk_buff *skb, struct netpoll_info *npinfo) |
| 427 | { | 440 | { |
| 428 | struct netpoll_info *npinfo = skb->dev->npinfo; | ||
| 429 | struct arphdr *arp; | 441 | struct arphdr *arp; |
| 430 | unsigned char *arp_ptr; | 442 | unsigned char *arp_ptr; |
| 431 | int size, type = ARPOP_REPLY, ptype = ETH_P_ARP; | 443 | int size, type = ARPOP_REPLY, ptype = ETH_P_ARP; |
| @@ -543,13 +555,12 @@ static void arp_reply(struct sk_buff *skb) | |||
| 543 | spin_unlock_irqrestore(&npinfo->rx_lock, flags); | 555 | spin_unlock_irqrestore(&npinfo->rx_lock, flags); |
| 544 | } | 556 | } |
| 545 | 557 | ||
| 546 | int __netpoll_rx(struct sk_buff *skb) | 558 | int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo) |
| 547 | { | 559 | { |
| 548 | int proto, len, ulen; | 560 | int proto, len, ulen; |
| 549 | int hits = 0; | 561 | int hits = 0; |
| 550 | const struct iphdr *iph; | 562 | const struct iphdr *iph; |
| 551 | struct udphdr *uh; | 563 | struct udphdr *uh; |
| 552 | struct netpoll_info *npinfo = skb->dev->npinfo; | ||
| 553 | struct netpoll *np, *tmp; | 564 | struct netpoll *np, *tmp; |
| 554 | 565 | ||
| 555 | if (list_empty(&npinfo->rx_np)) | 566 | if (list_empty(&npinfo->rx_np)) |
| @@ -565,6 +576,12 @@ int __netpoll_rx(struct sk_buff *skb) | |||
| 565 | return 1; | 576 | return 1; |
| 566 | } | 577 | } |
| 567 | 578 | ||
| 579 | if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) { | ||
| 580 | skb = vlan_untag(skb); | ||
| 581 | if (unlikely(!skb)) | ||
| 582 | goto out; | ||
| 583 | } | ||
| 584 | |||
| 568 | proto = ntohs(eth_hdr(skb)->h_proto); | 585 | proto = ntohs(eth_hdr(skb)->h_proto); |
| 569 | if (proto != ETH_P_IP) | 586 | if (proto != ETH_P_IP) |
| 570 | goto out; | 587 | goto out; |
| @@ -715,7 +732,7 @@ int netpoll_parse_options(struct netpoll *np, char *opt) | |||
| 715 | } | 732 | } |
| 716 | EXPORT_SYMBOL(netpoll_parse_options); | 733 | EXPORT_SYMBOL(netpoll_parse_options); |
| 717 | 734 | ||
| 718 | int __netpoll_setup(struct netpoll *np, struct net_device *ndev) | 735 | int __netpoll_setup(struct netpoll *np, struct net_device *ndev, gfp_t gfp) |
| 719 | { | 736 | { |
| 720 | struct netpoll_info *npinfo; | 737 | struct netpoll_info *npinfo; |
| 721 | const struct net_device_ops *ops; | 738 | const struct net_device_ops *ops; |
| @@ -734,7 +751,7 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev) | |||
| 734 | } | 751 | } |
| 735 | 752 | ||
| 736 | if (!ndev->npinfo) { | 753 | if (!ndev->npinfo) { |
| 737 | npinfo = kmalloc(sizeof(*npinfo), GFP_KERNEL); | 754 | npinfo = kmalloc(sizeof(*npinfo), gfp); |
| 738 | if (!npinfo) { | 755 | if (!npinfo) { |
| 739 | err = -ENOMEM; | 756 | err = -ENOMEM; |
| 740 | goto out; | 757 | goto out; |
| @@ -752,7 +769,7 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev) | |||
| 752 | 769 | ||
| 753 | ops = np->dev->netdev_ops; | 770 | ops = np->dev->netdev_ops; |
| 754 | if (ops->ndo_netpoll_setup) { | 771 | if (ops->ndo_netpoll_setup) { |
| 755 | err = ops->ndo_netpoll_setup(ndev, npinfo); | 772 | err = ops->ndo_netpoll_setup(ndev, npinfo, gfp); |
| 756 | if (err) | 773 | if (err) |
| 757 | goto free_npinfo; | 774 | goto free_npinfo; |
| 758 | } | 775 | } |
| @@ -857,7 +874,7 @@ int netpoll_setup(struct netpoll *np) | |||
| 857 | refill_skbs(); | 874 | refill_skbs(); |
| 858 | 875 | ||
| 859 | rtnl_lock(); | 876 | rtnl_lock(); |
| 860 | err = __netpoll_setup(np, ndev); | 877 | err = __netpoll_setup(np, ndev, GFP_KERNEL); |
| 861 | rtnl_unlock(); | 878 | rtnl_unlock(); |
| 862 | 879 | ||
| 863 | if (err) | 880 | if (err) |
| @@ -878,6 +895,24 @@ static int __init netpoll_init(void) | |||
| 878 | } | 895 | } |
| 879 | core_initcall(netpoll_init); | 896 | core_initcall(netpoll_init); |
| 880 | 897 | ||
| 898 | static void rcu_cleanup_netpoll_info(struct rcu_head *rcu_head) | ||
| 899 | { | ||
| 900 | struct netpoll_info *npinfo = | ||
| 901 | container_of(rcu_head, struct netpoll_info, rcu); | ||
| 902 | |||
| 903 | skb_queue_purge(&npinfo->arp_tx); | ||
| 904 | skb_queue_purge(&npinfo->txq); | ||
| 905 | |||
| 906 | /* we can't call cancel_delayed_work_sync here, as we are in softirq */ | ||
| 907 | cancel_delayed_work(&npinfo->tx_work); | ||
| 908 | |||
| 909 | /* clean after last, unfinished work */ | ||
| 910 | __skb_queue_purge(&npinfo->txq); | ||
| 911 | /* now cancel it again */ | ||
| 912 | cancel_delayed_work(&npinfo->tx_work); | ||
| 913 | kfree(npinfo); | ||
| 914 | } | ||
| 915 | |||
| 881 | void __netpoll_cleanup(struct netpoll *np) | 916 | void __netpoll_cleanup(struct netpoll *np) |
| 882 | { | 917 | { |
| 883 | struct netpoll_info *npinfo; | 918 | struct netpoll_info *npinfo; |
| @@ -903,20 +938,24 @@ void __netpoll_cleanup(struct netpoll *np) | |||
| 903 | ops->ndo_netpoll_cleanup(np->dev); | 938 | ops->ndo_netpoll_cleanup(np->dev); |
| 904 | 939 | ||
| 905 | RCU_INIT_POINTER(np->dev->npinfo, NULL); | 940 | RCU_INIT_POINTER(np->dev->npinfo, NULL); |
| 941 | call_rcu_bh(&npinfo->rcu, rcu_cleanup_netpoll_info); | ||
| 942 | } | ||
| 943 | } | ||
| 944 | EXPORT_SYMBOL_GPL(__netpoll_cleanup); | ||
| 906 | 945 | ||
| 907 | /* avoid racing with NAPI reading npinfo */ | 946 | static void rcu_cleanup_netpoll(struct rcu_head *rcu_head) |
| 908 | synchronize_rcu_bh(); | 947 | { |
| 948 | struct netpoll *np = container_of(rcu_head, struct netpoll, rcu); | ||
| 909 | 949 | ||
| 910 | skb_queue_purge(&npinfo->arp_tx); | 950 | __netpoll_cleanup(np); |
| 911 | skb_queue_purge(&npinfo->txq); | 951 | kfree(np); |
| 912 | cancel_delayed_work_sync(&npinfo->tx_work); | 952 | } |
| 913 | 953 | ||
| 914 | /* clean after last, unfinished work */ | 954 | void __netpoll_free_rcu(struct netpoll *np) |
| 915 | __skb_queue_purge(&npinfo->txq); | 955 | { |
| 916 | kfree(npinfo); | 956 | call_rcu_bh(&np->rcu, rcu_cleanup_netpoll); |
| 917 | } | ||
| 918 | } | 957 | } |
| 919 | EXPORT_SYMBOL_GPL(__netpoll_cleanup); | 958 | EXPORT_SYMBOL_GPL(__netpoll_free_rcu); |
| 920 | 959 | ||
| 921 | void netpoll_cleanup(struct netpoll *np) | 960 | void netpoll_cleanup(struct netpoll *np) |
| 922 | { | 961 | { |
diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c index ed0c0431fcd8..c75e3f9d060f 100644 --- a/net/core/netprio_cgroup.c +++ b/net/core/netprio_cgroup.c | |||
| @@ -101,12 +101,10 @@ static int write_update_netdev_table(struct net_device *dev) | |||
| 101 | u32 max_len; | 101 | u32 max_len; |
| 102 | struct netprio_map *map; | 102 | struct netprio_map *map; |
| 103 | 103 | ||
| 104 | rtnl_lock(); | ||
| 105 | max_len = atomic_read(&max_prioidx) + 1; | 104 | max_len = atomic_read(&max_prioidx) + 1; |
| 106 | map = rtnl_dereference(dev->priomap); | 105 | map = rtnl_dereference(dev->priomap); |
| 107 | if (!map || map->priomap_len < max_len) | 106 | if (!map || map->priomap_len < max_len) |
| 108 | ret = extend_netdev_table(dev, max_len); | 107 | ret = extend_netdev_table(dev, max_len); |
| 109 | rtnl_unlock(); | ||
| 110 | 108 | ||
| 111 | return ret; | 109 | return ret; |
| 112 | } | 110 | } |
| @@ -256,17 +254,17 @@ static int write_priomap(struct cgroup *cgrp, struct cftype *cft, | |||
| 256 | if (!dev) | 254 | if (!dev) |
| 257 | goto out_free_devname; | 255 | goto out_free_devname; |
| 258 | 256 | ||
| 257 | rtnl_lock(); | ||
| 259 | ret = write_update_netdev_table(dev); | 258 | ret = write_update_netdev_table(dev); |
| 260 | if (ret < 0) | 259 | if (ret < 0) |
| 261 | goto out_put_dev; | 260 | goto out_put_dev; |
| 262 | 261 | ||
| 263 | rcu_read_lock(); | 262 | map = rtnl_dereference(dev->priomap); |
| 264 | map = rcu_dereference(dev->priomap); | ||
| 265 | if (map) | 263 | if (map) |
| 266 | map->priomap[prioidx] = priority; | 264 | map->priomap[prioidx] = priority; |
| 267 | rcu_read_unlock(); | ||
| 268 | 265 | ||
| 269 | out_put_dev: | 266 | out_put_dev: |
| 267 | rtnl_unlock(); | ||
| 270 | dev_put(dev); | 268 | dev_put(dev); |
| 271 | 269 | ||
| 272 | out_free_devname: | 270 | out_free_devname: |
| @@ -277,12 +275,6 @@ out_free_devname: | |||
| 277 | void net_prio_attach(struct cgroup *cgrp, struct cgroup_taskset *tset) | 275 | void net_prio_attach(struct cgroup *cgrp, struct cgroup_taskset *tset) |
| 278 | { | 276 | { |
| 279 | struct task_struct *p; | 277 | struct task_struct *p; |
| 280 | char *tmp = kzalloc(sizeof(char) * PATH_MAX, GFP_KERNEL); | ||
| 281 | |||
| 282 | if (!tmp) { | ||
| 283 | pr_warn("Unable to attach cgrp due to alloc failure!\n"); | ||
| 284 | return; | ||
| 285 | } | ||
| 286 | 278 | ||
| 287 | cgroup_taskset_for_each(p, cgrp, tset) { | 279 | cgroup_taskset_for_each(p, cgrp, tset) { |
| 288 | unsigned int fd; | 280 | unsigned int fd; |
| @@ -296,32 +288,24 @@ void net_prio_attach(struct cgroup *cgrp, struct cgroup_taskset *tset) | |||
| 296 | continue; | 288 | continue; |
| 297 | } | 289 | } |
| 298 | 290 | ||
| 299 | rcu_read_lock(); | 291 | spin_lock(&files->file_lock); |
| 300 | fdt = files_fdtable(files); | 292 | fdt = files_fdtable(files); |
| 301 | for (fd = 0; fd < fdt->max_fds; fd++) { | 293 | for (fd = 0; fd < fdt->max_fds; fd++) { |
| 302 | char *path; | ||
| 303 | struct file *file; | 294 | struct file *file; |
| 304 | struct socket *sock; | 295 | struct socket *sock; |
| 305 | unsigned long s; | 296 | int err; |
| 306 | int rv, err = 0; | ||
| 307 | 297 | ||
| 308 | file = fcheck_files(files, fd); | 298 | file = fcheck_files(files, fd); |
| 309 | if (!file) | 299 | if (!file) |
| 310 | continue; | 300 | continue; |
| 311 | 301 | ||
| 312 | path = d_path(&file->f_path, tmp, PAGE_SIZE); | ||
| 313 | rv = sscanf(path, "socket:[%lu]", &s); | ||
| 314 | if (rv <= 0) | ||
| 315 | continue; | ||
| 316 | |||
| 317 | sock = sock_from_file(file, &err); | 302 | sock = sock_from_file(file, &err); |
| 318 | if (!err) | 303 | if (sock) |
| 319 | sock_update_netprioidx(sock->sk, p); | 304 | sock_update_netprioidx(sock->sk, p); |
| 320 | } | 305 | } |
| 321 | rcu_read_unlock(); | 306 | spin_unlock(&files->file_lock); |
| 322 | task_unlock(p); | 307 | task_unlock(p); |
| 323 | } | 308 | } |
| 324 | kfree(tmp); | ||
| 325 | } | 309 | } |
| 326 | 310 | ||
| 327 | static struct cftype ss_files[] = { | 311 | static struct cftype ss_files[] = { |
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 334b930e0de3..2c5a0a06c4ce 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c | |||
| @@ -625,9 +625,13 @@ int rtnl_put_cacheinfo(struct sk_buff *skb, struct dst_entry *dst, u32 id, | |||
| 625 | .rta_id = id, | 625 | .rta_id = id, |
| 626 | }; | 626 | }; |
| 627 | 627 | ||
| 628 | if (expires) | 628 | if (expires) { |
| 629 | ci.rta_expires = jiffies_to_clock_t(expires); | 629 | unsigned long clock; |
| 630 | 630 | ||
| 631 | clock = jiffies_to_clock_t(abs(expires)); | ||
| 632 | clock = min_t(unsigned long, clock, INT_MAX); | ||
| 633 | ci.rta_expires = (expires > 0) ? clock : -clock; | ||
| 634 | } | ||
| 631 | return nla_put(skb, RTA_CACHEINFO, sizeof(ci), &ci); | 635 | return nla_put(skb, RTA_CACHEINFO, sizeof(ci), &ci); |
| 632 | } | 636 | } |
| 633 | EXPORT_SYMBOL_GPL(rtnl_put_cacheinfo); | 637 | EXPORT_SYMBOL_GPL(rtnl_put_cacheinfo); |
| @@ -659,6 +663,12 @@ static void set_operstate(struct net_device *dev, unsigned char transition) | |||
| 659 | } | 663 | } |
| 660 | } | 664 | } |
| 661 | 665 | ||
| 666 | static unsigned int rtnl_dev_get_flags(const struct net_device *dev) | ||
| 667 | { | ||
| 668 | return (dev->flags & ~(IFF_PROMISC | IFF_ALLMULTI)) | | ||
| 669 | (dev->gflags & (IFF_PROMISC | IFF_ALLMULTI)); | ||
| 670 | } | ||
| 671 | |||
| 662 | static unsigned int rtnl_dev_combine_flags(const struct net_device *dev, | 672 | static unsigned int rtnl_dev_combine_flags(const struct net_device *dev, |
| 663 | const struct ifinfomsg *ifm) | 673 | const struct ifinfomsg *ifm) |
| 664 | { | 674 | { |
| @@ -667,7 +677,7 @@ static unsigned int rtnl_dev_combine_flags(const struct net_device *dev, | |||
| 667 | /* bugwards compatibility: ifi_change == 0 is treated as ~0 */ | 677 | /* bugwards compatibility: ifi_change == 0 is treated as ~0 */ |
| 668 | if (ifm->ifi_change) | 678 | if (ifm->ifi_change) |
| 669 | flags = (flags & ifm->ifi_change) | | 679 | flags = (flags & ifm->ifi_change) | |
| 670 | (dev->flags & ~ifm->ifi_change); | 680 | (rtnl_dev_get_flags(dev) & ~ifm->ifi_change); |
| 671 | 681 | ||
| 672 | return flags; | 682 | return flags; |
| 673 | } | 683 | } |
| @@ -1371,6 +1381,7 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, | |||
| 1371 | goto errout; | 1381 | goto errout; |
| 1372 | send_addr_notify = 1; | 1382 | send_addr_notify = 1; |
| 1373 | modified = 1; | 1383 | modified = 1; |
| 1384 | add_device_randomness(dev->dev_addr, dev->addr_len); | ||
| 1374 | } | 1385 | } |
| 1375 | 1386 | ||
| 1376 | if (tb[IFLA_MTU]) { | 1387 | if (tb[IFLA_MTU]) { |
diff --git a/net/core/scm.c b/net/core/scm.c index 8f6ccfd68ef4..040cebeed45b 100644 --- a/net/core/scm.c +++ b/net/core/scm.c | |||
| @@ -265,6 +265,7 @@ void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm) | |||
| 265 | for (i=0, cmfptr=(__force int __user *)CMSG_DATA(cm); i<fdmax; | 265 | for (i=0, cmfptr=(__force int __user *)CMSG_DATA(cm); i<fdmax; |
| 266 | i++, cmfptr++) | 266 | i++, cmfptr++) |
| 267 | { | 267 | { |
| 268 | struct socket *sock; | ||
| 268 | int new_fd; | 269 | int new_fd; |
| 269 | err = security_file_receive(fp[i]); | 270 | err = security_file_receive(fp[i]); |
| 270 | if (err) | 271 | if (err) |
| @@ -281,6 +282,9 @@ void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm) | |||
| 281 | } | 282 | } |
| 282 | /* Bump the usage count and install the file. */ | 283 | /* Bump the usage count and install the file. */ |
| 283 | get_file(fp[i]); | 284 | get_file(fp[i]); |
| 285 | sock = sock_from_file(fp[i], &err); | ||
| 286 | if (sock) | ||
| 287 | sock_update_netprioidx(sock->sk, current); | ||
| 284 | fd_install(new_fd, fp[i]); | 288 | fd_install(new_fd, fp[i]); |
| 285 | } | 289 | } |
| 286 | 290 | ||
diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 368f65c15e4f..fe00d1208167 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c | |||
| @@ -145,6 +145,43 @@ static void skb_under_panic(struct sk_buff *skb, int sz, void *here) | |||
| 145 | BUG(); | 145 | BUG(); |
| 146 | } | 146 | } |
| 147 | 147 | ||
| 148 | |||
| 149 | /* | ||
| 150 | * kmalloc_reserve is a wrapper around kmalloc_node_track_caller that tells | ||
| 151 | * the caller if emergency pfmemalloc reserves are being used. If it is and | ||
| 152 | * the socket is later found to be SOCK_MEMALLOC then PFMEMALLOC reserves | ||
| 153 | * may be used. Otherwise, the packet data may be discarded until enough | ||
| 154 | * memory is free | ||
| 155 | */ | ||
| 156 | #define kmalloc_reserve(size, gfp, node, pfmemalloc) \ | ||
| 157 | __kmalloc_reserve(size, gfp, node, _RET_IP_, pfmemalloc) | ||
| 158 | void *__kmalloc_reserve(size_t size, gfp_t flags, int node, unsigned long ip, | ||
| 159 | bool *pfmemalloc) | ||
| 160 | { | ||
| 161 | void *obj; | ||
| 162 | bool ret_pfmemalloc = false; | ||
| 163 | |||
| 164 | /* | ||
| 165 | * Try a regular allocation, when that fails and we're not entitled | ||
| 166 | * to the reserves, fail. | ||
| 167 | */ | ||
| 168 | obj = kmalloc_node_track_caller(size, | ||
| 169 | flags | __GFP_NOMEMALLOC | __GFP_NOWARN, | ||
| 170 | node); | ||
| 171 | if (obj || !(gfp_pfmemalloc_allowed(flags))) | ||
| 172 | goto out; | ||
| 173 | |||
| 174 | /* Try again but now we are using pfmemalloc reserves */ | ||
| 175 | ret_pfmemalloc = true; | ||
| 176 | obj = kmalloc_node_track_caller(size, flags, node); | ||
| 177 | |||
| 178 | out: | ||
| 179 | if (pfmemalloc) | ||
| 180 | *pfmemalloc = ret_pfmemalloc; | ||
| 181 | |||
| 182 | return obj; | ||
| 183 | } | ||
| 184 | |||
| 148 | /* Allocate a new skbuff. We do this ourselves so we can fill in a few | 185 | /* Allocate a new skbuff. We do this ourselves so we can fill in a few |
| 149 | * 'private' fields and also do memory statistics to find all the | 186 | * 'private' fields and also do memory statistics to find all the |
| 150 | * [BEEP] leaks. | 187 | * [BEEP] leaks. |
| @@ -155,8 +192,10 @@ static void skb_under_panic(struct sk_buff *skb, int sz, void *here) | |||
| 155 | * __alloc_skb - allocate a network buffer | 192 | * __alloc_skb - allocate a network buffer |
| 156 | * @size: size to allocate | 193 | * @size: size to allocate |
| 157 | * @gfp_mask: allocation mask | 194 | * @gfp_mask: allocation mask |
| 158 | * @fclone: allocate from fclone cache instead of head cache | 195 | * @flags: If SKB_ALLOC_FCLONE is set, allocate from fclone cache |
| 159 | * and allocate a cloned (child) skb | 196 | * instead of head cache and allocate a cloned (child) skb. |
| 197 | * If SKB_ALLOC_RX is set, __GFP_MEMALLOC will be used for | ||
| 198 | * allocations in case the data is required for writeback | ||
| 160 | * @node: numa node to allocate memory on | 199 | * @node: numa node to allocate memory on |
| 161 | * | 200 | * |
| 162 | * Allocate a new &sk_buff. The returned buffer has no headroom and a | 201 | * Allocate a new &sk_buff. The returned buffer has no headroom and a |
| @@ -167,14 +206,19 @@ static void skb_under_panic(struct sk_buff *skb, int sz, void *here) | |||
| 167 | * %GFP_ATOMIC. | 206 | * %GFP_ATOMIC. |
| 168 | */ | 207 | */ |
| 169 | struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, | 208 | struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, |
| 170 | int fclone, int node) | 209 | int flags, int node) |
| 171 | { | 210 | { |
| 172 | struct kmem_cache *cache; | 211 | struct kmem_cache *cache; |
| 173 | struct skb_shared_info *shinfo; | 212 | struct skb_shared_info *shinfo; |
| 174 | struct sk_buff *skb; | 213 | struct sk_buff *skb; |
| 175 | u8 *data; | 214 | u8 *data; |
| 215 | bool pfmemalloc; | ||
| 176 | 216 | ||
| 177 | cache = fclone ? skbuff_fclone_cache : skbuff_head_cache; | 217 | cache = (flags & SKB_ALLOC_FCLONE) |
| 218 | ? skbuff_fclone_cache : skbuff_head_cache; | ||
| 219 | |||
| 220 | if (sk_memalloc_socks() && (flags & SKB_ALLOC_RX)) | ||
| 221 | gfp_mask |= __GFP_MEMALLOC; | ||
| 178 | 222 | ||
| 179 | /* Get the HEAD */ | 223 | /* Get the HEAD */ |
| 180 | skb = kmem_cache_alloc_node(cache, gfp_mask & ~__GFP_DMA, node); | 224 | skb = kmem_cache_alloc_node(cache, gfp_mask & ~__GFP_DMA, node); |
| @@ -189,7 +233,7 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, | |||
| 189 | */ | 233 | */ |
| 190 | size = SKB_DATA_ALIGN(size); | 234 | size = SKB_DATA_ALIGN(size); |
| 191 | size += SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); | 235 | size += SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); |
| 192 | data = kmalloc_node_track_caller(size, gfp_mask, node); | 236 | data = kmalloc_reserve(size, gfp_mask, node, &pfmemalloc); |
| 193 | if (!data) | 237 | if (!data) |
| 194 | goto nodata; | 238 | goto nodata; |
| 195 | /* kmalloc(size) might give us more room than requested. | 239 | /* kmalloc(size) might give us more room than requested. |
| @@ -207,6 +251,7 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, | |||
| 207 | memset(skb, 0, offsetof(struct sk_buff, tail)); | 251 | memset(skb, 0, offsetof(struct sk_buff, tail)); |
| 208 | /* Account for allocated memory : skb + skb->head */ | 252 | /* Account for allocated memory : skb + skb->head */ |
| 209 | skb->truesize = SKB_TRUESIZE(size); | 253 | skb->truesize = SKB_TRUESIZE(size); |
| 254 | skb->pfmemalloc = pfmemalloc; | ||
| 210 | atomic_set(&skb->users, 1); | 255 | atomic_set(&skb->users, 1); |
| 211 | skb->head = data; | 256 | skb->head = data; |
| 212 | skb->data = data; | 257 | skb->data = data; |
| @@ -222,7 +267,7 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, | |||
| 222 | atomic_set(&shinfo->dataref, 1); | 267 | atomic_set(&shinfo->dataref, 1); |
| 223 | kmemcheck_annotate_variable(shinfo->destructor_arg); | 268 | kmemcheck_annotate_variable(shinfo->destructor_arg); |
| 224 | 269 | ||
| 225 | if (fclone) { | 270 | if (flags & SKB_ALLOC_FCLONE) { |
| 226 | struct sk_buff *child = skb + 1; | 271 | struct sk_buff *child = skb + 1; |
| 227 | atomic_t *fclone_ref = (atomic_t *) (child + 1); | 272 | atomic_t *fclone_ref = (atomic_t *) (child + 1); |
| 228 | 273 | ||
| @@ -232,6 +277,7 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, | |||
| 232 | atomic_set(fclone_ref, 1); | 277 | atomic_set(fclone_ref, 1); |
| 233 | 278 | ||
| 234 | child->fclone = SKB_FCLONE_UNAVAILABLE; | 279 | child->fclone = SKB_FCLONE_UNAVAILABLE; |
| 280 | child->pfmemalloc = pfmemalloc; | ||
| 235 | } | 281 | } |
| 236 | out: | 282 | out: |
| 237 | return skb; | 283 | return skb; |
| @@ -302,14 +348,7 @@ static DEFINE_PER_CPU(struct netdev_alloc_cache, netdev_alloc_cache); | |||
| 302 | 348 | ||
| 303 | #define NETDEV_PAGECNT_BIAS (PAGE_SIZE / SMP_CACHE_BYTES) | 349 | #define NETDEV_PAGECNT_BIAS (PAGE_SIZE / SMP_CACHE_BYTES) |
| 304 | 350 | ||
| 305 | /** | 351 | static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask) |
| 306 | * netdev_alloc_frag - allocate a page fragment | ||
| 307 | * @fragsz: fragment size | ||
| 308 | * | ||
| 309 | * Allocates a frag from a page for receive buffer. | ||
| 310 | * Uses GFP_ATOMIC allocations. | ||
| 311 | */ | ||
| 312 | void *netdev_alloc_frag(unsigned int fragsz) | ||
| 313 | { | 352 | { |
| 314 | struct netdev_alloc_cache *nc; | 353 | struct netdev_alloc_cache *nc; |
| 315 | void *data = NULL; | 354 | void *data = NULL; |
| @@ -319,7 +358,7 @@ void *netdev_alloc_frag(unsigned int fragsz) | |||
| 319 | nc = &__get_cpu_var(netdev_alloc_cache); | 358 | nc = &__get_cpu_var(netdev_alloc_cache); |
| 320 | if (unlikely(!nc->page)) { | 359 | if (unlikely(!nc->page)) { |
| 321 | refill: | 360 | refill: |
| 322 | nc->page = alloc_page(GFP_ATOMIC | __GFP_COLD); | 361 | nc->page = alloc_page(gfp_mask); |
| 323 | if (unlikely(!nc->page)) | 362 | if (unlikely(!nc->page)) |
| 324 | goto end; | 363 | goto end; |
| 325 | recycle: | 364 | recycle: |
| @@ -343,6 +382,18 @@ end: | |||
| 343 | local_irq_restore(flags); | 382 | local_irq_restore(flags); |
| 344 | return data; | 383 | return data; |
| 345 | } | 384 | } |
| 385 | |||
| 386 | /** | ||
| 387 | * netdev_alloc_frag - allocate a page fragment | ||
| 388 | * @fragsz: fragment size | ||
| 389 | * | ||
| 390 | * Allocates a frag from a page for receive buffer. | ||
| 391 | * Uses GFP_ATOMIC allocations. | ||
| 392 | */ | ||
| 393 | void *netdev_alloc_frag(unsigned int fragsz) | ||
| 394 | { | ||
| 395 | return __netdev_alloc_frag(fragsz, GFP_ATOMIC | __GFP_COLD); | ||
| 396 | } | ||
| 346 | EXPORT_SYMBOL(netdev_alloc_frag); | 397 | EXPORT_SYMBOL(netdev_alloc_frag); |
| 347 | 398 | ||
| 348 | /** | 399 | /** |
| @@ -366,7 +417,12 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev, | |||
| 366 | SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); | 417 | SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); |
| 367 | 418 | ||
| 368 | if (fragsz <= PAGE_SIZE && !(gfp_mask & (__GFP_WAIT | GFP_DMA))) { | 419 | if (fragsz <= PAGE_SIZE && !(gfp_mask & (__GFP_WAIT | GFP_DMA))) { |
| 369 | void *data = netdev_alloc_frag(fragsz); | 420 | void *data; |
| 421 | |||
| 422 | if (sk_memalloc_socks()) | ||
| 423 | gfp_mask |= __GFP_MEMALLOC; | ||
| 424 | |||
| 425 | data = __netdev_alloc_frag(fragsz, gfp_mask); | ||
| 370 | 426 | ||
| 371 | if (likely(data)) { | 427 | if (likely(data)) { |
| 372 | skb = build_skb(data, fragsz); | 428 | skb = build_skb(data, fragsz); |
| @@ -374,7 +430,8 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev, | |||
| 374 | put_page(virt_to_head_page(data)); | 430 | put_page(virt_to_head_page(data)); |
| 375 | } | 431 | } |
| 376 | } else { | 432 | } else { |
| 377 | skb = __alloc_skb(length + NET_SKB_PAD, gfp_mask, 0, NUMA_NO_NODE); | 433 | skb = __alloc_skb(length + NET_SKB_PAD, gfp_mask, |
| 434 | SKB_ALLOC_RX, NUMA_NO_NODE); | ||
| 378 | } | 435 | } |
| 379 | if (likely(skb)) { | 436 | if (likely(skb)) { |
| 380 | skb_reserve(skb, NET_SKB_PAD); | 437 | skb_reserve(skb, NET_SKB_PAD); |
| @@ -656,6 +713,7 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old) | |||
| 656 | #if IS_ENABLED(CONFIG_IP_VS) | 713 | #if IS_ENABLED(CONFIG_IP_VS) |
| 657 | new->ipvs_property = old->ipvs_property; | 714 | new->ipvs_property = old->ipvs_property; |
| 658 | #endif | 715 | #endif |
| 716 | new->pfmemalloc = old->pfmemalloc; | ||
| 659 | new->protocol = old->protocol; | 717 | new->protocol = old->protocol; |
| 660 | new->mark = old->mark; | 718 | new->mark = old->mark; |
| 661 | new->skb_iif = old->skb_iif; | 719 | new->skb_iif = old->skb_iif; |
| @@ -814,6 +872,9 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask) | |||
| 814 | n->fclone = SKB_FCLONE_CLONE; | 872 | n->fclone = SKB_FCLONE_CLONE; |
| 815 | atomic_inc(fclone_ref); | 873 | atomic_inc(fclone_ref); |
| 816 | } else { | 874 | } else { |
| 875 | if (skb_pfmemalloc(skb)) | ||
| 876 | gfp_mask |= __GFP_MEMALLOC; | ||
| 877 | |||
| 817 | n = kmem_cache_alloc(skbuff_head_cache, gfp_mask); | 878 | n = kmem_cache_alloc(skbuff_head_cache, gfp_mask); |
| 818 | if (!n) | 879 | if (!n) |
| 819 | return NULL; | 880 | return NULL; |
| @@ -850,6 +911,13 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old) | |||
| 850 | skb_shinfo(new)->gso_type = skb_shinfo(old)->gso_type; | 911 | skb_shinfo(new)->gso_type = skb_shinfo(old)->gso_type; |
| 851 | } | 912 | } |
| 852 | 913 | ||
| 914 | static inline int skb_alloc_rx_flag(const struct sk_buff *skb) | ||
| 915 | { | ||
| 916 | if (skb_pfmemalloc(skb)) | ||
| 917 | return SKB_ALLOC_RX; | ||
| 918 | return 0; | ||
| 919 | } | ||
| 920 | |||
| 853 | /** | 921 | /** |
| 854 | * skb_copy - create private copy of an sk_buff | 922 | * skb_copy - create private copy of an sk_buff |
| 855 | * @skb: buffer to copy | 923 | * @skb: buffer to copy |
| @@ -871,7 +939,8 @@ struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t gfp_mask) | |||
| 871 | { | 939 | { |
| 872 | int headerlen = skb_headroom(skb); | 940 | int headerlen = skb_headroom(skb); |
| 873 | unsigned int size = skb_end_offset(skb) + skb->data_len; | 941 | unsigned int size = skb_end_offset(skb) + skb->data_len; |
| 874 | struct sk_buff *n = alloc_skb(size, gfp_mask); | 942 | struct sk_buff *n = __alloc_skb(size, gfp_mask, |
| 943 | skb_alloc_rx_flag(skb), NUMA_NO_NODE); | ||
| 875 | 944 | ||
| 876 | if (!n) | 945 | if (!n) |
| 877 | return NULL; | 946 | return NULL; |
| @@ -906,7 +975,8 @@ EXPORT_SYMBOL(skb_copy); | |||
| 906 | struct sk_buff *__pskb_copy(struct sk_buff *skb, int headroom, gfp_t gfp_mask) | 975 | struct sk_buff *__pskb_copy(struct sk_buff *skb, int headroom, gfp_t gfp_mask) |
| 907 | { | 976 | { |
| 908 | unsigned int size = skb_headlen(skb) + headroom; | 977 | unsigned int size = skb_headlen(skb) + headroom; |
| 909 | struct sk_buff *n = alloc_skb(size, gfp_mask); | 978 | struct sk_buff *n = __alloc_skb(size, gfp_mask, |
| 979 | skb_alloc_rx_flag(skb), NUMA_NO_NODE); | ||
| 910 | 980 | ||
| 911 | if (!n) | 981 | if (!n) |
| 912 | goto out; | 982 | goto out; |
| @@ -979,8 +1049,10 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, | |||
| 979 | 1049 | ||
| 980 | size = SKB_DATA_ALIGN(size); | 1050 | size = SKB_DATA_ALIGN(size); |
| 981 | 1051 | ||
| 982 | data = kmalloc(size + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)), | 1052 | if (skb_pfmemalloc(skb)) |
| 983 | gfp_mask); | 1053 | gfp_mask |= __GFP_MEMALLOC; |
| 1054 | data = kmalloc_reserve(size + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)), | ||
| 1055 | gfp_mask, NUMA_NO_NODE, NULL); | ||
| 984 | if (!data) | 1056 | if (!data) |
| 985 | goto nodata; | 1057 | goto nodata; |
| 986 | size = SKB_WITH_OVERHEAD(ksize(data)); | 1058 | size = SKB_WITH_OVERHEAD(ksize(data)); |
| @@ -1092,8 +1164,9 @@ struct sk_buff *skb_copy_expand(const struct sk_buff *skb, | |||
| 1092 | /* | 1164 | /* |
| 1093 | * Allocate the copy buffer | 1165 | * Allocate the copy buffer |
| 1094 | */ | 1166 | */ |
| 1095 | struct sk_buff *n = alloc_skb(newheadroom + skb->len + newtailroom, | 1167 | struct sk_buff *n = __alloc_skb(newheadroom + skb->len + newtailroom, |
| 1096 | gfp_mask); | 1168 | gfp_mask, skb_alloc_rx_flag(skb), |
| 1169 | NUMA_NO_NODE); | ||
| 1097 | int oldheadroom = skb_headroom(skb); | 1170 | int oldheadroom = skb_headroom(skb); |
| 1098 | int head_copy_len, head_copy_off; | 1171 | int head_copy_len, head_copy_off; |
| 1099 | int off; | 1172 | int off; |
| @@ -2775,8 +2848,9 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) | |||
| 2775 | skb_release_head_state(nskb); | 2848 | skb_release_head_state(nskb); |
| 2776 | __skb_push(nskb, doffset); | 2849 | __skb_push(nskb, doffset); |
| 2777 | } else { | 2850 | } else { |
| 2778 | nskb = alloc_skb(hsize + doffset + headroom, | 2851 | nskb = __alloc_skb(hsize + doffset + headroom, |
| 2779 | GFP_ATOMIC); | 2852 | GFP_ATOMIC, skb_alloc_rx_flag(skb), |
| 2853 | NUMA_NO_NODE); | ||
| 2780 | 2854 | ||
| 2781 | if (unlikely(!nskb)) | 2855 | if (unlikely(!nskb)) |
| 2782 | goto err; | 2856 | goto err; |
diff --git a/net/core/sock.c b/net/core/sock.c index 2676a88f533e..8f67ced8d6a8 100644 --- a/net/core/sock.c +++ b/net/core/sock.c | |||
| @@ -142,7 +142,7 @@ | |||
| 142 | static DEFINE_MUTEX(proto_list_mutex); | 142 | static DEFINE_MUTEX(proto_list_mutex); |
| 143 | static LIST_HEAD(proto_list); | 143 | static LIST_HEAD(proto_list); |
| 144 | 144 | ||
| 145 | #ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM | 145 | #ifdef CONFIG_MEMCG_KMEM |
| 146 | int mem_cgroup_sockets_init(struct mem_cgroup *memcg, struct cgroup_subsys *ss) | 146 | int mem_cgroup_sockets_init(struct mem_cgroup *memcg, struct cgroup_subsys *ss) |
| 147 | { | 147 | { |
| 148 | struct proto *proto; | 148 | struct proto *proto; |
| @@ -271,6 +271,61 @@ __u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX; | |||
| 271 | int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512); | 271 | int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512); |
| 272 | EXPORT_SYMBOL(sysctl_optmem_max); | 272 | EXPORT_SYMBOL(sysctl_optmem_max); |
| 273 | 273 | ||
| 274 | struct static_key memalloc_socks = STATIC_KEY_INIT_FALSE; | ||
| 275 | EXPORT_SYMBOL_GPL(memalloc_socks); | ||
| 276 | |||
| 277 | /** | ||
| 278 | * sk_set_memalloc - sets %SOCK_MEMALLOC | ||
| 279 | * @sk: socket to set it on | ||
| 280 | * | ||
| 281 | * Set %SOCK_MEMALLOC on a socket for access to emergency reserves. | ||
| 282 | * It's the responsibility of the admin to adjust min_free_kbytes | ||
| 283 | * to meet the requirements | ||
| 284 | */ | ||
| 285 | void sk_set_memalloc(struct sock *sk) | ||
| 286 | { | ||
| 287 | sock_set_flag(sk, SOCK_MEMALLOC); | ||
| 288 | sk->sk_allocation |= __GFP_MEMALLOC; | ||
| 289 | static_key_slow_inc(&memalloc_socks); | ||
| 290 | } | ||
| 291 | EXPORT_SYMBOL_GPL(sk_set_memalloc); | ||
| 292 | |||
| 293 | void sk_clear_memalloc(struct sock *sk) | ||
| 294 | { | ||
| 295 | sock_reset_flag(sk, SOCK_MEMALLOC); | ||
| 296 | sk->sk_allocation &= ~__GFP_MEMALLOC; | ||
| 297 | static_key_slow_dec(&memalloc_socks); | ||
| 298 | |||
| 299 | /* | ||
| 300 | * SOCK_MEMALLOC is allowed to ignore rmem limits to ensure forward | ||
| 301 | * progress of swapping. However, if SOCK_MEMALLOC is cleared while | ||
| 302 | * it has rmem allocations there is a risk that the user of the | ||
| 303 | * socket cannot make forward progress due to exceeding the rmem | ||
| 304 | * limits. By rights, sk_clear_memalloc() should only be called | ||
| 305 | * on sockets being torn down but warn and reset the accounting if | ||
| 306 | * that assumption breaks. | ||
| 307 | */ | ||
| 308 | if (WARN_ON(sk->sk_forward_alloc)) | ||
| 309 | sk_mem_reclaim(sk); | ||
| 310 | } | ||
| 311 | EXPORT_SYMBOL_GPL(sk_clear_memalloc); | ||
| 312 | |||
| 313 | int __sk_backlog_rcv(struct sock *sk, struct sk_buff *skb) | ||
| 314 | { | ||
| 315 | int ret; | ||
| 316 | unsigned long pflags = current->flags; | ||
| 317 | |||
| 318 | /* these should have been dropped before queueing */ | ||
| 319 | BUG_ON(!sock_flag(sk, SOCK_MEMALLOC)); | ||
| 320 | |||
| 321 | current->flags |= PF_MEMALLOC; | ||
| 322 | ret = sk->sk_backlog_rcv(sk, skb); | ||
| 323 | tsk_restore_flags(current, pflags, PF_MEMALLOC); | ||
| 324 | |||
| 325 | return ret; | ||
| 326 | } | ||
| 327 | EXPORT_SYMBOL(__sk_backlog_rcv); | ||
| 328 | |||
| 274 | #if defined(CONFIG_CGROUPS) | 329 | #if defined(CONFIG_CGROUPS) |
| 275 | #if !defined(CONFIG_NET_CLS_CGROUP) | 330 | #if !defined(CONFIG_NET_CLS_CGROUP) |
| 276 | int net_cls_subsys_id = -1; | 331 | int net_cls_subsys_id = -1; |
| @@ -353,7 +408,7 @@ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) | |||
| 353 | if (err) | 408 | if (err) |
| 354 | return err; | 409 | return err; |
| 355 | 410 | ||
| 356 | if (!sk_rmem_schedule(sk, skb->truesize)) { | 411 | if (!sk_rmem_schedule(sk, skb, skb->truesize)) { |
| 357 | atomic_inc(&sk->sk_drops); | 412 | atomic_inc(&sk->sk_drops); |
| 358 | return -ENOBUFS; | 413 | return -ENOBUFS; |
| 359 | } | 414 | } |
| @@ -1403,6 +1458,7 @@ void sk_setup_caps(struct sock *sk, struct dst_entry *dst) | |||
| 1403 | } else { | 1458 | } else { |
| 1404 | sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM; | 1459 | sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM; |
| 1405 | sk->sk_gso_max_size = dst->dev->gso_max_size; | 1460 | sk->sk_gso_max_size = dst->dev->gso_max_size; |
| 1461 | sk->sk_gso_max_segs = dst->dev->gso_max_segs; | ||
| 1406 | } | 1462 | } |
| 1407 | } | 1463 | } |
| 1408 | } | 1464 | } |
diff --git a/net/dccp/ccid.h b/net/dccp/ccid.h index 75c3582a7678..fb85d371a8de 100644 --- a/net/dccp/ccid.h +++ b/net/dccp/ccid.h | |||
| @@ -246,7 +246,7 @@ static inline int ccid_hc_rx_getsockopt(struct ccid *ccid, struct sock *sk, | |||
| 246 | u32 __user *optval, int __user *optlen) | 246 | u32 __user *optval, int __user *optlen) |
| 247 | { | 247 | { |
| 248 | int rc = -ENOPROTOOPT; | 248 | int rc = -ENOPROTOOPT; |
| 249 | if (ccid->ccid_ops->ccid_hc_rx_getsockopt != NULL) | 249 | if (ccid != NULL && ccid->ccid_ops->ccid_hc_rx_getsockopt != NULL) |
| 250 | rc = ccid->ccid_ops->ccid_hc_rx_getsockopt(sk, optname, len, | 250 | rc = ccid->ccid_ops->ccid_hc_rx_getsockopt(sk, optname, len, |
| 251 | optval, optlen); | 251 | optval, optlen); |
| 252 | return rc; | 252 | return rc; |
| @@ -257,7 +257,7 @@ static inline int ccid_hc_tx_getsockopt(struct ccid *ccid, struct sock *sk, | |||
| 257 | u32 __user *optval, int __user *optlen) | 257 | u32 __user *optval, int __user *optlen) |
| 258 | { | 258 | { |
| 259 | int rc = -ENOPROTOOPT; | 259 | int rc = -ENOPROTOOPT; |
| 260 | if (ccid->ccid_ops->ccid_hc_tx_getsockopt != NULL) | 260 | if (ccid != NULL && ccid->ccid_ops->ccid_hc_tx_getsockopt != NULL) |
| 261 | rc = ccid->ccid_ops->ccid_hc_tx_getsockopt(sk, optname, len, | 261 | rc = ccid->ccid_ops->ccid_hc_tx_getsockopt(sk, optname, len, |
| 262 | optval, optlen); | 262 | optval, optlen); |
| 263 | return rc; | 263 | return rc; |
diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index d65e98798eca..119c04317d48 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c | |||
| @@ -535,6 +535,7 @@ static int ccid3_hc_tx_getsockopt(struct sock *sk, const int optname, int len, | |||
| 535 | case DCCP_SOCKOPT_CCID_TX_INFO: | 535 | case DCCP_SOCKOPT_CCID_TX_INFO: |
| 536 | if (len < sizeof(tfrc)) | 536 | if (len < sizeof(tfrc)) |
| 537 | return -EINVAL; | 537 | return -EINVAL; |
| 538 | memset(&tfrc, 0, sizeof(tfrc)); | ||
| 538 | tfrc.tfrctx_x = hc->tx_x; | 539 | tfrc.tfrctx_x = hc->tx_x; |
| 539 | tfrc.tfrctx_x_recv = hc->tx_x_recv; | 540 | tfrc.tfrctx_x_recv = hc->tx_x_recv; |
| 540 | tfrc.tfrctx_x_calc = hc->tx_x_calc; | 541 | tfrc.tfrctx_x_calc = hc->tx_x_calc; |
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index ae2ccf2890e4..15ca63ec604e 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile | |||
| @@ -49,7 +49,7 @@ obj-$(CONFIG_TCP_CONG_SCALABLE) += tcp_scalable.o | |||
| 49 | obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o | 49 | obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o |
| 50 | obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.o | 50 | obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.o |
| 51 | obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o | 51 | obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o |
| 52 | obj-$(CONFIG_CGROUP_MEM_RES_CTLR_KMEM) += tcp_memcontrol.o | 52 | obj-$(CONFIG_MEMCG_KMEM) += tcp_memcontrol.o |
| 53 | obj-$(CONFIG_NETLABEL) += cipso_ipv4.o | 53 | obj-$(CONFIG_NETLABEL) += cipso_ipv4.o |
| 54 | 54 | ||
| 55 | obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \ | 55 | obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \ |
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index a0124eb7dbea..77e87aff419a 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c | |||
| @@ -827,7 +827,7 @@ static int arp_process(struct sk_buff *skb) | |||
| 827 | } | 827 | } |
| 828 | 828 | ||
| 829 | if (arp->ar_op == htons(ARPOP_REQUEST) && | 829 | if (arp->ar_op == htons(ARPOP_REQUEST) && |
| 830 | ip_route_input(skb, tip, sip, 0, dev) == 0) { | 830 | ip_route_input_noref(skb, tip, sip, 0, dev) == 0) { |
| 831 | 831 | ||
| 832 | rt = skb_rtable(skb); | 832 | rt = skb_rtable(skb); |
| 833 | addr_type = rt->rt_type; | 833 | addr_type = rt->rt_type; |
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 8732cc7920ed..c43ae3fba792 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c | |||
| @@ -1046,6 +1046,7 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo | |||
| 1046 | 1046 | ||
| 1047 | if (event == NETDEV_UNREGISTER) { | 1047 | if (event == NETDEV_UNREGISTER) { |
| 1048 | fib_disable_ip(dev, 2, -1); | 1048 | fib_disable_ip(dev, 2, -1); |
| 1049 | rt_flush_dev(dev); | ||
| 1049 | return NOTIFY_DONE; | 1050 | return NOTIFY_DONE; |
| 1050 | } | 1051 | } |
| 1051 | 1052 | ||
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index e55171f184f9..da80dc14cc76 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c | |||
| @@ -140,6 +140,21 @@ const struct fib_prop fib_props[RTN_MAX + 1] = { | |||
| 140 | }, | 140 | }, |
| 141 | }; | 141 | }; |
| 142 | 142 | ||
| 143 | static void rt_fibinfo_free(struct rtable __rcu **rtp) | ||
| 144 | { | ||
| 145 | struct rtable *rt = rcu_dereference_protected(*rtp, 1); | ||
| 146 | |||
| 147 | if (!rt) | ||
| 148 | return; | ||
| 149 | |||
| 150 | /* Not even needed : RCU_INIT_POINTER(*rtp, NULL); | ||
| 151 | * because we waited an RCU grace period before calling | ||
| 152 | * free_fib_info_rcu() | ||
| 153 | */ | ||
| 154 | |||
| 155 | dst_free(&rt->dst); | ||
| 156 | } | ||
| 157 | |||
| 143 | static void free_nh_exceptions(struct fib_nh *nh) | 158 | static void free_nh_exceptions(struct fib_nh *nh) |
| 144 | { | 159 | { |
| 145 | struct fnhe_hash_bucket *hash = nh->nh_exceptions; | 160 | struct fnhe_hash_bucket *hash = nh->nh_exceptions; |
| @@ -153,6 +168,9 @@ static void free_nh_exceptions(struct fib_nh *nh) | |||
| 153 | struct fib_nh_exception *next; | 168 | struct fib_nh_exception *next; |
| 154 | 169 | ||
| 155 | next = rcu_dereference_protected(fnhe->fnhe_next, 1); | 170 | next = rcu_dereference_protected(fnhe->fnhe_next, 1); |
| 171 | |||
| 172 | rt_fibinfo_free(&fnhe->fnhe_rth); | ||
| 173 | |||
| 156 | kfree(fnhe); | 174 | kfree(fnhe); |
| 157 | 175 | ||
| 158 | fnhe = next; | 176 | fnhe = next; |
| @@ -161,6 +179,23 @@ static void free_nh_exceptions(struct fib_nh *nh) | |||
| 161 | kfree(hash); | 179 | kfree(hash); |
| 162 | } | 180 | } |
| 163 | 181 | ||
| 182 | static void rt_fibinfo_free_cpus(struct rtable __rcu * __percpu *rtp) | ||
| 183 | { | ||
| 184 | int cpu; | ||
| 185 | |||
| 186 | if (!rtp) | ||
| 187 | return; | ||
| 188 | |||
| 189 | for_each_possible_cpu(cpu) { | ||
| 190 | struct rtable *rt; | ||
| 191 | |||
| 192 | rt = rcu_dereference_protected(*per_cpu_ptr(rtp, cpu), 1); | ||
| 193 | if (rt) | ||
| 194 | dst_free(&rt->dst); | ||
| 195 | } | ||
| 196 | free_percpu(rtp); | ||
| 197 | } | ||
| 198 | |||
| 164 | /* Release a nexthop info record */ | 199 | /* Release a nexthop info record */ |
| 165 | static void free_fib_info_rcu(struct rcu_head *head) | 200 | static void free_fib_info_rcu(struct rcu_head *head) |
| 166 | { | 201 | { |
| @@ -171,10 +206,8 @@ static void free_fib_info_rcu(struct rcu_head *head) | |||
| 171 | dev_put(nexthop_nh->nh_dev); | 206 | dev_put(nexthop_nh->nh_dev); |
| 172 | if (nexthop_nh->nh_exceptions) | 207 | if (nexthop_nh->nh_exceptions) |
| 173 | free_nh_exceptions(nexthop_nh); | 208 | free_nh_exceptions(nexthop_nh); |
| 174 | if (nexthop_nh->nh_rth_output) | 209 | rt_fibinfo_free_cpus(nexthop_nh->nh_pcpu_rth_output); |
| 175 | dst_release(&nexthop_nh->nh_rth_output->dst); | 210 | rt_fibinfo_free(&nexthop_nh->nh_rth_input); |
| 176 | if (nexthop_nh->nh_rth_input) | ||
| 177 | dst_release(&nexthop_nh->nh_rth_input->dst); | ||
| 178 | } endfor_nexthops(fi); | 211 | } endfor_nexthops(fi); |
| 179 | 212 | ||
| 180 | release_net(fi->fib_net); | 213 | release_net(fi->fib_net); |
| @@ -804,6 +837,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg) | |||
| 804 | fi->fib_nhs = nhs; | 837 | fi->fib_nhs = nhs; |
| 805 | change_nexthops(fi) { | 838 | change_nexthops(fi) { |
| 806 | nexthop_nh->nh_parent = fi; | 839 | nexthop_nh->nh_parent = fi; |
| 840 | nexthop_nh->nh_pcpu_rth_output = alloc_percpu(struct rtable __rcu *); | ||
| 807 | } endfor_nexthops(fi) | 841 | } endfor_nexthops(fi) |
| 808 | 842 | ||
| 809 | if (cfg->fc_mx) { | 843 | if (cfg->fc_mx) { |
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 18cbc15b20d5..57bd978483e1 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c | |||
| @@ -159,7 +159,6 @@ struct trie { | |||
| 159 | #endif | 159 | #endif |
| 160 | }; | 160 | }; |
| 161 | 161 | ||
| 162 | static void put_child(struct trie *t, struct tnode *tn, int i, struct rt_trie_node *n); | ||
| 163 | static void tnode_put_child_reorg(struct tnode *tn, int i, struct rt_trie_node *n, | 162 | static void tnode_put_child_reorg(struct tnode *tn, int i, struct rt_trie_node *n, |
| 164 | int wasfull); | 163 | int wasfull); |
| 165 | static struct rt_trie_node *resize(struct trie *t, struct tnode *tn); | 164 | static struct rt_trie_node *resize(struct trie *t, struct tnode *tn); |
| @@ -368,7 +367,7 @@ static void __leaf_free_rcu(struct rcu_head *head) | |||
| 368 | 367 | ||
| 369 | static inline void free_leaf(struct leaf *l) | 368 | static inline void free_leaf(struct leaf *l) |
| 370 | { | 369 | { |
| 371 | call_rcu_bh(&l->rcu, __leaf_free_rcu); | 370 | call_rcu(&l->rcu, __leaf_free_rcu); |
| 372 | } | 371 | } |
| 373 | 372 | ||
| 374 | static inline void free_leaf_info(struct leaf_info *leaf) | 373 | static inline void free_leaf_info(struct leaf_info *leaf) |
| @@ -473,7 +472,7 @@ static struct tnode *tnode_new(t_key key, int pos, int bits) | |||
| 473 | } | 472 | } |
| 474 | 473 | ||
| 475 | pr_debug("AT %p s=%zu %zu\n", tn, sizeof(struct tnode), | 474 | pr_debug("AT %p s=%zu %zu\n", tn, sizeof(struct tnode), |
| 476 | sizeof(struct rt_trie_node) << bits); | 475 | sizeof(struct rt_trie_node *) << bits); |
| 477 | return tn; | 476 | return tn; |
| 478 | } | 477 | } |
| 479 | 478 | ||
| @@ -490,7 +489,7 @@ static inline int tnode_full(const struct tnode *tn, const struct rt_trie_node * | |||
| 490 | return ((struct tnode *) n)->pos == tn->pos + tn->bits; | 489 | return ((struct tnode *) n)->pos == tn->pos + tn->bits; |
| 491 | } | 490 | } |
| 492 | 491 | ||
| 493 | static inline void put_child(struct trie *t, struct tnode *tn, int i, | 492 | static inline void put_child(struct tnode *tn, int i, |
| 494 | struct rt_trie_node *n) | 493 | struct rt_trie_node *n) |
| 495 | { | 494 | { |
| 496 | tnode_put_child_reorg(tn, i, n, -1); | 495 | tnode_put_child_reorg(tn, i, n, -1); |
| @@ -754,8 +753,8 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn) | |||
| 754 | goto nomem; | 753 | goto nomem; |
| 755 | } | 754 | } |
| 756 | 755 | ||
| 757 | put_child(t, tn, 2*i, (struct rt_trie_node *) left); | 756 | put_child(tn, 2*i, (struct rt_trie_node *) left); |
| 758 | put_child(t, tn, 2*i+1, (struct rt_trie_node *) right); | 757 | put_child(tn, 2*i+1, (struct rt_trie_node *) right); |
| 759 | } | 758 | } |
| 760 | } | 759 | } |
| 761 | 760 | ||
| @@ -776,9 +775,9 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn) | |||
| 776 | if (tkey_extract_bits(node->key, | 775 | if (tkey_extract_bits(node->key, |
| 777 | oldtnode->pos + oldtnode->bits, | 776 | oldtnode->pos + oldtnode->bits, |
| 778 | 1) == 0) | 777 | 1) == 0) |
| 779 | put_child(t, tn, 2*i, node); | 778 | put_child(tn, 2*i, node); |
| 780 | else | 779 | else |
| 781 | put_child(t, tn, 2*i+1, node); | 780 | put_child(tn, 2*i+1, node); |
| 782 | continue; | 781 | continue; |
| 783 | } | 782 | } |
| 784 | 783 | ||
| @@ -786,8 +785,8 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn) | |||
| 786 | inode = (struct tnode *) node; | 785 | inode = (struct tnode *) node; |
| 787 | 786 | ||
| 788 | if (inode->bits == 1) { | 787 | if (inode->bits == 1) { |
| 789 | put_child(t, tn, 2*i, rtnl_dereference(inode->child[0])); | 788 | put_child(tn, 2*i, rtnl_dereference(inode->child[0])); |
| 790 | put_child(t, tn, 2*i+1, rtnl_dereference(inode->child[1])); | 789 | put_child(tn, 2*i+1, rtnl_dereference(inode->child[1])); |
| 791 | 790 | ||
| 792 | tnode_free_safe(inode); | 791 | tnode_free_safe(inode); |
| 793 | continue; | 792 | continue; |
| @@ -817,22 +816,22 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn) | |||
| 817 | */ | 816 | */ |
| 818 | 817 | ||
| 819 | left = (struct tnode *) tnode_get_child(tn, 2*i); | 818 | left = (struct tnode *) tnode_get_child(tn, 2*i); |
| 820 | put_child(t, tn, 2*i, NULL); | 819 | put_child(tn, 2*i, NULL); |
| 821 | 820 | ||
| 822 | BUG_ON(!left); | 821 | BUG_ON(!left); |
| 823 | 822 | ||
| 824 | right = (struct tnode *) tnode_get_child(tn, 2*i+1); | 823 | right = (struct tnode *) tnode_get_child(tn, 2*i+1); |
| 825 | put_child(t, tn, 2*i+1, NULL); | 824 | put_child(tn, 2*i+1, NULL); |
| 826 | 825 | ||
| 827 | BUG_ON(!right); | 826 | BUG_ON(!right); |
| 828 | 827 | ||
| 829 | size = tnode_child_length(left); | 828 | size = tnode_child_length(left); |
| 830 | for (j = 0; j < size; j++) { | 829 | for (j = 0; j < size; j++) { |
| 831 | put_child(t, left, j, rtnl_dereference(inode->child[j])); | 830 | put_child(left, j, rtnl_dereference(inode->child[j])); |
| 832 | put_child(t, right, j, rtnl_dereference(inode->child[j + size])); | 831 | put_child(right, j, rtnl_dereference(inode->child[j + size])); |
| 833 | } | 832 | } |
| 834 | put_child(t, tn, 2*i, resize(t, left)); | 833 | put_child(tn, 2*i, resize(t, left)); |
| 835 | put_child(t, tn, 2*i+1, resize(t, right)); | 834 | put_child(tn, 2*i+1, resize(t, right)); |
| 836 | 835 | ||
| 837 | tnode_free_safe(inode); | 836 | tnode_free_safe(inode); |
| 838 | } | 837 | } |
| @@ -877,7 +876,7 @@ static struct tnode *halve(struct trie *t, struct tnode *tn) | |||
| 877 | if (!newn) | 876 | if (!newn) |
| 878 | goto nomem; | 877 | goto nomem; |
| 879 | 878 | ||
| 880 | put_child(t, tn, i/2, (struct rt_trie_node *)newn); | 879 | put_child(tn, i/2, (struct rt_trie_node *)newn); |
| 881 | } | 880 | } |
| 882 | 881 | ||
| 883 | } | 882 | } |
| @@ -892,21 +891,21 @@ static struct tnode *halve(struct trie *t, struct tnode *tn) | |||
| 892 | if (left == NULL) { | 891 | if (left == NULL) { |
| 893 | if (right == NULL) /* Both are empty */ | 892 | if (right == NULL) /* Both are empty */ |
| 894 | continue; | 893 | continue; |
| 895 | put_child(t, tn, i/2, right); | 894 | put_child(tn, i/2, right); |
| 896 | continue; | 895 | continue; |
| 897 | } | 896 | } |
| 898 | 897 | ||
| 899 | if (right == NULL) { | 898 | if (right == NULL) { |
| 900 | put_child(t, tn, i/2, left); | 899 | put_child(tn, i/2, left); |
| 901 | continue; | 900 | continue; |
| 902 | } | 901 | } |
| 903 | 902 | ||
| 904 | /* Two nonempty children */ | 903 | /* Two nonempty children */ |
| 905 | newBinNode = (struct tnode *) tnode_get_child(tn, i/2); | 904 | newBinNode = (struct tnode *) tnode_get_child(tn, i/2); |
| 906 | put_child(t, tn, i/2, NULL); | 905 | put_child(tn, i/2, NULL); |
| 907 | put_child(t, newBinNode, 0, left); | 906 | put_child(newBinNode, 0, left); |
| 908 | put_child(t, newBinNode, 1, right); | 907 | put_child(newBinNode, 1, right); |
| 909 | put_child(t, tn, i/2, resize(t, newBinNode)); | 908 | put_child(tn, i/2, resize(t, newBinNode)); |
| 910 | } | 909 | } |
| 911 | tnode_free_safe(oldtnode); | 910 | tnode_free_safe(oldtnode); |
| 912 | return tn; | 911 | return tn; |
| @@ -1125,7 +1124,7 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen) | |||
| 1125 | node_set_parent((struct rt_trie_node *)l, tp); | 1124 | node_set_parent((struct rt_trie_node *)l, tp); |
| 1126 | 1125 | ||
| 1127 | cindex = tkey_extract_bits(key, tp->pos, tp->bits); | 1126 | cindex = tkey_extract_bits(key, tp->pos, tp->bits); |
| 1128 | put_child(t, tp, cindex, (struct rt_trie_node *)l); | 1127 | put_child(tp, cindex, (struct rt_trie_node *)l); |
| 1129 | } else { | 1128 | } else { |
| 1130 | /* Case 3: n is a LEAF or a TNODE and the key doesn't match. */ | 1129 | /* Case 3: n is a LEAF or a TNODE and the key doesn't match. */ |
| 1131 | /* | 1130 | /* |
| @@ -1155,12 +1154,12 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen) | |||
| 1155 | node_set_parent((struct rt_trie_node *)tn, tp); | 1154 | node_set_parent((struct rt_trie_node *)tn, tp); |
| 1156 | 1155 | ||
| 1157 | missbit = tkey_extract_bits(key, newpos, 1); | 1156 | missbit = tkey_extract_bits(key, newpos, 1); |
| 1158 | put_child(t, tn, missbit, (struct rt_trie_node *)l); | 1157 | put_child(tn, missbit, (struct rt_trie_node *)l); |
| 1159 | put_child(t, tn, 1-missbit, n); | 1158 | put_child(tn, 1-missbit, n); |
| 1160 | 1159 | ||
| 1161 | if (tp) { | 1160 | if (tp) { |
| 1162 | cindex = tkey_extract_bits(key, tp->pos, tp->bits); | 1161 | cindex = tkey_extract_bits(key, tp->pos, tp->bits); |
| 1163 | put_child(t, tp, cindex, (struct rt_trie_node *)tn); | 1162 | put_child(tp, cindex, (struct rt_trie_node *)tn); |
| 1164 | } else { | 1163 | } else { |
| 1165 | rcu_assign_pointer(t->trie, (struct rt_trie_node *)tn); | 1164 | rcu_assign_pointer(t->trie, (struct rt_trie_node *)tn); |
| 1166 | tp = tn; | 1165 | tp = tn; |
| @@ -1619,7 +1618,7 @@ static void trie_leaf_remove(struct trie *t, struct leaf *l) | |||
| 1619 | 1618 | ||
| 1620 | if (tp) { | 1619 | if (tp) { |
| 1621 | t_key cindex = tkey_extract_bits(l->key, tp->pos, tp->bits); | 1620 | t_key cindex = tkey_extract_bits(l->key, tp->pos, tp->bits); |
| 1622 | put_child(t, tp, cindex, NULL); | 1621 | put_child(tp, cindex, NULL); |
| 1623 | trie_rebalance(t, tp); | 1622 | trie_rebalance(t, tp); |
| 1624 | } else | 1623 | } else |
| 1625 | RCU_INIT_POINTER(t->trie, NULL); | 1624 | RCU_INIT_POINTER(t->trie, NULL); |
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index db0cf17c00f7..7f75f21d7b83 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c | |||
| @@ -404,12 +404,15 @@ struct dst_entry *inet_csk_route_child_sock(struct sock *sk, | |||
| 404 | { | 404 | { |
| 405 | const struct inet_request_sock *ireq = inet_rsk(req); | 405 | const struct inet_request_sock *ireq = inet_rsk(req); |
| 406 | struct inet_sock *newinet = inet_sk(newsk); | 406 | struct inet_sock *newinet = inet_sk(newsk); |
| 407 | struct ip_options_rcu *opt = ireq->opt; | 407 | struct ip_options_rcu *opt; |
| 408 | struct net *net = sock_net(sk); | 408 | struct net *net = sock_net(sk); |
| 409 | struct flowi4 *fl4; | 409 | struct flowi4 *fl4; |
| 410 | struct rtable *rt; | 410 | struct rtable *rt; |
| 411 | 411 | ||
| 412 | fl4 = &newinet->cork.fl.u.ip4; | 412 | fl4 = &newinet->cork.fl.u.ip4; |
| 413 | |||
| 414 | rcu_read_lock(); | ||
| 415 | opt = rcu_dereference(newinet->inet_opt); | ||
| 413 | flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark, | 416 | flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark, |
| 414 | RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, | 417 | RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, |
| 415 | sk->sk_protocol, inet_sk_flowi_flags(sk), | 418 | sk->sk_protocol, inet_sk_flowi_flags(sk), |
| @@ -421,11 +424,13 @@ struct dst_entry *inet_csk_route_child_sock(struct sock *sk, | |||
| 421 | goto no_route; | 424 | goto no_route; |
| 422 | if (opt && opt->opt.is_strictroute && rt->rt_gateway) | 425 | if (opt && opt->opt.is_strictroute && rt->rt_gateway) |
| 423 | goto route_err; | 426 | goto route_err; |
| 427 | rcu_read_unlock(); | ||
| 424 | return &rt->dst; | 428 | return &rt->dst; |
| 425 | 429 | ||
| 426 | route_err: | 430 | route_err: |
| 427 | ip_rt_put(rt); | 431 | ip_rt_put(rt); |
| 428 | no_route: | 432 | no_route: |
| 433 | rcu_read_unlock(); | ||
| 429 | IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES); | 434 | IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES); |
| 430 | return NULL; | 435 | return NULL; |
| 431 | } | 436 | } |
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 7ad88e5e7110..8d07c973409c 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c | |||
| @@ -258,8 +258,8 @@ static void ip_expire(unsigned long arg) | |||
| 258 | /* skb dst is stale, drop it, and perform route lookup again */ | 258 | /* skb dst is stale, drop it, and perform route lookup again */ |
| 259 | skb_dst_drop(head); | 259 | skb_dst_drop(head); |
| 260 | iph = ip_hdr(head); | 260 | iph = ip_hdr(head); |
| 261 | err = ip_route_input(head, iph->daddr, iph->saddr, | 261 | err = ip_route_input_noref(head, iph->daddr, iph->saddr, |
| 262 | iph->tos, head->dev); | 262 | iph->tos, head->dev); |
| 263 | if (err) | 263 | if (err) |
| 264 | goto out_rcu_unlock; | 264 | goto out_rcu_unlock; |
| 265 | 265 | ||
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 4ebc6feee250..f1395a6fb35f 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c | |||
| @@ -314,6 +314,7 @@ drop: | |||
| 314 | } | 314 | } |
| 315 | 315 | ||
| 316 | int sysctl_ip_early_demux __read_mostly = 1; | 316 | int sysctl_ip_early_demux __read_mostly = 1; |
| 317 | EXPORT_SYMBOL(sysctl_ip_early_demux); | ||
| 317 | 318 | ||
| 318 | static int ip_rcv_finish(struct sk_buff *skb) | 319 | static int ip_rcv_finish(struct sk_buff *skb) |
| 319 | { | 320 | { |
| @@ -324,11 +325,12 @@ static int ip_rcv_finish(struct sk_buff *skb) | |||
| 324 | const struct net_protocol *ipprot; | 325 | const struct net_protocol *ipprot; |
| 325 | int protocol = iph->protocol; | 326 | int protocol = iph->protocol; |
| 326 | 327 | ||
| 327 | rcu_read_lock(); | ||
| 328 | ipprot = rcu_dereference(inet_protos[protocol]); | 328 | ipprot = rcu_dereference(inet_protos[protocol]); |
| 329 | if (ipprot && ipprot->early_demux) | 329 | if (ipprot && ipprot->early_demux) { |
| 330 | ipprot->early_demux(skb); | 330 | ipprot->early_demux(skb); |
| 331 | rcu_read_unlock(); | 331 | /* must reload iph, skb->head might have changed */ |
| 332 | iph = ip_hdr(skb); | ||
| 333 | } | ||
| 332 | } | 334 | } |
| 333 | 335 | ||
| 334 | /* | 336 | /* |
| @@ -336,8 +338,8 @@ static int ip_rcv_finish(struct sk_buff *skb) | |||
| 336 | * how the packet travels inside Linux networking. | 338 | * how the packet travels inside Linux networking. |
| 337 | */ | 339 | */ |
| 338 | if (!skb_dst(skb)) { | 340 | if (!skb_dst(skb)) { |
| 339 | int err = ip_route_input(skb, iph->daddr, iph->saddr, | 341 | int err = ip_route_input_noref(skb, iph->daddr, iph->saddr, |
| 340 | iph->tos, skb->dev); | 342 | iph->tos, skb->dev); |
| 341 | if (unlikely(err)) { | 343 | if (unlikely(err)) { |
| 342 | if (err == -EXDEV) | 344 | if (err == -EXDEV) |
| 343 | NET_INC_STATS_BH(dev_net(skb->dev), | 345 | NET_INC_STATS_BH(dev_net(skb->dev), |
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index ba39a52d18c1..c196d749daf2 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c | |||
| @@ -197,7 +197,7 @@ static inline int ip_finish_output2(struct sk_buff *skb) | |||
| 197 | neigh = __ipv4_neigh_lookup_noref(dev, nexthop); | 197 | neigh = __ipv4_neigh_lookup_noref(dev, nexthop); |
| 198 | if (unlikely(!neigh)) | 198 | if (unlikely(!neigh)) |
| 199 | neigh = __neigh_create(&arp_tbl, &nexthop, dev, false); | 199 | neigh = __neigh_create(&arp_tbl, &nexthop, dev, false); |
| 200 | if (neigh) { | 200 | if (!IS_ERR(neigh)) { |
| 201 | int res = dst_neigh_output(dst, neigh, skb); | 201 | int res = dst_neigh_output(dst, neigh, skb); |
| 202 | 202 | ||
| 203 | rcu_read_unlock_bh(); | 203 | rcu_read_unlock_bh(); |
| @@ -1338,10 +1338,10 @@ struct sk_buff *__ip_make_skb(struct sock *sk, | |||
| 1338 | iph->ihl = 5; | 1338 | iph->ihl = 5; |
| 1339 | iph->tos = inet->tos; | 1339 | iph->tos = inet->tos; |
| 1340 | iph->frag_off = df; | 1340 | iph->frag_off = df; |
| 1341 | ip_select_ident(iph, &rt->dst, sk); | ||
| 1342 | iph->ttl = ttl; | 1341 | iph->ttl = ttl; |
| 1343 | iph->protocol = sk->sk_protocol; | 1342 | iph->protocol = sk->sk_protocol; |
| 1344 | ip_copy_addrs(iph, fl4); | 1343 | ip_copy_addrs(iph, fl4); |
| 1344 | ip_select_ident(iph, &rt->dst, sk); | ||
| 1345 | 1345 | ||
| 1346 | if (opt) { | 1346 | if (opt) { |
| 1347 | iph->ihl += opt->optlen>>2; | 1347 | iph->ihl += opt->optlen>>2; |
| @@ -1366,9 +1366,8 @@ out: | |||
| 1366 | return skb; | 1366 | return skb; |
| 1367 | } | 1367 | } |
| 1368 | 1368 | ||
| 1369 | int ip_send_skb(struct sk_buff *skb) | 1369 | int ip_send_skb(struct net *net, struct sk_buff *skb) |
| 1370 | { | 1370 | { |
| 1371 | struct net *net = sock_net(skb->sk); | ||
| 1372 | int err; | 1371 | int err; |
| 1373 | 1372 | ||
| 1374 | err = ip_local_out(skb); | 1373 | err = ip_local_out(skb); |
| @@ -1391,7 +1390,7 @@ int ip_push_pending_frames(struct sock *sk, struct flowi4 *fl4) | |||
| 1391 | return 0; | 1390 | return 0; |
| 1392 | 1391 | ||
| 1393 | /* Netfilter gets the whole, unfragmented skb. */ | 1392 | /* Netfilter gets the whole, unfragmented skb. */ |
| 1394 | return ip_send_skb(skb); | 1393 | return ip_send_skb(sock_net(sk), skb); |
| 1395 | } | 1394 | } |
| 1396 | 1395 | ||
| 1397 | /* | 1396 | /* |
| @@ -1536,6 +1535,7 @@ void ip_send_unicast_reply(struct net *net, struct sk_buff *skb, __be32 daddr, | |||
| 1536 | arg->csumoffset) = csum_fold(csum_add(nskb->csum, | 1535 | arg->csumoffset) = csum_fold(csum_add(nskb->csum, |
| 1537 | arg->csum)); | 1536 | arg->csum)); |
| 1538 | nskb->ip_summed = CHECKSUM_NONE; | 1537 | nskb->ip_summed = CHECKSUM_NONE; |
| 1538 | skb_orphan(nskb); | ||
| 1539 | skb_set_queue_mapping(nskb, skb_get_queue_mapping(skb)); | 1539 | skb_set_queue_mapping(nskb, skb_get_queue_mapping(skb)); |
| 1540 | ip_push_pending_frames(sk, &fl4); | 1540 | ip_push_pending_frames(sk, &fl4); |
| 1541 | } | 1541 | } |
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 8eec8f4a0536..ebdf06f938bf 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c | |||
| @@ -124,6 +124,8 @@ static DEFINE_SPINLOCK(mfc_unres_lock); | |||
| 124 | static struct kmem_cache *mrt_cachep __read_mostly; | 124 | static struct kmem_cache *mrt_cachep __read_mostly; |
| 125 | 125 | ||
| 126 | static struct mr_table *ipmr_new_table(struct net *net, u32 id); | 126 | static struct mr_table *ipmr_new_table(struct net *net, u32 id); |
| 127 | static void ipmr_free_table(struct mr_table *mrt); | ||
| 128 | |||
| 127 | static int ip_mr_forward(struct net *net, struct mr_table *mrt, | 129 | static int ip_mr_forward(struct net *net, struct mr_table *mrt, |
| 128 | struct sk_buff *skb, struct mfc_cache *cache, | 130 | struct sk_buff *skb, struct mfc_cache *cache, |
| 129 | int local); | 131 | int local); |
| @@ -131,6 +133,7 @@ static int ipmr_cache_report(struct mr_table *mrt, | |||
| 131 | struct sk_buff *pkt, vifi_t vifi, int assert); | 133 | struct sk_buff *pkt, vifi_t vifi, int assert); |
| 132 | static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, | 134 | static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, |
| 133 | struct mfc_cache *c, struct rtmsg *rtm); | 135 | struct mfc_cache *c, struct rtmsg *rtm); |
| 136 | static void mroute_clean_tables(struct mr_table *mrt); | ||
| 134 | static void ipmr_expire_process(unsigned long arg); | 137 | static void ipmr_expire_process(unsigned long arg); |
| 135 | 138 | ||
| 136 | #ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES | 139 | #ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES |
| @@ -271,7 +274,7 @@ static void __net_exit ipmr_rules_exit(struct net *net) | |||
| 271 | 274 | ||
| 272 | list_for_each_entry_safe(mrt, next, &net->ipv4.mr_tables, list) { | 275 | list_for_each_entry_safe(mrt, next, &net->ipv4.mr_tables, list) { |
| 273 | list_del(&mrt->list); | 276 | list_del(&mrt->list); |
| 274 | kfree(mrt); | 277 | ipmr_free_table(mrt); |
| 275 | } | 278 | } |
| 276 | fib_rules_unregister(net->ipv4.mr_rules_ops); | 279 | fib_rules_unregister(net->ipv4.mr_rules_ops); |
| 277 | } | 280 | } |
| @@ -299,7 +302,7 @@ static int __net_init ipmr_rules_init(struct net *net) | |||
| 299 | 302 | ||
| 300 | static void __net_exit ipmr_rules_exit(struct net *net) | 303 | static void __net_exit ipmr_rules_exit(struct net *net) |
| 301 | { | 304 | { |
| 302 | kfree(net->ipv4.mrt); | 305 | ipmr_free_table(net->ipv4.mrt); |
| 303 | } | 306 | } |
| 304 | #endif | 307 | #endif |
| 305 | 308 | ||
| @@ -336,6 +339,13 @@ static struct mr_table *ipmr_new_table(struct net *net, u32 id) | |||
| 336 | return mrt; | 339 | return mrt; |
| 337 | } | 340 | } |
| 338 | 341 | ||
| 342 | static void ipmr_free_table(struct mr_table *mrt) | ||
| 343 | { | ||
| 344 | del_timer_sync(&mrt->ipmr_expire_timer); | ||
| 345 | mroute_clean_tables(mrt); | ||
| 346 | kfree(mrt); | ||
| 347 | } | ||
| 348 | |||
| 339 | /* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */ | 349 | /* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */ |
| 340 | 350 | ||
| 341 | static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v) | 351 | static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v) |
diff --git a/net/ipv4/netfilter/nf_nat_sip.c b/net/ipv4/netfilter/nf_nat_sip.c index ea4a23813d26..9c87cde28ff8 100644 --- a/net/ipv4/netfilter/nf_nat_sip.c +++ b/net/ipv4/netfilter/nf_nat_sip.c | |||
| @@ -148,7 +148,7 @@ static unsigned int ip_nat_sip(struct sk_buff *skb, unsigned int dataoff, | |||
| 148 | if (ct_sip_parse_header_uri(ct, *dptr, NULL, *datalen, | 148 | if (ct_sip_parse_header_uri(ct, *dptr, NULL, *datalen, |
| 149 | hdr, NULL, &matchoff, &matchlen, | 149 | hdr, NULL, &matchoff, &matchlen, |
| 150 | &addr, &port) > 0) { | 150 | &addr, &port) > 0) { |
| 151 | unsigned int matchend, poff, plen, buflen, n; | 151 | unsigned int olen, matchend, poff, plen, buflen, n; |
| 152 | char buffer[sizeof("nnn.nnn.nnn.nnn:nnnnn")]; | 152 | char buffer[sizeof("nnn.nnn.nnn.nnn:nnnnn")]; |
| 153 | 153 | ||
| 154 | /* We're only interested in headers related to this | 154 | /* We're only interested in headers related to this |
| @@ -163,17 +163,18 @@ static unsigned int ip_nat_sip(struct sk_buff *skb, unsigned int dataoff, | |||
| 163 | goto next; | 163 | goto next; |
| 164 | } | 164 | } |
| 165 | 165 | ||
| 166 | olen = *datalen; | ||
| 166 | if (!map_addr(skb, dataoff, dptr, datalen, matchoff, matchlen, | 167 | if (!map_addr(skb, dataoff, dptr, datalen, matchoff, matchlen, |
| 167 | &addr, port)) | 168 | &addr, port)) |
| 168 | return NF_DROP; | 169 | return NF_DROP; |
| 169 | 170 | ||
| 170 | matchend = matchoff + matchlen; | 171 | matchend = matchoff + matchlen + *datalen - olen; |
| 171 | 172 | ||
| 172 | /* The maddr= parameter (RFC 3261) specifies where to send | 173 | /* The maddr= parameter (RFC 3261) specifies where to send |
| 173 | * the reply. */ | 174 | * the reply. */ |
| 174 | if (ct_sip_parse_address_param(ct, *dptr, matchend, *datalen, | 175 | if (ct_sip_parse_address_param(ct, *dptr, matchend, *datalen, |
| 175 | "maddr=", &poff, &plen, | 176 | "maddr=", &poff, &plen, |
| 176 | &addr) > 0 && | 177 | &addr, true) > 0 && |
| 177 | addr.ip == ct->tuplehash[dir].tuple.src.u3.ip && | 178 | addr.ip == ct->tuplehash[dir].tuple.src.u3.ip && |
| 178 | addr.ip != ct->tuplehash[!dir].tuple.dst.u3.ip) { | 179 | addr.ip != ct->tuplehash[!dir].tuple.dst.u3.ip) { |
| 179 | buflen = sprintf(buffer, "%pI4", | 180 | buflen = sprintf(buffer, "%pI4", |
| @@ -187,7 +188,7 @@ static unsigned int ip_nat_sip(struct sk_buff *skb, unsigned int dataoff, | |||
| 187 | * from which the server received the request. */ | 188 | * from which the server received the request. */ |
| 188 | if (ct_sip_parse_address_param(ct, *dptr, matchend, *datalen, | 189 | if (ct_sip_parse_address_param(ct, *dptr, matchend, *datalen, |
| 189 | "received=", &poff, &plen, | 190 | "received=", &poff, &plen, |
| 190 | &addr) > 0 && | 191 | &addr, false) > 0 && |
| 191 | addr.ip == ct->tuplehash[dir].tuple.dst.u3.ip && | 192 | addr.ip == ct->tuplehash[dir].tuple.dst.u3.ip && |
| 192 | addr.ip != ct->tuplehash[!dir].tuple.src.u3.ip) { | 193 | addr.ip != ct->tuplehash[!dir].tuple.src.u3.ip) { |
| 193 | buflen = sprintf(buffer, "%pI4", | 194 | buflen = sprintf(buffer, "%pI4", |
| @@ -501,7 +502,10 @@ static unsigned int ip_nat_sdp_media(struct sk_buff *skb, unsigned int dataoff, | |||
| 501 | ret = nf_ct_expect_related(rtcp_exp); | 502 | ret = nf_ct_expect_related(rtcp_exp); |
| 502 | if (ret == 0) | 503 | if (ret == 0) |
| 503 | break; | 504 | break; |
| 504 | else if (ret != -EBUSY) { | 505 | else if (ret == -EBUSY) { |
| 506 | nf_ct_unexpect_related(rtp_exp); | ||
| 507 | continue; | ||
| 508 | } else if (ret < 0) { | ||
| 505 | nf_ct_unexpect_related(rtp_exp); | 509 | nf_ct_unexpect_related(rtp_exp); |
| 506 | port = 0; | 510 | port = 0; |
| 507 | break; | 511 | break; |
diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 6bcb8fc71cbc..82cf2a722b23 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c | |||
| @@ -70,7 +70,6 @@ | |||
| 70 | #include <linux/types.h> | 70 | #include <linux/types.h> |
| 71 | #include <linux/kernel.h> | 71 | #include <linux/kernel.h> |
| 72 | #include <linux/mm.h> | 72 | #include <linux/mm.h> |
| 73 | #include <linux/bootmem.h> | ||
| 74 | #include <linux/string.h> | 73 | #include <linux/string.h> |
| 75 | #include <linux/socket.h> | 74 | #include <linux/socket.h> |
| 76 | #include <linux/sockios.h> | 75 | #include <linux/sockios.h> |
| @@ -80,7 +79,6 @@ | |||
| 80 | #include <linux/netdevice.h> | 79 | #include <linux/netdevice.h> |
| 81 | #include <linux/proc_fs.h> | 80 | #include <linux/proc_fs.h> |
| 82 | #include <linux/init.h> | 81 | #include <linux/init.h> |
| 83 | #include <linux/workqueue.h> | ||
| 84 | #include <linux/skbuff.h> | 82 | #include <linux/skbuff.h> |
| 85 | #include <linux/inetdevice.h> | 83 | #include <linux/inetdevice.h> |
| 86 | #include <linux/igmp.h> | 84 | #include <linux/igmp.h> |
| @@ -88,11 +86,9 @@ | |||
| 88 | #include <linux/mroute.h> | 86 | #include <linux/mroute.h> |
| 89 | #include <linux/netfilter_ipv4.h> | 87 | #include <linux/netfilter_ipv4.h> |
| 90 | #include <linux/random.h> | 88 | #include <linux/random.h> |
| 91 | #include <linux/jhash.h> | ||
| 92 | #include <linux/rcupdate.h> | 89 | #include <linux/rcupdate.h> |
| 93 | #include <linux/times.h> | 90 | #include <linux/times.h> |
| 94 | #include <linux/slab.h> | 91 | #include <linux/slab.h> |
| 95 | #include <linux/prefetch.h> | ||
| 96 | #include <net/dst.h> | 92 | #include <net/dst.h> |
| 97 | #include <net/net_namespace.h> | 93 | #include <net/net_namespace.h> |
| 98 | #include <net/protocol.h> | 94 | #include <net/protocol.h> |
| @@ -147,6 +143,7 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, | |||
| 147 | struct sk_buff *skb, u32 mtu); | 143 | struct sk_buff *skb, u32 mtu); |
| 148 | static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, | 144 | static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, |
| 149 | struct sk_buff *skb); | 145 | struct sk_buff *skb); |
| 146 | static void ipv4_dst_destroy(struct dst_entry *dst); | ||
| 150 | 147 | ||
| 151 | static void ipv4_dst_ifdown(struct dst_entry *dst, struct net_device *dev, | 148 | static void ipv4_dst_ifdown(struct dst_entry *dst, struct net_device *dev, |
| 152 | int how) | 149 | int how) |
| @@ -170,6 +167,7 @@ static struct dst_ops ipv4_dst_ops = { | |||
| 170 | .default_advmss = ipv4_default_advmss, | 167 | .default_advmss = ipv4_default_advmss, |
| 171 | .mtu = ipv4_mtu, | 168 | .mtu = ipv4_mtu, |
| 172 | .cow_metrics = ipv4_cow_metrics, | 169 | .cow_metrics = ipv4_cow_metrics, |
| 170 | .destroy = ipv4_dst_destroy, | ||
| 173 | .ifdown = ipv4_dst_ifdown, | 171 | .ifdown = ipv4_dst_ifdown, |
| 174 | .negative_advice = ipv4_negative_advice, | 172 | .negative_advice = ipv4_negative_advice, |
| 175 | .link_failure = ipv4_link_failure, | 173 | .link_failure = ipv4_link_failure, |
| @@ -444,7 +442,7 @@ static inline int ip_rt_proc_init(void) | |||
| 444 | } | 442 | } |
| 445 | #endif /* CONFIG_PROC_FS */ | 443 | #endif /* CONFIG_PROC_FS */ |
| 446 | 444 | ||
| 447 | static inline int rt_is_expired(struct rtable *rth) | 445 | static inline bool rt_is_expired(const struct rtable *rth) |
| 448 | { | 446 | { |
| 449 | return rth->rt_genid != rt_genid(dev_net(rth->dst.dev)); | 447 | return rth->rt_genid != rt_genid(dev_net(rth->dst.dev)); |
| 450 | } | 448 | } |
| @@ -587,11 +585,17 @@ static void ip_rt_build_flow_key(struct flowi4 *fl4, const struct sock *sk, | |||
| 587 | build_sk_flow_key(fl4, sk); | 585 | build_sk_flow_key(fl4, sk); |
| 588 | } | 586 | } |
| 589 | 587 | ||
| 590 | static DEFINE_SEQLOCK(fnhe_seqlock); | 588 | static inline void rt_free(struct rtable *rt) |
| 589 | { | ||
| 590 | call_rcu(&rt->dst.rcu_head, dst_rcu_free); | ||
| 591 | } | ||
| 592 | |||
| 593 | static DEFINE_SPINLOCK(fnhe_lock); | ||
| 591 | 594 | ||
| 592 | static struct fib_nh_exception *fnhe_oldest(struct fnhe_hash_bucket *hash) | 595 | static struct fib_nh_exception *fnhe_oldest(struct fnhe_hash_bucket *hash) |
| 593 | { | 596 | { |
| 594 | struct fib_nh_exception *fnhe, *oldest; | 597 | struct fib_nh_exception *fnhe, *oldest; |
| 598 | struct rtable *orig; | ||
| 595 | 599 | ||
| 596 | oldest = rcu_dereference(hash->chain); | 600 | oldest = rcu_dereference(hash->chain); |
| 597 | for (fnhe = rcu_dereference(oldest->fnhe_next); fnhe; | 601 | for (fnhe = rcu_dereference(oldest->fnhe_next); fnhe; |
| @@ -599,6 +603,11 @@ static struct fib_nh_exception *fnhe_oldest(struct fnhe_hash_bucket *hash) | |||
| 599 | if (time_before(fnhe->fnhe_stamp, oldest->fnhe_stamp)) | 603 | if (time_before(fnhe->fnhe_stamp, oldest->fnhe_stamp)) |
| 600 | oldest = fnhe; | 604 | oldest = fnhe; |
| 601 | } | 605 | } |
| 606 | orig = rcu_dereference(oldest->fnhe_rth); | ||
| 607 | if (orig) { | ||
| 608 | RCU_INIT_POINTER(oldest->fnhe_rth, NULL); | ||
| 609 | rt_free(orig); | ||
| 610 | } | ||
| 602 | return oldest; | 611 | return oldest; |
| 603 | } | 612 | } |
| 604 | 613 | ||
| @@ -620,7 +629,7 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw, | |||
| 620 | int depth; | 629 | int depth; |
| 621 | u32 hval = fnhe_hashfun(daddr); | 630 | u32 hval = fnhe_hashfun(daddr); |
| 622 | 631 | ||
| 623 | write_seqlock_bh(&fnhe_seqlock); | 632 | spin_lock_bh(&fnhe_lock); |
| 624 | 633 | ||
| 625 | hash = nh->nh_exceptions; | 634 | hash = nh->nh_exceptions; |
| 626 | if (!hash) { | 635 | if (!hash) { |
| @@ -667,7 +676,7 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw, | |||
| 667 | fnhe->fnhe_stamp = jiffies; | 676 | fnhe->fnhe_stamp = jiffies; |
| 668 | 677 | ||
| 669 | out_unlock: | 678 | out_unlock: |
| 670 | write_sequnlock_bh(&fnhe_seqlock); | 679 | spin_unlock_bh(&fnhe_lock); |
| 671 | return; | 680 | return; |
| 672 | } | 681 | } |
| 673 | 682 | ||
| @@ -925,12 +934,14 @@ static u32 __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu) | |||
| 925 | if (mtu < ip_rt_min_pmtu) | 934 | if (mtu < ip_rt_min_pmtu) |
| 926 | mtu = ip_rt_min_pmtu; | 935 | mtu = ip_rt_min_pmtu; |
| 927 | 936 | ||
| 937 | rcu_read_lock(); | ||
| 928 | if (fib_lookup(dev_net(rt->dst.dev), fl4, &res) == 0) { | 938 | if (fib_lookup(dev_net(rt->dst.dev), fl4, &res) == 0) { |
| 929 | struct fib_nh *nh = &FIB_RES_NH(res); | 939 | struct fib_nh *nh = &FIB_RES_NH(res); |
| 930 | 940 | ||
| 931 | update_or_create_fnhe(nh, fl4->daddr, 0, mtu, | 941 | update_or_create_fnhe(nh, fl4->daddr, 0, mtu, |
| 932 | jiffies + ip_rt_mtu_expires); | 942 | jiffies + ip_rt_mtu_expires); |
| 933 | } | 943 | } |
| 944 | rcu_read_unlock(); | ||
| 934 | return mtu; | 945 | return mtu; |
| 935 | } | 946 | } |
| 936 | 947 | ||
| @@ -947,7 +958,7 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, | |||
| 947 | dst->obsolete = DST_OBSOLETE_KILL; | 958 | dst->obsolete = DST_OBSOLETE_KILL; |
| 948 | } else { | 959 | } else { |
| 949 | rt->rt_pmtu = mtu; | 960 | rt->rt_pmtu = mtu; |
| 950 | dst_set_expires(&rt->dst, ip_rt_mtu_expires); | 961 | rt->dst.expires = max(1UL, jiffies + ip_rt_mtu_expires); |
| 951 | } | 962 | } |
| 952 | } | 963 | } |
| 953 | 964 | ||
| @@ -1164,67 +1175,126 @@ static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr) | |||
| 1164 | return NULL; | 1175 | return NULL; |
| 1165 | } | 1176 | } |
| 1166 | 1177 | ||
| 1167 | static void rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe, | 1178 | static bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe, |
| 1168 | __be32 daddr) | 1179 | __be32 daddr) |
| 1169 | { | 1180 | { |
| 1170 | __be32 fnhe_daddr, gw; | 1181 | bool ret = false; |
| 1171 | unsigned long expires; | 1182 | |
| 1172 | unsigned int seq; | 1183 | spin_lock_bh(&fnhe_lock); |
| 1173 | u32 pmtu; | ||
| 1174 | |||
| 1175 | restart: | ||
| 1176 | seq = read_seqbegin(&fnhe_seqlock); | ||
| 1177 | fnhe_daddr = fnhe->fnhe_daddr; | ||
| 1178 | gw = fnhe->fnhe_gw; | ||
| 1179 | pmtu = fnhe->fnhe_pmtu; | ||
| 1180 | expires = fnhe->fnhe_expires; | ||
| 1181 | if (read_seqretry(&fnhe_seqlock, seq)) | ||
| 1182 | goto restart; | ||
| 1183 | |||
| 1184 | if (daddr != fnhe_daddr) | ||
| 1185 | return; | ||
| 1186 | 1184 | ||
| 1187 | if (pmtu) { | 1185 | if (daddr == fnhe->fnhe_daddr) { |
| 1188 | unsigned long diff = expires - jiffies; | 1186 | struct rtable *orig; |
| 1189 | 1187 | ||
| 1190 | if (time_before(jiffies, expires)) { | 1188 | if (fnhe->fnhe_pmtu) { |
| 1191 | rt->rt_pmtu = pmtu; | 1189 | unsigned long expires = fnhe->fnhe_expires; |
| 1192 | dst_set_expires(&rt->dst, diff); | 1190 | unsigned long diff = expires - jiffies; |
| 1191 | |||
| 1192 | if (time_before(jiffies, expires)) { | ||
| 1193 | rt->rt_pmtu = fnhe->fnhe_pmtu; | ||
| 1194 | dst_set_expires(&rt->dst, diff); | ||
| 1195 | } | ||
| 1193 | } | 1196 | } |
| 1197 | if (fnhe->fnhe_gw) { | ||
| 1198 | rt->rt_flags |= RTCF_REDIRECTED; | ||
| 1199 | rt->rt_gateway = fnhe->fnhe_gw; | ||
| 1200 | } | ||
| 1201 | |||
| 1202 | orig = rcu_dereference(fnhe->fnhe_rth); | ||
| 1203 | rcu_assign_pointer(fnhe->fnhe_rth, rt); | ||
| 1204 | if (orig) | ||
| 1205 | rt_free(orig); | ||
| 1206 | |||
| 1207 | fnhe->fnhe_stamp = jiffies; | ||
| 1208 | ret = true; | ||
| 1209 | } else { | ||
| 1210 | /* Routes we intend to cache in nexthop exception have | ||
| 1211 | * the DST_NOCACHE bit clear. However, if we are | ||
| 1212 | * unsuccessful at storing this route into the cache | ||
| 1213 | * we really need to set it. | ||
| 1214 | */ | ||
| 1215 | rt->dst.flags |= DST_NOCACHE; | ||
| 1194 | } | 1216 | } |
| 1195 | if (gw) { | 1217 | spin_unlock_bh(&fnhe_lock); |
| 1196 | rt->rt_flags |= RTCF_REDIRECTED; | ||
| 1197 | rt->rt_gateway = gw; | ||
| 1198 | } | ||
| 1199 | fnhe->fnhe_stamp = jiffies; | ||
| 1200 | } | ||
| 1201 | 1218 | ||
| 1202 | static inline void rt_release_rcu(struct rcu_head *head) | 1219 | return ret; |
| 1203 | { | ||
| 1204 | struct dst_entry *dst = container_of(head, struct dst_entry, rcu_head); | ||
| 1205 | dst_release(dst); | ||
| 1206 | } | 1220 | } |
| 1207 | 1221 | ||
| 1208 | static void rt_cache_route(struct fib_nh *nh, struct rtable *rt) | 1222 | static bool rt_cache_route(struct fib_nh *nh, struct rtable *rt) |
| 1209 | { | 1223 | { |
| 1210 | struct rtable *orig, *prev, **p = &nh->nh_rth_output; | 1224 | struct rtable *orig, *prev, **p; |
| 1211 | 1225 | bool ret = true; | |
| 1212 | if (rt_is_input_route(rt)) | ||
| 1213 | p = &nh->nh_rth_input; | ||
| 1214 | 1226 | ||
| 1227 | if (rt_is_input_route(rt)) { | ||
| 1228 | p = (struct rtable **)&nh->nh_rth_input; | ||
| 1229 | } else { | ||
| 1230 | if (!nh->nh_pcpu_rth_output) | ||
| 1231 | goto nocache; | ||
| 1232 | p = (struct rtable **)__this_cpu_ptr(nh->nh_pcpu_rth_output); | ||
| 1233 | } | ||
| 1215 | orig = *p; | 1234 | orig = *p; |
| 1216 | 1235 | ||
| 1217 | prev = cmpxchg(p, orig, rt); | 1236 | prev = cmpxchg(p, orig, rt); |
| 1218 | if (prev == orig) { | 1237 | if (prev == orig) { |
| 1219 | dst_clone(&rt->dst); | ||
| 1220 | if (orig) | 1238 | if (orig) |
| 1221 | call_rcu_bh(&orig->dst.rcu_head, rt_release_rcu); | 1239 | rt_free(orig); |
| 1240 | } else { | ||
| 1241 | /* Routes we intend to cache in the FIB nexthop have | ||
| 1242 | * the DST_NOCACHE bit clear. However, if we are | ||
| 1243 | * unsuccessful at storing this route into the cache | ||
| 1244 | * we really need to set it. | ||
| 1245 | */ | ||
| 1246 | nocache: | ||
| 1247 | rt->dst.flags |= DST_NOCACHE; | ||
| 1248 | ret = false; | ||
| 1249 | } | ||
| 1250 | |||
| 1251 | return ret; | ||
| 1252 | } | ||
| 1253 | |||
| 1254 | static DEFINE_SPINLOCK(rt_uncached_lock); | ||
| 1255 | static LIST_HEAD(rt_uncached_list); | ||
| 1256 | |||
| 1257 | static void rt_add_uncached_list(struct rtable *rt) | ||
| 1258 | { | ||
| 1259 | spin_lock_bh(&rt_uncached_lock); | ||
| 1260 | list_add_tail(&rt->rt_uncached, &rt_uncached_list); | ||
| 1261 | spin_unlock_bh(&rt_uncached_lock); | ||
| 1262 | } | ||
| 1263 | |||
| 1264 | static void ipv4_dst_destroy(struct dst_entry *dst) | ||
| 1265 | { | ||
| 1266 | struct rtable *rt = (struct rtable *) dst; | ||
| 1267 | |||
| 1268 | if (!list_empty(&rt->rt_uncached)) { | ||
| 1269 | spin_lock_bh(&rt_uncached_lock); | ||
| 1270 | list_del(&rt->rt_uncached); | ||
| 1271 | spin_unlock_bh(&rt_uncached_lock); | ||
| 1222 | } | 1272 | } |
| 1223 | } | 1273 | } |
| 1224 | 1274 | ||
| 1225 | static bool rt_cache_valid(struct rtable *rt) | 1275 | void rt_flush_dev(struct net_device *dev) |
| 1226 | { | 1276 | { |
| 1227 | return (rt && rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK); | 1277 | if (!list_empty(&rt_uncached_list)) { |
| 1278 | struct net *net = dev_net(dev); | ||
| 1279 | struct rtable *rt; | ||
| 1280 | |||
| 1281 | spin_lock_bh(&rt_uncached_lock); | ||
| 1282 | list_for_each_entry(rt, &rt_uncached_list, rt_uncached) { | ||
| 1283 | if (rt->dst.dev != dev) | ||
| 1284 | continue; | ||
| 1285 | rt->dst.dev = net->loopback_dev; | ||
| 1286 | dev_hold(rt->dst.dev); | ||
| 1287 | dev_put(dev); | ||
| 1288 | } | ||
| 1289 | spin_unlock_bh(&rt_uncached_lock); | ||
| 1290 | } | ||
| 1291 | } | ||
| 1292 | |||
| 1293 | static bool rt_cache_valid(const struct rtable *rt) | ||
| 1294 | { | ||
| 1295 | return rt && | ||
| 1296 | rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK && | ||
| 1297 | !rt_is_expired(rt); | ||
| 1228 | } | 1298 | } |
| 1229 | 1299 | ||
| 1230 | static void rt_set_nexthop(struct rtable *rt, __be32 daddr, | 1300 | static void rt_set_nexthop(struct rtable *rt, __be32 daddr, |
| @@ -1232,20 +1302,24 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr, | |||
| 1232 | struct fib_nh_exception *fnhe, | 1302 | struct fib_nh_exception *fnhe, |
| 1233 | struct fib_info *fi, u16 type, u32 itag) | 1303 | struct fib_info *fi, u16 type, u32 itag) |
| 1234 | { | 1304 | { |
| 1305 | bool cached = false; | ||
| 1306 | |||
| 1235 | if (fi) { | 1307 | if (fi) { |
| 1236 | struct fib_nh *nh = &FIB_RES_NH(*res); | 1308 | struct fib_nh *nh = &FIB_RES_NH(*res); |
| 1237 | 1309 | ||
| 1238 | if (nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK) | 1310 | if (nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK) |
| 1239 | rt->rt_gateway = nh->nh_gw; | 1311 | rt->rt_gateway = nh->nh_gw; |
| 1240 | if (unlikely(fnhe)) | ||
| 1241 | rt_bind_exception(rt, fnhe, daddr); | ||
| 1242 | dst_init_metrics(&rt->dst, fi->fib_metrics, true); | 1312 | dst_init_metrics(&rt->dst, fi->fib_metrics, true); |
| 1243 | #ifdef CONFIG_IP_ROUTE_CLASSID | 1313 | #ifdef CONFIG_IP_ROUTE_CLASSID |
| 1244 | rt->dst.tclassid = nh->nh_tclassid; | 1314 | rt->dst.tclassid = nh->nh_tclassid; |
| 1245 | #endif | 1315 | #endif |
| 1246 | if (!(rt->dst.flags & DST_HOST)) | 1316 | if (unlikely(fnhe)) |
| 1247 | rt_cache_route(nh, rt); | 1317 | cached = rt_bind_exception(rt, fnhe, daddr); |
| 1318 | else if (!(rt->dst.flags & DST_NOCACHE)) | ||
| 1319 | cached = rt_cache_route(nh, rt); | ||
| 1248 | } | 1320 | } |
| 1321 | if (unlikely(!cached)) | ||
| 1322 | rt_add_uncached_list(rt); | ||
| 1249 | 1323 | ||
| 1250 | #ifdef CONFIG_IP_ROUTE_CLASSID | 1324 | #ifdef CONFIG_IP_ROUTE_CLASSID |
| 1251 | #ifdef CONFIG_IP_MULTIPLE_TABLES | 1325 | #ifdef CONFIG_IP_MULTIPLE_TABLES |
| @@ -1259,7 +1333,7 @@ static struct rtable *rt_dst_alloc(struct net_device *dev, | |||
| 1259 | bool nopolicy, bool noxfrm, bool will_cache) | 1333 | bool nopolicy, bool noxfrm, bool will_cache) |
| 1260 | { | 1334 | { |
| 1261 | return dst_alloc(&ipv4_dst_ops, dev, 1, DST_OBSOLETE_FORCE_CHK, | 1335 | return dst_alloc(&ipv4_dst_ops, dev, 1, DST_OBSOLETE_FORCE_CHK, |
| 1262 | (will_cache ? 0 : DST_HOST) | DST_NOCACHE | | 1336 | (will_cache ? 0 : (DST_HOST | DST_NOCACHE)) | |
| 1263 | (nopolicy ? DST_NOPOLICY : 0) | | 1337 | (nopolicy ? DST_NOPOLICY : 0) | |
| 1264 | (noxfrm ? DST_NOXFRM : 0)); | 1338 | (noxfrm ? DST_NOXFRM : 0)); |
| 1265 | } | 1339 | } |
| @@ -1312,6 +1386,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, | |||
| 1312 | rth->rt_iif = 0; | 1386 | rth->rt_iif = 0; |
| 1313 | rth->rt_pmtu = 0; | 1387 | rth->rt_pmtu = 0; |
| 1314 | rth->rt_gateway = 0; | 1388 | rth->rt_gateway = 0; |
| 1389 | INIT_LIST_HEAD(&rth->rt_uncached); | ||
| 1315 | if (our) { | 1390 | if (our) { |
| 1316 | rth->dst.input= ip_local_deliver; | 1391 | rth->dst.input= ip_local_deliver; |
| 1317 | rth->rt_flags |= RTCF_LOCAL; | 1392 | rth->rt_flags |= RTCF_LOCAL; |
| @@ -1364,8 +1439,7 @@ static void ip_handle_martian_source(struct net_device *dev, | |||
| 1364 | static int __mkroute_input(struct sk_buff *skb, | 1439 | static int __mkroute_input(struct sk_buff *skb, |
| 1365 | const struct fib_result *res, | 1440 | const struct fib_result *res, |
| 1366 | struct in_device *in_dev, | 1441 | struct in_device *in_dev, |
| 1367 | __be32 daddr, __be32 saddr, u32 tos, | 1442 | __be32 daddr, __be32 saddr, u32 tos) |
| 1368 | struct rtable **result) | ||
| 1369 | { | 1443 | { |
| 1370 | struct rtable *rth; | 1444 | struct rtable *rth; |
| 1371 | int err; | 1445 | int err; |
| @@ -1414,9 +1488,9 @@ static int __mkroute_input(struct sk_buff *skb, | |||
| 1414 | do_cache = false; | 1488 | do_cache = false; |
| 1415 | if (res->fi) { | 1489 | if (res->fi) { |
| 1416 | if (!itag) { | 1490 | if (!itag) { |
| 1417 | rth = FIB_RES_NH(*res).nh_rth_input; | 1491 | rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input); |
| 1418 | if (rt_cache_valid(rth)) { | 1492 | if (rt_cache_valid(rth)) { |
| 1419 | dst_hold(&rth->dst); | 1493 | skb_dst_set_noref(skb, &rth->dst); |
| 1420 | goto out; | 1494 | goto out; |
| 1421 | } | 1495 | } |
| 1422 | do_cache = true; | 1496 | do_cache = true; |
| @@ -1438,13 +1512,14 @@ static int __mkroute_input(struct sk_buff *skb, | |||
| 1438 | rth->rt_iif = 0; | 1512 | rth->rt_iif = 0; |
| 1439 | rth->rt_pmtu = 0; | 1513 | rth->rt_pmtu = 0; |
| 1440 | rth->rt_gateway = 0; | 1514 | rth->rt_gateway = 0; |
| 1515 | INIT_LIST_HEAD(&rth->rt_uncached); | ||
| 1441 | 1516 | ||
| 1442 | rth->dst.input = ip_forward; | 1517 | rth->dst.input = ip_forward; |
| 1443 | rth->dst.output = ip_output; | 1518 | rth->dst.output = ip_output; |
| 1444 | 1519 | ||
| 1445 | rt_set_nexthop(rth, daddr, res, NULL, res->fi, res->type, itag); | 1520 | rt_set_nexthop(rth, daddr, res, NULL, res->fi, res->type, itag); |
| 1521 | skb_dst_set(skb, &rth->dst); | ||
| 1446 | out: | 1522 | out: |
| 1447 | *result = rth; | ||
| 1448 | err = 0; | 1523 | err = 0; |
| 1449 | cleanup: | 1524 | cleanup: |
| 1450 | return err; | 1525 | return err; |
| @@ -1456,21 +1531,13 @@ static int ip_mkroute_input(struct sk_buff *skb, | |||
| 1456 | struct in_device *in_dev, | 1531 | struct in_device *in_dev, |
| 1457 | __be32 daddr, __be32 saddr, u32 tos) | 1532 | __be32 daddr, __be32 saddr, u32 tos) |
| 1458 | { | 1533 | { |
| 1459 | struct rtable *rth = NULL; | ||
| 1460 | int err; | ||
| 1461 | |||
| 1462 | #ifdef CONFIG_IP_ROUTE_MULTIPATH | 1534 | #ifdef CONFIG_IP_ROUTE_MULTIPATH |
| 1463 | if (res->fi && res->fi->fib_nhs > 1) | 1535 | if (res->fi && res->fi->fib_nhs > 1) |
| 1464 | fib_select_multipath(res); | 1536 | fib_select_multipath(res); |
| 1465 | #endif | 1537 | #endif |
| 1466 | 1538 | ||
| 1467 | /* create a routing cache entry */ | 1539 | /* create a routing cache entry */ |
| 1468 | err = __mkroute_input(skb, res, in_dev, daddr, saddr, tos, &rth); | 1540 | return __mkroute_input(skb, res, in_dev, daddr, saddr, tos); |
| 1469 | if (err) | ||
| 1470 | return err; | ||
| 1471 | |||
| 1472 | skb_dst_set(skb, &rth->dst); | ||
| 1473 | return 0; | ||
| 1474 | } | 1541 | } |
| 1475 | 1542 | ||
| 1476 | /* | 1543 | /* |
| @@ -1584,10 +1651,11 @@ local_input: | |||
| 1584 | do_cache = false; | 1651 | do_cache = false; |
| 1585 | if (res.fi) { | 1652 | if (res.fi) { |
| 1586 | if (!itag) { | 1653 | if (!itag) { |
| 1587 | rth = FIB_RES_NH(res).nh_rth_input; | 1654 | rth = rcu_dereference(FIB_RES_NH(res).nh_rth_input); |
| 1588 | if (rt_cache_valid(rth)) { | 1655 | if (rt_cache_valid(rth)) { |
| 1589 | dst_hold(&rth->dst); | 1656 | skb_dst_set_noref(skb, &rth->dst); |
| 1590 | goto set_and_out; | 1657 | err = 0; |
| 1658 | goto out; | ||
| 1591 | } | 1659 | } |
| 1592 | do_cache = true; | 1660 | do_cache = true; |
| 1593 | } | 1661 | } |
| @@ -1611,6 +1679,7 @@ local_input: | |||
| 1611 | rth->rt_iif = 0; | 1679 | rth->rt_iif = 0; |
| 1612 | rth->rt_pmtu = 0; | 1680 | rth->rt_pmtu = 0; |
| 1613 | rth->rt_gateway = 0; | 1681 | rth->rt_gateway = 0; |
| 1682 | INIT_LIST_HEAD(&rth->rt_uncached); | ||
| 1614 | if (res.type == RTN_UNREACHABLE) { | 1683 | if (res.type == RTN_UNREACHABLE) { |
| 1615 | rth->dst.input= ip_error; | 1684 | rth->dst.input= ip_error; |
| 1616 | rth->dst.error= -err; | 1685 | rth->dst.error= -err; |
| @@ -1618,7 +1687,6 @@ local_input: | |||
| 1618 | } | 1687 | } |
| 1619 | if (do_cache) | 1688 | if (do_cache) |
| 1620 | rt_cache_route(&FIB_RES_NH(res), rth); | 1689 | rt_cache_route(&FIB_RES_NH(res), rth); |
| 1621 | set_and_out: | ||
| 1622 | skb_dst_set(skb, &rth->dst); | 1690 | skb_dst_set(skb, &rth->dst); |
| 1623 | err = 0; | 1691 | err = 0; |
| 1624 | goto out; | 1692 | goto out; |
| @@ -1656,8 +1724,8 @@ martian_source_keep_err: | |||
| 1656 | goto out; | 1724 | goto out; |
| 1657 | } | 1725 | } |
| 1658 | 1726 | ||
| 1659 | int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr, | 1727 | int ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr, |
| 1660 | u8 tos, struct net_device *dev) | 1728 | u8 tos, struct net_device *dev) |
| 1661 | { | 1729 | { |
| 1662 | int res; | 1730 | int res; |
| 1663 | 1731 | ||
| @@ -1700,7 +1768,7 @@ int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr, | |||
| 1700 | rcu_read_unlock(); | 1768 | rcu_read_unlock(); |
| 1701 | return res; | 1769 | return res; |
| 1702 | } | 1770 | } |
| 1703 | EXPORT_SYMBOL(ip_route_input); | 1771 | EXPORT_SYMBOL(ip_route_input_noref); |
| 1704 | 1772 | ||
| 1705 | /* called with rcu_read_lock() */ | 1773 | /* called with rcu_read_lock() */ |
| 1706 | static struct rtable *__mkroute_output(const struct fib_result *res, | 1774 | static struct rtable *__mkroute_output(const struct fib_result *res, |
| @@ -1750,19 +1818,23 @@ static struct rtable *__mkroute_output(const struct fib_result *res, | |||
| 1750 | 1818 | ||
| 1751 | fnhe = NULL; | 1819 | fnhe = NULL; |
| 1752 | if (fi) { | 1820 | if (fi) { |
| 1821 | struct rtable __rcu **prth; | ||
| 1822 | |||
| 1753 | fnhe = find_exception(&FIB_RES_NH(*res), fl4->daddr); | 1823 | fnhe = find_exception(&FIB_RES_NH(*res), fl4->daddr); |
| 1754 | if (!fnhe) { | 1824 | if (fnhe) |
| 1755 | rth = FIB_RES_NH(*res).nh_rth_output; | 1825 | prth = &fnhe->fnhe_rth; |
| 1756 | if (rt_cache_valid(rth)) { | 1826 | else |
| 1757 | dst_hold(&rth->dst); | 1827 | prth = __this_cpu_ptr(FIB_RES_NH(*res).nh_pcpu_rth_output); |
| 1758 | return rth; | 1828 | rth = rcu_dereference(*prth); |
| 1759 | } | 1829 | if (rt_cache_valid(rth)) { |
| 1830 | dst_hold(&rth->dst); | ||
| 1831 | return rth; | ||
| 1760 | } | 1832 | } |
| 1761 | } | 1833 | } |
| 1762 | rth = rt_dst_alloc(dev_out, | 1834 | rth = rt_dst_alloc(dev_out, |
| 1763 | IN_DEV_CONF_GET(in_dev, NOPOLICY), | 1835 | IN_DEV_CONF_GET(in_dev, NOPOLICY), |
| 1764 | IN_DEV_CONF_GET(in_dev, NOXFRM), | 1836 | IN_DEV_CONF_GET(in_dev, NOXFRM), |
| 1765 | fi && !fnhe); | 1837 | fi); |
| 1766 | if (!rth) | 1838 | if (!rth) |
| 1767 | return ERR_PTR(-ENOBUFS); | 1839 | return ERR_PTR(-ENOBUFS); |
| 1768 | 1840 | ||
| @@ -1775,6 +1847,7 @@ static struct rtable *__mkroute_output(const struct fib_result *res, | |||
| 1775 | rth->rt_iif = orig_oif ? : 0; | 1847 | rth->rt_iif = orig_oif ? : 0; |
| 1776 | rth->rt_pmtu = 0; | 1848 | rth->rt_pmtu = 0; |
| 1777 | rth->rt_gateway = 0; | 1849 | rth->rt_gateway = 0; |
| 1850 | INIT_LIST_HEAD(&rth->rt_uncached); | ||
| 1778 | 1851 | ||
| 1779 | RT_CACHE_STAT_INC(out_slow_tot); | 1852 | RT_CACHE_STAT_INC(out_slow_tot); |
| 1780 | 1853 | ||
| @@ -1957,7 +2030,6 @@ struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4) | |||
| 1957 | } | 2030 | } |
| 1958 | dev_out = net->loopback_dev; | 2031 | dev_out = net->loopback_dev; |
| 1959 | fl4->flowi4_oif = dev_out->ifindex; | 2032 | fl4->flowi4_oif = dev_out->ifindex; |
| 1960 | res.fi = NULL; | ||
| 1961 | flags |= RTCF_LOCAL; | 2033 | flags |= RTCF_LOCAL; |
| 1962 | goto make_route; | 2034 | goto make_route; |
| 1963 | } | 2035 | } |
| @@ -2054,6 +2126,8 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or | |||
| 2054 | rt->rt_type = ort->rt_type; | 2126 | rt->rt_type = ort->rt_type; |
| 2055 | rt->rt_gateway = ort->rt_gateway; | 2127 | rt->rt_gateway = ort->rt_gateway; |
| 2056 | 2128 | ||
| 2129 | INIT_LIST_HEAD(&rt->rt_uncached); | ||
| 2130 | |||
| 2057 | dst_free(new); | 2131 | dst_free(new); |
| 2058 | } | 2132 | } |
| 2059 | 2133 | ||
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 5840c3255721..1b5ce96707a3 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c | |||
| @@ -184,7 +184,7 @@ static int ipv4_tcp_mem(ctl_table *ctl, int write, | |||
| 184 | int ret; | 184 | int ret; |
| 185 | unsigned long vec[3]; | 185 | unsigned long vec[3]; |
| 186 | struct net *net = current->nsproxy->net_ns; | 186 | struct net *net = current->nsproxy->net_ns; |
| 187 | #ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM | 187 | #ifdef CONFIG_MEMCG_KMEM |
| 188 | struct mem_cgroup *memcg; | 188 | struct mem_cgroup *memcg; |
| 189 | #endif | 189 | #endif |
| 190 | 190 | ||
| @@ -203,7 +203,7 @@ static int ipv4_tcp_mem(ctl_table *ctl, int write, | |||
| 203 | if (ret) | 203 | if (ret) |
| 204 | return ret; | 204 | return ret; |
| 205 | 205 | ||
| 206 | #ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM | 206 | #ifdef CONFIG_MEMCG_KMEM |
| 207 | rcu_read_lock(); | 207 | rcu_read_lock(); |
| 208 | memcg = mem_cgroup_from_task(current); | 208 | memcg = mem_cgroup_from_task(current); |
| 209 | 209 | ||
| @@ -784,13 +784,6 @@ static struct ctl_table ipv4_net_table[] = { | |||
| 784 | .proc_handler = proc_dointvec | 784 | .proc_handler = proc_dointvec |
| 785 | }, | 785 | }, |
| 786 | { | 786 | { |
| 787 | .procname = "rt_cache_rebuild_count", | ||
| 788 | .data = &init_net.ipv4.sysctl_rt_cache_rebuild_count, | ||
| 789 | .maxlen = sizeof(int), | ||
| 790 | .mode = 0644, | ||
| 791 | .proc_handler = proc_dointvec | ||
| 792 | }, | ||
| 793 | { | ||
| 794 | .procname = "ping_group_range", | 787 | .procname = "ping_group_range", |
| 795 | .data = &init_net.ipv4.sysctl_ping_group_range, | 788 | .data = &init_net.ipv4.sysctl_ping_group_range, |
| 796 | .maxlen = sizeof(init_net.ipv4.sysctl_ping_group_range), | 789 | .maxlen = sizeof(init_net.ipv4.sysctl_ping_group_range), |
| @@ -829,8 +822,6 @@ static __net_init int ipv4_sysctl_init_net(struct net *net) | |||
| 829 | table[5].data = | 822 | table[5].data = |
| 830 | &net->ipv4.sysctl_icmp_ratemask; | 823 | &net->ipv4.sysctl_icmp_ratemask; |
| 831 | table[6].data = | 824 | table[6].data = |
| 832 | &net->ipv4.sysctl_rt_cache_rebuild_count; | ||
| 833 | table[7].data = | ||
| 834 | &net->ipv4.sysctl_ping_group_range; | 825 | &net->ipv4.sysctl_ping_group_range; |
| 835 | 826 | ||
| 836 | } | 827 | } |
| @@ -842,8 +833,6 @@ static __net_init int ipv4_sysctl_init_net(struct net *net) | |||
| 842 | net->ipv4.sysctl_ping_group_range[0] = 1; | 833 | net->ipv4.sysctl_ping_group_range[0] = 1; |
| 843 | net->ipv4.sysctl_ping_group_range[1] = 0; | 834 | net->ipv4.sysctl_ping_group_range[1] = 0; |
| 844 | 835 | ||
| 845 | net->ipv4.sysctl_rt_cache_rebuild_count = 4; | ||
| 846 | |||
| 847 | tcp_init_mem(net); | 836 | tcp_init_mem(net); |
| 848 | 837 | ||
| 849 | net->ipv4.ipv4_hdr = register_net_sysctl(net, "net/ipv4", table); | 838 | net->ipv4.ipv4_hdr = register_net_sysctl(net, "net/ipv4", table); |
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 581ecf02c6b5..2109ff4a1daf 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c | |||
| @@ -811,7 +811,9 @@ static unsigned int tcp_xmit_size_goal(struct sock *sk, u32 mss_now, | |||
| 811 | old_size_goal + mss_now > xmit_size_goal)) { | 811 | old_size_goal + mss_now > xmit_size_goal)) { |
| 812 | xmit_size_goal = old_size_goal; | 812 | xmit_size_goal = old_size_goal; |
| 813 | } else { | 813 | } else { |
| 814 | tp->xmit_size_goal_segs = xmit_size_goal / mss_now; | 814 | tp->xmit_size_goal_segs = |
| 815 | min_t(u16, xmit_size_goal / mss_now, | ||
| 816 | sk->sk_gso_max_segs); | ||
| 815 | xmit_size_goal = tp->xmit_size_goal_segs * mss_now; | 817 | xmit_size_goal = tp->xmit_size_goal_segs * mss_now; |
| 816 | } | 818 | } |
| 817 | } | 819 | } |
| @@ -2681,7 +2683,10 @@ static int do_tcp_setsockopt(struct sock *sk, int level, | |||
| 2681 | /* Cap the max timeout in ms TCP will retry/retrans | 2683 | /* Cap the max timeout in ms TCP will retry/retrans |
| 2682 | * before giving up and aborting (ETIMEDOUT) a connection. | 2684 | * before giving up and aborting (ETIMEDOUT) a connection. |
| 2683 | */ | 2685 | */ |
| 2684 | icsk->icsk_user_timeout = msecs_to_jiffies(val); | 2686 | if (val < 0) |
| 2687 | err = -EINVAL; | ||
| 2688 | else | ||
| 2689 | icsk->icsk_user_timeout = msecs_to_jiffies(val); | ||
| 2685 | break; | 2690 | break; |
| 2686 | default: | 2691 | default: |
| 2687 | err = -ENOPROTOOPT; | 2692 | err = -ENOPROTOOPT; |
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index 4d4db16e336e..1432cdb0644c 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c | |||
| @@ -291,7 +291,8 @@ bool tcp_is_cwnd_limited(const struct sock *sk, u32 in_flight) | |||
| 291 | left = tp->snd_cwnd - in_flight; | 291 | left = tp->snd_cwnd - in_flight; |
| 292 | if (sk_can_gso(sk) && | 292 | if (sk_can_gso(sk) && |
| 293 | left * sysctl_tcp_tso_win_divisor < tp->snd_cwnd && | 293 | left * sysctl_tcp_tso_win_divisor < tp->snd_cwnd && |
| 294 | left * tp->mss_cache < sk->sk_gso_max_size) | 294 | left * tp->mss_cache < sk->sk_gso_max_size && |
| 295 | left < sk->sk_gso_max_segs) | ||
| 295 | return true; | 296 | return true; |
| 296 | return left <= tcp_max_tso_deferred_mss(tp); | 297 | return left <= tcp_max_tso_deferred_mss(tp); |
| 297 | } | 298 | } |
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 3e07a64ca44e..6e38c6c23caa 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c | |||
| @@ -2926,13 +2926,14 @@ static void tcp_enter_recovery(struct sock *sk, bool ece_ack) | |||
| 2926 | * tcp_xmit_retransmit_queue(). | 2926 | * tcp_xmit_retransmit_queue(). |
| 2927 | */ | 2927 | */ |
| 2928 | static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, | 2928 | static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, |
| 2929 | int newly_acked_sacked, bool is_dupack, | 2929 | int prior_sacked, bool is_dupack, |
| 2930 | int flag) | 2930 | int flag) |
| 2931 | { | 2931 | { |
| 2932 | struct inet_connection_sock *icsk = inet_csk(sk); | 2932 | struct inet_connection_sock *icsk = inet_csk(sk); |
| 2933 | struct tcp_sock *tp = tcp_sk(sk); | 2933 | struct tcp_sock *tp = tcp_sk(sk); |
| 2934 | int do_lost = is_dupack || ((flag & FLAG_DATA_SACKED) && | 2934 | int do_lost = is_dupack || ((flag & FLAG_DATA_SACKED) && |
| 2935 | (tcp_fackets_out(tp) > tp->reordering)); | 2935 | (tcp_fackets_out(tp) > tp->reordering)); |
| 2936 | int newly_acked_sacked = 0; | ||
| 2936 | int fast_rexmit = 0; | 2937 | int fast_rexmit = 0; |
| 2937 | 2938 | ||
| 2938 | if (WARN_ON(!tp->packets_out && tp->sacked_out)) | 2939 | if (WARN_ON(!tp->packets_out && tp->sacked_out)) |
| @@ -2992,6 +2993,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, | |||
| 2992 | tcp_add_reno_sack(sk); | 2993 | tcp_add_reno_sack(sk); |
| 2993 | } else | 2994 | } else |
| 2994 | do_lost = tcp_try_undo_partial(sk, pkts_acked); | 2995 | do_lost = tcp_try_undo_partial(sk, pkts_acked); |
| 2996 | newly_acked_sacked = pkts_acked + tp->sacked_out - prior_sacked; | ||
| 2995 | break; | 2997 | break; |
| 2996 | case TCP_CA_Loss: | 2998 | case TCP_CA_Loss: |
| 2997 | if (flag & FLAG_DATA_ACKED) | 2999 | if (flag & FLAG_DATA_ACKED) |
| @@ -3013,6 +3015,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, | |||
| 3013 | if (is_dupack) | 3015 | if (is_dupack) |
| 3014 | tcp_add_reno_sack(sk); | 3016 | tcp_add_reno_sack(sk); |
| 3015 | } | 3017 | } |
| 3018 | newly_acked_sacked = pkts_acked + tp->sacked_out - prior_sacked; | ||
| 3016 | 3019 | ||
| 3017 | if (icsk->icsk_ca_state <= TCP_CA_Disorder) | 3020 | if (icsk->icsk_ca_state <= TCP_CA_Disorder) |
| 3018 | tcp_try_undo_dsack(sk); | 3021 | tcp_try_undo_dsack(sk); |
| @@ -3590,7 +3593,6 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) | |||
| 3590 | int prior_packets; | 3593 | int prior_packets; |
| 3591 | int prior_sacked = tp->sacked_out; | 3594 | int prior_sacked = tp->sacked_out; |
| 3592 | int pkts_acked = 0; | 3595 | int pkts_acked = 0; |
| 3593 | int newly_acked_sacked = 0; | ||
| 3594 | bool frto_cwnd = false; | 3596 | bool frto_cwnd = false; |
| 3595 | 3597 | ||
| 3596 | /* If the ack is older than previous acks | 3598 | /* If the ack is older than previous acks |
| @@ -3666,8 +3668,6 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) | |||
| 3666 | flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una); | 3668 | flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una); |
| 3667 | 3669 | ||
| 3668 | pkts_acked = prior_packets - tp->packets_out; | 3670 | pkts_acked = prior_packets - tp->packets_out; |
| 3669 | newly_acked_sacked = (prior_packets - prior_sacked) - | ||
| 3670 | (tp->packets_out - tp->sacked_out); | ||
| 3671 | 3671 | ||
| 3672 | if (tp->frto_counter) | 3672 | if (tp->frto_counter) |
| 3673 | frto_cwnd = tcp_process_frto(sk, flag); | 3673 | frto_cwnd = tcp_process_frto(sk, flag); |
| @@ -3681,7 +3681,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) | |||
| 3681 | tcp_may_raise_cwnd(sk, flag)) | 3681 | tcp_may_raise_cwnd(sk, flag)) |
| 3682 | tcp_cong_avoid(sk, ack, prior_in_flight); | 3682 | tcp_cong_avoid(sk, ack, prior_in_flight); |
| 3683 | is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP)); | 3683 | is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP)); |
| 3684 | tcp_fastretrans_alert(sk, pkts_acked, newly_acked_sacked, | 3684 | tcp_fastretrans_alert(sk, pkts_acked, prior_sacked, |
| 3685 | is_dupack, flag); | 3685 | is_dupack, flag); |
| 3686 | } else { | 3686 | } else { |
| 3687 | if ((flag & FLAG_DATA_ACKED) && !frto_cwnd) | 3687 | if ((flag & FLAG_DATA_ACKED) && !frto_cwnd) |
| @@ -3698,7 +3698,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) | |||
| 3698 | no_queue: | 3698 | no_queue: |
| 3699 | /* If data was DSACKed, see if we can undo a cwnd reduction. */ | 3699 | /* If data was DSACKed, see if we can undo a cwnd reduction. */ |
| 3700 | if (flag & FLAG_DSACKING_ACK) | 3700 | if (flag & FLAG_DSACKING_ACK) |
| 3701 | tcp_fastretrans_alert(sk, pkts_acked, newly_acked_sacked, | 3701 | tcp_fastretrans_alert(sk, pkts_acked, prior_sacked, |
| 3702 | is_dupack, flag); | 3702 | is_dupack, flag); |
| 3703 | /* If this ack opens up a zero window, clear backoff. It was | 3703 | /* If this ack opens up a zero window, clear backoff. It was |
| 3704 | * being used to time the probes, and is probably far higher than | 3704 | * being used to time the probes, and is probably far higher than |
| @@ -3718,8 +3718,7 @@ old_ack: | |||
| 3718 | */ | 3718 | */ |
| 3719 | if (TCP_SKB_CB(skb)->sacked) { | 3719 | if (TCP_SKB_CB(skb)->sacked) { |
| 3720 | flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una); | 3720 | flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una); |
| 3721 | newly_acked_sacked = tp->sacked_out - prior_sacked; | 3721 | tcp_fastretrans_alert(sk, pkts_acked, prior_sacked, |
| 3722 | tcp_fastretrans_alert(sk, pkts_acked, newly_acked_sacked, | ||
| 3723 | is_dupack, flag); | 3722 | is_dupack, flag); |
| 3724 | } | 3723 | } |
| 3725 | 3724 | ||
| @@ -4351,19 +4350,20 @@ static void tcp_ofo_queue(struct sock *sk) | |||
| 4351 | static bool tcp_prune_ofo_queue(struct sock *sk); | 4350 | static bool tcp_prune_ofo_queue(struct sock *sk); |
| 4352 | static int tcp_prune_queue(struct sock *sk); | 4351 | static int tcp_prune_queue(struct sock *sk); |
| 4353 | 4352 | ||
| 4354 | static int tcp_try_rmem_schedule(struct sock *sk, unsigned int size) | 4353 | static int tcp_try_rmem_schedule(struct sock *sk, struct sk_buff *skb, |
| 4354 | unsigned int size) | ||
| 4355 | { | 4355 | { |
| 4356 | if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf || | 4356 | if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf || |
| 4357 | !sk_rmem_schedule(sk, size)) { | 4357 | !sk_rmem_schedule(sk, skb, size)) { |
| 4358 | 4358 | ||
| 4359 | if (tcp_prune_queue(sk) < 0) | 4359 | if (tcp_prune_queue(sk) < 0) |
| 4360 | return -1; | 4360 | return -1; |
| 4361 | 4361 | ||
| 4362 | if (!sk_rmem_schedule(sk, size)) { | 4362 | if (!sk_rmem_schedule(sk, skb, size)) { |
| 4363 | if (!tcp_prune_ofo_queue(sk)) | 4363 | if (!tcp_prune_ofo_queue(sk)) |
| 4364 | return -1; | 4364 | return -1; |
| 4365 | 4365 | ||
| 4366 | if (!sk_rmem_schedule(sk, size)) | 4366 | if (!sk_rmem_schedule(sk, skb, size)) |
| 4367 | return -1; | 4367 | return -1; |
| 4368 | } | 4368 | } |
| 4369 | } | 4369 | } |
| @@ -4418,7 +4418,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) | |||
| 4418 | 4418 | ||
| 4419 | TCP_ECN_check_ce(tp, skb); | 4419 | TCP_ECN_check_ce(tp, skb); |
| 4420 | 4420 | ||
| 4421 | if (unlikely(tcp_try_rmem_schedule(sk, skb->truesize))) { | 4421 | if (unlikely(tcp_try_rmem_schedule(sk, skb, skb->truesize))) { |
| 4422 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPOFODROP); | 4422 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPOFODROP); |
| 4423 | __kfree_skb(skb); | 4423 | __kfree_skb(skb); |
| 4424 | return; | 4424 | return; |
| @@ -4552,17 +4552,17 @@ static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, int | |||
| 4552 | 4552 | ||
| 4553 | int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size) | 4553 | int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size) |
| 4554 | { | 4554 | { |
| 4555 | struct sk_buff *skb; | 4555 | struct sk_buff *skb = NULL; |
| 4556 | struct tcphdr *th; | 4556 | struct tcphdr *th; |
| 4557 | bool fragstolen; | 4557 | bool fragstolen; |
| 4558 | 4558 | ||
| 4559 | if (tcp_try_rmem_schedule(sk, size + sizeof(*th))) | ||
| 4560 | goto err; | ||
| 4561 | |||
| 4562 | skb = alloc_skb(size + sizeof(*th), sk->sk_allocation); | 4559 | skb = alloc_skb(size + sizeof(*th), sk->sk_allocation); |
| 4563 | if (!skb) | 4560 | if (!skb) |
| 4564 | goto err; | 4561 | goto err; |
| 4565 | 4562 | ||
| 4563 | if (tcp_try_rmem_schedule(sk, skb, size + sizeof(*th))) | ||
| 4564 | goto err_free; | ||
| 4565 | |||
| 4566 | th = (struct tcphdr *)skb_put(skb, sizeof(*th)); | 4566 | th = (struct tcphdr *)skb_put(skb, sizeof(*th)); |
| 4567 | skb_reset_transport_header(skb); | 4567 | skb_reset_transport_header(skb); |
| 4568 | memset(th, 0, sizeof(*th)); | 4568 | memset(th, 0, sizeof(*th)); |
| @@ -4633,7 +4633,7 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) | |||
| 4633 | if (eaten <= 0) { | 4633 | if (eaten <= 0) { |
| 4634 | queue_and_out: | 4634 | queue_and_out: |
| 4635 | if (eaten < 0 && | 4635 | if (eaten < 0 && |
| 4636 | tcp_try_rmem_schedule(sk, skb->truesize)) | 4636 | tcp_try_rmem_schedule(sk, skb, skb->truesize)) |
| 4637 | goto drop; | 4637 | goto drop; |
| 4638 | 4638 | ||
| 4639 | eaten = tcp_queue_rcv(sk, skb, 0, &fragstolen); | 4639 | eaten = tcp_queue_rcv(sk, skb, 0, &fragstolen); |
| @@ -5391,6 +5391,8 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, | |||
| 5391 | { | 5391 | { |
| 5392 | struct tcp_sock *tp = tcp_sk(sk); | 5392 | struct tcp_sock *tp = tcp_sk(sk); |
| 5393 | 5393 | ||
| 5394 | if (unlikely(sk->sk_rx_dst == NULL)) | ||
| 5395 | inet_csk(sk)->icsk_af_ops->sk_rx_dst_set(sk, skb); | ||
| 5394 | /* | 5396 | /* |
| 5395 | * Header prediction. | 5397 | * Header prediction. |
| 5396 | * The code loosely follows the one in the famous | 5398 | * The code loosely follows the one in the famous |
| @@ -5475,7 +5477,9 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, | |||
| 5475 | if (tp->copied_seq == tp->rcv_nxt && | 5477 | if (tp->copied_seq == tp->rcv_nxt && |
| 5476 | len - tcp_header_len <= tp->ucopy.len) { | 5478 | len - tcp_header_len <= tp->ucopy.len) { |
| 5477 | #ifdef CONFIG_NET_DMA | 5479 | #ifdef CONFIG_NET_DMA |
| 5478 | if (tcp_dma_try_early_copy(sk, skb, tcp_header_len)) { | 5480 | if (tp->ucopy.task == current && |
| 5481 | sock_owned_by_user(sk) && | ||
| 5482 | tcp_dma_try_early_copy(sk, skb, tcp_header_len)) { | ||
| 5479 | copied_early = 1; | 5483 | copied_early = 1; |
| 5480 | eaten = 1; | 5484 | eaten = 1; |
| 5481 | } | 5485 | } |
| @@ -5602,7 +5606,7 @@ void tcp_finish_connect(struct sock *sk, struct sk_buff *skb) | |||
| 5602 | tcp_set_state(sk, TCP_ESTABLISHED); | 5606 | tcp_set_state(sk, TCP_ESTABLISHED); |
| 5603 | 5607 | ||
| 5604 | if (skb != NULL) { | 5608 | if (skb != NULL) { |
| 5605 | sk->sk_rx_dst = dst_clone(skb_dst(skb)); | 5609 | icsk->icsk_af_ops->sk_rx_dst_set(sk, skb); |
| 5606 | security_inet_conn_established(sk, skb); | 5610 | security_inet_conn_established(sk, skb); |
| 5607 | } | 5611 | } |
| 5608 | 5612 | ||
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 3e30548ac32a..00a748d14062 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c | |||
| @@ -417,10 +417,12 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) | |||
| 417 | 417 | ||
| 418 | if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */ | 418 | if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */ |
| 419 | tp->mtu_info = info; | 419 | tp->mtu_info = info; |
| 420 | if (!sock_owned_by_user(sk)) | 420 | if (!sock_owned_by_user(sk)) { |
| 421 | tcp_v4_mtu_reduced(sk); | 421 | tcp_v4_mtu_reduced(sk); |
| 422 | else | 422 | } else { |
| 423 | set_bit(TCP_MTU_REDUCED_DEFERRED, &tp->tsq_flags); | 423 | if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED, &tp->tsq_flags)) |
| 424 | sock_hold(sk); | ||
| 425 | } | ||
| 424 | goto out; | 426 | goto out; |
| 425 | } | 427 | } |
| 426 | 428 | ||
| @@ -1462,6 +1464,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, | |||
| 1462 | goto exit_nonewsk; | 1464 | goto exit_nonewsk; |
| 1463 | 1465 | ||
| 1464 | newsk->sk_gso_type = SKB_GSO_TCPV4; | 1466 | newsk->sk_gso_type = SKB_GSO_TCPV4; |
| 1467 | inet_sk_rx_dst_set(newsk, skb); | ||
| 1465 | 1468 | ||
| 1466 | newtp = tcp_sk(newsk); | 1469 | newtp = tcp_sk(newsk); |
| 1467 | newinet = inet_sk(newsk); | 1470 | newinet = inet_sk(newsk); |
| @@ -1617,21 +1620,16 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) | |||
| 1617 | #endif | 1620 | #endif |
| 1618 | 1621 | ||
| 1619 | if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */ | 1622 | if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */ |
| 1623 | struct dst_entry *dst = sk->sk_rx_dst; | ||
| 1624 | |||
| 1620 | sock_rps_save_rxhash(sk, skb); | 1625 | sock_rps_save_rxhash(sk, skb); |
| 1621 | if (sk->sk_rx_dst) { | 1626 | if (dst) { |
| 1622 | struct dst_entry *dst = sk->sk_rx_dst; | 1627 | if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif || |
| 1623 | if (dst->ops->check(dst, 0) == NULL) { | 1628 | dst->ops->check(dst, 0) == NULL) { |
| 1624 | dst_release(dst); | 1629 | dst_release(dst); |
| 1625 | sk->sk_rx_dst = NULL; | 1630 | sk->sk_rx_dst = NULL; |
| 1626 | } | 1631 | } |
| 1627 | } | 1632 | } |
| 1628 | if (unlikely(sk->sk_rx_dst == NULL)) { | ||
| 1629 | struct inet_sock *icsk = inet_sk(sk); | ||
| 1630 | struct rtable *rt = skb_rtable(skb); | ||
| 1631 | |||
| 1632 | sk->sk_rx_dst = dst_clone(&rt->dst); | ||
| 1633 | icsk->rx_dst_ifindex = inet_iif(skb); | ||
| 1634 | } | ||
| 1635 | if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) { | 1633 | if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) { |
| 1636 | rsk = sk; | 1634 | rsk = sk; |
| 1637 | goto reset; | 1635 | goto reset; |
| @@ -1686,7 +1684,6 @@ void tcp_v4_early_demux(struct sk_buff *skb) | |||
| 1686 | struct net *net = dev_net(skb->dev); | 1684 | struct net *net = dev_net(skb->dev); |
| 1687 | const struct iphdr *iph; | 1685 | const struct iphdr *iph; |
| 1688 | const struct tcphdr *th; | 1686 | const struct tcphdr *th; |
| 1689 | struct net_device *dev; | ||
| 1690 | struct sock *sk; | 1687 | struct sock *sk; |
| 1691 | 1688 | ||
| 1692 | if (skb->pkt_type != PACKET_HOST) | 1689 | if (skb->pkt_type != PACKET_HOST) |
| @@ -1701,24 +1698,20 @@ void tcp_v4_early_demux(struct sk_buff *skb) | |||
| 1701 | if (th->doff < sizeof(struct tcphdr) / 4) | 1698 | if (th->doff < sizeof(struct tcphdr) / 4) |
| 1702 | return; | 1699 | return; |
| 1703 | 1700 | ||
| 1704 | if (!pskb_may_pull(skb, ip_hdrlen(skb) + th->doff * 4)) | ||
| 1705 | return; | ||
| 1706 | |||
| 1707 | dev = skb->dev; | ||
| 1708 | sk = __inet_lookup_established(net, &tcp_hashinfo, | 1701 | sk = __inet_lookup_established(net, &tcp_hashinfo, |
| 1709 | iph->saddr, th->source, | 1702 | iph->saddr, th->source, |
| 1710 | iph->daddr, ntohs(th->dest), | 1703 | iph->daddr, ntohs(th->dest), |
| 1711 | dev->ifindex); | 1704 | skb->skb_iif); |
| 1712 | if (sk) { | 1705 | if (sk) { |
| 1713 | skb->sk = sk; | 1706 | skb->sk = sk; |
| 1714 | skb->destructor = sock_edemux; | 1707 | skb->destructor = sock_edemux; |
| 1715 | if (sk->sk_state != TCP_TIME_WAIT) { | 1708 | if (sk->sk_state != TCP_TIME_WAIT) { |
| 1716 | struct dst_entry *dst = sk->sk_rx_dst; | 1709 | struct dst_entry *dst = sk->sk_rx_dst; |
| 1717 | struct inet_sock *icsk = inet_sk(sk); | 1710 | |
| 1718 | if (dst) | 1711 | if (dst) |
| 1719 | dst = dst_check(dst, 0); | 1712 | dst = dst_check(dst, 0); |
| 1720 | if (dst && | 1713 | if (dst && |
| 1721 | icsk->rx_dst_ifindex == dev->ifindex) | 1714 | inet_sk(sk)->rx_dst_ifindex == skb->skb_iif) |
| 1722 | skb_dst_set_noref(skb, dst); | 1715 | skb_dst_set_noref(skb, dst); |
| 1723 | } | 1716 | } |
| 1724 | } | 1717 | } |
| @@ -1879,10 +1872,21 @@ static struct timewait_sock_ops tcp_timewait_sock_ops = { | |||
| 1879 | .twsk_destructor= tcp_twsk_destructor, | 1872 | .twsk_destructor= tcp_twsk_destructor, |
| 1880 | }; | 1873 | }; |
| 1881 | 1874 | ||
| 1875 | void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb) | ||
| 1876 | { | ||
| 1877 | struct dst_entry *dst = skb_dst(skb); | ||
| 1878 | |||
| 1879 | dst_hold(dst); | ||
| 1880 | sk->sk_rx_dst = dst; | ||
| 1881 | inet_sk(sk)->rx_dst_ifindex = skb->skb_iif; | ||
| 1882 | } | ||
| 1883 | EXPORT_SYMBOL(inet_sk_rx_dst_set); | ||
| 1884 | |||
| 1882 | const struct inet_connection_sock_af_ops ipv4_specific = { | 1885 | const struct inet_connection_sock_af_ops ipv4_specific = { |
| 1883 | .queue_xmit = ip_queue_xmit, | 1886 | .queue_xmit = ip_queue_xmit, |
| 1884 | .send_check = tcp_v4_send_check, | 1887 | .send_check = tcp_v4_send_check, |
| 1885 | .rebuild_header = inet_sk_rebuild_header, | 1888 | .rebuild_header = inet_sk_rebuild_header, |
| 1889 | .sk_rx_dst_set = inet_sk_rx_dst_set, | ||
| 1886 | .conn_request = tcp_v4_conn_request, | 1890 | .conn_request = tcp_v4_conn_request, |
| 1887 | .syn_recv_sock = tcp_v4_syn_recv_sock, | 1891 | .syn_recv_sock = tcp_v4_syn_recv_sock, |
| 1888 | .net_header_len = sizeof(struct iphdr), | 1892 | .net_header_len = sizeof(struct iphdr), |
| @@ -2640,7 +2644,7 @@ struct proto tcp_prot = { | |||
| 2640 | .compat_setsockopt = compat_tcp_setsockopt, | 2644 | .compat_setsockopt = compat_tcp_setsockopt, |
| 2641 | .compat_getsockopt = compat_tcp_getsockopt, | 2645 | .compat_getsockopt = compat_tcp_getsockopt, |
| 2642 | #endif | 2646 | #endif |
| 2643 | #ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM | 2647 | #ifdef CONFIG_MEMCG_KMEM |
| 2644 | .init_cgroup = tcp_init_cgroup, | 2648 | .init_cgroup = tcp_init_cgroup, |
| 2645 | .destroy_cgroup = tcp_destroy_cgroup, | 2649 | .destroy_cgroup = tcp_destroy_cgroup, |
| 2646 | .proto_cgroup = tcp_proto_cgroup, | 2650 | .proto_cgroup = tcp_proto_cgroup, |
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index 2288a6399e1e..0abe67bb4d3a 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c | |||
| @@ -731,6 +731,18 @@ static int __net_init tcp_net_metrics_init(struct net *net) | |||
| 731 | 731 | ||
| 732 | static void __net_exit tcp_net_metrics_exit(struct net *net) | 732 | static void __net_exit tcp_net_metrics_exit(struct net *net) |
| 733 | { | 733 | { |
| 734 | unsigned int i; | ||
| 735 | |||
| 736 | for (i = 0; i < (1U << net->ipv4.tcp_metrics_hash_log) ; i++) { | ||
| 737 | struct tcp_metrics_block *tm, *next; | ||
| 738 | |||
| 739 | tm = rcu_dereference_protected(net->ipv4.tcp_metrics_hash[i].chain, 1); | ||
| 740 | while (tm) { | ||
| 741 | next = rcu_dereference_protected(tm->tcpm_next, 1); | ||
| 742 | kfree(tm); | ||
| 743 | tm = next; | ||
| 744 | } | ||
| 745 | } | ||
| 734 | kfree(net->ipv4.tcp_metrics_hash); | 746 | kfree(net->ipv4.tcp_metrics_hash); |
| 735 | } | 747 | } |
| 736 | 748 | ||
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 5912ac3fd240..6ff7f10dce9d 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c | |||
| @@ -387,8 +387,6 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, | |||
| 387 | struct tcp_sock *oldtp = tcp_sk(sk); | 387 | struct tcp_sock *oldtp = tcp_sk(sk); |
| 388 | struct tcp_cookie_values *oldcvp = oldtp->cookie_values; | 388 | struct tcp_cookie_values *oldcvp = oldtp->cookie_values; |
| 389 | 389 | ||
| 390 | newsk->sk_rx_dst = dst_clone(skb_dst(skb)); | ||
| 391 | |||
| 392 | /* TCP Cookie Transactions require space for the cookie pair, | 390 | /* TCP Cookie Transactions require space for the cookie pair, |
| 393 | * as it differs for each connection. There is no need to | 391 | * as it differs for each connection. There is no need to |
| 394 | * copy any s_data_payload stored at the original socket. | 392 | * copy any s_data_payload stored at the original socket. |
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 33cd065cfbd8..d04632673a9e 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c | |||
| @@ -910,14 +910,18 @@ void tcp_release_cb(struct sock *sk) | |||
| 910 | if (flags & (1UL << TCP_TSQ_DEFERRED)) | 910 | if (flags & (1UL << TCP_TSQ_DEFERRED)) |
| 911 | tcp_tsq_handler(sk); | 911 | tcp_tsq_handler(sk); |
| 912 | 912 | ||
| 913 | if (flags & (1UL << TCP_WRITE_TIMER_DEFERRED)) | 913 | if (flags & (1UL << TCP_WRITE_TIMER_DEFERRED)) { |
| 914 | tcp_write_timer_handler(sk); | 914 | tcp_write_timer_handler(sk); |
| 915 | 915 | __sock_put(sk); | |
| 916 | if (flags & (1UL << TCP_DELACK_TIMER_DEFERRED)) | 916 | } |
| 917 | if (flags & (1UL << TCP_DELACK_TIMER_DEFERRED)) { | ||
| 917 | tcp_delack_timer_handler(sk); | 918 | tcp_delack_timer_handler(sk); |
| 918 | 919 | __sock_put(sk); | |
| 919 | if (flags & (1UL << TCP_MTU_REDUCED_DEFERRED)) | 920 | } |
| 921 | if (flags & (1UL << TCP_MTU_REDUCED_DEFERRED)) { | ||
| 920 | sk->sk_prot->mtu_reduced(sk); | 922 | sk->sk_prot->mtu_reduced(sk); |
| 923 | __sock_put(sk); | ||
| 924 | } | ||
| 921 | } | 925 | } |
| 922 | EXPORT_SYMBOL(tcp_release_cb); | 926 | EXPORT_SYMBOL(tcp_release_cb); |
| 923 | 927 | ||
| @@ -940,7 +944,7 @@ void __init tcp_tasklet_init(void) | |||
| 940 | * We cant xmit new skbs from this context, as we might already | 944 | * We cant xmit new skbs from this context, as we might already |
| 941 | * hold qdisc lock. | 945 | * hold qdisc lock. |
| 942 | */ | 946 | */ |
| 943 | void tcp_wfree(struct sk_buff *skb) | 947 | static void tcp_wfree(struct sk_buff *skb) |
| 944 | { | 948 | { |
| 945 | struct sock *sk = skb->sk; | 949 | struct sock *sk = skb->sk; |
| 946 | struct tcp_sock *tp = tcp_sk(sk); | 950 | struct tcp_sock *tp = tcp_sk(sk); |
| @@ -1522,21 +1526,21 @@ static void tcp_cwnd_validate(struct sock *sk) | |||
| 1522 | * when we would be allowed to send the split-due-to-Nagle skb fully. | 1526 | * when we would be allowed to send the split-due-to-Nagle skb fully. |
| 1523 | */ | 1527 | */ |
| 1524 | static unsigned int tcp_mss_split_point(const struct sock *sk, const struct sk_buff *skb, | 1528 | static unsigned int tcp_mss_split_point(const struct sock *sk, const struct sk_buff *skb, |
| 1525 | unsigned int mss_now, unsigned int cwnd) | 1529 | unsigned int mss_now, unsigned int max_segs) |
| 1526 | { | 1530 | { |
| 1527 | const struct tcp_sock *tp = tcp_sk(sk); | 1531 | const struct tcp_sock *tp = tcp_sk(sk); |
| 1528 | u32 needed, window, cwnd_len; | 1532 | u32 needed, window, max_len; |
| 1529 | 1533 | ||
| 1530 | window = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq; | 1534 | window = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq; |
| 1531 | cwnd_len = mss_now * cwnd; | 1535 | max_len = mss_now * max_segs; |
| 1532 | 1536 | ||
| 1533 | if (likely(cwnd_len <= window && skb != tcp_write_queue_tail(sk))) | 1537 | if (likely(max_len <= window && skb != tcp_write_queue_tail(sk))) |
| 1534 | return cwnd_len; | 1538 | return max_len; |
| 1535 | 1539 | ||
| 1536 | needed = min(skb->len, window); | 1540 | needed = min(skb->len, window); |
| 1537 | 1541 | ||
| 1538 | if (cwnd_len <= needed) | 1542 | if (max_len <= needed) |
| 1539 | return cwnd_len; | 1543 | return max_len; |
| 1540 | 1544 | ||
| 1541 | return needed - needed % mss_now; | 1545 | return needed - needed % mss_now; |
| 1542 | } | 1546 | } |
| @@ -1765,7 +1769,8 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb) | |||
| 1765 | limit = min(send_win, cong_win); | 1769 | limit = min(send_win, cong_win); |
| 1766 | 1770 | ||
| 1767 | /* If a full-sized TSO skb can be sent, do it. */ | 1771 | /* If a full-sized TSO skb can be sent, do it. */ |
| 1768 | if (limit >= sk->sk_gso_max_size) | 1772 | if (limit >= min_t(unsigned int, sk->sk_gso_max_size, |
| 1773 | sk->sk_gso_max_segs * tp->mss_cache)) | ||
| 1769 | goto send_now; | 1774 | goto send_now; |
| 1770 | 1775 | ||
| 1771 | /* Middle in queue won't get any more data, full sendable already? */ | 1776 | /* Middle in queue won't get any more data, full sendable already? */ |
| @@ -1999,7 +2004,9 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, | |||
| 1999 | limit = mss_now; | 2004 | limit = mss_now; |
| 2000 | if (tso_segs > 1 && !tcp_urg_mode(tp)) | 2005 | if (tso_segs > 1 && !tcp_urg_mode(tp)) |
| 2001 | limit = tcp_mss_split_point(sk, skb, mss_now, | 2006 | limit = tcp_mss_split_point(sk, skb, mss_now, |
| 2002 | cwnd_quota); | 2007 | min_t(unsigned int, |
| 2008 | cwnd_quota, | ||
| 2009 | sk->sk_gso_max_segs)); | ||
| 2003 | 2010 | ||
| 2004 | if (skb->len > limit && | 2011 | if (skb->len > limit && |
| 2005 | unlikely(tso_fragment(sk, skb, limit, mss_now, gfp))) | 2012 | unlikely(tso_fragment(sk, skb, limit, mss_now, gfp))) |
| @@ -2045,7 +2052,8 @@ void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss, | |||
| 2045 | if (unlikely(sk->sk_state == TCP_CLOSE)) | 2052 | if (unlikely(sk->sk_state == TCP_CLOSE)) |
| 2046 | return; | 2053 | return; |
| 2047 | 2054 | ||
| 2048 | if (tcp_write_xmit(sk, cur_mss, nonagle, 0, GFP_ATOMIC)) | 2055 | if (tcp_write_xmit(sk, cur_mss, nonagle, 0, |
| 2056 | sk_gfp_atomic(sk, GFP_ATOMIC))) | ||
| 2049 | tcp_check_probe_timer(sk); | 2057 | tcp_check_probe_timer(sk); |
| 2050 | } | 2058 | } |
| 2051 | 2059 | ||
| @@ -2666,7 +2674,8 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, | |||
| 2666 | 2674 | ||
| 2667 | if (cvp != NULL && cvp->s_data_constant && cvp->s_data_desired) | 2675 | if (cvp != NULL && cvp->s_data_constant && cvp->s_data_desired) |
| 2668 | s_data_desired = cvp->s_data_desired; | 2676 | s_data_desired = cvp->s_data_desired; |
| 2669 | skb = alloc_skb(MAX_TCP_HEADER + 15 + s_data_desired, GFP_ATOMIC); | 2677 | skb = alloc_skb(MAX_TCP_HEADER + 15 + s_data_desired, |
| 2678 | sk_gfp_atomic(sk, GFP_ATOMIC)); | ||
| 2670 | if (unlikely(!skb)) { | 2679 | if (unlikely(!skb)) { |
| 2671 | dst_release(dst); | 2680 | dst_release(dst); |
| 2672 | return NULL; | 2681 | return NULL; |
| @@ -3064,7 +3073,7 @@ void tcp_send_ack(struct sock *sk) | |||
| 3064 | * tcp_transmit_skb() will set the ownership to this | 3073 | * tcp_transmit_skb() will set the ownership to this |
| 3065 | * sock. | 3074 | * sock. |
| 3066 | */ | 3075 | */ |
| 3067 | buff = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC); | 3076 | buff = alloc_skb(MAX_TCP_HEADER, sk_gfp_atomic(sk, GFP_ATOMIC)); |
| 3068 | if (buff == NULL) { | 3077 | if (buff == NULL) { |
| 3069 | inet_csk_schedule_ack(sk); | 3078 | inet_csk_schedule_ack(sk); |
| 3070 | inet_csk(sk)->icsk_ack.ato = TCP_ATO_MIN; | 3079 | inet_csk(sk)->icsk_ack.ato = TCP_ATO_MIN; |
| @@ -3079,7 +3088,7 @@ void tcp_send_ack(struct sock *sk) | |||
| 3079 | 3088 | ||
| 3080 | /* Send it off, this clears delayed acks for us. */ | 3089 | /* Send it off, this clears delayed acks for us. */ |
| 3081 | TCP_SKB_CB(buff)->when = tcp_time_stamp; | 3090 | TCP_SKB_CB(buff)->when = tcp_time_stamp; |
| 3082 | tcp_transmit_skb(sk, buff, 0, GFP_ATOMIC); | 3091 | tcp_transmit_skb(sk, buff, 0, sk_gfp_atomic(sk, GFP_ATOMIC)); |
| 3083 | } | 3092 | } |
| 3084 | 3093 | ||
| 3085 | /* This routine sends a packet with an out of date sequence | 3094 | /* This routine sends a packet with an out of date sequence |
| @@ -3099,7 +3108,7 @@ static int tcp_xmit_probe_skb(struct sock *sk, int urgent) | |||
| 3099 | struct sk_buff *skb; | 3108 | struct sk_buff *skb; |
| 3100 | 3109 | ||
| 3101 | /* We don't queue it, tcp_transmit_skb() sets ownership. */ | 3110 | /* We don't queue it, tcp_transmit_skb() sets ownership. */ |
| 3102 | skb = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC); | 3111 | skb = alloc_skb(MAX_TCP_HEADER, sk_gfp_atomic(sk, GFP_ATOMIC)); |
| 3103 | if (skb == NULL) | 3112 | if (skb == NULL) |
| 3104 | return -1; | 3113 | return -1; |
| 3105 | 3114 | ||
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 6df36ad55a38..b774a03bd1dc 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c | |||
| @@ -252,7 +252,8 @@ static void tcp_delack_timer(unsigned long data) | |||
| 252 | inet_csk(sk)->icsk_ack.blocked = 1; | 252 | inet_csk(sk)->icsk_ack.blocked = 1; |
| 253 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKLOCKED); | 253 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKLOCKED); |
| 254 | /* deleguate our work to tcp_release_cb() */ | 254 | /* deleguate our work to tcp_release_cb() */ |
| 255 | set_bit(TCP_WRITE_TIMER_DEFERRED, &tcp_sk(sk)->tsq_flags); | 255 | if (!test_and_set_bit(TCP_DELACK_TIMER_DEFERRED, &tcp_sk(sk)->tsq_flags)) |
| 256 | sock_hold(sk); | ||
| 256 | } | 257 | } |
| 257 | bh_unlock_sock(sk); | 258 | bh_unlock_sock(sk); |
| 258 | sock_put(sk); | 259 | sock_put(sk); |
| @@ -481,7 +482,8 @@ static void tcp_write_timer(unsigned long data) | |||
| 481 | tcp_write_timer_handler(sk); | 482 | tcp_write_timer_handler(sk); |
| 482 | } else { | 483 | } else { |
| 483 | /* deleguate our work to tcp_release_cb() */ | 484 | /* deleguate our work to tcp_release_cb() */ |
| 484 | set_bit(TCP_WRITE_TIMER_DEFERRED, &tcp_sk(sk)->tsq_flags); | 485 | if (!test_and_set_bit(TCP_WRITE_TIMER_DEFERRED, &tcp_sk(sk)->tsq_flags)) |
| 486 | sock_hold(sk); | ||
| 485 | } | 487 | } |
| 486 | bh_unlock_sock(sk); | 488 | bh_unlock_sock(sk); |
| 487 | sock_put(sk); | 489 | sock_put(sk); |
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index b4c3582a991f..6f6d1aca3c3d 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c | |||
| @@ -758,7 +758,7 @@ static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4) | |||
| 758 | uh->check = CSUM_MANGLED_0; | 758 | uh->check = CSUM_MANGLED_0; |
| 759 | 759 | ||
| 760 | send: | 760 | send: |
| 761 | err = ip_send_skb(skb); | 761 | err = ip_send_skb(sock_net(sk), skb); |
| 762 | if (err) { | 762 | if (err) { |
| 763 | if (err == -ENOBUFS && !inet->recverr) { | 763 | if (err == -ENOBUFS && !inet->recverr) { |
| 764 | UDP_INC_STATS_USER(sock_net(sk), | 764 | UDP_INC_STATS_USER(sock_net(sk), |
diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c index 58d23a572509..06814b6216dc 100644 --- a/net/ipv4/xfrm4_input.c +++ b/net/ipv4/xfrm4_input.c | |||
| @@ -27,8 +27,8 @@ static inline int xfrm4_rcv_encap_finish(struct sk_buff *skb) | |||
| 27 | if (skb_dst(skb) == NULL) { | 27 | if (skb_dst(skb) == NULL) { |
| 28 | const struct iphdr *iph = ip_hdr(skb); | 28 | const struct iphdr *iph = ip_hdr(skb); |
| 29 | 29 | ||
| 30 | if (ip_route_input(skb, iph->daddr, iph->saddr, | 30 | if (ip_route_input_noref(skb, iph->daddr, iph->saddr, |
| 31 | iph->tos, skb->dev)) | 31 | iph->tos, skb->dev)) |
| 32 | goto drop; | 32 | goto drop; |
| 33 | } | 33 | } |
| 34 | return dst_input(skb); | 34 | return dst_input(skb); |
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index c6281847f16a..681ea2f413e2 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c | |||
| @@ -92,6 +92,7 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, | |||
| 92 | xdst->u.rt.rt_type = rt->rt_type; | 92 | xdst->u.rt.rt_type = rt->rt_type; |
| 93 | xdst->u.rt.rt_gateway = rt->rt_gateway; | 93 | xdst->u.rt.rt_gateway = rt->rt_gateway; |
| 94 | xdst->u.rt.rt_pmtu = rt->rt_pmtu; | 94 | xdst->u.rt.rt_pmtu = rt->rt_pmtu; |
| 95 | INIT_LIST_HEAD(&xdst->u.rt.rt_uncached); | ||
| 95 | 96 | ||
| 96 | return 0; | 97 | return 0; |
| 97 | } | 98 | } |
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 79181819a24f..6bc85f7c31e3 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c | |||
| @@ -494,8 +494,7 @@ static void addrconf_forward_change(struct net *net, __s32 newf) | |||
| 494 | struct net_device *dev; | 494 | struct net_device *dev; |
| 495 | struct inet6_dev *idev; | 495 | struct inet6_dev *idev; |
| 496 | 496 | ||
| 497 | rcu_read_lock(); | 497 | for_each_netdev(net, dev) { |
| 498 | for_each_netdev_rcu(net, dev) { | ||
| 499 | idev = __in6_dev_get(dev); | 498 | idev = __in6_dev_get(dev); |
| 500 | if (idev) { | 499 | if (idev) { |
| 501 | int changed = (!idev->cnf.forwarding) ^ (!newf); | 500 | int changed = (!idev->cnf.forwarding) ^ (!newf); |
| @@ -504,7 +503,6 @@ static void addrconf_forward_change(struct net *net, __s32 newf) | |||
| 504 | dev_forward_change(idev); | 503 | dev_forward_change(idev); |
| 505 | } | 504 | } |
| 506 | } | 505 | } |
| 507 | rcu_read_unlock(); | ||
| 508 | } | 506 | } |
| 509 | 507 | ||
| 510 | static int addrconf_fixup_forwarding(struct ctl_table *table, int *p, int newf) | 508 | static int addrconf_fixup_forwarding(struct ctl_table *table, int *p, int newf) |
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 6dc7fd353ef5..282f3723ee19 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c | |||
| @@ -167,8 +167,6 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb) | |||
| 167 | struct esp_data *esp = x->data; | 167 | struct esp_data *esp = x->data; |
| 168 | 168 | ||
| 169 | /* skb is pure payload to encrypt */ | 169 | /* skb is pure payload to encrypt */ |
| 170 | err = -ENOMEM; | ||
| 171 | |||
| 172 | aead = esp->aead; | 170 | aead = esp->aead; |
| 173 | alen = crypto_aead_authsize(aead); | 171 | alen = crypto_aead_authsize(aead); |
| 174 | 172 | ||
| @@ -203,8 +201,10 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb) | |||
| 203 | } | 201 | } |
| 204 | 202 | ||
| 205 | tmp = esp_alloc_tmp(aead, nfrags + sglists, seqhilen); | 203 | tmp = esp_alloc_tmp(aead, nfrags + sglists, seqhilen); |
| 206 | if (!tmp) | 204 | if (!tmp) { |
| 205 | err = -ENOMEM; | ||
| 207 | goto error; | 206 | goto error; |
| 207 | } | ||
| 208 | 208 | ||
| 209 | seqhi = esp_tmp_seqhi(tmp); | 209 | seqhi = esp_tmp_seqhi(tmp); |
| 210 | iv = esp_tmp_iv(aead, tmp, seqhilen); | 210 | iv = esp_tmp_iv(aead, tmp, seqhilen); |
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index 5ab923e51af3..a52d864d562b 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c | |||
| @@ -47,9 +47,16 @@ | |||
| 47 | 47 | ||
| 48 | 48 | ||
| 49 | 49 | ||
| 50 | inline int ip6_rcv_finish( struct sk_buff *skb) | 50 | int ip6_rcv_finish(struct sk_buff *skb) |
| 51 | { | 51 | { |
| 52 | if (skb_dst(skb) == NULL) | 52 | if (sysctl_ip_early_demux && !skb_dst(skb)) { |
| 53 | const struct inet6_protocol *ipprot; | ||
| 54 | |||
| 55 | ipprot = rcu_dereference(inet6_protos[ipv6_hdr(skb)->nexthdr]); | ||
| 56 | if (ipprot && ipprot->early_demux) | ||
| 57 | ipprot->early_demux(skb); | ||
| 58 | } | ||
| 59 | if (!skb_dst(skb)) | ||
| 53 | ip6_route_input(skb); | 60 | ip6_route_input(skb); |
| 54 | 61 | ||
| 55 | return dst_input(skb); | 62 | return dst_input(skb); |
diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c index da2e92d05c15..745a32042950 100644 --- a/net/ipv6/proc.c +++ b/net/ipv6/proc.c | |||
| @@ -307,10 +307,10 @@ static int __net_init ipv6_proc_init_net(struct net *net) | |||
| 307 | goto proc_dev_snmp6_fail; | 307 | goto proc_dev_snmp6_fail; |
| 308 | return 0; | 308 | return 0; |
| 309 | 309 | ||
| 310 | proc_dev_snmp6_fail: | ||
| 311 | proc_net_remove(net, "snmp6"); | ||
| 310 | proc_snmp6_fail: | 312 | proc_snmp6_fail: |
| 311 | proc_net_remove(net, "sockstat6"); | 313 | proc_net_remove(net, "sockstat6"); |
| 312 | proc_dev_snmp6_fail: | ||
| 313 | proc_net_remove(net, "dev_snmp6"); | ||
| 314 | return -ENOMEM; | 314 | return -ENOMEM; |
| 315 | } | 315 | } |
| 316 | 316 | ||
diff --git a/net/ipv6/route.c b/net/ipv6/route.c index cf02cb97bbdd..8e80fd279100 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c | |||
| @@ -2480,12 +2480,8 @@ static int rt6_fill_node(struct net *net, | |||
| 2480 | goto nla_put_failure; | 2480 | goto nla_put_failure; |
| 2481 | if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric)) | 2481 | if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric)) |
| 2482 | goto nla_put_failure; | 2482 | goto nla_put_failure; |
| 2483 | if (!(rt->rt6i_flags & RTF_EXPIRES)) | 2483 | |
| 2484 | expires = 0; | 2484 | expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0; |
| 2485 | else if (rt->dst.expires - jiffies < INT_MAX) | ||
| 2486 | expires = rt->dst.expires - jiffies; | ||
| 2487 | else | ||
| 2488 | expires = INT_MAX; | ||
| 2489 | 2485 | ||
| 2490 | if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0) | 2486 | if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0) |
| 2491 | goto nla_put_failure; | 2487 | goto nla_put_failure; |
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index f49476e2d884..a3e60cc04a8a 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c | |||
| @@ -94,6 +94,18 @@ static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(struct sock *sk, | |||
| 94 | } | 94 | } |
| 95 | #endif | 95 | #endif |
| 96 | 96 | ||
| 97 | static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb) | ||
| 98 | { | ||
| 99 | struct dst_entry *dst = skb_dst(skb); | ||
| 100 | const struct rt6_info *rt = (const struct rt6_info *)dst; | ||
| 101 | |||
| 102 | dst_hold(dst); | ||
| 103 | sk->sk_rx_dst = dst; | ||
| 104 | inet_sk(sk)->rx_dst_ifindex = skb->skb_iif; | ||
| 105 | if (rt->rt6i_node) | ||
| 106 | inet6_sk(sk)->rx_dst_cookie = rt->rt6i_node->fn_sernum; | ||
| 107 | } | ||
| 108 | |||
| 97 | static void tcp_v6_hash(struct sock *sk) | 109 | static void tcp_v6_hash(struct sock *sk) |
| 98 | { | 110 | { |
| 99 | if (sk->sk_state != TCP_CLOSE) { | 111 | if (sk->sk_state != TCP_CLOSE) { |
| @@ -1270,6 +1282,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, | |||
| 1270 | 1282 | ||
| 1271 | newsk->sk_gso_type = SKB_GSO_TCPV6; | 1283 | newsk->sk_gso_type = SKB_GSO_TCPV6; |
| 1272 | __ip6_dst_store(newsk, dst, NULL, NULL); | 1284 | __ip6_dst_store(newsk, dst, NULL, NULL); |
| 1285 | inet6_sk_rx_dst_set(newsk, skb); | ||
| 1273 | 1286 | ||
| 1274 | newtcp6sk = (struct tcp6_sock *)newsk; | 1287 | newtcp6sk = (struct tcp6_sock *)newsk; |
| 1275 | inet_sk(newsk)->pinet6 = &newtcp6sk->inet6; | 1288 | inet_sk(newsk)->pinet6 = &newtcp6sk->inet6; |
| @@ -1299,7 +1312,8 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, | |||
| 1299 | /* Clone pktoptions received with SYN */ | 1312 | /* Clone pktoptions received with SYN */ |
| 1300 | newnp->pktoptions = NULL; | 1313 | newnp->pktoptions = NULL; |
| 1301 | if (treq->pktopts != NULL) { | 1314 | if (treq->pktopts != NULL) { |
| 1302 | newnp->pktoptions = skb_clone(treq->pktopts, GFP_ATOMIC); | 1315 | newnp->pktoptions = skb_clone(treq->pktopts, |
| 1316 | sk_gfp_atomic(sk, GFP_ATOMIC)); | ||
| 1303 | consume_skb(treq->pktopts); | 1317 | consume_skb(treq->pktopts); |
| 1304 | treq->pktopts = NULL; | 1318 | treq->pktopts = NULL; |
| 1305 | if (newnp->pktoptions) | 1319 | if (newnp->pktoptions) |
| @@ -1349,7 +1363,8 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, | |||
| 1349 | * across. Shucks. | 1363 | * across. Shucks. |
| 1350 | */ | 1364 | */ |
| 1351 | tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newnp->daddr, | 1365 | tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newnp->daddr, |
| 1352 | AF_INET6, key->key, key->keylen, GFP_ATOMIC); | 1366 | AF_INET6, key->key, key->keylen, |
| 1367 | sk_gfp_atomic(sk, GFP_ATOMIC)); | ||
| 1353 | } | 1368 | } |
| 1354 | #endif | 1369 | #endif |
| 1355 | 1370 | ||
| @@ -1442,10 +1457,20 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) | |||
| 1442 | --ANK (980728) | 1457 | --ANK (980728) |
| 1443 | */ | 1458 | */ |
| 1444 | if (np->rxopt.all) | 1459 | if (np->rxopt.all) |
| 1445 | opt_skb = skb_clone(skb, GFP_ATOMIC); | 1460 | opt_skb = skb_clone(skb, sk_gfp_atomic(sk, GFP_ATOMIC)); |
| 1446 | 1461 | ||
| 1447 | if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */ | 1462 | if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */ |
| 1463 | struct dst_entry *dst = sk->sk_rx_dst; | ||
| 1464 | |||
| 1448 | sock_rps_save_rxhash(sk, skb); | 1465 | sock_rps_save_rxhash(sk, skb); |
| 1466 | if (dst) { | ||
| 1467 | if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif || | ||
| 1468 | dst->ops->check(dst, np->rx_dst_cookie) == NULL) { | ||
| 1469 | dst_release(dst); | ||
| 1470 | sk->sk_rx_dst = NULL; | ||
| 1471 | } | ||
| 1472 | } | ||
| 1473 | |||
| 1449 | if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) | 1474 | if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) |
| 1450 | goto reset; | 1475 | goto reset; |
| 1451 | if (opt_skb) | 1476 | if (opt_skb) |
| @@ -1674,6 +1699,43 @@ do_time_wait: | |||
| 1674 | goto discard_it; | 1699 | goto discard_it; |
| 1675 | } | 1700 | } |
| 1676 | 1701 | ||
| 1702 | static void tcp_v6_early_demux(struct sk_buff *skb) | ||
| 1703 | { | ||
| 1704 | const struct ipv6hdr *hdr; | ||
| 1705 | const struct tcphdr *th; | ||
| 1706 | struct sock *sk; | ||
| 1707 | |||
| 1708 | if (skb->pkt_type != PACKET_HOST) | ||
| 1709 | return; | ||
| 1710 | |||
| 1711 | if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr))) | ||
| 1712 | return; | ||
| 1713 | |||
| 1714 | hdr = ipv6_hdr(skb); | ||
| 1715 | th = tcp_hdr(skb); | ||
| 1716 | |||
| 1717 | if (th->doff < sizeof(struct tcphdr) / 4) | ||
| 1718 | return; | ||
| 1719 | |||
| 1720 | sk = __inet6_lookup_established(dev_net(skb->dev), &tcp_hashinfo, | ||
| 1721 | &hdr->saddr, th->source, | ||
| 1722 | &hdr->daddr, ntohs(th->dest), | ||
| 1723 | inet6_iif(skb)); | ||
| 1724 | if (sk) { | ||
| 1725 | skb->sk = sk; | ||
| 1726 | skb->destructor = sock_edemux; | ||
| 1727 | if (sk->sk_state != TCP_TIME_WAIT) { | ||
| 1728 | struct dst_entry *dst = sk->sk_rx_dst; | ||
| 1729 | struct inet_sock *icsk = inet_sk(sk); | ||
| 1730 | if (dst) | ||
| 1731 | dst = dst_check(dst, inet6_sk(sk)->rx_dst_cookie); | ||
| 1732 | if (dst && | ||
| 1733 | icsk->rx_dst_ifindex == skb->skb_iif) | ||
| 1734 | skb_dst_set_noref(skb, dst); | ||
| 1735 | } | ||
| 1736 | } | ||
| 1737 | } | ||
| 1738 | |||
| 1677 | static struct timewait_sock_ops tcp6_timewait_sock_ops = { | 1739 | static struct timewait_sock_ops tcp6_timewait_sock_ops = { |
| 1678 | .twsk_obj_size = sizeof(struct tcp6_timewait_sock), | 1740 | .twsk_obj_size = sizeof(struct tcp6_timewait_sock), |
| 1679 | .twsk_unique = tcp_twsk_unique, | 1741 | .twsk_unique = tcp_twsk_unique, |
| @@ -1684,6 +1746,7 @@ static const struct inet_connection_sock_af_ops ipv6_specific = { | |||
| 1684 | .queue_xmit = inet6_csk_xmit, | 1746 | .queue_xmit = inet6_csk_xmit, |
| 1685 | .send_check = tcp_v6_send_check, | 1747 | .send_check = tcp_v6_send_check, |
| 1686 | .rebuild_header = inet6_sk_rebuild_header, | 1748 | .rebuild_header = inet6_sk_rebuild_header, |
| 1749 | .sk_rx_dst_set = inet6_sk_rx_dst_set, | ||
| 1687 | .conn_request = tcp_v6_conn_request, | 1750 | .conn_request = tcp_v6_conn_request, |
| 1688 | .syn_recv_sock = tcp_v6_syn_recv_sock, | 1751 | .syn_recv_sock = tcp_v6_syn_recv_sock, |
| 1689 | .net_header_len = sizeof(struct ipv6hdr), | 1752 | .net_header_len = sizeof(struct ipv6hdr), |
| @@ -1715,6 +1778,7 @@ static const struct inet_connection_sock_af_ops ipv6_mapped = { | |||
| 1715 | .queue_xmit = ip_queue_xmit, | 1778 | .queue_xmit = ip_queue_xmit, |
| 1716 | .send_check = tcp_v4_send_check, | 1779 | .send_check = tcp_v4_send_check, |
| 1717 | .rebuild_header = inet_sk_rebuild_header, | 1780 | .rebuild_header = inet_sk_rebuild_header, |
| 1781 | .sk_rx_dst_set = inet_sk_rx_dst_set, | ||
| 1718 | .conn_request = tcp_v6_conn_request, | 1782 | .conn_request = tcp_v6_conn_request, |
| 1719 | .syn_recv_sock = tcp_v6_syn_recv_sock, | 1783 | .syn_recv_sock = tcp_v6_syn_recv_sock, |
| 1720 | .net_header_len = sizeof(struct iphdr), | 1784 | .net_header_len = sizeof(struct iphdr), |
| @@ -1978,12 +2042,13 @@ struct proto tcpv6_prot = { | |||
| 1978 | .compat_setsockopt = compat_tcp_setsockopt, | 2042 | .compat_setsockopt = compat_tcp_setsockopt, |
| 1979 | .compat_getsockopt = compat_tcp_getsockopt, | 2043 | .compat_getsockopt = compat_tcp_getsockopt, |
| 1980 | #endif | 2044 | #endif |
| 1981 | #ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM | 2045 | #ifdef CONFIG_MEMCG_KMEM |
| 1982 | .proto_cgroup = tcp_proto_cgroup, | 2046 | .proto_cgroup = tcp_proto_cgroup, |
| 1983 | #endif | 2047 | #endif |
| 1984 | }; | 2048 | }; |
| 1985 | 2049 | ||
| 1986 | static const struct inet6_protocol tcpv6_protocol = { | 2050 | static const struct inet6_protocol tcpv6_protocol = { |
| 2051 | .early_demux = tcp_v6_early_demux, | ||
| 1987 | .handler = tcp_v6_rcv, | 2052 | .handler = tcp_v6_rcv, |
| 1988 | .err_handler = tcp_v6_err, | 2053 | .err_handler = tcp_v6_err, |
| 1989 | .gso_send_check = tcp_v6_gso_send_check, | 2054 | .gso_send_check = tcp_v6_gso_send_check, |
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index ef39812107b1..f8c4c08ffb60 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c | |||
| @@ -73,6 +73,13 @@ static int xfrm6_get_tos(const struct flowi *fl) | |||
| 73 | return 0; | 73 | return 0; |
| 74 | } | 74 | } |
| 75 | 75 | ||
| 76 | static void xfrm6_init_dst(struct net *net, struct xfrm_dst *xdst) | ||
| 77 | { | ||
| 78 | struct rt6_info *rt = (struct rt6_info *)xdst; | ||
| 79 | |||
| 80 | rt6_init_peer(rt, net->ipv6.peers); | ||
| 81 | } | ||
| 82 | |||
| 76 | static int xfrm6_init_path(struct xfrm_dst *path, struct dst_entry *dst, | 83 | static int xfrm6_init_path(struct xfrm_dst *path, struct dst_entry *dst, |
| 77 | int nfheader_len) | 84 | int nfheader_len) |
| 78 | { | 85 | { |
| @@ -286,6 +293,7 @@ static struct xfrm_policy_afinfo xfrm6_policy_afinfo = { | |||
| 286 | .get_saddr = xfrm6_get_saddr, | 293 | .get_saddr = xfrm6_get_saddr, |
| 287 | .decode_session = _decode_session6, | 294 | .decode_session = _decode_session6, |
| 288 | .get_tos = xfrm6_get_tos, | 295 | .get_tos = xfrm6_get_tos, |
| 296 | .init_dst = xfrm6_init_dst, | ||
| 289 | .init_path = xfrm6_init_path, | 297 | .init_path = xfrm6_init_path, |
| 290 | .fill_dst = xfrm6_fill_dst, | 298 | .fill_dst = xfrm6_fill_dst, |
| 291 | .blackhole_route = ip6_blackhole_route, | 299 | .blackhole_route = ip6_blackhole_route, |
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 393355d37b47..513cab08a986 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c | |||
| @@ -1347,11 +1347,10 @@ static void l2tp_tunnel_free(struct l2tp_tunnel *tunnel) | |||
| 1347 | /* Remove from tunnel list */ | 1347 | /* Remove from tunnel list */ |
| 1348 | spin_lock_bh(&pn->l2tp_tunnel_list_lock); | 1348 | spin_lock_bh(&pn->l2tp_tunnel_list_lock); |
| 1349 | list_del_rcu(&tunnel->list); | 1349 | list_del_rcu(&tunnel->list); |
| 1350 | kfree_rcu(tunnel, rcu); | ||
| 1350 | spin_unlock_bh(&pn->l2tp_tunnel_list_lock); | 1351 | spin_unlock_bh(&pn->l2tp_tunnel_list_lock); |
| 1351 | synchronize_rcu(); | ||
| 1352 | 1352 | ||
| 1353 | atomic_dec(&l2tp_tunnel_count); | 1353 | atomic_dec(&l2tp_tunnel_count); |
| 1354 | kfree(tunnel); | ||
| 1355 | } | 1354 | } |
| 1356 | 1355 | ||
| 1357 | /* Create a socket for the tunnel, if one isn't set up by | 1356 | /* Create a socket for the tunnel, if one isn't set up by |
diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h index a38ec6cdeee1..56d583e083a7 100644 --- a/net/l2tp/l2tp_core.h +++ b/net/l2tp/l2tp_core.h | |||
| @@ -163,6 +163,7 @@ struct l2tp_tunnel_cfg { | |||
| 163 | 163 | ||
| 164 | struct l2tp_tunnel { | 164 | struct l2tp_tunnel { |
| 165 | int magic; /* Should be L2TP_TUNNEL_MAGIC */ | 165 | int magic; /* Should be L2TP_TUNNEL_MAGIC */ |
| 166 | struct rcu_head rcu; | ||
| 166 | rwlock_t hlist_lock; /* protect session_hlist */ | 167 | rwlock_t hlist_lock; /* protect session_hlist */ |
| 167 | struct hlist_head session_hlist[L2TP_HASH_SIZE]; | 168 | struct hlist_head session_hlist[L2TP_HASH_SIZE]; |
| 168 | /* hashed list of sessions, | 169 | /* hashed list of sessions, |
diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index 35e1e4bde587..927547171bc7 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c | |||
| @@ -410,6 +410,7 @@ static int l2tp_ip6_getname(struct socket *sock, struct sockaddr *uaddr, | |||
| 410 | lsa->l2tp_family = AF_INET6; | 410 | lsa->l2tp_family = AF_INET6; |
| 411 | lsa->l2tp_flowinfo = 0; | 411 | lsa->l2tp_flowinfo = 0; |
| 412 | lsa->l2tp_scope_id = 0; | 412 | lsa->l2tp_scope_id = 0; |
| 413 | lsa->l2tp_unused = 0; | ||
| 413 | if (peer) { | 414 | if (peer) { |
| 414 | if (!lsk->peer_conn_id) | 415 | if (!lsk->peer_conn_id) |
| 415 | return -ENOTCONN; | 416 | return -ENOTCONN; |
diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index f6fe4d400502..c2190005a114 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c | |||
| @@ -969,14 +969,13 @@ static int llc_ui_getname(struct socket *sock, struct sockaddr *uaddr, | |||
| 969 | struct sockaddr_llc sllc; | 969 | struct sockaddr_llc sllc; |
| 970 | struct sock *sk = sock->sk; | 970 | struct sock *sk = sock->sk; |
| 971 | struct llc_sock *llc = llc_sk(sk); | 971 | struct llc_sock *llc = llc_sk(sk); |
| 972 | int rc = 0; | 972 | int rc = -EBADF; |
| 973 | 973 | ||
| 974 | memset(&sllc, 0, sizeof(sllc)); | 974 | memset(&sllc, 0, sizeof(sllc)); |
| 975 | lock_sock(sk); | 975 | lock_sock(sk); |
| 976 | if (sock_flag(sk, SOCK_ZAPPED)) | 976 | if (sock_flag(sk, SOCK_ZAPPED)) |
| 977 | goto out; | 977 | goto out; |
| 978 | *uaddrlen = sizeof(sllc); | 978 | *uaddrlen = sizeof(sllc); |
| 979 | memset(uaddr, 0, *uaddrlen); | ||
| 980 | if (peer) { | 979 | if (peer) { |
| 981 | rc = -ENOTCONN; | 980 | rc = -ENOTCONN; |
| 982 | if (sk->sk_state != TCP_ESTABLISHED) | 981 | if (sk->sk_state != TCP_ESTABLISHED) |
| @@ -1206,7 +1205,7 @@ static int __init llc2_init(void) | |||
| 1206 | rc = llc_proc_init(); | 1205 | rc = llc_proc_init(); |
| 1207 | if (rc != 0) { | 1206 | if (rc != 0) { |
| 1208 | printk(llc_proc_err_msg); | 1207 | printk(llc_proc_err_msg); |
| 1209 | goto out_unregister_llc_proto; | 1208 | goto out_station; |
| 1210 | } | 1209 | } |
| 1211 | rc = llc_sysctl_init(); | 1210 | rc = llc_sysctl_init(); |
| 1212 | if (rc) { | 1211 | if (rc) { |
| @@ -1226,7 +1225,8 @@ out_sysctl: | |||
| 1226 | llc_sysctl_exit(); | 1225 | llc_sysctl_exit(); |
| 1227 | out_proc: | 1226 | out_proc: |
| 1228 | llc_proc_exit(); | 1227 | llc_proc_exit(); |
| 1229 | out_unregister_llc_proto: | 1228 | out_station: |
| 1229 | llc_station_exit(); | ||
| 1230 | proto_unregister(&llc_proto); | 1230 | proto_unregister(&llc_proto); |
| 1231 | goto out; | 1231 | goto out; |
| 1232 | } | 1232 | } |
diff --git a/net/llc/llc_input.c b/net/llc/llc_input.c index e32cab44ea95..dd3e83328ad5 100644 --- a/net/llc/llc_input.c +++ b/net/llc/llc_input.c | |||
| @@ -42,6 +42,7 @@ static void (*llc_type_handlers[2])(struct llc_sap *sap, | |||
| 42 | void llc_add_pack(int type, void (*handler)(struct llc_sap *sap, | 42 | void llc_add_pack(int type, void (*handler)(struct llc_sap *sap, |
| 43 | struct sk_buff *skb)) | 43 | struct sk_buff *skb)) |
| 44 | { | 44 | { |
| 45 | smp_wmb(); /* ensure initialisation is complete before it's called */ | ||
| 45 | if (type == LLC_DEST_SAP || type == LLC_DEST_CONN) | 46 | if (type == LLC_DEST_SAP || type == LLC_DEST_CONN) |
| 46 | llc_type_handlers[type - 1] = handler; | 47 | llc_type_handlers[type - 1] = handler; |
| 47 | } | 48 | } |
| @@ -50,11 +51,19 @@ void llc_remove_pack(int type) | |||
| 50 | { | 51 | { |
| 51 | if (type == LLC_DEST_SAP || type == LLC_DEST_CONN) | 52 | if (type == LLC_DEST_SAP || type == LLC_DEST_CONN) |
| 52 | llc_type_handlers[type - 1] = NULL; | 53 | llc_type_handlers[type - 1] = NULL; |
| 54 | synchronize_net(); | ||
| 53 | } | 55 | } |
| 54 | 56 | ||
| 55 | void llc_set_station_handler(void (*handler)(struct sk_buff *skb)) | 57 | void llc_set_station_handler(void (*handler)(struct sk_buff *skb)) |
| 56 | { | 58 | { |
| 59 | /* Ensure initialisation is complete before it's called */ | ||
| 60 | if (handler) | ||
| 61 | smp_wmb(); | ||
| 62 | |||
| 57 | llc_station_handler = handler; | 63 | llc_station_handler = handler; |
| 64 | |||
| 65 | if (!handler) | ||
| 66 | synchronize_net(); | ||
| 58 | } | 67 | } |
| 59 | 68 | ||
| 60 | /** | 69 | /** |
| @@ -150,6 +159,8 @@ int llc_rcv(struct sk_buff *skb, struct net_device *dev, | |||
| 150 | int dest; | 159 | int dest; |
| 151 | int (*rcv)(struct sk_buff *, struct net_device *, | 160 | int (*rcv)(struct sk_buff *, struct net_device *, |
| 152 | struct packet_type *, struct net_device *); | 161 | struct packet_type *, struct net_device *); |
| 162 | void (*sta_handler)(struct sk_buff *skb); | ||
| 163 | void (*sap_handler)(struct llc_sap *sap, struct sk_buff *skb); | ||
| 153 | 164 | ||
| 154 | if (!net_eq(dev_net(dev), &init_net)) | 165 | if (!net_eq(dev_net(dev), &init_net)) |
| 155 | goto drop; | 166 | goto drop; |
| @@ -182,7 +193,8 @@ int llc_rcv(struct sk_buff *skb, struct net_device *dev, | |||
| 182 | */ | 193 | */ |
| 183 | rcv = rcu_dereference(sap->rcv_func); | 194 | rcv = rcu_dereference(sap->rcv_func); |
| 184 | dest = llc_pdu_type(skb); | 195 | dest = llc_pdu_type(skb); |
| 185 | if (unlikely(!dest || !llc_type_handlers[dest - 1])) { | 196 | sap_handler = dest ? ACCESS_ONCE(llc_type_handlers[dest - 1]) : NULL; |
| 197 | if (unlikely(!sap_handler)) { | ||
| 186 | if (rcv) | 198 | if (rcv) |
| 187 | rcv(skb, dev, pt, orig_dev); | 199 | rcv(skb, dev, pt, orig_dev); |
| 188 | else | 200 | else |
| @@ -193,7 +205,7 @@ int llc_rcv(struct sk_buff *skb, struct net_device *dev, | |||
| 193 | if (cskb) | 205 | if (cskb) |
| 194 | rcv(cskb, dev, pt, orig_dev); | 206 | rcv(cskb, dev, pt, orig_dev); |
| 195 | } | 207 | } |
| 196 | llc_type_handlers[dest - 1](sap, skb); | 208 | sap_handler(sap, skb); |
| 197 | } | 209 | } |
| 198 | llc_sap_put(sap); | 210 | llc_sap_put(sap); |
| 199 | out: | 211 | out: |
| @@ -202,9 +214,10 @@ drop: | |||
| 202 | kfree_skb(skb); | 214 | kfree_skb(skb); |
| 203 | goto out; | 215 | goto out; |
| 204 | handle_station: | 216 | handle_station: |
| 205 | if (!llc_station_handler) | 217 | sta_handler = ACCESS_ONCE(llc_station_handler); |
| 218 | if (!sta_handler) | ||
| 206 | goto drop; | 219 | goto drop; |
| 207 | llc_station_handler(skb); | 220 | sta_handler(skb); |
| 208 | goto out; | 221 | goto out; |
| 209 | } | 222 | } |
| 210 | 223 | ||
diff --git a/net/llc/llc_station.c b/net/llc/llc_station.c index 39a8d8924b9c..b2f2bac2c2a2 100644 --- a/net/llc/llc_station.c +++ b/net/llc/llc_station.c | |||
| @@ -268,7 +268,7 @@ static int llc_station_ac_send_null_dsap_xid_c(struct sk_buff *skb) | |||
| 268 | out: | 268 | out: |
| 269 | return rc; | 269 | return rc; |
| 270 | free: | 270 | free: |
| 271 | kfree_skb(skb); | 271 | kfree_skb(nskb); |
| 272 | goto out; | 272 | goto out; |
| 273 | } | 273 | } |
| 274 | 274 | ||
| @@ -293,7 +293,7 @@ static int llc_station_ac_send_xid_r(struct sk_buff *skb) | |||
| 293 | out: | 293 | out: |
| 294 | return rc; | 294 | return rc; |
| 295 | free: | 295 | free: |
| 296 | kfree_skb(skb); | 296 | kfree_skb(nskb); |
| 297 | goto out; | 297 | goto out; |
| 298 | } | 298 | } |
| 299 | 299 | ||
| @@ -322,7 +322,7 @@ static int llc_station_ac_send_test_r(struct sk_buff *skb) | |||
| 322 | out: | 322 | out: |
| 323 | return rc; | 323 | return rc; |
| 324 | free: | 324 | free: |
| 325 | kfree_skb(skb); | 325 | kfree_skb(nskb); |
| 326 | goto out; | 326 | goto out; |
| 327 | } | 327 | } |
| 328 | 328 | ||
| @@ -687,12 +687,8 @@ static void llc_station_rcv(struct sk_buff *skb) | |||
| 687 | llc_station_state_process(skb); | 687 | llc_station_state_process(skb); |
| 688 | } | 688 | } |
| 689 | 689 | ||
| 690 | int __init llc_station_init(void) | 690 | void __init llc_station_init(void) |
| 691 | { | 691 | { |
| 692 | int rc = -ENOBUFS; | ||
| 693 | struct sk_buff *skb; | ||
| 694 | struct llc_station_state_ev *ev; | ||
| 695 | |||
| 696 | skb_queue_head_init(&llc_main_station.mac_pdu_q); | 692 | skb_queue_head_init(&llc_main_station.mac_pdu_q); |
| 697 | skb_queue_head_init(&llc_main_station.ev_q.list); | 693 | skb_queue_head_init(&llc_main_station.ev_q.list); |
| 698 | spin_lock_init(&llc_main_station.ev_q.lock); | 694 | spin_lock_init(&llc_main_station.ev_q.lock); |
| @@ -700,23 +696,12 @@ int __init llc_station_init(void) | |||
| 700 | (unsigned long)&llc_main_station); | 696 | (unsigned long)&llc_main_station); |
| 701 | llc_main_station.ack_timer.expires = jiffies + | 697 | llc_main_station.ack_timer.expires = jiffies + |
| 702 | sysctl_llc_station_ack_timeout; | 698 | sysctl_llc_station_ack_timeout; |
| 703 | skb = alloc_skb(0, GFP_ATOMIC); | ||
| 704 | if (!skb) | ||
| 705 | goto out; | ||
| 706 | rc = 0; | ||
| 707 | llc_set_station_handler(llc_station_rcv); | ||
| 708 | ev = llc_station_ev(skb); | ||
| 709 | memset(ev, 0, sizeof(*ev)); | ||
| 710 | llc_main_station.maximum_retry = 1; | 699 | llc_main_station.maximum_retry = 1; |
| 711 | llc_main_station.state = LLC_STATION_STATE_DOWN; | 700 | llc_main_station.state = LLC_STATION_STATE_UP; |
| 712 | ev->type = LLC_STATION_EV_TYPE_SIMPLE; | 701 | llc_set_station_handler(llc_station_rcv); |
| 713 | ev->prim_type = LLC_STATION_EV_ENABLE_WITHOUT_DUP_ADDR_CHECK; | ||
| 714 | rc = llc_station_next_state(skb); | ||
| 715 | out: | ||
| 716 | return rc; | ||
| 717 | } | 702 | } |
| 718 | 703 | ||
| 719 | void __exit llc_station_exit(void) | 704 | void llc_station_exit(void) |
| 720 | { | 705 | { |
| 721 | llc_set_station_handler(NULL); | 706 | llc_set_station_handler(NULL); |
| 722 | } | 707 | } |
diff --git a/net/mac80211/led.c b/net/mac80211/led.c index 1bf7903496f8..bcffa6903129 100644 --- a/net/mac80211/led.c +++ b/net/mac80211/led.c | |||
| @@ -276,7 +276,7 @@ static void ieee80211_stop_tpt_led_trig(struct ieee80211_local *local) | |||
| 276 | 276 | ||
| 277 | read_lock(&tpt_trig->trig.leddev_list_lock); | 277 | read_lock(&tpt_trig->trig.leddev_list_lock); |
| 278 | list_for_each_entry(led_cdev, &tpt_trig->trig.led_cdevs, trig_list) | 278 | list_for_each_entry(led_cdev, &tpt_trig->trig.led_cdevs, trig_list) |
| 279 | led_brightness_set(led_cdev, LED_OFF); | 279 | led_set_brightness(led_cdev, LED_OFF); |
| 280 | read_unlock(&tpt_trig->trig.leddev_list_lock); | 280 | read_unlock(&tpt_trig->trig.leddev_list_lock); |
| 281 | } | 281 | } |
| 282 | 282 | ||
diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index 6fac18c0423f..85572353a7e3 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c | |||
| @@ -622,6 +622,7 @@ void ieee80211_stop_mesh(struct ieee80211_sub_if_data *sdata) | |||
| 622 | 622 | ||
| 623 | del_timer_sync(&sdata->u.mesh.housekeeping_timer); | 623 | del_timer_sync(&sdata->u.mesh.housekeeping_timer); |
| 624 | del_timer_sync(&sdata->u.mesh.mesh_path_root_timer); | 624 | del_timer_sync(&sdata->u.mesh.mesh_path_root_timer); |
| 625 | del_timer_sync(&sdata->u.mesh.mesh_path_timer); | ||
| 625 | /* | 626 | /* |
| 626 | * If the timer fired while we waited for it, it will have | 627 | * If the timer fired while we waited for it, it will have |
| 627 | * requeued the work. Now the work will be running again | 628 | * requeued the work. Now the work will be running again |
| @@ -634,6 +635,8 @@ void ieee80211_stop_mesh(struct ieee80211_sub_if_data *sdata) | |||
| 634 | local->fif_other_bss--; | 635 | local->fif_other_bss--; |
| 635 | atomic_dec(&local->iff_allmultis); | 636 | atomic_dec(&local->iff_allmultis); |
| 636 | ieee80211_configure_filter(local); | 637 | ieee80211_configure_filter(local); |
| 638 | |||
| 639 | sdata->u.mesh.timers_running = 0; | ||
| 637 | } | 640 | } |
| 638 | 641 | ||
| 639 | static void ieee80211_mesh_rx_bcn_presp(struct ieee80211_sub_if_data *sdata, | 642 | static void ieee80211_mesh_rx_bcn_presp(struct ieee80211_sub_if_data *sdata, |
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index cef0c9e79aba..a4a5acdbaa4d 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c | |||
| @@ -1430,6 +1430,8 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, | |||
| 1430 | del_timer_sync(&sdata->u.mgd.bcn_mon_timer); | 1430 | del_timer_sync(&sdata->u.mgd.bcn_mon_timer); |
| 1431 | del_timer_sync(&sdata->u.mgd.timer); | 1431 | del_timer_sync(&sdata->u.mgd.timer); |
| 1432 | del_timer_sync(&sdata->u.mgd.chswitch_timer); | 1432 | del_timer_sync(&sdata->u.mgd.chswitch_timer); |
| 1433 | |||
| 1434 | sdata->u.mgd.timers_running = 0; | ||
| 1433 | } | 1435 | } |
| 1434 | 1436 | ||
| 1435 | void ieee80211_sta_rx_notify(struct ieee80211_sub_if_data *sdata, | 1437 | void ieee80211_sta_rx_notify(struct ieee80211_sub_if_data *sdata, |
diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index bcaee5d12839..839dd9737989 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c | |||
| @@ -299,7 +299,7 @@ static void __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted, | |||
| 299 | if (local->scan_req != local->int_scan_req) | 299 | if (local->scan_req != local->int_scan_req) |
| 300 | cfg80211_scan_done(local->scan_req, aborted); | 300 | cfg80211_scan_done(local->scan_req, aborted); |
| 301 | local->scan_req = NULL; | 301 | local->scan_req = NULL; |
| 302 | local->scan_sdata = NULL; | 302 | rcu_assign_pointer(local->scan_sdata, NULL); |
| 303 | 303 | ||
| 304 | local->scanning = 0; | 304 | local->scanning = 0; |
| 305 | local->scan_channel = NULL; | 305 | local->scan_channel = NULL; |
| @@ -984,7 +984,6 @@ int ieee80211_request_sched_scan_stop(struct ieee80211_sub_if_data *sdata) | |||
| 984 | kfree(local->sched_scan_ies.ie[i]); | 984 | kfree(local->sched_scan_ies.ie[i]); |
| 985 | 985 | ||
| 986 | drv_sched_scan_stop(local, sdata); | 986 | drv_sched_scan_stop(local, sdata); |
| 987 | rcu_assign_pointer(local->sched_scan_sdata, NULL); | ||
| 988 | } | 987 | } |
| 989 | out: | 988 | out: |
| 990 | mutex_unlock(&local->mtx); | 989 | mutex_unlock(&local->mtx); |
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index acf712ffb5e6..c5e8c9c31f76 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c | |||
| @@ -1811,37 +1811,31 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb, | |||
| 1811 | meshhdrlen = ieee80211_new_mesh_header(&mesh_hdr, | 1811 | meshhdrlen = ieee80211_new_mesh_header(&mesh_hdr, |
| 1812 | sdata, NULL, NULL); | 1812 | sdata, NULL, NULL); |
| 1813 | } else { | 1813 | } else { |
| 1814 | int is_mesh_mcast = 1; | 1814 | /* DS -> MBSS (802.11-2012 13.11.3.3). |
| 1815 | const u8 *mesh_da; | 1815 | * For unicast with unknown forwarding information, |
| 1816 | * destination might be in the MBSS or if that fails | ||
| 1817 | * forwarded to another mesh gate. In either case | ||
| 1818 | * resolution will be handled in ieee80211_xmit(), so | ||
| 1819 | * leave the original DA. This also works for mcast */ | ||
| 1820 | const u8 *mesh_da = skb->data; | ||
| 1821 | |||
| 1822 | if (mppath) | ||
| 1823 | mesh_da = mppath->mpp; | ||
| 1824 | else if (mpath) | ||
| 1825 | mesh_da = mpath->dst; | ||
| 1826 | rcu_read_unlock(); | ||
| 1816 | 1827 | ||
| 1817 | if (is_multicast_ether_addr(skb->data)) | ||
| 1818 | /* DA TA mSA AE:SA */ | ||
| 1819 | mesh_da = skb->data; | ||
| 1820 | else { | ||
| 1821 | static const u8 bcast[ETH_ALEN] = | ||
| 1822 | { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; | ||
| 1823 | if (mppath) { | ||
| 1824 | /* RA TA mDA mSA AE:DA SA */ | ||
| 1825 | mesh_da = mppath->mpp; | ||
| 1826 | is_mesh_mcast = 0; | ||
| 1827 | } else if (mpath) { | ||
| 1828 | mesh_da = mpath->dst; | ||
| 1829 | is_mesh_mcast = 0; | ||
| 1830 | } else { | ||
| 1831 | /* DA TA mSA AE:SA */ | ||
| 1832 | mesh_da = bcast; | ||
| 1833 | } | ||
| 1834 | } | ||
| 1835 | hdrlen = ieee80211_fill_mesh_addresses(&hdr, &fc, | 1828 | hdrlen = ieee80211_fill_mesh_addresses(&hdr, &fc, |
| 1836 | mesh_da, sdata->vif.addr); | 1829 | mesh_da, sdata->vif.addr); |
| 1837 | rcu_read_unlock(); | 1830 | if (is_multicast_ether_addr(mesh_da)) |
| 1838 | if (is_mesh_mcast) | 1831 | /* DA TA mSA AE:SA */ |
| 1839 | meshhdrlen = | 1832 | meshhdrlen = |
| 1840 | ieee80211_new_mesh_header(&mesh_hdr, | 1833 | ieee80211_new_mesh_header(&mesh_hdr, |
| 1841 | sdata, | 1834 | sdata, |
| 1842 | skb->data + ETH_ALEN, | 1835 | skb->data + ETH_ALEN, |
| 1843 | NULL); | 1836 | NULL); |
| 1844 | else | 1837 | else |
| 1838 | /* RA TA mDA mSA AE:DA SA */ | ||
| 1845 | meshhdrlen = | 1839 | meshhdrlen = |
| 1846 | ieee80211_new_mesh_header(&mesh_hdr, | 1840 | ieee80211_new_mesh_header(&mesh_hdr, |
| 1847 | sdata, | 1841 | sdata, |
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 84444dda194b..f51013c07b9f 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c | |||
| @@ -1171,8 +1171,10 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u, | |||
| 1171 | goto out_err; | 1171 | goto out_err; |
| 1172 | } | 1172 | } |
| 1173 | svc->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats); | 1173 | svc->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats); |
| 1174 | if (!svc->stats.cpustats) | 1174 | if (!svc->stats.cpustats) { |
| 1175 | ret = -ENOMEM; | ||
| 1175 | goto out_err; | 1176 | goto out_err; |
| 1177 | } | ||
| 1176 | 1178 | ||
| 1177 | /* I'm the first user of the service */ | 1179 | /* I'm the first user of the service */ |
| 1178 | atomic_set(&svc->usecnt, 0); | 1180 | atomic_set(&svc->usecnt, 0); |
| @@ -2759,6 +2761,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) | |||
| 2759 | { | 2761 | { |
| 2760 | struct ip_vs_timeout_user t; | 2762 | struct ip_vs_timeout_user t; |
| 2761 | 2763 | ||
| 2764 | memset(&t, 0, sizeof(t)); | ||
| 2762 | __ip_vs_get_timeouts(net, &t); | 2765 | __ip_vs_get_timeouts(net, &t); |
| 2763 | if (copy_to_user(user, &t, sizeof(t)) != 0) | 2766 | if (copy_to_user(user, &t, sizeof(t)) != 0) |
| 2764 | ret = -EFAULT; | 2767 | ret = -EFAULT; |
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index cf4875565d67..2ceec64b19f9 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c | |||
| @@ -249,12 +249,15 @@ static void death_by_event(unsigned long ul_conntrack) | |||
| 249 | { | 249 | { |
| 250 | struct nf_conn *ct = (void *)ul_conntrack; | 250 | struct nf_conn *ct = (void *)ul_conntrack; |
| 251 | struct net *net = nf_ct_net(ct); | 251 | struct net *net = nf_ct_net(ct); |
| 252 | struct nf_conntrack_ecache *ecache = nf_ct_ecache_find(ct); | ||
| 253 | |||
| 254 | BUG_ON(ecache == NULL); | ||
| 252 | 255 | ||
| 253 | if (nf_conntrack_event(IPCT_DESTROY, ct) < 0) { | 256 | if (nf_conntrack_event(IPCT_DESTROY, ct) < 0) { |
| 254 | /* bad luck, let's retry again */ | 257 | /* bad luck, let's retry again */ |
| 255 | ct->timeout.expires = jiffies + | 258 | ecache->timeout.expires = jiffies + |
| 256 | (random32() % net->ct.sysctl_events_retry_timeout); | 259 | (random32() % net->ct.sysctl_events_retry_timeout); |
| 257 | add_timer(&ct->timeout); | 260 | add_timer(&ecache->timeout); |
| 258 | return; | 261 | return; |
| 259 | } | 262 | } |
| 260 | /* we've got the event delivered, now it's dying */ | 263 | /* we've got the event delivered, now it's dying */ |
| @@ -268,6 +271,9 @@ static void death_by_event(unsigned long ul_conntrack) | |||
| 268 | void nf_ct_insert_dying_list(struct nf_conn *ct) | 271 | void nf_ct_insert_dying_list(struct nf_conn *ct) |
| 269 | { | 272 | { |
| 270 | struct net *net = nf_ct_net(ct); | 273 | struct net *net = nf_ct_net(ct); |
| 274 | struct nf_conntrack_ecache *ecache = nf_ct_ecache_find(ct); | ||
| 275 | |||
| 276 | BUG_ON(ecache == NULL); | ||
| 271 | 277 | ||
| 272 | /* add this conntrack to the dying list */ | 278 | /* add this conntrack to the dying list */ |
| 273 | spin_lock_bh(&nf_conntrack_lock); | 279 | spin_lock_bh(&nf_conntrack_lock); |
| @@ -275,10 +281,10 @@ void nf_ct_insert_dying_list(struct nf_conn *ct) | |||
| 275 | &net->ct.dying); | 281 | &net->ct.dying); |
| 276 | spin_unlock_bh(&nf_conntrack_lock); | 282 | spin_unlock_bh(&nf_conntrack_lock); |
| 277 | /* set a new timer to retry event delivery */ | 283 | /* set a new timer to retry event delivery */ |
| 278 | setup_timer(&ct->timeout, death_by_event, (unsigned long)ct); | 284 | setup_timer(&ecache->timeout, death_by_event, (unsigned long)ct); |
| 279 | ct->timeout.expires = jiffies + | 285 | ecache->timeout.expires = jiffies + |
| 280 | (random32() % net->ct.sysctl_events_retry_timeout); | 286 | (random32() % net->ct.sysctl_events_retry_timeout); |
| 281 | add_timer(&ct->timeout); | 287 | add_timer(&ecache->timeout); |
| 282 | } | 288 | } |
| 283 | EXPORT_SYMBOL_GPL(nf_ct_insert_dying_list); | 289 | EXPORT_SYMBOL_GPL(nf_ct_insert_dying_list); |
| 284 | 290 | ||
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index 45cf602a76bc..527651a53a45 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c | |||
| @@ -361,23 +361,6 @@ static void evict_oldest_expect(struct nf_conn *master, | |||
| 361 | } | 361 | } |
| 362 | } | 362 | } |
| 363 | 363 | ||
| 364 | static inline int refresh_timer(struct nf_conntrack_expect *i) | ||
| 365 | { | ||
| 366 | struct nf_conn_help *master_help = nfct_help(i->master); | ||
| 367 | const struct nf_conntrack_expect_policy *p; | ||
| 368 | |||
| 369 | if (!del_timer(&i->timeout)) | ||
| 370 | return 0; | ||
| 371 | |||
| 372 | p = &rcu_dereference_protected( | ||
| 373 | master_help->helper, | ||
| 374 | lockdep_is_held(&nf_conntrack_lock) | ||
| 375 | )->expect_policy[i->class]; | ||
| 376 | i->timeout.expires = jiffies + p->timeout * HZ; | ||
| 377 | add_timer(&i->timeout); | ||
| 378 | return 1; | ||
| 379 | } | ||
| 380 | |||
| 381 | static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect) | 364 | static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect) |
| 382 | { | 365 | { |
| 383 | const struct nf_conntrack_expect_policy *p; | 366 | const struct nf_conntrack_expect_policy *p; |
| @@ -386,7 +369,7 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect) | |||
| 386 | struct nf_conn_help *master_help = nfct_help(master); | 369 | struct nf_conn_help *master_help = nfct_help(master); |
| 387 | struct nf_conntrack_helper *helper; | 370 | struct nf_conntrack_helper *helper; |
| 388 | struct net *net = nf_ct_exp_net(expect); | 371 | struct net *net = nf_ct_exp_net(expect); |
| 389 | struct hlist_node *n; | 372 | struct hlist_node *n, *next; |
| 390 | unsigned int h; | 373 | unsigned int h; |
| 391 | int ret = 1; | 374 | int ret = 1; |
| 392 | 375 | ||
| @@ -395,12 +378,12 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect) | |||
| 395 | goto out; | 378 | goto out; |
| 396 | } | 379 | } |
| 397 | h = nf_ct_expect_dst_hash(&expect->tuple); | 380 | h = nf_ct_expect_dst_hash(&expect->tuple); |
| 398 | hlist_for_each_entry(i, n, &net->ct.expect_hash[h], hnode) { | 381 | hlist_for_each_entry_safe(i, n, next, &net->ct.expect_hash[h], hnode) { |
| 399 | if (expect_matches(i, expect)) { | 382 | if (expect_matches(i, expect)) { |
| 400 | /* Refresh timer: if it's dying, ignore.. */ | 383 | if (del_timer(&i->timeout)) { |
| 401 | if (refresh_timer(i)) { | 384 | nf_ct_unlink_expect(i); |
| 402 | ret = 0; | 385 | nf_ct_expect_put(i); |
| 403 | goto out; | 386 | break; |
| 404 | } | 387 | } |
| 405 | } else if (expect_clash(i, expect)) { | 388 | } else if (expect_clash(i, expect)) { |
| 406 | ret = -EBUSY; | 389 | ret = -EBUSY; |
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 14f67a2cbcb5..9807f3278fcb 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c | |||
| @@ -1896,10 +1896,15 @@ static int | |||
| 1896 | ctnetlink_nfqueue_parse(const struct nlattr *attr, struct nf_conn *ct) | 1896 | ctnetlink_nfqueue_parse(const struct nlattr *attr, struct nf_conn *ct) |
| 1897 | { | 1897 | { |
| 1898 | struct nlattr *cda[CTA_MAX+1]; | 1898 | struct nlattr *cda[CTA_MAX+1]; |
| 1899 | int ret; | ||
| 1899 | 1900 | ||
| 1900 | nla_parse_nested(cda, CTA_MAX, attr, ct_nla_policy); | 1901 | nla_parse_nested(cda, CTA_MAX, attr, ct_nla_policy); |
| 1901 | 1902 | ||
| 1902 | return ctnetlink_nfqueue_parse_ct((const struct nlattr **)cda, ct); | 1903 | spin_lock_bh(&nf_conntrack_lock); |
| 1904 | ret = ctnetlink_nfqueue_parse_ct((const struct nlattr **)cda, ct); | ||
| 1905 | spin_unlock_bh(&nf_conntrack_lock); | ||
| 1906 | |||
| 1907 | return ret; | ||
| 1903 | } | 1908 | } |
| 1904 | 1909 | ||
| 1905 | static struct nfq_ct_hook ctnetlink_nfqueue_hook = { | 1910 | static struct nfq_ct_hook ctnetlink_nfqueue_hook = { |
| @@ -2785,7 +2790,8 @@ static int __init ctnetlink_init(void) | |||
| 2785 | goto err_unreg_subsys; | 2790 | goto err_unreg_subsys; |
| 2786 | } | 2791 | } |
| 2787 | 2792 | ||
| 2788 | if (register_pernet_subsys(&ctnetlink_net_ops)) { | 2793 | ret = register_pernet_subsys(&ctnetlink_net_ops); |
| 2794 | if (ret < 0) { | ||
| 2789 | pr_err("ctnetlink_init: cannot register pernet operations\n"); | 2795 | pr_err("ctnetlink_init: cannot register pernet operations\n"); |
| 2790 | goto err_unreg_exp_subsys; | 2796 | goto err_unreg_exp_subsys; |
| 2791 | } | 2797 | } |
diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index 758a1bacc126..5c0a112aeee6 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c | |||
| @@ -183,12 +183,12 @@ static int media_len(const struct nf_conn *ct, const char *dptr, | |||
| 183 | return len + digits_len(ct, dptr, limit, shift); | 183 | return len + digits_len(ct, dptr, limit, shift); |
| 184 | } | 184 | } |
| 185 | 185 | ||
| 186 | static int parse_addr(const struct nf_conn *ct, const char *cp, | 186 | static int sip_parse_addr(const struct nf_conn *ct, const char *cp, |
| 187 | const char **endp, union nf_inet_addr *addr, | 187 | const char **endp, union nf_inet_addr *addr, |
| 188 | const char *limit) | 188 | const char *limit, bool delim) |
| 189 | { | 189 | { |
| 190 | const char *end; | 190 | const char *end; |
| 191 | int ret = 0; | 191 | int ret; |
| 192 | 192 | ||
| 193 | if (!ct) | 193 | if (!ct) |
| 194 | return 0; | 194 | return 0; |
| @@ -197,16 +197,28 @@ static int parse_addr(const struct nf_conn *ct, const char *cp, | |||
| 197 | switch (nf_ct_l3num(ct)) { | 197 | switch (nf_ct_l3num(ct)) { |
| 198 | case AF_INET: | 198 | case AF_INET: |
| 199 | ret = in4_pton(cp, limit - cp, (u8 *)&addr->ip, -1, &end); | 199 | ret = in4_pton(cp, limit - cp, (u8 *)&addr->ip, -1, &end); |
| 200 | if (ret == 0) | ||
| 201 | return 0; | ||
| 200 | break; | 202 | break; |
| 201 | case AF_INET6: | 203 | case AF_INET6: |
| 204 | if (cp < limit && *cp == '[') | ||
| 205 | cp++; | ||
| 206 | else if (delim) | ||
| 207 | return 0; | ||
| 208 | |||
| 202 | ret = in6_pton(cp, limit - cp, (u8 *)&addr->ip6, -1, &end); | 209 | ret = in6_pton(cp, limit - cp, (u8 *)&addr->ip6, -1, &end); |
| 210 | if (ret == 0) | ||
| 211 | return 0; | ||
| 212 | |||
| 213 | if (end < limit && *end == ']') | ||
| 214 | end++; | ||
| 215 | else if (delim) | ||
| 216 | return 0; | ||
| 203 | break; | 217 | break; |
| 204 | default: | 218 | default: |
| 205 | BUG(); | 219 | BUG(); |
| 206 | } | 220 | } |
| 207 | 221 | ||
| 208 | if (ret == 0 || end == cp) | ||
| 209 | return 0; | ||
| 210 | if (endp) | 222 | if (endp) |
| 211 | *endp = end; | 223 | *endp = end; |
| 212 | return 1; | 224 | return 1; |
| @@ -219,7 +231,7 @@ static int epaddr_len(const struct nf_conn *ct, const char *dptr, | |||
| 219 | union nf_inet_addr addr; | 231 | union nf_inet_addr addr; |
| 220 | const char *aux = dptr; | 232 | const char *aux = dptr; |
| 221 | 233 | ||
| 222 | if (!parse_addr(ct, dptr, &dptr, &addr, limit)) { | 234 | if (!sip_parse_addr(ct, dptr, &dptr, &addr, limit, true)) { |
| 223 | pr_debug("ip: %s parse failed.!\n", dptr); | 235 | pr_debug("ip: %s parse failed.!\n", dptr); |
| 224 | return 0; | 236 | return 0; |
| 225 | } | 237 | } |
| @@ -296,7 +308,7 @@ int ct_sip_parse_request(const struct nf_conn *ct, | |||
| 296 | return 0; | 308 | return 0; |
| 297 | dptr += shift; | 309 | dptr += shift; |
| 298 | 310 | ||
| 299 | if (!parse_addr(ct, dptr, &end, addr, limit)) | 311 | if (!sip_parse_addr(ct, dptr, &end, addr, limit, true)) |
| 300 | return -1; | 312 | return -1; |
| 301 | if (end < limit && *end == ':') { | 313 | if (end < limit && *end == ':') { |
| 302 | end++; | 314 | end++; |
| @@ -550,7 +562,7 @@ int ct_sip_parse_header_uri(const struct nf_conn *ct, const char *dptr, | |||
| 550 | if (ret == 0) | 562 | if (ret == 0) |
| 551 | return ret; | 563 | return ret; |
| 552 | 564 | ||
| 553 | if (!parse_addr(ct, dptr + *matchoff, &c, addr, limit)) | 565 | if (!sip_parse_addr(ct, dptr + *matchoff, &c, addr, limit, true)) |
| 554 | return -1; | 566 | return -1; |
| 555 | if (*c == ':') { | 567 | if (*c == ':') { |
| 556 | c++; | 568 | c++; |
| @@ -599,7 +611,7 @@ int ct_sip_parse_address_param(const struct nf_conn *ct, const char *dptr, | |||
| 599 | unsigned int dataoff, unsigned int datalen, | 611 | unsigned int dataoff, unsigned int datalen, |
| 600 | const char *name, | 612 | const char *name, |
| 601 | unsigned int *matchoff, unsigned int *matchlen, | 613 | unsigned int *matchoff, unsigned int *matchlen, |
| 602 | union nf_inet_addr *addr) | 614 | union nf_inet_addr *addr, bool delim) |
| 603 | { | 615 | { |
| 604 | const char *limit = dptr + datalen; | 616 | const char *limit = dptr + datalen; |
| 605 | const char *start, *end; | 617 | const char *start, *end; |
| @@ -613,7 +625,7 @@ int ct_sip_parse_address_param(const struct nf_conn *ct, const char *dptr, | |||
| 613 | return 0; | 625 | return 0; |
| 614 | 626 | ||
| 615 | start += strlen(name); | 627 | start += strlen(name); |
| 616 | if (!parse_addr(ct, start, &end, addr, limit)) | 628 | if (!sip_parse_addr(ct, start, &end, addr, limit, delim)) |
| 617 | return 0; | 629 | return 0; |
| 618 | *matchoff = start - dptr; | 630 | *matchoff = start - dptr; |
| 619 | *matchlen = end - start; | 631 | *matchlen = end - start; |
| @@ -675,6 +687,47 @@ static int ct_sip_parse_transport(struct nf_conn *ct, const char *dptr, | |||
| 675 | return 1; | 687 | return 1; |
| 676 | } | 688 | } |
| 677 | 689 | ||
| 690 | static int sdp_parse_addr(const struct nf_conn *ct, const char *cp, | ||
| 691 | const char **endp, union nf_inet_addr *addr, | ||
| 692 | const char *limit) | ||
| 693 | { | ||
| 694 | const char *end; | ||
| 695 | int ret; | ||
| 696 | |||
| 697 | memset(addr, 0, sizeof(*addr)); | ||
| 698 | switch (nf_ct_l3num(ct)) { | ||
| 699 | case AF_INET: | ||
| 700 | ret = in4_pton(cp, limit - cp, (u8 *)&addr->ip, -1, &end); | ||
| 701 | break; | ||
| 702 | case AF_INET6: | ||
| 703 | ret = in6_pton(cp, limit - cp, (u8 *)&addr->ip6, -1, &end); | ||
| 704 | break; | ||
| 705 | default: | ||
| 706 | BUG(); | ||
| 707 | } | ||
| 708 | |||
| 709 | if (ret == 0) | ||
| 710 | return 0; | ||
| 711 | if (endp) | ||
| 712 | *endp = end; | ||
| 713 | return 1; | ||
| 714 | } | ||
| 715 | |||
| 716 | /* skip ip address. returns its length. */ | ||
| 717 | static int sdp_addr_len(const struct nf_conn *ct, const char *dptr, | ||
| 718 | const char *limit, int *shift) | ||
| 719 | { | ||
| 720 | union nf_inet_addr addr; | ||
| 721 | const char *aux = dptr; | ||
| 722 | |||
| 723 | if (!sdp_parse_addr(ct, dptr, &dptr, &addr, limit)) { | ||
| 724 | pr_debug("ip: %s parse failed.!\n", dptr); | ||
| 725 | return 0; | ||
| 726 | } | ||
| 727 | |||
| 728 | return dptr - aux; | ||
| 729 | } | ||
| 730 | |||
| 678 | /* SDP header parsing: a SDP session description contains an ordered set of | 731 | /* SDP header parsing: a SDP session description contains an ordered set of |
| 679 | * headers, starting with a section containing general session parameters, | 732 | * headers, starting with a section containing general session parameters, |
| 680 | * optionally followed by multiple media descriptions. | 733 | * optionally followed by multiple media descriptions. |
| @@ -686,10 +739,10 @@ static int ct_sip_parse_transport(struct nf_conn *ct, const char *dptr, | |||
| 686 | */ | 739 | */ |
| 687 | static const struct sip_header ct_sdp_hdrs[] = { | 740 | static const struct sip_header ct_sdp_hdrs[] = { |
| 688 | [SDP_HDR_VERSION] = SDP_HDR("v=", NULL, digits_len), | 741 | [SDP_HDR_VERSION] = SDP_HDR("v=", NULL, digits_len), |
| 689 | [SDP_HDR_OWNER_IP4] = SDP_HDR("o=", "IN IP4 ", epaddr_len), | 742 | [SDP_HDR_OWNER_IP4] = SDP_HDR("o=", "IN IP4 ", sdp_addr_len), |
| 690 | [SDP_HDR_CONNECTION_IP4] = SDP_HDR("c=", "IN IP4 ", epaddr_len), | 743 | [SDP_HDR_CONNECTION_IP4] = SDP_HDR("c=", "IN IP4 ", sdp_addr_len), |
| 691 | [SDP_HDR_OWNER_IP6] = SDP_HDR("o=", "IN IP6 ", epaddr_len), | 744 | [SDP_HDR_OWNER_IP6] = SDP_HDR("o=", "IN IP6 ", sdp_addr_len), |
| 692 | [SDP_HDR_CONNECTION_IP6] = SDP_HDR("c=", "IN IP6 ", epaddr_len), | 745 | [SDP_HDR_CONNECTION_IP6] = SDP_HDR("c=", "IN IP6 ", sdp_addr_len), |
| 693 | [SDP_HDR_MEDIA] = SDP_HDR("m=", NULL, media_len), | 746 | [SDP_HDR_MEDIA] = SDP_HDR("m=", NULL, media_len), |
| 694 | }; | 747 | }; |
| 695 | 748 | ||
| @@ -775,8 +828,8 @@ static int ct_sip_parse_sdp_addr(const struct nf_conn *ct, const char *dptr, | |||
| 775 | if (ret <= 0) | 828 | if (ret <= 0) |
| 776 | return ret; | 829 | return ret; |
| 777 | 830 | ||
| 778 | if (!parse_addr(ct, dptr + *matchoff, NULL, addr, | 831 | if (!sdp_parse_addr(ct, dptr + *matchoff, NULL, addr, |
| 779 | dptr + *matchoff + *matchlen)) | 832 | dptr + *matchoff + *matchlen)) |
| 780 | return -1; | 833 | return -1; |
| 781 | return 1; | 834 | return 1; |
| 782 | } | 835 | } |
| @@ -1515,7 +1568,6 @@ static int sip_help_udp(struct sk_buff *skb, unsigned int protoff, | |||
| 1515 | } | 1568 | } |
| 1516 | 1569 | ||
| 1517 | static struct nf_conntrack_helper sip[MAX_PORTS][4] __read_mostly; | 1570 | static struct nf_conntrack_helper sip[MAX_PORTS][4] __read_mostly; |
| 1518 | static char sip_names[MAX_PORTS][4][sizeof("sip-65535")] __read_mostly; | ||
| 1519 | 1571 | ||
| 1520 | static const struct nf_conntrack_expect_policy sip_exp_policy[SIP_EXPECT_MAX + 1] = { | 1572 | static const struct nf_conntrack_expect_policy sip_exp_policy[SIP_EXPECT_MAX + 1] = { |
| 1521 | [SIP_EXPECT_SIGNALLING] = { | 1573 | [SIP_EXPECT_SIGNALLING] = { |
| @@ -1585,9 +1637,9 @@ static int __init nf_conntrack_sip_init(void) | |||
| 1585 | sip[i][j].me = THIS_MODULE; | 1637 | sip[i][j].me = THIS_MODULE; |
| 1586 | 1638 | ||
| 1587 | if (ports[i] == SIP_PORT) | 1639 | if (ports[i] == SIP_PORT) |
| 1588 | sprintf(sip_names[i][j], "sip"); | 1640 | sprintf(sip[i][j].name, "sip"); |
| 1589 | else | 1641 | else |
| 1590 | sprintf(sip_names[i][j], "sip-%u", i); | 1642 | sprintf(sip[i][j].name, "sip-%u", i); |
| 1591 | 1643 | ||
| 1592 | pr_debug("port #%u: %u\n", i, ports[i]); | 1644 | pr_debug("port #%u: %u\n", i, ports[i]); |
| 1593 | 1645 | ||
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index 169ab59ed9d4..14e2f3903142 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c | |||
| @@ -480,7 +480,7 @@ __build_packet_message(struct nfulnl_instance *inst, | |||
| 480 | } | 480 | } |
| 481 | 481 | ||
| 482 | if (indev && skb_mac_header_was_set(skb)) { | 482 | if (indev && skb_mac_header_was_set(skb)) { |
| 483 | if (nla_put_be32(inst->skb, NFULA_HWTYPE, htons(skb->dev->type)) || | 483 | if (nla_put_be16(inst->skb, NFULA_HWTYPE, htons(skb->dev->type)) || |
| 484 | nla_put_be16(inst->skb, NFULA_HWLEN, | 484 | nla_put_be16(inst->skb, NFULA_HWLEN, |
| 485 | htons(skb->dev->hard_header_len)) || | 485 | htons(skb->dev->hard_header_len)) || |
| 486 | nla_put(inst->skb, NFULA_HWHEADER, skb->dev->hard_header_len, | 486 | nla_put(inst->skb, NFULA_HWHEADER, skb->dev->hard_header_len, |
| @@ -996,8 +996,10 @@ static int __init nfnetlink_log_init(void) | |||
| 996 | 996 | ||
| 997 | #ifdef CONFIG_PROC_FS | 997 | #ifdef CONFIG_PROC_FS |
| 998 | if (!proc_create("nfnetlink_log", 0440, | 998 | if (!proc_create("nfnetlink_log", 0440, |
| 999 | proc_net_netfilter, &nful_file_ops)) | 999 | proc_net_netfilter, &nful_file_ops)) { |
| 1000 | status = -ENOMEM; | ||
| 1000 | goto cleanup_logger; | 1001 | goto cleanup_logger; |
| 1002 | } | ||
| 1001 | #endif | 1003 | #endif |
| 1002 | return status; | 1004 | return status; |
| 1003 | 1005 | ||
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 5463969da45b..527023823b5c 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c | |||
| @@ -1362,7 +1362,7 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, | |||
| 1362 | if (NULL == siocb->scm) | 1362 | if (NULL == siocb->scm) |
| 1363 | siocb->scm = &scm; | 1363 | siocb->scm = &scm; |
| 1364 | 1364 | ||
| 1365 | err = scm_send(sock, msg, siocb->scm); | 1365 | err = scm_send(sock, msg, siocb->scm, true); |
| 1366 | if (err < 0) | 1366 | if (err < 0) |
| 1367 | return err; | 1367 | return err; |
| 1368 | 1368 | ||
| @@ -1373,7 +1373,8 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, | |||
| 1373 | dst_pid = addr->nl_pid; | 1373 | dst_pid = addr->nl_pid; |
| 1374 | dst_group = ffs(addr->nl_groups); | 1374 | dst_group = ffs(addr->nl_groups); |
| 1375 | err = -EPERM; | 1375 | err = -EPERM; |
| 1376 | if (dst_group && !netlink_capable(sock, NL_NONROOT_SEND)) | 1376 | if ((dst_group || dst_pid) && |
| 1377 | !netlink_capable(sock, NL_NONROOT_SEND)) | ||
| 1377 | goto out; | 1378 | goto out; |
| 1378 | } else { | 1379 | } else { |
| 1379 | dst_pid = nlk->dst_pid; | 1380 | dst_pid = nlk->dst_pid; |
| @@ -2147,6 +2148,7 @@ static void __init netlink_add_usersock_entry(void) | |||
| 2147 | rcu_assign_pointer(nl_table[NETLINK_USERSOCK].listeners, listeners); | 2148 | rcu_assign_pointer(nl_table[NETLINK_USERSOCK].listeners, listeners); |
| 2148 | nl_table[NETLINK_USERSOCK].module = THIS_MODULE; | 2149 | nl_table[NETLINK_USERSOCK].module = THIS_MODULE; |
| 2149 | nl_table[NETLINK_USERSOCK].registered = 1; | 2150 | nl_table[NETLINK_USERSOCK].registered = 1; |
| 2151 | nl_table[NETLINK_USERSOCK].nl_nonroot = NL_NONROOT_SEND; | ||
| 2150 | 2152 | ||
| 2151 | netlink_table_ungrab(); | 2153 | netlink_table_ungrab(); |
| 2152 | } | 2154 | } |
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index 320fa0e6951a..f3f96badf5aa 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c | |||
| @@ -325,9 +325,6 @@ static int sample(struct datapath *dp, struct sk_buff *skb, | |||
| 325 | } | 325 | } |
| 326 | } | 326 | } |
| 327 | 327 | ||
| 328 | if (!acts_list) | ||
| 329 | return 0; | ||
| 330 | |||
| 331 | return do_execute_actions(dp, skb, nla_data(acts_list), | 328 | return do_execute_actions(dp, skb, nla_data(acts_list), |
| 332 | nla_len(acts_list), true); | 329 | nla_len(acts_list), true); |
| 333 | } | 330 | } |
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index ceaca7c134a0..c5c9e2a54218 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c | |||
| @@ -1079,7 +1079,7 @@ static void *packet_current_rx_frame(struct packet_sock *po, | |||
| 1079 | default: | 1079 | default: |
| 1080 | WARN(1, "TPACKET version not supported\n"); | 1080 | WARN(1, "TPACKET version not supported\n"); |
| 1081 | BUG(); | 1081 | BUG(); |
| 1082 | return 0; | 1082 | return NULL; |
| 1083 | } | 1083 | } |
| 1084 | } | 1084 | } |
| 1085 | 1085 | ||
| @@ -1273,6 +1273,14 @@ static void __fanout_unlink(struct sock *sk, struct packet_sock *po) | |||
| 1273 | spin_unlock(&f->lock); | 1273 | spin_unlock(&f->lock); |
| 1274 | } | 1274 | } |
| 1275 | 1275 | ||
| 1276 | static bool match_fanout_group(struct packet_type *ptype, struct sock * sk) | ||
| 1277 | { | ||
| 1278 | if (ptype->af_packet_priv == (void*)((struct packet_sock *)sk)->fanout) | ||
| 1279 | return true; | ||
| 1280 | |||
| 1281 | return false; | ||
| 1282 | } | ||
| 1283 | |||
| 1276 | static int fanout_add(struct sock *sk, u16 id, u16 type_flags) | 1284 | static int fanout_add(struct sock *sk, u16 id, u16 type_flags) |
| 1277 | { | 1285 | { |
| 1278 | struct packet_sock *po = pkt_sk(sk); | 1286 | struct packet_sock *po = pkt_sk(sk); |
| @@ -1325,6 +1333,7 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags) | |||
| 1325 | match->prot_hook.dev = po->prot_hook.dev; | 1333 | match->prot_hook.dev = po->prot_hook.dev; |
| 1326 | match->prot_hook.func = packet_rcv_fanout; | 1334 | match->prot_hook.func = packet_rcv_fanout; |
| 1327 | match->prot_hook.af_packet_priv = match; | 1335 | match->prot_hook.af_packet_priv = match; |
| 1336 | match->prot_hook.id_match = match_fanout_group; | ||
| 1328 | dev_add_pack(&match->prot_hook); | 1337 | dev_add_pack(&match->prot_hook); |
| 1329 | list_add(&match->list, &fanout_list); | 1338 | list_add(&match->list, &fanout_list); |
| 1330 | } | 1339 | } |
| @@ -1936,7 +1945,6 @@ static void tpacket_destruct_skb(struct sk_buff *skb) | |||
| 1936 | 1945 | ||
| 1937 | if (likely(po->tx_ring.pg_vec)) { | 1946 | if (likely(po->tx_ring.pg_vec)) { |
| 1938 | ph = skb_shinfo(skb)->destructor_arg; | 1947 | ph = skb_shinfo(skb)->destructor_arg; |
| 1939 | BUG_ON(__packet_get_status(po, ph) != TP_STATUS_SENDING); | ||
| 1940 | BUG_ON(atomic_read(&po->tx_ring.pending) == 0); | 1948 | BUG_ON(atomic_read(&po->tx_ring.pending) == 0); |
| 1941 | atomic_dec(&po->tx_ring.pending); | 1949 | atomic_dec(&po->tx_ring.pending); |
| 1942 | __packet_set_status(po, ph, TP_STATUS_AVAILABLE); | 1950 | __packet_set_status(po, ph, TP_STATUS_AVAILABLE); |
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c index f10fb8256442..05d60859d8e3 100644 --- a/net/sched/act_gact.c +++ b/net/sched/act_gact.c | |||
| @@ -67,6 +67,9 @@ static int tcf_gact_init(struct nlattr *nla, struct nlattr *est, | |||
| 67 | struct tcf_common *pc; | 67 | struct tcf_common *pc; |
| 68 | int ret = 0; | 68 | int ret = 0; |
| 69 | int err; | 69 | int err; |
| 70 | #ifdef CONFIG_GACT_PROB | ||
| 71 | struct tc_gact_p *p_parm = NULL; | ||
| 72 | #endif | ||
| 70 | 73 | ||
| 71 | if (nla == NULL) | 74 | if (nla == NULL) |
| 72 | return -EINVAL; | 75 | return -EINVAL; |
| @@ -82,6 +85,12 @@ static int tcf_gact_init(struct nlattr *nla, struct nlattr *est, | |||
| 82 | #ifndef CONFIG_GACT_PROB | 85 | #ifndef CONFIG_GACT_PROB |
| 83 | if (tb[TCA_GACT_PROB] != NULL) | 86 | if (tb[TCA_GACT_PROB] != NULL) |
| 84 | return -EOPNOTSUPP; | 87 | return -EOPNOTSUPP; |
| 88 | #else | ||
| 89 | if (tb[TCA_GACT_PROB]) { | ||
| 90 | p_parm = nla_data(tb[TCA_GACT_PROB]); | ||
| 91 | if (p_parm->ptype >= MAX_RAND) | ||
| 92 | return -EINVAL; | ||
| 93 | } | ||
| 85 | #endif | 94 | #endif |
| 86 | 95 | ||
| 87 | pc = tcf_hash_check(parm->index, a, bind, &gact_hash_info); | 96 | pc = tcf_hash_check(parm->index, a, bind, &gact_hash_info); |
| @@ -103,8 +112,7 @@ static int tcf_gact_init(struct nlattr *nla, struct nlattr *est, | |||
| 103 | spin_lock_bh(&gact->tcf_lock); | 112 | spin_lock_bh(&gact->tcf_lock); |
| 104 | gact->tcf_action = parm->action; | 113 | gact->tcf_action = parm->action; |
| 105 | #ifdef CONFIG_GACT_PROB | 114 | #ifdef CONFIG_GACT_PROB |
| 106 | if (tb[TCA_GACT_PROB] != NULL) { | 115 | if (p_parm) { |
| 107 | struct tc_gact_p *p_parm = nla_data(tb[TCA_GACT_PROB]); | ||
| 108 | gact->tcfg_paction = p_parm->paction; | 116 | gact->tcfg_paction = p_parm->paction; |
| 109 | gact->tcfg_pval = p_parm->pval; | 117 | gact->tcfg_pval = p_parm->pval; |
| 110 | gact->tcfg_ptype = p_parm->ptype; | 118 | gact->tcfg_ptype = p_parm->ptype; |
| @@ -133,7 +141,7 @@ static int tcf_gact(struct sk_buff *skb, const struct tc_action *a, | |||
| 133 | 141 | ||
| 134 | spin_lock(&gact->tcf_lock); | 142 | spin_lock(&gact->tcf_lock); |
| 135 | #ifdef CONFIG_GACT_PROB | 143 | #ifdef CONFIG_GACT_PROB |
| 136 | if (gact->tcfg_ptype && gact_rand[gact->tcfg_ptype] != NULL) | 144 | if (gact->tcfg_ptype) |
| 137 | action = gact_rand[gact->tcfg_ptype](gact); | 145 | action = gact_rand[gact->tcfg_ptype](gact); |
| 138 | else | 146 | else |
| 139 | action = gact->tcf_action; | 147 | action = gact->tcf_action; |
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index 60e281ad0f07..58fb3c7aab9e 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c | |||
| @@ -185,7 +185,12 @@ err3: | |||
| 185 | err2: | 185 | err2: |
| 186 | kfree(tname); | 186 | kfree(tname); |
| 187 | err1: | 187 | err1: |
| 188 | kfree(pc); | 188 | if (ret == ACT_P_CREATED) { |
| 189 | if (est) | ||
| 190 | gen_kill_estimator(&pc->tcfc_bstats, | ||
| 191 | &pc->tcfc_rate_est); | ||
| 192 | kfree_rcu(pc, tcfc_rcu); | ||
| 193 | } | ||
| 189 | return err; | 194 | return err; |
| 190 | } | 195 | } |
| 191 | 196 | ||
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index fe81cc18e9e0..9c0fd0c78814 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c | |||
| @@ -200,13 +200,12 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a, | |||
| 200 | out: | 200 | out: |
| 201 | if (err) { | 201 | if (err) { |
| 202 | m->tcf_qstats.overlimits++; | 202 | m->tcf_qstats.overlimits++; |
| 203 | /* should we be asking for packet to be dropped? | 203 | if (m->tcfm_eaction != TCA_EGRESS_MIRROR) |
| 204 | * may make sense for redirect case only | 204 | retval = TC_ACT_SHOT; |
| 205 | */ | 205 | else |
| 206 | retval = TC_ACT_SHOT; | 206 | retval = m->tcf_action; |
| 207 | } else { | 207 | } else |
| 208 | retval = m->tcf_action; | 208 | retval = m->tcf_action; |
| 209 | } | ||
| 210 | spin_unlock(&m->tcf_lock); | 209 | spin_unlock(&m->tcf_lock); |
| 211 | 210 | ||
| 212 | return retval; | 211 | return retval; |
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index 26aa2f6ce257..45c53ab067a6 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c | |||
| @@ -74,7 +74,10 @@ static int tcf_pedit_init(struct nlattr *nla, struct nlattr *est, | |||
| 74 | p = to_pedit(pc); | 74 | p = to_pedit(pc); |
| 75 | keys = kmalloc(ksize, GFP_KERNEL); | 75 | keys = kmalloc(ksize, GFP_KERNEL); |
| 76 | if (keys == NULL) { | 76 | if (keys == NULL) { |
| 77 | kfree(pc); | 77 | if (est) |
| 78 | gen_kill_estimator(&pc->tcfc_bstats, | ||
| 79 | &pc->tcfc_rate_est); | ||
| 80 | kfree_rcu(pc, tcfc_rcu); | ||
| 78 | return -ENOMEM; | 81 | return -ENOMEM; |
| 79 | } | 82 | } |
| 80 | ret = ACT_P_CREATED; | 83 | ret = ACT_P_CREATED; |
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c index 3922f2a2821b..3714f60f0b3c 100644 --- a/net/sched/act_simple.c +++ b/net/sched/act_simple.c | |||
| @@ -131,7 +131,10 @@ static int tcf_simp_init(struct nlattr *nla, struct nlattr *est, | |||
| 131 | d = to_defact(pc); | 131 | d = to_defact(pc); |
| 132 | ret = alloc_defdata(d, defdata); | 132 | ret = alloc_defdata(d, defdata); |
| 133 | if (ret < 0) { | 133 | if (ret < 0) { |
| 134 | kfree(pc); | 134 | if (est) |
| 135 | gen_kill_estimator(&pc->tcfc_bstats, | ||
| 136 | &pc->tcfc_rate_est); | ||
| 137 | kfree_rcu(pc, tcfc_rcu); | ||
| 135 | return ret; | 138 | return ret; |
| 136 | } | 139 | } |
| 137 | d->tcf_action = parm->action; | 140 | d->tcf_action = parm->action; |
diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index 9af01f3df18c..e4723d31fdd5 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c | |||
| @@ -203,6 +203,34 @@ out: | |||
| 203 | return index; | 203 | return index; |
| 204 | } | 204 | } |
| 205 | 205 | ||
| 206 | /* Length of the next packet (0 if the queue is empty). */ | ||
| 207 | static unsigned int qdisc_peek_len(struct Qdisc *sch) | ||
| 208 | { | ||
| 209 | struct sk_buff *skb; | ||
| 210 | |||
| 211 | skb = sch->ops->peek(sch); | ||
| 212 | return skb ? qdisc_pkt_len(skb) : 0; | ||
| 213 | } | ||
| 214 | |||
| 215 | static void qfq_deactivate_class(struct qfq_sched *, struct qfq_class *); | ||
| 216 | static void qfq_activate_class(struct qfq_sched *q, struct qfq_class *cl, | ||
| 217 | unsigned int len); | ||
| 218 | |||
| 219 | static void qfq_update_class_params(struct qfq_sched *q, struct qfq_class *cl, | ||
| 220 | u32 lmax, u32 inv_w, int delta_w) | ||
| 221 | { | ||
| 222 | int i; | ||
| 223 | |||
| 224 | /* update qfq-specific data */ | ||
| 225 | cl->lmax = lmax; | ||
| 226 | cl->inv_w = inv_w; | ||
| 227 | i = qfq_calc_index(cl->inv_w, cl->lmax); | ||
| 228 | |||
| 229 | cl->grp = &q->groups[i]; | ||
| 230 | |||
| 231 | q->wsum += delta_w; | ||
| 232 | } | ||
| 233 | |||
| 206 | static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, | 234 | static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, |
| 207 | struct nlattr **tca, unsigned long *arg) | 235 | struct nlattr **tca, unsigned long *arg) |
| 208 | { | 236 | { |
| @@ -250,6 +278,8 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, | |||
| 250 | lmax = 1UL << QFQ_MTU_SHIFT; | 278 | lmax = 1UL << QFQ_MTU_SHIFT; |
| 251 | 279 | ||
| 252 | if (cl != NULL) { | 280 | if (cl != NULL) { |
| 281 | bool need_reactivation = false; | ||
| 282 | |||
| 253 | if (tca[TCA_RATE]) { | 283 | if (tca[TCA_RATE]) { |
| 254 | err = gen_replace_estimator(&cl->bstats, &cl->rate_est, | 284 | err = gen_replace_estimator(&cl->bstats, &cl->rate_est, |
| 255 | qdisc_root_sleeping_lock(sch), | 285 | qdisc_root_sleeping_lock(sch), |
| @@ -258,12 +288,29 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, | |||
| 258 | return err; | 288 | return err; |
| 259 | } | 289 | } |
| 260 | 290 | ||
| 261 | if (inv_w != cl->inv_w) { | 291 | if (lmax == cl->lmax && inv_w == cl->inv_w) |
| 262 | sch_tree_lock(sch); | 292 | return 0; /* nothing to update */ |
| 263 | q->wsum += delta_w; | 293 | |
| 264 | cl->inv_w = inv_w; | 294 | i = qfq_calc_index(inv_w, lmax); |
| 265 | sch_tree_unlock(sch); | 295 | sch_tree_lock(sch); |
| 296 | if (&q->groups[i] != cl->grp && cl->qdisc->q.qlen > 0) { | ||
| 297 | /* | ||
| 298 | * shift cl->F back, to not charge the | ||
| 299 | * class for the not-yet-served head | ||
| 300 | * packet | ||
| 301 | */ | ||
| 302 | cl->F = cl->S; | ||
| 303 | /* remove class from its slot in the old group */ | ||
| 304 | qfq_deactivate_class(q, cl); | ||
| 305 | need_reactivation = true; | ||
| 266 | } | 306 | } |
| 307 | |||
| 308 | qfq_update_class_params(q, cl, lmax, inv_w, delta_w); | ||
| 309 | |||
| 310 | if (need_reactivation) /* activate in new group */ | ||
| 311 | qfq_activate_class(q, cl, qdisc_peek_len(cl->qdisc)); | ||
| 312 | sch_tree_unlock(sch); | ||
| 313 | |||
| 267 | return 0; | 314 | return 0; |
| 268 | } | 315 | } |
| 269 | 316 | ||
| @@ -273,11 +320,8 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, | |||
| 273 | 320 | ||
| 274 | cl->refcnt = 1; | 321 | cl->refcnt = 1; |
| 275 | cl->common.classid = classid; | 322 | cl->common.classid = classid; |
| 276 | cl->lmax = lmax; | ||
| 277 | cl->inv_w = inv_w; | ||
| 278 | i = qfq_calc_index(cl->inv_w, cl->lmax); | ||
| 279 | 323 | ||
| 280 | cl->grp = &q->groups[i]; | 324 | qfq_update_class_params(q, cl, lmax, inv_w, delta_w); |
| 281 | 325 | ||
| 282 | cl->qdisc = qdisc_create_dflt(sch->dev_queue, | 326 | cl->qdisc = qdisc_create_dflt(sch->dev_queue, |
| 283 | &pfifo_qdisc_ops, classid); | 327 | &pfifo_qdisc_ops, classid); |
| @@ -294,7 +338,6 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, | |||
| 294 | return err; | 338 | return err; |
| 295 | } | 339 | } |
| 296 | } | 340 | } |
| 297 | q->wsum += weight; | ||
| 298 | 341 | ||
| 299 | sch_tree_lock(sch); | 342 | sch_tree_lock(sch); |
| 300 | qdisc_class_hash_insert(&q->clhash, &cl->common); | 343 | qdisc_class_hash_insert(&q->clhash, &cl->common); |
| @@ -711,15 +754,6 @@ static void qfq_update_eligible(struct qfq_sched *q, u64 old_V) | |||
| 711 | } | 754 | } |
| 712 | } | 755 | } |
| 713 | 756 | ||
| 714 | /* What is length of next packet in queue (0 if queue is empty) */ | ||
| 715 | static unsigned int qdisc_peek_len(struct Qdisc *sch) | ||
| 716 | { | ||
| 717 | struct sk_buff *skb; | ||
| 718 | |||
| 719 | skb = sch->ops->peek(sch); | ||
| 720 | return skb ? qdisc_pkt_len(skb) : 0; | ||
| 721 | } | ||
| 722 | |||
| 723 | /* | 757 | /* |
| 724 | * Updates the class, returns true if also the group needs to be updated. | 758 | * Updates the class, returns true if also the group needs to be updated. |
| 725 | */ | 759 | */ |
| @@ -843,11 +877,8 @@ static void qfq_update_start(struct qfq_sched *q, struct qfq_class *cl) | |||
| 843 | static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch) | 877 | static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch) |
| 844 | { | 878 | { |
| 845 | struct qfq_sched *q = qdisc_priv(sch); | 879 | struct qfq_sched *q = qdisc_priv(sch); |
| 846 | struct qfq_group *grp; | ||
| 847 | struct qfq_class *cl; | 880 | struct qfq_class *cl; |
| 848 | int err; | 881 | int err; |
| 849 | u64 roundedS; | ||
| 850 | int s; | ||
| 851 | 882 | ||
| 852 | cl = qfq_classify(skb, sch, &err); | 883 | cl = qfq_classify(skb, sch, &err); |
| 853 | if (cl == NULL) { | 884 | if (cl == NULL) { |
| @@ -876,11 +907,25 @@ static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch) | |||
| 876 | return err; | 907 | return err; |
| 877 | 908 | ||
| 878 | /* If reach this point, queue q was idle */ | 909 | /* If reach this point, queue q was idle */ |
| 879 | grp = cl->grp; | 910 | qfq_activate_class(q, cl, qdisc_pkt_len(skb)); |
| 911 | |||
| 912 | return err; | ||
| 913 | } | ||
| 914 | |||
| 915 | /* | ||
| 916 | * Handle class switch from idle to backlogged. | ||
| 917 | */ | ||
| 918 | static void qfq_activate_class(struct qfq_sched *q, struct qfq_class *cl, | ||
| 919 | unsigned int pkt_len) | ||
| 920 | { | ||
| 921 | struct qfq_group *grp = cl->grp; | ||
| 922 | u64 roundedS; | ||
| 923 | int s; | ||
| 924 | |||
| 880 | qfq_update_start(q, cl); | 925 | qfq_update_start(q, cl); |
| 881 | 926 | ||
| 882 | /* compute new finish time and rounded start. */ | 927 | /* compute new finish time and rounded start. */ |
| 883 | cl->F = cl->S + (u64)qdisc_pkt_len(skb) * cl->inv_w; | 928 | cl->F = cl->S + (u64)pkt_len * cl->inv_w; |
| 884 | roundedS = qfq_round_down(cl->S, grp->slot_shift); | 929 | roundedS = qfq_round_down(cl->S, grp->slot_shift); |
| 885 | 930 | ||
| 886 | /* | 931 | /* |
| @@ -917,8 +962,6 @@ static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch) | |||
| 917 | 962 | ||
| 918 | skip_update: | 963 | skip_update: |
| 919 | qfq_slot_insert(grp, cl, roundedS); | 964 | qfq_slot_insert(grp, cl, roundedS); |
| 920 | |||
| 921 | return err; | ||
| 922 | } | 965 | } |
| 923 | 966 | ||
| 924 | 967 | ||
diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c index 33d894776192..10c018a5b9fe 100644 --- a/net/sctp/ulpevent.c +++ b/net/sctp/ulpevent.c | |||
| @@ -702,7 +702,8 @@ struct sctp_ulpevent *sctp_ulpevent_make_rcvmsg(struct sctp_association *asoc, | |||
| 702 | if (rx_count >= asoc->base.sk->sk_rcvbuf) { | 702 | if (rx_count >= asoc->base.sk->sk_rcvbuf) { |
| 703 | 703 | ||
| 704 | if ((asoc->base.sk->sk_userlocks & SOCK_RCVBUF_LOCK) || | 704 | if ((asoc->base.sk->sk_userlocks & SOCK_RCVBUF_LOCK) || |
| 705 | (!sk_rmem_schedule(asoc->base.sk, chunk->skb->truesize))) | 705 | (!sk_rmem_schedule(asoc->base.sk, chunk->skb, |
| 706 | chunk->skb->truesize))) | ||
| 706 | goto fail; | 707 | goto fail; |
| 707 | } | 708 | } |
| 708 | 709 | ||
diff --git a/net/socket.c b/net/socket.c index dfe5b66c97e0..edc3c4af9085 100644 --- a/net/socket.c +++ b/net/socket.c | |||
| @@ -2604,7 +2604,7 @@ static int do_siocgstamp(struct net *net, struct socket *sock, | |||
| 2604 | err = sock_do_ioctl(net, sock, cmd, (unsigned long)&ktv); | 2604 | err = sock_do_ioctl(net, sock, cmd, (unsigned long)&ktv); |
| 2605 | set_fs(old_fs); | 2605 | set_fs(old_fs); |
| 2606 | if (!err) | 2606 | if (!err) |
| 2607 | err = compat_put_timeval(up, &ktv); | 2607 | err = compat_put_timeval(&ktv, up); |
| 2608 | 2608 | ||
| 2609 | return err; | 2609 | return err; |
| 2610 | } | 2610 | } |
| @@ -2620,7 +2620,7 @@ static int do_siocgstampns(struct net *net, struct socket *sock, | |||
| 2620 | err = sock_do_ioctl(net, sock, cmd, (unsigned long)&kts); | 2620 | err = sock_do_ioctl(net, sock, cmd, (unsigned long)&kts); |
| 2621 | set_fs(old_fs); | 2621 | set_fs(old_fs); |
| 2622 | if (!err) | 2622 | if (!err) |
| 2623 | err = compat_put_timespec(up, &kts); | 2623 | err = compat_put_timespec(&kts, up); |
| 2624 | 2624 | ||
| 2625 | return err; | 2625 | return err; |
| 2626 | } | 2626 | } |
| @@ -2657,6 +2657,7 @@ static int dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32) | |||
| 2657 | if (copy_from_user(&ifc32, uifc32, sizeof(struct compat_ifconf))) | 2657 | if (copy_from_user(&ifc32, uifc32, sizeof(struct compat_ifconf))) |
| 2658 | return -EFAULT; | 2658 | return -EFAULT; |
| 2659 | 2659 | ||
| 2660 | memset(&ifc, 0, sizeof(ifc)); | ||
| 2660 | if (ifc32.ifcbuf == 0) { | 2661 | if (ifc32.ifcbuf == 0) { |
| 2661 | ifc32.ifc_len = 0; | 2662 | ifc32.ifc_len = 0; |
| 2662 | ifc.ifc_len = 0; | 2663 | ifc.ifc_len = 0; |
diff --git a/net/sunrpc/Kconfig b/net/sunrpc/Kconfig index 9fe8857d8d59..03d03e37a7d5 100644 --- a/net/sunrpc/Kconfig +++ b/net/sunrpc/Kconfig | |||
| @@ -21,6 +21,11 @@ config SUNRPC_XPRT_RDMA | |||
| 21 | 21 | ||
| 22 | If unsure, say N. | 22 | If unsure, say N. |
| 23 | 23 | ||
| 24 | config SUNRPC_SWAP | ||
| 25 | bool | ||
| 26 | depends on SUNRPC | ||
| 27 | select NETVM | ||
| 28 | |||
| 24 | config RPCSEC_GSS_KRB5 | 29 | config RPCSEC_GSS_KRB5 |
| 25 | tristate "Secure RPC: Kerberos V mechanism" | 30 | tristate "Secure RPC: Kerberos V mechanism" |
| 26 | depends on SUNRPC && CRYPTO | 31 | depends on SUNRPC && CRYPTO |
diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index 727e506cacda..b5c067bccc45 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c | |||
| @@ -13,6 +13,7 @@ | |||
| 13 | #include <linux/errno.h> | 13 | #include <linux/errno.h> |
| 14 | #include <linux/hash.h> | 14 | #include <linux/hash.h> |
| 15 | #include <linux/sunrpc/clnt.h> | 15 | #include <linux/sunrpc/clnt.h> |
| 16 | #include <linux/sunrpc/gss_api.h> | ||
| 16 | #include <linux/spinlock.h> | 17 | #include <linux/spinlock.h> |
| 17 | 18 | ||
| 18 | #ifdef RPC_DEBUG | 19 | #ifdef RPC_DEBUG |
| @@ -122,6 +123,59 @@ rpcauth_unregister(const struct rpc_authops *ops) | |||
| 122 | } | 123 | } |
| 123 | EXPORT_SYMBOL_GPL(rpcauth_unregister); | 124 | EXPORT_SYMBOL_GPL(rpcauth_unregister); |
| 124 | 125 | ||
| 126 | /** | ||
| 127 | * rpcauth_list_flavors - discover registered flavors and pseudoflavors | ||
| 128 | * @array: array to fill in | ||
| 129 | * @size: size of "array" | ||
| 130 | * | ||
| 131 | * Returns the number of array items filled in, or a negative errno. | ||
| 132 | * | ||
| 133 | * The returned array is not sorted by any policy. Callers should not | ||
| 134 | * rely on the order of the items in the returned array. | ||
| 135 | */ | ||
| 136 | int | ||
| 137 | rpcauth_list_flavors(rpc_authflavor_t *array, int size) | ||
| 138 | { | ||
| 139 | rpc_authflavor_t flavor; | ||
| 140 | int result = 0; | ||
| 141 | |||
| 142 | spin_lock(&rpc_authflavor_lock); | ||
| 143 | for (flavor = 0; flavor < RPC_AUTH_MAXFLAVOR; flavor++) { | ||
| 144 | const struct rpc_authops *ops = auth_flavors[flavor]; | ||
| 145 | rpc_authflavor_t pseudos[4]; | ||
| 146 | int i, len; | ||
| 147 | |||
| 148 | if (result >= size) { | ||
| 149 | result = -ENOMEM; | ||
| 150 | break; | ||
| 151 | } | ||
| 152 | |||
| 153 | if (ops == NULL) | ||
| 154 | continue; | ||
| 155 | if (ops->list_pseudoflavors == NULL) { | ||
| 156 | array[result++] = ops->au_flavor; | ||
| 157 | continue; | ||
| 158 | } | ||
| 159 | len = ops->list_pseudoflavors(pseudos, ARRAY_SIZE(pseudos)); | ||
| 160 | if (len < 0) { | ||
| 161 | result = len; | ||
| 162 | break; | ||
| 163 | } | ||
| 164 | for (i = 0; i < len; i++) { | ||
| 165 | if (result >= size) { | ||
| 166 | result = -ENOMEM; | ||
| 167 | break; | ||
| 168 | } | ||
| 169 | array[result++] = pseudos[i]; | ||
| 170 | } | ||
| 171 | } | ||
| 172 | spin_unlock(&rpc_authflavor_lock); | ||
| 173 | |||
| 174 | dprintk("RPC: %s returns %d\n", __func__, result); | ||
| 175 | return result; | ||
| 176 | } | ||
| 177 | EXPORT_SYMBOL_GPL(rpcauth_list_flavors); | ||
| 178 | |||
| 125 | struct rpc_auth * | 179 | struct rpc_auth * |
| 126 | rpcauth_create(rpc_authflavor_t pseudoflavor, struct rpc_clnt *clnt) | 180 | rpcauth_create(rpc_authflavor_t pseudoflavor, struct rpc_clnt *clnt) |
| 127 | { | 181 | { |
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index d3ad81f8da5b..34c522021004 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c | |||
| @@ -1619,6 +1619,7 @@ static const struct rpc_authops authgss_ops = { | |||
| 1619 | .crcreate = gss_create_cred, | 1619 | .crcreate = gss_create_cred, |
| 1620 | .pipes_create = gss_pipes_dentries_create, | 1620 | .pipes_create = gss_pipes_dentries_create, |
| 1621 | .pipes_destroy = gss_pipes_dentries_destroy, | 1621 | .pipes_destroy = gss_pipes_dentries_destroy, |
| 1622 | .list_pseudoflavors = gss_mech_list_pseudoflavors, | ||
| 1622 | }; | 1623 | }; |
| 1623 | 1624 | ||
| 1624 | static const struct rpc_credops gss_credops = { | 1625 | static const struct rpc_credops gss_credops = { |
diff --git a/net/sunrpc/auth_gss/gss_mech_switch.c b/net/sunrpc/auth_gss/gss_mech_switch.c index 782bfe1b6465..b174fcd9ff4c 100644 --- a/net/sunrpc/auth_gss/gss_mech_switch.c +++ b/net/sunrpc/auth_gss/gss_mech_switch.c | |||
| @@ -239,14 +239,28 @@ gss_mech_get_by_pseudoflavor(u32 pseudoflavor) | |||
| 239 | 239 | ||
| 240 | EXPORT_SYMBOL_GPL(gss_mech_get_by_pseudoflavor); | 240 | EXPORT_SYMBOL_GPL(gss_mech_get_by_pseudoflavor); |
| 241 | 241 | ||
| 242 | int gss_mech_list_pseudoflavors(rpc_authflavor_t *array_ptr) | 242 | /** |
| 243 | * gss_mech_list_pseudoflavors - Discover registered GSS pseudoflavors | ||
| 244 | * @array: array to fill in | ||
| 245 | * @size: size of "array" | ||
| 246 | * | ||
| 247 | * Returns the number of array items filled in, or a negative errno. | ||
| 248 | * | ||
| 249 | * The returned array is not sorted by any policy. Callers should not | ||
| 250 | * rely on the order of the items in the returned array. | ||
| 251 | */ | ||
| 252 | int gss_mech_list_pseudoflavors(rpc_authflavor_t *array_ptr, int size) | ||
| 243 | { | 253 | { |
| 244 | struct gss_api_mech *pos = NULL; | 254 | struct gss_api_mech *pos = NULL; |
| 245 | int j, i = 0; | 255 | int j, i = 0; |
| 246 | 256 | ||
| 247 | spin_lock(&registered_mechs_lock); | 257 | spin_lock(&registered_mechs_lock); |
| 248 | list_for_each_entry(pos, &registered_mechs, gm_list) { | 258 | list_for_each_entry(pos, &registered_mechs, gm_list) { |
| 249 | for (j=0; j < pos->gm_pf_num; j++) { | 259 | for (j = 0; j < pos->gm_pf_num; j++) { |
| 260 | if (i >= size) { | ||
| 261 | spin_unlock(&registered_mechs_lock); | ||
| 262 | return -ENOMEM; | ||
| 263 | } | ||
| 250 | array_ptr[i++] = pos->gm_pfs[j].pseudoflavor; | 264 | array_ptr[i++] = pos->gm_pfs[j].pseudoflavor; |
| 251 | } | 265 | } |
| 252 | } | 266 | } |
| @@ -254,8 +268,6 @@ int gss_mech_list_pseudoflavors(rpc_authflavor_t *array_ptr) | |||
| 254 | return i; | 268 | return i; |
| 255 | } | 269 | } |
| 256 | 270 | ||
| 257 | EXPORT_SYMBOL_GPL(gss_mech_list_pseudoflavors); | ||
| 258 | |||
| 259 | u32 | 271 | u32 |
| 260 | gss_svc_to_pseudoflavor(struct gss_api_mech *gm, u32 service) | 272 | gss_svc_to_pseudoflavor(struct gss_api_mech *gm, u32 service) |
| 261 | { | 273 | { |
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 47ad2666fdf6..2afd2a84dc35 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c | |||
| @@ -1349,8 +1349,11 @@ static int c_show(struct seq_file *m, void *p) | |||
| 1349 | if (cache_check(cd, cp, NULL)) | 1349 | if (cache_check(cd, cp, NULL)) |
| 1350 | /* cache_check does a cache_put on failure */ | 1350 | /* cache_check does a cache_put on failure */ |
| 1351 | seq_printf(m, "# "); | 1351 | seq_printf(m, "# "); |
| 1352 | else | 1352 | else { |
| 1353 | if (cache_is_expired(cd, cp)) | ||
| 1354 | seq_printf(m, "# "); | ||
| 1353 | cache_put(cp, cd); | 1355 | cache_put(cp, cd); |
| 1356 | } | ||
| 1354 | 1357 | ||
| 1355 | return cd->cache_show(m, cd, cp); | 1358 | return cd->cache_show(m, cd, cp); |
| 1356 | } | 1359 | } |
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 00eb859b7de5..fa48c60aef23 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c | |||
| @@ -717,6 +717,15 @@ void rpc_task_set_client(struct rpc_task *task, struct rpc_clnt *clnt) | |||
| 717 | atomic_inc(&clnt->cl_count); | 717 | atomic_inc(&clnt->cl_count); |
| 718 | if (clnt->cl_softrtry) | 718 | if (clnt->cl_softrtry) |
| 719 | task->tk_flags |= RPC_TASK_SOFT; | 719 | task->tk_flags |= RPC_TASK_SOFT; |
| 720 | if (sk_memalloc_socks()) { | ||
| 721 | struct rpc_xprt *xprt; | ||
| 722 | |||
| 723 | rcu_read_lock(); | ||
| 724 | xprt = rcu_dereference(clnt->cl_xprt); | ||
| 725 | if (xprt->swapper) | ||
| 726 | task->tk_flags |= RPC_TASK_SWAPPER; | ||
| 727 | rcu_read_unlock(); | ||
| 728 | } | ||
| 720 | /* Add to the client's list of all tasks */ | 729 | /* Add to the client's list of all tasks */ |
| 721 | spin_lock(&clnt->cl_lock); | 730 | spin_lock(&clnt->cl_lock); |
| 722 | list_add_tail(&task->tk_task, &clnt->cl_tasks); | 731 | list_add_tail(&task->tk_task, &clnt->cl_tasks); |
| @@ -1844,12 +1853,13 @@ call_timeout(struct rpc_task *task) | |||
| 1844 | return; | 1853 | return; |
| 1845 | } | 1854 | } |
| 1846 | if (RPC_IS_SOFT(task)) { | 1855 | if (RPC_IS_SOFT(task)) { |
| 1847 | if (clnt->cl_chatty) | 1856 | if (clnt->cl_chatty) { |
| 1848 | rcu_read_lock(); | 1857 | rcu_read_lock(); |
| 1849 | printk(KERN_NOTICE "%s: server %s not responding, timed out\n", | 1858 | printk(KERN_NOTICE "%s: server %s not responding, timed out\n", |
| 1850 | clnt->cl_protname, | 1859 | clnt->cl_protname, |
| 1851 | rcu_dereference(clnt->cl_xprt)->servername); | 1860 | rcu_dereference(clnt->cl_xprt)->servername); |
| 1852 | rcu_read_unlock(); | 1861 | rcu_read_unlock(); |
| 1862 | } | ||
| 1853 | if (task->tk_flags & RPC_TASK_TIMEOUT) | 1863 | if (task->tk_flags & RPC_TASK_TIMEOUT) |
| 1854 | rpc_exit(task, -ETIMEDOUT); | 1864 | rpc_exit(task, -ETIMEDOUT); |
| 1855 | else | 1865 | else |
diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index 92509ffe15fc..a70acae496e4 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c | |||
| @@ -251,7 +251,7 @@ static int rpcb_create_local_unix(struct net *net) | |||
| 251 | if (IS_ERR(clnt)) { | 251 | if (IS_ERR(clnt)) { |
| 252 | dprintk("RPC: failed to create AF_LOCAL rpcbind " | 252 | dprintk("RPC: failed to create AF_LOCAL rpcbind " |
| 253 | "client (errno %ld).\n", PTR_ERR(clnt)); | 253 | "client (errno %ld).\n", PTR_ERR(clnt)); |
| 254 | result = -PTR_ERR(clnt); | 254 | result = PTR_ERR(clnt); |
| 255 | goto out; | 255 | goto out; |
| 256 | } | 256 | } |
| 257 | 257 | ||
| @@ -298,7 +298,7 @@ static int rpcb_create_local_net(struct net *net) | |||
| 298 | if (IS_ERR(clnt)) { | 298 | if (IS_ERR(clnt)) { |
| 299 | dprintk("RPC: failed to create local rpcbind " | 299 | dprintk("RPC: failed to create local rpcbind " |
| 300 | "client (errno %ld).\n", PTR_ERR(clnt)); | 300 | "client (errno %ld).\n", PTR_ERR(clnt)); |
| 301 | result = -PTR_ERR(clnt); | 301 | result = PTR_ERR(clnt); |
| 302 | goto out; | 302 | goto out; |
| 303 | } | 303 | } |
| 304 | 304 | ||
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 994cfea2bad6..128494ec9a64 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c | |||
| @@ -300,8 +300,9 @@ EXPORT_SYMBOL_GPL(__rpc_wait_for_completion_task); | |||
| 300 | /* | 300 | /* |
| 301 | * Make an RPC task runnable. | 301 | * Make an RPC task runnable. |
| 302 | * | 302 | * |
| 303 | * Note: If the task is ASYNC, this must be called with | 303 | * Note: If the task is ASYNC, and is being made runnable after sitting on an |
| 304 | * the spinlock held to protect the wait queue operation. | 304 | * rpc_wait_queue, this must be called with the queue spinlock held to protect |
| 305 | * the wait queue operation. | ||
| 305 | */ | 306 | */ |
| 306 | static void rpc_make_runnable(struct rpc_task *task) | 307 | static void rpc_make_runnable(struct rpc_task *task) |
| 307 | { | 308 | { |
| @@ -790,7 +791,9 @@ void rpc_execute(struct rpc_task *task) | |||
| 790 | 791 | ||
| 791 | static void rpc_async_schedule(struct work_struct *work) | 792 | static void rpc_async_schedule(struct work_struct *work) |
| 792 | { | 793 | { |
| 794 | current->flags |= PF_FSTRANS; | ||
| 793 | __rpc_execute(container_of(work, struct rpc_task, u.tk_work)); | 795 | __rpc_execute(container_of(work, struct rpc_task, u.tk_work)); |
| 796 | current->flags &= ~PF_FSTRANS; | ||
| 794 | } | 797 | } |
| 795 | 798 | ||
| 796 | /** | 799 | /** |
| @@ -812,7 +815,10 @@ static void rpc_async_schedule(struct work_struct *work) | |||
| 812 | void *rpc_malloc(struct rpc_task *task, size_t size) | 815 | void *rpc_malloc(struct rpc_task *task, size_t size) |
| 813 | { | 816 | { |
| 814 | struct rpc_buffer *buf; | 817 | struct rpc_buffer *buf; |
| 815 | gfp_t gfp = RPC_IS_SWAPPER(task) ? GFP_ATOMIC : GFP_NOWAIT; | 818 | gfp_t gfp = GFP_NOWAIT; |
| 819 | |||
| 820 | if (RPC_IS_SWAPPER(task)) | ||
| 821 | gfp |= __GFP_MEMALLOC; | ||
| 816 | 822 | ||
| 817 | size += sizeof(struct rpc_buffer); | 823 | size += sizeof(struct rpc_buffer); |
| 818 | if (size <= RPC_BUFFER_MAXSIZE) | 824 | if (size <= RPC_BUFFER_MAXSIZE) |
| @@ -886,7 +892,7 @@ static void rpc_init_task(struct rpc_task *task, const struct rpc_task_setup *ta | |||
| 886 | static struct rpc_task * | 892 | static struct rpc_task * |
| 887 | rpc_alloc_task(void) | 893 | rpc_alloc_task(void) |
| 888 | { | 894 | { |
| 889 | return (struct rpc_task *)mempool_alloc(rpc_task_mempool, GFP_NOFS); | 895 | return (struct rpc_task *)mempool_alloc(rpc_task_mempool, GFP_NOIO); |
| 890 | } | 896 | } |
| 891 | 897 | ||
| 892 | /* | 898 | /* |
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 88f2bf671960..bac973a31367 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c | |||
| @@ -316,7 +316,6 @@ static bool svc_xprt_has_something_to_do(struct svc_xprt *xprt) | |||
| 316 | */ | 316 | */ |
| 317 | void svc_xprt_enqueue(struct svc_xprt *xprt) | 317 | void svc_xprt_enqueue(struct svc_xprt *xprt) |
| 318 | { | 318 | { |
| 319 | struct svc_serv *serv = xprt->xpt_server; | ||
| 320 | struct svc_pool *pool; | 319 | struct svc_pool *pool; |
| 321 | struct svc_rqst *rqstp; | 320 | struct svc_rqst *rqstp; |
| 322 | int cpu; | 321 | int cpu; |
| @@ -362,8 +361,6 @@ void svc_xprt_enqueue(struct svc_xprt *xprt) | |||
| 362 | rqstp, rqstp->rq_xprt); | 361 | rqstp, rqstp->rq_xprt); |
| 363 | rqstp->rq_xprt = xprt; | 362 | rqstp->rq_xprt = xprt; |
| 364 | svc_xprt_get(xprt); | 363 | svc_xprt_get(xprt); |
| 365 | rqstp->rq_reserved = serv->sv_max_mesg; | ||
| 366 | atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved); | ||
| 367 | pool->sp_stats.threads_woken++; | 364 | pool->sp_stats.threads_woken++; |
| 368 | wake_up(&rqstp->rq_wait); | 365 | wake_up(&rqstp->rq_wait); |
| 369 | } else { | 366 | } else { |
| @@ -640,8 +637,6 @@ int svc_recv(struct svc_rqst *rqstp, long timeout) | |||
| 640 | if (xprt) { | 637 | if (xprt) { |
| 641 | rqstp->rq_xprt = xprt; | 638 | rqstp->rq_xprt = xprt; |
| 642 | svc_xprt_get(xprt); | 639 | svc_xprt_get(xprt); |
| 643 | rqstp->rq_reserved = serv->sv_max_mesg; | ||
| 644 | atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved); | ||
| 645 | 640 | ||
| 646 | /* As there is a shortage of threads and this request | 641 | /* As there is a shortage of threads and this request |
| 647 | * had to be queued, don't allow the thread to wait so | 642 | * had to be queued, don't allow the thread to wait so |
| @@ -738,6 +733,8 @@ int svc_recv(struct svc_rqst *rqstp, long timeout) | |||
| 738 | else | 733 | else |
| 739 | len = xprt->xpt_ops->xpo_recvfrom(rqstp); | 734 | len = xprt->xpt_ops->xpo_recvfrom(rqstp); |
| 740 | dprintk("svc: got len=%d\n", len); | 735 | dprintk("svc: got len=%d\n", len); |
| 736 | rqstp->rq_reserved = serv->sv_max_mesg; | ||
| 737 | atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved); | ||
| 741 | } | 738 | } |
| 742 | svc_xprt_received(xprt); | 739 | svc_xprt_received(xprt); |
| 743 | 740 | ||
| @@ -794,7 +791,8 @@ int svc_send(struct svc_rqst *rqstp) | |||
| 794 | 791 | ||
| 795 | /* Grab mutex to serialize outgoing data. */ | 792 | /* Grab mutex to serialize outgoing data. */ |
| 796 | mutex_lock(&xprt->xpt_mutex); | 793 | mutex_lock(&xprt->xpt_mutex); |
| 797 | if (test_bit(XPT_DEAD, &xprt->xpt_flags)) | 794 | if (test_bit(XPT_DEAD, &xprt->xpt_flags) |
| 795 | || test_bit(XPT_CLOSE, &xprt->xpt_flags)) | ||
| 798 | len = -ENOTCONN; | 796 | len = -ENOTCONN; |
| 799 | else | 797 | else |
| 800 | len = xprt->xpt_ops->xpo_sendto(rqstp); | 798 | len = xprt->xpt_ops->xpo_sendto(rqstp); |
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 18bc130255a7..998aa8c1807c 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c | |||
| @@ -1129,9 +1129,9 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) | |||
| 1129 | if (len >= 0) | 1129 | if (len >= 0) |
| 1130 | svsk->sk_tcplen += len; | 1130 | svsk->sk_tcplen += len; |
| 1131 | if (len != want) { | 1131 | if (len != want) { |
| 1132 | svc_tcp_save_pages(svsk, rqstp); | ||
| 1132 | if (len < 0 && len != -EAGAIN) | 1133 | if (len < 0 && len != -EAGAIN) |
| 1133 | goto err_other; | 1134 | goto err_other; |
| 1134 | svc_tcp_save_pages(svsk, rqstp); | ||
| 1135 | dprintk("svc: incomplete TCP record (%d of %d)\n", | 1135 | dprintk("svc: incomplete TCP record (%d of %d)\n", |
| 1136 | svsk->sk_tcplen, svsk->sk_reclen); | 1136 | svsk->sk_tcplen, svsk->sk_reclen); |
| 1137 | goto err_noclose; | 1137 | goto err_noclose; |
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index 0cf165580d8d..0afba1b4b656 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c | |||
| @@ -129,34 +129,6 @@ xdr_terminate_string(struct xdr_buf *buf, const u32 len) | |||
| 129 | EXPORT_SYMBOL_GPL(xdr_terminate_string); | 129 | EXPORT_SYMBOL_GPL(xdr_terminate_string); |
| 130 | 130 | ||
| 131 | void | 131 | void |
| 132 | xdr_encode_pages(struct xdr_buf *xdr, struct page **pages, unsigned int base, | ||
| 133 | unsigned int len) | ||
| 134 | { | ||
| 135 | struct kvec *tail = xdr->tail; | ||
| 136 | u32 *p; | ||
| 137 | |||
| 138 | xdr->pages = pages; | ||
| 139 | xdr->page_base = base; | ||
| 140 | xdr->page_len = len; | ||
| 141 | |||
| 142 | p = (u32 *)xdr->head[0].iov_base + XDR_QUADLEN(xdr->head[0].iov_len); | ||
| 143 | tail->iov_base = p; | ||
| 144 | tail->iov_len = 0; | ||
| 145 | |||
| 146 | if (len & 3) { | ||
| 147 | unsigned int pad = 4 - (len & 3); | ||
| 148 | |||
| 149 | *p = 0; | ||
| 150 | tail->iov_base = (char *)p + (len & 3); | ||
| 151 | tail->iov_len = pad; | ||
| 152 | len += pad; | ||
| 153 | } | ||
| 154 | xdr->buflen += len; | ||
| 155 | xdr->len += len; | ||
| 156 | } | ||
| 157 | EXPORT_SYMBOL_GPL(xdr_encode_pages); | ||
| 158 | |||
| 159 | void | ||
| 160 | xdr_inline_pages(struct xdr_buf *xdr, unsigned int offset, | 132 | xdr_inline_pages(struct xdr_buf *xdr, unsigned int offset, |
| 161 | struct page **pages, unsigned int base, unsigned int len) | 133 | struct page **pages, unsigned int base, unsigned int len) |
| 162 | { | 134 | { |
| @@ -457,6 +429,16 @@ xdr_shift_buf(struct xdr_buf *buf, size_t len) | |||
| 457 | EXPORT_SYMBOL_GPL(xdr_shift_buf); | 429 | EXPORT_SYMBOL_GPL(xdr_shift_buf); |
| 458 | 430 | ||
| 459 | /** | 431 | /** |
| 432 | * xdr_stream_pos - Return the current offset from the start of the xdr_stream | ||
| 433 | * @xdr: pointer to struct xdr_stream | ||
| 434 | */ | ||
| 435 | unsigned int xdr_stream_pos(const struct xdr_stream *xdr) | ||
| 436 | { | ||
| 437 | return (unsigned int)(XDR_QUADLEN(xdr->buf->len) - xdr->nwords) << 2; | ||
| 438 | } | ||
| 439 | EXPORT_SYMBOL_GPL(xdr_stream_pos); | ||
| 440 | |||
| 441 | /** | ||
| 460 | * xdr_init_encode - Initialize a struct xdr_stream for sending data. | 442 | * xdr_init_encode - Initialize a struct xdr_stream for sending data. |
| 461 | * @xdr: pointer to xdr_stream struct | 443 | * @xdr: pointer to xdr_stream struct |
| 462 | * @buf: pointer to XDR buffer in which to encode data | 444 | * @buf: pointer to XDR buffer in which to encode data |
| @@ -556,13 +538,11 @@ void xdr_write_pages(struct xdr_stream *xdr, struct page **pages, unsigned int b | |||
| 556 | EXPORT_SYMBOL_GPL(xdr_write_pages); | 538 | EXPORT_SYMBOL_GPL(xdr_write_pages); |
| 557 | 539 | ||
| 558 | static void xdr_set_iov(struct xdr_stream *xdr, struct kvec *iov, | 540 | static void xdr_set_iov(struct xdr_stream *xdr, struct kvec *iov, |
| 559 | __be32 *p, unsigned int len) | 541 | unsigned int len) |
| 560 | { | 542 | { |
| 561 | if (len > iov->iov_len) | 543 | if (len > iov->iov_len) |
| 562 | len = iov->iov_len; | 544 | len = iov->iov_len; |
| 563 | if (p == NULL) | 545 | xdr->p = (__be32*)iov->iov_base; |
| 564 | p = (__be32*)iov->iov_base; | ||
| 565 | xdr->p = p; | ||
| 566 | xdr->end = (__be32*)(iov->iov_base + len); | 546 | xdr->end = (__be32*)(iov->iov_base + len); |
| 567 | xdr->iov = iov; | 547 | xdr->iov = iov; |
| 568 | xdr->page_ptr = NULL; | 548 | xdr->page_ptr = NULL; |
| @@ -609,7 +589,7 @@ static void xdr_set_next_page(struct xdr_stream *xdr) | |||
| 609 | newbase -= xdr->buf->page_base; | 589 | newbase -= xdr->buf->page_base; |
| 610 | 590 | ||
| 611 | if (xdr_set_page_base(xdr, newbase, PAGE_SIZE) < 0) | 591 | if (xdr_set_page_base(xdr, newbase, PAGE_SIZE) < 0) |
| 612 | xdr_set_iov(xdr, xdr->buf->tail, NULL, xdr->buf->len); | 592 | xdr_set_iov(xdr, xdr->buf->tail, xdr->buf->len); |
| 613 | } | 593 | } |
| 614 | 594 | ||
| 615 | static bool xdr_set_next_buffer(struct xdr_stream *xdr) | 595 | static bool xdr_set_next_buffer(struct xdr_stream *xdr) |
| @@ -618,7 +598,7 @@ static bool xdr_set_next_buffer(struct xdr_stream *xdr) | |||
| 618 | xdr_set_next_page(xdr); | 598 | xdr_set_next_page(xdr); |
| 619 | else if (xdr->iov == xdr->buf->head) { | 599 | else if (xdr->iov == xdr->buf->head) { |
| 620 | if (xdr_set_page_base(xdr, 0, PAGE_SIZE) < 0) | 600 | if (xdr_set_page_base(xdr, 0, PAGE_SIZE) < 0) |
| 621 | xdr_set_iov(xdr, xdr->buf->tail, NULL, xdr->buf->len); | 601 | xdr_set_iov(xdr, xdr->buf->tail, xdr->buf->len); |
| 622 | } | 602 | } |
| 623 | return xdr->p != xdr->end; | 603 | return xdr->p != xdr->end; |
| 624 | } | 604 | } |
| @@ -634,10 +614,15 @@ void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p) | |||
| 634 | xdr->buf = buf; | 614 | xdr->buf = buf; |
| 635 | xdr->scratch.iov_base = NULL; | 615 | xdr->scratch.iov_base = NULL; |
| 636 | xdr->scratch.iov_len = 0; | 616 | xdr->scratch.iov_len = 0; |
| 617 | xdr->nwords = XDR_QUADLEN(buf->len); | ||
| 637 | if (buf->head[0].iov_len != 0) | 618 | if (buf->head[0].iov_len != 0) |
| 638 | xdr_set_iov(xdr, buf->head, p, buf->len); | 619 | xdr_set_iov(xdr, buf->head, buf->len); |
| 639 | else if (buf->page_len != 0) | 620 | else if (buf->page_len != 0) |
| 640 | xdr_set_page_base(xdr, 0, buf->len); | 621 | xdr_set_page_base(xdr, 0, buf->len); |
| 622 | if (p != NULL && p > xdr->p && xdr->end >= p) { | ||
| 623 | xdr->nwords -= p - xdr->p; | ||
| 624 | xdr->p = p; | ||
| 625 | } | ||
| 641 | } | 626 | } |
| 642 | EXPORT_SYMBOL_GPL(xdr_init_decode); | 627 | EXPORT_SYMBOL_GPL(xdr_init_decode); |
| 643 | 628 | ||
| @@ -662,12 +647,14 @@ EXPORT_SYMBOL_GPL(xdr_init_decode_pages); | |||
| 662 | 647 | ||
| 663 | static __be32 * __xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes) | 648 | static __be32 * __xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes) |
| 664 | { | 649 | { |
| 650 | unsigned int nwords = XDR_QUADLEN(nbytes); | ||
| 665 | __be32 *p = xdr->p; | 651 | __be32 *p = xdr->p; |
| 666 | __be32 *q = p + XDR_QUADLEN(nbytes); | 652 | __be32 *q = p + nwords; |
| 667 | 653 | ||
| 668 | if (unlikely(q > xdr->end || q < p)) | 654 | if (unlikely(nwords > xdr->nwords || q > xdr->end || q < p)) |
| 669 | return NULL; | 655 | return NULL; |
| 670 | xdr->p = q; | 656 | xdr->p = q; |
| 657 | xdr->nwords -= nwords; | ||
| 671 | return p; | 658 | return p; |
| 672 | } | 659 | } |
| 673 | 660 | ||
| @@ -734,6 +721,31 @@ __be32 * xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes) | |||
| 734 | } | 721 | } |
| 735 | EXPORT_SYMBOL_GPL(xdr_inline_decode); | 722 | EXPORT_SYMBOL_GPL(xdr_inline_decode); |
| 736 | 723 | ||
| 724 | static unsigned int xdr_align_pages(struct xdr_stream *xdr, unsigned int len) | ||
| 725 | { | ||
| 726 | struct xdr_buf *buf = xdr->buf; | ||
| 727 | struct kvec *iov; | ||
| 728 | unsigned int nwords = XDR_QUADLEN(len); | ||
| 729 | unsigned int cur = xdr_stream_pos(xdr); | ||
| 730 | |||
| 731 | if (xdr->nwords == 0) | ||
| 732 | return 0; | ||
| 733 | if (nwords > xdr->nwords) { | ||
| 734 | nwords = xdr->nwords; | ||
| 735 | len = nwords << 2; | ||
| 736 | } | ||
| 737 | /* Realign pages to current pointer position */ | ||
| 738 | iov = buf->head; | ||
| 739 | if (iov->iov_len > cur) | ||
| 740 | xdr_shrink_bufhead(buf, iov->iov_len - cur); | ||
| 741 | |||
| 742 | /* Truncate page data and move it into the tail */ | ||
| 743 | if (buf->page_len > len) | ||
| 744 | xdr_shrink_pagelen(buf, buf->page_len - len); | ||
| 745 | xdr->nwords = XDR_QUADLEN(buf->len - cur); | ||
| 746 | return len; | ||
| 747 | } | ||
| 748 | |||
| 737 | /** | 749 | /** |
| 738 | * xdr_read_pages - Ensure page-based XDR data to decode is aligned at current pointer position | 750 | * xdr_read_pages - Ensure page-based XDR data to decode is aligned at current pointer position |
| 739 | * @xdr: pointer to xdr_stream struct | 751 | * @xdr: pointer to xdr_stream struct |
| @@ -742,39 +754,37 @@ EXPORT_SYMBOL_GPL(xdr_inline_decode); | |||
| 742 | * Moves data beyond the current pointer position from the XDR head[] buffer | 754 | * Moves data beyond the current pointer position from the XDR head[] buffer |
| 743 | * into the page list. Any data that lies beyond current position + "len" | 755 | * into the page list. Any data that lies beyond current position + "len" |
| 744 | * bytes is moved into the XDR tail[]. | 756 | * bytes is moved into the XDR tail[]. |
| 757 | * | ||
| 758 | * Returns the number of XDR encoded bytes now contained in the pages | ||
| 745 | */ | 759 | */ |
| 746 | void xdr_read_pages(struct xdr_stream *xdr, unsigned int len) | 760 | unsigned int xdr_read_pages(struct xdr_stream *xdr, unsigned int len) |
| 747 | { | 761 | { |
| 748 | struct xdr_buf *buf = xdr->buf; | 762 | struct xdr_buf *buf = xdr->buf; |
| 749 | struct kvec *iov; | 763 | struct kvec *iov; |
| 750 | ssize_t shift; | 764 | unsigned int nwords; |
| 751 | unsigned int end; | 765 | unsigned int end; |
| 752 | int padding; | 766 | unsigned int padding; |
| 753 | 767 | ||
| 754 | /* Realign pages to current pointer position */ | 768 | len = xdr_align_pages(xdr, len); |
| 755 | iov = buf->head; | 769 | if (len == 0) |
| 756 | shift = iov->iov_len + (char *)iov->iov_base - (char *)xdr->p; | 770 | return 0; |
| 757 | if (shift > 0) | 771 | nwords = XDR_QUADLEN(len); |
| 758 | xdr_shrink_bufhead(buf, shift); | 772 | padding = (nwords << 2) - len; |
| 759 | |||
| 760 | /* Truncate page data and move it into the tail */ | ||
| 761 | if (buf->page_len > len) | ||
| 762 | xdr_shrink_pagelen(buf, buf->page_len - len); | ||
| 763 | padding = (XDR_QUADLEN(len) << 2) - len; | ||
| 764 | xdr->iov = iov = buf->tail; | 773 | xdr->iov = iov = buf->tail; |
| 765 | /* Compute remaining message length. */ | 774 | /* Compute remaining message length. */ |
| 766 | end = iov->iov_len; | 775 | end = ((xdr->nwords - nwords) << 2) + padding; |
| 767 | shift = buf->buflen - buf->len; | 776 | if (end > iov->iov_len) |
| 768 | if (shift < end) | 777 | end = iov->iov_len; |
| 769 | end -= shift; | 778 | |
| 770 | else if (shift > 0) | ||
| 771 | end = 0; | ||
| 772 | /* | 779 | /* |
| 773 | * Position current pointer at beginning of tail, and | 780 | * Position current pointer at beginning of tail, and |
| 774 | * set remaining message length. | 781 | * set remaining message length. |
| 775 | */ | 782 | */ |
| 776 | xdr->p = (__be32 *)((char *)iov->iov_base + padding); | 783 | xdr->p = (__be32 *)((char *)iov->iov_base + padding); |
| 777 | xdr->end = (__be32 *)((char *)iov->iov_base + end); | 784 | xdr->end = (__be32 *)((char *)iov->iov_base + end); |
| 785 | xdr->page_ptr = NULL; | ||
| 786 | xdr->nwords = XDR_QUADLEN(end - padding); | ||
| 787 | return len; | ||
| 778 | } | 788 | } |
| 779 | EXPORT_SYMBOL_GPL(xdr_read_pages); | 789 | EXPORT_SYMBOL_GPL(xdr_read_pages); |
| 780 | 790 | ||
| @@ -790,12 +800,13 @@ EXPORT_SYMBOL_GPL(xdr_read_pages); | |||
| 790 | */ | 800 | */ |
| 791 | void xdr_enter_page(struct xdr_stream *xdr, unsigned int len) | 801 | void xdr_enter_page(struct xdr_stream *xdr, unsigned int len) |
| 792 | { | 802 | { |
| 793 | xdr_read_pages(xdr, len); | 803 | len = xdr_align_pages(xdr, len); |
| 794 | /* | 804 | /* |
| 795 | * Position current pointer at beginning of tail, and | 805 | * Position current pointer at beginning of tail, and |
| 796 | * set remaining message length. | 806 | * set remaining message length. |
| 797 | */ | 807 | */ |
| 798 | xdr_set_page_base(xdr, 0, len); | 808 | if (len != 0) |
| 809 | xdr_set_page_base(xdr, 0, len); | ||
| 799 | } | 810 | } |
| 800 | EXPORT_SYMBOL_GPL(xdr_enter_page); | 811 | EXPORT_SYMBOL_GPL(xdr_enter_page); |
| 801 | 812 | ||
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index b446e100286f..06cdbff79e4a 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c | |||
| @@ -200,6 +200,7 @@ xprt_rdma_connect_worker(struct work_struct *work) | |||
| 200 | int rc = 0; | 200 | int rc = 0; |
| 201 | 201 | ||
| 202 | if (!xprt->shutdown) { | 202 | if (!xprt->shutdown) { |
| 203 | current->flags |= PF_FSTRANS; | ||
| 203 | xprt_clear_connected(xprt); | 204 | xprt_clear_connected(xprt); |
| 204 | 205 | ||
| 205 | dprintk("RPC: %s: %sconnect\n", __func__, | 206 | dprintk("RPC: %s: %sconnect\n", __func__, |
| @@ -212,10 +213,10 @@ xprt_rdma_connect_worker(struct work_struct *work) | |||
| 212 | 213 | ||
| 213 | out: | 214 | out: |
| 214 | xprt_wake_pending_tasks(xprt, rc); | 215 | xprt_wake_pending_tasks(xprt, rc); |
| 215 | |||
| 216 | out_clear: | 216 | out_clear: |
| 217 | dprintk("RPC: %s: exit\n", __func__); | 217 | dprintk("RPC: %s: exit\n", __func__); |
| 218 | xprt_clear_connecting(xprt); | 218 | xprt_clear_connecting(xprt); |
| 219 | current->flags &= ~PF_FSTRANS; | ||
| 219 | } | 220 | } |
| 220 | 221 | ||
| 221 | /* | 222 | /* |
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 62d0dac8f780..400567243f84 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c | |||
| @@ -1892,6 +1892,8 @@ static void xs_local_setup_socket(struct work_struct *work) | |||
| 1892 | if (xprt->shutdown) | 1892 | if (xprt->shutdown) |
| 1893 | goto out; | 1893 | goto out; |
| 1894 | 1894 | ||
| 1895 | current->flags |= PF_FSTRANS; | ||
| 1896 | |||
| 1895 | clear_bit(XPRT_CONNECTION_ABORT, &xprt->state); | 1897 | clear_bit(XPRT_CONNECTION_ABORT, &xprt->state); |
| 1896 | status = __sock_create(xprt->xprt_net, AF_LOCAL, | 1898 | status = __sock_create(xprt->xprt_net, AF_LOCAL, |
| 1897 | SOCK_STREAM, 0, &sock, 1); | 1899 | SOCK_STREAM, 0, &sock, 1); |
| @@ -1925,7 +1927,47 @@ static void xs_local_setup_socket(struct work_struct *work) | |||
| 1925 | out: | 1927 | out: |
| 1926 | xprt_clear_connecting(xprt); | 1928 | xprt_clear_connecting(xprt); |
| 1927 | xprt_wake_pending_tasks(xprt, status); | 1929 | xprt_wake_pending_tasks(xprt, status); |
| 1930 | current->flags &= ~PF_FSTRANS; | ||
| 1931 | } | ||
| 1932 | |||
| 1933 | #ifdef CONFIG_SUNRPC_SWAP | ||
| 1934 | static void xs_set_memalloc(struct rpc_xprt *xprt) | ||
| 1935 | { | ||
| 1936 | struct sock_xprt *transport = container_of(xprt, struct sock_xprt, | ||
| 1937 | xprt); | ||
| 1938 | |||
| 1939 | if (xprt->swapper) | ||
| 1940 | sk_set_memalloc(transport->inet); | ||
| 1941 | } | ||
| 1942 | |||
| 1943 | /** | ||
| 1944 | * xs_swapper - Tag this transport as being used for swap. | ||
| 1945 | * @xprt: transport to tag | ||
| 1946 | * @enable: enable/disable | ||
| 1947 | * | ||
| 1948 | */ | ||
| 1949 | int xs_swapper(struct rpc_xprt *xprt, int enable) | ||
| 1950 | { | ||
| 1951 | struct sock_xprt *transport = container_of(xprt, struct sock_xprt, | ||
| 1952 | xprt); | ||
| 1953 | int err = 0; | ||
| 1954 | |||
| 1955 | if (enable) { | ||
| 1956 | xprt->swapper++; | ||
| 1957 | xs_set_memalloc(xprt); | ||
| 1958 | } else if (xprt->swapper) { | ||
| 1959 | xprt->swapper--; | ||
| 1960 | sk_clear_memalloc(transport->inet); | ||
| 1961 | } | ||
| 1962 | |||
| 1963 | return err; | ||
| 1964 | } | ||
| 1965 | EXPORT_SYMBOL_GPL(xs_swapper); | ||
| 1966 | #else | ||
| 1967 | static void xs_set_memalloc(struct rpc_xprt *xprt) | ||
| 1968 | { | ||
| 1928 | } | 1969 | } |
| 1970 | #endif | ||
| 1929 | 1971 | ||
| 1930 | static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) | 1972 | static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) |
| 1931 | { | 1973 | { |
| @@ -1951,6 +1993,8 @@ static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) | |||
| 1951 | transport->sock = sock; | 1993 | transport->sock = sock; |
| 1952 | transport->inet = sk; | 1994 | transport->inet = sk; |
| 1953 | 1995 | ||
| 1996 | xs_set_memalloc(xprt); | ||
| 1997 | |||
| 1954 | write_unlock_bh(&sk->sk_callback_lock); | 1998 | write_unlock_bh(&sk->sk_callback_lock); |
| 1955 | } | 1999 | } |
| 1956 | xs_udp_do_set_buffer_size(xprt); | 2000 | xs_udp_do_set_buffer_size(xprt); |
| @@ -1967,6 +2011,8 @@ static void xs_udp_setup_socket(struct work_struct *work) | |||
| 1967 | if (xprt->shutdown) | 2011 | if (xprt->shutdown) |
| 1968 | goto out; | 2012 | goto out; |
| 1969 | 2013 | ||
| 2014 | current->flags |= PF_FSTRANS; | ||
| 2015 | |||
| 1970 | /* Start by resetting any existing state */ | 2016 | /* Start by resetting any existing state */ |
| 1971 | xs_reset_transport(transport); | 2017 | xs_reset_transport(transport); |
| 1972 | sock = xs_create_sock(xprt, transport, | 2018 | sock = xs_create_sock(xprt, transport, |
| @@ -1985,6 +2031,7 @@ static void xs_udp_setup_socket(struct work_struct *work) | |||
| 1985 | out: | 2031 | out: |
| 1986 | xprt_clear_connecting(xprt); | 2032 | xprt_clear_connecting(xprt); |
| 1987 | xprt_wake_pending_tasks(xprt, status); | 2033 | xprt_wake_pending_tasks(xprt, status); |
| 2034 | current->flags &= ~PF_FSTRANS; | ||
| 1988 | } | 2035 | } |
| 1989 | 2036 | ||
| 1990 | /* | 2037 | /* |
| @@ -2075,6 +2122,8 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) | |||
| 2075 | if (!xprt_bound(xprt)) | 2122 | if (!xprt_bound(xprt)) |
| 2076 | goto out; | 2123 | goto out; |
| 2077 | 2124 | ||
| 2125 | xs_set_memalloc(xprt); | ||
| 2126 | |||
| 2078 | /* Tell the socket layer to start connecting... */ | 2127 | /* Tell the socket layer to start connecting... */ |
| 2079 | xprt->stat.connect_count++; | 2128 | xprt->stat.connect_count++; |
| 2080 | xprt->stat.connect_start = jiffies; | 2129 | xprt->stat.connect_start = jiffies; |
| @@ -2110,6 +2159,8 @@ static void xs_tcp_setup_socket(struct work_struct *work) | |||
| 2110 | if (xprt->shutdown) | 2159 | if (xprt->shutdown) |
| 2111 | goto out; | 2160 | goto out; |
| 2112 | 2161 | ||
| 2162 | current->flags |= PF_FSTRANS; | ||
| 2163 | |||
| 2113 | if (!sock) { | 2164 | if (!sock) { |
| 2114 | clear_bit(XPRT_CONNECTION_ABORT, &xprt->state); | 2165 | clear_bit(XPRT_CONNECTION_ABORT, &xprt->state); |
| 2115 | sock = xs_create_sock(xprt, transport, | 2166 | sock = xs_create_sock(xprt, transport, |
| @@ -2159,6 +2210,7 @@ static void xs_tcp_setup_socket(struct work_struct *work) | |||
| 2159 | case -EINPROGRESS: | 2210 | case -EINPROGRESS: |
| 2160 | case -EALREADY: | 2211 | case -EALREADY: |
| 2161 | xprt_clear_connecting(xprt); | 2212 | xprt_clear_connecting(xprt); |
| 2213 | current->flags &= ~PF_FSTRANS; | ||
| 2162 | return; | 2214 | return; |
| 2163 | case -EINVAL: | 2215 | case -EINVAL: |
| 2164 | /* Happens, for instance, if the user specified a link | 2216 | /* Happens, for instance, if the user specified a link |
| @@ -2171,6 +2223,7 @@ out_eagain: | |||
| 2171 | out: | 2223 | out: |
| 2172 | xprt_clear_connecting(xprt); | 2224 | xprt_clear_connecting(xprt); |
| 2173 | xprt_wake_pending_tasks(xprt, status); | 2225 | xprt_wake_pending_tasks(xprt, status); |
| 2226 | current->flags &= ~PF_FSTRANS; | ||
| 2174 | } | 2227 | } |
| 2175 | 2228 | ||
| 2176 | /** | 2229 | /** |
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 79981d97bc9c..c5ee4ff61364 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c | |||
| @@ -823,6 +823,34 @@ fail: | |||
| 823 | return NULL; | 823 | return NULL; |
| 824 | } | 824 | } |
| 825 | 825 | ||
| 826 | static int unix_mknod(const char *sun_path, umode_t mode, struct path *res) | ||
| 827 | { | ||
| 828 | struct dentry *dentry; | ||
| 829 | struct path path; | ||
| 830 | int err = 0; | ||
| 831 | /* | ||
| 832 | * Get the parent directory, calculate the hash for last | ||
| 833 | * component. | ||
| 834 | */ | ||
| 835 | dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0); | ||
| 836 | err = PTR_ERR(dentry); | ||
| 837 | if (IS_ERR(dentry)) | ||
| 838 | return err; | ||
| 839 | |||
| 840 | /* | ||
| 841 | * All right, let's create it. | ||
| 842 | */ | ||
| 843 | err = security_path_mknod(&path, dentry, mode, 0); | ||
| 844 | if (!err) { | ||
| 845 | err = vfs_mknod(path.dentry->d_inode, dentry, mode, 0); | ||
| 846 | if (!err) { | ||
| 847 | res->mnt = mntget(path.mnt); | ||
| 848 | res->dentry = dget(dentry); | ||
| 849 | } | ||
| 850 | } | ||
| 851 | done_path_create(&path, dentry); | ||
| 852 | return err; | ||
| 853 | } | ||
| 826 | 854 | ||
| 827 | static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) | 855 | static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) |
| 828 | { | 856 | { |
| @@ -831,8 +859,6 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) | |||
| 831 | struct unix_sock *u = unix_sk(sk); | 859 | struct unix_sock *u = unix_sk(sk); |
| 832 | struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr; | 860 | struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr; |
| 833 | char *sun_path = sunaddr->sun_path; | 861 | char *sun_path = sunaddr->sun_path; |
| 834 | struct dentry *dentry = NULL; | ||
| 835 | struct path path; | ||
| 836 | int err; | 862 | int err; |
| 837 | unsigned int hash; | 863 | unsigned int hash; |
| 838 | struct unix_address *addr; | 864 | struct unix_address *addr; |
| @@ -869,43 +895,23 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) | |||
| 869 | atomic_set(&addr->refcnt, 1); | 895 | atomic_set(&addr->refcnt, 1); |
| 870 | 896 | ||
| 871 | if (sun_path[0]) { | 897 | if (sun_path[0]) { |
| 872 | umode_t mode; | 898 | struct path path; |
| 873 | err = 0; | 899 | umode_t mode = S_IFSOCK | |
| 874 | /* | ||
| 875 | * Get the parent directory, calculate the hash for last | ||
| 876 | * component. | ||
| 877 | */ | ||
| 878 | dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0); | ||
| 879 | err = PTR_ERR(dentry); | ||
| 880 | if (IS_ERR(dentry)) | ||
| 881 | goto out_mknod_parent; | ||
| 882 | |||
| 883 | /* | ||
| 884 | * All right, let's create it. | ||
| 885 | */ | ||
| 886 | mode = S_IFSOCK | | ||
| 887 | (SOCK_INODE(sock)->i_mode & ~current_umask()); | 900 | (SOCK_INODE(sock)->i_mode & ~current_umask()); |
| 888 | err = mnt_want_write(path.mnt); | 901 | err = unix_mknod(sun_path, mode, &path); |
| 889 | if (err) | 902 | if (err) { |
| 890 | goto out_mknod_dput; | 903 | if (err == -EEXIST) |
| 891 | err = security_path_mknod(&path, dentry, mode, 0); | 904 | err = -EADDRINUSE; |
| 892 | if (err) | 905 | unix_release_addr(addr); |
| 893 | goto out_mknod_drop_write; | 906 | goto out_up; |
| 894 | err = vfs_mknod(path.dentry->d_inode, dentry, mode, 0); | 907 | } |
| 895 | out_mknod_drop_write: | ||
| 896 | mnt_drop_write(path.mnt); | ||
| 897 | if (err) | ||
| 898 | goto out_mknod_dput; | ||
| 899 | mutex_unlock(&path.dentry->d_inode->i_mutex); | ||
| 900 | dput(path.dentry); | ||
| 901 | path.dentry = dentry; | ||
| 902 | |||
| 903 | addr->hash = UNIX_HASH_SIZE; | 908 | addr->hash = UNIX_HASH_SIZE; |
| 904 | } | 909 | hash = path.dentry->d_inode->i_ino & (UNIX_HASH_SIZE-1); |
| 905 | 910 | spin_lock(&unix_table_lock); | |
| 906 | spin_lock(&unix_table_lock); | 911 | u->path = path; |
| 907 | 912 | list = &unix_socket_table[hash]; | |
| 908 | if (!sun_path[0]) { | 913 | } else { |
| 914 | spin_lock(&unix_table_lock); | ||
| 909 | err = -EADDRINUSE; | 915 | err = -EADDRINUSE; |
| 910 | if (__unix_find_socket_byname(net, sunaddr, addr_len, | 916 | if (__unix_find_socket_byname(net, sunaddr, addr_len, |
| 911 | sk->sk_type, hash)) { | 917 | sk->sk_type, hash)) { |
| @@ -914,9 +920,6 @@ out_mknod_drop_write: | |||
| 914 | } | 920 | } |
| 915 | 921 | ||
| 916 | list = &unix_socket_table[addr->hash]; | 922 | list = &unix_socket_table[addr->hash]; |
| 917 | } else { | ||
| 918 | list = &unix_socket_table[dentry->d_inode->i_ino & (UNIX_HASH_SIZE-1)]; | ||
| 919 | u->path = path; | ||
| 920 | } | 923 | } |
| 921 | 924 | ||
| 922 | err = 0; | 925 | err = 0; |
| @@ -930,16 +933,6 @@ out_up: | |||
| 930 | mutex_unlock(&u->readlock); | 933 | mutex_unlock(&u->readlock); |
| 931 | out: | 934 | out: |
| 932 | return err; | 935 | return err; |
| 933 | |||
| 934 | out_mknod_dput: | ||
| 935 | dput(dentry); | ||
| 936 | mutex_unlock(&path.dentry->d_inode->i_mutex); | ||
| 937 | path_put(&path); | ||
| 938 | out_mknod_parent: | ||
| 939 | if (err == -EEXIST) | ||
| 940 | err = -EADDRINUSE; | ||
| 941 | unix_release_addr(addr); | ||
| 942 | goto out_up; | ||
| 943 | } | 936 | } |
| 944 | 937 | ||
| 945 | static void unix_state_double_lock(struct sock *sk1, struct sock *sk2) | 938 | static void unix_state_double_lock(struct sock *sk1, struct sock *sk2) |
| @@ -1457,7 +1450,7 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock, | |||
| 1457 | if (NULL == siocb->scm) | 1450 | if (NULL == siocb->scm) |
| 1458 | siocb->scm = &tmp_scm; | 1451 | siocb->scm = &tmp_scm; |
| 1459 | wait_for_unix_gc(); | 1452 | wait_for_unix_gc(); |
| 1460 | err = scm_send(sock, msg, siocb->scm); | 1453 | err = scm_send(sock, msg, siocb->scm, false); |
| 1461 | if (err < 0) | 1454 | if (err < 0) |
| 1462 | return err; | 1455 | return err; |
| 1463 | 1456 | ||
| @@ -1626,7 +1619,7 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock, | |||
| 1626 | if (NULL == siocb->scm) | 1619 | if (NULL == siocb->scm) |
| 1627 | siocb->scm = &tmp_scm; | 1620 | siocb->scm = &tmp_scm; |
| 1628 | wait_for_unix_gc(); | 1621 | wait_for_unix_gc(); |
| 1629 | err = scm_send(sock, msg, siocb->scm); | 1622 | err = scm_send(sock, msg, siocb->scm, false); |
| 1630 | if (err < 0) | 1623 | if (err < 0) |
| 1631 | return err; | 1624 | return err; |
| 1632 | 1625 | ||
diff --git a/net/wanrouter/wanmain.c b/net/wanrouter/wanmain.c index 788a12c1eb5d..2ab785064b7e 100644 --- a/net/wanrouter/wanmain.c +++ b/net/wanrouter/wanmain.c | |||
| @@ -602,36 +602,31 @@ static int wanrouter_device_new_if(struct wan_device *wandev, | |||
| 602 | * successfully, add it to the interface list. | 602 | * successfully, add it to the interface list. |
| 603 | */ | 603 | */ |
| 604 | 604 | ||
| 605 | if (dev->name == NULL) { | 605 | #ifdef WANDEBUG |
| 606 | err = -EINVAL; | 606 | printk(KERN_INFO "%s: registering interface %s...\n", |
| 607 | } else { | 607 | wanrouter_modname, dev->name); |
| 608 | #endif | ||
| 608 | 609 | ||
| 609 | #ifdef WANDEBUG | 610 | err = register_netdev(dev); |
| 610 | printk(KERN_INFO "%s: registering interface %s...\n", | 611 | if (!err) { |
| 611 | wanrouter_modname, dev->name); | 612 | struct net_device *slave = NULL; |
| 612 | #endif | 613 | unsigned long smp_flags=0; |
| 613 | 614 | ||
| 614 | err = register_netdev(dev); | 615 | lock_adapter_irq(&wandev->lock, &smp_flags); |
| 615 | if (!err) { | 616 | |
| 616 | struct net_device *slave = NULL; | 617 | if (wandev->dev == NULL) { |
| 617 | unsigned long smp_flags=0; | 618 | wandev->dev = dev; |
| 618 | 619 | } else { | |
| 619 | lock_adapter_irq(&wandev->lock, &smp_flags); | 620 | for (slave=wandev->dev; |
| 620 | 621 | DEV_TO_SLAVE(slave); | |
| 621 | if (wandev->dev == NULL) { | 622 | slave = DEV_TO_SLAVE(slave)) |
| 622 | wandev->dev = dev; | 623 | DEV_TO_SLAVE(slave) = dev; |
| 623 | } else { | ||
| 624 | for (slave=wandev->dev; | ||
| 625 | DEV_TO_SLAVE(slave); | ||
| 626 | slave = DEV_TO_SLAVE(slave)) | ||
| 627 | DEV_TO_SLAVE(slave) = dev; | ||
| 628 | } | ||
| 629 | ++wandev->ndev; | ||
| 630 | |||
| 631 | unlock_adapter_irq(&wandev->lock, &smp_flags); | ||
| 632 | err = 0; /* done !!! */ | ||
| 633 | goto out; | ||
| 634 | } | 624 | } |
| 625 | ++wandev->ndev; | ||
| 626 | |||
| 627 | unlock_adapter_irq(&wandev->lock, &smp_flags); | ||
| 628 | err = 0; /* done !!! */ | ||
| 629 | goto out; | ||
| 635 | } | 630 | } |
| 636 | if (wandev->del_if) | 631 | if (wandev->del_if) |
| 637 | wandev->del_if(wandev, dev); | 632 | wandev->del_if(wandev, dev); |
diff --git a/net/wireless/core.c b/net/wireless/core.c index 31b40cc4a9c3..dcd64d5b07aa 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c | |||
| @@ -952,6 +952,11 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb, | |||
| 952 | */ | 952 | */ |
| 953 | synchronize_rcu(); | 953 | synchronize_rcu(); |
| 954 | INIT_LIST_HEAD(&wdev->list); | 954 | INIT_LIST_HEAD(&wdev->list); |
| 955 | /* | ||
| 956 | * Ensure that all events have been processed and | ||
| 957 | * freed. | ||
| 958 | */ | ||
| 959 | cfg80211_process_wdev_events(wdev); | ||
| 955 | break; | 960 | break; |
| 956 | case NETDEV_PRE_UP: | 961 | case NETDEV_PRE_UP: |
| 957 | if (!(wdev->wiphy->interface_modes & BIT(wdev->iftype))) | 962 | if (!(wdev->wiphy->interface_modes & BIT(wdev->iftype))) |
diff --git a/net/wireless/core.h b/net/wireless/core.h index 5206c6844fd7..bc7430b54771 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h | |||
| @@ -426,6 +426,7 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev, | |||
| 426 | struct net_device *dev, enum nl80211_iftype ntype, | 426 | struct net_device *dev, enum nl80211_iftype ntype, |
| 427 | u32 *flags, struct vif_params *params); | 427 | u32 *flags, struct vif_params *params); |
| 428 | void cfg80211_process_rdev_events(struct cfg80211_registered_device *rdev); | 428 | void cfg80211_process_rdev_events(struct cfg80211_registered_device *rdev); |
| 429 | void cfg80211_process_wdev_events(struct wireless_dev *wdev); | ||
| 429 | 430 | ||
| 430 | int cfg80211_can_use_iftype_chan(struct cfg80211_registered_device *rdev, | 431 | int cfg80211_can_use_iftype_chan(struct cfg80211_registered_device *rdev, |
| 431 | struct wireless_dev *wdev, | 432 | struct wireless_dev *wdev, |
diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 2303ee73b50a..2ded3c7fad06 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c | |||
| @@ -680,6 +680,8 @@ static u32 map_regdom_flags(u32 rd_flags) | |||
| 680 | channel_flags |= IEEE80211_CHAN_NO_IBSS; | 680 | channel_flags |= IEEE80211_CHAN_NO_IBSS; |
| 681 | if (rd_flags & NL80211_RRF_DFS) | 681 | if (rd_flags & NL80211_RRF_DFS) |
| 682 | channel_flags |= IEEE80211_CHAN_RADAR; | 682 | channel_flags |= IEEE80211_CHAN_RADAR; |
| 683 | if (rd_flags & NL80211_RRF_NO_OFDM) | ||
| 684 | channel_flags |= IEEE80211_CHAN_NO_OFDM; | ||
| 683 | return channel_flags; | 685 | return channel_flags; |
| 684 | } | 686 | } |
| 685 | 687 | ||
| @@ -901,7 +903,21 @@ static void handle_channel(struct wiphy *wiphy, | |||
| 901 | chan->max_antenna_gain = min(chan->orig_mag, | 903 | chan->max_antenna_gain = min(chan->orig_mag, |
| 902 | (int) MBI_TO_DBI(power_rule->max_antenna_gain)); | 904 | (int) MBI_TO_DBI(power_rule->max_antenna_gain)); |
| 903 | chan->max_reg_power = (int) MBM_TO_DBM(power_rule->max_eirp); | 905 | chan->max_reg_power = (int) MBM_TO_DBM(power_rule->max_eirp); |
| 904 | chan->max_power = min(chan->max_power, chan->max_reg_power); | 906 | if (chan->orig_mpwr) { |
| 907 | /* | ||
| 908 | * Devices that have their own custom regulatory domain | ||
| 909 | * but also use WIPHY_FLAG_STRICT_REGULATORY will follow the | ||
| 910 | * passed country IE power settings. | ||
| 911 | */ | ||
| 912 | if (initiator == NL80211_REGDOM_SET_BY_COUNTRY_IE && | ||
| 913 | wiphy->flags & WIPHY_FLAG_CUSTOM_REGULATORY && | ||
| 914 | wiphy->flags & WIPHY_FLAG_STRICT_REGULATORY) | ||
| 915 | chan->max_power = chan->max_reg_power; | ||
| 916 | else | ||
| 917 | chan->max_power = min(chan->orig_mpwr, | ||
| 918 | chan->max_reg_power); | ||
| 919 | } else | ||
| 920 | chan->max_power = chan->max_reg_power; | ||
| 905 | } | 921 | } |
| 906 | 922 | ||
| 907 | static void handle_band(struct wiphy *wiphy, | 923 | static void handle_band(struct wiphy *wiphy, |
| @@ -1885,6 +1901,7 @@ static void restore_custom_reg_settings(struct wiphy *wiphy) | |||
| 1885 | chan->flags = chan->orig_flags; | 1901 | chan->flags = chan->orig_flags; |
| 1886 | chan->max_antenna_gain = chan->orig_mag; | 1902 | chan->max_antenna_gain = chan->orig_mag; |
| 1887 | chan->max_power = chan->orig_mpwr; | 1903 | chan->max_power = chan->orig_mpwr; |
| 1904 | chan->beacon_found = false; | ||
| 1888 | } | 1905 | } |
| 1889 | } | 1906 | } |
| 1890 | } | 1907 | } |
diff --git a/net/wireless/util.c b/net/wireless/util.c index 26f8cd30f712..994e2f0cc7a8 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c | |||
| @@ -735,7 +735,7 @@ void cfg80211_upload_connect_keys(struct wireless_dev *wdev) | |||
| 735 | wdev->connect_keys = NULL; | 735 | wdev->connect_keys = NULL; |
| 736 | } | 736 | } |
| 737 | 737 | ||
| 738 | static void cfg80211_process_wdev_events(struct wireless_dev *wdev) | 738 | void cfg80211_process_wdev_events(struct wireless_dev *wdev) |
| 739 | { | 739 | { |
| 740 | struct cfg80211_event *ev; | 740 | struct cfg80211_event *ev; |
| 741 | unsigned long flags; | 741 | unsigned long flags; |
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index c5a5165a5927..5a2aa17e4d3c 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c | |||
| @@ -1357,6 +1357,8 @@ static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family) | |||
| 1357 | 1357 | ||
| 1358 | memset(dst + 1, 0, sizeof(*xdst) - sizeof(*dst)); | 1358 | memset(dst + 1, 0, sizeof(*xdst) - sizeof(*dst)); |
| 1359 | xdst->flo.ops = &xfrm_bundle_fc_ops; | 1359 | xdst->flo.ops = &xfrm_bundle_fc_ops; |
| 1360 | if (afinfo->init_dst) | ||
| 1361 | afinfo->init_dst(net, xdst); | ||
| 1360 | } else | 1362 | } else |
| 1361 | xdst = ERR_PTR(-ENOBUFS); | 1363 | xdst = ERR_PTR(-ENOBUFS); |
| 1362 | 1364 | ||
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 5b228f97d4b3..210be48d8ae3 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c | |||
| @@ -415,8 +415,17 @@ static enum hrtimer_restart xfrm_timer_handler(struct hrtimer * me) | |||
| 415 | if (x->lft.hard_add_expires_seconds) { | 415 | if (x->lft.hard_add_expires_seconds) { |
| 416 | long tmo = x->lft.hard_add_expires_seconds + | 416 | long tmo = x->lft.hard_add_expires_seconds + |
| 417 | x->curlft.add_time - now; | 417 | x->curlft.add_time - now; |
| 418 | if (tmo <= 0) | 418 | if (tmo <= 0) { |
| 419 | goto expired; | 419 | if (x->xflags & XFRM_SOFT_EXPIRE) { |
| 420 | /* enter hard expire without soft expire first?! | ||
| 421 | * setting a new date could trigger this. | ||
| 422 | * workaround: fix x->curlft.add_time as below: | ||
| 423 | */ | ||
| 424 | x->curlft.add_time = now - x->saved_tmo - 1; | ||
| 425 | tmo = x->lft.hard_add_expires_seconds - x->saved_tmo; | ||
| 426 | } else | ||
| 427 | goto expired; | ||
| 428 | } | ||
| 420 | if (tmo < next) | 429 | if (tmo < next) |
| 421 | next = tmo; | 430 | next = tmo; |
| 422 | } | 431 | } |
| @@ -433,10 +442,14 @@ static enum hrtimer_restart xfrm_timer_handler(struct hrtimer * me) | |||
| 433 | if (x->lft.soft_add_expires_seconds) { | 442 | if (x->lft.soft_add_expires_seconds) { |
| 434 | long tmo = x->lft.soft_add_expires_seconds + | 443 | long tmo = x->lft.soft_add_expires_seconds + |
| 435 | x->curlft.add_time - now; | 444 | x->curlft.add_time - now; |
| 436 | if (tmo <= 0) | 445 | if (tmo <= 0) { |
| 437 | warn = 1; | 446 | warn = 1; |
| 438 | else if (tmo < next) | 447 | x->xflags &= ~XFRM_SOFT_EXPIRE; |
| 448 | } else if (tmo < next) { | ||
| 439 | next = tmo; | 449 | next = tmo; |
| 450 | x->xflags |= XFRM_SOFT_EXPIRE; | ||
| 451 | x->saved_tmo = tmo; | ||
| 452 | } | ||
| 440 | } | 453 | } |
| 441 | if (x->lft.soft_use_expires_seconds) { | 454 | if (x->lft.soft_use_expires_seconds) { |
| 442 | long tmo = x->lft.soft_use_expires_seconds + | 455 | long tmo = x->lft.soft_use_expires_seconds + |
| @@ -1981,8 +1994,10 @@ int __xfrm_init_state(struct xfrm_state *x, bool init_replay) | |||
| 1981 | goto error; | 1994 | goto error; |
| 1982 | 1995 | ||
| 1983 | x->outer_mode = xfrm_get_mode(x->props.mode, family); | 1996 | x->outer_mode = xfrm_get_mode(x->props.mode, family); |
| 1984 | if (x->outer_mode == NULL) | 1997 | if (x->outer_mode == NULL) { |
| 1998 | err = -EPROTONOSUPPORT; | ||
| 1985 | goto error; | 1999 | goto error; |
| 2000 | } | ||
| 1986 | 2001 | ||
| 1987 | if (init_replay) { | 2002 | if (init_replay) { |
| 1988 | err = xfrm_init_replay(x); | 2003 | err = xfrm_init_replay(x); |
